diff --git a/100_core/src/gplx/core/primitives/Int_list.java b/100_core/src/gplx/core/primitives/Int_list.java
index 90108ac19..1adac6a53 100644
--- a/100_core/src/gplx/core/primitives/Int_list.java
+++ b/100_core/src/gplx/core/primitives/Int_list.java
@@ -17,7 +17,10 @@ along with this program. If not, see .
*/
package gplx.core.primitives; import gplx.*; import gplx.core.*;
public class Int_list {
+ private int capacity = 0;
private int[] ary = Int_.Ary_empty; private int ary_len, ary_max;
+ public Int_list() {ary = Int_.Ary_empty; capacity = 0;} // default ctor: nothing reserved; backed by the shared empty array
+ public Int_list(int capacity) {this.capacity = capacity; this.ary = new int[capacity]; this.ary_max = capacity;} // capacity: slots preallocated here and re-reserved by Clear; ary_max is set as well, otherwise Add's (new_len > ary_max) growth branch is taken on the very first call despite the preallocation
public void Add(int uid) {
int new_len = ary_len + 1;
if (new_len > ary_max) {
@@ -32,9 +35,17 @@ public class Int_list {
public int Len() {return ary_len;} // logical item count (ary_len), not the length of the backing array
public int Get_at(int i) {return ary[i];} // reads the backing array directly; i is not checked against ary_len
public void Clear() {
- ary = Int_.Ary_empty;
+     if (ary_len > capacity) { // list outgrew its reserved size: fall back to a reserved-size array; otherwise keep the current array for reuse
+         ary = (capacity == 0) ? Int_.Ary_empty : new int[capacity];
+     }
- ary_len = ary_max = 0;
+     ary_len = 0; ary_max = ary.length; // ary_max mirrors the array that was kept; resetting it to 0 would send the next Add into its (new_len > ary_max) growth branch and waste the kept array
}
+ public int[] To_ary() {
+     // snapshot of the first ary_len items; the caller receives a fresh array, never the backing one
+     int[] copy = new int[ary_len];
+     System.arraycopy(ary, 0, copy, 0, ary_len);
+     return copy;
+ }
public static Int_list new_(int... ary) {
Int_list rv = new Int_list();
int len = ary.length;
diff --git a/400_xowa/src/gplx/langs/phps/utls/Php_preg_.java b/400_xowa/src/gplx/langs/phps/utls/Php_preg_.java
new file mode 100644
index 000000000..d2a952d9c
--- /dev/null
+++ b/400_xowa/src/gplx/langs/phps/utls/Php_preg_.java
@@ -0,0 +1,54 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
+import gplx.core.primitives.*;
+public class Php_preg_ {
+    // Splits src[src_bgn..src_end) on dlm and returns the pieces with the delimiters kept: text, dlm, text, ... (even slots are text, odd slots are delimiters).
+    // Returns null when dlm does not occur at all. dlm must be non-empty.
+    // list: scratch storage for segment boundaries; it is emptied on entry so that a list shared between calls cannot contribute stale positions.
+    // extend: when true, a match is widened over the bytes equal to dlm's last byte that directly follow it (e.g. "''" then covers a whole run of apostrophes).
+    public static byte[][] Split(Int_list list, byte[] src, int src_bgn, int src_end, byte[] dlm, boolean extend) {
+        list.Clear();
+        int dlm_len = dlm.length;
+        byte dlm_nth = dlm[dlm_len - 1];
+        int i = src_bgn;
+        list.Add(src_bgn);
+        while (i < src_end) {
+            int dlm_end = i + dlm_len;
+            if (dlm_end <= src_end && Bry_.Eq(src, i, dlm_end, dlm)) { // "<=": a delimiter may end exactly at src_end
+                if (extend && dlm_end < src_end) dlm_end = Bry_find_.Find_fwd_while(src, dlm_end, src_end, dlm_nth); // scan from dlm_end, not i; presumably Find_fwd_while returns the first position whose byte differs, so starting at i would not advance for a dlm like "ab"
+                list.Add(i);
+                list.Add(dlm_end);
+                i = dlm_end;
+            }
+            else i++;
+        }
+        list.Add(src_end);
+
+        // create brys: each pair of neighbouring boundaries in list delimits one segment
+        int rv_len = list.Len() - 1;
+        if (rv_len == 1) return null; // only src_bgn and src_end were recorded: no delimiter found
+        byte[][] rv = new byte[rv_len][];
+        for (i = 0; i < rv_len; i += 2) {
+            rv[i    ] = Bry_.Mid(src, list.Get_at(i + 0), list.Get_at(i + 1));
+            if (i + 1 == rv_len) break; // rv_len is odd: the final text segment has no delimiter after it
+            rv[i + 1] = Bry_.Mid(src, list.Get_at(i + 1), list.Get_at(i + 2));
+        }
+        return rv;
+    }
+}
diff --git a/400_xowa/src/gplx/langs/phps/utls/Php_preg___tst.java b/400_xowa/src/gplx/langs/phps/utls/Php_preg___tst.java
new file mode 100644
index 000000000..4e6ffc103
--- /dev/null
+++ b/400_xowa/src/gplx/langs/phps/utls/Php_preg___tst.java
@@ -0,0 +1,33 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
+import org.junit.*; import gplx.core.tests.*;
+public class Php_preg___tst {
+    private final Php_preg___fxt tstr = new Php_preg___fxt();
+    @Test public void Split() { // delimiters are captured: text and "''" segments alternate in the expected result
+        tstr.Test__split("a''b''c", "''", Bool_.N, "a", "''", "b", "''", "c");
+    }
+}
+class Php_preg___fxt {
+    public void Test__split(String src, String dlm, boolean extend, String... expd) {Test__split(src, 0, String_.Len(src), dlm, extend, expd);} // convenience overload: split the whole of src
+    public void Test__split(String src, int src_bgn, int src_end, String dlm, boolean extend, String... expd) {
+        byte[] src_bry = Bry_.new_u8(src), dlm_bry = Bry_.new_u8(dlm);
+        byte[][] actl = Php_preg_.Split(new gplx.core.primitives.Int_list(), src_bry, src_bgn, src_end, dlm_bry, extend); // a fresh boundary list for every call
+        Gftest.Eq__ary(expd, String_.Ary(actl), "find_failed");
+    }
+}
diff --git a/400_xowa/src/gplx/langs/phps/utls/Php_str_.java b/400_xowa/src/gplx/langs/phps/utls/Php_str_.java
index be95e004f..89bf40717 100644
--- a/400_xowa/src/gplx/langs/phps/utls/Php_str_.java
+++ b/400_xowa/src/gplx/langs/phps/utls/Php_str_.java
@@ -17,7 +17,12 @@ along with this program. If not, see .
*/
package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
public class Php_str_ {
- public static byte[] Substr(byte[] src, int bgn, int len) {return Bry_.Mid(src, bgn, bgn + len);}
+ public static byte[] Substr(byte[] src, int bgn) { // PORTED: substr($src, $bgn): the tail of src starting at bgn; a negative bgn counts back from the end
+     return Bry_.Mid(src, bgn < 0 ? Math.max(src.length + bgn, 0) : Math.min(bgn, src.length), src.length); // start is clamped into [0, src.length], so an out-of-range bgn yields a shorter or empty slice
+ }
+ public static byte[] Substr(byte[] src, int bgn, int len) {
+ return Bry_.Mid(src, bgn, bgn + len);
+ }
public static int Strspn_fwd__byte(byte[] src, byte find, int bgn, int max, int src_len) {
if (max == -1) max = src_len;
int rv = 0;
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/blocks/Xomw_block_wkr.java b/400_xowa/src/gplx/xowa/parsers/mws/blocks/Xomw_block_wkr.java
index 3665e4ead..14727a688 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/blocks/Xomw_block_wkr.java
+++ b/400_xowa/src/gplx/xowa/parsers/mws/blocks/Xomw_block_wkr.java
@@ -248,4 +248,14 @@ public class Xomw_block_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
}
return Bry_split_.Rv__ok;
}
+// private static final int
+// Para_stack_none = 0 // false
+// , Para_stack_bgn = 1 //
+// , Para_stack_mid = 2 //
+// ;
+// private static final byte
+// Mode_none = 0 // ''
+// , Mode_para = 1 // p
+// , Mode_pre = 2 // pre
+// ;
}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/quotes/Xomw_quote_wkr.java b/400_xowa/src/gplx/xowa/parsers/mws/quotes/Xomw_quote_wkr.java
new file mode 100644
index 000000000..56b0cd9f3
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/parsers/mws/quotes/Xomw_quote_wkr.java
@@ -0,0 +1,241 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.parsers.mws.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+import gplx.langs.phps.utls.*;
+import gplx.xowa.parsers.htmls.*;
+import gplx.xowa.parsers.mws.utils.*; import gplx.xowa.parsers.uniqs.*;
+import gplx.core.primitives.*;
+public class Xomw_quote_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
+ private final Bry_bfr bfr = Bry_bfr_.New();
+ private final Bry_bfr tmp = Bry_bfr_.New();
+ private final Int_list apos_pos_ary = new Int_list(32);
+ public byte[] Do_all_quotes(byte[] src) {
+ Bry_split_.Split(src, 0, src.length, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode( "\n", $text );
+ // PORTED: `$outtext .= $this->doQuotes( $line ) . "\n";` NOTE: "\n" is added below
+ bfr.Del_by_1(); // $outtext = substr( $outtext, 0, -1 );
+ apos_pos_ary.Clear();
+ return bfr.To_bry_and_clear();
+ }
+ private static final byte[] Wtxt__apos = Bry_.new_a7("''");
+ public int Split(byte[] src, int itm_bgn, int itm_end) {
+ // PORTED: arr = preg_split("/(''+)/", text, -1, PREG_SPLIT_DELIM_CAPTURE);
+ byte[][] arr = Php_preg_.Split(apos_pos_ary, src, itm_bgn, itm_end, Wtxt__apos, Bool_.Y);
+ if (arr == null) {
+ bfr.Add_mid(src, itm_bgn, itm_end).Add_byte_nl();
+ return Bry_split_.Rv__ok;
+ }
+ int arr_len = arr.length;
+
+ // First, do some preliminary work. This may shift some apostrophes from
+ // being mark-up to being text. It also counts the number of occurrences
+ // of bold and italics mark-ups.
+ int num_bold = 0;
+ int num_italics = 0;
+ for (int i = 1; i < arr_len; i += 2) {
+ int apos_len = arr[i].length;
+ // If there are ever four apostrophes, assume the first is supposed to
+ // be text, and the remaining three constitute mark-up for bold text.
+ // (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
+ if (apos_len == 4) {
+ arr[i - 1] = Bry_.Add(arr[i - 1], Byte_ascii.Apos_bry);
+ arr[i] = Bry_.new_a7("'''");
+ apos_len = 3;
+ }
+ else if (apos_len > 5) {
+ // If there are more than 5 apostrophes in a row, assume they're all
+ // text except for the last 5.
+ // (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
+ arr[i - 1] = Bry_.Add(arr[i - 1], Bry_.Repeat(Byte_ascii.Apos, apos_len - 5));
+ arr[i] = Bry_.new_a7("'''''");
+ apos_len = 5;
+ }
+ // Count the number of occurrences of bold and italics mark-ups.
+ if (apos_len == 2) {
+ num_italics++;
+ }
+ else if (apos_len == 3) {
+ num_bold++;
+ }
+ else if (apos_len == 5) {
+ num_italics++;
+ num_bold++;
+ }
+ }
+
+ // If there is an odd number of both bold and italics, it is likely
+ // that one of the bold ones was meant to be an apostrophe followed
+ // by italics. Which one we cannot know for certain, but it is more
+ // likely to be one that has a single-letter word before it.
+ if ((num_bold % 2 == 1) && (num_italics % 2 == 1)) {
+ int first_word_1 = -1;
+ int first_word_n = -1;
+ int first_space = -1;
+ for (int i = 1; i < arr_len; i += 2) {
+ if (arr[i].length == 3) {
+ byte[] prv = arr[i - 1];
+ byte[] x1 = Php_str_.Substr(prv, -1);
+ byte[] x2 = Php_str_.Substr(prv, -2, 1);
+ if (Bry_.Eq(x1, Byte_ascii.Space_bry)) {
+ if (first_space == -1) {
+ first_space = i;
+ }
+ }
+ else if (Bry_.Eq(x2, Byte_ascii.Space_bry)) {
+ first_word_1 = i;
+ // if $firstsingleletterword is set, we don't
+ // look at the other options, so we can bail early.
+ break;
+ }
+ else {
+ if (first_word_n == -1) {
+ first_word_n = i;
+ }
+ }
+ }
+ }
+
+ // If there is a single-letter word, use it!
+ if (first_word_1 > -1) {
+ arr[first_word_1] = Wtxt__apos;
+ arr[first_word_1 - 1] = Bry_.Add(arr[first_word_1 - 1], Byte_ascii.Apos);
+ }
+ else if (first_word_n > -1) {
+ // If not, but there's a multi-letter word, use that one.
+ arr[first_word_n] = Wtxt__apos;
+ arr[first_word_n - 1] = Bry_.Add(arr[first_word_n - 1], Byte_ascii.Apos);
+ }
+ else if (first_space > -1) {
+ // ... otherwise use the first one that has neither.
+ // (notice that it is possible for all three to be -1 if, for example,
+ // there is only one pentuple-apostrophe in the line)
+ arr[first_space] = Wtxt__apos;
+ arr[first_space - 1] = Bry_.Add(arr[first_space - 1], Byte_ascii.Apos);
+ }
+ }
+
+ // Now let's actually convert our apostrophic mush to HTML!
+ int state = State__empty;
+ for (int j = 0; j < arr_len; j++) {
+ if ((j % 2) == 0) {
+ if (state == State__both) {
+ tmp.Add(arr[j]);
+ }
+ else {
+ bfr.Add(arr[j]);
+ }
+ }
+ else {
+ int apos_len = 2; // strlen(r);
+ if (apos_len == 2) {
+ if (state == State__i) {
+ bfr.Add_str_a7("");
+ state = State__empty;
+ }
+ else if (state == State__bi) {
+ bfr.Add_str_a7("");
+ state = State__b;
+ }
+ else if (state == State__ib) {
+ bfr.Add_str_a7("");
+ state = State__b;
+ }
+ else if (state == State__both) {
+ bfr.Add_str_a7("").Add_bfr_and_preserve(tmp).Add_str_a7("");
+ state = State__b;
+ }
+ else { // state can be 'b' or ''
+ bfr.Add_str_a7("");
+ state = state == State__b ? State__bi : State__i;
+ }
+ }
+ else if (apos_len == 3) {
+ if (state == State__b) {
+ bfr.Add_str_a7("");
+ state = State__empty;
+ }
+ else if (state == State__bi) {
+ bfr.Add_str_a7("");
+ state = State__i;
+ }
+ else if (state == State__ib) {
+ bfr.Add_str_a7("");
+ state = State__i;
+ }
+ else if (state == State__both) {
+ bfr.Add_str_a7("").Add_bfr_and_preserve(tmp).Add_str_a7("");
+ state = State__i;
+ }
+ else { // state can be 'i' or ''
+ bfr.Add_str_a7("");
+ state = state == State__i ? State__ib : State__b;
+ }
+ }
+ else if (apos_len == 5) {
+ if (state == State__b) {
+ bfr.Add_str_a7("");
+ state = State__i;
+ }
+ else if (state == State__i) {
+ bfr.Add_str_a7("");
+ state = State__b;
+ }
+ else if (state == State__bi) {
+ bfr.Add_str_a7("");
+ state = State__empty;
+ }
+ else if (state == State__ib) {
+ bfr.Add_str_a7("");
+ state = State__empty;
+ }
+ else if (state == State__both) {
+ bfr.Add_str_a7("' . buffer . '");
+ state = State__empty;
+ }
+ else { // (state == '')
+ tmp.Clear();
+ state = State__both;
+ }
+ }
+ }
+ }
+ // Now close all remaining tags. Notice that the order is important.
+ if (state == State__b || state == State__ib) {
+ bfr.Add_str_a7("");
+ }
+ if (state == State__i || state == State__bi || state == State__ib) {
+ bfr.Add_str_a7("");
+ }
+ if (state == State__bi) {
+ bfr.Add_str_a7("");
+ }
+ // There might be lonely ''''', so make sure we have a buffer
+ if (state == State__both && tmp.Len_gt_0()) {
+ bfr.Add_str_a7("").Add_bfr_and_clear(tmp).Add_str_a7("");
+ }
+ bfr.Add_byte_nl();
+ return Bry_split_.Rv__ok;
+ }
+ private static final int
+ State__empty = 0
+ , State__b = 1
+ , State__i = 2
+ , State__bi = 3
+ , State__ib = 4
+ , State__both = 5
+ ;
+}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/quotes/Xomw_quote_wkr__tst.java b/400_xowa/src/gplx/xowa/parsers/mws/quotes/Xomw_quote_wkr__tst.java
new file mode 100644
index 000000000..6fe66f3c3
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/parsers/mws/quotes/Xomw_quote_wkr__tst.java
@@ -0,0 +1,34 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.parsers.mws.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+import org.junit.*;
+public class Xomw_quote_wkr__tst {
+    private final Xomw_quote_wkr__fxt tstr = new Xomw_quote_wkr__fxt();
+    @Test public void Basic() { // one pair of '' markers around "b"
+        tstr.Test__parse("a''b''c", "abc");
+    }
+}
+class Xomw_quote_wkr__fxt {
+// private final Xomw_parser_ctx ctx = new Xomw_parser_ctx();
+ private final Xomw_quote_wkr wkr = new Xomw_quote_wkr();
+ public void Test__parse(String src_str, String expd) {
+ byte[] src_bry = Bry_.new_u8(src_str);
+ byte[] actl = wkr.Do_all_quotes(src_bry);
+ Tfds.Eq_str_lines(expd, String_.new_u8(actl), src_str);
+ }
+}