1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-07-12 21:10:02 -04:00
commit 794b5a232f
3099 changed files with 238212 additions and 0 deletions

View File

@@ -0,0 +1,28 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
/**
 * Lexer hook for the ampersand byte.
 * Registers itself under Byte_ascii.Amp in the wiki-level trie and forwards
 * token creation to the worker returned by ctx.Amp().
 */
public class Xop_amp_lxr implements Xop_lxr {
  /** Shared instance. */
  public static final Xop_amp_lxr _ = new Xop_amp_lxr();

  /** Lexer type id: Xop_lxr_.Tid_amp. */
  public byte Lxr_tid() {
    return Xop_lxr_.Tid_amp;
  }

  /** Adds this lexer to core_trie, keyed by the ampersand byte. */
  public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
    core_trie.Add(Byte_ascii.Amp, this);
  }

  /** Nothing language-specific to register. */
  public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {
  }

  /** Passes every argument through, unchanged, to ctx.Amp().Make_tkn and returns its result. */
  public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    return ctx.Amp().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
  }
}

View File

@@ -0,0 +1,121 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
/**
 * Parses HTML character references ("&amp;name;", "&amp;#123;", "&amp;#x7B;") found in a byte[] source.
 *
 * Results of the last parse are exposed through Rslt_pos() / Rslt_val(). The instance also
 * reuses one internal buffer in Decode_as_bry, so a single instance holds mutable state
 * between calls.
 */
public class Xop_amp_mgr {
  private final Bry_bfr tmp_bfr = Bry_bfr.reset_(32);
  private final Btrie_slim_mgr amp_trie = Xop_amp_trie._;
  private int rslt_pos;
  private int rslt_val;

  /** Trie of entity names (without the leading "&amp;") and numeric prefixes. */
  public Btrie_slim_mgr Amp_trie() {return amp_trie;}
  /** Position to resume at after the last parse: after the ";" on success, else the byte after "&amp;". */
  public int Rslt_pos() {return rslt_pos;}
  /** Codepoint produced by the last successful Parse_as_int; -1 after a failed one. */
  public int Rslt_val() {return rslt_val;}

  /**
   * Tries to build a token for the reference whose "&amp;" is at amp_pos.
   *
   * @param tkn_mkr factory used to create the token
   * @param src     source bytes
   * @param src_len length of src to consider
   * @param amp_pos position of the "&amp;"
   * @param cur_pos position at which trie matching starts (the byte after "&amp;")
   * @return a text token for named references, a numeric token for "&amp;#..." references,
   *         or null when nothing valid was found (Rslt_pos() is then amp_pos + 1)
   */
  public Xop_tkn_itm Parse_as_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int cur_pos) {
    rslt_pos = amp_pos + 1; // default to fail pos; after amp;
    Object o = amp_trie.Match_bgn(src, cur_pos, src_len);
    cur_pos = amp_trie.Match_pos();
    if (o == null) return null;
    Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
    switch (itm.Tid()) {
      case Xop_amp_trie_itm.Tid_name_std:
      case Xop_amp_trie_itm.Tid_name_xowa:
        rslt_pos = cur_pos;
        return tkn_mkr.Amp_txt(amp_pos, cur_pos, itm);
      case Xop_amp_trie_itm.Tid_num_hex:
      case Xop_amp_trie_itm.Tid_num_dec:
        boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
        boolean pass = Parse_as_int(ncr_is_hex, src, src_len, amp_pos, cur_pos);
        return pass ? tkn_mkr.Amp_num(amp_pos, rslt_pos, rslt_val) : null;
      default: throw Exc_.new_unhandled(itm.Tid());
    }
  }

  /**
   * Parses the digits of a numeric character reference, reading right-to-left from the
   * byte before the next ";" back to int_bgn.
   *
   * On success rslt_val holds the codepoint and rslt_pos the position after the ";".
   * On failure rslt_val is -1 and rslt_pos is amp_pos + 1.
   *
   * @param ncr_is_hex true for "&amp;#x..." references, false for decimal ones
   * @param src        source bytes
   * @param src_len    length of src to consider
   * @param amp_pos    position of the "&amp;"
   * @param int_bgn    position of the first digit (after "#" or "#x")
   * @return true when at least one digit was read, every byte was acceptable and the value
   *         does not exceed gplx.intl.Utf8_.Codepoint_max
   */
  public boolean Parse_as_int(boolean ncr_is_hex, byte[] src, int src_len, int amp_pos, int int_bgn) {
    rslt_pos = amp_pos + 1; // default to fail pos; after amp;
    rslt_val = -1;          // clear any previous setting
    int semic_pos = Bry_finder.Find_fwd(src, Byte_ascii.Semic, int_bgn, src_len);
    if (semic_pos == Bry_finder.Not_found) return false;
    int int_end = semic_pos - 1; // int_end = pos before semicolon
    int codepoint_max = gplx.intl.Utf8_.Codepoint_max;
    int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
    boolean digit_found = false;
    for (int i = int_end; i >= int_bgn; i--) {
      byte b = src[i];
      if (ncr_is_hex) {
        if      (b >= 48 && b <=  57) cur = b - 48;
        else if (b >= 65 && b <=  70) cur = b - 55;
        else if (b >= 97 && b <= 102) cur = b - 87;
        else if((b >= 71 && b <=  90)
          ||    (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
        // NOTE(review): the 91-122 range also skips the non-letters [ \ ] ^ _ ` (91-96); kept as-is, confirm whether intended
        else return false;
      }
      else {
        cur = b - Byte_ascii.Num_0;
        if (cur < 0 || cur > 9) return false; // only 0-9; ':' (cur == 10) must not count as a digit
      }
      digit_found = true;
      if (cur != 0) {
        if (factor > codepoint_max) return false; // non-zero digit in a place value already beyond the max codepoint
        val += cur * factor;                      // cannot overflow: cur <= 15 and factor <= codepoint_max
        if (val > codepoint_max) return false;    // fail if value > largest_unicode_codepoint
      }
      if (factor <= codepoint_max) factor *= multiple; // stop growing once past the max, so long zero-padded refs cannot wrap int
    }
    if (!digit_found) return false; // "&#;" or "&#x;": no digits at all, so not a reference
    rslt_val = val;
    rslt_pos = semic_pos + 1; // position after semic
    return true;
  }

  /**
   * Returns src with every recognized character reference replaced by its decoded bytes.
   * References that fail to parse are left in place as literal text.
   *
   * @param src bytes to decode; may be null
   * @return src itself when it is null or contains no trie match after an "&amp;",
   *         otherwise a new array with the decoded content
   */
  public byte[] Decode_as_bry(byte[] src) {
    if (src == null) return src;
    int src_len = src.length;
    boolean dirty = false; // becomes true at the first trie match; until then nothing is copied
    int pos = 0;
    while (pos < src_len) {
      byte b = src[pos];
      if (b == Byte_ascii.Amp) {
        int nxt_pos = pos + 1;
        if (nxt_pos < src_len) {
          byte nxt_b = src[nxt_pos];
          Object amp_obj = amp_trie.Match_bgn_w_byte(nxt_b, src, nxt_pos, src_len);
          if (amp_obj != null) {
            if (!dirty) { // first match: copy everything seen so far
              tmp_bfr.Add_mid(src, 0, pos);
              dirty = true;
            }
            Xop_amp_trie_itm amp_itm = (Xop_amp_trie_itm)amp_obj;
            switch (amp_itm.Tid()) {
              case Xop_amp_trie_itm.Tid_name_std:
              case Xop_amp_trie_itm.Tid_name_xowa:
                tmp_bfr.Add(amp_itm.Utf8_bry());
                pos = amp_trie.Match_pos();
                break;
              case Xop_amp_trie_itm.Tid_num_hex:
              case Xop_amp_trie_itm.Tid_num_dec:
                boolean ncr_is_hex = amp_itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
                int int_bgn = amp_trie.Match_pos();
                if (Parse_as_int(ncr_is_hex, src, src_len, pos, int_bgn))
                  tmp_bfr.Add_u8_int(rslt_val);
                else
                  tmp_bfr.Add_mid(src, pos, nxt_pos); // failed: emit the "&" only; rslt_pos is pos + 1, so the rest is rescanned as text
                pos = rslt_pos;
                break;
              default:
                throw Exc_.new_unhandled(amp_itm.Tid());
            }
            continue;
          }
        }
      }
      if (dirty)
        tmp_bfr.Add_byte(b);
      ++pos;
    }
    return dirty ? tmp_bfr.Xto_bry_and_clear() : src;
  }
}

View File

@@ -0,0 +1,44 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Tests for Xop_amp_mgr.Decode_as_bry; each case is a raw input and the expected decoded text. */
public class Xop_amp_mgr_decode_tst {
  private final Xop_amp_mgr_fxt fxt = new Xop_amp_mgr_fxt();

  @Before public void init() {
    fxt.Reset();
  }

  // plain text and named references
  @Test public void Text() {
    fxt.Test_decode_as_bry("a", "a");
  }
  @Test public void Name() {
    fxt.Test_decode_as_bry("&amp;", "&");
  }
  @Test public void Name_w_text() {
    fxt.Test_decode_as_bry("a&amp;b", "a&b");
  }
  @Test public void Name_fail_semic_missing() {
    fxt.Test_decode_as_bry("a&ampb", "a&ampb");
  }
  @Test public void Name_fail_amp_only() {
    fxt.Test_decode_as_bry("a&", "a&");
  }

  // invalid numeric references stay as text
  @Test public void Num_fail() { // ! is not valid num
    fxt.Test_decode_as_bry("&#!;", "&#!;");
  }
  @Test public void Hex_fail() { // ! is not valid hex
    fxt.Test_decode_as_bry("&#x!;", "&#x!;");
  }

  // decimal references
  @Test public void Num_basic() {
    fxt.Test_decode_as_bry("&#0931;", "Σ");
  }
  @Test public void Num_zero_padded() {
    fxt.Test_decode_as_bry("&#00931;", "Σ");
  }

  // hex references
  @Test public void Hex_upper() {
    fxt.Test_decode_as_bry("&#x3A3;", "Σ");
  }
  @Test public void Hex_lower() {
    fxt.Test_decode_as_bry("&#x3a3;", "Σ");
  }
  @Test public void Hex_zero_padded() {
    fxt.Test_decode_as_bry("&#x03a3;", "Σ");
  }
  @Test public void Hex_upper_x() {
    fxt.Test_decode_as_bry("&#X3A3;", "Σ");
  }

  // edge cases
  @Test public void Num_fail_large_codepoint() {
    fxt.Test_decode_as_bry("&#538189831;", "&#538189831;");
  }
  @Test public void Num_ignore_extra_x() { // 2nd x is ignored
    fxt.Test_decode_as_bry("&#xx26D0;", Char_.XtoStr(Char_.XbyInt(9936)));
  }
}
/** Fixture that runs Xop_amp_mgr.Decode_as_bry on a String and compares the result as a String. */
class Xop_amp_mgr_fxt {
  private Xop_amp_mgr amp_mgr = new Xop_amp_mgr();

  /** No per-test state to reset. */
  public void Reset() {
  }

  /** Converts raw to bytes, decodes it, converts back and asserts equality with expd. */
  public void Test_decode_as_bry(String raw, String expd) {
    byte[] decoded = amp_mgr.Decode_as_bry(Bry_.new_u8(raw));
    String actl = String_.new_u8(decoded);
    Tfds.Eq(expd, actl);
  }
}

View File

@@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Token for a numeric character reference; carries the integer value and a byte[] form. */
public class Xop_amp_tkn_num extends Xop_tkn_itm_base {
  private int val;
  private byte[] str_as_bry;

  /**
   * @param bgn        start position passed to Tkn_ini_pos
   * @param end        end position passed to Tkn_ini_pos
   * @param val        integer value of the reference
   * @param str_as_bry byte[] stored alongside val and returned by Str_as_bry()
   */
  public Xop_amp_tkn_num(int bgn, int end, int val, byte[] str_as_bry) {
    this.val = val;
    this.str_as_bry = str_as_bry;
    this.Tkn_ini_pos(false, bgn, end);
  }

  /** Token type id: Xop_tkn_itm_.Tid_html_ncr. */
  @Override public byte Tkn_tid() {
    return Xop_tkn_itm_.Tid_html_ncr;
  }

  public int Val() {
    return val;
  }

  public byte[] Str_as_bry() {
    return str_as_bry;
  }
}

View File

@@ -0,0 +1,31 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Token for a named character reference; every accessor delegates to the wrapped trie item. */
public class Xop_amp_tkn_txt extends Xop_tkn_itm_base {
  private Xop_amp_trie_itm html_ref_itm;

  /**
   * @param bgn          start position passed to Tkn_ini_pos
   * @param end          end position passed to Tkn_ini_pos
   * @param html_ref_itm trie item describing the matched reference
   */
  public Xop_amp_tkn_txt(int bgn, int end, Xop_amp_trie_itm html_ref_itm) {
    this.html_ref_itm = html_ref_itm;
    this.Tkn_ini_pos(false, bgn, end);
  }

  /** Token type id: Xop_tkn_itm_.Tid_html_ref. */
  @Override public byte Tkn_tid() {
    return Xop_tkn_itm_.Tid_html_ref;
  }

  public int Char_int() {
    return html_ref_itm.Char_int();
  }

  public byte[] Xml_name_bry() {
    return html_ref_itm.Xml_name_bry();
  }

  /** True when the wrapped item is one of the xowa-specific names (Tid_name_xowa). */
  public boolean Itm_is_custom() {
    byte itm_tid = html_ref_itm.Tid();
    return itm_tid == Xop_amp_trie_itm.Tid_name_xowa;
  }

  public void Print_ncr(Bry_bfr bfr) {
    html_ref_itm.Print_ncr(bfr);
  }

  public void Print_literal(Bry_bfr bfr) {
    html_ref_itm.Print_literal(bfr);
  }
}

View File

@@ -0,0 +1,318 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
/**
 * Holder for the shared trie of character-reference keys.
 * Keys are stored without the leading "&" (EX: "amp;", "#x"); values are Xop_amp_trie_itm instances.
 * The trie is built once by new_() and exposed through the static field "_".
 */
public class Xop_amp_trie {
// XOWA-specific entity names. Declared before "_" so they are already initialized when new_() runs
// (static initializers execute in textual order).
public static final byte[] // NOTE: top_define
Bry_xowa_lt = Bry_.new_a7("&xowa_lt;")
, Bry_xowa_brack_bgn = Bry_.new_a7("&xowa_brack_bgn;")
, Bry_xowa_brack_end = Bry_.new_a7("&xowa_brack_end;")
, Bry_xowa_pipe = Bry_.new_a7("&xowa_pipe;")
, Bry_xowa_apos = Bry_.new_a7("&xowa_apos;")
, Bry_xowa_colon = Bry_.new_a7("&xowa_colon;")
, Bry_xowa_underline = Bry_.new_a7("&xowa_underline;")
, Bry_xowa_asterisk = Bry_.new_a7("&xowa_asterisk;")
, Bry_xowa_space = Bry_.new_a7("&xowa_space;")
, Bry_xowa_nl = Bry_.new_a7("&xowa_nl;")
, Bry_xowa_dash = Bry_.new_a7("&xowa_dash;")
;
// Shared trie instance; the constructor is not public, so the class is used only through its statics.
public static final Btrie_slim_mgr _ = new_(); Xop_amp_trie() {}
// Builds the trie in three groups: xowa-specific names (tid_is_xowa = Y), standard named
// entities (tid_is_xowa = N), then the numeric prefixes "#x", "#X" and "#".
// The int argument of each Reg_name call is the codepoint the name stands for.
private static Btrie_slim_mgr new_() {// REF.MW: Sanitizer|$wgHtmlEntities; NOTE:added apos
// presumably cs_ = case-sensitive; names differing only by case (EX: "&Aacute;" / "&aacute;") are registered separately
Btrie_slim_mgr rv = Btrie_slim_mgr.cs_();
// xowa-specific names
Reg_name(rv, Bool_.Y, 60, Bry_xowa_lt);
Reg_name(rv, Bool_.Y, 91, Bry_xowa_brack_bgn);
Reg_name(rv, Bool_.Y, 93, Bry_xowa_brack_end);
Reg_name(rv, Bool_.Y, 124, Bry_xowa_pipe);
Reg_name(rv, Bool_.Y, 39, Bry_xowa_apos);
Reg_name(rv, Bool_.Y, 58, Bry_xowa_colon);
Reg_name(rv, Bool_.Y, 95, Bry_xowa_underline);
Reg_name(rv, Bool_.Y, 42, Bry_xowa_asterisk);
Reg_name(rv, Bool_.Y, 32, Bry_xowa_space);
Reg_name(rv, Bool_.Y, 10, Bry_xowa_nl);
Reg_name(rv, Bool_.Y, 45, Bry_xowa_dash);
// standard named entities, in alphabetical order
Reg_name(rv, Bool_.N, 39, "&apos;");
Reg_name(rv, Bool_.N, 193, "&Aacute;");
Reg_name(rv, Bool_.N, 225, "&aacute;");
Reg_name(rv, Bool_.N, 194, "&Acirc;");
Reg_name(rv, Bool_.N, 226, "&acirc;");
Reg_name(rv, Bool_.N, 180, "&acute;");
Reg_name(rv, Bool_.N, 198, "&AElig;");
Reg_name(rv, Bool_.N, 230, "&aelig;");
Reg_name(rv, Bool_.N, 192, "&Agrave;");
Reg_name(rv, Bool_.N, 224, "&agrave;");
Reg_name(rv, Bool_.N, 8501, "&alefsym;");
Reg_name(rv, Bool_.N, 913, "&Alpha;");
Reg_name(rv, Bool_.N, 945, "&alpha;");
Reg_name(rv, Bool_.N, 38, "&amp;");
Reg_name(rv, Bool_.N, 8743, "&and;");
Reg_name(rv, Bool_.N, 8736, "&ang;");
Reg_name(rv, Bool_.N, 197, "&Aring;");
Reg_name(rv, Bool_.N, 229, "&aring;");
Reg_name(rv, Bool_.N, 8776, "&asymp;");
Reg_name(rv, Bool_.N, 195, "&Atilde;");
Reg_name(rv, Bool_.N, 227, "&atilde;");
Reg_name(rv, Bool_.N, 196, "&Auml;");
Reg_name(rv, Bool_.N, 228, "&auml;");
Reg_name(rv, Bool_.N, 8222, "&bdquo;");
Reg_name(rv, Bool_.N, 914, "&Beta;");
Reg_name(rv, Bool_.N, 946, "&beta;");
Reg_name(rv, Bool_.N, 166, "&brvbar;");
Reg_name(rv, Bool_.N, 8226, "&bull;");
Reg_name(rv, Bool_.N, 8745, "&cap;");
Reg_name(rv, Bool_.N, 199, "&Ccedil;");
Reg_name(rv, Bool_.N, 231, "&ccedil;");
Reg_name(rv, Bool_.N, 184, "&cedil;");
Reg_name(rv, Bool_.N, 162, "&cent;");
Reg_name(rv, Bool_.N, 935, "&Chi;");
Reg_name(rv, Bool_.N, 967, "&chi;");
Reg_name(rv, Bool_.N, 710, "&circ;");
Reg_name(rv, Bool_.N, 9827, "&clubs;");
Reg_name(rv, Bool_.N, 8773, "&cong;");
Reg_name(rv, Bool_.N, 169, "&copy;");
Reg_name(rv, Bool_.N, 8629, "&crarr;");
Reg_name(rv, Bool_.N, 8746, "&cup;");
Reg_name(rv, Bool_.N, 164, "&curren;");
Reg_name(rv, Bool_.N, 8224, "&dagger;");
Reg_name(rv, Bool_.N, 8225, "&Dagger;");
Reg_name(rv, Bool_.N, 8595, "&darr;");
Reg_name(rv, Bool_.N, 8659, "&dArr;");
Reg_name(rv, Bool_.N, 176, "&deg;");
Reg_name(rv, Bool_.N, 916, "&Delta;");
Reg_name(rv, Bool_.N, 948, "&delta;");
Reg_name(rv, Bool_.N, 9830, "&diams;");
Reg_name(rv, Bool_.N, 247, "&divide;");
Reg_name(rv, Bool_.N, 201, "&Eacute;");
Reg_name(rv, Bool_.N, 233, "&eacute;");
Reg_name(rv, Bool_.N, 202, "&Ecirc;");
Reg_name(rv, Bool_.N, 234, "&ecirc;");
Reg_name(rv, Bool_.N, 200, "&Egrave;");
Reg_name(rv, Bool_.N, 232, "&egrave;");
Reg_name(rv, Bool_.N, 8709, "&empty;");
Reg_name(rv, Bool_.N, 8195, "&emsp;");
Reg_name(rv, Bool_.N, 8194, "&ensp;");
Reg_name(rv, Bool_.N, 917, "&Epsilon;");
Reg_name(rv, Bool_.N, 949, "&epsilon;");
Reg_name(rv, Bool_.N, 8801, "&equiv;");
Reg_name(rv, Bool_.N, 919, "&Eta;");
Reg_name(rv, Bool_.N, 951, "&eta;");
Reg_name(rv, Bool_.N, 208, "&ETH;");
Reg_name(rv, Bool_.N, 240, "&eth;");
Reg_name(rv, Bool_.N, 203, "&Euml;");
Reg_name(rv, Bool_.N, 235, "&euml;");
Reg_name(rv, Bool_.N, 8364, "&euro;");
Reg_name(rv, Bool_.N, 8707, "&exist;");
Reg_name(rv, Bool_.N, 402, "&fnof;");
Reg_name(rv, Bool_.N, 8704, "&forall;");
Reg_name(rv, Bool_.N, 189, "&frac12;");
Reg_name(rv, Bool_.N, 188, "&frac14;");
Reg_name(rv, Bool_.N, 190, "&frac34;");
Reg_name(rv, Bool_.N, 8260, "&frasl;");
Reg_name(rv, Bool_.N, 915, "&Gamma;");
Reg_name(rv, Bool_.N, 947, "&gamma;");
Reg_name(rv, Bool_.N, 8805, "&ge;");
Reg_name(rv, Bool_.N, 62, "&gt;");
Reg_name(rv, Bool_.N, 8596, "&harr;");
Reg_name(rv, Bool_.N, 8660, "&hArr;");
Reg_name(rv, Bool_.N, 9829, "&hearts;");
Reg_name(rv, Bool_.N, 8230, "&hellip;");
Reg_name(rv, Bool_.N, 205, "&Iacute;");
Reg_name(rv, Bool_.N, 237, "&iacute;");
Reg_name(rv, Bool_.N, 206, "&Icirc;");
Reg_name(rv, Bool_.N, 238, "&icirc;");
Reg_name(rv, Bool_.N, 161, "&iexcl;");
Reg_name(rv, Bool_.N, 204, "&Igrave;");
Reg_name(rv, Bool_.N, 236, "&igrave;");
Reg_name(rv, Bool_.N, 8465, "&image;");
Reg_name(rv, Bool_.N, 8734, "&infin;");
Reg_name(rv, Bool_.N, 8747, "&int;");
Reg_name(rv, Bool_.N, 921, "&Iota;");
Reg_name(rv, Bool_.N, 953, "&iota;");
Reg_name(rv, Bool_.N, 191, "&iquest;");
Reg_name(rv, Bool_.N, 8712, "&isin;");
Reg_name(rv, Bool_.N, 207, "&Iuml;");
Reg_name(rv, Bool_.N, 239, "&iuml;");
Reg_name(rv, Bool_.N, 922, "&Kappa;");
Reg_name(rv, Bool_.N, 954, "&kappa;");
Reg_name(rv, Bool_.N, 923, "&Lambda;");
Reg_name(rv, Bool_.N, 955, "&lambda;");
Reg_name(rv, Bool_.N, 9001, "&lang;");
Reg_name(rv, Bool_.N, 171, "&laquo;");
Reg_name(rv, Bool_.N, 8592, "&larr;");
Reg_name(rv, Bool_.N, 8656, "&lArr;");
Reg_name(rv, Bool_.N, 8968, "&lceil;");
Reg_name(rv, Bool_.N, 8220, "&ldquo;");
Reg_name(rv, Bool_.N, 8804, "&le;");
Reg_name(rv, Bool_.N, 8970, "&lfloor;");
Reg_name(rv, Bool_.N, 8727, "&lowast;");
Reg_name(rv, Bool_.N, 9674, "&loz;");
Reg_name(rv, Bool_.N, 8206, "&lrm;");
Reg_name(rv, Bool_.N, 8249, "&lsaquo;");
Reg_name(rv, Bool_.N, 8216, "&lsquo;");
Reg_name(rv, Bool_.N, 60, "&lt;");
Reg_name(rv, Bool_.N, 175, "&macr;");
Reg_name(rv, Bool_.N, 8212, "&mdash;");
Reg_name(rv, Bool_.N, 181, "&micro;");
Reg_name(rv, Bool_.N, 183, "&middot;");
Reg_name(rv, Bool_.N, 8722, "&minus;");
Reg_name(rv, Bool_.N, 924, "&Mu;");
Reg_name(rv, Bool_.N, 956, "&mu;");
Reg_name(rv, Bool_.N, 8711, "&nabla;");
Reg_name(rv, Bool_.N, 160, "&nbsp;");
Reg_name(rv, Bool_.N, 8211, "&ndash;");
Reg_name(rv, Bool_.N, 8800, "&ne;");
Reg_name(rv, Bool_.N, 8715, "&ni;");
Reg_name(rv, Bool_.N, 172, "&not;");
Reg_name(rv, Bool_.N, 8713, "&notin;");
Reg_name(rv, Bool_.N, 8836, "&nsub;");
Reg_name(rv, Bool_.N, 209, "&Ntilde;");
Reg_name(rv, Bool_.N, 241, "&ntilde;");
Reg_name(rv, Bool_.N, 925, "&Nu;");
Reg_name(rv, Bool_.N, 957, "&nu;");
Reg_name(rv, Bool_.N, 211, "&Oacute;");
Reg_name(rv, Bool_.N, 243, "&oacute;");
Reg_name(rv, Bool_.N, 212, "&Ocirc;");
Reg_name(rv, Bool_.N, 244, "&ocirc;");
Reg_name(rv, Bool_.N, 338, "&OElig;");
Reg_name(rv, Bool_.N, 339, "&oelig;");
Reg_name(rv, Bool_.N, 210, "&Ograve;");
Reg_name(rv, Bool_.N, 242, "&ograve;");
Reg_name(rv, Bool_.N, 8254, "&oline;");
Reg_name(rv, Bool_.N, 937, "&Omega;");
Reg_name(rv, Bool_.N, 969, "&omega;");
Reg_name(rv, Bool_.N, 927, "&Omicron;");
Reg_name(rv, Bool_.N, 959, "&omicron;");
Reg_name(rv, Bool_.N, 8853, "&oplus;");
Reg_name(rv, Bool_.N, 8744, "&or;");
Reg_name(rv, Bool_.N, 170, "&ordf;");
Reg_name(rv, Bool_.N, 186, "&ordm;");
Reg_name(rv, Bool_.N, 216, "&Oslash;");
Reg_name(rv, Bool_.N, 248, "&oslash;");
Reg_name(rv, Bool_.N, 213, "&Otilde;");
Reg_name(rv, Bool_.N, 245, "&otilde;");
Reg_name(rv, Bool_.N, 8855, "&otimes;");
Reg_name(rv, Bool_.N, 214, "&Ouml;");
Reg_name(rv, Bool_.N, 246, "&ouml;");
Reg_name(rv, Bool_.N, 182, "&para;");
Reg_name(rv, Bool_.N, 8706, "&part;");
Reg_name(rv, Bool_.N, 8240, "&permil;");
Reg_name(rv, Bool_.N, 8869, "&perp;");
Reg_name(rv, Bool_.N, 934, "&Phi;");
Reg_name(rv, Bool_.N, 966, "&phi;");
Reg_name(rv, Bool_.N, 928, "&Pi;");
Reg_name(rv, Bool_.N, 960, "&pi;");
Reg_name(rv, Bool_.N, 982, "&piv;");
Reg_name(rv, Bool_.N, 177, "&plusmn;");
Reg_name(rv, Bool_.N, 163, "&pound;");
Reg_name(rv, Bool_.N, 8242, "&prime;");
Reg_name(rv, Bool_.N, 8243, "&Prime;");
Reg_name(rv, Bool_.N, 8719, "&prod;");
Reg_name(rv, Bool_.N, 8733, "&prop;");
Reg_name(rv, Bool_.N, 936, "&Psi;");
Reg_name(rv, Bool_.N, 968, "&psi;");
Reg_name(rv, Bool_.N, 34, "&quot;");
Reg_name(rv, Bool_.N, 8730, "&radic;");
Reg_name(rv, Bool_.N, 9002, "&rang;");
Reg_name(rv, Bool_.N, 187, "&raquo;");
Reg_name(rv, Bool_.N, 8594, "&rarr;");
Reg_name(rv, Bool_.N, 8658, "&rArr;");
Reg_name(rv, Bool_.N, 8969, "&rceil;");
Reg_name(rv, Bool_.N, 8221, "&rdquo;");
Reg_name(rv, Bool_.N, 8476, "&real;");
Reg_name(rv, Bool_.N, 174, "&reg;");
Reg_name(rv, Bool_.N, 8971, "&rfloor;");
Reg_name(rv, Bool_.N, 929, "&Rho;");
Reg_name(rv, Bool_.N, 961, "&rho;");
Reg_name(rv, Bool_.N, 8207, "&rlm;");
Reg_name(rv, Bool_.N, 8250, "&rsaquo;");
Reg_name(rv, Bool_.N, 8217, "&rsquo;");
Reg_name(rv, Bool_.N, 8218, "&sbquo;");
Reg_name(rv, Bool_.N, 352, "&Scaron;");
Reg_name(rv, Bool_.N, 353, "&scaron;");
Reg_name(rv, Bool_.N, 8901, "&sdot;");
Reg_name(rv, Bool_.N, 167, "&sect;");
Reg_name(rv, Bool_.N, 173, "&shy;");
Reg_name(rv, Bool_.N, 931, "&Sigma;");
Reg_name(rv, Bool_.N, 963, "&sigma;");
Reg_name(rv, Bool_.N, 962, "&sigmaf;");
Reg_name(rv, Bool_.N, 8764, "&sim;");
Reg_name(rv, Bool_.N, 9824, "&spades;");
Reg_name(rv, Bool_.N, 8834, "&sub;");
Reg_name(rv, Bool_.N, 8838, "&sube;");
Reg_name(rv, Bool_.N, 8721, "&sum;");
Reg_name(rv, Bool_.N, 8835, "&sup;");
Reg_name(rv, Bool_.N, 185, "&sup1;");
Reg_name(rv, Bool_.N, 178, "&sup2;");
Reg_name(rv, Bool_.N, 179, "&sup3;");
Reg_name(rv, Bool_.N, 8839, "&supe;");
Reg_name(rv, Bool_.N, 223, "&szlig;");
Reg_name(rv, Bool_.N, 932, "&Tau;");
Reg_name(rv, Bool_.N, 964, "&tau;");
Reg_name(rv, Bool_.N, 8756, "&there4;");
Reg_name(rv, Bool_.N, 920, "&Theta;");
Reg_name(rv, Bool_.N, 952, "&theta;");
Reg_name(rv, Bool_.N, 977, "&thetasym;");
Reg_name(rv, Bool_.N, 8201, "&thinsp;");
Reg_name(rv, Bool_.N, 222, "&THORN;");
Reg_name(rv, Bool_.N, 254, "&thorn;");
Reg_name(rv, Bool_.N, 732, "&tilde;");
Reg_name(rv, Bool_.N, 215, "&times;");
Reg_name(rv, Bool_.N, 8482, "&trade;");
Reg_name(rv, Bool_.N, 218, "&Uacute;");
Reg_name(rv, Bool_.N, 250, "&uacute;");
Reg_name(rv, Bool_.N, 8593, "&uarr;");
Reg_name(rv, Bool_.N, 8657, "&uArr;");
Reg_name(rv, Bool_.N, 219, "&Ucirc;");
Reg_name(rv, Bool_.N, 251, "&ucirc;");
Reg_name(rv, Bool_.N, 217, "&Ugrave;");
Reg_name(rv, Bool_.N, 249, "&ugrave;");
Reg_name(rv, Bool_.N, 168, "&uml;");
Reg_name(rv, Bool_.N, 978, "&upsih;");
Reg_name(rv, Bool_.N, 933, "&Upsilon;");
Reg_name(rv, Bool_.N, 965, "&upsilon;");
Reg_name(rv, Bool_.N, 220, "&Uuml;");
Reg_name(rv, Bool_.N, 252, "&uuml;");
Reg_name(rv, Bool_.N, 8472, "&weierp;");
Reg_name(rv, Bool_.N, 926, "&Xi;");
Reg_name(rv, Bool_.N, 958, "&xi;");
Reg_name(rv, Bool_.N, 221, "&Yacute;");
Reg_name(rv, Bool_.N, 253, "&yacute;");
Reg_name(rv, Bool_.N, 165, "&yen;");
Reg_name(rv, Bool_.N, 376, "&Yuml;");
Reg_name(rv, Bool_.N, 255, "&yuml;");
Reg_name(rv, Bool_.N, 918, "&Zeta;");
Reg_name(rv, Bool_.N, 950, "&zeta;");
Reg_name(rv, Bool_.N, 8205, "&zwj;");
Reg_name(rv, Bool_.N, 8204, "&zwnj;");
// numeric prefixes: the digits that follow are parsed separately by the caller
Reg_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#x");
Reg_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#X");
Reg_prefix(rv, Xop_amp_trie_itm.Tid_num_dec, "#");
return rv;
}
// String overload: converts the name to bytes and forwards to the byte[] overload.
private static void Reg_name(Btrie_slim_mgr trie, boolean tid_is_xowa, int char_int, String xml_name_str) {Reg_name(trie, tid_is_xowa, char_int, Bry_.new_a7(xml_name_str));}
// Registers one named entity. The item keeps the full name ("&amp;"); the trie key drops only the leading "&".
private static void Reg_name(Btrie_slim_mgr trie, boolean tid_is_xowa, int char_int, byte[] xml_name_bry) {
byte itm_tid = tid_is_xowa ? Xop_amp_trie_itm.Tid_name_xowa : Xop_amp_trie_itm.Tid_name_std;
Xop_amp_trie_itm itm = new Xop_amp_trie_itm(itm_tid, char_int, xml_name_bry);
byte[] key = Bry_.Mid(xml_name_bry, 1, xml_name_bry.length); // ignore & for purpose of trie; EX: "amp;"; NOTE: must keep trailing ";" else "&amp " will be valid;
trie.Add_obj(key, itm);
}
// Registers a numeric-reference prefix. The prefix bytes serve as both the trie key and the item's
// name, and the item's codepoint is Char_int_null because the value comes from the digits that follow.
private static void Reg_prefix(Btrie_slim_mgr trie, byte prefix_type, String prefix) {
byte[] prefix_ary = Bry_.new_a7(prefix);
Xop_amp_trie_itm itm = new Xop_amp_trie_itm(prefix_type, Xop_amp_trie_itm.Char_int_null, prefix_ary);
trie.Add_obj(prefix_ary, itm);
}
}

View File

@@ -0,0 +1,58 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.html.*; import gplx.xowa.html.lnkis.*;
/**
 * Value stored in the character-reference trie: one named entity or one numeric prefix.
 * All fields are fixed at construction.
 */
public class Xop_amp_trie_itm {
  public static final byte Tid_name_std = 1, Tid_name_xowa = 2, Tid_num_hex = 3, Tid_num_dec = 4;
  public static final int Char_int_null = -1;

  private final byte tid;
  private final int char_int;        // val; EX: 160
  private final byte[] utf8_bry;     // result of Utf16_.Encode_int_to_bry(char_int); EX: for 160, (C2, A0)
  private final byte[] xml_name_bry; // EX: "&nbsp;"
  private final int key_name_len;    // xml_name_bry.length - 2; EX: "nbsp".Len

  /**
   * @param tid          one of the Tid_* constants
   * @param char_int     codepoint the reference stands for
   * @param xml_name_bry name bytes as registered; EX: "&nbsp;"
   */
  public Xop_amp_trie_itm(byte tid, int char_int, byte[] xml_name_bry) {
    this.tid = tid;
    this.char_int = char_int;
    this.utf8_bry = gplx.intl.Utf16_.Encode_int_to_bry(char_int);
    this.xml_name_bry = xml_name_bry;
    this.key_name_len = xml_name_bry.length - 2; // 2 for & and ;
  }

  public byte Tid() {return tid;}
  public int Char_int() {return char_int;}
  public byte[] Utf8_bry() {return utf8_bry;}
  public byte[] Xml_name_bry() {return xml_name_bry;}
  public int Key_name_len() {return key_name_len;}

  /**
   * Writes the reference in escaped form. For &lt; &gt; &quot; and &amp; the stored name is written
   * (never the raw char); everything else is written as Escape_bgn + decimal char_int + ";".
   */
  public void Print_ncr(Bry_bfr bfr) {
    boolean keep_name
      =  char_int == Byte_ascii.Lt
      || char_int == Byte_ascii.Gt
      || char_int == Byte_ascii.Quote
      || char_int == Byte_ascii.Amp;
    if (keep_name) {
      bfr.Add(xml_name_bry);                     // NOTE: never write actual char; EX: "&lt;" should be written as "&lt;", not "<"
      return;
    }
    bfr.Add(Xoh_lnki_title_fmtr.Escape_bgn);     // &#
    bfr.Add_int_variable(char_int);              // 160
    bfr.Add_byte(Byte_ascii.Semic);              // ;
  }

  /**
   * Writes the decoded bytes, except for &lt; &gt; &quot; and &amp;, which are written as their
   * Html_entity_ byte forms instead of the raw char. MW does same; DATE:2014-11-07
   */
  public void Print_literal(Bry_bfr bfr) {
    if      (char_int == Byte_ascii.Lt)    bfr.Add(Html_entity_.Lt_bry);
    else if (char_int == Byte_ascii.Gt)    bfr.Add(Html_entity_.Gt_bry);
    else if (char_int == Byte_ascii.Quote) bfr.Add(Html_entity_.Quote_bry);
    else if (char_int == Byte_ascii.Amp)   bfr.Add(Html_entity_.Amp_bry);
    else                                   bfr.Add(utf8_bry); // write literal; EX: "[" not "&#91;"
  }
}

View File

@@ -0,0 +1,32 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Context worker for "&": asks the app-level Xop_amp_mgr to parse a character reference
 * and adds the resulting token to the root, or falls back to plain text.
 */
public class Xop_amp_wkr implements Xop_ctx_wkr {
  public void Ctor_ctx(Xop_ctx ctx) {
  }

  public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
  }

  public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
  }

  /**
   * @param bgn     position of the "&"
   * @param cur_pos position after the "&"
   * @return position at which lexing resumes
   */
  public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn, int cur_pos) {
    // NOTE: & is last char in page; strange and rare, but don't raise error
    if (cur_pos == src_len) {
      return ctx.Lxr_make_txt_(cur_pos);
    }

    Xop_amp_mgr mgr = ctx.App().Parser_amp_mgr();
    Xop_tkn_itm tkn = mgr.Parse_as_tkn(tkn_mkr, src, src_len, bgn, cur_pos);
    int resume_pos = mgr.Rslt_pos(); // must be read after Parse_as_tkn, which sets it

    if (tkn != null) {
      ctx.Subs_add(root, tkn);
      return resume_pos;
    }
    return ctx.Lxr_make_txt_(resume_pos); // no valid reference: treat as text
  }
}

View File

@@ -0,0 +1,41 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Parser-level tests for "&" handling: token creation and html output. */
public class Xop_amp_wkr_tst {
  private Xop_fxt fxt = new Xop_fxt();

  // token-level checks
  @Test public void Name() { // check for html_ref
    fxt.Test_parse_page_wiki("&amp;", fxt.tkn_html_ref_("&amp;"));
  }
  @Test public void Name_fail() { // check for text
    fxt.Test_parse_page_wiki("&nil;", fxt.tkn_txt_(0, 5));
  }
  @Test public void Hex() { // check for html_ncr; Σ: http://en.wikipedia.org/wiki/Numeric_character_reference
    fxt.Test_parse_page_wiki("&#x3A3;", fxt.tkn_html_ncr_(931));
  }
  @Test public void Num_fail_incomplete() {
    fxt.Test_parse_page_wiki("&#", fxt.tkn_txt_());
  }

  // html-output checks
  @Test public void Convert_to_named() { // note that &amp; is printed, not &
    fxt.Test_parse_page_wiki_str("&amp;", "&amp;");
  }
  @Test public void Convert_to_named_amp() { // PURPOSE: html_wtr was not handling & only
    fxt.Test_parse_page_wiki_str("&", "&amp;");
  }
  @Test public void Convert_to_numeric() { // testing that &#225; is outputted, not á
    fxt.Test_parse_page_wiki_str("&aacute;", "&#225;");
  }

  // PURPOSE: early rewrite of Xop_amp_mgr caused Xoh_html_wtr_escaper to fail with array out of bounds error; EX:w:Czech_Republic; DATE:2014-05-11
  @Test public void Defect_bad_code_fails() {
    // basically checks amp parsing inside xnde inside lnki's alt (which uses different parsing code)
    String raw = "[[File:A.png|alt=<p>&#10;</p>]]";
    String expd = "<a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"&#10;\" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>";
    fxt.Test_parse_page_wiki_str(raw, expd);
  }

  // PURPOSE: check that ncr is unescaped; PAGE:de.w:Cross-Site-Scripting; DATE:2014-07-23
  @Test public void Ignore_ncr() {
    String raw = "a <code>&#60;iframe&#62;</code>) b";
    String expd = "a <code>&#60;iframe&#62;</code>) b"; // &#60; should not become <
    fxt.Test_parse_page_all_str(raw, expd);
  }
}

View File

@@ -0,0 +1,83 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * State machine that classifies a run of apostrophes.
 * Given the length of the run and the formatting state left by earlier runs, Ident computes
 * the token type (ital / bold / dual), the command to emit (one of Xop_apos_tkn_.Cmd_*),
 * the number of apostrophes to be treated as literal text, and the follow-up formatting state.
 * The state persists between Ident calls; it is only reset by State_clear.
 */
public class Xop_apos_dat {
// current formatting state (Xop_apos_tkn_.State_*); carried over from one Ident call to the next
public int State() {return state;} public void State_clear() {state = Xop_apos_tkn_.State_nil;} private int state = Xop_apos_tkn_.State_nil;
// results of the most recent Ident call
public int Typ() {return typ;} private int typ;
public int Cmd() {return cmd;} private int cmd;
public int Lit_apos() {return lit_apos;} private int lit_apos;
// non-zero only when an ital run follows an undecided dual run; holds the cmd that the earlier dual tkn should be changed to
public int Dual_cmd() {return dual_cmd;} private int dual_cmd;
/**
 * Classifies an apostrophe run of apos_len chars ending at cur_pos and updates Typ / Cmd / Lit_apos / Dual_cmd / State.
 * Lengths 2, 3 and 5 map directly to ital, bold and dual.
 * Length 4 is treated as one literal apostrophe followed by bold.
 * Any other length is treated as dual with (apos_len - 5) literal apostrophes; more than one literal is logged as Multiple_apos.
 * NOTE(review): a length below 2 would reach the default branch and yield a negative Lit_apos; callers visible here never pass such a length.
 */
public void Ident(Xop_ctx ctx, byte[] src, int apos_len, int cur_pos) {
typ = cmd = lit_apos = dual_cmd = 0;
switch (apos_len) {
case Xop_apos_tkn_.Len_ital: case Xop_apos_tkn_.Len_bold: case Xop_apos_tkn_.Len_dual:
Ident_props(apos_len); break;
case Xop_apos_tkn_.Len_apos_bold:
lit_apos = 1;
Ident_props(Xop_apos_tkn_.Len_bold); break;
default:
lit_apos = apos_len - Xop_apos_tkn_.Len_dual;
Ident_props(Xop_apos_tkn_.Len_dual);
if (lit_apos > 1)
ctx.Msg_log().Add_itm_none(Xop_apos_log.Multiple_apos, src, cur_pos - apos_len, cur_pos);
break;
}
}
/**
 * Transition table: sets typ to apos_len, then picks cmd and the next state from the (apos_len, current state) pair.
 * Throws for an apos_len other than ital / bold / dual, or for an unknown state.
 */
private void Ident_props(int apos_len) {
typ = apos_len;
switch (apos_len) {
// ital run: closes an open ital, else opens one
case Xop_apos_tkn_.Len_ital: {
switch (state) {
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_nil; break;
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_b; break;
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_bi_end__b_bgn; state = Xop_apos_tkn_.State_b; break;
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_b; dual_cmd = Xop_apos_tkn_.Cmd_bi_bgn; break;
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_i_bgn; state = Xop_apos_tkn_.State_bi; break;
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_i_bgn; state = Xop_apos_tkn_.State_i; break;
default: throw Exc_.new_unhandled(state);
}
break;
}
// bold run: closes an open bold, else opens one
case Xop_apos_tkn_.Len_bold: {
switch (state) {
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_nil; break;
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_ib_end__i_bgn; state = Xop_apos_tkn_.State_i; break;
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_i; break;
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_i; break; // NOTE: dual_cmd = Cmd_ib_bgn is implied
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_b_bgn; state = Xop_apos_tkn_.State_ib; break;
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_b_bgn; state = Xop_apos_tkn_.State_b; break;
default: throw Exc_.new_unhandled(state);
}
break;
}
// dual run: toggles both bold and ital
case Xop_apos_tkn_.Len_dual: {
switch (state) {
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_b_end__i_bgn; state = Xop_apos_tkn_.State_i; break;
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_i_end__b_bgn; state = Xop_apos_tkn_.State_b; break;
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_ib_end; state = Xop_apos_tkn_.State_nil; break;
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_bi_end; state = Xop_apos_tkn_.State_nil; break;
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_bi_end; state = Xop_apos_tkn_.State_nil; break; // NOTE: dual_cmd = Cmd_ib_bgn is implied
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_ib_bgn; state = Xop_apos_tkn_.State_dual; break;
default: throw Exc_.new_unhandled(state);
}
break;
}
default: throw Exc_.new_unhandled(apos_len);
}
}
}

View File

@@ -0,0 +1,26 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Log-message definitions raised while parsing apostrophe runs; all are registered as notes under the "apos" group. */
public class Xop_apos_log {
	// group must be declared first: the items below are created from it during static initialization
	private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "apos");

	/** Logged when a bold tkn is reinterpreted as ital. */
	public static final Gfo_msg_itm Bold_converted_to_ital = Gfo_msg_itm_.new_note_(owner, "Bold_converted_to_ital");

	/** Logged when formatting is still open at the end of a frame. */
	public static final Gfo_msg_itm Dangling_apos = Gfo_msg_itm_.new_note_(owner, "Dangling_apos");

	/** Logged when a run contains more than one literal apostrophe. */
	public static final Gfo_msg_itm Multiple_apos = Gfo_msg_itm_.new_note_(owner, "Multiple_apos");
}

View File

@@ -0,0 +1,26 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
/** Lexer hook for apostrophe runs: registers two consecutive apostrophes in the core trie and delegates tkn creation to the ctx's apos worker. */
public class Xop_apos_lxr implements Xop_lxr {
	/** Trie key: a run must begin with two apostrophes to be handed to this lxr. */
	private static final byte[] Apos_ary = new byte[] {Byte_ascii.Apos, Byte_ascii.Apos};

	/** Shared instance. */
	public static final Xop_apos_lxr _ = new Xop_apos_lxr();

	Xop_apos_lxr() {}

	public byte Lxr_tid() {
		return Xop_lxr_.Tid_apos;
	}

	public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
		core_trie.Add(Apos_ary, this);
	}

	public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {
		// nothing language-specific to register
	}

	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		Xop_apos_wkr wkr = ctx.Apos();
		return wkr.Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
	}
}

View File

@@ -0,0 +1,29 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Token for one run of apostrophes: records the run length, its type (ital / bold / dual), the formatting cmd, and the count of literal apostrophes. */
public class Xop_apos_tkn extends Xop_tkn_itm_base {
	private int apos_len;
	private int apos_lit;
	private int apos_tid;
	private int apos_cmd;

	/**
	 * @param bgn      start position of the run in src
	 * @param end      end position of the run in src
	 * @param apos_len number of apostrophes in the run
	 * @param apos_tid run type
	 * @param apos_cmd formatting cmd (Xop_apos_tkn_.Cmd_*)
	 * @param apos_lit number of apostrophes to treat as literal text
	 */
	public Xop_apos_tkn(int bgn, int end, int apos_len, int apos_tid, int apos_cmd, int apos_lit) {
		this.apos_len = apos_len;
		this.apos_tid = apos_tid;
		this.apos_cmd = apos_cmd;
		this.apos_lit = apos_lit;
		this.Tkn_ini_pos(false, bgn, end);
	}

	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_apos;
	}

	public int Apos_len() {
		return apos_len;
	}

	public int Apos_lit() {
		return apos_lit;
	}
	public Xop_apos_tkn Apos_lit_(int v) {
		this.apos_lit = v;
		return this;
	}

	public int Apos_tid() {
		return apos_tid;
	}
	public Xop_apos_tkn Apos_tid_(int v) {
		this.apos_tid = v;
		return this;
	}

	public int Apos_cmd() {
		return apos_cmd;
	}
	public Xop_apos_tkn Apos_cmd_(int v) {
		this.apos_cmd = v;
		return this;
	}
}

View File

@@ -0,0 +1,36 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Constants for apostrophe tokens: formatting cmds, run lengths, run types and parser states. */
public class Xop_apos_tkn_ {
	// formatting cmds; "bgn" opens and "end" closes; "x__y" performs x then y in a single tkn
	public static final int Cmd_nil = 0;
	public static final int Cmd_i_bgn = 1;
	public static final int Cmd_i_end = 2;
	public static final int Cmd_b_bgn = 3;
	public static final int Cmd_b_end = 4;
	public static final int Cmd_bi_bgn = 5;
	public static final int Cmd_ib_bgn = 6;
	public static final int Cmd_ib_end = 7;
	public static final int Cmd_bi_end = 8;
	public static final int Cmd_bi_end__b_bgn = 9;
	public static final int Cmd_ib_end__i_bgn = 10;
	public static final int Cmd_b_end__i_bgn = 11;
	public static final int Cmd_i_end__b_bgn = 12;

	/** Short display name for each cmd, indexed by the Cmd_* value. */
	public static final byte[][] Cmds = new byte[][]
	{ Bry_.new_a7("nil")
	, Bry_.new_a7("i+")
	, Bry_.new_a7("i-")
	, Bry_.new_a7("b+")
	, Bry_.new_a7("b-")
	, Bry_.new_a7("bi+")
	, Bry_.new_a7("ib+")
	, Bry_.new_a7("ib-")
	, Bry_.new_a7("bi-")
	, Bry_.new_a7("bi-b+")
	, Bry_.new_a7("ib-i+")
	, Bry_.new_a7("b-i+")
	, Bry_.new_a7("i-b+")
	};

	/** Returns the display name of the cmd with the given id; id must be a valid index into Cmds. */
	public static String Cmd_str(int id) {
		byte[] cmd_name = Cmds[id];
		return String_.new_u8(cmd_name);
	}

	// number of apostrophes in a run
	public static final int Len_ital = 2;
	public static final int Len_bold = 3;
	public static final int Len_dual = 5;
	public static final int Len_apos_bold = 4;

	// run types; values match the corresponding Len_* constants
	public static final int Typ_ital = 2;
	public static final int Typ_bold = 3;
	public static final int Typ_dual = 5;

	// parser states describing which formatting is currently open
	public static final int State_nil = 0;
	public static final int State_i = 1;
	public static final int State_b = 2;
	public static final int State_bi = 3;
	public static final int State_ib = 4;
	public static final int State_dual = 5;
}

View File

@@ -0,0 +1,30 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Test checker describing the expected cmd and literal-apostrophe count of an Xop_apos_tkn. */
public class Xop_apos_tkn_chkr extends Xop_tkn_chkr_base {
	private int apos_cmd = Xop_apos_tkn_.Cmd_nil;	// Cmd_nil is the "not set" default
	private int apos_lit = -1;						// -1 is the "not set" default

	@Override public Class<?> TypeOf() {
		return Xop_apos_tkn.class;
	}

	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_apos;
	}

	public int Apos_cmd() {
		return apos_cmd;
	}
	public Xop_apos_tkn_chkr Apos_cmd_(int v) {
		this.apos_cmd = v;
		return this;
	}

	public int Apos_lit() {
		return apos_lit;
	}
	public Xop_apos_tkn_chkr Apos_lit_(int v) {
		this.apos_lit = v;
		return this;
	}

	/**
	 * Compares the expected cmd / lit values against the actual tkn and returns err plus whatever Tst_val reports.
	 * NOTE(review): the first Tst_val argument is true when the expected value is still at its default; presumably that skips the comparison -- confirm against Tst_mgr.
	 */
	@Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
		Xop_apos_tkn actl = (Xop_apos_tkn)actl_obj;

		boolean cmd_is_dflt = apos_cmd == Xop_apos_tkn_.Cmd_nil;
		String expd_cmd_str = Xop_apos_tkn_.Cmd_str(apos_cmd);
		String actl_cmd_str = Xop_apos_tkn_.Cmd_str(actl.Apos_cmd());
		err += mgr.Tst_val(cmd_is_dflt, path, "apos_cmd", expd_cmd_str, actl_cmd_str);

		boolean lit_is_dflt = apos_lit == -1;
		err += mgr.Tst_val(lit_is_dflt, path, "apos_lit", apos_lit, actl.Apos_lit());

		return err;
	}
}

View File

@@ -0,0 +1,161 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Parser worker for apostrophe runs (wikitext bold / italic markup).
 * Each run is classified by Xop_apos_dat, turned into an Xop_apos_tkn and remembered in a stack.
 * When a frame ends (EndFrame), unbalanced markup is resolved: one bold may be reinterpreted as
 * ital (REF.MW: Parser|doQuotes) and any formatting still open is closed with a zero-length tkn.
 */
public class Xop_apos_wkr implements Xop_ctx_wkr {
	public Xop_apos_dat Dat() {return dat;} private Xop_apos_dat dat = new Xop_apos_dat();
	private List_adp stack = List_adp_.new_();	// every apos tkn registered since the last Reset
	private int bold_count, ital_count;			// number of tkns in stack currently typed as bold / ital; dual tkns are not counted
	private Xop_apos_tkn dual_tkn = null;		// most recent dual tkn; its cmd may still be rewritten by a later ital
	public void Ctor_ctx(Xop_ctx ctx) {}
	public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
		Reset();
	}
	public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
		this.EndFrame(ctx, root, src, src_len, false);
	}
	public void AutoClose(Xop_ctx ctx, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {}
	public int Stack_len() {return stack.Count();}
	/**
	 * Consumes the whole apostrophe run starting at bgn_pos, adds an apos tkn for it to root and registers the tkn.
	 * @return position just after the last apostrophe of the run
	 */
	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		cur_pos = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Apos);
		int apos_len = cur_pos - bgn_pos;
		dat.Ident(ctx, src, apos_len, cur_pos);
		Xop_apos_tkn apos_tkn = tkn_mkr.Apos(bgn_pos, cur_pos, apos_len, dat.Typ(), dat.Cmd(), dat.Lit_apos());
		ctx.Subs_add(root, apos_tkn);
		ctx.Apos().RegTkn(apos_tkn, cur_pos);
		return cur_pos;
	}
	/** Records tkn in the stack, updates the bold / ital counters and applies any pending rewrite of an earlier dual tkn. */
	public void RegTkn(Xop_apos_tkn tkn, int cur_pos) { // REF.MW: Parser|doQuotes
		stack.Add(tkn);
		switch (tkn.Apos_tid()) {
			case Xop_apos_tkn_.Len_ital: ital_count++; break;
			case Xop_apos_tkn_.Len_bold: bold_count++; break;
			case Xop_apos_tkn_.Len_dual: //bold_count++; ital_count++; // NOTE: removed b/c of '''''a''b'' was trying to convert ''''' to bold
				dual_tkn = tkn;
				break;
		}
		if (dat.Dual_cmd() != 0) {	// earlier dual tkn assumed to be <i><b>; </i> encountered so change dual to <b><i>
			if (dual_tkn == null) throw Exc_.new_("dual tkn is null"); // should never happen
			dual_tkn.Apos_cmd_(dat.Dual_cmd());
			dual_tkn = null;
		}
	}
	/**
	 * Ends the current apostrophe frame: resolves an odd bold / odd ital combination, then closes whatever is still open
	 * by adding a zero-length closing tkn at cur_pos and logging Dangling_apos.
	 * When skip_cancel_if_lnki_and_apos is true and the current scope is a lnki, the frame is left open.
	 */
	public void EndFrame(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int cur_pos, boolean skip_cancel_if_lnki_and_apos) {
		int state = dat.State();
		if (state == 0) {Reset(); return;}
		if (bold_count % 2 == 1 && ital_count % 2 == 1) ConvertBoldToItal(ctx, src);
		state = dat.State();
		if (state == 0) {Reset(); return;}	// all closed: return
		byte cur_tkn_tid = ctx.Cur_tkn_tid();
		if (	skip_cancel_if_lnki_and_apos					// NOTE: if \n or tblw
			&&	cur_tkn_tid == Xop_tkn_itm_.Tid_lnki			// and cur scope is lnki
//			&&	prv.Ctx_tkn_tid() != Xop_tkn_itm_.Tid_lnki		// but apos_bgn is not lnki; NOTE: disabled on 2013-11-10
			)
			return;												// don't end frame
		int closeCmd = 0, closeTyp = 0;
		switch (state) {
			case Xop_apos_tkn_.State_i:		closeTyp = Xop_apos_tkn_.Typ_ital; closeCmd = Xop_apos_tkn_.Cmd_i_end; break;
			case Xop_apos_tkn_.State_b:		closeTyp = Xop_apos_tkn_.Typ_bold; closeCmd = Xop_apos_tkn_.Cmd_b_end; break;
			case Xop_apos_tkn_.State_dual:
			case Xop_apos_tkn_.State_ib:	closeTyp = Xop_apos_tkn_.Typ_dual; closeCmd = Xop_apos_tkn_.Cmd_bi_end; break;
			case Xop_apos_tkn_.State_bi:	closeTyp = Xop_apos_tkn_.Typ_dual; closeCmd = Xop_apos_tkn_.Cmd_ib_end; break;
		}
		// look up the opening tkn only after closeTyp is known; previously it was looked up while closeTyp was still 0,
		// so the ital / bold branches of Previous_bgn could never be reached
		Xop_apos_tkn prv = Previous_bgn(stack, closeTyp);
		int dangling_bgn = prv == null ? cur_pos : prv.Src_bgn();	// guard: Previous_bgn returns null when no opening tkn is found
		ctx.Msg_log().Add_itm_none(Xop_apos_log.Dangling_apos, src, dangling_bgn, cur_pos);
		ctx.Subs_add(root, ctx.Tkn_mkr().Apos(cur_pos, cur_pos, 0, closeTyp, closeCmd, 0));
		Reset();
	}
	/**
	 * Chooses one bold tkn to reinterpret as ital, then recomputes the cmd of every tkn in the stack.
	 * Preference order (REF.MW: Parser|doQuotes): first bold preceded by a single-letter word, else first bold preceded
	 * by a multi-letter word, else first bold preceded by a space.
	 */
	private void ConvertBoldToItal(Xop_ctx ctx, byte[] src) {
		Xop_apos_tkn idxNeg1 = null, idxNeg2 = null, idxNone = null; // look at previous tkn for spaces; EX: "a '''" -> idxNeg1; " a'''" -> idxNeg2; "ab'''" -> idxNone
		int tknsLen = stack.Count();
		for (int i = 0; i < tknsLen; i++) {
			Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
			if (apos.Apos_tid() != Xop_apos_tkn_.Typ_bold) continue;	// only look for bold
			int tknBgn = apos.Src_bgn();
			boolean idxNeg1Space = tknBgn > 0 && src[tknBgn - 1] == Byte_ascii.Space;
			boolean idxNeg2Space = tknBgn > 1 && src[tknBgn - 2] == Byte_ascii.Space;
			// each bold belongs to exactly one category; a preceding space always wins, so a later "a  '''" can not be mistaken for a single-letter word
			if (idxNeg1Space)		{if (idxNeg1 == null) idxNeg1 = apos;}
			else if (idxNeg2Space)	{if (idxNeg2 == null) idxNeg2 = apos;}
			else					{if (idxNone == null) idxNone = apos;}
		}
		if		(idxNeg2 != null) ConvertBoldToItal(ctx, src, idxNeg2); // 1st single letter word
		else if (idxNone != null) ConvertBoldToItal(ctx, src, idxNone); // 1st multi letter word
		else if	(idxNeg1 != null) ConvertBoldToItal(ctx, src, idxNeg1); // everything else
		// now recalc all cmds for stack
		dat.State_clear();
		for (int i = 0; i < tknsLen; i++) {
			Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
			dat.Ident(ctx, src, apos.Apos_tid(), apos.Src_end());	// NOTE: apos.Typ() must map to apos_len
			int newCmd = dat.Cmd();
			if (newCmd == apos.Apos_cmd()) continue;
			apos.Apos_cmd_(newCmd);
		}
	}
	/** Reinterprets oldTkn as one literal apostrophe followed by ital, logs the conversion and keeps the counters in step with the stack. */
	private void ConvertBoldToItal(Xop_ctx ctx, byte[] src, Xop_apos_tkn oldTkn) {
		ctx.Msg_log().Add_itm_none(Xop_apos_log.Bold_converted_to_ital, src, oldTkn.Src_bgn(), oldTkn.Src_end());
		oldTkn.Apos_tid_(Xop_apos_tkn_.Typ_ital).Apos_cmd_(Xop_apos_tkn_.Cmd_i_bgn).Apos_lit_(oldTkn.Apos_lit() + 1);// NOTE: Cmd_i_bgn may be overridden later
		// the stack now holds one bold fewer and one ital more; without this a later EndFrame on the same stack
		// (possible when EndFrame returned early inside a lnki) would test stale parities
		bold_count--;
		ital_count++;
	}
	/** Clears the stack, the counters, the pending dual tkn and the formatting state. */
	private void Reset() {
		bold_count = ital_count = 0;
		dual_tkn = null;
		stack.Clear();
		dat.State_clear();
	}
	/**
	 * Searches the stack from newest to oldest for a tkn whose cmd opens formatting of the given type.
	 * Typ_ital matches cmds that open ital, Typ_bold matches cmds that open bold, any other typ matches any opening cmd.
	 * @return the matching tkn, or null if none is found
	 */
	private static Xop_apos_tkn Previous_bgn(List_adp stack, int typ) {
		int stack_len = stack.Count();
		for (int i = stack_len - 1; i > -1; --i) {
			Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
			int cmd = apos.Apos_cmd();
			switch (typ) {
				case Xop_apos_tkn_.Typ_ital:
					switch (cmd) {
						case Xop_apos_tkn_.Cmd_i_bgn:
						case Xop_apos_tkn_.Cmd_ib_bgn:
						case Xop_apos_tkn_.Cmd_bi_bgn:
						case Xop_apos_tkn_.Cmd_ib_end__i_bgn:
						case Xop_apos_tkn_.Cmd_b_end__i_bgn:
							return apos;
					}
					break;
				case Xop_apos_tkn_.Typ_bold:
					switch (cmd) {
						case Xop_apos_tkn_.Cmd_b_bgn:
						case Xop_apos_tkn_.Cmd_ib_bgn:
						case Xop_apos_tkn_.Cmd_bi_bgn:
						case Xop_apos_tkn_.Cmd_bi_end__b_bgn:
						case Xop_apos_tkn_.Cmd_i_end__b_bgn:
							return apos;
					}
					break;
				default:	// NOTE: this is approximate; will not be exact in most dual situations; EX: <b>a<i>b will return <i>; should return <b> and <i>
					switch (cmd) {
						case Xop_apos_tkn_.Cmd_b_bgn:
						case Xop_apos_tkn_.Cmd_i_bgn:
						case Xop_apos_tkn_.Cmd_ib_bgn:
						case Xop_apos_tkn_.Cmd_bi_bgn:
						case Xop_apos_tkn_.Cmd_bi_end__b_bgn:
						case Xop_apos_tkn_.Cmd_i_end__b_bgn:
						case Xop_apos_tkn_.Cmd_ib_end__i_bgn:
						case Xop_apos_tkn_.Cmd_b_end__i_bgn:
							return apos;
					}
					break;
			}
		}
		return null;
	}
}

View File

@@ -0,0 +1,159 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
import gplx.xowa.parsers.lists.*;
/**
 * Tests for apostrophe (bold / italic) parsing.
 * Most tests parse a wikitext snippet and compare the resulting tkn sequence, in particular the cmd
 * (Xop_apos_tkn_.Cmd_*) and literal-apostrophe count of each apos tkn; the *_str tests compare generated html.
 */
public class Xop_apos_wkr_tst {
private Xop_fxt fxt = new Xop_fxt();
// runs of 2, 3 and 5 apostrophes open and close ital, bold and bold+ital
@Test public void Basic() {
fxt.Test_parse_page_wiki("''a''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn) , fxt.tkn_txt_(2, 3), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
fxt.Test_parse_page_wiki("'''a'''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn) , fxt.tkn_txt_(3, 4), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
fxt.Test_parse_page_wiki("'''''a'''''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn) , fxt.tkn_txt_(5, 6), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_end));
}
// runs of 4 and of more than 5 apostrophes: the surplus apostrophes are counted as literals
@Test public void Advanced() {
fxt.Test_parse_page_wiki("''''a''''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn).Apos_lit_(1) , fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end).Apos_lit_(1)); // 1 apos + bold
fxt.Test_parse_page_wiki("''''''''a''''''''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn).Apos_lit_(3) , fxt.tkn_txt_(), fxt.tkn_apos_( Xop_apos_tkn_.Cmd_bi_end).Apos_lit_(3)); // 3 apos + dual
}
// nested and sequential combinations of ital and bold
@Test public void Combo() {
fxt.Test_parse_page_wiki("''a'''b'''c''", fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // b{i}
fxt.Test_parse_page_wiki("'''a''b''c'''", fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)); // i{b}
fxt.Test_parse_page_wiki("''a''b'''c'''", fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)); // b_i
}
// odd number of bold and odd number of ital runs: one bold is reinterpreted as ital; which one depends on the chars preceding each bold
@Test public void Assume_apos() {
fxt.Test_parse_page_wiki("a01'''b01 '''c0 1'''d01''" // pick c0 1, b/c it is idxNeg2
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
, fxt.tkn_txt_(), fxt.tkn_space_(), fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // idx_neg2
fxt.Test_parse_page_wiki("a01 '''b01 '''c01'''d01''" // pick c01, b/c it is idxNone
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // idx_none
fxt.Test_parse_page_wiki("a01 '''b01 '''c01 '''d01''" // pick a01 , b/c it is idxNeg1
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // idx_neg1
fxt.Test_parse_page_wiki("a''''b''" // strange outlier condition
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Apos_lit_(2)
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // 4 apos -> 2 apos + ital
}
// 5-apostrophe runs: the opening cmd depends on which of bold / ital is closed first
@Test public void Dual() {
fxt.Test_parse_page_wiki("'''''a'''b''" // +ib -b -i; 5apos defaults to ib
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
fxt.Test_parse_page_wiki("'''''a''b'''" // +bi -i -b; change 5apos to bi
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
fxt.Test_parse_page_wiki("''b'''''c'''" // 5q toggles ital n, bold y
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end__b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
}
// formatting left open at end of page: a closing tkn is expected after the text
@Test public void Unclosed() {
fxt.Test_parse_page_wiki("''a"
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
fxt.Test_parse_page_wiki("'''a"
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
fxt.Test_parse_page_wiki("'''''a"
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_end));
}
// unbalanced mixes of ital and bold
@Test public void Outliers() {
fxt.Test_parse_page_wiki("''a'''b'''c'''" // '''b -> ' +i b
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Apos_lit_(1)
, fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
fxt.Test_parse_page_wiki("''a'''b''c''" // '''b -> ' +i b; double check with closing itals
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Apos_lit_(1)
, fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
fxt.Test_parse_page_wiki("''a'''b''c" // ''c -> -bi + b
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
, fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_end__b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
}
// ital left open on one line is closed before the newline; the next line starts a fresh ital
@Test public void MultiLines() {
fxt.Test_parse_page_wiki("a''b\nc''d"
, fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(3, 4), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_nl_char_len1_(4)
, fxt.tkn_txt_(5, 6), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
}
// apostrophes in a link target are kept literally: url-encoded in the href and unchanged in the caption
@Test public void Lnki() {
fxt.Test_parse_page_wiki_str("[[''a''']]", "<a href=\"/wiki/%27%27a%27%27%27\">''a'''</a>");
}
// dual opened, ital closed, ital reopened and never closed: the final tkn closes both ital and bold
@Test public void Dual_exceptions() {
fxt.Test_parse_page_wiki("'''''a''b''"
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_end)
);
}
// open ital is closed with a zero-length tkn (3, 3) before a list starts on the next line
@Test public void Mix_list_autoClose() {
fxt.Test_parse_page_wiki("''a\n*b"
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Src_rng_(0, 2)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Src_rng_(3, 3)
, fxt.tkn_list_bgn_(3, 5, Xop_list_tkn_.List_itmTyp_ul)
, fxt.tkn_txt_(5, 6)
, fxt.tkn_list_end_(6)
);
}
// open ital is closed with a zero-length tkn (3, 3) before a horizontal rule on the next line
@Test public void Mix_hr_autoClose() {
fxt.Test_parse_page_wiki("''a\n----"
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Src_rng_(0, 2)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Src_rng_(3, 3)
, fxt.tkn_para_blank_(3)
, fxt.tkn_hr_(3, 8)
);
}
// open ital is closed with a zero-length tkn (3, 3) before a header on the next line
@Test public void Mix_hdr_autoClose() {
fxt.Test_parse_page_wiki("''a\n==b=="
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Src_rng_(0, 2)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Src_rng_(3, 3)
, fxt.tkn_hdr_(3, 9, 2).Subs_
( fxt.tkn_txt_(6, 7)
));
}
// "!!" inside a lnki must not interrupt the surrounding ital
@Test public void Apos_broken_by_tblw_th() { // DATE:2013-04-24
fxt.Test_parse_page_all_str("A ''[[b!!]]'' c", "A <i><a href=\"/wiki/B!!\">b!!</a></i> c");
}
// apostrophes inside <nowiki> are output literally
@Test public void Nowiki() { // PAGE:en.w:Wiki; DATE:2013-05-13
fxt.Test_parse_page_all_str("<nowiki>''a''</nowiki>", "''a''");
}
@Test public void Lnki_multi_line() { // PURPOSE: handle apos within multi-line lnki caption; DATE:2013-11-10
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "[[A|b '' c"
, "d '' e ]]"
)
, "<a href=\"/wiki/A\">b <i> c d </i> e</a>"); // NOTE: c d should be italicized, not c e (latter occurs when apos is ended on each line)
}
@Test public void French() { // PURPOSE: L'''A'' -> L'<i>A</i>; DATE:2014-01-06
fxt.Test_parse_page_all_str("L''''A'''", "L'<b>A</b>");
fxt.Test_parse_page_all_str("L'''A''", "L'<i>A</i>");
}
// disabled test: apos inside lnke; kept for reference until the apos rewrite noted below
// @Test public void Mix_lnke() { // FUTURE: requires rewrite of apos
// fxt.Test_parse_page_wiki("''a[irc://b c''d''e]f''"
// , fxt.tkn_apos_(0, 2, Xop_apos_tkn_.Cmd_i_bgn)
// , fxt.tkn_txt_(2, 3)
// , fxt.tkn_lnke_(3, 20).Subs_add_ary
// ( fxt.tkn_txt_(12, 13)
// , fxt.tkn_apos_(13, 15, Xop_apos_tkn_.Cmd_i_bgn)
// , fxt.tkn_txt_(15, 16)
// , fxt.tkn_apos_(16, 18, Xop_apos_tkn_.Cmd_i_end)
// , fxt.tkn_txt_(18, 19)
// )
// , fxt.tkn_txt_(20, 21)
// , fxt.tkn_apos_(21, 23, Xop_apos_tkn_.Cmd_i_bgn)
// );
// }
}
/*
*/

View File

@@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Log-message definitions raised while parsing headers; all are registered as warnings under the "hdr" group. */
public class Xop_hdr_log {
	// group must be declared first: the items below are created from it during static initialization
	private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "hdr");

	public static final Gfo_msg_itm Dangling_hdr = Gfo_msg_itm_.new_warn_(owner, "dangling_hdr");
	public static final Gfo_msg_itm Mismatched = Gfo_msg_itm_.new_warn_(owner, "mismatched");
	public static final Gfo_msg_itm Len_1 = Gfo_msg_itm_.new_warn_(owner, "len_1");
	public static final Gfo_msg_itm Len_7_or_more = Gfo_msg_itm_.new_warn_(owner, "len_7_or_more");
}

View File

@@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
/** Lexer hook for headers: registers "newline followed by =" in the core trie and delegates tkn creation to the ctx's hdr worker. */
public class Xop_hdr_lxr implements Xop_lxr {
	/** Trie key: a header starts with a newline immediately followed by "=". */
	static final byte[] Hook_bgn = new byte[] {Byte_ascii.Nl, Byte_ascii.Eq};

	/** Shared instance. */
	public static final Xop_hdr_lxr _ = new Xop_hdr_lxr();

	/** The header marker char. */
	public static final byte Hook = Byte_ascii.Eq;

	Xop_hdr_lxr() {}

	public byte Lxr_tid() {
		return Xop_lxr_.Tid_hdr;
	}

	public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
		core_trie.Add(Hook_bgn, this);
	}

	public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {
		// nothing language-specific to register
	}

	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		return ctx.Hdr().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
	}
}

View File

@@ -0,0 +1,33 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Token for a wikitext header; carries the header length plus values used when the header is written to html (id, toc text, duplicate index). */
public class Xop_hdr_tkn extends Xop_tkn_itm_base {
	private int hdr_len = -1;
	private int hdr_bgn_manual;
	private int hdr_end_manual;
	private boolean hdr_html_first;
	private int hdr_html_dupe_idx;
	private byte[] hdr_toc_text;
	private byte[] hdr_html_id = Bry_.Empty;

	public Xop_hdr_tkn(int bgn, int end, int hdr_len) {
		this.Tkn_ini_pos(false, bgn, end);
		this.hdr_len = hdr_len;
	}

	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_hdr;
	}

	public int Hdr_len() {
		return hdr_len;
	}
	public Xop_hdr_tkn Hdr_len_(int v) {
		this.hdr_len = v;
		return this;
	}

	public int Hdr_bgn_manual() {
		return hdr_bgn_manual;
	}
	public Xop_hdr_tkn Hdr_bgn_manual_(int v) {
		this.hdr_bgn_manual = v;
		return this;
	}

	public int Hdr_end_manual() {
		return hdr_end_manual;
	}
	public Xop_hdr_tkn Hdr_end_manual_(int v) {
		this.hdr_end_manual = v;
		return this;
	}

	public boolean Hdr_html_first() {
		return hdr_html_first;
	}
	public Xop_hdr_tkn Hdr_html_first_y_() {
		this.hdr_html_first = true;
		return this;
	}

	public int Hdr_html_dupe_idx() {
		return hdr_html_dupe_idx;
	}

	public byte[] Hdr_toc_text() {
		return hdr_toc_text;
	}
	public Xop_hdr_tkn Hdr_toc_text_(byte[] v) {
		this.hdr_toc_text = v;
		return this;
	}

	/** Advances the duplicate index and returns it: the first call yields 2, each later call yields the previous value plus 1. */
	public int Hdr_html_dupe_idx_next() {
		if (hdr_html_dupe_idx == 0)
			hdr_html_dupe_idx = 2;
		else
			hdr_html_dupe_idx++;
		return hdr_html_dupe_idx;
	}

	public byte[] Hdr_html_id() {
		return hdr_html_id;
	}
	public Xop_hdr_tkn Hdr_html_id_(byte[] v) {
		this.hdr_html_id = v;
		return this;
	}
}

View File

@@ -0,0 +1,33 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Test checker for {@link Xop_hdr_tkn}.
 * Each expectation keeps a sentinel default (-1 or Bry_.Empty); Chk_hook passes "still at sentinel" as the first argument of Tst_val.
 */
public class Xop_hdr_tkn_chkr extends Xop_tkn_chkr_base {
	private int hdr_len = -1;
	private int hdr_ws_bgn = -1;
	private int hdr_ws_end = -1;
	private int hdr_ws_trailing = -1;
	private byte[] hdr_html_id = Bry_.Empty;

	@Override public Class<?> TypeOf() {return Xop_hdr_tkn.class;}
	@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_hdr;}

	public int Hdr_len() {return hdr_len;}
	public Xop_hdr_tkn_chkr Hdr_len_(int v) {
		this.hdr_len = v;
		return this;
	}

	// NOTE: the three ws expectations are stored but are not compared in Chk_hook
	public int Hdr_ws_bgn() {return hdr_ws_bgn;}
	public Xop_hdr_tkn_chkr Hdr_ws_bgn_(int v) {
		this.hdr_ws_bgn = v;
		return this;
	}
	public int Hdr_ws_end() {return hdr_ws_end;}
	public Xop_hdr_tkn_chkr Hdr_ws_end_(int v) {
		this.hdr_ws_end = v;
		return this;
	}
	public int Hdr_ws_trailing() {return hdr_ws_trailing;}
	public Xop_hdr_tkn_chkr Hdr_ws_trailing_(int v) {
		this.hdr_ws_trailing = v;
		return this;
	}

	public Xop_hdr_tkn_chkr Hdr_html_id_(String v) {
		this.hdr_html_id = Bry_.new_a7(v);
		return this;
	}

	/** Compares hdr_len and hdr_html_id against the actual tkn and returns err plus the values returned by Tst_val. */
	@Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
		Xop_hdr_tkn actl = (Xop_hdr_tkn)actl_obj;
		int rv = err;
		rv += mgr.Tst_val(hdr_len == -1, path, "hdr_len", hdr_len, actl.Hdr_len());
		// reference comparison is deliberate: Bry_.Empty is the "not set" sentinel
		rv += mgr.Tst_val(hdr_html_id == Bry_.Empty, path, "hdr_html_id", String_.new_a7(hdr_html_id), String_.new_a7(actl.Hdr_html_id()));
		return rv;
	}
}

View File

@@ -0,0 +1,123 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Context worker for wikitext headers (EX: "\n==a==\n").
 * Make_tkn_bgn opens a hdr tkn when the lexer sees "\n=", Make_tkn_end closes it and reconciles mismatched
 * "=" counts, and AutoClose handles a header that was never closed.
 */
public class Xop_hdr_wkr implements Xop_ctx_wkr {
	public void Ctor_ctx(Xop_ctx ctx) {}
	public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
	public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
	/**
	 * Handles a header bgn tkn that never found its end; EX: "==a".
	 * The original "=" count is moved to Hdr_bgn_manual and Hdr_len is set to 0.
	 */
	public void AutoClose(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
		// bgn never closed; mark inert; EX: "==a"
		Xop_hdr_tkn bgn = (Xop_hdr_tkn)tkn;
		int bgn_hdr_len = bgn.Hdr_len();
		bgn.Hdr_bgn_manual_(bgn_hdr_len);
		bgn.Hdr_len_(0);
		if (bgn_hdr_len > 1 && ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki)	// NOTE: \n= is not uncommon for templates; ignore them;
			ctx.Msg_log().Add_itm_none(Xop_hdr_log.Dangling_hdr, src, bgn.Src_bgn(), bgn_pos);
	}
	/**
	 * Opens a header: counts the run of "=" starting at cur_pos, logs unusual lengths, and pushes a new hdr tkn on the stack.
	 * @return position immediately after the run of "="
	 */
	public int Make_tkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		if (bgn_pos == Xop_parser_.Doc_bgn_bos) bgn_pos = 0;	// do not allow -1 pos
		ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false);
		Close_open_itms(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
		ctx.Para().Process_block__bgn__nl_w_symbol(ctx, root, src, bgn_pos, cur_pos, Xop_xnde_tag_.Tag_h2);	// pass h2; should pass h# where # is correct #, but for purpose of Para_wkr, <h2> tag does not matter
		int new_pos = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Xop_hdr_lxr.Hook);	// count all =
		int hdr_len = new_pos - cur_pos + 1;	// +1 b/c Hook has 1 eq: "\n="
		switch (hdr_len) {
			case 1: ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_1, src, bgn_pos, new_pos); break;				// <h1>; flag
			case 2: case 3: case 4: case 5: case 6: break;														// <h2>-<h6>: normal
			default: ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_7_or_more, src, bgn_pos, new_pos); break;		// <h7>+; limit to 6; flag; NOTE: only 14 pages in 2011-07-27
		}
		Xop_hdr_tkn tkn = tkn_mkr.Hdr(bgn_pos, new_pos, hdr_len);	// make tkn
		ctx.StackTkn_add(root, tkn);
		return new_pos;
	}
	/**
	 * Closes the open header found at stackPos.
	 * The effective level is the smaller of the bgn / end "=" counts, capped at 6; every surplus "=" on either side is
	 * recorded in Hdr_bgn_manual / Hdr_end_manual. Trailing ws after the header is consumed (see Find_fwd_while_ws_hdr_version).
	 * @param end_hdr_len number of "=" in the closing run
	 * @return position at which lexing should resume
	 */
	public int Make_tkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int stackPos, int end_hdr_len) {// REF.MW: Parser|doHeadings
		if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_tmpl_curly_bgn) return ctx.Lxr_make_txt_(cur_pos);
		Xop_hdr_tkn hdr = (Xop_hdr_tkn)ctx.Stack_pop_til(root, src, stackPos, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_hdr);
		ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false);	// end any apos; EX: ==''a==
		int hdr_len = hdr.Hdr_len(), bgn_manual = 0, end_manual = 0;
		boolean dirty = false;
		if (end_hdr_len < hdr_len) {	// mismatch: bgn has more "=" than end; lower hdr_len to end's count; surplus bgn "=" become manual; EX: "=====a=="
			bgn_manual = hdr_len - end_hdr_len;
			hdr_len = end_hdr_len;
			ctx.Msg_log().Add_itm_none(Xop_hdr_log.Mismatched, src, bgn_pos, cur_pos);
			if (hdr_len == 1) ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_1, src, bgn_pos, cur_pos);
			dirty = true;
		}
		else if (end_hdr_len > hdr_len) {	// mismatch: end has more "=" than bgn; surplus end "=" become manual; EX: "==a====="
			end_manual = end_hdr_len - hdr_len;
			ctx.Msg_log().Add_itm_none(Xop_hdr_log.Mismatched, src, bgn_pos, cur_pos);
			dirty = true;
		}
		if (hdr_len > 6) {	// <h7>+; limit to 6; NOTE: at this point bgn/end are treated as equal length, so both sides gain the same surplus
			int surplus = hdr_len - 6;
			bgn_manual += surplus;	// add to (not overwrite) any manual count from the mismatch branches; EX: bgn=8,end=7 -> bgn_manual=2,end_manual=1
			end_manual += surplus;
			hdr_len = 6;
			dirty = true;
		}
		if (dirty)
			hdr.Hdr_bgn_manual_(bgn_manual).Hdr_end_manual_(end_manual).Hdr_len_(hdr_len);
		cur_pos = Find_fwd_while_ws_hdr_version(src, cur_pos, src_len);	// NOTE: hdr gobbles up trailing ws; EX: "==a== \n\t \n \nb" gobbles up all 3 "\n"s; otherwise para_wkr will process <br/>
		ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_h2);
		hdr.Subs_move(root);
		hdr.Src_end_(cur_pos);
		if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki)
			ctx.Cur_page().Hdr_mgr().Add(ctx, hdr, src);
		return cur_pos;
	}
	/** Finds the first list / hdr tkn on the stack (scanning from index 0) and pops the stack down to it; does nothing if neither exists. */
	private void Close_open_itms(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		int stack_pos = -1, stack_len = ctx.Stack_len();
		for (int i = 0; i < stack_len; i++) {	// loop over stack
			byte prv_tid = ctx.Stack_get(i).Tkn_tid();
			if (prv_tid == Xop_tkn_itm_.Tid_list || prv_tid == Xop_tkn_itm_.Tid_hdr) {	// find first list/hdr; close everything until this
				stack_pos = i;
				break;
			}
		}
		if (stack_pos == -1) return;
		ctx.Stack_pop_til(root, src, stack_pos, true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_hdr);
	}
	/**
	 * Skips spaces, tabs and newlines starting at cur.
	 * Returns end if the ws run reaches end. Otherwise returns the index of the last "\n" that was skipped (so that
	 * "\n" is left for the parser), or the index of the first non-ws byte when no "\n" was skipped.
	 */
	private static int Find_fwd_while_ws_hdr_version(byte[] src, int cur, int end) {
		int last_nl = -1;	// index just after the most recent "\n"; -1 if none seen
		while (true) {
			if (cur == end) return cur;
			byte b = src[cur];
			switch (b) {
				case Byte_ascii.Nl:
					cur++;
					last_nl = cur;
					break;
				case Byte_ascii.Space:
				case Byte_ascii.Tab:
					cur++;
					break;
				default:
					return last_nl == -1 ? cur : last_nl - 1;
			}
		}
	}
}
/*
NOTE:hdr.trailing_nl
. by design, the hdr_tkn's src_end will not include the trailing \n
.. for example, for "\n==a==\n", the src_bgn will be 0, but the src_end will be 6
.. note that at 6, it does not include the \n at pos 6
. this is needed to leave the \n for the parser to handle other tkns, such as hdrs, tblws, lists.
. for example, in "\n==a==\n*b", if the \n at pos 6 was taken by the hdr_tkn, then the parser would encounter a "*" instead of a "\n*"
*/

View File

@@ -0,0 +1,127 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_hdr_wkr__basic_tst {
@Before public void init() {fxt.Reset();} private Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void H2() {fxt.Test_parse_page_wiki_str("==a==" , "<h2>a</h2>\n");}
@Test public void H3() {fxt.Test_parse_page_wiki_str("===a===" , "<h3>a</h3>\n");}
@Test public void H6_limit() {fxt.Test_parse_page_wiki_str("=======a=======" , "<h6>=a=</h6>\n");}
@Test public void Mismatch_bgn() {fxt.Test_parse_page_wiki_str("=====a==" , "<h2>===a</h2>\n");}
@Test public void Mismatch_end() {fxt.Test_parse_page_wiki_str("==a=====" , "<h2>a===</h2>\n");}
@Test public void Dangling() {fxt.Test_parse_page_wiki_str("==a" , "==a");}
@Test public void Comment_bgn() {fxt.Test_parse_page_all_str ("<!--b-->==a==" , "<h2>a</h2>\n");}
@Test public void Comment_end() {fxt.Test_parse_page_all_str ("==a==<!--b-->" , "<h2>a</h2>\n");}
@Test public void Ws_end() { // PURPOSE: "==\n" merges all ws following it; \n\n\n is not transformed by Para_wkr to "<br/>"
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "==a== \t"
, ""
, ""
, ""
, "b"
), String_.Concat_lines_nl_skip_last
( "<h2>a</h2>"
, "b"
));
}
@Test public void Many() {
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "==a=="
, "===b==="
), String_.Concat_lines_nl_skip_last
( "<h2>a</h2>"
, ""
, "<h3>b</h3>"
, ""
));
}
@Test public void Hdr_w_tblw() {
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "==a=="
, "{|"
, "|+"
, "|}"
), String_.Concat_lines_nl_skip_last
( "<h2>a</h2>"
, "<table>"
, " <caption>"
, " </caption>"
, "</table>"
, ""
));
}
@Test public void Hdr_w_hr() {
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "==a=="
, "----"
), String_.Concat_lines_nl_skip_last
( "<h2>a</h2>"
, "<hr/>"
));
}
@Test public void Mix_apos_dangling() {fxt.Test_parse_page_wiki_str("==''a==" , "<h2><i>a</i></h2>\n");}
@Test public void Mix_xnde_dangling() {fxt.Test_parse_page_wiki_str("==<i>a==" , "<h2><i>a</i></h2>\n");}
@Test public void Mix_tblw_cell() {fxt.Test_parse_page_wiki_str("==a!!==" , "<h2>a!!</h2>\n");}
@Test public void Ws() {fxt.Test_parse_page_wiki_str("== a b ==" , "<h2> a b </h2>\n");}
@Test public void Err_hdr() {fxt.Init_log_(Xop_hdr_log.Mismatched) .Test_parse_page_wiki_str("====a== ==" , "<h2>==a== </h2>\n").tst_Log_check();}
@Test public void Err_end_hdr_is_1() {fxt.Init_log_(Xop_hdr_log.Mismatched, Xop_hdr_log.Len_1).Test_parse_page_wiki_str("==a=" , "<h1>=a</h1>\n").tst_Log_check();}
@Test public void Html_hdr_many() {
fxt.Wtr_cfg().Toc__show_(Bool_.Y);
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "==a=="
, "==a=="
, "==a=="
), String_.Concat_lines_nl_skip_last
( "<h2><span class='mw-headline' id='a'>a</span></h2>"
, ""
, "<h2><span class='mw-headline' id='a_2'>a</span></h2>"
, ""
, "<h2><span class='mw-headline' id='a_3'>a</span></h2>"
, ""
));
fxt.Wtr_cfg().Toc__show_(Bool_.N);
}
@Test public void Hdr_inside_dangling_tmpl_fix() { // PURPOSE: one-off fix to handle == inside dangling tmpl; DATE:2014-02-11
fxt.Test_parse_page_all_str("{{a|}\n==b=="
, String_.Concat_lines_nl_skip_last
( "{{a|}"
, ""
, "<h2>b</h2>"
, ""
));
}
@Test public void Pfunc() {// multiple = should not be interpreted as key-val equals; PAGE:en.w:Wikipedia:Picture_of_the_day/June_2014 DATE:2014-07-21
fxt.Test_parse_page_all_str
( "{{#if:exists|==a==|no}}"
, String_.Concat_lines_nl_skip_last
( "<h2>a</h2>"
, ""
));
}
// @Test public void Hdr_inside_dangling_tmpl_fix_2() { // PURPOSE: hdr == inside dangling tmpl; DATE:2014-06-10
// fxt.Init_defn_add("Print", "{{{1}}}");
// fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
// ( "=={{Print|b=="
// , "}}"
// ), String_.Concat_lines_nl_skip_last
// ( "==b="
// , ""
// ));
// }
}

View File

@@ -0,0 +1,56 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Tests header output when the writer's Hdr__div_wrapper option is on (para mode enabled). */
public class Xop_hdr_wkr__div_wrapper_tst {
	@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
	@After public void term() {fxt.Init_para_n_();}
	@Test public void Basic() {	// PURPOSE: basic div_wrapper test; DATE:2015-06-24
		fxt.Wtr_cfg().Hdr__div_wrapper_(Bool_.Y);
		try {
			fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
			( "==a=="
			, "b"
			, "==c=="
			, "d"
			, "==e=="
			, "f"
			), String_.Concat_lines_nl_skip_last
			( "<h2>a</h2>"
			, "<div>"
			, ""
			, "<p>b"
			, "</p>"
			, ""
			, "</div>"
			, "<h2>c</h2>"
			, "<div>"
			, ""
			, "<p>d"
			, "</p>"
			, ""
			, "</div>"
			, "<h2>e</h2>"
			, "<div>"
			, ""
			, "<p>f"
			, "</p>"
			, "</div>"
			));
		}
		finally {
			fxt.Wtr_cfg().Hdr__div_wrapper_(Bool_.N);	// always restore, even when the assertion above fails, so the option cannot leak into other tests
		}
	}
}

View File

@@ -0,0 +1,26 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Header tests run with para mode enabled on the fixture. */
public class Xop_hdr_wkr__para_tst {
	private Xop_fxt fxt = new Xop_fxt();
	@Before public void init() {
		fxt.Reset();
		fxt.Init_para_y_();
	}
	@After public void term() {
		fxt.Init_para_n_();
	}
	// PURPOSE: check that BOS==a== does not throw null ref in para; DATE:2014-02-18
	@Test public void Hdr_at_bos() {
		String raw = "==a==";
		String expd = "<h2>a</h2>\n";
		fxt.Test_parse_page_all_str(raw, expd);
	}
}

View File

@@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
/** Lexer for list items: registers the "\n*", "\n#", "\n;" and "\n:" hooks and forwards matches to the ctx's list worker. */
public class Xop_list_lxr implements Xop_lxr {//20111222
	public static final Xop_list_lxr _ = new Xop_list_lxr();
	Xop_list_lxr() {}

	public byte Lxr_tid() {return Xop_lxr_.Tid_list;}
	public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
		Add_ary(core_trie, this
			, Xop_list_tkn_.Hook_ul
			, Xop_list_tkn_.Hook_ol
			, Xop_list_tkn_.Hook_dt
			, Xop_list_tkn_.Hook_dd
			);
	}
	public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
	/** Adds every hook in ary to the trie, all mapped to the same val. */
	private void Add_ary(Btrie_fast_mgr core_trie, Object val, byte[]... ary) {
		int ary_len = ary.length;
		for (int i = 0; i < ary_len; i++)
			core_trie.Add(ary[i], val);
	}
	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		Xop_list_wkr list_wkr = ctx.List();
		return list_wkr.MakeTkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
	}
}

View File

@@ -0,0 +1,32 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Token marking the bgn or end of a list item.
 * List_bgn is Bool_.Y_byte for tkns made by bgn_ and Bool_.N_byte for tkns made by end_.
 */
public class Xop_list_tkn extends Xop_tkn_itm_base {
	public static final Xop_list_tkn Null = new Xop_list_tkn();

	private int list_uid = -1;
	private byte list_bgn;
	private byte list_itmTyp = Xop_list_tkn_.List_itmTyp_null;
	private int[] path = Int_.Ary_empty;
	private byte list_sub_last = Bool_.__byte;

	Xop_list_tkn() {}
	public Xop_list_tkn(int bgn, int end, byte bgnEndType, byte list_itmTyp) {
		this.Tkn_ini_pos(false, bgn, end);
		this.list_bgn = bgnEndType;
		this.list_itmTyp = list_itmTyp;
	}
	/** Makes a bgn tkn; NOTE: symLen is accepted but not used. */
	public static Xop_list_tkn bgn_(int bgn, int end, byte list_itmTyp, int symLen) {
		return new Xop_list_tkn(bgn, end, Bool_.Y_byte, list_itmTyp);
	}
	/** Makes a zero-width end tkn at pos. */
	public static Xop_list_tkn end_(int pos, byte list_itmTyp) {
		return new Xop_list_tkn(pos, pos, Bool_.N_byte, list_itmTyp);
	}

	@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_list;}

	public int List_uid() {return list_uid;}
	public Xop_list_tkn List_uid_(int v) {
		this.list_uid = v;
		return this;
	}

	public byte List_bgn() {return list_bgn;}

	public byte List_itmTyp() {return list_itmTyp;}
	public Xop_list_tkn List_itmTyp_(byte v) {
		this.list_itmTyp = v;
		return this;
	}

	public int[] List_path() {return path;}
	public Xop_list_tkn List_path_(int... v) {
		this.path = v;
		return this;
	}
	/** Last element of the path. */
	public int List_path_idx() {
		int last_idx = path.length - 1;
		return path[last_idx];
	}
	/** True when the last element of the path is 0. */
	public boolean List_sub_first() {
		return List_path_idx() == 0;
	}

	public byte List_sub_last() {return list_sub_last;}
	public Xop_list_tkn List_sub_last_(byte v) {
		this.list_sub_last = v;
		return this;
	}
}

View File

@@ -0,0 +1,54 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Constants and lookups for list tkns: lexer hooks, item-type bytes, and html tag names. */
public class Xop_list_tkn_ {
	// lexer hooks: newline followed by the list symbol
	public static final byte[]
	  Hook_ul = new byte[] {Byte_ascii.Nl, Byte_ascii.Star}
	, Hook_ol = new byte[] {Byte_ascii.Nl, Byte_ascii.Hash}
	, Hook_dt = new byte[] {Byte_ascii.Nl, Byte_ascii.Semic}
	, Hook_dd = new byte[] {Byte_ascii.Nl, Byte_ascii.Colon}
	;
	// item types; each non-null type is the list symbol itself
	public static final byte
	  List_itmTyp_null = 0
	, List_itmTyp_ul = Byte_ascii.Star
	, List_itmTyp_ol = Byte_ascii.Hash
	, List_itmTyp_dt = Byte_ascii.Semic
	, List_itmTyp_dd = Byte_ascii.Colon
	;
	public static final String
	  Str_li = "li"
	, Str_ol = "ol"
	, Str_ul = "ul"
	, Str_dl = "dl"
	, Str_dt = "dt"
	, Str_dd = "dd"
	;
	public static final byte[]
	  Byt_li = Bry_.new_a7(Str_li)
	, Byt_ol = Bry_.new_a7(Str_ol)
	, Byt_ul = Bry_.new_a7(Str_ul)
	, Byt_dl = Bry_.new_a7(Str_dl)
	, Byt_dt = Bry_.new_a7(Str_dt)
	, Byt_dd = Bry_.new_a7(Str_dd)
	;
	/** Tag name of the list container for an item type: ul, ol, or dl (for both dt and dd); any other byte throws. */
	public static byte[] XmlTag_lst(byte b) {
		if (b == List_itmTyp_ul) return Byt_ul;
		if (b == List_itmTyp_ol) return Byt_ol;
		if (b == List_itmTyp_dt || b == List_itmTyp_dd) return Byt_dl;
		throw Exc_.new_unhandled(b);
	}
	/** Tag name of the list item for an item type: li (for both ul and ol), dt, or dd; any other byte throws. */
	public static byte[] XmlTag_itm(byte b) {
		if (b == List_itmTyp_ul || b == List_itmTyp_ol) return Byt_li;
		if (b == List_itmTyp_dt) return Byt_dt;
		if (b == List_itmTyp_dd) return Byt_dd;
		throw Exc_.new_unhandled(b);
	}
	/** Wikitext symbol for an item type; any other byte throws. */
	public static byte Char_lst(byte b) {
		if (b == List_itmTyp_ul) return Byte_ascii.Star;
		if (b == List_itmTyp_ol) return Byte_ascii.Hash;
		if (b == List_itmTyp_dt) return Byte_ascii.Semic;
		if (b == List_itmTyp_dd) return Byte_ascii.Colon;
		throw Exc_.new_unhandled(b);
	}
}

View File

@@ -0,0 +1,36 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Test checker for {@link Xop_list_tkn}.
 * Each expectation keeps a sentinel default; Chk_hook passes "still at sentinel" as the first argument of Tst_val.
 */
public class Xop_list_tkn_chkr extends Xop_tkn_chkr_base {
	private int list_uid = -1;
	private byte list_bgn;
	private byte list_itmTyp = Xop_list_tkn_.List_itmTyp_null;
	private int[] list_path = Int_.Ary_empty;
	private byte list_sub_last = Bool_.__byte;

	@Override public Class<?> TypeOf() {return Xop_list_tkn.class;}
	@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_list;}

	public int List_uid() {return list_uid;}
	public Xop_list_tkn_chkr List_uid_(int v) {
		this.list_uid = v;
		return this;
	}
	public byte List_bgn() {return list_bgn;}
	public Xop_list_tkn_chkr List_bgn_(byte v) {
		this.list_bgn = v;
		return this;
	}
	public byte List_itmTyp() {return list_itmTyp;}
	public Xop_list_tkn_chkr List_itmTyp_(byte v) {
		this.list_itmTyp = v;
		return this;
	}
	public int[] List_path() {return list_path;}
	public Xop_list_tkn_chkr List_path_(int... v) {
		this.list_path = v;
		return this;
	}
	public byte List_sub_last() {return list_sub_last;}
	public Xop_list_tkn_chkr List_sub_last_(byte v) {
		this.list_sub_last = v;
		return this;
	}

	/** Compares each expectation against the actual tkn and returns err plus the values returned by Tst_val. */
	@Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
		Xop_list_tkn actl = (Xop_list_tkn)actl_obj;
		int rv = err;
		rv += mgr.Tst_val(list_uid == -1, path, "list_uid", list_uid, actl.List_uid());
		rv += mgr.Tst_val(list_bgn == 0, path, "list_bgn", list_bgn, actl.List_bgn());
		rv += mgr.Tst_val(list_itmTyp == Xop_list_tkn_.List_itmTyp_null, path, "list_itmTyp", list_itmTyp, actl.List_itmTyp());
		rv += mgr.Tst_val(list_sub_last == Bool_.__byte, path, "list_sub_last", list_sub_last, actl.List_sub_last());
		// reference comparison is deliberate: Int_.Ary_empty is the "not set" sentinel
		rv += mgr.Tst_val(list_path == Int_.Ary_empty, path, "list_path", Array_.XtoStr(list_path), Array_.XtoStr(actl.List_path()));
		return rv;
	}
}

View File

@@ -0,0 +1,185 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.parsers.tblws.*;
public class Xop_list_wkr implements Xop_ctx_wkr {
private int listId = 0; byte[] curSymAry = new byte[Max_list_depth]; int curSymLen = 0; byte[] prvSymAry = Bry_.Empty;
private HierPosAryBldr posBldr = new HierPosAryBldr(Max_list_depth);
private boolean SymAry_fill_overflow;
public void Ctor_ctx(Xop_ctx ctx) {}
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {Reset(0);}
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
public boolean List_dirty() {return posBldr.Dirty();}
public boolean Dd_chk() {return dd_chk;} public Xop_list_wkr Dd_chk_(boolean v) {dd_chk = v; return this;} private boolean dd_chk;
public void AutoClose(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
// NOTE: list_tkns can not be explicitly closed, so auto-close will happen for all items
MakeTkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, (Xop_list_tkn)tkn, Bool_.Y_byte);
Reset(listId + 1);
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_ul);
}
public int MakeTkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {// REF.MW: Parser|doBlockLevels
if (bgn_pos == Xop_parser_.Doc_bgn_bos) bgn_pos = 0; // do not allow -1 pos
// pop hdr if exists; EX: \n== a ==\n*b; \n* needs to close hdr
int acsPos = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_hdr);
if (acsPos != -1) ctx.Stack_pop_til(root, src, acsPos, true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_list);
// close apos
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false);
byte symByt = src[cur_pos - 1]; // -1 b/c symByt is byte before curByt; EX: \n*a; cur_pos is at a; want to get *
int prvSymLen = curSymLen;
cur_pos = SymAry_fill(src, cur_pos, src_len, symByt);
symByt = src[cur_pos - 1]; // NOTE: get symByt again b/c cur_pos may have changed; EX: "#*"; # may have triggered list, but last symByt should be *
if (SymAry_fill_overflow) return ctx.Lxr_make_txt_(cur_pos);
PrvItm_compare();
ctx.Para().Process_block__bgn__nl_w_symbol(ctx, root, src, bgn_pos, cur_pos - 1, Xop_xnde_tag_.Tag_li); // -1 b/c cur_pos includes sym_byte; EX: \n*; pass li; should pass correct tag, but for purposes of para_wkr, <li> doesn't matter
if (prvSymMatch) {
PopTil(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, Bool_.N_byte);
posBldr.MoveNext();
prvSymAry = Xop_list_wkr_.MakeSymAry(curSymAry, curSymLen);
Xop_list_tkn prvItm = tkn_mkr.List_bgn(bgn_pos, cur_pos, curSymAry[curSymLen - 1], curSymLen).List_path_(posBldr.XtoIntAry()).List_uid_(listId);
ctx.Subs_add_and_stack(root, prvItm);
ctx.Empty_ignored_y_();
}
else {
for (int i = prvSymLen; i > commonSymLen; i--) { // close all discontinued itms: EX: ##\n#\n
PopTil(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, Bool_.Y_byte);
posBldr.MoveUp();
}
if (commonSymLen == 0 && prvSymLen != 0) { // nothing in common; reset list
listId++;
posBldr.Init();
}
if (curSymLen == commonSymLen) { // add another itm if continuing; EX: #\n#\n
PopTil(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, Bool_.N_byte);
if ((prvSymLen - curSymLen) > 0 // moving up many levels; do not open new list; just MoveNext; EX: #1\n###3\n##2
&& curSymLen != 1) { // do not moveNext if at level 1; this has to do with strange incrementing logic in posBldr at rootLvl
posBldr.MoveNext();
}
else {
posBldr.MoveUp(); posBldr.MoveDown();
}
prvSymAry = Xop_list_wkr_.MakeSymAry(curSymAry, curSymLen);
symByt = src[cur_pos - 1];
Xop_list_tkn prvItm = tkn_mkr.List_bgn(bgn_pos, cur_pos, symByt, curSymLen).List_path_(posBldr.XtoIntAry()).List_uid_(listId);
ctx.Subs_add_and_stack(root, prvItm);
ctx.Empty_ignored_y_();
}
for (int i = commonSymLen; i < curSymLen; i++) { // open new itms; EX: #\n##\n
posBldr.MoveDown();
symByt = curSymAry[i];
prvSymAry = Xop_list_wkr_.MakeSymAry(curSymAry, curSymLen);
Xop_list_tkn prvItm = tkn_mkr.List_bgn(bgn_pos, cur_pos, symByt, i + List_adp_.Base1).List_path_(posBldr.XtoIntAry()).List_uid_(listId);
ctx.Subs_add_and_stack(root, prvItm);
ctx.Empty_ignored_y_();
}
}
if (allDd && cur_pos < src_len - 2 && src[cur_pos] == '{' && src[cur_pos + 1] == '|') // NOTE: if indent && next == {| then invoke table; EX: ":::{|"
return ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, cur_pos, cur_pos + 2, false, Xop_tblw_wkr.Tblw_type_tb, Xop_tblw_wkr.Called_from_list, -1, -1); // NOTE: ws_enabled must be set to true; see test for Adinkras; Cato the Elder
else {
dd_chk = symByt == Xop_list_tkn_.List_itmTyp_dt;
return cur_pos;
}
}
public void MakeTkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_list_tkn bgn, byte sub_last) {
// boolean empty_ignored = ctx.Empty_ignored(); // commented; see below; DATE:2014-06-24
Xop_tkn_itm end_tkn = tkn_mkr.List_end(bgn_pos, bgn.List_itmTyp()).List_path_(bgn.List_path()).List_uid_(listId).List_sub_last_(sub_last);
ctx.Subs_add(root, end_tkn);
// if (empty_ignored) ctx.Empty_ignore(root, bgn.Tkn_sub_idx()); // commented; code was incorrectly deactivating "*a" when "<li>" encountered; PAGE:en.w:Bristol_Bullfinch DATE:2014-06-24
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_ul);
}
private Xop_list_tkn PopTil(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, byte subLast) {
int acs_pos = ctx.Stack_idx_find_but_stop_at_tbl(Xop_tkn_itm_.Tid_list);
if (acs_pos == -1) return null;
Xop_list_tkn rv = (Xop_list_tkn)ctx.Stack_pop_til(root, src, acs_pos, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_list);
MakeTkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, rv, subLast);
return rv;
}
private void PrvItm_compare() {
int prvSymLen = prvSymAry.length;
prvSymMatch = curSymLen == prvSymLen; commonSymLen = 0;
for (int i = 0; i < curSymLen; i++) {
if (i < prvSymLen && (Xop_list_wkr_.Compare_normalize(curSymAry[i]) == Xop_list_wkr_.Compare_normalize(prvSymAry[i]))) {
commonSymLen = i + 1;
}
else {
prvSymMatch = false;
break;
}
}
} boolean prvSymMatch; int commonSymLen = 0; boolean allDd = false;
private int SymAry_fill(byte[] src, int cur_pos, int src_len, byte curByt) {
curSymLen = 0;
curSymAry[curSymLen++] = curByt;
allDd = true;
boolean loop = true;
SymAry_fill_overflow = false;
while (loop) {
if (cur_pos == src_len) break;
if (curSymLen == Max_list_depth) { // WORKAROUND: xowa imposes max list depth of 256; MW is unlimited; may change for future release but 256 should accomodate all real-world usages
boolean stop = false;
for (int i = cur_pos; i < src_len; i++) {
curByt = src[i];
switch (curByt) {
case Byte_ascii.Star:
case Byte_ascii.Hash:
case Byte_ascii.Semic:
case Byte_ascii.Colon:
cur_pos = i;
break;
default:
stop = true;
break;
}
if (stop) break;
}
for (int i = 0; i < Max_list_depth; i++)
curSymAry[i] = Byte_ascii.Nil;
curSymLen = 0;
SymAry_fill_overflow = true;
return cur_pos;
}
curByt = src[cur_pos];
switch (curByt) {
case Byte_ascii.Star:
case Byte_ascii.Hash:
case Byte_ascii.Semic:
curSymAry[curSymLen++] = curByt;
cur_pos++;
allDd = false;
break;
case Byte_ascii.Colon:
curSymAry[curSymLen++] = curByt;
cur_pos++;
break;
default:
loop = false;
break;
}
}
return cur_pos;
}
// Clears per-list parsing state (position builder, current / previous symbol runs, dd_chk) and records newListId as the active list id.
private void Reset(int newListId) {
	posBldr.Init();
	listId = newListId;
	dd_chk = false;
	prvSymAry = Bry_.Empty;	// no previous item to compare against
	curSymLen = 0;
}
// Maximum number of list symbols collected for one list item; SymAry_fill takes its overflow path once curSymLen reaches this value.
public static final int Max_list_depth = 256;
}

View File

@@ -0,0 +1,54 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
// Static helpers used by the wiki-list parser.
public class Xop_list_wkr_ {
	// Returns a new array holding the first curSymLen bytes of curSymAry.
	public static byte[] MakeSymAry(byte[] curSymAry, int curSymLen) {
		byte[] rv = new byte[curSymLen];
		for (int i = 0; i < curSymLen; i++)
			rv[i] = curSymAry[i];
		return rv;
	}
	// Maps a list symbol to the value used when comparing nesting levels; throws for any byte that is not a list symbol.
	public static byte Compare_normalize(byte b) {	// convert : to ; for sake of determining levels; EX: ";:" is actually same group
		switch (b) {
			case Byte_ascii.Star:
			case Byte_ascii.Hash:
			case Byte_ascii.Semic:	return b;
			case Byte_ascii.Colon:	return Byte_ascii.Semic;
			default:				throw Exc_.new_unhandled(b);
		}
	}
	// Closes every list token on the stack, back to the earliest one that is not separated from the top by a table token.
	// Does nothing when a template-invoke token is on the stack or when no list token is found.
	public static void Close_list_if_present(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos) {// close all list tkns on stack; EX: ***\n should close all 3 stars; used to only close 1
		if (ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tmpl_invk) != Xop_ctx.Stack_not_found) return; // list is inside template; do not close;
		int acs_pos = Xop_ctx.Stack_not_found;	// use the same sentinel that is tested after the loop (was a literal -1)
		boolean tblw_found = false;				// explicit stop flag; replaces overwriting the loop counter with -1
		for (int i = ctx.Stack_len() - 1; i > -1 && !tblw_found; i--) {	// loop backwards until earliest list tkn
			byte cur_acs_tid = ctx.Stack_get(i).Tkn_tid();
			switch (cur_acs_tid) {
				case Xop_tkn_itm_.Tid_tblw_tb:
				case Xop_tkn_itm_.Tid_tblw_tc:
				case Xop_tkn_itm_.Tid_tblw_te:
				case Xop_tkn_itm_.Tid_tblw_td:
				case Xop_tkn_itm_.Tid_tblw_th:
				case Xop_tkn_itm_.Tid_tblw_tr:	tblw_found = true; break;	// tblw: stop loop; do not close a list above tbl; EX: ": {| |- *a |b }" should not close ":"; stops at "|-"
				case Xop_tkn_itm_.Tid_list:		acs_pos = i; break;			// list: update acs_pos
				default:						break;						// else: keep looping
			}
		}
		if (acs_pos == Xop_ctx.Stack_not_found) return;	// no list tokens found; exit
		ctx.Stack_pop_til(root, src, acs_pos, true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_list);
	}
}

View File

@@ -0,0 +1,337 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
// Basic wiki-list parsing tests. Most tests pass wikitext to fxt.Test_parse_page_wiki and list the expected
// list_bgn / txt / list_end tokens; the last three pass wikitext to fxt.Test_parse_page_all_str and compare rendered HTML.
// Test names encode the nesting level of each line; EX: List_1_2_1 is "*a", "**b", "*c".
// NOTE(review): the two ints given to tkn_list_bgn_ / tkn_txt_ look like bgn / end offsets into the wikitext; confirm against Xop_fxt
public class Xop_list_wkr_basic_tst {
private Xop_fxt fxt = new Xop_fxt();
// runs after every test: calls Init_para_n_ on the fixture
@After public void term() {fxt.Init_para_n_();}
@Test public void List_1() {
fxt.Test_parse_page_wiki("\n*a"
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_list_end_(3).List_path_(0).List_uid_(0)
);
}
// list at beginning of source (no leading "\n")
@Test public void Bos() {
fxt.Test_parse_page_wiki("*a"
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(1, 2)
, fxt.tkn_list_end_(2).List_path_(0).List_uid_(0)
);
}
@Test public void List_1_2() {
fxt.Test_parse_page_wiki("\n*a\n**b"
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
, fxt.tkn_txt_(6, 7)
, fxt.tkn_list_end_(7).List_path_(0, 0)
, fxt.tkn_list_end_(7).List_path_(0)
);
}
@Test public void List_1_2_2() {
fxt.Test_parse_page_wiki("\n*a\n**b\n**c"
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
, fxt.tkn_txt_(6, 7)
, fxt.tkn_list_end_(7).List_path_(0, 0)
, fxt.tkn_list_bgn_(7, 10, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 1).List_uid_(0)
, fxt.tkn_txt_(10, 11)
, fxt.tkn_list_end_(11).List_path_(0, 1)
, fxt.tkn_list_end_(11).List_path_(0)
);
}
@Test public void List_1_2_3() {
fxt.Test_parse_page_wiki("\n*a\n**b\n***c"
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
, fxt.tkn_txt_(6, 7)
, fxt.tkn_list_bgn_(7, 11, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0, 0).List_uid_(0)
, fxt.tkn_txt_(11, 12)
, fxt.tkn_list_end_(12).List_path_(0, 0, 0)
, fxt.tkn_list_end_(12).List_path_(0, 0)
, fxt.tkn_list_end_(12).List_path_(0)
);
}
// first item starts at level 2: two list_bgn tokens with the same offsets are expected
@Test public void List_2() {
fxt.Test_parse_page_wiki("\n**a"
, fxt.tkn_list_bgn_(0, 3, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_list_bgn_(0, 3, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
, fxt.tkn_txt_(3, 4)
, fxt.tkn_list_end_(4).List_path_(0, 0)
, fxt.tkn_list_end_(4).List_path_(0)
);
}
@Test public void List_1_3() {
fxt.Test_parse_page_wiki("\n*a\n***b"
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_list_bgn_(3, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
, fxt.tkn_list_bgn_(3, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0, 0).List_uid_(0)
, fxt.tkn_txt_(7, 8)
, fxt.tkn_list_end_(8).List_path_(0, 0, 0)
, fxt.tkn_list_end_(8).List_path_(0, 0)
, fxt.tkn_list_end_(8).List_path_(0)
);
}
@Test public void List_1_2_1() {
fxt.Test_parse_page_wiki("\n*a\n**b\n*c"
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
, fxt.tkn_txt_(6, 7)
, fxt.tkn_list_end_(7).List_path_(0, 0)
, fxt.tkn_list_end_(7).List_path_(0)
, fxt.tkn_list_bgn_(7, 9, Xop_list_tkn_.List_itmTyp_ul).List_path_(1).List_uid_(0)
, fxt.tkn_txt_(9, 10)
, fxt.tkn_list_end_(10).List_path_(1)
);
}
@Test public void List_1_1_1() {
fxt.Test_parse_page_wiki("\n*a\n*b\n*c"
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_list_end_(3).List_path_(0)
, fxt.tkn_list_bgn_(3, 5, Xop_list_tkn_.List_itmTyp_ul).List_path_(1).List_uid_(0)
, fxt.tkn_txt_(5, 6)
, fxt.tkn_list_end_(6).List_path_(1)
, fxt.tkn_list_bgn_(6, 8, Xop_list_tkn_.List_itmTyp_ul).List_path_(2).List_uid_(0)
, fxt.tkn_txt_(8, 9)
, fxt.tkn_list_end_(9).List_path_(2)
);
}
// blank line between items: second item is expected with List_uid_(1) and List_path_(0), i.e. as a new list
@Test public void List_1___1() {
fxt.Test_parse_page_wiki("\n*a\n\n*b"
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_list_end_(3).List_path_(0)
, fxt.tkn_nl_char_len1_(3)
, fxt.tkn_list_bgn_(4, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(1)
, fxt.tkn_txt_(6, 7)
, fxt.tkn_list_end_(7).List_path_(0)
);
}
@Test public void List_1_3_1() {
fxt.Test_parse_page_wiki("\n*a\n***b\n*c"
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_list_bgn_(3, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
, fxt.tkn_list_bgn_(3, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0, 0).List_uid_(0)
, fxt.tkn_txt_(7, 8)
, fxt.tkn_list_end_(8).List_path_(0, 0, 0)
, fxt.tkn_list_end_(8).List_path_(0, 0)
, fxt.tkn_list_end_(8).List_path_(0)
, fxt.tkn_list_bgn_(8, 10, Xop_list_tkn_.List_itmTyp_ul).List_path_(1).List_uid_(0)
, fxt.tkn_txt_(10, 11)
, fxt.tkn_list_end_(11).List_path_(1)
);
}
// "**" followed by "##": the ul tokens are closed and the ol tokens are expected with List_uid_(1)
@Test public void Mix_2o_2u() {
fxt.Test_parse_page_wiki("\n**a\n##b"
, fxt.tkn_list_bgn_(0, 3, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_list_bgn_(0, 3, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
, fxt.tkn_txt_(3, 4)
, fxt.tkn_list_end_(4).List_path_(0, 0)
, fxt.tkn_list_end_(4).List_path_(0)
, fxt.tkn_list_bgn_(4, 7, Xop_list_tkn_.List_itmTyp_ol).List_path_(0).List_uid_(1)
, fxt.tkn_list_bgn_(4, 7, Xop_list_tkn_.List_itmTyp_ol).List_path_(0, 0).List_uid_(1)
, fxt.tkn_txt_(7, 8)
, fxt.tkn_list_end_(8).List_path_(0, 0)
, fxt.tkn_list_end_(8).List_path_(0)
);
}
// ";" then ":" on the next line: dt and dd are expected in the same list (List_uid_(0), paths 0 and 1)
@Test public void Dt_dd() {
fxt.Test_parse_page_wiki(";a\n:b"
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_dt).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(1, 2)
, fxt.tkn_list_end_(2).List_path_(0)
, fxt.tkn_list_bgn_(2, 4, Xop_list_tkn_.List_itmTyp_dd).List_path_(1).List_uid_(0)
, fxt.tkn_txt_(4, 5)
, fxt.tkn_list_end_(5).List_path_(1)
);
}
@Test public void Dt_dd_inline() {
fxt.Test_parse_page_wiki(";a:b" // NOTE: no line break
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_dt).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(1, 2)
, fxt.tkn_list_end_(2).List_path_(0)
, fxt.tkn_list_bgn_(2, 3, Xop_list_tkn_.List_itmTyp_dd).List_path_(1).List_uid_(0)
, fxt.tkn_txt_(3, 4)
, fxt.tkn_list_end_(4).List_path_(1)
);
}
@Test public void Mix_1dd_1ul() {
fxt.Test_parse_page_wiki(":*a"
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_dd).List_path_(0).List_uid_(0)
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_list_end_(3).List_path_(0, 0)
, fxt.tkn_list_end_(3).List_path_(0)
);
}
@Test public void Mix_1ul__1dd_1ul() {
fxt.Test_parse_page_wiki("*a\n:*b"
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(1, 2)
, fxt.tkn_list_end_(2).List_path_(0).List_uid_(0)
, fxt.tkn_list_bgn_(2, 5, Xop_list_tkn_.List_itmTyp_dd).List_path_(0).List_uid_(1)
, fxt.tkn_list_bgn_(2, 5, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(1)
, fxt.tkn_txt_(5, 6)
, fxt.tkn_list_end_(6).List_path_(0, 0)
, fxt.tkn_list_end_(6).List_path_(0)
);
}
@Test public void Mix_1dd_1ul__1dd_1ul() {
fxt.Test_parse_page_wiki(":*a\n:*b"
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_dd).List_path_(0).List_uid_(0)
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
, fxt.tkn_txt_(2, 3)
, fxt.tkn_list_end_(3).List_path_(0, 0)
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 1).List_uid_(0)
, fxt.tkn_txt_(6, 7)
, fxt.tkn_list_end_(7).List_path_(0, 1)
, fxt.tkn_list_end_(7).List_path_(0)
);
}
// header line after a list item: list_end is expected before the hdr token
@Test public void Mix_1ul_1hdr() {
fxt.Test_parse_page_wiki("*a\n==a==\n"
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(1, 2)
, fxt.tkn_list_end_(2).List_path_(0).List_uid_(0)
, fxt.tkn_hdr_(2, 9, 2).Hdr_ws_trailing_(1).Subs_
( fxt.tkn_txt_(5, 6)
)
);
}
@Test public void Mix_1ul_1hdr_1ul() {
fxt.Test_parse_page_wiki("*a\n==a==\n*b"
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(1, 2)
, fxt.tkn_list_end_(2).List_path_(0).List_uid_(0)
, fxt.tkn_hdr_(2, 8, 2).Subs_
( fxt.tkn_txt_(5, 6)
)
, fxt.tkn_list_bgn_(8, 10, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(1)
, fxt.tkn_txt_(10, 11)
, fxt.tkn_list_end_(11).List_path_(0)
);
}
@Test public void Mix_1ol_1hr_1ol() {
fxt.Test_parse_page_wiki("#a\n----\n#b"
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ol).List_path_(0).List_uid_(0)
, fxt.tkn_txt_(1, 2)
, fxt.tkn_list_end_(2)
, fxt.tkn_para_blank_(2)
, fxt.tkn_hr_(2, 7)
, fxt.tkn_list_bgn_(7, 9, Xop_list_tkn_.List_itmTyp_ol).List_path_(0).List_uid_(1)
, fxt.tkn_txt_(9, 10)
, fxt.tkn_list_end_(10)
);
}
// table opened on a "::" line: both list_end tokens are expected after the tblw token
@Test public void Mix_tblw() {
fxt.Test_parse_page_wiki("::{|\n|a\n|}"
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_dd).List_path_(0).List_uid_(0)
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_dd).List_path_(0, 0).List_uid_(0)
, fxt.tkn_tblw_tb_(2, 10).Subs_
( fxt.tkn_tblw_tr_(4, 7).Subs_
( fxt.tkn_tblw_td_(4, 7).Subs_(fxt.tkn_txt_(6, 7), fxt.tkn_para_blank_(8)))
)
, fxt.tkn_list_end_(10).List_path_(0, 0)
, fxt.tkn_list_end_(10).List_path_(0)
);
}
// level 1 -> 3 -> 1: the skipped level 2 is expected as an <li> that holds only the nested <ul>
@Test public void Dif_lvls_1_3_1() {
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "*1"
, "***3"
, "*1"
) , String_.Concat_lines_nl_skip_last
( "<ul>"
, " <li>1"
, " <ul>"
, " <li>"
, " <ul>"
, " <li>3"
, " </li>"
, " </ul>"
, " </li>"
, " </ul>"
, " </li>"
, " <li>1"
, " </li>"
, "</ul>"
));
}
@Test public void Dif_lvls_1_3_2() {// uneven lists
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "*1"
, "***3"
, "**2"
) , String_.Concat_lines_nl_skip_last
( "<ul>"
, " <li>1"
, " <ul>"
, " <li>"
, " <ul>"
, " <li>3"
, " </li>"
, " </ul>"
, " </li>"
, " <li>2"
, " </li>"
, " </ul>"
, " </li>"
, "</ul>"
));
}
// blank lines between items: each item is expected in its own top-level <ul>
@Test public void New_lines() {
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "*a"
, ""
, "**b"
, ""
, "**c"
) , String_.Concat_lines_nl_skip_last
( "<ul>"
, " <li>a"
, " </li>"
, "</ul>"
, ""
, "<ul>"
, " <li>"
, " <ul>"
, " <li>b"
, " </li>"
, " </ul>"
, " </li>"
, "</ul>"
, ""
, "<ul>"
, " <li>"
, " <ul>"
, " <li>c"
, " </li>"
, " </ul>"
, " </li>"
, "</ul>"
));
}
}

View File

@@ -0,0 +1,88 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
// List rendering tests that run with Init_para_y_ applied to the fixture (see init); term restores Init_para_n_ after each test.
// Each test passes wikitext lines to fxt.Test_parse_page_wiki_str and compares against the expected HTML lines.
public class Xop_list_wkr_para_tst {
// runs before every test: resets the fixture, then calls Init_para_y_
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
// runs after every test: calls Init_para_n_
@After public void term() {fxt.Init_para_n_();}
// single item
@Test public void Basic() {
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "*a"
) , String_.Concat_lines_nl_skip_last
( "<ul>"
, " <li>a"
, " </li>"
, "</ul>"
, ""
)
);
}
// two consecutive items share one <ul>
@Test public void Multiple() {
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "*a"
, "*b"
) , String_.Concat_lines_nl_skip_last
( "<ul>"
, " <li>a"
, " </li>"
, " <li>b"
, " </li>"
, "</ul>"
)
);
}
// blank line between items: two separate <ul> blocks are expected
@Test public void Multiple_w_1_nl() {
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "*a"
, ""
, "*b"
) , String_.Concat_lines_nl_skip_last
( "<ul>"
, " <li>a"
, " </li>"
, "</ul>"
, ""
, "<ul>"
, " <li>b"
, " </li>"
, "</ul>"
)
);
}
@Test public void Pre_between_lists() { // PURPOSE: list should close pre; EX:en.b:Knowing Knoppix/Other applications; DATE:2014-02-18
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "#a"
, " b"
, "#c" // should close <pre> opened by b
) , String_.Concat_lines_nl_skip_last
( "<ol>"
, " <li>a"
, " </li>"
, "</ol>"
, ""
, "<pre>b"
, "</pre>"
, ""
, "<ol>"
, " <li>c"
, " </li>"
, "</ol>"
)
);
}
}

View File

@@ -0,0 +1,409 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
// Regression tests for unusual list wikitext (mixed with div / table / pre / xnde, dangling ":" and empty items).
// Each test passes wikitext to fxt.Test_parse_page_all_str and compares against the expected HTML.
// Several tests call Init_para_y_ / Init_para_n_ themselves; term() also calls Init_para_n_ after every test.
public class Xop_list_wkr_uncommon_tst {
private Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Bug_specified_div() { // FIX: </div> was not clearing state for lnki; PAGE:en.w:Ananke (moon)
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "<div>"
, "#<i>a"
, "</div>"
, "*b"
), String_.Concat_lines_nl_skip_last
( "<div>"
, "<ol>"
, " <li><i>a"
, "</i>"
, " </li>"
, "</ol></div>"
, "<ul>"
, " <li>b"
, " </li>"
, "</ul>"
));
}
// NOTE(review): the trailing comment below is identical to Bug_specified_div's and looks copy-pasted;
// the input here is dd items at levels 2,3,4,5 followed by a return to level 2
@Test public void Bug_mismatched() { // FIX: </div> was not clearing state for lnki; PAGE:en.w:Ananke (moon)
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "::a"
, ":::1"
, "::::11"
, ":::::111"
, "::b"
), String_.Concat_lines_nl_skip_last
( "<dl>"
, " <dd>"
, " <dl>"
, " <dd>a"
, " <dl>"
, " <dd>1"
, " <dl>"
, " <dd>11"
, " <dl>"
, " <dd>111"
, " </dd>"
, " </dl>"
, " </dd>"
, " </dl>"
, " </dd>"
, " </dl>"
, " </dd>"
, " <dd>b"
, " </dd>"
, " </dl>"
, " </dd>"
, "</dl>"
));
}
@Test public void Empty_li_ignored() { // PURPOSE: inner template can cause dupe li; PAGE:en.w:any Calendar day and NYT link; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
fxt.Init_para_y_();
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "*a"
, "* "
, "*b"
, "*c"
), String_.Concat_lines_nl_skip_last
( "<ul>"
, " <li>a"
, " </li>"
, " <li> "
, " </li>"
, " <li>b"
, " </li>"
, " <li>c"
, " </li>"
, "</ul>"
, ""
));
fxt.Init_para_n_();
}
@Test public void List_in_tblw() { // PURPOSE: list inside table should not be close outer list; PAGE:en.w:Cato the Elder
fxt.Init_para_y_();
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "*a"
, "{|"
, "|b"
, "::c"
, "|}"
), String_.Concat_lines_nl_skip_last
( "<ul>"
, " <li>a"
, " </li>"
, "</ul>"
, "<table>"
, " <tr>"
, " <td>b"
, ""
, " <dl>"
, " <dd>"
, " <dl>"
, " <dd>c"
, " </dd>"
, " </dl>"
, " </dd>"
, " </dl>"
, " </td>"
, " </tr>"
, "</table>"
, ""
));
fxt.Init_para_n_();
}
@Test public void Dt_dd_colon_at_eol() { // PURPOSE: dangling ":" should not put next line in <dt>; PAGE:en.w:Stein; b was being wrapped in <dt>b</dt>; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
fxt.Init_para_y_();
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( ";a:"
, "*b"
, ""
, ";c"
, "*d"
), String_.Concat_lines_nl_skip_last
( "<dl>"
, " <dt>a"
, " </dt>"
, " <dd>"
, " </dd>"
, "</dl>"
, "<ul>"
, " <li>b"
, " </li>"
, "</ul>"
, ""
, "<dl>"
, " <dt>c"
, " </dt>"
, "</dl>"
, "<ul>"
, " <li>d"
, " </li>"
, "</ul>"
, ""
));
fxt.Init_para_n_();
}
@Test public void Dd_should_not_print_colon() {// PURPOSE: ;a:\n should show as ";a" not ";a:". colon should still be considered as part of empty list; DATE:2013-11-07; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
fxt.Test_parse_page_all_str
( ";a:\nb"
, String_.Concat_lines_nl_skip_last
( "<dl>"
, " <dt>a"
, " </dt>"
, " <dd>"
, " </dd>"
, "</dl>"
, "b"
));
}
@Test public void Dt_dd_colon_in_lnki() { // PURPOSE: "; [[Portal:a]]" should not split lnki; PAGE:en.w:Wikipedia:WikiProject Military history/Operation Majestic Titan; "; [[Wikipedia:WikiProject Military history/Operation Majestic Titan/Phase I|Phase I]]: a b"
fxt.Init_para_y_();
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( ";[[Portal:a]]"
), String_.Concat_lines_nl_skip_last
( "<dl>"
, " <dt><a href=\"/wiki/Portal:A\">Portal:A</a>"
, " </dt>"
, "</dl>"
, ""
));
fxt.Init_para_n_();
}
// 300 "*" exceeds the 256-symbol limit mentioned below; the expected output is the input text unchanged
@Test public void Max_list_depth() { // PURPOSE: 256+ * caused list parser to fail; ignore; PAGE:en.w:Bariatric surgery
String multiple = String_.Repeat("*", 300);
fxt.Test_parse_page_all_str(multiple, multiple);
}
@Test public void Numbered_list_resets_incorrectly() { // PURPOSE: as description
fxt.Init_para_y_();
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "#A"
, "#*Aa"
, "#**Aaa"
, "#*Ab"
, "#B"
), String_.Concat_lines_nl_skip_last
( "<ol>"
, " <li>A"
, ""
, " <ul>"
, " <li>Aa"
, ""
, " <ul>"
, " <li>Aaa"
, " </li>"
, " </ul>"
, " </li>"
, " <li>Ab"
, " </li>"
, " </ul>" // was showing as </ol>
, " </li>"
, " <li>B"
, " </li>"
, "</ol>"
, ""
));
fxt.Init_para_n_();
}
@Test public void List_should_not_end_indented_table() {// PURPOSE: :{| was being closed by \n*; EX:w:Maxwell's equations; DATE:20121231
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( ":{|"
, "|-"
, "|"
, "*a"
, "|b"
, "|}"
), String_.Concat_lines_nl_skip_last
( "<dl>"
, " <dd>"
, " <table>"
, " <tr>"
, " <td>"
, " <ul>"
, " <li>a"
, " </li>"
, " </ul>"
, " </td>"
, " <td>b"
, " </td>"
, " </tr>"
, " </table>"
, " </dd>"
, "</dl>"
));
}
@Test public void Dt_dd_broken_by_xnde() { // PURPOSE.fix: xnde was resetting dl incorrectly; EX:w:Virus; DATE:2013-01-31
fxt.Test_parse_page_all_str(";<b>a</b>:c"
, String_.Concat_lines_nl_skip_last
( "<dl>"
, " <dt><b>a</b>"
, " </dt>"
, " <dd>c"
, " </dd>"
, "</dl>"
));
}
@Test public void Trim_empty_list_items() { // PURPOSE: empty list items should be ignored; DATE:2013-07-02; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
fxt.Test_parse_page_all_str
("*** \n"
, String_.Concat_lines_nl_skip_last
( "<ul>"
, " <li>"
, " <ul>"
, " <li>"
, " <ul>"
, " <li> "
, " </li>"
, " </ul>"
, " </li>"
, " </ul>"
, " </li>"
, "</ul>"
, ""
));
}
@Test public void Trim_empty_list_items_error() { // PURPOSE.fix: do not add empty itm's nesting to current list; DATE:2013-07-07; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
( "* a"
, "** " // was: do not add ** to nest; now: add ** and \s
, "*** b"
, "* c"
), String_.Concat_lines_nl
( "<ul>"
, " <li> a"
, " <ul>"
, " <li> "
, " <ul>"
, " <li> b"
, " </li>"
, " </ul>"
, " </li>"
, " </ul>"
, " </li>"
, " <li> c"
, " </li>"
, "</ul>"
));
}
@Test public void Tblw_should_autoclose() {// PURPOSE: tblw should auto-close open list
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "#a"
, "{|"
, "|b"
, "|}"
), String_.Concat_lines_nl_skip_last
( "<ol>"
, " <li>a"
, " </li>"
, "</ol>"
, "<table>"
, " <tr>"
, " <td>b"
, " </td>"
, " </tr>"
, "</table>"
, ""
));
}
@Test public void Tblx_should_not_autoclose() { // PURPOSE: do not auto-close list if table is xnde; DATE:2014-02-05
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
( "#a"
, "# <table><tr><td>b</td></tr></table>"
, "c"
), String_.Concat_lines_nl
( "<ol>"
, " <li>a"
, " </li>"
, " <li> "
, " <table>"
, " <tr>"
, " <td>b"
, " </td>"
, " </tr>"
, " </table>"
, " </li>"
, "</ol>"
, "c"
));
}
@Test public void Li_disappears() { // PURPOSE: "\n*" disappears when followed by "<li>"; PAGE:en.w:Bristol_Bullfinch; DATE:2014-06-24
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
( "a"
, "*b<li>"
), String_.Concat_lines_nl_skip_last // NOTE: tag sequence matches MW output
( "a"
, "<ul>"
, " <li>b"
, "<li>"
, "</li>"
, " </li>"
, "</ul>"
));
}
@Test public void Ul_should_end_wlst() { // PURPOSE: </ul> should end wiki_list; PAGE:en.w:Bristol_Bullfinch; DATE:2014-06-24
fxt.Test_parse_page_all_str
( "*a</ul>b"
, String_.Concat_lines_nl_skip_last
( "<ul>"
, " <li>a</ul>b" // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
, " </li>"
, "</ul>"
));
}
@Test public void Colon_causes_dd() { // PURPOSE: colon was mistakenly being ignored due to proximity to "\n;"; PAGE:de.w:Schmach_von_Tirana#Kuriosit.C3.A4t:_EM-Qualifikationsspiel_vom_20._November_1983 DATE:2014-07-11
fxt.Test_parse_page_all_str
( String_.Concat_lines_nl_skip_last
( "a:b"
, ";c"
), String_.Concat_lines_nl_skip_last
( "a:b"
, "<dl>"
, " <dt>c"
, " </dt>"
, "</dl>"
));
}
@Test public void Pre_and_nested() { // PURPOSE: pre should interrupt list; PAGE:fi.w:Luettelo_hyönteisistä; DATE:2015-03-31
fxt.Init_para_y_();
fxt.Test_parse_page_all_str
( String_.Concat_lines_nl_skip_last
( "*a"
, "**b"
, " c" // pre
, "*d" // *d treated mistakenly as **d
), String_.Concat_lines_nl_skip_last
( "<ul>"
, " <li>a"
, ""
, " <ul>"
, " <li>b"
, " </li>"
, " </ul>"
, " </li>"
, "</ul>"
, ""
, "<pre>c"
, "</pre>"
, ""
, "<ul>"
, " <li>d"
, " </li>"
, "</ul>"
, ""
));
fxt.Init_para_n_();
}
}

View File

@@ -0,0 +1,96 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.html.*; import gplx.xowa.html.hrefs.*;
import gplx.xowa.net.*;
// Writes the HTML for an external-link token (Xop_lnke_tkn): opening <a> with href, caption, optional xowa-protocol <img>, closing </a>.
public class Xoh_lnke_wtr {
	private static final byte[]
	  A_lhs_end_external = Bry_.new_a7("\" class=\"external text\" rel=\"nofollow\">")
	, A_lhs_end_internal = Bry_.new_a7("\">")
	;
	private Xoae_app app;
	public Xoh_lnke_wtr(Xowe_wiki wiki) {this.app = wiki.Appe();}
	// Writes the whole anchor for lnke into bfr. When hctx.Mode_is_alt() is true, only the caption is written (no tags).
	public void Write_all(Bry_bfr bfr, Xoh_html_wtr html_wtr, Xoh_wtr_ctx hctx, Xop_ctx ctx, byte[] src, Xop_lnke_tkn lnke) {
		int lnke_bgn = lnke.Lnke_bgn();
		int lnke_end = lnke.Lnke_end();
		boolean proto_is_xowa = lnke.Proto_tid() == Xoo_protocol_itm.Tid_xowa;
		if (!hctx.Mode_is_alt()) {	// write href, unless mode is alt
			if (!hctx.Mode_is_hdump())
				bfr.Add(Xoh_consts.A_bgn);
			else {	// hdump: anchor carries an xtid describing the lnke form
				String a_bgn;
				if (lnke.Lnke_typ() == Xop_lnke_tkn.Lnke_typ_text)
					a_bgn = "<a xtid='a_lnke_txt' href=\"";
				else
					a_bgn = lnke.Subs_len() == 0 ? "<a xtid='a_lnke_brk_n' href=\"" : "<a xtid='a_lnke_brk_y' href=\"";
				bfr.Add_str_a7(a_bgn);
			}
			boolean external = Write_href(bfr, ctx, src, lnke, lnke_bgn, lnke_end, proto_is_xowa);
			bfr.Add(external ? A_lhs_end_external : A_lhs_end_internal);
		}
		Write_caption(bfr, html_wtr, hctx, ctx, src, lnke, lnke_bgn, lnke_end, proto_is_xowa);
		if (!hctx.Mode_is_alt()) {
			if (proto_is_xowa)	// add <img />
				bfr.Add(Xoh_consts.Img_bgn).Add(html_wtr.Html_mgr().Img_xowa_protocol()).Add(Xoh_consts.__inline_quote);
			bfr.Add(Xoh_consts.A_end);
		}
	}
	// Writes the href value into bfr. Returns true when the caller should close the tag with the "external" attributes
	// (relative and regular urls) and false otherwise (xowa protocol and xwiki links).
	public boolean Write_href(Bry_bfr bfr, Xop_ctx ctx, byte[] src, Xop_lnke_tkn lnke, int lnke_bgn, int lnke_end, boolean proto_is_xowa) {
		byte[] xwiki_wiki = lnke.Lnke_xwiki_wiki();
		if (xwiki_wiki != null) {	// xwiki
			Url_encoder href_encoder = Xoa_app_.Utl__encoder_mgr().Href_quotes();
			bfr.Add(Xoh_href_parser.Href_site_bry).Add(xwiki_wiki).Add(Xoh_href_parser.Href_wiki_bry)
				.Add(href_encoder.Encode(lnke.Lnke_xwiki_page()));	// NOTE: must encode page; EX:%22%3D -> '">' which will end attribute; PAGE:en.w:List_of_Category_A_listed_buildings_in_West_Lothian DATE:2014-07-15
			if (lnke.Lnke_xwiki_qargs() != null)
				Xoa_url_arg_hash.Concat_bfr(bfr, href_encoder, lnke.Lnke_xwiki_qargs());	// NOTE: must encode args
			return false;
		}
		if (lnke.Lnke_relative()) {	// relative; EX: //a.org
			bfr.Add(app.Utl__url_parser().Url_parser().Relative_url_protocol_bry()).Add_mid(src, lnke_bgn, lnke_end);
			return true;
		}
		if (proto_is_xowa) {	// xowa protocol: prefix, then encoded command
			bfr.Add(Xop_lnke_wkr.Bry_xowa_protocol);
			Xoa_app_.Utl__encoder_mgr().Gfs().Encode(bfr, src, lnke_bgn, lnke_end);
			return false;
		}
		bfr.Add_mid(src, lnke_bgn, lnke_end);	// regular; add href; EX: http://a.org
		return true;
	}
	// Writes the visible caption: the sub tokens when present; otherwise the raw url (Lnke_typ_text) or an auto-number; EX: "[1]"
	public void Write_caption(Bry_bfr bfr, Xoh_html_wtr html_wtr, Xoh_wtr_ctx hctx, Xop_ctx ctx, byte[] src, Xop_lnke_tkn lnke, int lnke_bgn, int lnke_end, boolean proto_is_xowa) {
		int subs_len = lnke.Subs_len();
		if (subs_len != 0) {	// text available
			for (int i = 0; i < subs_len; i++)
				html_wtr.Write_tkn(bfr, ctx, hctx, src, lnke, i, lnke.Subs_get(i));
			return;
		}
		// no text
		if (lnke.Lnke_typ() == Xop_lnke_tkn.Lnke_typ_text)
			bfr.Add_mid(src, lnke_bgn, lnke_end);
		else	// auto-number
			bfr.Add_byte(Byte_ascii.Brack_bgn).Add_int_variable(ctx.Cur_page().Html_data().Lnke_autonumber_next()).Add_byte(Byte_ascii.Brack_end);
	}
}

View File

@@ -0,0 +1,36 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
// HTML output tests for external links: each test passes wikitext to fxt.Test_parse_page_wiki_str and compares the rendered anchor.
public class Xoh_lnke_wtr_tst {
private Xop_fxt fxt = new Xop_fxt();
// runs after every test: calls Init_para_n_ and resets the fixture
@After public void term() {fxt.Init_para_n_(); fxt.Reset();}
@Test public void Basic() {fxt.Test_parse_page_wiki_str("[irc://a]" , "<a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">[1]</a>");}
@Test public void Autonumber() {fxt.Test_parse_page_wiki_str("[irc://a] [irc://b]" , "<a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">[1]</a> <a href=\"irc://b\" class=\"external text\" rel=\"nofollow\">[2]</a>");}
@Test public void Caption() {fxt.Test_parse_page_wiki_str("[irc://a b]" , "<a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">b</a>");}
@Test public void Caption_wtxt() {fxt.Test_parse_page_wiki_str("[irc://a ''b'']" , "<a href=\"irc://a\" class=\"external text\" rel=\"nofollow\"><i>b</i></a>");}
// xowa-cmd links: with the protocol enabled the anchor has no "external" attributes and ends with the protocol <img>;
// with it disabled the wikitext is expected back as escaped literal text
@Test public void Xowa_protocol() {
String img = "<img src=\"file:///mem/xowa/user/test_user/app/img/xowa/protocol.png\"/>";
fxt.Wiki().Sys_cfg().Xowa_proto_enabled_(true);
fxt.Test_parse_page_wiki_str("[xowa-cmd:\"a\" z]" , "<a href=\"xowa-cmd:a\">z" + img + "</a>");
fxt.Test_parse_page_wiki_str("[xowa-cmd:\"a.b('c_d');\" z]" , "<a href=\"xowa-cmd:a.b('c_d');\">z" + img + "</a>");
fxt.Test_parse_page_wiki_str("[xowa-cmd:*\"a\"b*c\"* z]" , "<a href=\"xowa-cmd:a%22b%2Ac\">z" + img + "</a>");
fxt.Wiki().Sys_cfg().Xowa_proto_enabled_(false);
fxt.Test_parse_page_wiki_str("[xowa-cmd:\"a\" b]" , "[xowa-cmd:&quot;a&quot; b]"); // protocol is disabled: literalize String (i.e.: don't make it an anchor)
}
}

View File

@@ -0,0 +1,26 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
// Lexer registered for Byte_ascii.Brack_end; token creation is delegated to ctx.Lnke().MakeTkn_end.
public class Xop_lnke_end_lxr implements Xop_lxr {//20111222
	public static final Xop_lnke_end_lxr _ = new Xop_lnke_end_lxr();	// shared instance
	Xop_lnke_end_lxr() {}	// not public: callers use the shared instance
	public byte Lxr_tid() {
		return Xop_lxr_.Tid_lnke_end;
	}
	// registers this lexer in the wiki-level trie under "]"
	public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
		core_trie.Add(Byte_ascii.Brack_end, this);
	}
	// nothing language-specific to register
	public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {
	}
	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		return ctx.Lnke().MakeTkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
	}
}

View File

@@ -0,0 +1,22 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Log message definitions used while parsing external links (lnke). */
public class Xop_lnke_log {
	// group under which every lnke message is registered
	private static final Gfo_msg_grp lnke_grp
		= Gfo_msg_grp_.new_(Xoa_app_.Nde, "lnke");

	/** Note-level message logged by Xop_lnke_wkr.AutoClose when a "[" link is never closed. */
	public static final Gfo_msg_itm Dangling
		= Gfo_msg_itm_.new_note_(lnke_grp, "dangling"); // NOTE: WP.BOT:YOBOT;PAGE:en.w:Pan_flute
}

View File

@@ -0,0 +1,44 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.xowa.net.*;
/**
 * Lexer hook that starts an external link (lnke).
 * The shared instance "_" only registers hooks; for each protocol it adds two per-protocol instances to the
 * trie (bare "proto" and bracketed "[proto"), plus the relative forms "[//" and "[[//".
 * Each per-protocol instance remembers its protocol bytes, protocol tid and link type and passes them to the lnke worker.
 */
public class Xop_lnke_lxr implements Xop_lxr {
	private static final byte[]
	  Bry_relative_1 = Bry_.new_a7("[//")
	, Bry_relative_2 = Bry_.new_a7("[[//")
	;
	/** Registration-only instance; its protocol fields are left at their defaults. */
	public static final Xop_lnke_lxr _ = new Xop_lnke_lxr();

	private byte lnke_typ;
	byte[] protocol;
	byte tid;

	Xop_lnke_lxr() {}
	Xop_lnke_lxr(byte lnke_typ, byte[] protocol, byte tid) {
		this.lnke_typ = lnke_typ;
		this.protocol = protocol;
		this.tid = tid;
	}

	/** Identifies this lexer as the lnke-begin lexer. */
	public byte Lxr_tid() {
		return Xop_lxr_.Tid_lnke_bgn;
	}
	/** Registers hooks for every known protocol, the two relative prefixes and "xowa-cmd". */
	public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
		// one text hook and one bracket hook per registered protocol; EX: "http:" and "[http:"
		for (Xoo_protocol_itm protocol_itm : Xoo_protocol_itm.Ary()) {
			Ctor_lxr_add(core_trie, protocol_itm.Key_w_colon_bry(), protocol_itm.Tid());
		}
		// relative urls only exist in bracket form
		Xop_lnke_lxr relative_1_lxr = new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Xoa_consts.Url_relative_prefix, Xoo_protocol_itm.Tid_relative_1);
		core_trie.Add(Bry_relative_1, relative_1_lxr);
		Xop_lnke_lxr relative_2_lxr = new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Xoa_consts.Url_relative_prefix, Xoo_protocol_itm.Tid_relative_2);
		core_trie.Add(Bry_relative_2, relative_2_lxr);
		// NOTE: the xowa hook is registered without the trailing ":"
		Ctor_lxr_add(core_trie, Bry_.new_a7("xowa-cmd"), Xoo_protocol_itm.Tid_xowa);
	}
	/** No language-specific hooks are registered. */
	public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {
	}
	// adds the bare-text hook first, then the "[" + protocol hook
	private void Ctor_lxr_add(Btrie_fast_mgr core_trie, byte[] protocol_bry, byte tid) {
		Xop_lnke_lxr text_lxr = new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_text, protocol_bry, tid);
		core_trie.Add(protocol_bry, text_lxr);
		byte[] brack_hook = Bry_.Add(Byte_ascii.Brack_bgn, protocol_bry);
		Xop_lnke_lxr brack_lxr = new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, protocol_bry, tid);
		core_trie.Add(brack_hook, brack_lxr);
	}
	/**
	 * Starts an lnke through the context's lnke worker.
	 * An xowa-cmd hook is turned into plain text when the wiki's xowa protocol flag is off.
	 */
	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		boolean is_xowa_cmd = this.tid == Xoo_protocol_itm.Tid_xowa;
		if (is_xowa_cmd && !ctx.Wiki().Sys_cfg().Xowa_proto_enabled()) {
			return ctx.Lxr_make_txt_(cur_pos);
		}
		return ctx.Lnke().MakeTkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, protocol, tid, lnke_typ);
	}
}

View File

@@ -0,0 +1,40 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.parsers.lnkes.*;
/**
 * Token for an external link (lnke).
 * Holds the link type, the protocol, the source range of the url itself (lnke_bgn / lnke_end)
 * and, when set through Lnke_xwiki_, the wiki / page / query args of a link that points at a known wiki.
 */
public class Xop_lnke_tkn extends Xop_tkn_itm_base {//20111222
	// link types
	public static final byte Lnke_typ_null = 0;
	public static final byte Lnke_typ_brack = 1;
	public static final byte Lnke_typ_text = 2;
	public static final byte Lnke_typ_brack_dangling = 3;

	private boolean lnke_relative;
	private byte lnke_typ = Lnke_typ_null;
	private byte[] lnke_site;
	private byte[] lnke_xwiki_wiki;
	private byte[] lnke_xwiki_page;
	Gfo_url_arg[] lnke_xwiki_qargs;
	private int lnke_bgn;
	private int lnke_end;
	private byte[] protocol;
	private byte proto_tid;

	Xop_lnke_tkn() {}
	public Xop_lnke_tkn(int bgn, int end, byte[] protocol, byte proto_tid, byte lnke_typ, int lnke_bgn, int lnke_end) {
		this.Tkn_ini_pos(false, bgn, end);
		this.protocol = protocol;
		this.proto_tid = proto_tid;
		this.lnke_typ = lnke_typ;
		this.lnke_bgn = lnke_bgn;
		this.lnke_end = lnke_end;
	}

	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_lnke;
	}

	// relative flag
	public boolean Lnke_relative() {
		return lnke_relative;
	}
	public Xop_lnke_tkn Lnke_relative_(boolean v) {
		lnke_relative = v;
		return this;
	}

	// link type; one of the Lnke_typ_ constants
	public byte Lnke_typ() {
		return lnke_typ;
	}
	public Xop_lnke_tkn Lnke_typ_(byte v) {
		lnke_typ = v;
		return this;
	}

	// site
	public byte[] Lnke_site() {
		return lnke_site;
	}
	public Xop_lnke_tkn Lnke_site_(byte[] v) {
		lnke_site = v;
		return this;
	}

	// xwiki data; all three values are assigned together by Lnke_xwiki_
	public byte[] Lnke_xwiki_wiki() {
		return lnke_xwiki_wiki;
	}
	public byte[] Lnke_xwiki_page() {
		return lnke_xwiki_page;
	}
	public Gfo_url_arg[] Lnke_xwiki_qargs() {
		return lnke_xwiki_qargs;
	}
	public void Lnke_xwiki_(byte[] wiki, byte[] page, Gfo_url_arg[] args) {
		this.lnke_xwiki_wiki = wiki;
		this.lnke_xwiki_page = page;
		this.lnke_xwiki_qargs = args;
	}

	// source range of the url
	public int Lnke_bgn() {
		return lnke_bgn;
	}
	public int Lnke_end() {
		return lnke_end;
	}
	public Xop_lnke_tkn Lnke_rng_(int bgn, int end) {
		lnke_bgn = bgn;
		lnke_end = end;
		return this;
	}

	// protocol
	public byte[] Protocol() {
		return protocol;
	}
	public byte Proto_tid() {
		return proto_tid;
	}

	/** Adds each token in ary as a sub token, in order, and returns this token. */
	public Xop_lnke_tkn Subs_add_ary(Xop_tkn_itm... ary) {
		int len = ary.length;
		for (int i = 0; i < len; i++) {
			super.Subs_add(ary[i]);
		}
		return this;
	}
}

View File

@@ -0,0 +1,306 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.apps.progs.*; import gplx.xowa.net.*; import gplx.xowa.wikis.xwikis.*;
public class Xop_lnke_wkr implements Xop_ctx_wkr {
// url helpers; url_parser is assigned in Ctor_ctx, the others are created with the worker
Gfo_url_parser url_parser;
Gfo_url_site_data site_data = new Gfo_url_site_data();
Xoa_url_parser xo_url_parser = new Xoa_url_parser();
Xoa_url xo_url_parser_url = Xoa_url.blank_();
/** Takes the url parser from the app of the given context. */
public void Ctor_ctx(Xop_ctx ctx) {
	this.url_parser = ctx.App().Utl__url_parser().Url_parser();
}
/** Page-begin hook; this worker does nothing here. */
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
}
/** Page-end hook; this worker does nothing here. */
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
}
// when true, MakeTkn_bgn puts an unclosed "[" link at end-of-source on the stack as one text token
private boolean dangling_goes_on_stack;
public boolean Dangling_goes_on_stack() {
	return dangling_goes_on_stack;
}
public void Dangling_goes_on_stack_(boolean v) {
	this.dangling_goes_on_stack = v;
}
/**
 * Handles a bracketed lnke that was opened but never closed; EX: "[irc://a"
 * The token is marked as dangling, its end is pulled back to the end of the url and a Dangling note is logged.
 * NOTE: lnkes that start with protocol will be ac'd in MakeTkn_bgn; EX: "http://a"
 */
public void AutoClose(Xop_ctx ctx, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
	Xop_lnke_tkn lnke_tkn = (Xop_lnke_tkn)tkn;
	lnke_tkn.Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack_dangling);
	// end is lnke_end, not cur_pos or src_len; EX: "[irc://a b", lnk ends at a, not b; the token still begins at "["
	int url_end = lnke_tkn.Lnke_end();
	lnke_tkn.Src_end_(url_end);
	ctx.Msg_log().Add_itm_none(Xop_lnke_log.Dangling, src, tkn.Src_bgn(), cur_pos);
}
// prefix of xowa command links, including the trailing ":"; available both as a String and as a byte[] built from that String
public static final String Str_xowa_protocol = "xowa-cmd:";
public static final byte[] Bry_xowa_protocol = Bry_.new_a7(Str_xowa_protocol);
/**
 * Starts an external link (lnke) for a protocol hook found in src, or falls back to text when the hook is not a valid link.
 * Steps, in order:
 *   1. guards: bare-text hook glued to a preceding word, nested lnke, xowa-cmd (delegated to Make_tkn_xowa)
 *   2. lnki guard: a bare-text url that follows "|...=" inside a lnki is not made into a lnke
 *   3. forward scan from cur_pos to the byte that ends the url
 *   4. bracket / plain-text specific adjustments of bgn_pos, lnke_bgn and lnke_type
 *   5. site parsing, trimming of trailing punctuation, token creation and xwiki lookup
 *   6. choice of the returned position based on what ended the url
 * @param bgn_pos   start of the matched hook; for bracket hooks this is the "["
 * @param cur_pos   position after the matched hook (presumably supplied by the lexer; confirm against Xop_lxr callers)
 * @param protocol  protocol bytes stored on the created token
 * @param proto_tid protocol id; Tid_xowa and Tid_relative_2 get special handling
 * @param lnke_type Xop_lnke_tkn.Lnke_typ_brack for hooks starting with "[", Lnke_typ_text otherwise
 * @return a position in src, or the result of ctx.Lxr_make_txt_ on the fallback paths
 */
public int MakeTkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, byte[] protocol, byte proto_tid, byte lnke_type) {
boolean lnke_type_brack = (lnke_type == Xop_lnke_tkn.Lnke_typ_brack);
if ( !lnke_type_brack // lnke doesn't have "["; EX: "ttl:"
&& !Valid_text_lnke(ctx, src, src_len, bgn_pos, cur_pos) // tkn is part of work; EX: " ttl:" vs "attl:"
)
return ctx.Lxr_make_txt_(cur_pos - 1); // -1 to ignore ":" in making text colon; needed to process ":" for list like "; attl: b" PAGE:de.w:Mord_(Deutschland)#Besonders_verwerfliche_Begehungsweise; DATE:2015-01-09
if (ctx.Stack_get_typ(Xop_tkn_itm_.Tid_lnke) != null) return ctx.Lxr_make_txt_(cur_pos); // no nested lnke; return cur lnke as text; EX: "[irc://a irc://b]" -> "<a href='irc:a'>irc:b</a>"
if (proto_tid == Xoo_protocol_itm.Tid_xowa) return Make_tkn_xowa(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, protocol, proto_tid, lnke_type);
// HACK: need to disable lnke if enclosing type is lnki and (1) arg is "link=" or (2) in 1st arg; basically, only enable for caption tkns (and preferably, thumb only) (which should be neither 1 or 2)
if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_lnki && lnke_type == Xop_lnke_tkn.Lnke_typ_text) {
// walk root's sub tokens from last to first; mode advances init -> eq -> text as "=" and then text are seen,
// so reaching a pipe in text mode means the source reads "|text=" directly before this url
byte mode = Lnki_linkMode_init;
int lnki_pipe_count = 0;
int tkn_idx = -1;
for (int i = root.Subs_len() - 1; i > -1; i--) {
Xop_tkn_itm link_tkn = root.Subs_get(i);
tkn_idx = i;
switch (link_tkn.Tkn_tid()) {
case Xop_tkn_itm_.Tid_pipe:
if (mode == Lnki_linkMode_text) {ctx.Lxr_make_(false); return bgn_pos + 1;} // +1 to position after lnke_hook; EX:[[File:A.png|link=http:b.org]] position at t in http so http hook won't be invoked.
else {i = -1; ++lnki_pipe_count;}
break;
case Xop_tkn_itm_.Tid_txt:
if (mode == Lnki_linkMode_eq) mode = Lnki_linkMode_text;
// else i = -1; // DELETE: do not be overly strict; need to handle pattern of link=http://a.org?b=http://c.org; DATE:2013-02-03
break;
case Xop_tkn_itm_.Tid_eq:
if (mode == Lnki_linkMode_init) mode = Lnki_linkMode_eq;
// else i = -1; // DELETE: do not be overly strict; need to handle pattern of link=http://a.org?b=http://c.org; DATE:2013-02-03
break;
case Xop_tkn_itm_.Tid_space: case Xop_tkn_itm_.Tid_tab:
break;
}
}
// NOTE(review): since the txt case below was removed, this loop only reassigns tkn_idx, which is not read afterwards; it has no other visible effect
if (lnki_pipe_count == 0) {
for (int i = tkn_idx; i > -1; i--) {
Xop_tkn_itm link_tkn = root.Subs_get(i);
tkn_idx = i;
switch (link_tkn.Tkn_tid()) {
// case Xop_tkn_itm_.Tid_txt: return cur_pos; // REMOVED:2012-11-12: was causing [[http://a.org a]] [[http://b.org b]] to fail; PAGE:en.w:Template:Infobox_country
case Xop_tkn_itm_.Tid_space: case Xop_tkn_itm_.Tid_tab: break;
}
}
}
}
// forward scan: lnke_end becomes the position of the terminating byte and cur_pos one past it;
// at end of source both are src_len
int lnke_bgn = bgn_pos, lnke_end = -1, brack_end_pos = -1;
int lnke_end_tid = End_tid_null;
while (true) { // loop until lnke_end_tid char;
if (cur_pos == src_len) {lnke_end_tid = End_tid_eos; lnke_end = cur_pos; break;}
switch (src[cur_pos]) {
case Byte_ascii.Brack_end:
if (lnke_type_brack) { // NOTE: check that frame begins with [ in order to end with ]
lnke_end_tid = End_tid_brack; brack_end_pos = cur_pos + Xoa_prog_mgr.Adj_next_char;
}
else { // NOTE: frame does not begin with [ but ] encountered. mark "invalid" in order to force parser to stop before "]"
lnke_end_tid = End_tid_invalid;
}
break;
case Byte_ascii.Space: lnke_end_tid = End_tid_space; break;
case Byte_ascii.Nl: lnke_end_tid = End_tid_nl; break;
case Byte_ascii.Gt: case Byte_ascii.Lt:
lnke_end_tid = End_tid_invalid;
break;
case Byte_ascii.Apos:
if (cur_pos + 1 < src_len && src[cur_pos + 1] == Byte_ascii.Apos) // NOTE: '' breaks link, but not '; EX: [http://a.org''b'']]; DATE:2013-03-18
lnke_end_tid = End_tid_invalid;
break;
case Byte_ascii.Brack_bgn: // NOTE: always stop lnke at "[" regardless of brack_type; EX: [http:a.org[[B]]] and http:a.org[[B]]; DATE:2014-07-11
case Byte_ascii.Quote: // NOTE: quote should also stop lnke; DATE:2014-10-10
lnke_end_tid = End_tid_symbol;
break;
}
if (lnke_end_tid == End_tid_null) cur_pos++;
else {
lnke_end = cur_pos;
cur_pos++;
break;
}
}
// bracket hooks: what happens next depends on what ended the url
if (lnke_type_brack) {
switch (lnke_end_tid) {
case End_tid_eos:
if (brack_end_pos == -1) { // eos but no ]; EX: "[irc://a"
if (dangling_goes_on_stack) { // added for Xow_popup_parser which needs to handle dangling lnke due to block_len; DATE:2014-06-20
ctx.Subs_add_and_stack(root, tkn_mkr.Txt(bgn_pos, src_len)); // note that tkn doesn't matter, as Xow_popup_parser only cares *if* something is on stack, not *what* is on stack
return src_len;
}
ctx.Subs_add(root, tkn_mkr.Txt(bgn_pos, bgn_pos + 1));// convert open brack to txt; // FUTURE: don't make brack_tkn; just flag
bgn_pos += 1;
brack_end_pos = cur_pos;
lnke_bgn = bgn_pos;
lnke_type = Xop_lnke_tkn.Lnke_typ_brack_dangling;
}
break;
case End_tid_nl:
lnke_type = Xop_lnke_tkn.Lnke_typ_brack_dangling;
return ctx.Lxr_make_txt_(lnke_end); // textify lnk; EX: [irc://a\n] textifies "[irc://a"
default:
lnke_bgn += proto_tid == Xoo_protocol_itm.Tid_relative_2 ? 2 : 1; // if Tid_relative_2, then starts with [[; adjust by 2; EX:"[[//en" should have lnke_bgn at "//en", not "[//en"
lnke_type = Xop_lnke_tkn.Lnke_typ_brack;
break;
}
}
else { // else, plain text
brack_end_pos = lnke_end;
lnke_type = Xop_lnke_tkn.Lnke_typ_text;
if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_lnki) { // SEE:NOTE_1
Xop_tkn_itm prv_tkn = root.Subs_get(root.Subs_len() - 1); // get last tkn
if (prv_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) { // is tkn lnki?
root.Subs_del_after(prv_tkn.Tkn_sub_idx()); // delete [[ tkn and replace with [ tkn
root.Subs_add(tkn_mkr.Txt(prv_tkn.Src_bgn(), prv_tkn.Src_bgn() + 1));
ctx.Stack_pop_last(); // don't forget to remove from stack
lnke_type = Xop_lnke_tkn.Lnke_typ_brack; // change lnke_typee to brack
--bgn_pos;
}
}
}
if (proto_tid == Xoo_protocol_itm.Tid_relative_2) // for "[[//", add "["; rest of code handles "[//" normally, but still want to include literal "["; DATE:2013-02-02
ctx.Subs_add(root, tkn_mkr.Txt(lnke_bgn - 1, lnke_bgn));
// locate the site part of the url; results are read back from site_data
url_parser.Parse_site_fast(site_data, src, lnke_bgn, lnke_end);
int site_bgn = site_data.Site_bgn(), site_end = site_data.Site_end();
if (site_bgn == site_end) return ctx.Lxr_make_txt_(cur_pos); // empty proto should return text, not lnke; EX: "http:", "http://", "[http://]"; DATE:2014-10-09
// trailing punctuation is not part of the url: pull all three end positions back by the same amount
int adj = Ignore_punctuation_at_end(src, site_bgn, lnke_end);
if (adj != 0) {
lnke_end -= adj;
brack_end_pos -= adj;
cur_pos -= adj;
}
Xop_lnke_tkn tkn = tkn_mkr.Lnke(bgn_pos, brack_end_pos, protocol, proto_tid, lnke_type, lnke_bgn, lnke_end);
tkn.Lnke_relative_(site_data.Rel());
Xow_xwiki_itm xwiki = ctx.App().Usere().Wiki().Xwiki_mgr().Get_by_mid(src, site_bgn, site_end); // NOTE: check User_wiki.Xwiki_mgr, not App.Wiki_mgr() b/c only it is guaranteed to know all wikis on system
if (xwiki != null) { // lnke is to an xwiki; EX: [http://en.wikipedia.org/A a]
Xowe_wiki wiki = ctx.Wiki();
Xoa_url_parser.Parse_url(xo_url_parser_url, ctx.App(), wiki, src, lnke_bgn, lnke_end, false);
byte[] xwiki_wiki = xo_url_parser_url.Wiki_bry();
byte[] xwiki_page = xo_url_parser_url.Page_bry();
byte[] ttl_bry = xo_url_parser_url.Page_bry();
Xoa_ttl ttl = Xoa_ttl.parse_(wiki, ttl_bry);
// when the parsed title carries its own wiki item, that wiki's domain and the title's page url replace the values from the url
if (ttl != null && ttl.Wik_itm() != null) {
xwiki_wiki = ttl.Wik_itm().Domain_bry();
xwiki_page = ttl.Page_url();
}
tkn.Lnke_xwiki_(xwiki_wiki, xwiki_page, xo_url_parser_url.Args());
}
ctx.Subs_add(root, tkn);
// bracket links ended by "]" are finished here; other bracket links go on the stack and are closed later (see MakeTkn_end / AutoClose)
if (lnke_type == Xop_lnke_tkn.Lnke_typ_brack) {
if (lnke_end_tid == End_tid_brack) {
tkn.Src_end_(cur_pos);
tkn.Subs_move(root);
return cur_pos;
}
ctx.Stack_add(tkn);
if (lnke_end_tid == End_tid_invalid) {
return cur_pos - 1; // -1 to return before < or >
}
}
else {
switch (lnke_end_tid) {
case End_tid_space:
ctx.Subs_add(root, tkn_mkr.Space(root, cur_pos - 1, cur_pos));
break;
case End_tid_symbol:
case End_tid_nl:
case End_tid_invalid: // NOTE that cur_pos is set after <, must subtract 1 else </xnde> will be ignored; EX: <span>irc://a</span>
return cur_pos - 1;
}
}
return cur_pos;
}
/**
 * Counts how many trailing bytes of src[proto_end..lnke_end) are punctuation that should not belong to the url. DATE:2014-10-09
 * Walks backwards from lnke_end - 1 and stops at the first byte that is kept.
 * REF.MW: $sep = ',;\.:!?';
 * A ")" is only counted when the range contains no "("; EX: "(http://a.org)" (trim) vs "http://a.org/b(c)" (don't trim)
 * @return number of bytes to cut from the end; 0 when nothing is cut
 */
private static int Ignore_punctuation_at_end(byte[] src, int proto_end, int lnke_end) {
	int trim_count = 0;
	byte paren_bgn_exists = Bool_.__byte;	// tri-state: unknown until the first ")" is seen; the search is done at most once
	// idx starts at lnke_end - 1 b/c lnke_end is after the last char; EX: "abc" has end of 3; last char is src[2]
	for (int idx = lnke_end - 1; idx >= proto_end; idx--) {
		byte cur = src[idx];
		if (cur == Byte_ascii.Paren_end) {
			if (paren_bgn_exists == Bool_.__byte) {
				int paren_bgn_pos = Bry_finder.Find_fwd(src, Byte_ascii.Paren_bgn, proto_end, lnke_end);
				paren_bgn_exists = paren_bgn_pos == Bry_finder.Not_found ? Bool_.N_byte : Bool_.Y_byte;
			}
			if (paren_bgn_exists == Bool_.Y_byte) return trim_count;	// "(" found; do not ignore ")"
		}
		else if (cur == Byte_ascii.Colon) {	// differentiate between "http:" (don't trim) and "http://a.org:" (trim)
			if (idx == proto_end -1) return trim_count;	// NOTE(review): never true while idx >= proto_end; kept as in the original
		}
		else {
			boolean is_sep
				=  cur == Byte_ascii.Comma || cur == Byte_ascii.Semic || cur == Byte_ascii.Backslash || cur == Byte_ascii.Dot
				|| cur == Byte_ascii.Bang  || cur == Byte_ascii.Question;
			if (!is_sep) return trim_count;	// first byte that belongs to the url
		}
		trim_count++;
	}
	return trim_count;
}
// states of the backward token scan in MakeTkn_bgn: init -> eq (after an "=" tkn) -> text (after a txt tkn that follows eq)
private static final byte Lnki_linkMode_init = 0, Lnki_linkMode_eq = 1, Lnki_linkMode_text = 2;
// reason the forward url scan in MakeTkn_bgn stopped: null = still scanning, eos = end of source, brack = "]" of a bracket link,
// space / nl = whitespace, symbol = "[" or quote, invalid = "<", ">", "''" or a "]" without an opening "["
private static final byte End_tid_null = 0, End_tid_eos = 1, End_tid_brack = 2, End_tid_space = 3, End_tid_nl = 4, End_tid_symbol = 5, End_tid_invalid = 6;
/**
 * Handles "]": closes the lnke that is open on the stack, or makes the "]" text when none is open.
 * @return cur_pos after closing a lnke; else the result of ctx.Lxr_make_txt_
 */
public int MakeTkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
	// Xop_tkn_itm last_tkn = ctx.Stack_get_last(); // BLOCK:invalid_ttl_check; // TODO: backout apos changes
	// if ( last_tkn != null
	// && last_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) {
	// Xop_lnki_tkn lnki = (Xop_lnki_tkn)last_tkn;
	// if ( lnki.Pipe_count_is_zero()) { // always invalid
	// ctx.Stack_pop_last();
	// return Xop_lnki_wkr_.Invalidate_lnki(ctx, src, root, lnki, bgn_pos);
	// }
	// }
	int stack_idx = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_lnke);
	boolean lnke_is_open = stack_idx != -1;
	if (!lnke_is_open) {	// no lnke_bgn tkn; occurs when just ]; EX: "a]b"
		return ctx.Lxr_make_txt_(cur_pos);
	}
	// pop the stack down to the lnke, extend the lnke to include "]" and move the tokens after it under it
	Xop_lnke_tkn open_lnke = (Xop_lnke_tkn)ctx.Stack_pop_til(root, src, stack_idx, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_lnke);
	open_lnke.Src_end_(cur_pos);
	open_lnke.Subs_move(root);
	return cur_pos;
}
/**
 * Decides whether a bare-text protocol at bgn_pos may start a lnke, based on the char before it.
 * A preceding letter or digit means the protocol is the tail of a longer word; EX: "titel:" should not generate a lnke for "tel:"
 * @return true at the start of the document or after a non-alphanumeric char; false after an ASCII letter / digit
 *         or after a multi-byte char that the language's case manager knows
 */
private static boolean Valid_text_lnke(Xop_ctx ctx, byte[] src, int src_len, int bgn_pos, int cur_pos) {
	if (bgn_pos == Xop_parser_.Doc_bgn_char_0) return true;	// lnke starts at 0; always true
	int prv_idx = bgn_pos - 1;
	byte prv = src[prv_idx];
	// range form of the 0-9 / A-Z / a-z list; relies on the Byte_ascii names being the plain ASCII codes
	boolean prv_is_ascii_alnum
		=  (prv >= Byte_ascii.Num_0 && prv <= Byte_ascii.Num_9)
		|| (prv >= Byte_ascii.Ltr_A && prv <= Byte_ascii.Ltr_Z)
		|| (prv >= Byte_ascii.Ltr_a && prv <= Byte_ascii.Ltr_z);
	if (prv_is_ascii_alnum) return false;
	boolean prv_is_ascii = prv >= Byte_ascii.Ascii_min && prv <= Byte_ascii.Ascii_max;
	if (prv_is_ascii) return true;	// consider all other ASCII chars as true; EX: \t\n !, etc;
	// multi-byte char: back up to its first byte and ask the case manager whether it has an entry for the char
	int char_bgn = gplx.intl.Utf8_.Get_pos0_of_char_bwd(src, prv_idx);
	byte char_byte0 = src[char_bgn];
	boolean prv_char_is_letter = ctx.Lang().Case_mgr().Match_any_exists(char_byte0, src, char_bgn, bgn_pos);
	return !prv_char_is_letter;
}
private static final byte[] Bry_quote = new byte[] {Byte_ascii.Quote};
/**
 * Builds the lnke for an xowa command link.
 * NOTE: fmt is [xowa-cmd:^"app.setup_mgr.import_wiki('');"^ ]
 * Any bytes between ":" and the first quote act as a custom delimiter: the command then ends at quote + those bytes
 * instead of at the next quote. The text after the closing delimiter, up to "]", becomes the caption.
 * Every malformed case (no "[", no opening quote, no closing delimiter, no "]") is returned as text.
 * @return position after "]" on success; else the result of ctx.Lxr_make_txt_(cur_pos)
 */
private int Make_tkn_xowa(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, byte[] protocol, byte proto_tid, byte lnke_type) {
	// NOTE: must check for [ or else C:\xowa\ will cause it to evaluate as lnke
	if (lnke_type != Xop_lnke_tkn.Lnke_typ_brack) {
		return ctx.Lxr_make_txt_(cur_pos);
	}

	// opening delimiter
	int after_colon = cur_pos + 1;	// +1 to skip past :
	int quote_pos = Bry_finder.Find_fwd(src, Byte_ascii.Quote, after_colon, src_len);
	if (quote_pos == Bry_.NotFound) {
		return ctx.Lxr_make_txt_(cur_pos);
	}
	int cmd_bgn = quote_pos + 1;

	// closing delimiter: a plain quote, or quote + the custom bytes found before the opening quote
	byte[] closing_dlm = Bry_quote;
	if (quote_pos > after_colon) {
		Bry_bfr tmp_bfr = ctx.App().Utl__bfr_mkr().Get_k004();
		closing_dlm = tmp_bfr.Add(Bry_quote).Add_mid(src, after_colon, quote_pos).Xto_bry_and_clear();
		tmp_bfr.Mkr_rls();
	}
	int cmd_end = Bry_finder.Find_fwd(src, closing_dlm, cmd_bgn, src_len);
	if (cmd_end == Bry_.NotFound) {
		return ctx.Lxr_make_txt_(cur_pos);
	}

	// caption: skip spaces / tabs after the closing delimiter, then run to "]"
	int caption_bgn = Bry_finder.Find_fwd_while_space_or_tab(src, cmd_end + closing_dlm.length, src_len);
	if (caption_bgn == Bry_.NotFound) {
		return ctx.Lxr_make_txt_(cur_pos);
	}
	int caption_end = Bry_finder.Find_fwd(src, Byte_ascii.Brack_end, caption_bgn, src_len);
	if (caption_end == Bry_.NotFound) {
		return ctx.Lxr_make_txt_(cur_pos);
	}

	int after_brack = caption_end + 1;	// +1 to place after ]
	Xop_lnke_tkn xowa_tkn = tkn_mkr.Lnke(bgn_pos, after_brack, protocol, proto_tid, lnke_type, cmd_bgn, cmd_end);
	ctx.Subs_add(root, xowa_tkn);
	xowa_tkn.Subs_add(tkn_mkr.Txt(caption_bgn, caption_end));
	return after_brack;
}
}
/*
NOTE_1
lnke takes precedence over lnki.
EX: [[irc://a b]]
pass: [<a href="irc://a">b</a>] i.e. [b] where b is a lnke with caption b and trg of irc://a
fail: <a href="irc://a">b</a> i.e. b where b is a lnki with caption b and trg of irc://a
*/

View File

@@ -0,0 +1,94 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Parser tests for bracketed external links; EX: "[irc://a b]" */
public class Xop_lnke_wkr_brack_tst {
	private Xop_fxt fxt = new Xop_fxt();
	@Before public void init() {
		fxt.Reset();
	}
	/** Link without caption: token spans the brackets, url range excludes them. */
	@Test public void Brace_noText() {
		fxt.Test_parse_page_wiki
		( "[irc://a]"
		, fxt.tkn_lnke_(0, 9).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack).Lnke_rng_(1, 8)
		);
	}
	/** Source ends before "]": the "[" becomes text and the link is marked dangling. */
	@Test public void Brace_eos() {
		fxt.Test_parse_page_wiki
		( "[irc://a"
		, fxt.tkn_txt_(0, 1)
		, fxt.tkn_lnke_(1, 8).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack_dangling).Lnke_rng_(1, 8)
		);
	}
	/** Caption tokens become sub tokens of the link. */
	@Test public void Brace_text() {
		fxt.Test_parse_page_wiki
		( "[irc://a b c]"
		, fxt.tkn_lnke_(0, 13).Lnke_rng_(1, 8).Subs_
		(   fxt.tkn_txt_(9, 10)
		,   fxt.tkn_space_(10, 11)
		,   fxt.tkn_txt_(11, 12)
		));
	}
	/** "<" ends the url; the rest is caption. */
	@Test public void Brace_lt() {
		fxt.Init_log_(Xop_xnde_log.Eos_while_closing_tag).Test_parse_page_wiki
		( "[irc://a<b c]"
		, fxt.tkn_lnke_(0, 13).Lnke_rng_(1, 8).Subs_
		(   fxt.tkn_txt_(8, 10)
		,   fxt.tkn_space_(10, 11)
		,   fxt.tkn_txt_(11, 12)
		));
	}
	@Test public void Brace_xnde_bgn() {// PURPOSE: occurred at ref of UK; a {{cite web|url=http://www.abc.gov/{{dead link|date=December 2011}}|title=UK}} b
		String wtxt = "[http://b.org<sup>c</sup>]";
		String expd = "<a href=\"http://b.org\" class=\"external text\" rel=\"nofollow\"><sup>c</sup></a>";
		fxt.Test_parse_page_wiki_str(wtxt, expd);
	}
	/** A new line before "]" cancels the link: everything is text. */
	@Test public void Brace_newLine() {
		fxt.Test_parse_page_wiki
		( "[irc://a\n]"
		, fxt.tkn_txt_(0, 8)
		, fxt.tkn_nl_char_len1_(8)
		, fxt.tkn_txt_(9, 10)
		);
	}
	/** Caption-less link renders as a numbered anchor. */
	@Test public void Html_brack() {
		String wtxt = "[irc://a]";
		String expd = "<a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">[1]</a>";
		fxt.Test_parse_page_wiki_str(wtxt, expd);
	}
	/** Double apostrophe ends the url; a single apostrophe stays part of it. */
	@Test public void Apos() {
		fxt.Test_parse_page_wiki_str
		( "[http://www.a.org''b'']"
		, "<a href=\"http://www.a.org\" class=\"external text\" rel=\"nofollow\"><i>b</i></a>"
		);
		fxt.Test_parse_page_wiki_str
		( "[http://www.a.org'b]"
		, "<a href=\"http://www.a.org'b\" class=\"external text\" rel=\"nofollow\">[1]</a>"
		);
	}
	/** Urls inside nowiki are left as text. */
	@Test public void Nowiki() {
		String wtxt = "<nowiki>http://a.org</nowiki>";
		String expd = "http://a.org";
		fxt.Test_parse_page_all_str(wtxt, expd);
	}
	@Test public void Lnki_one() { // PURPOSE: parallel test for "http://a.org[[B]]"; DATE:2014-07-11
		String wtxt = "[http://a.org b [[C]] d]";
		String expd = String_.Concat_lines_nl_skip_last
		( "<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">b <a href=\"/wiki/C\">C</a> d</a>"
		);
		fxt.Test_parse_page_wiki_str(wtxt, expd);
	}
	@Test public void Encode_xwiki() { // PURPOSE: href title and args should always be encoded; PAGE:en.w:List_of_Category_A_listed_buildings_in_West_Lothian DATE:2014-07-15
		byte[] xwiki_key = Bry_.new_u8("commons.wikimedia.org");
		byte[] xwiki_domain = Bry_.new_u8("commons.wikimedia.org");
		fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(xwiki_key, xwiki_domain);
		// encode page
		fxt.Test_parse_page_wiki_str
		( "[http://commons.wikimedia.org/%22%3E_A B]"
		, "<a href=\"/site/commons.wikimedia.org/wiki/%22%3E_A\">B</a>" // '%22%3E' not '">'
		);
		// encode args
		fxt.Test_parse_page_wiki_str
		( "[http://commons.wikimedia.org/A?b=%22%3E_C D]"
		, "<a href=\"/site/commons.wikimedia.org/wiki/A?b=%22%3E_C\">D</a>" // '%22%3E' not '">'
		);
	}
	@Test public void Encode_basic() { // PURPOSE: counterpart to Encode_xwiki; DATE:2014-07-15
		// encode page
		fxt.Test_parse_page_wiki_str
		( "[http://a.org/%22%3E_A B]"
		, "<a href=\"http://a.org/%22%3E_A\" class=\"external text\" rel=\"nofollow\">B</a>" // '%22%3E' not '">'
		);
		// encode args
		fxt.Test_parse_page_wiki_str
		( "[http://a.org/A?b=%22%3E_C D]"
		, "<a href=\"http://a.org/A?b=%22%3E_C\" class=\"external text\" rel=\"nofollow\">D</a>" // '%22%3E' not '">'
		);
	}
	@Test public void Encode_relative() { // PURPOSE: counterpart to Encode_xwiki; DATE:2014-07-15
		// encode page
		fxt.Test_parse_page_wiki_str
		( "[//a.org/%22%3E_A B]"
		, "<a href=\"http://a.org/%22%3E_A\" class=\"external text\" rel=\"nofollow\">B</a>" // '%22%3E' not '">'
		);
		// encode args
		fxt.Test_parse_page_wiki_str
		( "[//a.org/A?b=%22%3E_C D]"
		, "<a href=\"http://a.org/A?b=%22%3E_C\" class=\"external text\" rel=\"nofollow\">D</a>" // '%22%3E' not '">'
		);
	}
}

View File

@@ -0,0 +1,39 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Parser tests for bracketed external links that are not closed on the same line. */
public class Xop_lnke_wkr_dangling_tst {
	private Xop_fxt fxt = new Xop_fxt();
	@Before public void init() {
		fxt.Reset();
	}
	/** Source ends inside the caption: the link ends after the url and the caption stays text. */
	@Test public void Dangling_eos() {
		fxt.Test_parse_page_wiki
		( "[irc://a b"
		, fxt.tkn_lnke_(0, 8).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack_dangling)
		, fxt.tkn_txt_(9, 10)
		);
	}
	/** A new line inside the caption has the same effect; the "]" on the next line is text. */
	@Test public void Dangling_newLine() {
		fxt.Test_parse_page_wiki
		( "[irc://a b\nc]"
		, fxt.tkn_lnke_(0, 8).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack_dangling)
		, fxt.tkn_txt_(9, 10)
		, fxt.tkn_nl_char_len1_(10)
		, fxt.tkn_txt_(11, 13)
		);
	}
	/** ">" ends the url but not the link: the link is a normal bracket link with the rest as caption. */
	@Test public void Dangling_gt() {
		fxt.Test_parse_page_wiki
		( "[irc://a>b c]"
		, fxt.tkn_lnke_(0, 13).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack).Subs_
		(   fxt.tkn_txt_(8, 10)
		,   fxt.tkn_space_(10, 11)
		,   fxt.tkn_txt_(11, 12)
		));
	}
}

View File

@@ -0,0 +1,42 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Parser tests for protocol-relative external links; EX: "[//a b]" */
public class Xop_lnke_wkr_relative_tst {
	private Xop_fxt fxt = new Xop_fxt();
	@Before public void init() {
		fxt.Reset();
	}
	/** Token spans the brackets; url range starts at "//". */
	@Test public void Relative_obj() {
		fxt.Test_parse_page_wiki
		( "[//a b]"
		, fxt.tkn_lnke_(0, 7).Lnke_rng_(1, 4).Subs_(fxt.tkn_txt_(5, 6))
		);
	}
	/** Unknown site: rendered as an external anchor with "http:" prepended. */
	@Test public void Relative_external() {
		String wtxt = "[//www.a.org a]";
		String expd = "<a href=\"http://www.a.org\" class=\"external text\" rel=\"nofollow\">a</a>";
		fxt.Test_parse_page_wiki_str(wtxt, expd);
	}
	/** Site registered as an xwiki: rendered as a /site/ link. */
	@Test public void Relative_internal() {
		fxt.Init_xwiki_add_user_("en.wikipedia.org");
		String wtxt = "[//en.wikipedia.org/wiki Wikipedia]";
		String expd = "<a href=\"/site/en.wikipedia.org/wiki/\">Wikipedia</a>";
		fxt.Test_parse_page_wiki_str(wtxt, expd);
	}
	@Test public void Relative_w_category() { // EX: [//commons.wikimedia.org/wiki/Category:Diomedeidae A]
		fxt.Init_xwiki_add_user_("en.wikipedia.org");
		String wtxt = "[//en.wikipedia.org/wiki/Category:A A]";
		String expd = "<a href=\"/site/en.wikipedia.org/wiki/Category:A\">A</a>";
		fxt.Test_parse_page_wiki_str(wtxt, expd);
	}
	/** "[[//": the outer brackets are expected as literal text around the link. */
	@Test public void Relurl() {
		byte[] xwiki_key = Bry_.new_u8("en.wikipedia.org");
		byte[] xwiki_domain = Bry_.new_u8("en.wikipedia.org");
		fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(xwiki_key, xwiki_domain);
		String wtxt = "[[//en.wikipedia.org/ a]]";
		String expd = "[<a href=\"/site/en.wikipedia.org/wiki/\">a</a>]";
		fxt.Test_parse_page_wiki_str(wtxt, expd);
	}
}

View File

@@ -0,0 +1,99 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*; import gplx.xowa.langs.cases.*;
/**
 * Parser tests for free-form ("text") external links: bare urls such as "irc://a" that appear
 * in wikitext without surrounding brackets.
 */
public class Xop_lnke_wkr_text_tst {
  @Before public void init() {fxt.Reset();} private Xop_fxt fxt = new Xop_fxt();
  /** Token-level check: a bare url becomes a single lnke token of type Lnke_typ_text. */
  @Test public void Text_obj() {
    fxt.Test_parse_page_wiki("irc://a", fxt.tkn_lnke_(0, 7).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_text).Lnke_rng_(0, 7));
  }
  /** Html check: the url is used as both href and caption. */
  @Test public void Text_html() {
    fxt.Test_parse_page_wiki_str("irc://a", "<a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">irc://a</a>");
  }
  /** The lnke ends at the first space; following words are ordinary space / text tokens. */
  @Test public void Text_after() {
    fxt.Test_parse_page_wiki("irc://a b c", fxt.tkn_lnke_(0, 7).Lnke_rng_(0, 7), fxt.tkn_space_(7, 8), fxt.tkn_txt_(8, 9), fxt.tkn_space_(9, 10), fxt.tkn_txt_(10, 11));
  }
  @Test public void Text_before_ascii() {	// PURPOSE: free form external urls should not match if preceded by letters; EX:de.w:Sylvie_und_Bruno; DATE:2014-05-11
    fxt.Ctx().Lang().Case_mgr_utf8_();
    String expd_lnke_html = "<a href=\"tel:a\" class=\"external text\" rel=\"nofollow\">tel:a</a>";
    fxt.Test_parse_page_wiki_str("titel:a"	, "titel:a");					// ascii letter before protocol: no link
    fxt.Test_parse_page_wiki_str(" tel:a"	, " " + expd_lnke_html);		// space before protocol: link
    fxt.Test_parse_page_wiki_str("!tel:a"	, "!" + expd_lnke_html);		// ascii symbol before protocol: link
    fxt.Test_parse_page_wiki_str("ätel:a"	, "ätel:a");					// non-ascii letter before protocol: no link
    fxt.Test_parse_page_wiki_str("€tel:a"	, "€" + expd_lnke_html);		// non-ascii symbol before protocol: link; FIX: expected html must keep the leading "€" just as the "!" case keeps "!"
  }
  @Test public void Invalid_lnki_and_list_dt_dd() {	// PURPOSE: invalid lnke should still allow processing of ":" in list <dd>; PAGE:de.w:Mord_(Deutschland)#Besonders_verwerfliche_Begehungsweise DATE:2015-01-08
    fxt.Test_parse_page_wiki_str("; atel: b"	, String_.Concat_lines_nl_skip_last
    ( "<dl>"
    , " <dt> atel"
    , " </dt>"
    , " <dd> b"
    , " </dd>"
    , "</dl>"
    ));
  }
  @Test public void Xnde() {// NOTE: compare to Brace_lt
    fxt.Test_parse_page_wiki("<span>irc://a</span>"
    , fxt.tkn_xnde_(0, 20).Subs_
    ( fxt.tkn_lnke_(6, 13)
    )
    );
  }
  /** Each list item gets its own link; the newline between items terminates the first url. */
  @Test public void List() {
    fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
    ( "*irc://a"
    , "*irc://b"
    ),String_.Concat_lines_nl_skip_last
    ( "<ul>"
    , " <li><a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">irc://a</a>"
    , " </li>"
    , " <li><a href=\"irc://b\" class=\"external text\" rel=\"nofollow\">irc://b</a>"
    , " </li>"
    , "</ul>"
    ));
  }
  @Test public void Defect_reverse_caption_link() {	// PURPOSE: bad lnke formatting (caption before link); ] should show up at end, but only [ shows up; PAGE:en.w:Paul Philippoteaux; [caption http://www.americanheritage.com]
    fxt.Test_parse_page_wiki_str("[caption irc://a]", "[caption <a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">irc://a</a>]");
  }
  @Test public void Lnki() {	// PURPOSE: trailing lnki should not get absorbed into lnke; DATE:2014-07-11
    fxt.Test_parse_page_wiki_str
    ( "http://a.org[[B]]"	// NOTE: [[ should create another lnki
    ,String_.Concat_lines_nl_skip_last
    ( "<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">http://a.org</a><a href=\"/wiki/B\">B</a>"
    ));
  }
  @Test public void Protocol_only() {	// PURPOSE: protocol only should return text; DATE:2014-10-09
    fxt.Test_parse_page_wiki_str("http://"	, "http://");
    fxt.Test_parse_page_wiki_str("http:"	, "http:");
    fxt.Test_parse_page_wiki_str("[http://]"	, "[http://]");
    fxt.Test_parse_page_wiki_str("[http:]"	, "[http:]");
  }
  @Test public void Ignore_punctuation_at_end() {	// PURPOSE: ignore "," and related punctuation at end; DATE:2014-10-09
    fxt.Test_parse_page_wiki_str("http://a.org,"	, "<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">http://a.org</a>,");		// basic
    fxt.Test_parse_page_wiki_str("http://a.org,,"	, "<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">http://a.org</a>,,");		// many
    fxt.Test_parse_page_wiki_str("http://a.org/b,c"	, "<a href=\"http://a.org/b,c\" class=\"external text\" rel=\"nofollow\">http://a.org/b,c</a>");	// do not ignore if in middle
    fxt.Test_parse_page_wiki_str("http://a.org:"	, "<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">http://a.org</a>:");		// colon at end; compare to "http:"
  }
  @Test public void Ignore_punctuation_at_end__paren_end() {	// PURPOSE: end paren has special rules; DATE:2014-10-10
    fxt.Test_parse_page_wiki_str("(http://a.org)"	, "(<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">http://a.org</a>)");		// trim=y
    fxt.Test_parse_page_wiki_str("http://a.org/b(c)", "<a href=\"http://a.org/b(c)\" class=\"external text\" rel=\"nofollow\">http://a.org/b(c)</a>");	// trim=n
  }
  @Test public void Sym_quote() {	// PURPOSE: quote should interrupt lnke; DATE:2014-10-10
    fxt.Test_parse_page_wiki_str("http://a.org/b\"c", "<a href=\"http://a.org/b\" class=\"external text\" rel=\"nofollow\">http://a.org/b</a>&quot;c");
  }
}

View File

@@ -0,0 +1,49 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Parser tests for unusual or malformed external-link (lnke) wikitext. */
public class Xop_lnke_wkr_uncommon_tst {
  private Xop_fxt fxt = new Xop_fxt();
  @Before public void init() {
    fxt.Reset();
  }
  /** Two bracketed links with nothing between them produce two adjacent lnke tokens. */
  @Test public void Err_multiple() {
    String wtxt = "[irc://a][irc://b]";
    fxt.Test_parse_page_wiki(wtxt, fxt.tkn_lnke_(0, 9), fxt.tkn_lnke_(9, 18));
  }
  /** A caption that itself looks like a url is kept as a plain text sub-token of the lnke. */
  @Test public void Err_txt_is_protocol() {
    String wtxt = "[irc://a irc://b]";
    fxt.Test_parse_page_wiki(wtxt, fxt.tkn_lnke_(0, 17).Lnke_rng_(1, 8).Subs_(fxt.tkn_txt_(9, 16)));
  }
  /** PURPOSE: [[ should not be interpreted as lnki if [irc is available */
  @Test public void Lnke_should_precede_lnki() {
    String wtxt = "[[irc://a/b c]]";
    fxt.Test_parse_page_wiki(wtxt
      , fxt.tkn_txt_(0, 1)                                   // outer "["
      , fxt.tkn_lnke_(1, 14).Subs_(fxt.tkn_txt_(12, 13))     // "[irc://a/b c]"
      , fxt.tkn_txt_(14, 15)                                 // outer "]"
      );
  }
  /** PURPOSE: bad code that was causing lnkes to show up; PAGE:en.w:Template:Infobox_country; */
  @Test public void Defect_2nd_consecutive_lnke() {
    String wtxt = String_.Concat_lines_nl_skip_last
      ( "[[http://a.org a]] [[http://b.org b]]"
      );
    String expd = String_.Concat_lines_nl_skip_last
      ( "[<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">a</a>] [<a href=\"http://b.org\" class=\"external text\" rel=\"nofollow\">b</a>]"
      );
    fxt.Test_parse_page_wiki_str(wtxt, expd);
  }
}

View File

@@ -0,0 +1,43 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Parser tests for external links whose host is a registered xwiki; these are rendered as "/site/" links. */
public class Xop_lnke_wkr_xwiki_tst {
  private Xop_fxt fxt = new Xop_fxt();
  @Before public void init() {
    fxt.Reset();
  }
  /** Registers "en.wikipedia.org" (key and domain identical) with the user wiki's xwiki manager. */
  private void Init_user_xwiki_en() {
    fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_u8("en.wikipedia.org"), Bry_.new_u8("en.wikipedia.org"));
  }
  @Test public void Xwiki() {
    Init_user_xwiki_en();
    String wtxt = "[http://en.wikipedia.org/wiki/A a]";
    String expd = "<a href=\"/site/en.wikipedia.org/wiki/A\">a</a>";
    fxt.Test_parse_page_wiki_str(wtxt, expd);
  }
  @Test public void Xwiki_relative() {
    Init_user_xwiki_en();
    String wtxt = "[//en.wikipedia.org/ a]";
    String expd = "<a href=\"/site/en.wikipedia.org/wiki/\">a</a>";
    fxt.Test_parse_page_wiki_str(wtxt, expd);
  }
  /** DATE:2013-02-02 */
  @Test public void Xwiki_qarg() {
    fxt.Init_xwiki_add_user_("en.wikipedia.org");
    String wtxt = "http://en.wikipedia.org/wiki/Special:Allpages?from=Earth";
    String expd = "<a href=\"/site/en.wikipedia.org/wiki/Special:Allpages?from=Earth\">http://en.wikipedia.org/wiki/Special:Allpages?from=Earth</a>";
    fxt.Test_parse_page_wiki_str(wtxt, expd);
  }
  /** A language prefix inside the page title ("fr:A") redirects the link to that language's registered domain. */
  @Test public void Lang_prefix() {
    Init_user_xwiki_en();
    fxt.Wiki().Xwiki_mgr().Add_full(Bry_.new_a7("fr"), Bry_.new_a7("fr.wikipedia.org"));
    String wtxt = "[http://en.wikipedia.org/wiki/fr:A a]";
    String expd = "<a href=\"/site/fr.wikipedia.org/wiki/A\">a</a>";
    fxt.Test_parse_page_wiki_str(wtxt, expd);
  }
  /** The query string is carried over to the generated href. */
  @Test public void Xwiki_query_arg() {
    Init_user_xwiki_en();
    String wtxt = "[http://en.wikipedia.org/wiki/A?action=edit a]";
    String expd = "<a href=\"/site/en.wikipedia.org/wiki/A?action=edit\">a</a>";
    fxt.Test_parse_page_wiki_str(wtxt, expd);
  }
}

View File

@@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkis.cfgs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
/** Configuration node for lnki parsing; its only member is the xwiki repo manager, exposed to scripts under the key "xwiki_repos". */
public class Xoc_lnki_cfg implements GfoInvkAble {
  private static final String Invk_xwiki_repos = "xwiki_repos";
  private Xoc_xwiki_repo_mgr xwiki_repo_mgr;
  public Xoc_lnki_cfg(Xowe_wiki wiki) {
    this.xwiki_repo_mgr = new Xoc_xwiki_repo_mgr(wiki);
  }
  /** Returns the manager created in the constructor. */
  public Xoc_xwiki_repo_mgr Xwiki_repo_mgr() {
    return xwiki_repo_mgr;
  }
  /** Returns the xwiki repo manager for key "xwiki_repos"; any other key is reported as unhandled. */
  public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
    if (!ctx.Match(k, Invk_xwiki_repos)) return GfoInvkAble_.Rv_unhandled;
    return xwiki_repo_mgr;
  }
}

View File

@@ -0,0 +1,45 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkis.cfgs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
/**
 * Registry of xwiki abbreviations, keyed by the abbreviation bytes.
 * The first time a new abbreviation is added, the wiki's "lnki xwiki repos enabled" flag is switched on.
 */
public class Xoc_xwiki_repo_mgr implements GfoInvkAble {
  private static final String Invk_add = "add";
  private Ordered_hash hash = Ordered_hash_.new_bry_();
  private Xowe_wiki wiki;
  public Xoc_xwiki_repo_mgr(Xowe_wiki wiki) {
    this.wiki = wiki;
  }
  /** True if the abbreviation has been registered. */
  public boolean Has(byte[] abrv) {
    Xoc_xwiki_repo_itm found = (Xoc_xwiki_repo_itm)hash.Get_by(abrv);
    return found != null;
  }
  /** Registers the abbreviation if it is not yet known; an already-registered abbreviation is left untouched. */
  public void Add_or_mod(byte[] abrv) {
    Xoc_xwiki_repo_itm found = (Xoc_xwiki_repo_itm)hash.Get_by(abrv);
    if (found != null) return;	// already registered; nothing to modify
    Xoc_xwiki_repo_itm created = new Xoc_xwiki_repo_itm(abrv);
    hash.Add(abrv, created);
    wiki.Cfg_parser_lnki_xwiki_repos_enabled_(true);
  }
  /** Script entry point: "add" reads the "xwiki" argument and registers it; other keys are unhandled. */
  public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
    if (!ctx.Match(k, Invk_add)) return GfoInvkAble_.Rv_unhandled;
    Add_or_mod(m.ReadBry("xwiki"));
    return this;
  }
}
/** Entry stored by Xoc_xwiki_repo_mgr; holds only the abbreviation it was created with. */
class Xoc_xwiki_repo_itm {
  private byte[] abrv;
  public Xoc_xwiki_repo_itm(byte[] abrv) {
    this.abrv = abrv;
  }
  /** Returns the abbreviation passed to the constructor. */
  public byte[] Abrv() {
    return abrv;
  }
}

View File

@@ -0,0 +1,121 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkis.redlinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.langs.vnts.*; import gplx.xowa.gui.views.*; import gplx.xowa.pages.*; import gplx.xowa.html.hdumps.core.*;
/**
 * Marks links to missing pages ("redlinks") in the html view of a page.
 *
 * Redlink() copies the page's collected lnki tokens, looks their titles up in the wiki db in
 * batches, and updates the html element of every lnki whose page does not exist. The work stops
 * early when the user cancels or when the page's lnki list reports a different thread id than the
 * one captured in the constructor.
 */
public class Xog_redlink_mgr implements GfoInvkAble {
  private Xog_win_itm win; private Xog_html_itm html_itm; private Xowe_wiki wiki; private Xoae_page page;
  private Xopg_redlink_lnki_list redlink_lnki_list; private List_adp lnki_list; private boolean log_enabled; private Gfo_usr_dlg usr_dlg; private int thread_id;
  public Xog_redlink_mgr(Xog_win_itm win, Xoae_page page, boolean log_enabled) {
    this.win = win; this.page = page; this.wiki = page.Wikie();
    this.html_itm = page.Tab_data().Tab().Html_itm();	// NOTE: caching locally b/c page.Tab() is sometimes null
    this.redlink_lnki_list = page.Redlink_lnki_list();
    this.lnki_list = redlink_lnki_list.Lnki_list();
    this.thread_id = redlink_lnki_list.Thread_id();
    this.log_enabled = log_enabled; this.usr_dlg = log_enabled ? Gfo_usr_dlg_.I : Gfo_usr_dlg_.Noop;
  }
  /** Script entry point: "run" executes Redlink(); other keys are unhandled. */
  public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
    if		(ctx.Match(k, Invk_run)) Redlink();
    else	return GfoInvkAble_.Rv_unhandled;
    return this;
  }	public static final String Invk_run = "run";
  /** Looks up every collected lnki and flags the ones whose target page is missing. */
  public void Redlink() {
    synchronized (this) {	// NOTE: attempt to eliminate random IndexBounds errors; DATE:2014-09-02
      if (redlink_lnki_list == null) return;	// FIX: the Null instance is built by the empty ctor and has no fields set; without this guard it threw a NullPointerException
      if (redlink_lnki_list.Disabled()) return;
      // both collections are created per call (not shared across calls) so they need no explicit Clear
      List_adp work_list = List_adp_.new_();
      Ordered_hash page_hash = Ordered_hash_.new_bry_();
      int len = lnki_list.Count();
      if (log_enabled) usr_dlg.Log_many("", "", "redlink.redlink_bgn: page=~{0} total_links=~{1}", String_.new_u8(page.Ttl().Raw()), len);
      for (int i = 0; i < len; i++) {	// make a copy of list else thread issues
        if (win.Usr_dlg().Canceled()) return;
        if (redlink_lnki_list.Thread_id() != thread_id) return;
        work_list.Add(lnki_list.Get_at(i));
      }
      // build one db-page item per distinct title; several lnkis may point at the same page
      for (int i = 0; i < len; i++) {
        if (win.Usr_dlg().Canceled()) return;
        if (redlink_lnki_list.Thread_id() != thread_id) return;
        Xop_lnki_tkn lnki = (Xop_lnki_tkn)work_list.Get_at(i);
        Xoa_ttl ttl = lnki.Ttl();
        byte[] full_txt = ttl.Full_db();
        if (!page_hash.Has(full_txt))
          page_hash.Add(full_txt, new Xowd_page_itm().Ttl_(ttl));	// allocate only for titles not yet seen
      }
      // load page data from the db in slices of Batch_size titles
      int page_len = page_hash.Count();
      for (int i = 0; i < page_len; i += Batch_size) {
        if (win.Usr_dlg().Canceled()) return;
        if (redlink_lnki_list.Thread_id() != thread_id) return;
        int end = i + Batch_size;
        if (end > page_len) end = page_len;
        wiki.Db_mgr().Load_mgr().Load_by_ttls(win.Usr_dlg(), page_hash, Bool_.Y, i, end);
      }
      int redlink_count = 0;
      Bry_bfr bfr = null;	// created lazily; only used when logging
      Xol_vnt_mgr vnt_mgr = wiki.Lang().Vnt_mgr();
      boolean variants_enabled = vnt_mgr.Enabled();
      Xopg_redlink_idx_list redlink_mgr = page.Hdump_data().Redlink_mgr();
      for (int j = 0; j < len; j++) {
        Xop_lnki_tkn lnki = (Xop_lnki_tkn)work_list.Get_at(j);
        byte[] full_db = lnki.Ttl().Full_db();
        Xowd_page_itm db_page = (Xowd_page_itm)page_hash.Get_by(full_db);
        if (db_page == null) continue;	// pages shouldn't be null, but just in case
        if (db_page.Exists()) continue;	// page exists; link stays as is
        int uid = lnki.Html_uid();
        String lnki_id = Xopg_redlink_lnki_list.Lnki_id_prefix + Int_.Xto_str(uid);
        if (variants_enabled) {
          // page is missing under its own title; if a language variant of the title exists, repoint the link instead of marking it red
          Xowd_page_itm vnt_page = vnt_mgr.Convert_ttl(wiki, lnki.Ttl());
          if (vnt_page != null) {
            Xoa_ttl vnt_ttl = Xoa_ttl.parse_(wiki, lnki.Ttl().Ns().Id(), vnt_page.Ttl_page_db());
            html_itm.Html_atr_set(lnki_id, "href", "/wiki/" + String_.new_u8(vnt_ttl.Full_url()));
            if (!String_.Eq(vnt_mgr.Html_style(), ""))
              html_itm.Html_atr_set(lnki_id, "style", vnt_mgr.Html_style());
            continue;
          }
        }
        if (log_enabled) {
          if (bfr == null) bfr = Bry_bfr.new_();
          bfr.Add_int_variable(uid).Add_byte_pipe().Add(Xop_tkn_.Lnki_bgn).Add(full_db).Add(Xop_tkn_.Lnki_end).Add_byte(Byte_ascii.Semic).Add_byte_space();
        }
        if (win.Usr_dlg().Canceled()) return;
        if (redlink_lnki_list.Thread_id() != thread_id) return;
        gplx.xowa.files.gui.Js_img_mgr.Update_link_missing(html_itm, lnki_id);
        redlink_mgr.Add(uid);
        ++redlink_count;
      }
      if (log_enabled)
        usr_dlg.Log_many("", "", "redlink.redlink_end: redlinks_run=~{0} links=~{1}", redlink_count, bfr == null ? String_.Empty : bfr.Xto_str_and_clear());
    }
  }
  public static final Xog_redlink_mgr Null = new Xog_redlink_mgr(); Xog_redlink_mgr() {}
  private static final int Batch_size = 32;
}
/** Static helpers that mark already-known redlink uids as missing in an html view. */
class Xog_redlink_wkr {
  /** Marks every uid in the list, in list order. */
  public static void Redlink(Xog_html_itm html_itm, Int_list list) {
    for (int idx = 0, count = list.Len(); idx < count; ++idx) {
      int uid = list.Get_at(idx);
      Redlink(html_itm, uid);
    }
  }
  /** Marks the element whose id is Lnki_id_prefix followed by the uid. */
  public static void Redlink(Xog_html_itm html_itm, int uid) {
    String lnki_id = Xopg_redlink_lnki_list.Lnki_id_prefix + Int_.Xto_str(uid);
    gplx.xowa.files.gui.Js_img_mgr.Update_link_missing(html_itm, lnki_id);
  }
}

View File

@@ -0,0 +1,32 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkis.redlinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
/** Append-only list of int ids that also tracks the largest value added since the last Clear (0 when nothing larger was added). */
public class Xopg_redlink_idx_list {
  private final Int_list list = new Int_list();
  private int max;
  /** Number of ids currently stored. */
  public int Len() {
    return list.Len();
  }
  /** Largest id added since the last Clear; never below 0. */
  public int Max() {
    return max;
  }
  public int Get_at(int i) {
    return list.Get_at(i);
  }
  /** Appends the id and raises the tracked maximum when the id exceeds it. */
  public void Add(int i) {
    list.Add(i);
    max = Math.max(max, i);
  }
  /** Empties the list and resets the tracked maximum to 0. */
  public void Clear() {
    list.Clear();
    max = 0;
  }
}

View File

@@ -0,0 +1,50 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkis.redlinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.users.*;
/**
 * Per-page collector of lnki tokens that should later be checked for redlinks.
 * Every lnki with a valid title is given an html uid; only a subset is kept for the redlink check.
 */
public class Xopg_redlink_lnki_list {
  public static final String Lnki_id_prefix = "xowa_lnki_";
  public static final int Lnki_id_prefix_len = String_.Len(Lnki_id_prefix);
  // NOTE: default to 1, not 0, b/c 0 is ignored by wtr; DATE:2014-10-09
  // NOTE(review): Clear() resets this to 0 while the initial value is Lnki_id_min; since Lnki_add pre-increments, the first uid may differ before vs after Clear -- confirm intended
  private int lnki_idx = gplx.xowa.html.lnkis.Xoh_lnki_wtr.Lnki_id_min;
  private final boolean disabled;
  private final List_adp lnki_list = List_adp_.new_();
  private int thread_id = 1;
  /** @param ttl_is_module never redlink in Module ns; particularly since Lua has multi-line comments for [[ ]] */
  public Xopg_redlink_lnki_list(boolean ttl_is_module) {
    this.disabled = ttl_is_module;
  }
  public boolean Disabled() {return disabled;}
  public List_adp Lnki_list() {return lnki_list;}
  /** Counter that is bumped on every Clear; starts at 1. */
  public int Thread_id() {return thread_id;}
  /** Resets the uid counter, empties the list and bumps the thread id; does nothing when disabled. */
  public void Clear() {
    if (disabled) return;
    lnki_idx = 0;	// NOTE: must start at 0, so that ++lnki_idx is > 0; html_wtr checks for > 0
    lnki_list.Clear();
    thread_id++;
  }
  /** Assigns the next html uid to the lnki and, unless its title is filtered out, queues it for the redlink check. */
  public void Lnki_add(Xop_lnki_tkn lnki) {
    if (disabled) return;
    Xoa_ttl ttl = lnki.Ttl();
    if (ttl == null) return;	// occurs for invalid links
    Xow_ns ns = ttl.Ns();
    // NOTE: set html_id in order for html to print out "id='xowa_lnki_1'"; want to print out id for consistency's sake, even if these links won't be checked for redlinks; DATE:2015-05-07
    ++lnki_idx;
    lnki.Html_uid_(lnki_idx);
    if (Skip_redlink_check(ns, ttl)) return;
    lnki_list.Add(lnki);
  }
  /** True for titles that are never checked for redlinks. */
  private static boolean Skip_redlink_check(Xow_ns ns, Xoa_ttl ttl) {
    if (ns.Id_file_or_media()) return true;						// ignore files which will usually not be in local wiki (most are in commons), and whose html is built up separately
    if (ns.Id_ctg() && !ttl.ForceLiteralLink()) return true;		// ignore ctgs which have their own html builder, unless it is literal; EX: [[:Category:A]]; DATE:2014-02-24
    if (ns.Id_special()) return true;								// ignore special, especially Search; EX: Special:Search/Earth
    if (ttl.Anch_bgn() == Xoa_ttl.Anch_bgn_anchor_only) return true;	// anchor only link; EX: [[#anchor]]
    return ttl.Wik_itm() != null;									// xwiki lnki; EX: simplewiki links in homewiki; [[simplewiki:Earth]]
  }
}

View File

@@ -0,0 +1,21 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkis.redlinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
/**
 * Callback that receives a parsed lnki token together with its parse context.
 * NOTE(review): the name suggests implementations log lnkis for redlink processing; no implementation is visible here -- confirm.
 */
public interface Xopg_redlink_logger {
/**
 * @param ctx parser context in which the lnki was found
 * @param src source bytes; presumably the wikitext the lnki's offsets refer to -- confirm
 * @param lnki the lnki token
 * @param lnki_src_tid byte code passed by the caller; presumably identifies where the lnki came from -- confirm
 */
void Wkr_exec(Xop_ctx ctx, byte[] src, Xop_lnki_tkn lnki, byte lnki_src_tid);
}

View File

@@ -0,0 +1,63 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.logs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.dbs.engines.sqlite.*;
/**
 * Thin wrapper around the "log_basic_temp" table: creates the table on construction and offers
 * insert / delete-all / release operations. The insert statement is created on first use.
 */
public class Xop_log_basic_tbl {
  private Db_stmt stmt_insert;	// created lazily by Insert; null until then and after Rls
  public Xop_log_basic_tbl(Db_conn conn){this.conn = conn; this.Create_table();}
  public Db_conn Conn() {return conn;} private Db_conn conn;
  private void Create_table() {Sqlite_engine_.Tbl_create(conn, Tbl_name, Tbl_sql);}
  /** Deletes all rows from the table. */
  public void Delete() {conn.Exec_qry(Db_qry_delete.new_all_(Tbl_name));}
  /** Inserts one row; values are bound in the same order as the fields listed when the statement is created. */
  public void Insert(int log_tid, String log_msg, int log_time, int page_id, String page_ttl, int args_len, String args_str, int src_len, String src_str) {
    if (stmt_insert == null) stmt_insert = Db_stmt_.new_insert_(conn, Tbl_name, Fld_log_tid, Fld_log_msg, Fld_log_time, Fld_page_id, Fld_page_ttl, Fld_args_len, Fld_args_str, Fld_src_len, Fld_src_str);
    stmt_insert.Clear()
    .Val_int(log_tid)
    .Val_str(log_msg)
    .Val_int(log_time)
    .Val_int(page_id)
    .Val_str(page_ttl)
    .Val_int(args_len)
    .Val_str(args_str)
    .Val_int(src_len)
    .Val_str(src_str)
    .Exec_insert();
  }
  /** Releases the insert statement if one was created; safe to call when nothing was ever inserted. */
  public void Rls() {
    if (stmt_insert == null) return;	// FIX: statement is only created by the first Insert; previously this threw a NullPointerException
    stmt_insert.Rls();
    stmt_insert = null;					// force a later Insert to build a fresh statement instead of reusing the released one
  }
  public static final String Tbl_name = "log_basic_temp"
  , Fld_log_tid = "log_tid", Fld_log_msg = "log_msg", Fld_log_time = "log_time"
  , Fld_page_id = "page_id", Fld_page_ttl = "page_ttl"
  , Fld_args_len = "args_len", Fld_args_str = "args_str"
  , Fld_src_len = "src_len", Fld_src_str = "src_str"
  ;
  private static final String Tbl_sql = String_.Concat_lines_nl
  ( "CREATE TABLE IF NOT EXISTS log_basic_temp"
  , "( log_id integer NOT NULL PRIMARY KEY AUTOINCREMENT"
  , ", log_tid integer NOT NULL"
  , ", log_msg varchar(255) NOT NULL"
  , ", log_time integer NOT NULL"
  , ", page_id integer NOT NULL"
  , ", page_ttl varchar(255) NOT NULL"
  , ", args_len integer NOT NULL"
  , ", args_str varchar(4096) NOT NULL"
  , ", src_len integer NOT NULL"
  , ", src_str varchar(4096) NOT NULL"
  , ");"
  );
}

View File

@@ -0,0 +1,72 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.logs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.dbs.*;
/**
 * Writes one row to the basic log table per logged item. Which optional columns carry real
 * values is controlled by boolean switches; a switched-off column is written with its Null_* placeholder.
 */
public class Xop_log_basic_wkr implements GfoInvkAble {
  public static final Xop_log_basic_wkr Null = null;
  public static final int Null_page_id = -1, Null_log_bgn = -1, Null_log_time = -1, Null_args_len = -1, Null_src_len = -1;
  public static final String Null_page_ttl = "", Null_args_str = "", Null_src_str = "";
  public static final byte[] Null_log_msg = null;
  public static final int
    Tid_gallery		= 1
  , Tid_imageMap	= 2
  , Tid_timeline	= 3
  , Tid_score		= 4
  , Tid_hiero		= 5
  ;
  private static final String
    Invk_save_page_ttl_ = "save_page_ttl_", Invk_save_log_time_ = "save_log_time_"
  , Invk_save_args_len_ = "save_args_len_", Invk_save_args_str_ = "save_args_str_", Invk_save_src_str_ = "save_src_str_"
  ;
  private Xop_log_mgr log_mgr;
  private Xop_log_basic_tbl log_tbl;
  private boolean save_page_ttl, save_log_time, save_args_len, save_args_str;
  private boolean save_src_str;
  public Xop_log_basic_wkr(Xop_log_mgr log_mgr, Xop_log_basic_tbl log_tbl) {
    this.log_mgr = log_mgr;
    this.log_tbl = log_tbl;
  }
  public boolean Save_src_str() {return save_src_str;}
  public Xop_log_basic_wkr Save_src_str_(boolean v) {
    save_src_str = v;
    return this;
  }
  /** Always returns true. */
  public boolean Log_bgn(Xoae_page page, byte[] src, Xop_xnde_tkn xnde) {
    return true;
  }
  /** Logs an xnde token: no start time, no message, source span and attribute span taken from the token. */
  public void Log_end_xnde(Xoae_page page, int log_tid, byte[] src, Xop_xnde_tkn xnde_tkn) {
    Xop_xatr_itm[] atrs_ary = xnde_tkn.Atrs_ary();
    int src_bgn = xnde_tkn.Src_bgn();
    int src_end = xnde_tkn.Src_end();
    int atrs_len = 0;
    if (atrs_ary != null) atrs_len = atrs_ary.length;
    int atrs_bgn = xnde_tkn.Atrs_bgn();
    int atrs_end = xnde_tkn.Atrs_end();
    Log_end(page, Null_log_bgn, log_tid, Null_log_msg, src, src_bgn, src_end, atrs_len, atrs_bgn, atrs_end);
  }
  /** Inserts one log row and then asks the log manager to check whether a commit is due. */
  public void Log_end(Xoae_page page, long log_bgn, int log_tid, byte[] log_msg, byte[] src, int src_bgn, int src_end, int args_len, int args_bgn, int args_end) {
    // values are computed in the same order as the table columns
    String row_msg = "";
    if (log_msg != Xop_log_basic_wkr.Null_log_msg) row_msg = String_.new_u8(log_msg);
    int row_time = Xop_log_basic_wkr.Null_log_time;
    if (save_log_time) row_time = Env_.TickCount_elapsed_in_frac(log_bgn);
    int row_page_id = page.Revision_data().Id();
    String row_page_ttl = Xop_log_basic_wkr.Null_page_ttl;
    if (save_page_ttl) row_page_ttl = String_.new_u8(page.Ttl().Full_db());
    int row_args_len = Xop_log_basic_wkr.Null_args_len;
    if (save_args_len) row_args_len = args_len;
    String row_args_str = Xop_log_basic_wkr.Null_args_str;
    if (save_args_str) row_args_str = String_.new_u8(src, args_bgn, args_end);
    int row_src_len = src_end - src_bgn;	// source length is always stored
    String row_src_str = Xop_log_basic_wkr.Null_src_str;
    if (save_src_str) row_src_str = String_.new_u8(src, src_bgn, src_end);
    log_tbl.Insert(log_tid, row_msg, row_time, row_page_id, row_page_ttl, row_args_len, row_args_str, row_src_len, row_src_str);
    log_mgr.Commit_chk();
  }
  /** Script entry point: each "save_*_" key sets the matching switch from the y/n argument "v". */
  public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
    if (ctx.Match(k, Invk_save_page_ttl_))	{save_page_ttl = m.ReadYn("v"); return this;}
    if (ctx.Match(k, Invk_save_log_time_))	{save_log_time = m.ReadYn("v"); return this;}
    if (ctx.Match(k, Invk_save_args_len_))	{save_args_len = m.ReadYn("v"); return this;}
    if (ctx.Match(k, Invk_save_args_str_))	{save_args_str = m.ReadYn("v"); return this;}
    if (ctx.Match(k, Invk_save_src_str_))	{save_src_str = m.ReadYn("v"); return this;}
    return GfoInvkAble_.Rv_unhandled;
  }
}

View File

@@ -0,0 +1,82 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.logs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.dbs.engines.sqlite.*; import gplx.xowa.parsers.logs.*;
import gplx.xowa.xtns.scribunto.*;
/**
 * Logs how long each module function evaluation took, one row per evaluation.
 * Module names can be excluded, and logging can be switched off via script.
 */
public class Xop_log_invoke_wkr implements GfoInvkAble {
  private static final String Invk_exclude_mod_names_add = "exclude_mod_names_add", Invk_log_enabled_ = "log_enabled_", Invk_err_filter = "err_filter";
  private Xop_log_mgr log_mgr;
  private Db_conn conn;
  private Db_stmt stmt;
  private boolean log_enabled = true;
  private Hash_adp_bry exclude_mod_names = Hash_adp_bry.cs_();
  private final Scrib_err_filter_mgr err_filter_mgr = new Scrib_err_filter_mgr();
  public Xop_log_invoke_wkr(Xop_log_mgr log_mgr, Db_conn conn) {
    this.log_mgr = log_mgr;
    this.conn = conn;
    if (!log_enabled) return;
    Xop_log_invoke_tbl.Create_table(conn);
    stmt = Xop_log_invoke_tbl.Insert_stmt(conn);
  }
  public Scrib_err_filter_mgr Err_filter_mgr() {
    return err_filter_mgr;
  }
  /** Deletes all previously logged rows. */
  public void Init_reset() {
    Xop_log_invoke_tbl.Delete(conn);
  }
  /** Returns false when the module name has been excluded, true otherwise. */
  public boolean Eval_bgn(Xoae_page page, byte[] mod_name, byte[] fnc_name) {
    boolean excluded = exclude_mod_names.Has(mod_name);
    return !excluded;
  }
  /** Writes one row with the ticks elapsed since invoke_time_bgn; does nothing when logging is off or no statement exists. */
  public void Eval_end(Xoae_page page, byte[] mod_name, byte[] fnc_name, long invoke_time_bgn) {
    if (!log_enabled || stmt == null) return;
    long elapsed = Env_.TickCount() - invoke_time_bgn;
    int eval_time = (int)elapsed;
    Xop_log_invoke_tbl.Insert(stmt, page.Ttl().Rest_txt(), mod_name, fnc_name, eval_time);
    log_mgr.Commit_chk();
  }
  private void Exclude_mod_names_add(String[] v) {
    for (String mod_name : v)
      exclude_mod_names.Add_bry_bry(Bry_.new_u8(mod_name));
  }
  /** Script entry point: adds "|"-separated excluded module names, toggles logging, or returns the error filter manager. */
  public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
    if (ctx.Match(k, Invk_exclude_mod_names_add)) {
      Exclude_mod_names_add(m.ReadStrAry("v", "|"));
      return this;
    }
    if (ctx.Match(k, Invk_log_enabled_)) {
      log_enabled = m.ReadYn("v");
      return this;
    }
    if (ctx.Match(k, Invk_err_filter)) return err_filter_mgr;
    return GfoInvkAble_.Rv_unhandled;
  }
}
/** Static helpers for the "log_invoke_temp" table: DDL, delete-all, and the prepared insert. */
class Xop_log_invoke_tbl {
	public static final String Tbl_name = "log_invoke_temp";
	public static final String Fld_invk_page_ttl = "invk_page_ttl";
	public static final String Fld_invk_mod_name = "invk_mod_name";
	public static final String Fld_invk_fnc_name = "invk_fnc_name";
	public static final String Fld_invk_eval_time = "invk_eval_time";
	private static final String Tbl_sql = String_.Concat_lines_nl
	( "CREATE TABLE IF NOT EXISTS log_invoke_temp"
	, "( invk_id integer NOT NULL PRIMARY KEY AUTOINCREMENT"
	, ", invk_page_ttl varchar(255) NOT NULL"
	, ", invk_mod_name varchar(255) NOT NULL"
	, ", invk_fnc_name varchar(255) NOT NULL"
	, ", invk_eval_time integer NOT NULL"
	, ");"
	);

	/** Runs the table DDL through Sqlite_engine_.Tbl_create. */
	public static void Create_table(Db_conn conn) {
		Sqlite_engine_.Tbl_create(conn, Tbl_name, Tbl_sql);
	}

	/** Executes a delete-all query against the table. */
	public static void Delete(Db_conn conn) {
		conn.Exec_qry(Db_qry_delete.new_all_(Tbl_name));
	}

	/** Builds the insert statement over (page_ttl, mod_name, fnc_name, eval_time), in that order. */
	public static Db_stmt Insert_stmt(Db_conn conn) {
		return Db_stmt_.new_insert_
		( conn, Tbl_name
		, Fld_invk_page_ttl, Fld_invk_mod_name, Fld_invk_fnc_name, Fld_invk_eval_time
		);
	}

	/** Binds the four values in statement order and executes the insert. */
	public static void Insert(Db_stmt stmt, byte[] page_ttl, byte[] mod_name, byte[] fnc_name, int eval_time) {
		stmt.Clear().Val_bry_as_str(page_ttl).Val_bry_as_str(mod_name).Val_bry_as_str(fnc_name).Val_int(eval_time).Exec_insert();
	}
}

View File

@@ -0,0 +1,68 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.logs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.dbs.*; import gplx.xowa.bldrs.*;
/**
 * Owns the shared log connection and hands out log workers bound to it.
 * The connection is opened lazily by {@link #Conn()} the first time a worker is made.
 */
public class Xop_log_mgr implements GfoInvkAble {
	private Db_conn conn;
	private Xoae_app app; private Xop_log_basic_tbl log_tbl;
	private int exec_count = 0, commit_interval = 1000;
	public Xop_log_mgr(Xoae_app app) {this.app = app;}
	/** Directory used for the temp log db; null until set or until the connection is first opened. */
	public Io_url Log_dir() {return log_dir;}
	public Xop_log_mgr Log_dir_(Io_url v) {
		log_dir = v;
//			if (conn != null) {	// COMMENTED: need to implement a conn.Renew()
//				conn.Rls();		// invalidate conn; note that during build other cmds will bind Conn which will place temp.log in /temp/ dir instead of /wiki/ dir; DATE:2014-04-16
//			}
		return this;
	}	private Io_url log_dir;
	/** Lazily opens the temp-log connection, defaulting log_dir to the user's app temp dir when unset. */
	private Db_conn Conn() {
		if (conn == null) {
			if (log_dir == null) log_dir = app.Usere().Fsys_mgr().App_temp_dir();
			Xob_db_file db_file = Xob_db_file.new__temp_log(log_dir);
			conn = db_file.Conn();
		}
		return conn;
	}
	public Xop_log_invoke_wkr Make_wkr_invoke() {return new Xop_log_invoke_wkr(this, this.Conn());}
	public Xop_log_property_wkr Make_wkr_property() {return new Xop_log_property_wkr(this, this.Conn());}
	public Xop_log_basic_wkr Make_wkr() {
		if (log_tbl == null)
			log_tbl = new Xop_log_basic_tbl(this.Conn());
		return new Xop_log_basic_wkr(this, log_tbl);
	}
	/**
	 * Counts one executed insert and saves the transaction every commit_interval calls.
	 * Called by workers, which may outlive {@link #Rls()}; in that case there is no connection to save.
	 */
	public void Commit_chk() {
		++exec_count;
		if (commit_interval == 0) return;	// "commit_interval_ = 0" would otherwise throw ArithmeticException on the modulo below
		if (conn == null) return;			// connection not opened, or already released by Rls()
		if ((exec_count % commit_interval) == 0)
			conn.Txn_sav();
	}
	/** Deletes all rows of the basic log table; no-op when Make_wkr() has never created that table. */
	public void Delete_all() {
		if (log_tbl == null) return;		// table is only created by Make_wkr(); nothing bound to delete from
		log_tbl.Delete();
	}
	/** Begins a transaction; the connection must already have been opened by one of the Make_wkr* methods. */
	public void Txn_bgn() {conn.Txn_bgn();}
	/** Ends a transaction; the connection must already have been opened by one of the Make_wkr* methods. */
	public void Txn_end() {conn.Txn_end();}
	/** Releases the table and the connection; both are dropped so later Make_wkr* calls rebuild them. */
	public void Rls() {
		if (log_tbl != null) {
			log_tbl.Rls();
			log_tbl = null;					// do not hand a table bound to the released conn to future workers
		}
		if (conn != null)		{conn.Rls_conn(); conn = null;}
	}
	/** Script entry point; handles "commit_interval_". */
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if		(ctx.Match(k, Invk_commit_interval_))		commit_interval = m.ReadInt("v");
		else	return GfoInvkAble_.Rv_unhandled;
		return this;
	}
	private static final String Invk_commit_interval_ = "commit_interval_";
}

View File

@@ -0,0 +1,77 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.logs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.dbs.engines.sqlite.*;
/**
 * Records one row per evaluated property (page title, property name, elapsed ticks)
 * through {@link Xob_log_property_temp_tbl}. All properties are included until
 * specific ones are registered with "include_props_add".
 */
public class Xop_log_property_wkr implements GfoInvkAble {
	private Xop_log_mgr log_mgr;
	private Db_conn conn;
	private Db_stmt stmt;
	private boolean log_enabled = true;
	private boolean include_all = true;
	private Hash_adp_bry include_props = Hash_adp_bry.cs_();

	/** Binds the worker to its manager and connection; creates the table and insert statement while logging is enabled. */
	public Xop_log_property_wkr(Xop_log_mgr log_mgr, Db_conn conn) {
		this.log_mgr = log_mgr;
		this.conn = conn;
		if (log_enabled) {
			Xob_log_property_temp_tbl.Create_table(conn);
			stmt = Xob_log_property_temp_tbl.Insert_stmt(conn);
		}
	}

	/** Deletes every row of the property log table. */
	public void Init_reset() {
		Xob_log_property_temp_tbl.Delete(conn);
	}

	/** Returns true when every property is included, or when prop was registered explicitly. */
	public boolean Eval_bgn(Xoae_page page, byte[] prop) {
		if (include_all) return true;
		return include_props.Has(prop);
	}

	/** Inserts one timing row (now minus invoke_time_bgn) and lets the manager decide whether to commit. */
	public void Eval_end(Xoae_page page, byte[] prop, long invoke_time_bgn) {
		if (!log_enabled || stmt == null) return;	// nothing to write to
		long elapsed = Env_.TickCount() - invoke_time_bgn;
		Xob_log_property_temp_tbl.Insert(stmt, page.Ttl().Rest_txt(), prop, (int)elapsed);
		log_mgr.Commit_chk();
	}

	/** Registers each name (converted with Bry_.new_u8) and switches off include-all mode. */
	private void Include_props_add(String[] v) {
		for (String prop_name : v) {
			include_props.Add_bry_bry(Bry_.new_u8(prop_name));
		}
		include_all = false;	// specific items were added, so stop including everything
	}

	/** Script entry point; handles "include_props_add" and "log_enabled_". */
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if (ctx.Match(k, Invk_include_props_add)) {
			Include_props_add(m.ReadStrAry("v", "|"));
			return this;
		}
		if (ctx.Match(k, Invk_log_enabled_)) {
			log_enabled = m.ReadYn("v");
			return this;
		}
		return GfoInvkAble_.Rv_unhandled;
	}

	private static final String Invk_include_props_add = "include_props_add";
	private static final String Invk_log_enabled_ = "log_enabled_";
}
/** Static helpers for the "log_property_temp" table: DDL, delete-all, and the prepared insert. */
class Xob_log_property_temp_tbl {
	public static final String Tbl_name = "log_property_temp";
	public static final String Fld_prop_page_ttl = "prop_page_ttl";
	public static final String Fld_prop_prop_name = "prop_prop_name";
	public static final String Fld_prop_eval_time = "prop_eval_time";
	private static final String Tbl_sql = String_.Concat_lines_nl
	( "CREATE TABLE IF NOT EXISTS log_property_temp"
	, "( prop_id integer NOT NULL PRIMARY KEY AUTOINCREMENT"
	, ", prop_page_ttl varchar(255) NOT NULL"
	, ", prop_prop_name varchar(255) NOT NULL"
	, ", prop_eval_time integer NOT NULL"
	, ");"
	);

	/** Runs the table DDL through Sqlite_engine_.Tbl_create. */
	public static void Create_table(Db_conn conn) {
		Sqlite_engine_.Tbl_create(conn, Tbl_name, Tbl_sql);
	}

	/** Executes a delete-all query against the table. */
	public static void Delete(Db_conn conn) {
		conn.Exec_qry(Db_qry_delete.new_all_(Tbl_name));
	}

	/** Builds the insert statement over (page_ttl, prop_name, eval_time), in that order. */
	public static Db_stmt Insert_stmt(Db_conn conn) {
		return Db_stmt_.new_insert_
		( conn, Tbl_name
		, Fld_prop_page_ttl, Fld_prop_prop_name, Fld_prop_eval_time
		);
	}

	/** Binds the three values in statement order and executes the insert. */
	public static void Insert(Db_stmt stmt, byte[] page_ttl, byte[] prop_name, int eval_time) {
		stmt.Clear().Val_bry_as_str(page_ttl).Val_bry_as_str(prop_name).Val_int(eval_time).Exec_insert();
	}
}

View File

@@ -0,0 +1,115 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.tblws.*;
/**
 * Lexer registered on the newline byte (see Init_by_wiki).
 * Make_tkn decides, in order, whether the newline is: the simulated one at the start of the parse,
 * whitespace in front of a category link, the end of an invalid link, or an ordinary line end that
 * closes open inline / block tokens and is then handed to the paragraph worker.
 */
public class Xop_nl_lxr implements Xop_lxr {
public byte Lxr_tid() {return Xop_lxr_.Tid_nl;}
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Nl, this);}
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
/**
 * Handles one newline spanning bgn_pos to cur_pos in src.
 * Returns the position at which lexing should resume: cur_pos in the common case, the position of
 * "[[" when a category link follows the whitespace, or whatever Invalidate_lnki / Lxr_make_txt_ return.
 */
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
if (bgn_pos == Xop_parser_.Doc_bgn_bos) return ctx.Lxr_make_txt_(cur_pos); // simulated nl at beginning of every parse
int trim_category_pos = Scan_fwd_for_ctg(ctx, src, cur_pos, src_len);
if (trim_category_pos != Bry_.NotFound) { // [[Category]] found after ws
int root_subs_len = root.Subs_len();
if (root_subs_len > 0) {
Xop_tkn_itm tkn = root.Subs_get(root_subs_len - 1);
// only emit a nl tkn when the last tkn on root is an "=" run longer than 1; otherwise the ws before the category is dropped
if (tkn.Tkn_tid() == Xop_tkn_itm_.Tid_eq) {
Xop_eq_tkn eq_tkn = (Xop_eq_tkn)tkn;
if (eq_tkn.Eq_len() > 1) {
Xop_nl_tkn nl_tkn = tkn_mkr.NewLine(bgn_pos, cur_pos, Xop_nl_tkn.Tid_char, 1);
ctx.Subs_add(root, nl_tkn);
}
}
}
return trim_category_pos;
}
Xop_tkn_itm last_tkn = ctx.Stack_get_last(); // BLOCK:invalid_ttl_check
if ( !ctx.Tid_is_image_map()
&& last_tkn != null
&& last_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) {
Xop_lnki_tkn lnki = (Xop_lnki_tkn)last_tkn;
if ( lnki.Pipe_count_is_zero()) { // always invalid
ctx.Stack_pop_last();
return Xop_lnki_wkr_.Invalidate_lnki(ctx, src, root, lnki, bgn_pos);
}
}
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, true); // NOTE: frame should at end at bgn_pos (before \n) not after; else, will create tkn at (5,5), while tkn_mkr.Space creates one at (4,5); DATE:2013-10-31
ctx.Tblw().Cell_pipe_seen_(false); // flip off "|" in tblw seq; EX: "| a\n||" needs to flip off "|" else "||" will be seen as style dlm"; NOTE: not covered by test?
Xop_para_wkr para_wkr = ctx.Para();
switch (ctx.Cur_tkn_tid()) {
case Xop_tkn_itm_.Tid_hdr: // last tkn was hdr; close it; EX: \n==a==\nb; "\n" should close 2nd "=="; DATE:2014-02-17
int acs_pos = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_hdr);
ctx.Stack_pop_til(root, src, acs_pos, true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_newLine);
para_wkr.Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_h2);
break;
case Xop_tkn_itm_.Tid_list: // close list
Xop_list_wkr_.Close_list_if_present(ctx, root, src, bgn_pos, cur_pos);
para_wkr.Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_li);
break;
case Xop_tkn_itm_.Tid_lnke: // close lnke
if (ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tmpl_invk) == -1) // only close if no tmpl; MWR: [[SHA-2]]; * {{cite journal|title=Proposed
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_lnke), true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_newLine);
break;
case Xop_tkn_itm_.Tid_lnki: // NOTE: \n in caption or other multipart lnki; don't call para_wkr.Process
Xop_tkn_itm nl_tkn = tkn_mkr.Space(root, bgn_pos, cur_pos); // convert \n to \s. may result in multiple \s, but rely on htmlViewer to suppress; EX: w:Schwarzschild_radius; and the stellar [[Velocity dispersion|velocity\ndispersion]];
ctx.Subs_add(root, nl_tkn);
return cur_pos;
// case Xop_tkn_itm_.Tid_tblw_tc: case Xop_tkn_itm_.Tid_tblw_td: // STUB: tc/td should not have attributes
case Xop_tkn_itm_.Tid_tblw_tb: case Xop_tkn_itm_.Tid_tblw_tr: case Xop_tkn_itm_.Tid_tblw_th: // nl should close previous tblw's atrs range; EX {{Infobox planet}} and |-\n<tr>
Xop_tblw_wkr.Atrs_close(ctx, src, root, Bool_.N);
break;
}
if ( ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki // parse_mode is wiki
&& para_wkr.Enabled() // check that para is enabled
)
para_wkr.Process_nl(ctx, root, src, bgn_pos, cur_pos);
else { // parse mode is tmpl, or para is disabled; for latter, adding \n for pretty-print
Xop_nl_tkn nl_tkn = tkn_mkr.NewLine(bgn_pos, cur_pos, Xop_nl_tkn.Tid_char, 1);
ctx.Subs_add(root, nl_tkn);
}
return cur_pos;
}
/**
 * Scans forward from cur_pos over space / tab / nl / cr.
 * Returns the index of the first "[" when the first non-ws text is "[[" followed (after optional spaces)
 * by a name matched by the wiki's category trie and then ":"; returns Bry_.NotFound otherwise,
 * including when src ends or any other non-ws byte is met first.
 */
public static int Scan_fwd_for_ctg(Xop_ctx ctx, byte[] src, int cur_pos, int src_len) {
for (int i = cur_pos; i < src_len; i++) {
byte b = src[i];
switch (b) {
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: // ignore ws
break;
case Byte_ascii.Brack_bgn: // [
if ( Bry_.Eq_itm(src, src_len, i + 1, Byte_ascii.Brack_bgn) // [[
&& i + 2 < src_len) {
int ttl_bgn = Bry_finder.Find_fwd_while(src, i + 2, src_len, Byte_ascii.Space);
Btrie_slim_mgr ctg_trie = ctx.Wiki().Ns_mgr().Category_trie();
Object ctg_ns = ctg_trie.Match_bgn(src, ttl_bgn, src_len);
if (ctg_ns != null // "[[Category" found
&& Bry_.Eq_itm(src, src_len, ctg_trie.Match_pos(), Byte_ascii.Colon)) { // check that next char is :
return i;// return pos of 1st [
}
return Bry_.NotFound;
}
break; // single "[" (or "[[" at very end of src): falls out of the switch and the scan continues with the next byte
default: // non-ws; return not found
return Bry_.NotFound;
}
}
return Bry_.NotFound;
}
// singleton instance; constructor is package-private
public static final Xop_nl_lxr _ = new Xop_nl_lxr(); Xop_nl_lxr() {}
}

View File

@@ -0,0 +1,51 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.xowa.parsers.tblws.*;
/**
 * Lexer registered on the two-byte hook "\n\t".
 * When the text after the tab (and any further spaces / tabs) starts a table-wiki item,
 * the table worker gets the first chance to consume it; otherwise the newline and tab are
 * added to root as plain tokens.
 */
public class Xop_nl_tab_lxr implements Xop_lxr {
	private static final byte[] Hook_nl_tab = new byte[] {Byte_ascii.Nl, Byte_ascii.Tab};

	public byte Lxr_tid() {
		return Xop_lxr_.Tid_nl_tab;
	}
	public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
		core_trie.Add(Hook_nl_tab, this);
	}
	public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}

	/** Returns the table worker's result when it accepts the item after "\n\t"; otherwise emits nl + tab tokens and returns cur_pos. */
	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		int non_ws_pos = Bry_finder.Find_fwd_while_space_or_tab(src, cur_pos, src_len);
		if (non_ws_pos < src_len) {		// bounds check
			Btrie_slim_mgr tblw_trie = ctx.App().Utl_trie_tblw_ws();
			Object tblw_obj = tblw_trie.Match_bgn(src, non_ws_pos, src_len);
			if (tblw_obj != null) {
				Xop_tblw_ws_itm tblw_itm = (Xop_tblw_ws_itm)tblw_obj;
				byte itm_type = tblw_itm.Tblw_type();
				boolean skip_itm
					=  itm_type == Xop_tblw_ws_itm.Type_nl		// ignore nl
					|| itm_type == Xop_tblw_ws_itm.Type_xnde;	// ignore xnde
				if (!skip_itm) {								// handle tblw
					int tblw_rv = ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, non_ws_pos + tblw_itm.Hook_len(), false, itm_type, Xop_tblw_wkr.Called_from_pre, -1, -1);
					if (tblw_rv != -1) {						// valid tblw tkn was processed; otherwise fall through to nl + tab
						return tblw_rv;
					}
				}
			}
		}
		boolean at_bos = bgn_pos == Xop_parser_.Doc_bgn_bos;
		if (!at_bos) {	// don't add \n if BOS; EX: "<BOS>	a" should be "	", not "\n	"
			ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos + 1, Xop_nl_tkn.Tid_char, 1));
		}
		ctx.Subs_add(root, tkn_mkr.Tab(cur_pos - 1, cur_pos));
		return cur_pos;
	}

	public static final Xop_nl_tab_lxr _ = new Xop_nl_tab_lxr();
	Xop_nl_tab_lxr() {}
}

View File

@@ -0,0 +1,65 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Parser tests for lines that start with a tab ("\n\t"). Paragraph mode is on for each test and switched off afterwards. */
public class Xop_nl_tab_lxr_tst {
	private Xop_fxt fxt = new Xop_fxt();

	@Before public void init() {
		fxt.Reset();
		fxt.Init_para_y_();
	}
	@After public void teardown() {
		fxt.Init_para_n_();
	}

	/** PURPOSE: \n\t|- should be recognized as tblw; EX:zh.v:西安; DATE:2014-05-06 */
	@Test public void Basic() {
		String raw = String_.Concat_lines_nl
		( "{|"
		, "\t|-"
		, "|a"
		, "|}"
		);
		String expd = String_.Concat_lines_nl
		( "<table>"
		, " <tr>"
		, " <td>a"
		, " </td>"
		, " </tr>"
		, "</table>"
		);
		fxt.Test_parse_page_wiki_str(raw, expd);
	}

	/** PURPOSE: same as Basic, but with whitespace between the tab and "|-"; EX:zh.v:西安; DATE:2014-05-06 */
	@Test public void Ws() {
		String raw = String_.Concat_lines_nl
		( "{|"
		, "\t |-"	// \t
		, "|a"
		, "|}"
		);
		String expd = String_.Concat_lines_nl
		( "<table>"
		, " <tr>"
		, " <td>a"
		, " </td>"
		, " </tr>"
		, "</table>"
		);
		fxt.Test_parse_page_wiki_str(raw, expd);
	}

	/** PURPOSE: \n\t should not be pre; EX:pl.w:Main_Page; DATE:2014-05-06 */
	@Test public void Ignore() {
		String raw = String_.Concat_lines_nl_skip_last
		( "a"
		, "\t b"
		, "c"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<p>a"
		, "\t b"
		, "c"
		, "</p>"
		);
		fxt.Test_parse_page_wiki_str(raw, expd);
	}
}

View File

@@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Newline token; carries a Tid_* code describing what produced the newline. */
public class Xop_nl_tkn extends Xop_tkn_itm_base {
	public static final byte Tid_unknown = 0;
	public static final byte Tid_char = 1;
	public static final byte Tid_hdr = 2;
	public static final byte Tid_hr = 3;
	public static final byte Tid_list = 4;
	public static final byte Tid_tblw = 5;
	public static final byte Tid_file = 6;

	private byte nl_tid = Xop_nl_tkn.Tid_unknown;

	/** Sets the token range to [bgn, end) and stores nl_tid; nl_len is accepted but not stored. */
	public Xop_nl_tkn(int bgn, int end, byte nl_tid, int nl_len) {
		this.Tkn_ini_pos(false, bgn, end);
		this.nl_tid = nl_tid;
	}

	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_newLine;
	}

	/** One of the Tid_* constants of this class. */
	public byte Nl_tid() {
		return nl_tid;
	}
}

View File

@@ -0,0 +1,31 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Zero-width paragraph placeholder token. The paragraph worker fills in, after the fact,
 * which section it ends, which section it begins, how many leading spaces belong to it,
 * and whether a newline precedes it.
 */
public class Xop_para_tkn extends Xop_tkn_itm_base {
	public static final byte Tid_none = 0;	//
	public static final byte Tid_para = 1;	// </p>
	public static final byte Tid_pre  = 2;	// </pre>

	private byte para_end = Tid_none;
	private byte para_bgn = Tid_none;
	private int space_bgn = 0;
	private boolean nl_bgn;

	/** Creates the token with both bounds at pos. */
	public Xop_para_tkn(int pos) {
		this.Tkn_ini_pos(false, pos, pos);
	}

	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_para;
	}

	public byte Para_end() {
		return para_end;
	}
	public Xop_para_tkn Para_end_(byte v) {
		this.para_end = v;
		return this;
	}

	public byte Para_bgn() {
		return para_bgn;
	}
	public Xop_para_tkn Para_bgn_(byte v) {
		this.para_bgn = v;
		return this;
	}

	public int Space_bgn() {
		return space_bgn;
	}
	public Xop_para_tkn Space_bgn_(int v) {
		this.space_bgn = v;
		return this;
	}

	public boolean Nl_bgn() {
		return nl_bgn;
	}
	public Xop_para_tkn Nl_bgn_y_() {
		this.nl_bgn = true;
		return this;
	}
}

View File

@@ -0,0 +1,344 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.parsers.tblws.*; import gplx.core.btries.*;
public class Xop_para_wkr implements Xop_ctx_wkr {
// true only while a page is being parsed with paragraphs switched on; computed in Page_bgn from `enabled` and the parse type
private boolean para_enabled;
// current section mode; compared against the Mode_* constants (Mode_none / Mode_para / Mode_pre)
private byte cur_mode;
// pending paragraph markup; one of the Para_stack_* constants (mirrors MW's $paragraphStack per the comments in Process_nl)
private int para_stack;
// block state: the *_xnde / *_blockquote flags are set by Process_block and consumed (then reset) by Process_nl
private boolean in_block, block_is_bgn_xnde, block_is_end_xnde, in_blockquote, block_is_bgn_blockquote, block_is_end_blockquote;
// position of the previous newline; the most recent para placeholder; leading-ws count set by Process_pre (0 when none)
private int prv_nl_pos; private Xop_para_tkn prv_para; private int prv_ws_bgn;
// user-level switch; Page_bgn combines it with the parse type to compute para_enabled
public boolean Enabled() {return enabled;} public Xop_para_wkr Enabled_(boolean v) {enabled = v; return this;} private boolean enabled = true;
public Xop_para_wkr Enabled_y_() {enabled = true; return this;} public Xop_para_wkr Enabled_n_() {enabled = false; return this;}
// intentionally empty: no per-ctx setup is done here
public void Ctor_ctx(Xop_ctx ctx) {}
/** Resets all state, then enables paragraph handling only when `enabled` is set and the parse is a wiki page (not a template); when enabled, adds the first para placeholder at BOS. */
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
	this.Clear();
	if (!enabled || ctx.Parse_tid() != Xop_parser_.Parse_tid_page_wiki) {
		para_enabled = false;
		return;
	}
	para_enabled = true;
	Prv_para_new(ctx, root, -1, 0);	// create <para> at bos
}
/** Returns every piece of per-page state to its initial value. */
private void Clear() {
	cur_mode = Mode_none;
	para_stack = Para_stack_none;

	// generic block flags
	block_is_end_xnde = false;
	block_is_bgn_xnde = false;
	in_block = false;

	// blockquote flags
	block_is_end_blockquote = false;
	block_is_bgn_blockquote = false;
	in_blockquote = false;

	// previous-line bookkeeping
	prv_nl_pos = -1;
	prv_para = null;
	prv_ws_bgn = 0;
}
// intentionally empty: this worker does nothing when a tkn is auto-closed
public void AutoClose(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {}
/** At end of page, processes a final newline positioned at src_len and closes whatever section it opened (only when paragraphs are enabled); state is always reset afterwards. */
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
	if (!para_enabled) {
		this.Clear();
		return;
	}
	Process_nl(ctx, root, src, src_len, src_len);
	this.Prv_para_end();	// close anything created by Process_nl()
	this.Clear();
}
/**
 * Marks the current line as opening a block element.
 * NOTE: disables para for rest of page; Process_block__bgn_n__end_y must be called; DATE:2014-04-18
 */
public void Process_block__bgn_y__end_n(Xop_xnde_tag tag) {
	Process_block(tag, Bool_.Y, Bool_.N);
}
/** Marks the current line as closing a block element. */
public void Process_block__bgn_n__end_y(Xop_xnde_tag tag) {
	Process_block(tag, Bool_.N, Bool_.Y);
}
/** Dispatches on the tag's block mode: Block_bgn marks a block start, Block_end marks a block end, any other mode is ignored. */
public void Process_block__xnde(Xop_xnde_tag tag, byte mode) {
	if (mode == Xop_xnde_tag.Block_bgn) {
		Process_block(tag, Bool_.Y, Bool_.N);
		return;
	}
	if (mode == Xop_xnde_tag.Block_end) {
		Process_block(tag, Bool_.N, Bool_.Y);
	}
}
/**
 * Treats a link that renders as a div as a block end.
 * Leading ws on the line is discarded first, so it is not handed to the previous para placeholder;
 * EX: "\n\s[[File:A.png|thumb]]" and [[File:A.png|right]]; DATE:2014-02-17
 */
public void Process_block_lnki_div() {
	if (prv_ws_bgn > 0) {
		prv_ws_bgn = 0;		// pre at start of line is ignored b/c of div
	}
	this.Process_block(Xop_xnde_tag_.Tag_div, Bool_.N, Bool_.Y);
}
/**
 * Records that the current line begins and / or ends a block element; Process_nl reads these flags at the next newline.
 * Pending leading ws is moved onto the previous para placeholder first. Blockquote tags additionally raise
 * the blockquote flags (they are only ever raised here, never lowered).
 */
private void Process_block(Xop_xnde_tag tag, boolean bgn, boolean end) {
	int pending_ws = prv_ws_bgn;
	if (pending_ws > 0) {
		prv_para.Space_bgn_(pending_ws);
		prv_ws_bgn = 0;
	}
	block_is_bgn_xnde = bgn;
	block_is_end_xnde = end;
	if (tag.Id() != Xop_xnde_tag_.Tid_blockquote) return;
	if (bgn) {
		block_is_bgn_blockquote = true;
	}
	if (end) {
		block_is_end_blockquote = true;
	}
}
/**
 * Handles a newline that is fused with a block symbol; EX: \n== and \n* and \n{|
 * The nl occupies bgn_pos to bgn_pos + 1 (not cur_pos). Does nothing when paragraphs are disabled.
 */
public void Process_block__bgn__nl_w_symbol(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos, Xop_xnde_tag tag) {
	if (para_enabled) {
		int nl_end = bgn_pos + 1;
		Process_nl(ctx, root, src, bgn_pos, nl_end);
		Process_block(tag, Bool_.Y, Bool_.N);
	}
}
/**
 * Processes one newline at bgn_pos..cur_pos; modeled on MediaWiki's doBlockLevels (each "MW:" comment quotes the corresponding PHP).
 * Three cases, in order:
 *   1. the line began or ended a block element (flags set by Process_block): close the open section and update in_block / in_blockquote;
 *   2. otherwise, outside any block: decide between pre (line started with ws), blank line, and text line, updating cur_mode and para_stack;
 *   3. otherwise (inside a block): no section change.
 * In all cases the leading-ws count and block flags are reset and a new para placeholder is added to root for the next line.
 */
public void Process_nl(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos) {// REF.MW:Parser.php|doBlockLevels
Dd_clear(ctx);
if (block_is_bgn_xnde || block_is_end_xnde) {
para_stack = Para_stack_none; // MW: $paragraphStack = false;
Prv_para_end(); // MW: $output .= $this->closeParagraph()
if (block_is_bgn_blockquote && !block_is_end_blockquote) // MW: if ( $preOpenMatch and !$preCloseMatch )
in_blockquote = true; // MW: $this->mInPre = true;
else
in_blockquote = false; // XO: turn off blockquote else following para / nl won't work; w:Snappy_(software); DATE:2014-04-25
in_block = !block_is_end_xnde; // MW: $inBlockElem = !$closematch;
}
else if (!in_block && !in_blockquote) { // MW: elseif ( !$inBlockElem && !$this->mInPre ) {
boolean line_is_ws = Line_is_ws(src, bgn_pos);
if (prv_ws_bgn > 0 && (cur_mode == Mode_pre || !line_is_ws)) { // MW: if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) ) {
if (cur_mode != Mode_pre) { // MW: if ( $this->mLastSection !== 'pre' ) {
para_stack = Para_stack_none; // MW: $paragraphStack = false;
prv_para.Space_bgn_(prv_ws_bgn - 1); // -1 to ignore 1st "\s" in "\n\s"; note that prv_ws_bgn only includes spaces, so BOS doesn't matter; DATE:2014-04-14
Prv_para_end(); Prv_para_bgn(Xop_para_tkn.Tid_pre); // MW: $output .= $this->closeParagraph() . '<pre>';
cur_mode = Mode_pre; // MW: $this->mLastSection = 'pre';
}
else { // already in pre
if (line_is_ws) { // line is entirely ws
int next_char_pos = prv_nl_pos + 2; // "\n\s".length
if ( next_char_pos < src.length // bounds check
&& src[next_char_pos] == Byte_ascii.Nl // is "\n\s\n"; i.e.: "\n" only
) {
ctx.Subs_add(root, ctx.Tkn_mkr().Bry_raw(bgn_pos, bgn_pos, Byte_ascii.Nl_bry)); // add a "\n" tkn; note that adding a NewLine tkn doesn't work, b/c Xoh_html_wtr has code to remove consecutive \n; PAGE:en.w:Preferred_numbers DATE:2014-06-24
prv_nl_pos = bgn_pos;
}
}
}
prv_ws_bgn = 0; // MW: $t = substr( $t, 1 );
}
else {
if (bgn_pos - prv_nl_pos == 1 || line_is_ws) { // line is blank ("b" for blank) MW: if ( trim( $t ) === '' ) {
if (para_stack != Para_stack_none) { // "b1"; stack has "<p>" or "</p><p>"; output "<br/>"; MW: if ( $paragraphStack ) {
Para_stack_end(cur_pos); Add_br(ctx, root, bgn_pos); // MW: $output .= $paragraphStack . '<br />';
para_stack = Para_stack_none; // MW: $paragraphStack = false;
cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
}
else { // stack is empty
if (cur_mode != Mode_para) { // "b2"; cur is '' or <pre> MW: if ( $this->mLastSection !== 'p' ) {
Prv_para_end(); // MW: $output .= $this->closeParagraph();
cur_mode = Mode_none; // MW: $this->mLastSection = '';
para_stack = Para_stack_bgn; // put <p> on stack MW: $paragraphStack = '<p>';
}
else // "b3"; cur is p
para_stack = Para_stack_mid; // put </p><p> on stack MW: $paragraphStack = '</p><p>';
}
}
else { // line has text ("t" for text); NOTE: tkn already added before \n, so must change prv_para; EX: "a\n" -> this code is called for "\n" but "a" already processed
if (para_stack != Para_stack_none) { // "t1" MW: if ( $paragraphStack ) {
Para_stack_end(cur_pos); // MW: $output .= $paragraphStack;
para_stack = Para_stack_none; // MW: $paragraphStack = false;
cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
}
else if (cur_mode != Mode_para) { // "t2"; cur is '' or <pre> MW: elseif ( $this->mLastSection !== 'p' ) {
Prv_para_end(); Prv_para_bgn(Xop_para_tkn.Tid_para); // MW: $output .= $this->closeParagraph() . '<p>';
cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
}
else {} // "t3"
}
}
}
if (in_blockquote && prv_ws_bgn > 0) // handle blockquote separate; EX: <blockquote>\n\sa\n</blockquote>; note that "\s" needs to be added literally; MW doesn't have this logic specifically, since it assumes all characters go into $output, whereas XO, sets aside the "\s" in "\n\s" separately
prv_para.Space_bgn_(prv_ws_bgn);
prv_ws_bgn = 0; // nl encountered and processed; always prv_ws_bgn set to 0, else ws from one line will carry over to next
// in_blockquote = false;
block_is_bgn_xnde = block_is_end_xnde = false;
// if ( $preCloseMatch && $this->mInPre )
// $this->mInPre = false;
// prv_ws_bgn = false;
Prv_para_new(ctx, root, bgn_pos, cur_pos); // add a prv_para placeholder
if (para_stack == Para_stack_none) // "x1" MW: if ( $paragraphStack === false ) {
if (prv_para != null) prv_para.Nl_bgn_y_(); // add nl; note that "$t" has already been processed; MW: $output .= $t . "\n";
}
/**
 * Handles "\n" followed by leading whitespace; bgn_pos is the newline, cur_pos / txt_pos delimit the whitespace run (txt_pos is where text resumes).
 * If the text at txt_pos matches the tblw-ws trie, the match is handled first: a nested "\n\s" while already in pre, an xnde tag name,
 * or a table-wiki item may each consume the input and return early. Otherwise the newline is processed as a normal nl and the
 * leading-ws count is stored in prv_ws_bgn so that the next Process_nl can decide whether the line is pre.
 * Returns the position at which lexing should resume.
 */
public int Process_pre(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int txt_pos) {
Dd_clear(ctx);
Btrie_slim_mgr tblw_ws_trie = ctx.App().Utl_trie_tblw_ws();
Object o = tblw_ws_trie.Match_bgn(src, txt_pos, src_len);
if (o != null) { // tblw_ws found
Xop_tblw_ws_itm ws_itm = (Xop_tblw_ws_itm)o;
byte tblw_type = ws_itm.Tblw_type();
switch (tblw_type) {
case Xop_tblw_ws_itm.Type_nl: // \n\s
if (cur_mode == Mode_pre) { // already in pre; just process "\n\s"
ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos, Xop_nl_tkn.Tid_char, 1));
prv_nl_pos = bgn_pos; // NOTE: must update prv_nl_pos; PAGE:en.w:Preferred_number DATE:2014-06-24
return txt_pos;
}
break;
case Xop_tblw_ws_itm.Type_xnde:
int nxt_pos = tblw_ws_trie.Match_pos();
if (nxt_pos < src_len) { // bounds check
switch (src[nxt_pos]) { // check that next char is "end" of xnde name; guard against false matches like "<trk" PAGE:de.v:Via_Jutlandica/Gpx DATE:2014-11-29
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // whitespace
case Byte_ascii.Slash: case Byte_ascii.Gt: // end node
case Byte_ascii.Quote: case Byte_ascii.Apos: // quotes
if (bgn_pos != Xop_parser_.Doc_bgn_bos)
ctx.Para().Process_nl(ctx, root, src, bgn_pos, cur_pos);
return ctx.Xnde().Make_tkn(ctx, tkn_mkr, root, src, src_len, txt_pos, txt_pos + 1);
}
}
break;
default: {
int tblw_rv = ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, txt_pos + ws_itm.Hook_len(), false, tblw_type, Xop_tblw_wkr.Called_from_pre, -1, -1);
if (tblw_rv != -1) // \n\s| is valid tblw tkn and processed; otherwise process pre-code below; EX:w:Wikipedia:WikiProject_History/CategoryExample; DATE:2014-04-14
return tblw_rv;
break;
}
}
}
// NOTE: pre lxr emulates MW for "\n\s" by (1) calling Process nl for "\n"; (2) anticipating next line by setting prv_ws_bgn
// EX: "\na\n b\n"; note that "\n " is cur
if (bgn_pos != Xop_parser_.Doc_bgn_bos) // if bos, then don't close 1st para
Process_nl(ctx, root, src, bgn_pos, bgn_pos + 1); // note that tkn is \n\s; so, bgn_pos -> bgn_pos + 1 is \n ...
if (cur_mode == Mode_pre) // in pre_mode
ctx.Subs_add(root, tkn_mkr.Space(root, cur_pos, txt_pos)); // cur_pos to start after \s; do not capture "\s" in "\n\s"; (not sure why not before \s)
prv_ws_bgn = txt_pos - cur_pos + 1; // always >= 1 here, so the next Process_nl / Process_block sees the line as starting with ws
return txt_pos;
}
// Emulates MW's stripping of the whitespace that a [[Category:]] link leaves behind; SEE:NOTE_2
// REF.MW:Parser.php|replaceInternalLinks2|Strip the whitespace Category links produce;
// Inspects the root sub-token directly in front of the current lnki; note that both switch branches end the loop on the first pass (para: return; anything else: i = -1)
// - para tkn and line started with ws (prv_ws_bgn > 0): call Ignore_y_() on the para (unless it starts at 0), clear prv_ws_bgn, and, if a list is on the stack and the rest of the line is ws up to a "\n", set prv_nl_pos to just after that "\n"
// - anything else, or no preceding tkn: only clear prv_ws_bgn
// ctx: queried for Stack_has(list); root: owner of the sub-tokens being inspected; src / src_len: wikitext bytes and their length; pos: offset where the rest-of-line scan starts (list case only)
public void Process_lnki_category(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int pos, int src_len) { // REF.MW:Parser.php|replaceInternalLinks2|Strip the whitespace Category links produce;
if (!para_enabled) return;
int subs_len = root.Subs_len();
for (int i = subs_len - 2; i > -1; i--) { // -2: -1 b/c subs_len is invalid; -1 to skip current lnki
Xop_tkn_itm sub_tkn = root.Subs_get(i);
switch (sub_tkn.Tkn_tid()) {
case Xop_tkn_itm_.Tid_para: // nl found; note this means that BOL -> [[Category:]] is all ws;
if (prv_ws_bgn > 0) { // line begins with ws
if (sub_tkn.Src_bgn() != 0) // do not ignore BOS para; needed b/c it is often <p>; needed for test;
sub_tkn.Ignore_y_(); // ignore nl (pretty-printing only)
prv_ws_bgn = 0; // remove ws
if (ctx.Stack_has(Xop_tkn_itm_.Tid_list)){ // HACK: if in list, set prv_nl_pos to EOL; only here for one test to pass
int nl_at_eol = -1;
for (int j = pos; j < src_len; j++) { // check if rest of line is ws
byte b = src[j];
switch (b) {
case Byte_ascii.Space: case Byte_ascii.Tab: break; // ignore space / tab
case Byte_ascii.Nl:
nl_at_eol = j;
j = src_len; // force loop exit after this pass
break;
default: // something else besides ws; stop
j = src_len; // force loop exit after this pass
break;
}
// NOTE: this check sits inside the loop, but it can only fire on the pass that found "\n", and that pass is also the last one
if (nl_at_eol != -1)
prv_nl_pos = nl_at_eol + 1; // SEE:NOTE_2
}
}
}
return; // para found: done, whether or not the line began with ws
default: // exit if anything except para / nl in front of [[Category:]]
i = -1;
break;
}
}
// if (para_found) // BOS exit; just remove prv_ws_bgn
prv_ws_bgn = 0;
}
// Records the new "previous newline" position, creates a para tkn at para_pos, remembers it as prv_para and appends it to root.
private void Prv_para_new(Xop_ctx ctx, Xop_root_tkn root, int prv_nl_pos, int para_pos) {
	this.prv_nl_pos = prv_nl_pos;
	Xop_tkn_mkr tkn_mkr = ctx.Tkn_mkr();
	this.prv_para = tkn_mkr.Para(para_pos);
	ctx.Subs_add(root, this.prv_para);
}
// Closes whatever section is currently open on prv_para and resets cur_mode. MW: closeParagraph();
// Equivalent MW logic:
//   if ( $this->mLastSection != '' )
//     $result = '</' . $this->mLastSection . ">\n";
//   $this->mLastSection = '';
private void Prv_para_end() {
	if (cur_mode == Mode_none) return;				// nothing open; leave everything untouched
	if (cur_mode == Mode_pre)
		prv_para.Para_end_(Xop_para_tkn.Tid_pre);	// close "pre"
	else if (cur_mode == Mode_para)
		prv_para.Para_end_(Xop_para_tkn.Tid_para);	// close "p"
	cur_mode = Mode_none;							// MW: $this->mLastSection = '';
}
// Marks prv_para as opening a section of the given mode; does nothing when no para tkn has been created yet.
private void Prv_para_bgn(byte mode) {
	if (prv_para == null) return;
	prv_para.Para_bgn_(mode);
}
// Flushes the pending paragraph-stack state onto prv_para. MW: $output .= $paragraphStack;
// cur_pos is not used by the body.
private void Para_stack_end(int cur_pos) {
	if (para_stack == Para_stack_bgn) {			// '<p>'
		prv_para.Para_end_(Xop_para_tkn.Tid_none).Para_bgn_(Xop_para_tkn.Tid_para);
	}
	else if (para_stack == Para_stack_mid) {	// '</p><p>'
		prv_para.Para_end_(Xop_para_tkn.Tid_para).Para_bgn_(Xop_para_tkn.Tid_para);
	}
	// Para_stack_none (or any other value): nothing to emit
}
// Appends a zero-width xnde tkn tagged as <br> to root at bgn_pos.
private void Add_br(Xop_ctx ctx, Xop_root_tkn root, int bgn_pos) {
	Xop_tkn_mkr tkn_mkr = ctx.Tkn_mkr();
	ctx.Subs_add(root, tkn_mkr.Xnde(bgn_pos, bgn_pos).Tag_(Xop_xnde_tag_.Tag_br));
}
// Returns true when every byte between the previous newline (exclusive) and pos (exclusive) is a space or tab.
// Returns false when no previous newline has been recorded (prv_nl_pos == -1); an empty range counts as whitespace.
private boolean Line_is_ws(byte[] src, int pos) {
	if (prv_nl_pos == -1) return false;
	for (int idx = prv_nl_pos + 1; idx < pos; idx++) {
		byte cur = src[idx];
		if (cur != Byte_ascii.Tab && cur != Byte_ascii.Space)
			return false;	// found a non-ws byte; line is not blank
	}
	return true;
}
// Switches off the list worker's Dd_chk flag.
private void Dd_clear(Xop_ctx ctx) {
	ctx.List().Dd_chk_(false);
}
// Values of para_stack: the paragraph markup that is pending and gets emitted by Para_stack_end; mirrors MW's $paragraphStack
private static final int
Para_stack_none = 0 // false
, Para_stack_bgn = 1 // <p>
, Para_stack_mid = 2 // </p><p>
;
// Values of cur_mode: the section that is currently open and gets closed by Prv_para_end; mirrors MW's $this->mLastSection
private static final byte
Mode_none = 0 // ''
, Mode_para = 1 // p
, Mode_pre = 2 // pre
;
}
/*
NOTE_1:
xowa uses \n as the leading character for multi-character hooks; EX: "\n*","\n{|","\n==",etc..
For this section of code, xowa treats \n separately from the rest of the hook for the purpose of emulating MW code.
EX: a\n==b==
MW:
- split into two lines: "a", "==b=="
- call process_nl on "a"
- call process_nl on "==b=="
XO:
- split into "tkns": "a", "\n==", "b", "=="
- add "a"
- add "\n=="
- since there is a "\n", call process_nl, which will effectively call it for "a"
- note that page_end will effectively call process_nl on "==b=="
NOTE_2: Category needs to "trim" previous line
EX:
* a
* b
[[Category:c]]
* d
MW does the following: (REF.MW:Parser.php|replaceInternalLinks2|Strip the whitespace Category links produce;)
- removes the \n after b (REF: $s = rtrim( $s . "\n" ); # bug 87)
- trims all space " " in front of [[ (NOTE: this makes it a non-pre line)
- plucks out the [[Category:c]]
- joins everything after ]] (starting with the \n) to the * b (REF: $s .= trim( $prefix . $trail, "\n" ) == '' ? '': $prefix . $trail;)
This effectively "blanks" out the entire line "\n [[Category:c]]" -> ""
XOWA tries to emulate this by doing the following
- mark the para_tkn after b as blank
- disable pre for the line
- keep the [[Category:c]], but *simulate* a blank line by moving the prv_nl_pos to after the ]]
NOTE_3: if (last_section_is_pre)
PURPOSE: if Category trims previous nl, but nl was part of pre, deactivate it
REASON: occurs b/c MW does separate passes for pre and Category while XO does one pass.
EX: "a\n [[Category:c]]"
- pre is activated by \n\s
- [[Category:c]] indicates that \n\s should be trimmed
so, disable_pre, etc.
*/

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,109 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
// Parser tests for paragraph handling: every test runs with para mode switched on and switches it off again afterwards.
public class Xop_para_wkr_para_tst {
	private Xop_fxt fxt = new Xop_fxt();
	@Before public void init() {
		fxt.Reset();
		fxt.Init_para_y_();
	}
	@After public void teardown() {
		fxt.Init_para_n_();
	}
	// PURPOSE: if ws_pre is in effect, xnde_pre should end it; EX: b:Knowing Knoppix/Other applications
	@Test public void Pre_then_xnde_pre() {
		String raw = String_.Concat_lines_nl_skip_last
		( " a"
		, "b<pre>c"
		, "d</pre>"
		, "e"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<pre>a"
		, "</pre>"
		, "b<pre>c"
		, "d</pre>"
		, ""
		, "<p>e"
		, "</p>"
		, ""
		);
		fxt.Test_parse_page_all_str(raw, expd);
	}
	// PURPOSE: "\s\n" should create new list; was continuing previous list; DATE:2013-07-12
	@Test public void List_ignore_pre_lines() {
		String raw = String_.Concat_lines_nl
		( ": a"
		, ":* b"
		, " "
		, ": c"
		, ":* d"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<dl>"
		, " <dd> a"
		, ""
		, " <ul>"
		, " <li> b"
		, " </li>"
		, " </ul>"
		, " </dd>"
		, "</dl>"
		, ""
		, "<dl>"
		, " <dd> c"
		, ""
		, " <ul>"
		, " <li> d"
		, " </li>"
		, " </ul>"
		, " </dd>"
		, "</dl>"
		, ""
		);
		fxt.Test_parse_page_all_str(raw, expd);
	}
	// PURPOSE: "\n\n\n" was causing multiple breaks; EX:fr.w:Portail:Génie mécanique; DATE:2014-02-17
	@Test public void Multiple_nl_in_tblx() {
		String raw = String_.Concat_lines_nl
		( "<table><tr><td>a"
		, "</td>"
		, ""
		, ""
		, ""
		, ""
		, ""
		, "</tr></table>"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<table>"
		, " <tr>"
		, " <td>a"
		, " </td>"
		, " </tr>"
		, "</table>"
		, ""
		);
		fxt.Test_parse_page_all_str(raw, expd);
	}
	// PURPOSE: handle "\r\n"; EX: Special:MovePage; DATE:2014-03-02
	@Test public void Ignore_cr() {
		String raw = String_.Concat_lines_nl
		( "a\r"
		, "\r"
		, "b\r"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<p>a"
		, "</p>"
		, ""
		, "<p>b"
		, "</p>"
		, ""
		);
		fxt.Test_parse_page_all_str(raw, expd);
	}
}

View File

@@ -0,0 +1,258 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
// Parser tests for whitespace-triggered pre ("\n\s"): every test runs with para mode switched on and switches it off again afterwards.
// NOTE: pre drops only the 1st space of a line (" a" -> "<pre>a"); any further leading spaces are kept.
public class Xop_para_wkr_pre_tst {
	private Xop_fxt fxt = new Xop_fxt();
	@Before public void init() {fxt.Reset(); fxt.Init_para_y_();}
	@After public void teardown() {fxt.Init_para_n_();}
	@Test public void Pre_ignore_bos() {	// PURPOSE: ignore pre at bgn; DATE:2013-07-09
		fxt.Test_parse_page_all_str(String_.Concat_lines_nl
		( " "
		, "b"
		), String_.Concat_lines_nl
		( "<p>"
		, "b"
		, "</p>"
		));
	}
	@Test public void Pre_ignore_bos_tblw() {	// PURPOSE: ignore pre at bgn shouldn't break tblw; EX:commons.wikimedia.org; DATE:2013-07-11
		fxt.Test_parse_page_all_str(String_.Concat_lines_nl
		( " "
		, "{|"
		, "|-"
		, "|a"
		, "|}"
		), String_.Concat_lines_nl
		( "<table>"
		, " <tr>"
		, " <td>a"
		, " </td>"
		, " </tr>"
		, "</table>"
		));
	}
	@Test public void Ignore_bos_xnde() {	// PURPOSE: space at bgn shouldn't create pre; EX:commons.wikimedia.org; " <center>a\n</center>"; DATE:2013-11-28
		fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
		( " <center>a"	// NOTE: leading " " matches MW; DATE:2014-06-23
		, "</center>"
		), String_.Concat_lines_nl_skip_last
		( " <center>a"
		, "</center>"
		, ""
		));
	}
	@Test public void Ignore_pre_in_gallery() {	// PURPOSE: pre in gallery should be ignored; EX:uk.w:EP2; DATE:2014-03-11
		gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.Y_byte;
		try {
			fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
			( "a"
			, ""
			, " <gallery>"
			, " File:A.png"
			, " </gallery>"
			), String_.Concat_lines_nl_skip_last
			( "<p>a"
			, "</p>"
			, " <ul id=\"xowa_gallery_ul_0\" class=\"gallery mw-gallery-traditional\">"
			, " <li id=\"xowa_gallery_li_0\" class=\"gallerybox\" style=\"width: 155px\">"
			, " <div style=\"width: 155px\">"
			, " <div class=\"thumb\" style=\"width: 150px;\">"
			, " <div style=\"margin:15px auto;\">"
			, " <a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"A.png\" src=\"file:///mem/wiki/repo/trg/thumb/7/0/A.png/120px.png\" width=\"120\" height=\"120\" /></a>"
			, " </div>"
			, " </div>"
			, " <div class=\"gallerytext\">"
			, " </div>"
			, " </div>"
			, " </li>"
			, "</ul>"
			,""
			));
		}
		finally {	// always restore the static flag; otherwise a failed assertion leaks it into every later test
			gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.N_byte;
		}
	}
	@Test public void Pre_xnde_gallery() {	// PURPOSE: <gallery> should invalidate pre; EX: en.w:Mary, Queen of Scots
		gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.Y_byte;
		try {
			fxt.Wiki().Xtn_mgr().Init_by_wiki(fxt.Wiki());
			String raw = String_.Concat_lines_nl_skip_last
			( " <gallery>"
			, "File:A.png|b"
			, "</gallery>"
			);
			fxt.Test_parse_page_wiki_str(raw, String_.Concat_lines_nl_skip_last
			( " <ul id=\"xowa_gallery_ul_0\" class=\"gallery mw-gallery-traditional\">"	// NOTE: leading " " matches MW; DATE:2014-06-23
			, " <li id=\"xowa_gallery_li_0\" class=\"gallerybox\" style=\"width: 155px\">"
			, " <div style=\"width: 155px\">"
			, " <div class=\"thumb\" style=\"width: 150px;\">"
			, " <div style=\"margin:15px auto;\">"
			, " <a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"\" src=\"file:///mem/wiki/repo/trg/thumb/7/0/A.png/120px.png\" width=\"120\" height=\"120\" /></a>"
			, " </div>"
			, " </div>"
			, " <div class=\"gallerytext\"><p>b"
			, "</p>"
			, ""
			, " </div>"
			, " </div>"
			, " </li>"
			, "</ul>"
			));
		}
		finally {	// always restore the static flag; otherwise a failed assertion leaks it into every later test
			gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.N_byte;
		}
	}
	@Test public void Ignore_pre_in_center() {	// PURPOSE: pre in center should be ignored; same scenario as Ignore_pre_in_gallery but with <center>
		fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
		( "a"
		, " <center>b"
		, " </center>"
		, "d"
		), String_.Concat_lines_nl_skip_last
		( "<p>a"
		, "</p>"
		, " <center>b"
		, " </center>"
		, ""
		, "<p>d"
		, "</p>"
		)
		);
	}
	@Test public void Remove_only_1st_space() {	// PURPOSE: pre should only remove 1st space; EX: w:Wikipedia:WikiProject_History/CategoryExample; DATE:2014-04-14
		fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
		( "  a"	// 2 spaces: 1st is consumed by pre; 2nd must survive
		, "  b"
		, "  c"
		), String_.Concat_lines_nl_skip_last
		( "<pre> a"	// 1 space left
		, " b"
		, " c"
		, "</pre>"
		)
		);
	}
	@Test public void Remove_only_1st_space__bos() {	// PURPOSE: similar to above but check that pre at \n\s is indented correctly; DATE:2014-04-14
		fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
		( ""
		, "  a"	// 2 spaces: 1st is consumed by pre; 2nd must survive
		, "  b"
		), String_.Concat_lines_nl_skip_last
		( ""
		, "<pre> a"	// 1 space left
		, " b"
		, "</pre>"
		)
		);
	}
	@Test public void Ignore_tblw_td() {	// PURPOSE: \n\s| should continue pre; EX:w:Wikipedia:WikiProject_History/CategoryExample; DATE:2014-04-14
		fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
		( " a"
		, " |"
		, " b"
		), String_.Concat_lines_nl_skip_last
		( "<pre>a"
		, "|"
		, "b"
		, "</pre>"
		)
		);
	}
	@Test public void Tab() {	// PURPOSE: tab inside pre was being converted to space; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
		fxt.Test_html_full_str
		( " \ta"
		, String_.Concat_lines_nl
		( "<pre>\ta"
		, "</pre>"
		));
	}
	@Test public void Style() {	// PURPOSE: " <style>" was not being put in pre; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
		fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl
		( " <style>"
		, " </style>"
		), String_.Concat_lines_nl
		( "<pre>&lt;style>"
		, "&lt;/style>"
		, "</pre>"
		));
	}
	@Test public void Nl_only() {	// PURPOSE: wiki_pre with \n only was being dropped; PAGE:en.w:Preferred_number DATE:2014-06-24
		fxt.Test_html_full_str(String_.Concat_lines_nl_skip_last
		( " a"
		, " "	// was being dropped
		, " b"
		), String_.Concat_lines_nl
		( "<pre>a"
		, ""	// make sure it's still there
		, "b"
		, "</pre>"
		));
	}
	@Test public void Nl_w_ws() {	// PURPOSE: based on Nl_only; make sure that 1 or more spaces does not add extra \n; PAGE:en.w:Preferred_number DATE:2014-06-24
		fxt.Test_html_full_str(String_.Concat_lines_nl_skip_last
		( " a"
		, "  "	// 2 spaces
		, " b"
		), String_.Concat_lines_nl
		( "<pre>a"
		, " "	// 1 space
		, "b"
		, "</pre>"
		));
	}
	@Test public void Nl_many() {	// PURPOSE: handle alternating \n\s; PAGE:en.w:Preferred_number DATE:2014-06-24
		fxt.Test_html_full_str(String_.Concat_lines_nl_skip_last
		( " a"
		, " "
		, " b"
		, " "
		, " c"
		), String_.Concat_lines_nl
		( "<pre>a"
		, ""
		, "b"
		, ""
		, "c"
		, "</pre>"
		));
	}
	@Test public void Source() {	// PURPOSE: " <source>" in pre has issues; PAGE:en.w:Comment_(computer_programming) DATE:2014-06-23
		fxt.Init_para_y_();
		fxt.Test_html_wiki_str(String_.Concat_lines_nl
		( " "
		, " <source>"
		, " a"
		, " </source>"
		, " "
		), String_.Concat_lines_nl
		( "<p>"	// this is wrong, but will be stripped by tidy
		, "</p>"
		, " <pre>"
		, " a"
		, "</pre>"
		, ""
		, "<p><br/>"	// also wrong, but leave for now
		, "</p>"
		));
	}
	@Test public void False_match_xnde() {	// PURPOSE: "\s<trk>" being evaluated as "\s<tr>"; PAGE:de.v:Via_Jutlandica/Gpx DATE:2014-11-29
		fxt.Init_para_y_();
		fxt.Test_html_wiki_str(String_.Concat_lines_nl
		( ""
		, " <trk>"
		), String_.Concat_lines_nl
		( ""
		, "<pre>&lt;trk&gt;"
		, "</pre>"
		));
	}
}

View File

@@ -0,0 +1,92 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.tblws.*;
// Lexer for the "\n\s" hook (newline followed by a space), which is wikitext shorthand for a pre block.
// Decides whether the sequence is (a) plain newline + space, (b) ignorable ws at BOS, (c) a ws-prefixed table header ("\n !"), or (d) something for the para worker's Process_pre to handle.
public class Xop_pre_lxr implements Xop_lxr {
public byte Lxr_tid() {return Xop_lxr_.Tid_pre;}
// registers this lxr for the "\n\s" hook only
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Hook_space, this);} // NOTE: do not treat \n\t as shorthand pre; EX:pl.w:Main_Page; DATE:2014-05-06
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
// bgn_pos: position of the "\n" (or Xop_parser_.Doc_bgn_bos at start of doc); cur_pos: position after the hook's "\s"
// returns the position at which the parser should resume
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
if ( !ctx.Para().Enabled() // para disabled; "\n\s" should just be "\n\s"; NOTE: para disabled in <gallery>
|| ( ctx.Stack_len() > 0 // bounds check
&& ctx.Stack_get_last().Tkn_tid() == Xop_tkn_itm_.Tid_lnki // last tkn is lnki; EX: [[File:A.png|a\n\sb]]; PAGE:s.w:Virus;DATE:2015-03-31
)
) {
if (bgn_pos != Xop_parser_.Doc_bgn_bos) // don't add \n if BOS; EX: "<BOS> a" should be " ", not "\n "
ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos + 1, Xop_nl_tkn.Tid_char, 1));
ctx.Subs_add(root, tkn_mkr.Space(root, cur_pos - 1, cur_pos));
return cur_pos;
}
// txt_pos: position of 1st non-space byte after the hook
int txt_pos = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Space); // NOTE: was Find_fwd_while_tab_or_space, which incorrectly converted tabs to spaces; PAGE:en.w:Cascading_Style_Sheets; DATE:2014-06-23
if (txt_pos == src_len) return cur_pos; // "\n\s" at EOS; treat as \n only; EX: "a\n " -> ""; also bounds check
byte b = src[txt_pos];
if (bgn_pos == Xop_parser_.Doc_bgn_bos) { // BOS; gobble up all \s\t; EX: "BOS\s\s\sa" -> "BOSa"
if (b == Byte_ascii.Nl) { // next char is nl
cur_pos = txt_pos; // position at nl; NOTE: do not position after nl, else may break hdr, tblw, list, etc; EX: "\s\n{|" needs to preserve "\n" for tblw
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, cur_pos, Xop_ignore_tkn.Ignore_tid_pre_at_bos));
return cur_pos; // ignore pre if blank line at bos; EX: "BOS\s\s\n" -> "BOS\n"
}
if (b == Byte_ascii.Lt) // next char is <; possible xnde; flag so that xnde can escape; DATE:2013-11-28; moved outside Doc_bgn_bos block above; PAGE:en.w:Comment_(computer_programming); DATE:2014-06-23
ctx.Xnde().Pre_at_bos_(true);
}
// close open constructs that a new line terminates, depending on the current tkn type
switch (ctx.Cur_tkn_tid()) {
case Xop_tkn_itm_.Tid_tblw_tb: // close tblw attrs; NOTE: after BOS (since no tblw at BOS) but before "\n !" check
case Xop_tkn_itm_.Tid_tblw_tr: case Xop_tkn_itm_.Tid_tblw_th:
Xop_tblw_wkr.Atrs_close(ctx, src, root, Bool_.N);
break;
case Xop_tkn_itm_.Tid_list: // close all lists unless [[Category]]; SEE:NOTE_4; rewritten; DATE:2015-03-31
boolean close_all_lists = true;
// Find_fwd searches forward from txt_pos; the match only counts if it is found exactly at txt_pos
if (Bry_finder.Find_fwd(src, Xop_tkn_.Lnki_bgn, txt_pos, src_len) == txt_pos) { // look for "[["
int tmp_pos = txt_pos + Xop_tkn_.Lnki_bgn.length;
if (Bry_finder.Find_fwd(src, ctx.Wiki().Ns_mgr().Ns_category().Name_db_w_colon(), tmp_pos, src_len) == tmp_pos) // look for "Category:"
close_all_lists = false; // "[[Category:" found; "\n\s[[Category:" should not close list; note that [[Category]] is invisible
}
if (close_all_lists)
Xop_list_wkr_.Close_list_if_present(ctx, root, src, bgn_pos, cur_pos);
break;
}
switch (b) { // handle "\n !" which can be tbl
case Byte_ascii.Bang:
switch (ctx.Cur_tkn_tid()) { // only treat "!" as a header when the current tkn is a tblw tkn
case Xop_tkn_itm_.Tid_tblw_tb: case Xop_tkn_itm_.Tid_tblw_tc: case Xop_tkn_itm_.Tid_tblw_tr:
case Xop_tkn_itm_.Tid_tblw_th: case Xop_tkn_itm_.Tid_tblw_td: case Xop_tkn_itm_.Tid_tblw_te:
int new_cur_pos = txt_pos + 1; // +1 to skip Byte_ascii.Bang
Xop_tblw_lxr_ws.Make(ctx, tkn_mkr, root, src, src_len, bgn_pos, new_cur_pos, Xop_tblw_wkr.Tblw_type_th, true);
return new_cur_pos; // NOTE: return value of Make is discarded; parser always resumes after the "!"
}
break;
}
// everything else: let the para worker decide between pre, tblw-with-leading-ws, xnde, etc.
return ctx.Para().Process_pre(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, txt_pos);
}
// singleton; constructor is not public
public static final Xop_pre_lxr _ = new Xop_pre_lxr(); Xop_pre_lxr() {}
// the "\n\s" hook
private static final byte[] Hook_space = new byte[] {Byte_ascii.Nl, Byte_ascii.Space};
}
/*
NOTE_4: Close_all_lists_unless_category; PAGE:en.w:SHA-2
PURPOSE: \n should ordinarily close list. However, if \n[[Category:A]], then don't close list since [[Category:A]] will trim preceding \n
REASON: occurs b/c MW does separate passes for list and Category while XO does one pass.
EX: closes *a list
*a
*b
EX: does not close
*a
[[Category:A]]
*b
*/

View File

@@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
// Token for a pre marker; Pre_tid distinguishes the opening marker from the closing one.
public class Xop_pre_tkn extends Xop_tkn_itm_base {
	public static final byte Pre_tid_null = 0, Pre_tid_bgn = 1, Pre_tid_end = 2;
	private byte pre_tid = Pre_tid_null;
	// NOTE: pre_bgn_tkn is accepted but not stored or read
	public Xop_pre_tkn(int bgn, int end, byte pre_tid, Xop_tkn_itm pre_bgn_tkn) {
		this.Tkn_ini_pos(false, bgn, end);
		this.pre_tid = pre_tid;
	}
	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_pre;
	}
	public byte Pre_tid() {
		return pre_tid;
	}
}

View File

@@ -0,0 +1,135 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.xowa.parsers.paras.*;
// Lexer for wikitext table hooks ("\n{|", "\n|}", "\n|-", "\n|", "\n!", "\n|+", "||", "!!").
// One instance is registered per hook (see Init_by_wiki); each instance carries the Tblw_type_* of its hook in wlxr_type.
public class Xop_tblw_lxr implements Xop_lxr {
public byte Lxr_tid() {return Xop_lxr_.Tid_tblw;}
// Runs the two pre-checks (Handle_bang, Handle_lnki); if neither consumes the hook, hands it to the tblw worker.
// returns the position at which the parser should resume
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
int rv = Handle_bang(wlxr_type, ctx, ctx.Tkn_mkr(), root, src, src_len, bgn_pos, cur_pos);
if (rv != Continue) return rv;
rv = Handle_lnki(wlxr_type, ctx, ctx.Tkn_mkr(), root, src, src_len, bgn_pos, cur_pos);
if (rv != Continue) return rv;
return ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, false, wlxr_type, Xop_tblw_wkr.Called_from_general, -1, -1);
}
// sentinel returned by Handle_bang / Handle_lnki when they did not consume the hook and normal tblw processing should go on
public static final int Continue = -2; // -2 b/c -1 used by Called_from_pre
// Filters out "\n!", "!!" and "\n|" sequences that are not really table markup.
// Only acts on Tblw_type_th, Tblw_type_th2 and Tblw_type_td; every other type returns Continue.
// returns Continue, or the resume position produced by whichever handler consumed the hook
public static int Handle_bang(int wlxr_type, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
// standalone "!" should be ignored if no tblw present; EX: "a b! c" should not trigger ! for header
switch (wlxr_type) {
case Xop_tblw_wkr.Tblw_type_th: // \n!
case Xop_tblw_wkr.Tblw_type_th2: // !!
case Xop_tblw_wkr.Tblw_type_td: // \n|
Xop_tkn_itm owner_tblw_tb = ctx.Stack_get_typ(Xop_tkn_itm_.Tid_tblw_tb); // check entire stack for tblw; DATE:2014-03-11
if ( owner_tblw_tb == null // no tblw in stack; highly probable that current sequence is not tblw tkn
|| ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_lnki // cur tid is lnki; PAGE:en.w:Pink_(singer); DATE:2014-06-25
) {
int lnki_pos = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_lnki);
if (lnki_pos != Xop_ctx.Stack_not_found && wlxr_type == Xop_tblw_wkr.Tblw_type_td) {// lnki present;// NOTE: added Xop_tblw_wkr.Tblw_type_td b/c th should not apply when tkn_mkr.Pipe() is called below; DATE:2013-04-24
Xop_tkn_itm lnki_tkn = ctx.Stack_pop_til(root, src, lnki_pos, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_tblw_td); // pop any intervening nodes until lnki
ctx.Stack_add(lnki_tkn); // push lnki back onto stack; TODO: combine these 2 lines into 1
// NOTE: this is a "\n|" inside a [[ ]]; must create two tokens for lnki to build correctly;
ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos + 1, Xop_nl_tkn.Tid_char, 1));
return Xop_pipe_lxr._.Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos); // NOTE: need to call pipe_lxr in order to invalidate if lnki; DATE:2014-06-06
}
else { // \n| or \n! but no tbl
if ( bgn_pos != Xop_parser_.Doc_bgn_bos // avoid ! at BOS
&& src[bgn_pos] == Byte_ascii.Nl) // handle "!" etc.
return Xop_tblw_wkr.Handle_false_tblw_match(ctx, root, src, bgn_pos, cur_pos, tkn_mkr.Txt(bgn_pos + 1, cur_pos), true); // +1 to ignore \n of "\n!", "\n!!", "\n|"; DATE:2014-02-19
else // handle "!!" only
return ctx.Lxr_make_txt_(cur_pos);
}
}
if (wlxr_type == Xop_tblw_wkr.Tblw_type_th2) { // !!; extra check to make sure \n! exists; DATE:2014-10-19
int prv_th_pos = Bry_finder.Find_bwd(src, Byte_ascii.Nl, bgn_pos); // search for previous \n
boolean invalid = prv_th_pos == Bry_finder.Not_found; // no \n; invalid
if (!invalid) {
++prv_th_pos; // skip \n
prv_th_pos = Bry_finder.Find_fwd_while_space_or_tab(src, prv_th_pos, src_len); // skip \s; needed for "\n\s!" which is still a tblw
if (prv_th_pos == bgn_pos) // invalid: "\n" is directly in front of "!!"
invalid = true;
else
invalid = src[prv_th_pos] != Byte_ascii.Bang; // invalid if not "\n!"
}
if (invalid)
return Xop_tblw_wkr.Handle_false_tblw_match(ctx, root, src, bgn_pos, cur_pos, tkn_mkr.Txt(bgn_pos, cur_pos), false);
}
break;
}
return Continue;
}
// Handles table hooks that occur while a lnki is the last tkn on ctx's stack; returns Continue in every other situation.
// - lnki has no pipe yet: "\n{|", "\n|+", "\n|-", "\n|}" invalidate the lnki; "||" becomes a pipe tkn (or invalidates the lnki if its title does not parse)
// - lnki already has a pipe: "||" becomes a pipe tkn; "!" and "!!" become text
public static int Handle_lnki(int wlxr_type, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
Xop_tkn_itm last_tkn = ctx.Stack_get_last();
if ( last_tkn != null
&& last_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) {
Xop_lnki_tkn lnki = (Xop_lnki_tkn)last_tkn;
if ( lnki.Pipe_count_is_zero()) { // 1st pipe; EX: [[A\n|+B]]
boolean invalidate = false;
switch (wlxr_type) { // tblw found; check if in lnki and validate ttl; DATE:2014-03-29
case Xop_tblw_wkr.Tblw_type_tb: // \n{|
case Xop_tblw_wkr.Tblw_type_tc: // \n|+
case Xop_tblw_wkr.Tblw_type_tr: // \n|-
case Xop_tblw_wkr.Tblw_type_te: // \n|}
invalidate = true; // always invalidate
break;
case Xop_tblw_wkr.Tblw_type_td2: // ||; EX: [[A||B]]
if (ctx.Tid_is_image_map()) { // if in ImageMap, then treat "||" as "pipe" (not "pipe_text"); note that outer tbl is ignored; EX:w:United_States_presidential_election,_1992
ctx.Subs_add(root, tkn_mkr.Pipe(bgn_pos, cur_pos));
return cur_pos;
}
invalidate = !Xop_lnki_wkr_.Parse_ttl(ctx, src, lnki, bgn_pos); // check if invalid; EX: "[[A<||]]" would be invalid b/c of <
if (!invalidate) { // "valid" title, but "||" must be converted to pipe inside lnki; EX:cs.w:Main_Page; DATE:2014-05-09
ctx.Subs_add(root, tkn_mkr.Pipe(bgn_pos, cur_pos)); // NOTE: technically need to check if pipe or pipe_text; for now, do pipe as pipe_text could break [[File:A.png||20px]]; DATE:2014-05-06
return cur_pos;
}
break;
}
if (invalidate) {
ctx.Stack_pop_last(); // remove the lnki from the stack before invalidating it
return Xop_lnki_wkr_.Invalidate_lnki(ctx, src, root, lnki, bgn_pos);
}
}
else { // nth pipe; no need to check for invalidate
switch (wlxr_type) {
case Xop_tblw_wkr.Tblw_type_td2: // ||
ctx.Subs_add(root, tkn_mkr.Pipe(bgn_pos, cur_pos));
return cur_pos;
case Xop_tblw_wkr.Tblw_type_th2: // !!
case Xop_tblw_wkr.Tblw_type_th: // !
ctx.Subs_add(root, tkn_mkr.Txt(bgn_pos, cur_pos)); // NOTE: cur_pos should handle ! and !!
return cur_pos;
}
}
}
return Continue;
}
// creates the lxr for one specific hook type
public Xop_tblw_lxr(byte wlxr_type) {this.wlxr_type = wlxr_type;} private byte wlxr_type;
// registration instance; its wlxr_type is left at the default 0 and it is only used to call Init_by_wiki
public static final Xop_tblw_lxr _ = new Xop_tblw_lxr(); Xop_tblw_lxr() {}
// registers one dedicated lxr instance per hook
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
core_trie.Add(Hook_tb, new Xop_tblw_lxr(Xop_tblw_wkr.Tblw_type_tb));
core_trie.Add(Hook_te, new Xop_tblw_lxr(Xop_tblw_wkr.Tblw_type_te));
core_trie.Add(Hook_tr, new Xop_tblw_lxr(Xop_tblw_wkr.Tblw_type_tr));
core_trie.Add(Hook_td, new Xop_tblw_lxr(Xop_tblw_wkr.Tblw_type_td));
core_trie.Add(Hook_th, new Xop_tblw_lxr(Xop_tblw_wkr.Tblw_type_th));
core_trie.Add(Hook_tc, new Xop_tblw_lxr(Xop_tblw_wkr.Tblw_type_tc));
core_trie.Add(Hook_td2, new Xop_tblw_lxr(Xop_tblw_wkr.Tblw_type_td2));
core_trie.Add(Hook_th2, new Xop_tblw_lxr(Xop_tblw_wkr.Tblw_type_th2));
}
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
// hook byte sequences; all but "||" and "!!" start with "\n"
public static final byte[] Hook_tb = Bry_.new_a7("\n{|"), Hook_te = Bry_.new_a7("\n|}"), Hook_tr = Bry_.new_a7("\n|-")
, Hook_td = Bry_.new_a7("\n|"), Hook_th = Bry_.new_a7("\n!"), Hook_tc = Bry_.new_a7("\n|+")
, Hook_td2 = Bry_.new_a7("||"), Hook_th2 = Bry_.new_a7("!!");
}

View File

@@ -0,0 +1,66 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
// Handles table hooks that are preceded by whitespace on their line; EX: "\n {|", "\n ! a"
public class Xop_tblw_lxr_ws {
	public static final byte[] Hook_tb = Bry_.new_a7("{|"), Hook_te = Bry_.new_a7("|}"), Hook_tr = Bry_.new_a7("|-")
	, Hook_th = Bry_.new_a7("!"), Hook_tc = Bry_.new_a7("|+");
	// returned when a "|" is not at the start of a line and must be handled by pipe_lxr instead
	public static final int Tblw_ws_cell_pipe = -1;
	// Returns the position to resume parsing at, or Tblw_ws_cell_pipe (see above).
	// called_from_pre: when true, the backwards scan over root's tkns is skipped, since pre_lxr has not created the \n tkn yet; EX: "\n ! a"; DATE:2014-02-14
	public static int Make(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, byte wlxr_type, boolean called_from_pre) {
		int bang_rv = Xop_tblw_lxr.Handle_bang(wlxr_type, ctx, ctx.Tkn_mkr(), root, src, src_len, bgn_pos, cur_pos);
		if (bang_rv != Xop_tblw_lxr.Continue) return bang_rv;
		int lnki_rv = Xop_tblw_lxr.Handle_lnki(wlxr_type, ctx, ctx.Tkn_mkr(), root, src, src_len, bgn_pos, cur_pos);
		if (lnki_rv != Xop_tblw_lxr.Continue) return lnki_rv;
		if (!called_from_pre) {
			// walk backwards over trailing ws tkns; stop at 1st non-ws tkn and note whether it is a nl / para
			boolean nl_found = false;
			int tkn_idx = root.Subs_len() - 1;
			for (; tkn_idx >= 0; tkn_idx--) {
				int tid = root.Subs_get(tkn_idx).Tkn_tid();
				if (tid == Xop_tkn_itm_.Tid_space || tid == Xop_tkn_itm_.Tid_tab) continue;	// ws: keep moving backwards
				nl_found = tid == Xop_tkn_itm_.Tid_newLine || tid == Xop_tkn_itm_.Tid_para;
				break;
			}
			if (tkn_idx == -1) {	// bos reached; all tkns are ws;
				if (wlxr_type != Xop_tblw_wkr.Tblw_type_tb)	// not {|; invalid since no containing {|
					return ctx.Lxr_make_txt_(cur_pos);
				root.Subs_del_after(0);	// trim
				return ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, false, wlxr_type, Xop_tblw_wkr.Called_from_general, -1, -1);	// process {|
			}
			if (nl_found)
				root.Subs_del_after(tkn_idx);
			else if (wlxr_type == Xop_tblw_wkr.Tblw_type_td)	// | but no nl; return control to pipe_lxr for further processing
				return Tblw_ws_cell_pipe;
		}
		return ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, false, wlxr_type, Xop_tblw_wkr.Called_from_general, -1, -1);
	}
}

View File

@@ -0,0 +1,37 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Token for a table start; reports Tid_tblw_tb as its token id and Xop_xnde_tag_.Tid_table as its tag id.
 * Besides the attribute range shared by all Xop_tblw_tkn implementations, it tracks how many captions the table has.
 */
public class Xop_tblw_tb_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
	// attribute range within src; starts as Atrs_null / -1 until a range is assigned
	private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
	private int atrs_end = -1;
	// parsed attributes; null until Atrs_ary_as_tblw_ is called
	private Xop_xatr_itm[] atrs_ary;
	// true when the token was flagged as xml-style by its creator; can be changed later through Tblw_xml_
	private boolean tblw_xml;
	private int tblw_subs_len;
	private int caption_count = 0;

	/**
	 * @param bgn          start position passed to Tkn_ini_pos
	 * @param end          end position passed to Tkn_ini_pos
	 * @param tblw_xml     initial value of the xml flag
	 * @param auto_created when true, the attribute range is set to the empty range [bgn, bgn]
	 */
	public Xop_tblw_tb_tkn(int bgn, int end, boolean tblw_xml, boolean auto_created) {
		this.tblw_xml = tblw_xml;
		this.Tkn_ini_pos(false, bgn, end);
		if (auto_created) {
			// auto-created should be marked as having no attributes, else text may get gobbled up incorrectly; EX:Paris#Demographics DATE:2014-03-18
			this.atrs_bgn = bgn;
			this.atrs_end = bgn;
		}
	}

	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_tblw_tb;
	}
	public int Tblw_tid() {
		return Xop_xnde_tag_.Tid_table;
	}

	public int Atrs_bgn() {
		return atrs_bgn;
	}
	public int Atrs_end() {
		return atrs_end;
	}
	public void Atrs_rng_set(int bgn, int end) {
		this.atrs_bgn = bgn;
		this.atrs_end = end;
	}
	public Xop_xatr_itm[] Atrs_ary() {
		return atrs_ary;
	}
	public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {
		this.atrs_ary = v;
		return this;
	}

	public boolean Tblw_xml() {
		return tblw_xml;
	}
	public void Tblw_xml_(boolean v) {
		this.tblw_xml = v;
	}

	public int Tblw_subs_len() {
		return tblw_subs_len;
	}
	public void Tblw_subs_len_add_() {
		tblw_subs_len += 1;
	}

	public int Caption_count() {
		return caption_count;
	}
	public Xop_tblw_tb_tkn Caption_count_(int v) {
		this.caption_count = v;
		return this;
	}
	public Xop_tblw_tb_tkn Caption_count_add_1() {
		caption_count += 1;
		return this;
	}

	/** Adds each token of ary, in order, through the inherited Subs_add; returns this for chaining. */
	public Xop_tblw_tb_tkn Subs_add_ary(Xop_tkn_itm... ary) {
		for (int i = 0; i < ary.length; i++) {
			super.Subs_add(ary[i]);
		}
		return this;
	}
}

View File

@@ -0,0 +1,30 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Token for a table caption; reports Tid_tblw_tc as its token id and Xop_xnde_tag_.Tid_caption as its tag id.
 */
public class Xop_tblw_tc_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
	// attribute range within src; starts as Atrs_null / -1 until a range is assigned
	private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
	private int atrs_end = -1;
	// parsed attributes; null until Atrs_ary_as_tblw_ is called
	private Xop_xatr_itm[] atrs_ary;
	private boolean tblw_xml;
	private int tblw_subs_len;

	/**
	 * @param bgn      start position passed to Tkn_ini_pos
	 * @param end      end position passed to Tkn_ini_pos
	 * @param tblw_xml value of the xml flag
	 */
	public Xop_tblw_tc_tkn(int bgn, int end, boolean tblw_xml) {
		this.tblw_xml = tblw_xml;
		this.Tkn_ini_pos(false, bgn, end);
	}

	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_tblw_tc;
	}
	public int Tblw_tid() {
		return Xop_xnde_tag_.Tid_caption;
	}

	public int Atrs_bgn() {
		return atrs_bgn;
	}
	public int Atrs_end() {
		return atrs_end;
	}
	public void Atrs_rng_set(int bgn, int end) {
		this.atrs_bgn = bgn;
		this.atrs_end = end;
	}
	public Xop_xatr_itm[] Atrs_ary() {
		return atrs_ary;
	}
	public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {
		this.atrs_ary = v;
		return this;
	}

	public boolean Tblw_xml() {
		return tblw_xml;
	}
	public int Tblw_subs_len() {
		return tblw_subs_len;
	}
	public void Tblw_subs_len_add_() {
		tblw_subs_len += 1;
	}

	/** Adds each token of ary, in order, through the inherited Subs_add; returns this for chaining. */
	public Xop_tblw_tc_tkn Subs_add_ary(Xop_tkn_itm... ary) {
		for (int i = 0; i < ary.length; i++) {
			super.Subs_add(ary[i]);
		}
		return this;
	}
}

View File

@@ -0,0 +1,30 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Token for a table data cell; reports Tid_tblw_td as its token id and Xop_xnde_tag_.Tid_td as its tag id.
 */
public class Xop_tblw_td_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
	// attribute range within src; starts as Atrs_null / -1 until a range is assigned
	private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
	private int atrs_end = -1;
	// parsed attributes; null until Atrs_ary_as_tblw_ is called
	private Xop_xatr_itm[] atrs_ary;
	private boolean tblw_xml;
	private int tblw_subs_len;

	/**
	 * @param bgn      start position passed to Tkn_ini_pos
	 * @param end      end position passed to Tkn_ini_pos
	 * @param tblw_xml value of the xml flag
	 */
	public Xop_tblw_td_tkn(int bgn, int end, boolean tblw_xml) {
		this.tblw_xml = tblw_xml;
		this.Tkn_ini_pos(false, bgn, end);
	}

	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_tblw_td;
	}
	public int Tblw_tid() {
		return Xop_xnde_tag_.Tid_td;
	}

	public int Atrs_bgn() {
		return atrs_bgn;
	}
	public int Atrs_end() {
		return atrs_end;
	}
	public void Atrs_rng_set(int bgn, int end) {
		this.atrs_bgn = bgn;
		this.atrs_end = end;
	}
	public Xop_xatr_itm[] Atrs_ary() {
		return atrs_ary;
	}
	public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {
		this.atrs_ary = v;
		return this;
	}

	public boolean Tblw_xml() {
		return tblw_xml;
	}
	public int Tblw_subs_len() {
		return tblw_subs_len;
	}
	public void Tblw_subs_len_add_() {
		tblw_subs_len += 1;
	}

	/** Adds each token of ary, in order, through the inherited Subs_add; returns this for chaining. */
	public Xop_tblw_td_tkn Subs_add_ary(Xop_tkn_itm... ary) {
		for (int i = 0; i < ary.length; i++) {
			super.Subs_add(ary[i]);
		}
		return this;
	}
}

View File

@@ -0,0 +1,30 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Token for a table header cell; reports Tid_tblw_th as its token id and Xop_xnde_tag_.Tid_th as its tag id.
 */
public class Xop_tblw_th_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
	// attribute range within src; starts as Atrs_null / -1 until a range is assigned
	private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
	private int atrs_end = -1;
	// parsed attributes; null until Atrs_ary_as_tblw_ is called
	private Xop_xatr_itm[] atrs_ary;
	private boolean tblw_xml;
	private int tblw_subs_len;

	/**
	 * @param bgn      start position passed to Tkn_ini_pos
	 * @param end      end position passed to Tkn_ini_pos
	 * @param tblw_xml value of the xml flag
	 */
	public Xop_tblw_th_tkn(int bgn, int end, boolean tblw_xml) {
		this.tblw_xml = tblw_xml;
		this.Tkn_ini_pos(false, bgn, end);
	}

	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_tblw_th;
	}
	public int Tblw_tid() {
		return Xop_xnde_tag_.Tid_th;
	}

	public int Atrs_bgn() {
		return atrs_bgn;
	}
	public int Atrs_end() {
		return atrs_end;
	}
	public void Atrs_rng_set(int bgn, int end) {
		this.atrs_bgn = bgn;
		this.atrs_end = end;
	}
	public Xop_xatr_itm[] Atrs_ary() {
		return atrs_ary;
	}
	public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {
		this.atrs_ary = v;
		return this;
	}

	public boolean Tblw_xml() {
		return tblw_xml;
	}
	public int Tblw_subs_len() {
		return tblw_subs_len;
	}
	public void Tblw_subs_len_add_() {
		tblw_subs_len += 1;
	}

	/** Adds each token of ary, in order, through the inherited Subs_add; returns this for chaining. */
	public Xop_tblw_th_tkn Subs_add_ary(Xop_tkn_itm... ary) {
		for (int i = 0; i < ary.length; i++) {
			super.Subs_add(ary[i]);
		}
		return this;
	}
}

View File

@@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Contract shared by the table tokens (tb, tr, td, th, tc): a tag id, an xml flag,
 * a child counter, and an attribute range with its parsed attribute array.
 */
public interface Xop_tblw_tkn extends Xop_tkn_itm {
	/** Tag id of the element; implementations return an Xop_xnde_tag_.Tid_* constant. */
	int Tblw_tid();

	/** Value of the token's xml flag. */
	boolean Tblw_xml();

	/** Current value of the child counter. */
	int Tblw_subs_len();

	/** Increments the child counter by one. */
	void Tblw_subs_len_add_();

	/** Start of the attribute range; may hold a marker value such as Xop_tblw_wkr.Atrs_null. */
	int Atrs_bgn();

	/** End of the attribute range. */
	int Atrs_end();

	/** Replaces both ends of the attribute range. */
	void Atrs_rng_set(int bgn, int end);

	/** Parsed attributes, as last stored by Atrs_ary_as_tblw_. */
	Xop_xatr_itm[] Atrs_ary();

	/** Stores the parsed attributes and returns the token for chaining. */
	Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v);
}

View File

@@ -0,0 +1,34 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Token for a table row; reports Tid_tblw_tr as its token id and Xop_xnde_tag_.Tid_tr as its tag id.
 */
public class Xop_tblw_tr_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
	// attribute range within src; starts as Atrs_null / -1 until a range is assigned
	private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
	private int atrs_end = -1;
	// parsed attributes; null until Atrs_ary_as_tblw_ is called
	private Xop_xatr_itm[] atrs_ary;
	private boolean tblw_xml;
	private int tblw_subs_len;

	/**
	 * @param bgn          start position passed to Tkn_ini_pos
	 * @param end          end position passed to Tkn_ini_pos
	 * @param tblw_xml     value of the xml flag
	 * @param auto_created when true, the attribute range is set to the empty range [bgn, bgn]
	 */
	public Xop_tblw_tr_tkn(int bgn, int end, boolean tblw_xml, boolean auto_created) {
		this.tblw_xml = tblw_xml;
		this.Tkn_ini_pos(false, bgn, end);
		if (auto_created) {
			// auto-created should be marked as having no attributes, else text may get gobbled up incorrectly; EX:Paris#Demographics DATE:2014-03-18
			this.atrs_bgn = bgn;
			this.atrs_end = bgn;
		}
	}

	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_tblw_tr;
	}
	public int Tblw_tid() {
		return Xop_xnde_tag_.Tid_tr;
	}

	public int Atrs_bgn() {
		return atrs_bgn;
	}
	public int Atrs_end() {
		return atrs_end;
	}
	public void Atrs_rng_set(int bgn, int end) {
		this.atrs_bgn = bgn;
		this.atrs_end = end;
	}
	public Xop_xatr_itm[] Atrs_ary() {
		return atrs_ary;
	}
	public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {
		this.atrs_ary = v;
		return this;
	}

	public boolean Tblw_xml() {
		return tblw_xml;
	}
	public int Tblw_subs_len() {
		return tblw_subs_len;
	}
	public void Tblw_subs_len_add_() {
		tblw_subs_len += 1;
	}

	/** Adds each token of ary, in order, through the inherited Subs_add; returns this for chaining. */
	public Xop_tblw_tr_tkn Subs_add_ary(Xop_tkn_itm... ary) {
		for (int i = 0; i < ary.length; i++) {
			super.Subs_add(ary[i]);
		}
		return this;
	}
}

View File

@@ -0,0 +1,551 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.paras.*;
public class Xop_tblw_wkr implements Xop_ctx_wkr {
// number of upcoming "|}" delimiters to swallow; incremented when a table start / end is discarded, decremented when a stray "|}" is consumed
private int tblw_te_ignore_count = 0;
// status of 1st cell pipe; EX: \n| a | b | c || -> flag pipe between a and b but ignore b and c
private boolean cell_pipe_seen;
/** Returns whether the first pipe of the current cell has been seen. */
public boolean Cell_pipe_seen() {
	return cell_pipe_seen;
}
/** Sets the first-cell-pipe flag; returns this worker for chaining. */
public Xop_tblw_wkr Cell_pipe_seen_(boolean v) {
	this.cell_pipe_seen = v;
	return this;
}
/** No per-context construction work is needed. */
public void Ctor_ctx(Xop_ctx ctx) {}
/** Resets the per-page state (pipe flag and ignore counter) at the start of a page. */
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
	this.tblw_te_ignore_count = 0;
	this.cell_pipe_seen = false;
}
/** No end-of-page work is needed. */
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
/**
 * Closes a table token that was left open.
 * Calls Subs_move(root) on the token (presumably re-parenting the tokens that follow it in root -- confirm against Xop_tkn_itm)
 * and then sets the token's source end to cur_pos.
 * ctx, src, src_len and bgn_pos are not used here.
 */
public void AutoClose(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
	final Xop_tkn_itm open_tkn = tkn;
	open_tkn.Subs_move(root);
	open_tkn.Src_end_(cur_pos);
}
// identifies which component invoked Make_tkn_bgn: the general lexer path, the list worker, or the pre worker
public static final byte Called_from_general = 0;
public static final byte Called_from_list = 1;
public static final byte Called_from_pre = 2;
/**
 * Entry point for every table delimiter, both wiki-syntax ("{|", "|-", "|", "||", "!", "!!", "|+", "|}") and xml-style (tbl_is_xml).
 * Steps, in order:
 *   1. clamp a beginning-of-document bgn_pos to 0
 *   2. if a list is open (and this is wiki-syntax not called from the list worker): treat "||" as a plain pipe, else close the list
 *   3. close any open apostrophe frame
 *   4. if there is no enclosing table (or no "{|" on the stack for wiki-syntax): either auto-create the missing
 *      parents (xml-style) or emit the delimiter as text / pipe and return early
 *   5. convert pending text after an open wiki-syntax tb/tr into attributes (Atrs_make)
 *   6. delegate to Make_tkn_end for "|}" or to Make_tkn_bgn_tblw for everything else
 * @param wlxr_type   one of the Tblw_type_* constants identifying the delimiter
 * @param called_from one of Called_from_general / Called_from_list / Called_from_pre
 * @param atrs_bgn    explicit attribute range start, forwarded to Make_tkn_bgn_tblw
 * @param atrs_end    explicit attribute range end, forwarded to Make_tkn_bgn_tblw
 * @return the position at which lexing should resume (usually cur_pos); -1 when called from pre and the delimiter cannot start a table element
 */
public int Make_tkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, boolean tbl_is_xml, byte wlxr_type, byte called_from, int atrs_bgn, int atrs_end) {// REF.MW: Parser|doTableStuff
if (bgn_pos == Xop_parser_.Doc_bgn_bos) {
bgn_pos = 0; // do not allow -1 pos
}
int list_tkn_idx = ctx.Stack_idx_find_but_stop_at_tbl(Xop_tkn_itm_.Tid_list);
if ( list_tkn_idx != -1 // list is in effect; DATE:2014-05-05
&& !tbl_is_xml // tbl is wiki-syntax; ie: auto-close if "{|" but do not close if "<table>"; DATE:2014-02-05
&& called_from != Called_from_list // do not close if called from list; EX: consider "{|"; "* a {|" is called from list_wkr, and should not close; "* a\n{|" is called from tblw_lxr and should close; DATE:2014-02-14
) {
if (wlxr_type == Tblw_type_td2) { // if in list, treat "||" as lnki, not tblw; EX: es.d:casa; es.d:tres; DATE:2014-02-15
ctx.Subs_add(root, ctx.Tkn_mkr().Pipe(bgn_pos, cur_pos)); // NOTE: technically need to check if pipe or pipe_text; for now, do pipe as pipe_text could break [[File:A.png||20px]]; DATE:2014-05-06
return cur_pos;
}
else {
Xop_list_wkr_.Close_list_if_present(ctx, root, src, bgn_pos, cur_pos);
}
}
if (ctx.Apos().Stack_len() > 0) // open apos; note that apos keeps its own stack, as they are not "structural" (not sure about this)
ctx.Apos().EndFrame(ctx, root, src, cur_pos, true); // close it
Xop_tblw_tkn prv_tkn = ctx.Stack_get_tbl();
// no usable enclosing table: each case below either breaks (continue to normal processing) or returns early
if ( prv_tkn == null // prv_tkn not found; i.e.: no earlier "{|" or "<table>"
|| ( ctx.Stack_get_tblw_tb() == null // no {| on stack; DATE:2014-05-05
&& !tbl_is_xml // and cur is tblw (i.e.: not xnde); DATE:2014-05-05
)
) {
switch (wlxr_type) {
case Tblw_type_tb: // "{|";
break; // noop; by definition "{|" does not need to have a previous "{|"
case Tblw_type_td: // "|"
case Tblw_type_td2: // "||"
if (tbl_is_xml) { // <td> should automatically add <table><tr>
ctx.Subs_add_and_stack_tblw(root, prv_tkn, tkn_mkr.Tblw_tb(bgn_pos, bgn_pos, tbl_is_xml, true));
prv_tkn = tkn_mkr.Tblw_tr(bgn_pos, bgn_pos, tbl_is_xml, true);
ctx.Subs_add_and_stack_tblw(root, prv_tkn, prv_tkn);
break;
}
else {
if (called_from == Called_from_pre)
return -1;
else { // DATE:2014-02-19; NOTE: do not add nl if ||; DATE:2014-04-14
if (wlxr_type == Tblw_type_td) { // "\n|"
ctx.Subs_add(root, ctx.Tkn_mkr().NewLine(bgn_pos, bgn_pos + 1, Xop_nl_tkn.Tid_char, 1));
ctx.Subs_add(root, ctx.Tkn_mkr().Pipe(bgn_pos + 1, cur_pos));
}
else // "||"
ctx.Subs_add(root, ctx.Tkn_mkr().Pipe(bgn_pos, cur_pos));
return cur_pos;
}
}
case Tblw_type_th: // "!"
case Tblw_type_th2: // "!!"
case Tblw_type_tc: // "|+"
case Tblw_type_tr: // "|-"
if (tbl_is_xml) { // <tr> should automatically add <table>; DATE:2014-02-13
prv_tkn = tkn_mkr.Tblw_tb(bgn_pos, bgn_pos, tbl_is_xml, true);
ctx.Subs_add_and_stack_tblw(root, prv_tkn, prv_tkn);
break;
}
else {
if (called_from == Called_from_pre)
return -1;
else
return Xop_tblw_wkr.Handle_false_tblw_match(ctx, root, src, bgn_pos, cur_pos, ctx.Tkn_mkr().Txt(bgn_pos + 1, cur_pos), true); // DATE:2014-02-19
}
case Tblw_type_te: // "|}"
if (tblw_te_ignore_count > 0) {
--tblw_te_ignore_count;
return cur_pos;
}
else {
if (called_from == Called_from_pre)
return -1;
else
return Xop_tblw_wkr.Handle_false_tblw_match(ctx, root, src, bgn_pos, cur_pos, tkn_mkr.Txt(bgn_pos + 1, cur_pos), true); // +1 to skip "\n" in "\n|}" (don't convert \n to text); DATE:2014-02-19
}
default: throw Exc_.new_unhandled(wlxr_type);
}
}
int prv_tid = prv_tkn == null ? Xop_tkn_itm_.Tid_null : prv_tkn.Tkn_tid();
if (prv_tkn != null && !prv_tkn.Tblw_xml()) { // note that this logic is same as Atrs_close; repeated here for "perf"
switch (prv_tid) {
case Xop_tkn_itm_.Tid_tblw_tb: case Xop_tkn_itm_.Tid_tblw_tr:
Atrs_make(ctx, src, root, this, prv_tkn, Bool_.N);
break;
}
}
// "|}" closes the table; every other delimiter opens a new table element
if (wlxr_type == Tblw_type_te)
return Make_tkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_tblw_te, wlxr_type, prv_tkn, prv_tid, tbl_is_xml);
else
return Make_tkn_bgn_tblw(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, wlxr_type, tbl_is_xml, atrs_bgn, atrs_end, prv_tkn, prv_tid);
}
/**
 * Creates the token for an opening table element (tb, tr, td, th, tc) once Make_tkn_bgn has established the context.
 * For each element type, the inner switch on prv_tid repairs invalid nesting relative to the innermost open table token:
 * it pops still-open siblings off the stack, auto-creates a missing row, or ignores the delimiter entirely.
 * The new token is then added and stacked, an explicit attribute range (if any) is applied, and paragraph block state is updated.
 * @param wlxr_type one of the Tblw_type_* constants identifying the delimiter
 * @param atrs_bgn  explicit attribute range start; only applied when greater than Atrs_ignore_check
 * @param atrs_end  explicit attribute range end
 * @param prv_tkn   innermost open table token as found by the caller; may be null
 * @param prv_tid   token id of prv_tkn, or Tid_null when prv_tkn is null
 * @return the position at which lexing should resume; cur_pos except when a duplicate table start is ignored, in which case the result of the find-until-newline call
 */
private int Make_tkn_bgn_tblw(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, byte wlxr_type, boolean tbl_is_xml, int atrs_bgn, int atrs_end, Xop_tblw_tkn prv_tkn, int prv_tid) {
if (wlxr_type != Tblw_type_tb) // NOTE: do not ignore ws if {|; will cause strange behavior with pre; DATE:2013-02-12
Ignore_ws(ctx, root);
Xop_tblw_tkn new_tkn = null;
switch (wlxr_type) {
case Tblw_type_tb: // <table>
boolean ignore_prv = false, auto_create = false;
switch (prv_tid) {
case Xop_tkn_itm_.Tid_null: // noop; <table>
break;
case Xop_tkn_itm_.Tid_tblw_td: // noop; <td><table>
case Xop_tkn_itm_.Tid_tblw_th: // noop; <th><table>
break;
case Xop_tkn_itm_.Tid_tblw_tb: // fix; <table><table> -> <table>; ignore current table; DATE:2014-02-02
if (prv_tkn.Tblw_xml()) { // fix: <table><table> -> <table>; earlier tbl is xnde; ignore; EX:en.b:Wikibooks:Featured books; DATE:2014-02-08
((Xop_tblw_tb_tkn)prv_tkn).Tblw_xml_(false); // if <table>{|, discard <table>, but mark {| as <table>; needed to handle <table>\n{|\n| where "|" must be treated as tblw dlm; DATE:2014-02-22
ignore_prv = true;
}
// else // fix: <table><table> -> <table><tr><td><table>; earlier tbl is tblw; auto-create; EX:it.w:Main_Page; DATE:2014-02-08; TIDY:depend on tidy to fix; PAGE: it.w:Portal:Animali; DATE:2014-05-31
// auto_create = true;
break;
case Xop_tkn_itm_.Tid_tblw_tr: // noop: <table><tr><table> -> <table><tr><td><table>; should probably auto-create td, but MW does not; DATE:2014-03-18
case Xop_tkn_itm_.Tid_tblw_tc: // noop; <caption><table>; TIDY:was <caption></caption><tr><td><table>; PAGE: es.w:Savilla DATE:2014-06-29
break;
}
if (ignore_prv) {
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, cur_pos, Xop_ignore_tkn.Ignore_tid_htmlTidy_tblw));
++tblw_te_ignore_count;
cur_pos = Bry_finder.Find_fwd_until(src, cur_pos, src_len, Byte_ascii.Nl); // NOTE: minor hack; this tblw tkn will be ignored, so ignore any of its attributes as well; gobble up all chars till nl. see: if two consecutive tbs, ignore attributes on 2nd; en.wikibooks.org/wiki/Wikibooks:Featured books
return cur_pos;
}
if (auto_create) {
// NOTE(review): auto_create is never set to true (the only assignment is commented out above), so this branch is currently dead
ctx.Subs_add_and_stack_tblw(root, prv_tkn, tkn_mkr.Tblw_tr(bgn_pos, bgn_pos, tbl_is_xml, true));
ctx.Subs_add_and_stack_tblw(root, prv_tkn, tkn_mkr.Tblw_td(bgn_pos, bgn_pos, tbl_is_xml));
}
Xop_tblw_tb_tkn tb_tkn = tkn_mkr.Tblw_tb(bgn_pos, cur_pos, tbl_is_xml, false);
new_tkn = tb_tkn;
break;
case Tblw_type_tr: // <tr>
switch (prv_tid) {
case Xop_tkn_itm_.Tid_tblw_tb: break; // noop; <table><tr>
case Xop_tkn_itm_.Tid_tblw_tc: // fix; <caption><tr> -> <caption></caption><tr>
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tc), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
break;
case Xop_tkn_itm_.Tid_tblw_td: // fix; <td><tr> -> <td></td></tr><tr>
case Xop_tkn_itm_.Tid_tblw_th: // fix; <th><tr> -> <th></th></tr><tr>
if (!tbl_is_xml)
ctx.Para().Process_nl(ctx, root, src, bgn_pos, bgn_pos + 1); // simulate "\n"; 2012-12-08
int stack_pos = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tr);
if (stack_pos != Xop_ctx.Stack_not_found) // don't pop <tr> if none found; PAGE:en.w:Turks_in_Denmark DATE:2014-03-02
ctx.Stack_pop_til(root, src, stack_pos, true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
break;
case Xop_tkn_itm_.Tid_tblw_tr: // fix; <tr><tr> -> <tr>
if (prv_tkn.Tblw_subs_len() == 0) { // NOTE: set prv_row to ignore, but do not pop; see Tr_dupe_xnde and [[Jupiter]]; only invoke if same type; EX: <tr><tr> but not |-<tr>; DATE:2013-12-09
Xop_tkn_itm prv_row = ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tr), false, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
prv_row.Ignore_y_();
}
else
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tr), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
break;
}
Xop_tblw_tr_tkn tr_tkn = tkn_mkr.Tblw_tr(bgn_pos, cur_pos, tbl_is_xml, false);
new_tkn = tr_tkn;
break;
case Tblw_type_td: // <td>
case Tblw_type_td2:
boolean create_th = false;
switch (prv_tid) {
case Xop_tkn_itm_.Tid_tblw_tr: break; // noop; <tr><td>
case Xop_tkn_itm_.Tid_tblw_td: // fix; <td><td> -> <td></td><td>
if (!tbl_is_xml) // only for "\n|" not <td>
ctx.Para().Process_nl(ctx, root, src, bgn_pos, bgn_pos + 1); // simulate "\n"; DATE:2014-02-20; ru.w:;[[Help:Download]]; DATE:2014-02-20
ctx.Para().Process_block__bgn_y__end_n(Xop_xnde_tag_.Tag_td); // <td>
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(prv_tid), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
break;
case Xop_tkn_itm_.Tid_tblw_th: // fix; <th><td> -> <th></th><td>
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(prv_tid), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
if (wlxr_type == Tblw_type_td2) create_th = true; // !a||b -> <th><th>; but !a|b -> <th><td>
break;
case Xop_tkn_itm_.Tid_tblw_tb: // fix; <table><td> -> <table><tr><td>
if (wlxr_type == Tblw_type_td2) { // NOTE: ignore || if preceded by {|; {|a||b\n
prv_tkn.Atrs_rng_set(-1, -1); // reset atrs_bgn; remainder of line will become part of tb atr
return cur_pos;
}
else {
new_tkn = tkn_mkr.Tblw_tr(bgn_pos, cur_pos, tbl_is_xml, true);
new_tkn.Atrs_rng_set(bgn_pos, bgn_pos);
ctx.Subs_add_and_stack_tblw(root, prv_tkn, new_tkn);
prv_tid = new_tkn.Tkn_tid();
}
break;
case Xop_tkn_itm_.Tid_tblw_tc: // fix; <caption><td> -> <caption></caption><tr><td>
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tc), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
new_tkn = tkn_mkr.Tblw_tr(bgn_pos, cur_pos, tbl_is_xml, true);
ctx.Subs_add_and_stack_tblw(root, prv_tkn, new_tkn);
prv_tid = new_tkn.Tkn_tid();
break;
}
// if (prv_tid == Xop_tkn_itm_.Tid_xnde)
// ctx.Stack_auto_close(root, src, prv_tkn, prv_tkn.Src_bgn(), prv_tkn.Src_end());
if (create_th) new_tkn = tkn_mkr.Tblw_th(bgn_pos, cur_pos, tbl_is_xml);
else new_tkn = tkn_mkr.Tblw_td(bgn_pos, cur_pos, tbl_is_xml);
cell_pipe_seen = false;
break;
case Tblw_type_th: // <th>
case Tblw_type_th2:
switch (prv_tid) {
case Xop_tkn_itm_.Tid_tblw_tr: break; // noop; <tr><th>
case Xop_tkn_itm_.Tid_tblw_th: // fix; <th><th> -> <th></th><th>
if (tbl_is_xml // tbl_is_xml always closes previous token
|| (wlxr_type == Tblw_type_th2 || wlxr_type == Tblw_type_th)) // ! always closes; EX: "! !!"; "!! !!"; REMOVE: 2012-05-07; had (&& !ws_enabled) but caused "\n !" to fail; guard is no longer necessary since tblw_ws changed...
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(prv_tid), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
else {
// NOTE(review): wlxr_type is always th or th2 inside this outer case, so the condition above is always true and this else looks unreachable
ctx.Subs_add(root, tkn_mkr.Txt(bgn_pos, cur_pos));
return cur_pos;
}
break;
case Xop_tkn_itm_.Tid_tblw_td: // fix; <td><th> -> <td></td><th> NOTE: common use of using <th> after <td> for formatting
if (tbl_is_xml // tbl_is_xml always closes previous token
|| (wlxr_type == Tblw_type_th)) // "| !" closes; "| !!" does not;
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(prv_tid), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
else {
ctx.Subs_add(root, tkn_mkr.Txt(bgn_pos, cur_pos));
return cur_pos;
}
break;
case Xop_tkn_itm_.Tid_tblw_tb: // fix; <table><th> -> <table><tr><th>
ctx.Subs_add_and_stack_tblw(root, prv_tkn, tkn_mkr.Tblw_tr(bgn_pos, cur_pos, tbl_is_xml, true));
break;
case Xop_tkn_itm_.Tid_tblw_tc: // fix; <caption><th> -> <caption></caption><tr><th>
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tc), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
ctx.Subs_add_and_stack_tblw(root, prv_tkn, tkn_mkr.Tblw_tr(bgn_pos, cur_pos, tbl_is_xml, true));
break;
}
new_tkn = tkn_mkr.Tblw_th(bgn_pos, cur_pos, tbl_is_xml);
cell_pipe_seen = false;
break;
case Tblw_type_tc: // <caption>
switch (prv_tid) {
case Xop_tkn_itm_.Tid_tblw_tb: break; // noop; <table><caption>
case Xop_tkn_itm_.Tid_tblw_tr: // fix; <tr><caption> -> <tr></tr><caption> TODO: caption should be ignored and placed in quarantine
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tr), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
break;
case Xop_tkn_itm_.Tid_tblw_td: // fix; <td><caption> -> <td></td><caption>
case Xop_tkn_itm_.Tid_tblw_th: // fix; <th><caption> -> <th></th><caption>
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tr), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td); // NOTE: closing <tr> in order to close <td>/<th>
ctx.Msg_log().Add_itm_none(Xop_tblw_log.Caption_after_td, src, prv_tkn.Src_bgn(), bgn_pos);
break;
case Xop_tkn_itm_.Tid_tblw_tc: // fix; <caption><caption> -> <caption></caption><caption>
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tc), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
ctx.Msg_log().Add_itm_none(Xop_tblw_log.Caption_after_tc, src, prv_tkn.Src_bgn(), bgn_pos);
break;
}
new_tkn = tkn_mkr.Tblw_tc(bgn_pos, cur_pos, tbl_is_xml);
Xop_tblw_tb_tkn tblw_tb_tkn = (Xop_tblw_tb_tkn)ctx.Stack_get_typ(Xop_tkn_itm_.Tid_tblw_tb);
tblw_tb_tkn.Caption_count_add_1(); // NOTE: null check is not necessary (impossible to have a caption without a tblw); DATE:2013-12-20
cell_pipe_seen = false; // NOTE: always mark !seen; see Atrs_tc()
break;
}
// register the new token, then apply the caller-supplied attribute range if one was given
ctx.Subs_add_and_stack_tblw(root, prv_tkn, new_tkn);
if (atrs_bgn > Xop_tblw_wkr.Atrs_ignore_check) {
new_tkn.Atrs_rng_set(atrs_bgn, atrs_end);
if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki) {
Xop_xatr_itm[] atrs = ctx.App().Xatr_parser().Parse(ctx.Msg_log(), src, atrs_bgn, atrs_end);
new_tkn.Atrs_ary_as_tblw_(atrs);
}
}
// update paragraph block state; note that td2, th2 and tc have no case here and leave it untouched
switch (wlxr_type) {
case Tblw_type_tb:
case Tblw_type_tr:
ctx.Para().Process_block__bgn_y__end_n(Xop_xnde_tag_.Tag_tr);
break;
case Tblw_type_td:
case Tblw_type_th:
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_td);
break;
}
return cur_pos;
}
/**
 * Closes a table element.
 * Two paths:
 *   - wlxr_type == Tblw_type_te ("|}"): repairs the open tokens based on prv_tid (closing an open cell / caption, deleting an
 *     empty trailing row, or auto-creating a row and cell for an empty table), then pops the stack down to the table-begin token
 *     and sets its end to cur_pos.
 *   - otherwise: searches the stack from the top for a token matching typeId (td and th may close each other)
 *     without crossing a table-begin token, and closes it if found; if none is found the end token is discarded.
 * @param typeId    token id of the element being closed (a Xop_tkn_itm_.Tid_tblw_* value)
 * @param wlxr_type one of the Tblw_type_* constants identifying the delimiter
 * @param prv_tkn   innermost open table token as found by the caller; may be null
 * @param prv_tid   token id of prv_tkn, or Tid_null when prv_tkn is null
 * @return cur_pos in all cases
 */
public int Make_tkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int typeId, byte wlxr_type, Xop_tblw_tkn prv_tkn, int prv_tid, boolean tbl_is_xml) {
if (!tbl_is_xml) // only for "\n|}" not </table>
ctx.Para().Process_nl(ctx, root, src, bgn_pos, bgn_pos + 1); // simulate "\n"; process para (which will create paras for cells) 2012-12-08
if (tbl_is_xml && typeId == Xop_tkn_itm_.Tid_tblw_tb // tblx: </table>
&& prv_tkn != null && !prv_tkn.Tblw_xml()) { // tblw is prv_tkn
++tblw_te_ignore_count; // suppress subsequent occurrences of "|}"; EX:ru.q:Авель; DATE:2014-02-22
}
Ignore_ws(ctx, root);
if (wlxr_type == Tblw_type_te) {
switch (prv_tid) {
case Xop_tkn_itm_.Tid_tblw_td: // fix; <td></table> -> <td></td></tr></table>
case Xop_tkn_itm_.Tid_tblw_th: // fix; <th></table> -> <th></th></tr></table>
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tr), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
break;
case Xop_tkn_itm_.Tid_tblw_tc: // fix; <caption></table> -> <caption></caption></table>
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tc), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
break;
case Xop_tkn_itm_.Tid_tblw_tr: // fix; <tr></table> -> </table> : tr but no tds; remove tr
// row counts as blank when only newline / para tokens follow it in root
boolean blank = true;
for (int j = prv_tkn.Tkn_sub_idx() + 1; j < root.Subs_len(); j++) {
Xop_tkn_itm t = root.Subs_get(j);
switch (t.Tkn_tid()) {
case Xop_tkn_itm_.Tid_newLine:
case Xop_tkn_itm_.Tid_para:
break;
default:
blank = false;
j = root.Subs_len();
break;
}
}
if (blank)
root.Subs_del_after(prv_tkn.Tkn_sub_idx());
break;
case Xop_tkn_itm_.Tid_tblw_tb: // fix; <table></table> -> <table><tr><td></td></tr></table>
// look for any table element among the tokens following the table start in root
boolean has_subs = false;
for (int i = prv_tkn.Tkn_sub_idx() + 1; i < root.Subs_len(); i++) {
int cur_id = root.Subs_get(i).Tkn_tid();
switch (cur_id) {
case Xop_tkn_itm_.Tid_tblw_tc:
case Xop_tkn_itm_.Tid_tblw_td:
case Xop_tkn_itm_.Tid_tblw_th:
case Xop_tkn_itm_.Tid_tblw_tr:
has_subs = true;
i = root.Subs_len();
break;
}
}
if (!has_subs) {
Xop_tkn_itm new_tkn = tkn_mkr.Tblw_tr(bgn_pos, bgn_pos, tbl_is_xml, true);
ctx.Subs_add_and_stack_tblw(root, prv_tkn, new_tkn);
new_tkn = tkn_mkr.Tblw_td(bgn_pos, bgn_pos, tbl_is_xml);
ctx.Subs_add_and_stack_tblw(root, prv_tkn, new_tkn);
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tb), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
return cur_pos;
}
break;
}
int tb_idx = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tb);
if (tb_idx == -1) return cur_pos; // NOTE: tb_idx can be -1 when called from Pipe in Tmpl mode
Xop_tblw_tb_tkn tb = (Xop_tblw_tb_tkn)ctx.Stack_pop_til(root, src, tb_idx, false, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td); // NOTE: need to pop manually in order to set all intermediate node ends to bgn_pos, but tb ent to cur_pos; EX: for stack of "tb,tr,td" tr and td get End_() of bgn_pos but tb gets End_() of cur_pos
tb.Subs_move(root);
tb.Src_end_(cur_pos);
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_table); // NOTE: must clear block state that was started by <tr>; code implicitly relies on td clearing block state, but no td was created
return cur_pos;
}
// not "|}": determine which open token id the end token is allowed to close
int acs_typeId = typeId;
if (prv_tid != typeId // NOTE: special logic to handle auto-close of <td></th> or <th></td>
&& ( (prv_tid == Xop_tkn_itm_.Tid_tblw_td && typeId == Xop_tkn_itm_.Tid_tblw_th)
|| (prv_tid == Xop_tkn_itm_.Tid_tblw_th && typeId == Xop_tkn_itm_.Tid_tblw_td)
)
)
acs_typeId = prv_tid;
int acs_pos = -1, acs_len = ctx.Stack_len();
for (int i = acs_len - 1; i > -1; i--) { // find auto-close pos
byte cur_acs_tid = ctx.Stack_get(i).Tkn_tid();
switch (acs_typeId) {
case Xop_tkn_itm_.Tid_tblw_tb: // if </table>, match <table> only; note that it needs to be handled separately b/c of tb logic below
if (acs_typeId == cur_acs_tid) {
acs_pos = i;
i = -1; // force break;
}
break;
default: // if </t*>, match <t*> but stop at <table>; do not allow </t*> to close <t*> outside <table>
if (cur_acs_tid == Xop_tkn_itm_.Tid_tblw_tb) // <table>; do not allow </t*> to close any <t*>'s above <table>; EX:w:Enthalpy_of_fusion; {{States of matter}}
i = -1; // this will skip acs_pos != -1 below and discard token
else if (cur_acs_tid == acs_typeId) { // </t*> matches <t*>
acs_pos = i;
i = -1; // force break
}
break;
}
}
if (acs_pos != -1) {
Xop_tblw_tkn bgn_tkn = (Xop_tblw_tkn)ctx.Stack_pop_til(root, src, acs_pos, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_tblw_td);
switch (wlxr_type) {
case Tblw_type_tb:
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_table);
break;
case Tblw_type_td:
case Tblw_type_th:
ctx.Para().Process_block__bgn_y__end_n(Xop_xnde_tag_.Tag_td);
break;
}
bgn_tkn.Subs_move(root);
bgn_tkn.Src_end_(cur_pos);
}
return cur_pos;
}
/**
 * Converts pending attribute text of the innermost open wiki-syntax table or row into attributes.
 * Does nothing when there is no open table token, when that token is xml-style, or when it is not a tb / tr.
 * @param called_from_xnde forwarded unchanged to Atrs_make
 */
public static void Atrs_close(Xop_ctx ctx, byte[] src, Xop_root_tkn root, boolean called_from_xnde) {
	Xop_tblw_tkn open_tblw = ctx.Stack_get_tbl();
	if (open_tblw == null) return;			// no tblw
	if (open_tblw.Tblw_xml()) return;		// tblw_xnde (which does not have tblw atrs)
	int open_tid = open_tblw.Tkn_tid();
	// only tb and tr have tblw atrs (EX: "{|id=1\n"); td/th use pipes for atrs (EX: "|id=1|a"); tc has no atrs; te is never on stack
	boolean has_line_atrs = open_tid == Xop_tkn_itm_.Tid_tblw_tb || open_tid == Xop_tkn_itm_.Tid_tblw_tr;
	if (has_line_atrs) {
		Xop_tblw_wkr.Atrs_make(ctx, src, root, ctx.Tblw(), open_tblw, called_from_xnde);
	}
}
/**
 * Turns the tokens that follow prv_tblw on the same line into that token's attribute range.
 * Scans root from the token after prv_tblw until a newline, hdr, hr or list token (or the end of root);
 * the scanned tokens are removed from root and their source span becomes the attribute range.
 * @param wkr              worker whose cell-pipe flag is set when attributes are found
 * @param prv_tblw         open table token receiving the attributes
 * @param called_from_xnde when true and prv_tblw is a wiki-syntax row with no attribute tokens, the range is marked Atrs_invalid_by_xnde instead of Atrs_empty
 * @return true when an attribute range was assigned from scanned tokens; false otherwise
 */
public static boolean Atrs_make(Xop_ctx ctx, byte[] src, Xop_root_tkn root, Xop_tblw_wkr wkr, Xop_tblw_tkn prv_tblw, boolean called_from_xnde) {
	// atr_bgn/end is empty or already has explicit value; ignore
	if (prv_tblw.Atrs_bgn() != Xop_tblw_wkr.Atrs_null) {
		if (prv_tblw.Atrs_bgn() != Atrs_invalid_by_xnde) return false;
		// atr range marked invalid; ignore all tkns between prv_tblw and end of root; EX:"|-id=1<br/>"; PAGE:en.w:A DATE:2014-07-16
		int idx = root.Subs_len();
		while (--idx > -1) {
			Xop_tkn_itm later_tkn = root.Subs_get(idx);
			if (later_tkn == prv_tblw) return false;
			later_tkn.Ignore_y_();
		}
		ctx.App().Usr_dlg().Warn_many("", "", "xnde.invalided attributes could not find previous tkn; page=~{0}", ctx.Page_url_str()); // should never happen; DATE:2014-07-16
		return false;
	}

	// walk the tkns after prv_tblw to find the last one that belongs to the attribute text
	int first_idx = prv_tblw.Tkn_sub_idx() + 1;
	int last_idx = root.Subs_len() - 1;
	int scan_idx = first_idx;
	Xop_tkn_itm last_atr_tkn = null;
	while (scan_idx <= last_idx) {
		Xop_tkn_itm candidate = root.Subs_get(scan_idx);
		int candidate_tid = candidate.Tkn_tid();
		boolean stops_atrs
			=  candidate_tid == Xop_tkn_itm_.Tid_newLine	// nl stops; EX: "{| a b c \nd"; bgn at {| and pick up " a b c " as atrs
			|| candidate_tid == Xop_tkn_itm_.Tid_hdr		// hdr/hr incorporate nl into tkn so include these as well; EX: "{|a\n==b==" becomes tblw,txt,hdr
			|| candidate_tid == Xop_tkn_itm_.Tid_hr
			|| candidate_tid == Xop_tkn_itm_.Tid_list		// list stops; EX: "{| a b c\n* d"; "*d" ends atrs; EX: ru.d: DATE:2014-02-22
			;
		if (stops_atrs) break;
		last_atr_tkn = candidate;
		++scan_idx;
	}

	// no atrs found; mark tblw_tkn as empty (or invalid when an xnde interrupted a wiki-syntax row)
	if (last_atr_tkn == null) {
		int marker = Atrs_empty;
		if (	called_from_xnde
			&&	!prv_tblw.Tblw_xml()
			&&	prv_tblw.Tkn_tid() == Xop_tkn_itm_.Tid_tblw_tr) {	// called from xnde && current tid is Tblw_tr; EX:"|- <br/>" PAGE:en.w:A DATE:2014-07-16
			marker = Atrs_invalid_by_xnde;							// invalidate everything
		}
		prv_tblw.Atrs_rng_set(marker, marker);
		return false;
	}

	root.Subs_del_between(ctx, first_idx, scan_idx);
	int atrs_bgn = prv_tblw.Src_end();
	int atrs_end = last_atr_tkn.Src_end();
	if (prv_tblw.Tkn_tid() == Xop_tkn_itm_.Tid_tblw_tr) {
		// NOTE: if "|-" gobble all trailing dashes; REF: Parser.php!doTableStuff; $line = preg_replace( '#^\|-+#', '', $line ); DATE:2013-06-21
		atrs_bgn = Bry_finder.Find_fwd_while(src, atrs_bgn, src.length, Byte_ascii.Dash);
	}
	prv_tblw.Atrs_rng_set(atrs_bgn, atrs_end);
	if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki && atrs_bgn != -1) {
		prv_tblw.Atrs_ary_as_tblw_(ctx.App().Xatr_parser().Parse(ctx.Msg_log(), src, atrs_bgn, atrs_end));
	}
	wkr.Cell_pipe_seen_(true);
	return true;
}
/**
 * Marks whitespace-like tokens after the last tr / tc / tb token in root as ignored,
 * trimming first from the front of that range and then from the back.
 * Does nothing when a td / th token is met before any tr / tc / tb (scanning backwards),
 * or when no tblw token is found at all.
 */
private void Ignore_ws(Xop_ctx ctx, Xop_root_tkn root) {
	int last_idx = root.Subs_len() - 1;
	// get last tr, tc, tb; cannot use ctx.Stack_get_tblw b/c this gets last open tblw, and we want last tblw; EX: "<table><tr></tr>"; Stack_get_tblw gets <table> want </tr>
	Xop_tkn_itm anchor = null;
	scan:
	for (int idx = last_idx; idx >= 0; idx--) {
		Xop_tkn_itm candidate = root.Subs_get(idx);
		switch (candidate.Tkn_tid()) {
			case Xop_tkn_itm_.Tid_tblw_tr:
			case Xop_tkn_itm_.Tid_tblw_tc:
			case Xop_tkn_itm_.Tid_tblw_tb:
				anchor = candidate;
				break scan;
			case Xop_tkn_itm_.Tid_tblw_td:	// exclude td
			case Xop_tkn_itm_.Tid_tblw_th:	// exclude th
				break scan;
		}
	}
	if (anchor == null) return;
	int first_idx = anchor.Tkn_sub_idx() + 1;
	int fwd_stop = Ignore_ws_rng(ctx, root, first_idx, last_idx, true);
	if (fwd_stop == -1) return;	// entire range is ws; don't bother trimming end
	Ignore_ws_rng(ctx, root, last_idx, first_idx, false);
}
/**
 * Walks root's subs from bgn towards end (inclusive), marking whitespace-like tokens as ignored,
 * and stops at the first token that is neither whitespace-like nor an xnde.
 *
 * @param bgn   index to start at
 * @param end   last index to visit; must be >= bgn when fwd, <= bgn otherwise for any work to happen
 * @param fwd   true to step by +1, false to step by -1
 * @return index of the token that stopped the walk, or -1 when the walk ran past end
 */
private int Ignore_ws_rng(Xop_ctx ctx, Xop_root_tkn root, int bgn, int end, boolean fwd) {
	int step = fwd ? 1 : -1;
	for (int idx = bgn; fwd ? idx <= end : idx >= end; idx += step) {
		Xop_tkn_itm tkn = root.Subs_get(idx);
		int tid = tkn.Tkn_tid();
		boolean is_ws
			=  tid == Xop_tkn_itm_.Tid_space
			|| tid == Xop_tkn_itm_.Tid_tab
			|| tid == Xop_tkn_itm_.Tid_newLine
			|| tid == Xop_tkn_itm_.Tid_para;
		if (is_ws) {
			tkn.Ignore_y_grp_(ctx, root, idx);
		}
		else if (tid == Xop_tkn_itm_.Tid_xnde) {
			// NOTE: para_wkr inserts <br/>. these should be disabled in Ignore_ws_rng; they are identified as having bgn == end; normal <br/>s will have bgn < end
			boolean is_inserted_br
				=  tkn.Src_bgn() == tkn.Src_end()
				&& ((Xop_xnde_tkn)tkn).Tag().Id() == Xop_xnde_tag_.Tid_br;
			if (is_inserted_br)
				tkn.Ignore_y_grp_(ctx, root, idx);
			// NOTE(review): any other xnde is left untouched but does NOT stop the walk; confirm this is intended
		}
		else {
			return idx;
		}
	}
	return -1;
}
/**
 * Adds tkn to root for text that matched tblw syntax but is not being treated as a tblw token.
 *
 * @param add_nl   when true, ctx.Para().Process_nl is called for bgn_pos..cur_pos before tkn is added
 * @param tkn      token to add to root in place of a tblw token
 * @return cur_pos, unchanged
 */
public static int Handle_false_tblw_match(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos, Xop_tkn_itm tkn, boolean add_nl) {
	if (add_nl) {
		ctx.Para().Process_nl(ctx, root, src, bgn_pos, cur_pos);
	}
	ctx.Subs_add(root, tkn);
	return cur_pos;
}
// Sentinel values stored as a tblw token's attribute range in place of real src offsets:
//   Atrs_null            : Atrs_make only builds a range while Atrs_bgn() still equals this value
//   Atrs_empty           : set by Atrs_make when no attribute tokens follow the tblw token
//   Atrs_invalid_by_xnde : set by Atrs_make when called from an xnde on a tblw "|-" row with no attribute tokens;
//                          a later Atrs_make call then marks all root tokens after the row as ignored
//   Atrs_ignore_check    : NOTE(review): shares the value -1 with Atrs_null and is not used in the nearby methods; confirm intended use
public static final int Atrs_null = -1, Atrs_empty = -2, Atrs_invalid_by_xnde = -3, Atrs_ignore_check = -1;
// Table element types used by the tblw lexers.
// NOTE(review): meanings presumed from the tblw tests: tb="{|", te="|}", tr="|-", td="|", th="!", tc="|+" (caption), td2="||", th2="!!"; confirm against the lexers
public static final byte Tblw_type_tb = 0, Tblw_type_te = 1, Tblw_type_tr = 2, Tblw_type_td = 3, Tblw_type_th = 4, Tblw_type_tc = 5, Tblw_type_td2 = 6, Tblw_type_th2 = 7;
}
/*
NOTE_1:
Code tries to emulate HTML tidy behavior. Specifically:
- ignore <table> when directly under <table>
- if tblw, scan to end of line to ignore attributes
- ignore any closing tblws
EX:
{|id=1
{|id=2 <- ignore id=2
|}
|}
*/

View File

@@ -0,0 +1,212 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_tblw_wkr__atrs_tst {
private Xop_fxt fxt = new Xop_fxt();
// PURPOSE: text directly after "|-" is recorded as the row's attribute range.
// Assuming the lines are joined with "\n", src is "{|\n|-style='a'\n|b\n|}" and offsets 5-14 span "style='a'".
@Test public void Tr() {
fxt.Test_parse_page_wiki(String_.Concat_lines_nl_skip_last
( "{|"
, "|-style='a'"
, "|b"
, "|}"
), fxt.tkn_tblw_tb_(0, 20).Subs_
( fxt.tkn_tblw_tr_(2, 17).Atrs_rng_(5, 14).Subs_
( fxt.tkn_tblw_td_(14, 17).Subs_(fxt.tkn_txt_(16, 17), fxt.tkn_para_blank_(18))
))
);
}
// PURPOSE: text between a cell's opening "|" and the next "|" is recorded as the cell's attribute range.
// Assuming the lines are joined with "\n", src is "{|\n|-\n|style='a'|b\n|}" and offsets 7-16 span "style='a'".
@Test public void Td() {
fxt.Test_parse_page_wiki(String_.Concat_lines_nl_skip_last
( "{|"
, "|-"
, "|style='a'|b"
, "|}"
), fxt.tkn_tblw_tb_(0, 21).Subs_
( fxt.tkn_tblw_tr_(2, 18).Subs_
( fxt.tkn_tblw_td_(5, 18).Atrs_rng_(7, 16).Subs_(fxt.tkn_txt_(17, 18), fxt.tkn_para_blank_(19))
))
);
}
@Test public void Td_mult() {
fxt.Init_para_y_();
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|-"
, "|"
, " {|"
, " |-"
, " | id='1'|"
, " | id='2'|a"
, " | id='3'|"
, " |}"
, "|}"
)
, String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>"
, " <table>"
, " <tr>"
, " <td id='1'>"
, " </td>"
, " <td id='2'>a"
, " </td>"
, " <td id='3'>"
, " </td>"
, " </tr>"
, " </table>"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
fxt.Init_para_n_();
}
@Test public void Tc() { // PAGE:en.w:1920_Palm_Sunday_tornado_outbreak
fxt.Init_para_y_();
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|id='1'"
, "|+id='2'|a"
, "|}"
)
, String_.Concat_lines_nl_skip_last
( "<table id='1'>"
, " <caption id='2'>a"
, " </caption>"
, "</table>"
, ""
)
);
fxt.Init_para_n_();
}
@Test public void Td_mixed() {
fxt.Test_parse_page_wiki(String_.Concat_lines_nl_skip_last
( "{|"
, "|-"
, "|style='a'|b||c"
, "|}"
), fxt.tkn_tblw_tb_(0, 24).Subs_
( fxt.tkn_tblw_tr_(2, 21).Subs_
( fxt.tkn_tblw_td_( 5, 18).Atrs_rng_(7, 16).Subs_(fxt.tkn_txt_(17, 18), fxt.tkn_para_blank_(19))
, fxt.tkn_tblw_td_(18, 21).Subs_(fxt.tkn_txt_(20, 21), fxt.tkn_para_blank_(22))
))
);
}
@Test public void Th() {
fxt.Test_parse_page_wiki(String_.Concat_lines_nl_skip_last
( "{|"
, "|-"
, "!style='a'|b"
, "|}"
), fxt.tkn_tblw_tb_(0, 21).Subs_
( fxt.tkn_tblw_tr_(2, 18).Subs_
( fxt.tkn_tblw_th_(5, 18).Atrs_rng_(7, 16).Subs_(fxt.tkn_txt_(17, 18), fxt.tkn_para_blank_(19))
))
);
}
@Test public void Skip_hdr() {
fxt.Test_parse_page_wiki(String_.Concat_lines_nl_skip_last
( "{|"
, "|+b"
, "!style='a'|b"
, "|}"
), fxt.tkn_tblw_tb_(0, 22).Caption_count_(1).Subs_
( fxt.tkn_tblw_tc_(2, 6).Subs_(fxt.tkn_txt_( 5, 6))
, fxt.tkn_tblw_tr_(6, 19).Subs_
( fxt.tkn_tblw_th_(6, 19).Atrs_rng_(8, 17).Subs_(fxt.tkn_txt_(18, 19), fxt.tkn_para_blank_(20))
)
));
}
@Test public void Td_bg_color() { // PURPOSE: atr_parser should treat # as valid character in unquoted val; PAGE:en.w:UTF8; |bgcolor=#eeeeee|<small>Indic</small><br/><small>0800*</small><br/>'''''224'''''
fxt.Init_para_y_();
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|bgcolor=#eeeeee|a"
, "|}"
)
, String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td bgcolor=\"#eeeeee\">a"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
fxt.Init_para_n_();
}
@Test public void Xnde_tb() { // PURPOSE: xnde should close any open xatrs; PAGE:en.w:Western_Front_(World_War_I); stray > after == Dramatizations ==
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|id='1'<p></p>"
, "|a"
, "|}"), String_.Concat_lines_nl_skip_last
( "<table id='1'><p></p>"
, " <tr>"
, " <td>a"
, " </td>"
, " </tr>"
, "</table>"
, ""
));
}
// Expected html has neither the <b>c</b> / id='d' / <br/> content nor any attribute on <tr>.
// NOTE(review): presumably exercises the Atrs_invalid_by_xnde handling in Xop_tblw_wkr.Atrs_make (same PAGE / DATE reference); confirm.
// NOTE(review): unlike the sibling tests, the expected lines here do not end with a trailing "" entry.
@Test public void Xnde_tr() { // PURPOSE: xnde should disable all tkns; PAGE:en.w:A DATE:2014-07-16
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|-<b>c</b>id='d'<br/>" // note that id='d' should not show up since <b> invalidates entire line
, "|a"
, "|}"
), String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>a"
, " </td>"
, " </tr>"
, "</table>"
));
}
@Test public void Xnde_mix_tblw_tblx() { // PURPOSE: issue with </tr> somehow rolling up everything after <td>; PAGE:en.w:20th_century; {{Decades and years}}
fxt.Init_para_y_();
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "<table><tr><td>a"
, "{|id=1"
, "|-"
, "|b"
, "|}</td></tr></table>"
)
, String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>a"
, " <table id=\"1\">"
, " <tr>"
, " <td>b"
, " </td>"
, " </tr>"
, " </table>"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
fxt.Init_para_n_();
}
}

View File

@@ -0,0 +1,823 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_tblw_wkr__basic_tst {
private Xop_fxt fxt = new Xop_fxt();
// Simplest table: "{|" (0-2), "\n|-" (2-5), "\n|" + "a" (5-8), "\n|}" ends at 11.
// Each tblw token's range starts at the "\n" that precedes its markup; the cell text "a" is at 7-8.
@Test public void Td() { // Tb_tr_td_te
fxt.Test_parse_page_wiki("{|\n|-\n|a\n|}"
, fxt.tkn_tblw_tb_(0, 11).Subs_
( fxt.tkn_tblw_tr_(2, 8).Subs_
( fxt.tkn_tblw_td_(5, 8).Subs_(fxt.tkn_txt_(7, 8), fxt.tkn_para_blank_(9))))
);
}
@Test public void Td2() { // Tb_tr_td_td2_te
fxt.Test_parse_page_wiki("{|\n|-\n|a||b\n|}"
, fxt.tkn_tblw_tb_(0, 14).Subs_
( fxt.tkn_tblw_tr_(2, 11).Subs_
( fxt.tkn_tblw_td_(5, 8).Subs_(fxt.tkn_txt_( 7, 8), fxt.tkn_para_blank_(9))
, fxt.tkn_tblw_td_(8, 11).Subs_(fxt.tkn_txt_(10, 11), fxt.tkn_para_blank_(12))
)));
}
@Test public void Tc() { // Tb_tc_te
fxt.Test_parse_page_wiki("{|\n|+a\n|}"
, fxt.tkn_tblw_tb_(0, 9).Caption_count_(1).Subs_
( fxt.tkn_tblw_tc_(2, 6).Subs_
( fxt.tkn_txt_(5, 6)
, fxt.tkn_para_blank_(7)
)
)
);
}
@Test public void Tc_longer() { // Tb_tc_tr_td_te
fxt.Test_parse_page_wiki("{|\n|+a\n|-\n|b\n|}"
, fxt.tkn_tblw_tb_(0, 15).Caption_count_(1).Subs_
( fxt.tkn_tblw_tc_(2, 6).Subs_(fxt.tkn_txt_(5, 6))
, fxt.tkn_tblw_tr_(6, 12).Subs_
( fxt.tkn_tblw_td_(9, 12).Subs_(fxt.tkn_txt_(11, 12), fxt.tkn_para_blank_(13))
)
));
}
@Test public void Th() { // Tb_th_te
fxt.Test_parse_page_wiki("{|\n|-\n!a\n|}"
, fxt.tkn_tblw_tb_(0, 11).Subs_
( fxt.tkn_tblw_tr_(2, 8).Subs_
( fxt.tkn_tblw_th_(5, 8).Subs_(fxt.tkn_txt_(7, 8), fxt.tkn_para_blank_(9))
)));
}
@Test public void Th2() { // Tb_th_th2_te
fxt.Test_parse_page_wiki("{|\n|-\n!a!!b\n|}"
, fxt.tkn_tblw_tb_(0, 14).Subs_
( fxt.tkn_tblw_tr_(2, 11).Subs_
( fxt.tkn_tblw_th_(5, 8).Subs_(fxt.tkn_txt_( 7, 8))
, fxt.tkn_tblw_th_(8, 11).Subs_(fxt.tkn_txt_(10, 11), fxt.tkn_para_blank_(12))
)));
}
@Test public void Th2_td_syntax() { // Tb_th_td; || should be treated as th
fxt.Test_parse_page_wiki("{|\n|-\n!a||b\n|}"
, fxt.tkn_tblw_tb_(0, 14).Subs_
( fxt.tkn_tblw_tr_(2, 11).Subs_
( fxt.tkn_tblw_th_(5, 8).Subs_(fxt.tkn_txt_( 7, 8))
, fxt.tkn_tblw_th_(8, 11).Subs_(fxt.tkn_txt_(10, 11), fxt.tkn_para_blank_(12))
)));
}
@Test public void Tb_td2() { // PAGE:en.w:Hectare; {| class="wikitable" || style="border: 1px solid #FFFFFF;"
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|id='1' || class='a'"
, "|-"
, "|a"
, "|}")
, String_.Concat_lines_nl_skip_last
( "<table id='1' class='a'>"
, " <tr>"
, " <td>a"
, " </td>"
, " </tr>"
, "</table>"
, ""
));
}
@Test public void Td_lnki() {
fxt.Test_parse_page_wiki("{|\n|-\n|[[a|b]]\n|}"
, fxt.tkn_tblw_tb_(0, 17).Subs_
( fxt.tkn_tblw_tr_(2, 14).Subs_
( fxt.tkn_tblw_td_(5, 14).Subs_(fxt.tkn_lnki_(7, 14), fxt.tkn_para_blank_(15))))
);
}
@Test public void Tr_dupe_xnde() { // PURPOSE: redundant tr should not be dropped; see [[Jupiter]]
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|-"
, "<tr><td>a</td></tr>"
, "|-"
, "|}"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>a"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
}
@Test public void Tr_dupe_xnde_2() { // PURPOSE: mismatched <th>...</td> causes problems; cell should still be written as <th>...</th>
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|"
, "<tr><th>a</td></tr>"
, "|}"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <th>a"
, " </th>"
, " </tr>"
, "</table>"
, ""
)
);
}
@Test public void Bang_should_not_make_cell_td_1_bang() { // PURPOSE: "| a! b" ! should not separate cell
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last("{|", "|-", "|a!b", "|}"), String_.Concat_lines_nl_skip_last("<table>", " <tr>", " <td>a!b" , " </td>", " </tr>", "</table>", ""));
}
@Test public void Bang_should_not_make_cell_td_2_bang() {
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last("{|", "|-", "|a!!b", "|}"), String_.Concat_lines_nl_skip_last("<table>", " <tr>", " <td>a!!b" , " </td>", " </tr>", "</table>", ""));
}
@Test public void Bang_should_not_make_cell_th_1_bang() {
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last("{|", "|-", "!a!b", "|}"), String_.Concat_lines_nl_skip_last("<table>", " <tr>", " <th>a!b" , " </th>", " </tr>", "</table>", ""));
}
@Test public void Bang_should_not_make_cell_th_2_bang() {
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last("{|", "|-", "!a!!b", "|}")
, String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <th>a"
, " </th>"
, " <th>b"
, " </th>"
, " </tr>"
, "</table>"
, ""
));
}
@Test public void Bang_should_not_make_cell_th_mult_line() { // FIX: make sure code does not disable subsequent bangs
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last("{|", "|-", "!a", "!b", "|}")
, String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <th>a"
, " </th>"
, " <th>b"
, " </th>"
, " </tr>"
, "</table>"
, ""
));
}
@Test public void Fix_extra_cell() { // PURPOSE: trim should not affect td; WP:Base32
fxt.Init_para_y_();
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|"
, "!id='1'|a"
, "|"
, "!id='2'|b"
, "|-"
, "|a1|| ||b1"
, "|}"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <th id='1'>a"
, " </th>"
, " <td>"
, " </td>"
, " <th id='2'>b"
, " </th>"
, " </tr>"
, " <tr>"
, " <td>a1"
, " </td>"
, " <td> "
, " </td>"
, " <td>b1"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
fxt.Init_para_n_();
}
@Test public void Nl_td() { // PURPOSE: <p> inside <td> does not get enclosed
fxt.Init_para_y_();
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "<table>"
, "<tr>"
, "<td>"
, ""
, ""
, "a"
, ""
, ""
, "</td>"
, "</tr>"
, "</table>"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>"
, ""
, "<p><br/>"
, "a"
, "</p>"
, ""
, "<p><br/>"
, "</p>"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
fxt.Init_para_n_();
}
@Test public void Trim_ws() { // PURPOSE: trim should be done from both sides
fxt.Init_para_y_();
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "<table>"
, "<tr>"
, "<td>"
, "</td>"
, "</tr>"
, ""
, ""
, "a"
, ""
, ""
, "</table>"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>"
, " </td>"
, " </tr>"
, "a"
, "</table>"
, ""
)
);
fxt.Init_para_n_();
}
@Test public void Trim_ws_tr() { // PURPOSE: trim should be done from both sides
fxt.Init_para_y_();
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "<table>"
, "<tr>"
, "<td>"
, "</td>"
, "</tr>"
, ""
, ""
, ""
, ""
, "<tr>"
, "<td>"
, "</td>"
, "</tr>"
, "</table>"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>"
, " </td>"
, " </tr>"
, " <tr>"
, " <td>"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
fxt.Init_para_n_();
}
// NOTE(review): input and expected output are identical to Nl_td in this class; one of the two looks redundant.
@Test public void Trim_ws_td() { // PURPOSE: trim should not affect td
fxt.Init_para_y_();
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "<table>"
, "<tr>"
, "<td>"
, ""
, ""
, "a"
, ""
, ""
, "</td>"
, "</tr>"
, "</table>"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>"
, ""
, "<p><br/>"
, "a"
, "</p>"
, ""
, "<p><br/>"
, "</p>"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
fxt.Init_para_n_();
}
@Test public void No_wiki_3() {
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|style=<nowiki>'a[b]c'</nowiki>|d"
, "|}"
), String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td style='a[b]c'>d"
, " </td>"
, " </tr>"
, "</table>"
, ""
));
}
@Test public void Trailing_tr_breaks_para_mode() {// PURPOSE.fix: empty trailing tr breaks para mode; EX:w:Sibelius
fxt.Init_para_y_();
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|a"
, "|-" // causes lines below not to be put in paras
, "|}"
, "b"
, ""
, "c"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>a"
, " </td>"
, " </tr>"
, "</table>"
, ""
, "<p>b"
, "</p>"
, ""
, "<p>c"
, "</p>"
, ""
));
fxt.Init_para_n_();
}
@Test public void Blank_line_should_be_own_para() {// PURPOSE.fix: caption does not begin on own line; EX:w:Old St. Peter's Basilica
fxt.Init_para_y_();
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|a"
, "b"
, "|}"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>a"
, ""
, "<p>b"
, "</p>"
, " </td>"
, " </tr>"
, "</table>"
, ""
));
fxt.Init_para_n_();
}
@Test public void Blank_line_should_be_own_para_2() {// PURPOSE.fix: caption does not begin on own line; EX:w:Old St. Peter's Basilica
fxt.Init_para_y_();
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|a"
, "b"
, "|-"
, "|}"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>a"
, ""
, "<p>b"
, "</p>"
, " </td>"
, " </tr>"
, "</table>"
, ""
));
fxt.Init_para_n_();
}
@Test public void Bold_stops_at_table() { // PURPOSE: do not allow unclosed bold to extend over tables;
fxt.Test_parse_page_all_str("'''<table><tr><td>a</td></tr></table>", String_.Concat_lines_nl_skip_last
( "<b></b>"
, "<table>"
, " <tr>"
, " <td>a"
, " </td>"
, " </tr>"
, "</table>"
, ""
));
fxt.Init_defn_clear();
}
// The stray </tr> inside the inner <table> is dropped: the expected html keeps the outer <tr> open until its own </tr>.
@Test public void Orphaned_tr_breaks_nested_tables() { // PURPOSE: </tr> should not match <tr> outside scope; EX:w:Enthalpy_of_fusion; {{States of matter}}
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "<table>"
, "<tr>"
, "<td>"
, "<table>"
, "</tr>"
, "</table>"
, "</td>"
, "<td>a"
, "</td>"
, "</tr>"
, "</table>"
),
String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>"
, " <table>"
, " </table>"
, " </td>"
, " <td>a"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
}
@Test public void Space_causes_extra_p() {// PURPOSE: "\n\s</td>" should be equivalent to "\n</td>"; EX: w:Earth
fxt.Init_para_y_();
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "<table><tr><td>"
, "b"
, "<br/>c"
, " </td></tr></table>"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>"
, ""
, "<p>b" // used to close <p> here; <p>b</p>
, "<br/>c"
, "</p>"
, " </td>"
, " </tr>"
, "</table>"
, ""
));
fxt.Init_para_n_();
}
@Test public void Br_should_not_be_ignored() {// PURPOSE: document <br />'s should not be ignored between tables; 20121226
fxt.Init_para_y_();
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|-"
, "|a"
, "|}"
, "<br />"
, "{|"
, "|-"
, "|b"
, "|}"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>a"
, " </td>"
, " </tr>"
, "</table>"
, "<br />" // was being ignored
, "<table>"
, " <tr>"
, " <td>b"
, " </td>"
, " </tr>"
, "</table>"
, ""
));
fxt.Init_para_n_();
}
@Test public void AutoClose_td_when_new_tr() { // retain; needed for de.w:Main_Page; DATE:2013-12-09
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|"
, "==a=="
, "|}"
)
, String_.Concat_lines_nl_skip_last
( "<table>"
, ""
, "<h2>a</h2>" // NOTE: malformed html matches MW
, " <tr>"
, " <td>"
, " </td>"
, " </tr>"
, "</table>"
, ""
));
fxt.Test_parse_page_wiki("{|\n==b==\n|}"
, fxt.tkn_tblw_tb_(0, 8).Subs_
( fxt.tkn_hdr_(2, 8, 2).Subs_
( fxt.tkn_txt_(5, 6)
)
, fxt.tkn_para_blank_(9)
, fxt.tkn_tblw_tr_(8, 8).Subs_
( fxt.tkn_tblw_td_( 8, 8))
));
}
@Test public void Auto_create_table() {// PURPOSE: <td> should create table; EX:w:Hatfield-McCoy_feud; DATE:20121226
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "<td>a"
, "</td>"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>a"
, " </td>"
, " </tr>"
, "</table>"
, ""
));
}
@Test public void List_and_orphaned_td2_should_not_create_tblw() {// PURPOSE: !! was creating table; DATE:2013-04-28
fxt.Test_parse_page_all_str("*a !! b", String_.Concat_lines_nl_skip_last
( "<ul>"
, " <li>a !! b"
, " </li>"
, "</ul>"
));
}
// Covers the dash-skipping in Xop_tblw_wkr.Atrs_make: the extra dashes after "|-" must not become part of the row's attributes.
// NOTE: expected html is built with Concat_lines_nl (not Concat_lines_nl_skip_last), so no trailing "" entry is listed.
@Test public void Tr_trailing_dashes_should_be_stripped() {// PURPOSE: trailing dashes should be stripped; |--- -> |-; EX: |--style="x" was being ignored; DATE:2013-06-21
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|-----style='a'"
, "|b"
, "|}"
), String_.Concat_lines_nl
( "<table>"
, " <tr style='a'>"
, " <td>b"
, " </td>"
, " </tr>"
, "</table>"
));
}
@Test public void Th_without_tr() { // PURPOSE: !! without preceding ! should not create table-cell; DATE:2013-12-18
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|-"
, "|"
, "a!!b"
, "|}"
), String_.Concat_lines_nl
( "<table>"
, " <tr>"
, " <td>"
, "a!!b"
, " </td>"
, " </tr>"
, "</table>"
));
}
@Test public void Td_at_eos() {// PURPOSE.fix: !! at eos fails; EX:es.s:Si_mis_manos_pudieran_deshojar; DATE:2014-02-11
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|-"
, "| <poem>!!</poem>" // note that "!!" is eos inside the <poem> src
, "|}"
), String_.Concat_lines_nl
( "<table>"
, " <tr>"
, " <td> <div class=\"poem\">"
, "<p>"
, "!!"
, "</p>"
, "</div>"
, " </td>"
, " </tr>"
, "</table>"
));
}
@Test public void Tr_without_tb_should_start_tb() {// PURPOSE: orphaned tr should automatically start table; EX: pl.w:Portal:Technika; DATE:2014-02-13
fxt.Test_parse_page_all_str("<tr><td>a"
, String_.Concat_lines_nl
( "<table>"
, " <tr>"
, " <td>a"
, " </td>"
, " </tr>"
, "</table>"
));
}
@Test public void Tblx_should_not_close_tblw() {// PURPOSE: </table> should not close {|; EX:fr.w:Exp%C3%A9dition_Endurance; DATE:2014-02-13
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|-"
, "|"
, "</table>"
, "|}"
)
, String_.Concat_lines_nl
( "<table>"
, " <tr>"
, " <td>"
, " </td>"
, " </tr>"
, "</table>"
));
}
// NOTE(review): despite the method name, the expected html shows each </table> ending the preceding "{|" table
// (three sibling tables are written) and the final "|}" producing no output; the method name looks misleading.
@Test public void Tblx_should_not_close_tblw_2() {// PURPOSE: </table> should close {|; ignore latter |}; EX:ru.q:Авель; DATE:2014-02-22
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|-"
, "|a"
, "</table>"
, "{|"
, "|-"
, "|b"
, "</table>"
, "{|"
, "|-"
, "|c"
, "</table>"
, "|}"
)
, String_.Concat_lines_nl
( "<table>"
, " <tr>"
, " <td>a"
, " </td>"
, " </tr>"
, "</table>"
, "<table>"
, " <tr>"
, " <td>b"
, " </td>"
, " </tr>"
, "</table>"
, "<table>"
, " <tr>"
, " <td>c"
, " </td>"
, " </tr>"
, "</table>"
));
}
@Test public void Td_in_list_in_tblw_should_be_ignored() {// PURPOSE: || should be ignored if in list; EX:es.d:casa; DATE:2014-02-15
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|-"
, "|"
, "* a || b"
, "|}"
)
, String_.Concat_lines_nl
( "<table>"
, " <tr>"
, " <td>"
, " <ul>"
, " <li> a || b"
, " </li>"
, " </ul>"
, " </td>"
, " </tr>"
, "</table>"
));
}
@Test public void List_in_tblw() {// PURPOSE: list should close previous cell; EX: ru.d:Викисловарь:Условные_сокращения; DATE:2014-02-22
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|-"
, "|"
, "{|"
, "*a"
, "|}"
, "|}"
)
, String_.Concat_lines_nl
( "<table>"
, " <tr>"
, " <td>"
, " <table>"
, " <ul>" // NOTE: this should probably be inside <tr>, but this matches MW behavior; DATE:2014-02-22
, " <li>a"
, " </li>"
, " </ul>"
, " <tr>"
, " <td>"
, " </td>"
, " </tr>"
, " </table>"
, " </td>"
, " </tr>"
, "</table>"
));
}
}
// @Test public void Tb_under_tr_is_ignored() { // PURPOSE: table directly under tr is ignored; PAGE:en.w:Category:Dessert stubs; TODO: complicated, especially to handle 2nd |}
// fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
// ( "{|"
// , "|-id='a'"
// , "{|style='border:1px;'"
// , "|-id='b'"
// , "|b"
// , "|}"
// , "|}"
// ), String_.Concat_lines_nl_skip_last
// ( "<table>"
// , " <tr id=\"b\">"
// , " <td>b"
// , " </td>"
// , " </tr>"
// , "</table>"
// , ""
// ));
// }
// @Test public void Leading_ws() { // PAGE:en.w:Corneal dystrophy (human)
// fxt.Test_parse_page_wiki(String_.Concat_lines_nl_skip_last
// ( " {|"
// , " |-"
// , " |a"
// , " |}"
// )
// , fxt.tkn_tblw_tb_(1, 15).Subs_
// ( fxt.tkn_tblw_tr_(3, 11).Subs_
// ( fxt.tkn_tblw_td_(7, 11).Subs_
// ( fxt.tkn_txt_())
// )
// )
// );
// }
// @Test public void Atrs_tb() { // Tb_te // FUTURE: reinstate; WHEN: Template
// fxt.Init_log_(Xop_tblw_log.Tbl_empty).Test_parse_page_wiki("{|style='a'\n|}"
// , fxt.tkn_tblw_tb_(0, 14).Atrs_rng_(2, 11).Subs_
// ( fxt.tkn_tblw_tr_(11, 11).Subs_
// ( fxt.tkn_tblw_td_(11, 11)
// )));
// }
// @Test public void Td_p() { // PURPOSE: <p> not being closed correctly
// fxt.Init_para_y_();
// fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
// ( "{|"
// , "|-"
// , "|"
// , "a"
// , "|}"), String_.Concat_lines_nl_skip_last
// ( "<table>"
// , " <tr>"
// , " <td>"
// , ""
// , "<p>a"
// , "</p>"
// , " </td>"
// , " </tr>"
// , "</table>"
// , ""
// ));
// fxt.Init_para_n_();
// }
// @Test public void Tb_tb() {
// fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
// ( "{|id='1'"
// , "{|id='2'"
// , "|-id='3'"
// , "|a"
// , "|}"
// , "|}"), String_.Concat_lines_nl_skip_last
// ( "<table id='1'>"
// , " <tr id='3'>"
// , " <td>a"
// , " </td>"
// , " </tr>"
// , "</table>"
// , ""
// ));
// }
// @Test public void Tb_tb_2() {
// fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
// ( "{|id='1'"
// , "{|id='2' <table id='3'>"
// , "|a"
// , "</table>"
// , "|}"
// , "|}"), String_.Concat_lines_nl_skip_last
// ( "<table id='1'>"
// , " <tr id='3'>"
// , " <td>a"
// , " </td>"
// , " </tr>"
// , "</table>"
// , ""
// ));
// }

View File

@@ -0,0 +1,57 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
// Tests for tblw tables that are left open ("dangling") when an enclosing xnde closes.
public class Xop_tblw_wkr__dangling_tst {
// every test runs with para mode enabled; @After switches it back off even when a test fails
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
// The outer "{|" is never closed with "|}" before </div>. The expected html is intentionally malformed:
// </div> is written while the outer table is still open, and the outer </td></tr></table> only appear after "b".
@Test public void Dangling_tb_in_xnde() {// PURPOSE: dangling tblw incorrectly auto-closed by </xnde>; PAGE:en.w:Atlanta_Olympics; DATE:2014-03-18
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "<div align='center'>"
, "{|"
, "|-"
, "|"
, "{|"
, "|-"
, "|a"
, "|}"
, "</div>"
, "b"
)
, String_.Concat_lines_nl
( "<div align='center'>"
, "<table>"
, " <tr>"
, " <td>"
, " <table>"
, " <tr>"
, " <td>a"
, " </td>"
, " </tr>"
, " </table>"
, "</div>" // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
, ""
, "<p>b"
, " </td>"
, " </tr>"
, "</table>"
, "</div>"
, "</p>"
));
}
}

View File

@@ -0,0 +1,108 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_tblw_wkr__double_pipe_tst {
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
// "||" appears inside a lnki with no table open, so it must not start a table cell; the link caption is written as "b|c".
@Test public void No_tblw() { // PURPOSE: if || has no tblw, treat as lnki; none; DATE:2014-05-06
fxt.Test_parse_page_all_str("[[A||b|c]]", String_.Concat_lines_nl_skip_last
( "<p><a href=\"/wiki/A\">b|c</a>" // NOTE: technically this should be "|b|c", but difficult to implement; DATE:2014-05-06
, "</p>"
, ""
));
}
@Test public void Lnki_nth() { // PURPOSE: if || is nth pipe, then treat as lnki; PAGE:en.w:Main_Page;de.w:Main_Page; DATE:2014-05-06
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|[[File:A.png|b||c]]"
, "|}"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td><a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"c\" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
}
@Test public void Lnki_list_1st() { // PURPOSE: if || is 1st pipe, but inside list, then treat as lnki; EX:w:Second_Boer_War; DATE:2014-05-05
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|"
, "*[[A||b]]"
, "|}"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>"
, ""
, " <ul>"
, " <li><a href=\"/wiki/A\">b</a>" // NOTE: technically this should be "|b", but difficult to implement; DATE:2014-05-06
, " </li>"
, " </ul>"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
}
@Test public void Double_bang_lnki() { // PURPOSE: do not treat !! as tblw; PAGE:en.w:Pink_(singer); DATE:2014-06-25
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|"
, "|"
, "[[A!!b]]"
, "|}"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>"
, ""
, "<p><a href=\"/wiki/A!!b\">A!!b</a>"
, "</p>"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
}
@Test public void Double_bang_list() { // PURPOSE: do not treat !! as tblw; PAGE:en.w:Wikipedia:Featured_picture_candidates; DATE:2014-10-19
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "{|"
, "* a !! b"
, "|}"
) , String_.Concat_lines_nl_skip_last
( "<table>"
, " <ul>"
, " <li> a !! b"
, " </li>"
, " </ul>"
, " <tr>"
, " <td>"
, " </td>"
, " </tr>"
, "</table>"
, "</p>" // NOTE: </p> is incorrect, but benign
)
);
}
}

View File

@@ -0,0 +1,94 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Parser tests for malformed wiki tables (tblw): stray rows, misplaced captions and auto-opened rows. */
public class Xop_tblw_wkr__errs_tst {
  private Xop_fxt fxt = new Xop_fxt();

  // Two consecutive "|-" lines: the expected tree has a first tr with no subs, followed by the tr that owns the cell.
  @Test public void Err_row_empty() {
    String wtxt = "{|\n|-\n|-\n|a\n|}";
    fxt.Test_parse_page_wiki(wtxt,
      fxt.tkn_tblw_tb_(0, 14).Subs_(
        fxt.tkn_tblw_tr_(2, 5),
        fxt.tkn_tblw_tr_(5, 11).Subs_(
          fxt.tkn_tblw_td_(8, 11).Subs_(fxt.tkn_txt_(10, 11), fxt.tkn_para_blank_(12)))));
  }

  // A "|-" right before "|}": the expected tree lists only the first tr; no token is expected for the trailing row.
  @Test public void Err_row_trailing() {
    String wtxt = "{|\n|-\n|a\n|-\n|}";
    fxt.Test_parse_page_wiki(wtxt,
      fxt.tkn_tblw_tb_(0, 14).Subs_(
        fxt.tkn_tblw_tr_(2, 8).Subs_(
          fxt.tkn_tblw_td_(5, 8).Subs_(fxt.tkn_txt_(7, 8), fxt.tkn_para_blank_(9)))));
  }

  // Caption ("|+") placed after a row marker: expected as a sibling of the tr, with caption count 1.
  @Test public void Err_caption_after_tr() {
    String wtxt = "{|\n|-\n|+a\n|}";
    fxt.Test_parse_page_wiki(wtxt,
      fxt.tkn_tblw_tb_(0, 12).Caption_count_(1).Subs_(
        fxt.tkn_tblw_tr_(2, 5),
        fxt.tkn_tblw_tc_(5, 9).Subs_(fxt.tkn_txt_(8, 9), fxt.tkn_para_blank_(10))));
  }

  // Caption placed after a cell; the fixture is primed with Xop_tblw_log.Caption_after_td before parsing.
  @Test public void Err_caption_after_td() {
    String wtxt = "{|\n|-\n|a\n|+b\n|}";
    fxt.Init_log_(Xop_tblw_log.Caption_after_td).Test_parse_page_wiki(wtxt,
      fxt.tkn_tblw_tb_(0, 15).Caption_count_(1).Subs_(
        fxt.tkn_tblw_tr_(2, 8).Subs_(
          fxt.tkn_tblw_td_(5, 8).Subs_(fxt.tkn_txt_(7, 8))),
        fxt.tkn_tblw_tc_(8, 12).Subs_(fxt.tkn_txt_(11, 12), fxt.tkn_para_blank_(13))));
  }

  // Two captions in a row; the fixture is primed with Xop_tblw_log.Caption_after_tc and caption count is 2.
  @Test public void Err_caption_after_tc() {
    String wtxt = "{|\n|+a\n|+b\n|}";
    fxt.Init_log_(Xop_tblw_log.Caption_after_tc).Test_parse_page_wiki(wtxt,
      fxt.tkn_tblw_tb_(0, 13).Caption_count_(2).Subs_(
        fxt.tkn_tblw_tc_(2, 6).Subs_(fxt.tkn_txt_(5, 6)),
        fxt.tkn_tblw_tc_(6, 10).Subs_(fxt.tkn_txt_(9, 10), fxt.tkn_para_blank_(11))));
  }

  // A cell with no preceding "|-": the expected tr shares the cell's span (2, 5).
  @Test public void Err_row_auto_opened() {
    String wtxt = "{|\n|a\n|}";
    fxt.Test_parse_page_wiki(wtxt,
      fxt.tkn_tblw_tb_(0, 8).Subs_(
        fxt.tkn_tblw_tr_(2, 5).Subs_(
          fxt.tkn_tblw_td_(2, 5).Subs_(fxt.tkn_txt_(4, 5), fxt.tkn_para_blank_(6)))));
  }

  // A cell directly after a caption: the caption token ends where the row / cell tokens begin.
  @Test public void Err_caption_auto_closed() {
    String wtxt = "{|\n|+a\n|b\n|}";
    fxt.Test_parse_page_wiki(wtxt,
      fxt.tkn_tblw_tb_(0, 12).Caption_count_(1).Subs_(
        fxt.tkn_tblw_tc_(2, 6).Subs_(fxt.tkn_txt_(5, 6)),
        fxt.tkn_tblw_tr_(6, 9).Subs_(
          fxt.tkn_tblw_td_(6, 9).Subs_(fxt.tkn_txt_(8, 9), fxt.tkn_para_blank_(10)))));
  }

  // PURPOSE: [[Prawn]] and {{Taxobox}} was dumping text
  // Attributes on the second "|-" must land on the emitted <tr>, not in the cell text.
  @Test public void Err_Atrs_dumped_into_text() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      "|-",
      "|-id='a'",
      "|b",
      "|}");
    String html = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr id='a'>",
      " <td>b",
      " </td>",
      " </tr>",
      "</table>",
      "");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }
}

View File

@@ -0,0 +1,200 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Parser tests for tables nested inside other tables, captions or rows, mixing wiki (tblw) and html (tblx) syntax. */
public class Xop_tblw_wkr__nested_tst {
  private Xop_fxt fxt = new Xop_fxt();

  // A wiki table inside a cell of another wiki table; checked at token level.
  @Test public void Basic() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      "|-",
      "|",
      "{|",
      "|-",
      "|a",
      "|}",
      "|b",
      "|}");
    fxt.Test_parse_page_wiki(wtxt,
      fxt.tkn_tblw_tb_(0, 25).Subs_(
        fxt.tkn_tblw_tr_(2, 22).Subs_(
          fxt.tkn_tblw_td_(5, 19).Subs_(
            fxt.tkn_tblw_tb_(7, 19).Subs_(
              fxt.tkn_tblw_tr_(10, 16).Subs_(
                fxt.tkn_tblw_td_(13, 16).Subs_(fxt.tkn_txt_(15, 16), fxt.tkn_para_blank_(17)))),
            fxt.tkn_para_blank_(20)),
          fxt.tkn_tblw_td_(19, 22).Subs_(fxt.tkn_txt_(21, 22), fxt.tkn_para_blank_(23)))));
  }

  // A nested table whose markup lines start with a space; runs with para mode switched on.
  @Test public void Leading_ws() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|id='a'",
      "|-",
      "|a",
      "|-",
      "|id='b'|",
      " {|id='c'",
      " |-",
      " |d",
      " |}",
      "|}");
    String html = String_.Concat_lines_nl_skip_last(
      "<table id='a'>",
      " <tr>",
      " <td>a",
      " </td>",
      " </tr>",
      " <tr>",
      " <td id='b'>",
      " <table id='c'>",
      " <tr>",
      " <td>d",
      " </td>",
      " </tr>",
      " </table>",
      " </td>",
      " </tr>",
      "</table>",
      "");
    fxt.Init_para_y_();
    try {
      fxt.Test_parse_page_wiki_str(wtxt, html);
    }
    finally {
      // always switch para mode back off, even when the assertion above fails;
      // previously a failure skipped the reset (sibling test classes do this reset in an @After method)
      fxt.Init_para_n_();
    }
  }

  // PURPOSE: if <table> followed by {|, ignore 2nd table; EX: en.b:Wikibooks:Featured_books; DATE:2014-02-08
  @Test public void Tblx_tblw() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "<table cellpadding=\"0\">",
      "{| cellspacing=\"0\"",
      "|a",
      "|}",
      "</table>");
    String html = String_.Concat_lines_nl_skip_last(
      "<table cellpadding=\"0\">",
      " <tr>",
      " <td>a",
      " </td>",
      " </tr>",
      "</table>",
      "");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }

  // TIDY: don't try to fix <caption><table> sequence; PAGE:es.w:Sevilla; DATE:2014-06-29
  @Test public void Caption_and_tblw() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      "|+",
      "{|",
      "|}",
      "|}");
    String html = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <caption>",
      " <table>",
      " <tr>",
      " <td>",
      " </td>",
      " </tr>",
      " </table>",
      " </caption>",
      "</table>",
      "");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }

  // PURPOSE: if <tr><table>, auto-create <tr><td>; EX:w:Paris; DATE:2014-03-18
  @Test public void Tb_tr_tb() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      "|-",
      "{|",
      "|}",
      "|}");
    String html = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr>",
      " <table>",
      " <tr>",
      " <td>",
      " </td>",
      " </tr>",
      " </table>",
      " </tr>",
      "</table>",
      "");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }

  // PURPOSE: {| -> <table> -> \n| was not rendering as <td>; PAGE:en.w:Paris#Demographics; DATE:2014-03-18
  @Test public void Tblw_tblx_tblw_fails() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      "|-",
      "|a",
      "</td></tr>",
      "<tr><td><table>",
      "<tr><td>b</td>",
      "</tr>",
      "|c",
      "</td></tr></table>",
      "|}");
    String html = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr>",
      " <td>a",
      " </td>",
      " </tr>",
      " <tr>",
      " <td>",
      " <table>",
      " <tr>",
      " <td>b",
      " </td>",
      " </tr>",
      " <tr>",
      " <td>c",
      " </td>",
      " </tr>",
      " </table>",
      " </td>",
      " </tr>",
      "</table>");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }

  // Disabled test, kept as found.
  // NOTE(review): the expected html below mentions "[[b|c" and "d", neither of which occurs in the input;
  // the expectation needs to be rewritten before this test can be re-enabled.
//  @Test public void Nested_tbl_missing() { // PURPOSE: nested table not rendering properly; EX:ar.s:; DATE:2014-03-18
//    fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
//    ( "{|"
//    , "|-"
//    , "{|"
//    , "|-"
//    , "|}"
//    , "| width='50%' | a"
//    , "|}"
//    ), String_.Concat_lines_nl_skip_last
//    ( "<table>"
//    , " <tr>"
//    , " <td>a"
//    , " </td>"
//    , " <td>[[b|c"
//    , " </td>"
//    , " </tr>"
//    , "</table>"
//    , ""
//    , "<p>d"
//    , "</p>"
//    ));
//  }
}

View File

@@ -0,0 +1,156 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Parser tests for wiki tables (tblw) rendered with para mode switched on for every test. */
public class Xop_tblw_wkr__para_tst {
  private Xop_fxt fxt = new Xop_fxt();

  // reset the fixture and switch para mode on before each test
  @Before public void init() {
    fxt.Reset();
    fxt.Init_para_y_();
  }
  // switch para mode back off after each test
  @After public void term() {
    fxt.Init_para_n_();
  }

  // PURPOSE: para causing strange breaks; SEE:[[John F. Kennedy]] and "two Supreme Court appointments"
  @Test public void Para() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      "<p></p>",
      "|a",
      "<p></p>",
      "|}");
    String html = String_.Concat_lines_nl_skip_last(
      "<table><p></p>",
      " <tr>",
      " <td>a",
      "<p></p>",
      " </td>",
      " </tr>",
      "</table>",
      "");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }

  // PURPOSE: para causing strange breaks; SEE:[[John F. Kennedy]] and "two Supreme Court appointments"
  // Variation with a blank line between the header cell and the next row marker.
  @Test public void Nl() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      "|-",
      "!a",
      "",
      "|-",
      "|}");
    String html = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr>",
      " <th>a",
      " </th>",
      " </tr>",
      "</table>",
      "");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }

  // PURPOSE: tblw causes unnecessary <p>; [[Help:Download]]; DATE:2014-02-20
  @Test public void Unnecessary_para() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      "|-",
      "|",
      "a<br/>",
      "b",
      "|",
      "c<br/>",
      "d",
      "|}");
    String html = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr>",
      " <td>",
      "",
      "<p>a<br/>",
      "b",
      "</p>",
      " </td>",
      " <td>",
      "",
      "<p>c<br/>",
      "d",
      "</p>",
      " </td>",
      " </tr>",
      "</table>",
      "");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }

  // PAGE:en.w:AGPLv3
  // Header cells whose "!" is preceded by a space are still expected to render as <th>.
  @Test public void Ws_leading() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      " !a",
      " !b",
      "|}");
    String html = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr>",
      " <th>a",
      " </th>",
      " <th>b",
      " </th>",
      " </tr>",
      "</table>",
      "");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }

  // "\n\s!" should still be interpreted as tblw; s.w:Manchester; DATE:2014-02-14
  @Test public void Ws_th_2() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      "|-",
      "|!style='color:red'|a",
      " !style=\"color:blue\"|b",
      "|}");
    String html = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr>",
      " <td>a",
      " </td>",
      " <th style=\"color:blue\">b",
      " </th>",
      " </tr>",
      "</table>",
      "");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }

  // "\n\s!" and "!!" breaks tblw; ru.w:Храмы_Санкт-Петербурга (List of churches in St Petersburg); DATE:2014-02-20
  @Test public void Ws_th_3() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      " ! id='1' | a !! id='2' | b",
      "|}");
    String html = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr>",
      " <th id='1'> a ",
      " </th>",
      " <th id='2'> b",
      " </th>",
      " </tr>",
      "</table>",
      "");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }

  // PURPOSE: a||b -> a\n||b; EX:none;discovered during luaj test; DATE:2014-04-14
  // Outside a table, "||" must come through as literal text.
  @Test public void Tblw_td2_should_not_create_ws() {
    String wtxt = "a||b";
    String html = "<p>a||b\n</p>";
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }
}

View File

@@ -0,0 +1,71 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Parser tests checking that wiki-table markup is left alone when it appears inside an html table. */
public class Xop_tblw_wkr__tblx_tst {
  private Xop_fxt fxt = new Xop_fxt();

  // reset the fixture and switch para mode on before each test
  @Before public void init() {
    fxt.Reset();
    fxt.Init_para_y_();
  }
  // switch para mode back off after each test
  @After public void term() {
    fxt.Init_para_n_();
  }

  // PURPOSE: do not parse pipe as td if in <table>; EX:ru.w:Сочи; DATE:2014-02-22
  @Test public void Ignore_td() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr>",
      " <td>a",
      "| b",
      " </td>",
      " </tr>",
      "</table>");
    // expected: the "| b" line is passed through as text
    String html = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr>",
      " <td>a",
      "| b",
      " </td>",
      " </tr>",
      "</table>",
      "");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }

  // PURPOSE: do not parse "\n|-", "\n!" if in <table>; EX:s.w:Uranus; DATE:2014-05-05
  @Test public void Ignore_tr() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr>",
      " <td>a",
      "|-",
      "! b",
      "| c",
      " </td>",
      " </tr>",
      "</table>");
    // expected: the three markup-looking lines end up as one paragraph of text
    String html = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr>",
      " <td>a",
      "",
      "<p>|-",
      "! b",
      "| c",
      "</p>",
      " </td>",
      " </tr>",
      "</table>",
      "");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }
}

View File

@@ -0,0 +1,104 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/** Parser tests for rarely-seen wiki-table inputs: html captions, odd attributes and unclosed links. */
public class Xop_tblw_wkr__uncommon_tst {
  private Xop_fxt fxt = new Xop_fxt();

  // reset the fixture and switch para mode on before each test
  @Before public void init() {
    fxt.Reset();
    fxt.Init_para_y_();
  }
  // switch para mode back off after each test
  @After public void term() {
    fxt.Init_para_n_();
  }

  // PURPOSE: in strange cases, tr will pop entire stack; PAGE:en.w:Turks_in_Denmark; DATE:2014-03-02
  @Test public void Tr_pops_entire_stack() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      "<caption>a",
      "|b",
      "|-",
      "|c",
      "|}");
    // note: built with Concat_lines_nl, unlike the other expectations in this class
    String html = String_.Concat_lines_nl(
      "<table>",
      " <caption>a",
      " </caption>",
      " <tr>",
      " <td>b",
      " </td>",
      " </tr>",
      " <tr>",
      " <td>c",
      " </td>",
      " </tr>",
      "</table>");
    fxt.Test_parse_page_all_str(wtxt, html);
  }

  // PURPOSE: < in atrs was causing premature termination; PAGE:en.w:Wikipedia:List of hoaxes on Wikipedia
  @Test public void Atrs_defect() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|id=\"a<b\"",
      "|a",
      "|}");
    // expected: the "<" inside the id value comes out as ".3C"
    String html = String_.Concat_lines_nl_skip_last(
      "<table id=\"a.3Cb\">",
      " <tr>",
      " <td>a",
      " </td>",
      " </tr>",
      "</table>",
      "");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }

  // PURPOSE: broken lnki was not closing table properly; PAGE:en.w:Wikipedia:Changing_attribution_for_an_edit; DATE:2014-03-16
  @Test public void Broken_lnki() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      "|-",
      "|a",
      "|[[b|c",
      "|}",
      "d");
    // expected: the unclosed "[[b|c" stays literal, the table closes, and "d" becomes its own paragraph
    String html = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr>",
      " <td>a",
      " </td>",
      " <td>[[b|c",
      " </td>",
      " </tr>",
      "</table>",
      "",
      "<p>d",
      "</p>");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }

  // PURPOSE: variation on Broken_lnki; PAGE:hr.b:Knjiga_pojmova_u_zrakoplovstvu/Kratice_u_zrakoplovstvu/S; DATE:2014-09-05
  @Test public void Broken_lnki_2() {
    String wtxt = String_.Concat_lines_nl_skip_last(
      "{|",
      "|-",
      "| [[A | b",
      "|-",
      "| B",
      "|}");
    // expected: the unclosed link does not swallow the following row
    String html = String_.Concat_lines_nl_skip_last(
      "<table>",
      " <tr>",
      " <td> [[A | b",
      " </td>",
      " </tr>",
      " <tr>",
      " <td> B",
      " </td>",
      " </tr>",
      "</table>");
    fxt.Test_parse_page_wiki_str(wtxt, html);
  }
}

View File

@@ -0,0 +1,63 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
/**
 * Value stored in the trie built by {@link #trie_()}: pairs a table-token type with a hook length.
 * Keys are wiki-table hooks, a lone pipe, a newline, and the opening / closing prefixes of a fixed set of html tags.
 */
public class Xop_tblw_ws_itm {
  private byte tblw_type;
  private int hook_len;

  public Xop_tblw_ws_itm(byte tblw_type, int hook_len) {
    this.tblw_type = tblw_type;
    this.hook_len = hook_len;
  }
  /** One of the Type_* constants below. */
  public byte Tblw_type() {return tblw_type;}
  /** Length recorded at registration time; see trie_itm and trie_itm_xnde for how it is derived. */
  public int Hook_len() {return hook_len;}

  public static final byte
    Type_tb = Xop_tblw_wkr.Tblw_type_tb
  , Type_te = Xop_tblw_wkr.Tblw_type_te
  , Type_tr = Xop_tblw_wkr.Tblw_type_tr
  , Type_tc = Xop_tblw_wkr.Tblw_type_tc
  , Type_th = Xop_tblw_wkr.Tblw_type_th
  , Type_td = Xop_tblw_wkr.Tblw_type_td
  , Type_nl = 16
  , Type_xnde = 17
  ;

  // NOTE(review): despite the names, Bry_xnde_bgn holds "</" and Bry_xnde_end holds "<"
  static byte[] Bry_xnde_bgn = new byte[] {Byte_ascii.Lt, Byte_ascii.Slash}, Bry_xnde_end = new byte[] {Byte_ascii.Lt};

  /** Builds a new trie holding every hook; a fresh instance is created on each call. MW.REF:Parser.php|doBlockLevels */
  public static Btrie_slim_mgr trie_() {
    Btrie_slim_mgr trie = Btrie_slim_mgr.cs_();

    // wiki-table hooks, then a lone pipe and a newline
    trie_itm(trie, Type_tb, Xop_tblw_lxr_ws.Hook_tb);
    trie_itm(trie, Type_te, Xop_tblw_lxr_ws.Hook_te);
    trie_itm(trie, Type_tr, Xop_tblw_lxr_ws.Hook_tr);
    trie_itm(trie, Type_th, Xop_tblw_lxr_ws.Hook_th);
    trie_itm(trie, Type_tc, Xop_tblw_lxr_ws.Hook_tc);
    trie_itm(trie, Type_td, Bry_.bytes_(Byte_ascii.Pipe));
    trie_itm(trie, Type_nl, Bry_.bytes_(Byte_ascii.Nl));

    // html tags, registered in the same order as before
    Xop_xnde_tag[] tags = new Xop_xnde_tag[]
    { Xop_xnde_tag_.Tag_table
    , Xop_xnde_tag_.Tag_tr
    , Xop_xnde_tag_.Tag_td
    , Xop_xnde_tag_.Tag_th
    , Xop_xnde_tag_.Tag_blockquote
    , Xop_xnde_tag_.Tag_h1
    , Xop_xnde_tag_.Tag_h2
    , Xop_xnde_tag_.Tag_h3
    , Xop_xnde_tag_.Tag_h4
    , Xop_xnde_tag_.Tag_h5
    , Xop_xnde_tag_.Tag_h6
    , Xop_xnde_tag_.Tag_pre
    , Xop_xnde_tag_.Tag_p
    , Xop_xnde_tag_.Tag_div
    , Xop_xnde_tag_.Tag_hr
    , Xop_xnde_tag_.Tag_li
    , Xop_xnde_tag_.Tag_ul
    , Xop_xnde_tag_.Tag_ol
    };
    for (int idx = 0; idx < tags.length; idx++)
      trie_itm_xnde(trie, tags[idx]);
    return trie;
  }

  // registers hook under the given type; hook_len is the full length of the hook
  private static void trie_itm(Btrie_slim_mgr trie, byte type, byte[] bry) {
    Xop_tblw_ws_itm itm = new Xop_tblw_ws_itm(type, bry.length);
    trie.Add_obj(bry, itm);
  }

  // registers "</name" and "<name" for the tag, both as Type_xnde
  // NOTE(review): hook_len is name length for the "</name" key and name length + 1 for the "<name" key,
  // so for "</name" it is not the key length; confirm against the code that reads Hook_len()
  private static void trie_itm_xnde(Btrie_slim_mgr trie, Xop_xnde_tag tag) {
    byte[] name = tag.Name_bry();
    int name_len = name.length;

    byte[] slash_key = Bry_.Add(Bry_xnde_bgn, name);
    trie.Add_obj(slash_key, new Xop_tblw_ws_itm(Type_xnde, name_len));

    byte[] plain_key = Bry_.Add(Bry_xnde_end, name);
    trie.Add_obj(plain_key, new Xop_tblw_ws_itm(Type_xnde, name_len + 1));
  }
}

View File

@@ -0,0 +1,68 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tmpls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.html.*; import gplx.xowa.parsers.amps.*;
/**
 * One escape rule for nowiki content: a source byte sequence and the bytes that replace it.
 * The static Escape method applies all registered rules to a slice of a byte array.
 */
public class Nowiki_escape_itm {
  private byte[] src;
  private byte[] trg;
  private int src_adj; // src.length - 1; extra bytes to skip after a match, beyond the normal one-byte step

  public Nowiki_escape_itm(byte[] src, byte[] trg) {
    this.src = src;
    this.trg = trg;
    this.src_adj = src.length - 1;
  }
  public byte[] Src() {return src;}
  public byte[] Trg() {return trg;}

  /**
   * <nowiki> works by escaping all wtxt symbols so that wtxt parser does not hook into any of them.
   * Scans src[bgn..end); nothing is written to tmp_bfr until the first rule matches, at which point
   * the unmatched prefix is copied in and every later byte / replacement is appended.
   * @return true if at least one rule matched (tmp_bfr then holds the escaped slice); false if tmp_bfr was left untouched
   */
  public static boolean Escape(Bry_bfr tmp_bfr, byte[] src, int bgn, int end) {
    boolean changed = false;
    int pos = bgn;
    while (pos < end) {
      byte cur = src[pos];
      Object found = trie.Match_bgn_w_byte(cur, src, pos, end);
      if (found != null) {
        if (!changed) {
          // first hit: flush everything before it, which has not been copied yet
          tmp_bfr.Add_mid(src, bgn, pos);
          changed = true;
        }
        Nowiki_escape_itm rule = (Nowiki_escape_itm)found;
        tmp_bfr.Add(rule.Trg());
        pos += rule.src_adj + 1; // step over the whole matched source sequence
      }
      else {
        if (changed)
          tmp_bfr.Add_byte(cur);
        pos++;
      }
    }
    return changed;
  }

  // NOTE: must be declared before trie, because trie_new reads it during static initialization
  private static final byte[] Pre_bry = new byte[] {Byte_ascii.Nl, Byte_ascii.Space};
  private static final Btrie_slim_mgr trie = trie_new();

  // builds the rule table used by Escape
  private static Btrie_slim_mgr trie_new() {
    Btrie_slim_mgr rules = Btrie_slim_mgr.cs_();
    trie_new_itm(rules, Byte_ascii.Lt_bry, Xop_amp_trie.Bry_xowa_lt);
    trie_new_itm(rules, Byte_ascii.Brack_bgn_bry, Xop_amp_trie.Bry_xowa_brack_bgn);
    // PAGE:en.w: Tall_poppy_syndrome DATE:2014-07-23
    trie_new_itm(rules, Byte_ascii.Brack_end_bry, Xop_amp_trie.Bry_xowa_brack_end);
    trie_new_itm(rules, Byte_ascii.Pipe_bry, Xop_amp_trie.Bry_xowa_pipe);
    // NOTE: for backward compatibility, use &apos; note that amp_wkr will turn &apos; -> &#39 but &#39 -> '; DATE:2014-07-03
    trie_new_itm(rules, Byte_ascii.Apos_bry, Xop_amp_trie.Bry_xowa_apos);
    trie_new_itm(rules, Byte_ascii.Colon_bry, Xop_amp_trie.Bry_xowa_colon);
    trie_new_itm(rules, Byte_ascii.Underline_bry, Xop_amp_trie.Bry_xowa_underline);
    trie_new_itm(rules, Byte_ascii.Asterisk_bry, Xop_amp_trie.Bry_xowa_asterisk);
    // needed to handle "|<nowiki>-</nowiki>"; PAGE:de.w:Liste_von_Vereinen_und_Vereinigungen_von_Gläubigen_(römisch-katholische_Kirche) DATE:2015-01-08
    trie_new_itm(rules, Byte_ascii.Dash_bry, Xop_amp_trie.Bry_xowa_dash);
    trie_new_itm(rules, Byte_ascii.Space_bry, Xop_amp_trie.Bry_xowa_space);
    trie_new_itm(rules, Byte_ascii.Nl_bry, Xop_amp_trie.Bry_xowa_nl);
    // newline + space is registered with itself as the replacement
    trie_new_itm(rules, Pre_bry, Pre_bry);
    return rules;
  }

  // wraps src / trg in a rule and registers it under src
  private static void trie_new_itm(Btrie_slim_mgr rv, byte[] src, byte[] trg) {
    rv.Add_obj(src, new Nowiki_escape_itm(src, trg));
  }
}