mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.7.2.1
This commit is contained in:
28
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_lxr.java
Normal file
28
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_lxr.java
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_amp_lxr implements Xop_lxr {
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_amp;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Amp, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
return ctx.Amp().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
|
||||
}
|
||||
public static final Xop_amp_lxr _ = new Xop_amp_lxr();
|
||||
}
|
||||
121
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_mgr.java
Normal file
121
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_mgr.java
Normal file
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_amp_mgr {
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr.reset_(32);
|
||||
public Btrie_slim_mgr Amp_trie() {return amp_trie;} private final Btrie_slim_mgr amp_trie = Xop_amp_trie._;
|
||||
public int Rslt_pos() {return rslt_pos;} private int rslt_pos;
|
||||
public int Rslt_val() {return rslt_val;} private int rslt_val;
|
||||
public Xop_tkn_itm Parse_as_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int cur_pos) {
|
||||
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
Object o = amp_trie.Match_bgn(src, cur_pos, src_len);
|
||||
cur_pos = amp_trie.Match_pos();
|
||||
if (o == null) return null;
|
||||
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
|
||||
switch (itm.Tid()) {
|
||||
case Xop_amp_trie_itm.Tid_name_std:
|
||||
case Xop_amp_trie_itm.Tid_name_xowa:
|
||||
rslt_pos = cur_pos;
|
||||
return tkn_mkr.Amp_txt(amp_pos, cur_pos, itm);
|
||||
case Xop_amp_trie_itm.Tid_num_hex:
|
||||
case Xop_amp_trie_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
|
||||
boolean pass = Parse_as_int(ncr_is_hex, src, src_len, amp_pos, cur_pos);
|
||||
return pass ? tkn_mkr.Amp_num(amp_pos, rslt_pos, rslt_val) : null;
|
||||
default: throw Exc_.new_unhandled(itm.Tid());
|
||||
}
|
||||
}
|
||||
public boolean Parse_as_int(boolean ncr_is_hex, byte[] src, int src_len, int amp_pos, int int_bgn) {
|
||||
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
rslt_val = -1; // clear any previous setting
|
||||
int cur_pos = int_bgn, int_end = -1;
|
||||
int semic_pos = Bry_finder.Find_fwd(src, Byte_ascii.Semic, cur_pos, src_len);
|
||||
if (semic_pos == Bry_finder.Not_found) return false;
|
||||
int_end = semic_pos - 1; // int_end = pos before semicolon
|
||||
int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
|
||||
for (int i = int_end; i >= int_bgn; i--) {
|
||||
byte b = src[i];
|
||||
if (ncr_is_hex) {
|
||||
if (b >= 48 && b <= 57) cur = b - 48;
|
||||
else if (b >= 65 && b <= 70) cur = b - 55;
|
||||
else if (b >= 97 && b <= 102) cur = b - 87;
|
||||
else if((b >= 71 && b <= 90)
|
||||
|| (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
|
||||
else return false;
|
||||
}
|
||||
else {
|
||||
cur = b - Byte_ascii.Num_0;
|
||||
if (cur < 0 || cur > 10) return false;
|
||||
}
|
||||
val += cur * factor;
|
||||
if (val > gplx.intl.Utf8_.Codepoint_max) return false; // fail if value > largest_unicode_codepoint
|
||||
factor *= multiple;
|
||||
}
|
||||
rslt_val = val;
|
||||
rslt_pos = semic_pos + 1; // position after semic
|
||||
return true;
|
||||
}
|
||||
public byte[] Decode_as_bry(byte[] src) {
|
||||
if (src == null) return src;
|
||||
int src_len = src.length;
|
||||
boolean dirty = false;
|
||||
int pos = 0;
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Amp) {
|
||||
int nxt_pos = pos + 1;
|
||||
if (nxt_pos < src_len) {
|
||||
byte nxt_b = src[nxt_pos];
|
||||
Object amp_obj = amp_trie.Match_bgn_w_byte(nxt_b, src, nxt_pos, src_len);
|
||||
if (amp_obj != null) {
|
||||
if (!dirty) {
|
||||
tmp_bfr.Add_mid(src, 0, pos);
|
||||
dirty = true;
|
||||
}
|
||||
Xop_amp_trie_itm amp_itm = (Xop_amp_trie_itm)amp_obj;
|
||||
switch (amp_itm.Tid()) {
|
||||
case Xop_amp_trie_itm.Tid_name_std:
|
||||
case Xop_amp_trie_itm.Tid_name_xowa:
|
||||
tmp_bfr.Add(amp_itm.Utf8_bry());
|
||||
pos = amp_trie.Match_pos();
|
||||
break;
|
||||
case Xop_amp_trie_itm.Tid_num_hex:
|
||||
case Xop_amp_trie_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = amp_itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
|
||||
int int_bgn = amp_trie.Match_pos();
|
||||
if (Parse_as_int(ncr_is_hex, src, src_len, pos, int_bgn))
|
||||
tmp_bfr.Add_u8_int(rslt_val);
|
||||
else
|
||||
tmp_bfr.Add_mid(src, pos, nxt_pos);
|
||||
pos = rslt_pos;
|
||||
break;
|
||||
default:
|
||||
throw Exc_.new_unhandled(amp_itm.Tid());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (dirty)
|
||||
tmp_bfr.Add_byte(b);
|
||||
++pos;
|
||||
}
|
||||
return dirty ? tmp_bfr.Xto_bry_and_clear() : src;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_amp_mgr_decode_tst {
|
||||
@Before public void init() {fxt.Reset();} private Xop_amp_mgr_fxt fxt = new Xop_amp_mgr_fxt();
|
||||
@Test public void Text() {fxt.Test_decode_as_bry("a" , "a");}
|
||||
@Test public void Name() {fxt.Test_decode_as_bry("&" , "&");}
|
||||
@Test public void Name_w_text() {fxt.Test_decode_as_bry("a&b" , "a&b");}
|
||||
@Test public void Name_fail_semic_missing() {fxt.Test_decode_as_bry("a&b" , "a&b");}
|
||||
@Test public void Name_fail_amp_only() {fxt.Test_decode_as_bry("a&" , "a&");}
|
||||
@Test public void Num_fail() {fxt.Test_decode_as_bry("&#!;" , "&#!;");} // ! is not valid num
|
||||
@Test public void Hex_fail() {fxt.Test_decode_as_bry("&#x!;" , "&#x!;");} // ! is not valid hex
|
||||
@Test public void Num_basic() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Num_zero_padded() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_upper() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_lower() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_zero_padded() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_upper_x() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Num_fail_large_codepoint() {fxt.Test_decode_as_bry("�" , "�");}
|
||||
@Test public void Num_ignore_extra_x() {fxt.Test_decode_as_bry("&#xx26D0;" , Char_.XtoStr(Char_.XbyInt(9936)));} // 2nd x is ignored
|
||||
}
|
||||
class Xop_amp_mgr_fxt {
|
||||
private Xop_amp_mgr amp_mgr = new Xop_amp_mgr();
|
||||
public void Reset() {}
|
||||
public void Test_decode_as_bry(String raw, String expd) {
|
||||
Tfds.Eq(expd, String_.new_u8(amp_mgr.Decode_as_bry(Bry_.new_u8(raw))));
|
||||
}
|
||||
}
|
||||
27
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_tkn_num.java
Normal file
27
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_tkn_num.java
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_amp_tkn_num extends Xop_tkn_itm_base {
|
||||
public Xop_amp_tkn_num(int bgn, int end, int val, byte[] str_as_bry) {
|
||||
this.val = val; this.str_as_bry = str_as_bry;
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_html_ncr;}
|
||||
public int Val() {return val;} private int val;
|
||||
public byte[] Str_as_bry() {return str_as_bry;} private byte[] str_as_bry;
|
||||
}
|
||||
31
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_tkn_txt.java
Normal file
31
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_tkn_txt.java
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_amp_tkn_txt extends Xop_tkn_itm_base {
|
||||
private Xop_amp_trie_itm html_ref_itm;
|
||||
public Xop_amp_tkn_txt(int bgn, int end, Xop_amp_trie_itm html_ref_itm) {
|
||||
this.html_ref_itm = html_ref_itm;
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_html_ref;}
|
||||
public int Char_int() {return html_ref_itm.Char_int();}
|
||||
public byte[] Xml_name_bry() {return html_ref_itm.Xml_name_bry();}
|
||||
public boolean Itm_is_custom() {return html_ref_itm.Tid() == Xop_amp_trie_itm.Tid_name_xowa;}
|
||||
public void Print_ncr(Bry_bfr bfr) {html_ref_itm.Print_ncr(bfr);}
|
||||
public void Print_literal(Bry_bfr bfr) {html_ref_itm.Print_literal(bfr);}
|
||||
}
|
||||
318
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_trie.java
Normal file
318
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_trie.java
Normal file
@@ -0,0 +1,318 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_amp_trie {
|
||||
public static final byte[] // NOTE: top_define
|
||||
Bry_xowa_lt = Bry_.new_a7("&xowa_lt;")
|
||||
, Bry_xowa_brack_bgn = Bry_.new_a7("&xowa_brack_bgn;")
|
||||
, Bry_xowa_brack_end = Bry_.new_a7("&xowa_brack_end;")
|
||||
, Bry_xowa_pipe = Bry_.new_a7("&xowa_pipe;")
|
||||
, Bry_xowa_apos = Bry_.new_a7("&xowa_apos;")
|
||||
, Bry_xowa_colon = Bry_.new_a7("&xowa_colon;")
|
||||
, Bry_xowa_underline = Bry_.new_a7("&xowa_underline;")
|
||||
, Bry_xowa_asterisk = Bry_.new_a7("&xowa_asterisk;")
|
||||
, Bry_xowa_space = Bry_.new_a7("&xowa_space;")
|
||||
, Bry_xowa_nl = Bry_.new_a7("&xowa_nl;")
|
||||
, Bry_xowa_dash = Bry_.new_a7("&xowa_dash;")
|
||||
;
|
||||
public static final Btrie_slim_mgr _ = new_(); Xop_amp_trie() {}
|
||||
private static Btrie_slim_mgr new_() {// REF.MW: Sanitizer|$wgHtmlEntities; NOTE:added apos
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.cs_();
|
||||
Reg_name(rv, Bool_.Y, 60, Bry_xowa_lt);
|
||||
Reg_name(rv, Bool_.Y, 91, Bry_xowa_brack_bgn);
|
||||
Reg_name(rv, Bool_.Y, 93, Bry_xowa_brack_end);
|
||||
Reg_name(rv, Bool_.Y, 124, Bry_xowa_pipe);
|
||||
Reg_name(rv, Bool_.Y, 39, Bry_xowa_apos);
|
||||
Reg_name(rv, Bool_.Y, 58, Bry_xowa_colon);
|
||||
Reg_name(rv, Bool_.Y, 95, Bry_xowa_underline);
|
||||
Reg_name(rv, Bool_.Y, 42, Bry_xowa_asterisk);
|
||||
Reg_name(rv, Bool_.Y, 32, Bry_xowa_space);
|
||||
Reg_name(rv, Bool_.Y, 10, Bry_xowa_nl);
|
||||
Reg_name(rv, Bool_.Y, 45, Bry_xowa_dash);
|
||||
Reg_name(rv, Bool_.N, 39, "'");
|
||||
Reg_name(rv, Bool_.N, 193, "Á");
|
||||
Reg_name(rv, Bool_.N, 225, "á");
|
||||
Reg_name(rv, Bool_.N, 194, "Â");
|
||||
Reg_name(rv, Bool_.N, 226, "â");
|
||||
Reg_name(rv, Bool_.N, 180, "´");
|
||||
Reg_name(rv, Bool_.N, 198, "Æ");
|
||||
Reg_name(rv, Bool_.N, 230, "æ");
|
||||
Reg_name(rv, Bool_.N, 192, "À");
|
||||
Reg_name(rv, Bool_.N, 224, "à");
|
||||
Reg_name(rv, Bool_.N, 8501, "ℵ");
|
||||
Reg_name(rv, Bool_.N, 913, "Α");
|
||||
Reg_name(rv, Bool_.N, 945, "α");
|
||||
Reg_name(rv, Bool_.N, 38, "&");
|
||||
Reg_name(rv, Bool_.N, 8743, "∧");
|
||||
Reg_name(rv, Bool_.N, 8736, "∠");
|
||||
Reg_name(rv, Bool_.N, 197, "Å");
|
||||
Reg_name(rv, Bool_.N, 229, "å");
|
||||
Reg_name(rv, Bool_.N, 8776, "≈");
|
||||
Reg_name(rv, Bool_.N, 195, "Ã");
|
||||
Reg_name(rv, Bool_.N, 227, "ã");
|
||||
Reg_name(rv, Bool_.N, 196, "Ä");
|
||||
Reg_name(rv, Bool_.N, 228, "ä");
|
||||
Reg_name(rv, Bool_.N, 8222, "„");
|
||||
Reg_name(rv, Bool_.N, 914, "Β");
|
||||
Reg_name(rv, Bool_.N, 946, "β");
|
||||
Reg_name(rv, Bool_.N, 166, "¦");
|
||||
Reg_name(rv, Bool_.N, 8226, "•");
|
||||
Reg_name(rv, Bool_.N, 8745, "∩");
|
||||
Reg_name(rv, Bool_.N, 199, "Ç");
|
||||
Reg_name(rv, Bool_.N, 231, "ç");
|
||||
Reg_name(rv, Bool_.N, 184, "¸");
|
||||
Reg_name(rv, Bool_.N, 162, "¢");
|
||||
Reg_name(rv, Bool_.N, 935, "Χ");
|
||||
Reg_name(rv, Bool_.N, 967, "χ");
|
||||
Reg_name(rv, Bool_.N, 710, "ˆ");
|
||||
Reg_name(rv, Bool_.N, 9827, "♣");
|
||||
Reg_name(rv, Bool_.N, 8773, "≅");
|
||||
Reg_name(rv, Bool_.N, 169, "©");
|
||||
Reg_name(rv, Bool_.N, 8629, "↵");
|
||||
Reg_name(rv, Bool_.N, 8746, "∪");
|
||||
Reg_name(rv, Bool_.N, 164, "¤");
|
||||
Reg_name(rv, Bool_.N, 8224, "†");
|
||||
Reg_name(rv, Bool_.N, 8225, "‡");
|
||||
Reg_name(rv, Bool_.N, 8595, "↓");
|
||||
Reg_name(rv, Bool_.N, 8659, "⇓");
|
||||
Reg_name(rv, Bool_.N, 176, "°");
|
||||
Reg_name(rv, Bool_.N, 916, "Δ");
|
||||
Reg_name(rv, Bool_.N, 948, "δ");
|
||||
Reg_name(rv, Bool_.N, 9830, "♦");
|
||||
Reg_name(rv, Bool_.N, 247, "÷");
|
||||
Reg_name(rv, Bool_.N, 201, "É");
|
||||
Reg_name(rv, Bool_.N, 233, "é");
|
||||
Reg_name(rv, Bool_.N, 202, "Ê");
|
||||
Reg_name(rv, Bool_.N, 234, "ê");
|
||||
Reg_name(rv, Bool_.N, 200, "È");
|
||||
Reg_name(rv, Bool_.N, 232, "è");
|
||||
Reg_name(rv, Bool_.N, 8709, "∅");
|
||||
Reg_name(rv, Bool_.N, 8195, " ");
|
||||
Reg_name(rv, Bool_.N, 8194, " ");
|
||||
Reg_name(rv, Bool_.N, 917, "Ε");
|
||||
Reg_name(rv, Bool_.N, 949, "ε");
|
||||
Reg_name(rv, Bool_.N, 8801, "≡");
|
||||
Reg_name(rv, Bool_.N, 919, "Η");
|
||||
Reg_name(rv, Bool_.N, 951, "η");
|
||||
Reg_name(rv, Bool_.N, 208, "Ð");
|
||||
Reg_name(rv, Bool_.N, 240, "ð");
|
||||
Reg_name(rv, Bool_.N, 203, "Ë");
|
||||
Reg_name(rv, Bool_.N, 235, "ë");
|
||||
Reg_name(rv, Bool_.N, 8364, "€");
|
||||
Reg_name(rv, Bool_.N, 8707, "∃");
|
||||
Reg_name(rv, Bool_.N, 402, "ƒ");
|
||||
Reg_name(rv, Bool_.N, 8704, "∀");
|
||||
Reg_name(rv, Bool_.N, 189, "½");
|
||||
Reg_name(rv, Bool_.N, 188, "¼");
|
||||
Reg_name(rv, Bool_.N, 190, "¾");
|
||||
Reg_name(rv, Bool_.N, 8260, "⁄");
|
||||
Reg_name(rv, Bool_.N, 915, "Γ");
|
||||
Reg_name(rv, Bool_.N, 947, "γ");
|
||||
Reg_name(rv, Bool_.N, 8805, "≥");
|
||||
Reg_name(rv, Bool_.N, 62, ">");
|
||||
Reg_name(rv, Bool_.N, 8596, "↔");
|
||||
Reg_name(rv, Bool_.N, 8660, "⇔");
|
||||
Reg_name(rv, Bool_.N, 9829, "♥");
|
||||
Reg_name(rv, Bool_.N, 8230, "…");
|
||||
Reg_name(rv, Bool_.N, 205, "Í");
|
||||
Reg_name(rv, Bool_.N, 237, "í");
|
||||
Reg_name(rv, Bool_.N, 206, "Î");
|
||||
Reg_name(rv, Bool_.N, 238, "î");
|
||||
Reg_name(rv, Bool_.N, 161, "¡");
|
||||
Reg_name(rv, Bool_.N, 204, "Ì");
|
||||
Reg_name(rv, Bool_.N, 236, "ì");
|
||||
Reg_name(rv, Bool_.N, 8465, "ℑ");
|
||||
Reg_name(rv, Bool_.N, 8734, "∞");
|
||||
Reg_name(rv, Bool_.N, 8747, "∫");
|
||||
Reg_name(rv, Bool_.N, 921, "Ι");
|
||||
Reg_name(rv, Bool_.N, 953, "ι");
|
||||
Reg_name(rv, Bool_.N, 191, "¿");
|
||||
Reg_name(rv, Bool_.N, 8712, "∈");
|
||||
Reg_name(rv, Bool_.N, 207, "Ï");
|
||||
Reg_name(rv, Bool_.N, 239, "ï");
|
||||
Reg_name(rv, Bool_.N, 922, "Κ");
|
||||
Reg_name(rv, Bool_.N, 954, "κ");
|
||||
Reg_name(rv, Bool_.N, 923, "Λ");
|
||||
Reg_name(rv, Bool_.N, 955, "λ");
|
||||
Reg_name(rv, Bool_.N, 9001, "⟨");
|
||||
Reg_name(rv, Bool_.N, 171, "«");
|
||||
Reg_name(rv, Bool_.N, 8592, "←");
|
||||
Reg_name(rv, Bool_.N, 8656, "⇐");
|
||||
Reg_name(rv, Bool_.N, 8968, "⌈");
|
||||
Reg_name(rv, Bool_.N, 8220, "“");
|
||||
Reg_name(rv, Bool_.N, 8804, "≤");
|
||||
Reg_name(rv, Bool_.N, 8970, "⌊");
|
||||
Reg_name(rv, Bool_.N, 8727, "∗");
|
||||
Reg_name(rv, Bool_.N, 9674, "◊");
|
||||
Reg_name(rv, Bool_.N, 8206, "‎");
|
||||
Reg_name(rv, Bool_.N, 8249, "‹");
|
||||
Reg_name(rv, Bool_.N, 8216, "‘");
|
||||
Reg_name(rv, Bool_.N, 60, "<");
|
||||
Reg_name(rv, Bool_.N, 175, "¯");
|
||||
Reg_name(rv, Bool_.N, 8212, "—");
|
||||
Reg_name(rv, Bool_.N, 181, "µ");
|
||||
Reg_name(rv, Bool_.N, 183, "·");
|
||||
Reg_name(rv, Bool_.N, 8722, "−");
|
||||
Reg_name(rv, Bool_.N, 924, "Μ");
|
||||
Reg_name(rv, Bool_.N, 956, "μ");
|
||||
Reg_name(rv, Bool_.N, 8711, "∇");
|
||||
Reg_name(rv, Bool_.N, 160, " ");
|
||||
Reg_name(rv, Bool_.N, 8211, "–");
|
||||
Reg_name(rv, Bool_.N, 8800, "≠");
|
||||
Reg_name(rv, Bool_.N, 8715, "∋");
|
||||
Reg_name(rv, Bool_.N, 172, "¬");
|
||||
Reg_name(rv, Bool_.N, 8713, "∉");
|
||||
Reg_name(rv, Bool_.N, 8836, "⊄");
|
||||
Reg_name(rv, Bool_.N, 209, "Ñ");
|
||||
Reg_name(rv, Bool_.N, 241, "ñ");
|
||||
Reg_name(rv, Bool_.N, 925, "Ν");
|
||||
Reg_name(rv, Bool_.N, 957, "ν");
|
||||
Reg_name(rv, Bool_.N, 211, "Ó");
|
||||
Reg_name(rv, Bool_.N, 243, "ó");
|
||||
Reg_name(rv, Bool_.N, 212, "Ô");
|
||||
Reg_name(rv, Bool_.N, 244, "ô");
|
||||
Reg_name(rv, Bool_.N, 338, "Œ");
|
||||
Reg_name(rv, Bool_.N, 339, "œ");
|
||||
Reg_name(rv, Bool_.N, 210, "Ò");
|
||||
Reg_name(rv, Bool_.N, 242, "ò");
|
||||
Reg_name(rv, Bool_.N, 8254, "‾");
|
||||
Reg_name(rv, Bool_.N, 937, "Ω");
|
||||
Reg_name(rv, Bool_.N, 969, "ω");
|
||||
Reg_name(rv, Bool_.N, 927, "Ο");
|
||||
Reg_name(rv, Bool_.N, 959, "ο");
|
||||
Reg_name(rv, Bool_.N, 8853, "⊕");
|
||||
Reg_name(rv, Bool_.N, 8744, "∨");
|
||||
Reg_name(rv, Bool_.N, 170, "ª");
|
||||
Reg_name(rv, Bool_.N, 186, "º");
|
||||
Reg_name(rv, Bool_.N, 216, "Ø");
|
||||
Reg_name(rv, Bool_.N, 248, "ø");
|
||||
Reg_name(rv, Bool_.N, 213, "Õ");
|
||||
Reg_name(rv, Bool_.N, 245, "õ");
|
||||
Reg_name(rv, Bool_.N, 8855, "⊗");
|
||||
Reg_name(rv, Bool_.N, 214, "Ö");
|
||||
Reg_name(rv, Bool_.N, 246, "ö");
|
||||
Reg_name(rv, Bool_.N, 182, "¶");
|
||||
Reg_name(rv, Bool_.N, 8706, "∂");
|
||||
Reg_name(rv, Bool_.N, 8240, "‰");
|
||||
Reg_name(rv, Bool_.N, 8869, "⊥");
|
||||
Reg_name(rv, Bool_.N, 934, "Φ");
|
||||
Reg_name(rv, Bool_.N, 966, "φ");
|
||||
Reg_name(rv, Bool_.N, 928, "Π");
|
||||
Reg_name(rv, Bool_.N, 960, "π");
|
||||
Reg_name(rv, Bool_.N, 982, "ϖ");
|
||||
Reg_name(rv, Bool_.N, 177, "±");
|
||||
Reg_name(rv, Bool_.N, 163, "£");
|
||||
Reg_name(rv, Bool_.N, 8242, "′");
|
||||
Reg_name(rv, Bool_.N, 8243, "″");
|
||||
Reg_name(rv, Bool_.N, 8719, "∏");
|
||||
Reg_name(rv, Bool_.N, 8733, "∝");
|
||||
Reg_name(rv, Bool_.N, 936, "Ψ");
|
||||
Reg_name(rv, Bool_.N, 968, "ψ");
|
||||
Reg_name(rv, Bool_.N, 34, """);
|
||||
Reg_name(rv, Bool_.N, 8730, "√");
|
||||
Reg_name(rv, Bool_.N, 9002, "⟩");
|
||||
Reg_name(rv, Bool_.N, 187, "»");
|
||||
Reg_name(rv, Bool_.N, 8594, "→");
|
||||
Reg_name(rv, Bool_.N, 8658, "⇒");
|
||||
Reg_name(rv, Bool_.N, 8969, "⌉");
|
||||
Reg_name(rv, Bool_.N, 8221, "”");
|
||||
Reg_name(rv, Bool_.N, 8476, "ℜ");
|
||||
Reg_name(rv, Bool_.N, 174, "®");
|
||||
Reg_name(rv, Bool_.N, 8971, "⌋");
|
||||
Reg_name(rv, Bool_.N, 929, "Ρ");
|
||||
Reg_name(rv, Bool_.N, 961, "ρ");
|
||||
Reg_name(rv, Bool_.N, 8207, "‏");
|
||||
Reg_name(rv, Bool_.N, 8250, "›");
|
||||
Reg_name(rv, Bool_.N, 8217, "’");
|
||||
Reg_name(rv, Bool_.N, 8218, "‚");
|
||||
Reg_name(rv, Bool_.N, 352, "Š");
|
||||
Reg_name(rv, Bool_.N, 353, "š");
|
||||
Reg_name(rv, Bool_.N, 8901, "⋅");
|
||||
Reg_name(rv, Bool_.N, 167, "§");
|
||||
Reg_name(rv, Bool_.N, 173, "­");
|
||||
Reg_name(rv, Bool_.N, 931, "Σ");
|
||||
Reg_name(rv, Bool_.N, 963, "σ");
|
||||
Reg_name(rv, Bool_.N, 962, "ς");
|
||||
Reg_name(rv, Bool_.N, 8764, "∼");
|
||||
Reg_name(rv, Bool_.N, 9824, "♠");
|
||||
Reg_name(rv, Bool_.N, 8834, "⊂");
|
||||
Reg_name(rv, Bool_.N, 8838, "⊆");
|
||||
Reg_name(rv, Bool_.N, 8721, "∑");
|
||||
Reg_name(rv, Bool_.N, 8835, "⊃");
|
||||
Reg_name(rv, Bool_.N, 185, "¹");
|
||||
Reg_name(rv, Bool_.N, 178, "²");
|
||||
Reg_name(rv, Bool_.N, 179, "³");
|
||||
Reg_name(rv, Bool_.N, 8839, "⊇");
|
||||
Reg_name(rv, Bool_.N, 223, "ß");
|
||||
Reg_name(rv, Bool_.N, 932, "Τ");
|
||||
Reg_name(rv, Bool_.N, 964, "τ");
|
||||
Reg_name(rv, Bool_.N, 8756, "∴");
|
||||
Reg_name(rv, Bool_.N, 920, "Θ");
|
||||
Reg_name(rv, Bool_.N, 952, "θ");
|
||||
Reg_name(rv, Bool_.N, 977, "ϑ");
|
||||
Reg_name(rv, Bool_.N, 8201, " ");
|
||||
Reg_name(rv, Bool_.N, 222, "Þ");
|
||||
Reg_name(rv, Bool_.N, 254, "þ");
|
||||
Reg_name(rv, Bool_.N, 732, "˜");
|
||||
Reg_name(rv, Bool_.N, 215, "×");
|
||||
Reg_name(rv, Bool_.N, 8482, "™");
|
||||
Reg_name(rv, Bool_.N, 218, "Ú");
|
||||
Reg_name(rv, Bool_.N, 250, "ú");
|
||||
Reg_name(rv, Bool_.N, 8593, "↑");
|
||||
Reg_name(rv, Bool_.N, 8657, "⇑");
|
||||
Reg_name(rv, Bool_.N, 219, "Û");
|
||||
Reg_name(rv, Bool_.N, 251, "û");
|
||||
Reg_name(rv, Bool_.N, 217, "Ù");
|
||||
Reg_name(rv, Bool_.N, 249, "ù");
|
||||
Reg_name(rv, Bool_.N, 168, "¨");
|
||||
Reg_name(rv, Bool_.N, 978, "ϒ");
|
||||
Reg_name(rv, Bool_.N, 933, "Υ");
|
||||
Reg_name(rv, Bool_.N, 965, "υ");
|
||||
Reg_name(rv, Bool_.N, 220, "Ü");
|
||||
Reg_name(rv, Bool_.N, 252, "ü");
|
||||
Reg_name(rv, Bool_.N, 8472, "℘");
|
||||
Reg_name(rv, Bool_.N, 926, "Ξ");
|
||||
Reg_name(rv, Bool_.N, 958, "ξ");
|
||||
Reg_name(rv, Bool_.N, 221, "Ý");
|
||||
Reg_name(rv, Bool_.N, 253, "ý");
|
||||
Reg_name(rv, Bool_.N, 165, "¥");
|
||||
Reg_name(rv, Bool_.N, 376, "Ÿ");
|
||||
Reg_name(rv, Bool_.N, 255, "ÿ");
|
||||
Reg_name(rv, Bool_.N, 918, "Ζ");
|
||||
Reg_name(rv, Bool_.N, 950, "ζ");
|
||||
Reg_name(rv, Bool_.N, 8205, "‍");
|
||||
Reg_name(rv, Bool_.N, 8204, "‌");
|
||||
Reg_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#x");
|
||||
Reg_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#X");
|
||||
Reg_prefix(rv, Xop_amp_trie_itm.Tid_num_dec, "#");
|
||||
return rv;
|
||||
}
|
||||
private static void Reg_name(Btrie_slim_mgr trie, boolean tid_is_xowa, int char_int, String xml_name_str) {Reg_name(trie, tid_is_xowa, char_int, Bry_.new_a7(xml_name_str));}
|
||||
private static void Reg_name(Btrie_slim_mgr trie, boolean tid_is_xowa, int char_int, byte[] xml_name_bry) {
|
||||
byte itm_tid = tid_is_xowa ? Xop_amp_trie_itm.Tid_name_xowa : Xop_amp_trie_itm.Tid_name_std;
|
||||
Xop_amp_trie_itm itm = new Xop_amp_trie_itm(itm_tid, char_int, xml_name_bry);
|
||||
byte[] key = Bry_.Mid(xml_name_bry, 1, xml_name_bry.length); // ignore & for purpose of trie; EX: "amp;"; NOTE: must keep trailing ";" else "& " will be valid;
|
||||
trie.Add_obj(key, itm);
|
||||
}
|
||||
private static void Reg_prefix(Btrie_slim_mgr trie, byte prefix_type, String prefix) {
|
||||
byte[] prefix_ary = Bry_.new_a7(prefix);
|
||||
Xop_amp_trie_itm itm = new Xop_amp_trie_itm(prefix_type, Xop_amp_trie_itm.Char_int_null, prefix_ary);
|
||||
trie.Add_obj(prefix_ary, itm);
|
||||
}
|
||||
}
|
||||
58
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_trie_itm.java
Normal file
58
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_trie_itm.java
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.html.*; import gplx.xowa.html.lnkis.*;
|
||||
public class Xop_amp_trie_itm {
|
||||
public Xop_amp_trie_itm(byte tid, int char_int, byte[] xml_name_bry) {
|
||||
this.tid = tid;
|
||||
this.char_int = char_int;
|
||||
this.utf8_bry = gplx.intl.Utf16_.Encode_int_to_bry(char_int);
|
||||
this.xml_name_bry = xml_name_bry;
|
||||
this.key_name_len = xml_name_bry.length - 2; // 2 for & and ;
|
||||
}
|
||||
public byte Tid() {return tid;} private final byte tid;
|
||||
public int Char_int() {return char_int;} private final int char_int; // val; EX: 160
|
||||
public byte[] Utf8_bry() {return utf8_bry;} private final byte[] utf8_bry; // EX: new byte[] {192, 160}; (C2, A0)
|
||||
public byte[] Xml_name_bry() {return xml_name_bry;} private final byte[] xml_name_bry; // EX: " "
|
||||
public int Key_name_len() {return key_name_len;} private final int key_name_len; // EX: "nbsp".Len
|
||||
public void Print_ncr(Bry_bfr bfr) {
|
||||
switch (char_int) {
|
||||
case Byte_ascii.Lt: case Byte_ascii.Gt: case Byte_ascii.Quote: case Byte_ascii.Amp:
|
||||
bfr.Add(xml_name_bry); // NOTE: never write actual char; EX: "<" should be written as "<", not "<"
|
||||
break;
|
||||
default:
|
||||
bfr.Add(Xoh_lnki_title_fmtr.Escape_bgn); // &#
|
||||
bfr.Add_int_variable(char_int); // 160
|
||||
bfr.Add_byte(Byte_ascii.Semic); // ;
|
||||
break;
|
||||
}
|
||||
}
|
||||
public void Print_literal(Bry_bfr bfr) {
|
||||
switch (char_int) {
|
||||
case Byte_ascii.Lt: bfr.Add(Html_entity_.Lt_bry); break; // NOTE: never write actual char; EX: "<" should be written as "<", not "<"; MW does same; DATE:2014-11-07
|
||||
case Byte_ascii.Gt: bfr.Add(Html_entity_.Gt_bry); break;
|
||||
case Byte_ascii.Quote: bfr.Add(Html_entity_.Quote_bry); break;
|
||||
case Byte_ascii.Amp: bfr.Add(Html_entity_.Amp_bry); break;
|
||||
default:
|
||||
bfr.Add(utf8_bry); // write literal; EX: "[" not "["
|
||||
break;
|
||||
}
|
||||
}
|
||||
public static final byte Tid_name_std = 1, Tid_name_xowa = 2, Tid_num_hex = 3, Tid_num_dec = 4;
|
||||
public static final int Char_int_null = -1;
|
||||
}
|
||||
32
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_wkr.java
Normal file
32
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_wkr.java
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_amp_wkr implements Xop_ctx_wkr {
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn, int cur_pos) {
|
||||
if (cur_pos == src_len) return ctx.Lxr_make_txt_(cur_pos); // NOTE: & is last char in page; strange and rare, but don't raise error
|
||||
Xop_amp_mgr amp_mgr = ctx.App().Parser_amp_mgr();
|
||||
Xop_tkn_itm amp_tkn = amp_mgr.Parse_as_tkn(tkn_mkr, src, src_len, bgn, cur_pos);
|
||||
int rv_pos = amp_mgr.Rslt_pos();
|
||||
if (amp_tkn == null) return ctx.Lxr_make_txt_(rv_pos);
|
||||
ctx.Subs_add(root, amp_tkn);
|
||||
return rv_pos;
|
||||
}
|
||||
}
|
||||
41
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_wkr_tst.java
Normal file
41
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_wkr_tst.java
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_amp_wkr_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Name() {fxt.Test_parse_page_wiki("&" , fxt.tkn_html_ref_("&"));} // check for html_ref
|
||||
@Test public void Name_fail() {fxt.Test_parse_page_wiki("&nil;" , fxt.tkn_txt_(0, 5));} // check for text
|
||||
@Test public void Hex() {fxt.Test_parse_page_wiki("Σ" , fxt.tkn_html_ncr_(931));} // check for html_ncr; Σ: http://en.wikipedia.org/wiki/Numeric_character_reference
|
||||
@Test public void Num_fail_incomplete() {fxt.Test_parse_page_wiki("&#" , fxt.tkn_txt_());}
|
||||
@Test public void Convert_to_named() {fxt.Test_parse_page_wiki_str("&" , "&");} // note that & is printed, not &
|
||||
@Test public void Convert_to_named_amp() {fxt.Test_parse_page_wiki_str("&" , "&");} // PURPOSE: html_wtr was not handling & only
|
||||
@Test public void Convert_to_numeric() {fxt.Test_parse_page_wiki_str("á" , "á");} // testing that á is outputted, not á
|
||||
@Test public void Defect_bad_code_fails() { // PURPOSE: early rewrite of Xop_amp_mgr caused Xoh_html_wtr_escaper to fail with array out of bounds error; EX:w:Czech_Republic; DATE:2014-05-11
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "[[File:A.png|alt=<p> </p>]]" // basically checks amp parsing inside xnde inside lnki's alt (which uses different parsing code
|
||||
, "<a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\" \" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>"
|
||||
);
|
||||
}
|
||||
@Test public void Ignore_ncr() { // PURPOSE: check that ncr is unescaped; PAGE:de.w:Cross-Site-Scripting; DATE:2014-07-23
|
||||
fxt.Test_parse_page_all_str
|
||||
( "a <code><iframe></code>) b"
|
||||
, "a <code><iframe></code>) b" // < should not become <
|
||||
);
|
||||
}
|
||||
}
|
||||
83
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_dat.java
Normal file
83
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_dat.java
Normal file
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_apos_dat {
|
||||
public int State() {return state;} public void State_clear() {state = Xop_apos_tkn_.State_nil;} private int state = Xop_apos_tkn_.State_nil;
|
||||
public int Typ() {return typ;} private int typ;
|
||||
public int Cmd() {return cmd;} private int cmd;
|
||||
public int Lit_apos() {return lit_apos;} private int lit_apos;
|
||||
public int Dual_cmd() {return dual_cmd;} private int dual_cmd;
|
||||
public void Ident(Xop_ctx ctx, byte[] src, int apos_len, int cur_pos) {
|
||||
typ = cmd = lit_apos = dual_cmd = 0;
|
||||
switch (apos_len) {
|
||||
case Xop_apos_tkn_.Len_ital: case Xop_apos_tkn_.Len_bold: case Xop_apos_tkn_.Len_dual:
|
||||
Ident_props(apos_len); break;
|
||||
case Xop_apos_tkn_.Len_apos_bold:
|
||||
lit_apos = 1;
|
||||
Ident_props(Xop_apos_tkn_.Len_bold); break;
|
||||
default:
|
||||
lit_apos = apos_len - Xop_apos_tkn_.Len_dual;
|
||||
Ident_props(Xop_apos_tkn_.Len_dual);
|
||||
if (lit_apos > 1)
|
||||
ctx.Msg_log().Add_itm_none(Xop_apos_log.Multiple_apos, src, cur_pos - apos_len, cur_pos);
|
||||
break;
|
||||
}
|
||||
}
|
||||
private void Ident_props(int apos_len) {
|
||||
typ = apos_len;
|
||||
switch (apos_len) {
|
||||
case Xop_apos_tkn_.Len_ital: {
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_b; break;
|
||||
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_bi_end__b_bgn; state = Xop_apos_tkn_.State_b; break;
|
||||
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_b; dual_cmd = Xop_apos_tkn_.Cmd_bi_bgn; break;
|
||||
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_i_bgn; state = Xop_apos_tkn_.State_bi; break;
|
||||
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_i_bgn; state = Xop_apos_tkn_.State_i; break;
|
||||
default: throw Exc_.new_unhandled(state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Xop_apos_tkn_.Len_bold: {
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_ib_end__i_bgn; state = Xop_apos_tkn_.State_i; break;
|
||||
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_i; break;
|
||||
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_i; break; // NOTE: dual_cmd = Cmd_ib_bgn is implied
|
||||
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_b_bgn; state = Xop_apos_tkn_.State_ib; break;
|
||||
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_b_bgn; state = Xop_apos_tkn_.State_b; break;
|
||||
default: throw Exc_.new_unhandled(state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Xop_apos_tkn_.Len_dual: {
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_b_end__i_bgn; state = Xop_apos_tkn_.State_i; break;
|
||||
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_i_end__b_bgn; state = Xop_apos_tkn_.State_b; break;
|
||||
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_ib_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_bi_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_bi_end; state = Xop_apos_tkn_.State_nil; break; // NOTE: dual_cmd = Cmd_ib_bgn is implied
|
||||
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_ib_bgn; state = Xop_apos_tkn_.State_dual; break;
|
||||
default: throw Exc_.new_unhandled(state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: throw Exc_.new_unhandled(apos_len);
|
||||
}
|
||||
}
|
||||
}
|
||||
26
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_log.java
Normal file
26
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_log.java
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_apos_log {
|
||||
private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "apos");
|
||||
public static final Gfo_msg_itm
|
||||
Bold_converted_to_ital = Gfo_msg_itm_.new_note_(owner, "Bold_converted_to_ital")
|
||||
, Dangling_apos = Gfo_msg_itm_.new_note_(owner, "Dangling_apos")
|
||||
, Multiple_apos = Gfo_msg_itm_.new_note_(owner, "Multiple_apos")
|
||||
;
|
||||
}
|
||||
26
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_lxr.java
Normal file
26
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_lxr.java
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_apos_lxr implements Xop_lxr {
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_apos;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Apos_ary, this);} private static final byte[] Apos_ary = new byte[] {Byte_ascii.Apos, Byte_ascii.Apos};
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Apos().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_apos_lxr _ = new Xop_apos_lxr(); Xop_apos_lxr() {}
|
||||
}
|
||||
29
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_tkn.java
Normal file
29
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_tkn.java
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_apos_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_apos_tkn(int bgn, int end, int apos_len, int apos_tid, int apos_cmd, int apos_lit) {
|
||||
this.apos_len = apos_len; this.apos_tid = apos_tid; this.apos_cmd = apos_cmd; this.apos_lit = apos_lit;
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_apos;}
|
||||
public int Apos_len() {return apos_len;} private int apos_len;
|
||||
public int Apos_lit() {return apos_lit;} public Xop_apos_tkn Apos_lit_(int v) {apos_lit = v; return this;} private int apos_lit;
|
||||
public int Apos_tid() {return apos_tid;} public Xop_apos_tkn Apos_tid_(int v) {apos_tid = v; return this;} private int apos_tid;
|
||||
public int Apos_cmd() {return apos_cmd;} public Xop_apos_tkn Apos_cmd_(int v) {apos_cmd = v; return this;} private int apos_cmd;
|
||||
}
|
||||
36
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_tkn_.java
Normal file
36
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_tkn_.java
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_apos_tkn_ {
|
||||
public static final int
|
||||
Cmd_nil = 0
|
||||
, Cmd_i_bgn = 1, Cmd_i_end = 2, Cmd_b_bgn = 3, Cmd_b_end = 4
|
||||
, Cmd_bi_bgn = 5, Cmd_ib_bgn = 6, Cmd_ib_end = 7, Cmd_bi_end = 8
|
||||
, Cmd_bi_end__b_bgn = 9, Cmd_ib_end__i_bgn = 10, Cmd_b_end__i_bgn = 11, Cmd_i_end__b_bgn = 12;
|
||||
public static final byte[][] Cmds
|
||||
= new byte[][]
|
||||
{ Bry_.new_a7("nil")
|
||||
, Bry_.new_a7("i+"), Bry_.new_a7("i-"), Bry_.new_a7("b+"), Bry_.new_a7("b-")
|
||||
, Bry_.new_a7("bi+"), Bry_.new_a7("ib+"), Bry_.new_a7("ib-"), Bry_.new_a7("bi-")
|
||||
, Bry_.new_a7("bi-b+"), Bry_.new_a7("ib-i+"), Bry_.new_a7("b-i+"), Bry_.new_a7("i-b+")
|
||||
};
|
||||
public static String Cmd_str(int id) {return String_.new_u8(Cmds[id]);}
|
||||
public static final int Len_ital = 2, Len_bold = 3, Len_dual = 5, Len_apos_bold = 4;
|
||||
public static final int Typ_ital = 2, Typ_bold = 3, Typ_dual = 5;
|
||||
public static final int State_nil = 0, State_i = 1, State_b = 2, State_bi = 3, State_ib = 4, State_dual = 5;
|
||||
}
|
||||
30
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_tkn_chkr.java
Normal file
30
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_tkn_chkr.java
Normal file
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_apos_tkn_chkr extends Xop_tkn_chkr_base {
|
||||
@Override public Class<?> TypeOf() {return Xop_apos_tkn.class;}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_apos;}
|
||||
public int Apos_cmd() {return apos_cmd;} public Xop_apos_tkn_chkr Apos_cmd_(int v) {apos_cmd = v; return this;} private int apos_cmd = Xop_apos_tkn_.Cmd_nil;
|
||||
public int Apos_lit() {return apos_lit;} public Xop_apos_tkn_chkr Apos_lit_(int v) {apos_lit = v; return this;} private int apos_lit = -1;
|
||||
@Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
|
||||
Xop_apos_tkn actl = (Xop_apos_tkn)actl_obj;
|
||||
err += mgr.Tst_val(apos_cmd == Xop_apos_tkn_.Cmd_nil, path, "apos_cmd", Xop_apos_tkn_.Cmd_str(apos_cmd), Xop_apos_tkn_.Cmd_str(actl.Apos_cmd()));
|
||||
err += mgr.Tst_val(apos_lit == -1, path, "apos_lit", apos_lit, actl.Apos_lit());
|
||||
return err;
|
||||
}
|
||||
}
|
||||
161
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_wkr.java
Normal file
161
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_wkr.java
Normal file
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_apos_wkr implements Xop_ctx_wkr {
|
||||
public Xop_apos_dat Dat() {return dat;} private Xop_apos_dat dat = new Xop_apos_dat();
|
||||
private List_adp stack = List_adp_.new_(); private int bold_count, ital_count; private Xop_apos_tkn dual_tkn = null;
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
|
||||
Reset();
|
||||
}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
|
||||
this.EndFrame(ctx, root, src, src_len, false);
|
||||
}
|
||||
public void AutoClose(Xop_ctx ctx, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {}
|
||||
public int Stack_len() {return stack.Count();}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
cur_pos = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Apos);
|
||||
int apos_len = cur_pos - bgn_pos;
|
||||
dat.Ident(ctx, src, apos_len, cur_pos);
|
||||
Xop_apos_tkn apos_tkn = tkn_mkr.Apos(bgn_pos, cur_pos, apos_len, dat.Typ(), dat.Cmd(), dat.Lit_apos());
|
||||
ctx.Subs_add(root, apos_tkn);
|
||||
ctx.Apos().RegTkn(apos_tkn, cur_pos);
|
||||
return cur_pos;
|
||||
}
|
||||
public void RegTkn(Xop_apos_tkn tkn, int cur_pos) { // REF.MW: Parser|doQuotes
|
||||
stack.Add(tkn);
|
||||
switch (tkn.Apos_tid()) {
|
||||
case Xop_apos_tkn_.Len_ital: ital_count++; break;
|
||||
case Xop_apos_tkn_.Len_bold: bold_count++; break;
|
||||
case Xop_apos_tkn_.Len_dual: //bold_count++; ital_count++; // NOTE: removed b/c of '''''a''b'' was trying to convert ''''' to bold
|
||||
dual_tkn = tkn;
|
||||
break;
|
||||
}
|
||||
if (dat.Dual_cmd() != 0) { // earlier dual tkn assumed to be <i><b>; </i> encountered so change dual to <b><i>
|
||||
if (dual_tkn == null) throw Exc_.new_("dual tkn is null"); // should never happen
|
||||
dual_tkn.Apos_cmd_(dat.Dual_cmd());
|
||||
dual_tkn = null;
|
||||
}
|
||||
}
|
||||
public void EndFrame(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int cur_pos, boolean skip_cancel_if_lnki_and_apos) {
|
||||
int state = dat.State();
|
||||
if (state == 0) {Reset(); return;}
|
||||
if (bold_count % 2 == 1 && ital_count % 2 == 1) ConvertBoldToItal(ctx, src);
|
||||
|
||||
state = dat.State();
|
||||
int closeCmd = 0, closeTyp = 0;
|
||||
if (state == 0) {Reset(); return;} // all closed: return
|
||||
byte cur_tkn_tid = ctx.Cur_tkn_tid();
|
||||
Xop_apos_tkn prv = Previous_bgn(stack, closeTyp);
|
||||
if ( skip_cancel_if_lnki_and_apos // NOTE: if \n or tblw
|
||||
&& cur_tkn_tid == Xop_tkn_itm_.Tid_lnki // and cur scope is lnki
|
||||
// && prv.Ctx_tkn_tid() != Xop_tkn_itm_.Tid_lnki // but apos_bgn is not lnki; NOTE: disabled on 2013-11-10
|
||||
)
|
||||
return; // don't end frame
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_i: closeTyp = Xop_apos_tkn_.Typ_ital; closeCmd = Xop_apos_tkn_.Cmd_i_end; break;
|
||||
case Xop_apos_tkn_.State_b: closeTyp = Xop_apos_tkn_.Typ_bold; closeCmd = Xop_apos_tkn_.Cmd_b_end; break;
|
||||
case Xop_apos_tkn_.State_dual:
|
||||
case Xop_apos_tkn_.State_ib: closeTyp = Xop_apos_tkn_.Typ_dual; closeCmd = Xop_apos_tkn_.Cmd_bi_end; break;
|
||||
case Xop_apos_tkn_.State_bi: closeTyp = Xop_apos_tkn_.Typ_dual; closeCmd = Xop_apos_tkn_.Cmd_ib_end; break;
|
||||
}
|
||||
ctx.Msg_log().Add_itm_none(Xop_apos_log.Dangling_apos, src, prv.Src_bgn(), cur_pos);
|
||||
ctx.Subs_add(root, ctx.Tkn_mkr().Apos(cur_pos, cur_pos, 0, closeTyp, closeCmd, 0));
|
||||
Reset();
|
||||
}
|
||||
private void ConvertBoldToItal(Xop_ctx ctx, byte[] src) {
|
||||
Xop_apos_tkn idxNeg1 = null, idxNeg2 = null, idxNone = null; // look at previous tkn for spaces; EX: "a '''" -> idxNeg1; " a'''" -> idxNeg2; "ab'''" -> idxNone
|
||||
int tknsLen = stack.Count();
|
||||
for (int i = 0; i < tknsLen; i++) {
|
||||
Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
|
||||
if (apos.Apos_tid() != Xop_apos_tkn_.Typ_bold) continue; // only look for bold
|
||||
int tknBgn = apos.Src_bgn();
|
||||
boolean idxNeg1Space = tknBgn > 0 && src[tknBgn - 1] == Byte_ascii.Space;
|
||||
boolean idxNeg2Space = tknBgn > 1 && src[tknBgn - 2] == Byte_ascii.Space;
|
||||
if (idxNeg1 == null && idxNeg1Space) {idxNeg1 = apos;}
|
||||
else if (idxNeg2 == null && idxNeg2Space) {idxNeg2 = apos;}
|
||||
else if (idxNone == null && !idxNeg1Space && !idxNeg2Space) {idxNone = apos;}
|
||||
}
|
||||
if (idxNeg2 != null) ConvertBoldToItal(ctx, src, idxNeg2); // 1st single letter word
|
||||
else if (idxNone != null) ConvertBoldToItal(ctx, src, idxNone); // 1st multi letter word
|
||||
else if (idxNeg1 != null) ConvertBoldToItal(ctx, src, idxNeg1); // everything else
|
||||
|
||||
// now recalc all cmds for stack
|
||||
dat.State_clear();
|
||||
for (int i = 0; i < tknsLen; i++) {
|
||||
Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
|
||||
dat.Ident(ctx, src, apos.Apos_tid(), apos.Src_end()); // NOTE: apos.Typ() must map to apos_len
|
||||
int newCmd = dat.Cmd();
|
||||
if (newCmd == apos.Apos_cmd()) continue;
|
||||
apos.Apos_cmd_(newCmd);
|
||||
}
|
||||
}
|
||||
private void ConvertBoldToItal(Xop_ctx ctx, byte[] src, Xop_apos_tkn oldTkn) {
|
||||
ctx.Msg_log().Add_itm_none(Xop_apos_log.Bold_converted_to_ital, src, oldTkn.Src_bgn(), oldTkn.Src_end());
|
||||
oldTkn.Apos_tid_(Xop_apos_tkn_.Typ_ital).Apos_cmd_(Xop_apos_tkn_.Cmd_i_bgn).Apos_lit_(oldTkn.Apos_lit() + 1);// NOTE: Cmd_i_bgn may be overridden later
|
||||
}
|
||||
private void Reset() {
|
||||
bold_count = ital_count = 0;
|
||||
dual_tkn = null;
|
||||
stack.Clear();
|
||||
dat.State_clear();
|
||||
}
|
||||
private static Xop_apos_tkn Previous_bgn(List_adp stack, int typ) {
|
||||
int stack_len = stack.Count();
|
||||
for (int i = stack_len - 1; i > -1; --i) {
|
||||
Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
|
||||
int cmd = apos.Apos_cmd();
|
||||
switch (typ) {
|
||||
case Xop_apos_tkn_.Typ_ital:
|
||||
switch (cmd) {
|
||||
case Xop_apos_tkn_.Cmd_i_bgn:
|
||||
case Xop_apos_tkn_.Cmd_ib_bgn:
|
||||
case Xop_apos_tkn_.Cmd_bi_bgn:
|
||||
case Xop_apos_tkn_.Cmd_ib_end__i_bgn:
|
||||
case Xop_apos_tkn_.Cmd_b_end__i_bgn:
|
||||
return apos;
|
||||
}
|
||||
break;
|
||||
case Xop_apos_tkn_.Typ_bold:
|
||||
switch (cmd) {
|
||||
case Xop_apos_tkn_.Cmd_b_bgn:
|
||||
case Xop_apos_tkn_.Cmd_ib_bgn:
|
||||
case Xop_apos_tkn_.Cmd_bi_bgn:
|
||||
case Xop_apos_tkn_.Cmd_bi_end__b_bgn:
|
||||
case Xop_apos_tkn_.Cmd_i_end__b_bgn:
|
||||
return apos;
|
||||
}
|
||||
break;
|
||||
default: // NOTE: this is approximate; will not be exact in most dual situations; EX: <b>a<i>b will return <i>; should return <b> and <i>
|
||||
switch (cmd) {
|
||||
case Xop_apos_tkn_.Cmd_b_bgn:
|
||||
case Xop_apos_tkn_.Cmd_i_bgn:
|
||||
case Xop_apos_tkn_.Cmd_ib_bgn:
|
||||
case Xop_apos_tkn_.Cmd_bi_bgn:
|
||||
case Xop_apos_tkn_.Cmd_bi_end__b_bgn:
|
||||
case Xop_apos_tkn_.Cmd_i_end__b_bgn:
|
||||
case Xop_apos_tkn_.Cmd_ib_end__i_bgn:
|
||||
case Xop_apos_tkn_.Cmd_b_end__i_bgn:
|
||||
return apos;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
159
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_wkr_tst.java
Normal file
159
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_wkr_tst.java
Normal file
@@ -0,0 +1,159 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
import gplx.xowa.parsers.lists.*;
|
||||
public class Xop_apos_wkr_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test_parse_page_wiki("''a''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn) , fxt.tkn_txt_(2, 3), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
|
||||
fxt.Test_parse_page_wiki("'''a'''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn) , fxt.tkn_txt_(3, 4), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
|
||||
fxt.Test_parse_page_wiki("'''''a'''''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn) , fxt.tkn_txt_(5, 6), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_end));
|
||||
}
|
||||
@Test public void Advanced() {
|
||||
fxt.Test_parse_page_wiki("''''a''''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn).Apos_lit_(1) , fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end).Apos_lit_(1)); // 1 apos + bold
|
||||
fxt.Test_parse_page_wiki("''''''''a''''''''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn).Apos_lit_(3) , fxt.tkn_txt_(), fxt.tkn_apos_( Xop_apos_tkn_.Cmd_bi_end).Apos_lit_(3)); // 3 apos + dual
|
||||
}
|
||||
@Test public void Combo() {
|
||||
fxt.Test_parse_page_wiki("''a'''b'''c''", fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // b{i}
|
||||
fxt.Test_parse_page_wiki("'''a''b''c'''", fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)); // i{b}
|
||||
fxt.Test_parse_page_wiki("''a''b'''c'''", fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)); // b_i
|
||||
}
|
||||
@Test public void Assume_apos() {
|
||||
fxt.Test_parse_page_wiki("a01'''b01 '''c0 1'''d01''" // pick c0 1, b/c it is idxNeg2
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_(), fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // idx_neg2
|
||||
fxt.Test_parse_page_wiki("a01 '''b01 '''c01'''d01''" // pick c01, b/c it is idxNone
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // idx_none
|
||||
fxt.Test_parse_page_wiki("a01 '''b01 '''c01 '''d01''" // pick a01 , b/c it is idxNeg1
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // idx_neg1
|
||||
fxt.Test_parse_page_wiki("a''''b''" // strange outlier condition
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Apos_lit_(2)
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // 4 apos -> 2 apos + ital
|
||||
}
|
||||
@Test public void Dual() {
|
||||
fxt.Test_parse_page_wiki("'''''a'''b''" // +ib -b -i; 5apos defaults to ib
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
|
||||
fxt.Test_parse_page_wiki("'''''a''b'''" // +bi -i -b; change 5apos to bi
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
|
||||
fxt.Test_parse_page_wiki("''b'''''c'''" // 5q toggles ital n, bold y
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end__b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
|
||||
}
|
||||
@Test public void Unclosed() {
|
||||
fxt.Test_parse_page_wiki("''a"
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
|
||||
fxt.Test_parse_page_wiki("'''a"
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
|
||||
fxt.Test_parse_page_wiki("'''''a"
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_end));
|
||||
}
|
||||
@Test public void Outliers() {
|
||||
fxt.Test_parse_page_wiki("''a'''b'''c'''" // '''b -> ' +i b
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Apos_lit_(1)
|
||||
, fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
|
||||
fxt.Test_parse_page_wiki("''a'''b''c''" // '''b -> ' +i b; double check with closing itals
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Apos_lit_(1)
|
||||
, fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
|
||||
fxt.Test_parse_page_wiki("''a'''b''c" // ''c -> -bi + b
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
|
||||
, fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_end__b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
|
||||
}
|
||||
@Test public void MultiLines() {
|
||||
fxt.Test_parse_page_wiki("a''b\nc''d"
|
||||
, fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(3, 4), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_nl_char_len1_(4)
|
||||
, fxt.tkn_txt_(5, 6), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
|
||||
}
|
||||
@Test public void Lnki() {
|
||||
fxt.Test_parse_page_wiki_str("[[''a''']]", "<a href=\"/wiki/%27%27a%27%27%27\">''a'''</a>");
|
||||
}
|
||||
@Test public void Dual_exceptions() {
|
||||
fxt.Test_parse_page_wiki("'''''a''b''"
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_end)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_list_autoClose() {
|
||||
fxt.Test_parse_page_wiki("''a\n*b"
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Src_rng_(0, 2)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Src_rng_(3, 3)
|
||||
, fxt.tkn_list_bgn_(3, 5, Xop_list_tkn_.List_itmTyp_ul)
|
||||
, fxt.tkn_txt_(5, 6)
|
||||
, fxt.tkn_list_end_(6)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_hr_autoClose() {
|
||||
fxt.Test_parse_page_wiki("''a\n----"
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Src_rng_(0, 2)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Src_rng_(3, 3)
|
||||
, fxt.tkn_para_blank_(3)
|
||||
, fxt.tkn_hr_(3, 8)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_hdr_autoClose() {
|
||||
fxt.Test_parse_page_wiki("''a\n==b=="
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Src_rng_(0, 2)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Src_rng_(3, 3)
|
||||
, fxt.tkn_hdr_(3, 9, 2).Subs_
|
||||
( fxt.tkn_txt_(6, 7)
|
||||
));
|
||||
}
|
||||
@Test public void Apos_broken_by_tblw_th() { // DATE:2013-04-24
|
||||
fxt.Test_parse_page_all_str("A ''[[b!!]]'' c", "A <i><a href=\"/wiki/B!!\">b!!</a></i> c");
|
||||
}
|
||||
@Test public void Nowiki() { // PAGE:en.w:Wiki; DATE:2013-05-13
|
||||
fxt.Test_parse_page_all_str("<nowiki>''a''</nowiki>", "''a''");
|
||||
}
|
||||
@Test public void Lnki_multi_line() { // PURPOSE: handle apos within multi-line lnki caption; DATE:2013-11-10
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "[[A|b '' c"
|
||||
, "d '' e ]]"
|
||||
)
|
||||
, "<a href=\"/wiki/A\">b <i> c d </i> e</a>"); // NOTE: c d should be italicized, not c e (latter occurs when apos is ended on each line)
|
||||
}
|
||||
@Test public void French() { // PURPOSE: L'''A'' -> L'<i>A</i>; DATE:2014-01-06
|
||||
fxt.Test_parse_page_all_str("L''''A'''", "L'<b>A</b>");
|
||||
fxt.Test_parse_page_all_str("L'''A''", "L'<i>A</i>");
|
||||
}
|
||||
// @Test public void Mix_lnke() { // FUTURE: requires rewrite of apos
|
||||
// fxt.Test_parse_page_wiki("''a[irc://b c''d''e]f''"
|
||||
// , fxt.tkn_apos_(0, 2, Xop_apos_tkn_.Cmd_i_bgn)
|
||||
// , fxt.tkn_txt_(2, 3)
|
||||
// , fxt.tkn_lnke_(3, 20).Subs_add_ary
|
||||
// ( fxt.tkn_txt_(12, 13)
|
||||
// , fxt.tkn_apos_(13, 15, Xop_apos_tkn_.Cmd_i_bgn)
|
||||
// , fxt.tkn_txt_(15, 16)
|
||||
// , fxt.tkn_apos_(16, 18, Xop_apos_tkn_.Cmd_i_end)
|
||||
// , fxt.tkn_txt_(18, 19)
|
||||
// )
|
||||
// , fxt.tkn_txt_(20, 21)
|
||||
// , fxt.tkn_apos_(21, 23, Xop_apos_tkn_.Cmd_i_bgn)
|
||||
// );
|
||||
// }
|
||||
}
|
||||
/*
|
||||
*/
|
||||
27
400_xowa/src/gplx/xowa/parsers/hdrs/Xop_hdr_log.java
Normal file
27
400_xowa/src/gplx/xowa/parsers/hdrs/Xop_hdr_log.java
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_hdr_log {
|
||||
private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "hdr");
|
||||
public static final Gfo_msg_itm
|
||||
Dangling_hdr = Gfo_msg_itm_.new_warn_(owner, "dangling_hdr")
|
||||
, Mismatched = Gfo_msg_itm_.new_warn_(owner, "mismatched")
|
||||
, Len_1 = Gfo_msg_itm_.new_warn_(owner, "len_1")
|
||||
, Len_7_or_more = Gfo_msg_itm_.new_warn_(owner, "len_7_or_more")
|
||||
;
|
||||
}
|
||||
27
400_xowa/src/gplx/xowa/parsers/hdrs/Xop_hdr_lxr.java
Normal file
27
400_xowa/src/gplx/xowa/parsers/hdrs/Xop_hdr_lxr.java
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_hdr_lxr implements Xop_lxr {
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_hdr;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Hook_bgn, this);} static final byte[] Hook_bgn = new byte[] {Byte_ascii.Nl, Byte_ascii.Eq};
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Hdr().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_hdr_lxr _ = new Xop_hdr_lxr(); Xop_hdr_lxr() {}
|
||||
public static final byte Hook = Byte_ascii.Eq;
|
||||
}
|
||||
33
400_xowa/src/gplx/xowa/parsers/hdrs/Xop_hdr_tkn.java
Normal file
33
400_xowa/src/gplx/xowa/parsers/hdrs/Xop_hdr_tkn.java
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_hdr_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_hdr_tkn(int bgn, int end, int hdr_len) {this.Tkn_ini_pos(false, bgn, end); this.hdr_len = hdr_len;}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_hdr;}
|
||||
public int Hdr_len() {return hdr_len;} public Xop_hdr_tkn Hdr_len_(int v) {hdr_len = v; return this;} private int hdr_len = -1;
|
||||
public int Hdr_bgn_manual() {return hdr_bgn_manual;} public Xop_hdr_tkn Hdr_bgn_manual_(int v) {hdr_bgn_manual = v; return this;} private int hdr_bgn_manual;
|
||||
public int Hdr_end_manual() {return hdr_end_manual;} public Xop_hdr_tkn Hdr_end_manual_(int v) {hdr_end_manual = v; return this;} private int hdr_end_manual;
|
||||
public boolean Hdr_html_first() {return hdr_html_first;} public Xop_hdr_tkn Hdr_html_first_y_() {hdr_html_first = true; return this;} private boolean hdr_html_first;
|
||||
public int Hdr_html_dupe_idx() {return hdr_html_dupe_idx;} private int hdr_html_dupe_idx;
|
||||
public byte[] Hdr_toc_text() {return hdr_toc_text;} public Xop_hdr_tkn Hdr_toc_text_(byte[] v) {hdr_toc_text = v; return this;} private byte[] hdr_toc_text;
|
||||
public int Hdr_html_dupe_idx_next() {
|
||||
hdr_html_dupe_idx = hdr_html_dupe_idx == 0 ? 2 : hdr_html_dupe_idx + 1;
|
||||
return hdr_html_dupe_idx;
|
||||
}
|
||||
public byte[] Hdr_html_id() {return hdr_html_id;} public Xop_hdr_tkn Hdr_html_id_(byte[] v) {hdr_html_id = v; return this;} private byte[] hdr_html_id = Bry_.Empty;
|
||||
}
|
||||
33
400_xowa/src/gplx/xowa/parsers/hdrs/Xop_hdr_tkn_chkr.java
Normal file
33
400_xowa/src/gplx/xowa/parsers/hdrs/Xop_hdr_tkn_chkr.java
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_hdr_tkn_chkr extends Xop_tkn_chkr_base {
|
||||
@Override public Class<?> TypeOf() {return Xop_hdr_tkn.class;}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_hdr;}
|
||||
public int Hdr_len() {return hdr_len;} public Xop_hdr_tkn_chkr Hdr_len_(int v) {hdr_len = v; return this;} private int hdr_len = -1;
|
||||
public int Hdr_ws_bgn() {return hdr_ws_bgn;} public Xop_hdr_tkn_chkr Hdr_ws_bgn_(int v) {hdr_ws_bgn = v; return this;} private int hdr_ws_bgn = -1;
|
||||
public int Hdr_ws_end() {return hdr_ws_end;} public Xop_hdr_tkn_chkr Hdr_ws_end_(int v) {hdr_ws_end = v; return this;} private int hdr_ws_end = -1;
|
||||
public int Hdr_ws_trailing() {return hdr_ws_trailing;} public Xop_hdr_tkn_chkr Hdr_ws_trailing_(int v) {hdr_ws_trailing = v; return this;} private int hdr_ws_trailing = -1;
|
||||
public Xop_hdr_tkn_chkr Hdr_html_id_(String v) {hdr_html_id = Bry_.new_a7(v); return this;} private byte[] hdr_html_id = Bry_.Empty;
|
||||
@Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
|
||||
Xop_hdr_tkn actl = (Xop_hdr_tkn)actl_obj;
|
||||
err += mgr.Tst_val(hdr_len == -1, path, "hdr_len", hdr_len, actl.Hdr_len());
|
||||
err += mgr.Tst_val(hdr_html_id == Bry_.Empty, path, "hdr_html_id", String_.new_a7(hdr_html_id), String_.new_a7(actl.Hdr_html_id()));
|
||||
return err;
|
||||
}
|
||||
}
|
||||
123
400_xowa/src/gplx/xowa/parsers/hdrs/Xop_hdr_wkr.java
Normal file
123
400_xowa/src/gplx/xowa/parsers/hdrs/Xop_hdr_wkr.java
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_hdr_wkr implements Xop_ctx_wkr {
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
|
||||
public void AutoClose(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
|
||||
// bgn never closed; mark inert; EX: "==a"
|
||||
Xop_hdr_tkn bgn = (Xop_hdr_tkn)tkn;
|
||||
int bgn_hdr_len = bgn.Hdr_len();
|
||||
bgn.Hdr_bgn_manual_(bgn_hdr_len);
|
||||
bgn.Hdr_len_(0);
|
||||
if (bgn_hdr_len > 1 && ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki) // NOTE: \n= is not uncommon for templates; ignore them;
|
||||
ctx.Msg_log().Add_itm_none(Xop_hdr_log.Dangling_hdr, src, bgn.Src_bgn(), bgn_pos);
|
||||
}
|
||||
public int Make_tkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_bos) bgn_pos = 0; // do not allow -1 pos
|
||||
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false);
|
||||
Close_open_itms(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
|
||||
ctx.Para().Process_block__bgn__nl_w_symbol(ctx, root, src, bgn_pos, cur_pos, Xop_xnde_tag_.Tag_h2); // pass h2; should pass h# where # is correct #, but for purpose of Para_wkr, <h2> tag does not matter
|
||||
int new_pos = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Xop_hdr_lxr.Hook); // count all =
|
||||
int hdr_len = new_pos - cur_pos + 1; // +1 b/c Hook has 1 eq: "\n="
|
||||
switch (hdr_len) {
|
||||
case 1: ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_1, src, bgn_pos, new_pos); break; // <h1>; flag
|
||||
case 2: case 3: case 4: case 5: case 6: break; // <h2>-<h6>: normal
|
||||
default: ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_7_or_more, src, bgn_pos, new_pos); break; // <h7>+; limit to 6; flag; NOTE: only 14 pages in 2011-07-27
|
||||
}
|
||||
|
||||
Xop_hdr_tkn tkn = tkn_mkr.Hdr(bgn_pos, new_pos, hdr_len); // make tkn
|
||||
ctx.StackTkn_add(root, tkn);
|
||||
return new_pos;
|
||||
}
|
||||
public int Make_tkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int stackPos, int end_hdr_len) {// REF.MW: Parser|doHeadings
|
||||
if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_tmpl_curly_bgn) return ctx.Lxr_make_txt_(cur_pos);
|
||||
Xop_hdr_tkn hdr = (Xop_hdr_tkn)ctx.Stack_pop_til(root, src, stackPos, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_hdr);
|
||||
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false); // end any apos; EX: ==''a==
|
||||
int hdr_len = hdr.Hdr_len(), bgn_manual = 0, end_manual = 0;
|
||||
boolean dirty = false;
|
||||
if (end_hdr_len < hdr_len) { // mismatch: end has more; adjust hdr
|
||||
bgn_manual = hdr_len - end_hdr_len;
|
||||
hdr_len = end_hdr_len;
|
||||
ctx.Msg_log().Add_itm_none(Xop_hdr_log.Mismatched, src, bgn_pos, cur_pos);
|
||||
if (hdr_len == 1) ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_1, src, bgn_pos, cur_pos);
|
||||
dirty = true;
|
||||
}
|
||||
else if (end_hdr_len > hdr_len) { // mismatch: hdr has more; adjust variables
|
||||
end_manual = end_hdr_len - hdr_len;
|
||||
ctx.Msg_log().Add_itm_none(Xop_hdr_log.Mismatched, src, bgn_pos, cur_pos);
|
||||
dirty = true;
|
||||
}
|
||||
if (hdr_len > 6) { // <h7>+; limit to 6; NOTE: both bgn/end are equal length; EX: bgn=8,end=7 -> bgn=7,end=7;bgn_manual=1
|
||||
bgn_manual = end_manual = hdr_len - 6;
|
||||
hdr_len = 6;
|
||||
dirty = true;
|
||||
}
|
||||
if (dirty)
|
||||
hdr.Hdr_bgn_manual_(bgn_manual).Hdr_end_manual_(end_manual).Hdr_len_(hdr_len);
|
||||
cur_pos = Find_fwd_while_ws_hdr_version(src, cur_pos, src_len); // NOTE: hdr gobbles up trailing ws; EX: "==a== \n\t \n \nb" gobbles up all 3 "\n"s; otherwise para_wkr will process <br/>
|
||||
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_h2);
|
||||
hdr.Subs_move(root);
|
||||
hdr.Src_end_(cur_pos);
|
||||
if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki)
|
||||
ctx.Cur_page().Hdr_mgr().Add(ctx, hdr, src);
|
||||
return cur_pos;
|
||||
}
|
||||
private void Close_open_itms(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
int stack_pos = -1, stack_len = ctx.Stack_len(); boolean stop = false;
|
||||
for (int i = 0; i < stack_len; i++) { // loop over stack
|
||||
Xop_tkn_itm prv_tkn = ctx.Stack_get(i);
|
||||
switch (prv_tkn.Tkn_tid()) { // find first list/hdr; close everything until this
|
||||
case Xop_tkn_itm_.Tid_list:
|
||||
case Xop_tkn_itm_.Tid_hdr:
|
||||
stack_pos = i; stop = true; break;
|
||||
}
|
||||
if (stop) break;
|
||||
}
|
||||
if (stack_pos == -1) return;
|
||||
ctx.Stack_pop_til(root, src, stack_pos, true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_hdr);
|
||||
}
|
||||
private static int Find_fwd_while_ws_hdr_version(byte[] src, int cur, int end) {
|
||||
int last_nl = -1;
|
||||
while (true) {
|
||||
if (cur == end) return cur;
|
||||
byte b = src[cur];
|
||||
switch (b) {
|
||||
case Byte_ascii.Nl:
|
||||
cur++;
|
||||
last_nl = cur;
|
||||
break;
|
||||
case Byte_ascii.Space:
|
||||
case Byte_ascii.Tab:
|
||||
cur++;
|
||||
break;
|
||||
default:
|
||||
return last_nl == -1 ? cur : last_nl - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
NOTE:hdr.trailing_nl
|
||||
. by design, the hdr_tkn's src_end will not include the trailing \n
|
||||
.. for example, for "\n==a==\n", the src_bgn will be 0, but the src_end will be 6
|
||||
.. note that at 6, it does not include the \n at pos 6
|
||||
. this is needed to leave the \n for the parser to handle other tkns, such as hdrs, tblws, lists.
|
||||
. for example, in "\n==a==\n*b", if the \n at pos 6 was taken by the hdr_tkn, then the parser would encounter a "*" instead of a "\n*"
|
||||
*/
|
||||
127
400_xowa/src/gplx/xowa/parsers/hdrs/Xop_hdr_wkr__basic_tst.java
Normal file
127
400_xowa/src/gplx/xowa/parsers/hdrs/Xop_hdr_wkr__basic_tst.java
Normal file
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_hdr_wkr__basic_tst {
|
||||
@Before public void init() {fxt.Reset();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void H2() {fxt.Test_parse_page_wiki_str("==a==" , "<h2>a</h2>\n");}
|
||||
@Test public void H3() {fxt.Test_parse_page_wiki_str("===a===" , "<h3>a</h3>\n");}
|
||||
@Test public void H6_limit() {fxt.Test_parse_page_wiki_str("=======a=======" , "<h6>=a=</h6>\n");}
|
||||
@Test public void Mismatch_bgn() {fxt.Test_parse_page_wiki_str("=====a==" , "<h2>===a</h2>\n");}
|
||||
@Test public void Mismatch_end() {fxt.Test_parse_page_wiki_str("==a=====" , "<h2>a===</h2>\n");}
|
||||
@Test public void Dangling() {fxt.Test_parse_page_wiki_str("==a" , "==a");}
|
||||
@Test public void Comment_bgn() {fxt.Test_parse_page_all_str ("<!--b-->==a==" , "<h2>a</h2>\n");}
|
||||
@Test public void Comment_end() {fxt.Test_parse_page_all_str ("==a==<!--b-->" , "<h2>a</h2>\n");}
|
||||
@Test public void Ws_end() { // PURPOSE: "==\n" merges all ws following it; \n\n\n is not transformed by Para_wkr to "<br/>"
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a== \t"
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, "b"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, "b"
|
||||
));
|
||||
}
|
||||
@Test public void Many() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a=="
|
||||
, "===b==="
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, ""
|
||||
, "<h3>b</h3>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Hdr_w_tblw() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a=="
|
||||
, "{|"
|
||||
, "|+"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, "<table>"
|
||||
, " <caption>"
|
||||
, " </caption>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Hdr_w_hr() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a=="
|
||||
, "----"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, "<hr/>"
|
||||
));
|
||||
}
|
||||
@Test public void Mix_apos_dangling() {fxt.Test_parse_page_wiki_str("==''a==" , "<h2><i>a</i></h2>\n");}
|
||||
@Test public void Mix_xnde_dangling() {fxt.Test_parse_page_wiki_str("==<i>a==" , "<h2><i>a</i></h2>\n");}
|
||||
@Test public void Mix_tblw_cell() {fxt.Test_parse_page_wiki_str("==a!!==" , "<h2>a!!</h2>\n");}
|
||||
@Test public void Ws() {fxt.Test_parse_page_wiki_str("== a b ==" , "<h2> a b </h2>\n");}
|
||||
@Test public void Err_hdr() {fxt.Init_log_(Xop_hdr_log.Mismatched) .Test_parse_page_wiki_str("====a== ==" , "<h2>==a== </h2>\n").tst_Log_check();}
|
||||
@Test public void Err_end_hdr_is_1() {fxt.Init_log_(Xop_hdr_log.Mismatched, Xop_hdr_log.Len_1).Test_parse_page_wiki_str("==a=" , "<h1>=a</h1>\n").tst_Log_check();}
|
||||
@Test public void Html_hdr_many() {
|
||||
fxt.Wtr_cfg().Toc__show_(Bool_.Y);
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a=="
|
||||
, "==a=="
|
||||
, "==a=="
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2><span class='mw-headline' id='a'>a</span></h2>"
|
||||
, ""
|
||||
, "<h2><span class='mw-headline' id='a_2'>a</span></h2>"
|
||||
, ""
|
||||
, "<h2><span class='mw-headline' id='a_3'>a</span></h2>"
|
||||
, ""
|
||||
));
|
||||
fxt.Wtr_cfg().Toc__show_(Bool_.N);
|
||||
}
|
||||
@Test public void Hdr_inside_dangling_tmpl_fix() { // PURPOSE: one-off fix to handle == inside dangling tmpl; DATE:2014-02-11
|
||||
fxt.Test_parse_page_all_str("{{a|}\n==b=="
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "{{a|}"
|
||||
, ""
|
||||
, "<h2>b</h2>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Pfunc() {// multiple = should not be interpreted as key-val equals; PAGE:en.w:Wikipedia:Picture_of_the_day/June_2014 DATE:2014-07-21
|
||||
fxt.Test_parse_page_all_str
|
||||
( "{{#if:exists|==a==|no}}"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
// @Test public void Hdr_inside_dangling_tmpl_fix_2() { // PURPOSE: hdr == inside dangling tmpl; DATE:2014-06-10
|
||||
// fxt.Init_defn_add("Print", "{{{1}}}");
|
||||
// fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
// ( "=={{Print|b=="
|
||||
// , "}}"
|
||||
// ), String_.Concat_lines_nl_skip_last
|
||||
// ( "==b="
|
||||
// , ""
|
||||
// ));
|
||||
// }
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_hdr_wkr__div_wrapper_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Basic() { // PURPOSE: basic div_wrapper test; DATE:2015-06-24
|
||||
fxt.Wtr_cfg().Hdr__div_wrapper_(Bool_.Y);
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a=="
|
||||
, "b"
|
||||
, "==c=="
|
||||
, "d"
|
||||
, "==e=="
|
||||
, "f"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, "<div>"
|
||||
, ""
|
||||
, "<p>b"
|
||||
, "</p>"
|
||||
, ""
|
||||
, "</div>"
|
||||
, "<h2>c</h2>"
|
||||
, "<div>"
|
||||
, ""
|
||||
, "<p>d"
|
||||
, "</p>"
|
||||
, ""
|
||||
, "</div>"
|
||||
, "<h2>e</h2>"
|
||||
, "<div>"
|
||||
, ""
|
||||
, "<p>f"
|
||||
, "</p>"
|
||||
, "</div>"
|
||||
));
|
||||
fxt.Wtr_cfg().Hdr__div_wrapper_(Bool_.N);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_hdr_wkr__para_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Hdr_at_bos() { // PURPOSE: check that BOS==a== does not throw null ref in para; DATE:2014-02-18
|
||||
fxt.Test_parse_page_all_str("==a==", "<h2>a</h2>\n");
|
||||
}
|
||||
}
|
||||
27
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_lxr.java
Normal file
27
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_lxr.java
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_list_lxr implements Xop_lxr {//20111222
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_list;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {Add_ary(core_trie, this, Xop_list_tkn_.Hook_ul, Xop_list_tkn_.Hook_ol, Xop_list_tkn_.Hook_dt, Xop_list_tkn_.Hook_dd);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
private void Add_ary(Btrie_fast_mgr core_trie, Object val, byte[]... ary) {for (byte[] itm : ary) core_trie.Add(itm, val);}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.List().MakeTkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_list_lxr _ = new Xop_list_lxr(); Xop_list_lxr() {}
|
||||
}
|
||||
32
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_tkn.java
Normal file
32
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_tkn.java
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_list_tkn extends Xop_tkn_itm_base {
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_list;}
|
||||
public int List_uid() {return list_uid;} public Xop_list_tkn List_uid_(int v) {list_uid = v; return this;} private int list_uid = -1;
|
||||
public byte List_bgn() {return list_bgn;} private byte list_bgn;
|
||||
public byte List_itmTyp() {return list_itmTyp;} public Xop_list_tkn List_itmTyp_(byte v) {list_itmTyp = v; return this;} private byte list_itmTyp = Xop_list_tkn_.List_itmTyp_null;
|
||||
public int[] List_path() {return path;} public Xop_list_tkn List_path_(int... v) {path = v; return this;} private int[] path = Int_.Ary_empty;
|
||||
public int List_path_idx() {return path[path.length - 1];}
|
||||
public boolean List_sub_first() {return List_path_idx() == 0;}
|
||||
public byte List_sub_last() {return list_sub_last;} public Xop_list_tkn List_sub_last_(byte v) {list_sub_last = v; return this;} private byte list_sub_last = Bool_.__byte;
|
||||
public static Xop_list_tkn bgn_(int bgn, int end, byte list_itmTyp, int symLen) {return new Xop_list_tkn(bgn, end, Bool_.Y_byte, list_itmTyp);}
|
||||
public static Xop_list_tkn end_(int pos, byte list_itmTyp) {return new Xop_list_tkn(pos, pos, Bool_.N_byte, list_itmTyp);}
|
||||
public Xop_list_tkn(int bgn, int end, byte bgnEndType, byte list_itmTyp) {this.Tkn_ini_pos(false, bgn, end); this.list_bgn = bgnEndType; this.list_itmTyp = list_itmTyp;}
|
||||
public static final Xop_list_tkn Null = new Xop_list_tkn(); Xop_list_tkn() {}
|
||||
}
|
||||
54
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_tkn_.java
Normal file
54
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_tkn_.java
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_list_tkn_ {
|
||||
public static final byte[]
|
||||
Hook_ul = new byte[] {Byte_ascii.Nl, Byte_ascii.Star}, Hook_ol = new byte[] {Byte_ascii.Nl, Byte_ascii.Hash}
|
||||
, Hook_dt = new byte[] {Byte_ascii.Nl, Byte_ascii.Semic} , Hook_dd = new byte[] {Byte_ascii.Nl, Byte_ascii.Colon};
|
||||
public static final byte List_itmTyp_null = 0, List_itmTyp_ul = Byte_ascii.Star, List_itmTyp_ol = Byte_ascii.Hash, List_itmTyp_dt = Byte_ascii.Semic, List_itmTyp_dd = Byte_ascii.Colon;
|
||||
public static final String Str_li = "li", Str_ol = "ol", Str_ul = "ul", Str_dl = "dl", Str_dt = "dt", Str_dd = "dd";
|
||||
public static final byte[] Byt_li = Bry_.new_a7(Str_li), Byt_ol = Bry_.new_a7(Str_ol), Byt_ul = Bry_.new_a7(Str_ul)
|
||||
, Byt_dl = Bry_.new_a7(Str_dl), Byt_dt = Bry_.new_a7(Str_dt), Byt_dd = Bry_.new_a7(Str_dd);
|
||||
public static byte[] XmlTag_lst(byte b) {
|
||||
switch (b) {
|
||||
case List_itmTyp_ul: return Byt_ul;
|
||||
case List_itmTyp_ol: return Byt_ol;
|
||||
case List_itmTyp_dt:
|
||||
case List_itmTyp_dd: return Byt_dl;
|
||||
default: throw Exc_.new_unhandled(b);
|
||||
}
|
||||
}
|
||||
public static byte[] XmlTag_itm(byte b) {
|
||||
switch (b) {
|
||||
case List_itmTyp_ul:
|
||||
case List_itmTyp_ol: return Byt_li;
|
||||
case List_itmTyp_dt: return Byt_dt;
|
||||
case List_itmTyp_dd: return Byt_dd;
|
||||
default: throw Exc_.new_unhandled(b);
|
||||
}
|
||||
}
|
||||
public static byte Char_lst(byte b) {
|
||||
switch (b) {
|
||||
case List_itmTyp_ul: return Byte_ascii.Star;
|
||||
case List_itmTyp_ol: return Byte_ascii.Hash;
|
||||
case List_itmTyp_dt: return Byte_ascii.Semic;
|
||||
case List_itmTyp_dd: return Byte_ascii.Colon;
|
||||
default: throw Exc_.new_unhandled(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
36
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_tkn_chkr.java
Normal file
36
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_tkn_chkr.java
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_list_tkn_chkr extends Xop_tkn_chkr_base {
|
||||
@Override public Class<?> TypeOf() {return Xop_list_tkn.class;}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_list;}
|
||||
public int List_uid() {return list_uid;} public Xop_list_tkn_chkr List_uid_(int v) {list_uid = v; return this;} private int list_uid = -1;
|
||||
public byte List_bgn() {return list_bgn;} public Xop_list_tkn_chkr List_bgn_(byte v) {list_bgn = v; return this;} private byte list_bgn;
|
||||
public byte List_itmTyp() {return list_itmTyp;} public Xop_list_tkn_chkr List_itmTyp_(byte v) {list_itmTyp = v; return this;} private byte list_itmTyp = Xop_list_tkn_.List_itmTyp_null;
|
||||
public int[] List_path() {return list_path;} public Xop_list_tkn_chkr List_path_(int... v) {list_path = v; return this;} private int[] list_path = Int_.Ary_empty;
|
||||
public byte List_sub_last() {return list_sub_last;} public Xop_list_tkn_chkr List_sub_last_(byte v) {list_sub_last = v; return this;} private byte list_sub_last = Bool_.__byte;
|
||||
@Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
|
||||
Xop_list_tkn actl = (Xop_list_tkn)actl_obj;
|
||||
err += mgr.Tst_val(list_uid == -1, path, "list_uid", list_uid, actl.List_uid());
|
||||
err += mgr.Tst_val(list_bgn == 0, path, "list_bgn", list_bgn, actl.List_bgn());
|
||||
err += mgr.Tst_val(list_itmTyp == Xop_list_tkn_.List_itmTyp_null, path, "list_itmTyp", list_itmTyp, actl.List_itmTyp());
|
||||
err += mgr.Tst_val(list_sub_last == Bool_.__byte, path, "list_sub_last", list_sub_last, actl.List_sub_last());
|
||||
err += mgr.Tst_val(list_path == Int_.Ary_empty, path, "list_path", Array_.XtoStr(list_path), Array_.XtoStr(actl.List_path()));
|
||||
return err;
|
||||
}
|
||||
}
|
||||
185
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_wkr.java
Normal file
185
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_wkr.java
Normal file
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.tblws.*;
|
||||
public class Xop_list_wkr implements Xop_ctx_wkr {
|
||||
private int listId = 0; byte[] curSymAry = new byte[Max_list_depth]; int curSymLen = 0; byte[] prvSymAry = Bry_.Empty;
|
||||
private HierPosAryBldr posBldr = new HierPosAryBldr(Max_list_depth);
|
||||
private boolean SymAry_fill_overflow;
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {Reset(0);}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
|
||||
public boolean List_dirty() {return posBldr.Dirty();}
|
||||
public boolean Dd_chk() {return dd_chk;} public Xop_list_wkr Dd_chk_(boolean v) {dd_chk = v; return this;} private boolean dd_chk;
|
||||
public void AutoClose(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
|
||||
// NOTE: list_tkns can not be explicitly closed, so auto-close will happen for all items
|
||||
MakeTkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, (Xop_list_tkn)tkn, Bool_.Y_byte);
|
||||
Reset(listId + 1);
|
||||
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_ul);
|
||||
}
|
||||
public int MakeTkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {// REF.MW: Parser|doBlockLevels
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_bos) bgn_pos = 0; // do not allow -1 pos
|
||||
|
||||
// pop hdr if exists; EX: \n== a ==\n*b; \n* needs to close hdr
|
||||
int acsPos = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_hdr);
|
||||
if (acsPos != -1) ctx.Stack_pop_til(root, src, acsPos, true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_list);
|
||||
|
||||
// close apos
|
||||
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false);
|
||||
byte symByt = src[cur_pos - 1]; // -1 b/c symByt is byte before curByt; EX: \n*a; cur_pos is at a; want to get *
|
||||
int prvSymLen = curSymLen;
|
||||
cur_pos = SymAry_fill(src, cur_pos, src_len, symByt);
|
||||
symByt = src[cur_pos - 1]; // NOTE: get symByt again b/c cur_pos may have changed; EX: "#*"; # may have triggered list, but last symByt should be *
|
||||
if (SymAry_fill_overflow) return ctx.Lxr_make_txt_(cur_pos);
|
||||
PrvItm_compare();
|
||||
ctx.Para().Process_block__bgn__nl_w_symbol(ctx, root, src, bgn_pos, cur_pos - 1, Xop_xnde_tag_.Tag_li); // -1 b/c cur_pos includes sym_byte; EX: \n*; pass li; should pass correct tag, but for purposes of para_wkr, <li> doesn't matter
|
||||
if (prvSymMatch) {
|
||||
PopTil(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, Bool_.N_byte);
|
||||
posBldr.MoveNext();
|
||||
prvSymAry = Xop_list_wkr_.MakeSymAry(curSymAry, curSymLen);
|
||||
Xop_list_tkn prvItm = tkn_mkr.List_bgn(bgn_pos, cur_pos, curSymAry[curSymLen - 1], curSymLen).List_path_(posBldr.XtoIntAry()).List_uid_(listId);
|
||||
ctx.Subs_add_and_stack(root, prvItm);
|
||||
ctx.Empty_ignored_y_();
|
||||
}
|
||||
else {
|
||||
for (int i = prvSymLen; i > commonSymLen; i--) { // close all discontinued itms: EX: ##\n#\n
|
||||
PopTil(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, Bool_.Y_byte);
|
||||
posBldr.MoveUp();
|
||||
}
|
||||
if (commonSymLen == 0 && prvSymLen != 0) { // nothing in common; reset list
|
||||
listId++;
|
||||
posBldr.Init();
|
||||
}
|
||||
if (curSymLen == commonSymLen) { // add another itm if continuing; EX: #\n#\n
|
||||
PopTil(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, Bool_.N_byte);
|
||||
if ((prvSymLen - curSymLen) > 0 // moving up many levels; do not open new list; just MoveNext; EX: #1\n###3\n##2
|
||||
&& curSymLen != 1) { // do not moveNext if at level 1; this has to do with strange incrementing logic in posBldr at rootLvl
|
||||
posBldr.MoveNext();
|
||||
}
|
||||
else {
|
||||
posBldr.MoveUp(); posBldr.MoveDown();
|
||||
}
|
||||
prvSymAry = Xop_list_wkr_.MakeSymAry(curSymAry, curSymLen);
|
||||
symByt = src[cur_pos - 1];
|
||||
Xop_list_tkn prvItm = tkn_mkr.List_bgn(bgn_pos, cur_pos, symByt, curSymLen).List_path_(posBldr.XtoIntAry()).List_uid_(listId);
|
||||
ctx.Subs_add_and_stack(root, prvItm);
|
||||
ctx.Empty_ignored_y_();
|
||||
}
|
||||
for (int i = commonSymLen; i < curSymLen; i++) { // open new itms; EX: #\n##\n
|
||||
posBldr.MoveDown();
|
||||
symByt = curSymAry[i];
|
||||
prvSymAry = Xop_list_wkr_.MakeSymAry(curSymAry, curSymLen);
|
||||
Xop_list_tkn prvItm = tkn_mkr.List_bgn(bgn_pos, cur_pos, symByt, i + List_adp_.Base1).List_path_(posBldr.XtoIntAry()).List_uid_(listId);
|
||||
ctx.Subs_add_and_stack(root, prvItm);
|
||||
ctx.Empty_ignored_y_();
|
||||
}
|
||||
}
|
||||
if (allDd && cur_pos < src_len - 2 && src[cur_pos] == '{' && src[cur_pos + 1] == '|') // NOTE: if indent && next == {| then invoke table; EX: ":::{|"
|
||||
return ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, cur_pos, cur_pos + 2, false, Xop_tblw_wkr.Tblw_type_tb, Xop_tblw_wkr.Called_from_list, -1, -1); // NOTE: ws_enabled must be set to true; see test for Adinkras; Cato the Elder
|
||||
else {
|
||||
dd_chk = symByt == Xop_list_tkn_.List_itmTyp_dt;
|
||||
return cur_pos;
|
||||
}
|
||||
}
|
||||
public void MakeTkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_list_tkn bgn, byte sub_last) {
|
||||
// boolean empty_ignored = ctx.Empty_ignored(); // commented; see below; DATE:2014-06-24
|
||||
Xop_tkn_itm end_tkn = tkn_mkr.List_end(bgn_pos, bgn.List_itmTyp()).List_path_(bgn.List_path()).List_uid_(listId).List_sub_last_(sub_last);
|
||||
ctx.Subs_add(root, end_tkn);
|
||||
// if (empty_ignored) ctx.Empty_ignore(root, bgn.Tkn_sub_idx()); // commented; code was incorrectly deactivating "*a" when "<li>" encountered; PAGE:en.w:Bristol_Bullfinch DATE:2014-06-24
|
||||
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_ul);
|
||||
}
|
||||
private Xop_list_tkn PopTil(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, byte subLast) {
|
||||
int acs_pos = ctx.Stack_idx_find_but_stop_at_tbl(Xop_tkn_itm_.Tid_list);
|
||||
if (acs_pos == -1) return null;
|
||||
Xop_list_tkn rv = (Xop_list_tkn)ctx.Stack_pop_til(root, src, acs_pos, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_list);
|
||||
MakeTkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, rv, subLast);
|
||||
return rv;
|
||||
}
|
||||
private void PrvItm_compare() {
|
||||
int prvSymLen = prvSymAry.length;
|
||||
prvSymMatch = curSymLen == prvSymLen; commonSymLen = 0;
|
||||
for (int i = 0; i < curSymLen; i++) {
|
||||
if (i < prvSymLen && (Xop_list_wkr_.Compare_normalize(curSymAry[i]) == Xop_list_wkr_.Compare_normalize(prvSymAry[i]))) {
|
||||
commonSymLen = i + 1;
|
||||
}
|
||||
else {
|
||||
prvSymMatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} boolean prvSymMatch; int commonSymLen = 0; boolean allDd = false;
|
||||
private int SymAry_fill(byte[] src, int cur_pos, int src_len, byte curByt) {
|
||||
curSymLen = 0;
|
||||
curSymAry[curSymLen++] = curByt;
|
||||
allDd = true;
|
||||
boolean loop = true;
|
||||
SymAry_fill_overflow = false;
|
||||
while (loop) {
|
||||
if (cur_pos == src_len) break;
|
||||
if (curSymLen == Max_list_depth) { // WORKAROUND: xowa imposes max list depth of 256; MW is unlimited; may change for future release but 256 should accomodate all real-world usages
|
||||
boolean stop = false;
|
||||
for (int i = cur_pos; i < src_len; i++) {
|
||||
curByt = src[i];
|
||||
switch (curByt) {
|
||||
case Byte_ascii.Star:
|
||||
case Byte_ascii.Hash:
|
||||
case Byte_ascii.Semic:
|
||||
case Byte_ascii.Colon:
|
||||
cur_pos = i;
|
||||
break;
|
||||
default:
|
||||
stop = true;
|
||||
break;
|
||||
}
|
||||
if (stop) break;
|
||||
}
|
||||
for (int i = 0; i < Max_list_depth; i++)
|
||||
curSymAry[i] = Byte_ascii.Nil;
|
||||
curSymLen = 0;
|
||||
SymAry_fill_overflow = true;
|
||||
return cur_pos;
|
||||
}
|
||||
curByt = src[cur_pos];
|
||||
switch (curByt) {
|
||||
case Byte_ascii.Star:
|
||||
case Byte_ascii.Hash:
|
||||
case Byte_ascii.Semic:
|
||||
curSymAry[curSymLen++] = curByt;
|
||||
cur_pos++;
|
||||
allDd = false;
|
||||
break;
|
||||
case Byte_ascii.Colon:
|
||||
curSymAry[curSymLen++] = curByt;
|
||||
cur_pos++;
|
||||
break;
|
||||
default:
|
||||
loop = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return cur_pos;
|
||||
}
|
||||
private void Reset(int newListId) {
|
||||
posBldr.Init();
|
||||
curSymLen = 0;
|
||||
prvSymAry = Bry_.Empty;
|
||||
dd_chk = false;
|
||||
listId = newListId;
|
||||
}
|
||||
public static final int Max_list_depth = 256;
|
||||
}
|
||||
54
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_wkr_.java
Normal file
54
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_wkr_.java
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_list_wkr_ {
|
||||
public static byte[] MakeSymAry(byte[] curSymAry, int curSymLen) {
|
||||
byte[] rv = new byte[curSymLen];
|
||||
for (int i = 0; i < curSymLen; i++)
|
||||
rv[i] = curSymAry[i];
|
||||
return rv;
|
||||
}
|
||||
public static byte Compare_normalize(byte b) { // convert : to ; for sake of determining levels; EX: ";:" is actually same group
|
||||
switch (b) {
|
||||
case Byte_ascii.Star:
|
||||
case Byte_ascii.Hash:
|
||||
case Byte_ascii.Semic: return b;
|
||||
case Byte_ascii.Colon: return Byte_ascii.Semic;
|
||||
default: throw Exc_.new_unhandled(b);
|
||||
}
|
||||
}
|
||||
public static void Close_list_if_present(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos) {// close all list tkns on stack; EX: ***\n should close all 3 stars; used to only close 1
|
||||
if (ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tmpl_invk) != Xop_ctx.Stack_not_found) return; // list is inside template; do not close;
|
||||
int acs_pos = -1, acs_len = ctx.Stack_len();
|
||||
for (int i = acs_len - 1; i > -1; i--) { // loop backwards until earliest list tkn
|
||||
byte cur_acs_tid = ctx.Stack_get(i).Tkn_tid();
|
||||
switch (cur_acs_tid) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb:
|
||||
case Xop_tkn_itm_.Tid_tblw_tc:
|
||||
case Xop_tkn_itm_.Tid_tblw_te:
|
||||
case Xop_tkn_itm_.Tid_tblw_td:
|
||||
case Xop_tkn_itm_.Tid_tblw_th:
|
||||
case Xop_tkn_itm_.Tid_tblw_tr: i = -1; break; // tblw: stop loop; do not close a list above tbl; EX: ": {| |- *a |b }" should not close ":"; stops at "|-"
|
||||
case Xop_tkn_itm_.Tid_list: acs_pos = i; break; // list: update acs_pos
|
||||
default: break; // else: keep looping
|
||||
}
|
||||
}
|
||||
if (acs_pos == Xop_ctx.Stack_not_found) return; // no list tokens found; exit
|
||||
ctx.Stack_pop_til(root, src, acs_pos, true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_list);
|
||||
}
|
||||
}
|
||||
337
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_wkr_basic_tst.java
Normal file
337
400_xowa/src/gplx/xowa/parsers/lists/Xop_list_wkr_basic_tst.java
Normal file
@@ -0,0 +1,337 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_list_wkr_basic_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void List_1() {
|
||||
fxt.Test_parse_page_wiki("\n*a"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_end_(3).List_path_(0).List_uid_(0)
|
||||
);
|
||||
}
|
||||
@Test public void Bos() {
|
||||
fxt.Test_parse_page_wiki("*a"
|
||||
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(1, 2)
|
||||
, fxt.tkn_list_end_(2).List_path_(0).List_uid_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_2() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n**b"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(6, 7)
|
||||
, fxt.tkn_list_end_(7).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(7).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_2_2() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n**b\n**c"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(6, 7)
|
||||
, fxt.tkn_list_end_(7).List_path_(0, 0)
|
||||
, fxt.tkn_list_bgn_(7, 10, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 1).List_uid_(0)
|
||||
, fxt.tkn_txt_(10, 11)
|
||||
, fxt.tkn_list_end_(11).List_path_(0, 1)
|
||||
, fxt.tkn_list_end_(11).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_2_3() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n**b\n***c"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(6, 7)
|
||||
, fxt.tkn_list_bgn_(7, 11, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(11, 12)
|
||||
, fxt.tkn_list_end_(12).List_path_(0, 0, 0)
|
||||
, fxt.tkn_list_end_(12).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(12).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_2() {
|
||||
fxt.Test_parse_page_wiki("\n**a"
|
||||
, fxt.tkn_list_bgn_(0, 3, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(0, 3, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(3, 4)
|
||||
, fxt.tkn_list_end_(4).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(4).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_3() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n***b"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_bgn_(3, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(3, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(7, 8)
|
||||
, fxt.tkn_list_end_(8).List_path_(0, 0, 0)
|
||||
, fxt.tkn_list_end_(8).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(8).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_2_1() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n**b\n*c"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(6, 7)
|
||||
, fxt.tkn_list_end_(7).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(7).List_path_(0)
|
||||
, fxt.tkn_list_bgn_(7, 9, Xop_list_tkn_.List_itmTyp_ul).List_path_(1).List_uid_(0)
|
||||
, fxt.tkn_txt_(9, 10)
|
||||
, fxt.tkn_list_end_(10).List_path_(1)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_1_1() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n*b\n*c"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_end_(3).List_path_(0)
|
||||
, fxt.tkn_list_bgn_(3, 5, Xop_list_tkn_.List_itmTyp_ul).List_path_(1).List_uid_(0)
|
||||
, fxt.tkn_txt_(5, 6)
|
||||
, fxt.tkn_list_end_(6).List_path_(1)
|
||||
, fxt.tkn_list_bgn_(6, 8, Xop_list_tkn_.List_itmTyp_ul).List_path_(2).List_uid_(0)
|
||||
, fxt.tkn_txt_(8, 9)
|
||||
, fxt.tkn_list_end_(9).List_path_(2)
|
||||
);
|
||||
}
|
||||
@Test public void List_1___1() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n\n*b"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_end_(3).List_path_(0)
|
||||
, fxt.tkn_nl_char_len1_(3)
|
||||
, fxt.tkn_list_bgn_(4, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(1)
|
||||
, fxt.tkn_txt_(6, 7)
|
||||
, fxt.tkn_list_end_(7).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_3_1() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n***b\n*c"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_bgn_(3, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(3, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(7, 8)
|
||||
, fxt.tkn_list_end_(8).List_path_(0, 0, 0)
|
||||
, fxt.tkn_list_end_(8).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(8).List_path_(0)
|
||||
, fxt.tkn_list_bgn_(8, 10, Xop_list_tkn_.List_itmTyp_ul).List_path_(1).List_uid_(0)
|
||||
, fxt.tkn_txt_(10, 11)
|
||||
, fxt.tkn_list_end_(11).List_path_(1)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_2o_2u() {
|
||||
fxt.Test_parse_page_wiki("\n**a\n##b"
|
||||
, fxt.tkn_list_bgn_(0, 3, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(0, 3, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(3, 4)
|
||||
, fxt.tkn_list_end_(4).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(4).List_path_(0)
|
||||
, fxt.tkn_list_bgn_(4, 7, Xop_list_tkn_.List_itmTyp_ol).List_path_(0).List_uid_(1)
|
||||
, fxt.tkn_list_bgn_(4, 7, Xop_list_tkn_.List_itmTyp_ol).List_path_(0, 0).List_uid_(1)
|
||||
, fxt.tkn_txt_(7, 8)
|
||||
, fxt.tkn_list_end_(8).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(8).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void Dt_dd() {
|
||||
fxt.Test_parse_page_wiki(";a\n:b"
|
||||
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_dt).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(1, 2)
|
||||
, fxt.tkn_list_end_(2).List_path_(0)
|
||||
, fxt.tkn_list_bgn_(2, 4, Xop_list_tkn_.List_itmTyp_dd).List_path_(1).List_uid_(0)
|
||||
, fxt.tkn_txt_(4, 5)
|
||||
, fxt.tkn_list_end_(5).List_path_(1)
|
||||
);
|
||||
}
|
||||
@Test public void Dt_dd_inline() {
|
||||
fxt.Test_parse_page_wiki(";a:b" // NOTE: no line break
|
||||
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_dt).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(1, 2)
|
||||
, fxt.tkn_list_end_(2).List_path_(0)
|
||||
, fxt.tkn_list_bgn_(2, 3, Xop_list_tkn_.List_itmTyp_dd).List_path_(1).List_uid_(0)
|
||||
, fxt.tkn_txt_(3, 4)
|
||||
, fxt.tkn_list_end_(4).List_path_(1)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_1dd_1ul() {
|
||||
fxt.Test_parse_page_wiki(":*a"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_dd).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_end_(3).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(3).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_1ul__1dd_1ul() {
|
||||
fxt.Test_parse_page_wiki("*a\n:*b"
|
||||
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(1, 2)
|
||||
, fxt.tkn_list_end_(2).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(2, 5, Xop_list_tkn_.List_itmTyp_dd).List_path_(0).List_uid_(1)
|
||||
, fxt.tkn_list_bgn_(2, 5, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(1)
|
||||
, fxt.tkn_txt_(5, 6)
|
||||
, fxt.tkn_list_end_(6).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(6).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_1dd_1ul__1dd_1ul() {
|
||||
fxt.Test_parse_page_wiki(":*a\n:*b"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_dd).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_end_(3).List_path_(0, 0)
|
||||
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 1).List_uid_(0)
|
||||
, fxt.tkn_txt_(6, 7)
|
||||
, fxt.tkn_list_end_(7).List_path_(0, 1)
|
||||
, fxt.tkn_list_end_(7).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_1ul_1hdr() {
|
||||
fxt.Test_parse_page_wiki("*a\n==a==\n"
|
||||
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(1, 2)
|
||||
, fxt.tkn_list_end_(2).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_hdr_(2, 9, 2).Hdr_ws_trailing_(1).Subs_
|
||||
( fxt.tkn_txt_(5, 6)
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_1ul_1hdr_1ul() {
|
||||
fxt.Test_parse_page_wiki("*a\n==a==\n*b"
|
||||
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(1, 2)
|
||||
, fxt.tkn_list_end_(2).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_hdr_(2, 8, 2).Subs_
|
||||
( fxt.tkn_txt_(5, 6)
|
||||
)
|
||||
, fxt.tkn_list_bgn_(8, 10, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(1)
|
||||
, fxt.tkn_txt_(10, 11)
|
||||
, fxt.tkn_list_end_(11).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_1ol_1hr_1ol() {
|
||||
fxt.Test_parse_page_wiki("#a\n----\n#b"
|
||||
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ol).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(1, 2)
|
||||
, fxt.tkn_list_end_(2)
|
||||
, fxt.tkn_para_blank_(2)
|
||||
, fxt.tkn_hr_(2, 7)
|
||||
, fxt.tkn_list_bgn_(7, 9, Xop_list_tkn_.List_itmTyp_ol).List_path_(0).List_uid_(1)
|
||||
, fxt.tkn_txt_(9, 10)
|
||||
, fxt.tkn_list_end_(10)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_tblw() {
|
||||
fxt.Test_parse_page_wiki("::{|\n|a\n|}"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_dd).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_dd).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_tblw_tb_(2, 10).Subs_
|
||||
( fxt.tkn_tblw_tr_(4, 7).Subs_
|
||||
( fxt.tkn_tblw_td_(4, 7).Subs_(fxt.tkn_txt_(6, 7), fxt.tkn_para_blank_(8)))
|
||||
|
||||
)
|
||||
, fxt.tkn_list_end_(10).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(10).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void Dif_lvls_1_3_1() {
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "*1"
|
||||
, "***3"
|
||||
, "*1"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>1"
|
||||
, " <ul>"
|
||||
, " <li>"
|
||||
, " <ul>"
|
||||
, " <li>3"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " <li>1"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Dif_lvls_1_3_2() {// uneven lists
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "*1"
|
||||
, "***3"
|
||||
, "**2"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>1"
|
||||
, " <ul>"
|
||||
, " <li>"
|
||||
, " <ul>"
|
||||
, " <li>3"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " <li>2"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void New_lines() {
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
, ""
|
||||
, "**b"
|
||||
, ""
|
||||
, "**c"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
, "<ul>"
|
||||
, " <li>"
|
||||
, " <ul>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
, "<ul>"
|
||||
, " <li>"
|
||||
, " <ul>"
|
||||
, " <li>c"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_list_wkr_para_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Basic() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Multiple() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
, "*b"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Multiple_w_1_nl() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
, ""
|
||||
, "*b"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
, "<ul>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Pre_between_lists() { // PURPOSE: list should close pre; EX:en.b:Knowing Knoppix/Other applications; DATE:2014-02-18
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "#a"
|
||||
, " b"
|
||||
, "#c" // should close <pre> opened by b
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ol>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ol>"
|
||||
, ""
|
||||
, "<pre>b"
|
||||
, "</pre>"
|
||||
, ""
|
||||
, "<ol>"
|
||||
, " <li>c"
|
||||
, " </li>"
|
||||
, "</ol>"
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,409 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_list_wkr_uncommon_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
// several tests enable para mode on the shared fixture; always switch it back off after each test
@After public void term() {
	fxt.Init_para_n_();
}
|
||||
@Test public void Bug_specified_div() { // FIX: </div> was not clearing state for lnki; PAGE:en.w:Ananke (moon)
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "<div>"
|
||||
, "#<i>a"
|
||||
, "</div>"
|
||||
, "*b"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<div>"
|
||||
, "<ol>"
|
||||
, " <li><i>a"
|
||||
, "</i>"
|
||||
, " </li>"
|
||||
, "</ol></div>"
|
||||
, "<ul>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Bug_mismatched() { // FIX: </div> was not clearing state for lnki; PAGE:en.w:Ananke (moon)
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "::a"
|
||||
, ":::1"
|
||||
, "::::11"
|
||||
, ":::::111"
|
||||
, "::b"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dd>"
|
||||
, " <dl>"
|
||||
, " <dd>a"
|
||||
, " <dl>"
|
||||
, " <dd>1"
|
||||
, " <dl>"
|
||||
, " <dd>11"
|
||||
, " <dl>"
|
||||
, " <dd>111"
|
||||
, " </dd>"
|
||||
, " </dl>"
|
||||
, " </dd>"
|
||||
, " </dl>"
|
||||
, " </dd>"
|
||||
, " </dl>"
|
||||
, " </dd>"
|
||||
, " <dd>b"
|
||||
, " </dd>"
|
||||
, " </dl>"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
));
|
||||
}
|
||||
@Test public void Empty_li_ignored() { // PURPOSE: inner template can cause dupe li; PAGE:en.w:any Calendar day and NYT link; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
, "* "
|
||||
, "*b"
|
||||
, "*c"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, " <li> "
|
||||
, " </li>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, " <li>c"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void List_in_tblw() { // PURPOSE: list inside table should not be close outer list; PAGE:en.w:Cato the Elder
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
, "{|"
|
||||
, "|b"
|
||||
, "::c"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, "<table>"
|
||||
, " <tr>"
|
||||
, " <td>b"
|
||||
, ""
|
||||
, " <dl>"
|
||||
, " <dd>"
|
||||
, " <dl>"
|
||||
, " <dd>c"
|
||||
, " </dd>"
|
||||
, " </dl>"
|
||||
, " </dd>"
|
||||
, " </dl>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Dt_dd_colon_at_eol() { // PURPOSE: dangling ":" should not put next line in <dt>; PAGE:en.w:Stein; b was being wrapped in <dt>b</dt>; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( ";a:"
|
||||
, "*b"
|
||||
, ""
|
||||
, ";c"
|
||||
, "*d"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dt>a"
|
||||
, " </dt>"
|
||||
, " <dd>"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
, "<ul>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
, "<dl>"
|
||||
, " <dt>c"
|
||||
, " </dt>"
|
||||
, "</dl>"
|
||||
, "<ul>"
|
||||
, " <li>d"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Dd_should_not_print_colon() {// PURPOSE: ;a:\n should show as ";a" not ";a:". colon should still be considered as part of empty list; DATE:2013-11-07; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
|
||||
fxt.Test_parse_page_all_str
|
||||
( ";a:\nb"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dt>a"
|
||||
, " </dt>"
|
||||
, " <dd>"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
, "b"
|
||||
));
|
||||
}
|
||||
@Test public void Dt_dd_colon_in_lnki() { // PURPOSE: "; [[Portal:a]]" should not split lnki; PAGE:en.w:Wikipedia:WikiProject Military history/Operation Majestic Titan; "; [[Wikipedia:WikiProject Military history/Operation Majestic Titan/Phase I|Phase I]]: a b"
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( ";[[Portal:a]]"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dt><a href=\"/wiki/Portal:A\">Portal:A</a>"
|
||||
, " </dt>"
|
||||
, "</dl>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Max_list_depth() { // PURPOSE: 256+ * caused list parser to fail; ignore; PAGE:en.w:Bariatric surgery
|
||||
String multiple = String_.Repeat("*", 300);
|
||||
fxt.Test_parse_page_all_str(multiple, multiple);
|
||||
}
|
||||
@Test public void Numbered_list_resets_incorrectly() { // PURPOSE: as description
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "#A"
|
||||
, "#*Aa"
|
||||
, "#**Aaa"
|
||||
, "#*Ab"
|
||||
, "#B"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ol>"
|
||||
, " <li>A"
|
||||
, ""
|
||||
, " <ul>"
|
||||
, " <li>Aa"
|
||||
, ""
|
||||
, " <ul>"
|
||||
, " <li>Aaa"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " <li>Ab"
|
||||
, " </li>"
|
||||
, " </ul>" // was showing as </ol>
|
||||
, " </li>"
|
||||
, " <li>B"
|
||||
, " </li>"
|
||||
, "</ol>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void List_should_not_end_indented_table() {// PURPOSE: :{| was being closed by \n*; EX:w:Maxwell's equations; DATE:20121231
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( ":{|"
|
||||
, "|-"
|
||||
, "|"
|
||||
, "*a"
|
||||
, "|b"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dd>"
|
||||
, " <table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " <ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </td>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
));
|
||||
}
|
||||
@Test public void Dt_dd_broken_by_xnde() { // PURPOSE.fix: xnde was resetting dl incorrectly; EX:w:Virus; DATE:2013-01-31
|
||||
fxt.Test_parse_page_all_str(";<b>a</b>:c"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dt><b>a</b>"
|
||||
, " </dt>"
|
||||
, " <dd>c"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
));
|
||||
}
|
||||
@Test public void Trim_empty_list_items() { // PURPOSE: empty list items should be ignored; DATE:2013-07-02; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
|
||||
fxt.Test_parse_page_all_str
|
||||
("*** \n"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>"
|
||||
, " <ul>"
|
||||
, " <li>"
|
||||
, " <ul>"
|
||||
, " <li> "
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Trim_empty_list_items_error() { // PURPOSE.fix: do not add empty itm's nesting to current list; DATE:2013-07-07; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( "* a"
|
||||
, "** " // was: do not add ** to nest; now: add ** and \s
|
||||
, "*** b"
|
||||
, "* c"
|
||||
), String_.Concat_lines_nl
|
||||
( "<ul>"
|
||||
, " <li> a"
|
||||
, " <ul>"
|
||||
, " <li> "
|
||||
, " <ul>"
|
||||
, " <li> b"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " <li> c"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Tblw_should_autoclose() {// PURPOSE: tblw should auto-close open list
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "#a"
|
||||
, "{|"
|
||||
, "|b"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ol>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ol>"
|
||||
, "<table>"
|
||||
, " <tr>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Tblx_should_not_autoclose() { // PURPOSE: do not auto-close list if table is xnde; DATE:2014-02-05
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( "#a"
|
||||
, "# <table><tr><td>b</td></tr></table>"
|
||||
, "c"
|
||||
), String_.Concat_lines_nl
|
||||
( "<ol>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, " <li> "
|
||||
, " <table>"
|
||||
, " <tr>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, " </li>"
|
||||
, "</ol>"
|
||||
, "c"
|
||||
));
|
||||
}
|
||||
@Test public void Li_disappears() { // PURPOSE: "\n*" disappears when followed by "<li>"; PAGE:en.w:Bristol_Bullfinch; DATE:2014-06-24
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( "a"
|
||||
, "*b<li>"
|
||||
), String_.Concat_lines_nl_skip_last // NOTE: tag sequence matches MW output
|
||||
( "a"
|
||||
, "<ul>"
|
||||
, " <li>b"
|
||||
, "<li>"
|
||||
, "</li>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Ul_should_end_wlst() { // PURPOSE: </ul> should end wiki_list; PAGE:en.w:Bristol_Bullfinch; DATE:2014-06-24
|
||||
fxt.Test_parse_page_all_str
|
||||
( "*a</ul>b"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a</ul>b" // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Colon_causes_dd() { // PURPOSE: colon was mistakenly being ignored due to proximity to "\n;"; PAGE:de.w:Schmach_von_Tirana#Kuriosit.C3.A4t:_EM-Qualifikationsspiel_vom_20._November_1983 DATE:2014-07-11
|
||||
fxt.Test_parse_page_all_str
|
||||
( String_.Concat_lines_nl_skip_last
|
||||
( "a:b"
|
||||
, ";c"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "a:b"
|
||||
, "<dl>"
|
||||
, " <dt>c"
|
||||
, " </dt>"
|
||||
, "</dl>"
|
||||
));
|
||||
}
|
||||
@Test public void Pre_and_nested() { // PURPOSE: pre should interrupt list; PAGE:fi.w:Luettelo_hyönteisistä; DATE:2015-03-31
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str
|
||||
( String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
, "**b"
|
||||
, " c" // pre
|
||||
, "*d" // *d treated mistakenly as **d
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, ""
|
||||
, " <ul>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
, "<pre>c"
|
||||
, "</pre>"
|
||||
, ""
|
||||
, "<ul>"
|
||||
, " <li>d"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
}
|
||||
96
400_xowa/src/gplx/xowa/parsers/lnkes/Xoh_lnke_wtr.java
Normal file
96
400_xowa/src/gplx/xowa/parsers/lnkes/Xoh_lnke_wtr.java
Normal file
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.html.*; import gplx.xowa.html.hrefs.*;
|
||||
import gplx.xowa.net.*;
|
||||
public class Xoh_lnke_wtr {
|
||||
private Xoae_app app;
|
||||
public Xoh_lnke_wtr(Xowe_wiki wiki) {this.app = wiki.Appe();}
|
||||
public void Write_all(Bry_bfr bfr, Xoh_html_wtr html_wtr, Xoh_wtr_ctx hctx, Xop_ctx ctx, byte[] src, Xop_lnke_tkn lnke) {
|
||||
int lnke_bgn = lnke.Lnke_bgn(), lnke_end = lnke.Lnke_end(); boolean proto_is_xowa = lnke.Proto_tid() == Xoo_protocol_itm.Tid_xowa;
|
||||
if (!hctx.Mode_is_alt()) { // write href, unless mode is alt
|
||||
if (hctx.Mode_is_hdump()) {
|
||||
if (lnke.Lnke_typ() == Xop_lnke_tkn.Lnke_typ_text)
|
||||
bfr.Add_str_a7("<a xtid='a_lnke_txt' href=\"");
|
||||
else {
|
||||
if (lnke.Subs_len() == 0)
|
||||
bfr.Add_str_a7("<a xtid='a_lnke_brk_n' href=\"");
|
||||
else
|
||||
bfr.Add_str_a7("<a xtid='a_lnke_brk_y' href=\"");
|
||||
}
|
||||
}
|
||||
else
|
||||
bfr.Add(Xoh_consts.A_bgn);
|
||||
if (Write_href(bfr, ctx, src, lnke, lnke_bgn, lnke_end, proto_is_xowa))
|
||||
bfr.Add(A_lhs_end_external);
|
||||
else
|
||||
bfr.Add(A_lhs_end_internal);
|
||||
}
|
||||
Write_caption(bfr, html_wtr, hctx, ctx, src, lnke, lnke_bgn, lnke_end, proto_is_xowa);
|
||||
if (!hctx.Mode_is_alt()) {
|
||||
if (proto_is_xowa) // add <img />
|
||||
bfr.Add(Xoh_consts.Img_bgn).Add(html_wtr.Html_mgr().Img_xowa_protocol()).Add(Xoh_consts.__inline_quote);
|
||||
bfr.Add(Xoh_consts.A_end);
|
||||
}
|
||||
}
|
||||
public boolean Write_href(Bry_bfr bfr, Xop_ctx ctx, byte[] src, Xop_lnke_tkn lnke, int lnke_bgn, int lnke_end, boolean proto_is_xowa) {
|
||||
byte[] lnke_xwiki_wiki = lnke.Lnke_xwiki_wiki();
|
||||
if (lnke_xwiki_wiki == null) {
|
||||
if (lnke.Lnke_relative()) { // relative; EX: //a.org
|
||||
bfr.Add(app.Utl__url_parser().Url_parser().Relative_url_protocol_bry()).Add_mid(src, lnke_bgn, lnke_end);
|
||||
return true;
|
||||
}
|
||||
else { // xowa or regular; EX: http://a.org
|
||||
if (proto_is_xowa) {
|
||||
bfr.Add(Xop_lnke_wkr.Bry_xowa_protocol);
|
||||
Xoa_app_.Utl__encoder_mgr().Gfs().Encode(bfr, src, lnke_bgn, lnke_end);
|
||||
return false;
|
||||
}
|
||||
else { // regular; add href
|
||||
bfr.Add_mid(src, lnke_bgn, lnke_end);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else { // xwiki
|
||||
Url_encoder href_encoder = Xoa_app_.Utl__encoder_mgr().Href_quotes();
|
||||
bfr.Add(Xoh_href_parser.Href_site_bry).Add(lnke_xwiki_wiki).Add(Xoh_href_parser.Href_wiki_bry)
|
||||
.Add(href_encoder.Encode(lnke.Lnke_xwiki_page())); // NOTE: must encode page; EX:%22%3D -> '">' which will end attribute; PAGE:en.w:List_of_Category_A_listed_buildings_in_West_Lothian DATE:2014-07-15
|
||||
if (lnke.Lnke_xwiki_qargs() != null)
|
||||
Xoa_url_arg_hash.Concat_bfr(bfr, href_encoder, lnke.Lnke_xwiki_qargs()); // NOTE: must encode args
|
||||
return false;
|
||||
}
|
||||
}
|
||||
public void Write_caption(Bry_bfr bfr, Xoh_html_wtr html_wtr, Xoh_wtr_ctx hctx, Xop_ctx ctx, byte[] src, Xop_lnke_tkn lnke, int lnke_bgn, int lnke_end, boolean proto_is_xowa) {
|
||||
int subs_len = lnke.Subs_len();
|
||||
if (subs_len == 0) { // no text; auto-number; EX: "[1]"
|
||||
if (lnke.Lnke_typ() == Xop_lnke_tkn.Lnke_typ_text)
|
||||
bfr.Add_mid(src, lnke_bgn, lnke_end);
|
||||
else
|
||||
bfr.Add_byte(Byte_ascii.Brack_bgn).Add_int_variable(ctx.Cur_page().Html_data().Lnke_autonumber_next()).Add_byte(Byte_ascii.Brack_end);
|
||||
}
|
||||
else { // text available
|
||||
for (int i = 0; i < subs_len; i++)
|
||||
html_wtr.Write_tkn(bfr, ctx, hctx, src, lnke, i, lnke.Subs_get(i));
|
||||
}
|
||||
}
|
||||
private static final byte[]
|
||||
A_lhs_end_external = Bry_.new_a7("\" class=\"external text\" rel=\"nofollow\">")
|
||||
, A_lhs_end_internal = Bry_.new_a7("\">")
|
||||
;
|
||||
}
|
||||
36
400_xowa/src/gplx/xowa/parsers/lnkes/Xoh_lnke_wtr_tst.java
Normal file
36
400_xowa/src/gplx/xowa/parsers/lnkes/Xoh_lnke_wtr_tst.java
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xoh_lnke_wtr_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_(); fxt.Reset();}
|
||||
@Test public void Basic() {fxt.Test_parse_page_wiki_str("[irc://a]" , "<a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">[1]</a>");}
|
||||
@Test public void Autonumber() {fxt.Test_parse_page_wiki_str("[irc://a] [irc://b]" , "<a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">[1]</a> <a href=\"irc://b\" class=\"external text\" rel=\"nofollow\">[2]</a>");}
|
||||
@Test public void Caption() {fxt.Test_parse_page_wiki_str("[irc://a b]" , "<a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">b</a>");}
|
||||
@Test public void Caption_wtxt() {fxt.Test_parse_page_wiki_str("[irc://a ''b'']" , "<a href=\"irc://a\" class=\"external text\" rel=\"nofollow\"><i>b</i></a>");}
|
||||
@Test public void Xowa_protocol() {
|
||||
String img = "<img src=\"file:///mem/xowa/user/test_user/app/img/xowa/protocol.png\"/>";
|
||||
fxt.Wiki().Sys_cfg().Xowa_proto_enabled_(true);
|
||||
fxt.Test_parse_page_wiki_str("[xowa-cmd:\"a\" z]" , "<a href=\"xowa-cmd:a\">z" + img + "</a>");
|
||||
fxt.Test_parse_page_wiki_str("[xowa-cmd:\"a.b('c_d');\" z]" , "<a href=\"xowa-cmd:a.b('c_d');\">z" + img + "</a>");
|
||||
fxt.Test_parse_page_wiki_str("[xowa-cmd:*\"a\"b*c\"* z]" , "<a href=\"xowa-cmd:a%22b%2Ac\">z" + img + "</a>");
|
||||
fxt.Wiki().Sys_cfg().Xowa_proto_enabled_(false);
|
||||
fxt.Test_parse_page_wiki_str("[xowa-cmd:\"a\" b]" , "[xowa-cmd:"a" b]"); // protocol is disabled: literalize String (i.e.: don't make it an anchor)
|
||||
}
|
||||
}
|
||||
26
400_xowa/src/gplx/xowa/parsers/lnkes/Xop_lnke_end_lxr.java
Normal file
26
400_xowa/src/gplx/xowa/parsers/lnkes/Xop_lnke_end_lxr.java
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_lnke_end_lxr implements Xop_lxr {//20111222
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_lnke_end;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Brack_end, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Lnke().MakeTkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_lnke_end_lxr _ = new Xop_lnke_end_lxr(); Xop_lnke_end_lxr() {}
|
||||
}
|
||||
22
400_xowa/src/gplx/xowa/parsers/lnkes/Xop_lnke_log.java
Normal file
22
400_xowa/src/gplx/xowa/parsers/lnkes/Xop_lnke_log.java
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_lnke_log {
|
||||
private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "lnke");
|
||||
public static final Gfo_msg_itm Dangling = Gfo_msg_itm_.new_note_(owner, "dangling"); // NOTE: WP.BOT:YOBOT;PAGE:en.w:Pan_flute
|
||||
}
|
||||
44
400_xowa/src/gplx/xowa/parsers/lnkes/Xop_lnke_lxr.java
Normal file
44
400_xowa/src/gplx/xowa/parsers/lnkes/Xop_lnke_lxr.java
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.net.*;
|
||||
public class Xop_lnke_lxr implements Xop_lxr {
|
||||
Xop_lnke_lxr(byte lnke_typ, byte[] protocol, byte tid) {this.lnke_typ = lnke_typ; this.protocol = protocol; this.tid = tid;} private byte lnke_typ; byte[] protocol; byte tid;
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_lnke_bgn;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
|
||||
Xoo_protocol_itm[] ary = Xoo_protocol_itm.Ary();
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
Xoo_protocol_itm itm = ary[i];
|
||||
Ctor_lxr_add(core_trie, itm.Key_w_colon_bry(), itm.Tid());
|
||||
}
|
||||
core_trie.Add(Bry_relative_1, new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Xoa_consts.Url_relative_prefix, Xoo_protocol_itm.Tid_relative_1));
|
||||
core_trie.Add(Bry_relative_2, new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Xoa_consts.Url_relative_prefix, Xoo_protocol_itm.Tid_relative_2));
|
||||
Ctor_lxr_add(core_trie, Bry_.new_a7("xowa-cmd"), Xoo_protocol_itm.Tid_xowa);
|
||||
} private static final byte[] Bry_relative_1 = Bry_.new_a7("[//"), Bry_relative_2 = Bry_.new_a7("[[//");
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
private void Ctor_lxr_add(Btrie_fast_mgr core_trie, byte[] protocol_bry, byte tid) {
|
||||
core_trie.Add(protocol_bry , new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_text, protocol_bry, tid));
|
||||
core_trie.Add(Bry_.Add(Byte_ascii.Brack_bgn, protocol_bry) , new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, protocol_bry, tid));
|
||||
}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if (this.tid == Xoo_protocol_itm.Tid_xowa && !ctx.Wiki().Sys_cfg().Xowa_proto_enabled()) return ctx.Lxr_make_txt_(cur_pos);
|
||||
return ctx.Lnke().MakeTkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, protocol, tid, lnke_typ);
|
||||
}
|
||||
public static final Xop_lnke_lxr _ = new Xop_lnke_lxr(); Xop_lnke_lxr() {}
|
||||
}
|
||||
40
400_xowa/src/gplx/xowa/parsers/lnkes/Xop_lnke_tkn.java
Normal file
40
400_xowa/src/gplx/xowa/parsers/lnkes/Xop_lnke_tkn.java
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.lnkes.*;
|
||||
public class Xop_lnke_tkn extends Xop_tkn_itm_base {//20111222
|
||||
public static final byte Lnke_typ_null = 0, Lnke_typ_brack = 1, Lnke_typ_text = 2, Lnke_typ_brack_dangling = 3;
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_lnke;}
|
||||
public boolean Lnke_relative() {return lnke_relative;} public Xop_lnke_tkn Lnke_relative_(boolean v) {lnke_relative = v; return this;} private boolean lnke_relative;
|
||||
public byte Lnke_typ() {return lnke_typ;} public Xop_lnke_tkn Lnke_typ_(byte v) {lnke_typ = v; return this;} private byte lnke_typ = Lnke_typ_null;
|
||||
public byte[] Lnke_site() {return lnke_site;} public Xop_lnke_tkn Lnke_site_(byte[] v) {lnke_site = v; return this;} private byte[] lnke_site;
|
||||
public byte[] Lnke_xwiki_wiki() {return lnke_xwiki_wiki;} private byte[] lnke_xwiki_wiki;
|
||||
public byte[] Lnke_xwiki_page() {return lnke_xwiki_page;} private byte[] lnke_xwiki_page;
|
||||
public Gfo_url_arg[] Lnke_xwiki_qargs() {return lnke_xwiki_qargs;} Gfo_url_arg[] lnke_xwiki_qargs;
|
||||
public void Lnke_xwiki_(byte[] wiki, byte[] page, Gfo_url_arg[] args) {this.lnke_xwiki_wiki = wiki; this.lnke_xwiki_page = page; this.lnke_xwiki_qargs = args;}
|
||||
public int Lnke_bgn() {return lnke_bgn;} private int lnke_bgn;
|
||||
public int Lnke_end() {return lnke_end;} private int lnke_end;
|
||||
public Xop_lnke_tkn Lnke_rng_(int bgn, int end) {lnke_bgn = bgn; lnke_end = end; return this;}
|
||||
public byte[] Protocol() {return protocol;} private byte[] protocol;
|
||||
public byte Proto_tid() {return proto_tid;} private byte proto_tid;
|
||||
public Xop_lnke_tkn Subs_add_ary(Xop_tkn_itm... ary) {for (Xop_tkn_itm itm : ary) super.Subs_add(itm); return this;}
|
||||
|
||||
public Xop_lnke_tkn(int bgn, int end, byte[] protocol, byte proto_tid, byte lnke_typ, int lnke_bgn, int lnke_end) {
|
||||
this.Tkn_ini_pos(false, bgn, end); this.protocol = protocol; this.proto_tid = proto_tid; this.lnke_typ = lnke_typ; this.lnke_bgn = lnke_bgn; this.lnke_end = lnke_end;
|
||||
} Xop_lnke_tkn() {}
|
||||
}
|
||||
306
400_xowa/src/gplx/xowa/parsers/lnkes/Xop_lnke_wkr.java
Normal file
306
400_xowa/src/gplx/xowa/parsers/lnkes/Xop_lnke_wkr.java
Normal file
@@ -0,0 +1,306 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.apps.progs.*; import gplx.xowa.net.*; import gplx.xowa.wikis.xwikis.*;
|
||||
public class Xop_lnke_wkr implements Xop_ctx_wkr {
|
||||
public void Ctor_ctx(Xop_ctx ctx) {url_parser = ctx.App().Utl__url_parser().Url_parser();} Gfo_url_parser url_parser; Gfo_url_site_data site_data = new Gfo_url_site_data(); Xoa_url_parser xo_url_parser = new Xoa_url_parser(); Xoa_url xo_url_parser_url = Xoa_url.blank_();
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
|
||||
public boolean Dangling_goes_on_stack() {return dangling_goes_on_stack;} public void Dangling_goes_on_stack_(boolean v) {dangling_goes_on_stack = v;} private boolean dangling_goes_on_stack;
|
||||
public void AutoClose(Xop_ctx ctx, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
|
||||
// "[" but no "]"; EX: "[irc://a"; NOTE: lnkes that start with protocol will be ac'd in MakeTkn_bgn; EX: "http://a"
|
||||
Xop_lnke_tkn bgn_tkn = (Xop_lnke_tkn)tkn;
|
||||
bgn_tkn.Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack_dangling);
|
||||
bgn_tkn.Src_end_(bgn_tkn.Lnke_end()); // NOTE: endPos is lnke_end, not cur_pos or src_len; EX: "[irc://a b", lnk ends at a, not b; NOTE: still bgns at [
|
||||
ctx.Msg_log().Add_itm_none(Xop_lnke_log.Dangling, src, tkn.Src_bgn(), cur_pos);
|
||||
}
|
||||
// Protocol prefix of xowa command links, as a String; the bracketed form is handled by Make_tkn_xowa (fmt: [xowa-cmd:^"cmd"^ caption])
public static final String Str_xowa_protocol = "xowa-cmd:";
|
||||
// Same prefix as a byte array, built from Str_xowa_protocol via Bry_.new_a7
public static final byte[] Bry_xowa_protocol = Bry_.new_a7(Str_xowa_protocol);
|
||||
public int MakeTkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, byte[] protocol, byte proto_tid, byte lnke_type) {
|
||||
boolean lnke_type_brack = (lnke_type == Xop_lnke_tkn.Lnke_typ_brack);
|
||||
if ( !lnke_type_brack // lnke doesn't have "["; EX: "ttl:"
|
||||
&& !Valid_text_lnke(ctx, src, src_len, bgn_pos, cur_pos) // tkn is part of work; EX: " ttl:" vs "attl:"
|
||||
)
|
||||
return ctx.Lxr_make_txt_(cur_pos - 1); // -1 to ignore ":" in making text colon; needed to process ":" for list like "; attl: b" PAGE:de.w:Mord_(Deutschland)#Besonders_verwerfliche_Begehungsweise; DATE:2015-01-09
|
||||
if (ctx.Stack_get_typ(Xop_tkn_itm_.Tid_lnke) != null) return ctx.Lxr_make_txt_(cur_pos); // no nested lnke; return cur lnke as text; EX: "[irc://a irc://b]" -> "<a href='irc:a'>irc:b</a>"
|
||||
if (proto_tid == Xoo_protocol_itm.Tid_xowa) return Make_tkn_xowa(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, protocol, proto_tid, lnke_type);
|
||||
|
||||
// HACK: need to disable lnke if enclosing type is lnki and (1) arg is "link=" or (2) in 1st arg; basically, only enable for caption tkns (and preferably, thumb only) (which should be neither 1 or 2)
|
||||
if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_lnki && lnke_type == Xop_lnke_tkn.Lnke_typ_text) {
|
||||
byte mode = Lnki_linkMode_init;
|
||||
int lnki_pipe_count = 0;
|
||||
int tkn_idx = -1;
|
||||
for (int i = root.Subs_len() - 1; i > -1; i--) {
|
||||
Xop_tkn_itm link_tkn = root.Subs_get(i);
|
||||
tkn_idx = i;
|
||||
switch (link_tkn.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_pipe:
|
||||
if (mode == Lnki_linkMode_text) {ctx.Lxr_make_(false); return bgn_pos + 1;} // +1 to position after lnke_hook; EX:[[File:A.png|link=http:b.org]] position at t in http so http hook won't be invoked.
|
||||
else {i = -1; ++lnki_pipe_count;}
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_txt:
|
||||
if (mode == Lnki_linkMode_eq) mode = Lnki_linkMode_text;
|
||||
// else i = -1; // DELETE: do not be overly strict; need to handle pattern of link=http://a.org?b=http://c.org; DATE:2013-02-03
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_eq:
|
||||
if (mode == Lnki_linkMode_init) mode = Lnki_linkMode_eq;
|
||||
// else i = -1; // DELETE: do not be overly strict; need to handle pattern of link=http://a.org?b=http://c.org; DATE:2013-02-03
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_space: case Xop_tkn_itm_.Tid_tab:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (lnki_pipe_count == 0) {
|
||||
for (int i = tkn_idx; i > -1; i--) {
|
||||
Xop_tkn_itm link_tkn = root.Subs_get(i);
|
||||
tkn_idx = i;
|
||||
switch (link_tkn.Tkn_tid()) {
|
||||
// case Xop_tkn_itm_.Tid_txt: return cur_pos; // REMOVED:2012-11-12: was causing [[http://a.org a]] [[http://b.org b]] to fail; PAGE:en.w:Template:Infobox_country
|
||||
case Xop_tkn_itm_.Tid_space: case Xop_tkn_itm_.Tid_tab: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
int lnke_bgn = bgn_pos, lnke_end = -1, brack_end_pos = -1;
|
||||
int lnke_end_tid = End_tid_null;
|
||||
while (true) { // loop until lnke_end_tid char;
|
||||
if (cur_pos == src_len) {lnke_end_tid = End_tid_eos; lnke_end = cur_pos; break;}
|
||||
switch (src[cur_pos]) {
|
||||
case Byte_ascii.Brack_end:
|
||||
if (lnke_type_brack) { // NOTE: check that frame begins with [ in order to end with ]
|
||||
lnke_end_tid = End_tid_brack; brack_end_pos = cur_pos + Xoa_prog_mgr.Adj_next_char;
|
||||
}
|
||||
else { // NOTE: frame does not begin with [ but ] encountered. mark "invalid" in order to force parser to stop before "]"
|
||||
lnke_end_tid = End_tid_invalid;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Space: lnke_end_tid = End_tid_space; break;
|
||||
case Byte_ascii.Nl: lnke_end_tid = End_tid_nl; break;
|
||||
case Byte_ascii.Gt: case Byte_ascii.Lt:
|
||||
lnke_end_tid = End_tid_invalid;
|
||||
break;
|
||||
case Byte_ascii.Apos:
|
||||
if (cur_pos + 1 < src_len && src[cur_pos + 1] == Byte_ascii.Apos) // NOTE: '' breaks link, but not '; EX: [http://a.org''b'']]; DATE:2013-03-18
|
||||
lnke_end_tid = End_tid_invalid;
|
||||
break;
|
||||
case Byte_ascii.Brack_bgn: // NOTE: always stop lnke at "[" regardless of brack_type; EX: [http:a.org[[B]]] and http:a.org[[B]]; DATE:2014-07-11
|
||||
case Byte_ascii.Quote: // NOTE: quote should also stop lnke; DATE:2014-10-10
|
||||
lnke_end_tid = End_tid_symbol;
|
||||
break;
|
||||
}
|
||||
if (lnke_end_tid == End_tid_null) cur_pos++;
|
||||
else {
|
||||
lnke_end = cur_pos;
|
||||
cur_pos++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (lnke_type_brack) {
|
||||
switch (lnke_end_tid) {
|
||||
case End_tid_eos:
|
||||
if (brack_end_pos == -1) { // eos but no ]; EX: "[irc://a"
|
||||
if (dangling_goes_on_stack) { // added for Xow_popup_parser which needs to handle dangling lnke due to block_len; DATE:2014-06-20
|
||||
ctx.Subs_add_and_stack(root, tkn_mkr.Txt(bgn_pos, src_len)); // note that tkn doesn't matter, as Xow_popup_parser only cares *if* something is on stack, not *what* is on stack
|
||||
return src_len;
|
||||
}
|
||||
ctx.Subs_add(root, tkn_mkr.Txt(bgn_pos, bgn_pos + 1));// convert open brack to txt; // FUTURE: don't make brack_tkn; just flag
|
||||
bgn_pos += 1;
|
||||
brack_end_pos = cur_pos;
|
||||
lnke_bgn = bgn_pos;
|
||||
lnke_type = Xop_lnke_tkn.Lnke_typ_brack_dangling;
|
||||
}
|
||||
break;
|
||||
case End_tid_nl:
|
||||
lnke_type = Xop_lnke_tkn.Lnke_typ_brack_dangling;
|
||||
return ctx.Lxr_make_txt_(lnke_end); // textify lnk; EX: [irc://a\n] textifies "[irc://a"
|
||||
default:
|
||||
lnke_bgn += proto_tid == Xoo_protocol_itm.Tid_relative_2 ? 2 : 1; // if Tid_relative_2, then starts with [[; adjust by 2; EX:"[[//en" should have lnke_bgn at "//en", not "[//en"
|
||||
lnke_type = Xop_lnke_tkn.Lnke_typ_brack;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else { // else, plain text
|
||||
brack_end_pos = lnke_end;
|
||||
lnke_type = Xop_lnke_tkn.Lnke_typ_text;
|
||||
if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_lnki) { // SEE:NOTE_1
|
||||
Xop_tkn_itm prv_tkn = root.Subs_get(root.Subs_len() - 1); // get last tkn
|
||||
if (prv_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) { // is tkn lnki?
|
||||
root.Subs_del_after(prv_tkn.Tkn_sub_idx()); // delete [[ tkn and replace with [ tkn
|
||||
root.Subs_add(tkn_mkr.Txt(prv_tkn.Src_bgn(), prv_tkn.Src_bgn() + 1));
|
||||
ctx.Stack_pop_last(); // don't forget to remove from stack
|
||||
lnke_type = Xop_lnke_tkn.Lnke_typ_brack; // change lnke_typee to brack
|
||||
--bgn_pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (proto_tid == Xoo_protocol_itm.Tid_relative_2) // for "[[//", add "["; rest of code handles "[//" normally, but still want to include literal "["; DATE:2013-02-02
|
||||
ctx.Subs_add(root, tkn_mkr.Txt(lnke_bgn - 1, lnke_bgn));
|
||||
url_parser.Parse_site_fast(site_data, src, lnke_bgn, lnke_end);
|
||||
int site_bgn = site_data.Site_bgn(), site_end = site_data.Site_end();
|
||||
if (site_bgn == site_end) return ctx.Lxr_make_txt_(cur_pos); // empty proto should return text, not lnke; EX: "http:", "http://", "[http://]"; DATE:2014-10-09
|
||||
int adj = Ignore_punctuation_at_end(src, site_bgn, lnke_end);
|
||||
if (adj != 0) {
|
||||
lnke_end -= adj;
|
||||
brack_end_pos -= adj;
|
||||
cur_pos -= adj;
|
||||
}
|
||||
Xop_lnke_tkn tkn = tkn_mkr.Lnke(bgn_pos, brack_end_pos, protocol, proto_tid, lnke_type, lnke_bgn, lnke_end);
|
||||
tkn.Lnke_relative_(site_data.Rel());
|
||||
Xow_xwiki_itm xwiki = ctx.App().Usere().Wiki().Xwiki_mgr().Get_by_mid(src, site_bgn, site_end); // NOTE: check User_wiki.Xwiki_mgr, not App.Wiki_mgr() b/c only it is guaranteed to know all wikis on system
|
||||
if (xwiki != null) { // lnke is to an xwiki; EX: [http://en.wikipedia.org/A a]
|
||||
Xowe_wiki wiki = ctx.Wiki();
|
||||
Xoa_url_parser.Parse_url(xo_url_parser_url, ctx.App(), wiki, src, lnke_bgn, lnke_end, false);
|
||||
byte[] xwiki_wiki = xo_url_parser_url.Wiki_bry();
|
||||
byte[] xwiki_page = xo_url_parser_url.Page_bry();
|
||||
byte[] ttl_bry = xo_url_parser_url.Page_bry();
|
||||
Xoa_ttl ttl = Xoa_ttl.parse_(wiki, ttl_bry);
|
||||
if (ttl != null && ttl.Wik_itm() != null) {
|
||||
xwiki_wiki = ttl.Wik_itm().Domain_bry();
|
||||
xwiki_page = ttl.Page_url();
|
||||
}
|
||||
tkn.Lnke_xwiki_(xwiki_wiki, xwiki_page, xo_url_parser_url.Args());
|
||||
}
|
||||
ctx.Subs_add(root, tkn);
|
||||
if (lnke_type == Xop_lnke_tkn.Lnke_typ_brack) {
|
||||
if (lnke_end_tid == End_tid_brack) {
|
||||
tkn.Src_end_(cur_pos);
|
||||
tkn.Subs_move(root);
|
||||
return cur_pos;
|
||||
}
|
||||
ctx.Stack_add(tkn);
|
||||
if (lnke_end_tid == End_tid_invalid) {
|
||||
return cur_pos - 1; // -1 to return before < or >
|
||||
}
|
||||
}
|
||||
else {
|
||||
switch (lnke_end_tid) {
|
||||
case End_tid_space:
|
||||
ctx.Subs_add(root, tkn_mkr.Space(root, cur_pos - 1, cur_pos));
|
||||
break;
|
||||
case End_tid_symbol:
|
||||
case End_tid_nl:
|
||||
case End_tid_invalid: // NOTE that cur_pos is set after <, must subtract 1 else </xnde> will be ignored; EX: <span>irc://a</span>
|
||||
return cur_pos - 1;
|
||||
}
|
||||
}
|
||||
return cur_pos;
|
||||
}
|
||||
private static int Ignore_punctuation_at_end(byte[] src, int proto_end, int lnke_end) { // DATE:2014-10-09
|
||||
int rv = 0;
|
||||
int pos = lnke_end - 1; // -1 b/c pos is after char; EX: "abc" has pos of 3; need --pos to start at src[2] = 'c'
|
||||
byte paren_bgn_chk = Bool_.__byte;
|
||||
while (pos >= proto_end) {
|
||||
byte b = src[pos];
|
||||
switch (b) { // REF.MW: $sep = ',;\.:!?';
|
||||
case Byte_ascii.Comma: case Byte_ascii.Semic: case Byte_ascii.Backslash: case Byte_ascii.Dot:
|
||||
case Byte_ascii.Bang: case Byte_ascii.Question:
|
||||
break;
|
||||
case Byte_ascii.Colon: // differentiate between "http:" (don't trim) and "http://a.org:" (trim)
|
||||
if (pos == proto_end -1) return rv;
|
||||
break;
|
||||
case Byte_ascii.Paren_end: // differentiate between "(http://a.org)" (trim) and "http://a.org/b(c)" (don't trim)
|
||||
if (paren_bgn_chk == Bool_.__byte) {
|
||||
int paren_bgn_pos = Bry_finder.Find_fwd(src, Byte_ascii.Paren_bgn, proto_end, lnke_end);
|
||||
paren_bgn_chk = paren_bgn_pos == Bry_finder.Not_found ? Bool_.N_byte : Bool_.Y_byte;
|
||||
}
|
||||
if (paren_bgn_chk == Bool_.Y_byte) // "(" found; do not ignore ")"
|
||||
return rv;
|
||||
else
|
||||
break;
|
||||
default:
|
||||
return rv;
|
||||
}
|
||||
--pos;
|
||||
++rv;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
// States of the backward scan in MakeTkn_bgn that detects a text lnke used as a lnki arg value: init -> eq (after "=") -> text (after txt following "=")
private static final byte Lnki_linkMode_init = 0, Lnki_linkMode_eq = 1, Lnki_linkMode_text = 2;
|
||||
// Reason the url scan in MakeTkn_bgn stopped: null = still scanning; eos = end of src; brack = "]" of a bracketed lnke; space; nl; symbol = "[" or quote; invalid = "<", ">", "''", or "]" without "["
private static final byte End_tid_null = 0, End_tid_eos = 1, End_tid_brack = 2, End_tid_space = 3, End_tid_nl = 4, End_tid_symbol = 5, End_tid_invalid = 6;
|
||||
public int MakeTkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
// Xop_tkn_itm last_tkn = ctx.Stack_get_last(); // BLOCK:invalid_ttl_check; // TODO: backout apos changes
|
||||
// if ( last_tkn != null
|
||||
// && last_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) {
|
||||
// Xop_lnki_tkn lnki = (Xop_lnki_tkn)last_tkn;
|
||||
// if ( lnki.Pipe_count_is_zero()) { // always invalid
|
||||
// ctx.Stack_pop_last();
|
||||
// return Xop_lnki_wkr_.Invalidate_lnki(ctx, src, root, lnki, bgn_pos);
|
||||
// }
|
||||
// }
|
||||
int lnke_bgn_idx = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_lnke);
|
||||
if (lnke_bgn_idx == -1) return ctx.Lxr_make_txt_(cur_pos); // no lnke_bgn tkn; occurs when just ]; EX: "a]b"
|
||||
Xop_lnke_tkn bgnTkn = (Xop_lnke_tkn)ctx.Stack_pop_til(root, src, lnke_bgn_idx, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_lnke);
|
||||
bgnTkn.Src_end_(cur_pos);
|
||||
bgnTkn.Subs_move(root);
|
||||
return cur_pos;
|
||||
}
|
||||
private static boolean Valid_text_lnke(Xop_ctx ctx, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_char_0) return true; // lnke starts at 0; always true
|
||||
int prv_pos = bgn_pos - 1;
|
||||
byte prv_byte = src[prv_pos];
|
||||
switch (prv_byte) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
return false; // alpha-numerical is invalid; EX: "titel:" should not generate a lnke for "tel:"
|
||||
}
|
||||
if (prv_byte >= Byte_ascii.Ascii_min && prv_byte <= Byte_ascii.Ascii_max) return true; // consider all other ASCII chars as true; EX: \t\n !, etc;
|
||||
prv_pos = gplx.intl.Utf8_.Get_pos0_of_char_bwd(src, prv_pos);
|
||||
prv_byte = src[prv_pos];
|
||||
boolean prv_char_is_letter = ctx.Lang().Case_mgr().Match_any_exists(prv_byte, src, prv_pos, bgn_pos);
|
||||
return !prv_char_is_letter;
|
||||
}
|
||||
private int Make_tkn_xowa(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, byte[] protocol, byte proto_tid, byte lnke_type) {
|
||||
// NOTE: fmt is [xowa-cmd:^"app.setup_mgr.import_wiki('');"^ ]
|
||||
if (lnke_type != Xop_lnke_tkn.Lnke_typ_brack) return ctx.Lxr_make_txt_(cur_pos); // NOTE: must check for [ or else C:\xowa\ will cause it to evaluate as lnke
|
||||
int proto_end_pos = cur_pos + 1; // +1 to skip past :
|
||||
int lhs_dlm_pos = Bry_finder.Find_fwd(src, Byte_ascii.Quote, proto_end_pos, src_len); if (lhs_dlm_pos == Bry_.NotFound) return ctx.Lxr_make_txt_(cur_pos);
|
||||
int lnke_bgn_pos = lhs_dlm_pos + 1;
|
||||
byte[] rhs_dlm_bry = Bry_quote;
|
||||
if (lhs_dlm_pos - proto_end_pos > 0) {
|
||||
Bry_bfr bfr = ctx.App().Utl__bfr_mkr().Get_k004();
|
||||
rhs_dlm_bry = bfr.Add(Bry_quote).Add_mid(src, proto_end_pos, lhs_dlm_pos).Xto_bry_and_clear();
|
||||
bfr.Mkr_rls();
|
||||
}
|
||||
int rhs_dlm_pos = Bry_finder.Find_fwd(src, rhs_dlm_bry, lnke_bgn_pos, src_len); if (rhs_dlm_pos == Bry_.NotFound) return ctx.Lxr_make_txt_(cur_pos);
|
||||
int txt_bgn = Bry_finder.Find_fwd_while_space_or_tab(src, rhs_dlm_pos + rhs_dlm_bry.length, src_len); if (txt_bgn == Bry_.NotFound) return ctx.Lxr_make_txt_(cur_pos);
|
||||
int txt_end = Bry_finder.Find_fwd(src, Byte_ascii.Brack_end, txt_bgn, src_len); if (txt_end == Bry_.NotFound) return ctx.Lxr_make_txt_(cur_pos);
|
||||
|
||||
int end_pos = txt_end + 1; // +1 to place after ]
|
||||
Xop_lnke_tkn tkn = tkn_mkr.Lnke(bgn_pos, end_pos, protocol, proto_tid, lnke_type, lnke_bgn_pos, rhs_dlm_pos); // +1 to ignore [
|
||||
ctx.Subs_add(root, tkn);
|
||||
tkn.Subs_add(tkn_mkr.Txt(txt_bgn, txt_end));
|
||||
return end_pos;
|
||||
} private static final byte[] Bry_quote = new byte[] {Byte_ascii.Quote};
|
||||
}
|
||||
/*
|
||||
NOTE_1
|
||||
lnke takes precedence over lnki.
|
||||
EX: [[irc://a b]]
|
||||
pass: [<a href="irc://a">b</a>] i.e. [b] where b is a lnke with caption b and trg of irc://a
|
||||
fail: <a href="irc://a">b</a> i.e. b where b is a lnki with caption b and trg of irc://a
|
||||
*/
|
||||
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_lnke_wkr_brack_tst {
|
||||
@Before public void init() {fxt.Reset();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Brace_noText() {
|
||||
fxt.Test_parse_page_wiki("[irc://a]", fxt.tkn_lnke_(0, 9).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack).Lnke_rng_(1, 8));
|
||||
}
|
||||
@Test public void Brace_eos() {
|
||||
fxt.Test_parse_page_wiki("[irc://a", fxt.tkn_txt_(0, 1), fxt.tkn_lnke_(1, 8).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack_dangling).Lnke_rng_(1, 8));
|
||||
}
|
||||
@Test public void Brace_text() {
|
||||
fxt.Test_parse_page_wiki("[irc://a b c]", fxt.tkn_lnke_(0, 13).Lnke_rng_(1, 8).Subs_(fxt.tkn_txt_(9, 10), fxt.tkn_space_(10, 11), fxt.tkn_txt_(11, 12)));
|
||||
}
|
||||
@Test public void Brace_lt() {
|
||||
fxt.Init_log_(Xop_xnde_log.Eos_while_closing_tag).Test_parse_page_wiki("[irc://a<b c]", fxt.tkn_lnke_(0, 13).Lnke_rng_(1, 8).Subs_(fxt.tkn_txt_(8, 10), fxt.tkn_space_(10, 11), fxt.tkn_txt_(11, 12)));
|
||||
}
|
||||
@Test public void Brace_xnde_bgn() {// PURPOSE: occurred at ref of UK; a {{cite web|url=http://www.abc.gov/{{dead link|date=December 2011}}|title=UK}} b
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "[http://b.org<sup>c</sup>]"
|
||||
, "<a href=\"http://b.org\" class=\"external text\" rel=\"nofollow\"><sup>c</sup></a>"
|
||||
);
|
||||
}
|
||||
@Test public void Brace_newLine() {
|
||||
fxt.Test_parse_page_wiki("[irc://a\n]", fxt.tkn_txt_(0, 8), fxt.tkn_nl_char_len1_(8), fxt.tkn_txt_(9, 10));
|
||||
}
|
||||
@Test public void Html_brack() {
|
||||
fxt.Test_parse_page_wiki_str("[irc://a]", "<a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">[1]</a>");
|
||||
}
|
||||
@Test public void Apos() {
|
||||
fxt.Test_parse_page_wiki_str("[http://www.a.org''b'']", "<a href=\"http://www.a.org\" class=\"external text\" rel=\"nofollow\"><i>b</i></a>");
|
||||
fxt.Test_parse_page_wiki_str("[http://www.a.org'b]", "<a href=\"http://www.a.org'b\" class=\"external text\" rel=\"nofollow\">[1]</a>");
|
||||
}
|
||||
@Test public void Nowiki() {
|
||||
fxt.Test_parse_page_all_str
|
||||
( "<nowiki>http://a.org</nowiki>"
|
||||
, "http://a.org"
|
||||
);
|
||||
}
|
||||
@Test public void Lnki_one() { // PURPOSE: parallel test for "http://a.org[[B]]"; DATE:2014-07-11
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "[http://a.org b [[C]] d]"
|
||||
,String_.Concat_lines_nl_skip_last
|
||||
( "<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">b <a href=\"/wiki/C\">C</a> d</a>"
|
||||
));
|
||||
}
|
||||
@Test public void Encode_xwiki() { // PURPOSE: href title and args should always be encoded; PAGE:en.w:List_of_Category_A_listed_buildings_in_West_Lothian DATE:2014-07-15
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_u8("commons.wikimedia.org"), Bry_.new_u8("commons.wikimedia.org"));
|
||||
fxt.Test_parse_page_wiki_str // encode page
|
||||
( "[http://commons.wikimedia.org/%22%3E_A B]"
|
||||
, "<a href=\"/site/commons.wikimedia.org/wiki/%22%3E_A\">B</a>" // '%22%3E' not '">'
|
||||
);
|
||||
fxt.Test_parse_page_wiki_str // encode args
|
||||
( "[http://commons.wikimedia.org/A?b=%22%3E_C D]"
|
||||
, "<a href=\"/site/commons.wikimedia.org/wiki/A?b=%22%3E_C\">D</a>" // '%22%3E' not '">'
|
||||
);
|
||||
}
|
||||
@Test public void Encode_basic() { // PURPOSE: counterpart to Encode_xwiki; DATE:2014-07-15
|
||||
fxt.Test_parse_page_wiki_str // encode page
|
||||
( "[http://a.org/%22%3E_A B]"
|
||||
, "<a href=\"http://a.org/%22%3E_A\" class=\"external text\" rel=\"nofollow\">B</a>" // '%22%3E' not '">'
|
||||
);
|
||||
fxt.Test_parse_page_wiki_str // encode args
|
||||
( "[http://a.org/A?b=%22%3E_C D]"
|
||||
, "<a href=\"http://a.org/A?b=%22%3E_C\" class=\"external text\" rel=\"nofollow\">D</a>" // '%22%3E' not '">'
|
||||
);
|
||||
}
|
||||
@Test public void Encode_relative() { // PURPOSE: counterpart to Encode_xwiki; DATE:2014-07-15
|
||||
fxt.Test_parse_page_wiki_str // encode page
|
||||
( "[//a.org/%22%3E_A B]"
|
||||
, "<a href=\"http://a.org/%22%3E_A\" class=\"external text\" rel=\"nofollow\">B</a>" // '%22%3E' not '">'
|
||||
);
|
||||
fxt.Test_parse_page_wiki_str // encode args
|
||||
( "[//a.org/A?b=%22%3E_C D]"
|
||||
, "<a href=\"http://a.org/A?b=%22%3E_C\" class=\"external text\" rel=\"nofollow\">D</a>" // '%22%3E' not '">'
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_lnke_wkr_dangling_tst {
|
||||
@Before public void init() {fxt.Reset();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Dangling_eos() {
|
||||
fxt.Test_parse_page_wiki("[irc://a b"
|
||||
, fxt.tkn_lnke_(0, 8).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack_dangling)
|
||||
, fxt.tkn_txt_(9, 10)
|
||||
);
|
||||
}
|
||||
@Test public void Dangling_newLine() {
|
||||
fxt.Test_parse_page_wiki("[irc://a b\nc]"
|
||||
, fxt.tkn_lnke_(0, 8).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack_dangling)
|
||||
, fxt.tkn_txt_(9, 10)
|
||||
, fxt.tkn_nl_char_len1_(10)
|
||||
, fxt.tkn_txt_(11, 13)
|
||||
);
|
||||
}
|
||||
@Test public void Dangling_gt() {
|
||||
fxt.Test_parse_page_wiki("[irc://a>b c]", fxt.tkn_lnke_(0, 13).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack).Subs_(fxt.tkn_txt_(8, 10), fxt.tkn_space_(10, 11), fxt.tkn_txt_(11, 12)));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_lnke_wkr_relative_tst {
|
||||
@Before public void init() {fxt.Reset();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Relative_obj() {
|
||||
fxt.Test_parse_page_wiki("[//a b]"
|
||||
, fxt.tkn_lnke_(0, 7).Lnke_rng_(1, 4).Subs_(fxt.tkn_txt_(5, 6))
|
||||
);
|
||||
}
|
||||
@Test public void Relative_external() {
|
||||
fxt.Test_parse_page_wiki_str("[//www.a.org a]", "<a href=\"http://www.a.org\" class=\"external text\" rel=\"nofollow\">a</a>");
|
||||
}
|
||||
@Test public void Relative_internal() {
|
||||
fxt.Init_xwiki_add_user_("en.wikipedia.org");
|
||||
fxt.Test_parse_page_wiki_str("[//en.wikipedia.org/wiki Wikipedia]", "<a href=\"/site/en.wikipedia.org/wiki/\">Wikipedia</a>");
|
||||
}
|
||||
@Test public void Relative_w_category() { // EX: [//commons.wikimedia.org/wiki/Category:Diomedeidae A]
|
||||
fxt.Init_xwiki_add_user_("en.wikipedia.org");
|
||||
fxt.Test_parse_page_wiki_str("[//en.wikipedia.org/wiki/Category:A A]", "<a href=\"/site/en.wikipedia.org/wiki/Category:A\">A</a>");
|
||||
}
|
||||
@Test public void Relurl() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_u8("en.wikipedia.org"), Bry_.new_u8("en.wikipedia.org"));
|
||||
fxt.Test_parse_page_wiki_str("[[//en.wikipedia.org/ a]]", "[<a href=\"/site/en.wikipedia.org/wiki/\">a</a>]");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.langs.cases.*;
|
||||
public class Xop_lnke_wkr_text_tst {
|
||||
@Before public void init() {fxt.Reset();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Text_obj() {
|
||||
fxt.Test_parse_page_wiki("irc://a", fxt.tkn_lnke_(0, 7).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_text).Lnke_rng_(0, 7));
|
||||
}
|
||||
@Test public void Text_html() {
|
||||
fxt.Test_parse_page_wiki_str("irc://a", "<a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">irc://a</a>");
|
||||
}
|
||||
@Test public void Text_after() {
|
||||
fxt.Test_parse_page_wiki("irc://a b c", fxt.tkn_lnke_(0, 7).Lnke_rng_(0, 7), fxt.tkn_space_(7, 8), fxt.tkn_txt_(8, 9), fxt.tkn_space_(9, 10), fxt.tkn_txt_(10, 11));
|
||||
}
|
||||
@Test public void Text_before_ascii() { // PURPOSE: free form external urls should not match if preceded by letters; EX:de.w:Sylvie_und_Bruno; DATE:2014-05-11
|
||||
fxt.Ctx().Lang().Case_mgr_utf8_();
|
||||
String expd_lnke_html = "<a href=\"tel:a\" class=\"external text\" rel=\"nofollow\">tel:a</a>";
|
||||
fxt.Test_parse_page_wiki_str("titel:a" , "titel:a");
|
||||
fxt.Test_parse_page_wiki_str(" tel:a" , " " + expd_lnke_html);
|
||||
fxt.Test_parse_page_wiki_str("!tel:a" , "!" + expd_lnke_html);
|
||||
fxt.Test_parse_page_wiki_str("ätel:a" , "ätel:a");
|
||||
fxt.Test_parse_page_wiki_str("€tel:a" , "€" + expd_lnke_html);
|
||||
}
|
||||
@Test public void Invalid_lnki_and_list_dt_dd() { // PURPOSE: invalid lnke should still allow processing of ":" in list <dd>; PAGE:de.w:Mord_(Deutschland)#Besonders_verwerfliche_Begehungsweise DATE:2015-01-08
|
||||
fxt.Test_parse_page_wiki_str("; atel: b" , String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dt> atel"
|
||||
, " </dt>"
|
||||
, " <dd> b"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
));
|
||||
}
|
||||
@Test public void Xnde() {// NOTE: compare to Brace_lt
|
||||
fxt.Test_parse_page_wiki("<span>irc://a</span>"
|
||||
, fxt.tkn_xnde_(0, 20).Subs_
|
||||
( fxt.tkn_lnke_(6, 13)
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void List() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "*irc://a"
|
||||
, "*irc://b"
|
||||
),String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li><a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">irc://a</a>"
|
||||
, " </li>"
|
||||
, " <li><a href=\"irc://b\" class=\"external text\" rel=\"nofollow\">irc://b</a>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Defect_reverse_caption_link() { // PURPOSE: bad lnke formatting (caption before link); ] should show up at end, but only [ shows up; PAGE:en.w:Paul Philippoteaux; [caption http://www.americanheritage.com]
|
||||
fxt.Test_parse_page_wiki_str("[caption irc://a]", "[caption <a href=\"irc://a\" class=\"external text\" rel=\"nofollow\">irc://a</a>]");
|
||||
}
|
||||
@Test public void Lnki() { // PURPOSE: trailing lnki should not get absorbed into lnke; DATE:2014-07-11
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "http://a.org[[B]]" // NOTE: [[ should create another lnki
|
||||
,String_.Concat_lines_nl_skip_last
|
||||
( "<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">http://a.org</a><a href=\"/wiki/B\">B</a>"
|
||||
));
|
||||
}
|
||||
@Test public void Protocol_only() { // PURPOSE: protocol only should return text; DATE:2014-10-09
|
||||
fxt.Test_parse_page_wiki_str("http://" , "http://");
|
||||
fxt.Test_parse_page_wiki_str("http:" , "http:");
|
||||
fxt.Test_parse_page_wiki_str("[http://]" , "[http://]");
|
||||
fxt.Test_parse_page_wiki_str("[http:]" , "[http:]");
|
||||
}
|
||||
@Test public void Ignore_punctuation_at_end() { // PURPOSE: ignore "," and related punctuation at end; DATE:2014-10-09
|
||||
fxt.Test_parse_page_wiki_str("http://a.org," , "<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">http://a.org</a>,"); // basic
|
||||
fxt.Test_parse_page_wiki_str("http://a.org,," , "<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">http://a.org</a>,,"); // many
|
||||
fxt.Test_parse_page_wiki_str("http://a.org/b,c" , "<a href=\"http://a.org/b,c\" class=\"external text\" rel=\"nofollow\">http://a.org/b,c</a>"); // do not ignore if in middle
|
||||
fxt.Test_parse_page_wiki_str("http://a.org:" , "<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">http://a.org</a>:"); // colon at end; compare to "http:"
|
||||
}
|
||||
@Test public void Ignore_punctuation_at_end__paren_end() { // PURPOSE: end parent has special rules; DATE:2014-10-10
|
||||
fxt.Test_parse_page_wiki_str("(http://a.org)" , "(<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">http://a.org</a>)"); // trim=y
|
||||
fxt.Test_parse_page_wiki_str("http://a.org/b(c)", "<a href=\"http://a.org/b(c)\" class=\"external text\" rel=\"nofollow\">http://a.org/b(c)</a>"); // trim=n
|
||||
}
|
||||
@Test public void Sym_quote() { // PURPOSE: quote should interrupt lnke; DATE:2014-10-10
|
||||
fxt.Test_parse_page_wiki_str("http://a.org/b\"c", "<a href=\"http://a.org/b\" class=\"external text\" rel=\"nofollow\">http://a.org/b</a>"c");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_lnke_wkr_uncommon_tst {
|
||||
@Before public void init() {fxt.Reset();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Err_multiple() {
|
||||
fxt.Test_parse_page_wiki("[irc://a][irc://b]"
|
||||
, fxt.tkn_lnke_(0, 9)
|
||||
, fxt.tkn_lnke_(9, 18)
|
||||
);
|
||||
}
|
||||
@Test public void Err_txt_is_protocol() {
|
||||
fxt.Test_parse_page_wiki("[irc://a irc://b]"
|
||||
, fxt.tkn_lnke_(0, 17).Lnke_rng_(1, 8).Subs_(fxt.tkn_txt_(9, 16))
|
||||
);
|
||||
}
|
||||
@Test public void Lnke_should_precede_lnki() { // PURPOSE: [[ should not be interpreted as lnki if [irc is available
|
||||
fxt.Test_parse_page_wiki("[[irc://a/b c]]"
|
||||
, fxt.tkn_txt_(0, 1)
|
||||
, fxt.tkn_lnke_(1, 14).Subs_
|
||||
( fxt.tkn_txt_(12, 13)
|
||||
)
|
||||
, fxt.tkn_txt_(14, 15)
|
||||
);
|
||||
}
|
||||
@Test public void Defect_2nd_consecutive_lnke() { // PURPOSE: bad code that was causing lnkes to show up; PAGE:en.w:Template:Infobox_country;
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "[[http://a.org a]] [[http://b.org b]]"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "[<a href=\"http://a.org\" class=\"external text\" rel=\"nofollow\">a</a>] [<a href=\"http://b.org\" class=\"external text\" rel=\"nofollow\">b</a>]"
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_lnke_wkr_xwiki_tst {
|
||||
@Before public void init() {fxt.Reset();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Xwiki() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_u8("en.wikipedia.org"), Bry_.new_u8("en.wikipedia.org"));
|
||||
fxt.Test_parse_page_wiki_str("[http://en.wikipedia.org/wiki/A a]", "<a href=\"/site/en.wikipedia.org/wiki/A\">a</a>");
|
||||
}
|
||||
@Test public void Xwiki_relative() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_u8("en.wikipedia.org"), Bry_.new_u8("en.wikipedia.org"));
|
||||
fxt.Test_parse_page_wiki_str("[//en.wikipedia.org/ a]", "<a href=\"/site/en.wikipedia.org/wiki/\">a</a>");
|
||||
}
|
||||
@Test public void Xwiki_qarg() {// DATE:2013-02-02
|
||||
fxt.Init_xwiki_add_user_("en.wikipedia.org");
|
||||
fxt.Test_parse_page_wiki_str("http://en.wikipedia.org/wiki/Special:Allpages?from=Earth", "<a href=\"/site/en.wikipedia.org/wiki/Special:Allpages?from=Earth\">http://en.wikipedia.org/wiki/Special:Allpages?from=Earth</a>");
|
||||
}
|
||||
@Test public void Lang_prefix() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_u8("en.wikipedia.org"), Bry_.new_u8("en.wikipedia.org"));
|
||||
fxt.Wiki().Xwiki_mgr().Add_full(Bry_.new_a7("fr"), Bry_.new_a7("fr.wikipedia.org"));
|
||||
fxt.Test_parse_page_wiki_str("[http://en.wikipedia.org/wiki/fr:A a]", "<a href=\"/site/fr.wikipedia.org/wiki/A\">a</a>");
|
||||
}
|
||||
@Test public void Xwiki_query_arg() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_u8("en.wikipedia.org"), Bry_.new_u8("en.wikipedia.org"));
|
||||
fxt.Test_parse_page_wiki_str("[http://en.wikipedia.org/wiki/A?action=edit a]", "<a href=\"/site/en.wikipedia.org/wiki/A?action=edit\">a</a>");
|
||||
}
|
||||
}
|
||||
27
400_xowa/src/gplx/xowa/parsers/lnkis/cfgs/Xoc_lnki_cfg.java
Normal file
27
400_xowa/src/gplx/xowa/parsers/lnkis/cfgs/Xoc_lnki_cfg.java
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis.cfgs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
|
||||
public class Xoc_lnki_cfg implements GfoInvkAble {
|
||||
public Xoc_lnki_cfg(Xowe_wiki wiki) {xwiki_repo_mgr = new Xoc_xwiki_repo_mgr(wiki);}
|
||||
public Xoc_xwiki_repo_mgr Xwiki_repo_mgr() {return xwiki_repo_mgr;} private Xoc_xwiki_repo_mgr xwiki_repo_mgr;
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_xwiki_repos)) return xwiki_repo_mgr;
|
||||
else return GfoInvkAble_.Rv_unhandled;
|
||||
}
|
||||
private static final String Invk_xwiki_repos = "xwiki_repos";
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis.cfgs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
|
||||
public class Xoc_xwiki_repo_mgr implements GfoInvkAble {
|
||||
private Ordered_hash hash = Ordered_hash_.new_bry_();
|
||||
private Xowe_wiki wiki;
|
||||
public Xoc_xwiki_repo_mgr(Xowe_wiki wiki) {this.wiki = wiki;}
|
||||
public boolean Has(byte[] abrv) {
|
||||
Xoc_xwiki_repo_itm itm = (Xoc_xwiki_repo_itm)hash.Get_by(abrv);
|
||||
return itm != null;
|
||||
}
|
||||
public void Add_or_mod(byte[] abrv) {
|
||||
Xoc_xwiki_repo_itm itm = (Xoc_xwiki_repo_itm)hash.Get_by(abrv);
|
||||
if (itm == null) {
|
||||
itm = new Xoc_xwiki_repo_itm(abrv);
|
||||
hash.Add(abrv, itm);
|
||||
wiki.Cfg_parser_lnki_xwiki_repos_enabled_(true);
|
||||
}
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_add)) Add_or_mod(m.ReadBry("xwiki"));
|
||||
else return GfoInvkAble_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
private static final String Invk_add = "add";
|
||||
}
|
||||
/** Single xwiki repo entry; holds only the abbreviation it was registered under. */
class Xoc_xwiki_repo_itm {
	private byte[] abrv;
	public Xoc_xwiki_repo_itm(byte[] abrv) {
		this.abrv = abrv;
	}
	public byte[] Abrv() {
		return abrv;
	}
}
|
||||
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis.redlinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
|
||||
import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.langs.vnts.*; import gplx.xowa.gui.views.*; import gplx.xowa.pages.*; import gplx.xowa.html.hdumps.core.*;
|
||||
public class Xog_redlink_mgr implements GfoInvkAble {
|
||||
private Xog_win_itm win; private Xog_html_itm html_itm; private Xowe_wiki wiki; private Xoae_page page;
|
||||
private Xopg_redlink_lnki_list redlink_lnki_list; private List_adp lnki_list; private boolean log_enabled; private Gfo_usr_dlg usr_dlg; private int thread_id;
|
||||
public Xog_redlink_mgr(Xog_win_itm win, Xoae_page page, boolean log_enabled) {
|
||||
this.win = win; this.page = page; this.wiki = page.Wikie();
|
||||
this.html_itm = page.Tab_data().Tab().Html_itm(); // NOTE: caching locally b/c page.Tab() is sometimes null
|
||||
this.redlink_lnki_list = page.Redlink_lnki_list();
|
||||
this.lnki_list = redlink_lnki_list.Lnki_list();
|
||||
this.thread_id = redlink_lnki_list.Thread_id();
|
||||
this.log_enabled = log_enabled; this.usr_dlg = log_enabled ? Gfo_usr_dlg_.I : Gfo_usr_dlg_.Noop;
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_run)) Redlink();
|
||||
else return GfoInvkAble_.Rv_unhandled;
|
||||
return this;
|
||||
} public static final String Invk_run = "run";
|
||||
public void Redlink() {
|
||||
synchronized (this) { // NOTE: attempt to eliminate random IndexBounds errors; DATE:2014-09-02
|
||||
if (redlink_lnki_list.Disabled()) return;
|
||||
List_adp work_list = List_adp_.new_();
|
||||
Ordered_hash page_hash = Ordered_hash_.new_bry_();
|
||||
page_hash.Clear(); // NOTE: do not clear in Page_bgn, else will fail b/c of threading; EX: Open Page -> Preview -> Save; DATE:2013-11-17
|
||||
work_list.Clear();
|
||||
int len = lnki_list.Count();
|
||||
if (log_enabled) usr_dlg.Log_many("", "", "redlink.redlink_bgn: page=~{0} total_links=~{1}", String_.new_u8(page.Ttl().Raw()), len);
|
||||
for (int i = 0; i < len; i++) { // make a copy of list else thread issues
|
||||
if (win.Usr_dlg().Canceled()) return;
|
||||
if (redlink_lnki_list.Thread_id() != thread_id) return;
|
||||
work_list.Add(lnki_list.Get_at(i));
|
||||
}
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (win.Usr_dlg().Canceled()) return;
|
||||
if (redlink_lnki_list.Thread_id() != thread_id) return;
|
||||
Xop_lnki_tkn lnki = (Xop_lnki_tkn)work_list.Get_at(i);
|
||||
Xoa_ttl ttl = lnki.Ttl();
|
||||
Xowd_page_itm db_page = new Xowd_page_itm().Ttl_(ttl);
|
||||
byte[] full_txt = ttl.Full_db();
|
||||
if (!page_hash.Has(full_txt))
|
||||
page_hash.Add(full_txt, db_page);
|
||||
}
|
||||
int page_len = page_hash.Count();
|
||||
for (int i = 0; i < page_len; i += Batch_size) {
|
||||
if (win.Usr_dlg().Canceled()) return;
|
||||
if (redlink_lnki_list.Thread_id() != thread_id) return;
|
||||
int end = i + Batch_size;
|
||||
if (end > page_len) end = page_len;
|
||||
wiki.Db_mgr().Load_mgr().Load_by_ttls(win.Usr_dlg(), page_hash, Bool_.Y, i, end);
|
||||
}
|
||||
int redlink_count = 0;
|
||||
Bry_bfr bfr = null;
|
||||
boolean variants_enabled = wiki.Lang().Vnt_mgr().Enabled();
|
||||
Xol_vnt_mgr vnt_mgr = wiki.Lang().Vnt_mgr();
|
||||
Xopg_redlink_idx_list redlink_mgr = page.Hdump_data().Redlink_mgr();
|
||||
for (int j = 0; j < len; j++) {
|
||||
Xop_lnki_tkn lnki = (Xop_lnki_tkn)work_list.Get_at(j);
|
||||
byte[] full_db = lnki.Ttl().Full_db();
|
||||
Xowd_page_itm db_page = (Xowd_page_itm)page_hash.Get_by(full_db);
|
||||
if (db_page == null) continue; // pages shouldn't be null, but just in case
|
||||
if (!db_page.Exists()) {
|
||||
String lnki_id = Xopg_redlink_lnki_list.Lnki_id_prefix + Int_.Xto_str(lnki.Html_uid());
|
||||
if (variants_enabled) {
|
||||
Xowd_page_itm vnt_page = vnt_mgr.Convert_ttl(wiki, lnki.Ttl());
|
||||
if (vnt_page != null) {
|
||||
Xoa_ttl vnt_ttl = Xoa_ttl.parse_(wiki, lnki.Ttl().Ns().Id(), vnt_page.Ttl_page_db());
|
||||
html_itm.Html_atr_set(lnki_id, "href", "/wiki/" + String_.new_u8(vnt_ttl.Full_url()));
|
||||
if (!String_.Eq(vnt_mgr.Html_style(), ""))
|
||||
html_itm.Html_atr_set(lnki_id, "style", vnt_mgr.Html_style());
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (log_enabled) {
|
||||
if (bfr == null) bfr = Bry_bfr.new_();
|
||||
bfr.Add_int_variable(lnki.Html_uid()).Add_byte_pipe().Add(Xop_tkn_.Lnki_bgn).Add(full_db).Add(Xop_tkn_.Lnki_end).Add_byte(Byte_ascii.Semic).Add_byte_space();
|
||||
}
|
||||
if (win.Usr_dlg().Canceled()) return;
|
||||
if (redlink_lnki_list.Thread_id() != thread_id) return;
|
||||
int uid = lnki.Html_uid();
|
||||
gplx.xowa.files.gui.Js_img_mgr.Update_link_missing(html_itm, Xopg_redlink_lnki_list.Lnki_id_prefix + Int_.Xto_str(uid));
|
||||
redlink_mgr.Add(uid);
|
||||
++redlink_count;
|
||||
}
|
||||
}
|
||||
if (log_enabled)
|
||||
usr_dlg.Log_many("", "", "redlink.redlink_end: redlinks_run=~{0} links=~{1}", redlink_count, bfr == null ? String_.Empty : bfr.Xto_str_and_clear());
|
||||
}
|
||||
}
|
||||
public static final Xog_redlink_mgr Null = new Xog_redlink_mgr(); Xog_redlink_mgr() {}
|
||||
private static final int Batch_size = 32;
|
||||
}
|
||||
class Xog_redlink_wkr {
|
||||
public static void Redlink(Xog_html_itm html_itm, Int_list list) {
|
||||
int len = list.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
int uid = list.Get_at(i);
|
||||
Redlink(html_itm, uid);
|
||||
}
|
||||
}
|
||||
public static void Redlink(Xog_html_itm html_itm, int uid) {
|
||||
gplx.xowa.files.gui.Js_img_mgr.Update_link_missing(html_itm, Xopg_redlink_lnki_list.Lnki_id_prefix + Int_.Xto_str(uid));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis.redlinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
|
||||
public class Xopg_redlink_idx_list {
|
||||
private final Int_list list = new Int_list();
|
||||
public int Len() {return list.Len();}
|
||||
public int Max() {return max;} private int max;
|
||||
public int Get_at(int i) {return list.Get_at(i);}
|
||||
public void Clear() {
|
||||
list.Clear();
|
||||
max = 0;
|
||||
}
|
||||
public void Add(int i) {
|
||||
list.Add(i);
|
||||
if (i > max) max = i;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis.redlinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
|
||||
import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.users.*;
|
||||
public class Xopg_redlink_lnki_list {
|
||||
private int lnki_idx = gplx.xowa.html.lnkis.Xoh_lnki_wtr.Lnki_id_min; // NOTE: default to 1, not 0, b/c 0 is ignored by wtr; DATE:2014-10-09
|
||||
public Xopg_redlink_lnki_list(boolean ttl_is_module) { // never redlink in Module ns; particularly since Lua has multi-line comments for [[ ]]
|
||||
this.disabled = ttl_is_module;
|
||||
}
|
||||
public boolean Disabled() {return disabled;} private final boolean disabled;
|
||||
public List_adp Lnki_list() {return lnki_list;} private final List_adp lnki_list = List_adp_.new_();
|
||||
public int Thread_id() {return thread_id;} private int thread_id = 1;
|
||||
public void Clear() {
|
||||
if (disabled) return;
|
||||
lnki_idx = 0; // NOTE: must start at 0, so that ++lnki_idx is > 0; html_wtr checks for > 0
|
||||
lnki_list.Clear();
|
||||
thread_id++;
|
||||
}
|
||||
public void Lnki_add(Xop_lnki_tkn lnki) {
|
||||
if (disabled) return;
|
||||
Xoa_ttl ttl = lnki.Ttl(); if (ttl == null) return; // occurs for invalid links
|
||||
Xow_ns ns = ttl.Ns();
|
||||
lnki.Html_uid_(++lnki_idx); // NOTE: set html_id in order html to print out "id='xowa_lnki_1'; want to print out id for consistency's sake, even if these links won't be check for redlinks; DATE:2015-05-07
|
||||
if ( ns.Id_file_or_media() // ignore files which will usually not be in local wiki (most are in commons), and whose html is built up separately
|
||||
|| (ns.Id_ctg() && !ttl.ForceLiteralLink()) // ignore ctgs which have their own html builder, unless it is literal; EX: [[:Category:A]]; DATE:2014-02-24
|
||||
|| ns.Id_special() // ignore special, especially Search; EX: Special:Search/Earth
|
||||
|| ttl.Anch_bgn() == Xoa_ttl.Anch_bgn_anchor_only // anchor only link; EX: [[#anchor]]
|
||||
|| ttl.Wik_itm() != null // xwiki lnki; EX: simplewiki links in homewiki; [[simplewiki:Earth]]
|
||||
)
|
||||
return;
|
||||
lnki_list.Add(lnki);
|
||||
}
|
||||
public static final String Lnki_id_prefix = "xowa_lnki_";
|
||||
public static final int Lnki_id_prefix_len = String_.Len(Lnki_id_prefix);
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis.redlinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
|
||||
public interface Xopg_redlink_logger {
|
||||
void Wkr_exec(Xop_ctx ctx, byte[] src, Xop_lnki_tkn lnki, byte lnki_src_tid);
|
||||
}
|
||||
63
400_xowa/src/gplx/xowa/parsers/logs/Xop_log_basic_tbl.java
Normal file
63
400_xowa/src/gplx/xowa/parsers/logs/Xop_log_basic_tbl.java
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.logs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.dbs.engines.sqlite.*;
|
||||
public class Xop_log_basic_tbl {
|
||||
private Db_stmt stmt_insert;
|
||||
public Xop_log_basic_tbl(Db_conn conn){this.conn = conn; this.Create_table();}
|
||||
public Db_conn Conn() {return conn;} private Db_conn conn;
|
||||
private void Create_table() {Sqlite_engine_.Tbl_create(conn, Tbl_name, Tbl_sql);}
|
||||
public void Delete() {conn.Exec_qry(Db_qry_delete.new_all_(Tbl_name));}
|
||||
public void Insert(int log_tid, String log_msg, int log_time, int page_id, String page_ttl, int args_len, String args_str, int src_len, String src_str) {
|
||||
if (stmt_insert == null) stmt_insert = Db_stmt_.new_insert_(conn, Tbl_name, Fld_log_tid, Fld_log_msg, Fld_log_time, Fld_page_id, Fld_page_ttl, Fld_args_len, Fld_args_str, Fld_src_len, Fld_src_str);
|
||||
stmt_insert.Clear()
|
||||
.Val_int(log_tid)
|
||||
.Val_str(log_msg)
|
||||
.Val_int(log_time)
|
||||
.Val_int(page_id)
|
||||
.Val_str(page_ttl)
|
||||
.Val_int(args_len)
|
||||
.Val_str(args_str)
|
||||
.Val_int(src_len)
|
||||
.Val_str(src_str)
|
||||
.Exec_insert();
|
||||
}
|
||||
public void Rls() {
|
||||
stmt_insert.Rls();
|
||||
}
|
||||
public static final String Tbl_name = "log_basic_temp"
|
||||
, Fld_log_tid = "log_tid", Fld_log_msg = "log_msg", Fld_log_time = "log_time"
|
||||
, Fld_page_id = "page_id", Fld_page_ttl = "page_ttl"
|
||||
, Fld_args_len = "args_len", Fld_args_str = "args_str"
|
||||
, Fld_src_len = "src_len", Fld_src_str = "src_str"
|
||||
;
|
||||
private static final String Tbl_sql = String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS log_basic_temp"
|
||||
, "( log_id integer NOT NULL PRIMARY KEY AUTOINCREMENT"
|
||||
, ", log_tid integer NOT NULL"
|
||||
, ", log_msg varchar(255) NOT NULL"
|
||||
, ", log_time integer NOT NULL"
|
||||
, ", page_id integer NOT NULL"
|
||||
, ", page_ttl varchar(255) NOT NULL"
|
||||
, ", args_len integer NOT NULL"
|
||||
, ", args_str varchar(4096) NOT NULL"
|
||||
, ", src_len integer NOT NULL"
|
||||
, ", src_str varchar(4096) NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
72
400_xowa/src/gplx/xowa/parsers/logs/Xop_log_basic_wkr.java
Normal file
72
400_xowa/src/gplx/xowa/parsers/logs/Xop_log_basic_wkr.java
Normal file
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.logs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.dbs.*;
|
||||
public class Xop_log_basic_wkr implements GfoInvkAble {
|
||||
private Xop_log_mgr log_mgr; private Xop_log_basic_tbl log_tbl;
|
||||
private boolean save_page_ttl, save_log_time, save_args_len, save_args_str;
|
||||
public boolean Save_src_str() {return save_src_str;} public Xop_log_basic_wkr Save_src_str_(boolean v) {save_src_str = v; return this;} private boolean save_src_str;
|
||||
public Xop_log_basic_wkr(Xop_log_mgr log_mgr, Xop_log_basic_tbl log_tbl) {this.log_mgr = log_mgr; this.log_tbl = log_tbl;}
|
||||
public boolean Log_bgn(Xoae_page page, byte[] src, Xop_xnde_tkn xnde) {return true;}
|
||||
public void Log_end_xnde(Xoae_page page, int log_tid, byte[] src, Xop_xnde_tkn xnde_tkn) {
|
||||
Xop_xatr_itm[] atrs_ary = xnde_tkn.Atrs_ary();
|
||||
Log_end(page, Null_log_bgn, log_tid, Null_log_msg, src
|
||||
, xnde_tkn.Src_bgn(), xnde_tkn.Src_end()
|
||||
, atrs_ary == null ? 0 : atrs_ary.length
|
||||
, xnde_tkn.Atrs_bgn(), xnde_tkn.Atrs_end()
|
||||
);
|
||||
}
|
||||
public void Log_end(Xoae_page page, long log_bgn, int log_tid, byte[] log_msg, byte[] src, int src_bgn, int src_end, int args_len, int args_bgn, int args_end) {
|
||||
log_tbl.Insert
|
||||
( log_tid
|
||||
, log_msg == Xop_log_basic_wkr.Null_log_msg ? "" : String_.new_u8(log_msg)
|
||||
, save_log_time ? Env_.TickCount_elapsed_in_frac(log_bgn) : Xop_log_basic_wkr.Null_log_time
|
||||
, page.Revision_data().Id()
|
||||
, save_page_ttl ? String_.new_u8(page.Ttl().Full_db()) : Xop_log_basic_wkr.Null_page_ttl
|
||||
, save_args_len ? args_len : Xop_log_basic_wkr.Null_args_len
|
||||
, save_args_str ? String_.new_u8(src, args_bgn, args_end) : Xop_log_basic_wkr.Null_args_str
|
||||
, src_end - src_bgn
|
||||
, save_src_str ? String_.new_u8(src, src_bgn, src_end) : Xop_log_basic_wkr.Null_src_str
|
||||
);
|
||||
log_mgr.Commit_chk();
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_save_page_ttl_)) save_page_ttl = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_save_log_time_)) save_log_time = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_save_args_len_)) save_args_len = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_save_args_str_)) save_args_str = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_save_src_str_)) save_src_str = m.ReadYn("v");
|
||||
else return GfoInvkAble_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
private static final String
|
||||
Invk_save_page_ttl_ = "save_page_ttl_", Invk_save_log_time_ = "save_log_time_"
|
||||
, Invk_save_args_len_ = "save_args_len_", Invk_save_args_str_ = "save_args_str_", Invk_save_src_str_ = "save_src_str_"
|
||||
;
|
||||
public static final Xop_log_basic_wkr Null = null;
|
||||
public static final int Null_page_id = -1, Null_log_bgn = -1, Null_log_time = -1, Null_args_len = -1, Null_src_len = -1;
|
||||
public static final String Null_page_ttl = "", Null_args_str = "", Null_src_str = "";
|
||||
public static final byte[] Null_log_msg = null;
|
||||
public static final int
|
||||
Tid_gallery = 1
|
||||
, Tid_imageMap = 2
|
||||
, Tid_timeline = 3
|
||||
, Tid_score = 4
|
||||
, Tid_hiero = 5
|
||||
;
|
||||
}
|
||||
82
400_xowa/src/gplx/xowa/parsers/logs/Xop_log_invoke_wkr.java
Normal file
82
400_xowa/src/gplx/xowa/parsers/logs/Xop_log_invoke_wkr.java
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.logs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.dbs.engines.sqlite.*; import gplx.xowa.parsers.logs.*;
|
||||
import gplx.xowa.xtns.scribunto.*;
|
||||
public class Xop_log_invoke_wkr implements GfoInvkAble {
|
||||
private Xop_log_mgr log_mgr;
|
||||
private Db_conn conn; private Db_stmt stmt;
|
||||
private boolean log_enabled = true;
|
||||
private Hash_adp_bry exclude_mod_names = Hash_adp_bry.cs_();
|
||||
public Scrib_err_filter_mgr Err_filter_mgr() {return err_filter_mgr;} private final Scrib_err_filter_mgr err_filter_mgr = new Scrib_err_filter_mgr();
|
||||
public Xop_log_invoke_wkr(Xop_log_mgr log_mgr, Db_conn conn) {
|
||||
this.log_mgr = log_mgr;
|
||||
this.conn = conn;
|
||||
if (log_enabled) {
|
||||
Xop_log_invoke_tbl.Create_table(conn);
|
||||
stmt = Xop_log_invoke_tbl.Insert_stmt(conn);
|
||||
}
|
||||
}
|
||||
public void Init_reset() {Xop_log_invoke_tbl.Delete(conn);}
|
||||
public boolean Eval_bgn(Xoae_page page, byte[] mod_name, byte[] fnc_name) {return !exclude_mod_names.Has(mod_name);}
|
||||
public void Eval_end(Xoae_page page, byte[] mod_name, byte[] fnc_name, long invoke_time_bgn) {
|
||||
if (log_enabled && stmt != null) {
|
||||
int eval_time = (int)(Env_.TickCount() - invoke_time_bgn);
|
||||
Xop_log_invoke_tbl.Insert(stmt, page.Ttl().Rest_txt(), mod_name, fnc_name, eval_time);
|
||||
log_mgr.Commit_chk();
|
||||
}
|
||||
}
|
||||
private void Exclude_mod_names_add(String[] v) {
|
||||
int len = v.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte[] bry = Bry_.new_u8(v[i]);
|
||||
exclude_mod_names.Add_bry_bry(bry);
|
||||
}
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_exclude_mod_names_add)) Exclude_mod_names_add(m.ReadStrAry("v", "|"));
|
||||
else if (ctx.Match(k, Invk_log_enabled_)) log_enabled = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_err_filter)) return err_filter_mgr;
|
||||
else return GfoInvkAble_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
private static final String Invk_exclude_mod_names_add = "exclude_mod_names_add", Invk_log_enabled_ = "log_enabled_", Invk_err_filter = "err_filter";
|
||||
}
|
||||
class Xop_log_invoke_tbl {
|
||||
public static void Create_table(Db_conn conn) {Sqlite_engine_.Tbl_create(conn, Tbl_name, Tbl_sql);}
|
||||
public static void Delete(Db_conn conn) {conn.Exec_qry(Db_qry_delete.new_all_(Tbl_name));}
|
||||
public static Db_stmt Insert_stmt(Db_conn conn) {return Db_stmt_.new_insert_(conn, Tbl_name, Fld_invk_page_ttl, Fld_invk_mod_name, Fld_invk_fnc_name, Fld_invk_eval_time);}
|
||||
public static void Insert(Db_stmt stmt, byte[] page_ttl, byte[] mod_name, byte[] fnc_name, int eval_time) {
|
||||
stmt.Clear()
|
||||
.Val_bry_as_str(page_ttl)
|
||||
.Val_bry_as_str(mod_name)
|
||||
.Val_bry_as_str(fnc_name)
|
||||
.Val_int(eval_time)
|
||||
.Exec_insert();
|
||||
}
|
||||
public static final String Tbl_name = "log_invoke_temp", Fld_invk_page_ttl = "invk_page_ttl", Fld_invk_mod_name = "invk_mod_name", Fld_invk_fnc_name = "invk_fnc_name", Fld_invk_eval_time = "invk_eval_time";
|
||||
private static final String Tbl_sql = String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS log_invoke_temp"
|
||||
, "( invk_id integer NOT NULL PRIMARY KEY AUTOINCREMENT"
|
||||
, ", invk_page_ttl varchar(255) NOT NULL"
|
||||
, ", invk_mod_name varchar(255) NOT NULL"
|
||||
, ", invk_fnc_name varchar(255) NOT NULL"
|
||||
, ", invk_eval_time integer NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
68
400_xowa/src/gplx/xowa/parsers/logs/Xop_log_mgr.java
Normal file
68
400_xowa/src/gplx/xowa/parsers/logs/Xop_log_mgr.java
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.logs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.dbs.*; import gplx.xowa.bldrs.*;
|
||||
public class Xop_log_mgr implements GfoInvkAble {
|
||||
private Db_conn conn;
|
||||
private Xoae_app app; private Xop_log_basic_tbl log_tbl;
|
||||
private int exec_count = 0, commit_interval = 1000;
|
||||
public Xop_log_mgr(Xoae_app app) {this.app = app;}
|
||||
public Io_url Log_dir() {return log_dir;}
|
||||
public Xop_log_mgr Log_dir_(Io_url v) {
|
||||
log_dir = v;
|
||||
// if (conn != null) { // COMMENTED: need to implement a conn.Renew()
|
||||
// conn.Rls(); // invalidate conn; note that during build other cmds will bind Conn which will place temp.log in /temp/ dir instead of /wiki/ dir; DATE:2014-04-16
|
||||
// }
|
||||
return this;
|
||||
} private Io_url log_dir;
|
||||
private Db_conn Conn() {
|
||||
if (conn == null) {
|
||||
if (log_dir == null) log_dir = app.Usere().Fsys_mgr().App_temp_dir();
|
||||
Xob_db_file db_file = Xob_db_file.new__temp_log(log_dir);
|
||||
conn = db_file.Conn();
|
||||
}
|
||||
return conn;
|
||||
}
|
||||
public Xop_log_invoke_wkr Make_wkr_invoke() {return new Xop_log_invoke_wkr(this, this.Conn());}
|
||||
public Xop_log_property_wkr Make_wkr_property() {return new Xop_log_property_wkr(this, this.Conn());}
|
||||
public Xop_log_basic_wkr Make_wkr() {
|
||||
if (log_tbl == null)
|
||||
log_tbl = new Xop_log_basic_tbl(this.Conn());
|
||||
return new Xop_log_basic_wkr(this, log_tbl);
|
||||
}
|
||||
public void Commit_chk() {
|
||||
++exec_count;
|
||||
if ((exec_count % commit_interval) == 0)
|
||||
conn.Txn_sav();
|
||||
}
|
||||
public void Delete_all() {
|
||||
log_tbl.Delete();
|
||||
}
|
||||
public void Txn_bgn() {conn.Txn_bgn();}
|
||||
public void Txn_end() {conn.Txn_end();}
|
||||
public void Rls() {
|
||||
if (log_tbl != null) log_tbl.Rls();
|
||||
if (conn != null) {conn.Rls_conn(); conn = null;}
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_commit_interval_)) commit_interval = m.ReadInt("v");
|
||||
else return GfoInvkAble_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
private static final String Invk_commit_interval_ = "commit_interval_";
|
||||
}
|
||||
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.logs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.dbs.engines.sqlite.*;
|
||||
public class Xop_log_property_wkr implements GfoInvkAble {
|
||||
private Xop_log_mgr log_mgr; private Db_conn conn; private Db_stmt stmt;
|
||||
private boolean log_enabled = true;
|
||||
private boolean include_all = true;
|
||||
private Hash_adp_bry include_props = Hash_adp_bry.cs_();
|
||||
public Xop_log_property_wkr(Xop_log_mgr log_mgr, Db_conn conn) {
|
||||
this.log_mgr = log_mgr;
|
||||
this.conn = conn;
|
||||
if (log_enabled) {
|
||||
Xob_log_property_temp_tbl.Create_table(conn);
|
||||
stmt = Xob_log_property_temp_tbl.Insert_stmt(conn);
|
||||
}
|
||||
}
|
||||
public void Init_reset() {Xob_log_property_temp_tbl.Delete(conn);}
|
||||
public boolean Eval_bgn(Xoae_page page, byte[] prop) {return include_all || include_props.Has(prop);}
|
||||
public void Eval_end(Xoae_page page, byte[] prop, long invoke_time_bgn) {
|
||||
if (log_enabled && stmt != null) {
|
||||
int eval_time = (int)(Env_.TickCount() - invoke_time_bgn);
|
||||
Xob_log_property_temp_tbl.Insert(stmt, page.Ttl().Rest_txt(), prop, eval_time);
|
||||
log_mgr.Commit_chk();
|
||||
}
|
||||
}
|
||||
private void Include_props_add(String[] v) {
|
||||
int len = v.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte[] bry = Bry_.new_u8(v[i]);
|
||||
include_props.Add_bry_bry(bry);
|
||||
}
|
||||
include_all = false; // set include_all to false, since specific items added
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_include_props_add)) Include_props_add(m.ReadStrAry("v", "|"));
|
||||
else if (ctx.Match(k, Invk_log_enabled_)) log_enabled = m.ReadYn("v");
|
||||
else return GfoInvkAble_.Rv_unhandled;
|
||||
return this;
|
||||
} private static final String Invk_include_props_add = "include_props_add", Invk_log_enabled_ = "log_enabled_";
|
||||
}
|
||||
class Xob_log_property_temp_tbl {
|
||||
public static void Create_table(Db_conn conn) {Sqlite_engine_.Tbl_create(conn, Tbl_name, Tbl_sql);}
|
||||
public static void Delete(Db_conn conn) {conn.Exec_qry(Db_qry_delete.new_all_(Tbl_name));}
|
||||
public static Db_stmt Insert_stmt(Db_conn conn) {return Db_stmt_.new_insert_(conn, Tbl_name, Fld_prop_page_ttl, Fld_prop_prop_name, Fld_prop_eval_time);}
|
||||
public static void Insert(Db_stmt stmt, byte[] page_ttl, byte[] prop_name, int eval_time) {
|
||||
stmt.Clear()
|
||||
.Val_bry_as_str(page_ttl)
|
||||
.Val_bry_as_str(prop_name)
|
||||
.Val_int(eval_time)
|
||||
.Exec_insert();
|
||||
}
|
||||
public static final String Tbl_name = "log_property_temp", Fld_prop_page_ttl = "prop_page_ttl", Fld_prop_prop_name = "prop_prop_name", Fld_prop_eval_time = "prop_eval_time";
|
||||
private static final String Tbl_sql = String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS log_property_temp"
|
||||
, "( prop_id integer NOT NULL PRIMARY KEY AUTOINCREMENT"
|
||||
, ", prop_page_ttl varchar(255) NOT NULL"
|
||||
, ", prop_prop_name varchar(255) NOT NULL"
|
||||
, ", prop_eval_time integer NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
115
400_xowa/src/gplx/xowa/parsers/paras/Xop_nl_lxr.java
Normal file
115
400_xowa/src/gplx/xowa/parsers/paras/Xop_nl_lxr.java
Normal file
@@ -0,0 +1,115 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.tblws.*;
|
||||
public class Xop_nl_lxr implements Xop_lxr {
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_nl;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Nl, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_bos) return ctx.Lxr_make_txt_(cur_pos); // simulated nl at beginning of every parse
|
||||
int trim_category_pos = Scan_fwd_for_ctg(ctx, src, cur_pos, src_len);
|
||||
if (trim_category_pos != Bry_.NotFound) { // [[Category]] found after ws
|
||||
int root_subs_len = root.Subs_len();
|
||||
if (root_subs_len > 0) {
|
||||
Xop_tkn_itm tkn = root.Subs_get(root_subs_len - 1);
|
||||
if (tkn.Tkn_tid() == Xop_tkn_itm_.Tid_eq) {
|
||||
Xop_eq_tkn eq_tkn = (Xop_eq_tkn)tkn;
|
||||
if (eq_tkn.Eq_len() > 1) {
|
||||
Xop_nl_tkn nl_tkn = tkn_mkr.NewLine(bgn_pos, cur_pos, Xop_nl_tkn.Tid_char, 1);
|
||||
ctx.Subs_add(root, nl_tkn);
|
||||
}
|
||||
}
|
||||
}
|
||||
return trim_category_pos;
|
||||
}
|
||||
Xop_tkn_itm last_tkn = ctx.Stack_get_last(); // BLOCK:invalid_ttl_check
|
||||
if ( !ctx.Tid_is_image_map()
|
||||
&& last_tkn != null
|
||||
&& last_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) {
|
||||
Xop_lnki_tkn lnki = (Xop_lnki_tkn)last_tkn;
|
||||
if ( lnki.Pipe_count_is_zero()) { // always invalid
|
||||
ctx.Stack_pop_last();
|
||||
return Xop_lnki_wkr_.Invalidate_lnki(ctx, src, root, lnki, bgn_pos);
|
||||
}
|
||||
}
|
||||
|
||||
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, true); // NOTE: frame should at end at bgn_pos (before \n) not after; else, will create tkn at (5,5), while tkn_mkr.Space creates one at (4,5); DATE:2013-10-31
|
||||
ctx.Tblw().Cell_pipe_seen_(false); // flip off "|" in tblw seq; EX: "| a\n||" needs to flip off "|" else "||" will be seen as style dlm"; NOTE: not covered by test?
|
||||
|
||||
Xop_para_wkr para_wkr = ctx.Para();
|
||||
switch (ctx.Cur_tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_hdr: // last tkn was hdr; close it; EX: \n==a==\nb; "\n" should close 2nd "=="; DATE:2014-02-17
|
||||
int acs_pos = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_hdr);
|
||||
ctx.Stack_pop_til(root, src, acs_pos, true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_newLine);
|
||||
para_wkr.Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_h2);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_list: // close list
|
||||
Xop_list_wkr_.Close_list_if_present(ctx, root, src, bgn_pos, cur_pos);
|
||||
para_wkr.Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_li);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_lnke: // close lnke
|
||||
if (ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tmpl_invk) == -1) // only close if no tmpl; MWR: [[SHA-2]]; * {{cite journal|title=Proposed
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_lnke), true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_newLine);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_lnki: // NOTE: \n in caption or other multipart lnki; don't call para_wkr.Process
|
||||
Xop_tkn_itm nl_tkn = tkn_mkr.Space(root, bgn_pos, cur_pos); // convert \n to \s. may result in multiple \s, but rely on htmlViewer to suppress; EX: w:Schwarzschild_radius; and the stellar [[Velocity dispersion|velocity\ndispersion]];
|
||||
ctx.Subs_add(root, nl_tkn);
|
||||
return cur_pos;
|
||||
// case Xop_tkn_itm_.Tid_tblw_tc: case Xop_tkn_itm_.Tid_tblw_td: // STUB: tc/td should not have attributes
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: case Xop_tkn_itm_.Tid_tblw_tr: case Xop_tkn_itm_.Tid_tblw_th: // nl should close previous tblw's atrs range; EX {{Infobox planet}} and |-\n<tr>
|
||||
Xop_tblw_wkr.Atrs_close(ctx, src, root, Bool_.N);
|
||||
break;
|
||||
}
|
||||
if ( ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki // parse_mode is wiki
|
||||
&& para_wkr.Enabled() // check that para is enabled
|
||||
)
|
||||
para_wkr.Process_nl(ctx, root, src, bgn_pos, cur_pos);
|
||||
else { // parse mode is tmpl, or para is disabled; for latter, adding \n for pretty-print
|
||||
Xop_nl_tkn nl_tkn = tkn_mkr.NewLine(bgn_pos, cur_pos, Xop_nl_tkn.Tid_char, 1);
|
||||
ctx.Subs_add(root, nl_tkn);
|
||||
}
|
||||
return cur_pos;
|
||||
}
|
||||
public static int Scan_fwd_for_ctg(Xop_ctx ctx, byte[] src, int cur_pos, int src_len) {
|
||||
for (int i = cur_pos; i < src_len; i++) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: // ignore ws
|
||||
break;
|
||||
case Byte_ascii.Brack_bgn: // [
|
||||
if ( Bry_.Eq_itm(src, src_len, i + 1, Byte_ascii.Brack_bgn) // [[
|
||||
&& i + 2 < src_len) {
|
||||
int ttl_bgn = Bry_finder.Find_fwd_while(src, i + 2, src_len, Byte_ascii.Space);
|
||||
Btrie_slim_mgr ctg_trie = ctx.Wiki().Ns_mgr().Category_trie();
|
||||
Object ctg_ns = ctg_trie.Match_bgn(src, ttl_bgn, src_len);
|
||||
if (ctg_ns != null // "[[Category" found
|
||||
&& Bry_.Eq_itm(src, src_len, ctg_trie.Match_pos(), Byte_ascii.Colon)) { // check that next char is :
|
||||
return i;// return pos of 1st [
|
||||
}
|
||||
return Bry_.NotFound;
|
||||
}
|
||||
break;
|
||||
default: // non-ws; return not found
|
||||
return Bry_.NotFound;
|
||||
}
|
||||
}
|
||||
return Bry_.NotFound;
|
||||
}
|
||||
public static final Xop_nl_lxr _ = new Xop_nl_lxr(); Xop_nl_lxr() {}
|
||||
}
|
||||
51
400_xowa/src/gplx/xowa/parsers/paras/Xop_nl_tab_lxr.java
Normal file
51
400_xowa/src/gplx/xowa/parsers/paras/Xop_nl_tab_lxr.java
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.parsers.tblws.*;
|
||||
public class Xop_nl_tab_lxr implements Xop_lxr {
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_nl_tab;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Hook_nl_tab, this);} private static final byte[] Hook_nl_tab = new byte[] {Byte_ascii.Nl, Byte_ascii.Tab};
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
int non_ws_pos = Bry_finder.Find_fwd_while_space_or_tab(src, cur_pos, src_len);
|
||||
if (non_ws_pos < src_len) { // bounds check
|
||||
Btrie_slim_mgr tblw_trie = ctx.App().Utl_trie_tblw_ws();
|
||||
Object tblw_obj = tblw_trie.Match_bgn(src, non_ws_pos, src_len);
|
||||
if (tblw_obj != null) {
|
||||
Xop_tblw_ws_itm tblw_itm = (Xop_tblw_ws_itm)tblw_obj;
|
||||
byte itm_type = tblw_itm.Tblw_type();
|
||||
switch (itm_type) {
|
||||
case Xop_tblw_ws_itm.Type_nl: // ignore nl
|
||||
case Xop_tblw_ws_itm.Type_xnde: // ignore xnde
|
||||
break;
|
||||
default: { // handle tblw
|
||||
int tblw_rv = ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, non_ws_pos + tblw_itm.Hook_len(), false, itm_type, Xop_tblw_wkr.Called_from_pre, -1, -1);
|
||||
if (tblw_rv != -1) // \n\s| is valid tblw tkn and processed; otherwise fall through;
|
||||
return tblw_rv;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (bgn_pos != Xop_parser_.Doc_bgn_bos) // don't add \n if BOS; EX: "<BOS> a" should be " ", not "\n "
|
||||
ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos + 1, Xop_nl_tkn.Tid_char, 1));
|
||||
ctx.Subs_add(root, tkn_mkr.Tab(cur_pos - 1, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
public static final Xop_nl_tab_lxr _ = new Xop_nl_tab_lxr(); Xop_nl_tab_lxr() {}
|
||||
}
|
||||
65
400_xowa/src/gplx/xowa/parsers/paras/Xop_nl_tab_lxr_tst.java
Normal file
65
400_xowa/src/gplx/xowa/parsers/paras/Xop_nl_tab_lxr_tst.java
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_nl_tab_lxr_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void teardown() {fxt.Init_para_n_();}
|
||||
@Test public void Basic() { // PURPOSE: \n\t|- should be recognized as tblw; EX:zh.v:西安; DATE:2014-05-06
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl
|
||||
( "{|"
|
||||
, "\t|-"
|
||||
, "|a"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Ws() { // PURPOSE: \n\t|- should be recognized as tblw; EX:zh.v:西安; DATE:2014-05-06
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl
|
||||
( "{|"
|
||||
, "\t |-" // \t
|
||||
, "|a"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Ignore() {// PURPOSE: \n\t should not be pre; EX:pl.w:Main_Page; DATE:2014-05-06
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, "\t b"
|
||||
, "c"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<p>a"
|
||||
, "\t b"
|
||||
, "c"
|
||||
, "</p>"
|
||||
));
|
||||
}
|
||||
}
|
||||
27
400_xowa/src/gplx/xowa/parsers/paras/Xop_nl_tkn.java
Normal file
27
400_xowa/src/gplx/xowa/parsers/paras/Xop_nl_tkn.java
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_nl_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_nl_tkn(int bgn, int end, byte nl_tid, int nl_len) {
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
this.nl_tid = nl_tid;
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_newLine;}
|
||||
public byte Nl_tid() {return nl_tid;} private byte nl_tid = Xop_nl_tkn.Tid_unknown;
|
||||
public static final byte Tid_unknown = 0, Tid_char = 1, Tid_hdr = 2, Tid_hr = 3, Tid_list = 4, Tid_tblw = 5, Tid_file = 6;
|
||||
}
|
||||
31
400_xowa/src/gplx/xowa/parsers/paras/Xop_para_tkn.java
Normal file
31
400_xowa/src/gplx/xowa/parsers/paras/Xop_para_tkn.java
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_para_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_para_tkn(int pos) {this.Tkn_ini_pos(false, pos, pos);}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_para;}
|
||||
public byte Para_end() {return para_end;} public Xop_para_tkn Para_end_(byte v) {para_end = v; return this;} private byte para_end = Tid_none;
|
||||
public byte Para_bgn() {return para_bgn;} public Xop_para_tkn Para_bgn_(byte v) {para_bgn = v; return this;} private byte para_bgn = Tid_none;
|
||||
public int Space_bgn() {return space_bgn;} public Xop_para_tkn Space_bgn_(int v) {space_bgn = v; return this;} private int space_bgn = 0;
|
||||
public boolean Nl_bgn() {return nl_bgn;} public Xop_para_tkn Nl_bgn_y_() {nl_bgn = true; return this;} private boolean nl_bgn;
|
||||
public static final byte
|
||||
Tid_none = 0 //
|
||||
, Tid_para = 1 // </p>
|
||||
, Tid_pre = 2 // </pre>
|
||||
;
|
||||
}
|
||||
344
400_xowa/src/gplx/xowa/parsers/paras/Xop_para_wkr.java
Normal file
344
400_xowa/src/gplx/xowa/parsers/paras/Xop_para_wkr.java
Normal file
@@ -0,0 +1,344 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.tblws.*; import gplx.core.btries.*;
|
||||
public class Xop_para_wkr implements Xop_ctx_wkr {
|
||||
private boolean para_enabled;
|
||||
private byte cur_mode;
|
||||
private int para_stack;
|
||||
private boolean in_block, block_is_bgn_xnde, block_is_end_xnde, in_blockquote, block_is_bgn_blockquote, block_is_end_blockquote;
|
||||
private int prv_nl_pos; private Xop_para_tkn prv_para; private int prv_ws_bgn;
|
||||
public boolean Enabled() {return enabled;} public Xop_para_wkr Enabled_(boolean v) {enabled = v; return this;} private boolean enabled = true;
|
||||
public Xop_para_wkr Enabled_y_() {enabled = true; return this;} public Xop_para_wkr Enabled_n_() {enabled = false; return this;}
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
|
||||
this.Clear();
|
||||
para_enabled = enabled && ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki; // only enable for wikitext (not for template)
|
||||
if (para_enabled)
|
||||
Prv_para_new(ctx, root, -1, 0); // create <para> at bos
|
||||
}
|
||||
private void Clear() {
|
||||
cur_mode = Mode_none;
|
||||
para_stack = Para_stack_none;
|
||||
in_block = block_is_bgn_xnde = block_is_end_xnde = false;
|
||||
in_blockquote = block_is_bgn_blockquote = block_is_end_blockquote = false;
|
||||
prv_nl_pos = -1;
|
||||
prv_para = null;
|
||||
prv_ws_bgn = 0;
|
||||
}
|
||||
public void AutoClose(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
|
||||
if (para_enabled) {
|
||||
Process_nl(ctx, root, src, src_len, src_len);
|
||||
this.Prv_para_end(); // close anything created by Process_nl()
|
||||
}
|
||||
this.Clear();
|
||||
}
|
||||
public void Process_block__bgn_y__end_n(Xop_xnde_tag tag) {Process_block(tag, Bool_.Y, Bool_.N);} // NOTE: disables para for rest of page; Process_block__bgn_n__end_y must be called; DATE:2014-04-18
|
||||
public void Process_block__bgn_n__end_y(Xop_xnde_tag tag) {Process_block(tag, Bool_.N, Bool_.Y);}
|
||||
public void Process_block__xnde(Xop_xnde_tag tag, byte mode) {
|
||||
if (mode == Xop_xnde_tag.Block_bgn) Process_block(tag, Bool_.Y, Bool_.N);
|
||||
else if (mode == Xop_xnde_tag.Block_end) Process_block(tag, Bool_.N, Bool_.Y);
|
||||
}
|
||||
public void Process_block_lnki_div() { // bgn_lhs is pos of [[; end_lhs is pos of ]]
|
||||
if (prv_ws_bgn > 0) // if pre at start of line; ignore it b/c of div; EX: "\n\s[[File:A.png|thumb]]" should not produce thumb; also [[File:A.png|right]]; DATE:2014-02-17
|
||||
prv_ws_bgn = 0;
|
||||
this.Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_div);
|
||||
}
|
||||
private void Process_block(Xop_xnde_tag tag, boolean bgn, boolean end) {
|
||||
if (prv_ws_bgn > 0) {
|
||||
prv_para.Space_bgn_(prv_ws_bgn);
|
||||
prv_ws_bgn = 0;
|
||||
}
|
||||
block_is_bgn_xnde = bgn;
|
||||
block_is_end_xnde = end;
|
||||
switch (tag.Id()) {
|
||||
case Xop_xnde_tag_.Tid_blockquote:
|
||||
if (bgn) block_is_bgn_blockquote = true;
|
||||
if (end) block_is_end_blockquote = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
public void Process_block__bgn__nl_w_symbol(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos, Xop_xnde_tag tag) {// handle \n== and \n* \n{|; note that nl is at rng of bgn_pos to bgn_pos + 1 (not cur_pos)
|
||||
if (!para_enabled) return;
|
||||
Process_nl(ctx, root, src, bgn_pos, bgn_pos + 1);
|
||||
Process_block__bgn_y__end_n(tag);
|
||||
}
|
||||
// Processes one "\n" and decides what paragraph markup the line that just ended needs; mirrors MW's doBlockLevels line loop.
// bgn_pos / cur_pos: range of the newline in src; bgn_pos becomes the new prv_nl_pos and cur_pos the position of the new para placeholder.
// Flow:
//   1. block element flagged on this line -> drop the para stack, close the open paragraph, update in_blockquote / in_block
//   2. else if not inside a block / blockquote:
//        a. line began with "\n\s" (prv_ws_bgn > 0) and is non-blank or already in pre -> open or continue a pre
//        b. blank line -> "b1" / "b2" / "b3" cases below
//        c. text line  -> "t1" / "t2" / "t3" cases below
//   3. always: clear prv_ws_bgn and the per-line block flags, then add a fresh para placeholder via Prv_para_new
// All edits are applied to prv_para (the placeholder created by the previous call) because the line's tkns were already added to root.
public void Process_nl(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos) {// REF.MW:Parser.php|doBlockLevels
|
||||
Dd_clear(ctx);
|
||||
if (block_is_bgn_xnde || block_is_end_xnde) {
|
||||
para_stack = Para_stack_none; // MW: $paragraphStack = false;
|
||||
Prv_para_end(); // MW: $output .= $this->closeParagraph()
|
||||
if (block_is_bgn_blockquote && !block_is_end_blockquote) // MW: if ( $preOpenMatch and !$preCloseMatch )
|
||||
in_blockquote = true; // MW: $this->mInPre = true;
|
||||
else
|
||||
in_blockquote = false; // XO: turn off blockquote else following para / nl won't work; w:Snappy_(software); DATE:2014-04-25
|
||||
in_block = !block_is_end_xnde; // MW: $inBlockElem = !$closematch;
|
||||
}
|
||||
else if (!in_block && !in_blockquote) { // MW: elseif ( !$inBlockElem && !$this->mInPre ) {
|
||||
boolean line_is_ws = Line_is_ws(src, bgn_pos);
|
||||
if (prv_ws_bgn > 0 && (cur_mode == Mode_pre || !line_is_ws)) { // MW: if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) ) {
|
||||
if (cur_mode != Mode_pre) { // MW: if ( $this->mLastSection !== 'pre' ) {
|
||||
para_stack = Para_stack_none; // MW: $paragraphStack = false;
|
||||
prv_para.Space_bgn_(prv_ws_bgn - 1); // -1 to ignore 1st "\s" in "\n\s"; note that prv_ws_bgn only includes spaces, so BOS doesn't matter; DATE:2014-04-14
|
||||
Prv_para_end(); Prv_para_bgn(Xop_para_tkn.Tid_pre); // MW: $output .= $this->closeParagraph() . '<pre>';
|
||||
cur_mode = Mode_pre; // MW: $this->mLastSection = 'pre';
|
||||
}
|
||||
else { // already in pre
|
||||
if (line_is_ws) { // line is entirely ws
|
||||
int next_char_pos = prv_nl_pos + 2; // "\n\s".length
|
||||
if ( next_char_pos < src.length // bounds check
|
||||
&& src[next_char_pos] == Byte_ascii.Nl // is "\n\s\n"; i.e.: "\n" only
|
||||
) {
|
||||
ctx.Subs_add(root, ctx.Tkn_mkr().Bry_raw(bgn_pos, bgn_pos, Byte_ascii.Nl_bry)); // add a "\n" tkn; note that adding a NewLine tkn doesn't work, b/c Xoh_html_wtr has code to remove consecutive \n; PAGE:en.w:Preferred_numbers DATE:2014-06-24
|
||||
prv_nl_pos = bgn_pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
prv_ws_bgn = 0; // MW: $t = substr( $t, 1 );
|
||||
}
|
||||
else {
|
||||
if (bgn_pos - prv_nl_pos == 1 || line_is_ws) { // line is blank ("b" for blank) MW: if ( trim( $t ) === '' ) {
|
||||
if (para_stack != Para_stack_none) { // "b1"; stack has "<p>" or "</p><p>"; output "<br/>"; MW: if ( $paragraphStack ) {
|
||||
Para_stack_end(cur_pos); Add_br(ctx, root, bgn_pos); // MW: $output .= $paragraphStack . '<br />';
|
||||
para_stack = Para_stack_none; // MW: $paragraphStack = false;
|
||||
cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
|
||||
}
|
||||
else { // stack is empty
|
||||
if (cur_mode != Mode_para) { // "b2"; cur is '' or <pre> MW: if ( $this->mLastSection !== 'p' ) {
|
||||
Prv_para_end(); // MW: $output .= $this->closeParagraph();
|
||||
cur_mode = Mode_none; // MW: $this->mLastSection = '';
|
||||
para_stack = Para_stack_bgn; // put <p> on stack MW: $paragraphStack = '<p>';
|
||||
}
|
||||
else // "b3"; cur is p
|
||||
para_stack = Para_stack_mid; // put </p><p> on stack MW: $paragraphStack = '</p><p>';
|
||||
}
|
||||
}
|
||||
else { // line has text ("t" for text); NOTE: tkn already added before \n, so must change prv_para; EX: "a\n" -> this code is called for "\n" but "a" already processed
|
||||
if (para_stack != Para_stack_none) { // "t1" MW: if ( $paragraphStack ) {
|
||||
Para_stack_end(cur_pos); // MW: $output .= $paragraphStack;
|
||||
para_stack = Para_stack_none; // MW: $paragraphStack = false;
|
||||
cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
|
||||
}
|
||||
else if (cur_mode != Mode_para) { // "t2"; cur is '' or <pre> MW: elseif ( $this->mLastSection !== 'p' ) {
|
||||
Prv_para_end(); Prv_para_bgn(Xop_para_tkn.Tid_para); // MW: $output .= $this->closeParagraph() . '<p>';
|
||||
cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
|
||||
}
|
||||
else {} // "t3"
|
||||
}
|
||||
}
|
||||
}
|
||||
if (in_blockquote && prv_ws_bgn > 0) // handle blockquote separate; EX: <blockquote>\n\sa\n</blockquote>; note that "\s" needs to be added literally; MW doesn't have this logic specifically, since it assumes all characters go into $output, whereas XO, sets aside the "\s" in "\n\s" separately
|
||||
prv_para.Space_bgn_(prv_ws_bgn);
|
||||
prv_ws_bgn = 0; // nl encountered and processed; always prv_ws_bgn set to 0, else ws from one line will carry over to next
|
||||
// in_blockquote = false;
|
||||
block_is_bgn_xnde = block_is_end_xnde = false;
|
||||
// if ( $preCloseMatch && $this->mInPre )
|
||||
// $this->mInPre = false;
|
||||
// prv_ws_bgn = false;
|
||||
Prv_para_new(ctx, root, bgn_pos, cur_pos); // add a prv_para placeholder
|
||||
if (para_stack == Para_stack_none) // "x1" MW: if ( $paragraphStack === false ) {
|
||||
// NOTE(review): Prv_para_new has just assigned prv_para, so this null check looks redundant unless Tkn_mkr().Para() can return null -- confirm
if (prv_para != null) prv_para.Nl_bgn_y_(); // add nl; note that "$t" has already been processed; MW: $output .= $t . "\n";
|
||||
}
|
||||
// Handles a "\n\s" hook once Xop_pre_lxr has decided it may start or continue a pre line.
// bgn_pos: start of the hook (the "\n"); cur_pos: position after the hook; txt_pos: 1st position after the run of spaces (computed by the caller).
// Returns the position at which parsing should resume: txt_pos, or whatever the delegated tblw / xnde Make_tkn call returns.
// If the text at txt_pos matches an entry in the tblw_ws trie, that entry is given 1st chance:
//   Type_nl   ("\n\s\n"): only handled here when already in pre; a NewLine tkn is added and prv_nl_pos is updated
//   Type_xnde ("\n\s<tag"): handed to the xnde wkr, but only if the char after the matched name really ends a tag name
//   default   (tblw symbols): handed to the tblw wkr; a return of -1 means "not a tblw" and falls through to the pre code
// Otherwise the "\n" is processed as a normal newline and prv_ws_bgn is set so that the NEXT Process_nl sees the line as starting with ws.
public int Process_pre(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int txt_pos) {
|
||||
Dd_clear(ctx);
|
||||
Btrie_slim_mgr tblw_ws_trie = ctx.App().Utl_trie_tblw_ws();
|
||||
Object o = tblw_ws_trie.Match_bgn(src, txt_pos, src_len);
|
||||
if (o != null) { // tblw_ws found
|
||||
Xop_tblw_ws_itm ws_itm = (Xop_tblw_ws_itm)o;
|
||||
byte tblw_type = ws_itm.Tblw_type();
|
||||
switch (tblw_type) {
|
||||
case Xop_tblw_ws_itm.Type_nl: // \n\s
|
||||
if (cur_mode == Mode_pre) { // already in pre; just process "\n\s"
|
||||
ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos, Xop_nl_tkn.Tid_char, 1));
|
||||
prv_nl_pos = bgn_pos; // NOTE: must update prv_nl_pos; PAGE:en.w:Preferred_number DATE:2014-06-24
|
||||
return txt_pos;
|
||||
}
|
||||
break;
|
||||
case Xop_tblw_ws_itm.Type_xnde:
|
||||
int nxt_pos = tblw_ws_trie.Match_pos();
|
||||
if (nxt_pos < src_len) { // bounds check
|
||||
// every case below deliberately falls through to the shared handler; any other char leaves the switch and continues to the pre code
switch (src[nxt_pos]) { // check that next char is "end" of xnde name; guard against false matches like "<trk" PAGE:de.v:Via_Jutlandica/Gpx DATE:2014-11-29
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // whitespace
|
||||
case Byte_ascii.Slash: case Byte_ascii.Gt: // end node
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos: // quotes
|
||||
if (bgn_pos != Xop_parser_.Doc_bgn_bos)
|
||||
ctx.Para().Process_nl(ctx, root, src, bgn_pos, cur_pos);
|
||||
return ctx.Xnde().Make_tkn(ctx, tkn_mkr, root, src, src_len, txt_pos, txt_pos + 1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default: {
|
||||
int tblw_rv = ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, txt_pos + ws_itm.Hook_len(), false, tblw_type, Xop_tblw_wkr.Called_from_pre, -1, -1);
|
||||
if (tblw_rv != -1) // \n\s| is valid tblw tkn and processed; otherwise process pre-code below; EX:w:Wikipedia:WikiProject_History/CategoryExample; DATE:2014-04-14
|
||||
return tblw_rv;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// NOTE: pre lxr emulates MW for "\n\s" by (1) calling Process nl for "\n"; (2) anticipating next line by setting prv_ws_bgn
|
||||
// EX: "\na\n b\n"; note that "\n " is cur
|
||||
if (bgn_pos != Xop_parser_.Doc_bgn_bos) // if bos, then don't close 1st para
|
||||
Process_nl(ctx, root, src, bgn_pos, bgn_pos + 1); // note that tkn is \n\s; so, bgn_pos -> bgn_pos + 1 is \n ...
|
||||
if (cur_mode == Mode_pre) // in pre_mode
|
||||
ctx.Subs_add(root, tkn_mkr.Space(root, cur_pos, txt_pos)); // cur_pos to start after \s; do not capture "\s" in "\n\s"; (not sure why not before \s)
|
||||
// number of spaces after the "\n": (txt_pos - cur_pos) extra spaces + 1 for the "\s" that is part of the hook
prv_ws_bgn = txt_pos - cur_pos + 1;
|
||||
return txt_pos;
|
||||
}
|
||||
public void Process_lnki_category(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int pos, int src_len) { // REF.MW:Parser.php|replaceInternalLinks2|Strip the whitespace Category links produce;
|
||||
if (!para_enabled) return;
|
||||
int subs_len = root.Subs_len();
|
||||
for (int i = subs_len - 2; i > -1; i--) { // -2: -1 b/c subs_len is invalid; -1 to skip current lnki
|
||||
Xop_tkn_itm sub_tkn = root.Subs_get(i);
|
||||
switch (sub_tkn.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_para: // nl found; note this means that BOL -> [[Category:]] is all ws;
|
||||
if (prv_ws_bgn > 0) { // line begins with ws a
|
||||
if (sub_tkn.Src_bgn() != 0) // do not ignore BOS para; needed b/c it is often <p>; needed for test;
|
||||
sub_tkn.Ignore_y_(); // ignore nl (pretty-printing only)
|
||||
prv_ws_bgn = 0; // remove ws
|
||||
if (ctx.Stack_has(Xop_tkn_itm_.Tid_list)){ // HACK: if in list, set prv_nl_pos to EOL; only here for one test to pass
|
||||
int nl_at_eol = -1;
|
||||
for (int j = pos; j < src_len; j++) { // check if rest of line is ws
|
||||
byte b = src[j];
|
||||
switch (b) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Tab: break; // ignore space / tab
|
||||
case Byte_ascii.Nl:
|
||||
nl_at_eol = j;
|
||||
j = src_len;
|
||||
break;
|
||||
default: // something else besides ws; stop
|
||||
j = src_len;
|
||||
break;
|
||||
}
|
||||
if (nl_at_eol != -1)
|
||||
prv_nl_pos = nl_at_eol + 1; // SEE:NOTE_2
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
default: // exit if anything except para / nl in front of [[Category:]]
|
||||
i = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// if (para_found) // BOS exit; just remove prv_ws_bgn
|
||||
prv_ws_bgn = 0;
|
||||
}
|
||||
private void Prv_para_new(Xop_ctx ctx, Xop_root_tkn root, int prv_nl_pos, int para_pos) {
|
||||
this.prv_nl_pos = prv_nl_pos;
|
||||
prv_para = ctx.Tkn_mkr().Para(para_pos);
|
||||
ctx.Subs_add(root, prv_para);
|
||||
}
|
||||
private void Prv_para_end() { // MW: closeParagraph();
|
||||
// following switch is equivalent to:
|
||||
// MW: if ( $this->mLastSection != '' )
|
||||
// MW: $result = '</' . $this->mLastSection . ">\n";
|
||||
switch (cur_mode) {
|
||||
case Mode_none: return;
|
||||
case Mode_pre: prv_para.Para_end_(Xop_para_tkn.Tid_pre); break;
|
||||
case Mode_para: prv_para.Para_end_(Xop_para_tkn.Tid_para); break;
|
||||
}
|
||||
// in_pre = false; // MW: $this->mInPre = false;
|
||||
cur_mode = Mode_none; // MW: $this->mLastSection = '';
|
||||
}
|
||||
private void Prv_para_bgn(byte mode) {
|
||||
if (prv_para != null) prv_para.Para_bgn_(mode);
|
||||
}
|
||||
private void Para_stack_end(int cur_pos) { // MW: $output .= $paragraphStack;
|
||||
switch (para_stack) {
|
||||
case Para_stack_none: break;
|
||||
case Para_stack_bgn: prv_para.Para_end_(Xop_para_tkn.Tid_none).Para_bgn_(Xop_para_tkn.Tid_para); break; // '<p>'
|
||||
case Para_stack_mid: prv_para.Para_end_(Xop_para_tkn.Tid_para).Para_bgn_(Xop_para_tkn.Tid_para); break; // '</p><p>'
|
||||
}
|
||||
}
|
||||
// Adds a zero-width xnde tkn (bgn_pos to bgn_pos) tagged as <br> to root; used by Process_nl for the "b1" blank-line case.
private void Add_br(Xop_ctx ctx, Xop_root_tkn root, int bgn_pos) {
|
||||
ctx.Subs_add(root, ctx.Tkn_mkr().Xnde(bgn_pos, bgn_pos).Tag_(Xop_xnde_tag_.Tag_br));
|
||||
}
|
||||
private boolean Line_is_ws(byte[] src, int pos) {
|
||||
if (prv_nl_pos == -1) return false;
|
||||
boolean ws = true;
|
||||
for (int i = prv_nl_pos + 1; i < pos; i++) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Tab:
|
||||
case Byte_ascii.Space:
|
||||
break;
|
||||
default:
|
||||
ws = false;
|
||||
i = pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ws;
|
||||
}
|
||||
// Calls Dd_chk_(false) on the ctx's list wkr; invoked at the start of Process_nl and Process_pre.
// NOTE(review): presumably cancels a pending ";" -> ":" (dt -> dd) check once the line ends -- confirm against the list wkr.
private void Dd_clear(Xop_ctx ctx) {ctx.List().Dd_chk_(false);}
|
||||
private static final int
|
||||
Para_stack_none = 0 // false
|
||||
, Para_stack_bgn = 1 // <p>
|
||||
, Para_stack_mid = 2 // </p><p>
|
||||
;
|
||||
private static final byte
|
||||
Mode_none = 0 // ''
|
||||
, Mode_para = 1 // p
|
||||
, Mode_pre = 2 // pre
|
||||
;
|
||||
}
|
||||
/*
|
||||
NOTE_1:
|
||||
xowa uses \n as the leading character for multi-character hooks; EX: "\n*","\n{|","\n==",etc..
|
||||
For this section of code, xowa treats \n separately from the rest of the hook for the purpose of emulating MW code.
|
||||
EX: a\n==b==
|
||||
MW:
|
||||
- split into two lines: "a", "==b=="
|
||||
- call process_nl on "a"
|
||||
- call process_nl on "==b=="
|
||||
XO:
|
||||
- split into "tkns": "a", "\n==", "b", "=="
|
||||
- add "a"
|
||||
- add "\n=="
|
||||
- since there is a "\n", call process_nl, which will effectively call it for "a"
|
||||
- note that page_end will effectively call process_nl on "==b=="
|
||||
|
||||
NOTE_2: Category needs to "trim" previous line
|
||||
EX:
|
||||
* a
|
||||
* b
|
||||
[[Category:c]]
|
||||
* d
|
||||
|
||||
MW does the following: (REF.MW:Parser.php|replaceInternalLinks2|Strip the whitespace Category links produce;)
|
||||
- removes the \n after b (REF: $s = rtrim( $s . "\n" ); # bug 87)
|
||||
- trims all space " " in front of [[ (NOTE: this makes it a non-pre line)
|
||||
- plucks out the [[Category:c]]
|
||||
- joins everything after ]] (starting with the \n) to the * b (REF: $s .= trim( $prefix . $trail, "\n" ) == '' ? '': $prefix . $trail;)
|
||||
This effectively "blanks" out the entire line "\n [[Category:c]]" -> ""
|
||||
|
||||
XOWA tries to emulate this by doing the following
|
||||
- mark the para_tkn after "b" as ignored (blank)
|
||||
- disable pre for the line
|
||||
- keep the [[Category:c]], but *simulate* a blank line by moving the prv_nl_pos to after the ]]
|
||||
|
||||
NOTE_3: if (last_section_is_pre)
|
||||
PURPOSE: if Category trims previous nl, but nl was part of pre, deactivate it
|
||||
REASON: occurs b/c MW does separate passes for pre and Category while XO does one pass.
|
||||
EX: "a\n [[Category:c]]"
|
||||
- pre is activated by \n\s
|
||||
- [[Category:c]] indicates that \n\s should be trimmed
|
||||
so, disable_pre, etc.
|
||||
|
||||
*/
|
||||
1074
400_xowa/src/gplx/xowa/parsers/paras/Xop_para_wkr_basic_tst.java
Normal file
1074
400_xowa/src/gplx/xowa/parsers/paras/Xop_para_wkr_basic_tst.java
Normal file
File diff suppressed because it is too large
Load Diff
109
400_xowa/src/gplx/xowa/parsers/paras/Xop_para_wkr_para_tst.java
Normal file
109
400_xowa/src/gplx/xowa/parsers/paras/Xop_para_wkr_para_tst.java
Normal file
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
// Parser tests for paragraph handling; each test parses a wikitext snippet with para enabled and compares the generated html.
public class Xop_para_wkr_para_tst {
|
||||
// para is enabled before every test and disabled again afterwards
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void teardown() {fxt.Init_para_n_();}
|
||||
@Test public void Pre_then_xnde_pre() { // PURPOSE: if ws_pre is in effect, xnde_pre should end it; EX: b:Knowing Knoppix/Other applications
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( " a"
|
||||
, "b<pre>c"
|
||||
, "d</pre>"
|
||||
, "e"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<pre>a"
|
||||
, "</pre>"
|
||||
, "b<pre>c"
|
||||
, "d</pre>"
|
||||
, ""
|
||||
, "<p>e"
|
||||
, "</p>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void List_ignore_pre_lines() { // PURPOSE: "\s\n" should create new list; was continuing previous list; DATE:2013-07-12
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( ": a"
|
||||
, ":* b"
|
||||
, " "
|
||||
, ": c"
|
||||
, ":* d"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dd> a"
|
||||
, ""
|
||||
, " <ul>"
|
||||
, " <li> b"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
, ""
|
||||
, "<dl>"
|
||||
, " <dd> c"
|
||||
, ""
|
||||
, " <ul>"
|
||||
, " <li> d"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Multiple_nl_in_tblx() { // PURPOSE: "\n\n\n" was causing multiple breaks; EX:fr.w:Portail:Génie mécanique; DATE:2014-02-17
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( "<table><tr><td>a"
|
||||
, "</td>"
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, "</tr></table>"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Ignore_cr() { // PURPOSE: handle "\r\n"; EX: Special:MovePage; DATE:2014-03-02
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( "a\r"
|
||||
, "\r"
|
||||
, "b\r"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<p>a"
|
||||
, "</p>"
|
||||
, ""
|
||||
, "<p>b"
|
||||
, "</p>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
258
400_xowa/src/gplx/xowa/parsers/paras/Xop_para_wkr_pre_tst.java
Normal file
258
400_xowa/src/gplx/xowa/parsers/paras/Xop_para_wkr_pre_tst.java
Normal file
@@ -0,0 +1,258 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_para_wkr_pre_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void teardown() {fxt.Init_para_n_();}
|
||||
@Test public void Pre_ignore_bos() { // PURPOSE: ignore pre at bgn; DATE:2013-07-09
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( " "
|
||||
, "b"
|
||||
), String_.Concat_lines_nl
|
||||
( "<p>"
|
||||
, "b"
|
||||
, "</p>"
|
||||
));
|
||||
}
|
||||
@Test public void Pre_ignore_bos_tblw() { // PURPOSE: ignore pre at bgn shouldn't break tblw; EX:commons.wikimedia.org; DATE:2013-07-11
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( " "
|
||||
, "{|"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Ignore_bos_xnde() { // PURPOSE: space at bgn shouldn't create pre; EX:commons.wikimedia.org; " <center>a\n</center>"; DATE:2013-11-28
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( " <center>a" // NOTE: leading " " matches MW; DATE:2014-06-23
|
||||
, "</center>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( " <center>a"
|
||||
, "</center>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Ignore_pre_in_gallery() {// PURPOSE: pre in gallery should be ignored; EX:uk.w:EP2; DATE:2014-03-11
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.Y_byte;
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, ""
|
||||
, " <gallery>"
|
||||
, " File:A.png"
|
||||
, " </gallery>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<p>a"
|
||||
, "</p>"
|
||||
, " <ul id=\"xowa_gallery_ul_0\" class=\"gallery mw-gallery-traditional\">"
|
||||
, " <li id=\"xowa_gallery_li_0\" class=\"gallerybox\" style=\"width: 155px\">"
|
||||
, " <div style=\"width: 155px\">"
|
||||
, " <div class=\"thumb\" style=\"width: 150px;\">"
|
||||
, " <div style=\"margin:15px auto;\">"
|
||||
, " <a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"A.png\" src=\"file:///mem/wiki/repo/trg/thumb/7/0/A.png/120px.png\" width=\"120\" height=\"120\" /></a>"
|
||||
, " </div>"
|
||||
, " </div>"
|
||||
, " <div class=\"gallerytext\">"
|
||||
, " </div>"
|
||||
, " </div>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
,""
|
||||
));
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.N_byte;
|
||||
}
|
||||
@Test public void Pre_xnde_gallery() { // PURPOSE: <gallery> should invalidate pre; EX: en.w:Mary, Queen of Scots
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.Y_byte;
|
||||
fxt.Wiki().Xtn_mgr().Init_by_wiki(fxt.Wiki());
|
||||
String raw = String_.Concat_lines_nl_skip_last
|
||||
( " <gallery>"
|
||||
, "File:A.png|b"
|
||||
, "</gallery>"
|
||||
);
|
||||
fxt.Test_parse_page_wiki_str(raw, String_.Concat_lines_nl_skip_last
|
||||
( " <ul id=\"xowa_gallery_ul_0\" class=\"gallery mw-gallery-traditional\">" // NOTE: leading " " matches MW; DATE:2014-06-23
|
||||
, " <li id=\"xowa_gallery_li_0\" class=\"gallerybox\" style=\"width: 155px\">"
|
||||
, " <div style=\"width: 155px\">"
|
||||
, " <div class=\"thumb\" style=\"width: 150px;\">"
|
||||
, " <div style=\"margin:15px auto;\">"
|
||||
, " <a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"\" src=\"file:///mem/wiki/repo/trg/thumb/7/0/A.png/120px.png\" width=\"120\" height=\"120\" /></a>"
|
||||
, " </div>"
|
||||
, " </div>"
|
||||
, " <div class=\"gallerytext\"><p>b"
|
||||
, "</p>"
|
||||
, ""
|
||||
, " </div>"
|
||||
, " </div>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.N_byte;
|
||||
}
|
||||
@Test public void Ignore_pre_in_center() {// PURPOSE: pre in gallery should be ignored; EX:uk.w:EP2; DATE:2014-03-11
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, " <center>b"
|
||||
, " </center>"
|
||||
, "d"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<p>a"
|
||||
, "</p>"
|
||||
, " <center>b"
|
||||
, " </center>"
|
||||
, ""
|
||||
, "<p>d"
|
||||
, "</p>"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Remove_only_1st_space() { // PURPOSE: pre should only remove 1st space]; EX: w:Wikipedia:WikiProject_History/CategoryExample; DATE:2014-04-14
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( " a"
|
||||
, " b"
|
||||
, " c"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<pre> a"
|
||||
, " b"
|
||||
, " c"
|
||||
, "</pre>"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Remove_only_1st_space__bos() { // PURPOSE: similar to above but check that pre at \n\s is indented correctly; DATE:2014-04-14
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( ""
|
||||
, " a"
|
||||
, " b"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( ""
|
||||
, "<pre> a"
|
||||
, " b"
|
||||
, "</pre>"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Ignore_tblw_td() {// PURPOSE: \n\s| should continue pre; EX:w:Wikipedia:WikiProject_History/CategoryExample; DATE:2014-04-14
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( " a"
|
||||
, " |"
|
||||
, " b"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<pre>a"
|
||||
, "|"
|
||||
, "b"
|
||||
, "</pre>"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Tab() { // PURPOSE: tab inside pre was being converted to space; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
|
||||
fxt.Test_html_full_str
|
||||
( " \ta"
|
||||
, String_.Concat_lines_nl
|
||||
( "<pre>\ta"
|
||||
, "</pre>"
|
||||
));
|
||||
}
|
||||
@Test public void Style() { // PURPOSE: " <style>" was not being put in pre; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl
|
||||
( " <style>"
|
||||
, " </style>"
|
||||
), String_.Concat_lines_nl
|
||||
( "<pre><style>"
|
||||
, "</style>"
|
||||
, "</pre>"
|
||||
));
|
||||
}
|
||||
@Test public void Nl_only() { // PURPOSE: wiki_pre with \n only was being dropped; PAGE:en.w:Preferred_number DATE:2014-06-24
|
||||
fxt.Test_html_full_str(String_.Concat_lines_nl_skip_last
|
||||
( " a"
|
||||
, " " // was being dropped
|
||||
, " b"
|
||||
), String_.Concat_lines_nl
|
||||
( "<pre>a"
|
||||
, "" // make sure it's still there
|
||||
, "b"
|
||||
, "</pre>"
|
||||
));
|
||||
}
|
||||
@Test public void Nl_w_ws() { // PURPOSE: based on Nl_only; make sure that 1 or more spaces does not add extra \n; PAGE:en.w:Preferred_number DATE:2014-06-24
|
||||
fxt.Test_html_full_str(String_.Concat_lines_nl_skip_last
|
||||
( " a"
|
||||
, " " // 2 spaces
|
||||
, " b"
|
||||
), String_.Concat_lines_nl
|
||||
( "<pre>a"
|
||||
, " " // 1 space
|
||||
, "b"
|
||||
, "</pre>"
|
||||
));
|
||||
}
|
||||
@Test public void Nl_many() { // PURPOSE: handle alternating \n\s; PAGE:en.w:Preferred_number DATE:2014-06-24
|
||||
fxt.Test_html_full_str(String_.Concat_lines_nl_skip_last
|
||||
( " a"
|
||||
, " "
|
||||
, " b"
|
||||
, " "
|
||||
, " c"
|
||||
), String_.Concat_lines_nl
|
||||
( "<pre>a"
|
||||
, ""
|
||||
, "b"
|
||||
, ""
|
||||
, "c"
|
||||
, "</pre>"
|
||||
));
|
||||
}
|
||||
@Test public void Source() { // PURPOSE: " <source>" in pre has issues; PAGE:en.w:Comment_(computer_programming) DATE:2014-06-23
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_html_wiki_str(String_.Concat_lines_nl
|
||||
( " "
|
||||
, " <source>"
|
||||
, " a"
|
||||
, " </source>"
|
||||
, " "
|
||||
), String_.Concat_lines_nl
|
||||
( "<p>" // this is wrong, but will be stripped by tidy
|
||||
, "</p>"
|
||||
, " <pre>"
|
||||
, " a"
|
||||
, "</pre>"
|
||||
, ""
|
||||
, "<p><br/>" // also wrong, but leave for now
|
||||
, "</p>"
|
||||
));
|
||||
}
|
||||
@Test public void False_match_xnde() { // PURPOSE: "\s<trk>" being evaluted as "\s<tr>"; PAGE:de.v:Via_Jutlandica/Gpx DATE:2014-11-29
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_html_wiki_str(String_.Concat_lines_nl
|
||||
( ""
|
||||
, " <trk>"
|
||||
), String_.Concat_lines_nl
|
||||
( ""
|
||||
, "<pre><trk>"
|
||||
, "</pre>"
|
||||
));
|
||||
}
|
||||
}
|
||||
92
400_xowa/src/gplx/xowa/parsers/paras/Xop_pre_lxr.java
Normal file
92
400_xowa/src/gplx/xowa/parsers/paras/Xop_pre_lxr.java
Normal file
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.tblws.*;
|
||||
// Lexer for the "\n\s" hook (newline followed by a space), which may start or continue a whitespace-pre line.
// Make_tkn filters out the cases where "\n\s" is NOT a pre (para disabled, inside a lnki, blank line at BOS, "\n !" inside a tblw)
// and closes open tblw attrs / lists as needed; everything else is delegated to the para wkr's Process_pre.
public class Xop_pre_lxr implements Xop_lxr {
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_pre;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Hook_space, this);} // NOTE: do not treat \n\t as shorthand pre; EX:pl.w:Main_Page; DATE:2014-05-06
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
// bgn_pos: start of the hook (the "\n"); cur_pos: position after the hook. Returns the position at which parsing should resume.
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if ( !ctx.Para().Enabled() // para disabled; "\n\s" should just be "\n\s"; NOTE: para disabled in <gallery>
|
||||
|| ( ctx.Stack_len() > 0 // bounds check
|
||||
&& ctx.Stack_get_last().Tkn_tid() == Xop_tkn_itm_.Tid_lnki // last tkn is lnki; EX: [[File:A.png|a\n\sb]]; PAGE:s.w:Virus;DATE:2015-03-31
|
||||
)
|
||||
) {
|
||||
if (bgn_pos != Xop_parser_.Doc_bgn_bos) // don't add \n if BOS; EX: "<BOS> a" should be " ", not "\n "
|
||||
ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos + 1, Xop_nl_tkn.Tid_char, 1));
|
||||
ctx.Subs_add(root, tkn_mkr.Space(root, cur_pos - 1, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
// txt_pos: 1st position at or after cur_pos that is not a space
int txt_pos = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Space); // NOTE: was Find_fwd_while_tab_or_space, which incorrectly converted tabs to spaces; PAGE:en.w:Cascading_Style_Sheets; DATE:2014-06-23
|
||||
if (txt_pos == src_len) return cur_pos; // "\n\s" at EOS; treat as \n only; EX: "a\n " -> ""; also bounds check
|
||||
byte b = src[txt_pos];
|
||||
// NOTE(review): comment below says "\s\t", but the scan above only skips Byte_ascii.Space; tabs are not skipped -- confirm wording
if (bgn_pos == Xop_parser_.Doc_bgn_bos) { // BOS; gobble up all \s\t; EX: "BOS\s\s\sa" -> "BOSa"
|
||||
if (b == Byte_ascii.Nl) { // next char is nl
|
||||
cur_pos = txt_pos; // position at nl; NOTE: do not position after nl, else may break hdr, tblw, list, etc; EX: "\s\n{|" needs to preserve "\n" for tblw
|
||||
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, cur_pos, Xop_ignore_tkn.Ignore_tid_pre_at_bos));
|
||||
return cur_pos; // ignore pre if blank line at bos; EX: "BOS\s\s\n" -> "BOS\n"
|
||||
}
|
||||
// NOTE(review): comment below says "moved outside Doc_bgn_bos block", but this check is still nested inside that block -- confirm intent
if (b == Byte_ascii.Lt) // next char is <; possible xnde; flag so that xnde can escape; DATE:2013-11-28; moved outside Doc_bgn_bos block above; PAGE:en.w:Comment_(computer_programming); DATE:2014-06-23
|
||||
ctx.Xnde().Pre_at_bos_(true);
|
||||
}
|
||||
switch (ctx.Cur_tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: // close tblw attrs; NOTE: after BOS (since no tblw at BOS) but before "\n !" check
|
||||
case Xop_tkn_itm_.Tid_tblw_tr: case Xop_tkn_itm_.Tid_tblw_th:
|
||||
Xop_tblw_wkr.Atrs_close(ctx, src, root, Bool_.N);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_list: // close all lists unless [[Category]]; SEE:NOTE_4; rewritten; DATE:2015-03-31
|
||||
boolean close_all_lists = true;
|
||||
// the "== txt_pos" / "== tmp_pos" comparisons below turn Find_fwd into a "starts with" test at that position
// NOTE(review): Find_fwd presumably keeps scanning toward src_len when there is no match at the start position -- a prefix-match helper may be cheaper; confirm
if (Bry_finder.Find_fwd(src, Xop_tkn_.Lnki_bgn, txt_pos, src_len) == txt_pos) { // look for "[["
|
||||
int tmp_pos = txt_pos + Xop_tkn_.Lnki_bgn.length;
|
||||
if (Bry_finder.Find_fwd(src, ctx.Wiki().Ns_mgr().Ns_category().Name_db_w_colon(), tmp_pos, src_len) == tmp_pos) // look for "Category:"
|
||||
close_all_lists = false; // "[[Category:" found; "\n\s[[Category:" should not close list; note that [[Category]] is invisible
|
||||
}
|
||||
if (close_all_lists)
|
||||
Xop_list_wkr_.Close_list_if_present(ctx, root, src, bgn_pos, cur_pos);
|
||||
break;
|
||||
}
|
||||
switch (b) { // handle "\n !" which can be tbl
|
||||
case Byte_ascii.Bang:
|
||||
// only a th when currently inside a tblw; otherwise falls out of both switches to the pre handling below
switch (ctx.Cur_tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: case Xop_tkn_itm_.Tid_tblw_tc: case Xop_tkn_itm_.Tid_tblw_tr:
|
||||
case Xop_tkn_itm_.Tid_tblw_th: case Xop_tkn_itm_.Tid_tblw_td: case Xop_tkn_itm_.Tid_tblw_te:
|
||||
int new_cur_pos = txt_pos + 1; // +1 to skip Byte_ascii.Bang
|
||||
Xop_tblw_lxr_ws.Make(ctx, tkn_mkr, root, src, src_len, bgn_pos, new_cur_pos, Xop_tblw_wkr.Tblw_type_th, true);
|
||||
return new_cur_pos;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return ctx.Para().Process_pre(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, txt_pos);
|
||||
}
|
||||
// singleton instance; the constructor is package-private
// NOTE(review): "_" is a reserved keyword from Java 9 onwards, so this identifier only compiles at older source levels -- confirm the build target
public static final Xop_pre_lxr _ = new Xop_pre_lxr(); Xop_pre_lxr() {}
|
||||
// hook registered in the core trie: "\n" followed by a single space
private static final byte[] Hook_space = new byte[] {Byte_ascii.Nl, Byte_ascii.Space};
|
||||
}
|
||||
/*
|
||||
NOTE_4: Close_all_lists_unless_category; PAGE:en.w:SHA-2
|
||||
PURPOSE: \n should ordinarily close list. However, if \n[[Category:A]], then don't close list since [[Category:A]] will trim preceding \n
|
||||
REASON: occurs b/c MW does separate passes for list and Category while XO does one pass.
|
||||
EX: closes *a list
|
||||
*a
|
||||
|
||||
*b
|
||||
|
||||
EX: does not close
|
||||
*a
|
||||
[[Category:A]]
|
||||
*b
|
||||
*/
|
||||
27
400_xowa/src/gplx/xowa/parsers/paras/Xop_pre_tkn.java
Normal file
27
400_xowa/src/gplx/xowa/parsers/paras/Xop_pre_tkn.java
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_pre_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_pre_tkn(int bgn, int end, byte pre_tid, Xop_tkn_itm pre_bgn_tkn) {
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
this.pre_tid = pre_tid;
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_pre;}
|
||||
public byte Pre_tid() {return pre_tid;} private byte pre_tid = Pre_tid_null;
|
||||
public static final byte Pre_tid_null = 0, Pre_tid_bgn = 1, Pre_tid_end = 2;
|
||||
}
|
||||
135
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_lxr.java
Normal file
135
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_lxr.java
Normal file
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.parsers.paras.*;
|
||||
// Lexer for wiki-table ("tblw") hooks; Init_by_wiki registers one instance per Hook_* sequence, each carrying the matching Xop_tblw_wkr.Tblw_type_* value
public class Xop_tblw_lxr implements Xop_lxr {
  // returned by Handle_bang / Handle_lnki when they did not consume the hook; -2 b/c -1 used by Called_from_pre
  public static final int Continue = -2;
  public static final Xop_tblw_lxr _ = new Xop_tblw_lxr();
  public static final byte[] Hook_tb = Bry_.new_a7("\n{|");
  public static final byte[] Hook_te = Bry_.new_a7("\n|}");
  public static final byte[] Hook_tr = Bry_.new_a7("\n|-");
  public static final byte[] Hook_td = Bry_.new_a7("\n|");
  public static final byte[] Hook_th = Bry_.new_a7("\n!");
  public static final byte[] Hook_tc = Bry_.new_a7("\n|+");
  public static final byte[] Hook_td2 = Bry_.new_a7("||");
  public static final byte[] Hook_th2 = Bry_.new_a7("!!");

  private final byte wlxr_type; // tblw type passed to the handlers by Make_tkn; 0 for the instance built by the no-arg ctor

  Xop_tblw_lxr() {
    this.wlxr_type = 0;
  }
  public Xop_tblw_lxr(byte wlxr_type) {
    this.wlxr_type = wlxr_type;
  }

  public byte Lxr_tid() {
    return Xop_lxr_.Tid_tblw;
  }
  // adds one lexer per hook to the trie; order of Add calls is tb, te, tr, td, th, tc, td2, th2
  public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
    byte[][] hooks = {Hook_tb, Hook_te, Hook_tr, Hook_td, Hook_th, Hook_tc, Hook_td2, Hook_th2};
    byte[] types =
      { Xop_tblw_wkr.Tblw_type_tb, Xop_tblw_wkr.Tblw_type_te, Xop_tblw_wkr.Tblw_type_tr, Xop_tblw_wkr.Tblw_type_td
      , Xop_tblw_wkr.Tblw_type_th, Xop_tblw_wkr.Tblw_type_tc, Xop_tblw_wkr.Tblw_type_td2, Xop_tblw_wkr.Tblw_type_th2
      };
    for (int i = 0; i < hooks.length; i++)
      core_trie.Add(hooks[i], new Xop_tblw_lxr(types[i]));
  }
  public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}

  // tries Handle_bang, then Handle_lnki; if both return Continue, hands the hook to the tblw wkr
  public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    int bang_rv = Handle_bang(wlxr_type, ctx, ctx.Tkn_mkr(), root, src, src_len, bgn_pos, cur_pos);
    if (bang_rv != Continue) return bang_rv;
    int lnki_rv = Handle_lnki(wlxr_type, ctx, ctx.Tkn_mkr(), root, src, src_len, bgn_pos, cur_pos);
    if (lnki_rv != Continue) return lnki_rv;
    return ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, false, wlxr_type, Xop_tblw_wkr.Called_from_general, -1, -1);
  }

  // Filters out "\n!", "!!" and "\n|" sequences that are not really tblw tkns.
  // standalone "!" should be ignored if no tblw present; EX: "a b! c" should not trigger ! for header
  // Returns Continue if the sequence may still be a tblw tkn; else the value returned by whichever handler consumed it
  public static int Handle_bang(int wlxr_type, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    boolean is_cell_hook
      =  wlxr_type == Xop_tblw_wkr.Tblw_type_th   // \n!
      || wlxr_type == Xop_tblw_wkr.Tblw_type_th2  // !!
      || wlxr_type == Xop_tblw_wkr.Tblw_type_td;  // \n|
    if (!is_cell_hook) return Continue;

    boolean no_tblw_in_stack = ctx.Stack_get_typ(Xop_tkn_itm_.Tid_tblw_tb) == null; // check entire stack for tblw; DATE:2014-03-11
    if (  no_tblw_in_stack                                   // no tblw in stack; highly probable that current sequence is not tblw tkn
      ||  ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_lnki         // cur tid is lnki; PAGE:en.w:Pink_(singer); DATE:2014-06-25
      ) {
      int lnki_idx = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_lnki);
      // NOTE: restricted to Tblw_type_td b/c th should not apply when pipe_lxr is called below; DATE:2013-04-24
      boolean pipe_in_lnki = lnki_idx != Xop_ctx.Stack_not_found && wlxr_type == Xop_tblw_wkr.Tblw_type_td;
      if (pipe_in_lnki) {
        Xop_tkn_itm lnki_tkn = ctx.Stack_pop_til(root, src, lnki_idx, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_tblw_td); // pop any intervening nodes until lnki
        ctx.Stack_add(lnki_tkn); // push lnki back onto stack; TODO: combine these 2 lines into 1
        // NOTE: this is a "\n|" inside a [[ ]]; must create two tokens for lnki to build correctly;
        ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos + 1, Xop_nl_tkn.Tid_char, 1));
        return Xop_pipe_lxr._.Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos); // NOTE: need to call pipe_lxr in order to invalidate if lnki; DATE:2014-06-06
      }
      // \n| or \n! but no tbl
      boolean hook_starts_with_nl
        =  bgn_pos != Xop_parser_.Doc_bgn_bos   // avoid ! at BOS
        && src[bgn_pos] == Byte_ascii.Nl;
      if (!hook_starts_with_nl)                 // handle "!!" only
        return ctx.Lxr_make_txt_(cur_pos);
      // +1 to ignore \n of "\n!", "\n!!", "\n|"; DATE:2014-02-19
      return Xop_tblw_wkr.Handle_false_tblw_match(ctx, root, src, bgn_pos, cur_pos, tkn_mkr.Txt(bgn_pos + 1, cur_pos), true);
    }

    // !!; extra check to make sure \n! exists; DATE:2014-10-19
    if (wlxr_type == Xop_tblw_wkr.Tblw_type_th2 && Th2_lacks_leading_th(src, src_len, bgn_pos))
      return Xop_tblw_wkr.Handle_false_tblw_match(ctx, root, src, bgn_pos, cur_pos, tkn_mkr.Txt(bgn_pos, cur_pos), false);
    return Continue;
  }
  // true if the "!!" at bgn_pos is not on a line whose first non-ws char is a separate "!"
  private static boolean Th2_lacks_leading_th(byte[] src, int src_len, int bgn_pos) {
    int nl_pos = Bry_finder.Find_bwd(src, Byte_ascii.Nl, bgn_pos);    // search for previous \n
    if (nl_pos == Bry_finder.Not_found) return true;                   // no \n; invalid
    // skip \n, then skip \s; needed for "\n\s!" which is still a tblw
    int line_txt_pos = Bry_finder.Find_fwd_while_space_or_tab(src, nl_pos + 1, src_len);
    if (line_txt_pos == bgn_pos) return true;                          // invalid: "\n" is directly in front of "!!"
    return src[line_txt_pos] != Byte_ascii.Bang;                       // invalid if not "\n!"
  }

  // Handles a tblw hook found while a lnki is the last tkn on the stack; tblw found; check if in lnki and validate ttl; DATE:2014-03-29
  // Returns Continue if the stack's last tkn is not a lnki, or if the hook needs no lnki-specific handling
  public static int Handle_lnki(int wlxr_type, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    Xop_tkn_itm stack_top = ctx.Stack_get_last();
    if (stack_top == null || stack_top.Tkn_tid() != Xop_tkn_itm_.Tid_lnki) return Continue;
    Xop_lnki_tkn lnki = (Xop_lnki_tkn)stack_top;

    if (!lnki.Pipe_count_is_zero()) {   // nth pipe; no need to check for invalidate
      if (wlxr_type == Xop_tblw_wkr.Tblw_type_td2) {    // ||
        ctx.Subs_add(root, tkn_mkr.Pipe(bgn_pos, cur_pos));
        return cur_pos;
      }
      if (  wlxr_type == Xop_tblw_wkr.Tblw_type_th2     // !!
        ||  wlxr_type == Xop_tblw_wkr.Tblw_type_th      // !
        ) {
        ctx.Subs_add(root, tkn_mkr.Txt(bgn_pos, cur_pos));  // NOTE: cur_pos should handle ! and !!
        return cur_pos;
      }
      return Continue;
    }

    // 1st pipe; EX: [[A\n|+B]]
    boolean invalidate = false;
    if (  wlxr_type == Xop_tblw_wkr.Tblw_type_tb        // \n{|
      ||  wlxr_type == Xop_tblw_wkr.Tblw_type_tc        // \n|+
      ||  wlxr_type == Xop_tblw_wkr.Tblw_type_tr        // \n|-
      ||  wlxr_type == Xop_tblw_wkr.Tblw_type_te        // \n|}
      ) {
      invalidate = true;                                // always invalidate
    }
    else if (wlxr_type == Xop_tblw_wkr.Tblw_type_td2) { // ||; EX: [[A||B]]
      if (ctx.Tid_is_image_map()) {   // if in ImageMap, then treat "||" as "pipe" (not "pipe_text"); note that outer tbl is ignored; EX:w:United_States_presidential_election,_1992
        ctx.Subs_add(root, tkn_mkr.Pipe(bgn_pos, cur_pos));
        return cur_pos;
      }
      boolean ttl_is_valid = Xop_lnki_wkr_.Parse_ttl(ctx, src, lnki, bgn_pos);  // check if invalid; EX: "[[A<||]]" would be invalid b/c of <
      if (ttl_is_valid) {             // "valid" title, but "||" must be converted to pipe inside lnki; EX:cs.w:Main_Page; DATE:2014-05-09
        ctx.Subs_add(root, tkn_mkr.Pipe(bgn_pos, cur_pos)); // NOTE: technically need to check if pipe or pipe_text; for now, do pipe as pipe_text could break [[File:A.png||20px]]; DATE:2014-05-06
        return cur_pos;
      }
      invalidate = true;
    }
    if (!invalidate) return Continue;
    ctx.Stack_pop_last();
    return Xop_lnki_wkr_.Invalidate_lnki(ctx, src, root, lnki, bgn_pos);
  }
}
|
||||
66
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_lxr_ws.java
Normal file
66
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_lxr_ws.java
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_tblw_lxr_ws {
|
||||
public static int Make(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, byte wlxr_type, boolean called_from_pre) {
|
||||
int rv = Xop_tblw_lxr.Handle_bang(wlxr_type, ctx, ctx.Tkn_mkr(), root, src, src_len, bgn_pos, cur_pos);
|
||||
if (rv != Xop_tblw_lxr.Continue) return rv;
|
||||
rv = Xop_tblw_lxr.Handle_lnki(wlxr_type, ctx, ctx.Tkn_mkr(), root, src, src_len, bgn_pos, cur_pos);
|
||||
if (rv != Xop_tblw_lxr.Continue) return rv;
|
||||
if (!called_from_pre) { // skip if called from pre, else will return text, since pre_lxr has not created \n tkn yet; EX: "\n ! a"; DATE:2014-02-14
|
||||
// find first non-ws tkn; check if nl or para
|
||||
int root_subs_len = root.Subs_len();
|
||||
int tkn_idx = root_subs_len - 1;
|
||||
boolean loop = true, nl_found = false;
|
||||
while (loop) {
|
||||
if (tkn_idx < 0) break;
|
||||
Xop_tkn_itm tkn = root.Subs_get(tkn_idx);
|
||||
switch (tkn.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_space: case Xop_tkn_itm_.Tid_tab: // ws: keep moving backwards
|
||||
tkn_idx--;
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_newLine:
|
||||
case Xop_tkn_itm_.Tid_para:
|
||||
loop = false;
|
||||
nl_found = true;
|
||||
break;
|
||||
default:
|
||||
loop = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (tkn_idx == -1) { // bos reached; all tkns are ws;
|
||||
if (wlxr_type == Xop_tblw_wkr.Tblw_type_tb) { // wlxr_type is {|;
|
||||
root.Subs_del_after(0); // trim
|
||||
return ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, false, wlxr_type, Xop_tblw_wkr.Called_from_general, -1, -1); // process {|
|
||||
}
|
||||
else // wlxr_type is something else, but invalid since no containing {|
|
||||
return ctx.Lxr_make_txt_(cur_pos);
|
||||
}
|
||||
|
||||
if (!nl_found && wlxr_type == Xop_tblw_wkr.Tblw_type_td) // | but no nl; return control to pipe_lxr for further processing
|
||||
return Tblw_ws_cell_pipe;
|
||||
if (nl_found)
|
||||
root.Subs_del_after(tkn_idx);
|
||||
}
|
||||
return ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, false, wlxr_type, Xop_tblw_wkr.Called_from_general, -1, -1);
|
||||
}
|
||||
public static final byte[] Hook_tb = Bry_.new_a7("{|"), Hook_te = Bry_.new_a7("|}"), Hook_tr = Bry_.new_a7("|-")
|
||||
, Hook_th = Bry_.new_a7("!"), Hook_tc = Bry_.new_a7("|+");
|
||||
public static final int Tblw_ws_cell_pipe = -1;
|
||||
}
|
||||
37
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_tb_tkn.java
Normal file
37
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_tb_tkn.java
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_tblw_tb_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
public Xop_tblw_tb_tkn(int bgn, int end, boolean tblw_xml, boolean auto_created) {
|
||||
this.tblw_xml = tblw_xml; this.Tkn_ini_pos(false, bgn, end);
|
||||
if (auto_created) // auto-created should be marked as having no attributes, else text may get gobbled up incorrectly; EX:Paris#Demographics DATE:2014-03-18
|
||||
atrs_bgn = atrs_end = bgn;
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_tblw_tb;}
|
||||
public int Tblw_tid() {return Xop_xnde_tag_.Tid_table;}
|
||||
public int Atrs_bgn() {return atrs_bgn;} private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
|
||||
public int Atrs_end() {return atrs_end;} private int atrs_end = -1;
|
||||
public void Atrs_rng_set(int bgn, int end) {this.atrs_bgn = bgn; this.atrs_end = end;}
|
||||
public Xop_xatr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {atrs_ary = v; return this;} private Xop_xatr_itm[] atrs_ary;
|
||||
public boolean Tblw_xml() {return tblw_xml;} private boolean tblw_xml;
|
||||
public void Tblw_xml_(boolean v) {tblw_xml = v;}
|
||||
public int Tblw_subs_len() {return tblw_subs_len;} public void Tblw_subs_len_add_() {++tblw_subs_len;} private int tblw_subs_len;
|
||||
public int Caption_count() {return caption_count;} public Xop_tblw_tb_tkn Caption_count_(int v) {caption_count = v; return this;} private int caption_count = 0;
|
||||
public Xop_tblw_tb_tkn Caption_count_add_1() {++caption_count; return this;}
|
||||
public Xop_tblw_tb_tkn Subs_add_ary(Xop_tkn_itm... ary) {for (Xop_tkn_itm itm : ary) super.Subs_add(itm); return this;}
|
||||
}
|
||||
30
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_tc_tkn.java
Normal file
30
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_tc_tkn.java
Normal file
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_tblw_tc_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_tblw_tc;}
|
||||
public int Tblw_tid() {return Xop_xnde_tag_.Tid_caption;}
|
||||
public int Atrs_bgn() {return atrs_bgn;} private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
|
||||
public int Atrs_end() {return atrs_end;} private int atrs_end = -1;
|
||||
public void Atrs_rng_set(int bgn, int end) {this.atrs_bgn = bgn; this.atrs_end = end;}
|
||||
public Xop_xatr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {atrs_ary = v; return this;} private Xop_xatr_itm[] atrs_ary;
|
||||
public boolean Tblw_xml() {return tblw_xml;} private boolean tblw_xml;
|
||||
public int Tblw_subs_len() {return tblw_subs_len;} public void Tblw_subs_len_add_() {++tblw_subs_len;} private int tblw_subs_len;
|
||||
public Xop_tblw_tc_tkn Subs_add_ary(Xop_tkn_itm... ary) {for (Xop_tkn_itm itm : ary) super.Subs_add(itm); return this;}
|
||||
public Xop_tblw_tc_tkn(int bgn, int end, boolean tblw_xml) {this.tblw_xml = tblw_xml; this.Tkn_ini_pos(false, bgn, end);}
|
||||
}
|
||||
30
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_td_tkn.java
Normal file
30
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_td_tkn.java
Normal file
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_tblw_td_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_tblw_td;}
|
||||
public int Tblw_tid() {return Xop_xnde_tag_.Tid_td;}
|
||||
public int Atrs_bgn() {return atrs_bgn;} private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
|
||||
public int Atrs_end() {return atrs_end;} private int atrs_end = -1;
|
||||
public void Atrs_rng_set(int bgn, int end) {this.atrs_bgn = bgn; this.atrs_end = end;}
|
||||
public Xop_xatr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {atrs_ary = v; return this;} private Xop_xatr_itm[] atrs_ary;
|
||||
public boolean Tblw_xml() {return tblw_xml;} private boolean tblw_xml;
|
||||
public int Tblw_subs_len() {return tblw_subs_len;} public void Tblw_subs_len_add_() {++tblw_subs_len;} private int tblw_subs_len;
|
||||
public Xop_tblw_td_tkn Subs_add_ary(Xop_tkn_itm... ary) {for (Xop_tkn_itm itm : ary) super.Subs_add(itm); return this;}
|
||||
public Xop_tblw_td_tkn(int bgn, int end, boolean tblw_xml) {this.tblw_xml = tblw_xml; this.Tkn_ini_pos(false, bgn, end);}
|
||||
}
|
||||
30
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_th_tkn.java
Normal file
30
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_th_tkn.java
Normal file
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_tblw_th_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_tblw_th;}
|
||||
public int Tblw_tid() {return Xop_xnde_tag_.Tid_th;}
|
||||
public int Atrs_bgn() {return atrs_bgn;} private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
|
||||
public int Atrs_end() {return atrs_end;} private int atrs_end = -1;
|
||||
public void Atrs_rng_set(int bgn, int end) {this.atrs_bgn = bgn; this.atrs_end = end;}
|
||||
public Xop_xatr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {atrs_ary = v; return this;} private Xop_xatr_itm[] atrs_ary;
|
||||
public boolean Tblw_xml() {return tblw_xml;} private boolean tblw_xml;
|
||||
public int Tblw_subs_len() {return tblw_subs_len;} public void Tblw_subs_len_add_() {++tblw_subs_len;} private int tblw_subs_len;
|
||||
public Xop_tblw_th_tkn Subs_add_ary(Xop_tkn_itm... ary) {for (Xop_tkn_itm itm : ary) super.Subs_add(itm); return this;}
|
||||
public Xop_tblw_th_tkn(int bgn, int end, boolean tblw_xml) {this.tblw_xml = tblw_xml; this.Tkn_ini_pos(false, bgn, end);}
|
||||
}
|
||||
27
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_tkn.java
Normal file
27
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_tkn.java
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public interface Xop_tblw_tkn extends Xop_tkn_itm {
|
||||
int Tblw_tid();
|
||||
boolean Tblw_xml();
|
||||
int Tblw_subs_len(); void Tblw_subs_len_add_();
|
||||
int Atrs_bgn();
|
||||
int Atrs_end();
|
||||
void Atrs_rng_set(int bgn, int end);
|
||||
Xop_xatr_itm[] Atrs_ary(); Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v);
|
||||
}
|
||||
34
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_tr_tkn.java
Normal file
34
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_tr_tkn.java
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_tblw_tr_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
public Xop_tblw_tr_tkn(int bgn, int end, boolean tblw_xml, boolean auto_created) {
|
||||
this.tblw_xml = tblw_xml; this.Tkn_ini_pos(false, bgn, end);
|
||||
if (auto_created) // auto-created should be marked as having no attributes, else text may get gobbled up incorrectly; EX:Paris#Demographics DATE:2014-03-18
|
||||
atrs_bgn = atrs_end = bgn;
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_tblw_tr;}
|
||||
public int Tblw_tid() {return Xop_xnde_tag_.Tid_tr;}
|
||||
public int Atrs_bgn() {return atrs_bgn;} private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
|
||||
public int Atrs_end() {return atrs_end;} private int atrs_end = -1;
|
||||
public void Atrs_rng_set(int bgn, int end) {this.atrs_bgn = bgn; this.atrs_end = end;}
|
||||
public Xop_xatr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {atrs_ary = v; return this;} private Xop_xatr_itm[] atrs_ary;
|
||||
public boolean Tblw_xml() {return tblw_xml;} private boolean tblw_xml;
|
||||
public int Tblw_subs_len() {return tblw_subs_len;} public void Tblw_subs_len_add_() {++tblw_subs_len;} private int tblw_subs_len;
|
||||
public Xop_tblw_tr_tkn Subs_add_ary(Xop_tkn_itm... ary) {for (Xop_tkn_itm itm : ary) super.Subs_add(itm); return this;}
|
||||
}
|
||||
551
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_wkr.java
Normal file
551
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_wkr.java
Normal file
@@ -0,0 +1,551 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.paras.*;
|
||||
public class Xop_tblw_wkr implements Xop_ctx_wkr {
|
||||
private int tblw_te_ignore_count = 0;
|
||||
// status of 1st cell pipe; EX: \n| a | b | c || -> flag pipe between a and b but ignore b and c
private boolean cell_pipe_seen;
public boolean Cell_pipe_seen() {
  return cell_pipe_seen;
}
// setter; returns this wkr for chaining
public Xop_tblw_wkr Cell_pipe_seen_(boolean v) {
  cell_pipe_seen = v;
  return this;
}
|
||||
// intentionally empty: this wkr does nothing when given the ctx
public void Ctor_ctx(Xop_ctx ctx) {
}
|
||||
// resets the per-page state: the cell-pipe flag and the ignored-te counter
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
  cell_pipe_seen = false;
  tblw_te_ignore_count = 0;
}
|
||||
// intentionally empty: this wkr does nothing at page end
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
}
|
||||
public void AutoClose(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
|
||||
tkn.Subs_move(root);
|
||||
tkn.Src_end_(cur_pos);
|
||||
}
|
||||
public static final byte Called_from_general = 0, Called_from_list = 1, Called_from_pre = 2;
|
||||
/**
 * Entry point for every table token type (tb, te, tr, td, td2, th, th2, tc), for both wiki syntax and xml tags (tbl_is_xml).
 * Visible steps, in order:
 * 1. clamp bgn_pos to 0 when it equals Xop_parser_.Doc_bgn_bos;
 * 2. if a list is found on the stack (and the token is wiki syntax not called from the list wkr): emit "||" as a pipe tkn, else call Xop_list_wkr_.Close_list_if_present;
 * 3. end any open apos frame;
 * 4. if no usable table is on the stack: auto-create tb / tr for xml tags, or emit the chars as nl / pipe / txt tkns for wiki syntax;
 * 5. call Atrs_make for a preceding wiki-syntax tb / tr, then delegate to Make_tkn_end (te) or Make_tkn_bgn_tblw (everything else).
 * @return cur_pos or the delegate's return value; -1 when called_from is Called_from_pre and the token cannot be applied to a table
 */
public int Make_tkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, boolean tbl_is_xml, byte wlxr_type, byte called_from, int atrs_bgn, int atrs_end) {// REF.MW: Parser|doTableStuff
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_bos) {
|
||||
bgn_pos = 0; // do not allow -1 pos
|
||||
}
|
||||
|
||||
int list_tkn_idx = ctx.Stack_idx_find_but_stop_at_tbl(Xop_tkn_itm_.Tid_list);
|
||||
if ( list_tkn_idx != -1 // list is in effect; DATE:2014-05-05
|
||||
&& !tbl_is_xml // tbl is wiki-syntax; ie: auto-close if "{|" but do not close if "<table>"; DATE:2014-02-05
|
||||
&& called_from != Called_from_list // do not close if called from list; EX: consider "{|"; "* a {|" is called from list_wkr, and should not close; "* a\n{|" is called from tblw_lxr and should close; DATE:2014-02-14
|
||||
) {
|
||||
if (wlxr_type == Tblw_type_td2) { // if in list, treat "||" as lnki, not tblw; EX: es.d:casa; es.d:tres; DATE:2014-02-15
|
||||
ctx.Subs_add(root, ctx.Tkn_mkr().Pipe(bgn_pos, cur_pos)); // NOTE: technically need to check if pipe or pipe_text; for now, do pipe as pipe_text could break [[File:A.png||20px]]; DATE:2014-05-06
|
||||
return cur_pos;
|
||||
}
|
||||
else {
|
||||
Xop_list_wkr_.Close_list_if_present(ctx, root, src, bgn_pos, cur_pos);
|
||||
}
|
||||
}
|
||||
if (ctx.Apos().Stack_len() > 0) // open apos; note that apos keeps its own stack, as they are not "structural" (not sure about this)
|
||||
ctx.Apos().EndFrame(ctx, root, src, cur_pos, true); // close it
|
||||
|
||||
Xop_tblw_tkn prv_tkn = ctx.Stack_get_tbl();
|
||||
if ( prv_tkn == null // prv_tkn not found; i.e.: no earlier "{|" or "<table>"
|
||||
|| ( ctx.Stack_get_tblw_tb() == null // no {| on stack; DATE:2014-05-05
|
||||
&& !tbl_is_xml // and cur is tblw (i.e.: not xnde); DATE:2014-05-05
|
||||
)
|
||||
) {
|
||||
// every case below either returns, or breaks out to continue with the normal dispatch at the end of this method
|
||||
switch (wlxr_type) {
|
||||
case Tblw_type_tb: // "{|";
|
||||
break; // noop; by definition "{|" does not need to have a previous "{|"
|
||||
case Tblw_type_td: // "|"
|
||||
case Tblw_type_td2: // "||"
|
||||
if (tbl_is_xml) { // <td> should automatically add <table><tr>
|
||||
ctx.Subs_add_and_stack_tblw(root, prv_tkn, tkn_mkr.Tblw_tb(bgn_pos, bgn_pos, tbl_is_xml, true));
|
||||
prv_tkn = tkn_mkr.Tblw_tr(bgn_pos, bgn_pos, tbl_is_xml, true);
|
||||
ctx.Subs_add_and_stack_tblw(root, prv_tkn, prv_tkn);
|
||||
break;
|
||||
}
|
||||
else {
|
||||
if (called_from == Called_from_pre)
|
||||
return -1;
|
||||
else { // DATE:2014-02-19; NOTE: do not add nl if ||; DATE:2014-04-14
|
||||
if (wlxr_type == Tblw_type_td) { // "\n|"
|
||||
ctx.Subs_add(root, ctx.Tkn_mkr().NewLine(bgn_pos, bgn_pos + 1, Xop_nl_tkn.Tid_char, 1));
|
||||
ctx.Subs_add(root, ctx.Tkn_mkr().Pipe(bgn_pos + 1, cur_pos));
|
||||
}
|
||||
else // "||"
|
||||
ctx.Subs_add(root, ctx.Tkn_mkr().Pipe(bgn_pos, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
}
|
||||
case Tblw_type_th: // "!"
|
||||
case Tblw_type_th2: // "!!"
|
||||
case Tblw_type_tc: // "|+"
|
||||
case Tblw_type_tr: // "|-"
|
||||
if (tbl_is_xml) { // <tr> should automatically add <table>; DATE:2014-02-13
|
||||
prv_tkn = tkn_mkr.Tblw_tb(bgn_pos, bgn_pos, tbl_is_xml, true);
|
||||
ctx.Subs_add_and_stack_tblw(root, prv_tkn, prv_tkn);
|
||||
break;
|
||||
}
|
||||
else {
|
||||
if (called_from == Called_from_pre)
|
||||
return -1;
|
||||
else
|
||||
return Xop_tblw_wkr.Handle_false_tblw_match(ctx, root, src, bgn_pos, cur_pos, ctx.Tkn_mkr().Txt(bgn_pos + 1, cur_pos), true); // DATE:2014-02-19
|
||||
}
|
||||
case Tblw_type_te: // "|}"
|
||||
if (tblw_te_ignore_count > 0) {
|
||||
--tblw_te_ignore_count;
|
||||
return cur_pos;
|
||||
}
|
||||
else {
|
||||
if (called_from == Called_from_pre)
|
||||
return -1;
|
||||
else
|
||||
return Xop_tblw_wkr.Handle_false_tblw_match(ctx, root, src, bgn_pos, cur_pos, tkn_mkr.Txt(bgn_pos + 1, cur_pos), true); // +1 to skip "\n" in "\n|}" (don't convert \n to text); DATE:2014-02-19
|
||||
}
|
||||
default: throw Exc_.new_unhandled(wlxr_type);
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: prv_tkn can still be null here when wlxr_type is Tblw_type_tb (the noop break above)
|
||||
int prv_tid = prv_tkn == null ? Xop_tkn_itm_.Tid_null : prv_tkn.Tkn_tid();
|
||||
if (prv_tkn != null && !prv_tkn.Tblw_xml()) { // note that this logic is same as Atrs_close; repeated here for "perf"
|
||||
switch (prv_tid) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: case Xop_tkn_itm_.Tid_tblw_tr:
|
||||
Atrs_make(ctx, src, root, this, prv_tkn, Bool_.N);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (wlxr_type == Tblw_type_te)
|
||||
return Make_tkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_tblw_te, wlxr_type, prv_tkn, prv_tid, tbl_is_xml);
|
||||
else
|
||||
return Make_tkn_bgn_tblw(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, wlxr_type, tbl_is_xml, atrs_bgn, atrs_end, prv_tkn, prv_tid);
|
||||
}
|
||||
/**
 * Creates the tblw token for wlxr_type (tb, tr, td / td2, th / th2, tc), adds it to root and pushes it on the stack.
 * Before creating the token, the inner switch on prv_tid (the tid of prv_tkn as computed by the caller; Tid_null when prv_tkn is null)
 * repairs the stack: it pops unclosed cells / rows / captions or auto-creates a missing tr.
 * Early returns where no new token is stacked:
 * - tb directly under an xml tb: an ignore tkn is added, tblw_te_ignore_count is incremented, and the rest of the line is skipped;
 * - td2 directly under tb: only resets the tb's atrs range;
 * - the th fallbacks that add the chars as a txt tkn.
 * After stacking, atrs are set when atrs_bgn is greater than Atrs_ignore_check (and parsed only for Parse_tid_page_wiki),
 * and the para wkr is notified for tb / tr / td / th.
 * @return cur_pos; in the ignored-tb case, the value returned by Bry_finder.Find_fwd_until for Byte_ascii.Nl
 */
private int Make_tkn_bgn_tblw(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, byte wlxr_type, boolean tbl_is_xml, int atrs_bgn, int atrs_end, Xop_tblw_tkn prv_tkn, int prv_tid) {
|
||||
if (wlxr_type != Tblw_type_tb) // NOTE: do not ignore ws if {|; will cause strange behavior with pre; DATE:2013-02-12
|
||||
Ignore_ws(ctx, root);
|
||||
Xop_tblw_tkn new_tkn = null;
|
||||
switch (wlxr_type) {
|
||||
case Tblw_type_tb: // <table>
|
||||
// NOTE: auto_create is never set to true in the code below (the only assignment is commented out), so the auto_create block is currently dead
|
||||
boolean ignore_prv = false, auto_create = false;
|
||||
switch (prv_tid) {
|
||||
case Xop_tkn_itm_.Tid_null: // noop; <table>
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_td: // noop; <td><table>
|
||||
case Xop_tkn_itm_.Tid_tblw_th: // noop; <th><table>
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: // fix; <table><table> -> <table>; ignore current table; DATE:2014-02-02
|
||||
if (prv_tkn.Tblw_xml()) { // fix: <table><table> -> <table>; earlier tbl is xnde; ignore; EX:en.b:Wikibooks:Featured books; DATE:2014-02-08
|
||||
((Xop_tblw_tb_tkn)prv_tkn).Tblw_xml_(false); // if <table>{|, discard <table>, but mark {| as <table>; needed to handle <table>\n{|\n| where "|" must be treated as tblw dlm; DATE:2014-02-22
|
||||
ignore_prv = true;
|
||||
}
|
||||
// else // fix: <table><table> -> <table><tr><td><table>; earlier tbl is tblw; auto-create; EX:it.w:Main_Page; DATE:2014-02-08; TIDY:depend on tidy to fix; PAGE: it.w:Portal:Animali; DATE:2014-05-31
|
||||
// auto_create = true;
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_tr: // noop: <table><tr><table> -> <table><tr><td><table>; should probably auto-create td, but MW does not; DATE:2014-03-18
|
||||
case Xop_tkn_itm_.Tid_tblw_tc: // noop; <caption><table>; TIDY:was <caption></caption><tr><td><table>; PAGE: es.w:Savilla DATE:2014-06-29
|
||||
break;
|
||||
}
|
||||
if (ignore_prv) {
|
||||
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, cur_pos, Xop_ignore_tkn.Ignore_tid_htmlTidy_tblw));
|
||||
++tblw_te_ignore_count;
|
||||
cur_pos = Bry_finder.Find_fwd_until(src, cur_pos, src_len, Byte_ascii.Nl); // NOTE: minor hack; this tblw tkn will be ignored, so ignore any of its attributes as well; gobble up all chars till nl. see: if two consecutive tbs, ignore attributes on 2nd; en.wikibooks.org/wiki/Wikibooks:Featured books
|
||||
return cur_pos;
|
||||
}
|
||||
if (auto_create) {
|
||||
ctx.Subs_add_and_stack_tblw(root, prv_tkn, tkn_mkr.Tblw_tr(bgn_pos, bgn_pos, tbl_is_xml, true));
|
||||
ctx.Subs_add_and_stack_tblw(root, prv_tkn, tkn_mkr.Tblw_td(bgn_pos, bgn_pos, tbl_is_xml));
|
||||
}
|
||||
Xop_tblw_tb_tkn tb_tkn = tkn_mkr.Tblw_tb(bgn_pos, cur_pos, tbl_is_xml, false);
|
||||
new_tkn = tb_tkn;
|
||||
break;
|
||||
case Tblw_type_tr: // <tr>
|
||||
switch (prv_tid) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: break; // noop; <table><tr>
|
||||
case Xop_tkn_itm_.Tid_tblw_tc: // fix; <caption><tr> -> <caption></caption><tr>
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tc), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_td: // fix; <td><tr> -> <td></td></tr><tr>
|
||||
case Xop_tkn_itm_.Tid_tblw_th: // fix; <th><tr> -> <th></th></tr><tr>
|
||||
if (!tbl_is_xml)
|
||||
ctx.Para().Process_nl(ctx, root, src, bgn_pos, bgn_pos + 1); // simulate "\n"; 2012-12-08
|
||||
int stack_pos = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tr);
|
||||
if (stack_pos != Xop_ctx.Stack_not_found) // don't pop <tr> if none found; PAGE:en.w:Turks_in_Denmark DATE:2014-03-02
|
||||
ctx.Stack_pop_til(root, src, stack_pos, true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_tr: // fix; <tr><tr> -> <tr>
|
||||
if (prv_tkn.Tblw_subs_len() == 0) { // NOTE: set prv_row to ignore, but do not pop; see Tr_dupe_xnde and [[Jupiter]]; only invoke if same type; EX: <tr><tr> but not |-<tr>; DATE:2013-12-09
|
||||
Xop_tkn_itm prv_row = ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tr), false, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
prv_row.Ignore_y_();
|
||||
}
|
||||
else
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tr), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
break;
|
||||
}
|
||||
Xop_tblw_tr_tkn tr_tkn = tkn_mkr.Tblw_tr(bgn_pos, cur_pos, tbl_is_xml, false);
|
||||
new_tkn = tr_tkn;
|
||||
break;
|
||||
case Tblw_type_td: // <td>
|
||||
case Tblw_type_td2:
|
||||
boolean create_th = false;
|
||||
switch (prv_tid) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tr: break; // noop; <tr><td>
|
||||
case Xop_tkn_itm_.Tid_tblw_td: // fix; <td><td> -> <td></td><td>
|
||||
if (!tbl_is_xml) // only for "\n|" not <td>
|
||||
ctx.Para().Process_nl(ctx, root, src, bgn_pos, bgn_pos + 1); // simulate "\n"; DATE:2014-02-20; ru.w:;[[Help:Download]]; DATE:2014-02-20
|
||||
ctx.Para().Process_block__bgn_y__end_n(Xop_xnde_tag_.Tag_td); // <td>
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(prv_tid), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_th: // fix; <th><td> -> <th></th><td>
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(prv_tid), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
if (wlxr_type == Tblw_type_td2) create_th = true; // !a||b -> <th><th>; but !a|b -> <th><td>
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: // fix; <table><td> -> <table><tr><td>
|
||||
if (wlxr_type == Tblw_type_td2) { // NOTE: ignore || if preceded by {|; {|a||b\n
|
||||
prv_tkn.Atrs_rng_set(-1, -1); // reset atrs_bgn; remainder of line will become part of tb atr
|
||||
return cur_pos;
|
||||
}
|
||||
else {
|
||||
new_tkn = tkn_mkr.Tblw_tr(bgn_pos, cur_pos, tbl_is_xml, true);
|
||||
new_tkn.Atrs_rng_set(bgn_pos, bgn_pos);
|
||||
ctx.Subs_add_and_stack_tblw(root, prv_tkn, new_tkn);
|
||||
prv_tid = new_tkn.Tkn_tid();
|
||||
}
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_tc: // fix; <caption><td> -> <caption></caption><tr><td>
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tc), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
new_tkn = tkn_mkr.Tblw_tr(bgn_pos, cur_pos, tbl_is_xml, true);
|
||||
ctx.Subs_add_and_stack_tblw(root, prv_tkn, new_tkn);
|
||||
prv_tid = new_tkn.Tkn_tid();
|
||||
break;
|
||||
}
|
||||
// if (prv_tid == Xop_tkn_itm_.Tid_xnde)
|
||||
// ctx.Stack_auto_close(root, src, prv_tkn, prv_tkn.Src_bgn(), prv_tkn.Src_end());
|
||||
// new_tkn is overwritten here; any auto-created tr assigned to it above has already been added and stacked
|
||||
if (create_th) new_tkn = tkn_mkr.Tblw_th(bgn_pos, cur_pos, tbl_is_xml);
|
||||
else new_tkn = tkn_mkr.Tblw_td(bgn_pos, cur_pos, tbl_is_xml);
|
||||
cell_pipe_seen = false;
|
||||
break;
|
||||
case Tblw_type_th: // <th>
|
||||
case Tblw_type_th2:
|
||||
switch (prv_tid) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tr: break; // noop; <tr><th>
|
||||
case Xop_tkn_itm_.Tid_tblw_th: // fix; <th><th> -> <th></th><th>
|
||||
// NOTE(review): wlxr_type is always th or th2 inside this case, so the condition below is always true and the else branch looks unreachable
|
||||
if (tbl_is_xml // tbl_is_xml always closes previous token
|
||||
|| (wlxr_type == Tblw_type_th2 || wlxr_type == Tblw_type_th)) // ! always closes; EX: "! !!"; "!! !!"; REMOVE: 2012-05-07; had (&& !ws_enabled) but caused "\n !" to fail; guard is no longer necessary since tblw_ws changed...
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(prv_tid), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
else {
|
||||
ctx.Subs_add(root, tkn_mkr.Txt(bgn_pos, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_td: // fix; <td><th> -> <td></td><th> NOTE: common use of using <th> after <td> for formatting
|
||||
if (tbl_is_xml // tbl_is_xml always closes previous token
|
||||
|| (wlxr_type == Tblw_type_th)) // "| !" closes; "| !!" does not;
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(prv_tid), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
else {
|
||||
ctx.Subs_add(root, tkn_mkr.Txt(bgn_pos, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: // fix; <table><th> -> <table><tr><th>
|
||||
ctx.Subs_add_and_stack_tblw(root, prv_tkn, tkn_mkr.Tblw_tr(bgn_pos, cur_pos, tbl_is_xml, true));
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_tc: // fix; <caption><th> -> <caption></caption><tr><th>
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tc), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
ctx.Subs_add_and_stack_tblw(root, prv_tkn, tkn_mkr.Tblw_tr(bgn_pos, cur_pos, tbl_is_xml, true));
|
||||
break;
|
||||
}
|
||||
new_tkn = tkn_mkr.Tblw_th(bgn_pos, cur_pos, tbl_is_xml);
|
||||
cell_pipe_seen = false;
|
||||
break;
|
||||
case Tblw_type_tc: // <caption>
|
||||
switch (prv_tid) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: break; // noop; <table><caption>
|
||||
case Xop_tkn_itm_.Tid_tblw_tr: // fix; <tr><caption> -> <tr></tr><caption> TODO: caption should be ignored and placed in quarantine
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tr), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_td: // fix; <td><caption> -> <td></td><caption>
|
||||
case Xop_tkn_itm_.Tid_tblw_th: // fix; <th><caption> -> <th></th><caption>
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tr), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td); // NOTE: closing <tr> in order to close <td>/<th>
|
||||
ctx.Msg_log().Add_itm_none(Xop_tblw_log.Caption_after_td, src, prv_tkn.Src_bgn(), bgn_pos);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_tc: // fix; <caption><caption> -> <caption></caption><caption>
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tc), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
ctx.Msg_log().Add_itm_none(Xop_tblw_log.Caption_after_tc, src, prv_tkn.Src_bgn(), bgn_pos);
|
||||
break;
|
||||
}
|
||||
new_tkn = tkn_mkr.Tblw_tc(bgn_pos, cur_pos, tbl_is_xml);
|
||||
Xop_tblw_tb_tkn tblw_tb_tkn = (Xop_tblw_tb_tkn)ctx.Stack_get_typ(Xop_tkn_itm_.Tid_tblw_tb);
|
||||
tblw_tb_tkn.Caption_count_add_1(); // NOTE: null check is not necessary (impossible to have a caption without a tblw); DATE:2013-12-20
|
||||
cell_pipe_seen = false; // NOTE: always mark !seen; see Atrs_tc()
|
||||
break;
|
||||
}
|
||||
ctx.Subs_add_and_stack_tblw(root, prv_tkn, new_tkn);
|
||||
if (atrs_bgn > Xop_tblw_wkr.Atrs_ignore_check) {
|
||||
new_tkn.Atrs_rng_set(atrs_bgn, atrs_end);
|
||||
if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki) {
|
||||
Xop_xatr_itm[] atrs = ctx.App().Xatr_parser().Parse(ctx.Msg_log(), src, atrs_bgn, atrs_end);
|
||||
new_tkn.Atrs_ary_as_tblw_(atrs);
|
||||
}
|
||||
}
|
||||
// NOTE: td2 / th2 / tc have no case below, so the para wkr is not notified for them here
|
||||
switch (wlxr_type) {
|
||||
case Tblw_type_tb:
|
||||
case Tblw_type_tr:
|
||||
ctx.Para().Process_block__bgn_y__end_n(Xop_xnde_tag_.Tag_tr);
|
||||
break;
|
||||
case Tblw_type_td:
|
||||
case Tblw_type_th:
|
||||
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_td);
|
||||
break;
|
||||
}
|
||||
return cur_pos;
|
||||
}
|
||||
/**
 * Closes table tokens.
 * When wlxr_type is Tblw_type_te the whole table is closed, depending on prv_tid:
 * - td / th: pops to the tr found on the stack; tc: pops to the tc;
 * - tr: if only newLine / para tkns follow the tr in root, root.Subs_del_after is called at the tr's sub idx;
 * - tb: if no tc / td / th / tr follows the tb in root, a tr and td are auto-created and the table is popped immediately;
 * then the tb is popped, its subs are moved and its Src_end is set to cur_pos.
 * For any other wlxr_type, the stack is searched from the top for a token whose tid is typeId
 * (td and th are treated as interchangeable when prv_tid is the other of the two); for non-tb types the search stops at the first tb.
 * If nothing is found, the closing token is discarded.
 * @return cur_pos in every path
 */
public int Make_tkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int typeId, byte wlxr_type, Xop_tblw_tkn prv_tkn, int prv_tid, boolean tbl_is_xml) {
|
||||
if (!tbl_is_xml) // only for "\n|}" not </table>
|
||||
ctx.Para().Process_nl(ctx, root, src, bgn_pos, bgn_pos + 1); // simulate "\n"; process para (which will create paras for cells) 2012-12-08
|
||||
if (tbl_is_xml && typeId == Xop_tkn_itm_.Tid_tblw_tb // tblx: </table>
|
||||
&& prv_tkn != null && !prv_tkn.Tblw_xml()) { // tblw is prv_tkn
|
||||
++tblw_te_ignore_count; // suppress subsequent occurrences of "|}"; EX:ru.q:Авель; DATE:2014-02-22
|
||||
}
|
||||
Ignore_ws(ctx, root);
|
||||
if (wlxr_type == Tblw_type_te) {
|
||||
switch (prv_tid) {
|
||||
case Xop_tkn_itm_.Tid_tblw_td: // fix; <td></table> -> <td></td></tr></table>
|
||||
case Xop_tkn_itm_.Tid_tblw_th: // fix; <th></table> -> <th></th></tr></table>
|
||||
// NOTE(review): unlike the td / th case under Tblw_type_tr in Make_tkn_bgn_tblw, there is no Stack_not_found guard here; confirm Stack_pop_til tolerates a missing tr
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tr), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_tc: // fix; <caption></table> -> <caption></caption></table>
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tc), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_tr: // fix; <tr></table> -> </table> : tr but no tds; remove tr
|
||||
boolean blank = true;
|
||||
for (int j = prv_tkn.Tkn_sub_idx() + 1; j < root.Subs_len(); j++) {
|
||||
Xop_tkn_itm t = root.Subs_get(j);
|
||||
switch (t.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_newLine:
|
||||
case Xop_tkn_itm_.Tid_para:
|
||||
break;
|
||||
default:
|
||||
blank = false;
|
||||
j = root.Subs_len();
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (blank)
|
||||
root.Subs_del_after(prv_tkn.Tkn_sub_idx());
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: // fix; <table></table> -> <table><tr><td></td></tr></table>
|
||||
boolean has_subs = false;
|
||||
for (int i = prv_tkn.Tkn_sub_idx() + 1; i < root.Subs_len(); i++) {
|
||||
int cur_id = root.Subs_get(i).Tkn_tid();
|
||||
switch (cur_id) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tc:
|
||||
case Xop_tkn_itm_.Tid_tblw_td:
|
||||
case Xop_tkn_itm_.Tid_tblw_th:
|
||||
case Xop_tkn_itm_.Tid_tblw_tr:
|
||||
has_subs = true;
|
||||
i = root.Subs_len();
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!has_subs) {
|
||||
Xop_tkn_itm new_tkn = tkn_mkr.Tblw_tr(bgn_pos, bgn_pos, tbl_is_xml, true);
|
||||
ctx.Subs_add_and_stack_tblw(root, prv_tkn, new_tkn);
|
||||
new_tkn = tkn_mkr.Tblw_td(bgn_pos, bgn_pos, tbl_is_xml);
|
||||
ctx.Subs_add_and_stack_tblw(root, prv_tkn, new_tkn);
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tb), true, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
return cur_pos;
|
||||
}
|
||||
break;
|
||||
}
|
||||
int tb_idx = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tblw_tb);
|
||||
if (tb_idx == -1) return cur_pos; // NOTE: tb_idx can be -1 when called from Pipe in Tmpl mode
|
||||
Xop_tblw_tb_tkn tb = (Xop_tblw_tb_tkn)ctx.Stack_pop_til(root, src, tb_idx, false, bgn_pos, bgn_pos, Xop_tkn_itm_.Tid_tblw_td); // NOTE: need to pop manually in order to set all intermediate node ends to bgn_pos, but tb ent to cur_pos; EX: for stack of "tb,tr,td" tr and td get End_() of bgn_pos but tb gets End_() of cur_pos
|
||||
tb.Subs_move(root);
|
||||
tb.Src_end_(cur_pos);
|
||||
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_table); // NOTE: must clear block state that was started by <tr>; code implicitly relies on td clearing block state, but no td was created
|
||||
return cur_pos;
|
||||
}
|
||||
// not a table end: find the matching open token for the closing tag
|
||||
int acs_typeId = typeId;
|
||||
if (prv_tid != typeId // NOTE: special logic to handle auto-close of <td></th> or <th></td>
|
||||
&& ( (prv_tid == Xop_tkn_itm_.Tid_tblw_td && typeId == Xop_tkn_itm_.Tid_tblw_th)
|
||||
|| (prv_tid == Xop_tkn_itm_.Tid_tblw_th && typeId == Xop_tkn_itm_.Tid_tblw_td)
|
||||
)
|
||||
)
|
||||
acs_typeId = prv_tid;
|
||||
|
||||
int acs_pos = -1, acs_len = ctx.Stack_len();
|
||||
for (int i = acs_len - 1; i > -1; i--) { // find auto-close pos
|
||||
byte cur_acs_tid = ctx.Stack_get(i).Tkn_tid();
|
||||
switch (acs_typeId) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: // if </table>, match <table> only; note that it needs to be handled separately b/c of tb logic below
|
||||
if (acs_typeId == cur_acs_tid) {
|
||||
acs_pos = i;
|
||||
i = -1; // force break;
|
||||
}
|
||||
break;
|
||||
default: // if </t*>, match <t*> but stop at <table>; do not allow </t*> to close <t*> outside <table>
|
||||
if (cur_acs_tid == Xop_tkn_itm_.Tid_tblw_tb) // <table>; do not allow </t*> to close any <t*>'s above <table>; EX:w:Enthalpy_of_fusion; {{States of matter}}
|
||||
i = -1; // this will skip acs_pos != -1 below and discard token
|
||||
else if (cur_acs_tid == acs_typeId) { // </t*> matches <t*>
|
||||
acs_pos = i;
|
||||
i = -1; // force break
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (acs_pos != -1) {
|
||||
Xop_tblw_tkn bgn_tkn = (Xop_tblw_tkn)ctx.Stack_pop_til(root, src, acs_pos, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_tblw_td);
|
||||
switch (wlxr_type) {
|
||||
case Tblw_type_tb:
|
||||
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_table);
|
||||
break;
|
||||
case Tblw_type_td:
|
||||
case Tblw_type_th:
|
||||
ctx.Para().Process_block__bgn_y__end_n(Xop_xnde_tag_.Tag_td);
|
||||
break;
|
||||
}
|
||||
bgn_tkn.Subs_move(root);
|
||||
bgn_tkn.Src_end_(cur_pos);
|
||||
}
|
||||
return cur_pos;
|
||||
}
|
||||
public static void Atrs_close(Xop_ctx ctx, byte[] src, Xop_root_tkn root, boolean called_from_xnde) {
|
||||
Xop_tblw_tkn prv_tkn = ctx.Stack_get_tbl();
|
||||
if (prv_tkn == null || prv_tkn.Tblw_xml()) return; // no tblw or tblw_xnde (which does not have tblw atrs)
|
||||
switch (prv_tkn.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: case Xop_tkn_itm_.Tid_tblw_tr: // only tb and tr have tblw atrs (EX: "{|id=1\n"); td/th use pipes for atrs (EX: "|id=1|a"); tc has no atrs; te is never on stack
|
||||
Xop_tblw_wkr.Atrs_make(ctx, src, root, ctx.Tblw(), prv_tkn, called_from_xnde);
|
||||
break;
|
||||
}
|
||||
}
|
||||
public static boolean Atrs_make(Xop_ctx ctx, byte[] src, Xop_root_tkn root, Xop_tblw_wkr wkr, Xop_tblw_tkn prv_tblw, boolean called_from_xnde) {
|
||||
if (prv_tblw.Atrs_bgn() != Xop_tblw_wkr.Atrs_null) { // atr_bgn/end is empty or already has explicit value; ignore;
|
||||
if (prv_tblw.Atrs_bgn() == Atrs_invalid_by_xnde) { // atr range marked invalid; ignore all tkns between prv_tblw and end of root; EX:"|-id=1<br/>"; PAGE:en.w:A DATE:2014-07-16
|
||||
for (int j = root.Subs_len() - 1; j > -1; --j) {
|
||||
Xop_tkn_itm sub = root.Subs_get(j);
|
||||
if (sub == prv_tblw)
|
||||
return false;
|
||||
else
|
||||
sub.Ignore_y_();
|
||||
}
|
||||
ctx.App().Usr_dlg().Warn_many("", "", "xnde.invalided attributes could not find previous tkn; page=~{0}", ctx.Page_url_str()); // should never happen; DATE:2014-07-16
|
||||
}
|
||||
return false;
|
||||
}
|
||||
int subs_bgn = prv_tblw.Tkn_sub_idx() + 1, subs_end = root.Subs_len() - 1;
|
||||
int subs_pos = subs_bgn;
|
||||
Xop_tkn_itm last_atr_tkn = null;
|
||||
boolean loop = true;
|
||||
while (loop) { // loop over tkns after prv_tkn to find last_atr_tkn
|
||||
if (subs_pos > subs_end) break;
|
||||
Xop_tkn_itm tmp_tkn = root.Subs_get(subs_pos);
|
||||
switch (tmp_tkn.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_newLine: // nl stops; EX: "{| a b c \nd"; bgn at {| and pick up " a b c " as atrs
|
||||
case Xop_tkn_itm_.Tid_hdr: case Xop_tkn_itm_.Tid_hr: // hdr/hr incorporate nl into tkn so include these as well; EX: "{|a\n==b==" becomes tblw,txt,hdr (note that \n is part of hdr
|
||||
case Xop_tkn_itm_.Tid_list: // list stops; EX: "{| a b c\n* d"; "*d" ends atrs; EX: ru.d: DATE:2014-02-22
|
||||
loop = false;
|
||||
break;
|
||||
default:
|
||||
++subs_pos;
|
||||
last_atr_tkn = tmp_tkn;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (last_atr_tkn == null) { // no atrs found; mark tblw_tkn as Atrs_empty
|
||||
int atr_rng_tid
|
||||
= called_from_xnde
|
||||
&& !prv_tblw.Tblw_xml()
|
||||
&& prv_tblw.Tkn_tid() == Xop_tkn_itm_.Tid_tblw_tr // called from xnde && current tid is Tblw_tr; EX:"|- <br/>" PAGE:en.w:A DATE:2014-07-16
|
||||
? Atrs_invalid_by_xnde // invalidate everything
|
||||
: Atrs_empty
|
||||
;
|
||||
prv_tblw.Atrs_rng_set(atr_rng_tid, atr_rng_tid);
|
||||
return false;
|
||||
}
|
||||
root.Subs_del_between(ctx, subs_bgn, subs_pos);
|
||||
int atrs_bgn = prv_tblw.Src_end(), atrs_end = last_atr_tkn.Src_end();
|
||||
if (prv_tblw.Tkn_tid() == Xop_tkn_itm_.Tid_tblw_tr) // NOTE: if "|-" gobble all trailing dashes; REF: Parser.php!doTableStuff; $line = preg_replace( '#^\|-+#', '', $line ); DATE:2013-06-21
|
||||
atrs_bgn = Bry_finder.Find_fwd_while(src, atrs_bgn, src.length, Byte_ascii.Dash);
|
||||
prv_tblw.Atrs_rng_set(atrs_bgn, atrs_end);
|
||||
if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki && atrs_bgn != -1) {
|
||||
Xop_xatr_itm[] atrs = ctx.App().Xatr_parser().Parse(ctx.Msg_log(), src, atrs_bgn, atrs_end);
|
||||
prv_tblw.Atrs_ary_as_tblw_(atrs);
|
||||
}
|
||||
wkr.Cell_pipe_seen_(true);
|
||||
return true;
|
||||
}
|
||||
private void Ignore_ws(Xop_ctx ctx, Xop_root_tkn root) {
|
||||
int end = root.Subs_len() - 1;
|
||||
// get last tr, tc, tb; cannot use ctx.Stack_get_tblw b/c this gets last open tblw, and we want last tblw; EX: "<table><tr></tr>"; Stack_get_tblw gets <table> want </tr>
|
||||
boolean found = false;
|
||||
Xop_tkn_itm prv_tkn = null;
|
||||
for (int i = end; i > -1; i--) {
|
||||
prv_tkn = root.Subs_get(i);
|
||||
switch (prv_tkn.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tr:
|
||||
case Xop_tkn_itm_.Tid_tblw_tc:
|
||||
case Xop_tkn_itm_.Tid_tblw_tb:
|
||||
found = true;
|
||||
i = -1;
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_tblw_td: // exclude td
|
||||
case Xop_tkn_itm_.Tid_tblw_th: // exclude th
|
||||
i = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) return;
|
||||
int bgn = prv_tkn.Tkn_sub_idx() + 1;
|
||||
int rv = Ignore_ws_rng(ctx, root, bgn, end, true);
|
||||
if (rv == -1) return; // entire range is ws; don't bother trimming end
|
||||
Ignore_ws_rng(ctx, root, end, bgn, false);
|
||||
}
|
||||
private int Ignore_ws_rng(Xop_ctx ctx, Xop_root_tkn root, int bgn, int end, boolean fwd) {
|
||||
int cur = bgn, adj = fwd ? 1 : -1;
|
||||
while (true) {
|
||||
if (fwd) {
|
||||
if (cur > end) return -1;
|
||||
}
|
||||
else {
|
||||
if (cur < end) return -1;
|
||||
}
|
||||
Xop_tkn_itm ws_tkn = root.Subs_get(cur);
|
||||
switch (ws_tkn.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_space: case Xop_tkn_itm_.Tid_tab: case Xop_tkn_itm_.Tid_newLine:
|
||||
case Xop_tkn_itm_.Tid_para:
|
||||
ws_tkn.Ignore_y_grp_(ctx, root, cur);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_xnde:
|
||||
if (ws_tkn.Src_bgn() == ws_tkn.Src_end() // NOTE: para_wkr inserts <br/>. these should be disabled in Ignore_ws_rng; they are identified as having bgn == end; normal <br/>s will have bgn < end
|
||||
&& ((Xop_xnde_tkn)ws_tkn).Tag().Id() == Xop_xnde_tag_.Tid_br)
|
||||
ws_tkn.Ignore_y_grp_(ctx, root, cur);
|
||||
break;
|
||||
default:
|
||||
return cur;
|
||||
}
|
||||
cur += adj;
|
||||
}
|
||||
}
|
||||
public static int Handle_false_tblw_match(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos, Xop_tkn_itm tkn, boolean add_nl) {
|
||||
if (add_nl)
|
||||
ctx.Para().Process_nl(ctx, root, src, bgn_pos, cur_pos);
|
||||
ctx.Subs_add(root, tkn);
|
||||
return cur_pos;
|
||||
}
|
||||
public static final int Atrs_null = -1, Atrs_empty = -2, Atrs_invalid_by_xnde = -3, Atrs_ignore_check = -1;
|
||||
public static final byte Tblw_type_tb = 0, Tblw_type_te = 1, Tblw_type_tr = 2, Tblw_type_td = 3, Tblw_type_th = 4, Tblw_type_tc = 5, Tblw_type_td2 = 6, Tblw_type_th2 = 7;
|
||||
}
|
||||
/*
|
||||
NOTE_1:
|
||||
Code tries to emulate HTML tidy behavior. Specifically:
|
||||
- ignore <table> when directly under <table>
|
||||
- if tblw, scan to end of line to ignore attributes
|
||||
- ignore any closing tblws
|
||||
EX:
|
||||
{|id=1
|
||||
{|id=2 <- ignore id=2
|
||||
|}
|
||||
|}
|
||||
*/
|
||||
212
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_wkr__atrs_tst.java
Normal file
212
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_wkr__atrs_tst.java
Normal file
@@ -0,0 +1,212 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_tblw_wkr__atrs_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Tr() {
|
||||
fxt.Test_parse_page_wiki(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-style='a'"
|
||||
, "|b"
|
||||
, "|}"
|
||||
), fxt.tkn_tblw_tb_(0, 20).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 17).Atrs_rng_(5, 14).Subs_
|
||||
( fxt.tkn_tblw_td_(14, 17).Subs_(fxt.tkn_txt_(16, 17), fxt.tkn_para_blank_(18))
|
||||
))
|
||||
);
|
||||
}
|
||||
@Test public void Td() {
|
||||
fxt.Test_parse_page_wiki(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|style='a'|b"
|
||||
, "|}"
|
||||
), fxt.tkn_tblw_tb_(0, 21).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 18).Subs_
|
||||
( fxt.tkn_tblw_td_(5, 18).Atrs_rng_(7, 16).Subs_(fxt.tkn_txt_(17, 18), fxt.tkn_para_blank_(19))
|
||||
))
|
||||
);
|
||||
}
|
||||
@Test public void Td_mult() {
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|"
|
||||
, " {|"
|
||||
, " |-"
|
||||
, " | id='1'|"
|
||||
, " | id='2'|a"
|
||||
, " | id='3'|"
|
||||
, " |}"
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " <table>"
|
||||
, " <tr>"
|
||||
, " <td id='1'>"
|
||||
, " </td>"
|
||||
, " <td id='2'>a"
|
||||
, " </td>"
|
||||
, " <td id='3'>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Tc() { // PAGE:en.w:1920_Palm_Sunday_tornado_outbreak
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|id='1'"
|
||||
, "|+id='2'|a"
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table id='1'>"
|
||||
, " <caption id='2'>a"
|
||||
, " </caption>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Td_mixed() {
|
||||
fxt.Test_parse_page_wiki(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|style='a'|b||c"
|
||||
, "|}"
|
||||
), fxt.tkn_tblw_tb_(0, 24).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 21).Subs_
|
||||
( fxt.tkn_tblw_td_( 5, 18).Atrs_rng_(7, 16).Subs_(fxt.tkn_txt_(17, 18), fxt.tkn_para_blank_(19))
|
||||
, fxt.tkn_tblw_td_(18, 21).Subs_(fxt.tkn_txt_(20, 21), fxt.tkn_para_blank_(22))
|
||||
))
|
||||
);
|
||||
}
|
||||
@Test public void Th() {
|
||||
fxt.Test_parse_page_wiki(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "!style='a'|b"
|
||||
, "|}"
|
||||
), fxt.tkn_tblw_tb_(0, 21).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 18).Subs_
|
||||
( fxt.tkn_tblw_th_(5, 18).Atrs_rng_(7, 16).Subs_(fxt.tkn_txt_(17, 18), fxt.tkn_para_blank_(19))
|
||||
))
|
||||
);
|
||||
}
|
||||
@Test public void Skip_hdr() {
|
||||
fxt.Test_parse_page_wiki(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|+b"
|
||||
, "!style='a'|b"
|
||||
, "|}"
|
||||
), fxt.tkn_tblw_tb_(0, 22).Caption_count_(1).Subs_
|
||||
( fxt.tkn_tblw_tc_(2, 6).Subs_(fxt.tkn_txt_( 5, 6))
|
||||
, fxt.tkn_tblw_tr_(6, 19).Subs_
|
||||
( fxt.tkn_tblw_th_(6, 19).Atrs_rng_(8, 17).Subs_(fxt.tkn_txt_(18, 19), fxt.tkn_para_blank_(20))
|
||||
)
|
||||
));
|
||||
}
|
||||
@Test public void Td_bg_color() { // PURPOSE: atr_parser should treat # as valid character in unquoted val; PAGE:en.w:UTF8; |bgcolor=#eeeeee|<small>Indic</small><br/><small>0800*</small><br/>'''''224'''''
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|bgcolor=#eeeeee|a"
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td bgcolor=\"#eeeeee\">a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Xnde_tb() { // PURPOSE: xnde should close any open xatrs; PAGE:en.w:Western_Front_(World_War_I); stray > after == Dramatizations ==
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|id='1'<p></p>"
|
||||
, "|a"
|
||||
, "|}"), String_.Concat_lines_nl_skip_last
|
||||
( "<table id='1'><p></p>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Xnde_tr() { // PURPOSE: xnde should disable all tkns; PAGE:en.w:A DATE:2014-07-16
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-<b>c</b>id='d'<br/>" // note that id='d' should not show up since <b> invalidates entire line
|
||||
, "|a"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Xnde_mix_tblw_tblx() { // PURPOSE: issue with </tr> somehow rolling up everything after <td>; PAGE:en.w:20th_century; {{Decades and years}}
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table><tr><td>a"
|
||||
, "{|id=1"
|
||||
, "|-"
|
||||
, "|b"
|
||||
, "|}</td></tr></table>"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " <table id=\"1\">"
|
||||
, " <tr>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,823 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_tblw_wkr__basic_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Td() { // Tb_tr_td_te
|
||||
fxt.Test_parse_page_wiki("{|\n|-\n|a\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 11).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 8).Subs_
|
||||
( fxt.tkn_tblw_td_(5, 8).Subs_(fxt.tkn_txt_(7, 8), fxt.tkn_para_blank_(9))))
|
||||
);
|
||||
}
|
||||
@Test public void Td2() { // Tb_tr_td_td2_te
|
||||
fxt.Test_parse_page_wiki("{|\n|-\n|a||b\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 14).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 11).Subs_
|
||||
( fxt.tkn_tblw_td_(5, 8).Subs_(fxt.tkn_txt_( 7, 8), fxt.tkn_para_blank_(9))
|
||||
, fxt.tkn_tblw_td_(8, 11).Subs_(fxt.tkn_txt_(10, 11), fxt.tkn_para_blank_(12))
|
||||
)));
|
||||
}
|
||||
@Test public void Tc() { // Tb_tc_te
|
||||
fxt.Test_parse_page_wiki("{|\n|+a\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 9).Caption_count_(1).Subs_
|
||||
( fxt.tkn_tblw_tc_(2, 6).Subs_
|
||||
( fxt.tkn_txt_(5, 6)
|
||||
, fxt.tkn_para_blank_(7)
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Tc_longer() { // Tb_tc_tr_td_te
|
||||
fxt.Test_parse_page_wiki("{|\n|+a\n|-\n|b\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 15).Caption_count_(1).Subs_
|
||||
( fxt.tkn_tblw_tc_(2, 6).Subs_(fxt.tkn_txt_(5, 6))
|
||||
, fxt.tkn_tblw_tr_(6, 12).Subs_
|
||||
( fxt.tkn_tblw_td_(9, 12).Subs_(fxt.tkn_txt_(11, 12), fxt.tkn_para_blank_(13))
|
||||
)
|
||||
));
|
||||
}
|
||||
@Test public void Th() { // Tb_th_te
|
||||
fxt.Test_parse_page_wiki("{|\n|-\n!a\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 11).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 8).Subs_
|
||||
( fxt.tkn_tblw_th_(5, 8).Subs_(fxt.tkn_txt_(7, 8), fxt.tkn_para_blank_(9))
|
||||
)));
|
||||
}
|
||||
@Test public void Th2() { // Tb_th_th2_te
|
||||
fxt.Test_parse_page_wiki("{|\n|-\n!a!!b\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 14).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 11).Subs_
|
||||
( fxt.tkn_tblw_th_(5, 8).Subs_(fxt.tkn_txt_( 7, 8))
|
||||
, fxt.tkn_tblw_th_(8, 11).Subs_(fxt.tkn_txt_(10, 11), fxt.tkn_para_blank_(12))
|
||||
)));
|
||||
}
|
||||
@Test public void Th2_td_syntax() { // Tb_th_td; || should be treated as th
|
||||
fxt.Test_parse_page_wiki("{|\n|-\n!a||b\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 14).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 11).Subs_
|
||||
( fxt.tkn_tblw_th_(5, 8).Subs_(fxt.tkn_txt_( 7, 8))
|
||||
, fxt.tkn_tblw_th_(8, 11).Subs_(fxt.tkn_txt_(10, 11), fxt.tkn_para_blank_(12))
|
||||
)));
|
||||
}
|
||||
@Test public void Tb_td2() { // PAGE:en.w:Hectare; {| class="wikitable" || style="border: 1px solid #FFFFFF;"
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|id='1' || class='a'"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "|}")
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table id='1' class='a'>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Td_lnki() {
|
||||
fxt.Test_parse_page_wiki("{|\n|-\n|[[a|b]]\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 17).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 14).Subs_
|
||||
( fxt.tkn_tblw_td_(5, 14).Subs_(fxt.tkn_lnki_(7, 14), fxt.tkn_para_blank_(15))))
|
||||
);
|
||||
}
|
||||
@Test public void Tr_dupe_xnde() { // PURPOSE: redundant tr should not be dropped; see [[Jupiter]]
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "<tr><td>a</td></tr>"
|
||||
, "|-"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Tr_dupe_xnde_2() { // <td></th> causes problems
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "<tr><th>a</td></tr>"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <th>a"
|
||||
, " </th>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Bang_should_not_make_cell_td_1_bang() { // PURPOSE: "| a! b" ! should not separate cell
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last("{|", "|-", "|a!b", "|}"), String_.Concat_lines_nl_skip_last("<table>", " <tr>", " <td>a!b" , " </td>", " </tr>", "</table>", ""));
|
||||
}
|
||||
@Test public void Bang_should_not_make_cell_td_2_bang() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last("{|", "|-", "|a!!b", "|}"), String_.Concat_lines_nl_skip_last("<table>", " <tr>", " <td>a!!b" , " </td>", " </tr>", "</table>", ""));
|
||||
}
|
||||
@Test public void Bang_should_not_make_cell_th_1_bang() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last("{|", "|-", "!a!b", "|}"), String_.Concat_lines_nl_skip_last("<table>", " <tr>", " <th>a!b" , " </th>", " </tr>", "</table>", ""));
|
||||
}
|
||||
@Test public void Bang_should_not_make_cell_th_2_bang() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last("{|", "|-", "!a!!b", "|}")
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <th>a"
|
||||
, " </th>"
|
||||
, " <th>b"
|
||||
, " </th>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Bang_should_not_make_cell_th_mult_line() { // FIX: make sure code does not disable subsequent bangs
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last("{|", "|-", "!a", "!b", "|}")
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <th>a"
|
||||
, " </th>"
|
||||
, " <th>b"
|
||||
, " </th>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Fix_extra_cell() { // PURPOSE: trim should not affect td; WP:Base32
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "!id='1'|a"
|
||||
, "|"
|
||||
, "!id='2'|b"
|
||||
, "|-"
|
||||
, "|a1|| ||b1"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <th id='1'>a"
|
||||
, " </th>"
|
||||
, " <td>"
|
||||
, " </td>"
|
||||
, " <th id='2'>b"
|
||||
, " </th>"
|
||||
, " </tr>"
|
||||
, " <tr>"
|
||||
, " <td>a1"
|
||||
, " </td>"
|
||||
, " <td> "
|
||||
, " </td>"
|
||||
, " <td>b1"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Nl_td() { // PURPOSE: <p> inside <td> does not get enclosed
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, "<tr>"
|
||||
, "<td>"
|
||||
, ""
|
||||
, ""
|
||||
, "a"
|
||||
, ""
|
||||
, ""
|
||||
, "</td>"
|
||||
, "</tr>"
|
||||
, "</table>"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, ""
|
||||
, "<p><br/>"
|
||||
, "a"
|
||||
, "</p>"
|
||||
, ""
|
||||
, "<p><br/>"
|
||||
, "</p>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Trim_ws() { // PURPOSE: trim should be done from both sides
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, "<tr>"
|
||||
, "<td>"
|
||||
, "</td>"
|
||||
, "</tr>"
|
||||
, ""
|
||||
, ""
|
||||
, "a"
|
||||
, ""
|
||||
, ""
|
||||
, "</table>"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "a"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Trim_ws_tr() { // PURPOSE: trim should be done from both sides
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, "<tr>"
|
||||
, "<td>"
|
||||
, "</td>"
|
||||
, "</tr>"
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, "<tr>"
|
||||
, "<td>"
|
||||
, "</td>"
|
||||
, "</tr>"
|
||||
, "</table>"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Trim_ws_td() { // PURPOSE: trim should not affect td
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, "<tr>"
|
||||
, "<td>"
|
||||
, ""
|
||||
, ""
|
||||
, "a"
|
||||
, ""
|
||||
, ""
|
||||
, "</td>"
|
||||
, "</tr>"
|
||||
, "</table>"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, ""
|
||||
, "<p><br/>"
|
||||
, "a"
|
||||
, "</p>"
|
||||
, ""
|
||||
, "<p><br/>"
|
||||
, "</p>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void No_wiki_3() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|style=<nowiki>'a[b]c'</nowiki>|d"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td style='a[b]c'>d"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Trailing_tr_breaks_para_mode() {// PURPOSE.fix: empty trailing tr breaks para mode; EX:w:Sibelius
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|a"
|
||||
, "|-" // causes lines below not to be put in paras
|
||||
, "|}"
|
||||
, "b"
|
||||
, ""
|
||||
, "c"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
, "<p>b"
|
||||
, "</p>"
|
||||
, ""
|
||||
, "<p>c"
|
||||
, "</p>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Blank_line_should_be_own_para() {// PURPOSE.fix: caption does not begin on own line; EX:w:Old St. Peter's Basilica
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|a"
|
||||
, "b"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, ""
|
||||
, "<p>b"
|
||||
, "</p>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Blank_line_should_be_own_para_2() {// PURPOSE.fix: caption does not begin on own line; EX:w:Old St. Peter's Basilica
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|a"
|
||||
, "b"
|
||||
, "|-"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, ""
|
||||
, "<p>b"
|
||||
, "</p>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Bold_stops_at_table() { // PURPOSE: do not allow unclosed bold to extend over tables;
|
||||
fxt.Test_parse_page_all_str("'''<table><tr><td>a</td></tr></table>", String_.Concat_lines_nl_skip_last
|
||||
( "<b></b>"
|
||||
, "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_defn_clear();
|
||||
}
|
||||
@Test public void Orphaned_tr_breaks_nested_tables() { // PUPRPOSE: </tr> should not match <tr> outside scope; EX:w:Enthalpy_of_fusion; {{States of matter}}
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, "<tr>"
|
||||
, "<td>"
|
||||
, "<table>"
|
||||
, "</tr>"
|
||||
, "</table>"
|
||||
, "</td>"
|
||||
, "<td>a"
|
||||
, "</td>"
|
||||
, "</tr>"
|
||||
, "</table>"
|
||||
),
|
||||
String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " <table>"
|
||||
, " </table>"
|
||||
, " </td>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Space_causes_extra_p() {// PURPOSE: "\n\s</td>" should be equivalent to "\n</td>"; EX: w:Earth
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table><tr><td>"
|
||||
, "b"
|
||||
, "<br/>c"
|
||||
, " </td></tr></table>"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, ""
|
||||
, "<p>b" // used to close <p> here; <p>b</p>
|
||||
, "<br/>c"
|
||||
, "</p>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Br_should_not_be_ignored() {// PURPOSE: document <br />'s should not be ignored between tables; 20121226
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "|}"
|
||||
, "<br />"
|
||||
, "{|"
|
||||
, "|-"
|
||||
, "|b"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, "<br />" // was being ignored
|
||||
, "<table>"
|
||||
, " <tr>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void AutoClose_td_when_new_tr() { // retain; needed for de.w:Main_Page; DATE:2013-12-09
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "==a=="
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, ""
|
||||
, "<h2>a</h2>" // NOTE: malformed html matches MW
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
fxt.Test_parse_page_wiki("{|\n==b==\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 8).Subs_
|
||||
( fxt.tkn_hdr_(2, 8, 2).Subs_
|
||||
( fxt.tkn_txt_(5, 6)
|
||||
)
|
||||
, fxt.tkn_para_blank_(9)
|
||||
, fxt.tkn_tblw_tr_(8, 8).Subs_
|
||||
( fxt.tkn_tblw_td_( 8, 8))
|
||||
));
|
||||
}
|
||||
@Test public void Auto_create_table() {// PURPOSE: <td> should create table; EX:w:Hatfield-McCoy_feud; DATE:20121226
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "<td>a"
|
||||
, "</td>"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void List_and_orphaned_td2_should_not_create_tblw() {// PURPOSE: !! was creating table; DATE:2013-04-28
|
||||
fxt.Test_parse_page_all_str("*a !! b", String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a !! b"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Tr_trailing_dashes_should_be_stripped() {// PURPOSE: trailing dashes should be stripped; |--- -> |-; EX: |--style="x" was being ignored; DATE:2013-06-21
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-----style='a'"
|
||||
, "|b"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr style='a'>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Th_without_tr() { // PURPOSE: !! without preceding ! should not create table-cell; DATE:2013-12-18
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|"
|
||||
, "a!!b"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, "a!!b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Td_at_eos() {// PURPOSE.fix: !! at eos fails; EX:es.s:Si_mis_manos_pudieran_deshojar; DATE:2014-02-11
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "| <poem>!!</poem>" // note that "!!" is eos inside the <poem> src
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td> <div class=\"poem\">"
|
||||
, "<p>"
|
||||
, "!!"
|
||||
, "</p>"
|
||||
, "</div>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Tr_without_tb_should_start_tb() {// PURPOSE: orphaned tr should automatically start table; EX: pl.w:Portal:Technika; DATE:2014-02-13
|
||||
fxt.Test_parse_page_all_str("<tr><td>a"
|
||||
, String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Tblx_should_not_close_tblw() {// PURPOSE: </table> should not close {|; EX:fr.w:Exp%C3%A9dition_Endurance; DATE:2014-02-13
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|"
|
||||
, "</table>"
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Tblx_should_not_close_tblw_2() {// PURPOSE: </table> should close {|; ignore latter |}; EX:ru.q:Авель; DATE:2014-02-22
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "</table>"
|
||||
, "{|"
|
||||
, "|-"
|
||||
, "|b"
|
||||
, "</table>"
|
||||
, "{|"
|
||||
, "|-"
|
||||
, "|c"
|
||||
, "</table>"
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, "<table>"
|
||||
, " <tr>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, "<table>"
|
||||
, " <tr>"
|
||||
, " <td>c"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Td_in_list_in_tblw_should_be_ignored() {// PURPOSE: || should be ignored if in list; EX:es.d:casa; DATE:2014-02-15
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|"
|
||||
, "* a || b"
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " <ul>"
|
||||
, " <li> a || b"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void List_in_tblw() {// PURPOSE: list should close previous cell; EX: ru.d:Викисловарь:Условные_сокращения; DATE:2014-02-22
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|"
|
||||
, "{|"
|
||||
, "*a"
|
||||
, "|}"
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " <table>"
|
||||
, " <ul>" // NOTE: this should probably be inside <tr>, but this matches MW behavior; DATE:2014-02-22
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
}
|
||||
// @Test public void Tb_under_tr_is_ignored() { // PURPOSE: table directly under tr is ignored; PAGE:en.w:Category:Dessert stubs; TODO: complicated, especially to handle 2nd |}
|
||||
// fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
// ( "{|"
|
||||
// , "|-id='a'"
|
||||
// , "{|style='border:1px;'"
|
||||
// , "|-id='b'"
|
||||
// , "|b"
|
||||
// , "|}"
|
||||
// , "|}"
|
||||
// ), String_.Concat_lines_nl_skip_last
|
||||
// ( "<table>"
|
||||
// , " <tr id=\"b\">"
|
||||
// , " <td>b"
|
||||
// , " </td>"
|
||||
// , " </tr>"
|
||||
// , "</table>"
|
||||
// , ""
|
||||
// ));
|
||||
// }
|
||||
// @Test public void Leading_ws() { // PAGE:en.w:Corneal dystrophy (human)
|
||||
// fxt.Test_parse_page_wiki(String_.Concat_lines_nl_skip_last
|
||||
// ( " {|"
|
||||
// , " |-"
|
||||
// , " |a"
|
||||
// , " |}"
|
||||
// )
|
||||
// , fxt.tkn_tblw_tb_(1, 15).Subs_
|
||||
// ( fxt.tkn_tblw_tr_(3, 11).Subs_
|
||||
// ( fxt.tkn_tblw_td_(7, 11).Subs_
|
||||
// ( fxt.tkn_txt_())
|
||||
// )
|
||||
// )
|
||||
// );
|
||||
// }
|
||||
// @Test public void Atrs_tb() { // Tb_te // FUTURE: reinstate; WHEN: Template
|
||||
// fxt.Init_log_(Xop_tblw_log.Tbl_empty).Test_parse_page_wiki("{|style='a'\n|}"
|
||||
// , fxt.tkn_tblw_tb_(0, 14).Atrs_rng_(2, 11).Subs_
|
||||
// ( fxt.tkn_tblw_tr_(11, 11).Subs_
|
||||
// ( fxt.tkn_tblw_td_(11, 11)
|
||||
// )));
|
||||
// }
|
||||
// @Test public void Td_p() { // PURPOSE: <p> not being closed correctly
|
||||
// fxt.Init_para_y_();
|
||||
// fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
// ( "{|"
|
||||
// , "|-"
|
||||
// , "|"
|
||||
// , "a"
|
||||
// , "|}"), String_.Concat_lines_nl_skip_last
|
||||
// ( "<table>"
|
||||
// , " <tr>"
|
||||
// , " <td>"
|
||||
// , ""
|
||||
// , "<p>a"
|
||||
// , "</p>"
|
||||
// , " </td>"
|
||||
// , " </tr>"
|
||||
// , "</table>"
|
||||
// , ""
|
||||
// ));
|
||||
// fxt.Init_para_n_();
|
||||
// }
|
||||
// @Test public void Tb_tb() {
|
||||
// fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
// ( "{|id='1'"
|
||||
// , "{|id='2'"
|
||||
// , "|-id='3'"
|
||||
// , "|a"
|
||||
// , "|}"
|
||||
// , "|}"), String_.Concat_lines_nl_skip_last
|
||||
// ( "<table id='1'>"
|
||||
// , " <tr id='3'>"
|
||||
// , " <td>a"
|
||||
// , " </td>"
|
||||
// , " </tr>"
|
||||
// , "</table>"
|
||||
// , ""
|
||||
// ));
|
||||
// }
|
||||
// @Test public void Tb_tb_2() {
|
||||
// fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
// ( "{|id='1'"
|
||||
// , "{|id='2' <table id='3'>"
|
||||
// , "|a"
|
||||
// , "</table>"
|
||||
// , "|}"
|
||||
// , "|}"), String_.Concat_lines_nl_skip_last
|
||||
// ( "<table id='1'>"
|
||||
// , " <tr id='3'>"
|
||||
// , " <td>a"
|
||||
// , " </td>"
|
||||
// , " </tr>"
|
||||
// , "</table>"
|
||||
// , ""
|
||||
// ));
|
||||
// }
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_tblw_wkr__dangling_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Dangling_tb_in_xnde() {// PURPOSE: dangling tblw incorrectly auto-closed by </xnde>; PAGE:en.w:Atlanta_Olympics; DATE:2014-03-18
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "<div align='center'>"
|
||||
, "{|"
|
||||
, "|-"
|
||||
, "|"
|
||||
, "{|"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "|}"
|
||||
, "</div>"
|
||||
, "b"
|
||||
)
|
||||
, String_.Concat_lines_nl
|
||||
( "<div align='center'>"
|
||||
, "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " <table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, "</div>" // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
, ""
|
||||
, "<p>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, "</div>"
|
||||
, "</p>"
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_tblw_wkr__double_pipe_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void No_tblw() { // PURPOSE: if || has no tblw, treat as lnki; none; DATE:2014-05-06
|
||||
fxt.Test_parse_page_all_str("[[A||b|c]]", String_.Concat_lines_nl_skip_last
|
||||
( "<p><a href=\"/wiki/A\">b|c</a>" // NOTE: technically this should be "|b|c", but difficult to implement; DATE:2014-05-06
|
||||
, "</p>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Lnki_nth() { // PURPOSE: if || is nth pipe, then treat as lnki; PAGE:en.w:Main_Page;de.w:Main_Page; DATE:2014-05-06
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|[[File:A.png|b||c]]"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td><a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"c\" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Lnki_list_1st() { // PURPOSE: if || is 1st pipe, but inside list, then treat as lnki; EX:w:Second_Boer_War; DATE:2014-05-05
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|"
|
||||
, "*[[A||b]]"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, ""
|
||||
, " <ul>"
|
||||
, " <li><a href=\"/wiki/A\">b</a>" // NOTE: technically this should be "|b", but difficult to implement; DATE:2014-05-06
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Double_bang_lnki() { // PURPOSE: do not treat !! as tblw; PAGE:en.w:Pink_(singer); DATE:2014-06-25
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|"
|
||||
, "[[A!!b]]"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, ""
|
||||
, "<p><a href=\"/wiki/A!!b\">A!!b</a>"
|
||||
, "</p>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Double_bang_list() { // PURPOSE: do not treat !! as tblw; PAGE:en.w:Wikipedia:Featured_picture_candidates; DATE:2014-10-19
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "* a !! b"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <ul>"
|
||||
, " <li> a !! b"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, "</p>" // NOTE: </p> is incorrect, but benign
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_tblw_wkr__errs_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Err_row_empty() {
|
||||
fxt.Test_parse_page_wiki("{|\n|-\n|-\n|a\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 14).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 5)
|
||||
, fxt.tkn_tblw_tr_(5, 11).Subs_
|
||||
( fxt.tkn_tblw_td_(8, 11).Subs_(fxt.tkn_txt_(10, 11), fxt.tkn_para_blank_(12))
|
||||
))
|
||||
);
|
||||
}
|
||||
@Test public void Err_row_trailing() {
|
||||
fxt.Test_parse_page_wiki("{|\n|-\n|a\n|-\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 14).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 8).Subs_
|
||||
( fxt.tkn_tblw_td_(5, 8).Subs_(fxt.tkn_txt_(7, 8), fxt.tkn_para_blank_(9))
|
||||
))
|
||||
);
|
||||
}
|
||||
@Test public void Err_caption_after_tr() {
|
||||
fxt.Test_parse_page_wiki("{|\n|-\n|+a\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 12).Caption_count_(1).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 5)
|
||||
, fxt.tkn_tblw_tc_(5, 9).Subs_(fxt.tkn_txt_(8, 9), fxt.tkn_para_blank_(10)))
|
||||
);
|
||||
}
|
||||
@Test public void Err_caption_after_td() {
|
||||
fxt.Init_log_(Xop_tblw_log.Caption_after_td).Test_parse_page_wiki("{|\n|-\n|a\n|+b\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 15).Caption_count_(1).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 8).Subs_
|
||||
( fxt.tkn_tblw_td_(5, 8).Subs_(fxt.tkn_txt_(7, 8)))
|
||||
, fxt.tkn_tblw_tc_(8, 12).Subs_(fxt.tkn_txt_(11, 12), fxt.tkn_para_blank_(13)))
|
||||
);
|
||||
}
|
||||
@Test public void Err_caption_after_tc() {
|
||||
fxt.Init_log_(Xop_tblw_log.Caption_after_tc).Test_parse_page_wiki("{|\n|+a\n|+b\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 13).Caption_count_(2).Subs_
|
||||
( fxt.tkn_tblw_tc_(2, 6).Subs_(fxt.tkn_txt_( 5, 6))
|
||||
, fxt.tkn_tblw_tc_(6, 10).Subs_(fxt.tkn_txt_( 9, 10), fxt.tkn_para_blank_(11)))
|
||||
);
|
||||
}
|
||||
@Test public void Err_row_auto_opened() {
|
||||
fxt.Test_parse_page_wiki("{|\n|a\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 8).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 5).Subs_
|
||||
( fxt.tkn_tblw_td_(2, 5).Subs_(fxt.tkn_txt_(4, 5), fxt.tkn_para_blank_(6))
|
||||
)));
|
||||
}
|
||||
@Test public void Err_caption_auto_closed() {
|
||||
fxt.Test_parse_page_wiki("{|\n|+a\n|b\n|}"
|
||||
, fxt.tkn_tblw_tb_(0, 12).Caption_count_(1).Subs_
|
||||
( fxt.tkn_tblw_tc_(2, 6).Subs_(fxt.tkn_txt_(5, 6))
|
||||
, fxt.tkn_tblw_tr_(6, 9).Subs_
|
||||
( fxt.tkn_tblw_td_(6, 9).Subs_(fxt.tkn_txt_(8, 9),fxt.tkn_para_blank_(10))
|
||||
)));
|
||||
}
|
||||
@Test public void Err_Atrs_dumped_into_text() { // PURPOSE: [[Prawn]] and {{Taxobox}} was dumping text
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|-id='a'"
|
||||
, "|b"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr id='a'>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,200 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_tblw_wkr__nested_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test_parse_page_wiki(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|"
|
||||
, "{|"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "|}"
|
||||
, "|b"
|
||||
, "|}"
|
||||
)
|
||||
, fxt.tkn_tblw_tb_(0, 25).Subs_
|
||||
( fxt.tkn_tblw_tr_(2, 22).Subs_
|
||||
( fxt.tkn_tblw_td_(5, 19).Subs_
|
||||
( fxt.tkn_tblw_tb_(7, 19).Subs_
|
||||
( fxt.tkn_tblw_tr_(10, 16).Subs_
|
||||
( fxt.tkn_tblw_td_(13, 16).Subs_(fxt.tkn_txt_(15, 16), fxt.tkn_para_blank_(17))
|
||||
)
|
||||
)
|
||||
, fxt.tkn_para_blank_(20)
|
||||
)
|
||||
, fxt.tkn_tblw_td_(19, 22).Subs_(fxt.tkn_txt_(21, 22), fxt.tkn_para_blank_(23))
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Leading_ws() {
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|id='a'"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "|-"
|
||||
, "|id='b'|"
|
||||
, " {|id='c'"
|
||||
, " |-"
|
||||
, " |d"
|
||||
, " |}"
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table id='a'>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " <tr>"
|
||||
, " <td id='b'>"
|
||||
, " <table id='c'>"
|
||||
, " <tr>"
|
||||
, " <td>d"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Tblx_tblw() { // PURPOSE: if <table> followed by {|, ignore 2nd table; EX: en.b:Wikibooks:Featured_books; DATE:2014-02-08
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table cellpadding=\"0\">"
|
||||
, "{| cellspacing=\"0\""
|
||||
, "|a"
|
||||
, "|}"
|
||||
, "</table>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table cellpadding=\"0\">"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Caption_and_tblw() { // TIDY: don't try to fix <caption><table> sequence; PAGE:es.w:Sevilla; DATE:2014-06-29
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|+"
|
||||
, "{|"
|
||||
, "|}"
|
||||
, "|}"), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <caption>"
|
||||
, " <table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, " </caption>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Tb_tr_tb() { // PURPOSE: if <tr><table>, auto-create <tr><td>; EX:w:Paris; DATE:2014-03-18
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "{|"
|
||||
, "|}"
|
||||
, "|}"), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Tblw_tblx_tblw_fails() { // PURPOSE: {| -> <table> -> \n| was not rendering as <td>; PAGE:en.w:Paris#Demographics; DATE:2014-03-18
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "</td></tr>"
|
||||
, "<tr><td><table>"
|
||||
, "<tr><td>b</td>"
|
||||
, "</tr>"
|
||||
, "|c"
|
||||
, "</td></tr></table>"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " <table>"
|
||||
, " <tr>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " <tr>"
|
||||
, " <td>c"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
// @Test public void Nested_tbl_missing() { // PURPOSE: nested table not rendering properly; EX:ar.s:; DATE:2014-03-18
|
||||
// fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
// ( "{|"
|
||||
// , "|-"
|
||||
// , "{|"
|
||||
// , "|-"
|
||||
// , "|}"
|
||||
// , "| width='50%' | a"
|
||||
// , "|}"
|
||||
// ), String_.Concat_lines_nl_skip_last
|
||||
// ( "<table>"
|
||||
// , " <tr>"
|
||||
// , " <td>a"
|
||||
// , " </td>"
|
||||
// , " <td>[[b|c"
|
||||
// , " </td>"
|
||||
// , " </tr>"
|
||||
// , "</table>"
|
||||
// , ""
|
||||
// , "<p>d"
|
||||
// , "</p>"
|
||||
// ));
|
||||
// }
|
||||
}
|
||||
156
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_wkr__para_tst.java
Normal file
156
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_wkr__para_tst.java
Normal file
@@ -0,0 +1,156 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_tblw_wkr__para_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Para() { // PURPOSE: para causing strange breaks; SEE:[[John F. Kennedy]] and "two Supreme Court appointments"
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "<p></p>"
|
||||
, "|a"
|
||||
, "<p></p>"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table><p></p>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, "<p></p>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Nl() { // PURPOSE: para causing strange breaks; SEE:[[John F. Kennedy]] and "two Supreme Court appointments"
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "!a"
|
||||
, ""
|
||||
, "|-"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <th>a"
|
||||
, " </th>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Unnecessary_para() { // PURPOSE: tblw causes unnecessary <p>; [[Help:Download]]; DATE:2014-02-20
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|"
|
||||
, "a<br/>"
|
||||
, "b"
|
||||
, "|"
|
||||
, "c<br/>"
|
||||
, "d"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, ""
|
||||
, "<p>a<br/>"
|
||||
, "b"
|
||||
, "</p>"
|
||||
, " </td>"
|
||||
, " <td>"
|
||||
, ""
|
||||
, "<p>c<br/>"
|
||||
, "d"
|
||||
, "</p>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Ws_leading() { // PAGE:en.w:AGPLv3
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, " !a"
|
||||
, " !b"
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <th>a"
|
||||
, " </th>"
|
||||
, " <th>b"
|
||||
, " </th>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Ws_th_2() { // "\n\s!" should still be interpreted as tblw; s.w:Manchester; DATE:2014-02-14
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|!style='color:red'|a"
|
||||
, " !style=\"color:blue\"|b"
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " <th style=\"color:blue\">b"
|
||||
, " </th>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Ws_th_3() { // "\n\s!" and "!!" breaks tblw; ru.w:Храмы_Санкт-Петербурга (List of churches in St Petersburg); DATE:2014-02-20
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, " ! id='1' | a !! id='2' | b"
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <th id='1'> a "
|
||||
, " </th>"
|
||||
, " <th id='2'> b"
|
||||
, " </th>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Tblw_td2_should_not_create_ws() { // PURPOSE: a||b -> a\n||b; EX:none;discovered during luaj test; DATE:2014-04-14
|
||||
fxt.Test_parse_page_wiki_str("a||b", "<p>a||b\n</p>");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_tblw_wkr__tblx_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Ignore_td() { // PURPOSE: do not parse pipe as td if in <table>; EX:ru.w:Сочи; DATE:2014-02-22
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, "| b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, "| b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Ignore_tr() { // PURPOSE: do not parse "\n|-", "\n!" if in <table>; EX:s.w:Uranus; DATE:2014-05-05
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, "|-"
|
||||
, "! b"
|
||||
, "| c"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, ""
|
||||
, "<p>|-"
|
||||
, "! b"
|
||||
, "| c"
|
||||
, "</p>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_tblw_wkr__uncommon_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Tr_pops_entire_stack() { // PURPOSE: in strange cases, tr will pop entire stack; PAGE:en.w:Turks_in_Denmark; DATE:2014-03-02
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "<caption>a"
|
||||
, "|b"
|
||||
, "|-"
|
||||
, "|c"
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <caption>a"
|
||||
, " </caption>"
|
||||
, " <tr>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " <tr>"
|
||||
, " <td>c"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Atrs_defect() { // PURPOSE: < in atrs was causing premature termination; PAGE:en.w:Wikipedia:List of hoaxes on Wikipedia
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|id=\"a<b\""
|
||||
, "|a"
|
||||
, "|}"), String_.Concat_lines_nl_skip_last
|
||||
( "<table id=\"a.3Cb\">"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Broken_lnki() { // PURPOSE: broken lnki was not closing table properly; PAGE:en.w:Wikipedia:Changing_attribution_for_an_edit; DATE:2014-03-16
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "|[[b|c"
|
||||
, "|}"
|
||||
, "d"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " <td>[[b|c"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
, "<p>d"
|
||||
, "</p>"
|
||||
));
|
||||
}
|
||||
@Test public void Broken_lnki_2() { // PURPOSE: variation on above; PAGE:hr.b:Knjiga_pojmova_u_zrakoplovstvu/Kratice_u_zrakoplovstvu/S; DATE:2014-09-05
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "| [[A | b"
|
||||
, "|-"
|
||||
, "| B"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td> [[A | b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " <tr>"
|
||||
, " <td> B"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
}
|
||||
63
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_ws_itm.java
Normal file
63
400_xowa/src/gplx/xowa/parsers/tblws/Xop_tblw_ws_itm.java
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_tblw_ws_itm {
|
||||
public byte Tblw_type() {return tblw_type;} private byte tblw_type;
|
||||
public int Hook_len() {return hook_len;} private int hook_len;
|
||||
public Xop_tblw_ws_itm(byte tblw_type, int hook_len) {this.tblw_type = tblw_type; this.hook_len = hook_len;}
|
||||
|
||||
public static final byte Type_tb = Xop_tblw_wkr.Tblw_type_tb, Type_te = Xop_tblw_wkr.Tblw_type_te, Type_tr = Xop_tblw_wkr.Tblw_type_tr, Type_tc = Xop_tblw_wkr.Tblw_type_tc
|
||||
, Type_th = Xop_tblw_wkr.Tblw_type_th, Type_td = Xop_tblw_wkr.Tblw_type_td, Type_nl = 16, Type_xnde = 17;
|
||||
public static Btrie_slim_mgr trie_() {// MW.REF:Parser.php|doBlockLevels
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.cs_();
|
||||
trie_itm(rv, Type_tb, Xop_tblw_lxr_ws.Hook_tb);
|
||||
trie_itm(rv, Type_te, Xop_tblw_lxr_ws.Hook_te);
|
||||
trie_itm(rv, Type_tr, Xop_tblw_lxr_ws.Hook_tr);
|
||||
trie_itm(rv, Type_th, Xop_tblw_lxr_ws.Hook_th);
|
||||
trie_itm(rv, Type_tc, Xop_tblw_lxr_ws.Hook_tc);
|
||||
trie_itm(rv, Type_td, Bry_.bytes_(Byte_ascii.Pipe));
|
||||
trie_itm(rv, Type_nl, Bry_.bytes_(Byte_ascii.Nl));
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_table);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_tr);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_td);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_th);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_blockquote);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_h1);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_h2);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_h3);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_h4);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_h5);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_h6);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_pre);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_p);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_div);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_hr);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_li);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_ul);
|
||||
trie_itm_xnde(rv, Xop_xnde_tag_.Tag_ol);
|
||||
return rv;
|
||||
}
|
||||
private static void trie_itm(Btrie_slim_mgr trie, byte type, byte[] bry) {trie.Add_obj(bry, new Xop_tblw_ws_itm(type, bry.length));}
|
||||
private static void trie_itm_xnde(Btrie_slim_mgr trie, Xop_xnde_tag tag) {
|
||||
byte[] tag_name = tag.Name_bry();
|
||||
int tag_name_len = tag_name.length;
|
||||
trie.Add_obj(Bry_.Add(Bry_xnde_bgn, tag_name), new Xop_tblw_ws_itm(Type_xnde, tag_name_len));
|
||||
trie.Add_obj(Bry_.Add(Bry_xnde_end, tag_name), new Xop_tblw_ws_itm(Type_xnde, tag_name_len + 1));
|
||||
} static byte[] Bry_xnde_bgn = new byte[] {Byte_ascii.Lt, Byte_ascii.Slash}, Bry_xnde_end = new byte[] {Byte_ascii.Lt};
|
||||
}
|
||||
68
400_xowa/src/gplx/xowa/parsers/tmpls/Nowiki_escape_itm.java
Normal file
68
400_xowa/src/gplx/xowa/parsers/tmpls/Nowiki_escape_itm.java
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tmpls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.html.*; import gplx.xowa.parsers.amps.*;
|
||||
public class Nowiki_escape_itm {
|
||||
public Nowiki_escape_itm(byte[] src, byte[] trg) {this.src = src; this.trg = trg; this.src_adj = src.length - 1;}
|
||||
private int src_adj;
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public byte[] Trg() {return trg;} private byte[] trg;
|
||||
public static boolean Escape(Bry_bfr tmp_bfr, byte[] src, int bgn, int end) {// <nowiki> works by escaping all wtxt symbols so that wtxt parser does not hook into any of them
|
||||
boolean dirty = false;
|
||||
for (int i = bgn; i < end; i++) {
|
||||
byte b = src[i];
|
||||
Object o = trie.Match_bgn_w_byte(b, src, i, end);
|
||||
if (o == null) {
|
||||
if (dirty)
|
||||
tmp_bfr.Add_byte(b);
|
||||
}
|
||||
else {
|
||||
if (!dirty) {
|
||||
tmp_bfr.Add_mid(src, bgn, i);
|
||||
dirty = true;
|
||||
}
|
||||
Nowiki_escape_itm itm = (Nowiki_escape_itm)o;
|
||||
tmp_bfr.Add(itm.Trg());
|
||||
i += itm.src_adj;
|
||||
}
|
||||
}
|
||||
return dirty;
|
||||
}
|
||||
private static final byte[] Pre_bry = new byte[] {Byte_ascii.Nl, Byte_ascii.Space}; // NOTE: must go before trie_new
|
||||
private static final Btrie_slim_mgr trie = trie_new();
|
||||
private static Btrie_slim_mgr trie_new() {
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.cs_();
|
||||
trie_new_itm(rv, Byte_ascii.Lt_bry , Xop_amp_trie.Bry_xowa_lt);
|
||||
trie_new_itm(rv, Byte_ascii.Brack_bgn_bry , Xop_amp_trie.Bry_xowa_brack_bgn);
|
||||
trie_new_itm(rv, Byte_ascii.Brack_end_bry , Xop_amp_trie.Bry_xowa_brack_end); // PAGE:en.w: Tall_poppy_syndrome DATE:2014-07-23
|
||||
trie_new_itm(rv, Byte_ascii.Pipe_bry , Xop_amp_trie.Bry_xowa_pipe);
|
||||
trie_new_itm(rv, Byte_ascii.Apos_bry , Xop_amp_trie.Bry_xowa_apos); // NOTE: for backward compatibility, use ' note that amp_wkr will turn ' -> ' but ' -> '; DATE:2014-07-03
|
||||
trie_new_itm(rv, Byte_ascii.Colon_bry , Xop_amp_trie.Bry_xowa_colon);
|
||||
trie_new_itm(rv, Byte_ascii.Underline_bry , Xop_amp_trie.Bry_xowa_underline);
|
||||
trie_new_itm(rv, Byte_ascii.Asterisk_bry , Xop_amp_trie.Bry_xowa_asterisk);
|
||||
trie_new_itm(rv, Byte_ascii.Dash_bry , Xop_amp_trie.Bry_xowa_dash); // needed to handle "|<nowiki>-</nowiki>"; PAGE:de.w:Liste_von_Vereinen_und_Vereinigungen_von_Gl<47>ubigen_(r<>misch-katholische_Kirche) DATE:2015-01-08
|
||||
trie_new_itm(rv, Byte_ascii.Space_bry , Xop_amp_trie.Bry_xowa_space);
|
||||
trie_new_itm(rv, Byte_ascii.Nl_bry , Xop_amp_trie.Bry_xowa_nl);
|
||||
trie_new_itm(rv, Pre_bry , Pre_bry);
|
||||
return rv;
|
||||
}
|
||||
private static void trie_new_itm(Btrie_slim_mgr rv, byte[] src, byte[] trg) {
|
||||
Nowiki_escape_itm itm = new Nowiki_escape_itm(src, trg);
|
||||
rv.Add_obj(src, itm);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user