1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-07-12 21:10:02 -04:00
commit 794b5a232f
3099 changed files with 238212 additions and 0 deletions

View File

@@ -0,0 +1,47 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/**
 * Name-keyed view over the attribute items parsed from one xml node.
 * Items live in an Ordered_hash keyed by their key bytes; values are resolved against the
 * source bytes handed to the constructor.
 */
public class Xop_xatr_hash {
  private final byte[] src; // source bytes that the items' value ranges index into
  private final Ordered_hash hash = Ordered_hash_.new_bry_();
  Xop_xatr_hash(byte[] src) {
    this.src = src;
  }
  /** Returns the item count reported by the underlying hash. */
  public int Len() {
    return hash.Count();
  }
  /** Returns the item stored at position idx of the underlying ordered hash. */
  public Xop_xatr_itm Get_at(int idx) {
    Object found = hash.Get_at(idx);
    return (Xop_xatr_itm)found;
  }
  /** Returns the item registered under key (converted to bytes with Bry_.new_u8), or whatever the hash yields for a missing key. */
  public Xop_xatr_itm Get_by(String key) {
    byte[] key_bry = Bry_.new_u8(key);
    return (Xop_xatr_itm)hash.Get_by(key_bry);
  }
  /** Returns the value bytes of the attribute named key, or the supplied fallback when no such attribute exists. */
  public byte[] Get_as_bry_or(String key, byte[] or) {
    Xop_xatr_itm found = Get_by(key);
    if (found == null) return or;
    return found.Val_as_bry(src);
  }
  /** Returns true only when an attribute named key exists and String_.Eq reports its value equal to val. */
  public boolean Match(String key, String val) {
    Xop_xatr_itm found = Get_by(key);
    return found != null && String_.Eq(found.Val_as_str(src), val);
  }
  // registers itm under its key bytes; duplicate handling is delegated to Add_if_dupe_use_nth
  // NOTE(review): the helper's name suggests a later duplicate replaces an earlier one -- confirm against Ordered_hash
  private void Add(Xop_xatr_itm itm) {
    byte[] key_bry = itm.Key_bry();
    hash.Add_if_dupe_use_nth(key_bry, itm);
  }
  /** Builds a hash over src and adds every element of ary in array order. */
  public static Xop_xatr_hash new_ary(byte[] src, Xop_xatr_itm[] ary) {
    Xop_xatr_hash rv = new Xop_xatr_hash(src);
    int len = ary.length;
    for (int i = 0; i < len; i++)
      rv.Add(ary[i]);
    return rv;
  }
}

View File

@@ -0,0 +1,65 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/**
 * One attribute found inside an xml node: its classification (tid), the byte offsets of the
 * attribute / key / value / "=" within the source, the quote byte, and optionally materialized
 * key and value bytes.
 */
public class Xop_xatr_itm {
  // NOTE: id order is important; Invalid() treats every tid below Tid_key_val as invalid
  public static final byte Tid_null = 0, Tid_invalid = 1, Tid_repeat = 2, Tid_key_val = 3, Tid_key_only = 4;
  public static final byte Key_tid_generic = 0, Key_tid_id = 1, Key_tid_style = 2, Key_tid_role = 3;
  public static final Xop_xatr_itm[] Ary_empty = new Xop_xatr_itm[0];

  private byte tid;
  private byte key_tid;
  private byte quote_byte;
  private int atr_bgn, atr_end;
  private int key_bgn, key_end;
  private int val_bgn, val_end;
  private int eq_pos;
  private byte[] key_bry;
  private byte[] val_bry;

  /** Creates an invalid attribute that only knows its overall range. */
  public Xop_xatr_itm(int atr_bgn, int atr_end) {
    this.tid = Tid_invalid;
    this.atr_bgn = atr_bgn;
    this.atr_end = atr_end;
  }
  /** Creates a key-only attribute; the value range is set to the key range. */
  public Xop_xatr_itm(byte quote_byte, int atr_bgn, int atr_end, int key_bgn, int key_end) {
    this.tid = Tid_key_only;
    this.quote_byte = quote_byte;
    this.atr_bgn = atr_bgn;
    this.atr_end = atr_end;
    this.key_bgn = key_bgn;
    this.key_end = key_end;
    this.val_bgn = key_bgn;
    this.val_end = key_end;
  }
  /** Creates a key/value attribute with separate key and value ranges and the position of "=". */
  public Xop_xatr_itm(byte quote_byte, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end, int eq_pos) {
    this.tid = Tid_key_val;
    this.quote_byte = quote_byte;
    this.atr_bgn = atr_bgn;
    this.atr_end = atr_end;
    this.key_bgn = key_bgn;
    this.key_end = key_end;
    this.val_bgn = val_bgn;
    this.val_end = val_end;
    this.eq_pos = eq_pos;
  }

  public byte Tid() {
    return tid;
  }
  public void Tid_to_repeat_() {
    tid = Tid_repeat;
  }
  public void Tid_to_invalid_() {
    tid = Tid_invalid;
  }
  public boolean Tid_is_key_only() {
    return tid == Tid_key_only;
  }
  /** True for Tid_null, Tid_invalid and Tid_repeat. NOTE: Tid order is important */
  public boolean Invalid() {
    return tid < Tid_key_val;
  }

  public int Key_bgn() {
    return key_bgn;
  }
  public int Key_end() {
    return key_end;
  }
  public void Key_rng_(int key_bgn, int key_end) {
    this.key_bgn = key_bgn;
    this.key_end = key_end;
  }
  public byte[] Key_bry() {
    return key_bry;
  }
  public Xop_xatr_itm Key_bry_(byte[] v) {
    key_bry = v;
    return this;
  }
  public byte Key_tid() {
    return key_tid;
  }
  public Xop_xatr_itm Key_tid_(byte v) {
    key_tid = v;
    return this;
  }

  public byte[] Val_bry() {
    return val_bry;
  }
  public Xop_xatr_itm Val_bry_(byte[] v) {
    val_bry = v;
    return this;
  }
  public int Val_bgn() {
    return val_bgn;
  }
  public int Val_end() {
    return val_end;
  }
  public int Atr_bgn() {
    return atr_bgn;
  }
  public int Atr_end() {
    return atr_end;
  }
  public int Eq_pos() {
    return eq_pos;
  }
  public byte Quote_byte() {
    return quote_byte;
  }

  /** Returns the value as a String built from Val_as_bry(src). */
  public String Val_as_str(byte[] src) {
    byte[] bry = Val_as_bry(src);
    return String_.new_u8(bry);
  }
  /** Returns the value bytes; when none are set yet they are cut from src[val_bgn..val_end) and cached. NOTE: val_bry is cached */
  public byte[] Val_as_bry(byte[] src) {
    if (val_bry == null)
      val_bry = Bry_.Mid(src, val_bgn, val_end);
    return val_bry;
  }
  /** Same as Val_as_bry, but yields null when Bry_.Len_eq_0 reports the value as empty. */
  public byte[] Val_as_bry__blank_to_null(byte[] src) {
    byte[] rv = Val_as_bry(src);
    if (Bry_.Len_eq_0(rv)) return null;
    return rv;
  }
  /**
   * Parses the value as an int, falling back to or.
   * Uses Bry_.Xto_int_or_lax on the source range while no value bytes are set, and Bry_.Xto_int_or on the value bytes otherwise.
   */
  public int Val_as_int_or(byte[] src, int or) {
    if (val_bry == null)
      return Bry_.Xto_int_or_lax(src, val_bgn, val_end, or);
    return Bry_.Xto_int_or(val_bry, or);
  }
  /** True only when the value parses to the int 1 (unparseable values fall back to 0). */
  public boolean Val_as_bool_by_int(byte[] src) {
    int val = Val_as_int_or(src, 0);
    return val == 1;
  }
  /** True when the ASCII-lowercased value equals Bool_.True_bry. */
  public boolean Val_as_bool(byte[] src) {
    byte[] lowered = Bry_.Lower_ascii(Val_as_bry(src));
    return Bry_.Eq(lowered, Bool_.True_bry);
  }

  /**
   * Parses the attributes of xnde with the app's attribute parser and, for every item that is not Invalid(),
   * looks its key up in hash and hands the item plus the lookup result to parser.Xatr_parse.
   * Returns all parsed items, invalid ones included.
   */
  public static Xop_xatr_itm[] Xatr_parse(Xoae_app app, Xop_xnde_atr_parser parser, Hash_adp_bry hash, Xowe_wiki wiki, byte[] src, Xop_xnde_tkn xnde) {
    Xop_xatr_itm[] xatr_ary = app.Xatr_parser().Parse(app.Msg_log(), src, xnde.Atrs_bgn(), xnde.Atrs_end());
    for (Xop_xatr_itm xatr : xatr_ary) {
      if (xatr.Invalid()) continue;
      Object xatr_key_obj = hash.Get_by_mid(src, xatr.Key_bgn(), xatr.Key_end());
      parser.Xatr_parse(wiki, src, xatr, xatr_key_obj);
    }
    return xatr_ary;
  }
}

View File

@@ -0,0 +1,408 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.primitives.*;
public class Xop_xatr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATTRIBS_REGEX
// attribute items collected by the current Parse call; cleared at the start of every Parse
private final List_adp xatrs = List_adp_.new_();
// states of the Parse state machine: before an attribute, inside an invalid attribute, inside a key,
// after a key (waiting for "="), after "=" (waiting for a value), inside a quoted value, inside an unquoted value
private static final byte Mode_atr_bgn = 1, Mode_invalid = 2, Mode_key = 3, Mode_eq = 4, Mode_val_bgn = 5, Mode_val_quote = 6, Mode_val_raw = 7;
// current state of the state machine
private byte mode = Mode_atr_bgn;
// byte offsets of the attribute being built; -1 means "not seen yet". valid is cleared when the attribute is found to be malformed
// NOTE(review): valid has no access modifier, so it is package-private unlike the other state fields -- confirm this is intended
private int atr_bgn = -1, key_bgn = -1, key_end = -1, eq_pos = -1, val_bgn = -1, val_end = -1; boolean valid = true;
// quote character that opened the current quoted value
private byte quote_byte = Byte_ascii.Nil;
// last item seen per key; used by Invalidate_repeated_atr to flag earlier duplicates
private final Hash_adp_bry repeated_atrs_hash = Hash_adp_bry.ci_ascii_(); // ASCII:xnde_atrs
// scratch buffers for keys / values that cannot be taken as one contiguous range of src
private final Bry_bfr key_bfr = Bry_bfr.new_(), val_bfr = Bry_bfr.new_();
// *_bfr_on: the matching buffer holds the key / value; ws_is_before_val: whitespace was seen between "=" and the value
private boolean key_bfr_on = false, val_bfr_on = false, ws_is_before_val = false;
// holder for the tag-name bytes matched by the most recent Xnde_find_gt_find call (null when nothing matched)
public Bry_obj_ref Bry_obj() {return bry_ref;} private final Bry_obj_ref bry_ref = Bry_obj_ref.null_();
/**
 * Checks whether the text starting at pos (optionally preceded by one "/") up to the next ">" is one of the
 * tag names registered in xnde_hash.
 * The matched name bytes (or null) are also published through Bry_obj().
 * @param src bytes being parsed
 * @param pos position of the first byte after "<"
 * @param end exclusive end of the range to scan
 * @return pos (after any skipped "/") plus the length of the matched name, or String_.Find_none when the range is
 *         empty, no ">" exists before end, or the text is not a registered name
 */
public int Xnde_find_gt_find(byte[] src, int pos, int end) {
  bry_ref.Val_(null);
  // FIX: an empty range cannot contain a tag; returning here also avoids reading src[pos] when pos == src.length
  if (pos >= end) return String_.Find_none;
  if (src[pos] == Byte_ascii.Slash && pos + 1 < end) // if </ move pos to after /
    ++pos;
  int gt_pos = Bry_finder.Find_fwd(src, Byte_ascii.Gt, pos, end);
  if (gt_pos == Bry_.NotFound) return String_.Find_none;
  byte[] bry = (byte[])xnde_hash.Get_by_mid(src, pos, gt_pos);
  bry_ref.Val_(bry);
  return bry == null ? String_.Find_none : bry.length + pos;
}
/**
 * Given the position of a "<" inside attribute text, finds the ">" that closes a recognized tag
 * (see Xnde_find_gt_find) so that the caller can skip over the whole tag.
 * After the tag name only whitespace and a single "/" may precede the ">".
 * Every failure is logged as a warning on log_mgr.
 * @param lt_pos position of the "<"
 * @param end exclusive end of the range to scan
 * @return position of the closing ">", or String_.Find_none when the tag is not recognized or malformed
 */
private int Xnde_find_gt(Gfo_msg_log log_mgr, byte[] src, int lt_pos, int end) {
  int pos = lt_pos + 1;
  if (pos >= end) { // FIX: "<" is the last byte of the range; src[pos] must not be read (it may lie past the array)
    bry_ref.Val_(null); // same reset that Xnde_find_gt_find performs on entry
    log_mgr.Add_str_warn_key_none(Msg_mgr, "invalid lt", src, lt_pos);
    return String_.Find_none;
  }
  // NOTE(review): Xnde_find_gt_find skips one "/" as well, so "<//name>" is accepted too -- confirm that is intended
  if (src[pos] == Byte_ascii.Slash && pos + 1 < end)
    ++pos;
  int match_pos = Xnde_find_gt_find(src, pos, end);
  if (match_pos == String_.Find_none) {
    log_mgr.Add_str_warn_key_none(Msg_mgr, "invalid lt", src, lt_pos);
    return String_.Find_none;
  }
  boolean slash_found = false;
  for (int i = match_pos; i < end; i++) {
    switch (src[i]) {
      case Byte_ascii.Gt:
        return i;
      case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip any ws
        break;
      case Byte_ascii.Slash:
        if (slash_found) { // only allow one slash
          log_mgr.Add_str_warn_key_none(Msg_mgr, "multiple slashes not allowed", src, i);
          return String_.Find_none;
        }
        slash_found = true;
        break;
      default:
        log_mgr.Add_str_warn_key_none(Msg_mgr, "invalid character", src, i);
        return String_.Find_none;
    }
  }
  log_mgr.Add_str_warn_key_none(Msg_mgr, "eos", src, lt_pos);
  return String_.Find_none;
}
/**
 * Parses the attribute text in src[bgn..end) into attribute items.
 * The text is walked one byte at a time by a state machine (see the Mode_* constants); each completed attribute,
 * valid or not, is emitted through Make. Recognized tags such as nowiki (see Xnde_find_gt) are skipped wherever
 * they occur, and their surrounding text is stitched together through key_bfr / val_bfr.
 * @param log_mgr receives warnings for malformed input
 * @param src bytes being parsed
 * @param bgn position of the first attribute byte
 * @param end exclusive end of the attribute text
 * @return all attributes found, in source order; malformed ones are included as invalid items
 */
public Xop_xatr_itm[] Parse(Gfo_msg_log log_mgr, byte[] src, int bgn, int end) {
  xatrs.Clear();
  repeated_atrs_hash.Clear();
  // FIX: start every call from a clean slate; this instance is reused across calls, and an exception thrown
  // midway through an earlier call would otherwise leak half-built attribute state into this one
  mode = Mode_atr_bgn; quote_byte = Byte_ascii.Nil; valid = true;
  atr_bgn = key_bgn = key_end = eq_pos = val_bgn = val_end = -1;
  key_bfr_on = val_bfr_on = ws_is_before_val = false;
  key_bfr.Clear(); val_bfr.Clear();
  int i = bgn;
  boolean prv_is_ws = false; // true while the previous byte of a quoted value was whitespace; used to collapse runs
  while (true) {
    if (i == end) {
      if (mode == Mode_val_quote) { // quote still open
        int reset_pos = Bry_finder.Find_fwd(src, Byte_ascii.Space, atr_bgn, end); // try to find 1st space within quote; EX:"a='b c=d" should try to reset at c=d
        boolean reset_found = reset_pos != Bry_finder.Not_found;
        valid = false; val_end = reset_found ? reset_pos : end;
        Make(log_mgr, src, val_end); // create invalid atr
        if (reset_found) { // space found; resume from text after space; EX: "a='b c=d"; PAGE:en.w:Aubervilliers DATE:2014-06-25
          i = Bry_finder.Find_fwd_while_not_ws(src, reset_pos, end); // skip ws
          atr_bgn = -1;
          mode = Mode_atr_bgn;
          val_bfr.Clear();
          val_bfr_on = false;
          ws_is_before_val = false;
          continue;
        }
        else
          break;
      }
      else {
        if (mode == Mode_val_bgn) // NOTE: handle dangling "k=" else will be "k"; EX: <a b=> x> <a b>; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31
          valid = false;
        if (atr_bgn != -1) { // atr_bgn will be -1 if atrs ends on quoted (EX:"a='b'"); else, pending atr that needs to be processed; EX: "a=b" b wil be in bfr
          val_end = end;
          Make(log_mgr, src, end);
        }
        break;
      }
    }
    else if (i > end)
      break;
    byte b = src[i];
    switch (mode) {
      case Mode_atr_bgn:
        switch (b) {
          case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip any ws at bgn; note that once a non-ws char is encountered, it will immediately go into another mode
            break;
          case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
          case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
          case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
          case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
          case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
          case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
          case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
          case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
          case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
          case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
          case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
          case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
          case Byte_ascii.Colon:
            if (atr_bgn == -1) atr_bgn = i;
            mode = Mode_key;
            key_bgn = i;
            break;
          case Byte_ascii.Lt:
            int gt_pos = Xnde_find_gt(log_mgr, src, i, end);
            if (gt_pos == String_.Find_none) {
              valid = false; mode = Mode_invalid; if (atr_bgn == -1) atr_bgn = i;
            }
            else {
              i = gt_pos; // note that there is ++i below and loop will continue at gt_pos + 1 (next character after)
            }
            break;
          default:
            valid = false; mode = Mode_invalid; if (atr_bgn == -1) atr_bgn = i;
            break;
        }
        break;
      case Mode_invalid: // swallow everything up to the next whitespace, then emit the invalid attribute
        switch (b) {
          case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
            Make(log_mgr, src, i);
            mode = Mode_atr_bgn;
            break;
          default:
            break;
        }
        break;
      case Mode_key:
        switch (b) {
          case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
          case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
          case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
          case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
          case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
          case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
          case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
          case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
          case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
          case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
          case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
          case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
          case Byte_ascii.Colon: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Underline:
            if (key_bfr_on) key_bfr.Add_byte(b);
            break;
          case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
            if (valid) {
              key_end = i;
              mode = Mode_eq;
            }
            else
              Make(log_mgr, src, i);
            break;
          case Byte_ascii.Eq:
            if (valid) {
              key_end = i;
              mode = Mode_val_bgn;
              eq_pos = i;
            }
            break;
          case Byte_ascii.Lt:
            int gt_pos = Xnde_find_gt(log_mgr, src, i, end);
            if (gt_pos == String_.Find_none) {
              valid = false; mode = Mode_invalid;
            }
            else {
              if (!key_bfr_on) key_bfr.Add_mid(src, key_bgn, i); // key is no longer one contiguous range; copy what was read so far
              i = gt_pos; // note that there is ++i below and loop will continue at gt_pos + 1 (next character after)
              key_bfr_on = true;
            }
            break;
          default:
            valid = false; mode = Mode_invalid;
            break;
        }
        break;
      case Mode_eq:
        switch (b) {
          case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip ws
            if (key_end == -1) { // EX: "a = b"; key_end != -1 b/c 1st \s sets key_end; EX: "a b = c"; key_end
              val_end = i - 1;
              Make(log_mgr, src, i);
              mode = Mode_atr_bgn;
              continue;
            }
            break;
          case Byte_ascii.Eq:
            eq_pos = i;
            mode = Mode_val_bgn;
            break;
          case Byte_ascii.Quote: case Byte_ascii.Apos: // FUTURE: previous word was key
          default: // NOTE: added this late; xml_parser was not handling "line start=3" DATE:2013-07-03
            val_end = i - 1;
            Make(log_mgr, src, i);
            mode = Mode_atr_bgn;
            continue; // no ++i: the current byte is re-read in Mode_atr_bgn as the start of the next attribute
        }
        break;
      case Mode_val_bgn:
        switch (b) {
          case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip-ws
            ws_is_before_val = true;
            break;
          case Byte_ascii.Quote: case Byte_ascii.Apos:
            mode = Mode_val_quote; quote_byte = b; prv_is_ws = false;
            break;
          case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
          case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
          case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
          case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
          case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
          case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
          case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
          case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
          case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
          case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
          case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
          case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
          case Byte_ascii.Colon:
          case Byte_ascii.Hash:
            mode = Mode_val_raw;
            val_bgn = i;
            break;
          case Byte_ascii.Lt:
            int gt_pos = Xnde_find_gt(log_mgr, src, i, end);
            if (gt_pos == String_.Find_none) {
              valid = false; mode = Mode_invalid;
            }
            else {
              i = gt_pos; // note that there is ++i below and loop will continue at gt_pos + 1 (next character after)
            }
            break;
          default:
            break;
        }
        break;
      case Mode_val_quote:
        if (val_bgn == -1) val_bgn = i; // first byte after the opening quote
        switch (b) {
          case Byte_ascii.Quote: case Byte_ascii.Apos:
            if (quote_byte == b) {
              val_end = i;
              Make(log_mgr, src, i + 1); // NOTE: set atr_end *after* quote
            }
            prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
            break;
          case Byte_ascii.Lt: // "<" try to find nowiki inside atr
            int gt_pos = Xnde_find_gt(log_mgr, src, i, end);
            if (gt_pos == String_.Find_none) {
              // valid = false; mode = Mode_invalid; // DELETE: 2012-11-13; unpaired < should not mark atr invalid; EX: style='margin:1em<f'
              if (!val_bfr_on) val_bfr.Add_mid(src, val_bgn, i + 1); // +1 to include <
              val_bfr_on = true;
            }
            else {
              if (!val_bfr_on) val_bfr.Add_mid(src, val_bgn, i);
              i = gt_pos; // note that there is ++i below and loop will continue at gt_pos + 1 (next character after)
              val_bfr_on = true;
            }
            prv_is_ws = false;
            break;
          case Byte_ascii.Nl: case Byte_ascii.Tab: case Byte_ascii.Cr: // REF.MW:Sanitizer.php|decodeTagAttributes $value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
          case Byte_ascii.Space:
            if (!val_bfr_on) {
              val_bfr.Add_mid(src, val_bgn, i);
              val_bfr_on = true;
            }
            if (prv_is_ws) {} // noop; only allow one ws at a time
            else {
              prv_is_ws = true; val_bfr.Add_byte(Byte_ascii.Space);
            }
            break;
          default:
            prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
            break;
        }
        break;
      case Mode_val_raw: // no quotes; EX:a=bcd
        switch (b) {
          case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
          case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
          case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
          case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
          case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
          case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
          case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
          case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
          case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
          case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
          case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
          case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
          case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent:
          case Byte_ascii.Amp: case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star:
          case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Slash:
          case Byte_ascii.Colon: case Byte_ascii.Semic: case Byte_ascii.Gt:
          case Byte_ascii.Question: case Byte_ascii.At: case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end:
          case Byte_ascii.Pow: case Byte_ascii.Underline: case Byte_ascii.Tick:
          case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde:
            break;
          case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl:
            val_end = i;
            Make(log_mgr, src, i);
            break;
          case Byte_ascii.Eq: // EX:"a= b=c" or "a=b=c"; PAGE:en.w:2013_in_American_television
            if (ws_is_before_val) { // "a= b=c"; discard 1st and resume at 2nd
              int old_val_bgn = val_bgn;
              valid = false; mode = Mode_invalid; Make(log_mgr, src, val_bgn); // invalidate cur atr; EX:"a="
              atr_bgn = key_bgn = old_val_bgn; // reset atr / key to new atr; EX: "b"
              key_end = i;
              eq_pos = i; // FIX: record "=" for the resumed atr like the other "=" branches do; Make just reset it to -1
              mode = Mode_val_bgn; // set mode to val_bgn (basically, put after =)
            }
            else { // "a=b=c"; discard all
              valid = false; mode = Mode_invalid;
            }
            break;
          case Byte_ascii.Lt:
            val_end = i;
            Make(log_mgr, src, i);
            --i; // NOTE: --i to include "<" as part of next atr; above ws excludes from next atr
            break;
          default:
            valid = false; mode = Mode_invalid;
            break;
        }
        break;
    }
    ++i;
  }
  repeated_atrs_hash.Clear();
  return (Xop_xatr_itm[])xatrs.To_ary(Xop_xatr_itm.class);
}
/**
 * Turns the attribute state accumulated by Parse into an item, appends it to xatrs and resets the state for the
 * next attribute.
 * A valid attribute becomes a key/value item when both key_bgn and val_bgn were seen, else a key-only item; its
 * key bytes are taken from key_bfr when that buffer is in use, else from src, and an earlier item with the same
 * key is flagged as repeated. An invalid attribute becomes an invalid item and is logged.
 * @param atr_end exclusive end position recorded for the attribute
 */
private void Make(Gfo_msg_log log_mgr, byte[] src, int atr_end) {
  Xop_xatr_itm xatr = null;
  if (valid) {
    boolean key_bgn_exists = key_bgn != -1;
    boolean val_bgn_exists = val_bgn != -1;
    if (key_bgn_exists && val_bgn_exists)
      xatr = new Xop_xatr_itm(quote_byte, atr_bgn, atr_end, key_bgn, key_end, val_bgn, val_end, eq_pos);
    else {
      if (key_end == -1) key_end = val_end; // NOTE: key_end == -1 when eos; EX: "a" would have key_bgn = 0; key_end = -1; val_end = 1 DATE:2014-07-03
      xatr = new Xop_xatr_itm(quote_byte, atr_bgn, atr_end, key_bgn, key_end);
    }
    byte[] key_bry = key_bfr_on ? key_bfr.Xto_bry_and_clear() : Bry_.Mid(src, xatr.Key_bgn(), xatr.Key_end());
    xatr.Key_bry_(key_bry);
    Invalidate_repeated_atr(xatr, key_bry);
  }
  else { // note that invalid will have no key_bgn / key_end
    xatr = new Xop_xatr_itm(atr_bgn, atr_end);
    log_mgr.Add_itm_none(Log_invalid_atr, src, atr_bgn, atr_end);
    // FIX: drop key bytes buffered for this discarded atr; else they are prepended to the key of the next atr
    // that uses key_bfr; EX: "a<nowiki>b</nowiki>+c d<nowiki>e</nowiki>=f" produced key "abde" instead of "de"
    if (key_bfr_on) key_bfr.Clear();
  }
  if (val_bfr_on) xatr.Val_bry_(val_bfr.Xto_bry_and_clear());
  xatrs.Add(xatr);
  // reset all per-attribute state
  mode = Mode_atr_bgn; quote_byte = Byte_ascii.Nil; valid = true;
  atr_bgn = key_bgn = val_bgn = key_end = val_end = eq_pos = -1;
  val_bfr_on = key_bfr_on = ws_is_before_val = false;
}
/**
 * Registers cur as the latest attribute seen for key_bry.
 * When an earlier attribute is already registered under that key, it is switched to Tid_repeat and its
 * registration is removed before cur takes its place.
 */
private void Invalidate_repeated_atr(Xop_xatr_itm cur, byte[] key_bry) {
  Object earlier = repeated_atrs_hash.Get_by(key_bry);
  if (earlier != null) {
    ((Xop_xatr_itm)earlier).Tid_to_repeat_();
    repeated_atrs_hash.Del(key_bry);
  }
  repeated_atrs_hash.Add(key_bry, cur);
}
// tag names that Xnde_find_gt_find recognizes inside attribute text: nowiki, noinclude, includeonly, onlyinclude
private static final Hash_adp_bry xnde_hash = Hash_adp_bry.ci_ascii_()
.Add_bry_bry(Xop_xnde_tag_.Tag_nowiki.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_noinclude.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_includeonly.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_onlyinclude.Name_bry())
;
// message group that owns the log items declared below
private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "xatr_parser");
// warning logged by Make for every invalid attribute
public static final Gfo_msg_itm
Log_invalid_atr = Gfo_msg_itm_.new_warn_(owner, "invalid_atr")
;
// first argument passed to log_mgr.Add_str_warn_key_none for the warnings raised by Xnde_find_gt
private static final String Msg_mgr = "gplx.xowa.wiki.parser.xatr";
}
/*
NOTE: this parser can be done with a trie and hooks on Quote,Apos,Eq,NewLine,Space,Tab, but...
- multi-byte lookup is not needed (main advantage of trie)
- less performant
- logic is indirect (b/c different chars are valid if first letter of key, raw mode, quoted)
*/

View File

@@ -0,0 +1,113 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Unit tests for Xop_xatr_parser: each case parses one attribute string and checks the resulting items. */
public class Xop_xatr_parser_tst {
  private Xop_xatr_parser_fxt fxt = new Xop_xatr_parser_fxt();

  // key / value basics
  @Test public void Kv_quote_double() {
    fxt.tst_("a=\"b\"", fxt.new_atr_("a", "b"));
  }
  @Test public void Kv_quote_single() {
    fxt.tst_("a='b'", fxt.new_atr_("a", "b"));
  }
  @Test public void Kv_quote_none() {
    fxt.tst_("a=b", fxt.new_atr_("a", "b"));
  }
  @Test public void Kv_empty() {
    fxt.tst_("a=''", fxt.new_atr_("a", ""));
  }
  @Test public void Kv_key_has_underline() {
    fxt.tst_("a_b=c", fxt.new_atr_("a_b", "c"));
  }
  @Test public void Val_quote_none() {
    fxt.tst_("b", fxt.new_atr_("b", "b"));
  }
  // PURPOSE:discovered while writing test for ref's "lower-alpha" DATE:2014-07-03
  @Test public void Val_quote_none_ws() {
    fxt.tst_(" b ", fxt.new_atr_("b", "b"));
  }

  // invalid characters
  @Test public void Invalid_key_plus() {
    fxt.tst_("a+b", fxt.new_invalid_(0, 3));
  }
  @Test public void Invalid_key_plus_many() {
    fxt.tst_("a+b c=d", fxt.new_invalid_(0, 3), fxt.new_atr_("c", "d"));
  }
  @Test public void Invalid_val_plus() {
    fxt.tst_("a=b+c", fxt.new_invalid_(0, 5));
  }
  // PURPOSE: * is invalid, but should not stop parsing of a=b
  @Test public void Invalid_recover() {
    fxt.tst_("* a=b", fxt.new_invalid_(0, 1), fxt.new_atr_("a", "b"));
  }

  // nowiki inside attribute text
  @Test public void Nowiki_val() {
    fxt.tst_("a=<nowiki>'b'</nowiki>", fxt.new_atr_("a", "b").Expd_atr_rng_(0, 13).Expd_key_("a").Expd_val_("b"));
  }
  @Test public void Nowiki_key() {
    fxt.tst_("<nowiki>a=b</nowiki>", fxt.new_atr_("a", "b").Expd_atr_rng_(8, 11));
  }
  @Test public void Nowiki_key_2() {
    fxt.tst_("a<nowiki>b</nowiki>c=d", fxt.new_atr_("abc", "d").Expd_atr_rng_(0, 22));
  }
  // EX:fr.w:{{Portail|Transpédia|Californie}}
  @Test public void Nowiki_key_3() {
    fxt.tst_("a<nowiki>=</nowiki>\"b\"", fxt.new_atr_("a", "b").Expd_atr_rng_(0, 22));
  }
  @Test public void Nowiki_quote() {
    fxt.tst_("a=\"b<nowiki>c</nowiki>d<nowiki>e</nowiki>f\"", fxt.new_atr_("a", "bcdef"));
  }

  @Test public void Int_value() {
    fxt.tst_int("a='-123'", -123);
  }

  // multiple attributes and whitespace
  @Test public void Many_apos() {
    fxt.tst_("a='b' c='d' e='f'", fxt.new_atr_("a", "b"), fxt.new_atr_("c", "d"), fxt.new_atr_("e", "f"));
  }
  @Test public void Many_raw() {
    fxt.tst_("a=b c=d e=f", fxt.new_atr_("a", "b"), fxt.new_atr_("c", "d"), fxt.new_atr_("e", "f"));
  }
  @Test public void Ws_ini() {
    fxt.tst_(" a='b'", fxt.new_atr_("a", "b").Expd_atr_rng_(1, 6));
  }
  @Test public void Ws_end() {
    fxt.tst_(" a='b' c='d'", fxt.new_atr_("a", "b").Expd_atr_rng_(1, 6), fxt.new_atr_("c", "d").Expd_atr_rng_(7, 12));
  }
  @Test public void Quote_ws_nl() {
    fxt.tst_("a='b\nc'", fxt.new_atr_("a", "b c"));
  }
  @Test public void Quote_ws_mult() {
    fxt.tst_("a='b c'", fxt.new_atr_("a", "b c"));
  }
  // PURPOSE: fix wherein 1st-gobble gobbled rest of spaces (was b cd)
  @Test public void Quote_ws_mult_mult() {
    fxt.tst_("a='b c d'", fxt.new_atr_("a", "b c d"));
  }
  // PURPOSE: fix wherein apos was gobbled up; PAGE:en.s:Alice's_Adventures_in_Wonderland; DATE:2013-11-22
  @Test public void Quote_apos() {
    fxt.tst_("a=\"b c'd\"", fxt.new_atr_("a", "b c'd"));
  }
  // PURPOSE: fix wherein apos was causing "'b'c d"; PAGE:en.s:Grimm's_Household_Tales,_Volume_1; DATE:2013-12-22
  @Test public void Quote_apos_2() {
    fxt.tst_("a=\"b'c d\"", fxt.new_atr_("a", "b'c d"));
  }
  @Test public void Multiple() {
    fxt.tst_("a b1 c", fxt.new_atr_("a", "a"), fxt.new_atr_("b1", "b1"), fxt.new_atr_("c", "c"));
  }
  // PURPOSE: fix wherein multiple space was causing "a=a"; PAGE:fr.s:La_Sculpture_dans_les_cimetières_de_Paris/Père-Lachaise; DATE:2014-01-18
  @Test public void Ws() {
    fxt.tst_("a = 'b'", fxt.new_atr_("a", "b"));
  }

  // dangling quotes and incomplete pairs
  // PURPOSE: handle dangling quote at eos; PAGE:en.w:Aubervilliers DATE:2014-06-25
  @Test public void Dangling_eos() {
    fxt.tst_("a='b' c='d", fxt.new_atr_("a", "b"), fxt.new_invalid_(6, 10));
  }
  // PURPOSE: handle dangling quote at bos; resume at next valid atr; PAGE:en.w:Aubervilliers DATE:2014-06-25
  @Test public void Dangling_bos() {
    fxt.tst_("a='b c=d", fxt.new_invalid_(0, 4), fxt.new_atr_("c", "d"));
  }
  // PURPOSE: discard xatr if incomplete and followed by valid atr; PAGE:en.w:2013_in_American_television DATE:2014-09-25
  @Test public void Invalid_incomplete() {
    fxt.tst_("a= c=d", fxt.new_invalid_(0, 3), fxt.new_atr_("c", "d"));
  }
  // PURPOSE: variation of above; per MW regex, missing space invalidates entire attribute; DATE:2014-09-25
  @Test public void Invalid_incomplete_2() {
    fxt.tst_("a=c=d", fxt.new_invalid_(0, 5));
  }
  // PURPOSE: "b=" should be invalid not a kv of "b" = "b"; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31
  @Test public void Invalid_incomplete_pair() {
    fxt.tst_("a= b=", fxt.new_invalid_(0, 3), fxt.new_invalid_(3, 5));
  }
  /*
  TODO:
  change ws to be end; EX: "a=b c=d" atr1 ends at 4 (not 3)
  */
//  @Test public void Val_quote_none_many() {
//    fxt.tst_("a b", fxt.new_atr_("", "a"), fxt.new_atr_("", "b"));
////    fxt.tst_("a='b' c d e='f'", fxt.new_atr_("a", "b"), fxt.new_atr_("", "c"), fxt.new_atr_("", "d"), fxt.new_atr_("e", "f"));
//  }
}
/** Test fixture: runs Xop_xatr_parser over a string and compares the parsed items with expectations. */
class Xop_xatr_parser_fxt {
  Xop_xatr_parser parser = new Xop_xatr_parser();
  Tst_mgr tst_mgr = new Tst_mgr();
  /** Expectation for an invalid attribute spanning [bgn, end). */
  public Xop_xatr_itm_chkr new_invalid_(int bgn, int end) {
    Xop_xatr_itm_chkr rv = new Xop_xatr_itm_chkr();
    rv.Expd_atr_rng_(bgn, end);
    rv.Expd_typeId_(Xop_xatr_itm.Tid_invalid);
    return rv;
  }
  /** Expectation for an attribute with the given key and value. */
  public Xop_xatr_itm_chkr new_atr_(String key, String val) {
    Xop_xatr_itm_chkr rv = new Xop_xatr_itm_chkr();
    rv.Expd_key_(key);
    rv.Expd_val_(val);
    return rv;
  }
  /** Parses src_str and checks the resulting items, in order, against expd. */
  public void tst_(String src_str, Xop_xatr_itm_chkr... expd) {
    byte[] src = Bry_.new_u8(src_str);
    Xop_xatr_itm[] actl = Parse_all(src);
    tst_mgr.Vars().Clear().Add("raw_bry", src); // checkers read "raw_bry" to resolve key / value ranges
    tst_mgr.Tst_ary("xatr:", expd, actl);
  }
  /** Parses src_str and checks every item's value, converted with Val_as_int_or(src, 0), against expd. */
  public void tst_int(String src_str, int... expd) {
    byte[] src = Bry_.new_u8(src_str);
    Xop_xatr_itm[] atrs = Parse_all(src);
    int len = atrs.length;
    int[] actl = new int[len];
    for (int i = 0; i < len; i++)
      actl[i] = atrs[i].Val_as_int_or(src, 0);
    Tfds.Eq_ary(expd, actl);
  }
  // parses all of src with a fresh message log
  private Xop_xatr_itm[] Parse_all(byte[] src) {
    Gfo_msg_log msg_log = new Gfo_msg_log(Xoa_app_.Name);
    return parser.Parse(msg_log, src, 0, src.length);
  }
}
/**
 * Expectation object for one Xop_xatr_itm.
 * Every expectation left at its initial value (-1, null or Tid_null) is passed to Tst_mgr.Tst_val with its first
 * argument set to true.
 */
class Xop_xatr_itm_chkr implements Tst_chkr {
  private int expd_atr_bgn = -1, expd_atr_end = -1;
  private int expd_key_bgn = -1, expd_key_end = -1;
  private String expd_key;
  private String expd_val;
  private byte expd_typeId = Xop_xatr_itm.Tid_null;

  public Class<?> TypeOf() {
    return Xop_xatr_itm.class;
  }
  public Xop_xatr_itm_chkr Expd_atr_rng_(int bgn, int end) {
    expd_atr_bgn = bgn;
    expd_atr_end = end;
    return this;
  }
  public Xop_xatr_itm_chkr Expd_key_rng_(int bgn, int end) {
    expd_key_bgn = bgn;
    expd_key_end = end;
    return this;
  }
  public Xop_xatr_itm_chkr Expd_key_(String v) {
    expd_key = v;
    return this;
  }
  public Xop_xatr_itm_chkr Expd_val_(String v) {
    expd_val = v;
    return this;
  }
  public Xop_xatr_itm_chkr Expd_typeId_(byte v) {
    expd_typeId = v;
    return this;
  }
  /** Compares actl_obj with the expectations and returns the sum of the values reported by Tst_mgr.Tst_val. */
  public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
    Xop_xatr_itm actl = (Xop_xatr_itm)actl_obj;
    boolean skip_key = expd_key == null;
    boolean skip_val = expd_val == null;
    int err = 0;
    err += mgr.Tst_val(expd_typeId == Xop_xatr_itm.Tid_null, path, "atr_typeId", expd_typeId, actl.Tid());
    err += mgr.Tst_val(expd_atr_bgn == -1, path, "atr_bgn", expd_atr_bgn, actl.Atr_bgn());
    err += mgr.Tst_val(expd_atr_end == -1, path, "atr_end", expd_atr_end, actl.Atr_end());
    err += mgr.Tst_val(expd_key_bgn == -1, path, "key_bgn", expd_key_bgn, actl.Key_bgn());
    err += mgr.Tst_val(expd_key_end == -1, path, "key_end", expd_key_end, actl.Key_end());
    // key: prefer the item's own key bytes; else cut the key range out of the "raw_bry" test variable
    if (actl.Key_bry() != null)
      err += mgr.Tst_val(skip_key, path, "key", expd_key, String_.new_u8(actl.Key_bry()));
    else
      err += mgr.Tst_val(skip_key, path, "key", expd_key, mgr.Vars_get_bry_as_str("raw_bry", actl.Key_bgn(), actl.Key_end()));
    // val: same approach as key
    if (actl.Val_bry() != null)
      err += mgr.Tst_val(skip_val, path, "val", expd_val, String_.new_u8(actl.Val_bry()));
    else
      err += mgr.Tst_val(skip_val, path, "val", expd_val, mgr.Vars_get_bry_as_str("raw_bry", actl.Val_bgn(), actl.Val_end()));
    return err;
  }
}
/*
*/

View File

@@ -0,0 +1,263 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.primitives.*; import gplx.core.btries.*;
public class Xop_xatr_whitelist_mgr {
// Checks whether xatr's key is whitelisted for tag_id; also stores the matched key type on xatr.
// A style key is additionally passed through Scrub_style; a role key must have the val "presentation".
public boolean Chk(int tag_id, byte[] src, Xop_xatr_itm xatr) {
    // pick the bytes holding the key: a manually-specified key_bry wins; else the key range inside src
    byte[] scan_bry; int scan_bgn, scan_end;
    byte[] manual_key = xatr.Key_bry();
    if (manual_key != null) {   // key_bry specified manually; EX: "id<nowiki>=1" has a manual key_bry of "id"
        scan_bry = manual_key;
        scan_bgn = 0;
        scan_end = manual_key.length;
    }
    else {
        scan_bry = src;
        scan_bgn = xatr.Key_bgn();
        scan_end = xatr.Key_end();
        if (scan_bgn == scan_end) return true;  // no key; nothing to whitelist; return true
    }

    Object found = key_trie.Match_bgn(scan_bry, scan_bgn, scan_end);
    if (found == null) return false;    // unknown atr_key; EX: <b unknown=1/>
    Xop_xatr_whitelist_itm wl_itm = (Xop_xatr_whitelist_itm)found;
    byte key_tid = wl_itm.Key_tid();
    xatr.Key_tid_(key_tid);

    boolean allowed = wl_itm.Tags()[tag_id] == 1;   // is atr allowed for tag
    if (allowed && wl_itm.Exact())                  // if exact, match must consume the entire key
        allowed = key_trie.Match_pos() == scan_end;

    // val checks run even when allowed is false; a failed check returns false directly
    if (key_tid == Xop_xatr_itm.Key_tid_style) {
        if (!Scrub_style(xatr, src)) return false;
    }
    else if (key_tid == Xop_xatr_itm.Key_tid_role) {
        // MW: For now we only support role="presentation"; DATE:2014-04-05
        if (!Bry_.Eq(Val_role_presentation, xatr.Val_as_bry(src))) return false;
    }
    return allowed;
}
// Populates the whitelist: defines named attribute groups (Ini_grp), registers the allowed attribute keys
// for each tag (Ini_nde), and finally registers "data" as a non-exact key for every tag (Ini_all_loose).
// Returns this so the call can be chained after construction.
public Xop_xatr_whitelist_mgr Ini() { // REF.MW:Sanitizer.php|setupAttributeWhitelist
// attribute groups; 2nd arg is an optional base group whose keys are appended to the new group
Ini_grp("common" , null , "id", "class", "lang", "dir", "title", "style", "role");
Ini_grp("block" , "common" , "align");
Ini_grp("tablealign" , null , "align", "char", "charoff", "valign");
Ini_grp("tablecell" , null , "abbr", "axis", "headers", "scope", "rowspan", "colspan", "nowrap", "width", "height", "bgcolor");
// per-tag whitelists; each String is either a group name defined above (expanded by Ini_nde) or a literal attribute key
Ini_nde(Xop_xnde_tag_.Tid_div , "block");
Ini_nde(Xop_xnde_tag_.Tid_center , "common");
Ini_nde(Xop_xnde_tag_.Tid_span , "block");
Ini_nde(Xop_xnde_tag_.Tid_h1 , "block");
Ini_nde(Xop_xnde_tag_.Tid_h2 , "block");
Ini_nde(Xop_xnde_tag_.Tid_h3 , "block");
Ini_nde(Xop_xnde_tag_.Tid_h4 , "block");
Ini_nde(Xop_xnde_tag_.Tid_h5 , "block");
Ini_nde(Xop_xnde_tag_.Tid_h6 , "block");
Ini_nde(Xop_xnde_tag_.Tid_em , "common");
Ini_nde(Xop_xnde_tag_.Tid_strong , "common");
Ini_nde(Xop_xnde_tag_.Tid_cite , "common");
Ini_nde(Xop_xnde_tag_.Tid_dfn , "common");
Ini_nde(Xop_xnde_tag_.Tid_code , "common");
Ini_nde(Xop_xnde_tag_.Tid_samp , "common");
Ini_nde(Xop_xnde_tag_.Tid_kbd , "common");
Ini_nde(Xop_xnde_tag_.Tid_var , "common");
Ini_nde(Xop_xnde_tag_.Tid_abbr , "common");
Ini_nde(Xop_xnde_tag_.Tid_blockquote , "common", "cite");
Ini_nde(Xop_xnde_tag_.Tid_sub , "common");
Ini_nde(Xop_xnde_tag_.Tid_sup , "common");
Ini_nde(Xop_xnde_tag_.Tid_p , "block");
Ini_nde(Xop_xnde_tag_.Tid_br , "id", "class", "title", "style", "clear");
Ini_nde(Xop_xnde_tag_.Tid_pre , "common", "width");
Ini_nde(Xop_xnde_tag_.Tid_ins , "common", "cite", "datetime");
Ini_nde(Xop_xnde_tag_.Tid_del , "common", "cite", "datetime");
Ini_nde(Xop_xnde_tag_.Tid_ul , "common", "type");
Ini_nde(Xop_xnde_tag_.Tid_ol , "common", "type", "start");
Ini_nde(Xop_xnde_tag_.Tid_li , "common", "type", "value");
Ini_nde(Xop_xnde_tag_.Tid_dl , "common");
Ini_nde(Xop_xnde_tag_.Tid_dd , "common");
Ini_nde(Xop_xnde_tag_.Tid_dt , "common");
Ini_nde(Xop_xnde_tag_.Tid_table , "common", "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "align", "bgcolor");
Ini_nde(Xop_xnde_tag_.Tid_caption , "common", "align");
Ini_nde(Xop_xnde_tag_.Tid_thead , "common", "tablealign");
Ini_nde(Xop_xnde_tag_.Tid_tfoot , "common", "tablealign");
Ini_nde(Xop_xnde_tag_.Tid_tbody , "common", "tablealign");
Ini_nde(Xop_xnde_tag_.Tid_colgroup , "common", "span", "width", "tablealign");
Ini_nde(Xop_xnde_tag_.Tid_col , "common", "span", "width", "tablealign");
Ini_nde(Xop_xnde_tag_.Tid_tr , "common", "bgcolor", "tablealign");
Ini_nde(Xop_xnde_tag_.Tid_td , "common", "tablecell", "tablealign");
Ini_nde(Xop_xnde_tag_.Tid_th , "common", "tablecell", "tablealign");
Ini_nde(Xop_xnde_tag_.Tid_a , "common", "href", "rel", "rev");
Ini_nde(Xop_xnde_tag_.Tid_img , "common", "alt", "src", "width", "height");
Ini_nde(Xop_xnde_tag_.Tid_tt , "common");
Ini_nde(Xop_xnde_tag_.Tid_b , "common");
Ini_nde(Xop_xnde_tag_.Tid_i , "common");
Ini_nde(Xop_xnde_tag_.Tid_big , "common");
Ini_nde(Xop_xnde_tag_.Tid_small , "common");
Ini_nde(Xop_xnde_tag_.Tid_strike , "common");
Ini_nde(Xop_xnde_tag_.Tid_s , "common");
Ini_nde(Xop_xnde_tag_.Tid_u , "common");
Ini_nde(Xop_xnde_tag_.Tid_font , "common", "size", "color", "face");
Ini_nde(Xop_xnde_tag_.Tid_hr , "common", "noshade", "size", "width");
Ini_nde(Xop_xnde_tag_.Tid_ruby , "common");
Ini_nde(Xop_xnde_tag_.Tid_rb , "common");
Ini_nde(Xop_xnde_tag_.Tid_rt , "common");
Ini_nde(Xop_xnde_tag_.Tid_rp , "common");
Ini_nde(Xop_xnde_tag_.Tid_math , "class", "style", "id", "title");
Ini_nde(Xop_xnde_tag_.Tid_time , "class", "datetime");
Ini_nde(Xop_xnde_tag_.Tid_bdi , "common");
Ini_nde(Xop_xnde_tag_.Tid_data , "common", "value");
Ini_nde(Xop_xnde_tag_.Tid_mark , "common");
Ini_nde(Xop_xnde_tag_.Tid_q , "common");
// "data" is registered with exact=false for all tags; presumably so that keys merely starting with "data" pass (EX: data-sort-type) -- confirm against Btrie_slim_mgr.Match_bgn
Ini_all_loose("data");
return this;
}
// named attribute groups; key is the group name; val is a byte[][] of attribute keys
private Hash_adp_bry grp_hash = Hash_adp_bry.cs_();
// Registers a group named key_str holding cur_itms; if base_grp is given, that group's keys are added via Bry_.Ary_add.
// NOTE: base_grp must already be registered; a missing group is not checked here
private void Ini_grp(String key_str, String base_grp, String... cur_itms) {
    byte[][] members = Bry_.Ary(cur_itms);
    if (base_grp != null) {
        byte[][] inherited = (byte[][])grp_hash.Get_by_bry(Bry_.new_a7(base_grp));
        members = Bry_.Ary_add(members, inherited);
    }
    grp_hash.Add_bry_obj(Bry_.new_a7(key_str), members);
}
// Whitelists attribute keys for one tag.
// tag_tid:  index of the tag in each whitelist item's Tags() array
// key_strs: group names (expanded to the group's member keys) and / or literal attribute keys
private void Ini_nde(int tag_tid, String... key_strs) {
    // pass 1: flatten group names into their member keys
    List_adp keys = List_adp_.new_();
    for (String key_str : key_strs) {
        byte[] key = Bry_.new_a7(key_str);
        Object grp_obj = grp_hash.Get_by_bry(key);  // is the key a grp? EX: "common"
        if (grp_obj == null)
            keys.Add(key);
        else {
            for (byte[] grp_key : (byte[][])grp_obj)
                keys.Add(grp_key);
        }
    }
    // pass 2: find or create the whitelist item for each key and flag the tag as allowed
    int len = keys.Count();
    for (int i = 0; i < len; i++) {
        byte[] key_bry = (byte[])keys.Get_at(i);
        Xop_xatr_whitelist_itm itm = (Xop_xatr_whitelist_itm)key_trie.Match_exact(key_bry, 0, key_bry.length);
        if (itm == null)
            itm = Ini_key_trie_add(key_bry, true);  // NOTE: Ini_key_trie_add registers itm in key_trie; do not add it a 2nd time here
        itm.Tags()[tag_tid] = 1;
    }
}
// Registers key_str as a non-exact ("loose") key that is allowed for every tag.
private void Ini_all_loose(String key_str) {
    byte[] key_bry = Bry_.new_a7(key_str);
    // create and register exactly one item; Ini_key_trie_add adds it to key_trie, so the item whose
    // tags are set below is guaranteed to be the one the trie returns
    Xop_xatr_whitelist_itm itm = Ini_key_trie_add(key_bry, false);
    byte[] tags = itm.Tags();
    int len = Xop_xnde_tag_._MaxLen;
    for (int i = 0; i < len; i++)
        tags[i] = 1;
}
// Creates a whitelist item for key, registers it in key_trie, and returns it.
// The item's key type comes from tid_hash; keys not found there get Key_tid_generic.
private Xop_xatr_whitelist_itm Ini_key_trie_add(byte[] key, boolean exact) {
    byte key_tid = Xop_xatr_itm.Key_tid_generic;
    Object tid_obj = tid_hash.Get_by(key);
    if (tid_obj != null)
        key_tid = ((Byte_obj_val)tid_obj).Val();
    Xop_xatr_whitelist_itm created = new Xop_xatr_whitelist_itm(key, key_tid, exact);
    key_trie.Add_obj(key, created);
    return created;
}
// maps attribute keys that get special handling to their Key_tid; Ini_key_trie_add uses Key_tid_generic for any key not listed here
private Hash_adp_bry tid_hash = Hash_adp_bry.ci_ascii_()
.Add_str_byte("id", Xop_xatr_itm.Key_tid_id)
.Add_str_byte("style", Xop_xatr_itm.Key_tid_style)
.Add_str_byte("role", Xop_xatr_itm.Key_tid_role)
;
// trie of whitelisted attribute keys; vals are Xop_xatr_whitelist_itm; filled by Ini_key_trie_add and queried by Chk
private Btrie_slim_mgr key_trie = Btrie_slim_mgr.ci_ascii_(); // NOTE:ci.ascii:HTML.node_name
// Scans a style val for unsafe CSS constructs. On the first unsafe construct, xatr's val is set to Bry_.Empty and false is returned; else true.
public boolean Scrub_style(Xop_xatr_itm xatr, byte[] raw) { // REF:Sanitizer.php|checkCss; '! expression | filter\s*: | accelerator\s*: | url\s*\( !ix'; NOTE: this seems to affect MS IE only; DATE:2013-04-01
    // pick the bytes holding the val: a manually-specified val_bry wins; else the val range inside raw
    byte[] scan_bry; int scan_bgn, scan_end;
    byte[] manual_val = xatr.Val_bry();
    if (manual_val != null) {   // val_bry specified manually; EX: "id=<nowiki>1</nowiki>" has a manual val_bry of "1"
        scan_bry = manual_val;
        scan_bgn = 0;
        scan_end = manual_val.length;
    }
    else {
        scan_bry = raw;
        scan_bgn = xatr.Val_bgn();
        scan_end = xatr.Val_end();
        if (scan_bgn == scan_end) return true;  // no val; nothing to scrub; return true
    }

    int cur = scan_bgn;
    while (cur < scan_end) {
        Object hit = style_trie.Match_bgn(scan_bry, cur, scan_end);
        if (hit == null) {      // no keyword starts here; advance one byte
            cur++;
            continue;
        }
        cur = style_trie.Match_pos();   // resume scanning after the matched keyword
        boolean unsafe;
        switch (((Byte_obj_val)hit).Val()) {
            case Style_expression:      // always unsafe
                unsafe = true;
                break;
            case Style_filter:
            case Style_accelerator:     // unsafe only if next non-ws byte is ":"
                unsafe = Next_non_ws_byte(scan_bry, cur, scan_end) == Byte_ascii.Colon;
                break;
            case Style_url:
            case Style_urls:
            case Style_image:
            case Style_image_set:       // unsafe only if next non-ws byte is "("
                unsafe = Next_non_ws_byte(scan_bry, cur, scan_end) == Byte_ascii.Paren_bgn;
                break;
            default:                    // all other keywords (EX: Style_comment) are ignored
                unsafe = false;
                break;
        }
        if (unsafe) {
            xatr.Val_bry_(Bry_.Empty);
            return false;
        }
    }
    return true;
}
// Returns the first byte in raw between bgn (inclusive) and end (exclusive) that is not a space, tab, CR or NL; returns Byte_ascii.Nil if there is none.
byte Next_non_ws_byte(byte[] raw, int bgn, int end) {
    int pos = bgn;
    while (pos < end) {
        byte cur = raw[pos++];
        boolean is_ws   =  cur == Byte_ascii.Space
                        || cur == Byte_ascii.Tab
                        || cur == Byte_ascii.Cr
                        || cur == Byte_ascii.Nl;
        if (!is_ws) return cur;
    }
    return Byte_ascii.Nil;
}
// ids of the CSS keywords registered in style_trie; Scrub_style switches on these
// NOTE: Style_comment ("/*") is registered in the trie, but Scrub_style has no case for it, so a match is skipped without rejecting the val
static final byte Style_expression = 0, Style_filter = 1, Style_accelerator = 2, Style_url = 3, Style_urls = 4, Style_comment = 5, Style_image = 6, Style_image_set = 7;
// keyword trie used by Scrub_style; static, so shared by all Xop_xatr_whitelist_mgr instances
private static Btrie_slim_mgr style_trie = Btrie_slim_mgr.ci_ascii_() // NOTE:ci.ascii:Javascript
.Add_str_byte("expression" , Style_expression)
.Add_str_byte("filter" , Style_filter)
.Add_str_byte("accelerator" , Style_accelerator)
.Add_str_byte("url" , Style_url)
.Add_str_byte("urls" , Style_urls)
.Add_str_byte("image" , Style_image)
.Add_str_byte("image-set" , Style_image_set)
.Add_str_byte("/*" , Style_comment)
;
// the only val that Chk accepts for the role attribute
private static final byte[] Val_role_presentation = Bry_.new_a7("presentation");
}
// One whitelisted attribute key, with a per-tag allowed flag.
class Xop_xatr_whitelist_itm {
    private byte[] key;
    private byte key_tid;
    private boolean exact;
    private byte[] tags = new byte[Xop_xnde_tag_._MaxLen];  // indexed by tag id; 1 = attribute allowed for that tag

    public Xop_xatr_whitelist_itm(byte[] key, byte key_tid, boolean exact) {
        this.key = key;
        this.key_tid = key_tid;
        this.exact = exact;
    }
    public byte[] Key() {
        return key;
    }
    public byte Key_tid() {
        return key_tid;
    }
    // if true, Xop_xatr_whitelist_mgr.Chk requires the trie match to end exactly at the end of the key
    public boolean Exact() {
        return exact;
    }
    // returns the backing array itself; callers write to it directly
    public byte[] Tags() {
        return tags;
    }
}

View File

@@ -0,0 +1,71 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
// Tests for Xop_xatr_whitelist_mgr: key whitelisting per tag, the role val restriction, and style scrubbing.
public class Xop_xatr_whitelist_mgr_tst {
    Xop_xatr_whitelist_fxt fxt = new Xop_xatr_whitelist_fxt();

    @Before
    public void init() {
        fxt.Clear();
    }
    @Test
    public void Basic() {
        final byte div = Xop_xnde_tag_.Tid_div, img = Xop_xnde_tag_.Tid_img, data = Xop_xnde_tag_.Tid_data;
        fxt.Whitelist(div , "style"             , true);
        fxt.Whitelist(div , "xstyle"            , false);
        fxt.Whitelist(div , "stylex"            , false);
        fxt.Whitelist(div , "styl"              , false);
        fxt.Whitelist(img , "alt"               , true);
        fxt.Whitelist(img , "span"              , false);
        fxt.Whitelist(div , "data-sort-type"    , true);
        fxt.Whitelist(data, "value"             , true);
        fxt.Whitelist(data, "valuex"            , false);
    }
    @Test
    public void Role() {
        final byte div = Xop_xnde_tag_.Tid_div;
        fxt.Whitelist(div, "role", "presentation", true);
        fxt.Whitelist(div, "role", "other", false);
    }
    @Test
    public void Scrub() {
        String[] rejected = new String[]
        { "expression"
        , "filter:a"
        , "filter\t \n:a"
        , "accelerator:a"
        , "url()"
        , "urls()"
        };
        for (String style_val : rejected)
            fxt.Scrub_style_fail(style_val);
        fxt.Scrub_style_pass("filterx");
    }
}
// Test fixture for Xop_xatr_whitelist_mgr; reuses one manager and one attribute item across calls.
class Xop_xatr_whitelist_fxt {
    private Xop_xatr_whitelist_mgr whitelist_mgr;
    private Xop_xatr_itm atr_itm = new Xop_xatr_itm(0, 0);

    // creates and initializes the manager on first call; later calls do nothing
    public void Clear() {
        if (whitelist_mgr != null) return;
        whitelist_mgr = new Xop_xatr_whitelist_mgr().Ini();
    }
    // asserts that Chk returns expd for key_str on tag_id; key_str's bytes double as the src
    public void Whitelist(byte tag_id, String key_str, boolean expd) {
        byte[] key_bry = Bry_.new_a7(key_str);
        atr_itm.Key_rng_(0, key_bry.length);
        boolean actl = whitelist_mgr.Chk(tag_id, key_bry, atr_itm);
        Tfds.Eq(expd, actl, key_str);
    }
    // same as above, but first sets a manual val on the attribute item
    public void Whitelist(byte tag_id, String key_str, String val_str, boolean expd) {
        byte[] key_bry = Bry_.new_a7(key_str);
        atr_itm.Key_rng_(0, key_bry.length);
        atr_itm.Val_bry_(Bry_.new_a7(val_str));
        boolean actl = whitelist_mgr.Chk(tag_id, key_bry, atr_itm);
        Tfds.Eq(expd, actl, key_str);
    }
    // a passing val is expected to be left as is
    public void Scrub_style_pass(String style_val_str) {
        Scrub_style(style_val_str, style_val_str);
    }
    // a failing val is expected to be blanked
    public void Scrub_style_fail(String val_str) {
        Scrub_style(val_str, "");
    }
    public void Scrub_style(String val_str, String expd) {
        byte[] val_bry = Bry_.new_a7(val_str);
        atr_itm.Val_bry_(val_bry);
        whitelist_mgr.Scrub_style(atr_itm, val_bry);
        String actl = String_.new_a7(atr_itm.Val_bry());
        Tfds.Eq(expd, actl);
    }
}

View File

@@ -0,0 +1,21 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
// Callback for handling one parsed xnde attribute (xatr) found in src.
// NOTE(review): no implementer or caller is visible in this file; presumably xatr_key_obj is the object looked up for the attribute's key -- confirm against callers
public interface Xop_xnde_atr_parser {
void Xatr_parse(Xowe_wiki wiki, byte[] src, Xop_xatr_itm xatr, Object xatr_key_obj);
}

View File

@@ -0,0 +1,26 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*;
// Lexer hook for xnde tokens: registers itself for "<" and delegates token creation to ctx.Xnde().
class Xop_xnde_lxr implements Xop_lxr {
    public static final Xop_xnde_lxr _ = new Xop_xnde_lxr();    // shared instance
    Xop_xnde_lxr() {}

    public byte Lxr_tid() {
        return Xop_lxr_.Tid_xnde;
    }
    public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
        core_trie.Add(Byte_ascii.Lt, this);
    }
    public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {
        // nothing lang-specific to register
    }
    public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
        return ctx.Xnde().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
    }
}

View File

@@ -0,0 +1,81 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.primitives.*;
public class Xop_xnde_tag {
public Xop_xnde_tag(int id, String name_str) { // NOTE: should only be used by Xop_xnde_tag_
this.id = id;
this.name_bry = Bry_.new_a7(name_str);
this.name_str = name_str;
name_len = name_bry.length;
xtn_end_tag = Bry_.Add(Xop_xnde_tag_.XtnEndTag_bgn, name_bry); // always force endtag; needed for <noinclude>
xtn_end_tag_tmp = new byte[xtn_end_tag.length]; Array_.Copy(xtn_end_tag, xtn_end_tag_tmp);
}
// Accessors and fluent setters. Convention in this block: "Name()" reads a field; "Name_()" sets it and returns this,
// so flags can be chained when a tag is defined. Boolean flags default to false and can only be switched on.
public int Id() {return id;} public Xop_xnde_tag Id_(int v) {id = v; return this;} private int id;
public byte[] Name_bry() {return name_bry;} private byte[] name_bry;
public String Name_str() {return name_str;} private String name_str;
public int Name_len() {return name_len;} private int name_len;
public boolean Xtn() {return xtn;} public Xop_xnde_tag Xtn_() {xtn = true; return this;} private boolean xtn;
// closing-tag bytes built by the constructor; XtnEndTag_tmp is a separate copy of the same bytes
public byte[] XtnEndTag() {return xtn_end_tag;} private byte[] xtn_end_tag;
public byte[] XtnEndTag_tmp() {return xtn_end_tag_tmp;} private byte[] xtn_end_tag_tmp;
// begin / end node modes; vals are the Xop_xnde_tag_.BgnNdeMode_* / EndNdeMode_* constants
public int BgnNdeMode() {return bgnNdeMode;} private int bgnNdeMode = Xop_xnde_tag_.BgnNdeMode_normal;
public Xop_xnde_tag BgnNdeMode_inline_() {bgnNdeMode = Xop_xnde_tag_.BgnNdeMode_inline; return this;}
public int EndNdeMode() {return endNdeMode;} private int endNdeMode = Xop_xnde_tag_.EndNdeMode_normal;
public Xop_xnde_tag EndNdeMode_inline_() {endNdeMode = Xop_xnde_tag_.EndNdeMode_inline; return this;}
public Xop_xnde_tag EndNdeMode_escape_() {endNdeMode = Xop_xnde_tag_.EndNdeMode_escape; return this;}
public boolean SingleOnly() {return singleOnly;} public Xop_xnde_tag SingleOnly_() {singleOnly = true; return this;} private boolean singleOnly;
public boolean TblSub() {return tblSub;} public Xop_xnde_tag TblSub_() {tblSub = true; return this;} private boolean tblSub;
public boolean Restricted() {return restricted;} public Xop_xnde_tag Restricted_() {restricted = true; return this;} private boolean restricted;
public boolean NoInline() {return noInline;} public Xop_xnde_tag NoInline_() {noInline = true; return this;} private boolean noInline;
public boolean Inline_by_backslash() {return inline_by_backslash;} public Xop_xnde_tag Inline_by_backslash_() {inline_by_backslash = true; return this;} private boolean inline_by_backslash;
public boolean Section() {return section;} public Xop_xnde_tag Section_() {section = true; return this;} private boolean section;
public boolean Repeat_ends() {return repeat_ends;} public Xop_xnde_tag Repeat_ends_() {repeat_ends = true; return this;} private boolean repeat_ends;
public boolean Repeat_mids() {return repeat_mids;} public Xop_xnde_tag Repeat_mids_() {repeat_mids = true; return this;} private boolean repeat_mids;
// NOTE(review): Empty_ignored and Ignore_empty (below) are two separate flags with similar names; confirm both are still needed
public boolean Empty_ignored() {return empty_ignored;} public Xop_xnde_tag Empty_ignored_() {empty_ignored = true; return this;} private boolean empty_ignored;
public boolean Raw() {return raw;} public Xop_xnde_tag Raw_() {raw = true; return this;} private boolean raw;
// block behavior on open / close; each holds one val, so a later Block_open_*_() call replaces an earlier one (same for Block_close_*_())
public static final byte Block_noop = 0, Block_bgn = 1, Block_end = 2;
public byte Block_open() {return block_open;} private byte block_open = Block_noop;
public byte Block_close() {return block_close;} private byte block_close = Block_noop;
public Xop_xnde_tag Block_open_bgn_() {block_open = Block_bgn; return this;} public Xop_xnde_tag Block_open_end_() {block_open = Block_end; return this;}
public Xop_xnde_tag Block_close_bgn_() {block_close = Block_bgn; return this;} public Xop_xnde_tag Block_close_end_() {block_close = Block_end; return this;}
public boolean Xtn_auto_close() {return xtn_auto_close;} public Xop_xnde_tag Xtn_auto_close_() {xtn_auto_close = true; return this;} private boolean xtn_auto_close;
public boolean Ignore_empty() {return ignore_empty;} public Xop_xnde_tag Ignore_empty_() {ignore_empty = true; return this;} private boolean ignore_empty;
public boolean Xtn_skips_template_args() {return xtn_skips_template_args;} public Xop_xnde_tag Xtn_skips_template_args_() {xtn_skips_template_args = true; return this;} private boolean xtn_skips_template_args;
// language-specific names for this tag, keyed by lang code; null until the first Langs_ call
private Ordered_hash langs;
// reusable lookup key for langs; created together with langs
private Int_obj_ref langs_key;
public Ordered_hash Langs() {
    return langs;
}
// Registers name as this tag's name for lang_code; returns this for chaining.
public Xop_xnde_tag Langs_(int lang_code, String name) {
    if (langs == null) {    // lazily create on first use
        langs_key = Int_obj_ref.neg1_();
        langs = Ordered_hash_.new_();
    }
    Xop_xnde_tag_lang added = new Xop_xnde_tag_lang(lang_code, name);
    langs.Add(added.Lang_code(), added);
    return this;
}
// Resolves the tag name found in src between bgn and end for the current language.
// case_mgr: not used by this method
// cur_lang: lang code used to look up the language-specific name
// returns:  Xop_xnde_tag_lang._ if the tag has no language-specific names, or if src holds the canonical name;
//           the language's entry if src matches its name (ASCII case-insensitive);
//           null if the language has no entry, or its name does not match
public Xop_xnde_tag_lang Langs_get(gplx.xowa.langs.cases.Xol_case_mgr case_mgr, int cur_lang, byte[] src, int bgn, int end) {
    if (langs == null) return Xop_xnde_tag_lang._;                          // no langs defined; always return the shared default item; EX:<b>
    if (Bry_.Eq(name_bry, src, bgn, end)) return Xop_xnde_tag_lang._;       // canonical name (name_bry) is valid in all langs; EX: <section> and cur_lang=de
    Xop_xnde_tag_lang lang;
    synchronized (langs) {
        // langs_key is a shared mutable key; set it and use it under the same lock, so that another
        // thread cannot change it between the assignment and the lookup
        langs_key.Val_(cur_lang);
        lang = (Xop_xnde_tag_lang)langs.Get_by(langs_key);
    }
    if (lang == null) return null;                                          // cur tag is a lang tag, but no tag for this lang; EX: "<trecho>" and cur_lang=de
    return Bry_.Eq_ci_ascii(lang.Name_bry(), src, bgn, end)
        ? lang
        : null;
}
}

View File

@@ -0,0 +1,264 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.xowa.langs.*;
public class Xop_xnde_tag_ {
// end-node modes; Xop_xnde_tag defaults to normal and is switched by EndNdeMode_inline_() / EndNdeMode_escape_()
public static final int EndNdeMode_normal = 0, EndNdeMode_inline = 1, EndNdeMode_escape = 2; // escape is for hr which does not support </hr>
// begin-node modes; Xop_xnde_tag defaults to normal and is switched by BgnNdeMode_inline_()
public static final int BgnNdeMode_normal = 0, BgnNdeMode_inline = 1;
public static final byte[] Name_onlyinclude = Bry_.new_a7("onlyinclude");
// prefix that the Xop_xnde_tag and Xop_xnde_tag_lang constructors prepend to a tag name to build its closing-tag bytes
public static final byte[] XtnEndTag_bgn = Bry_.new_a7("</");//, XtnEndTag_end = Bry_.new_a7(">");
// Tag ids. Each id is used as the index into Ary (see new_) and into Xop_xatr_whitelist_itm.Tags(),
// so ids must stay below _MaxLen; when adding an id, _MaxLen must be raised to match.
public static final byte
Tid_b = 0
, Tid_strong = 1
, Tid_i = 2
, Tid_em = 3
, Tid_cite = 4
, Tid_dfn = 5
, Tid_var = 6
, Tid_u = 7
, Tid_ins = 8
, Tid_abbr = 9
, Tid_strike = 10
, Tid_del = 11
, Tid_s = 12
, Tid_sub = 13
, Tid_sup = 14
, Tid_big = 15
, Tid_small = 16
, Tid_code = 17
, Tid_tt = 18
, Tid_kbd = 19
, Tid_samp = 20
, Tid_blockquote = 21
, Tid_pre = 22
, Tid_font = 23
, Tid_center = 24
, Tid_p = 25
, Tid_span = 26
, Tid_div = 27
, Tid_hr = 28
, Tid_br = 29
, Tid_h1 = 30
, Tid_h2 = 31
, Tid_h3 = 32
, Tid_h4 = 33
, Tid_h5 = 34
, Tid_h6 = 35
, Tid_li = 36
, Tid_dt = 37
, Tid_dd = 38
, Tid_ol = 39
, Tid_ul = 40
, Tid_dl = 41
, Tid_table = 42
, Tid_tr = 43
, Tid_td = 44
, Tid_th = 45
, Tid_thead = 46
, Tid_tfoot = 47
, Tid_tbody = 48
, Tid_caption = 49
, Tid_colgroup = 50
, Tid_col = 51
, Tid_a = 52
, Tid_img = 53
, Tid_ruby = 54
, Tid_rt = 55
, Tid_rb = 56
, Tid_rp = 57
, Tid_includeonly = 58
, Tid_noinclude = 59
, Tid_onlyinclude = 60
, Tid_nowiki = 61
, Tid_xowa_cmd = 62
, Tid_poem = 63
, Tid_math = 64
, Tid_ref = 65
, Tid_references = 66
, Tid_source = 67
, Tid_syntaxHighlight = 68
, Tid_gallery = 69
, Tid_imageMap = 70
, Tid_timeline = 71
, Tid_hiero = 72
, Tid_inputBox = 73
, Tid_pages = 74
, Tid_section = 75
, Tid_pagequality = 76
, Tid_pagelist = 77
, Tid_categoryList = 78
, Tid_categoryTree = 79
, Tid_dynamicPageList = 80
, Tid_time = 81
, Tid_input = 82
, Tid_textarea = 83
, Tid_score = 84
, Tid_button = 85
, Tid_select = 86
, Tid_option = 87
, Tid_optgroup = 88
, Tid_script = 89
, Tid_style = 90
, Tid_form = 91
, Tid_translate = 92
, Tid_languages = 93
, Tid_templateData = 94
, Tid_bdi = 95
, Tid_data = 96
, Tid_mark = 97
, Tid_wbr = 98
, Tid_bdo = 99
, Tid_listing_buy = 100
, Tid_listing_do = 101
, Tid_listing_drink = 102
, Tid_listing_eat = 103
, Tid_listing_listing = 104
, Tid_listing_see = 105
, Tid_listing_sleep = 106
, Tid_rss = 107
, Tid_xowa_html = 108
, Tid_xowa_tag_bgn = 109
, Tid_xowa_tag_end = 110
, Tid_quiz = 111
, Tid_indicator = 112
, Tid_q = 113
;
// number of tag ids; sizes Ary below and the per-tag arrays in Xop_xatr_whitelist_itm
public static final int _MaxLen = 114;
// all tags, indexed by tag id; filled by new_ as each tag is created
public static final Xop_xnde_tag[] Ary = new Xop_xnde_tag[_MaxLen];
// creates a tag and stores it in Ary at its id
private static Xop_xnde_tag new_(int id, String name) {
    Xop_xnde_tag created = new Xop_xnde_tag(id, name);
    return Ary[id] = created;
}
// Tag definitions, one per Tid_ constant. Each is created by new_ (which also stores it in Ary) and then
// configured by chaining Xop_xnde_tag's fluent setters.
// NOTE(review): Tag_tr chains Block_open_bgn_() and then Block_open_end_(); both assign the same block_open field,
// so only the 2nd call takes effect and block_close is left at its default -- confirm whether Block_close_end_() was intended
public static final Xop_xnde_tag
Tag_b = new_(Tid_b, "b").NoInline_()
, Tag_strong = new_(Tid_strong, "strong").NoInline_()
, Tag_i = new_(Tid_i, "i").NoInline_()
, Tag_em = new_(Tid_em, "em").NoInline_()
, Tag_cite = new_(Tid_cite, "cite").NoInline_()
, Tag_dfn = new_(Tid_dfn, "dfn").NoInline_()
, Tag_var = new_(Tid_var, "var").NoInline_()
, Tag_u = new_(Tid_u, "u").NoInline_().Repeat_ends_() // PAGE:en.b:Textbook_of_Psychiatry/Alcoholism_and_Psychoactive_Substance_Use_Disorders; DATE:2014-09-05
, Tag_ins = new_(Tid_ins, "ins").NoInline_()
, Tag_abbr = new_(Tid_abbr, "abbr").NoInline_()
, Tag_strike = new_(Tid_strike, "strike").NoInline_()
, Tag_del = new_(Tid_del, "del").NoInline_()
, Tag_s = new_(Tid_s, "s").NoInline_()
, Tag_sub = new_(Tid_sub, "sub").NoInline_()
, Tag_sup = new_(Tid_sup, "sup").NoInline_()
, Tag_big = new_(Tid_big, "big").NoInline_()
, Tag_small = new_(Tid_small, "small").NoInline_()
, Tag_code = new_(Tid_code, "code").NoInline_().Repeat_ends_()
, Tag_tt = new_(Tid_tt, "tt").NoInline_().Repeat_ends_()
, Tag_kbd = new_(Tid_kbd, "kbd").NoInline_()
, Tag_samp = new_(Tid_samp, "samp").NoInline_()
, Tag_blockquote = new_(Tid_blockquote, "blockquote").NoInline_().Repeat_mids_().Section_().Block_open_bgn_().Block_close_end_() // NOTE: should be open_end_, but leaving for now; DATE:2014-03-11; added Repeat_mids_(); PAGE:en.w:Ring_a_Ring_o'_Roses DATE:2014-06-26
, Tag_pre = new_(Tid_pre, "pre").NoInline_().Section_().Xtn_().Raw_().Block_open_bgn_().Block_close_end_().Ignore_empty_().Xtn_skips_template_args_()
, Tag_font = new_(Tid_font, "font").NoInline_()
, Tag_center = new_(Tid_center, "center").NoInline_().Block_open_end_().Block_close_end_() // removed .Repeat_ends_(); added Nest_(); EX: w:Burr Truss; DATE:2012-12-12
, Tag_p = new_(Tid_p, "p").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_span = new_(Tid_span, "span").Section_()
, Tag_div = new_(Tid_div, "div").Section_().Block_open_end_().Block_close_end_()
, Tag_hr = new_(Tid_hr, "hr").SingleOnly_().BgnNdeMode_inline_().Inline_by_backslash_().EndNdeMode_escape_().Section_().Block_close_end_()
, Tag_br = new_(Tid_br, "br").SingleOnly_().BgnNdeMode_inline_().Inline_by_backslash_().EndNdeMode_inline_().Section_()
, Tag_h1 = new_(Tid_h1, "h1").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h2 = new_(Tid_h2, "h2").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h3 = new_(Tid_h3, "h3").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h4 = new_(Tid_h4, "h4").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h5 = new_(Tid_h5, "h5").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h6 = new_(Tid_h6, "h6").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_li = new_(Tid_li, "li").Repeat_mids_().Empty_ignored_().Block_open_bgn_().Block_close_end_()
, Tag_dt = new_(Tid_dt, "dt").Repeat_mids_()
, Tag_dd = new_(Tid_dd, "dd").Repeat_mids_()
, Tag_ol = new_(Tid_ol, "ol").NoInline_().Block_open_bgn_().Block_close_end_()
, Tag_ul = new_(Tid_ul, "ul").NoInline_().Block_open_bgn_().Block_close_end_()
, Tag_dl = new_(Tid_dl, "dl").NoInline_()
, Tag_table = new_(Tid_table, "table").NoInline_().Block_open_bgn_().Block_close_end_()
, Tag_tr = new_(Tid_tr, "tr").TblSub_().Block_open_bgn_().Block_open_end_()
, Tag_td = new_(Tid_td, "td").TblSub_().Block_open_end_().Block_close_bgn_()
, Tag_th = new_(Tid_th, "th").TblSub_().Block_open_end_().Block_close_bgn_()
, Tag_thead = new_(Tid_thead, "thead")
, Tag_tfoot = new_(Tid_tfoot, "tfoot")
, Tag_tbody = new_(Tid_tbody, "tbody")
, Tag_caption = new_(Tid_caption, "caption").NoInline_().TblSub_()
, Tag_colgroup = new_(Tid_colgroup, "colgroup")
, Tag_col = new_(Tid_col, "col")
, Tag_a = new_(Tid_a, "a").Restricted_()
, Tag_img = new_(Tid_img, "img").Restricted_() // NOTE: was .Xtn() DATE:2014-11-06
, Tag_ruby = new_(Tid_ruby, "ruby").NoInline_()
, Tag_rt = new_(Tid_rt, "rt").NoInline_()
, Tag_rb = new_(Tid_rb, "rb").NoInline_()
, Tag_rp = new_(Tid_rp, "rp").NoInline_()
, Tag_includeonly = new_(Tid_includeonly, "includeonly")
, Tag_noinclude = new_(Tid_noinclude, "noinclude")
, Tag_onlyinclude = new_(Tid_onlyinclude, "onlyinclude")
, Tag_nowiki = new_(Tid_nowiki, "nowiki")
, Tag_xowa_cmd = new_(Tid_xowa_cmd, "xowa_cmd").Xtn_()
, Tag_poem = new_(Tid_poem, "poem").Xtn_().Xtn_auto_close_()
, Tag_math = new_(Tid_math, "math").Xtn_()
, Tag_ref = new_(Tid_ref, "ref").Xtn_()
, Tag_references = new_(Tid_references, "references").Xtn_()
, Tag_source = new_(Tid_source, "source").Xtn_().Block_open_bgn_().Block_close_end_() // deactivate pre; pre; PAGE:en.w:Comment_(computer_programming); DATE:2014-06-23
, Tag_syntaxHighlight = new_(Tid_syntaxHighlight, "syntaxHighlight").Xtn_().Block_open_bgn_().Block_close_end_() // deactivate pre; pre; PAGE:en.w:Comment_(computer_programming); DATE:2014-06-23
, Tag_gallery = new_(Tid_gallery, "gallery").Xtn_().Block_open_bgn_().Block_close_end_().Xtn_auto_close_()
, Tag_imageMap = new_(Tid_imageMap, "imageMap").Xtn_()
, Tag_timeline = new_(Tid_timeline, "timeline").Xtn_()
, Tag_hiero = new_(Tid_hiero, "hiero").Xtn_()
, Tag_inputBox = new_(Tid_inputBox, "inputBox").Xtn_()
, Tag_pages = new_(Tid_pages, "pages").Xtn_()
, Tag_section = new_(Tid_section, "section").Xtn_().Langs_(Xol_lang_itm_.Id_de, "Abschnitt").Langs_(Xol_lang_itm_.Id_he, "קטע").Langs_(Xol_lang_itm_.Id_pt, "trecho") // DATE:2014-07-18
, Tag_pagequality = new_(Tid_pagequality, "pagequality").Xtn_()
, Tag_pagelist = new_(Tid_pagelist, "pagelist").Xtn_()
, Tag_categoryList = new_(Tid_categoryList, "categoryList").Xtn_()
, Tag_categoryTree = new_(Tid_categoryTree, "categoryTree").Xtn_()
, Tag_dynamicPageList = new_(Tid_dynamicPageList, "dynamicPageList").Xtn_()
, Tag_time = new_(Tid_time, "time")
, Tag_input = new_(Tid_input, "input").Restricted_()
, Tag_textarea = new_(Tid_textarea, "textarea").Restricted_()
, Tag_score = new_(Tid_score, "score").Xtn_()
, Tag_button = new_(Tid_button, "button").Restricted_()
, Tag_select = new_(Tid_select, "select").Restricted_()
, Tag_option = new_(Tid_option, "option").Restricted_()
, Tag_optgroup = new_(Tid_optgroup, "optgroup").Restricted_()
, Tag_script = new_(Tid_script, "script").Restricted_() // NOTE: had ".Block_open_bgn_().Block_close_end_()"; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
, Tag_style = new_(Tid_style, "style").Restricted_() // NOTE: had ".Block_open_bgn_().Block_close_end_()"; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
, Tag_form = new_(Tid_form, "form").Restricted_() // NOTE: had ".Block_open_bgn_().Block_close_end_()"; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
, Tag_translate = new_(Tid_translate, "translate").Xtn_()
, Tag_languages = new_(Tid_languages, "languages").Xtn_()
, Tag_templateData = new_(Tid_templateData, "templateData").Xtn_()
, Tag_bdi = new_(Tid_bdi, "bdi")
, Tag_data = new_(Tid_data, "data")
, Tag_mark = new_(Tid_mark, "mark")
, Tag_wbr = new_(Tid_wbr, "wbr").SingleOnly_()
, Tag_bdo = new_(Tid_bdo, "bdo").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_listing_buy = new_(Tid_listing_buy, "buy").Xtn_()
, Tag_listing_do = new_(Tid_listing_do, "do").Xtn_()
, Tag_listing_drink = new_(Tid_listing_drink, "drink").Xtn_()
, Tag_listing_eat = new_(Tid_listing_eat, "eat").Xtn_()
, Tag_listing_listing = new_(Tid_listing_listing, "listing").Xtn_()
, Tag_listing_see = new_(Tid_listing_see, "see").Xtn_()
, Tag_listing_sleep = new_(Tid_listing_sleep, "sleep").Xtn_()
, Tag_rss = new_(Tid_rss, "rss").Xtn_()
, Tag_xowa_html = new_(Tid_xowa_html, "xowa_html").Xtn_()
, Tag_xowa_tag_bgn = new_(Tid_xowa_tag_bgn, "xtag_bgn").Xtn_()
, Tag_xowa_tag_end = new_(Tid_xowa_tag_end, "xtag_end").Xtn_()
, Tag_quiz = new_(Tid_quiz, "quiz").Xtn_()
, Tag_indicator = new_(Tid_indicator, "indicator").Xtn_()
, Tag_q = new_(Tid_q, "q")
;
}

View File

@@ -0,0 +1,32 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.primitives.*;
/** Language-specific name for an xnde tag: couples a language code with the tag name and its derived byte forms. */
public class Xop_xnde_tag_lang {
	private Int_obj_ref lang_code;
	private String name_str;
	private byte[] name_bry;
	private byte[] xtnEndTag_tmp;

	/**
	 * @param lang_code_int language code; wrapped in an Int_obj_ref
	 * @param name_str tag name; also stored as utf8 bytes and as XtnEndTag_bgn + name
	 */
	public Xop_xnde_tag_lang(int lang_code_int, String name_str) {
		this.lang_code = Int_obj_ref.new_(lang_code_int);
		this.name_str = name_str;
		byte[] name_as_bry = Bry_.new_u8(name_str);
		this.name_bry = name_as_bry;
		this.xtnEndTag_tmp = Bry_.Add(Xop_xnde_tag_.XtnEndTag_bgn, name_as_bry);
	}

	/** Language code as a boxed int reference. */
	public Int_obj_ref Lang_code() {
		return lang_code;
	}

	/** Tag name as a String. */
	public String Name_str() {
		return name_str;
	}

	/** Tag name as utf8 bytes. */
	public byte[] Name_bry() {
		return name_bry;
	}

	/** Xop_xnde_tag_.XtnEndTag_bgn followed by the name bytes. */
	public byte[] XtnEndTag_tmp() {
		return xtnEndTag_tmp;
	}

	/** Shared instance built with lang code -1 and an empty name. */
	public static final Xop_xnde_tag_lang _ = new Xop_xnde_tag_lang(-1, String_.Empty);
}

View File

@@ -0,0 +1,61 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*;
/** Holds one tag-name trie per parse mode and fills them lazily on first lookup. */
public class Xop_xnde_tag_regy {
	// NOTE: tries are ci.utf8; he.s and <section> alias DATE:2014-07-18
	private Btrie_slim_mgr tag_regy_wiki_main = Btrie_slim_mgr.ci_utf_8_();
	private Btrie_slim_mgr tag_regy_wiki_tmpl = Btrie_slim_mgr.ci_utf_8_();
	private Btrie_slim_mgr tag_regy_tmpl = Btrie_slim_mgr.ci_utf_8_();
	boolean nild = true;	// true until the first XndeNames call has run Init

	/**
	 * Returns the trie for the given parse tid; runs Init once before the first lookup.
	 * Any tid other than Parse_tid_tmpl / Parse_tid_page_wiki resolves to the wiki_tmpl trie.
	 */
	public Btrie_slim_mgr XndeNames(int i) {
		if (nild) {
			Init();
			nild = false;
		}
		if (i == Xop_parser_.Parse_tid_tmpl)
			return tag_regy_tmpl;
		if (i == Xop_parser_.Parse_tid_page_wiki)
			return tag_regy_wiki_main;
		return tag_regy_wiki_tmpl;	// Parse_tid_page_tmpl and every unhandled tid; //throw Exc_.new_unhandled(i);
	}

	/** Registers xtn tags plus the include / nowiki tags in both tmpl tries, and every tag in the main trie. */
	public void Init() {
		Xop_xnde_tag[] tmpl_tags = FilterXtns
			( Xop_xnde_tag_.Ary
			, Xop_xnde_tag_.Tag_includeonly
			, Xop_xnde_tag_.Tag_noinclude
			, Xop_xnde_tag_.Tag_onlyinclude
			, Xop_xnde_tag_.Tag_nowiki
			);
		Init_reg(tag_regy_tmpl, tmpl_tags);
		Init_reg(tag_regy_wiki_tmpl, tmpl_tags);
		Init_reg(tag_regy_wiki_main, Xop_xnde_tag_.Ary);
	}

	/** Collects every tag in ary whose Xtn() is true, then appends all of more. */
	private Xop_xnde_tag[] FilterXtns(Xop_xnde_tag[] ary, Xop_xnde_tag... more) {
		List_adp list = List_adp_.new_();
		int ary_len = ary.length;
		for (int i = 0; i < ary_len; i++) {
			Xop_xnde_tag candidate = ary[i];
			if (candidate.Xtn())
				list.Add(candidate);
		}
		int more_len = more.length;
		for (int i = 0; i < more_len; i++)
			list.Add(more[i]);
		return (Xop_xnde_tag[])list.To_ary(Xop_xnde_tag.class);
	}

	/** Adds each tag to the trie under its own name and under each of its language-specific names. */
	private void Init_reg(Btrie_slim_mgr tag_regy, Xop_xnde_tag... ary) {
		int ary_len = ary.length;
		for (int tag_idx = 0; tag_idx < ary_len; tag_idx++) {
			Xop_xnde_tag tag = ary[tag_idx];
			tag_regy.Add_obj(tag.Name_bry(), tag);
			Ordered_hash langs = tag.Langs();
			if (langs == null) continue;	// tag has no langs
			// tag has langs; EX: <section>; register each lang's tag; EX:"<Abschnitt>", "<trecho>"; DATE:2014-07-18
			int langs_len = langs.Count();
			for (int lang_idx = 0; lang_idx < langs_len; ++lang_idx) {
				Xop_xnde_tag_lang lang = (Xop_xnde_tag_lang)langs.Get_at(lang_idx);
				tag_regy.Add_obj(lang.Name_bry(), tag);
			}
		}
	}
}

View File

@@ -0,0 +1,35 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/** Per-tag-id counters for the current frame, plus a stack of saved counter arrays for outer frames. */
public class Xop_xnde_tag_stack {
	List_adp xmlTagsStack = List_adp_.new_();
	int[] xmlTags = new int[Xop_xnde_tag_._MaxLen];

	/** Saves the current counters on the stack and starts a fresh, zeroed set. */
	public void Push() {
		xmlTagsStack.Add(xmlTags);
		xmlTags = new int[Xop_xnde_tag_._MaxLen];
	}

	/** Replaces the current counters with the most recently pushed set. */
	public void Pop() {
		Object saved = List_adp_.Pop(xmlTagsStack);
		xmlTags = (int[])saved;
	}

	/** True when the counter for id is non-zero. */
	public boolean Has(int id) {
		return xmlTags[id] != 0;
	}

	/** Increments the counter for id. */
	public void Add(int id) {
		xmlTags[id] = xmlTags[id] + 1;
	}

	/** Decrements the counter for id; a result of -1 is stored as 0. */
	public void Del(int id) {
		int decremented = xmlTags[id] - 1;
		xmlTags[id] = (decremented == -1) ? 0 : decremented;
	}

	/** Zeroes the current counters and discards all saved sets. */
	public void Clear() {
		int idx = 0;
		while (idx < Xop_xnde_tag_._MaxLen) {
			xmlTags[idx] = 0;
			idx++;
		}
		xmlTagsStack.Clear();
	}
}

View File

@@ -0,0 +1,119 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.xowa.xtns.*; import gplx.xowa.parsers.tblws.*;
/** Token for an xml-style node; also usable as a tblw token (tblw tkns report the xnde tag id as Tblw_tid). */
public class Xop_xnde_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
	public static final byte CloseMode_null = 0, CloseMode_inline = 1, CloseMode_pair = 2, CloseMode_open = 3;

	private int tblw_subs_len;
	private byte closeMode = Xop_xnde_tkn.CloseMode_null;
	private boolean tag_visible = true;
	private int name_bgn = -1;
	private int name_end = -1;
	private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
	private int atrs_end = Xop_tblw_wkr.Atrs_null;
	private Xop_xatr_itm[] atrs_ary;
	private Xop_xnde_tag tag;
	private int tag_open_bgn = Int_.Null;
	private int tag_open_end = Int_.Null;
	private int tag_close_bgn = Int_.Null;
	private int tag_close_end = Int_.Null;
	private Xox_xnde xnde_xtn;

	private Xop_xnde_tkn() {}
	public static Xop_xnde_tkn new_() {
		return new Xop_xnde_tkn();
	}

	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_xnde;
	}

	// tblw members
	public int Tblw_tid() {	// NOTE: tblw tkns actually return xnde as Tblw_tid
		return tag.Id();
	}
	public boolean Tblw_xml() {
		return true;
	}
	public int Tblw_subs_len() {
		return tblw_subs_len;
	}
	public void Tblw_subs_len_add_() {
		++tblw_subs_len;
	}

	// close mode / visibility
	public byte CloseMode() {
		return closeMode;
	}
	public Xop_xnde_tkn CloseMode_(byte v) {
		closeMode = v;
		return this;
	}
	public boolean Tag_visible() {
		return tag_visible;
	}
	public Xop_xnde_tkn Tag_visible_(boolean v) {
		tag_visible = v;
		return this;
	}

	// name range
	public int Name_bgn() {
		return name_bgn;
	}
	public Xop_xnde_tkn Name_bgn_(int v) {
		name_bgn = v;
		return this;
	}
	public int Name_end() {
		return name_end;
	}
	public Xop_xnde_tkn Name_end_(int v) {
		name_end = v;
		return this;
	}
	public Xop_xnde_tkn Name_rng_(int bgn, int end) {
		name_bgn = bgn;
		name_end = end;
		return this;
	}

	// attribute range and parsed attributes
	public int Atrs_bgn() {
		return atrs_bgn;
	}
	public Xop_xnde_tkn Atrs_bgn_(int v) {
		atrs_bgn = v;
		return this;
	}
	public int Atrs_end() {
		return atrs_end;
	}
	public Xop_xnde_tkn Atrs_end_(int v) {
		atrs_end = v;
		return this;
	}
	public Xop_xnde_tkn Atrs_rng_(int bgn, int end) {
		atrs_bgn = bgn;
		atrs_end = end;
		return this;
	}
	public void Atrs_rng_set(int bgn, int end) {
		Atrs_rng_(bgn, end);
	}
	public Xop_xatr_itm[] Atrs_ary() {
		return atrs_ary;
	}
	public Xop_xnde_tkn Atrs_ary_(Xop_xatr_itm[] v) {
		atrs_ary = v;
		return this;
	}
	public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {
		atrs_ary = v;
		return this;
	}

	// tag and open / close ranges
	public Xop_xnde_tag Tag() {
		return tag;
	}
	public Xop_xnde_tkn Tag_(Xop_xnde_tag v) {
		tag = v;
		return this;
	}
	public int Tag_open_bgn() {
		return tag_open_bgn;
	}
	public int Tag_open_end() {
		return tag_open_end;
	}
	public Xop_xnde_tkn Tag_open_rng_(int bgn, int end) {
		this.tag_open_bgn = bgn;
		this.tag_open_end = end;
		return this;
	}
	public int Tag_close_bgn() {
		return tag_close_bgn;
	}
	public int Tag_close_end() {
		return tag_close_end;
	}
	public Xop_xnde_tkn Tag_close_rng_(int bgn, int end) {
		this.tag_close_bgn = bgn;
		this.tag_close_end = end;
		return this;
	}

	/** Adds every item of ary as a sub, in order. */
	public Xop_xnde_tkn Subs_add_ary(Xop_tkn_itm... ary) {
		int ary_len = ary.length;
		for (int i = 0; i < ary_len; i++)
			Subs_add(ary[i]);
		return this;
	}

	public Xox_xnde Xnde_xtn() {
		return xnde_xtn;
	}
	public Xop_xnde_tkn Xnde_xtn_(Xox_xnde v) {
		xnde_xtn = v;
		return this;
	}

	/**
	 * Compiles subs for every tag except noinclude / includeonly, which are skipped outright.
	 * For onlyinclude, prep_data.OnlyInclude_exists is set after the subs are compiled.
	 */
	@Override public void Tmpl_compile(Xop_ctx ctx, byte[] src, Xot_compile_data prep_data) {
		int tag_id = tag.Id();
		// NOTE: noinclude: prep_mode is false to force recompile; see Ex_Tmpl_noinclude and {{{1<ni>|a</ni>}}}
		// NOTE: includeonly: changed to always ignore <includeonly>; DATE:2014-05-10
		if (tag_id == Xop_xnde_tag_.Tid_noinclude || tag_id == Xop_xnde_tag_.Tid_includeonly)
			return;
		// nowiki, onlyinclude and all other tags compile their subs; can happen in compile b/c invks are now being compiled
		int subs_len = this.Subs_len();
		for (int i = 0; i < subs_len; i++) {
			Xop_tkn_itm sub = this.Subs_get(i);
			sub.Tmpl_compile(ctx, src, prep_data);
		}
		if (tag_id == Xop_xnde_tag_.Tid_onlyinclude)
			prep_data.OnlyInclude_exists = true;
	}

	/** Writes this node's template-stage output to bfr according to its tag; always returns true. */
	@Override public boolean Tmpl_evaluate(Xop_ctx ctx, byte[] src, Xot_invk caller, Bry_bfr bfr) {
		int subs_len = this.Subs_len();
		int tag_id = tag.Id();
		if (tag_id == Xop_xnde_tag_.Tid_noinclude) {
			// do not evaluate subs
		}
		else if (tag_id == Xop_xnde_tag_.Tid_includeonly) {
			// evaluate subs unless ctx is in only-include evaluation
			if (!ctx.Only_include_evaluate()) {
				for (int i = 0; i < subs_len; i++)
					this.Subs_get(i).Tmpl_evaluate(ctx, src, caller, bfr);
			}
		}
		else if (tag_id == Xop_xnde_tag_.Tid_nowiki) {
			// evaluate subs; wrap them in literal <nowiki> ... </nowiki>
			bfr.Add_byte(Byte_ascii.Lt).Add(Xop_xnde_tag_.Tag_nowiki.Name_bry()).Add_byte(Byte_ascii.Gt);
			for (int i = 0; i < subs_len; i++)
				this.Subs_get(i).Tmpl_evaluate(ctx, src, caller, bfr);
			bfr.Add_byte(Byte_ascii.Lt).Add_byte(Byte_ascii.Slash).Add(Xop_xnde_tag_.Tag_nowiki.Name_bry()).Add_byte(Byte_ascii.Gt);
		}
		else if (tag_id == Xop_xnde_tag_.Tid_onlyinclude) {
			// evaluate subs; the onlyinclude flag toggling that used to surround this loop is disabled
			for (int i = 0; i < subs_len; i++)
				this.Subs_get(i).Tmpl_evaluate(ctx, src, caller, bfr);
		}
		else if (tag.Xtn()) {
			// ignore tags except for xtn; NOTE: Xtn tags are part of tagRegy_wiki_tmpl stage
			bfr.Add_mid(src, tag_open_bgn, tag_open_end);		// write tag_bgn
			for (int i = 0; i < subs_len; i++)					// always evaluate subs; handle <poem>{{{1}}}</poem>; DATE:2014-03-03
				this.Subs_get(i).Tmpl_evaluate(ctx, src, caller, bfr);
			bfr.Add_mid(src, tag_close_bgn, tag_close_end);		// write tag_end
			if (tag_close_bgn == Int_.MinValue) {				// xtn is unclosed; add a </xtn> else rest of page will be gobbled; PAGE:en.w:Provinces_and_territories_of_Canada DATE:2014-11-13
				bfr.Add(tag.XtnEndTag());
				bfr.Add(Byte_ascii.Gt_bry);
			}
		}
		return true;
	}
}

View File

@@ -0,0 +1,757 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*; import gplx.xowa.apps.progs.*; import gplx.xowa.wikis.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.strings.*; import gplx.html.*;
import gplx.xowa.parsers.logs.*; import gplx.xowa.parsers.tblws.*;
public class Xop_xnde_wkr implements Xop_ctx_wkr {
/** Xop_ctx_wkr hook; this worker needs no per-ctx setup. */
public void Ctor_ctx(Xop_ctx ctx) {
}

// Flag consumed by Make_tkn: when set, the next recognized tag resets it and, if the tag's
// Block_close is Block_end, an Ignore_tid_pre_at_bos token is added.
private boolean pre_at_bos;
public boolean Pre_at_bos() {
	return pre_at_bos;
}
public void Pre_at_bos_(boolean v) {
	pre_at_bos = v;
}

/** Xop_ctx_wkr hook; nothing to do at page start. */
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
}

/** Xop_ctx_wkr hook; resets per-page state. */
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
	this.Clear();
}

/** Resets the pre_at_bos flag. */
private void Clear() {
	this.pre_at_bos = false;
}
/**
 * Closes an xnde that was left open: extends it to src_len, moves its subs to root, and logs it as dangling.
 * When the closing token is a lnki end, the dangling-node-in-caption handler runs first.
 */
public void AutoClose(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn, int closing_tkn_tid) {
	Xop_xnde_tkn dangling = (Xop_xnde_tkn)tkn;
	dangling.Src_end_(src_len);
	// NOTE: ctx.Root used to be root which was a member variable; DATE:2013-12-11
	dangling.Subs_move(root);
	boolean closed_by_lnki_end = closing_tkn_tid == Xop_tkn_itm_.Tid_lnki_end;
	if (closed_by_lnki_end) {
		// PAGE:sr.w:Сићевачка_клисура; DATE:2014-07-03
		Xop_xnde_wkr_.AutoClose_handle_dangling_nde_in_caption(root, tkn);
	}
	// NOTE: log range starts at Src_bgn (the "<") and stops at Name_end b/c Src_end is -1
	ctx.Msg_log().Add_itm_none(Xop_xnde_log.Dangling_xnde, src, dangling.Src_bgn(), dangling.Name_end());
}
// Lexer callback for "<": tries to recognize an xml-style tag (opening or closing) and emit the matching token(s).
//   bgn_pos : start of the candidate tag (the "<"); Doc_bgn_bos is normalized to 0
//   cur_pos : position of the first byte after "<"
// Every exit path returns either a src position (cur_pos, end_pos) or the result of a
// ctx.Lxr_make_* / Make_* helper; presumably the position at which the lexer resumes -- confirm against caller.
// Flow:
//   1. if the innermost stack token is a lnki with no pipes yet, the lnki is invalidated (a tag inside a title is never valid)
//   2. detect "</" and look the name up in the trie for the current Xnde_names_tid
//   3. a name directly followed by a letter or digit is treated as unknown
//   4. unknown tags become text (or an escaped "<" in page_wiki mode)
//   5. scan forward for the closing ">", skipping over inner tags accepted by Xnde_find_gt_find
//   6. apply the <noinclude> / <includeonly> / <onlyinclude> / <nowiki> rules for the current parse tid
//   7. dispatch to Make_xtag_end or Make_xtag_bgn
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
if (bgn_pos == Xop_parser_.Doc_bgn_bos) {
bgn_pos = 0; // do not allow -1 pos
}
if (cur_pos == src_len) return ctx.Lxr_make_txt_(src_len); // "<" is last char in page; strange, but don't raise error;
Xop_tkn_itm last_tkn = ctx.Stack_get_last(); // BLOCK:invalid_ttl_check
if ( last_tkn != null
&& last_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) {
Xop_lnki_tkn lnki = (Xop_lnki_tkn)last_tkn;
if ( lnki.Pipe_count_is_zero()
// && !Xop_lnki_wkr_.Parse_ttl(ctx, src, lnki, bgn_pos) // NOTE: no ttl parse check; <xnde> in ttl is automatically invalid; EX: [[a<b>c</b>|d]]; "a" is valid ttl, but "a<b>c</b>" is not
) {
ctx.Stack_pop_last();
return Xop_lnki_wkr_.Invalidate_lnki(ctx, src, root, lnki, bgn_pos);
}
}
// find >
byte cur_byt = src[cur_pos];
boolean tag_is_closing = false;
if (cur_byt == Byte_ascii.Slash) { // "</" encountered (note that < enters this frame)
++cur_pos;
if (cur_pos == src_len) return ctx.Lxr_make_txt_(src_len); // "</" are last chars on page; strange, but don't raise error;
cur_byt = src[cur_pos];
tag_is_closing = true;
}
// look up the tag name in the trie registered for the current parse mode
Btrie_slim_mgr tag_trie = ctx.App().Xnde_tag_regy().XndeNames(ctx.Xnde_names_tid());
Object tag_obj = tag_trie.Match_bgn_w_byte(cur_byt, src, cur_pos, src_len); // NOTE:tag_obj can be null in wiki_tmpl mode; EX: "<ul" is not a valid tag in wiki_tmpl, but is valid in wiki_main
int atrs_bgn_pos = tag_trie.Match_pos();
int name_bgn = cur_pos, name_end = atrs_bgn_pos;
// tag_end_pos is fixed here, before atrs_bgn_pos is adjusted below; only the backslash case moves it
int tag_end_pos = atrs_bgn_pos - 1;
if (tag_obj != null) {
if (atrs_bgn_pos >= src_len) return ctx.Lxr_make_txt_(atrs_bgn_pos); // truncated tag; EX: "<br"
switch (src[atrs_bgn_pos]) { // NOTE: not sure about rules; Preprocessor_DOM.php calls preg_match on $elementsRegex which seems to break on word boundaries; $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
++atrs_bgn_pos; // set bgn_pos to be after ws
break;
case Byte_ascii.Slash: case Byte_ascii.Gt:
++atrs_bgn_pos; // set bgn_pos to be after char
break;
case Byte_ascii.Backslash:
++tag_end_pos;
break;
case Byte_ascii.Dollar:// handles <br$2>;
default: // allow all other symbols by defaults
break;
// a letter or digit right after the matched name means the matched name is only a prefix of a longer word; treat as unknown tag
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
tag_obj = null;
break;
}
}
// true when the current token is a tblw table / row / header; NOTE(review): Tid_tblw_td is not in this list -- confirm that is intended
boolean ctx_cur_tid_is_tblw_atr_owner = false;
switch (ctx.Cur_tkn_tid()) {
case Xop_tkn_itm_.Tid_tblw_tb: case Xop_tkn_itm_.Tid_tblw_tr: case Xop_tkn_itm_.Tid_tblw_th:
ctx_cur_tid_is_tblw_atr_owner = true;
break;
}
if (tag_obj == null) { // not a known xml tag; EX: "<abcd>"; "if 5 < 7 then"
if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki) {
if (ctx_cur_tid_is_tblw_atr_owner) // unknown_tag is occurring inside tblw element (EX: {| style='margin:1em<f'); just add to txt tkn
return ctx.Lxr_make_txt_(cur_pos);
else { // unknown_tag is occurring anywhere else; escape < to &lt; and resume from character just after it;
ctx.Subs_add(root, Make_bry_tkn(tkn_mkr, src, bgn_pos, cur_pos));
return cur_pos;
}
}
else {
if (ctx_cur_tid_is_tblw_atr_owner) Xop_tblw_wkr.Atrs_close(ctx, src, root, Bool_.N);
return ctx.Lxr_make_txt_(cur_pos);
}
}
Xop_xnde_tag tag = (Xop_xnde_tag)tag_obj;
// pre_at_bos is a one-shot flag: it is cleared by the first known tag regardless of the tag's block mode
if (pre_at_bos) {
pre_at_bos = false;
if (tag.Block_close() == Xop_xnde_tag.Block_end
) { // NOTE: only ignore if Block_end; loosely based on Parser.php|doBlockLevels|$closematch; DATE:2013-12-01
ctx.Para().Process_block__bgn_n__end_y(tag);
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, cur_pos, Xop_ignore_tkn.Ignore_tid_pre_at_bos));
}
}
int gt_pos = -1; // find closing >; NOTE: MW does not ignore > inside quotes; EX: <div id="a>b">abc</div> -> <div id="a>
boolean pre2_hack = false;
int end_name_pos = cur_pos + tag.Name_len();
Xop_xatr_parser atr_parser = ctx.App().Xatr_parser();
for (int i = end_name_pos; i < src_len; i++) {
byte b = src[i];
switch (b) {
case Byte_ascii.Lt: // < encountered; may be inner node inside tag which is legal in wikitext; EX: "<ul style=<nowiki>#</nowiki>FFFFFF>"
int name_bgn_pos = i + 1;
if (name_bgn_pos < src_len) { // chk that name_bgn is less than src_len else arrayIndex error; EX: <ref><p></p<<ref/>; not that "<" is last char of String; DATE:2014-01-18
int valid_inner_xnde_gt = atr_parser.Xnde_find_gt_find(src, name_bgn_pos, src_len); // check if <nowiki>, <noinclude>, <includeonly> or <onlyinclude> (which can exist inside tag)
if (valid_inner_xnde_gt == String_.Find_none){ // not a <nowiki>
switch (tag.Id()) {
case Xop_xnde_tag_.Tid_input: break; // noop; needed for Options which may have < in value; DATE:2014-07-04
default: return ctx.Lxr_make_txt_(cur_pos); // escape text; EX: "<div </div>" -> "&lt;div </div>"; SEE:it.u:; DATE:2014-02-03
}
}
else { // is a <nowiki> skip to </nowiki>
// pre2 hack: in tmpl mode, an <includeonly> placed immediately after the tag name makes the whole tag fall back to text (see pre2_hack check below)
if ( i == end_name_pos
&& ctx.Parse_tid() == Xop_parser_.Parse_tid_tmpl
&& Bry_.Eq(atr_parser.Bry_obj().Val(), Xop_xnde_tag_.Tag_includeonly.Name_bry())
) {
pre2_hack = true;
}
i = valid_inner_xnde_gt;
}
}
break;
case Byte_ascii.Gt:
gt_pos = i;
i = src_len; // force loop exit
break;
}
}
if (pre2_hack) {
// Xop_xnde_tkn tt = tkn_mkr.Xnde(bgn_pos, gt_pos + 1).Tag_(tag);
// ctx.Stack_add(tt);
pre2_pending = true;
return ctx.Lxr_make_txt_(cur_pos);
}
if (gt_pos == -1) {return ctx.Lxr_make_log_(Xop_xnde_log.Eos_while_closing_tag, src, bgn_pos, cur_pos);}
boolean force_xtn_for_nowiki = false;
int end_pos = gt_pos + 1;
switch (ctx.Parse_tid()) { // NOTE: special logic to handle <*include*>; SEE: NOTE_1 below
case Xop_parser_.Parse_tid_page_wiki: // NOTE: ignore if (a) wiki and (b) <noinclude> or <onlyinclude>
switch (tag.Id()) {
case Xop_xnde_tag_.Tid_noinclude:
case Xop_xnde_tag_.Tid_onlyinclude:
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, end_pos, Xop_ignore_tkn.Ignore_tid_include_wiki));
return end_pos;
case Xop_xnde_tag_.Tid_nowiki:
force_xtn_for_nowiki = true;
ctx_cur_tid_is_tblw_atr_owner = false; // suppresses the Atrs_close call below for <nowiki>
break;
}
break;
case Xop_parser_.Parse_tid_tmpl: // NOTE: ignore if (a) tmpl and (b) <includeonly>
switch (tag.Id()) {
case Xop_xnde_tag_.Tid_includeonly:
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, end_pos, Xop_ignore_tkn.Ignore_tid_include_tmpl));
return end_pos;
case Xop_xnde_tag_.Tid_noinclude:
return Make_noinclude(ctx, tkn_mkr, root, src, src_len, bgn_pos, gt_pos, tag, atrs_bgn_pos - 1, tag_is_closing); // -1 b/c atrs_bgn_pos may be set past >; may need to adjust above logic; DATE:2014-06-24
case Xop_xnde_tag_.Tid_nowiki:
force_xtn_for_nowiki = true;
break;
case Xop_xnde_tag_.Tid_onlyinclude:
break;
default:
break;
}
break;
case Xop_parser_.Parse_tid_page_tmpl: // NOTE: added late; SEE:comment test for "a <!-<noinclude></noinclude>- b -->c"
switch (tag.Id()) {
case Xop_xnde_tag_.Tid_noinclude:
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, end_pos, Xop_ignore_tkn.Ignore_tid_include_tmpl));
return end_pos;
case Xop_xnde_tag_.Tid_nowiki: // if encountered in page_tmpl stage, mark nowiki as xtn; added for nowiki_xnde_frag; DATE:2013-01-27
case Xop_xnde_tag_.Tid_includeonly: // includeonly should be resolved during template stage; EX: =<io>=</io>A=<io>=</io>; DATE:2014-02-12
force_xtn_for_nowiki = true;
break;
}
break;
}
if (ctx_cur_tid_is_tblw_atr_owner)
Xop_tblw_wkr.Atrs_close(ctx, src, root, Bool_.Y); // < found inside tblw; close off tblw attributes; EX: |- id='abcd' <td>a</td> (which is valid wikitext; NOTE: must happen after <nowiki>
if (tag_is_closing)
return Make_xtag_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, gt_pos, tag);
else
// pre2_hack is always false here b/c the pre2_hack branch above has already returned
return Make_xtag_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, gt_pos, name_bgn, name_end, tag, atrs_bgn_pos, src[tag_end_pos], force_xtn_for_nowiki, pre2_hack);
}
/**
 * Builds a raw-bytes token covering bgn_pos..cur_pos in which the leading "<" is replaced by its html entity.
 * The exact spans "<" and "</" reuse shared byte arrays; any other span is assembled on the fly.
 */
private static Xop_tkn_itm Make_bry_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int bgn_pos, int cur_pos) {
	int span = cur_pos - bgn_pos;
	byte[] escaped = null;
	if (span == 1) {
		if (src[bgn_pos] == Byte_ascii.Lt)
			escaped = Html_entity_.Lt_bry;
	}
	else if (span == 2) {
		// NOTE: should use bgn_pos, not cur_pos; DATE:2014-10-22
		if (src[bgn_pos] == Byte_ascii.Lt && src[bgn_pos + 1] == Byte_ascii.Slash)
			escaped = Bry_escape_lt_slash;
	}
	if (escaped == null) {
		byte[] after_lt = Bry_.Mid(src, bgn_pos + 1, cur_pos);	// +1 to skip <
		escaped = Bry_.Add(Html_entity_.Lt_bry, after_lt);
	}
	return tkn_mkr.Bry_raw(bgn_pos, cur_pos, escaped);
}
// Handles <noinclude> in template mode (called from Make_tkn for Parse_tid_tmpl) by adding a single Ignore token.
//   gtPos          : position of the ">" that ends the tag found by Make_tkn
//   tag_end_pos    : position just after the tag name; spaces are skipped from here
//   tag_is_closing : true for "</noinclude>"
// Cases:
//   inline ("<noinclude />") : ignore the tag itself; if anything other than ws / "/" sits between the "/" and ">", fall back to text
//   closing tag              : ignore up to gtPos
//   opening tag              : ignore through the matching "</noinclude ... >", or to end of src if none is found
// Returns the position returned to Make_tkn's caller (after the ignored range, or the result of Lxr_make_txt_).
private int Make_noinclude(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int gtPos, Xop_xnde_tag tag, int tag_end_pos, boolean tag_is_closing) {
	tag_end_pos = Bry_finder.Find_fwd_while(src, tag_end_pos, src_len, Byte_ascii.Space);// NOTE: must skip spaces else "<noinclude />" will not work with safesubst; PAGE:en.w:Wikipedia:Featured_picture_candidates; DATE:2014-06-24
	byte tag_end_byte = src[tag_end_pos];
	if (tag_end_byte == Byte_ascii.Slash) {	// inline
		boolean valid = true;
		for (int i = tag_end_pos; i < gtPos; i++) {
			switch (src[i]) {
				case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: break;
				case Byte_ascii.Slash: break;
				default: valid = false; break;
			}
		}
		if (valid) {
			ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, gtPos, Xop_ignore_tkn.Ignore_tid_include_tmpl));
			return gtPos + Xoa_prog_mgr.Adj_next_char;
		}
		else {
			return ctx.Lxr_make_txt_(gtPos);
		}
	}
	int end_rhs = -1, findPos = gtPos;
	byte[] end_bry = Xop_xnde_tag_.Tag_noinclude.XtnEndTag(); int end_bry_len = end_bry.length;
	if (tag_is_closing)	// </noinclude>; no end tag to search for; DATE:2014-05-02
		end_rhs = gtPos;	// NOTE(review): this is gtPos, not gtPos + 1, so the ">" itself is left outside the ignored range -- confirm intended
	else {				// <noinclude>; search for end tag
		while (true) {
			int end_lhs = Bry_finder.Find_fwd(src, end_bry, findPos);
			if (end_lhs == -1 || (end_lhs + end_bry_len) == src_len) break;	// nothing found or EOS;
			findPos = end_lhs;
			boolean advanced = false;	// set when findPos is moved past this candidate
			for (int i = end_lhs + end_bry_len; i < src_len; i++) {
				switch (src[i]) {
					case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: break;
					case Byte_ascii.Slash: break;
					case Byte_ascii.Gt: end_rhs = i + 1; i = src_len; break;	// +1 to place after Gt
					default: findPos = i; advanced = true; i = src_len; break;	// not an end tag; resume search from this char
				}
			}
			if (end_rhs != -1) break;
			// Candidate is followed only by ws / "/" up to end of src; EX: "<noinclude>a</noinclude ".
			// findPos still equals end_lhs, so searching again would return the same candidate forever.
			// Nothing else can follow, so stop and treat as unclosed.
			if (!advanced) break;
		}
		if (end_rhs == -1)	// end tag not found; match to end of String
			end_rhs = src_len;
	}
	ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, end_rhs, Xop_ignore_tkn.Ignore_tid_include_tmpl));
	return end_rhs;
}
// Set to true by Make_tkn when its pre2 hack fires; it is not read anywhere in the portion of this class shown here.
private boolean pre2_pending = false;
// Builds the token(s) for an opening (or inline) tag once Make_tkn has located its ">".
//   gtPos                : position of the tag's ">"
//   name_bgn / name_end  : name range as computed by Make_tkn; forwarded to Make_xnde_xtn only
//   tag_end_pos          : Make_tkn passes its atrs_bgn_pos here (NOT its own local tag_end_pos)
//   tag_end_byte         : Make_tkn passes src[its own tag_end_pos]
//   force_xtn_for_nowiki : when true and the tag is not inline, the tag is handled via Make_xnde_xtn
//   pre2_hack            : forwarded to Make_xnde_xtn only
// Returns a position after the tag's ">" on most paths, or the result of Make_xnde_xtn.
// Order of checks: xtn handling -> restricted-tag escaping -> table tags -> auto-closing of repeated tags -> plain node creation.
private int Make_xtag_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int gtPos, int name_bgn, int name_end, Xop_xnde_tag tag, int tag_end_pos, byte tag_end_byte, boolean force_xtn_for_nowiki, boolean pre2_hack) {
boolean inline = false;
int open_tag_end = gtPos + Xoa_prog_mgr.Adj_next_char, atrs_bgn = -1, atrs_end = -1;
// calc (a) inline; (b) atrs
switch (tag_end_byte) { // look at last char of tag; EX: for b, following are registered: "b/","b>","b\s","b\n","b\t"
case Byte_ascii.Slash: // "/" EX: "<br/"; // NOTE: <pre/a>, <pre//> are allowed
inline = true;
break;
case Byte_ascii.Backslash: // allow <br\>; EX:w:Mosquito
// NOTE: this overwrites a byte of the caller's src array in place
if (tag.Inline_by_backslash())
src[tag_end_pos] = Byte_ascii.Slash;
break;
case Byte_ascii.Gt: // ">" "normal" tag; noop
break;
default: // "\s", "\n", "\t"
atrs_bgn = tag_end_pos; // set atrs_bgn to first char after ws; EX: "<a\shref='b/>" atrs_bgn = pos(h)
atrs_end = gtPos; // set atrs_end to gtPos; EX: "<a\shref='b/>" atrs_end = pos(>)
if (src[gtPos - 1] == Byte_ascii.Slash) { // adjust if inline
--atrs_end;
inline = true;
}
break;
}
// attributes are only parsed in page_wiki mode; atrs stays null otherwise
Xop_xatr_itm[] atrs = null;
if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki) {
atrs = ctx.App().Xatr_parser().Parse(ctx.Msg_log(), src, atrs_bgn, atrs_end);
}
if (( ( tag.Xtn()
&& ( ctx.Parse_tid() != Xop_parser_.Parse_tid_tmpl // do not gobble up rest if in tmpl; handle <poem>{{{1}}}</poem>; DATE:2014-03-03
|| tag.Xtn_skips_template_args() // ignore above if tag specifically skips template args; EX: <pre>; DATE:2014-04-10
)
)
|| (force_xtn_for_nowiki && !inline)
)
) {
return Make_xnde_xtn(ctx, tkn_mkr, root, src, src_len, tag, bgn_pos, gtPos + 1, name_bgn, name_end, atrs_bgn, atrs_end, atrs, inline, pre2_hack); // find end tag and do not parse anything inbetween
}
// restricted tags are emitted as escaped text when the page restricts html and the wiki is not the home wiki
if (tag.Restricted()) {
Xoae_page page = ctx.Cur_page();
if ( page.Html_data().Html_restricted()
&& page.Wiki().Domain_tid() != Xow_domain_type_.Tid_home) {
int end_pos = gtPos + 1;
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, end_pos, Bry_.Add(gplx.html.Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, end_pos)))); // +1 to skip <
return end_pos;
}
}
// nearest xnde on the stack (search stops at a table), used for the auto-close rules below
int prv_acs = ctx.Stack_idx_find_but_stop_at_tbl(Xop_tkn_itm_.Tid_xnde);
Xop_xnde_tkn prv_xnde = prv_acs == -1 ? null : (Xop_xnde_tkn)ctx.Stack_get(prv_acs); //(Xop_xnde_tkn)ctx.Stack_get_typ(Xop_tkn_itm_.Tid_xnde);
int prv_xnde_tagId = prv_xnde == null ? Xop_tkn_itm_.Tid_null : prv_xnde.Tag().Id();
// NOTE: tag_ignore is never set to true in this method, so the Tag_visible_(false) call below is currently unreachable
boolean tag_ignore = false;
int tagId = tag.Id();
if (tagId == Xop_xnde_tag_.Tid_table || tag.TblSub()) { // tbl tag; EX: <table>,<tr>,<td>,<th>
Tblw_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, gtPos + 1, tagId, atrs_bgn, atrs_end);
return gtPos + 1;
}
else if (prv_xnde_tagId == Xop_xnde_tag_.Tid_p && tagId == Xop_xnde_tag_.Tid_p) {
// <p> directly inside <p>: close the previous <p>, then fall through to open the new one
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Auto_closing_section, src, bgn_pos, bgn_pos);
End_tag(ctx, root, prv_xnde, src, src_len, bgn_pos - 1, bgn_pos - 1, tagId, true, tag);
}
else if (tagId == prv_xnde_tagId && tag.Repeat_ends()) { // EX: "<code>a<code>b" -> "<code>a</code>b"
End_tag(ctx, root, prv_xnde, src, src_len, bgn_pos - 1, bgn_pos - 1, tagId, true, tag);
return gtPos + 1;
}
else if (tagId == prv_xnde_tagId && tag.Repeat_mids()) { // EX: "<li>a<li>b" -> "<li>a</li><li>b"
End_tag(ctx, root, prv_xnde, src, src_len, bgn_pos - 1, bgn_pos - 1, tagId, true, tag);
}
else if (tag.SingleOnly()) inline = true; // <br></br> not allowed; convert <br> to <br/> </br> will be escaped
else if (tag.NoInline() && inline) {
// inline form of a tag that does not allow it: open and immediately end the node, then log
Xop_xnde_tkn xnde_inline = Xnde_bgn(ctx, tkn_mkr, root, tag, Xop_xnde_tkn.CloseMode_open, src, bgn_pos, open_tag_end, atrs_bgn, atrs_end, atrs);
End_tag(ctx, root, xnde_inline, src, src_len, bgn_pos, gtPos, tagId, false, tag);
ctx.Msg_log().Add_itm_none(Xop_xnde_log.No_inline, src, bgn_pos, gtPos);
return gtPos + Int_.Const_position_after_char;
}
Xop_xnde_tkn xnde = null;
xnde = Xnde_bgn(ctx, tkn_mkr, root, tag, inline ? Xop_xnde_tkn.CloseMode_inline : Xop_xnde_tkn.CloseMode_open, src, bgn_pos, open_tag_end, atrs_bgn, atrs_end, atrs);
// only non-inline nodes are pushed on the stack (they wait for a closing tag)
if (!inline && tag.BgnNdeMode() != Xop_xnde_tag_.BgnNdeMode_inline)
ctx.Stack_add(xnde);
if (tag_ignore)
xnde.Tag_visible_(false);
if (tag.Empty_ignored()) ctx.Empty_ignored_y_();
return open_tag_end;
}
/**
 * Walks the ctx stack from the top down looking for an open xnde whose tag id equals cur_tag_id.
 * The search is abandoned (returns false) at a tblw tb / td / th / tc token, and, when looking for li, at a ul / ol node.
 */
private boolean Stack_find_xnde(Xop_ctx ctx, int cur_tag_id) {
	boolean cur_is_li = cur_tag_id == Xop_xnde_tag_.Tid_li;
	for (int idx = ctx.Stack_len() - 1; idx >= 0; --idx) {
		Xop_tkn_itm itm = ctx.Stack_get(idx);
		int itm_tid = itm.Tkn_tid();
		// tables always reset tag_stack; EX: <table><tr><td><li><table><tr><td><li>; 2nd li is not nested in 1st
		// Tid_tblw_tb needed for badly formed tables; PAGE:ro.b:Pagina_principala DATE:2014-06-26
		if (	itm_tid == Xop_tkn_itm_.Tid_tblw_tb
			||	itm_tid == Xop_tkn_itm_.Tid_tblw_td
			||	itm_tid == Xop_tkn_itm_.Tid_tblw_th
			||	itm_tid == Xop_tkn_itm_.Tid_tblw_tc)
			return false;
		if (itm_tid != Xop_tkn_itm_.Tid_xnde) continue;
		int stack_tag_id = ((Xop_xnde_tkn)itm).Tag().Id();
		// ul / ol resets tag_stack for li; EX: <li><ul><li>; 2nd li is not nested in 1st
		if (cur_is_li && (stack_tag_id == Xop_xnde_tag_.Tid_ul || stack_tag_id == Xop_xnde_tag_.Tid_ol))
			return false;
		if (stack_tag_id == cur_tag_id)
			return true;
	}
	return false;
}
/** Maps an xml table tag id to its tblw type (0 when the id is not a table tag) and asks the tblw worker to open that token. */
private void Tblw_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int tagId, int atrs_bgn, int atrs_end) {
	byte tblw_type = 0;
	if		(tagId == Xop_xnde_tag_.Tid_table)		tblw_type = Xop_tblw_wkr.Tblw_type_tb;
	else if	(tagId == Xop_xnde_tag_.Tid_tr)			tblw_type = Xop_tblw_wkr.Tblw_type_tr;
	else if	(tagId == Xop_xnde_tag_.Tid_td)			tblw_type = Xop_tblw_wkr.Tblw_type_td;
	else if	(tagId == Xop_xnde_tag_.Tid_th)			tblw_type = Xop_tblw_wkr.Tblw_type_th;
	else if	(tagId == Xop_xnde_tag_.Tid_caption)	tblw_type = Xop_tblw_wkr.Tblw_type_tc;
	ctx.Tblw().Make_tkn_bgn
		( ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos
		, true, tblw_type, Xop_tblw_wkr.Called_from_general, atrs_bgn, atrs_end
		);
}
/**
 * Maps an xml table tag id to its tblw token tid and tblw type (both 0 when the id is not a table tag),
 * then asks the tblw worker to close that token against the table token currently on the stack.
 */
private void Tblw_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int tagId) {
	int tkn_tid = 0;
	byte tblw_type = 0;
	if (tagId == Xop_xnde_tag_.Tid_table) {
		tkn_tid = Xop_tkn_itm_.Tid_tblw_tb;
		tblw_type = Xop_tblw_wkr.Tblw_type_tb;
	}
	else if (tagId == Xop_xnde_tag_.Tid_tr) {
		tkn_tid = Xop_tkn_itm_.Tid_tblw_tr;
		tblw_type = Xop_tblw_wkr.Tblw_type_tr;
	}
	else if (tagId == Xop_xnde_tag_.Tid_td) {
		tkn_tid = Xop_tkn_itm_.Tid_tblw_td;
		tblw_type = Xop_tblw_wkr.Tblw_type_td;
	}
	else if (tagId == Xop_xnde_tag_.Tid_th) {
		tkn_tid = Xop_tkn_itm_.Tid_tblw_th;
		tblw_type = Xop_tblw_wkr.Tblw_type_th;
	}
	else if (tagId == Xop_xnde_tag_.Tid_caption) {
		tkn_tid = Xop_tkn_itm_.Tid_tblw_tc;
		tblw_type = Xop_tblw_wkr.Tblw_type_tc;
	}
	Xop_tblw_tkn open_tbl = ctx.Stack_get_tbl();
	int open_tbl_tid = -1;	// -1 when no table token is on the stack
	if (open_tbl != null)
		open_tbl_tid = open_tbl.Tkn_tid();
	ctx.Tblw().Make_tkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, tkn_tid, tblw_type, open_tbl, open_tbl_tid, true);
	// NOTE: a para block-end call used to follow here and is disabled
}
// Handles an end tag ("</name>") and reconciles it with the xnde tokens on the ctx stack.
// Branches, in the order they are tried:
//   1. mismatched pairs (sub/sup, mark/span, span/font): the end tag is treated as if it matched the open tag
//   2. table tags (<table> or any tag flagged TblSub): delegated to Tblw_end
//   3. tags flagged Empty_ignored while ctx is in an empty-ignored state: pruned via ctx.Empty_ignore
//   4. EndNdeMode_inline (EX: </br>) creates an inline xnde; EndNdeMode_escape (EX: </hr>) is logged as escaped
//   5. an xnde is on the stack: close it directly if ids match, else auto-close stack items down to the matching one
//   6. nothing matched: the end tag is written out escaped (restricted), ignored (xtn) or as raw text
// Returns cur_pos (as recomputed on entry), except on the pre2_pending path which returns ctx.Lxr_make_txt_(cur_pos).
private int Make_xtag_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_xnde_tag end_tag) {
int end_tag_id = end_tag.Id();
// NOTE(review): presumably advances cur_pos past the closing ">" of the end tag; confirm against Bry_finder.Find_fwd_while_not_ws
cur_pos = Bry_finder.Find_fwd_while_not_ws(src, cur_pos, src_len) + 1;
int prv_xnde_pos = ctx.Stack_idx_find_but_stop_at_tbl(Xop_tkn_itm_.Tid_xnde); // find any previous xnde on stack
Xop_xnde_tkn bgn_nde = (Xop_xnde_tkn)ctx.Stack_get(prv_xnde_pos);
int bgn_tag_id = bgn_nde == null ? -1 : bgn_nde.Tag().Id();
int end_nde_mode = end_tag.EndNdeMode();
// some mismatched open / close pairs are treated as if the end tag matched the open tag; EX: <sub>a</sup>
boolean force_end_tag_to_match_bgn_tag = false;
switch (bgn_tag_id) {
case Xop_xnde_tag_.Tid_sub: if (end_tag_id == Xop_xnde_tag_.Tid_sup) force_end_tag_to_match_bgn_tag = true; break;
case Xop_xnde_tag_.Tid_sup: if (end_tag_id == Xop_xnde_tag_.Tid_sub) force_end_tag_to_match_bgn_tag = true; break;
case Xop_xnde_tag_.Tid_mark: if (end_tag_id == Xop_xnde_tag_.Tid_span) force_end_tag_to_match_bgn_tag = true; break;
case Xop_xnde_tag_.Tid_span: if (end_tag_id == Xop_xnde_tag_.Tid_font) force_end_tag_to_match_bgn_tag = true; break;
}
if (force_end_tag_to_match_bgn_tag) {
end_tag_id = bgn_tag_id;
// NOTE: the same Sub_sup_swapped log item is used for the mark/span and span/font pairs as well
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Sub_sup_swapped, src, bgn_pos, cur_pos);
}
// table-related end tags are handled by the table worker, not the xnde stack
if (end_tag_id == Xop_xnde_tag_.Tid_table || end_tag.TblSub()) {
Tblw_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, end_tag_id);
return cur_pos;
}
if (end_tag.Empty_ignored() && ctx.Empty_ignored() // emulate TidyHtml logic for pruning empty tags; EX: "<li> </li>" -> "")
&& bgn_nde != null) { // bgn_nde will be null if only end_nde; EX:WP:Sukhoi Su-47; "* </li>"
ctx.Empty_ignore(root, bgn_nde.Tkn_sub_idx());
End_tag(ctx, root, bgn_nde, src, src_len, bgn_pos, cur_pos, end_tag_id, true, end_tag);
return cur_pos;
}
switch (end_nde_mode) {
case Xop_xnde_tag_.EndNdeMode_inline: // PATCH.WP: allows </br>, </br/> and many other variants
Xnde_bgn(ctx, tkn_mkr, root, end_tag, Xop_xnde_tkn.CloseMode_inline, src, bgn_pos, cur_pos, Int_.MinValue, Int_.MinValue, null); // NOTE: atrs is null b/c </br> will never have atrs
return cur_pos;
case Xop_xnde_tag_.EndNdeMode_escape: // handle </hr>
ctx.Lxr_make_(false);
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Escaped_xnde, src, bgn_pos, cur_pos - 1);
return cur_pos;
}
if (prv_xnde_pos != Xop_ctx.Stack_not_found) { // something found
if (bgn_tag_id == end_tag_id) { // end_nde matches bgn_nde; normal;
End_tag(ctx, root, bgn_nde, src, src_len, bgn_pos, cur_pos, end_tag_id, true, end_tag);
return cur_pos;
}
else {
if (Stack_find_xnde(ctx, end_tag_id)) { // end_tag has bgnTag somewhere in stack;
int end = ctx.Stack_len() - 1;
for (int i = end; i > -1; i--) { // iterate stack and close all nodes until bgn_nde that matches end_nde
Xop_tkn_itm tkn = ctx.Stack_get(i);
if (tkn.Tkn_tid() == Xop_tkn_itm_.Tid_xnde) {
Xop_xnde_tkn xnde_tkn = (Xop_xnde_tkn)tkn;
// close with pop=false and a zero-width close range at bgn_pos; the stack item is popped explicitly on the next line
End_tag(ctx, root, xnde_tkn, src, src_len, bgn_pos, bgn_pos, xnde_tkn.Tag().Id(), false, end_tag);
ctx.Stack_pop_idx(i);
if (xnde_tkn.Tag().Id() == end_tag_id) {
xnde_tkn.Src_end_(cur_pos); // matching open tag reached: extend it to the end of the close tag and stop
return cur_pos;
}
else
// NOTE(review): logs the range of bgn_nde (the xnde found on entry), not of xnde_tkn (the node being auto-closed in this iteration); confirm this is intended
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Auto_closing_section, src, bgn_nde.Src_bgn(), bgn_nde.Name_end());
}
else
ctx.Stack_auto_close(root, src, tkn, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_xnde);
}
}
}
}
// fall-through: no open tag was matched for this end tag (dangling end tag)
if (end_tag.Restricted()) // restricted tags (like <script>) are not placed on stack; for now, just write it out
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, cur_pos, Bry_.Add(gplx.html.Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, cur_pos)))); // +1 to skip <
else {
if (pre2_pending) {
pre2_pending = false;
return ctx.Lxr_make_txt_(cur_pos);
}
else {
if (end_tag.Xtn()) // if xtn end tag, ignore it; tidy / browser doesn't know about xtn_tags like "</poem>" so these need to be hidden, else they will show; DATE:2014-07-22
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, cur_pos, Xop_ignore_tkn.Ignore_tid_xnde_dangling));
else // regular tag; show it; depend on tidy to clean up; DATE:2014-07-22
ctx.Subs_add(root, tkn_mkr.Bry_mid(src, bgn_pos, cur_pos));
}
}
ctx.Para().Process_block__xnde(end_tag, end_tag.Block_close());
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Escaped_xnde, src, bgn_pos, cur_pos - 1);
return cur_pos;
}
// Marks bgn_nde as closed by a paired end tag: sets its src end to cur_pos, its close mode to CloseMode_pair
// and its close-tag range to bgn_pos..cur_pos, then moves subs from root under it and notifies the para worker.
//   pop: when true, the stack is popped via Stack_pop_til using the index returned by ctx.Stack_idx_typ(Tid_xnde);
//        when false, the caller is responsible for removing bgn_nde from the stack
//   tagId, src_len: not read by this method
private void End_tag(Xop_ctx ctx, Xop_root_tkn root, Xop_xnde_tkn bgn_nde, byte[] src, int src_len, int bgn_pos, int cur_pos, int tagId, boolean pop, Xop_xnde_tag end_tag) {
bgn_nde.Src_end_(cur_pos);
bgn_nde.CloseMode_(Xop_xnde_tkn.CloseMode_pair);
bgn_nde.Tag_close_rng_(bgn_pos, cur_pos);
if (pop)
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_xnde), false, cur_pos, cur_pos, Xop_tkn_itm_.Tid_xnde);
bgn_nde.Subs_move(root); // NOTE: Subs_move must go after Stack_pop_til, b/c Stack_pop_til adds tkns; see Xnde_td_list
ctx.Para().Process_block__xnde(end_tag, end_tag.Block_close());
}
// Creates an xnde token for an opening (or inline) tag spanning bgn_pos..cur_pos, adds it to root and
// notifies the para worker with the tag's Block_open value.
//   closeMode: close mode stored on the new token (EX: Xop_xnde_tkn.CloseMode_inline)
//   atrs_bgn / atrs_end / atrs: attribute range and parsed attributes; only stored when atrs_bgn > 0
// Returns the new token.
private Xop_xnde_tkn Xnde_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, Xop_xnde_tag tag, byte closeMode, byte[] src, int bgn_pos, int cur_pos, int atrs_bgn, int atrs_end, Xop_xatr_itm[] atrs) {
  Xop_xnde_tkn rv = tkn_mkr.Xnde(bgn_pos, cur_pos).CloseMode_(closeMode);
  int name_bgn = bgn_pos + 1; // name starts right after bgn_pos
  int name_end = name_bgn + tag.Name_len();
  rv.Name_rng_(name_bgn, name_end);
  rv.Tag_(tag);
  rv.Tag_open_rng_(bgn_pos, cur_pos);
  boolean has_atrs = atrs_bgn > 0;
  if (has_atrs) {
    rv.Atrs_rng_(atrs_bgn, atrs_end);
    rv.Atrs_ary_(atrs);
  }
  ctx.Subs_add(root, rv);
  ctx.Para().Process_block__xnde(tag, tag.Block_open());
  return rv;
}
// Scans forward from find_bgn, skipping spaces, newlines and tabs, looking for ">".
// Returns the position just after ">" if it is the first non-whitespace byte;
// returns Bry_.NotFound if any other byte is met first or if src_len is reached.
private int Find_end_tag_pos(byte[] src, int src_len, int find_bgn) {
  for (int pos = find_bgn; pos != src_len; ++pos) {
    byte cur = src[pos];
    if (cur == Byte_ascii.Gt)
      return pos + 1; // add 1 to position after >
    boolean is_ws = cur == Byte_ascii.Space || cur == Byte_ascii.Nl || cur == Byte_ascii.Tab;
    if (!is_ws)
      break; // any other byte before ">" means no valid end of tag
  }
  return Bry_.NotFound;
}
// Finds where the closing tag of an extension node begins.
//   - if the bytes starting Pfunc_tag.Xtag_len before open_bgn equal Pfunc_tag.Xtag_bgn_lhs, the xtn was created by
//     a tag marker and the search is delegated to Find_xtn_end_tag
//   - otherwise src is scanned from open_end for close_bry using the case-insensitive xtn_end_tag_trie
// Returns the position where the closing tag starts, or Bry_.NotFound from the scan branch
// (the delegated branch returns whatever Find_xtn_end_tag returns).
private int Find_xtn_end_lhs(Xop_ctx ctx, Xop_xnde_tag tag, byte[] src, int src_len, int open_bgn, int open_end, byte[] close_bry) {
  int tag_bgn = open_bgn - Pfunc_tag.Xtag_len;
  boolean created_by_tag = tag_bgn > -1
    && Bry_.Eq(Pfunc_tag.Xtag_bgn_lhs, src, tag_bgn, tag_bgn + Pfunc_tag.Xtag_bgn_lhs.length);
  if (created_by_tag) // xtn created by tag
    return Find_xtn_end_tag(ctx, src, src_len, open_end, close_bry, tag_bgn + Pfunc_tag.Xtag_bgn);

  // search rest of String for case-insensitive name; NOTE: used to do CS first, then fall-back on CI; DATE:2013-12-02
  xtn_end_tag_trie.Clear();
  xtn_end_tag_trie.Add_obj(close_bry, close_bry);
  int pos = open_end;
  while (pos < src_len) {
    if (xtn_end_tag_trie.Match_bgn(src, pos, src_len) != null)
      return pos;
    ++pos;
  }
  return Bry_.NotFound;
}
// Finds the closing tag for an xtn that was created by a tag marker.
// Reads a 10-byte int id at tag_bgn, builds the matching end marker (Xtag_end_lhs + zero-padded id + Xtag_rhs),
// searches forward for it, then searches backward from just before it for "<".
// Returns the position of that "<", or Bry_finder.Not_found (after logging a warning) if any step fails.
private int Find_xtn_end_tag(Xop_ctx ctx, byte[] src, int src_len, int open_end, byte[] close_bry, int tag_bgn) {
  int tag_id = Bry_.Xto_int_or(src, tag_bgn, tag_bgn + 10, -1);
  if (tag_id == -1)
    return Find_xtn_end_tag__warn(ctx, "parser.xtn: could not extract int: page=~{0}");

  Bry_bfr tmp = ctx.Wiki().Utl__bfr_mkr().Get_b128();
  tmp.Add(Pfunc_tag.Xtag_end_lhs).Add_int_pad_bgn(Byte_ascii.Num_0, 10, tag_id).Add(Pfunc_tag.Xtag_rhs);
  byte[] end_marker = tmp.To_bry_and_rls();

  int marker_pos = Bry_finder.Find_fwd(src, end_marker, open_end + Pfunc_tag.Xtag_rhs.length);
  if (marker_pos == Bry_finder.Not_found)
    return Find_xtn_end_tag__warn(ctx, "parser.xtn: could not find end: page=~{0}");

  int lt_pos = Bry_finder.Find_bwd(src, Byte_ascii.Lt, marker_pos - 1);
  if (lt_pos == Bry_finder.Not_found)
    return Find_xtn_end_tag__warn(ctx, "parser.xtn: could not find <: page=~{0}");
  return lt_pos;
}
// Logs a warning with the current page's url as the single format argument; always returns Bry_finder.Not_found.
private static int Find_xtn_end_tag__warn(Xop_ctx ctx, String fmt) {
  ctx.App().Usr_dlg().Warn_many("", "", fmt, ctx.Cur_page().Url().Xto_full_str_safe());
  return Bry_finder.Not_found;
}
// Builds the xnde token for an extension tag (EX: <ref>, <math>, <poem>) and hands it to the matching extension.
//   inline: the tag was self-closing; an inline xnde is created and no end tag is searched for
//   otherwise: the end tag is located (auto-closing at src_len if none is found) and a paired xnde is created
//              whose only sub is a txt tkn covering the raw content between the open and close tags
//   pre2_hack: when true (and not inline), no xnde is created; the span up to close_end is made into text
// After the xnde exists, the extension object for the tag is created according to ctx.Parse_tid():
// template mode only handles xowa_cmd; wiki mode handles the full list below.
// Returns the position after the xnde (open_end for inline; close_end or src_len for pairs), or the result of
// ctx.Lxr_make_txt_ / ctx.Lxr_make_log_ on the early-exit paths.
private int Make_xnde_xtn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, Xop_xnde_tag tag, int open_bgn, int open_end, int name_bgn, int name_end, int atrs_bgn, int atrs_end, Xop_xatr_itm[] atrs, boolean inline, boolean pre2_hack) {
// NOTE: find end_tag that exactly matches bgnTag; must be case sensitive;
// NOTE(review): the comment above looks outdated; Find_xtn_end_lhs searches with xtn_end_tag_trie, which is built with ci_ascii_ (case-insensitive)
int xnde_end = open_end;
Xop_xnde_tkn xnde = null;
if (inline) {
xnde = Xnde_bgn(ctx, tkn_mkr, root, tag, Xop_xnde_tkn.CloseMode_inline, src, open_bgn, open_end, atrs_bgn, atrs_end, atrs);
xnde.Tag_close_rng_(open_end, open_end); // NOTE: inline tag, so set TagClose to open_end; should noop
}
else {
byte[] close_bry = tag.XtnEndTag_tmp(); // get tmp bry (so as not to new)
if (tag.Langs() != null) { // cur tag has langs; EX:<section>; DATE:2014-07-18
Xop_xnde_tag_lang tag_lang = tag.Langs_get(ctx.Lang().Case_mgr(), ctx.Cur_page().Lang().Lang_id(), src, name_bgn, name_end);
if (tag_lang == null) // tag does not match lang; EX:<trecho> and lang=de;
return ctx.Lxr_make_txt_(open_end);
if (tag_lang != Xop_xnde_tag_lang._) // tag matches; note Xop_xnde_tag_lang._ is a wildcard match; EX:<section>
close_bry = tag_lang.XtnEndTag_tmp();
}
// overwrite the name portion of close_bry (everything after "</") with the bytes of the open tag's name as written in src
// NOTE: this writes into the array returned by XtnEndTag_tmp, not a copy
int src_offset = open_bgn - 1; // open bgn to start at <; -2 to ignore </ ; +1 to include <
int close_ary_len = close_bry.length;
for (int i = 2; i < close_ary_len; i++) // 2 to ignore </
close_bry[i] = src[src_offset + i];
boolean auto_close = false;
int close_bgn = Find_xtn_end_lhs(ctx, tag, src, src_len, open_bgn, open_end, close_bry);
if (close_bgn == Bry_.NotFound) auto_close = true; // auto-close if end not found; verified with <poem>, <gallery>, <imagemap>, <hiero>, <references> DATE:2014-08-23
int close_end = -1;
if (auto_close) {
// no end tag: the xnde runs to the end of src and has a zero-width close tag there
xnde_end = close_bgn = close_end = src_len;
}
else {
// end tag name found; require only whitespace between the name and ">"
close_end = Find_end_tag_pos(src, src_len, close_bgn + close_bry.length);
if (close_end == Bry_.NotFound) return ctx.Lxr_make_log_(Xop_xnde_log.Xtn_end_not_found, src, open_bgn, open_end);
xnde_end = close_end;
}
if (pre2_hack)
return ctx.Lxr_make_txt_(close_end);
xnde = New_xnde_pair(ctx, root, tkn_mkr, tag, open_bgn, open_end, close_bgn, close_end);
xnde.Atrs_rng_(atrs_bgn, atrs_end);
xnde.Atrs_ary_(atrs);
// content between the tags is kept as one raw txt tkn; it is not tokenized here
if (close_bgn - open_end > 0)
xnde.Subs_add(tkn_mkr.Txt(open_end, close_bgn));
}
switch (ctx.Parse_tid()) {
case Xop_parser_.Parse_tid_page_tmpl: {
// template mode: only xowa_cmd gets an extension object
Xox_xnde xnde_xtn = null;
switch (tag.Id()) {
case Xop_xnde_tag_.Tid_xowa_cmd: xnde_xtn = tkn_mkr.Xnde_xowa_cmd(); break;
}
if (xnde_xtn != null) {
xnde_xtn.Xtn_parse(ctx.Wiki(), ctx, root, src, xnde);
xnde.Xnde_xtn_(xnde_xtn);
}
break;
}
case Xop_parser_.Parse_tid_page_wiki: {
Xox_xnde xnde_xtn = null;
int tag_id = tag.Id();
boolean escaped = false; // set when the xtn is disabled and the tag should be emitted as text instead
switch (tag_id) {
case Xop_xnde_tag_.Tid_xowa_cmd: xnde_xtn = tkn_mkr.Xnde_xowa_cmd(); break;
case Xop_xnde_tag_.Tid_math: xnde_xtn = tkn_mkr.Xnde_math(); break;
case Xop_xnde_tag_.Tid_poem: xnde_xtn = tkn_mkr.Xnde_poem(); break;
case Xop_xnde_tag_.Tid_ref: xnde_xtn = gplx.xowa.xtns.cite.References_nde.Enabled ? tkn_mkr.Xnde_ref() : null; break;
case Xop_xnde_tag_.Tid_references: xnde_xtn = gplx.xowa.xtns.cite.References_nde.Enabled ? tkn_mkr.Xnde_references() : null; break;
case Xop_xnde_tag_.Tid_gallery: xnde_xtn = tkn_mkr.Xnde_gallery(); break;
case Xop_xnde_tag_.Tid_imageMap: xnde_xtn = tkn_mkr.Xnde_imageMap(); break;
case Xop_xnde_tag_.Tid_hiero: xnde_xtn = tkn_mkr.Xnde_hiero(); break;
case Xop_xnde_tag_.Tid_inputBox: xnde_xtn = tkn_mkr.Xnde_inputbox(); break;
case Xop_xnde_tag_.Tid_dynamicPageList: xnde_xtn = tkn_mkr.Xnde_dynamicPageList(); break;
case Xop_xnde_tag_.Tid_pages: {
xnde_xtn = tkn_mkr.Xnde_pages();
boolean enabled = ctx.Wiki().Xtn_mgr().Xtn_proofread().Enabled();
if (!enabled) { // if Page / Index ns does not exist, disable xtn and escape content; DATE:2014-11-28
escaped = true;
xnde_xtn = null;
}
break;
}
case Xop_xnde_tag_.Tid_pagequality: xnde_xtn = tkn_mkr.Xnde_pagequality(); break;
case Xop_xnde_tag_.Tid_pagelist: xnde_xtn = tkn_mkr.Xnde_pagelist(); break;
case Xop_xnde_tag_.Tid_section: xnde_xtn = tkn_mkr.Xnde_section(); break;
case Xop_xnde_tag_.Tid_categoryList: xnde_xtn = tkn_mkr.Xnde_categoryList(); break;
case Xop_xnde_tag_.Tid_syntaxHighlight: xnde_xtn = tkn_mkr.Xnde_syntaxHighlight(); break;
case Xop_xnde_tag_.Tid_score: xnde_xtn = tkn_mkr.Xnde_score(); break;
case Xop_xnde_tag_.Tid_translate: xnde_xtn = tkn_mkr.Xnde_translate(); break;
case Xop_xnde_tag_.Tid_languages: xnde_xtn = tkn_mkr.Xnde_languages(); break;
case Xop_xnde_tag_.Tid_templateData: xnde_xtn = tkn_mkr.Xnde_templateData(); break;
case Xop_xnde_tag_.Tid_rss: xnde_xtn = tkn_mkr.Xnde_rss(); break;
case Xop_xnde_tag_.Tid_quiz: xnde_xtn = tkn_mkr.Xnde_quiz(); break;
case Xop_xnde_tag_.Tid_indicator: xnde_xtn = tkn_mkr.Xnde_indicator(); break;
case Xop_xnde_tag_.Tid_xowa_html: xnde_xtn = tkn_mkr.Xnde_xowa_html(); break;
case Xop_xnde_tag_.Tid_listing_buy:
case Xop_xnde_tag_.Tid_listing_do:
case Xop_xnde_tag_.Tid_listing_drink:
case Xop_xnde_tag_.Tid_listing_eat:
case Xop_xnde_tag_.Tid_listing_listing:
case Xop_xnde_tag_.Tid_listing_see:
case Xop_xnde_tag_.Tid_listing_sleep: xnde_xtn = tkn_mkr.Xnde_listing(tag_id); break;
case Xop_xnde_tag_.Tid_timeline:
// timeline has no extension object here; it is optionally logged and the page's timeline module is enabled
boolean log_wkr_enabled = Timeline_log_wkr != Xop_log_basic_wkr.Null; if (log_wkr_enabled) Timeline_log_wkr.Log_end_xnde(ctx.Cur_page(), Xop_log_basic_wkr.Tid_timeline, src, xnde);
ctx.Cur_page().Html_data().Module_mgr().Itm__timeline().Enabled_y_();
break;
case Xop_xnde_tag_.Tid_xowa_tag_bgn:
case Xop_xnde_tag_.Tid_xowa_tag_end:
break;
case Xop_xnde_tag_.Tid_source: // added on DATE:2014-06-24
case Xop_xnde_tag_.Tid_pre: // NOTE: pre must be an xtn, but does not create an xtn node (it gobbles up everything between); still need to touch the para_wkr; DATE:2014-02-20
ctx.Para().Process_block__xnde(tag, Xop_xnde_tag.Block_bgn);
// if the content between the tags contains a newline, also notify the para worker of a newline at the open tag
if (Bry_finder.Find_fwd(src, Byte_ascii.Nl, xnde.Tag_open_end(), xnde.Tag_close_bgn()) != Bry_finder.Not_found)
ctx.Para().Process_nl(ctx, root, src, xnde.Tag_open_bgn(), xnde.Tag_open_bgn());
ctx.Para().Process_block__xnde(tag, Xop_xnde_tag.Block_end);
break;
}
if (escaped) {
root.Subs_del_after(root.Subs_len() - 1); // since content is escaped, delete xnde_xtn; DATE:2014-09-08
return ctx.Lxr_make_txt_(open_end); // return after lhs_end, not entire xnde;
}
if (xnde_xtn != null) {
try {
xnde.Xnde_xtn_(xnde_xtn); // NOTE: must set xnde_xtn, else null ref (html_wtr expects non-null nde)
xnde_xtn.Xtn_parse(ctx.Wiki(), ctx, root, src, xnde);
}
catch (Exception e) {
// extension failures are rethrown when Env_.Mode_testing() is true and only logged as a warning otherwise
String err_msg = String_.Format("failed to render extension: title={0} excerpt={1} err={2}", ctx.Cur_page().Ttl().Full_txt()
, Bry_.Mid(src, xnde.Tag_open_end(), xnde.Tag_close_bgn())
, Err_.Message_gplx_brief(e));
if (Env_.Mode_testing())
throw Exc_.new_exc(e, "xo", err_msg);
else
ctx.Wiki().Appe().Usr_dlg().Warn_many("", "", err_msg);
}
}
break;
}
}
return xnde_end;
} private Btrie_slim_mgr xtn_end_tag_trie = Btrie_slim_mgr.ci_ascii_(); // NOTE:ci.ascii:MW_const.en; listed XML node names are en
// Creates a paired xnde token spanning open_bgn..close_end, records its open and close tag ranges and its
// name range (starting one byte after open_bgn, tag.Name_len() long), adds it to root and returns it.
private Xop_xnde_tkn New_xnde_pair(Xop_ctx ctx, Xop_root_tkn root, Xop_tkn_mkr tkn_mkr, Xop_xnde_tag tag, int open_bgn, int open_end, int close_bgn, int close_end) {
  Xop_xnde_tkn pair = tkn_mkr.Xnde(open_bgn, close_end)
    .Tag_(tag)
    .Tag_open_rng_(open_bgn, open_end)
    .Tag_close_rng_(close_bgn, close_end)
    .CloseMode_(Xop_xnde_tkn.CloseMode_pair);
  int tag_name_bgn = open_bgn + 1;
  int tag_name_end = tag_name_bgn + tag.Name_len();
  pair.Name_rng_(tag_name_bgn, tag_name_end);
  ctx.Subs_add(root, pair);
  return pair;
}
// Byte form of "&lt;/" (an HTML-escaped "</").
// NOTE(review): not referenced in the visible part of this file; confirm it is still used before keeping it
private static final byte[]
Bry_escape_lt_slash = Bry_.new_a7("&lt;/")
;
// Finds the ">" that closes a tag, scanning src from cur_pos up to src_len.
// A "<" met along the way may start an inner node inside the tag, which is legal in wikitext;
// EX: "<ul style=<nowiki>#</nowiki>FFFFFF>". If Xnde_find_gt_find reports a position for it,
// scanning resumes after that position.
// Returns the position of the closing ">", or -1 if none is found.
public static int Find_gt_pos(Xop_ctx ctx, byte[] src, int cur_pos, int src_len) { // UNUSED
  int pos = cur_pos;
  while (pos < src_len) {
    byte cur = src[pos];
    if (cur == Byte_ascii.Gt)
      return pos; // closing > found
    if (cur == Byte_ascii.Lt) {
      int inner_gt = ctx.App().Xatr_parser().Xnde_find_gt_find(src, pos + 1, src_len);
      if (inner_gt != String_.Find_none)
        pos = inner_gt; // jump to the reported position; the increment below moves past it
    }
    ++pos;
  }
  return -1;
}
// Static, reassignable logger for <timeline> nodes; Make_xnde_xtn only calls Log_end_xnde on it when it is not Xop_log_basic_wkr.Null (the default).
public static Xop_log_basic_wkr Timeline_log_wkr = Xop_log_basic_wkr.Null;
}
class Xop_xnde_wkr_ {
  // Handles a dangling node inside a caption: finds the first "|" tkn among owner's subs and,
  // assuming the caption should end there, calls root.Subs_move with that tkn's index as the begin index
  // and owner's sub count as the end. Does nothing if owner has no "|" sub.
  public static void AutoClose_handle_dangling_nde_in_caption(Xop_root_tkn root, Xop_tkn_itm owner) {
    int owner_subs_len = owner.Subs_len();
    for (int idx = 0; idx < owner_subs_len; ++idx) {
      boolean is_pipe = owner.Subs_get(idx).Tkn_tid() == Xop_tkn_itm_.Tid_pipe;
      if (!is_pipe) continue;
      root.Subs_move(owner, idx, owner_subs_len); // move the subs from "|" onward back to root
      return; // only the first "|" matters
    }
  }
}
/*
NOTE_1: special logic for <*include*>
cannot process like regular xnde tag b/c cannot auto-close tags on tmpl
EX: <includeonly>{{subst:</includeonly><includeonly>substcheck}}</includeonly>
1st </io> would autoclose {{subst:
Since the basic intent is to "hide" the tags in certain modes, simply create an ignore_tkn and exit
*/

View File

@@ -0,0 +1,158 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
// Basic parsing tests for xnde (XML-like node) handling: inline and paired tags, nesting, case-insensitivity,
// attribute whitelisting, restricted tags such as <script>, and HTML5 tags.
// Tests that change fixture state restore it in a finally block, so a failing assertion cannot leave the
// fixture in a modified state.
public class Xop_xnde_wkr__basic_tst {
  private Xop_fxt fxt = new Xop_fxt();
  @After public void term() {fxt.Init_para_n_();}
  @Test public void Escape_lt() { // PURPOSE: some templates have unknown tags; PAGE:en.w:PHP
    fxt.Init_para_y_();
    fxt.Test_parse_page_wiki_str("a<code><?</code>b", String_.Concat_lines_nl_skip_last
    ( "<p>a<code>&lt;?</code>b"
    , "</p>"
    , ""
    ));
    fxt.Init_para_n_();
  }
  @Test public void Inline() {
    fxt.Test_parse_page_wiki("<ref/>" , fxt.tkn_xnde_(0, 6).CloseMode_(Xop_xnde_tkn.CloseMode_inline).Name_rng_(1, 4));
  }
  @Test public void Pair() {
    fxt.Test_parse_page_wiki("<div></div>", fxt.tkn_xnde_(0, 11).CloseMode_(Xop_xnde_tkn.CloseMode_pair).Name_rng_(1, 4));
  }
  @Test public void Pair_text() {
    fxt.Test_parse_page_wiki("<div>b</div>", fxt.tkn_xnde_(0, 12).Subs_(fxt.tkn_txt_(5, 6)));
  }
  @Test public void Deep1_pair1() {
    fxt.Test_parse_page_wiki("<div><div></div></div>", fxt.tkn_xnde_(0, 22).Name_rng_(1, 4)
    .Subs_(fxt.tkn_xnde_(5, 16).Name_rng_(6, 9)));
  }
  @Test public void Deep1_inline1() {
    fxt.Test_parse_page_wiki("<div><ref/></div>", fxt.tkn_xnde_(0, 17).Name_rng_(1, 4)
    .Subs_(fxt.tkn_xnde_(5, 11).Name_rng_(6, 9)) );
  }
  @Test public void Deep1_pair2() {
    fxt.Test_parse_page_wiki("<div><div></div><div></div></div>", fxt.tkn_xnde_(0, 33).Name_rng_(1, 4)
    .Subs_
    ( fxt.tkn_xnde_( 5, 16).Name_rng_( 6, 9)
    , fxt.tkn_xnde_(16, 27).Name_rng_(17, 20)
    ));
  }
  @Test public void Deep2_pair1() {
    fxt.Test_parse_page_wiki("<div><div><div></div></div></div>", fxt.tkn_xnde_(0, 33).Name_rng_(1, 4)
    .Subs_
    ( fxt.tkn_xnde_( 5, 27).Name_rng_( 6, 9)
    .Subs_
    ( fxt.tkn_xnde_(10, 21).Name_rng_(11, 14))
    ));
  }
  @Test public void Slash() {// b/c mw allows unquoted attributes
    fxt.Test_parse_page_wiki("<ref / >a</ref>", fxt.tkn_xnde_(0, 15).Atrs_rng_(5, 7).Subs_(fxt.tkn_txt_(8, 9)));
    fxt.Test_parse_page_wiki("<ref name=a/b/>", fxt.tkn_xnde_(0, 15).Atrs_rng_(5, 13));
  }
  @Test public void Escaped() {
    fxt.Init_log_(Xop_xnde_log.Escaped_xnde).Test_parse_page_wiki("<div></span></div>", fxt.tkn_xnde_(0, 18).Subs_(fxt.tkn_bry_(5, 12)));// TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
  }
  @Test public void Xtn() {
    fxt.Test_parse_page_wiki("<math><div></math>", fxt.tkn_xnde_(0, 18).Subs_(fxt.tkn_txt_(6, 11))); // NOTE: no dangling nde b/c .Xtn skips
  }
  @Test public void Xtn_ref() {
    fxt.Test_parse_page_wiki("<ref name=\"a\">b</ref>", fxt.tkn_xnde_(0, 21).Name_rng_(1, 4).Atrs_rng_(5, 13).Subs_(fxt.tkn_txt_(14, 15)));
  }
  @Test public void Lnki() {
    fxt.Test_parse_page_wiki("[[Image:a|b<br/>d]]"
    , fxt.tkn_lnki_().Ns_id_(Xow_ns_.Id_file).Trg_tkn_(fxt.tkn_arg_nde_().Val_tkn_(fxt.tkn_arg_itm_(fxt.tkn_txt_(2, 7), fxt.tkn_colon_(7), fxt.tkn_txt_(8, 9))))
    .Caption_tkn_(fxt.tkn_arg_nde_(10, 17).Val_tkn_(fxt.tkn_arg_itm_(fxt.tkn_txt_(10, 11), fxt.tkn_xnde_(11, 16), fxt.tkn_txt_(16, 17))))
    );
  }
  @Test public void Br_converted_to_reguar_br() {
    fxt.Test_parse_page_wiki("</br>a" , fxt.tkn_xnde_(0, 5), fxt.tkn_txt_(5, 6));
    fxt.Test_parse_page_wiki("<br/>a" , fxt.tkn_xnde_(0, 5), fxt.tkn_txt_(5, 6));
    fxt.Test_parse_page_wiki("</br/>a" , fxt.tkn_xnde_(0, 6), fxt.tkn_txt_(6, 7));
  }
  @Test public void CaseSensitivity() {
    fxt.Test_parse_page_wiki("<DiV></dIv>", fxt.tkn_xnde_(0, 11).CloseMode_(Xop_xnde_tkn.CloseMode_pair).Name_rng_(1, 4));
  }
  @Test public void CaseSensitivity_xtn_1() {
    fxt.Test_parse_page_wiki_str
    ( "<Inputbox>a</Inputbox>b<inputbox>c</inputbox>"
    , "b"
    );
  }
  @Test public void CaseSensitivity_xtn_2() { // PURPOSE: xtn end_tag may not match bgn_tag; EX: w:Ehrenfest_paradox; <References></references>
    fxt.Test_parse_page_all_str("a<ref name=b /><References><ref name=b>c</ref></references>", String_.Concat_lines_nl
    ( "a<sup id=\"cite_ref-b_0-0\" class=\"reference\"><a href=\"#cite_note-b-0\">[1]</a></sup><ol class=\"references\">"
    , "<li id=\"cite_note-b-0\"><span class=\"mw-cite-backlink\"><a href=\"#cite_ref-b_0-0\">^</a></span> <span class=\"reference-text\">c</span></li>"
    , "</ol>"
    ));
  }
  @Test public void CaseSensitivity_xtn_3() {// PURPOSE: xtn xnde must do case-insensitive match DATE:2013-12-02
    fxt.Test_parse_page_all_str
    ( "<matH>a</math> b <math>c</matH>" // <matH> should match </math> not </matH>
    , "<span id='xowa_math_txt_0'>a</span> b <span id='xowa_math_txt_0'>c</span>"
    );
  }
  @Test public void Whitelist() {
    fxt.Test_parse_page_all_str("<span onload='alert()'></span>", "<span></span>");
  }
  @Test public void Whitelist_pre() { // PURPOSE: <pre style="overflow:auto">a</pre> somehow becoming <prestyle="overflow:auto">a</pre>; Template:Infobox_country; ISSUE: old xatr code being triggered; PURPOSE:(2) style being stripped when it shouldn't be
    fxt.Test_parse_page_all_str("<pre style=\"overflow:auto\">a</pre>", "<pre style=\"overflow:auto\">a</pre>");
  }
  @Test public void Whitelist_style() {
    fxt.Test_parse_page_all_str("<div style=\"url(bad)\"></div>", "<div></div>");
  }
  @Test public void Script() { // PURPOSE: nested script should (a) write attributes; (b) write close tag; DATE:2014-01-24
    fxt.Test_parse_page_all_str("<code><script src='a'>b</script></code>", "<code>&lt;script src='a'>b&lt;/script></code>");
  }
  @Test public void Script_in_syntaxhighlight() {
    fxt.Test_parse_page_all_str("<syntaxhighlight><script>alert('fail');</script></syntaxhighlight>", "<pre style=\"overflow:auto;\">&lt;script&gt;alert('fail');&lt;/script&gt;</pre>");
  }
  @Test public void Script_in_math() {
    fxt.App().File_mgr().Math_mgr().Renderer_is_mathjax_(false);
    try {
      fxt.Test_parse_page_all_str("<math><script>alert('fail');</script></math>", "<img id='xowa_math_img_0' src='' width='' height=''/><span id='xowa_math_txt_0'>&lt;script>alert('fail');</script></span>");
    }
    finally { // restore the renderer setting even if the assertion above fails
      fxt.App().File_mgr().Math_mgr().Renderer_is_mathjax_(true);
    }
  }
  @Test public void Html5_time() {// PURPOSE: HTML5; should output self (i.e.: must be whitelisted)
    fxt.Test_parse_page_wiki_str("<time class=\"dtstart\" datetime=\"2010-10-10\">10 October 2010</time>", "<time class=\"dtstart\" datetime=\"2010-10-10\">10 October 2010</time>");
  }
  @Test public void Html5_bdi() {// PURPOSE: HTML5; should output self (i.e.: must be whitelisted); DATE:2013-12-07
    fxt.Test_parse_page_wiki_str("<bdi lang=\"en\">a</bdi>", "<bdi lang=\"en\">a</bdi>");
  }
  @Test public void Html5_mark() {// PURPOSE: HTML5; should output self (i.e.: must be whitelisted); DATE:2014-01-03
    fxt.Test_parse_page_wiki_str("<mark lang=\"en\">a</mark>", "<mark lang=\"en\">a</mark>");
  }
  @Test public void Html5_mark_span() {// PURPOSE: </span> should close <mark> tag; EX: zh.wikipedia.org/wiki/异体字; DATE:2014-01-03
    fxt.Test_parse_page_wiki_str("<mark>a</span>", "<mark>a</mark>");
  }
  @Test public void Html5_wbr() {// PURPOSE: HTML5; should output self (i.e.: must be whitelisted); DATE:2014-01-03
    fxt.Test_parse_page_wiki_str("a<wbr>b<wbr>c", "a<wbr></wbr>b<wbr></wbr>c");
  }
  @Test public void Html5_bdo() {// PURPOSE: HTML5; should output self (i.e.: must be whitelisted); DATE:2014-01-03
    fxt.Test_parse_page_wiki_str("<bdo>a</bdo>", "<bdo>a</bdo>");
  }
  @Test public void Pre_always_parsed() { // PURPOSE: pre should not interpret templates; DATE:2014-04-10
    fxt.Init_defn_clear();
    try {
      fxt.Init_defn_add("a", "a");
      fxt.Init_defn_add("test", "<pre>{{a}}</pre>");
      fxt.Test_parse_page_all_str("{{test}}", "<pre>{{a}}</pre>");
    }
    finally { // clear the template definitions even if the assertion above fails
      fxt.Init_defn_clear();
    }
  }
  @Test public void Quote() {// PURPOSE: handle <q> element; DATE:2015-05-29
    fxt.Test_parse_page_wiki_str("<q>a</q>", "<q>a</q>");
  }
}

View File

@@ -0,0 +1,60 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
// Tests for <blockquote> handling, mainly its interaction with paragraph / pre processing.
public class Xop_xnde_wkr__blockquote_tst {
  private Xop_fxt fxt = new Xop_fxt();
  @After public void term() {fxt.Init_para_n_();}
  // PURPOSE: preserve leading spaces within blockquote; PAGE:en.w:Tenerife_airport_disaster
  @Test public void Pre() {
    fxt.Init_para_y_();
    String raw = String_.Concat_lines_nl_skip_last
    ( "<blockquote>"
    , " a"
    , "</blockquote>"
    );
    String expd = String_.Concat_lines_nl_skip_last
    ( "<blockquote>"
    , " a"
    , "</blockquote>"
    );
    fxt.Test_parse_page_wiki_str(raw, expd);
    fxt.Init_para_n_();
  }
  // PURPOSE: para/pre not working after blockquote; PAGE:en.w:Snappy_(software); DATE:2014-04-25
  @Test public void Trailing_nls() {
    fxt.Init_para_y_();
    String raw = String_.Concat_lines_nl_skip_last
    ( "<blockquote>a"
    , "</blockquote>"
    , ""
    , "b"
    , ""
    , " c"
    );
    String expd = String_.Concat_lines_nl_skip_last
    ( "<blockquote>a"
    , "</blockquote>"
    , ""
    , "<p>b"
    , "</p>"
    , ""
    , "<pre>c"
    , "</pre>"
    );
    fxt.Test_parse_page_wiki_str(raw, expd);
    fxt.Init_para_n_();
  }
  // PURPOSE: handle multiple dangling; PAGE:en.w:Ring_a_Ring_o'_Roses DATE:2014-06-26
  @Test public void Dangling_multiple() {
    String raw = "<blockquote>a<blockquote>b";
    String expd = "<blockquote>a</blockquote><blockquote>b</blockquote>";
    fxt.Test_parse_page_wiki_str(raw, expd);
  }
}

View File

@@ -0,0 +1,198 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*; import gplx.xowa.parsers.lists.*;
public class Xop_xnde_wkr__err_dangling_tst {
private Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Basic() {
fxt.Init_log_(Xop_xnde_log.Dangling_xnde)
.Test_parse_page_wiki("<div>", fxt.tkn_xnde_(0, 5));
}
@Test public void Many() {
fxt.Init_log_(Xop_xnde_log.Dangling_xnde, Xop_xnde_log.Dangling_xnde, Xop_xnde_log.Dangling_xnde)
.Test_parse_page_wiki("<div><div><div>", fxt.tkn_xnde_(0, 15).Subs_(fxt.tkn_xnde_(5, 15).Subs_(fxt.tkn_xnde_(10, 15))));
}
@Test public void Nested() {
fxt.Test_parse_page_wiki_str
( "<div><div><center>a</div></div>"
, "<div><div><center>a</center></div></div>"
);
}
@Test public void Center() {
fxt.Init_log_(Xop_xnde_log.Dangling_xnde).Test_parse_page_wiki("a<center>b"
, fxt.tkn_txt_(0, 1)
, fxt.tkn_xnde_(1, 10).CloseMode_(Xop_xnde_tkn.CloseMode_open).Subs_(fxt.tkn_txt_(9, 10))
);
}
@Test public void P() {
fxt.Init_log_(Xop_xnde_log.Auto_closing_section).Test_parse_page_wiki("a<p>b<p>c</p>"
, fxt.tkn_txt_ (0, 1)
, fxt.tkn_xnde_ (1, 4).Subs_(fxt.tkn_txt_(4, 5))
, fxt.tkn_xnde_ (5, 13).Subs_(fxt.tkn_txt_(8, 9))
);
}
@Test public void Alternating() { // PURPOSE: confirmation test for alternating dangling nodes; PAGE:en.w:Portal:Pornography/Selected_historical_image/Archive; DATE:2014-09-24
fxt.Test_parse_page_wiki_str
( "c<b><i>d<b><i>e"
, "c<b><i>d<b><i>e</i></b></i></b>"
);
}
@Test public void Li() { // PURPOSE: auto-close <li>; NOTE: no longer encloses in <ul/>; DATE:2014-06-26
fxt.Test_parse_page_wiki_str
( "<li>a<li>b"
, String_.Concat_lines_nl_skip_last
( "<li>a</li>"
, "<li>b</li>"
));
}
@Test public void Br() {
fxt.Test_parse_page_wiki("<br>a" , fxt.tkn_xnde_(0, 4), fxt.tkn_txt_(4, 5));
fxt.Test_parse_page_wiki("a<br name=b>c", fxt.tkn_txt_(0, 1), fxt.tkn_xnde_(1, 12), fxt.tkn_txt_(12, 13));
}
@Test public void Td_and_td() { // PURPOSE: when "<td>a<td>", 2nd <td> should auto-close
fxt.Test_parse_page_wiki("<table><tr><td>a<td></tr><tr><td>b</td></tr></table>"
, fxt.tkn_tblw_tb_(0, 52).Subs_
( fxt.tkn_tblw_tr_(7, 25).Subs_
( fxt.tkn_tblw_td_(11, 16).Subs_(fxt.tkn_txt_(15, 16)) // FUTURE: change to 11,20
, fxt.tkn_tblw_td_(16, 25) // FUTURE: change this to 16, 20
)
, fxt.tkn_tblw_tr_(25, 44).Subs_
( fxt.tkn_tblw_td_(29, 39).Subs_(fxt.tkn_txt_(33, 34))
)
)
);
}
@Test public void Tblw_and_tr() {// PURPOSE: <tr> should auto-close |-; EX:fr.wikipedia.org/wiki/Napoléon_Ier; DATE:2013-12-09
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl
( "{|"
, "|-"
, "<td>row1</td>"
, "<tr><td>row2</td>"
, "|}"
)
, String_.Concat_lines_nl
( "<table>"
, " <tr>"
, " <td>row1"
, " </td>"
, " </tr>"
, " <tr>"
, " <td>row2"
, " </td>"
, " </tr>"
, "</table>"
)
);
}
@Test public void Tblx_and_b() {
fxt.Init_log_(Xop_xnde_log.Dangling_xnde).Test_parse_page_wiki("<table><tr><td><b>a<td></tr></table>"
, fxt.tkn_tblw_tb_(0, 36).Subs_
( fxt.tkn_tblw_tr_(7, 28).Subs_
( fxt.tkn_tblw_td_(11, 19).Subs_ // FUTURE: change to 11,23
( fxt.tkn_xnde_(15, 36).Subs_(fxt.tkn_txt_(18, 19)) // FUTURE: should be 19, but xnde.Close() is passing in src_len
)
, fxt.tkn_tblw_td_(19, 28) // FUTURE: should be 23
)
)
);
}
@Test public void Tblx_and_li() { // PURPOSE: </td> should close list; see Stamp Act 1765
fxt.Init_para_y_();
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "<table><tr><td>"
, "*abc</td></tr><tr><td>bcd</td></tr>"
, "</table>"
), String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>"
, ""
, " <ul>"
, " <li>abc"
, " </li>"
, " </ul>"
, " </td>"
, " </tr>"
, " <tr>"
, " <td>bcd"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
fxt.Init_para_n_();
}
@Test public void Tblx_and_small() { // PURPOSE: </td> should close <small> correctly; see Stamp Act 1765
fxt.Init_para_y_();
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "<table><tr><td>"
, "<small>abc</td></tr><tr><td>bcd</td></tr>"
, "</table>"
), String_.Concat_lines_nl_skip_last
( "<table>"
, " <tr>"
, " <td>"
, "<small>abc</small>"
, " </td>"
, " </tr>"
, " <tr>"
, " <td>bcd"
, " </td>"
, " </tr>"
, "</table>"
, ""
)
);
fxt.Init_para_n_();
}
// Parses a <blockquote> containing a <p> that is never closed and checks the token tree:
// the blockquote node spans the whole source (0-30) and holds the text "a" plus the <p> node (13-17).
// Init_log_(Xop_xnde_log.Auto_closing_section) is called first; presumably it registers the log message
// the parse is expected to emit -- confirm in Xop_fxt.
@Test public void Blockquote_and_p() {
fxt.Init_log_(Xop_xnde_log.Auto_closing_section).Test_parse_page_wiki("<blockquote>a<p>b</blockquote>"
, fxt.tkn_xnde_(0, 30).Subs_
( fxt.tkn_txt_(12, 13)
, fxt.tkn_xnde_(13, 17).Subs_(fxt.tkn_txt_(16, 17))
));
}
// Parses two "*" list items where the first contains an unclosed <b>, and checks the flat token sequence:
// list-begin / <b> node / list-end for item 0, then list-begin / list-end for item 1.
// Init_log_(Xop_xnde_log.Dangling_xnde) is called first; presumably it registers the log message
// the parse is expected to emit -- confirm in Xop_fxt.
@Test public void List_and_b() {
fxt.Init_log_(Xop_xnde_log.Dangling_xnde).Test_parse_page_wiki("*<b>a\n*"
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ul).List_path_(0)
, fxt.tkn_xnde_(1, 7).Subs_(fxt.tkn_txt_(4, 5))
, fxt.tkn_list_end_(5).List_path_(0)
, fxt.tkn_list_bgn_(5, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(1)
, fxt.tkn_list_end_(7).List_path_(1)
);
}
// PURPOSE: 2nd <u> should auto-close the 1st; PAGE:en.b:Textbook_of_Psychiatry/Alcoholism_and_Psychoactive_Substance_Use_Disorders DATE:2014-09-05
@Test public void Underline() {
	String wikitext = "a<u>b<u>c";
	String expected = "a<u>b</u>c";	// second <u> is emitted as the closing tag
	fxt.Test_html_full_str(wikitext, expected);
}
// PURPOSE: dangling xtns within templates should be auto-closed inside the template, not in the calling page;
// PAGE:en.w:Provinces_and_territories_of_Canada DATE:2014-11-13
@Test public void Xtn_template() {
	// Template:A opens a <poem> and never closes it
	fxt.Init_page_create("Template:A", "<poem>A");
	String wikitext = String_.Concat_lines_nl_skip_last(
		"{{A}}",
		" b");	// poem should not extend to " b"
	String expected = String_.Concat_lines_nl_skip_last(
		"<div class=\"poem\">",
		"<p>",
		"A",
		"</p>",
		"</div>",	// poem ends here
		" b");
	fxt.Test_parse_page_all_str(wikitext, expected);
}
}

View File

@@ -0,0 +1,74 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
// Parser tests for malformed / incomplete xml-node markup ("<", "</", unterminated tags, broken end tags).
public class Xop_xnde_wkr__err_malformed_tst {
	private Xop_fxt fxt = new Xop_fxt();

	// runs after every test
	@After public void term() {
		fxt.Init_para_n_();
	}

	// a lone "<" is expected to come back as a single text token
	@Test public void Lt_only() {
		fxt.Test_parse_page_wiki(
			"<",
			fxt.tkn_txt_(0, 1));
	}

	// "<ref" is never terminated before end-of-source; expected tokens are text, space and the lnki
	@Test public void Eos_while_closing_tag() {
		fxt.Init_log_(Xop_xnde_log.Eos_while_closing_tag).Test_parse_page_wiki(
			"<ref [[a]]",
			fxt.tkn_txt_(0, 4),
			fxt.tkn_space_(4, 5),
			fxt.tkn_lnki_(5, 10));
	}

	// chk that name_bgn is less than src_len else arrayIndex error; EX: <ref><p></p<<ref/>; DATE:2014-01-18
	@Test public void End_tag_broken() {
		fxt.Wiki().Xtn_mgr().Init_by_wiki(fxt.Wiki());
		String wikitext = "<poem><p></p<</poem>";
		String expected = String_.Concat_lines_nl_skip_last(
			"<div class=\"poem\">",
			"<p>",	// NOTE: technically MW / WP does not add this <p>; however, easier to hardcode <p>; no "visual" effect; DATE:2014-04-27
			"<p>&lt;/p&lt;</p>",
			"</p>",
			"</div>");
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: handle broken tags; EX: <div a </div> -> &lt;div a; DATE:2014-02-03
	@Test public void Incomplete_tag_div() {
		// note that "<div a " is escaped (not considered xnde) while "</div>" is literally printed
		// TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
		String wikitext = "<div a </div>";
		String expected = "&lt;div a </div>";
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: invalid tag shouldn't break parser; EX:w:Cullen_(surname); "http://www.surnamedb.com/Surname/Cullen<ref"
	@Test public void Incomplete_tag_ref() {
		String wikitext = "a<ref";
		String expected = "a&lt;ref";
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: force <b/> to be <b></b>; EX: w:Exchange_value
	@Test public void Inline_tag_fix() {
		fxt.Init_log_(Xop_xnde_log.No_inline);
		String wikitext = "<b/>";
		String expected = "<b></b>";
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE.fix: don't auto-close past tblw PAGE:ro.b:Pagina_principala DATE:2014-06-26
	@Test public void Tblw() {
		String wikitext = String_.Concat_lines_nl_skip_last(
			"<div>",
			"{|",	// this should stop xnde search
			"<center>",
			"</div>",	// this should not find <div> as its bgn_tag; note that it will "drop out" below
			"|}",
			"</div>");
		String expected = String_.Concat_lines_nl_skip_last(
			"<div>",
			"<table><center></div>",	// TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
			" <tr>",
			" <td>",
			" </td>",
			" </tr>",
			"</center>",
			"</table>",
			"</div>");
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: handle incomplete tag sequences; DATE:2014-10-22
	@Test public void Incomplete_tag() {
		fxt.Test_parse_page_all_str("<", "&lt;");
		fxt.Test_parse_page_all_str("</", "&lt;/");
		// this used to fail
		fxt.Test_parse_page_all_str("</<", "&lt;/&lt;");
	}
}

View File

@@ -0,0 +1,190 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
// Miscellaneous regression tests for xml-node handling (stray close tags, nesting across tables, loose tag names).
public class Xop_xnde_wkr__err_misc_tst {
	private Xop_fxt fxt = new Xop_fxt();

	// runs after every test
	@After public void term() {
		fxt.Init_para_n_();
	}

	// <br/> between two <span>s inside a <th> must survive unchanged
	@Test public void Error_br_removed() {
		fxt.Init_para_y_();
		String wikitext = String_.Concat_lines_nl_skip_last(
			"<table>",
			" <tr>",
			" <th><span>a</span><br/><span>b</span>",
			" </th>",
			" </tr>",
			"</table>");
		String expected = String_.Concat_lines_nl_skip_last(
			"<table>",
			" <tr>",
			" <th><span>a</span><br/><span>b</span>",
			" </th>",
			" </tr>",
			"</table>",
			"");
		fxt.Test_parse_page_wiki_str(wikitext, expected);
		fxt.Init_para_n_();
	}

	// PURPOSE: extra </div> should not close <div> that is outside of <td>; PAGE:en.w:Rome en.w:Ankara
	@Test public void Div_should_not_pop_past_td() {
		fxt.Init_para_y_();
		String wikitext = String_.Concat_lines_nl_skip_last(
			"<table>",
			"<tr>",
			"<td>",
			"<div>",	// this is <div> #1
			"<table>",
			"<tr>",
			"<td>",
			"<div>",	// this is <div> #2
			"<div>",
			"a",
			"</div>",
			"</td>",
			"<td>",
			"<div>",
			"b",
			"</div>",
			"</div>",	// this </div> was supposed to pop <div> #2, but can't (b/c of HTML rules); however, do not try to pop <div> #1;
			"</td>",
			"<td>",
			"<div>",
			"c",
			"</div>",
			"</td>",
			"</tr>",
			"</table>",
			"</div>",
			"</td>",
			"</tr>",
			"</table>");
		String expected = String_.Concat_lines_nl_skip_last(
			"<table>",
			" <tr>",
			" <td>",
			"<div>",
			" <table>",
			" <tr>",
			" <td>",
			"<div>",
			"<div>",
			"",
			"<p>a",
			"</p>",
			"</div>",
			"</div>",
			" </td>",
			" <td>",
			"<div>",
			"",
			"<p>b",
			"</p>",
			"</div>",
			"</div>",	// TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
			" </td>",
			" <td>",
			"<div>",
			"",
			"<p>c",
			"</p>",
			"</div>",
			" </td>",
			" </tr>",
			" </table>",
			"</div>",
			" </td>",
			" </tr>",
			"</table>",
			"");
		fxt.Test_parse_page_wiki_str(wikitext, expected);
		fxt.Init_para_n_();
	}

	// PURPOSE: somehow xnde pops upper nde; PAGE:en.w:Greek government debt crisis; "History of government debt"
	@Test public void Xnde_pops() {
		// outer <i> wraps a wikitext table whose cell has its own <i>...</i>
		String wikitext = String_.Concat_lines_nl_skip_last(
			"<i>",
			"{|",
			"|-",
			"|<i>a</i>",
			"|}",
			"</i>");
		String expected = String_.Concat_lines_nl_skip_last(
			"<i>",
			"<table>",
			" <tr>",
			" <td><i>a</i>",
			" </td>",
			" </tr>",
			"</table>",
			"</i>");
		fxt.Test_parse_page_wiki_str(wikitext, expected);
	}

	// an inline extension tag (<poem/>) is expected to produce empty output
	@Test public void Err_inline_extension() {
		String wikitext = "<poem/>";
		String expected = "";
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: buggy code caused </p> to close everything; keeping test b/c of <p> logic
	@Test public void Xnde_para() {
		String wikitext = String_.Concat_lines_nl_skip_last(
			"<table>",
			"<tr>",
			"<td>",
			"<div>",
			"<p>",
			"<span>",
			"</span>",
			"</p>",
			"</div>",
			"</td>",
			"</tr>",
			"</table>");
		String expected = String_.Concat_lines_nl_skip_last(
			"<table>",
			" <tr>",
			" <td>",
			"<div>",
			"<p>",
			"<span>",
			"</span>",
			"</p>",
			"</div>",
			" </td>",
			" </tr>",
			"</table>",
			"");
		fxt.Test_parse_page_wiki_str(wikitext, expected);
	}

	// PURPOSE: occurred at ref of UK; a {{cite web|url=http://www.abc.gov/{{dead link|date=December 2011}}|title=UK}} b
	@Test public void Sup_bug() {
		String wikitext = "x <b><sup>y</b> z";
		String expected = "x <b><sup>y</sup></b> z";	// <sup> is closed before </b>
		fxt.Test_parse_page_wiki_str(wikitext, expected);
	}

	// PURPOSE: allow <br\>; EX:w:Mosquito; [[Acalyptratae|A<br\>c<br\>a<br\>l<br\>y<br\>p<br\>t<br\>r<br\>a<br\>t<br\>a<br\>e]]
	@Test public void Br_backslash() {
		String wikitext = "<br\\>";
		String expected = "<br/>";
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: handle <tt>a<tt>; EX:w:Domain name registry
	@Test public void Tt_does_not_repeat() {
		String wikitext = "<tt>a<tt>";
		String expected = "<tt>a</tt>";
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: MW allows <font-> and other variations; EX:w:2012_in_film
	@Test public void Loose_xnde_names() {
		String wikitext = "<font-size='100%'>a</font>";
		String expected = "<font>a</font>";
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// raw <a> tags are expected to be escaped while the [[e]] link still renders as an anchor
	@Test public void Anchor_nested() {
		String wikitext = "b<a>c<a>d [[e]] f";
		String expected = "b&lt;a>c&lt;a>d <a href=\"/wiki/E\">e</a> f";
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE:<img> marked as .xtn; unclosed <img> was escaping rest of text; PAGE:de.w:Wikipedia:Technik/Archiv/2014 DATE:2014-11-06
	@Test public void Img_should_not_be_xtn() {
		String wikitext = "<img>''a''";
		String expected = "&lt;img><i>a</i>";
		fxt.Test_parse_page_all_str(wikitext, expected);
	}
}

View File

@@ -0,0 +1,83 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
// Basic tests for <includeonly>, <noinclude> and <onlyinclude>.
// "Tmpl_*" tests go through Test_parse_tmpl_str_test (template text, call, expected);
// "Wiki_*" tests go through Test_parse_page_all_str (page text, expected).
public class Xop_xnde_wkr__include_basic_tst {
	private Xop_fxt fxt = new Xop_fxt();

	// runs before every test
	@Before public void init() {
		fxt.Reset();
	}

	@Test public void Tmpl_includeonly() {
		String tmplText = "a<includeonly>b</includeonly>c";
		fxt.Test_parse_tmpl_str_test(tmplText, "{{test}}", "abc");
	}

	@Test public void Tmpl_noinclude() {
		String tmplText = "a<noinclude>b</noinclude>c";
		fxt.Test_parse_tmpl_str_test(tmplText, "{{test}}", "ac");
	}

	@Test public void Tmpl_onlyinclude() {
		String tmplText = "a<onlyinclude>b</onlyinclude>c";
		fxt.Test_parse_tmpl_str_test(tmplText, "{{test}}", "b");
	}

	// PURPOSE: check that onlyinclude handles (a) inside {{#if}} function (old engine did not); and (b) that abc are correctly added together
	@Test public void Tmpl_onlyinclude_nest() {
		String tmplText = "{{#ifeq:y|y|a<onlyinclude>b</onlyinclude>c|n}}";
		fxt.Test_parse_tmpl_str_test(tmplText, "{{test}}", "b");
	}

	// PURPOSE: handle scenario similar to {{FA Number}} where # of articles is buried in page between onlyinclude tags; added noinclude as additional stress test
	@Test public void Tmpl_onlyinclude_page() {
		fxt.Init_page_create("Transclude_1", "<noinclude>a<onlyinclude>b</onlyinclude>c</noinclude>d");
		String tmplText = "{{:Transclude_1}}";
		fxt.Test_parse_tmpl_str_test(tmplText, "{{test}}", "b");
	}

	// PURPOSE: handle scenario similar to PS3 wherein onlyinclude was being skipped (somewhat correctly) but following text (<pre>) was also included
	@Test public void Tmpl_onlyinclude_page2() {
		fxt.Init_page_create("Transclude_2", "a<onlyinclude>b<includeonly>c</includeonly>d</onlyinclude>e<pre>f</pre>g");
		String tmplText = "{{:Transclude_2}}";
		fxt.Test_parse_tmpl_str_test(tmplText, "{{test}}", "bcd");
	}

	// PURPOSE.fix: ignore unmatched </noinclude>; EX:fi.w:Sergio_Leone; DATE:2014-05-02
	@Test public void Tmpl_noinclude_unmatched() {
		String tmplText = "{{{1|</noinclude>}}}";
		// was "{{{test|"
		fxt.Test_parse_tmpl_str_test(tmplText, "{{test|a}}", "a");
	}

	@Test public void Wiki_includeonly() {
		String wikitext = "a<includeonly>b</includeonly>c";
		fxt.Test_parse_page_all_str(wikitext, "ac");
	}

	@Test public void Wiki_noinclude() {
		String wikitext = "a<noinclude>b</noinclude>c";
		fxt.Test_parse_page_all_str(wikitext, "abc");
	}

	@Test public void Wiki_onlyinclude() {
		String wikitext = "a<onlyinclude>b</onlyinclude>c";
		fxt.Test_parse_page_all_str(wikitext, "abc");
	}

	@Test public void Wiki_oi_io() {
		String wikitext = "a<onlyinclude>b<includeonly>c</includeonly>d</onlyinclude>e";
		fxt.Test_parse_page_all_str(wikitext, "abde");
	}

	// table whose alternate "|}" terminator sits inside <includeonly>
	@Test public void Wiki_oi_io_tblw() {
		String wikitext = String_.Concat_lines_nl_skip_last(
			"<onlyinclude>",
			"{|",
			"|-",
			"|a<includeonly>",
			"|}</includeonly></onlyinclude>",
			"|-",
			"|b",
			"|}");
		String expected = String_.Concat_lines_nl_skip_last(
			"<table>",
			" <tr>",
			" <td>a",
			" </td>",
			" </tr>",
			" <tr>",
			" <td>b",
			" </td>",
			" </tr>",
			"</table>",
			"");
		fxt.Test_parse_page_all_str(wikitext, expected);
	}
}
/*
<includeonly>-({{{1}}}={{{1}}}round-5)-({{{1}}}={{{1}}}round-4)-({{{1}}}={{{1}}}round-3)-({{{1}}}={{{1}}}round-2)-({{{1}}}={{{1}}}round-1)</includeonly><noinclude>
{{pp-template}}Called by {{lt|precision/0}}</noinclude>
==includeonly -- aka: do not eval in template ==
main: a<includeonly>b</includeonly>c<br/>
tmpl: {{mwo_include_only|a|b|c}}
==noinclude -- aka: eval in template only==
main: a<noinclude>b</noinclude>c<br/>
tmpl: {{mwo_no_include|a|b|c}}
==onlyinclude -- aka: only include in template only (ignore everything else) ==
main: a<onlyinclude>b</onlyinclude>c<br/>
tmpl: {{mwo_only_include|a|b|c}}
*/

View File

@@ -0,0 +1,194 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
// Less common <includeonly> / <noinclude> / <onlyinclude> scenarios: tags inside template names,
// parameters, parser functions, subst: handling and malformed tags.
public class Xop_xnde_wkr__include_uncommon_tst {
	private Xop_fxt fxt = new Xop_fxt();

	// runs before every test
	@Before public void init() {
		fxt.Reset();
	}

	// PURPOSE: <includeonly> not parsing internals; PAGE:en.w:[[Template:MONTHNAME]]
	@Test public void Ex_Tmpl_io_oi() {
		String tmplText = "<includeonly>{{#if:{{{1}}}|a|b}}</includeonly><noinclude>c</noinclude>";
		fxt.Test_parse_tmpl_str_test(tmplText, "{{test|1}}", "a");
	}

	// PURPOSE: <includeonly> and internal subst; PAGE:en.w:[[Template:Dubious]]
	@Test public void Ex_Tmpl_io_subst() {
		fxt.Init_defn_clear();
		fxt.Init_defn_add("mwo_print", "{{{1}}}");
		fxt.Init_defn_add("substcheck", "SUBST");

		// 1st pass: "subst:" split across two <includeonly> sections
		String substTmpl = String_.Concat_lines_nl_skip_last(
			"{{mwo_print",
			"|<includeonly>{{subst:</includeonly><includeonly>substcheck}}</includeonly>",
			"}}");
		fxt.Test_parse_tmpl_str_test(substTmpl, "{{test}}", "{{subst:substcheck}}\n");

		fxt.Reset();

		// 2nd pass: same shape but with "safesubst:"
		String safesubstTmpl = String_.Concat_lines_nl_skip_last(
			"{{mwo_print",
			"|<includeonly>{{safesubst:</includeonly><includeonly>substcheck}}</includeonly>",
			"}}");
		fxt.Test_parse_tmpl_str_test(safesubstTmpl, "{{test}}", "SUBST\n");

		fxt.Init_defn_clear();
	}

	// PURPOSE: <noinclude> should not process internal tkns; PAGE:en.w:[[Template:See]]
	@Test public void Ex_Tmpl_noinclude_prm_1() {
		fxt.Init_defn_clear();
		fxt.Init_defn_add("mwo_print", "{{{1}}}{{{2}}}");
		String tmplText = "{{mwo_print|{{{1<noinclude>|not_seen</noinclude>}}}|{{{2}}}}}";
		String caller = "{{test|a|b}}";
		String expected = "ab";
		fxt.Test_parse_tmpl_str_test(tmplText, caller, expected);
		fxt.Init_defn_clear();
	}

	// PURPOSE: <noinclude> should not process default tkn;
	@Test public void Ex_Tmpl_noinclude_prm_2() {
		// {{#if: {{{x|<noinclude>y</noinclude>}}} -> {{#if: {{{x|}} -> hidden
		String tmplText = "{{#if: {{{x|<noinclude>y</noinclude>}}} | visible | hidden}}";
		String caller = "{{test}}";
		String expected = "hidden";
		fxt.Test_parse_tmpl_str_test(tmplText, caller, expected);
	}

	// PURPOSE: <noinclude> should be separate from tkns {{convert|50|km|0|abbr=on}}
	@Test public void Ex_Tmpl_noinclude2() {
		fxt.Init_defn_clear();
		fxt.Init_defn_add("mwo_print", "{{{1}}}{{{2}}}");
		String tmplText = "{{mwo_print<noinclude>{{{?}}}</noinclude>|a|b}}";
		String caller = "{{test}}";
		String expected = "ab";
		fxt.Test_parse_tmpl_str_test(tmplText, caller, expected);
		fxt.Init_defn_clear();
	}

	// PURPOSE: "</noinclude" should not be matched;
	@Test public void Exception_incompleteTag_matchNext() {
		String tmplText = "a<noinclude>b</noinclude c<noinclude>d</noinclude>e";
		String caller = "{{test}}";
		String expected = "ae";
		fxt.Test_parse_tmpl_str_test(tmplText, caller, expected);
	}

	// <noinclude> with no closing tag: everything after it is expected to be dropped
	@Test public void Exception_noCloseTag() {
		String tmplText = "a<noinclude>bcde";
		String caller = "{{test}}";
		String expected = "a";
		fxt.Test_parse_tmpl_str_test(tmplText, caller, expected);
	}

	// inline <noinclude/> is expected to drop only the tag itself
	@Test public void Exception_inline() {
		String tmplText = "a<noinclude/>bcde";
		String caller = "{{test}}";
		String expected = "abcde";
		fxt.Test_parse_tmpl_str_test(tmplText, caller, expected);
	}

	// "<noinclude/a/>" is expected to pass through untouched
	@Test public void Exception_inline_2() {
		String tmplText = "a<noinclude/a/>bcde";
		String caller = "{{test}}";
		String expected = "a<noinclude/a/>bcde";
		fxt.Test_parse_tmpl_str_test(tmplText, caller, expected);
	}

	// PURPOSE: was eating up next template; PAGE:en.w:Wikipedia:Featured_articles
	@Test public void Defect_onlyinclude_inside_template() {
		String wikitext = "{{formatnum: <onlyinclude>1</onlyinclude>}} {{formatnum:2}}";
		String expected = "1 2";
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: given "a\n<onlyinclude>{|\n", "{|" should be table; PAGE:en.w:Wikipedia:Reference_desk
	@Test public void Only_include_preserves_nl() {
		String wikitext = String_.Concat_lines_nl(
			"a",
			"<onlyinclude>==b==</onlyinclude>",
			"c");
		// , "{{test}}"
		String expected = String_.Concat_lines_nl(
			"a",
			"",
			"<h2>b</h2>",
			"c");
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: <oi> should interpret templates
	@Test public void Only_include_interprets_template() {
		fxt.Init_defn_clear();
		fxt.Init_defn_add("test", "see_me");
		String wikitext = String_.Concat_lines_nl(
			"a",
			"<onlyinclude>{{test}}</onlyinclude>",
			"c");
		String expected = String_.Concat_lines_nl(
			"a",
			"see_me",
			"c");
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: includeonly in tmpl_name should be ignored; EX:de.w:Wikipedia:Projektdiskussion; DATE:2014-01-24
	@Test public void Include_only_in_template_name() {
		fxt.Init_defn_clear();
		fxt.Init_defn_add("test", "abc");
		String wikitext = "{{<includeonly></includeonly>test}}";
		fxt.Test_parse_page_all_str(wikitext, "abc");
	}

	// PURPOSE: include only in transcluded page should be ignored; EX:de.w:Wikipedia:Projektdiskussion; DATE:2014-01-24; DATE:2014-05-10
	@Test public void Include_only_in_transcluded_page() {
		// create page in main ns
		fxt.Init_page_create("page", "abc");
		// will become {{:page}} which should then transclude page
		String wikitext = "{{:<includeonly>safesubst:</includeonly>page}}";
		fxt.Test_parse_page_all_str(wikitext, "abc");
	}

	// PURPOSE: includeonly and subst inside function should be ignored; PAGE:en.w:WikiProject_Articles_for_creation/BLD_Preload; DATE:2014-04-29
	@Test public void Include_only_subst_in_function() {
		String wikitext = "{{<includeonly>subst:</includeonly>#expr:0}}";
		fxt.Test_parse_page_all_str(wikitext, "0");
	}

	// PURPOSE: includeonly should be evaluated during template parse; EX: es.b:Billar/Técnica/Clases_de_puentes; DATE:2014-02-12
	@Test public void Hdr() {
		String wikitext = "=<includeonly>=</includeonly>A=<includeonly>=</includeonly>";
		String expected = "<h1>A</h1>\n";
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// DISABLED by original author:
	// @Test public void Noinclude_nested() { // PURPOSE: nested noincludes don't work; th.w:ISO_3166-1;DATE:2014-04-06
	// fxt.Init_defn_clear();
	// fxt.Init_defn_add("test", "a<noinclude>b<noinclude>c</noinclude>d</noinclude>e");
	// fxt.Test_parse_page_all_str("{{test}}", "ae");
	// }
	// @Test public void Wiki_includeonly_ignore() {fxt.Test_parse_wiki_text("[[a<includeonly>b</includeonly>c]]", "[[ac]]");} // FUTURE: ttl parses by idx, and ignores includeonly: WHEN: upon encountering; may need to redo in other parsers?

	// PURPOSE: <onlyinclude> inside main was not returning content; PAGE:en.w:Wikipedia:Featured_articles
	@Test public void Defect_noinclude_inside_main() {
		fxt.Init_defn_clear();
		fxt.Init_defn_add("Test_tmpl", "{{:Test_page}}");
		fxt.Data_create("Test_page", "a{{#expr:<onlyinclude>1</onlyinclude>}}c");
		String wikitext = "{{Test_tmpl}}";
		String expected = "1";
		fxt.Test_parse_page_all_str(wikitext, expected);
		fxt.Init_defn_clear();
	}

	// PAGE:https://en.wikipedia.org/wiki/BSD_licenses DATE:2014-05-23
	@Test public void Pre_and_includeonly() {
		// empty <includeonly> pair sits inside the opening <pre> tag itself
		fxt.Init_defn_add("pre2", "<pre<includeonly></includeonly>>{{{1}}}</pre>");
		String wikitext = "{{pre2|a}}";
		String expected = String_.Concat_lines_nl_skip_last(
			"<pre>a</pre>");
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// DISABLED by original author:
	// @Test public void Pre_and_includeonly2() {
	// fxt.Init_defn_add("pre2", "<pre<includeonly></includeonly>><nowiki>{{{1}}}</nowiki></pre>");
	// fxt.Test_parse_page_all_str
	// ( "{{pre2|a}}"
	// , String_.Concat_lines_nl_skip_last
	// ( "<pre>a</pre>"
	// ));
	// }

	// PURPOSE: "<noinclude />" did not work with safesubst b/c of space; PAGE:en.w:Wikipedia:Featured_picture_candidates; DATE:2014-06-24
	@Test public void Noinclude_inline_w_space_inside_safesubst() {
		String tmplText = "{{SAFESUBST:<noinclude />#if:val_exists|y|n}}";
		fxt.Test_parse_tmpl_str_test(tmplText, "{{test}}", "y");
	}

	// PURPOSE: handle subst-includeonly-subst combination; PAGE:pt.w:Argentina DATE:2014-09-24
	@Test public void Subst() {
		fxt.Init_defn_clear();
		fxt.Init_defn_add("test", "{{<includeonly>subst:</includeonly>#switch:1|1=y|default=n}}");
		//fxt.Init_defn_add("test", "{{subst:#switch:1|1=y|default=n}}"); // keeping around for debugging purposes
		//fxt.Init_defn_add("test", "{{<includeonly>#switch:</includeonly>1|1=y|default=n}}"); // keeping around for debugging purposes

		// note that subst is preserved b/c of <includeonly>
		fxt.Test_parse_page_all_str("{{test}}", "{{subst:#switch:1|1=y|default=n}}");
		// note that expression is evaluated b/c of subst:
		fxt.Test_parse_page_all_str("{{subst:test}}", "y");
	}
}

View File

@@ -0,0 +1,104 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
// Tests for HTML-style <li> handling: nesting, empty items, duplicates and dangling items.
public class Xop_xnde_wkr__li_tst {
	private Xop_fxt fxt = new Xop_fxt();

	// runs after every test
	@After public void term() {
		fxt.Init_para_n_();
	}

	// PURPOSE: auto-close <li> (EX: "<li>a<li>") was causing 3rd <li> to close incorrectly
	@Test public void Inside_tblx() {
		String wikitext = "<table><tr><td><ul><li>a</li><li>b</li><li>c</li></ul></td></tr></table>";
		String expected = String_.Concat_lines_nl_skip_last(
			"<table>",
			" <tr>",
			" <td><ul>",
			"<li>a</li>",
			"<li>b</li>",
			"<li>c</li></ul>",
			" </td>",
			" </tr>",
			"</table>",
			"");
		fxt.Test_parse_page_wiki_str(wikitext, expected);
	}

	// PURPOSE: nested li in ul should not be escaped; DATE:2013-12-04
	@Test public void Li_nested_inside_ul() {
		String wikitext = "<ul><li>a<ul><li>b</li></ul></li></ul>";
		String expected = String_.Concat_lines_nl_skip_last(
			"<ul>",
			"<li>a<ul>",
			"<li>b</li></ul></li></ul>");	// note that <li><li>b becomes <li>&lt;li>b but <li><ul><li>b should stay the same
		fxt.Test_parse_page_wiki_str(wikitext, expected);
	}

	// the middle <li>, which holds only a newline, is absent from the expected output
	@Test public void Empty_ignored() {
		String wikitext = String_.Concat_lines_nl_skip_last(
			"<ul>",
			"<li>a",
			"</li><li>",
			"</li><li>b",
			"</li>",
			"</ul>");
		String expected = String_.Concat_lines_nl_skip_last(
			"<ul>",
			"<li>a",
			"</li>",
			"<li>b",
			"</li>",
			"</ul>");
		fxt.Test_parse_page_wiki_str(wikitext, expected);
	}

	// PAGE:en.w:Sukhoi_Su-47; "* </li>" causes error b/c </li> tries to close non-existent node
	@Test public void Empty_ignored_error() {
		String wikitext = String_.Concat_lines_nl_skip_last(
			"* a",
			"* </li>");
		String expected = String_.Concat_lines_nl_skip_last(
			"<ul>",
			" <li> a",
			" </li>",
			" <li> </li>",	// TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
			" </li>",
			"</ul>");
		fxt.Test_parse_page_wiki_str(wikitext, expected);
	}

	// PURPOSE: <li> should always be separated by nl, or else items will merge, creating long horizontal scroll bar; EX:w:Music
	@Test public void Insert_nl() {
		fxt.Init_para_y_();
		String wikitext = "<ul><li>a</li><li>b</li></ul>";
		String expected = String_.Concat_lines_nl_skip_last(
			"<ul>",
			"<li>a</li>",
			"<li>b</li></ul>",
			"");
		fxt.Test_parse_page_all_str(wikitext, expected);
		fxt.Init_para_n_();
	}

	// PURPOSE: redundant li; EX: "* <li>"; PAGE:it.w:Milano#Bibliographie; DATE:2013-07-23
	@Test public void Duplicate() {
		String wikitext = "* <li>x</li>";
		String expected = String_.Concat_lines_nl_skip_last(
			"<ul>",
			" <li> ",
			"<li>x</li>",	// TIDY: duplicate li will be stripped out; DATE:2014-06-26
			" </li>",
			"</ul>");
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE.TIDY: handle "<li><span>a<li><span>b"; PAGE:ro.w:Pagina principala; DATE:2014-06-26
	@Test public void Dangling_inside_xnde() {
		String wikitext = "<li><span>a<li><span>b";
		String expected = String_.Concat_lines_nl_skip_last(
			"<li><span>a",
			"<li><span>b</span></li></span></li>");	// TIDY: will (a) move </span></li> to 1st line
		fxt.Test_parse_page_all_str(wikitext, expected);
	}
}

View File

@@ -0,0 +1,144 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
// Tests for <nowiki>: plain text, inside attributes, inside <pre>/<code>, and inside table markup.
public class Xop_xnde_wkr__nowiki_tst {
	private Xop_fxt fxt = new Xop_fxt();

	// runs after every test
	@After public void term() {
		fxt.Init_para_n_();
	}

	// quote markup inside <nowiki> is expected to come out literally
	@Test public void Basic() {
		String wikitext = "<nowiki>''a''</nowiki>b";
		String expected = "''a''b";
		fxt.Test_parse_page_wiki_str(wikitext, expected);
	}

	// <nowiki>#</nowiki> coming from a template is expected to render as a literal "#" inside a paragraph
	@Test public void Template() {
		fxt.Init_para_y_();
		fxt.Init_defn_add("nowiki_test", "<nowiki>#</nowiki>a");
		String wikitext = "{{nowiki_test}}";
		String expected = String_.Concat_lines_nl_skip_last(
			"<p>#a",
			"</p>",
			"");
		fxt.Test_parse_page_all_str(wikitext, expected);
		fxt.Init_para_n_();
	}

	// PAGE:en.w:HTML
	@Test public void H2() {
		String wikitext = "a<nowiki><h1>b<h6></nowiki>c";
		String expected = String_.Concat_lines_nl_skip_last(
			"a&lt;h1&gt;b&lt;h6&gt;c");
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PAGE:en.w:Doomsday_argument; <nowiki>[0,&nbsp;1]</nowiki>
	@Test public void Lnke() {
		String wikitext = "a <nowiki>[0,&nbsp;1]</nowiki> b";
		// NOTE: not "0" + Byte_.XtoStr(160) + "1"; depend on browser to translate &nbsp;
		String expected = "a [0,&nbsp;1] b";
		fxt.Test_parse_page_wiki_str(wikitext, expected);
	}

	// whole attribute value supplied through <nowiki>
	@Test public void Xatrs_val_text() {
		String wikitext = "<div id=<nowiki>a</nowiki>>b</div>";
		String expected = String_.Concat_lines_nl_skip_last(
			"<div id=\"a\">b</div>");
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// <nowiki> embedded in the middle of a quoted attribute value
	@Test public void Xatrs_val_quote() {
		String wikitext = "<div id='a<nowiki>b</nowiki>c'>d</div>";
		String expected = String_.Concat_lines_nl_skip_last(
			"<div id='abc'>d</div>");
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// the "=" of each attribute is wrapped in <nowiki>
	@Test public void Xatrs_eq() {
		String wikitext = "<ul id<nowiki>=</nowiki>\"a\" class<nowiki>=</nowiki>\"b\"><li><span class=\"c\">d</li></ul>";
		String expected = String_.Concat_lines_nl_skip_last(
			"<ul id=\"a\" class=\"b\">",
			"<li><span class=\"c\">d</span></li></ul>");
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: nowiki breaks token
	@Test public void Tblw_atr() {
		String wikitext = String_.Concat_lines_nl_skip_last(
			"{|style=\"background-color:<nowiki>#</nowiki>FFCC99\"",
			"|a",
			"|}");
		String expected = String_.Concat_lines_nl_skip_last(
			"<table style=\"background-color:#FFCC99\">",
			" <tr>",
			" <td>a",
			" </td>",
			" </tr>",
			"</table>",
			"");
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: nowikis inside pre should be ignored; DATE:2013-03-30
	@Test public void Prex() {
		// basic
		fxt.Test_parse_page_all_str(
			"<pre>a<nowiki>&lt;</nowiki>b</pre>",
			"<pre>a&lt;b</pre>");
		// not closed
		fxt.Test_parse_page_all_str(
			"<pre>a<nowiki>&lt;<nowiki>b</pre>",
			"<pre>a&lt;nowiki&gt;&lt;&lt;nowiki&gt;b</pre>");
		// nested; this is wrong, but leave for now; should be a<nowiki>b</nowiki>c
		fxt.Test_parse_page_all_str(
			"<pre><nowiki>a<nowiki>b</nowiki>c</nowiki></pre>",
			"<pre>&lt;nowiki&gt;abc&lt;/nowiki&gt;</pre>");
	}

	// PURPOSE: space inside nowiki should be ignored; ru.b:Rubyn DATE:2014-07-03
	@Test public void Prew() {
		fxt.Init_para_y_();
		String wikitext = String_.Concat_lines_nl(
			" a<nowiki>",
			" <b></b></nowiki>");	// note that "\s" must remain "\s" so that <pre> continues uninterrupted
		String expected = String_.Concat_lines_nl(
			"<pre>a",
			"&lt;b&gt;&lt;/b&gt;",
			"</pre>");
		fxt.Test_parse_page_all_str(wikitext, expected);
		fxt.Init_para_n_();
	}

	// PURPOSE: prew should continue over nowiki, even if no space DATE:2014-07-03
	@Test public void Prew_2() {
		fxt.Init_para_y_();
		String wikitext = String_.Concat_lines_nl(
			" <nowiki>a",
			"b</nowiki>");	// note that "b" should be in pre b/c it is part of <nowiki> which is pre'd (even though there is no \n\s)
		String expected = String_.Concat_lines_nl(
			"<pre>a",
			"b",
			"</pre>");
		fxt.Test_parse_page_all_str(wikitext, expected);
		fxt.Init_para_n_();
	}

	// PURPOSE.fix:HtmlNcr-escaped refs were being ignored; caused by HtmlTidy fix for frwiki templates;DATE:2013-06-27
	@Test public void Code() {
		String wikitext = "<code><nowiki>|:</nowiki></code>";
		String expected = "<code>|:</code>";
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: check that "]" is escaped; PAGE:en.w:Tall_poppy_syndrome; DATE:2014-07-23
	@Test public void Brack_end() {
		String wikitext = "<nowiki>[</nowiki>[[A]]<nowiki>]</nowiki>";
		// was showing up as [[[A]]]
		String expected = "[<a href=\"/wiki/A\">A</a>]";
		fxt.Test_parse_page_all_str(wikitext, expected);
	}

	// PURPOSE: dash should be escaped in nowiki PAGE:de.w:Liste_von_Vereinen_und_Vereinigungen_von_Gläubigen_(römisch-katholische_Kirche) DATE:2015-01-08
	@Test public void Tblw_tr() {
		String wikitext = String_.Concat_lines_nl_skip_last(
			"{|",
			"|-",
			"|a",
			"|<nowiki>-</nowiki>",	// do not treat as "|-"
			"|}");
		String expected = String_.Concat_lines_nl_skip_last(
			"<table>",
			" <tr>",
			" <td>a",
			" </td>",
			" <td>-",	// "|" creates <td>; "-" is rendered literally
			" </td>",
			" </tr>",
			"</table>",
			"");
		fxt.Test_parse_page_all_str(wikitext, expected);
	}
}

View File

@@ -0,0 +1,80 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
// Tests for HTML-style tables (<table>/<tr>/<td>) handled through the xml-node worker.
public class Xop_xnde_wkr__tblx_tst {
	private Xop_fxt fxt = new Xop_fxt();

	// runs after every test
	@After public void term() {
		fxt.Init_para_n_();
	}

	// token-level check: text "a", table (1-35) -> row (8-27) -> cell (12-22) -> text "b", then text "c"
	@Test public void Table() {
		fxt.Test_parse_page_wiki(
			"a<table><tr><td>b</td></tr></table>c",
			fxt.tkn_txt_(0, 1),
			fxt.tkn_tblw_tb_(1, 35).Subs_(
				fxt.tkn_tblw_tr_(8, 27).Subs_(
					fxt.tkn_tblw_td_(12, 22).Subs_(fxt.tkn_txt_(16, 17)))),
			fxt.tkn_txt_(35, 36));
	}

	// PURPOSE: some templates return leading ws; PAGE:en.w:UK
	@Test public void Ws_bgn() {
		fxt.Init_para_y_();
		String wikitext = String_.Concat_lines_nl_skip_last(
			" <table>",
			" <tr>",
			" <td>a",
			" </td>",
			" </tr>",
			" </table>");
		String expected = String_.Concat_lines_nl_skip_last(
			"<table>",
			" <tr>",
			" <td>a",
			" </td>",
			" </tr>",
			"</table>",
			"");
		fxt.Test_parse_page_wiki_str(wikitext, expected);
		fxt.Init_para_n_();
	}

	// PURPOSE: \n| inside lnki should not be interpreted as table cell; EX: uk.w:Дніпро; DATE:2014-03-11
	@Test public void Td_in_lnki_should_be_ignored() {
		fxt.Init_para_y_();
		// the file link is split over two lines, the second starting with "|"
		String wikitext = String_.Concat_lines_nl_skip_last(
			"<table><tr><td>",
			"[[File:A.png|150px",
			"|B]]</td></tr></table>");
		String expected = String_.Concat_lines_nl_skip_last(
			"<table>",
			" <tr>",
			" <td>",
			"<a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"B\" src=\"file:///mem/wiki/repo/trg/thumb/7/0/A.png/150px.png\" width=\"150\" height=\"0\" /></a>",
			" </td>",
			" </tr>",
			"</table>");
		fxt.Test_parse_page_wiki_str(wikitext, expected);
		fxt.Init_para_n_();
	}

	// five consecutive newlines inside an empty table are expected to reduce to one
	@Test public void Nl() {
		fxt.Init_para_y_();
		String wikitext = "<table>\n\n\n\n\n</table>";
		String expected
			= "<table>\n"
			+ "</table>\n";
		fxt.Test_parse_page_wiki_str(wikitext, expected);
		fxt.Init_para_n_();
	}
}

View File

@@ -0,0 +1,79 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Parser tests for the text-block tags exercised below: source, code and pre. */
public class Xop_xnde_wkr__text_block_tst {
	private Xop_fxt fxt = new Xop_fxt();
	/** Runs after every test and calls Init_para_n_ on the fixture (counterpart of the Init_para_y_ calls below). */
	@After public void term() {
		fxt.Init_para_n_();
	}
	/** PURPOSE.ASSERT: wikitext inside source should be rendered literally; DATE:2014-03-11 */
	@Test public void Source_wikitext() {
		String raw = "<source>''a''</source>";
		String expd = "<pre>''a''</pre>";
		fxt.Test_parse_page_wiki_str(raw, expd);
	}
	/** PURPOSE.ASSERT: onlyinclude inside source should be rendered literally (escaped); DATE:2014-03-11 */
	@Test public void Source_nowiki() {
		String raw = "<source><onlyinclude>a</onlyinclude></source>";
		String expd = "<pre>&lt;onlyinclude&gt;a&lt;/onlyinclude&gt;</pre>";
		fxt.Test_parse_page_wiki_str(raw, expd);
	}
	/** Checks that an html tag inside source comes out with its angle brackets escaped. */
	@Test public void Source_escape() {
		String raw = "<source><b></source>";
		String expd = "<pre>&lt;b&gt;</pre>";
		fxt.Test_parse_page_wiki_str(raw, expd);
	}
	/** PURPOSE: &lt; inside source should be rendered as &amp;lt; PAGE:uk.b:HTML; DATE:2014-03-11 */
	@Test public void Source_escape_amp() {
		String raw = "<source>&lt;</source>";
		String expd = "<pre>&amp;lt;</pre>";
		fxt.Test_parse_page_wiki_str(raw, expd);
	}
	/** PURPOSE: handle source whose lines start with a space; PAGE:en.w:Comment_(computer_programming); DATE:2014-06-23 */
	@Test public void Source_pre() {
		fxt.Init_para_y_();
		String raw = String_.Concat_lines_nl_skip_last
			( " <source>"
			, " a"
			, " </source>"
			);
		String expd = String_.Concat_lines_nl_skip_last
			( " <pre>"
			, " a"
			, "</pre>"
			);
		fxt.Test_parse_page_wiki_str(raw, expd);
		fxt.Init_para_n_();
	}
	/** PURPOSE: a second opening code tag where a closing one was meant should close the first; PAGE:en.w:HTML */
	@Test public void Code_dangling() {
		String raw = "a<code>b<code>c";
		String expd = "a<code>b</code>c";
		fxt.Test_parse_page_wiki_str(raw, expd);
	}
	/** PURPOSE: code was mistakenly marked as escape, causing inner tags to be rendered incorrectly; PAGE:en.w:UTF8 */
	@Test public void Code_do_not_escape() {
		String raw = "<code><span style=\"color:red;\">0100100</span></code>";
		String expd = "<code><span style=\"color:red;\">0100100</span></code>";
		fxt.Test_parse_page_all_str(raw, expd);
	}
	/** PURPOSE: pre should handle the chars &amp; " &lt; &gt; according to context */
	@Test public void Pre_and_html_chars() {
		// each row is {raw, expd}; rows are checked in the order listed
		String[][] rows = new String[][]
			{ {"<pre>a&#09;b</pre>"         , "<pre>a&#09;b</pre>"}               // known ncr/dec; embed and depend on browser transforming; EX: de.w:Wikipedia:Technik/Skin/Werkstatt
			, {"<pre>a&#9999999999;b</pre>" , "<pre>a&amp;#9999999999;b</pre>"}   // unknown ncr/dec; escape & (since browser cannot render)
			, {"<pre>a&#af ;b</pre>"        , "<pre>a&amp;#af ;b</pre>"}          // unknown ncr/dec 2
			, {"<pre>a&#x9;b</pre>"         , "<pre>a&#x9;b</pre>"}               // known ncr/hex
			, {"<pre>a&apos;b</pre>"        , "<pre>a&apos;b</pre>"}              // known name; embed
			, {"<pre>a&apox;b</pre>"        , "<pre>a&amp;apox;b</pre>"}          // unknown name; escape
			, {"<pre>&\"<></pre>"           , "<pre>&amp;&quot;&lt;&gt;</pre>"}   // no ncr or name; escape; needed for <pre><img ...></pre>; PAGE:en.w:Alt attribute
			};
		for (String[] row : rows)
			fxt.Test_parse_page_all_str(row[0], row[1]);
	}
	/** PURPOSE: make sure pre does not create a p tag pair around it; also, make sure " a" is preserved; DATE:2014-02-20 */
	@Test public void Pre_and_space() {
		fxt.Init_para_y_();
		String raw = String_.Concat_lines_nl_skip_last
			( "<pre>"
			, " a"
			, "</pre>"
			);
		String expd = String_.Concat_lines_nl_skip_last
			( "<pre>"
			, " a"
			, "</pre>"
			);
		fxt.Test_parse_page_wiki_str(raw, expd);
		fxt.Init_para_n_();
	}
}

View File

@@ -0,0 +1,46 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Parser tests for tidy-style corrections: mismatched close tags, whitespace at tag edges, and empty tags. */
public class Xop_xnde_wkr__tidy_tst {
	private Xop_fxt fxt = new Xop_fxt();
	/** Runs after every test and calls Init_para_n_ on the fixture. */
	@After public void term() {
		fxt.Init_para_n_();
	}
	/** Checks that a sub closed by /sup (and a sup closed by /sub) is emitted with the matching close tag. */
	@Test public void Sub_sup_autocorrect() {
		String sub_raw = "<sub>a</sup>b", sub_expd = "<sub>a</sub>b";
		fxt.Test_parse_page_wiki_str(sub_raw, sub_expd);
		String sup_raw = "<sup>a</sub>b", sup_expd = "<sup>a</sup>b";
		fxt.Test_parse_page_wiki_str(sup_raw, sup_expd);
	}
	/** PURPOSE: force a /font close tag to close an open span; EX:w:Rupee; DATE:2014-04-07 */
	@Test public void Span_font_autocorrect() {
		String raw = "<span>a</font>b";
		String expd = "<span>a</span>b";
		fxt.Test_parse_page_wiki_str(raw, expd);
	}
	/** Checks that space chars just inside the i tags are emitted outside of them. */
	@Test public void Move_ws_char() {
		String raw = "a<i> b </i>c";
		String expd = "a <i>b</i> c";
		fxt.Test_parse_page_all_str(raw, expd);
	}
	/** Same as Move_ws_char, but with the space written as the entity &#32; */
	@Test public void Move_ws_ent() {
		String raw = "a<i>&#32;b&#32;</i>c";
		String expd = "a&#32;<i>b</i>&#32;c";
		fxt.Test_parse_page_all_str(raw, expd);
	}
	/** PURPOSE: ignore tag if marked ignore_empty; EX:uk.b:HTML; DATE:2014-03-12 */
	@Test public void Ignore_empty_tags() {
		String raw = "a<pre></pre>b";
		String expd = "ab";
		fxt.Test_parse_page_all_str(raw, expd);
	}
	// disabled test, kept as-is
// @Test public void Escaped_div() { // NOTE: WP <div><span>a</span></div><span>b</span>; MW: <div><span>a</div>b</span> // REVISIT: 2012-05-11; WP does harder split-span
// fxt.Init_log_(Xop_xnde_log.Auto_closing_section, Xop_xnde_log.Escaped_xnde).Test_parse_page_wiki("<div><span></div></span>"
// , fxt.tkn_xnde_(0, 17).Subs_
// ( fxt.tkn_xnde_(5, 11))
// , fxt.tkn_ignore_(17, 24)
// );
// }
}

View File

@@ -0,0 +1,57 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Parser tests for attributes on xml-style tags in wikitext. */
public class Xop_xnde_wkr__xatrs_tst {
	private Xop_fxt fxt = new Xop_fxt();
	/** Runs after every test and calls Init_para_n_ on the fixture. */
	@After public void term() {fxt.Init_para_n_();}
	/** Checks the attribute range recorded for an inline (self-closing) tag. */
	@Test public void Inline() {
		fxt.Test_parse_page_wiki("<ref cd=\"ef\" />" , fxt.tkn_xnde_(0, 15).Atrs_rng_(5, 13));
		fxt.Test_parse_page_wiki("<ref cd = \"e f\" />" , fxt.tkn_xnde_(0, 21).Atrs_rng_(5, 19)); // ws
	}
	/** Checks the attribute range recorded for an open tag that has a separate close tag. */
	@Test public void Bgn() {
		fxt.Test_parse_page_wiki("<div cd=\"ef\"></div>" , fxt.tkn_xnde_(0, 19).Atrs_rng_(5, 12)); // basic
	}
	/** PURPOSE: if atr is repeated, take last, not 1st (the expected html below keeps only the final style); EX: it.u:Dipartimento:Fisica_e_Astronomia; DATE:2014-02-09 */
	@Test public void Repeated() {
		fxt.Test_parse_page_all_str("<span style='color:red' style='color:green'>a</span>" , "<span style='color:green'>a</span>"); // two
		fxt.Test_parse_page_all_str("<span style='color:red' style='color:green' style='color:blue'>a</span>" , "<span style='color:blue'>a</span>"); // three
	}
	/** PURPOSE: <br$2/> is valid; symbols function as ws */
	@Test public void Non_ws() {
		fxt.Init_log_(Xop_xatr_parser.Log_invalid_atr).Test_parse_page_wiki("<br$2/>" , fxt.tkn_xnde_(0, 7).Atrs_rng_(3, 5));
	}
	/** PURPOSE: make sure brx does not match br */
	@Test public void Invalid() {
		fxt.Test_parse_page_wiki("<brx/>" , fxt.tkn_bry_(0, 1), fxt.tkn_txt_(1, 6));
	}
	/** Checks that spaces in an id value come out as underscores. */
	@Test public void Id_encode() {
		fxt.Test_parse_page_all_str("<div id=\"a b c\"></div>", "<div id=\"a_b_c\"></div>");
	}
	/** PURPOSE: options textboxes were escaped if input's value had "<"; DATE:2014-07-04 */
	@Test public void Lt_should_not_be_escaped_in_input() {
		fxt.Page().Html_data().Html_restricted_n_();
		try {
			fxt.Test_parse_page_wiki_str("<input value='a<'></input>", "<input value='a<'></input>"); // NOTE: do not call parse_page_all_str which will call Page.Clear and reset Restricted
		}
		finally {
			// restore even when the assertion above throws; term() only calls Init_para_n_ and does not touch this flag
			fxt.Page().Html_data().Html_restricted_y_();
		}
	}
// @Test public void Unclosed() { // PURPOSE: unclosed atr should be treated as key, which should be ignored; PAGE:en.w:Palace of Versailles
// fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
// ( "<span id=\"1<>>a" // id="1<> -> key named 'id="1<>' which fails whitelist keys
// , "</span>"
// ), String_.Concat_lines_nl_skip_last
// ( "<span>a"
// , "</span>"
// ));
// }
}