mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.7.2.1
This commit is contained in:
47
400_xowa/src_490_xnde/gplx/xowa/Xop_xatr_hash.java
Normal file
47
400_xowa/src_490_xnde/gplx/xowa/Xop_xatr_hash.java
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
public class Xop_xatr_hash {
|
||||
private final Ordered_hash hash = Ordered_hash_.new_bry_();
|
||||
private final byte[] src;
|
||||
Xop_xatr_hash(byte[] src) {this.src = src;}
|
||||
public int Len() {return hash.Count();}
|
||||
public Xop_xatr_itm Get_at(int idx) {
|
||||
return (Xop_xatr_itm)hash.Get_at(idx);
|
||||
}
|
||||
public Xop_xatr_itm Get_by(String key) {
|
||||
return (Xop_xatr_itm)hash.Get_by(Bry_.new_u8(key));
|
||||
}
|
||||
public byte[] Get_as_bry_or(String key, byte[] or) {
|
||||
Xop_xatr_itm itm = Get_by(key);
|
||||
return itm == null ? or : itm.Val_as_bry(src);
|
||||
}
|
||||
public boolean Match(String key, String val) {
|
||||
Xop_xatr_itm itm = Get_by(key); if (itm == null) return false;
|
||||
return String_.Eq(itm.Val_as_str(src), val);
|
||||
}
|
||||
private void Add(Xop_xatr_itm itm) {
|
||||
hash.Add_if_dupe_use_nth(itm.Key_bry(), itm);
|
||||
}
|
||||
public static Xop_xatr_hash new_ary(byte[] src, Xop_xatr_itm[] ary) {
|
||||
Xop_xatr_hash rv = new Xop_xatr_hash(src);
|
||||
for (Xop_xatr_itm itm : ary)
|
||||
rv.Add(itm);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
65
400_xowa/src_490_xnde/gplx/xowa/Xop_xatr_itm.java
Normal file
65
400_xowa/src_490_xnde/gplx/xowa/Xop_xatr_itm.java
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
public class Xop_xatr_itm {
|
||||
public static final byte Tid_null = 0, Tid_invalid = 1, Tid_repeat = 2, Tid_key_val = 3, Tid_key_only = 4; // NOTE: id order is important; see below;
|
||||
public byte Tid() {return tid;} private byte tid;
|
||||
public void Tid_to_repeat_() {tid = Tid_repeat;}
|
||||
public void Tid_to_invalid_() {tid = Tid_invalid;}
|
||||
public boolean Tid_is_key_only() {return tid == Tid_key_only;}
|
||||
public int Key_bgn() {return key_bgn;} private int key_bgn;
|
||||
public int Key_end() {return key_end;} private int key_end;
|
||||
public byte[] Key_bry() {return key_bry;} public Xop_xatr_itm Key_bry_(byte[] v) {key_bry = v; return this;} private byte[] key_bry;
|
||||
public byte[] Val_bry() {return val_bry;} public Xop_xatr_itm Val_bry_(byte[] v) {val_bry = v; return this;} private byte[] val_bry;
|
||||
public void Key_rng_(int key_bgn, int key_end) {this.key_bgn = key_bgn; this.key_end = key_end;}
|
||||
public byte Key_tid() {return key_tid;} public Xop_xatr_itm Key_tid_(byte v) {key_tid = v; return this;} private byte key_tid;
|
||||
public int Val_bgn() {return val_bgn;} private int val_bgn;
|
||||
public int Val_end() {return val_end;} private int val_end;
|
||||
public int Atr_bgn() {return atr_bgn;} private int atr_bgn;
|
||||
public int Atr_end() {return atr_end;} private int atr_end;
|
||||
public int Eq_pos() {return eq_pos;} private int eq_pos;
|
||||
public boolean Invalid() {return tid < Tid_key_val;} // NOTE: Tid order is important
|
||||
public byte Quote_byte() {return quote_byte;} private byte quote_byte;
|
||||
public String Val_as_str(byte[] src) {return String_.new_u8(Val_as_bry(src));}
|
||||
public byte[] Val_as_bry(byte[] src) {if (val_bry == null) val_bry = Bry_.Mid(src, val_bgn, val_end); return val_bry;} // NOTE: val_bry is cached
|
||||
public byte[] Val_as_bry__blank_to_null(byte[] src) {byte[] rv = Val_as_bry(src); return Bry_.Len_eq_0(rv) ? null : rv;}
|
||||
public int Val_as_int_or(byte[] src, int or) {return val_bry == null ? Bry_.Xto_int_or_lax(src, val_bgn, val_end, or) : Bry_.Xto_int_or(val_bry, or);}
|
||||
public boolean Val_as_bool_by_int(byte[] src) {return Val_as_int_or(src, 0) == 1;}
|
||||
public boolean Val_as_bool(byte[] src) {return Bry_.Eq(Bry_.Lower_ascii(Val_as_bry(src)), Bool_.True_bry);}
|
||||
public static Xop_xatr_itm[] Xatr_parse(Xoae_app app, Xop_xnde_atr_parser parser, Hash_adp_bry hash, Xowe_wiki wiki, byte[] src, Xop_xnde_tkn xnde) {
|
||||
Xop_xatr_itm[] xatr_ary = app.Xatr_parser().Parse(app.Msg_log(), src, xnde.Atrs_bgn(), xnde.Atrs_end());
|
||||
for (int i = 0; i < xatr_ary.length; i++) {
|
||||
Xop_xatr_itm xatr = xatr_ary[i];
|
||||
if (xatr.Invalid()) continue;
|
||||
Object xatr_key_obj = hash.Get_by_mid(src, xatr.Key_bgn(), xatr.Key_end());
|
||||
parser.Xatr_parse(wiki, src, xatr, xatr_key_obj);
|
||||
}
|
||||
return xatr_ary;
|
||||
}
|
||||
public Xop_xatr_itm(int atr_bgn, int atr_end) {
|
||||
this.tid = Tid_invalid; this.atr_bgn = atr_bgn; this.atr_end = atr_end;
|
||||
}
|
||||
public Xop_xatr_itm(byte quote_byte, int atr_bgn, int atr_end, int key_bgn, int key_end) {
|
||||
this.tid = Tid_key_only; this.quote_byte = quote_byte; this.atr_bgn = atr_bgn; this.atr_end = atr_end; this.key_bgn = key_bgn; this.key_end = key_end; this.val_bgn = key_bgn; this.val_end = key_end;
|
||||
}
|
||||
public Xop_xatr_itm(byte quote_byte, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end, int eq_pos) {
|
||||
this.tid = Tid_key_val; this.quote_byte = quote_byte; this.atr_bgn = atr_bgn; this.atr_end = atr_end; this.key_bgn = key_bgn; this.key_end = key_end; this.val_bgn = val_bgn; this.val_end = val_end; this.eq_pos = eq_pos;
|
||||
}
|
||||
public static final Xop_xatr_itm[] Ary_empty = new Xop_xatr_itm[0];
|
||||
public static final byte Key_tid_generic = 0, Key_tid_id = 1, Key_tid_style = 2, Key_tid_role = 3;
|
||||
}
|
||||
408
400_xowa/src_490_xnde/gplx/xowa/Xop_xatr_parser.java
Normal file
408
400_xowa/src_490_xnde/gplx/xowa/Xop_xatr_parser.java
Normal file
@@ -0,0 +1,408 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.core.primitives.*;
|
||||
public class Xop_xatr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATTRIBS_REGEX
|
||||
private final List_adp xatrs = List_adp_.new_();
|
||||
private static final byte Mode_atr_bgn = 1, Mode_invalid = 2, Mode_key = 3, Mode_eq = 4, Mode_val_bgn = 5, Mode_val_quote = 6, Mode_val_raw = 7;
|
||||
private byte mode = Mode_atr_bgn;
|
||||
private int atr_bgn = -1, key_bgn = -1, key_end = -1, eq_pos = -1, val_bgn = -1, val_end = -1; boolean valid = true;
|
||||
private byte quote_byte = Byte_ascii.Nil;
|
||||
private final Hash_adp_bry repeated_atrs_hash = Hash_adp_bry.ci_ascii_(); // ASCII:xnde_atrs
|
||||
private final Bry_bfr key_bfr = Bry_bfr.new_(), val_bfr = Bry_bfr.new_();
|
||||
private boolean key_bfr_on = false, val_bfr_on = false, ws_is_before_val = false;
|
||||
public Bry_obj_ref Bry_obj() {return bry_ref;} private final Bry_obj_ref bry_ref = Bry_obj_ref.null_();
|
||||
public int Xnde_find_gt_find(byte[] src, int pos, int end) {
|
||||
bry_ref.Val_(null);
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Slash && pos + 1 < end) { // if </ move pos to after /
|
||||
++pos;
|
||||
b = src[pos];
|
||||
}
|
||||
int gt_pos = Bry_finder.Find_fwd(src, Byte_ascii.Gt, pos, end); if (gt_pos == Bry_.NotFound) return String_.Find_none;
|
||||
byte[] bry = (byte[])xnde_hash.Get_by_mid(src, pos, gt_pos);
|
||||
bry_ref.Val_(bry);
|
||||
return bry == null ? String_.Find_none : bry.length + pos;
|
||||
}
|
||||
private int Xnde_find_gt(Gfo_msg_log log_mgr, byte[] src, int lt_pos, int end) {
|
||||
int pos = lt_pos + 1;
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Slash && pos + 1 < end) {
|
||||
++pos;
|
||||
b = src[pos];
|
||||
}
|
||||
int match_pos = Xnde_find_gt_find(src, pos, end);
|
||||
if (match_pos == String_.Find_none) {log_mgr.Add_str_warn_key_none(Msg_mgr, "invalid lt", src, lt_pos); return String_.Find_none;}
|
||||
boolean slash_found = false;
|
||||
for (int i = match_pos; i < end; i++) {
|
||||
b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Gt: return i;
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip any ws
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
if (slash_found) {log_mgr.Add_str_warn_key_none(Msg_mgr, "multiple slashes not allowed", src, i); return String_.Find_none;} // only allow one slash
|
||||
else slash_found = true;
|
||||
break;
|
||||
default:
|
||||
log_mgr.Add_str_warn_key_none(Msg_mgr, "invalid character", src, i);
|
||||
return String_.Find_none;
|
||||
}
|
||||
}
|
||||
log_mgr.Add_str_warn_key_none(Msg_mgr, "eos", src, lt_pos);
|
||||
return String_.Find_none;
|
||||
}
|
||||
public Xop_xatr_itm[] Parse(Gfo_msg_log log_mgr, byte[] src, int bgn, int end) {
|
||||
xatrs.Clear();
|
||||
repeated_atrs_hash.Clear();
|
||||
int i = bgn;
|
||||
mode = Mode_atr_bgn;
|
||||
boolean prv_is_ws = false;
|
||||
while (true) {
|
||||
if (i == end) {
|
||||
if (mode == Mode_val_quote) { // quote still open
|
||||
int reset_pos = Bry_finder.Find_fwd(src, Byte_ascii.Space, atr_bgn, end); // try to find 1st space within quote; EX:"a='b c=d" should try to reset at c=d
|
||||
boolean reset_found = reset_pos != Bry_finder.Not_found;
|
||||
valid = false; val_end = reset_found ? reset_pos : end;
|
||||
Make(log_mgr, src, val_end); // create invalid atr
|
||||
if (reset_found) { // space found; resume from text after space; EX: "a='b c=d"; PAGE:en.w:Aubervilliers DATE:2014-06-25
|
||||
i = Bry_finder.Find_fwd_while_not_ws(src, reset_pos, end); // skip ws
|
||||
atr_bgn = -1;
|
||||
mode = Mode_atr_bgn;
|
||||
val_bfr.Clear();
|
||||
val_bfr_on = false;
|
||||
ws_is_before_val = false;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
else {
|
||||
if (mode == Mode_val_bgn) // NOTE: handle dangling "k=" else will be "k"; EX: <a b=> x> <a b>; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31
|
||||
valid = false;
|
||||
if (atr_bgn != -1) { // atr_bgn will be -1 if atrs ends on quoted (EX:"a='b'"); else, pending atr that needs to be processed; EX: "a=b" b wil be in bfr
|
||||
val_end = end;
|
||||
Make(log_mgr, src, end);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (i > end)
|
||||
break;
|
||||
byte b = src[i];
|
||||
switch (mode) {
|
||||
case Mode_atr_bgn:
|
||||
switch (b) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip any ws at bgn; note that once a non-ws char is encountered, it will immediately go into another mode
|
||||
break;
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Colon:
|
||||
if (atr_bgn == -1) atr_bgn = i;
|
||||
mode = Mode_key;
|
||||
key_bgn = i;
|
||||
break;
|
||||
case Byte_ascii.Lt:
|
||||
int gt_pos = Xnde_find_gt(log_mgr, src, i, end);
|
||||
if (gt_pos == String_.Find_none) {
|
||||
valid = false; mode = Mode_invalid; if (atr_bgn == -1) atr_bgn = i;
|
||||
}
|
||||
else {
|
||||
i = gt_pos; // note that there is ++i below and loop will continue at gt_pos + 1 (next character after)
|
||||
}
|
||||
break;
|
||||
default:
|
||||
valid = false; mode = Mode_invalid; if (atr_bgn == -1) atr_bgn = i;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Mode_invalid:
|
||||
switch (b) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
|
||||
Make(log_mgr, src, i);
|
||||
mode = Mode_atr_bgn;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Mode_key:
|
||||
switch (b) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Colon: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Underline:
|
||||
if (key_bfr_on) key_bfr.Add_byte(b);
|
||||
break;
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
|
||||
if (valid) {
|
||||
key_end = i;
|
||||
mode = Mode_eq;
|
||||
}
|
||||
else
|
||||
Make(log_mgr, src, i);
|
||||
break;
|
||||
case Byte_ascii.Eq:
|
||||
if (valid) {
|
||||
key_end = i;
|
||||
mode = Mode_val_bgn;
|
||||
eq_pos = i;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Lt:
|
||||
int gt_pos = Xnde_find_gt(log_mgr, src, i, end);
|
||||
if (gt_pos == String_.Find_none) {
|
||||
valid = false; mode = Mode_invalid;
|
||||
}
|
||||
else {
|
||||
if (!key_bfr_on) key_bfr.Add_mid(src, key_bgn, i);
|
||||
i = gt_pos; // note that there is ++i below and loop will continue at gt_pos + 1 (next character after)
|
||||
key_bfr_on = true;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
valid = false; mode = Mode_invalid;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Mode_eq:
|
||||
switch (b) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip ws
|
||||
if (key_end == -1) { // EX: "a = b"; key_end != -1 b/c 1st \s sets key_end; EX: "a b = c"; key_end
|
||||
val_end = i - 1;
|
||||
Make(log_mgr, src, i);
|
||||
mode = Mode_atr_bgn;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Eq:
|
||||
eq_pos = i;
|
||||
mode = Mode_val_bgn;
|
||||
break;
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos: // FUTURE: previous word was key
|
||||
default: // NOTE: added this late; xml_parser was not handling "line start=3" DATE:2013-07-03
|
||||
val_end = i - 1;
|
||||
Make(log_mgr, src, i);
|
||||
mode = Mode_atr_bgn;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case Mode_val_bgn:
|
||||
switch (b) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip-ws
|
||||
ws_is_before_val = true;
|
||||
break;
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos:
|
||||
mode = Mode_val_quote; quote_byte = b; prv_is_ws = false;
|
||||
break;
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Colon:
|
||||
case Byte_ascii.Hash:
|
||||
mode = Mode_val_raw;
|
||||
val_bgn = i;
|
||||
break;
|
||||
case Byte_ascii.Lt:
|
||||
int gt_pos = Xnde_find_gt(log_mgr, src, i, end);
|
||||
if (gt_pos == String_.Find_none) {
|
||||
valid = false; mode = Mode_invalid;
|
||||
}
|
||||
else {
|
||||
i = gt_pos; // note that there is ++i below and loop will continue at gt_pos + 1 (next character after)
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Mode_val_quote:
|
||||
if (val_bgn == -1) val_bgn = i;
|
||||
switch (b) {
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos:
|
||||
if (quote_byte == b) {
|
||||
val_end = i;
|
||||
Make(log_mgr, src, i + 1); // NOTE: set atr_end *after* quote
|
||||
}
|
||||
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
break;
|
||||
case Byte_ascii.Lt: // "<" try to find nowiki inside atr
|
||||
int gt_pos = Xnde_find_gt(log_mgr, src, i, end);
|
||||
if (gt_pos == String_.Find_none) {
|
||||
// valid = false; mode = Mode_invalid; // DELETE: 2012-11-13; unpaired < should not mark atr invalid; EX: style='margin:1em<f'
|
||||
if (!val_bfr_on) val_bfr.Add_mid(src, val_bgn, i + 1); // +1 to include <
|
||||
val_bfr_on = true;
|
||||
}
|
||||
else {
|
||||
if (!val_bfr_on) val_bfr.Add_mid(src, val_bgn, i);
|
||||
i = gt_pos; // note that there is ++i below and loop will continue at gt_pos + 1 (next character after)
|
||||
val_bfr_on = true;
|
||||
}
|
||||
prv_is_ws = false;
|
||||
break;
|
||||
case Byte_ascii.Nl: case Byte_ascii.Tab: case Byte_ascii.Cr: // REF.MW:Sanitizer.php|decodeTagAttributes $value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
|
||||
case Byte_ascii.Space:
|
||||
if (!val_bfr_on) {
|
||||
val_bfr.Add_mid(src, val_bgn, i);
|
||||
val_bfr_on = true;
|
||||
}
|
||||
if (prv_is_ws) {} // noop; only allow one ws at a time
|
||||
else {
|
||||
prv_is_ws = true; val_bfr.Add_byte(Byte_ascii.Space);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Mode_val_raw: // no quotes; EX:a=bcd
|
||||
switch (b) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent:
|
||||
case Byte_ascii.Amp: case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star:
|
||||
case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Slash:
|
||||
case Byte_ascii.Colon: case Byte_ascii.Semic: case Byte_ascii.Gt:
|
||||
case Byte_ascii.Question: case Byte_ascii.At: case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end:
|
||||
case Byte_ascii.Pow: case Byte_ascii.Underline: case Byte_ascii.Tick:
|
||||
case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde:
|
||||
break;
|
||||
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl:
|
||||
val_end = i;
|
||||
Make(log_mgr, src, i);
|
||||
break;
|
||||
case Byte_ascii.Eq: // EX:"a= b=c" or "a=b=c"; PAGE:en.w:2013_in_American_television
|
||||
if (ws_is_before_val) { // "a= b=c"; discard 1st and resume at 2nd
|
||||
int old_val_bgn = val_bgn;
|
||||
valid = false; mode = Mode_invalid; Make(log_mgr, src, val_bgn); // invalidate cur atr; EX:"a="
|
||||
atr_bgn = key_bgn = old_val_bgn; // reset atr / key to new atr; EX: "b"
|
||||
key_end = i;
|
||||
mode = Mode_val_bgn; // set mode to val_bgn (basically, put after =)
|
||||
}
|
||||
else { // "a=b=c"; discard all
|
||||
valid = false; mode = Mode_invalid;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Lt:
|
||||
val_end = i;
|
||||
Make(log_mgr, src, i);
|
||||
--i; // NOTE: --i to include "<" as part of next atr; above ws excludes from next atr
|
||||
break;
|
||||
default:
|
||||
valid = false; mode = Mode_invalid;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
repeated_atrs_hash.Clear();
|
||||
return (Xop_xatr_itm[])xatrs.To_ary(Xop_xatr_itm.class);
|
||||
}
|
||||
private void Make(Gfo_msg_log log_mgr, byte[] src, int atr_end) {
|
||||
Xop_xatr_itm xatr = null;
|
||||
boolean key_bgn_exists = key_bgn != -1;
|
||||
boolean val_bgn_exists = val_bgn != -1;
|
||||
if (valid) {
|
||||
if (key_bgn_exists && val_bgn_exists)
|
||||
xatr = new Xop_xatr_itm(quote_byte, atr_bgn, atr_end, key_bgn, key_end, val_bgn, val_end, eq_pos);
|
||||
else {
|
||||
if (key_end == -1) key_end = val_end; // NOTE: key_end == -1 when eos; EX: "a" would have key_bgn = 0; key_end = -1; val_end = 1 DATE:2014-07-03
|
||||
xatr = new Xop_xatr_itm(quote_byte, atr_bgn, atr_end, key_bgn, key_end);
|
||||
}
|
||||
}
|
||||
else {
|
||||
xatr = new Xop_xatr_itm(atr_bgn, atr_end);
|
||||
log_mgr.Add_itm_none(Log_invalid_atr, src, atr_bgn, atr_end);
|
||||
}
|
||||
if (valid) { // note that invalid will have no key_bgn / key_end
|
||||
byte[] key_bry = key_bfr_on ? key_bfr.Xto_bry_and_clear() : Bry_.Mid(src, xatr.Key_bgn(), xatr.Key_end());
|
||||
xatr.Key_bry_(key_bry);
|
||||
Invalidate_repeated_atr(xatr, key_bry);
|
||||
}
|
||||
if (val_bfr_on) xatr.Val_bry_(val_bfr.Xto_bry_and_clear());
|
||||
xatrs.Add(xatr);
|
||||
mode = Mode_atr_bgn; quote_byte = Byte_ascii.Nil; valid = true;
|
||||
atr_bgn = key_bgn = val_bgn = key_end = val_end = eq_pos = -1;
|
||||
val_bfr_on = key_bfr_on = ws_is_before_val = false;
|
||||
}
|
||||
private void Invalidate_repeated_atr(Xop_xatr_itm cur, byte[] key_bry) {
|
||||
Xop_xatr_itm prv = (Xop_xatr_itm)repeated_atrs_hash.Get_by(key_bry);
|
||||
if (prv != null) {
|
||||
prv.Tid_to_repeat_();
|
||||
repeated_atrs_hash.Del(key_bry);
|
||||
}
|
||||
repeated_atrs_hash.Add(key_bry, cur);
|
||||
}
|
||||
private static final Hash_adp_bry xnde_hash = Hash_adp_bry.ci_ascii_()
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag_nowiki.Name_bry())
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag_noinclude.Name_bry())
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag_includeonly.Name_bry())
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag_onlyinclude.Name_bry())
|
||||
;
|
||||
private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "xatr_parser");
|
||||
public static final Gfo_msg_itm
|
||||
Log_invalid_atr = Gfo_msg_itm_.new_warn_(owner, "invalid_atr")
|
||||
;
|
||||
private static final String Msg_mgr = "gplx.xowa.wiki.parser.xatr";
|
||||
}
|
||||
/*
|
||||
NOTE: this parser can be done with a trie and hooks on Quote,Apos,Eq,NewLine,Space,Tab, but...
|
||||
- multi-byte lookup is not needed (main advantage of trie)
|
||||
- less performant
|
||||
- logic is indirect (b/c different chars are valid if first letter of key, raw mode, quoted)
|
||||
*/
|
||||
113
400_xowa/src_490_xnde/gplx/xowa/Xop_xatr_parser_tst.java
Normal file
113
400_xowa/src_490_xnde/gplx/xowa/Xop_xatr_parser_tst.java
Normal file
@@ -0,0 +1,113 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xatr_parser_tst {
|
||||
@Test public void Kv_quote_double() {fxt.tst_("a=\"b\"", fxt.new_atr_("a", "b"));} private Xop_xatr_parser_fxt fxt = new Xop_xatr_parser_fxt();
|
||||
@Test public void Kv_quote_single() {fxt.tst_("a='b'", fxt.new_atr_("a", "b"));}
|
||||
@Test public void Kv_quote_none() {fxt.tst_("a=b", fxt.new_atr_("a", "b"));}
|
||||
@Test public void Kv_empty() {fxt.tst_("a=''", fxt.new_atr_("a", ""));}
|
||||
@Test public void Kv_key_has_underline() {fxt.tst_("a_b=c", fxt.new_atr_("a_b", "c"));}
|
||||
@Test public void Val_quote_none() {fxt.tst_("b", fxt.new_atr_("b", "b"));}
|
||||
@Test public void Val_quote_none_ws() {fxt.tst_(" b ", fxt.new_atr_("b", "b"));} // PURPOSE:discovered while writing test for ref's "lower-alpha" DATE:2014-07-03
|
||||
@Test public void Invalid_key_plus() {fxt.tst_("a+b", fxt.new_invalid_(0, 3));}
|
||||
@Test public void Invalid_key_plus_many() {fxt.tst_("a+b c=d", fxt.new_invalid_(0, 3), fxt.new_atr_("c", "d"));}
|
||||
@Test public void Invalid_val_plus() {fxt.tst_("a=b+c", fxt.new_invalid_(0, 5));}
|
||||
@Test public void Invalid_recover() {fxt.tst_("* a=b", fxt.new_invalid_(0, 1), fxt.new_atr_("a", "b"));} // PURPOSE: * is invalid, but should not stop parsing of a=b
|
||||
@Test public void Nowiki_val() {fxt.tst_("a=<nowiki>'b'</nowiki>", fxt.new_atr_("a", "b").Expd_atr_rng_(0, 13).Expd_key_("a").Expd_val_("b"));}
|
||||
@Test public void Nowiki_key() {fxt.tst_("<nowiki>a=b</nowiki>", fxt.new_atr_("a", "b").Expd_atr_rng_(8, 11));}
|
||||
@Test public void Nowiki_key_2() {fxt.tst_("a<nowiki>b</nowiki>c=d", fxt.new_atr_("abc", "d").Expd_atr_rng_(0, 22));}
|
||||
@Test public void Nowiki_key_3() {fxt.tst_("a<nowiki>=</nowiki>\"b\"", fxt.new_atr_("a", "b").Expd_atr_rng_(0, 22));} // EX:fr.w:{{Portail|Transpédia|Californie}}
|
||||
@Test public void Nowiki_quote() {fxt.tst_("a=\"b<nowiki>c</nowiki>d<nowiki>e</nowiki>f\"", fxt.new_atr_("a", "bcdef"));}
|
||||
@Test public void Int_value() {fxt.tst_int("a='-123'", -123);}
|
||||
@Test public void Many_apos() {fxt.tst_("a='b' c='d' e='f'", fxt.new_atr_("a", "b"), fxt.new_atr_("c", "d"), fxt.new_atr_("e", "f"));}
|
||||
@Test public void Many_raw() {fxt.tst_("a=b c=d e=f", fxt.new_atr_("a", "b"), fxt.new_atr_("c", "d"), fxt.new_atr_("e", "f"));}
|
||||
@Test public void Ws_ini() {fxt.tst_(" a='b'", fxt.new_atr_("a", "b").Expd_atr_rng_(1, 6));}
|
||||
@Test public void Ws_end() {fxt.tst_(" a='b' c='d'", fxt.new_atr_("a", "b").Expd_atr_rng_(1, 6), fxt.new_atr_("c", "d").Expd_atr_rng_(7, 12));}
|
||||
// A newline inside a quoted value is expected to come back as a single space.
@Test public void Quote_ws_nl() {
	fxt.tst_("a='b\nc'", fxt.new_atr_("a", "b c"));
}
|
||||
// Whitespace inside a quoted value.
// NOTE(review): the test name suggests the input originally held a run of several spaces; verify the literal against upstream.
@Test public void Quote_ws_mult() {
	fxt.tst_("a='b c'", fxt.new_atr_("a", "b c"));
}
|
||||
// PURPOSE: fix wherein 1st-gobble gobbled rest of spaces (was b cd)
// NOTE(review): the purpose text suggests the input originally held runs of several spaces; verify the literal against upstream.
@Test public void Quote_ws_mult_mult() {
	fxt.tst_("a='b c d'", fxt.new_atr_("a", "b c d"));
}
|
||||
// PURPOSE: fix wherein apos was gobbled up; PAGE:en.s:Alice's_Adventures_in_Wonderland; DATE:2013-11-22
@Test public void Quote_apos() {
	fxt.tst_("a=\"b c'd\"", fxt.new_atr_("a", "b c'd"));
}
|
||||
// PURPOSE: fix wherein apos was causing "'b'c d"; PAGE:en.s:Grimm's_Household_Tales,_Volume_1; DATE:2013-12-22
@Test public void Quote_apos_2() {
	fxt.tst_("a=\"b'c d\"", fxt.new_atr_("a", "b'c d"));
}
|
||||
// Several bare words: each is expected to become an attribute whose key equals its value.
@Test public void Multiple() {
	fxt.tst_
	( "a b1 c"
	, fxt.new_atr_("a", "a")
	, fxt.new_atr_("b1", "b1")
	, fxt.new_atr_("c", "c")
	);
}
|
||||
// PURPOSE: fix wherein multiple space was causing "a=a"; PAGE:fr.s:La_Sculpture_dans_les_cimetières_de_Paris/Père-Lachaise; DATE:2014-01-18
// NOTE(review): the purpose text mentions multiple spaces around '='; verify the literal against upstream.
@Test public void Ws() {
	fxt.tst_("a = 'b'", fxt.new_atr_("a", "b"));
}
|
||||
// PURPOSE: handle dangling quote at eos; PAGE:en.w:Aubervilliers DATE:2014-06-25
@Test public void Dangling_eos() {
	fxt.tst_
	( "a='b' c='d"
	, fxt.new_atr_("a", "b")
	, fxt.new_invalid_(6, 10)
	);
}
|
||||
// PURPOSE: handle dangling quote at bos; resume at next valid atr; PAGE:en.w:Aubervilliers DATE:2014-06-25
@Test public void Dangling_bos() {
	fxt.tst_
	( "a='b c=d"
	, fxt.new_invalid_(0, 4)
	, fxt.new_atr_("c", "d")
	);
}
|
||||
// PURPOSE: discard xatr if incomplete and followed by valid atr; PAGE:en.w:2013_in_American_television DATE:2014-09-25
@Test public void Invalid_incomplete() {
	fxt.tst_
	( "a= c=d"
	, fxt.new_invalid_(0, 3)
	, fxt.new_atr_("c", "d")
	);
}
|
||||
// PURPOSE: variation of Invalid_incomplete; per MW regex, missing space invalidates entire attribute; DATE:2014-09-25
@Test public void Invalid_incomplete_2() {
	fxt.tst_("a=c=d", fxt.new_invalid_(0, 5));
}
|
||||
// PURPOSE: "b=" should be invalid not a kv of "b" = "b"; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31
@Test public void Invalid_incomplete_pair() {
	fxt.tst_
	( "a= b="
	, fxt.new_invalid_(0, 3)
	, fxt.new_invalid_(3, 5)
	);
}
|
||||
/*
|
||||
TODO:
|
||||
change ws to be end; EX: "a=b c=d" atr1 ends at 4 (not 3)
|
||||
*/
|
||||
// @Test public void Val_quote_none_many() {
|
||||
// fxt.tst_("a b", fxt.new_atr_("", "a"), fxt.new_atr_("", "b"));
|
||||
//// fxt.tst_("a='b' c d e='f'", fxt.new_atr_("a", "b"), fxt.new_atr_("", "c"), fxt.new_atr_("", "d"), fxt.new_atr_("e", "f"));
|
||||
// }
|
||||
}
|
||||
class Xop_xatr_parser_fxt {
|
||||
Xop_xatr_parser parser = new Xop_xatr_parser();
|
||||
Tst_mgr tst_mgr = new Tst_mgr();
|
||||
public Xop_xatr_itm_chkr new_invalid_(int bgn, int end) {return new Xop_xatr_itm_chkr().Expd_atr_rng_(bgn, end).Expd_typeId_(Xop_xatr_itm.Tid_invalid);}
|
||||
public Xop_xatr_itm_chkr new_atr_(String key, String val) {return new Xop_xatr_itm_chkr().Expd_key_(key).Expd_val_(val);}
|
||||
public void tst_(String src_str, Xop_xatr_itm_chkr... expd) {
|
||||
byte[] src = Bry_.new_u8(src_str);
|
||||
Gfo_msg_log msg_log = new Gfo_msg_log(Xoa_app_.Name);
|
||||
Xop_xatr_itm[] actl = parser.Parse(msg_log, src, 0, src.length);
|
||||
tst_mgr.Vars().Clear().Add("raw_bry", src);
|
||||
tst_mgr.Tst_ary("xatr:", expd, actl);
|
||||
}
|
||||
public void tst_int(String src_str, int... expd) {
|
||||
byte[] src = Bry_.new_u8(src_str);
|
||||
Gfo_msg_log msg_log = new Gfo_msg_log(Xoa_app_.Name);
|
||||
Xop_xatr_itm[] actl_atr = parser.Parse(msg_log, src, 0, src.length);
|
||||
int[] actl = new int[actl_atr.length];
|
||||
|
||||
for (int i = 0; i < actl.length; i++)
|
||||
actl[i] = actl_atr[i].Val_as_int_or(src, 0);
|
||||
Tfds.Eq_ary(expd, actl);
|
||||
}
|
||||
}
|
||||
class Xop_xatr_itm_chkr implements Tst_chkr {
|
||||
public Class<?> TypeOf() {return Xop_xatr_itm.class;}
|
||||
public Xop_xatr_itm_chkr Expd_atr_rng_(int bgn, int end) {expd_atr_bgn = bgn; expd_atr_end = end; return this;} private int expd_atr_bgn = -1, expd_atr_end = -1;
|
||||
public Xop_xatr_itm_chkr Expd_key_rng_(int bgn, int end) {expd_key_bgn = bgn; expd_key_end = end; return this;} private int expd_key_bgn = -1, expd_key_end = -1;
|
||||
public Xop_xatr_itm_chkr Expd_key_(String v) {expd_key = v; return this;} private String expd_key;
|
||||
public Xop_xatr_itm_chkr Expd_val_(String v) {expd_val = v; return this;} private String expd_val;
|
||||
public Xop_xatr_itm_chkr Expd_typeId_(byte v) {expd_typeId = v; return this;} private byte expd_typeId = Xop_xatr_itm.Tid_null;
|
||||
public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
|
||||
Xop_xatr_itm actl = (Xop_xatr_itm)actl_obj;
|
||||
int err = 0;
|
||||
err += mgr.Tst_val(expd_typeId == Xop_xatr_itm.Tid_null, path, "atr_typeId", expd_typeId, actl.Tid());
|
||||
err += mgr.Tst_val(expd_atr_bgn == -1, path, "atr_bgn", expd_atr_bgn, actl.Atr_bgn());
|
||||
err += mgr.Tst_val(expd_atr_end == -1, path, "atr_end", expd_atr_end, actl.Atr_end());
|
||||
err += mgr.Tst_val(expd_key_bgn == -1, path, "key_bgn", expd_key_bgn, actl.Key_bgn());
|
||||
err += mgr.Tst_val(expd_key_end == -1, path, "key_end", expd_key_end, actl.Key_end());
|
||||
if (actl.Key_bry() == null)
|
||||
err += mgr.Tst_val(expd_key == null, path, "key", expd_key, mgr.Vars_get_bry_as_str("raw_bry", actl.Key_bgn(), actl.Key_end()));
|
||||
else
|
||||
err += mgr.Tst_val(expd_key == null, path, "key", expd_key, String_.new_u8(actl.Key_bry()));
|
||||
if (actl.Val_bry() == null)
|
||||
err += mgr.Tst_val(expd_val == null, path, "val", expd_val, mgr.Vars_get_bry_as_str("raw_bry", actl.Val_bgn(), actl.Val_end()));
|
||||
else
|
||||
err += mgr.Tst_val(expd_val == null, path, "val", expd_val, String_.new_u8(actl.Val_bry()));
|
||||
return err;
|
||||
}
|
||||
}
|
||||
/*
|
||||
*/
|
||||
263
400_xowa/src_490_xnde/gplx/xowa/Xop_xatr_whitelist_mgr.java
Normal file
263
400_xowa/src_490_xnde/gplx/xowa/Xop_xatr_whitelist_mgr.java
Normal file
@@ -0,0 +1,263 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
public class Xop_xatr_whitelist_mgr {
|
||||
public boolean Chk(int tag_id, byte[] src, Xop_xatr_itm xatr) {
|
||||
byte[] key_bry = xatr.Key_bry();
|
||||
byte[] chk_bry; int chk_bgn, chk_end;
|
||||
if (key_bry == null) {
|
||||
chk_bry = src;
|
||||
chk_bgn = xatr.Key_bgn();
|
||||
chk_end = xatr.Key_end();
|
||||
if (chk_end - chk_bgn == 0) return true; // no key; nothing to whitelist; return true
|
||||
}
|
||||
else { // key_bry specified manually; EX: "id<nowiki>=1" has a manual key_bry of "id"
|
||||
chk_bry = key_bry;
|
||||
chk_bgn = 0;
|
||||
chk_end = key_bry.length;
|
||||
}
|
||||
Object o = key_trie.Match_bgn(chk_bry, chk_bgn, chk_end);
|
||||
if (o == null) return false;// unknown atr_key; EX: <b unknown=1/>
|
||||
Xop_xatr_whitelist_itm itm = (Xop_xatr_whitelist_itm)o;
|
||||
byte itm_key_tid = itm.Key_tid();
|
||||
xatr.Key_tid_(itm_key_tid);
|
||||
boolean rv = itm.Tags()[tag_id] == 1 // is atr allowed for tag
|
||||
&& (itm.Exact() ? key_trie.Match_pos() == chk_end : true) // if exact, check for exact; else always true
|
||||
;
|
||||
switch (itm_key_tid) {
|
||||
case Xop_xatr_itm.Key_tid_style:
|
||||
if (!Scrub_style(xatr, src)) return false;
|
||||
break;
|
||||
case Xop_xatr_itm.Key_tid_role:
|
||||
if (!Bry_.Eq(Val_role_presentation, xatr.Val_as_bry(src))) return false; // MW: For now we only support role="presentation"; DATE:2014-04-05
|
||||
break;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public Xop_xatr_whitelist_mgr Ini() { // REF.MW:Sanitizer.php|setupAttributeWhitelist
|
||||
Ini_grp("common" , null , "id", "class", "lang", "dir", "title", "style", "role");
|
||||
Ini_grp("block" , "common" , "align");
|
||||
Ini_grp("tablealign" , null , "align", "char", "charoff", "valign");
|
||||
Ini_grp("tablecell" , null , "abbr", "axis", "headers", "scope", "rowspan", "colspan", "nowrap", "width", "height", "bgcolor");
|
||||
|
||||
Ini_nde(Xop_xnde_tag_.Tid_div , "block");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_center , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_span , "block");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_h1 , "block");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_h2 , "block");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_h3 , "block");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_h4 , "block");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_h5 , "block");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_h6 , "block");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_em , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_strong , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_cite , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_dfn , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_code , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_samp , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_kbd , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_var , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_abbr , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_blockquote , "common", "cite");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_sub , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_sup , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_p , "block");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_br , "id", "class", "title", "style", "clear");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_pre , "common", "width");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_ins , "common", "cite", "datetime");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_del , "common", "cite", "datetime");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_ul , "common", "type");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_ol , "common", "type", "start");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_li , "common", "type", "value");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_dl , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_dd , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_dt , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_table , "common", "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "align", "bgcolor");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_caption , "common", "align");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_thead , "common", "tablealign");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_tfoot , "common", "tablealign");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_tbody , "common", "tablealign");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_colgroup , "common", "span", "width", "tablealign");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_col , "common", "span", "width", "tablealign");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_tr , "common", "bgcolor", "tablealign");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_td , "common", "tablecell", "tablealign");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_th , "common", "tablecell", "tablealign");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_a , "common", "href", "rel", "rev");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_img , "common", "alt", "src", "width", "height");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_tt , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_b , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_i , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_big , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_small , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_strike , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_s , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_u , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_font , "common", "size", "color", "face");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_hr , "common", "noshade", "size", "width");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_ruby , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_rb , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_rt , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_rp , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_math , "class", "style", "id", "title");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_time , "class", "datetime");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_bdi , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_data , "common", "value");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_mark , "common");
|
||||
Ini_nde(Xop_xnde_tag_.Tid_q , "common");
|
||||
Ini_all_loose("data");
|
||||
return this;
|
||||
}
|
||||
private Hash_adp_bry grp_hash = Hash_adp_bry.cs_();
|
||||
private void Ini_grp(String key_str, String base_grp, String... cur_itms) {
|
||||
byte[][] itms = Bry_.Ary(cur_itms);
|
||||
if (base_grp != null)
|
||||
itms = Bry_.Ary_add(itms, (byte[][])grp_hash.Get_by_bry(Bry_.new_a7(base_grp)));
|
||||
byte[] key = Bry_.new_a7(key_str);
|
||||
grp_hash.Add_bry_obj(key, itms);
|
||||
}
|
||||
private void Ini_nde(int tag_tid, String... key_strs) {
|
||||
List_adp keys = List_adp_.new_();
|
||||
int len = key_strs.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte[] key = Bry_.new_a7(key_strs[i]);
|
||||
Object grp_obj = grp_hash.Get_by_bry(key); // is the key a grp? EX: "common"
|
||||
if (grp_obj == null)
|
||||
keys.Add(key);
|
||||
else {
|
||||
byte[][] grp_keys = (byte[][])grp_obj;
|
||||
int grp_keys_len = grp_keys.length;
|
||||
for (int j = 0; j < grp_keys_len; j++)
|
||||
keys.Add(grp_keys[j]);
|
||||
}
|
||||
}
|
||||
len = keys.Count();
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte[] key_bry = (byte[])keys.Get_at(i);
|
||||
Xop_xatr_whitelist_itm itm = (Xop_xatr_whitelist_itm)key_trie.Match_exact(key_bry, 0, key_bry.length);
|
||||
if (itm == null) {
|
||||
itm = Ini_key_trie_add(key_bry, true);
|
||||
key_trie.Add_obj(key_bry, itm);
|
||||
}
|
||||
itm.Tags()[tag_tid] = 1;
|
||||
}
|
||||
}
|
||||
private void Ini_all_loose(String key_str) {
|
||||
byte[] key_bry = Bry_.new_a7(key_str);
|
||||
Ini_key_trie_add(key_bry, false);
|
||||
Xop_xatr_whitelist_itm itm = Ini_key_trie_add(key_bry, false);
|
||||
key_trie.Add_obj(key_bry, itm);
|
||||
int len = Xop_xnde_tag_._MaxLen;
|
||||
for (int i = 0; i < len; i++)
|
||||
itm.Tags()[i] = 1;
|
||||
}
|
||||
private Xop_xatr_whitelist_itm Ini_key_trie_add(byte[] key, boolean exact) {
|
||||
Object key_tid_obj = tid_hash.Get_by(key);
|
||||
byte key_tid = key_tid_obj == null ? Xop_xatr_itm.Key_tid_generic : ((Byte_obj_val)key_tid_obj).Val();
|
||||
Xop_xatr_whitelist_itm rv = new Xop_xatr_whitelist_itm(key, key_tid, exact);
|
||||
key_trie.Add_obj(key, rv);
|
||||
return rv;
|
||||
}
|
||||
private Hash_adp_bry tid_hash = Hash_adp_bry.ci_ascii_()
|
||||
.Add_str_byte("id", Xop_xatr_itm.Key_tid_id)
|
||||
.Add_str_byte("style", Xop_xatr_itm.Key_tid_style)
|
||||
.Add_str_byte("role", Xop_xatr_itm.Key_tid_role)
|
||||
;
|
||||
private Btrie_slim_mgr key_trie = Btrie_slim_mgr.ci_ascii_(); // NOTE:ci.ascii:HTML.node_name
|
||||
public boolean Scrub_style(Xop_xatr_itm xatr, byte[] raw) { // REF:Sanitizer.php|checkCss; '! expression | filter\s*: | accelerator\s*: | url\s*\( !ix'; NOTE: this seems to affect MS IE only; DATE:2013-04-01
|
||||
byte[] val_bry = xatr.Val_bry();
|
||||
byte[] chk_bry; int chk_bgn, chk_end;
|
||||
if (val_bry == null) {
|
||||
chk_bry = raw;
|
||||
chk_bgn = xatr.Val_bgn();
|
||||
chk_end = xatr.Val_end();
|
||||
if (chk_end - chk_bgn == 0) return true; // no val; nothing to scrub; return true
|
||||
}
|
||||
else { // val_bry specified manually; EX: "id=<nowiki>1</nowiki>" has a manual val_bry of "1"
|
||||
chk_bry = val_bry;
|
||||
chk_bgn = 0;
|
||||
chk_end = val_bry.length;
|
||||
}
|
||||
int pos = chk_bgn;
|
||||
while (pos < chk_end) {
|
||||
Object o = style_trie.Match_bgn(chk_bry, pos, chk_end);
|
||||
if (o == null)
|
||||
++pos;
|
||||
else {
|
||||
pos = style_trie.Match_pos();
|
||||
byte style_tid = ((Byte_obj_val)o).Val();
|
||||
switch (style_tid) {
|
||||
case Style_expression:
|
||||
xatr.Val_bry_(Bry_.Empty);
|
||||
return false;
|
||||
case Style_filter:
|
||||
case Style_accelerator:
|
||||
if (Next_non_ws_byte(chk_bry, pos, chk_end) == Byte_ascii.Colon) {
|
||||
xatr.Val_bry_(Bry_.Empty);
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Style_url:
|
||||
case Style_urls:
|
||||
case Style_image:
|
||||
case Style_image_set:
|
||||
if (Next_non_ws_byte(chk_bry, pos, chk_end) == Byte_ascii.Paren_bgn) {
|
||||
xatr.Val_bry_(Bry_.Empty);
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
byte Next_non_ws_byte(byte[] raw, int bgn, int end) {
|
||||
for (int i = bgn; i < end; i++) {
|
||||
byte b = raw[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Space:
|
||||
case Byte_ascii.Tab:
|
||||
case Byte_ascii.Cr:
|
||||
case Byte_ascii.Nl:
|
||||
break;
|
||||
default:
|
||||
return b;
|
||||
}
|
||||
}
|
||||
return Byte_ascii.Nil;
|
||||
}
|
||||
static final byte Style_expression = 0, Style_filter = 1, Style_accelerator = 2, Style_url = 3, Style_urls = 4, Style_comment = 5, Style_image = 6, Style_image_set = 7;
|
||||
private static Btrie_slim_mgr style_trie = Btrie_slim_mgr.ci_ascii_() // NOTE:ci.ascii:Javascript
|
||||
.Add_str_byte("expression" , Style_expression)
|
||||
.Add_str_byte("filter" , Style_filter)
|
||||
.Add_str_byte("accelerator" , Style_accelerator)
|
||||
.Add_str_byte("url" , Style_url)
|
||||
.Add_str_byte("urls" , Style_urls)
|
||||
.Add_str_byte("image" , Style_image)
|
||||
.Add_str_byte("image-set" , Style_image_set)
|
||||
.Add_str_byte("/*" , Style_comment)
|
||||
;
|
||||
private static final byte[] Val_role_presentation = Bry_.new_a7("presentation");
|
||||
}
|
||||
class Xop_xatr_whitelist_itm {
|
||||
public Xop_xatr_whitelist_itm(byte[] key, byte key_tid, boolean exact) {this.key = key; this.key_tid = key_tid; this.exact = exact;}
|
||||
public byte[] Key() {return key;} private byte[] key;
|
||||
public byte Key_tid() {return key_tid;} private byte key_tid;
|
||||
public boolean Exact() {return exact;} private boolean exact;
|
||||
public byte[] Tags() {return tags;} private byte[] tags = new byte[Xop_xnde_tag_._MaxLen];
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xatr_whitelist_mgr_tst {
|
||||
Xop_xatr_whitelist_fxt fxt = new Xop_xatr_whitelist_fxt();
|
||||
@Before public void init() {fxt.Clear();}
|
||||
@Test public void Basic() {
|
||||
fxt.Whitelist(Xop_xnde_tag_.Tid_div , "style" , true);
|
||||
fxt.Whitelist(Xop_xnde_tag_.Tid_div , "xstyle" , false);
|
||||
fxt.Whitelist(Xop_xnde_tag_.Tid_div , "stylex" , false);
|
||||
fxt.Whitelist(Xop_xnde_tag_.Tid_div , "styl" , false);
|
||||
fxt.Whitelist(Xop_xnde_tag_.Tid_img , "alt" , true);
|
||||
fxt.Whitelist(Xop_xnde_tag_.Tid_img , "span" , false);
|
||||
fxt.Whitelist(Xop_xnde_tag_.Tid_div , "data-sort-type" , true);
|
||||
fxt.Whitelist(Xop_xnde_tag_.Tid_data , "value" , true);
|
||||
fxt.Whitelist(Xop_xnde_tag_.Tid_data , "valuex" , false);
|
||||
}
|
||||
@Test public void Role() {
|
||||
fxt.Whitelist(Xop_xnde_tag_.Tid_div , "role" , "presentation", true);
|
||||
fxt.Whitelist(Xop_xnde_tag_.Tid_div , "role" , "other", false);
|
||||
}
|
||||
@Test public void Scrub() {
|
||||
fxt.Scrub_style_fail("expression");
|
||||
fxt.Scrub_style_fail("filter:a");
|
||||
fxt.Scrub_style_fail("filter\t \n:a");
|
||||
fxt.Scrub_style_fail("accelerator:a");
|
||||
fxt.Scrub_style_fail("url()");
|
||||
fxt.Scrub_style_fail("urls()");
|
||||
fxt.Scrub_style_pass("filterx");
|
||||
}
|
||||
}
|
||||
class Xop_xatr_whitelist_fxt {
|
||||
public void Clear() {
|
||||
if (whitelist_mgr == null) whitelist_mgr = new Xop_xatr_whitelist_mgr().Ini();
|
||||
} private Xop_xatr_whitelist_mgr whitelist_mgr;
|
||||
public void Whitelist(byte tag_id, String key_str, boolean expd) {
|
||||
byte[] key_bry = Bry_.new_a7(key_str);
|
||||
atr_itm.Key_rng_(0, key_bry.length);
|
||||
Tfds.Eq(expd, whitelist_mgr.Chk(tag_id, key_bry, atr_itm), key_str);
|
||||
} private Xop_xatr_itm atr_itm = new Xop_xatr_itm(0, 0);
|
||||
public void Whitelist(byte tag_id, String key_str, String val_str, boolean expd) {
|
||||
byte[] key_bry = Bry_.new_a7(key_str);
|
||||
atr_itm.Key_rng_(0, key_bry.length);
|
||||
atr_itm.Val_bry_(Bry_.new_a7(val_str));
|
||||
Tfds.Eq(expd, whitelist_mgr.Chk(tag_id, key_bry, atr_itm), key_str);
|
||||
}
|
||||
public void Scrub_style_pass(String style_val_str) {Scrub_style(style_val_str, style_val_str);}
|
||||
public void Scrub_style_fail(String val_str) {Scrub_style(val_str, "");}
|
||||
public void Scrub_style(String val_str, String expd) {
|
||||
byte[] val_bry = Bry_.new_a7(val_str);
|
||||
atr_itm.Val_bry_(val_bry);
|
||||
whitelist_mgr.Scrub_style(atr_itm, val_bry);
|
||||
Tfds.Eq(expd, String_.new_a7(atr_itm.Val_bry()));
|
||||
}
|
||||
}
|
||||
21
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_atr_parser.java
Normal file
21
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_atr_parser.java
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
public interface Xop_xnde_atr_parser {
|
||||
void Xatr_parse(Xowe_wiki wiki, byte[] src, Xop_xatr_itm xatr, Object xatr_key_obj);
|
||||
}
|
||||
26
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_lxr.java
Normal file
26
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_lxr.java
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.core.btries.*;
|
||||
class Xop_xnde_lxr implements Xop_lxr {
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_xnde;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Lt, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Xnde().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_xnde_lxr _ = new Xop_xnde_lxr(); Xop_xnde_lxr() {}
|
||||
}
|
||||
81
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_tag.java
Normal file
81
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_tag.java
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.core.primitives.*;
|
||||
/**
 * Definition of one xml-style tag: id, name, and a set of boolean / mode flags set through
 * fluent "Flag_()" setters that return this. Also holds optional per-language alternate names.
 */
public class Xop_xnde_tag {
	/**
	 * @param id tag id
	 * @param name_str tag name; also stored as bytes, and used to build the "</name" end-tag bytes
	 */
	public Xop_xnde_tag(int id, String name_str) {	// NOTE: should only be used by Xop_xnde_tag_
		this.id = id;
		this.name_bry = Bry_.new_a7(name_str);
		this.name_str = name_str;
		name_len = name_bry.length;
		xtn_end_tag = Bry_.Add(Xop_xnde_tag_.XtnEndTag_bgn, name_bry);	// always force endtag; needed for <noinclude>
		xtn_end_tag_tmp = new byte[xtn_end_tag.length]; Array_.Copy(xtn_end_tag, xtn_end_tag_tmp);	// separate copy of xtn_end_tag
	}
	public int Id() {return id;} public Xop_xnde_tag Id_(int v) {id = v; return this;} private int id;
	public byte[] Name_bry() {return name_bry;} private byte[] name_bry;
	public String Name_str() {return name_str;} private String name_str;
	public int Name_len() {return name_len;} private int name_len;
	public boolean Xtn() {return xtn;} public Xop_xnde_tag Xtn_() {xtn = true; return this;} private boolean xtn;
	public byte[] XtnEndTag() {return xtn_end_tag;} private byte[] xtn_end_tag;
	public byte[] XtnEndTag_tmp() {return xtn_end_tag_tmp;} private byte[] xtn_end_tag_tmp;
	public int BgnNdeMode() {return bgnNdeMode;} private int bgnNdeMode = Xop_xnde_tag_.BgnNdeMode_normal;
	public Xop_xnde_tag BgnNdeMode_inline_() {bgnNdeMode = Xop_xnde_tag_.BgnNdeMode_inline; return this;}
	public int EndNdeMode() {return endNdeMode;} private int endNdeMode = Xop_xnde_tag_.EndNdeMode_normal;
	public Xop_xnde_tag EndNdeMode_inline_() {endNdeMode = Xop_xnde_tag_.EndNdeMode_inline; return this;}
	public Xop_xnde_tag EndNdeMode_escape_() {endNdeMode = Xop_xnde_tag_.EndNdeMode_escape; return this;}
	public boolean SingleOnly() {return singleOnly;} public Xop_xnde_tag SingleOnly_() {singleOnly = true; return this;} private boolean singleOnly;
	public boolean TblSub() {return tblSub;} public Xop_xnde_tag TblSub_() {tblSub = true; return this;} private boolean tblSub;
	public boolean Restricted() {return restricted;} public Xop_xnde_tag Restricted_() {restricted = true; return this;} private boolean restricted;
	public boolean NoInline() {return noInline;} public Xop_xnde_tag NoInline_() {noInline = true; return this;} private boolean noInline;
	public boolean Inline_by_backslash() {return inline_by_backslash;} public Xop_xnde_tag Inline_by_backslash_() {inline_by_backslash = true; return this;} private boolean inline_by_backslash;
	public boolean Section() {return section;} public Xop_xnde_tag Section_() {section = true; return this;} private boolean section;
	public boolean Repeat_ends() {return repeat_ends;} public Xop_xnde_tag Repeat_ends_() {repeat_ends = true; return this;} private boolean repeat_ends;
	public boolean Repeat_mids() {return repeat_mids;} public Xop_xnde_tag Repeat_mids_() {repeat_mids = true; return this;} private boolean repeat_mids;
	public boolean Empty_ignored() {return empty_ignored;} public Xop_xnde_tag Empty_ignored_() {empty_ignored = true; return this;} private boolean empty_ignored;
	public boolean Raw() {return raw;} public Xop_xnde_tag Raw_() {raw = true; return this;} private boolean raw;
	public static final byte Block_noop = 0, Block_bgn = 1, Block_end = 2;
	public byte Block_open() {return block_open;} private byte block_open = Block_noop;
	public byte Block_close() {return block_close;} private byte block_close = Block_noop;
	public Xop_xnde_tag Block_open_bgn_() {block_open = Block_bgn; return this;} public Xop_xnde_tag Block_open_end_() {block_open = Block_end; return this;}
	public Xop_xnde_tag Block_close_bgn_() {block_close = Block_bgn; return this;} public Xop_xnde_tag Block_close_end_() {block_close = Block_end; return this;}
	public boolean Xtn_auto_close() {return xtn_auto_close;} public Xop_xnde_tag Xtn_auto_close_() {xtn_auto_close = true; return this;} private boolean xtn_auto_close;
	public boolean Ignore_empty() {return ignore_empty;} public Xop_xnde_tag Ignore_empty_() {ignore_empty = true; return this;} private boolean ignore_empty;
	public boolean Xtn_skips_template_args() {return xtn_skips_template_args;} public Xop_xnde_tag Xtn_skips_template_args_() {xtn_skips_template_args = true; return this;} private boolean xtn_skips_template_args;
	/** Per-language alternate names; null until Langs_ is first called. */
	public Ordered_hash Langs() {return langs;} private Ordered_hash langs; private Int_obj_ref langs_key;
	/** Registers an alternate tag name for one language; lazily creates the hash and its reusable lookup key. */
	public Xop_xnde_tag Langs_(int lang_code, String name) {
		if (langs == null) {
			langs = Ordered_hash_.new_();
			langs_key = Int_obj_ref.neg1_();
		}
		Xop_xnde_tag_lang lang_tag = new Xop_xnde_tag_lang(lang_code, name);
		langs.Add(lang_tag.Lang_code(), lang_tag);
		return this;
	}
	/**
	 * Resolves the tag name found in src[bgn..end) for the current language.
	 * @param case_mgr not used by this method
	 * @return Xop_xnde_tag_lang._ when no langs are defined or when the text equals the canonical name;
	 *         the language entry when the text matches (ASCII case-insensitive) the name registered for cur_lang;
	 *         null otherwise
	 */
	public Xop_xnde_tag_lang Langs_get(gplx.xowa.langs.cases.Xol_case_mgr case_mgr, int cur_lang, byte[] src, int bgn, int end) {
		if (langs == null) return Xop_xnde_tag_lang._;						// no langs defined; always return true; EX:<b>
		if (Bry_.Eq(name_bry, src, bgn, end)) return Xop_xnde_tag_lang._;	// canonical name (name_bry) is valid in all langs; EX: <section> and cur_lang=de
		Xop_xnde_tag_lang lang;
		synchronized (langs) {	// langs_key is one mutable key shared by all callers; set + lookup must happen under the same lock, else another thread can change the key in between
			langs_key.Val_(cur_lang);
			lang = (Xop_xnde_tag_lang)langs.Get_by(langs_key);
		}
		if (lang == null) return null;										// cur tag is a lang tag, but no tag for this lang; EX: "<trecho>" and cur_lang=de
		return Bry_.Eq_ci_ascii(lang.Name_bry(), src, bgn, end)
			? lang
			: null;
	}
}
|
||||
264
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_tag_.java
Normal file
264
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_tag_.java
Normal file
@@ -0,0 +1,264 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.xowa.langs.*;
|
||||
// Static table of xnde tags: one byte id (Tid_*) per tag, one Xop_xnde_tag definition (Tag_*) per tag,
// and Ary, which holds the definitions indexed by id.
public class Xop_xnde_tag_ {
|
||||
public static final int EndNdeMode_normal = 0, EndNdeMode_inline = 1, EndNdeMode_escape = 2; // escape is for hr which does not support </hr>
|
||||
public static final int BgnNdeMode_normal = 0, BgnNdeMode_inline = 1;
|
||||
public static final byte[] Name_onlyinclude = Bry_.new_a7("onlyinclude");
|
||||
public static final byte[] XtnEndTag_bgn = Bry_.new_a7("</");//, XtnEndTag_end = Bry_.new_a7(">");
|
||||
// tag ids; values run contiguously from 0 to _MaxLen - 1 and each id is the tag's slot in Ary (see new_)
public static final byte
|
||||
Tid_b = 0
|
||||
, Tid_strong = 1
|
||||
, Tid_i = 2
|
||||
, Tid_em = 3
|
||||
, Tid_cite = 4
|
||||
, Tid_dfn = 5
|
||||
, Tid_var = 6
|
||||
, Tid_u = 7
|
||||
, Tid_ins = 8
|
||||
, Tid_abbr = 9
|
||||
, Tid_strike = 10
|
||||
, Tid_del = 11
|
||||
, Tid_s = 12
|
||||
, Tid_sub = 13
|
||||
, Tid_sup = 14
|
||||
, Tid_big = 15
|
||||
, Tid_small = 16
|
||||
, Tid_code = 17
|
||||
, Tid_tt = 18
|
||||
, Tid_kbd = 19
|
||||
, Tid_samp = 20
|
||||
, Tid_blockquote = 21
|
||||
, Tid_pre = 22
|
||||
, Tid_font = 23
|
||||
, Tid_center = 24
|
||||
, Tid_p = 25
|
||||
, Tid_span = 26
|
||||
, Tid_div = 27
|
||||
, Tid_hr = 28
|
||||
, Tid_br = 29
|
||||
, Tid_h1 = 30
|
||||
, Tid_h2 = 31
|
||||
, Tid_h3 = 32
|
||||
, Tid_h4 = 33
|
||||
, Tid_h5 = 34
|
||||
, Tid_h6 = 35
|
||||
, Tid_li = 36
|
||||
, Tid_dt = 37
|
||||
, Tid_dd = 38
|
||||
, Tid_ol = 39
|
||||
, Tid_ul = 40
|
||||
, Tid_dl = 41
|
||||
, Tid_table = 42
|
||||
, Tid_tr = 43
|
||||
, Tid_td = 44
|
||||
, Tid_th = 45
|
||||
, Tid_thead = 46
|
||||
, Tid_tfoot = 47
|
||||
, Tid_tbody = 48
|
||||
, Tid_caption = 49
|
||||
, Tid_colgroup = 50
|
||||
, Tid_col = 51
|
||||
, Tid_a = 52
|
||||
, Tid_img = 53
|
||||
, Tid_ruby = 54
|
||||
, Tid_rt = 55
|
||||
, Tid_rb = 56
|
||||
, Tid_rp = 57
|
||||
, Tid_includeonly = 58
|
||||
, Tid_noinclude = 59
|
||||
, Tid_onlyinclude = 60
|
||||
, Tid_nowiki = 61
|
||||
, Tid_xowa_cmd = 62
|
||||
, Tid_poem = 63
|
||||
, Tid_math = 64
|
||||
, Tid_ref = 65
|
||||
, Tid_references = 66
|
||||
, Tid_source = 67
|
||||
, Tid_syntaxHighlight = 68
|
||||
, Tid_gallery = 69
|
||||
, Tid_imageMap = 70
|
||||
, Tid_timeline = 71
|
||||
, Tid_hiero = 72
|
||||
, Tid_inputBox = 73
|
||||
, Tid_pages = 74
|
||||
, Tid_section = 75
|
||||
, Tid_pagequality = 76
|
||||
, Tid_pagelist = 77
|
||||
, Tid_categoryList = 78
|
||||
, Tid_categoryTree = 79
|
||||
, Tid_dynamicPageList = 80
|
||||
, Tid_time = 81
|
||||
, Tid_input = 82
|
||||
, Tid_textarea = 83
|
||||
, Tid_score = 84
|
||||
, Tid_button = 85
|
||||
, Tid_select = 86
|
||||
, Tid_option = 87
|
||||
, Tid_optgroup = 88
|
||||
, Tid_script = 89
|
||||
, Tid_style = 90
|
||||
, Tid_form = 91
|
||||
, Tid_translate = 92
|
||||
, Tid_languages = 93
|
||||
, Tid_templateData = 94
|
||||
, Tid_bdi = 95
|
||||
, Tid_data = 96
|
||||
, Tid_mark = 97
|
||||
, Tid_wbr = 98
|
||||
, Tid_bdo = 99
|
||||
, Tid_listing_buy = 100
|
||||
, Tid_listing_do = 101
|
||||
, Tid_listing_drink = 102
|
||||
, Tid_listing_eat = 103
|
||||
, Tid_listing_listing = 104
|
||||
, Tid_listing_see = 105
|
||||
, Tid_listing_sleep = 106
|
||||
, Tid_rss = 107
|
||||
, Tid_xowa_html = 108
|
||||
, Tid_xowa_tag_bgn = 109
|
||||
, Tid_xowa_tag_end = 110
|
||||
, Tid_quiz = 111
|
||||
, Tid_indicator = 112
|
||||
, Tid_q = 113
|
||||
;
|
||||
// number of tag ids (highest id above is Tid_q = 113); must be raised whenever a new Tid_ is added, since it sizes Ary
public static final int _MaxLen = 114;
|
||||
// tag definitions indexed by Tid_; NOTE: must stay declared before the Tag_ fields below; static initializers run in textual order and new_ writes into Ary
public static final Xop_xnde_tag[] Ary = new Xop_xnde_tag[_MaxLen];
|
||||
// creates the tag definition for id / name and registers it in Ary[id]
private static Xop_xnde_tag new_(int id, String name) {
|
||||
Xop_xnde_tag rv = new Xop_xnde_tag(id, name);
|
||||
Ary[id] = rv;
|
||||
return rv;
|
||||
}
|
||||
// tag definitions; one per Tid_; the chained calls configure each tag (methods are defined on Xop_xnde_tag)
public static final Xop_xnde_tag
|
||||
Tag_b = new_(Tid_b, "b").NoInline_()
|
||||
, Tag_strong = new_(Tid_strong, "strong").NoInline_()
|
||||
, Tag_i = new_(Tid_i, "i").NoInline_()
|
||||
, Tag_em = new_(Tid_em, "em").NoInline_()
|
||||
, Tag_cite = new_(Tid_cite, "cite").NoInline_()
|
||||
, Tag_dfn = new_(Tid_dfn, "dfn").NoInline_()
|
||||
, Tag_var = new_(Tid_var, "var").NoInline_()
|
||||
, Tag_u = new_(Tid_u, "u").NoInline_().Repeat_ends_() // PAGE:en.b:Textbook_of_Psychiatry/Alcoholism_and_Psychoactive_Substance_Use_Disorders; DATE:2014-09-05
|
||||
, Tag_ins = new_(Tid_ins, "ins").NoInline_()
|
||||
, Tag_abbr = new_(Tid_abbr, "abbr").NoInline_()
|
||||
, Tag_strike = new_(Tid_strike, "strike").NoInline_()
|
||||
, Tag_del = new_(Tid_del, "del").NoInline_()
|
||||
, Tag_s = new_(Tid_s, "s").NoInline_()
|
||||
, Tag_sub = new_(Tid_sub, "sub").NoInline_()
|
||||
, Tag_sup = new_(Tid_sup, "sup").NoInline_()
|
||||
, Tag_big = new_(Tid_big, "big").NoInline_()
|
||||
, Tag_small = new_(Tid_small, "small").NoInline_()
|
||||
, Tag_code = new_(Tid_code, "code").NoInline_().Repeat_ends_()
|
||||
, Tag_tt = new_(Tid_tt, "tt").NoInline_().Repeat_ends_()
|
||||
, Tag_kbd = new_(Tid_kbd, "kbd").NoInline_()
|
||||
, Tag_samp = new_(Tid_samp, "samp").NoInline_()
|
||||
, Tag_blockquote = new_(Tid_blockquote, "blockquote").NoInline_().Repeat_mids_().Section_().Block_open_bgn_().Block_close_end_() // NOTE: should be open_end_, but leaving for now; DATE:2014-03-11; added Repeat_mids_(); PAGE:en.w:Ring_a_Ring_o'_Roses DATE:2014-06-26
|
||||
, Tag_pre = new_(Tid_pre, "pre").NoInline_().Section_().Xtn_().Raw_().Block_open_bgn_().Block_close_end_().Ignore_empty_().Xtn_skips_template_args_()
|
||||
, Tag_font = new_(Tid_font, "font").NoInline_()
|
||||
, Tag_center = new_(Tid_center, "center").NoInline_().Block_open_end_().Block_close_end_() // removed .Repeat_ends_(); added Nest_(); EX: w:Burr Truss; DATE:2012-12-12
|
||||
, Tag_p = new_(Tid_p, "p").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_span = new_(Tid_span, "span").Section_()
|
||||
, Tag_div = new_(Tid_div, "div").Section_().Block_open_end_().Block_close_end_()
|
||||
, Tag_hr = new_(Tid_hr, "hr").SingleOnly_().BgnNdeMode_inline_().Inline_by_backslash_().EndNdeMode_escape_().Section_().Block_close_end_()
|
||||
, Tag_br = new_(Tid_br, "br").SingleOnly_().BgnNdeMode_inline_().Inline_by_backslash_().EndNdeMode_inline_().Section_()
|
||||
, Tag_h1 = new_(Tid_h1, "h1").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h2 = new_(Tid_h2, "h2").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h3 = new_(Tid_h3, "h3").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h4 = new_(Tid_h4, "h4").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h5 = new_(Tid_h5, "h5").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h6 = new_(Tid_h6, "h6").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_li = new_(Tid_li, "li").Repeat_mids_().Empty_ignored_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_dt = new_(Tid_dt, "dt").Repeat_mids_()
|
||||
, Tag_dd = new_(Tid_dd, "dd").Repeat_mids_()
|
||||
, Tag_ol = new_(Tid_ol, "ol").NoInline_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_ul = new_(Tid_ul, "ul").NoInline_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_dl = new_(Tid_dl, "dl").NoInline_()
|
||||
, Tag_table = new_(Tid_table, "table").NoInline_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_tr = new_(Tid_tr, "tr").TblSub_().Block_open_bgn_().Block_open_end_()
|
||||
, Tag_td = new_(Tid_td, "td").TblSub_().Block_open_end_().Block_close_bgn_()
|
||||
, Tag_th = new_(Tid_th, "th").TblSub_().Block_open_end_().Block_close_bgn_()
|
||||
, Tag_thead = new_(Tid_thead, "thead")
|
||||
, Tag_tfoot = new_(Tid_tfoot, "tfoot")
|
||||
, Tag_tbody = new_(Tid_tbody, "tbody")
|
||||
, Tag_caption = new_(Tid_caption, "caption").NoInline_().TblSub_()
|
||||
, Tag_colgroup = new_(Tid_colgroup, "colgroup")
|
||||
, Tag_col = new_(Tid_col, "col")
|
||||
, Tag_a = new_(Tid_a, "a").Restricted_()
|
||||
, Tag_img = new_(Tid_img, "img").Restricted_() // NOTE: was .Xtn() DATE:2014-11-06
|
||||
, Tag_ruby = new_(Tid_ruby, "ruby").NoInline_()
|
||||
, Tag_rt = new_(Tid_rt, "rt").NoInline_()
|
||||
, Tag_rb = new_(Tid_rb, "rb").NoInline_()
|
||||
, Tag_rp = new_(Tid_rp, "rp").NoInline_()
|
||||
, Tag_includeonly = new_(Tid_includeonly, "includeonly")
|
||||
, Tag_noinclude = new_(Tid_noinclude, "noinclude")
|
||||
, Tag_onlyinclude = new_(Tid_onlyinclude, "onlyinclude")
|
||||
, Tag_nowiki = new_(Tid_nowiki, "nowiki")
|
||||
, Tag_xowa_cmd = new_(Tid_xowa_cmd, "xowa_cmd").Xtn_()
|
||||
, Tag_poem = new_(Tid_poem, "poem").Xtn_().Xtn_auto_close_()
|
||||
, Tag_math = new_(Tid_math, "math").Xtn_()
|
||||
, Tag_ref = new_(Tid_ref, "ref").Xtn_()
|
||||
, Tag_references = new_(Tid_references, "references").Xtn_()
|
||||
, Tag_source = new_(Tid_source, "source").Xtn_().Block_open_bgn_().Block_close_end_() // deactivate pre; pre; PAGE:en.w:Comment_(computer_programming); DATE:2014-06-23
|
||||
, Tag_syntaxHighlight = new_(Tid_syntaxHighlight, "syntaxHighlight").Xtn_().Block_open_bgn_().Block_close_end_() // deactivate pre; pre; PAGE:en.w:Comment_(computer_programming); DATE:2014-06-23
|
||||
, Tag_gallery = new_(Tid_gallery, "gallery").Xtn_().Block_open_bgn_().Block_close_end_().Xtn_auto_close_()
|
||||
, Tag_imageMap = new_(Tid_imageMap, "imageMap").Xtn_()
|
||||
, Tag_timeline = new_(Tid_timeline, "timeline").Xtn_()
|
||||
, Tag_hiero = new_(Tid_hiero, "hiero").Xtn_()
|
||||
, Tag_inputBox = new_(Tid_inputBox, "inputBox").Xtn_()
|
||||
, Tag_pages = new_(Tid_pages, "pages").Xtn_()
|
||||
, Tag_section = new_(Tid_section, "section").Xtn_().Langs_(Xol_lang_itm_.Id_de, "Abschnitt").Langs_(Xol_lang_itm_.Id_he, "קטע").Langs_(Xol_lang_itm_.Id_pt, "trecho") // DATE:2014-07-18
|
||||
, Tag_pagequality = new_(Tid_pagequality, "pagequality").Xtn_()
|
||||
, Tag_pagelist = new_(Tid_pagelist, "pagelist").Xtn_()
|
||||
, Tag_categoryList = new_(Tid_categoryList, "categoryList").Xtn_()
|
||||
, Tag_categoryTree = new_(Tid_categoryTree, "categoryTree").Xtn_()
|
||||
, Tag_dynamicPageList = new_(Tid_dynamicPageList, "dynamicPageList").Xtn_()
|
||||
, Tag_time = new_(Tid_time, "time")
|
||||
, Tag_input = new_(Tid_input, "input").Restricted_()
|
||||
, Tag_textarea = new_(Tid_textarea, "textarea").Restricted_()
|
||||
, Tag_score = new_(Tid_score, "score").Xtn_()
|
||||
, Tag_button = new_(Tid_button, "button").Restricted_()
|
||||
, Tag_select = new_(Tid_select, "select").Restricted_()
|
||||
, Tag_option = new_(Tid_option, "option").Restricted_()
|
||||
, Tag_optgroup = new_(Tid_optgroup, "optgroup").Restricted_()
|
||||
, Tag_script = new_(Tid_script, "script").Restricted_() // NOTE: had ".Block_open_bgn_().Block_close_end_()"; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
|
||||
, Tag_style = new_(Tid_style, "style").Restricted_() // NOTE: had ".Block_open_bgn_().Block_close_end_()"; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
|
||||
, Tag_form = new_(Tid_form, "form").Restricted_() // NOTE: had ".Block_open_bgn_().Block_close_end_()"; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
|
||||
, Tag_translate = new_(Tid_translate, "translate").Xtn_()
|
||||
, Tag_languages = new_(Tid_languages, "languages").Xtn_()
|
||||
, Tag_templateData = new_(Tid_templateData, "templateData").Xtn_()
|
||||
, Tag_bdi = new_(Tid_bdi, "bdi")
|
||||
, Tag_data = new_(Tid_data, "data")
|
||||
, Tag_mark = new_(Tid_mark, "mark")
|
||||
, Tag_wbr = new_(Tid_wbr, "wbr").SingleOnly_()
|
||||
, Tag_bdo = new_(Tid_bdo, "bdo").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_listing_buy = new_(Tid_listing_buy, "buy").Xtn_()
|
||||
, Tag_listing_do = new_(Tid_listing_do, "do").Xtn_()
|
||||
, Tag_listing_drink = new_(Tid_listing_drink, "drink").Xtn_()
|
||||
, Tag_listing_eat = new_(Tid_listing_eat, "eat").Xtn_()
|
||||
, Tag_listing_listing = new_(Tid_listing_listing, "listing").Xtn_()
|
||||
, Tag_listing_see = new_(Tid_listing_see, "see").Xtn_()
|
||||
, Tag_listing_sleep = new_(Tid_listing_sleep, "sleep").Xtn_()
|
||||
, Tag_rss = new_(Tid_rss, "rss").Xtn_()
|
||||
, Tag_xowa_html = new_(Tid_xowa_html, "xowa_html").Xtn_()
|
||||
, Tag_xowa_tag_bgn = new_(Tid_xowa_tag_bgn, "xtag_bgn").Xtn_()
|
||||
, Tag_xowa_tag_end = new_(Tid_xowa_tag_end, "xtag_end").Xtn_()
|
||||
, Tag_quiz = new_(Tid_quiz, "quiz").Xtn_()
|
||||
, Tag_indicator = new_(Tid_indicator, "indicator").Xtn_()
|
||||
, Tag_q = new_(Tid_q, "q")
|
||||
;
|
||||
}
|
||||
32
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_tag_lang.java
Normal file
32
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_tag_lang.java
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.core.primitives.*;
|
||||
public class Xop_xnde_tag_lang {
|
||||
public Xop_xnde_tag_lang(int lang_code_int, String name_str) {
|
||||
lang_code = Int_obj_ref.new_(lang_code_int);
|
||||
this.name_str = name_str;
|
||||
this.name_bry = Bry_.new_u8(name_str);
|
||||
this.xtnEndTag_tmp = Bry_.Add(Xop_xnde_tag_.XtnEndTag_bgn, name_bry);
|
||||
}
|
||||
public Int_obj_ref Lang_code() {return lang_code;} private Int_obj_ref lang_code;
|
||||
public String Name_str() {return name_str;} private String name_str;
|
||||
public byte[] Name_bry() {return name_bry;} private byte[] name_bry;
|
||||
public byte[] XtnEndTag_tmp() {return xtnEndTag_tmp;} private byte[] xtnEndTag_tmp;
|
||||
public static final Xop_xnde_tag_lang _ = new Xop_xnde_tag_lang(-1, String_.Empty);
|
||||
}
|
||||
61
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_tag_regy.java
Normal file
61
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_tag_regy.java
Normal file
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_xnde_tag_regy {
|
||||
public Btrie_slim_mgr XndeNames(int i) {
|
||||
if (nild) {Init(); nild = false;}
|
||||
switch (i) {
|
||||
case Xop_parser_.Parse_tid_tmpl: return tag_regy_tmpl;
|
||||
case Xop_parser_.Parse_tid_page_tmpl: return tag_regy_wiki_tmpl;
|
||||
case Xop_parser_.Parse_tid_page_wiki: return tag_regy_wiki_main;
|
||||
default: return tag_regy_wiki_tmpl; //throw Exc_.new_unhandled(i);
|
||||
}
|
||||
} boolean nild = true;
|
||||
public void Init() {
|
||||
Init_reg(tag_regy_tmpl , FilterXtns(Xop_xnde_tag_.Ary, Xop_xnde_tag_.Tag_includeonly, Xop_xnde_tag_.Tag_noinclude, Xop_xnde_tag_.Tag_onlyinclude, Xop_xnde_tag_.Tag_nowiki));
|
||||
Init_reg(tag_regy_wiki_tmpl , FilterXtns(Xop_xnde_tag_.Ary, Xop_xnde_tag_.Tag_includeonly, Xop_xnde_tag_.Tag_noinclude, Xop_xnde_tag_.Tag_onlyinclude, Xop_xnde_tag_.Tag_nowiki));
|
||||
Init_reg(tag_regy_wiki_main , Xop_xnde_tag_.Ary);
|
||||
}
|
||||
private Xop_xnde_tag[] FilterXtns(Xop_xnde_tag[] ary, Xop_xnde_tag... more) {
|
||||
List_adp rv = List_adp_.new_();
|
||||
for (Xop_xnde_tag itm : ary)
|
||||
if (itm.Xtn()) rv.Add(itm);
|
||||
for (Xop_xnde_tag itm : more)
|
||||
rv.Add(itm);
|
||||
return (Xop_xnde_tag[])rv.To_ary(Xop_xnde_tag.class);
|
||||
}
|
||||
private void Init_reg(Btrie_slim_mgr tag_regy, Xop_xnde_tag... ary) {
|
||||
for (Xop_xnde_tag tag : ary) {
|
||||
tag_regy.Add_obj(tag.Name_bry(), tag);
|
||||
Ordered_hash langs = tag.Langs();
|
||||
if (langs != null) { // tag has langs; EX: <section>; DATE:2014-07-18
|
||||
int langs_len = langs.Count();
|
||||
for (int i = 0; i < langs_len; ++i) { // register each lang's tag; EX:"<Abschnitt>", "<trecho>"
|
||||
Xop_xnde_tag_lang lang = (Xop_xnde_tag_lang)langs.Get_at(i);
|
||||
tag_regy.Add_obj(lang.Name_bry(), tag);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
private Btrie_slim_mgr
|
||||
tag_regy_wiki_main = Btrie_slim_mgr.ci_utf_8_() // NOTE:ci.utf8; he.s and <section> alias DATE:2014-07-18
|
||||
, tag_regy_wiki_tmpl = Btrie_slim_mgr.ci_utf_8_()
|
||||
, tag_regy_tmpl = Btrie_slim_mgr.ci_utf_8_()
|
||||
;
|
||||
}
|
||||
35
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_tag_stack.java
Normal file
35
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_tag_stack.java
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
public class Xop_xnde_tag_stack {
|
||||
public void Push() {xmlTagsStack.Add(xmlTags); xmlTags = new int[Xop_xnde_tag_._MaxLen];}
|
||||
public void Pop() {xmlTags = (int[])List_adp_.Pop(xmlTagsStack);}
|
||||
public boolean Has(int id) {return xmlTags[id] != 0;}
|
||||
public void Add(int id) {++xmlTags[id];}
|
||||
public void Del(int id) {
|
||||
int val = --xmlTags[id];
|
||||
if (val == -1) xmlTags[id] = 0;
|
||||
}
|
||||
public void Clear() {
|
||||
for (int i = 0; i < Xop_xnde_tag_._MaxLen; i++)
|
||||
xmlTags[i] = 0;
|
||||
xmlTagsStack.Clear();
|
||||
}
|
||||
List_adp xmlTagsStack = List_adp_.new_();
|
||||
int[] xmlTags = new int[Xop_xnde_tag_._MaxLen];
|
||||
}
|
||||
119
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_tkn.java
Normal file
119
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_tkn.java
Normal file
@@ -0,0 +1,119 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.xowa.xtns.*; import gplx.xowa.parsers.tblws.*;
|
||||
public class Xop_xnde_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_xnde;}
|
||||
public int Tblw_tid() {return tag.Id();} // NOTE: tblw tkns actually return xnde as Tblw_tid
|
||||
public boolean Tblw_xml() {return true;}
|
||||
public int Tblw_subs_len() {return tblw_subs_len;} public void Tblw_subs_len_add_() {++tblw_subs_len;} private int tblw_subs_len;
|
||||
public byte CloseMode() {return closeMode;} public Xop_xnde_tkn CloseMode_(byte v) {closeMode = v; return this;} private byte closeMode = Xop_xnde_tkn.CloseMode_null;
|
||||
public boolean Tag_visible() {return tag_visible;} public Xop_xnde_tkn Tag_visible_(boolean v) {tag_visible = v; return this;} private boolean tag_visible = true;
|
||||
public int Name_bgn() {return name_bgn;} public Xop_xnde_tkn Name_bgn_(int v) {name_bgn = v; return this;} private int name_bgn = -1;
|
||||
public int Name_end() {return name_end;} public Xop_xnde_tkn Name_end_(int v) {name_end = v; return this;} private int name_end = -1;
|
||||
public Xop_xnde_tkn Name_rng_(int bgn, int end) {name_bgn = bgn; name_end = end; return this;}
|
||||
public int Atrs_bgn() {return atrs_bgn;} public Xop_xnde_tkn Atrs_bgn_(int v) {atrs_bgn = v; return this;} private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
|
||||
public int Atrs_end() {return atrs_end;} public Xop_xnde_tkn Atrs_end_(int v) {atrs_end = v; return this;} private int atrs_end = Xop_tblw_wkr.Atrs_null;
|
||||
public Xop_xnde_tkn Atrs_rng_(int bgn, int end) {atrs_bgn = bgn; atrs_end = end; return this;}
|
||||
public void Atrs_rng_set(int bgn, int end) {Atrs_rng_(bgn, end);}
|
||||
public Xop_xatr_itm[] Atrs_ary() {return atrs_ary;}
|
||||
public Xop_xnde_tkn Atrs_ary_(Xop_xatr_itm[] v) {atrs_ary = v; return this;} private Xop_xatr_itm[] atrs_ary;
|
||||
public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {atrs_ary = v; return this;}
|
||||
public Xop_xnde_tag Tag() {return tag;} public Xop_xnde_tkn Tag_(Xop_xnde_tag v) {tag = v; return this;} private Xop_xnde_tag tag;
|
||||
public int Tag_open_bgn() {return tag_open_bgn;} private int tag_open_bgn = Int_.Null;
|
||||
public int Tag_open_end() {return tag_open_end;} private int tag_open_end = Int_.Null;
|
||||
public Xop_xnde_tkn Tag_open_rng_(int bgn, int end) {this.tag_open_bgn = bgn; this.tag_open_end = end; return this;}
|
||||
public int Tag_close_bgn() {return tag_close_bgn;} private int tag_close_bgn = Int_.Null;
|
||||
public int Tag_close_end() {return tag_close_end;} private int tag_close_end = Int_.Null;
|
||||
public Xop_xnde_tkn Tag_close_rng_(int bgn, int end) {this.tag_close_bgn = bgn; this.tag_close_end = end; return this;}
|
||||
public Xop_xnde_tkn Subs_add_ary(Xop_tkn_itm... ary) {for (Xop_tkn_itm itm : ary) Subs_add(itm); return this;}
|
||||
public Xox_xnde Xnde_xtn() {return xnde_xtn;} public Xop_xnde_tkn Xnde_xtn_(Xox_xnde v) {xnde_xtn = v; return this;} private Xox_xnde xnde_xtn;
|
||||
@Override public void Tmpl_compile(Xop_ctx ctx, byte[] src, Xot_compile_data prep_data) {
|
||||
switch (tag.Id()) {
|
||||
case Xop_xnde_tag_.Tid_noinclude: // NOTE: prep_mode is false to force recompile; see Ex_Tmpl_noinclude and {{{1<ni>|a</ni>}}}
|
||||
case Xop_xnde_tag_.Tid_includeonly: // NOTE: changed to always ignore <includeonly>; DATE:2014-05-10
|
||||
break;
|
||||
case Xop_xnde_tag_.Tid_nowiki: {
|
||||
int subs_len = this.Subs_len();
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
Xop_tkn_itm sub = this.Subs_get(i);
|
||||
sub.Tmpl_compile(ctx, src, prep_data);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Xop_xnde_tag_.Tid_onlyinclude: {
|
||||
int subs_len = this.Subs_len();
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
Xop_tkn_itm sub = this.Subs_get(i);
|
||||
sub.Tmpl_compile(ctx, src, prep_data);
|
||||
}
|
||||
prep_data.OnlyInclude_exists = true;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
int subs_len = this.Subs_len();
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
Xop_tkn_itm sub = this.Subs_get(i);
|
||||
sub.Tmpl_compile(ctx, src, prep_data);
|
||||
}
|
||||
break; // can happen in compile b/c invks are now being compiled
|
||||
}
|
||||
}
|
||||
}
|
||||
@Override public boolean Tmpl_evaluate(Xop_ctx ctx, byte[] src, Xot_invk caller, Bry_bfr bfr) {
|
||||
int subs_len = this.Subs_len();
|
||||
switch (tag.Id()) {
|
||||
case Xop_xnde_tag_.Tid_noinclude: // do not evaluate subs
|
||||
break;
|
||||
case Xop_xnde_tag_.Tid_includeonly: // evaluate subs
|
||||
if (!ctx.Only_include_evaluate()) {
|
||||
for (int i = 0; i < subs_len; i++)
|
||||
this.Subs_get(i).Tmpl_evaluate(ctx, src, caller, bfr);
|
||||
}
|
||||
break;
|
||||
case Xop_xnde_tag_.Tid_nowiki: // evaluate subs; add tags
|
||||
bfr.Add_byte(Byte_ascii.Lt).Add(Xop_xnde_tag_.Tag_nowiki.Name_bry()).Add_byte(Byte_ascii.Gt);
|
||||
for (int i = 0; i < subs_len; i++)
|
||||
this.Subs_get(i).Tmpl_evaluate(ctx, src, caller, bfr);
|
||||
bfr.Add_byte(Byte_ascii.Lt).Add_byte(Byte_ascii.Slash).Add(Xop_xnde_tag_.Tag_nowiki.Name_bry()).Add_byte(Byte_ascii.Gt);
|
||||
break;
|
||||
case Xop_xnde_tag_.Tid_onlyinclude: // evaluate subs but toggle onlyinclude flag on/off
|
||||
// boolean prv_val = ctx.Onlyinclude_enabled;
|
||||
// ctx.Onlyinclude_enabled = false;
|
||||
for (int i = 0; i < subs_len; i++)
|
||||
this.Subs_get(i).Tmpl_evaluate(ctx, src, caller, bfr);
|
||||
// ctx.Onlyinclude_enabled = prv_val;
|
||||
break;
|
||||
default: // ignore tags except for xtn; NOTE: Xtn tags are part of tagRegy_wiki_tmpl stage
|
||||
if (tag.Xtn()) {
|
||||
bfr.Add_mid(src, tag_open_bgn, tag_open_end); // write tag_bgn
|
||||
for (int i = 0; i < subs_len; i++) // always evaluate subs; handle <poem>{{{1}}}</poem>; DATE:2014-03-03
|
||||
this.Subs_get(i).Tmpl_evaluate(ctx, src, caller, bfr);
|
||||
bfr.Add_mid(src, tag_close_bgn, tag_close_end); // write tag_end
|
||||
if (tag_close_bgn == Int_.MinValue) {// xtn is unclosed; add a </xtn> else rest of page will be gobbled; PAGE:en.w:Provinces_and_territories_of_Canada DATE:2014-11-13
|
||||
bfr.Add(tag.XtnEndTag());
|
||||
bfr.Add(Byte_ascii.Gt_bry);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
public static Xop_xnde_tkn new_() {return new Xop_xnde_tkn();} private Xop_xnde_tkn() {}
|
||||
public static final byte CloseMode_null = 0, CloseMode_inline = 1, CloseMode_pair = 2, CloseMode_open = 3;
|
||||
}
|
||||
757
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr.java
Normal file
757
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr.java
Normal file
@@ -0,0 +1,757 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.apps.progs.*; import gplx.xowa.wikis.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.strings.*; import gplx.html.*;
|
||||
import gplx.xowa.parsers.logs.*; import gplx.xowa.parsers.tblws.*;
|
||||
public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
// no-op: this wkr does nothing with the ctx it is given
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
// getter / setter for the pre_at_bos flag; the flag is reset to false by Clear()
public boolean Pre_at_bos() {return pre_at_bos;} public void Pre_at_bos_(boolean v) {pre_at_bos = v;} private boolean pre_at_bos;
|
||||
// no-op: nothing is set up at the start of a page
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
|
||||
// resets this wkr's state via Clear(); none of the args are used
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {this.Clear();}
|
||||
private void Clear() {
|
||||
pre_at_bos = false;
|
||||
}
|
||||
public void AutoClose(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn, int closing_tkn_tid) {
|
||||
Xop_xnde_tkn xnde = (Xop_xnde_tkn)tkn;
|
||||
xnde.Src_end_(src_len);
|
||||
xnde.Subs_move(root); // NOTE: ctx.Root used to be root which was a member variable; DATE:2013-12-11
|
||||
if (closing_tkn_tid == Xop_tkn_itm_.Tid_lnki_end) Xop_xnde_wkr_.AutoClose_handle_dangling_nde_in_caption(root, tkn); // PAGE:sr.w:Сићевачка_клисура; DATE:2014-07-03
|
||||
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Dangling_xnde, src, xnde.Src_bgn(), xnde.Name_end()); // NOTE: xnde.Src_bgn to start at <; xnde.Name_end b/c xnde.Src_end is -1
|
||||
}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_bos) {
|
||||
bgn_pos = 0; // do not allow -1 pos
|
||||
}
|
||||
if (cur_pos == src_len) return ctx.Lxr_make_txt_(src_len); // "<" is last char in page; strange, but don't raise error;
|
||||
Xop_tkn_itm last_tkn = ctx.Stack_get_last(); // BLOCK:invalid_ttl_check
|
||||
if ( last_tkn != null
|
||||
&& last_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) {
|
||||
Xop_lnki_tkn lnki = (Xop_lnki_tkn)last_tkn;
|
||||
if ( lnki.Pipe_count_is_zero()
|
||||
// && !Xop_lnki_wkr_.Parse_ttl(ctx, src, lnki, bgn_pos) // NOTE: no ttl parse check; <xnde> in ttl is automatically invalid; EX: [[a<b>c</b>|d]]; "a" is valid ttl, but "a<b>c</b>" is not
|
||||
) {
|
||||
ctx.Stack_pop_last();
|
||||
return Xop_lnki_wkr_.Invalidate_lnki(ctx, src, root, lnki, bgn_pos);
|
||||
}
|
||||
}
|
||||
|
||||
// find >
|
||||
byte cur_byt = src[cur_pos];
|
||||
boolean tag_is_closing = false;
|
||||
if (cur_byt == Byte_ascii.Slash) { // "</" encountered (note that < enters this frame)
|
||||
++cur_pos;
|
||||
if (cur_pos == src_len) return ctx.Lxr_make_txt_(src_len); // "</" are last chars on page; strange, but don't raise error;
|
||||
cur_byt = src[cur_pos];
|
||||
tag_is_closing = true;
|
||||
}
|
||||
Btrie_slim_mgr tag_trie = ctx.App().Xnde_tag_regy().XndeNames(ctx.Xnde_names_tid());
|
||||
Object tag_obj = tag_trie.Match_bgn_w_byte(cur_byt, src, cur_pos, src_len); // NOTE:tag_obj can be null in wiki_tmpl mode; EX: "<ul" is not a valid tag in wiki_tmpl, but is valid in wiki_main
|
||||
int atrs_bgn_pos = tag_trie.Match_pos();
|
||||
int name_bgn = cur_pos, name_end = atrs_bgn_pos;
|
||||
int tag_end_pos = atrs_bgn_pos - 1;
|
||||
if (tag_obj != null) {
|
||||
if (atrs_bgn_pos >= src_len) return ctx.Lxr_make_txt_(atrs_bgn_pos); // truncated tag; EX: "<br"
|
||||
switch (src[atrs_bgn_pos]) { // NOTE: not sure about rules; Preprocessor_DOM.php calls preg_match on $elementsRegex which seems to break on word boundaries; $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
++atrs_bgn_pos; // set bgn_pos to be after ws
|
||||
break;
|
||||
case Byte_ascii.Slash: case Byte_ascii.Gt:
|
||||
++atrs_bgn_pos; // set bgn_pos to be after char
|
||||
break;
|
||||
case Byte_ascii.Backslash:
|
||||
++tag_end_pos;
|
||||
break;
|
||||
case Byte_ascii.Dollar:// handles <br$2>;
|
||||
default: // allow all other symbols by defaults
|
||||
break;
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
tag_obj = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
boolean ctx_cur_tid_is_tblw_atr_owner = false;
|
||||
switch (ctx.Cur_tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: case Xop_tkn_itm_.Tid_tblw_tr: case Xop_tkn_itm_.Tid_tblw_th:
|
||||
ctx_cur_tid_is_tblw_atr_owner = true;
|
||||
break;
|
||||
}
|
||||
if (tag_obj == null) { // not a known xml tag; EX: "<abcd>"; "if 5 < 7 then"
|
||||
if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki) {
|
||||
if (ctx_cur_tid_is_tblw_atr_owner) // unknown_tag is occurring inside tblw element (EX: {| style='margin:1em<f'); just add to txt tkn
|
||||
return ctx.Lxr_make_txt_(cur_pos);
|
||||
else { // unknown_tag is occurring anywhere else; escape < to < and resume from character just after it;
|
||||
ctx.Subs_add(root, Make_bry_tkn(tkn_mkr, src, bgn_pos, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (ctx_cur_tid_is_tblw_atr_owner) Xop_tblw_wkr.Atrs_close(ctx, src, root, Bool_.N);
|
||||
return ctx.Lxr_make_txt_(cur_pos);
|
||||
}
|
||||
}
|
||||
Xop_xnde_tag tag = (Xop_xnde_tag)tag_obj;
|
||||
if (pre_at_bos) {
|
||||
pre_at_bos = false;
|
||||
if (tag.Block_close() == Xop_xnde_tag.Block_end
|
||||
) { // NOTE: only ignore if Block_end; loosely based on Parser.php|doBlockLevels|$closematch; DATE:2013-12-01
|
||||
ctx.Para().Process_block__bgn_n__end_y(tag);
|
||||
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, cur_pos, Xop_ignore_tkn.Ignore_tid_pre_at_bos));
|
||||
}
|
||||
}
|
||||
int gt_pos = -1; // find closing >; NOTE: MW does not ignore > inside quotes; EX: <div id="a>b">abc</div> -> <div id="a>
|
||||
boolean pre2_hack = false;
|
||||
int end_name_pos = cur_pos + tag.Name_len();
|
||||
Xop_xatr_parser atr_parser = ctx.App().Xatr_parser();
|
||||
for (int i = end_name_pos; i < src_len; i++) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Lt: // < encountered; may be inner node inside tag which is legal in wikitext; EX: "<ul style=<nowiki>#</nowiki>FFFFFF>"
|
||||
int name_bgn_pos = i + 1;
|
||||
if (name_bgn_pos < src_len) { // chk that name_bgn is less than src_len else arrayIndex error; EX: <ref><p></p<<ref/>; not that "<" is last char of String; DATE:2014-01-18
|
||||
int valid_inner_xnde_gt = atr_parser.Xnde_find_gt_find(src, name_bgn_pos, src_len); // check if <nowiki>, <noinclude>, <includeonly> or <onlyinclude> (which can exist inside tag)
|
||||
if (valid_inner_xnde_gt == String_.Find_none){ // not a <nowiki>
|
||||
switch (tag.Id()) {
|
||||
case Xop_xnde_tag_.Tid_input: break; // noop; needed for Options which may have < in value; DATE:2014-07-04
|
||||
default: return ctx.Lxr_make_txt_(cur_pos); // escape text; EX: "<div </div>" -> "<div </div>"; SEE:it.u:; DATE:2014-02-03
|
||||
}
|
||||
}
|
||||
else { // is a <nowiki> skip to </nowiki>
|
||||
if ( i == end_name_pos
|
||||
&& ctx.Parse_tid() == Xop_parser_.Parse_tid_tmpl
|
||||
&& Bry_.Eq(atr_parser.Bry_obj().Val(), Xop_xnde_tag_.Tag_includeonly.Name_bry())
|
||||
) {
|
||||
pre2_hack = true;
|
||||
}
|
||||
i = valid_inner_xnde_gt;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Gt:
|
||||
gt_pos = i;
|
||||
i = src_len;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (pre2_hack) {
|
||||
// Xop_xnde_tkn tt = tkn_mkr.Xnde(bgn_pos, gt_pos + 1).Tag_(tag);
|
||||
// ctx.Stack_add(tt);
|
||||
pre2_pending = true;
|
||||
return ctx.Lxr_make_txt_(cur_pos);
|
||||
}
|
||||
if (gt_pos == -1) {return ctx.Lxr_make_log_(Xop_xnde_log.Eos_while_closing_tag, src, bgn_pos, cur_pos);}
|
||||
boolean force_xtn_for_nowiki = false;
|
||||
int end_pos = gt_pos + 1;
|
||||
switch (ctx.Parse_tid()) { // NOTE: special logic to handle <*include*>; SEE: NOTE_1 below
|
||||
case Xop_parser_.Parse_tid_page_wiki: // NOTE: ignore if (a) wiki and (b) <noinclude> or <onlyinclude>
|
||||
switch (tag.Id()) {
|
||||
case Xop_xnde_tag_.Tid_noinclude:
|
||||
case Xop_xnde_tag_.Tid_onlyinclude:
|
||||
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, end_pos, Xop_ignore_tkn.Ignore_tid_include_wiki));
|
||||
return end_pos;
|
||||
case Xop_xnde_tag_.Tid_nowiki:
|
||||
force_xtn_for_nowiki = true;
|
||||
ctx_cur_tid_is_tblw_atr_owner = false;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Xop_parser_.Parse_tid_tmpl: // NOTE: ignore if (a) tmpl and (b) <includeonly>
|
||||
switch (tag.Id()) {
|
||||
case Xop_xnde_tag_.Tid_includeonly:
|
||||
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, end_pos, Xop_ignore_tkn.Ignore_tid_include_tmpl));
|
||||
return end_pos;
|
||||
case Xop_xnde_tag_.Tid_noinclude:
|
||||
return Make_noinclude(ctx, tkn_mkr, root, src, src_len, bgn_pos, gt_pos, tag, atrs_bgn_pos - 1, tag_is_closing); // -1 b/c atrs_bgn_pos may be set past >; may need to adjust above logic; DATE:2014-06-24
|
||||
case Xop_xnde_tag_.Tid_nowiki:
|
||||
force_xtn_for_nowiki = true;
|
||||
break;
|
||||
case Xop_xnde_tag_.Tid_onlyinclude:
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Xop_parser_.Parse_tid_page_tmpl: // NOTE: added late; SEE:comment test for "a <!-<noinclude></noinclude>- b -->c"
|
||||
switch (tag.Id()) {
|
||||
case Xop_xnde_tag_.Tid_noinclude:
|
||||
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, end_pos, Xop_ignore_tkn.Ignore_tid_include_tmpl));
|
||||
return end_pos;
|
||||
case Xop_xnde_tag_.Tid_nowiki: // if encountered in page_tmpl stage, mark nowiki as xtn; added for nowiki_xnde_frag; DATE:2013-01-27
|
||||
case Xop_xnde_tag_.Tid_includeonly: // includeonly should be resolved during template stage; EX: =<io>=</io>A=<io>=</io>; DATE:2014-02-12
|
||||
force_xtn_for_nowiki = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (ctx_cur_tid_is_tblw_atr_owner)
|
||||
Xop_tblw_wkr.Atrs_close(ctx, src, root, Bool_.Y); // < found inside tblw; close off tblw attributes; EX: |- id='abcd' <td>a</td> (which is valid wikitext; NOTE: must happen after <nowiki>
|
||||
if (tag_is_closing)
|
||||
return Make_xtag_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, gt_pos, tag);
|
||||
else
|
||||
return Make_xtag_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, gt_pos, name_bgn, name_end, tag, atrs_bgn_pos, src[tag_end_pos], force_xtn_for_nowiki, pre2_hack);
|
||||
}
|
||||
private static Xop_tkn_itm Make_bry_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int bgn_pos, int cur_pos) {
|
||||
int len = cur_pos - bgn_pos;
|
||||
byte[] bry = null;
|
||||
if (len == 1 && src[bgn_pos] == Byte_ascii.Lt) bry = Html_entity_.Lt_bry;
|
||||
else if (len == 2 && src[bgn_pos] == Byte_ascii.Lt
|
||||
&& src[bgn_pos + 1] == Byte_ascii.Slash) bry = Bry_escape_lt_slash; // NOTE: should use bgn_pos, not cur_pos; DATE:2014-10-22
|
||||
else bry = Bry_.Add(Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, cur_pos)); // +1 to skip <
|
||||
return tkn_mkr.Bry_raw(bgn_pos, cur_pos, bry);
|
||||
}
|
||||
private int Make_noinclude(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int gtPos, Xop_xnde_tag tag, int tag_end_pos, boolean tag_is_closing) {
|
||||
tag_end_pos = Bry_finder.Find_fwd_while(src, tag_end_pos, src_len, Byte_ascii.Space);// NOTE: must skip spaces else "<noinclude />" will not work with safesubst; PAGE:en.w:Wikipedia:Featured_picture_candidates; DATE:2014-06-24
|
||||
byte tag_end_byte = src[tag_end_pos];
|
||||
if (tag_end_byte == Byte_ascii.Slash) { // inline
|
||||
boolean valid = true;
|
||||
for (int i = tag_end_pos; i < gtPos; i++) {
|
||||
switch (src[i]) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: break;
|
||||
case Byte_ascii.Slash: break;
|
||||
default: valid = false; break;
|
||||
}
|
||||
}
|
||||
if (valid) {
|
||||
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, gtPos, Xop_ignore_tkn.Ignore_tid_include_tmpl));
|
||||
return gtPos + Xoa_prog_mgr.Adj_next_char;
|
||||
}
|
||||
else {
|
||||
return ctx.Lxr_make_txt_(gtPos);
|
||||
}
|
||||
}
|
||||
int end_rhs = -1, findPos = gtPos;
|
||||
byte[] end_bry = Xop_xnde_tag_.Tag_noinclude.XtnEndTag(); int end_bry_len = end_bry.length;
|
||||
if (tag_is_closing) // </noinclude>; no end tag to search for; DATE:2014-05-02
|
||||
end_rhs = gtPos;
|
||||
else { // <noinclude>; search for end tag
|
||||
while (true) {
|
||||
int end_lhs = Bry_finder.Find_fwd(src, end_bry, findPos);
|
||||
if (end_lhs == -1 || (end_lhs + end_bry_len) == src_len) break; // nothing found or EOS;
|
||||
findPos = end_lhs;
|
||||
for (int i = end_lhs + end_bry_len; i < src_len; i++) {
|
||||
switch (src[i]) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: break;
|
||||
case Byte_ascii.Slash: break;
|
||||
case Byte_ascii.Gt: end_rhs = i + 1; i = src_len; break; // +1 to place after Gt
|
||||
default: findPos = i ; i = src_len; break;
|
||||
}
|
||||
}
|
||||
if (end_rhs != -1) break;
|
||||
}
|
||||
if (end_rhs == -1) // end tag not found; match to end of String
|
||||
end_rhs = src_len;
|
||||
}
|
||||
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, end_rhs, Xop_ignore_tkn.Ignore_tid_include_tmpl));
|
||||
return end_rhs;
|
||||
}
|
||||
// set to true by Make_tkn when its pre2_hack check fires (tmpl mode, with an includeonly match directly after the tag name);
// read and reset by Make_xtag_end, which then emits an otherwise unmatched end tag as text
private boolean pre2_pending = false;
|
||||
private int Make_xtag_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int gtPos, int name_bgn, int name_end, Xop_xnde_tag tag, int tag_end_pos, byte tag_end_byte, boolean force_xtn_for_nowiki, boolean pre2_hack) {
|
||||
boolean inline = false;
|
||||
int open_tag_end = gtPos + Xoa_prog_mgr.Adj_next_char, atrs_bgn = -1, atrs_end = -1;
|
||||
// calc (a) inline; (b) atrs
|
||||
switch (tag_end_byte) { // look at last char of tag; EX: for b, following are registered: "b/","b>","b\s","b\n","b\t"
|
||||
case Byte_ascii.Slash: // "/" EX: "<br/"; // NOTE: <pre/a>, <pre//> are allowed
|
||||
inline = true;
|
||||
break;
|
||||
case Byte_ascii.Backslash: // allow <br\>; EX:w:Mosquito
|
||||
if (tag.Inline_by_backslash())
|
||||
src[tag_end_pos] = Byte_ascii.Slash;
|
||||
break;
|
||||
case Byte_ascii.Gt: // ">" "normal" tag; noop
|
||||
break;
|
||||
default: // "\s", "\n", "\t"
|
||||
atrs_bgn = tag_end_pos; // set atrs_bgn to first char after ws; EX: "<a\shref='b/>" atrs_bgn = pos(h)
|
||||
atrs_end = gtPos; // set atrs_end to gtPos; EX: "<a\shref='b/>" atrs_end = pos(>)
|
||||
if (src[gtPos - 1] == Byte_ascii.Slash) { // adjust if inline
|
||||
--atrs_end;
|
||||
inline = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
Xop_xatr_itm[] atrs = null;
|
||||
if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki) {
|
||||
atrs = ctx.App().Xatr_parser().Parse(ctx.Msg_log(), src, atrs_bgn, atrs_end);
|
||||
}
|
||||
if (( ( tag.Xtn()
|
||||
&& ( ctx.Parse_tid() != Xop_parser_.Parse_tid_tmpl // do not gobble up rest if in tmpl; handle <poem>{{{1}}}</poem>; DATE:2014-03-03
|
||||
|| tag.Xtn_skips_template_args() // ignore above if tag specifically skips template args; EX: <pre>; DATE:2014-04-10
|
||||
)
|
||||
)
|
||||
|| (force_xtn_for_nowiki && !inline)
|
||||
)
|
||||
) {
|
||||
return Make_xnde_xtn(ctx, tkn_mkr, root, src, src_len, tag, bgn_pos, gtPos + 1, name_bgn, name_end, atrs_bgn, atrs_end, atrs, inline, pre2_hack); // find end tag and do not parse anything inbetween
|
||||
}
|
||||
if (tag.Restricted()) {
|
||||
Xoae_page page = ctx.Cur_page();
|
||||
if ( page.Html_data().Html_restricted()
|
||||
&& page.Wiki().Domain_tid() != Xow_domain_type_.Tid_home) {
|
||||
int end_pos = gtPos + 1;
|
||||
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, end_pos, Bry_.Add(gplx.html.Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, end_pos)))); // +1 to skip <
|
||||
return end_pos;
|
||||
}
|
||||
}
|
||||
int prv_acs = ctx.Stack_idx_find_but_stop_at_tbl(Xop_tkn_itm_.Tid_xnde);
|
||||
Xop_xnde_tkn prv_xnde = prv_acs == -1 ? null : (Xop_xnde_tkn)ctx.Stack_get(prv_acs); //(Xop_xnde_tkn)ctx.Stack_get_typ(Xop_tkn_itm_.Tid_xnde);
|
||||
int prv_xnde_tagId = prv_xnde == null ? Xop_tkn_itm_.Tid_null : prv_xnde.Tag().Id();
|
||||
|
||||
boolean tag_ignore = false;
|
||||
int tagId = tag.Id();
|
||||
if (tagId == Xop_xnde_tag_.Tid_table || tag.TblSub()) { // tbl tag; EX: <table>,<tr>,<td>,<th>
|
||||
Tblw_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, gtPos + 1, tagId, atrs_bgn, atrs_end);
|
||||
return gtPos + 1;
|
||||
}
|
||||
else if (prv_xnde_tagId == Xop_xnde_tag_.Tid_p && tagId == Xop_xnde_tag_.Tid_p) {
|
||||
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Auto_closing_section, src, bgn_pos, bgn_pos);
|
||||
End_tag(ctx, root, prv_xnde, src, src_len, bgn_pos - 1, bgn_pos - 1, tagId, true, tag);
|
||||
}
|
||||
else if (tagId == prv_xnde_tagId && tag.Repeat_ends()) { // EX: "<code>a<code>b" -> "<code>a</code>b"
|
||||
End_tag(ctx, root, prv_xnde, src, src_len, bgn_pos - 1, bgn_pos - 1, tagId, true, tag);
|
||||
return gtPos + 1;
|
||||
}
|
||||
else if (tagId == prv_xnde_tagId && tag.Repeat_mids()) { // EX: "<li>a<li>b" -> "<li>a</li><li>b"
|
||||
End_tag(ctx, root, prv_xnde, src, src_len, bgn_pos - 1, bgn_pos - 1, tagId, true, tag);
|
||||
}
|
||||
else if (tag.SingleOnly()) inline = true; // <br></br> not allowed; convert <br> to <br/> </br> will be escaped
|
||||
else if (tag.NoInline() && inline) {
|
||||
Xop_xnde_tkn xnde_inline = Xnde_bgn(ctx, tkn_mkr, root, tag, Xop_xnde_tkn.CloseMode_open, src, bgn_pos, open_tag_end, atrs_bgn, atrs_end, atrs);
|
||||
End_tag(ctx, root, xnde_inline, src, src_len, bgn_pos, gtPos, tagId, false, tag);
|
||||
ctx.Msg_log().Add_itm_none(Xop_xnde_log.No_inline, src, bgn_pos, gtPos);
|
||||
return gtPos + Int_.Const_position_after_char;
|
||||
}
|
||||
Xop_xnde_tkn xnde = null;
|
||||
xnde = Xnde_bgn(ctx, tkn_mkr, root, tag, inline ? Xop_xnde_tkn.CloseMode_inline : Xop_xnde_tkn.CloseMode_open, src, bgn_pos, open_tag_end, atrs_bgn, atrs_end, atrs);
|
||||
if (!inline && tag.BgnNdeMode() != Xop_xnde_tag_.BgnNdeMode_inline)
|
||||
ctx.Stack_add(xnde);
|
||||
if (tag_ignore)
|
||||
xnde.Tag_visible_(false);
|
||||
if (tag.Empty_ignored()) ctx.Empty_ignored_y_();
|
||||
return open_tag_end;
|
||||
}
|
||||
private boolean Stack_find_xnde(Xop_ctx ctx, int cur_tag_id) {
|
||||
int acs_end = ctx.Stack_len() - 1;
|
||||
if (acs_end == -1) return false;
|
||||
for (int i = acs_end; i > -1; i--) {
|
||||
Xop_tkn_itm tkn = ctx.Stack_get(i);
|
||||
switch (tkn.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: // needed for badly formed tables;PAGE:ro.b:Pagina_principala DATE:2014-06-26
|
||||
case Xop_tkn_itm_.Tid_tblw_td:
|
||||
case Xop_tkn_itm_.Tid_tblw_th:
|
||||
case Xop_tkn_itm_.Tid_tblw_tc: // tables always reset tag_stack; EX: <table><tr><td><li><table><tr><td><li>; 2nd li is not nested in 1st
|
||||
return false;
|
||||
case Xop_tkn_itm_.Tid_xnde:
|
||||
Xop_xnde_tkn xnde_tkn = (Xop_xnde_tkn)tkn;
|
||||
int stack_tag_id = xnde_tkn.Tag().Id();
|
||||
if (cur_tag_id == Xop_xnde_tag_.Tid_li) {
|
||||
switch (stack_tag_id) {
|
||||
case Xop_xnde_tag_.Tid_ul: // ul / ol resets tag_stack for li; EX: <li><ul><li>; 2nd li is not nested in 1st
|
||||
case Xop_xnde_tag_.Tid_ol:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (stack_tag_id == cur_tag_id) return true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
private void Tblw_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int tagId, int atrs_bgn, int atrs_end) {
|
||||
byte wlxr_type = 0;
|
||||
switch (tagId) {
|
||||
case Xop_xnde_tag_.Tid_table: wlxr_type = Xop_tblw_wkr.Tblw_type_tb; break;
|
||||
case Xop_xnde_tag_.Tid_tr: wlxr_type = Xop_tblw_wkr.Tblw_type_tr; break;
|
||||
case Xop_xnde_tag_.Tid_td: wlxr_type = Xop_tblw_wkr.Tblw_type_td; break;
|
||||
case Xop_xnde_tag_.Tid_th: wlxr_type = Xop_tblw_wkr.Tblw_type_th; break;
|
||||
case Xop_xnde_tag_.Tid_caption: wlxr_type = Xop_tblw_wkr.Tblw_type_tc; break;
|
||||
}
|
||||
ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, true, wlxr_type, Xop_tblw_wkr.Called_from_general, atrs_bgn, atrs_end);
|
||||
}
|
||||
private void Tblw_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int tagId) {
|
||||
int typeId = 0;
|
||||
byte wlxr_type = 0;
|
||||
switch (tagId) {
|
||||
case Xop_xnde_tag_.Tid_table: typeId = Xop_tkn_itm_.Tid_tblw_tb; wlxr_type = Xop_tblw_wkr.Tblw_type_tb; break;
|
||||
case Xop_xnde_tag_.Tid_tr: typeId = Xop_tkn_itm_.Tid_tblw_tr; wlxr_type = Xop_tblw_wkr.Tblw_type_tr; break;
|
||||
case Xop_xnde_tag_.Tid_td: typeId = Xop_tkn_itm_.Tid_tblw_td; wlxr_type = Xop_tblw_wkr.Tblw_type_td; break;
|
||||
case Xop_xnde_tag_.Tid_th: typeId = Xop_tkn_itm_.Tid_tblw_th; wlxr_type = Xop_tblw_wkr.Tblw_type_th; break;
|
||||
case Xop_xnde_tag_.Tid_caption: typeId = Xop_tkn_itm_.Tid_tblw_tc; wlxr_type = Xop_tblw_wkr.Tblw_type_tc; break;
|
||||
}
|
||||
Xop_tblw_tkn prv_tkn = ctx.Stack_get_tbl();
|
||||
int prv_tkn_typeId = prv_tkn == null ? -1 : prv_tkn.Tkn_tid();
|
||||
ctx.Tblw().Make_tkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, typeId, wlxr_type, prv_tkn, prv_tkn_typeId, true);
|
||||
// ctx.Para().Process_block__bgn_n__end_y(ctx, root, src, bgn_pos, cur_pos);
|
||||
}
|
||||
private int Make_xtag_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_xnde_tag end_tag) {
|
||||
int end_tag_id = end_tag.Id();
|
||||
cur_pos = Bry_finder.Find_fwd_while_not_ws(src, cur_pos, src_len) + 1;
|
||||
int prv_xnde_pos = ctx.Stack_idx_find_but_stop_at_tbl(Xop_tkn_itm_.Tid_xnde); // find any previous xnde on stack
|
||||
Xop_xnde_tkn bgn_nde = (Xop_xnde_tkn)ctx.Stack_get(prv_xnde_pos);
|
||||
int bgn_tag_id = bgn_nde == null ? -1 : bgn_nde.Tag().Id();
|
||||
|
||||
int end_nde_mode = end_tag.EndNdeMode();
|
||||
boolean force_end_tag_to_match_bgn_tag = false;
|
||||
switch (bgn_tag_id) {
|
||||
case Xop_xnde_tag_.Tid_sub: if (end_tag_id == Xop_xnde_tag_.Tid_sup) force_end_tag_to_match_bgn_tag = true; break;
|
||||
case Xop_xnde_tag_.Tid_sup: if (end_tag_id == Xop_xnde_tag_.Tid_sub) force_end_tag_to_match_bgn_tag = true; break;
|
||||
case Xop_xnde_tag_.Tid_mark: if (end_tag_id == Xop_xnde_tag_.Tid_span) force_end_tag_to_match_bgn_tag = true; break;
|
||||
case Xop_xnde_tag_.Tid_span: if (end_tag_id == Xop_xnde_tag_.Tid_font) force_end_tag_to_match_bgn_tag = true; break;
|
||||
}
|
||||
if (force_end_tag_to_match_bgn_tag) {
|
||||
end_tag_id = bgn_tag_id;
|
||||
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Sub_sup_swapped, src, bgn_pos, cur_pos);
|
||||
}
|
||||
if (end_tag_id == Xop_xnde_tag_.Tid_table || end_tag.TblSub()) {
|
||||
Tblw_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, end_tag_id);
|
||||
return cur_pos;
|
||||
}
|
||||
if (end_tag.Empty_ignored() && ctx.Empty_ignored() // emulate TidyHtml logic for pruning empty tags; EX: "<li> </li>" -> "")
|
||||
&& bgn_nde != null) { // bgn_nde will be null if only end_nde; EX:WP:Sukhoi Su-47; "* </li>"
|
||||
ctx.Empty_ignore(root, bgn_nde.Tkn_sub_idx());
|
||||
End_tag(ctx, root, bgn_nde, src, src_len, bgn_pos, cur_pos, end_tag_id, true, end_tag);
|
||||
return cur_pos;
|
||||
}
|
||||
switch (end_nde_mode) {
|
||||
case Xop_xnde_tag_.EndNdeMode_inline: // PATCH.WP: allows </br>, </br/> and many other variants
|
||||
Xnde_bgn(ctx, tkn_mkr, root, end_tag, Xop_xnde_tkn.CloseMode_inline, src, bgn_pos, cur_pos, Int_.MinValue, Int_.MinValue, null); // NOTE: atrs is null b/c </br> will never have atrs
|
||||
return cur_pos;
|
||||
case Xop_xnde_tag_.EndNdeMode_escape: // handle </hr>
|
||||
ctx.Lxr_make_(false);
|
||||
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Escaped_xnde, src, bgn_pos, cur_pos - 1);
|
||||
return cur_pos;
|
||||
}
|
||||
if (prv_xnde_pos != Xop_ctx.Stack_not_found) { // something found
|
||||
if (bgn_tag_id == end_tag_id) { // end_nde matches bgn_nde; normal;
|
||||
End_tag(ctx, root, bgn_nde, src, src_len, bgn_pos, cur_pos, end_tag_id, true, end_tag);
|
||||
return cur_pos;
|
||||
}
|
||||
else {
|
||||
if (Stack_find_xnde(ctx, end_tag_id)) { // end_tag has bgnTag somewhere in stack;
|
||||
int end = ctx.Stack_len() - 1;
|
||||
for (int i = end; i > -1; i--) { // iterate stack and close all nodes until bgn_nde that matches end_nde
|
||||
Xop_tkn_itm tkn = ctx.Stack_get(i);
|
||||
if (tkn.Tkn_tid() == Xop_tkn_itm_.Tid_xnde) {
|
||||
Xop_xnde_tkn xnde_tkn = (Xop_xnde_tkn)tkn;
|
||||
End_tag(ctx, root, xnde_tkn, src, src_len, bgn_pos, bgn_pos, xnde_tkn.Tag().Id(), false, end_tag);
|
||||
ctx.Stack_pop_idx(i);
|
||||
if (xnde_tkn.Tag().Id() == end_tag_id) {
|
||||
xnde_tkn.Src_end_(cur_pos);
|
||||
return cur_pos;
|
||||
}
|
||||
else
|
||||
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Auto_closing_section, src, bgn_nde.Src_bgn(), bgn_nde.Name_end());
|
||||
}
|
||||
else
|
||||
ctx.Stack_auto_close(root, src, tkn, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_xnde);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (end_tag.Restricted()) // restricted tags (like <script>) are not placed on stack; for now, just write it out
|
||||
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, cur_pos, Bry_.Add(gplx.html.Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, cur_pos)))); // +1 to skip <
|
||||
else {
|
||||
if (pre2_pending) {
|
||||
pre2_pending = false;
|
||||
return ctx.Lxr_make_txt_(cur_pos);
|
||||
}
|
||||
else {
|
||||
if (end_tag.Xtn()) // if xtn end tag, ignore it; tidy / browser doesn't know about xtn_tags like "</poem>" so these need to be hidden, else they will show; DATE:2014-07-22
|
||||
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, cur_pos, Xop_ignore_tkn.Ignore_tid_xnde_dangling));
|
||||
else // regular tag; show it; depend on tidy to clean up; DATE:2014-07-22
|
||||
ctx.Subs_add(root, tkn_mkr.Bry_mid(src, bgn_pos, cur_pos));
|
||||
}
|
||||
}
|
||||
ctx.Para().Process_block__xnde(end_tag, end_tag.Block_close());
|
||||
|
||||
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Escaped_xnde, src, bgn_pos, cur_pos - 1);
|
||||
return cur_pos;
|
||||
}
|
||||
private void End_tag(Xop_ctx ctx, Xop_root_tkn root, Xop_xnde_tkn bgn_nde, byte[] src, int src_len, int bgn_pos, int cur_pos, int tagId, boolean pop, Xop_xnde_tag end_tag) {
|
||||
bgn_nde.Src_end_(cur_pos);
|
||||
bgn_nde.CloseMode_(Xop_xnde_tkn.CloseMode_pair);
|
||||
bgn_nde.Tag_close_rng_(bgn_pos, cur_pos);
|
||||
if (pop)
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_xnde), false, cur_pos, cur_pos, Xop_tkn_itm_.Tid_xnde);
|
||||
bgn_nde.Subs_move(root); // NOTE: Subs_move must go after Stack_pop_til, b/c Stack_pop_til adds tkns; see Xnde_td_list
|
||||
ctx.Para().Process_block__xnde(end_tag, end_tag.Block_close());
|
||||
}
|
||||
private Xop_xnde_tkn Xnde_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, Xop_xnde_tag tag, byte closeMode, byte[] src, int bgn_pos, int cur_pos, int atrs_bgn, int atrs_end, Xop_xatr_itm[] atrs) {
|
||||
Xop_xnde_tkn xnde = tkn_mkr.Xnde(bgn_pos, cur_pos).CloseMode_(closeMode);
|
||||
int xndeBgn = bgn_pos + 1;
|
||||
xnde.Name_rng_(xndeBgn, xndeBgn + tag.Name_len());
|
||||
xnde.Tag_(tag);
|
||||
xnde.Tag_open_rng_(bgn_pos, cur_pos);
|
||||
if (atrs_bgn > 0) {
|
||||
xnde.Atrs_rng_(atrs_bgn, atrs_end);
|
||||
xnde.Atrs_ary_(atrs);
|
||||
}
|
||||
ctx.Subs_add(root, xnde);
|
||||
ctx.Para().Process_block__xnde(tag, tag.Block_open());
|
||||
return xnde;
|
||||
}
|
||||
private int Find_end_tag_pos(byte[] src, int src_len, int find_bgn) {
|
||||
int rv = find_bgn;
|
||||
boolean found = false, loop = true;
|
||||
while (loop) {
|
||||
if (rv == src_len) break;
|
||||
byte b = src[rv];
|
||||
switch (b) {
|
||||
case Byte_ascii.Space:
|
||||
case Byte_ascii.Nl:
|
||||
case Byte_ascii.Tab:
|
||||
++rv;
|
||||
break;
|
||||
case Byte_ascii.Gt:
|
||||
found = true;
|
||||
loop = false;
|
||||
++rv; // add 1 to position after >
|
||||
break;
|
||||
default:
|
||||
loop = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return found ? rv : Bry_.NotFound;
|
||||
}
|
||||
private int Find_xtn_end_lhs(Xop_ctx ctx, Xop_xnde_tag tag, byte[] src, int src_len, int open_bgn, int open_end, byte[] close_bry) {
|
||||
int tag_bgn = open_bgn - Pfunc_tag.Xtag_len;
|
||||
if (tag_bgn > -1
|
||||
&& Bry_.Eq(Pfunc_tag.Xtag_bgn_lhs, src, tag_bgn, tag_bgn + Pfunc_tag.Xtag_bgn_lhs.length)) // xtn created by tag
|
||||
return Find_xtn_end_tag(ctx, src, src_len, open_end, close_bry, tag_bgn + Pfunc_tag.Xtag_bgn);
|
||||
else { // search rest of String for case-insensitive name; NOTE: used to do CS first, then fall-back on CI; DATE:2013-12-02
|
||||
xtn_end_tag_trie.Clear();
|
||||
xtn_end_tag_trie.Add_obj(close_bry, close_bry);
|
||||
for (int i = open_end; i < src_len; i++) {
|
||||
Object o = xtn_end_tag_trie.Match_bgn(src, i, src_len);
|
||||
if (o != null) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return Bry_.NotFound;
|
||||
}
|
||||
}
|
||||
private int Find_xtn_end_tag(Xop_ctx ctx, byte[] src, int src_len, int open_end, byte[] close_bry, int tag_bgn) {
|
||||
int tag_id = Bry_.Xto_int_or(src, tag_bgn, tag_bgn + 10, -1);
|
||||
if (tag_id == -1) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not extract int: page=~{0}", ctx.Cur_page().Url().Xto_full_str_safe()); return Bry_finder.Not_found;}
|
||||
Bry_bfr tmp = ctx.Wiki().Utl__bfr_mkr().Get_b128();
|
||||
tmp.Add(Pfunc_tag.Xtag_end_lhs).Add_int_pad_bgn(Byte_ascii.Num_0, 10, tag_id).Add(Pfunc_tag.Xtag_rhs);
|
||||
byte[] tag_end = tmp.To_bry_and_rls();
|
||||
int rv = Bry_finder.Find_fwd(src, tag_end, open_end + Pfunc_tag.Xtag_rhs.length);
|
||||
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find end: page=~{0}", ctx.Cur_page().Url().Xto_full_str_safe()); return Bry_finder.Not_found;}
|
||||
rv = Bry_finder.Find_bwd(src, Byte_ascii.Lt, rv - 1);
|
||||
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find <: page=~{0}", ctx.Cur_page().Url().Xto_full_str_safe()); return Bry_finder.Not_found;}
|
||||
return rv;
|
||||
}
|
||||
private int Make_xnde_xtn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, Xop_xnde_tag tag, int open_bgn, int open_end, int name_bgn, int name_end, int atrs_bgn, int atrs_end, Xop_xatr_itm[] atrs, boolean inline, boolean pre2_hack) {
|
||||
// NOTE: find end_tag that exactly matches bgnTag; must be case sensitive;
|
||||
int xnde_end = open_end;
|
||||
Xop_xnde_tkn xnde = null;
|
||||
if (inline) {
|
||||
xnde = Xnde_bgn(ctx, tkn_mkr, root, tag, Xop_xnde_tkn.CloseMode_inline, src, open_bgn, open_end, atrs_bgn, atrs_end, atrs);
|
||||
xnde.Tag_close_rng_(open_end, open_end); // NOTE: inline tag, so set TagClose to open_end; should noop
|
||||
}
|
||||
else {
|
||||
byte[] close_bry = tag.XtnEndTag_tmp(); // get tmp bry (so as not to new)
|
||||
if (tag.Langs() != null) { // cur tag has langs; EX:<section>; DATE:2014-07-18
|
||||
Xop_xnde_tag_lang tag_lang = tag.Langs_get(ctx.Lang().Case_mgr(), ctx.Cur_page().Lang().Lang_id(), src, name_bgn, name_end);
|
||||
if (tag_lang == null) // tag does not match lang; EX:<trecho> and lang=de;
|
||||
return ctx.Lxr_make_txt_(open_end);
|
||||
if (tag_lang != Xop_xnde_tag_lang._) // tag matches; note Xop_xnde_tag_lang._ is a wildcard match; EX:<section>
|
||||
close_bry = tag_lang.XtnEndTag_tmp();
|
||||
}
|
||||
int src_offset = open_bgn - 1; // open bgn to start at <; -2 to ignore </ ; +1 to include <
|
||||
int close_ary_len = close_bry.length;
|
||||
for (int i = 2; i < close_ary_len; i++) // 2 to ignore </
|
||||
close_bry[i] = src[src_offset + i];
|
||||
boolean auto_close = false;
|
||||
int close_bgn = Find_xtn_end_lhs(ctx, tag, src, src_len, open_bgn, open_end, close_bry);
|
||||
if (close_bgn == Bry_.NotFound) auto_close = true; // auto-close if end not found; verified with <poem>, <gallery>, <imagemap>, <hiero>, <references> DATE:2014-08-23
|
||||
int close_end = -1;
|
||||
if (auto_close) {
|
||||
xnde_end = close_bgn = close_end = src_len;
|
||||
}
|
||||
else {
|
||||
close_end = Find_end_tag_pos(src, src_len, close_bgn + close_bry.length);
|
||||
if (close_end == Bry_.NotFound) return ctx.Lxr_make_log_(Xop_xnde_log.Xtn_end_not_found, src, open_bgn, open_end);
|
||||
xnde_end = close_end;
|
||||
}
|
||||
|
||||
if (pre2_hack)
|
||||
return ctx.Lxr_make_txt_(close_end);
|
||||
xnde = New_xnde_pair(ctx, root, tkn_mkr, tag, open_bgn, open_end, close_bgn, close_end);
|
||||
xnde.Atrs_rng_(atrs_bgn, atrs_end);
|
||||
xnde.Atrs_ary_(atrs);
|
||||
if (close_bgn - open_end > 0)
|
||||
xnde.Subs_add(tkn_mkr.Txt(open_end, close_bgn));
|
||||
}
|
||||
switch (ctx.Parse_tid()) {
|
||||
case Xop_parser_.Parse_tid_page_tmpl: {
|
||||
Xox_xnde xnde_xtn = null;
|
||||
switch (tag.Id()) {
|
||||
case Xop_xnde_tag_.Tid_xowa_cmd: xnde_xtn = tkn_mkr.Xnde_xowa_cmd(); break;
|
||||
}
|
||||
if (xnde_xtn != null) {
|
||||
xnde_xtn.Xtn_parse(ctx.Wiki(), ctx, root, src, xnde);
|
||||
xnde.Xnde_xtn_(xnde_xtn);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Xop_parser_.Parse_tid_page_wiki: {
|
||||
Xox_xnde xnde_xtn = null;
|
||||
int tag_id = tag.Id();
|
||||
boolean escaped = false;
|
||||
switch (tag_id) {
|
||||
case Xop_xnde_tag_.Tid_xowa_cmd: xnde_xtn = tkn_mkr.Xnde_xowa_cmd(); break;
|
||||
case Xop_xnde_tag_.Tid_math: xnde_xtn = tkn_mkr.Xnde_math(); break;
|
||||
case Xop_xnde_tag_.Tid_poem: xnde_xtn = tkn_mkr.Xnde_poem(); break;
|
||||
case Xop_xnde_tag_.Tid_ref: xnde_xtn = gplx.xowa.xtns.cite.References_nde.Enabled ? tkn_mkr.Xnde_ref() : null; break;
|
||||
case Xop_xnde_tag_.Tid_references: xnde_xtn = gplx.xowa.xtns.cite.References_nde.Enabled ? tkn_mkr.Xnde_references() : null; break;
|
||||
case Xop_xnde_tag_.Tid_gallery: xnde_xtn = tkn_mkr.Xnde_gallery(); break;
|
||||
case Xop_xnde_tag_.Tid_imageMap: xnde_xtn = tkn_mkr.Xnde_imageMap(); break;
|
||||
case Xop_xnde_tag_.Tid_hiero: xnde_xtn = tkn_mkr.Xnde_hiero(); break;
|
||||
case Xop_xnde_tag_.Tid_inputBox: xnde_xtn = tkn_mkr.Xnde_inputbox(); break;
|
||||
case Xop_xnde_tag_.Tid_dynamicPageList: xnde_xtn = tkn_mkr.Xnde_dynamicPageList(); break;
|
||||
case Xop_xnde_tag_.Tid_pages: {
|
||||
xnde_xtn = tkn_mkr.Xnde_pages();
|
||||
boolean enabled = ctx.Wiki().Xtn_mgr().Xtn_proofread().Enabled();
|
||||
if (!enabled) { // if Page / Index ns does not exist, disable xtn and escape content; DATE:2014-11-28
|
||||
escaped = true;
|
||||
xnde_xtn = null;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Xop_xnde_tag_.Tid_pagequality: xnde_xtn = tkn_mkr.Xnde_pagequality(); break;
|
||||
case Xop_xnde_tag_.Tid_pagelist: xnde_xtn = tkn_mkr.Xnde_pagelist(); break;
|
||||
case Xop_xnde_tag_.Tid_section: xnde_xtn = tkn_mkr.Xnde_section(); break;
|
||||
case Xop_xnde_tag_.Tid_categoryList: xnde_xtn = tkn_mkr.Xnde_categoryList(); break;
|
||||
case Xop_xnde_tag_.Tid_syntaxHighlight: xnde_xtn = tkn_mkr.Xnde_syntaxHighlight(); break;
|
||||
case Xop_xnde_tag_.Tid_score: xnde_xtn = tkn_mkr.Xnde_score(); break;
|
||||
case Xop_xnde_tag_.Tid_translate: xnde_xtn = tkn_mkr.Xnde_translate(); break;
|
||||
case Xop_xnde_tag_.Tid_languages: xnde_xtn = tkn_mkr.Xnde_languages(); break;
|
||||
case Xop_xnde_tag_.Tid_templateData: xnde_xtn = tkn_mkr.Xnde_templateData(); break;
|
||||
case Xop_xnde_tag_.Tid_rss: xnde_xtn = tkn_mkr.Xnde_rss(); break;
|
||||
case Xop_xnde_tag_.Tid_quiz: xnde_xtn = tkn_mkr.Xnde_quiz(); break;
|
||||
case Xop_xnde_tag_.Tid_indicator: xnde_xtn = tkn_mkr.Xnde_indicator(); break;
|
||||
case Xop_xnde_tag_.Tid_xowa_html: xnde_xtn = tkn_mkr.Xnde_xowa_html(); break;
|
||||
case Xop_xnde_tag_.Tid_listing_buy:
|
||||
case Xop_xnde_tag_.Tid_listing_do:
|
||||
case Xop_xnde_tag_.Tid_listing_drink:
|
||||
case Xop_xnde_tag_.Tid_listing_eat:
|
||||
case Xop_xnde_tag_.Tid_listing_listing:
|
||||
case Xop_xnde_tag_.Tid_listing_see:
|
||||
case Xop_xnde_tag_.Tid_listing_sleep: xnde_xtn = tkn_mkr.Xnde_listing(tag_id); break;
|
||||
case Xop_xnde_tag_.Tid_timeline:
|
||||
boolean log_wkr_enabled = Timeline_log_wkr != Xop_log_basic_wkr.Null; if (log_wkr_enabled) Timeline_log_wkr.Log_end_xnde(ctx.Cur_page(), Xop_log_basic_wkr.Tid_timeline, src, xnde);
|
||||
ctx.Cur_page().Html_data().Module_mgr().Itm__timeline().Enabled_y_();
|
||||
break;
|
||||
case Xop_xnde_tag_.Tid_xowa_tag_bgn:
|
||||
case Xop_xnde_tag_.Tid_xowa_tag_end:
|
||||
break;
|
||||
case Xop_xnde_tag_.Tid_source: // added on DATE:2014-06-24
|
||||
case Xop_xnde_tag_.Tid_pre: // NOTE: pre must be an xtn, but does not create an xtn node (it gobbles up everything between); still need to touch the para_wkr; DATE:2014-02-20
|
||||
ctx.Para().Process_block__xnde(tag, Xop_xnde_tag.Block_bgn);
|
||||
if (Bry_finder.Find_fwd(src, Byte_ascii.Nl, xnde.Tag_open_end(), xnde.Tag_close_bgn()) != Bry_finder.Not_found)
|
||||
ctx.Para().Process_nl(ctx, root, src, xnde.Tag_open_bgn(), xnde.Tag_open_bgn());
|
||||
ctx.Para().Process_block__xnde(tag, Xop_xnde_tag.Block_end);
|
||||
break;
|
||||
}
|
||||
if (escaped) {
|
||||
root.Subs_del_after(root.Subs_len() - 1); // since content is escaped, delete xnde_xtn; DATE:2014-09-08
|
||||
return ctx.Lxr_make_txt_(open_end); // return after lhs_end, not entire xnde;
|
||||
}
|
||||
if (xnde_xtn != null) {
|
||||
try {
|
||||
xnde.Xnde_xtn_(xnde_xtn); // NOTE: must set xnde_xtn, else null ref (html_wtr expects non-null nde)
|
||||
xnde_xtn.Xtn_parse(ctx.Wiki(), ctx, root, src, xnde);
|
||||
}
|
||||
catch (Exception e) {
|
||||
String err_msg = String_.Format("failed to render extension: title={0} excerpt={1} err={2}", ctx.Cur_page().Ttl().Full_txt()
|
||||
, Bry_.Mid(src, xnde.Tag_open_end(), xnde.Tag_close_bgn())
|
||||
, Err_.Message_gplx_brief(e));
|
||||
if (Env_.Mode_testing())
|
||||
throw Exc_.new_exc(e, "xo", err_msg);
|
||||
else
|
||||
ctx.Wiki().Appe().Usr_dlg().Warn_many("", "", err_msg);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return xnde_end;
|
||||
} private Btrie_slim_mgr xtn_end_tag_trie = Btrie_slim_mgr.ci_ascii_(); // NOTE:ci.ascii:MW_const.en; listed XML node names are en
|
||||
private Xop_xnde_tkn New_xnde_pair(Xop_ctx ctx, Xop_root_tkn root, Xop_tkn_mkr tkn_mkr, Xop_xnde_tag tag, int open_bgn, int open_end, int close_bgn, int close_end) {
|
||||
Xop_xnde_tkn rv = tkn_mkr.Xnde(open_bgn, close_end).Tag_(tag).Tag_open_rng_(open_bgn, open_end).Tag_close_rng_(close_bgn, close_end).CloseMode_(Xop_xnde_tkn.CloseMode_pair);
|
||||
int name_bgn = open_bgn + 1;
|
||||
rv.Name_rng_(name_bgn, name_bgn + tag.Name_len());
|
||||
ctx.Subs_add(root, rv);
|
||||
return rv;
|
||||
}
|
||||
// byte[] constant created from the literal "</" via Bry_.new_a7
// NOTE(review): no reference to this constant is visible in this part of the file -- confirm it is still used
private static final byte[]
|
||||
Bry_escape_lt_slash = Bry_.new_a7("</")
|
||||
;
|
||||
public static int Find_gt_pos(Xop_ctx ctx, byte[] src, int cur_pos, int src_len) { // UNUSED
|
||||
int gt_pos = -1; // find closing >
|
||||
for (int i = cur_pos; i < src_len; i++) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Lt: // < encountered; may be inner node inside tag which is legal in wikitext; EX: "<ul style=<nowiki>#</nowiki>FFFFFF>"
|
||||
int valid_inner_xnde_gt = ctx.App().Xatr_parser().Xnde_find_gt_find(src, i + 1, src_len);
|
||||
if (valid_inner_xnde_gt != String_.Find_none) {
|
||||
i = valid_inner_xnde_gt;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Gt:
|
||||
gt_pos = i;
|
||||
i = src_len;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return gt_pos;
|
||||
}
|
||||
// static, reassignable log worker for <timeline> nodes; starts as Xop_log_basic_wkr.Null, and the timeline handling in this file only calls Log_end_xnde when it is not Null
public static Xop_log_basic_wkr Timeline_log_wkr = Xop_log_basic_wkr.Null;
|
||||
}
|
||||
class Xop_xnde_wkr_ {
|
||||
public static void AutoClose_handle_dangling_nde_in_caption(Xop_root_tkn root, Xop_tkn_itm owner) {
|
||||
int subs_bgn = -1, subs_len = owner.Subs_len();
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
Xop_tkn_itm sub_itm = owner.Subs_get(i);
|
||||
if (sub_itm.Tkn_tid() == Xop_tkn_itm_.Tid_pipe) { // tkn is "|"; assume that caption should end here
|
||||
subs_bgn = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (subs_bgn != -1)
|
||||
root.Subs_move(owner, subs_bgn, subs_len); // move everything after "|" back to root
|
||||
}
|
||||
}
|
||||
/*
|
||||
NOTE_1: special logic for <*include*>
|
||||
cannot process like regular xnde tag b/c cannot auto-close tags on tmpl
|
||||
EX: <includeonly>{{subst:</includeonly><includeonly>substcheck}}</includeonly>
|
||||
1st </io> would autoclose {{subst:
|
||||
Since the basic intent is to "hide" the tags in certain modes, simply create an ignore_tkn and exit
|
||||
*/
|
||||
158
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__basic_tst.java
Normal file
158
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__basic_tst.java
Normal file
@@ -0,0 +1,158 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xnde_wkr__basic_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Escape_lt() { // PURPOSE: some templates have unknown tags; PAGE:en.w:PHP
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str("a<code><?</code>b", String_.Concat_lines_nl_skip_last
|
||||
( "<p>a<code><?</code>b"
|
||||
, "</p>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Inline() {
|
||||
fxt.Test_parse_page_wiki("<ref/>" , fxt.tkn_xnde_(0, 6).CloseMode_(Xop_xnde_tkn.CloseMode_inline).Name_rng_(1, 4));
|
||||
}
|
||||
@Test public void Pair() {
|
||||
fxt.Test_parse_page_wiki("<div></div>", fxt.tkn_xnde_(0, 11).CloseMode_(Xop_xnde_tkn.CloseMode_pair).Name_rng_(1, 4));
|
||||
}
|
||||
@Test public void Pair_text() {
|
||||
fxt.Test_parse_page_wiki("<div>b</div>", fxt.tkn_xnde_(0, 12).Subs_(fxt.tkn_txt_(5, 6)));
|
||||
}
|
||||
@Test public void Deep1_pair1() {
|
||||
fxt.Test_parse_page_wiki("<div><div></div></div>", fxt.tkn_xnde_(0, 22).Name_rng_(1, 4)
|
||||
.Subs_(fxt.tkn_xnde_(5, 16).Name_rng_(6, 9)));
|
||||
}
|
||||
@Test public void Deep1_inline1() {
|
||||
fxt.Test_parse_page_wiki("<div><ref/></div>", fxt.tkn_xnde_(0, 17).Name_rng_(1, 4)
|
||||
.Subs_(fxt.tkn_xnde_(5, 11).Name_rng_(6, 9)) );
|
||||
}
|
||||
@Test public void Deep1_pair2() {
|
||||
fxt.Test_parse_page_wiki("<div><div></div><div></div></div>", fxt.tkn_xnde_(0, 33).Name_rng_(1, 4)
|
||||
.Subs_
|
||||
( fxt.tkn_xnde_( 5, 16).Name_rng_( 6, 9)
|
||||
, fxt.tkn_xnde_(16, 27).Name_rng_(17, 20)
|
||||
));
|
||||
}
|
||||
@Test public void Deep2_pair1() {
|
||||
fxt.Test_parse_page_wiki("<div><div><div></div></div></div>", fxt.tkn_xnde_(0, 33).Name_rng_(1, 4)
|
||||
.Subs_
|
||||
( fxt.tkn_xnde_( 5, 27).Name_rng_( 6, 9)
|
||||
.Subs_
|
||||
( fxt.tkn_xnde_(10, 21).Name_rng_(11, 14))
|
||||
));
|
||||
}
|
||||
@Test public void Slash() {// b/c mw allows unquoted attributes
|
||||
fxt.Test_parse_page_wiki("<ref / >a</ref>", fxt.tkn_xnde_(0, 15).Atrs_rng_(5, 7).Subs_(fxt.tkn_txt_(8, 9)));
|
||||
fxt.Test_parse_page_wiki("<ref name=a/b/>", fxt.tkn_xnde_(0, 15).Atrs_rng_(5, 13));
|
||||
}
|
||||
@Test public void Escaped() {
|
||||
fxt.Init_log_(Xop_xnde_log.Escaped_xnde).Test_parse_page_wiki("<div></span></div>", fxt.tkn_xnde_(0, 18).Subs_(fxt.tkn_bry_(5, 12)));// TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
}
|
||||
@Test public void Xtn() {
|
||||
fxt.Test_parse_page_wiki("<math><div></math>", fxt.tkn_xnde_(0, 18).Subs_(fxt.tkn_txt_(6, 11))); // NOTE: no dangling nde b/c .Xtn skips
|
||||
}
|
||||
@Test public void Xtn_ref() {
|
||||
fxt.Test_parse_page_wiki("<ref name=\"a\">b</ref>", fxt.tkn_xnde_(0, 21).Name_rng_(1, 4).Atrs_rng_(5, 13).Subs_(fxt.tkn_txt_(14, 15)));
|
||||
}
|
||||
@Test public void Lnki() {
|
||||
fxt.Test_parse_page_wiki("[[Image:a|b<br/>d]]"
|
||||
, fxt.tkn_lnki_().Ns_id_(Xow_ns_.Id_file).Trg_tkn_(fxt.tkn_arg_nde_().Val_tkn_(fxt.tkn_arg_itm_(fxt.tkn_txt_(2, 7), fxt.tkn_colon_(7), fxt.tkn_txt_(8, 9))))
|
||||
.Caption_tkn_(fxt.tkn_arg_nde_(10, 17).Val_tkn_(fxt.tkn_arg_itm_(fxt.tkn_txt_(10, 11), fxt.tkn_xnde_(11, 16), fxt.tkn_txt_(16, 17))))
|
||||
);
|
||||
}
|
||||
@Test public void Br_converted_to_reguar_br() {
|
||||
fxt.Test_parse_page_wiki("</br>a" , fxt.tkn_xnde_(0, 5), fxt.tkn_txt_(5, 6));
|
||||
fxt.Test_parse_page_wiki("<br/>a" , fxt.tkn_xnde_(0, 5), fxt.tkn_txt_(5, 6));
|
||||
fxt.Test_parse_page_wiki("</br/>a" , fxt.tkn_xnde_(0, 6), fxt.tkn_txt_(6, 7));
|
||||
}
|
||||
@Test public void CaseSensitivity() {
|
||||
fxt.Test_parse_page_wiki("<DiV></dIv>", fxt.tkn_xnde_(0, 11).CloseMode_(Xop_xnde_tkn.CloseMode_pair).Name_rng_(1, 4));
|
||||
}
|
||||
@Test public void CaseSensitivity_xtn_1() {
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "<Inputbox>a</Inputbox>b<inputbox>c</inputbox>"
|
||||
, "b"
|
||||
);
|
||||
}
|
||||
@Test public void CaseSensitivity_xtn_2() { // PURPOSE: xtn end_tag may not match bgn_tag; EX: w:Ehrenfest_paradox; <References></references>
|
||||
fxt.Test_parse_page_all_str("a<ref name=b /><References><ref name=b>c</ref></references>", String_.Concat_lines_nl
|
||||
( "a<sup id=\"cite_ref-b_0-0\" class=\"reference\"><a href=\"#cite_note-b-0\">[1]</a></sup><ol class=\"references\">"
|
||||
, "<li id=\"cite_note-b-0\"><span class=\"mw-cite-backlink\"><a href=\"#cite_ref-b_0-0\">^</a></span> <span class=\"reference-text\">c</span></li>"
|
||||
, "</ol>"
|
||||
));
|
||||
}
|
||||
@Test public void CaseSensitivity_xtn_3() {// PURPOSE: xtn xnde must do case-insensitive match DATE:2013-12-02
|
||||
fxt.Test_parse_page_all_str
|
||||
( "<matH>a</math> b <math>c</matH>" // <matH> should match </math> not </matH>
|
||||
, "<span id='xowa_math_txt_0'>a</span> b <span id='xowa_math_txt_0'>c</span>"
|
||||
);
|
||||
}
|
||||
@Test public void Whitelist() {
|
||||
fxt.Test_parse_page_all_str("<span onload='alert()'></span>", "<span></span>");
|
||||
}
|
||||
@Test public void Whitelist_pre() { // PURPOSE: <pre style="overflow:auto">a</pre> somehow becoming <prestyle="overflow:auto">a</pre>; Template:Infobox_country; ISSUE: old xatr code being triggered; PURPOSE:(2) style being stripped when it shouldn't be
|
||||
fxt.Test_parse_page_all_str("<pre style=\"overflow:auto\">a</pre>", "<pre style=\"overflow:auto\">a</pre>");
|
||||
}
|
||||
@Test public void Whitelist_style() {
|
||||
fxt.Test_parse_page_all_str("<div style=\"url(bad)\"></div>", "<div></div>");
|
||||
}
|
||||
@Test public void Script() { // PURPOSE: nested script should (a) write attributes; (b) write close tag; DATE:2014-01-24
|
||||
fxt.Test_parse_page_all_str("<code><script src='a'>b</script></code>", "<code><script src='a'>b</script></code>");
|
||||
}
|
||||
@Test public void Script_in_syntaxhighlight() {
|
||||
fxt.Test_parse_page_all_str("<syntaxhighlight><script>alert('fail');</script></syntaxhighlight>", "<pre style=\"overflow:auto;\"><script>alert('fail');</script></pre>");
|
||||
}
|
||||
@Test public void Script_in_math() {
|
||||
fxt.App().File_mgr().Math_mgr().Renderer_is_mathjax_(false);
|
||||
fxt.Test_parse_page_all_str("<math><script>alert('fail');</script></math>", "<img id='xowa_math_img_0' src='' width='' height=''/><span id='xowa_math_txt_0'><script>alert('fail');</script></span>");
|
||||
fxt.App().File_mgr().Math_mgr().Renderer_is_mathjax_(true);
|
||||
}
|
||||
@Test public void Html5_time() {// PURPOSE: HTML5; should output self (i.e.: must be whitelisted)
|
||||
fxt.Test_parse_page_wiki_str("<time class=\"dtstart\" datetime=\"2010-10-10\">10 October 2010</time>", "<time class=\"dtstart\" datetime=\"2010-10-10\">10 October 2010</time>");
|
||||
}
|
||||
@Test public void Html5_bdi() {// PURPOSE: HTML5; should output self (i.e.: must be whitelisted); DATE:2013-12-07
|
||||
fxt.Test_parse_page_wiki_str("<bdi lang=\"en\">a</bdi>", "<bdi lang=\"en\">a</bdi>");
|
||||
}
|
||||
@Test public void Html5_mark() {// PURPOSE: HTML5; should output self (i.e.: must be whitelisted); DATE:2014-01-03
|
||||
fxt.Test_parse_page_wiki_str("<mark lang=\"en\">a</mark>", "<mark lang=\"en\">a</mark>");
|
||||
}
|
||||
@Test public void Html5_mark_span() {// PURPOSE: </span> should close <mark> tag; EX: zh.wikipedia.org/wiki/异体字; DATE:2014-01-03
|
||||
fxt.Test_parse_page_wiki_str("<mark>a</span>", "<mark>a</mark>");
|
||||
}
|
||||
@Test public void Html5_wbr() {// PURPOSE: HTML5; should output self (i.e.: must be whitelisted); DATE:2014-01-03
|
||||
fxt.Test_parse_page_wiki_str("a<wbr>b<wbr>c", "a<wbr></wbr>b<wbr></wbr>c");
|
||||
}
|
||||
@Test public void Html5_bdo() {// PURPOSE: HTML5; should output self (i.e.: must be whitelisted); DATE:2014-01-03
|
||||
fxt.Test_parse_page_wiki_str("<bdo>a</bdo>", "<bdo>a</bdo>");
|
||||
}
|
||||
@Test public void Pre_always_parsed() { // PURPOSE: pre should not interpret templates; DATE:2014-04-10
|
||||
fxt.Init_defn_clear();
|
||||
fxt.Init_defn_add("a", "a");
|
||||
fxt.Init_defn_add("test", "<pre>{{a}}</pre>");
|
||||
fxt.Test_parse_page_all_str("{{test}}", "<pre>{{a}}</pre>");
|
||||
fxt.Init_defn_clear();
|
||||
}
|
||||
@Test public void Quote() {// PURPOSE: handle <q> element; DATE:2015-05-29
|
||||
fxt.Test_parse_page_wiki_str("<q>a</q>", "<q>a</q>");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xnde_wkr__blockquote_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Pre() { // PURPOSE: preserve leading spaces within blockquote; PAGE:en.w:Tenerife_airport_disaster
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<blockquote>"
|
||||
, " a"
|
||||
, "</blockquote>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<blockquote>"
|
||||
, " a"
|
||||
, "</blockquote>"
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Trailing_nls() { // PURPOSE: para/pre not working after blockquote; PAGE:en.w:Snappy_(software); DATE:2014-04-25
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<blockquote>a"
|
||||
, "</blockquote>"
|
||||
, ""
|
||||
, "b"
|
||||
, ""
|
||||
, " c"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<blockquote>a"
|
||||
, "</blockquote>"
|
||||
, ""
|
||||
, "<p>b"
|
||||
, "</p>"
|
||||
, ""
|
||||
, "<pre>c"
|
||||
, "</pre>"
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Dangling_multiple() { // PURPOSE: handle multiple dangling; PAGE:en.w:Ring_a_Ring_o'_Roses DATE:2014-06-26
|
||||
fxt.Test_parse_page_wiki_str("<blockquote>a<blockquote>b", "<blockquote>a</blockquote><blockquote>b</blockquote>");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,198 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*; import gplx.xowa.parsers.lists.*;
|
||||
public class Xop_xnde_wkr__err_dangling_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Basic() {
|
||||
fxt.Init_log_(Xop_xnde_log.Dangling_xnde)
|
||||
.Test_parse_page_wiki("<div>", fxt.tkn_xnde_(0, 5));
|
||||
}
|
||||
@Test public void Many() {
|
||||
fxt.Init_log_(Xop_xnde_log.Dangling_xnde, Xop_xnde_log.Dangling_xnde, Xop_xnde_log.Dangling_xnde)
|
||||
.Test_parse_page_wiki("<div><div><div>", fxt.tkn_xnde_(0, 15).Subs_(fxt.tkn_xnde_(5, 15).Subs_(fxt.tkn_xnde_(10, 15))));
|
||||
}
|
||||
@Test public void Nested() {
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "<div><div><center>a</div></div>"
|
||||
, "<div><div><center>a</center></div></div>"
|
||||
);
|
||||
}
|
||||
@Test public void Center() {
|
||||
fxt.Init_log_(Xop_xnde_log.Dangling_xnde).Test_parse_page_wiki("a<center>b"
|
||||
, fxt.tkn_txt_(0, 1)
|
||||
, fxt.tkn_xnde_(1, 10).CloseMode_(Xop_xnde_tkn.CloseMode_open).Subs_(fxt.tkn_txt_(9, 10))
|
||||
);
|
||||
}
|
||||
@Test public void P() {
|
||||
fxt.Init_log_(Xop_xnde_log.Auto_closing_section).Test_parse_page_wiki("a<p>b<p>c</p>"
|
||||
, fxt.tkn_txt_ (0, 1)
|
||||
, fxt.tkn_xnde_ (1, 4).Subs_(fxt.tkn_txt_(4, 5))
|
||||
, fxt.tkn_xnde_ (5, 13).Subs_(fxt.tkn_txt_(8, 9))
|
||||
);
|
||||
}
|
||||
@Test public void Alternating() { // PURPOSE: confirmation test for alternating dangling nodes; PAGE:en.w:Portal:Pornography/Selected_historical_image/Archive; DATE:2014-09-24
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "c<b><i>d<b><i>e"
|
||||
, "c<b><i>d<b><i>e</i></b></i></b>"
|
||||
);
|
||||
}
|
||||
@Test public void Li() { // PURPOSE: auto-close <li>; NOTE: no longer encloses in <ul/>; DATE:2014-06-26
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "<li>a<li>b"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<li>a</li>"
|
||||
, "<li>b</li>"
|
||||
));
|
||||
}
|
||||
@Test public void Br() {
|
||||
fxt.Test_parse_page_wiki("<br>a" , fxt.tkn_xnde_(0, 4), fxt.tkn_txt_(4, 5));
|
||||
fxt.Test_parse_page_wiki("a<br name=b>c", fxt.tkn_txt_(0, 1), fxt.tkn_xnde_(1, 12), fxt.tkn_txt_(12, 13));
|
||||
}
|
||||
@Test public void Td_and_td() { // PURPOSE: when "<td>a<td>", 2nd <td> should auto-close
|
||||
fxt.Test_parse_page_wiki("<table><tr><td>a<td></tr><tr><td>b</td></tr></table>"
|
||||
, fxt.tkn_tblw_tb_(0, 52).Subs_
|
||||
( fxt.tkn_tblw_tr_(7, 25).Subs_
|
||||
( fxt.tkn_tblw_td_(11, 16).Subs_(fxt.tkn_txt_(15, 16)) // FUTURE: change to 11,20
|
||||
, fxt.tkn_tblw_td_(16, 25) // FUTURE: change this to 16, 20
|
||||
)
|
||||
, fxt.tkn_tblw_tr_(25, 44).Subs_
|
||||
( fxt.tkn_tblw_td_(29, 39).Subs_(fxt.tkn_txt_(33, 34))
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Tblw_and_tr() {// PURPOSE: <tr> should auto-close |-; EX:fr.wikipedia.org/wiki/Napoléon_Ier; DATE:2013-12-09
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "<td>row1</td>"
|
||||
, "<tr><td>row2</td>"
|
||||
, "|}"
|
||||
)
|
||||
, String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>row1"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " <tr>"
|
||||
, " <td>row2"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Tblx_and_b() {
|
||||
fxt.Init_log_(Xop_xnde_log.Dangling_xnde).Test_parse_page_wiki("<table><tr><td><b>a<td></tr></table>"
|
||||
, fxt.tkn_tblw_tb_(0, 36).Subs_
|
||||
( fxt.tkn_tblw_tr_(7, 28).Subs_
|
||||
( fxt.tkn_tblw_td_(11, 19).Subs_ // FUTURE: change to 11,23
|
||||
( fxt.tkn_xnde_(15, 36).Subs_(fxt.tkn_txt_(18, 19)) // FUTURE: should be 19, but xnde.Close() is passing in src_len
|
||||
)
|
||||
, fxt.tkn_tblw_td_(19, 28) // FUTURE: should be 23
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Tblx_and_li() { // PURPOSE: </td> should close list; see Stamp Act 1765
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table><tr><td>"
|
||||
, "*abc</td></tr><tr><td>bcd</td></tr>"
|
||||
, "</table>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, ""
|
||||
, " <ul>"
|
||||
, " <li>abc"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " <tr>"
|
||||
, " <td>bcd"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Tblx_and_small() { // PURPOSE: </td> should close <small> correctly; see Stamp Act 1765
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table><tr><td>"
|
||||
, "<small>abc</td></tr><tr><td>bcd</td></tr>"
|
||||
, "</table>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, "<small>abc</small>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " <tr>"
|
||||
, " <td>bcd"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Blockquote_and_p() {
|
||||
fxt.Init_log_(Xop_xnde_log.Auto_closing_section).Test_parse_page_wiki("<blockquote>a<p>b</blockquote>"
|
||||
, fxt.tkn_xnde_(0, 30).Subs_
|
||||
( fxt.tkn_txt_(12, 13)
|
||||
, fxt.tkn_xnde_(13, 17).Subs_(fxt.tkn_txt_(16, 17))
|
||||
));
|
||||
}
|
||||
@Test public void List_and_b() {
|
||||
fxt.Init_log_(Xop_xnde_log.Dangling_xnde).Test_parse_page_wiki("*<b>a\n*"
|
||||
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ul).List_path_(0)
|
||||
, fxt.tkn_xnde_(1, 7).Subs_(fxt.tkn_txt_(4, 5))
|
||||
, fxt.tkn_list_end_(5).List_path_(0)
|
||||
, fxt.tkn_list_bgn_(5, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(1)
|
||||
, fxt.tkn_list_end_(7).List_path_(1)
|
||||
);
|
||||
}
|
||||
@Test public void Underline() { // PURPOSE: 2nd <u> should auto-close; PAGE:en.b:Textbook_of_Psychiatry/Alcoholism_and_Psychoactive_Substance_Use_Disorders DATE:2014-09-05
|
||||
fxt.Test_html_full_str("a<u>b<u>c", "a<u>b</u>c");
|
||||
}
|
||||
@Test public void Xtn_template() { // PURPOSE: dangling xtns within templates should be auto-closed inside template, not in calling page; PAGE:en.w:Provinces_and_territories_of_Canada DATE:2014-11-13
|
||||
fxt.Init_page_create("Template:A", "<poem>A");
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{{A}}"
|
||||
, " b" // poem should not extend to " b"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<div class=\"poem\">"
|
||||
, "<p>"
|
||||
, "A"
|
||||
, "</p>"
|
||||
, "</div>" // poem ends here
|
||||
, " b"
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xnde_wkr__err_malformed_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Lt_only() {
|
||||
fxt.Test_parse_page_wiki("<", fxt.tkn_txt_(0, 1));
|
||||
}
|
||||
@Test public void Eos_while_closing_tag() {
|
||||
fxt.Init_log_(Xop_xnde_log.Eos_while_closing_tag).Test_parse_page_wiki("<ref [[a]]", fxt.tkn_txt_(0, 4), fxt.tkn_space_(4, 5), fxt.tkn_lnki_(5, 10));
|
||||
}
|
||||
@Test public void End_tag_broken() { // chk that name_bgn is less than src_len else arrayIndex error; EX: <ref><p></p<<ref/>; DATE:2014-01-18
|
||||
fxt.Wiki().Xtn_mgr().Init_by_wiki(fxt.Wiki());
|
||||
fxt.Test_parse_page_all_str("<poem><p></p<</poem>", String_.Concat_lines_nl_skip_last
|
||||
( "<div class=\"poem\">"
|
||||
, "<p>" // NOTE: technically MW / WP does not add this <p>; however, easier to hardcode <p>; no "visual" effect; DATE:2014-04-27
|
||||
, "<p></p<</p>"
|
||||
, "</p>"
|
||||
, "</div>"
|
||||
));
|
||||
}
|
||||
@Test public void Incomplete_tag_div() { // PURPOSE: handle broken tags; EX: <div a </div> -> <div a; DATE:2014-02-03
|
||||
fxt.Test_parse_page_all_str("<div a </div>", "<div a </div>"); // note that "<div a " is escaped (not considered xnde; while "</div>" is literally printed; // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
}
|
||||
@Test public void Incomplete_tag_ref() {// PURPOSE: invalid tag shouldn't break parser; EX:w:Cullen_(surname); "http://www.surnamedb.com/Surname/Cullen<ref"
|
||||
fxt.Test_parse_page_all_str("a<ref", "a<ref");
|
||||
}
|
||||
@Test public void Inline_tag_fix() { // PURPOSE: force <b/> to be <b></b>; EX: w:Exchange_value
|
||||
fxt.Init_log_(Xop_xnde_log.No_inline);
|
||||
fxt.Test_parse_page_all_str("<b/>", "<b></b>");
|
||||
}
|
||||
@Test public void Tblw() { // PURPOSE.fix: don't auto-close past tblw PAGE:ro.b:Pagina_principala DATE:2014-06-26
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "<div>"
|
||||
, "{|" // this should stop xnde search
|
||||
, "<center>"
|
||||
, "</div>" // this should not find <div> as its bgn_tag; note that it will "drop out" below
|
||||
, "|}"
|
||||
, "</div>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<div>"
|
||||
, "<table><center></div>" // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</center>"
|
||||
, "</table>"
|
||||
, "</div>"
|
||||
));
|
||||
}
|
||||
@Test public void Incomplete_tag() { // PURPOSE: handle incomplete tag sequences; DATE:2014-10-22
|
||||
fxt.Test_parse_page_all_str("<", "<");
|
||||
fxt.Test_parse_page_all_str("</", "</");
|
||||
fxt.Test_parse_page_all_str("</<", "</<"); // this used to fail
|
||||
}
|
||||
}
|
||||
190
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__err_misc_tst.java
Normal file
190
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__err_misc_tst.java
Normal file
@@ -0,0 +1,190 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xnde_wkr__err_misc_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Error_br_removed() {
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <th><span>a</span><br/><span>b</span>"
|
||||
, " </th>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <th><span>a</span><br/><span>b</span>"
|
||||
, " </th>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Div_should_not_pop_past_td() { // PURPOSE: extra </div> should not close <div> that is outside of <td>; PAGE:en.w:Rome en.w:Ankara
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, "<tr>"
|
||||
, "<td>"
|
||||
, "<div>" // this is <div> #1
|
||||
, "<table>"
|
||||
, "<tr>"
|
||||
, "<td>"
|
||||
, "<div>" // this is <div> #2
|
||||
, "<div>"
|
||||
, "a"
|
||||
, "</div>"
|
||||
, "</td>"
|
||||
, "<td>"
|
||||
, "<div>"
|
||||
, "b"
|
||||
, "</div>"
|
||||
, "</div>" // this </div> was supposed to pop <div> #2, but can't (b/c of HTML rules); however, do not try to pop <div> #1;
|
||||
, "</td>"
|
||||
, "<td>"
|
||||
, "<div>"
|
||||
, "c"
|
||||
, "</div>"
|
||||
, "</td>"
|
||||
, "</tr>"
|
||||
, "</table>"
|
||||
, "</div>"
|
||||
, "</td>"
|
||||
, "</tr>"
|
||||
, "</table>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, "<div>"
|
||||
, " <table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, "<div>"
|
||||
, "<div>"
|
||||
, ""
|
||||
, "<p>a"
|
||||
, "</p>"
|
||||
, "</div>"
|
||||
, "</div>"
|
||||
, " </td>"
|
||||
, " <td>"
|
||||
, "<div>"
|
||||
, ""
|
||||
, "<p>b"
|
||||
, "</p>"
|
||||
, "</div>"
|
||||
, "</div>" // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
, " </td>"
|
||||
, " <td>"
|
||||
, "<div>"
|
||||
, ""
|
||||
, "<p>c"
|
||||
, "</p>"
|
||||
, "</div>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, "</div>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Xnde_pops() { // PURPOSE: somehow xnde pops upper nde; PAGE:en.w:Greek government debt crisis; "History of government debt"
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<i>"
|
||||
, "{|"
|
||||
, "|-"
|
||||
, "|<i>a</i>"
|
||||
, "|}"
|
||||
, "</i>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<i>"
|
||||
, "<table>"
|
||||
, " <tr>"
|
||||
, " <td><i>a</i>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, "</i>"
|
||||
));
|
||||
}
|
||||
@Test public void Err_inline_extension() {
|
||||
fxt.Test_parse_page_all_str
|
||||
( "<poem/>"
|
||||
, ""
|
||||
);
|
||||
}
|
||||
@Test public void Xnde_para() { // PURPOSE: buggy code caused </p> to close everything; keeping test b/c of <p> logic
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, "<tr>"
|
||||
, "<td>"
|
||||
, "<div>"
|
||||
, "<p>"
|
||||
, "<span>"
|
||||
, "</span>"
|
||||
, "</p>"
|
||||
, "</div>"
|
||||
, "</td>"
|
||||
, "</tr>"
|
||||
, "</table>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, "<div>"
|
||||
, "<p>"
|
||||
, "<span>"
|
||||
, "</span>"
|
||||
, "</p>"
|
||||
, "</div>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Sup_bug() { // PURPOSE: occurred at ref of UK; a {{cite web|url=http://www.abc.gov/{{dead link|date=December 2011}}|title=UK}} b
|
||||
fxt.Test_parse_page_wiki_str("x <b><sup>y</b> z", "x <b><sup>y</sup></b> z");
|
||||
}
|
||||
@Test public void Br_backslash() { // PURPOSE: allow <br\>; EX:w:Mosquito; [[Acalyptratae|A<br\>c<br\>a<br\>l<br\>y<br\>p<br\>t<br\>r<br\>a<br\>t<br\>a<br\>e]]
|
||||
fxt.Test_parse_page_all_str("<br\\>", "<br/>");
|
||||
}
|
||||
@Test public void Tt_does_not_repeat() { // PURPOSE: handle <tt>a<tt>; EX:w:Domain name registry
|
||||
fxt.Test_parse_page_all_str("<tt>a<tt>", "<tt>a</tt>");
|
||||
}
|
||||
@Test public void Loose_xnde_names() { // PURPOSE: MW allows <font-> and other variations; EX:w:2012_in_film
|
||||
fxt.Test_parse_page_all_str("<font-size='100%'>a</font>", "<font>a</font>");
|
||||
}
|
||||
@Test public void Anchor_nested() {
|
||||
fxt.Test_parse_page_all_str("b<a>c<a>d [[e]] f", "b<a>c<a>d <a href=\"/wiki/E\">e</a> f");
|
||||
}
|
||||
@Test public void Img_should_not_be_xtn() { // PURPOSE:<img> marked as .xtn; unclosed <img> was escaping rest of text; PAGE:de.w:Wikipedia:Technik/Archiv/2014 DATE:2014-11-06
|
||||
fxt.Test_parse_page_all_str("<img>''a''", "<img><i>a</i>");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xnde_wkr__include_basic_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@Before public void init() {fxt.Reset();}
|
||||
@Test public void Tmpl_includeonly() {fxt.Test_parse_tmpl_str_test("a<includeonly>b</includeonly>c" , "{{test}}", "abc");}
|
||||
@Test public void Tmpl_noinclude() {fxt.Test_parse_tmpl_str_test("a<noinclude>b</noinclude>c" , "{{test}}", "ac");}
|
||||
@Test public void Tmpl_onlyinclude() {fxt.Test_parse_tmpl_str_test("a<onlyinclude>b</onlyinclude>c" , "{{test}}", "b");}
|
||||
@Test public void Tmpl_onlyinclude_nest() {fxt.Test_parse_tmpl_str_test("{{#ifeq:y|y|a<onlyinclude>b</onlyinclude>c|n}}" , "{{test}}", "b");} // PURPOSE: check that onlyinclude handles (a) inside {{#if}} function (old engine did not); and (b) that abc are correctly added together
|
||||
@Test public void Tmpl_onlyinclude_page() {// PURPOSE: handle scenario similar to {{FA Number}} where # of articles is buried in page between onlyinclude tags; added noinclude as additional stress test
|
||||
fxt.Init_page_create("Transclude_1", "<noinclude>a<onlyinclude>b</onlyinclude>c</noinclude>d");
|
||||
fxt.Test_parse_tmpl_str_test("{{:Transclude_1}}" , "{{test}}", "b");
|
||||
}
|
||||
@Test public void Tmpl_onlyinclude_page2() { // PURPOSE: handle scenario similar to PS3 wherein onlyinclude was being skipped (somewhat correctly) but following text (<pre>) was also included
|
||||
fxt.Init_page_create("Transclude_2", "a<onlyinclude>b<includeonly>c</includeonly>d</onlyinclude>e<pre>f</pre>g");
|
||||
fxt.Test_parse_tmpl_str_test("{{:Transclude_2}}" , "{{test}}", "bcd");
|
||||
}
|
||||
@Test public void Tmpl_noinclude_unmatched() { // PURPOSE.fix: ignore unmatched </noinclude>; EX:fi.w:Sergio_Leone; DATE:2014-05-02
|
||||
fxt.Test_parse_tmpl_str_test("{{{1|</noinclude>}}}", "{{test|a}}", "a"); // was "{{{test|"
|
||||
}
|
||||
|
||||
@Test public void Wiki_includeonly() {fxt.Test_parse_page_all_str("a<includeonly>b</includeonly>c" , "ac");}
|
||||
@Test public void Wiki_noinclude() {fxt.Test_parse_page_all_str("a<noinclude>b</noinclude>c" , "abc");}
|
||||
@Test public void Wiki_onlyinclude() {fxt.Test_parse_page_all_str("a<onlyinclude>b</onlyinclude>c" , "abc");}
|
||||
@Test public void Wiki_oi_io() {fxt.Test_parse_page_all_str("a<onlyinclude>b<includeonly>c</includeonly>d</onlyinclude>e" , "abde");}
|
||||
@Test public void Wiki_oi_io_tblw() {
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "<onlyinclude>"
|
||||
, "{|"
|
||||
, "|-"
|
||||
, "|a<includeonly>"
|
||||
, "|}</includeonly></onlyinclude>"
|
||||
, "|-"
|
||||
, "|b"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " <tr>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
}
|
||||
/*
|
||||
<includeonly>-({{{1}}}={{{1}}}round-5)-({{{1}}}={{{1}}}round-4)-({{{1}}}={{{1}}}round-3)-({{{1}}}={{{1}}}round-2)-({{{1}}}={{{1}}}round-1)</includeonly><noinclude>
|
||||
{{pp-template}}Called by {{lt|precision/0}}</noinclude>
|
||||
|
||||
==includeonly -- aka: do not eval in template ==
|
||||
main: a<includeonly>b</includeonly>c<br/>
|
||||
tmpl: {{mwo_include_only|a|b|c}}
|
||||
|
||||
==noinclude -- aka: eval in template only==
|
||||
main: a<noinclude>b</noinclude>c<br/>
|
||||
tmpl: {{mwo_no_include|a|b|c}}
|
||||
|
||||
==onlyinclude -- aka: only include in template only (ignore everything else) ==
|
||||
main: a<onlyinclude>b</onlyinclude>c<br/>
|
||||
tmpl: {{mwo_only_include|a|b|c}}
|
||||
*/
|
||||
@@ -0,0 +1,194 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xnde_wkr__include_uncommon_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@Before public void init() {fxt.Reset();}
|
||||
@Test public void Ex_Tmpl_io_oi() { // PURPOSE: <includeonly> not parsing internals; PAGE:en.w:[[Template:MONTHNAME]]
|
||||
fxt.Test_parse_tmpl_str_test("<includeonly>{{#if:{{{1}}}|a|b}}</includeonly><noinclude>c</noinclude>", "{{test|1}}", "a");
|
||||
}
|
||||
@Test public void Ex_Tmpl_io_subst() { // PURPOSE: <includeonly> and @gplx.Internal protected subst; PAGE:en.w:[[Template:Dubious]]
|
||||
fxt.Init_defn_clear();
|
||||
fxt.Init_defn_add("mwo_print", "{{{1}}}");
|
||||
fxt.Init_defn_add("substcheck", "SUBST");
|
||||
fxt.Test_parse_tmpl_str_test(String_.Concat_lines_nl_skip_last
|
||||
( "{{mwo_print"
|
||||
, "|<includeonly>{{subst:</includeonly><includeonly>substcheck}}</includeonly>"
|
||||
, "}}"
|
||||
), "{{test}}"
|
||||
, "{{subst:substcheck}}\n"
|
||||
);
|
||||
fxt.Reset();
|
||||
fxt.Test_parse_tmpl_str_test(String_.Concat_lines_nl_skip_last
|
||||
( "{{mwo_print"
|
||||
, "|<includeonly>{{safesubst:</includeonly><includeonly>substcheck}}</includeonly>"
|
||||
, "}}"
|
||||
), "{{test}}"
|
||||
, "SUBST\n");
|
||||
fxt.Init_defn_clear();
|
||||
}
|
||||
@Test public void Ex_Tmpl_noinclude_prm_1() { // PURPOSE: <noinclude> should not process @gplx.Internal protected tkns; PAGE:en.w:[[Template:See]]
|
||||
fxt.Init_defn_clear();
|
||||
fxt.Init_defn_add("mwo_print", "{{{1}}}{{{2}}}");
|
||||
fxt.Test_parse_tmpl_str_test
|
||||
( "{{mwo_print|{{{1<noinclude>|not_seen</noinclude>}}}|{{{2}}}}}"
|
||||
, "{{test|a|b}}"
|
||||
, "ab"
|
||||
);
|
||||
fxt.Init_defn_clear();
|
||||
}
|
||||
@Test public void Ex_Tmpl_noinclude_prm_2() { // PURPOSE: <noinclude> should not process default tkn;
|
||||
fxt.Test_parse_tmpl_str_test
|
||||
( "{{#if: {{{x|<noinclude>y</noinclude>}}} | visible | hidden}}" // {{#if: {{{x|<noinclude>y</noinclude>}}} -> {{#if: {{{x|}} -> hidden
|
||||
, "{{test}}"
|
||||
, "hidden"
|
||||
);
|
||||
}
|
||||
@Test public void Ex_Tmpl_noinclude2() { // PURPOSE: <noinclude> should be separate from tkns {{convert|50|km|0|abbr=on}}
|
||||
fxt.Init_defn_clear();
|
||||
fxt.Init_defn_add("mwo_print", "{{{1}}}{{{2}}}");
|
||||
fxt.Test_parse_tmpl_str_test
|
||||
( "{{mwo_print<noinclude>{{{?}}}</noinclude>|a|b}}"
|
||||
, "{{test}}"
|
||||
, "ab"
|
||||
);
|
||||
fxt.Init_defn_clear();
|
||||
}
|
||||
@Test public void Exception_incompleteTag_matchNext() { // PURPOSE: "</noinclude" should not be matched;
|
||||
fxt.Test_parse_tmpl_str_test
|
||||
( "a<noinclude>b</noinclude c<noinclude>d</noinclude>e"
|
||||
, "{{test}}"
|
||||
, "ae"
|
||||
);
|
||||
}
|
||||
@Test public void Exception_noCloseTag() {
|
||||
fxt.Test_parse_tmpl_str_test
|
||||
( "a<noinclude>bcde"
|
||||
, "{{test}}"
|
||||
, "a"
|
||||
);
|
||||
}
|
||||
@Test public void Exception_inline() {
|
||||
fxt.Test_parse_tmpl_str_test
|
||||
( "a<noinclude/>bcde"
|
||||
, "{{test}}"
|
||||
, "abcde"
|
||||
);
|
||||
}
|
||||
@Test public void Exception_inline_2() {
|
||||
fxt.Test_parse_tmpl_str_test
|
||||
( "a<noinclude/a/>bcde"
|
||||
, "{{test}}"
|
||||
, "a<noinclude/a/>bcde"
|
||||
);
|
||||
}
|
||||
@Test public void Defect_onlyinclude_inside_template() { // PURPOSE: was eating up next template; PAGE:en.w:Wikipedia:Featured_articles
|
||||
fxt.Test_parse_page_all_str
|
||||
( "{{formatnum: <onlyinclude>1</onlyinclude>}} {{formatnum:2}}"
|
||||
, "1 2"
|
||||
);
|
||||
}
|
||||
@Test public void Only_include_preserves_nl() { // PURPOSE: given "a\n<onlyinclude>{|\n", "{|" should be table; PAGE:en.w:Wikipedia:Reference_desk
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( "a"
|
||||
, "<onlyinclude>==b==</onlyinclude>"
|
||||
, "c"
|
||||
)
|
||||
// , "{{test}}"
|
||||
, String_.Concat_lines_nl
|
||||
( "a"
|
||||
, ""
|
||||
, "<h2>b</h2>"
|
||||
, "c"
|
||||
));
|
||||
}
|
||||
@Test public void Only_include_interprets_template() { // PURPOSE: <oi> should interpret templates
|
||||
fxt.Init_defn_clear();
|
||||
fxt.Init_defn_add("test", "see_me");
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( "a"
|
||||
, "<onlyinclude>{{test}}</onlyinclude>"
|
||||
, "c"
|
||||
)
|
||||
, String_.Concat_lines_nl
|
||||
( "a"
|
||||
, "see_me"
|
||||
, "c"
|
||||
));
|
||||
}
|
||||
@Test public void Include_only_in_template_name() {// PURPOSE: includeonly in tmpl_name should be ignored; EX:de.w:Wikipedia:Projektdiskussion; DATE:2014-01-24
|
||||
fxt.Init_defn_clear();
|
||||
fxt.Init_defn_add("test", "abc");
|
||||
fxt.Test_parse_page_all_str("{{<includeonly></includeonly>test}}", "abc");
|
||||
}
|
||||
@Test public void Include_only_in_transcluded_page() {// PURPOSE: include only in transcluded page should be ignored; EX:de.w:Wikipedia:Projektdiskussion; DATE:2014-01-24; DATE:2014-05-10
|
||||
fxt.Init_page_create("page", "abc"); // create page in main ns
|
||||
fxt.Test_parse_page_all_str("{{:<includeonly>safesubst:</includeonly>page}}", "abc"); // will become {{:page}} which should then transclude page
|
||||
}
|
||||
@Test public void Include_only_subst_in_function() {// PURPOSE: includeonly and subst inside function should be ignored; PAGE:en.w:WikiProject_Articles_for_creation/BLD_Preload; DATE:2014-04-29
|
||||
fxt.Test_parse_page_all_str("{{<includeonly>subst:</includeonly>#expr:0}}", "0");
|
||||
}
|
||||
@Test public void Hdr() { // PURPOSE: includeonly should be evaluated during template parse; EX: es.b:Billar/T<>cnica/Clases_de_puentes; DATE:2014-02-12
|
||||
fxt.Test_parse_page_all_str("=<includeonly>=</includeonly>A=<includeonly>=</includeonly>", "<h1>A</h1>\n");
|
||||
}
|
||||
// @Test public void Noinclude_nested() { // PURPOSE: nested noincludes don't work; th.w:ISO_3166-1;DATE:2014-04-06
|
||||
// fxt.Init_defn_clear();
|
||||
// fxt.Init_defn_add("test", "a<noinclude>b<noinclude>c</noinclude>d</noinclude>e");
|
||||
// fxt.Test_parse_page_all_str("{{test}}", "ae");
|
||||
// }
|
||||
|
||||
// @Test public void Wiki_includeonly_ignore() {fxt.Test_parse_wiki_text("[[a<includeonly>b</includeonly>c]]", "[[ac]]");} // FUTURE: ttl parses by idx, and ignores includeonly: WHEN: upon encountering; may need to redo in other parsers?
|
||||
@Test public void Defect_noinclude_inside_main() { // PURPOSE: <onlyinclude> inside main was not returning content; PAGE:en.w:Wikipedia:Featured_articles
|
||||
fxt.Init_defn_clear();
|
||||
fxt.Init_defn_add("Test_tmpl", "{{:Test_page}}");
|
||||
fxt.Data_create("Test_page", "a{{#expr:<onlyinclude>1</onlyinclude>}}c");
|
||||
fxt.Test_parse_page_all_str
|
||||
( "{{Test_tmpl}}"
|
||||
, "1"
|
||||
);
|
||||
fxt.Init_defn_clear();
|
||||
}
|
||||
@Test public void Pre_and_includeonly() { // PAGE:https://en.wikipedia.org/wiki/BSD_licenses DATE:2014-05-23
|
||||
fxt.Init_defn_add("pre2", "<pre<includeonly></includeonly>>{{{1}}}</pre>");
|
||||
fxt.Test_parse_page_all_str
|
||||
( "{{pre2|a}}"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<pre>a</pre>"
|
||||
));
|
||||
}
|
||||
// @Test public void Pre_and_includeonly2() {
|
||||
// fxt.Init_defn_add("pre2", "<pre<includeonly></includeonly>><nowiki>{{{1}}}</nowiki></pre>");
|
||||
// fxt.Test_parse_page_all_str
|
||||
// ( "{{pre2|a}}"
|
||||
// , String_.Concat_lines_nl_skip_last
|
||||
// ( "<pre>a</pre>"
|
||||
// ));
|
||||
// }
|
||||
@Test public void Noinclude_inline_w_space_inside_safesubst() { // PURPOSE: "<noinclude />" did not work with safesubst b/c of space; PAGE:en.w:Wikipedia:Featured_picture_candidates; DATE:2014-06-24
|
||||
fxt.Test_parse_tmpl_str_test("{{SAFESUBST:<noinclude />#if:val_exists|y|n}}", "{{test}}", "y");
|
||||
}
|
||||
@Test public void Subst() {// PURPOSE: handle subst-includeonly-subst combination; PAGE:pt.w:Argentina DATE:2014-09-24
|
||||
fxt.Init_defn_clear();
|
||||
fxt.Init_defn_add("test", "{{<includeonly>subst:</includeonly>#switch:1|1=y|default=n}}");
|
||||
//fxt.Init_defn_add("test", "{{subst:#switch:1|1=y|default=n}}"); // keeping around for debugging purposes
|
||||
//fxt.Init_defn_add("test", "{{<includeonly>#switch:</includeonly>1|1=y|default=n}}"); // keeping around for debugging purposes
|
||||
fxt.Test_parse_page_all_str("{{test}}", "{{subst:#switch:1|1=y|default=n}}"); // note that subst is preserved b/c of <includeonly>
|
||||
fxt.Test_parse_page_all_str("{{subst:test}}", "y"); // note that expression is evaluated b/c of subst:
|
||||
}
|
||||
}
|
||||
104
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__li_tst.java
Normal file
104
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__li_tst.java
Normal file
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xnde_wkr__li_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Inside_tblx() { // PURPOSE: auto-close <li> (EX: "<li>a<li>") was causing 3rd <li> to close incorrectly
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "<table><tr><td><ul><li>a</li><li>b</li><li>c</li></ul></td></tr></table>"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td><ul>"
|
||||
, "<li>a</li>"
|
||||
, "<li>b</li>"
|
||||
, "<li>c</li></ul>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Li_nested_inside_ul() { // PURPOSE: nested li in ul should not be escaped; DATE:2013-12-04
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "<ul><li>a<ul><li>b</li></ul></li></ul>"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, "<li>a<ul>"
|
||||
, "<li>b</li></ul></li></ul>" // note that <li><li>b becomes <li><li>b but <li><ul><li>b should stay the same
|
||||
));
|
||||
}
|
||||
@Test public void Empty_ignored() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, "<li>a"
|
||||
, "</li><li>"
|
||||
, "</li><li>b"
|
||||
, "</li>"
|
||||
, "</ul>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, "<li>a"
|
||||
, "</li>"
|
||||
, "<li>b"
|
||||
, "</li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Empty_ignored_error() { // PAGE:en.w:Sukhoi_Su-47; "* </li>" causes error b/c </li> tries to close non-existent node
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "* a"
|
||||
, "* </li>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li> a"
|
||||
, " </li>"
|
||||
, " <li> </li>" // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Insert_nl() {// PURPOSE: <li> should always be separated by nl, or else items will merge, creating long horizontal scroll bar; EX:w:Music
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str("<ul><li>a</li><li>b</li></ul>"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, "<li>a</li>"
|
||||
, "<li>b</li></ul>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Duplicate() { // PURPOSE: redundant li; EX: "* <li>"; PAGE:it.w:Milano#Bibliographie; DATE:2013-07-23
|
||||
fxt.Test_parse_page_all_str("* <li>x</li>", String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li> "
|
||||
, "<li>x</li>" // TIDY: duplicate li will be stripped out; DATE:2014-06-26
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Dangling_inside_xnde() { // PURPOSE.TIDY: handle "<li><span>a<li><span>b"; PAGE:ro.w:Pagina principala; DATE:2014-06-26
|
||||
fxt.Test_parse_page_all_str("<li><span>a<li><span>b", String_.Concat_lines_nl_skip_last
|
||||
( "<li><span>a"
|
||||
, "<li><span>b</span></li></span></li>" // TIDY: will (a) move </span></li> to 1st line
|
||||
));
|
||||
}
|
||||
}
|
||||
144
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__nowiki_tst.java
Normal file
144
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__nowiki_tst.java
Normal file
@@ -0,0 +1,144 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xnde_wkr__nowiki_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Basic() {
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "<nowiki>''a''</nowiki>b"
|
||||
, "''a''b"
|
||||
);
|
||||
}
|
||||
@Test public void Template() {
|
||||
fxt.Init_para_y_();
|
||||
fxt.Init_defn_add("nowiki_test", "<nowiki>#</nowiki>a");
|
||||
fxt.Test_parse_page_all_str
|
||||
( "{{nowiki_test}}"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<p>#a"
|
||||
, "</p>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void H2() { // PAGE:en.w:HTML
|
||||
fxt.Test_parse_page_all_str
|
||||
( "a<nowiki><h1>b<h6></nowiki>c"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "a<h1>b<h6>c"
|
||||
));
|
||||
}
|
||||
@Test public void Lnke() { // PAGE:en.w:Doomsday_argument; <nowiki>[0, 1]</nowiki>
|
||||
fxt.Test_parse_page_wiki_str("a <nowiki>[0, 1]</nowiki> b", "a [0, 1] b"); // NOTE: not "0" + Byte_.XtoStr(160) + "1"; depend on browser to translate
|
||||
}
|
||||
@Test public void Xatrs_val_text() {
|
||||
fxt.Test_parse_page_all_str
|
||||
( "<div id=<nowiki>a</nowiki>>b</div>"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<div id=\"a\">b</div>"
|
||||
));
|
||||
}
|
||||
@Test public void Xatrs_val_quote() {
|
||||
fxt.Test_parse_page_all_str
|
||||
( "<div id='a<nowiki>b</nowiki>c'>d</div>"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<div id='abc'>d</div>"
|
||||
));
|
||||
}
|
||||
@Test public void Xatrs_eq() {
|
||||
fxt.Test_parse_page_all_str("<ul id<nowiki>=</nowiki>\"a\" class<nowiki>=</nowiki>\"b\"><li><span class=\"c\">d</li></ul>", String_.Concat_lines_nl_skip_last
|
||||
( "<ul id=\"a\" class=\"b\">"
|
||||
, "<li><span class=\"c\">d</span></li></ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Tblw_atr() {// PURPOSE: nowiki breaks token
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|style=\"background-color:<nowiki>#</nowiki>FFCC99\""
|
||||
, "|a"
|
||||
, "|}"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<table style=\"background-color:#FFCC99\">"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Prex() { // PURPOSE: nowikis inside pre should be ignored; DATE:2013-03-30
|
||||
fxt.Test_parse_page_all_str("<pre>a<nowiki><</nowiki>b</pre>" , "<pre>a<b</pre>"); // basic
|
||||
fxt.Test_parse_page_all_str("<pre>a<nowiki><<nowiki>b</pre>" , "<pre>a<nowiki><<nowiki>b</pre>"); // not closed
|
||||
fxt.Test_parse_page_all_str("<pre><nowiki>a<nowiki>b</nowiki>c</nowiki></pre>" , "<pre><nowiki>abc</nowiki></pre>"); // nested; this is wrong, but leave for now; should be a<nowiki>b</nowiki>c
|
||||
}
|
||||
@Test public void Prew() { // PURPOSE: space inside nowiki should be ignored; ru.b:Rubyn DATE:2014-07-03
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( " a<nowiki>"
|
||||
, " <b></b></nowiki>" // note that "\s" must remain "\s" so that <pre> continues uninterrupted
|
||||
), String_.Concat_lines_nl
|
||||
( "<pre>a"
|
||||
, "<b></b>"
|
||||
, "</pre>"
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Prew_2() { // PURPOSE: prew should continue over nowiki, even if no space DATE:2014-07-03
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( " <nowiki>a"
|
||||
, "b</nowiki>" // note that "b" should be in pre b/c it is part of <nowiki> which is pre'd (even though there is no \n\s)
|
||||
), String_.Concat_lines_nl
|
||||
( "<pre>a"
|
||||
, "b"
|
||||
, "</pre>"
|
||||
)
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Code() { // PURPOSE.fix:HtmlNcr-escaped refs were being ignored; caused by HtmlTidy fix for frwiki templates;DATE:2013-06-27
|
||||
fxt.Test_parse_page_all_str("<code><nowiki>|:</nowiki></code>", "<code>|:</code>");
|
||||
}
|
||||
@Test public void Brack_end() { // PURPOSE: check that "]" is escaped; PAGE:en.w:Tall_poppy_syndrome; DATE:2014-07-23
|
||||
fxt.Test_parse_page_all_str
|
||||
( "<nowiki>[</nowiki>[[A]]<nowiki>]</nowiki>"
|
||||
, "[<a href=\"/wiki/A\">A</a>]"); // was showing up as [[[A]]]
|
||||
}
|
||||
@Test public void Tblw_tr() { // PURPOSE: dash should be escaped in nowiki PAGE:de.w:Liste_von_Vereinen_und_Vereinigungen_von_Gläubigen_(römisch-katholische_Kirche) DATE:2015-01-08
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "|<nowiki>-</nowiki>" // do not treat as "|-"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " <td>-" // "|" creates <td>; "-" is rendered literally
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
}
|
||||
80
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__tblx_tst.java
Normal file
80
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__tblx_tst.java
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xnde_wkr__tblx_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Table() {
|
||||
fxt.Test_parse_page_wiki("a<table><tr><td>b</td></tr></table>c"
|
||||
, fxt.tkn_txt_ ( 0, 1)
|
||||
, fxt.tkn_tblw_tb_(1, 35).Subs_
|
||||
( fxt.tkn_tblw_tr_(8, 27).Subs_
|
||||
( fxt.tkn_tblw_td_(12, 22).Subs_(fxt.tkn_txt_(16, 17))
|
||||
)
|
||||
)
|
||||
, fxt.tkn_txt_ (35, 36)
|
||||
);
|
||||
}
|
||||
@Test public void Ws_bgn() { // PURPOSE: some templates return leading ws; PAGE:en.w:UK
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( " <table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Td_in_lnki_should_be_ignored() {// PURPOSE: \n| inside lnki should not be interpreted as table cell; EX: uk.w:Дніпро; DATE:2014-03-11
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table><tr><td>"
|
||||
, "[[File:A.png|150px"
|
||||
, "|B]]</td></tr></table>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, "<a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"B\" src=\"file:///mem/wiki/repo/trg/thumb/7/0/A.png/150px.png\" width=\"150\" height=\"0\" /></a>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Nl() {
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "<table>\n\n\n\n\n</table>"
|
||||
, "<table>\n"
|
||||
+ "</table>\n"
|
||||
);
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xnde_wkr__text_block_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Source_wikitext() { // PURPOSE.ASSERT: wikitext should be rendered literally; DATE:2014-03-11
|
||||
fxt.Test_parse_page_wiki_str("<source>''a''</source>", "<pre>''a''</pre>");
|
||||
}
|
||||
@Test public void Source_nowiki() { // PURPOSE.ASSERT: onlyinclude should be rendered literally; DATE:2014-03-11
|
||||
fxt.Test_parse_page_wiki_str("<source><onlyinclude>a</onlyinclude></source>", "<pre><onlyinclude>a</onlyinclude></pre>");
|
||||
}
|
||||
@Test public void Source_escape() {
|
||||
fxt.Test_parse_page_wiki_str("<source><b></source>", "<pre><b></pre>");
|
||||
}
|
||||
@Test public void Source_escape_amp() { // PURPOSE: < should be rendered as &lt; PAGE:uk.b:HTML; DATE:2014-03-11
|
||||
fxt.Test_parse_page_wiki_str("<source><</source>", "<pre>&lt;</pre>");
|
||||
}
|
||||
@Test public void Source_pre() { // PURPOSE: handle pre; PAGE:en.w:Comment_(computer_programming); DATE:2014-06-23
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( " <source>"
|
||||
, " a"
|
||||
, " </source>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( " <pre>"
|
||||
, " a"
|
||||
, "</pre>"
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Code_dangling() { // PAGE:en.w:HTML; <code><i><code> and <code><center><code> tags. There are
|
||||
fxt.Test_parse_page_wiki_str("a<code>b<code>c", "a<code>b</code>c");
|
||||
}
|
||||
@Test public void Code_do_not_escape() { // PURPOSE: <code> was mistakenly marked as escape, causing inner tags to be rendered incorrectly; PAGE:en.w:UTF8
|
||||
fxt.Test_parse_page_all_str
|
||||
( "<code><span style=\"color:red;\">0100100</span></code>"
|
||||
, "<code><span style=\"color:red;\">0100100</span></code>"
|
||||
);
|
||||
}
|
||||
@Test public void Pre_and_html_chars() {// PURPOSE: <pre> should handle '"<> according to context
|
||||
fxt.Test_parse_page_all_str("<pre>a	b</pre>" , "<pre>a	b</pre>"); // known ncr/dec; embed and depend on browser transforming; EX: de.w:Wikipedia:Technik/Skin/Werkstatt
|
||||
fxt.Test_parse_page_all_str("<pre>a�b</pre>" , "<pre>a&#9999999999;b</pre>"); // unknown ncr/dec; escape & (since browser cannot render);
|
||||
fxt.Test_parse_page_all_str("<pre>a&#af ;b</pre>" , "<pre>a&#af ;b</pre>"); // unknown ncr/dec 2
|
||||
fxt.Test_parse_page_all_str("<pre>a	b</pre>" , "<pre>a	b</pre>"); // known ncr/hex
|
||||
fxt.Test_parse_page_all_str("<pre>a'b</pre>" , "<pre>a'b</pre>"); // known name; embed
|
||||
fxt.Test_parse_page_all_str("<pre>a&apox;b</pre>" , "<pre>a&apox;b</pre>"); // unknown name; escape
|
||||
fxt.Test_parse_page_all_str("<pre>&\"<></pre>" , "<pre>&"<></pre>"); // no ncr or name; escape; needed for <pre><img ...></pre>; PAGE:en.w:Alt attribute
|
||||
}
|
||||
@Test public void Pre_and_space() {// PURPOSE: make sure pre does not careate <p></p> around it; also, make sure " a" is preserved; DATE:2014-02-20
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<pre>"
|
||||
, " a"
|
||||
, "</pre>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<pre>"
|
||||
, " a"
|
||||
, "</pre>"
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
}
|
||||
46
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__tidy_tst.java
Normal file
46
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__tidy_tst.java
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xnde_wkr__tidy_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Sub_sup_autocorrect() {
|
||||
fxt.Test_parse_page_wiki_str("<sub>a</sup>b", "<sub>a</sub>b");
|
||||
fxt.Test_parse_page_wiki_str("<sup>a</sub>b", "<sup>a</sup>b");
|
||||
}
|
||||
@Test public void Span_font_autocorrect() { // PURPOSE: force </font> to close <span>; EX:w:Rupee; DATE:2014-04-07
|
||||
fxt.Test_parse_page_wiki_str("<span>a</font>b", "<span>a</span>b");
|
||||
}
|
||||
@Test public void Move_ws_char() {
|
||||
fxt.Test_parse_page_all_str("a<i> b </i>c", "a <i>b</i> c");
|
||||
}
|
||||
@Test public void Move_ws_ent() {
|
||||
fxt.Test_parse_page_all_str("a<i> b </i>c", "a <i>b</i> c");
|
||||
}
|
||||
@Test public void Ignore_empty_tags() { // PURPOSE: ignore tag if marked ignore_empty; EX:uk.b:HTML; DATE:2014-03-12
|
||||
fxt.Test_parse_page_all_str("a<pre></pre>b", "ab");
|
||||
}
|
||||
// @Test public void Escaped_div() { // NOTE: WP <div><span>a</span></div><span>b</span>; MW: <div><span>a</div>b</span> // REVISIT: 2012-05-11; WP does harder split-span
|
||||
// fxt.Init_log_(Xop_xnde_log.Auto_closing_section, Xop_xnde_log.Escaped_xnde).Test_parse_page_wiki("<div><span></div></span>"
|
||||
// , fxt.tkn_xnde_(0, 17).Subs_
|
||||
// ( fxt.tkn_xnde_(5, 11))
|
||||
// , fxt.tkn_ignore_(17, 24)
|
||||
// );
|
||||
// }
|
||||
}
|
||||
57
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__xatrs_tst.java
Normal file
57
400_xowa/src_490_xnde/gplx/xowa/Xop_xnde_wkr__xatrs_tst.java
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xnde_wkr__xatrs_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Inline() {
|
||||
fxt.Test_parse_page_wiki("<ref cd=\"ef\" />" , fxt.tkn_xnde_(0, 15).Atrs_rng_(5, 13));
|
||||
fxt.Test_parse_page_wiki("<ref cd = \"e f\" />" , fxt.tkn_xnde_(0, 21).Atrs_rng_(5, 19)); // ws
|
||||
}
|
||||
@Test public void Bgn() {
|
||||
fxt.Test_parse_page_wiki("<div cd=\"ef\"></div>" , fxt.tkn_xnde_(0, 19).Atrs_rng_(5, 12)); // basic
|
||||
}
|
||||
@Test public void Repeated() { // PURPOSE: if atr is repeated, take 1st, not last; EX: it.u:Dipartimento:Fisica_e_Astronomia; DATE:2014-02-09
|
||||
fxt.Test_parse_page_all_str("<span style='color:red' style='color:green'>a</span>" , "<span style='color:green'>a</span>"); // two
|
||||
fxt.Test_parse_page_all_str("<span style='color:red' style='color:green' style='color:blue'>a</span>" , "<span style='color:blue'>a</span>"); // three
|
||||
}
|
||||
@Test public void Non_ws() { // PURPOSE: <br$2/> is valid; symbols function as ws
|
||||
fxt.Init_log_(Xop_xatr_parser.Log_invalid_atr).Test_parse_page_wiki("<br$2/>" , fxt.tkn_xnde_(0, 7).Atrs_rng_(3, 5));
|
||||
}
|
||||
@Test public void Invalid() { // PURPOSE: make sure brx does not match br
|
||||
fxt.Test_parse_page_wiki("<brx/>" , fxt.tkn_bry_(0, 1), fxt.tkn_txt_(1, 6));
|
||||
}
|
||||
@Test public void Id_encode() {
|
||||
fxt.Test_parse_page_all_str("<div id=\"a b c\"></div>", "<div id=\"a_b_c\"></div>");
|
||||
}
|
||||
@Test public void Lt_should_not_be_escaped_in_input() { // PURPOSE: options textboxes were escaped if input's value had "<"; DATE:2014-07-04
|
||||
fxt.Page().Html_data().Html_restricted_n_();
|
||||
fxt.Test_parse_page_wiki_str("<input value='a<'></input>", "<input value='a<'></input>"); // NOTE: do not call parse_page_all_str which will call Page.Clear and reset Restricted
|
||||
fxt.Page().Html_data().Html_restricted_y_();
|
||||
}
|
||||
// @Test public void Unclosed() { // PURPOSE: unclosed atr should be treated as key, which should be ignored; PAGE:en.w:Palace of Versailles
|
||||
// fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
// ( "<span id=\"1<>>a" // id="1<> -> key named 'id="1<>' which fails whitelist keys
|
||||
// , "</span>"
|
||||
// ), String_.Concat_lines_nl_skip_last
|
||||
// ( "<span>a"
|
||||
// , "</span>"
|
||||
// ));
|
||||
// }
|
||||
}
|
||||
Reference in New Issue
Block a user