1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

v2.10.3.1

This commit is contained in:
gnosygnu
2015-10-18 22:17:57 -04:00
parent 8e18af05b6
commit 4f43f51b18
1935 changed files with 12500 additions and 12889 deletions

View File

@@ -1,47 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Keyed view over an array of parsed attributes.
 * Items are stored under their Key_bry; when two items share a key the hash's
 * Add_if_dupe_use_nth policy decides which one is kept.
 * Values are resolved against the src byte array supplied at construction.
 */
public class Xop_xatr_hash {
	private final byte[] src;
	private final Ordered_hash hash = Ordered_hash_.new_bry_();
	Xop_xatr_hash(byte[] src) {
		this.src = src;
	}
	/** Number of attributes held. */
	public int Len() {
		return hash.Count();
	}
	/** Attribute at the given position. */
	public Xop_xatr_itm Get_at(int idx) {return (Xop_xatr_itm)hash.Get_at(idx);}
	/** Attribute stored under key (converted to UTF-8 bytes), or null if absent. */
	public Xop_xatr_itm Get_by(String key) {
		byte[] key_bry = Bry_.new_u8(key);
		return (Xop_xatr_itm)hash.Get_by(key_bry);
	}
	/** Value bytes of the attribute stored under key; returns or when the key is absent. */
	public byte[] Get_as_bry_or(String key, byte[] or) {
		Xop_xatr_itm found = Get_by(key);
		if (found == null) return or;
		return found.Val_as_bry(src);
	}
	/** True only when an attribute exists under key and its value equals val. */
	public boolean Match(String key, String val) {
		Xop_xatr_itm found = Get_by(key);
		return found != null && String_.Eq(found.Val_as_str(src), val);
	}
	private void Add(Xop_xatr_itm itm) {
		hash.Add_if_dupe_use_nth(itm.Key_bry(), itm);
	}
	/** Builds a hash over src containing every item of ary, added in array order. */
	public static Xop_xatr_hash new_ary(byte[] src, Xop_xatr_itm[] ary) {
		Xop_xatr_hash rv = new Xop_xatr_hash(src);
		int len = ary.length;
		for (int i = 0; i < len; i++) {
			rv.Add(ary[i]);
		}
		return rv;
	}
}

View File

@@ -1,65 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * One attribute found by the xnde attribute parser.
 * Holds positions into the parsed source (atr / key / val ranges, position of "=")
 * plus optional materialized key / val byte arrays.
 * The three constructors set the tid: invalid, key-only, or key-val.
 */
public class Xop_xatr_itm {
	public static final byte Tid_null = 0, Tid_invalid = 1, Tid_repeat = 2, Tid_key_val = 3, Tid_key_only = 4; // NOTE: id order is important; Invalid() treats every tid below Tid_key_val as invalid
	public byte Tid() {return tid;} private byte tid;
	public void Tid_to_repeat_() {tid = Tid_repeat;}
	public void Tid_to_invalid_() {tid = Tid_invalid;}
	public boolean Tid_is_key_only() {return tid == Tid_key_only;}
	public int Key_bgn() {return key_bgn;} private int key_bgn;
	public int Key_end() {return key_end;} private int key_end;
	public byte[] Key_bry() {return key_bry;} public Xop_xatr_itm Key_bry_(byte[] v) {key_bry = v; return this;} private byte[] key_bry;
	public byte[] Val_bry() {return val_bry;} public Xop_xatr_itm Val_bry_(byte[] v) {val_bry = v; return this;} private byte[] val_bry;
	public void Key_rng_(int key_bgn, int key_end) {this.key_bgn = key_bgn; this.key_end = key_end;}
	public byte Key_tid() {return key_tid;} public Xop_xatr_itm Key_tid_(byte v) {key_tid = v; return this;} private byte key_tid;
	public int Val_bgn() {return val_bgn;} private int val_bgn;
	public int Val_end() {return val_end;} private int val_end;
	public int Atr_bgn() {return atr_bgn;} private int atr_bgn;
	public int Atr_end() {return atr_end;} private int atr_end;
	public int Eq_pos() {return eq_pos;} private int eq_pos;
	/** True for Tid_null, Tid_invalid and Tid_repeat. */
	public boolean Invalid() {return tid < Tid_key_val;} // NOTE: Tid order is important
	public byte Quote_byte() {return quote_byte;} private byte quote_byte;
	/** Value decoded as a UTF-8 String. */
	public String Val_as_str(byte[] src) {return String_.new_u8(Val_as_bry(src));}
	/** Value bytes; on first call they are sliced from src[val_bgn, val_end) and cached in val_bry. */
	public byte[] Val_as_bry(byte[] src) {
		if (val_bry == null) val_bry = Bry_.Mid(src, val_bgn, val_end); // NOTE: val_bry is cached
		return val_bry;
	}
	/** Same as Val_as_bry, but a zero-length value is reported as null. */
	public byte[] Val_as_bry__blank_to_null(byte[] src) {
		byte[] rv = Val_as_bry(src);
		return Bry_.Len_eq_0(rv) ? null : rv;
	}
	/**
	 * Value parsed as an int, or the fallback "or" when it cannot be parsed.
	 * Both the cached (val_bry) and uncached (src range) paths use the same
	 * To_int_or__lax parse. Previously the cached path used the strict To_int_or,
	 * so the result could change depending on whether Val_as_bry had been called first.
	 */
	public int Val_as_int_or(byte[] src, int or) {
		return val_bry == null
			? Bry_.To_int_or__lax(src, val_bgn, val_end, or)
			: Bry_.To_int_or__lax(val_bry, 0, val_bry.length, or);
	}
	/** True when the value parses to exactly 1. */
	public boolean Val_as_bool_by_int(byte[] src) {return Val_as_int_or(src, 0) == 1;}
	/** True when the lower-cased value equals Bool_.True_bry. */
	public boolean Val_as_bool(byte[] src) {return Bry_.Eq(Bry_.Lcase__all(Val_as_bry(src)), Bool_.True_bry);}
	/**
	 * Parses the attribute range of xnde with the app's attribute parser and hands every
	 * non-invalid attribute to parser.Xatr_parse, together with whatever object hash maps its key to
	 * (null is passed through if the key is not in hash).
	 * @return all parsed attributes, including invalid ones
	 */
	public static Xop_xatr_itm[] Xatr_parse(Xoae_app app, Xop_xnde_atr_parser parser, Hash_adp_bry hash, Xowe_wiki wiki, byte[] src, Xop_xnde_tkn xnde) {
		Xop_xatr_itm[] xatr_ary = app.Xatr_parser().Parse(app.Msg_log(), src, xnde.Atrs_bgn(), xnde.Atrs_end());
		for (int i = 0; i < xatr_ary.length; i++) {
			Xop_xatr_itm xatr = xatr_ary[i];
			if (xatr.Invalid()) continue;
			Object xatr_key_obj = hash.Get_by_mid(src, xatr.Key_bgn(), xatr.Key_end());
			parser.Xatr_parse(wiki, src, xatr, xatr_key_obj);
		}
		return xatr_ary;
	}
	/** Invalid attribute: only the overall range is known. */
	public Xop_xatr_itm(int atr_bgn, int atr_end) {
		this.tid = Tid_invalid; this.atr_bgn = atr_bgn; this.atr_end = atr_end;
	}
	/** Key-only attribute: the value range is set to the key range. */
	public Xop_xatr_itm(byte quote_byte, int atr_bgn, int atr_end, int key_bgn, int key_end) {
		this.tid = Tid_key_only; this.quote_byte = quote_byte; this.atr_bgn = atr_bgn; this.atr_end = atr_end;
		this.key_bgn = key_bgn; this.key_end = key_end;
		this.val_bgn = key_bgn; this.val_end = key_end;
	}
	/** Key-value attribute. */
	public Xop_xatr_itm(byte quote_byte, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end, int eq_pos) {
		this.tid = Tid_key_val; this.quote_byte = quote_byte; this.atr_bgn = atr_bgn; this.atr_end = atr_end;
		this.key_bgn = key_bgn; this.key_end = key_end;
		this.val_bgn = val_bgn; this.val_end = val_end; this.eq_pos = eq_pos;
	}
	public static final Xop_xatr_itm[] Ary_empty = new Xop_xatr_itm[0];
	public static final byte Key_tid_generic = 0, Key_tid_id = 1, Key_tid_style = 2, Key_tid_role = 3;
}

View File

@@ -1,408 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.primitives.*;
public class Xop_xatr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATTRIBS_REGEX
// attributes collected by the current Parse call; cleared at the start of every Parse
private final List_adp xatrs = List_adp_.new_();
// states of the Parse state machine; Parse switches on mode for every byte
private static final byte Mode_atr_bgn = 1, Mode_invalid = 2, Mode_key = 3, Mode_eq = 4, Mode_val_bgn = 5, Mode_val_quote = 6, Mode_val_raw = 7;
private byte mode = Mode_atr_bgn;
// positions of the attribute currently being scanned; -1 means "not seen yet"; Make resets all of them to -1 and valid to true
private int atr_bgn = -1, key_bgn = -1, key_end = -1, eq_pos = -1, val_bgn = -1, val_end = -1; boolean valid = true;
// opening quote byte of the current value; Byte_ascii.Null until a quote is seen
private byte quote_byte = Byte_ascii.Null;
// most recent attribute per key; used by Invalidate_repeated_atr to mark earlier duplicates as repeats
private final Hash_adp_bry repeated_atrs_hash = Hash_adp_bry.ci_a7(); // ASCII:xnde_atrs
// buffers used only when the key / val cannot be taken as a plain slice of src (an embedded tag was skipped, or whitespace was collapsed)
private final Bry_bfr key_bfr = Bry_bfr.new_(), val_bfr = Bry_bfr.new_();
// *_bfr_on: the matching buffer holds the key / val so far; ws_is_before_val: whitespace was seen between "=" and the value
private boolean key_bfr_on = false, val_bfr_on = false, ws_is_before_val = false;
// holds the tag name matched by the last Xnde_find_gt_find call (null if none matched)
public Bry_obj_ref Bry_obj() {return bry_ref;} private final Bry_obj_ref bry_ref = Bry_obj_ref.null_();
/**
 * Checks whether the text at pos is the name of one of the tags in xnde_hash,
 * optionally preceded by "/", and followed somewhere before end by ">".
 * The matched name (or null) is also stored in the Bry_obj() reference.
 * @param src bytes being parsed
 * @param pos position of the first byte after "<"
 * @param end exclusive end of the searchable range
 * @return position just after the tag name, or String_.Find_none when there is no ">" in range,
 *         the text between pos and ">" is not a known tag name, or the range is empty
 */
public int Xnde_find_gt_find(byte[] src, int pos, int end) {
	bry_ref.Val_(null);
	if (pos >= end) return String_.Find_none;	// empty range: nothing to match; also avoids reading src[pos] past the range / array
	if (src[pos] == Byte_ascii.Slash && pos + 1 < end)	// if </ move pos to after /
		++pos;
	int gt_pos = Bry_find_.Find_fwd(src, Byte_ascii.Gt, pos, end);
	if (gt_pos == Bry_.NotFound) return String_.Find_none;
	byte[] bry = (byte[])xnde_hash.Get_by_mid(src, pos, gt_pos);
	bry_ref.Val_(bry);
	return bry == null ? String_.Find_none : bry.length + pos;
}
/**
 * Given the position of a "<", finds the ">" that closes a recognized tag (see xnde_hash).
 * After the tag name only whitespace and at most one "/" may appear before ">".
 * A warning is logged for every failure.
 * NOTE(review): both this method and Xnde_find_gt_find skip one leading "/", so "<//nowiki>" appears to be accepted; confirm whether that is intended.
 * @param log_mgr receives a warning when no valid tag is found
 * @param src bytes being parsed
 * @param lt_pos position of "<"
 * @param end exclusive end of the searchable range
 * @return position of ">", or String_.Find_none
 */
private int Xnde_find_gt(Gfo_msg_log log_mgr, byte[] src, int lt_pos, int end) {
	int pos = lt_pos + 1;
	if (pos >= end) {	// "<" is the last byte in range; previously src[pos] was read unguarded and threw when "<" was the last byte of src
		log_mgr.Add_str_warn_key_none(Msg_mgr, "invalid lt", src, lt_pos);
		return String_.Find_none;
	}
	if (src[pos] == Byte_ascii.Slash && pos + 1 < end)
		++pos;
	int match_pos = Xnde_find_gt_find(src, pos, end);
	if (match_pos == String_.Find_none) {
		log_mgr.Add_str_warn_key_none(Msg_mgr, "invalid lt", src, lt_pos);
		return String_.Find_none;
	}
	boolean slash_found = false;
	for (int i = match_pos; i < end; i++) {
		switch (src[i]) {
			case Byte_ascii.Gt:
				return i;
			case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:	// skip any ws
				break;
			case Byte_ascii.Slash:
				if (slash_found) {	// only allow one slash
					log_mgr.Add_str_warn_key_none(Msg_mgr, "multiple slashes not allowed", src, i);
					return String_.Find_none;
				}
				slash_found = true;
				break;
			default:
				log_mgr.Add_str_warn_key_none(Msg_mgr, "invalid character", src, i);
				return String_.Find_none;
		}
	}
	log_mgr.Add_str_warn_key_none(Msg_mgr, "eos", src, lt_pos);
	return String_.Find_none;
}
/**
 * Parses the attribute text in src[bgn, end) and returns one Xop_xatr_itm per attribute found,
 * in source order. Invalid attributes are returned too (as invalid items), not dropped.
 * The method is a byte-at-a-time state machine driven by the "mode" field; each finished
 * attribute is emitted through Make, which also resets the per-attribute state.
 * Scanning state lives in instance fields, so one parser instance cannot run two Parse calls at once.
 * @param log_mgr receives warnings for invalid attributes and malformed embedded tags
 * @param src bytes being parsed
 * @param bgn position of the first byte of the attribute text
 * @param end exclusive end of the attribute text
 */
public Xop_xatr_itm[] Parse(Gfo_msg_log log_mgr, byte[] src, int bgn, int end) {
xatrs.Clear();
repeated_atrs_hash.Clear();
int i = bgn;
mode = Mode_atr_bgn;
boolean prv_is_ws = false;
while (true) {
// end of range: flush whatever attribute is still pending, then either stop or resume after a dangling quote
if (i == end) {
if (mode == Mode_val_quote) { // quote still open
int reset_pos = Bry_find_.Find_fwd(src, Byte_ascii.Space, atr_bgn, end); // try to find 1st space within quote; EX:"a='b c=d" should try to reset at c=d
boolean reset_found = reset_pos != Bry_find_.Not_found;
valid = false; val_end = reset_found ? reset_pos : end;
Make(log_mgr, src, val_end); // create invalid atr
if (reset_found) { // space found; resume from text after space; EX: "a='b c=d"; PAGE:en.w:Aubervilliers DATE:2014-06-25
i = Bry_find_.Find_fwd_while_not_ws(src, reset_pos, end); // skip ws
atr_bgn = -1;
mode = Mode_atr_bgn;
val_bfr.Clear();
val_bfr_on = false;
ws_is_before_val = false;
continue;
}
else
break;
}
else {
if (mode == Mode_val_bgn) // NOTE: handle dangling "k=" else will be "k"; EX: <a b=> x> <a b>; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31
valid = false;
if (atr_bgn != -1) { // atr_bgn will be -1 if atrs ends on quoted (EX:"a='b'"); else, pending atr that needs to be processed; EX: "a=b" b will be in bfr
val_end = end;
Make(log_mgr, src, end);
}
break;
}
}
else if (i > end)
break;
byte b = src[i];
switch (mode) {
// waiting for the first byte of the next attribute
case Mode_atr_bgn:
switch (b) {
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip any ws at bgn; note that once a non-ws char is encountered, it will immediately go into another mode
break;
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
case Byte_ascii.Colon:
if (atr_bgn == -1) atr_bgn = i;
mode = Mode_key;
key_bgn = i;
break;
case Byte_ascii.Lt:
int gt_pos = Xnde_find_gt(log_mgr, src, i, end);
if (gt_pos == String_.Find_none) {
valid = false; mode = Mode_invalid; if (atr_bgn == -1) atr_bgn = i;
}
else {
i = gt_pos; // note that there is ++i below and loop will continue at gt_pos + 1 (next character after)
}
break;
default:
valid = false; mode = Mode_invalid; if (atr_bgn == -1) atr_bgn = i;
break;
}
break;
// inside an invalid attribute: consume bytes until whitespace, then emit it
case Mode_invalid:
switch (b) {
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
Make(log_mgr, src, i);
mode = Mode_atr_bgn;
break;
default:
break;
}
break;
// inside the key; ends at whitespace or "="
case Mode_key:
switch (b) {
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
case Byte_ascii.Colon: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Underline:
if (key_bfr_on) key_bfr.Add_byte(b);
break;
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
if (valid) {
key_end = i;
mode = Mode_eq;
}
else
Make(log_mgr, src, i);
break;
case Byte_ascii.Eq:
if (valid) {
key_end = i;
mode = Mode_val_bgn;
eq_pos = i;
}
break;
case Byte_ascii.Lt:
int gt_pos = Xnde_find_gt(log_mgr, src, i, end);
if (gt_pos == String_.Find_none) {
valid = false; mode = Mode_invalid;
}
else {
if (!key_bfr_on) key_bfr.Add_mid(src, key_bgn, i);
i = gt_pos; // note that there is ++i below and loop will continue at gt_pos + 1 (next character after)
key_bfr_on = true;
}
break;
default:
valid = false; mode = Mode_invalid;
break;
}
break;
// key ended on whitespace; now expecting "=" (anything else means the key stands alone)
case Mode_eq:
switch (b) {
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip ws
if (key_end == -1) { // EX: "a = b"; key_end != -1 b/c 1st \s sets key_end; EX: "a b = c"; key_end
val_end = i - 1;
Make(log_mgr, src, i);
mode = Mode_atr_bgn;
continue;
}
break;
case Byte_ascii.Eq:
eq_pos = i;
mode = Mode_val_bgn;
break;
case Byte_ascii.Quote: case Byte_ascii.Apos: // FUTURE: previous word was key
default: // NOTE: added this late; xml_parser was not handling "line start=3" DATE:2013-07-03
val_end = i - 1;
Make(log_mgr, src, i);
mode = Mode_atr_bgn;
continue;
}
break;
// after "="; waiting for the first byte of the value
case Mode_val_bgn:
switch (b) {
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip-ws
ws_is_before_val = true;
break;
case Byte_ascii.Quote: case Byte_ascii.Apos:
mode = Mode_val_quote; quote_byte = b; prv_is_ws = false;
break;
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
case Byte_ascii.Colon:
case Byte_ascii.Hash:
mode = Mode_val_raw;
val_bgn = i;
break;
case Byte_ascii.Lt:
int gt_pos = Xnde_find_gt(log_mgr, src, i, end);
if (gt_pos == String_.Find_none) {
valid = false; mode = Mode_invalid;
}
else {
i = gt_pos; // note that there is ++i below and loop will continue at gt_pos + 1 (next character after)
}
break;
default:
break;
}
break;
// inside a quoted value; runs of whitespace are collapsed to one space via val_bfr
case Mode_val_quote:
if (val_bgn == -1) val_bgn = i;
switch (b) {
case Byte_ascii.Quote: case Byte_ascii.Apos:
if (quote_byte == b) {
val_end = i;
Make(log_mgr, src, i + 1); // NOTE: set atr_end *after* quote
}
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
break;
case Byte_ascii.Lt: // "<" try to find nowiki inside atr
int gt_pos = Xnde_find_gt(log_mgr, src, i, end);
if (gt_pos == String_.Find_none) {
// valid = false; mode = Mode_invalid; // DELETE: 2012-11-13; unpaired < should not mark atr invalid; EX: style='margin:1em<f'
if (!val_bfr_on) val_bfr.Add_mid(src, val_bgn, i + 1); // +1 to include <
val_bfr_on = true;
}
else {
if (!val_bfr_on) val_bfr.Add_mid(src, val_bgn, i);
i = gt_pos; // note that there is ++i below and loop will continue at gt_pos + 1 (next character after)
val_bfr_on = true;
}
prv_is_ws = false;
break;
case Byte_ascii.Nl: case Byte_ascii.Tab: case Byte_ascii.Cr: // REF.MW:Sanitizer.php|decodeTagAttributes $value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
case Byte_ascii.Space:
if (!val_bfr_on) {
val_bfr.Add_mid(src, val_bgn, i);
val_bfr_on = true;
}
if (prv_is_ws) {} // noop; only allow one ws at a time
else {
prv_is_ws = true; val_bfr.Add_byte(Byte_ascii.Space);
}
break;
default:
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
break;
}
break;
case Mode_val_raw: // no quotes; EX:a=bcd
switch (b) {
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent:
case Byte_ascii.Amp: case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star:
case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Slash:
case Byte_ascii.Colon: case Byte_ascii.Semic: case Byte_ascii.Gt:
case Byte_ascii.Question: case Byte_ascii.At: case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end:
case Byte_ascii.Pow: case Byte_ascii.Underline: case Byte_ascii.Tick:
case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde:
break;
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl:
val_end = i;
Make(log_mgr, src, i);
break;
case Byte_ascii.Eq: // EX:"a= b=c" or "a=b=c"; PAGE:en.w:2013_in_American_television
if (ws_is_before_val) { // "a= b=c"; discard 1st and resume at 2nd
int old_val_bgn = val_bgn;
valid = false; mode = Mode_invalid; Make(log_mgr, src, val_bgn); // invalidate cur atr; EX:"a="
atr_bgn = key_bgn = old_val_bgn; // reset atr / key to new atr; EX: "b"
key_end = i;
mode = Mode_val_bgn; // set mode to val_bgn (basically, put after =)
}
else { // "a=b=c"; discard all
valid = false; mode = Mode_invalid;
}
break;
case Byte_ascii.Lt:
val_end = i;
Make(log_mgr, src, i);
--i; // NOTE: --i to include "<" as part of next atr; above ws excludes from next atr
break;
default:
valid = false; mode = Mode_invalid;
break;
}
break;
}
++i;
}
repeated_atrs_hash.Clear();
return (Xop_xatr_itm[])xatrs.To_ary(Xop_xatr_itm.class);
}
/**
 * Emits the attribute currently being scanned into xatrs and resets all per-attribute state.
 * A valid attribute becomes a key-val item when both key_bgn and val_bgn were seen, else a key-only item;
 * its key is registered with Invalidate_repeated_atr. An invalid attribute becomes an invalid item and is logged.
 * @param log_mgr receives Log_invalid_atr for invalid attributes
 * @param src bytes being parsed
 * @param atr_end exclusive end position recorded on the emitted item
 */
private void Make(Gfo_msg_log log_mgr, byte[] src, int atr_end) {
	Xop_xatr_itm xatr = null;
	if (valid) {
		boolean key_bgn_exists = key_bgn != -1;
		boolean val_bgn_exists = val_bgn != -1;
		if (key_bgn_exists && val_bgn_exists)
			xatr = new Xop_xatr_itm(quote_byte, atr_bgn, atr_end, key_bgn, key_end, val_bgn, val_end, eq_pos);
		else {
			if (key_end == -1) key_end = val_end; // NOTE: key_end == -1 when eos; EX: "a" would have key_bgn = 0; key_end = -1; val_end = 1 DATE:2014-07-03
			xatr = new Xop_xatr_itm(quote_byte, atr_bgn, atr_end, key_bgn, key_end);
		}
		byte[] key_bry = key_bfr_on ? key_bfr.Xto_bry_and_clear() : Bry_.Mid(src, xatr.Key_bgn(), xatr.Key_end());
		xatr.Key_bry_(key_bry);
		Invalidate_repeated_atr(xatr, key_bry);
	}
	else { // note that invalid will have no key_bgn / key_end
		xatr = new Xop_xatr_itm(atr_bgn, atr_end);
		log_mgr.Add_itm_none(Log_invalid_atr, src, atr_bgn, atr_end);
		// discard any partially buffered key; otherwise its bytes would be prepended to the next buffered key
		// EX: "a<nowiki>b</nowiki>+ c<nowiki>d</nowiki>=e" would give the 2nd atr the key "abcd" instead of "cd"
		key_bfr.Clear();
	}
	if (val_bfr_on) xatr.Val_bry_(val_bfr.Xto_bry_and_clear());
	xatrs.Add(xatr);
	// reset state for the next attribute
	mode = Mode_atr_bgn; quote_byte = Byte_ascii.Null; valid = true;
	atr_bgn = key_bgn = val_bgn = key_end = val_end = eq_pos = -1;
	val_bfr_on = key_bfr_on = ws_is_before_val = false;
}
/**
 * Registers cur as the latest attribute for key_bry.
 * If an earlier attribute was registered under the same key, that earlier one is flagged as a repeat
 * and replaced in the lookup by cur.
 */
private void Invalidate_repeated_atr(Xop_xatr_itm cur, byte[] key_bry) {
	Xop_xatr_itm earlier = (Xop_xatr_itm)repeated_atrs_hash.Get_by(key_bry);
	boolean seen_before = earlier != null;
	if (seen_before) {
		earlier.Tid_to_repeat_();			// the last occurrence wins; mark the older one
		repeated_atrs_hash.Del(key_bry);	// remove the old entry so the Add below stores cur
	}
	repeated_atrs_hash.Add(key_bry, cur);
}
// tag names that Xnde_find_gt_find recognizes inside attribute text: nowiki, noinclude, includeonly, onlyinclude
private static final Hash_adp_bry xnde_hash = Hash_adp_bry.ci_a7()
.Add_bry_bry(Xop_xnde_tag_.Tag_nowiki.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_noinclude.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_includeonly.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_onlyinclude.Name_bry())
;
// message group and item used by Make when it logs an invalid attribute
private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "xatr_parser");
public static final Gfo_msg_itm
Log_invalid_atr = Gfo_msg_itm_.new_warn_(owner, "invalid_atr")
;
// key passed with every warning logged by Xnde_find_gt
private static final String Msg_mgr = "gplx.xowa.wiki.parser.xatr";
}
/*
NOTE: this parser can be done with a trie and hooks on Quote,Apos,Eq,NewLine,Space,Tab, but...
- multi-byte lookup is not needed (main advantage of trie)
- less performant
- logic is indirect (b/c different chars are valid if first letter of key, raw mode, quoted)
*/

View File

@@ -1,114 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*; import gplx.core.tests.*;
/**
 * Tests for Xop_xatr_parser.Parse: each test parses one attribute string and compares
 * the resulting items against the expected key / val / range / type.
 * NOTE: Quote_ws_mult, Quote_ws_mult_mult and Ws use runs of two spaces on purpose;
 * with single spaces they would not exercise the whitespace handling their PURPOSE comments describe.
 */
public class Xop_xatr_parser_tst {
	private final Xop_xatr_parser_fxt fxt = new Xop_xatr_parser_fxt();
	@Test public void Kv_quote_double() {fxt.tst_("a=\"b\"", fxt.new_atr_("a", "b"));}
	@Test public void Kv_quote_single() {fxt.tst_("a='b'", fxt.new_atr_("a", "b"));}
	@Test public void Kv_quote_none() {fxt.tst_("a=b", fxt.new_atr_("a", "b"));}
	@Test public void Kv_empty() {fxt.tst_("a=''", fxt.new_atr_("a", ""));}
	@Test public void Kv_key_has_underline() {fxt.tst_("a_b=c", fxt.new_atr_("a_b", "c"));}
	@Test public void Val_quote_none() {fxt.tst_("b", fxt.new_atr_("b", "b"));}
	@Test public void Val_quote_none_ws() {fxt.tst_(" b ", fxt.new_atr_("b", "b"));} // PURPOSE:discovered while writing test for ref's "lower-alpha" DATE:2014-07-03
	@Test public void Invalid_key_plus() {fxt.tst_("a+b", fxt.new_invalid_(0, 3));}
	@Test public void Invalid_key_plus_many() {fxt.tst_("a+b c=d", fxt.new_invalid_(0, 3), fxt.new_atr_("c", "d"));}
	@Test public void Invalid_val_plus() {fxt.tst_("a=b+c", fxt.new_invalid_(0, 5));}
	@Test public void Invalid_recover() {fxt.tst_("* a=b", fxt.new_invalid_(0, 1), fxt.new_atr_("a", "b"));} // PURPOSE: * is invalid, but should not stop parsing of a=b
	@Test public void Nowiki_val() {fxt.tst_("a=<nowiki>'b'</nowiki>", fxt.new_atr_("a", "b").Expd_atr_rng_(0, 13).Expd_key_("a").Expd_val_("b"));}
	@Test public void Nowiki_key() {fxt.tst_("<nowiki>a=b</nowiki>", fxt.new_atr_("a", "b").Expd_atr_rng_(8, 11));}
	@Test public void Nowiki_key_2() {fxt.tst_("a<nowiki>b</nowiki>c=d", fxt.new_atr_("abc", "d").Expd_atr_rng_(0, 22));}
	@Test public void Nowiki_key_3() {fxt.tst_("a<nowiki>=</nowiki>\"b\"", fxt.new_atr_("a", "b").Expd_atr_rng_(0, 22));} // EX:fr.w:{{Portail|Transpédia|Californie}}
	@Test public void Nowiki_quote() {fxt.tst_("a=\"b<nowiki>c</nowiki>d<nowiki>e</nowiki>f\"", fxt.new_atr_("a", "bcdef"));}
	@Test public void Int_value() {fxt.tst_int("a='-123'", -123);}
	@Test public void Many_apos() {fxt.tst_("a='b' c='d' e='f'", fxt.new_atr_("a", "b"), fxt.new_atr_("c", "d"), fxt.new_atr_("e", "f"));}
	@Test public void Many_raw() {fxt.tst_("a=b c=d e=f", fxt.new_atr_("a", "b"), fxt.new_atr_("c", "d"), fxt.new_atr_("e", "f"));}
	@Test public void Ws_ini() {fxt.tst_(" a='b'", fxt.new_atr_("a", "b").Expd_atr_rng_(1, 6));}
	@Test public void Ws_end() {fxt.tst_(" a='b' c='d'", fxt.new_atr_("a", "b").Expd_atr_rng_(1, 6), fxt.new_atr_("c", "d").Expd_atr_rng_(7, 12));}
	@Test public void Quote_ws_nl() {fxt.tst_("a='b\nc'", fxt.new_atr_("a", "b c"));}
	@Test public void Quote_ws_mult() {fxt.tst_("a='b  c'", fxt.new_atr_("a", "b c"));} // PURPOSE: run of spaces inside quotes collapses to one space
	@Test public void Quote_ws_mult_mult() {fxt.tst_("a='b  c  d'", fxt.new_atr_("a", "b c d"));} // PURPOSE: fix wherein 1st-gobble gobbled rest of spaces (was b cd)
	@Test public void Quote_apos() {fxt.tst_("a=\"b c'd\"", fxt.new_atr_("a", "b c'd"));} // PURPOSE: fix wherein apos was gobbled up; PAGE:en.s:Alice's_Adventures_in_Wonderland; DATE:2013-11-22
	@Test public void Quote_apos_2() {fxt.tst_("a=\"b'c d\"", fxt.new_atr_("a", "b'c d"));} // PURPOSE: fix wherein apos was causing "'b'c d"; PAGE:en.s:Grimm's_Household_Tales,_Volume_1; DATE:2013-12-22
	@Test public void Multiple() {fxt.tst_("a b1 c", fxt.new_atr_("a", "a"), fxt.new_atr_("b1", "b1"), fxt.new_atr_("c", "c"));}
	@Test public void Ws() {fxt.tst_("a  =  'b'", fxt.new_atr_("a", "b"));} // PURPOSE: fix wherein multiple space was causing "a=a"; PAGE:fr.s:La_Sculpture_dans_les_cimetières_de_Paris/Père-Lachaise; DATE:2014-01-18
	@Test public void Dangling_eos() {fxt.tst_("a='b' c='d", fxt.new_atr_("a", "b"), fxt.new_invalid_(6, 10));} // PURPOSE: handle dangling quote at eos; PAGE:en.w:Aubervilliers DATE:2014-06-25
	@Test public void Dangling_bos() {fxt.tst_("a='b c=d", fxt.new_invalid_(0, 4), fxt.new_atr_("c", "d"));} // PURPOSE: handle dangling quote at bos; resume at next valid atr; PAGE:en.w:Aubervilliers DATE:2014-06-25
	@Test public void Invalid_incomplete() {fxt.tst_("a= c=d", fxt.new_invalid_(0, 3), fxt.new_atr_("c", "d"));} // PURPOSE: discard xatr if incomplete and followed by valid atr; PAGE:en.w:2013_in_American_television DATE:2014-09-25
	@Test public void Invalid_incomplete_2() {fxt.tst_("a=c=d", fxt.new_invalid_(0, 5));} // PURPOSE: variation of above; per MW regex, missing space invalidates entire attribute; DATE:2014-09-25
	@Test public void Invalid_incomplete_pair(){fxt.tst_("a= b=", fxt.new_invalid_(0, 3), fxt.new_invalid_(3, 5));} // PURPOSE: "b=" should be invalid not a kv of "b" = "b"; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31
	/*
	TODO:
	change ws to be end; EX: "a=b c=d" atr1 ends at 4 (not 3)
	*/
//	@Test public void Val_quote_none_many() {
//		fxt.tst_("a b", fxt.new_atr_("", "a"), fxt.new_atr_("", "b"));
////		fxt.tst_("a='b' c d e='f'", fxt.new_atr_("a", "b"), fxt.new_atr_("", "c"), fxt.new_atr_("", "d"), fxt.new_atr_("e", "f"));
//	}
}
/**
 * Test fixture: parses an attribute string with a shared Xop_xatr_parser and compares the
 * result with expectations, either item-by-item (tst_) or as int values (tst_int).
 */
class Xop_xatr_parser_fxt {
	Xop_xatr_parser parser = new Xop_xatr_parser();
	Tst_mgr tst_mgr = new Tst_mgr();
	/** Expectation for an invalid attribute covering [bgn, end). */
	public Xop_xatr_itm_chkr new_invalid_(int bgn, int end) {
		Xop_xatr_itm_chkr chkr = new Xop_xatr_itm_chkr();
		chkr.Expd_atr_rng_(bgn, end);
		chkr.Expd_typeId_(Xop_xatr_itm.Tid_invalid);
		return chkr;
	}
	/** Expectation for an attribute with the given key and value. */
	public Xop_xatr_itm_chkr new_atr_(String key, String val) {
		Xop_xatr_itm_chkr chkr = new Xop_xatr_itm_chkr();
		chkr.Expd_key_(key);
		chkr.Expd_val_(val);
		return chkr;
	}
	/** Parses src_str and checks every parsed item against the matching checker in expd. */
	public void tst_(String src_str, Xop_xatr_itm_chkr... expd) {
		byte[] raw = Bry_.new_u8(src_str);
		Xop_xatr_itm[] parsed = Parse_all(raw);
		tst_mgr.Vars().Clear().Add("raw_bry", raw);	// checkers read "raw_bry" to slice keys / vals that were not materialized
		tst_mgr.Tst_ary("xatr:", expd, parsed);
	}
	/** Parses src_str and checks the int value of every parsed item (0 when not an int). */
	public void tst_int(String src_str, int... expd) {
		byte[] raw = Bry_.new_u8(src_str);
		Xop_xatr_itm[] parsed = Parse_all(raw);
		int len = parsed.length;
		int[] vals = new int[len];
		for (int idx = 0; idx < len; ++idx) {
			vals[idx] = parsed[idx].Val_as_int_or(raw, 0);
		}
		Tfds.Eq_ary(expd, vals);
	}
	/** Runs the parser over all of raw with a fresh message log. */
	private Xop_xatr_itm[] Parse_all(byte[] raw) {
		Gfo_msg_log log = new Gfo_msg_log(Xoa_app_.Name);
		return parser.Parse(log, raw, 0, raw.length);
	}
}
/**
 * Expectation for one parsed Xop_xatr_itm. Each expected field has a sentinel
 * (-1, null, or Tid_null); the sentinel test is passed as the first argument of Tst_val.
 */
class Xop_xatr_itm_chkr implements Tst_chkr {
	private int expd_atr_bgn = -1, expd_atr_end = -1;
	private int expd_key_bgn = -1, expd_key_end = -1;
	private String expd_key;
	private String expd_val;
	private byte expd_typeId = Xop_xatr_itm.Tid_null;
	public Class<?> TypeOf() {return Xop_xatr_itm.class;}
	public Xop_xatr_itm_chkr Expd_atr_rng_(int bgn, int end) {
		this.expd_atr_bgn = bgn;
		this.expd_atr_end = end;
		return this;
	}
	public Xop_xatr_itm_chkr Expd_key_rng_(int bgn, int end) {
		this.expd_key_bgn = bgn;
		this.expd_key_end = end;
		return this;
	}
	public Xop_xatr_itm_chkr Expd_key_(String v) {
		this.expd_key = v;
		return this;
	}
	public Xop_xatr_itm_chkr Expd_val_(String v) {
		this.expd_val = v;
		return this;
	}
	public Xop_xatr_itm_chkr Expd_typeId_(byte v) {
		this.expd_typeId = v;
		return this;
	}
	/**
	 * Compares actl_obj (an Xop_xatr_itm) with the expectations and returns the summed Tst_val results.
	 * Key / val are read from the item's materialized bytes when present, else sliced from the "raw_bry" test variable.
	 */
	public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
		Xop_xatr_itm actl = (Xop_xatr_itm)actl_obj;
		int err = mgr.Tst_val(expd_typeId == Xop_xatr_itm.Tid_null, path, "atr_typeId", expd_typeId, actl.Tid());
		err += mgr.Tst_val(expd_atr_bgn == -1, path, "atr_bgn", expd_atr_bgn, actl.Atr_bgn());
		err += mgr.Tst_val(expd_atr_end == -1, path, "atr_end", expd_atr_end, actl.Atr_end());
		err += mgr.Tst_val(expd_key_bgn == -1, path, "key_bgn", expd_key_bgn, actl.Key_bgn());
		err += mgr.Tst_val(expd_key_end == -1, path, "key_end", expd_key_end, actl.Key_end());
		boolean skip_key = expd_key == null;
		if (actl.Key_bry() != null)
			err += mgr.Tst_val(skip_key, path, "key", expd_key, String_.new_u8(actl.Key_bry()));
		else
			err += mgr.Tst_val(skip_key, path, "key", expd_key, mgr.Vars_get_bry_as_str("raw_bry", actl.Key_bgn(), actl.Key_end()));
		boolean skip_val = expd_val == null;
		if (actl.Val_bry() != null)
			err += mgr.Tst_val(skip_val, path, "val", expd_val, String_.new_u8(actl.Val_bry()));
		else
			err += mgr.Tst_val(skip_val, path, "val", expd_val, mgr.Vars_get_bry_as_str("raw_bry", actl.Val_bgn(), actl.Val_end()));
		return err;
	}
}
/*
*/

View File

@@ -16,9 +16,9 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.primitives.*; import gplx.core.btries.*;
import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.xowa.parsers.htmls.*;
public class Xop_xatr_whitelist_mgr {
public boolean Chk(int tag_id, byte[] src, Xop_xatr_itm xatr) {
public boolean Chk(int tag_id, byte[] src, Mwh_atr_itm xatr) {
byte[] key_bry = xatr.Key_bry();
byte[] chk_bry; int chk_bgn, chk_end;
if (key_bry == null) {
@@ -41,12 +41,12 @@ public class Xop_xatr_whitelist_mgr {
&& (itm.Exact() ? key_trie.Match_pos() == chk_end : true) // if exact, check for exact; else always true
;
switch (itm_key_tid) {
case Xop_xatr_itm.Key_tid_style:
case Mwh_atr_itm_.Key_tid__style:
if (!Scrub_style(xatr, src)) return false;
xatr.Val_bry_(gplx.xowa.parsers.amps.Xop_amp_mgr.I.Decode_as_bry(xatr.Val_as_bry(src))); // NOTE: must decode style values; "&#amp;#000000" -> "#000000"; see MW:checkCss; PAGE:en.w:Boron DATE:2015-07-29
xatr.Val_bry_(gplx.xowa.parsers.amps.Xop_amp_mgr.Instance.Decode_as_bry(xatr.Val_as_bry())); // NOTE: must decode style values; "&#amp;#000000" -> "#000000"; see MW:checkCss; PAGE:en.w:Boron DATE:2015-07-29
break;
case Xop_xatr_itm.Key_tid_role:
if (!Bry_.Eq(Val_role_presentation, xatr.Val_as_bry(src))) return false; // MW: For now we only support role="presentation"; DATE:2014-04-05
case Mwh_atr_itm_.Key_tid__role:
if (!Bry_.Eq(Val_role_presentation, xatr.Val_as_bry())) return false; // MW: For now we only support role="presentation"; DATE:2014-04-05
break;
}
return rv;
@@ -169,18 +169,18 @@ public class Xop_xatr_whitelist_mgr {
}
private Xop_xatr_whitelist_itm Ini_key_trie_add(byte[] key, boolean exact) {
Object key_tid_obj = tid_hash.Get_by(key);
byte key_tid = key_tid_obj == null ? Xop_xatr_itm.Key_tid_generic : ((Byte_obj_val)key_tid_obj).Val();
byte key_tid = key_tid_obj == null ? Mwh_atr_itm_.Key_tid__generic : ((Byte_obj_val)key_tid_obj).Val();
Xop_xatr_whitelist_itm rv = new Xop_xatr_whitelist_itm(key, key_tid, exact);
key_trie.Add_obj(key, rv);
return rv;
}
private Hash_adp_bry tid_hash = Hash_adp_bry.ci_a7()
.Add_str_byte("id", Xop_xatr_itm.Key_tid_id)
.Add_str_byte("style", Xop_xatr_itm.Key_tid_style)
.Add_str_byte("role", Xop_xatr_itm.Key_tid_role)
.Add_str_byte("id", Mwh_atr_itm_.Key_tid__id)
.Add_str_byte("style", Mwh_atr_itm_.Key_tid__style)
.Add_str_byte("role", Mwh_atr_itm_.Key_tid__role)
;
private Btrie_slim_mgr key_trie = Btrie_slim_mgr.ci_a7(); // NOTE:ci.ascii:HTML.node_name
public boolean Scrub_style(Xop_xatr_itm xatr, byte[] raw) { // REF:Sanitizer.php|checkCss; '! expression | filter\s*: | accelerator\s*: | url\s*\( !ix'; NOTE: this seems to affect MS IE only; DATE:2013-04-01
public boolean Scrub_style(Mwh_atr_itm xatr, byte[] raw) { // REF:Sanitizer.php|checkCss; '! expression | filter\s*: | accelerator\s*: | url\s*\( !ix'; NOTE: this seems to affect MS IE only; DATE:2013-04-01
byte[] val_bry = xatr.Val_bry();
byte[] chk_bry; int chk_bgn, chk_end;
if (val_bry == null) {

View File

@@ -16,9 +16,9 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
import org.junit.*; import gplx.xowa.parsers.htmls.*;
public class Xop_xatr_whitelist_mgr_tst {
Xop_xatr_whitelist_fxt fxt = new Xop_xatr_whitelist_fxt();
private final Xop_xatr_whitelist_fxt fxt = new Xop_xatr_whitelist_fxt();
@Before public void init() {fxt.Clear();}
@Test public void Basic() {
fxt.Whitelist(Xop_xnde_tag_.Tid_div , "style" , true);
@@ -46,17 +46,21 @@ public class Xop_xatr_whitelist_mgr_tst {
}
}
class Xop_xatr_whitelist_fxt {
private Xop_xatr_whitelist_mgr whitelist_mgr;
private Mwh_atr_itm atr_itm = new Mwh_atr_itm(null, false, false, false, -1, -1, -1, -1, null, -1, -1, null, -1, 0);
public void Clear() {
if (whitelist_mgr == null) whitelist_mgr = new Xop_xatr_whitelist_mgr().Ini();
} private Xop_xatr_whitelist_mgr whitelist_mgr;
}
public void Whitelist(int tag_id, String key_str, boolean expd) {
byte[] key_bry = Bry_.new_a7(key_str);
atr_itm.Key_rng_(0, key_bry.length);
// atr_itm.Key_rng_(0, key_bry.length);
atr_itm.Key_bry_(key_bry);
Tfds.Eq(expd, whitelist_mgr.Chk(tag_id, key_bry, atr_itm), key_str);
} private Xop_xatr_itm atr_itm = new Xop_xatr_itm(0, 0);
}
public void Whitelist(int tag_id, String key_str, String val_str, boolean expd) {
byte[] key_bry = Bry_.new_a7(key_str);
atr_itm.Key_rng_(0, key_bry.length);
// atr_itm.Key_rng_(0, key_bry.length);
atr_itm.Key_bry_(key_bry);
atr_itm.Val_bry_(Bry_.new_a7(val_str));
Tfds.Eq(expd, whitelist_mgr.Chk(tag_id, key_bry, atr_itm), key_str);
}

View File

@@ -1,21 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Single-method callback interface that receives one xnde attribute item.
 * NOTE(review): no implementers are visible here; the parameter notes below are inferred from the signature only and should be confirmed.
 */
public interface Xop_xnde_atr_parser {
/**
 * Receives one attribute.
 * @param wiki         wiki supplied by the caller
 * @param src          byte array supplied by the caller; presumably the source text the attribute was parsed from (TODO confirm)
 * @param xatr         the attribute item
 * @param xatr_key_obj caller-supplied object associated with the attribute key; its concrete type is not visible here
 */
void Xatr_parse(Xowe_wiki wiki, byte[] src, Xop_xatr_itm xatr, Object xatr_key_obj);
}

View File

@@ -20,8 +20,8 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
public class Xop_xnde_lxr implements Xop_lxr {
public int Lxr_tid() {return Xop_lxr_.Tid_xnde;}
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Lt, this);}
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
public void Term(Btrie_fast_mgr core_trie) {}
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Xnde().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
public static final Xop_xnde_lxr _ = new Xop_xnde_lxr(); Xop_xnde_lxr() {}
public static final Xop_xnde_lxr Instance = new Xop_xnde_lxr(); Xop_xnde_lxr() {}
}

View File

@@ -61,7 +61,7 @@ public class Xop_xnde_tag {
public Ordered_hash Langs() {return langs;} private Ordered_hash langs; private Int_obj_ref langs_key;
public Xop_xnde_tag Langs_(int lang_code, String name) {
if (langs == null) {
langs = Ordered_hash_.new_();
langs = Ordered_hash_.New();
langs_key = Int_obj_ref.neg1_();
}
Xop_xnde_tag_lang lang_tag = new Xop_xnde_tag_lang(lang_code, name);
@@ -69,8 +69,8 @@ public class Xop_xnde_tag {
return this;
}
public Xop_xnde_tag_lang Langs_get(gplx.xowa.langs.cases.Xol_case_mgr case_mgr, int cur_lang, byte[] src, int bgn, int end) {
if (langs == null) return Xop_xnde_tag_lang._; // no langs defined; always return true; EX:<b>
if (Bry_.Eq(src, bgn, end, name_bry)) return Xop_xnde_tag_lang._; // canonical name (name_bry) is valid in all langs; EX: <section> and cur_lang=de
if (langs == null) return Xop_xnde_tag_lang.Instance; // no langs defined; always return true; EX:<b>
if (Bry_.Eq(src, bgn, end, name_bry)) return Xop_xnde_tag_lang.Instance; // canonical name (name_bry) is valid in all langs; EX: <section> and cur_lang=de
synchronized (langs) {
langs_key.Val_(cur_lang);
}

View File

@@ -170,7 +170,7 @@ public class Xop_xnde_tag_ {
, Tag_kbd = new_(Tid_kbd, "kbd").No_inline_()
, Tag_samp = new_(Tid_samp, "samp").No_inline_()
, Tag_blockquote = new_(Tid_blockquote, "blockquote").No_inline_().Repeat_mids_().Section_().Block_open_bgn_().Block_close_end_() // NOTE: should be open_end_, but leaving for now; DATE:2014-03-11; added Repeat_mids_(); PAGE:en.w:Ring_a_Ring_o'_Roses DATE:2014-06-26
, Tag_pre = new_(Tid_pre, "pre").No_inline_().Section_().Xtn_().Raw_().Block_open_bgn_().Block_close_end_().Ignore_empty_().Xtn_skips_template_args_()
, Tag_pre = new_(Tid_pre, "pre").No_inline_().Section_().Xtn_mw_().Raw_().Block_open_bgn_().Block_close_end_().Ignore_empty_().Xtn_skips_template_args_()
, Tag_font = new_(Tid_font, "font").No_inline_()
, Tag_center = new_(Tid_center, "center").No_inline_().Block_open_end_().Block_close_end_() // removed .Repeat_ends_(); added Nest_(); EX: w:Burr Truss; DATE:2012-12-12
, Tag_p = new_(Tid_p, "p").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
@@ -217,13 +217,13 @@ public class Xop_xnde_tag_ {
, Tag_references = new_(Tid_references, "references").Xtn_mw_()
, Tag_source = new_(Tid_source, "source").Xtn_mw_().Block_open_bgn_().Block_close_end_() // deactivate pre; pre; PAGE:en.w:Comment_(computer_programming); DATE:2014-06-23
, Tag_syntaxHighlight = new_(Tid_syntaxHighlight, "syntaxHighlight").Xtn_mw_().Block_open_bgn_().Block_close_end_() // deactivate pre; pre; PAGE:en.w:Comment_(computer_programming); DATE:2014-06-23
, Tag_gallery = new_(Tid_gallery, "gallery").Xtn_().Block_open_bgn_().Block_close_end_().Xtn_auto_close_()
, Tag_imageMap = new_(Tid_imageMap, "imageMap").Xtn_()
, Tag_gallery = new_(Tid_gallery, "gallery").Xtn_mw_().Block_open_bgn_().Block_close_end_().Xtn_auto_close_()
, Tag_imageMap = new_(Tid_imageMap, "imageMap").Xtn_mw_()
, Tag_timeline = new_(Tid_timeline, "timeline").Xtn_mw_()
, Tag_hiero = new_(Tid_hiero, "hiero").Xtn_mw_()
, Tag_inputBox = new_(Tid_inputBox, "inputBox").Xtn_mw_()
, Tag_pages = new_(Tid_pages, "pages").Xtn_mw_()
, Tag_section = new_(Tid_section, "section").Xtn_mw_().Langs_(Xol_lang_itm_.Id_de, "Abschnitt").Langs_(Xol_lang_itm_.Id_he, "קטע").Langs_(Xol_lang_itm_.Id_pt, "trecho") // DATE:2014-07-18
, Tag_section = new_(Tid_section, "section").Xtn_mw_().Langs_(Xol_lang_stub_.Id_de, "Abschnitt").Langs_(Xol_lang_stub_.Id_he, "קטע").Langs_(Xol_lang_stub_.Id_pt, "trecho") // DATE:2014-07-18
, Tag_pagequality = new_(Tid_pagequality, "pagequality").Xtn_mw_()
, Tag_pagelist = new_(Tid_pagelist, "pagelist").Xtn_mw_()
, Tag_categoryList = new_(Tid_categoryList, "categoryList").Xtn_mw_()

View File

@@ -28,5 +28,5 @@ public class Xop_xnde_tag_lang {
public String Name_str() {return name_str;} private String name_str;
public byte[] Name_bry() {return name_bry;} private byte[] name_bry;
public byte[] Xtn_end_tag_tmp() {return xtnEndTag_tmp;} private byte[] xtnEndTag_tmp;
public static final Xop_xnde_tag_lang _ = new Xop_xnde_tag_lang(-1, String_.Empty);
public static final Xop_xnde_tag_lang Instance = new Xop_xnde_tag_lang(-1, String_.Empty);
}

View File

@@ -44,12 +44,8 @@ public class Xop_xnde_tag_regy {
Xop_xnde_tag[] ary = Xop_xnde_tag_.Ary;
for (int i = 0; i < len; ++i) {
Xop_xnde_tag xnde = ary[i];
if ( xtn_hash != null // xtn_hash is null during tests or when wiki is not in site_meta_db
&& xnde.Xtn_mw() // only apply filter to xtn_xnde, not basic_xnde; EX: <dynamicpagelist> not <table>
&& !xtn_hash.Has(xnde.Name_bry()) // xtn_xnde is not in xtn_hash
) continue; // skip; xtn is not defined in site_meta_db
if (is_tmpl && !xnde.Xtn()) // is_tmpl and basic_xnde; EX: <b>
continue; // skip; tmpl only needs xtn_xnde;
if (Ignore_xnde(xtn_hash, xnde)) continue; // skip; xtn is not defined in site_meta_db
if (is_tmpl && !xnde.Xtn()) continue; // is_tmpl and basic_xnde; EX: <b>
Add_itm(trie, xnde);
}
if (is_tmpl) { // is_tmpl also has <nowiki>, <includeonly>, <noinclude>, <onlyinclude>
@@ -59,6 +55,13 @@ public class Xop_xnde_tag_regy {
Add_itm(trie, Xop_xnde_tag_.Tag_onlyinclude);
}
}
/**
 * Decides whether a tag should be skipped because it is an xtn tag missing from xtn_hash.
 * Returns true only when xtn_hash is non-null, the tag is flagged Xtn_mw, its name is not in xtn_hash,
 * and its id is neither Tid_translate nor Tid_languages.
 */
private boolean Ignore_xnde(Hash_adp_bry xtn_hash, Xop_xnde_tag xnde) {
	if (xtn_hash == null) return false;						// xtn_hash is null during tests or when wiki is not in site_meta_db
	if (!xnde.Xtn_mw()) return false;						// only apply filter to xtn_xnde, not basic_xnde; EX: <dynamicpagelist> not <table>
	if (xtn_hash.Has(xnde.Name_bry())) return false;		// xtn_xnde is in xtn_hash; keep it
	// always include <translate> and <languages>; TODO:filter out when extensions supported in site_cfg; DATE:2015-10-13
	boolean always_included = Int_.In(xnde.Id(), Xop_xnde_tag_.Tid_translate, Xop_xnde_tag_.Tid_languages);
	return !always_included;								// skip; xtn is not defined in site_meta_db
}
private void Add_itm(Btrie_slim_mgr trie, Xop_xnde_tag xnde) {
trie.Add_obj(xnde.Name_bry(), xnde);
Ordered_hash langs = xnde.Langs();

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.xtns.*; import gplx.xowa.parsers.tblws.*; import gplx.xowa.parsers.tmpls.*;
import gplx.xowa.xtns.*; import gplx.xowa.parsers.tblws.*; import gplx.xowa.parsers.tmpls.*; import gplx.xowa.parsers.htmls.*;
public class Xop_xnde_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_xnde;}
public int Tblw_tid() {return tag.Id();} // NOTE: tblw tkns actually return xnde as Tblw_tid
@@ -31,9 +31,9 @@ public class Xop_xnde_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
public int Atrs_end() {return atrs_end;} public Xop_xnde_tkn Atrs_end_(int v) {atrs_end = v; return this;} private int atrs_end = Xop_tblw_wkr.Atrs_null;
public Xop_xnde_tkn Atrs_rng_(int bgn, int end) {atrs_bgn = bgn; atrs_end = end; return this;}
public void Atrs_rng_set(int bgn, int end) {Atrs_rng_(bgn, end);}
public Xop_xatr_itm[] Atrs_ary() {return atrs_ary;}
public Xop_xnde_tkn Atrs_ary_(Xop_xatr_itm[] v) {atrs_ary = v; return this;} private Xop_xatr_itm[] atrs_ary;
public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {atrs_ary = v; return this;}
public Mwh_atr_itm[] Atrs_ary() {return atrs_ary;}
public Xop_xnde_tkn Atrs_ary_ (Mwh_atr_itm[] v) {atrs_ary = v; return this;} private Mwh_atr_itm[] atrs_ary;
public Xop_tblw_tkn Atrs_ary_as_tblw_ (Mwh_atr_itm[] v) {atrs_ary = v; return this;}
public Xop_xnde_tag Tag() {return tag;} public Xop_xnde_tkn Tag_(Xop_xnde_tag v) {tag = v; return this;} private Xop_xnde_tag tag;
public int Tag_open_bgn() {return tag_open_bgn;} private int tag_open_bgn = Int_.Null;
public int Tag_open_end() {return tag_open_end;} private int tag_open_end = Int_.Null;
@@ -75,7 +75,13 @@ public class Xop_xnde_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
}
}
}
// public static Xop_ctx Hack_ctx; // CHART
@Override public boolean Tmpl_evaluate(Xop_ctx ctx, byte[] src, Xot_invk caller, Bry_bfr bfr) {
// if (ctx.Scribunto) { // CHART
// byte[] key = uniq_mgr.Add(Bry_.Mid(src, this.Src_bgn(), this.Src_end()));
// bfr.Add(key);
// return true;
// }
int subs_len = this.Subs_len();
switch (tag.Id()) {
case Xop_xnde_tag_.Tid_noinclude: // do not evaluate subs

View File

@@ -18,28 +18,24 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.xowa.apps.progs.*;
import gplx.xowa.wikis.domains.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.strings.*; import gplx.langs.htmls.*;
import gplx.xowa.parsers.logs.*; import gplx.xowa.parsers.tblws.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.miscs.*;
import gplx.xowa.parsers.logs.*; import gplx.xowa.parsers.tblws.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.miscs.*; import gplx.xowa.parsers.htmls.*;
public class Xop_xnde_wkr implements Xop_ctx_wkr {
public void Ctor_ctx(Xop_ctx ctx) {}
public boolean Pre_at_bos() {return pre_at_bos;} public void Pre_at_bos_(boolean v) {pre_at_bos = v;} private boolean pre_at_bos;
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {this.Clear();}
private void Clear() {
pre_at_bos = false;
}
private void Clear() {pre_at_bos = false;}
public void AutoClose(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn, int closing_tkn_tid) {
Xop_xnde_tkn xnde = (Xop_xnde_tkn)tkn;
xnde.Src_end_(src_len);
xnde.Subs_move(root); // NOTE: ctx.Root used to be root which was a member variable; DATE:2013-12-11
xnde.Subs_move(root);
if (closing_tkn_tid == Xop_tkn_itm_.Tid_lnki_end) Xop_xnde_wkr_.AutoClose_handle_dangling_nde_in_caption(root, tkn); // PAGE:sr.w:Сићевачка_клисура; DATE:2014-07-03
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Dangling_xnde, src, xnde.Src_bgn(), xnde.Name_end()); // NOTE: xnde.Src_bgn to start at <; xnde.Name_end b/c xnde.Src_end is -1
}
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
if (bgn_pos == Xop_parser_.Doc_bgn_bos) {
bgn_pos = 0; // do not allow -1 pos
}
if (cur_pos == src_len) return ctx.Lxr_make_txt_(src_len); // "<" is last char in page; strange, but don't raise error;
Xop_tkn_itm last_tkn = ctx.Stack_get_last(); // BLOCK:invalid_ttl_check
if (bgn_pos == Xop_parser_.Doc_bgn_bos) bgn_pos = 0; // do not allow -1 pos
if (cur_pos == src_len) return ctx.Lxr_make_txt_(src_len); // "<" is EOS; don't raise error;
Xop_tkn_itm last_tkn = ctx.Stack_get_last(); // BLOCK:invalid_ttl_check
if ( last_tkn != null
&& last_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) {
Xop_lnki_tkn lnki = (Xop_lnki_tkn)last_tkn;
@@ -51,35 +47,38 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
}
// find >
// check for "</"
byte cur_byt = src[cur_pos];
boolean tag_is_closing = false;
if (cur_byt == Byte_ascii.Slash) { // "</" encountered (note that < enters this frame)
++cur_pos;
if (cur_pos == src_len) return ctx.Lxr_make_txt_(src_len); // "</" are last chars on page; strange, but don't raise error;
cur_byt = src[cur_pos];
if (cur_byt == Byte_ascii.Slash) { // "</"
tag_is_closing = true;
++cur_pos;
if (cur_pos == src_len) return ctx.Lxr_make_txt_(src_len); // "</" is EOS
cur_byt = src[cur_pos];
}
// get node_name
Btrie_slim_mgr tag_trie = ctx.Xnde_tag_regy().Get_trie(ctx.Xnde_names_tid());
Object tag_obj = tag_trie.Match_bgn_w_byte(cur_byt, src, cur_pos, src_len); // NOTE:tag_obj can be null in wiki_tmpl mode; EX: "<ul" is not a valid tag in wiki_tmpl, but is valid in wiki_main
int atrs_bgn_pos = tag_trie.Match_pos();
int name_bgn = cur_pos, name_end = atrs_bgn_pos;
int tag_end_pos = atrs_bgn_pos - 1;
if (tag_obj != null) {
if (atrs_bgn_pos >= src_len) return ctx.Lxr_make_txt_(atrs_bgn_pos); // truncated tag; EX: "<br"
if (atrs_bgn_pos >= src_len) return ctx.Lxr_make_txt_(atrs_bgn_pos); // EOS: EX: "<br"EOS
// check next char after tag name; EX: "<span"
switch (src[atrs_bgn_pos]) { // NOTE: not sure about rules; Preprocessor_DOM.php calls preg_match on $elementsRegex which seems to break on word boundaries; $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
++atrs_bgn_pos; // set bgn_pos to be after ws
++atrs_bgn_pos; // set bgn_pos to be after ws
break;
case Byte_ascii.Slash: case Byte_ascii.Angle_end:
++atrs_bgn_pos; // set bgn_pos to be after char
++atrs_bgn_pos; // set bgn_pos to be after char
break;
case Byte_ascii.Backslash: // NOTE: MW treats \ as /; EX: <br\>" -> "<br/>
++tag_end_pos;
break;
case Byte_ascii.Dollar:// handles <br$2>;
default: // allow all other symbols by defaults
case Byte_ascii.Dollar: // handles <br$2>;
default: // allow all other symbols by defaults; TODO: need to filter out some like <br@>
break;
// letters / numbers after tag; tag is invalid; EX: "<spanA"
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
@@ -92,7 +91,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
tag_obj = null;
tag_obj = null;
break;
}
}
@@ -125,10 +124,11 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, cur_pos, Xop_ignore_tkn.Ignore_tid_pre_at_bos));
}
}
int gt_pos = -1; // find closing >; NOTE: MW does not ignore > inside quotes; EX: <div id="a>b">abc</div> -> <div id="a>
// find closing >; NOTE: MW does not ignore > inside quotes; EX: <div id="a>b">abc</div> -> <div id="a>
int gt_pos = -1;
boolean pre2_hack = false;
int end_name_pos = cur_pos + tag.Name_len();
Xop_xatr_parser atr_parser = ctx.App().Xatr_parser();
Mwh_atr_parser atr_parser = ctx.App().Parser_mgr().Xnde__atr_parser();
for (int i = end_name_pos; i < src_len; i++) {
byte b = src[i];
switch (b) {
@@ -160,15 +160,15 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
}
if (pre2_hack) {
// Xop_xnde_tkn tt = tkn_mkr.Xnde(bgn_pos, gt_pos + 1).Tag_(tag);
// ctx.Stack_add(tt);
// Xop_xnde_tkn tt = tkn_mkr.Xnde(bgn_pos, gt_pos + 1).Tag_(tag);
// ctx.Stack_add(tt);
pre2_pending = true;
return ctx.Lxr_make_txt_(cur_pos);
}
if (gt_pos == -1) {return ctx.Lxr_make_log_(Xop_xnde_log.Eos_while_closing_tag, src, bgn_pos, cur_pos);}
boolean force_xtn_for_nowiki = false;
int end_pos = gt_pos + 1;
switch (ctx.Parse_tid()) { // NOTE: special logic to handle <*include*>; SEE: NOTE_1 below
switch (ctx.Parse_tid()) { // NOTE: special logic to handle <include>; SEE: NOTE_1 below
case Xop_parser_.Parse_tid_page_wiki: // NOTE: ignore if (a) wiki and (b) <noinclude> or <onlyinclude>
switch (tag.Id()) {
case Xop_xnde_tag_.Tid_noinclude:
@@ -294,9 +294,9 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
break;
}
Xop_xatr_itm[] atrs = null;
Mwh_atr_itm[] atrs = null;
if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki) {
atrs = ctx.App().Xatr_parser().Parse(ctx.Msg_log(), src, atrs_bgn, atrs_end);
atrs = ctx.App().Parser_mgr().Xnde__parse_atrs(src, atrs_bgn, atrs_end);
}
if (( ( tag.Xtn()
&& ( ctx.Parse_tid() != Xop_parser_.Parse_tid_tmpl // do not gobble up rest if in tmpl; handle <poem>{{{1}}}</poem>; DATE:2014-03-03
@@ -311,7 +311,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
if (tag.Restricted()) {
Xoae_page page = ctx.Cur_page();
if ( page.Html_data().Html_restricted()
&& page.Wiki().Domain_tid() != Xow_domain_type_.Int__home) {
&& page.Wiki().Domain_tid() != Xow_domain_tid_.Int__home) {
int end_pos = gtPos + 1;
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, end_pos, Bry_.Add(gplx.langs.htmls.Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, end_pos)))); // +1 to skip <
return end_pos;
@@ -500,7 +500,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
bgn_nde.Subs_move(root); // NOTE: Subs_move must go after Stack_pop_til, b/c Stack_pop_til adds tkns; see Xnde_td_list
ctx.Para().Process_block__xnde(end_tag, end_tag.Block_close());
}
private Xop_xnde_tkn Xnde_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, Xop_xnde_tag tag, byte closeMode, byte[] src, int bgn_pos, int cur_pos, int atrs_bgn, int atrs_end, Xop_xatr_itm[] atrs) {
private Xop_xnde_tkn Xnde_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, Xop_xnde_tag tag, byte closeMode, byte[] src, int bgn_pos, int cur_pos, int atrs_bgn, int atrs_end, Mwh_atr_itm[] atrs) {
Xop_xnde_tkn xnde = tkn_mkr.Xnde(bgn_pos, cur_pos).CloseMode_(closeMode);
int xndeBgn = bgn_pos + 1;
xnde.Name_rng_(xndeBgn, xndeBgn + tag.Name_len());
@@ -567,7 +567,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
if (rv == Bry_find_.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find <: page=~{0}", ctx.Cur_page().Url().To_str()); return Bry_find_.Not_found;}
return rv;
}
private int Make_xnde_xtn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, Xop_xnde_tag tag, int open_bgn, int open_end, int name_bgn, int name_end, int atrs_bgn, int atrs_end, Xop_xatr_itm[] atrs, boolean inline, boolean pre2_hack) {
private int Make_xnde_xtn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, Xop_xnde_tag tag, int open_bgn, int open_end, int name_bgn, int name_end, int atrs_bgn, int atrs_end, Mwh_atr_itm[] atrs, boolean inline, boolean pre2_hack) {
// NOTE: find end_tag that exactly matches bgnTag; must be case sensitive;
int xnde_end = open_end;
Xop_xnde_tkn xnde = null;
@@ -581,7 +581,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
Xop_xnde_tag_lang tag_lang = tag.Langs_get(ctx.Lang().Case_mgr(), ctx.Cur_page().Lang().Lang_id(), src, name_bgn, name_end);
if (tag_lang == null) // tag does not match lang; EX:<trecho> and lang=de;
return ctx.Lxr_make_txt_(open_end);
if (tag_lang != Xop_xnde_tag_lang._) // tag matches; note Xop_xnde_tag_lang._ is a wildcard match; EX:<section>
if (tag_lang != Xop_xnde_tag_lang.Instance) // tag matches; note Xop_xnde_tag_lang._ is a wildcard match; EX:<section>
close_bry = tag_lang.Xtn_end_tag_tmp();
}
int src_offset = open_bgn - 1; // open bgn to start at <; -2 to ignore </ ; +1 to include <
@@ -649,6 +649,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
case Xop_xnde_tag_.Tid_pagelist: xnde_xtn = tkn_mkr.Xnde_pagelist(); break;
case Xop_xnde_tag_.Tid_section: xnde_xtn = tkn_mkr.Xnde_section(); break;
case Xop_xnde_tag_.Tid_categoryList: xnde_xtn = tkn_mkr.Xnde_categoryList(); break;
case Xop_xnde_tag_.Tid_source: // changed to be synonymn of syntax_highlight; DATE:2014-06-24
case Xop_xnde_tag_.Tid_syntaxHighlight: xnde_xtn = tkn_mkr.Xnde_syntaxHighlight(); break;
case Xop_xnde_tag_.Tid_score: xnde_xtn = tkn_mkr.Xnde_score(); break;
case Xop_xnde_tag_.Tid_translate: xnde_xtn = tkn_mkr.Xnde_translate(); break;
@@ -673,7 +674,6 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
case Xop_xnde_tag_.Tid_xowa_tag_bgn:
case Xop_xnde_tag_.Tid_xowa_tag_end:
break;
case Xop_xnde_tag_.Tid_source: // added on DATE:2014-06-24
case Xop_xnde_tag_.Tid_pre: // NOTE: pre must be an xtn, but does not create an xtn node (it gobbles up everything between); still need to touch the para_wkr; DATE:2014-02-20
ctx.Para().Process_block__xnde(tag, Xop_xnde_tag.Block_bgn);
if (Bry_find_.Find_fwd(src, Byte_ascii.Nl, xnde.Tag_open_end(), xnde.Tag_close_bgn()) != Bry_find_.Not_found)
@@ -721,7 +721,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
byte b = src[i];
switch (b) {
case Byte_ascii.Lt: // < encountered; may be inner node inside tag which is legal in wikitext; EX: "<ul style=<nowiki>#</nowiki>FFFFFF>"
int valid_inner_xnde_gt = ctx.App().Xatr_parser().Xnde_find_gt_find(src, i + 1, src_len);
int valid_inner_xnde_gt = ctx.App().Parser_mgr().Xnde__atr_parser().Xnde_find_gt_find(src, i + 1, src_len);
if (valid_inner_xnde_gt != String_.Find_none) {
i = valid_inner_xnde_gt;
}

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*; import gplx.xowa.nss.*;
import org.junit.*; import gplx.xowa.wikis.nss.*;
public class Xop_xnde_wkr__basic_tst {
private Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@@ -120,7 +120,7 @@ public class Xop_xnde_wkr__basic_tst {
fxt.Test_parse_page_all_str("<code><script src='a'>b</script></code>", "<code>&lt;script src='a'>b&lt;/script></code>");
}
@Test public void Script_in_syntaxhighlight() {
fxt.Test_parse_page_all_str("<syntaxhighlight><script>alert('fail');</script></syntaxhighlight>", "<pre style=\"overflow:auto;\">&lt;script&gt;alert('fail');&lt;/script&gt;</pre>");
fxt.Test_parse_page_all_str("<syntaxhighlight><script>alert('fail');</script></syntaxhighlight>", "<div class=\"mw-highlight\"><pre style=\"overflow:auto\">&lt;script&gt;alert('fail');&lt;/script&gt;</pre></div>");
}
@Test public void Script_in_math() {
fxt.App().File_mgr().Math_mgr().Renderer_is_mathjax_(false);

View File

@@ -21,16 +21,16 @@ public class Xop_xnde_wkr__text_block_tst {
private Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Source_wikitext() { // PURPOSE.ASSERT: wikitext should be rendered literally; DATE:2014-03-11
fxt.Test_parse_page_wiki_str("<source>''a''</source>", "<pre>''a''</pre>");
fxt.Test_parse_page_wiki_str("<source>''a''</source>", "<div class=\"mw-highlight\"><pre style=\"overflow:auto\">''a''</pre></div>");
}
@Test public void Source_nowiki() { // PURPOSE.ASSERT: onlyinclude should be rendered literally; DATE:2014-03-11
fxt.Test_parse_page_wiki_str("<source><onlyinclude>a</onlyinclude></source>", "<pre>&lt;onlyinclude&gt;a&lt;/onlyinclude&gt;</pre>");
fxt.Test_parse_page_wiki_str("<source><onlyinclude>a</onlyinclude></source>", "<div class=\"mw-highlight\"><pre style=\"overflow:auto\">&lt;onlyinclude&gt;a&lt;/onlyinclude&gt;</pre></div>");
}
@Test public void Source_escape() {
fxt.Test_parse_page_wiki_str("<source><b></source>", "<pre>&lt;b&gt;</pre>");
fxt.Test_parse_page_wiki_str("<source><b></source>", "<div class=\"mw-highlight\"><pre style=\"overflow:auto\">&lt;b&gt;</pre></div>");
}
@Test public void Source_escape_amp() { // PURPOSE: &lt; should be rendered as &amp;lt; PAGE:uk.b:HTML; DATE:2014-03-11
fxt.Test_parse_page_wiki_str("<source>&lt;</source>", "<pre>&amp;lt;</pre>");
fxt.Test_parse_page_wiki_str("<source>&lt;</source>", "<div class=\"mw-highlight\"><pre style=\"overflow:auto\">&amp;lt;</pre></div>");
}
@Test public void Source_pre() { // PURPOSE: handle pre; PAGE:en.w:Comment_(computer_programming); DATE:2014-06-23
fxt.Init_para_y_();
@@ -39,9 +39,9 @@ public class Xop_xnde_wkr__text_block_tst {
, " a"
, " </source>"
), String_.Concat_lines_nl_skip_last
( " <pre>"
( " <div class=\"mw-highlight\"><pre style=\"overflow:auto\">"
, " a"
, "</pre>"
, "</pre></div>"
));
fxt.Init_para_n_();
}

View File

@@ -32,7 +32,7 @@ public class Xop_xnde_wkr__xatrs_tst {
fxt.Test_parse_page_all_str("<span style='color:red' style='color:green' style='color:blue'>a</span>" , "<span style='color:blue'>a</span>"); // three
}
@Test public void Non_ws() { // PURPOSE: <br$2/> is valid; symbols function as ws
fxt.Init_log_(Xop_xatr_parser.Log_invalid_atr).Test_parse_page_wiki("<br$2/>" , fxt.tkn_xnde_(0, 7).Atrs_rng_(3, 5));
fxt.Test_parse_page_wiki("<br$2/>" , fxt.tkn_xnde_(0, 7).Atrs_rng_(3, 5));
}
@Test public void Invalid() { // PURPOSE: make sure brx does not match br
fxt.Test_parse_page_wiki("<brx/>" , fxt.tkn_bry_(0, 1), fxt.tkn_txt_(1, 6));