1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-08-03 00:10:03 -04:00
parent 9d63f03b3d
commit 34c34f227c
514 changed files with 4972 additions and 3910 deletions

View File

@@ -38,9 +38,9 @@ public class Xop_xatr_itm {
// Returns the attribute value as a String: fetches the value bytes through Val_as_bry and hands them to String_.new_u8.
public String Val_as_str(byte[] src) {
	byte[] val = Val_as_bry(src);
	return String_.new_u8(val);
}
// Returns the attribute value bytes. On the first call val_bry is filled from Bry_.Mid(src, val_bgn, val_end); later calls return the stored array.
// NOTE: val_bry is cached
public byte[] Val_as_bry(byte[] src) {
	if (val_bry != null) return val_bry;
	val_bry = Bry_.Mid(src, val_bgn, val_end);
	return val_bry;
}
// Same as Val_as_bry, except that null is returned when Bry_.Len_eq_0 reports the value as empty.
public byte[] Val_as_bry__blank_to_null(byte[] src) {
	byte[] val = Val_as_bry(src);
	if (Bry_.Len_eq_0(val)) return null;
	return val;
}
// Returns the attribute value parsed as an int; `or` is handed to the Bry_ parser as its fallback argument.
// If val_bry has not been cached, the bytes are parsed in place from src using val_bgn / val_end; otherwise the cached val_bry is parsed.
// This text is a commit diff, so the method appears twice: first with the Xto_* helper names, then with the To_* names.
// NOTE(review): the uncached branch calls the "lax" parser while the cached branch calls the plain one; if the two disagree on some input, the result depends on whether val_bry was populated first (e.g. by Val_as_bry) -- confirm this is intended.
public int Val_as_int_or(byte[] src, int or) {return val_bry == null ? Bry_.Xto_int_or_lax(src, val_bgn, val_end, or) : Bry_.Xto_int_or(val_bry, or);}
public int Val_as_int_or(byte[] src, int or) {return val_bry == null ? Bry_.To_int_or__lax(src, val_bgn, val_end, or) : Bry_.To_int_or(val_bry, or);}
// True only when the value parses to exactly 1; Val_as_int_or is called with 0 as its fallback.
public boolean Val_as_bool_by_int(byte[] src) {
	int val = Val_as_int_or(src, 0);
	return val == 1;
}
// True when the value bytes, after being passed through the Bry_ lower-casing helper, equal Bool_.True_bry.
// This text is a commit diff, so the method appears twice; the two lines differ only in the helper name (Lower_ascii in the first, Lcase__all in the second).
// NOTE(review): the helper receives the array returned by Val_as_bry, which is the cached val_bry; whether the helper copies or lower-cases in place is not visible here -- confirm it does not mutate the cache.
public boolean Val_as_bool(byte[] src) {return Bry_.Eq(Bry_.Lower_ascii(Val_as_bry(src)), Bool_.True_bry);}
public boolean Val_as_bool(byte[] src) {return Bry_.Eq(Bry_.Lcase__all(Val_as_bry(src)), Bool_.True_bry);}
public static Xop_xatr_itm[] Xatr_parse(Xoae_app app, Xop_xnde_atr_parser parser, Hash_adp_bry hash, Xowe_wiki wiki, byte[] src, Xop_xnde_tkn xnde) {
Xop_xatr_itm[] xatr_ary = app.Xatr_parser().Parse(app.Msg_log(), src, xnde.Atrs_bgn(), xnde.Atrs_end());
for (int i = 0; i < xatr_ary.length; i++) {

View File

@@ -23,7 +23,7 @@ public class Xop_xatr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_AT
// Current parser mode; starts at Mode_atr_bgn.
private byte mode = Mode_atr_bgn;
// Positions recorded for the attribute being scanned; every one starts at -1 (presumably meaning "not set" -- confirm).
// valid starts out true; note it is declared without an access modifier, unlike the int fields before it.
private int atr_bgn = -1, key_bgn = -1, key_end = -1, eq_pos = -1, val_bgn = -1, val_end = -1; boolean valid = true;
// quote_byte starts as Byte_ascii.Null.
private byte quote_byte = Byte_ascii.Null;
// Hash the parser adds to with repeated_atrs_hash.Add(key_bry, cur) further down in the class.
// This text is a commit diff, so the field appears twice: built with ci_ascii_() in the first line and ci_a7() in the second.
private final Hash_adp_bry repeated_atrs_hash = Hash_adp_bry.ci_ascii_(); // ASCII:xnde_atrs
private final Hash_adp_bry repeated_atrs_hash = Hash_adp_bry.ci_a7(); // ASCII:xnde_atrs
// Two buffers, one for keys and one for values, each created once with Bry_bfr.new_().
private final Bry_bfr key_bfr = Bry_bfr.new_(), val_bfr = Bry_bfr.new_();
// All three flags start false.
private boolean key_bfr_on = false, val_bfr_on = false, ws_is_before_val = false;
// Bry_obj() exposes bry_ref, a Bry_obj_ref created once through Bry_obj_ref.null_().
public Bry_obj_ref Bry_obj() {return bry_ref;} private final Bry_obj_ref bry_ref = Bry_obj_ref.null_();
@@ -388,7 +388,7 @@ public class Xop_xatr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_AT
}
repeated_atrs_hash.Add(key_bry, cur);
}
private static final Hash_adp_bry xnde_hash = Hash_adp_bry.ci_ascii_()
private static final Hash_adp_bry xnde_hash = Hash_adp_bry.ci_a7()
.Add_bry_bry(Xop_xnde_tag_.Tag_nowiki.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_noinclude.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_includeonly.Name_bry())

View File

@@ -43,6 +43,7 @@ public class Xop_xatr_whitelist_mgr {
switch (itm_key_tid) {
case Xop_xatr_itm.Key_tid_style:
if (!Scrub_style(xatr, src)) return false;
xatr.Val_bry_(gplx.xowa.parsers.amps.Xop_amp_mgr.I.Decode_as_bry(xatr.Val_as_bry(src))); // NOTE: must decode style values; "&#amp;#000000" -> "#000000"; see MW:checkCss; PAGE:en.w:Boron DATE:2015-07-29
break;
case Xop_xatr_itm.Key_tid_role:
if (!Bry_.Eq(Val_role_presentation, xatr.Val_as_bry(src))) return false; // MW: For now we only support role="presentation"; DATE:2014-04-05
@@ -123,7 +124,7 @@ public class Xop_xatr_whitelist_mgr {
Ini_all_loose("data");
return this;
}
// Hash held per whitelist manager instance (presumably keyed by group name, given Ini_grp below -- confirm).
// This text is a commit diff, so the field appears twice: built with cs_() in the first line and cs() in the second.
private Hash_adp_bry grp_hash = Hash_adp_bry.cs_();
private Hash_adp_bry grp_hash = Hash_adp_bry.cs();
private void Ini_grp(String key_str, String base_grp, String... cur_itms) {
byte[][] itms = Bry_.Ary(cur_itms);
if (base_grp != null)
@@ -173,12 +174,12 @@ public class Xop_xatr_whitelist_mgr {
key_trie.Add_obj(key, rv);
return rv;
}
// Maps the attribute names "id", "style" and "role" to their Xop_xatr_itm.Key_tid_* byte ids.
// This text is a commit diff, so the declaration header appears twice (ci_ascii_() first, ci_a7() second); the Add_str_byte chain that follows is shared.
private Hash_adp_bry tid_hash = Hash_adp_bry.ci_ascii_()
private Hash_adp_bry tid_hash = Hash_adp_bry.ci_a7()
.Add_str_byte("id", Xop_xatr_itm.Key_tid_id)
.Add_str_byte("style", Xop_xatr_itm.Key_tid_style)
.Add_str_byte("role", Xop_xatr_itm.Key_tid_role)
;
// Trie that receives entries through key_trie.Add_obj(key, rv) earlier in the class.
// This text is a commit diff, so the field appears twice: built with ci_ascii_() in the first line and ci_a7() in the second.
private Btrie_slim_mgr key_trie = Btrie_slim_mgr.ci_ascii_(); // NOTE:ci.ascii:HTML.node_name
private Btrie_slim_mgr key_trie = Btrie_slim_mgr.ci_a7(); // NOTE:ci.ascii:HTML.node_name
public boolean Scrub_style(Xop_xatr_itm xatr, byte[] raw) { // REF:Sanitizer.php|checkCss; '! expression | filter\s*: | accelerator\s*: | url\s*\( !ix'; NOTE: this seems to affect MS IE only; DATE:2013-04-01
byte[] val_bry = xatr.Val_bry();
byte[] chk_bry; int chk_bgn, chk_end;
@@ -242,7 +243,7 @@ public class Xop_xatr_whitelist_mgr {
return Byte_ascii.Null;
}
// Byte ids for style keywords, numbered 0 through 7; the style_trie initializer that follows registers keyword strings against these ids.
static final byte Style_expression = 0, Style_filter = 1, Style_accelerator = 2, Style_url = 3, Style_urls = 4, Style_comment = 5, Style_image = 6, Style_image_set = 7;
private static Btrie_slim_mgr style_trie = Btrie_slim_mgr.ci_ascii_() // NOTE:ci.ascii:Javascript
private static Btrie_slim_mgr style_trie = Btrie_slim_mgr.ci_a7() // NOTE:ci.ascii:Javascript
.Add_str_byte("expression" , Style_expression)
.Add_str_byte("filter" , Style_filter)
.Add_str_byte("accelerator" , Style_accelerator)

View File

@@ -74,7 +74,7 @@ public class Xop_xnde_tag {
}
Xop_xnde_tag_lang lang = (Xop_xnde_tag_lang)langs.Get_by(langs_key);
if (lang == null) return null; // cur tag is a lang tag, but no tag for this lang; EX: "<trecho>" and cur_lang=de
return Bry_.Eq_ci_ascii(lang.Name_bry(), src, bgn, end)
return Bry_.Eq_ci_a7(lang.Name_bry(), src, bgn, end)
? lang
: null;
}

View File

@@ -54,8 +54,8 @@ public class Xop_xnde_tag_regy {
}
}
// Three tag tries declared in one statement, each built by the same Btrie_slim_mgr factory.
// This text is a commit diff, so the three initializers appear twice: with ci_utf_8_() first, then with ci_u8().
private Btrie_slim_mgr
tag_regy_wiki_main = Btrie_slim_mgr.ci_utf_8_() // NOTE:ci.utf8; he.s and <section> alias DATE:2014-07-18
, tag_regy_wiki_tmpl = Btrie_slim_mgr.ci_utf_8_()
, tag_regy_tmpl = Btrie_slim_mgr.ci_utf_8_()
tag_regy_wiki_main = Btrie_slim_mgr.ci_u8() // NOTE:ci.utf8; he.s and <section> alias DATE:2014-07-18
, tag_regy_wiki_tmpl = Btrie_slim_mgr.ci_u8()
, tag_regy_tmpl = Btrie_slim_mgr.ci_u8()
;
}

View File

@@ -555,15 +555,15 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
}
// Locates the "<" that opens the end tag matching an extension tag marker and returns its position, or Bry_finder.Not_found after logging a warning.
// Steps: (1) read an int id from src in the range tag_bgn .. tag_bgn + 10; (2) build the end-tag bytes from Pfunc_tag.Xtag_end_lhs, the id zero-padded to 10 digits, and Pfunc_tag.Xtag_rhs;
// (3) search forward for those bytes starting at open_end + Pfunc_tag.Xtag_rhs.length; (4) search backward from just before the match for Byte_ascii.Lt.
// src_len and close_bry are accepted but not used in this body.
// This text is a commit diff, so four statements appear twice in a row: the first of each pair uses Xto_int_or / Xto_full_str_safe, the second To_int_or / To_str.
private int Find_xtn_end_tag(Xop_ctx ctx, byte[] src, int src_len, int open_end, byte[] close_bry, int tag_bgn) {
// -1 is the fallback passed to the int parser, so -1 here means no id could be read.
int tag_id = Bry_.Xto_int_or(src, tag_bgn, tag_bgn + 10, -1);
if (tag_id == -1) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not extract int: page=~{0}", ctx.Cur_page().Url().Xto_full_str_safe()); return Bry_finder.Not_found;}
int tag_id = Bry_.To_int_or(src, tag_bgn, tag_bgn + 10, -1);
if (tag_id == -1) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not extract int: page=~{0}", ctx.Cur_page().Url().To_str()); return Bry_finder.Not_found;}
// Borrow a buffer from the wiki's buffer maker; To_bry_and_rls below hands back the bytes (and, going by its name, releases the buffer -- confirm).
Bry_bfr tmp = ctx.Wiki().Utl__bfr_mkr().Get_b128();
tmp.Add(Pfunc_tag.Xtag_end_lhs).Add_int_pad_bgn(Byte_ascii.Num_0, 10, tag_id).Add(Pfunc_tag.Xtag_rhs);
byte[] tag_end = tmp.To_bry_and_rls();
int rv = Bry_finder.Find_fwd(src, tag_end, open_end + Pfunc_tag.Xtag_rhs.length);
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find end: page=~{0}", ctx.Cur_page().Url().Xto_full_str_safe()); return Bry_finder.Not_found;}
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find end: page=~{0}", ctx.Cur_page().Url().To_str()); return Bry_finder.Not_found;}
// Step back from the end-tag marker to the nearest preceding "<".
rv = Bry_finder.Find_bwd(src, Byte_ascii.Lt, rv - 1);
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find <: page=~{0}", ctx.Cur_page().Url().Xto_full_str_safe()); return Bry_finder.Not_found;}
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find <: page=~{0}", ctx.Cur_page().Url().To_str()); return Bry_finder.Not_found;}
return rv;
}
private int Make_xnde_xtn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, Xop_xnde_tag tag, int open_bgn, int open_end, int name_bgn, int name_end, int atrs_bgn, int atrs_end, Xop_xatr_itm[] atrs, boolean inline, boolean pre2_hack) {
@@ -702,7 +702,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
}
return xnde_end;
} private Btrie_slim_mgr xtn_end_tag_trie = Btrie_slim_mgr.ci_ascii_(); // NOTE:ci.ascii:MW_const.en; listed XML node names are en
} private Btrie_slim_mgr xtn_end_tag_trie = Btrie_slim_mgr.ci_a7(); // NOTE:ci.ascii:MW_const.en; listed XML node names are en
private Xop_xnde_tkn New_xnde_pair(Xop_ctx ctx, Xop_root_tkn root, Xop_tkn_mkr tkn_mkr, Xop_xnde_tag tag, int open_bgn, int open_end, int close_bgn, int close_end) {
Xop_xnde_tkn rv = tkn_mkr.Xnde(open_bgn, close_end).Tag_(tag).Tag_open_rng_(open_bgn, open_end).Tag_close_rng_(close_bgn, close_end).CloseMode_(Xop_xnde_tkn.CloseMode_pair);
int name_bgn = open_bgn + 1;

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa; import gplx.*;
import org.junit.*;
public class Xop_xnde_wkr__xatrs_tst {
// Test fixture shared by every test in this class.
// This text is a commit diff, so the field appears twice; the second line adds the final modifier.
private Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
// Runs after each test and calls fxt.Init_para_n_() on the shared fixture.
@After public void term() {
	fxt.Init_para_n_();
}
@Test public void Inline() {
fxt.Test_parse_page_wiki("<ref cd=\"ef\" />" , fxt.tkn_xnde_(0, 15).Atrs_rng_(5, 13));
@@ -45,6 +45,9 @@ public class Xop_xnde_wkr__xatrs_tst {
fxt.Test_parse_page_wiki_str("<input value='a<'></input>", "<input value='a<'></input>"); // NOTE: do not call parse_page_all_str which will call Page.Clear and reset Restricted
fxt.Page().Html_data().Html_restricted_y_();
}
// PURPOSE: style values should be decoded; PAGE:en.w:Boron; DATE:2015-07-29
// The input carries "#" as the character reference &#x23;; the expected output has the literal "#".
@Test public void Style__decode() {
	String raw = "<span style='background:&#x23;ffc0c0'>a</span>";
	String expd = "<span style='background:#ffc0c0'>a</span>";
	fxt.Test_parse_page_all_str(raw, expd);
}
// @Test public void Unclosed() { // PURPOSE: unclosed atr should be treated as key, which should be ignored; PAGE:en.w:Palace of Versailles
// fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
// ( "<span id=\"1<>>a" // id="1<> -> key named 'id="1<>' which fails whitelist keys