1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2014-07-13 23:23:30 -04:00
parent ecbe2918d8
commit bc10cd76b6
316 changed files with 3251 additions and 1652 deletions

View File

@@ -22,8 +22,7 @@ public class Xop_xatr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_AT
private byte mode = Mode_atr_bgn;
private int atr_bgn = -1, key_bgn = -1, key_end = -1, eq_pos = -1, val_bgn = -1, val_end = -1; boolean valid = true;
private byte quote_byte = Byte_ascii.Nil;
private Hash_adp_bry xnde_hash = Hash_adp_bry.ci_().Add_bry_bry(Xop_xnde_tag_.Tag_nowiki.Name_bry()).Add_bry_bry(Xop_xnde_tag_.Tag_noinclude.Name_bry()).Add_bry_bry(Xop_xnde_tag_.Tag_includeonly.Name_bry()).Add_bry_bry(Xop_xnde_tag_.Tag_onlyinclude.Name_bry());
private Hash_adp_bry repeated_atrs_hash = Hash_adp_bry.ci_();
private Hash_adp_bry repeated_atrs_hash = Hash_adp_bry.ci_ascii_(); // ASCII:xnde_atrs
private Bry_bfr key_bfr = Bry_bfr.new_(), val_bfr = Bry_bfr.new_(); boolean key_bfr_on = false, val_bfr_on = false;
public Bry_obj_ref Bry_obj() {return bry_ref;} private Bry_obj_ref bry_ref = Bry_obj_ref.null_();
public int Xnde_find_gt_find(byte[] src, int pos, int end) {
@@ -386,6 +385,12 @@ public class Xop_xatr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_AT
}
repeated_atrs_hash.Add(key_bry, cur);
}
// Shared (static) byte-array hash holding the names of four tags: nowiki, noinclude, includeonly, onlyinclude.
// NOTE(review): built via Hash_adp_bry.ci_ascii_(), presumably a case-insensitive ASCII-only hash -- confirm against Hash_adp_bry.
// NOTE(review): Add_bry_bry is given only the name; presumably the name is stored as both key and value -- confirm.
private static final Hash_adp_bry xnde_hash = Hash_adp_bry.ci_ascii_()
.Add_bry_bry(Xop_xnde_tag_.Tag_nowiki.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_noinclude.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_includeonly.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_onlyinclude.Name_bry())
;
private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "xatr_parser");
public static final Gfo_msg_itm
Log_invalid_atr = Gfo_msg_itm_.new_warn_(owner, "invalid_atr")

View File

@@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*;
public class Xop_xatr_whitelist_mgr {
public boolean Chk(int tag_id, byte[] src, Xop_xatr_itm xatr) {
byte[] key_bry = xatr.Key_bry();
@@ -31,7 +32,7 @@ public class Xop_xatr_whitelist_mgr {
chk_bgn = 0;
chk_end = key_bry.length;
}
Object o = key_trie.MatchAtCur(chk_bry, chk_bgn, chk_end);
Object o = key_trie.Match_bgn(chk_bry, chk_bgn, chk_end);
if (o == null) return false;// unknown atr_key; EX: <b unknown=1/>
Xop_xatr_whitelist_itm itm = (Xop_xatr_whitelist_itm)o;
byte itm_key_tid = itm.Key_tid();
@@ -147,10 +148,10 @@ public class Xop_xatr_whitelist_mgr {
len = keys.Count();
for (int i = 0; i < len; i++) {
byte[] key_bry = (byte[])keys.FetchAt(i);
Xop_xatr_whitelist_itm itm = (Xop_xatr_whitelist_itm)key_trie.MatchAtCurExact(key_bry, 0, key_bry.length);
Xop_xatr_whitelist_itm itm = (Xop_xatr_whitelist_itm)key_trie.Match_exact(key_bry, 0, key_bry.length);
if (itm == null) {
itm = Ini_key_trie_add(key_bry, true);
key_trie.Add(key_bry, itm);
key_trie.Add_obj(key_bry, itm);
}
itm.Tags()[tag_tid] = 1;
}
@@ -159,7 +160,7 @@ public class Xop_xatr_whitelist_mgr {
byte[] key_bry = Bry_.new_ascii_(key_str);
Ini_key_trie_add(key_bry, false);
Xop_xatr_whitelist_itm itm = Ini_key_trie_add(key_bry, false);
key_trie.Add(key_bry, itm);
key_trie.Add_obj(key_bry, itm);
int len = Xop_xnde_tag_._MaxLen;
for (int i = 0; i < len; i++)
itm.Tags()[i] = 1;
@@ -168,15 +169,15 @@ public class Xop_xatr_whitelist_mgr {
Object key_tid_obj = tid_hash.Fetch(key);
byte key_tid = key_tid_obj == null ? Xop_xatr_itm.Key_tid_generic : ((Byte_obj_val)key_tid_obj).Val();
Xop_xatr_whitelist_itm rv = new Xop_xatr_whitelist_itm(key, key_tid, exact);
key_trie.Add(key, rv);
key_trie.Add_obj(key, rv);
return rv;
}
private Hash_adp_bry tid_hash = Hash_adp_bry.ci_()
private Hash_adp_bry tid_hash = Hash_adp_bry.ci_ascii_()
.Add_str_byte("id", Xop_xatr_itm.Key_tid_id)
.Add_str_byte("style", Xop_xatr_itm.Key_tid_style)
.Add_str_byte("role", Xop_xatr_itm.Key_tid_role)
;
private ByteTrieMgr_slim key_trie = ByteTrieMgr_slim.ci_ascii_(); // NOTE:ci.ascii:HTML.node_name
private Btrie_slim_mgr key_trie = Btrie_slim_mgr.ci_ascii_(); // NOTE:ci.ascii:HTML.node_name
public boolean Scrub_style(Xop_xatr_itm xatr, byte[] raw) { // REF:Sanitizer.php|checkCss; '! expression | filter\s*: | accelerator\s*: | url\s*\( !ix'; NOTE: this seems to affect MS IE only; DATE:2013-04-01
byte[] val_bry = xatr.Val_bry();
byte[] chk_bry; int chk_bgn, chk_end;
@@ -193,7 +194,7 @@ public class Xop_xatr_whitelist_mgr {
}
int pos = chk_bgn;
while (pos < chk_end) {
Object o = style_trie.MatchAtCur(chk_bry, pos, chk_end);
Object o = style_trie.Match_bgn(chk_bry, pos, chk_end);
if (o == null)
++pos;
else {
@@ -240,7 +241,7 @@ public class Xop_xatr_whitelist_mgr {
return Byte_ascii.Nil;
}
static final byte Style_expression = 0, Style_filter = 1, Style_accelerator = 2, Style_url = 3, Style_urls = 4, Style_comment = 5, Style_image = 6, Style_image_set = 7;
private static ByteTrieMgr_slim style_trie = ByteTrieMgr_slim.ci_ascii_() // NOTE:ci.ascii:Javascript
private static Btrie_slim_mgr style_trie = Btrie_slim_mgr.ci_ascii_() // NOTE:ci.ascii:Javascript
.Add_str_byte("expression" , Style_expression)
.Add_str_byte("filter" , Style_filter)
.Add_str_byte("accelerator" , Style_accelerator)

View File

@@ -16,10 +16,11 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*;
// Lexer entry for xnde tokens: registers itself in the core trie under Byte_ascii.Lt and hands all
// token construction to the context's xnde worker (ctx.Xnde()).
// NOTE(review): this text looks like a rendered diff without +/- markers; the ByteTrieMgr_fast overloads are
// probably the pre-rename lines and the Btrie_fast_mgr overloads their replacements -- confirm before editing.
class Xop_xnde_lxr implements Xop_lxr {
// Identifies this lexer with the Xop_lxr_.Tid_xnde type id.
public byte Lxr_tid() {return Xop_lxr_.Tid_xnde;}
// Wiki-level init: adds this lexer to core_trie keyed by Byte_ascii.Lt (presumably the '<' byte).
public void Init_by_wiki(Xow_wiki wiki, ByteTrieMgr_fast core_trie) {core_trie.Add(Byte_ascii.Lt, this);}
// Language-level init: intentionally empty; nothing is registered per language.
public void Init_by_lang(Xol_lang lang, ByteTrieMgr_fast core_trie) {}
// Same registration as above, taking the Btrie_fast_mgr trie type.
public void Init_by_wiki(Xow_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Lt, this);}
// Same no-op as above, taking the Btrie_fast_mgr trie type.
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
// Forwards every argument unchanged to ctx.Xnde().Make_tkn and returns its int result.
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Xnde().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
// Single shared instance; the constructor is not public, so other packages cannot create instances.
public static final Xop_xnde_lxr _ = new Xop_xnde_lxr(); Xop_xnde_lxr() {}
}

View File

@@ -16,8 +16,9 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*;
public class Xop_xnde_tag_regy {
public ByteTrieMgr_slim XndeNames(int i) {
public Btrie_slim_mgr XndeNames(int i) {
if (nild) {Init(); nild = false;}
switch (i) {
case Xop_parser_.Parse_tid_tmpl: return tagRegy_tmpl;
@@ -39,13 +40,13 @@ public class Xop_xnde_tag_regy {
rv.Add(itm);
return (Xop_xnde_tag[])rv.XtoAry(Xop_xnde_tag.class);
}
// Adds every tag in ary to the given trie, using the tag's Name_bry() as the key and the tag itself as the value.
// NOTE(review): this span appears to be a rendered diff without +/- markers: the two signatures and the two
// Add / Add_obj calls are presumably old/new pairs (ByteTrieMgr_slim -> Btrie_slim_mgr, Add -> Add_obj) -- confirm.
private void Init_reg(ByteTrieMgr_slim tagRegy, Xop_xnde_tag... ary) {
private void Init_reg(Btrie_slim_mgr tagRegy, Xop_xnde_tag... ary) {
for (Xop_xnde_tag tag : ary)
tagRegy.Add(tag.Name_bry(), tag);
tagRegy.Add_obj(tag.Name_bry(), tag);
}
private ByteTrieMgr_slim
tagRegy_wiki_main = ByteTrieMgr_slim.ci_ascii_() // NOTE:ci.ascii:MW_const.en; listed XML node names are en
, tagRegy_wiki_tmpl = ByteTrieMgr_slim.ci_ascii_()
, tagRegy_tmpl = ByteTrieMgr_slim.ci_ascii_()
private Btrie_slim_mgr
tagRegy_wiki_main = Btrie_slim_mgr.ci_ascii_() // NOTE:ci.ascii:MW_const.en; listed XML node names are en
, tagRegy_wiki_tmpl = Btrie_slim_mgr.ci_ascii_()
, tagRegy_tmpl = Btrie_slim_mgr.ci_ascii_()
;
}

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.xowa.apps.fsys.*; import gplx.xowa.wikis.*; import gplx.xowa.xtns.*; import gplx.html.*;
import gplx.core.btries.*; import gplx.xowa.apps.fsys.*; import gplx.xowa.wikis.*; import gplx.xowa.xtns.*; import gplx.html.*;
import gplx.xowa.parsers.logs.*;
public class Xop_xnde_wkr implements Xop_ctx_wkr {
public void Ctor_ctx(Xop_ctx ctx) {}
@@ -60,8 +60,8 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
cur_byt = src[cur_pos];
tag_is_closing = true;
}
ByteTrieMgr_slim tag_trie = ctx.App().Xnde_tag_regy().XndeNames(ctx.Xnde_names_tid());
Object tag_obj = tag_trie.Match(cur_byt, src, cur_pos, src_len); // NOTE:tag_obj can be null in wiki_tmpl mode; EX: "<ul" is not a valid tag in wiki_tmpl, but is valid in wiki_main
Btrie_slim_mgr tag_trie = ctx.App().Xnde_tag_regy().XndeNames(ctx.Xnde_names_tid());
Object tag_obj = tag_trie.Match_bgn_w_byte(cur_byt, src, cur_pos, src_len); // NOTE:tag_obj can be null in wiki_tmpl mode; EX: "<ul" is not a valid tag in wiki_tmpl, but is valid in wiki_main
int atrs_bgn_pos = tag_trie.Match_pos();
int tag_end_pos = atrs_bgn_pos - 1;
if (tag_obj != null) {
@@ -553,9 +553,9 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
return Find_xtn_end_tag(ctx, src, src_len, open_end, close_bry, tag_bgn + Pf_tag.Xtag_bgn);
else { // search rest of String for case-insensitive name; NOTE: used to do CS first, then fall-back on CI; DATE:2013-12-02
xtn_end_tag_trie.Clear();
xtn_end_tag_trie.Add(close_bry, close_bry);
xtn_end_tag_trie.Add_obj(close_bry, close_bry);
for (int i = open_end; i < src_len; i++) {
Object o = xtn_end_tag_trie.MatchAtCur(src, i, src_len);
Object o = xtn_end_tag_trie.Match_bgn(src, i, src_len);
if (o != null) {
return i;
}
@@ -565,14 +565,14 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
// Locates the closing marker that matches a numbered xtn tag and returns the position of the Byte_ascii.Lt
// found by searching backwards from that marker; returns Bry_finder.Not_found (after logging a warning with the
// current page url) when any step fails.
//   src      : bytes being parsed
//   open_end : position after the opening tag; the forward search starts at open_end + Pf_tag.Xtag_rhs.length
//   tag_bgn  : start of the 10-byte span in src that is parsed as the tag id
//   src_len, close_bry : not read by the lines visible here
// NOTE(review): this span appears to be a rendered diff without +/- markers; each pair of consecutive, near-identical
// "if (...) {...Warn_many...}" lines is presumably an old/new pair (X_to_full_str_safe -> Xto_full_str_safe) -- confirm.
private int Find_xtn_end_tag(Xop_ctx ctx, byte[] src, int src_len, int open_end, byte[] close_bry, int tag_bgn) {
// parse the id from src[tag_bgn .. tag_bgn + 10); -1 is the fallback passed to X_to_int_or
int tag_id = Bry_.X_to_int_or(src, tag_bgn, tag_bgn + 10, -1);
if (tag_id == -1) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not extract int: page=~{0}", ctx.Cur_page().Url().X_to_full_str_safe()); return Bry_finder.Not_found;}
if (tag_id == -1) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not extract int: page=~{0}", ctx.Cur_page().Url().Xto_full_str_safe()); return Bry_finder.Not_found;}
// borrow a pooled buffer from the wiki's buffer maker to assemble the end marker
Bry_bfr tmp = ctx.Wiki().Utl_bry_bfr_mkr().Get_b128();
// end marker = Xtag_end_lhs + tag_id + Xtag_rhs; presumably tag_id is left-padded with Num_0 to width 10 -- confirm Add_int_pad_bgn
tmp.Add(Pf_tag.Xtag_end_lhs).Add_int_pad_bgn(Byte_ascii.Num_0, 10, tag_id).Add(Pf_tag.Xtag_rhs);
// Mkr_rls presumably releases the buffer back to its maker before the bytes are extracted -- confirm
byte[] tag_end = tmp.Mkr_rls().XtoAryAndClear();
// forward search for the end marker, starting past the opening tag
int rv = Bry_finder.Find_fwd(src, tag_end, open_end + Pf_tag.Xtag_rhs.length);
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find end: page=~{0}", ctx.Cur_page().Url().X_to_full_str_safe()); return Bry_finder.Not_found;}
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find end: page=~{0}", ctx.Cur_page().Url().Xto_full_str_safe()); return Bry_finder.Not_found;}
// step backwards from just before the marker to the nearest Byte_ascii.Lt
rv = Bry_finder.Find_bwd(src, Byte_ascii.Lt, rv - 1);
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find <: page=~{0}", ctx.Cur_page().Url().X_to_full_str_safe()); return Bry_finder.Not_found;}
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find <: page=~{0}", ctx.Cur_page().Url().Xto_full_str_safe()); return Bry_finder.Not_found;}
return rv;
}
private int Make_xnde_xtn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, Xop_xnde_tag tag, int open_bgn, int open_end, int atrs_bgn, int atrs_end, Xop_xatr_itm[] atrs, boolean inline, boolean pre2_hack) {
@@ -693,7 +693,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
}
return xnde_end;
} private ByteTrieMgr_slim xtn_end_tag_trie = ByteTrieMgr_slim.ci_ascii_(); // NOTE:ci.ascii:MW_const.en; listed XML node names are en
} private Btrie_slim_mgr xtn_end_tag_trie = Btrie_slim_mgr.ci_ascii_(); // NOTE:ci.ascii:MW_const.en; listed XML node names are en
private Xop_xnde_tkn New_xnde_pair(Xop_ctx ctx, Xop_root_tkn root, Xop_tkn_mkr tkn_mkr, Xop_xnde_tag tag, int open_bgn, int open_end, int close_bgn, int close_end) {
Xop_xnde_tkn rv = tkn_mkr.Xnde(open_bgn, close_end).Tag_(tag).Tag_open_rng_(open_bgn, open_end).Tag_close_rng_(close_bgn, close_end).CloseMode_(Xop_xnde_tkn.CloseMode_pair);
int name_bgn = open_bgn + 1;