mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.10.3.1
This commit is contained in:
@@ -16,6 +16,31 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
public class Xoa_parser_mgr {
|
||||
public Xop_tkn_mkr Tkn_mkr() {return tkn_mkr;} private final Xop_tkn_mkr tkn_mkr = new Xop_tkn_mkr();
|
||||
import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.uniqs.*;
|
||||
public class Xoa_parser_mgr {
|
||||
private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr();
|
||||
public Xop_tkn_mkr Tkn_mkr() {return tkn_mkr;} private final Xop_tkn_mkr tkn_mkr = new Xop_tkn_mkr();
|
||||
public Xop_uniq_mgr Core__uniq_mgr() {return core__uniq_mgr;} private final Xop_uniq_mgr core__uniq_mgr = new Xop_uniq_mgr();
|
||||
public Mwh_atr_parser Xnde__atr_parser() {return atr_parser;} private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
|
||||
public Mwh_atr_itm[] Xnde__parse_atrs(byte[] src, int src_bgn, int src_end) {
|
||||
// if (src_bgn < src_end) { // CHART
|
||||
// src = Bry_.Mid(src, src_bgn, src_end);
|
||||
// src = gplx.xowa.parsers.xndes.Xop_xnde_tkn.uniq_mgr.Parse(src);
|
||||
// src_bgn = 0;
|
||||
// src_end = src.length;
|
||||
// }
|
||||
atr_parser.Parse(atr_bldr, -1, -1, src, src_bgn, src_end);
|
||||
return atr_bldr.To_atr_ary();
|
||||
}
|
||||
public Mwh_atr_itm[] Xnde__parse_atrs_for_tblw(byte[] src, int src_bgn, int src_end) {
|
||||
// int angle_bgn_pos = Bry_find_.Find_fwd(src, Byte_ascii.Angle_bgn, src_bgn, src_end);
|
||||
// if (angle_bgn_pos != Bry_find_.Not_found) {
|
||||
// src = Bry_.Mid(src, src_bgn, src_end);
|
||||
// src = Bry_.Replace(src, Byte_ascii.Angle_bgn_bry, gplx.langs.htmls.Html_entity_.Lt_bry);
|
||||
// src_bgn = 0;
|
||||
// src_end = src.length;
|
||||
// }
|
||||
atr_parser.Parse(atr_bldr, -1, -1, src, src_bgn, src_end);
|
||||
return atr_bldr.To_atr_ary();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,10 +18,10 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.langs.*;
|
||||
import gplx.xowa.gui.*; import gplx.xowa.xtns.lst.*;
|
||||
import gplx.xowa.guis.*; import gplx.xowa.xtns.lst.*;
|
||||
import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.wdatas.*;
|
||||
import gplx.xowa.parsers.apos.*; import gplx.xowa.parsers.amps.*; import gplx.xowa.parsers.lnkes.*; import gplx.xowa.parsers.hdrs.*; import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.tblws.*; import gplx.xowa.parsers.paras.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.tmpls.*;
|
||||
import gplx.xowa.parsers.logs.*; import gplx.xowa.html.modules.popups.keeplists.*;
|
||||
import gplx.xowa.parsers.logs.*; import gplx.xowa.htmls.modules.popups.keeplists.*;
|
||||
public class Xop_ctx {
|
||||
private Xop_ctx_wkr[] wkrs = new Xop_ctx_wkr[] {};
|
||||
Xop_ctx(Xowe_wiki wiki, Xoae_page page) {
|
||||
@@ -31,9 +31,10 @@ public class Xop_ctx {
|
||||
for (Xop_ctx_wkr wkr : wkrs) wkr.Ctor_ctx(this);
|
||||
this.xnde_tag_regy = wiki.Mw_parser_mgr().Xnde_tag_regy();
|
||||
}
|
||||
// public boolean Scribunto; // CHART
|
||||
public Xoae_app App() {return app;} private final Xoae_app app;
|
||||
public Xowe_wiki Wiki() {return wiki;} private final Xowe_wiki wiki;
|
||||
public Xol_lang Lang() {return lang;} private final Xol_lang lang;
|
||||
public Xol_lang_itm Lang() {return lang;} private final Xol_lang_itm lang;
|
||||
public Xop_tkn_mkr Tkn_mkr() {return tkn_mkr;} private final Xop_tkn_mkr tkn_mkr;
|
||||
public Xoae_page Cur_page() {return cur_page;} public void Cur_page_(Xoae_page v) {cur_page = v;} private Xoae_page cur_page;
|
||||
public byte Parse_tid() {return parse_tid;} public Xop_ctx Parse_tid_(byte v) {parse_tid = v; xnde_names_tid = v; return this;} private byte parse_tid = Xop_parser_.Parse_tid_null;
|
||||
@@ -56,7 +57,7 @@ public class Xop_ctx {
|
||||
public Xop_keeplist_wiki Tmpl_keeplist() {return tmpl_keeplist;} public void Tmpl_keeplist_(Xop_keeplist_wiki v) {this.tmpl_keeplist = v;} private Xop_keeplist_wiki tmpl_keeplist;
|
||||
public boolean Tmpl_args_parsing() {return tmpl_args_parsing;} public Xop_ctx Tmpl_args_parsing_(boolean v) {tmpl_args_parsing = v; return this;} private boolean tmpl_args_parsing;
|
||||
public Bry_bfr Tmpl_output() {return tmpl_output;} public Xop_ctx Tmpl_output_(Bry_bfr v) {tmpl_output = v; return this;} private Bry_bfr tmpl_output; // OBSOLETE: after tmpl_prepend_nl rewrite; DATE:2014-08-21
|
||||
public Xot_defn_trace Defn_trace() {return defn_trace;} public Xop_ctx Defn_trace_(Xot_defn_trace v) {defn_trace = v; return this;} private Xot_defn_trace defn_trace = Xot_defn_trace_null._;
|
||||
public Xot_defn_trace Defn_trace() {return defn_trace;} public Xop_ctx Defn_trace_(Xot_defn_trace v) {defn_trace = v; return this;} private Xot_defn_trace defn_trace = Xot_defn_trace_null.Instance;
|
||||
public boolean Only_include_evaluate() {return only_include_evaluate;} public Xop_ctx Only_include_evaluate_(boolean v) {only_include_evaluate = v; return this;} private boolean only_include_evaluate;
|
||||
public Lst_section_nde_mgr Lst_section_mgr() {if (lst_section_mgr == null) lst_section_mgr = new Lst_section_nde_mgr(); return lst_section_mgr;} private Lst_section_nde_mgr lst_section_mgr;
|
||||
public Hash_adp_bry Lst_page_regy() {return lst_page_regy;} private Hash_adp_bry lst_page_regy;
|
||||
|
||||
@@ -20,7 +20,7 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public interface Xop_lxr {
|
||||
int Lxr_tid();
|
||||
void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie);
|
||||
void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie);
|
||||
void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie);
|
||||
void Term(Btrie_fast_mgr core_trie);
|
||||
int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos);
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ public class Xop_lxr_mgr {
|
||||
lxr.Init_by_wiki(wiki, trie);
|
||||
}
|
||||
}
|
||||
public void Init_by_lang(Xol_lang lang) {
|
||||
public void Init_by_lang(Xol_lang_itm lang) {
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
Xop_lxr lxr = ary[i];
|
||||
@@ -55,53 +55,53 @@ public class Xop_lxr_mgr {
|
||||
}
|
||||
public static Xop_lxr_mgr new_tmpl_() {
|
||||
return new Xop_lxr_mgr(new Xop_lxr[]
|
||||
{ Xop_pipe_lxr._, new Xop_eq_lxr(true), Xop_colon_lxr._, Xop_space_lxr._, Xop_tab_lxr._, Xop_nl_lxr._
|
||||
, Xop_curly_bgn_lxr._, Xop_curly_end_lxr._
|
||||
, Xop_brack_bgn_lxr._, Xop_brack_end_lxr._
|
||||
, Xop_comm_lxr._
|
||||
, Xop_xnde_lxr._ // needed for xtn, noinclude, etc.
|
||||
, Xop_under_lxr._
|
||||
, gplx.xowa.xtns.translates.Xop_tvar_lxr._
|
||||
, Xop_cr_lxr._ // always ignore \r; DATE:2014-03-02
|
||||
{ Xop_pipe_lxr.Instance, new Xop_eq_lxr(true), Xop_colon_lxr.Instance, Xop_space_lxr.Instance, Xop_tab_lxr.Instance, Xop_nl_lxr.Instance
|
||||
, Xop_curly_bgn_lxr.Instance, Xop_curly_end_lxr.Instance
|
||||
, Xop_brack_bgn_lxr.Instance, Xop_brack_end_lxr.Instance
|
||||
, Xop_comm_lxr.Instance
|
||||
, Xop_xnde_lxr.Instance // needed for xtn, noinclude, etc.
|
||||
, Xop_under_lxr.Instance
|
||||
, gplx.xowa.xtns.translates.Xop_tvar_lxr.Instance
|
||||
, Xop_cr_lxr.Instance // always ignore \r; DATE:2014-03-02
|
||||
});
|
||||
}
|
||||
public static Xop_lxr_mgr new_wiki_() {
|
||||
return new Xop_lxr_mgr(new Xop_lxr[]
|
||||
{ Xop_pipe_lxr._, new Xop_eq_lxr(false), Xop_space_lxr._, Xop_tab_lxr._, Xop_nl_lxr._
|
||||
, Xop_amp_lxr._, Xop_apos_lxr._, Xop_colon_lxr._
|
||||
, Xop_lnki_lxr_bgn._, Xop_lnki_lxr_end._
|
||||
, Xop_list_lxr._
|
||||
, Xop_hdr_lxr._
|
||||
, Xop_hr_lxr._
|
||||
, Xop_xnde_lxr._
|
||||
, Xop_lnke_lxr._, Xop_lnke_end_lxr._
|
||||
, Xop_tblw_lxr._
|
||||
, Xop_pre_lxr._, Xop_nl_tab_lxr._
|
||||
, Xop_comm_lxr._
|
||||
, Xop_under_lxr._
|
||||
{ Xop_pipe_lxr.Instance, new Xop_eq_lxr(false), Xop_space_lxr.Instance, Xop_tab_lxr.Instance, Xop_nl_lxr.Instance
|
||||
, Xop_amp_lxr.Instance, Xop_apos_lxr.Instance, Xop_colon_lxr.Instance
|
||||
, Xop_lnki_lxr_bgn.Instance, Xop_lnki_lxr_end.Instance
|
||||
, Xop_list_lxr.Instance
|
||||
, Xop_hdr_lxr.Instance
|
||||
, Xop_hr_lxr.Instance
|
||||
, Xop_xnde_lxr.Instance
|
||||
, Xop_lnke_lxr.Instance, Xop_lnke_end_lxr.Instance
|
||||
, Xop_tblw_lxr.Instance
|
||||
, Xop_pre_lxr.Instance, Xop_nl_tab_lxr.Instance
|
||||
, Xop_comm_lxr.Instance
|
||||
, Xop_under_lxr.Instance
|
||||
});
|
||||
}
|
||||
public static Xop_lxr_mgr new_anchor_encoder() {
|
||||
return new Xop_lxr_mgr(new Xop_lxr[]
|
||||
{ Xop_pipe_lxr._, new Xop_eq_lxr(false), Xop_space_lxr._, Xop_tab_lxr._, Xop_nl_lxr._
|
||||
, Xop_curly_bgn_lxr._, Xop_curly_end_lxr._
|
||||
, Xop_amp_lxr._, Xop_colon_lxr._
|
||||
, Xop_apos_lxr._
|
||||
, Xop_lnki_lxr_bgn._, Xop_lnki_lxr_end._
|
||||
, Xop_lnke_lxr._, Xop_lnke_end_lxr._
|
||||
, Xop_xnde_lxr._
|
||||
{ Xop_pipe_lxr.Instance, new Xop_eq_lxr(false), Xop_space_lxr.Instance, Xop_tab_lxr.Instance, Xop_nl_lxr.Instance
|
||||
, Xop_curly_bgn_lxr.Instance, Xop_curly_end_lxr.Instance
|
||||
, Xop_amp_lxr.Instance, Xop_colon_lxr.Instance
|
||||
, Xop_apos_lxr.Instance
|
||||
, Xop_lnki_lxr_bgn.Instance, Xop_lnki_lxr_end.Instance
|
||||
, Xop_lnke_lxr.Instance, Xop_lnke_end_lxr.Instance
|
||||
, Xop_xnde_lxr.Instance
|
||||
});
|
||||
}
|
||||
public static final Xop_lxr_mgr Popup_lxr_mgr // same as orig_page, except apos_lxr added
|
||||
= new Xop_lxr_mgr(new Xop_lxr[]
|
||||
{ Xop_pipe_lxr._, new Xop_eq_lxr(true), Xop_colon_lxr._, Xop_space_lxr._, Xop_tab_lxr._, Xop_nl_lxr._
|
||||
, Xop_curly_bgn_lxr._, Xop_curly_end_lxr._
|
||||
, Xop_brack_bgn_lxr._, Xop_brack_end_lxr._
|
||||
, Xop_comm_lxr._
|
||||
, Xop_xnde_lxr._ // needed for xtn, noinclude, etc.
|
||||
, Xop_under_lxr._
|
||||
, gplx.xowa.xtns.translates.Xop_tvar_lxr._
|
||||
, Xop_cr_lxr._ // always ignore \r; DATE:2014-03-02
|
||||
, gplx.xowa.parsers.apos.Xop_apos_lxr._ // needed else multiple apos may be split across blocks;
|
||||
{ Xop_pipe_lxr.Instance, new Xop_eq_lxr(true), Xop_colon_lxr.Instance, Xop_space_lxr.Instance, Xop_tab_lxr.Instance, Xop_nl_lxr.Instance
|
||||
, Xop_curly_bgn_lxr.Instance, Xop_curly_end_lxr.Instance
|
||||
, Xop_brack_bgn_lxr.Instance, Xop_brack_end_lxr.Instance
|
||||
, Xop_comm_lxr.Instance
|
||||
, Xop_xnde_lxr.Instance // needed for xtn, noinclude, etc.
|
||||
, Xop_under_lxr.Instance
|
||||
, gplx.xowa.xtns.translates.Xop_tvar_lxr.Instance
|
||||
, Xop_cr_lxr.Instance // always ignore \r; DATE:2014-03-02
|
||||
, gplx.xowa.parsers.apos.Xop_apos_lxr.Instance // needed else multiple apos may be split across blocks;
|
||||
});
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.nss.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.tmpls.*;
|
||||
public class Xop_parser { // NOTE: parsers are reused; do not keep any read-write state
|
||||
private final Xowe_wiki wiki;
|
||||
@@ -33,7 +33,7 @@ public class Xop_parser { // NOTE: parsers are reused; do not keep any read-writ
|
||||
tmpl_lxr_mgr.Init_by_wiki(wiki);
|
||||
wtxt_lxr_mgr.Init_by_wiki(wiki);
|
||||
}
|
||||
public void Init_by_lang(Xol_lang lang) {
|
||||
public void Init_by_lang(Xol_lang_itm lang) {
|
||||
tmpl_lxr_mgr.Init_by_lang(lang);
|
||||
wtxt_lxr_mgr.Init_by_lang(lang);
|
||||
}
|
||||
@@ -101,7 +101,7 @@ public class Xop_parser { // NOTE: parsers are reused; do not keep any read-writ
|
||||
int subs_len = root.Subs_len();
|
||||
for (int i = 0; i < subs_len; i++)
|
||||
root.Subs_get(i).Tmpl_compile(ctx, src, tmpl_props);
|
||||
return Xot_tmpl_wtr._.Write_all(ctx, root, src); // NOTE: generate new src since most callers will use it;
|
||||
return Xot_tmpl_wtr.Instance.Write_all(ctx, root, src); // NOTE: generate new src since most callers will use it;
|
||||
}
|
||||
public void Parse_wtxt_to_wdom(Xop_root_tkn root, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, byte[] wtxt, int doc_bgn_pos) {
|
||||
root.Root_src_(wtxt); // always set latest src; needed for Parse_all wherein src will first be raw and then parsed tmpl
|
||||
@@ -112,6 +112,7 @@ public class Xop_parser { // NOTE: parsers are reused; do not keep any read-writ
|
||||
byte parse_tid_old = ctx.Parse_tid(); // NOTE: must store parse_tid b/c ctx can be reused by other classes
|
||||
ctx.Parse_tid_(parse_type);
|
||||
ctx.Page_bgn(root, src);
|
||||
ctx.App().Parser_mgr().Core__uniq_mgr().Clear();
|
||||
Parse_to_src_end(root, ctx, tkn_mkr, src, trie, doc_bgn_pos, len);
|
||||
ctx.Page_end(root, src, len);
|
||||
ctx.Parse_tid_(parse_tid_old);
|
||||
|
||||
@@ -53,6 +53,6 @@ class Xop_parser__fxt {
|
||||
public void Test_parse_to_html(String raw, boolean para_enabled, String expd) {
|
||||
byte[] raw_bry = Bry_.new_u8(raw);
|
||||
fxt.Wiki().Parser_mgr().Main().Parse_text_to_html(bfr, fxt.Page(), para_enabled, raw_bry);
|
||||
Tfds.Eq(expd, bfr.Xto_str_and_clear());
|
||||
Tfds.Eq(expd, bfr.To_str_and_clear());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@ public class Xop_tkn_mkr {
|
||||
public gplx.xowa.xtns.lst.Lst_section_nde Xnde_section() {return new gplx.xowa.xtns.lst.Lst_section_nde();}
|
||||
public gplx.xowa.xtns.categoryList.Xtn_categorylist_nde Xnde_categoryList() {return new gplx.xowa.xtns.categoryList.Xtn_categorylist_nde();}
|
||||
public gplx.xowa.xtns.dynamicPageList.Dpl_xnde Xnde_dynamicPageList() {return new gplx.xowa.xtns.dynamicPageList.Dpl_xnde();}
|
||||
public gplx.xowa.xtns.syntaxHighlight.Xtn_syntaxHighlight_nde Xnde_syntaxHighlight() {return new gplx.xowa.xtns.syntaxHighlight.Xtn_syntaxHighlight_nde();}
|
||||
public gplx.xowa.xtns.syntax_highlights.Synh_xtn_nde Xnde_syntaxHighlight() {return new gplx.xowa.xtns.syntax_highlights.Synh_xtn_nde();}
|
||||
public gplx.xowa.xtns.templateData.Xtn_templateData_nde Xnde_templateData() {return new gplx.xowa.xtns.templateData.Xtn_templateData_nde();}
|
||||
public gplx.xowa.xtns.rss.Rss_xnde Xnde_rss() {return new gplx.xowa.xtns.rss.Rss_xnde();}
|
||||
public gplx.xowa.xtns.quiz.Quiz_xnde Xnde_quiz() {return new gplx.xowa.xtns.quiz.Quiz_xnde();}
|
||||
@@ -87,10 +87,7 @@ public class Xop_tkn_mkr {
|
||||
public gplx.xowa.xtns.inputBox.Xtn_inputbox_nde Xnde_inputbox() {return new gplx.xowa.xtns.inputBox.Xtn_inputbox_nde();}
|
||||
public gplx.xowa.xtns.translates.Xop_translate_xnde Xnde_translate() {return new gplx.xowa.xtns.translates.Xop_translate_xnde();}
|
||||
public gplx.xowa.xtns.translates.Xop_languages_xnde Xnde_languages() {return new gplx.xowa.xtns.translates.Xop_languages_xnde();}
|
||||
public gplx.xowa.xtns.translates.Xop_tvar_tkn Tvar(int tkn_bgn, int tkn_end, int key_bgn, int key_end, int txt_bgn, int txt_end, byte[] wikitext)
|
||||
{return new gplx.xowa.xtns.translates.Xop_tvar_tkn(tkn_bgn, tkn_end, key_bgn, key_end, txt_bgn, txt_end, wikitext);}
|
||||
public Xop_vnt_tkn Vnt(int bgn_lhs, int bgn_rhs) {return new Xop_vnt_tkn(bgn_lhs, bgn_rhs);}
|
||||
public Xop_vnt_eqgt_tkn Vnt_eqgt(int bgn, int end) {return new Xop_vnt_eqgt_tkn(bgn, end);}
|
||||
public gplx.xowa.xtns.translates.Xop_tvar_tkn Tvar(int tkn_bgn, int tkn_end, int key_bgn, int key_end, int txt_bgn, int txt_end, byte[] wikitext) {return new gplx.xowa.xtns.translates.Xop_tvar_tkn(tkn_bgn, tkn_end, key_bgn, key_end, txt_bgn, txt_end, wikitext);}
|
||||
// public void Clear() {
|
||||
// space_tkns_len = txt_tkns_len = 0;
|
||||
// }
|
||||
|
||||
@@ -20,10 +20,10 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_amp_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_amp;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Amp, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
return ctx.Amp().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
|
||||
}
|
||||
public static final Xop_amp_lxr _ = new Xop_amp_lxr();
|
||||
public static final Xop_amp_lxr Instance = new Xop_amp_lxr();
|
||||
}
|
||||
|
||||
@@ -18,58 +18,63 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_amp_mgr {
|
||||
private final Object thread_lock_1 = new Object(), thread_lock_2 = new Object();
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr.reset_(32);
|
||||
public Btrie_slim_mgr Amp_trie() {return amp_trie;} private final Btrie_slim_mgr amp_trie = Xop_amp_trie._;
|
||||
public Btrie_slim_mgr Amp_trie() {return amp_trie;} private final Btrie_slim_mgr amp_trie = Xop_amp_trie.Instance;
|
||||
public int Rslt_pos() {return rslt_pos;} private int rslt_pos;
|
||||
public int Rslt_val() {return rslt_val;} private int rslt_val;
|
||||
public Xop_tkn_itm Parse_as_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int cur_pos) {
|
||||
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
Object o = amp_trie.Match_bgn(src, cur_pos, src_len);
|
||||
cur_pos = amp_trie.Match_pos();
|
||||
if (o == null) return null;
|
||||
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
|
||||
switch (itm.Tid()) {
|
||||
case Xop_amp_trie_itm.Tid_name_std:
|
||||
case Xop_amp_trie_itm.Tid_name_xowa:
|
||||
rslt_pos = cur_pos;
|
||||
return tkn_mkr.Amp_txt(amp_pos, cur_pos, itm);
|
||||
case Xop_amp_trie_itm.Tid_num_hex:
|
||||
case Xop_amp_trie_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
|
||||
boolean pass = Parse_as_int(ncr_is_hex, src, src_len, amp_pos, cur_pos);
|
||||
return pass ? tkn_mkr.Amp_num(amp_pos, rslt_pos, rslt_val) : null;
|
||||
default: throw Err_.new_unhandled(itm.Tid());
|
||||
synchronized (thread_lock_1) {
|
||||
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
Object o = amp_trie.Match_bgn(src, cur_pos, src_len);
|
||||
cur_pos = amp_trie.Match_pos();
|
||||
if (o == null) return null;
|
||||
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
|
||||
switch (itm.Tid()) {
|
||||
case Xop_amp_trie_itm.Tid_name_std:
|
||||
case Xop_amp_trie_itm.Tid_name_xowa:
|
||||
rslt_pos = cur_pos;
|
||||
return tkn_mkr.Amp_txt(amp_pos, cur_pos, itm);
|
||||
case Xop_amp_trie_itm.Tid_num_hex:
|
||||
case Xop_amp_trie_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
|
||||
boolean pass = Parse_as_int(ncr_is_hex, src, src_len, amp_pos, cur_pos);
|
||||
return pass ? tkn_mkr.Amp_num(amp_pos, rslt_pos, rslt_val) : null;
|
||||
default: throw Err_.new_unhandled(itm.Tid());
|
||||
}
|
||||
}
|
||||
}
|
||||
public boolean Parse_as_int(boolean ncr_is_hex, byte[] src, int src_len, int amp_pos, int int_bgn) {
|
||||
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
rslt_val = -1; // clear any previous setting
|
||||
int cur_pos = int_bgn, int_end = -1;
|
||||
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, cur_pos, src_len);
|
||||
if (semic_pos == Bry_find_.Not_found) return false;
|
||||
int_end = semic_pos - 1; // int_end = pos before semicolon
|
||||
int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
|
||||
for (int i = int_end; i >= int_bgn; i--) {
|
||||
byte b = src[i];
|
||||
if (ncr_is_hex) {
|
||||
if (b >= 48 && b <= 57) cur = b - 48;
|
||||
else if (b >= 65 && b <= 70) cur = b - 55;
|
||||
else if (b >= 97 && b <= 102) cur = b - 87;
|
||||
else if((b >= 71 && b <= 90)
|
||||
|| (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
|
||||
else return false;
|
||||
synchronized (thread_lock_2) {
|
||||
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
rslt_val = -1; // clear any previous setting
|
||||
int cur_pos = int_bgn, int_end = -1;
|
||||
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, cur_pos, src_len);
|
||||
if (semic_pos == Bry_find_.Not_found) return false;
|
||||
int_end = semic_pos - 1; // int_end = pos before semicolon
|
||||
int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
|
||||
for (int i = int_end; i >= int_bgn; i--) {
|
||||
byte b = src[i];
|
||||
if (ncr_is_hex) {
|
||||
if (b >= 48 && b <= 57) cur = b - 48;
|
||||
else if (b >= 65 && b <= 70) cur = b - 55;
|
||||
else if (b >= 97 && b <= 102) cur = b - 87;
|
||||
else if((b >= 71 && b <= 90)
|
||||
|| (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
|
||||
else return false;
|
||||
}
|
||||
else {
|
||||
cur = b - Byte_ascii.Num_0;
|
||||
if (cur < 0 || cur > 10) return false;
|
||||
}
|
||||
val += cur * factor;
|
||||
if (val > gplx.core.intls.Utf8_.Codepoint_max) return false; // fail if value > largest_unicode_codepoint
|
||||
factor *= multiple;
|
||||
}
|
||||
else {
|
||||
cur = b - Byte_ascii.Num_0;
|
||||
if (cur < 0 || cur > 10) return false;
|
||||
}
|
||||
val += cur * factor;
|
||||
if (val > gplx.core.intls.Utf8_.Codepoint_max) return false; // fail if value > largest_unicode_codepoint
|
||||
factor *= multiple;
|
||||
rslt_val = val;
|
||||
rslt_pos = semic_pos + 1; // position after semic
|
||||
return true;
|
||||
}
|
||||
rslt_val = val;
|
||||
rslt_pos = semic_pos + 1; // position after semic
|
||||
return true;
|
||||
}
|
||||
public byte[] Decode_as_bry(byte[] src) {
|
||||
if (src == null) return src;
|
||||
@@ -117,8 +122,8 @@ public class Xop_amp_mgr {
|
||||
tmp_bfr.Add_byte(b);
|
||||
++pos;
|
||||
}
|
||||
return dirty ? tmp_bfr.Xto_bry_and_clear() : src;
|
||||
return dirty ? tmp_bfr.To_bry_and_clear() : src;
|
||||
}
|
||||
}
|
||||
public static final Xop_amp_mgr I = new Xop_amp_mgr(); Xop_amp_mgr() {}
|
||||
public static final Xop_amp_mgr Instance = new Xop_amp_mgr(); Xop_amp_mgr() {}
|
||||
}
|
||||
|
||||
@@ -33,10 +33,10 @@ public class Xop_amp_mgr_decode_tst {
|
||||
@Test public void Hex_zero_padded() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_upper_x() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Num_fail_large_codepoint() {fxt.Test_decode_as_bry("�" , "�");}
|
||||
@Test public void Num_ignore_extra_x() {fxt.Test_decode_as_bry("&#xx26D0;" , Char_.To_str(Char_.XbyInt(9936)));} // 2nd x is ignored
|
||||
@Test public void Num_ignore_extra_x() {fxt.Test_decode_as_bry("&#xx26D0;" , Char_.To_str(Char_.By_int(9936)));} // 2nd x is ignored
|
||||
}
|
||||
class Xop_amp_mgr_fxt {
|
||||
private Xop_amp_mgr amp_mgr = Xop_amp_mgr.I;
|
||||
private Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance;
|
||||
public void Reset() {}
|
||||
public void Test_decode_as_bry(String raw, String expd) {
|
||||
Tfds.Eq(expd, String_.new_u8(amp_mgr.Decode_as_bry(Bry_.new_u8(raw))));
|
||||
|
||||
@@ -31,7 +31,7 @@ public class Xop_amp_trie {
|
||||
, Bry_xowa_nl = Bry_.new_a7("&xowa_nl;")
|
||||
, Bry_xowa_dash = Bry_.new_a7("&xowa_dash;")
|
||||
;
|
||||
public static final Btrie_slim_mgr _ = new_(); Xop_amp_trie() {}
|
||||
public static final Btrie_slim_mgr Instance = new_(); Xop_amp_trie() {}
|
||||
private static Btrie_slim_mgr new_() {// REF.MW: Sanitizer|$wgHtmlEntities; NOTE:added apos
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
|
||||
Reg_name(rv, Bool_.Y, 60, Bry_xowa_lt);
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.html.lnkis.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.htmls.lnkis.*;
|
||||
public class Xop_amp_trie_itm {
|
||||
public Xop_amp_trie_itm(byte tid, int char_int, byte[] xml_name_bry) {
|
||||
this.tid = tid;
|
||||
|
||||
@@ -20,8 +20,8 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_apos_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_apos;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Apos_ary, this);} private static final byte[] Apos_ary = new byte[] {Byte_ascii.Apos, Byte_ascii.Apos};
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Apos().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_apos_lxr _ = new Xop_apos_lxr(); Xop_apos_lxr() {}
|
||||
public static final Xop_apos_lxr Instance = new Xop_apos_lxr(); Xop_apos_lxr() {}
|
||||
}
|
||||
|
||||
@@ -20,9 +20,9 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_hdr_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_hdr;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Hook_bgn, this);} static final byte[] Hook_bgn = new byte[] {Byte_ascii.Nl, Byte_ascii.Eq};
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Hdr().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_hdr_lxr _ = new Xop_hdr_lxr(); Xop_hdr_lxr() {}
|
||||
public static final Xop_hdr_lxr Instance = new Xop_hdr_lxr(); Xop_hdr_lxr() {}
|
||||
public static final byte Hook = Byte_ascii.Eq;
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ public class Mwh_atr_itm {
|
||||
public boolean Valid() {return valid;} private final boolean valid;
|
||||
public boolean Key_exists() {return key_exists;} private final boolean key_exists;
|
||||
public boolean Repeated() {return repeated;} private final boolean repeated;
|
||||
public boolean Invalid() {return repeated || !valid;}
|
||||
public int Atr_bgn() {return atr_bgn;} private int atr_bgn;
|
||||
public int Atr_end() {return atr_end;} private int atr_end;
|
||||
public int Key_bgn() {return key_bgn;} private final int key_bgn;
|
||||
@@ -45,42 +46,21 @@ public class Mwh_atr_itm {
|
||||
public byte[] Val_bry() {return val_bry;} private byte[] val_bry;
|
||||
public int Eql_pos() {return eql_pos;} private final int eql_pos;
|
||||
public int Qte_tid() {return qte_tid;} private final int qte_tid;
|
||||
public byte Qte_byte() {
|
||||
switch (qte_tid) {
|
||||
case Mwh_atr_itm_.Qte_tid__none: return Byte_ascii.Null;
|
||||
case Mwh_atr_itm_.Qte_tid__apos: return Byte_ascii.Apos;
|
||||
case Mwh_atr_itm_.Qte_tid__qute: return Byte_ascii.Quote;
|
||||
default: throw Err_.new_unhandled(qte_tid);
|
||||
}
|
||||
}
|
||||
public Mwh_atr_itm Atr_rng(int bgn, int end) {this.atr_bgn = bgn; this.atr_end = end; return this;}
|
||||
public void Key_bry_(byte[] v) {this.key_bry = v;}
|
||||
public void Val_bry_(byte[] v) {this.val_bry = v;}
|
||||
public String Val_as_str() {return String_.new_u8(Val_as_bry());}
|
||||
public byte[] Val_as_bry() {if (val_bry == null) val_bry = Bry_.Mid(src, val_bgn, val_end); return val_bry;} // NOTE: val_bry is cached
|
||||
public byte[] Val_as_bry__blank_to_null() {byte[] rv = Val_as_bry(); return Bry_.Len_eq_0(rv) ? null : rv;}
|
||||
public int Val_as_int_or(int or) {return val_bry == null ? Bry_.To_int_or__lax(src, val_bgn, val_end, or) : Bry_.To_int_or(val_bry, or);}
|
||||
public boolean Val_as_bool_by_int() {return Val_as_int_or(0) == 1;}
|
||||
public boolean Val_as_bool() {return Bry_.Eq(Bry_.Lcase__all(Val_as_bry()), Bool_.True_bry);}
|
||||
public static final Mwh_atr_itm[] Ary_empty = new Mwh_atr_itm[0];
|
||||
public static final int Atr_tid__invalid = 1, Atr_tid__repeat = 2, Atr_tid__pair = 4, Atr_tid__name = 8; // NOTE: id order is important; see above;
|
||||
public static final int Qte_tid__none = 0, Qte_tid__apos = 1, Qte_tid__qute = 2;
|
||||
public static final int Mask__qte__none = 0, Mask__qte__apos = 1, Mask__qte_qute = 2;
|
||||
public static final int
|
||||
Mask__valid = 8
|
||||
, Mask__repeated = 16
|
||||
, Mask__key_exists = 32
|
||||
, Mask__val_made = 64
|
||||
;
|
||||
public static final boolean Mask__valid__n = false, Mask__valid__y = true;
|
||||
public static final boolean Mask__key_exists__n = false, Mask__key_exists__y = true;
|
||||
public static final boolean Mask__repeated__n = false, Mask__repeated__y = true;
|
||||
public static final boolean Mask__val_made__n = false, Mask__val_made__y = true;
|
||||
public static int Calc_atr_utl(int qte_tid, boolean valid, boolean repeated, boolean key_exists, boolean val_made) {
|
||||
int rv = qte_tid;
|
||||
if (valid) rv |= Mwh_atr_itm.Mask__valid;
|
||||
if (repeated) rv |= Mwh_atr_itm.Mask__repeated;
|
||||
if (key_exists) rv |= Mwh_atr_itm.Mask__key_exists;
|
||||
if (val_made) rv |= Mwh_atr_itm.Mask__val_made;
|
||||
return rv;
|
||||
}
|
||||
public static int Calc_qte_tid(int val) {
|
||||
return val & ((1 << 3) - 1);
|
||||
}
|
||||
public static byte Calc_qte_byte(int[] data_ary, int idx) {
|
||||
int val = data_ary[idx + Mwh_atr_mgr.Idx_atr_utl];
|
||||
int qte_tid = (val & ((1 << 3) - 1));
|
||||
return qte_tid == Qte_tid__apos ? Byte_ascii.Apos : Byte_ascii.Quote;
|
||||
}
|
||||
// public static final byte Key_tid_generic = 0, Key_tid_id = 1, Key_tid_style = 2, Key_tid_role = 3;
|
||||
}
|
||||
|
||||
51
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_itm_.java
Normal file
51
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_itm_.java
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Mwh_atr_itm_ {
|
||||
public static final Mwh_atr_itm[] Ary_empty = new Mwh_atr_itm[0];
|
||||
public static final int Atr_tid__invalid = 1, Atr_tid__repeat = 2, Atr_tid__pair = 4, Atr_tid__name = 8; // NOTE: id order is important; see above;
|
||||
public static final int Qte_tid__none = 0, Qte_tid__apos = 1, Qte_tid__qute = 2;
|
||||
public static final int Mask__qte__none = 0, Mask__qte__apos = 1, Mask__qte_qute = 2;
|
||||
public static final int
|
||||
Mask__valid = 8
|
||||
, Mask__repeated = 16
|
||||
, Mask__key_exists = 32
|
||||
, Mask__val_made = 64
|
||||
;
|
||||
public static final boolean Mask__valid__n = false, Mask__valid__y = true;
|
||||
public static final boolean Mask__key_exists__n = false, Mask__key_exists__y = true;
|
||||
public static final boolean Mask__repeated__n = false, Mask__repeated__y = true;
|
||||
public static final boolean Mask__val_made__n = false, Mask__val_made__y = true;
|
||||
public static int Calc_atr_utl(int qte_tid, boolean valid, boolean repeated, boolean key_exists, boolean val_made) {
|
||||
int rv = qte_tid;
|
||||
if (valid) rv |= Mwh_atr_itm_.Mask__valid;
|
||||
if (repeated) rv |= Mwh_atr_itm_.Mask__repeated;
|
||||
if (key_exists) rv |= Mwh_atr_itm_.Mask__key_exists;
|
||||
if (val_made) rv |= Mwh_atr_itm_.Mask__val_made;
|
||||
return rv;
|
||||
}
|
||||
public static int Calc_qte_tid(int val) {
|
||||
return val & ((1 << 3) - 1);
|
||||
}
|
||||
public static byte Calc_qte_byte(int[] data_ary, int idx) {
|
||||
int val = data_ary[idx + Mwh_atr_mgr.Idx_atr_utl];
|
||||
int qte_tid = (val & ((1 << 3) - 1));
|
||||
return qte_tid == Qte_tid__apos ? Byte_ascii.Apos : Byte_ascii.Quote;
|
||||
}
|
||||
public static final byte Key_tid__generic = 0, Key_tid__id = 1, Key_tid__style = 2, Key_tid__role = 3;
|
||||
}
|
||||
@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public interface Xop_xnde_atr_parser {
|
||||
void Xatr_parse(Xowe_wiki wiki, byte[] src, Xop_xatr_itm xatr, Object xatr_key_obj);
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
// Single-method callback interface for receiving one attribute item.
// NOTE(review): presumably implemented by nodes that own parsed attributes -- confirm against implementers
public interface Mwh_atr_itm_owner {
|
||||
// Receives one attribute item (xatr) along with the wiki, the source bytes, and an id object.
// NOTE(review): xatr_id_obj looks like a lookup result identifying the attribute key; its concrete type is not visible here -- confirm against callers
void Xatr__set(Xowe_wiki wiki, byte[] src, Mwh_atr_itm xatr, Object xatr_id_obj);
|
||||
}
|
||||
@@ -64,7 +64,7 @@ public class Mwh_atr_mgr {
|
||||
}
|
||||
data_ary[data_idx + Idx_nde_uid] = nde_uid;
|
||||
data_ary[data_idx + Idx_nde_tid] = nde_tid;
|
||||
data_ary[data_idx + Idx_atr_utl] = Mwh_atr_itm.Calc_atr_utl(qte_tid, valid, repeated, key_exists, val_made);
|
||||
data_ary[data_idx + Idx_atr_utl] = Mwh_atr_itm_.Calc_atr_utl(qte_tid, valid, repeated, key_exists, val_made);
|
||||
data_ary[data_idx + Idx_atr_bgn] = atr_bgn;
|
||||
data_ary[data_idx + Idx_atr_end] = atr_end;
|
||||
data_ary[data_idx + Idx_key_bgn] = key_bgn;
|
||||
@@ -78,7 +78,7 @@ public class Mwh_atr_mgr {
|
||||
int atr_utl_idx = (atr_uid * Idx__mult) + Idx_atr_utl;
|
||||
int atr_utl = data_ary[atr_utl_idx];
|
||||
int val_bry_exists = atr_utl & Atr_utl__val_bry_exists;
|
||||
data_ary[atr_utl_idx] = Mwh_atr_itm.Atr_tid__repeat | val_bry_exists;
|
||||
data_ary[atr_utl_idx] = Mwh_atr_itm_.Atr_tid__repeat | val_bry_exists;
|
||||
}
|
||||
public static final int
|
||||
Idx_nde_uid = 0
|
||||
|
||||
@@ -21,19 +21,19 @@ public class Mwh_atr_mgr_tst {
|
||||
private final Mwh_atr_mgr_fxt fxt = new Mwh_atr_mgr_fxt();
|
||||
@Test public void Atr_utl_make() {
|
||||
// key="val"
|
||||
fxt.Test_atr_utl_make(Mwh_atr_itm.Qte_tid__qute, Mwh_atr_itm.Mask__valid__y, Mwh_atr_itm.Mask__repeated__n, Mwh_atr_itm.Mask__key_exists__y, Mwh_atr_itm.Mask__val_made__n, 42);
|
||||
fxt.Test_atr_utl_make(Mwh_atr_itm_.Qte_tid__qute, Mwh_atr_itm_.Mask__valid__y, Mwh_atr_itm_.Mask__repeated__n, Mwh_atr_itm_.Mask__key_exists__y, Mwh_atr_itm_.Mask__val_made__n, 42);
|
||||
// key=val key=v<nowiki/>al
|
||||
fxt.Test_atr_utl_make(Mwh_atr_itm.Qte_tid__none, Mwh_atr_itm.Mask__valid__y, Mwh_atr_itm.Mask__repeated__y, Mwh_atr_itm.Mask__key_exists__y, Mwh_atr_itm.Mask__val_made__y, 120);
|
||||
fxt.Test_atr_utl_make(Mwh_atr_itm_.Qte_tid__none, Mwh_atr_itm_.Mask__valid__y, Mwh_atr_itm_.Mask__repeated__y, Mwh_atr_itm_.Mask__key_exists__y, Mwh_atr_itm_.Mask__val_made__y, 120);
|
||||
}
|
||||
}
|
||||
class Mwh_atr_mgr_fxt {
|
||||
public void Test_atr_utl_make(int qte_tid, boolean valid, boolean repeated, boolean key_exists, boolean val_made, int expd) {
|
||||
int atr_utl = Mwh_atr_itm.Calc_atr_utl(qte_tid, valid, repeated, key_exists, val_made);
|
||||
int atr_utl = Mwh_atr_itm_.Calc_atr_utl(qte_tid, valid, repeated, key_exists, val_made);
|
||||
Tfds.Eq_int(expd, atr_utl);
|
||||
Tfds.Eq_int(qte_tid, Mwh_atr_itm.Calc_qte_tid(atr_utl));
|
||||
Tfds.Eq_bool(valid, (atr_utl & Mwh_atr_itm.Mask__valid) == Mwh_atr_itm.Mask__valid);
|
||||
Tfds.Eq_bool(repeated, (atr_utl & Mwh_atr_itm.Mask__repeated) == Mwh_atr_itm.Mask__repeated);
|
||||
Tfds.Eq_bool(key_exists, (atr_utl & Mwh_atr_itm.Mask__key_exists) == Mwh_atr_itm.Mask__key_exists);
|
||||
Tfds.Eq_bool(val_made, (atr_utl & Mwh_atr_itm.Mask__val_made) == Mwh_atr_itm.Mask__val_made);
|
||||
Tfds.Eq_int(qte_tid, Mwh_atr_itm_.Calc_qte_tid(atr_utl));
|
||||
Tfds.Eq_bool(valid, (atr_utl & Mwh_atr_itm_.Mask__valid) == Mwh_atr_itm_.Mask__valid);
|
||||
Tfds.Eq_bool(repeated, (atr_utl & Mwh_atr_itm_.Mask__repeated) == Mwh_atr_itm_.Mask__repeated);
|
||||
Tfds.Eq_bool(key_exists, (atr_utl & Mwh_atr_itm_.Mask__key_exists) == Mwh_atr_itm_.Mask__key_exists);
|
||||
Tfds.Eq_bool(val_made, (atr_utl & Mwh_atr_itm_.Mask__val_made) == Mwh_atr_itm_.Mask__val_made);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,115 +26,58 @@ public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATT
|
||||
private byte area = Area__atr_limbo;
|
||||
private int atr_bgn = -1, key_bgn = -1, key_end = -1, eql_pos = -1, val_bgn = -1, val_end = -1;
|
||||
private byte qte_byte = Byte_ascii.Null;
|
||||
private boolean key_bfr_on = false, val_bfr_on = false, ws_is_before_val = false;
|
||||
private boolean key_bfr_on = false, val_bfr_on = false, ws_is_before_val = false, qte_closed = false;
|
||||
private int nde_uid, nde_tid;
|
||||
public Bry_obj_ref Bry_obj() {return bry_ref;} private final Bry_obj_ref bry_ref = Bry_obj_ref.null_();
|
||||
public int Nde_end_tid() {return nde_end_tid;} private int nde_end_tid;
|
||||
public int Parse(Mwh_doc_wkr wkr, int nde_uid, int nde_tid, byte[] src, int src_bgn, int src_end) {
|
||||
this.nde_uid = nde_uid; this.nde_tid = nde_tid;
|
||||
this.nde_end_tid = Mwh_doc_parser.Nde_end_tid__invalid;
|
||||
this.atr_bgn = -1;
|
||||
area = Area__atr_limbo;
|
||||
boolean prv_is_ws = false;
|
||||
int pos = src_bgn;
|
||||
boolean loop = true;
|
||||
while (loop) {
|
||||
if (pos == src_end) {
|
||||
if (area == Area__val_quote) { // quote still open
|
||||
int reset_pos = Bry_find_.Find_fwd(src, Byte_ascii.Space, val_bgn, src_end); // try to find 1st space within quote; EX:"a='b c=d" should try to reset at c=d
|
||||
boolean reset_found = reset_pos != Bry_find_.Not_found;
|
||||
area = Area__invalid; val_end = reset_found ? reset_pos : src_end;
|
||||
Make(src, val_end); // create invalid atr
|
||||
if (reset_found) { // space found; resume from text after space; EX: "a='b c=d"; PAGE:en.w:Aubervilliers DATE:2014-06-25
|
||||
pos = Bry_find_.Find_fwd_while_not_ws(src, reset_pos, src_end); // skip ws
|
||||
atr_bgn = -1;
|
||||
area = Area__atr_limbo;
|
||||
val_bfr.Clear();
|
||||
val_bfr_on = false;
|
||||
ws_is_before_val = false;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
if (pos >= src_end) {
|
||||
switch (area) {
|
||||
case Area__key: // EX: "a"
|
||||
case Area__eql_limbo: // EX: "a "
|
||||
case Area__val_naked: // EX: "a=b"
|
||||
break; // valid atr
|
||||
case Area__val_quote: // EX: "a='b'"
|
||||
if (qte_closed)
|
||||
Make(src, src_end);
|
||||
else { // dangling; EX: "a='b c=d"
|
||||
int reset_pos = Bry_find_.Find_fwd(src, Byte_ascii.Space, val_bgn, src_end); // try to find 1st space within quote; EX:"a='b c=d" should try to reset at c=d
|
||||
boolean reset_found = reset_pos != Bry_find_.Not_found;
|
||||
area = Area__invalid; val_end = reset_found ? reset_pos : src_end;
|
||||
Make(src, val_end); // create invalid atr
|
||||
if (reset_found) { // space found; resume from text after space; EX: "a='b c=d"; PAGE:en.w:Aubervilliers DATE:2014-06-25
|
||||
pos = Bry_find_.Find_fwd_while_not_ws(src, reset_pos, src_end); // skip ws
|
||||
atr_bgn = -1;
|
||||
area = Area__atr_limbo;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Area__invalid: case Area__atr_limbo:
|
||||
case Area__val_limbo:
|
||||
area = Area__invalid;
|
||||
break;
|
||||
}
|
||||
else {
|
||||
if (area == Area__val_limbo) // NOTE: handle dangling "k=" else will be "k"; EX: <a b=> x> <a b>; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31
|
||||
area = Area__invalid;
|
||||
if (atr_bgn != -1) { // atr_bgn will be -1 if atrs ends on quoted (EX:"a='b'"); else, pending atr that needs to be processed; EX: "a=b" b wil be in bfr
|
||||
val_end = src_end;
|
||||
Make(src, src_end);
|
||||
}
|
||||
break;
|
||||
if (atr_bgn != -1) {
|
||||
val_end = src_end;
|
||||
Make(src, val_end);
|
||||
}
|
||||
}
|
||||
else if (pos > src_end)
|
||||
break;
|
||||
}
|
||||
byte b = src[pos];
|
||||
switch (area) {
|
||||
case Area__atr_limbo: // 1st area after node_name or attribute
|
||||
switch (b) {
|
||||
// gt -> stop iterating
|
||||
case Byte_ascii.Gt:
|
||||
nde_end_tid = Mwh_doc_parser.Nde_end_tid__gt;
|
||||
loop = false;
|
||||
break;
|
||||
// slash -> check for "/>" or " / "
|
||||
case Byte_ascii.Slash:
|
||||
int nxt_pos = pos + 1;
|
||||
if (nxt_pos == src_end) {
|
||||
pos = nxt_pos;
|
||||
return Mwh_doc_parser.Nde_end_tid__invalid;
|
||||
}
|
||||
else if (src[nxt_pos] == Byte_ascii.Gt) {
|
||||
nde_end_tid = Mwh_doc_parser.Nde_end_tid__inline;
|
||||
pos = nxt_pos;
|
||||
loop = false;
|
||||
}
|
||||
else {
|
||||
area = Area__invalid; atr_bgn = pos;
|
||||
}
|
||||
break;
|
||||
// ws -> ignore; skip any ws in atr_limbo; note that once a non-ws char is encountered, it will immediately go into another area
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
|
||||
if (atr_bgn == -1) atr_bgn = pos;
|
||||
break;
|
||||
// alphanum -> enter Area__key
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Colon:
|
||||
area = Area__key;
|
||||
if (atr_bgn == -1) atr_bgn = pos;
|
||||
key_bgn = pos;
|
||||
break;
|
||||
// lt -> check for <nowiki>
|
||||
case Byte_ascii.Lt: // handle "<nowiki>"
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found) {
|
||||
area = Area__invalid;
|
||||
atr_bgn = pos;
|
||||
}
|
||||
else
|
||||
pos = gt_pos; // position after ">"; note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
break;
|
||||
// rest -> invalid
|
||||
default: // quote and other non-valid key characters are invalid until next space; EX: "<span 'key_cannot_be_quoted' id='123'"
|
||||
area = Area__invalid; atr_bgn = pos;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__invalid:
|
||||
switch (b) {
|
||||
// ws -> src_end invalid area
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
|
||||
// ws -> end invalid area
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
Make(src, pos);
|
||||
area = Area__atr_limbo;
|
||||
break;
|
||||
@@ -143,9 +86,13 @@ public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATT
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__key:
|
||||
case Area__atr_limbo: // 1st area after (a) node_name, (b) attribute, (c) invalid_area
|
||||
switch (b) {
|
||||
// alphanum -> valid key chars
|
||||
// ws -> ignore; skip any ws in atr_limbo; note that once a non-ws char is encountered, it will immediately go into another area
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
if (atr_bgn == -1) atr_bgn = pos; // NOTE: atr_bgn == -1 needed for multiple spaces; ALSO: cannot move above switch b/c of <nowiki>
|
||||
break;
|
||||
// attribFirst -> enter Area__key; REF.MW: $attribFirst = '[:A-Z_a-z0-9]';
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
@@ -158,26 +105,61 @@ public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATT
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Colon: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Underline:
|
||||
case Byte_ascii.Colon: case Byte_ascii.Underline:
|
||||
area = Area__key;
|
||||
if (atr_bgn == -1) atr_bgn = pos; // NOTE: atr_bgn == -1 needed b/c of spaces
|
||||
key_bgn = pos;
|
||||
break;
|
||||
// angle_bgn -> check for <nowiki>
|
||||
case Byte_ascii.Angle_bgn: // handle "<nowiki>"
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found) {
|
||||
area = Area__invalid; if (atr_bgn == -1) atr_bgn = pos;
|
||||
}
|
||||
else
|
||||
pos = gt_pos; // position after ">"; note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
break;
|
||||
// rest -> invalid
|
||||
default: // quote and other non-valid key characters are invalid until next space; EX: "<span 'key_cannot_be_quoted' id='123'"
|
||||
area = Area__invalid; if (atr_bgn == -1) atr_bgn = pos;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__key:
|
||||
switch (b) {
|
||||
// alphanum -> valid key chars; REF.MW: $attrib = '[:A-Z_a-z-.0-9]';
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Colon: case Byte_ascii.Underline: case Byte_ascii.Dash: case Byte_ascii.Dot:
|
||||
if (key_bfr_on) key_bfr.Add_byte(b);
|
||||
break;
|
||||
// ws -> src_end key
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
|
||||
// ws -> end key
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
area = Area__eql_limbo;
|
||||
key_end = pos;
|
||||
break;
|
||||
// eq -> src_end key; skip Area_eq and go to Area_val_bgn
|
||||
// eq -> end key; go to Area_val_limbo
|
||||
case Byte_ascii.Eq:
|
||||
area = Area__val_limbo;
|
||||
key_end = eql_pos = pos;
|
||||
break;
|
||||
// lt -> check for <nowiki>
|
||||
case Byte_ascii.Lt:
|
||||
// angle_bgn -> check for <nowiki>
|
||||
case Byte_ascii.Angle_bgn:
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found) // "<" should not be in key; EX: "ke<y"
|
||||
area = Area__invalid;
|
||||
else {
|
||||
if (!key_bfr_on) {key_bfr.Add_mid(src, key_bgn, pos); key_bfr_on = true;}
|
||||
if (!key_bfr_on) {key_bfr.Add_mid(src, key_bgn, pos); key_bfr_on = true;}
|
||||
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
}
|
||||
break;
|
||||
@@ -190,40 +172,14 @@ public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATT
|
||||
case Area__eql_limbo:
|
||||
switch (b) {
|
||||
// ws -> skip
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip ws
|
||||
if (key_end == -1) { // EX: "a = b"; key_end != -1 b/c 1st \s sets key_end; EX: "a b = c"; key_end
|
||||
val_end = pos - 1;
|
||||
Make(src, pos);
|
||||
area = Area__atr_limbo;
|
||||
continue;
|
||||
}
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space: // skip ws
|
||||
break;
|
||||
// eq -> enter Area__eq
|
||||
// eq -> enter Area__val_limbo
|
||||
case Byte_ascii.Eq:
|
||||
eql_pos = pos;
|
||||
area = Area__val_limbo;
|
||||
break;
|
||||
// rest -> make atr and enter limbo
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos: // FUTURE: previous word was key
|
||||
default: // NOTE: added this late; xml_parser was not handling "line start=3" DATE:2013-07-03
|
||||
val_end = pos - 1;
|
||||
Make(src, pos);
|
||||
area = Area__atr_limbo;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case Area__val_limbo:
|
||||
switch (b) {
|
||||
// ws -> skip
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
|
||||
ws_is_before_val = true;
|
||||
break;
|
||||
// quote -> enter Area_val_quote
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos:
|
||||
area = Area__val_quote; qte_byte = b; prv_is_ws = false;
|
||||
val_bgn = pos + 1;
|
||||
break;
|
||||
// alphanum -> enter Area_val_raw
|
||||
// attribFirst -> enter Area__key; REF.MW: $attribFirst = '[:A-Z_a-z0-9]';
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
@@ -236,64 +192,124 @@ public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATT
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Colon:
|
||||
case Byte_ascii.Hash:
|
||||
area = Area__val_naked;
|
||||
val_bgn = pos;
|
||||
case Byte_ascii.Colon: case Byte_ascii.Underline:
|
||||
Make(src, pos);
|
||||
area = Area__key;
|
||||
atr_bgn = key_bgn = pos;
|
||||
break;
|
||||
// lt -> check for <nowiki>
|
||||
case Byte_ascii.Lt:
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found)
|
||||
area = Area__invalid;
|
||||
else
|
||||
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
break;
|
||||
// rest -> ignore (?)
|
||||
// rest -> make atr and enter limbo
|
||||
default:
|
||||
area = Area__invalid;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__val_quote: { // EX: "'val' " in "key = 'val'"
|
||||
case Area__val_limbo:
|
||||
switch (b) {
|
||||
// ws -> skip
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
ws_is_before_val = true;
|
||||
break;
|
||||
// quote -> enter Area_val_quote
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos:
|
||||
area = Area__val_quote; qte_byte = b; qte_closed = false;
|
||||
prv_is_ws = false;
|
||||
val_bgn = pos + 1;
|
||||
break;
|
||||
// alphanum -> enter Area_val_raw; REF.MW: [a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent: case Byte_ascii.Amp:
|
||||
case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star: case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot:
|
||||
case Byte_ascii.Backslash: case Byte_ascii.Slash: case Byte_ascii.Colon: case Byte_ascii.Semic:
|
||||
case Byte_ascii.Question: case Byte_ascii.At:
|
||||
case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end: case Byte_ascii.Pow: case Byte_ascii.Underline: case Byte_ascii.Tick:
|
||||
case Byte_ascii.Curly_bgn: case Byte_ascii.Curly_end: case Byte_ascii.Pipe: case Byte_ascii.Tilde:
|
||||
area = Area__val_naked;
|
||||
val_bgn = pos;
|
||||
break;
|
||||
// case Byte_ascii.Angle_end: NOTE: valid in MW; making invalid now until finding counter-example
|
||||
// angle_bgn -> check for <nowiki>
|
||||
case Byte_ascii.Angle_bgn:
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found)
|
||||
area = Area__invalid; // NOTE: valid in MW; making invalid now until finding counter-example
|
||||
else
|
||||
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
break;
|
||||
// rest -> ignore
|
||||
default:
|
||||
area = Area__invalid;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__val_quote: { // EX: "'val' " in "key = 'val'"; REF.MW: \"([^<\"]*)\"
|
||||
switch (b) {
|
||||
// quote: check if same as opening quote
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos:
|
||||
if (qte_byte == b) { // quote closes val
|
||||
val_end = pos;
|
||||
Make(src, pos + 1); // NOTE: set atr_end *after* quote
|
||||
if (qte_closed)
|
||||
area = Area__invalid;
|
||||
else {
|
||||
if (qte_byte == b) { // quote closes val
|
||||
qte_closed = true;
|
||||
val_end = pos;
|
||||
}
|
||||
else { // quote is just char; EX: title="1 o'clock" or title='The "C" way'
|
||||
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
}
|
||||
}
|
||||
else { // quote is just char; EX: title="1 o'clock" or title='The "C" way'
|
||||
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
}
|
||||
break;
|
||||
// lt -> check for <nowiki>; EX: <span title='ab<nowiki>c</nowiki>de'>
|
||||
case Byte_ascii.Lt:
|
||||
if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found)
|
||||
// area = Area__invalid; // DELETE: 2012-11-13; unpaired < should not mark atr invalid; EX: style='margin:1em<f'
|
||||
val_bfr.Add_byte(Byte_ascii.Lt);
|
||||
else
|
||||
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
prv_is_ws = false;
|
||||
break;
|
||||
// ws -> convert all ws to \s; only allow 1 ws at any point in time
|
||||
case Byte_ascii.Nl: case Byte_ascii.Tab: case Byte_ascii.Cr: // REF.MW:Sanitizer.php|decodeTagAttributes $value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
|
||||
case Byte_ascii.Space:
|
||||
if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init
|
||||
if (prv_is_ws) {} // noop; only allow one ws at a time; EX: "a b" -> "a b"; "a\n\nb" -> "a b"
|
||||
else {
|
||||
prv_is_ws = true; val_bfr.Add_byte(Byte_ascii.Space);
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space: // REF.MW:Sanitizer.php|decodeTagAttributes $value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
|
||||
if (qte_closed) {
|
||||
Make(src, pos); // NOTE: set atr_end *after* quote
|
||||
if (atr_bgn == -1) atr_bgn = pos; // NOTE: process ws just like Area__atr_limbo
|
||||
}
|
||||
else {
|
||||
if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init
|
||||
if (prv_is_ws) {} // noop; only allow one ws at a time; EX: "a b" -> "a b"; "a\n\nb" -> "a b"
|
||||
else {
|
||||
prv_is_ws = true; val_bfr.Add_byte(Byte_ascii.Space);
|
||||
}
|
||||
}
|
||||
break;
|
||||
// angle_bgn -> check for <nowiki>; EX: <span title='ab<nowiki>c</nowiki>de'>
|
||||
case Byte_ascii.Angle_bgn:
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found) {
|
||||
// area = Area__invalid; // "<" inside quote is invalid; EX: <span title='a<b'>c</span>
|
||||
if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
}
|
||||
else {
|
||||
if (qte_closed) {}
|
||||
else {
|
||||
if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init
|
||||
}
|
||||
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
}
|
||||
prv_is_ws = false;
|
||||
break;
|
||||
// rest -> add to val
|
||||
default:
|
||||
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
if (qte_closed)
|
||||
area = Area__invalid;
|
||||
else {
|
||||
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Area__val_naked: // no quotes; EX:a=bcd
|
||||
case Area__val_naked: // no quotes; EX:a=bcd; REF.MW:([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
|
||||
switch (b) {
|
||||
// alphanum -> continue reading
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
@@ -308,16 +324,28 @@ public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATT
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent:
|
||||
case Byte_ascii.Amp: case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star:
|
||||
case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Slash:
|
||||
case Byte_ascii.Colon: case Byte_ascii.Semic: case Byte_ascii.Gt:
|
||||
case Byte_ascii.Question: case Byte_ascii.At: case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end:
|
||||
case Byte_ascii.Pow: case Byte_ascii.Underline: case Byte_ascii.Tick:
|
||||
case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde:
|
||||
case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent: case Byte_ascii.Amp:
|
||||
case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star: case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot:
|
||||
case Byte_ascii.Backslash: case Byte_ascii.Slash: case Byte_ascii.Colon: case Byte_ascii.Semic:
|
||||
case Byte_ascii.Question: case Byte_ascii.At:
|
||||
case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end: case Byte_ascii.Pow: case Byte_ascii.Underline: case Byte_ascii.Tick:
|
||||
case Byte_ascii.Curly_bgn: case Byte_ascii.Curly_end: case Byte_ascii.Pipe: case Byte_ascii.Tilde:
|
||||
if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
break;
|
||||
// case Byte_ascii.Angle_end: NOTE: valid in MW; making invalid now until finding counter-example
|
||||
// angle_bgn -> check for <nowiki>; EX: a=b<nowiki>c</nowiki>d
|
||||
case Byte_ascii.Angle_bgn:
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found) {
|
||||
area = Area__invalid; // NOTE: valid in MW; making invalid now until finding counter-example
|
||||
}
|
||||
else {
|
||||
if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init
|
||||
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
}
|
||||
break;
|
||||
// ws -> src_end atr
|
||||
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl:
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
val_end = pos;
|
||||
Make(src, pos);
|
||||
break;
|
||||
@@ -329,14 +357,9 @@ public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATT
|
||||
key_end = pos;
|
||||
area = Area__val_limbo; // set area to val_bgn (basically, put after =)
|
||||
}
|
||||
else // "a=b=c"; discard all
|
||||
else // "a=b=c"; discard all
|
||||
area = Area__invalid;
|
||||
break;
|
||||
case Byte_ascii.Lt:
|
||||
val_end = pos;
|
||||
Make(src, pos);
|
||||
--pos; // NOTE: --pos to include "<" as part of next atr; above ws excludes from next atr
|
||||
break;
|
||||
default:
|
||||
area = Area__invalid;
|
||||
break;
|
||||
@@ -355,10 +378,10 @@ public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATT
|
||||
byte[] key_bry = text_ary[j * Mwh_atr_mgr.Text__mult];
|
||||
byte[] val_bry_manual = null;
|
||||
int atr_utl = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_utl];
|
||||
boolean atr_valid = (atr_utl & Mwh_atr_itm.Mask__valid) == Mwh_atr_itm.Mask__valid;
|
||||
boolean repeated = (atr_utl & Mwh_atr_itm.Mask__repeated) == Mwh_atr_itm.Mask__repeated;
|
||||
boolean key_exists = (atr_utl & Mwh_atr_itm.Mask__key_exists) == Mwh_atr_itm.Mask__key_exists;
|
||||
boolean val_made = (atr_utl & Mwh_atr_itm.Mask__val_made) == Mwh_atr_itm.Mask__val_made;
|
||||
boolean atr_valid = (atr_utl & Mwh_atr_itm_.Mask__valid) == Mwh_atr_itm_.Mask__valid;
|
||||
boolean repeated = (atr_utl & Mwh_atr_itm_.Mask__repeated) == Mwh_atr_itm_.Mask__repeated;
|
||||
boolean key_exists = (atr_utl & Mwh_atr_itm_.Mask__key_exists) == Mwh_atr_itm_.Mask__key_exists;
|
||||
boolean val_made = (atr_utl & Mwh_atr_itm_.Mask__val_made) == Mwh_atr_itm_.Mask__val_made;
|
||||
if (val_made)
|
||||
val_bry_manual = text_ary[(j * Mwh_atr_mgr.Text__mult) + 1];
|
||||
wkr.On_atr_each(this, src, nde_tid, atr_valid, repeated, key_exists, key_bry, val_bry_manual, data_ary, itm_idx);
|
||||
@@ -368,6 +391,48 @@ public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATT
|
||||
|
||||
return pos;
|
||||
}
|
||||
private void Make(byte[] src, int atr_end) {
|
||||
// calc final values for atr
|
||||
boolean key_exists = false;
|
||||
byte[] key_bry = null, val_bry = null;
|
||||
boolean atr_valid = true;
|
||||
if (area == Area__invalid) {
|
||||
atr_valid = false;
|
||||
key_bry = Bry_.Empty;
|
||||
key_bfr.Clear();
|
||||
if (val_bgn == -1) val_bgn = atr_bgn;
|
||||
val_bfr.Clear();
|
||||
}
|
||||
else {
|
||||
if (key_bgn != -1 && val_bgn != -1) // key && val exists; EX: "<input id='123'>"
|
||||
key_exists = true;
|
||||
else { // not a pair; EX: "<input checked>"
|
||||
if (key_end == -1) key_end = val_end; // NOTE: key_end == -1 when eos; EX: "a" would have key_bgn = 0; key_end = -1; val_end = 1 DATE:2014-07-03
|
||||
val_bgn = val_end = -1;
|
||||
}
|
||||
key_bry = key_bfr_on ? key_bfr.To_bry_and_clear() : Bry_.Mid(src, key_bgn, key_end); // always make key_bry; needed for repeated_atrs as well as key_tid
|
||||
if (val_bfr_on) val_bry = val_bfr.To_bry_and_clear();
|
||||
}
|
||||
int qte_tid = Mwh_atr_itm_.Mask__qte__none;
|
||||
if (qte_byte != Byte_ascii.Null)
|
||||
qte_tid = qte_byte == Byte_ascii.Quote ? Mwh_atr_itm_.Mask__qte_qute : Mwh_atr_itm_.Mask__qte__apos;
|
||||
int atr_uid = atr_mgr.Add(nde_uid, nde_tid, atr_valid, false, key_exists, atr_bgn, atr_end, key_bgn, key_end, key_bry, eql_pos, qte_tid, val_bgn, val_end, val_bry);
|
||||
|
||||
// handle repeated atrs
|
||||
if (atr_valid) {
|
||||
int repeated_uid = repeated_atrs_hash.Get_as_int_or(key_bry, -1);
|
||||
if (repeated_uid != -1) {
|
||||
repeated_atrs_hash.Del(key_bry);
|
||||
atr_mgr.Set_repeated(repeated_uid);
|
||||
}
|
||||
repeated_atrs_hash.Add_bry_int(key_bry, atr_uid);
|
||||
}
|
||||
|
||||
// reset temp variables
|
||||
area = Area__atr_limbo; qte_byte = Byte_ascii.Null;
|
||||
atr_bgn = key_bgn = val_bgn = key_end = val_end = eql_pos = -1;
|
||||
key_bfr_on = val_bfr_on = ws_is_before_val = qte_closed = false;
|
||||
}
|
||||
public int Xnde_find_gt_find(byte[] src, int pos, int end) {
|
||||
bry_ref.Val_(null);
|
||||
byte b = src[pos];
|
||||
@@ -381,7 +446,7 @@ public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATT
|
||||
return bry == null ? Bry_find_.Not_found : bry.length + pos;
|
||||
}
|
||||
private int Xnde_find_gt(byte[] src, int lt_pos, int end) {
|
||||
int pos = lt_pos + 1;
|
||||
int pos = lt_pos + 1; if (pos == end) return Bry_find_.Not_found;
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Slash && pos + 1 < end) {
|
||||
++pos;
|
||||
@@ -406,47 +471,6 @@ public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATT
|
||||
}
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
private void Make(byte[] src, int atr_end) {
|
||||
// calc final values for atr
|
||||
boolean key_exists = false;
|
||||
byte[] key_bry = null, val_bry = null;
|
||||
boolean atr_valid = true;
|
||||
if (area != Area__invalid) {
|
||||
if (key_bgn != -1 && val_bgn != -1) // key && val exists; EX: "<input id='123'>"
|
||||
key_exists = true;
|
||||
else { // not a pair; EX: "<input checked>"
|
||||
if (key_end == -1) key_end = val_end; // NOTE: key_end == -1 when eos; EX: "a" would have key_bgn = 0; key_end = -1; val_end = 1 DATE:2014-07-03
|
||||
val_bgn = val_end = -1;
|
||||
}
|
||||
key_bry = key_bfr_on ? key_bfr.Xto_bry_and_clear() : Bry_.Mid(src, key_bgn, key_end); // always make key_bry; needed for repeated_atrs as well as key_tid
|
||||
if (val_bfr_on) val_bry = val_bfr.Xto_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
atr_valid = false;
|
||||
key_bry = Bry_.Empty;
|
||||
key_bfr.Clear();
|
||||
if (val_bgn == -1) val_bgn = atr_bgn;
|
||||
}
|
||||
int qte_tid = Mwh_atr_itm.Mask__qte__none;
|
||||
if (qte_byte != Byte_ascii.Null)
|
||||
qte_tid = qte_byte == Byte_ascii.Quote ? Mwh_atr_itm.Mask__qte_qute : Mwh_atr_itm.Mask__qte__apos;
|
||||
int atr_uid = atr_mgr.Add(nde_uid, nde_tid, atr_valid, false, key_exists, atr_bgn, atr_end, key_bgn, key_end, key_bry, eql_pos, qte_tid, val_bgn, val_end, val_bry);
|
||||
|
||||
// handle repeated atrs
|
||||
if (atr_valid) {
|
||||
int repeated_uid = repeated_atrs_hash.Get_as_int_or(key_bry, -1);
|
||||
if (repeated_uid != -1) {
|
||||
repeated_atrs_hash.Del(key_bry);
|
||||
atr_mgr.Set_repeated(repeated_uid);
|
||||
}
|
||||
repeated_atrs_hash.Add_bry_int(key_bry, atr_uid);
|
||||
}
|
||||
|
||||
// reset temp variables
|
||||
area = Area__atr_limbo; qte_byte = Byte_ascii.Null;
|
||||
atr_bgn = key_bgn = val_bgn = key_end = val_end = eql_pos = -1;
|
||||
key_bfr_on = val_bfr_on = ws_is_before_val = false;
|
||||
}
|
||||
private static final Hash_adp_bry xnde_hash = Hash_adp_bry.ci_a7()
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag_nowiki.Name_bry())
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag_noinclude.Name_bry())
|
||||
|
||||
@@ -21,7 +21,7 @@ class Mwh_atr_parser_fxt {
|
||||
private final Mwh_atr_parser parser = new Mwh_atr_parser();
|
||||
private final Mwh_doc_wkr__atr_bldr wkr = new Mwh_doc_wkr__atr_bldr();
|
||||
public Mwh_atr_itm Make_pair(String key, String val) {return new Mwh_atr_itm(Bry_.Empty, Bool_.Y, Bool_.N, Bool_.Y, -1, -1, -1, -1, Bry_.new_u8(key) , -1, -1, Bry_.new_u8(val) , -1, -1);}
|
||||
public Mwh_atr_itm Make_name(String key) {return new Mwh_atr_itm(Bry_.Empty, Bool_.Y, Bool_.N, Bool_.N, -1, -1, -1, -1, Bry_.new_u8(key) , -1, -1, null , -1, -1);}
|
||||
public Mwh_atr_itm Make_name(String key) {return new Mwh_atr_itm(Bry_.Empty, Bool_.Y, Bool_.N, Bool_.N, -1, -1, -1, -1, Bry_.new_u8(key) , -1, -1, Bry_.new_u8(key) , -1, -1);}
|
||||
public Mwh_atr_itm Make_fail(int bgn, int end) {return new Mwh_atr_itm(Bry_.Empty, Bool_.N, Bool_.N, Bool_.N, bgn, end, -1, -1, null , -1, -1, null , -1, -1);}
|
||||
public void Test_val_as_int(String raw, int expd) {
|
||||
byte[] src = Bry_.new_u8(raw);
|
||||
@@ -44,14 +44,14 @@ class Mwh_atr_parser_fxt {
|
||||
for (int i = 0; i < len; ++i) {
|
||||
To_bfr(expd_bfr, i < expd_len ? expd_ary[i] : null, actl_bfr, i < actl_len ? actl_ary[i] : null);
|
||||
}
|
||||
Tfds.Eq_str_lines(expd_bfr.Xto_str_and_clear(), actl_bfr.Xto_str_and_clear());
|
||||
Tfds.Eq_str_lines(expd_bfr.To_str_and_clear(), actl_bfr.To_str_and_clear());
|
||||
}
|
||||
private void To_bfr(Bry_bfr expd_bfr, Mwh_atr_itm expd_itm, Bry_bfr actl_bfr, Mwh_atr_itm actl_itm) {
|
||||
To_bfr__main(expd_bfr, expd_itm);
|
||||
To_bfr__main(actl_bfr, actl_itm);
|
||||
To_bfr__head(expd_bfr, expd_itm);
|
||||
To_bfr__head(actl_bfr, actl_itm);
|
||||
if (expd_itm.Atr_bgn() != -1) {
|
||||
if (expd_itm != null && expd_itm.Atr_bgn() != -1) {
|
||||
To_bfr__atr_rng(expd_bfr, expd_itm);
|
||||
To_bfr__atr_rng(actl_bfr, actl_itm);
|
||||
}
|
||||
@@ -74,26 +74,3 @@ class Mwh_atr_parser_fxt {
|
||||
bfr.Add_str_a7("rng:").Add_int_variable(itm.Atr_bgn()).Add_byte_semic().Add_int_variable(itm.Atr_end()).Add_byte_nl();
|
||||
}
|
||||
}
|
||||
class Mwh_doc_wkr__atr_bldr implements Mwh_doc_wkr {
|
||||
private final List_adp list = List_adp_.new_();
|
||||
public Hash_adp_bry Nde_regy() {return null;}
|
||||
public void On_atr_each(Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] data_ary, int itm_idx) {
|
||||
int atr_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_bgn];
|
||||
int atr_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_end];
|
||||
int key_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_key_bgn];
|
||||
int key_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_key_end];
|
||||
int val_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
|
||||
int val_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
|
||||
int eql_pos = data_ary[itm_idx + Mwh_atr_mgr.Idx_eql_pos];
|
||||
int qte_tid = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_utl];
|
||||
qte_tid = Mwh_atr_itm.Calc_qte_tid(qte_tid);
|
||||
Mwh_atr_itm atr = new Mwh_atr_itm(src, valid, repeated, key_exists, atr_bgn, atr_end, key_bgn, key_end, key_bry, val_bgn, val_end, val_bry_manual, eql_pos, qte_tid);
|
||||
list.Add(atr);
|
||||
}
|
||||
public void On_txt_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
public void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) {}
|
||||
public void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline) {}
|
||||
public void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
public Mwh_atr_itm[] To_atr_ary() {return (Mwh_atr_itm[])list.To_ary_and_clear(Mwh_atr_itm.class);}
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ public class Mwh_atr_parser_tst {
|
||||
@Test public void Pair__quote__double() {fxt.Test_parse("a=\"b\"" , fxt.Make_pair("a" , "b"));}
|
||||
@Test public void Pair__quote__single() {fxt.Test_parse("a='b'" , fxt.Make_pair("a" , "b"));}
|
||||
@Test public void Pair__quote__none() {fxt.Test_parse("a=b" , fxt.Make_pair("a" , "b"));}
|
||||
@Test public void Pair__quote__none__amp() {fxt.Test_parse("a=&bc" , fxt.Make_pair("a" , "&bc"));}
|
||||
@Test public void Pair__empty() {fxt.Test_parse("a=''" , fxt.Make_pair("a" , ""));}
|
||||
@Test public void Pair__key_w_underline() {fxt.Test_parse("a_b=c" , fxt.Make_pair("a_b" , "c"));}
|
||||
|
||||
@@ -46,18 +47,32 @@ public class Mwh_atr_parser_tst {
|
||||
|
||||
@Test public void Many__quote__apos() {fxt.Test_parse("a='b' c='d' e='f'" , fxt.Make_pair("a", "b"), fxt.Make_pair("c", "d"), fxt.Make_pair("e", "f"));}
|
||||
@Test public void Many__naked() {fxt.Test_parse("a=b c=d e=f" , fxt.Make_pair("a", "b"), fxt.Make_pair("c", "d"), fxt.Make_pair("e", "f"));}
|
||||
@Test public void Many__naked__pair() {fxt.Test_parse("a b=c" , fxt.Make_name("a"), fxt.Make_pair("b", "c"));}
|
||||
|
||||
@Test public void Val__ws__nl() {fxt.Test_parse("a='b\nc'" , fxt.Make_pair("a", "b c"));}
|
||||
@Test public void Val__ws__mult() {fxt.Test_parse("a='b c'" , fxt.Make_pair("a", "b c"));}
|
||||
@Test public void Val__ws__mult_mult() {fxt.Test_parse("a='b c d'" , fxt.Make_pair("a", "b c d"));} // PURPOSE: fix wherein 1st-gobble gobbled rest of spaces (was b cd)
|
||||
@Test public void Val__apos() {fxt.Test_parse("a=\"b c'd\"" , fxt.Make_pair("a", "b c'd"));} // PURPOSE: fix wherein apos was gobbled up; PAGE:en.s:Alice's_Adventures_in_Wonderland; DATE:2013-11-22
|
||||
@Test public void Val__apos_2() {fxt.Test_parse("a=\"b'c d\"" , fxt.Make_pair("a", "b'c d"));} // PURPOSE: fix wherein apos was causing "'b'c d"; PAGE:en.s:Grimm's_Household_Tales,_Volume_1; DATE:2013-12-22
|
||||
@Test public void Quote__ws__nl() {fxt.Test_parse("a='b\nc'" , fxt.Make_pair("a", "b c"));}
|
||||
@Test public void Quote__ws__mult() {fxt.Test_parse("a='b c'" , fxt.Make_pair("a", "b c"));}
|
||||
@Test public void Quote__ws__mult_mult() {fxt.Test_parse("a='b c d'" , fxt.Make_pair("a", "b c d"));} // PURPOSE: fix wherein 1st-gobble gobbled rest of spaces (was b cd)
|
||||
@Test public void Quote__apos() {fxt.Test_parse("a=\"b c'd\"" , fxt.Make_pair("a", "b c'd"));} // PURPOSE: fix wherein apos was gobbled up; PAGE:en.s:Alice's_Adventures_in_Wonderland; DATE:2013-11-22
|
||||
@Test public void Quote__apos_2() {fxt.Test_parse("a=\"b'c d\"" , fxt.Make_pair("a", "b'c d"));} // PURPOSE: fix wherein apos was causing "'b'c d"; PAGE:en.s:Grimm's_Household_Tales,_Volume_1; DATE:2013-12-22
|
||||
// @Test public void Quote__angle() {fxt.Test_parse("a='<'" , fxt.Make_fail(0, 5));} // PURPOSE: "<" inside quotes is always invalid
|
||||
@Test public void Quote__invalid() {fxt.Test_parse("a='b'c" , fxt.Make_fail(0, 6));}
|
||||
|
||||
@Test public void Nowiki__val() {fxt.Test_parse("a=<nowiki>'b'</nowiki>" , fxt.Make_pair("a", "b").Atr_rng(0, 13));}
|
||||
@Test public void Nowiki__key() {fxt.Test_parse("<nowiki>a=b</nowiki>" , fxt.Make_pair("a", "b").Atr_rng(8, 11));}
|
||||
@Test public void Nowiki__key_2() {fxt.Test_parse("a<nowiki>b</nowiki>c=d" , fxt.Make_pair("abc", "d").Atr_rng(0, 22));}
|
||||
@Test public void Nowiki__key_3() {fxt.Test_parse("a<nowiki>=</nowiki>\"b\"" , fxt.Make_pair("a", "b").Atr_rng(0, 22));} // EX:fr.w:{{Portail|Transpédia|Californie}}
|
||||
@Test public void Nowiki__quote() {fxt.Test_parse("a=\"b<nowiki>c</nowiki>d<nowiki>e</nowiki>f\"", fxt.Make_pair("a", "bcdef"));}
|
||||
@Test public void Nowiki__atr() {fxt.Test_parse("<nowiki>a=b</nowiki>" , fxt.Make_pair("a", "b").Atr_rng(8, 20));}
|
||||
@Test public void Nowiki__key() {fxt.Test_parse("a<nowiki>b</nowiki>c=d" , fxt.Make_pair("abc", "d").Atr_rng(0, 22));}
|
||||
@Test public void Nowiki__eql() {fxt.Test_parse("a<nowiki>=</nowiki>\"b\"" , fxt.Make_pair("a", "b").Atr_rng(0, 22));} // EX:fr.w:{{Portail|Transpédia|Californie}}
|
||||
@Test public void Nowiki__val__naked() {fxt.Test_parse("a=b<nowiki>c</nowiki>d" , fxt.Make_pair("a", "bcd").Atr_rng(0, 22));}
|
||||
@Test public void Nowiki__val__quote() {fxt.Test_parse("a=<nowiki>'b'</nowiki>" , fxt.Make_pair("a", "b").Atr_rng(0, 22));}
|
||||
@Test public void Nowiki__val__quote_2() {fxt.Test_parse("a=\"b<nowiki>c</nowiki>d<nowiki>e</nowiki>f\"", fxt.Make_pair("a", "bcdef"));}
|
||||
|
||||
@Test public void Val__as_int() {fxt.Test_val_as_int("-123" , -123);}
|
||||
|
||||
// @Test public void Embedded() { // PURPOSE: handle html inside attrib; PAGE:en.w:Economy_of_Greece DATE:2015-10-15
|
||||
// fxt.Test_parse("title='<sup id='cite_ref-a_1-0' class='reference'><a href='#cite_note-a-1'>[1]</a></sup> c'"
|
||||
// , fxt.Make_fail(0, 11) // "title='<sup" invalid b/c of "<"
|
||||
// , fxt.Make_pair("id", "cite_ref-a_1-0")
|
||||
// , fxt.Make_fail(31, 52) // "class='reference'><a" invalid b/c no ws after '
|
||||
// , fxt.Make_fail(53, 88) // "href='#cite_note-a-1'>[1]</a></sup>" invalid b/c no ws after '
|
||||
// , fxt.Make_fail(89, 91) // " c'" invalid b/c name (c) cannot have apos
|
||||
// );
|
||||
// }
|
||||
}
|
||||
|
||||
@@ -21,5 +21,5 @@ class Mwh_doc_itm {
|
||||
public int Itm_tid() {return itm_tid;} private final int itm_tid;
|
||||
public byte[] Itm_bry() {return itm_bry;} private final byte[] itm_bry;
|
||||
public int Nde_tid() {return nde_tid;} private final int nde_tid;
|
||||
public static final int Itm_tid__txt = 0, Itm_tid__nde_head = 1, Itm_tid__nde_tail = 2, Itm_tid__comment = 3;
|
||||
public static final int Itm_tid__txt = 0, Itm_tid__nde_head = 1, Itm_tid__nde_tail = 2, Itm_tid__comment = 3, Itm_tid__entity = 4;
|
||||
}
|
||||
|
||||
@@ -17,11 +17,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.primitives.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
import gplx.xowa.parsers.amps.*; import gplx.xowa.parsers.xndes.*;
|
||||
public class Mwh_doc_parser {
|
||||
private final Mwh_doc_mgr dom_mgr = new Mwh_doc_mgr(16);
|
||||
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
|
||||
private final List_adp nde_stack = List_adp_.new_();
|
||||
private final Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance; private final Xop_tkn_mkr tkn_mkr = new Xop_tkn_mkr();
|
||||
private byte[] src; private int src_end;
|
||||
private Mwh_doc_wkr wkr;
|
||||
private Hash_adp_bry nde_regy;
|
||||
@@ -34,11 +35,28 @@ public class Mwh_doc_parser {
|
||||
int pos = txt_bgn = src_bgn;
|
||||
nde_uid = cur_nde_tid = -1;
|
||||
cur_nde = null;
|
||||
|
||||
while (pos < src_end) {
|
||||
if (src[pos] == Byte_ascii.Angle_bgn) // "<": possible nde start
|
||||
pos = Parse_nde(pos);
|
||||
else // else, just increment
|
||||
++pos;
|
||||
byte b = src[pos];
|
||||
switch (b) {
|
||||
case Byte_ascii.Angle_bgn: // "<": possible nde start
|
||||
pos = Parse_nde(pos);
|
||||
break;
|
||||
case Byte_ascii.Amp: // "&": check for entity; EX: in sr-ec -> sr-el
|
||||
Xop_tkn_itm tkn = amp_mgr.Parse_as_tkn(tkn_mkr, src, src_end, pos, pos + 1);
|
||||
if (tkn == null)
|
||||
++pos;
|
||||
else {
|
||||
wkr.On_txt_end(this, src, cur_nde_tid, txt_bgn, pos);
|
||||
wkr.On_entity_end(this, src, cur_nde_tid, tkn.Src_bgn(), tkn.Src_end());
|
||||
pos = tkn.Src_end();
|
||||
txt_bgn = pos;
|
||||
}
|
||||
break;
|
||||
default: // else, just increment
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (src_end != txt_bgn) wkr.On_txt_end(this, src, cur_nde_tid, txt_bgn, pos);
|
||||
}
|
||||
@@ -142,10 +160,45 @@ public class Mwh_doc_parser {
|
||||
break;
|
||||
case Nde_end_tid__ws:
|
||||
case Nde_end_tid__slash:
|
||||
case Nde_end_tid__backslash: // handled above
|
||||
pos = atr_parser.Parse(wkr, nde_uid, cur_nde_tid, src, pos, src_end);
|
||||
nde_end_tid = atr_parser.Nde_end_tid();
|
||||
txt_bgn = pos;
|
||||
case Nde_end_tid__backslash:
|
||||
// look for ">" or "/>"
|
||||
int tmp_pos = pos, atrs_end = src_end, head_end = src_end;
|
||||
boolean loop = true;
|
||||
while (loop) {
|
||||
byte b = src[tmp_pos];
|
||||
switch (b) {
|
||||
// angle_end -> stop iterating
|
||||
case Byte_ascii.Angle_end:
|
||||
atrs_end = tmp_pos;
|
||||
head_end = tmp_pos + 1;
|
||||
nde_end_tid = Mwh_doc_parser.Nde_end_tid__gt;
|
||||
loop = false;
|
||||
break;
|
||||
// slash -> check for "/>" or " / "
|
||||
case Byte_ascii.Slash:
|
||||
int nxt_pos = tmp_pos + 1;
|
||||
if (nxt_pos == src_end) {
|
||||
nde_end_tid = Mwh_doc_parser.Nde_end_tid__invalid;
|
||||
loop = false;
|
||||
}
|
||||
else if (src[nxt_pos] == Byte_ascii.Angle_end) {
|
||||
atrs_end = tmp_pos;
|
||||
head_end = tmp_pos + 2;
|
||||
nde_end_tid = Mwh_doc_parser.Nde_end_tid__inline;
|
||||
loop = false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (loop) {
|
||||
++tmp_pos;
|
||||
if (tmp_pos == src_end) break;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
atr_parser.Parse(wkr, nde_uid, cur_nde_tid, src, pos, atrs_end);
|
||||
pos = head_end;
|
||||
txt_bgn = head_end;
|
||||
break;
|
||||
}
|
||||
switch (nde_end_tid) {
|
||||
|
||||
@@ -23,6 +23,7 @@ class Mwh_doc_parser_fxt {
|
||||
public Mwh_doc_itm Make_txt (String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__txt , -1, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_txt (String raw, int nde_tid) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__txt , nde_tid, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_comment (String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__comment , -1, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_entity (String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__entity , -1, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_nde_head(String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_head , -1, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_nde_tail(String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_tail , -1, Bry_.new_u8(raw));}
|
||||
public void Test_parse(String raw, Mwh_doc_itm... expd) {
|
||||
@@ -41,7 +42,7 @@ class Mwh_doc_parser_fxt {
|
||||
for (int i = 0; i < len; ++i) {
|
||||
To_bfr(expd_bfr, i < expd_len ? expd_ary[i] : null, actl_bfr, i < actl_len ? actl_ary[i] : null);
|
||||
}
|
||||
Tfds.Eq_str_lines(expd_bfr.Xto_str_and_clear(), actl_bfr.Xto_str_and_clear());
|
||||
Tfds.Eq_str_lines(expd_bfr.To_str_and_clear(), actl_bfr.To_str_and_clear());
|
||||
}
|
||||
private void To_bfr(Bry_bfr expd_bfr, Mwh_doc_itm expd_itm, Bry_bfr actl_bfr, Mwh_doc_itm actl_itm) {
|
||||
To_bfr__main(expd_bfr, expd_itm); To_bfr__main(actl_bfr, actl_itm);
|
||||
@@ -62,12 +63,13 @@ class Mwh_doc_parser_fxt {
|
||||
class Mwh_doc_wkr__itm_bldr implements Mwh_doc_wkr {
|
||||
private final List_adp list = List_adp_.new_();
|
||||
public Hash_adp_bry Nde_regy() {return nde_regy;} private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw();
|
||||
public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {}
|
||||
public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {}
|
||||
public void On_txt_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__txt , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
public void On_nde_head_bgn (Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) {}
|
||||
public void On_nde_head_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_head , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
public void On_nde_tail_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_tail , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__comment , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
public void On_entity_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__entity , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
|
||||
public Mwh_doc_itm[] To_atr_ary() {return (Mwh_doc_itm[])list.To_ary_and_clear(Mwh_doc_itm.class);}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ public class Mwh_doc_parser_tst {
|
||||
private final Mwh_doc_parser_fxt fxt = new Mwh_doc_parser_fxt();
|
||||
@Test public void Text__basic() {fxt.Test_parse("abc" , fxt.Make_txt("abc"));}
|
||||
@Test public void Comment() {fxt.Test_parse("a<!--b-->c" , fxt.Make_txt("a"), fxt.Make_comment("<!--b-->"), fxt.Make_txt("c"));}
|
||||
@Test public void Entity() {fxt.Test_parse("a b" , fxt.Make_txt("a"), fxt.Make_entity(" "), fxt.Make_txt("b"));}
|
||||
@Test public void Fail__inline_eos() {fxt.Test_parse("a<b/" , fxt.Make_txt("a<b/"));}
|
||||
@Test public void Fail__unknown() {fxt.Test_parse("a<bc/>d" , fxt.Make_txt("a<bc/>d"));}
|
||||
@Test public void Node__inline() {fxt.Test_parse("a<b/>c" , fxt.Make_txt("a"), fxt.Make_nde_head("<b/>") , fxt.Make_txt("c"));}
|
||||
|
||||
@@ -24,4 +24,5 @@ public interface Mwh_doc_wkr {
|
||||
void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline);
|
||||
void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end);
|
||||
void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end);
|
||||
void On_entity_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Mwh_doc_wkr__atr_bldr implements Mwh_doc_wkr {
|
||||
private final List_adp list = List_adp_.new_();
|
||||
public Hash_adp_bry Nde_regy() {return null;}
|
||||
public void On_atr_each(Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] data_ary, int itm_idx) {
|
||||
int atr_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_bgn];
|
||||
int atr_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_end];
|
||||
int key_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_key_bgn];
|
||||
int key_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_key_end];
|
||||
int val_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
|
||||
int val_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
|
||||
int eql_pos = data_ary[itm_idx + Mwh_atr_mgr.Idx_eql_pos];
|
||||
int qte_tid = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_utl];
|
||||
qte_tid = Mwh_atr_itm_.Calc_qte_tid(qte_tid);
|
||||
if (!key_exists) val_bry_manual = key_bry;
|
||||
Mwh_atr_itm atr = new Mwh_atr_itm(src, valid, repeated, key_exists, atr_bgn, atr_end, key_bgn, key_end, key_bry, val_bgn, val_end, val_bry_manual, eql_pos, qte_tid);
|
||||
list.Add(atr);
|
||||
}
|
||||
public void On_txt_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
public void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) {}
|
||||
public void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline) {}
|
||||
public void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
public void On_entity_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
public Mwh_atr_itm[] To_atr_ary() {return (Mwh_atr_itm[])list.To_ary_and_clear(Mwh_atr_itm.class);}
|
||||
}
|
||||
@@ -53,7 +53,7 @@ public class HierPosAryBldr {
|
||||
public String To_str() {
|
||||
String_bldr sb = String_bldr_.new_();
|
||||
for (int i = 0; i < aryIdx; i++)
|
||||
sb.Add_spr_unless_first(Int_.Xto_str(ary[i]), " ", i);
|
||||
sb.Add_spr_unless_first(Int_.To_str(ary[i]), " ", i);
|
||||
return sb.To_str();
|
||||
}
|
||||
int[] ary; int aryIdx = -1; int root = -1;
|
||||
|
||||
@@ -20,7 +20,7 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_colon_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_colon;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Colon, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
Xop_list_wkr listCtx = ctx.List();
|
||||
@@ -37,5 +37,5 @@ public class Xop_colon_lxr implements Xop_lxr {
|
||||
ctx.Subs_add(root, tkn_mkr.Colon(bgn_pos, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
public static final Xop_colon_lxr _ = new Xop_colon_lxr();
|
||||
public static final Xop_colon_lxr Instance = new Xop_colon_lxr();
|
||||
}
|
||||
|
||||
@@ -20,9 +20,9 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_list_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_list;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {Add_ary(core_trie, this, Xop_list_tkn_.Hook_ul, Xop_list_tkn_.Hook_ol, Xop_list_tkn_.Hook_dt, Xop_list_tkn_.Hook_dd);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
private void Add_ary(Btrie_fast_mgr core_trie, Object val, byte[]... ary) {for (byte[] itm : ary) core_trie.Add(itm, val);}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.List().MakeTkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_list_lxr _ = new Xop_list_lxr(); Xop_list_lxr() {}
|
||||
public static final Xop_list_lxr Instance = new Xop_list_lxr(); Xop_list_lxr() {}
|
||||
}
|
||||
|
||||
@@ -17,8 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.net.*; import gplx.langs.htmls.encoders.*;
|
||||
import gplx.xowa.html.*; import gplx.xowa.html.hrefs.*;
|
||||
import gplx.xowa.urls.*;
|
||||
import gplx.xowa.htmls.*; import gplx.xowa.htmls.hrefs.*;
|
||||
import gplx.xowa.apps.urls.*;
|
||||
public class Xoh_lnke_wtr {
|
||||
// private Xoae_app app;
|
||||
public Xoh_lnke_wtr(Xowe_wiki wiki) {}// this.app = wiki.Appe();}
|
||||
|
||||
@@ -20,8 +20,8 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_lnke_end_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_lnke_end;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Brack_end, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Lnke().MakeTkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_lnke_end_lxr _ = new Xop_lnke_end_lxr(); Xop_lnke_end_lxr() {}
|
||||
public static final Xop_lnke_end_lxr Instance = new Xop_lnke_end_lxr(); Xop_lnke_end_lxr() {}
|
||||
}
|
||||
|
||||
@@ -28,11 +28,11 @@ public class Xop_lnke_lxr implements Xop_lxr {
|
||||
Gfo_protocol_itm itm = ary[i];
|
||||
Ctor_lxr_add(core_trie, itm.Key_w_colon_bry(), itm.Tid());
|
||||
}
|
||||
core_trie.Add(Bry_relative_1, new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Xoa_consts.Url_relative_prefix, Gfo_protocol_itm.Tid_relative_1));
|
||||
core_trie.Add(Bry_relative_2, new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Xoa_consts.Url_relative_prefix, Gfo_protocol_itm.Tid_relative_2));
|
||||
core_trie.Add(Bry_relative_1, new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Gfo_protocol_itm.Bry_relative, Gfo_protocol_itm.Tid_relative_1));
|
||||
core_trie.Add(Bry_relative_2, new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Gfo_protocol_itm.Bry_relative, Gfo_protocol_itm.Tid_relative_2));
|
||||
Ctor_lxr_add(core_trie, Bry_.new_a7("xowa-cmd"), Gfo_protocol_itm.Tid_xowa);
|
||||
} private static final byte[] Bry_relative_1 = Bry_.new_a7("[//"), Bry_relative_2 = Bry_.new_a7("[[//");
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
private void Ctor_lxr_add(Btrie_fast_mgr core_trie, byte[] protocol_bry, byte tid) {
|
||||
core_trie.Add(protocol_bry , new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_text, protocol_bry, tid));
|
||||
@@ -42,5 +42,5 @@ public class Xop_lnke_lxr implements Xop_lxr {
|
||||
if (this.tid == Gfo_protocol_itm.Tid_xowa && !ctx.Wiki().Sys_cfg().Xowa_proto_enabled()) return ctx.Lxr_make_txt_(cur_pos);
|
||||
return ctx.Lnke().MakeTkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, protocol, tid, lnke_typ);
|
||||
}
|
||||
public static final Xop_lnke_lxr _ = new Xop_lnke_lxr(); Xop_lnke_lxr() {}
|
||||
public static final Xop_lnke_lxr Instance = new Xop_lnke_lxr(); Xop_lnke_lxr() {}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.net.*; import gplx.xowa.urls.*;
|
||||
import gplx.core.net.*; import gplx.xowa.apps.urls.*;
|
||||
import gplx.xowa.apps.progs.*; import gplx.xowa.wikis.xwikis.*;
|
||||
public class Xop_lnke_wkr implements Xop_ctx_wkr {
|
||||
public void Ctor_ctx(Xop_ctx ctx) {url_parser = ctx.Wiki().Utl__url_parser().Url_parser();} Gfo_url_parser url_parser; Gfo_url_site_data site_data = new Gfo_url_site_data();
|
||||
@@ -288,7 +288,7 @@ public class Xop_lnke_wkr implements Xop_ctx_wkr {
|
||||
byte[] rhs_dlm_bry = Bry_quote;
|
||||
if (lhs_dlm_pos - proto_end_pos > 0) {
|
||||
Bry_bfr bfr = ctx.App().Utl__bfr_mkr().Get_k004();
|
||||
rhs_dlm_bry = bfr.Add(Bry_quote).Add_mid(src, proto_end_pos, lhs_dlm_pos).Xto_bry_and_clear();
|
||||
rhs_dlm_bry = bfr.Add(Bry_quote).Add_mid(src, proto_end_pos, lhs_dlm_pos).To_bry_and_clear();
|
||||
bfr.Mkr_rls();
|
||||
}
|
||||
int rhs_dlm_pos = Bry_find_.Find_fwd(src, rhs_dlm_bry, lnke_bgn_pos, src_len); if (rhs_dlm_pos == Bry_.NotFound) return ctx.Lxr_make_txt_(cur_pos);
|
||||
|
||||
@@ -61,7 +61,7 @@ public class Xop_lnke_wkr_brack_tst {
|
||||
));
|
||||
}
|
||||
@Test public void Encode_xwiki() { // PURPOSE: href title and args should always be encoded; PAGE:en.w:List_of_Category_A_listed_buildings_in_West_Lothian DATE:2014-07-15
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_a7("commons.wikimedia.org"), Bry_.new_a7("commons.wikimedia.org"));
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("commons.wikimedia.org"), Bry_.new_a7("commons.wikimedia.org"));
|
||||
fxt.Test_parse_page_wiki_str // encode page
|
||||
( "[http://commons.wikimedia.org/%22%3E_A B]"
|
||||
, "<a href=\"/site/commons.wikimedia.org/wiki/%22%3E_A\">B</a>" // '%22%3E' not '">'
|
||||
|
||||
@@ -36,7 +36,7 @@ public class Xop_lnke_wkr_relative_tst {
|
||||
fxt.Test_parse_page_wiki_str("[//en.wikipedia.org/wiki/Category:A A]", "<a href=\"/site/en.wikipedia.org/wiki/Category:A\">A</a>");
|
||||
}
|
||||
@Test public void Relurl() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.Test_parse_page_wiki_str("[[//en.wikipedia.org/ a]]", "[<a href=\"/site/en.wikipedia.org/wiki/\">a</a>]");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,11 +20,11 @@ import org.junit.*;
|
||||
public class Xop_lnke_wkr_xwiki_tst {
|
||||
@Before public void init() {fxt.Reset();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Xwiki() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.Test_parse_page_wiki_str("[http://en.wikipedia.org/wiki/A a]", "<a href=\"/site/en.wikipedia.org/wiki/A\">a</a>");
|
||||
}
|
||||
@Test public void Xwiki_relative() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.Test_parse_page_wiki_str("[//en.wikipedia.org/ a]", "<a href=\"/site/en.wikipedia.org/wiki/\">a</a>");
|
||||
}
|
||||
@Test public void Xwiki_qarg() {// DATE:2013-02-02
|
||||
@@ -32,12 +32,12 @@ public class Xop_lnke_wkr_xwiki_tst {
|
||||
fxt.Test_parse_page_wiki_str("http://en.wikipedia.org/wiki/Special:Allpages?from=Earth", "<a href=\"/site/en.wikipedia.org/wiki/Special:Allpages?from=Earth\">http://en.wikipedia.org/wiki/Special:Allpages?from=Earth</a>");
|
||||
}
|
||||
@Test public void Lang_prefix() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.Wiki().Xwiki_mgr().Add_full(Bry_.new_a7("fr"), Bry_.new_a7("fr.wikipedia.org"));
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("fr"), Bry_.new_a7("fr.wikipedia.org"));
|
||||
fxt.Test_parse_page_wiki_str("[http://en.wikipedia.org/wiki/fr:A a]", "<a href=\"/site/fr.wikipedia.org/wiki/A\">a</a>");
|
||||
}
|
||||
@Test public void Xwiki_query_arg() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.Test_parse_page_wiki_str("[http://en.wikipedia.org/wiki/A?action=edit a]", "<a href=\"/site/en.wikipedia.org/wiki/A?action=edit\">a</a>");
|
||||
}
|
||||
@Test public void Ignore_proto() { // PURPOSE: handle other protocols; PAGE:uk.w:Маскалі; DATE:2015-07-28
|
||||
|
||||
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.net.*; import gplx.xowa.wikis.xwikis.*;
|
||||
import gplx.xowa.html.*; import gplx.xowa.html.lnkis.*; import gplx.xowa.html.hrefs.*;
|
||||
import gplx.xowa.htmls.*; import gplx.xowa.htmls.lnkis.*; import gplx.xowa.htmls.hrefs.*;
|
||||
public class Xop_link_parser {
|
||||
public byte[] Html_xowa_ttl() {return html_xowa_ttl;} private byte[] html_xowa_ttl;
|
||||
public byte Html_anchor_cls() {return html_anchor_cls;} private byte html_anchor_cls;
|
||||
@@ -44,7 +44,7 @@ public class Xop_link_parser {
|
||||
}
|
||||
tmp_bfr.Add(raw); // dump everything
|
||||
}
|
||||
raw = tmp_bfr.Xto_bry_and_clear();
|
||||
raw = tmp_bfr.To_bry_and_clear();
|
||||
html_anchor_cls = Xoh_lnki_consts.Tid_a_cls_none;
|
||||
html_anchor_rel = Xoh_lnki_consts.Tid_a_rel_nofollow;
|
||||
break;
|
||||
@@ -56,7 +56,7 @@ public class Xop_link_parser {
|
||||
html_xowa_ttl = Bry_.Mid(raw, slash_pos + Int_.Const_dlm_len, raw.length);
|
||||
}
|
||||
else // next char is not slash; assume xfer_itm refers to ns; EX:File:A.png
|
||||
raw = tmp_bfr.Add(Xoh_href_.Bry__wiki).Add(raw).Xto_bry_and_clear();
|
||||
raw = tmp_bfr.Add(Xoh_href_.Bry__wiki).Add(raw).To_bry_and_clear();
|
||||
break;
|
||||
default: // is page only; EX: Abc
|
||||
if (Bry_.Len_eq_0(raw)) // NOTE: handle blank link; EX: [[File:Loudspeaker.svg|11px|link=|alt=play]]
|
||||
@@ -67,7 +67,7 @@ public class Xop_link_parser {
|
||||
tmp_bfr.Clear();
|
||||
return null;
|
||||
}
|
||||
raw = tmp_bfr.Xto_bry_and_clear();
|
||||
raw = tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -17,13 +17,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.numbers.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*; import gplx.xowa.langs.numbers.*;
|
||||
public class Xop_lnki_arg_parser {
|
||||
private final Btrie_fast_mgr key_trie = Btrie_fast_mgr.cs();
|
||||
private final Bry_bfr int_bfr = Bry_bfr.reset_(16);
|
||||
private final Btrie_bwd_mgr px_trie = Btrie_bwd_mgr.cs_(); private final Btrie_fast_mgr size_trie = Btrie_fast_mgr.cs();
|
||||
private int lnki_w, lnki_h;
|
||||
public void Evt_lang_changed(Xol_lang lang) {
|
||||
public void Evt_lang_changed(Xol_lang_itm lang) {
|
||||
Bry_bfr tmp_bfr = int_bfr;
|
||||
Byte_obj_ref rslt = Byte_obj_ref.zero_();
|
||||
Xol_kwd_mgr mgr = lang.Kwd_mgr();
|
||||
@@ -103,7 +103,7 @@ public class Xop_lnki_arg_parser {
|
||||
}
|
||||
case Key_dim_x: {
|
||||
if (mode_width) {
|
||||
this.lnki_w = int_bfr.XtoIntAndClear(-1);
|
||||
this.lnki_w = int_bfr.To_int_and_clear(-1);
|
||||
mode_width = false;
|
||||
break;
|
||||
}
|
||||
@@ -111,7 +111,7 @@ public class Xop_lnki_arg_parser {
|
||||
}
|
||||
}
|
||||
}
|
||||
int dim = int_bfr.XtoIntAndClear(-1);
|
||||
int dim = int_bfr.To_int_and_clear(-1);
|
||||
if (mode_width) this.lnki_w = dim;
|
||||
else this.lnki_h = dim;
|
||||
return Tid_dim;
|
||||
@@ -126,7 +126,7 @@ public class Xop_lnki_arg_parser {
|
||||
if (list == null && Env_.Mode_testing()) return; // TEST: allows partial parsing of $magicWords
|
||||
size_trie.Clear(); px_trie.Clear();
|
||||
for (int i = 0; i < 10; i++)
|
||||
size_trie.Add((byte)(i + Char_.AsciiZero), Byte_obj_val.new_(Key_dim_num));
|
||||
size_trie.Add((byte)(i + Byte_ascii.Num_0), Byte_obj_val.new_(Key_dim_num));
|
||||
int len = digit_mgr.Len(); // NOTE: add non-english numbers; EX: ۲۰۰px; DATE:2015-07-18
|
||||
for (int i = 0; i < len; ++i) {
|
||||
KeyVal kv = digit_mgr.Get_at(i);
|
||||
|
||||
@@ -21,7 +21,7 @@ import gplx.xowa.parsers.tmpls.*;
|
||||
public class Xop_lnki_lxr_bgn implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_lnki_bgn;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Xop_tkn_.Lnki_bgn, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
Xop_tkn_itm prv_tkn = ctx.Stack_get_last();
|
||||
@@ -37,7 +37,7 @@ public class Xop_lnki_lxr_bgn implements Xop_lxr {
|
||||
ctx.Subs_add_and_stack(root, lnki);
|
||||
return cur_pos;
|
||||
}
|
||||
public static final Xop_lnki_lxr_bgn _ = new Xop_lnki_lxr_bgn();
|
||||
public static final Xop_lnki_lxr_bgn Instance = new Xop_lnki_lxr_bgn();
|
||||
}
|
||||
class Xop_lnki_size {public static final int None = 0, Width = 1, Height = 2, WidthHeight = 4, Upright = 8;}
|
||||
/*
|
||||
|
||||
@@ -21,8 +21,8 @@ import gplx.xowa.parsers.tmpls.*;
|
||||
public class Xop_lnki_lxr_end implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_lnki_end;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Xop_tkn_.Lnki_end, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Lnki().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_lnki_lxr_end _ = new Xop_lnki_lxr_end();
|
||||
public static final Xop_lnki_lxr_end Instance = new Xop_lnki_lxr_end();
|
||||
}
|
||||
|
||||
@@ -16,8 +16,8 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.html.*; import gplx.xowa.html.lnkis.*; import gplx.xowa.xtns.pfuncs.ttls.*;
|
||||
import gplx.xowa.nss.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.lnkis.*; import gplx.xowa.xtns.pfuncs.ttls.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.parsers.tmpls.*;
|
||||
public class Xop_lnki_tkn extends Xop_tkn_itm_base {
|
||||
@Override public byte Tkn_tid() {return tkn_tid;} private byte tkn_tid = Xop_tkn_itm_.Tid_lnki;
|
||||
|
||||
@@ -17,11 +17,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
import gplx.xowa.nss.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.wikis.*; import gplx.xowa.parsers.lnkis.redlinks.*; import gplx.xowa.xtns.pfuncs.ttls.*; import gplx.xowa.xtns.relatedSites.*;
|
||||
import gplx.xowa.parsers.tmpls.*; import gplx.xowa.parsers.miscs.*;
|
||||
public class Xop_lnki_wkr implements Xop_ctx_wkr, Xop_arg_wkr {
|
||||
private Arg_bldr arg_bldr = Arg_bldr._;
|
||||
private Arg_bldr arg_bldr = Arg_bldr.Instance;
|
||||
private Number_parser number_parser = new Number_parser();
|
||||
private Sites_regy_mgr sites_regy_mgr;
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
|
||||
@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
import gplx.xowa.langs.*;
|
||||
import gplx.xowa.nss.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.wikis.*; import gplx.xowa.xtns.pfuncs.ttls.*; import gplx.xowa.xtns.relatedSites.*;
|
||||
import gplx.xowa.parsers.tmpls.*; import gplx.xowa.parsers.lnkis.redlinks.*;
|
||||
public class Xop_lnki_wkr_ {
|
||||
@@ -77,7 +77,7 @@ public class Xop_lnki_wkr_ {
|
||||
if (ttl_in_xwiki == null) return ttl; // occurs if ttl is bad in xwiki; EX: [[en:<bad>]]
|
||||
return ttl_in_xwiki.Ns().Id_file() ? ttl_in_xwiki : ttl;
|
||||
}
|
||||
public static int Chk_for_tail(Xol_lang lang, byte[] src, int cur_pos, int src_len, Xop_lnki_tkn lnki) {
|
||||
public static int Chk_for_tail(Xol_lang_itm lang, byte[] src, int cur_pos, int src_len, Xop_lnki_tkn lnki) {
|
||||
int bgn_pos = cur_pos;
|
||||
Btrie_slim_mgr lnki_trail = lang.Lnki_trail_mgr().Trie();
|
||||
while (true) { // loop b/c there can be multiple consecutive lnki_trail_chars; EX: [[A]]bcde
|
||||
|
||||
@@ -17,8 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.cases.*;
|
||||
import gplx.xowa.nss.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.cases.*; import gplx.xowa.langs.funcs.*; import gplx.xowa.langs.lnki_trails.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.parsers.paras.*; import gplx.xowa.wikis.ttls.*;
|
||||
public class Xop_lnki_wkr__basic_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_n_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.nss.*;
|
||||
import org.junit.*; import gplx.xowa.wikis.nss.*;
|
||||
public class Xop_lnki_wkr__subpage_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_n_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Disabled() { // PURPOSE: slash being interpreted as subpage; PAGE:en.w:[[/dev/null]]
|
||||
|
||||
@@ -51,7 +51,7 @@ public class Xop_lnki_wkr__uncommon_tst {
|
||||
fxt.Test_html_wiki_frag("[[File:A.png|upright=1.333333333333333333333333333333333333333333333333333333333333333333333]]", " width=\"0\" height=\"0\""); // failure would print out original lnki
|
||||
}
|
||||
@Test public void Persian() { // PURPOSE: handle il8n nums; EX:[[پرونده:Shahbazi 3.jpg|۲۰۰px]] -> 200px; PAGE:fa.w:فهرست_آثار_علیرضا_شاپور_شهبازی; DATE:2015-07-18
|
||||
Xol_lang lang = fxt.Wiki().Lang();
|
||||
Xol_lang_itm lang = fxt.Wiki().Lang();
|
||||
fxt.App().Gfs_mgr().Run_str_for(lang, gplx.xowa.xtns.pfuncs.numbers.Pf_formatnum_fa_tst.Persian_numbers_gfs);
|
||||
lang.Evt_lang_changed(); // force rebuild of size_trie
|
||||
fxt.Test_html_wiki_frag("[[File:A.png|۲۰۰px]]", " width=\"200\" height=\"0\"");
|
||||
|
||||
@@ -51,7 +51,7 @@ public class Xop_lnki_wkr__xwiki_tst {
|
||||
}
|
||||
@Test public void Xwiki_not_registered() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Clear();
|
||||
fxt.Wiki().Xwiki_mgr().Add_full(Bry_.new_a7("test"), Bry_.new_a7("test.wikimedia.org")); // register alias only, but not in user_wiki
|
||||
fxt.Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("test"), Bry_.new_a7("test.wikimedia.org")); // register alias only, but not in user_wiki
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "[[test:A|A]]", String_.Concat_lines_nl_skip_last
|
||||
( "<a href=\"https://test.wikimedia.org/wiki/A\">A</a>"
|
||||
|
||||
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis.cfgs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
|
||||
public class Xoc_xwiki_repo_mgr implements GfoInvkAble {
|
||||
private Ordered_hash hash = Ordered_hash_.new_bry_();
|
||||
private Ordered_hash hash = Ordered_hash_.New_bry();
|
||||
private Xowe_wiki wiki;
|
||||
public Xoc_xwiki_repo_mgr(Xowe_wiki wiki) {this.wiki = wiki;}
|
||||
public boolean Has(byte[] abrv) {
|
||||
|
||||
@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.parsers.lnkis.redlinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
|
||||
import gplx.core.primitives.*;
|
||||
import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.langs.vnts.*; import gplx.xowa.gui.views.*; import gplx.xowa.pages.*; import gplx.xowa.html.hdumps.core.*;
|
||||
import gplx.xowa.langs.vnts.*; import gplx.xowa.guis.views.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.htmls.hdumps.core.*;
|
||||
import gplx.xowa.parsers.tmpls.*;
|
||||
public class Xog_redlink_mgr implements GfoInvkAble {
|
||||
private Xog_win_itm win; private Xog_html_itm html_itm; private Xowe_wiki wiki; private Xoae_page page;
|
||||
@@ -29,7 +29,7 @@ public class Xog_redlink_mgr implements GfoInvkAble {
|
||||
this.redlink_lnki_list = page.Redlink_lnki_list();
|
||||
this.lnki_list = redlink_lnki_list.Lnki_list();
|
||||
this.thread_id = redlink_lnki_list.Thread_id();
|
||||
this.log_enabled = log_enabled; this.usr_dlg = log_enabled ? Gfo_usr_dlg_.I : Gfo_usr_dlg_.Noop;
|
||||
this.log_enabled = log_enabled; this.usr_dlg = log_enabled ? Gfo_usr_dlg_.Instance : Gfo_usr_dlg_.Noop;
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_run)) Redlink();
|
||||
@@ -40,7 +40,7 @@ public class Xog_redlink_mgr implements GfoInvkAble {
|
||||
synchronized (this) { // NOTE: attempt to eliminate random IndexBounds errors; DATE:2014-09-02
|
||||
if (redlink_lnki_list.Disabled()) return;
|
||||
List_adp work_list = List_adp_.new_();
|
||||
Ordered_hash page_hash = Ordered_hash_.new_bry_();
|
||||
Ordered_hash page_hash = Ordered_hash_.New_bry();
|
||||
page_hash.Clear(); // NOTE: do not clear in Page_bgn, else will fail b/c of threading; EX: Open Page -> Preview -> Save; DATE:2013-11-17
|
||||
work_list.Clear();
|
||||
int len = lnki_list.Count();
|
||||
@@ -79,7 +79,7 @@ public class Xog_redlink_mgr implements GfoInvkAble {
|
||||
Xowd_page_itm db_page = (Xowd_page_itm)page_hash.Get_by(full_db);
|
||||
if (db_page == null) continue; // pages shouldn't be null, but just in case
|
||||
if (!db_page.Exists()) {
|
||||
String lnki_id = Xopg_redlink_lnki_list.Lnki_id_prefix + Int_.Xto_str(lnki.Html_uid());
|
||||
String lnki_id = Xopg_redlink_lnki_list.Lnki_id_prefix + Int_.To_str(lnki.Html_uid());
|
||||
if (variants_enabled) {
|
||||
Xowd_page_itm vnt_page = vnt_mgr.Convert_mgr().Convert_ttl(wiki, lnki.Ttl());
|
||||
if (vnt_page != null) {
|
||||
@@ -97,13 +97,13 @@ public class Xog_redlink_mgr implements GfoInvkAble {
|
||||
if (win.Usr_dlg().Canceled()) return;
|
||||
if (redlink_lnki_list.Thread_id() != thread_id) return;
|
||||
int uid = lnki.Html_uid();
|
||||
gplx.xowa.files.gui.Js_img_mgr.Update_link_missing(html_itm, Xopg_redlink_lnki_list.Lnki_id_prefix + Int_.Xto_str(uid));
|
||||
gplx.xowa.files.gui.Js_img_mgr.Update_link_missing(html_itm, Xopg_redlink_lnki_list.Lnki_id_prefix + Int_.To_str(uid));
|
||||
redlink_mgr.Add(uid);
|
||||
++redlink_count;
|
||||
}
|
||||
}
|
||||
if (log_enabled)
|
||||
usr_dlg.Log_many("", "", "redlink.redlink_end: redlinks_run=~{0} links=~{1}", redlink_count, bfr == null ? String_.Empty : bfr.Xto_str_and_clear());
|
||||
usr_dlg.Log_many("", "", "redlink.redlink_end: redlinks_run=~{0} links=~{1}", redlink_count, bfr == null ? String_.Empty : bfr.To_str_and_clear());
|
||||
}
|
||||
}
|
||||
public static final Xog_redlink_mgr Null = new Xog_redlink_mgr(); Xog_redlink_mgr() {}
|
||||
@@ -118,6 +118,6 @@ class Xog_redlink_wkr {
|
||||
}
|
||||
}
|
||||
public static void Redlink(Xog_html_itm html_itm, int uid) {
|
||||
gplx.xowa.files.gui.Js_img_mgr.Update_link_missing(html_itm, Xopg_redlink_lnki_list.Lnki_id_prefix + Int_.Xto_str(uid));
|
||||
gplx.xowa.files.gui.Js_img_mgr.Update_link_missing(html_itm, Xopg_redlink_lnki_list.Lnki_id_prefix + Int_.To_str(uid));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,10 +16,10 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis.redlinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*;
|
||||
import gplx.xowa.nss.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.users.*;
|
||||
public class Xopg_redlink_lnki_list {
|
||||
private int lnki_idx = gplx.xowa.html.lnkis.Xoh_lnki_wtr.Lnki_id_min; // NOTE: default to 1, not 0, b/c 0 is ignored by wtr; DATE:2014-10-09
|
||||
private int lnki_idx = gplx.xowa.htmls.lnkis.Xoh_lnki_wtr.Lnki_id_min; // NOTE: default to 1, not 0, b/c 0 is ignored by wtr; DATE:2014-10-09
|
||||
public Xopg_redlink_lnki_list(boolean ttl_is_module) { // never redlink in Module ns; particularly since Lua has multi-line comments for [[ ]]
|
||||
this.disabled = ttl_is_module;
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.logs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*;
|
||||
public class Xop_log_basic_wkr implements GfoInvkAble {
|
||||
private Xop_log_mgr log_mgr; private Xop_log_basic_tbl log_tbl;
|
||||
private boolean save_page_ttl, save_log_time, save_args_len, save_args_str;
|
||||
@@ -25,7 +25,7 @@ public class Xop_log_basic_wkr implements GfoInvkAble {
|
||||
public Xop_log_basic_wkr(Xop_log_mgr log_mgr, Xop_log_basic_tbl log_tbl) {this.log_mgr = log_mgr; this.log_tbl = log_tbl;}
|
||||
public boolean Log_bgn(Xoae_page page, byte[] src, Xop_xnde_tkn xnde) {return true;}
|
||||
public void Log_end_xnde(Xoae_page page, int log_tid, byte[] src, Xop_xnde_tkn xnde_tkn) {
|
||||
Xop_xatr_itm[] atrs_ary = xnde_tkn.Atrs_ary();
|
||||
Mwh_atr_itm[] atrs_ary = xnde_tkn.Atrs_ary();
|
||||
Log_end(page, Null_log_bgn, log_tid, Null_log_msg, src
|
||||
, xnde_tkn.Src_bgn(), xnde_tkn.Src_end()
|
||||
, atrs_ary == null ? 0 : atrs_ary.length
|
||||
|
||||
@@ -21,7 +21,7 @@ import gplx.xowa.parsers.paras.*;
|
||||
public class Xop_comm_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_comment;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Bgn_ary, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
int lhs_end = cur_pos;
|
||||
@@ -91,7 +91,7 @@ public class Xop_comm_lxr implements Xop_lxr {
|
||||
}
|
||||
public static final byte[] Bgn_ary = new byte[] {60, 33, 45, 45}, /*<!--*/ End_ary = new byte[] {45, 45, 62}; /*-->*/
|
||||
private static final int End_len = End_ary.length;
|
||||
public static final Xop_comm_lxr _ = new Xop_comm_lxr(); Xop_comm_lxr() {}
|
||||
public static final Xop_comm_lxr Instance = new Xop_comm_lxr(); Xop_comm_lxr() {}
|
||||
private static final String Xowa_skip_text_str = "XOWA_SKIP";
|
||||
private static final byte[] Xowa_skip_text_bry = Bry_.new_a7(Xowa_skip_text_str);
|
||||
public static final byte[] Xowa_skip_comment_bry = Bry_.new_a7("<!--" + Xowa_skip_text_str + "-->");
|
||||
|
||||
@@ -20,10 +20,10 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_cr_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_cr;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Cr, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
return cur_pos; //ignore
|
||||
}
|
||||
public static final Xop_cr_lxr _ = new Xop_cr_lxr(); Xop_cr_lxr() {}
|
||||
public static final Xop_cr_lxr Instance = new Xop_cr_lxr(); Xop_cr_lxr() {}
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ public class Xop_eq_lxr implements Xop_lxr {
|
||||
public Xop_eq_lxr(boolean tmpl_mode) {this.tmpl_mode = tmpl_mode;} boolean tmpl_mode;
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_eq;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Eq, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
cur_pos = Bry_find_.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Eq); // gobble up eq; "==" should produce 1 eq_tkn with len of 2, not 2 eq_tkn with len of 1; DATE:2014-04-17
|
||||
|
||||
@@ -21,7 +21,7 @@ import gplx.xowa.parsers.xndes.*;
|
||||
public class Xop_hr_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_hr;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr parse_trie) {parse_trie.Add(Hook_ary, this);} static final byte[] Hook_ary = new byte[] {Byte_ascii.Nl, Byte_ascii.Dash, Byte_ascii.Dash, Byte_ascii.Dash, Byte_ascii.Dash};
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
int nl_adj = -1; // -1 to ignore nl at bgn for hr_len
|
||||
@@ -42,5 +42,5 @@ public class Xop_hr_lxr implements Xop_lxr {
|
||||
return cur_pos;
|
||||
} private static final byte Hook_byt = Byte_ascii.Dash;
|
||||
public static final int Hr_len = 4;
|
||||
public static final Xop_hr_lxr _ = new Xop_hr_lxr(); Xop_hr_lxr() {}
|
||||
public static final Xop_hr_lxr Instance = new Xop_hr_lxr(); Xop_hr_lxr() {}
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ public class Xop_macro_lxr implements Xop_lxr {
|
||||
public Xop_macro_lxr() {}
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_macro;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Tab, this); core_trie.Add(Xop_tab_tkn.Bry_tab_ent, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
cur_pos = Bry_find_.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Tab);
|
||||
@@ -31,5 +31,5 @@ public class Xop_macro_lxr implements Xop_lxr {
|
||||
ctx.Subs_add(root, tkn_mkr.Tab(bgn_pos, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
public static final Xop_tab_lxr _ = new Xop_tab_lxr();
|
||||
public static final Xop_tab_lxr Instance = new Xop_tab_lxr();
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ import gplx.xowa.parsers.tblws.*; import gplx.xowa.parsers.lnkis.*; import gplx.
|
||||
public class Xop_pipe_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_pipe;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Pipe, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
int cur_stack_tid = ctx.Cur_tkn_tid(), rv = -1;
|
||||
@@ -62,11 +62,6 @@ public class Xop_pipe_lxr implements Xop_lxr {
|
||||
Xop_tblw_wkr.Atrs_make(ctx, src, root, ctx.Tblw(), cur_tkn, Bool_.N);
|
||||
return cur_pos;
|
||||
}
|
||||
case Xop_tkn_itm_.Tid_vnt:
|
||||
Xop_vnt_tkn vnt_tkn = (Xop_vnt_tkn)ctx.Stack_get_typ(Xop_tkn_itm_.Tid_vnt);
|
||||
vnt_tkn.Vnt_pipe_tkn_count_add_();
|
||||
ctx.Subs_add(root, tkn_mkr.Pipe(bgn_pos, cur_pos));
|
||||
return cur_pos;
|
||||
case Xop_tkn_itm_.Tid_lnki:
|
||||
Xop_lnki_tkn lnki = (Xop_lnki_tkn)ctx.Stack_get_last(); // BLOCK:invalid_ttl_check
|
||||
if ( lnki.Pipe_count_is_zero()
|
||||
@@ -81,5 +76,5 @@ public class Xop_pipe_lxr implements Xop_lxr {
|
||||
return cur_pos;
|
||||
}
|
||||
}
|
||||
public static final Xop_pipe_lxr _ = new Xop_pipe_lxr();
|
||||
public static final Xop_pipe_lxr Instance = new Xop_pipe_lxr();
|
||||
}
|
||||
|
||||
@@ -20,12 +20,12 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_space_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_space;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Space, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
cur_pos = Bry_find_.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Space);
|
||||
ctx.Subs_add(root, tkn_mkr.Space(root, bgn_pos, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
public static final Xop_space_lxr _ = new Xop_space_lxr();
|
||||
public static final Xop_space_lxr Instance = new Xop_space_lxr();
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_tab_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_tab;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Tab, this); core_trie.Add(Xop_tab_tkn.Bry_tab_ent, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
cur_pos = Bry_find_.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Tab);
|
||||
@@ -30,5 +30,5 @@ public class Xop_tab_lxr implements Xop_lxr {
|
||||
ctx.Subs_add(root, tkn_mkr.Tab(bgn_pos, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
public static final Xop_tab_lxr _ = new Xop_tab_lxr();
|
||||
public static final Xop_tab_lxr Instance = new Xop_tab_lxr();
|
||||
}
|
||||
|
||||
@@ -16,13 +16,14 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.miscs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
import gplx.xowa.html.tocs.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*;
|
||||
import gplx.xowa.htmls.tocs.*;
|
||||
public class Xop_under_lxr implements Xop_lxr {
|
||||
private Btrie_mgr words_trie_ci, words_trie_cs;
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_under;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {
|
||||
Xol_kwd_mgr kwd_mgr = lang.Kwd_mgr();
|
||||
int under_kwds_len = under_kwds.length;
|
||||
Xop_under_lxr lxr = new Xop_under_lxr();
|
||||
@@ -56,7 +57,7 @@ public class Xop_under_lxr implements Xop_lxr {
|
||||
if (kwd_case_match) // cs; add word directly to trie
|
||||
core_trie.Add(kwd_bry, word_lxr);
|
||||
else { // NOTE: next part is imprecise; XOWA parser is cs, but kwd is ci; for now, just add all upper and all lower
|
||||
Gfo_usr_dlg_.I.Warn_many("", "", "under keyword does not start with __; id=~{0} key=~{1} word=~{2}", kwd_id, String_.new_u8(kwd_grp.Key()), String_.new_u8(kwd_bry));
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "under keyword does not start with __; id=~{0} key=~{1} word=~{2}", kwd_id, String_.new_u8(kwd_grp.Key()), String_.new_u8(kwd_bry));
|
||||
core_trie.Add(lang.Case_mgr().Case_build_lower(kwd_bry), word_lxr);
|
||||
core_trie.Add(lang.Case_mgr().Case_build_upper(kwd_bry), word_lxr);
|
||||
}
|
||||
@@ -110,14 +111,14 @@ public class Xop_under_lxr implements Xop_lxr {
|
||||
default: break; // ignore anything else
|
||||
}
|
||||
}
|
||||
public static final Xop_under_lxr _ = new Xop_under_lxr(); Xop_under_lxr() {}
|
||||
public static final Xop_under_lxr Instance = new Xop_under_lxr(); Xop_under_lxr() {}
|
||||
}
|
||||
class Xop_word_lxr implements Xop_lxr {
|
||||
private int kwd_id;
|
||||
public Xop_word_lxr(int kwd_id) {this.kwd_id = kwd_id;}
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_word;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
Xop_under_lxr.Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, kwd_id); // for now, all word_lxrs only call the under_lxr; DATE:2014-02-14
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.miscs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.langs.*;
|
||||
import org.junit.*; import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*;
|
||||
public class Xop_under_lxr_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@Before public void init() {fxt.Reset();}
|
||||
@@ -98,7 +98,7 @@ public class Xop_under_lxr_tst {
|
||||
fxt.Test_parse_page_all_str("__DISAMBIG__", "");
|
||||
}
|
||||
@Test public void Nocontentconvert() { // simple test; test for flag only; DATE:2014-02-06
|
||||
gplx.xowa.pages.Xopg_html_data html_data = fxt.Page().Html_data();
|
||||
gplx.xowa.wikis.pages.Xopg_html_data html_data = fxt.Page().Html_data();
|
||||
Tfds.Eq(html_data.Lang_convert_content(), true);
|
||||
Tfds.Eq(html_data.Lang_convert_title(), true);
|
||||
fxt.Test_parse_page_all_str("__NOCONTENTCONVERT__ __NOTITLECONVERT__", " ");
|
||||
@@ -140,20 +140,20 @@ public class Xop_under_lxr_tst {
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Hook_alt() { // PURPOSE: ja wikis use alternate __; DATE:2014-03-04
|
||||
Xowe_wiki wiki = fxt.Wiki(); Xol_lang lang = wiki.Lang();
|
||||
Xowe_wiki wiki = fxt.Wiki(); Xol_lang_itm lang = wiki.Lang();
|
||||
fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_toc, true, "__TOC__");
|
||||
wiki.Parser_mgr().Main().Init_by_lang(lang);
|
||||
fxt.Test_parse_page_all_str("a__TOC__b", "ab");
|
||||
}
|
||||
@Test public void Ascii_ci() { // PURPOSE: case-insensitive ascii; DATE:2014-07-10
|
||||
Xowe_wiki wiki = fxt.Wiki(); Xol_lang lang = wiki.Lang();
|
||||
Xowe_wiki wiki = fxt.Wiki(); Xol_lang_itm lang = wiki.Lang();
|
||||
fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_toc, false, "__TOC__");
|
||||
wiki.Parser_mgr().Main().Init_by_lang(lang);
|
||||
fxt.Test_parse_page_all_str("a__TOC__b", "ab");
|
||||
fxt.Test_parse_page_all_str("a__toc__b", "ab");
|
||||
}
|
||||
@Test public void Utf8_ci() { // PURPOSE: case-insensitive UTF8; DATE:2014-07-10
|
||||
Xowe_wiki wiki = fxt.Wiki(); Xol_lang lang = wiki.Lang();
|
||||
Xowe_wiki wiki = fxt.Wiki(); Xol_lang_itm lang = wiki.Lang();
|
||||
lang.Case_mgr_u8_();
|
||||
fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_toc, false, "__AÉI__");
|
||||
wiki.Parser_mgr().Main().Init_by_lang(lang);
|
||||
@@ -161,21 +161,21 @@ public class Xop_under_lxr_tst {
|
||||
fxt.Test_parse_page_all_str("a__aéi__b", "ab");
|
||||
}
|
||||
@Test public void Utf8_ci_asymmetric() { // PURPOSE: case-insensitive UTF8; asymmetric; DATE:2014-07-10
|
||||
Xowe_wiki wiki = fxt.Wiki(); Xol_lang lang = wiki.Lang();
|
||||
Xowe_wiki wiki = fxt.Wiki(); Xol_lang_itm lang = wiki.Lang();
|
||||
lang.Case_mgr_u8_();
|
||||
fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_toc, false, "__İÇİNDEKİLER__"); // __TOC__ for tr.w
|
||||
wiki.Parser_mgr().Main().Init_by_lang(lang);
|
||||
fxt.Test_parse_page_all_str("a__İçindekiler__b", "ab");
|
||||
}
|
||||
@Test public void Cs() { // PURPOSE: cs (ascii / utf8 doesn't matter); DATE:2014-07-11
|
||||
Xowe_wiki wiki = fxt.Wiki(); Xol_lang lang = wiki.Lang();
|
||||
Xowe_wiki wiki = fxt.Wiki(); Xol_lang_itm lang = wiki.Lang();
|
||||
fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_toc , Bool_.Y, "__TOC__");
|
||||
wiki.Parser_mgr().Main().Init_by_lang(lang);
|
||||
fxt.Test_parse_page_all_str("a__TOC__b" , "ab"); // ci.pass
|
||||
fxt.Test_parse_page_all_str("a__toc__b" , "a__toc__b"); // ci.pass
|
||||
}
|
||||
@Test public void Ascii_cs_ci() { // PURPOSE: test simultaneous cs and ci; DATE:2014-07-11
|
||||
Xowe_wiki wiki = fxt.Wiki(); Xol_lang lang = wiki.Lang();
|
||||
Xowe_wiki wiki = fxt.Wiki(); Xol_lang_itm lang = wiki.Lang();
|
||||
fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_toc , Bool_.N, "__TOC__");
|
||||
fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_notoc , Bool_.Y, "__NOTOC__");
|
||||
wiki.Parser_mgr().Main().Init_by_lang(lang);
|
||||
|
||||
@@ -21,7 +21,7 @@ import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.tblws.*; import gplx.
|
||||
public class Xop_nl_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_nl;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Nl, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_bos) return ctx.Lxr_make_txt_(cur_pos); // simulated nl at beginning of every parse
|
||||
@@ -113,5 +113,5 @@ public class Xop_nl_lxr implements Xop_lxr {
|
||||
}
|
||||
return Bry_.NotFound;
|
||||
}
|
||||
public static final Xop_nl_lxr _ = new Xop_nl_lxr(); Xop_nl_lxr() {}
|
||||
public static final Xop_nl_lxr Instance = new Xop_nl_lxr(); Xop_nl_lxr() {}
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ import gplx.xowa.parsers.tblws.*;
|
||||
public class Xop_nl_tab_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_nl_tab;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Hook_nl_tab, this);} private static final byte[] Hook_nl_tab = new byte[] {Byte_ascii.Nl, Byte_ascii.Tab};
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
int non_ws_pos = Bry_find_.Find_fwd_while_space_or_tab(src, cur_pos, src_len);
|
||||
@@ -49,5 +49,5 @@ public class Xop_nl_tab_lxr implements Xop_lxr {
|
||||
ctx.Subs_add(root, tkn_mkr.Tab(cur_pos - 1, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
public static final Xop_nl_tab_lxr _ = new Xop_nl_tab_lxr(); Xop_nl_tab_lxr() {}
|
||||
public static final Xop_nl_tab_lxr Instance = new Xop_nl_tab_lxr(); Xop_nl_tab_lxr() {}
|
||||
}
|
||||
|
||||
@@ -236,9 +236,9 @@ public class Xop_para_wkr_pre_tst {
|
||||
), String_.Concat_lines_nl
|
||||
( "<p>" // this is wrong, but will be stripped by tidy
|
||||
, "</p>"
|
||||
, " <pre>"
|
||||
, " <div class=\"mw-highlight\"><pre style=\"overflow:auto\">"
|
||||
, " a"
|
||||
, "</pre>"
|
||||
, "</pre></div>"
|
||||
, ""
|
||||
, "<p><br/>" // also wrong, but leave for now
|
||||
, "</p>"
|
||||
|
||||
@@ -21,7 +21,7 @@ import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.tblws.*; import gplx.
|
||||
public class Xop_pre_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_pre;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Hook_space, this);} // NOTE: do not treat \n\t as shorthand pre; EX:pl.w:Main_Page; DATE:2014-05-06
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if ( !ctx.Para().Enabled() // para disabled; "\n\s" should just be "\n\s"; NOTE: para disabled in <gallery>
|
||||
@@ -75,7 +75,7 @@ public class Xop_pre_lxr implements Xop_lxr {
|
||||
}
|
||||
return ctx.Para().Process_pre(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, txt_pos);
|
||||
}
|
||||
public static final Xop_pre_lxr _ = new Xop_pre_lxr(); Xop_pre_lxr() {}
|
||||
public static final Xop_pre_lxr Instance = new Xop_pre_lxr(); Xop_pre_lxr() {}
|
||||
private static final byte[] Hook_space = new byte[] {Byte_ascii.Nl, Byte_ascii.Space};
|
||||
}
|
||||
/*
|
||||
|
||||
@@ -44,7 +44,7 @@ public class Xop_tblw_lxr implements Xop_lxr {
|
||||
ctx.Stack_add(lnki_tkn); // push lnki back onto stack; TODO: combine these 2 lines into 1
|
||||
// NOTE: this is a "\n|" inside a [[ ]]; must create two tokens for lnki to build correctly;
|
||||
ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos + 1, Xop_nl_tkn.Tid_char, 1));
|
||||
return Xop_pipe_lxr._.Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos); // NOTE: need to call pipe_lxr in order to invalidate if lnki; DATE:2014-06-06
|
||||
return Xop_pipe_lxr.Instance.Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos); // NOTE: need to call pipe_lxr in order to invalidate if lnki; DATE:2014-06-06
|
||||
}
|
||||
else { // \n| or \n! but no tbl
|
||||
if ( bgn_pos != Xop_parser_.Doc_bgn_bos // avoid ! at BOS
|
||||
@@ -118,7 +118,7 @@ public class Xop_tblw_lxr implements Xop_lxr {
|
||||
return Continue;
|
||||
}
|
||||
public Xop_tblw_lxr(byte wlxr_type) {this.wlxr_type = wlxr_type;} private byte wlxr_type;
|
||||
public static final Xop_tblw_lxr _ = new Xop_tblw_lxr(); Xop_tblw_lxr() {}
|
||||
public static final Xop_tblw_lxr Instance = new Xop_tblw_lxr(); Xop_tblw_lxr() {}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
|
||||
core_trie.Add(Hook_tb, new Xop_tblw_lxr(Xop_tblw_wkr.Tblw_type_tb));
|
||||
core_trie.Add(Hook_te, new Xop_tblw_lxr(Xop_tblw_wkr.Tblw_type_te));
|
||||
@@ -129,7 +129,7 @@ public class Xop_tblw_lxr implements Xop_lxr {
|
||||
core_trie.Add(Hook_td2, new Xop_tblw_lxr(Xop_tblw_wkr.Tblw_type_td2));
|
||||
core_trie.Add(Hook_th2, new Xop_tblw_lxr(Xop_tblw_wkr.Tblw_type_th2));
|
||||
}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public static final byte[] Hook_tb = Bry_.new_a7("\n{|"), Hook_te = Bry_.new_a7("\n|}"), Hook_tr = Bry_.new_a7("\n|-")
|
||||
, Hook_td = Bry_.new_a7("\n|"), Hook_th = Bry_.new_a7("\n!"), Hook_tc = Bry_.new_a7("\n|+")
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*;
|
||||
public class Xop_tblw_tb_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
public Xop_tblw_tb_tkn(int bgn, int end, boolean tblw_xml, boolean auto_created) {
|
||||
this.tblw_xml = tblw_xml; this.Tkn_ini_pos(false, bgn, end);
|
||||
@@ -28,7 +28,7 @@ public class Xop_tblw_tb_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
public int Atrs_bgn() {return atrs_bgn;} private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
|
||||
public int Atrs_end() {return atrs_end;} private int atrs_end = -1;
|
||||
public void Atrs_rng_set(int bgn, int end) {this.atrs_bgn = bgn; this.atrs_end = end;}
|
||||
public Xop_xatr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {atrs_ary = v; return this;} private Xop_xatr_itm[] atrs_ary;
|
||||
public Mwh_atr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Mwh_atr_itm[] v) {atrs_ary = v; return this;} private Mwh_atr_itm[] atrs_ary;
|
||||
public boolean Tblw_xml() {return tblw_xml;} private boolean tblw_xml;
|
||||
public void Tblw_xml_(boolean v) {tblw_xml = v;}
|
||||
public int Tblw_subs_len() {return tblw_subs_len;} public void Tblw_subs_len_add_() {++tblw_subs_len;} private int tblw_subs_len;
|
||||
|
||||
@@ -16,14 +16,14 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*;
|
||||
public class Xop_tblw_tc_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_tblw_tc;}
|
||||
public int Tblw_tid() {return Xop_xnde_tag_.Tid_caption;}
|
||||
public int Atrs_bgn() {return atrs_bgn;} private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
|
||||
public int Atrs_end() {return atrs_end;} private int atrs_end = -1;
|
||||
public void Atrs_rng_set(int bgn, int end) {this.atrs_bgn = bgn; this.atrs_end = end;}
|
||||
public Xop_xatr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {atrs_ary = v; return this;} private Xop_xatr_itm[] atrs_ary;
|
||||
public Mwh_atr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Mwh_atr_itm[] v) {atrs_ary = v; return this;} private Mwh_atr_itm[] atrs_ary;
|
||||
public boolean Tblw_xml() {return tblw_xml;} private boolean tblw_xml;
|
||||
public int Tblw_subs_len() {return tblw_subs_len;} public void Tblw_subs_len_add_() {++tblw_subs_len;} private int tblw_subs_len;
|
||||
public Xop_tblw_tc_tkn Subs_add_ary(Xop_tkn_itm... ary) {for (Xop_tkn_itm itm : ary) super.Subs_add(itm); return this;}
|
||||
|
||||
@@ -16,14 +16,14 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*;
|
||||
public class Xop_tblw_td_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_tblw_td;}
|
||||
public int Tblw_tid() {return Xop_xnde_tag_.Tid_td;}
|
||||
public int Atrs_bgn() {return atrs_bgn;} private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
|
||||
public int Atrs_end() {return atrs_end;} private int atrs_end = -1;
|
||||
public void Atrs_rng_set(int bgn, int end) {this.atrs_bgn = bgn; this.atrs_end = end;}
|
||||
public Xop_xatr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {atrs_ary = v; return this;} private Xop_xatr_itm[] atrs_ary;
|
||||
public Mwh_atr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Mwh_atr_itm[] v) {atrs_ary = v; return this;} private Mwh_atr_itm[] atrs_ary;
|
||||
public boolean Tblw_xml() {return tblw_xml;} private boolean tblw_xml;
|
||||
public int Tblw_subs_len() {return tblw_subs_len;} public void Tblw_subs_len_add_() {++tblw_subs_len;} private int tblw_subs_len;
|
||||
public Xop_tblw_td_tkn Subs_add_ary(Xop_tkn_itm... ary) {for (Xop_tkn_itm itm : ary) super.Subs_add(itm); return this;}
|
||||
|
||||
@@ -16,14 +16,14 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*;
|
||||
public class Xop_tblw_th_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_tblw_th;}
|
||||
public int Tblw_tid() {return Xop_xnde_tag_.Tid_th;}
|
||||
public int Atrs_bgn() {return atrs_bgn;} private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
|
||||
public int Atrs_end() {return atrs_end;} private int atrs_end = -1;
|
||||
public void Atrs_rng_set(int bgn, int end) {this.atrs_bgn = bgn; this.atrs_end = end;}
|
||||
public Xop_xatr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {atrs_ary = v; return this;} private Xop_xatr_itm[] atrs_ary;
|
||||
public Mwh_atr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Mwh_atr_itm[] v) {atrs_ary = v; return this;} private Mwh_atr_itm[] atrs_ary;
|
||||
public boolean Tblw_xml() {return tblw_xml;} private boolean tblw_xml;
|
||||
public int Tblw_subs_len() {return tblw_subs_len;} public void Tblw_subs_len_add_() {++tblw_subs_len;} private int tblw_subs_len;
|
||||
public Xop_tblw_th_tkn Subs_add_ary(Xop_tkn_itm... ary) {for (Xop_tkn_itm itm : ary) super.Subs_add(itm); return this;}
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*;
|
||||
public interface Xop_tblw_tkn extends Xop_tkn_itm {
|
||||
int Tblw_tid();
|
||||
boolean Tblw_xml();
|
||||
@@ -24,5 +24,5 @@ public interface Xop_tblw_tkn extends Xop_tkn_itm {
|
||||
int Atrs_bgn();
|
||||
int Atrs_end();
|
||||
void Atrs_rng_set(int bgn, int end);
|
||||
Xop_xatr_itm[] Atrs_ary(); Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v);
|
||||
Mwh_atr_itm[] Atrs_ary(); Xop_tblw_tkn Atrs_ary_as_tblw_(Mwh_atr_itm[] v);
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*;
|
||||
public class Xop_tblw_tr_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
public Xop_tblw_tr_tkn(int bgn, int end, boolean tblw_xml, boolean auto_created) {
|
||||
this.tblw_xml = tblw_xml; this.Tkn_ini_pos(false, bgn, end);
|
||||
@@ -28,7 +28,7 @@ public class Xop_tblw_tr_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
public int Atrs_bgn() {return atrs_bgn;} private int atrs_bgn = Xop_tblw_wkr.Atrs_null;
|
||||
public int Atrs_end() {return atrs_end;} private int atrs_end = -1;
|
||||
public void Atrs_rng_set(int bgn, int end) {this.atrs_bgn = bgn; this.atrs_end = end;}
|
||||
public Xop_xatr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Xop_xatr_itm[] v) {atrs_ary = v; return this;} private Xop_xatr_itm[] atrs_ary;
|
||||
public Mwh_atr_itm[] Atrs_ary() {return atrs_ary;} public Xop_tblw_tkn Atrs_ary_as_tblw_(Mwh_atr_itm[] v) {atrs_ary = v; return this;} private Mwh_atr_itm[] atrs_ary;
|
||||
public boolean Tblw_xml() {return tblw_xml;} private boolean tblw_xml;
|
||||
public int Tblw_subs_len() {return tblw_subs_len;} public void Tblw_subs_len_add_() {++tblw_subs_len;} private int tblw_subs_len;
|
||||
public Xop_tblw_tr_tkn Subs_add_ary(Xop_tkn_itm... ary) {for (Xop_tkn_itm itm : ary) super.Subs_add(itm); return this;}
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.paras.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.miscs.*;
|
||||
import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.paras.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.miscs.*;
|
||||
public class Xop_tblw_wkr implements Xop_ctx_wkr {
|
||||
private int tblw_te_ignore_count = 0;
|
||||
public boolean Cell_pipe_seen() {return cell_pipe_seen;} public Xop_tblw_wkr Cell_pipe_seen_(boolean v) {cell_pipe_seen = v; return this;} private boolean cell_pipe_seen; // status of 1st cell pipe; EX: \n| a | b | c || -> flag pipe between a and b but ignore b and c
|
||||
@@ -299,7 +299,7 @@ public class Xop_tblw_wkr implements Xop_ctx_wkr {
|
||||
if (atrs_bgn > Xop_tblw_wkr.Atrs_ignore_check) {
|
||||
new_tkn.Atrs_rng_set(atrs_bgn, atrs_end);
|
||||
if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki) {
|
||||
Xop_xatr_itm[] atrs = ctx.App().Xatr_parser().Parse(ctx.Msg_log(), src, atrs_bgn, atrs_end);
|
||||
Mwh_atr_itm[] atrs = ctx.App().Parser_mgr().Xnde__parse_atrs_for_tblw(src, atrs_bgn, atrs_end);
|
||||
new_tkn.Atrs_ary_as_tblw_(atrs);
|
||||
}
|
||||
}
|
||||
@@ -484,7 +484,7 @@ public class Xop_tblw_wkr implements Xop_ctx_wkr {
|
||||
atrs_bgn = Bry_find_.Find_fwd_while(src, atrs_bgn, src.length, Byte_ascii.Dash);
|
||||
prv_tblw.Atrs_rng_set(atrs_bgn, atrs_end);
|
||||
if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki && atrs_bgn != -1) {
|
||||
Xop_xatr_itm[] atrs = ctx.App().Xatr_parser().Parse(ctx.Msg_log(), src, atrs_bgn, atrs_end);
|
||||
Mwh_atr_itm[] atrs = ctx.App().Parser_mgr().Xnde__parse_atrs_for_tblw(src, atrs_bgn, atrs_end);
|
||||
prv_tblw.Atrs_ary_as_tblw_(atrs);
|
||||
}
|
||||
wkr.Cell_pipe_seen_(true);
|
||||
|
||||
@@ -20,7 +20,7 @@ import org.junit.*;
|
||||
public class Xop_tblw_wkr__uncommon_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Tr_pops_entire_stack() { // PURPOSE: in strange cases, tr will pop entire stack; PAGE:en.w:Turks_in_Denmark; DATE:2014-03-02
|
||||
@Test public void Tr_pops_entire_stack() { // PURPOSE: in strange cases, tr will pop entire stack; PAGE:en.w:Turks_in_Denmark; DATE:2014-03-02
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "<caption>a"
|
||||
|
||||
@@ -190,7 +190,7 @@ public class Arg_bldr {
|
||||
// itm.Dat_ary_(dat_end == dat_bgn ? Bry_.Empty : Bry_.Mid(src, dat_bgn, dat_end));
|
||||
itm.Itm_static_(itm_is_static);
|
||||
}
|
||||
public static final Arg_bldr _ = new Arg_bldr(); Arg_bldr() {}
|
||||
public static final Arg_bldr Instance = new Arg_bldr(); Arg_bldr() {}
|
||||
}
|
||||
/*
|
||||
NOTE_1:mark tkn ignore unless wkr is prm;
|
||||
|
||||
@@ -20,12 +20,12 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_brack_bgn_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_brack_bgn;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Xop_tkn_.Lnki_bgn, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
Xop_tkn_itm tkn = tkn_mkr.Brack_bgn(bgn_pos, cur_pos);
|
||||
ctx.Subs_add_and_stack(root, tkn);
|
||||
return cur_pos;
|
||||
}
|
||||
public static final Xop_brack_bgn_lxr _ = new Xop_brack_bgn_lxr(); Xop_brack_bgn_lxr() {}
|
||||
public static final Xop_brack_bgn_lxr Instance = new Xop_brack_bgn_lxr(); Xop_brack_bgn_lxr() {}
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_brack_end_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_brack_end;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Xop_tkn_.Lnki_end, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
int acs_pos = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_brack_bgn);
|
||||
@@ -30,5 +30,5 @@ public class Xop_brack_end_lxr implements Xop_lxr {
|
||||
ctx.Subs_add(root, tkn);
|
||||
return cur_pos;
|
||||
}
|
||||
public static final Xop_brack_end_lxr _ = new Xop_brack_end_lxr(); Xop_brack_end_lxr() {}
|
||||
public static final Xop_brack_end_lxr Instance = new Xop_brack_end_lxr(); Xop_brack_end_lxr() {}
|
||||
}
|
||||
|
||||
@@ -21,10 +21,10 @@ import gplx.xowa.parsers.tblws.*;
|
||||
public class Xop_curly_bgn_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_curly_bgn;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Hook, this);} public static final byte[] Hook = new byte[] {Byte_ascii.Curly_bgn, Byte_ascii.Curly_bgn};
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Curly().MakeTkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_curly_bgn_lxr _ = new Xop_curly_bgn_lxr(); Xop_curly_bgn_lxr() {}
|
||||
public static final Xop_curly_bgn_lxr Instance = new Xop_curly_bgn_lxr(); Xop_curly_bgn_lxr() {}
|
||||
public static Btrie_fast_mgr tmpl_bgn_trie_() { // hook sequences for adding new_line to tmpl return; "{|" "|-" ":" ";" "#" "*"; EX: "{{a}}" returns "*"; convert to "\n*"
|
||||
Btrie_fast_mgr rv = Btrie_fast_mgr.cs();
|
||||
rv.Add(Xop_tblw_lxr_ws.Hook_tb, Bry_.Empty);
|
||||
|
||||
@@ -20,8 +20,8 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_curly_end_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_curly_end;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Hook, this);} public static final byte[] Hook = new byte[] {Byte_ascii.Curly_end, Byte_ascii.Curly_end};
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Curly().MakeTkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_curly_end_lxr _ = new Xop_curly_end_lxr(); Xop_curly_end_lxr() {}
|
||||
public static final Xop_curly_end_lxr Instance = new Xop_curly_end_lxr(); Xop_curly_end_lxr() {}
|
||||
}
|
||||
|
||||
@@ -147,7 +147,7 @@ public class Xop_curly_wkr implements Xop_ctx_wkr {
|
||||
}
|
||||
return lxr_end_pos;
|
||||
}
|
||||
private Xot_prm_wkr prm_wkr = Xot_prm_wkr._;
|
||||
private Xot_prm_wkr prm_wkr = Xot_prm_wkr.Instance;
|
||||
public static final byte[] Hook_prm_bgn = new byte[] {Byte_ascii.Curly_bgn, Byte_ascii.Curly_bgn, Byte_ascii.Curly_bgn}, Hook_prm_end = new byte[] {Byte_ascii.Curly_end, Byte_ascii.Curly_end, Byte_ascii.Curly_end};
|
||||
}
|
||||
/*
|
||||
|
||||
@@ -35,6 +35,6 @@ public class Xop_tkn_print_tst {
|
||||
Xot_fmtr_prm raw_fmtr = new Xot_fmtr_prm();
|
||||
defn.Root().Tmpl_fmt(ctx, raw_bry, raw_fmtr);
|
||||
raw_fmtr.Print(tst_Print_bb);
|
||||
Tfds.Eq(raw, tst_Print_bb.Xto_str_and_clear());
|
||||
Tfds.Eq(raw, tst_Print_bb.To_str_and_clear());
|
||||
} private Bry_bfr tst_Print_bb = Bry_bfr.new_();
|
||||
}
|
||||
|
||||
@@ -30,5 +30,5 @@ class Xot_defn_null implements Xot_defn {
|
||||
public Xot_defn Clone(int id, byte[] name) {return this;}
|
||||
public int Cache_size() {return 0;}
|
||||
public void Rls() {}
|
||||
public static final Xot_defn_null _ = new Xot_defn_null(); Xot_defn_null() {}
|
||||
public static final Xot_defn_null Instance = new Xot_defn_null(); Xot_defn_null() {}
|
||||
}
|
||||
|
||||
@@ -16,9 +16,9 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tmpls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.langs.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*;
|
||||
public class Xot_defn_ {
|
||||
public static final Xot_defn Null = Xot_defn_null._;
|
||||
public static final Xot_defn Null = Xot_defn_null.Instance;
|
||||
public static final byte
|
||||
Tid_null = 0
|
||||
, Tid_func = 1
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tmpls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.nss.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
public class Xot_defn_tmpl implements Xot_defn {
|
||||
public byte Defn_tid() {return Xot_defn_.Tid_tmpl;}
|
||||
public boolean Defn_require_colon_arg() {return false;}
|
||||
|
||||
@@ -49,7 +49,7 @@ public class Xot_defn_tmpl_ {
|
||||
for (int i = 0; i < subs_len; i++)
|
||||
orig.Subs_get(i).Tmpl_evaluate(ctx, src, caller, arg_bfr);
|
||||
Arg_itm_tkn rv = tkn_mkr.ArgItm(-1, -1); // NOTE: was -1, 0; DATE:2013-04-10
|
||||
byte[] rv_ary = orig_arg.KeyTkn_exists() && val_tkn ? arg_bfr.Xto_bry_and_clear_and_trim() : arg_bfr.Xto_bry_and_clear(); // // NOTE: must trim if key_exists; DUPE:TRIM_IF_KEY; PAGE:en.w:Coord in Chernobyl disaster, Sahara
|
||||
byte[] rv_ary = orig_arg.KeyTkn_exists() && val_tkn ? arg_bfr.To_bry_and_clear_and_trim() : arg_bfr.To_bry_and_clear(); // // NOTE: must trim if key_exists; DUPE:TRIM_IF_KEY; PAGE:en.w:Coord in Chernobyl disaster, Sahara
|
||||
rv.Dat_ary_(rv_ary);
|
||||
return rv;
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ class Xot_defn_trace_brief implements Xot_defn_trace {
|
||||
else
|
||||
itm = (Xot_defn_trace_itm_brief)o;
|
||||
itm.Count_add();
|
||||
} private Ordered_hash hash = Ordered_hash_.new_();
|
||||
} private Ordered_hash hash = Ordered_hash_.New();
|
||||
public void Trace_end(int trg_bgn, Bry_bfr trg) {}
|
||||
public void Print(byte[] src, Bry_bfr bb) {
|
||||
int count = hash.Count(); if (count == 0) return;
|
||||
|
||||
@@ -42,7 +42,7 @@ class Xot_defn_trace_fxt {
|
||||
Xop_root_tkn root = ctx.Tkn_mkr().Root(src);
|
||||
fxt.Parser().Parse_page_all_clear(root, ctx, ctx.Tkn_mkr(), src);
|
||||
ctx.Defn_trace().Print(src, tmp);
|
||||
String[] actl_ary = String_.Split(tmp.Xto_str_and_clear(), (char)Byte_ascii.Nl);
|
||||
String[] actl_ary = String_.Split(tmp.To_str_and_clear(), (char)Byte_ascii.Nl);
|
||||
Tfds.Eq_ary(expd_ary, actl_ary);
|
||||
} private Bry_bfr tmp = Bry_bfr.new_();
|
||||
String[] To_str(Xot_defn_trace_itm_brief[] ary) {
|
||||
@@ -50,7 +50,7 @@ class Xot_defn_trace_fxt {
|
||||
for (int i = 0; i < rv.length; i++) {
|
||||
Xot_defn_trace_itm_brief itm = ary[i];
|
||||
sb.Add(String_.new_u8(itm.Name())).Add("|").Add(itm.Count());
|
||||
rv[i] = sb.Xto_str_and_clear();
|
||||
rv[i] = sb.To_str_and_clear();
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ public class Xot_defn_trace_dbg implements Xot_defn_trace {
|
||||
// bfr.Add_mid(src, val_tkn.Dat_bgn(), val_tkn.Dat_end());
|
||||
// }
|
||||
// else {
|
||||
// Xot_fmtr_prm raw_fmtr = Xot_fmtr_prm._;
|
||||
// Xot_fmtr_prm raw_fmtr = Xot_fmtr_prm.Instance;
|
||||
// nde.ValTkn().Tmpl_fmt(ctx, src, raw_fmtr);
|
||||
// raw_fmtr.Print(bfr);
|
||||
// }
|
||||
@@ -133,7 +133,7 @@ public class Xot_defn_trace_dbg implements Xot_defn_trace {
|
||||
}
|
||||
public void Clear() {bfr.Clear(); indent = 0; count = 0;}
|
||||
int indent = 0, count = 0;
|
||||
public static final Xot_defn_trace_dbg _ = new Xot_defn_trace_dbg(); Xot_defn_trace_dbg() {}
|
||||
public static final Xot_defn_trace_dbg Instance = new Xot_defn_trace_dbg(); Xot_defn_trace_dbg() {}
|
||||
private static final byte[] Ary_invk_lbl = Bry_.new_a7("*invk\n"), Ary_lnk_lbl = Bry_.new_a7("*lnk: "), Ary_args_lbl = Bry_.new_a7("*args\n")
|
||||
, Ary_result_lbl = Bry_.new_a7("*result\n")
|
||||
, Ary_eval_lbl = Bry_.new_a7("*eval\n")
|
||||
|
||||
@@ -25,7 +25,7 @@ public class Xot_defn_trace_dbg_tst {
|
||||
fx.Init_defn_add("concat", "{{{1}}}{{{2}}}");
|
||||
fx.Init_defn_add("bool_str", "{{#ifeq:{{{1}}}|1|y|n}}");
|
||||
fx.Init_defn_add("mid_1", "{{print|[ {{concat|{{{1}}}|{{{2}}}}} ]}}");
|
||||
fx.Ctx().Defn_trace_(Xot_defn_trace_dbg._);
|
||||
fx.Ctx().Defn_trace_(Xot_defn_trace_dbg.Instance);
|
||||
}
|
||||
@Test public void Tmpl() {
|
||||
fx.tst_
|
||||
|
||||
@@ -21,5 +21,5 @@ public class Xot_defn_trace_null implements Xot_defn_trace {
|
||||
public void Trace_bgn(Xop_ctx ctx, byte[] src, byte[] name, Xot_invk caller, Xot_invk self, Xot_defn defn) {}
|
||||
public void Trace_end(int trg_bgn, Bry_bfr trg) {}
|
||||
public void Print(byte[] src, Bry_bfr bb) {}
|
||||
public static final Xot_defn_trace_null _ = new Xot_defn_trace_null(); Xot_defn_trace_null() {}
|
||||
public static final Xot_defn_trace_null Instance = new Xot_defn_trace_null(); Xot_defn_trace_null() {}
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ import org.junit.*;
|
||||
public class Xot_examples_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@Before public void init() {
|
||||
Io_mgr.I.InitEngine_mem();
|
||||
Io_mgr.Instance.InitEngine_mem();
|
||||
fxt.Reset();
|
||||
}
|
||||
@Test public void Arg_0() {Init_tmpl_for(); fxt.Test_parse_tmpl_str("{{For}}" , "For other uses, see [[Test page (disambiguation)]].");}
|
||||
|
||||
@@ -71,5 +71,5 @@ class Xot_fmtr_prm implements Xot_fmtr {
|
||||
}
|
||||
public void Print(Bry_bfr bb) {bb.Add_bfr_and_preserve(trg); trg.Clear(); depth = 0;}
|
||||
Bry_bfr trg = Bry_bfr.new_(); int depth = 0;
|
||||
public static final Xot_fmtr_prm _ = new Xot_fmtr_prm();
|
||||
public static final Xot_fmtr_prm Instance = new Xot_fmtr_prm();
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ public class Xot_invk_mock implements Xot_invk {
|
||||
public int Frame_lifetime() {return frame_lifetime;} public void Frame_lifetime_(int v) {frame_lifetime = v;} private int frame_lifetime;
|
||||
public boolean Rslt_is_redirect() {return rslt_is_redirect;} public void Rslt_is_redirect_(boolean v) {rslt_is_redirect = v;} private boolean rslt_is_redirect;
|
||||
public Arg_nde_tkn Name_tkn() {return Arg_nde_tkn.Null;}
|
||||
public int Args_len() {return args.Count() + idx_adj;} private Ordered_hash args = Ordered_hash_.new_bry_();
|
||||
public int Args_len() {return args.Count() + idx_adj;} private Ordered_hash args = Ordered_hash_.New_bry();
|
||||
public Arg_nde_tkn Args_get_by_idx(int i) {return (Arg_nde_tkn)args.Get_at(i - idx_adj);}
|
||||
public Arg_nde_tkn Args_eval_by_idx(byte[] src, int idx) {// DUPE:MW_ARG_RETRIEVE
|
||||
int cur = 0, list_len = args.Count();
|
||||
|
||||
@@ -16,9 +16,9 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tmpls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
|
||||
import gplx.xowa.nss.*;
|
||||
import gplx.xowa.wikis.caches.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.pfuncs.*; import gplx.xowa.xtns.pfuncs.ttls.*; import gplx.xowa.pages.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*; import gplx.xowa.langs.funcs.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.wikis.caches.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.pfuncs.*; import gplx.xowa.xtns.pfuncs.ttls.*; import gplx.xowa.wikis.pages.*;
|
||||
import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.parsers.miscs.*;
|
||||
public class Xot_invk_tkn extends Xop_tkn_itm_base implements Xot_invk {
|
||||
@@ -49,7 +49,7 @@ public class Xot_invk_tkn extends Xop_tkn_itm_base implements Xot_invk {
|
||||
}
|
||||
@Override public boolean Tmpl_evaluate(Xop_ctx ctx, byte[] src, Xot_invk caller, Bry_bfr bfr) { // this="{{t|{{{0}}}}}" caller="{{t|1}}"
|
||||
boolean rv = false;
|
||||
Xot_defn defn = tmpl_defn; Xowe_wiki wiki = ctx.Wiki(); Xol_lang lang = wiki.Lang();
|
||||
Xot_defn defn = tmpl_defn; Xowe_wiki wiki = ctx.Wiki(); Xol_lang_itm lang = wiki.Lang();
|
||||
byte[] name_ary = defn.Name(), argx_ary = Bry_.Empty; Arg_itm_tkn name_key_tkn = name_tkn.Key_tkn();
|
||||
byte[] name_ary_orig = Bry_.Empty;
|
||||
int name_bgn = 0, name_ary_len = 0;
|
||||
@@ -61,7 +61,7 @@ public class Xot_invk_tkn extends Xop_tkn_itm_base implements Xot_invk {
|
||||
if (defn_tid == Xot_defn_.Tid_subst)
|
||||
name_tkn_bfr.Add(Get_first_subst_itm(lang.Kwd_mgr()));
|
||||
name_tkn.Tmpl_evaluate(ctx, src, caller, name_tkn_bfr);
|
||||
name_ary = name_tkn_bfr.Xto_bry_and_clear();
|
||||
name_ary = name_tkn_bfr.To_bry_and_clear();
|
||||
}
|
||||
else // tmpl is static; note that dat_ary is still valid but rest of name may not be; EX: {{subst:name{{{1}}}}}
|
||||
name_ary = Bry_.Mid(src, name_key_tkn.Dat_bgn(), name_key_tkn.Dat_end());
|
||||
@@ -99,7 +99,7 @@ public class Xot_invk_tkn extends Xop_tkn_itm_base implements Xot_invk {
|
||||
if (ns_eval != null && !template_prefix_found) // do not transclude ns if Template prefix seen earlier; EX: {{Template:Wikipedia:A}} should not transclude "Wikipedia:A"; DATE:2013-04-03
|
||||
return SubEval(ctx, wiki, bfr, name_ary, caller, src);
|
||||
|
||||
Xol_func_name_itm finder = lang.Func_regy().Find_defn(name_ary, name_bgn, name_ary_len);
|
||||
Xol_func_itm finder = lang.Func_regy().Find_defn(name_ary, name_bgn, name_ary_len);
|
||||
defn = finder.Func();
|
||||
int colon_pos = -1;
|
||||
switch (finder.Tid()) {
|
||||
@@ -110,7 +110,7 @@ public class Xot_invk_tkn extends Xop_tkn_itm_base implements Xot_invk {
|
||||
bfr.Add_byte(Byte_ascii.Pipe); // add |
|
||||
bfr.Add_mid(src, nde.Src_bgn(), nde.Src_end()); // add entire arg; "k=v"; note that src must be added, not evaluated, else <nowiki> may be dropped and cause stack overflow; PAGE:ru.w:Близкие_друзья_(Сезон_2) DATE:2014-10-21
|
||||
}
|
||||
Xot_fmtr_prm._.Print(bfr);
|
||||
Xot_fmtr_prm.Instance.Print(bfr);
|
||||
bfr.Add(Xop_curly_end_lxr.Hook);
|
||||
return true; // NOTE: nothing else to do; return
|
||||
case Xot_defn_.Tid_safesubst:
|
||||
@@ -253,7 +253,7 @@ public class Xot_invk_tkn extends Xop_tkn_itm_base implements Xot_invk {
|
||||
Bry_bfr rslt_bfr = wiki.Utl__bfr_mkr().Get_k004();
|
||||
try {
|
||||
Bld_key(invk_tmpl, name_ary, rslt_bfr);
|
||||
byte[] rslt_key = rslt_bfr.Xto_bry_and_clear();
|
||||
byte[] rslt_key = rslt_bfr.To_bry_and_clear();
|
||||
Object o = wiki.Cache_mgr().Tmpl_result_cache().Get_by(rslt_key);
|
||||
Xopg_tmpl_prepend_mgr prepend_mgr = ctx.Cur_page().Tmpl_prepend_mgr().Bgn(bfr);
|
||||
if (o != null) {
|
||||
@@ -265,11 +265,11 @@ public class Xot_invk_tkn extends Xop_tkn_itm_base implements Xot_invk {
|
||||
rv = defn_tmpl.Tmpl_evaluate(ctx, invk_tmpl, rslt_bfr);
|
||||
prepend_mgr.End(ctx, bfr, rslt_bfr.Bfr(), rslt_bfr.Len(), Bool_.Y);
|
||||
if (name_had_subst) { // current invk had "subst:"; parse incoming invk again to remove effects of subst; PAGE:pt.w:Argentina DATE:2014-09-24
|
||||
byte[] tmp_src = rslt_bfr.Xto_bry_and_clear();
|
||||
byte[] tmp_src = rslt_bfr.To_bry_and_clear();
|
||||
rslt_bfr.Add(wiki.Parser_mgr().Main().Parse_text_to_wtxt(tmp_src)); // this could be cleaner / more optimized
|
||||
}
|
||||
if (Cache_enabled) {
|
||||
byte[] rslt_val = rslt_bfr.Xto_bry_and_clear();
|
||||
byte[] rslt_val = rslt_bfr.To_bry_and_clear();
|
||||
bfr.Add(rslt_val);
|
||||
Hash_adp cache = wiki.Cache_mgr().Tmpl_result_cache();
|
||||
cache.Del(rslt_key);
|
||||
@@ -288,7 +288,7 @@ public class Xot_invk_tkn extends Xop_tkn_itm_base implements Xot_invk {
|
||||
boolean skip = false;
|
||||
skip = this.Src_end() - this.Src_bgn() > ctx.Tmpl_tkn_max();
|
||||
if (!skip) {
|
||||
gplx.xowa.html.modules.popups.keeplists.Xop_keeplist_wiki tmpl_keeplist = ctx.Tmpl_keeplist();
|
||||
gplx.xowa.htmls.modules.popups.keeplists.Xop_keeplist_wiki tmpl_keeplist = ctx.Tmpl_keeplist();
|
||||
if (tmpl_keeplist != null && tmpl_keeplist.Enabled()) {
|
||||
byte[] ttl_lower = Xoa_ttl.Replace_spaces(ctx.Wiki().Lang().Case_mgr().Case_build_lower(ttl));
|
||||
skip = !tmpl_keeplist.Match(ttl_lower);
|
||||
|
||||
@@ -16,13 +16,13 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tmpls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.langs.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.funcs.*;
|
||||
public class Xot_invk_wkr implements Xop_ctx_wkr, Xop_arg_wkr {
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {this.tkn_mkr = ctx.Tkn_mkr();} private Xop_tkn_mkr tkn_mkr;
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
|
||||
public void AutoClose(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int lxr_bgn_pos, int lxr_cur_pos, Xop_tkn_itm tkn) {}
|
||||
private static Arg_bldr arg_bldr = Arg_bldr._;
|
||||
private static Arg_bldr arg_bldr = Arg_bldr.Instance;
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int lxr_cur_pos, int lxr_end_pos, Xop_curly_bgn_tkn bgn_tkn, int keep_curly_bgn) {
|
||||
Xot_invk_tkn invk = tkn_mkr.Tmpl_invk(bgn_tkn.Src_bgn(), lxr_end_pos);
|
||||
int loop_bgn = bgn_tkn.Tkn_sub_idx() + 1, loop_end = root.Subs_len();
|
||||
@@ -60,7 +60,7 @@ public class Xot_invk_wkr implements Xop_ctx_wkr, Xop_arg_wkr {
|
||||
|
||||
if (key_tkn.Itm_static() != Bool_.Y_byte) return; // dynamic tkn; can't identify func/name
|
||||
int colon_pos = -1, txt_bgn = key_tkn.Dat_bgn(), txt_end = key_tkn.Dat_end();
|
||||
Xol_func_name_itm finder = ctx.Wiki().Lang().Func_regy().Find_defn(src, txt_bgn, txt_end);
|
||||
Xol_func_itm finder = ctx.Wiki().Lang().Func_regy().Find_defn(src, txt_bgn, txt_end);
|
||||
Xot_defn finder_func = finder.Func();
|
||||
byte finder_typeId = finder.Tid();
|
||||
switch (finder_typeId) {
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tmpls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.wikis.ttls.*; import gplx.xowa.nss.*;
|
||||
import org.junit.*; import gplx.xowa.wikis.ttls.*; import gplx.xowa.wikis.nss.*;
|
||||
public class Xot_invk_wkr_basic_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@Before public void init() {fxt.Reset();}
|
||||
@@ -214,10 +214,10 @@ public class Xot_invk_wkr_basic_tst {
|
||||
fxt.Test_parse_tmpl_str("{{Missing}}", "[[:Template_foreign:Missing]]");
|
||||
ns.Name_bry_(old_ns);
|
||||
}
|
||||
@Test public void Xnde_xtn_preserved() { // PURPOSE: tmpl was dropping .Xtn ndes; EX: below was just ab
|
||||
@Test public void Xnde_xtn_preserved() { // PURPOSE: tmpl was dropping .Xtn ndes;
|
||||
fxt.Init_defn_clear();
|
||||
fxt.Init_defn_add("test_template", "{{{1}}}");
|
||||
fxt.Test_parse_page_all_str("{{test_template|a<source>1</source>b}}", "a<pre>1</pre>b");
|
||||
fxt.Test_parse_page_all_str("{{test_template|a<source>1</source>b}}", "a<div class=\"mw-highlight\"><pre style=\"overflow:auto\">1</pre></div>b"); // was just ab
|
||||
fxt.Init_defn_clear();
|
||||
}
|
||||
@Test public void Recurse() {
|
||||
|
||||
@@ -49,7 +49,7 @@ public class Xot_prm_tkn extends Xop_tkn_itm_base {
|
||||
find_tkn.Subs_get(i).Tmpl_evaluate(ctx, src, caller, find_bfr);
|
||||
prm_idx = Bry_.To_int_or__trim_ws(find_bfr.Bfr(), 0, find_bfr.Len(), -1); // parse as number first; NOTE: trim needed to transform "{{{ 1 }}}" to "1"; it.w:Portale:Giochi_da_tavolo; DATE:2014-02-09
|
||||
if (prm_idx == -1)
|
||||
prm_key = find_bfr.Xto_bry_and_clear_and_trim(); // not a number; parse as key; NOTE: must trim; PAGE:en.w:William Shakespeare; {{Relatebardtree}}
|
||||
prm_key = find_bfr.To_bry_and_clear_and_trim(); // not a number; parse as key; NOTE: must trim; PAGE:en.w:William Shakespeare; {{Relatebardtree}}
|
||||
}
|
||||
Arg_nde_tkn arg_nde = null;
|
||||
if (prm_idx == -1) { // prm is key; EX: "{{{key1}}}"
|
||||
|
||||
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.tmpls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
class Xot_prm_wkr implements Xop_arg_wkr {
|
||||
private static Arg_bldr arg_bldr = Arg_bldr._;
|
||||
private static Arg_bldr arg_bldr = Arg_bldr.Instance;
|
||||
public boolean Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int lxr_bgn_pos, int lxr_cur_pos, Xop_curly_bgn_tkn bgn, int keep_curly_bgn) {
|
||||
int loop_bgn = bgn.Tkn_sub_idx() + 1; // +1 to ignore curly_bgn
|
||||
int loop_end = root.Subs_len();
|
||||
@@ -28,7 +28,7 @@ class Xot_prm_wkr implements Xop_arg_wkr {
|
||||
return false;
|
||||
}
|
||||
Xot_prm_tkn prm_tkn = tkn_mkr.Tmpl_prm(bgn.Src_bgn(), lxr_cur_pos);
|
||||
arg_bldr.Bld(ctx, tkn_mkr, Xot_prm_wkr._, Xop_arg_wkr_.Typ_prm, root, prm_tkn, lxr_bgn_pos, lxr_cur_pos, loop_bgn, loop_end, src);
|
||||
arg_bldr.Bld(ctx, tkn_mkr, Xot_prm_wkr.Instance, Xop_arg_wkr_.Typ_prm, root, prm_tkn, lxr_bgn_pos, lxr_cur_pos, loop_bgn, loop_end, src);
|
||||
root.Subs_del_after(bgn.Tkn_sub_idx() + keep_curly_bgn); // NOTE: keep_curly_bgn determines whether or not to delete opening {{{
|
||||
root.Subs_add(prm_tkn);
|
||||
return true;
|
||||
@@ -42,5 +42,5 @@ class Xot_prm_wkr implements Xop_arg_wkr {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
public static final Xot_prm_wkr _ = new Xot_prm_wkr(); Xot_prm_wkr() {}
|
||||
public static final Xot_prm_wkr Instance = new Xot_prm_wkr(); Xot_prm_wkr() {}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user