mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Wiki: Support renamed folders (fix)
This commit is contained in:
@@ -1,50 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.uniqs.*;
|
||||
public class Xoa_parser_mgr {
|
||||
private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr();
|
||||
public Xop_tkn_mkr Tkn_mkr() {return tkn_mkr;} private final Xop_tkn_mkr tkn_mkr = new Xop_tkn_mkr();
|
||||
public Xop_uniq_mgr Core__uniq_mgr() {return core__uniq_mgr;} private final Xop_uniq_mgr core__uniq_mgr = new Xop_uniq_mgr();
|
||||
public Mwh_atr_parser Xnde__atr_parser() {return atr_parser;} private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
|
||||
public Mwh_atr_itm[] Xnde__parse_atrs(byte[] src, int src_bgn, int src_end) {
|
||||
synchronized (atr_bldr) {// LOCK:app-level; DATE:2016-07-06
|
||||
//if (src_bgn < src_end) { // CHART
|
||||
// src = Bry_.Mid(src, src_bgn, src_end);
|
||||
// src = gplx.xowa.parsers.xndes.Xop_xnde_tkn.uniq_mgr.Parse(src);
|
||||
// src_bgn = 0;
|
||||
// src_end = src.length;
|
||||
//}
|
||||
atr_parser.Parse(atr_bldr, -1, -1, src, src_bgn, src_end);
|
||||
return atr_bldr.To_atr_ary();
|
||||
}
|
||||
}
|
||||
public Mwh_atr_itm[] Xnde__parse_atrs_for_tblw(byte[] src, int src_bgn, int src_end) {
|
||||
synchronized (atr_bldr) { // LOCK:app-level; DATE:2016-07-06
|
||||
//int angle_bgn_pos = Bry_find_.Find_fwd(src, Byte_ascii.Angle_bgn, src_bgn, src_end);
|
||||
//if (angle_bgn_pos != Bry_find_.Not_found) {
|
||||
// src = Bry_.Mid(src, src_bgn, src_end);
|
||||
// src = Bry_.Replace(src, Byte_ascii.Angle_bgn_bry, gplx.langs.htmls.Gfh_entity_.Lt_bry);
|
||||
// src_bgn = 0;
|
||||
// src_end = src.length;
|
||||
//}
|
||||
atr_parser.Parse(atr_bldr, -1, -1, src, src_bgn, src_end);
|
||||
return atr_bldr.To_atr_ary();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,342 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.btries.*; import gplx.core.log_msgs.*;
|
||||
import gplx.xowa.langs.*;
|
||||
import gplx.xowa.guis.*;
|
||||
import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.lst.*;
|
||||
import gplx.xowa.parsers.apos.*; import gplx.xowa.parsers.amps.*; import gplx.xowa.parsers.lnkes.*; import gplx.xowa.parsers.hdrs.*; import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.tblws.*; import gplx.xowa.parsers.paras.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.tmpls.*;
|
||||
import gplx.xowa.parsers.logs.*; import gplx.xowa.htmls.modules.popups.keeplists.*;
|
||||
public class Xop_ctx {
|
||||
private final Xop_ctx_wkr[] wkrs;
|
||||
Xop_ctx(Xowe_wiki wiki, Xoae_page page) {
|
||||
this.wiki = wiki; this.cur_page = page;
|
||||
this.app = wiki.Appe(); this.msg_log = app.Msg_log(); this.tkn_mkr = app.Parser_mgr().Tkn_mkr();
|
||||
this.lang = wiki.Lang();
|
||||
this.wkrs = new Xop_ctx_wkr[] {para, apos, xnde, list, lnki, hdr, amp, lnke, tblw, invk};
|
||||
for (Xop_ctx_wkr wkr : wkrs)
|
||||
wkr.Ctor_ctx(this);
|
||||
this.xnde_tag_regy = wiki.Mw_parser_mgr().Xnde_tag_regy();
|
||||
}
|
||||
// public boolean Scribunto; // CHART
|
||||
public Xowe_wiki Wiki() {return wiki;} private final Xowe_wiki wiki;
|
||||
public Xoae_page Page() {return cur_page;} public void Page_(Xoae_page v) {cur_page = v;} private Xoae_page cur_page;
|
||||
public Xol_lang_itm Lang() {return lang;} private final Xol_lang_itm lang;
|
||||
public Xoae_app App() {return app;} private final Xoae_app app;
|
||||
public Xop_tkn_mkr Tkn_mkr() {return tkn_mkr;} private final Xop_tkn_mkr tkn_mkr;
|
||||
public Gfo_msg_log Msg_log() {return msg_log;} private final Gfo_msg_log msg_log;
|
||||
public Xop_amp_wkr Amp() {return amp;} private final Xop_amp_wkr amp = new Xop_amp_wkr();
|
||||
public Xop_apos_wkr Apos() {return apos;} private final Xop_apos_wkr apos = new Xop_apos_wkr();
|
||||
public Xop_lnke_wkr Lnke() {return lnke;} private final Xop_lnke_wkr lnke = new Xop_lnke_wkr();
|
||||
public Xop_lnki_wkr Lnki() {return lnki;} private final Xop_lnki_wkr lnki = new Xop_lnki_wkr();
|
||||
public Xop_hdr_wkr Hdr() {return hdr;} private final Xop_hdr_wkr hdr = new Xop_hdr_wkr();
|
||||
public Xop_para_wkr Para() {return para;} private final Xop_para_wkr para = new Xop_para_wkr();
|
||||
public Xop_list_wkr List() {return list;} private final Xop_list_wkr list = new Xop_list_wkr();
|
||||
public Xop_tblw_wkr Tblw() {return tblw;} private final Xop_tblw_wkr tblw = new Xop_tblw_wkr();
|
||||
public Xop_xnde_wkr Xnde() {return xnde;} private final Xop_xnde_wkr xnde = new Xop_xnde_wkr();
|
||||
public Xot_invk_wkr Invk() {return invk;} private final Xot_invk_wkr invk = new Xot_invk_wkr();
|
||||
public Xop_curly_wkr Curly() {return curly;} private final Xop_curly_wkr curly = new Xop_curly_wkr();
|
||||
public Xop_xnde_tag_regy Xnde_tag_regy() {return xnde_tag_regy;} private final Xop_xnde_tag_regy xnde_tag_regy; // PERF:demeter
|
||||
public Xop_tmp_mgr Tmp_mgr() {return tmp_mgr;} private final Xop_tmp_mgr tmp_mgr = new Xop_tmp_mgr();
|
||||
|
||||
public byte Xnde_names_tid() {return xnde_names_tid;} public Xop_ctx Xnde_names_tid_(byte v) {xnde_names_tid = v; return this;} private byte xnde_names_tid = Xop_parser_tid_.Tid__null;
|
||||
public byte Parse_tid() {return parse_tid;} public Xop_ctx Parse_tid_(byte v) {parse_tid = v; xnde_names_tid = v; return this;} private byte parse_tid = Xop_parser_tid_.Tid__null;
|
||||
public boolean Tid_is_popup() {return tid_is_popup;} public void Tid_is_popup_(boolean v) {tid_is_popup = v;} private boolean tid_is_popup = false;
|
||||
public boolean Tid_is_image_map() {return tid_is_image_map;} public Xop_ctx Tid_is_image_map_(boolean v) {tid_is_image_map = v; return this;} private boolean tid_is_image_map;
|
||||
|
||||
public boolean Tmpl_load_enabled() {return tmpl_load_enabled;} public void Tmpl_load_enabled_(boolean v) {tmpl_load_enabled = v;} private boolean tmpl_load_enabled = true;
|
||||
public int Tmpl_tkn_max() {return tmpl_tkn_max;} public void Tmpl_tkn_max_(int v) {tmpl_tkn_max = v;} private int tmpl_tkn_max = Int_.Max_value;
|
||||
public Xop_keeplist_wiki Tmpl_keeplist() {return tmpl_keeplist;} public void Tmpl_keeplist_(Xop_keeplist_wiki v) {this.tmpl_keeplist = v;} private Xop_keeplist_wiki tmpl_keeplist;
|
||||
public boolean Tmpl_args_parsing() {return tmpl_args_parsing;} public Xop_ctx Tmpl_args_parsing_(boolean v) {tmpl_args_parsing = v; return this;} private boolean tmpl_args_parsing;
|
||||
public Xot_defn_trace Defn_trace() {return defn_trace;} public Xop_ctx Defn_trace_(Xot_defn_trace v) {defn_trace = v; return this;} private Xot_defn_trace defn_trace = Xot_defn_trace_null.Instance;
|
||||
public boolean Only_include_evaluate() {return only_include_evaluate;} public Xop_ctx Only_include_evaluate_(boolean v) {only_include_evaluate = v; return this;} private boolean only_include_evaluate;
|
||||
|
||||
public Lst_section_nde_mgr Lst_section_mgr() {if (lst_section_mgr == null) lst_section_mgr = new Lst_section_nde_mgr(); return lst_section_mgr;} private Lst_section_nde_mgr lst_section_mgr;
|
||||
public Hash_adp_bry Lst_page_regy() {return lst_page_regy;} private Hash_adp_bry lst_page_regy;
|
||||
|
||||
public boolean Ref_ignore() {return ref_ignore;} public Xop_ctx Ref_ignore_(boolean v) {ref_ignore = v; return this;} private boolean ref_ignore; // NOTE: only applies to sub_ctx's created by <pages> and {{#lst}}; if true, does not add <ref> to page.Ref_mgr; DATE:2014-04-24
|
||||
public byte[] References_group() {return references_group;} public Xop_ctx References_group_(byte[] v) {references_group = v; return this;} private byte[] references_group;
|
||||
|
||||
public Xop_log_property_wkr Xtn__wikidata__property_wkr() {return app.Wiki_mgr().Wdata_mgr().Property_wkr();}
|
||||
public Xop_log_invoke_wkr Xtn__scribunto__invoke_wkr() {
|
||||
if (scrib_invoke_wkr == null)
|
||||
scrib_invoke_wkr = ((Scrib_xtn_mgr)(wiki.Xtn_mgr().Get_or_fail(Scrib_xtn_mgr.XTN_KEY))).Invoke_wkr();
|
||||
return scrib_invoke_wkr;
|
||||
} private Xop_log_invoke_wkr scrib_invoke_wkr;
|
||||
|
||||
public Xop_ctx Clear_all() {return Clear(true);}
|
||||
public Xop_ctx Clear(boolean clear_scrib) {
|
||||
cur_page.Clear(clear_scrib);
|
||||
stack = Xop_tkn_itm_.Ary_empty;
|
||||
stack_len = stack_max = 0;
|
||||
if (lst_section_mgr != null) lst_section_mgr.Clear();
|
||||
if (lst_page_regy != null) lst_page_regy.Clear();
|
||||
tmpl_args_parsing = false;
|
||||
return this;
|
||||
}
|
||||
public String Page_url_str() {
|
||||
try {return cur_page.Url().To_str();}
|
||||
catch (Exception e) {Err_.Noop(e); return "page_url shouldn't fail";}
|
||||
}
|
||||
public void Parser__page_init(Xop_root_tkn root, byte[] src) {
|
||||
this.Msg_log().Clear(); cur_tkn_tid = Xop_tkn_itm_.Tid_null;
|
||||
empty_ignored = false;
|
||||
for (Xop_ctx_wkr wkr : wkrs) wkr.Page_bgn(this, root);
|
||||
}
|
||||
public void Parser__page_term(Xop_root_tkn root, byte[] src, int src_len) {
|
||||
Stack_pop_til(root, src, 0, true, src_len, src_len, Xop_tkn_itm_.Tid_txt);
|
||||
for (Xop_ctx_wkr wkr : wkrs) wkr.Page_end(this, root, src, src_len);
|
||||
}
|
||||
public boolean Lxr_make() {return lxr_make;} public Xop_ctx Lxr_make_(boolean v) {lxr_make = v; return this;} private boolean lxr_make = false;
|
||||
public int Lxr_make_txt_(int pos) {lxr_make = false; return pos;}
|
||||
public int Lxr_make_log_(Gfo_msg_itm itm, byte[] src, int bgn_pos, int cur_pos) {lxr_make = false; msg_log.Add_itm_none(itm, src, bgn_pos, cur_pos); return cur_pos;}
|
||||
public boolean Empty_ignored() {return empty_ignored;}
|
||||
public void Empty_ignored_y_() {empty_ignored = Bool_.Y;} private boolean empty_ignored = false;
|
||||
public void Empty_ignored_n_() {empty_ignored = Bool_.N;}
|
||||
public void Empty_ignore(Xop_root_tkn root, int empty_bgn) {
|
||||
int empty_end = root.Subs_len();
|
||||
for (int i = empty_bgn; i < empty_end; i++) {
|
||||
Xop_tkn_itm sub_tkn = root.Subs_get(i);
|
||||
sub_tkn.Ignore_y_grp_(this, root, i);
|
||||
}
|
||||
empty_ignored = false;
|
||||
}
|
||||
|
||||
public byte Cur_tkn_tid() {return cur_tkn_tid;} private byte cur_tkn_tid = Xop_tkn_itm_.Tid_null;
|
||||
public void Subs_add_and_stack_tblw(Xop_root_tkn root, Xop_tblw_tkn owner_tkn, Xop_tkn_itm sub) {
|
||||
if (owner_tkn != null) owner_tkn.Tblw_subs_len_add_(); // owner_tkn can be null;EX: "{|" -> prv_tkn is null
|
||||
Subs_add_and_stack(root, sub);
|
||||
}
|
||||
public void Subs_add_and_stack(Xop_root_tkn root, Xop_tkn_itm sub) {this.Subs_add(root, sub); this.Stack_add(sub);}
|
||||
public void Subs_add(Xop_root_tkn root, Xop_tkn_itm sub) {
|
||||
switch (sub.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_space: case Xop_tkn_itm_.Tid_tab: case Xop_tkn_itm_.Tid_newLine:
|
||||
case Xop_tkn_itm_.Tid_para:
|
||||
break;
|
||||
default:
|
||||
empty_ignored = false;
|
||||
break;
|
||||
}
|
||||
root.Subs_add(sub);
|
||||
}
|
||||
public void StackTkn_add(Xop_root_tkn root, Xop_tkn_itm sub) {
|
||||
root.Subs_add(sub);
|
||||
this.Stack_add(sub);
|
||||
}
|
||||
public void Stack_add(Xop_tkn_itm tkn) {
|
||||
int newLen = stack_len + 1;
|
||||
if (newLen > stack_max) {
|
||||
stack_max = newLen * 2;
|
||||
stack = (Xop_tkn_itm[])Array_.Resize(stack, stack_max);
|
||||
}
|
||||
stack[stack_len] = tkn;
|
||||
cur_tkn_tid = tkn.Tkn_tid();
|
||||
stack_len = newLen;
|
||||
} private Xop_tkn_itm[] stack = Xop_tkn_itm_.Ary_empty; int stack_len = 0, stack_max = 0;
|
||||
public int Stack_len() {return stack_len;}
|
||||
public Xop_tkn_itm Stack_get_last() {return stack_len == 0 ? null : stack[stack_len - 1];}
|
||||
public Xop_tkn_itm Stack_get(int i) {return i < 0 || i >= stack_len ? null : stack[i];}
|
||||
public Xop_tblw_tkn Stack_get_tblw_tb() {// find any {| (exclude <table)
|
||||
for (int i = stack_len - 1; i > -1; i--) {
|
||||
Xop_tkn_itm tkn = stack[i];
|
||||
if (tkn.Tkn_tid() == Xop_tkn_itm_.Tid_tblw_tb) {
|
||||
Xop_tblw_tkn tkn_as_tbl = (Xop_tblw_tkn)tkn;
|
||||
if (!tkn_as_tbl.Tblw_xml()) return tkn_as_tbl;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
public Xop_tblw_tkn Stack_get_tbl_tb() {
|
||||
for (int i = stack_len - 1; i > -1; i--) {
|
||||
Xop_tkn_itm tkn = stack[i];
|
||||
switch (tkn.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb:
|
||||
return (Xop_tblw_tkn)tkn;
|
||||
case Xop_tkn_itm_.Tid_xnde:
|
||||
Xop_xnde_tkn xnde_tkn = (Xop_xnde_tkn)tkn;
|
||||
switch (xnde_tkn.Tag().Id()) {
|
||||
case Xop_xnde_tag_.Tid__table:
|
||||
return (Xop_tblw_tkn)tkn;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
public Xop_tblw_tkn Stack_get_tbl() {
|
||||
for (int i = stack_len - 1; i > -1; i--) {
|
||||
Xop_tkn_itm tkn = stack[i];
|
||||
switch (tkn.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb:
|
||||
case Xop_tkn_itm_.Tid_tblw_tr:
|
||||
case Xop_tkn_itm_.Tid_tblw_td:
|
||||
case Xop_tkn_itm_.Tid_tblw_th:
|
||||
case Xop_tkn_itm_.Tid_tblw_tc:
|
||||
return (Xop_tblw_tkn)tkn;
|
||||
case Xop_tkn_itm_.Tid_xnde:
|
||||
Xop_xnde_tkn xnde_tkn = (Xop_xnde_tkn)tkn;
|
||||
switch (xnde_tkn.Tag().Id()) {
|
||||
case Xop_xnde_tag_.Tid__table:
|
||||
case Xop_xnde_tag_.Tid__tr:
|
||||
case Xop_xnde_tag_.Tid__td:
|
||||
case Xop_xnde_tag_.Tid__th:
|
||||
case Xop_xnde_tag_.Tid__caption:
|
||||
return (Xop_tblw_tkn)tkn;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
public static final int Stack_not_found = -1;
|
||||
public boolean Stack_has(int typeId) {return Stack_idx_typ(typeId) != Stack_not_found;}
|
||||
public int Stack_idx_typ(int typeId) {
|
||||
for (int i = stack_len - 1; i > -1; i--)
|
||||
if (stack[i].Tkn_tid() == typeId)
|
||||
return i;
|
||||
return Stack_not_found;
|
||||
}
|
||||
public int Stack_idx_find_but_stop_at_tbl(int tid) {
|
||||
for (int i = stack_len - 1; i > -1 ; i--) {
|
||||
Xop_tkn_itm tkn_itm = stack[i];
|
||||
int tkn_itm_tid = tkn_itm.Tkn_tid();
|
||||
switch (tkn_itm_tid) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: // NOTE: added DATE:2014-06-26
|
||||
case Xop_tkn_itm_.Tid_tblw_td:
|
||||
case Xop_tkn_itm_.Tid_tblw_th:
|
||||
case Xop_tkn_itm_.Tid_tblw_tc:
|
||||
return -1;
|
||||
}
|
||||
if (tkn_itm_tid == tid)
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
public Xop_tkn_itm Stack_get_typ(int tid) {
|
||||
for (int i = stack_len - 1; i > -1 ; i--) {
|
||||
Xop_tkn_itm tkn = stack[i];
|
||||
if (tkn.Tkn_tid() == tid) return tkn;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
public void Stack_del(Xop_tkn_itm del) {
|
||||
if (stack_len == 0) return;
|
||||
for (int i = stack_len - 1; i > -1; i--) {
|
||||
Xop_tkn_itm tkn = stack[i];
|
||||
if (tkn == del) {
|
||||
for (int j = i + 1; j < stack_len; j++) {
|
||||
stack[j - 1] = stack[j];
|
||||
}
|
||||
--stack_len;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
public Xop_tkn_itm Stack_pop_til(Xop_root_tkn root, byte[] src, int til_idx, boolean include, int bgn_pos, int cur_pos, int closing_tkn_tid) { // NOTE: closing_tkn_tid is a book-keeping variable to indicate who started auto-close; only used by xnde.AutoClose
|
||||
if (stack_len == 0) return null; // nothing to pop; return;
|
||||
int min_idx = include ? til_idx - 1 : til_idx; // if "include", auto-close tkn at til_idx; if not, auto-close to one before
|
||||
if (min_idx < -1) min_idx = -1; // bounds-check; make sure til_idx was not -1, resulting in -2; NOTE: does not seem to be needed; DATE:2015-03-31
|
||||
Xop_tkn_itm rv = null;
|
||||
for (int i = stack_len - 1; i > min_idx; i--) { // pop tkns going backwards
|
||||
rv = stack[i];
|
||||
Stack_auto_close(root, src, rv, bgn_pos, cur_pos, closing_tkn_tid);
|
||||
}
|
||||
Stack_pop_idx(til_idx);
|
||||
return include ? rv : stack[stack_len]; // if include, return popped_tkn; if not, return tkn before popped_tkn
|
||||
}
|
||||
public Xop_tkn_itm Stack_pop_before(Xop_root_tkn root, byte[] src, int til_idx, boolean include, int bgn_pos, int cur_pos, int closing_tkn_tid) { // used by Xop_tblw_lxr to detect \n| in lnki; seems useful as well
|
||||
if (stack_len == 0) return null;
|
||||
int min_idx = include ? til_idx - 1 : til_idx;
|
||||
if (min_idx < -1) min_idx = -1;
|
||||
Xop_tkn_itm rv = null;
|
||||
for (int i = stack_len - 1; i > min_idx; i--) {
|
||||
rv = stack[i];
|
||||
Stack_auto_close(root, src, rv, bgn_pos, cur_pos, closing_tkn_tid);
|
||||
}
|
||||
return include ? rv : stack[stack_len]; // if include, return poppedTkn; if not, return tkn before poppedTkn
|
||||
}
|
||||
public void Stack_auto_close(Xop_root_tkn root, byte[] src, Xop_tkn_itm tkn, int bgn_pos, int cur_pos, int closing_tkn_tid) {
|
||||
int src_len = src.length;
|
||||
switch (tkn.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_newLine: break; // NOOP: just a marker
|
||||
case Xop_tkn_itm_.Tid_list: list.AutoClose(this, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, tkn); break;
|
||||
case Xop_tkn_itm_.Tid_xnde: xnde.AutoClose(this, root, src, src_len, bgn_pos, cur_pos, tkn, closing_tkn_tid); break;
|
||||
case Xop_tkn_itm_.Tid_apos: apos.AutoClose(this, src, src_len, bgn_pos, cur_pos, tkn); break;
|
||||
case Xop_tkn_itm_.Tid_lnke: lnke.AutoClose(this, src, src_len, bgn_pos, cur_pos, tkn); break;
|
||||
case Xop_tkn_itm_.Tid_hdr: hdr.AutoClose(this, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, tkn); break;
|
||||
case Xop_tkn_itm_.Tid_tblw_tb:
|
||||
case Xop_tkn_itm_.Tid_tblw_tr:
|
||||
case Xop_tkn_itm_.Tid_tblw_td:
|
||||
case Xop_tkn_itm_.Tid_tblw_th:
|
||||
case Xop_tkn_itm_.Tid_tblw_tc: tblw.AutoClose(this, root, src, src_len, bgn_pos, cur_pos, tkn); break;
|
||||
case Xop_tkn_itm_.Tid_lnki: lnki.Auto_close(this, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, tkn); break;
|
||||
case Xop_tkn_itm_.Tid_pre: para.AutoClose(this, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, tkn); break;
|
||||
}
|
||||
}
|
||||
public void Stack_pop_idx(int tilIdx) {
|
||||
stack_len = tilIdx < 0 ? 0 : tilIdx;
|
||||
cur_tkn_tid = stack_len == 0 ? Xop_tkn_itm_.Tid_null : stack[stack_len - 1].Tkn_tid();
|
||||
}
|
||||
public void Stack_pop_last() { // used primarily by lnke to remove lnke from stack
|
||||
--stack_len;
|
||||
cur_tkn_tid = stack_len == 0 ? Xop_tkn_itm_.Tid_null : stack[stack_len - 1].Tkn_tid();
|
||||
}
|
||||
public void CloseOpenItms(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
int stack_pos = -1, stack_len = ctx.Stack_len(); boolean stop = false;
|
||||
for (int i = 0; i < stack_len; i++) { // loop over stack
|
||||
Xop_tkn_itm prv_tkn = ctx.Stack_get(i);
|
||||
switch (prv_tkn.Tkn_tid()) { // find first list/hdr; close everything until this
|
||||
case Xop_tkn_itm_.Tid_list:
|
||||
case Xop_tkn_itm_.Tid_hdr:
|
||||
stack_pos = i; stop = true; break;
|
||||
}
|
||||
if (stop) break;
|
||||
}
|
||||
if (stack_pos == -1) return;
|
||||
ctx.Stack_pop_til(root, src, stack_pos, true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_txt);
|
||||
}
|
||||
|
||||
public static Xop_ctx New__top(Xowe_wiki wiki) {return New__top(wiki, Xoa_page_.Main_page_bry);} // HACK: use "Main_Page" to put in valid page title
|
||||
public static Xop_ctx New__top(Xowe_wiki wiki, byte[] ttl_bry) {return new Xop_ctx(wiki, Xoae_page.New(wiki, wiki.Ttl_parse(ttl_bry)));}
|
||||
|
||||
public static Xop_ctx New__sub__reuse_page(Xop_ctx ctx) {return New__sub(ctx.wiki, ctx, ctx.cur_page);} // CALLED: many
|
||||
public static Xop_ctx New__sub__reuse_lst(Xowe_wiki wiki, Xop_ctx ctx, Hash_adp_bry lst_page_regy) {
|
||||
Xop_ctx rv = new Xop_ctx(wiki, ctx.cur_page);
|
||||
Share_ctx_vars(ctx, rv);
|
||||
rv.lst_page_regy = lst_page_regy; // NOTE: must share ref for callers of New__sub__reuse_lst only (do not share for New__sub(), else stack overflow)
|
||||
return rv;
|
||||
}
|
||||
public static Xop_ctx New__sub(Xowe_wiki wiki, Xop_ctx ctx, Xoae_page page) {// TODO_OLD: new_sub_ should reuse ctx's page; callers who want new_page should call new_sub_page_; DATE:2014-04-10
|
||||
Xop_ctx rv = new Xop_ctx(wiki, page);
|
||||
Share_ctx_vars(ctx, rv);
|
||||
return rv;
|
||||
}
|
||||
public static Xop_ctx New__sub_and_page(Xowe_wiki wiki, Xop_ctx ctx) { // CALLED: poem
|
||||
Xop_ctx rv = new Xop_ctx(wiki, Xoae_page.New(wiki, wiki.Ttl_parse(ctx.Page().Ttl().Full_db())));
|
||||
Share_ctx_vars(ctx, rv);
|
||||
return rv;
|
||||
}
|
||||
|
||||
private static void Share_ctx_vars(Xop_ctx src, Xop_ctx trg) {
|
||||
trg.Page().Db().Page().Id_(src.Page().Db().Page().Id());
|
||||
trg.Lnki().File_logger_(src.Lnki().File_logger()); // always share lnki_logger between sub contexts
|
||||
trg.ref_ignore = src.ref_ignore; // copy ref_ignore; needed for refs inside poem else duplicate refs; it.s:La_Secchia_rapita/Canto_primo; DATE:2015-12-03
|
||||
trg.references_group = src.references_group;
|
||||
trg.cur_page.Ref_mgr_(src.cur_page.Ref_mgr());
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
public class Xop_ctx_ {
|
||||
public static String Page_as_str(Xop_ctx ctx) {return String_.new_u8(ctx.Page().Ttl().Full_db());}
|
||||
public static String Src_limit_and_escape_nl(byte[] src, int bgn, int limit) {
|
||||
int end = bgn + limit;
|
||||
int src_len = src.length;
|
||||
if (end > src_len) end = src_len;
|
||||
byte[] rv = Bry_.Mid(src, bgn, end);
|
||||
rv = Bry_.Replace(rv, Byte_ascii.Nl, Byte_ascii.Tab); // change nl to tab so text editor will show one warning per line
|
||||
return String_.new_u8(rv);
|
||||
}
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import org.junit.*;
|
||||
public class Xop_ctx__tst {
|
||||
@Before public void init() {fxt.Clear();} private Xop_ctx__fxt fxt = new Xop_ctx__fxt();
|
||||
@Test public void Src_limit_and_escape_nl() {
|
||||
fxt.Test_Src_limit_and_escape_nl("abcdefg", 4, 3, "efg"); // PURPOSE: bug fix; outOfBounds thrown; DATE:2014-03-31
|
||||
}
|
||||
}
|
||||
class Xop_ctx__fxt {
|
||||
public void Clear() {
|
||||
}
|
||||
public void Test_Src_limit_and_escape_nl(String src, int bgn, int limit, String expd) {
|
||||
String actl = Xop_ctx_.Src_limit_and_escape_nl(Bry_.new_u8(src), bgn, limit);
|
||||
Tfds.Eq(expd, actl);
|
||||
}
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
public interface Xop_ctx_wkr {
|
||||
void Ctor_ctx(Xop_ctx ctx);
|
||||
void Page_bgn(Xop_ctx ctx, Xop_root_tkn root);
|
||||
void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len);
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public interface Xop_lxr {
|
||||
int Lxr_tid();
|
||||
void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie);
|
||||
void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie);
|
||||
void Term(Btrie_fast_mgr core_trie);
|
||||
int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos);
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
/** Integer ids for lexers; values are sequential from 0 to 36. */
public class Xop_lxr_ {
  public static final int Tid_pipe           =  0;
  public static final int Tid_space          =  1;
  public static final int Tid_nbsp           =  2;
  public static final int Tid_tab            =  3;
  public static final int Tid_nl             =  4;
  public static final int Tid_amp            =  5;
  public static final int Tid_apos           =  6;
  public static final int Tid_colon          =  7;
  public static final int Tid_lnki_bgn       =  8;
  public static final int Tid_lnki_end       =  9;
  public static final int Tid_list           = 10;
  public static final int Tid_hdr            = 11;
  public static final int Tid_hr             = 12;
  public static final int Tid_xnde           = 13;
  public static final int Tid_lnke_bgn       = 14;
  public static final int Tid_lnke_end       = 15;
  public static final int Tid_tblw           = 16;
  public static final int Tid_pre            = 17;
  public static final int Tid_under          = 18;
  public static final int Tid_comment        = 19;
  public static final int Tid_eq             = 20;
  public static final int Tid_curly_bgn      = 21;
  public static final int Tid_curly_end      = 22;
  public static final int Tid_brack_bgn      = 23;
  public static final int Tid_brack_end      = 24;
  public static final int Tid_poem           = 25;
  public static final int Tid_tvar           = 26;
  public static final int Tid_vnt_bgn        = 27;
  public static final int Tid_vnt_end        = 28;
  public static final int Tid_vnt_eqgt       = 29;
  public static final int Tid_vnt_tmpl_bgn   = 30;
  public static final int Tid_word           = 31;
  public static final int Tid_nl_poem        = 32;
  public static final int Tid_cr             = 33;
  public static final int Tid_brack_end_lnki = 34;
  public static final int Tid_nl_tab         = 35;
  public static final int Tid_escape         = 36;
}
|
||||
@@ -1,107 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.langs.*;
|
||||
import gplx.xowa.parsers.apos.*; import gplx.xowa.parsers.amps.*; import gplx.xowa.parsers.lnkes.*; import gplx.xowa.parsers.hdrs.*; import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.tblws.*; import gplx.xowa.parsers.paras.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.tmpls.*; import gplx.xowa.parsers.miscs.*;
|
||||
public class Xop_lxr_mgr {
|
||||
// lexers handed to the constructor; initialized by Init_by_wiki / Init_by_lang
private final Xop_lxr[] ary;
// lexers added through Page__add
private final List_adp page_lxr_list = List_adp_.New();
// trie that every lexer is initialized against
private final Btrie_fast_mgr trie = Btrie_fast_mgr.cs();

/** Creates a manager over the given lexers; the array is stored as-is, not copied. */
public Xop_lxr_mgr(Xop_lxr[] ary) {
  this.ary = ary;
}

/** Returns the trie shared by all lexers of this manager. */
public Btrie_fast_mgr Trie() {
  return trie;
}
|
||||
public void Page__add(Xowe_wiki wiki, Xop_lxr... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xop_lxr lxr = ary[i];
|
||||
lxr.Init_by_wiki(wiki, trie);
|
||||
page_lxr_list.Add(lxr);
|
||||
}
|
||||
}
|
||||
public void Page__del_all() {
|
||||
int len = page_lxr_list.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xop_lxr lxr = (Xop_lxr)page_lxr_list.Get_at(i);
|
||||
lxr.Term(trie);
|
||||
}
|
||||
}
|
||||
public void Init_by_wiki(Xowe_wiki wiki) {
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
Xop_lxr lxr = ary[i];
|
||||
lxr.Init_by_wiki(wiki, trie);
|
||||
}
|
||||
}
|
||||
public void Init_by_lang(Xol_lang_itm lang) {
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
Xop_lxr lxr = ary[i];
|
||||
lxr.Init_by_lang(lang, trie);
|
||||
}
|
||||
}
|
||||
public static Xop_lxr_mgr new_tmpl_() {
|
||||
return new Xop_lxr_mgr(new Xop_lxr[]
|
||||
{ Xop_pipe_lxr.Instance, new Xop_eq_lxr(true), Xop_colon_lxr.Instance, Xop_space_lxr.Instance, Xop_tab_lxr.Instance, Xop_nl_lxr.Instance
|
||||
, Xop_curly_bgn_lxr.Instance, Xop_curly_end_lxr.Instance
|
||||
, Xop_brack_bgn_lxr.Instance, Xop_brack_end_lxr.Instance
|
||||
, Xop_comm_lxr.Instance
|
||||
, Xop_xnde_lxr.Instance // needed for xtn, noinclude, etc.
|
||||
, Xop_under_lxr.Instance
|
||||
, gplx.xowa.xtns.translates.Xop_tvar_lxr.Instance
|
||||
, Xop_cr_lxr.Instance // always ignore \r; DATE:2014-03-02
|
||||
});
|
||||
}
|
||||
public static Xop_lxr_mgr new_wiki_() {
|
||||
return new Xop_lxr_mgr(new Xop_lxr[]
|
||||
{ Xop_pipe_lxr.Instance, new Xop_eq_lxr(false), Xop_space_lxr.Instance, Xop_tab_lxr.Instance, Xop_nl_lxr.Instance
|
||||
, Xop_amp_lxr.Instance, Xop_apos_lxr.Instance, Xop_colon_lxr.Instance
|
||||
, Xop_lnki_lxr_bgn.Instance, Xop_lnki_lxr_end.Instance
|
||||
, Xop_list_lxr.Instance
|
||||
, Xop_hdr_lxr.Instance
|
||||
, Xop_hr_lxr.Instance
|
||||
, Xop_xnde_lxr.Instance
|
||||
, Xop_lnke_lxr.Instance, Xop_lnke_end_lxr.Instance
|
||||
, Xop_tblw_lxr.Instance
|
||||
, Xop_pre_lxr.Instance, Xop_nl_tab_lxr.Instance
|
||||
, Xop_comm_lxr.Instance
|
||||
, Xop_under_lxr.Instance
|
||||
});
|
||||
}
|
||||
public static Xop_lxr_mgr new_anchor_encoder() {
|
||||
return new Xop_lxr_mgr(new Xop_lxr[]
|
||||
{ Xop_pipe_lxr.Instance, new Xop_eq_lxr(false), Xop_space_lxr.Instance, Xop_tab_lxr.Instance, Xop_nl_lxr.Instance
|
||||
, Xop_curly_bgn_lxr.Instance, Xop_curly_end_lxr.Instance
|
||||
, Xop_amp_lxr.Instance, Xop_colon_lxr.Instance
|
||||
, Xop_apos_lxr.Instance
|
||||
, Xop_lnki_lxr_bgn.Instance, Xop_lnki_lxr_end.Instance
|
||||
, Xop_lnke_lxr.Instance, Xop_lnke_end_lxr.Instance
|
||||
, Xop_xnde_lxr.Instance
|
||||
});
|
||||
}
|
||||
public static final Xop_lxr_mgr Popup_lxr_mgr // same as orig_page, except apos_lxr added
|
||||
= new Xop_lxr_mgr(new Xop_lxr[]
|
||||
{ Xop_pipe_lxr.Instance, new Xop_eq_lxr(true), Xop_colon_lxr.Instance, Xop_space_lxr.Instance, Xop_tab_lxr.Instance, Xop_nl_lxr.Instance
|
||||
, Xop_curly_bgn_lxr.Instance, Xop_curly_end_lxr.Instance
|
||||
, Xop_brack_bgn_lxr.Instance, Xop_brack_end_lxr.Instance
|
||||
, Xop_comm_lxr.Instance
|
||||
, Xop_xnde_lxr.Instance // needed for xtn, noinclude, etc.
|
||||
, Xop_under_lxr.Instance
|
||||
, gplx.xowa.xtns.translates.Xop_tvar_lxr.Instance
|
||||
, Xop_cr_lxr.Instance // always ignore \r; DATE:2014-03-02
|
||||
, gplx.xowa.parsers.apos.Xop_apos_lxr.Instance // needed else multiple apos may be split across blocks;
|
||||
});
|
||||
}
|
||||
@@ -1,224 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.tmpls.*;
|
||||
public class Xop_parser { // NOTE: parsers are reused; do not keep any read-write state
|
||||
private final Xowe_wiki wiki;
|
||||
private final Btrie_fast_mgr tmpl_trie, wtxt_trie;
|
||||
private Xot_compile_data tmpl_props = new Xot_compile_data(); // NOTE: probably should not be a member variable, but leave for now; DATE:2016-12-02
|
||||
Xop_parser(Xowe_wiki wiki, Xop_lxr_mgr tmpl_lxr_mgr, Xop_lxr_mgr wtxt_lxr_mgr) {
|
||||
this.wiki = wiki;
|
||||
this.tmpl_lxr_mgr = tmpl_lxr_mgr; this.tmpl_trie = tmpl_lxr_mgr.Trie();
|
||||
this.wtxt_lxr_mgr = wtxt_lxr_mgr; this.wtxt_trie = wtxt_lxr_mgr.Trie();
|
||||
}
|
||||
public Xop_lxr_mgr Tmpl_lxr_mgr() {return tmpl_lxr_mgr;} private final Xop_lxr_mgr tmpl_lxr_mgr;
|
||||
public Xop_lxr_mgr Wtxt_lxr_mgr() {return wtxt_lxr_mgr;} private final Xop_lxr_mgr wtxt_lxr_mgr;
|
||||
public void Init_by_wiki(Xowe_wiki wiki) {
|
||||
tmpl_lxr_mgr.Init_by_wiki(wiki);
|
||||
wtxt_lxr_mgr.Init_by_wiki(wiki);
|
||||
}
|
||||
public void Init_by_lang(Xol_lang_itm lang) {
|
||||
tmpl_lxr_mgr.Init_by_lang(lang);
|
||||
wtxt_lxr_mgr.Init_by_lang(lang);
|
||||
}
|
||||
public byte[] Expand_tmpl(byte[] src) { // expands {{A}} -> some wikitext; called by tmpl_invk, lang_msgs, sidebar
|
||||
Xop_ctx ctx = Xop_ctx.New__sub__reuse_page(wiki.Parser_mgr().Ctx()); // PERF: reuse root ctx
|
||||
return Expand_tmpl(ctx, ctx.Tkn_mkr(), src);
|
||||
}
|
||||
private byte[] Expand_tmpl(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, byte[] src) {return Expand_tmpl(tkn_mkr.Root(src), ctx, tkn_mkr, src);}
|
||||
public byte[] Expand_tmpl(Xop_root_tkn root, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, byte[] src) {
|
||||
Parse(root, ctx, tkn_mkr, src, Xop_parser_tid_.Tid__tmpl, tmpl_trie, Xop_parser_.Doc_bgn_bos);
|
||||
int len = root.Subs_len();
|
||||
for (int i = 0; i < len; ++i)
|
||||
root.Subs_get(i).Tmpl_compile(ctx, src, tmpl_props);
|
||||
return Xot_tmpl_wtr.Instance.Write_all(ctx, root, src);
|
||||
}
|
||||
|
||||
public byte[] Parse_text_to_html(Xop_ctx ctx, byte[] src) {
|
||||
Bry_bfr bfr = wiki.Utl__bfr_mkr().Get_b512();
|
||||
Parse_text_to_html(bfr, ctx, ctx.Page(), false, src);
|
||||
return bfr.To_bry_and_rls();
|
||||
}
|
||||
public void Parse_text_to_html(Bry_bfr trg, Xop_ctx pctx, Xoae_page page, boolean para_enabled, byte[] src) {Parse_text_to_html(trg, pctx, page, Xoh_wtr_ctx.Basic, para_enabled, src);}
|
||||
public void Parse_text_to_html(Bry_bfr trg, Xop_ctx pctx, Xoae_page page, Xoh_wtr_ctx hctx, boolean para_enabled, byte[] src) {
|
||||
Xop_ctx ctx = Xop_ctx.New__sub(wiki, pctx, page);
|
||||
Xop_tkn_mkr tkn_mkr = ctx.Tkn_mkr();
|
||||
Xop_root_tkn root = tkn_mkr.Root(src);
|
||||
Xop_parser parser = wiki.Parser_mgr().Main();
|
||||
byte[] wtxt = parser.Expand_tmpl(root, ctx, tkn_mkr, src);
|
||||
root.Reset();
|
||||
ctx.Para().Enabled_(para_enabled);
|
||||
parser.Parse_wtxt_to_wdom(root, ctx, ctx.Tkn_mkr(), wtxt, Xop_parser_.Doc_bgn_bos);
|
||||
wiki.Html_mgr().Html_wtr().Write_doc(trg, ctx, hctx, wtxt, root);
|
||||
}
|
||||
|
||||
public Xot_defn_tmpl Parse_text_to_defn_obj(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xow_ns ns, byte[] name, byte[] src) {
|
||||
Xot_defn_tmpl rv = new Xot_defn_tmpl();
|
||||
Parse_text_to_defn(rv, ctx, tkn_mkr, ns, name, src);
|
||||
return rv;
|
||||
}
|
||||
public void Parse_text_to_defn(Xot_defn_tmpl tmpl, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xow_ns ns, byte[] name, byte[] src) {
|
||||
Xop_root_tkn root = tkn_mkr.Root(src);
|
||||
Parse(root, ctx, tkn_mkr, src, Xop_parser_tid_.Tid__defn, tmpl_trie, Xop_parser_.Doc_bgn_bos);
|
||||
tmpl_props.OnlyInclude_exists = false; int subs_len = root.Subs_len();
|
||||
for (int i = 0; i < subs_len; i++)
|
||||
root.Subs_get(i).Tmpl_compile(ctx, src, tmpl_props);
|
||||
boolean only_include_chk = Bry_find_.Find_fwd(src, Xop_xnde_tag_.Bry__onlyinclude, 0, src.length) != Bry_find_.Not_found;
|
||||
if (only_include_chk) tmpl_props.OnlyInclude_exists = true;
|
||||
tmpl.Init_by_new(ns, name, src, root, tmpl_props.OnlyInclude_exists);
|
||||
}
|
||||
public void Parse_page_all_clear(Xop_root_tkn root, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, byte[] src) {
|
||||
ctx.Page().Clear_all(); ctx.App().Msg_log().Clear();
|
||||
Parse_text_to_wdom(root, ctx, tkn_mkr, src, Xop_parser_.Doc_bgn_bos);
|
||||
}
|
||||
public Xop_root_tkn Parse_text_to_wdom_old_ctx(Xop_ctx old_ctx, byte[] src, boolean doc_bgn_pos) {return Parse_text_to_wdom(Xop_ctx.New__sub__reuse_page(old_ctx), src, doc_bgn_pos);}
|
||||
public Xop_root_tkn Parse_text_to_wdom(Xop_ctx new_ctx, byte[] src, boolean doc_bgn_pos) {
|
||||
new_ctx.Para().Enabled_n_();
|
||||
Xop_tkn_mkr tkn_mkr = new_ctx.Tkn_mkr();
|
||||
Xop_root_tkn root = tkn_mkr.Root(src);
|
||||
Parse_text_to_wdom(root, new_ctx, tkn_mkr, src, doc_bgn_pos ? Xop_parser_.Doc_bgn_bos : Xop_parser_.Doc_bgn_char_0);
|
||||
return root;
|
||||
}
|
||||
public void Parse_text_to_wdom(Xop_root_tkn root, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, byte[] src, int doc_bgn_pos) {
|
||||
byte parse_tid_old = ctx.Parse_tid();// NOTE: must store parse_tid b/c ctx can be reused by other classes
|
||||
ctx.Parse_tid_(Xop_parser_tid_.Tid__tmpl);
|
||||
root.Reset();
|
||||
byte[] mid_bry = Expand_tmpl(root, ctx, tkn_mkr, src);
|
||||
root.Data_mid_(mid_bry);
|
||||
root.Reset();
|
||||
Parse_wtxt_to_wdom(root, ctx, tkn_mkr, mid_bry, doc_bgn_pos);
|
||||
ctx.Parse_tid_(parse_tid_old);
|
||||
}
|
||||
public void Parse_wtxt_to_wdom(Xop_root_tkn root, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, byte[] wtxt, int doc_bgn_pos) {
|
||||
root.Root_src_(wtxt); // always set latest src; needed for Parse_all wherein src will first be raw and then parsed tmpl
|
||||
Parse(root, ctx, tkn_mkr, wtxt, Xop_parser_tid_.Tid__wtxt, wtxt_trie, doc_bgn_pos);
|
||||
}
|
||||
private void Parse(Xop_root_tkn root, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, byte[] src, byte parse_type, Btrie_fast_mgr trie, int doc_bgn_pos) {
|
||||
int len = src.length; if (len == 0) return; // nothing to parse;
|
||||
byte parse_tid_old = ctx.Parse_tid(); // NOTE: must store parse_tid b/c ctx can be reused by other classes
|
||||
ctx.Parse_tid_(parse_type);
|
||||
ctx.Parser__page_init(root, src);
|
||||
ctx.App().Parser_mgr().Core__uniq_mgr().Clear();
|
||||
Parse_to_src_end(root, ctx, tkn_mkr, src, trie, doc_bgn_pos, len);
|
||||
ctx.Parser__page_term(root, src, len);
|
||||
ctx.Parse_tid_(parse_tid_old);
|
||||
}
|
||||
public int Parse_to_src_end(Xop_root_tkn root, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, byte[] src, Btrie_fast_mgr trie, int pos, int len) {
|
||||
byte b = pos == -1 ? Byte_ascii.Nl : src[pos]; // simulate newLine at bgn of src; needed for lxrs which rely on \n (EX: "=a=")
|
||||
int txt_bgn = pos == -1 ? 0 : pos; Xop_tkn_itm txt_tkn = null;
|
||||
Btrie_rv trv = new Btrie_rv();
|
||||
while (true) {
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, pos, len);
|
||||
Xop_lxr lxr = null;
|
||||
if (o == null) // no lxr found; char is txt; increment pos
|
||||
pos++;
|
||||
else { // lxr found
|
||||
lxr = (Xop_lxr)o;
|
||||
if (txt_bgn != pos) // chars exist between pos and txt_bgn; make txt_tkn; see NOTE_1
|
||||
txt_tkn = Txt_add(ctx, tkn_mkr, root, txt_tkn, txt_bgn, pos);
|
||||
ctx.Lxr_make_(true);
|
||||
pos = lxr.Make_tkn(ctx, tkn_mkr, root, src, len, pos, trv.Pos());
|
||||
if (ctx.Lxr_make()) {txt_bgn = pos; txt_tkn = null;} // reset txt_tkn
|
||||
}
|
||||
if (pos == len) break;
|
||||
b = src[pos];
|
||||
}
|
||||
if (txt_bgn != pos) txt_tkn = Txt_add(ctx, tkn_mkr, root, txt_tkn, txt_bgn, pos);
|
||||
return pos;
|
||||
}
|
||||
public int Parse_to_stack_end(Xop_root_tkn root, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, Btrie_fast_mgr trie, int pos, int end) {
|
||||
byte b = pos == -1 ? Byte_ascii.Nl : src[pos]; // simulate \n at bgn of src; needed for lxrs which rely on \n (EX: "=a=")
|
||||
int txt_bgn = pos == -1 ? 0 : pos; Xop_tkn_itm txt_tkn = null;
|
||||
Xop_lxr lxr = null;
|
||||
Btrie_rv trv = new Btrie_rv();
|
||||
while (true) {
|
||||
lxr = null;
|
||||
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, pos, src_len);
|
||||
if (o == null) // no lxr found; char is txt; increment pos
|
||||
pos++;
|
||||
else { // lxr found
|
||||
lxr = (Xop_lxr)o;
|
||||
if (txt_bgn != pos) // chars exist between pos and txt_bgn; make txt_tkn; see NOTE_1
|
||||
txt_tkn = Txt_add(ctx, tkn_mkr, root, txt_tkn, txt_bgn, pos);
|
||||
ctx.Lxr_make_(true);
|
||||
pos = lxr.Make_tkn(ctx, tkn_mkr, root, src, src_len, pos, trv.Pos());
|
||||
if (ctx.Lxr_make()) {txt_bgn = pos; txt_tkn = null;} // reset txt_tkn
|
||||
}
|
||||
if ( pos >= end
|
||||
&& ctx.Stack_len() == 0 // check stack is 0 to avoid dangling templates
|
||||
) {
|
||||
if (o == null) {} // last sequence is not text; avoids splitting words across blocks; EX: 4 block and word of "abcde" will split to "abcd" and "e"
|
||||
else {
|
||||
if (lxr != null) {
|
||||
boolean stop = true;
|
||||
switch (lxr.Lxr_tid()) {
|
||||
case Xop_lxr_.Tid_eq:
|
||||
case Xop_lxr_.Tid_nl:
|
||||
stop = false;
|
||||
break;
|
||||
}
|
||||
if (stop)
|
||||
break;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (pos >= src_len) break;
|
||||
b = src[pos];
|
||||
}
|
||||
if (txt_bgn != pos) txt_tkn = Txt_add(ctx, tkn_mkr, root, txt_tkn, txt_bgn, pos);
|
||||
return pos;
|
||||
}
|
||||
private static Xop_tkn_itm Txt_add(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, Xop_tkn_itm tkn, int txt_bgn, int pos) {
|
||||
if (pos == Xop_parser_.Doc_bgn_bos) return null; // don't make txt_tkn for Bos_pos
|
||||
if (tkn == null) { // no existing txt_tkn; create new one
|
||||
tkn = tkn_mkr.Txt(txt_bgn, pos);
|
||||
ctx.Subs_add(root, tkn);
|
||||
}
|
||||
else // existing txt_tkn; happens for false matches; EX: abc[[\nef[[a]]; see NOTE_1
|
||||
tkn.Src_end_(pos);
|
||||
return tkn;
|
||||
}
|
||||
public static Xop_parser new_(Xowe_wiki wiki, Xop_lxr_mgr tmpl_lxr_mgr, Xop_lxr_mgr wtxt_lxr_mgr) {return new Xop_parser(wiki, tmpl_lxr_mgr, wtxt_lxr_mgr);}
|
||||
public static Xop_parser new_wiki(Xowe_wiki wiki) {
|
||||
Xop_parser rv = new Xop_parser(wiki, Xop_lxr_mgr.new_tmpl_(), Xop_lxr_mgr.new_wiki_());
|
||||
rv.Init_by_wiki(wiki);
|
||||
rv.Init_by_lang(wiki.Lang());
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
/*
|
||||
NOTE_1
|
||||
abc[[\nef[[a]]
|
||||
<BOS> : txt_bgn = 0; txt_tkn = null;
|
||||
abc : increment pos
|
||||
[[\n : lnki lxr
|
||||
: (1): txt_tkn == null, so create txt_tkn with (0, 3)
|
||||
: (2): lxr.Make_tkn() entered for lnki; however \n exits lnki
|
||||
: (3): note that ctx.Lxr_make == false, so txt_bgn/txt_tkn is not reset
|
||||
ef : still just text; increment pos
|
||||
[[a]] : lnki entered
|
||||
: (1): txt_tkn != null; set end to 8
|
||||
: (2): lxr.Make_tkn() entered and lnki made
|
||||
: (3): note that ctx.Lxr_make == true, so txt_bgn = 13 and txt_tkn = null
|
||||
*/
|
||||
@@ -1,43 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.xowa.htmls.core.htmls.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
public class Xop_parser_ {
|
||||
public static final int Doc_bgn_bos = -1, Doc_bgn_char_0 = 0;
|
||||
public static byte[] Parse_text_to_html(Xowe_wiki wiki, Xop_ctx owner_ctx, Xoae_page page, Xoa_ttl ttl, byte[] src, boolean para_enabled) { // NOTE: must pass in same page instance; do not do Xoa_page_.new_(), else img_idx will get reset to 0; DATE:2015-02-08
|
||||
// init
|
||||
Xop_ctx ctx = Xop_ctx.New__sub(wiki, owner_ctx, page);
|
||||
Xop_tkn_mkr tkn_mkr = ctx.Tkn_mkr();
|
||||
Xop_root_tkn root = tkn_mkr.Root(src);
|
||||
Xop_parser parser = wiki.Parser_mgr().Main();
|
||||
|
||||
// expand template; EX: {{A}} -> wikitext
|
||||
byte[] wtxt = parser.Expand_tmpl(root, ctx, tkn_mkr, src);
|
||||
|
||||
// parse wikitext
|
||||
root.Reset();
|
||||
ctx.Para().Enabled_(para_enabled);
|
||||
parser.Parse_wtxt_to_wdom(root, ctx, ctx.Tkn_mkr(), wtxt, Xop_parser_.Doc_bgn_bos);
|
||||
|
||||
// write html
|
||||
Bry_bfr bfr = wiki.Utl__bfr_mkr().Get_b512();
|
||||
wiki.Html_mgr().Html_wtr().Write_doc(bfr, ctx, Xoh_wtr_ctx.Basic, wtxt, root);
|
||||
return bfr.To_bry_and_rls();
|
||||
}
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import org.junit.*;
|
||||
public class Xop_parser__tst {
|
||||
@Before public void init() {fxt.Clear();} private Xop_parser__fxt fxt = new Xop_parser__fxt();
|
||||
@Test public void Para_y() {
|
||||
fxt.Test_parse_to_html(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, ""
|
||||
, "b"
|
||||
), true, String_.Concat_lines_nl_skip_last
|
||||
( "<p>a"
|
||||
, "</p>"
|
||||
, ""
|
||||
, "<p>b"
|
||||
, "</p>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Para_n() {
|
||||
fxt.Test_parse_to_html(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, ""
|
||||
, "b"
|
||||
), false, String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, "b"
|
||||
));
|
||||
}
|
||||
}
|
||||
class Xop_parser__fxt {
|
||||
private final Xop_fxt fxt = new Xop_fxt();
|
||||
private Bry_bfr bfr = Bry_bfr_.Reset(255);
|
||||
public void Clear() {
|
||||
fxt.Reset();
|
||||
}
|
||||
public void Test_parse_to_html(String raw, boolean para_enabled, String expd) {
|
||||
byte[] raw_bry = Bry_.new_u8(raw);
|
||||
fxt.Wiki().Parser_mgr().Main().Parse_text_to_html(bfr, fxt.Ctx(), fxt.Page(), para_enabled, raw_bry);
|
||||
Tfds.Eq(expd, bfr.To_str_and_clear());
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
/** Ids for the parse mode stored on a ctx through Parse_tid_ / read through Parse_tid. */
public class Xop_parser_tid_ {
	public static final byte Tid__null = 0;
	public static final byte Tid__defn = 1;
	public static final byte Tid__tmpl = 2;
	public static final byte Tid__wtxt = 3;
}
|
||||
@@ -1,28 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
public class Xop_root_tkn extends Xop_tkn_itm_base {
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_root;}
|
||||
public byte[] Root_src() {return root_src;} public Xop_root_tkn Root_src_(byte[] v) {root_src = v; return this;} private byte[] root_src = Bry_.Empty;
|
||||
public byte[] Data_mid() {return data_mid;} public Xop_root_tkn Data_mid_(byte[] v) {data_mid = v; return this;} private byte[] data_mid = Bry_.Empty;
|
||||
public byte[] Data_htm() {return data_htm;} public Xop_root_tkn Data_htm_(byte[] v) {data_htm = v; return this;} private byte[] data_htm = Bry_.Empty;
|
||||
@Override public void Reset() {
|
||||
super.Reset();
|
||||
root_src = Bry_.Empty;
|
||||
}
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.tests.*;
|
||||
public class Xop_tkn_chkr_base implements Tst_chkr {
|
||||
@gplx.Virtual public Class<?> TypeOf() {return Xop_tkn_itm.class;}
|
||||
@gplx.Virtual public byte Tkn_tid() {return Byte_.Max_value_127;}
|
||||
public Xop_tkn_chkr_base TypeId_dynamic(int v) {typeId = Xop_tkn_itm_.Tid__names[v]; return this;} private String typeId = null;
|
||||
public int Src_bgn() {return src_bgn;} private int src_bgn = -1;
|
||||
public int Src_end() {return src_end;} private int src_end = -1;
|
||||
public byte Ignore() {return ignore;} private Xop_tkn_chkr_base Ignore_(byte v) {ignore = v; return this;} private byte ignore = Bool_.__byte;
|
||||
public Xop_tkn_chkr_base Ignore_y_() {return Ignore_(Bool_.Y_byte);}
|
||||
public Xop_tkn_chkr_base Src_rng_(int bgn, int end) {src_bgn = bgn; src_end = end; return this;}
|
||||
public String Raw() {return raw;} public Xop_tkn_chkr_base Raw_(String v) {raw = v; return this;} private String raw;
|
||||
public String Raw_src() {return raw_src;} public Xop_tkn_chkr_base Raw_src_(String v) {raw_src = v; return this;} private String raw_src;
|
||||
public Xop_tkn_chkr_base[] Subs() {return subs;} public Xop_tkn_chkr_base Subs_(Xop_tkn_chkr_base... v) {subs = v; return this;} private Xop_tkn_chkr_base[] subs = null;
|
||||
@gplx.Virtual public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
|
||||
Xop_tkn_itm actl = (Xop_tkn_itm)actl_obj;
|
||||
int rv = 0;
|
||||
rv += Chk_basic(mgr, path, actl, rv);
|
||||
rv += Chk_hook(mgr, path, actl, rv);
|
||||
rv += Chk_subs(mgr, path, actl, rv);
|
||||
return rv;
|
||||
}
|
||||
@gplx.Virtual public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {return 0;}
|
||||
int Chk_basic(Tst_mgr mgr, String path, Xop_tkn_itm actl, int err) {
|
||||
if (typeId == null) typeId = Xop_tkn_itm_.Tid__names[this.Tkn_tid()];
|
||||
err += mgr.Tst_val(typeId == null, path, "typeId", typeId, Xop_tkn_itm_.Tid__names[actl.Tkn_tid()]);
|
||||
if (ignore != Bool_.__byte) err += mgr.Tst_val(ignore == Bool_.__byte, path, "ignore", ignore == Bool_.Y_byte, actl.Ignore()); // "ignore !=" to skip comparison unless explicitly toggled
|
||||
err += mgr.Tst_val(src_bgn == -1, path, "src_bgn", src_bgn, actl.Src_bgn());
|
||||
err += mgr.Tst_val(src_end == -1, path, "src_end", src_end, actl.Src_end());
|
||||
if (raw != null) {
|
||||
String raw_actl = raw_src == null ? mgr.Vars_get_bry_as_str("raw_bry", actl.Src_bgn(), actl.Src_end()) : String_.Mid(raw_src, actl.Src_bgn(), actl.Src_end());
|
||||
err += mgr.Tst_val(raw == null, path, "raw", raw, raw_actl);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
int Chk_subs(Tst_mgr mgr, String path, Xop_tkn_itm actl, int err) {
|
||||
if (subs != null) {
|
||||
int actl_subs_len = actl.Subs_len();
|
||||
Xop_tkn_itm[] actl_subs = new Xop_tkn_itm[actl_subs_len];
|
||||
for (int i = 0; i < actl_subs_len; i++)
|
||||
actl_subs[i] = actl.Subs_get(i);
|
||||
return mgr.Tst_sub_ary(subs, actl_subs, path, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
public static final Tst_chkr Null = Tst_mgr.Null_chkr;
|
||||
public static final Xop_tkn_chkr_base[] Ary_empty = new Xop_tkn_chkr_base[0];
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
public interface Xop_tkn_grp {
|
||||
int Subs_len();
|
||||
Xop_tkn_itm Subs_get(int i);
|
||||
void Subs_add(Xop_tkn_itm sub);
|
||||
void Subs_add_grp(Xop_tkn_itm sub, Xop_tkn_grp old_grp, int old_sub_idx);
|
||||
void Subs_del_after(int pos_bgn);
|
||||
void Subs_clear();
|
||||
void Subs_move(Xop_tkn_itm tkn);
|
||||
int Subs_src_bgn(int sub_idx);
|
||||
int Subs_src_end(int sub_idx);
|
||||
void Subs_src_pos_(int sub_idx, int bgn, int end);
|
||||
Xop_tkn_itm Immutable_clone(Xop_ctx ctx, Xop_tkn_itm tkn, int sub_idx);
|
||||
}
|
||||
class Xop_tkn_grp_ {
|
||||
public static final Xop_tkn_grp Null = null;
|
||||
}
|
||||
@@ -1,51 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.xowa.parsers.tmpls.*;
|
||||
import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.htmls.*;
|
||||
public interface Xop_tkn_itm extends Xop_tkn_grp {
|
||||
byte Tkn_tid();
|
||||
Xop_tkn_itm Tkn_ini_pos(boolean immutable, int bgn, int end);
|
||||
Xop_tkn_itm Tkn_clone(Xop_ctx ctx, int bgn, int end);
|
||||
boolean Tkn_immutable();
|
||||
Xop_tkn_grp Tkn_grp();
|
||||
int Src_bgn();
|
||||
int Src_end();
|
||||
int Src_bgn_grp(Xop_tkn_grp grp, int sub_idx);
|
||||
int Src_end_grp(Xop_tkn_grp grp, int sub_idx);
|
||||
int Tkn_sub_idx();
|
||||
boolean Ignore();
|
||||
Xop_tkn_itm Tkn_grp_(Xop_tkn_grp grp, int sub_idx);
|
||||
void Src_end_(int v);
|
||||
void Src_end_grp_(Xop_ctx ctx, Xop_tkn_grp grp, int sub_idx, int src_end);
|
||||
Xop_tkn_itm Ignore_y_();
|
||||
void Ignore_y_grp_(Xop_ctx ctx, Xop_tkn_grp grp, int sub_idx);
|
||||
void Clear();
|
||||
void Tmpl_fmt(Xop_ctx ctx, byte[] src, Xot_fmtr fmtr);
|
||||
void Tmpl_compile(Xop_ctx ctx, byte[] src, Xot_compile_data prep_data); // SEE:NOTE_1:Tmpl_compile
|
||||
boolean Tmpl_evaluate(Xop_ctx ctx, byte[] src, Xot_invk caller, Bry_bfr bfr);
|
||||
void Html__write(Bry_bfr bfr, Xoh_html_wtr wtr, Xowe_wiki wiki, Xoae_page page, Xop_ctx ctx, Xoh_wtr_ctx hctx, Xoh_html_wtr_cfg cfg, Xop_tkn_grp grp, int sub_idx, byte[] src);
|
||||
}
|
||||
/*
|
||||
NOTE_1: Tmpl_compile
|
||||
- called for tmpl_defn
|
||||
- identifies tkn as static or dynamic; important for evaluate later; if static, evaluate will simply extract src
|
||||
- if static, parses prm; EX: {{{1|a}}} will produce member variables of idx=1 and dflt=a
|
||||
- if static, parses tmpl_name; EX: {{concat|a|b}} will generate name of concat
|
||||
- if <onlyinclude> mark tmpl accordingly
|
||||
*/
|
||||
@@ -1,130 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
public class Xop_tkn_itm_ {
|
||||
public static final Xop_tkn_itm[] Ary_empty = new Xop_tkn_itm[0];
|
||||
public static final byte
|
||||
Tid_null = 0
|
||||
, Tid_root = 1
|
||||
, Tid_txt = 2
|
||||
, Tid_ignore = 3
|
||||
, Tid_newLine = 4
|
||||
, Tid_space = 5
|
||||
, Tid_tab = 6
|
||||
, Tid_pipe = 7
|
||||
, Tid_eq = 8
|
||||
, Tid_colon = 9
|
||||
, Tid_amp = 10
|
||||
, Tid_lt = 11
|
||||
, Tid_gt = 12
|
||||
, Tid_quot = 13
|
||||
, Tid_apos = 14
|
||||
, Tid_html_ref = 15
|
||||
, Tid_html_ncr = 16
|
||||
, Tid_lnki_bgn = 17
|
||||
, Tid_lnki_end = 18
|
||||
, Tid_lnki = 19
|
||||
, Tid_lnke = 20
|
||||
, Tid_hr = 21
|
||||
, Tid_hdr = 22
|
||||
, Tid_tblw_tb = 23
|
||||
, Tid_tblw_te = 24
|
||||
, Tid_tblw_tr = 25
|
||||
, Tid_tblw_th = 26
|
||||
, Tid_tblw_td = 27
|
||||
, Tid_tblw_tc = 28
|
||||
, Tid_list = 29
|
||||
, Tid_xnde = 30
|
||||
, Tid_xatr = 31
|
||||
, Tid_tmpl_prm_bgn = 32
|
||||
, Tid_tmpl_prm_end = 33
|
||||
, Tid_tmpl_prm = 34
|
||||
, Tid_tmpl_invk_dat = 35
|
||||
, Tid_arg_nde = 36
|
||||
, Tid_arg_itm = 37
|
||||
, Tid_tmpl_invk = 38
|
||||
, Tid_tmpl_curly_bgn = 39
|
||||
, Tid_brack_bgn = 40
|
||||
, Tid_brack_end = 41
|
||||
, Tid_para = 42
|
||||
, Tid_pre = 43
|
||||
, Tid_bry = 44
|
||||
, Tid_under = 45
|
||||
, Tid_tvar = 46
|
||||
, Tid_vnt = 47
|
||||
, Tid_vnt_rule = 48
|
||||
, Tid_vnt_eqgt = 49
|
||||
, Tid_cr = 50
|
||||
, Tid_escape = 51
|
||||
;
|
||||
public static final String[] Tid__names
|
||||
= new String[]
|
||||
{ "null"
|
||||
, "root"
|
||||
, "text"
|
||||
, "ignore"
|
||||
, "newLine"
|
||||
, "space"
|
||||
, "tab"
|
||||
, "pipe"
|
||||
, "eq"
|
||||
, "colon"
|
||||
, "amp"
|
||||
, "lt"
|
||||
, "gt"
|
||||
, "quot"
|
||||
, "apos"
|
||||
, "htmlRef"
|
||||
, "htmlNcr"
|
||||
, "lnki_bgn"
|
||||
, "lnki_end"
|
||||
, "lnki"
|
||||
, "lnke"
|
||||
, "hr"
|
||||
, "hdr"
|
||||
, "tblw_tb"
|
||||
, "tblw_te"
|
||||
, "tblw_tr"
|
||||
, "tblw_th"
|
||||
, "tblw_td"
|
||||
, "tblw_tc"
|
||||
, "list"
|
||||
, "xnde"
|
||||
, "xatr"
|
||||
, "tmpl_prm_bgn"
|
||||
, "tmpl_prm_end"
|
||||
, "tmpl_prm"
|
||||
, "tmpl_invk_dat"
|
||||
, "arg"
|
||||
, "arg_itm"
|
||||
, "tmpl_invk"
|
||||
, "tmpl_curly_bgn"
|
||||
, "brack_bgn"
|
||||
, "brack_end"
|
||||
, "para"
|
||||
, "para_pre"
|
||||
, "bry"
|
||||
, "under"
|
||||
, "tvar"
|
||||
, "vnt"
|
||||
, "vnt_rule"
|
||||
, "vnt_eqgt"
|
||||
, "cr"
|
||||
, "escape"
|
||||
};
|
||||
}
|
||||
@@ -1,169 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.xowa.parsers.tmpls.*;
|
||||
import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.htmls.*;
|
||||
public abstract class Xop_tkn_itm_base implements Xop_tkn_itm {
|
||||
public abstract byte Tkn_tid();
|
||||
public Xop_tkn_grp Tkn_grp() {return grp == null ? this : grp;} private Xop_tkn_grp grp; // NOTE: not sure about this; need to handle null refs when tkns are manipulated but not yet added to a group
|
||||
public Xop_tkn_itm Tkn_ini_pos(boolean immutable, int bgn, int end) {this.immutable = immutable; this.src_bgn = bgn; this.src_end = end; return this;}
|
||||
public Xop_tkn_itm Tkn_grp_(Xop_tkn_grp grp, int sub_idx) {this.grp = grp; this.tkn_sub_idx = sub_idx; return this;}
|
||||
@gplx.Virtual public Xop_tkn_itm Tkn_clone(Xop_ctx ctx, int bgn, int end) {throw Err_.new_wo_type("tkn_clone not implemented", "name", Xop_tkn_itm_.Tid__names[this.Tkn_tid()]);}
|
||||
public boolean Tkn_immutable() {return immutable;} private boolean immutable;
|
||||
public int Tkn_sub_idx() {return tkn_sub_idx;} private int tkn_sub_idx = -1;
|
||||
public int Src_bgn() {return src_bgn;} private int src_bgn = -1;
|
||||
public int Src_end() {return src_end;} private int src_end = -1;
|
||||
public void Src_end_(int v) {src_end = v;}
|
||||
public int Src_bgn_grp(Xop_tkn_grp grp, int sub_idx) {return immutable ? grp.Subs_src_bgn(sub_idx) : src_bgn;}
|
||||
public int Src_end_grp(Xop_tkn_grp grp, int sub_idx) {return immutable ? grp.Subs_src_end(sub_idx) : src_end;}
|
||||
public int Subs_src_bgn(int sub_idx) {if (subs_len == 0) throw Err_.new_wo_type("no subs available", "idx", sub_idx); return subs_pos_ary[ sub_idx * 2];}
|
||||
public int Subs_src_end(int sub_idx) {if (subs_len == 0) throw Err_.new_wo_type("no subs available", "idx", sub_idx); return subs_pos_ary[(sub_idx * 2) + 1];}
|
||||
public void Subs_src_pos_(int sub_idx, int bgn, int end) {
|
||||
int pos_idx = sub_idx * 2;
|
||||
int subs_pos_ary_len = subs_pos_ary.length;
|
||||
if (pos_idx + 1 > subs_pos_ary_len) {
|
||||
int[] new_subs_pos_ary = new int[(pos_idx + 1) * 2];
|
||||
Array_.Copy_to(subs_pos_ary, 0, new_subs_pos_ary, 0, subs_pos_ary.length);
|
||||
subs_pos_ary = new_subs_pos_ary;
|
||||
}
|
||||
subs_pos_ary[pos_idx] = bgn;
|
||||
subs_pos_ary[pos_idx + 1] = end;
|
||||
}
|
||||
public boolean Ignore() {return ignore;} private boolean ignore;
|
||||
public Xop_tkn_itm Ignore_y_() {
|
||||
ignore = true;
|
||||
return this;
|
||||
}
|
||||
public int Subs_len() {return subs_len;} private int subs_len;
|
||||
public Xop_tkn_itm[] Subs() {return subs;}
|
||||
public Xop_tkn_itm Subs_get(int i) {return subs[i];}
|
||||
public Xop_tkn_itm Subs_get_or_null(int i) {return i < subs_len ? subs[i] : null;}
|
||||
public void Subs_add(Xop_tkn_itm sub) {
|
||||
int new_len = subs_len + 1;
|
||||
if (new_len > subs_max) { // ary too small >>> expand
|
||||
subs_max = new_len * 2;
|
||||
Xop_tkn_itm[] new_subs = new Xop_tkn_itm[subs_max];
|
||||
Array_.Copy_to(subs, 0, new_subs, 0, subs_len);
|
||||
subs = new_subs;
|
||||
}
|
||||
subs[subs_len] = sub;
|
||||
sub.Tkn_grp_(this, subs_len);
|
||||
subs_len = new_len;
|
||||
} private Xop_tkn_itm[] subs = Xop_tkn_itm_.Ary_empty; int subs_max; int[] subs_pos_ary = Int_.Ary_empty;
|
||||
public void Subs_add_grp(Xop_tkn_itm sub, Xop_tkn_grp old_grp, int old_sub_idx) {
|
||||
this.Subs_add(sub);
|
||||
if (sub.Tkn_immutable())
|
||||
this.Subs_src_pos_(subs_len - 1, sub.Src_bgn_grp(old_grp, old_sub_idx), sub.Src_end_grp(old_grp, old_sub_idx));
|
||||
}
|
||||
public void Subs_del_after(int tkn_sub_idx) {
|
||||
if (tkn_sub_idx >= subs_len) return; // ignore delete after len; PRUNE: breaks 3 tests;
|
||||
for (int i = tkn_sub_idx; i < subs_len; i++)
|
||||
subs[i] = null;
|
||||
subs_len = tkn_sub_idx;
|
||||
}
|
||||
public void Subs_del_between(Xop_ctx ctx, int idx_bgn, int idx_end) {
|
||||
if (idx_bgn >= subs_len || idx_bgn >= idx_end) return; // ignore invalid bounds; PRUNE: breaks 2 tests
|
||||
int idx_dif = idx_end - idx_bgn;
|
||||
for (int trg_idx = idx_bgn; trg_idx < subs_len; trg_idx++) {
|
||||
int src_idx = trg_idx + idx_dif;
|
||||
if (src_idx < subs_len) { // trg exists >>> move tkn from src to trg
|
||||
Xop_tkn_itm src_tkn = subs[src_idx];
|
||||
subs[trg_idx] = src_tkn;
|
||||
src_tkn.Tkn_grp_(this, trg_idx);
|
||||
subs[src_idx] = null;
|
||||
}
|
||||
else
|
||||
subs[trg_idx] = null;
|
||||
}
|
||||
subs_len -= idx_dif;
|
||||
}
|
||||
public void Subs_clear() {
|
||||
subs_len = subs_max = 0;
|
||||
subs = Xop_tkn_itm_.Ary_empty;
|
||||
subs_pos_ary = Int_.Ary_empty;
|
||||
}
|
||||
public void Subs_move(Xop_tkn_itm tkn) {
|
||||
int nxt_idx = tkn_sub_idx + 1, len = tkn.Subs_len();
|
||||
for (int i = nxt_idx; i < len; i++) {
|
||||
Xop_tkn_itm sub = tkn.Subs_get(i);
|
||||
Subs_add_grp(sub, tkn, i);
|
||||
}
|
||||
tkn.Subs_del_after(nxt_idx);
|
||||
}
|
||||
public void Subs_move(Xop_tkn_itm owner, int sub_idx, int subs_len) {
|
||||
for (int i = sub_idx; i < subs_len; i++) {
|
||||
Xop_tkn_itm sub = owner.Subs_get(i);
|
||||
this.Subs_add(sub);
|
||||
}
|
||||
owner.Subs_del_after(sub_idx);
|
||||
}
|
||||
public Xop_tkn_itm Immutable_clone(Xop_ctx ctx, Xop_tkn_itm tkn, int sub_idx) {
|
||||
int pos_idx = sub_idx * 2;
|
||||
Xop_tkn_itm rv = tkn.Tkn_clone(ctx, subs_pos_ary[pos_idx], subs_pos_ary[pos_idx + 1]);
|
||||
subs[sub_idx] = rv;
|
||||
rv.Tkn_grp_(this, sub_idx);
|
||||
return rv;
|
||||
}
|
||||
public void Src_end_grp_(Xop_ctx ctx, Xop_tkn_grp grp, int sub_idx, int src_end) {
|
||||
Xop_tkn_itm tkn = this;
|
||||
if (immutable) tkn = grp.Immutable_clone(ctx, this, sub_idx);
|
||||
tkn.Src_end_(src_end);
|
||||
subs_pos_ary[(sub_idx * 2) + 1] = src_end;
|
||||
}
|
||||
public void Ignore_y_grp_(Xop_ctx ctx, Xop_tkn_grp grp, int sub_idx) {
|
||||
Xop_tkn_itm tkn = this;
|
||||
if (immutable) tkn = grp.Immutable_clone(ctx, this, sub_idx);
|
||||
tkn.Ignore_y_();
|
||||
}
|
||||
public void Subs_grp_(Xop_ctx ctx, Xop_tkn_itm tkn, Xop_tkn_grp grp, int sub_idx) {
|
||||
// if (tkn.Tkn_immutable()) tkn = Subs_immutable_clone(ctx, tkn);
|
||||
// tkn.Tkn_grp_(grp, sub_idx);
|
||||
}
|
||||
@gplx.Virtual public void Reset() {
|
||||
src_bgn = src_end = tkn_sub_idx = -1; ignore = false; tmpl_static = false;
|
||||
if (subs.length > Tkn_subs_max) {
|
||||
subs = new Xop_tkn_itm[Tkn_subs_max];
|
||||
subs_max = Tkn_subs_max;
|
||||
subs_pos_ary = new int[(Tkn_subs_max + 1) * 2];
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < subs_len; i++)
|
||||
subs[i] = null;
|
||||
}
|
||||
subs_len = 0;
|
||||
}
|
||||
@gplx.Virtual public void Html__write(Bry_bfr bfr, Xoh_html_wtr wtr, Xowe_wiki wiki, Xoae_page page, Xop_ctx ctx, Xoh_wtr_ctx hctx, Xoh_html_wtr_cfg cfg, Xop_tkn_grp grp, int sub_idx, byte[] src) {throw Err_.new_unimplemented();}
|
||||
public void Clear() {
|
||||
src_bgn = src_end = tkn_sub_idx = -1; ignore = false; tmpl_static = false;
|
||||
Subs_clear();
|
||||
}
|
||||
@gplx.Virtual public void Tmpl_fmt(Xop_ctx ctx, byte[] src, Xot_fmtr fmtr) {fmtr.Reg_ary(ctx, src, tmpl_static, src_bgn, src_end, subs_len, subs);}
|
||||
@gplx.Virtual public void Tmpl_compile(Xop_ctx ctx, byte[] src, Xot_compile_data prep_data) {
|
||||
if (!ignore) tmpl_static = true;
|
||||
for (int i = 0; i < subs_len; i++)
|
||||
subs[i].Tmpl_compile(ctx, src, prep_data);
|
||||
} boolean tmpl_static = false;
|
||||
@gplx.Virtual public boolean Tmpl_evaluate(Xop_ctx ctx, byte[] src, Xot_invk caller, Bry_bfr bfr) {
|
||||
if (tmpl_static) bfr.Add_mid(src, src_bgn, src_end);
|
||||
for (int i = 0; i < subs_len; i++)
|
||||
subs[i].Tmpl_evaluate(ctx, src, caller, bfr);
|
||||
return true;
|
||||
}
|
||||
static final String GRP_KEY = "xowa.tkn_base";
|
||||
public static final int Tkn_subs_max = 16;
|
||||
}
|
||||
@@ -1,175 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.langs.htmls.entitys.*;
|
||||
import gplx.xowa.parsers.apos.*; import gplx.xowa.parsers.amps.*; import gplx.xowa.parsers.lnkes.*; import gplx.xowa.parsers.hdrs.*; import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.tblws.*;
|
||||
import gplx.xowa.parsers.paras.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.tmpls.*; import gplx.xowa.parsers.miscs.*; import gplx.xowa.parsers.vnts.*; import gplx.xowa.xtns.cites.*;
|
||||
public class Xop_tkn_mkr {
|
||||
public Xop_root_tkn Root(byte[] raw) {return new Xop_root_tkn().Root_src_(raw);}
|
||||
public Xop_txt_tkn Txt(int bgn, int end) {return new Xop_txt_tkn(bgn, end);}
|
||||
public Xop_space_tkn Space(Xop_tkn_grp grp, int bgn, int end) {Xop_space_tkn rv = new Xop_space_tkn(false, bgn, end); grp.Subs_src_pos_(grp.Subs_len(), bgn, end); return rv;}
|
||||
public Xop_space_tkn Space_mutable(int bgn, int end) {return new Xop_space_tkn(false, bgn, end);}
|
||||
public Xop_apos_tkn Apos(int bgn, int end
|
||||
, int aposLen, int typ, int cmd, int lit_apos) {return new Xop_apos_tkn(bgn, end, aposLen, typ, cmd, lit_apos);}
|
||||
public Xop_tkn_itm Amp_txt(int bgn, int end, Gfh_entity_itm itm) {return new Xop_amp_tkn_ent(bgn, end, itm);}
|
||||
public Xop_tkn_itm Amp_num(int bgn, int end, int val_int, byte[] val_bry) {return new Xop_amp_tkn_num(bgn, end, val_int, val_bry);}
|
||||
public Xop_tkn_itm Amp_num(int bgn, int end, int val_int) {return new Xop_amp_tkn_num(bgn, end, val_int, gplx.core.intls.Utf16_.Encode_int_to_bry(val_int));}
|
||||
public Xop_nl_tkn NewLine(int bgn, int end, byte nl_typ, int nl_len) {return new Xop_nl_tkn(bgn, end, nl_typ, nl_len);}
|
||||
public Xop_lnki_tkn Lnki(int bgn, int end) {return (Xop_lnki_tkn)new Xop_lnki_tkn().Tkn_ini_pos(false, bgn, end);}
|
||||
public Xop_list_tkn List_bgn(int bgn, int end, byte listType, int symLen) {return Xop_list_tkn.bgn_(bgn, end, listType, symLen);}
|
||||
public Xop_list_tkn List_end(int pos, byte listType) {return Xop_list_tkn.end_(pos, listType);}
|
||||
public Xop_tkn_itm Pipe(int bgn, int end) {return new Xop_pipe_tkn(bgn, end);}
|
||||
public Xop_tkn_itm Colon(int bgn, int end) {return new Xop_colon_tkn(bgn, end);}
|
||||
public Xop_eq_tkn Eq(int bgn, int end) {return new Xop_eq_tkn(bgn, end, end - bgn);}
|
||||
public Xop_eq_tkn Eq(int bgn, int end, int eq_len) {return new Xop_eq_tkn(bgn, end, eq_len);}
|
||||
public Xot_invk_tkn Tmpl_invk(int bgn, int end) {return new Xot_invk_tkn(bgn, end);}
|
||||
public Arg_nde_tkn ArgNde(int arg_idx, int bgn) {return new Arg_nde_tkn(arg_idx, bgn);}
|
||||
public Arg_itm_tkn ArgItm(int bgn, int end) {return new Arg_itm_tkn_base(bgn, end);}
|
||||
public Xop_xnde_tkn Xnde(int bgn, int end) {return (Xop_xnde_tkn)Xop_xnde_tkn.new_().Tkn_ini_pos(false, bgn, end);}
|
||||
public Xop_hdr_tkn Hdr(int bgn, int end, int hdr_len) {return new Xop_hdr_tkn(bgn, end, hdr_len);}
|
||||
public Xop_hr_tkn Hr(int bgn, int end, int hr_len) {return new Xop_hr_tkn(bgn, end, hr_len);}
|
||||
public Xop_tab_tkn Tab(int bgn, int end) {return new Xop_tab_tkn(bgn, end);}
|
||||
public Xop_curly_bgn_tkn Tmpl_curly_bgn(int bgn, int end) {return new Xop_curly_bgn_tkn(bgn, end);}
|
||||
public Xop_tkn_itm Brack_bgn(int bgn, int end) {return new Xop_brack_bgn_tkn(bgn, end);}
|
||||
public Xop_tkn_itm Brack_end(int bgn, int end) {return new Xop_brack_end_tkn(bgn, end);}
|
||||
public Xop_lnke_tkn Lnke(int bgn, int end, byte[] protocol, byte proto_tid, byte lnke_typ, int lnk_bgn, int lnk_end) {
|
||||
return new Xop_lnke_tkn(bgn, end, protocol, proto_tid, lnke_typ, lnk_bgn, lnk_end);
|
||||
}
|
||||
public Xop_tblw_tb_tkn Tblw_tb(int bgn, int end, boolean tblw_xml, boolean auto_created) {return new Xop_tblw_tb_tkn(bgn, end, tblw_xml, auto_created);}
|
||||
public Xop_tblw_tr_tkn Tblw_tr(int bgn, int end, boolean tblw_xml, boolean auto_created) {return new Xop_tblw_tr_tkn(bgn, end, tblw_xml, auto_created);}
|
||||
public Xop_tblw_td_tkn Tblw_td(int bgn, int end, boolean tblw_xml) {return new Xop_tblw_td_tkn(bgn, end, tblw_xml);}
|
||||
public Xop_tblw_th_tkn Tblw_th(int bgn, int end, boolean tblw_xml) {return new Xop_tblw_th_tkn(bgn, end, tblw_xml);}
|
||||
public Xop_tblw_tc_tkn Tblw_tc(int bgn, int end, boolean tblw_xml) {return new Xop_tblw_tc_tkn(bgn, end, tblw_xml);}
|
||||
public Xot_prm_tkn Tmpl_prm(int bgn, int end) {return new Xot_prm_tkn(bgn, end);}
|
||||
public Xop_para_tkn Para(int pos) {return new Xop_para_tkn(pos);}
|
||||
public Xop_pre_tkn Para_pre_bgn(int pos) {return new Xop_pre_tkn(pos, pos, Xop_pre_tkn.Pre_tid_bgn, null);}
|
||||
public Xop_pre_tkn Para_pre_end(int pos, Xop_tkn_itm bgn) {return new Xop_pre_tkn(pos, pos, Xop_pre_tkn.Pre_tid_end, bgn);}
|
||||
public Xop_ignore_tkn Ignore(int bgn, int end, byte ignore_type) {return new Xop_ignore_tkn(bgn, end, ignore_type);}
|
||||
public Xop_bry_tkn Bry_raw(int bgn, int end, byte[] bry) {return new Xop_bry_tkn(bgn, end, bry);}
|
||||
public Xop_bry_tkn Bry_mid(byte[] src, int bgn, int end) {return new Xop_bry_tkn(bgn, end, Bry_.Mid(src, bgn, end));}
|
||||
public Xop_under_tkn Under(int bgn, int end, int v) {return new Xop_under_tkn(bgn, end, v);}
|
||||
public gplx.xowa.xtns.xowa_cmds.Xop_xowa_cmd Xnde__xowa_cmd() {return new gplx.xowa.xtns.xowa_cmds.Xop_xowa_cmd();}
|
||||
public gplx.xowa.xtns.poems.Poem_nde Xnde__poem() {return new gplx.xowa.xtns.poems.Poem_nde();}
|
||||
public Ref_nde Xnde__ref() {return new Ref_nde();}
|
||||
public References_nde Xnde__references() {return new References_nde();}
|
||||
public gplx.xowa.xtns.math.Xomath_xnde Xnde__math() {return new gplx.xowa.xtns.math.Xomath_xnde();}
|
||||
public gplx.xowa.xtns.gallery.Gallery_xnde Xnde__gallery() {return new gplx.xowa.xtns.gallery.Gallery_xnde();}
|
||||
public gplx.xowa.xtns.imaps.Imap_xnde Xnde__imageMap() {return new gplx.xowa.xtns.imaps.Imap_xnde();}
|
||||
public gplx.xowa.xtns.hieros.Hiero_xnde Xnde__hiero() {return new gplx.xowa.xtns.hieros.Hiero_xnde();}
|
||||
public gplx.xowa.xtns.graphs.Graph_xnde Xnde__graph() {return new gplx.xowa.xtns.graphs.Graph_xnde();}
|
||||
public gplx.xowa.xtns.kartographers.Mapframe_xnde Xnde__mapframe() {return new gplx.xowa.xtns.kartographers.Mapframe_xnde();}
|
||||
public gplx.xowa.xtns.kartographers.Maplink_xnde Xnde__maplink() {return new gplx.xowa.xtns.kartographers.Maplink_xnde();}
|
||||
public gplx.xowa.xtns.proofreadPage.Pp_pages_nde Xnde__pages() {return new gplx.xowa.xtns.proofreadPage.Pp_pages_nde();}
|
||||
public gplx.xowa.xtns.proofreadPage.Pp_pagelist_nde Xnde__pagelist() {return new gplx.xowa.xtns.proofreadPage.Pp_pagelist_nde();}
|
||||
public gplx.xowa.xtns.proofreadPage.Pp_pagequality_nde Xnde__pagequality() {return new gplx.xowa.xtns.proofreadPage.Pp_pagequality_nde();}
|
||||
public gplx.xowa.xtns.lst.Lst_section_nde Xnde__section() {return new gplx.xowa.xtns.lst.Lst_section_nde();}
|
||||
public gplx.xowa.xtns.categoryList.Xtn_categorylist_nde Xnde__categoryList() {return new gplx.xowa.xtns.categoryList.Xtn_categorylist_nde();}
|
||||
public gplx.xowa.xtns.dynamicPageList.Dpl_xnde Xnde__dynamicPageList() {return new gplx.xowa.xtns.dynamicPageList.Dpl_xnde();}
|
||||
public gplx.xowa.xtns.syntax_highlights.Synh_xtn_nde Xnde__syntaxHighlight() {return new gplx.xowa.xtns.syntax_highlights.Synh_xtn_nde();}
|
||||
public gplx.xowa.xtns.templateData.Xtn_templateData_nde Xnde__templateData() {return new gplx.xowa.xtns.templateData.Xtn_templateData_nde();}
|
||||
public gplx.xowa.xtns.rss.Rss_xnde Xnde__rss() {return new gplx.xowa.xtns.rss.Rss_xnde();}
|
||||
public gplx.xowa.xtns.quiz.Quiz_xnde Xnde__quiz() {return new gplx.xowa.xtns.quiz.Quiz_xnde();}
|
||||
public gplx.xowa.xtns.indicators.Indicator_xnde Xnde__indicator() {return new gplx.xowa.xtns.indicators.Indicator_xnde();}
|
||||
public gplx.xowa.xtns.xowa_cmds.Xox_xowa_html_cmd Xnde__xowa_html() {return new gplx.xowa.xtns.xowa_cmds.Xox_xowa_html_cmd();}
|
||||
public gplx.xowa.xtns.xowa_cmds.wiki_setups.Xop_wiki_setup_xnde Xnde__xowa_wiki_setup() {return new gplx.xowa.xtns.xowa_cmds.wiki_setups.Xop_wiki_setup_xnde();}
|
||||
public gplx.xowa.xtns.listings.Listing_xnde Xnde__listing(int tag_id) {return new gplx.xowa.xtns.listings.Listing_xnde(tag_id);}
|
||||
public gplx.xowa.xtns.scores.Score_xnde Xnde__score() {return new gplx.xowa.xtns.scores.Score_xnde();}
|
||||
public gplx.xowa.xtns.inputBox.Xtn_inputbox_nde Xnde__inputbox() {return new gplx.xowa.xtns.inputBox.Xtn_inputbox_nde();}
|
||||
public gplx.xowa.xtns.translates.Xop_translate_xnde Xnde__translate() {return new gplx.xowa.xtns.translates.Xop_translate_xnde();}
|
||||
public gplx.xowa.xtns.translates.Xop_languages_xnde Xnde__languages() {return new gplx.xowa.xtns.translates.Xop_languages_xnde();}
|
||||
public gplx.xowa.xtns.wikias.Random_selection_xnde Xnde__random_selection() {return new gplx.xowa.xtns.wikias.Random_selection_xnde();}
|
||||
public gplx.xowa.xtns.wikias.Tabber_xnde Xnde__tabber() {return new gplx.xowa.xtns.wikias.Tabber_xnde();}
|
||||
public gplx.xowa.xtns.wikias.Tabview_xnde Xnde__tabview() {return new gplx.xowa.xtns.wikias.Tabview_xnde();}
|
||||
|
||||
public gplx.xowa.xtns.translates.Xop_tvar_tkn Tvar(int tkn_bgn, int tkn_end, int key_bgn, int key_end, int txt_bgn, int txt_end, byte[] wikitext) {return new gplx.xowa.xtns.translates.Xop_tvar_tkn(tkn_bgn, tkn_end, key_bgn, key_end, txt_bgn, txt_end, wikitext);}
|
||||
// public void Clear() {
|
||||
// space_tkns_len = txt_tkns_len = 0;
|
||||
// }
|
||||
// public Xop_txt_tkn Txt(int bgn, int end) {
|
||||
// Xop_txt_tkn rv = null;
|
||||
// if (txt_tkns_len < txt_tkns_max) {
|
||||
// rv = txt_tkns[txt_tkns_len];
|
||||
// if (rv == null) {
|
||||
// rv = new Xop_txt_tkn(bgn, end);
|
||||
// txt_tkns[txt_tkns_len] = rv;
|
||||
// }
|
||||
// else {
|
||||
// rv.Reset();
|
||||
// rv.Src_rng_(bgn, end);
|
||||
// }
|
||||
// txt_tkns_len++;
|
||||
// }
|
||||
// else {
|
||||
// rv = new Xop_txt_tkn(bgn, end);
|
||||
// Txt_tkns_add(rv);
|
||||
// }
|
||||
// return rv;
|
||||
//// return new Xop_txt_tkn(bgn, end);
|
||||
// }
|
||||
// public Xop_space_tkn Space(int bgn, int end) {
|
||||
// Xop_space_tkn rv = null;
|
||||
// if (space_tkns_len < space_tkns_max) {
|
||||
// rv = space_tkns[space_tkns_len];
|
||||
// if (rv == null) {
|
||||
// rv = new Xop_space_tkn(bgn, end);
|
||||
// space_tkns[space_tkns_len] = rv;
|
||||
// }
|
||||
// else {
|
||||
// rv.Reset();
|
||||
// rv.Src_rng_(bgn, end);
|
||||
// }
|
||||
// space_tkns_len++;
|
||||
// }
|
||||
// else {
|
||||
// rv = new Xop_space_tkn(bgn, end);
|
||||
// Space_tkns_add(rv);
|
||||
// }
|
||||
// return rv;
|
||||
//// return new Xop_space_tkn(bgn, end);
|
||||
// }
|
||||
// private void Txt_tkns_add(Xop_txt_tkn sub) {
|
||||
// int new_len = txt_tkns_len + 1;
|
||||
// if (new_len > txt_tkns_max) {
|
||||
// txt_tkns_max = new_len * 2;
|
||||
// txt_tkns = Resize(txt_tkns, txt_tkns_len, txt_tkns_max);
|
||||
// }
|
||||
// txt_tkns[txt_tkns_len] = sub;
|
||||
// txt_tkns_len = new_len;
|
||||
// } private Xop_txt_tkn[] txt_tkns = new Xop_txt_tkn[0]; int txt_tkns_len, txt_tkns_max;
|
||||
// Xop_txt_tkn[] Resize(Xop_txt_tkn[] src, int cur_len, int new_len) {
|
||||
// Xop_txt_tkn[] rv = new Xop_txt_tkn[new_len];
|
||||
// for (int i = 0; i < cur_len; i++)
|
||||
// rv[i] = src[i];
|
||||
// return rv;
|
||||
// }
|
||||
// private void Space_tkns_add(Xop_space_tkn sub) {
|
||||
// int new_len = space_tkns_len + 1;
|
||||
// if (new_len > space_tkns_max) {
|
||||
// space_tkns_max = new_len * 2;
|
||||
// space_tkns = Resize(space_tkns, space_tkns_len, space_tkns_max);
|
||||
// }
|
||||
// space_tkns[space_tkns_len] = sub;
|
||||
// space_tkns_len = new_len;
|
||||
// } private Xop_space_tkn[] space_tkns = new Xop_space_tkn[0]; int space_tkns_len, space_tkns_max;
|
||||
// Xop_space_tkn[] Resize(Xop_space_tkn[] src, int cur_len, int new_len) {
|
||||
// Xop_space_tkn[] rv = new Xop_space_tkn[new_len];
|
||||
// for (int i = 0; i < cur_len; i++)
|
||||
// rv[i] = src[i];
|
||||
// return rv;
|
||||
// }
|
||||
}
|
||||
@@ -1,55 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.xowa.parsers.tmpls.*;
|
||||
import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.htmls.*;
|
||||
public class Xop_tkn_null implements Xop_tkn_itm {
|
||||
public byte Tkn_tid() {return Xop_tkn_itm_.Tid_null;}
|
||||
public boolean Tkn_immutable() {return true;}
|
||||
public Xop_tkn_grp Tkn_grp() {return Xop_tkn_grp_.Null;}
|
||||
public Xop_tkn_itm Tkn_ini_pos(boolean immutable, int bgn, int end) {return this;}
|
||||
public Xop_tkn_itm Tkn_grp_(Xop_tkn_grp grp, int sub_idx) {return this;}
|
||||
public Xop_tkn_itm Tkn_clone(Xop_ctx ctx, int bgn, int end) {return this;}
|
||||
public int Tkn_sub_idx() {return -1;}
|
||||
public int Src_bgn() {return -1;}
|
||||
public int Src_end() {return -1;}
|
||||
public int Src_bgn_grp(Xop_tkn_grp grp, int sub_idx) {return -1;}
|
||||
public int Src_end_grp(Xop_tkn_grp grp, int sub_idx) {return -1;}
|
||||
public int Subs_src_bgn(int sub_idx) {return -1;}
|
||||
public int Subs_src_end(int sub_idx) {return -1;}
|
||||
public void Src_end_(int v) {}
|
||||
public void Src_end_grp_(Xop_ctx ctx, Xop_tkn_grp grp, int sub_idx, int src_end) {}
|
||||
public boolean Ignore() {return false;} public Xop_tkn_itm Ignore_y_() {return this;}
|
||||
public int Subs_len() {return 0;}
|
||||
public Xop_tkn_itm Subs_get(int i) {return null;}
|
||||
public void Subs_add(Xop_tkn_itm sub) {}
|
||||
public void Subs_add_grp(Xop_tkn_itm sub, Xop_tkn_grp old_grp, int old_sub_idx) {}
|
||||
public void Subs_del_after(int pos_bgn) {}
|
||||
public void Subs_clear() {}
|
||||
public void Subs_move(Xop_tkn_itm tkn) {}
|
||||
public Xop_tkn_itm Immutable_clone(Xop_ctx ctx, Xop_tkn_itm tkn, int sub_idx) {return this;}
|
||||
public void Ignore_y_grp_(Xop_ctx ctx, Xop_tkn_grp grp, int sub_idx) {}
|
||||
public void Subs_grp_(Xop_ctx ctx, Xop_tkn_itm tkn, Xop_tkn_grp grp, int sub_idx) {}
|
||||
public void Subs_src_pos_(int sub_idx, int bgn, int end) {}
|
||||
public void Clear() {}
|
||||
public void Tmpl_fmt(Xop_ctx ctx, byte[] src, Xot_fmtr fmtr) {}
|
||||
public void Tmpl_compile(Xop_ctx ctx, byte[] src, Xot_compile_data prep_data) {}
|
||||
public boolean Tmpl_evaluate(Xop_ctx ctx, byte[] src, Xot_invk caller, Bry_bfr bfr) {return true;}
|
||||
public void Html__write(Bry_bfr bfr, Xoh_html_wtr wtr, Xowe_wiki wiki, Xoae_page page, Xop_ctx ctx, Xoh_wtr_ctx hctx, Xoh_html_wtr_cfg cfg, Xop_tkn_grp grp, int sub_idx, byte[] src) {}
|
||||
public static final Xop_tkn_null Null_tkn = new Xop_tkn_null();
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
import gplx.xowa.files.*;
|
||||
import gplx.xowa.xtns.pfuncs.exprs.*; import gplx.xowa.xtns.pfuncs.ttls.*;
|
||||
public class Xop_tmp_mgr {
|
||||
public Xof_xfer_itm Xfer_itm() {return xfer_itm;} private final Xof_xfer_itm xfer_itm = new Xof_xfer_itm();
|
||||
public Gfo_number_parser Pfunc_num_parser_0() {return num_parser_0;} private final Gfo_number_parser num_parser_0 = new Gfo_number_parser().Hex_enabled_(true);
|
||||
public Gfo_number_parser Pfunc_num_parser_1() {return num_parser_1;} private final Gfo_number_parser num_parser_1 = new Gfo_number_parser().Hex_enabled_(true);
|
||||
public Pfunc_expr_shunter Expr_shunter() {return expr_shunter;} private final Pfunc_expr_shunter expr_shunter = new Pfunc_expr_shunter();
|
||||
public Btrie_slim_mgr Xnde__xtn_end() {return xnde__xtn_end;} private final Btrie_slim_mgr xnde__xtn_end = Btrie_slim_mgr.ci_a7(); // NOTE:ci.ascii:MW_const.en; listed XML node names are en
|
||||
public Btrie_rv Xnde__trv() {return xnde__trv;} private final Btrie_rv xnde__trv = new Btrie_rv();
|
||||
public Int_obj_ref Pfunc_rel2abs() {return pfunc_rel2abs;} private final Int_obj_ref pfunc_rel2abs = Int_obj_ref.New_zero();
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
public class Xop_txt_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_txt_tkn(int bgn, int end) {this.Tkn_ini_pos(false, bgn, end);}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_txt;}
|
||||
}
|
||||
class Xop_colon_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_colon_tkn(int bgn, int end) {this.Tkn_ini_pos(false, bgn, end);}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_colon;}
|
||||
}
|
||||
class Xop_brack_bgn_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_brack_bgn_tkn(int bgn, int end) {this.Tkn_ini_pos(false, bgn, end);}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_brack_bgn;}
|
||||
}
|
||||
class Xop_brack_end_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_brack_end_tkn(int bgn, int end) {this.Tkn_ini_pos(false, bgn, end);}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_brack_end;}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
public class Xow_mw_parser_mgr {
|
||||
public Xop_xnde_tag_regy Xnde_tag_regy() {return xnde_tag_regy;} private final Xop_xnde_tag_regy xnde_tag_regy = new Xop_xnde_tag_regy();
|
||||
}
|
||||
@@ -1,102 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.primitives.*; import gplx.core.brys.fmtrs.*;
|
||||
import gplx.xowa.wikis.*; import gplx.core.envs.*;
|
||||
import gplx.xowa.files.*;
|
||||
import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.wbases.hwtrs.*; import gplx.xowa.xtns.pfuncs.ifs.*; import gplx.xowa.xtns.pfuncs.times.*; import gplx.xowa.xtns.pfuncs.ttls.*;
|
||||
import gplx.xowa.xtns.math.*; import gplx.xowa.parsers.uniqs.*; import gplx.xowa.parsers.hdrs.sections.*;
|
||||
public class Xow_parser_mgr {
|
||||
private final Xowe_wiki wiki; private final Xop_tkn_mkr tkn_mkr;
|
||||
public Xow_parser_mgr(Xowe_wiki wiki) {
|
||||
this.wiki = wiki; this.tkn_mkr = wiki.Appe().Parser_mgr().Tkn_mkr();
|
||||
this.ctx = Xop_ctx.New__top(wiki);
|
||||
this.parser = Xop_parser.new_wiki(wiki);
|
||||
}
|
||||
public Xop_ctx Ctx() {return ctx;} private final Xop_ctx ctx;
|
||||
public Xop_parser Main() {return parser;} private final Xop_parser parser;
|
||||
public Scrib_core_mgr Scrib() {return scrib;} private final Scrib_core_mgr scrib = new Scrib_core_mgr();
|
||||
public Xof_img_size Img_size() {return img_size;} private final Xof_img_size img_size = new Xof_img_size();
|
||||
public Pfunc_ifexist_mgr Ifexist_mgr() {return ifexist_mgr;} private final Pfunc_ifexist_mgr ifexist_mgr = new Pfunc_ifexist_mgr();
|
||||
public Xof_url_bldr Url_bldr() {return url_bldr;} private final Xof_url_bldr url_bldr = Xof_url_bldr.new_v2();
|
||||
public List_adp Time_parser_itms() {return time_parser_itms;} private final List_adp time_parser_itms = List_adp_.New();
|
||||
public Pft_func_formatdate_bldr Date_fmt_bldr() {return date_fmt_bldr;} private final Pft_func_formatdate_bldr date_fmt_bldr = new Pft_func_formatdate_bldr();
|
||||
public Gfo_number_parser Pp_num_parser() {return pp_num_parser;} private final Gfo_number_parser pp_num_parser = new Gfo_number_parser().Ignore_space_at_end_y_();
|
||||
public int[] Rel2abs_ary() {return rel2abs_ary;} private final int[] rel2abs_ary = new int[Pfunc_rel2abs.Ttl_max];
|
||||
public Xop_uniq_mgr Uniq_mgr() {return uniq_mgr;} private final Xop_uniq_mgr uniq_mgr = new Xop_uniq_mgr();
|
||||
public Xomath_core Math__core() {return math__core;} private final Xomath_core math__core = new Xomath_core();
|
||||
public boolean Lst__recursing() {return lst_recursing;} private boolean lst_recursing; public void Lst__recursing_(boolean v) {lst_recursing = v;}
|
||||
public Bry_bfr Wbase__time__bfr() {return wbase__time__bfr;} private final Bry_bfr wbase__time__bfr = Bry_bfr_.New();
|
||||
public Bry_fmtr Wbase__time__fmtr() {return wbase__time__fmtr;} private final Bry_fmtr wbase__time__fmtr = Bry_fmtr.new_();
|
||||
public Xop_section_mgr Hdr__section_editable__mgr() {return hdr__section_editable__mgr;} private final Xop_section_mgr hdr__section_editable__mgr = new Xop_section_mgr();
|
||||
public Wdata_hwtr_msgs Wbase__time__msgs() {
|
||||
if (wbase__time__msgs == null)
|
||||
wbase__time__msgs = Wdata_hwtr_msgs.new_(wiki.Msg_mgr());
|
||||
return wbase__time__msgs;
|
||||
} private Wdata_hwtr_msgs wbase__time__msgs;
|
||||
public Bry_bfr Tmp_bfr() {return tmp_bfr;} private final Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
public int Tag__next_idx() {return ++tag_idx;} private int tag_idx; // NOTE:must be wiki-level variable, not page-level, b/c pre-compiled templates can reserve tag #s; PAGE:de.s:Seite:NewtonPrincipien.djvu/465 DATE:2015-02-03
|
||||
public void Tmpl_stack_del() {--tmpl_stack_ary_len;}
|
||||
public boolean Tmpl_stack_add(byte[] key) {
|
||||
for (int i = 0; i < tmpl_stack_ary_len; i++) {
|
||||
if (Bry_.Match(key, tmpl_stack_ary[i])) return false;
|
||||
}
|
||||
int new_len = tmpl_stack_ary_len + 1;
|
||||
if (new_len > tmpl_stack_ary_max) {
|
||||
tmpl_stack_ary_max = new_len * 2;
|
||||
tmpl_stack_ary = (byte[][])Array_.Resize(tmpl_stack_ary, tmpl_stack_ary_max);
|
||||
}
|
||||
tmpl_stack_ary[tmpl_stack_ary_len] = key;
|
||||
tmpl_stack_ary_len = new_len;
|
||||
return true;
|
||||
} private byte[][] tmpl_stack_ary = Bry_.Ary_empty; private int tmpl_stack_ary_len = 0, tmpl_stack_ary_max = 0;
|
||||
public Pfunc_anchorencode_mgr Anchor_encoder_mgr__dflt_or_new(Xop_ctx calling_ctx) {
|
||||
// lazy-instantiate anchor_encoder_mgr
|
||||
if (anchor_encoder_mgr == null) anchor_encoder_mgr = new Pfunc_anchorencode_mgr(wiki);
|
||||
|
||||
// default to member instance
|
||||
Pfunc_anchorencode_mgr rv = anchor_encoder_mgr;
|
||||
// if used, create a new one; only occurs if {{anchorencode}} is nested
|
||||
if (rv.Used()) rv = new Pfunc_anchorencode_mgr(wiki);
|
||||
rv.Used_(Bool_.Y);
|
||||
return rv;
|
||||
} private Pfunc_anchorencode_mgr anchor_encoder_mgr;
|
||||
public void Init_by_wiki() {
|
||||
math__core.Init_by_wiki(wiki);
|
||||
hdr__section_editable__mgr.Init_by_wiki(wiki);
|
||||
}
|
||||
public void Parse(Xoae_page page, boolean clear) { // main parse method; should never be called nested
|
||||
// init
|
||||
if (!Env_.Mode_testing()) wiki.Init_assert(); // needed for html_server?
|
||||
tmpl_stack_ary = Bry_.Ary_empty;
|
||||
tmpl_stack_ary_len = tmpl_stack_ary_max = 0;
|
||||
uniq_mgr.Clear();
|
||||
|
||||
scrib.When_page_changed(page); // notify scribunto about page changed
|
||||
ctx.Page_(page);
|
||||
Xop_root_tkn root = ctx.Tkn_mkr().Root(page.Db().Text().Text_bry());
|
||||
if (clear) {page.Clear_all();}
|
||||
Xoa_ttl ttl = page.Ttl();
|
||||
if ( Xow_page_tid.Identify(wiki.Domain_tid(), ttl.Ns().Id(), ttl.Page_db()) == Xow_page_tid.Tid_wikitext) { // only parse page if wikitext; skip .js, .css, Module; DATE:2013-11-10
|
||||
byte[] data_raw = page.Db().Text().Text_bry();
|
||||
parser.Parse_text_to_wdom(root, ctx, tkn_mkr, data_raw , Xop_parser_.Doc_bgn_bos);
|
||||
}
|
||||
page.Root_(root);
|
||||
root.Data_htm_(root.Root_src());
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_amp_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_amp;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Amp, this);}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
return ctx.Amp().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
|
||||
}
|
||||
public static final Xop_amp_lxr Instance = new Xop_amp_lxr();
|
||||
}
|
||||
@@ -1,157 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.langs.htmls.entitys.*;
|
||||
public class Xop_amp_mgr { // TS
|
||||
private static final Btrie_rv trv = new Btrie_rv();
|
||||
public Btrie_slim_mgr Amp_trie() {return amp_trie;} private final Btrie_slim_mgr amp_trie = Gfh_entity_trie.Instance;
|
||||
public Xop_amp_mgr_rslt Parse_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int bgn) {
|
||||
int fail_pos = amp_pos + 1; // default to fail pos which is after &
|
||||
|
||||
// check amp_trie; EX: 'lt'
|
||||
Xop_amp_mgr_rslt rv = new Xop_amp_mgr_rslt();
|
||||
Gfh_entity_itm itm; int cur;
|
||||
synchronized (trv) {
|
||||
itm = (Gfh_entity_itm)amp_trie.Match_at(trv, src, bgn, src_len);
|
||||
cur = trv.Pos();
|
||||
}
|
||||
|
||||
if (itm == null) {
|
||||
rv.Pass_n_(fail_pos);
|
||||
return rv;
|
||||
}
|
||||
|
||||
// check itm
|
||||
switch (itm.Tid()) {
|
||||
// letters; EX: '<'
|
||||
case Gfh_entity_itm.Tid_name_std:
|
||||
case Gfh_entity_itm.Tid_name_xowa:
|
||||
rv.Pos_(cur);
|
||||
rv.Tkn_(tkn_mkr.Amp_txt(amp_pos, cur, itm));
|
||||
return rv;
|
||||
// numbers; EX: '{' 'ģ'
|
||||
case Gfh_entity_itm.Tid_num_hex:
|
||||
case Gfh_entity_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = itm.Tid() == Gfh_entity_itm.Tid_num_hex;
|
||||
boolean pass = Parse_ncr(rv, ncr_is_hex, src, src_len, amp_pos, cur);
|
||||
if (pass) { // NOTE: do not set rv.Pos_(); will be set by Parse_ncr
|
||||
rv.Tkn_(tkn_mkr.Amp_num(amp_pos, rv.Pos(), rv.Val()));
|
||||
return rv;
|
||||
}
|
||||
else {
|
||||
rv.Pass_n_(fail_pos);
|
||||
return rv;
|
||||
}
|
||||
default: throw Err_.new_unhandled_default(itm.Tid());
|
||||
}
|
||||
}
|
||||
public boolean Parse_ncr(Xop_amp_mgr_rslt rv, boolean ncr_is_hex, byte[] src, int src_len, int amp_pos, int num_bgn) {
|
||||
int fail_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
|
||||
// find semic; fail if none found
|
||||
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, num_bgn, src_len);
|
||||
if (semic_pos == Bry_find_.Not_found) return rv.Pass_n_(fail_pos);
|
||||
int num_end = semic_pos - 1; // num_end = pos before semicolon
|
||||
|
||||
// calc amp_val; EX: Σ -> 931; Σ -> 931;
|
||||
int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
|
||||
for (int i = num_end; i >= num_bgn; i--) {
|
||||
byte b = src[i];
|
||||
if (ncr_is_hex) {
|
||||
if (b >= 48 && b <= 57) cur = b - 48;
|
||||
else if (b >= 65 && b <= 70) cur = b - 55;
|
||||
else if (b >= 97 && b <= 102) cur = b - 87;
|
||||
else if((b >= 71 && b <= 90)
|
||||
|| (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
|
||||
else return rv.Pass_n_(fail_pos);
|
||||
}
|
||||
else {
|
||||
cur = b - Byte_ascii.Num_0;
|
||||
if (cur < 0 || cur > 10) return rv.Pass_n_(fail_pos);
|
||||
}
|
||||
val += cur * factor;
|
||||
if (val > gplx.core.intls.Utf8_.Codepoint_max) return rv.Pass_n_(fail_pos); // fail if value > largest_unicode_codepoint
|
||||
factor *= multiple;
|
||||
}
|
||||
return rv.Pass_y_(semic_pos + 1, val); // +1 to position after semic
|
||||
}
|
||||
public byte[] Decode_as_bry(byte[] src) {
|
||||
if (src == null) return src;
|
||||
boolean dirty = false;
|
||||
int end = src.length;
|
||||
int pos = 0;
|
||||
Xop_amp_mgr_rslt amp_rv = null;
|
||||
Bry_bfr bfr = null;
|
||||
Btrie_rv trv = null;
|
||||
|
||||
// scan for &
|
||||
while (pos < end) {
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Amp) { // & found
|
||||
int nxt_pos = pos + 1;
|
||||
if (nxt_pos < end) { // check & is not eos
|
||||
byte nxt_b = src[nxt_pos];
|
||||
|
||||
if (trv == null) trv = new Btrie_rv();
|
||||
Object amp_obj = amp_trie.Match_at_w_b0(trv, nxt_b, src, nxt_pos, end);
|
||||
int amp_pos = trv.Pos();
|
||||
|
||||
if (amp_obj != null) {
|
||||
if (!dirty) { // 1st amp found; add preceding String to bfr
|
||||
if (bfr == null) {
|
||||
bfr = Bry_bfr_.Get();
|
||||
dirty = true;
|
||||
}
|
||||
bfr.Add_mid(src, 0, pos);
|
||||
}
|
||||
Gfh_entity_itm amp_itm = (Gfh_entity_itm)amp_obj;
|
||||
switch (amp_itm.Tid()) {
|
||||
case Gfh_entity_itm.Tid_name_std:
|
||||
case Gfh_entity_itm.Tid_name_xowa:
|
||||
bfr.Add(amp_itm.U8_bry());
|
||||
pos = amp_pos;
|
||||
break;
|
||||
case Gfh_entity_itm.Tid_num_hex:
|
||||
case Gfh_entity_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = amp_itm.Tid() == Gfh_entity_itm.Tid_num_hex;
|
||||
int int_bgn = amp_pos;
|
||||
if (amp_rv == null)
|
||||
amp_rv = new Xop_amp_mgr_rslt();
|
||||
boolean pass = Parse_ncr(amp_rv, ncr_is_hex, src, end, pos, int_bgn);
|
||||
if (pass)
|
||||
bfr.Add_u8_int(amp_rv.Val());
|
||||
else
|
||||
bfr.Add_mid(src, pos, nxt_pos);
|
||||
pos = amp_rv.Pos();
|
||||
break;
|
||||
default:
|
||||
throw Err_.new_unhandled_default(amp_itm.Tid());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (dirty)
|
||||
bfr.Add_byte(b);
|
||||
++pos;
|
||||
}
|
||||
return dirty ? bfr.To_bry_and_clear_and_rls() : src;
|
||||
}
|
||||
public static final Xop_amp_mgr Instance = new Xop_amp_mgr(); Xop_amp_mgr() {}
|
||||
}
|
||||
@@ -1,64 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xop_amp_mgr__decode__tst {
|
||||
@Before public void init() {} private final Xop_amp_mgr_fxt fxt = new Xop_amp_mgr_fxt();
|
||||
@Test public void Text() {fxt.Test__decode_as_bry("a" , "a");}
|
||||
@Test public void Name() {fxt.Test__decode_as_bry("&" , "&");}
|
||||
@Test public void Name_w_text() {fxt.Test__decode_as_bry("a&b" , "a&b");}
|
||||
@Test public void Name_fail_semic_missing() {fxt.Test__decode_as_bry("a&b" , "a&b");}
|
||||
@Test public void Name_fail_amp_only() {fxt.Test__decode_as_bry("a&" , "a&");}
|
||||
@Test public void Num_fail() {fxt.Test__decode_as_bry("&#!;" , "&#!;");} // ! is not valid num
|
||||
@Test public void Hex_fail() {fxt.Test__decode_as_bry("&#x!;" , "&#x!;");} // ! is not valid hex
|
||||
@Test public void Num_basic() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Num_zero_padded() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_upper() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_lower() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_zero_padded() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_upper_x() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Num_fail_large_codepoint() {fxt.Test__decode_as_bry("�" , "�");}
|
||||
@Test public void Num_ignore_extra_x() {fxt.Test__decode_as_bry("&#xx26D0;" , Char_.To_str(Char_.By_int(9936)));} // 2nd x is ignored
|
||||
}
|
||||
class Xop_amp_mgr_fxt {
|
||||
private final Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance;
|
||||
public void Test__decode_as_bry(String raw, String expd) {
|
||||
Gftest.Eq__str(expd, String_.new_u8(amp_mgr.Decode_as_bry(Bry_.new_u8(raw))));
|
||||
}
|
||||
public void Test__parse_tkn__ent(String raw, String expd) {
|
||||
Xop_amp_mgr_rslt rv = Exec__parse_tkn(raw);
|
||||
Xop_amp_tkn_ent tkn = (Xop_amp_tkn_ent)rv.Tkn();
|
||||
Gftest.Eq__byte(Xop_tkn_itm_.Tid_html_ref, tkn.Tkn_tid());
|
||||
Gftest.Eq__str(expd, tkn.Xml_name_bry());
|
||||
}
|
||||
public void Test__parse_tkn__ncr(String raw, int expd) {
|
||||
Xop_amp_mgr_rslt rv = Exec__parse_tkn(raw);
|
||||
Xop_amp_tkn_num tkn = (Xop_amp_tkn_num)rv.Tkn();
|
||||
Gftest.Eq__byte(Xop_tkn_itm_.Tid_html_ncr, tkn.Tkn_tid());
|
||||
Gftest.Eq__int(expd, tkn.Val());
|
||||
}
|
||||
public void Test__parse_tkn__txt(String raw, int expd) {
|
||||
Xop_amp_mgr_rslt rv = Exec__parse_tkn(raw);
|
||||
Gftest.Eq__null(Bool_.Y, rv.Tkn());
|
||||
Gftest.Eq__int(expd, rv.Pos());
|
||||
}
|
||||
private Xop_amp_mgr_rslt Exec__parse_tkn(String raw) {
|
||||
byte[] src = Bry_.new_u8(raw);
|
||||
return amp_mgr.Parse_tkn(new Xop_tkn_mkr(), src, src.length, 0, 1);
|
||||
}
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xop_amp_mgr__parse_tkn__tst {
|
||||
@Before public void init() {} private final Xop_amp_mgr_fxt fxt = new Xop_amp_mgr_fxt();
|
||||
@Test public void Ent() {fxt.Test__parse_tkn__ent("&" , "&");} // check for html_ref
|
||||
@Test public void Ent__fail() {fxt.Test__parse_tkn__txt("&nil;" , 1);}
|
||||
@Test public void Num__nex() {fxt.Test__parse_tkn__ncr("Σ" , 931);} // check for html_ncr; Σ: http://en.wikipedia.org/wiki/Numeric_character_reference
|
||||
@Test public void Num__dec() {fxt.Test__parse_tkn__ncr("Σ" , 931);}
|
||||
@Test public void Num__fail() {fxt.Test__parse_tkn__txt("&#" , 1);}
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_amp_mgr_rslt {
|
||||
public Xop_amp_mgr_rslt(int pos, int val, Xop_tkn_itm tkn) {
|
||||
this.pos = pos;
|
||||
this.val = val;
|
||||
this.tkn = tkn;
|
||||
}
|
||||
public Xop_amp_mgr_rslt() {}
|
||||
public boolean Pass() {return pass;} private boolean pass; public void Valid_(boolean v) {this.pass = v;}
|
||||
public int Pos() {return pos;} private int pos; public void Pos_(int v) {this.pos = v;}
|
||||
public int Val() {return val;} private int val; public void Val_(int v) {this.val = v;}
|
||||
public Xop_tkn_itm Tkn() {return tkn;} private Xop_tkn_itm tkn; public void Tkn_(Xop_tkn_itm v) {this.tkn = v;}
|
||||
public boolean Pass_y_(int pos, int val) {
|
||||
this.pos = pos; this.val = val;
|
||||
this.pass = true;
|
||||
return true;
|
||||
}
|
||||
public boolean Pass_n_(int pos) {
|
||||
this.pass = false;
|
||||
this.pos = pos;
|
||||
this.val = -1;
|
||||
this.tkn = null;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.langs.htmls.entitys.*;
|
||||
public class Xop_amp_tkn_ent extends Xop_tkn_itm_base {
|
||||
private Gfh_entity_itm html_ref_itm;
|
||||
public Xop_amp_tkn_ent(int bgn, int end, Gfh_entity_itm html_ref_itm) {
|
||||
this.html_ref_itm = html_ref_itm;
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_html_ref;}
|
||||
public int Char_int() {return html_ref_itm.Char_int();}
|
||||
public byte[] Xml_name_bry() {return html_ref_itm.Xml_name_bry();}
|
||||
public boolean Itm_is_custom() {return html_ref_itm.Tid() == Gfh_entity_itm.Tid_name_xowa;}
|
||||
public void Print_ncr(Bry_bfr bfr) {html_ref_itm.Print_ncr(bfr);}
|
||||
public void Print_literal(Bry_bfr bfr) {html_ref_itm.Print_literal(bfr);}
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_amp_tkn_num extends Xop_tkn_itm_base {
|
||||
public Xop_amp_tkn_num(int bgn, int end, int val, byte[] str_as_bry) {
|
||||
this.val = val; this.str_as_bry = str_as_bry;
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_html_ncr;}
|
||||
public int Val() {return val;} private int val;
|
||||
public byte[] Str_as_bry() {return str_as_bry;} private byte[] str_as_bry;
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_amp_wkr implements Xop_ctx_wkr {
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn, int cur) {
|
||||
if (cur == src_len) return ctx.Lxr_make_txt_(cur); // NOTE: & is last char in page; strange and rare, but don't raise error
|
||||
|
||||
Xop_amp_mgr amp_mgr = ctx.App().Parser_amp_mgr();
|
||||
Xop_amp_mgr_rslt amp_rv = amp_mgr.Parse_tkn(tkn_mkr, src, src_len, bgn, cur);
|
||||
Xop_tkn_itm amp_tkn = amp_rv.Tkn();
|
||||
int rv_pos = amp_rv.Pos();
|
||||
if (amp_tkn == null) return ctx.Lxr_make_txt_(rv_pos);
|
||||
ctx.Subs_add(root, amp_tkn);
|
||||
return rv_pos;
|
||||
}
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_amp_wkr_tst {
|
||||
private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Convert_to_named() {fxt.Test_parse_page_wiki_str("&" , "&");} // note that & is printed, not &
|
||||
@Test public void Convert_to_named_amp() {fxt.Test_parse_page_wiki_str("&" , "&");} // PURPOSE: html_wtr was not handling & only
|
||||
@Test public void Convert_to_numeric() {fxt.Test_parse_page_wiki_str("á" , "á");} // testing that á is outputted, not á
|
||||
@Test public void Defect_bad_code_fails() { // PURPOSE: early rewrite of Xop_amp_mgr caused Xoh_html_wtr_escaper to fail with array out of bounds error; EX:w:Czech_Republic; DATE:2014-05-11
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "[[File:A.png|alt=<p> </p>]]" // basically checks amp parsing inside xnde inside lnki's alt (which uses different parsing code
|
||||
, "<a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xoimg_0\" alt=\" \" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>"
|
||||
);
|
||||
}
|
||||
@Test public void Ignore_ncr() { // PURPOSE: check that ncr is unescaped; PAGE:de.w:Cross-Site-Scripting; DATE:2014-07-23
|
||||
fxt.Test_parse_page_all_str
|
||||
( "a <code><iframe></code>) b"
|
||||
, "a <code><iframe></code>) b" // < should not become <
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,81 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_apos_dat {
|
||||
public int State() {return state;} public void State_clear() {state = Xop_apos_tkn_.State_nil;} private int state = Xop_apos_tkn_.State_nil;
|
||||
public int Typ() {return typ;} private int typ;
|
||||
public int Cmd() {return cmd;} private int cmd;
|
||||
public int Lit_apos() {return lit_apos;} private int lit_apos;
|
||||
public int Dual_cmd() {return dual_cmd;} private int dual_cmd;
|
||||
public void Ident(Xop_ctx ctx, byte[] src, int apos_len, int cur_pos) {
|
||||
typ = cmd = lit_apos = dual_cmd = 0;
|
||||
switch (apos_len) {
|
||||
case Xop_apos_tkn_.Len_ital: case Xop_apos_tkn_.Len_bold: case Xop_apos_tkn_.Len_dual:
|
||||
Ident_props(apos_len); break;
|
||||
case Xop_apos_tkn_.Len_apos_bold:
|
||||
lit_apos = 1;
|
||||
Ident_props(Xop_apos_tkn_.Len_bold); break;
|
||||
default:
|
||||
lit_apos = apos_len - Xop_apos_tkn_.Len_dual;
|
||||
Ident_props(Xop_apos_tkn_.Len_dual);
|
||||
break;
|
||||
}
|
||||
}
|
||||
private void Ident_props(int apos_len) {
|
||||
typ = apos_len;
|
||||
switch (apos_len) {
|
||||
case Xop_apos_tkn_.Len_ital: {
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_b; break;
|
||||
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_bi_end__b_bgn; state = Xop_apos_tkn_.State_b; break;
|
||||
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_b; dual_cmd = Xop_apos_tkn_.Cmd_bi_bgn; break;
|
||||
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_i_bgn; state = Xop_apos_tkn_.State_bi; break;
|
||||
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_i_bgn; state = Xop_apos_tkn_.State_i; break;
|
||||
default: throw Err_.new_unhandled(state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Xop_apos_tkn_.Len_bold: {
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_ib_end__i_bgn; state = Xop_apos_tkn_.State_i; break;
|
||||
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_i; break;
|
||||
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_i; break; // NOTE: dual_cmd = Cmd_ib_bgn is implied
|
||||
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_b_bgn; state = Xop_apos_tkn_.State_ib; break;
|
||||
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_b_bgn; state = Xop_apos_tkn_.State_b; break;
|
||||
default: throw Err_.new_unhandled(state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Xop_apos_tkn_.Len_dual: {
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_b_end__i_bgn; state = Xop_apos_tkn_.State_i; break;
|
||||
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_i_end__b_bgn; state = Xop_apos_tkn_.State_b; break;
|
||||
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_ib_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_bi_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_bi_end; state = Xop_apos_tkn_.State_nil; break; // NOTE: dual_cmd = Cmd_ib_bgn is implied
|
||||
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_ib_bgn; state = Xop_apos_tkn_.State_dual; break;
|
||||
default: throw Err_.new_unhandled(state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: throw Err_.new_unhandled(apos_len);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,84 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_apos_itm {
|
||||
public int State() {return state;} public void State_clear() {state = Xop_apos_tkn_.State_nil;} private int state = Xop_apos_tkn_.State_nil;
|
||||
public int Typ() {return typ;} private int typ;
|
||||
public int Cmd() {return cmd;} private int cmd;
|
||||
public int Lit_apos() {return lit_apos;} private int lit_apos;
|
||||
public int Dual_cmd() {return dual_cmd;} private int dual_cmd;
|
||||
public void Init(int state, int typ, int cmd, int lit_apos, int dual_cmd) {
|
||||
this.state = state;
|
||||
this.typ = typ; this.cmd = cmd; this.lit_apos = lit_apos; this.dual_cmd = dual_cmd;
|
||||
}
|
||||
public static void Ident(Xop_apos_itm rv, Xop_ctx ctx, byte[] src, int apos_len, int cur_pos, int state) {
|
||||
switch (apos_len) {
|
||||
case Xop_apos_tkn_.Len_ital: case Xop_apos_tkn_.Len_bold: case Xop_apos_tkn_.Len_dual:
|
||||
Ident_props(rv, state, apos_len, 0); break;
|
||||
case Xop_apos_tkn_.Len_apos_bold:
|
||||
Ident_props(rv, state, Xop_apos_tkn_.Len_bold, 1); break;
|
||||
default:
|
||||
Ident_props(rv, state, Xop_apos_tkn_.Len_dual, apos_len - Xop_apos_tkn_.Len_dual);
|
||||
break;
|
||||
}
|
||||
}
|
||||
private static void Ident_props(Xop_apos_itm rv, int state, int apos_len, int lit_apos) {
|
||||
int typ = apos_len;
|
||||
int cmd = 0, dual_cmd = 0;
|
||||
switch (apos_len) {
|
||||
case Xop_apos_tkn_.Len_ital: {
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_b; break;
|
||||
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_bi_end__b_bgn; state = Xop_apos_tkn_.State_b; break;
|
||||
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_b; dual_cmd = Xop_apos_tkn_.Cmd_bi_bgn; break;
|
||||
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_i_bgn; state = Xop_apos_tkn_.State_bi; break;
|
||||
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_i_bgn; state = Xop_apos_tkn_.State_i; break;
|
||||
default: throw Err_.new_unhandled(state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Xop_apos_tkn_.Len_bold: {
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_ib_end__i_bgn; state = Xop_apos_tkn_.State_i; break;
|
||||
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_i; break;
|
||||
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_i; break; // NOTE: dual_cmd = Cmd_ib_bgn is implied
|
||||
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_b_bgn; state = Xop_apos_tkn_.State_ib; break;
|
||||
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_b_bgn; state = Xop_apos_tkn_.State_b; break;
|
||||
default: throw Err_.new_unhandled(state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Xop_apos_tkn_.Len_dual: {
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_b_end__i_bgn; state = Xop_apos_tkn_.State_i; break;
|
||||
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_i_end__b_bgn; state = Xop_apos_tkn_.State_b; break;
|
||||
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_ib_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_bi_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_bi_end; state = Xop_apos_tkn_.State_nil; break; // NOTE: dual_cmd = Cmd_ib_bgn is implied
|
||||
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_ib_bgn; state = Xop_apos_tkn_.State_dual; break;
|
||||
default: throw Err_.new_unhandled(state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: throw Err_.new_unhandled_default(apos_len);
|
||||
}
|
||||
rv.Init(state, typ, cmd, lit_apos, dual_cmd);
|
||||
}
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_apos_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_apos;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Apos_ary, this);} private static final byte[] Apos_ary = new byte[] {Byte_ascii.Apos, Byte_ascii.Apos};
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Apos().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_apos_lxr Instance = new Xop_apos_lxr(); Xop_apos_lxr() {}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_apos_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_apos_tkn(int bgn, int end, int apos_len, int apos_tid, int apos_cmd, int apos_lit) {
|
||||
this.apos_len = apos_len; this.apos_tid = apos_tid; this.apos_cmd = apos_cmd; this.apos_lit = apos_lit;
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_apos;}
|
||||
public int Apos_len() {return apos_len;} private int apos_len;
|
||||
public int Apos_lit() {return apos_lit;} public Xop_apos_tkn Apos_lit_(int v) {apos_lit = v; return this;} private int apos_lit;
|
||||
public int Apos_tid() {return apos_tid;} public Xop_apos_tkn Apos_tid_(int v) {apos_tid = v; return this;} private int apos_tid;
|
||||
public int Apos_cmd() {return apos_cmd;} public Xop_apos_tkn Apos_cmd_(int v) {apos_cmd = v; return this;} private int apos_cmd;
|
||||
}
|
||||
@@ -1,36 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_apos_tkn_ {
|
||||
public static final int
|
||||
Cmd_nil = 0
|
||||
, Cmd_i_bgn = 1, Cmd_i_end = 2, Cmd_b_bgn = 3, Cmd_b_end = 4
|
||||
, Cmd_bi_bgn = 5, Cmd_ib_bgn = 6, Cmd_ib_end = 7, Cmd_bi_end = 8
|
||||
, Cmd_bi_end__b_bgn = 9, Cmd_ib_end__i_bgn = 10, Cmd_b_end__i_bgn = 11, Cmd_i_end__b_bgn = 12;
|
||||
public static final byte[][] Cmds
|
||||
= new byte[][]
|
||||
{ Bry_.new_a7("nil")
|
||||
, Bry_.new_a7("i+"), Bry_.new_a7("i-"), Bry_.new_a7("b+"), Bry_.new_a7("b-")
|
||||
, Bry_.new_a7("bi+"), Bry_.new_a7("ib+"), Bry_.new_a7("ib-"), Bry_.new_a7("bi-")
|
||||
, Bry_.new_a7("bi-b+"), Bry_.new_a7("ib-i+"), Bry_.new_a7("b-i+"), Bry_.new_a7("i-b+")
|
||||
};
|
||||
public static String Cmd_str(int id) {return String_.new_u8(Cmds[id]);}
|
||||
public static final int Len_ital = 2, Len_bold = 3, Len_dual = 5, Len_apos_bold = 4;
|
||||
public static final int Typ_ital = 2, Typ_bold = 3, Typ_dual = 5;
|
||||
public static final int State_nil = 0, State_i = 1, State_b = 2, State_bi = 3, State_ib = 4, State_dual = 5;
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.tests.*;
|
||||
public class Xop_apos_tkn_chkr extends Xop_tkn_chkr_base {
|
||||
@Override public Class<?> TypeOf() {return Xop_apos_tkn.class;}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_apos;}
|
||||
public int Apos_cmd() {return apos_cmd;} public Xop_apos_tkn_chkr Apos_cmd_(int v) {apos_cmd = v; return this;} private int apos_cmd = Xop_apos_tkn_.Cmd_nil;
|
||||
public int Apos_lit() {return apos_lit;} public Xop_apos_tkn_chkr Apos_lit_(int v) {apos_lit = v; return this;} private int apos_lit = -1;
|
||||
@Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
|
||||
Xop_apos_tkn actl = (Xop_apos_tkn)actl_obj;
|
||||
err += mgr.Tst_val(apos_cmd == Xop_apos_tkn_.Cmd_nil, path, "apos_cmd", Xop_apos_tkn_.Cmd_str(apos_cmd), Xop_apos_tkn_.Cmd_str(actl.Apos_cmd()));
|
||||
err += mgr.Tst_val(apos_lit == -1, path, "apos_lit", apos_lit, actl.Apos_lit());
|
||||
return err;
|
||||
}
|
||||
}
|
||||
@@ -1,114 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_apos_wkr implements Xop_ctx_wkr {
|
||||
private final List_adp stack = List_adp_.New(); // stores all apos tkns for page; needed to recalc tkn type if apos are dangling
|
||||
private int bold_count, ital_count; private Xop_apos_tkn dual_tkn = null;
|
||||
private Xop_apos_dat dat = new Xop_apos_dat();
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {Clear();}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
|
||||
this.End_frame(ctx, root, src, src_len, false);
|
||||
}
|
||||
public void AutoClose(Xop_ctx ctx, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {}
|
||||
public int Stack_len() {return stack.Len();}
|
||||
private void Clear() {
|
||||
bold_count = ital_count = 0;
|
||||
dual_tkn = null;
|
||||
stack.Clear();
|
||||
dat.State_clear();
|
||||
}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
cur_pos = Bry_find_.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Apos);
|
||||
int apos_len = cur_pos - bgn_pos;
|
||||
dat.Ident(ctx, src, apos_len, cur_pos);
|
||||
Xop_apos_tkn apos_tkn = tkn_mkr.Apos(bgn_pos, cur_pos, apos_len, dat.Typ(), dat.Cmd(), dat.Lit_apos());
|
||||
ctx.Subs_add(root, apos_tkn);
|
||||
ctx.Apos().Reg_tkn(apos_tkn, cur_pos); // NOTE: register in root ctx (main document)
|
||||
return cur_pos;
|
||||
}
|
||||
private void Reg_tkn(Xop_apos_tkn tkn, int cur_pos) { // REF.MW: Parser|doQuotes
|
||||
stack.Add(tkn);
|
||||
switch (tkn.Apos_tid()) {
|
||||
case Xop_apos_tkn_.Len_ital: ital_count++; break;
|
||||
case Xop_apos_tkn_.Len_bold: bold_count++; break;
|
||||
case Xop_apos_tkn_.Len_dual: //bold_count++; ital_count++; // NOTE: removed b/c of '''''a''b'' was trying to convert ''''' to bold
|
||||
dual_tkn = tkn;
|
||||
break;
|
||||
}
|
||||
if (dat.Dual_cmd() != 0) { // earlier dual tkn assumed to be <i><b>; </i> encountered so change dual to <b><i>
|
||||
if (dual_tkn == null) throw Err_.new_wo_type("dual tkn is null"); // should never happen
|
||||
dual_tkn.Apos_cmd_(dat.Dual_cmd());
|
||||
dual_tkn = null;
|
||||
}
|
||||
}
|
||||
public void End_frame(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int cur_pos, boolean skip_cancel_if_lnki_and_apos) {
|
||||
int state = dat.State();
|
||||
if (state == 0) {Clear(); return;} // all apos close correctly; nothing dangling; return;
|
||||
|
||||
if (bold_count % 2 == 1 && ital_count % 2 == 1) Convert_bold_to_ital(ctx, src, stack, dat);
|
||||
state = dat.State();
|
||||
if (state == 0) {Clear(); return;} // all apos close correctly after converting bold to italic; return;
|
||||
|
||||
int closeCmd = 0, closeTyp = 0;
|
||||
byte cur_tkn_tid = ctx.Cur_tkn_tid();
|
||||
if ( skip_cancel_if_lnki_and_apos // NOTE: if \n or tblw
|
||||
&& cur_tkn_tid == Xop_tkn_itm_.Tid_lnki // and cur scope is lnki
|
||||
)
|
||||
return; // don't end frame
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_i: closeTyp = Xop_apos_tkn_.Typ_ital; closeCmd = Xop_apos_tkn_.Cmd_i_end; break;
|
||||
case Xop_apos_tkn_.State_b: closeTyp = Xop_apos_tkn_.Typ_bold; closeCmd = Xop_apos_tkn_.Cmd_b_end; break;
|
||||
case Xop_apos_tkn_.State_dual:
|
||||
case Xop_apos_tkn_.State_ib: closeTyp = Xop_apos_tkn_.Typ_dual; closeCmd = Xop_apos_tkn_.Cmd_bi_end; break;
|
||||
case Xop_apos_tkn_.State_bi: closeTyp = Xop_apos_tkn_.Typ_dual; closeCmd = Xop_apos_tkn_.Cmd_ib_end; break;
|
||||
}
|
||||
ctx.Subs_add(root, ctx.Tkn_mkr().Apos(cur_pos, cur_pos, 0, closeTyp, closeCmd, 0));
|
||||
Clear();
|
||||
}
|
||||
private static void Convert_bold_to_ital(Xop_ctx ctx, byte[] src, List_adp stack, Xop_apos_dat dat) {
|
||||
Xop_apos_tkn idxNeg1 = null, idxNeg2 = null, idxNone = null; // look at previous tkn for spaces; EX: "a '''" -> idxNeg1; " a'''" -> idxNeg2; "ab'''" -> idxNone
|
||||
int len = stack.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
|
||||
if (apos.Apos_tid() != Xop_apos_tkn_.Typ_bold) continue; // only look for bold
|
||||
int tkn_bgn = apos.Src_bgn();
|
||||
boolean idxNeg1Space = tkn_bgn > 0 && src[tkn_bgn - 1] == Byte_ascii.Space;
|
||||
boolean idxNeg2Space = tkn_bgn > 1 && src[tkn_bgn - 2] == Byte_ascii.Space;
|
||||
if (idxNeg1 == null && idxNeg1Space) {idxNeg1 = apos;}
|
||||
else if (idxNeg2 == null && idxNeg2Space) {idxNeg2 = apos;}
|
||||
else if (idxNone == null && !idxNeg1Space && !idxNeg2Space) {idxNone = apos;}
|
||||
}
|
||||
if (idxNeg2 != null) Convert_bold_to_ital(ctx, src, idxNeg2); // 1st single letter word
|
||||
else if (idxNone != null) Convert_bold_to_ital(ctx, src, idxNone); // 1st multi letter word
|
||||
else if (idxNeg1 != null) Convert_bold_to_ital(ctx, src, idxNeg1); // everything else
|
||||
|
||||
// now recalc all cmds for stack
|
||||
dat.State_clear();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
|
||||
dat.Ident(ctx, src, apos.Apos_tid(), apos.Src_end()); // NOTE: apos.Typ() must map to apos_len
|
||||
int newCmd = dat.Cmd();
|
||||
if (newCmd == apos.Apos_cmd()) continue;
|
||||
apos.Apos_cmd_(newCmd);
|
||||
}
|
||||
}
|
||||
private static void Convert_bold_to_ital(Xop_ctx ctx, byte[] src, Xop_apos_tkn oldTkn) {
|
||||
oldTkn.Apos_tid_(Xop_apos_tkn_.Typ_ital).Apos_cmd_(Xop_apos_tkn_.Cmd_i_bgn).Apos_lit_(oldTkn.Apos_lit() + 1);// NOTE: Cmd_i_bgn may be overridden later
|
||||
}
|
||||
}
|
||||
@@ -1,153 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
import gplx.xowa.parsers.lists.*;
|
||||
public class Xop_apos_wkr_tst {
|
||||
private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test_parse_page_wiki("''a''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn) , fxt.tkn_txt_(2, 3), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
|
||||
fxt.Test_parse_page_wiki("'''a'''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn) , fxt.tkn_txt_(3, 4), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
|
||||
fxt.Test_parse_page_wiki("'''''a'''''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn) , fxt.tkn_txt_(5, 6), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_end));
|
||||
}
|
||||
@Test public void Advanced() {
|
||||
fxt.Test_parse_page_wiki("''''a''''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn).Apos_lit_(1) , fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end).Apos_lit_(1)); // 1 apos + bold
|
||||
fxt.Test_parse_page_wiki("''''''''a''''''''" , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn).Apos_lit_(3) , fxt.tkn_txt_(), fxt.tkn_apos_( Xop_apos_tkn_.Cmd_bi_end).Apos_lit_(3)); // 3 apos + dual
|
||||
}
|
||||
@Test public void Combo() {
|
||||
fxt.Test_parse_page_wiki("''a'''b'''c''", fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // b{i}
|
||||
fxt.Test_parse_page_wiki("'''a''b''c'''", fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)); // i{b}
|
||||
fxt.Test_parse_page_wiki("''a''b'''c'''", fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)); // b_i
|
||||
}
|
||||
@Test public void Assume_apos() {
|
||||
fxt.Test_parse_page_wiki("a01'''b01 '''c0 1'''d01''" // pick c0 1, b/c it is idxNeg2
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_(), fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // idx_neg2
|
||||
fxt.Test_parse_page_wiki("a01 '''b01 '''c01'''d01''" // pick c01, b/c it is idxNone
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // idx_none
|
||||
fxt.Test_parse_page_wiki("a01 '''b01 '''c01 '''d01''" // pick a01 , b/c it is idxNeg1
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
|
||||
, fxt.tkn_txt_(), fxt.tkn_space_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // idx_neg1
|
||||
fxt.Test_parse_page_wiki("a''''b''" // strange outlier condition
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Apos_lit_(2)
|
||||
, fxt.tkn_txt_() , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)); // 4 apos -> 2 apos + ital
|
||||
}
|
||||
@Test public void Dual() {
|
||||
fxt.Test_parse_page_wiki("'''''a'''b''" // +ib -b -i; 5apos defaults to ib
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
|
||||
fxt.Test_parse_page_wiki("'''''a''b'''" // +bi -i -b; change 5apos to bi
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
|
||||
fxt.Test_parse_page_wiki("''b'''''c'''" // 5q toggles ital n, bold y
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end__b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
|
||||
}
|
||||
@Test public void Unclosed() {
|
||||
fxt.Test_parse_page_wiki("''a"
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
|
||||
fxt.Test_parse_page_wiki("'''a"
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
|
||||
fxt.Test_parse_page_wiki("'''''a"
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_end));
|
||||
}
|
||||
@Test public void Outliers() {
|
||||
fxt.Test_parse_page_wiki("''a'''b'''c'''" // '''b -> ' +i b
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Apos_lit_(1)
|
||||
, fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
|
||||
fxt.Test_parse_page_wiki("''a'''b''c''" // '''b -> ' +i b; double check with closing itals
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Apos_lit_(1)
|
||||
, fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
|
||||
fxt.Test_parse_page_wiki("''a'''b''c" // ''c -> -bi + b
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
|
||||
, fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_end__b_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end));
|
||||
}
|
||||
@Test public void MultiLines() {
|
||||
fxt.Test_parse_page_wiki("a''b\nc''d"
|
||||
, fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(3, 4), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_nl_char_len1_(4)
|
||||
, fxt.tkn_txt_(5, 6), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end));
|
||||
}
|
||||
@Test public void Lnki() {
|
||||
fxt.Test_parse_page_wiki_str("[[''a''']]", "<a href=\"/wiki/%27%27a%27%27%27\">''a'''</a>");
|
||||
}
|
||||
@Test public void Dual_exceptions() {
|
||||
fxt.Test_parse_page_wiki("'''''a''b''"
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_bgn), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end), fxt.tkn_txt_(), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn), fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_end)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_list_autoClose() {
|
||||
fxt.Test_parse_page_wiki("''a\n*b"
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Src_rng_(0, 2)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Src_rng_(3, 3)
|
||||
, fxt.tkn_list_bgn_(3, 5, Xop_list_tkn_.List_itmTyp_ul)
|
||||
, fxt.tkn_txt_(5, 6)
|
||||
, fxt.tkn_list_end_(6)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_hr_autoClose() {
|
||||
fxt.Test_parse_page_wiki("''a\n----"
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Src_rng_(0, 2)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Src_rng_(3, 3)
|
||||
, fxt.tkn_para_blank_(3)
|
||||
, fxt.tkn_hr_(3, 8)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_hdr_autoClose() {
|
||||
fxt.Test_parse_page_wiki_str("''a\n==b==", "<i>a</i>\n\n<h2>b</h2>");
|
||||
}
|
||||
@Test public void Apos_broken_by_tblw_th() { // DATE:2013-04-24
|
||||
fxt.Test_parse_page_all_str("A ''[[b!!]]'' c", "A <i><a href=\"/wiki/B!!\">b!!</a></i> c");
|
||||
}
|
||||
@Test public void Nowiki() { // PAGE:en.w:Wiki; DATE:2013-05-13
|
||||
fxt.Test_parse_page_all_str("<nowiki>''a''</nowiki>", "''a''");
|
||||
}
|
||||
@Test public void Lnki_multi_line() { // PURPOSE: handle apos within multi-line lnki caption; DATE:2013-11-10
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "[[A|b '' c"
|
||||
, "d '' e ]]"
|
||||
)
|
||||
, "<a href=\"/wiki/A\">b <i> c d </i> e</a>"); // NOTE: c d should be italicized, not c e (latter occurs when apos is ended on each line)
|
||||
}
|
||||
@Test public void French() { // PURPOSE: L'''A'' -> L'<i>A</i>; DATE:2014-01-06
|
||||
fxt.Test_parse_page_all_str("L''''A'''", "L'<b>A</b>");
|
||||
fxt.Test_parse_page_all_str("L'''A''", "L'<i>A</i>");
|
||||
}
|
||||
// @Test public void Mix_lnke() { // FUTURE: requires rewrite of apos
|
||||
// fxt.Test_parse_page_wiki("''a[irc://b c''d''e]f''"
|
||||
// , fxt.tkn_apos_(0, 2, Xop_apos_tkn_.Cmd_i_bgn)
|
||||
// , fxt.tkn_txt_(2, 3)
|
||||
// , fxt.tkn_lnke_(3, 20).Subs_add_ary
|
||||
// ( fxt.tkn_txt_(12, 13)
|
||||
// , fxt.tkn_apos_(13, 15, Xop_apos_tkn_.Cmd_i_bgn)
|
||||
// , fxt.tkn_txt_(15, 16)
|
||||
// , fxt.tkn_apos_(16, 18, Xop_apos_tkn_.Cmd_i_end)
|
||||
// , fxt.tkn_txt_(18, 19)
|
||||
// )
|
||||
// , fxt.tkn_txt_(20, 21)
|
||||
// , fxt.tkn_apos_(21, 23, Xop_apos_tkn_.Cmd_i_bgn)
|
||||
// );
|
||||
// }
|
||||
}
|
||||
/*
|
||||
*/
|
||||
@@ -1,28 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.log_msgs.*;
|
||||
public class Xop_hdr_log {
|
||||
private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "hdr");
|
||||
public static final Gfo_msg_itm
|
||||
Dangling_hdr = Gfo_msg_itm_.new_warn_(owner, "dangling_hdr")
|
||||
, Mismatched = Gfo_msg_itm_.new_warn_(owner, "mismatched")
|
||||
, Len_1 = Gfo_msg_itm_.new_warn_(owner, "len_1")
|
||||
, Len_7_or_more = Gfo_msg_itm_.new_warn_(owner, "len_7_or_more")
|
||||
;
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_hdr_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_hdr;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Hook_bgn, this);} static final byte[] Hook_bgn = new byte[] {Byte_ascii.Nl, Byte_ascii.Eq};
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Hdr().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_hdr_lxr Instance = new Xop_hdr_lxr(); Xop_hdr_lxr() {}
|
||||
public static final byte Hook = Byte_ascii.Eq;
|
||||
}
|
||||
@@ -1,41 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_hdr_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_hdr_tkn(int bgn, int end, int num) {this.Tkn_ini_pos(false, bgn, end); this.num = num;}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_hdr;}
|
||||
public int Num() {return num;} private int num = -1; // EX: 2 for <h2>
|
||||
public int Manual_bgn() {return manual_bgn;} private int manual_bgn; // unbalanced count; EX: === A == -> 1
|
||||
public int Manual_end() {return manual_end;} private int manual_end; // unbalanced count; EX: == A === -> 1
|
||||
public boolean First_in_doc() {return first_in_doc;} private boolean first_in_doc; // true if 1st hdr in doc
|
||||
public void First_in_doc_y_() {first_in_doc = true;}
|
||||
public byte[] Section_editable_page() {return section_editable_page;} private byte[] section_editable_page; // EX: Earth as in 'href="/wiki/Earth"'
|
||||
public int Section_editable_idx() {return section_editable_idx;} private int section_editable_idx; // EX: 1 as in "section=1"
|
||||
|
||||
public void Init_by_parse(int num, int manual_bgn, int manual_end) {
|
||||
this.num = num;
|
||||
this.manual_bgn = manual_bgn;
|
||||
this.manual_end = manual_end;
|
||||
}
|
||||
public void Section_editable_(byte[] section_editable_page, int section_editable_idx) {
|
||||
this.section_editable_page = section_editable_page;
|
||||
this.section_editable_idx = section_editable_idx;
|
||||
}
|
||||
|
||||
public static final Xop_hdr_tkn[] Ary_empty = new Xop_hdr_tkn[0];
|
||||
}
|
||||
@@ -1,133 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
import gplx.xowa.parsers.hdrs.sections.*;
|
||||
public class Xop_hdr_wkr implements Xop_ctx_wkr {
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
|
||||
public void AutoClose(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
|
||||
// bgn never closed; mark inert; EX: "==a"
|
||||
Xop_hdr_tkn bgn = (Xop_hdr_tkn)tkn;
|
||||
int bgn_hdr_len = bgn.Num();
|
||||
bgn.Init_by_parse(0, bgn_hdr_len, 0);
|
||||
if (bgn_hdr_len > 1 && ctx.Parse_tid() == Xop_parser_tid_.Tid__wtxt) // NOTE: \n= is not uncommon for templates; ignore them;
|
||||
ctx.Msg_log().Add_itm_none(Xop_hdr_log.Dangling_hdr, src, bgn.Src_bgn(), bgn_pos);
|
||||
}
|
||||
public int Make_tkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_bos) bgn_pos = 0; // do not allow -1 pos
|
||||
ctx.Apos().End_frame(ctx, root, src, bgn_pos, false);
|
||||
Close_open_itms(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
|
||||
ctx.Para().Process_block__bgn__nl_w_symbol(ctx, root, src, bgn_pos, cur_pos, Xop_xnde_tag_.Tag__h2); // pass h2; should pass h# where # is correct #, but for purpose of Para_wkr, <h2> tag does not matter
|
||||
int new_pos = Bry_find_.Find_fwd_while(src, cur_pos, src_len, Xop_hdr_lxr.Hook); // count all =
|
||||
int hdr_len = new_pos - cur_pos + 1; // +1 b/c Hook has 1 eq: "\n="
|
||||
switch (hdr_len) {
|
||||
case 1: ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_1, src, bgn_pos, new_pos); break; // <h1>; flag
|
||||
case 2: case 3: case 4: case 5: case 6: break; // <h2>-<h6>: normal
|
||||
default: ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_7_or_more, src, bgn_pos, new_pos); break; // <h7>+; limit to 6; flag; NOTE: only 14 pages in 2011-07-27
|
||||
}
|
||||
|
||||
Xop_hdr_tkn tkn = tkn_mkr.Hdr(bgn_pos, new_pos, hdr_len); // make tkn
|
||||
ctx.StackTkn_add(root, tkn);
|
||||
return new_pos;
|
||||
}
|
||||
public int Make_tkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int stackPos, int end_hdr_len) {// REF.MW: Parser|doHeadings
|
||||
if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_tmpl_curly_bgn) return ctx.Lxr_make_txt_(cur_pos);
|
||||
|
||||
// end frame
|
||||
Xop_hdr_tkn hdr = (Xop_hdr_tkn)ctx.Stack_pop_til(root, src, stackPos, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_hdr);
|
||||
ctx.Apos().End_frame(ctx, root, src, bgn_pos, false); // end any apos; EX: ==''a==
|
||||
|
||||
// handle asymmetrical "="; EX: "== A ==="
|
||||
int hdr_len = hdr.Num(), bgn_manual = 0, end_manual = 0;
|
||||
boolean dirty = false;
|
||||
if (end_hdr_len < hdr_len) { // mismatch: end has more; adjust hdr
|
||||
bgn_manual = hdr_len - end_hdr_len;
|
||||
hdr_len = end_hdr_len;
|
||||
ctx.Msg_log().Add_itm_none(Xop_hdr_log.Mismatched, src, bgn_pos, cur_pos);
|
||||
if (hdr_len == 1) ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_1, src, bgn_pos, cur_pos);
|
||||
dirty = true;
|
||||
}
|
||||
else if (end_hdr_len > hdr_len) { // mismatch: hdr has more; adjust variables
|
||||
end_manual = end_hdr_len - hdr_len;
|
||||
ctx.Msg_log().Add_itm_none(Xop_hdr_log.Mismatched, src, bgn_pos, cur_pos);
|
||||
dirty = true;
|
||||
}
|
||||
if (hdr_len > 6) { // <h7>+; limit to 6; NOTE: make both bgn/end are equal length; EX: bgn=8,end=7 -> bgn=7,end=7;bgn_manual=1
|
||||
bgn_manual = end_manual = hdr_len - 6;
|
||||
hdr_len = 6;
|
||||
dirty = true;
|
||||
}
|
||||
if (dirty)
|
||||
hdr.Init_by_parse(hdr_len, bgn_manual, end_manual);
|
||||
|
||||
// gobble ws; hdr gobbles up trailing ws; EX: "==a== \n\t \n \nb" gobbles up all 3 "\n"s; otherwise para_wkr will process <br/>
|
||||
cur_pos = Find_fwd_while_ws_hdr_version(src, cur_pos, src_len);
|
||||
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag__h2);
|
||||
|
||||
// add to root tkn; other post-processing
|
||||
hdr.Subs_move(root);
|
||||
hdr.Src_end_(cur_pos);
|
||||
if (ctx.Parse_tid() == Xop_parser_tid_.Tid__wtxt) { // do not add if defn / tmpl mode
|
||||
ctx.Page().Wtxt().Toc().Add(hdr);
|
||||
}
|
||||
return cur_pos;
|
||||
}
|
||||
private void Close_open_itms(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
int stack_pos = -1, stack_len = ctx.Stack_len(); boolean stop = false;
|
||||
for (int i = 0; i < stack_len; i++) { // loop over stack
|
||||
Xop_tkn_itm prv_tkn = ctx.Stack_get(i);
|
||||
switch (prv_tkn.Tkn_tid()) { // find first list/hdr; close everything until this
|
||||
case Xop_tkn_itm_.Tid_list:
|
||||
case Xop_tkn_itm_.Tid_hdr:
|
||||
stack_pos = i; stop = true; break;
|
||||
}
|
||||
if (stop) break;
|
||||
}
|
||||
if (stack_pos == -1) return;
|
||||
ctx.Stack_pop_til(root, src, stack_pos, true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_hdr);
|
||||
}
|
||||
private static int Find_fwd_while_ws_hdr_version(byte[] src, int cur, int end) {
|
||||
int last_nl = -1;
|
||||
while (true) {
|
||||
if (cur == end) return cur;
|
||||
byte b = src[cur];
|
||||
switch (b) {
|
||||
case Byte_ascii.Nl:
|
||||
cur++;
|
||||
last_nl = cur;
|
||||
break;
|
||||
case Byte_ascii.Space:
|
||||
case Byte_ascii.Tab:
|
||||
cur++;
|
||||
break;
|
||||
default:
|
||||
return last_nl == -1 ? cur : last_nl - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
NOTE:hdr.trailing_nl
|
||||
. by design, the hdr_tkn's src_end will not include the trailing \n
|
||||
.. for example, for "\n==a==\n", the src_bgn will be 0, but the src_end will be 6
|
||||
.. note that at 6, it does not include the \n at pos 6
|
||||
. this is needed to leave the \n for the parser to handle other tkns, such as hdrs, tblws, lists.
|
||||
. for example, in "\n==a==\n*b", if the \n at pos 6 was taken by the hdr_tkn, then the parser would encounter a "*" instead of a "\n*"
|
||||
*/
|
||||
@@ -1,127 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_hdr_wkr__basic_tst {
|
||||
@Before public void init() {fxt.Reset();} private final Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void H2() {fxt.Test_parse_page_wiki_str("==a==" , "<h2>a</h2>\n");}
|
||||
@Test public void H3() {fxt.Test_parse_page_wiki_str("===a===" , "<h3>a</h3>\n");}
|
||||
@Test public void H6_limit() {fxt.Test_parse_page_wiki_str("=======a=======" , "<h6>=a=</h6>\n");}
|
||||
@Test public void Mismatch_bgn() {fxt.Test_parse_page_wiki_str("=====a==" , "<h2>===a</h2>\n");}
|
||||
@Test public void Mismatch_end() {fxt.Test_parse_page_wiki_str("==a=====" , "<h2>a===</h2>\n");}
|
||||
@Test public void Dangling() {fxt.Test_parse_page_wiki_str("==a" , "==a");}
|
||||
@Test public void Comment_bgn() {fxt.Test_parse_page_all_str ("<!--b-->==a==" , "<h2>a</h2>\n");}
|
||||
@Test public void Comment_end() {fxt.Test_parse_page_all_str ("==a==<!--b-->" , "<h2>a</h2>\n");}
|
||||
@Test public void Ws_end() { // PURPOSE: "==\n" merges all ws following it; \n\n\n is not transformed by Para_wkr to "<br/>"
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a== \t"
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, "b"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, "b"
|
||||
));
|
||||
}
|
||||
@Test public void Many() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a=="
|
||||
, "===b==="
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, ""
|
||||
, "<h3>b</h3>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Hdr_w_tblw() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a=="
|
||||
, "{|"
|
||||
, "|+"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, "<table>"
|
||||
, " <caption>"
|
||||
, " </caption>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Hdr_w_hr() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a=="
|
||||
, "----"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, "<hr/>"
|
||||
));
|
||||
}
|
||||
@Test public void Mix_apos_dangling() {fxt.Test_parse_page_wiki_str("==''a==" , "<h2><i>a</i></h2>\n");}
|
||||
@Test public void Mix_xnde_dangling() {fxt.Test_parse_page_wiki_str("==<i>a==" , "<h2><i>a</i></h2>\n");}
|
||||
@Test public void Mix_tblw_cell() {fxt.Test_parse_page_wiki_str("==a!!==" , "<h2>a!!</h2>\n");}
|
||||
@Test public void Ws() {fxt.Test_parse_page_wiki_str("== a b ==" , "<h2> a b </h2>\n");}
|
||||
@Test public void Err_hdr() {fxt.Init_log_(Xop_hdr_log.Mismatched) .Test_parse_page_wiki_str("====a== ==" , "<h2>==a== </h2>\n").tst_Log_check();}
|
||||
@Test public void Err_end_hdr_is_1() {fxt.Init_log_(Xop_hdr_log.Mismatched, Xop_hdr_log.Len_1).Test_parse_page_wiki_str("==a=" , "<h1>=a</h1>\n").tst_Log_check();}
|
||||
@Test public void Html_hdr_many() {
|
||||
fxt.Wtr_cfg().Toc__show_(Bool_.Y);
|
||||
fxt.Test_parse_page_wiki_str__esc(String_.Concat_lines_nl_skip_last
|
||||
( "==a=="
|
||||
, "==a=="
|
||||
, "==a=="
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2><span class='mw-headline' id='a'>a</span></h2>"
|
||||
, ""
|
||||
, "<h2><span class='mw-headline' id='a_2'>a</span></h2>"
|
||||
, ""
|
||||
, "<h2><span class='mw-headline' id='a_3'>a</span></h2>"
|
||||
, ""
|
||||
));
|
||||
fxt.Wtr_cfg().Toc__show_(Bool_.N);
|
||||
}
|
||||
@Test public void Hdr_inside_dangling_tmpl_fix() { // PURPOSE: one-off fix to handle == inside dangling tmpl; DATE:2014-02-11
|
||||
fxt.Test_parse_page_all_str("{{a|}\n==b=="
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "{{a|}"
|
||||
, ""
|
||||
, "<h2>b</h2>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Pfunc() {// multiple = should not be interpreted as key-val equals; PAGE:en.w:Wikipedia:Picture_of_the_day/June_2014 DATE:2014-07-21
|
||||
fxt.Test_parse_page_all_str
|
||||
( "{{#if:exists|==a==|no}}"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
// @Test public void Hdr_inside_dangling_tmpl_fix_2() { // PURPOSE: hdr == inside dangling tmpl; DATE:2014-06-10
|
||||
// fxt.Init_defn_add("Print", "{{{1}}}");
|
||||
// fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
// ( "=={{Print|b=="
|
||||
// , "}}"
|
||||
// ), String_.Concat_lines_nl_skip_last
|
||||
// ( "==b="
|
||||
// , ""
|
||||
// ));
|
||||
// }
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_hdr_wkr__para_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private final Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Hdr_at_bos() { // PURPOSE: check that BOS==a== does not throw null ref in para; DATE:2014-02-18
|
||||
fxt.Test_parse_page_all_str("==a==", "<h2>a</h2>\n");
|
||||
}
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs.sections; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.hdrs.*;
|
||||
/** Immutable record of one section: its index, header level, key and src positions. */
class Xop_section_itm {
	private final int idx;
	private final int num;
	private final byte[] key;
	private final int src_bgn;
	private final int src_end;

	public Xop_section_itm(int idx, int num, byte[] key, int src_bgn, int src_end) {
		this.idx = idx;
		this.num = num;
		this.key = key;
		this.src_bgn = src_bgn;
		this.src_end = src_end;
	}

	/** index passed to the constructor */
	public int Idx() {
		return idx;
	}
	/** header level passed to the constructor */
	public int Num() {
		return num;
	}
	/** key passed to the constructor; the array is returned as-is (no copy) */
	public byte[] Key() {
		return key;
	}
	public int Src_bgn() {
		return src_bgn;
	}
	public int Src_end() {
		return src_end;
	}
}
|
||||
@@ -1,121 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs.sections; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.hdrs.*;
|
||||
import gplx.xowa.mws.parsers.*; import gplx.xowa.mws.parsers.headings.*;
|
||||
import gplx.xowa.addons.htmls.tocs.*; import gplx.xowa.htmls.core.htmls.tidy.*;
|
||||
class Xop_section_list implements Xomw_heading_cbk {
|
||||
private final Xomw_heading_wkr hdr_wkr = new Xomw_heading_wkr();
|
||||
private final Ordered_hash hash = Ordered_hash_.New_bry();
|
||||
private final Xoh_toc_mgr toc_mgr = new Xoh_toc_mgr();
|
||||
private byte[] src;
|
||||
private Xowe_wiki wiki;
|
||||
|
||||
public Xop_section_list Parse(Xowe_wiki wiki, Xow_tidy_mgr_interface tidy_mgr, byte[] src) {
|
||||
// clear
|
||||
this.wiki = wiki;
|
||||
this.src = src;
|
||||
hash.Clear();
|
||||
toc_mgr.Clear();
|
||||
toc_mgr.Init(tidy_mgr, Bry_.Empty, Bry_.Empty);
|
||||
|
||||
// parse
|
||||
Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
hdr_wkr.Parse(pctx, src, 0, src.length, this);
|
||||
return this;
|
||||
}
|
||||
public byte[] Slice_bry_or_null(byte[] key) {
|
||||
int[] bounds = Get_section_bounds(key);
|
||||
if (bounds == null) return null; // handle missing key
|
||||
|
||||
// return slice
|
||||
return Bry_.Mid(src, bounds[0], bounds[1]);
|
||||
}
|
||||
public byte[] Merge_bry_or_null(byte[] key, byte[] edit) {
|
||||
int[] bounds = Get_section_bounds(key);
|
||||
if (bounds == null) return null; // handle missing key
|
||||
|
||||
// merge edit into orig
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
bfr.Add_mid(src, 0, bounds[0]);
|
||||
bfr.Add(edit);
|
||||
bfr.Add_mid(src, bounds[1], src.length);
|
||||
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
private int[] Get_section_bounds(byte[] key) {
|
||||
int src_bgn = -1, src_end = -1;
|
||||
int hash_len = hash.Len();
|
||||
|
||||
// if key == "", get lead section
|
||||
if (Bry_.Eq(key, Bry_.Empty)) {
|
||||
src_bgn = 0;
|
||||
src_end = src.length;
|
||||
if (hash_len > 0) {
|
||||
Xop_section_itm itm = (Xop_section_itm)hash.Get_at(0);
|
||||
src_end = itm.Src_bgn(); // -1 to skip "\n" in "\n=="
|
||||
}
|
||||
}
|
||||
// else, get section matching key
|
||||
else {
|
||||
Xop_section_itm itm = (Xop_section_itm)hash.Get_by(key);
|
||||
if (itm == null) return null;
|
||||
|
||||
// get bgn
|
||||
src_bgn = itm.Src_bgn();
|
||||
if (src[src_bgn] == Byte_ascii.Nl) src_bgn++; // skip "\n" in "\n=="
|
||||
|
||||
// get end
|
||||
for (int i = itm.Idx() + 1; i < hash_len; i++) {
|
||||
Xop_section_itm nxt = (Xop_section_itm)hash.Get_at(i);
|
||||
if (nxt.Num() > itm.Num()) continue; // skip headers that are at lower level; EX: == H2 == should skip === H3 ===
|
||||
src_end = nxt.Src_bgn();
|
||||
break;
|
||||
}
|
||||
if (src_end == -1) src_end = src.length; // no headers found; default to EOS
|
||||
src_end = Bry_find_.Find_bwd__skip_ws(src, src_end, src_bgn); // always remove ws at end
|
||||
}
|
||||
|
||||
return new int[] {src_bgn, src_end};
|
||||
}
|
||||
public void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {
|
||||
// get key by taking everything between ==; EX: "== abc ==" -> " abc "
|
||||
byte[] src = wkr.Src();
|
||||
int hdr_txt_bgn = wkr.Hdr_lhs_end();
|
||||
int hdr_txt_end = wkr.Hdr_rhs_bgn();
|
||||
|
||||
// trim ws
|
||||
hdr_txt_bgn = Bry_find_.Find_fwd_while_ws(src, hdr_txt_bgn, hdr_txt_end);
|
||||
hdr_txt_end = Bry_find_.Find_bwd__skip_ws(src, hdr_txt_end, hdr_txt_bgn);
|
||||
byte[] key = Bry_.Mid(wkr.Src(), hdr_txt_bgn, hdr_txt_end);
|
||||
|
||||
// handle nested templates; EX: "== {{A}} ==" note that calling Parse_text_to_html is expensive (called per header) but should be as long as its not nested
|
||||
key = wiki.Parser_mgr().Main().Parse_text_to_html(wiki.Parser_mgr().Ctx(), key);
|
||||
|
||||
// handle math; EX: "== <math>\delta</math> =="
|
||||
key = wiki.Parser_mgr().Uniq_mgr().Convert(key);
|
||||
|
||||
// convert key to toc_text to handle (a) XML ("<i>a</i>" -> "a"); (b) dupes ("text" -> "text_2")
|
||||
int num = wkr.Hdr_num();
|
||||
Xoh_toc_itm toc_itm = toc_mgr.Add(num, key);
|
||||
key = toc_itm.Anch();
|
||||
|
||||
Xop_section_itm itm = new Xop_section_itm(hash.Count(), num, key, wkr.Hdr_bgn(), wkr.Hdr_end());
|
||||
hash.Add(key, itm);
|
||||
}
|
||||
public void On_src_done(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {}
|
||||
}
|
||||
@@ -1,179 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs.sections; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.hdrs.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xop_section_list__merge__tst {
|
||||
private final Xop_section_list__fxt fxt = new Xop_section_list__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Exec__parse
|
||||
( "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2"
|
||||
, ""
|
||||
, "== Hdr 3 =="
|
||||
, "Para 3"
|
||||
);
|
||||
fxt.Test__merge_bry_or_null("Hdr_2", String_.Concat_lines_nl_skip_last
|
||||
( "== Hdr 2 =="
|
||||
, "Para 2a"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2a"
|
||||
, ""
|
||||
, "== Hdr 3 =="
|
||||
, "Para 3"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Nl_many() {
|
||||
fxt.Exec__parse
|
||||
( "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2"
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, "== Hdr 3 =="
|
||||
, "Para 3"
|
||||
);
|
||||
fxt.Test__merge_bry_or_null("Hdr_2", String_.Concat_lines_nl_skip_last
|
||||
( "== Hdr 2 =="
|
||||
, "Para 2a"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2a"
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, "== Hdr 3 =="
|
||||
, "Para 3"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Bos() {
|
||||
fxt.Exec__parse
|
||||
( "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2"
|
||||
);
|
||||
fxt.Test__merge_bry_or_null("Hdr_1", String_.Concat_lines_nl_skip_last
|
||||
( "== Hdr 1 =="
|
||||
, "Para 1a"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "== Hdr 1 =="
|
||||
, "Para 1a"
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Bos__ws() {
|
||||
fxt.Exec__parse
|
||||
( ""
|
||||
, "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2"
|
||||
);
|
||||
fxt.Test__merge_bry_or_null("Hdr_1", String_.Concat_lines_nl_skip_last
|
||||
( "== Hdr 1 =="
|
||||
, "Para 1a"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( ""
|
||||
, "== Hdr 1 =="
|
||||
, "Para 1a"
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Eos() {
|
||||
fxt.Exec__parse
|
||||
( "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2"
|
||||
);
|
||||
fxt.Test__merge_bry_or_null("Hdr_2", String_.Concat_lines_nl_skip_last
|
||||
( "== Hdr 2 =="
|
||||
, "Para 2a"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2a"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Lead() {
|
||||
fxt.Exec__parse
|
||||
( "lead para"
|
||||
, ""
|
||||
, "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
);
|
||||
fxt.Test__merge_bry_or_null("", String_.Concat_lines_nl_skip_last
|
||||
( "lead para 1"
|
||||
, ""
|
||||
, "lead para 2"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "lead para 1"
|
||||
, ""
|
||||
, "lead para 2"
|
||||
, "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Lead__new() {
|
||||
fxt.Exec__parse
|
||||
( "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
);
|
||||
fxt.Test__merge_bry_or_null("", String_.Concat_lines_nl_skip_last
|
||||
( "lead para 1"
|
||||
, ""
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "lead para 1"
|
||||
, "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,162 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs.sections; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.hdrs.*;
|
||||
import org.junit.*; import gplx.core.tests.*; import gplx.xowa.htmls.core.htmls.tidy.*;
|
||||
public class Xop_section_list__slice__tst {
|
||||
private final Xop_section_list__fxt fxt = new Xop_section_list__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Exec__parse
|
||||
( "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2"
|
||||
, ""
|
||||
, "== Hdr 3 =="
|
||||
, "Para 3"
|
||||
);
|
||||
fxt.Test__slice_bry_or_null("Hdr_1"
|
||||
, "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
);
|
||||
fxt.Test__slice_bry_or_null("Hdr_2"
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2"
|
||||
);
|
||||
fxt.Test__slice_bry_or_null("Hdr_3"
|
||||
, "== Hdr 3 =="
|
||||
, "Para 3"
|
||||
);
|
||||
}
|
||||
@Test public void Covering() {
|
||||
fxt.Exec__parse
|
||||
( "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, "=== Hdr 1a ==="
|
||||
, "Para 1a"
|
||||
, ""
|
||||
, "=== Hdr 1b ==="
|
||||
, "Para 1b"
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2"
|
||||
);
|
||||
fxt.Test__slice_bry_or_null("Hdr_1"
|
||||
, "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, "=== Hdr 1a ==="
|
||||
, "Para 1a"
|
||||
, ""
|
||||
, "=== Hdr 1b ==="
|
||||
, "Para 1b"
|
||||
);
|
||||
}
|
||||
@Test public void Xml() {
|
||||
fxt.Exec__parse
|
||||
( "== <i>Hdr 1</i> =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2"
|
||||
);
|
||||
fxt.Test__slice_bry_or_null("Hdr_1", String_.Concat_lines_nl_skip_last
|
||||
( "== <i>Hdr 1</i> =="
|
||||
, "Para 1"
|
||||
));
|
||||
}
|
||||
@Test public void Math() {
|
||||
fxt.Exec__parse
|
||||
( "== <math>\\delta</math> =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2"
|
||||
);
|
||||
fxt.Test__slice_bry_or_null(".5Cdelta", String_.Concat_lines_nl_skip_last
|
||||
( "== <math>\\delta</math> =="
|
||||
, "Para 1"
|
||||
));
|
||||
}
|
||||
@Test public void Template() {
|
||||
fxt.Init__template("mock", "''{{{1}}}''");
|
||||
fxt.Exec__parse
|
||||
( "== {{mock|a}} =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
, "== Hdr 2 =="
|
||||
, "Para 2"
|
||||
);
|
||||
fxt.Test__slice_bry_or_null("a", String_.Concat_lines_nl_skip_last
|
||||
( "== {{mock|a}} =="
|
||||
, "Para 1"
|
||||
));
|
||||
}
|
||||
@Test public void Lead() {
|
||||
fxt.Exec__parse
|
||||
( "lead text"
|
||||
, ""
|
||||
, "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
);
|
||||
fxt.Test__slice_bry_or_null(""
|
||||
, "lead text"
|
||||
);
|
||||
}
|
||||
@Test public void Lead__none() {
|
||||
fxt.Exec__parse
|
||||
( ""
|
||||
, "== Hdr 1 =="
|
||||
, "Para 1"
|
||||
, ""
|
||||
);
|
||||
fxt.Test__slice_bry_or_null("");
|
||||
}
|
||||
@Test public void Lead__eos() {
|
||||
fxt.Exec__parse
|
||||
( "lead text"
|
||||
, ""
|
||||
, "para 1"
|
||||
, ""
|
||||
);
|
||||
fxt.Test__slice_bry_or_null(""
|
||||
, "lead text"
|
||||
, ""
|
||||
, "para 1"
|
||||
);
|
||||
}
|
||||
}
|
||||
class Xop_section_list__fxt {
|
||||
private final Xop_section_list list = new Xop_section_list();
|
||||
private final Xop_fxt parser_fxt = new Xop_fxt();
|
||||
public void Init__template(String page, String text) {parser_fxt.Init_defn_add(page, text);}
|
||||
public void Exec__parse(String... lines) {
|
||||
list.Parse(parser_fxt.Wiki(), Xow_tidy_mgr_interface_.Noop, Bry_.new_u8(String_.Concat_lines_nl_skip_last(lines)));
|
||||
}
|
||||
public void Test__slice_bry_or_null(String key, String... lines) {
|
||||
String expd = String_.Concat_lines_nl_skip_last(lines);
|
||||
byte[] actl = list.Slice_bry_or_null(Bry_.new_u8(key));
|
||||
Gftest.Eq__ary__lines(expd, actl, key);
|
||||
}
|
||||
public void Test__merge_bry_or_null(String key, String edit, String expd) {
|
||||
byte[] actl = list.Merge_bry_or_null(Bry_.new_u8(key), Bry_.new_u8(edit));
|
||||
Gftest.Eq__ary__lines(expd, actl, key);
|
||||
}
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs.sections; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.hdrs.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*; import gplx.xowa.parsers.hdrs.*; import gplx.xowa.htmls.core.htmls.tidy.*;
|
||||
public class Xop_section_mgr implements Gfo_invk {
|
||||
private Xoae_app app; private Xowe_wiki wiki;
|
||||
private Xow_tidy_mgr_interface tidy_mgr;
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
private byte[] bry__edit_text;
|
||||
private final Bry_fmt fmt__edit_hint = Bry_fmt.New("")
|
||||
, fmt__section_editable = Bry_fmt.Auto_nl_apos
|
||||
( "<span class='mw-editsection'><span class='mw-editsection-bracket'>[</span><a href='/wiki/~{page_ttl}?action=edit§ion_key=~{section_key}' title='~{edit_hint}' class='xowa-hover-off'>~{edit_text}</a><span class='mw-editsection-bracket'>]</span></span>"
|
||||
)
|
||||
;
|
||||
public boolean Enabled() {return enabled;} private boolean enabled;
|
||||
public void Init_by_wiki(Xowe_wiki wiki) {
|
||||
this.app = wiki.Appe();
|
||||
this.wiki = wiki;
|
||||
wiki.App().Cfg().Bind_many_wiki(this, wiki, Cfg__section_editing__enabled);
|
||||
this.tidy_mgr = wiki.Html_mgr().Tidy_mgr();
|
||||
}
|
||||
public byte[] Slice_section(Xoa_url url, Xoa_ttl ttl, byte[] src) {
|
||||
// return orig if section_editing not enabled
|
||||
if (!enabled) return src;
|
||||
|
||||
// return orig if section_key not in qargs
|
||||
byte[] section_key = url.Qargs_mgr().Get_val_bry_or(Qarg__section_key, null);
|
||||
if (section_key == null) return src;
|
||||
|
||||
// parse wikitext into list of headers
|
||||
Xop_section_list section_list = new Xop_section_list().Parse(wiki, tidy_mgr, src);
|
||||
byte[] rv = section_list.Slice_bry_or_null(section_key);
|
||||
if (rv == null) {
|
||||
app.Gui_mgr().Kit().Ask_ok("", "", String_.Format("Section extraction failed!\nPlease do not edit this page else data will be lost!!\n\nwiki={0}\npage={1}\nsection={2}", url.Wiki_bry(), ttl.Full_db(), section_key));
|
||||
throw Err_.new_wo_type("section_key not found", "wiki", url.Wiki_bry(), "page", ttl.Full_db(), "section_key", section_key);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public byte[] Merge_section(Xoa_url url, byte[] edit, byte[] orig) {
|
||||
// return edit if not enabled
|
||||
if (!enabled) return edit;
|
||||
|
||||
// return edit if section_key not in qargs
|
||||
byte[] section_key = url.Qargs_mgr().Get_val_bry_or(Qarg__section_key, null);
|
||||
if (section_key == null) return edit;
|
||||
|
||||
// parse orig
|
||||
Xop_section_list section_list = new Xop_section_list().Parse(wiki, tidy_mgr, orig);
|
||||
byte[] rv = section_list.Merge_bry_or_null(section_key, edit);
|
||||
if (rv == null)
|
||||
throw Err_.new_wo_type("could not merge section_key", "page", url.To_str(), "section_key", section_key);
|
||||
return rv;
|
||||
}
|
||||
public void Write_html(Bry_bfr bfr, byte[] page_ttl, byte[] section_key, byte[] section_hint) {
|
||||
if (bry__edit_text == null) { // LAZY: cannot call in Init_by_wiki b/c of circularity; section_mgr is init'd by parser_mgr which is init'd before msg_mgr which is used below
|
||||
this.bry__edit_text = wiki.Msg_mgr().Val_by_key_obj("editlink");
|
||||
this.fmt__edit_hint.Fmt_(String_.new_u8(wiki.Msg_mgr().Val_by_key_obj("editsectionhint")));
|
||||
}
|
||||
|
||||
section_key = wiki.Parser_mgr().Uniq_mgr().Convert(section_key); // need to swap out uniqs for Math; DATE:2016-12-09
|
||||
byte[] edit_hint = fmt__edit_hint.Bld_many_to_bry(tmp_bfr, section_hint);
|
||||
fmt__section_editable.Bld_many(bfr, page_ttl, section_key, edit_hint, bry__edit_text);
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Cfg__section_editing__enabled)) enabled = m.ReadBool("v");
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
|
||||
public static final byte[] Bry__meta = Bry_.new_a7("<!--xo_meta|section_edit|");
|
||||
public static final int Len__meta = Bry__meta.length;
|
||||
private static final byte[] Qarg__section_key = Bry_.new_u8("section_key");
|
||||
private static final String Cfg__section_editing__enabled = "xowa.wiki.edit.section.enabled";
|
||||
}
|
||||
@@ -1,66 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Mwh_atr_itm {
|
||||
public Mwh_atr_itm
|
||||
( byte[] src, boolean valid, boolean repeated, boolean key_exists, int atr_bgn, int atr_end
|
||||
, int key_bgn, int key_end, byte[] key_bry
|
||||
, int val_bgn, int val_end, byte[] val_bry
|
||||
, int eql_pos, int qte_tid
|
||||
) {
|
||||
this.src = src;
|
||||
this.valid = valid; this.repeated = repeated; this.key_exists = key_exists;
|
||||
this.atr_bgn = atr_bgn; this.atr_end = atr_end;
|
||||
this.key_bgn = key_bgn; this.key_end = key_end; this.key_bry = key_bry;
|
||||
this.val_bgn = val_bgn; this.val_end = val_end; this.val_bry = val_bry;
|
||||
this.eql_pos = eql_pos; this.qte_tid = qte_tid;
|
||||
}
|
||||
public byte[] Src() {return src;} private final byte[] src;
|
||||
public boolean Valid() {return valid;} private final boolean valid;
|
||||
public boolean Key_exists() {return key_exists;} private final boolean key_exists;
|
||||
public boolean Repeated() {return repeated;} private final boolean repeated;
|
||||
public boolean Invalid() {return repeated || !valid;}
|
||||
public int Atr_bgn() {return atr_bgn;} private int atr_bgn;
|
||||
public int Atr_end() {return atr_end;} private int atr_end;
|
||||
public int Key_bgn() {return key_bgn;} private final int key_bgn;
|
||||
public int Key_end() {return key_end;} private final int key_end;
|
||||
public byte[] Key_bry() {return key_bry;} private byte[] key_bry;
|
||||
public byte Key_tid() {return key_tid;} public Mwh_atr_itm Key_tid_(byte v) {key_tid = v; return this;} private byte key_tid;
|
||||
public int Val_bgn() {return val_bgn;} private final int val_bgn;
|
||||
public int Val_end() {return val_end;} private final int val_end;
|
||||
public byte[] Val_bry() {return val_bry;} private byte[] val_bry;
|
||||
public int Eql_pos() {return eql_pos;} private final int eql_pos;
|
||||
public int Qte_tid() {return qte_tid;} private final int qte_tid;
|
||||
public byte Qte_byte() {
|
||||
switch (qte_tid) {
|
||||
case Mwh_atr_itm_.Qte_tid__none: return Byte_ascii.Null;
|
||||
case Mwh_atr_itm_.Qte_tid__apos: return Byte_ascii.Apos;
|
||||
case Mwh_atr_itm_.Qte_tid__qute: return Byte_ascii.Quote;
|
||||
default: throw Err_.new_unhandled(qte_tid);
|
||||
}
|
||||
}
|
||||
public Mwh_atr_itm Atr_rng(int bgn, int end) {this.atr_bgn = bgn; this.atr_end = end; return this;}
|
||||
public void Key_bry_(byte[] v) {this.key_bry = v;}
|
||||
public void Val_bry_(byte[] v) {this.val_bry = v;}
|
||||
public String Val_as_str() {return String_.new_u8(Val_as_bry());}
|
||||
public byte[] Val_as_bry() {if (val_bry == null) val_bry = Bry_.Mid(src, val_bgn, val_end); return val_bry;} // NOTE: val_bry is cached
|
||||
public byte[] Val_as_bry__blank_to_null() {byte[] rv = Val_as_bry(); return Bry_.Len_eq_0(rv) ? null : rv;}
|
||||
public int Val_as_int_or(int or) {return val_bry == null ? Bry_.To_int_or__lax(src, val_bgn, val_end, or) : Bry_.To_int_or(val_bry, or);}
|
||||
public boolean Val_as_bool_by_int() {return Val_as_int_or(0) == 1;}
|
||||
public boolean Val_as_bool() {return Bry_.Eq(Bry_.Lcase__all(Val_as_bry()), Bool_.True_bry);}
|
||||
}
|
||||
@@ -1,51 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Mwh_atr_itm_ {
|
||||
public static final Mwh_atr_itm[] Ary_empty = new Mwh_atr_itm[0];
|
||||
public static final int Atr_tid__invalid = 1, Atr_tid__repeat = 2, Atr_tid__pair = 4, Atr_tid__name = 8; // NOTE: id order is important; see above;
|
||||
public static final int Qte_tid__none = 0, Qte_tid__apos = 1, Qte_tid__qute = 2;
|
||||
public static final int Mask__qte__none = 0, Mask__qte__apos = 1, Mask__qte_qute = 2;
|
||||
public static final int
|
||||
Mask__valid = 8
|
||||
, Mask__repeated = 16
|
||||
, Mask__key_exists = 32
|
||||
, Mask__val_made = 64
|
||||
;
|
||||
public static final boolean Mask__valid__n = false, Mask__valid__y = true;
|
||||
public static final boolean Mask__key_exists__n = false, Mask__key_exists__y = true;
|
||||
public static final boolean Mask__repeated__n = false, Mask__repeated__y = true;
|
||||
public static final boolean Mask__val_made__n = false, Mask__val_made__y = true;
|
||||
public static int Calc_atr_utl(int qte_tid, boolean valid, boolean repeated, boolean key_exists, boolean val_made) {
|
||||
int rv = qte_tid;
|
||||
if (valid) rv |= Mwh_atr_itm_.Mask__valid;
|
||||
if (repeated) rv |= Mwh_atr_itm_.Mask__repeated;
|
||||
if (key_exists) rv |= Mwh_atr_itm_.Mask__key_exists;
|
||||
if (val_made) rv |= Mwh_atr_itm_.Mask__val_made;
|
||||
return rv;
|
||||
}
|
||||
public static int Calc_qte_tid(int val) {
|
||||
return val & ((1 << 3) - 1);
|
||||
}
|
||||
public static byte Calc_qte_byte(int[] data_ary, int idx) {
|
||||
int val = data_ary[idx + Mwh_atr_mgr.Idx_atr_utl];
|
||||
int qte_tid = (val & ((1 << 3) - 1));
|
||||
return qte_tid == Qte_tid__apos ? Byte_ascii.Apos : Byte_ascii.Quote;
|
||||
}
|
||||
public static final byte Key_tid__generic = 0, Key_tid__id = 1, Key_tid__style = 2, Key_tid__role = 3;
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public interface Mwh_atr_itm_owner1 {
|
||||
void Xatr__set(Xowe_wiki wiki, byte[] src, Mwh_atr_itm xatr, Object xatr_id_obj);
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public interface Mwh_atr_itm_owner2 {
|
||||
void Xatr__set(Xowe_wiki wiki, byte[] src, Mwh_atr_itm xatr, byte xatr_id);
|
||||
}
|
||||
@@ -1,98 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.brys.*;
|
||||
public class Mwh_atr_mgr {
|
||||
private final int data_max_orig;
|
||||
public Mwh_atr_mgr(int max) {
|
||||
this.data_max_orig = max * Idx__mult;
|
||||
this.Max_(max);
|
||||
}
|
||||
public int Len() {return itm_len;} private int itm_len;
|
||||
public int[] Data_ary() {return data_ary;} private int[] data_ary; private int data_max;
|
||||
public byte[][] Text_ary() {return text_ary;} private byte[][] text_ary;
|
||||
private void Max_(int len) {
|
||||
this.data_max = len * Idx__mult;
|
||||
this.data_ary = new int[data_max];
|
||||
this.text_ary = new byte[len * Text__mult][];
|
||||
this.itm_len = 0;
|
||||
}
|
||||
public void Clear() {
|
||||
if (data_max == data_max_orig)
|
||||
itm_len = 0;
|
||||
else
|
||||
Max_(data_max_orig / Idx__mult);
|
||||
}
|
||||
public int Add(int nde_uid, int nde_tid, boolean valid, boolean repeated, boolean key_exists, int atr_bgn, int atr_end, int key_bgn, int key_end, byte[] key_bry, int eql_pos, int qte_tid, int val_bgn, int val_end, byte[] val_bry) {
|
||||
int data_idx = itm_len * Idx__mult;
|
||||
if (data_idx == data_max) {
|
||||
int new_data_max = data_max == 0 ? Idx__mult : data_max * 2;
|
||||
int[] new_data_ary = new int[new_data_max];
|
||||
Int_.Ary_copy_to(data_ary, data_max, data_ary);
|
||||
this.data_ary = new_data_ary;
|
||||
|
||||
int text_max = text_ary.length;
|
||||
int new_text_max = data_max == 0 ? Text__mult : text_max * 2;
|
||||
byte[][] new_text_ary = new byte[new_text_max][];
|
||||
for (int i = 0; i < text_max; ++i)
|
||||
new_text_ary[i] = text_ary[i];
|
||||
this.text_ary = new_text_ary;
|
||||
|
||||
this.data_max = new_data_max;
|
||||
}
|
||||
boolean val_made = false;
|
||||
int text_idx = itm_len * Text__mult;
|
||||
text_ary[text_idx] = key_bry;
|
||||
if (val_bry != null) {
|
||||
text_ary[text_idx + 1] = val_bry;
|
||||
val_made = true;
|
||||
}
|
||||
data_ary[data_idx + Idx_nde_uid] = nde_uid;
|
||||
data_ary[data_idx + Idx_nde_tid] = nde_tid;
|
||||
data_ary[data_idx + Idx_atr_utl] = Mwh_atr_itm_.Calc_atr_utl(qte_tid, valid, repeated, key_exists, val_made);
|
||||
data_ary[data_idx + Idx_atr_bgn] = atr_bgn;
|
||||
data_ary[data_idx + Idx_atr_end] = atr_end;
|
||||
data_ary[data_idx + Idx_key_bgn] = key_bgn;
|
||||
data_ary[data_idx + Idx_key_end] = key_end;
|
||||
data_ary[data_idx + Idx_val_bgn] = val_bgn;
|
||||
data_ary[data_idx + Idx_val_end] = val_end;
|
||||
data_ary[data_idx + Idx_eql_pos] = eql_pos;
|
||||
return itm_len++;
|
||||
}
|
||||
public void Set_repeated(int atr_uid) {
|
||||
int atr_utl_idx = (atr_uid * Idx__mult) + Idx_atr_utl;
|
||||
int atr_utl = data_ary[atr_utl_idx];
|
||||
int val_bry_exists = atr_utl & Atr_utl__val_bry_exists;
|
||||
data_ary[atr_utl_idx] = Mwh_atr_itm_.Atr_tid__repeat | val_bry_exists;
|
||||
}
|
||||
public static final int
|
||||
Idx_nde_uid = 0
|
||||
, Idx_nde_tid = 1
|
||||
, Idx_atr_utl = 2
|
||||
, Idx_atr_bgn = 3
|
||||
, Idx_atr_end = 4
|
||||
, Idx_key_bgn = 5
|
||||
, Idx_key_end = 6
|
||||
, Idx_val_bgn = 7
|
||||
, Idx_val_end = 8
|
||||
, Idx_eql_pos = 9
|
||||
, Idx__mult = 10
|
||||
;
|
||||
public static final int Text__mult = 2;
|
||||
public static final int Atr_utl__val_bry_exists = 16;
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Mwh_atr_mgr_tst {
|
||||
private final Mwh_atr_mgr_fxt fxt = new Mwh_atr_mgr_fxt();
|
||||
@Test public void Atr_utl_make() {
|
||||
// key="val"
|
||||
fxt.Test_atr_utl_make(Mwh_atr_itm_.Qte_tid__qute, Mwh_atr_itm_.Mask__valid__y, Mwh_atr_itm_.Mask__repeated__n, Mwh_atr_itm_.Mask__key_exists__y, Mwh_atr_itm_.Mask__val_made__n, 42);
|
||||
// key=val key=v<nowiki/>al
|
||||
fxt.Test_atr_utl_make(Mwh_atr_itm_.Qte_tid__none, Mwh_atr_itm_.Mask__valid__y, Mwh_atr_itm_.Mask__repeated__y, Mwh_atr_itm_.Mask__key_exists__y, Mwh_atr_itm_.Mask__val_made__y, 120);
|
||||
}
|
||||
}
|
||||
class Mwh_atr_mgr_fxt {
|
||||
public void Test_atr_utl_make(int qte_tid, boolean valid, boolean repeated, boolean key_exists, boolean val_made, int expd) {
|
||||
int atr_utl = Mwh_atr_itm_.Calc_atr_utl(qte_tid, valid, repeated, key_exists, val_made);
|
||||
Tfds.Eq_int(expd, atr_utl);
|
||||
Tfds.Eq_int(qte_tid, Mwh_atr_itm_.Calc_qte_tid(atr_utl));
|
||||
Tfds.Eq_bool(valid, (atr_utl & Mwh_atr_itm_.Mask__valid) == Mwh_atr_itm_.Mask__valid);
|
||||
Tfds.Eq_bool(repeated, (atr_utl & Mwh_atr_itm_.Mask__repeated) == Mwh_atr_itm_.Mask__repeated);
|
||||
Tfds.Eq_bool(key_exists, (atr_utl & Mwh_atr_itm_.Mask__key_exists) == Mwh_atr_itm_.Mask__key_exists);
|
||||
Tfds.Eq_bool(val_made, (atr_utl & Mwh_atr_itm_.Mask__val_made) == Mwh_atr_itm_.Mask__val_made);
|
||||
}
|
||||
}
|
||||
@@ -1,481 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.primitives.*;
|
||||
import gplx.xowa.parsers.xndes.*; // for brys: <nowiki>, <noinclude>, <includeonly>, <onlyinclude>
|
||||
public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATTRIBS_REGEX
|
||||
private static final byte Area__invalid = 0, Area__atr_limbo = 1, Area__key = 2, Area__eql_limbo = 3, Area__val_limbo = 4, Area__val_quote = 5, Area__val_naked = 6;
|
||||
private final Hash_adp_bry repeated_atrs_hash = Hash_adp_bry.ci_a7(); // ASCII:xnde_atrs
|
||||
private final Mwh_atr_mgr atr_mgr = new Mwh_atr_mgr(16);
|
||||
private final Bry_bfr key_bfr = Bry_bfr_.New(), val_bfr = Bry_bfr_.New();
|
||||
private byte area = Area__atr_limbo;
|
||||
private int atr_bgn = -1, key_bgn = -1, key_end = -1, eql_pos = -1, val_bgn = -1, val_end = -1;
|
||||
private byte qte_byte = Byte_ascii.Null;
|
||||
private boolean key_bfr_on = false, val_bfr_on = false, ws_is_before_val = false, qte_closed = false;
|
||||
private int nde_uid, nde_tid;
|
||||
public Bry_obj_ref Bry_obj() {return bry_ref;} private final Bry_obj_ref bry_ref = Bry_obj_ref.New_empty();
|
||||
public int Nde_end_tid() {return nde_end_tid;} private int nde_end_tid;
|
||||
public int Parse(Mwh_atr_wkr wkr, int nde_uid, int nde_tid, byte[] src, int src_bgn, int src_end) {
|
||||
this.nde_uid = nde_uid; this.nde_tid = nde_tid;
|
||||
this.nde_end_tid = Mwh_doc_parser.Nde_end_tid__invalid;
|
||||
this.atr_bgn = -1;
|
||||
area = Area__atr_limbo;
|
||||
boolean prv_is_ws = false;
|
||||
int pos = src_bgn;
|
||||
boolean loop = true;
|
||||
while (loop) {
|
||||
if (pos >= src_end) {
|
||||
switch (area) {
|
||||
case Area__key: // EX: "a"
|
||||
case Area__eql_limbo: // EX: "a "
|
||||
case Area__val_naked: // EX: "a=b"
|
||||
break; // valid atr
|
||||
case Area__val_quote: // EX: "a='b'"
|
||||
if (qte_closed)
|
||||
Make(src, src_end);
|
||||
else { // dangling; EX: "a='b c=d"
|
||||
int reset_pos = Bry_find_.Find_fwd(src, Byte_ascii.Space, val_bgn, src_end); // try to find 1st space within quote; EX:"a='b c=d" should try to reset at c=d
|
||||
boolean reset_found = reset_pos != Bry_find_.Not_found;
|
||||
area = Area__invalid; val_end = reset_found ? reset_pos : src_end;
|
||||
Make(src, val_end); // create invalid atr
|
||||
if (reset_found) { // space found; resume from text after space; EX: "a='b c=d"; PAGE:en.w:Aubervilliers DATE:2014-06-25
|
||||
pos = Bry_find_.Find_fwd_while_not_ws(src, reset_pos, src_end); // skip ws
|
||||
atr_bgn = -1;
|
||||
area = Area__atr_limbo;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Area__invalid: case Area__atr_limbo:
|
||||
case Area__val_limbo:
|
||||
area = Area__invalid;
|
||||
break;
|
||||
}
|
||||
if (atr_bgn != -1) {
|
||||
val_end = src_end;
|
||||
Make(src, val_end);
|
||||
}
|
||||
break;
|
||||
}
|
||||
byte b = src[pos];
|
||||
switch (area) {
|
||||
case Area__invalid:
|
||||
switch (b) {
|
||||
// ws -> end invalid area
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
Make(src, pos);
|
||||
area = Area__atr_limbo;
|
||||
break;
|
||||
// rest -> continue eating up invalid chars
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__atr_limbo: // 1st area after (a) node_name, (b) attribute, (c) invalid_area
|
||||
switch (b) {
|
||||
// ws -> ignore; skip any ws in atr_limbo; note that once a non-ws char is encountered, it will immediately go into another area
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
if (atr_bgn == -1) atr_bgn = pos; // NOTE: atr_bgn == -1 needed for multiple spaces; ALSO: cannot move above switch b/c of <nowiki>
|
||||
break;
|
||||
// attribFirst -> enter Area__key; REF.MW: $attribFirst = '[:A-Z_a-z0-9]';
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Colon: case Byte_ascii.Underline:
|
||||
area = Area__key;
|
||||
if (atr_bgn == -1) atr_bgn = pos; // NOTE: atr_bgn == -1 needed b/c of spaces
|
||||
key_bgn = pos;
|
||||
break;
|
||||
// angle_bgn -> check for <nowiki>
|
||||
case Byte_ascii.Angle_bgn: // handle "<nowiki>"
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found) {
|
||||
area = Area__invalid; if (atr_bgn == -1) atr_bgn = pos;
|
||||
}
|
||||
else
|
||||
pos = gt_pos; // position after ">"; note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
break;
|
||||
// rest -> invalid
|
||||
default: // quote and other non-valid key characters are invalid until next space; EX: "<span 'key_cannot_be_quoted' id='123'"
|
||||
area = Area__invalid; if (atr_bgn == -1) atr_bgn = pos;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__key:
|
||||
switch (b) {
|
||||
// alphanum -> valid key chars; REF.MW: $attrib = '[:A-Z_a-z-.0-9]';
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Colon: case Byte_ascii.Underline: case Byte_ascii.Dash: case Byte_ascii.Dot:
|
||||
if (key_bfr_on) key_bfr.Add_byte(b);
|
||||
break;
|
||||
// ws -> end key
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
area = Area__eql_limbo;
|
||||
key_end = pos;
|
||||
break;
|
||||
// eq -> end key; go to Area_val_limbo
|
||||
case Byte_ascii.Eq:
|
||||
area = Area__val_limbo;
|
||||
key_end = eql_pos = pos;
|
||||
break;
|
||||
// angle_bgn -> check for <nowiki>
|
||||
case Byte_ascii.Angle_bgn:
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found) // "<" should not be in key; EX: "ke<y"
|
||||
area = Area__invalid;
|
||||
else {
|
||||
if (!key_bfr_on) {key_bfr.Add_mid(src, key_bgn, pos); key_bfr_on = true;}
|
||||
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
}
|
||||
break;
|
||||
// rest -> enter invalid
|
||||
default:
|
||||
area = Area__invalid;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__eql_limbo:
|
||||
switch (b) {
|
||||
// ws -> skip
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space: // skip ws
|
||||
break;
|
||||
// eq -> enter Area__val_limbo
|
||||
case Byte_ascii.Eq:
|
||||
eql_pos = pos;
|
||||
area = Area__val_limbo;
|
||||
break;
|
||||
// attribFirst -> enter Area__key; REF.MW: $attribFirst = '[:A-Z_a-z0-9]';
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Colon: case Byte_ascii.Underline:
|
||||
Make(src, pos);
|
||||
area = Area__key;
|
||||
atr_bgn = key_bgn = pos;
|
||||
break;
|
||||
// rest -> make atr and enter limbo
|
||||
default:
|
||||
area = Area__invalid;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__val_limbo:
|
||||
switch (b) {
|
||||
// ws -> skip
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
ws_is_before_val = true;
|
||||
break;
|
||||
// quote -> enter Area_val_quote
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos:
|
||||
area = Area__val_quote; qte_byte = b; qte_closed = false;
|
||||
prv_is_ws = false;
|
||||
val_bgn = pos + 1;
|
||||
break;
|
||||
// alphanum -> enter Area_val_raw; REF.MW: [a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent: case Byte_ascii.Amp:
|
||||
case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star: case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot:
|
||||
case Byte_ascii.Backslash: case Byte_ascii.Slash: case Byte_ascii.Colon: case Byte_ascii.Semic:
|
||||
case Byte_ascii.Question: case Byte_ascii.At:
|
||||
case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end: case Byte_ascii.Pow: case Byte_ascii.Underline: case Byte_ascii.Tick:
|
||||
case Byte_ascii.Curly_bgn: case Byte_ascii.Curly_end: case Byte_ascii.Pipe: case Byte_ascii.Tilde:
|
||||
area = Area__val_naked;
|
||||
val_bgn = pos;
|
||||
break;
|
||||
// case Byte_ascii.Angle_end: NOTE: valid in MW; making invalid now until finding counter-example
|
||||
// angle_bgn -> check for <nowiki>
|
||||
case Byte_ascii.Angle_bgn:
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found)
|
||||
area = Area__invalid; // NOTE: valid in MW; making invalid now until finding counter-example
|
||||
else
|
||||
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
break;
|
||||
// rest -> ignore
|
||||
default:
|
||||
area = Area__invalid;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__val_quote: { // EX: "'val' " in "key = 'val'"; REF.MW: \"([^<\"]*)\"
|
||||
switch (b) {
|
||||
// quote: check if same as opening quote
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos:
|
||||
if (qte_closed)
|
||||
area = Area__invalid;
|
||||
else {
|
||||
if (qte_byte == b) { // quote closes val
|
||||
qte_closed = true;
|
||||
val_end = pos;
|
||||
}
|
||||
else { // quote is just char; EX: title="1 o'clock" or title='The "C" way'
|
||||
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
}
|
||||
}
|
||||
break;
|
||||
// ws -> convert all ws to \s; only allow 1 ws at any point in time
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space: // REF.MW:Sanitizer.php|decodeTagAttributes $value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
|
||||
if (qte_closed) {
|
||||
Make(src, pos); // NOTE: set atr_end *after* quote
|
||||
if (atr_bgn == -1) atr_bgn = pos; // NOTE: process ws just like Area__atr_limbo
|
||||
}
|
||||
else {
|
||||
if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init
|
||||
if (prv_is_ws) {} // noop; only allow one ws at a time; EX: "a b" -> "a b"; "a\n\nb" -> "a b"
|
||||
else {
|
||||
prv_is_ws = true; val_bfr.Add_byte(Byte_ascii.Space);
|
||||
}
|
||||
}
|
||||
break;
|
||||
// angle_bgn -> check for <nowiki>; EX: <span title='ab<nowiki>c</nowiki>de'>
|
||||
case Byte_ascii.Angle_bgn:
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found) {
|
||||
// area = Area__invalid; // "<" inside quote is invalid; EX: <span title='a<b'>c</span>
|
||||
if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
}
|
||||
else {
|
||||
if (qte_closed) {}
|
||||
else {
|
||||
if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init
|
||||
}
|
||||
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
}
|
||||
prv_is_ws = false;
|
||||
break;
|
||||
// rest -> add to val
|
||||
default:
|
||||
if (qte_closed)
|
||||
area = Area__invalid;
|
||||
else {
|
||||
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Area__val_naked: // no quotes; EX:a=bcd; REF.MW:([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
|
||||
switch (b) {
|
||||
// alphanum -> continue reading
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent: case Byte_ascii.Amp:
|
||||
case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star: case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot:
|
||||
case Byte_ascii.Backslash: case Byte_ascii.Slash: case Byte_ascii.Colon: case Byte_ascii.Semic:
|
||||
case Byte_ascii.Question: case Byte_ascii.At:
|
||||
case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end: case Byte_ascii.Pow: case Byte_ascii.Underline: case Byte_ascii.Tick:
|
||||
case Byte_ascii.Curly_bgn: case Byte_ascii.Curly_end: case Byte_ascii.Pipe: case Byte_ascii.Tilde:
|
||||
if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
break;
|
||||
// case Byte_ascii.Angle_end: NOTE: valid in MW; making invalid now until finding counter-example
|
||||
// angle_bgn -> check for <nowiki>; EX: a=b<nowiki>c</nowiki>d
|
||||
case Byte_ascii.Angle_bgn:
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found) {
|
||||
area = Area__invalid; // NOTE: valid in MW; making invalid now until finding counter-example
|
||||
}
|
||||
else {
|
||||
if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init
|
||||
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
}
|
||||
break;
|
||||
// ws -> src_end atr
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
val_end = pos;
|
||||
Make(src, pos);
|
||||
break;
|
||||
case Byte_ascii.Eq: // EX:"a= b=c" or "a=b=c"; PAGE:en.w:2013_in_American_television
|
||||
if (ws_is_before_val) { // "a= b=c"; discard 1st and resume at 2nd
|
||||
int old_val_bgn = val_bgn;
|
||||
area = Area__invalid; Make(src, val_bgn); // invalidate cur atr; EX:"a="
|
||||
atr_bgn = key_bgn = old_val_bgn; // reset atr / key to new atr; EX: "b"
|
||||
key_end = pos;
|
||||
area = Area__val_limbo; // set area to val_bgn (basically, put after =)
|
||||
}
|
||||
else // "a=b=c"; discard all
|
||||
area = Area__invalid;
|
||||
break;
|
||||
default:
|
||||
area = Area__invalid;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
|
||||
// iterate atrs and notify
|
||||
int len = atr_mgr.Len();
|
||||
int[] data_ary = atr_mgr.Data_ary();
|
||||
byte[][] text_ary = atr_mgr.Text_ary();
|
||||
for (int j = 0; j < len; ++j) {
|
||||
int itm_idx = j * Mwh_atr_mgr.Idx__mult;
|
||||
byte[] key_bry = text_ary[j * Mwh_atr_mgr.Text__mult];
|
||||
byte[] val_bry_manual = null;
|
||||
int atr_utl = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_utl];
|
||||
boolean atr_valid = (atr_utl & Mwh_atr_itm_.Mask__valid) == Mwh_atr_itm_.Mask__valid;
|
||||
boolean repeated = (atr_utl & Mwh_atr_itm_.Mask__repeated) == Mwh_atr_itm_.Mask__repeated;
|
||||
boolean key_exists = (atr_utl & Mwh_atr_itm_.Mask__key_exists) == Mwh_atr_itm_.Mask__key_exists;
|
||||
boolean val_made = (atr_utl & Mwh_atr_itm_.Mask__val_made) == Mwh_atr_itm_.Mask__val_made;
|
||||
if (val_made)
|
||||
val_bry_manual = text_ary[(j * Mwh_atr_mgr.Text__mult) + 1];
|
||||
wkr.On_atr_each(this, src, nde_tid, atr_valid, repeated, key_exists, key_bry, val_bry_manual, data_ary, itm_idx);
|
||||
}
|
||||
atr_mgr.Clear();
|
||||
repeated_atrs_hash.Clear();
|
||||
|
||||
return pos;
|
||||
}
|
||||
private void Make(byte[] src, int atr_end) {
|
||||
// calc final values for atr
|
||||
boolean key_exists = false;
|
||||
byte[] key_bry = null, val_bry = null;
|
||||
boolean atr_valid = true;
|
||||
if (area == Area__invalid) {
|
||||
atr_valid = false;
|
||||
key_bry = Bry_.Empty;
|
||||
key_bfr.Clear();
|
||||
if (val_bgn == -1) val_bgn = atr_bgn;
|
||||
val_bfr.Clear();
|
||||
}
|
||||
else {
|
||||
if (key_bgn != -1 && val_bgn != -1) // key && val exists; EX: "<input id='123'>"
|
||||
key_exists = true;
|
||||
else { // not a pair; EX: "<input checked>"
|
||||
if (key_end == -1) key_end = val_end; // NOTE: key_end == -1 when eos; EX: "a" would have key_bgn = 0; key_end = -1; val_end = 1 DATE:2014-07-03
|
||||
val_bgn = val_end = -1;
|
||||
}
|
||||
key_bry = key_bfr_on ? key_bfr.To_bry_and_clear() : Bry_.Mid(src, key_bgn, key_end); // always make key_bry; needed for repeated_atrs as well as key_tid
|
||||
if (val_bfr_on) val_bry = val_bfr.To_bry_and_clear();
|
||||
}
|
||||
int qte_tid = Mwh_atr_itm_.Mask__qte__none;
|
||||
if (qte_byte != Byte_ascii.Null)
|
||||
qte_tid = qte_byte == Byte_ascii.Quote ? Mwh_atr_itm_.Mask__qte_qute : Mwh_atr_itm_.Mask__qte__apos;
|
||||
int atr_uid = atr_mgr.Add(nde_uid, nde_tid, atr_valid, false, key_exists, atr_bgn, atr_end, key_bgn, key_end, key_bry, eql_pos, qte_tid, val_bgn, val_end, val_bry);
|
||||
|
||||
// handle repeated atrs
|
||||
if (atr_valid) {
|
||||
int repeated_uid = repeated_atrs_hash.Get_as_int_or(key_bry, -1);
|
||||
if (repeated_uid != -1) {
|
||||
repeated_atrs_hash.Del(key_bry);
|
||||
atr_mgr.Set_repeated(repeated_uid);
|
||||
}
|
||||
repeated_atrs_hash.Add_bry_int(key_bry, atr_uid);
|
||||
}
|
||||
|
||||
// reset temp variables
|
||||
area = Area__atr_limbo; qte_byte = Byte_ascii.Null;
|
||||
atr_bgn = key_bgn = val_bgn = key_end = val_end = eql_pos = -1;
|
||||
key_bfr_on = val_bfr_on = ws_is_before_val = qte_closed = false;
|
||||
}
|
||||
public int Xnde_find_gt_find(byte[] src, int pos, int end) {
|
||||
bry_ref.Val_(Bry_.Empty);
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Slash && pos + 1 < end) { // if </ move pos to after /
|
||||
++pos;
|
||||
b = src[pos];
|
||||
}
|
||||
int gt_pos = Bry_find_.Find_fwd(src, Byte_ascii.Gt, pos, end); if (gt_pos == Bry_find_.Not_found) return Bry_find_.Not_found;
|
||||
byte[] bry = (byte[])xnde_hash.Get_by_mid(src, pos, gt_pos); if (bry == null) return Bry_find_.Not_found;
|
||||
bry_ref.Val_(bry);
|
||||
return bry.length + pos;
|
||||
}
|
||||
private int Xnde_find_gt(byte[] src, int lt_pos, int end) {
|
||||
int pos = lt_pos + 1; if (pos == end) return Bry_find_.Not_found;
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Slash && pos + 1 < end) {
|
||||
++pos;
|
||||
b = src[pos];
|
||||
}
|
||||
int match_pos = Xnde_find_gt_find(src, pos, end);
|
||||
if (match_pos == Bry_find_.Not_found) {return Bry_find_.Not_found;}
|
||||
boolean slash_found = false;
|
||||
for (int i = match_pos; i < end; i++) {
|
||||
b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Gt: return i;
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip any ws
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
if (slash_found) {return Bry_find_.Not_found;} // only allow one slash
|
||||
else slash_found = true;
|
||||
break;
|
||||
default:
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
}
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
private static final Hash_adp_bry xnde_hash = Hash_adp_bry.ci_a7()
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag__nowiki.Name_bry())
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag__noinclude.Name_bry())
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag__includeonly.Name_bry())
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag__onlyinclude.Name_bry())
|
||||
;
|
||||
public static final int Key_tid__unknown = -1;
|
||||
}
|
||||
@@ -1,76 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
class Mwh_atr_parser_fxt {
|
||||
private final Bry_bfr expd_bfr = Bry_bfr_.New(), actl_bfr = Bry_bfr_.New();
|
||||
private final Mwh_atr_parser parser = new Mwh_atr_parser();
|
||||
private final Mwh_doc_wkr__atr_bldr wkr = new Mwh_doc_wkr__atr_bldr();
|
||||
public Mwh_atr_itm Make_pair(String key, String val) {return new Mwh_atr_itm(Bry_.Empty, Bool_.Y, Bool_.N, Bool_.Y, -1, -1, -1, -1, Bry_.new_u8(key) , -1, -1, Bry_.new_u8(val) , -1, -1);}
|
||||
public Mwh_atr_itm Make_name(String key) {return new Mwh_atr_itm(Bry_.Empty, Bool_.Y, Bool_.N, Bool_.N, -1, -1, -1, -1, Bry_.new_u8(key) , -1, -1, Bry_.new_u8(key) , -1, -1);}
|
||||
public Mwh_atr_itm Make_fail(int bgn, int end) {return new Mwh_atr_itm(Bry_.Empty, Bool_.N, Bool_.N, Bool_.N, bgn, end, -1, -1, null , -1, -1, null , -1, -1);}
|
||||
public void Test_val_as_int(String raw, int expd) {
|
||||
byte[] src = Bry_.new_u8(raw);
|
||||
Mwh_atr_itm itm = new Mwh_atr_itm(src, true, false, false, 0, src.length, -1, -1, null, 0, src.length, src, -1, -1);
|
||||
Tfds.Eq_int(expd, itm.Val_as_int_or(-1));
|
||||
}
|
||||
public void Test_parse(String raw, Mwh_atr_itm... expd) {
|
||||
Mwh_atr_itm[] actl = Exec_parse(raw);
|
||||
Test_print(expd, actl);
|
||||
}
|
||||
private Mwh_atr_itm[] Exec_parse(String raw) {
|
||||
byte[] bry = Bry_.new_u8(raw);
|
||||
parser.Parse(wkr, -1, -1, bry, 0, bry.length);
|
||||
return wkr.To_atr_ary();
|
||||
}
|
||||
public void Test_print(Mwh_atr_itm[] expd_ary, Mwh_atr_itm[] actl_ary) {
|
||||
int expd_len = expd_ary.length;
|
||||
int actl_len = actl_ary.length;
|
||||
int len = expd_len > actl_len ? expd_len : actl_len;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
To_bfr(expd_bfr, i < expd_len ? expd_ary[i] : null, actl_bfr, i < actl_len ? actl_ary[i] : null);
|
||||
}
|
||||
Tfds.Eq_str_lines(expd_bfr.To_str_and_clear(), actl_bfr.To_str_and_clear());
|
||||
}
|
||||
private void To_bfr(Bry_bfr expd_bfr, Mwh_atr_itm expd_itm, Bry_bfr actl_bfr, Mwh_atr_itm actl_itm) {
|
||||
To_bfr__main(expd_bfr, expd_itm);
|
||||
To_bfr__main(actl_bfr, actl_itm);
|
||||
To_bfr__head(expd_bfr, expd_itm);
|
||||
To_bfr__head(actl_bfr, actl_itm);
|
||||
if (expd_itm != null && expd_itm.Atr_bgn() != -1) {
|
||||
To_bfr__atr_rng(expd_bfr, expd_itm);
|
||||
To_bfr__atr_rng(actl_bfr, actl_itm);
|
||||
}
|
||||
}
|
||||
private void To_bfr__head(Bry_bfr bfr, Mwh_atr_itm itm) {
|
||||
if (itm == null) return;
|
||||
bfr.Add_str_a7("head:").Add_yn(itm.Valid()).Add_byte_semic().Add_yn(itm.Repeated()).Add_byte_semic().Add_yn(itm.Key_exists()).Add_byte_nl();
|
||||
}
|
||||
private void To_bfr__main(Bry_bfr bfr, Mwh_atr_itm itm) {
|
||||
if (itm == null) return;
|
||||
if (itm.Valid()) {
|
||||
bfr.Add_str_a7("key:").Add(itm.Key_bry()).Add_byte_nl();
|
||||
bfr.Add_str_a7("val:").Add(itm.Val_as_bry()).Add_byte_nl();
|
||||
}
|
||||
// else
|
||||
// To_bfr__atr_rng(bfr, itm);
|
||||
}
|
||||
private void To_bfr__atr_rng(Bry_bfr bfr, Mwh_atr_itm itm) {
|
||||
if (itm == null) return;
|
||||
bfr.Add_str_a7("rng:").Add_int_variable(itm.Atr_bgn()).Add_byte_semic().Add_int_variable(itm.Atr_end()).Add_byte_nl();
|
||||
}
|
||||
}
|
||||
@@ -1,78 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Mwh_atr_parser_tst {
|
||||
private final Mwh_atr_parser_fxt fxt = new Mwh_atr_parser_fxt();
|
||||
@Test public void Pair__quote__double() {fxt.Test_parse("a=\"b\"" , fxt.Make_pair("a" , "b"));}
|
||||
@Test public void Pair__quote__single() {fxt.Test_parse("a='b'" , fxt.Make_pair("a" , "b"));}
|
||||
@Test public void Pair__quote__none() {fxt.Test_parse("a=b" , fxt.Make_pair("a" , "b"));}
|
||||
@Test public void Pair__quote__none__amp() {fxt.Test_parse("a=&bc" , fxt.Make_pair("a" , "&bc"));}
|
||||
@Test public void Pair__empty() {fxt.Test_parse("a=''" , fxt.Make_pair("a" , ""));}
|
||||
@Test public void Pair__key_w_underline() {fxt.Test_parse("a_b=c" , fxt.Make_pair("a_b" , "c"));}
|
||||
|
||||
@Test public void Name__quote__none() {fxt.Test_parse("b" , fxt.Make_name("b"));}
|
||||
@Test public void Name__ws() {fxt.Test_parse(" b " , fxt.Make_name("b"));} // PURPOSE:discovered while writing test for ref's "lower-alpha" DATE:2014-07-03
|
||||
@Test public void Name__mult() {fxt.Test_parse("a b1 c" , fxt.Make_name("a"), fxt.Make_name("b1"), fxt.Make_name("c"));}
|
||||
|
||||
@Test public void Fail__key_w_plus() {fxt.Test_parse("a+b" , fxt.Make_fail(0, 3));}
|
||||
@Test public void Fail__key_w_plus__many() {fxt.Test_parse("a+b c=d" , fxt.Make_fail(0, 3) , fxt.Make_pair("c", "d"));}
|
||||
@Test public void Fail__val_w_plus() {fxt.Test_parse("a=b+c" , fxt.Make_fail(0, 5));}
|
||||
@Test public void Fail__recover() {fxt.Test_parse("* a=b" , fxt.Make_fail(0, 1) , fxt.Make_pair("a", "b"));} // PURPOSE: * is invalid, but should not stop parsing of a=b
|
||||
@Test public void Fail__incomplete() {fxt.Test_parse("a= c=d" , fxt.Make_fail(0, 3) , fxt.Make_pair("c", "d"));} // PURPOSE: discard xatr if incomplete and followed by valid atr; PAGE:en.w:2013_in_American_television DATE:2014-09-25
|
||||
@Test public void Fail__incomplete_2() {fxt.Test_parse("a=c=d" , fxt.Make_fail(0, 5));} // PURPOSE: variation of above; per MW regex, missing space invalidates entire attribute; DATE:2014-09-25
|
||||
@Test public void Fail__incomplete_pair() {fxt.Test_parse("a= b=" , fxt.Make_fail(0, 3) , fxt.Make_fail(3, 5));} // PURPOSE: "b=" should be invalid not a kv of "b" = "b"; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31
|
||||
|
||||
@Test public void Dangling_eos() {fxt.Test_parse("a='b' c='d" , fxt.Make_pair("a", "b") , fxt.Make_fail(5, 10));} // PURPOSE: handle dangling quote at eos; PAGE:en.w:Aubervilliers DATE:2014-06-25
|
||||
@Test public void Dangling_bos() {fxt.Test_parse("a='b c=d" , fxt.Make_fail(0, 4) , fxt.Make_pair("c", "d"));}// PURPOSE: handle dangling quote at bos; resume at next valid atr; PAGE:en.w:Aubervilliers DATE:2014-06-25
|
||||
|
||||
@Test public void Ws__ini() {fxt.Test_parse(" a='b'" , fxt.Make_pair("a", "b").Atr_rng(0, 6));}
|
||||
@Test public void Ws__end() {fxt.Test_parse(" a='b' c='d'" , fxt.Make_pair("a", "b").Atr_rng(0, 6), fxt.Make_pair("c", "d").Atr_rng(6, 12));}
|
||||
@Test public void Ws() {fxt.Test_parse("a = 'b'" , fxt.Make_pair("a", "b"));} // PURPOSE: fix wherein multiple space was causing "a=a"; PAGE:fr.s:La_Sculpture_dans_les_cimetières_de_Paris/Père-Lachaise; DATE:2014-01-18
|
||||
|
||||
@Test public void Many__quote__apos() {fxt.Test_parse("a='b' c='d' e='f'" , fxt.Make_pair("a", "b"), fxt.Make_pair("c", "d"), fxt.Make_pair("e", "f"));}
|
||||
@Test public void Many__naked() {fxt.Test_parse("a=b c=d e=f" , fxt.Make_pair("a", "b"), fxt.Make_pair("c", "d"), fxt.Make_pair("e", "f"));}
|
||||
@Test public void Many__naked__pair() {fxt.Test_parse("a b=c" , fxt.Make_name("a"), fxt.Make_pair("b", "c"));}
|
||||
|
||||
@Test public void Quote__ws__nl() {fxt.Test_parse("a='b\nc'" , fxt.Make_pair("a", "b c"));}
|
||||
@Test public void Quote__ws__mult() {fxt.Test_parse("a='b c'" , fxt.Make_pair("a", "b c"));}
|
||||
@Test public void Quote__ws__mult_mult() {fxt.Test_parse("a='b c d'" , fxt.Make_pair("a", "b c d"));} // PURPOSE: fix wherein 1st-gobble gobbled rest of spaces (was b cd)
|
||||
@Test public void Quote__apos() {fxt.Test_parse("a=\"b c'd\"" , fxt.Make_pair("a", "b c'd"));} // PURPOSE: fix wherein apos was gobbled up; PAGE:en.s:Alice's_Adventures_in_Wonderland; DATE:2013-11-22
|
||||
@Test public void Quote__apos_2() {fxt.Test_parse("a=\"b'c d\"" , fxt.Make_pair("a", "b'c d"));} // PURPOSE: fix wherein apos was causing "'b'c d"; PAGE:en.s:Grimm's_Household_Tales,_Volume_1; DATE:2013-12-22
|
||||
// @Test public void Quote__angle() {fxt.Test_parse("a='<'" , fxt.Make_fail(0, 5));} // PURPOSE: "<" inside quotes is always invalid
|
||||
@Test public void Quote__invalid() {fxt.Test_parse("a='b'c" , fxt.Make_fail(0, 6));}
|
||||
|
||||
@Test public void Nowiki__atr() {fxt.Test_parse("<nowiki>a=b</nowiki>" , fxt.Make_pair("a", "b").Atr_rng(8, 20));}
|
||||
@Test public void Nowiki__key() {fxt.Test_parse("a<nowiki>b</nowiki>c=d" , fxt.Make_pair("abc", "d").Atr_rng(0, 22));}
|
||||
@Test public void Nowiki__eql() {fxt.Test_parse("a<nowiki>=</nowiki>\"b\"" , fxt.Make_pair("a", "b").Atr_rng(0, 22));} // EX:fr.w:{{Portail|Transpédia|Californie}}
|
||||
@Test public void Nowiki__val__naked() {fxt.Test_parse("a=b<nowiki>c</nowiki>d" , fxt.Make_pair("a", "bcd").Atr_rng(0, 22));}
|
||||
@Test public void Nowiki__val__quote() {fxt.Test_parse("a=<nowiki>'b'</nowiki>" , fxt.Make_pair("a", "b").Atr_rng(0, 22));}
|
||||
@Test public void Nowiki__val__quote_2() {fxt.Test_parse("a=\"b<nowiki>c</nowiki>d<nowiki>e</nowiki>f\"", fxt.Make_pair("a", "bcdef"));}
|
||||
|
||||
@Test public void Val__as_int() {fxt.Test_val_as_int("-123" , -123);}
|
||||
|
||||
// @Test public void Embedded() { // PURPOSE: handle html inside attrib; PAGE:en.w:Economy_of_Greece DATE:2015-10-15
|
||||
// fxt.Test_parse("title='<sup id='cite_ref-a_1-0' class='reference'><a href='#cite_note-a-1'>[1]</a></sup> c'"
|
||||
// , fxt.Make_fail(0, 11) // "title='<sup" invalid b/c of "<"
|
||||
// , fxt.Make_pair("id", "cite_ref-a_1-0")
|
||||
// , fxt.Make_fail(31, 52) // "class='reference'><a" invalid b/c no ws after '
|
||||
// , fxt.Make_fail(53, 88) // "href='#cite_note-a-1'>[1]</a></sup>" invalid b/c no ws after '
|
||||
// , fxt.Make_fail(89, 91) // " c'" invalid b/c name (c) cannot have apos
|
||||
// );
|
||||
// }
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public interface Mwh_atr_wkr {
|
||||
void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx);
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
// Immutable holder for one document item: the item's type id, the node type id it was reported with, and its bytes.
class Mwh_doc_itm {
  // item type ids
  public static final int
      Itm_tid__txt      = 0
    , Itm_tid__nde_head = 1
    , Itm_tid__nde_tail = 2
    , Itm_tid__comment  = 3
    , Itm_tid__entity   = 4
    ;

  private final int itm_tid;
  private final byte[] itm_bry;
  private final int nde_tid;

  public Mwh_doc_itm(int itm_tid, int nde_tid, byte[] itm_bry) {
    this.itm_tid = itm_tid;
    this.itm_bry = itm_bry;
    this.nde_tid = nde_tid;
  }

  // one of the Itm_tid__* constants
  public int Itm_tid() {
    return itm_tid;
  }
  // the byte array passed to the constructor (not copied)
  public byte[] Itm_bry() {
    return itm_bry;
  }
  // node type id passed to the constructor
  public int Nde_tid() {
    return nde_tid;
  }
}
|
||||
@@ -1,62 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
// Growable store of document items packed into a flat int array.
// Each item occupies Idx__mult consecutive slots: uid, tid, src_bgn, src_end (see Idx_* constants).
class Mwh_doc_mgr {
  private final int data_max_orig;  // initial capacity in slots (not items); used by Clear to shrink back
  // max: initial capacity in items
  public Mwh_doc_mgr(int max) {
    this.data_max_orig = max * Idx__mult;
    this.Max_(max);
  }
  // number of items added since construction / last Clear
  public int Len() {return itm_len;} private int itm_len;
  // backing array; only the first Len() * Idx__mult slots hold current items
  public int[] Data_ary() {return data_ary;} private int[] data_ary; private int data_max;
  // allocates a fresh backing array for len items and resets the item count
  private void Max_(int len) {
    this.data_max = len * Idx__mult;
    this.data_ary = new int[data_max];
    this.itm_len = 0;
  }
  // Removes all items. If the array never grew, it is reused (old values are left in place and
  // overwritten by later Adds); otherwise it is reallocated at the original capacity.
  public void Clear() {
    if (data_max == data_max_orig)
      itm_len = 0;
    else
      Max_(data_max_orig / Idx__mult);
  }
  // Appends an item and returns its uid, which is its 0-based position in insertion order.
  public int Add(int dom_tid, int src_bgn, int src_end) {
    int data_idx = itm_len * Idx__mult;
    if (data_idx == data_max) {  // full: double capacity (or allocate one item if capacity is 0)
      int new_data_max = data_max == 0 ? Idx__mult : data_max * 2;
      int[] new_data_ary = new int[new_data_max];
      // FIX: copy existing items into the NEW array; previously the old array was copied onto itself,
      // so all earlier items were lost whenever the array grew
      System.arraycopy(data_ary, 0, new_data_ary, 0, data_max);
      this.data_ary = new_data_ary;
      this.data_max = new_data_max;
    }
    int dom_uid = itm_len;
    data_ary[data_idx + Idx_dom_uid] = dom_uid;
    data_ary[data_idx + Idx_dom_tid] = dom_tid;
    data_ary[data_idx + Idx_src_bgn] = src_bgn;
    data_ary[data_idx + Idx_src_end] = src_end;
    ++itm_len;
    return dom_uid;
  }
  // slot offsets within one item; Idx__mult is the number of slots per item
  public static final int
      Idx_dom_uid = 0
    , Idx_dom_tid = 1
    , Idx_src_bgn = 2
    , Idx_src_end = 3
    , Idx__mult   = 4
    ;
}
|
||||
@@ -1,245 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.primitives.*;
|
||||
import gplx.xowa.parsers.amps.*; import gplx.xowa.parsers.xndes.*;
|
||||
// Scans a byte range for "<...>" nodes, "<!-- -->" comments and "&...;" entities, and reports each
// piece (text run, entity, comment, node head, node tail) to a Mwh_doc_wkr as it is found.
// Only names present in the worker's Nde_regy() are treated as nodes; anything else stays text.
// The parser keeps per-parse state in fields, so one instance handles one Parse call at a time.
public class Mwh_doc_parser {
  private final Mwh_doc_mgr dom_mgr = new Mwh_doc_mgr(16); // receives the src range of every node head (see end of head branch in Parse_nde)
  private final Mwh_atr_parser atr_parser = new Mwh_atr_parser(); // parses the attribute section of a node head
  private final List_adp nde_stack = List_adp_.New(); // enclosing nodes of cur_nde; pushed when a head opens a new cur_nde, popped on a matching tail
  private final Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance; private final Xop_tkn_mkr tkn_mkr = new Xop_tkn_mkr();
  private byte[] src; private int src_end;
  private Mwh_doc_wkr wkr;
  private Hash_adp_bry nde_regy; // known node names; taken from wkr at the start of each Parse
  private int txt_bgn, nde_uid; // txt_bgn: start of the text run not yet reported; nde_uid: uid returned by dom_mgr for the latest node head (-1 if none)
  private Xop_xnde_tag cur_nde; private int cur_nde_tid; // innermost open node and its id (-1 when cur_nde is null)
  // Parses src from src_bgn (inclusive) to src_end (exclusive), invoking wkr callbacks in document order.
  // NOTE(review): dom_mgr is not cleared here, so its items accumulate across Parse calls on the same instance - confirm intended
  public void Parse(Mwh_doc_wkr wkr, byte[] src, int src_bgn, int src_end) {
    this.wkr = wkr; this.src = src; this.src_end = src_end;
    this.nde_regy = wkr.Nde_regy();
    nde_stack.Clear();
    int pos = txt_bgn = src_bgn;
    nde_uid = cur_nde_tid = -1;
    cur_nde = null;

    while (pos < src_end) {
      byte b = src[pos];
      switch (b) {
        case Byte_ascii.Angle_bgn: // "<": possible nde start
          pos = Parse_nde(pos);
          break;
        case Byte_ascii.Amp: // "&": check for entity; EX: in sr-ec -> sr-el
          Xop_amp_mgr_rslt rv = amp_mgr.Parse_tkn(tkn_mkr, src, src_end, pos, pos + 1);
          Xop_tkn_itm rv_tkn = rv.Tkn();
          if (rv_tkn == null) // no token returned; keep "&" as part of the current text run
            ++pos;
          else {
            // flush pending text (called even when txt_bgn == pos), report the entity, then resume after it
            wkr.On_txt_end(this, src, cur_nde_tid, txt_bgn, pos);
            wkr.On_entity_end(this, src, cur_nde_tid, rv_tkn.Src_bgn(), rv_tkn.Src_end());
            pos = rv_tkn.Src_end();
            txt_bgn = pos;
          }
          break;
        default: // else, just increment
          ++pos;
          break;
      }
    }
    if (src_end != txt_bgn) wkr.On_txt_end(this, src, cur_nde_tid, txt_bgn, pos); // flush trailing text
  }
  // Tries to parse a node or comment starting at the "<" at pos.
  // Returns the position at which the main loop should resume. When the bytes do not form a known
  // node, txt_bgn is left unchanged so the scanned bytes stay part of the current text run.
  private int Parse_nde(int pos) {
    int nde_end_tid = Nde_end_tid__invalid;
    boolean nde_is_head = true;
    int nde_bgn = pos;
    ++pos; // skip "<"
    int name_bgn = pos;
    int name_end = pos;
    // scan the name until a terminator is classified into nde_end_tid
    while (pos < src_end) {
      byte b = src[pos];
      switch (b) {
        // valid chars for name
        case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
        case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
        case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
        case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
        case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
        case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
        case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
        case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
        case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
        case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
        case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
        case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
        case Byte_ascii.Dot: case Byte_ascii.Dash: case Byte_ascii.Underline: case Byte_ascii.Colon: // XML allowed punctuation
        case Byte_ascii.Dollar:// MW: handles <br$2>;
          ++pos;
          break;
        // comment check
        case Byte_ascii.Bang:
          boolean comment_found = false;
          // "!" must directly follow "<" (or "</") and be followed by "--"
          if (name_bgn == pos && Bry_.Eq(src, pos + 1, pos + 3, Comment_bgn)) {
            int comment_end_pos = Bry_find_.Find_fwd(src, Comment_end, pos + 3);
            if (comment_end_pos != Bry_find_.Not_found) {
              nde_end_tid = Nde_end_tid__comment;
              pos = comment_end_pos + 3; // move past "-->"
              comment_found = true;
            }
          }
          if (!comment_found) // "<!" without a complete comment; treat as text
            return pos;
          else
            break;
        // invalid char; not a node; treat as text; EX: "<!@#", "< /b>"
        default:
          return pos;
        // slash -> either "</b>" or "<b/>"
        case Byte_ascii.Slash:
          if (name_bgn == pos) { // "</"; EX: "</b>"
            nde_is_head = false;
            ++name_bgn;
            ++pos;
            continue; // name starts after the slash; keep scanning
          }
          else { // check for "/>"; NOTE: <pre/a>, <pre//> are allowed
            name_end = pos;
            ++pos;
            if (pos == src_end) return pos; // end of doc; treat as text; EX: "<b/EOS"
            if (src[pos] == Byte_ascii.Gt) {
              nde_end_tid = Nde_end_tid__inline;
              ++pos;
            }
            else
              nde_end_tid = Nde_end_tid__slash;
          }
          break;
        // stops "name"
        case Byte_ascii.Gt:
          nde_end_tid = Nde_end_tid__gt;
          name_end = pos;
          ++pos;
          break;
        case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
          // pos is not advanced: the attribute scan below starts at the whitespace char
          nde_end_tid = Nde_end_tid__ws;
          name_end = pos;
          break;
        case Byte_ascii.Backslash: // MW: allows "<br\>" -> "<br/>"
          nde_end_tid = Nde_end_tid__backslash;
          name_end = pos;
          break;
      }
      if (nde_end_tid != Nde_end_tid__invalid) break; // terminator classified; name scan is done
    }
    // get name
    Xop_xnde_tag nde_itm = null;
    if (nde_end_tid != Nde_end_tid__comment) { // comments have no name to look up; nde_itm stays null for them
      nde_itm = (Xop_xnde_tag)nde_regy.Get_by_mid(src, name_bgn, name_end);
      if (nde_itm == null) return pos; // not a known nde; exit
    }
    if (txt_bgn != nde_bgn) { // notify txt
      wkr.On_txt_end(this, src, cur_nde_tid, txt_bgn, nde_bgn);
      txt_bgn = pos;
    }
    if (nde_is_head) {
      // NOTE: also invoked for comments, with name_bgn == name_end
      wkr.On_nde_head_bgn(this, src, cur_nde_tid, name_bgn, name_end);
      switch (nde_end_tid) {
        case Nde_end_tid__comment:
          wkr.On_comment_end(this, src, cur_nde_tid, nde_bgn, pos);
          break;
        case Nde_end_tid__ws:
        case Nde_end_tid__slash:
        case Nde_end_tid__backslash:
          // look for ">" or "/>"
          // defaults apply when neither is found before src_end: attributes and head both run to src_end
          int tmp_pos = pos, atrs_end = src_end, head_end = src_end;
          boolean loop = true;
          while (loop) {
            byte b = src[tmp_pos];
            switch (b) {
              // angle_end -> stop iterating
              case Byte_ascii.Angle_end:
                atrs_end = tmp_pos;
                head_end = tmp_pos + 1;
                nde_end_tid = Mwh_doc_parser.Nde_end_tid__gt;
                loop = false;
                break;
              // slash -> check for "/>" or " / "
              case Byte_ascii.Slash:
                int nxt_pos = tmp_pos + 1;
                if (nxt_pos == src_end) { // "/" is the last byte; no closing ">"
                  nde_end_tid = Mwh_doc_parser.Nde_end_tid__invalid;
                  loop = false;
                }
                else if (src[nxt_pos] == Byte_ascii.Angle_end) {
                  atrs_end = tmp_pos;
                  head_end = tmp_pos + 2;
                  nde_end_tid = Mwh_doc_parser.Nde_end_tid__inline;
                  loop = false;
                }
                break;
            }
            if (loop) {
              ++tmp_pos;
              if (tmp_pos == src_end) break; // ran out of input; nde_end_tid keeps its ws / slash / backslash value
            }
            else
              break;
          }
          // NOTE(review): nde_uid here is still the uid of the previous node head (or -1); the uid for
          // this head is only assigned by dom_mgr.Add at the end of this branch - confirm intended
          atr_parser.Parse(wkr, nde_uid, cur_nde_tid, src, pos, atrs_end);
          pos = head_end;
          txt_bgn = head_end;
          break;
      }
      // second pass: nde_end_tid may have been updated to gt / inline by the attribute scan above
      switch (nde_end_tid) {
        case Nde_end_tid__inline:
          wkr.On_nde_head_end(this, src, cur_nde_tid, nde_bgn, pos, Bool_.Y);
          txt_bgn = pos;
          break;
        case Nde_end_tid__gt:
          wkr.On_nde_head_end(this, src, cur_nde_tid, nde_bgn, pos, Bool_.N);
          txt_bgn = pos;
          // make this node the current node unless it cannot have a body or the current node shields its content
          if ( nde_itm != null
            && !nde_itm.Single_only_html() // ignore single-only tags; EX: <br> does not become the current node
            && (cur_nde == null || !cur_nde.Xtn()) // <pre> ignores inner
            ) {
            if (cur_nde != null)
              nde_stack.Add(cur_nde);
            this.cur_nde = nde_itm;
            this.cur_nde_tid = nde_itm.Id();
          }
          break;
        case Nde_end_tid__ws:
        case Nde_end_tid__slash:
        case Nde_end_tid__backslash: break; // handled above
      }
      nde_uid = dom_mgr.Add(Mwh_doc_itm.Itm_tid__nde_head, nde_bgn, pos);
    }
    else {
      // tail: only "</name>" is reported; any other terminator is ignored here
      switch (nde_end_tid) {
        case Nde_end_tid__gt:
          wkr.On_nde_tail_end(this, src, cur_nde_tid, nde_bgn, pos);
          txt_bgn = pos;
          if (nde_itm.Id() == cur_nde_tid) { // tail closes the current node; restore its parent (or none)
            cur_nde = (Xop_xnde_tag)List_adp_.Pop_or(nde_stack, null);
            cur_nde_tid = cur_nde == null ? -1 : cur_nde.Id();
          }
          break;
      }
    }
    return pos;
  }
  // how a node's name section ended; invalid means no terminator was classified
  public static final int Nde_end_tid__invalid = 0, Nde_end_tid__gt = 1, Nde_end_tid__ws = 2, Nde_end_tid__inline = 3, Nde_end_tid__slash = 4, Nde_end_tid__backslash = 5, Nde_end_tid__comment = 6;
  private static final byte[] Comment_bgn = Bry_.new_a7("--"), Comment_end = Bry_.new_a7("-->");
}
|
||||
@@ -1,75 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
class Mwh_doc_parser_fxt {
|
||||
private final Bry_bfr expd_bfr = Bry_bfr_.New(), actl_bfr = Bry_bfr_.New();
|
||||
private final Mwh_doc_parser parser = new Mwh_doc_parser();
|
||||
private final Mwh_doc_wkr__itm_bldr wkr = new Mwh_doc_wkr__itm_bldr();
|
||||
public Mwh_doc_itm Make_txt (String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__txt , -1, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_txt (String raw, int nde_tid) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__txt , nde_tid, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_comment (String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__comment , -1, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_entity (String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__entity , -1, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_nde_head(String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_head , -1, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_nde_tail(String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_tail , -1, Bry_.new_u8(raw));}
|
||||
public void Test_parse(String raw, Mwh_doc_itm... expd) {
|
||||
Mwh_doc_itm[] actl = Exec_parse(raw);
|
||||
Test_print(expd, actl);
|
||||
}
|
||||
public Mwh_doc_itm[] Exec_parse(String raw) {
|
||||
byte[] bry = Bry_.new_u8(raw);
|
||||
parser.Parse(wkr, bry, 0, bry.length);
|
||||
return wkr.To_atr_ary();
|
||||
}
|
||||
public void Test_print(Mwh_doc_itm[] expd_ary, Mwh_doc_itm[] actl_ary) {
|
||||
int expd_len = expd_ary.length;
|
||||
int actl_len = actl_ary.length;
|
||||
int len = expd_len > actl_len ? expd_len : actl_len;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
To_bfr(expd_bfr, i < expd_len ? expd_ary[i] : null, actl_bfr, i < actl_len ? actl_ary[i] : null);
|
||||
}
|
||||
Tfds.Eq_str_lines(expd_bfr.To_str_and_clear(), actl_bfr.To_str_and_clear());
|
||||
}
|
||||
private void To_bfr(Bry_bfr expd_bfr, Mwh_doc_itm expd_itm, Bry_bfr actl_bfr, Mwh_doc_itm actl_itm) {
|
||||
To_bfr__main(expd_bfr, expd_itm); To_bfr__main(actl_bfr, actl_itm);
|
||||
if (expd_itm != null && expd_itm.Nde_tid() != -1) {
|
||||
To_bfr__nde_tid(expd_bfr, expd_itm); To_bfr__nde_tid(actl_bfr, actl_itm);
|
||||
}
|
||||
}
|
||||
private void To_bfr__main(Bry_bfr bfr, Mwh_doc_itm itm) {
|
||||
if (itm == null) return;
|
||||
bfr.Add_str_a7("itm_tid:").Add_int_variable(itm.Itm_tid()).Add_byte_nl();
|
||||
bfr.Add_str_a7("txt:").Add(itm.Itm_bry()).Add_byte_nl();
|
||||
}
|
||||
private void To_bfr__nde_tid(Bry_bfr bfr, Mwh_doc_itm itm) {
|
||||
if (itm == null) return;
|
||||
bfr.Add_str_a7("nde_tid:").Add_int_variable(itm.Nde_tid()).Add_byte_nl();
|
||||
}
|
||||
}
|
||||
class Mwh_doc_wkr__itm_bldr implements Mwh_doc_wkr {
|
||||
private final List_adp list = List_adp_.New();
|
||||
public Hash_adp_bry Nde_regy() {return nde_regy;} private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw();
|
||||
public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {}
|
||||
public void On_txt_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__txt , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
public void On_nde_head_bgn (Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) {}
|
||||
public void On_nde_head_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_head , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
public void On_nde_tail_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_tail , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__comment , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
public void On_entity_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__entity , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
|
||||
public Mwh_doc_itm[] To_atr_ary() {return (Mwh_doc_itm[])list.To_ary_and_clear(Mwh_doc_itm.class);}
|
||||
}
|
||||
@@ -1,61 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.parsers.xndes.*;
|
||||
public class Mwh_doc_parser_tst {
|
||||
private final Mwh_doc_parser_fxt fxt = new Mwh_doc_parser_fxt();
|
||||
@Test public void Text__basic() {fxt.Test_parse("abc" , fxt.Make_txt("abc"));}
|
||||
@Test public void Comment() {fxt.Test_parse("a<!--b-->c" , fxt.Make_txt("a"), fxt.Make_comment("<!--b-->"), fxt.Make_txt("c"));}
|
||||
@Test public void Entity() {fxt.Test_parse("a b" , fxt.Make_txt("a"), fxt.Make_entity(" "), fxt.Make_txt("b"));}
|
||||
@Test public void Fail__inline_eos() {fxt.Test_parse("a<b/" , fxt.Make_txt("a<b/"));}
|
||||
@Test public void Fail__unknown() {fxt.Test_parse("a<bc/>d" , fxt.Make_txt("a<bc/>d"));}
|
||||
@Test public void Node__inline() {fxt.Test_parse("a<b/>c" , fxt.Make_txt("a"), fxt.Make_nde_head("<b/>") , fxt.Make_txt("c"));}
|
||||
@Test public void Node__pair() {fxt.Test_parse("a<b>c</b>d" , fxt.Make_txt("a"), fxt.Make_nde_head("<b>") , fxt.Make_txt("c"), fxt.Make_nde_tail("</b>"), fxt.Make_txt("d"));}
|
||||
@Test public void Atrs__pair() {
|
||||
fxt.Test_parse("<div id='1'>a</div>"
|
||||
, fxt.Make_nde_head("<div id='1'>")
|
||||
, fxt.Make_txt("a")
|
||||
, fxt.Make_nde_tail("</div>"));
|
||||
}
|
||||
@Test public void Atrs__inline() {
|
||||
fxt.Test_parse("a<div id='1'/>b"
|
||||
, fxt.Make_txt("a")
|
||||
, fxt.Make_nde_head("<div id='1'/>")
|
||||
, fxt.Make_txt("b"));
|
||||
}
|
||||
@Test public void Node__single_only() {
|
||||
fxt.Test_parse("<b>a<br>b</b>c"
|
||||
, fxt.Make_nde_head("<b>")
|
||||
, fxt.Make_txt("a", Xop_xnde_tag_.Tid__b)
|
||||
, fxt.Make_nde_head("<br>")
|
||||
, fxt.Make_txt("b", Xop_xnde_tag_.Tid__b) // <b> not <br>
|
||||
, fxt.Make_nde_tail("</b>")
|
||||
, fxt.Make_txt("c", Xop_xnde_tag_.Tid__null)
|
||||
);
|
||||
}
|
||||
@Test public void Node__pre() {
|
||||
fxt.Test_parse("<pre>a<div>b</pre>c"
|
||||
, fxt.Make_nde_head("<pre>")
|
||||
, fxt.Make_txt("a", Xop_xnde_tag_.Tid__pre)
|
||||
, fxt.Make_nde_head("<div>")
|
||||
, fxt.Make_txt("b", Xop_xnde_tag_.Tid__pre) // <pre> not <div>
|
||||
, fxt.Make_nde_tail("</pre>")
|
||||
, fxt.Make_txt("c", Xop_xnde_tag_.Tid__null)
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public interface Mwh_doc_wkr extends Mwh_atr_wkr {
|
||||
Hash_adp_bry Nde_regy();
|
||||
void On_txt_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end);
|
||||
void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end);
|
||||
void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline);
|
||||
void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end);
|
||||
void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end);
|
||||
void On_entity_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end);
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
public class Mwh_doc_wkr_ {
|
||||
public static Hash_adp_bry Nde_regy__mw() {
|
||||
Xop_xnde_tag[] ary = Xop_xnde_tag_.Ary;
|
||||
int len = ary.length;
|
||||
Hash_adp_bry rv = Hash_adp_bry.ci_a7();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xop_xnde_tag itm = ary[i];
|
||||
rv.Add(itm.Name_bry(), itm);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
@@ -1,47 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Mwh_doc_wkr__atr_bldr implements Mwh_doc_wkr {
|
||||
private final List_adp list = List_adp_.New();
|
||||
public Hash_adp_bry Nde_regy() {return null;}
|
||||
public void On_atr_each(Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] data_ary, int itm_idx) {
|
||||
int atr_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_bgn];
|
||||
int atr_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_end];
|
||||
int key_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_key_bgn];
|
||||
int key_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_key_end];
|
||||
int val_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
|
||||
int val_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
|
||||
int eql_pos = data_ary[itm_idx + Mwh_atr_mgr.Idx_eql_pos];
|
||||
int qte_tid = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_utl];
|
||||
qte_tid = Mwh_atr_itm_.Calc_qte_tid(qte_tid);
|
||||
if (!key_exists) val_bry_manual = key_bry;
|
||||
Mwh_atr_itm atr = new Mwh_atr_itm(src, valid, repeated, key_exists, atr_bgn, atr_end, key_bgn, key_end, key_bry, val_bgn, val_end, val_bry_manual, eql_pos, qte_tid);
|
||||
list.Add(atr);
|
||||
}
|
||||
public void On_txt_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
public void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) {}
|
||||
public void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline) {}
|
||||
public void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
public void On_entity_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
|
||||
public Mwh_atr_itm[] To_atr_ary() {return (Mwh_atr_itm[])list.To_ary_and_clear(Mwh_atr_itm.class);}
|
||||
public int Atrs__len() {return list.Len();}
|
||||
public Mwh_atr_itm Atrs__get_at(int i) {return (Mwh_atr_itm)list.Get_at(i);}
|
||||
public void Atrs__clear() {list.Clear();}
|
||||
}
|
||||
@@ -1,61 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.strings.*;
|
||||
public class HierPosAryBldr {
|
||||
int[] ary; int aryIdx = -1; int root = -1;
|
||||
public HierPosAryBldr(int ary_max) {ary = new int[ary_max]; this.Init();}
|
||||
public void Init() {
|
||||
int ary_max = ary.length;
|
||||
for (int i = 0; i < ary_max; i++)
|
||||
ary[i] = 0;
|
||||
aryIdx = -1;
|
||||
root = 0;
|
||||
}
|
||||
public void MoveDown() {
|
||||
aryIdx += 1;
|
||||
if (aryIdx == 0)
|
||||
ary[aryIdx] = root;
|
||||
else
|
||||
ary[aryIdx] = 0;
|
||||
}
|
||||
public void MoveUp() {
|
||||
aryIdx -= 1;
|
||||
MoveNext();
|
||||
}
|
||||
public void MoveNext() {
|
||||
if (aryIdx == -1)
|
||||
root += 1;
|
||||
else
|
||||
ary[aryIdx] += 1;
|
||||
}
|
||||
public boolean Dirty() {return aryIdx > -1 || root > 0;}
|
||||
public int[] XtoIntAry() {
|
||||
if (aryIdx == -1) return Int_.Ary_empty;
|
||||
int[] rv = new int[aryIdx + 1];
|
||||
for (int i = 0; i < aryIdx + 1; i++)
|
||||
rv[i] = ary[i];
|
||||
return rv;
|
||||
}
|
||||
public String To_str() {
|
||||
String_bldr sb = String_bldr_.new_();
|
||||
for (int i = 0; i < aryIdx; i++)
|
||||
sb.Add_spr_unless_first(Int_.To_str(ary[i]), " ", i);
|
||||
return sb.To_str();
|
||||
}
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class HierPosAryBldr_tst {
|
||||
@Before public void init() {bldr.Init();} HierPosAryBldr bldr = new HierPosAryBldr(256);
|
||||
@Test public void Basic() {
|
||||
tst_ary(Int_.Ary_empty);
|
||||
}
|
||||
@Test public void Move_d() {
|
||||
bldr.MoveDown();
|
||||
tst_ary(0);
|
||||
}
|
||||
@Test public void Move_dd() {
|
||||
bldr.MoveDown();
|
||||
bldr.MoveDown();
|
||||
tst_ary(0, 0);
|
||||
}
|
||||
@Test public void Move_ddu() {
|
||||
bldr.MoveDown();
|
||||
bldr.MoveDown();
|
||||
bldr.MoveUp();
|
||||
tst_ary(1);
|
||||
}
|
||||
@Test public void Move_ddud() {
|
||||
bldr.MoveDown();
|
||||
bldr.MoveDown();
|
||||
bldr.MoveUp();
|
||||
bldr.MoveDown();
|
||||
tst_ary(1, 0);
|
||||
}
|
||||
@Test public void Move_dud() {
|
||||
bldr.MoveDown();
|
||||
bldr.MoveUp();
|
||||
bldr.MoveDown();
|
||||
tst_ary(1);
|
||||
}
|
||||
@Test public void Move_dn() {
|
||||
bldr.MoveDown();
|
||||
bldr.MoveNext();
|
||||
tst_ary(1);
|
||||
}
|
||||
@Test public void Move_ddn() {
|
||||
bldr.MoveDown();
|
||||
bldr.MoveDown();
|
||||
bldr.MoveNext();
|
||||
tst_ary(0, 1);
|
||||
}
|
||||
private void tst_ary(int... expd) {Tfds.Eq_ary(expd, bldr.XtoIntAry());}
|
||||
}
|
||||
@@ -1,41 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_colon_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_colon;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Colon, this);}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
Xop_list_wkr listCtx = ctx.List();
|
||||
if (listCtx.Dd_chk()) { // handle ";a:b" construct; REF.MW: Parser.php|doBlockLevels|; title : definition text
|
||||
int prv_pos = cur_pos -1 ;
|
||||
if ( ctx.Cur_tkn_tid() != Xop_tkn_itm_.Tid_lnki // ignore if inside link
|
||||
&& prv_pos > 0
|
||||
&& src[prv_pos] != Byte_ascii.Nl // only consider ":" which are not preceded by \n; DATE:2014-07-11 TODO_OLD: emulate Parser.php|findColonNoLinks which does much more logic to see if ";a:b" construct should apply
|
||||
) {
|
||||
listCtx.Dd_chk_(false);
|
||||
return listCtx.MakeTkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
|
||||
}
|
||||
}
|
||||
ctx.Subs_add(root, tkn_mkr.Colon(bgn_pos, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
public static final Xop_colon_lxr Instance = new Xop_colon_lxr();
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_list_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_list;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {Add_ary(core_trie, this, Xop_list_tkn_.Hook_ul, Xop_list_tkn_.Hook_ol, Xop_list_tkn_.Hook_dt, Xop_list_tkn_.Hook_dd);}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
private void Add_ary(Btrie_fast_mgr core_trie, Object val, byte[]... ary) {for (byte[] itm : ary) core_trie.Add(itm, val);}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.List().MakeTkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_list_lxr Instance = new Xop_list_lxr(); Xop_list_lxr() {}
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_list_tkn extends Xop_tkn_itm_base {
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_list;}
|
||||
public int List_uid() {return list_uid;} public Xop_list_tkn List_uid_(int v) {list_uid = v; return this;} private int list_uid = -1;
|
||||
public byte List_bgn() {return list_bgn;} private byte list_bgn;
|
||||
public byte List_itmTyp() {return list_itmTyp;} public Xop_list_tkn List_itmTyp_(byte v) {list_itmTyp = v; return this;} private byte list_itmTyp = Xop_list_tkn_.List_itmTyp_null;
|
||||
public int[] List_path() {return path;} public Xop_list_tkn List_path_(int... v) {path = v; return this;} private int[] path = Int_.Ary_empty;
|
||||
public int List_path_idx() {return path[path.length - 1];}
|
||||
public boolean List_sub_first() {return List_path_idx() == 0;}
|
||||
public byte List_sub_last() {return list_sub_last;} public Xop_list_tkn List_sub_last_(byte v) {list_sub_last = v; return this;} private byte list_sub_last = Bool_.__byte;
|
||||
public static Xop_list_tkn bgn_(int bgn, int end, byte list_itmTyp, int symLen) {return new Xop_list_tkn(bgn, end, Bool_.Y_byte, list_itmTyp);}
|
||||
public static Xop_list_tkn end_(int pos, byte list_itmTyp) {return new Xop_list_tkn(pos, pos, Bool_.N_byte, list_itmTyp);}
|
||||
public Xop_list_tkn(int bgn, int end, byte bgnEndType, byte list_itmTyp) {this.Tkn_ini_pos(false, bgn, end); this.list_bgn = bgnEndType; this.list_itmTyp = list_itmTyp;}
|
||||
public static final Xop_list_tkn Null = new Xop_list_tkn(); Xop_list_tkn() {}
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_list_tkn_ {
|
||||
public static final byte[]
|
||||
Hook_ul = new byte[] {Byte_ascii.Nl, Byte_ascii.Star} , Hook_ol = new byte[] {Byte_ascii.Nl, Byte_ascii.Hash}
|
||||
, Hook_dt = new byte[] {Byte_ascii.Nl, Byte_ascii.Semic} , Hook_dd = new byte[] {Byte_ascii.Nl, Byte_ascii.Colon};
|
||||
public static final byte List_itmTyp_null = 0, List_itmTyp_ul = Byte_ascii.Star, List_itmTyp_ol = Byte_ascii.Hash, List_itmTyp_dt = Byte_ascii.Semic, List_itmTyp_dd = Byte_ascii.Colon;
|
||||
public static final String Str_li = "li", Str_ol = "ol", Str_ul = "ul", Str_dl = "dl", Str_dt = "dt", Str_dd = "dd";
|
||||
public static final byte[] Byt_li = Bry_.new_a7(Str_li), Byt_ol = Bry_.new_a7(Str_ol), Byt_ul = Bry_.new_a7(Str_ul)
|
||||
, Byt_dl = Bry_.new_a7(Str_dl), Byt_dt = Bry_.new_a7(Str_dt), Byt_dd = Bry_.new_a7(Str_dd);
|
||||
public static byte[] XmlTag_lst(byte b) {
|
||||
switch (b) {
|
||||
case List_itmTyp_ul: return Byt_ul;
|
||||
case List_itmTyp_ol: return Byt_ol;
|
||||
case List_itmTyp_dt:
|
||||
case List_itmTyp_dd: return Byt_dl;
|
||||
default: throw Err_.new_unhandled(b);
|
||||
}
|
||||
}
|
||||
public static byte[] XmlTag_itm(byte b) {
|
||||
switch (b) {
|
||||
case List_itmTyp_ul:
|
||||
case List_itmTyp_ol: return Byt_li;
|
||||
case List_itmTyp_dt: return Byt_dt;
|
||||
case List_itmTyp_dd: return Byt_dd;
|
||||
default: throw Err_.new_unhandled(b);
|
||||
}
|
||||
}
|
||||
public static byte Char_lst(byte b) {
|
||||
switch (b) {
|
||||
case List_itmTyp_ul: return Byte_ascii.Star;
|
||||
case List_itmTyp_ol: return Byte_ascii.Hash;
|
||||
case List_itmTyp_dt: return Byte_ascii.Semic;
|
||||
case List_itmTyp_dd: return Byte_ascii.Colon;
|
||||
default: throw Err_.new_unhandled(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.tests.*;
|
||||
public class Xop_list_tkn_chkr extends Xop_tkn_chkr_base {
|
||||
@Override public Class<?> TypeOf() {return Xop_list_tkn.class;}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_list;}
|
||||
public int List_uid() {return list_uid;} public Xop_list_tkn_chkr List_uid_(int v) {list_uid = v; return this;} private int list_uid = -1;
|
||||
public byte List_bgn() {return list_bgn;} public Xop_list_tkn_chkr List_bgn_(byte v) {list_bgn = v; return this;} private byte list_bgn;
|
||||
public byte List_itmTyp() {return list_itmTyp;} public Xop_list_tkn_chkr List_itmTyp_(byte v) {list_itmTyp = v; return this;} private byte list_itmTyp = Xop_list_tkn_.List_itmTyp_null;
|
||||
public int[] List_path() {return list_path;} public Xop_list_tkn_chkr List_path_(int... v) {list_path = v; return this;} private int[] list_path = Int_.Ary_empty;
|
||||
public byte List_sub_last() {return list_sub_last;} public Xop_list_tkn_chkr List_sub_last_(byte v) {list_sub_last = v; return this;} private byte list_sub_last = Bool_.__byte;
|
||||
@Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
|
||||
Xop_list_tkn actl = (Xop_list_tkn)actl_obj;
|
||||
err += mgr.Tst_val(list_uid == -1, path, "list_uid", list_uid, actl.List_uid());
|
||||
err += mgr.Tst_val(list_bgn == 0, path, "list_bgn", list_bgn, actl.List_bgn());
|
||||
err += mgr.Tst_val(list_itmTyp == Xop_list_tkn_.List_itmTyp_null, path, "list_itmTyp", list_itmTyp, actl.List_itmTyp());
|
||||
err += mgr.Tst_val(list_sub_last == Bool_.__byte, path, "list_sub_last", list_sub_last, actl.List_sub_last());
|
||||
err += mgr.Tst_val(list_path == Int_.Ary_empty, path, "list_path", Array_.To_str(list_path), Array_.To_str(actl.List_path()));
|
||||
return err;
|
||||
}
|
||||
}
|
||||
@@ -1,186 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.tblws.*; import gplx.xowa.parsers.xndes.*;
|
||||
public class Xop_list_wkr implements Xop_ctx_wkr {
|
||||
private int listId = 0; byte[] curSymAry = new byte[Max_list_depth]; int curSymLen = 0; byte[] prvSymAry = Bry_.Empty;
|
||||
private HierPosAryBldr posBldr = new HierPosAryBldr(Max_list_depth);
|
||||
private boolean SymAry_fill_overflow;
|
||||
/** No per-ctx setup is performed by the list worker. */
public void Ctor_ctx(Xop_ctx ctx) {
}
|
||||
/** Clears all list state at the start of a page and restarts list ids at 0. */
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
  Reset(0);
}
|
||||
/** Nothing is done by the list worker at the end of a page. */
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
}
|
||||
/** Returns the position builder's Dirty() flag. */
public boolean List_dirty() {
  return posBldr.Dirty();
}
|
||||
// flag read by Xop_colon_lxr to decide whether a ":" is handed to MakeTkn_bgn; set at the end of MakeTkn_bgn when the last symbol is ";"
private boolean dd_chk;
public boolean Dd_chk() {
  return dd_chk;
}
public Xop_list_wkr Dd_chk_(boolean v) {
  dd_chk = v;
  return this;
}
|
||||
public void AutoClose(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
|
||||
// NOTE: list_tkns can not be explicitly closed, so auto-close will happen for all items
|
||||
MakeTkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, (Xop_list_tkn)tkn, Bool_.Y_byte);
|
||||
Reset(listId + 1);
|
||||
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag__ul);
|
||||
}
|
||||
public int MakeTkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {// REF.MW: Parser|doBlockLevels
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_bos) bgn_pos = 0; // do not allow -1 pos
|
||||
|
||||
// pop hdr if exists; EX: \n== a ==\n*b; \n* needs to close hdr
|
||||
int acsPos = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_hdr);
|
||||
if (acsPos != -1) ctx.Stack_pop_til(root, src, acsPos, true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_list);
|
||||
|
||||
// close apos
|
||||
ctx.Apos().End_frame(ctx, root, src, bgn_pos, false);
|
||||
byte symByt = src[cur_pos - 1]; // -1 b/c symByt is byte before curByt; EX: \n*a; cur_pos is at a; want to get *
|
||||
int prvSymLen = curSymLen;
|
||||
cur_pos = SymAry_fill(src, cur_pos, src_len, symByt);
|
||||
symByt = src[cur_pos - 1]; // NOTE: get symByt again b/c cur_pos may have changed; EX: "#*"; # may have triggered list, but last symByt should be *
|
||||
if (SymAry_fill_overflow) return ctx.Lxr_make_txt_(cur_pos);
|
||||
PrvItm_compare();
|
||||
ctx.Para().Process_block__bgn__nl_w_symbol(ctx, root, src, bgn_pos, cur_pos - 1, Xop_xnde_tag_.Tag__li); // -1 b/c cur_pos includes sym_byte; EX: \n*; pass li; should pass correct tag, but for purposes of para_wkr, <li> doesn't matter
|
||||
if (prvSymMatch) {
|
||||
PopTil(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, Bool_.N_byte);
|
||||
posBldr.MoveNext();
|
||||
prvSymAry = Xop_list_wkr_.MakeSymAry(curSymAry, curSymLen);
|
||||
Xop_list_tkn prvItm = tkn_mkr.List_bgn(bgn_pos, cur_pos, curSymAry[curSymLen - 1], curSymLen).List_path_(posBldr.XtoIntAry()).List_uid_(listId);
|
||||
ctx.Subs_add_and_stack(root, prvItm);
|
||||
ctx.Empty_ignored_y_();
|
||||
}
|
||||
else {
|
||||
for (int i = prvSymLen; i > commonSymLen; i--) { // close all discontinued itms: EX: ##\n#\n
|
||||
PopTil(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, Bool_.Y_byte);
|
||||
posBldr.MoveUp();
|
||||
}
|
||||
if (commonSymLen == 0 && prvSymLen != 0) { // nothing in common; reset list
|
||||
listId++;
|
||||
posBldr.Init();
|
||||
}
|
||||
if (curSymLen == commonSymLen) { // add another itm if continuing; EX: #\n#\n
|
||||
PopTil(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, Bool_.N_byte);
|
||||
if ((prvSymLen - curSymLen) > 0 // moving up many levels; do not open new list; just MoveNext; EX: #1\n###3\n##2
|
||||
&& curSymLen != 1) { // do not moveNext if at level 1; this has to do with strange incrementing logic in posBldr at rootLvl
|
||||
posBldr.MoveNext();
|
||||
}
|
||||
else {
|
||||
posBldr.MoveUp(); posBldr.MoveDown();
|
||||
}
|
||||
prvSymAry = Xop_list_wkr_.MakeSymAry(curSymAry, curSymLen);
|
||||
symByt = src[cur_pos - 1];
|
||||
Xop_list_tkn prvItm = tkn_mkr.List_bgn(bgn_pos, cur_pos, symByt, curSymLen).List_path_(posBldr.XtoIntAry()).List_uid_(listId);
|
||||
ctx.Subs_add_and_stack(root, prvItm);
|
||||
ctx.Empty_ignored_y_();
|
||||
}
|
||||
for (int i = commonSymLen; i < curSymLen; i++) { // open new itms; EX: #\n##\n
|
||||
posBldr.MoveDown();
|
||||
symByt = curSymAry[i];
|
||||
prvSymAry = Xop_list_wkr_.MakeSymAry(curSymAry, curSymLen);
|
||||
Xop_list_tkn prvItm = tkn_mkr.List_bgn(bgn_pos, cur_pos, symByt, i + List_adp_.Base1).List_path_(posBldr.XtoIntAry()).List_uid_(listId);
|
||||
ctx.Subs_add_and_stack(root, prvItm);
|
||||
ctx.Empty_ignored_y_();
|
||||
}
|
||||
}
|
||||
if (allDd) { // NOTE: if indent && next == {| then invoke table; EX: ":::{|"
|
||||
int tblw_bgn = Bry_find_.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Space); // skip spaces; EX: ": {|" DATE:2017-01-26
|
||||
if (tblw_bgn < src_len - 2 && src[tblw_bgn] == '{' && src[tblw_bgn + 1] == '|') // check if next chars are "{|"
|
||||
return ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, tblw_bgn, tblw_bgn+ 2, false, Xop_tblw_wkr.Tblw_type_tb, Xop_tblw_wkr.Called_from_list, -1, -1); // NOTE: ws_enabled must be set to true; see test for Adinkras; Cato the Elder
|
||||
}
|
||||
dd_chk = symByt == Xop_list_tkn_.List_itmTyp_dt;
|
||||
return cur_pos;
|
||||
}
|
||||
public void MakeTkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_list_tkn bgn, byte sub_last) {
|
||||
// boolean empty_ignored = ctx.Empty_ignored(); // commented; see below; DATE:2014-06-24
|
||||
Xop_tkn_itm end_tkn = tkn_mkr.List_end(bgn_pos, bgn.List_itmTyp()).List_path_(bgn.List_path()).List_uid_(listId).List_sub_last_(sub_last);
|
||||
ctx.Subs_add(root, end_tkn);
|
||||
// if (empty_ignored) ctx.Empty_ignore(root, bgn.Tkn_sub_idx()); // commented; code was incorrectly deactivating "*a" when "<li>" encountered; PAGE:en.w:Bristol_Bullfinch DATE:2014-06-24
|
||||
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag__ul);
|
||||
}
|
||||
private Xop_list_tkn PopTil(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, byte subLast) {
|
||||
int acs_pos = ctx.Stack_idx_find_but_stop_at_tbl(Xop_tkn_itm_.Tid_list);
|
||||
if (acs_pos == -1) return null;
|
||||
Xop_list_tkn rv = (Xop_list_tkn)ctx.Stack_pop_til(root, src, acs_pos, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_list);
|
||||
MakeTkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, rv, subLast);
|
||||
return rv;
|
||||
}
|
||||
private void PrvItm_compare() {
|
||||
int prvSymLen = prvSymAry.length;
|
||||
prvSymMatch = curSymLen == prvSymLen; commonSymLen = 0;
|
||||
for (int i = 0; i < curSymLen; i++) {
|
||||
if (i < prvSymLen && (Xop_list_wkr_.Compare_normalize(curSymAry[i]) == Xop_list_wkr_.Compare_normalize(prvSymAry[i]))) {
|
||||
commonSymLen = i + 1;
|
||||
}
|
||||
else {
|
||||
prvSymMatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} boolean prvSymMatch; int commonSymLen = 0; boolean allDd = false;
|
||||
private int SymAry_fill(byte[] src, int cur_pos, int src_len, byte curByt) {
|
||||
curSymLen = 0;
|
||||
curSymAry[curSymLen++] = curByt;
|
||||
allDd = true;
|
||||
boolean loop = true;
|
||||
SymAry_fill_overflow = false;
|
||||
while (loop) {
|
||||
if (cur_pos == src_len) break;
|
||||
if (curSymLen == Max_list_depth) { // WORKAROUND: xowa imposes max list depth of 256; MW is unlimited; may change for future release but 256 should accomodate all real-world usages
|
||||
boolean stop = false;
|
||||
for (int i = cur_pos; i < src_len; i++) {
|
||||
curByt = src[i];
|
||||
switch (curByt) {
|
||||
case Byte_ascii.Star:
|
||||
case Byte_ascii.Hash:
|
||||
case Byte_ascii.Semic:
|
||||
case Byte_ascii.Colon:
|
||||
cur_pos = i;
|
||||
break;
|
||||
default:
|
||||
stop = true;
|
||||
break;
|
||||
}
|
||||
if (stop) break;
|
||||
}
|
||||
for (int i = 0; i < Max_list_depth; i++)
|
||||
curSymAry[i] = Byte_ascii.Null;
|
||||
curSymLen = 0;
|
||||
SymAry_fill_overflow = true;
|
||||
return cur_pos;
|
||||
}
|
||||
curByt = src[cur_pos];
|
||||
switch (curByt) {
|
||||
case Byte_ascii.Star:
|
||||
case Byte_ascii.Hash:
|
||||
case Byte_ascii.Semic:
|
||||
curSymAry[curSymLen++] = curByt;
|
||||
cur_pos++;
|
||||
allDd = false;
|
||||
break;
|
||||
case Byte_ascii.Colon:
|
||||
curSymAry[curSymLen++] = curByt;
|
||||
cur_pos++;
|
||||
break;
|
||||
default:
|
||||
loop = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return cur_pos;
|
||||
}
|
||||
private void Reset(int newListId) {
|
||||
posBldr.Init();
|
||||
curSymLen = 0;
|
||||
prvSymAry = Bry_.Empty;
|
||||
dd_chk = false;
|
||||
listId = newListId;
|
||||
}
|
||||
public static final int Max_list_depth = 256;
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_list_wkr_ {
|
||||
public static byte[] MakeSymAry(byte[] curSymAry, int curSymLen) {
|
||||
byte[] rv = new byte[curSymLen];
|
||||
for (int i = 0; i < curSymLen; i++)
|
||||
rv[i] = curSymAry[i];
|
||||
return rv;
|
||||
}
|
||||
public static byte Compare_normalize(byte b) { // convert : to ; for sake of determining levels; EX: ";:" is actually same group
|
||||
switch (b) {
|
||||
case Byte_ascii.Star:
|
||||
case Byte_ascii.Hash:
|
||||
case Byte_ascii.Semic: return b;
|
||||
case Byte_ascii.Colon: return Byte_ascii.Semic;
|
||||
default: throw Err_.new_unhandled(b);
|
||||
}
|
||||
}
|
||||
public static void Close_list_if_present(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos) {// close all list tkns on stack; EX: ***\n should close all 3 stars; used to only close 1
|
||||
if (ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tmpl_invk) != Xop_ctx.Stack_not_found) return; // list is inside template; do not close;
|
||||
int acs_pos = -1, acs_len = ctx.Stack_len();
|
||||
for (int i = acs_len - 1; i > -1; i--) { // loop backwards until earliest list tkn
|
||||
byte cur_acs_tid = ctx.Stack_get(i).Tkn_tid();
|
||||
switch (cur_acs_tid) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb:
|
||||
case Xop_tkn_itm_.Tid_tblw_tc:
|
||||
case Xop_tkn_itm_.Tid_tblw_te:
|
||||
case Xop_tkn_itm_.Tid_tblw_td:
|
||||
case Xop_tkn_itm_.Tid_tblw_th:
|
||||
case Xop_tkn_itm_.Tid_tblw_tr: i = -1; break; // tblw: stop loop; do not close a list above tbl; EX: ": {| |- *a |b }" should not close ":"; stops at "|-"
|
||||
case Xop_tkn_itm_.Tid_list: acs_pos = i; break; // list: update acs_pos
|
||||
default: break; // else: keep looping
|
||||
}
|
||||
}
|
||||
if (acs_pos == Xop_ctx.Stack_not_found) return; // no list tokens found; exit
|
||||
ctx.Stack_pop_til(root, src, acs_pos, true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_list);
|
||||
}
|
||||
}
|
||||
@@ -1,353 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_list_wkr_basic_tst {
|
||||
private final Xop_fxt fxt = new Xop_fxt();
|
||||
// runs after every test: calls Init_para_n_ on the shared fixture
@After public void term() {
  fxt.Init_para_n_();
}
|
||||
@Test public void List_1() {
|
||||
fxt.Test_parse_page_wiki("\n*a"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_end_(3).List_path_(0).List_uid_(0)
|
||||
);
|
||||
}
|
||||
@Test public void Bos() {
|
||||
fxt.Test_parse_page_wiki("*a"
|
||||
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(1, 2)
|
||||
, fxt.tkn_list_end_(2).List_path_(0).List_uid_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_2() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n**b"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(6, 7)
|
||||
, fxt.tkn_list_end_(7).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(7).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_2_2() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n**b\n**c"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(6, 7)
|
||||
, fxt.tkn_list_end_(7).List_path_(0, 0)
|
||||
, fxt.tkn_list_bgn_(7, 10, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 1).List_uid_(0)
|
||||
, fxt.tkn_txt_(10, 11)
|
||||
, fxt.tkn_list_end_(11).List_path_(0, 1)
|
||||
, fxt.tkn_list_end_(11).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_2_3() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n**b\n***c"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(6, 7)
|
||||
, fxt.tkn_list_bgn_(7, 11, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(11, 12)
|
||||
, fxt.tkn_list_end_(12).List_path_(0, 0, 0)
|
||||
, fxt.tkn_list_end_(12).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(12).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_2() {
|
||||
fxt.Test_parse_page_wiki("\n**a"
|
||||
, fxt.tkn_list_bgn_(0, 3, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(0, 3, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(3, 4)
|
||||
, fxt.tkn_list_end_(4).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(4).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_3() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n***b"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_bgn_(3, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(3, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(7, 8)
|
||||
, fxt.tkn_list_end_(8).List_path_(0, 0, 0)
|
||||
, fxt.tkn_list_end_(8).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(8).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_2_1() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n**b\n*c"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(6, 7)
|
||||
, fxt.tkn_list_end_(7).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(7).List_path_(0)
|
||||
, fxt.tkn_list_bgn_(7, 9, Xop_list_tkn_.List_itmTyp_ul).List_path_(1).List_uid_(0)
|
||||
, fxt.tkn_txt_(9, 10)
|
||||
, fxt.tkn_list_end_(10).List_path_(1)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_1_1() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n*b\n*c"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_end_(3).List_path_(0)
|
||||
, fxt.tkn_list_bgn_(3, 5, Xop_list_tkn_.List_itmTyp_ul).List_path_(1).List_uid_(0)
|
||||
, fxt.tkn_txt_(5, 6)
|
||||
, fxt.tkn_list_end_(6).List_path_(1)
|
||||
, fxt.tkn_list_bgn_(6, 8, Xop_list_tkn_.List_itmTyp_ul).List_path_(2).List_uid_(0)
|
||||
, fxt.tkn_txt_(8, 9)
|
||||
, fxt.tkn_list_end_(9).List_path_(2)
|
||||
);
|
||||
}
|
||||
@Test public void List_1___1() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n\n*b"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_end_(3).List_path_(0)
|
||||
, fxt.tkn_nl_char_len1_(3)
|
||||
, fxt.tkn_list_bgn_(4, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(1)
|
||||
, fxt.tkn_txt_(6, 7)
|
||||
, fxt.tkn_list_end_(7).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void List_1_3_1() {
|
||||
fxt.Test_parse_page_wiki("\n*a\n***b\n*c"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_bgn_(3, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(3, 7, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(7, 8)
|
||||
, fxt.tkn_list_end_(8).List_path_(0, 0, 0)
|
||||
, fxt.tkn_list_end_(8).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(8).List_path_(0)
|
||||
, fxt.tkn_list_bgn_(8, 10, Xop_list_tkn_.List_itmTyp_ul).List_path_(1).List_uid_(0)
|
||||
, fxt.tkn_txt_(10, 11)
|
||||
, fxt.tkn_list_end_(11).List_path_(1)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_2o_2u() {
|
||||
fxt.Test_parse_page_wiki("\n**a\n##b"
|
||||
, fxt.tkn_list_bgn_(0, 3, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(0, 3, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(3, 4)
|
||||
, fxt.tkn_list_end_(4).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(4).List_path_(0)
|
||||
, fxt.tkn_list_bgn_(4, 7, Xop_list_tkn_.List_itmTyp_ol).List_path_(0).List_uid_(1)
|
||||
, fxt.tkn_list_bgn_(4, 7, Xop_list_tkn_.List_itmTyp_ol).List_path_(0, 0).List_uid_(1)
|
||||
, fxt.tkn_txt_(7, 8)
|
||||
, fxt.tkn_list_end_(8).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(8).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void Dt_dd() {
|
||||
fxt.Test_parse_page_wiki(";a\n:b"
|
||||
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_dt).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(1, 2)
|
||||
, fxt.tkn_list_end_(2).List_path_(0)
|
||||
, fxt.tkn_list_bgn_(2, 4, Xop_list_tkn_.List_itmTyp_dd).List_path_(1).List_uid_(0)
|
||||
, fxt.tkn_txt_(4, 5)
|
||||
, fxt.tkn_list_end_(5).List_path_(1)
|
||||
);
|
||||
}
|
||||
@Test public void Dt_dd_inline() {
|
||||
fxt.Test_parse_page_wiki(";a:b" // NOTE: no line break
|
||||
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_dt).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(1, 2)
|
||||
, fxt.tkn_list_end_(2).List_path_(0)
|
||||
, fxt.tkn_list_bgn_(2, 3, Xop_list_tkn_.List_itmTyp_dd).List_path_(1).List_uid_(0)
|
||||
, fxt.tkn_txt_(3, 4)
|
||||
, fxt.tkn_list_end_(4).List_path_(1)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_1dd_1ul() {
|
||||
fxt.Test_parse_page_wiki(":*a"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_dd).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_end_(3).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(3).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_1ul__1dd_1ul() {
|
||||
fxt.Test_parse_page_wiki("*a\n:*b"
|
||||
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ul).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(1, 2)
|
||||
, fxt.tkn_list_end_(2).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(2, 5, Xop_list_tkn_.List_itmTyp_dd).List_path_(0).List_uid_(1)
|
||||
, fxt.tkn_list_bgn_(2, 5, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(1)
|
||||
, fxt.tkn_txt_(5, 6)
|
||||
, fxt.tkn_list_end_(6).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(6).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_1dd_1ul__1dd_1ul() {
|
||||
fxt.Test_parse_page_wiki(":*a\n:*b"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_dd).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_txt_(2, 3)
|
||||
, fxt.tkn_list_end_(3).List_path_(0, 0)
|
||||
, fxt.tkn_list_bgn_(3, 6, Xop_list_tkn_.List_itmTyp_ul).List_path_(0, 1).List_uid_(0)
|
||||
, fxt.tkn_txt_(6, 7)
|
||||
, fxt.tkn_list_end_(7).List_path_(0, 1)
|
||||
, fxt.tkn_list_end_(7).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_1ul_1hdr() {
|
||||
fxt.Test_parse_page_wiki_str("*a\n==a==\n", String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
, "<h2>a</h2>"
|
||||
));
|
||||
}
|
||||
@Test public void Mix_1ul_1hdr_1ul() {
|
||||
fxt.Test_parse_page_wiki_str("*a\n==a==\n*b", String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
, "<h2>a</h2>"
|
||||
, ""
|
||||
, "<ul>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Mix_1ol_1hr_1ol() {
|
||||
fxt.Test_parse_page_wiki("#a\n----\n#b"
|
||||
, fxt.tkn_list_bgn_(0, 1, Xop_list_tkn_.List_itmTyp_ol).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_txt_(1, 2)
|
||||
, fxt.tkn_list_end_(2)
|
||||
, fxt.tkn_para_blank_(2)
|
||||
, fxt.tkn_hr_(2, 7)
|
||||
, fxt.tkn_list_bgn_(7, 9, Xop_list_tkn_.List_itmTyp_ol).List_path_(0).List_uid_(1)
|
||||
, fxt.tkn_txt_(9, 10)
|
||||
, fxt.tkn_list_end_(10)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_tblw() {
|
||||
fxt.Test_parse_page_wiki("::{|\n|a\n|}"
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_dd).List_path_(0).List_uid_(0)
|
||||
, fxt.tkn_list_bgn_(0, 2, Xop_list_tkn_.List_itmTyp_dd).List_path_(0, 0).List_uid_(0)
|
||||
, fxt.tkn_tblw_tb_(2, 10).Subs_
|
||||
( fxt.tkn_tblw_tr_(4, 7).Subs_
|
||||
( fxt.tkn_tblw_td_(4, 7).Subs_(fxt.tkn_txt_(6, 7), fxt.tkn_para_blank_(8)))
|
||||
|
||||
)
|
||||
, fxt.tkn_list_end_(10).List_path_(0, 0)
|
||||
, fxt.tkn_list_end_(10).List_path_(0)
|
||||
);
|
||||
}
|
||||
@Test public void Mix_tblw_w_space() {
|
||||
fxt.Test_html_full_str(": {|\n|a\n|}", String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dd>"
|
||||
, " <table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
));
|
||||
}
|
||||
@Test public void Dif_lvls_1_3_1() {
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "*1"
|
||||
, "***3"
|
||||
, "*1"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>1"
|
||||
, " <ul>"
|
||||
, " <li>"
|
||||
, " <ul>"
|
||||
, " <li>3"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " <li>1"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Dif_lvls_1_3_2() {// uneven lists
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "*1"
|
||||
, "***3"
|
||||
, "**2"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>1"
|
||||
, " <ul>"
|
||||
, " <li>"
|
||||
, " <ul>"
|
||||
, " <li>3"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " <li>2"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void New_lines() {
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
, ""
|
||||
, "**b"
|
||||
, ""
|
||||
, "**c"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
, "<ul>"
|
||||
, " <li>"
|
||||
, " <ul>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
, "<ul>"
|
||||
, " <li>"
|
||||
, " <ul>"
|
||||
, " <li>c"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -1,88 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_list_wkr_para_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private final Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Basic() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Multiple() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
, "*b"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Multiple_w_1_nl() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
, ""
|
||||
, "*b"
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
, "<ul>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Pre_between_lists() { // PURPOSE: list should close pre; EX:en.b:Knowing Knoppix/Other applications; DATE:2014-02-18
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "#a"
|
||||
, " b"
|
||||
, "#c" // should close <pre> opened by b
|
||||
) , String_.Concat_lines_nl_skip_last
|
||||
( "<ol>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ol>"
|
||||
, ""
|
||||
, "<pre>b"
|
||||
, "</pre>"
|
||||
, ""
|
||||
, "<ol>"
|
||||
, " <li>c"
|
||||
, " </li>"
|
||||
, "</ol>"
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,409 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lists; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_list_wkr_uncommon_tst {
|
||||
private final Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Bug_specified_div() { // FIX: </div> was not clearing state for lnki; PAGE:en.w:Ananke (moon)
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "<div>"
|
||||
, "#<i>a"
|
||||
, "</div>"
|
||||
, "*b"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<div>"
|
||||
, "<ol>"
|
||||
, " <li><i>a"
|
||||
, "</i>"
|
||||
, " </li>"
|
||||
, "</ol></div>"
|
||||
, "<ul>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Bug_mismatched() { // FIX: </div> was not clearing state for lnki; PAGE:en.w:Ananke (moon)
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "::a"
|
||||
, ":::1"
|
||||
, "::::11"
|
||||
, ":::::111"
|
||||
, "::b"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dd>"
|
||||
, " <dl>"
|
||||
, " <dd>a"
|
||||
, " <dl>"
|
||||
, " <dd>1"
|
||||
, " <dl>"
|
||||
, " <dd>11"
|
||||
, " <dl>"
|
||||
, " <dd>111"
|
||||
, " </dd>"
|
||||
, " </dl>"
|
||||
, " </dd>"
|
||||
, " </dl>"
|
||||
, " </dd>"
|
||||
, " </dl>"
|
||||
, " </dd>"
|
||||
, " <dd>b"
|
||||
, " </dd>"
|
||||
, " </dl>"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
));
|
||||
}
|
||||
@Test public void Empty_li_ignored() { // PURPOSE: inner template can cause dupe li; PAGE:en.w:any Calendar day and NYT link; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
, "* "
|
||||
, "*b"
|
||||
, "*c"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, " <li> "
|
||||
, " </li>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, " <li>c"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void List_in_tblw() { // PURPOSE: list inside table should not be close outer list; PAGE:en.w:Cato the Elder
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
, "{|"
|
||||
, "|b"
|
||||
, "::c"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, "<table>"
|
||||
, " <tr>"
|
||||
, " <td>b"
|
||||
, ""
|
||||
, " <dl>"
|
||||
, " <dd>"
|
||||
, " <dl>"
|
||||
, " <dd>c"
|
||||
, " </dd>"
|
||||
, " </dl>"
|
||||
, " </dd>"
|
||||
, " </dl>"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Dt_dd_colon_at_eol() { // PURPOSE: dangling ":" should not put next line in <dt>; PAGE:en.w:Stein; b was being wrapped in <dt>b</dt>; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( ";a:"
|
||||
, "*b"
|
||||
, ""
|
||||
, ";c"
|
||||
, "*d"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dt>a"
|
||||
, " </dt>"
|
||||
, " <dd>"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
, "<ul>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
, "<dl>"
|
||||
, " <dt>c"
|
||||
, " </dt>"
|
||||
, "</dl>"
|
||||
, "<ul>"
|
||||
, " <li>d"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Dd_should_not_print_colon() {// PURPOSE: ;a:\n should show as ";a" not ";a:". colon should still be considered as part of empty list; DATE:2013-11-07; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
|
||||
fxt.Test_parse_page_all_str
|
||||
( ";a:\nb"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dt>a"
|
||||
, " </dt>"
|
||||
, " <dd>"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
, "b"
|
||||
));
|
||||
}
|
||||
@Test public void Dt_dd_colon_in_lnki() { // PURPOSE: "; [[Portal:a]]" should not split lnki; PAGE:en.w:Wikipedia:WikiProject Military history/Operation Majestic Titan; "; [[Wikipedia:WikiProject Military history/Operation Majestic Titan/Phase I|Phase I]]: a b"
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( ";[[Portal:a]]"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dt><a href=\"/wiki/Portal:A\">Portal:A</a>"
|
||||
, " </dt>"
|
||||
, "</dl>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Max_list_depth() { // PURPOSE: 256+ * caused list parser to fail; ignore; PAGE:en.w:Bariatric surgery
|
||||
String multiple = String_.Repeat("*", 300);
|
||||
fxt.Test_parse_page_all_str(multiple, multiple);
|
||||
}
|
||||
@Test public void Numbered_list_resets_incorrectly() { // PURPOSE: as description
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "#A"
|
||||
, "#*Aa"
|
||||
, "#**Aaa"
|
||||
, "#*Ab"
|
||||
, "#B"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ol>"
|
||||
, " <li>A"
|
||||
, ""
|
||||
, " <ul>"
|
||||
, " <li>Aa"
|
||||
, ""
|
||||
, " <ul>"
|
||||
, " <li>Aaa"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " <li>Ab"
|
||||
, " </li>"
|
||||
, " </ul>" // was showing as </ol>
|
||||
, " </li>"
|
||||
, " <li>B"
|
||||
, " </li>"
|
||||
, "</ol>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void List_should_not_end_indented_table() {// PURPOSE: :{| was being closed by \n*; EX:w:Maxwell's equations; DATE:20121231
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( ":{|"
|
||||
, "|-"
|
||||
, "|"
|
||||
, "*a"
|
||||
, "|b"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dd>"
|
||||
, " <table>"
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " <ul>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </td>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
));
|
||||
}
|
||||
@Test public void Dt_dd_broken_by_xnde() { // PURPOSE.fix: xnde was resetting dl incorrectly; EX:w:Virus; DATE:2013-01-31
|
||||
fxt.Test_parse_page_all_str(";<b>a</b>:c"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dt><b>a</b>"
|
||||
, " </dt>"
|
||||
, " <dd>c"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
));
|
||||
}
|
||||
@Test public void Trim_empty_list_items() { // PURPOSE: empty list items should be ignored; DATE:2013-07-02; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
|
||||
fxt.Test_parse_page_all_str
|
||||
("*** \n"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>"
|
||||
, " <ul>"
|
||||
, " <li>"
|
||||
, " <ul>"
|
||||
, " <li> "
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Trim_empty_list_items_error() { // PURPOSE.fix: do not add empty itm's nesting to current list; DATE:2013-07-07; NOTE:deactivated prune_empty_list logic; DATE:2014-09-05
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( "* a"
|
||||
, "** " // was: do not add ** to nest; now: add ** and \s
|
||||
, "*** b"
|
||||
, "* c"
|
||||
), String_.Concat_lines_nl
|
||||
( "<ul>"
|
||||
, " <li> a"
|
||||
, " <ul>"
|
||||
, " <li> "
|
||||
, " <ul>"
|
||||
, " <li> b"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, " <li> c"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Tblw_should_autoclose() {// PURPOSE: tblw should auto-close open list
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "#a"
|
||||
, "{|"
|
||||
, "|b"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ol>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, "</ol>"
|
||||
, "<table>"
|
||||
, " <tr>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Tblx_should_not_autoclose() { // PURPOSE: do not auto-close list if table is xnde; DATE:2014-02-05
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( "#a"
|
||||
, "# <table><tr><td>b</td></tr></table>"
|
||||
, "c"
|
||||
), String_.Concat_lines_nl
|
||||
( "<ol>"
|
||||
, " <li>a"
|
||||
, " </li>"
|
||||
, " <li> "
|
||||
, " <table>"
|
||||
, " <tr>"
|
||||
, " <td>b"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, " </table>"
|
||||
, " </li>"
|
||||
, "</ol>"
|
||||
, "c"
|
||||
));
|
||||
}
|
||||
@Test public void Li_disappears() { // PURPOSE: "\n*" disappears when followed by "<li>"; PAGE:en.w:Bristol_Bullfinch; DATE:2014-06-24
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( "a"
|
||||
, "*b<li>"
|
||||
), String_.Concat_lines_nl_skip_last // NOTE: tag sequence matches MW output
|
||||
( "a"
|
||||
, "<ul>"
|
||||
, " <li>b"
|
||||
, "<li>"
|
||||
, "</li>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Ul_should_end_wlst() { // PURPOSE: </ul> should end wiki_list; PAGE:en.w:Bristol_Bullfinch; DATE:2014-06-24
|
||||
fxt.Test_parse_page_all_str
|
||||
( "*a</ul>b"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a</ul>b" // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Colon_causes_dd() { // PURPOSE: colon was mistakenly being ignored due to proximity to "\n;"; PAGE:de.w:Schmach_von_Tirana#Kuriosit.C3.A4t:_EM-Qualifikationsspiel_vom_20._November_1983 DATE:2014-07-11
|
||||
fxt.Test_parse_page_all_str
|
||||
( String_.Concat_lines_nl_skip_last
|
||||
( "a:b"
|
||||
, ";c"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "a:b"
|
||||
, "<dl>"
|
||||
, " <dt>c"
|
||||
, " </dt>"
|
||||
, "</dl>"
|
||||
));
|
||||
}
|
||||
@Test public void Pre_and_nested() { // PURPOSE: pre should interrupt list; PAGE:fi.w:Luettelo_hyönteisistä; DATE:2015-03-31
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_all_str
|
||||
( String_.Concat_lines_nl_skip_last
|
||||
( "*a"
|
||||
, "**b"
|
||||
, " c" // pre
|
||||
, "*d" // *d treated mistakenly as **d
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li>a"
|
||||
, ""
|
||||
, " <ul>"
|
||||
, " <li>b"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
, "<pre>c"
|
||||
, "</pre>"
|
||||
, ""
|
||||
, "<ul>"
|
||||
, " <li>d"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
, ""
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_lnke_end_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_lnke_end;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Brack_end, this);}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Lnke().MakeTkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_lnke_end_lxr Instance = new Xop_lnke_end_lxr(); Xop_lnke_end_lxr() {}
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.log_msgs.*;
|
||||
public class Xop_lnke_log {
|
||||
private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "lnke");
|
||||
public static final Gfo_msg_itm Dangling = Gfo_msg_itm_.new_note_(owner, "dangling"); // NOTE: WP.BOT:YOBOT;PAGE:en.w:Pan_flute
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
import gplx.core.net.*;
|
||||
public class Xop_lnke_lxr implements Xop_lxr {
|
||||
Xop_lnke_lxr(byte lnke_typ, byte[] protocol, byte tid) {this.lnke_typ = lnke_typ; this.protocol = protocol; this.tid = tid;} private byte lnke_typ; byte[] protocol; byte tid;
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_lnke_bgn;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
|
||||
Gfo_protocol_itm[] ary = Gfo_protocol_itm.Ary();
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
Gfo_protocol_itm itm = ary[i];
|
||||
Ctor_lxr_add(core_trie, itm.Key_w_colon_bry(), itm.Tid());
|
||||
}
|
||||
core_trie.Add(Bry_relative_1, new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Gfo_protocol_itm.Bry_relative, Gfo_protocol_itm.Tid_relative_1));
|
||||
core_trie.Add(Bry_relative_2, new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Gfo_protocol_itm.Bry_relative, Gfo_protocol_itm.Tid_relative_2));
|
||||
Ctor_lxr_add(core_trie, Bry_.new_a7("xowa-cmd"), Gfo_protocol_itm.Tid_xowa);
|
||||
} private static final byte[] Bry_relative_1 = Bry_.new_a7("[//"), Bry_relative_2 = Bry_.new_a7("[[//");
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
private void Ctor_lxr_add(Btrie_fast_mgr core_trie, byte[] protocol_bry, byte tid) {
|
||||
core_trie.Add(protocol_bry , new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_text, protocol_bry, tid));
|
||||
core_trie.Add(Bry_.Add(Byte_ascii.Brack_bgn, protocol_bry) , new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, protocol_bry, tid));
|
||||
}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if (this.tid == Gfo_protocol_itm.Tid_xowa && !ctx.Wiki().Sys_cfg().Xowa_proto_enabled()) return ctx.Lxr_make_txt_(cur_pos);
|
||||
return ctx.Lnke().MakeTkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, protocol, tid, lnke_typ);
|
||||
}
|
||||
public static final Xop_lnke_lxr Instance = new Xop_lnke_lxr(); Xop_lnke_lxr() {}
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.net.*; import gplx.core.net.qargs.*;
|
||||
public class Xop_lnke_tkn extends Xop_tkn_itm_base {//20111222
|
||||
public static final byte Lnke_typ_null = 0, Lnke_typ_brack = 1, Lnke_typ_text = 2, Lnke_typ_brack_dangling = 3;
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_lnke;}
|
||||
public boolean Lnke_relative() {return lnke_relative;} public Xop_lnke_tkn Lnke_relative_(boolean v) {lnke_relative = v; return this;} private boolean lnke_relative;
|
||||
public byte Lnke_typ() {return lnke_typ;} public Xop_lnke_tkn Lnke_typ_(byte v) {lnke_typ = v; return this;} private byte lnke_typ = Lnke_typ_null;
|
||||
public byte[] Lnke_site() {return lnke_site;} public Xop_lnke_tkn Lnke_site_(byte[] v) {lnke_site = v; return this;} private byte[] lnke_site;
|
||||
public byte[] Lnke_xwiki_wiki() {return lnke_xwiki_wiki;} private byte[] lnke_xwiki_wiki;
|
||||
public byte[] Lnke_xwiki_page() {return lnke_xwiki_page;} private byte[] lnke_xwiki_page;
|
||||
public Gfo_qarg_itm[] Lnke_xwiki_qargs() {return lnke_xwiki_qargs;} Gfo_qarg_itm[] lnke_xwiki_qargs;
|
||||
public void Lnke_xwiki_(byte[] wiki, byte[] page, Gfo_qarg_itm[] args) {this.lnke_xwiki_wiki = wiki; this.lnke_xwiki_page = page; this.lnke_xwiki_qargs = args;}
|
||||
public int Lnke_href_bgn() {return lnke_href_bgn;} private int lnke_href_bgn;
|
||||
public int Lnke_href_end() {return lnke_href_end;} private int lnke_href_end;
|
||||
public byte[] Protocol() {return protocol;} private byte[] protocol;
|
||||
public byte Proto_tid() {return proto_tid;} private byte proto_tid;
|
||||
public Xop_lnke_tkn Subs_add_ary(Xop_tkn_itm... ary) {for (Xop_tkn_itm itm : ary) super.Subs_add(itm); return this;}
|
||||
|
||||
public Xop_lnke_tkn(int bgn, int end, byte[] protocol, byte proto_tid, byte lnke_typ, int lnke_href_bgn, int lnke_href_end) {
|
||||
this.Tkn_ini_pos(false, bgn, end); this.protocol = protocol; this.proto_tid = proto_tid; this.lnke_typ = lnke_typ; this.lnke_href_bgn = lnke_href_bgn; this.lnke_href_end = lnke_href_end;
|
||||
} Xop_lnke_tkn() {}
|
||||
}
|
||||
@@ -1,319 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.net.*; import gplx.xowa.apps.urls.*;
|
||||
import gplx.xowa.apps.progs.*; import gplx.xowa.wikis.xwikis.*;
|
||||
public class Xop_lnke_wkr implements Xop_ctx_wkr {
|
||||
// caches the Gfo_url_parser reachable from the ctx's wiki; MakeTkn_bgn uses it to locate the site portion of a url
public void Ctor_ctx(Xop_ctx ctx) {
	this.url_parser = ctx.Wiki().Utl__url_parser().Url_parser();
}
Gfo_url_parser url_parser;
Gfo_url_site_data site_data = new Gfo_url_site_data();	// reusable out-arg for url_parser.Parse_site_fast
|
||||
// last url parsed for an xwiki lnke; starts as a blank url and is reassigned by MakeTkn_bgn
private Xoa_url xo_url_parser_url = Xoa_url.blank();
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
	// intentionally empty: this wkr does nothing at page start
}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
	// intentionally empty: this wkr does nothing at page end
}
|
||||
// flag read by MakeTkn_bgn: when true, a "[" lnke that reaches end-of-src without "]" is added to root and pushed on the stack
public boolean Dangling_goes_on_stack() {
	return this.dangling_goes_on_stack;
}
public void Dangling_goes_on_stack_(boolean v) {
	this.dangling_goes_on_stack = v;
}
private boolean dangling_goes_on_stack;
|
||||
public void AutoClose(Xop_ctx ctx, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
|
||||
// "[" but no "]"; EX: "[irc://a"; NOTE: lnkes that start with protocol will be ac'd in MakeTkn_bgn; EX: "http://a"
|
||||
Xop_lnke_tkn bgn_tkn = (Xop_lnke_tkn)tkn;
|
||||
bgn_tkn.Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack_dangling);
|
||||
bgn_tkn.Src_end_(bgn_tkn.Lnke_href_end()); // NOTE: endPos is lnke_end, not cur_pos or src_len; EX: "[irc://a b", lnk ends at a, not b; NOTE: still bgns at [
|
||||
ctx.Msg_log().Add_itm_none(Xop_lnke_log.Dangling, src, tkn.Src_bgn(), cur_pos);
|
||||
}
|
||||
// protocol prefix for xowa command links; Make_tkn_xowa documents the expected format as [xowa-cmd:^"..."^ ]
public static final String Str_xowa_protocol = "xowa-cmd:";
|
||||
// byte[] form of Str_xowa_protocol, built with Bry_.new_a7
public static final byte[] Bry_xowa_protocol = Bry_.new_a7(Str_xowa_protocol);
|
||||
public int MakeTkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, byte[] protocol, byte proto_tid, byte lnke_type) {
|
||||
boolean lnke_type_brack = (lnke_type == Xop_lnke_tkn.Lnke_typ_brack);
|
||||
if ( !lnke_type_brack // lnke doesn't have "["; EX: "ttl:"
|
||||
&& !Valid_text_lnke(ctx, src, src_len, bgn_pos, cur_pos) // tkn is part of work; EX: " ttl:" vs "attl:"
|
||||
)
|
||||
return ctx.Lxr_make_txt_(cur_pos - 1); // -1 to ignore ":" in making text colon; needed to process ":" for list like "; attl: b" PAGE:de.w:Mord_(Deutschland)#Besonders_verwerfliche_Begehungsweise; DATE:2015-01-09
|
||||
if (ctx.Stack_get_typ(Xop_tkn_itm_.Tid_lnke) != null) return ctx.Lxr_make_txt_(cur_pos); // no nested lnke; return cur lnke as text; EX: "[irc://a irc://b]" -> "<a href='irc:a'>irc:b</a>"
|
||||
if (proto_tid == Gfo_protocol_itm.Tid_xowa) return Make_tkn_xowa(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, protocol, proto_tid, lnke_type);
|
||||
|
||||
// HACK: need to disable lnke if enclosing type is lnki and (1) arg is "link=" or (2) in 1st arg; basically, only enable for caption tkns (and preferably, thumb only) (which should be neither 1 or 2)
|
||||
if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_lnki && lnke_type == Xop_lnke_tkn.Lnke_typ_text) {
|
||||
byte mode = Lnki_linkMode_init;
|
||||
int lnki_pipe_count = 0;
|
||||
int tkn_idx = -1;
|
||||
for (int i = root.Subs_len() - 1; i > -1; i--) {
|
||||
Xop_tkn_itm link_tkn = root.Subs_get(i);
|
||||
tkn_idx = i;
|
||||
switch (link_tkn.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_pipe:
|
||||
if (mode == Lnki_linkMode_text) {ctx.Lxr_make_(false); return bgn_pos + 1;} // +1 to position after lnke_hook; EX:[[File:A.png|link=http:b.org]] position at t in http so http hook won't be invoked.
|
||||
else {i = -1; ++lnki_pipe_count;}
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_txt:
|
||||
if (mode == Lnki_linkMode_eq) mode = Lnki_linkMode_text;
|
||||
// else i = -1; // DELETE: do not be overly strict; need to handle pattern of link=http://a.org?b=http://c.org; DATE:2013-02-03
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_eq:
|
||||
if (mode == Lnki_linkMode_init) mode = Lnki_linkMode_eq;
|
||||
// else i = -1; // DELETE: do not be overly strict; need to handle pattern of link=http://a.org?b=http://c.org; DATE:2013-02-03
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_space: case Xop_tkn_itm_.Tid_tab:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (lnki_pipe_count == 0) {
|
||||
for (int i = tkn_idx; i > -1; i--) {
|
||||
Xop_tkn_itm link_tkn = root.Subs_get(i);
|
||||
tkn_idx = i;
|
||||
switch (link_tkn.Tkn_tid()) {
|
||||
// case Xop_tkn_itm_.Tid_txt: return cur_pos; // REMOVED:2012-11-12: was causing [[http://a.org a]] [[http://b.org b]] to fail; PAGE:en.w:Template:Infobox_country
|
||||
case Xop_tkn_itm_.Tid_space: case Xop_tkn_itm_.Tid_tab: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
int lnke_bgn = bgn_pos, lnke_end = -1, brack_end_pos = -1;
|
||||
int lnke_end_tid = End_tid_null;
|
||||
while (true) { // loop until lnke_end_tid char;
|
||||
if (cur_pos == src_len) {lnke_end_tid = End_tid_eos; lnke_end = cur_pos; break;}
|
||||
switch (src[cur_pos]) {
|
||||
case Byte_ascii.Brack_end:
|
||||
if (lnke_type_brack) { // NOTE: check that frame begins with [ in order to end with ]
|
||||
lnke_end_tid = End_tid_brack; brack_end_pos = cur_pos + 1; // 1=adj_next_char
|
||||
}
|
||||
else { // NOTE: frame does not begin with [ but ] encountered. mark "invalid" in order to force parser to stop before "]"
|
||||
lnke_end_tid = End_tid_invalid;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Space: lnke_end_tid = End_tid_space; break;
|
||||
case Byte_ascii.Nl: lnke_end_tid = End_tid_nl; break;
|
||||
case Byte_ascii.Gt: case Byte_ascii.Lt:
|
||||
lnke_end_tid = End_tid_invalid;
|
||||
break;
|
||||
case Byte_ascii.Apos:
|
||||
if (cur_pos + 1 < src_len && src[cur_pos + 1] == Byte_ascii.Apos) // NOTE: '' breaks link, but not '; EX: [http://a.org''b'']]; DATE:2013-03-18
|
||||
lnke_end_tid = End_tid_invalid;
|
||||
break;
|
||||
case Byte_ascii.Brack_bgn: // NOTE: always stop lnke at "[" regardless of brack_type; EX: [http:a.org[[B]]] and http:a.org[[B]]; DATE:2014-07-11
|
||||
case Byte_ascii.Quote: // NOTE: quote should also stop lnke; DATE:2014-10-10
|
||||
lnke_end_tid = End_tid_symbol;
|
||||
break;
|
||||
}
|
||||
if (lnke_end_tid == End_tid_null) cur_pos++;
|
||||
else {
|
||||
lnke_end = cur_pos;
|
||||
cur_pos++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (lnke_type_brack) {
|
||||
switch (lnke_end_tid) {
|
||||
case End_tid_eos:
|
||||
if (brack_end_pos == -1) { // eos but no ]; EX: "[irc://a"
|
||||
if (dangling_goes_on_stack) { // added for Xow_popup_parser which needs to handle dangling lnke due to block_len; DATE:2014-06-20
|
||||
ctx.Subs_add_and_stack(root, tkn_mkr.Txt(bgn_pos, src_len)); // note that tkn doesn't matter, as Xow_popup_parser only cares *if* something is on stack, not *what* is on stack
|
||||
return src_len;
|
||||
}
|
||||
ctx.Subs_add(root, tkn_mkr.Txt(bgn_pos, bgn_pos + 1));// convert open brack to txt; // FUTURE: don't make brack_tkn; just flag
|
||||
bgn_pos += 1;
|
||||
brack_end_pos = cur_pos;
|
||||
lnke_bgn = bgn_pos;
|
||||
lnke_type = Xop_lnke_tkn.Lnke_typ_brack_dangling;
|
||||
}
|
||||
break;
|
||||
case End_tid_nl:
|
||||
lnke_type = Xop_lnke_tkn.Lnke_typ_brack_dangling;
|
||||
return ctx.Lxr_make_txt_(lnke_end); // textify lnk; EX: [irc://a\n] textifies "[irc://a"
|
||||
default:
|
||||
lnke_bgn += proto_tid == Gfo_protocol_itm.Tid_relative_2 ? 2 : 1; // if Tid_relative_2, then starts with [[; adjust by 2; EX:"[[//en" should have lnke_bgn at "//en", not "[//en"
|
||||
lnke_type = Xop_lnke_tkn.Lnke_typ_brack;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else { // else, plain text
|
||||
brack_end_pos = lnke_end;
|
||||
lnke_type = Xop_lnke_tkn.Lnke_typ_text;
|
||||
if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_lnki) { // SEE:NOTE_1
|
||||
Xop_tkn_itm prv_tkn = root.Subs_get(root.Subs_len() - 1); // get last tkn
|
||||
if (prv_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) { // is tkn lnki?
|
||||
root.Subs_del_after(prv_tkn.Tkn_sub_idx()); // delete [[ tkn and replace with [ tkn
|
||||
root.Subs_add(tkn_mkr.Txt(prv_tkn.Src_bgn(), prv_tkn.Src_bgn() + 1));
|
||||
ctx.Stack_pop_last(); // don't forget to remove from stack
|
||||
lnke_type = Xop_lnke_tkn.Lnke_typ_brack; // change lnke_typee to brack
|
||||
--bgn_pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (proto_tid == Gfo_protocol_itm.Tid_relative_2) // for "[[//", add "["; rest of code handles "[//" normally, but still want to include literal "["; DATE:2013-02-02
|
||||
ctx.Subs_add(root, tkn_mkr.Txt(lnke_bgn - 1, lnke_bgn));
|
||||
url_parser.Parse_site_fast(site_data, src, lnke_bgn, lnke_end);
|
||||
int site_bgn = site_data.Site_bgn(), site_end = site_data.Site_end();
|
||||
if (site_bgn == site_end) return ctx.Lxr_make_txt_(cur_pos); // empty proto should return text, not lnke; EX: "http:", "http://", "[http://]"; DATE:2014-10-09
|
||||
int adj = Ignore_punctuation_at_end(src, site_bgn, lnke_end);
|
||||
if (adj != 0) {
|
||||
lnke_end -= adj;
|
||||
brack_end_pos -= adj;
|
||||
cur_pos -= adj;
|
||||
}
|
||||
Xop_lnke_tkn tkn = tkn_mkr.Lnke(bgn_pos, brack_end_pos, protocol, proto_tid, lnke_type, lnke_bgn, lnke_end);
|
||||
tkn.Lnke_relative_(site_data.Rel());
|
||||
Xow_xwiki_itm xwiki = ctx.App().Usere().Wiki().Xwiki_mgr().Get_by_mid(src, site_bgn, site_end); // NOTE: check User_wiki.Xwiki_mgr, not App.Wiki_mgr() b/c only it is guaranteed to know all wikis on system
|
||||
if ( xwiki != null // lnke is to an xwiki; EX: [http://en.wikipedia.org/A a]
|
||||
&& Byte_.In(proto_tid, Gfo_protocol_itm.Tid_relative_1, Gfo_protocol_itm.Tid_relative_2, Gfo_protocol_itm.Tid_http, Gfo_protocol_itm.Tid_https) // only consider http / https; ignore mailto and others; PAGE:uk.w:Маскалі; DATE:2015-07-28
|
||||
&& Bry_.Match(src, site_bgn, site_end, xwiki.Domain_bry()) // only consider full domains, not alliases; EX: [http://w/b] should not match alias of w for en.wikipedia.org
|
||||
) {
|
||||
Xowe_wiki wiki = ctx.Wiki();
|
||||
|
||||
// HACK: this is not correct; "=" or "&" is not handled by Gfo_url_parser which assumes that all "&" separates qargs; DATE:2016-10-10
|
||||
byte[] decoded_src = gplx.xowa.parsers.amps.Xop_amp_mgr.Instance.Decode_as_bry(Bry_.Mid(src, lnke_bgn, lnke_end));
|
||||
xo_url_parser_url = wiki.Utl__url_parser().Parse(decoded_src, 0, decoded_src.length);
|
||||
|
||||
byte[] xwiki_wiki = xo_url_parser_url.Wiki_bry();
|
||||
byte[] xwiki_page = xo_url_parser_url.Page_bry();
|
||||
if (xwiki_page == null) { // handle xwiki lnke's to history page else null ref; EX:[http://ru.wikipedia.org/w/index.php?title&diff=19103464&oldid=18910980 извещен]; PAGE:ru.w:Project:Заявки_на_снятие_флагов/Архив/Патрулирующие/2009 DATE:2016-11-24
|
||||
xwiki_page = decoded_src;
|
||||
}
|
||||
else {
|
||||
Xoa_ttl ttl = Xoa_ttl.Parse(wiki, xwiki_page);
|
||||
if (ttl != null && ttl.Wik_itm() != null) {
|
||||
xwiki_wiki = ttl.Wik_itm().Domain_bry();
|
||||
xwiki_page = ttl.Page_url();
|
||||
}
|
||||
tkn.Lnke_xwiki_(xwiki_wiki, xwiki_page, xo_url_parser_url.Qargs_ary());
|
||||
}
|
||||
}
|
||||
ctx.Subs_add(root, tkn);
|
||||
if (lnke_type == Xop_lnke_tkn.Lnke_typ_brack) {
|
||||
if (lnke_end_tid == End_tid_brack) {
|
||||
tkn.Src_end_(cur_pos);
|
||||
tkn.Subs_move(root);
|
||||
return cur_pos;
|
||||
}
|
||||
ctx.Stack_add(tkn);
|
||||
if (lnke_end_tid == End_tid_invalid) {
|
||||
return cur_pos - 1; // -1 to return before < or >
|
||||
}
|
||||
}
|
||||
else {
|
||||
switch (lnke_end_tid) {
|
||||
case End_tid_space:
|
||||
ctx.Subs_add(root, tkn_mkr.Space(root, cur_pos - 1, cur_pos));
|
||||
break;
|
||||
case End_tid_symbol:
|
||||
case End_tid_nl:
|
||||
case End_tid_invalid: // NOTE that cur_pos is set after <, must subtract 1 else </xnde> will be ignored; EX: <span>irc://a</span>
|
||||
return cur_pos - 1;
|
||||
}
|
||||
}
|
||||
return cur_pos;
|
||||
}
|
||||
private static int Ignore_punctuation_at_end(byte[] src, int proto_end, int lnke_end) { // DATE:2014-10-09
|
||||
int rv = 0;
|
||||
int pos = lnke_end - 1; // -1 b/c pos is after char; EX: "abc" has pos of 3; need --pos to start at src[2] = 'c'
|
||||
byte paren_bgn_chk = Bool_.__byte;
|
||||
while (pos >= proto_end) {
|
||||
byte b = src[pos];
|
||||
switch (b) { // REF.MW: $sep = ',;\.:!?';
|
||||
case Byte_ascii.Comma: case Byte_ascii.Semic: case Byte_ascii.Backslash: case Byte_ascii.Dot:
|
||||
case Byte_ascii.Bang: case Byte_ascii.Question:
|
||||
break;
|
||||
case Byte_ascii.Colon: // differentiate between "http:" (don't trim) and "http://a.org:" (trim)
|
||||
if (pos == proto_end -1) return rv;
|
||||
break;
|
||||
case Byte_ascii.Paren_end: // differentiate between "(http://a.org)" (trim) and "http://a.org/b(c)" (don't trim)
|
||||
if (paren_bgn_chk == Bool_.__byte) {
|
||||
int paren_bgn_pos = Bry_find_.Find_fwd(src, Byte_ascii.Paren_bgn, proto_end, lnke_end);
|
||||
paren_bgn_chk = paren_bgn_pos == Bry_find_.Not_found ? Bool_.N_byte : Bool_.Y_byte;
|
||||
}
|
||||
if (paren_bgn_chk == Bool_.Y_byte) // "(" found; do not ignore ")"
|
||||
return rv;
|
||||
else
|
||||
break;
|
||||
default:
|
||||
return rv;
|
||||
}
|
||||
--pos;
|
||||
++rv;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
// states for MakeTkn_bgn's backwards scan of root tkns inside a lnki: init -> eq (on Tid_eq) -> text (on Tid_txt); a pipe reached in text state means the lnke is in an arg like "link=" and is not made
private static final byte Lnki_linkMode_init = 0, Lnki_linkMode_eq = 1, Lnki_linkMode_text = 2;
|
||||
// reason that MakeTkn_bgn's url scan stopped: null=still scanning; eos=end of src; brack="]" in a "[" lnke; space; nl; symbol="[" or quote; invalid="<", ">", "''" or "]" in a non-"[" lnke
private static final byte End_tid_null = 0, End_tid_eos = 1, End_tid_brack = 2, End_tid_space = 3, End_tid_nl = 4, End_tid_symbol = 5, End_tid_invalid = 6;
|
||||
public int MakeTkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
// Xop_tkn_itm last_tkn = ctx.Stack_get_last(); // BLOCK:invalid_ttl_check; // TODO_OLD: backout apos changes
|
||||
// if ( last_tkn != null
|
||||
// && last_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) {
|
||||
// Xop_lnki_tkn lnki = (Xop_lnki_tkn)last_tkn;
|
||||
// if ( lnki.Pipe_count_is_zero()) { // always invalid
|
||||
// ctx.Stack_pop_last();
|
||||
// return Xop_lnki_wkr_.Invalidate_lnki(ctx, src, root, lnki, bgn_pos);
|
||||
// }
|
||||
// }
|
||||
int lnke_bgn_idx = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_lnke);
|
||||
if (lnke_bgn_idx == -1) return ctx.Lxr_make_txt_(cur_pos); // no lnke_bgn tkn; occurs when just ]; EX: "a]b"
|
||||
Xop_lnke_tkn bgnTkn = (Xop_lnke_tkn)ctx.Stack_pop_til(root, src, lnke_bgn_idx, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_lnke);
|
||||
bgnTkn.Src_end_(cur_pos);
|
||||
bgnTkn.Subs_move(root);
|
||||
return cur_pos;
|
||||
}
|
||||
private static boolean Valid_text_lnke(Xop_ctx ctx, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_char_0) return true; // lnke starts at 0; always true
|
||||
int prv_pos = bgn_pos - 1;
|
||||
byte prv_byte = src[prv_pos];
|
||||
switch (prv_byte) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
return false; // alpha-numerical is invalid; EX: "titel:" should not generate a lnke for "tel:"
|
||||
}
|
||||
if (prv_byte >= Byte_ascii.Ascii_min && prv_byte <= Byte_ascii.Ascii_max) return true; // consider all other ASCII chars as true; EX: \t\n !, etc;
|
||||
prv_pos = gplx.core.intls.Utf8_.Get_pos0_of_char_bwd(src, prv_pos);
|
||||
prv_byte = src[prv_pos];
|
||||
boolean prv_char_is_letter = ctx.Lang().Case_mgr().Match_any_exists(prv_byte, src, prv_pos, bgn_pos);
|
||||
return !prv_char_is_letter;
|
||||
}
|
||||
private int Make_tkn_xowa(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, byte[] protocol, byte proto_tid, byte lnke_type) {
|
||||
// NOTE: fmt is [xowa-cmd:^"app.setup_mgr.import_wiki('');"^ ]
|
||||
if (lnke_type != Xop_lnke_tkn.Lnke_typ_brack) return ctx.Lxr_make_txt_(cur_pos); // NOTE: must check for [ or else C:\xowa\ will cause it to evaluate as lnke
|
||||
int proto_end_pos = cur_pos + 1; // +1 to skip past :
|
||||
int lhs_dlm_pos = Bry_find_.Find_fwd(src, Byte_ascii.Quote, proto_end_pos, src_len); if (lhs_dlm_pos == Bry_find_.Not_found) return ctx.Lxr_make_txt_(cur_pos);
|
||||
int lnke_bgn_pos = lhs_dlm_pos + 1;
|
||||
byte[] rhs_dlm_bry = Bry_quote;
|
||||
if (lhs_dlm_pos - proto_end_pos > 0) {
|
||||
Bry_bfr bfr = ctx.Wiki().Utl__bfr_mkr().Get_k004();
|
||||
rhs_dlm_bry = bfr.Add(Bry_quote).Add_mid(src, proto_end_pos, lhs_dlm_pos).To_bry_and_clear();
|
||||
bfr.Mkr_rls();
|
||||
}
|
||||
int rhs_dlm_pos = Bry_find_.Find_fwd(src, rhs_dlm_bry, lnke_bgn_pos, src_len); if (rhs_dlm_pos == Bry_find_.Not_found) return ctx.Lxr_make_txt_(cur_pos);
|
||||
int txt_bgn = Bry_find_.Find_fwd_while_space_or_tab(src, rhs_dlm_pos + rhs_dlm_bry.length, src_len); if (txt_bgn == Bry_find_.Not_found) return ctx.Lxr_make_txt_(cur_pos);
|
||||
int txt_end = Bry_find_.Find_fwd(src, Byte_ascii.Brack_end, txt_bgn, src_len); if (txt_end == Bry_find_.Not_found) return ctx.Lxr_make_txt_(cur_pos);
|
||||
|
||||
int end_pos = txt_end + 1; // +1 to place after ]
|
||||
Xop_lnke_tkn tkn = tkn_mkr.Lnke(bgn_pos, end_pos, protocol, proto_tid, lnke_type, lnke_bgn_pos, rhs_dlm_pos); // +1 to ignore [
|
||||
ctx.Subs_add(root, tkn);
|
||||
tkn.Subs_add(tkn_mkr.Txt(txt_bgn, txt_end));
|
||||
return end_pos;
|
||||
} private static final byte[] Bry_quote = new byte[] {Byte_ascii.Quote};
|
||||
}
|
||||
/*
|
||||
NOTE_1
|
||||
lnke takes precedence over lnki.
|
||||
EX: [[irc://a b]]
|
||||
pass: [<a href="irc://a">b</a>] i.e. [b] where b is a lnke with caption b and trg of irc://a
|
||||
fail: <a href="irc://a">b</a> i.e. b where b is a lnki with caption b and trg of irc://a
|
||||
*/
|
||||
@@ -1,94 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.parsers.xndes.*;
|
||||
public class Xop_lnke_wkr_brack_tst {
|
||||
@Before public void init() {fxt.Reset();} private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Brace_noText() {
|
||||
fxt.Test_parse_page_wiki("[irc://a]", fxt.tkn_lnke_(0, 9).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack).Lnke_rng_(1, 8));
|
||||
}
|
||||
@Test public void Brace_eos() {
|
||||
fxt.Test_parse_page_wiki("[irc://a", fxt.tkn_txt_(0, 1), fxt.tkn_lnke_(1, 8).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack_dangling).Lnke_rng_(1, 8));
|
||||
}
|
||||
@Test public void Brace_text() {
|
||||
fxt.Test_parse_page_wiki("[irc://a b c]", fxt.tkn_lnke_(0, 13).Lnke_rng_(1, 8).Subs_(fxt.tkn_txt_(9, 10), fxt.tkn_space_(10, 11), fxt.tkn_txt_(11, 12)));
|
||||
}
|
||||
@Test public void Brace_lt() {
|
||||
fxt.Init_log_(Xop_xnde_log.Eos_while_closing_tag).Test_parse_page_wiki("[irc://a<b c]", fxt.tkn_lnke_(0, 13).Lnke_rng_(1, 8).Subs_(fxt.tkn_txt_(8, 10), fxt.tkn_space_(10, 11), fxt.tkn_txt_(11, 12)));
|
||||
}
|
||||
@Test public void Brace_xnde_bgn() {// PURPOSE: occurred at ref of UK; a {{cite web|url=http://www.abc.gov/{{dead link|date=December 2011}}|title=UK}} b
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "[http://b.org<sup>c</sup>]"
|
||||
, "<a href=\"http://b.org\" rel=\"nofollow\" class=\"external text\"><sup>c</sup></a>"
|
||||
);
|
||||
}
|
||||
@Test public void Brace_newLine() {
|
||||
fxt.Test_parse_page_wiki("[irc://a\n]", fxt.tkn_txt_(0, 8), fxt.tkn_nl_char_len1_(8), fxt.tkn_txt_(9, 10));
|
||||
}
|
||||
@Test public void Html_brack() {
|
||||
fxt.Test_parse_page_wiki_str("[irc://a]", "<a href=\"irc://a\" rel=\"nofollow\" class=\"external autonumber\">[1]</a>");
|
||||
}
|
||||
@Test public void Apos() {
|
||||
fxt.Test_parse_page_wiki_str("[http://www.a.org''b'']", "<a href=\"http://www.a.org\" rel=\"nofollow\" class=\"external text\"><i>b</i></a>");
|
||||
fxt.Test_parse_page_wiki_str("[http://www.a.org'b]", "<a href=\"http://www.a.org'b\" rel=\"nofollow\" class=\"external autonumber\">[1]</a>");
|
||||
}
|
||||
@Test public void Nowiki() {
|
||||
fxt.Test_parse_page_all_str
|
||||
( "<nowiki>http://a.org</nowiki>"
|
||||
, "http://a.org"
|
||||
);
|
||||
}
|
||||
@Test public void Lnki_one() { // PURPOSE: parallel test for "http://a.org[[B]]"; DATE:2014-07-11
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "[http://a.org b [[C]] d]"
|
||||
,String_.Concat_lines_nl_skip_last
|
||||
( "<a href=\"http://a.org\" rel=\"nofollow\" class=\"external text\">b <a href=\"/wiki/C\">C</a> d</a>"
|
||||
));
|
||||
}
|
||||
@Test public void Encode_xwiki() { // PURPOSE: href title and args should always be encoded; PAGE:en.w:List_of_Category_A_listed_buildings_in_West_Lothian DATE:2014-07-15
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("commons.wikimedia.org"), Bry_.new_a7("commons.wikimedia.org"));
|
||||
fxt.Test__parse__wtxt_to_html // encode page
|
||||
( "[http://commons.wikimedia.org/%22%3E_A B]"
|
||||
, "<a href='/site/commons.wikimedia.org/wiki/%22%3E_A'>B</a>" // '%22%3E' not '">'
|
||||
);
|
||||
fxt.Test__parse__wtxt_to_html // encode args
|
||||
( "[http://commons.wikimedia.org/A?b=%22%3E_C D]"
|
||||
, "<a href='/site/commons.wikimedia.org/wiki/A?b=%22%3E_C'>D</a>" // '%22%3E' not '">'
|
||||
);
|
||||
}
|
||||
@Test public void Encode_basic() { // PURPOSE: counterpart to Encode_xwiki; DATE:2014-07-15
|
||||
fxt.Test_parse_page_wiki_str // encode page
|
||||
( "[http://a.org/%22%3E_A B]"
|
||||
, "<a href=\"http://a.org/%22%3E_A\" rel=\"nofollow\" class=\"external text\">B</a>" // '%22%3E' not '">'
|
||||
);
|
||||
fxt.Test_parse_page_wiki_str // encode args
|
||||
( "[http://a.org/A?b=%22%3E_C D]"
|
||||
, "<a href=\"http://a.org/A?b=%22%3E_C\" rel=\"nofollow\" class=\"external text\">D</a>" // '%22%3E' not '">'
|
||||
);
|
||||
}
|
||||
@Test public void Encode_relative() { // PURPOSE: counterpart to Encode_xwiki; DATE:2014-07-15
|
||||
fxt.Test_parse_page_wiki_str // encode page
|
||||
( "[//a.org/%22%3E_A B]"
|
||||
, "<a href=\"https://a.org/%22%3E_A\" rel=\"nofollow\" class=\"external text\">B</a>" // '%22%3E' not '">'
|
||||
);
|
||||
fxt.Test_parse_page_wiki_str // encode args
|
||||
( "[//a.org/A?b=%22%3E_C D]"
|
||||
, "<a href=\"https://a.org/A?b=%22%3E_C\" rel=\"nofollow\" class=\"external text\">D</a>" // '%22%3E' not '">'
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_lnke_wkr_dangling_tst {
|
||||
@Before public void init() {fxt.Reset();} private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Dangling_eos() {
|
||||
fxt.Test_parse_page_wiki("[irc://a b"
|
||||
, fxt.tkn_lnke_(0, 8).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack_dangling)
|
||||
, fxt.tkn_txt_(9, 10)
|
||||
);
|
||||
}
|
||||
@Test public void Dangling_newLine() {
|
||||
fxt.Test_parse_page_wiki("[irc://a b\nc]"
|
||||
, fxt.tkn_lnke_(0, 8).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack_dangling)
|
||||
, fxt.tkn_txt_(9, 10)
|
||||
, fxt.tkn_nl_char_len1_(10)
|
||||
, fxt.tkn_txt_(11, 13)
|
||||
);
|
||||
}
|
||||
@Test public void Dangling_gt() {
|
||||
fxt.Test_parse_page_wiki("[irc://a>b c]", fxt.tkn_lnke_(0, 13).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_brack).Subs_(fxt.tkn_txt_(8, 10), fxt.tkn_space_(10, 11), fxt.tkn_txt_(11, 12)));
|
||||
}
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_lnke_wkr_relative_tst {
|
||||
@Before public void init() {fxt.Reset();} private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Relative_obj() {
|
||||
fxt.Test_parse_page_wiki("[//a b]"
|
||||
, fxt.tkn_lnke_(0, 7).Lnke_rng_(1, 4).Subs_(fxt.tkn_txt_(5, 6))
|
||||
);
|
||||
}
|
||||
@Test public void Relative_external() {
|
||||
fxt.Test__parse__wtxt_to_html("[//www.a.org a]", "<a href='https://www.a.org' rel='nofollow' class='external text'>a</a>");
|
||||
}
|
||||
@Test public void Relative_internal() {
|
||||
fxt.Init_xwiki_add_user_("en.wikipedia.org");
|
||||
fxt.Test__parse__wtxt_to_html("[//en.wikipedia.org/wiki Wikipedia]", "<a href='/site/en.wikipedia.org/wiki/'>Wikipedia</a>");
|
||||
}
|
||||
@Test public void Relative_w_category() { // EX: [//commons.wikimedia.org/wiki/Category:Diomedeidae A]
|
||||
fxt.Init_xwiki_add_user_("en.wikipedia.org");
|
||||
fxt.Test__parse__wtxt_to_html("[//en.wikipedia.org/wiki/Category:A A]", "<a href='/site/en.wikipedia.org/wiki/Category:A'>A</a>");
|
||||
}
|
||||
@Test public void Relurl() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.Test__parse__wtxt_to_html("[[//en.wikipedia.org/ a]]", "[<a href='/site/en.wikipedia.org/wiki/'>a</a>]");
|
||||
}
|
||||
}
|
||||
@@ -1,99 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.langs.cases.*;
|
||||
public class Xop_lnke_wkr_text_tst {
|
||||
// resets the shared fixture before each test
@Before public void init() {
	fxt.Reset();
}
private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Text_obj() {
|
||||
fxt.Test_parse_page_wiki("irc://a", fxt.tkn_lnke_(0, 7).Lnke_typ_(Xop_lnke_tkn.Lnke_typ_text).Lnke_rng_(0, 7));
|
||||
}
|
||||
@Test public void Text_html() {
|
||||
fxt.Test_parse_page_wiki_str("irc://a", "<a href=\"irc://a\" rel=\"nofollow\" class=\"external free\">irc://a</a>");
|
||||
}
|
||||
@Test public void Text_after() {
|
||||
fxt.Test_parse_page_wiki("irc://a b c", fxt.tkn_lnke_(0, 7).Lnke_rng_(0, 7), fxt.tkn_space_(7, 8), fxt.tkn_txt_(8, 9), fxt.tkn_space_(9, 10), fxt.tkn_txt_(10, 11));
|
||||
}
|
||||
@Test public void Text_before_ascii() { // PURPOSE: free form external urls should not match if preceded by letters; EX:de.w:Sylvie_und_Bruno; DATE:2014-05-11
|
||||
fxt.Ctx().Lang().Case_mgr_u8_();
|
||||
String expd_lnke_html = "<a href=\"tel:a\" rel=\"nofollow\" class=\"external free\">tel:a</a>";
|
||||
fxt.Test_parse_page_wiki_str("titel:a" , "titel:a");
|
||||
fxt.Test_parse_page_wiki_str(" tel:a" , " " + expd_lnke_html);
|
||||
fxt.Test_parse_page_wiki_str("!tel:a" , "!" + expd_lnke_html);
|
||||
fxt.Test_parse_page_wiki_str("ätel:a" , "ätel:a");
|
||||
fxt.Test_parse_page_wiki_str("€tel:a" , "€" + expd_lnke_html);
|
||||
}
|
||||
@Test public void Invalid_lnki_and_list_dt_dd() { // PURPOSE: invalid lnke should still allow processing of ":" in list <dd>; PAGE:de.w:Mord_(Deutschland)#Besonders_verwerfliche_Begehungsweise DATE:2015-01-08
|
||||
fxt.Test_parse_page_wiki_str("; atel: b" , String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dt> atel"
|
||||
, " </dt>"
|
||||
, " <dd> b"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
));
|
||||
}
|
||||
@Test public void Xnde() {// NOTE: compare to Brace_lt
|
||||
fxt.Test_parse_page_wiki("<span>irc://a</span>"
|
||||
, fxt.tkn_xnde_(0, 20).Subs_
|
||||
( fxt.tkn_lnke_(6, 13)
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void List() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "*irc://a"
|
||||
, "*irc://b"
|
||||
),String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li><a href=\"irc://a\" rel=\"nofollow\" class=\"external free\">irc://a</a>"
|
||||
, " </li>"
|
||||
, " <li><a href=\"irc://b\" rel=\"nofollow\" class=\"external free\">irc://b</a>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Defect_reverse_caption_link() { // PURPOSE: bad lnke formatting (caption before link); ] should show up at end, but only [ shows up; PAGE:en.w:Paul Philippoteaux; [caption http://www.americanheritage.com]
|
||||
fxt.Test_parse_page_wiki_str("[caption irc://a]", "[caption <a href=\"irc://a\" rel=\"nofollow\" class=\"external free\">irc://a</a>]");
|
||||
}
|
||||
@Test public void Lnki() { // PURPOSE: trailing lnki should not get absorbed into lnke; DATE:2014-07-11
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "http://a.org[[B]]" // NOTE: [[ should create another lnki
|
||||
,String_.Concat_lines_nl_skip_last
|
||||
( "<a href=\"http://a.org\" rel=\"nofollow\" class=\"external free\">http://a.org</a><a href=\"/wiki/B\">B</a>"
|
||||
));
|
||||
}
|
||||
@Test public void Protocol_only() { // PURPOSE: protocol only should return text; DATE:2014-10-09
|
||||
fxt.Test_parse_page_wiki_str("http://" , "http://");
|
||||
fxt.Test_parse_page_wiki_str("http:" , "http:");
|
||||
fxt.Test_parse_page_wiki_str("[http://]" , "[http://]");
|
||||
fxt.Test_parse_page_wiki_str("[http:]" , "[http:]");
|
||||
}
|
||||
@Test public void Ignore_punctuation_at_end() { // PURPOSE: ignore "," and related punctuation at end; DATE:2014-10-09
|
||||
fxt.Test_parse_page_wiki_str("http://a.org," , "<a href=\"http://a.org\" rel=\"nofollow\" class=\"external free\">http://a.org</a>,"); // basic
|
||||
fxt.Test_parse_page_wiki_str("http://a.org,," , "<a href=\"http://a.org\" rel=\"nofollow\" class=\"external free\">http://a.org</a>,,"); // many
|
||||
fxt.Test_parse_page_wiki_str("http://a.org/b,c" , "<a href=\"http://a.org/b,c\" rel=\"nofollow\" class=\"external free\">http://a.org/b,c</a>"); // do not ignore if in middle
|
||||
fxt.Test_parse_page_wiki_str("http://a.org:" , "<a href=\"http://a.org\" rel=\"nofollow\" class=\"external free\">http://a.org</a>:"); // colon at end; compare to "http:"
|
||||
}
|
||||
@Test public void Ignore_punctuation_at_end__paren_end() { // PURPOSE: end parent has special rules; DATE:2014-10-10
|
||||
fxt.Test_parse_page_wiki_str("(http://a.org)" , "(<a href=\"http://a.org\" rel=\"nofollow\" class=\"external free\">http://a.org</a>)"); // trim=y
|
||||
fxt.Test_parse_page_wiki_str("http://a.org/b(c)", "<a href=\"http://a.org/b(c)\" rel=\"nofollow\" class=\"external free\">http://a.org/b(c)</a>"); // trim=n
|
||||
}
|
||||
@Test public void Sym_quote() { // PURPOSE: quote should interrupt lnke; DATE:2014-10-10
|
||||
fxt.Test_parse_page_wiki_str("http://a.org/b\"c", "<a href=\"http://a.org/b\" rel=\"nofollow\" class=\"external free\">http://a.org/b</a>"c");
|
||||
}
|
||||
}
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_lnke_wkr_uncommon_tst {
|
||||
@Before public void init() {fxt.Reset();} private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Err_multiple() {
|
||||
fxt.Test_parse_page_wiki("[irc://a][irc://b]"
|
||||
, fxt.tkn_lnke_(0, 9)
|
||||
, fxt.tkn_lnke_(9, 18)
|
||||
);
|
||||
}
|
||||
@Test public void Err_txt_is_protocol() {
|
||||
fxt.Test_parse_page_wiki("[irc://a irc://b]"
|
||||
, fxt.tkn_lnke_(0, 17).Lnke_rng_(1, 8).Subs_(fxt.tkn_txt_(9, 16))
|
||||
);
|
||||
}
|
||||
@Test public void Lnke_should_precede_lnki() { // PURPOSE: [[ should not be interpreted as lnki if [irc is available
|
||||
fxt.Test_parse_page_wiki("[[irc://a/b c]]"
|
||||
, fxt.tkn_txt_(0, 1)
|
||||
, fxt.tkn_lnke_(1, 14).Subs_
|
||||
( fxt.tkn_txt_(12, 13)
|
||||
)
|
||||
, fxt.tkn_txt_(14, 15)
|
||||
);
|
||||
}
|
||||
@Test public void Defect_2nd_consecutive_lnke() { // PURPOSE: bad code that was causing lnkes to show up; PAGE:en.w:Template:Infobox_country;
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "[[http://a.org a]] [[http://b.org b]]"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "[<a href=\"http://a.org\" rel=\"nofollow\" class=\"external text\">a</a>] [<a href=\"http://b.org\" rel=\"nofollow\" class=\"external text\">b</a>]"
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -1,63 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_lnke_wkr_xwiki_tst {
|
||||
@Before public void init() {fxt.Reset();} private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Xwiki() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.Test__parse__wtxt_to_html("[http://en.wikipedia.org/wiki/A a]", "<a href='/site/en.wikipedia.org/wiki/A'>a</a>");
|
||||
}
|
||||
@Test public void Xwiki_relative() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.Test__parse__wtxt_to_html("[//en.wikipedia.org/ a]", "<a href='/site/en.wikipedia.org/wiki/'>a</a>");
|
||||
}
|
||||
@Test public void Xwiki_qarg() {// DATE:2013-02-02
|
||||
fxt.Init_xwiki_add_user_("en.wikipedia.org");
|
||||
fxt.Test__parse__wtxt_to_html("http://en.wikipedia.org/wiki/Special:Allpages?from=Earth", "<a href='/site/en.wikipedia.org/wiki/Special:Allpages?from=Earth'>http://en.wikipedia.org/wiki/Special:Allpages?from=Earth</a>");
|
||||
}
|
||||
@Test public void Lang_prefix() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("fr"), Bry_.new_a7("fr.wikipedia.org"));
|
||||
fxt.Test__parse__wtxt_to_html("[http://en.wikipedia.org/wiki/fr:A a]", "<a href='/site/fr.wikipedia.org/wiki/A' rel='nofollow' class='external text'>a</a>");
|
||||
}
|
||||
@Test public void Xwiki_query_arg() {
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.Test__parse__wtxt_to_html("[http://en.wikipedia.org/wiki/A?action=edit a]", "<a href='/site/en.wikipedia.org/wiki/A?action=edit'>a</a>");
|
||||
}
|
||||
@Test public void Xwiki__history() { // PURPOSE: handle xwiki lnke's to history page else null ref; EX:[http://ru.wikipedia.org/w/index.php?title&diff=19103464&oldid=18910980 извещен]; PAGE:ru.w:Project:Заявки_на_снятие_флагов/Архив/Патрулирующие/2009 DATE:2016-11-24
|
||||
fxt.App().Usere().Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
|
||||
fxt.Test__parse__wtxt_to_html("[http://en.wikipedia.org/w/index.php?title&diff=1&oldid=2 abc]", "<a href='http://en.wikipedia.org/w/index.php?title&diff=1&oldid=2' rel='nofollow' class='external text'>abc</a>");
|
||||
}
|
||||
@Test public void Ignore_proto() { // PURPOSE: handle other protocols; PAGE:uk.w:Маскалі; DATE:2015-07-28
|
||||
fxt.Test__parse__wtxt_to_html("[mailto:a b]", "<a href='mailto:a' rel='nofollow' class='external text'>b</a>");// should be /w/, not /en.wikipedia.org
|
||||
}
|
||||
@Test public void Ignore_alias() { // PURPOSE: fictitious example to make sure aliases are not subbed for domains; DATE:2015-07-28
|
||||
fxt.Init_xwiki_add_user_("w", "en.wikipedia.org");
|
||||
fxt.Test__parse__wtxt_to_html("[https://w/b c]", "<a href='https://w/b' rel='nofollow' class='external text'>c</a>");// should be /w/, not /en.wikipedia.org
|
||||
}
|
||||
@Test public void Xwiki__qargs() { // PURPOSE: fix null ref error; PAGE:en.w:Wikipedia:Template_standardisation/demometa DATE:2015-08-02
|
||||
fxt.Init_xwiki_add_user_("en.wikipedia.org");
|
||||
fxt.Test__parse__wtxt_to_html
|
||||
( "[http://en.wikipedia.org/w/index.php?action=edit&preload=Template:Afd2+starter&editintro=Template:Afd3+starter&title=Wikipedia:Articles+for+deletion/Template_standardisation/demometa]"
|
||||
// CHANGED: lnke_now decodes html_entities; DATE:2016-10-10
|
||||
//, "<a href='/site/en.wikipedia.org/wiki/index.php?action==edit=&preload==Template:Afd2+starter=&editintro==Template:Afd3+starter=&title=&='>[1]</a>"
|
||||
, "<a href='/site/en.wikipedia.org/wiki/Wikipedia:Articles+for+deletion/Template_standardisation/demometa?action=edit&preload=Template:Afd2+starter&editintro=Template:Afd3+starter&title=Wikipedia:Articles+for+deletion/Template_standardisation/demometa'>[1]</a>"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.tests.*;
|
||||
public class Xop_tkn_chkr_lnke extends Xop_tkn_chkr_base {
|
||||
@Override public Class<?> TypeOf() {return Xop_lnke_tkn.class;}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_lnke;}
|
||||
public Xop_tkn_chkr_lnke(int bgn, int end) {super.Src_rng_(bgn, end);}
|
||||
public byte Lnke_typ() {return lnke_typ;} public Xop_tkn_chkr_lnke Lnke_typ_(byte v) {lnke_typ = v; return this;} private byte lnke_typ = Xop_lnke_tkn.Lnke_typ_null;
|
||||
public Xop_tkn_chkr_lnke Lnke_rng_(int bgn, int end) {lnke_bgn = bgn; lnke_end = end; return this;} private int lnke_bgn = -1; int lnke_end = -1;
|
||||
@Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
|
||||
Xop_lnke_tkn actl = (Xop_lnke_tkn)actl_obj;
|
||||
err += mgr.Tst_val(lnke_typ == Xop_lnke_tkn.Lnke_typ_null, path, "lnke_typ", lnke_typ, actl.Lnke_typ());
|
||||
err += mgr.Tst_val(lnke_bgn == -1, path, "lnke_bgn", lnke_bgn, actl.Lnke_href_bgn());
|
||||
err += mgr.Tst_val(lnke_end == -1, path, "lnke_end", lnke_end, actl.Lnke_href_end());
|
||||
return err;
|
||||
}
|
||||
}
|
||||
@@ -1,114 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.net.*; import gplx.xowa.wikis.xwikis.*;
|
||||
import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.wkrs.lnkis.htmls.*; import gplx.xowa.htmls.hrefs.*;
|
||||
import gplx.xowa.wikis.domains.*;
|
||||
public class Xop_link_parser {
|
||||
public byte[] Html_xowa_ttl() {return html_xowa_ttl;} private byte[] html_xowa_ttl;
|
||||
public byte Html_anchor_cls() {return html_anchor_cls;} private byte html_anchor_cls;
|
||||
public byte Html_anchor_rel() {return html_anchor_rel;} private byte html_anchor_rel;
|
||||
public byte[] Parse(Bry_bfr tmp_bfr, Xoa_url tmp_url, Xowe_wiki wiki, byte[] raw, byte[] or) {
|
||||
html_xowa_ttl = null; html_anchor_cls = Xoh_lnki_consts.Tid_a_cls_image; html_anchor_rel = Xoh_lnki_consts.Tid_a_rel_none; // default member variables for html
|
||||
Xoae_app app = wiki.Appe(); int raw_len = raw.length;
|
||||
wiki.Utl__url_parser().Parse(tmp_url, raw);
|
||||
switch (tmp_url.Protocol_tid()) {
|
||||
case Gfo_protocol_itm.Tid_http: case Gfo_protocol_itm.Tid_https: // "http:" or "https:"; check if to offline wiki and redirect
|
||||
byte[] wiki_bry = tmp_url.Wiki_bry(), page_bry = tmp_url.Page_bry();
|
||||
if ( !tmp_url.Wiki_is_missing() // https://www.a.org and others will be marked "missing" by Xow_url_parser
|
||||
&&( Bry_.Eq(wiki_bry, wiki.Domain_bry()) // link is to this wiki; check if alias
|
||||
|| app.Xwiki_mgr__exists(wiki_bry) // link is to an xwiki
|
||||
)
|
||||
) {
|
||||
page_bry = tmp_url.Page_for_lnki();
|
||||
Parse__ttl(tmp_bfr, wiki, wiki_bry, page_bry);
|
||||
}
|
||||
else { // http is to an unknown site
|
||||
if (tmp_url.Protocol_is_relative()) { // relative protocol; EX:"//www.a.org";
|
||||
Gfo_protocol_itm protocol_itm = Gfo_protocol_itm.Get_or(wiki.Props().Protocol_tid(), Gfo_protocol_itm.Itm_https);
|
||||
tmp_bfr.Add(protocol_itm.Key_w_colon_bry()); // prepend protocol b/c mozilla cannot launch "//www.a.org", but can launch "https://www.a.org"; DATE:2015-07-27
|
||||
}
|
||||
tmp_bfr.Add(raw); // dump everything
|
||||
}
|
||||
raw = tmp_bfr.To_bry_and_clear();
|
||||
html_anchor_cls = Xoh_lnki_consts.Tid_a_cls_none;
|
||||
Xow_domain_itm domain_itm = Xow_domain_itm_.parse(wiki_bry);
|
||||
html_anchor_rel = domain_itm.Domain_type().Tid() == Xow_domain_tid_.Tid__other ? Xoh_lnki_consts.Tid_a_rel_nofollow : Xoh_lnki_consts.Tid_a_rel_none; // rel=nofollow if not WM wiki; DATE:2015-11-19
|
||||
break;
|
||||
case Gfo_protocol_itm.Tid_file: // "file:///" or "File:A.png"
|
||||
int proto_len = Gfo_protocol_itm.Bry_file.length; // "file:"
|
||||
if (proto_len + 1 < raw_len && raw[proto_len + 1] == Byte_ascii.Slash) { // next char is slash, assume xfer_itm refers to protocol; EX: file:///C/A.png
|
||||
int slash_pos = Bry_find_.Find_bwd(raw, Byte_ascii.Slash);
|
||||
if (slash_pos != Bry_find_.Not_found) // set xowa_title to file_name; TODO_OLD: call Xoa_url.build; note that this will fail sometimes when (a) xfer_itm is very long (File:ReallyLongName will be shortened to 128 chars) or (b) xfer_itm has invalid windows characters (EX:File:a"b"c.jpg)
|
||||
html_xowa_ttl = Bry_.Mid(raw, slash_pos + Int_.Const_dlm_len, raw.length);
|
||||
}
|
||||
else // next char is not slash; assume xfer_itm refers to ns; EX:File:A.png
|
||||
raw = tmp_bfr.Add(Xoh_href_.Bry__wiki).Add(raw).To_bry_and_clear();
|
||||
break;
|
||||
default: // is page only; EX: Abc
|
||||
if (Bry_.Len_eq_0(raw)) // empty link should not create anchor; EX:[[File:A.png|link=|abc]]; [[File:Loudspeaker.svg|11px|link=|alt=play]]; PAGE:en.w:List_of_counties_in_New_York; DATE:2016-01-10;
|
||||
raw = Bry_.Empty;
|
||||
else {
|
||||
if (raw[0] == Byte_ascii.Colon) raw = Bry_.Mid(raw, 1, raw.length); // ignore initial colon; EX: [[:commons:A.png]]
|
||||
if (!Parse__ttl(tmp_bfr, wiki, wiki.Domain_bry(), raw)) {
|
||||
tmp_bfr.Clear();
|
||||
return null;
|
||||
}
|
||||
raw = tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
break;
|
||||
}
|
||||
return raw;
|
||||
}
|
||||
private static boolean Parse__ttl(Bry_bfr tmp_bfr, Xowe_wiki wiki, byte[] wiki_bry, byte[] page_bry) {
|
||||
// handle colon-only aliases; EX:"link:" PAGE:en.w:Wikipedia:Main_Page_alternative_(CSS_Update) DATE:2016-08-18
|
||||
Xoa_ttl page_ttl = wiki.Ttl_parse(page_bry);
|
||||
Xow_xwiki_itm xwiki_itm = page_ttl == null ? null : page_ttl.Wik_itm();
|
||||
if ( xwiki_itm != null // ttl is xwiki; EX:[[File:A.png|link=wikt:A]]
|
||||
&& page_ttl.Page_db().length == 0) { // ttl is empty; EX:[[File:A.png|link=wikt:]]
|
||||
Xow_wiki xwiki_wiki = wiki.App().Wiki_mgri().Get_by_or_make_init_n(page_ttl.Wik_itm().Domain_bry());
|
||||
page_bry = Bry_.Add(page_bry, xwiki_wiki.Props().Main_page()); // append Main_Page to ttl; EX:"wikt:" + "Wikipedia:Main_Page" -> "wikt:Wikipedia:Main_Page"
|
||||
page_ttl = wiki.Ttl_parse(page_bry);
|
||||
xwiki_itm = page_ttl.Wik_itm(); // should still be the same, but re-set it for good form
|
||||
}
|
||||
|
||||
// identify wiki / page
|
||||
boolean page_ttl_is_valid = page_ttl != null;
|
||||
if (page_ttl_is_valid) { // xwiki; need to define wiki / page
|
||||
if (xwiki_itm != null) { // is alias; set wiki, page
|
||||
wiki_bry = xwiki_itm.Domain_bry();
|
||||
page_bry = Bry_.Mid(page_bry, xwiki_itm.Key_bry().length + 1, page_bry.length); // +1 to skip ":"
|
||||
}
|
||||
else // basic; just define page; use ttl.Full_db() to normalize; EX: -> _
|
||||
page_bry = page_ttl.Full_db_w_anch(); // add anch; PAGE:en.w:History_of_Nauru; DATE:2015-12-27
|
||||
}
|
||||
|
||||
// build either "/wiki/Page" or "/site/domain/wiki/Page"
|
||||
if (Bry_.Eq(wiki_bry, wiki.Domain_bry())) { // NOTE: check against wiki.Key_bry() again; EX: in en_wiki, and http://commons.wikimedia.org/wiki/w:A
|
||||
// title-case by ns; needed to handle "link=w:Help:a" which needs to generate "w:Help:A"
|
||||
if (page_ttl_is_valid) { // valid_ttl; parse in same ns to title-case; EX:link=w:Help:a -> Help:A; DATE:2016-01-11
|
||||
page_ttl = wiki.Ttl_parse(page_ttl.Full_db_wo_xwiki());
|
||||
page_bry = page_ttl.Full_db_w_anch();
|
||||
}
|
||||
tmp_bfr.Add(Xoh_href_.Bry__wiki).Add(page_bry);
|
||||
}
|
||||
else
|
||||
tmp_bfr.Add(Xoh_href_.Bry__site).Add(wiki_bry).Add(Xoh_href_.Bry__wiki).Add(page_bry);
|
||||
return page_ttl_is_valid;
|
||||
}
|
||||
}
|
||||
@@ -1,38 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_lnki_align_h_ {
|
||||
public static final byte Null = 0, None = 1, Left = 2, Center = 3, Right = 4; // SERIALIZED
|
||||
public static final byte[][] Html_names = new byte[][]
|
||||
{ Object_.Bry__null
|
||||
, Bry_.new_a7("none")
|
||||
, Bry_.new_a7("left")
|
||||
, Bry_.new_a7("center")
|
||||
, Bry_.new_a7("right")
|
||||
};
|
||||
public static final Hash_adp_bry Hash = Hash_adp_bry.ci_a7()
|
||||
.Add_str_byte("tnone" , None)
|
||||
.Add_str_byte("tleft" , Left)
|
||||
.Add_str_byte("tcenter" , Center)
|
||||
.Add_str_byte("tright" , Right)
|
||||
;
|
||||
public static byte[] To_bry(int v) {return Html_names[v];}
|
||||
}
|
||||
/** Vertical-alignment ids for lnkis. Values are powers of two up to 64; Baseline is 127. */
class Xop_lnki_align_v_ {
  public static final byte None       = 0;
  public static final byte Top        = 1;
  public static final byte Middle     = 2;
  public static final byte Bottom     = 4;
  public static final byte Super      = 8;
  public static final byte Sub        = 16;
  public static final byte TextTop    = 32;
  public static final byte TextBottom = 64;
  public static final byte Baseline   = 127;
}
|
||||
@@ -1,192 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.core.envs.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*; import gplx.xowa.langs.numbers.*;
|
||||
public class Xop_lnki_arg_parser {
|
||||
private final Btrie_fast_mgr key_trie = Btrie_fast_mgr.cs();
|
||||
private final Bry_bfr int_bfr = Bry_bfr_.Reset(16);
|
||||
private final Btrie_bwd_mgr px_trie = Btrie_bwd_mgr.cs_(); private final Btrie_fast_mgr size_trie = Btrie_fast_mgr.cs();
|
||||
private final Btrie_rv key_trie_rv = new Btrie_rv(), px_trie_rv = new Btrie_rv(), size_trie_rv = new Btrie_rv();
|
||||
private int lnki_w, lnki_h;
|
||||
public void Evt_lang_changed(Xol_lang_itm lang) {
|
||||
Bry_bfr tmp_bfr = int_bfr;
|
||||
Byte_obj_ref rslt = Byte_obj_ref.zero_();
|
||||
Xol_kwd_mgr mgr = lang.Kwd_mgr();
|
||||
key_trie.Clear();
|
||||
Xol_kwd_grp list = null;
|
||||
int len = Keys_ids.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
int[] val = Keys_ids[i];
|
||||
list = mgr.Get_at(val[0]); // NOTE: val[0] is magic_word id
|
||||
if (list == null) {
|
||||
if (Env_.Mode_testing())
|
||||
continue; // TEST: allows partial parsing of $magicWords
|
||||
else
|
||||
list = lang.Lang_mgr().Lang_en().Kwd_mgr().Get_at(val[0]);
|
||||
}
|
||||
Xol_kwd_itm[] words = list.Itms();
|
||||
int words_len = words.length;
|
||||
for (int j = 0; j < words_len; j++) {
|
||||
Xol_kwd_itm word = words[j];
|
||||
byte[] word_bry = Xol_kwd_parse_data.Strip(tmp_bfr, word.Val(), rslt);
|
||||
Init_key_trie(word_bry, (byte)val[1]); // NOTE: val[1] is lnki_key tid; ASSUME: case_sensitive for all "img_" words; note that all Messages**.php seem to be case_sensitive ("array(1, ..."); resisting change b/c of complexity/perf (need a cs trie and a ci trie)
|
||||
}
|
||||
}
|
||||
list = mgr.Get_at(Xol_kwd_grp_.Id_img_width);
|
||||
if (list == null)
|
||||
list = lang.Lang_mgr().Lang_en().Kwd_mgr().Get_at(Xol_kwd_grp_.Id_img_width);
|
||||
Init_size_trie(tmp_bfr, lang.Num_mgr().Digits_mgr(), list);
|
||||
}
|
||||
public byte Identify_tid(byte[] src, int bgn, int end, Xop_lnki_tkn lnki) {
|
||||
lnki_w = Xop_lnki_tkn.Width_null;
|
||||
lnki_h = Xop_lnki_tkn.Height_null;
|
||||
byte rv = Identify_tid(src, bgn, end);
|
||||
if (lnki_w != Xop_lnki_tkn.Width_null) lnki.W_(lnki_w);
|
||||
if (lnki_h != Xop_lnki_tkn.Height_null)lnki.H_(lnki_h);
|
||||
return rv;
|
||||
}
|
||||
public byte Identify_tid(byte[] src, int bgn, int end) {
|
||||
int len = end - bgn;
|
||||
Byte_obj_val val = (Byte_obj_val)key_trie.Match_at(key_trie_rv, src, bgn, end);
|
||||
if (val != null && len == key_trie_rv.Pos() - bgn) // check for false matches; EX: alternate= should not match alt=
|
||||
return val.Val(); // match; return val;
|
||||
Object bwd_obj = px_trie.Match_at(px_trie_rv, src, end - 1, bgn - 1);
|
||||
if (bwd_obj != null && ((Byte_obj_val)bwd_obj).Val() == Tid_dim) { // ends with "px"; try to parse size
|
||||
int_bfr.Clear();
|
||||
int match_len = end -1 - px_trie_rv.Pos();
|
||||
boolean mode_width = true;
|
||||
int itm_end = bgn + (len - match_len); // remove trailing px
|
||||
for (int i = bgn; i < itm_end; i++) {
|
||||
byte b = src[i];
|
||||
Object o = size_trie.Match_at_w_b0(size_trie_rv, b, src, i, itm_end);
|
||||
if (o == null) {
|
||||
this.lnki_w = Xop_lnki_tkn.Width_null; // NOTE: must null out width; EX: "123xTextpx"; PAGE:es.b:Alimentación_infantil; DATE:2015-07-10; NOTE: must be -1, not 0; DATE:2015-08-05
|
||||
return Tid_caption; // letter or other invalid character; return caption
|
||||
}
|
||||
Byte_obj_val v = (Byte_obj_val)o;
|
||||
switch (v.Val()) { // NOTE: d0 - d9 handle non-english numbers; EX:fa.w and ۲۰۰px; DATE:2015-07-18
|
||||
case Key_dim_d0: int_bfr.Add_byte(Byte_ascii.Num_0); i += (size_trie_rv.Pos() - i) - 1; break; // -1 b/c loop will ++i
|
||||
case Key_dim_d1: int_bfr.Add_byte(Byte_ascii.Num_1); i += (size_trie_rv.Pos() - i) - 1; break;
|
||||
case Key_dim_d2: int_bfr.Add_byte(Byte_ascii.Num_2); i += (size_trie_rv.Pos() - i) - 1; break;
|
||||
case Key_dim_d3: int_bfr.Add_byte(Byte_ascii.Num_3); i += (size_trie_rv.Pos() - i) - 1; break;
|
||||
case Key_dim_d4: int_bfr.Add_byte(Byte_ascii.Num_4); i += (size_trie_rv.Pos() - i) - 1; break;
|
||||
case Key_dim_d5: int_bfr.Add_byte(Byte_ascii.Num_5); i += (size_trie_rv.Pos() - i) - 1; break;
|
||||
case Key_dim_d6: int_bfr.Add_byte(Byte_ascii.Num_6); i += (size_trie_rv.Pos() - i) - 1; break;
|
||||
case Key_dim_d7: int_bfr.Add_byte(Byte_ascii.Num_7); i += (size_trie_rv.Pos() - i) - 1; break;
|
||||
case Key_dim_d8: int_bfr.Add_byte(Byte_ascii.Num_8); i += (size_trie_rv.Pos() - i) - 1; break;
|
||||
case Key_dim_d9: int_bfr.Add_byte(Byte_ascii.Num_9); i += (size_trie_rv.Pos() - i) - 1; break;
|
||||
case Key_dim_num: int_bfr.Add_byte(b); break;
|
||||
case Key_space: break; // ignore space; EX: "100 px"
|
||||
case Key_dim_px: { // 2nd px found; EX: "40pxpx"; "40px px"
|
||||
int tmp_pos = size_trie_rv.Pos();
|
||||
tmp_pos = Bry_find_.Find_fwd_while_space_or_tab(src, tmp_pos, itm_end); // look for next ws pos;
|
||||
if (tmp_pos == itm_end) // no non-ws found; tmp_pos == itm_end; allow itm; EX: "40pxpx"; "40px px"; DATE:2014-03-01
|
||||
i = itm_end;
|
||||
else // non-ws found; consider as caption; EX: "20px20px"; "20pxpxpx"
|
||||
return Tid_caption;
|
||||
break;
|
||||
}
|
||||
case Key_dim_x: {
|
||||
if (mode_width) {
|
||||
this.lnki_w = int_bfr.To_int_and_clear(-1);
|
||||
mode_width = false;
|
||||
break;
|
||||
}
|
||||
else return Tid_caption;
|
||||
}
|
||||
}
|
||||
}
|
||||
int dim = int_bfr.To_int_and_clear(-1);
|
||||
if (mode_width) this.lnki_w = dim;
|
||||
else this.lnki_h = dim;
|
||||
return Tid_dim;
|
||||
}
|
||||
return Tid_caption;
|
||||
}
|
||||
private void Init_key_trie(byte[] key, byte v) {
|
||||
Byte_obj_val val = Byte_obj_val.new_(v);
|
||||
key_trie.Add(key, val);
|
||||
}
|
||||
private void Init_size_trie(Bry_bfr tmp_bfr, Xol_transform_mgr digit_mgr, Xol_kwd_grp list) {
|
||||
if (list == null && Env_.Mode_testing()) return; // TEST: allows partial parsing of $magicWords
|
||||
size_trie.Clear(); px_trie.Clear();
|
||||
for (int i = 0; i < 10; i++)
|
||||
size_trie.Add((byte)(i + Byte_ascii.Num_0), Byte_obj_val.new_(Key_dim_num));
|
||||
int len = digit_mgr.Len(); // NOTE: add non-english numbers; EX: ۲۰۰px; DATE:2015-07-18
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Keyval kv = digit_mgr.Get_at(i);
|
||||
int num = (byte)Int_.parse_or(kv.Key(), -1); if (num == -1) continue; // ignore separators; EX: "," "."
|
||||
size_trie.Add((byte[])kv.Val(), Byte_obj_val.new_((byte)num)); // NOTE: num corresponds to dim_d0 -> d9 below
|
||||
}
|
||||
size_trie.Add(Byte_ascii.Space, Byte_obj_val.new_(Key_space));
|
||||
size_trie.Add(X_bry, Byte_obj_val.new_(Key_dim_x));
|
||||
Xol_kwd_itm[] words = list.Itms();
|
||||
int words_len = words.length;
|
||||
Byte_obj_ref rslt = Byte_obj_ref.zero_();
|
||||
for (int i = 0; i < words_len; i++) {
|
||||
byte[] word_bry = Xol_kwd_parse_data.Strip(tmp_bfr, words[i].Val(), rslt);
|
||||
size_trie.Add(word_bry, Byte_obj_val.new_(Key_dim_px));
|
||||
px_trie.Add(word_bry, Byte_obj_val.new_(Tid_dim));
|
||||
}
|
||||
}
|
||||
public static final byte[] Bry_upright = Bry_.new_a7("upright"), Bry_thumbtime = Bry_.new_a7("thumbtime"), Bry_target = Bry_.new_a7("target");
|
||||
public static final byte
|
||||
Tid_unknown = 0, Tid_thumb = 1, Tid_left = 2, Tid_right = 3, Tid_none = 4, Tid_center = 5, Tid_frame = 6, Tid_frameless = 7, Tid_upright = 8, Tid_border = 9
|
||||
, Tid_alt = 10, Tid_link = 11, Tid_baseline = 12, Tid_sub = 13, Tid_super = 14, Tid_top = 15, Tid_text_top = 16, Tid_middle = 17, Tid_bottom = 18, Tid_text_bottom = 19
|
||||
, Tid_dim = 20
|
||||
, Tid_trg = 21, Tid_caption = 22
|
||||
, Tid_page = 23
|
||||
, Tid_noplayer = 24, Tid_noicon = 25, Tid_thumbtime = 26
|
||||
, Tid_class = 27
|
||||
, Tid_target = 28
|
||||
;
|
||||
private static final byte[] X_bry = Bry_.new_a7("x");
|
||||
private static final byte // NOTE: d0 - d9 must match 0 - 9; DATE:2015-07-18
|
||||
Key_dim_d0 = 0, Key_dim_d1 = 1, Key_dim_d2 = 2, Key_dim_d3 = 3, Key_dim_d4 = 4
|
||||
, Key_dim_d5 = 5, Key_dim_d6 = 6, Key_dim_d7 = 7, Key_dim_d8 = 8, Key_dim_d9 = 9
|
||||
, Key_dim_num = 10, Key_dim_x = 11, Key_dim_px = 12, Key_space = 13
|
||||
;
|
||||
private static final int[][] Keys_ids = new int[][]
|
||||
{ new int[] {Xol_kwd_grp_.Id_img_thumbnail , Tid_thumb}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_manualthumb , Tid_thumb} // RESEARCH: what is manualthumb? 'thumb=$1' vs 'thumb'
|
||||
, new int[] {Xol_kwd_grp_.Id_img_right , Tid_right}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_left , Tid_left}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_none , Tid_none}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_center , Tid_center}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_framed , Tid_frame}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_frameless , Tid_frameless}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_page , Tid_page} // for pdf
|
||||
, new int[] {Xol_kwd_grp_.Id_img_upright , Tid_upright}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_border , Tid_border}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_baseline , Tid_baseline}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_sub , Tid_sub}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_super , Tid_super}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_top , Tid_top}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_text_top , Tid_text_top}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_middle , Tid_middle}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_bottom , Tid_bottom}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_text_bottom , Tid_text_bottom}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_link , Tid_link}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_alt , Tid_alt}
|
||||
, new int[] {Xol_kwd_grp_.Id_img_class , Tid_class}
|
||||
, new int[] {Xol_kwd_grp_.Id_ogg_noplayer , Tid_noplayer} // RESEARCH: what does noplayer do?; find example
|
||||
, new int[] {Xol_kwd_grp_.Id_ogg_noicon , Tid_noicon}
|
||||
, new int[] {Xol_kwd_grp_.Id_ogg_thumbtime , Tid_thumbtime}
|
||||
};
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.log_msgs.*;
|
||||
public class Xop_lnki_log {
|
||||
private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "lnki");
|
||||
public static final Gfo_msg_itm
|
||||
Upright_val_is_invalid = Gfo_msg_itm_.new_warn_(owner, "upright_val_is_invalid")
|
||||
, Escaped_lnki = Gfo_msg_itm_.new_warn_(owner, "escaped_lnki")
|
||||
, Key_is_empty = Gfo_msg_itm_.new_warn_(owner, "key_is_empty")
|
||||
, Ext_is_missing = Gfo_msg_itm_.new_warn_(owner, "ext_is_missing")
|
||||
, Invalid_ttl = Gfo_msg_itm_.new_warn_(owner, "invalid_ttl")
|
||||
;
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
import gplx.xowa.parsers.tmpls.*;
|
||||
public class Xop_lnki_lxr_bgn implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_lnki_bgn;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Xop_tkn_.Lnki_bgn, this);}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
Xop_tkn_itm prv_tkn = ctx.Stack_get_last();
|
||||
if (prv_tkn != null
|
||||
&& prv_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) {
|
||||
Xop_lnki_tkn prv_lnki = (Xop_lnki_tkn)prv_tkn;
|
||||
if (prv_lnki.Pipe_count() == 0) {
|
||||
ctx.Stack_pop_last();
|
||||
return Xop_lnki_wkr_.Invalidate_lnki(ctx, src, root, prv_lnki, bgn_pos);
|
||||
}
|
||||
}
|
||||
Xop_lnki_tkn lnki = tkn_mkr.Lnki(bgn_pos, cur_pos);
|
||||
ctx.Subs_add_and_stack(root, lnki);
|
||||
return cur_pos;
|
||||
}
|
||||
public static final Xop_lnki_lxr_bgn Instance = new Xop_lnki_lxr_bgn();
|
||||
}
|
||||
/**
 * Integer constants identifying lnki size kinds.
 * Non-zero values are distinct powers of two; note that WidthHeight is its own value (4), not Width | Height.
 */
class Xop_lnki_size {
  public static final int None = 0;
  public static final int Width = 1;
  public static final int Height = 2;
  public static final int WidthHeight = 4;
  public static final int Upright = 8;
}
|
||||
/*
|
||||
Spaces + NewLines
|
||||
. ignored next to delimiters: '[[ '; ' ]]'; ' | '
|
||||
. not ignored in: ' ='; basically breaks key
|
||||
. not ignored in: '= '; will add to value; EX: alt= a -> ' a'
|
||||
|
||||
NewLines
|
||||
. will break lnk if in trg area (before | or ]]); EX:[[Image:The\nFabs -> [[Image:The Fabs
|
||||
. will break alt (which apparently does not like new lines)
|
||||
. will be converted to space for caption
|
||||
|
||||
http://en.wikipedia.org/wiki/Wikipedia:Extended_image_syntax
|
||||
The image syntax begins with "[[", contains components separated by "|", and ends with "]]". The "[[" and the first "|" (or, if there is no "|", the terminating "]]")
|
||||
must be on the same line; other spaces and line breaks are ignored if they are next to "|" characters or just inside the brackets.
|
||||
Spaces or line breaks are not allowed just before the "=" in the following options, and may have undesirable side effects if they appear just after the "=".
|
||||
*/
|
||||
@@ -1,28 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
import gplx.xowa.parsers.tmpls.*;
|
||||
public class Xop_lnki_lxr_end implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_lnki_end;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Xop_tkn_.Lnki_end, this);}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Lnki().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_lnki_lxr_end Instance = new Xop_lnki_lxr_end();
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user