Mw_parse: Add case-match logic and more keywords for double_underscore

pull/620/head
gnosygnu 8 years ago
parent 31ade6aa5f
commit e231df0ce1

@ -35,6 +35,7 @@ public class Xomw_parser {
private final Xomw_link_holders holders; private final Xomw_link_holders holders;
private final Xomw_heading_cbk__html heading_wkr_cbk; private final Xomw_heading_cbk__html heading_wkr_cbk;
private final Btrie_slim_mgr protocols_trie; private final Btrie_slim_mgr protocols_trie;
private final Xomw_doubleunder_data doubleunder_data = new Xomw_doubleunder_data();
private static Xomw_regex_space regex_space; private static Xomw_regex_space regex_space;
private static Xomw_regex_boundary regex_boundary; private static Xomw_regex_boundary regex_boundary;
private static Xomw_regex_url regex_url; private static Xomw_regex_url regex_url;
@ -69,7 +70,7 @@ public class Xomw_parser {
lnke_wkr.Init_by_wiki(protocols_trie, regex_url, regex_space); lnke_wkr.Init_by_wiki(protocols_trie, regex_url, regex_space);
lnki_wkr.Init_by_wiki(wiki); lnki_wkr.Init_by_wiki(wiki);
magiclinks_wkr.Init_by_wiki(linker, regex_boundary, regex_url); magiclinks_wkr.Init_by_wiki(linker, regex_boundary, regex_url);
doubleunder_wkr.Init_by_wiki(); doubleunder_wkr.Init_by_wiki(doubleunder_data, wiki.Lang());
} }
public void Init_by_page(Xoa_ttl ttl) { public void Init_by_page(Xoa_ttl ttl) {
pctx.Init_by_page(ttl); pctx.Init_by_page(ttl);

@ -17,18 +17,29 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
package gplx.xowa.mws.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*; package gplx.xowa.mws.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.core.btries.*; import gplx.core.btries.*;
// TODO.CS: handle case sensitive keys; EX: __notoc__ should not match __NOTOC__ if cs is enabled for magic word import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*;
public class Xomw_doubleunder_wkr { public class Xomw_doubleunder_wkr {
private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_u8(); private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_u8();
private final Btrie_rv trv = new Btrie_rv(); private final Btrie_rv trv = new Btrie_rv();
public Xomw_doubleunder_data data = new Xomw_doubleunder_data(); private Xomw_doubleunder_data data;
public void Init_by_wiki() { public void Init_by_wiki(Xomw_doubleunder_data data, Xol_lang_itm lang) {
// TODO.XO: pull from lang this.data = data;
trie.Add_str_byte("__TOC__", Tid__toc); Reg(trie, lang.Kwd_mgr()
trie.Add_str_byte("__NOTOC__", Tid__no_toc); , Xol_kwd_grp_.Id_notoc
trie.Add_str_byte("__FORCETOC__", Tid__force_toc); , Xol_kwd_grp_.Id_nogallery
, Xol_kwd_grp_.Id_forcetoc
, Xol_kwd_grp_.Id_toc
, Xol_kwd_grp_.Id_noeditsection
, Xol_kwd_grp_.Id_newsectionlink
, Xol_kwd_grp_.Id_hiddencat
, Xol_kwd_grp_.Id_index
, Xol_kwd_grp_.Id_noindex
, Xol_kwd_grp_.Id_staticredirect
, Xol_kwd_grp_.Id_notitleconvert
, Xol_kwd_grp_.Id_nocontentconvert
);
} }
public void Do_double_underscore(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) { // REF.MW: text = preg_replace('/(^|\n)-----*/', '\\1<hr />', text); public void Do_double_underscore(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
// XO.PBFR // XO.PBFR
Bry_bfr src_bfr = pbfr.Src(); Bry_bfr src_bfr = pbfr.Src();
byte[] src = src_bfr.Bfr(); byte[] src = src_bfr.Bfr();
@ -37,14 +48,15 @@ public class Xomw_doubleunder_wkr {
Bry_bfr bfr = pbfr.Trg(); Bry_bfr bfr = pbfr.Trg();
data.Reset(); data.Reset();
// XO.MW: MW does TOC before others; XO does it at the same time
// XO.MW: MW does TOC before others; XO does it at the same time
// Now match and remove the rest of them // Now match and remove the rest of them
// XO.MW.BGN: $this->mDoubleUnderscores = $mwa->matchAndRemove( $text ); // XO.MW.BGN: $this->mDoubleUnderscores = $mwa->matchAndRemove( $text );
int cur = src_bgn; int cur = src_bgn;
int prv = cur; int prv = cur;
boolean dirty = false; boolean dirty = false;
while (true) { while (true) {
// reached end; stop
if (cur == src_end) { if (cur == src_end) {
if (dirty) { if (dirty) {
bfr.Add_mid(src, prv, src_end); bfr.Add_mid(src, prv, src_end);
@ -52,6 +64,7 @@ public class Xomw_doubleunder_wkr {
break; break;
} }
// no match; keep searching
byte b = src[cur]; byte b = src[cur];
Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end); Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
if (o == null) { if (o == null) {
@ -59,11 +72,19 @@ public class Xomw_doubleunder_wkr {
continue; continue;
} }
// if cs, ensure exact-match (trie is case-insensitive)
int kwd_end = trv.Pos();
Xomw_doubleunder_itm itm = (Xomw_doubleunder_itm)o;
if (itm.case_match && !Bry_.Match(src, cur, kwd_end, itm.val)) {
cur = kwd_end;
continue;
}
// match; replace __KWD__ with "" (or "<!--MWTOC-->" if __TOC__)
dirty = true; dirty = true;
bfr.Add_mid(src, prv, cur); bfr.Add_mid(src, prv, cur);
byte tid = ((gplx.core.primitives.Byte_obj_val)o).Val(); switch (itm.tid) {
switch (tid) { case Xol_kwd_grp_.Id_toc:
case Tid__toc:
// The position of __TOC__ needs to be recorded // The position of __TOC__ needs to be recorded
boolean already_seen = !data.show_toc; boolean already_seen = !data.show_toc;
data.toc = true; data.toc = true;
@ -77,20 +98,20 @@ public class Xomw_doubleunder_wkr {
} }
break; break;
// XO.MW: MW adds boolean to hash_table; XO uses boolean props; note that "remove" is done by not adding to bfr // XO.MW: MW adds boolean to hash_table; XO uses boolean props; note that "remove" is done by not adding to bfr
case Tid__no_toc: data.no_toc = true; break; case Xol_kwd_grp_.Id_notoc: data.no_toc = true; break;
case Tid__no_gallery: data.no_gallery = true; break; case Xol_kwd_grp_.Id_nogallery: data.no_gallery = true; break;
case Tid__force_toc: data.force_toc = true; break; case Xol_kwd_grp_.Id_forcetoc: data.force_toc = true; break;
case Tid__no_edit_section: data.no_edit_section = true; break; case Xol_kwd_grp_.Id_noeditsection: data.no_edit_section = true; break;
case Tid__new_section_link: data.new_section_link = true; break; case Xol_kwd_grp_.Id_newsectionlink: data.new_section_link = true; break;
case Tid__hidden_cat: data.hidden_cat = true; break; case Xol_kwd_grp_.Id_hiddencat: data.hidden_cat = true; break;
case Tid__index: data.index = true; break; case Xol_kwd_grp_.Id_index: data.index = true; break;
case Tid__no_index: data.no_index = true; break; case Xol_kwd_grp_.Id_noindex: data.no_index = true; break;
case Tid__static_redirect: data.static_redirect = true; break; case Xol_kwd_grp_.Id_staticredirect: data.static_redirect = true; break;
case Tid__no_title_convert: data.no_title_convert = true; break; case Xol_kwd_grp_.Id_notitleconvert: data.no_title_convert = true; break;
case Tid__no_content_convert: data.no_content_convert = true; break; case Xol_kwd_grp_.Id_nocontentconvert: data.no_content_convert = true; break;
default: throw Err_.new_unhandled_default(tid); default: throw Err_.new_unhandled_default(itm.tid);
} }
cur = trv.Pos(); cur = kwd_end;
prv = cur; prv = cur;
} }
// XO.MW.END: $this->mDoubleUnderscores = $mwa->matchAndRemove( $text ); // XO.MW.END: $this->mDoubleUnderscores = $mwa->matchAndRemove( $text );
@ -100,22 +121,28 @@ public class Xomw_doubleunder_wkr {
} }
// XO.MW.EDIT: hidden_cat, index, noindex are used to add to tracking category // XO.MW.EDIT: hidden_cat, index, noindex are used to add to tracking category
if (dirty) if (dirty)
pbfr.Switch(); pbfr.Switch();
} }
private static void Reg(Btrie_slim_mgr trie, Xol_kwd_mgr mgr, int... ids) {
private static final byte for (int id : ids) {
Tid__no_toc = 0 Xol_kwd_grp grp = mgr.Get_or_new(id);
, Tid__no_gallery = 1 Xol_kwd_itm[] itms = grp.Itms();
, Tid__force_toc = 2 for (Xol_kwd_itm itm : itms) {
, Tid__toc = 3 byte[] val = itm.Val();
, Tid__no_edit_section = 4 trie.Add_obj(val, new Xomw_doubleunder_itm(id, grp.Case_match(), val));
, Tid__new_section_link = 5 }
, Tid__hidden_cat = 6 }
, Tid__index = 7 }
, Tid__no_index = 8 }
, Tid__static_redirect = 9 class Xomw_doubleunder_itm {
, Tid__no_title_convert = 10 public int tid;
, Tid__no_content_convert = 11 public boolean case_match;
; public byte[] val;
public Xomw_doubleunder_itm(int tid, boolean case_match, byte[] val) {
this.tid = tid;
this.case_match = case_match;
this.val = val;
}
} }

@ -24,15 +24,17 @@ public class Xomw_doubleunder_wkr__tst {
@Test public void Toc() {fxt.Test__parse("a __TOC__ b __TOC__ c" , "a <!--MWTOC--> b c").Test__prop_y(fxt.data.toc, fxt.data.show_toc, fxt.data.force_toc_position);} @Test public void Toc() {fxt.Test__parse("a __TOC__ b __TOC__ c" , "a <!--MWTOC--> b c").Test__prop_y(fxt.data.toc, fxt.data.show_toc, fxt.data.force_toc_position);}
@Test public void Notoc_only() {fxt.Test__parse("a __NOTOC__ b" , "a b").Test__prop_y(fxt.data.no_toc).Test__prop_n(fxt.data.show_toc);} // show_toc is false @Test public void Notoc_only() {fxt.Test__parse("a __NOTOC__ b" , "a b").Test__prop_y(fxt.data.no_toc).Test__prop_n(fxt.data.show_toc);} // show_toc is false
@Test public void Notoc_w_toc() {fxt.Test__parse("a __TOC__ b __NOTOC__ c" , "a <!--MWTOC--> b c").Test__prop_y(fxt.data.toc, fxt.data.show_toc, fxt.data.force_toc_position);} // show_toc is true @Test public void Notoc_w_toc() {fxt.Test__parse("a __TOC__ b __NOTOC__ c" , "a <!--MWTOC--> b c").Test__prop_y(fxt.data.toc, fxt.data.show_toc, fxt.data.force_toc_position);} // show_toc is true
@Test public void Case_match() {fxt.Test__parse("a __index__ b" , "a __index__ b");}
} }
class Xomw_doubleunder_wkr__fxt { class Xomw_doubleunder_wkr__fxt {
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx(); private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr(); private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
private final Xomw_doubleunder_wkr wkr = new Xomw_doubleunder_wkr(); private final Xomw_doubleunder_wkr wkr = new Xomw_doubleunder_wkr();
public Xomw_doubleunder_data data; public Xomw_doubleunder_data data = new Xomw_doubleunder_data();
public Xomw_doubleunder_wkr__fxt() { public Xomw_doubleunder_wkr__fxt() {
wkr.Init_by_wiki(); Xoae_app app = Xoa_app_fxt.Make__app__edit();
data = wkr.data; Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
wkr.Init_by_wiki(data, wiki.Lang());
} }
public Xomw_doubleunder_wkr__fxt Test__parse(String src_str, String expd) { public Xomw_doubleunder_wkr__fxt Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str); byte[] src_bry = Bry_.new_u8(src_str);

Loading…
Cancel
Save