1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-07-12 21:10:02 -04:00
commit 794b5a232f
3099 changed files with 238212 additions and 0 deletions

View File

@@ -0,0 +1,96 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*; import gplx.xowa.parsers.paras.*;
/** Lexer for wikitext comments: hooks on "&lt;!--" and replaces the whole comment with one ignore tkn. */
public class Xop_comm_lxr implements Xop_lxr {
  public byte Lxr_tid() {
    return Xop_lxr_.Tid_comment;
  }
  /** Registers the comment opener as this lexer's hook in the wiki trie. */
  public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
    core_trie.Add(Bgn_ary, this);
  }
  /** No language-specific hooks. */
  public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
  /**
   * Consumes a comment whose opener ends at cur_pos and adds an ignore tkn for it to root.
   * If the closer is missing, an Eos msg is logged and the rest of src is consumed.
   * Returns the position at which lexing should resume.
   */
  public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    int txt_bgn = cur_pos;                                                 // 1st byte after the opener
    int txt_end = Bry_finder.Find_fwd(src, End_ary, cur_pos, src_len);     // position of the closer; NOTE: kept separate from cur_pos, else cur_pos may become -1 and fatal error in ctx.Msg_log() below; DATE:2014-06-08
    int tkn_end;
    if (txt_end != Bry_finder.Not_found)
      tkn_end = txt_end + End_len;                                         // resume right after the closer
    else {                                                                 // closer not found
      ctx.Msg_log().Add_itm_none(Xop_comment_log.Eos, src, bgn_pos, cur_pos);
      tkn_end = src_len;                                                   // gobble up rest of content
    }
    tkn_end = Trim_ws_if_entire_line_is_comment(ctx, tkn_mkr, root, src, src_len, tkn_end, txt_bgn, txt_end);
    ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, tkn_end, Xop_ignore_tkn.Ignore_tid_comment));
    return tkn_end;
  }
  /**
   * If the comment is the only non-ws content on its line, marks the leading ws tkns as ignored and
   * swallows the trailing ws plus the line's "\n". Returns the (possibly advanced) end position.
   * REF.MW:Preprocessor_DOM.php|preprocessToXml|handle comments; DATE:2014-02-24
   */
  private static int Trim_ws_if_entire_line_is_comment(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int cur_pos, int lhs_end, int rhs_bgn) {
    boolean popup_skip_marker
      =  ctx.Tid_is_popup()
      && ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki                // note that only popup parse can generate a comment that makes it to wtxt
      && Bry_.Match(src, lhs_end, rhs_bgn, Xowa_skip_text_bry);            // comment body is exactly XOWA_SKIP
    if (popup_skip_marker) return cur_pos;                                 // in popup mode only do not gobble trailing \n; PAGE:en.w:Gwynedd; DATE:2014-07-01

    // look bwd through the tkns already added to root for a "\n" preceded only by ws / comments
    int nl_lhs = -1;
    int subs_len = root.Subs_len();
    boolean scan_bwd = true;
    for (int i = subs_len - 1; scan_bwd && i > -1; i--) {
      Xop_tkn_itm sub = root.Subs_get(i);
      switch (sub.Tkn_tid()) {
        case Xop_tkn_itm_.Tid_space:
        case Xop_tkn_itm_.Tid_tab:
          break;                                                           // ws; keep scanning
        case Xop_tkn_itm_.Tid_ignore:                                      // earlier comments are transparent; any other ignore tkn stops the scan
          scan_bwd = ((Xop_ignore_tkn)sub).Ignore_type() == Xop_ignore_tkn.Ignore_tid_comment;
          break;
        case Xop_tkn_itm_.Tid_newLine:                                     // new_line found; anything afterwards is a \s or a \t; SEE.WIKT:coincidence
          nl_lhs = i;                                                      // scan continues, so an earlier new_line can still replace this index
          break;
        default:
          scan_bwd = false;                                                // any other tkn stops the scan
          break;
      }
    }
    if (nl_lhs == -1) return cur_pos;                                      // non ws tkns found before \n; exit now; EX: \n\sa followed by comment

    // look fwd in src for a "\n" preceded only by spaces / tabs
    int nl_rhs = -1;
    for (int pos = cur_pos; pos != src_len; pos++) {
      byte b = src[pos];
      if (b == Byte_ascii.Space || b == Byte_ascii.Tab) continue;
      if (b == Byte_ascii.Nl) nl_rhs = pos + 1;                            // position just after the \n
      break;                                                               // either \n or a non-ws byte ends the scan
    }
    if (nl_rhs == -1) return cur_pos;                                      // non ws found before \n (or eos reached); exit now; EX: comment followed by a\n

    // entire line is ws; trim everything from nl_lhs + 1 to nl_rhs; do not trim nl_lhs
    for (int i = nl_lhs + 1; i < subs_len; i++)
      root.Subs_get(i).Ignore_y_grp_(ctx, root, i);
    // add tkn for nl_rhs, but mark as ignore; needed for multiple comment nls; EX: two comment-only lines followed by ";"; DATE:2014-02-24
    ctx.Subs_add(root, tkn_mkr.NewLine(nl_rhs - 1, nl_rhs, Xop_nl_tkn.Tid_char, 1).Ignore_y_());
    return nl_rhs;
  }
  public static final byte[]
    Bgn_ary = new byte[] {60, 33, 45, 45}   // comment opener: lt, bang, dash, dash
  , End_ary = new byte[] {45, 45, 62}       // comment closer: dash, dash, gt
  ;
  private static final int End_len = End_ary.length;
  public static final Xop_comm_lxr _ = new Xop_comm_lxr(); Xop_comm_lxr() {}
  private static final String Xowa_skip_text_str = "XOWA_SKIP";
  private static final byte[] Xowa_skip_text_bry = Bry_.new_a7(Xowa_skip_text_str);
  public static final byte[] Xowa_skip_comment_bry = Bry_.new_a7("<!--" + Xowa_skip_text_str + "-->");
}

View File

@@ -0,0 +1,102 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Parser tests for comment handling: removal, whitespace trimming and interaction with para mode. */
public class Xop_comm_lxr_tst {
  private Xop_fxt fxt = new Xop_fxt();
  /** Always switch para mode back off, even when a test fails before reaching its last statement. */
  @After public void term() {fxt.Init_para_n_();}
  @Test public void Basic() {
    fxt.Test_parse_page_all_str("a<!-- b -->c", "ac");
  }
  @Test public void Err() { // unclosed comment: logs Eos and swallows the rest of the page
    fxt.Init_log_(Xop_comment_log.Eos).Test_parse_page_all_str("<!-- ", "");
  }
  @Test public void Ws_end() { // comment-only line with trailing ws is removed entirely
    fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
    ( "a"
    , "<!-- b --> "
    , "c"
    ), String_.Concat_lines_nl_skip_last
    ( "a"
    , "c"
    ));
  }
  @Test public void Ws_bgn_end() { // comment-only line with leading and trailing ws is removed entirely
    fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
    ( "a"
    , " <!-- b --> "
    , "c"
    ), String_.Concat_lines_nl_skip_last
    ( "a"
    , "c"
    ));
  }
  @Test public void Ws_noop() { // PURPOSE: assert that comments do not strip ws
    fxt.Test_parse_page_all_str("a <!-- b -->c", "a c");
  }
  @Test public void Noinclude() {// PURPOSE: templates can construct comments; EX:WBK: {{Subjects/allbooks|subject=Computer programming|origin=Computer programming languages|diagnose=}}
    fxt.Test_parse_page_all_str("a <!-<noinclude></noinclude>- b -->c", "a c");
  }
  @Test public void Comment_can_cause_pre() {// PURPOSE: assert that comment causes pre; DATE:2014-02-18
    fxt.Init_para_y_();
    fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
    ( "a"
    , " <!-- b -->c"
    , "d"
    ), String_.Concat_lines_nl_skip_last
    ( "<p>a"
    , "</p>"
    , ""
    , "<pre>c"
    , "</pre>"
    , ""
    , "<p>d"
    , "</p>"
    , ""
    ));
  }
  @Test public void Ws_bgn_needs_nl() { // PURPOSE: do not strip new line unless *entire* line is comment
    fxt.Init_para_y_();
    fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
    ( "a"
    , " <!-- b -->"
    , "c"
    ), String_.Concat_lines_nl_skip_last
    ( "<p>a"
    , "c"
    , "</p>"
    , ""
    ));
  }
  @Test public void Ws_strip_nl() { // PURPOSE: handle multiple "<!-- -->\n"; was only trimming 1st; DATE:2014-02-24
    fxt.Init_para_y_();
    fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
    ( "a"
    , "<!-- -->"
    , "<!-- -->"
    , "b"
    ), String_.Concat_lines_nl_skip_last
    ( "<p>a"
    , "b"
    , "</p>"
    , ""
    ));
  }
}

View File

@@ -0,0 +1,30 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*;
/** Lexer hooked on the carriage-return byte; it produces no tkn at all. */
public class Xop_cr_lxr implements Xop_lxr {
  public static final Xop_cr_lxr _ = new Xop_cr_lxr(); Xop_cr_lxr() {}
  public byte Lxr_tid() {
    return Xop_lxr_.Tid_cr;
  }
  /** Registers Byte_ascii.Cr as this lexer's hook. */
  public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Cr, this);}
  /** No language-specific hooks. */
  public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
  /** Adds nothing to root and returns cur_pos unchanged, so the matched byte is ignored. */
  public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    return cur_pos;
  }
}

View File

@@ -0,0 +1,35 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/** Token type for a carriage return. */
public class Xop_cr_tkn extends Xop_tkn_itm_base {
  /**
   * Initializes the tkn as immutable with positions -1, -1.
   * NOTE(review): bgn and end are accepted but never used; confirm this is intentional.
   */
  public Xop_cr_tkn(int bgn, int end) {
    this.Tkn_ini_pos(true, -1, -1);
  }
  @Override public byte Tkn_tid() {
    return Xop_tkn_itm_.Tid_cr;
  }
}
/*
NOTE_1:tabs
. tabs exist in wikimedia source; note that tabs (\t) are not a meaningful HTML character
. xowa uses tabs for delimiters in its xowa files
. in order to maintain some semblance of fidelity, "\t" was replaced with &#09;
. unfortunately, "\t" is generally trimmed as whitespace throughout mediawiki; "&#09;" is not
. so, as a HACK, replace "&#09;" with "\t\s\s\s\s";
.. note that all 5 chars of "&#09;" must be replaced; hence "\t\s\s\s\s"
.. note that they all need to be ws in order to be trimmed out
.. note that shrinking the src[] would be (a) memory-expensive (b) complexity-expensive (many functions assume a static src size)
.. note that "\t\t\t\t\t" was the 1st attempt, but this resulted in exponential growth of "\t"s with each save (1 -> 5 -> 25 -> 125). "\t\s\s\s\s" is the lesser evil, since it only grows linearly (1 -> 5 -> 10)
. TODO: swap out the "&#09;" at point of file-read;
*/

View File

@@ -0,0 +1,81 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*; import gplx.xowa.parsers.paras.*;
/**
 * Lexer hooked on "=". A run of "=" is lexed as one unit. Depending on mode and context the run
 * becomes an eq tkn, plain text (tmpl mode, hdr-like line inside curlies) or the end of a hdr (wiki mode).
 */
class Xop_eq_lxr implements Xop_lxr {
  boolean tmpl_mode;
  public Xop_eq_lxr(boolean tmpl_mode) {
    this.tmpl_mode = tmpl_mode;
  }
  public byte Lxr_tid() {return Xop_lxr_.Tid_eq;}
  /** Registers Byte_ascii.Eq as this lexer's hook. */
  public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
    core_trie.Add(Byte_ascii.Eq, this);
  }
  /** No language-specific hooks. */
  public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
  /** Gobbles the whole run of "=" and dispatches to the tmpl-mode or wiki-mode handler; returns the resume position. */
  public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    // gobble up eq; "==" should produce 1 eq_tkn with len of 2, not 2 eq_tkn with len of 1; DATE:2014-04-17
    int eq_end = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Eq);
    int eq_len = eq_end - bgn_pos;
    return tmpl_mode
      ? Make_tkn_tmpl(ctx, tkn_mkr, root, src, src_len, bgn_pos, eq_end, eq_len)
      : Make_tkn_wiki(ctx, tkn_mkr, root, src, src_len, bgn_pos, eq_end, eq_len);
  }
  /** tmpl mode: inside curlies, a run of 2+ "=" that starts or ends a line is emitted as text; otherwise an eq tkn is added. */
  private int Make_tkn_tmpl(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int eq_len) {
    Xop_tkn_itm owner = ctx.Stack_get_last();                              // beginning of "is == part of a hdr tkn sequence?"; DATE:2014-02-09
    boolean inside_curly = owner != null && owner.Tkn_tid() == Xop_tkn_itm_.Tid_tmpl_curly_bgn;
    if (inside_curly && eq_len > 1) {                                      // only skip if at least "=="; don't want to skip "=" which could be kv delimiter; DATE:2014-04-17
      int prv_pos = bgn_pos - 1;
      if (prv_pos > -1 && src[prv_pos] == Byte_ascii.Nl)                   // is prv char \n; EX: "\n==="
        return ctx.Lxr_make_txt_(cur_pos);                                 // ignore hdr tkn
      int eol_pos = Bry_finder.Find_fwd_while_space_or_tab(src, cur_pos, src_len); // skip trailing ws; EX: "== \n"; PAGE:nl.q:Geert_Wilders; DATE:2014-06-05
      if (Is_eos_or_nl(src, src_len, eol_pos))                             // EX: "===\n" or "===" at eos
        return ctx.Lxr_make_txt_(eol_pos);                                 // ignore hdr tkn; trailing ws is included in the txt
    }
    ctx.Subs_add(root, tkn_mkr.Eq(bgn_pos, cur_pos));
    return cur_pos;
  }
  /** wiki mode: if a hdr is open and the run is followed by eol (optionally via a Category lnki), close the hdr; otherwise add an eq tkn. */
  private int Make_tkn_wiki(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int eq_len) {
    int stack_pos = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_hdr);
    if (stack_pos == Xop_ctx.Stack_not_found) {                            // no hdr; make eq_tkn and return;
      ctx.Subs_add(root, tkn_mkr.Eq(bgn_pos, cur_pos));
      return cur_pos;
    }
    int ws_end = Bry_finder.Find_fwd_while_space_or_tab(src, cur_pos, src_len);
    boolean hdr_like
      =  Is_eos_or_nl(src, src_len, ws_end)                                // hdr_like if next char \n or eos
      || Ctg_runs_to_eol(ctx, src, src_len, cur_pos);                      // check if ==[[Category:A]]; DATE:2014-04-17
    if (hdr_like)                                                          // resume after the ws that follows the "=" run
      return ctx.Hdr().Make_tkn_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, ws_end, stack_pos, eq_len);
    // = is just text; create = tkn only; the ws after it is not consumed here; EX: "== a === bad"; create "===" and resume right after it
    ctx.Subs_add(root, tkn_mkr.Eq(bgn_pos, cur_pos, eq_len));
    return cur_pos;
  }
  /** True if pos is at end of src or points at a "\n". */
  private static boolean Is_eos_or_nl(byte[] src, int src_len, int pos) {
    return pos == src_len || src[pos] == Byte_ascii.Nl;
  }
  /** True if a [[Category: lnki is found from cur_pos, its "]]" exists, and only spaces / tabs separate that "]]" from eol. */
  private static boolean Ctg_runs_to_eol(Xop_ctx ctx, byte[] src, int src_len, int cur_pos) {
    int ctg_pos = Xop_nl_lxr.Scan_fwd_for_ctg(ctx, src, cur_pos, src_len);
    if (ctg_pos == Bry_.NotFound) return false;                            // [[Category: not found
    ctg_pos = Bry_finder.Find_fwd(src, Xop_tkn_.Lnki_end, ctg_pos, src_len);
    if (ctg_pos == Bry_.NotFound) return false;                            // ]] not found; note that this should do more validation; EX: [[Category:]] should not be valid; DATE:2014-04-17
    ctg_pos = Bry_finder.Find_fwd_while_space_or_tab(src, ctg_pos + Xop_tkn_.Lnki_end_len, src_len);
    return Is_eos_or_nl(src, src_len, ctg_pos);                            // hdr_like if ]]\n after [[Category:A]]
  }
}

View File

@@ -0,0 +1,24 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/** Token for a run of "=" characters; stores the run length and an optional rhs-ws start position. */
public class Xop_eq_tkn extends Xop_tkn_itm_base {//20111222
  private int eq_len = -1;
  private int eq_ws_rhs_bgn = -1;
  /** Creates a mutable tkn spanning bgn..end with the given eq_len. */
  public Xop_eq_tkn(int bgn, int end, int eq_len) {
    this.Tkn_ini_pos(false, bgn, end);
    this.eq_len = eq_len;
  }
  @Override public byte Tkn_tid() {
    return Xop_tkn_itm_.Tid_eq;
  }
  public int Eq_len() {
    return eq_len;
  }
  /** Defaults to -1 until set. */
  public int Eq_ws_rhs_bgn() {
    return eq_ws_rhs_bgn;
  }
  public Xop_eq_tkn Eq_ws_rhs_bgn_(int v) {
    eq_ws_rhs_bgn = v;
    return this;
  }
}

View File

@@ -0,0 +1,44 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*;
/** Lexer for horizontal rules; hooked on "\n----" and gobbles any further dashes. */
public class Xop_hr_lxr implements Xop_lxr {
  static final byte[] Hook_ary = new byte[] {Byte_ascii.Nl, Byte_ascii.Dash, Byte_ascii.Dash, Byte_ascii.Dash, Byte_ascii.Dash};
  private static final byte Hook_byt = Byte_ascii.Dash;
  public static final int Hr_len = 4;
  public static final Xop_hr_lxr _ = new Xop_hr_lxr(); Xop_hr_lxr() {}
  public byte Lxr_tid() {
    return Xop_lxr_.Tid_hr;
  }
  /** Registers the "\n----" hook. */
  public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr parse_trie) {
    parse_trie.Add(Hook_ary, this);
  }
  /** No language-specific hooks. */
  public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
  /**
   * Closes pending apos / open items, then adds an hr tkn bracketed by the para block notifications.
   * Returns the position after the last consecutive dash.
   */
  public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    boolean bos = bgn_pos == Xop_parser_.Doc_bgn_bos;
    int tkn_bgn = bos ? 0 : bgn_pos;                                       // do not allow -1 pos
    int nl_adj  = bos ? 0 : -1;                                            // -1 to ignore nl at bgn for hr_len; no nl at bos, so 0
    ctx.Apos().EndFrame(ctx, root, src, tkn_bgn, false);
    ctx.CloseOpenItms(ctx, tkn_mkr, root, src, src_len, tkn_bgn, cur_pos); // close open items
    int tkn_end = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Hook_byt); // gobble consecutive dashes
    if (!bos)
      ctx.Para().Process_nl(ctx, root, src, tkn_bgn, tkn_bgn);             // simulate \n in front of ----
    ctx.Para().Process_block__bgn_y__end_n(Xop_xnde_tag_.Tag_hr);          // para=n; block=y
    int hr_len = tkn_end - tkn_bgn + nl_adj;                               // TODO: syntax_check if > 4
    ctx.Subs_add(root, tkn_mkr.Hr(tkn_bgn, tkn_end, hr_len));
    ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_hr);          // block=n; para=y;
    return tkn_end;
  }
}

View File

@@ -0,0 +1,31 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Token-level and html-level tests for the hr lexer with para mode left at the fixture default. */
public class Xop_hr_lxr_basic_tst {
  private Xop_fxt fxt = new Xop_fxt();
  @Before public void init() {
    fxt.Reset();
  }
  // token-level expectations
  @Test public void Basic() {
    fxt.Test_parse_page_wiki("----", fxt.tkn_hr_(0, 4));
  }
  @Test public void Basic_w_nl() {
    fxt.Test_parse_page_wiki("\n----a", fxt.tkn_para_blank_(0), fxt.tkn_hr_(0, 5), fxt.tkn_txt_(5, 6));
  }
  @Test public void Many() { // extra dashes are gobbled into the same tkn
    fxt.Test_parse_page_wiki("---------", fxt.tkn_hr_(0, 9).Hr_len_(9));
  }
  @Test public void Exc_short() { // 3 dashes are plain text
    fxt.Test_parse_page_wiki("---", fxt.tkn_txt_(0, 3));
  }
  @Test public void Exc_interrupt() { // dashes not directly after the new line are plain text
    fxt.Test_parse_page_wiki("\na----", fxt.tkn_nl_char_len1_(0), fxt.tkn_txt_(1, 6));
  }
  // html-level expectations
  @Test public void Html_basic() {
    fxt.Test_parse_page_wiki_str("----", "<hr/>");
  }
  @Test public void Html_extended() {
    fxt.Test_parse_page_wiki_str("------", "<hr/>");
  }
  @Test public void Nl_bgn() {
    fxt.Test_parse_page_wiki_str("a\n----", "a\n<hr/>");
  }
  @Test public void Nl_end() {
    fxt.Test_parse_page_wiki_str("----\na", "<hr/>\na");
  }
}

View File

@@ -0,0 +1,54 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Html-level tests for the hr lexer with para mode enabled. */
public class Xop_hr_lxr_para_tst {
  private Xop_fxt fxt = new Xop_fxt();
  @Before public void init() {
    fxt.Reset();
    fxt.Init_para_y_();
  }
  @Test public void Bos() { // PURPOSE: check that bos rendered correctly; DATE:2014-04-18
    String raw = String_.Concat_lines_nl_skip_last
    ( "----"
    , "a"
    );
    String expd = String_.Concat_lines_nl_skip_last
    ( "<hr/>"
    , ""
    , "<p>a"
    , "</p>"
    );
    fxt.Test_parse_page_wiki_str(raw, expd);
  }
  @Test public void Multiple() { // PURPOSE.fix: hr disables para for rest of page; ca.b:Xarxes; DATE:2014-04-18
    String raw = String_.Concat_lines_nl_skip_last
    ( "a"
    , "----"
    , "b"
    , ""
    , ""
    , "c"
    );
    String expd = String_.Concat_lines_nl_skip_last
    ( "<p>a"
    , "</p>"
    , "<hr/>"
    , ""
    , "<p>b"
    , "</p>"
    , ""
    , "<p><br/>"
    , "c"
    , "</p>"
    );
    fxt.Test_parse_page_wiki_str(raw, expd);
  }
}

View File

@@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/** Token for a horizontal rule; hr_len is the dash count supplied by the creator. */
public class Xop_hr_tkn extends Xop_tkn_itm_base {
  private int hr_len;
  /** Creates a mutable tkn spanning bgn..end with the given hr_len. */
  public Xop_hr_tkn(int bgn, int end, int hr_len) {
    this.Tkn_ini_pos(false, bgn, end);
    this.hr_len = hr_len;
  }
  @Override public byte Tkn_tid() {
    return Xop_tkn_itm_.Tid_hr;
  }
  public int Hr_len() {
    return hr_len;
  }
  public Xop_hr_tkn Hr_len_(int v) {
    hr_len = v;
    return this;
  }
}

View File

@@ -0,0 +1,26 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/** Token for a span of src that is to be ignored; ignore_type records why (one of the Ignore_tid_* values). */
public class Xop_ignore_tkn extends Xop_tkn_itm_base {
  // NOTE(review): Ignore_tid_include_wiki and Ignore_tid_htmlTidy_tblw share the value 3, so Ignore_type() cannot tell them apart; confirm whether this is intentional
  public static final byte Ignore_tid_null          = 0;
  public static final byte Ignore_tid_comment       = 1;
  public static final byte Ignore_tid_include_tmpl  = 2;
  public static final byte Ignore_tid_include_wiki  = 3;
  public static final byte Ignore_tid_htmlTidy_tblw = 3;
  public static final byte Ignore_tid_xnde_dangling = 4;
  public static final byte Ignore_tid_nbsp          = 5;
  public static final byte Ignore_tid_empty_li      = 6;
  public static final byte Ignore_tid_pre_at_bos    = 7;
  private byte ignore_type = Ignore_tid_null;
  /** Creates a mutable tkn spanning bgn..end with the given ignore_type. */
  public Xop_ignore_tkn(int bgn, int end, byte ignore_type) {
    this.Tkn_ini_pos(false, bgn, end);
    this.ignore_type = ignore_type;
  }
  public byte Ignore_type() {
    return ignore_type;
  }
  @Override public byte Tkn_tid() {
    return Xop_tkn_itm_.Tid_ignore;
  }
  /** Nothing to compile. */
  @Override public void Tmpl_compile(Xop_ctx ctx, byte[] src, Xot_compile_data prep_data) {}
  /** Writes nothing to bfr and returns true. */
  @Override public boolean Tmpl_evaluate(Xop_ctx ctx, byte[] src, Xot_invk caller, Bry_bfr bfr) {
    return true;
  }
}

View File

@@ -0,0 +1,83 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*; import gplx.xowa.parsers.tblws.*;
/** Lexer hooked on "|"; what the pipe becomes depends on the tkn type reported by ctx.Cur_tkn_tid(). */
public class Xop_pipe_lxr implements Xop_lxr {
  public static final Xop_pipe_lxr _ = new Xop_pipe_lxr();
  public byte Lxr_tid() {
    return Xop_lxr_.Tid_pipe;
  }
  /** Registers Byte_ascii.Pipe as this lexer's hook. */
  public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
    core_trie.Add(Byte_ascii.Pipe, this);
  }
  /** No language-specific hooks. */
  public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
  /** Handles one pipe according to the current tkn type and returns the position at which lexing should resume. */
  public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    int cur_stack_tid = ctx.Cur_tkn_tid();
    switch (cur_stack_tid) {
      case Xop_tkn_itm_.Tid_brack_bgn:                                     // used for tmpl mode where full lnki_wkr is too heavyweight; matches "[ |"
        return Make_tkn_in_brack(ctx, tkn_mkr, root, bgn_pos, cur_pos);
      case Xop_tkn_itm_.Tid_tblw_tb:
      case Xop_tkn_itm_.Tid_tblw_tr: {
        int tblw_rv = Xop_tblw_lxr_ws.Make(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, Xop_tblw_wkr.Tblw_type_td, false);
        if (tblw_rv != Xop_tblw_lxr_ws.Tblw_ws_cell_pipe) return tblw_rv;  // tblw lxr handled it
        return Add_pipe(ctx, tkn_mkr, root, bgn_pos, cur_pos);
      }
      case Xop_tkn_itm_.Tid_tblw_td:
      case Xop_tkn_itm_.Tid_tblw_th:
      case Xop_tkn_itm_.Tid_tblw_tc: {
        int tblw_rv = Xop_tblw_lxr_ws.Make(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, Xop_tblw_wkr.Tblw_type_td, false);
        if (tblw_rv != Xop_tblw_lxr_ws.Tblw_ws_cell_pipe) return tblw_rv;  // tblw lxr handled it
        if (!ctx.Tblw().Cell_pipe_seen()) {                                // no pipe seen yet in this cell: build atrs instead of adding a pipe tkn
          Xop_tblw_tkn cell_tkn = (Xop_tblw_tkn)ctx.Stack_get_typ(cur_stack_tid);
          Xop_tblw_wkr.Atrs_make(ctx, src, root, ctx.Tblw(), cell_tkn, Bool_.N);
          return cur_pos;
        }
        return Add_pipe(ctx, tkn_mkr, root, bgn_pos, cur_pos);
      }
      case Xop_tkn_itm_.Tid_vnt: {
        gplx.xowa.langs.vnts.Xop_vnt_tkn vnt_tkn = (gplx.xowa.langs.vnts.Xop_vnt_tkn)ctx.Stack_get_typ(Xop_tkn_itm_.Tid_vnt);
        vnt_tkn.Vnt_pipe_tkn_count_add_();
        return Add_pipe(ctx, tkn_mkr, root, bgn_pos, cur_pos);
      }
      case Xop_tkn_itm_.Tid_lnki: {
        Xop_lnki_tkn lnki = (Xop_lnki_tkn)ctx.Stack_get_last();            // BLOCK:invalid_ttl_check
        boolean ttl_invalid = lnki.Pipe_count_is_zero() && !Xop_lnki_wkr_.Parse_ttl(ctx, src, lnki, bgn_pos);
        if (ttl_invalid) {
          ctx.Stack_pop_last();
          return Xop_lnki_wkr_.Invalidate_lnki(ctx, src, root, lnki, bgn_pos);
        }
        return Add_pipe(ctx, tkn_mkr, root, bgn_pos, cur_pos);
      }
      default:
        return Add_pipe(ctx, tkn_mkr, root, bgn_pos, cur_pos);
    }
  }
  /** Pipe directly inside "[": becomes txt in the tmpl parse modes and a pipe tkn in page_wiki mode; any other mode throws. */
  private static int Make_tkn_in_brack(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, int bgn_pos, int cur_pos) {
    switch (ctx.Parse_tid()) {
      case Xop_parser_.Parse_tid_tmpl:
      case Xop_parser_.Parse_tid_page_tmpl:
        ctx.Subs_add(root, tkn_mkr.Txt(bgn_pos, cur_pos));
        break;
      case Xop_parser_.Parse_tid_page_wiki:                                // should never happen?
        ctx.Subs_add(root, tkn_mkr.Pipe(bgn_pos, cur_pos));
        break;
      default:
        throw Exc_.new_unhandled(ctx.Parse_tid());
    }
    return cur_pos;
  }
  /** Adds a plain pipe tkn for bgn_pos..cur_pos to root and returns cur_pos. */
  private static int Add_pipe(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, int bgn_pos, int cur_pos) {
    ctx.Subs_add(root, tkn_mkr.Pipe(bgn_pos, cur_pos));
    return cur_pos;
  }
}

View File

@@ -0,0 +1,22 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/** Token for a single pipe. */
class Xop_pipe_tkn extends Xop_tkn_itm_base {
  /** Creates a mutable tkn spanning bgn..end. */
  public Xop_pipe_tkn(int bgn, int end) {
    this.Tkn_ini_pos(false, bgn, end);
  }
  @Override public byte Tkn_tid() {
    return Xop_tkn_itm_.Tid_pipe;
  }
}

View File

@@ -0,0 +1,28 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/** Root token; also carries three byte arrays (root_src, data_mid, data_htm), each defaulting to Bry_.Empty. */
public class Xop_root_tkn extends Xop_tkn_itm_base {
  private byte[] root_src = Bry_.Empty;
  private byte[] data_mid = Bry_.Empty;
  private byte[] data_htm = Bry_.Empty;
  @Override public byte Tkn_tid() {
    return Xop_tkn_itm_.Tid_root;
  }
  public byte[] Root_src() {
    return root_src;
  }
  public Xop_root_tkn Root_src_(byte[] v) {
    root_src = v;
    return this;
  }
  public byte[] Data_mid() {
    return data_mid;
  }
  public Xop_root_tkn Data_mid_(byte[] v) {
    data_mid = v;
    return this;
  }
  public byte[] Data_htm() {
    return data_htm;
  }
  public Xop_root_tkn Data_htm_(byte[] v) {
    data_htm = v;
    return this;
  }
  /**
   * Resets the base tkn state and root_src.
   * NOTE(review): data_mid and data_htm are not cleared here; confirm that is intentional.
   */
  @Override public void Reset() {
    super.Reset();
    root_src = Bry_.Empty;
  }
}

View File

@@ -0,0 +1,32 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Tests that a non-breaking space is not treated like an ordinary leading space. */
public class Xop_space_lxr_tst {
  private Xop_fxt fxt = new Xop_fxt();
  // Five non-breaking spaces (U+00A0), written as escapes so that editors, formatters and
  // copy/paste cannot silently turn them into ordinary spaces and invert the meaning of the test.
  private static final String Nbsp_x5 = "\u00A0\u00A0\u00A0\u00A0\u00A0";
  @Before public void init() {fxt.Reset();}
  @After public void term() {fxt.Init_para_n_();}
  @Test public void Toc_basic() { // PURPOSE: make sure nbsp char is not converted to space; PAGE:en.w:MacedonianCarthaginian_Treaty; DATE:2014-06-07
    fxt.Init_para_y_();
    fxt.Test_parse_page_all_str(Nbsp_x5 + "a", String_.Concat_lines_nl_skip_last // NOTE: leading ws is nbsp, not space
    ( "<p>" + Nbsp_x5 + "a" // should be <p> not <pre>
    , "</p>"
    , ""
    ));
  }
}

View File

@@ -0,0 +1,46 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*;
/** Token for a run of spaces. */
public class Xop_space_tkn extends Xop_tkn_itm_base {
  Xop_space_tkn() {}
  /** Creates a tkn spanning bgn..end; immutable is passed through to Tkn_ini_pos. */
  public Xop_space_tkn(boolean immutable, int bgn, int end) {
    this.Tkn_ini_pos(immutable, bgn, end);
  }
  @Override public byte Tkn_tid() {
    return Xop_tkn_itm_.Tid_space;
  }
  /** Clones as a mutable space tkn obtained from the ctx's tkn_mkr. */
  @Override public Xop_tkn_itm Tkn_clone(Xop_ctx ctx, int bgn, int end) {
    return ctx.Tkn_mkr().Space_mutable(bgn, end);
  }
  /** An immutable tkn writes exactly one space to bfr; a mutable tkn defers to the base implementation. */
  @Override public boolean Tmpl_evaluate(Xop_ctx ctx, byte[] src, Xot_invk caller, Bry_bfr bfr) {
    if (!this.Tkn_immutable())
      return super.Tmpl_evaluate(ctx, src, caller, bfr);
    bfr.Add_byte(Byte_ascii.Space);
    return true;
  }
}
/** Lexer hooked on the space byte; emits one space token per run of consecutive spaces. */
class Xop_space_lxr implements Xop_lxr {
	public static final Xop_space_lxr _ = new Xop_space_lxr();
	public byte Lxr_tid() {
		return Xop_lxr_.Tid_space;
	}
	public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
		core_trie.Add(Byte_ascii.Space, this);	// register this lexer for the space byte
	}
	public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}	// nothing language-specific
	/** Skips forward while bytes are spaces, adds a space token spanning bgn_pos to that position, and returns the position. */
	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		int spaces_end = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Space);
		ctx.Subs_add(root, tkn_mkr.Space(root, bgn_pos, spaces_end));
		return spaces_end;
	}
}

View File

@@ -0,0 +1,54 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*;
/** Token for a tab span; always initialized with the immutable flag set to false. */
public class Xop_tab_tkn extends Xop_tkn_itm_base {
	/** Bytes of the HTML entity form of a tab. */
	public static final byte[] Bry_tab_ent = Bry_.new_a7("&#09;");
	public Xop_tab_tkn(int bgn, int end) {
		this.Tkn_ini_pos(false, bgn, end);
	}
	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_tab;
	}
}
/** Lexer hooked on both the literal tab byte and the "&#09;" entity; rewrites the matched span in src and emits a tab token. */
class Xop_tab_lxr implements Xop_lxr {
	public static final Xop_tab_lxr _ = new Xop_tab_lxr();
	public byte Lxr_tid() {
		return Xop_lxr_.Tid_tab;
	}
	public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
		core_trie.Add(Byte_ascii.Tab, this);			// literal tab
		core_trie.Add(Xop_tab_tkn.Bry_tab_ent, this);	// entity form
	}
	public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
	/**
	 * Extends the match over any directly following tab bytes, overwrites src in place
	 * (first byte becomes a tab, every remaining byte of the span becomes a space),
	 * adds a tab token for the span and returns the span's end position.
	 */
	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		int span_end = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Tab);
		// HACK: SEE:NOTE_1:tabs
		int pos = bgn_pos;
		src[pos++] = Byte_ascii.Tab;
		while (pos < span_end)
			src[pos++] = Byte_ascii.Space;
		ctx.Subs_add(root, tkn_mkr.Tab(bgn_pos, span_end));
		return span_end;
	}
}
/*
NOTE_1:tabs
. tabs exist in wikimedia source; note that tabs (\t) are not a meaningful HTML character
. xowa uses tabs for delimiters in its xowa files
. in order to maintain some semblance of fidelity, "\t" was replaced with &#09;
. unfortunately, "\t" is generally trimmed as whitespace throughout mediawiki; "&#09;" is not
. so, as a HACK, replace "&#09;" with "\t\s\s\s\s";
.. note that all 5 chars of "&#09;" must be replaced; hence "\t\s\s\s\s"
.. note that they all need to be ws in order to be trimmed out
.. note that shrinking the src[] would be (a) memory-expensive (b) complexity-expensive (many functions assume a static src size)
.. note that "\t\t\t\t\t" was the 1st attempt, but this resulted in exponential growth of "\t"s with each save (1 -> 5 -> 25 -> 125). "\t\s\s\s\s" is less bad because its growth is only linear (1 -> 5 -> 10)
. TODO: swap out the "&#09;" at point of file-read;
*/

View File

@@ -0,0 +1,146 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*;
import gplx.xowa.html.tocs.*;
/**
 * Lexer for "under" keywords (EX: __TOC__, __NOTOC__).
 * Init_by_lang builds a fresh instance per language: the instance is registered in the core trie under the hook bytes,
 * and the text after the hook is stored in a case-sensitive or case-insensitive word trie.
 */
class Xop_under_lxr implements Xop_lxr {
	private static final int[] under_kwds = new int[]	// REF.MW:MagicWord.php
	{ Xol_kwd_grp_.Id_toc, Xol_kwd_grp_.Id_notoc, Xol_kwd_grp_.Id_forcetoc
	, Xol_kwd_grp_.Id_nogallery, Xol_kwd_grp_.Id_noheader, Xol_kwd_grp_.Id_noeditsection
	, Xol_kwd_grp_.Id_notitleconvert, Xol_kwd_grp_.Id_nocontentconvert, Xol_kwd_grp_.Id_newsectionlink, Xol_kwd_grp_.Id_nonewsectionlink
	, Xol_kwd_grp_.Id_hiddencat, Xol_kwd_grp_.Id_index, Xol_kwd_grp_.Id_noindex, Xol_kwd_grp_.Id_staticredirect
	, Xol_kwd_grp_.Id_disambig
	};
	private static final Btrie_fast_mgr Hook_trie = Btrie_fast_mgr.cs_()
		.Add(Xop_under_hook.Key_std, Xop_under_hook.Itm_std)
		.Add(Xop_under_hook.Key_alt, Xop_under_hook.Itm_alt)
		;
	public static final Xop_under_lxr _ = new Xop_under_lxr();
	private Btrie_mgr words_trie_ci;	// words of keywords whose group is not case-matching
	private Btrie_mgr words_trie_cs;	// words of keywords whose group is case-matching
	Xop_under_lxr() {}
	public byte Lxr_tid() {
		return Xop_lxr_.Tid_under;
	}
	public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {}
	/**
	 * Registers every keyword item of the groups listed in under_kwds.
	 * Items starting with a known hook go into the new lexer's word tries; items without a hook get their own Xop_word_lxr in core_trie.
	 */
	public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {
		Xol_kwd_mgr kwd_mgr = lang.Kwd_mgr();
		Xop_under_lxr lang_lxr = new Xop_under_lxr();
		lang_lxr.words_trie_cs = Btrie_slim_mgr.cs_();
		lang_lxr.words_trie_ci = Btrie_utf8_mgr.new_(lang.Case_mgr());
		core_trie.Add(Xop_under_hook.Key_std, lang_lxr);
		boolean alt_hook_added = false;	// alt hook is added to core_trie at most once, and only if some keyword uses it
		for (int kwd_id : under_kwds) {
			Xol_kwd_grp grp = kwd_mgr.Get_or_new(kwd_id);
			Xol_kwd_itm[] itms = grp.Itms();
			if (itms == null) continue;
			boolean grp_is_cs = grp.Case_match();
			Btrie_mgr target_trie = grp_is_cs ? lang_lxr.words_trie_cs : lang_lxr.words_trie_ci;
			for (Xol_kwd_itm itm : itms) {
				byte[] kwd_bry = itm.Val();
				Object hook_obj = Hook_trie.Match_bgn(kwd_bry, 0, kwd_bry.length);
				if (hook_obj == null) {	// kwd doesn't start with __; no known examples, but just in case; EX: "NOTOC"; DATE:2014-02-14
					Xop_word_lxr word_lxr = new Xop_word_lxr(kwd_id);
					if (grp_is_cs) {	// cs; add word directly to trie
						core_trie.Add(kwd_bry, word_lxr);
						continue;
					}
					// NOTE: next part is imprecise; XOWA parser is cs, but kwd is ci; for now, just add all upper and all lower
					Gfo_usr_dlg_.I.Warn_many("", "", "under keyword does not start with __; id=~{0} key=~{1} word=~{2}", kwd_id, String_.new_u8(grp.Key()), String_.new_u8(kwd_bry));
					core_trie.Add(lang.Case_mgr().Case_build_lower(kwd_bry), word_lxr);
					core_trie.Add(lang.Case_mgr().Case_build_upper(kwd_bry), word_lxr);
					continue;
				}
				Xop_under_hook hook = (Xop_under_hook)hook_obj;
				byte[] word_bry = Bry_.Mid(kwd_bry, hook.Key_len(), kwd_bry.length);	// everything after the hook
				target_trie.Add_obj(word_bry, new Xop_under_word(kwd_id, word_bry));
				if (!alt_hook_added && hook.Tid() == Xop_under_hook.Tid_alt) {
					core_trie.Add(Xop_under_hook.Key_alt, lang_lxr);
					alt_hook_added = true;
				}
			}
		}
	}
	/**
	 * Called with cur_pos just after the hook; looks up the following bytes in the cs trie, then in the ci trie.
	 * On a match the keyword is applied and the position after the matched word is returned; otherwise the hook is handed back as text.
	 */
	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		if (cur_pos == src_len) return ctx.Lxr_make_txt_(cur_pos);	// eos
		Btrie_mgr matched_trie = words_trie_cs;	// check cs
		Object word_obj = matched_trie.Match_bgn(src, cur_pos, src_len);
		if (word_obj == null) {
			matched_trie = words_trie_ci;	// check ci
			word_obj = matched_trie.Match_bgn(src, cur_pos, src_len);
		}
		if (word_obj == null) return ctx.Lxr_make_txt_(cur_pos);	// kwd not found; EX: "TOCA__"
		int word_end = matched_trie.Match_pos();
		Xop_under_word word_itm = (Xop_under_word)word_obj;
		Xop_under_lxr.Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, word_end, word_itm.Kwd_id());
		return word_end;
	}
	/** Applies the effect of the keyword identified by kwd_id to the current page; only the TOC keyword adds a token to root. */
	public static void Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int kwd_id) {
		Xoae_page page = ctx.Cur_page();
		Xow_hdr_mgr hdr_mgr = page.Hdr_mgr();
		if (kwd_id == Xol_kwd_grp_.Id_toc) {
			hdr_mgr.Toc_manual_();
			ctx.Para().Process_block_lnki_div();	// NOTE: __TOC__ will manually place <div toc> here; simulate div in order to close any pres; EX:\n\s__TOC__; PAGE:de.w: DATE:2014-07-05
			ctx.Subs_add(root, tkn_mkr.Under(bgn_pos, cur_pos, kwd_id));	// NOTE: only save under_tkn for TOC (b/c its position is needed for insertion); DATE:2013-07-01
		}
		else if (kwd_id == Xol_kwd_grp_.Id_forcetoc)
			hdr_mgr.Toc_force_();
		else if (kwd_id == Xol_kwd_grp_.Id_notoc)
			hdr_mgr.Toc_hide_();
		else if (kwd_id == Xol_kwd_grp_.Id_nocontentconvert)
			page.Html_data().Lang_convert_content_(false);
		else if (kwd_id == Xol_kwd_grp_.Id_notitleconvert)
			page.Html_data().Lang_convert_title_(false);
		// everything else is ignored, including Id_noeditsection (not handling edit sections)
	}
}
/** Lexer for a single keyword that is registered in the core trie as a whole word; carries the keyword id it was created with. */
class Xop_word_lxr implements Xop_lxr {
	private final int kwd_id;
	public Xop_word_lxr(int kwd_id) {
		this.kwd_id = kwd_id;
	}
	public byte Lxr_tid() {
		return Xop_lxr_.Tid_word;
	}
	public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {}
	public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
	/** Delegates to Xop_under_lxr.Make_tkn with this lexer's kwd_id and returns cur_pos unchanged. */
	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		// for now, all word_lxrs only call the under_lxr; DATE:2014-02-14
		Xop_under_lxr.Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, kwd_id);
		return cur_pos;
	}
}
/** Describes one byte sequence that can introduce an "under" keyword: a type id, the key bytes, and the key's length in bytes. */
class Xop_under_hook {
	Xop_under_hook(byte tid, byte[] key) {this.tid = tid; this.key = key; this.key_len = key.length;}
	public byte Tid() {return tid;} private byte tid;
	public byte[] Key() {return key;} private byte[] key;
	public int Key_len() {return key_len;} private int key_len;
	public static final byte Tid_std = 1, Tid_alt = 2;
	/** Standard hook: two ASCII underlines. */
	public static final byte[] Key_std = new byte[] {Byte_ascii.Underline, Byte_ascii.Underline};
	/**
	 * Alternate hook used by ja wikis: two FULLWIDTH LOW LINE chars (U+FF3F), encoded as UTF-8.
	 * NOTE: written as an explicit escape; the fullwidth char is visually near-identical to "_" and,
	 * if it is ever normalized to ASCII, Key_alt silently becomes a duplicate of Key_std.
	 */
	public static final byte[] Key_alt = Bry_.new_u8("\uFF3F\uFF3F");
	// NOTE: Itm_* must be declared after Key_*; the constructor reads key.length
	public static final Xop_under_hook
	  Itm_std = new Xop_under_hook(Tid_std, Key_std)
	, Itm_alt = new Xop_under_hook(Tid_alt, Key_alt)
	;
}
/** Value stored in the word tries: the keyword id plus the word bytes it was registered under and their length. */
class Xop_under_word {
	private final int kwd_id;
	private final byte[] word_bry;
	private final int word_len;
	public Xop_under_word(int kwd_id, byte[] word_bry) {
		this.kwd_id = kwd_id;
		this.word_bry = word_bry;
		this.word_len = word_bry.length;	// cached at construction
	}
	public int Kwd_id() {
		return kwd_id;
	}
	public byte[] Word_bry() {
		return word_bry;
	}
	public int Word_len() {
		return word_len;
	}
}

View File

@@ -0,0 +1,187 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Parser tests for "under" keywords (__TOC__, __NOTOC__, ...): matching rules, case-sensitivity, alternate hooks and interaction with para / pre handling. */
public class Xop_under_lxr_tst {
	private Xop_fxt fxt = new Xop_fxt();
	@Before public void init() {fxt.Reset();}
	@After public void term() {fxt.Init_para_n_();}
	@Test public void Toc_basic() {
		fxt.Test_parse_page_all_str("a__TOC__b", "ab");
	}
	@Test public void Toc_match_failed() {
		fxt.Test_parse_page_all_str("a__TOCA__b", "a__TOCA__b");
	}
	@Test public void Toc_match_ci() {
		fxt.Test_parse_page_all_str("a__toc__b", "ab");
	}
	@Test public void Notoc_basic() {
		fxt.Wtr_cfg().Toc__show_(Bool_.Y); // NOTE: must enable in order for TOC to show (and to make sure NOTOC suppresses)
		fxt.Test_parse_page_all_str(String_.Concat_lines_nl
		( "__NOTOC__"
		, "==a=="
		, "==b=="
		, "==c=="
		, "==d=="
		), String_.Concat_lines_nl
		( "<h2><span class='mw-headline' id='a'>a</span></h2>"
		, ""
		, "<h2><span class='mw-headline' id='b'>b</span></h2>"
		, ""
		, "<h2><span class='mw-headline' id='c'>c</span></h2>"
		, ""
		, "<h2><span class='mw-headline' id='d'>d</span></h2>"
		));
		fxt.Wtr_cfg().Toc__show_(Bool_.N);
	}
	@Test public void Ignore_pre() {
		fxt.Init_para_y_();
		fxt.Test_parse_page_all_str("a\n __NOTOC__\n", String_.Concat_lines_nl
		( "<p>a"
		, "</p>" // NOTE: do not capture " " in front of __NOTOC__; confirmed against MW; DATE:2014-02-19
		, ""
		, "<p><br/>"
		, "</p>"
		));
		fxt.Init_para_n_();
	}
	@Test public void Toc_works() { // PURPOSE: make sure "suppressed" pre does not somehow suppress TOC
		fxt.Init_para_y_();
		fxt.Test_parse_page_all_str("a\n__TOC__\n==b==\n", String_.Concat_lines_nl
		( "<p>a"
		, "</p>"
		, "<div id=\"toc\" class=\"toc\">"
		, " <div id=\"toctitle\">"
		, " <h2>Contents</h2>"
		, " </div>"
		, " <ul>"
		, " <li class=\"toclevel-1 tocsection-1\"><a href=\"#b\"><span class=\"tocnumber\">1</span> <span class=\"toctext\">b</span></a>"
		, " </li>"
		, " </ul>"
		, "</div>"
		, ""
		, "<h2>b</h2>"
		));
		fxt.Init_para_n_();
	}
	@Test public void Ignore_pre_after() { // PURPOSE: "__TOC__\s\n" must be trimmed at end, else false pre; assertion only (no code exists to handle this test); DATE:2013-07-08
		fxt.Init_para_y_();
		fxt.Test_parse_page_all_str(String_.Concat_lines_nl
		( "a"
		, "__NOTOC__ "
		, "b"
		), String_.Concat_lines_nl
		( "<p>a"
		, "</p>" // NOTE: do not capture " "; confirmed against MW; DATE:2014-02-19
		, ""
		, "<p>b"
		, "</p>"
		));
		fxt.Init_para_n_();
	}
	@Test public void Disambig() { // PURPOSE: ignore "__DISAMBIG__"; EX:{{disambiguation}} DATE:2013-07-24
		fxt.Test_parse_page_all_str("__DISAMBIG__", "");
	}
	@Test public void Nocontentconvert() { // simple test; test for flag only; DATE:2014-02-06
		gplx.xowa.pages.Xopg_html_data html_data = fxt.Page().Html_data();
		Tfds.Eq(html_data.Lang_convert_content(), true);
		Tfds.Eq(html_data.Lang_convert_title(), true);
		fxt.Test_parse_page_all_str("__NOCONTENTCONVERT__ __NOTITLECONVERT__", " ");
		Tfds.Eq(html_data.Lang_convert_content(), false);
		Tfds.Eq(html_data.Lang_convert_title(), false);
	}
	@Test public void Eos() { // PURPOSE: check that __ at eos doesn't fail; es.s:Luisa de Bustamante: 3; DATE:2014-02-15
		fxt.Test_parse_page_all_str("__", "__");
	}
	@Test public void Pre_toc() { // PURPOSE: make sure that "\n\s__TOC" does not create pre; PAGE:de.w:Main_Page; DATE:2014-04-07
		fxt.Init_para_y_();
		fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
		( "a"
		, " __TOC__ " // NOTE: this should not be a pre; DATE:2014-07-05
		, "b"
		), String_.Concat_lines_nl
		( "<p>a"
		, "</p>"
		, " " // NOTE: \s should not be captured, but leaving for now
		, ""
		, "<p>b"
		, "</p>"
		));
		fxt.Init_para_n_();
	}
	@Test public void Pre_notoc() { // PURPOSE: make sure that "\n\s__NOTOC" does not create pre. note that mechanism is different from TOC; DATE:2014-07-05
		fxt.Init_para_y_();
		fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
		( "a"
		, " __NOTOC__ " // NOTE: does not capture " "; confirmed against MW
		, "b"
		), String_.Concat_lines_nl
		( "<p>a"
		, "</p>"
		, ""
		, "<p>b"
		, "</p>"
		));
		fxt.Init_para_n_();
	}
	@Test public void Hook_alt() { // PURPOSE: ja wikis use alternate __ (two FULLWIDTH LOW LINE chars; U+FF3F); DATE:2014-03-04
		// NOTE: alternate hook is spelled with explicit escapes; with plain ASCII "__" this test would only repeat Toc_basic and never exercise the alternate hook
		String hook_alt = "\uFF3F\uFF3F";
		Xowe_wiki wiki = fxt.Wiki(); Xol_lang lang = wiki.Lang();
		fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_toc, true, hook_alt + "TOC" + hook_alt);
		wiki.Parser().Init_by_lang(lang);
		fxt.Test_parse_page_all_str("a" + hook_alt + "TOC" + hook_alt + "b", "ab");
	}
	@Test public void Ascii_ci() { // PURPOSE: case-insensitive ascii; DATE:2014-07-10
		Xowe_wiki wiki = fxt.Wiki(); Xol_lang lang = wiki.Lang();
		fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_toc, false, "__TOC__");
		wiki.Parser().Init_by_lang(lang);
		fxt.Test_parse_page_all_str("a__TOC__b", "ab");
		fxt.Test_parse_page_all_str("a__toc__b", "ab");
	}
	@Test public void Utf8_ci() { // PURPOSE: case-insensitive UTF8; DATE:2014-07-10
		Xowe_wiki wiki = fxt.Wiki(); Xol_lang lang = wiki.Lang();
		lang.Case_mgr_utf8_();
		fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_toc, false, "__AÉI__");
		wiki.Parser().Init_by_lang(lang);
		fxt.Test_parse_page_all_str("a__AÉI__b", "ab");
		fxt.Test_parse_page_all_str("a__aéi__b", "ab");
	}
	@Test public void Utf8_ci_asymmetric() { // PURPOSE: case-insensitive UTF8; asymmetric; DATE:2014-07-10
		Xowe_wiki wiki = fxt.Wiki(); Xol_lang lang = wiki.Lang();
		lang.Case_mgr_utf8_();
		fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_toc, false, "__İÇİNDEKİLER__"); // __TOC__ for tr.w
		wiki.Parser().Init_by_lang(lang);
		fxt.Test_parse_page_all_str("a__İçindekiler__b", "ab");
	}
	@Test public void Cs() { // PURPOSE: cs (ascii / utf8 doesn't matter); DATE:2014-07-11
		Xowe_wiki wiki = fxt.Wiki(); Xol_lang lang = wiki.Lang();
		fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_toc , Bool_.Y, "__TOC__");
		wiki.Parser().Init_by_lang(lang);
		fxt.Test_parse_page_all_str("a__TOC__b" , "ab"); // cs.pass
		fxt.Test_parse_page_all_str("a__toc__b" , "a__toc__b"); // cs.fail; lowercase must not match a case-sensitive kwd
	}
	@Test public void Ascii_cs_ci() { // PURPOSE: test simultaneous cs and ci; DATE:2014-07-11
		Xowe_wiki wiki = fxt.Wiki(); Xol_lang lang = wiki.Lang();
		fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_toc , Bool_.N, "__TOC__");
		fxt.Init_lang_kwds(lang, Xol_kwd_grp_.Id_notoc , Bool_.Y, "__NOTOC__");
		wiki.Parser().Init_by_lang(lang);
		fxt.Test_parse_page_all_str("a__TOC__b" , "ab"); // ci.pass
		fxt.Test_parse_page_all_str("a__toc__b" , "ab"); // ci.pass
		fxt.Test_parse_page_all_str("a__NOTOC__b" , "ab"); // cs.pass
		fxt.Test_parse_page_all_str("a__notoc__b" , "a__notoc__b"); // cs.fail
	}
}

View File

@@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/** Token recorded for an "under" keyword; stores the int id passed at construction. */
public class Xop_under_tkn extends Xop_tkn_itm_base {
	private int under_tid;
	/**
	 * @param bgn       start position of the token
	 * @param end       end position of the token
	 * @param under_tid id returned later by Under_tid()
	 */
	public Xop_under_tkn(int bgn, int end, int under_tid) {
		this.under_tid = under_tid;
		this.Tkn_ini_pos(false, bgn, end);
	}
	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_under;
	}
	public int Under_tid() {
		return under_tid;
	}
}