1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2014-06-30 00:04:32 -04:00
parent 85594d3cdd
commit bae88e739c
2482 changed files with 198730 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/**
 * Lexer entry for wikitext headers.
 * Registers the two-byte sequence newline + '=' with the core trie and, when that
 * sequence is matched, hands token creation to the header worker of the current ctx.
 */
class Xop_hdr_lxr implements Xop_lxr {
  /** Byte sequence registered with the trie: a newline followed by a single '='. */
  static final byte[] Hook_bgn = {Byte_ascii.NewLine, Byte_ascii.Eq};

  /** Shared instance. */
  public static final Xop_hdr_lxr _ = new Xop_hdr_lxr();

  /** The header symbol itself ('='). */
  public static final byte Hook = Byte_ascii.Eq;

  Xop_hdr_lxr() {}

  /** Identifies this lexer as the header lexer. */
  public byte Lxr_tid() {
    return Xop_lxr_.Tid_hdr;
  }

  /** Adds {@link #Hook_bgn} to the wiki-level trie so this lexer is invoked on a match. */
  public void Init_by_wiki(Xow_wiki wiki, ByteTrieMgr_fast core_trie) {
    core_trie.Add(Hook_bgn, this);
  }

  /** Nothing is registered per language. */
  public void Init_by_lang(Xol_lang lang, ByteTrieMgr_fast core_trie) {
  }

  /**
   * Forwards all arguments to {@code ctx.Hdr().Make_tkn_bgn}.
   * @return the position returned by the header worker
   */
  public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    Xop_hdr_wkr hdr_wkr = ctx.Hdr();
    return hdr_wkr.Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
  }
}

View File

@@ -0,0 +1,33 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/**
 * Token for a wikitext header.
 * Carries the header level plus bookkeeping that the header worker and the html
 * writer fill in through the chained setters (each setter returns this token).
 */
public class Xop_hdr_tkn extends Xop_tkn_itm_base {
  private int hdr_len = -1;
  private int hdr_bgn_manual;
  private int hdr_end_manual;
  private boolean hdr_html_first;
  private int hdr_html_dupe_idx;
  private byte[] hdr_toc_text;
  private byte[] hdr_html_id = Bry_.Empty;

  /**
   * @param bgn     start position, passed to {@code Tkn_ini_pos}
   * @param end     end position, passed to {@code Tkn_ini_pos}
   * @param hdr_len initial header length
   */
  public Xop_hdr_tkn(int bgn, int end, int hdr_len) {
    this.Tkn_ini_pos(false, bgn, end);
    this.hdr_len = hdr_len;
  }

  @Override public byte Tkn_tid() {
    return Xop_tkn_itm_.Tid_hdr;
  }

  /** Header length; -1 until assigned. */
  public int Hdr_len() {
    return hdr_len;
  }
  public Xop_hdr_tkn Hdr_len_(int v) {
    hdr_len = v;
    return this;
  }

  /** Manual count for the opening side of the header. */
  public int Hdr_bgn_manual() {
    return hdr_bgn_manual;
  }
  public Xop_hdr_tkn Hdr_bgn_manual_(int v) {
    hdr_bgn_manual = v;
    return this;
  }

  /** Manual count for the closing side of the header. */
  public int Hdr_end_manual() {
    return hdr_end_manual;
  }
  public Xop_hdr_tkn Hdr_end_manual_(int v) {
    hdr_end_manual = v;
    return this;
  }

  /** Flag that can only be switched on, via {@link #Hdr_html_first_y_()}. */
  public boolean Hdr_html_first() {
    return hdr_html_first;
  }
  public Xop_hdr_tkn Hdr_html_first_y_() {
    hdr_html_first = true;
    return this;
  }

  /** Last value handed out by {@link #Hdr_html_dupe_idx_next()}; 0 if never called. */
  public int Hdr_html_dupe_idx() {
    return hdr_html_dupe_idx;
  }

  /**
   * Advances the duplicate index and returns it.
   * The first call yields 2; every later call yields the previous value plus 1.
   */
  public int Hdr_html_dupe_idx_next() {
    if (hdr_html_dupe_idx == 0) {
      hdr_html_dupe_idx = 2;
    }
    else {
      hdr_html_dupe_idx = hdr_html_dupe_idx + 1;
    }
    return hdr_html_dupe_idx;
  }

  public byte[] Hdr_toc_text() {
    return hdr_toc_text;
  }
  public Xop_hdr_tkn Hdr_toc_text_(byte[] v) {
    hdr_toc_text = v;
    return this;
  }

  /** Html id; starts as {@code Bry_.Empty}. */
  public byte[] Hdr_html_id() {
    return hdr_html_id;
  }
  public Xop_hdr_tkn Hdr_html_id_(byte[] v) {
    hdr_html_id = v;
    return this;
  }
}

View File

@@ -0,0 +1,123 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/**
 * Context worker for wikitext headers ("==a==").
 * Make_tkn_bgn opens a header token when the lexer sees "\n=", Make_tkn_end closes it
 * when the matching run of "=" is found, and AutoClose neutralizes a header that was
 * never closed.
 */
public class Xop_hdr_wkr implements Xop_ctx_wkr {
  public void Ctor_ctx(Xop_ctx ctx) {}
  public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
  public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}

  /**
   * Handles a header that was opened but never closed; EX: "==a".
   * The token is made inert: its "=" count is moved to Hdr_bgn_manual and Hdr_len is set to 0.
   * @param tkn the open header token; must be an Xop_hdr_tkn
   */
  public void AutoClose(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
    // bgn never closed; mark inert; EX: "==a"
    Xop_hdr_tkn bgn = (Xop_hdr_tkn)tkn;
    int bgn_hdr_len = bgn.Hdr_len();
    bgn.Hdr_bgn_manual_(bgn_hdr_len);
    bgn.Hdr_len_(0);
    if (bgn_hdr_len > 1 && ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki) { // NOTE: \n= is not uncommon for templates; ignore them;
      ctx.Msg_log().Add_itm_none(Xop_hdr_log.Dangling_hdr, src, bgn.Src_bgn(), bgn_pos);
    }
  }

  /**
   * Opens a header token at "\n=".
   * @param bgn_pos start of the hook; Doc_bgn_bos is normalized to 0
   * @param cur_pos position just after the hook's single "="
   * @return position after the full run of "="
   */
  public int Make_tkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    if (bgn_pos == Xop_parser_.Doc_bgn_bos) bgn_pos = 0; // do not allow -1 pos
    ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false);
    Close_open_itms(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
    ctx.Para().Process_block__bgn__nl_w_symbol(ctx, root, src, bgn_pos, cur_pos, Xop_xnde_tag_.Tag_h2); // pass h2; should pass h# where # is correct #, but for purpose of Para_wkr, <h2> tag does not matter
    int new_pos = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Xop_hdr_lxr.Hook); // count all =
    int hdr_len = new_pos - cur_pos + 1; // +1 b/c Hook has 1 eq: "\n="
    switch (hdr_len) {
      case 1: ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_1, src, bgn_pos, new_pos); break; // <h1>; flag
      case 2: case 3: case 4: case 5: case 6: break; // <h2>-<h6>: normal
      default: ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_7_or_more, src, bgn_pos, new_pos); break; // <h7>+; limit to 6; flag; NOTE: only 14 pages in 2011-07-27
    }
    Xop_hdr_tkn tkn = tkn_mkr.Hdr(bgn_pos, new_pos, hdr_len); // make tkn
    ctx.StackTkn_add(root, tkn);
    return new_pos;
  }

  /**
   * Closes the header token found at stackPos and reconciles the bgn / end "=" counts.
   * Surplus "=" on either side are recorded in Hdr_bgn_manual / Hdr_end_manual, and the
   * level is capped at 6.
   * @param stackPos    stack index of the open header token
   * @param end_hdr_len number of "=" in the closing run
   * @return position after the header and the trailing whitespace it absorbs
   */
  public int Make_tkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int stackPos, int end_hdr_len) {// REF.MW: Parser|doHeadings
    if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_tmpl_curly_bgn) return ctx.Lxr_make_txt_(cur_pos);
    Xop_hdr_tkn hdr = (Xop_hdr_tkn)ctx.Stack_pop_til(root, src, stackPos, false, bgn_pos, cur_pos);
    ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false); // end any apos; EX: ==''a==
    int hdr_len = hdr.Hdr_len(), bgn_manual = 0, end_manual = 0;
    boolean dirty = false;
    if (end_hdr_len < hdr_len) { // mismatch: bgn has more; shrink hdr to end's length; EX: "=====a==" -> <h2>===a</h2>
      bgn_manual = hdr_len - end_hdr_len;
      hdr_len = end_hdr_len;
      ctx.Msg_log().Add_itm_none(Xop_hdr_log.Mismatched, src, bgn_pos, cur_pos);
      if (hdr_len == 1) ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_1, src, bgn_pos, cur_pos);
      dirty = true;
    }
    else if (end_hdr_len > hdr_len) { // mismatch: end has more; keep hdr_len; EX: "==a=====" -> <h2>a===</h2>
      end_manual = end_hdr_len - hdr_len;
      ctx.Msg_log().Add_itm_none(Xop_hdr_log.Mismatched, src, bgn_pos, cur_pos);
      dirty = true;
    }
    if (hdr_len > 6) { // <h7>+; limit to 6; NOTE: at this point hdr_len is the common length of bgn / end
      // add to (not overwrite) any surplus already recorded by the mismatch branches above;
      // EX: bgn=8,end=7 -> bgn_manual=2,end_manual=1; otherwise one "=" of the bgn would be dropped
      int surplus = hdr_len - 6;
      bgn_manual += surplus;
      end_manual += surplus;
      hdr_len = 6;
      dirty = true;
    }
    if (dirty) {
      hdr.Hdr_bgn_manual_(bgn_manual).Hdr_end_manual_(end_manual).Hdr_len_(hdr_len);
    }
    cur_pos = Find_fwd_while_ws_hdr_version(src, cur_pos, src_len); // NOTE: hdr gobbles up trailing ws; EX: "==a== \n\t \n \nb" gobbles up all 3 "\n"s; otherwise para_wkr will process <br/>
    ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_h2);
    hdr.Subs_move(root);
    hdr.Src_end_(cur_pos);
    if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki) {
      ctx.Cur_page().Hdr_mgr().Add(ctx, hdr, src);
    }
    return cur_pos;
  }

  /**
   * Scans the stack from index 0 for the first list or hdr token and pops the stack down to it.
   * Does nothing when the stack holds neither.
   */
  private void Close_open_itms(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    int stack_len = ctx.Stack_len();
    for (int i = 0; i < stack_len; i++) { // loop over stack
      switch (ctx.Stack_get(i).Tkn_tid()) { // find first list/hdr; close everything until this
        case Xop_tkn_itm_.Tid_list:
        case Xop_tkn_itm_.Tid_hdr:
          ctx.Stack_pop_til(root, src, i, true, bgn_pos, cur_pos);
          return;
      }
    }
  }

  /**
   * Skips spaces, tabs and newlines starting at cur.
   * @return end if only whitespace remains; otherwise the index of the last newline skipped
   *         (so that newline is left for the parser), or the first non-ws position when no
   *         newline was skipped
   */
  private static int Find_fwd_while_ws_hdr_version(byte[] src, int cur, int end) {
    int last_nl = -1; // position just after the most recent newline; -1 if none seen
    while (true) {
      if (cur == end) return cur;
      byte b = src[cur];
      switch (b) {
        case Byte_ascii.NewLine:
          cur++;
          last_nl = cur;
          break;
        case Byte_ascii.Space:
        case Byte_ascii.Tab:
          cur++;
          break;
        default:
          return last_nl == -1 ? cur : last_nl - 1;
      }
    }
  }
}
/*
NOTE:hdr.trailing_nl
. by design, the hdr_tkn's src_end will not include the final trailing \n
.. for example, for "\n==a==\n", the src_bgn will be 0, but the src_end will be 6
.. src_end is exclusive: a src_end of 6 means the \n at pos 6 is not part of the hdr_tkn
. this is needed to leave the \n for the parser to handle other tkns, such as hdrs, tblws, lists.
. for example, in "\n==a==\n*b", if the \n at pos 6 were taken by the hdr_tkn, then the parser would encounter a "*" instead of a "\n*"
*/

View File

@@ -0,0 +1,127 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/**
 * Parser tests for wikitext headers: levels, mismatched / dangling "=", trailing whitespace,
 * and interaction with other block tokens. Each test parses a wikitext string through the
 * fixture and compares against the expected html.
 */
public class Xop_hdr_wkr_basic_tst {
  private Xop_fxt fxt = new Xop_fxt();
  @Before public void init() {fxt.Reset();}
  @After public void term() {fxt.Init_para_n_();}
  @Test public void H2() {fxt.Test_parse_page_wiki_str("==a==" , "<h2>a</h2>\n");}
  @Test public void H3() {fxt.Test_parse_page_wiki_str("===a===" , "<h3>a</h3>\n");}
  @Test public void H6_limit() {fxt.Test_parse_page_wiki_str("=======a=======" , "<h6>=a=</h6>\n");}
  @Test public void Mismatch_bgn() {fxt.Test_parse_page_wiki_str("=====a==" , "<h2>===a</h2>\n");}
  @Test public void Mismatch_end() {fxt.Test_parse_page_wiki_str("==a=====" , "<h2>a===</h2>\n");}
  @Test public void Dangling() {fxt.Test_parse_page_wiki_str("==a" , "==a");}
  @Test public void Comment_bgn() {fxt.Test_parse_page_all_str ("<!--b-->==a==" , "<h2>a</h2>\n");}
  @Test public void Comment_end() {fxt.Test_parse_page_all_str ("==a==<!--b-->" , "<h2>a</h2>\n");}
  @Test public void Ws_end() { // PURPOSE: "==\n" merges all ws following it; \n\n\n is not transformed by Para_wkr to "<br/>"
    fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
    ( "==a== \t"
    , ""
    , ""
    , ""
    , "b"
    ), String_.Concat_lines_nl_skip_last
    ( "<h2>a</h2>"
    , "b"
    ));
  }
  @Test public void Many() {
    fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
    ( "==a=="
    , "===b==="
    ), String_.Concat_lines_nl_skip_last
    ( "<h2>a</h2>"
    , ""
    , "<h3>b</h3>"
    , ""
    ));
  }
  @Test public void Hdr_w_tblw() {
    fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
    ( "==a=="
    , "{|"
    , "|+"
    , "|}"
    ), String_.Concat_lines_nl_skip_last
    ( "<h2>a</h2>"
    , "<table>"
    , " <caption>"
    , " </caption>"
    , "</table>"
    , ""
    ));
  }
  @Test public void Hdr_w_hr() {
    fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
    ( "==a=="
    , "----"
    ), String_.Concat_lines_nl_skip_last
    ( "<h2>a</h2>"
    , "<hr/>"
    ));
  }
  @Test public void Mix_apos_dangling() {fxt.Test_parse_page_wiki_str("==''a==" , "<h2><i>a</i></h2>\n");}
  @Test public void Mix_xnde_dangling() {fxt.Test_parse_page_wiki_str("==<i>a==" , "<h2><i>a</i></h2>\n");}
  @Test public void Mix_tblw_cell() {fxt.Test_parse_page_wiki_str("==a!!==" , "<h2>a!!</h2>\n");}
  @Test public void Ws() {fxt.Test_parse_page_wiki_str("== a b ==" , "<h2> a b </h2>\n");}
  @Test public void Err_hdr() {fxt.Init_log_(Xop_hdr_log.Mismatched) .Test_parse_page_wiki_str("====a== ==" , "<h2>==a== </h2>\n").tst_Log_check();}
  @Test public void Err_end_hdr_is_1() {fxt.Init_log_(Xop_hdr_log.Mismatched, Xop_hdr_log.Len_1).Test_parse_page_wiki_str("==a=" , "<h1>=a</h1>\n").tst_Log_check();}
  @Test public void Html_hdr_many() { // PURPOSE: repeated header text gets a numeric suffix on the html id
    fxt.Wtr_cfg().Toc_show_(true);
    try {
      fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
      ( "==a=="
      , "==a=="
      , "==a=="
      ), String_.Concat_lines_nl_skip_last
      ( "<h2><span class='mw-headline' id='a'>a</span></h2>"
      , ""
      , "<h2><span class='mw-headline' id='a_2'>a</span></h2>"
      , ""
      , "<h2><span class='mw-headline' id='a_3'>a</span></h2>"
      , ""
      ));
    }
    finally { // restore the flag even when the assertion above throws, so a failure here cannot leave Toc_show on
      fxt.Wtr_cfg().Toc_show_(false);
    }
  }
  @Test public void Hdr_inside_dangling_tmpl_fix() { // PURPOSE: one-off fix to handle == inside dangling tmpl; DATE:2014-02-11
    fxt.Test_parse_page_all_str("{{a|}\n==b=="
    , String_.Concat_lines_nl_skip_last
    ( "{{a|}"
    , ""
    , "<h2>b</h2>"
    , ""
    ));
  }
  // NOTE: the two tests below are disabled in the original source; kept for reference
// @Test public void Pfunc() {// PAGE:en.w:Wikipedia:WikiProject_Articles_for_creation/Submissions/List DATE:2014-06-24
// fxt.Test_parse_page_all_str
// ( "{{#if:exists|==a==|no}}"
// , String_.Concat_lines_nl_skip_last
// ( "<h2>a</h2>"
// , ""
// ));
// }
// @Test public void Hdr_inside_dangling_tmpl_fix_2() { // PURPOSE: hdr == inside dangling tmpl; DATE:2014-06-10
// fxt.Init_defn_add("Print", "{{{1}}}");
// fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
// ( "=={{Print|b=="
// , "}}"
// ), String_.Concat_lines_nl_skip_last
// ( "==b="
// , ""
// ));
// }
}

View File

@@ -0,0 +1,26 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/**
 * Header parser tests that run with the fixture's para option switched on
 * (Init_para_y_ in init, Init_para_n_ in term).
 */
public class Xop_hdr_wkr_para_tst {
  private Xop_fxt fxt = new Xop_fxt();

  @Before public void init() {
    fxt.Reset();
    fxt.Init_para_y_();
  }

  @After public void term() {
    fxt.Init_para_n_();
  }

  /** PURPOSE: check that BOS==a== does not throw null ref in para; DATE:2014-02-18 */
  @Test public void Hdr_at_bos() {
    String raw = "==a==";
    String expd = "<h2>a</h2>\n";
    fxt.Test_parse_page_all_str(raw, expd);
  }
}