mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v1.6.5.1
This commit is contained in:
26
400_xowa/src_450_hdr/gplx/xowa/Xop_hdr_lxr.java
Normal file
26
400_xowa/src_450_hdr/gplx/xowa/Xop_hdr_lxr.java
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
class Xop_hdr_lxr implements Xop_lxr {
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_hdr;}
|
||||
public void Init_by_wiki(Xow_wiki wiki, ByteTrieMgr_fast core_trie) {core_trie.Add(Hook_bgn, this);} static final byte[] Hook_bgn = new byte[] {Byte_ascii.NewLine, Byte_ascii.Eq};
|
||||
public void Init_by_lang(Xol_lang lang, ByteTrieMgr_fast core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Hdr().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
|
||||
public static final Xop_hdr_lxr _ = new Xop_hdr_lxr(); Xop_hdr_lxr() {}
|
||||
public static final byte Hook = Byte_ascii.Eq;
|
||||
}
|
||||
33
400_xowa/src_450_hdr/gplx/xowa/Xop_hdr_tkn.java
Normal file
33
400_xowa/src_450_hdr/gplx/xowa/Xop_hdr_tkn.java
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
public class Xop_hdr_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_hdr_tkn(int bgn, int end, int hdr_len) {this.Tkn_ini_pos(false, bgn, end); this.hdr_len = hdr_len;}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_hdr;}
|
||||
public int Hdr_len() {return hdr_len;} public Xop_hdr_tkn Hdr_len_(int v) {hdr_len = v; return this;} private int hdr_len = -1;
|
||||
public int Hdr_bgn_manual() {return hdr_bgn_manual;} public Xop_hdr_tkn Hdr_bgn_manual_(int v) {hdr_bgn_manual = v; return this;} private int hdr_bgn_manual;
|
||||
public int Hdr_end_manual() {return hdr_end_manual;} public Xop_hdr_tkn Hdr_end_manual_(int v) {hdr_end_manual = v; return this;} private int hdr_end_manual;
|
||||
public boolean Hdr_html_first() {return hdr_html_first;} public Xop_hdr_tkn Hdr_html_first_y_() {hdr_html_first = true; return this;} private boolean hdr_html_first;
|
||||
public int Hdr_html_dupe_idx() {return hdr_html_dupe_idx;} private int hdr_html_dupe_idx;
|
||||
public byte[] Hdr_toc_text() {return hdr_toc_text;} public Xop_hdr_tkn Hdr_toc_text_(byte[] v) {hdr_toc_text = v; return this;} private byte[] hdr_toc_text;
|
||||
public int Hdr_html_dupe_idx_next() {
|
||||
hdr_html_dupe_idx = hdr_html_dupe_idx == 0 ? 2 : hdr_html_dupe_idx + 1;
|
||||
return hdr_html_dupe_idx;
|
||||
}
|
||||
public byte[] Hdr_html_id() {return hdr_html_id;} public Xop_hdr_tkn Hdr_html_id_(byte[] v) {hdr_html_id = v; return this;} private byte[] hdr_html_id = Bry_.Empty;
|
||||
}
|
||||
123
400_xowa/src_450_hdr/gplx/xowa/Xop_hdr_wkr.java
Normal file
123
400_xowa/src_450_hdr/gplx/xowa/Xop_hdr_wkr.java
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
public class Xop_hdr_wkr implements Xop_ctx_wkr {
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
|
||||
public void AutoClose(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
|
||||
// bgn never closed; mark inert; EX: "==a"
|
||||
Xop_hdr_tkn bgn = (Xop_hdr_tkn)tkn;
|
||||
int bgn_hdr_len = bgn.Hdr_len();
|
||||
bgn.Hdr_bgn_manual_(bgn_hdr_len);
|
||||
bgn.Hdr_len_(0);
|
||||
if (bgn_hdr_len > 1 && ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki) // NOTE: \n= is not uncommon for templates; ignore them;
|
||||
ctx.Msg_log().Add_itm_none(Xop_hdr_log.Dangling_hdr, src, bgn.Src_bgn(), bgn_pos);
|
||||
}
|
||||
public int Make_tkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_bos) bgn_pos = 0; // do not allow -1 pos
|
||||
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false);
|
||||
Close_open_itms(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
|
||||
ctx.Para().Process_block__bgn__nl_w_symbol(ctx, root, src, bgn_pos, cur_pos, Xop_xnde_tag_.Tag_h2); // pass h2; should pass h# where # is correct #, but for purpose of Para_wkr, <h2> tag does not matter
|
||||
int new_pos = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Xop_hdr_lxr.Hook); // count all =
|
||||
int hdr_len = new_pos - cur_pos + 1; // +1 b/c Hook has 1 eq: "\n="
|
||||
switch (hdr_len) {
|
||||
case 1: ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_1, src, bgn_pos, new_pos); break; // <h1>; flag
|
||||
case 2: case 3: case 4: case 5: case 6: break; // <h2>-<h6>: normal
|
||||
default: ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_7_or_more, src, bgn_pos, new_pos); break; // <h7>+; limit to 6; flag; NOTE: only 14 pages in 2011-07-27
|
||||
}
|
||||
|
||||
Xop_hdr_tkn tkn = tkn_mkr.Hdr(bgn_pos, new_pos, hdr_len); // make tkn
|
||||
ctx.StackTkn_add(root, tkn);
|
||||
return new_pos;
|
||||
}
|
||||
public int Make_tkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int stackPos, int end_hdr_len) {// REF.MW: Parser|doHeadings
|
||||
if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_tmpl_curly_bgn) return ctx.Lxr_make_txt_(cur_pos);
|
||||
Xop_hdr_tkn hdr = (Xop_hdr_tkn)ctx.Stack_pop_til(root, src, stackPos, false, bgn_pos, cur_pos);
|
||||
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false); // end any apos; EX: ==''a==
|
||||
int hdr_len = hdr.Hdr_len(), bgn_manual = 0, end_manual = 0;
|
||||
boolean dirty = false;
|
||||
if (end_hdr_len < hdr_len) { // mismatch: end has more; adjust hdr
|
||||
bgn_manual = hdr_len - end_hdr_len;
|
||||
hdr_len = end_hdr_len;
|
||||
ctx.Msg_log().Add_itm_none(Xop_hdr_log.Mismatched, src, bgn_pos, cur_pos);
|
||||
if (hdr_len == 1) ctx.Msg_log().Add_itm_none(Xop_hdr_log.Len_1, src, bgn_pos, cur_pos);
|
||||
dirty = true;
|
||||
}
|
||||
else if (end_hdr_len > hdr_len) { // mismatch: hdr has more; adjust variables
|
||||
end_manual = end_hdr_len - hdr_len;
|
||||
ctx.Msg_log().Add_itm_none(Xop_hdr_log.Mismatched, src, bgn_pos, cur_pos);
|
||||
dirty = true;
|
||||
}
|
||||
if (hdr_len > 6) { // <h7>+; limit to 6; NOTE: both bgn/end are equal length; EX: bgn=8,end=7 -> bgn=7,end=7;bgn_manual=1
|
||||
bgn_manual = end_manual = hdr_len - 6;
|
||||
hdr_len = 6;
|
||||
dirty = true;
|
||||
}
|
||||
if (dirty)
|
||||
hdr.Hdr_bgn_manual_(bgn_manual).Hdr_end_manual_(end_manual).Hdr_len_(hdr_len);
|
||||
cur_pos = Find_fwd_while_ws_hdr_version(src, cur_pos, src_len); // NOTE: hdr gobbles up trailing ws; EX: "==a== \n\t \n \nb" gobbles up all 3 "\n"s; otherwise para_wkr will process <br/>
|
||||
ctx.Para().Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_h2);
|
||||
hdr.Subs_move(root);
|
||||
hdr.Src_end_(cur_pos);
|
||||
if (ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki)
|
||||
ctx.Cur_page().Hdr_mgr().Add(ctx, hdr, src);
|
||||
return cur_pos;
|
||||
}
|
||||
private void Close_open_itms(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
int stack_pos = -1, stack_len = ctx.Stack_len(); boolean stop = false;
|
||||
for (int i = 0; i < stack_len; i++) { // loop over stack
|
||||
Xop_tkn_itm prv_tkn = ctx.Stack_get(i);
|
||||
switch (prv_tkn.Tkn_tid()) { // find first list/hdr; close everything until this
|
||||
case Xop_tkn_itm_.Tid_list:
|
||||
case Xop_tkn_itm_.Tid_hdr:
|
||||
stack_pos = i; stop = true; break;
|
||||
}
|
||||
if (stop) break;
|
||||
}
|
||||
if (stack_pos == -1) return;
|
||||
ctx.Stack_pop_til(root, src, stack_pos, true, bgn_pos, cur_pos);
|
||||
}
|
||||
private static int Find_fwd_while_ws_hdr_version(byte[] src, int cur, int end) {
|
||||
int last_nl = -1;
|
||||
while (true) {
|
||||
if (cur == end) return cur;
|
||||
byte b = src[cur];
|
||||
switch (b) {
|
||||
case Byte_ascii.NewLine:
|
||||
cur++;
|
||||
last_nl = cur;
|
||||
break;
|
||||
case Byte_ascii.Space:
|
||||
case Byte_ascii.Tab:
|
||||
cur++;
|
||||
break;
|
||||
default:
|
||||
return last_nl == -1 ? cur : last_nl - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
NOTE:hdr.trailing_nl
|
||||
. by design, the hdr_tkn's src_end will not include the trailing \n
|
||||
.. for example, for "\n==a==\n", the src_bgn will be 0, but the src_end will be 6
|
||||
.. note that at 6, it does not include the \n at pos 6
|
||||
. this is needed to leave the \n for the parser to handle other tkns, such as hdrs, tblws, lists.
|
||||
. for example, in "\n==a==\n*b", if the \n at pos 6 was taken by the hdr_tkn, then the parser would encounter a "*" instead of a "\n*"
|
||||
*/
|
||||
127
400_xowa/src_450_hdr/gplx/xowa/Xop_hdr_wkr_basic_tst.java
Normal file
127
400_xowa/src_450_hdr/gplx/xowa/Xop_hdr_wkr_basic_tst.java
Normal file
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_hdr_wkr_basic_tst {
|
||||
@Before public void init() {fxt.Reset();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void H2() {fxt.Test_parse_page_wiki_str("==a==" , "<h2>a</h2>\n");}
|
||||
@Test public void H3() {fxt.Test_parse_page_wiki_str("===a===" , "<h3>a</h3>\n");}
|
||||
@Test public void H6_limit() {fxt.Test_parse_page_wiki_str("=======a=======" , "<h6>=a=</h6>\n");}
|
||||
@Test public void Mismatch_bgn() {fxt.Test_parse_page_wiki_str("=====a==" , "<h2>===a</h2>\n");}
|
||||
@Test public void Mismatch_end() {fxt.Test_parse_page_wiki_str("==a=====" , "<h2>a===</h2>\n");}
|
||||
@Test public void Dangling() {fxt.Test_parse_page_wiki_str("==a" , "==a");}
|
||||
@Test public void Comment_bgn() {fxt.Test_parse_page_all_str ("<!--b-->==a==" , "<h2>a</h2>\n");}
|
||||
@Test public void Comment_end() {fxt.Test_parse_page_all_str ("==a==<!--b-->" , "<h2>a</h2>\n");}
|
||||
@Test public void Ws_end() { // PURPOSE: "==\n" merges all ws following it; \n\n\n is not transformed by Para_wkr to "<br/>"
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a== \t"
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, "b"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, "b"
|
||||
));
|
||||
}
|
||||
@Test public void Many() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a=="
|
||||
, "===b==="
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, ""
|
||||
, "<h3>b</h3>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Hdr_w_tblw() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a=="
|
||||
, "{|"
|
||||
, "|+"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, "<table>"
|
||||
, " <caption>"
|
||||
, " </caption>"
|
||||
, "</table>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Hdr_w_hr() {
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a=="
|
||||
, "----"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>a</h2>"
|
||||
, "<hr/>"
|
||||
));
|
||||
}
|
||||
@Test public void Mix_apos_dangling() {fxt.Test_parse_page_wiki_str("==''a==" , "<h2><i>a</i></h2>\n");}
|
||||
@Test public void Mix_xnde_dangling() {fxt.Test_parse_page_wiki_str("==<i>a==" , "<h2><i>a</i></h2>\n");}
|
||||
@Test public void Mix_tblw_cell() {fxt.Test_parse_page_wiki_str("==a!!==" , "<h2>a!!</h2>\n");}
|
||||
@Test public void Ws() {fxt.Test_parse_page_wiki_str("== a b ==" , "<h2> a b </h2>\n");}
|
||||
@Test public void Err_hdr() {fxt.Init_log_(Xop_hdr_log.Mismatched) .Test_parse_page_wiki_str("====a== ==" , "<h2>==a== </h2>\n").tst_Log_check();}
|
||||
@Test public void Err_end_hdr_is_1() {fxt.Init_log_(Xop_hdr_log.Mismatched, Xop_hdr_log.Len_1).Test_parse_page_wiki_str("==a=" , "<h1>=a</h1>\n").tst_Log_check();}
|
||||
@Test public void Html_hdr_many() {
|
||||
fxt.Wtr_cfg().Toc_show_(true);
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "==a=="
|
||||
, "==a=="
|
||||
, "==a=="
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2><span class='mw-headline' id='a'>a</span></h2>"
|
||||
, ""
|
||||
, "<h2><span class='mw-headline' id='a_2'>a</span></h2>"
|
||||
, ""
|
||||
, "<h2><span class='mw-headline' id='a_3'>a</span></h2>"
|
||||
, ""
|
||||
));
|
||||
fxt.Wtr_cfg().Toc_show_(false);
|
||||
}
|
||||
@Test public void Hdr_inside_dangling_tmpl_fix() { // PURPOSE: one-off fix to handle == inside dangling tmpl; DATE:2014-02-11
|
||||
fxt.Test_parse_page_all_str("{{a|}\n==b=="
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "{{a|}"
|
||||
, ""
|
||||
, "<h2>b</h2>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
// @Test public void Pfunc() {// PAGE:en.w:Wikipedia:WikiProject_Articles_for_creation/Submissions/List DATE:2014-06-24
|
||||
// fxt.Test_parse_page_all_str
|
||||
// ( "{{#if:exists|==a==|no}}"
|
||||
// , String_.Concat_lines_nl_skip_last
|
||||
// ( "<h2>a</h2>"
|
||||
// , ""
|
||||
// ));
|
||||
// }
|
||||
// @Test public void Hdr_inside_dangling_tmpl_fix_2() { // PURPOSE: hdr == inside dangling tmpl; DATE:2014-06-10
|
||||
// fxt.Init_defn_add("Print", "{{{1}}}");
|
||||
// fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
// ( "=={{Print|b=="
|
||||
// , "}}"
|
||||
// ), String_.Concat_lines_nl_skip_last
|
||||
// ( "==b="
|
||||
// , ""
|
||||
// ));
|
||||
// }
|
||||
}
|
||||
26
400_xowa/src_450_hdr/gplx/xowa/Xop_hdr_wkr_para_tst.java
Normal file
26
400_xowa/src_450_hdr/gplx/xowa/Xop_hdr_wkr_para_tst.java
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_hdr_wkr_para_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Hdr_at_bos() { // PURPOSE: check that BOS==a== does not throw null ref in para; DATE:2014-02-18
|
||||
fxt.Test_parse_page_all_str("==a==", "<h2>a</h2>\n");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user