1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2016-01-03 21:27:38 -05:00
parent 9509363f46
commit 096045614c
647 changed files with 11693 additions and 7648 deletions

View File

@@ -23,6 +23,7 @@ public class Xop_xnde_tag {
this.name_bry = Bry_.new_a7(name_str);
this.name_str = name_str;
this.name_len = name_bry.length;
this.xtn_bgn_tag = Bry_.Add(Byte_ascii.Angle_bgn_bry, name_bry);
this.xtn_end_tag = Bry_.Add(Xop_xnde_tag_.Xtn_end_tag_bgn, name_bry); // always force endtag; needed for <noinclude>
this.xtn_end_tag_tmp = new byte[xtn_end_tag.length]; Array_.Copy(xtn_end_tag, xtn_end_tag_tmp);
}
@@ -30,6 +31,7 @@ public class Xop_xnde_tag {
public byte[] Name_bry() {return name_bry;} private final byte[] name_bry;
public String Name_str() {return name_str;} private final String name_str;
public int Name_len() {return name_len;} private final int name_len;
public byte[] Xtn_bgn_tag() {return xtn_bgn_tag;} private final byte[] xtn_bgn_tag;
public byte[] Xtn_end_tag() {return xtn_end_tag;} private final byte[] xtn_end_tag;
public byte[] Xtn_end_tag_tmp() {return xtn_end_tag_tmp;} private final byte[] xtn_end_tag_tmp;
public boolean Xtn() {return xtn;} public Xop_xnde_tag Xtn_() {xtn = true; return this;} private boolean xtn;

View File

@@ -219,10 +219,10 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
private static Xop_tkn_itm Make_bry_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int bgn_pos, int cur_pos) {
int len = cur_pos - bgn_pos;
byte[] bry = null;
if (len == 1 && src[bgn_pos] == Byte_ascii.Lt) bry = Html_entity_.Lt_bry;
if (len == 1 && src[bgn_pos] == Byte_ascii.Lt) bry = Gfh_entity_.Lt_bry;
else if (len == 2 && src[bgn_pos] == Byte_ascii.Lt
&& src[bgn_pos + 1] == Byte_ascii.Slash) bry = Bry_escape_lt_slash; // NOTE: should use bgn_pos, not cur_pos; DATE:2014-10-22
else bry = Bry_.Add(Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, cur_pos)); // +1 to skip <
else bry = Bry_.Add(Gfh_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, cur_pos)); // +1 to skip <
return tkn_mkr.Bry_raw(bgn_pos, cur_pos, bry);
}
private int Make_noinclude(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int gtPos, Xop_xnde_tag tag, int tag_end_pos, boolean tag_is_closing) {
@@ -313,7 +313,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
if ( page.Html_data().Html_restricted()
&& page.Wiki().Domain_tid() != Xow_domain_tid_.Int__home) {
int end_pos = gtPos + 1;
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, end_pos, Bry_.Add(gplx.langs.htmls.Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, end_pos)))); // +1 to skip <
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, end_pos, Bry_.Add(gplx.langs.htmls.Gfh_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, end_pos)))); // +1 to skip <
return end_pos;
}
}
@@ -473,7 +473,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
}
if (end_tag.Restricted()) // restricted tags (like <script>) are not placed on stack; for now, just write it out
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, cur_pos, Bry_.Add(gplx.langs.htmls.Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, cur_pos)))); // +1 to skip <
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, cur_pos, Bry_.Add(gplx.langs.htmls.Gfh_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, cur_pos)))); // +1 to skip <
else {
if (pre2_pending) {
pre2_pending = false;
@@ -538,13 +538,13 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
return found ? rv : Bry_find_.Not_found;
}
private int Find_xtn_end_lhs(Xop_ctx ctx, Xop_xnde_tag tag, byte[] src, int src_len, int open_bgn, int open_end, byte[] close_bry) {
private int Find_xtn_end_lhs(Xop_ctx ctx, Xop_xnde_tag tag, byte[] src, int src_len, int open_bgn, int open_end, byte[] open_bry, byte[] close_bry) {
int tag_bgn = open_bgn - Pfunc_tag.Xtag_len;
if (tag_bgn > -1
&& Bry_.Eq(src, tag_bgn, tag_bgn, Pfunc_tag.Xtag_bgn_lhs)) // xtn created by tag
return Xop_xnde_wkr_.Find_xtag_end(ctx, src, open_end, src_len);
else // search rest of String for case-insensitive name; NOTE: used to do CS first, then fall-back on CI; DATE:2013-12-02
return Xop_xnde_wkr_.Find_xtn_end(ctx, src, open_end, src_len, close_bry);
return Xop_xnde_wkr_.Find_xtn_end(ctx, src, open_end, src_len, open_bry, close_bry);
}
private int Make_xnde_xtn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, Xop_xnde_tag tag, int open_bgn, int open_end, int name_bgn, int name_end, int atrs_bgn, int atrs_end, Mwh_atr_itm[] atrs, boolean inline, boolean pre2_hack) {
// NOTE: find end_tag that exactly matches bgnTag; must be case sensitive;
@@ -555,12 +555,12 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
xnde.Tag_close_rng_(open_end, open_end); // NOTE: inline tag, so set TagClose to open_end; should noop
}
else {
byte[] close_bry = tag.Xtn_end_tag_tmp(); // get tmp bry (so as not to new)
byte[] close_bry = tag.Xtn_end_tag_tmp(); // get tmp bry (so as not to new)
if (tag.Langs() != null) { // cur tag has langs; EX:<section>; DATE:2014-07-18
Xop_xnde_tag_lang tag_lang = tag.Langs_get(ctx.Lang().Case_mgr(), ctx.Cur_page().Lang().Lang_id(), src, name_bgn, name_end);
if (tag_lang == null) // tag does not match lang; EX:<trecho> and lang=de;
return ctx.Lxr_make_txt_(open_end);
if (tag_lang != Xop_xnde_tag_lang.Instance) // tag matches; note Xop_xnde_tag_lang._ is a wildcard match; EX:<section>
if (tag_lang != Xop_xnde_tag_lang.Instance) // tag matches; note Xop_xnde_tag_lang._ is a wildcard match; EX:<section>
close_bry = tag_lang.Xtn_end_tag_tmp();
}
int src_offset = open_bgn - 1; // open bgn to start at <; -2 to ignore </ ; +1 to include <
@@ -568,7 +568,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
for (int i = 2; i < close_ary_len; i++) // 2 to ignore </
close_bry[i] = src[src_offset + i];
boolean auto_close = false;
int close_bgn = Find_xtn_end_lhs(ctx, tag, src, src_len, open_bgn, open_end, close_bry);
int close_bgn = Find_xtn_end_lhs(ctx, tag, src, src_len, open_bgn, open_end, tag.Xtn_bgn_tag(), close_bry);
if (close_bgn == Bry_find_.Not_found) auto_close = true; // auto-close if end not found; verified with <poem>, <gallery>, <imagemap>, <hiero>, <references> DATE:2014-08-23
int close_end = -1;
if (auto_close) {
@@ -608,8 +608,8 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
case Xop_xnde_tag_.Tid_xowa_cmd: xnde_xtn = tkn_mkr.Xnde_xowa_cmd(); break;
case Xop_xnde_tag_.Tid_math: xnde_xtn = tkn_mkr.Xnde_math(); break;
case Xop_xnde_tag_.Tid_poem: xnde_xtn = tkn_mkr.Xnde_poem(); break;
case Xop_xnde_tag_.Tid_ref: xnde_xtn = gplx.xowa.xtns.cite.References_nde.Enabled ? tkn_mkr.Xnde_ref() : null; break;
case Xop_xnde_tag_.Tid_references: xnde_xtn = gplx.xowa.xtns.cite.References_nde.Enabled ? tkn_mkr.Xnde_references() : null; break;
case Xop_xnde_tag_.Tid_ref: xnde_xtn = gplx.xowa.xtns.cites.References_nde.Enabled ? tkn_mkr.Xnde_ref() : null; break;
case Xop_xnde_tag_.Tid_references: xnde_xtn = gplx.xowa.xtns.cites.References_nde.Enabled ? tkn_mkr.Xnde_references() : null; break;
case Xop_xnde_tag_.Tid_gallery: xnde_xtn = tkn_mkr.Xnde_gallery(); break;
case Xop_xnde_tag_.Tid_imageMap: xnde_xtn = tkn_mkr.Xnde_imageMap(); break;
case Xop_xnde_tag_.Tid_hiero: xnde_xtn = tkn_mkr.Xnde_hiero(); break;

View File

@@ -20,8 +20,8 @@ import gplx.core.primitives.*; import gplx.core.btries.*;
import gplx.xowa.xtns.pfuncs.strings.*;
class Xop_xnde_wkr_ {
private static final Btrie_slim_mgr xtn_end_tag_trie = Btrie_slim_mgr.ci_a7(); // NOTE:ci.ascii:MW_const.en; listed XML node names are en
private static final int Find_xtn_end__tid__end = 0, Find_xtn_end__tid__xtag = 1;
private static final Int_obj_ref Find_xtn_end__key__end = Int_obj_ref.new_(Find_xtn_end__tid__end), Find_xtn_end__key__xtag = Int_obj_ref.new_(Find_xtn_end__tid__xtag);
private static final int Find_xtn_end__tid__bgn = 0, Find_xtn_end__tid__end = 1, Find_xtn_end__tid__xtag = 2;
private static final Int_obj_ref Find_xtn_end__key__bgn = Int_obj_ref.new_(Find_xtn_end__tid__bgn), Find_xtn_end__key__end = Int_obj_ref.new_(Find_xtn_end__tid__end), Find_xtn_end__key__xtag = Int_obj_ref.new_(Find_xtn_end__tid__xtag);
private static final Bry_bfr Find_xtag_end__bfr = Bry_bfr.new_(32);
public static void AutoClose_handle_dangling_nde_in_caption(Xop_root_tkn root, Xop_tkn_itm owner) {
int subs_bgn = -1, subs_len = owner.Subs_len();
@@ -35,20 +35,34 @@ class Xop_xnde_wkr_ {
if (subs_bgn != -1)
root.Subs_move(owner, subs_bgn, subs_len); // move everything after "|" back to root
}
public static int Find_xtn_end(Xop_ctx ctx, byte[] src, int open_end, int src_end, byte[] close_bry) {
public static int Find_xtn_end(Xop_ctx ctx, byte[] src, int open_end, int src_end, byte[] open_bry, byte[] close_bry) {
xtn_end_tag_trie.Clear();
xtn_end_tag_trie.Add_obj(Pfunc_tag.Xtag_bgn_lhs, Find_xtn_end__key__xtag);
xtn_end_tag_trie.Add_obj(open_bry, Find_xtn_end__key__bgn);
xtn_end_tag_trie.Add_obj(close_bry, Find_xtn_end__key__end);
int depth = 0;
for (int i = open_end; i < src_end; ++i) {
Object o = xtn_end_tag_trie.Match_bgn(src, i, src_end);
if (o != null) {
int tid = ((Int_obj_ref)o).Val();
if (tid == Find_xtn_end__tid__end) // xtn_end found; use it
return i;
else { // xtag found; skip over it; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-03
int xtag_end = Find_xtag_end(ctx, src, i, src_end);
int angle_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, xtag_end, src_end);
i = angle_end;
switch (tid) {
case Find_xtn_end__tid__bgn: // handle nested refs; PAGE:en.w:UK; DATE:2015-12-26
int angle_end_pos = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, i, src_end); if (angle_end_pos == Bry_find_.Not_found) {Xoa_app_.Usr_dlg().Warn_many("", "", "parser.xtn: could not find angle_end: page=~{0}", ctx.Cur_page().Url().To_str()); return Bry_find_.Not_found;}
if (src[angle_end_pos -1] == Byte_ascii.Slash) {}
else
++depth;
break;
case Find_xtn_end__tid__end: // xtn_end found; use it
if (depth == 0)
return i;
else
--depth;
break;
case Find_xtn_end__tid__xtag: // xtag found; skip over it; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-03
int xtag_end = Find_xtag_end(ctx, src, i, src_end);
int angle_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, xtag_end, src_end);
i = angle_end;
break;
}
}
}

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*; import gplx.xowa.wikis.nss.*;
public class Xop_xnde_wkr__basic_tst {
private Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Escape_lt() { // PURPOSE: some templates have unknown tags; PAGE:en.w:PHP
fxt.Init_para_y_();

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_xnde_wkr__blockquote_tst {
private Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Pre() { // PURPOSE: preserve leading spaces within blockquote; PAGE:en.w:Tenerife_airport_disaster
fxt.Init_para_y_();

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*; import gplx.xowa.parsers.lists.*;
public class Xop_xnde_wkr__err_dangling_tst {
private Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Basic() {
fxt.Init_log_(Xop_xnde_log.Dangling_xnde)

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_xnde_wkr__err_malformed_tst {
private Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Lt_only() {
fxt.Test_parse_page_wiki("<", fxt.tkn_txt_(0, 1));

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_xnde_wkr__err_misc_tst {
private Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Error_br_removed() {
fxt.Init_para_y_();

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_xnde_wkr__include_basic_tst {
private Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
@Before public void init() {fxt.Reset();}
@Test public void Tmpl_includeonly() {fxt.Test_parse_tmpl_str_test("a<includeonly>b</includeonly>c" , "{{test}}", "abc");}
@Test public void Tmpl_noinclude() {fxt.Test_parse_tmpl_str_test("a<noinclude>b</noinclude>c" , "{{test}}", "ac");}

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_xnde_wkr__include_uncommon_tst {
private Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
@Before public void init() {fxt.Reset();}
@Test public void Ex_Tmpl_io_oi() { // PURPOSE: <includeonly> not parsing internals; PAGE:en.w:[[Template:MONTHNAME]]
fxt.Test_parse_tmpl_str_test("<includeonly>{{#if:{{{1}}}|a|b}}</includeonly><noinclude>c</noinclude>", "{{test|1}}", "a");

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_xnde_wkr__li_tst {
private Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Inside_tblx() { // PURPOSE: auto-close <li> (EX: "<li>a<li>") was causing 3rd <li> to close incorrectly
fxt.Test_parse_page_wiki_str

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_xnde_wkr__nowiki_tst {
private Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Basic() {
fxt.Test_parse_page_wiki_str

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_xnde_wkr__tblx_tst {
private Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Table() {
fxt.Test_parse_page_wiki("a<table><tr><td>b</td></tr></table>c"
@@ -61,7 +61,7 @@ public class Xop_xnde_wkr__tblx_tst {
( "<table>"
, " <tr>"
, " <td>"
, "<a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"B\" src=\"file:///mem/wiki/repo/trg/thumb/7/0/A.png/150px.png\" width=\"150\" height=\"0\" /></a>"
, "<a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xoimg_0\" alt=\"B\" src=\"file:///mem/wiki/repo/trg/thumb/7/0/A.png/150px.png\" width=\"150\" height=\"0\" /></a>"
, " </td>"
, " </tr>"
, "</table>"

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_xnde_wkr__text_block_tst {
private Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Source_wikitext() { // PURPOSE.ASSERT: wikitext should be rendered literally; DATE:2014-03-11
fxt.Test_parse_page_wiki_str("<source>''a''</source>", "<div class=\"mw-highlight\"><pre style=\"overflow:auto\">''a''</pre></div>");

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_xnde_wkr__tidy_tst {
private Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Sub_sup_autocorrect() {
fxt.Test_parse_page_wiki_str("<sub>a</sup>b", "<sub>a</sub>b");