1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-09-20 23:43:51 -04:00
parent 5fe27b5b3b
commit fa70c05354
1056 changed files with 8375 additions and 7095 deletions

View File

@@ -34,7 +34,7 @@ public class Xop_xatr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_AT
++pos;
b = src[pos];
}
int gt_pos = Bry_finder.Find_fwd(src, Byte_ascii.Gt, pos, end); if (gt_pos == Bry_.NotFound) return String_.Find_none;
int gt_pos = Bry_find_.Find_fwd(src, Byte_ascii.Gt, pos, end); if (gt_pos == Bry_.NotFound) return String_.Find_none;
byte[] bry = (byte[])xnde_hash.Get_by_mid(src, pos, gt_pos);
bry_ref.Val_(bry);
return bry == null ? String_.Find_none : bry.length + pos;
@@ -76,12 +76,12 @@ public class Xop_xatr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_AT
while (true) {
if (i == end) {
if (mode == Mode_val_quote) { // quote still open
int reset_pos = Bry_finder.Find_fwd(src, Byte_ascii.Space, atr_bgn, end); // try to find 1st space within quote; EX:"a='b c=d" should try to reset at c=d
boolean reset_found = reset_pos != Bry_finder.Not_found;
int reset_pos = Bry_find_.Find_fwd(src, Byte_ascii.Space, atr_bgn, end); // try to find 1st space within quote; EX:"a='b c=d" should try to reset at c=d
boolean reset_found = reset_pos != Bry_find_.Not_found;
valid = false; val_end = reset_found ? reset_pos : end;
Make(log_mgr, src, val_end); // create invalid atr
if (reset_found) { // space found; resume from text after space; EX: "a='b c=d"; PAGE:en.w:Aubervilliers DATE:2014-06-25
i = Bry_finder.Find_fwd_while_not_ws(src, reset_pos, end); // skip ws
i = Bry_find_.Find_fwd_while_not_ws(src, reset_pos, end); // skip ws
atr_bgn = -1;
mode = Mode_atr_bgn;
val_bfr.Clear();

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
import org.junit.*; import gplx.core.tests.*;
public class Xop_xatr_parser_tst {
@Test public void Kv_quote_double() {fxt.tst_("a=\"b\"", fxt.new_atr_("a", "b"));} private Xop_xatr_parser_fxt fxt = new Xop_xatr_parser_fxt();
@Test public void Kv_quote_single() {fxt.tst_("a='b'", fxt.new_atr_("a", "b"));}

View File

@@ -16,11 +16,12 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
import gplx.core.btries.*; import gplx.xowa.langs.*;
// Lexer entry for xml-style nodes: registers itself in the core trie under the '<' byte and
// hands token construction to the xnde worker reached through the parse context.
public class Xop_xnde_lxr implements Xop_lxr {
// Identifies this lexer by returning Xop_lxr_.Tid_xnde.
// NOTE(review): the two Lxr_tid declarations below differ only in return type (byte vs int);
// Java does not allow both in one class, so they look like the before/after lines of a diff hunk
// shown without +/- markers -- confirm which one is live before relying on either.
public byte Lxr_tid() {return Xop_lxr_.Tid_xnde;}
public int Lxr_tid() {return Xop_lxr_.Tid_xnde;}
// Adds this lexer to core_trie keyed on Byte_ascii.Lt; the wiki argument is not used here.
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Lt, this);}
// Intentionally empty: nothing is registered per language.
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
// Intentionally empty: no teardown is performed against core_trie.
public void Term(Btrie_fast_mgr core_trie) {}
// Pure delegation: forwards every argument unchanged to ctx.Xnde().Make_tkn and returns its result.
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {return ctx.Xnde().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);}
// Shared instance exposed as "_"; the constructor carries no access modifier, so it is not callable from outside the package.
public static final Xop_xnde_lxr _ = new Xop_xnde_lxr(); Xop_xnde_lxr() {}
}

View File

@@ -69,7 +69,7 @@ public class Xop_xnde_tag {
}
public Xop_xnde_tag_lang Langs_get(gplx.xowa.langs.cases.Xol_case_mgr case_mgr, int cur_lang, byte[] src, int bgn, int end) {
if (langs == null) return Xop_xnde_tag_lang._; // no langs defined; always return true; EX:<b>
if (Bry_.Eq(name_bry, src, bgn, end)) return Xop_xnde_tag_lang._; // canonical name (name_bry) is valid in all langs; EX: <section> and cur_lang=de
if (Bry_.Eq(src, bgn, end, name_bry)) return Xop_xnde_tag_lang._; // canonical name (name_bry) is valid in all langs; EX: <section> and cur_lang=de
synchronized (langs) {
langs_key.Val_(cur_lang);
}

View File

@@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.tests.*;
public class Xop_xnde_tkn_chkr extends Xop_tkn_chkr_base {
@Override public Class<?> TypeOf() {return Xop_xnde_tkn.class;}
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_xnde;}

View File

@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.xowa.apps.progs.*;
import gplx.xowa.wikis.domains.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.strings.*; import gplx.html.*;
import gplx.xowa.wikis.domains.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.strings.*; import gplx.langs.htmls.*;
import gplx.xowa.parsers.logs.*; import gplx.xowa.parsers.tblws.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.miscs.*;
public class Xop_xnde_wkr implements Xop_ctx_wkr {
public void Ctor_ctx(Xop_ctx ctx) {}
@@ -226,7 +226,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
return tkn_mkr.Bry_raw(bgn_pos, cur_pos, bry);
}
private int Make_noinclude(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int gtPos, Xop_xnde_tag tag, int tag_end_pos, boolean tag_is_closing) {
tag_end_pos = Bry_finder.Find_fwd_while(src, tag_end_pos, src_len, Byte_ascii.Space);// NOTE: must skip spaces else "<noinclude />" will not work with safesubst; PAGE:en.w:Wikipedia:Featured_picture_candidates; DATE:2014-06-24
tag_end_pos = Bry_find_.Find_fwd_while(src, tag_end_pos, src_len, Byte_ascii.Space);// NOTE: must skip spaces else "<noinclude />" will not work with safesubst; PAGE:en.w:Wikipedia:Featured_picture_candidates; DATE:2014-06-24
byte tag_end_byte = src[tag_end_pos];
if (tag_end_byte == Byte_ascii.Slash) { // inline
boolean valid = true;
@@ -251,7 +251,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
end_rhs = gtPos;
else { // <noinclude>; search for end tag
while (true) {
int end_lhs = Bry_finder.Find_fwd(src, end_bry, findPos);
int end_lhs = Bry_find_.Find_fwd(src, end_bry, findPos);
if (end_lhs == -1 || (end_lhs + end_bry_len) == src_len) break; // nothing found or EOS;
findPos = end_lhs;
for (int i = end_lhs + end_bry_len; i < src_len; i++) {
@@ -313,7 +313,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
if ( page.Html_data().Html_restricted()
&& page.Wiki().Domain_tid() != Xow_domain_type_.Int__home) {
int end_pos = gtPos + 1;
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, end_pos, Bry_.Add(gplx.html.Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, end_pos)))); // +1 to skip <
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, end_pos, Bry_.Add(gplx.langs.htmls.Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, end_pos)))); // +1 to skip <
return end_pos;
}
}
@@ -409,7 +409,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
private int Make_xtag_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_xnde_tag end_tag) {
int end_tag_id = end_tag.Id();
cur_pos = Bry_finder.Find_fwd_while_not_ws(src, cur_pos, src_len) + 1;
cur_pos = Bry_find_.Find_fwd_while_not_ws(src, cur_pos, src_len) + 1;
int prv_xnde_pos = ctx.Stack_idx_find_but_stop_at_tbl(Xop_tkn_itm_.Tid_xnde); // find any previous xnde on stack
Xop_xnde_tkn bgn_nde = (Xop_xnde_tkn)ctx.Stack_get(prv_xnde_pos);
int bgn_tag_id = bgn_nde == null ? -1 : bgn_nde.Tag().Id();
@@ -473,7 +473,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
}
if (end_tag.Restricted()) // restricted tags (like <script>) are not placed on stack; for now, just write it out
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, cur_pos, Bry_.Add(gplx.html.Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, cur_pos)))); // +1 to skip <
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, cur_pos, Bry_.Add(gplx.langs.htmls.Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, cur_pos)))); // +1 to skip <
else {
if (pre2_pending) {
pre2_pending = false;
@@ -541,7 +541,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
private int Find_xtn_end_lhs(Xop_ctx ctx, Xop_xnde_tag tag, byte[] src, int src_len, int open_bgn, int open_end, byte[] close_bry) {
int tag_bgn = open_bgn - Pfunc_tag.Xtag_len;
if (tag_bgn > -1
&& Bry_.Eq(Pfunc_tag.Xtag_bgn_lhs, src, tag_bgn, tag_bgn + Pfunc_tag.Xtag_bgn_lhs.length)) // xtn created by tag
&& Bry_.Eq(src, tag_bgn, tag_bgn + Pfunc_tag.Xtag_bgn_lhs.length, Pfunc_tag.Xtag_bgn_lhs)) // xtn created by tag
return Find_xtn_end_tag(ctx, src, src_len, open_end, close_bry, tag_bgn + Pfunc_tag.Xtag_bgn);
else { // search rest of String for case-insensitive name; NOTE: used to do CS first, then fall-back on CI; DATE:2013-12-02
xtn_end_tag_trie.Clear();
@@ -557,14 +557,14 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
// Locates the closing marker that pairs with a numbered xtag opener.
// Reads an int from the 10 bytes of src starting at tag_bgn, rebuilds the matching end marker from
// that id, searches forward for it, then steps back to the nearest '<' before the match.
// Returns the position of that '<', or the Not_found constant after logging a warning when any step fails.
// close_bry and src_len are accepted but not read in this method.
// NOTE(review): several statements below appear twice, once with Bry_finder and once with Bry_find_;
// these look like the before/after lines of a rename diff shown without +/- markers -- confirm which set is live.
private int Find_xtn_end_tag(Xop_ctx ctx, byte[] src, int src_len, int open_end, byte[] close_bry, int tag_bgn) {
// -1 is passed as the last argument and is treated as "no id could be parsed" by the check below.
int tag_id = Bry_.To_int_or(src, tag_bgn, tag_bgn + 10, -1);
if (tag_id == -1) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not extract int: page=~{0}", ctx.Cur_page().Url().To_str()); return Bry_finder.Not_found;}
if (tag_id == -1) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not extract int: page=~{0}", ctx.Cur_page().Url().To_str()); return Bry_find_.Not_found;}
// Build the end marker: Xtag_end_lhs + tag_id left-padded with '0' to 10 digits + Xtag_rhs.
Bry_bfr tmp = ctx.Wiki().Utl__bfr_mkr().Get_b128();
tmp.Add(Pfunc_tag.Xtag_end_lhs).Add_int_pad_bgn(Byte_ascii.Num_0, 10, tag_id).Add(Pfunc_tag.Xtag_rhs);
// presumably To_bry_and_rls also hands the pooled buffer back -- verify against Bry_bfr
byte[] tag_end = tmp.To_bry_and_rls();
// Forward search starts at open_end plus the length of Xtag_rhs.
int rv = Bry_finder.Find_fwd(src, tag_end, open_end + Pfunc_tag.Xtag_rhs.length);
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find end: page=~{0}", ctx.Cur_page().Url().To_str()); return Bry_finder.Not_found;}
// Walk backward from just before the marker to the preceding '<'.
rv = Bry_finder.Find_bwd(src, Byte_ascii.Lt, rv - 1);
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find <: page=~{0}", ctx.Cur_page().Url().To_str()); return Bry_finder.Not_found;}
int rv = Bry_find_.Find_fwd(src, tag_end, open_end + Pfunc_tag.Xtag_rhs.length);
if (rv == Bry_find_.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find end: page=~{0}", ctx.Cur_page().Url().To_str()); return Bry_find_.Not_found;}
rv = Bry_find_.Find_bwd(src, Byte_ascii.Lt, rv - 1);
if (rv == Bry_find_.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find <: page=~{0}", ctx.Cur_page().Url().To_str()); return Bry_find_.Not_found;}
return rv;
}
private int Make_xnde_xtn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, Xop_xnde_tag tag, int open_bgn, int open_end, int name_bgn, int name_end, int atrs_bgn, int atrs_end, Xop_xatr_itm[] atrs, boolean inline, boolean pre2_hack) {
@@ -676,7 +676,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
case Xop_xnde_tag_.Tid_source: // added on DATE:2014-06-24
case Xop_xnde_tag_.Tid_pre: // NOTE: pre must be an xtn, but does not create an xtn node (it gobbles up everything between); still need to touch the para_wkr; DATE:2014-02-20
ctx.Para().Process_block__xnde(tag, Xop_xnde_tag.Block_bgn);
if (Bry_finder.Find_fwd(src, Byte_ascii.Nl, xnde.Tag_open_end(), xnde.Tag_close_bgn()) != Bry_finder.Not_found)
if (Bry_find_.Find_fwd(src, Byte_ascii.Nl, xnde.Tag_open_end(), xnde.Tag_close_bgn()) != Bry_find_.Not_found)
ctx.Para().Process_nl(ctx, root, src, xnde.Tag_open_bgn(), xnde.Tag_open_bgn());
ctx.Para().Process_block__xnde(tag, Xop_xnde_tag.Block_end);
break;

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
import org.junit.*; import gplx.xowa.nss.*;
public class Xop_xnde_wkr__basic_tst {
private Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}