mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Source: Restore broken commit
This commit is contained in:
29
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_lxr.java
Normal file
29
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_lxr.java
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_amp_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_amp;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Amp, this);}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
return ctx.Amp().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
|
||||
}
|
||||
public static final Xop_amp_lxr Instance = new Xop_amp_lxr();
|
||||
}
|
||||
157
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_mgr.java
Normal file
157
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_mgr.java
Normal file
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.langs.htmls.entitys.*;
|
||||
public class Xop_amp_mgr { // TS
|
||||
private static final Btrie_rv trv = new Btrie_rv();
|
||||
public Btrie_slim_mgr Amp_trie() {return amp_trie;} private final Btrie_slim_mgr amp_trie = Gfh_entity_trie.Instance;
|
||||
public Xop_amp_mgr_rslt Parse_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int bgn) {
|
||||
int fail_pos = amp_pos + 1; // default to fail pos which is after &
|
||||
|
||||
// check amp_trie; EX: 'lt'
|
||||
Xop_amp_mgr_rslt rv = new Xop_amp_mgr_rslt();
|
||||
Gfh_entity_itm itm; int cur;
|
||||
synchronized (trv) {
|
||||
itm = (Gfh_entity_itm)amp_trie.Match_at(trv, src, bgn, src_len);
|
||||
cur = trv.Pos();
|
||||
}
|
||||
|
||||
if (itm == null) {
|
||||
rv.Pass_n_(fail_pos);
|
||||
return rv;
|
||||
}
|
||||
|
||||
// check itm
|
||||
switch (itm.Tid()) {
|
||||
// letters; EX: '<'
|
||||
case Gfh_entity_itm.Tid_name_std:
|
||||
case Gfh_entity_itm.Tid_name_xowa:
|
||||
rv.Pos_(cur);
|
||||
rv.Tkn_(tkn_mkr.Amp_txt(amp_pos, cur, itm));
|
||||
return rv;
|
||||
// numbers; EX: '{' 'ģ'
|
||||
case Gfh_entity_itm.Tid_num_hex:
|
||||
case Gfh_entity_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = itm.Tid() == Gfh_entity_itm.Tid_num_hex;
|
||||
boolean pass = Parse_ncr(rv, ncr_is_hex, src, src_len, amp_pos, cur);
|
||||
if (pass) { // NOTE: do not set rv.Pos_(); will be set by Parse_ncr
|
||||
rv.Tkn_(tkn_mkr.Amp_num(amp_pos, rv.Pos(), rv.Val()));
|
||||
return rv;
|
||||
}
|
||||
else {
|
||||
rv.Pass_n_(fail_pos);
|
||||
return rv;
|
||||
}
|
||||
default: throw Err_.new_unhandled_default(itm.Tid());
|
||||
}
|
||||
}
|
||||
public boolean Parse_ncr(Xop_amp_mgr_rslt rv, boolean ncr_is_hex, byte[] src, int src_len, int amp_pos, int num_bgn) {
|
||||
int fail_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
|
||||
// find semic; fail if none found
|
||||
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, num_bgn, src_len);
|
||||
if (semic_pos == Bry_find_.Not_found) return rv.Pass_n_(fail_pos);
|
||||
int num_end = semic_pos - 1; // num_end = pos before semicolon
|
||||
|
||||
// calc amp_val; EX: Σ -> 931; Σ -> 931;
|
||||
int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
|
||||
for (int i = num_end; i >= num_bgn; i--) {
|
||||
byte b = src[i];
|
||||
if (ncr_is_hex) {
|
||||
if (b >= 48 && b <= 57) cur = b - 48;
|
||||
else if (b >= 65 && b <= 70) cur = b - 55;
|
||||
else if (b >= 97 && b <= 102) cur = b - 87;
|
||||
else if((b >= 71 && b <= 90)
|
||||
|| (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
|
||||
else return rv.Pass_n_(fail_pos);
|
||||
}
|
||||
else {
|
||||
cur = b - Byte_ascii.Num_0;
|
||||
if (cur < 0 || cur > 10) return rv.Pass_n_(fail_pos);
|
||||
}
|
||||
val += cur * factor;
|
||||
if (val > gplx.core.intls.Utf8_.Codepoint_max) return rv.Pass_n_(fail_pos); // fail if value > largest_unicode_codepoint
|
||||
factor *= multiple;
|
||||
}
|
||||
return rv.Pass_y_(semic_pos + 1, val); // +1 to position after semic
|
||||
}
|
||||
public byte[] Decode_as_bry(byte[] src) {
|
||||
if (src == null) return src;
|
||||
boolean dirty = false;
|
||||
int end = src.length;
|
||||
int pos = 0;
|
||||
Xop_amp_mgr_rslt amp_rv = null;
|
||||
Bry_bfr bfr = null;
|
||||
Btrie_rv trv = null;
|
||||
|
||||
// scan for &
|
||||
while (pos < end) {
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Amp) { // & found
|
||||
int nxt_pos = pos + 1;
|
||||
if (nxt_pos < end) { // check & is not eos
|
||||
byte nxt_b = src[nxt_pos];
|
||||
|
||||
if (trv == null) trv = new Btrie_rv();
|
||||
Object amp_obj = amp_trie.Match_at_w_b0(trv, nxt_b, src, nxt_pos, end);
|
||||
int amp_pos = trv.Pos();
|
||||
|
||||
if (amp_obj != null) {
|
||||
if (!dirty) { // 1st amp found; add preceding String to bfr
|
||||
if (bfr == null) {
|
||||
bfr = Bry_bfr_.Get();
|
||||
dirty = true;
|
||||
}
|
||||
bfr.Add_mid(src, 0, pos);
|
||||
}
|
||||
Gfh_entity_itm amp_itm = (Gfh_entity_itm)amp_obj;
|
||||
switch (amp_itm.Tid()) {
|
||||
case Gfh_entity_itm.Tid_name_std:
|
||||
case Gfh_entity_itm.Tid_name_xowa:
|
||||
bfr.Add(amp_itm.U8_bry());
|
||||
pos = amp_pos;
|
||||
break;
|
||||
case Gfh_entity_itm.Tid_num_hex:
|
||||
case Gfh_entity_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = amp_itm.Tid() == Gfh_entity_itm.Tid_num_hex;
|
||||
int int_bgn = amp_pos;
|
||||
if (amp_rv == null)
|
||||
amp_rv = new Xop_amp_mgr_rslt();
|
||||
boolean pass = Parse_ncr(amp_rv, ncr_is_hex, src, end, pos, int_bgn);
|
||||
if (pass)
|
||||
bfr.Add_u8_int(amp_rv.Val());
|
||||
else
|
||||
bfr.Add_mid(src, pos, nxt_pos);
|
||||
pos = amp_rv.Pos();
|
||||
break;
|
||||
default:
|
||||
throw Err_.new_unhandled_default(amp_itm.Tid());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (dirty)
|
||||
bfr.Add_byte(b);
|
||||
++pos;
|
||||
}
|
||||
return dirty ? bfr.To_bry_and_clear_and_rls() : src;
|
||||
}
|
||||
public static final Xop_amp_mgr Instance = new Xop_amp_mgr(); Xop_amp_mgr() {}
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xop_amp_mgr__decode__tst {
|
||||
@Before public void init() {} private final Xop_amp_mgr_fxt fxt = new Xop_amp_mgr_fxt();
|
||||
@Test public void Text() {fxt.Test__decode_as_bry("a" , "a");}
|
||||
@Test public void Name() {fxt.Test__decode_as_bry("&" , "&");}
|
||||
@Test public void Name_w_text() {fxt.Test__decode_as_bry("a&b" , "a&b");}
|
||||
@Test public void Name_fail_semic_missing() {fxt.Test__decode_as_bry("a&b" , "a&b");}
|
||||
@Test public void Name_fail_amp_only() {fxt.Test__decode_as_bry("a&" , "a&");}
|
||||
@Test public void Num_fail() {fxt.Test__decode_as_bry("&#!;" , "&#!;");} // ! is not valid num
|
||||
@Test public void Hex_fail() {fxt.Test__decode_as_bry("&#x!;" , "&#x!;");} // ! is not valid hex
|
||||
@Test public void Num_basic() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Num_zero_padded() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_upper() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_lower() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_zero_padded() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_upper_x() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Num_fail_large_codepoint() {fxt.Test__decode_as_bry("�" , "�");}
|
||||
@Test public void Num_ignore_extra_x() {fxt.Test__decode_as_bry("&#xx26D0;" , Char_.To_str(Char_.By_int(9936)));} // 2nd x is ignored
|
||||
}
|
||||
class Xop_amp_mgr_fxt {
|
||||
private final Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance;
|
||||
public void Test__decode_as_bry(String raw, String expd) {
|
||||
Gftest.Eq__str(expd, String_.new_u8(amp_mgr.Decode_as_bry(Bry_.new_u8(raw))));
|
||||
}
|
||||
public void Test__parse_tkn__ent(String raw, String expd) {
|
||||
Xop_amp_mgr_rslt rv = Exec__parse_tkn(raw);
|
||||
Xop_amp_tkn_ent tkn = (Xop_amp_tkn_ent)rv.Tkn();
|
||||
Gftest.Eq__byte(Xop_tkn_itm_.Tid_html_ref, tkn.Tkn_tid());
|
||||
Gftest.Eq__str(expd, tkn.Xml_name_bry());
|
||||
}
|
||||
public void Test__parse_tkn__ncr(String raw, int expd) {
|
||||
Xop_amp_mgr_rslt rv = Exec__parse_tkn(raw);
|
||||
Xop_amp_tkn_num tkn = (Xop_amp_tkn_num)rv.Tkn();
|
||||
Gftest.Eq__byte(Xop_tkn_itm_.Tid_html_ncr, tkn.Tkn_tid());
|
||||
Gftest.Eq__int(expd, tkn.Val());
|
||||
}
|
||||
public void Test__parse_tkn__txt(String raw, int expd) {
|
||||
Xop_amp_mgr_rslt rv = Exec__parse_tkn(raw);
|
||||
Gftest.Eq__null(Bool_.Y, rv.Tkn());
|
||||
Gftest.Eq__int(expd, rv.Pos());
|
||||
}
|
||||
private Xop_amp_mgr_rslt Exec__parse_tkn(String raw) {
|
||||
byte[] src = Bry_.new_u8(raw);
|
||||
return amp_mgr.Parse_tkn(new Xop_tkn_mkr(), src, src.length, 0, 1);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xop_amp_mgr__parse_tkn__tst {
|
||||
@Before public void init() {} private final Xop_amp_mgr_fxt fxt = new Xop_amp_mgr_fxt();
|
||||
@Test public void Ent() {fxt.Test__parse_tkn__ent("&" , "&");} // check for html_ref
|
||||
@Test public void Ent__fail() {fxt.Test__parse_tkn__txt("&nil;" , 1);}
|
||||
@Test public void Num__nex() {fxt.Test__parse_tkn__ncr("Σ" , 931);} // check for html_ncr; Σ: http://en.wikipedia.org/wiki/Numeric_character_reference
|
||||
@Test public void Num__dec() {fxt.Test__parse_tkn__ncr("Σ" , 931);}
|
||||
@Test public void Num__fail() {fxt.Test__parse_tkn__txt("&#" , 1);}
|
||||
}
|
||||
42
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_mgr_rslt.java
Normal file
42
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_mgr_rslt.java
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_amp_mgr_rslt {
|
||||
public Xop_amp_mgr_rslt(int pos, int val, Xop_tkn_itm tkn) {
|
||||
this.pos = pos;
|
||||
this.val = val;
|
||||
this.tkn = tkn;
|
||||
}
|
||||
public Xop_amp_mgr_rslt() {}
|
||||
public boolean Pass() {return pass;} private boolean pass; public void Valid_(boolean v) {this.pass = v;}
|
||||
public int Pos() {return pos;} private int pos; public void Pos_(int v) {this.pos = v;}
|
||||
public int Val() {return val;} private int val; public void Val_(int v) {this.val = v;}
|
||||
public Xop_tkn_itm Tkn() {return tkn;} private Xop_tkn_itm tkn; public void Tkn_(Xop_tkn_itm v) {this.tkn = v;}
|
||||
public boolean Pass_y_(int pos, int val) {
|
||||
this.pos = pos; this.val = val;
|
||||
this.pass = true;
|
||||
return true;
|
||||
}
|
||||
public boolean Pass_n_(int pos) {
|
||||
this.pass = false;
|
||||
this.pos = pos;
|
||||
this.val = -1;
|
||||
this.tkn = null;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
32
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_tkn_ent.java
Normal file
32
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_tkn_ent.java
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.langs.htmls.entitys.*;
|
||||
public class Xop_amp_tkn_ent extends Xop_tkn_itm_base {
|
||||
private Gfh_entity_itm html_ref_itm;
|
||||
public Xop_amp_tkn_ent(int bgn, int end, Gfh_entity_itm html_ref_itm) {
|
||||
this.html_ref_itm = html_ref_itm;
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_html_ref;}
|
||||
public int Char_int() {return html_ref_itm.Char_int();}
|
||||
public byte[] Xml_name_bry() {return html_ref_itm.Xml_name_bry();}
|
||||
public boolean Itm_is_custom() {return html_ref_itm.Tid() == Gfh_entity_itm.Tid_name_xowa;}
|
||||
public void Print_ncr(Bry_bfr bfr) {html_ref_itm.Print_ncr(bfr);}
|
||||
public void Print_literal(Bry_bfr bfr) {html_ref_itm.Print_literal(bfr);}
|
||||
}
|
||||
27
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_tkn_num.java
Normal file
27
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_tkn_num.java
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_amp_tkn_num extends Xop_tkn_itm_base {
|
||||
public Xop_amp_tkn_num(int bgn, int end, int val, byte[] str_as_bry) {
|
||||
this.val = val; this.str_as_bry = str_as_bry;
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_html_ncr;}
|
||||
public int Val() {return val;} private int val;
|
||||
public byte[] Str_as_bry() {return str_as_bry;} private byte[] str_as_bry;
|
||||
}
|
||||
34
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_wkr.java
Normal file
34
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_wkr.java
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_amp_wkr implements Xop_ctx_wkr {
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn, int cur) {
|
||||
if (cur == src_len) return ctx.Lxr_make_txt_(cur); // NOTE: & is last char in page; strange and rare, but don't raise error
|
||||
|
||||
Xop_amp_mgr amp_mgr = ctx.App().Parser_amp_mgr();
|
||||
Xop_amp_mgr_rslt amp_rv = amp_mgr.Parse_tkn(tkn_mkr, src, src_len, bgn, cur);
|
||||
Xop_tkn_itm amp_tkn = amp_rv.Tkn();
|
||||
int rv_pos = amp_rv.Pos();
|
||||
if (amp_tkn == null) return ctx.Lxr_make_txt_(rv_pos);
|
||||
ctx.Subs_add(root, amp_tkn);
|
||||
return rv_pos;
|
||||
}
|
||||
}
|
||||
37
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_wkr_tst.java
Normal file
37
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_wkr_tst.java
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_amp_wkr_tst {
|
||||
private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Convert_to_named() {fxt.Test_parse_page_wiki_str("&" , "&");} // note that & is printed, not &
|
||||
@Test public void Convert_to_named_amp() {fxt.Test_parse_page_wiki_str("&" , "&");} // PURPOSE: html_wtr was not handling & only
|
||||
@Test public void Convert_to_numeric() {fxt.Test_parse_page_wiki_str("á" , "á");} // testing that á is outputted, not á
|
||||
@Test public void Defect_bad_code_fails() { // PURPOSE: early rewrite of Xop_amp_mgr caused Xoh_html_wtr_escaper to fail with array out of bounds error; EX:w:Czech_Republic; DATE:2014-05-11
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "[[File:A.png|alt=<p> </p>]]" // basically checks amp parsing inside xnde inside lnki's alt (which uses different parsing code
|
||||
, "<a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xoimg_0\" alt=\" \" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>"
|
||||
);
|
||||
}
|
||||
@Test public void Ignore_ncr() { // PURPOSE: check that ncr is unescaped; PAGE:de.w:Cross-Site-Scripting; DATE:2014-07-23
|
||||
fxt.Test_parse_page_all_str
|
||||
( "a <code><iframe></code>) b"
|
||||
, "a <code><iframe></code>) b" // < should not become <
|
||||
);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user