1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

'v3.7.1.1'

This commit is contained in:
gnosygnu
2016-07-03 22:41:56 -04:00
parent 1a4ca00c0b
commit 36584a0cc2
220 changed files with 4762 additions and 2627 deletions

View File

@@ -17,113 +17,189 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
public class Xop_amp_mgr {
private final Object thread_lock_1 = new Object(), thread_lock_2 = new Object();
private final Bry_bfr tmp_bfr = Bry_bfr_.Reset(32);
public class Xop_amp_mgr { // TS
public Btrie_slim_mgr Amp_trie() {return amp_trie;} private final Btrie_slim_mgr amp_trie = Xop_amp_trie.Instance;
public int Rslt_pos() {return rslt_pos;} private int rslt_pos;
public int Rslt_val() {return rslt_val;} private int rslt_val;
public Xop_tkn_itm Parse_as_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int cur_pos) {
synchronized (thread_lock_1) {
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
Object o = amp_trie.Match_bgn(src, cur_pos, src_len);
cur_pos = amp_trie.Match_pos();
if (o == null) return null;
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
switch (itm.Tid()) {
case Xop_amp_trie_itm.Tid_name_std:
case Xop_amp_trie_itm.Tid_name_xowa:
rslt_pos = cur_pos;
return tkn_mkr.Amp_txt(amp_pos, cur_pos, itm);
case Xop_amp_trie_itm.Tid_num_hex:
case Xop_amp_trie_itm.Tid_num_dec:
boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
boolean pass = Parse_as_int(ncr_is_hex, src, src_len, amp_pos, cur_pos);
return pass ? tkn_mkr.Amp_num(amp_pos, rslt_pos, rslt_val) : null;
default: throw Err_.new_unhandled(itm.Tid());
}
public Xop_amp_mgr_rslt Parse_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int bgn) {
int fail_pos = amp_pos + 1; // default to fail pos which is after &
// check amp_trie; EX: 'lt'
Xop_amp_mgr_rslt rv = new Xop_amp_mgr_rslt();
Btrie_rv match = amp_trie.Match_at(src, bgn, src_len);
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)match.Obj();
int cur = match.Pos();
match.Pool__rls();
if (itm == null) {
rv.Pass_n_(fail_pos);
return rv;
}
}
public boolean Parse_as_int(boolean ncr_is_hex, byte[] src, int src_len, int amp_pos, int int_bgn) {
synchronized (thread_lock_2) {
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
rslt_val = -1; // clear any previous setting
int cur_pos = int_bgn, int_end = -1;
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, cur_pos, src_len);
if (semic_pos == Bry_find_.Not_found) return false;
int_end = semic_pos - 1; // int_end = pos before semicolon
int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
for (int i = int_end; i >= int_bgn; i--) {
byte b = src[i];
if (ncr_is_hex) {
if (b >= 48 && b <= 57) cur = b - 48;
else if (b >= 65 && b <= 70) cur = b - 55;
else if (b >= 97 && b <= 102) cur = b - 87;
else if((b >= 71 && b <= 90)
|| (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
else return false;
// check itm
switch (itm.Tid()) {
// letters; EX: '&lt;'
case Xop_amp_trie_itm.Tid_name_std:
case Xop_amp_trie_itm.Tid_name_xowa:
rv.Pos_(cur);
rv.Tkn_(tkn_mkr.Amp_txt(amp_pos, cur, itm));
return rv;
// numbers; EX: '&#123;' '&#x123'
case Xop_amp_trie_itm.Tid_num_hex:
case Xop_amp_trie_itm.Tid_num_dec:
boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
boolean pass = Parse_ncr(rv, ncr_is_hex, src, src_len, amp_pos, cur);
if (pass) { // NOTE: do not set rv.Pos_(); will be set by Parse_ncr
rv.Tkn_(tkn_mkr.Amp_num(amp_pos, rv.Pos(), rv.Val()));
return rv;
}
else {
cur = b - Byte_ascii.Num_0;
if (cur < 0 || cur > 10) return false;
rv.Pass_n_(fail_pos);
return rv;
}
val += cur * factor;
if (val > gplx.core.intls.Utf8_.Codepoint_max) return false; // fail if value > largest_unicode_codepoint
factor *= multiple;
}
rslt_val = val;
rslt_pos = semic_pos + 1; // position after semic
return true;
default: throw Err_.new_unhandled_default(itm.Tid());
}
}
/**
 * Parses the digits of a numeric character reference (NCR) and stores the outcome in {@code rv}.
 * EX: "&#931;" -> 931; "&#x3A3;" -> 931
 *
 * Digits are read right-to-left, from the byte before the next semicolon back to {@code num_bgn}.
 * Parsing fails if no semicolon is found, if an invalid byte is met, if no digit is present,
 * or if the value is larger than Utf8_.Codepoint_max.
 *
 * @param rv         result holder; gets Pass_y_(pos after semic, value) on success, else Pass_n_(amp_pos + 1)
 * @param ncr_is_hex true to read the digits as base 16; false to read them as base 10
 * @param src        bytes being parsed
 * @param src_len    exclusive end of the range searched for the semicolon
 * @param amp_pos    position of the ampersand; only used to compute the fail position
 * @param num_bgn    position of the first digit; EX: after "#" or "#x"
 * @return true if a value was parsed (same as rv.Pass()); false otherwise
 */
public boolean Parse_ncr(Xop_amp_mgr_rslt rv, boolean ncr_is_hex, byte[] src, int src_len, int amp_pos, int num_bgn) {
  int fail_pos = amp_pos + 1; // on failure, caller resumes right after the ampersand

  // find semic; fail if none found
  int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, num_bgn, src_len);
  if (semic_pos == Bry_find_.Not_found) return rv.Pass_n_(fail_pos);
  int num_end = semic_pos - 1; // num_end = pos before semicolon

  // calc amp_val; EX: &#x3A3; -> 931; &#931; -> 931;
  int codepoint_max = gplx.core.intls.Utf8_.Codepoint_max;
  int multiple = ncr_is_hex ? 16 : 10;
  long val = 0, factor = 1; // long so that neither the place value nor the running sum can wrap around
  int digit_count = 0;
  for (int i = num_end; i >= num_bgn; i--) {
    byte b = src[i];
    int cur;
    if (ncr_is_hex) {
      if      (b >= '0' && b <= '9') cur = b - '0';
      else if (b >= 'A' && b <= 'F') cur = b - 'A' + 10;
      else if (b >= 'a' && b <= 'f') cur = b - 'a' + 10;
      else if (b >= 'G' && b <= 'z') continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
                                               // NOTE(review): this range (71-122) also skips the punctuation bytes 91-96; kept as-is to preserve existing behavior -- confirm whether that is intended
      else return rv.Pass_n_(fail_pos);
    }
    else {
      cur = b - Byte_ascii.Num_0;
      if (cur < 0 || cur > 9) return rv.Pass_n_(fail_pos); // was "> 10", which accepted ':' (byte 58) as digit 10
    }
    ++digit_count;
    if (cur != 0) {
      if (factor > codepoint_max) return rv.Pass_n_(fail_pos); // place value alone is already too large
      val += cur * factor;
      if (val > codepoint_max) return rv.Pass_n_(fail_pos);    // fail if value > largest_unicode_codepoint
    }
    if (factor <= codepoint_max) factor *= multiple; // stop growing once past max; leading zeros can be arbitrarily long
  }
  if (digit_count == 0) return rv.Pass_n_(fail_pos); // EX: "&#;" or "&#x;" has no digits and is not a reference
  return rv.Pass_y_(semic_pos + 1, (int)val); // +1 to position after semic; cast is safe because val <= codepoint_max
}
public byte[] Decode_as_bry(byte[] src) {
if (src == null) return src;
int src_len = src.length;
boolean dirty = false;
int end = src.length;
int pos = 0;
synchronized (tmp_bfr) {
while (pos < src_len) {
byte b = src[pos];
if (b == Byte_ascii.Amp) {
int nxt_pos = pos + 1;
if (nxt_pos < src_len) {
byte nxt_b = src[nxt_pos];
Object amp_obj = amp_trie.Match_bgn_w_byte(nxt_b, src, nxt_pos, src_len);
if (amp_obj != null) {
if (!dirty) {
tmp_bfr.Add_mid(src, 0, pos);
Xop_amp_mgr_rslt amp_rv = null;
Bry_bfr bfr = null;
// scan for &
while (pos < end) {
byte b = src[pos];
if (b == Byte_ascii.Amp) { // & found
int nxt_pos = pos + 1;
if (nxt_pos < end) { // check & is not eos
byte nxt_b = src[nxt_pos];
Btrie_rv trie_rv = amp_trie.Match_at_w_b0(nxt_b, src, nxt_pos, end);
Object amp_obj = trie_rv.Obj();
int amp_pos = trie_rv.Pos();
trie_rv.Pool__rls();
if (amp_obj != null) {
if (!dirty) { // 1st amp found; add preceding String to bfr
if (bfr == null) {
bfr = Bry_bfr_.Get();
dirty = true;
}
Xop_amp_trie_itm amp_itm = (Xop_amp_trie_itm)amp_obj;
switch (amp_itm.Tid()) {
case Xop_amp_trie_itm.Tid_name_std:
case Xop_amp_trie_itm.Tid_name_xowa:
tmp_bfr.Add(amp_itm.U8_bry());
pos = amp_trie.Match_pos();
break;
case Xop_amp_trie_itm.Tid_num_hex:
case Xop_amp_trie_itm.Tid_num_dec:
boolean ncr_is_hex = amp_itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
int int_bgn = amp_trie.Match_pos();
if (Parse_as_int(ncr_is_hex, src, src_len, pos, int_bgn))
tmp_bfr.Add_u8_int(rslt_val);
else
tmp_bfr.Add_mid(src, pos, nxt_pos);
pos = rslt_pos;
break;
default:
throw Err_.new_unhandled(amp_itm.Tid());
}
continue;
bfr.Add_mid(src, 0, pos);
}
}
}
if (dirty)
tmp_bfr.Add_byte(b);
++pos;
Xop_amp_trie_itm amp_itm = (Xop_amp_trie_itm)amp_obj;
switch (amp_itm.Tid()) {
case Xop_amp_trie_itm.Tid_name_std:
case Xop_amp_trie_itm.Tid_name_xowa:
bfr.Add(amp_itm.U8_bry());
pos = amp_pos;
break;
case Xop_amp_trie_itm.Tid_num_hex:
case Xop_amp_trie_itm.Tid_num_dec:
boolean ncr_is_hex = amp_itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
int int_bgn = amp_pos;
if (amp_rv == null)
amp_rv = new Xop_amp_mgr_rslt();
boolean pass = Parse_ncr(amp_rv, ncr_is_hex, src, end, pos, int_bgn);
if (pass)
bfr.Add_u8_int(amp_rv.Val());
else
bfr.Add_mid(src, pos, nxt_pos);
pos = amp_rv.Pos();
break;
default:
throw Err_.new_unhandled_default(amp_itm.Tid());
}
continue;
}
}
}
return dirty ? tmp_bfr.To_bry_and_clear() : src;
if (dirty)
bfr.Add_byte(b);
++pos;
}
return dirty ? bfr.To_bry_and_clear_and_rls() : src;
}
public static final Xop_amp_mgr Instance = new Xop_amp_mgr(); Xop_amp_mgr() {}
// private Xop_tkn_itm Parse_as_tkn_old(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int cur_pos) {
// synchronized (thread_lock_1) {
// rv_pos = amp_pos + 1; // default to fail pos; after amp;
// Object o = amp_trie.Match_bgn(src, cur_pos, src_len);
// cur_pos = amp_trie.Match_pos();
// if (o == null) return null;
// Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
// switch (itm.Tid()) {
// case Xop_amp_trie_itm.Tid_name_std:
// case Xop_amp_trie_itm.Tid_name_xowa:
// rv_pos = cur_pos;
// return tkn_mkr.Amp_txt(amp_pos, cur_pos, itm);
// case Xop_amp_trie_itm.Tid_num_hex:
// case Xop_amp_trie_itm.Tid_num_dec:
// boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
// Xop_amp_mgr_rslt rv = Parse_as_int2(ncr_is_hex, src, src_len, amp_pos, cur_pos);
// return rv.Pass() ? tkn_mkr.Amp_num(amp_pos, rv_pos, rslt_val) : null;
// default: throw Err_.new_unhandled(itm.Tid());
// }
// }
// }
// private boolean Parse_as_int_old(boolean ncr_is_hex, byte[] src, int src_len, int amp_pos, int int_bgn) {
// synchronized (thread_lock_2) {
// rv_pos = amp_pos + 1; // default to fail pos; after amp;
// rslt_val = -1; // clear any previous setting
// int cur_pos = int_bgn, int_end = -1;
// int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, cur_pos, src_len);
// if (semic_pos == Bry_find_.Not_found) return false;
// int_end = semic_pos - 1; // int_end = pos before semicolon
// int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
// for (int i = int_end; i >= int_bgn; i--) {
// byte b = src[i];
// if (ncr_is_hex) {
// if (b >= 48 && b <= 57) cur = b - 48;
// else if (b >= 65 && b <= 70) cur = b - 55;
// else if (b >= 97 && b <= 102) cur = b - 87;
// else if((b >= 71 && b <= 90)
// || (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
// else return false;
// }
// else {
// cur = b - Byte_ascii.Num_0;
// if (cur < 0 || cur > 10) return false;
// }
// val += cur * factor;
// if (val > gplx.core.intls.Utf8_.Codepoint_max) return false; // fail if value > largest_unicode_codepoint
// factor *= multiple;
// }
// rslt_val = val;
// rv_pos = semic_pos + 1; // position after semic
// return true;
// }
// }
}

View File

@@ -0,0 +1,64 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*; import gplx.core.tests.*;
/**
 * Tests for Xop_amp_mgr.Decode_as_bry, run through Xop_amp_mgr_fxt.Test__decode_as_bry(raw, expd).
 * Each case gives the raw text and the text expected after decoding.
 * Failure cases expect the raw text to come back unchanged.
 */
public class Xop_amp_mgr__decode__tst {
@Before public void init() {} private final Xop_amp_mgr_fxt fxt = new Xop_amp_mgr_fxt();
// text and named entities
@Test public void Text() {fxt.Test__decode_as_bry("a" , "a");}
@Test public void Name() {fxt.Test__decode_as_bry("&amp;" , "&");}
@Test public void Name_w_text() {fxt.Test__decode_as_bry("a&amp;b" , "a&b");}
@Test public void Name_fail_semic_missing() {fxt.Test__decode_as_bry("a&ampb" , "a&ampb");}
@Test public void Name_fail_amp_only() {fxt.Test__decode_as_bry("a&" , "a&");}
// numeric character references; decimal 931 and hex 3A3 both decode to Σ
@Test public void Num_fail() {fxt.Test__decode_as_bry("&#!;" , "&#!;");} // ! is not valid num
@Test public void Hex_fail() {fxt.Test__decode_as_bry("&#x!;" , "&#x!;");} // ! is not valid hex
@Test public void Num_basic() {fxt.Test__decode_as_bry("&#0931;" , "Σ");}
@Test public void Num_zero_padded() {fxt.Test__decode_as_bry("&#00931;" , "Σ");}
@Test public void Hex_upper() {fxt.Test__decode_as_bry("&#x3A3;" , "Σ");}
@Test public void Hex_lower() {fxt.Test__decode_as_bry("&#x3a3;" , "Σ");}
@Test public void Hex_zero_padded() {fxt.Test__decode_as_bry("&#x03a3;" , "Σ");}
@Test public void Hex_upper_x() {fxt.Test__decode_as_bry("&#X3A3;" , "Σ");}
@Test public void Num_fail_large_codepoint() {fxt.Test__decode_as_bry("&#538189831;" , "&#538189831;");} // value is too large; raw text is kept
@Test public void Num_ignore_extra_x() {fxt.Test__decode_as_bry("&#xx26D0;" , Char_.To_str(Char_.By_int(9936)));} // 2nd x is ignored; hex 26D0 = 9936
}
/**
 * Test fixture for Xop_amp_mgr; wraps the shared Xop_amp_mgr.Instance.
 * Test__decode_as_bry checks Decode_as_bry; the Test__parse_tkn__* methods check Parse_tkn.
 */
class Xop_amp_mgr_fxt {
  private final Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance;

  /** Decodes {@code raw} and checks that the decoded text equals {@code expd}. */
  public void Test__decode_as_bry(String raw, String expd) {
    byte[] raw_bry = Bry_.new_u8(raw);
    byte[] actl_bry = amp_mgr.Decode_as_bry(raw_bry);
    String actl = String_.new_u8(actl_bry);
    Gftest.Eq__str(expd, actl);
  }

  /** Parses {@code raw} and checks for an entity token (Tid_html_ref) whose Xml_name_bry matches {@code expd}. */
  public void Test__parse_tkn__ent(String raw, String expd) {
    Xop_amp_mgr_rslt rslt = Exec__parse_tkn(raw);
    Xop_amp_tkn_ent ent_tkn = (Xop_amp_tkn_ent)rslt.Tkn();
    Gftest.Eq__byte(Xop_tkn_itm_.Tid_html_ref, ent_tkn.Tkn_tid());
    Gftest.Eq__str(expd, ent_tkn.Xml_name_bry());
  }

  /** Parses {@code raw} and checks for a numeric token (Tid_html_ncr) whose Val equals {@code expd}. */
  public void Test__parse_tkn__ncr(String raw, int expd) {
    Xop_amp_mgr_rslt rslt = Exec__parse_tkn(raw);
    Xop_amp_tkn_num num_tkn = (Xop_amp_tkn_num)rslt.Tkn();
    Gftest.Eq__byte(Xop_tkn_itm_.Tid_html_ncr, num_tkn.Tkn_tid());
    Gftest.Eq__int(expd, num_tkn.Val());
  }

  /** Parses {@code raw} and checks the fail case: no token was made and the result pos equals {@code expd}. */
  public void Test__parse_tkn__txt(String raw, int expd) {
    Xop_amp_mgr_rslt rslt = Exec__parse_tkn(raw);
    Gftest.Eq__null(Bool_.Y, rslt.Tkn());
    Gftest.Eq__int(expd, rslt.Pos());
  }

  /** Runs Parse_tkn over all of {@code raw}, with the ampersand at pos 0 and matching starting at pos 1. */
  private Xop_amp_mgr_rslt Exec__parse_tkn(String raw) {
    byte[] src = Bry_.new_u8(raw);
    int amp_pos = 0;
    int bgn = amp_pos + 1;
    Xop_tkn_mkr tkn_mkr = new Xop_tkn_mkr();
    return amp_mgr.Parse_tkn(tkn_mkr, src, src.length, amp_pos, bgn);
  }
}

View File

@@ -16,14 +16,12 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.tests.*;
public class Xop_html_num_tkn_chkr extends Xop_tkn_chkr_base {
@Override public Class<?> TypeOf() {return Xop_amp_tkn_num.class;}
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_html_ncr;}
public int Html_ncr_val() {return html_ncr_val;} public Xop_html_num_tkn_chkr Html_ncr_val_(int v) {html_ncr_val = v; return this;} private int html_ncr_val = -1;
@Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
Xop_amp_tkn_num actl = (Xop_amp_tkn_num)actl_obj;
err += mgr.Tst_val(html_ncr_val == -1, path, "html_ncr_val", html_ncr_val, actl.Val());
return err;
}
import org.junit.*; import gplx.core.tests.*;
/**
 * Tests for Xop_amp_mgr.Parse_tkn, run through Xop_amp_mgr_fxt.
 * Pass cases check the kind of token made; fail cases check that no token is made
 * and that the result pos is 1 (the fixture puts the ampersand at pos 0, so 1 is the pos right after it).
 */
public class Xop_amp_mgr__parse_tkn__tst {
@Before public void init() {} private final Xop_amp_mgr_fxt fxt = new Xop_amp_mgr_fxt();
@Test public void Ent() {fxt.Test__parse_tkn__ent("&amp;" , "&amp;");} // check for html_ref
@Test public void Ent__fail() {fxt.Test__parse_tkn__txt("&nil;" , 1);} // "nil" is expected not to match any entity
// NOTE(review): "Num__nex" looks like a typo for "Num__hex"; the input is a hex reference
@Test public void Num__nex() {fxt.Test__parse_tkn__ncr("&#x3A3;" , 931);} // check for html_ncr; Σ: http://en.wikipedia.org/wiki/Numeric_character_reference
@Test public void Num__dec() {fxt.Test__parse_tkn__ncr("&#931;" , 931);}
@Test public void Num__fail() {fxt.Test__parse_tkn__txt("&#" , 1);} // no digits and no semicolon
}

View File

@@ -1,44 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
/**
 * Tests for Xop_amp_mgr.Decode_as_bry, run through Xop_amp_mgr_fxt.Test_decode_as_bry(raw, expd).
 * Each case gives the raw text and the text expected after decoding.
 * Failure cases expect the raw text to come back unchanged.
 */
public class Xop_amp_mgr_decode_tst {
@Before public void init() {fxt.Reset();} private Xop_amp_mgr_fxt fxt = new Xop_amp_mgr_fxt();
// text and named entities
@Test public void Text() {fxt.Test_decode_as_bry("a" , "a");}
@Test public void Name() {fxt.Test_decode_as_bry("&amp;" , "&");}
@Test public void Name_w_text() {fxt.Test_decode_as_bry("a&amp;b" , "a&b");}
@Test public void Name_fail_semic_missing() {fxt.Test_decode_as_bry("a&ampb" , "a&ampb");}
@Test public void Name_fail_amp_only() {fxt.Test_decode_as_bry("a&" , "a&");}
// numeric character references; decimal 931 and hex 3A3 both decode to Σ
@Test public void Num_fail() {fxt.Test_decode_as_bry("&#!;" , "&#!;");} // ! is not valid num
@Test public void Hex_fail() {fxt.Test_decode_as_bry("&#x!;" , "&#x!;");} // ! is not valid hex
@Test public void Num_basic() {fxt.Test_decode_as_bry("&#0931;" , "Σ");}
@Test public void Num_zero_padded() {fxt.Test_decode_as_bry("&#00931;" , "Σ");}
@Test public void Hex_upper() {fxt.Test_decode_as_bry("&#x3A3;" , "Σ");}
@Test public void Hex_lower() {fxt.Test_decode_as_bry("&#x3a3;" , "Σ");}
@Test public void Hex_zero_padded() {fxt.Test_decode_as_bry("&#x03a3;" , "Σ");}
@Test public void Hex_upper_x() {fxt.Test_decode_as_bry("&#X3A3;" , "Σ");}
@Test public void Num_fail_large_codepoint() {fxt.Test_decode_as_bry("&#538189831;" , "&#538189831;");} // value is too large; raw text is kept
@Test public void Num_ignore_extra_x() {fxt.Test_decode_as_bry("&#xx26D0;" , Char_.To_str(Char_.By_int(9936)));} // 2nd x is ignored; hex 26D0 = 9936
}
/** Test fixture for Xop_amp_mgr; wraps the shared Xop_amp_mgr.Instance. */
class Xop_amp_mgr_fxt {
private Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance;
// no per-test state to clear; called from the test's @Before method
public void Reset() {}
// decodes raw (converted to utf8 bytes) and checks that the decoded text equals expd
public void Test_decode_as_bry(String raw, String expd) {
Tfds.Eq(expd, String_.new_u8(amp_mgr.Decode_as_bry(Bry_.new_u8(raw))));
}
}

View File

@@ -0,0 +1,42 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Mutable result holder for Xop_amp_mgr parse calls.
 * Holds a pass flag, a position, a numeric value and an optional token.
 * Pass_y_ / Pass_n_ set several fields at once and return the pass flag, so callers can write
 * "return rv.Pass_n_(pos);".
 */
public class Xop_amp_mgr_rslt {
  private boolean pass;
  private int pos;
  private int val;
  private Xop_tkn_itm tkn;

  /** Creates an empty result: pass = false, pos = 0, val = 0, tkn = null. */
  public Xop_amp_mgr_rslt() {}

  /** Creates a result with the given pos, val and tkn; the pass flag is left as false. */
  public Xop_amp_mgr_rslt(int pos, int val, Xop_tkn_itm tkn) {
    this.pos = pos;
    this.val = val;
    this.tkn = tkn;
  }

  /** Pass flag; set by Valid_, Pass_y_ and Pass_n_. */
  public boolean Pass() {
    return pass;
  }
  public void Valid_(boolean v) {
    pass = v;
  }

  /** Position stored by the last call that set it. */
  public int Pos() {
    return pos;
  }
  public void Pos_(int v) {
    pos = v;
  }

  /** Numeric value; Pass_n_ sets it to -1. */
  public int Val() {
    return val;
  }
  public void Val_(int v) {
    val = v;
  }

  /** Token; Pass_n_ sets it to null. */
  public Xop_tkn_itm Tkn() {
    return tkn;
  }
  public void Tkn_(Xop_tkn_itm v) {
    tkn = v;
  }

  /** Marks the result as passed and stores pos and val; tkn is not changed. Always returns true. */
  public boolean Pass_y_(int pos, int val) {
    this.pass = true;
    this.pos = pos;
    this.val = val;
    return true;
  }

  /** Marks the result as failed: stores pos, sets val to -1 and clears tkn. Always returns false. */
  public boolean Pass_n_(int pos) {
    this.tkn = null;
    this.val = -1;
    this.pos = pos;
    this.pass = false;
    return false;
  }
}

View File

@@ -16,9 +16,9 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
public class Xop_amp_tkn_txt extends Xop_tkn_itm_base {
public class Xop_amp_tkn_ent extends Xop_tkn_itm_base {
private Xop_amp_trie_itm html_ref_itm;
public Xop_amp_tkn_txt(int bgn, int end, Xop_amp_trie_itm html_ref_itm) {
public Xop_amp_tkn_ent(int bgn, int end, Xop_amp_trie_itm html_ref_itm) {
this.html_ref_itm = html_ref_itm;
this.Tkn_ini_pos(false, bgn, end);
}

View File

@@ -17,301 +17,300 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
public class Xop_amp_trie {
public static final byte[] // NOTE: top_define
Bry_xowa_lt = Bry_.new_a7("&xowa_lt;")
, Bry_xowa_brack_bgn = Bry_.new_a7("&xowa_brack_bgn;")
, Bry_xowa_brack_end = Bry_.new_a7("&xowa_brack_end;")
, Bry_xowa_pipe = Bry_.new_a7("&xowa_pipe;")
, Bry_xowa_apos = Bry_.new_a7("&xowa_apos;")
, Bry_xowa_colon = Bry_.new_a7("&xowa_colon;")
, Bry_xowa_underline = Bry_.new_a7("&xowa_underline;")
, Bry_xowa_asterisk = Bry_.new_a7("&xowa_asterisk;")
, Bry_xowa_space = Bry_.new_a7("&xowa_space;")
, Bry_xowa_nl = Bry_.new_a7("&xowa_nl;")
, Bry_xowa_dash = Bry_.new_a7("&xowa_dash;")
public class Xop_amp_trie { // TS
public static final String // NOTE: top_define; entities needed for <nowiki> escaping
Str__xowa_lt = "&xowa_lt;"
, Str__xowa_brack_bgn = "&xowa_brack_bgn;"
, Str__xowa_brack_end = "&xowa_brack_end;"
, Str__xowa_pipe = "&xowa_pipe;"
, Str__xowa_apos = "&xowa_apos;"
, Str__xowa_colon = "&xowa_colon;"
, Str__xowa_underline = "&xowa_underline;"
, Str__xowa_asterisk = "&xowa_asterisk;"
, Str__xowa_space = "&xowa_space;"
, Str__xowa_nl = "&xowa_nl;"
, Str__xowa_dash = "&xowa_dash;"
;
public static final Btrie_slim_mgr Instance = new_(); Xop_amp_trie() {}
private static Btrie_slim_mgr new_() {// REF.MW: Sanitizer|$wgHtmlEntities; NOTE:added apos
public static final Btrie_slim_mgr Instance = New(); Xop_amp_trie() {}
private static Btrie_slim_mgr New() {// REF.MW: Sanitizer|$wgHtmlEntities; NOTE:added apos
Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
Reg_name(rv, Bool_.Y, 60, Bry_xowa_lt);
Reg_name(rv, Bool_.Y, 91, Bry_xowa_brack_bgn);
Reg_name(rv, Bool_.Y, 93, Bry_xowa_brack_end);
Reg_name(rv, Bool_.Y, 124, Bry_xowa_pipe);
Reg_name(rv, Bool_.Y, 39, Bry_xowa_apos);
Reg_name(rv, Bool_.Y, 58, Bry_xowa_colon);
Reg_name(rv, Bool_.Y, 95, Bry_xowa_underline);
Reg_name(rv, Bool_.Y, 42, Bry_xowa_asterisk);
Reg_name(rv, Bool_.Y, 32, Bry_xowa_space);
Reg_name(rv, Bool_.Y, 10, Bry_xowa_nl);
Reg_name(rv, Bool_.Y, 45, Bry_xowa_dash);
Reg_name(rv, Bool_.N, 39, "&apos;");
Reg_name(rv, Bool_.N, 193, "&Aacute;");
Reg_name(rv, Bool_.N, 225, "&aacute;");
Reg_name(rv, Bool_.N, 194, "&Acirc;");
Reg_name(rv, Bool_.N, 226, "&acirc;");
Reg_name(rv, Bool_.N, 180, "&acute;");
Reg_name(rv, Bool_.N, 198, "&AElig;");
Reg_name(rv, Bool_.N, 230, "&aelig;");
Reg_name(rv, Bool_.N, 192, "&Agrave;");
Reg_name(rv, Bool_.N, 224, "&agrave;");
Reg_name(rv, Bool_.N, 8501, "&alefsym;");
Reg_name(rv, Bool_.N, 913, "&Alpha;");
Reg_name(rv, Bool_.N, 945, "&alpha;");
Reg_name(rv, Bool_.N, 38, "&amp;");
Reg_name(rv, Bool_.N, 8743, "&and;");
Reg_name(rv, Bool_.N, 8736, "&ang;");
Reg_name(rv, Bool_.N, 197, "&Aring;");
Reg_name(rv, Bool_.N, 229, "&aring;");
Reg_name(rv, Bool_.N, 8776, "&asymp;");
Reg_name(rv, Bool_.N, 195, "&Atilde;");
Reg_name(rv, Bool_.N, 227, "&atilde;");
Reg_name(rv, Bool_.N, 196, "&Auml;");
Reg_name(rv, Bool_.N, 228, "&auml;");
Reg_name(rv, Bool_.N, 8222, "&bdquo;");
Reg_name(rv, Bool_.N, 914, "&Beta;");
Reg_name(rv, Bool_.N, 946, "&beta;");
Reg_name(rv, Bool_.N, 166, "&brvbar;");
Reg_name(rv, Bool_.N, 8226, "&bull;");
Reg_name(rv, Bool_.N, 8745, "&cap;");
Reg_name(rv, Bool_.N, 199, "&Ccedil;");
Reg_name(rv, Bool_.N, 231, "&ccedil;");
Reg_name(rv, Bool_.N, 184, "&cedil;");
Reg_name(rv, Bool_.N, 162, "&cent;");
Reg_name(rv, Bool_.N, 935, "&Chi;");
Reg_name(rv, Bool_.N, 967, "&chi;");
Reg_name(rv, Bool_.N, 710, "&circ;");
Reg_name(rv, Bool_.N, 9827, "&clubs;");
Reg_name(rv, Bool_.N, 8773, "&cong;");
Reg_name(rv, Bool_.N, 169, "&copy;");
Reg_name(rv, Bool_.N, 8629, "&crarr;");
Reg_name(rv, Bool_.N, 8746, "&cup;");
Reg_name(rv, Bool_.N, 164, "&curren;");
Reg_name(rv, Bool_.N, 8224, "&dagger;");
Reg_name(rv, Bool_.N, 8225, "&Dagger;");
Reg_name(rv, Bool_.N, 8595, "&darr;");
Reg_name(rv, Bool_.N, 8659, "&dArr;");
Reg_name(rv, Bool_.N, 176, "&deg;");
Reg_name(rv, Bool_.N, 916, "&Delta;");
Reg_name(rv, Bool_.N, 948, "&delta;");
Reg_name(rv, Bool_.N, 9830, "&diams;");
Reg_name(rv, Bool_.N, 247, "&divide;");
Reg_name(rv, Bool_.N, 201, "&Eacute;");
Reg_name(rv, Bool_.N, 233, "&eacute;");
Reg_name(rv, Bool_.N, 202, "&Ecirc;");
Reg_name(rv, Bool_.N, 234, "&ecirc;");
Reg_name(rv, Bool_.N, 200, "&Egrave;");
Reg_name(rv, Bool_.N, 232, "&egrave;");
Reg_name(rv, Bool_.N, 8709, "&empty;");
Reg_name(rv, Bool_.N, 8195, "&emsp;");
Reg_name(rv, Bool_.N, 8194, "&ensp;");
Reg_name(rv, Bool_.N, 917, "&Epsilon;");
Reg_name(rv, Bool_.N, 949, "&epsilon;");
Reg_name(rv, Bool_.N, 8801, "&equiv;");
Reg_name(rv, Bool_.N, 919, "&Eta;");
Reg_name(rv, Bool_.N, 951, "&eta;");
Reg_name(rv, Bool_.N, 208, "&ETH;");
Reg_name(rv, Bool_.N, 240, "&eth;");
Reg_name(rv, Bool_.N, 203, "&Euml;");
Reg_name(rv, Bool_.N, 235, "&euml;");
Reg_name(rv, Bool_.N, 8364, "&euro;");
Reg_name(rv, Bool_.N, 8707, "&exist;");
Reg_name(rv, Bool_.N, 402, "&fnof;");
Reg_name(rv, Bool_.N, 8704, "&forall;");
Reg_name(rv, Bool_.N, 189, "&frac12;");
Reg_name(rv, Bool_.N, 188, "&frac14;");
Reg_name(rv, Bool_.N, 190, "&frac34;");
Reg_name(rv, Bool_.N, 8260, "&frasl;");
Reg_name(rv, Bool_.N, 915, "&Gamma;");
Reg_name(rv, Bool_.N, 947, "&gamma;");
Reg_name(rv, Bool_.N, 8805, "&ge;");
Reg_name(rv, Bool_.N, 62, "&gt;");
Reg_name(rv, Bool_.N, 8596, "&harr;");
Reg_name(rv, Bool_.N, 8660, "&hArr;");
Reg_name(rv, Bool_.N, 9829, "&hearts;");
Reg_name(rv, Bool_.N, 8230, "&hellip;");
Reg_name(rv, Bool_.N, 205, "&Iacute;");
Reg_name(rv, Bool_.N, 237, "&iacute;");
Reg_name(rv, Bool_.N, 206, "&Icirc;");
Reg_name(rv, Bool_.N, 238, "&icirc;");
Reg_name(rv, Bool_.N, 161, "&iexcl;");
Reg_name(rv, Bool_.N, 204, "&Igrave;");
Reg_name(rv, Bool_.N, 236, "&igrave;");
Reg_name(rv, Bool_.N, 8465, "&image;");
Reg_name(rv, Bool_.N, 8734, "&infin;");
Reg_name(rv, Bool_.N, 8747, "&int;");
Reg_name(rv, Bool_.N, 921, "&Iota;");
Reg_name(rv, Bool_.N, 953, "&iota;");
Reg_name(rv, Bool_.N, 191, "&iquest;");
Reg_name(rv, Bool_.N, 8712, "&isin;");
Reg_name(rv, Bool_.N, 207, "&Iuml;");
Reg_name(rv, Bool_.N, 239, "&iuml;");
Reg_name(rv, Bool_.N, 922, "&Kappa;");
Reg_name(rv, Bool_.N, 954, "&kappa;");
Reg_name(rv, Bool_.N, 923, "&Lambda;");
Reg_name(rv, Bool_.N, 955, "&lambda;");
Reg_name(rv, Bool_.N, 9001, "&lang;");
Reg_name(rv, Bool_.N, 171, "&laquo;");
Reg_name(rv, Bool_.N, 8592, "&larr;");
Reg_name(rv, Bool_.N, 8656, "&lArr;");
Reg_name(rv, Bool_.N, 8968, "&lceil;");
Reg_name(rv, Bool_.N, 8220, "&ldquo;");
Reg_name(rv, Bool_.N, 8804, "&le;");
Reg_name(rv, Bool_.N, 8970, "&lfloor;");
Reg_name(rv, Bool_.N, 8727, "&lowast;");
Reg_name(rv, Bool_.N, 9674, "&loz;");
Reg_name(rv, Bool_.N, 8206, "&lrm;");
Reg_name(rv, Bool_.N, 8249, "&lsaquo;");
Reg_name(rv, Bool_.N, 8216, "&lsquo;");
Reg_name(rv, Bool_.N, 60, "&lt;");
Reg_name(rv, Bool_.N, 175, "&macr;");
Reg_name(rv, Bool_.N, 8212, "&mdash;");
Reg_name(rv, Bool_.N, 181, "&micro;");
Reg_name(rv, Bool_.N, 183, "&middot;");
Reg_name(rv, Bool_.N, 8722, "&minus;");
Reg_name(rv, Bool_.N, 924, "&Mu;");
Reg_name(rv, Bool_.N, 956, "&mu;");
Reg_name(rv, Bool_.N, 8711, "&nabla;");
Reg_name(rv, Bool_.N, 160, "&nbsp;");
Reg_name(rv, Bool_.N, 8211, "&ndash;");
Reg_name(rv, Bool_.N, 8800, "&ne;");
Reg_name(rv, Bool_.N, 8715, "&ni;");
Reg_name(rv, Bool_.N, 172, "&not;");
Reg_name(rv, Bool_.N, 8713, "&notin;");
Reg_name(rv, Bool_.N, 8836, "&nsub;");
Reg_name(rv, Bool_.N, 209, "&Ntilde;");
Reg_name(rv, Bool_.N, 241, "&ntilde;");
Reg_name(rv, Bool_.N, 925, "&Nu;");
Reg_name(rv, Bool_.N, 957, "&nu;");
Reg_name(rv, Bool_.N, 211, "&Oacute;");
Reg_name(rv, Bool_.N, 243, "&oacute;");
Reg_name(rv, Bool_.N, 212, "&Ocirc;");
Reg_name(rv, Bool_.N, 244, "&ocirc;");
Reg_name(rv, Bool_.N, 338, "&OElig;");
Reg_name(rv, Bool_.N, 339, "&oelig;");
Reg_name(rv, Bool_.N, 210, "&Ograve;");
Reg_name(rv, Bool_.N, 242, "&ograve;");
Reg_name(rv, Bool_.N, 8254, "&oline;");
Reg_name(rv, Bool_.N, 937, "&Omega;");
Reg_name(rv, Bool_.N, 969, "&omega;");
Reg_name(rv, Bool_.N, 927, "&Omicron;");
Reg_name(rv, Bool_.N, 959, "&omicron;");
Reg_name(rv, Bool_.N, 8853, "&oplus;");
Reg_name(rv, Bool_.N, 8744, "&or;");
Reg_name(rv, Bool_.N, 170, "&ordf;");
Reg_name(rv, Bool_.N, 186, "&ordm;");
Reg_name(rv, Bool_.N, 216, "&Oslash;");
Reg_name(rv, Bool_.N, 248, "&oslash;");
Reg_name(rv, Bool_.N, 213, "&Otilde;");
Reg_name(rv, Bool_.N, 245, "&otilde;");
Reg_name(rv, Bool_.N, 8855, "&otimes;");
Reg_name(rv, Bool_.N, 214, "&Ouml;");
Reg_name(rv, Bool_.N, 246, "&ouml;");
Reg_name(rv, Bool_.N, 182, "&para;");
Reg_name(rv, Bool_.N, 8706, "&part;");
Reg_name(rv, Bool_.N, 8240, "&permil;");
Reg_name(rv, Bool_.N, 8869, "&perp;");
Reg_name(rv, Bool_.N, 934, "&Phi;");
Reg_name(rv, Bool_.N, 966, "&phi;");
Reg_name(rv, Bool_.N, 928, "&Pi;");
Reg_name(rv, Bool_.N, 960, "&pi;");
Reg_name(rv, Bool_.N, 982, "&piv;");
Reg_name(rv, Bool_.N, 177, "&plusmn;");
Reg_name(rv, Bool_.N, 163, "&pound;");
Reg_name(rv, Bool_.N, 8242, "&prime;");
Reg_name(rv, Bool_.N, 8243, "&Prime;");
Reg_name(rv, Bool_.N, 8719, "&prod;");
Reg_name(rv, Bool_.N, 8733, "&prop;");
Reg_name(rv, Bool_.N, 936, "&Psi;");
Reg_name(rv, Bool_.N, 968, "&psi;");
Reg_name(rv, Bool_.N, 34, "&quot;");
Reg_name(rv, Bool_.N, 8730, "&radic;");
Reg_name(rv, Bool_.N, 9002, "&rang;");
Reg_name(rv, Bool_.N, 187, "&raquo;");
Reg_name(rv, Bool_.N, 8594, "&rarr;");
Reg_name(rv, Bool_.N, 8658, "&rArr;");
Reg_name(rv, Bool_.N, 8969, "&rceil;");
Reg_name(rv, Bool_.N, 8221, "&rdquo;");
Reg_name(rv, Bool_.N, 8476, "&real;");
Reg_name(rv, Bool_.N, 174, "&reg;");
Reg_name(rv, Bool_.N, 8971, "&rfloor;");
Reg_name(rv, Bool_.N, 929, "&Rho;");
Reg_name(rv, Bool_.N, 961, "&rho;");
Reg_name(rv, Bool_.N, 8207, "&rlm;");
Reg_name(rv, Bool_.N, 8250, "&rsaquo;");
Reg_name(rv, Bool_.N, 8217, "&rsquo;");
Reg_name(rv, Bool_.N, 8218, "&sbquo;");
Reg_name(rv, Bool_.N, 352, "&Scaron;");
Reg_name(rv, Bool_.N, 353, "&scaron;");
Reg_name(rv, Bool_.N, 8901, "&sdot;");
Reg_name(rv, Bool_.N, 167, "&sect;");
Reg_name(rv, Bool_.N, 173, "&shy;");
Reg_name(rv, Bool_.N, 931, "&Sigma;");
Reg_name(rv, Bool_.N, 963, "&sigma;");
Reg_name(rv, Bool_.N, 962, "&sigmaf;");
Reg_name(rv, Bool_.N, 8764, "&sim;");
Reg_name(rv, Bool_.N, 9824, "&spades;");
Reg_name(rv, Bool_.N, 8834, "&sub;");
Reg_name(rv, Bool_.N, 8838, "&sube;");
Reg_name(rv, Bool_.N, 8721, "&sum;");
Reg_name(rv, Bool_.N, 8835, "&sup;");
Reg_name(rv, Bool_.N, 185, "&sup1;");
Reg_name(rv, Bool_.N, 178, "&sup2;");
Reg_name(rv, Bool_.N, 179, "&sup3;");
Reg_name(rv, Bool_.N, 8839, "&supe;");
Reg_name(rv, Bool_.N, 223, "&szlig;");
Reg_name(rv, Bool_.N, 932, "&Tau;");
Reg_name(rv, Bool_.N, 964, "&tau;");
Reg_name(rv, Bool_.N, 8756, "&there4;");
Reg_name(rv, Bool_.N, 920, "&Theta;");
Reg_name(rv, Bool_.N, 952, "&theta;");
Reg_name(rv, Bool_.N, 977, "&thetasym;");
Reg_name(rv, Bool_.N, 8201, "&thinsp;");
Reg_name(rv, Bool_.N, 222, "&THORN;");
Reg_name(rv, Bool_.N, 254, "&thorn;");
Reg_name(rv, Bool_.N, 732, "&tilde;");
Reg_name(rv, Bool_.N, 215, "&times;");
Reg_name(rv, Bool_.N, 8482, "&trade;");
Reg_name(rv, Bool_.N, 218, "&Uacute;");
Reg_name(rv, Bool_.N, 250, "&uacute;");
Reg_name(rv, Bool_.N, 8593, "&uarr;");
Reg_name(rv, Bool_.N, 8657, "&uArr;");
Reg_name(rv, Bool_.N, 219, "&Ucirc;");
Reg_name(rv, Bool_.N, 251, "&ucirc;");
Reg_name(rv, Bool_.N, 217, "&Ugrave;");
Reg_name(rv, Bool_.N, 249, "&ugrave;");
Reg_name(rv, Bool_.N, 168, "&uml;");
Reg_name(rv, Bool_.N, 978, "&upsih;");
Reg_name(rv, Bool_.N, 933, "&Upsilon;");
Reg_name(rv, Bool_.N, 965, "&upsilon;");
Reg_name(rv, Bool_.N, 220, "&Uuml;");
Reg_name(rv, Bool_.N, 252, "&uuml;");
Reg_name(rv, Bool_.N, 8472, "&weierp;");
Reg_name(rv, Bool_.N, 926, "&Xi;");
Reg_name(rv, Bool_.N, 958, "&xi;");
Reg_name(rv, Bool_.N, 221, "&Yacute;");
Reg_name(rv, Bool_.N, 253, "&yacute;");
Reg_name(rv, Bool_.N, 165, "&yen;");
Reg_name(rv, Bool_.N, 376, "&Yuml;");
Reg_name(rv, Bool_.N, 255, "&yuml;");
Reg_name(rv, Bool_.N, 918, "&Zeta;");
Reg_name(rv, Bool_.N, 950, "&zeta;");
Reg_name(rv, Bool_.N, 8205, "&zwj;");
Reg_name(rv, Bool_.N, 8204, "&zwnj;");
Reg_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#x");
Reg_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#X");
Reg_prefix(rv, Xop_amp_trie_itm.Tid_num_dec, "#");
Add_name(rv, Bool_.Y, 60, Str__xowa_lt);
Add_name(rv, Bool_.Y, 91, Str__xowa_brack_bgn);
Add_name(rv, Bool_.Y, 93, Str__xowa_brack_end);
Add_name(rv, Bool_.Y, 124, Str__xowa_pipe);
Add_name(rv, Bool_.Y, 39, Str__xowa_apos);
Add_name(rv, Bool_.Y, 58, Str__xowa_colon);
Add_name(rv, Bool_.Y, 95, Str__xowa_underline);
Add_name(rv, Bool_.Y, 42, Str__xowa_asterisk);
Add_name(rv, Bool_.Y, 32, Str__xowa_space);
Add_name(rv, Bool_.Y, 10, Str__xowa_nl);
Add_name(rv, Bool_.Y, 45, Str__xowa_dash);
Add_name(rv, Bool_.N, 39, "&apos;");
Add_name(rv, Bool_.N, 193, "&Aacute;");
Add_name(rv, Bool_.N, 225, "&aacute;");
Add_name(rv, Bool_.N, 194, "&Acirc;");
Add_name(rv, Bool_.N, 226, "&acirc;");
Add_name(rv, Bool_.N, 180, "&acute;");
Add_name(rv, Bool_.N, 198, "&AElig;");
Add_name(rv, Bool_.N, 230, "&aelig;");
Add_name(rv, Bool_.N, 192, "&Agrave;");
Add_name(rv, Bool_.N, 224, "&agrave;");
Add_name(rv, Bool_.N, 8501, "&alefsym;");
Add_name(rv, Bool_.N, 913, "&Alpha;");
Add_name(rv, Bool_.N, 945, "&alpha;");
Add_name(rv, Bool_.N, 38, "&amp;");
Add_name(rv, Bool_.N, 8743, "&and;");
Add_name(rv, Bool_.N, 8736, "&ang;");
Add_name(rv, Bool_.N, 197, "&Aring;");
Add_name(rv, Bool_.N, 229, "&aring;");
Add_name(rv, Bool_.N, 8776, "&asymp;");
Add_name(rv, Bool_.N, 195, "&Atilde;");
Add_name(rv, Bool_.N, 227, "&atilde;");
Add_name(rv, Bool_.N, 196, "&Auml;");
Add_name(rv, Bool_.N, 228, "&auml;");
Add_name(rv, Bool_.N, 8222, "&bdquo;");
Add_name(rv, Bool_.N, 914, "&Beta;");
Add_name(rv, Bool_.N, 946, "&beta;");
Add_name(rv, Bool_.N, 166, "&brvbar;");
Add_name(rv, Bool_.N, 8226, "&bull;");
Add_name(rv, Bool_.N, 8745, "&cap;");
Add_name(rv, Bool_.N, 199, "&Ccedil;");
Add_name(rv, Bool_.N, 231, "&ccedil;");
Add_name(rv, Bool_.N, 184, "&cedil;");
Add_name(rv, Bool_.N, 162, "&cent;");
Add_name(rv, Bool_.N, 935, "&Chi;");
Add_name(rv, Bool_.N, 967, "&chi;");
Add_name(rv, Bool_.N, 710, "&circ;");
Add_name(rv, Bool_.N, 9827, "&clubs;");
Add_name(rv, Bool_.N, 8773, "&cong;");
Add_name(rv, Bool_.N, 169, "&copy;");
Add_name(rv, Bool_.N, 8629, "&crarr;");
Add_name(rv, Bool_.N, 8746, "&cup;");
Add_name(rv, Bool_.N, 164, "&curren;");
Add_name(rv, Bool_.N, 8224, "&dagger;");
Add_name(rv, Bool_.N, 8225, "&Dagger;");
Add_name(rv, Bool_.N, 8595, "&darr;");
Add_name(rv, Bool_.N, 8659, "&dArr;");
Add_name(rv, Bool_.N, 176, "&deg;");
Add_name(rv, Bool_.N, 916, "&Delta;");
Add_name(rv, Bool_.N, 948, "&delta;");
Add_name(rv, Bool_.N, 9830, "&diams;");
Add_name(rv, Bool_.N, 247, "&divide;");
Add_name(rv, Bool_.N, 201, "&Eacute;");
Add_name(rv, Bool_.N, 233, "&eacute;");
Add_name(rv, Bool_.N, 202, "&Ecirc;");
Add_name(rv, Bool_.N, 234, "&ecirc;");
Add_name(rv, Bool_.N, 200, "&Egrave;");
Add_name(rv, Bool_.N, 232, "&egrave;");
Add_name(rv, Bool_.N, 8709, "&empty;");
Add_name(rv, Bool_.N, 8195, "&emsp;");
Add_name(rv, Bool_.N, 8194, "&ensp;");
Add_name(rv, Bool_.N, 917, "&Epsilon;");
Add_name(rv, Bool_.N, 949, "&epsilon;");
Add_name(rv, Bool_.N, 8801, "&equiv;");
Add_name(rv, Bool_.N, 919, "&Eta;");
Add_name(rv, Bool_.N, 951, "&eta;");
Add_name(rv, Bool_.N, 208, "&ETH;");
Add_name(rv, Bool_.N, 240, "&eth;");
Add_name(rv, Bool_.N, 203, "&Euml;");
Add_name(rv, Bool_.N, 235, "&euml;");
Add_name(rv, Bool_.N, 8364, "&euro;");
Add_name(rv, Bool_.N, 8707, "&exist;");
Add_name(rv, Bool_.N, 402, "&fnof;");
Add_name(rv, Bool_.N, 8704, "&forall;");
Add_name(rv, Bool_.N, 189, "&frac12;");
Add_name(rv, Bool_.N, 188, "&frac14;");
Add_name(rv, Bool_.N, 190, "&frac34;");
Add_name(rv, Bool_.N, 8260, "&frasl;");
Add_name(rv, Bool_.N, 915, "&Gamma;");
Add_name(rv, Bool_.N, 947, "&gamma;");
Add_name(rv, Bool_.N, 8805, "&ge;");
Add_name(rv, Bool_.N, 62, "&gt;");
Add_name(rv, Bool_.N, 8596, "&harr;");
Add_name(rv, Bool_.N, 8660, "&hArr;");
Add_name(rv, Bool_.N, 9829, "&hearts;");
Add_name(rv, Bool_.N, 8230, "&hellip;");
Add_name(rv, Bool_.N, 205, "&Iacute;");
Add_name(rv, Bool_.N, 237, "&iacute;");
Add_name(rv, Bool_.N, 206, "&Icirc;");
Add_name(rv, Bool_.N, 238, "&icirc;");
Add_name(rv, Bool_.N, 161, "&iexcl;");
Add_name(rv, Bool_.N, 204, "&Igrave;");
Add_name(rv, Bool_.N, 236, "&igrave;");
Add_name(rv, Bool_.N, 8465, "&image;");
Add_name(rv, Bool_.N, 8734, "&infin;");
Add_name(rv, Bool_.N, 8747, "&int;");
Add_name(rv, Bool_.N, 921, "&Iota;");
Add_name(rv, Bool_.N, 953, "&iota;");
Add_name(rv, Bool_.N, 191, "&iquest;");
Add_name(rv, Bool_.N, 8712, "&isin;");
Add_name(rv, Bool_.N, 207, "&Iuml;");
Add_name(rv, Bool_.N, 239, "&iuml;");
Add_name(rv, Bool_.N, 922, "&Kappa;");
Add_name(rv, Bool_.N, 954, "&kappa;");
Add_name(rv, Bool_.N, 923, "&Lambda;");
Add_name(rv, Bool_.N, 955, "&lambda;");
Add_name(rv, Bool_.N, 9001, "&lang;");
Add_name(rv, Bool_.N, 171, "&laquo;");
Add_name(rv, Bool_.N, 8592, "&larr;");
Add_name(rv, Bool_.N, 8656, "&lArr;");
Add_name(rv, Bool_.N, 8968, "&lceil;");
Add_name(rv, Bool_.N, 8220, "&ldquo;");
Add_name(rv, Bool_.N, 8804, "&le;");
Add_name(rv, Bool_.N, 8970, "&lfloor;");
Add_name(rv, Bool_.N, 8727, "&lowast;");
Add_name(rv, Bool_.N, 9674, "&loz;");
Add_name(rv, Bool_.N, 8206, "&lrm;");
Add_name(rv, Bool_.N, 8249, "&lsaquo;");
Add_name(rv, Bool_.N, 8216, "&lsquo;");
Add_name(rv, Bool_.N, 60, "&lt;");
Add_name(rv, Bool_.N, 175, "&macr;");
Add_name(rv, Bool_.N, 8212, "&mdash;");
Add_name(rv, Bool_.N, 181, "&micro;");
Add_name(rv, Bool_.N, 183, "&middot;");
Add_name(rv, Bool_.N, 8722, "&minus;");
Add_name(rv, Bool_.N, 924, "&Mu;");
Add_name(rv, Bool_.N, 956, "&mu;");
Add_name(rv, Bool_.N, 8711, "&nabla;");
Add_name(rv, Bool_.N, 160, "&nbsp;");
Add_name(rv, Bool_.N, 8211, "&ndash;");
Add_name(rv, Bool_.N, 8800, "&ne;");
Add_name(rv, Bool_.N, 8715, "&ni;");
Add_name(rv, Bool_.N, 172, "&not;");
Add_name(rv, Bool_.N, 8713, "&notin;");
Add_name(rv, Bool_.N, 8836, "&nsub;");
Add_name(rv, Bool_.N, 209, "&Ntilde;");
Add_name(rv, Bool_.N, 241, "&ntilde;");
Add_name(rv, Bool_.N, 925, "&Nu;");
Add_name(rv, Bool_.N, 957, "&nu;");
Add_name(rv, Bool_.N, 211, "&Oacute;");
Add_name(rv, Bool_.N, 243, "&oacute;");
Add_name(rv, Bool_.N, 212, "&Ocirc;");
Add_name(rv, Bool_.N, 244, "&ocirc;");
Add_name(rv, Bool_.N, 338, "&OElig;");
Add_name(rv, Bool_.N, 339, "&oelig;");
Add_name(rv, Bool_.N, 210, "&Ograve;");
Add_name(rv, Bool_.N, 242, "&ograve;");
Add_name(rv, Bool_.N, 8254, "&oline;");
Add_name(rv, Bool_.N, 937, "&Omega;");
Add_name(rv, Bool_.N, 969, "&omega;");
Add_name(rv, Bool_.N, 927, "&Omicron;");
Add_name(rv, Bool_.N, 959, "&omicron;");
Add_name(rv, Bool_.N, 8853, "&oplus;");
Add_name(rv, Bool_.N, 8744, "&or;");
Add_name(rv, Bool_.N, 170, "&ordf;");
Add_name(rv, Bool_.N, 186, "&ordm;");
Add_name(rv, Bool_.N, 216, "&Oslash;");
Add_name(rv, Bool_.N, 248, "&oslash;");
Add_name(rv, Bool_.N, 213, "&Otilde;");
Add_name(rv, Bool_.N, 245, "&otilde;");
Add_name(rv, Bool_.N, 8855, "&otimes;");
Add_name(rv, Bool_.N, 214, "&Ouml;");
Add_name(rv, Bool_.N, 246, "&ouml;");
Add_name(rv, Bool_.N, 182, "&para;");
Add_name(rv, Bool_.N, 8706, "&part;");
Add_name(rv, Bool_.N, 8240, "&permil;");
Add_name(rv, Bool_.N, 8869, "&perp;");
Add_name(rv, Bool_.N, 934, "&Phi;");
Add_name(rv, Bool_.N, 966, "&phi;");
Add_name(rv, Bool_.N, 928, "&Pi;");
Add_name(rv, Bool_.N, 960, "&pi;");
Add_name(rv, Bool_.N, 982, "&piv;");
Add_name(rv, Bool_.N, 177, "&plusmn;");
Add_name(rv, Bool_.N, 163, "&pound;");
Add_name(rv, Bool_.N, 8242, "&prime;");
Add_name(rv, Bool_.N, 8243, "&Prime;");
Add_name(rv, Bool_.N, 8719, "&prod;");
Add_name(rv, Bool_.N, 8733, "&prop;");
Add_name(rv, Bool_.N, 936, "&Psi;");
Add_name(rv, Bool_.N, 968, "&psi;");
Add_name(rv, Bool_.N, 34, "&quot;");
Add_name(rv, Bool_.N, 8730, "&radic;");
Add_name(rv, Bool_.N, 9002, "&rang;");
Add_name(rv, Bool_.N, 187, "&raquo;");
Add_name(rv, Bool_.N, 8594, "&rarr;");
Add_name(rv, Bool_.N, 8658, "&rArr;");
Add_name(rv, Bool_.N, 8969, "&rceil;");
Add_name(rv, Bool_.N, 8221, "&rdquo;");
Add_name(rv, Bool_.N, 8476, "&real;");
Add_name(rv, Bool_.N, 174, "&reg;");
Add_name(rv, Bool_.N, 8971, "&rfloor;");
Add_name(rv, Bool_.N, 929, "&Rho;");
Add_name(rv, Bool_.N, 961, "&rho;");
Add_name(rv, Bool_.N, 8207, "&rlm;");
Add_name(rv, Bool_.N, 8250, "&rsaquo;");
Add_name(rv, Bool_.N, 8217, "&rsquo;");
Add_name(rv, Bool_.N, 8218, "&sbquo;");
Add_name(rv, Bool_.N, 352, "&Scaron;");
Add_name(rv, Bool_.N, 353, "&scaron;");
Add_name(rv, Bool_.N, 8901, "&sdot;");
Add_name(rv, Bool_.N, 167, "&sect;");
Add_name(rv, Bool_.N, 173, "&shy;");
Add_name(rv, Bool_.N, 931, "&Sigma;");
Add_name(rv, Bool_.N, 963, "&sigma;");
Add_name(rv, Bool_.N, 962, "&sigmaf;");
Add_name(rv, Bool_.N, 8764, "&sim;");
Add_name(rv, Bool_.N, 9824, "&spades;");
Add_name(rv, Bool_.N, 8834, "&sub;");
Add_name(rv, Bool_.N, 8838, "&sube;");
Add_name(rv, Bool_.N, 8721, "&sum;");
Add_name(rv, Bool_.N, 8835, "&sup;");
Add_name(rv, Bool_.N, 185, "&sup1;");
Add_name(rv, Bool_.N, 178, "&sup2;");
Add_name(rv, Bool_.N, 179, "&sup3;");
Add_name(rv, Bool_.N, 8839, "&supe;");
Add_name(rv, Bool_.N, 223, "&szlig;");
Add_name(rv, Bool_.N, 932, "&Tau;");
Add_name(rv, Bool_.N, 964, "&tau;");
Add_name(rv, Bool_.N, 8756, "&there4;");
Add_name(rv, Bool_.N, 920, "&Theta;");
Add_name(rv, Bool_.N, 952, "&theta;");
Add_name(rv, Bool_.N, 977, "&thetasym;");
Add_name(rv, Bool_.N, 8201, "&thinsp;");
Add_name(rv, Bool_.N, 222, "&THORN;");
Add_name(rv, Bool_.N, 254, "&thorn;");
Add_name(rv, Bool_.N, 732, "&tilde;");
Add_name(rv, Bool_.N, 215, "&times;");
Add_name(rv, Bool_.N, 8482, "&trade;");
Add_name(rv, Bool_.N, 218, "&Uacute;");
Add_name(rv, Bool_.N, 250, "&uacute;");
Add_name(rv, Bool_.N, 8593, "&uarr;");
Add_name(rv, Bool_.N, 8657, "&uArr;");
Add_name(rv, Bool_.N, 219, "&Ucirc;");
Add_name(rv, Bool_.N, 251, "&ucirc;");
Add_name(rv, Bool_.N, 217, "&Ugrave;");
Add_name(rv, Bool_.N, 249, "&ugrave;");
Add_name(rv, Bool_.N, 168, "&uml;");
Add_name(rv, Bool_.N, 978, "&upsih;");
Add_name(rv, Bool_.N, 933, "&Upsilon;");
Add_name(rv, Bool_.N, 965, "&upsilon;");
Add_name(rv, Bool_.N, 220, "&Uuml;");
Add_name(rv, Bool_.N, 252, "&uuml;");
Add_name(rv, Bool_.N, 8472, "&weierp;");
Add_name(rv, Bool_.N, 926, "&Xi;");
Add_name(rv, Bool_.N, 958, "&xi;");
Add_name(rv, Bool_.N, 221, "&Yacute;");
Add_name(rv, Bool_.N, 253, "&yacute;");
Add_name(rv, Bool_.N, 165, "&yen;");
Add_name(rv, Bool_.N, 376, "&Yuml;");
Add_name(rv, Bool_.N, 255, "&yuml;");
Add_name(rv, Bool_.N, 918, "&Zeta;");
Add_name(rv, Bool_.N, 950, "&zeta;");
Add_name(rv, Bool_.N, 8205, "&zwj;");
Add_name(rv, Bool_.N, 8204, "&zwnj;");
Add_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#x");
Add_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#X");
Add_prefix(rv, Xop_amp_trie_itm.Tid_num_dec, "#");
return rv;
}
// Registers one named entity in the trie.
// - itm tid is Tid_name_xowa when tid_is_xowa is true, else Tid_name_std
// - the trie key is the xml name without its first byte ("&"), but with the trailing ";" kept
// NOTE(review): this span comes from a rendered diff; the Reg_name lines and the Add_name lines appear to be the
// pre-change and post-change versions of the same helper, shown interleaved -- confirm against the repository.
private static void Reg_name(Btrie_slim_mgr trie, boolean tid_is_xowa, int char_int, String xml_name_str) {Reg_name(trie, tid_is_xowa, char_int, Bry_.new_a7(xml_name_str));}
private static void Reg_name(Btrie_slim_mgr trie, boolean tid_is_xowa, int char_int, byte[] xml_name_bry) {
private static void Add_name(Btrie_slim_mgr trie, boolean tid_is_xowa, int char_int, String xml_name_str) {
byte itm_tid = tid_is_xowa ? Xop_amp_trie_itm.Tid_name_xowa : Xop_amp_trie_itm.Tid_name_std;
Xop_amp_trie_itm itm = new Xop_amp_trie_itm(itm_tid, char_int, xml_name_bry);
byte[] xml_name_bry = Bry_.new_a7(xml_name_str);
byte[] key = Bry_.Mid(xml_name_bry, 1, xml_name_bry.length); // ignore & for purpose of trie; EX: "amp;"; NOTE: must keep trailing ";" else "&amp " will be valid;
trie.Add_obj(key, itm);
trie.Add_obj(key, new Xop_amp_trie_itm(itm_tid, char_int, xml_name_bry));
}
// Registers a numeric-reference prefix in the trie; callers in this file pass "#x", "#X" (hex) and "#" (dec).
// The prefix bytes serve both as the trie key and as the item's name bytes; char_int is set to Char_int_null.
// NOTE(review): this span comes from a rendered diff; the Reg_prefix and Add_prefix headers (and the two prefix_ary
// lines) appear to be the pre-change and post-change versions of the same helper -- confirm against the repository.
private static void Reg_prefix(Btrie_slim_mgr trie, byte prefix_type, String prefix) {
byte[] prefix_ary = Bry_.new_a7(prefix);
private static void Add_prefix(Btrie_slim_mgr trie, byte prefix_type, String prefix) {
byte[] prefix_ary = Bry_.new_u8(prefix);
Xop_amp_trie_itm itm = new Xop_amp_trie_itm(prefix_type, Xop_amp_trie_itm.Char_int_null, prefix_ary);
trie.Add_obj(prefix_ary, itm);
}

View File

@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.langs.htmls.*; import gplx.xowa.htmls.core.wkrs.lnkis.htmls.*;
public class Xop_amp_trie_itm {
public class Xop_amp_trie_itm { // TS
public Xop_amp_trie_itm(byte tid, int char_int, byte[] xml_name_bry) {
this.tid = tid;
this.char_int = char_int;
@@ -25,11 +25,12 @@ public class Xop_amp_trie_itm {
this.xml_name_bry = xml_name_bry;
this.key_name_len = xml_name_bry.length - 2; // 2 for & and ;
}
public byte Tid() {return tid;} private final byte tid;
public int Char_int() {return char_int;} private final int char_int; // val; EX: 160
public byte[] U8_bry() {return u8_bry;} private final byte[] u8_bry; // EX: new byte[] {194, 160}; (C2, A0); NOTE: 0xC2 is 194 in decimal
public byte[] Xml_name_bry() {return xml_name_bry;} private final byte[] xml_name_bry; // EX: "&nbsp;"
public int Key_name_len() {return key_name_len;} private final int key_name_len; // EX: "nbsp".Len
public byte Tid() {return tid;} private final byte tid;
public int Char_int() {return char_int;} private final int char_int; // val; EX: 160
public byte[] U8_bry() {return u8_bry;} private final byte[] u8_bry; // EX: new byte[] {194, 160}; (C2, A0); NOTE: 0xC2 is 194 in decimal
public byte[] Xml_name_bry() {return xml_name_bry;} private final byte[] xml_name_bry; // EX: "&nbsp;"
public int Key_name_len() {return key_name_len;} private final int key_name_len; // EX: "nbsp".Len
public void Print_ncr(Bry_bfr bfr) {
switch (char_int) {
case Byte_ascii.Lt: case Byte_ascii.Gt: case Byte_ascii.Quote: case Byte_ascii.Amp:
@@ -48,9 +49,7 @@ public class Xop_amp_trie_itm {
case Byte_ascii.Gt: bfr.Add(Gfh_entity_.Gt_bry); break;
case Byte_ascii.Quote: bfr.Add(Gfh_entity_.Quote_bry); break;
case Byte_ascii.Amp: bfr.Add(Gfh_entity_.Amp_bry); break;
default:
bfr.Add(u8_bry); // write literal; EX: "[" not "&#91;"
break;
default: bfr.Add(u8_bry); break; // write literal; EX: "[" not "&#91;"
}
}
public static final byte Tid_name_std = 1, Tid_name_xowa = 2, Tid_num_hex = 3, Tid_num_dec = 4;

View File

@@ -20,11 +20,13 @@ public class Xop_amp_wkr implements Xop_ctx_wkr {
public void Ctor_ctx(Xop_ctx ctx) {}
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn, int cur_pos) {
if (cur_pos == src_len) return ctx.Lxr_make_txt_(cur_pos); // NOTE: & is last char in page; strange and rare, but don't raise error
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn, int cur) {
if (cur == src_len) return ctx.Lxr_make_txt_(cur); // NOTE: & is last char in page; strange and rare, but don't raise error
Xop_amp_mgr amp_mgr = ctx.App().Parser_amp_mgr();
Xop_tkn_itm amp_tkn = amp_mgr.Parse_as_tkn(tkn_mkr, src, src_len, bgn, cur_pos);
int rv_pos = amp_mgr.Rslt_pos();
Xop_amp_mgr_rslt amp_rv = amp_mgr.Parse_tkn(tkn_mkr, src, src_len, bgn, cur);
Xop_tkn_itm amp_tkn = amp_rv.Tkn();
int rv_pos = amp_rv.Pos();
if (amp_tkn == null) return ctx.Lxr_make_txt_(rv_pos);
ctx.Subs_add(root, amp_tkn);
return rv_pos;

View File

@@ -18,11 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_amp_wkr_tst {
private final Xop_fxt fxt = new Xop_fxt();
@Test public void Name() {fxt.Test_parse_page_wiki("&amp;" , fxt.tkn_html_ref_("&amp;"));} // check for html_ref
@Test public void Name_fail() {fxt.Test_parse_page_wiki("&nil;" , fxt.tkn_txt_(0, 5));} // check for text
@Test public void Hex() {fxt.Test_parse_page_wiki("&#x3A3;" , fxt.tkn_html_ncr_(931));} // check for html_ncr; Σ: http://en.wikipedia.org/wiki/Numeric_character_reference
@Test public void Num_fail_incomplete() {fxt.Test_parse_page_wiki("&#" , fxt.tkn_txt_());}
private final Xop_fxt fxt = new Xop_fxt();
@Test public void Convert_to_named() {fxt.Test_parse_page_wiki_str("&amp;" , "&amp;");} // note that &amp; is printed, not &
@Test public void Convert_to_named_amp() {fxt.Test_parse_page_wiki_str("&" , "&amp;");} // PURPOSE: html_wtr was not handling & only
@Test public void Convert_to_numeric() {fxt.Test_parse_page_wiki_str("&aacute;" , "&#225;");} // testing that &#225; is outputted, not á

View File

@@ -1,29 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.tests.*;
// Test checker for Xop_amp_tkn_txt tokens (tkn tid: Tid_html_ref); compares the token's xml name against an expected key.
public class Xop_html_txt_tkn_chkr extends Xop_tkn_chkr_base {
	private String html_ref_key; // expected xml name; set through Html_ref_key_

	@Override public Class<?> TypeOf() {
		return Xop_amp_tkn_txt.class;
	}
	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_html_ref;
	}
	public String Html_ref_key() {
		return html_ref_key;
	}
	// fluent setter; returns this checker so calls can be chained
	public Xop_html_txt_tkn_chkr Html_ref_key_(String v) {
		this.html_ref_key = v;
		return this;
	}
	@Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
		Xop_amp_tkn_txt tkn = (Xop_amp_tkn_txt)actl_obj;
		String actl_name = String_.new_u8(tkn.Xml_name_bry());
		// first arg is true when no expected key was supplied; presumably Tst_val treats that as "skip this check" -- TODO confirm
		err += mgr.Tst_val(html_ref_key == null, path, "html_ref_key", html_ref_key, actl_name);
		return err;
	}
}