mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
'v3.7.1.1'
This commit is contained in:
@@ -25,7 +25,7 @@ public class Xop_tkn_mkr {
|
||||
/**
 * Builds a new space token for the given source positions.
 * The first constructor argument is always passed as {@code false}.
 * NOTE(review): presumably that flag marks the token as not immutable (per the method name) -- confirm against Xop_space_tkn.
 */
public Xop_space_tkn Space_mutable(int bgn, int end) {
	Xop_space_tkn space_tkn = new Xop_space_tkn(false, bgn, end);
	return space_tkn;
}
|
||||
public Xop_apos_tkn Apos(int bgn, int end
|
||||
, int aposLen, int typ, int cmd, int lit_apos) {return new Xop_apos_tkn(bgn, end, aposLen, typ, cmd, lit_apos);}
|
||||
/**
 * Builds a token for a named entity by wrapping the matched trie item in a new Xop_amp_tkn_txt.
 */
public Xop_tkn_itm Amp_txt(int bgn, int end, Xop_amp_trie_itm itm) {
	Xop_tkn_itm txt_tkn = new Xop_amp_tkn_txt(bgn, end, itm);
	return txt_tkn;
}
|
||||
/**
 * Builds a token for a named entity by wrapping the matched trie item in a new Xop_amp_tkn_ent.
 */
public Xop_tkn_itm Amp_txt(int bgn, int end, Xop_amp_trie_itm itm) {
	Xop_tkn_itm ent_tkn = new Xop_amp_tkn_ent(bgn, end, itm);
	return ent_tkn;
}
|
||||
/**
 * Builds a numeric-character-reference token from an already-encoded value.
 * Both the int value and its byte form are handed to the Xop_amp_tkn_num constructor as given.
 */
public Xop_tkn_itm Amp_num(int bgn, int end, int val_int, byte[] val_bry) {
	Xop_tkn_itm num_tkn = new Xop_amp_tkn_num(bgn, end, val_int, val_bry);
	return num_tkn;
}
|
||||
/**
 * Builds a numeric-character-reference token from an int value only.
 * The byte form is produced by Utf16_.Encode_int_to_bry(val_int) before the token is constructed.
 */
public Xop_tkn_itm Amp_num(int bgn, int end, int val_int) {
	byte[] encoded = gplx.core.intls.Utf16_.Encode_int_to_bry(val_int);
	return new Xop_amp_tkn_num(bgn, end, val_int, encoded);
}
|
||||
/**
 * Builds a new newline token; positions, newline type and newline length are forwarded unchanged to Xop_nl_tkn.
 */
public Xop_nl_tkn NewLine(int bgn, int end, byte nl_typ, int nl_len) {
	Xop_nl_tkn nl_tkn = new Xop_nl_tkn(bgn, end, nl_typ, nl_len);
	return nl_tkn;
}
|
||||
|
||||
@@ -17,113 +17,189 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_amp_mgr {
|
||||
private final Object thread_lock_1 = new Object(), thread_lock_2 = new Object();
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr_.Reset(32);
|
||||
public class Xop_amp_mgr { // TS
|
||||
public Btrie_slim_mgr Amp_trie() {return amp_trie;} private final Btrie_slim_mgr amp_trie = Xop_amp_trie.Instance;
|
||||
public int Rslt_pos() {return rslt_pos;} private int rslt_pos;
|
||||
public int Rslt_val() {return rslt_val;} private int rslt_val;
|
||||
public Xop_tkn_itm Parse_as_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int cur_pos) {
|
||||
synchronized (thread_lock_1) {
|
||||
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
Object o = amp_trie.Match_bgn(src, cur_pos, src_len);
|
||||
cur_pos = amp_trie.Match_pos();
|
||||
if (o == null) return null;
|
||||
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
|
||||
switch (itm.Tid()) {
|
||||
case Xop_amp_trie_itm.Tid_name_std:
|
||||
case Xop_amp_trie_itm.Tid_name_xowa:
|
||||
rslt_pos = cur_pos;
|
||||
return tkn_mkr.Amp_txt(amp_pos, cur_pos, itm);
|
||||
case Xop_amp_trie_itm.Tid_num_hex:
|
||||
case Xop_amp_trie_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
|
||||
boolean pass = Parse_as_int(ncr_is_hex, src, src_len, amp_pos, cur_pos);
|
||||
return pass ? tkn_mkr.Amp_num(amp_pos, rslt_pos, rslt_val) : null;
|
||||
default: throw Err_.new_unhandled(itm.Tid());
|
||||
}
|
||||
public Xop_amp_mgr_rslt Parse_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int bgn) {
|
||||
int fail_pos = amp_pos + 1; // default to fail pos which is after &
|
||||
|
||||
// check amp_trie; EX: 'lt'
|
||||
Xop_amp_mgr_rslt rv = new Xop_amp_mgr_rslt();
|
||||
Btrie_rv match = amp_trie.Match_at(src, bgn, src_len);
|
||||
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)match.Obj();
|
||||
int cur = match.Pos();
|
||||
match.Pool__rls();
|
||||
if (itm == null) {
|
||||
rv.Pass_n_(fail_pos);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
public boolean Parse_as_int(boolean ncr_is_hex, byte[] src, int src_len, int amp_pos, int int_bgn) {
|
||||
synchronized (thread_lock_2) {
|
||||
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
rslt_val = -1; // clear any previous setting
|
||||
int cur_pos = int_bgn, int_end = -1;
|
||||
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, cur_pos, src_len);
|
||||
if (semic_pos == Bry_find_.Not_found) return false;
|
||||
int_end = semic_pos - 1; // int_end = pos before semicolon
|
||||
int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
|
||||
for (int i = int_end; i >= int_bgn; i--) {
|
||||
byte b = src[i];
|
||||
if (ncr_is_hex) {
|
||||
if (b >= 48 && b <= 57) cur = b - 48;
|
||||
else if (b >= 65 && b <= 70) cur = b - 55;
|
||||
else if (b >= 97 && b <= 102) cur = b - 87;
|
||||
else if((b >= 71 && b <= 90)
|
||||
|| (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
|
||||
else return false;
|
||||
|
||||
// check itm
|
||||
switch (itm.Tid()) {
|
||||
// letters; EX: '<'
|
||||
case Xop_amp_trie_itm.Tid_name_std:
|
||||
case Xop_amp_trie_itm.Tid_name_xowa:
|
||||
rv.Pos_(cur);
|
||||
rv.Tkn_(tkn_mkr.Amp_txt(amp_pos, cur, itm));
|
||||
return rv;
|
||||
// numbers; EX: '{' 'ģ'
|
||||
case Xop_amp_trie_itm.Tid_num_hex:
|
||||
case Xop_amp_trie_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
|
||||
boolean pass = Parse_ncr(rv, ncr_is_hex, src, src_len, amp_pos, cur);
|
||||
if (pass) { // NOTE: do not set rv.Pos_(); will be set by Parse_ncr
|
||||
rv.Tkn_(tkn_mkr.Amp_num(amp_pos, rv.Pos(), rv.Val()));
|
||||
return rv;
|
||||
}
|
||||
else {
|
||||
cur = b - Byte_ascii.Num_0;
|
||||
if (cur < 0 || cur > 10) return false;
|
||||
rv.Pass_n_(fail_pos);
|
||||
return rv;
|
||||
}
|
||||
val += cur * factor;
|
||||
if (val > gplx.core.intls.Utf8_.Codepoint_max) return false; // fail if value > largest_unicode_codepoint
|
||||
factor *= multiple;
|
||||
}
|
||||
rslt_val = val;
|
||||
rslt_pos = semic_pos + 1; // position after semic
|
||||
return true;
|
||||
default: throw Err_.new_unhandled_default(itm.Tid());
|
||||
}
|
||||
}
|
||||
public boolean Parse_ncr(Xop_amp_mgr_rslt rv, boolean ncr_is_hex, byte[] src, int src_len, int amp_pos, int num_bgn) {
|
||||
int fail_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
|
||||
// find semic; fail if none found
|
||||
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, num_bgn, src_len);
|
||||
if (semic_pos == Bry_find_.Not_found) return rv.Pass_n_(fail_pos);
|
||||
int num_end = semic_pos - 1; // num_end = pos before semicolon
|
||||
|
||||
// calc amp_val; EX: Σ -> 931; Σ -> 931;
|
||||
int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
|
||||
for (int i = num_end; i >= num_bgn; i--) {
|
||||
byte b = src[i];
|
||||
if (ncr_is_hex) {
|
||||
if (b >= 48 && b <= 57) cur = b - 48;
|
||||
else if (b >= 65 && b <= 70) cur = b - 55;
|
||||
else if (b >= 97 && b <= 102) cur = b - 87;
|
||||
else if((b >= 71 && b <= 90)
|
||||
|| (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
|
||||
else return rv.Pass_n_(fail_pos);
|
||||
}
|
||||
else {
|
||||
cur = b - Byte_ascii.Num_0;
|
||||
if (cur < 0 || cur > 10) return rv.Pass_n_(fail_pos);
|
||||
}
|
||||
val += cur * factor;
|
||||
if (val > gplx.core.intls.Utf8_.Codepoint_max) return rv.Pass_n_(fail_pos); // fail if value > largest_unicode_codepoint
|
||||
factor *= multiple;
|
||||
}
|
||||
return rv.Pass_y_(semic_pos + 1, val); // +1 to position after semic
|
||||
}
|
||||
public byte[] Decode_as_bry(byte[] src) {
|
||||
if (src == null) return src;
|
||||
int src_len = src.length;
|
||||
boolean dirty = false;
|
||||
int end = src.length;
|
||||
int pos = 0;
|
||||
synchronized (tmp_bfr) {
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Amp) {
|
||||
int nxt_pos = pos + 1;
|
||||
if (nxt_pos < src_len) {
|
||||
byte nxt_b = src[nxt_pos];
|
||||
Object amp_obj = amp_trie.Match_bgn_w_byte(nxt_b, src, nxt_pos, src_len);
|
||||
if (amp_obj != null) {
|
||||
if (!dirty) {
|
||||
tmp_bfr.Add_mid(src, 0, pos);
|
||||
Xop_amp_mgr_rslt amp_rv = null;
|
||||
Bry_bfr bfr = null;
|
||||
|
||||
// scan for &
|
||||
while (pos < end) {
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Amp) { // & found
|
||||
int nxt_pos = pos + 1;
|
||||
if (nxt_pos < end) { // check & is not eos
|
||||
byte nxt_b = src[nxt_pos];
|
||||
|
||||
Btrie_rv trie_rv = amp_trie.Match_at_w_b0(nxt_b, src, nxt_pos, end);
|
||||
Object amp_obj = trie_rv.Obj();
|
||||
int amp_pos = trie_rv.Pos();
|
||||
trie_rv.Pool__rls();
|
||||
if (amp_obj != null) {
|
||||
if (!dirty) { // 1st amp found; add preceding String to bfr
|
||||
if (bfr == null) {
|
||||
bfr = Bry_bfr_.Get();
|
||||
dirty = true;
|
||||
}
|
||||
Xop_amp_trie_itm amp_itm = (Xop_amp_trie_itm)amp_obj;
|
||||
switch (amp_itm.Tid()) {
|
||||
case Xop_amp_trie_itm.Tid_name_std:
|
||||
case Xop_amp_trie_itm.Tid_name_xowa:
|
||||
tmp_bfr.Add(amp_itm.U8_bry());
|
||||
pos = amp_trie.Match_pos();
|
||||
break;
|
||||
case Xop_amp_trie_itm.Tid_num_hex:
|
||||
case Xop_amp_trie_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = amp_itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
|
||||
int int_bgn = amp_trie.Match_pos();
|
||||
if (Parse_as_int(ncr_is_hex, src, src_len, pos, int_bgn))
|
||||
tmp_bfr.Add_u8_int(rslt_val);
|
||||
else
|
||||
tmp_bfr.Add_mid(src, pos, nxt_pos);
|
||||
pos = rslt_pos;
|
||||
break;
|
||||
default:
|
||||
throw Err_.new_unhandled(amp_itm.Tid());
|
||||
}
|
||||
continue;
|
||||
bfr.Add_mid(src, 0, pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (dirty)
|
||||
tmp_bfr.Add_byte(b);
|
||||
++pos;
|
||||
Xop_amp_trie_itm amp_itm = (Xop_amp_trie_itm)amp_obj;
|
||||
switch (amp_itm.Tid()) {
|
||||
case Xop_amp_trie_itm.Tid_name_std:
|
||||
case Xop_amp_trie_itm.Tid_name_xowa:
|
||||
bfr.Add(amp_itm.U8_bry());
|
||||
pos = amp_pos;
|
||||
break;
|
||||
case Xop_amp_trie_itm.Tid_num_hex:
|
||||
case Xop_amp_trie_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = amp_itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
|
||||
int int_bgn = amp_pos;
|
||||
if (amp_rv == null)
|
||||
amp_rv = new Xop_amp_mgr_rslt();
|
||||
boolean pass = Parse_ncr(amp_rv, ncr_is_hex, src, end, pos, int_bgn);
|
||||
if (pass)
|
||||
bfr.Add_u8_int(amp_rv.Val());
|
||||
else
|
||||
bfr.Add_mid(src, pos, nxt_pos);
|
||||
pos = amp_rv.Pos();
|
||||
break;
|
||||
default:
|
||||
throw Err_.new_unhandled_default(amp_itm.Tid());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
return dirty ? tmp_bfr.To_bry_and_clear() : src;
|
||||
if (dirty)
|
||||
bfr.Add_byte(b);
|
||||
++pos;
|
||||
}
|
||||
return dirty ? bfr.To_bry_and_clear_and_rls() : src;
|
||||
}
|
||||
public static final Xop_amp_mgr Instance = new Xop_amp_mgr(); Xop_amp_mgr() {}
|
||||
// private Xop_tkn_itm Parse_as_tkn_old(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int cur_pos) {
|
||||
// synchronized (thread_lock_1) {
|
||||
// rv_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
// Object o = amp_trie.Match_bgn(src, cur_pos, src_len);
|
||||
// cur_pos = amp_trie.Match_pos();
|
||||
// if (o == null) return null;
|
||||
// Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
|
||||
// switch (itm.Tid()) {
|
||||
// case Xop_amp_trie_itm.Tid_name_std:
|
||||
// case Xop_amp_trie_itm.Tid_name_xowa:
|
||||
// rv_pos = cur_pos;
|
||||
// return tkn_mkr.Amp_txt(amp_pos, cur_pos, itm);
|
||||
// case Xop_amp_trie_itm.Tid_num_hex:
|
||||
// case Xop_amp_trie_itm.Tid_num_dec:
|
||||
// boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
|
||||
// Xop_amp_mgr_rslt rv = Parse_as_int2(ncr_is_hex, src, src_len, amp_pos, cur_pos);
|
||||
// return rv.Pass() ? tkn_mkr.Amp_num(amp_pos, rv_pos, rslt_val) : null;
|
||||
// default: throw Err_.new_unhandled(itm.Tid());
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// private boolean Parse_as_int_old(boolean ncr_is_hex, byte[] src, int src_len, int amp_pos, int int_bgn) {
|
||||
// synchronized (thread_lock_2) {
|
||||
// rv_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
// rslt_val = -1; // clear any previous setting
|
||||
// int cur_pos = int_bgn, int_end = -1;
|
||||
// int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, cur_pos, src_len);
|
||||
// if (semic_pos == Bry_find_.Not_found) return false;
|
||||
// int_end = semic_pos - 1; // int_end = pos before semicolon
|
||||
// int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
|
||||
// for (int i = int_end; i >= int_bgn; i--) {
|
||||
// byte b = src[i];
|
||||
// if (ncr_is_hex) {
|
||||
// if (b >= 48 && b <= 57) cur = b - 48;
|
||||
// else if (b >= 65 && b <= 70) cur = b - 55;
|
||||
// else if (b >= 97 && b <= 102) cur = b - 87;
|
||||
// else if((b >= 71 && b <= 90)
|
||||
// || (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
|
||||
// else return false;
|
||||
// }
|
||||
// else {
|
||||
// cur = b - Byte_ascii.Num_0;
|
||||
// if (cur < 0 || cur > 10) return false;
|
||||
// }
|
||||
// val += cur * factor;
|
||||
// if (val > gplx.core.intls.Utf8_.Codepoint_max) return false; // fail if value > largest_unicode_codepoint
|
||||
// factor *= multiple;
|
||||
// }
|
||||
// rslt_val = val;
|
||||
// rv_pos = semic_pos + 1; // position after semic
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xop_amp_mgr__decode__tst {
|
||||
@Before public void init() {} private final Xop_amp_mgr_fxt fxt = new Xop_amp_mgr_fxt();
|
||||
@Test public void Text() {fxt.Test__decode_as_bry("a" , "a");}
|
||||
@Test public void Name() {fxt.Test__decode_as_bry("&" , "&");}
|
||||
@Test public void Name_w_text() {fxt.Test__decode_as_bry("a&b" , "a&b");}
|
||||
@Test public void Name_fail_semic_missing() {fxt.Test__decode_as_bry("a&b" , "a&b");}
|
||||
@Test public void Name_fail_amp_only() {fxt.Test__decode_as_bry("a&" , "a&");}
|
||||
@Test public void Num_fail() {fxt.Test__decode_as_bry("&#!;" , "&#!;");} // ! is not valid num
|
||||
@Test public void Hex_fail() {fxt.Test__decode_as_bry("&#x!;" , "&#x!;");} // ! is not valid hex
|
||||
@Test public void Num_basic() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Num_zero_padded() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_upper() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_lower() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_zero_padded() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_upper_x() {fxt.Test__decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Num_fail_large_codepoint() {fxt.Test__decode_as_bry("�" , "�");}
|
||||
@Test public void Num_ignore_extra_x() {fxt.Test__decode_as_bry("&#xx26D0;" , Char_.To_str(Char_.By_int(9936)));} // 2nd x is ignored
|
||||
}
|
||||
class Xop_amp_mgr_fxt {
|
||||
private final Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance;
|
||||
public void Test__decode_as_bry(String raw, String expd) {
|
||||
Gftest.Eq__str(expd, String_.new_u8(amp_mgr.Decode_as_bry(Bry_.new_u8(raw))));
|
||||
}
|
||||
public void Test__parse_tkn__ent(String raw, String expd) {
|
||||
Xop_amp_mgr_rslt rv = Exec__parse_tkn(raw);
|
||||
Xop_amp_tkn_ent tkn = (Xop_amp_tkn_ent)rv.Tkn();
|
||||
Gftest.Eq__byte(Xop_tkn_itm_.Tid_html_ref, tkn.Tkn_tid());
|
||||
Gftest.Eq__str(expd, tkn.Xml_name_bry());
|
||||
}
|
||||
public void Test__parse_tkn__ncr(String raw, int expd) {
|
||||
Xop_amp_mgr_rslt rv = Exec__parse_tkn(raw);
|
||||
Xop_amp_tkn_num tkn = (Xop_amp_tkn_num)rv.Tkn();
|
||||
Gftest.Eq__byte(Xop_tkn_itm_.Tid_html_ncr, tkn.Tkn_tid());
|
||||
Gftest.Eq__int(expd, tkn.Val());
|
||||
}
|
||||
public void Test__parse_tkn__txt(String raw, int expd) {
|
||||
Xop_amp_mgr_rslt rv = Exec__parse_tkn(raw);
|
||||
Gftest.Eq__null(Bool_.Y, rv.Tkn());
|
||||
Gftest.Eq__int(expd, rv.Pos());
|
||||
}
|
||||
private Xop_amp_mgr_rslt Exec__parse_tkn(String raw) {
|
||||
byte[] src = Bry_.new_u8(raw);
|
||||
return amp_mgr.Parse_tkn(new Xop_tkn_mkr(), src, src.length, 0, 1);
|
||||
}
|
||||
}
|
||||
@@ -16,14 +16,12 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.tests.*;
|
||||
public class Xop_html_num_tkn_chkr extends Xop_tkn_chkr_base {
|
||||
@Override public Class<?> TypeOf() {return Xop_amp_tkn_num.class;}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_html_ncr;}
|
||||
public int Html_ncr_val() {return html_ncr_val;} public Xop_html_num_tkn_chkr Html_ncr_val_(int v) {html_ncr_val = v; return this;} private int html_ncr_val = -1;
|
||||
@Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
|
||||
Xop_amp_tkn_num actl = (Xop_amp_tkn_num)actl_obj;
|
||||
err += mgr.Tst_val(html_ncr_val == -1, path, "html_ncr_val", html_ncr_val, actl.Val());
|
||||
return err;
|
||||
}
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xop_amp_mgr__parse_tkn__tst {
|
||||
@Before public void init() {} private final Xop_amp_mgr_fxt fxt = new Xop_amp_mgr_fxt();
|
||||
@Test public void Ent() {fxt.Test__parse_tkn__ent("&" , "&");} // check for html_ref
|
||||
@Test public void Ent__fail() {fxt.Test__parse_tkn__txt("&nil;" , 1);}
|
||||
@Test public void Num__nex() {fxt.Test__parse_tkn__ncr("Σ" , 931);} // check for html_ncr; Σ: http://en.wikipedia.org/wiki/Numeric_character_reference
|
||||
@Test public void Num__dec() {fxt.Test__parse_tkn__ncr("Σ" , 931);}
|
||||
@Test public void Num__fail() {fxt.Test__parse_tkn__txt("&#" , 1);}
|
||||
}
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_amp_mgr_decode_tst {
|
||||
@Before public void init() {fxt.Reset();} private Xop_amp_mgr_fxt fxt = new Xop_amp_mgr_fxt();
|
||||
@Test public void Text() {fxt.Test_decode_as_bry("a" , "a");}
|
||||
@Test public void Name() {fxt.Test_decode_as_bry("&" , "&");}
|
||||
@Test public void Name_w_text() {fxt.Test_decode_as_bry("a&b" , "a&b");}
|
||||
@Test public void Name_fail_semic_missing() {fxt.Test_decode_as_bry("a&b" , "a&b");}
|
||||
@Test public void Name_fail_amp_only() {fxt.Test_decode_as_bry("a&" , "a&");}
|
||||
@Test public void Num_fail() {fxt.Test_decode_as_bry("&#!;" , "&#!;");} // ! is not valid num
|
||||
@Test public void Hex_fail() {fxt.Test_decode_as_bry("&#x!;" , "&#x!;");} // ! is not valid hex
|
||||
@Test public void Num_basic() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Num_zero_padded() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_upper() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_lower() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_zero_padded() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Hex_upper_x() {fxt.Test_decode_as_bry("Σ" , "Σ");}
|
||||
@Test public void Num_fail_large_codepoint() {fxt.Test_decode_as_bry("�" , "�");}
|
||||
@Test public void Num_ignore_extra_x() {fxt.Test_decode_as_bry("&#xx26D0;" , Char_.To_str(Char_.By_int(9936)));} // 2nd x is ignored
|
||||
}
|
||||
class Xop_amp_mgr_fxt {
|
||||
private Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance;
|
||||
public void Reset() {}
|
||||
public void Test_decode_as_bry(String raw, String expd) {
|
||||
Tfds.Eq(expd, String_.new_u8(amp_mgr.Decode_as_bry(Bry_.new_u8(raw))));
|
||||
}
|
||||
}
|
||||
42
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_mgr_rslt.java
Normal file
42
400_xowa/src/gplx/xowa/parsers/amps/Xop_amp_mgr_rslt.java
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_amp_mgr_rslt {
|
||||
public Xop_amp_mgr_rslt(int pos, int val, Xop_tkn_itm tkn) {
|
||||
this.pos = pos;
|
||||
this.val = val;
|
||||
this.tkn = tkn;
|
||||
}
|
||||
public Xop_amp_mgr_rslt() {}
|
||||
public boolean Pass() {return pass;} private boolean pass; public void Valid_(boolean v) {this.pass = v;}
|
||||
public int Pos() {return pos;} private int pos; public void Pos_(int v) {this.pos = v;}
|
||||
public int Val() {return val;} private int val; public void Val_(int v) {this.val = v;}
|
||||
public Xop_tkn_itm Tkn() {return tkn;} private Xop_tkn_itm tkn; public void Tkn_(Xop_tkn_itm v) {this.tkn = v;}
|
||||
public boolean Pass_y_(int pos, int val) {
|
||||
this.pos = pos; this.val = val;
|
||||
this.pass = true;
|
||||
return true;
|
||||
}
|
||||
public boolean Pass_n_(int pos) {
|
||||
this.pass = false;
|
||||
this.pos = pos;
|
||||
this.val = -1;
|
||||
this.tkn = null;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -16,9 +16,9 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_amp_tkn_txt extends Xop_tkn_itm_base {
|
||||
public class Xop_amp_tkn_ent extends Xop_tkn_itm_base {
|
||||
private Xop_amp_trie_itm html_ref_itm;
|
||||
public Xop_amp_tkn_txt(int bgn, int end, Xop_amp_trie_itm html_ref_itm) {
|
||||
public Xop_amp_tkn_ent(int bgn, int end, Xop_amp_trie_itm html_ref_itm) {
|
||||
this.html_ref_itm = html_ref_itm;
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
}
|
||||
@@ -17,301 +17,300 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_amp_trie {
|
||||
public static final byte[] // NOTE: top_define
|
||||
Bry_xowa_lt = Bry_.new_a7("&xowa_lt;")
|
||||
, Bry_xowa_brack_bgn = Bry_.new_a7("&xowa_brack_bgn;")
|
||||
, Bry_xowa_brack_end = Bry_.new_a7("&xowa_brack_end;")
|
||||
, Bry_xowa_pipe = Bry_.new_a7("&xowa_pipe;")
|
||||
, Bry_xowa_apos = Bry_.new_a7("&xowa_apos;")
|
||||
, Bry_xowa_colon = Bry_.new_a7("&xowa_colon;")
|
||||
, Bry_xowa_underline = Bry_.new_a7("&xowa_underline;")
|
||||
, Bry_xowa_asterisk = Bry_.new_a7("&xowa_asterisk;")
|
||||
, Bry_xowa_space = Bry_.new_a7("&xowa_space;")
|
||||
, Bry_xowa_nl = Bry_.new_a7("&xowa_nl;")
|
||||
, Bry_xowa_dash = Bry_.new_a7("&xowa_dash;")
|
||||
public class Xop_amp_trie { // TS
|
||||
public static final String // NOTE: top_define; entities needed for <nowiki> escaping
|
||||
Str__xowa_lt = "&xowa_lt;"
|
||||
, Str__xowa_brack_bgn = "&xowa_brack_bgn;"
|
||||
, Str__xowa_brack_end = "&xowa_brack_end;"
|
||||
, Str__xowa_pipe = "&xowa_pipe;"
|
||||
, Str__xowa_apos = "&xowa_apos;"
|
||||
, Str__xowa_colon = "&xowa_colon;"
|
||||
, Str__xowa_underline = "&xowa_underline;"
|
||||
, Str__xowa_asterisk = "&xowa_asterisk;"
|
||||
, Str__xowa_space = "&xowa_space;"
|
||||
, Str__xowa_nl = "&xowa_nl;"
|
||||
, Str__xowa_dash = "&xowa_dash;"
|
||||
;
|
||||
public static final Btrie_slim_mgr Instance = new_(); Xop_amp_trie() {}
|
||||
private static Btrie_slim_mgr new_() {// REF.MW: Sanitizer|$wgHtmlEntities; NOTE:added apos
|
||||
public static final Btrie_slim_mgr Instance = New(); Xop_amp_trie() {}
|
||||
private static Btrie_slim_mgr New() {// REF.MW: Sanitizer|$wgHtmlEntities; NOTE:added apos
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
|
||||
Reg_name(rv, Bool_.Y, 60, Bry_xowa_lt);
|
||||
Reg_name(rv, Bool_.Y, 91, Bry_xowa_brack_bgn);
|
||||
Reg_name(rv, Bool_.Y, 93, Bry_xowa_brack_end);
|
||||
Reg_name(rv, Bool_.Y, 124, Bry_xowa_pipe);
|
||||
Reg_name(rv, Bool_.Y, 39, Bry_xowa_apos);
|
||||
Reg_name(rv, Bool_.Y, 58, Bry_xowa_colon);
|
||||
Reg_name(rv, Bool_.Y, 95, Bry_xowa_underline);
|
||||
Reg_name(rv, Bool_.Y, 42, Bry_xowa_asterisk);
|
||||
Reg_name(rv, Bool_.Y, 32, Bry_xowa_space);
|
||||
Reg_name(rv, Bool_.Y, 10, Bry_xowa_nl);
|
||||
Reg_name(rv, Bool_.Y, 45, Bry_xowa_dash);
|
||||
Reg_name(rv, Bool_.N, 39, "'");
|
||||
Reg_name(rv, Bool_.N, 193, "Á");
|
||||
Reg_name(rv, Bool_.N, 225, "á");
|
||||
Reg_name(rv, Bool_.N, 194, "Â");
|
||||
Reg_name(rv, Bool_.N, 226, "â");
|
||||
Reg_name(rv, Bool_.N, 180, "´");
|
||||
Reg_name(rv, Bool_.N, 198, "Æ");
|
||||
Reg_name(rv, Bool_.N, 230, "æ");
|
||||
Reg_name(rv, Bool_.N, 192, "À");
|
||||
Reg_name(rv, Bool_.N, 224, "à");
|
||||
Reg_name(rv, Bool_.N, 8501, "ℵ");
|
||||
Reg_name(rv, Bool_.N, 913, "Α");
|
||||
Reg_name(rv, Bool_.N, 945, "α");
|
||||
Reg_name(rv, Bool_.N, 38, "&");
|
||||
Reg_name(rv, Bool_.N, 8743, "∧");
|
||||
Reg_name(rv, Bool_.N, 8736, "∠");
|
||||
Reg_name(rv, Bool_.N, 197, "Å");
|
||||
Reg_name(rv, Bool_.N, 229, "å");
|
||||
Reg_name(rv, Bool_.N, 8776, "≈");
|
||||
Reg_name(rv, Bool_.N, 195, "Ã");
|
||||
Reg_name(rv, Bool_.N, 227, "ã");
|
||||
Reg_name(rv, Bool_.N, 196, "Ä");
|
||||
Reg_name(rv, Bool_.N, 228, "ä");
|
||||
Reg_name(rv, Bool_.N, 8222, "„");
|
||||
Reg_name(rv, Bool_.N, 914, "Β");
|
||||
Reg_name(rv, Bool_.N, 946, "β");
|
||||
Reg_name(rv, Bool_.N, 166, "¦");
|
||||
Reg_name(rv, Bool_.N, 8226, "•");
|
||||
Reg_name(rv, Bool_.N, 8745, "∩");
|
||||
Reg_name(rv, Bool_.N, 199, "Ç");
|
||||
Reg_name(rv, Bool_.N, 231, "ç");
|
||||
Reg_name(rv, Bool_.N, 184, "¸");
|
||||
Reg_name(rv, Bool_.N, 162, "¢");
|
||||
Reg_name(rv, Bool_.N, 935, "Χ");
|
||||
Reg_name(rv, Bool_.N, 967, "χ");
|
||||
Reg_name(rv, Bool_.N, 710, "ˆ");
|
||||
Reg_name(rv, Bool_.N, 9827, "♣");
|
||||
Reg_name(rv, Bool_.N, 8773, "≅");
|
||||
Reg_name(rv, Bool_.N, 169, "©");
|
||||
Reg_name(rv, Bool_.N, 8629, "↵");
|
||||
Reg_name(rv, Bool_.N, 8746, "∪");
|
||||
Reg_name(rv, Bool_.N, 164, "¤");
|
||||
Reg_name(rv, Bool_.N, 8224, "†");
|
||||
Reg_name(rv, Bool_.N, 8225, "‡");
|
||||
Reg_name(rv, Bool_.N, 8595, "↓");
|
||||
Reg_name(rv, Bool_.N, 8659, "⇓");
|
||||
Reg_name(rv, Bool_.N, 176, "°");
|
||||
Reg_name(rv, Bool_.N, 916, "Δ");
|
||||
Reg_name(rv, Bool_.N, 948, "δ");
|
||||
Reg_name(rv, Bool_.N, 9830, "♦");
|
||||
Reg_name(rv, Bool_.N, 247, "÷");
|
||||
Reg_name(rv, Bool_.N, 201, "É");
|
||||
Reg_name(rv, Bool_.N, 233, "é");
|
||||
Reg_name(rv, Bool_.N, 202, "Ê");
|
||||
Reg_name(rv, Bool_.N, 234, "ê");
|
||||
Reg_name(rv, Bool_.N, 200, "È");
|
||||
Reg_name(rv, Bool_.N, 232, "è");
|
||||
Reg_name(rv, Bool_.N, 8709, "∅");
|
||||
Reg_name(rv, Bool_.N, 8195, " ");
|
||||
Reg_name(rv, Bool_.N, 8194, " ");
|
||||
Reg_name(rv, Bool_.N, 917, "Ε");
|
||||
Reg_name(rv, Bool_.N, 949, "ε");
|
||||
Reg_name(rv, Bool_.N, 8801, "≡");
|
||||
Reg_name(rv, Bool_.N, 919, "Η");
|
||||
Reg_name(rv, Bool_.N, 951, "η");
|
||||
Reg_name(rv, Bool_.N, 208, "Ð");
|
||||
Reg_name(rv, Bool_.N, 240, "ð");
|
||||
Reg_name(rv, Bool_.N, 203, "Ë");
|
||||
Reg_name(rv, Bool_.N, 235, "ë");
|
||||
Reg_name(rv, Bool_.N, 8364, "€");
|
||||
Reg_name(rv, Bool_.N, 8707, "∃");
|
||||
Reg_name(rv, Bool_.N, 402, "ƒ");
|
||||
Reg_name(rv, Bool_.N, 8704, "∀");
|
||||
Reg_name(rv, Bool_.N, 189, "½");
|
||||
Reg_name(rv, Bool_.N, 188, "¼");
|
||||
Reg_name(rv, Bool_.N, 190, "¾");
|
||||
Reg_name(rv, Bool_.N, 8260, "⁄");
|
||||
Reg_name(rv, Bool_.N, 915, "Γ");
|
||||
Reg_name(rv, Bool_.N, 947, "γ");
|
||||
Reg_name(rv, Bool_.N, 8805, "≥");
|
||||
Reg_name(rv, Bool_.N, 62, ">");
|
||||
Reg_name(rv, Bool_.N, 8596, "↔");
|
||||
Reg_name(rv, Bool_.N, 8660, "⇔");
|
||||
Reg_name(rv, Bool_.N, 9829, "♥");
|
||||
Reg_name(rv, Bool_.N, 8230, "…");
|
||||
Reg_name(rv, Bool_.N, 205, "Í");
|
||||
Reg_name(rv, Bool_.N, 237, "í");
|
||||
Reg_name(rv, Bool_.N, 206, "Î");
|
||||
Reg_name(rv, Bool_.N, 238, "î");
|
||||
Reg_name(rv, Bool_.N, 161, "¡");
|
||||
Reg_name(rv, Bool_.N, 204, "Ì");
|
||||
Reg_name(rv, Bool_.N, 236, "ì");
|
||||
Reg_name(rv, Bool_.N, 8465, "ℑ");
|
||||
Reg_name(rv, Bool_.N, 8734, "∞");
|
||||
Reg_name(rv, Bool_.N, 8747, "∫");
|
||||
Reg_name(rv, Bool_.N, 921, "Ι");
|
||||
Reg_name(rv, Bool_.N, 953, "ι");
|
||||
Reg_name(rv, Bool_.N, 191, "¿");
|
||||
Reg_name(rv, Bool_.N, 8712, "∈");
|
||||
Reg_name(rv, Bool_.N, 207, "Ï");
|
||||
Reg_name(rv, Bool_.N, 239, "ï");
|
||||
Reg_name(rv, Bool_.N, 922, "Κ");
|
||||
Reg_name(rv, Bool_.N, 954, "κ");
|
||||
Reg_name(rv, Bool_.N, 923, "Λ");
|
||||
Reg_name(rv, Bool_.N, 955, "λ");
|
||||
Reg_name(rv, Bool_.N, 9001, "⟨");
|
||||
Reg_name(rv, Bool_.N, 171, "«");
|
||||
Reg_name(rv, Bool_.N, 8592, "←");
|
||||
Reg_name(rv, Bool_.N, 8656, "⇐");
|
||||
Reg_name(rv, Bool_.N, 8968, "⌈");
|
||||
Reg_name(rv, Bool_.N, 8220, "“");
|
||||
Reg_name(rv, Bool_.N, 8804, "≤");
|
||||
Reg_name(rv, Bool_.N, 8970, "⌊");
|
||||
Reg_name(rv, Bool_.N, 8727, "∗");
|
||||
Reg_name(rv, Bool_.N, 9674, "◊");
|
||||
Reg_name(rv, Bool_.N, 8206, "‎");
|
||||
Reg_name(rv, Bool_.N, 8249, "‹");
|
||||
Reg_name(rv, Bool_.N, 8216, "‘");
|
||||
Reg_name(rv, Bool_.N, 60, "<");
|
||||
Reg_name(rv, Bool_.N, 175, "¯");
|
||||
Reg_name(rv, Bool_.N, 8212, "—");
|
||||
Reg_name(rv, Bool_.N, 181, "µ");
|
||||
Reg_name(rv, Bool_.N, 183, "·");
|
||||
Reg_name(rv, Bool_.N, 8722, "−");
|
||||
Reg_name(rv, Bool_.N, 924, "Μ");
|
||||
Reg_name(rv, Bool_.N, 956, "μ");
|
||||
Reg_name(rv, Bool_.N, 8711, "∇");
|
||||
Reg_name(rv, Bool_.N, 160, " ");
|
||||
Reg_name(rv, Bool_.N, 8211, "–");
|
||||
Reg_name(rv, Bool_.N, 8800, "≠");
|
||||
Reg_name(rv, Bool_.N, 8715, "∋");
|
||||
Reg_name(rv, Bool_.N, 172, "¬");
|
||||
Reg_name(rv, Bool_.N, 8713, "∉");
|
||||
Reg_name(rv, Bool_.N, 8836, "⊄");
|
||||
Reg_name(rv, Bool_.N, 209, "Ñ");
|
||||
Reg_name(rv, Bool_.N, 241, "ñ");
|
||||
Reg_name(rv, Bool_.N, 925, "Ν");
|
||||
Reg_name(rv, Bool_.N, 957, "ν");
|
||||
Reg_name(rv, Bool_.N, 211, "Ó");
|
||||
Reg_name(rv, Bool_.N, 243, "ó");
|
||||
Reg_name(rv, Bool_.N, 212, "Ô");
|
||||
Reg_name(rv, Bool_.N, 244, "ô");
|
||||
Reg_name(rv, Bool_.N, 338, "Œ");
|
||||
Reg_name(rv, Bool_.N, 339, "œ");
|
||||
Reg_name(rv, Bool_.N, 210, "Ò");
|
||||
Reg_name(rv, Bool_.N, 242, "ò");
|
||||
Reg_name(rv, Bool_.N, 8254, "‾");
|
||||
Reg_name(rv, Bool_.N, 937, "Ω");
|
||||
Reg_name(rv, Bool_.N, 969, "ω");
|
||||
Reg_name(rv, Bool_.N, 927, "Ο");
|
||||
Reg_name(rv, Bool_.N, 959, "ο");
|
||||
Reg_name(rv, Bool_.N, 8853, "⊕");
|
||||
Reg_name(rv, Bool_.N, 8744, "∨");
|
||||
Reg_name(rv, Bool_.N, 170, "ª");
|
||||
Reg_name(rv, Bool_.N, 186, "º");
|
||||
Reg_name(rv, Bool_.N, 216, "Ø");
|
||||
Reg_name(rv, Bool_.N, 248, "ø");
|
||||
Reg_name(rv, Bool_.N, 213, "Õ");
|
||||
Reg_name(rv, Bool_.N, 245, "õ");
|
||||
Reg_name(rv, Bool_.N, 8855, "⊗");
|
||||
Reg_name(rv, Bool_.N, 214, "Ö");
|
||||
Reg_name(rv, Bool_.N, 246, "ö");
|
||||
Reg_name(rv, Bool_.N, 182, "¶");
|
||||
Reg_name(rv, Bool_.N, 8706, "∂");
|
||||
Reg_name(rv, Bool_.N, 8240, "‰");
|
||||
Reg_name(rv, Bool_.N, 8869, "⊥");
|
||||
Reg_name(rv, Bool_.N, 934, "Φ");
|
||||
Reg_name(rv, Bool_.N, 966, "φ");
|
||||
Reg_name(rv, Bool_.N, 928, "Π");
|
||||
Reg_name(rv, Bool_.N, 960, "π");
|
||||
Reg_name(rv, Bool_.N, 982, "ϖ");
|
||||
Reg_name(rv, Bool_.N, 177, "±");
|
||||
Reg_name(rv, Bool_.N, 163, "£");
|
||||
Reg_name(rv, Bool_.N, 8242, "′");
|
||||
Reg_name(rv, Bool_.N, 8243, "″");
|
||||
Reg_name(rv, Bool_.N, 8719, "∏");
|
||||
Reg_name(rv, Bool_.N, 8733, "∝");
|
||||
Reg_name(rv, Bool_.N, 936, "Ψ");
|
||||
Reg_name(rv, Bool_.N, 968, "ψ");
|
||||
Reg_name(rv, Bool_.N, 34, """);
|
||||
Reg_name(rv, Bool_.N, 8730, "√");
|
||||
Reg_name(rv, Bool_.N, 9002, "⟩");
|
||||
Reg_name(rv, Bool_.N, 187, "»");
|
||||
Reg_name(rv, Bool_.N, 8594, "→");
|
||||
Reg_name(rv, Bool_.N, 8658, "⇒");
|
||||
Reg_name(rv, Bool_.N, 8969, "⌉");
|
||||
Reg_name(rv, Bool_.N, 8221, "”");
|
||||
Reg_name(rv, Bool_.N, 8476, "ℜ");
|
||||
Reg_name(rv, Bool_.N, 174, "®");
|
||||
Reg_name(rv, Bool_.N, 8971, "⌋");
|
||||
Reg_name(rv, Bool_.N, 929, "Ρ");
|
||||
Reg_name(rv, Bool_.N, 961, "ρ");
|
||||
Reg_name(rv, Bool_.N, 8207, "‏");
|
||||
Reg_name(rv, Bool_.N, 8250, "›");
|
||||
Reg_name(rv, Bool_.N, 8217, "’");
|
||||
Reg_name(rv, Bool_.N, 8218, "‚");
|
||||
Reg_name(rv, Bool_.N, 352, "Š");
|
||||
Reg_name(rv, Bool_.N, 353, "š");
|
||||
Reg_name(rv, Bool_.N, 8901, "⋅");
|
||||
Reg_name(rv, Bool_.N, 167, "§");
|
||||
Reg_name(rv, Bool_.N, 173, "­");
|
||||
Reg_name(rv, Bool_.N, 931, "Σ");
|
||||
Reg_name(rv, Bool_.N, 963, "σ");
|
||||
Reg_name(rv, Bool_.N, 962, "ς");
|
||||
Reg_name(rv, Bool_.N, 8764, "∼");
|
||||
Reg_name(rv, Bool_.N, 9824, "♠");
|
||||
Reg_name(rv, Bool_.N, 8834, "⊂");
|
||||
Reg_name(rv, Bool_.N, 8838, "⊆");
|
||||
Reg_name(rv, Bool_.N, 8721, "∑");
|
||||
Reg_name(rv, Bool_.N, 8835, "⊃");
|
||||
Reg_name(rv, Bool_.N, 185, "¹");
|
||||
Reg_name(rv, Bool_.N, 178, "²");
|
||||
Reg_name(rv, Bool_.N, 179, "³");
|
||||
Reg_name(rv, Bool_.N, 8839, "⊇");
|
||||
Reg_name(rv, Bool_.N, 223, "ß");
|
||||
Reg_name(rv, Bool_.N, 932, "Τ");
|
||||
Reg_name(rv, Bool_.N, 964, "τ");
|
||||
Reg_name(rv, Bool_.N, 8756, "∴");
|
||||
Reg_name(rv, Bool_.N, 920, "Θ");
|
||||
Reg_name(rv, Bool_.N, 952, "θ");
|
||||
Reg_name(rv, Bool_.N, 977, "ϑ");
|
||||
Reg_name(rv, Bool_.N, 8201, " ");
|
||||
Reg_name(rv, Bool_.N, 222, "Þ");
|
||||
Reg_name(rv, Bool_.N, 254, "þ");
|
||||
Reg_name(rv, Bool_.N, 732, "˜");
|
||||
Reg_name(rv, Bool_.N, 215, "×");
|
||||
Reg_name(rv, Bool_.N, 8482, "™");
|
||||
Reg_name(rv, Bool_.N, 218, "Ú");
|
||||
Reg_name(rv, Bool_.N, 250, "ú");
|
||||
Reg_name(rv, Bool_.N, 8593, "↑");
|
||||
Reg_name(rv, Bool_.N, 8657, "⇑");
|
||||
Reg_name(rv, Bool_.N, 219, "Û");
|
||||
Reg_name(rv, Bool_.N, 251, "û");
|
||||
Reg_name(rv, Bool_.N, 217, "Ù");
|
||||
Reg_name(rv, Bool_.N, 249, "ù");
|
||||
Reg_name(rv, Bool_.N, 168, "¨");
|
||||
Reg_name(rv, Bool_.N, 978, "ϒ");
|
||||
Reg_name(rv, Bool_.N, 933, "Υ");
|
||||
Reg_name(rv, Bool_.N, 965, "υ");
|
||||
Reg_name(rv, Bool_.N, 220, "Ü");
|
||||
Reg_name(rv, Bool_.N, 252, "ü");
|
||||
Reg_name(rv, Bool_.N, 8472, "℘");
|
||||
Reg_name(rv, Bool_.N, 926, "Ξ");
|
||||
Reg_name(rv, Bool_.N, 958, "ξ");
|
||||
Reg_name(rv, Bool_.N, 221, "Ý");
|
||||
Reg_name(rv, Bool_.N, 253, "ý");
|
||||
Reg_name(rv, Bool_.N, 165, "¥");
|
||||
Reg_name(rv, Bool_.N, 376, "Ÿ");
|
||||
Reg_name(rv, Bool_.N, 255, "ÿ");
|
||||
Reg_name(rv, Bool_.N, 918, "Ζ");
|
||||
Reg_name(rv, Bool_.N, 950, "ζ");
|
||||
Reg_name(rv, Bool_.N, 8205, "‍");
|
||||
Reg_name(rv, Bool_.N, 8204, "‌");
|
||||
Reg_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#x");
|
||||
Reg_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#X");
|
||||
Reg_prefix(rv, Xop_amp_trie_itm.Tid_num_dec, "#");
|
||||
Add_name(rv, Bool_.Y, 60, Str__xowa_lt);
|
||||
Add_name(rv, Bool_.Y, 91, Str__xowa_brack_bgn);
|
||||
Add_name(rv, Bool_.Y, 93, Str__xowa_brack_end);
|
||||
Add_name(rv, Bool_.Y, 124, Str__xowa_pipe);
|
||||
Add_name(rv, Bool_.Y, 39, Str__xowa_apos);
|
||||
Add_name(rv, Bool_.Y, 58, Str__xowa_colon);
|
||||
Add_name(rv, Bool_.Y, 95, Str__xowa_underline);
|
||||
Add_name(rv, Bool_.Y, 42, Str__xowa_asterisk);
|
||||
Add_name(rv, Bool_.Y, 32, Str__xowa_space);
|
||||
Add_name(rv, Bool_.Y, 10, Str__xowa_nl);
|
||||
Add_name(rv, Bool_.Y, 45, Str__xowa_dash);
|
||||
Add_name(rv, Bool_.N, 39, "'");
|
||||
Add_name(rv, Bool_.N, 193, "Á");
|
||||
Add_name(rv, Bool_.N, 225, "á");
|
||||
Add_name(rv, Bool_.N, 194, "Â");
|
||||
Add_name(rv, Bool_.N, 226, "â");
|
||||
Add_name(rv, Bool_.N, 180, "´");
|
||||
Add_name(rv, Bool_.N, 198, "Æ");
|
||||
Add_name(rv, Bool_.N, 230, "æ");
|
||||
Add_name(rv, Bool_.N, 192, "À");
|
||||
Add_name(rv, Bool_.N, 224, "à");
|
||||
Add_name(rv, Bool_.N, 8501, "ℵ");
|
||||
Add_name(rv, Bool_.N, 913, "Α");
|
||||
Add_name(rv, Bool_.N, 945, "α");
|
||||
Add_name(rv, Bool_.N, 38, "&");
|
||||
Add_name(rv, Bool_.N, 8743, "∧");
|
||||
Add_name(rv, Bool_.N, 8736, "∠");
|
||||
Add_name(rv, Bool_.N, 197, "Å");
|
||||
Add_name(rv, Bool_.N, 229, "å");
|
||||
Add_name(rv, Bool_.N, 8776, "≈");
|
||||
Add_name(rv, Bool_.N, 195, "Ã");
|
||||
Add_name(rv, Bool_.N, 227, "ã");
|
||||
Add_name(rv, Bool_.N, 196, "Ä");
|
||||
Add_name(rv, Bool_.N, 228, "ä");
|
||||
Add_name(rv, Bool_.N, 8222, "„");
|
||||
Add_name(rv, Bool_.N, 914, "Β");
|
||||
Add_name(rv, Bool_.N, 946, "β");
|
||||
Add_name(rv, Bool_.N, 166, "¦");
|
||||
Add_name(rv, Bool_.N, 8226, "•");
|
||||
Add_name(rv, Bool_.N, 8745, "∩");
|
||||
Add_name(rv, Bool_.N, 199, "Ç");
|
||||
Add_name(rv, Bool_.N, 231, "ç");
|
||||
Add_name(rv, Bool_.N, 184, "¸");
|
||||
Add_name(rv, Bool_.N, 162, "¢");
|
||||
Add_name(rv, Bool_.N, 935, "Χ");
|
||||
Add_name(rv, Bool_.N, 967, "χ");
|
||||
Add_name(rv, Bool_.N, 710, "ˆ");
|
||||
Add_name(rv, Bool_.N, 9827, "♣");
|
||||
Add_name(rv, Bool_.N, 8773, "≅");
|
||||
Add_name(rv, Bool_.N, 169, "©");
|
||||
Add_name(rv, Bool_.N, 8629, "↵");
|
||||
Add_name(rv, Bool_.N, 8746, "∪");
|
||||
Add_name(rv, Bool_.N, 164, "¤");
|
||||
Add_name(rv, Bool_.N, 8224, "†");
|
||||
Add_name(rv, Bool_.N, 8225, "‡");
|
||||
Add_name(rv, Bool_.N, 8595, "↓");
|
||||
Add_name(rv, Bool_.N, 8659, "⇓");
|
||||
Add_name(rv, Bool_.N, 176, "°");
|
||||
Add_name(rv, Bool_.N, 916, "Δ");
|
||||
Add_name(rv, Bool_.N, 948, "δ");
|
||||
Add_name(rv, Bool_.N, 9830, "♦");
|
||||
Add_name(rv, Bool_.N, 247, "÷");
|
||||
Add_name(rv, Bool_.N, 201, "É");
|
||||
Add_name(rv, Bool_.N, 233, "é");
|
||||
Add_name(rv, Bool_.N, 202, "Ê");
|
||||
Add_name(rv, Bool_.N, 234, "ê");
|
||||
Add_name(rv, Bool_.N, 200, "È");
|
||||
Add_name(rv, Bool_.N, 232, "è");
|
||||
Add_name(rv, Bool_.N, 8709, "∅");
|
||||
Add_name(rv, Bool_.N, 8195, " ");
|
||||
Add_name(rv, Bool_.N, 8194, " ");
|
||||
Add_name(rv, Bool_.N, 917, "Ε");
|
||||
Add_name(rv, Bool_.N, 949, "ε");
|
||||
Add_name(rv, Bool_.N, 8801, "≡");
|
||||
Add_name(rv, Bool_.N, 919, "Η");
|
||||
Add_name(rv, Bool_.N, 951, "η");
|
||||
Add_name(rv, Bool_.N, 208, "Ð");
|
||||
Add_name(rv, Bool_.N, 240, "ð");
|
||||
Add_name(rv, Bool_.N, 203, "Ë");
|
||||
Add_name(rv, Bool_.N, 235, "ë");
|
||||
Add_name(rv, Bool_.N, 8364, "€");
|
||||
Add_name(rv, Bool_.N, 8707, "∃");
|
||||
Add_name(rv, Bool_.N, 402, "ƒ");
|
||||
Add_name(rv, Bool_.N, 8704, "∀");
|
||||
Add_name(rv, Bool_.N, 189, "½");
|
||||
Add_name(rv, Bool_.N, 188, "¼");
|
||||
Add_name(rv, Bool_.N, 190, "¾");
|
||||
Add_name(rv, Bool_.N, 8260, "⁄");
|
||||
Add_name(rv, Bool_.N, 915, "Γ");
|
||||
Add_name(rv, Bool_.N, 947, "γ");
|
||||
Add_name(rv, Bool_.N, 8805, "≥");
|
||||
Add_name(rv, Bool_.N, 62, ">");
|
||||
Add_name(rv, Bool_.N, 8596, "↔");
|
||||
Add_name(rv, Bool_.N, 8660, "⇔");
|
||||
Add_name(rv, Bool_.N, 9829, "♥");
|
||||
Add_name(rv, Bool_.N, 8230, "…");
|
||||
Add_name(rv, Bool_.N, 205, "Í");
|
||||
Add_name(rv, Bool_.N, 237, "í");
|
||||
Add_name(rv, Bool_.N, 206, "Î");
|
||||
Add_name(rv, Bool_.N, 238, "î");
|
||||
Add_name(rv, Bool_.N, 161, "¡");
|
||||
Add_name(rv, Bool_.N, 204, "Ì");
|
||||
Add_name(rv, Bool_.N, 236, "ì");
|
||||
Add_name(rv, Bool_.N, 8465, "ℑ");
|
||||
Add_name(rv, Bool_.N, 8734, "∞");
|
||||
Add_name(rv, Bool_.N, 8747, "∫");
|
||||
Add_name(rv, Bool_.N, 921, "Ι");
|
||||
Add_name(rv, Bool_.N, 953, "ι");
|
||||
Add_name(rv, Bool_.N, 191, "¿");
|
||||
Add_name(rv, Bool_.N, 8712, "∈");
|
||||
Add_name(rv, Bool_.N, 207, "Ï");
|
||||
Add_name(rv, Bool_.N, 239, "ï");
|
||||
Add_name(rv, Bool_.N, 922, "Κ");
|
||||
Add_name(rv, Bool_.N, 954, "κ");
|
||||
Add_name(rv, Bool_.N, 923, "Λ");
|
||||
Add_name(rv, Bool_.N, 955, "λ");
|
||||
Add_name(rv, Bool_.N, 9001, "⟨");
|
||||
Add_name(rv, Bool_.N, 171, "«");
|
||||
Add_name(rv, Bool_.N, 8592, "←");
|
||||
Add_name(rv, Bool_.N, 8656, "⇐");
|
||||
Add_name(rv, Bool_.N, 8968, "⌈");
|
||||
Add_name(rv, Bool_.N, 8220, "“");
|
||||
Add_name(rv, Bool_.N, 8804, "≤");
|
||||
Add_name(rv, Bool_.N, 8970, "⌊");
|
||||
Add_name(rv, Bool_.N, 8727, "∗");
|
||||
Add_name(rv, Bool_.N, 9674, "◊");
|
||||
Add_name(rv, Bool_.N, 8206, "‎");
|
||||
Add_name(rv, Bool_.N, 8249, "‹");
|
||||
Add_name(rv, Bool_.N, 8216, "‘");
|
||||
Add_name(rv, Bool_.N, 60, "<");
|
||||
Add_name(rv, Bool_.N, 175, "¯");
|
||||
Add_name(rv, Bool_.N, 8212, "—");
|
||||
Add_name(rv, Bool_.N, 181, "µ");
|
||||
Add_name(rv, Bool_.N, 183, "·");
|
||||
Add_name(rv, Bool_.N, 8722, "−");
|
||||
Add_name(rv, Bool_.N, 924, "Μ");
|
||||
Add_name(rv, Bool_.N, 956, "μ");
|
||||
Add_name(rv, Bool_.N, 8711, "∇");
|
||||
Add_name(rv, Bool_.N, 160, " ");
|
||||
Add_name(rv, Bool_.N, 8211, "–");
|
||||
Add_name(rv, Bool_.N, 8800, "≠");
|
||||
Add_name(rv, Bool_.N, 8715, "∋");
|
||||
Add_name(rv, Bool_.N, 172, "¬");
|
||||
Add_name(rv, Bool_.N, 8713, "∉");
|
||||
Add_name(rv, Bool_.N, 8836, "⊄");
|
||||
Add_name(rv, Bool_.N, 209, "Ñ");
|
||||
Add_name(rv, Bool_.N, 241, "ñ");
|
||||
Add_name(rv, Bool_.N, 925, "Ν");
|
||||
Add_name(rv, Bool_.N, 957, "ν");
|
||||
Add_name(rv, Bool_.N, 211, "Ó");
|
||||
Add_name(rv, Bool_.N, 243, "ó");
|
||||
Add_name(rv, Bool_.N, 212, "Ô");
|
||||
Add_name(rv, Bool_.N, 244, "ô");
|
||||
Add_name(rv, Bool_.N, 338, "Œ");
|
||||
Add_name(rv, Bool_.N, 339, "œ");
|
||||
Add_name(rv, Bool_.N, 210, "Ò");
|
||||
Add_name(rv, Bool_.N, 242, "ò");
|
||||
Add_name(rv, Bool_.N, 8254, "‾");
|
||||
Add_name(rv, Bool_.N, 937, "Ω");
|
||||
Add_name(rv, Bool_.N, 969, "ω");
|
||||
Add_name(rv, Bool_.N, 927, "Ο");
|
||||
Add_name(rv, Bool_.N, 959, "ο");
|
||||
Add_name(rv, Bool_.N, 8853, "⊕");
|
||||
Add_name(rv, Bool_.N, 8744, "∨");
|
||||
Add_name(rv, Bool_.N, 170, "ª");
|
||||
Add_name(rv, Bool_.N, 186, "º");
|
||||
Add_name(rv, Bool_.N, 216, "Ø");
|
||||
Add_name(rv, Bool_.N, 248, "ø");
|
||||
Add_name(rv, Bool_.N, 213, "Õ");
|
||||
Add_name(rv, Bool_.N, 245, "õ");
|
||||
Add_name(rv, Bool_.N, 8855, "⊗");
|
||||
Add_name(rv, Bool_.N, 214, "Ö");
|
||||
Add_name(rv, Bool_.N, 246, "ö");
|
||||
Add_name(rv, Bool_.N, 182, "¶");
|
||||
Add_name(rv, Bool_.N, 8706, "∂");
|
||||
Add_name(rv, Bool_.N, 8240, "‰");
|
||||
Add_name(rv, Bool_.N, 8869, "⊥");
|
||||
Add_name(rv, Bool_.N, 934, "Φ");
|
||||
Add_name(rv, Bool_.N, 966, "φ");
|
||||
Add_name(rv, Bool_.N, 928, "Π");
|
||||
Add_name(rv, Bool_.N, 960, "π");
|
||||
Add_name(rv, Bool_.N, 982, "ϖ");
|
||||
Add_name(rv, Bool_.N, 177, "±");
|
||||
Add_name(rv, Bool_.N, 163, "£");
|
||||
Add_name(rv, Bool_.N, 8242, "′");
|
||||
Add_name(rv, Bool_.N, 8243, "″");
|
||||
Add_name(rv, Bool_.N, 8719, "∏");
|
||||
Add_name(rv, Bool_.N, 8733, "∝");
|
||||
Add_name(rv, Bool_.N, 936, "Ψ");
|
||||
Add_name(rv, Bool_.N, 968, "ψ");
|
||||
Add_name(rv, Bool_.N, 34, """);
|
||||
Add_name(rv, Bool_.N, 8730, "√");
|
||||
Add_name(rv, Bool_.N, 9002, "⟩");
|
||||
Add_name(rv, Bool_.N, 187, "»");
|
||||
Add_name(rv, Bool_.N, 8594, "→");
|
||||
Add_name(rv, Bool_.N, 8658, "⇒");
|
||||
Add_name(rv, Bool_.N, 8969, "⌉");
|
||||
Add_name(rv, Bool_.N, 8221, "”");
|
||||
Add_name(rv, Bool_.N, 8476, "ℜ");
|
||||
Add_name(rv, Bool_.N, 174, "®");
|
||||
Add_name(rv, Bool_.N, 8971, "⌋");
|
||||
Add_name(rv, Bool_.N, 929, "Ρ");
|
||||
Add_name(rv, Bool_.N, 961, "ρ");
|
||||
Add_name(rv, Bool_.N, 8207, "‏");
|
||||
Add_name(rv, Bool_.N, 8250, "›");
|
||||
Add_name(rv, Bool_.N, 8217, "’");
|
||||
Add_name(rv, Bool_.N, 8218, "‚");
|
||||
Add_name(rv, Bool_.N, 352, "Š");
|
||||
Add_name(rv, Bool_.N, 353, "š");
|
||||
Add_name(rv, Bool_.N, 8901, "⋅");
|
||||
Add_name(rv, Bool_.N, 167, "§");
|
||||
Add_name(rv, Bool_.N, 173, "­");
|
||||
Add_name(rv, Bool_.N, 931, "Σ");
|
||||
Add_name(rv, Bool_.N, 963, "σ");
|
||||
Add_name(rv, Bool_.N, 962, "ς");
|
||||
Add_name(rv, Bool_.N, 8764, "∼");
|
||||
Add_name(rv, Bool_.N, 9824, "♠");
|
||||
Add_name(rv, Bool_.N, 8834, "⊂");
|
||||
Add_name(rv, Bool_.N, 8838, "⊆");
|
||||
Add_name(rv, Bool_.N, 8721, "∑");
|
||||
Add_name(rv, Bool_.N, 8835, "⊃");
|
||||
Add_name(rv, Bool_.N, 185, "¹");
|
||||
Add_name(rv, Bool_.N, 178, "²");
|
||||
Add_name(rv, Bool_.N, 179, "³");
|
||||
Add_name(rv, Bool_.N, 8839, "⊇");
|
||||
Add_name(rv, Bool_.N, 223, "ß");
|
||||
Add_name(rv, Bool_.N, 932, "Τ");
|
||||
Add_name(rv, Bool_.N, 964, "τ");
|
||||
Add_name(rv, Bool_.N, 8756, "∴");
|
||||
Add_name(rv, Bool_.N, 920, "Θ");
|
||||
Add_name(rv, Bool_.N, 952, "θ");
|
||||
Add_name(rv, Bool_.N, 977, "ϑ");
|
||||
Add_name(rv, Bool_.N, 8201, " ");
|
||||
Add_name(rv, Bool_.N, 222, "Þ");
|
||||
Add_name(rv, Bool_.N, 254, "þ");
|
||||
Add_name(rv, Bool_.N, 732, "˜");
|
||||
Add_name(rv, Bool_.N, 215, "×");
|
||||
Add_name(rv, Bool_.N, 8482, "™");
|
||||
Add_name(rv, Bool_.N, 218, "Ú");
|
||||
Add_name(rv, Bool_.N, 250, "ú");
|
||||
Add_name(rv, Bool_.N, 8593, "↑");
|
||||
Add_name(rv, Bool_.N, 8657, "⇑");
|
||||
Add_name(rv, Bool_.N, 219, "Û");
|
||||
Add_name(rv, Bool_.N, 251, "û");
|
||||
Add_name(rv, Bool_.N, 217, "Ù");
|
||||
Add_name(rv, Bool_.N, 249, "ù");
|
||||
Add_name(rv, Bool_.N, 168, "¨");
|
||||
Add_name(rv, Bool_.N, 978, "ϒ");
|
||||
Add_name(rv, Bool_.N, 933, "Υ");
|
||||
Add_name(rv, Bool_.N, 965, "υ");
|
||||
Add_name(rv, Bool_.N, 220, "Ü");
|
||||
Add_name(rv, Bool_.N, 252, "ü");
|
||||
Add_name(rv, Bool_.N, 8472, "℘");
|
||||
Add_name(rv, Bool_.N, 926, "Ξ");
|
||||
Add_name(rv, Bool_.N, 958, "ξ");
|
||||
Add_name(rv, Bool_.N, 221, "Ý");
|
||||
Add_name(rv, Bool_.N, 253, "ý");
|
||||
Add_name(rv, Bool_.N, 165, "¥");
|
||||
Add_name(rv, Bool_.N, 376, "Ÿ");
|
||||
Add_name(rv, Bool_.N, 255, "ÿ");
|
||||
Add_name(rv, Bool_.N, 918, "Ζ");
|
||||
Add_name(rv, Bool_.N, 950, "ζ");
|
||||
Add_name(rv, Bool_.N, 8205, "‍");
|
||||
Add_name(rv, Bool_.N, 8204, "‌");
|
||||
Add_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#x");
|
||||
Add_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#X");
|
||||
Add_prefix(rv, Xop_amp_trie_itm.Tid_num_dec, "#");
|
||||
return rv;
|
||||
}
|
||||
private static void Reg_name(Btrie_slim_mgr trie, boolean tid_is_xowa, int char_int, String xml_name_str) {Reg_name(trie, tid_is_xowa, char_int, Bry_.new_a7(xml_name_str));}
|
||||
private static void Reg_name(Btrie_slim_mgr trie, boolean tid_is_xowa, int char_int, byte[] xml_name_bry) {
|
||||
private static void Add_name(Btrie_slim_mgr trie, boolean tid_is_xowa, int char_int, String xml_name_str) {
|
||||
byte itm_tid = tid_is_xowa ? Xop_amp_trie_itm.Tid_name_xowa : Xop_amp_trie_itm.Tid_name_std;
|
||||
Xop_amp_trie_itm itm = new Xop_amp_trie_itm(itm_tid, char_int, xml_name_bry);
|
||||
byte[] xml_name_bry = Bry_.new_a7(xml_name_str);
|
||||
byte[] key = Bry_.Mid(xml_name_bry, 1, xml_name_bry.length); // ignore & for purpose of trie; EX: "amp;"; NOTE: must keep trailing ";" else "& " will be valid;
|
||||
trie.Add_obj(key, itm);
|
||||
trie.Add_obj(key, new Xop_amp_trie_itm(itm_tid, char_int, xml_name_bry));
|
||||
}
|
||||
private static void Reg_prefix(Btrie_slim_mgr trie, byte prefix_type, String prefix) {
|
||||
byte[] prefix_ary = Bry_.new_a7(prefix);
|
||||
private static void Add_prefix(Btrie_slim_mgr trie, byte prefix_type, String prefix) {
|
||||
byte[] prefix_ary = Bry_.new_u8(prefix);
|
||||
Xop_amp_trie_itm itm = new Xop_amp_trie_itm(prefix_type, Xop_amp_trie_itm.Char_int_null, prefix_ary);
|
||||
trie.Add_obj(prefix_ary, itm);
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.htmls.core.wkrs.lnkis.htmls.*;
|
||||
public class Xop_amp_trie_itm {
|
||||
public class Xop_amp_trie_itm { // TS
|
||||
public Xop_amp_trie_itm(byte tid, int char_int, byte[] xml_name_bry) {
|
||||
this.tid = tid;
|
||||
this.char_int = char_int;
|
||||
@@ -25,11 +25,12 @@ public class Xop_amp_trie_itm {
|
||||
this.xml_name_bry = xml_name_bry;
|
||||
this.key_name_len = xml_name_bry.length - 2; // 2 for & and ;
|
||||
}
|
||||
public byte Tid() {return tid;} private final byte tid;
|
||||
public int Char_int() {return char_int;} private final int char_int; // val; EX: 160
|
||||
public byte[] U8_bry() {return u8_bry;} private final byte[] u8_bry; // UTF-8 bytes of char_int; EX: new byte[] {194, 160}; (C2, A0)
|
||||
public byte[] Xml_name_bry() {return xml_name_bry;} private final byte[] xml_name_bry; // full entity name, including leading "&" and trailing ";"; EX: "&nbsp;"
|
||||
public int Key_name_len() {return key_name_len;} private final int key_name_len; // EX: "nbsp".Len
|
||||
public byte Tid() {return tid;} private final byte tid;
|
||||
public int Char_int() {return char_int;} private final int char_int; // val; EX: 160
|
||||
public byte[] U8_bry() {return u8_bry;} private final byte[] u8_bry; // UTF-8 bytes of char_int; EX: new byte[] {194, 160}; (C2, A0)
|
||||
public byte[] Xml_name_bry() {return xml_name_bry;} private final byte[] xml_name_bry; // full entity name, including leading "&" and trailing ";"; EX: "&nbsp;"
|
||||
public int Key_name_len() {return key_name_len;} private final int key_name_len; // EX: "nbsp".Len
|
||||
|
||||
public void Print_ncr(Bry_bfr bfr) {
|
||||
switch (char_int) {
|
||||
case Byte_ascii.Lt: case Byte_ascii.Gt: case Byte_ascii.Quote: case Byte_ascii.Amp:
|
||||
@@ -48,9 +49,7 @@ public class Xop_amp_trie_itm {
|
||||
case Byte_ascii.Gt: bfr.Add(Gfh_entity_.Gt_bry); break;
|
||||
case Byte_ascii.Quote: bfr.Add(Gfh_entity_.Quote_bry); break;
|
||||
case Byte_ascii.Amp: bfr.Add(Gfh_entity_.Amp_bry); break;
|
||||
default:
|
||||
bfr.Add(u8_bry); // write literal; EX: "[" not "["
|
||||
break;
|
||||
default: bfr.Add(u8_bry); break; // write literal; EX: "[" not "["
|
||||
}
|
||||
}
|
||||
public static final byte Tid_name_std = 1, Tid_name_xowa = 2, Tid_num_hex = 3, Tid_num_dec = 4;
|
||||
|
||||
@@ -20,11 +20,13 @@ public class Xop_amp_wkr implements Xop_ctx_wkr {
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn, int cur_pos) {
|
||||
if (cur_pos == src_len) return ctx.Lxr_make_txt_(cur_pos); // NOTE: & is last char in page; strange and rare, but don't raise error
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn, int cur) {
|
||||
if (cur == src_len) return ctx.Lxr_make_txt_(cur); // NOTE: & is last char in page; strange and rare, but don't raise error
|
||||
|
||||
Xop_amp_mgr amp_mgr = ctx.App().Parser_amp_mgr();
|
||||
Xop_tkn_itm amp_tkn = amp_mgr.Parse_as_tkn(tkn_mkr, src, src_len, bgn, cur_pos);
|
||||
int rv_pos = amp_mgr.Rslt_pos();
|
||||
Xop_amp_mgr_rslt amp_rv = amp_mgr.Parse_tkn(tkn_mkr, src, src_len, bgn, cur);
|
||||
Xop_tkn_itm amp_tkn = amp_rv.Tkn();
|
||||
int rv_pos = amp_rv.Pos();
|
||||
if (amp_tkn == null) return ctx.Lxr_make_txt_(rv_pos);
|
||||
ctx.Subs_add(root, amp_tkn);
|
||||
return rv_pos;
|
||||
|
||||
@@ -18,11 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_amp_wkr_tst {
|
||||
private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Name() {fxt.Test_parse_page_wiki("&" , fxt.tkn_html_ref_("&"));} // check for html_ref
|
||||
@Test public void Name_fail() {fxt.Test_parse_page_wiki("&nil;" , fxt.tkn_txt_(0, 5));} // check for text
|
||||
@Test public void Hex() {fxt.Test_parse_page_wiki("Σ" , fxt.tkn_html_ncr_(931));} // check for html_ncr; Σ: http://en.wikipedia.org/wiki/Numeric_character_reference
|
||||
@Test public void Num_fail_incomplete() {fxt.Test_parse_page_wiki("&#" , fxt.tkn_txt_());}
|
||||
private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Convert_to_named() {fxt.Test_parse_page_wiki_str("&" , "&");} // note that & is printed, not &
|
||||
@Test public void Convert_to_named_amp() {fxt.Test_parse_page_wiki_str("&" , "&");} // PURPOSE: html_wtr was not handling & only
|
||||
@Test public void Convert_to_numeric() {fxt.Test_parse_page_wiki_str("á" , "á");} // testing that á is outputted, not á
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.tests.*;
|
||||
// Test checker for Xop_amp_tkn_txt tokens; reports Tid_html_ref as the expected token type
// and compares an expected entity key against the token's Xml_name_bry.
public class Xop_html_txt_tkn_chkr extends Xop_tkn_chkr_base {
|
||||
@Override public Class<?> TypeOf() {return Xop_amp_tkn_txt.class;}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_html_ref;}
|
||||
// expected entity key; getter plus fluent setter that returns this checker
public String Html_ref_key() {return html_ref_key;} public Xop_html_txt_tkn_chkr Html_ref_key_(String v) {html_ref_key = v; return this;} private String html_ref_key;
|
||||
// Adds the result of comparing html_ref_key with the actual token's Xml_name_bry (decoded as a String) to err and returns it.
// NOTE(review): the first Tst_val argument (html_ref_key == null) presumably means "skip when no expectation was set" -- confirm against Tst_mgr.
@Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
|
||||
Xop_amp_tkn_txt actl = (Xop_amp_tkn_txt)actl_obj;
|
||||
err += mgr.Tst_val(html_ref_key == null, path, "html_ref_key", html_ref_key, String_.new_u8(actl.Xml_name_bry()));
|
||||
return err;
|
||||
}
|
||||
}
|
||||
@@ -33,8 +33,6 @@ public class Xop_apos_dat {
|
||||
default:
|
||||
lit_apos = apos_len - Xop_apos_tkn_.Len_dual;
|
||||
Ident_props(Xop_apos_tkn_.Len_dual);
|
||||
if (lit_apos > 1)
|
||||
ctx.Msg_log().Add_itm_none(Xop_apos_log.Multiple_apos, src, cur_pos - apos_len, cur_pos);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
84
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_itm.java
Normal file
84
400_xowa/src/gplx/xowa/parsers/apos/Xop_apos_itm.java
Normal file
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
// Mutable result holder for classifying one run of apostrophes:
// the state after the run, the token type, the command to emit, the number of literal apostrophes, and the dual command.
public class Xop_apos_itm {
|
||||
// state after the most recent Init; starts as (and State_clear resets to) Xop_apos_tkn_.State_nil
public int State() {return state;} public void State_clear() {state = Xop_apos_tkn_.State_nil;} private int state = Xop_apos_tkn_.State_nil;
|
||||
// token type; Ident_props sets this to the effective apostrophe length (one of the Xop_apos_tkn_.Len_* values)
public int Typ() {return typ;} private int typ;
|
||||
// command chosen by Ident_props (one of the Xop_apos_tkn_.Cmd_* values)
public int Cmd() {return cmd;} private int cmd;
|
||||
// number of apostrophes in the run that are treated as literal text
public int Lit_apos() {return lit_apos;} private int lit_apos;
|
||||
// extra command; only set to non-zero by Ident_props for an italic run while in State_dual
public int Dual_cmd() {return dual_cmd;} private int dual_cmd;
|
||||
// Overwrites all five fields in one call.
public void Init(int state, int typ, int cmd, int lit_apos, int dual_cmd) {
|
||||
this.state = state;
|
||||
this.typ = typ; this.cmd = cmd; this.lit_apos = lit_apos; this.dual_cmd = dual_cmd;
|
||||
}
|
||||
// Classifies a run of apos_len apostrophes given the current state and stores the outcome in rv.
// Lengths equal to Len_ital / Len_bold / Len_dual are used as-is with 0 literal apostrophes;
// Len_apos_bold is handled as bold with 1 literal apostrophe;
// any other length is handled as Len_dual, with the excess (apos_len - Len_dual) counted as literal apostrophes.
// NOTE: ctx, src and cur_pos are not read by this method.
public static void Ident(Xop_apos_itm rv, Xop_ctx ctx, byte[] src, int apos_len, int cur_pos, int state) {
|
||||
switch (apos_len) {
|
||||
case Xop_apos_tkn_.Len_ital: case Xop_apos_tkn_.Len_bold: case Xop_apos_tkn_.Len_dual:
|
||||
Ident_props(rv, state, apos_len, 0); break;
|
||||
case Xop_apos_tkn_.Len_apos_bold:
|
||||
Ident_props(rv, state, Xop_apos_tkn_.Len_bold, 1); break;
|
||||
default:
|
||||
Ident_props(rv, state, Xop_apos_tkn_.Len_dual, apos_len - Xop_apos_tkn_.Len_dual);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Looks up the (apos_len, state) pair in the transition table below, picking the command and the next state,
// then writes everything to rv via Init. typ is always the apos_len passed in; lit_apos is passed through unchanged.
// Throws via Err_.new_unhandled for a state not listed, and via Err_.new_unhandled_default for an apos_len other than ital / bold / dual.
private static void Ident_props(Xop_apos_itm rv, int state, int apos_len, int lit_apos) {
|
||||
int typ = apos_len;
|
||||
int cmd = 0, dual_cmd = 0;
|
||||
switch (apos_len) {
|
||||
// italic run: closes italic if one is open in the current state, else opens it
case Xop_apos_tkn_.Len_ital: {
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_b; break;
|
||||
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_bi_end__b_bgn; state = Xop_apos_tkn_.State_b; break;
|
||||
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_b; dual_cmd = Xop_apos_tkn_.Cmd_bi_bgn; break;
|
||||
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_i_bgn; state = Xop_apos_tkn_.State_bi; break;
|
||||
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_i_bgn; state = Xop_apos_tkn_.State_i; break;
|
||||
default: throw Err_.new_unhandled(state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
// bold run: closes bold if one is open in the current state, else opens it
case Xop_apos_tkn_.Len_bold: {
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_ib_end__i_bgn; state = Xop_apos_tkn_.State_i; break;
|
||||
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_i; break;
|
||||
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_i; break; // NOTE: dual_cmd = Cmd_ib_bgn is implied
|
||||
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_b_bgn; state = Xop_apos_tkn_.State_ib; break;
|
||||
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_b_bgn; state = Xop_apos_tkn_.State_b; break;
|
||||
default: throw Err_.new_unhandled(state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
// dual (bold + italic) run: closes whatever is open, or opens both when nothing is open
case Xop_apos_tkn_.Len_dual: {
|
||||
switch (state) {
|
||||
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_b_end__i_bgn; state = Xop_apos_tkn_.State_i; break;
|
||||
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_i_end__b_bgn; state = Xop_apos_tkn_.State_b; break;
|
||||
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_ib_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_bi_end; state = Xop_apos_tkn_.State_nil; break;
|
||||
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_bi_end; state = Xop_apos_tkn_.State_nil; break; // NOTE: dual_cmd = Cmd_ib_bgn is implied
|
||||
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_ib_bgn; state = Xop_apos_tkn_.State_dual; break;
|
||||
default: throw Err_.new_unhandled(state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: throw Err_.new_unhandled_default(apos_len);
|
||||
}
|
||||
// publish the computed transition; state here is the NEXT state, not the one passed in
rv.Init(state, typ, cmd, lit_apos, dual_cmd);
|
||||
}
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.log_msgs.*;
|
||||
// Note-level log message definitions for apostrophe parsing.
public class Xop_apos_log {
|
||||
// message group "apos", created under Xoa_app_.Nde; every item below is registered with it
private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "apos");
|
||||
public static final Gfo_msg_itm
|
||||
Bold_converted_to_ital = Gfo_msg_itm_.new_note_(owner, "Bold_converted_to_ital")
|
||||
, Dangling_apos = Gfo_msg_itm_.new_note_(owner, "Dangling_apos")
|
||||
, Multiple_apos = Gfo_msg_itm_.new_note_(owner, "Multiple_apos")
|
||||
;
|
||||
}
|
||||
@@ -17,27 +17,32 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_apos_wkr implements Xop_ctx_wkr {
|
||||
public Xop_apos_dat Dat() {return dat;} private Xop_apos_dat dat = new Xop_apos_dat();
|
||||
private List_adp stack = List_adp_.New(); private int bold_count, ital_count; private Xop_apos_tkn dual_tkn = null;
|
||||
private final List_adp stack = List_adp_.New();
|
||||
private int bold_count, ital_count; private Xop_apos_tkn dual_tkn = null;
|
||||
private Xop_apos_dat dat = new Xop_apos_dat();
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
|
||||
Reset();
|
||||
}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {Clear();}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
|
||||
this.EndFrame(ctx, root, src, src_len, false);
|
||||
this.End_frame(ctx, root, src, src_len, false);
|
||||
}
|
||||
public void AutoClose(Xop_ctx ctx, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {}
|
||||
public int Stack_len() {return stack.Count();}
|
||||
public int Stack_len() {return stack.Len();}
|
||||
private void Clear() {
|
||||
bold_count = ital_count = 0;
|
||||
dual_tkn = null;
|
||||
stack.Clear();
|
||||
dat.State_clear();
|
||||
}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
cur_pos = Bry_find_.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Apos);
|
||||
int apos_len = cur_pos - bgn_pos;
|
||||
dat.Ident(ctx, src, apos_len, cur_pos);
|
||||
Xop_apos_tkn apos_tkn = tkn_mkr.Apos(bgn_pos, cur_pos, apos_len, dat.Typ(), dat.Cmd(), dat.Lit_apos());
|
||||
ctx.Subs_add(root, apos_tkn);
|
||||
ctx.Apos().RegTkn(apos_tkn, cur_pos);
|
||||
ctx.Apos().Reg_tkn(apos_tkn, cur_pos); // NOTE: register in root ctx (main document)
|
||||
return cur_pos;
|
||||
}
|
||||
public void RegTkn(Xop_apos_tkn tkn, int cur_pos) { // REF.MW: Parser|doQuotes
|
||||
private void Reg_tkn(Xop_apos_tkn tkn, int cur_pos) { // REF.MW: Parser|doQuotes
|
||||
stack.Add(tkn);
|
||||
switch (tkn.Apos_tid()) {
|
||||
case Xop_apos_tkn_.Len_ital: ital_count++; break;
|
||||
@@ -52,19 +57,18 @@ public class Xop_apos_wkr implements Xop_ctx_wkr {
|
||||
dual_tkn = null;
|
||||
}
|
||||
}
|
||||
public void EndFrame(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int cur_pos, boolean skip_cancel_if_lnki_and_apos) {
|
||||
public void End_frame(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int cur_pos, boolean skip_cancel_if_lnki_and_apos) {
|
||||
int state = dat.State();
|
||||
if (state == 0) {Reset(); return;}
|
||||
if (bold_count % 2 == 1 && ital_count % 2 == 1) ConvertBoldToItal(ctx, src);
|
||||
if (state == 0) {Clear(); return;} // all apos close correctly; nothing dangling; return;
|
||||
|
||||
if (bold_count % 2 == 1 && ital_count % 2 == 1) Convert_bold_to_ital(ctx, src, stack, dat);
|
||||
state = dat.State();
|
||||
if (state == 0) {Clear(); return;} // all apos close correctly after converting bold to italic; return;
|
||||
|
||||
int closeCmd = 0, closeTyp = 0;
|
||||
if (state == 0) {Reset(); return;} // all closed: return
|
||||
byte cur_tkn_tid = ctx.Cur_tkn_tid();
|
||||
Xop_apos_tkn prv = Previous_bgn(stack, closeTyp);
|
||||
if ( skip_cancel_if_lnki_and_apos // NOTE: if \n or tblw
|
||||
&& cur_tkn_tid == Xop_tkn_itm_.Tid_lnki // and cur scope is lnki
|
||||
// && prv.Ctx_tkn_tid() != Xop_tkn_itm_.Tid_lnki // but apos_bgn is not lnki; NOTE: disabled on 2013-11-10
|
||||
)
|
||||
return; // don't end frame
|
||||
switch (state) {
|
||||
@@ -74,30 +78,29 @@ public class Xop_apos_wkr implements Xop_ctx_wkr {
|
||||
case Xop_apos_tkn_.State_ib: closeTyp = Xop_apos_tkn_.Typ_dual; closeCmd = Xop_apos_tkn_.Cmd_bi_end; break;
|
||||
case Xop_apos_tkn_.State_bi: closeTyp = Xop_apos_tkn_.Typ_dual; closeCmd = Xop_apos_tkn_.Cmd_ib_end; break;
|
||||
}
|
||||
ctx.Msg_log().Add_itm_none(Xop_apos_log.Dangling_apos, src, prv.Src_bgn(), cur_pos);
|
||||
ctx.Subs_add(root, ctx.Tkn_mkr().Apos(cur_pos, cur_pos, 0, closeTyp, closeCmd, 0));
|
||||
Reset();
|
||||
Clear();
|
||||
}
|
||||
private void ConvertBoldToItal(Xop_ctx ctx, byte[] src) {
|
||||
private static void Convert_bold_to_ital(Xop_ctx ctx, byte[] src, List_adp stack, Xop_apos_dat dat) {
|
||||
Xop_apos_tkn idxNeg1 = null, idxNeg2 = null, idxNone = null; // look at previous tkn for spaces; EX: "a '''" -> idxNeg1; " a'''" -> idxNeg2; "ab'''" -> idxNone
|
||||
int tknsLen = stack.Count();
|
||||
for (int i = 0; i < tknsLen; i++) {
|
||||
int len = stack.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
|
||||
if (apos.Apos_tid() != Xop_apos_tkn_.Typ_bold) continue; // only look for bold
|
||||
int tknBgn = apos.Src_bgn();
|
||||
boolean idxNeg1Space = tknBgn > 0 && src[tknBgn - 1] == Byte_ascii.Space;
|
||||
boolean idxNeg2Space = tknBgn > 1 && src[tknBgn - 2] == Byte_ascii.Space;
|
||||
int tkn_bgn = apos.Src_bgn();
|
||||
boolean idxNeg1Space = tkn_bgn > 0 && src[tkn_bgn - 1] == Byte_ascii.Space;
|
||||
boolean idxNeg2Space = tkn_bgn > 1 && src[tkn_bgn - 2] == Byte_ascii.Space;
|
||||
if (idxNeg1 == null && idxNeg1Space) {idxNeg1 = apos;}
|
||||
else if (idxNeg2 == null && idxNeg2Space) {idxNeg2 = apos;}
|
||||
else if (idxNone == null && !idxNeg1Space && !idxNeg2Space) {idxNone = apos;}
|
||||
}
|
||||
if (idxNeg2 != null) ConvertBoldToItal(ctx, src, idxNeg2); // 1st single letter word
|
||||
else if (idxNone != null) ConvertBoldToItal(ctx, src, idxNone); // 1st multi letter word
|
||||
else if (idxNeg1 != null) ConvertBoldToItal(ctx, src, idxNeg1); // everything else
|
||||
if (idxNeg2 != null) Convert_bold_to_ital(ctx, src, idxNeg2); // 1st single letter word
|
||||
else if (idxNone != null) Convert_bold_to_ital(ctx, src, idxNone); // 1st multi letter word
|
||||
else if (idxNeg1 != null) Convert_bold_to_ital(ctx, src, idxNeg1); // everything else
|
||||
|
||||
// now recalc all cmds for stack
|
||||
dat.State_clear();
|
||||
for (int i = 0; i < tknsLen; i++) {
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
|
||||
dat.Ident(ctx, src, apos.Apos_tid(), apos.Src_end()); // NOTE: apos.Typ() must map to apos_len
|
||||
int newCmd = dat.Cmd();
|
||||
@@ -105,57 +108,7 @@ public class Xop_apos_wkr implements Xop_ctx_wkr {
|
||||
apos.Apos_cmd_(newCmd);
|
||||
}
|
||||
}
|
||||
private void ConvertBoldToItal(Xop_ctx ctx, byte[] src, Xop_apos_tkn oldTkn) {
|
||||
ctx.Msg_log().Add_itm_none(Xop_apos_log.Bold_converted_to_ital, src, oldTkn.Src_bgn(), oldTkn.Src_end());
|
||||
private static void Convert_bold_to_ital(Xop_ctx ctx, byte[] src, Xop_apos_tkn oldTkn) {
|
||||
oldTkn.Apos_tid_(Xop_apos_tkn_.Typ_ital).Apos_cmd_(Xop_apos_tkn_.Cmd_i_bgn).Apos_lit_(oldTkn.Apos_lit() + 1);// NOTE: Cmd_i_bgn may be overridden later
|
||||
}
|
||||
private void Reset() {
|
||||
bold_count = ital_count = 0;
|
||||
dual_tkn = null;
|
||||
stack.Clear();
|
||||
dat.State_clear();
|
||||
}
|
||||
private static Xop_apos_tkn Previous_bgn(List_adp stack, int typ) {
|
||||
int stack_len = stack.Count();
|
||||
for (int i = stack_len - 1; i > -1; --i) {
|
||||
Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
|
||||
int cmd = apos.Apos_cmd();
|
||||
switch (typ) {
|
||||
case Xop_apos_tkn_.Typ_ital:
|
||||
switch (cmd) {
|
||||
case Xop_apos_tkn_.Cmd_i_bgn:
|
||||
case Xop_apos_tkn_.Cmd_ib_bgn:
|
||||
case Xop_apos_tkn_.Cmd_bi_bgn:
|
||||
case Xop_apos_tkn_.Cmd_ib_end__i_bgn:
|
||||
case Xop_apos_tkn_.Cmd_b_end__i_bgn:
|
||||
return apos;
|
||||
}
|
||||
break;
|
||||
case Xop_apos_tkn_.Typ_bold:
|
||||
switch (cmd) {
|
||||
case Xop_apos_tkn_.Cmd_b_bgn:
|
||||
case Xop_apos_tkn_.Cmd_ib_bgn:
|
||||
case Xop_apos_tkn_.Cmd_bi_bgn:
|
||||
case Xop_apos_tkn_.Cmd_bi_end__b_bgn:
|
||||
case Xop_apos_tkn_.Cmd_i_end__b_bgn:
|
||||
return apos;
|
||||
}
|
||||
break;
|
||||
default: // NOTE: this is approximate; will not be exact in most dual situations; EX: <b>a<i>b will return <i>; should return <b> and <i>
|
||||
switch (cmd) {
|
||||
case Xop_apos_tkn_.Cmd_b_bgn:
|
||||
case Xop_apos_tkn_.Cmd_i_bgn:
|
||||
case Xop_apos_tkn_.Cmd_ib_bgn:
|
||||
case Xop_apos_tkn_.Cmd_bi_bgn:
|
||||
case Xop_apos_tkn_.Cmd_bi_end__b_bgn:
|
||||
case Xop_apos_tkn_.Cmd_i_end__b_bgn:
|
||||
case Xop_apos_tkn_.Cmd_ib_end__i_bgn:
|
||||
case Xop_apos_tkn_.Cmd_b_end__i_bgn:
|
||||
return apos;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ public class Xop_hdr_wkr implements Xop_ctx_wkr {
|
||||
}
|
||||
public int Make_tkn_bgn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_bos) bgn_pos = 0; // do not allow -1 pos
|
||||
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false);
|
||||
ctx.Apos().End_frame(ctx, root, src, bgn_pos, false);
|
||||
Close_open_itms(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
|
||||
ctx.Para().Process_block__bgn__nl_w_symbol(ctx, root, src, bgn_pos, cur_pos, Xop_xnde_tag_.Tag__h2); // pass h2; should pass h# where # is correct #, but for purpose of Para_wkr, <h2> tag does not matter
|
||||
int new_pos = Bry_find_.Find_fwd_while(src, cur_pos, src_len, Xop_hdr_lxr.Hook); // count all =
|
||||
@@ -50,7 +50,7 @@ public class Xop_hdr_wkr implements Xop_ctx_wkr {
|
||||
public int Make_tkn_end(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int stackPos, int end_hdr_len) {// REF.MW: Parser|doHeadings
|
||||
if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_tmpl_curly_bgn) return ctx.Lxr_make_txt_(cur_pos);
|
||||
Xop_hdr_tkn hdr = (Xop_hdr_tkn)ctx.Stack_pop_til(root, src, stackPos, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_hdr);
|
||||
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false); // end any apos; EX: ==''a==
|
||||
ctx.Apos().End_frame(ctx, root, src, bgn_pos, false); // end any apos; EX: ==''a==
|
||||
int hdr_len = hdr.Hdr_level(), bgn_manual = 0, end_manual = 0;
|
||||
boolean dirty = false;
|
||||
if (end_hdr_len < hdr_len) { // mismatch: end has more; adjust hdr
|
||||
|
||||
@@ -43,13 +43,14 @@ public class Mwh_doc_parser {
|
||||
pos = Parse_nde(pos);
|
||||
break;
|
||||
case Byte_ascii.Amp: // "&": check for entity; EX: in sr-ec -> sr-el
|
||||
Xop_tkn_itm tkn = amp_mgr.Parse_as_tkn(tkn_mkr, src, src_end, pos, pos + 1);
|
||||
if (tkn == null)
|
||||
Xop_amp_mgr_rslt rv = amp_mgr.Parse_tkn(tkn_mkr, src, src_end, pos, pos + 1);
|
||||
Xop_tkn_itm rv_tkn = rv.Tkn();
|
||||
if (rv_tkn == null)
|
||||
++pos;
|
||||
else {
|
||||
wkr.On_txt_end(this, src, cur_nde_tid, txt_bgn, pos);
|
||||
wkr.On_entity_end(this, src, cur_nde_tid, tkn.Src_bgn(), tkn.Src_end());
|
||||
pos = tkn.Src_end();
|
||||
wkr.On_entity_end(this, src, cur_nde_tid, rv_tkn.Src_bgn(), rv_tkn.Src_end());
|
||||
pos = rv_tkn.Src_end();
|
||||
txt_bgn = pos;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -40,7 +40,7 @@ public class Xop_list_wkr implements Xop_ctx_wkr {
|
||||
if (acsPos != -1) ctx.Stack_pop_til(root, src, acsPos, true, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_list);
|
||||
|
||||
// close apos
|
||||
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false);
|
||||
ctx.Apos().End_frame(ctx, root, src, bgn_pos, false);
|
||||
byte symByt = src[cur_pos - 1]; // -1 b/c symByt is byte before curByt; EX: \n*a; cur_pos is at a; want to get *
|
||||
int prvSymLen = curSymLen;
|
||||
cur_pos = SymAry_fill(src, cur_pos, src_len, symByt);
|
||||
|
||||
@@ -21,12 +21,14 @@ import gplx.xowa.langs.*; import gplx.xowa.langs.cases.*; import gplx.xowa.langs
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.parsers.paras.*; import gplx.xowa.wikis.ttls.*;
|
||||
public class Xop_lnki_wkr__basic_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_n_();} private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_n_();} private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test_parse_page_wiki("[[a]]", fxt.tkn_lnki_().Trg_tkn_(fxt.tkn_arg_val_txt_(2, 3)));
|
||||
}
|
||||
@Test public void HtmlRef() {
|
||||
fxt.Test_parse_page_wiki("[[a&b]]", fxt.tkn_lnki_().Trg_tkn_(fxt.tkn_arg_nde_().Val_tkn_(fxt.tkn_arg_itm_(fxt.tkn_txt_(2, 3), fxt.tkn_html_ref_("&"), fxt.tkn_txt_(8, 9)))));
|
||||
fxt.Test_parse_page_wiki_str("[[a&b]]"
|
||||
, "<a href=\"/wiki/A%26b\">a&b</a>"
|
||||
);
|
||||
}
|
||||
@Test public void Url_encode() { // PURPOSE:title should automatically do url decoding; DATE:2013-08-26
|
||||
fxt.Test_parse_page_all_str("[[A%20b]]", "<a href=\"/wiki/A_b\">A b</a>");
|
||||
|
||||
@@ -30,7 +30,7 @@ public class Xop_hr_lxr implements Xop_lxr {
|
||||
bgn_pos = 0; // do not allow -1 pos
|
||||
nl_adj = 0; // no nl at bgn, so nl_adj = 0
|
||||
}
|
||||
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, false);
|
||||
ctx.Apos().End_frame(ctx, root, src, bgn_pos, false);
|
||||
ctx.CloseOpenItms(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos); // close open items
|
||||
cur_pos = Bry_find_.Find_fwd_while(src, cur_pos, src_len, Hook_byt); // gobble consecutive dashes
|
||||
if (!bos)
|
||||
|
||||
@@ -51,7 +51,7 @@ public class Xop_nl_lxr implements Xop_lxr {
|
||||
}
|
||||
}
|
||||
|
||||
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, true); // NOTE: frame should end at bgn_pos (before \n) not after; else, will create tkn at (5,5), while tkn_mkr.Space creates one at (4,5); DATE:2013-10-31
|
||||
ctx.Apos().End_frame(ctx, root, src, bgn_pos, true); // NOTE: frame should end at bgn_pos (before \n) not after; else, will create tkn at (5,5), while tkn_mkr.Space creates one at (4,5); DATE:2013-10-31
|
||||
ctx.Tblw().Cell_pipe_seen_(false); // flip off "|" in tblw seq; EX: "| a\n||" needs to flip off "|" else "||" will be seen as style dlm"; NOTE: not covered by test?
|
||||
|
||||
Xop_para_wkr para_wkr = ctx.Para();
|
||||
@@ -113,5 +113,5 @@ public class Xop_nl_lxr implements Xop_lxr {
|
||||
}
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
public static final Xop_nl_lxr Instance = new Xop_nl_lxr(); Xop_nl_lxr() {}
|
||||
public static final Xop_nl_lxr Instance = new Xop_nl_lxr(); Xop_nl_lxr() {}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.parsers.paras; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_para_wkr_pre_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private final Xop_fxt fxt = new Xop_fxt();
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private final Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void teardown() {fxt.Init_para_n_();}
|
||||
@Test public void Pre_ignore_bos() { // PURPOSE: ignore pre at bgn; DATE:2013-07-09
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
@@ -57,7 +57,7 @@ public class Xop_para_wkr_pre_tst {
|
||||
));
|
||||
}
|
||||
@Test public void Ignore_pre_in_gallery() {// PURPOSE: pre in gallery should be ignored; EX:uk.w:EP2; DATE:2014-03-11
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.Y_byte;
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_wtr.File_found_mode = Bool_.Y_byte;
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, ""
|
||||
@@ -82,10 +82,10 @@ public class Xop_para_wkr_pre_tst {
|
||||
, "</ul>"
|
||||
,""
|
||||
));
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.N_byte;
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_wtr.File_found_mode = Bool_.N_byte;
|
||||
}
|
||||
@Test public void Pre_xnde_gallery() { // PURPOSE: <gallery> should invalidate pre; EX: en.w:Mary, Queen of Scots
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.Y_byte;
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_wtr.File_found_mode = Bool_.Y_byte;
|
||||
fxt.Wiki().Xtn_mgr().Init_by_wiki(fxt.Wiki());
|
||||
String raw = String_.Concat_lines_nl_skip_last
|
||||
( " <gallery>"
|
||||
@@ -109,7 +109,7 @@ public class Xop_para_wkr_pre_tst {
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.N_byte;
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_wtr.File_found_mode = Bool_.N_byte;
|
||||
}
|
||||
@Test public void Ignore_pre_in_center() {// PURPOSE: pre in gallery should be ignored; EX:uk.w:EP2; DATE:2014-03-11
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
|
||||
@@ -47,7 +47,7 @@ public class Xop_tblw_wkr implements Xop_ctx_wkr {
|
||||
}
|
||||
}
|
||||
if (ctx.Apos().Stack_len() > 0) // open apos; note that apos keeps its own stack, as they are not "structural" (not sure about this)
|
||||
ctx.Apos().EndFrame(ctx, root, src, cur_pos, true); // close it
|
||||
ctx.Apos().End_frame(ctx, root, src, cur_pos, true);// close it
|
||||
Xop_tblw_tkn prv_tkn = ctx.Stack_get_tbl();
|
||||
if ( prv_tkn == null // prv_tkn not found; i.e.: no earlier "{|" or "<table>"
|
||||
|| ( ctx.Stack_get_tblw_tb() == null // no {| on stack; DATE:2014-05-05
|
||||
|
||||
@@ -43,25 +43,27 @@ public class Nowiki_escape_itm {
|
||||
}
|
||||
return dirty;
|
||||
}
|
||||
private static final byte[] Pre_bry = new byte[] {Byte_ascii.Nl, Byte_ascii.Space}; // NOTE: must go before trie_new
|
||||
private static final Btrie_slim_mgr trie = trie_new();
|
||||
private static Btrie_slim_mgr trie_new() {
|
||||
|
||||
private static final Btrie_slim_mgr trie = New_trie();
|
||||
private static Btrie_slim_mgr New_trie() {
|
||||
byte[] pre_bry = new byte[] {Byte_ascii.Nl, Byte_ascii.Space}; // NOTE: must go before New_trie
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
|
||||
trie_new_itm(rv, Byte_ascii.Lt_bry , Xop_amp_trie.Bry_xowa_lt);
|
||||
trie_new_itm(rv, Byte_ascii.Brack_bgn_bry , Xop_amp_trie.Bry_xowa_brack_bgn);
|
||||
trie_new_itm(rv, Byte_ascii.Brack_end_bry , Xop_amp_trie.Bry_xowa_brack_end); // PAGE:en.w: Tall_poppy_syndrome DATE:2014-07-23
|
||||
trie_new_itm(rv, Byte_ascii.Pipe_bry , Xop_amp_trie.Bry_xowa_pipe);
|
||||
trie_new_itm(rv, Byte_ascii.Apos_bry , Xop_amp_trie.Bry_xowa_apos); // NOTE: for backward compatibility, use &apos;; note that amp_wkr will turn &apos; -> &#39; but &#39; -> '; DATE:2014-07-03
|
||||
trie_new_itm(rv, Byte_ascii.Colon_bry , Xop_amp_trie.Bry_xowa_colon);
|
||||
trie_new_itm(rv, Byte_ascii.Underline_bry , Xop_amp_trie.Bry_xowa_underline);
|
||||
trie_new_itm(rv, Byte_ascii.Star_bry , Xop_amp_trie.Bry_xowa_asterisk);
|
||||
trie_new_itm(rv, Byte_ascii.Dash_bry , Xop_amp_trie.Bry_xowa_dash); // needed to handle "|<nowiki>-</nowiki>"; PAGE:de.w:Liste_von_Vereinen_und_Vereinigungen_von_Gläubigen_(römisch-katholische_Kirche) DATE:2015-01-08
|
||||
trie_new_itm(rv, Byte_ascii.Space_bry , Xop_amp_trie.Bry_xowa_space);
|
||||
trie_new_itm(rv, Byte_ascii.Nl_bry , Xop_amp_trie.Bry_xowa_nl);
|
||||
trie_new_itm(rv, Pre_bry , Pre_bry);
|
||||
New_trie_itm(rv, Byte_ascii.Lt_bry , Xop_amp_trie.Str__xowa_lt);
|
||||
New_trie_itm(rv, Byte_ascii.Brack_bgn_bry , Xop_amp_trie.Str__xowa_brack_bgn);
|
||||
New_trie_itm(rv, Byte_ascii.Brack_end_bry , Xop_amp_trie.Str__xowa_brack_end);// PAGE:en.w: Tall_poppy_syndrome DATE:2014-07-23
|
||||
New_trie_itm(rv, Byte_ascii.Pipe_bry , Xop_amp_trie.Str__xowa_pipe);
|
||||
New_trie_itm(rv, Byte_ascii.Apos_bry , Xop_amp_trie.Str__xowa_apos); // NOTE: for backward compatibility, use &apos;; note that amp_wkr will turn &apos; -> &#39; but &#39; -> '; DATE:2014-07-03
|
||||
New_trie_itm(rv, Byte_ascii.Colon_bry , Xop_amp_trie.Str__xowa_colon);
|
||||
New_trie_itm(rv, Byte_ascii.Underline_bry , Xop_amp_trie.Str__xowa_underline);
|
||||
New_trie_itm(rv, Byte_ascii.Star_bry , Xop_amp_trie.Str__xowa_asterisk);
|
||||
New_trie_itm(rv, Byte_ascii.Dash_bry , Xop_amp_trie.Str__xowa_dash); // needed to handle "|<nowiki>-</nowiki>"; PAGE:de.w:Liste_von_Vereinen_und_Vereinigungen_von_Gläubigen_(römisch-katholische_Kirche) DATE:2015-01-08
|
||||
New_trie_itm(rv, Byte_ascii.Space_bry , Xop_amp_trie.Str__xowa_space);
|
||||
New_trie_itm(rv, Byte_ascii.Nl_bry , Xop_amp_trie.Str__xowa_nl);
|
||||
New_trie_itm(rv, pre_bry , pre_bry);
|
||||
return rv;
|
||||
}
|
||||
private static void trie_new_itm(Btrie_slim_mgr rv, byte[] src, byte[] trg) {
|
||||
private static void New_trie_itm(Btrie_slim_mgr rv, byte[] src, String trg) {New_trie_itm(rv, src, Bry_.new_u8(trg));}
|
||||
private static void New_trie_itm(Btrie_slim_mgr rv, byte[] src, byte[] trg) {
|
||||
Nowiki_escape_itm itm = new Nowiki_escape_itm(src, trg);
|
||||
rv.Add_obj(src, itm);
|
||||
}
|
||||
|
||||
@@ -81,12 +81,13 @@ public class Xop_sanitizer {
|
||||
break;
|
||||
case Xop_amp_trie_itm.Tid_num_dec:
|
||||
case Xop_amp_trie_itm.Tid_num_hex:
|
||||
boolean pass = amp_mgr.Parse_as_int(itm_tid == Xop_amp_trie_itm.Tid_num_hex, src, end, pos - 1, pos + itm.Xml_name_bry().length);
|
||||
if (pass)
|
||||
bfr.Add_u8_int(amp_mgr.Rslt_val());
|
||||
Xop_amp_mgr_rslt rv = new Xop_amp_mgr_rslt();
|
||||
amp_mgr.Parse_ncr(rv, itm_tid == Xop_amp_trie_itm.Tid_num_hex, src, end, pos - 1, pos + itm.Xml_name_bry().length);
|
||||
if (rv.Pass())
|
||||
bfr.Add_u8_int(rv.Val());
|
||||
else
|
||||
bfr.Add_byte(Byte_ascii.Amp);
|
||||
pos = amp_mgr.Rslt_pos();
|
||||
pos = rv.Pos();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_xnde_wkr__tidy_tst {
|
||||
private final Xop_fxt fxt = new Xop_fxt();
|
||||
private final Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Sub_sup_autocorrect() {
|
||||
fxt.Test_parse_page_wiki_str("<sub>a</sup>b", "<sub>a</sub>b");
|
||||
|
||||
Reference in New Issue
Block a user