mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.10.3.1
This commit is contained in:
@@ -20,10 +20,10 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xop_amp_lxr implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_amp;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Amp, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
return ctx.Amp().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
|
||||
}
|
||||
public static final Xop_amp_lxr _ = new Xop_amp_lxr();
|
||||
public static final Xop_amp_lxr Instance = new Xop_amp_lxr();
|
||||
}
|
||||
|
||||
@@ -18,58 +18,63 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_amp_mgr {
|
||||
private final Object thread_lock_1 = new Object(), thread_lock_2 = new Object();
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr.reset_(32);
|
||||
public Btrie_slim_mgr Amp_trie() {return amp_trie;} private final Btrie_slim_mgr amp_trie = Xop_amp_trie._;
|
||||
public Btrie_slim_mgr Amp_trie() {return amp_trie;} private final Btrie_slim_mgr amp_trie = Xop_amp_trie.Instance;
|
||||
public int Rslt_pos() {return rslt_pos;} private int rslt_pos;
|
||||
public int Rslt_val() {return rslt_val;} private int rslt_val;
|
||||
public Xop_tkn_itm Parse_as_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int cur_pos) {
|
||||
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
Object o = amp_trie.Match_bgn(src, cur_pos, src_len);
|
||||
cur_pos = amp_trie.Match_pos();
|
||||
if (o == null) return null;
|
||||
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
|
||||
switch (itm.Tid()) {
|
||||
case Xop_amp_trie_itm.Tid_name_std:
|
||||
case Xop_amp_trie_itm.Tid_name_xowa:
|
||||
rslt_pos = cur_pos;
|
||||
return tkn_mkr.Amp_txt(amp_pos, cur_pos, itm);
|
||||
case Xop_amp_trie_itm.Tid_num_hex:
|
||||
case Xop_amp_trie_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
|
||||
boolean pass = Parse_as_int(ncr_is_hex, src, src_len, amp_pos, cur_pos);
|
||||
return pass ? tkn_mkr.Amp_num(amp_pos, rslt_pos, rslt_val) : null;
|
||||
default: throw Err_.new_unhandled(itm.Tid());
|
||||
synchronized (thread_lock_1) {
|
||||
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
Object o = amp_trie.Match_bgn(src, cur_pos, src_len);
|
||||
cur_pos = amp_trie.Match_pos();
|
||||
if (o == null) return null;
|
||||
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
|
||||
switch (itm.Tid()) {
|
||||
case Xop_amp_trie_itm.Tid_name_std:
|
||||
case Xop_amp_trie_itm.Tid_name_xowa:
|
||||
rslt_pos = cur_pos;
|
||||
return tkn_mkr.Amp_txt(amp_pos, cur_pos, itm);
|
||||
case Xop_amp_trie_itm.Tid_num_hex:
|
||||
case Xop_amp_trie_itm.Tid_num_dec:
|
||||
boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
|
||||
boolean pass = Parse_as_int(ncr_is_hex, src, src_len, amp_pos, cur_pos);
|
||||
return pass ? tkn_mkr.Amp_num(amp_pos, rslt_pos, rslt_val) : null;
|
||||
default: throw Err_.new_unhandled(itm.Tid());
|
||||
}
|
||||
}
|
||||
}
|
||||
public boolean Parse_as_int(boolean ncr_is_hex, byte[] src, int src_len, int amp_pos, int int_bgn) {
|
||||
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
rslt_val = -1; // clear any previous setting
|
||||
int cur_pos = int_bgn, int_end = -1;
|
||||
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, cur_pos, src_len);
|
||||
if (semic_pos == Bry_find_.Not_found) return false;
|
||||
int_end = semic_pos - 1; // int_end = pos before semicolon
|
||||
int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
|
||||
for (int i = int_end; i >= int_bgn; i--) {
|
||||
byte b = src[i];
|
||||
if (ncr_is_hex) {
|
||||
if (b >= 48 && b <= 57) cur = b - 48;
|
||||
else if (b >= 65 && b <= 70) cur = b - 55;
|
||||
else if (b >= 97 && b <= 102) cur = b - 87;
|
||||
else if((b >= 71 && b <= 90)
|
||||
|| (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
|
||||
else return false;
|
||||
synchronized (thread_lock_2) {
|
||||
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
|
||||
rslt_val = -1; // clear any previous setting
|
||||
int cur_pos = int_bgn, int_end = -1;
|
||||
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, cur_pos, src_len);
|
||||
if (semic_pos == Bry_find_.Not_found) return false;
|
||||
int_end = semic_pos - 1; // int_end = pos before semicolon
|
||||
int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
|
||||
for (int i = int_end; i >= int_bgn; i--) {
|
||||
byte b = src[i];
|
||||
if (ncr_is_hex) {
|
||||
if (b >= 48 && b <= 57) cur = b - 48;
|
||||
else if (b >= 65 && b <= 70) cur = b - 55;
|
||||
else if (b >= 97 && b <= 102) cur = b - 87;
|
||||
else if((b >= 71 && b <= 90)
|
||||
|| (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
|
||||
else return false;
|
||||
}
|
||||
else {
|
||||
cur = b - Byte_ascii.Num_0;
|
||||
if (cur < 0 || cur > 10) return false;
|
||||
}
|
||||
val += cur * factor;
|
||||
if (val > gplx.core.intls.Utf8_.Codepoint_max) return false; // fail if value > largest_unicode_codepoint
|
||||
factor *= multiple;
|
||||
}
|
||||
else {
|
||||
cur = b - Byte_ascii.Num_0;
|
||||
if (cur < 0 || cur > 10) return false;
|
||||
}
|
||||
val += cur * factor;
|
||||
if (val > gplx.core.intls.Utf8_.Codepoint_max) return false; // fail if value > largest_unicode_codepoint
|
||||
factor *= multiple;
|
||||
rslt_val = val;
|
||||
rslt_pos = semic_pos + 1; // position after semic
|
||||
return true;
|
||||
}
|
||||
rslt_val = val;
|
||||
rslt_pos = semic_pos + 1; // position after semic
|
||||
return true;
|
||||
}
|
||||
public byte[] Decode_as_bry(byte[] src) {
|
||||
if (src == null) return src;
|
||||
@@ -117,8 +122,8 @@ public class Xop_amp_mgr {
|
||||
tmp_bfr.Add_byte(b);
|
||||
++pos;
|
||||
}
|
||||
return dirty ? tmp_bfr.Xto_bry_and_clear() : src;
|
||||
return dirty ? tmp_bfr.To_bry_and_clear() : src;
|
||||
}
|
||||
}
|
||||
// Shared instance; the constructor is declared without an access modifier.
public static final Xop_amp_mgr Instance = new Xop_amp_mgr(); Xop_amp_mgr() {}
|
||||
}
|
||||
|
||||
@@ -33,10 +33,10 @@ public class Xop_amp_mgr_decode_tst {
|
||||
// Input restored to a zero-padded hex reference; the rendered page showed it already decoded. Confirm against the repository.
@Test public void Hex_zero_padded() {fxt.Test_decode_as_bry("&#x03a3;" , "Σ");}
|
||||
// Input restored to a hex reference with an upper-case X; the rendered page showed it already decoded. Confirm against the repository.
@Test public void Hex_upper_x() {fxt.Test_decode_as_bry("&#X3A3;" , "Σ");}
|
||||
// A reference above the largest code point must be left undecoded. The rendered page showed U+FFFD on both sides;
// the number here is a reconstruction -- confirm against the repository.
@Test public void Num_fail_large_codepoint() {fxt.Test_decode_as_bry("&#538189831;" , "&#538189831;");}
|
||||
@Test public void Num_ignore_extra_x() {fxt.Test_decode_as_bry("&#xx26D0;" , Char_.To_str(Char_.XbyInt(9936)));} // 2nd x is ignored
|
||||
@Test public void Num_ignore_extra_x() {fxt.Test_decode_as_bry("&#xx26D0;" , Char_.To_str(Char_.By_int(9936)));} // 2nd x is ignored
|
||||
}
|
||||
class Xop_amp_mgr_fxt {
|
||||
private Xop_amp_mgr amp_mgr = Xop_amp_mgr.I;
|
||||
private Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance;
|
||||
public void Reset() {}
|
||||
public void Test_decode_as_bry(String raw, String expd) {
|
||||
Tfds.Eq(expd, String_.new_u8(amp_mgr.Decode_as_bry(Bry_.new_u8(raw))));
|
||||
|
||||
@@ -31,7 +31,7 @@ public class Xop_amp_trie {
|
||||
, Bry_xowa_nl = Bry_.new_a7("&xowa_nl;")
|
||||
, Bry_xowa_dash = Bry_.new_a7("&xowa_dash;")
|
||||
;
|
||||
// Shared trie, built once by new_(); the constructor is declared without an access modifier.
public static final Btrie_slim_mgr Instance = new_(); Xop_amp_trie() {}
|
||||
private static Btrie_slim_mgr new_() {// REF.MW: Sanitizer|$wgHtmlEntities; NOTE:added apos
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
|
||||
Reg_name(rv, Bool_.Y, 60, Bry_xowa_lt);
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.html.lnkis.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.htmls.lnkis.*;
|
||||
public class Xop_amp_trie_itm {
|
||||
public Xop_amp_trie_itm(byte tid, int char_int, byte[] xml_name_bry) {
|
||||
this.tid = tid;
|
||||
|
||||
Reference in New Issue
Block a user