1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

v2.10.3.1

This commit is contained in:
gnosygnu
2015-10-18 22:17:57 -04:00
parent 8e18af05b6
commit 4f43f51b18
1935 changed files with 12500 additions and 12889 deletions

View File

@@ -20,10 +20,10 @@ import gplx.core.btries.*; import gplx.xowa.langs.*;
/** Lexer for the ampersand byte: registers itself on the wiki's core trie and hands token creation to the amp parser exposed by the parse context. */
public class Xop_amp_lxr implements Xop_lxr {
// Returns Xop_lxr_.Tid_amp as this lexer's type id.
public int Lxr_tid() {return Xop_lxr_.Tid_amp;}
// Adds this lexer to core_trie under the key Byte_ascii.Amp.
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Byte_ascii.Amp, this);}
// Intentionally empty: nothing is registered per language.
// NOTE(review): the two signatures below look like the before/after forms of a single method (Xol_lang -> Xol_lang_itm); confirm which type still exists.
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {}
// Intentionally empty: nothing to release.
public void Term(Btrie_fast_mgr core_trie) {}
// Passes every argument through unchanged to ctx.Amp().Make_tkn and returns its result.
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
return ctx.Amp().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
}
// Shared instances of this lexer.
// NOTE(review): `_` and `Instance` look like the old and new names of the same singleton; `_` is not a legal identifier from Java 9 onward.
public static final Xop_amp_lxr _ = new Xop_amp_lxr();
public static final Xop_amp_lxr Instance = new Xop_amp_lxr();
}

View File

@@ -18,58 +18,63 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
public class Xop_amp_mgr {
// Two separate monitor objects: Parse_as_tkn synchronizes on thread_lock_1, Parse_as_int on thread_lock_2.
private final Object thread_lock_1 = new Object(), thread_lock_2 = new Object();
// Scratch buffer created via Bry_bfr.reset_(32).
private final Bry_bfr tmp_bfr = Bry_bfr.reset_(32);
// Accessor for the entity trie used for matching.
// NOTE(review): the two declarations below look like the before/after forms of one line (Xop_amp_trie._ -> Xop_amp_trie.Instance); confirm which one should remain.
public Btrie_slim_mgr Amp_trie() {return amp_trie;} private final Btrie_slim_mgr amp_trie = Xop_amp_trie._;
public Btrie_slim_mgr Amp_trie() {return amp_trie;} private final Btrie_slim_mgr amp_trie = Xop_amp_trie.Instance;
// Position stored by the most recent Parse_as_tkn / Parse_as_int call: amp_pos + 1 unless the parse succeeded, otherwise the position just past the parsed entity.
public int Rslt_pos() {return rslt_pos;} private int rslt_pos;
// Number stored by the most recent Parse_as_int call: -1 if it failed, otherwise the parsed value.
public int Rslt_val() {return rslt_val;} private int rslt_val;
public Xop_tkn_itm Parse_as_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int cur_pos) {
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
Object o = amp_trie.Match_bgn(src, cur_pos, src_len);
cur_pos = amp_trie.Match_pos();
if (o == null) return null;
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
switch (itm.Tid()) {
case Xop_amp_trie_itm.Tid_name_std:
case Xop_amp_trie_itm.Tid_name_xowa:
rslt_pos = cur_pos;
return tkn_mkr.Amp_txt(amp_pos, cur_pos, itm);
case Xop_amp_trie_itm.Tid_num_hex:
case Xop_amp_trie_itm.Tid_num_dec:
boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
boolean pass = Parse_as_int(ncr_is_hex, src, src_len, amp_pos, cur_pos);
return pass ? tkn_mkr.Amp_num(amp_pos, rslt_pos, rslt_val) : null;
default: throw Err_.new_unhandled(itm.Tid());
synchronized (thread_lock_1) {
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
Object o = amp_trie.Match_bgn(src, cur_pos, src_len);
cur_pos = amp_trie.Match_pos();
if (o == null) return null;
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
switch (itm.Tid()) {
case Xop_amp_trie_itm.Tid_name_std:
case Xop_amp_trie_itm.Tid_name_xowa:
rslt_pos = cur_pos;
return tkn_mkr.Amp_txt(amp_pos, cur_pos, itm);
case Xop_amp_trie_itm.Tid_num_hex:
case Xop_amp_trie_itm.Tid_num_dec:
boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
boolean pass = Parse_as_int(ncr_is_hex, src, src_len, amp_pos, cur_pos);
return pass ? tkn_mkr.Amp_num(amp_pos, rslt_pos, rslt_val) : null;
default: throw Err_.new_unhandled(itm.Tid());
}
}
}
public boolean Parse_as_int(boolean ncr_is_hex, byte[] src, int src_len, int amp_pos, int int_bgn) {
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
rslt_val = -1; // clear any previous setting
int cur_pos = int_bgn, int_end = -1;
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, cur_pos, src_len);
if (semic_pos == Bry_find_.Not_found) return false;
int_end = semic_pos - 1; // int_end = pos before semicolon
int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
for (int i = int_end; i >= int_bgn; i--) {
byte b = src[i];
if (ncr_is_hex) {
if (b >= 48 && b <= 57) cur = b - 48;
else if (b >= 65 && b <= 70) cur = b - 55;
else if (b >= 97 && b <= 102) cur = b - 87;
else if((b >= 71 && b <= 90)
|| (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
else return false;
synchronized (thread_lock_2) {
rslt_pos = amp_pos + 1; // default to fail pos; after amp;
rslt_val = -1; // clear any previous setting
int cur_pos = int_bgn, int_end = -1;
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, cur_pos, src_len);
if (semic_pos == Bry_find_.Not_found) return false;
int_end = semic_pos - 1; // int_end = pos before semicolon
int multiple = ncr_is_hex ? 16 : 10, val = 0, factor = 1, cur = 0;
for (int i = int_end; i >= int_bgn; i--) {
byte b = src[i];
if (ncr_is_hex) {
if (b >= 48 && b <= 57) cur = b - 48;
else if (b >= 65 && b <= 70) cur = b - 55;
else if (b >= 97 && b <= 102) cur = b - 87;
else if((b >= 71 && b <= 90)
|| (b >= 91 && b <= 122)) continue; // NOTE: wiki discards letters G-Z; PAGE:en.w:Miscellaneous_Symbols "{{Unicode|&#xx26D0;}}"; NOTE 2nd x is discarded
else return false;
}
else {
cur = b - Byte_ascii.Num_0;
if (cur < 0 || cur > 10) return false;
}
val += cur * factor;
if (val > gplx.core.intls.Utf8_.Codepoint_max) return false; // fail if value > largest_unicode_codepoint
factor *= multiple;
}
else {
cur = b - Byte_ascii.Num_0;
if (cur < 0 || cur > 10) return false;
}
val += cur * factor;
if (val > gplx.core.intls.Utf8_.Codepoint_max) return false; // fail if value > largest_unicode_codepoint
factor *= multiple;
rslt_val = val;
rslt_pos = semic_pos + 1; // position after semic
return true;
}
rslt_val = val;
rslt_pos = semic_pos + 1; // position after semic
return true;
}
public byte[] Decode_as_bry(byte[] src) {
if (src == null) return src;
@@ -117,8 +122,8 @@ public class Xop_amp_mgr {
tmp_bfr.Add_byte(b);
++pos;
}
return dirty ? tmp_bfr.Xto_bry_and_clear() : src;
return dirty ? tmp_bfr.To_bry_and_clear() : src;
}
}
// Shared instance; the constructor has no access modifier, so it is package-private.
// NOTE(review): `I` and `Instance` look like the old and new names of the same singleton line (each also declares the constructor); confirm which one should remain.
public static final Xop_amp_mgr I = new Xop_amp_mgr(); Xop_amp_mgr() {}
public static final Xop_amp_mgr Instance = new Xop_amp_mgr(); Xop_amp_mgr() {}
}

View File

@@ -33,10 +33,10 @@ public class Xop_amp_mgr_decode_tst {
@Test public void Hex_zero_padded() {fxt.Test_decode_as_bry("&#x03a3;" , "Σ");}
@Test public void Hex_upper_x() {fxt.Test_decode_as_bry("&#X3A3;" , "Σ");}
@Test public void Num_fail_large_codepoint() {fxt.Test_decode_as_bry("&#538189831;" , "&#538189831;");}
@Test public void Num_ignore_extra_x() {fxt.Test_decode_as_bry("&#xx26D0;" , Char_.To_str(Char_.XbyInt(9936)));} // 2nd x is ignored
@Test public void Num_ignore_extra_x() {fxt.Test_decode_as_bry("&#xx26D0;" , Char_.To_str(Char_.By_int(9936)));} // 2nd x is ignored
}
class Xop_amp_mgr_fxt {
private Xop_amp_mgr amp_mgr = Xop_amp_mgr.I;
private Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance;
public void Reset() {}
public void Test_decode_as_bry(String raw, String expd) {
Tfds.Eq(expd, String_.new_u8(amp_mgr.Decode_as_bry(Bry_.new_u8(raw))));

View File

@@ -31,7 +31,7 @@ public class Xop_amp_trie {
, Bry_xowa_nl = Bry_.new_a7("&xowa_nl;")
, Bry_xowa_dash = Bry_.new_a7("&xowa_dash;")
;
public static final Btrie_slim_mgr _ = new_(); Xop_amp_trie() {}
public static final Btrie_slim_mgr Instance = new_(); Xop_amp_trie() {}
private static Btrie_slim_mgr new_() {// REF.MW: Sanitizer|$wgHtmlEntities; NOTE:added apos
Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
Reg_name(rv, Bool_.Y, 60, Bry_xowa_lt);

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.langs.htmls.*; import gplx.xowa.html.lnkis.*;
import gplx.langs.htmls.*; import gplx.xowa.htmls.lnkis.*;
public class Xop_amp_trie_itm {
public Xop_amp_trie_itm(byte tid, int char_int, byte[] xml_name_bry) {
this.tid = tid;