1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-07-19 23:16:49 -04:00
parent 794b5a232f
commit 8e041d6e06
834 changed files with 4749 additions and 4461 deletions

View File

@@ -17,10 +17,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.primitives.*; import gplx.core.btries.*;
import gplx.xowa.langs.numbers.*;
public class Xop_lnki_arg_parser {
private final Btrie_fast_mgr key_trie = Btrie_fast_mgr.cs_();
private final Bry_bfr int_bfr = Bry_bfr.reset_(16);
private final Btrie_bwd_mgr px_trie = Btrie_bwd_mgr.cs_(); private final Btrie_fast_mgr size_trie = Btrie_fast_mgr.cs_();
private int lnki_w, lnki_h;
private Btrie_fast_mgr key_trie = Btrie_fast_mgr.cs_();
private Bry_bfr int_bfr = Bry_bfr.reset_(16);
public void Evt_lang_changed(Xol_lang lang) {
Bry_bfr tmp_bfr = int_bfr;
Byte_obj_ref rslt = Byte_obj_ref.zero_();
@@ -48,7 +50,7 @@ public class Xop_lnki_arg_parser {
list = mgr.Get_at(Xol_kwd_grp_.Id_img_width);
if (list == null)
list = lang.Lang_mgr().Lang_en().Kwd_mgr().Get_at(Xol_kwd_grp_.Id_img_width);
Init_size_trie(tmp_bfr, list);
Init_size_trie(tmp_bfr, lang.Num_mgr().Digits_mgr(), list);
}
public byte Identify_tid(byte[] src, int bgn, int end, Xop_lnki_tkn lnki) {
lnki_w = Xop_lnki_tkn.Width_null;
@@ -63,21 +65,31 @@ public class Xop_lnki_arg_parser {
Byte_obj_val val = (Byte_obj_val)key_trie.Match_bgn(src, bgn, end);
if (val != null && len == key_trie.Match_pos() - bgn) // check for false matches; EX: alternate= should not match alt=
return val.Val(); // match; return val;
Object bwd_obj = bwd_trie.Match_bgn(src, end - 1, bgn - 1);
Object bwd_obj = px_trie.Match_bgn(src, end - 1, bgn - 1);
if (bwd_obj != null && ((Byte_obj_val)bwd_obj).Val() == Tid_dim) { // ends with "px"; try to parse size
int_bfr.Clear();
int match_len = end -1 - bwd_trie.Match_pos();
int match_len = end -1 - px_trie.Match_pos();
boolean mode_width = true;
int itm_end = bgn + (len - match_len); // remove trailing px
for (int i = bgn; i < itm_end; i++) {
byte b = src[i];
Object o = size_trie.Match_bgn_w_byte(b, src, i, itm_end);
if (o == null) {
this.lnki_w = 0; // NOTE: must null out width; EX: "123xTextpx"; PAGE:es.b:Alimentaci<EFBFBD>n_infantil; DATE:2015-07-10
this.lnki_w = 0; // NOTE: must null out width; EX: "123xTextpx"; PAGE:es.b:Alimentación_infantil; DATE:2015-07-10
return Tid_caption; // letter or other invalid character; return caption
}
Byte_obj_val v = (Byte_obj_val)o;
switch (v.Val()) {
switch (v.Val()) { // NOTE: d0 - d9 handle non-english numbers; EX:fa.w and ۲۰۰px; DATE:2015-07-18
case Key_dim_d0: int_bfr.Add_byte(Byte_ascii.Num_0); i += (size_trie.Match_pos() - i) - 1; break; // -1 b/c loop will ++i
case Key_dim_d1: int_bfr.Add_byte(Byte_ascii.Num_1); i += (size_trie.Match_pos() - i) - 1; break;
case Key_dim_d2: int_bfr.Add_byte(Byte_ascii.Num_2); i += (size_trie.Match_pos() - i) - 1; break;
case Key_dim_d3: int_bfr.Add_byte(Byte_ascii.Num_3); i += (size_trie.Match_pos() - i) - 1; break;
case Key_dim_d4: int_bfr.Add_byte(Byte_ascii.Num_4); i += (size_trie.Match_pos() - i) - 1; break;
case Key_dim_d5: int_bfr.Add_byte(Byte_ascii.Num_5); i += (size_trie.Match_pos() - i) - 1; break;
case Key_dim_d6: int_bfr.Add_byte(Byte_ascii.Num_6); i += (size_trie.Match_pos() - i) - 1; break;
case Key_dim_d7: int_bfr.Add_byte(Byte_ascii.Num_7); i += (size_trie.Match_pos() - i) - 1; break;
case Key_dim_d8: int_bfr.Add_byte(Byte_ascii.Num_8); i += (size_trie.Match_pos() - i) - 1; break;
case Key_dim_d9: int_bfr.Add_byte(Byte_ascii.Num_9); i += (size_trie.Match_pos() - i) - 1; break;
case Key_dim_num: int_bfr.Add_byte(b); break;
case Key_space: break; // ignore space; EX: "100 px"
case Key_dim_px: { // 2nd px found; EX: "40pxpx"; "40px px"
@@ -105,16 +117,22 @@ public class Xop_lnki_arg_parser {
return Tid_dim;
}
return Tid_caption;
} private Btrie_bwd_mgr bwd_trie = Btrie_bwd_mgr.cs_(); private Btrie_fast_mgr size_trie = Btrie_fast_mgr.cs_();
}
// Registers one keyword in key_trie; the tid is boxed so that a later key_trie match can hand it back as a Byte_obj_val.
private void Init_key_trie(byte[] key, byte v) {
	key_trie.Add(key, Byte_obj_val.new_(v));
}
private void Init_size_trie(Bry_bfr tmp_bfr, Xol_kwd_grp list) {
private void Init_size_trie(Bry_bfr tmp_bfr, Xol_transform_mgr digit_mgr, Xol_kwd_grp list) {
if (list == null && Env_.Mode_testing()) return; // TEST: allows partial parsing of $magicWords
size_trie.Clear(); bwd_trie.Clear();
size_trie.Clear(); px_trie.Clear();
for (int i = 0; i < 10; i++)
size_trie.Add((byte)(i + Char_.AsciiZero), Byte_obj_val.new_(Key_dim_num));
int len = digit_mgr.Len(); // NOTE: add non-english numbers; EX: ۲۰۰px; DATE:2015-07-18
for (int i = 0; i < len; ++i) {
KeyVal kv = digit_mgr.Get_at(i);
int num = (byte)Int_.parse_or_(kv.Key(), -1); if (num == -1) continue; // ignore separators; EX: "," "."
size_trie.Add((byte[])kv.Val(), Byte_obj_val.new_((byte)num)); // NOTE: num corresponds to dim_d0 -> d9 below
}
size_trie.Add(Byte_ascii.Space, Byte_obj_val.new_(Key_space));
size_trie.Add(X_bry, Byte_obj_val.new_(Key_dim_x));
Xol_kwd_itm[] words = list.Itms();
@@ -123,10 +141,10 @@ public class Xop_lnki_arg_parser {
for (int i = 0; i < words_len; i++) {
byte[] word_bry = Xol_kwd_parse_data.Strip(tmp_bfr, words[i].Val(), rslt);
size_trie.Add(word_bry, Byte_obj_val.new_(Key_dim_px));
bwd_trie.Add(word_bry, Byte_obj_val.new_(Tid_dim));
px_trie.Add(word_bry, Byte_obj_val.new_(Tid_dim));
}
}
public static final byte[] Bry_upright = Bry_.new_u8("upright"), Bry_thumbtime = Bry_.new_u8("thumbtime");
public static final byte[] Bry_upright = Bry_.new_a7("upright"), Bry_thumbtime = Bry_.new_a7("thumbtime");
public static final byte
Tid_unknown = 0, Tid_thumb = 1, Tid_left = 2, Tid_right = 3, Tid_none = 4, Tid_center = 5, Tid_frame = 6, Tid_frameless = 7, Tid_upright = 8, Tid_border = 9
, Tid_alt = 10, Tid_link = 11, Tid_baseline = 12, Tid_sub = 13, Tid_super = 14, Tid_top = 15, Tid_text_top = 16, Tid_middle = 17, Tid_bottom = 18, Tid_text_bottom = 19
@@ -136,8 +154,12 @@ public class Xop_lnki_arg_parser {
, Tid_noplayer = 24, Tid_noicon = 25, Tid_thumbtime = 26
, Tid_class = 27
;
private static final byte[] X_bry = Bry_.new_u8("x");
private static final byte Key_dim_num = 0, Key_dim_x = 1, Key_dim_px = 2, Key_space = 3;
private static final byte[] X_bry = Bry_.new_a7("x");
// Codes stored as the values of size_trie entries and switched on while scanning a size argument.
// Init_size_trie registers localized digits with the parsed digit itself as the value ((byte)num),
// so Key_dim_d0 - Key_dim_d9 must stay equal to 0 - 9 and the remaining codes must stay outside that range.
private static final byte // NOTE: d0 - d9 must match 0 - 9; DATE:2015-07-18
Key_dim_d0 = 0, Key_dim_d1 = 1, Key_dim_d2 = 2, Key_dim_d3 = 3, Key_dim_d4 = 4
, Key_dim_d5 = 5, Key_dim_d6 = 6, Key_dim_d7 = 7, Key_dim_d8 = 8, Key_dim_d9 = 9
, Key_dim_num = 10, Key_dim_x = 11, Key_dim_px = 12, Key_space = 13
;
private static final int[][] Keys_ids = new int[][]
{ new int[] {Xol_kwd_grp_.Id_img_thumbnail , Tid_thumb}
, new int[] {Xol_kwd_grp_.Id_img_manualthumb , Tid_thumb} // RESEARCH: what is manualthumb? 'thumb=$1' vs 'thumb'

View File

@@ -34,7 +34,7 @@ public class Xop_lnki_type {
)
return false;
else // should not happen
throw Exc_.new_unhandled(id);
throw Err_.new_unhandled(id);
}
public static boolean Id_limits_large_size(byte id) {// Linker.php|makeThumbLink2|Do not present an image bigger than the source, for bitmap-style images; assuming original of 400,200
if ( Enm_.HasInt(id, Id_thumb) // [[File:A.png|600px|thumb]] -> 400,200
@@ -47,7 +47,7 @@ public class Xop_lnki_type {
)
return false;
else // should not happen;
throw Exc_.new_unhandled(id);
throw Err_.new_unhandled(id);
}
public static boolean Id_supports_upright(byte id) {// REF:Linker.php|makeImageLink;if ( isset( $fp['thumbnail'] ) || isset( $fp['manualthumb'] ) || isset( $fp['framed'] ) || isset( $fp['frameless'] ) || !$hp['width'] ) DATE:2014-05-22
if ( Enm_.HasInt(id, Id_thumb)
@@ -60,6 +60,6 @@ public class Xop_lnki_type {
)
return false;
else // should not happen;
throw Exc_.new_unhandled(id);
throw Err_.new_unhandled(id);
}
}

View File

@@ -170,7 +170,7 @@ public class Xop_lnki_wkr implements Xop_ctx_wkr, Xop_arg_wkr {
}
return true;
} catch (Exception e) {
ctx.App().Usr_dlg().Warn_many("", "", "fatal error in lnki: page=~{0} src=~{1} err=~{2}", String_.new_u8(ctx.Cur_page().Ttl().Full_db()), String_.new_u8(src, lnki.Src_bgn(), lnki.Src_end()), Err_.Message_gplx(e));
ctx.App().Usr_dlg().Warn_many("", "", "fatal error in lnki: page=~{0} src=~{1} err=~{2}", String_.new_u8(ctx.Cur_page().Ttl().Full_db()), String_.new_u8(src, lnki.Src_bgn(), lnki.Src_end()), Err_.Message_gplx_full(e));
return false;
}
} private static final byte[] Const_pipe = Bry_.new_a7("|");

View File

@@ -56,5 +56,11 @@ public class Xop_lnki_wkr__uncommon_tst {
// PURPOSE: "1234xTextpx" must not be interpreted as a size; PAGE:es.b:Alimentación_infantil; DATE:2015-07-10
@Test public void Mistaken_x_px() {
	String wikitext = "[[File:A.png|1234xSomeTextpx]]";
	String expected_frag = " width=\"0\" height=\"0\"";	// width should be 0
	fxt.Test_html_wiki_frag(wikitext, expected_frag);
}
// PURPOSE: handle i18n nums; EX:[[پرونده:Shahbazi 3.jpg|۲۰۰px]] -> 200px; PAGE:fa.w:فهرست_آثار_علیرضااپور_شهبازی; DATE:2015-07-18
@Test public void Persian() {
	String wikitext = "[[File:A.png|۲۰۰px]]";
	String expected_frag = " width=\"200\" height=\"0\"";
	Xol_lang wiki_lang = fxt.Wiki().Lang();
	// load the Persian digit definitions into the wiki's language before parsing
	fxt.App().Gfs_mgr().Run_str_for(wiki_lang, gplx.xowa.xtns.pfuncs.numbers.Pf_formatnum_fa_tst.Persian_numbers_gfs);
	// force rebuild of size_trie so the newly loaded digits are registered
	wiki_lang.Evt_lang_changed();
	fxt.Test_html_wiki_frag(wikitext, expected_frag);
}
}