diff --git a/100_core/src_120_basicDataType/gplx/Bitmask_.java b/100_core/src_120_basicDataType/gplx/Bitmask_.java new file mode 100644 index 000000000..ccd2a2135 --- /dev/null +++ b/100_core/src_120_basicDataType/gplx/Bitmask_.java @@ -0,0 +1,40 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx; +public class Bitmask_ { + public static boolean Has_int(int val, int find) {return find == (val & find);} + public static int Flip_int(boolean enable, int val, int find) { + boolean has = find == (val & find); + return (has ^ enable) ? val ^ find : val; + } + public static int Add_int(int lhs, int rhs) {return lhs | rhs;} + public static int Add_int_ary(int... ary) { + int rv = 0; + int len = ary.length; + for (int i = 0; i < len; ++i) { + int itm = ary[i]; + if (rv == 0) + rv = itm; + else + rv = Flip_int(true, rv, itm); + } + return rv; + } + public static boolean Has_byte(byte val, byte find) {return find == (val & find);} + public static byte Add_byte(byte flag, byte itm) {return (byte)(flag | itm);} +} diff --git a/100_core/src_120_basicDataType/gplx/Enm_.java b/100_core/src_120_basicDataType/gplx/Enm_.java index 2917ce502..05dd6be99 100644 --- a/100_core/src_120_basicDataType/gplx/Enm_.java +++ b/100_core/src_120_basicDataType/gplx/Enm_.java @@ -18,25 +18,5 @@ along with this program. If not, see . 
package gplx; public class Enm_ { public static int To_int(Object enm) {return Ordinal_lang(enm);} - public static boolean Has_int(int val, int find) {return find == (val & find);} - public static int Add_int(int lhs, int rhs) {return lhs | rhs;} - public static int Add_int_ary(int... ary) { - int rv = 0; - int len = ary.length; - for (int i = 0; i < len; ++i) { - int itm = ary[i]; - if (rv == 0) - rv = itm; - else - rv = Flip_int(true, rv, itm); - } - return rv; - } - public static int Flip_int(boolean enable, int val, int find) { - boolean has = find == (val & find); - return (has ^ enable) ? val ^ find : val; - } - public static boolean Has_byte(byte val, byte find) {return find == (val & find);} - public static byte Add_byte(byte flag, byte itm) {return (byte)(flag | itm);} private static int Ordinal_lang(Object v) {return ((Enum)v).ordinal();} } diff --git a/100_core/src_130_brys/gplx/Bry_find_.java b/100_core/src_130_brys/gplx/Bry_find_.java index ee186c990..97289edb6 100644 --- a/100_core/src_130_brys/gplx/Bry_find_.java +++ b/100_core/src_130_brys/gplx/Bry_find_.java @@ -162,6 +162,21 @@ public class Bry_find_ { } return end; } + public static int Find_bwd__skip_ws(byte[] src, int end, int bgn) { + int src_len = src.length; + if (end == src_len) return end; + if (end > src_len || end < 0) return Bry_find_.Not_found; + int pos = end - 1; // start from end - 1; handles situations where len is passed in + for (int i = pos; i >= bgn; --i) { + switch (src[i]) { + case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: + break; + default: + return i + 1; + } + } + return bgn; + } public static int Find_bwd_while(byte[] src, int cur, int end, byte while_byte) { --cur; while (true) { @@ -295,6 +310,7 @@ public class Bry_find_ { } public static int Find_bwd_while_alphanum(byte[] src, int cur) {return Find_bwd_while_alphanum(src, cur, -1);} public static int Find_bwd_while_alphanum(byte[] src, int cur, int end) { + --cur; while (cur > end) 
{ switch (src[cur]) { case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4: diff --git a/100_core/src_130_brys/gplx/Bry_split_.java b/100_core/src_130_brys/gplx/Bry_split_.java index 5d39e139e..585a5175a 100644 --- a/100_core/src_130_brys/gplx/Bry_split_.java +++ b/100_core/src_130_brys/gplx/Bry_split_.java @@ -23,16 +23,17 @@ public class Bry_split_ { public static byte[][] Split(byte[] src, byte dlm, boolean trim) { synchronized (thread_lock) { Bry_split_wkr__to_ary wkr = Bry_split_wkr__to_ary.I; - Split(src, dlm, trim, wkr); + Split(src, 0, src == null ? 0 : src.length, dlm, trim, wkr); return wkr.To_ary(); } } - public static void Split(byte[] src, byte dlm, boolean trim, Bry_split_wkr wkr) { - if (src == null) return; - int src_len = src.length, pos = 0; if (src_len == 0) return; + public static int Split(byte[] src, int src_bgn, int src_end, byte dlm, boolean trim, Bry_split_wkr wkr) { + if (src == null || src_end - src_bgn < 1) return 0; + int pos = src_bgn; int itm_bgn = -1, itm_end = -1; + int count = 0; while (true) { - boolean pos_is_last = pos == src_len; + boolean pos_is_last = pos == src_end; byte b = pos_is_last ? 
dlm : src[pos]; int nxt_pos = pos + 1; boolean process = true; @@ -51,9 +52,9 @@ public class Bry_split_ { else { int rv = wkr.Split(src, itm_bgn, itm_end); switch (rv) { - case Rv__ok: break; + case Rv__ok: ++count; break; case Rv__extend: reset = false; break; - case Rv__cancel: pos_is_last = true; break; + case Rv__cancel: return count; default: throw Err_.new_unhandled(rv); } } @@ -67,6 +68,7 @@ public class Bry_split_ { if (pos_is_last) break; pos = nxt_pos; } + return count; } public static byte[][] Split(byte[] src, byte[] dlm) { if (Bry_.Len_eq_0(src)) return Bry_.Ary_empty; @@ -115,12 +117,16 @@ public class Bry_split_ { class Bry_split_wkr__to_ary implements gplx.core.brys.Bry_split_wkr { private final List_adp list = List_adp_.new_(); public int Split(byte[] src, int itm_bgn, int itm_end) { - byte[] bry = itm_end == itm_bgn ? Bry_.Empty : Bry_.Mid(src, itm_bgn, itm_end); - list.Add(bry); - return Bry_split_.Rv__ok; + synchronized (list) { + byte[] bry = itm_end == itm_bgn ? Bry_.Empty : Bry_.Mid(src, itm_bgn, itm_end); + list.Add(bry); + return Bry_split_.Rv__ok; + } } public byte[][] To_ary() { - return (byte[][])list.To_ary_and_clear(byte[].class); + synchronized (list) { + return (byte[][])list.To_ary_and_clear(byte[].class); + } } public static final Bry_split_wkr__to_ary I = new Bry_split_wkr__to_ary(); Bry_split_wkr__to_ary() {} } diff --git a/100_core/src_130_brys/gplx/Bry_split__tst.java b/100_core/src_130_brys/gplx/Bry_split__tst.java index cdd0d1f3f..26289e846 100644 --- a/100_core/src_130_brys/gplx/Bry_split__tst.java +++ b/100_core/src_130_brys/gplx/Bry_split__tst.java @@ -35,11 +35,30 @@ public class Bry_split__tst { fxt.Test_Split(" a b | c d " , Byte_ascii.Pipe, Bool_.Y, "a b", "c d"); fxt.Test_Split(" a \n b " , Byte_ascii.Nl , Bool_.N, " a ", " b "); // ws as dlm fxt.Test_Split(" a \n b " , Byte_ascii.Nl , Bool_.Y, "a", "b"); // ws as dlm; trim + fxt.Test_Split("a|extend|b" , Byte_ascii.Pipe, Bool_.Y, "a", "extend|b"); // extend + 
fxt.Test_Split("extend|a" , Byte_ascii.Pipe, Bool_.Y, "extend|a"); // extend + fxt.Test_Split("a|cancel|b" , Byte_ascii.Pipe, Bool_.Y, "a"); // cancel } } class Bry_split__fxt { + private final Bry_split_wkr__example wkr = new Bry_split_wkr__example(); public void Test_Split(String raw_str, byte dlm, boolean trim, String... expd) { - byte[][] actl_ary = Bry_split_.Split(Bry_.new_a7(raw_str), dlm, trim); + byte[] src = Bry_.new_a7(raw_str); + Bry_split_.Split(src, 0, src.length, dlm, trim, wkr); + byte[][] actl_ary = wkr.To_ary(); Tfds.Eq_ary_str(expd, String_.Ary(actl_ary)); } } +class Bry_split_wkr__example implements gplx.core.brys.Bry_split_wkr { + private final List_adp list = List_adp_.new_(); + public int Split(byte[] src, int itm_bgn, int itm_end) { + byte[] bry = itm_end == itm_bgn ? Bry_.Empty : Bry_.Mid(src, itm_bgn, itm_end); + if (Bry_.Eq(bry, Bry_.new_a7("extend"))) return Bry_split_.Rv__extend; + else if (Bry_.Eq(bry, Bry_.new_a7("cancel"))) return Bry_split_.Rv__cancel; + list.Add(bry); + return Bry_split_.Rv__ok; + } + public byte[][] To_ary() { + return (byte[][])list.To_ary_and_clear(byte[].class); + } +} diff --git a/100_core/src_140_list/gplx/List_adp_.java b/100_core/src_140_list/gplx/List_adp_.java index d2c90d402..f892df133 100644 --- a/100_core/src_140_list/gplx/List_adp_.java +++ b/100_core/src_140_list/gplx/List_adp_.java @@ -50,6 +50,13 @@ public class List_adp_ { list.Del_at(last_idx); return rv; } + public static Object Pop_or(List_adp list, Object or) { + int list_len = list.Count(); if (list_len == 0) return or; + int last_idx = list_len - 1; + Object rv = list.Get_at(last_idx); + list.Del_at(last_idx); + return rv; + } public static void DisposeAll(List_adp list) { for (int i = 0; i < list.Count(); i++) ((RlsAble)list.Get_at(i)).Rls(); diff --git a/100_core/src_140_list/gplx/List_adp_base.java b/100_core/src_140_list/gplx/List_adp_base.java index eabd53602..ff2a25717 100644 --- a/100_core/src_140_list/gplx/List_adp_base.java +++ 
b/100_core/src_140_list/gplx/List_adp_base.java @@ -138,7 +138,7 @@ public abstract class List_adp_base implements List_adp, GfoInvkAble { public String To_str() { Bry_bfr bfr = Bry_bfr.new_(); for (int i = 0; i < count; ++i) - bfr.Add_obj(list[i]); + bfr.Add_str_u8(Object_.Xto_str_strict_or_null_mark(list[i])).Add_byte_nl(); return bfr.Xto_str_and_clear(); } private void BoundsChk(int bgn, int end, int len) { diff --git a/100_core/src_800_tst/gplx/Tfds.java b/100_core/src_800_tst/gplx/Tfds.java index 6d48b6a6b..9695a68e4 100644 --- a/100_core/src_800_tst/gplx/Tfds.java +++ b/100_core/src_800_tst/gplx/Tfds.java @@ -166,6 +166,7 @@ public class Tfds { // URL:doc/gplx.tfds/Tfds.txt private static final DateAdp time0 = DateAdp_.parse_gplx("2001-01-01 00:00:00.000"); private static DateAdp nowTime; // NOTE: cannot set to time0 due to static initialization; public static void WriteText(String text) {Console_adp__sys.I.Write_str(text);} + public static void Write(byte[] s, int b, int e) {Write(Bry_.Mid(s, b, e));} public static void Write() {Write("tmp");} public static void Write(Object... ary) { String_bldr sb = String_bldr_.new_(); diff --git a/150_gfui/src_100_basic/gplx/gfui/GfuiBorderEdge.java b/150_gfui/src_100_basic/gplx/gfui/GfuiBorderEdge.java index 6f76fce61..dc79b8f82 100644 --- a/150_gfui/src_100_basic/gplx/gfui/GfuiBorderEdge.java +++ b/150_gfui/src_100_basic/gplx/gfui/GfuiBorderEdge.java @@ -18,7 +18,7 @@ along with this program. If not, see . 
package gplx.gfui; import gplx.*; public class GfuiBorderEdge { public int Val() {return val;} int val; - public boolean Has(GfuiBorderEdge comp) {return Enm_.Has_int(val, comp.val);} + public boolean Has(GfuiBorderEdge comp) {return Bitmask_.Has_int(val, comp.val);} public GfuiBorderEdge Add(GfuiBorderEdge comp) { return new GfuiBorderEdge(comp.val + val); } diff --git a/150_gfui/src_200_ipt/gplx/gfui/IptEventType_.java b/150_gfui/src_200_ipt/gplx/gfui/IptEventType_.java index 07561cc7f..ad9f38d33 100644 --- a/150_gfui/src_200_ipt/gplx/gfui/IptEventType_.java +++ b/150_gfui/src_200_ipt/gplx/gfui/IptEventType_.java @@ -32,7 +32,7 @@ public class IptEventType_ { if (ary.length == 0) return IptEventType_.None; int newVal = ary[0].Val(); for (int i = 1; i < ary.length; i++) - newVal = Enm_.Flip_int(true, newVal, ary[i].Val()); + newVal = Bitmask_.Flip_int(true, newVal, ary[i].Val()); return getOrNew_(newVal); } static IptEventType getOrNew_(int v) { @@ -46,7 +46,7 @@ public class IptEventType_ { } @gplx.Internal protected static boolean Has(IptEventType val, IptEventType find) { if (find == IptEventType_.None && val != IptEventType_.None) return false; // check .None manually b/c 0 is identity when BitShifting - return Enm_.Has_int(val.Val(), find.Val()); + return Bitmask_.Has_int(val.Val(), find.Val()); } public static IptEventType default_(IptArg[] args) { IptEventType rv = IptEventType_.None; diff --git a/150_gfui/src_200_ipt/gplx/gfui/IptKey.java b/150_gfui/src_200_ipt/gplx/gfui/IptKey.java index 9c3172bcd..d21cecebe 100644 --- a/150_gfui/src_200_ipt/gplx/gfui/IptKey.java +++ b/150_gfui/src_200_ipt/gplx/gfui/IptKey.java @@ -23,7 +23,7 @@ public class IptKey implements IptArg { public boolean Eq(IptArg comp) {return String_.Eq(key, comp.Key());} public String XtoUiStr() {return IptKeyStrMgr._.To_str(this);} public IptKey Add(IptKey comp) {return IptKey_.add_(this, comp);} - public boolean Mod_shift() {return Enm_.Has_int(val, IptKey_.Shift.Val());} - public boolean 
Mod_ctrl() {return Enm_.Has_int(val, IptKey_.Ctrl.Val());} - public boolean Mod_alt() {return Enm_.Has_int(val, IptKey_.Alt.Val());} + public boolean Mod_shift() {return Bitmask_.Has_int(val, IptKey_.Shift.Val());} + public boolean Mod_ctrl() {return Bitmask_.Has_int(val, IptKey_.Ctrl.Val());} + public boolean Mod_alt() {return Bitmask_.Has_int(val, IptKey_.Alt.Val());} } diff --git a/150_gfui/src_200_ipt/gplx/gfui/IptKey_.java b/150_gfui/src_200_ipt/gplx/gfui/IptKey_.java index eee38ab9b..d9efcd956 100644 --- a/150_gfui/src_200_ipt/gplx/gfui/IptKey_.java +++ b/150_gfui/src_200_ipt/gplx/gfui/IptKey_.java @@ -28,7 +28,7 @@ public class IptKey_ { if (ary.length == 0) return IptKey_.None; int newVal = ary[0].Val(); for (int i = 1; i < ary.length; i++) - newVal = Enm_.Flip_int(true, newVal, ary[i].Val()); + newVal = Bitmask_.Flip_int(true, newVal, ary[i].Val()); return get_or_new_(newVal); } public static IptKey api_(int val) { @@ -138,9 +138,9 @@ public class IptKey_ { } public static String To_str(int val) { String mod_str = "", rv = ""; - boolean mod_c = Enm_.Has_int(val, IptKey_.Ctrl.Val()); if (mod_c) {mod_str += "c"; val = Enm_.Flip_int(Bool_.N, val, IptKey_.Ctrl.Val());} - boolean mod_a = Enm_.Has_int(val, IptKey_.Alt.Val()); if (mod_a) {mod_str += "a"; val = Enm_.Flip_int(Bool_.N, val, IptKey_.Alt.Val());} - boolean mod_s = Enm_.Has_int(val, IptKey_.Shift.Val()); if (mod_s) {mod_str += "s"; val = Enm_.Flip_int(Bool_.N, val, IptKey_.Shift.Val());} + boolean mod_c = Bitmask_.Has_int(val, IptKey_.Ctrl.Val()); if (mod_c) {mod_str += "c"; val = Bitmask_.Flip_int(Bool_.N, val, IptKey_.Ctrl.Val());} + boolean mod_a = Bitmask_.Has_int(val, IptKey_.Alt.Val()); if (mod_a) {mod_str += "a"; val = Bitmask_.Flip_int(Bool_.N, val, IptKey_.Alt.Val());} + boolean mod_s = Bitmask_.Has_int(val, IptKey_.Shift.Val()); if (mod_s) {mod_str += "s"; val = Bitmask_.Flip_int(Bool_.N, val, IptKey_.Shift.Val());} if (String_.Len_gt_0(mod_str)) { rv = "mod." 
+ mod_str; if (val == 0) return rv; // handle modifiers only, like "mod.cs"; else will be "mod.cs+key.#0" diff --git a/150_gfui/src_400_win/gplx/gfui/GfuiWinKeyCmdMgr.java b/150_gfui/src_400_win/gplx/gfui/GfuiWinKeyCmdMgr.java index 2946865cd..b8a000993 100644 --- a/150_gfui/src_400_win/gplx/gfui/GfuiWinKeyCmdMgr.java +++ b/150_gfui/src_400_win/gplx/gfui/GfuiWinKeyCmdMgr.java @@ -30,7 +30,7 @@ class GfuiWinKeyCmdMgr implements GfuiWinOpenAble, GfoInvkAble, GfoEvObj { int keyVal = iptData.Key().Val(); GfuiElem sender = GfuiElem_.as_(iptData.Sender()); if (GfuiTextBox_.as_(sender) != null // is sender textBox? - && !Enm_.Has_int(keyVal, IptKey_.Alt.Val()) // does key not have alt + && !Bitmask_.Has_int(keyVal, IptKey_.Alt.Val()) // does key not have alt ) return false; // ignore keys from textbox if they do not have alt List_adp elemList = (List_adp)listHash.Get_by(keyVal); if (elemList == null) return false; for (int i = 0; i < elemList.Count(); i++) { diff --git a/150_gfui/xtn/gplx/gfui/Swt_core_lnrs.java b/150_gfui/xtn/gplx/gfui/Swt_core_lnrs.java index d3c89518d..69d238472 100644 --- a/150_gfui/xtn/gplx/gfui/Swt_core_lnrs.java +++ b/150_gfui/xtn/gplx/gfui/Swt_core_lnrs.java @@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . 
*/ package gplx.gfui; +import gplx.Bitmask_; import gplx.Byte_ascii; import gplx.Enm_; import gplx.GfoEvMgr_; @@ -108,12 +109,12 @@ class Swt_lnr_key implements KeyListener { case 327680: val = IptKey_.Insert.Val(); break; } if (Has_ctrl(ev.stateMask)) val |= IptKey_.KeyCode_Ctrl; - if (Enm_.Has_int(ev.stateMask, IptKey_.KeyCode_Shift)) val |= IptKey_.KeyCode_Alt; - if (Enm_.Has_int(ev.stateMask, IptKey_.KeyCode_Ctrl)) val |= IptKey_.KeyCode_Shift; + if (Bitmask_.Has_int(ev.stateMask, IptKey_.KeyCode_Shift)) val |= IptKey_.KeyCode_Alt; + if (Bitmask_.Has_int(ev.stateMask, IptKey_.KeyCode_Ctrl)) val |= IptKey_.KeyCode_Shift; // Tfds.Write(String_.Format("val={4} keyCode={0} stateMask={1} keyLocation={2} character={3}", ev.keyCode, ev.stateMask, ev.keyLocation, ev.character, val)); return IptEvtDataKey.int_(val); } - public static boolean Has_ctrl(int val) {return Enm_.Has_int(val, IptKey_.KeyCode_Alt);} // NOTE:SWT's ctrl constant is different from SWING's + public static boolean Has_ctrl(int val) {return Bitmask_.Has_int(val, IptKey_.KeyCode_Alt);} // NOTE:SWT's ctrl constant is different from SWING's } class Swt_lnr_mouse implements MouseListener { public Swt_lnr_mouse(GxwElem elem) {this.elem = elem;} GxwElem elem; diff --git a/400_xowa/src/gplx/core/primitives/Int_pool.java b/400_xowa/src/gplx/core/primitives/Int_pool.java index 116b9da5b..1ff7482bf 100644 --- a/400_xowa/src/gplx/core/primitives/Int_pool.java +++ b/400_xowa/src/gplx/core/primitives/Int_pool.java @@ -18,19 +18,25 @@ along with this program. If not, see . 
package gplx.core.primitives; import gplx.*; import gplx.core.*; public class Int_pool { private final List_adp available_list = List_adp_.new_(); private int available_len; + // private final Bry_bfr dbg_bfr = Bry_bfr.new_(); private int uid_max = -1; public void Clear() { - available_list.Clear(); - available_len = 0; - uid_max = -1; + synchronized (available_list) { + available_list.Clear(); + available_len = 0; + uid_max = -1; + } } public int Get_next() { synchronized (available_list) { - if (available_len == 0) + if (available_len == 0) { + // dbg_bfr.Add_str("+:u:").Add_int_variable(uid_max + 1).Add_byte_nl(); return ++uid_max; + } else { Int_obj_val val = (Int_obj_val)List_adp_.Pop_last(available_list); --available_len; + // dbg_bfr.Add_str("+:a:").Add_int_variable(val.Val()).Add_byte_nl(); return val.Val(); } } @@ -40,17 +46,22 @@ public class Int_pool { synchronized (available_list) { if (available_len == 0 && v == uid_max) { --this.uid_max; + // dbg_bfr.Add_str("-:m:").Add_int_variable(v).Add_byte_nl(); return; } if (available_len == uid_max) { + available_list.Add(Int_obj_val.new_(v)); available_list.Sort(); for (int i = 0; i < available_len; ++i) { Int_obj_val itm = (Int_obj_val)available_list.Get_at(i); - if (i != itm.Val()) throw Err_.new_("core", "available_list out of order", "contents", available_list.To_str()); + if (i != itm.Val()) + throw Err_.new_("core", "available_list out of order", "contents", available_list.To_str()); } + // dbg_bfr.Add_str("-:c:").Add_int_variable(v).Add_byte_nl(); this.Clear(); } else { + // dbg_bfr.Add_str("-:a:").Add_int_variable(v).Add_byte_nl(); available_list.Add(Int_obj_val.new_(v)); ++available_len; } diff --git a/400_xowa/src/gplx/core/primitives/Int_pool_tst.java b/400_xowa/src/gplx/core/primitives/Int_pool_tst.java index 5ab6e2519..01d3b2fde 100644 --- a/400_xowa/src/gplx/core/primitives/Int_pool_tst.java +++ b/400_xowa/src/gplx/core/primitives/Int_pool_tst.java @@ -52,6 +52,14 @@ public class Int_pool_tst { 
tstr.Exec_del(2); tstr.Test_get(0); } + @Test public void Del__out_of_order_2() { + tstr.Test_get(0); + tstr.Test_get(1); + tstr.Test_get(2); + tstr.Exec_del(1); + tstr.Exec_del(2); + tstr.Exec_del(0); + } } class Int_pool_tstr { private final Int_pool pool = new Int_pool(); diff --git a/400_xowa/src/gplx/gfui/Gfui_bnd_parser.java b/400_xowa/src/gplx/gfui/Gfui_bnd_parser.java index 4eaea37e1..0a2c14125 100644 --- a/400_xowa/src/gplx/gfui/Gfui_bnd_parser.java +++ b/400_xowa/src/gplx/gfui/Gfui_bnd_parser.java @@ -127,7 +127,7 @@ public class Gfui_bnd_parser { switch (sym_tkn.Tid()) { case Gfui_bnd_tkn.Tid_sym_plus: // EX: Ctrl + A if (mod_adj != Mod_val_null) { // if mod, just update mod_val and exit - mod_val = Enm_.Flip_int(true, mod_val, mod_adj); + mod_val = Bitmask_.Flip_int(true, mod_val, mod_adj); return; } break; diff --git a/400_xowa/src/gplx/xowa/Xoa_app_.java b/400_xowa/src/gplx/xowa/Xoa_app_.java index 0e5f430e8..fad2daa8a 100644 --- a/400_xowa/src/gplx/xowa/Xoa_app_.java +++ b/400_xowa/src/gplx/xowa/Xoa_app_.java @@ -58,7 +58,7 @@ public class Xoa_app_ { } } public static final String Name = "xowa"; - public static final String Version = "2.9.3.1"; + public static final String Version = "2.9.4.1"; public static String Build_date = "2012-12-30 00:00:00"; public static String Op_sys_str; public static String User_agent = ""; diff --git a/400_xowa/src/gplx/xowa/files/Xof_img_size.java b/400_xowa/src/gplx/xowa/files/Xof_img_size.java index d329ed6fd..cb1d228da 100644 --- a/400_xowa/src/gplx/xowa/files/Xof_img_size.java +++ b/400_xowa/src/gplx/xowa/files/Xof_img_size.java @@ -35,7 +35,7 @@ public class Xof_img_size { && !Xop_lnki_type.Id_is_thumbable(lnki_type) // not thumb which is implicitly 220; PAGE:en.w:Edward_Snowden; DATE:2015-08-17 ) lnki_w = orig_w; // use original size; EX:[[File:A.ogv]] -> [[File:A.ogv|550px]] where 550px is orig_w; DATE:2015-08-07 - if (Enm_.Has_int(lnki_type, Xop_lnki_type.Id_frame) // frame: always return orig size; 
Linker.php!makeThumbLink2; // Use image dimensions, don't scale + if (Bitmask_.Has_int(lnki_type, Xop_lnki_type.Id_frame) // frame: always return orig size; Linker.php!makeThumbLink2; // Use image dimensions, don't scale && lnki_h == Null) { // unless lnki_h specified; DATE:2013-12-22 html_w = file_w = orig_w; html_h = file_h = orig_h; diff --git a/400_xowa/src/gplx/xowa/files/Xof_img_size_tst.java b/400_xowa/src/gplx/xowa/files/Xof_img_size_tst.java index 1ff239380..0cfe84ec0 100644 --- a/400_xowa/src/gplx/xowa/files/Xof_img_size_tst.java +++ b/400_xowa/src/gplx/xowa/files/Xof_img_size_tst.java @@ -76,7 +76,7 @@ public class Xof_img_size_tst { fxt.Lnki_type_(Xop_lnki_type.Id_frame).Lnki_ext_(Xof_ext_.Id_png).Lnki_(200, 200).Orig_(2038, 1529).Test_html(200, 150, Bool_.N); } @Test public void Frame_and_thumb(){ // PURPOSE: frame and thumb should be treated as frame; Enm.Has(val, Id_frame) vs val == Id_frame; PAGE:en.w:History_of_Western_Civilization; DATE:2015-04-16 - fxt.Lnki_type_(Enm_.Add_byte(Xop_lnki_type.Id_frame, Xop_lnki_type.Id_thumb)).Lnki_(200, -1).Test_html(400, 200, Bool_.Y); // mut return same as Lnki_lt_orig_frame above + fxt.Lnki_type_(Bitmask_.Add_byte(Xop_lnki_type.Id_frame, Xop_lnki_type.Id_thumb)).Lnki_(200, -1).Test_html(400, 200, Bool_.Y); // mut return same as Lnki_lt_orig_frame above } @Test public void Video__use_orig_w(){ // PURPOSE: video should use orig_w; DATE:2015-08-07 fxt.Lnki_type_(Xop_lnki_type.Id_none).Lnki_ext_(Xof_ext_.Id_ogv).Lnki_(-1, -1).Orig_(500, 250).Test_html(500, 250, Bool_.N); diff --git a/400_xowa/src/gplx/xowa/files/Xof_patch_upright_tid_.java b/400_xowa/src/gplx/xowa/files/Xof_patch_upright_tid_.java index 4b78b2eb8..14954d849 100644 --- a/400_xowa/src/gplx/xowa/files/Xof_patch_upright_tid_.java +++ b/400_xowa/src/gplx/xowa/files/Xof_patch_upright_tid_.java @@ -20,11 +20,11 @@ public class Xof_patch_upright_tid_ { public static final int Tid_unpatched = 0, Tid_use_thumb_w = 1, Tid_fix_default = 2; public static final 
int Tid_all = Tid_use_thumb_w | Tid_fix_default; public static int Merge(boolean use_thumb_w, boolean fix_default) { - if (use_thumb_w && fix_default) return Enm_.Add_int(Tid_use_thumb_w, Tid_fix_default); + if (use_thumb_w && fix_default) return Bitmask_.Add_int(Tid_use_thumb_w, Tid_fix_default); else if (use_thumb_w) return Tid_use_thumb_w; else if (fix_default) return Tid_fix_default; else return Tid_unpatched; } - public static boolean Split_use_thumb_w(int tid) {return Enm_.Has_int(tid, Tid_use_thumb_w);} - public static boolean Split_fix_default(int tid) {return Enm_.Has_int(tid, Tid_fix_default);} + public static boolean Split_use_thumb_w(int tid) {return Bitmask_.Has_int(tid, Tid_use_thumb_w);} + public static boolean Split_fix_default(int tid) {return Bitmask_.Has_int(tid, Tid_fix_default);} } diff --git a/400_xowa/src/gplx/xowa/html/Xoh_consts.java b/400_xowa/src/gplx/xowa/html/Xoh_consts.java index c1518f741..3f86e3c07 100644 --- a/400_xowa/src/gplx/xowa/html/Xoh_consts.java +++ b/400_xowa/src/gplx/xowa/html/Xoh_consts.java @@ -23,10 +23,11 @@ public class Xoh_consts { , Img_h_str = "height" ; public static final byte[] - __end = Bry_.new_a7(">") - , __end_quote = Bry_.new_a7("\">") - , __inline_quote = Bry_.new_a7("\"/>") - , Space_2 = Bry_.new_a7(" ") + __end = Bry_.new_a7(">") + , __inline = Bry_.new_a7("/>") + , __end_quote = Bry_.new_a7("\">") + , __inline_quote = Bry_.new_a7("\"/>") + , Space_2 = Bry_.new_a7(" ") , A_bgn = Bry_.new_a7(". 
+*/ +package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*; +public class Xol_vnt_dir_ { + public static final int Tid__none = 0, Tid__uni = 1, Tid__bi = 2; + public static int Parse(byte[] v) {return hash.Get_as_int_or(v, Tid__none);} + private static final byte[] Bry__none = Bry_.new_a7("disable"), Bry__uni = Bry_.new_a7("unidirectional"), Bry__bi = Bry_.new_a7("bidirectional"); + private static final Hash_adp_bry hash = Hash_adp_bry.cs() + .Add_bry_int(Bry__none , Tid__none) + .Add_bry_int(Bry__uni , Tid__uni) + .Add_bry_int(Bry__bi , Tid__bi); +} diff --git a/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_itm.java b/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_itm.java index 420730adb..b7ffba31c 100644 --- a/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_itm.java +++ b/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_itm.java @@ -18,27 +18,33 @@ along with this program. If not, see . package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*; import gplx.xowa.langs.vnts.converts.*; public class Xol_vnt_itm implements GfoInvkAble { - public Xol_vnt_itm(byte[] key, byte[] name, int mask__vnt) { - this.key = key; this.name = name; this.mask__vnt = mask__vnt; + public Xol_vnt_itm(int idx, byte[] key, byte[] name, int mask__vnt) { + this.idx = idx; this.key = key; this.name = name; this.mask__vnt = mask__vnt; this.convert_wkr = new Xol_convert_wkr(key); } + public int Idx() {return idx;} private final int idx; // EX: 2 public byte[] Key() {return key;} private final byte[] key; // EX: zh-cn public byte[] Name() {return name;} private final byte[] name; // EX: 大陆简体 public boolean Visible() {return visible;} private boolean visible = true; // visible in menu public byte[][] Fallback_ary() {return fallback_ary;} private byte[][] fallback_ary = Bry_.Ary_empty; // EX: zh-hans|zh + public int Dir() {return dir;} private int dir = Xol_vnt_dir_.Tid__bi; // EX: "bidirectional" public int Mask__vnt() {return mask__vnt;} private 
final int mask__vnt; // EX: 8 public int Mask__fallbacks() {return mask_fallbacks;} private int mask_fallbacks; // EX: 11 for zh,zh-hans,zh-cn public byte[][] Convert_ary() {return convert_ary;} private byte[][] convert_ary = Bry_.Ary_empty; // EX: zh-hans|zh-cn public Xol_convert_wkr Convert_wkr() {return convert_wkr;} private final Xol_convert_wkr convert_wkr; public void Visible_(boolean v) {this.visible = v;} public void Convert_ary_(byte[][] v) {convert_ary = v;} + public void Init(int dir, byte[][] fallback_ary) { + this.dir = dir; this.fallback_ary = fallback_ary; + } public void Mask__fallbacks__calc(Xol_vnt_regy regy, byte[][] ary) { this.mask_fallbacks = regy.Mask__calc(Bry_.Ary_add(Bry_.Ary(key), ary));// NOTE: must add lang.key which is not part of fallback; EX: "zh-cn" has fallback of "zh-hans", but chain should calc "zh-cn","zh-hans" } public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) { if (ctx.Match(k, Invk_fallbacks_)) fallback_ary = Bry_split_.Split(m.ReadBry("v"), Byte_ascii.Pipe); else if (ctx.Match(k, Invk_converts_)) convert_ary = Bry_split_.Split(m.ReadBry("v"), Byte_ascii.Pipe); + else if (ctx.Match(k, Invk_dir_)) dir = Xol_vnt_dir_.Parse(m.ReadBry("v")); else return GfoInvkAble_.Rv_unhandled; return this; - } private static final String Invk_fallbacks_ = "fallbacks_", Invk_converts_ = "converts_"; + } private static final String Invk_fallbacks_ = "fallbacks_", Invk_converts_ = "converts_", Invk_dir_ = "dir_"; } diff --git a/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_itm_sorter.java b/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_itm_sorter.java index d6692d098..2d901890e 100644 --- a/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_itm_sorter.java +++ b/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_itm_sorter.java @@ -18,8 +18,8 @@ along with this program. If not, see . 
package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*; import gplx.xowa.parsers.vnts.*; class Xol_vnt_itm_sorter__rule implements gplx.lists.ComparerAble { - private Ordered_hash hash; - public void Sort(Ordered_hash hash, Xop_vnt_rule_tkn[] ary) { + private Hash_adp hash; + public void Sort(Hash_adp hash, Xop_vnt_rule_tkn[] ary) { synchronized (hash) { this.hash = hash; Array_.Sort(ary, this); diff --git a/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_regy.java b/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_regy.java index 196daafcf..5118b8072 100644 --- a/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_regy.java +++ b/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_regy.java @@ -19,17 +19,20 @@ package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xow import gplx.core.btries.*; import gplx.xowa.parsers.vnts.*; public class Xol_vnt_regy { - private final Ordered_hash hash = Ordered_hash_.new_bry_(); private int hash_len; + private final Hash_adp_bry hash = Hash_adp_bry.ci_a7(); private int hash_len; + private final List_adp list = List_adp_.new_(); public Btrie_slim_mgr Trie() {return trie;} private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7(); public int Len() {return hash.Count();} public boolean Has(byte[] k) {return hash.Has(k);} - public Xol_vnt_itm Get_at(int i) {return (Xol_vnt_itm)hash.Get_at(i);} + public Xol_vnt_itm Get_at(int i) {return (Xol_vnt_itm)list.Get_at(i);} public Xol_vnt_itm Get_by(byte[] k) {return (Xol_vnt_itm)hash.Get_by(k);} - public void Clear() {hash.Clear(); trie.Clear(); hash_len = 0;} + public Xol_vnt_itm Get_by(byte[] s, int b, int e) {return (Xol_vnt_itm)hash.Get_by_mid(s, b, e);} + public void Clear() {hash.Clear(); list.Clear(); trie.Clear(); hash_len = 0;} public Xol_vnt_itm Add(byte[] key, byte[] name) { int mask = gplx.core.brys.Bit_.Get_flag(hash_len); - Xol_vnt_itm itm = new Xol_vnt_itm(key, name, mask); + Xol_vnt_itm itm = new Xol_vnt_itm(hash_len, key, name, mask); hash.Add(key, 
itm); + list.Add(itm); trie.Add_obj(key, itm); hash_len = hash.Count(); return itm; @@ -41,15 +44,15 @@ public class Xol_vnt_regy { byte[] key = ary[i]; Xol_vnt_itm itm = (Xol_vnt_itm)hash.Get_by(key); if (itm == null) continue; // handle bad vnt from user input; EX: -{zh;bad|text}- int itm_mask = itm.Mask__vnt(); - rv = rv == 0 ? itm_mask : Enm_.Flip_int(true, rv, itm_mask); + rv = rv == 0 ? itm_mask : Bitmask_.Flip_int(true, rv, itm_mask); } return rv; } public boolean Mask__match_any(int lhs, int rhs) { // EX: match "zh-cn|zh-hans|zh-hant" against "zh|zh-hans|zh-hant" for (int i = 0; i < hash_len; ++i) { int mask = gplx.core.brys.Bit_.Get_flag(i); // 1,2,4,8 - if (Enm_.Has_int(lhs, mask)) { // lhs has mask; EX: for lhs=6, mask=1 -> 'n'; mask=2 -> 'y' - if (Enm_.Has_int(rhs, mask)) // if rhs does not have mask, return false; + if (Bitmask_.Has_int(lhs, mask)) { // lhs has mask; EX: for lhs=6, mask=1 -> 'n'; mask=2 -> 'y' + if (Bitmask_.Has_int(rhs, mask)) // if rhs does not have mask, return false; return true; } } diff --git a/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_regy_fxt.java b/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_regy_fxt.java new file mode 100644 index 000000000..28240dee3 --- /dev/null +++ b/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_regy_fxt.java @@ -0,0 +1,63 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . 
+*/ +package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*; +import gplx.xowa.parsers.vnts.*; +public class Xol_vnt_regy_fxt { + private final Xol_vnt_regy mgr = new_chinese(); + public String[] Make_lang_chain_cn() {return String_.Ary("zh-cn", "zh-hans", "zh-hant", "zh");} + public void Test_match_any(boolean expd, String[] lang_chain, String[]... vnt_chain_ary) { + int len = vnt_chain_ary.length; + int lang_flag = mgr.Mask__calc(Bry_.Ary(lang_chain)); + for (int i = 0; i < len; ++i) { + String[] vnt_chain = vnt_chain_ary[i]; // EX: -{zh;zh-hans;zh-hant}- + int vnt_flag = mgr.Mask__calc(Bry_.Ary(vnt_chain)); + Tfds.Eq(expd, mgr.Mask__match_any(vnt_flag, lang_flag), String_.Concat_with_str(";", vnt_chain) + "<>" + String_.Concat_with_str(";", lang_chain)); + } + } + public void Test_calc(String[] ary, int expd) { + Tfds.Eq(expd, mgr.Mask__calc(Bry_.Ary(ary))); + } + public void Test_sort(String[] vnt_ary, String[] expd) { + int vnt_len = vnt_ary.length; + Xop_vnt_rule_tkn[] rule_ary = new Xop_vnt_rule_tkn[vnt_len]; + for (int i = 0; i < vnt_len; ++i) + rule_ary[i] = new Xop_vnt_rule_tkn(Bry_.Empty, Bry_.new_u8(vnt_ary[i]), gplx.xowa.parsers.Xop_tkn_itm_.Ary_empty); + mgr.Mask__sort(rule_ary); + for (int i = 0; i < vnt_len; ++i) + vnt_ary[i] = String_.new_u8(rule_ary[i].Rule_lang()); + Tfds.Eq_ary_str(expd, vnt_ary); + } + public static Xol_vnt_regy new_chinese() { // REF.MW:/languages/classes/LanguageZh.php|LanguageZh|__construct + Xol_vnt_regy rv = new Xol_vnt_regy(); + new_chinese_vnt(rv, "zh" , Xol_vnt_dir_.Tid__none, "zh-hans", "zh-hant", "zh-cn", "zh-tw", "zh-hk", "zh-sg", "zh-mo", "zh-my"); + new_chinese_vnt(rv, "zh-hans" , Xol_vnt_dir_.Tid__uni , "zh-cn", "zh-sg", "zh-my"); + new_chinese_vnt(rv, "zh-hant" , Xol_vnt_dir_.Tid__uni , "zh-tw", "zh-hk", "zh-mo"); + new_chinese_vnt(rv, "zh-cn" , Xol_vnt_dir_.Tid__bi , "zh-hans", "zh-sg", "zh-my"); + new_chinese_vnt(rv, "zh-hk" , Xol_vnt_dir_.Tid__bi , "zh-hant", "zh-mo", 
"zh-tw"); + new_chinese_vnt(rv, "zh-my" , Xol_vnt_dir_.Tid__bi , "zh-hans", "zh-sg", "zh-cn"); + new_chinese_vnt(rv, "zh-mo" , Xol_vnt_dir_.Tid__bi , "zh-hant", "zh-hk", "zh-tw"); + new_chinese_vnt(rv, "zh-sg" , Xol_vnt_dir_.Tid__bi , "zh-hans", "zh-cn", "zh-my"); + new_chinese_vnt(rv, "zh-tw" , Xol_vnt_dir_.Tid__bi , "zh-hant", "zh-hk", "zh-mo"); + return rv; + } + private static void new_chinese_vnt(Xol_vnt_regy regy, String key, int dir, String... fallbacks) { + byte[] key_bry = Bry_.new_u8(key); + Xol_vnt_itm itm = regy.Add(key_bry, Bry_.Ucase__all(key_bry)); + itm.Init(dir, Bry_.Ary(fallbacks)); + } +} diff --git a/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_regy_tst.java b/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_regy_tst.java index 9220d5354..ed2d30256 100644 --- a/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_regy_tst.java +++ b/400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_regy_tst.java @@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*; -import org.junit.*; import gplx.xowa.parsers.vnts.*; +import org.junit.*; public class Xol_vnt_regy_tst { private final Xol_vnt_regy_fxt fxt = new Xol_vnt_regy_fxt(); @Test public void Calc() { @@ -51,34 +51,3 @@ public class Xol_vnt_regy_tst { fxt.Test_sort(String_.Ary("zh", "zh-hans", "zh-cn" ) , String_.Ary("zh-cn", "zh-hans", "zh")); } } -class Xol_vnt_regy_fxt { - private final Xol_vnt_regy mgr = new Xol_vnt_regy(); - public Xol_vnt_regy_fxt() { - String[] ary = Xop_vnt_parser_fxt.Vnts_chinese; - for (String itm : ary) - mgr.Add(Bry_.new_u8(itm), Bry_.Empty); - } - public String[] Make_lang_chain_cn() {return String_.Ary("zh-cn", "zh-hans", "zh-hant", "zh");} - public void Test_match_any(boolean expd, String[] lang_chain, String[]... 
vnt_chain_ary) { - int len = vnt_chain_ary.length; - int lang_flag = mgr.Mask__calc(Bry_.Ary(lang_chain)); - for (int i = 0; i < len; ++i) { - String[] vnt_chain = vnt_chain_ary[i]; // EX: -{zh;zh-hans;zh-hant}- - int vnt_flag = mgr.Mask__calc(Bry_.Ary(vnt_chain)); - Tfds.Eq(expd, mgr.Mask__match_any(vnt_flag, lang_flag), String_.Concat_with_str(";", vnt_chain) + "<>" + String_.Concat_with_str(";", lang_chain)); - } - } - public void Test_calc(String[] ary, int expd) { - Tfds.Eq(expd, mgr.Mask__calc(Bry_.Ary(ary))); - } - public void Test_sort(String[] vnt_ary, String[] expd) { - int vnt_len = vnt_ary.length; - Xop_vnt_rule_tkn[] rule_ary = new Xop_vnt_rule_tkn[vnt_len]; - for (int i = 0; i < vnt_len; ++i) - rule_ary[i] = new Xop_vnt_rule_tkn(Bry_.Empty, Bry_.new_u8(vnt_ary[i]), gplx.xowa.parsers.Xop_tkn_itm_.Ary_empty); - mgr.Mask__sort(rule_ary); - for (int i = 0; i < vnt_len; ++i) - vnt_ary[i] = String_.new_u8(rule_ary[i].Rule_lang()); - Tfds.Eq_ary_str(expd, vnt_ary); - } -} diff --git a/400_xowa/src/gplx/xowa/langs/vnts/converts/Xol_convert_mgr.java b/400_xowa/src/gplx/xowa/langs/vnts/converts/Xol_convert_mgr.java index a607e3400..b76481fdf 100644 --- a/400_xowa/src/gplx/xowa/langs/vnts/converts/Xol_convert_mgr.java +++ b/400_xowa/src/gplx/xowa/langs/vnts/converts/Xol_convert_mgr.java @@ -44,10 +44,11 @@ public class Xol_convert_mgr { if (new_wkr_idx == -1) throw Err_.new_("lang.vnt", "unknown vnt", "key", cur_vnt); this.cur_wkr_idx = new_wkr_idx; } - public byte[] Convert_text(Xowe_wiki wiki, byte[] src) {return Convert_text(wiki, src, 0, src.length);} - public byte[] Convert_text(Xowe_wiki wiki, byte[] src, int bgn, int end) { - Bry_bfr tmp_bfr = wiki.Utl__bfr_mkr().Get_m001(); - Xol_convert_wkr converter = wkr_ary[cur_wkr_idx]; + public byte[] Convert_text(byte[] src) {return Convert_text(src, 0, src.length);} + public byte[] Convert_text(byte[] src, int bgn, int end) {return Convert_text(cur_wkr_idx, src, bgn, end);} + public byte[] Convert_text(int 
vnt_idx, byte[] src, int bgn, int end) { + Bry_bfr tmp_bfr = Xoa_app_.Utl__bfr_mkr().Get_m001(); + Xol_convert_wkr converter = wkr_ary[vnt_idx]; converter.Convert_text(tmp_bfr, src, bgn, end); return tmp_bfr.To_bry_and_rls(); } diff --git a/400_xowa/src/gplx/xowa/langs/vnts/converts/Xol_convert_wkr.java b/400_xowa/src/gplx/xowa/langs/vnts/converts/Xol_convert_wkr.java index 4eee5bdbf..3973036d8 100644 --- a/400_xowa/src/gplx/xowa/langs/vnts/converts/Xol_convert_wkr.java +++ b/400_xowa/src/gplx/xowa/langs/vnts/converts/Xol_convert_wkr.java @@ -21,6 +21,8 @@ public class Xol_convert_wkr { private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs(); public Xol_convert_wkr(byte[] key) {this.key = key;} public byte[] Key() {return key;} private final byte[] key; + public void Add(byte[] src, byte[] trg) {trie.Add_obj(src, trg);} + public void Del(byte[] src) {trie.Del(src);} public boolean Convert_text(Bry_bfr bfr, byte[] src) {return Convert_text(bfr, src, 0, src.length);} public boolean Convert_text(Bry_bfr bfr, byte[] src, int bgn, int end) { int pos = bgn; @@ -47,7 +49,7 @@ public class Xol_convert_wkr { pos = trie.Match_pos(); } } - if (!matched) bfr.Add(src); // no convert; make sure to add back src, else bfr will be blank + if (!matched) bfr.Add_mid(src, bgn, end); // no convert; make sure to add back src, else bfr will be blank return matched; } public void Rebuild(Xol_convert_regy regy, byte[][] ary) { diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_itm.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_itm.java new file mode 100644 index 000000000..c2377a61b --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_itm.java @@ -0,0 +1,86 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your 
option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +public class Mwh_atr_itm { + public Mwh_atr_itm + ( byte[] src, boolean valid, boolean repeated, boolean key_exists, int atr_bgn, int atr_end + , int key_bgn, int key_end, byte[] key_bry + , int val_bgn, int val_end, byte[] val_bry + , int eql_pos, int qte_tid + ) { + this.src = src; + this.valid = valid; this.repeated = repeated; this.key_exists = key_exists; + this.atr_bgn = atr_bgn; this.atr_end = atr_end; + this.key_bgn = key_bgn; this.key_end = key_end; this.key_bry = key_bry; + this.val_bgn = val_bgn; this.val_end = val_end; this.val_bry = val_bry; + this.eql_pos = eql_pos; this.qte_tid = qte_tid; + } + public byte[] Src() {return src;} private final byte[] src; + public boolean Valid() {return valid;} private final boolean valid; + public boolean Key_exists() {return key_exists;} private final boolean key_exists; + public boolean Repeated() {return repeated;} private final boolean repeated; + public int Atr_bgn() {return atr_bgn;} private int atr_bgn; + public int Atr_end() {return atr_end;} private int atr_end; + public int Key_bgn() {return key_bgn;} private final int key_bgn; + public int Key_end() {return key_end;} private final int key_end; + public byte[] Key_bry() {return key_bry;} private byte[] key_bry; + public byte Key_tid() {return key_tid;} public Mwh_atr_itm Key_tid_(byte v) {key_tid = v; return this;} private byte key_tid; + public int Val_bgn() {return val_bgn;} private final int val_bgn; + public int Val_end() {return val_end;} private final int val_end; + 
public byte[] Val_bry() {return val_bry;} private byte[] val_bry; + public int Eql_pos() {return eql_pos;} private final int eql_pos; + public int Qte_tid() {return qte_tid;} private final int qte_tid; + public Mwh_atr_itm Atr_rng(int bgn, int end) {this.atr_bgn = bgn; this.atr_end = end; return this;} + public String Val_as_str() {return String_.new_u8(Val_as_bry());} + public byte[] Val_as_bry() {if (val_bry == null) val_bry = Bry_.Mid(src, val_bgn, val_end); return val_bry;} // NOTE: val_bry is cached + public byte[] Val_as_bry__blank_to_null() {byte[] rv = Val_as_bry(); return Bry_.Len_eq_0(rv) ? null : rv;} + public int Val_as_int_or(int or) {return val_bry == null ? Bry_.To_int_or__lax(src, val_bgn, val_end, or) : Bry_.To_int_or(val_bry, or);} + public boolean Val_as_bool_by_int() {return Val_as_int_or(0) == 1;} + public boolean Val_as_bool() {return Bry_.Eq(Bry_.Lcase__all(Val_as_bry()), Bool_.True_bry);} + public static final Mwh_atr_itm[] Ary_empty = new Mwh_atr_itm[0]; + public static final int Atr_tid__invalid = 1, Atr_tid__repeat = 2, Atr_tid__pair = 4, Atr_tid__name = 8; // NOTE: id order is important; see above; + public static final int Qte_tid__none = 0, Qte_tid__apos = 1, Qte_tid__qute = 2; + public static final int Mask__qte__none = 0, Mask__qte__apos = 1, Mask__qte_qute = 2; + public static final int + Mask__valid = 8 + , Mask__repeated = 16 + , Mask__key_exists = 32 + , Mask__val_made = 64 + ; + public static final boolean Mask__valid__n = false, Mask__valid__y = true; + public static final boolean Mask__key_exists__n = false, Mask__key_exists__y = true; + public static final boolean Mask__repeated__n = false, Mask__repeated__y = true; + public static final boolean Mask__val_made__n = false, Mask__val_made__y = true; + public static int Calc_atr_utl(int qte_tid, boolean valid, boolean repeated, boolean key_exists, boolean val_made) { + int rv = qte_tid; + if (valid) rv |= Mwh_atr_itm.Mask__valid; + if (repeated) rv |= Mwh_atr_itm.Mask__repeated; 
+ if (key_exists) rv |= Mwh_atr_itm.Mask__key_exists; + if (val_made) rv |= Mwh_atr_itm.Mask__val_made; + return rv; + } + public static int Calc_qte_tid(int val) { + return val & ((1 << 3) - 1); + } + public static byte Calc_qte_byte(int[] data_ary, int idx) { + int val = data_ary[idx + Mwh_atr_mgr.Idx_atr_utl]; + int qte_tid = (val & ((1 << 3) - 1)); + return qte_tid == Qte_tid__apos ? Byte_ascii.Apos : Byte_ascii.Quote; + } +// public static final byte Key_tid_generic = 0, Key_tid_id = 1, Key_tid_style = 2, Key_tid_role = 3; +} diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_mgr.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_mgr.java new file mode 100644 index 000000000..e00cd07dc --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_mgr.java @@ -0,0 +1,98 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . 
+*/ +package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import gplx.core.brys.*; +public class Mwh_atr_mgr { + private final int data_max_orig; + public Mwh_atr_mgr(int max) { + this.data_max_orig = max * Idx__mult; + this.Max_(max); + } + public int Len() {return itm_len;} private int itm_len; + public int[] Data_ary() {return data_ary;} private int[] data_ary; private int data_max; + public byte[][] Text_ary() {return text_ary;} private byte[][] text_ary; + private void Max_(int len) { + this.data_max = len * Idx__mult; + this.data_ary = new int[data_max]; + this.text_ary = new byte[len * Text__mult][]; + this.itm_len = 0; + } + public void Clear() { + if (data_max == data_max_orig) + itm_len = 0; + else + Max_(data_max_orig / Idx__mult); + } + public int Add(int nde_uid, int nde_tid, boolean valid, boolean repeated, boolean key_exists, int atr_bgn, int atr_end, int key_bgn, int key_end, byte[] key_bry, int eql_pos, int qte_tid, int val_bgn, int val_end, byte[] val_bry) { + int data_idx = itm_len * Idx__mult; + if (data_idx == data_max) { + int new_data_max = data_max == 0 ? Idx__mult : data_max * 2; + int[] new_data_ary = new int[new_data_max]; + Int_.Ary_copy_to(data_ary, data_max, data_ary); + this.data_ary = new_data_ary; + + int text_max = text_ary.length; + int new_text_max = data_max == 0 ? 
Text__mult : text_max * 2; + byte[][] new_text_ary = new byte[new_text_max][]; + for (int i = 0; i < text_max; ++i) + new_text_ary[i] = text_ary[i]; + this.text_ary = new_text_ary; + + this.data_max = new_data_max; + } + boolean val_made = false; + int text_idx = itm_len * Text__mult; + text_ary[text_idx] = key_bry; + if (val_bry != null) { + text_ary[text_idx + 1] = val_bry; + val_made = true; + } + data_ary[data_idx + Idx_nde_uid] = nde_uid; + data_ary[data_idx + Idx_nde_tid] = nde_tid; + data_ary[data_idx + Idx_atr_utl] = Mwh_atr_itm.Calc_atr_utl(qte_tid, valid, repeated, key_exists, val_made); + data_ary[data_idx + Idx_atr_bgn] = atr_bgn; + data_ary[data_idx + Idx_atr_end] = atr_end; + data_ary[data_idx + Idx_key_bgn] = key_bgn; + data_ary[data_idx + Idx_key_end] = key_end; + data_ary[data_idx + Idx_val_bgn] = val_bgn; + data_ary[data_idx + Idx_val_end] = val_end; + data_ary[data_idx + Idx_eql_pos] = eql_pos; + return itm_len++; + } + public void Set_repeated(int atr_uid) { + int atr_utl_idx = (atr_uid * Idx__mult) + Idx_atr_utl; + int atr_utl = data_ary[atr_utl_idx]; + int val_bry_exists = atr_utl & Atr_utl__val_bry_exists; + data_ary[atr_utl_idx] = Mwh_atr_itm.Atr_tid__repeat | val_bry_exists; + } + public static final int + Idx_nde_uid = 0 + , Idx_nde_tid = 1 + , Idx_atr_utl = 2 + , Idx_atr_bgn = 3 + , Idx_atr_end = 4 + , Idx_key_bgn = 5 + , Idx_key_end = 6 + , Idx_val_bgn = 7 + , Idx_val_end = 8 + , Idx_eql_pos = 9 + , Idx__mult = 10 + ; + public static final int Text__mult = 2; + public static final int Atr_utl__val_bry_exists = 16; +} diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_mgr_tst.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_mgr_tst.java new file mode 100644 index 000000000..3e666e16d --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_mgr_tst.java @@ -0,0 +1,39 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or 
modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import org.junit.*; +public class Mwh_atr_mgr_tst { + private final Mwh_atr_mgr_fxt fxt = new Mwh_atr_mgr_fxt(); + @Test public void Atr_utl_make() { + // key="val" + fxt.Test_atr_utl_make(Mwh_atr_itm.Qte_tid__qute, Mwh_atr_itm.Mask__valid__y, Mwh_atr_itm.Mask__repeated__n, Mwh_atr_itm.Mask__key_exists__y, Mwh_atr_itm.Mask__val_made__n, 42); + // key=val key=val + fxt.Test_atr_utl_make(Mwh_atr_itm.Qte_tid__none, Mwh_atr_itm.Mask__valid__y, Mwh_atr_itm.Mask__repeated__y, Mwh_atr_itm.Mask__key_exists__y, Mwh_atr_itm.Mask__val_made__y, 120); + } +} +class Mwh_atr_mgr_fxt { + public void Test_atr_utl_make(int qte_tid, boolean valid, boolean repeated, boolean key_exists, boolean val_made, int expd) { + int atr_utl = Mwh_atr_itm.Calc_atr_utl(qte_tid, valid, repeated, key_exists, val_made); + Tfds.Eq_int(expd, atr_utl); + Tfds.Eq_int(qte_tid, Mwh_atr_itm.Calc_qte_tid(atr_utl)); + Tfds.Eq_bool(valid, (atr_utl & Mwh_atr_itm.Mask__valid) == Mwh_atr_itm.Mask__valid); + Tfds.Eq_bool(repeated, (atr_utl & Mwh_atr_itm.Mask__repeated) == Mwh_atr_itm.Mask__repeated); + Tfds.Eq_bool(key_exists, (atr_utl & Mwh_atr_itm.Mask__key_exists) == Mwh_atr_itm.Mask__key_exists); + Tfds.Eq_bool(val_made, (atr_utl & Mwh_atr_itm.Mask__val_made) == Mwh_atr_itm.Mask__val_made); + } +} diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser.java 
b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser.java new file mode 100644 index 000000000..f5af870aa --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser.java @@ -0,0 +1,457 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import gplx.core.primitives.*; +import gplx.xowa.parsers.xndes.*; // for brys: , , , +public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATTRIBS_REGEX + private static final byte Area__invalid = 0, Area__atr_limbo = 1, Area__key = 2, Area__eql_limbo = 3, Area__val_limbo = 4, Area__val_quote = 5, Area__val_naked = 6; + private final Hash_adp_bry repeated_atrs_hash = Hash_adp_bry.ci_a7(); // ASCII:xnde_atrs + private final Mwh_atr_mgr atr_mgr = new Mwh_atr_mgr(16); + private final Bry_bfr key_bfr = Bry_bfr.new_(), val_bfr = Bry_bfr.new_(); + private byte area = Area__atr_limbo; + private int atr_bgn = -1, key_bgn = -1, key_end = -1, eql_pos = -1, val_bgn = -1, val_end = -1; + private byte qte_byte = Byte_ascii.Null; + private boolean key_bfr_on = false, val_bfr_on = false, ws_is_before_val = false; + private int nde_uid, nde_tid; + public Bry_obj_ref Bry_obj() {return bry_ref;} private final Bry_obj_ref bry_ref = Bry_obj_ref.null_(); + public int 
Nde_end_tid() {return nde_end_tid;} private int nde_end_tid; + public int Parse(Mwh_doc_wkr wkr, int nde_uid, int nde_tid, byte[] src, int src_bgn, int src_end) { + this.nde_uid = nde_uid; this.nde_tid = nde_tid; + this.nde_end_tid = Mwh_doc_parser.Nde_end_tid__invalid; + area = Area__atr_limbo; + boolean prv_is_ws = false; + int pos = src_bgn; + boolean loop = true; + while (loop) { + if (pos == src_end) { + if (area == Area__val_quote) { // quote still open + int reset_pos = Bry_find_.Find_fwd(src, Byte_ascii.Space, val_bgn, src_end); // try to find 1st space within quote; EX:"a='b c=d" should try to reset at c=d + boolean reset_found = reset_pos != Bry_find_.Not_found; + area = Area__invalid; val_end = reset_found ? reset_pos : src_end; + Make(src, val_end); // create invalid atr + if (reset_found) { // space found; resume from text after space; EX: "a='b c=d"; PAGE:en.w:Aubervilliers DATE:2014-06-25 + pos = Bry_find_.Find_fwd_while_not_ws(src, reset_pos, src_end); // skip ws + atr_bgn = -1; + area = Area__atr_limbo; + val_bfr.Clear(); + val_bfr_on = false; + ws_is_before_val = false; + continue; + } + else + break; + } + else { + if (area == Area__val_limbo) // NOTE: handle dangling "k=" else will be "k"; EX: x> ; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31 + area = Area__invalid; + if (atr_bgn != -1) { // atr_bgn will be -1 if atrs ends on quoted (EX:"a='b'"); else, pending atr that needs to be processed; EX: "a=b" b wil be in bfr + val_end = src_end; + Make(src, src_end); + } + break; + } + } + else if (pos > src_end) + break; + byte b = src[pos]; + switch (area) { + case Area__atr_limbo: // 1st area after node_name or attribute + switch (b) { + // gt -> stop iterating + case Byte_ascii.Gt: + nde_end_tid = Mwh_doc_parser.Nde_end_tid__gt; + loop = false; + break; + // slash -> check for "/>" or " / " + case Byte_ascii.Slash: + int nxt_pos = pos + 1; + if (nxt_pos == src_end) { + pos = nxt_pos; + return Mwh_doc_parser.Nde_end_tid__invalid; + } + else 
if (src[nxt_pos] == Byte_ascii.Gt) { + nde_end_tid = Mwh_doc_parser.Nde_end_tid__inline; + pos = nxt_pos; + loop = false; + } + else { + area = Area__invalid; atr_bgn = pos; + } + break; + // ws -> ignore; skip any ws in atr_limbo; note that once a non-ws char is encountered, it will immediately go into another area + case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: + if (atr_bgn == -1) atr_bgn = pos; + break; + // alphanum -> enter Area__key + case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4: + case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9: + case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E: + case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J: + case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O: + case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T: + case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z: + case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e: + case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j: + case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o: + case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t: + case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z: + case Byte_ascii.Colon: + area = Area__key; + if (atr_bgn == -1) atr_bgn = pos; + 
key_bgn = pos; + break; + // lt -> check for + case Byte_ascii.Lt: // handle "" + int gt_pos = Xnde_find_gt(src, pos, src_end); + if (gt_pos == Bry_find_.Not_found) { + area = Area__invalid; + atr_bgn = pos; + } + else + pos = gt_pos; // position after ">"; note that there is ++pos below and loop will continue at gt_pos + 1 (next character after) + break; + // rest -> invalid + default: // quote and other non-valid key characters are invalid until next space; EX: " src_end invalid area + case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: + Make(src, pos); + area = Area__atr_limbo; + break; + // rest -> continue eating up invalid chars + default: + break; + } + break; + case Area__key: + switch (b) { + // alphanum -> valid key chars + case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4: + case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9: + case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E: + case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J: + case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O: + case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T: + case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z: + case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e: + case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j: + case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o: + case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: 
case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t: + case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z: + case Byte_ascii.Colon: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Underline: + if (key_bfr_on) key_bfr.Add_byte(b); + break; + // ws -> src_end key + case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: + area = Area__eql_limbo; + key_end = pos; + break; + // eq -> src_end key; skip Area_eq and go to Area_val_bgn + case Byte_ascii.Eq: + area = Area__val_limbo; + key_end = eql_pos = pos; + break; + // lt -> check for + case Byte_ascii.Lt: + int gt_pos = Xnde_find_gt(src, pos, src_end); + if (gt_pos == Bry_find_.Not_found) // "<" should not be in key; EX: "ke enter invalid + default: + area = Area__invalid; + break; + } + break; + case Area__eql_limbo: + switch (b) { + // ws -> skip + case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip ws + if (key_end == -1) { // EX: "a = b"; key_end != -1 b/c 1st \s sets key_end; EX: "a b = c"; key_end + val_end = pos - 1; + Make(src, pos); + area = Area__atr_limbo; + continue; + } + break; + // eq -> enter Area__eq + case Byte_ascii.Eq: + eql_pos = pos; + area = Area__val_limbo; + break; + // rest -> make atr and enter limbo + case Byte_ascii.Quote: case Byte_ascii.Apos: // FUTURE: previous word was key + default: // NOTE: added this late; xml_parser was not handling "line start=3" DATE:2013-07-03 + val_end = pos - 1; + Make(src, pos); + area = Area__atr_limbo; + continue; + } + break; + case Area__val_limbo: + switch (b) { + // ws -> skip + case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: + ws_is_before_val = true; + break; + // quote -> enter Area_val_quote + case Byte_ascii.Quote: case Byte_ascii.Apos: + area = Area__val_quote; qte_byte = b; prv_is_ws = false; + val_bgn = pos + 1; + break; + // alphanum -> enter Area_val_raw + case Byte_ascii.Num_0: case 
Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4: + case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9: + case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E: + case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J: + case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O: + case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T: + case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z: + case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e: + case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j: + case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o: + case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t: + case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z: + case Byte_ascii.Colon: + case Byte_ascii.Hash: + area = Area__val_naked; + val_bgn = pos; + break; + // lt -> check for + case Byte_ascii.Lt: + int gt_pos = Xnde_find_gt(src, pos, src_end); + if (gt_pos == Bry_find_.Not_found) + area = Area__invalid; + else + pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after) + break; + // rest -> ignore (?) 
+ default: + break; + } + break; + case Area__val_quote: { // EX: "'val' " in "key = 'val'" + switch (b) { + // quote: check if same as opening quote + case Byte_ascii.Quote: case Byte_ascii.Apos: + if (qte_byte == b) { // quote closes val + val_end = pos; + Make(src, pos + 1); // NOTE: set atr_end *after* quote + } + else { // quote is just char; EX: title="1 o'clock" or title='The "C" way' + prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char + } + break; + // lt -> check for ; EX: + case Byte_ascii.Lt: + if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init + int gt_pos = Xnde_find_gt(src, pos, src_end); + if (gt_pos == Bry_find_.Not_found) + // area = Area__invalid; // DELETE: 2012-11-13; unpaired < should not mark atr invalid; EX: style='margin:1em convert all ws to \s; only allow 1 ws at any point in time + case Byte_ascii.Nl: case Byte_ascii.Tab: case Byte_ascii.Cr: // REF.MW:Sanitizer.php|decodeTagAttributes $value = preg_replace( '/[\t\r\n ]+/', ' ', $value ); + case Byte_ascii.Space: + if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init + if (prv_is_ws) {} // noop; only allow one ws at a time; EX: "a b" -> "a b"; "a\n\nb" -> "a b" + else { + prv_is_ws = true; val_bfr.Add_byte(Byte_ascii.Space); + } + break; + // rest -> add to val + default: + prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char + break; + } + break; + } + case Area__val_naked: // no quotes; EX:a=bcd + switch (b) { + // alphanum -> continue reading + case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4: + case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9: + case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E: + case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case 
Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J: + case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O: + case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T: + case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z: + case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e: + case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j: + case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o: + case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t: + case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z: + case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent: + case Byte_ascii.Amp: case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star: + case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Slash: + case Byte_ascii.Colon: case Byte_ascii.Semic: case Byte_ascii.Gt: + case Byte_ascii.Question: case Byte_ascii.At: case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end: + case Byte_ascii.Pow: case Byte_ascii.Underline: case Byte_ascii.Tick: + case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde: + break; + // ws -> src_end atr + case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: + val_end = pos; + Make(src, pos); + break; + case Byte_ascii.Eq: // EX:"a= b=c" or "a=b=c"; PAGE:en.w:2013_in_American_television + if (ws_is_before_val) { // "a= b=c"; discard 1st and resume at 2nd + int old_val_bgn = 
val_bgn; + area = Area__invalid; Make(src, val_bgn); // invalidate cur atr; EX:"a=" + atr_bgn = key_bgn = old_val_bgn; // reset atr / key to new atr; EX: "b" + key_end = pos; + area = Area__val_limbo; // set area to val_bgn (basically, put after =) + } + else // "a=b=c"; discard all + area = Area__invalid; + break; + case Byte_ascii.Lt: + val_end = pos; + Make(src, pos); + --pos; // NOTE: --pos to include "<" as part of next atr; above ws excludes from next atr + break; + default: + area = Area__invalid; + break; + } + break; + } + ++pos; + } + + // iterate atrs and notify + int len = atr_mgr.Len(); + int[] data_ary = atr_mgr.Data_ary(); + byte[][] text_ary = atr_mgr.Text_ary(); + for (int j = 0; j < len; ++j) { + int itm_idx = j * Mwh_atr_mgr.Idx__mult; + byte[] key_bry = text_ary[j * Mwh_atr_mgr.Text__mult]; + byte[] val_bry_manual = null; + int atr_utl = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_utl]; + boolean atr_valid = (atr_utl & Mwh_atr_itm.Mask__valid) == Mwh_atr_itm.Mask__valid; + boolean repeated = (atr_utl & Mwh_atr_itm.Mask__repeated) == Mwh_atr_itm.Mask__repeated; + boolean key_exists = (atr_utl & Mwh_atr_itm.Mask__key_exists) == Mwh_atr_itm.Mask__key_exists; + boolean val_made = (atr_utl & Mwh_atr_itm.Mask__val_made) == Mwh_atr_itm.Mask__val_made; + if (val_made) + val_bry_manual = text_ary[(j * Mwh_atr_mgr.Text__mult) + 1]; + wkr.On_atr_each(this, src, nde_tid, atr_valid, repeated, key_exists, key_bry, val_bry_manual, data_ary, itm_idx); + } + atr_mgr.Clear(); + repeated_atrs_hash.Clear(); + + return pos; + } + public int Xnde_find_gt_find(byte[] src, int pos, int end) { + bry_ref.Val_(null); + byte b = src[pos]; + if (b == Byte_ascii.Slash && pos + 1 < end) { // if " + key_exists = true; + else { // not a pair; EX: "" + if (key_end == -1) key_end = val_end; // NOTE: key_end == -1 when eos; EX: "a" would have key_bgn = 0; key_end = -1; val_end = 1 DATE:2014-07-03 + val_bgn = val_end = -1; + } + key_bry = key_bfr_on ? 
key_bfr.Xto_bry_and_clear() : Bry_.Mid(src, key_bgn, key_end); // always make key_bry; needed for repeated_atrs as well as key_tid + if (val_bfr_on) val_bry = val_bfr.Xto_bry_and_clear(); + } + else { + atr_valid = false; + key_bry = Bry_.Empty; + key_bfr.Clear(); + if (val_bgn == -1) val_bgn = atr_bgn; + } + int qte_tid = Mwh_atr_itm.Mask__qte__none; + if (qte_byte != Byte_ascii.Null) + qte_tid = qte_byte == Byte_ascii.Quote ? Mwh_atr_itm.Mask__qte_qute : Mwh_atr_itm.Mask__qte__apos; + int atr_uid = atr_mgr.Add(nde_uid, nde_tid, atr_valid, false, key_exists, atr_bgn, atr_end, key_bgn, key_end, key_bry, eql_pos, qte_tid, val_bgn, val_end, val_bry); + + // handle repeated atrs + if (atr_valid) { + int repeated_uid = repeated_atrs_hash.Get_as_int_or(key_bry, -1); + if (repeated_uid != -1) { + repeated_atrs_hash.Del(key_bry); + atr_mgr.Set_repeated(repeated_uid); + } + repeated_atrs_hash.Add_bry_int(key_bry, atr_uid); + } + + // reset temp variables + area = Area__atr_limbo; qte_byte = Byte_ascii.Null; + atr_bgn = key_bgn = val_bgn = key_end = val_end = eql_pos = -1; + key_bfr_on = val_bfr_on = ws_is_before_val = false; + } + private static final Hash_adp_bry xnde_hash = Hash_adp_bry.ci_a7() + .Add_bry_bry(Xop_xnde_tag_.Tag_nowiki.Name_bry()) + .Add_bry_bry(Xop_xnde_tag_.Tag_noinclude.Name_bry()) + .Add_bry_bry(Xop_xnde_tag_.Tag_includeonly.Name_bry()) + .Add_bry_bry(Xop_xnde_tag_.Tag_onlyinclude.Name_bry()) + ; + public static final int Key_tid__unknown = -1; +} diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser_fxt.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser_fxt.java new file mode 100644 index 000000000..ee4fed408 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser_fxt.java @@ -0,0 +1,99 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published 
by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
+class Mwh_atr_parser_fxt {	// test fixture: runs Mwh_atr_parser over a raw attribute string and compares the produced items with expected ones as text
+	private final Bry_bfr expd_bfr = Bry_bfr.new_(), actl_bfr = Bry_bfr.new_();
+	private final Mwh_atr_parser parser = new Mwh_atr_parser();
+	private final Mwh_doc_wkr__atr_bldr wkr = new Mwh_doc_wkr__atr_bldr();
+	public Mwh_atr_itm Make_pair(String key, String val) {return new Mwh_atr_itm(Bry_.Empty, Bool_.Y, Bool_.N, Bool_.Y, -1, -1, -1, -1, Bry_.new_u8(key) , -1, -1, Bry_.new_u8(val) , -1, -1);}	// expected item: valid, not repeated, key_exists on; every position is -1
+	public Mwh_atr_itm Make_name(String key) {return new Mwh_atr_itm(Bry_.Empty, Bool_.Y, Bool_.N, Bool_.N, -1, -1, -1, -1, Bry_.new_u8(key) , -1, -1, null , -1, -1);}	// expected item: valid, not repeated, key_exists off; val is null
+	public Mwh_atr_itm Make_fail(int bgn, int end) {return new Mwh_atr_itm(Bry_.Empty, Bool_.N, Bool_.N, Bool_.N, bgn, end, -1, -1, null , -1, -1, null , -1, -1);}	// expected item: invalid; only atr_bgn / atr_end are set
+	public void Test_val_as_int(String raw, int expd) {	// wraps raw as the value of a single item and checks Val_as_int_or(-1)
+		byte[] src = Bry_.new_u8(raw);
+		Mwh_atr_itm itm = new Mwh_atr_itm(src, true, false, false, 0, src.length, -1, -1, null, 0, src.length, src, -1, -1);
+		Tfds.Eq_int(expd, itm.Val_as_int_or(-1));
+	}
+	public void Test_parse(String raw, Mwh_atr_itm... expd) {	// parses raw and compares the resulting items with expd
+		Mwh_atr_itm[] actl = Exec_parse(raw);
+		Test_print(expd, actl);
+	}
+	private Mwh_atr_itm[] Exec_parse(String raw) {
+		byte[] bry = Bry_.new_u8(raw);
+		parser.Parse(wkr, -1, -1, bry, 0, bry.length);
+		return wkr.To_atr_ary();
+	}
+	public void Test_print(Mwh_atr_itm[] expd_ary, Mwh_atr_itm[] actl_ary) {	// renders both arrays to text and compares them line by line; the arrays may differ in length
+		int expd_len = expd_ary.length;
+		int actl_len = actl_ary.length;
+		int len = expd_len > actl_len ? expd_len : actl_len;
+		for (int i = 0; i < len; ++i) {	// iterate to the longer length; the shorter side is passed as null
+			To_bfr(expd_bfr, i < expd_len ? expd_ary[i] : null, actl_bfr, i < actl_len ? actl_ary[i] : null);
+		}
+		Tfds.Eq_str_lines(expd_bfr.Xto_str_and_clear(), actl_bfr.Xto_str_and_clear());
+	}
+	private void To_bfr(Bry_bfr expd_bfr, Mwh_atr_itm expd_itm, Bry_bfr actl_bfr, Mwh_atr_itm actl_itm) {	// writes one expected / actual pair; either item may be null
+		To_bfr__main(expd_bfr, expd_itm);
+		To_bfr__main(actl_bfr, actl_itm);
+		To_bfr__head(expd_bfr, expd_itm);
+		To_bfr__head(actl_bfr, actl_itm);
+		if (expd_itm != null && expd_itm.Atr_bgn() != -1) {	// range is compared only when the expected item specifies one; null guard: expd_itm is null when actl has extra items, and the mismatch must surface as a text diff, not a NullPointerException
+			To_bfr__atr_rng(expd_bfr, expd_itm);
+			To_bfr__atr_rng(actl_bfr, actl_itm);
+		}
+	}
+	private void To_bfr__head(Bry_bfr bfr, Mwh_atr_itm itm) {	// writes "head:" followed by the valid / repeated / key_exists flags
+		if (itm == null) return;
+		bfr.Add_str_a7("head:").Add_yn(itm.Valid()).Add_byte_semic().Add_yn(itm.Repeated()).Add_byte_semic().Add_yn(itm.Key_exists()).Add_byte_nl();
+	}
+	private void To_bfr__main(Bry_bfr bfr, Mwh_atr_itm itm) {	// writes "key:" / "val:" lines, but only for valid items
+		if (itm == null) return;
+		if (itm.Valid()) {
+			bfr.Add_str_a7("key:").Add(itm.Key_bry()).Add_byte_nl();
+			bfr.Add_str_a7("val:").Add(itm.Val_as_bry()).Add_byte_nl();
+		}
+//			else
+//				To_bfr__atr_rng(bfr, itm);
+	}
+	private void To_bfr__atr_rng(Bry_bfr bfr, Mwh_atr_itm itm) {	// writes "rng:" followed by atr_bgn;atr_end
+		if (itm == null) return;
+		bfr.Add_str_a7("rng:").Add_int_variable(itm.Atr_bgn()).Add_byte_semic().Add_int_variable(itm.Atr_end()).Add_byte_nl();
+	}
+}
+class Mwh_doc_wkr__atr_bldr implements Mwh_doc_wkr {	// worker that collects every attribute callback into a list of Mwh_atr_itm; all other callbacks are no-ops
+	private final List_adp list = List_adp_.new_();
+	public Hash_adp_bry Nde_regy() {return null;}
+	public void On_atr_each(Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] data_ary, int itm_idx) {	// rebuilds one Mwh_atr_itm from the slots starting at itm_idx in data_ary
+		int atr_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_bgn];
+		int atr_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_end];
+		int key_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_key_bgn];
+		int key_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_key_end];
+		int val_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
+		int val_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
+		int eql_pos = data_ary[itm_idx + Mwh_atr_mgr.Idx_eql_pos];
+		int qte_tid = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_utl];	// read the atr_utl slot first ...
+		qte_tid = Mwh_atr_itm.Calc_qte_tid(qte_tid);	// ... then reduce it to the quote type id
+		Mwh_atr_itm atr = new Mwh_atr_itm(src, valid, repeated, key_exists, atr_bgn, atr_end, key_bgn, key_end, key_bry, val_bgn, val_end, val_bry_manual, eql_pos, qte_tid);
+		list.Add(atr);
+	}
+	public void On_txt_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
+	public void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) {}
+	public void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline) {}
+	public void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
+	public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
+	public Mwh_atr_itm[] To_atr_ary() {return (Mwh_atr_itm[])list.To_ary_and_clear(Mwh_atr_itm.class);}	// returns the collected items and empties the list, so the fixture can be reused
+}
diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser_tst.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser_tst.java
new file mode 100644
index 000000000..b742db996
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser_tst.java
@@ -0,0 +1,63 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free
Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
+import org.junit.*;
+public class Mwh_atr_parser_tst {	// one-line cases for Mwh_atr_parser: each parses a raw attribute string and lists the items expected back, in order
+	private final Mwh_atr_parser_fxt fxt = new Mwh_atr_parser_fxt();
+	@Test public void Pair__quote__double() {fxt.Test_parse("a=\"b\"" , fxt.Make_pair("a" , "b"));}
+	@Test public void Pair__quote__single() {fxt.Test_parse("a='b'" , fxt.Make_pair("a" , "b"));}
+	@Test public void Pair__quote__none() {fxt.Test_parse("a=b" , fxt.Make_pair("a" , "b"));}
+	@Test public void Pair__empty() {fxt.Test_parse("a=''" , fxt.Make_pair("a" , ""));}
+	@Test public void Pair__key_w_underline() {fxt.Test_parse("a_b=c" , fxt.Make_pair("a_b" , "c"));}
+	// Name: keys that appear without "=" and a value
+	@Test public void Name__quote__none() {fxt.Test_parse("b" , fxt.Make_name("b"));}
+	@Test public void Name__ws() {fxt.Test_parse(" b " , fxt.Make_name("b"));} // PURPOSE:discovered while writing test for ref's "lower-alpha" DATE:2014-07-03
+	@Test public void Name__mult() {fxt.Test_parse("a b1 c" , fxt.Make_name("a"), fxt.Make_name("b1"), fxt.Make_name("c"));}
+	// Fail: input that must produce an invalid item covering the given range, without blocking later attributes
+	@Test public void Fail__key_w_plus() {fxt.Test_parse("a+b" , fxt.Make_fail(0, 3));}
+	@Test public void Fail__key_w_plus__many() {fxt.Test_parse("a+b c=d" , fxt.Make_fail(0, 3) , fxt.Make_pair("c", "d"));}
+	@Test public void Fail__val_w_plus() {fxt.Test_parse("a=b+c" , fxt.Make_fail(0, 5));}
+	@Test public void Fail__recover() {fxt.Test_parse("* a=b" , fxt.Make_fail(0, 1) , fxt.Make_pair("a", "b"));} // PURPOSE: * is invalid, but should not stop parsing of a=b
+	@Test public void Fail__incomplete() {fxt.Test_parse("a= c=d" , fxt.Make_fail(0, 3) , fxt.Make_pair("c", "d"));} // PURPOSE: discard xatr if incomplete and followed by valid atr; PAGE:en.w:2013_in_American_television DATE:2014-09-25
+	@Test public void Fail__incomplete_2() {fxt.Test_parse("a=c=d" , fxt.Make_fail(0, 5));} // PURPOSE: variation of above; per MW regex, missing space invalidates entire attribute; DATE:2014-09-25
+	@Test public void Fail__incomplete_pair() {fxt.Test_parse("a= b=" , fxt.Make_fail(0, 3) , fxt.Make_fail(3, 5));} // PURPOSE: "b=" should be invalid not a kv of "b" = "b"; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31
+	// Dangling: an opening quote that is never closed
+	@Test public void Dangling_eos() {fxt.Test_parse("a='b' c='d" , fxt.Make_pair("a", "b") , fxt.Make_fail(5, 10));} // PURPOSE: handle dangling quote at eos; PAGE:en.w:Aubervilliers DATE:2014-06-25
+	@Test public void Dangling_bos() {fxt.Test_parse("a='b c=d" , fxt.Make_fail(0, 4) , fxt.Make_pair("c", "d"));}// PURPOSE: handle dangling quote at bos; resume at next valid atr; PAGE:en.w:Aubervilliers DATE:2014-06-25
+	// Ws: whitespace around attributes; the Atr_rng cases also check the reported attribute range
+	@Test public void Ws__ini() {fxt.Test_parse(" a='b'" , fxt.Make_pair("a", "b").Atr_rng(0, 6));}
+	@Test public void Ws__end() {fxt.Test_parse(" a='b' c='d'" , fxt.Make_pair("a", "b").Atr_rng(0, 6), fxt.Make_pair("c", "d").Atr_rng(6, 12));}
+	@Test public void Ws() {fxt.Test_parse("a = 'b'" , fxt.Make_pair("a", "b"));} // PURPOSE: fix wherein multiple space was causing "a=a"; PAGE:fr.s:La_Sculpture_dans_les_cimetières_de_Paris/Père-Lachaise; DATE:2014-01-18
+	// Many: several pairs in one string
+	@Test public void Many__quote__apos() {fxt.Test_parse("a='b' c='d' e='f'" , fxt.Make_pair("a", "b"), fxt.Make_pair("c", "d"), fxt.Make_pair("e", "f"));}
+	@Test public void Many__naked() {fxt.Test_parse("a=b c=d e=f" , fxt.Make_pair("a", "b"), fxt.Make_pair("c", "d"), fxt.Make_pair("e", "f"));}
+	// Val: whitespace and quote characters inside a quoted value
+	@Test public void Val__ws__nl() {fxt.Test_parse("a='b\nc'" , fxt.Make_pair("a", "b c"));}
+	@Test public void Val__ws__mult() {fxt.Test_parse("a='b c'" , fxt.Make_pair("a", "b c"));}
+	@Test public void Val__ws__mult_mult() {fxt.Test_parse("a='b c d'" , fxt.Make_pair("a", "b c d"));} // PURPOSE: fix wherein 1st-gobble gobbled rest of spaces (was b cd)
+	@Test public void Val__apos() {fxt.Test_parse("a=\"b c'd\"" , fxt.Make_pair("a", "b c'd"));} // PURPOSE: fix wherein apos was gobbled up; PAGE:en.s:Alice's_Adventures_in_Wonderland; DATE:2013-11-22
+	@Test public void Val__apos_2() {fxt.Test_parse("a=\"b'c d\"" , fxt.Make_pair("a", "b'c d"));} // PURPOSE: fix wherein apos was causing "'b'c d"; PAGE:en.s:Grimm's_Household_Tales,_Volume_1; DATE:2013-12-22
+	// Nowiki: NOTE(review): the inputs below are shorter than the Atr_rng ends they expect (e.g. 5 chars vs. end 13), so markup inside the literals was probably stripped from this copy -- verify against the upstream file
+	@Test public void Nowiki__val() {fxt.Test_parse("a='b'" , fxt.Make_pair("a", "b").Atr_rng(0, 13));}
+	@Test public void Nowiki__key() {fxt.Test_parse("a=b" , fxt.Make_pair("a", "b").Atr_rng(8, 11));}
+	@Test public void Nowiki__key_2() {fxt.Test_parse("abc=d" , fxt.Make_pair("abc", "d").Atr_rng(0, 22));}
+	@Test public void Nowiki__key_3() {fxt.Test_parse("a=\"b\"" , fxt.Make_pair("a", "b").Atr_rng(0, 22));} // EX:fr.w:{{Portail|Transpédia|Californie}}
+	@Test public void Nowiki__quote() {fxt.Test_parse("a=\"bcdef\"", fxt.Make_pair("a", "bcdef"));}
+	// Val_as_int: integer conversion of a value, including a leading minus sign
+	@Test public void Val__as_int() {fxt.Test_val_as_int("-123" , -123);}
+}
diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_itm.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_itm.java
new file mode 100644
index 000000000..0949678fb
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_itm.java
@@ -0,0 +1,25 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
+class Mwh_doc_itm {	// immutable holder for one document item: an item kind, a node type id, and the item's bytes
+	public static final int Itm_tid__txt = 0, Itm_tid__nde_head = 1, Itm_tid__nde_tail = 2, Itm_tid__comment = 3;	// the item kinds
+	private final int itm_tid, nde_tid; private final byte[] itm_bry;	// assigned once, in the constructor
+	public Mwh_doc_itm(int itm_tid, int nde_tid, byte[] itm_bry) {this.itm_tid = itm_tid; this.nde_tid = nde_tid; this.itm_bry = itm_bry;}
+	public int Itm_tid() {return this.itm_tid;} public int Nde_tid() {return this.nde_tid;}	// item kind / node type id, exactly as passed in
+	public byte[] Itm_bry() {return this.itm_bry;}	// the same array instance that was passed in (no defensive copy)
+}
diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_mgr.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_mgr.java
new file mode 100644
index 000000000..91f900d9b
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_mgr.java
@@ -0,0 +1,62 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+*/ +package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +class Mwh_doc_mgr { + private final int data_max_orig; + public Mwh_doc_mgr(int max) { + this.data_max_orig = max * Idx__mult; + this.Max_(max); + } + public int Len() {return itm_len;} private int itm_len; + public int[] Data_ary() {return data_ary;} private int[] data_ary; private int data_max; + private void Max_(int len) { + this.data_max = len * Idx__mult; + this.data_ary = new int[data_max]; + this.itm_len = 0; + } + public void Clear() { + if (data_max == data_max_orig) + itm_len = 0; + else + Max_(data_max_orig / Idx__mult); + } + public int Add(int dom_tid, int src_bgn, int src_end) { + int data_idx = itm_len * Idx__mult; + if (data_idx == data_max) { + int new_data_max = data_max == 0 ? Idx__mult : data_max * 2; + int[] new_data_ary = new int[new_data_max]; + Int_.Ary_copy_to(data_ary, data_max, data_ary); + this.data_ary = new_data_ary; + this.data_max = new_data_max; + } + int dom_uid = itm_len; + data_ary[data_idx + Idx_dom_uid] = dom_uid; + data_ary[data_idx + Idx_dom_tid] = dom_tid; + data_ary[data_idx + Idx_src_bgn] = src_bgn; + data_ary[data_idx + Idx_src_end] = src_end; + ++itm_len; + return dom_uid; + } + public static final int + Idx_dom_uid = 0 + , Idx_dom_tid = 1 + , Idx_src_bgn = 2 + , Idx_src_end = 3 + , Idx__mult = 4 + ; +} diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser.java new file mode 100644 index 000000000..e4612bb61 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser.java @@ -0,0 +1,191 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import gplx.core.primitives.*; +import gplx.xowa.parsers.xndes.*; +public class Mwh_doc_parser { + private final Mwh_doc_mgr dom_mgr = new Mwh_doc_mgr(16); + private final Mwh_atr_parser atr_parser = new Mwh_atr_parser(); + private final List_adp nde_stack = List_adp_.new_(); + private byte[] src; private int src_end; + private Mwh_doc_wkr wkr; + private Hash_adp_bry nde_regy; + private int txt_bgn, nde_uid; + private Xop_xnde_tag cur_nde; private int cur_nde_tid; + public void Parse(Mwh_doc_wkr wkr, byte[] src, int src_bgn, int src_end) { + this.wkr = wkr; this.src = src; this.src_end = src_end; + this.nde_regy = wkr.Nde_regy(); + nde_stack.Clear(); + int pos = txt_bgn = src_bgn; + nde_uid = cur_nde_tid = -1; + cur_nde = null; + while (pos < src_end) { + if (src[pos] == Byte_ascii.Angle_bgn) // "<": possible nde start + pos = Parse_nde(pos); + else // else, just increment + ++pos; + } + if (src_end != txt_bgn) wkr.On_txt_end(this, src, cur_nde_tid, txt_bgn, pos); + } + private int Parse_nde(int pos) { + int nde_end_tid = Nde_end_tid__invalid; + boolean nde_is_head = true; + int nde_bgn = pos; + ++pos; + int name_bgn = pos; + int name_end = pos; + while (pos < src_end) { + byte b = src[pos]; + switch (b) { + // valid chars for name + case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E: + case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J: + case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: 
case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O: + case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T: + case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z: + case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e: + case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j: + case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o: + case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t: + case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z: + case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4: + case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9: + case Byte_ascii.Dot: case Byte_ascii.Dash: case Byte_ascii.Underline: case Byte_ascii.Colon: // XML allowed punctuation + case Byte_ascii.Dollar:// MW: handles ; + ++pos; + break; + // comment check + case Byte_ascii.Bang: + boolean comment_found = false; + if (name_bgn == pos && Bry_.Eq(src, pos + 1, pos + 3, Comment_bgn)) { + int comment_end_pos = Bry_find_.Find_fwd(src, Comment_end, pos + 3); + if (comment_end_pos != Bry_find_.Not_found) { + nde_end_tid = Nde_end_tid__comment; + pos = comment_end_pos + 3; + comment_found = true; + } + } + if (!comment_found) + return pos; + else + break; + // invalid char; not a node; treat as text; EX: "" + default: + return pos; + // slash -> either "" or "" + case Byte_ascii.Slash: + if (name_bgn == pos) { // "" + nde_is_head = false; + ++name_bgn; + ++pos; + continue; + } + 
else { // check for "/>"; NOTE:
, 
 are allowed
+						name_end = pos;
+						++pos;
+						if (pos == src_end) return pos;		// end of doc; treat as text; EX: "" -> "
" + nde_end_tid = Nde_end_tid__backslash; + name_end = pos; + break; + } + if (nde_end_tid != Nde_end_tid__invalid) break; + } + // get name + Xop_xnde_tag nde_itm = null; + if (nde_end_tid != Nde_end_tid__comment) { + nde_itm = (Xop_xnde_tag)nde_regy.Get_by_mid(src, name_bgn, name_end); + if (nde_itm == null) return pos; // not a known nde; exit + } + if (txt_bgn != nde_bgn) { // notify txt + wkr.On_txt_end(this, src, cur_nde_tid, txt_bgn, nde_bgn); + txt_bgn = pos; + } + if (nde_is_head) { + wkr.On_nde_head_bgn(this, src, cur_nde_tid, name_bgn, name_end); + switch (nde_end_tid) { + case Nde_end_tid__comment: + wkr.On_comment_end(this, src, cur_nde_tid, nde_bgn, pos); + break; + case Nde_end_tid__ws: + case Nde_end_tid__slash: + case Nde_end_tid__backslash: // handled above + pos = atr_parser.Parse(wkr, nde_uid, cur_nde_tid, src, pos, src_end); + nde_end_tid = atr_parser.Nde_end_tid(); + txt_bgn = pos; + break; + } + switch (nde_end_tid) { + case Nde_end_tid__inline: + wkr.On_nde_head_end(this, src, cur_nde_tid, nde_bgn, pos, Bool_.Y); + txt_bgn = pos; + break; + case Nde_end_tid__gt: + wkr.On_nde_head_end(this, src, cur_nde_tid, nde_bgn, pos, Bool_.N); + txt_bgn = pos; + if ( nde_itm != null + && !nde_itm.Single_only_html() // ignore + && (cur_nde == null || !cur_nde.Xtn()) //
 ignores inner
+						) {
+						if (cur_nde != null)
+							nde_stack.Add(cur_nde);
+						this.cur_nde = nde_itm;
+						this.cur_nde_tid = nde_itm.Id();
+					}
+					break;
+				case Nde_end_tid__ws:
+				case Nde_end_tid__slash:
+				case Nde_end_tid__backslash: break; // handled above
+			}
+			nde_uid = dom_mgr.Add(Mwh_doc_itm.Itm_tid__nde_head, nde_bgn, pos);
+		}
+		else {
+			switch (nde_end_tid) {
+				case Nde_end_tid__gt:
+					wkr.On_nde_tail_end(this, src, cur_nde_tid, nde_bgn, pos);
+					txt_bgn = pos;
+					if (nde_itm.Id() == cur_nde_tid) {
+						cur_nde = (Xop_xnde_tag)List_adp_.Pop_or(nde_stack, null);
+						cur_nde_tid = cur_nde == null ? -1 : cur_nde.Id();
+					}
+					break;
+			}
+		}
+		return pos;
+	}
+	public static final int Nde_end_tid__invalid = 0, Nde_end_tid__gt = 1, Nde_end_tid__ws = 2, Nde_end_tid__inline = 3, Nde_end_tid__slash = 4, Nde_end_tid__backslash = 5, Nde_end_tid__comment = 6;
+	private static final byte[] Comment_bgn = Bry_.new_a7("--"), Comment_end = Bry_.new_a7("-->");
+}
diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser_fxt.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser_fxt.java
new file mode 100644
index 000000000..10e788f2c
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser_fxt.java
@@ -0,0 +1,73 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
+class Mwh_doc_parser_fxt {	// test fixture: runs Mwh_doc_parser over a raw String and compares the items collected by wkr against expected items
+	private final Bry_bfr expd_bfr = Bry_bfr.new_(), actl_bfr = Bry_bfr.new_();	// reused dump buffers; both are emptied by Xto_str_and_clear in Test_print
+	private final Mwh_doc_parser parser = new Mwh_doc_parser();
+	private final Mwh_doc_wkr__itm_bldr wkr = new Mwh_doc_wkr__itm_bldr();	// receives parser callbacks and records them as Mwh_doc_itm
+	public Mwh_doc_itm Make_txt		(String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__txt		, -1, Bry_.new_u8(raw));}	// nde_tid of -1 means To_bfr will not compare nde_tid for this item
+	public Mwh_doc_itm Make_txt		(String raw, int nde_tid) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__txt		, nde_tid, Bry_.new_u8(raw));}	// text item whose nde_tid is also compared (when != -1)
+	public Mwh_doc_itm Make_comment (String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__comment	, -1, Bry_.new_u8(raw));}
+	public Mwh_doc_itm Make_nde_head(String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_head	, -1, Bry_.new_u8(raw));}
+	public Mwh_doc_itm Make_nde_tail(String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_tail	, -1, Bry_.new_u8(raw));}
+	public void Test_parse(String raw, Mwh_doc_itm... expd) {	// parse raw, then assert that the collected items match expd in order
+		Mwh_doc_itm[] actl = Exec_parse(raw);
+		Test_print(expd, actl);
+	}
+	public Mwh_doc_itm[] Exec_parse(String raw) {	// parses the full byte range [0, bry.length) and returns whatever wkr collected
+		byte[] bry = Bry_.new_u8(raw);
+		parser.Parse(wkr, bry, 0, bry.length);
+		return wkr.To_atr_ary();
+	}
+	public void Test_print(Mwh_doc_itm[] expd_ary, Mwh_doc_itm[] actl_ary) {	// dumps both arrays to text and compares the two dumps line by line
+		int expd_len = expd_ary.length;
+		int actl_len = actl_ary.length;
+		int len = expd_len > actl_len ? expd_len : actl_len;	// iterate to the longer length; the shorter side is passed as null below
+		for (int i = 0; i < len; ++i) {
+			To_bfr(expd_bfr, i < expd_len ? expd_ary[i] : null, actl_bfr, i < actl_len ? actl_ary[i] : null);
+		}
+		Tfds.Eq_str_lines(expd_bfr.Xto_str_and_clear(), actl_bfr.Xto_str_and_clear());
+	}
+	private void To_bfr(Bry_bfr expd_bfr, Mwh_doc_itm expd_itm, Bry_bfr actl_bfr, Mwh_doc_itm actl_itm) {	// writes one expected / actual pair to their respective buffers
+		To_bfr__main(expd_bfr, expd_itm); To_bfr__main(actl_bfr, actl_itm);
+		if (expd_itm != null && expd_itm.Nde_tid() != -1) {	// nde_tid is dumped for both sides only when the expected item specifies one
+			To_bfr__nde_tid(expd_bfr, expd_itm); To_bfr__nde_tid(actl_bfr, actl_itm);
+		}
+	}
+	private void To_bfr__main(Bry_bfr bfr, Mwh_doc_itm itm) {	// writes the "itm_tid:" and "txt:" lines for one item
+		if (itm == null) return;	// null is the padding slot from Test_print; nothing is written, so the other side's dump has extra lines
+		bfr.Add_str_a7("itm_tid:").Add_int_variable(itm.Itm_tid()).Add_byte_nl();
+		bfr.Add_str_a7("txt:").Add(itm.Itm_bry()).Add_byte_nl();
+	}
+	private void To_bfr__nde_tid(Bry_bfr bfr, Mwh_doc_itm itm) {	// writes the "nde_tid:" line for one item
+		if (itm == null) return;	// no actual item at this index; write nothing
+		bfr.Add_str_a7("nde_tid:").Add_int_variable(itm.Nde_tid()).Add_byte_nl();
+	}
+}
+class Mwh_doc_wkr__itm_bldr implements Mwh_doc_wkr {	// Mwh_doc_wkr used by the test fixture: records txt / nde_head / nde_tail / comment callbacks as Mwh_doc_itm
+	private final List_adp list = List_adp_.new_();			// items in callback order; emptied by To_atr_ary
+	public Hash_adp_bry Nde_regy() {return nde_regy;} private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw();	// node registry built once per instance by Mwh_doc_wkr_.Nde_regy__mw
+	public void On_atr_each	(Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {}	// no-op: attributes are not recorded
+	public void On_txt_end		(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__txt		, nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}	// records src[itm_bgn, itm_end) as a txt item
+	public void On_nde_head_bgn (Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) {}	// no-op: the head is recorded in On_nde_head_end
+	public void On_nde_head_end	(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_head	, nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}	// records a nde_head item; the inline flag is ignored
+	public void On_nde_tail_end	(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_tail	, nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}	// records a nde_tail item
+	public void On_comment_end  (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__comment	, nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}	// records a comment item
+
+	public Mwh_doc_itm[] To_atr_ary() {return (Mwh_doc_itm[])list.To_ary_and_clear(Mwh_doc_itm.class);}	// NOTE(review): name says "atr" but this returns the recorded Mwh_doc_itm array via To_ary_and_clear
+}
diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser_tst.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser_tst.java
new file mode 100644
index 000000000..43a38b1e1
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser_tst.java
@@ -0,0 +1,60 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
+import org.junit.*; import gplx.xowa.parsers.xndes.*;
+public class Mwh_doc_parser_tst {
+	private final Mwh_doc_parser_fxt fxt = new Mwh_doc_parser_fxt();
+	@Test   public void Text__basic()				{fxt.Test_parse("abc"				, fxt.Make_txt("abc"));}
+	@Test   public void Comment()					{fxt.Test_parse("ac"		, fxt.Make_txt("a"), fxt.Make_comment(""), fxt.Make_txt("c"));}
+	@Test   public void Fail__inline_eos()			{fxt.Test_parse("ad"			, fxt.Make_txt("ad"));}
+	@Test   public void Node__inline()				{fxt.Test_parse("ac"			, fxt.Make_txt("a"), fxt.Make_nde_head("")	, fxt.Make_txt("c"));}
+	@Test   public void Node__pair()				{fxt.Test_parse("acd"		, fxt.Make_txt("a"), fxt.Make_nde_head("")	, fxt.Make_txt("c"), fxt.Make_nde_tail(""), fxt.Make_txt("d"));}
+	@Test   public void Atrs__pair() {
+		fxt.Test_parse("
a
" + , fxt.Make_nde_head("
") + , fxt.Make_txt("a") + , fxt.Make_nde_tail("
")); + } + @Test public void Atrs__inline() { + fxt.Test_parse("a
b" + , fxt.Make_txt("a") + , fxt.Make_nde_head("
") + , fxt.Make_txt("b")); + } + @Test public void Node__single_only() { + fxt.Test_parse("a
b
c" + , fxt.Make_nde_head("") + , fxt.Make_txt("a", Xop_xnde_tag_.Tid_b) + , fxt.Make_nde_head("
") + , fxt.Make_txt("b", Xop_xnde_tag_.Tid_b) // not
+ , fxt.Make_nde_tail("
") + , fxt.Make_txt("c", Xop_xnde_tag_.Tid__null) + ); + } + @Test public void Node__pre() { + fxt.Test_parse("
a
b
c" + , fxt.Make_nde_head("
")
+		, fxt.Make_txt("a", Xop_xnde_tag_.Tid_pre)
+		, fxt.Make_nde_head("
") + , fxt.Make_txt("b", Xop_xnde_tag_.Tid_pre) //
 not 
+ , fxt.Make_nde_tail("
") + , fxt.Make_txt("c", Xop_xnde_tag_.Tid__null) + ); + } +} diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr.java new file mode 100644 index 000000000..76dc02ff2 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr.java @@ -0,0 +1,27 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . 
+*/ +package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +public interface Mwh_doc_wkr { + Hash_adp_bry Nde_regy(); + void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx); + void On_txt_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end); + void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end); + void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline); + void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end); + void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end); +} diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr_.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr_.java new file mode 100644 index 000000000..57e00d389 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr_.java @@ -0,0 +1,31 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . 
+*/ +package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import gplx.xowa.parsers.xndes.*; +public class Mwh_doc_wkr_ { + public static Hash_adp_bry Nde_regy__mw() { + Xop_xnde_tag[] ary = Xop_xnde_tag_.Ary; + int len = ary.length; + Hash_adp_bry rv = Hash_adp_bry.ci_a7(); + for (int i = 0; i < len; ++i) { + Xop_xnde_tag itm = ary[i]; + rv.Add(itm.Name_bry(), itm); + } + return rv; + } +} diff --git a/400_xowa/src/gplx/xowa/parsers/lnkis/Xop_lnki_tkn.java b/400_xowa/src/gplx/xowa/parsers/lnkis/Xop_lnki_tkn.java index 3475fb9be..de21e40a3 100644 --- a/400_xowa/src/gplx/xowa/parsers/lnkis/Xop_lnki_tkn.java +++ b/400_xowa/src/gplx/xowa/parsers/lnkis/Xop_lnki_tkn.java @@ -24,7 +24,7 @@ public class Xop_lnki_tkn extends Xop_tkn_itm_base { public void Tkn_tid_to_txt() {tkn_tid = Xop_tkn_itm_.Tid_txt;} public int Ns_id() {return ns_id;} public Xop_lnki_tkn Ns_id_(int v) {ns_id = v; return this;} private int ns_id; public Xoa_ttl Ttl() {return ttl;} public Xop_lnki_tkn Ttl_(Xoa_ttl v) {ttl = v; return this;} private Xoa_ttl ttl; - public byte Lnki_type() {return lnki_type;} public Xop_lnki_tkn Lnki_type_(byte v) {lnki_type = (byte)Enm_.Add_int(lnki_type, v); return this;} private byte lnki_type = Xop_lnki_type.Id_null; + public byte Lnki_type() {return lnki_type;} public Xop_lnki_tkn Lnki_type_(byte v) {lnki_type = (byte)Bitmask_.Add_int(lnki_type, v); return this;} private byte lnki_type = Xop_lnki_type.Id_null; public int Tail_bgn() {return tail_bgn;} public Xop_lnki_tkn Tail_bgn_(int v) {tail_bgn = v; return this;} private int tail_bgn = -1; public int Tail_end() {return tail_end;} public Xop_lnki_tkn Tail_end_(int v) {tail_end = v; return this;} private int tail_end = -1; public byte Border() {return border;} public Xop_lnki_tkn Border_(byte v) {border = v; return this;} private byte border = Bool_.__byte; diff --git a/400_xowa/src/gplx/xowa/parsers/lnkis/Xop_lnki_type.java 
b/400_xowa/src/gplx/xowa/parsers/lnkis/Xop_lnki_type.java index 49ebca35c..3342cad20 100644 --- a/400_xowa/src/gplx/xowa/parsers/lnkis/Xop_lnki_type.java +++ b/400_xowa/src/gplx/xowa/parsers/lnkis/Xop_lnki_type.java @@ -19,44 +19,44 @@ package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx. public class Xop_lnki_type { public static final byte Id_null = 0, Id_none = 1, Id_frameless = 2, Id_frame = 4, Id_thumb = 8; public static boolean Id_is_thumbable(byte id) { - return ( Enm_.Has_int(id, Id_thumb) // for purposes of displaying images on page, thumb and frame both create a thumb box - || Enm_.Has_int(id, Id_frame) + return ( Bitmask_.Has_int(id, Id_thumb) // for purposes of displaying images on page, thumb and frame both create a thumb box + || Bitmask_.Has_int(id, Id_frame) ); } public static boolean Id_defaults_to_thumb(byte id) { // assuming original of 400,200 - if ( Enm_.Has_int(id, Id_thumb) // [[File:A.png|thumb]] -> 220,-1 - || Enm_.Has_int(id, Id_frameless) // [[File:A.png|frameless]] -> 220,-1 + if ( Bitmask_.Has_int(id, Id_thumb) // [[File:A.png|thumb]] -> 220,-1 + || Bitmask_.Has_int(id, Id_frameless) // [[File:A.png|frameless]] -> 220,-1 ) return true; - else if ( Enm_.Has_int(id, Id_frame) // [[File:A.png|frame]] -> 400,200 (frame is always default size) + else if ( Bitmask_.Has_int(id, Id_frame) // [[File:A.png|frame]] -> 400,200 (frame is always default size) || id == Id_null // [[File:A.png]] -> 400,200 (default to original size) - || Enm_.Has_int(id, Id_none) // TODO: deprecate; NOTE: still used by one test; DATE:2015-08-03 + || Bitmask_.Has_int(id, Id_none) // TODO: deprecate; NOTE: still used by one test; DATE:2015-08-03 ) return false; else // should not happen throw Err_.new_unhandled(id); } public static boolean Id_limits_large_size(byte id) {// Linker.php|makeThumbLink2|Do not present an image bigger than the source, for bitmap-style images; assuming original of 400,200 - if ( Enm_.Has_int(id, Id_thumb) // 
[[File:A.png|600px|thumb]] -> 400,200 - || Enm_.Has_int(id, Id_frameless) // [[File:A.png|600px|frameless]] -> 400,200 - || Enm_.Has_int(id, Id_frame) // [[File:A.png|600px|frame]] -> 400,200 (frame is always default size) + if ( Bitmask_.Has_int(id, Id_thumb) // [[File:A.png|600px|thumb]] -> 400,200 + || Bitmask_.Has_int(id, Id_frameless) // [[File:A.png|600px|frameless]] -> 400,200 + || Bitmask_.Has_int(id, Id_frame) // [[File:A.png|600px|frame]] -> 400,200 (frame is always default size) ) return true; else if ( id == Id_null // [[File:A.png|600px]] -> 600,400; uses orig file of 400,200, but tag src_width / src_height set to 600,400 - || Enm_.Has_int(id, Id_none) // TODO: deprecate; NOTE: leaving in b/c of above failed-deprecate; DATE:2015-08-03 + || Bitmask_.Has_int(id, Id_none) // TODO: deprecate; NOTE: leaving in b/c of above failed-deprecate; DATE:2015-08-03 ) return false; else // should not happen; throw Err_.new_unhandled(id); } public static boolean Id_supports_upright(byte id) {// REF:Linker.php|makeImageLink;if ( isset( $fp['thumbnail'] ) || isset( $fp['manualthumb'] ) || isset( $fp['framed'] ) || isset( $fp['frameless'] ) || !$hp['width'] ) DATE:2014-05-22 - if ( Enm_.Has_int(id, Id_thumb) - || Enm_.Has_int(id, Id_frameless) - || Enm_.Has_int(id, Id_frame) + if ( Bitmask_.Has_int(id, Id_thumb) + || Bitmask_.Has_int(id, Id_frameless) + || Bitmask_.Has_int(id, Id_frame) ) return true; else if ( id == Id_null - || Enm_.Has_int(id, Id_none) + || Bitmask_.Has_int(id, Id_none) ) return false; else // should not happen; diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_lang.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_lang.java new file mode 100644 index 000000000..2eb89f625 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_lang.java @@ -0,0 +1,144 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under 
the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import gplx.core.btries.*; import gplx.core.primitives.*; +import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*; +import gplx.xowa.parsers.htmls.*; +public class Vnt_converter_lang { + private final Bry_bfr bfr = Bry_bfr.new_(); + private int max_depth = 32; + private byte[] src; private int src_len; + private boolean convert_needed; + private int pos; + private final Vnt_converter_rule converter_rule = new Vnt_converter_rule(); + private Xol_convert_mgr convert_mgr; private Xol_vnt_regy vnt_regy; // private Xol_vnt_mgr vnt_mgr; // private Xol_vnt_itm vnt_itm; + private final Mwh_doc_parser doc_parser = new Mwh_doc_parser(); + private final Vnt_html_doc_wkr html_convert_wkr; + private final Bry_bfr tmp_convert_bfr = Bry_bfr.new_(); + public Vnt_converter_lang(Xol_convert_mgr convert_mgr, Xol_vnt_regy vnt_regy) { + this.html_convert_wkr = new Vnt_html_doc_wkr(convert_mgr); + this.convert_mgr = convert_mgr; this.vnt_regy = vnt_regy; + } + public byte[] Converted_title() {return converted_title;} private byte[] converted_title; + public byte[] Parse(Xol_vnt_itm vnt_itm, byte[] src) {// REF.MW:/languages/LanguageConverter.php!recursiveConvertTopLevel + this.converted_title = null; + converter_rule.Init(this, vnt_regy, vnt_itm); + this.converted_title = null; + int markup_count = 0; + this.pos = 0; + this.convert_needed = true; // 
false for sr lang; SEE:LanguageSr.php !$this->guessVariant(src, vnt); + this.src = src; this.src_len = src.length; + while (pos < src_len) { + int curly_bgn = Bry_find_.Find_fwd(src, Bry__curly_bgn, pos, src_len); + if (curly_bgn == Bry_find_.Not_found) { // No more markup, append final segment + if (markup_count == 0) return src; // no markups found; just return original + Add_output(vnt_itm, convert_needed, src, pos, src_len); + return bfr.Xto_bry_and_clear(); + } + Add_output(vnt_itm, convert_needed, src, pos, curly_bgn); // Markup found; append segment + pos = curly_bgn; // Advance position + ++markup_count; + Parse_recursive(vnt_itm, 1); // Do recursive conversion + } + return bfr.Xto_bry_and_clear(); + } + private void Parse_recursive(Xol_vnt_itm vnt_itm, int depth) { + pos += 2; // skip "-{" + boolean warning_done = false; + int bgn_pos = pos; + while (pos < src_len) { + byte b = src[pos]; + Object o = trie.Match_bgn_w_byte(b, src,pos, src_len); + if (o == null) { // char; + ++pos; + continue; + } + switch (((Byte_obj_val)o).Val()) { + case Tid__curly_bgn: + if (depth >= max_depth) { + bfr.Add(Bry__curly_bgn); + if (!warning_done) { + bfr.Add_str(""); + // wfMessage('language-converter-depth-warning')->numParams($this->mMaxDepth)->inContentLanguage()->text() + bfr.Add_str(""); + warning_done = true; + } + pos += 2; // skip "-{" + continue; + } + bgn_pos = pos; + Parse_recursive(vnt_itm, depth + 1); // Recursively parse another rule + break; + case Tid__curly_end: + converter_rule.Parse(src, bgn_pos, pos); + Apply_manual_conv(converter_rule); + bfr.Add(converter_rule.Display()); + pos += 2; + return; + default: throw Err_.new_unhandled(-1); // never happens + } + } + if (pos < src_len) { // Unclosed rule + bfr.Add(Bry__curly_bgn); + Auto_convert(bfr, vnt_itm, src, pos, src_len); + } + pos = src_len; + } + private void Add_output(Xol_vnt_itm vnt_itm, boolean convert_needed, byte[] src, int bgn, int end) { + if (end - bgn == 0) return; + if (convert_needed) { + 
Auto_convert(bfr, vnt_itm, src, bgn, end); + } + else + bfr.Add_mid(src, bgn, end); + } + public byte[] Auto_convert(Xol_vnt_itm vnt_itm, byte[] src) { + Auto_convert(tmp_convert_bfr, vnt_itm, src, 0, src.length); + return tmp_convert_bfr.Xto_bry_and_clear(); + } + private void Auto_convert(Bry_bfr bfr, Xol_vnt_itm vnt_itm, byte[] src, int bgn, int end) { + html_convert_wkr.Init(bfr, vnt_itm.Idx()); + doc_parser.Parse(html_convert_wkr, src, bgn, end); + } + private void Apply_manual_conv(Vnt_converter_rule rule) { + this.converted_title = rule.Title(); + byte action = rule.Action(); + Vnt_rule_undi_mgr cnv_tbl = rule.Cnv_tbl(); + int len = cnv_tbl.Len(); + for (int i = 0; i < len; ++i) { + Vnt_rule_undi_grp grp = cnv_tbl.Get_at(i); + byte[] grp_key = grp.Vnt(); + Xol_vnt_itm vnt_itm = vnt_regy.Get_by(grp_key); if (vnt_itm == null) continue; + int grp_len = grp.Len(); + Xol_convert_wkr wkr = convert_mgr.Converter_ary()[vnt_itm.Idx()]; + for (int j = 0; j < grp_len; ++j) { + Vnt_rule_undi_itm itm = grp.Get_at(j); + if (action == Byte_ascii.Plus) { + wkr.Add(itm.Src(), itm.Trg()); + } + else if (action == Byte_ascii.Dash) + wkr.Del(itm.Src()); + } + } + } + private static final byte Tid__curly_bgn = 1, Tid__curly_end = 2; + private static final byte[] Bry__curly_bgn = Bry_.new_a7("-{"), Bry__curly_end = Bry_.new_a7("}-"); + private static final Btrie_fast_mgr trie = Btrie_fast_mgr.cs() + .Add_bry_byte(Bry__curly_bgn, Tid__curly_bgn) + .Add_bry_byte(Bry__curly_end, Tid__curly_end); +} diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_lang__html__tst.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_lang__html__tst.java new file mode 100644 index 000000000..4b3e54d55 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_lang__html__tst.java @@ -0,0 +1,53 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of 
the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*; +public class Vnt_converter_lang__html__tst { // REF: https://www.mediawiki.org/wiki/Writing_systems/Syntax + private final Vnt_converter_lang_fxt fxt = new Vnt_converter_lang_fxt(); + private String rule; + @Before public void init() { + rule = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-"; + } + @Test public void Node() { + fxt.Test_parse(rule + "hkhkhk", "cncncn"); + } + @Test public void Attribs() { + fxt.Test_parse(rule + "hk", "cn"); + } + @Test public void Attribs__title() { + fxt.Test_parse(rule + "hk", "cn"); + } + @Test public void Attribs__alt() { + fxt.Test_parse(rule + "hk", "cn"); + } + @Test public void Attribs__skip_url() { + fxt.Test_parse(rule + "hk", "cn"); + } + @Test public void Node__style() { + fxt.Test_parse(rule + "hkhk", "cncn"); + } + @Test public void Node__code() { + fxt.Test_parse(rule + "hkhkhk", "cnhkcn"); + } + @Test public void Node__pre() { + fxt.Test_parse(rule + "hk
hk
hk", "cn
hk
cn"); + } + @Test public void Node__pre__nested() { + fxt.Test_parse(rule + "hk
hk
hk", "cn
hk
cn"); + } +} diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_lang__syntax__tst.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_lang__syntax__tst.java new file mode 100644 index 000000000..cfa7889df --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_lang__syntax__tst.java @@ -0,0 +1,117 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . 
+*/ +package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*; +public class Vnt_converter_lang__syntax__tst { // REF: https://www.mediawiki.org/wiki/Writing_systems/Syntax + private final Vnt_converter_lang_fxt fxt = new Vnt_converter_lang_fxt(); + @Test public void Bidi() { + String text = "-{zh-hans:a;zh-hant:b}-"; + fxt.Test_parse_many(text, "a", "zh-hans", "zh-cn", "zh-sg", "zh"); + fxt.Test_parse_many(text, "b", "zh-hant", "zh-hk", "zh-tw"); + } + @Test public void Undi() { + String text = "-{H|cn_k=>zh-cn:cn_v}-cn_k"; + fxt.Test_parse_many(text, "cn_k", "zh", "zh-hans", "zh-hant", "zh-hk", "zh-my", "zh-mo", "zh-sg", "zh-tw"); + fxt.Test_parse_many(text, "cn_v", "zh-cn"); + } + @Test public void Raw() { + fxt.Test_parse_many("-{a}-", "a", "zh-hans", "zh-cn", "zh-sg", "zh", "zh-hant", "zh-hk", "zh-tw"); + fxt.Test_parse_many("-{R|a}-", "a", "zh-hans", "zh-cn", "zh-sg", "zh", "zh-hant", "zh-hk", "zh-tw"); + } + @Test public void Hide() { + String text = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-cn hk tw"; + fxt.Test_parse_many(text, "cn cn cn", "zh-cn", "zh-sg"); + fxt.Test_parse_many(text, "hk hk hk", "zh-hk"); + fxt.Test_parse_many(text, "tw tw tw", "zh-tw"); + fxt.Test_parse_many(text, "cn hk tw", "zh", "zh-hans", "zh-hant"); + } + @Test public void Aout() { + String text = "-{A|zh-cn:cn;zh-hk:hk;zh-tw:tw}- cn hk tw"; + fxt.Test_parse_many(text, "cn cn cn cn", "zh-cn", "zh-sg"); + fxt.Test_parse_many(text, "hk hk hk hk", "zh-hk"); + fxt.Test_parse_many(text, "tw tw tw tw", "zh-tw"); + fxt.Test_parse_many(text, "cn cn hk tw", "zh", "zh-hans"); + fxt.Test_parse_many(text, "tw cn hk tw", "zh-hant"); + fxt.Test_parse_many("h-{}-k", "hk", "zh-cn"); // semi-disabled + } + @Test public void Del() { + String text = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-cn hk tw-{-|zh-cn:cn;zh-hk:hk;zh-tw:tw}- cn hk tw"; + fxt.Test_parse_many(text, "cn cn cn cn hk tw", 
"zh-cn", "zh-sg"); + fxt.Test_parse_many(text, "hk hk hk cn hk tw", "zh-hk"); + fxt.Test_parse_many(text, "tw tw tw cn hk tw", "zh-tw"); + fxt.Test_parse_many(text, "cn hk tw cn hk tw", "zh", "zh-hans", "zh-hant"); + } + @Test public void Title() { + fxt.Test_parse_title("-{}-", null, "", "zh-cn"); + String text = "-{T|zh-cn:cn;zh-hk:hk;zh-tw:tw}-cn hk tw"; + fxt.Test_parse_title(text, "cn", "cn hk tw", "zh-cn"); + fxt.Test_parse_title(text, "cn", "cn hk tw", "zh-sg"); + fxt.Test_parse_title(text, "hk", "cn hk tw", "zh-hk"); + fxt.Test_parse_title(text, "tw", "cn hk tw", "zh-tw"); + fxt.Test_parse_title(text, "cn", "cn hk tw", "zh-hans"); + fxt.Test_parse_title(text, "tw", "cn hk tw", "zh-hant"); + fxt.Test_parse_title(text, null, "cn hk tw", "zh"); + } + @Test public void Descrip() { + String text = "-{D|zh-cn:cn;zh-hk:hk;zh-tw:tw}-"; + fxt.Test_parse_many(text, "ZH-CN:cn;ZH-HK:hk;ZH-TW:tw;", "zh", "zh-hans", "zh-hant", "zh-cn", "zh-hk", "zh-my", "zh-mo", "zh-sg", "zh-tw"); + } + @Test public void Mixture() { + String text = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}--{zh;zh-hans;zh-hant|cn hk tw}- -{zh;zh-cn;zh-hk;zh-tw|cn hk tw}-"; + fxt.Test_parse_many(text, "cn hk tw cn cn cn", "zh-cn", "zh-sg", "zh-hans"); + fxt.Test_parse_many(text, "cn hk tw hk hk hk", "zh-hk"); + fxt.Test_parse_many(text, "cn hk tw tw tw tw", "zh-tw", "zh-hant"); + fxt.Test_parse_many(text, "cn hk tw cn hk tw", "zh"); + } + @Test public void Descrip__undi() {fxt.Test_parse("-{D|cn_k=>zh-cn:cn_v;hk_k=>zh-hk:hk_v}-", "cn_k⇒ZH-CN:cn_v;hk_k⇒ZH-HK:hk_v;");} + @Test public void Descrip__mixd() {fxt.Test_parse("-{D|zh-tw:tw_v;cn_k=>zh-cn:cn_v;hk_k=>zh-hk:hk_v;zh-mo:mo_v}-", "ZH-TW:tw_v;ZH-MO:mo_v;cn_k⇒ZH-CN:cn_v;hk_k⇒ZH-HK:hk_v;");} +} +class Vnt_converter_lang_fxt { + private final Vnt_converter_lang converter_lang; + private final Xol_convert_mgr convert_mgr = new Xol_convert_mgr(); + private final Xol_vnt_regy vnt_regy = Xol_vnt_regy_fxt.new_chinese(); + private Xol_vnt_itm vnt_itm; + public 
Vnt_converter_lang_fxt() { + converter_lang = new Vnt_converter_lang(convert_mgr, vnt_regy); + convert_mgr.Init(vnt_regy); + Init_cur("zh-cn"); + } + public void Init_cur(String vnt) { + byte[] cur_vnt = Bry_.new_a7(vnt); + this.vnt_itm = vnt_regy.Get_by(cur_vnt); + convert_mgr.Cur_vnt_(cur_vnt); + } + public void Test_parse(String raw, String expd) { + Tfds.Eq_str(expd, String_.new_u8(converter_lang.Parse(vnt_itm, Bry_.new_u8(raw)))); + } + public void Test_parse_many(String raw, String expd, String... vnts) { + int len = vnts.length; + for (int i = 0; i < len; ++i) { + String vnt_key = vnts[i]; + Init_cur(vnt_key); + Xol_vnt_itm vnt = vnt_regy.Get_by(Bry_.new_a7(vnt_key)); + Tfds.Eq_str(expd, String_.new_u8(converter_lang.Parse(vnt, Bry_.new_u8(raw))), vnt_key); + } + } + public void Test_parse_title(String raw, String expd_title, String expd_text, String vnt_key) { + Init_cur(vnt_key); + Xol_vnt_itm vnt = vnt_regy.Get_by(Bry_.new_a7(vnt_key)); + Tfds.Eq_str(expd_text, String_.new_u8(converter_lang.Parse(vnt, Bry_.new_u8(raw))), vnt_key); + Tfds.Eq_str(expd_title, converter_lang.Converted_title()); + } +} diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_rule.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_rule.java index 24ac9d11f..160e6f97a 100644 --- a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_rule.java +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_rule.java @@ -17,42 +17,170 @@ along with this program. If not, see . 
*/ package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.core.btries.*; import gplx.core.primitives.*; +import gplx.xowa.langs.vnts.*; class Vnt_converter_rule { // REF.MW: /languages/LanguageConverter.php|ConverterRule - private final byte[] src; - private final int src_bgn, src_end; - private int pipe_pos = -1; - public Vnt_converter_rule(byte[] src, int src_bgn, int src_end) { - this.src = src; this.src_bgn = src_bgn; this.src_end = src_end; + private final Vnt_flag_parser flag_parser = new Vnt_flag_parser(); private final Vnt_flag_code_mgr flag_codes = new Vnt_flag_code_mgr(); private final Vnt_flag_lang_mgr flag_langs = new Vnt_flag_lang_mgr(); + private final Vnt_rule_parser rule_parser = new Vnt_rule_parser(); private final Vnt_rule_undi_mgr rule_undis = new Vnt_rule_undi_mgr(); private final Vnt_rule_bidi_mgr rule_bidis = new Vnt_rule_bidi_mgr(); + private final Bry_bfr tmp_bfr = Bry_bfr.new_(); + private final Ordered_hash cnv_marked_hash = Ordered_hash_.new_bry_(); + private Vnt_converter_lang converter; + private Xol_vnt_regy vnt_regy; private Xol_vnt_itm vnt_itm; private byte[] vnt_key; + private byte[] rule_raw; + public byte[] Display() {return display;} private byte[] display; + public byte[] Title() {return title;} private byte[] title; + public byte Action() {return action;} private byte action; + public Vnt_rule_undi_mgr Cnv_tbl() {return cnv_tbl;} private final Vnt_rule_undi_mgr cnv_tbl = new Vnt_rule_undi_mgr(); + public void Init(Vnt_converter_lang converter, Xol_vnt_regy vnt_regy, Xol_vnt_itm vnt_itm) { + this.converter = converter; + this.vnt_regy = vnt_regy; this.vnt_itm = vnt_itm; this.vnt_key = vnt_itm.Key(); + rule_parser.Init(vnt_regy); } - public void Parse() { - } - public void Parse_flags(Vnt_flag_parser parser) { - this.pipe_pos = Bry_find_.Find_fwd(src, Byte_ascii.Pipe, src_bgn, src_end); - if (pipe_pos != Bry_find_.Not_found) // "|" found; EX: -{A|}- - parser.Parse(src, src_bgn, 
pipe_pos); - int flag_count = parser.Count(); - if (flag_count == 0) parser.Set_y(Vnt_flag_itm_.Tid_show); - else if (parser.Limit_if_exists(Vnt_flag_itm_.Tid_raw)) {} - else if (parser.Limit_if_exists(Vnt_flag_itm_.Tid_name)) {} - else if (parser.Limit_if_exists(Vnt_flag_itm_.Tid_del)) {} - else if (flag_count == 1 && parser.Get(Vnt_flag_itm_.Tid_title)) parser.Set_y(Vnt_flag_itm_.Tid_macro); - else if (parser.Get(Vnt_flag_itm_.Tid_macro)) { - boolean exists_d = parser.Get(Vnt_flag_itm_.Tid_descrip); - boolean exists_t = parser.Get(Vnt_flag_itm_.Tid_title); - parser.Clear(); - parser.Set_y_many(Vnt_flag_itm_.Tid_all, Vnt_flag_itm_.Tid_macro); - if (exists_d) parser.Set_y(Vnt_flag_itm_.Tid_descrip); - if (exists_t) parser.Set_y(Vnt_flag_itm_.Tid_title); + public void Parse(byte[] src, int src_bgn, int src_end) { + this.display = this.title = null; + this.action = Byte_ascii.Null; + int pipe_pos = Bry_find_.Find_fwd(src, Byte_ascii.Pipe, src_bgn, src_end); + flag_parser.Parse(flag_codes, flag_langs, vnt_regy, src, src_bgn, pipe_pos); + int rule_bgn = pipe_pos == -1 ? 
src_bgn : pipe_pos + 1; + this.rule_raw = Bry_.Mid(src, rule_bgn, src_end); + int flag_langs_count = flag_langs.Count(); + if (flag_langs_count > 0) { // vnts exist in flag; EX: -{zh-hans;zh-hant|text}- + if (flag_langs.Has(vnt_key)) + rule_raw = converter.Auto_convert(vnt_itm, rule_raw); // convert rule text to current language; EX:-{|convert}- + else { + byte[][] fallbacks = vnt_itm.Fallback_ary(); + int fallbacks_len = fallbacks.length; + for (int i = 0; i < fallbacks_len; ++i) { + byte[] fallback = fallbacks[i]; + if (flag_langs.Has(fallback)) { + Xol_vnt_itm fallback_itm = (Xol_vnt_itm)vnt_regy.Get_by(fallback); + rule_raw = converter.Auto_convert(fallback_itm, rule_raw); + break; + } + } + } + flag_codes.Limit(Vnt_flag_code_.Tid_raw); } - else { - if (parser.Get(Vnt_flag_itm_.Tid_add)) - parser.Set_y_many(Vnt_flag_itm_.Tid_all, Vnt_flag_itm_.Tid_show); - if (parser.Get(Vnt_flag_itm_.Tid_descrip)) - parser.Set_n(Vnt_flag_itm_.Tid_show); - parser.Limit_if_exists_vnts(); // try to find flags like "zh-hans", "zh-hant"; allow syntaxes like "-{zh-hans;zh-hant|XXXX}-" + rule_parser.Clear(rule_undis, rule_bidis, rule_raw); + if (!flag_codes.Get(Vnt_flag_code_.Tid_raw) && !flag_codes.Get(Vnt_flag_code_.Tid_name)) { + rule_parser.Parse(src, rule_bgn, src_end); + } + if (rule_undis.Has_none() && rule_bidis.Has_none()) { + if ( flag_codes.Get(Vnt_flag_code_.Tid_add) + || flag_codes.Get(Vnt_flag_code_.Tid_del) + ) { // fill all variants if text in -{A/H/-|text} without rules + for (int i = 0; i < flag_langs_count; ++i) { + Xol_vnt_itm itm = flag_langs.Get_at(i); + rule_bidis.Set(itm.Key(), rule_raw); + } + } + else if ( !flag_codes.Get(Vnt_flag_code_.Tid_name) + && !flag_codes.Get(Vnt_flag_code_.Tid_title) + ) { + flag_codes.Limit(Vnt_flag_code_.Tid_raw); + } + } + int flag_count = Vnt_flag_code_.Tid__max; + for (int flag = 0; flag < flag_count; ++flag) { + if (!flag_codes.Get(flag)) continue; + switch (flag) { + case Vnt_flag_code_.Tid_raw: display = rule_parser.Raw(); 
break; // if we don't do content convert, still strip the -{}- tags + case Vnt_flag_code_.Tid_name: // process N flag: output current variant name + byte[] vnt_key_trim = Bry_.Trim(rule_parser.Raw()); + Xol_vnt_itm vnt_itm_trim = vnt_regy.Get_by(vnt_key_trim); + display = vnt_itm_trim == null ? display = Bry_.Empty : vnt_itm_trim.Name(); + break; + case Vnt_flag_code_.Tid_descrip: display = Make_descrip(); break; // process D flag: output rules description + case Vnt_flag_code_.Tid_hide: display = Bry_.Empty; break; // process H,- flag or T only: output nothing + case Vnt_flag_code_.Tid_del: display = Bry_.Empty; action = Byte_ascii.Dash; break; + case Vnt_flag_code_.Tid_add: display = Bry_.Empty; action = Byte_ascii.Plus; break; + case Vnt_flag_code_.Tid_show: display = Make_converted(vnt_itm); break; + case Vnt_flag_code_.Tid_title: display = Bry_.Empty; title = Make_title(vnt_itm); break; + default: break; // ignore unknown flags (but see error case below) + } + } + if (display == null) + display = Bry_.Add(Bry__error_bgn, Bry__error_end); // wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped() + Make_conv_tbl(); + } + private void Make_conv_tbl() { + if (rule_undis.Has_none() && rule_bidis.Has_none()) return; // Special case optimisation + cnv_tbl.Clear(); cnv_marked_hash.Clear(); + int vnt_regy_len = vnt_regy.Len(); + for (int i = 0; i < vnt_regy_len; ++i) { + Xol_vnt_itm vnt = vnt_regy.Get_at(i); + byte[] vnt_key = vnt.Key(); + // bidi: fill in missing variants with fallbacks + byte[] bidi_bry = rule_bidis.Get_text_by_key_or_null(vnt_key); + if (bidi_bry == null) { + bidi_bry = rule_bidis.Get_text_by_ary_or_null(vnt.Fallback_ary()); + if (bidi_bry != null) rule_bidis.Set(vnt_key, bidi_bry); + } + if (bidi_bry != null) { + int marked_len = cnv_marked_hash.Count(); + for (int j = 0; j < marked_len; ++j) { + Xol_vnt_itm marked_itm = (Xol_vnt_itm)cnv_marked_hash.Get_at(j); + byte[] marked_key = marked_itm.Key(); + byte[] marked_bry = 
rule_bidis.Get_text_by_key_or_null(marked_key); + byte[] cur_bidi_bry = rule_bidis.Get_text_by_key_or_null(vnt_key); + if (vnt.Dir() == Xol_vnt_dir_.Tid__bi) + cnv_tbl.Set(vnt_key, marked_bry, cur_bidi_bry); + if (marked_itm.Dir() == Xol_vnt_dir_.Tid__bi) + cnv_tbl.Set(marked_key, cur_bidi_bry, marked_bry); + } + cnv_marked_hash.Add(vnt_key, vnt); + } + // undi: fill to convert tables + byte[] undi_bry = rule_undis.Get_text_by_key_or_null(vnt_key); + if (vnt.Dir() != Xol_vnt_dir_.Tid__none && undi_bry != null) { + Vnt_rule_undi_grp undi_grp = rule_undis.Get_by(vnt_key); + int undi_grp_len = undi_grp.Len(); + for (int j = 0; j < undi_grp_len; ++j) { + Vnt_rule_undi_itm undi_itm = undi_grp.Get_at(j); + cnv_tbl.Set(vnt_key, undi_itm.Src(), undi_itm.Trg()); + } + } } } - public void Parse_rules(Vnt_rule_parser parser) { - parser.Parse(src, src_bgn, src_end); + private byte[] Make_descrip() { + int len = rule_bidis.Len(); + for (int i = 0; i < len; ++i) { + Vnt_rule_bidi_itm bidi_itm = rule_bidis.Get_at(i); + Xol_vnt_itm vnt_itm = vnt_regy.Get_by(bidi_itm.Vnt()); + tmp_bfr.Add(vnt_itm.Name()).Add_byte_colon().Add(bidi_itm.Text()).Add_byte_semic(); + } + len = rule_undis.Len(); + for (int i = 0; i < len; ++i) { + Vnt_rule_undi_grp undi_grp = rule_undis.Get_at(i); + int sub_len = undi_grp.Len(); + for (int j = 0; j < sub_len; ++j) { + Vnt_rule_undi_itm undi_itm = (Vnt_rule_undi_itm)undi_grp.Get_at(j); + Xol_vnt_itm undi_vnt = vnt_regy.Get_by(undi_grp.Vnt()); + tmp_bfr.Add(undi_itm.Src()).Add(Bry__undi_spr).Add(undi_vnt.Name()).Add_byte_colon().Add(undi_itm.Trg()).Add_byte_semic(); + } + } + return tmp_bfr.Xto_bry_and_clear(); } + private byte[] Make_title(Xol_vnt_itm vnt) { + if (vnt.Idx() == 0) { // for mainLanguageCode; EX: "zh" + byte[] rv = rule_bidis.Get_text_by_key_or_null(vnt.Key()); + return rv == null ? 
rule_undis.Get_text_by_key_or_null(vnt.Key()) : rv; + } + else + return Make_converted(vnt); + } + private byte[] Make_converted(Xol_vnt_itm vnt) { + if (rule_bidis.Len() == 0 && rule_undis.Len() == 0) return rule_raw; + byte[] rv = rule_bidis.Get_text_by_key_or_null(vnt.Key()); // display current variant in bidirectional array + if (rv == null) rv = rule_bidis.Get_text_by_ary_or_null(vnt.Fallback_ary()); // or display current variant in fallbacks + if (rv == null) rv = rule_undis.Get_text_by_key_or_null(vnt.Key()); // or display current variant in unidirectional array + if (rv == null && vnt.Dir() == Xol_vnt_dir_.Tid__none) { // or display first text under disable manual convert + rv = (rule_bidis.Len() > 0) ? rule_bidis.Get_text_at(0) : rule_undis.Get_text_at(0); + } + return rv; + } + private final static byte[] + Bry__error_bgn = Bry_.new_a7("") + , Bry__error_end = Bry_.new_a7("") + , Bry__undi_spr = Bry_.new_u8("⇒") + ; } diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_itm_.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_code_.java similarity index 52% rename from 400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_itm_.java rename to 400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_code_.java index 6e3fa482e..d8359b4d6 100644 --- a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_itm_.java +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_code_.java @@ -16,37 +16,35 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . 
*/ package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; -class Vnt_flag_itm_ { +class Vnt_flag_code_ { public static final int - Tid_unknown = 0 - , Tid_show = 1 // S: EX: -{S|zh-hans:A;zh-hant:B}- -> "A" - , Tid_all = 2 // +: EX: -{+|zh-hans:A;zh-hant:B}- -> "A" - , Tid_err = 3 // E: EX: -{E|zh-hans:A;zh-hant:B}- -> "A" - , Tid_add = 4 // A: add and output; EX: -{A|zh-hans:A;zh-hant:B}- -> "A" - , Tid_title = 5 // T: page_title; EX: -{T|zh-hans:A;zh-hant:B}- -> "" - , Tid_raw = 6 // R: raw: no convert; EX: -{R|zh-hans:A;zh-hant:B}- -> "zh-hans:A;zh-hant:B" - , Tid_descrip = 7 // D: describe; EX: -{D|zh-hans:A;zh-hant:B}- -> "简体:A;繁體:B;" (简体=Simplified;繁體=Traditional) - , Tid_del = 8 // -: remove; EX: -{-|zh-hans:A;zh-hant:B}- -> "" - , Tid_macro = 9 // H: macro; EX: -{H|zh-hans:A;zh-hant:B}- -> "" - , Tid_name = 10 // N: EX: -{N|zh-hans:A;zh-hant:B}- -> "" - , Tid_lang = 11 // vnt: EX: -{zh-hant|B}- -> "B" - , Tid__max = 12 + Tid_add = 0 // +: EX: -{+|zh-hans:A;zh-hant:B}- -> "A" + , Tid_del = 1 // -: remove; EX: -{-|zh-hans:A;zh-hant:B}- -> "" + , Tid_aout = 2 // A: Add and output; EX: -{A|zh-hans:A;zh-hant:B}- -> "A" + , Tid_hide = 3 // H: Hide macro; EX: -{H|zh-hans:A;zh-hant:B}- -> "" + , Tid_raw = 4 // R: Raw: no convert; EX: -{R|zh-hans:A;zh-hant:B}- -> "zh-hans:A;zh-hant:B" + , Tid_show = 5 // S: Show EX: -{S|zh-hans:A;zh-hant:B}- -> "A" + , Tid_descrip = 6 // D: Describe; EX: -{D|zh-hans:A;zh-hant:B}- -> "简体:A;繁體:B;" (简体=Simplified;繁體=Traditional) + , Tid_name = 7 // N: variant Name EX: -{N|zh-hans:A;zh-hant:B}- -> "" + , Tid_title = 8 // T: page Title; EX: -{T|zh-hans:A;zh-hant:B}- -> "" + , Tid_err = 9 // E: Error EX: -{E|zh-hans:A;zh-hant:B}- -> "A" + , Tid__max = 10 ; private static final String[] Tid__names = new String[] - { "unknown", "show", "all", "err", "add", "title" - , "raw", "descrip", "del", "macro", "name", "lang" + { "+", "-", "A", "H", "R" + , "S", "D", "N", "T", "E" }; - public static String 
To_name(int tid) {return Tid__names[tid];} + public static String To_str(int tid) {return Tid__names[tid];} public static final Hash_adp_bry Regy = Hash_adp_bry.ci_a7() // NOTE: match either lc or uc; EX: -{D}- or -{d}-; - .Add_byte_int(Byte_ascii.Ltr_S , Tid_show) - .Add_byte_int(Byte_ascii.Plus , Tid_all) - .Add_byte_int(Byte_ascii.Ltr_E , Tid_err) - .Add_byte_int(Byte_ascii.Ltr_A , Tid_add) - .Add_byte_int(Byte_ascii.Ltr_T , Tid_title) - .Add_byte_int(Byte_ascii.Ltr_R , Tid_raw) - .Add_byte_int(Byte_ascii.Ltr_D , Tid_descrip) + .Add_byte_int(Byte_ascii.Plus , Tid_add) .Add_byte_int(Byte_ascii.Dash , Tid_del) - .Add_byte_int(Byte_ascii.Ltr_H , Tid_macro) + .Add_byte_int(Byte_ascii.Ltr_A , Tid_aout) + .Add_byte_int(Byte_ascii.Ltr_H , Tid_hide) + .Add_byte_int(Byte_ascii.Ltr_R , Tid_raw) + .Add_byte_int(Byte_ascii.Ltr_S , Tid_show) + .Add_byte_int(Byte_ascii.Ltr_D , Tid_descrip) .Add_byte_int(Byte_ascii.Ltr_N , Tid_name) + .Add_byte_int(Byte_ascii.Ltr_T , Tid_title) + .Add_byte_int(Byte_ascii.Ltr_E , Tid_err) ; } diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_code_mgr.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_code_mgr.java new file mode 100644 index 000000000..1ef36e257 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_code_mgr.java @@ -0,0 +1,56 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. 
If not, see . +*/ +package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +class Vnt_flag_code_mgr { + private final boolean[] ary = new boolean[Ary_len]; private final static int Ary_len = Vnt_flag_code_.Tid__max; + public int Count() {return count;} private int count = 0; + public boolean Get(int tid) {return ary[tid];} + public void Clear() { + count = 0; + for (int i = 0; i < Ary_len; ++i) + ary[i] = false; + } + public void Add(int tid) { + this.Set_y(tid); + ++count; + } + public void Set_y(int tid) {ary[tid] = Bool_.Y;} + public void Set_y_many(int... vals) { + int len = vals.length; + for (int i = 0; i < len; ++i) + ary[vals[i]] = Bool_.Y; + } + public void Set_n(int tid) {ary[tid] = Bool_.N;} + public void Limit(int tid) { + for (int i = 0; i < Ary_len; ++i) + ary[i] = i == tid; + } + public boolean Limit_if_exists(int tid) { + boolean exists = ary[tid]; if (!exists) return false; + this.Limit(tid); + return true; + } + public void To_bfr__dbg(Bry_bfr bfr) { + for (int i = 0; i < Ary_len; ++i) { + if (ary[i]) { + if (bfr.Len_gt_0()) bfr.Add_byte_semic(); + bfr.Add_str_a7(Vnt_flag_code_.To_str(i)); + } + } + } +} diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_lang_mgr.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_lang_mgr.java new file mode 100644 index 000000000..49e3b6b64 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_lang_mgr.java @@ -0,0 +1,35 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import gplx.xowa.langs.vnts.*; +class Vnt_flag_lang_mgr { + private final Ordered_hash regy = Ordered_hash_.new_bry_(); + public int Count() {return regy.Count();} + public boolean Has(byte[] vnt) {return regy.Has(vnt);} + public void Clear() {regy.Clear();} + public void Add(Xol_vnt_itm itm) {regy.Add(itm.Key(), itm);} + public Xol_vnt_itm Get_at(int i) {return (Xol_vnt_itm)regy.Get_at(i);} + public void To_bfr__dbg(Bry_bfr bfr) { + int len = regy.Count(); + for (int i = 0; i < len; ++i) { + Xol_vnt_itm itm = (Xol_vnt_itm)regy.Get_at(i); + if (bfr.Len_gt_0()) bfr.Add_byte_semic(); + bfr.Add(itm.Key()); + } + } +} diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_parser.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_parser.java index ba4981bb7..046374eae 100644 --- a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_parser.java +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_parser.java @@ -16,52 +16,48 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import gplx.xowa.langs.vnts.*; class Vnt_flag_parser implements gplx.core.brys.Bry_split_wkr { - private final Hash_adp_bry flag_regy = Vnt_flag_itm_.Regy; - private final Hash_adp_bry vnt_regy = Hash_adp_bry.cs(); - private final boolean[] flag_ary = new boolean[Vnt_flag_itm_.Tid__max]; - private int count = 0; - public int Count() {return count;} - public boolean Get(int tid) {return flag_ary[tid];} - public void Set_y(int tid) {flag_ary[tid] = Bool_.Y;} - public void Set_y_many(int... 
ary) { - int len = ary.length; - for (int i = 0; i < len; ++i) - flag_ary[ary[i]] = Bool_.Y; - } - public void Set_n(int tid) {flag_ary[tid] = Bool_.N;} - public void Limit(int tid) { - for (int i = 0; i < Vnt_flag_itm_.Tid__max; ++i) { - if (i != tid) flag_ary[i] = false; + private final Hash_adp_bry codes_regy = Vnt_flag_code_.Regy; + private Vnt_flag_code_mgr codes; private Vnt_flag_lang_mgr langs; + private Xol_vnt_regy vnt_regy; + public void Parse(Vnt_flag_code_mgr codes, Vnt_flag_lang_mgr langs, Xol_vnt_regy vnt_regy, byte[] src, int src_bgn, int src_end) { + this.codes = codes; this.langs = langs; this.vnt_regy = vnt_regy; + codes.Clear(); langs.Clear(); + if (src_end != Bry_find_.Not_found) // "|" found; EX: -{A|}- + Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Semic, true, this); + int codes_count = codes.Count(), langs_count = langs.Count(); + if (codes_count == 0) codes.Set_y(Vnt_flag_code_.Tid_show); + else if (codes.Limit_if_exists(Vnt_flag_code_.Tid_raw)) {} + else if (codes.Limit_if_exists(Vnt_flag_code_.Tid_name)) {} + else if (codes.Limit_if_exists(Vnt_flag_code_.Tid_del)) {} + else if (codes_count == 1 && codes.Get(Vnt_flag_code_.Tid_title)) codes.Set_y(Vnt_flag_code_.Tid_hide); + else if (codes.Get(Vnt_flag_code_.Tid_hide)) { + boolean exists_d = codes.Get(Vnt_flag_code_.Tid_descrip); + boolean exists_t = codes.Get(Vnt_flag_code_.Tid_title); + codes.Clear(); + codes.Set_y_many(Vnt_flag_code_.Tid_add, Vnt_flag_code_.Tid_hide); + if (exists_d) codes.Set_y(Vnt_flag_code_.Tid_descrip); + if (exists_t) codes.Set_y(Vnt_flag_code_.Tid_title); } - } - public boolean Limit_if_exists(int tid) { - boolean exists = flag_ary[tid]; if (!exists) return false; - for (int i = 0; i < Vnt_flag_itm_.Tid__max; ++i) { - if (i != tid) flag_ary[i] = false; + else { + if (codes.Get(Vnt_flag_code_.Tid_aout)) + codes.Set_y_many(Vnt_flag_code_.Tid_add, Vnt_flag_code_.Tid_show); + if (codes.Get(Vnt_flag_code_.Tid_descrip)) + codes.Set_n(Vnt_flag_code_.Tid_show); + if 
(langs_count > 0) + codes.Clear(); } - return true; - } - public boolean Limit_if_exists_vnts() { - return false; - } - public void Clear() { - count = 0; - for (int i = 0; i < Vnt_flag_itm_.Tid__max; ++i) - flag_ary[i] = false; - } - public void Parse(byte[] src, int src_bgn, int src_end) { - this.Clear(); - Bry_split_.Split(src, Byte_ascii.Semic, true, this); } public int Split(byte[] src, int itm_bgn, int itm_end) { - int flag_tid = flag_regy.Get_as_int_or(src, itm_bgn, itm_end, -1); - if (flag_tid == -1) { - int vnt_tid = vnt_regy.Get_as_int_or(src, itm_bgn, itm_end, -1); - if (vnt_tid == -1) return Bry_split_.Rv__ok; // unknown flag; ignore + int flag_tid = codes_regy.Get_as_int_or(src, itm_bgn, itm_end, -1); + if (flag_tid == -1) { // try to find flags like "zh-hans", "zh-hant"; allow syntaxes like "-{zh-hans;zh-hant|XXXX}-" + Xol_vnt_itm vnt_itm = vnt_regy.Get_by(src, itm_bgn, itm_end); + if (vnt_itm == null) return Bry_split_.Rv__ok; // unknown flag; ignore + langs.Add(vnt_itm); + return Bry_split_.Rv__ok; } - flag_ary[flag_tid] = true; - ++count; + codes.Add(flag_tid); return Bry_split_.Rv__ok; } } diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_parser_tst.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_parser_tst.java new file mode 100644 index 000000000..08abbe0db --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_parser_tst.java @@ -0,0 +1,55 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. 
+ +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import org.junit.*; import gplx.xowa.langs.vnts.*; +public class Vnt_flag_parser_tst { + private final Vnt_flag_parser_fxt fxt = new Vnt_flag_parser_fxt(); + @Test public void Basic() {fxt.Test_parse("D" , "D");} + @Test public void Multiple() {fxt.Test_parse("+;S;E" , "+;S;E");} + @Test public void Ws() {fxt.Test_parse(" + ; S ; E " , "+;S;E");} + @Test public void None() {fxt.Test_parse("" , "S");} + @Test public void Wrong() {fxt.Test_parse("XYZ" , "S");} + @Test public void Raw__limit() {fxt.Test_parse("R;S" , "R");} + @Test public void Name__limit() {fxt.Test_parse("N;S" , "N");} + @Test public void Del_limit() {fxt.Test_parse("-;S" , "-");} + @Test public void Title__also_macro_y() {fxt.Test_parse("T" , "H;T");} + @Test public void Title__also_macro_n() {fxt.Test_parse("T;S" , "S;T");} + @Test public void Hide__remove_all() {fxt.Test_parse("H;S" , "+;H");} + @Test public void Hide__keep_descrip() {fxt.Test_parse("H;S;D" , "+;H;D");} + @Test public void Hide__keep_title() {fxt.Test_parse("H;S;T" , "+;H;T");} + @Test public void Aout__also_show_all() {fxt.Test_parse("A" , "+;A;S");} + @Test public void Descrip__remove_show() {fxt.Test_parse("D;S" , "D");} + @Test public void Aout_w_descrip() {fxt.Test_parse("A;D;S" , "+;A;D");} + @Test public void Lang__one() {fxt.Test_parse("zh-hans" , "S;zh-hans");} + @Test public void Lang__many() {fxt.Test_parse("zh-cn;zh-hk" , "S;zh-cn;zh-hk");} + @Test public void Lang__many__ws() {fxt.Test_parse(" zh-cn ; zh-hk " , "S;zh-cn;zh-hk");} + @Test public void Lang__zap__codes() {fxt.Test_parse("+;S;zh-hans;" , "zh-hans");} +} +class Vnt_flag_parser_fxt { + private final Vnt_flag_parser parser = new Vnt_flag_parser(); + private final Vnt_flag_code_mgr codes = new Vnt_flag_code_mgr(); private final Vnt_flag_lang_mgr 
langs = new Vnt_flag_lang_mgr(); + private final Xol_vnt_regy vnt_regy = Xol_vnt_regy_fxt.new_chinese(); + private final Bry_bfr bfr = Bry_bfr.new_(); + public void Test_parse(String raw, String expd) { + byte[] src = Bry_.new_u8(raw); + parser.Parse(codes, langs, vnt_regy, src, 0, src.length); + codes.To_bfr__dbg(bfr); + langs.To_bfr__dbg(bfr); + Tfds.Eq_str(expd, bfr.Xto_str_and_clear()); + } +} diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_html_doc_wkr.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_html_doc_wkr.java new file mode 100644 index 000000000..5071a8453 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_html_doc_wkr.java @@ -0,0 +1,74 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . 
+*/ +package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.xndes.*; +import gplx.xowa.langs.vnts.converts.*; +import gplx.xowa.html.*; +class Vnt_html_doc_wkr implements Mwh_doc_wkr { + private final Hash_adp_bry atr_hash = Hash_adp_bry.ci_a7(); + private Bry_bfr bfr; + private final Xol_convert_mgr convert_mgr; private int convert_vnt_idx; + public Vnt_html_doc_wkr(Xol_convert_mgr convert_mgr) { + this.convert_mgr = convert_mgr; + atr_hash.Add_many_str("title", "alt"); + } + public Hash_adp_bry Nde_regy() {return nde_regy;} private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw(); + public void Init(Bry_bfr bfr, int convert_vnt_idx) {this.bfr = bfr; this.convert_vnt_idx = convert_vnt_idx;} + public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) { + int val_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn]; + int val_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_end]; + if ( atr_hash.Get_by_mid(key_bry, 0, key_bry.length) == null // title, alt + || !key_exists + || Bry_find_.Find_fwd(src, Bry__url_frag, val_bgn, val_end) != Bry_find_.Not_found + ) { // handle name-only attribs like "" + int atr_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_bgn]; + int atr_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_end]; + bfr.Add_mid(src, atr_bgn, atr_end); + } + else { + bfr.Add_byte_space(); + bfr.Add(key_bry); + bfr.Add_byte(Byte_ascii.Eq); + byte quote_byte = Mwh_atr_itm.Calc_qte_byte(itm_ary, itm_idx); + bfr.Add_byte(quote_byte); + bfr.Add(convert_mgr.Convert_text(convert_vnt_idx, src, val_bgn, val_end)); + bfr.Add_byte(quote_byte); + } + } + public void On_txt_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) { + switch (nde_tid) { + case Xop_xnde_tag_.Tid_code: + case Xop_xnde_tag_.Tid_script: + case 
Xop_xnde_tag_.Tid_pre: + bfr.Add_mid(src, itm_bgn, itm_end); + break; + default: + bfr.Add(convert_mgr.Convert_text(convert_vnt_idx, src, itm_bgn, itm_end)); + break; + } + } + public void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) { + bfr.Add_byte(Byte_ascii.Angle_bgn).Add_mid(src, key_bgn, key_end); // EX: "" or ">" + } + public void On_nde_tail_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {bfr.Add_mid(src, itm_bgn, itm_end);} + public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {bfr.Add_mid(src, itm_bgn, itm_end);} + private static final byte[] Bry__url_frag = Bry_.new_a7("://"); // REF.MW: if ( !strpos( $attr, '://' ) ) { +} diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_language_converter.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_language_converter.java deleted file mode 100644 index 5f502246e..000000000 --- a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_language_converter.java +++ /dev/null @@ -1,110 +0,0 @@ -/* -XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012 gnosygnu@gmail.com - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as -published by the Free Software Foundation, either version 3 of the -License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see . 
-*/ -package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; -import gplx.core.btries.*; import gplx.core.primitives.*; -public class Vnt_language_converter { - private final Bry_bfr bfr = Bry_bfr.new_(); - private int max_depth = 32; - private byte[] src; private int src_len; - private boolean convert_needed; - private int pos; - public byte[] Parse(byte[] vnt, byte[] src) {// REF.MW:/languages/LanguageConverter.php!recursiveConvertTopLevel - synchronized (bfr) { - int markup_count = 0; - this.pos = 0; - this.convert_needed = false; // for sr lang; SEE:LanguageSr.php !$this->guessVariant(src, vnt); - this.src = src; this.src_len = src.length; - while (pos < src_len) { - int curly_bgn = Bry_find_.Find_fwd(src, Bry__curly_bgn, pos, src_len); - if (curly_bgn == Bry_find_.Not_found) { // No more markup, append final segment - if (markup_count == 0) return src; // no markups found; just return original - Add_output(vnt, convert_needed, src, pos, src_len); - return bfr.Xto_bry_and_clear(); - } - bfr.Add_mid(src, pos, curly_bgn); // Markup found; append segment - Add_output(vnt, convert_needed, src, pos, src_len); - pos = curly_bgn; // Advance position - ++markup_count; - Parse_recursive(vnt, pos, 1); // Do recursive conversion - } - return bfr.Xto_bry_and_clear(); - } - } - private void Parse_recursive(byte[] vnt, int pos, int depth) { - pos += 2; // skip "-{" - boolean warning_done = false; - // $inner = ''; - while (pos < src_len) { - byte b = src[pos]; - Object o = trie.Match_bgn_w_byte(b, src,pos, src_len); - if (o == null) { // char; - ++pos; - continue; - } - int new_pos = trie.Match_pos(); // Markup found; Append initial segment - bfr.Add_mid(src, pos, new_pos); - pos = new_pos; // Advance position - switch (((Byte_obj_val)o).Val()) { - case Tid__curly_bgn: - if (depth >= max_depth) { - bfr.Add(Bry__curly_bgn); - if (!warning_done) { - bfr.Add_str(""); - // 
wfMessage('language-converter-depth-warning')->numParams($this->mMaxDepth)->inContentLanguage()->text() - bfr.Add_str(""); - warning_done = true; - } - pos += 2; // skip "-{" - continue; - } - Parse_recursive(vnt, pos, depth + 1); // Recursively parse another rule - break; - case Tid__curly_end: - pos += 2; - /* - // Apply the rule - $rule = new ConverterRule($inner, $this); - $rule->parse($variant); - $this->applyManualConv($rule); - return $rule->getDisplay(); - */ - return; - default: throw Err_.new_unhandled(-1); // never happens - } - } - if (pos < src_len) { // Unclosed rule - byte[] frag = Auto_convert(vnt, src, pos, src_len); - bfr.Add(Bry__curly_bgn).Add(frag); - } - pos = src_len; - } - private void Add_output(byte[] vnt, boolean convert_needed, byte[] src, int pos, int src_len) { - if (convert_needed) { - byte[] frag = Auto_convert(vnt, src, pos, src_len); - bfr.Add(frag); - } - else - bfr.Add_mid(src, pos, src_len); - } - private byte[] Auto_convert(byte[] vnt, byte[] src, int bgn, int end) {return src;} - private static final byte Tid__curly_bgn = 1, Tid__curly_end = 2; - private static final byte[] Bry__curly_bgn = Bry_.new_a7("-{"), Bry__curly_end = Bry_.new_a7("}-"); - private static final Btrie_fast_mgr trie = Btrie_fast_mgr.cs() - .Add_bry_byte(Bry__curly_bgn, Tid__curly_bgn) - .Add_bry_byte(Bry__curly_end, Tid__curly_end); -} diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_bidi_mgr.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_bidi_mgr.java new file mode 100644 index 000000000..b812aca51 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_bidi_mgr.java @@ -0,0 +1,68 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +/** Registry of the "bidi" entries of a variant rule: one Vnt_rule_bidi_itm per variant key, kept in an Ordered_hash keyed by the variant bytes. */ class Vnt_rule_bidi_mgr { + private final Ordered_hash hash = Ordered_hash_.new_bry_(); + public int Len() {return hash.Count();} + public boolean Has_none() {return hash.Count() == 0;} + public void Clear() {hash.Clear();} + public Vnt_rule_bidi_itm Get_at(int i) {return (Vnt_rule_bidi_itm)hash.Get_at(i);} + public Vnt_rule_bidi_itm Get_by(byte[] k) {return (Vnt_rule_bidi_itm)hash.Get_by(k);} + /* Tries the keys of ary in order and returns the first non-null text found; returns null when no key has an entry with text. */ public byte[] Get_text_by_ary_or_null(byte[]... ary) { + int len = ary.length; + byte[] rv = null; + for (int i = 0; i < len; ++i) { + byte[] itm = ary[i]; + Vnt_rule_bidi_itm bidi_itm = (Vnt_rule_bidi_itm)hash.Get_by(itm); if (bidi_itm == null) continue; + rv = Get_text_by_key_or_null(bidi_itm.Vnt()); + if (rv != null) return rv; + } + return rv; + } + /* Returns the text stored for vnt, or null when vnt has no entry. */ public byte[] Get_text_by_key_or_null(byte[] vnt) { + Vnt_rule_bidi_itm rv = (Vnt_rule_bidi_itm)hash.Get_by(vnt); + return rv == null ? null : rv.Text(); + } + /* Positional lookup: text of the i-th entry, or null when Get_at yields null. */ public byte[] Get_text_at(int i) { + Vnt_rule_bidi_itm itm = (Vnt_rule_bidi_itm)hash.Get_at(i); + return itm == null ? 
null : itm.Text(); + } + /* Upsert: adds a new entry for vnt on first use; otherwise replaces the text of the existing entry. */ public void Set(byte[] vnt, byte[] text) { + Vnt_rule_bidi_itm itm = (Vnt_rule_bidi_itm)hash.Get_by(vnt); + if (itm == null) { + itm = new Vnt_rule_bidi_itm(vnt, text); + hash.Add(vnt, itm); + } + else + itm.Text_(text); + } + /* Debug dump: appends each entry as vnt, colon, text in hash order; every entry after the first is preceded by Add_byte_nl. */ public void To_bry__dbg(Bry_bfr bfr) { + int len = hash.Count(); + for (int i = 0; i < len; ++i) { + if (i != 0) bfr.Add_byte_nl(); + Vnt_rule_bidi_itm itm = (Vnt_rule_bidi_itm)hash.Get_at(i); + bfr.Add(itm.Vnt()).Add_byte_colon().Add(itm.Text()); + } + } +} +/** One bidi entry: a fixed variant key plus a text that Text_ can replace. */ class Vnt_rule_bidi_itm { + public Vnt_rule_bidi_itm(byte[] vnt, byte[] text) {this.vnt = vnt; this.text = text;} + public byte[] Vnt() {return vnt;} private final byte[] vnt; + public byte[] Text() {return text;} private byte[] text; + public void Text_(byte[] v) {this.text = v;} +} diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser.java index 6456576e2..81d346d3e 100644 --- a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser.java +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser.java @@ -17,45 +17,76 @@ along with this program. If not, see . 
*/ package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.core.btries.*; +import gplx.xowa.langs.vnts.*; class Vnt_rule_parser implements gplx.core.brys.Bry_split_wkr { -// private final Btrie_slim_mgr vnt_trie = Btrie_slim_mgr.ci_a7(); - public void Parse(byte[] src, int src_bgn, int src_end) { - Bry_split_.Split(src, Byte_ascii.Semic, false, this); // trim=false for "&#entity;" check below + private final Btrie_slim_mgr vnt_trie = Btrie_slim_mgr.ci_a7(); + private Vnt_rule_undi_mgr undis; private Vnt_rule_bidi_mgr bidis; + private int src_end, src_len; private byte[] rule_raw; + public byte[] Raw() {return rule_raw;} + public void Init(Xol_vnt_regy vnt_regy) { // rebuilds vnt_trie from vnt_regy: each variant key maps to its Xol_vnt_itm + this.vnt_trie.Clear(); + int len = vnt_regy.Len(); + for (int i = 0; i < len; ++i) { + Xol_vnt_itm itm = (Xol_vnt_itm)vnt_regy.Get_at(i); + vnt_trie.Add_obj(itm.Key(), itm); + } } - public int Split(byte[] src, int itm_bgn, int itm_end) { - int html_entity_pos = Bry_find_.Find_bwd_while_alphanum(src, itm_bgn); - if (Bry_.Eq(src, html_entity_pos - 2, html_entity_pos, Bry__html_entity)) return Bry_split_.Rv__extend; // reject "&#entity;"; EX: " zh-hans;" - /* - itm_bgn = skip fwd for ws; - itm_bgn = skip fwd for "=>" - Object vnt_obj = vnt_trie.Match_bgn(src, itm_bgn, itm_end); if (vnt_obj == null) return Bry_split_.Rv__extend; // reject ";not_variant"; EX: ";border" in "zh-hans:;zh-hant:" - itm_end = skip bwd for ws -// val = trim( val[0] ); -// trg = trim( val[1] ); -// $u = explode( '=>', val, 2 ); -// // if trg is empty, strtr() could return a wrong result -// if ( count( $u ) == 1 && trg && in_array( val, $variants ) ) { -// bidi_ary[val] = trg; -// } elseif ( count( $u ) == 2 ) { -// $from = trim( $u[0] ); -// val = trim( $u[1] ); -// if ( array_key_exists( val, $unidtable ) -// && !is_array( $unidtable[val] ) -// && trg -// && in_array( val, $variants ) ) { -// $unidtable[val] = array( $from => trg ); -// } elseif ( trg && in_array( val, $variants ) 
) { -// $unidtable[val][$from] = trg; -// } -// } -// // syntax error, pass -// if ( !isset( $this->mConverter->mVariantNames[val] ) ) { -// bidi_ary = array(); -// $unidtable = array(); -// break; -// } - */ + public void Clear(Vnt_rule_undi_mgr undis, Vnt_rule_bidi_mgr bidis, byte[] rule_raw) { // binds the output mgrs for the next Parse and empties both; rule_raw is only kept for Raw() + this.undis = undis; this.bidis = bidis; + undis.Clear(); bidis.Clear(); + this.rule_raw = rule_raw; + } + public void Parse(byte[] src, int src_bgn, int src_end) { // splits src between src_bgn and src_end on ";"; each candidate item is passed to Split() + this.src_end = src_end; this.src_len = src.length; + Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Semic, false, this); // trim=false for "&#entity;" check below + } + public int Split(byte[] src, int itm_bgn, int itm_end) { // item is "zh-hans:text" (stored in bidis) or "macro=>zh-hans:text" (stored in undis); returns Rv__ok to accept, Rv__extend to merge item with the next one, Rv__cancel to stop + int html_entity_pos = Bry_find_.Find_bwd_while_alphanum(src, itm_end); + byte html_entity_byte = src[html_entity_pos]; + if (html_entity_byte == Byte_ascii.Hash) html_entity_byte = src[html_entity_pos - 2]; // skip #; EX: &#123; NOTE(review): if html_entity_pos is the index of "#", then "&" sits at - 1, not - 2; confirm against Find_bwd_while_alphanum + if (html_entity_byte == Byte_ascii.Amp) return Bry_split_.Rv__extend; // reject "&#entity;"; EX: " zh-hans;" + if (itm_end != src_end) { + int nxt_lang_bgn = Bry_find_.Find_fwd(src, Bry__bidi_dlm, itm_end + 1, src_len); // look for next "=>" + if (nxt_lang_bgn == Bry_find_.Not_found) + nxt_lang_bgn = Bry_find_.Find_fwd_while_ws(src, itm_end + 1, src_len); // skip any ws after end ";"; EX: "a:1; b:2"; NOTE: +1 to skip semic; + else + nxt_lang_bgn += 2; + int nxt_lang_end = Bry_find_.Find_fwd(src, Byte_ascii.Colon, nxt_lang_bgn, src_len); // get colon; + if (nxt_lang_end != Bry_find_.Not_found) { + nxt_lang_end = Bry_find_.Find_bwd__skip_ws(src, nxt_lang_end, nxt_lang_bgn); // trim ws before colon; NOTE: 3rd arg is the lower bound of the backward scan; passing src_len skipped the scan and returned src_len + if (vnt_trie.Match_bgn(src, nxt_lang_bgn, nxt_lang_end) == null) return Bry_split_.Rv__extend; // reject ";not_variant"; EX: ";border" in "zh-hans:;zh-hant:" + } + } + int undi_bgn = Bry_find_.Find_fwd_while_ws(src, itm_bgn, itm_end); // skip any ws after bgn ";"; EX: " a=>b:c;" + int undi_end = Bry_find_.Find_fwd(src, Bry__bidi_dlm, undi_bgn, itm_end); // look 
for "=>" + int lang_bgn = undi_bgn; // default lang_bgn to undi_bgn; assumes no "=>" found (bidi item) + if (undi_end != Bry_find_.Not_found) { // "=>" found; undi item + lang_bgn = Bry_find_.Find_fwd_while_ws(src, undi_end + 2, itm_end); // set lang_bgn after => and gobble up ws + undi_end = Bry_find_.Find_bwd__skip_ws(src, undi_end, undi_bgn); // trim ws from end of undi src; + } + Object vnt_obj = vnt_trie.Match_bgn(src, lang_bgn, itm_end); + if (vnt_obj == null) { + return (itm_bgn == 0) ? Bry_split_.Rv__cancel : Bry_split_.Rv__extend; // if 1st item; cancel rest; otherwise, extend; NOTE(review): "1st item" is detected as itm_bgn == 0, which presumes Parse was called with src_bgn == 0 + } + int lang_end = vnt_trie.Match_pos(); + int text_bgn = Bry_find_.Find_fwd_while_ws(src, lang_end, itm_end); if (text_bgn >= itm_end || src[text_bgn] != Byte_ascii.Colon) return Bry_split_.Rv__extend; // no colon inside item; bounds check also prevents reading src[itm_end] when itm_end == src.length; EX: "x1" + ++text_bgn; + Xol_vnt_itm vnt_itm = (Xol_vnt_itm)vnt_obj; + byte[] vnt_key = vnt_itm.Key(); + byte[] text_bry = Bry_.Mid_w_trim(src, text_bgn, itm_end); + if (undi_end == Bry_find_.Not_found) + bidis.Set(vnt_key, text_bry); + else { + byte[] undi_bry = Bry_.Mid(src, undi_bgn, undi_end); + if (itm_end - text_bgn > 0) + undis.Set(vnt_key, undi_bry, text_bry); + } return Bry_split_.Rv__ok; } - private static final byte[] Bry__html_entity = Bry_.new_a7("&#"); + public void To_bry__dbg(Bry_bfr bfr) { // debug dump: undis first, then bidis; a nl is added between them only when bfr already has content + undis.To_bry__dbg(bfr); + if (bfr.Len_gt_0()) bfr.Add_byte_nl(); + bidis.To_bry__dbg(bfr); + } + private static final byte[] Bry__bidi_dlm = Bry_.new_a7("=>"); } diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser__bidi_tst.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser__bidi_tst.java new file mode 100644 index 000000000..fd7e6eb1c --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser__bidi_tst.java @@ -0,0 +1,27 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either 
version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import org.junit.*; +/** Tests Vnt_rule_parser on items written as vnt:text (no "=>"); each case passes the raw rule and the expected debug-dump lines to the fixture. */ public class Vnt_rule_parser__bidi_tst { + private final Vnt_rule_parser_fxt fxt = new Vnt_rule_parser_fxt(); + @Test public void Basic() {fxt.Test_parse("x1:v1;" , "x1:v1");} + /* whitespace around key and text is expected to be dropped */ @Test public void Ws() {fxt.Test_parse(" x1 : v1 ;" , "x1:v1");} + @Test public void Entity() {fxt.Test_parse("x1:a x2:b;x2:b;" , "x1:a x2:b" , "x2:b");} + /* an unknown variant after the 1st item is expected to stay part of the previous item's text */ @Test public void Unknown__nth() {fxt.Test_parse("x1:a;wx2:b;x2:b;" , "x1:a;wx2:b" , "x2:b");} + /* an unknown variant in the 1st item is expected to produce no output at all */ @Test public void Unknown__1st() {fxt.Test_parse("wx1:a;x1:b;" , "");} +} diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser__undi_tst.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser__undi_tst.java new file mode 100644 index 000000000..dcb0e8a22 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser__undi_tst.java @@ -0,0 +1,24 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. 
+ +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import org.junit.*; +/** Tests Vnt_rule_parser on items written as src=>vnt:trg; the expected lines have the form vnt:src=trg, as produced by the debug dump. */ public class Vnt_rule_parser__undi_tst { + private final Vnt_rule_parser_fxt fxt = new Vnt_rule_parser_fxt(); + @Test public void One() {fxt.Test_parse("k1=>x1:v1;" , "x1:k1=v1");} + /* two items for two different variants are expected as two separate lines */ @Test public void Many() {fxt.Test_parse("k1=>x1:v1;k2=>x2:v2;" , "x1:k1=v1", "x2:k2=v2");} +} diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser_fxt.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser_fxt.java new file mode 100644 index 000000000..bff4c5c24 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser_fxt.java @@ -0,0 +1,37 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . 
+*/ +package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import gplx.xowa.langs.vnts.*; +/** Test fixture: owns one Vnt_rule_parser plus the undi and bidi mgrs it fills, and compares the parser's debug dump with expected lines. */ class Vnt_rule_parser_fxt { + private final Vnt_rule_parser parser = new Vnt_rule_parser(); private final Vnt_rule_undi_mgr undis = new Vnt_rule_undi_mgr(); private final Vnt_rule_bidi_mgr bidis = new Vnt_rule_bidi_mgr(); + private final Bry_bfr bfr = Bry_bfr.new_(255); + /* Registers the three variant keys x1, x2, x3 and initializes the parser with that registry. */ public Vnt_rule_parser_fxt() { + Xol_vnt_regy vnt_regy = new Xol_vnt_regy(); + vnt_regy.Add(Bry_.new_a7("x1"), Bry_.new_a7("lang1")); + vnt_regy.Add(Bry_.new_a7("x2"), Bry_.new_a7("lang2")); + vnt_regy.Add(Bry_.new_a7("x3"), Bry_.new_a7("lang3")); + parser.Init(vnt_regy); + } + /* Clears both mgrs, parses raw from 0 to its full byte length, dumps the result into bfr and asserts it equals expd_ary (presumably joined with newlines by Concat_lines_nl_skip_last). */ public void Test_parse(String raw, String... expd_ary) { + byte[] src = Bry_.new_u8(raw); + parser.Clear(undis, bidis, src); + parser.Parse(src, 0, src.length); + parser.To_bry__dbg(bfr); + Tfds.Eq_str_lines(String_.Concat_lines_nl_skip_last(expd_ary), bfr.Xto_str_and_clear()); + } +} diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_undi_mgr.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_undi_mgr.java new file mode 100644 index 000000000..f4735675a --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_undi_mgr.java @@ -0,0 +1,80 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . 
+*/ +package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +class Vnt_rule_undi_mgr { // "undi" entries of a variant rule (items written as src=>vnt:trg), grouped by variant: vnt -> Vnt_rule_undi_grp + private final Ordered_hash hash = Ordered_hash_.new_bry_(); + public int Len() {return hash.Count();} + public boolean Has_none() {return hash.Count() == 0;} + public void Clear() {hash.Clear();} + public Vnt_rule_undi_grp Get_at(int i) {return (Vnt_rule_undi_grp)hash.Get_at(i);} + public Vnt_rule_undi_grp Get_by(byte[] key) {return (Vnt_rule_undi_grp)hash.Get_by(key);} + public byte[] Get_text_by_key_or_null(byte[] key) { // trg of the 1st pair registered for key; null when key has no grp or the grp is empty + Vnt_rule_undi_grp grp = (Vnt_rule_undi_grp)hash.Get_by(key); if (grp == null) return null; + return grp.Len() == 0 ? null : grp.Get_at(0).Trg(); // REF.MW: $disp = $disp[0]; + } + public byte[] Get_text_at(int i) { // same as Get_text_by_key_or_null, but the grp is picked by position + Vnt_rule_undi_grp grp = (Vnt_rule_undi_grp)hash.Get_at(i); if (grp == null) return null; + return grp.Len() == 0 ? null : grp.Get_at(0).Trg(); + } + public Vnt_rule_undi_grp Set(byte[] vnt, byte[] src, byte[] trg) { // creates the grp for vnt on first use, then registers src -> trg in it + Vnt_rule_undi_grp grp = (Vnt_rule_undi_grp)hash.Get_by(vnt); + if (grp == null) { + grp = new Vnt_rule_undi_grp(vnt); + hash.Add(vnt, grp); + } + grp.Set(src, trg); + return grp; + } + public void To_bry__dbg(Bry_bfr bfr) { // debug dump: one line per grp, written as vnt, colon, then the grp's pairs + int len = hash.Count(); + for (int i = 0; i < len; ++i) { + if (i != 0) bfr.Add_byte_nl(); + Vnt_rule_undi_grp grp = (Vnt_rule_undi_grp)hash.Get_at(i); + bfr.Add(grp.Vnt()).Add_byte_colon(); + grp.To_bry__dbg(bfr); + } + } +} +class Vnt_rule_undi_grp { // all src -> trg pairs of one variant, keyed by src + private final Ordered_hash hash = Ordered_hash_.new_bry_(); + public Vnt_rule_undi_grp(byte[] vnt) {this.vnt = vnt;} + public int Len() {return hash.Count();} + public Vnt_rule_undi_itm Get_at(int i) {return (Vnt_rule_undi_itm)hash.Get_at(i);} + public byte[] Vnt() {return vnt;} private final byte[] vnt; + public Vnt_rule_undi_itm Set(byte[] src, byte[] trg) { // upsert: new src is appended; existing src gets its trg replaced + Vnt_rule_undi_itm itm = (Vnt_rule_undi_itm)hash.Get_by(src); + if (itm == null) { + itm = new Vnt_rule_undi_itm(src, trg); + hash.Add(src, itm); + } else itm.Trg_(trg); // src already registered: last value wins, matching Vnt_rule_bidi_mgr.Set; REF.MW: $unidtable[$v][$from] = $to; + return 
itm; + } + public void To_bry__dbg(Bry_bfr bfr) { // debug dump: each pair as src, eq, trg; NOTE: no separator is written between pairs + int len = hash.Count(); + for (int i = 0; i < len; ++i) { + Vnt_rule_undi_itm itm = (Vnt_rule_undi_itm)hash.Get_at(i); + bfr.Add(itm.Src()).Add_byte_eq().Add(itm.Trg()); + } + } +} +class Vnt_rule_undi_itm { // one src -> trg pair; src is fixed, trg can be replaced through Trg_ + public Vnt_rule_undi_itm(byte[] src, byte[] trg) {this.src = src; this.trg = trg;} + public byte[] Src() {return src;} private final byte[] src; + public byte[] Trg() {return trg;} private byte[] trg; + public void Trg_(byte[] v) {this.trg = v;} +} diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Xop_vnt_lang_bldr.java b/400_xowa/src/gplx/xowa/parsers/vnts/Xop_vnt_lang_bldr.java index 787d1aad3..43672840b 100644 --- a/400_xowa/src/gplx/xowa/parsers/vnts/Xop_vnt_lang_bldr.java +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Xop_vnt_lang_bldr.java @@ -25,7 +25,7 @@ class Xop_vnt_lang_bldr { // performant way of building langs; EX: -{zh;zh-hans; public void Add(byte[] key) { Xol_vnt_itm vnt = vnt_regy.Get_by(key); if (vnt == null) return; // ignore invalid vnts; EX: -{zh;zhx}- int vnt_mask = vnt.Mask__vnt(); - this.rslt_mask = (rslt_mask == 0) ? vnt_mask : Enm_.Flip_int(true, rslt_mask, vnt_mask); + this.rslt_mask = (rslt_mask == 0) ? vnt_mask : Bitmask_.Flip_int(true, rslt_mask, vnt_mask); } public Xop_vnt_flag Bld() { return (rslt_mask == 0) ? Xop_vnt_flag_.Flag_unknown : Xop_vnt_flag.new_lang(rslt_mask); diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Xop_vnt_parser__tkn__basic__tst.java b/400_xowa/src/gplx/xowa/parsers/vnts/Xop_vnt_parser__tkn__basic__tst.java index 32446fc1b..51a1ae1e0 100644 --- a/400_xowa/src/gplx/xowa/parsers/vnts/Xop_vnt_parser__tkn__basic__tst.java +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Xop_vnt_parser__tkn__basic__tst.java @@ -53,7 +53,7 @@ class Xop_vnt_tkn_mok { } public Xop_vnt_tkn_mok Flags_none_() {flags_list.Clear(); return this;} public Xop_vnt_tkn_mok Flags_unknown_(String... 
v) {flags_list.Add(Xop_vnt_flag_.Flag_unknown); return this;} - public Xop_vnt_tkn_mok Flags_langs_(int... ary) {flags_list.Add(Xop_vnt_flag.new_lang(Enm_.Add_int_ary(ary))); return this;} + public Xop_vnt_tkn_mok Flags_langs_(int... ary) {flags_list.Add(Xop_vnt_flag.new_lang(Bitmask_.Add_int_ary(ary))); return this;} public Xop_vnt_tkn_mok Flags_codes_(String... ary) { int len = ary.length; for (int i = 0; i < len; i++) { @@ -122,7 +122,7 @@ class Xop_vnt_lxr_fxt { int itm_mask = itm.Mask(); for (int i = 0; i < 32; ++i) { int mask = gplx.core.brys.Bit_.Get_flag(i); - if (Enm_.Has_int(mask, itm_mask)) { + if (Bitmask_.Has_int(mask, itm_mask)) { Xol_vnt_itm vnt = vnt_regy.Get_at(i); bfr.Add(vnt.Key()).Add_byte(Byte_ascii.Semic); } diff --git a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xatr_parser_tst.java b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xatr_parser_tst.java index 4ad8ea7eb..96b7b493d 100644 --- a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xatr_parser_tst.java +++ b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xatr_parser_tst.java @@ -18,7 +18,8 @@ along with this program. If not, see . 
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import org.junit.*; import gplx.core.tests.*; public class Xop_xatr_parser_tst { - @Test public void Kv_quote_double() {fxt.tst_("a=\"b\"", fxt.new_atr_("a", "b"));} private Xop_xatr_parser_fxt fxt = new Xop_xatr_parser_fxt(); + private final Xop_xatr_parser_fxt fxt = new Xop_xatr_parser_fxt(); + @Test public void Kv_quote_double() {fxt.tst_("a=\"b\"", fxt.new_atr_("a", "b"));} @Test public void Kv_quote_single() {fxt.tst_("a='b'", fxt.new_atr_("a", "b"));} @Test public void Kv_quote_none() {fxt.tst_("a=b", fxt.new_atr_("a", "b"));} @Test public void Kv_empty() {fxt.tst_("a=''", fxt.new_atr_("a", ""));} diff --git a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xatr_whitelist_mgr_tst.java b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xatr_whitelist_mgr_tst.java index 6415aff6d..308d7022f 100644 --- a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xatr_whitelist_mgr_tst.java +++ b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xatr_whitelist_mgr_tst.java @@ -49,12 +49,12 @@ class Xop_xatr_whitelist_fxt { public void Clear() { if (whitelist_mgr == null) whitelist_mgr = new Xop_xatr_whitelist_mgr().Ini(); } private Xop_xatr_whitelist_mgr whitelist_mgr; - public void Whitelist(byte tag_id, String key_str, boolean expd) { + public void Whitelist(int tag_id, String key_str, boolean expd) { byte[] key_bry = Bry_.new_a7(key_str); atr_itm.Key_rng_(0, key_bry.length); Tfds.Eq(expd, whitelist_mgr.Chk(tag_id, key_bry, atr_itm), key_str); } private Xop_xatr_itm atr_itm = new Xop_xatr_itm(0, 0); - public void Whitelist(byte tag_id, String key_str, String val_str, boolean expd) { + public void Whitelist(int tag_id, String key_str, String val_str, boolean expd) { byte[] key_bry = Bry_.new_a7(key_str); atr_itm.Key_rng_(0, key_bry.length); atr_itm.Val_bry_(Bry_.new_a7(val_str)); diff --git a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag.java b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag.java index 
79bece15e..d7081e8bc 100644 --- a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag.java +++ b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag.java @@ -22,32 +22,33 @@ public class Xop_xnde_tag { this.id = id; this.name_bry = Bry_.new_a7(name_str); this.name_str = name_str; - name_len = name_bry.length; - xtn_end_tag = Bry_.Add(Xop_xnde_tag_.XtnEndTag_bgn, name_bry); // always force endtag; needed for - xtn_end_tag_tmp = new byte[xtn_end_tag.length]; Array_.Copy(xtn_end_tag, xtn_end_tag_tmp); + this.name_len = name_bry.length; + this.xtn_end_tag = Bry_.Add(Xop_xnde_tag_.Xtn_end_tag_bgn, name_bry); // always force endtag; needed for + this.xtn_end_tag_tmp = new byte[xtn_end_tag.length]; Array_.Copy(xtn_end_tag, xtn_end_tag_tmp); } - public int Id() {return id;} public Xop_xnde_tag Id_(int v) {id = v; return this;} private int id; - public byte[] Name_bry() {return name_bry;} private byte[] name_bry; - public String Name_str() {return name_str;} private String name_str; - public int Name_len() {return name_len;} private int name_len; + public int Id() {return id;} private final int id; + public byte[] Name_bry() {return name_bry;} private final byte[] name_bry; + public String Name_str() {return name_str;} private final String name_str; + public int Name_len() {return name_len;} private final int name_len; + public byte[] Xtn_end_tag() {return xtn_end_tag;} private final byte[] xtn_end_tag; + public byte[] Xtn_end_tag_tmp() {return xtn_end_tag_tmp;} private final byte[] xtn_end_tag_tmp; public boolean Xtn() {return xtn;} public Xop_xnde_tag Xtn_() {xtn = true; return this;} private boolean xtn; public boolean Xtn_mw() {return xtn_mw;} public Xop_xnde_tag Xtn_mw_() {xtn_mw = true; xtn = true; return this;} private boolean xtn_mw; // NOTE: Xtn_mw_() marks both xtn and xtn_mw as true - public byte[] XtnEndTag() {return xtn_end_tag;} private byte[] xtn_end_tag; - public byte[] XtnEndTag_tmp() {return xtn_end_tag_tmp;} private byte[] xtn_end_tag_tmp; - public int 
BgnNdeMode() {return bgnNdeMode;} private int bgnNdeMode = Xop_xnde_tag_.BgnNdeMode_normal; - public Xop_xnde_tag BgnNdeMode_inline_() {bgnNdeMode = Xop_xnde_tag_.BgnNdeMode_inline; return this;} - public int EndNdeMode() {return endNdeMode;} private int endNdeMode = Xop_xnde_tag_.EndNdeMode_normal; - public Xop_xnde_tag EndNdeMode_inline_() {endNdeMode = Xop_xnde_tag_.EndNdeMode_inline; return this;} - public Xop_xnde_tag EndNdeMode_escape_() {endNdeMode = Xop_xnde_tag_.EndNdeMode_escape; return this;} - public boolean SingleOnly() {return singleOnly;} public Xop_xnde_tag SingleOnly_() {singleOnly = true; return this;} private boolean singleOnly; - public boolean TblSub() {return tblSub;} public Xop_xnde_tag TblSub_() {tblSub = true; return this;} private boolean tblSub; + public int Bgn_nde_mode() {return bgn_nde_mode;} private int bgn_nde_mode = Xop_xnde_tag_.Bgn_nde_mode_normal; + public Xop_xnde_tag Bgn_nde_mode_inline_() {bgn_nde_mode = Xop_xnde_tag_.Bgn_nde_mode_inline; return this;} + public int End_nde_mode() {return end_nde_mode;} private int end_nde_mode = Xop_xnde_tag_.End_nde_mode_normal; + public Xop_xnde_tag End_nde_mode_inline_() {end_nde_mode = Xop_xnde_tag_.End_nde_mode_inline; return this;} + public Xop_xnde_tag End_nde_mode_escape_() {end_nde_mode = Xop_xnde_tag_.End_nde_mode_escape; return this;} + public boolean Single_only() {return single_only;} public Xop_xnde_tag Single_only_() {single_only = true; return this;} private boolean single_only; + public boolean Tbl_sub() {return tbl_sub;} public Xop_xnde_tag Tbl_sub_() {tbl_sub = true; return this;} private boolean tbl_sub; public boolean Restricted() {return restricted;} public Xop_xnde_tag Restricted_() {restricted = true; return this;} private boolean restricted; - public boolean NoInline() {return noInline;} public Xop_xnde_tag NoInline_() {noInline = true; return this;} private boolean noInline; + public boolean No_inline() {return no_inline;} public Xop_xnde_tag No_inline_() {no_inline = 
true; return this;} private boolean no_inline; public boolean Inline_by_backslash() {return inline_by_backslash;} public Xop_xnde_tag Inline_by_backslash_() {inline_by_backslash = true; return this;} private boolean inline_by_backslash; public boolean Section() {return section;} public Xop_xnde_tag Section_() {section = true; return this;} private boolean section; public boolean Repeat_ends() {return repeat_ends;} public Xop_xnde_tag Repeat_ends_() {repeat_ends = true; return this;} private boolean repeat_ends; public boolean Repeat_mids() {return repeat_mids;} public Xop_xnde_tag Repeat_mids_() {repeat_mids = true; return this;} private boolean repeat_mids; public boolean Empty_ignored() {return empty_ignored;} public Xop_xnde_tag Empty_ignored_() {empty_ignored = true; return this;} private boolean empty_ignored; + public boolean Single_only_html() {return single_only_html;} public Xop_xnde_tag Single_only_html_() {single_only_html = true; return this;} private boolean single_only_html; public boolean Raw() {return raw;} public Xop_xnde_tag Raw_() {raw = true; return this;} private boolean raw; public static final byte Block_noop = 0, Block_bgn = 1, Block_end = 2; public byte Block_open() {return block_open;} private byte block_open = Block_noop; diff --git a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag_.java b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag_.java index f6640abfc..a3d41d149 100644 --- a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag_.java +++ b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag_.java @@ -18,12 +18,13 @@ along with this program. If not, see . 
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.langs.*; public class Xop_xnde_tag_ { - public static final int EndNdeMode_normal = 0, EndNdeMode_inline = 1, EndNdeMode_escape = 2; // escape is for hr which does not support - public static final int BgnNdeMode_normal = 0, BgnNdeMode_inline = 1; + public static final int End_nde_mode_normal = 0, End_nde_mode_inline = 1, End_nde_mode_escape = 2; // escape is for hr which does not support + public static final int Bgn_nde_mode_normal = 0, Bgn_nde_mode_inline = 1; public static final byte[] Name_onlyinclude = Bry_.new_a7("onlyinclude"); - public static final byte[] XtnEndTag_bgn = Bry_.new_a7(""); - public static final byte - Tid_b = 0 + public static final byte[] Xtn_end_tag_bgn = Bry_.new_a7(""); + public static final int + Tid__null = -1 +, Tid_b = 0 , Tid_strong = 1 , Tid_i = 2 , Tid_em = 3 @@ -147,64 +148,64 @@ public class Xop_xnde_tag_ { return rv; } public static final Xop_xnde_tag - Tag_b = new_(Tid_b, "b").NoInline_() -, Tag_strong = new_(Tid_strong, "strong").NoInline_() -, Tag_i = new_(Tid_i, "i").NoInline_() -, Tag_em = new_(Tid_em, "em").NoInline_() -, Tag_cite = new_(Tid_cite, "cite").NoInline_() -, Tag_dfn = new_(Tid_dfn, "dfn").NoInline_() -, Tag_var = new_(Tid_var, "var").NoInline_() -, Tag_u = new_(Tid_u, "u").NoInline_().Repeat_ends_() // PAGE:en.b:Textbook_of_Psychiatry/Alcoholism_and_Psychoactive_Substance_Use_Disorders; DATE:2014-09-05 -, Tag_ins = new_(Tid_ins, "ins").NoInline_() -, Tag_abbr = new_(Tid_abbr, "abbr").NoInline_() -, Tag_strike = new_(Tid_strike, "strike").NoInline_() -, Tag_del = new_(Tid_del, "del").NoInline_() -, Tag_s = new_(Tid_s, "s").NoInline_() -, Tag_sub = new_(Tid_sub, "sub").NoInline_() -, Tag_sup = new_(Tid_sup, "sup").NoInline_() -, Tag_big = new_(Tid_big, "big").NoInline_() -, Tag_small = new_(Tid_small, "small").NoInline_() -, Tag_code = new_(Tid_code, "code").NoInline_().Repeat_ends_() -, Tag_tt = 
new_(Tid_tt, "tt").NoInline_().Repeat_ends_() -, Tag_kbd = new_(Tid_kbd, "kbd").NoInline_() -, Tag_samp = new_(Tid_samp, "samp").NoInline_() -, Tag_blockquote = new_(Tid_blockquote, "blockquote").NoInline_().Repeat_mids_().Section_().Block_open_bgn_().Block_close_end_() // NOTE: should be open_end_, but leaving for now; DATE:2014-03-11; added Repeat_mids_(); PAGE:en.w:Ring_a_Ring_o'_Roses DATE:2014-06-26 -, Tag_pre = new_(Tid_pre, "pre").NoInline_().Section_().Xtn_().Raw_().Block_open_bgn_().Block_close_end_().Ignore_empty_().Xtn_skips_template_args_() -, Tag_font = new_(Tid_font, "font").NoInline_() -, Tag_center = new_(Tid_center, "center").NoInline_().Block_open_end_().Block_close_end_() // removed .Repeat_ends_(); added Nest_(); EX: w:Burr Truss; DATE:2012-12-12 -, Tag_p = new_(Tid_p, "p").NoInline_().Section_().Block_open_bgn_().Block_close_end_() + Tag_b = new_(Tid_b, "b").No_inline_() +, Tag_strong = new_(Tid_strong, "strong").No_inline_() +, Tag_i = new_(Tid_i, "i").No_inline_() +, Tag_em = new_(Tid_em, "em").No_inline_() +, Tag_cite = new_(Tid_cite, "cite").No_inline_() +, Tag_dfn = new_(Tid_dfn, "dfn").No_inline_() +, Tag_var = new_(Tid_var, "var").No_inline_() +, Tag_u = new_(Tid_u, "u").No_inline_().Repeat_ends_() // PAGE:en.b:Textbook_of_Psychiatry/Alcoholism_and_Psychoactive_Substance_Use_Disorders; DATE:2014-09-05 +, Tag_ins = new_(Tid_ins, "ins").No_inline_() +, Tag_abbr = new_(Tid_abbr, "abbr").No_inline_() +, Tag_strike = new_(Tid_strike, "strike").No_inline_() +, Tag_del = new_(Tid_del, "del").No_inline_() +, Tag_s = new_(Tid_s, "s").No_inline_() +, Tag_sub = new_(Tid_sub, "sub").No_inline_() +, Tag_sup = new_(Tid_sup, "sup").No_inline_() +, Tag_big = new_(Tid_big, "big").No_inline_() +, Tag_small = new_(Tid_small, "small").No_inline_() +, Tag_code = new_(Tid_code, "code").No_inline_().Repeat_ends_() +, Tag_tt = new_(Tid_tt, "tt").No_inline_().Repeat_ends_() +, Tag_kbd = new_(Tid_kbd, "kbd").No_inline_() +, Tag_samp = new_(Tid_samp, 
"samp").No_inline_() +, Tag_blockquote = new_(Tid_blockquote, "blockquote").No_inline_().Repeat_mids_().Section_().Block_open_bgn_().Block_close_end_() // NOTE: should be open_end_, but leaving for now; DATE:2014-03-11; added Repeat_mids_(); PAGE:en.w:Ring_a_Ring_o'_Roses DATE:2014-06-26 +, Tag_pre = new_(Tid_pre, "pre").No_inline_().Section_().Xtn_().Raw_().Block_open_bgn_().Block_close_end_().Ignore_empty_().Xtn_skips_template_args_() +, Tag_font = new_(Tid_font, "font").No_inline_() +, Tag_center = new_(Tid_center, "center").No_inline_().Block_open_end_().Block_close_end_() // removed .Repeat_ends_(); added Nest_(); EX: w:Burr Truss; DATE:2012-12-12 +, Tag_p = new_(Tid_p, "p").No_inline_().Section_().Block_open_bgn_().Block_close_end_() , Tag_span = new_(Tid_span, "span").Section_() , Tag_div = new_(Tid_div, "div").Section_().Block_open_end_().Block_close_end_() -, Tag_hr = new_(Tid_hr, "hr").SingleOnly_().BgnNdeMode_inline_().Inline_by_backslash_().EndNdeMode_escape_().Section_().Block_close_end_() -, Tag_br = new_(Tid_br, "br").SingleOnly_().BgnNdeMode_inline_().Inline_by_backslash_().EndNdeMode_inline_().Section_() -, Tag_h1 = new_(Tid_h1, "h1").NoInline_().Section_().Block_open_bgn_().Block_close_end_() -, Tag_h2 = new_(Tid_h2, "h2").NoInline_().Section_().Block_open_bgn_().Block_close_end_() -, Tag_h3 = new_(Tid_h3, "h3").NoInline_().Section_().Block_open_bgn_().Block_close_end_() -, Tag_h4 = new_(Tid_h4, "h4").NoInline_().Section_().Block_open_bgn_().Block_close_end_() -, Tag_h5 = new_(Tid_h5, "h5").NoInline_().Section_().Block_open_bgn_().Block_close_end_() -, Tag_h6 = new_(Tid_h6, "h6").NoInline_().Section_().Block_open_bgn_().Block_close_end_() +, Tag_hr = new_(Tid_hr, "hr").Single_only_().Single_only_html_().Bgn_nde_mode_inline_().Inline_by_backslash_().End_nde_mode_escape_().Section_().Block_close_end_() +, Tag_br = new_(Tid_br, "br").Single_only_().Single_only_html_().Bgn_nde_mode_inline_().Inline_by_backslash_().End_nde_mode_inline_().Section_() +, 
Tag_h1 = new_(Tid_h1, "h1").No_inline_().Section_().Block_open_bgn_().Block_close_end_() +, Tag_h2 = new_(Tid_h2, "h2").No_inline_().Section_().Block_open_bgn_().Block_close_end_() +, Tag_h3 = new_(Tid_h3, "h3").No_inline_().Section_().Block_open_bgn_().Block_close_end_() +, Tag_h4 = new_(Tid_h4, "h4").No_inline_().Section_().Block_open_bgn_().Block_close_end_() +, Tag_h5 = new_(Tid_h5, "h5").No_inline_().Section_().Block_open_bgn_().Block_close_end_() +, Tag_h6 = new_(Tid_h6, "h6").No_inline_().Section_().Block_open_bgn_().Block_close_end_() , Tag_li = new_(Tid_li, "li").Repeat_mids_().Empty_ignored_().Block_open_bgn_().Block_close_end_() , Tag_dt = new_(Tid_dt, "dt").Repeat_mids_() , Tag_dd = new_(Tid_dd, "dd").Repeat_mids_() -, Tag_ol = new_(Tid_ol, "ol").NoInline_().Block_open_bgn_().Block_close_end_() -, Tag_ul = new_(Tid_ul, "ul").NoInline_().Block_open_bgn_().Block_close_end_() -, Tag_dl = new_(Tid_dl, "dl").NoInline_() -, Tag_table = new_(Tid_table, "table").NoInline_().Block_open_bgn_().Block_close_end_() -, Tag_tr = new_(Tid_tr, "tr").TblSub_().Block_open_bgn_().Block_open_end_() -, Tag_td = new_(Tid_td, "td").TblSub_().Block_open_end_().Block_close_bgn_() -, Tag_th = new_(Tid_th, "th").TblSub_().Block_open_end_().Block_close_bgn_() +, Tag_ol = new_(Tid_ol, "ol").No_inline_().Block_open_bgn_().Block_close_end_() +, Tag_ul = new_(Tid_ul, "ul").No_inline_().Block_open_bgn_().Block_close_end_() +, Tag_dl = new_(Tid_dl, "dl").No_inline_() +, Tag_table = new_(Tid_table, "table").No_inline_().Block_open_bgn_().Block_close_end_() +, Tag_tr = new_(Tid_tr, "tr").Tbl_sub_().Block_open_bgn_().Block_open_end_() +, Tag_td = new_(Tid_td, "td").Tbl_sub_().Block_open_end_().Block_close_bgn_() +, Tag_th = new_(Tid_th, "th").Tbl_sub_().Block_open_end_().Block_close_bgn_() , Tag_thead = new_(Tid_thead, "thead") , Tag_tfoot = new_(Tid_tfoot, "tfoot") , Tag_tbody = new_(Tid_tbody, "tbody") -, Tag_caption = new_(Tid_caption, "caption").NoInline_().TblSub_() +, Tag_caption = 
new_(Tid_caption, "caption").No_inline_().Tbl_sub_() , Tag_colgroup = new_(Tid_colgroup, "colgroup") , Tag_col = new_(Tid_col, "col") , Tag_a = new_(Tid_a, "a").Restricted_() -, Tag_img = new_(Tid_img, "img").Restricted_() // NOTE: was .Xtn() DATE:2014-11-06 -, Tag_ruby = new_(Tid_ruby, "ruby").NoInline_() -, Tag_rt = new_(Tid_rt, "rt").NoInline_() -, Tag_rb = new_(Tid_rb, "rb").NoInline_() -, Tag_rp = new_(Tid_rp, "rp").NoInline_() +, Tag_img = new_(Tid_img, "img").Single_only_html_().Restricted_() // NOTE: was .Xtn() DATE:2014-11-06 +, Tag_ruby = new_(Tid_ruby, "ruby").No_inline_() +, Tag_rt = new_(Tid_rt, "rt").No_inline_() +, Tag_rb = new_(Tid_rb, "rb").No_inline_() +, Tag_rp = new_(Tid_rp, "rp").No_inline_() , Tag_includeonly = new_(Tid_includeonly, "includeonly") , Tag_noinclude = new_(Tid_noinclude, "noinclude") , Tag_onlyinclude = new_(Tid_onlyinclude, "onlyinclude") @@ -245,8 +246,8 @@ public class Xop_xnde_tag_ { , Tag_bdi = new_(Tid_bdi, "bdi") , Tag_data = new_(Tid_data, "data") , Tag_mark = new_(Tid_mark, "mark") -, Tag_wbr = new_(Tid_wbr, "wbr").SingleOnly_() -, Tag_bdo = new_(Tid_bdo, "bdo").NoInline_().Section_().Block_open_bgn_().Block_close_end_() +, Tag_wbr = new_(Tid_wbr, "wbr").Single_only_().Single_only_html_() +, Tag_bdo = new_(Tid_bdo, "bdo").No_inline_().Section_().Block_open_bgn_().Block_close_end_() , Tag_listing_buy = new_(Tid_listing_buy, "buy").Xtn_mw_() , Tag_listing_do = new_(Tid_listing_do, "do").Xtn_mw_() , Tag_listing_drink = new_(Tid_listing_drink, "drink").Xtn_mw_() diff --git a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag_lang.java b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag_lang.java index 154d38747..87fa009ab 100644 --- a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag_lang.java +++ b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag_lang.java @@ -22,11 +22,11 @@ public class Xop_xnde_tag_lang { lang_code = Int_obj_ref.new_(lang_code_int); this.name_str = name_str; this.name_bry = Bry_.new_u8(name_str); - 
this.xtnEndTag_tmp = Bry_.Add(Xop_xnde_tag_.XtnEndTag_bgn, name_bry); + this.xtnEndTag_tmp = Bry_.Add(Xop_xnde_tag_.Xtn_end_tag_bgn, name_bry); } public Int_obj_ref Lang_code() {return lang_code;} private Int_obj_ref lang_code; public String Name_str() {return name_str;} private String name_str; public byte[] Name_bry() {return name_bry;} private byte[] name_bry; - public byte[] XtnEndTag_tmp() {return xtnEndTag_tmp;} private byte[] xtnEndTag_tmp; + public byte[] Xtn_end_tag_tmp() {return xtnEndTag_tmp;} private byte[] xtnEndTag_tmp; public static final Xop_xnde_tag_lang _ = new Xop_xnde_tag_lang(-1, String_.Empty); } diff --git a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tkn.java b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tkn.java index 320944e6a..4b4999e3b 100644 --- a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tkn.java +++ b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tkn.java @@ -106,7 +106,7 @@ public class Xop_xnde_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn { this.Subs_get(i).Tmpl_evaluate(ctx, src, caller, bfr); bfr.Add_mid(src, tag_close_bgn, tag_close_end); // write tag_end if (tag_close_bgn == Int_.Min_value) {// xtn is unclosed; add a else rest of page will be gobbled; PAGE:en.w:Provinces_and_territories_of_Canada DATE:2014-11-13 - bfr.Add(tag.XtnEndTag()); + bfr.Add(tag.Xtn_end_tag()); bfr.Add(Byte_ascii.Gt_bry); } } diff --git a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_wkr.java b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_wkr.java index 876c61797..afb24dc46 100644 --- a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_wkr.java +++ b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_wkr.java @@ -71,10 +71,10 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space: ++atrs_bgn_pos; // set bgn_pos to be after ws break; - case Byte_ascii.Slash: case Byte_ascii.Gt: + case Byte_ascii.Slash: case Byte_ascii.Angle_end: ++atrs_bgn_pos; // set bgn_pos to be 
after char break; - case Byte_ascii.Backslash: + case Byte_ascii.Backslash: // NOTE: MW treats \ as /; EX: " -> "
++tag_end_pos; break; case Byte_ascii.Dollar:// handles ; @@ -246,7 +246,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { } } int end_rhs = -1, findPos = gtPos; - byte[] end_bry = Xop_xnde_tag_.Tag_noinclude.XtnEndTag(); int end_bry_len = end_bry.length; + byte[] end_bry = Xop_xnde_tag_.Tag_noinclude.Xtn_end_tag(); int end_bry_len = end_bry.length; if (tag_is_closing) //
; no end tag to search for; DATE:2014-05-02 end_rhs = gtPos; else { // ; search for end tag @@ -281,7 +281,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { break; case Byte_ascii.Backslash: // allow ; EX:w:Mosquito if (tag.Inline_by_backslash()) - src[tag_end_pos] = Byte_ascii.Slash; + src[tag_end_pos] = Byte_ascii.Slash; break; case Byte_ascii.Gt: // ">" "normal" tag; noop break; @@ -323,7 +323,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { boolean tag_ignore = false; int tagId = tag.Id(); - if (tagId == Xop_xnde_tag_.Tid_table || tag.TblSub()) { // tbl tag; EX: ,,
, + if (tagId == Xop_xnde_tag_.Tid_table || tag.Tbl_sub()) { // tbl tag; EX: ,,
, Tblw_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, gtPos + 1, tagId, atrs_bgn, atrs_end); return gtPos + 1; } @@ -338,8 +338,8 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { else if (tagId == prv_xnde_tagId && tag.Repeat_mids()) { // EX: "
  • a
  • b" -> "
  • a
  • b" End_tag(ctx, root, prv_xnde, src, src_len, bgn_pos - 1, bgn_pos - 1, tagId, true, tag); } - else if (tag.SingleOnly()) inline = true; //

    not allowed; convert
    to

    will be escaped - else if (tag.NoInline() && inline) { + else if (tag.Single_only()) inline = true; //

    not allowed; convert
    to

    will be escaped + else if (tag.No_inline() && inline) { Xop_xnde_tkn xnde_inline = Xnde_bgn(ctx, tkn_mkr, root, tag, Xop_xnde_tkn.CloseMode_open, src, bgn_pos, open_tag_end, atrs_bgn, atrs_end, atrs); End_tag(ctx, root, xnde_inline, src, src_len, bgn_pos, gtPos, tagId, false, tag); ctx.Msg_log().Add_itm_none(Xop_xnde_log.No_inline, src, bgn_pos, gtPos); @@ -347,7 +347,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { } Xop_xnde_tkn xnde = null; xnde = Xnde_bgn(ctx, tkn_mkr, root, tag, inline ? Xop_xnde_tkn.CloseMode_inline : Xop_xnde_tkn.CloseMode_open, src, bgn_pos, open_tag_end, atrs_bgn, atrs_end, atrs); - if (!inline && tag.BgnNdeMode() != Xop_xnde_tag_.BgnNdeMode_inline) + if (!inline && tag.Bgn_nde_mode() != Xop_xnde_tag_.Bgn_nde_mode_inline) ctx.Stack_add(xnde); if (tag_ignore) xnde.Tag_visible_(false); @@ -414,7 +414,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { Xop_xnde_tkn bgn_nde = (Xop_xnde_tkn)ctx.Stack_get(prv_xnde_pos); int bgn_tag_id = bgn_nde == null ? -1 : bgn_nde.Tag().Id(); - int end_nde_mode = end_tag.EndNdeMode(); + int end_nde_mode = end_tag.End_nde_mode(); boolean force_end_tag_to_match_bgn_tag = false; switch (bgn_tag_id) { case Xop_xnde_tag_.Tid_sub: if (end_tag_id == Xop_xnde_tag_.Tid_sup) force_end_tag_to_match_bgn_tag = true; break; @@ -426,7 +426,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { end_tag_id = bgn_tag_id; ctx.Msg_log().Add_itm_none(Xop_xnde_log.Sub_sup_swapped, src, bgn_pos, cur_pos); } - if (end_tag_id == Xop_xnde_tag_.Tid_table || end_tag.TblSub()) { + if (end_tag_id == Xop_xnde_tag_.Tid_table || end_tag.Tbl_sub()) { Tblw_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, end_tag_id); return cur_pos; } @@ -437,10 +437,10 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { return cur_pos; } switch (end_nde_mode) { - case Xop_xnde_tag_.EndNdeMode_inline: // PATCH.WP: allows
    ,
    and many other variants + case Xop_xnde_tag_.End_nde_mode_inline: // PATCH.WP: allows
    ,
    and many other variants Xnde_bgn(ctx, tkn_mkr, root, end_tag, Xop_xnde_tkn.CloseMode_inline, src, bgn_pos, cur_pos, Int_.Min_value, Int_.Min_value, null); // NOTE: atrs is null b/c
    will never have atrs return cur_pos; - case Xop_xnde_tag_.EndNdeMode_escape: // handle + case Xop_xnde_tag_.End_nde_mode_escape: // handle ctx.Lxr_make_(false); ctx.Msg_log().Add_itm_none(Xop_xnde_log.Escaped_xnde, src, bgn_pos, cur_pos - 1); return cur_pos; @@ -576,13 +576,13 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { xnde.Tag_close_rng_(open_end, open_end); // NOTE: inline tag, so set TagClose to open_end; should noop } else { - byte[] close_bry = tag.XtnEndTag_tmp(); // get tmp bry (so as not to new) + byte[] close_bry = tag.Xtn_end_tag_tmp(); // get tmp bry (so as not to new) if (tag.Langs() != null) { // cur tag has langs; EX:
    ; DATE:2014-07-18 Xop_xnde_tag_lang tag_lang = tag.Langs_get(ctx.Lang().Case_mgr(), ctx.Cur_page().Lang().Lang_id(), src, name_bgn, name_end); if (tag_lang == null) // tag does not match lang; EX: and lang=de; return ctx.Lxr_make_txt_(open_end); if (tag_lang != Xop_xnde_tag_lang._) // tag matches; note Xop_xnde_tag_lang._ is a wildcard match; EX:
    - close_bry = tag_lang.XtnEndTag_tmp(); + close_bry = tag_lang.Xtn_end_tag_tmp(); } int src_offset = open_bgn - 1; // open bgn to start at <; -2 to ignore 0 - && !Enm_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys) + && !Bitmask_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys) ) { json_util.Reindex_arrays(reindex_data, itm_as_kvy, true); if (reindex_data.Rv_is_kvy()) { @@ -107,12 +107,12 @@ public class Scrib_lib_text implements Scrib_lib { byte[] json = args.Pull_bry(0); int flags = args.Cast_int_or(1, 0); int opts = Scrib_lib_text__json_util.Opt__force_assoc; - if (Enm_.Has_int(flags, Scrib_lib_text__json_util.Flag__try_fixing)) - opts = Enm_.Add_int(opts, Scrib_lib_text__json_util.Flag__try_fixing); + if (Bitmask_.Has_int(flags, Scrib_lib_text__json_util.Flag__try_fixing)) + opts = Bitmask_.Add_int(opts, Scrib_lib_text__json_util.Flag__try_fixing); synchronized (procs) { byte rv_tid = json_util.Decode(core.App().Utl__json_parser(), json, opts); if (rv_tid == Bool_.__byte) throw Err_.new_("scribunto", "mw.text.jsonEncode: Unable to decode String " + String_.new_u8(json)); - if (rv_tid == Bool_.Y_byte && !(Enm_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys))) { + if (rv_tid == Bool_.Y_byte && !(Bitmask_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys))) { KeyVal[] rv_as_kvy = (KeyVal[])json_util.Decode_rslt_as_nde(); synchronized (reindex_data) { json_util.Reindex_arrays(reindex_data, rv_as_kvy, false); diff --git a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_wikibase_srl_tst.java b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_wikibase_srl_tst.java index 66d56a653..f130bd911 100644 --- a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_wikibase_srl_tst.java +++ b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_wikibase_srl_tst.java @@ -230,7 +230,7 @@ public class Scrib_lib_wikibase_srl_tst { ); } @Test public void Claims_time() { - 
fxt.Init_prop(fxt.Wdata_fxt().Make_claim_time(2, "2001-02-03 04:05:06")); + fxt.Init_prop(fxt.Wdata_fxt().Make_claim_time(2, "2001-02-03 04:05:06", 9)); fxt.Test ( "claims:" , " P2:" @@ -241,7 +241,7 @@ public class Scrib_lib_wikibase_srl_tst { , " type:'time'" , " value:" , " time:'+00000002001-02-03T04:05:06Z'" - , " precision:'11'" + , " precision:'9'" , " before:'0'" , " after:'0'" , " timezone:'0'" @@ -341,7 +341,7 @@ public class Scrib_lib_wikibase_srl_tst { , " type:'time'" , " value:" , " time:'+00000002001-02-03T04:05:06Z'" - , " precision:'11'" + , " precision:'14'" , " before:'0'" , " after:'0'" , " timezone:'0'" diff --git a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_wikibase_srl_visitor.java b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_wikibase_srl_visitor.java index c6f59674e..274d680c6 100644 --- a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_wikibase_srl_visitor.java +++ b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_wikibase_srl_visitor.java @@ -68,10 +68,10 @@ class Scrib_lib_wikibase_srl_visitor implements Wdata_claim_visitor { private static KeyVal[] Time_value(Wdata_claim_itm_time itm) { KeyVal[] rv = new KeyVal[6]; rv[0] = KeyVal_.new_(Wdata_dict_value_time.Str_time , String_.new_a7(itm.Time())); - rv[1] = KeyVal_.new_(Wdata_dict_value_time.Str_precision , Wdata_dict_value_time.Val_precision_int); // NOTE: must return int, not str; DATE:2014-02-18 - rv[2] = KeyVal_.new_(Wdata_dict_value_time.Str_before , Wdata_dict_value_time.Val_before_int); - rv[3] = KeyVal_.new_(Wdata_dict_value_time.Str_after , Wdata_dict_value_time.Val_after_int); - rv[4] = KeyVal_.new_(Wdata_dict_value_time.Str_timezone , Wdata_dict_value_time.Val_timezone_str); + rv[1] = KeyVal_.new_(Wdata_dict_value_time.Str_precision , itm.Precision_int()); // NOTE: must return int, not str; DATE:2014-02-18 + rv[2] = KeyVal_.new_(Wdata_dict_value_time.Str_before , itm.Before_int()); + rv[3] = KeyVal_.new_(Wdata_dict_value_time.Str_after , 
itm.After_int()); + rv[4] = KeyVal_.new_(Wdata_dict_value_time.Str_timezone , Wdata_dict_value_time.Val_timezone_str); // ASSUME: always 0 b/c UTF?; DATE:2015-09-21 rv[5] = KeyVal_.new_(Wdata_dict_value_time.Str_calendarmodel , Wdata_dict_value_time.Val_calendarmodel_str); return rv; } @@ -80,13 +80,13 @@ class Scrib_lib_wikibase_srl_visitor implements Wdata_claim_visitor { rv[0] = KeyVal_.new_(Scrib_lib_wikibase_srl.Key_type, Wdata_dict_val_tid.Str_globecoordinate); rv[1] = KeyVal_.new_(Scrib_lib_wikibase_srl.Key_value, Globecoordinate_value(itm)); } - private static KeyVal[] Globecoordinate_value(Wdata_claim_itm_globecoordinate itm) { + private static KeyVal[] Globecoordinate_value(Wdata_claim_itm_globecoordinate itm) { KeyVal[] rv = new KeyVal[5]; rv[0] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_latitude , Double_.parse(String_.new_a7(itm.Lat()))); rv[1] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_longitude , Double_.parse(String_.new_a7(itm.Lng()))); - rv[2] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_altitude , null); - rv[3] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_globe , Wdata_dict_value_globecoordinate.Val_globe_dflt_str); - rv[4] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_precision , .00001d); + rv[2] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_altitude , String_.new_u8(itm.Alt())); + rv[3] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_globe , String_.new_u8(itm.Glb())); + rv[4] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_precision , itm.Prc_as_num().To_double()); return rv; } public void Visit_system(Wdata_claim_itm_system itm) { diff --git a/400_xowa/src/gplx/xowa/xtns/wdatas/Wdata_wiki_mgr_fxt.java b/400_xowa/src/gplx/xowa/xtns/wdatas/Wdata_wiki_mgr_fxt.java index aaa65cc16..02f57df2d 100644 --- a/400_xowa/src/gplx/xowa/xtns/wdatas/Wdata_wiki_mgr_fxt.java +++ b/400_xowa/src/gplx/xowa/xtns/wdatas/Wdata_wiki_mgr_fxt.java @@ -57,7 +57,7 @@ public class Wdata_wiki_mgr_fxt { public 
Wdata_claim_itm_core Make_claim_quantity(int pid, String amount, String unit, String ubound, String lbound) {return new Wdata_claim_itm_quantity(pid, Wdata_dict_snak_tid.Tid_value, Bry_.new_a7(amount), Bry_.new_a7(unit), Bry_.new_a7(ubound), Bry_.new_a7(lbound));} public Wdata_claim_itm_core Make_claim_entity_qid(int pid, int val) {return new Wdata_claim_itm_entity(pid, Wdata_dict_snak_tid.Tid_value, Wdata_dict_value_entity_tid.Tid_item, Int_.Xto_bry(val));} public Wdata_claim_itm_core Make_claim_entity_pid(int pid, int val) {return new Wdata_claim_itm_entity(pid, Wdata_dict_snak_tid.Tid_value, Wdata_dict_value_entity_tid.Tid_property, Int_.Xto_bry(val));} - public Wdata_claim_itm_core Make_claim_geo(int pid, String lon, String lat) {return Make_claim_geo(pid, lon, lat, ".000277777", null, "Q2");} + public Wdata_claim_itm_core Make_claim_geo(int pid, String lon, String lat) {return Make_claim_geo(pid, lon, lat, ".00001", null, "http://www.wikidata.org/entity/Q2");} public Wdata_claim_itm_core Make_claim_geo(int pid, String lon, String lat, String prc, String alt, String glb) { return new Wdata_claim_itm_globecoordinate(pid, Wdata_dict_snak_tid.Tid_value, Bry_.new_a7(lat), Bry_.new_a7(lon), Bry_.new_a7(alt), Bry_.new_a7(prc), Bry_.new_a7(glb)); } diff --git a/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file-user.xowa b/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file-user.xowa index 0ee4f3a4d..4e61025a6 100644 Binary files a/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file-user.xowa and b/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file-user.xowa differ diff --git a/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file.xowa b/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file.xowa index 08867a5c4..1bb26d403 100644 Binary files a/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file.xowa and b/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file.xowa differ diff --git 
a/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-text.xowa b/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-text.xowa index b36d6e6d4..3fd530837 100644 Binary files a/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-text.xowa and b/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-text.xowa differ