mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
v2.9.4.1
This commit is contained in:
parent
fa70c05354
commit
8e18af05b6
40
100_core/src_120_basicDataType/gplx/Bitmask_.java
Normal file
40
100_core/src_120_basicDataType/gplx/Bitmask_.java
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx;
|
||||
/**
 * Static helpers that treat int / byte values as sets of bit flags.
 * A value "has" a flag only when every bit of that flag is set in the value.
 */
public class Bitmask_ {
	/** Returns true when every bit of {@code find} is set in {@code val}; a {@code find} of 0 always matches. */
	public static boolean Has_int(int val, int find) {return find == (val & find);}
	/**
	 * Turns the bits of {@code find} on or off in {@code val}.
	 *
	 * @param enable true to set the bits of {@code find}; false to clear them
	 * @param val    current mask
	 * @param find   flag (one or more bits) to set or clear
	 * @return when enabling: {@code val} with every bit of {@code find} set;
	 *         when disabling: {@code val} with the bits of {@code find} cleared if all of them were set, else {@code val} unchanged
	 */
	public static int Flip_int(boolean enable, int val, int find) {
		// enabling must OR: the previous XOR cleared bits that val and find already shared
		// whenever find was only partially present; EX: Flip_int(true, 3, 6) returned 5 instead of 7
		if (enable) return val | find;
		// disabling keeps the historical contract: only strip find when it is fully present
		return Has_int(val, find) ? val & ~find : val;
	}
	/** Returns the union of the bits in {@code lhs} and {@code rhs}. */
	public static int Add_int(int lhs, int rhs) {return lhs | rhs;}
	/** Returns the union of the bits of every item in {@code ary}; 0 when {@code ary} is empty. */
	public static int Add_int_ary(int... ary) {
		int rv = 0;
		for (int itm : ary)
			rv |= itm;	// plain OR; overlapping items can never clear bits that were already accumulated
		return rv;
	}
	/** Byte variant of {@link #Has_int}: true when every bit of {@code find} is set in {@code val}. */
	public static boolean Has_byte(byte val, byte find) {return find == (val & find);}
	/** Returns the union of the bits in {@code flag} and {@code itm}, narrowed back to a byte. */
	public static byte Add_byte(byte flag, byte itm) {return (byte)(flag | itm);}
}
|
@ -18,25 +18,5 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx;
|
||||
/**
 * Static helpers for enum ordinals and for int / byte values used as sets of bit flags.
 * A value "has" a flag only when every bit of that flag is set in the value.
 */
public class Enm_ {
	/**
	 * Returns the ordinal of an enum constant.
	 *
	 * @param enm an enum constant, passed as Object
	 * @throws ClassCastException when {@code enm} is not an enum constant
	 */
	public static int To_int(Object enm) {return Ordinal_lang(enm);}
	/** Returns true when every bit of {@code find} is set in {@code val}; a {@code find} of 0 always matches. */
	public static boolean Has_int(int val, int find) {return find == (val & find);}
	/** Returns the union of the bits in {@code lhs} and {@code rhs}. */
	public static int Add_int(int lhs, int rhs) {return lhs | rhs;}
	/** Returns the union of the bits of every item in {@code ary}; 0 when {@code ary} is empty. */
	public static int Add_int_ary(int... ary) {
		int rv = 0;
		for (int itm : ary)
			rv |= itm;	// plain OR; overlapping items can never clear bits that were already accumulated
		return rv;
	}
	/**
	 * Turns the bits of {@code find} on or off in {@code val}.
	 *
	 * @param enable true to set the bits of {@code find}; false to clear them
	 * @param val    current mask
	 * @param find   flag (one or more bits) to set or clear
	 * @return when enabling: {@code val} with every bit of {@code find} set;
	 *         when disabling: {@code val} with the bits of {@code find} cleared if all of them were set, else {@code val} unchanged
	 */
	public static int Flip_int(boolean enable, int val, int find) {
		// enabling must OR: the previous XOR cleared bits that val and find already shared
		// whenever find was only partially present; EX: Flip_int(true, 3, 6) returned 5 instead of 7
		if (enable) return val | find;
		// disabling keeps the historical contract: only strip find when it is fully present
		return Has_int(val, find) ? val & ~find : val;
	}
	/** Byte variant of {@link #Has_int}: true when every bit of {@code find} is set in {@code val}. */
	public static boolean Has_byte(byte val, byte find) {return find == (val & find);}
	/** Returns the union of the bits in {@code flag} and {@code itm}, narrowed back to a byte. */
	public static byte Add_byte(byte flag, byte itm) {return (byte)(flag | itm);}
	// wildcard cast instead of the raw Enum type; avoids a rawtypes warning with identical runtime behavior
	private static int Ordinal_lang(Object v) {return ((Enum<?>)v).ordinal();}
}
|
||||
|
@ -162,6 +162,21 @@ public class Bry_find_ {
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static int Find_bwd__skip_ws(byte[] src, int end, int bgn) {
|
||||
int src_len = src.length;
|
||||
if (end == src_len) return end;
|
||||
if (end > src_len || end < 0) return Bry_find_.Not_found;
|
||||
int pos = end - 1; // start from end - 1; handles situations where len is passed in
|
||||
for (int i = pos; i >= bgn; --i) {
|
||||
switch (src[i]) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
|
||||
break;
|
||||
default:
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
return bgn;
|
||||
}
|
||||
public static int Find_bwd_while(byte[] src, int cur, int end, byte while_byte) {
|
||||
--cur;
|
||||
while (true) {
|
||||
@ -295,6 +310,7 @@ public class Bry_find_ {
|
||||
}
|
||||
public static int Find_bwd_while_alphanum(byte[] src, int cur) {return Find_bwd_while_alphanum(src, cur, -1);}
|
||||
public static int Find_bwd_while_alphanum(byte[] src, int cur, int end) {
|
||||
--cur;
|
||||
while (cur > end) {
|
||||
switch (src[cur]) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
|
@ -23,16 +23,17 @@ public class Bry_split_ {
|
||||
public static byte[][] Split(byte[] src, byte dlm, boolean trim) {
|
||||
synchronized (thread_lock) {
|
||||
Bry_split_wkr__to_ary wkr = Bry_split_wkr__to_ary.I;
|
||||
Split(src, dlm, trim, wkr);
|
||||
Split(src, 0, src == null ? 0 : src.length, dlm, trim, wkr);
|
||||
return wkr.To_ary();
|
||||
}
|
||||
}
|
||||
public static void Split(byte[] src, byte dlm, boolean trim, Bry_split_wkr wkr) {
|
||||
if (src == null) return;
|
||||
int src_len = src.length, pos = 0; if (src_len == 0) return;
|
||||
public static int Split(byte[] src, int src_bgn, int src_end, byte dlm, boolean trim, Bry_split_wkr wkr) {
|
||||
if (src == null || src_end - src_bgn < 1) return 0;
|
||||
int pos = src_bgn;
|
||||
int itm_bgn = -1, itm_end = -1;
|
||||
int count = 0;
|
||||
while (true) {
|
||||
boolean pos_is_last = pos == src_len;
|
||||
boolean pos_is_last = pos == src_end;
|
||||
byte b = pos_is_last ? dlm : src[pos];
|
||||
int nxt_pos = pos + 1;
|
||||
boolean process = true;
|
||||
@ -51,9 +52,9 @@ public class Bry_split_ {
|
||||
else {
|
||||
int rv = wkr.Split(src, itm_bgn, itm_end);
|
||||
switch (rv) {
|
||||
case Rv__ok: break;
|
||||
case Rv__ok: ++count; break;
|
||||
case Rv__extend: reset = false; break;
|
||||
case Rv__cancel: pos_is_last = true; break;
|
||||
case Rv__cancel: return count;
|
||||
default: throw Err_.new_unhandled(rv);
|
||||
}
|
||||
}
|
||||
@ -67,6 +68,7 @@ public class Bry_split_ {
|
||||
if (pos_is_last) break;
|
||||
pos = nxt_pos;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
public static byte[][] Split(byte[] src, byte[] dlm) {
|
||||
if (Bry_.Len_eq_0(src)) return Bry_.Ary_empty;
|
||||
@ -115,12 +117,16 @@ public class Bry_split_ {
|
||||
class Bry_split_wkr__to_ary implements gplx.core.brys.Bry_split_wkr {
|
||||
private final List_adp list = List_adp_.new_();
|
||||
public int Split(byte[] src, int itm_bgn, int itm_end) {
|
||||
byte[] bry = itm_end == itm_bgn ? Bry_.Empty : Bry_.Mid(src, itm_bgn, itm_end);
|
||||
list.Add(bry);
|
||||
return Bry_split_.Rv__ok;
|
||||
synchronized (list) {
|
||||
byte[] bry = itm_end == itm_bgn ? Bry_.Empty : Bry_.Mid(src, itm_bgn, itm_end);
|
||||
list.Add(bry);
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
}
|
||||
public byte[][] To_ary() {
|
||||
return (byte[][])list.To_ary_and_clear(byte[].class);
|
||||
synchronized (list) {
|
||||
return (byte[][])list.To_ary_and_clear(byte[].class);
|
||||
}
|
||||
}
|
||||
public static final Bry_split_wkr__to_ary I = new Bry_split_wkr__to_ary(); Bry_split_wkr__to_ary() {}
|
||||
}
|
||||
|
@ -35,11 +35,30 @@ public class Bry_split__tst {
|
||||
fxt.Test_Split(" a b | c d " , Byte_ascii.Pipe, Bool_.Y, "a b", "c d");
|
||||
fxt.Test_Split(" a \n b " , Byte_ascii.Nl , Bool_.N, " a ", " b "); // ws as dlm
|
||||
fxt.Test_Split(" a \n b " , Byte_ascii.Nl , Bool_.Y, "a", "b"); // ws as dlm; trim
|
||||
fxt.Test_Split("a|extend|b" , Byte_ascii.Pipe, Bool_.Y, "a", "extend|b"); // extend
|
||||
fxt.Test_Split("extend|a" , Byte_ascii.Pipe, Bool_.Y, "extend|a"); // extend
|
||||
fxt.Test_Split("a|cancel|b" , Byte_ascii.Pipe, Bool_.Y, "a"); // cancel
|
||||
}
|
||||
}
|
||||
class Bry_split__fxt {
|
||||
private final Bry_split_wkr__example wkr = new Bry_split_wkr__example();
|
||||
public void Test_Split(String raw_str, byte dlm, boolean trim, String... expd) {
|
||||
byte[][] actl_ary = Bry_split_.Split(Bry_.new_a7(raw_str), dlm, trim);
|
||||
byte[] src = Bry_.new_a7(raw_str);
|
||||
Bry_split_.Split(src, 0, src.length, dlm, trim, wkr);
|
||||
byte[][] actl_ary = wkr.To_ary();
|
||||
Tfds.Eq_ary_str(expd, String_.Ary(actl_ary));
|
||||
}
|
||||
}
|
||||
class Bry_split_wkr__example implements gplx.core.brys.Bry_split_wkr {
|
||||
private final List_adp list = List_adp_.new_();
|
||||
public int Split(byte[] src, int itm_bgn, int itm_end) {
|
||||
byte[] bry = itm_end == itm_bgn ? Bry_.Empty : Bry_.Mid(src, itm_bgn, itm_end);
|
||||
if (Bry_.Eq(bry, Bry_.new_a7("extend"))) return Bry_split_.Rv__extend;
|
||||
else if (Bry_.Eq(bry, Bry_.new_a7("cancel"))) return Bry_split_.Rv__cancel;
|
||||
list.Add(bry);
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
public byte[][] To_ary() {
|
||||
return (byte[][])list.To_ary_and_clear(byte[].class);
|
||||
}
|
||||
}
|
||||
|
@ -50,6 +50,13 @@ public class List_adp_ {
|
||||
list.Del_at(last_idx);
|
||||
return rv;
|
||||
}
|
||||
public static Object Pop_or(List_adp list, Object or) {
|
||||
int list_len = list.Count(); if (list_len == 0) return or;
|
||||
int last_idx = list_len - 1;
|
||||
Object rv = list.Get_at(last_idx);
|
||||
list.Del_at(last_idx);
|
||||
return rv;
|
||||
}
|
||||
public static void DisposeAll(List_adp list) {
|
||||
for (int i = 0; i < list.Count(); i++)
|
||||
((RlsAble)list.Get_at(i)).Rls();
|
||||
|
@ -138,7 +138,7 @@ public abstract class List_adp_base implements List_adp, GfoInvkAble {
|
||||
public String To_str() {
|
||||
Bry_bfr bfr = Bry_bfr.new_();
|
||||
for (int i = 0; i < count; ++i)
|
||||
bfr.Add_obj(list[i]);
|
||||
bfr.Add_str_u8(Object_.Xto_str_strict_or_null_mark(list[i])).Add_byte_nl();
|
||||
return bfr.Xto_str_and_clear();
|
||||
}
|
||||
private void BoundsChk(int bgn, int end, int len) {
|
||||
|
@ -166,6 +166,7 @@ public class Tfds { // URL:doc/gplx.tfds/Tfds.txt
|
||||
private static final DateAdp time0 = DateAdp_.parse_gplx("2001-01-01 00:00:00.000");
|
||||
private static DateAdp nowTime; // NOTE: cannot set to time0 due to static initialization;
|
||||
public static void WriteText(String text) {Console_adp__sys.I.Write_str(text);}
|
||||
public static void Write(byte[] s, int b, int e) {Write(Bry_.Mid(s, b, e));}
|
||||
public static void Write() {Write("tmp");}
|
||||
public static void Write(Object... ary) {
|
||||
String_bldr sb = String_bldr_.new_();
|
||||
|
@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.gfui; import gplx.*;
|
||||
public class GfuiBorderEdge {
|
||||
public int Val() {return val;} int val;
|
||||
public boolean Has(GfuiBorderEdge comp) {return Enm_.Has_int(val, comp.val);}
|
||||
public boolean Has(GfuiBorderEdge comp) {return Bitmask_.Has_int(val, comp.val);}
|
||||
public GfuiBorderEdge Add(GfuiBorderEdge comp) {
|
||||
return new GfuiBorderEdge(comp.val + val);
|
||||
}
|
||||
|
@ -32,7 +32,7 @@ public class IptEventType_ {
|
||||
if (ary.length == 0) return IptEventType_.None;
|
||||
int newVal = ary[0].Val();
|
||||
for (int i = 1; i < ary.length; i++)
|
||||
newVal = Enm_.Flip_int(true, newVal, ary[i].Val());
|
||||
newVal = Bitmask_.Flip_int(true, newVal, ary[i].Val());
|
||||
return getOrNew_(newVal);
|
||||
}
|
||||
static IptEventType getOrNew_(int v) {
|
||||
@ -46,7 +46,7 @@ public class IptEventType_ {
|
||||
}
|
||||
@gplx.Internal protected static boolean Has(IptEventType val, IptEventType find) {
|
||||
if (find == IptEventType_.None && val != IptEventType_.None) return false; // check .None manually b/c 0 is identity when BitShifting
|
||||
return Enm_.Has_int(val.Val(), find.Val());
|
||||
return Bitmask_.Has_int(val.Val(), find.Val());
|
||||
}
|
||||
public static IptEventType default_(IptArg[] args) {
|
||||
IptEventType rv = IptEventType_.None;
|
||||
|
@ -23,7 +23,7 @@ public class IptKey implements IptArg {
|
||||
public boolean Eq(IptArg comp) {return String_.Eq(key, comp.Key());}
|
||||
public String XtoUiStr() {return IptKeyStrMgr._.To_str(this);}
|
||||
public IptKey Add(IptKey comp) {return IptKey_.add_(this, comp);}
|
||||
public boolean Mod_shift() {return Enm_.Has_int(val, IptKey_.Shift.Val());}
|
||||
public boolean Mod_ctrl() {return Enm_.Has_int(val, IptKey_.Ctrl.Val());}
|
||||
public boolean Mod_alt() {return Enm_.Has_int(val, IptKey_.Alt.Val());}
|
||||
public boolean Mod_shift() {return Bitmask_.Has_int(val, IptKey_.Shift.Val());}
|
||||
public boolean Mod_ctrl() {return Bitmask_.Has_int(val, IptKey_.Ctrl.Val());}
|
||||
public boolean Mod_alt() {return Bitmask_.Has_int(val, IptKey_.Alt.Val());}
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ public class IptKey_ {
|
||||
if (ary.length == 0) return IptKey_.None;
|
||||
int newVal = ary[0].Val();
|
||||
for (int i = 1; i < ary.length; i++)
|
||||
newVal = Enm_.Flip_int(true, newVal, ary[i].Val());
|
||||
newVal = Bitmask_.Flip_int(true, newVal, ary[i].Val());
|
||||
return get_or_new_(newVal);
|
||||
}
|
||||
public static IptKey api_(int val) {
|
||||
@ -138,9 +138,9 @@ public class IptKey_ {
|
||||
}
|
||||
public static String To_str(int val) {
|
||||
String mod_str = "", rv = "";
|
||||
boolean mod_c = Enm_.Has_int(val, IptKey_.Ctrl.Val()); if (mod_c) {mod_str += "c"; val = Enm_.Flip_int(Bool_.N, val, IptKey_.Ctrl.Val());}
|
||||
boolean mod_a = Enm_.Has_int(val, IptKey_.Alt.Val()); if (mod_a) {mod_str += "a"; val = Enm_.Flip_int(Bool_.N, val, IptKey_.Alt.Val());}
|
||||
boolean mod_s = Enm_.Has_int(val, IptKey_.Shift.Val()); if (mod_s) {mod_str += "s"; val = Enm_.Flip_int(Bool_.N, val, IptKey_.Shift.Val());}
|
||||
boolean mod_c = Bitmask_.Has_int(val, IptKey_.Ctrl.Val()); if (mod_c) {mod_str += "c"; val = Bitmask_.Flip_int(Bool_.N, val, IptKey_.Ctrl.Val());}
|
||||
boolean mod_a = Bitmask_.Has_int(val, IptKey_.Alt.Val()); if (mod_a) {mod_str += "a"; val = Bitmask_.Flip_int(Bool_.N, val, IptKey_.Alt.Val());}
|
||||
boolean mod_s = Bitmask_.Has_int(val, IptKey_.Shift.Val()); if (mod_s) {mod_str += "s"; val = Bitmask_.Flip_int(Bool_.N, val, IptKey_.Shift.Val());}
|
||||
if (String_.Len_gt_0(mod_str)) {
|
||||
rv = "mod." + mod_str;
|
||||
if (val == 0) return rv; // handle modifiers only, like "mod.cs"; else will be "mod.cs+key.#0"
|
||||
|
@ -30,7 +30,7 @@ class GfuiWinKeyCmdMgr implements GfuiWinOpenAble, GfoInvkAble, GfoEvObj {
|
||||
int keyVal = iptData.Key().Val();
|
||||
GfuiElem sender = GfuiElem_.as_(iptData.Sender());
|
||||
if (GfuiTextBox_.as_(sender) != null // is sender textBox?
|
||||
&& !Enm_.Has_int(keyVal, IptKey_.Alt.Val()) // does key not have alt
|
||||
&& !Bitmask_.Has_int(keyVal, IptKey_.Alt.Val()) // does key not have alt
|
||||
) return false; // ignore keys from textbox if they do not have alt
|
||||
List_adp elemList = (List_adp)listHash.Get_by(keyVal); if (elemList == null) return false;
|
||||
for (int i = 0; i < elemList.Count(); i++) {
|
||||
|
@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.gfui;
|
||||
import gplx.Bitmask_;
|
||||
import gplx.Byte_ascii;
|
||||
import gplx.Enm_;
|
||||
import gplx.GfoEvMgr_;
|
||||
@ -108,12 +109,12 @@ class Swt_lnr_key implements KeyListener {
|
||||
case 327680: val = IptKey_.Insert.Val(); break;
|
||||
}
|
||||
if (Has_ctrl(ev.stateMask)) val |= IptKey_.KeyCode_Ctrl;
|
||||
if (Enm_.Has_int(ev.stateMask, IptKey_.KeyCode_Shift)) val |= IptKey_.KeyCode_Alt;
|
||||
if (Enm_.Has_int(ev.stateMask, IptKey_.KeyCode_Ctrl)) val |= IptKey_.KeyCode_Shift;
|
||||
if (Bitmask_.Has_int(ev.stateMask, IptKey_.KeyCode_Shift)) val |= IptKey_.KeyCode_Alt;
|
||||
if (Bitmask_.Has_int(ev.stateMask, IptKey_.KeyCode_Ctrl)) val |= IptKey_.KeyCode_Shift;
|
||||
// Tfds.Write(String_.Format("val={4} keyCode={0} stateMask={1} keyLocation={2} character={3}", ev.keyCode, ev.stateMask, ev.keyLocation, ev.character, val));
|
||||
return IptEvtDataKey.int_(val);
|
||||
}
|
||||
public static boolean Has_ctrl(int val) {return Enm_.Has_int(val, IptKey_.KeyCode_Alt);} // NOTE:SWT's ctrl constant is different from SWING's
|
||||
public static boolean Has_ctrl(int val) {return Bitmask_.Has_int(val, IptKey_.KeyCode_Alt);} // NOTE:SWT's ctrl constant is different from SWING's
|
||||
}
|
||||
class Swt_lnr_mouse implements MouseListener {
|
||||
public Swt_lnr_mouse(GxwElem elem) {this.elem = elem;} GxwElem elem;
|
||||
|
@ -18,19 +18,25 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.core.primitives; import gplx.*; import gplx.core.*;
|
||||
public class Int_pool {
|
||||
private final List_adp available_list = List_adp_.new_(); private int available_len;
|
||||
// private final Bry_bfr dbg_bfr = Bry_bfr.new_();
|
||||
private int uid_max = -1;
|
||||
public void Clear() {
|
||||
available_list.Clear();
|
||||
available_len = 0;
|
||||
uid_max = -1;
|
||||
synchronized (available_list) {
|
||||
available_list.Clear();
|
||||
available_len = 0;
|
||||
uid_max = -1;
|
||||
}
|
||||
}
|
||||
public int Get_next() {
|
||||
synchronized (available_list) {
|
||||
if (available_len == 0)
|
||||
if (available_len == 0) {
|
||||
// dbg_bfr.Add_str("+:u:").Add_int_variable(uid_max + 1).Add_byte_nl();
|
||||
return ++uid_max;
|
||||
}
|
||||
else {
|
||||
Int_obj_val val = (Int_obj_val)List_adp_.Pop_last(available_list);
|
||||
--available_len;
|
||||
// dbg_bfr.Add_str("+:a:").Add_int_variable(val.Val()).Add_byte_nl();
|
||||
return val.Val();
|
||||
}
|
||||
}
|
||||
@ -40,17 +46,22 @@ public class Int_pool {
|
||||
synchronized (available_list) {
|
||||
if (available_len == 0 && v == uid_max) {
|
||||
--this.uid_max;
|
||||
// dbg_bfr.Add_str("-:m:").Add_int_variable(v).Add_byte_nl();
|
||||
return;
|
||||
}
|
||||
if (available_len == uid_max) {
|
||||
available_list.Add(Int_obj_val.new_(v));
|
||||
available_list.Sort();
|
||||
for (int i = 0; i < available_len; ++i) {
|
||||
Int_obj_val itm = (Int_obj_val)available_list.Get_at(i);
|
||||
if (i != itm.Val()) throw Err_.new_("core", "available_list out of order", "contents", available_list.To_str());
|
||||
if (i != itm.Val())
|
||||
throw Err_.new_("core", "available_list out of order", "contents", available_list.To_str());
|
||||
}
|
||||
// dbg_bfr.Add_str("-:c:").Add_int_variable(v).Add_byte_nl();
|
||||
this.Clear();
|
||||
}
|
||||
else {
|
||||
// dbg_bfr.Add_str("-:a:").Add_int_variable(v).Add_byte_nl();
|
||||
available_list.Add(Int_obj_val.new_(v));
|
||||
++available_len;
|
||||
}
|
||||
|
@ -52,6 +52,14 @@ public class Int_pool_tst {
|
||||
tstr.Exec_del(2);
|
||||
tstr.Test_get(0);
|
||||
}
|
||||
@Test public void Del__out_of_order_2() {
|
||||
tstr.Test_get(0);
|
||||
tstr.Test_get(1);
|
||||
tstr.Test_get(2);
|
||||
tstr.Exec_del(1);
|
||||
tstr.Exec_del(2);
|
||||
tstr.Exec_del(0);
|
||||
}
|
||||
}
|
||||
class Int_pool_tstr {
|
||||
private final Int_pool pool = new Int_pool();
|
||||
|
@ -127,7 +127,7 @@ public class Gfui_bnd_parser {
|
||||
switch (sym_tkn.Tid()) {
|
||||
case Gfui_bnd_tkn.Tid_sym_plus: // EX: Ctrl + A
|
||||
if (mod_adj != Mod_val_null) { // if mod, just update mod_val and exit
|
||||
mod_val = Enm_.Flip_int(true, mod_val, mod_adj);
|
||||
mod_val = Bitmask_.Flip_int(true, mod_val, mod_adj);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
@ -58,7 +58,7 @@ public class Xoa_app_ {
|
||||
}
|
||||
}
|
||||
public static final String Name = "xowa";
|
||||
public static final String Version = "2.9.3.1";
|
||||
public static final String Version = "2.9.4.1";
|
||||
public static String Build_date = "2012-12-30 00:00:00";
|
||||
public static String Op_sys_str;
|
||||
public static String User_agent = "";
|
||||
|
@ -35,7 +35,7 @@ public class Xof_img_size {
|
||||
&& !Xop_lnki_type.Id_is_thumbable(lnki_type) // not thumb which is implicitly 220; PAGE:en.w:Edward_Snowden; DATE:2015-08-17
|
||||
)
|
||||
lnki_w = orig_w; // use original size; EX:[[File:A.ogv]] -> [[File:A.ogv|550px]] where 550px is orig_w; DATE:2015-08-07
|
||||
if (Enm_.Has_int(lnki_type, Xop_lnki_type.Id_frame) // frame: always return orig size; Linker.php!makeThumbLink2; // Use image dimensions, don't scale
|
||||
if (Bitmask_.Has_int(lnki_type, Xop_lnki_type.Id_frame) // frame: always return orig size; Linker.php!makeThumbLink2; // Use image dimensions, don't scale
|
||||
&& lnki_h == Null) { // unless lnki_h specified; DATE:2013-12-22
|
||||
html_w = file_w = orig_w;
|
||||
html_h = file_h = orig_h;
|
||||
|
@ -76,7 +76,7 @@ public class Xof_img_size_tst {
|
||||
fxt.Lnki_type_(Xop_lnki_type.Id_frame).Lnki_ext_(Xof_ext_.Id_png).Lnki_(200, 200).Orig_(2038, 1529).Test_html(200, 150, Bool_.N);
|
||||
}
|
||||
@Test public void Frame_and_thumb(){ // PURPOSE: frame and thumb should be treated as frame; Enm.Has(val, Id_frame) vs val == Id_frame; PAGE:en.w:History_of_Western_Civilization; DATE:2015-04-16
|
||||
fxt.Lnki_type_(Enm_.Add_byte(Xop_lnki_type.Id_frame, Xop_lnki_type.Id_thumb)).Lnki_(200, -1).Test_html(400, 200, Bool_.Y); // mut return same as Lnki_lt_orig_frame above
|
||||
fxt.Lnki_type_(Bitmask_.Add_byte(Xop_lnki_type.Id_frame, Xop_lnki_type.Id_thumb)).Lnki_(200, -1).Test_html(400, 200, Bool_.Y); // mut return same as Lnki_lt_orig_frame above
|
||||
}
|
||||
@Test public void Video__use_orig_w(){ // PURPOSE: video should use orig_w; DATE:2015-08-07
|
||||
fxt.Lnki_type_(Xop_lnki_type.Id_none).Lnki_ext_(Xof_ext_.Id_ogv).Lnki_(-1, -1).Orig_(500, 250).Test_html(500, 250, Bool_.N);
|
||||
|
@ -20,11 +20,11 @@ public class Xof_patch_upright_tid_ {
|
||||
public static final int Tid_unpatched = 0, Tid_use_thumb_w = 1, Tid_fix_default = 2;
|
||||
public static final int Tid_all = Tid_use_thumb_w | Tid_fix_default;
|
||||
public static int Merge(boolean use_thumb_w, boolean fix_default) {
|
||||
if (use_thumb_w && fix_default) return Enm_.Add_int(Tid_use_thumb_w, Tid_fix_default);
|
||||
if (use_thumb_w && fix_default) return Bitmask_.Add_int(Tid_use_thumb_w, Tid_fix_default);
|
||||
else if (use_thumb_w) return Tid_use_thumb_w;
|
||||
else if (fix_default) return Tid_fix_default;
|
||||
else return Tid_unpatched;
|
||||
}
|
||||
public static boolean Split_use_thumb_w(int tid) {return Enm_.Has_int(tid, Tid_use_thumb_w);}
|
||||
public static boolean Split_fix_default(int tid) {return Enm_.Has_int(tid, Tid_fix_default);}
|
||||
public static boolean Split_use_thumb_w(int tid) {return Bitmask_.Has_int(tid, Tid_use_thumb_w);}
|
||||
public static boolean Split_fix_default(int tid) {return Bitmask_.Has_int(tid, Tid_fix_default);}
|
||||
}
|
||||
|
@ -23,10 +23,11 @@ public class Xoh_consts {
|
||||
, Img_h_str = "height"
|
||||
;
|
||||
public static final byte[]
|
||||
__end = Bry_.new_a7(">")
|
||||
, __end_quote = Bry_.new_a7("\">")
|
||||
, __inline_quote = Bry_.new_a7("\"/>")
|
||||
, Space_2 = Bry_.new_a7(" ")
|
||||
__end = Bry_.new_a7(">")
|
||||
, __inline = Bry_.new_a7("/>")
|
||||
, __end_quote = Bry_.new_a7("\">")
|
||||
, __inline_quote = Bry_.new_a7("\"/>")
|
||||
, Space_2 = Bry_.new_a7(" ")
|
||||
|
||||
, A_bgn = Bry_.new_a7("<a href=\""), A_bgn_lnki_0 = Bry_.new_a7("\" title=\""), A_mid_xowa_title = Bry_.new_a7("\" xowa_title=\"")
|
||||
, A_mid_id = Bry_.new_a7("\" id=\"xowa_lnki_")
|
||||
|
@ -76,8 +76,8 @@ public class Xoh_page_wtr_wkr implements Bry_fmtr_arg {
|
||||
byte[] page_display = Xoh_page_wtr_wkr_.Bld_page_name(tmp_bfr, page_ttl, page.Html_data().Display_ttl());
|
||||
Xol_vnt_mgr vnt_mgr = wiki.Lang().Vnt_mgr();
|
||||
if (vnt_mgr.Enabled()) { // VNT
|
||||
page_name = vnt_mgr.Convert_mgr().Convert_text(wiki, page_name);
|
||||
page_display = vnt_mgr.Convert_mgr().Convert_text(wiki, page_display);
|
||||
page_name = vnt_mgr.Convert_mgr().Convert_text(page_name);
|
||||
page_display = vnt_mgr.Convert_mgr().Convert_text(page_display);
|
||||
}
|
||||
fmtr.Bld_bfr_many(html_bfr
|
||||
, root_dir_bry, Xoa_app_.Version, Xoa_app_.Build_date, app.Tcp_server().Running_str()
|
||||
@ -155,7 +155,7 @@ public class Xoh_page_wtr_wkr implements Bry_fmtr_arg {
|
||||
}
|
||||
Xol_vnt_mgr vnt_mgr = wiki.Lang().Vnt_mgr();
|
||||
if (vnt_mgr.Enabled()) // VNT
|
||||
bfr.Add(vnt_mgr.Convert_mgr().Convert_text(wiki, bfr.Xto_bry_and_clear()));
|
||||
bfr.Add(vnt_mgr.Convert_mgr().Convert_text(bfr.Xto_bry_and_clear()));
|
||||
}
|
||||
private void Write_body_pre(Bry_bfr bfr, Xoae_app app, Xowe_wiki wiki, byte[] data_raw, Bry_bfr tmp_bfr) {
|
||||
Xoh_html_wtr_escaper.Escape(app.Parser_amp_mgr(), tmp_bfr, data_raw, 0, data_raw.length, false, false);
|
||||
|
@ -44,7 +44,7 @@ class Xohd_page_srl_itm__html_module implements Xohd_page_srl_itm {
|
||||
public int Load(Xog_page hpg, byte[] bry, int bry_len, int itm_bgn, Int_obj_ref count_ref) {
|
||||
itm_bgn += 2; // skip bin_int_abrv of [1, 0]
|
||||
byte flag = bry[itm_bgn];
|
||||
hpg.Head_mgr().Init(Enm_.Has_byte(flag, Tid_math), Enm_.Has_byte(flag, Tid_imap), Enm_.Has_byte(flag, Tid_packed), Enm_.Has_byte(flag, Tid_hiero));
|
||||
hpg.Head_mgr().Init(Bitmask_.Has_byte(flag, Tid_math), Bitmask_.Has_byte(flag, Tid_imap), Bitmask_.Has_byte(flag, Tid_packed), Bitmask_.Has_byte(flag, Tid_hiero));
|
||||
return 3;
|
||||
}
|
||||
public void Save(Xog_page hpg, Bry_bfr bfr) {
|
||||
@ -60,10 +60,10 @@ class Xohd_page_srl_itm__html_module implements Xohd_page_srl_itm {
|
||||
}
|
||||
public static byte Calc_flag(boolean math, boolean imap, boolean packed, boolean hiero) {
|
||||
byte rv = 0;
|
||||
if (math) rv = Enm_.Add_byte(rv, Tid_math);
|
||||
if (imap) rv = Enm_.Add_byte(rv, Tid_imap);
|
||||
if (packed) rv = Enm_.Add_byte(rv, Tid_packed);
|
||||
if (hiero) rv = Enm_.Add_byte(rv, Tid_hiero);
|
||||
if (math) rv = Bitmask_.Add_byte(rv, Tid_math);
|
||||
if (imap) rv = Bitmask_.Add_byte(rv, Tid_imap);
|
||||
if (packed) rv = Bitmask_.Add_byte(rv, Tid_packed);
|
||||
if (hiero) rv = Bitmask_.Add_byte(rv, Tid_hiero);
|
||||
return rv;
|
||||
}
|
||||
private static final byte // SERIALIZED; only supports 8 different types
|
||||
|
@ -156,13 +156,13 @@ public class Xoh_head_mgr implements Bry_fmtr_arg {
|
||||
boolean enabled = itm.Enabled();
|
||||
if (enabled) {
|
||||
int flag = itms[i].Flags();
|
||||
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__css_include)) list__css_include.Add(itm);
|
||||
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__css_text)) list__css_text.Add(itm);
|
||||
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__js_include)) list__js_include.Add(itm);
|
||||
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__js_head_global)) list__js_head_global.Add(itm);
|
||||
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__js_head_script)) list__js_head_script.Add(itm);
|
||||
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__js_tail_script)) list__js_tail_script.Add(itm);
|
||||
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__js_window_onload)) list__js_window_onload.Add(itm);
|
||||
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__css_include)) list__css_include.Add(itm);
|
||||
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__css_text)) list__css_text.Add(itm);
|
||||
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__js_include)) list__js_include.Add(itm);
|
||||
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__js_head_global)) list__js_head_global.Add(itm);
|
||||
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__js_head_script)) list__js_head_script.Add(itm);
|
||||
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__js_tail_script)) list__js_tail_script.Add(itm);
|
||||
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__js_window_onload)) list__js_window_onload.Add(itm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -215,11 +215,11 @@ public class Xoh_file_wtr__basic {
|
||||
return scratch_bfr.Xto_bry_and_clear();
|
||||
}
|
||||
private static byte[] Arg_anchor_title(Bry_bfr tmp_bfr, byte[] src, Xop_lnki_tkn lnki, byte[] lnki_ttl, Xoh_lnki_title_fmtr anchor_title_wkr) {
|
||||
if ( Enm_.Has_int(lnki.Lnki_type(), Xop_lnki_type.Id_thumb)
|
||||
|| Enm_.Has_int(lnki.Lnki_type(), Xop_lnki_type.Id_frame) // If the image is a thumb, do not add a title / alt, even if a caption is available
|
||||
if ( Bitmask_.Has_int(lnki.Lnki_type(), Xop_lnki_type.Id_thumb)
|
||||
|| Bitmask_.Has_int(lnki.Lnki_type(), Xop_lnki_type.Id_frame) // If the image is a thumb, do not add a title / alt, even if a caption is available
|
||||
)
|
||||
return Bry_.Empty;
|
||||
else if ( Enm_.Has_int(lnki.Lnki_type(), Xop_lnki_type.Id_frameless)) { // If the image is frameless, add the caption as a title / alt. If no caption is available, do not add a title / alt
|
||||
else if ( Bitmask_.Has_int(lnki.Lnki_type(), Xop_lnki_type.Id_frameless)) { // If the image is frameless, add the caption as a title / alt. If no caption is available, do not add a title / alt
|
||||
}
|
||||
Xop_tkn_itm anchor_title_tkn = lnki.Caption_tkn();
|
||||
if (anchor_title_tkn == Xop_tkn_null.Null_tkn) return Bry_.Empty; // no caption; return empty; (do not use lnki); DATE:2013-12-31
|
||||
|
27
400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_dir_.java
Normal file
27
400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_dir_.java
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
public class Xol_vnt_dir_ {
|
||||
public static final int Tid__none = 0, Tid__uni = 1, Tid__bi = 2;
|
||||
public static int Parse(byte[] v) {return hash.Get_as_int_or(v, Tid__none);}
|
||||
private static final byte[] Bry__none = Bry_.new_a7("disable"), Bry__uni = Bry_.new_a7("unidirectional"), Bry__bi = Bry_.new_a7("bidirectional");
|
||||
private static final Hash_adp_bry hash = Hash_adp_bry.cs()
|
||||
.Add_bry_int(Bry__none , Tid__none)
|
||||
.Add_bry_int(Bry__uni , Tid__uni)
|
||||
.Add_bry_int(Bry__bi , Tid__bi);
|
||||
}
|
@ -18,27 +18,33 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import gplx.xowa.langs.vnts.converts.*;
|
||||
public class Xol_vnt_itm implements GfoInvkAble {
|
||||
public Xol_vnt_itm(byte[] key, byte[] name, int mask__vnt) {
|
||||
this.key = key; this.name = name; this.mask__vnt = mask__vnt;
|
||||
public Xol_vnt_itm(int idx, byte[] key, byte[] name, int mask__vnt) {
|
||||
this.idx = idx; this.key = key; this.name = name; this.mask__vnt = mask__vnt;
|
||||
this.convert_wkr = new Xol_convert_wkr(key);
|
||||
}
|
||||
public int Idx() {return idx;} private final int idx; // EX: 2
|
||||
public byte[] Key() {return key;} private final byte[] key; // EX: zh-cn
|
||||
public byte[] Name() {return name;} private final byte[] name; // EX: 大陆简体
|
||||
public boolean Visible() {return visible;} private boolean visible = true; // visible in menu
|
||||
public byte[][] Fallback_ary() {return fallback_ary;} private byte[][] fallback_ary = Bry_.Ary_empty; // EX: zh-hans|zh
|
||||
public int Dir() {return dir;} private int dir = Xol_vnt_dir_.Tid__bi; // EX: "bidirectional"
|
||||
public int Mask__vnt() {return mask__vnt;} private final int mask__vnt; // EX: 8
|
||||
public int Mask__fallbacks() {return mask_fallbacks;} private int mask_fallbacks; // EX: 11 for zh,zh-hans,zh-cn
|
||||
public byte[][] Convert_ary() {return convert_ary;} private byte[][] convert_ary = Bry_.Ary_empty; // EX: zh-hans|zh-cn
|
||||
public Xol_convert_wkr Convert_wkr() {return convert_wkr;} private final Xol_convert_wkr convert_wkr;
|
||||
public void Visible_(boolean v) {this.visible = v;}
|
||||
public void Convert_ary_(byte[][] v) {convert_ary = v;}
|
||||
public void Init(int dir, byte[][] fallback_ary) {
|
||||
this.dir = dir; this.fallback_ary = fallback_ary;
|
||||
}
|
||||
public void Mask__fallbacks__calc(Xol_vnt_regy regy, byte[][] ary) {
|
||||
this.mask_fallbacks = regy.Mask__calc(Bry_.Ary_add(Bry_.Ary(key), ary));// NOTE: must add lang.key which is not part of fallback; EX: "zh-cn" has fallback of "zh-hans", but chain should calc "zh-cn","zh-hans"
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_fallbacks_)) fallback_ary = Bry_split_.Split(m.ReadBry("v"), Byte_ascii.Pipe);
|
||||
else if (ctx.Match(k, Invk_converts_)) convert_ary = Bry_split_.Split(m.ReadBry("v"), Byte_ascii.Pipe);
|
||||
else if (ctx.Match(k, Invk_dir_)) dir = Xol_vnt_dir_.Parse(m.ReadBry("v"));
|
||||
else return GfoInvkAble_.Rv_unhandled;
|
||||
return this;
|
||||
} private static final String Invk_fallbacks_ = "fallbacks_", Invk_converts_ = "converts_";
|
||||
} private static final String Invk_fallbacks_ = "fallbacks_", Invk_converts_ = "converts_", Invk_dir_ = "dir_";
|
||||
}
|
||||
|
@ -18,8 +18,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import gplx.xowa.parsers.vnts.*;
|
||||
class Xol_vnt_itm_sorter__rule implements gplx.lists.ComparerAble {
|
||||
private Ordered_hash hash;
|
||||
public void Sort(Ordered_hash hash, Xop_vnt_rule_tkn[] ary) {
|
||||
private Hash_adp hash;
|
||||
public void Sort(Hash_adp hash, Xop_vnt_rule_tkn[] ary) {
|
||||
synchronized (hash) {
|
||||
this.hash = hash;
|
||||
Array_.Sort(ary, this);
|
||||
|
@ -19,17 +19,20 @@ package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xow
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.parsers.vnts.*;
|
||||
public class Xol_vnt_regy {
|
||||
private final Ordered_hash hash = Ordered_hash_.new_bry_(); private int hash_len;
|
||||
private final Hash_adp_bry hash = Hash_adp_bry.ci_a7(); private int hash_len;
|
||||
private final List_adp list = List_adp_.new_();
|
||||
public Btrie_slim_mgr Trie() {return trie;} private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7();
|
||||
public int Len() {return hash.Count();}
|
||||
public boolean Has(byte[] k) {return hash.Has(k);}
|
||||
public Xol_vnt_itm Get_at(int i) {return (Xol_vnt_itm)hash.Get_at(i);}
|
||||
public Xol_vnt_itm Get_at(int i) {return (Xol_vnt_itm)list.Get_at(i);}
|
||||
public Xol_vnt_itm Get_by(byte[] k) {return (Xol_vnt_itm)hash.Get_by(k);}
|
||||
public void Clear() {hash.Clear(); trie.Clear(); hash_len = 0;}
|
||||
public Xol_vnt_itm Get_by(byte[] s, int b, int e) {return (Xol_vnt_itm)hash.Get_by_mid(s, b, e);}
|
||||
public void Clear() {hash.Clear(); list.Clear(); trie.Clear(); hash_len = 0;}
|
||||
public Xol_vnt_itm Add(byte[] key, byte[] name) {
|
||||
int mask = gplx.core.brys.Bit_.Get_flag(hash_len);
|
||||
Xol_vnt_itm itm = new Xol_vnt_itm(key, name, mask);
|
||||
Xol_vnt_itm itm = new Xol_vnt_itm(hash_len, key, name, mask);
|
||||
hash.Add(key, itm);
|
||||
list.Add(itm);
|
||||
trie.Add_obj(key, itm);
|
||||
hash_len = hash.Count();
|
||||
return itm;
|
||||
@ -41,15 +44,15 @@ public class Xol_vnt_regy {
|
||||
byte[] key = ary[i];
|
||||
Xol_vnt_itm itm = (Xol_vnt_itm)hash.Get_by(key); if (itm == null) continue; // handle bad vnt from user input; EX: -{zh;bad|text}-
|
||||
int itm_mask = itm.Mask__vnt();
|
||||
rv = rv == 0 ? itm_mask : Enm_.Flip_int(true, rv, itm_mask);
|
||||
rv = rv == 0 ? itm_mask : Bitmask_.Flip_int(true, rv, itm_mask);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public boolean Mask__match_any(int lhs, int rhs) { // EX: match "zh-cn|zh-hans|zh-hant" against "zh|zh-hans|zh-hant"
|
||||
for (int i = 0; i < hash_len; ++i) {
|
||||
int mask = gplx.core.brys.Bit_.Get_flag(i); // 1,2,4,8
|
||||
if (Enm_.Has_int(lhs, mask)) { // lhs has mask; EX: for lhs=6, mask=1 -> 'n'; mask=2 -> 'y'
|
||||
if (Enm_.Has_int(rhs, mask)) // if rhs does not have mask, return false;
|
||||
if (Bitmask_.Has_int(lhs, mask)) { // lhs has mask; EX: for lhs=6, mask=1 -> 'n'; mask=2 -> 'y'
|
||||
if (Bitmask_.Has_int(rhs, mask)) // if rhs does not have mask, return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
63
400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_regy_fxt.java
Normal file
63
400_xowa/src/gplx/xowa/langs/vnts/Xol_vnt_regy_fxt.java
Normal file
@ -0,0 +1,63 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import gplx.xowa.parsers.vnts.*;
|
||||
public class Xol_vnt_regy_fxt {
|
||||
private final Xol_vnt_regy mgr = new_chinese();
|
||||
public String[] Make_lang_chain_cn() {return String_.Ary("zh-cn", "zh-hans", "zh-hant", "zh");}
|
||||
public void Test_match_any(boolean expd, String[] lang_chain, String[]... vnt_chain_ary) {
|
||||
int len = vnt_chain_ary.length;
|
||||
int lang_flag = mgr.Mask__calc(Bry_.Ary(lang_chain));
|
||||
for (int i = 0; i < len; ++i) {
|
||||
String[] vnt_chain = vnt_chain_ary[i]; // EX: -{zh;zh-hans;zh-hant}-
|
||||
int vnt_flag = mgr.Mask__calc(Bry_.Ary(vnt_chain));
|
||||
Tfds.Eq(expd, mgr.Mask__match_any(vnt_flag, lang_flag), String_.Concat_with_str(";", vnt_chain) + "<>" + String_.Concat_with_str(";", lang_chain));
|
||||
}
|
||||
}
|
||||
public void Test_calc(String[] ary, int expd) {
|
||||
Tfds.Eq(expd, mgr.Mask__calc(Bry_.Ary(ary)));
|
||||
}
|
||||
public void Test_sort(String[] vnt_ary, String[] expd) {
|
||||
int vnt_len = vnt_ary.length;
|
||||
Xop_vnt_rule_tkn[] rule_ary = new Xop_vnt_rule_tkn[vnt_len];
|
||||
for (int i = 0; i < vnt_len; ++i)
|
||||
rule_ary[i] = new Xop_vnt_rule_tkn(Bry_.Empty, Bry_.new_u8(vnt_ary[i]), gplx.xowa.parsers.Xop_tkn_itm_.Ary_empty);
|
||||
mgr.Mask__sort(rule_ary);
|
||||
for (int i = 0; i < vnt_len; ++i)
|
||||
vnt_ary[i] = String_.new_u8(rule_ary[i].Rule_lang());
|
||||
Tfds.Eq_ary_str(expd, vnt_ary);
|
||||
}
|
||||
public static Xol_vnt_regy new_chinese() { // REF.MW:/languages/classes/LanguageZh.php|LanguageZh|__construct
|
||||
Xol_vnt_regy rv = new Xol_vnt_regy();
|
||||
new_chinese_vnt(rv, "zh" , Xol_vnt_dir_.Tid__none, "zh-hans", "zh-hant", "zh-cn", "zh-tw", "zh-hk", "zh-sg", "zh-mo", "zh-my");
|
||||
new_chinese_vnt(rv, "zh-hans" , Xol_vnt_dir_.Tid__uni , "zh-cn", "zh-sg", "zh-my");
|
||||
new_chinese_vnt(rv, "zh-hant" , Xol_vnt_dir_.Tid__uni , "zh-tw", "zh-hk", "zh-mo");
|
||||
new_chinese_vnt(rv, "zh-cn" , Xol_vnt_dir_.Tid__bi , "zh-hans", "zh-sg", "zh-my");
|
||||
new_chinese_vnt(rv, "zh-hk" , Xol_vnt_dir_.Tid__bi , "zh-hant", "zh-mo", "zh-tw");
|
||||
new_chinese_vnt(rv, "zh-my" , Xol_vnt_dir_.Tid__bi , "zh-hans", "zh-sg", "zh-cn");
|
||||
new_chinese_vnt(rv, "zh-mo" , Xol_vnt_dir_.Tid__bi , "zh-hant", "zh-hk", "zh-tw");
|
||||
new_chinese_vnt(rv, "zh-sg" , Xol_vnt_dir_.Tid__bi , "zh-hans", "zh-cn", "zh-my");
|
||||
new_chinese_vnt(rv, "zh-tw" , Xol_vnt_dir_.Tid__bi , "zh-hant", "zh-hk", "zh-mo");
|
||||
return rv;
|
||||
}
|
||||
private static void new_chinese_vnt(Xol_vnt_regy regy, String key, int dir, String... fallbacks) {
|
||||
byte[] key_bry = Bry_.new_u8(key);
|
||||
Xol_vnt_itm itm = regy.Add(key_bry, Bry_.Ucase__all(key_bry));
|
||||
itm.Init(dir, Bry_.Ary(fallbacks));
|
||||
}
|
||||
}
|
@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import org.junit.*; import gplx.xowa.parsers.vnts.*;
|
||||
import org.junit.*;
|
||||
public class Xol_vnt_regy_tst {
|
||||
private final Xol_vnt_regy_fxt fxt = new Xol_vnt_regy_fxt();
|
||||
@Test public void Calc() {
|
||||
@ -51,34 +51,3 @@ public class Xol_vnt_regy_tst {
|
||||
fxt.Test_sort(String_.Ary("zh", "zh-hans", "zh-cn" ) , String_.Ary("zh-cn", "zh-hans", "zh"));
|
||||
}
|
||||
}
|
||||
class Xol_vnt_regy_fxt {
|
||||
private final Xol_vnt_regy mgr = new Xol_vnt_regy();
|
||||
public Xol_vnt_regy_fxt() {
|
||||
String[] ary = Xop_vnt_parser_fxt.Vnts_chinese;
|
||||
for (String itm : ary)
|
||||
mgr.Add(Bry_.new_u8(itm), Bry_.Empty);
|
||||
}
|
||||
public String[] Make_lang_chain_cn() {return String_.Ary("zh-cn", "zh-hans", "zh-hant", "zh");}
|
||||
public void Test_match_any(boolean expd, String[] lang_chain, String[]... vnt_chain_ary) {
|
||||
int len = vnt_chain_ary.length;
|
||||
int lang_flag = mgr.Mask__calc(Bry_.Ary(lang_chain));
|
||||
for (int i = 0; i < len; ++i) {
|
||||
String[] vnt_chain = vnt_chain_ary[i]; // EX: -{zh;zh-hans;zh-hant}-
|
||||
int vnt_flag = mgr.Mask__calc(Bry_.Ary(vnt_chain));
|
||||
Tfds.Eq(expd, mgr.Mask__match_any(vnt_flag, lang_flag), String_.Concat_with_str(";", vnt_chain) + "<>" + String_.Concat_with_str(";", lang_chain));
|
||||
}
|
||||
}
|
||||
public void Test_calc(String[] ary, int expd) {
|
||||
Tfds.Eq(expd, mgr.Mask__calc(Bry_.Ary(ary)));
|
||||
}
|
||||
public void Test_sort(String[] vnt_ary, String[] expd) {
|
||||
int vnt_len = vnt_ary.length;
|
||||
Xop_vnt_rule_tkn[] rule_ary = new Xop_vnt_rule_tkn[vnt_len];
|
||||
for (int i = 0; i < vnt_len; ++i)
|
||||
rule_ary[i] = new Xop_vnt_rule_tkn(Bry_.Empty, Bry_.new_u8(vnt_ary[i]), gplx.xowa.parsers.Xop_tkn_itm_.Ary_empty);
|
||||
mgr.Mask__sort(rule_ary);
|
||||
for (int i = 0; i < vnt_len; ++i)
|
||||
vnt_ary[i] = String_.new_u8(rule_ary[i].Rule_lang());
|
||||
Tfds.Eq_ary_str(expd, vnt_ary);
|
||||
}
|
||||
}
|
||||
|
@ -44,10 +44,11 @@ public class Xol_convert_mgr {
|
||||
if (new_wkr_idx == -1) throw Err_.new_("lang.vnt", "unknown vnt", "key", cur_vnt);
|
||||
this.cur_wkr_idx = new_wkr_idx;
|
||||
}
|
||||
public byte[] Convert_text(Xowe_wiki wiki, byte[] src) {return Convert_text(wiki, src, 0, src.length);}
|
||||
public byte[] Convert_text(Xowe_wiki wiki, byte[] src, int bgn, int end) {
|
||||
Bry_bfr tmp_bfr = wiki.Utl__bfr_mkr().Get_m001();
|
||||
Xol_convert_wkr converter = wkr_ary[cur_wkr_idx];
|
||||
public byte[] Convert_text(byte[] src) {return Convert_text(src, 0, src.length);}
|
||||
public byte[] Convert_text(byte[] src, int bgn, int end) {return Convert_text(cur_wkr_idx, src, bgn, end);}
|
||||
public byte[] Convert_text(int vnt_idx, byte[] src, int bgn, int end) {
|
||||
Bry_bfr tmp_bfr = Xoa_app_.Utl__bfr_mkr().Get_m001();
|
||||
Xol_convert_wkr converter = wkr_ary[vnt_idx];
|
||||
converter.Convert_text(tmp_bfr, src, bgn, end);
|
||||
return tmp_bfr.To_bry_and_rls();
|
||||
}
|
||||
|
@ -21,6 +21,8 @@ public class Xol_convert_wkr {
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
public Xol_convert_wkr(byte[] key) {this.key = key;}
|
||||
public byte[] Key() {return key;} private final byte[] key;
|
||||
public void Add(byte[] src, byte[] trg) {trie.Add_obj(src, trg);}
|
||||
public void Del(byte[] src) {trie.Del(src);}
|
||||
public boolean Convert_text(Bry_bfr bfr, byte[] src) {return Convert_text(bfr, src, 0, src.length);}
|
||||
public boolean Convert_text(Bry_bfr bfr, byte[] src, int bgn, int end) {
|
||||
int pos = bgn;
|
||||
@ -47,7 +49,7 @@ public class Xol_convert_wkr {
|
||||
pos = trie.Match_pos();
|
||||
}
|
||||
}
|
||||
if (!matched) bfr.Add(src); // no convert; make sure to add back src, else bfr will be blank
|
||||
if (!matched) bfr.Add_mid(src, bgn, end); // no convert; make sure to add back src, else bfr will be blank
|
||||
return matched;
|
||||
}
|
||||
public void Rebuild(Xol_convert_regy regy, byte[][] ary) {
|
||||
|
86
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_itm.java
Normal file
86
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_itm.java
Normal file
@ -0,0 +1,86 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Mwh_atr_itm {
|
||||
public Mwh_atr_itm
|
||||
( byte[] src, boolean valid, boolean repeated, boolean key_exists, int atr_bgn, int atr_end
|
||||
, int key_bgn, int key_end, byte[] key_bry
|
||||
, int val_bgn, int val_end, byte[] val_bry
|
||||
, int eql_pos, int qte_tid
|
||||
) {
|
||||
this.src = src;
|
||||
this.valid = valid; this.repeated = repeated; this.key_exists = key_exists;
|
||||
this.atr_bgn = atr_bgn; this.atr_end = atr_end;
|
||||
this.key_bgn = key_bgn; this.key_end = key_end; this.key_bry = key_bry;
|
||||
this.val_bgn = val_bgn; this.val_end = val_end; this.val_bry = val_bry;
|
||||
this.eql_pos = eql_pos; this.qte_tid = qte_tid;
|
||||
}
|
||||
public byte[] Src() {return src;} private final byte[] src;
|
||||
public boolean Valid() {return valid;} private final boolean valid;
|
||||
public boolean Key_exists() {return key_exists;} private final boolean key_exists;
|
||||
public boolean Repeated() {return repeated;} private final boolean repeated;
|
||||
public int Atr_bgn() {return atr_bgn;} private int atr_bgn;
|
||||
public int Atr_end() {return atr_end;} private int atr_end;
|
||||
public int Key_bgn() {return key_bgn;} private final int key_bgn;
|
||||
public int Key_end() {return key_end;} private final int key_end;
|
||||
public byte[] Key_bry() {return key_bry;} private byte[] key_bry;
|
||||
public byte Key_tid() {return key_tid;} public Mwh_atr_itm Key_tid_(byte v) {key_tid = v; return this;} private byte key_tid;
|
||||
public int Val_bgn() {return val_bgn;} private final int val_bgn;
|
||||
public int Val_end() {return val_end;} private final int val_end;
|
||||
public byte[] Val_bry() {return val_bry;} private byte[] val_bry;
|
||||
public int Eql_pos() {return eql_pos;} private final int eql_pos;
|
||||
public int Qte_tid() {return qte_tid;} private final int qte_tid;
|
||||
public Mwh_atr_itm Atr_rng(int bgn, int end) {this.atr_bgn = bgn; this.atr_end = end; return this;}
|
||||
public String Val_as_str() {return String_.new_u8(Val_as_bry());}
|
||||
public byte[] Val_as_bry() {if (val_bry == null) val_bry = Bry_.Mid(src, val_bgn, val_end); return val_bry;} // NOTE: val_bry is cached
|
||||
public byte[] Val_as_bry__blank_to_null() {byte[] rv = Val_as_bry(); return Bry_.Len_eq_0(rv) ? null : rv;}
|
||||
public int Val_as_int_or(int or) {return val_bry == null ? Bry_.To_int_or__lax(src, val_bgn, val_end, or) : Bry_.To_int_or(val_bry, or);}
|
||||
public boolean Val_as_bool_by_int() {return Val_as_int_or(0) == 1;}
|
||||
public boolean Val_as_bool() {return Bry_.Eq(Bry_.Lcase__all(Val_as_bry()), Bool_.True_bry);}
|
||||
public static final Mwh_atr_itm[] Ary_empty = new Mwh_atr_itm[0];
|
||||
public static final int Atr_tid__invalid = 1, Atr_tid__repeat = 2, Atr_tid__pair = 4, Atr_tid__name = 8; // NOTE: id order is important; see above;
|
||||
public static final int Qte_tid__none = 0, Qte_tid__apos = 1, Qte_tid__qute = 2;
|
||||
public static final int Mask__qte__none = 0, Mask__qte__apos = 1, Mask__qte_qute = 2;
|
||||
public static final int
|
||||
Mask__valid = 8
|
||||
, Mask__repeated = 16
|
||||
, Mask__key_exists = 32
|
||||
, Mask__val_made = 64
|
||||
;
|
||||
public static final boolean Mask__valid__n = false, Mask__valid__y = true;
|
||||
public static final boolean Mask__key_exists__n = false, Mask__key_exists__y = true;
|
||||
public static final boolean Mask__repeated__n = false, Mask__repeated__y = true;
|
||||
public static final boolean Mask__val_made__n = false, Mask__val_made__y = true;
|
||||
public static int Calc_atr_utl(int qte_tid, boolean valid, boolean repeated, boolean key_exists, boolean val_made) {
|
||||
int rv = qte_tid;
|
||||
if (valid) rv |= Mwh_atr_itm.Mask__valid;
|
||||
if (repeated) rv |= Mwh_atr_itm.Mask__repeated;
|
||||
if (key_exists) rv |= Mwh_atr_itm.Mask__key_exists;
|
||||
if (val_made) rv |= Mwh_atr_itm.Mask__val_made;
|
||||
return rv;
|
||||
}
|
||||
public static int Calc_qte_tid(int val) {
|
||||
return val & ((1 << 3) - 1);
|
||||
}
|
||||
public static byte Calc_qte_byte(int[] data_ary, int idx) {
|
||||
int val = data_ary[idx + Mwh_atr_mgr.Idx_atr_utl];
|
||||
int qte_tid = (val & ((1 << 3) - 1));
|
||||
return qte_tid == Qte_tid__apos ? Byte_ascii.Apos : Byte_ascii.Quote;
|
||||
}
|
||||
// public static final byte Key_tid_generic = 0, Key_tid_id = 1, Key_tid_style = 2, Key_tid_role = 3;
|
||||
}
|
98
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_mgr.java
Normal file
98
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_mgr.java
Normal file
@ -0,0 +1,98 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.brys.*;
|
||||
public class Mwh_atr_mgr {
|
||||
private final int data_max_orig;
|
||||
public Mwh_atr_mgr(int max) {
|
||||
this.data_max_orig = max * Idx__mult;
|
||||
this.Max_(max);
|
||||
}
|
||||
public int Len() {return itm_len;} private int itm_len;
|
||||
public int[] Data_ary() {return data_ary;} private int[] data_ary; private int data_max;
|
||||
public byte[][] Text_ary() {return text_ary;} private byte[][] text_ary;
|
||||
private void Max_(int len) {
|
||||
this.data_max = len * Idx__mult;
|
||||
this.data_ary = new int[data_max];
|
||||
this.text_ary = new byte[len * Text__mult][];
|
||||
this.itm_len = 0;
|
||||
}
|
||||
public void Clear() {
|
||||
if (data_max == data_max_orig)
|
||||
itm_len = 0;
|
||||
else
|
||||
Max_(data_max_orig / Idx__mult);
|
||||
}
|
||||
public int Add(int nde_uid, int nde_tid, boolean valid, boolean repeated, boolean key_exists, int atr_bgn, int atr_end, int key_bgn, int key_end, byte[] key_bry, int eql_pos, int qte_tid, int val_bgn, int val_end, byte[] val_bry) {
|
||||
int data_idx = itm_len * Idx__mult;
|
||||
if (data_idx == data_max) {
|
||||
int new_data_max = data_max == 0 ? Idx__mult : data_max * 2;
|
||||
int[] new_data_ary = new int[new_data_max];
|
||||
Int_.Ary_copy_to(data_ary, data_max, data_ary);
|
||||
this.data_ary = new_data_ary;
|
||||
|
||||
int text_max = text_ary.length;
|
||||
int new_text_max = data_max == 0 ? Text__mult : text_max * 2;
|
||||
byte[][] new_text_ary = new byte[new_text_max][];
|
||||
for (int i = 0; i < text_max; ++i)
|
||||
new_text_ary[i] = text_ary[i];
|
||||
this.text_ary = new_text_ary;
|
||||
|
||||
this.data_max = new_data_max;
|
||||
}
|
||||
boolean val_made = false;
|
||||
int text_idx = itm_len * Text__mult;
|
||||
text_ary[text_idx] = key_bry;
|
||||
if (val_bry != null) {
|
||||
text_ary[text_idx + 1] = val_bry;
|
||||
val_made = true;
|
||||
}
|
||||
data_ary[data_idx + Idx_nde_uid] = nde_uid;
|
||||
data_ary[data_idx + Idx_nde_tid] = nde_tid;
|
||||
data_ary[data_idx + Idx_atr_utl] = Mwh_atr_itm.Calc_atr_utl(qte_tid, valid, repeated, key_exists, val_made);
|
||||
data_ary[data_idx + Idx_atr_bgn] = atr_bgn;
|
||||
data_ary[data_idx + Idx_atr_end] = atr_end;
|
||||
data_ary[data_idx + Idx_key_bgn] = key_bgn;
|
||||
data_ary[data_idx + Idx_key_end] = key_end;
|
||||
data_ary[data_idx + Idx_val_bgn] = val_bgn;
|
||||
data_ary[data_idx + Idx_val_end] = val_end;
|
||||
data_ary[data_idx + Idx_eql_pos] = eql_pos;
|
||||
return itm_len++;
|
||||
}
|
||||
public void Set_repeated(int atr_uid) {
|
||||
int atr_utl_idx = (atr_uid * Idx__mult) + Idx_atr_utl;
|
||||
int atr_utl = data_ary[atr_utl_idx];
|
||||
int val_bry_exists = atr_utl & Atr_utl__val_bry_exists;
|
||||
data_ary[atr_utl_idx] = Mwh_atr_itm.Atr_tid__repeat | val_bry_exists;
|
||||
}
|
||||
public static final int
|
||||
Idx_nde_uid = 0
|
||||
, Idx_nde_tid = 1
|
||||
, Idx_atr_utl = 2
|
||||
, Idx_atr_bgn = 3
|
||||
, Idx_atr_end = 4
|
||||
, Idx_key_bgn = 5
|
||||
, Idx_key_end = 6
|
||||
, Idx_val_bgn = 7
|
||||
, Idx_val_end = 8
|
||||
, Idx_eql_pos = 9
|
||||
, Idx__mult = 10
|
||||
;
|
||||
public static final int Text__mult = 2;
|
||||
public static final int Atr_utl__val_bry_exists = 16;
|
||||
}
|
39
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_mgr_tst.java
Normal file
39
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_mgr_tst.java
Normal file
@ -0,0 +1,39 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Mwh_atr_mgr_tst {
|
||||
private final Mwh_atr_mgr_fxt fxt = new Mwh_atr_mgr_fxt();
|
||||
@Test public void Atr_utl_make() {
|
||||
// key="val"
|
||||
fxt.Test_atr_utl_make(Mwh_atr_itm.Qte_tid__qute, Mwh_atr_itm.Mask__valid__y, Mwh_atr_itm.Mask__repeated__n, Mwh_atr_itm.Mask__key_exists__y, Mwh_atr_itm.Mask__val_made__n, 42);
|
||||
// key=val key=v<nowiki/>al
|
||||
fxt.Test_atr_utl_make(Mwh_atr_itm.Qte_tid__none, Mwh_atr_itm.Mask__valid__y, Mwh_atr_itm.Mask__repeated__y, Mwh_atr_itm.Mask__key_exists__y, Mwh_atr_itm.Mask__val_made__y, 120);
|
||||
}
|
||||
}
|
||||
class Mwh_atr_mgr_fxt {
|
||||
public void Test_atr_utl_make(int qte_tid, boolean valid, boolean repeated, boolean key_exists, boolean val_made, int expd) {
|
||||
int atr_utl = Mwh_atr_itm.Calc_atr_utl(qte_tid, valid, repeated, key_exists, val_made);
|
||||
Tfds.Eq_int(expd, atr_utl);
|
||||
Tfds.Eq_int(qte_tid, Mwh_atr_itm.Calc_qte_tid(atr_utl));
|
||||
Tfds.Eq_bool(valid, (atr_utl & Mwh_atr_itm.Mask__valid) == Mwh_atr_itm.Mask__valid);
|
||||
Tfds.Eq_bool(repeated, (atr_utl & Mwh_atr_itm.Mask__repeated) == Mwh_atr_itm.Mask__repeated);
|
||||
Tfds.Eq_bool(key_exists, (atr_utl & Mwh_atr_itm.Mask__key_exists) == Mwh_atr_itm.Mask__key_exists);
|
||||
Tfds.Eq_bool(val_made, (atr_utl & Mwh_atr_itm.Mask__val_made) == Mwh_atr_itm.Mask__val_made);
|
||||
}
|
||||
}
|
457
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser.java
Normal file
457
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser.java
Normal file
@ -0,0 +1,457 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.primitives.*;
|
||||
import gplx.xowa.parsers.xndes.*; // for brys: <nowiki>, <noinclude>, <includeonly>, <onlyinclude>
|
||||
public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATTRIBS_REGEX
|
||||
private static final byte Area__invalid = 0, Area__atr_limbo = 1, Area__key = 2, Area__eql_limbo = 3, Area__val_limbo = 4, Area__val_quote = 5, Area__val_naked = 6;
|
||||
private final Hash_adp_bry repeated_atrs_hash = Hash_adp_bry.ci_a7(); // ASCII:xnde_atrs
|
||||
private final Mwh_atr_mgr atr_mgr = new Mwh_atr_mgr(16);
|
||||
private final Bry_bfr key_bfr = Bry_bfr.new_(), val_bfr = Bry_bfr.new_();
|
||||
private byte area = Area__atr_limbo;
|
||||
private int atr_bgn = -1, key_bgn = -1, key_end = -1, eql_pos = -1, val_bgn = -1, val_end = -1;
|
||||
private byte qte_byte = Byte_ascii.Null;
|
||||
private boolean key_bfr_on = false, val_bfr_on = false, ws_is_before_val = false;
|
||||
private int nde_uid, nde_tid;
|
||||
public Bry_obj_ref Bry_obj() {return bry_ref;} private final Bry_obj_ref bry_ref = Bry_obj_ref.null_();
|
||||
public int Nde_end_tid() {return nde_end_tid;} private int nde_end_tid;
|
||||
public int Parse(Mwh_doc_wkr wkr, int nde_uid, int nde_tid, byte[] src, int src_bgn, int src_end) {
|
||||
this.nde_uid = nde_uid; this.nde_tid = nde_tid;
|
||||
this.nde_end_tid = Mwh_doc_parser.Nde_end_tid__invalid;
|
||||
area = Area__atr_limbo;
|
||||
boolean prv_is_ws = false;
|
||||
int pos = src_bgn;
|
||||
boolean loop = true;
|
||||
while (loop) {
|
||||
if (pos == src_end) {
|
||||
if (area == Area__val_quote) { // quote still open
|
||||
int reset_pos = Bry_find_.Find_fwd(src, Byte_ascii.Space, val_bgn, src_end); // try to find 1st space within quote; EX:"a='b c=d" should try to reset at c=d
|
||||
boolean reset_found = reset_pos != Bry_find_.Not_found;
|
||||
area = Area__invalid; val_end = reset_found ? reset_pos : src_end;
|
||||
Make(src, val_end); // create invalid atr
|
||||
if (reset_found) { // space found; resume from text after space; EX: "a='b c=d"; PAGE:en.w:Aubervilliers DATE:2014-06-25
|
||||
pos = Bry_find_.Find_fwd_while_not_ws(src, reset_pos, src_end); // skip ws
|
||||
atr_bgn = -1;
|
||||
area = Area__atr_limbo;
|
||||
val_bfr.Clear();
|
||||
val_bfr_on = false;
|
||||
ws_is_before_val = false;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
else {
|
||||
if (area == Area__val_limbo) // NOTE: handle dangling "k=" else will be "k"; EX: <a b=> x> <a b>; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31
|
||||
area = Area__invalid;
|
||||
if (atr_bgn != -1) { // atr_bgn will be -1 if atrs ends on quoted (EX:"a='b'"); else, pending atr that needs to be processed; EX: "a=b" b wil be in bfr
|
||||
val_end = src_end;
|
||||
Make(src, src_end);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (pos > src_end)
|
||||
break;
|
||||
byte b = src[pos];
|
||||
switch (area) {
|
||||
case Area__atr_limbo: // 1st area after node_name or attribute
|
||||
switch (b) {
|
||||
// gt -> stop iterating
|
||||
case Byte_ascii.Gt:
|
||||
nde_end_tid = Mwh_doc_parser.Nde_end_tid__gt;
|
||||
loop = false;
|
||||
break;
|
||||
// slash -> check for "/>" or " / "
|
||||
case Byte_ascii.Slash:
|
||||
int nxt_pos = pos + 1;
|
||||
if (nxt_pos == src_end) {
|
||||
pos = nxt_pos;
|
||||
return Mwh_doc_parser.Nde_end_tid__invalid;
|
||||
}
|
||||
else if (src[nxt_pos] == Byte_ascii.Gt) {
|
||||
nde_end_tid = Mwh_doc_parser.Nde_end_tid__inline;
|
||||
pos = nxt_pos;
|
||||
loop = false;
|
||||
}
|
||||
else {
|
||||
area = Area__invalid; atr_bgn = pos;
|
||||
}
|
||||
break;
|
||||
// ws -> ignore; skip any ws in atr_limbo; note that once a non-ws char is encountered, it will immediately go into another area
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
|
||||
if (atr_bgn == -1) atr_bgn = pos;
|
||||
break;
|
||||
// alphanum -> enter Area__key
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Colon:
|
||||
area = Area__key;
|
||||
if (atr_bgn == -1) atr_bgn = pos;
|
||||
key_bgn = pos;
|
||||
break;
|
||||
// lt -> check for <nowiki>
|
||||
case Byte_ascii.Lt: // handle "<nowiki>"
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found) {
|
||||
area = Area__invalid;
|
||||
atr_bgn = pos;
|
||||
}
|
||||
else
|
||||
pos = gt_pos; // position after ">"; note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
break;
|
||||
// rest -> invalid
|
||||
default: // quote and other non-valid key characters are invalid until next space; EX: "<span 'key_cannot_be_quoted' id='123'"
|
||||
area = Area__invalid; atr_bgn = pos;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__invalid:
|
||||
switch (b) {
|
||||
// ws -> src_end invalid area
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
|
||||
Make(src, pos);
|
||||
area = Area__atr_limbo;
|
||||
break;
|
||||
// rest -> continue eating up invalid chars
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__key:
|
||||
switch (b) {
|
||||
// alphanum -> valid key chars
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Colon: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Underline:
|
||||
if (key_bfr_on) key_bfr.Add_byte(b);
|
||||
break;
|
||||
// ws -> src_end key
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
|
||||
area = Area__eql_limbo;
|
||||
key_end = pos;
|
||||
break;
|
||||
// eq -> src_end key; skip Area_eq and go to Area_val_bgn
|
||||
case Byte_ascii.Eq:
|
||||
area = Area__val_limbo;
|
||||
key_end = eql_pos = pos;
|
||||
break;
|
||||
// lt -> check for <nowiki>
|
||||
case Byte_ascii.Lt:
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found) // "<" should not be in key; EX: "ke<y"
|
||||
area = Area__invalid;
|
||||
else {
|
||||
if (!key_bfr_on) {key_bfr.Add_mid(src, key_bgn, pos); key_bfr_on = true;}
|
||||
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
}
|
||||
break;
|
||||
// rest -> enter invalid
|
||||
default:
|
||||
area = Area__invalid;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__eql_limbo:
|
||||
switch (b) {
|
||||
// ws -> skip
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip ws
|
||||
if (key_end == -1) { // EX: "a = b"; key_end != -1 b/c 1st \s sets key_end; EX: "a b = c"; key_end
|
||||
val_end = pos - 1;
|
||||
Make(src, pos);
|
||||
area = Area__atr_limbo;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
// eq -> enter Area__eq
|
||||
case Byte_ascii.Eq:
|
||||
eql_pos = pos;
|
||||
area = Area__val_limbo;
|
||||
break;
|
||||
// rest -> make atr and enter limbo
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos: // FUTURE: previous word was key
|
||||
default: // NOTE: added this late; xml_parser was not handling "line start=3" DATE:2013-07-03
|
||||
val_end = pos - 1;
|
||||
Make(src, pos);
|
||||
area = Area__atr_limbo;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case Area__val_limbo:
|
||||
switch (b) {
|
||||
// ws -> skip
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
|
||||
ws_is_before_val = true;
|
||||
break;
|
||||
// quote -> enter Area_val_quote
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos:
|
||||
area = Area__val_quote; qte_byte = b; prv_is_ws = false;
|
||||
val_bgn = pos + 1;
|
||||
break;
|
||||
// alphanum -> enter Area_val_raw
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Colon:
|
||||
case Byte_ascii.Hash:
|
||||
area = Area__val_naked;
|
||||
val_bgn = pos;
|
||||
break;
|
||||
// lt -> check for <nowiki>
|
||||
case Byte_ascii.Lt:
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found)
|
||||
area = Area__invalid;
|
||||
else
|
||||
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
break;
|
||||
// rest -> ignore (?)
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Area__val_quote: { // EX: "'val' " in "key = 'val'"
|
||||
switch (b) {
|
||||
// quote: check if same as opening quote
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos:
|
||||
if (qte_byte == b) { // quote closes val
|
||||
val_end = pos;
|
||||
Make(src, pos + 1); // NOTE: set atr_end *after* quote
|
||||
}
|
||||
else { // quote is just char; EX: title="1 o'clock" or title='The "C" way'
|
||||
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
}
|
||||
break;
|
||||
// lt -> check for <nowiki>; EX: <span title='ab<nowiki>c</nowiki>de'>
|
||||
case Byte_ascii.Lt:
|
||||
if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init
|
||||
int gt_pos = Xnde_find_gt(src, pos, src_end);
|
||||
if (gt_pos == Bry_find_.Not_found)
|
||||
// area = Area__invalid; // DELETE: 2012-11-13; unpaired < should not mark atr invalid; EX: style='margin:1em<f'
|
||||
val_bfr.Add_byte(Byte_ascii.Lt);
|
||||
else
|
||||
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
|
||||
prv_is_ws = false;
|
||||
break;
|
||||
// ws -> convert all ws to \s; only allow 1 ws at any point in time
|
||||
case Byte_ascii.Nl: case Byte_ascii.Tab: case Byte_ascii.Cr: // REF.MW:Sanitizer.php|decodeTagAttributes $value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
|
||||
case Byte_ascii.Space:
|
||||
if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init
|
||||
if (prv_is_ws) {} // noop; only allow one ws at a time; EX: "a b" -> "a b"; "a\n\nb" -> "a b"
|
||||
else {
|
||||
prv_is_ws = true; val_bfr.Add_byte(Byte_ascii.Space);
|
||||
}
|
||||
break;
|
||||
// rest -> add to val
|
||||
default:
|
||||
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Area__val_naked: // no quotes; EX:a=bcd
|
||||
switch (b) {
|
||||
// alphanum -> continue reading
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent:
|
||||
case Byte_ascii.Amp: case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star:
|
||||
case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Slash:
|
||||
case Byte_ascii.Colon: case Byte_ascii.Semic: case Byte_ascii.Gt:
|
||||
case Byte_ascii.Question: case Byte_ascii.At: case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end:
|
||||
case Byte_ascii.Pow: case Byte_ascii.Underline: case Byte_ascii.Tick:
|
||||
case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde:
|
||||
break;
|
||||
// ws -> src_end atr
|
||||
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl:
|
||||
val_end = pos;
|
||||
Make(src, pos);
|
||||
break;
|
||||
case Byte_ascii.Eq: // EX:"a= b=c" or "a=b=c"; PAGE:en.w:2013_in_American_television
|
||||
if (ws_is_before_val) { // "a= b=c"; discard 1st and resume at 2nd
|
||||
int old_val_bgn = val_bgn;
|
||||
area = Area__invalid; Make(src, val_bgn); // invalidate cur atr; EX:"a="
|
||||
atr_bgn = key_bgn = old_val_bgn; // reset atr / key to new atr; EX: "b"
|
||||
key_end = pos;
|
||||
area = Area__val_limbo; // set area to val_bgn (basically, put after =)
|
||||
}
|
||||
else // "a=b=c"; discard all
|
||||
area = Area__invalid;
|
||||
break;
|
||||
case Byte_ascii.Lt:
|
||||
val_end = pos;
|
||||
Make(src, pos);
|
||||
--pos; // NOTE: --pos to include "<" as part of next atr; above ws excludes from next atr
|
||||
break;
|
||||
default:
|
||||
area = Area__invalid;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
|
||||
// iterate atrs and notify
|
||||
int len = atr_mgr.Len();
|
||||
int[] data_ary = atr_mgr.Data_ary();
|
||||
byte[][] text_ary = atr_mgr.Text_ary();
|
||||
for (int j = 0; j < len; ++j) {
|
||||
int itm_idx = j * Mwh_atr_mgr.Idx__mult;
|
||||
byte[] key_bry = text_ary[j * Mwh_atr_mgr.Text__mult];
|
||||
byte[] val_bry_manual = null;
|
||||
int atr_utl = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_utl];
|
||||
boolean atr_valid = (atr_utl & Mwh_atr_itm.Mask__valid) == Mwh_atr_itm.Mask__valid;
|
||||
boolean repeated = (atr_utl & Mwh_atr_itm.Mask__repeated) == Mwh_atr_itm.Mask__repeated;
|
||||
boolean key_exists = (atr_utl & Mwh_atr_itm.Mask__key_exists) == Mwh_atr_itm.Mask__key_exists;
|
||||
boolean val_made = (atr_utl & Mwh_atr_itm.Mask__val_made) == Mwh_atr_itm.Mask__val_made;
|
||||
if (val_made)
|
||||
val_bry_manual = text_ary[(j * Mwh_atr_mgr.Text__mult) + 1];
|
||||
wkr.On_atr_each(this, src, nde_tid, atr_valid, repeated, key_exists, key_bry, val_bry_manual, data_ary, itm_idx);
|
||||
}
|
||||
atr_mgr.Clear();
|
||||
repeated_atrs_hash.Clear();
|
||||
|
||||
return pos;
|
||||
}
|
||||
public int Xnde_find_gt_find(byte[] src, int pos, int end) {
|
||||
bry_ref.Val_(null);
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Slash && pos + 1 < end) { // if </ move pos to after /
|
||||
++pos;
|
||||
b = src[pos];
|
||||
}
|
||||
int gt_pos = Bry_find_.Find_fwd(src, Byte_ascii.Gt, pos, end); if (gt_pos == Bry_.NotFound) return Bry_find_.Not_found;
|
||||
byte[] bry = (byte[])xnde_hash.Get_by_mid(src, pos, gt_pos);
|
||||
bry_ref.Val_(bry);
|
||||
return bry == null ? Bry_find_.Not_found : bry.length + pos;
|
||||
}
|
||||
private int Xnde_find_gt(byte[] src, int lt_pos, int end) {
|
||||
int pos = lt_pos + 1;
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Slash && pos + 1 < end) {
|
||||
++pos;
|
||||
b = src[pos];
|
||||
}
|
||||
int match_pos = Xnde_find_gt_find(src, pos, end);
|
||||
if (match_pos == Bry_find_.Not_found) {return Bry_find_.Not_found;}
|
||||
boolean slash_found = false;
|
||||
for (int i = match_pos; i < end; i++) {
|
||||
b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Gt: return i;
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip any ws
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
if (slash_found) {return Bry_find_.Not_found;} // only allow one slash
|
||||
else slash_found = true;
|
||||
break;
|
||||
default:
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
}
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
private void Make(byte[] src, int atr_end) {
|
||||
// calc final values for atr
|
||||
boolean key_exists = false;
|
||||
byte[] key_bry = null, val_bry = null;
|
||||
boolean atr_valid = true;
|
||||
if (area != Area__invalid) {
|
||||
if (key_bgn != -1 && val_bgn != -1) // key && val exists; EX: "<input id='123'>"
|
||||
key_exists = true;
|
||||
else { // not a pair; EX: "<input checked>"
|
||||
if (key_end == -1) key_end = val_end; // NOTE: key_end == -1 when eos; EX: "a" would have key_bgn = 0; key_end = -1; val_end = 1 DATE:2014-07-03
|
||||
val_bgn = val_end = -1;
|
||||
}
|
||||
key_bry = key_bfr_on ? key_bfr.Xto_bry_and_clear() : Bry_.Mid(src, key_bgn, key_end); // always make key_bry; needed for repeated_atrs as well as key_tid
|
||||
if (val_bfr_on) val_bry = val_bfr.Xto_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
atr_valid = false;
|
||||
key_bry = Bry_.Empty;
|
||||
key_bfr.Clear();
|
||||
if (val_bgn == -1) val_bgn = atr_bgn;
|
||||
}
|
||||
int qte_tid = Mwh_atr_itm.Mask__qte__none;
|
||||
if (qte_byte != Byte_ascii.Null)
|
||||
qte_tid = qte_byte == Byte_ascii.Quote ? Mwh_atr_itm.Mask__qte_qute : Mwh_atr_itm.Mask__qte__apos;
|
||||
int atr_uid = atr_mgr.Add(nde_uid, nde_tid, atr_valid, false, key_exists, atr_bgn, atr_end, key_bgn, key_end, key_bry, eql_pos, qte_tid, val_bgn, val_end, val_bry);
|
||||
|
||||
// handle repeated atrs
|
||||
if (atr_valid) {
|
||||
int repeated_uid = repeated_atrs_hash.Get_as_int_or(key_bry, -1);
|
||||
if (repeated_uid != -1) {
|
||||
repeated_atrs_hash.Del(key_bry);
|
||||
atr_mgr.Set_repeated(repeated_uid);
|
||||
}
|
||||
repeated_atrs_hash.Add_bry_int(key_bry, atr_uid);
|
||||
}
|
||||
|
||||
// reset temp variables
|
||||
area = Area__atr_limbo; qte_byte = Byte_ascii.Null;
|
||||
atr_bgn = key_bgn = val_bgn = key_end = val_end = eql_pos = -1;
|
||||
key_bfr_on = val_bfr_on = ws_is_before_val = false;
|
||||
}
|
||||
private static final Hash_adp_bry xnde_hash = Hash_adp_bry.ci_a7()
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag_nowiki.Name_bry())
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag_noinclude.Name_bry())
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag_includeonly.Name_bry())
|
||||
.Add_bry_bry(Xop_xnde_tag_.Tag_onlyinclude.Name_bry())
|
||||
;
|
||||
public static final int Key_tid__unknown = -1;
|
||||
}
|
99
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser_fxt.java
Normal file
99
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser_fxt.java
Normal file
@ -0,0 +1,99 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
class Mwh_atr_parser_fxt {
|
||||
private final Bry_bfr expd_bfr = Bry_bfr.new_(), actl_bfr = Bry_bfr.new_();
|
||||
private final Mwh_atr_parser parser = new Mwh_atr_parser();
|
||||
private final Mwh_doc_wkr__atr_bldr wkr = new Mwh_doc_wkr__atr_bldr();
|
||||
public Mwh_atr_itm Make_pair(String key, String val) {return new Mwh_atr_itm(Bry_.Empty, Bool_.Y, Bool_.N, Bool_.Y, -1, -1, -1, -1, Bry_.new_u8(key) , -1, -1, Bry_.new_u8(val) , -1, -1);}
|
||||
public Mwh_atr_itm Make_name(String key) {return new Mwh_atr_itm(Bry_.Empty, Bool_.Y, Bool_.N, Bool_.N, -1, -1, -1, -1, Bry_.new_u8(key) , -1, -1, null , -1, -1);}
|
||||
public Mwh_atr_itm Make_fail(int bgn, int end) {return new Mwh_atr_itm(Bry_.Empty, Bool_.N, Bool_.N, Bool_.N, bgn, end, -1, -1, null , -1, -1, null , -1, -1);}
|
||||
public void Test_val_as_int(String raw, int expd) {
|
||||
byte[] src = Bry_.new_u8(raw);
|
||||
Mwh_atr_itm itm = new Mwh_atr_itm(src, true, false, false, 0, src.length, -1, -1, null, 0, src.length, src, -1, -1);
|
||||
Tfds.Eq_int(expd, itm.Val_as_int_or(-1));
|
||||
}
|
||||
public void Test_parse(String raw, Mwh_atr_itm... expd) {
|
||||
Mwh_atr_itm[] actl = Exec_parse(raw);
|
||||
Test_print(expd, actl);
|
||||
}
|
||||
private Mwh_atr_itm[] Exec_parse(String raw) {
|
||||
byte[] bry = Bry_.new_u8(raw);
|
||||
parser.Parse(wkr, -1, -1, bry, 0, bry.length);
|
||||
return wkr.To_atr_ary();
|
||||
}
|
||||
public void Test_print(Mwh_atr_itm[] expd_ary, Mwh_atr_itm[] actl_ary) {
|
||||
int expd_len = expd_ary.length;
|
||||
int actl_len = actl_ary.length;
|
||||
int len = expd_len > actl_len ? expd_len : actl_len;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
To_bfr(expd_bfr, i < expd_len ? expd_ary[i] : null, actl_bfr, i < actl_len ? actl_ary[i] : null);
|
||||
}
|
||||
Tfds.Eq_str_lines(expd_bfr.Xto_str_and_clear(), actl_bfr.Xto_str_and_clear());
|
||||
}
|
||||
private void To_bfr(Bry_bfr expd_bfr, Mwh_atr_itm expd_itm, Bry_bfr actl_bfr, Mwh_atr_itm actl_itm) {
|
||||
To_bfr__main(expd_bfr, expd_itm);
|
||||
To_bfr__main(actl_bfr, actl_itm);
|
||||
To_bfr__head(expd_bfr, expd_itm);
|
||||
To_bfr__head(actl_bfr, actl_itm);
|
||||
if (expd_itm.Atr_bgn() != -1) {
|
||||
To_bfr__atr_rng(expd_bfr, expd_itm);
|
||||
To_bfr__atr_rng(actl_bfr, actl_itm);
|
||||
}
|
||||
}
|
||||
private void To_bfr__head(Bry_bfr bfr, Mwh_atr_itm itm) {
|
||||
if (itm == null) return;
|
||||
bfr.Add_str_a7("head:").Add_yn(itm.Valid()).Add_byte_semic().Add_yn(itm.Repeated()).Add_byte_semic().Add_yn(itm.Key_exists()).Add_byte_nl();
|
||||
}
|
||||
private void To_bfr__main(Bry_bfr bfr, Mwh_atr_itm itm) {
|
||||
if (itm == null) return;
|
||||
if (itm.Valid()) {
|
||||
bfr.Add_str_a7("key:").Add(itm.Key_bry()).Add_byte_nl();
|
||||
bfr.Add_str_a7("val:").Add(itm.Val_as_bry()).Add_byte_nl();
|
||||
}
|
||||
// else
|
||||
// To_bfr__atr_rng(bfr, itm);
|
||||
}
|
||||
private void To_bfr__atr_rng(Bry_bfr bfr, Mwh_atr_itm itm) {
|
||||
if (itm == null) return;
|
||||
bfr.Add_str_a7("rng:").Add_int_variable(itm.Atr_bgn()).Add_byte_semic().Add_int_variable(itm.Atr_end()).Add_byte_nl();
|
||||
}
|
||||
}
|
||||
class Mwh_doc_wkr__atr_bldr implements Mwh_doc_wkr {
|
||||
private final List_adp list = List_adp_.new_();
|
||||
public Hash_adp_bry Nde_regy() {return null;}
|
||||
public void On_atr_each(Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] data_ary, int itm_idx) {
|
||||
int atr_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_bgn];
|
||||
int atr_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_end];
|
||||
int key_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_key_bgn];
|
||||
int key_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_key_end];
|
||||
int val_bgn = data_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
|
||||
int val_end = data_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
|
||||
int eql_pos = data_ary[itm_idx + Mwh_atr_mgr.Idx_eql_pos];
|
||||
int qte_tid = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_utl];
|
||||
qte_tid = Mwh_atr_itm.Calc_qte_tid(qte_tid);
|
||||
Mwh_atr_itm atr = new Mwh_atr_itm(src, valid, repeated, key_exists, atr_bgn, atr_end, key_bgn, key_end, key_bry, val_bgn, val_end, val_bry_manual, eql_pos, qte_tid);
|
||||
list.Add(atr);
|
||||
}
|
||||
public void On_txt_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
public void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) {}
|
||||
public void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline) {}
|
||||
public void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
public Mwh_atr_itm[] To_atr_ary() {return (Mwh_atr_itm[])list.To_ary_and_clear(Mwh_atr_itm.class);}
|
||||
}
|
63
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser_tst.java
Normal file
63
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_atr_parser_tst.java
Normal file
@ -0,0 +1,63 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Mwh_atr_parser_tst {
|
||||
private final Mwh_atr_parser_fxt fxt = new Mwh_atr_parser_fxt();
|
||||
@Test public void Pair__quote__double() {fxt.Test_parse("a=\"b\"" , fxt.Make_pair("a" , "b"));}
|
||||
@Test public void Pair__quote__single() {fxt.Test_parse("a='b'" , fxt.Make_pair("a" , "b"));}
|
||||
@Test public void Pair__quote__none() {fxt.Test_parse("a=b" , fxt.Make_pair("a" , "b"));}
|
||||
@Test public void Pair__empty() {fxt.Test_parse("a=''" , fxt.Make_pair("a" , ""));}
|
||||
@Test public void Pair__key_w_underline() {fxt.Test_parse("a_b=c" , fxt.Make_pair("a_b" , "c"));}
|
||||
|
||||
@Test public void Name__quote__none() {fxt.Test_parse("b" , fxt.Make_name("b"));}
|
||||
@Test public void Name__ws() {fxt.Test_parse(" b " , fxt.Make_name("b"));} // PURPOSE:discovered while writing test for ref's "lower-alpha" DATE:2014-07-03
|
||||
@Test public void Name__mult() {fxt.Test_parse("a b1 c" , fxt.Make_name("a"), fxt.Make_name("b1"), fxt.Make_name("c"));}
|
||||
|
||||
@Test public void Fail__key_w_plus() {fxt.Test_parse("a+b" , fxt.Make_fail(0, 3));}
|
||||
@Test public void Fail__key_w_plus__many() {fxt.Test_parse("a+b c=d" , fxt.Make_fail(0, 3) , fxt.Make_pair("c", "d"));}
|
||||
@Test public void Fail__val_w_plus() {fxt.Test_parse("a=b+c" , fxt.Make_fail(0, 5));}
|
||||
@Test public void Fail__recover() {fxt.Test_parse("* a=b" , fxt.Make_fail(0, 1) , fxt.Make_pair("a", "b"));} // PURPOSE: * is invalid, but should not stop parsing of a=b
|
||||
@Test public void Fail__incomplete() {fxt.Test_parse("a= c=d" , fxt.Make_fail(0, 3) , fxt.Make_pair("c", "d"));} // PURPOSE: discard xatr if incomplete and followed by valid atr; PAGE:en.w:2013_in_American_television DATE:2014-09-25
|
||||
@Test public void Fail__incomplete_2() {fxt.Test_parse("a=c=d" , fxt.Make_fail(0, 5));} // PURPOSE: variation of above; per MW regex, missing space invalidates entire attribute; DATE:2014-09-25
|
||||
@Test public void Fail__incomplete_pair() {fxt.Test_parse("a= b=" , fxt.Make_fail(0, 3) , fxt.Make_fail(3, 5));} // PURPOSE: "b=" should be invalid not a kv of "b" = "b"; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31
|
||||
|
||||
@Test public void Dangling_eos() {fxt.Test_parse("a='b' c='d" , fxt.Make_pair("a", "b") , fxt.Make_fail(5, 10));} // PURPOSE: handle dangling quote at eos; PAGE:en.w:Aubervilliers DATE:2014-06-25
|
||||
@Test public void Dangling_bos() {fxt.Test_parse("a='b c=d" , fxt.Make_fail(0, 4) , fxt.Make_pair("c", "d"));}// PURPOSE: handle dangling quote at bos; resume at next valid atr; PAGE:en.w:Aubervilliers DATE:2014-06-25
|
||||
|
||||
@Test public void Ws__ini() {fxt.Test_parse(" a='b'" , fxt.Make_pair("a", "b").Atr_rng(0, 6));}
|
||||
@Test public void Ws__end() {fxt.Test_parse(" a='b' c='d'" , fxt.Make_pair("a", "b").Atr_rng(0, 6), fxt.Make_pair("c", "d").Atr_rng(6, 12));}
|
||||
@Test public void Ws() {fxt.Test_parse("a = 'b'" , fxt.Make_pair("a", "b"));} // PURPOSE: fix wherein multiple space was causing "a=a"; PAGE:fr.s:La_Sculpture_dans_les_cimetières_de_Paris/Père-Lachaise; DATE:2014-01-18
|
||||
|
||||
@Test public void Many__quote__apos() {fxt.Test_parse("a='b' c='d' e='f'" , fxt.Make_pair("a", "b"), fxt.Make_pair("c", "d"), fxt.Make_pair("e", "f"));}
|
||||
@Test public void Many__naked() {fxt.Test_parse("a=b c=d e=f" , fxt.Make_pair("a", "b"), fxt.Make_pair("c", "d"), fxt.Make_pair("e", "f"));}
|
||||
|
||||
@Test public void Val__ws__nl() {fxt.Test_parse("a='b\nc'" , fxt.Make_pair("a", "b c"));}
|
||||
@Test public void Val__ws__mult() {fxt.Test_parse("a='b c'" , fxt.Make_pair("a", "b c"));}
|
||||
@Test public void Val__ws__mult_mult() {fxt.Test_parse("a='b c d'" , fxt.Make_pair("a", "b c d"));} // PURPOSE: fix wherein 1st-gobble gobbled rest of spaces (was b cd)
|
||||
@Test public void Val__apos() {fxt.Test_parse("a=\"b c'd\"" , fxt.Make_pair("a", "b c'd"));} // PURPOSE: fix wherein apos was gobbled up; PAGE:en.s:Alice's_Adventures_in_Wonderland; DATE:2013-11-22
|
||||
@Test public void Val__apos_2() {fxt.Test_parse("a=\"b'c d\"" , fxt.Make_pair("a", "b'c d"));} // PURPOSE: fix wherein apos was causing "'b'c d"; PAGE:en.s:Grimm's_Household_Tales,_Volume_1; DATE:2013-12-22
|
||||
|
||||
@Test public void Nowiki__val() {fxt.Test_parse("a=<nowiki>'b'</nowiki>" , fxt.Make_pair("a", "b").Atr_rng(0, 13));}
|
||||
@Test public void Nowiki__key() {fxt.Test_parse("<nowiki>a=b</nowiki>" , fxt.Make_pair("a", "b").Atr_rng(8, 11));}
|
||||
@Test public void Nowiki__key_2() {fxt.Test_parse("a<nowiki>b</nowiki>c=d" , fxt.Make_pair("abc", "d").Atr_rng(0, 22));}
|
||||
@Test public void Nowiki__key_3() {fxt.Test_parse("a<nowiki>=</nowiki>\"b\"" , fxt.Make_pair("a", "b").Atr_rng(0, 22));} // EX:fr.w:{{Portail|Transpédia|Californie}}
|
||||
@Test public void Nowiki__quote() {fxt.Test_parse("a=\"b<nowiki>c</nowiki>d<nowiki>e</nowiki>f\"", fxt.Make_pair("a", "bcdef"));}
|
||||
|
||||
@Test public void Val__as_int() {fxt.Test_val_as_int("-123" , -123);}
|
||||
}
|
25
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_itm.java
Normal file
25
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_itm.java
Normal file
@ -0,0 +1,25 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
/** Immutable holder for one document item: its item type (one of the Itm_tid__* constants), its node type id, and its bytes. */
class Mwh_doc_itm {
	// item types
	public static final int Itm_tid__txt = 0, Itm_tid__nde_head = 1, Itm_tid__nde_tail = 2, Itm_tid__comment = 3;

	private final int itm_tid;
	private final int nde_tid;
	private final byte[] itm_bry;

	public Mwh_doc_itm(int itm_tid, int nde_tid, byte[] itm_bry) {
		this.itm_tid = itm_tid;
		this.nde_tid = nde_tid;
		this.itm_bry = itm_bry;		// stored by reference; not copied
	}

	/** Item type; see Itm_tid__* constants. */
	public int Itm_tid() {return itm_tid;}
	/** Bytes of the item, exactly as passed to the constructor. */
	public byte[] Itm_bry() {return itm_bry;}
	/** Node type id, exactly as passed to the constructor. */
	public int Nde_tid() {return nde_tid;}
}
|
62
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_mgr.java
Normal file
62
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_mgr.java
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
/**
 * Growable store of document items packed into a flat int array.
 * Each item occupies Idx__mult consecutive slots: uid, type id, src_bgn, src_end.
 */
class Mwh_doc_mgr {
	private final int data_max_orig;	// initial capacity in slots; Clear() shrinks back to this
	private int itm_len;				// number of items added
	private int[] data_ary;
	private int data_max;				// current capacity in slots; always data_ary.length

	/** @param max initial capacity in items (not slots) */
	public Mwh_doc_mgr(int max) {
		this.data_max_orig = max * Idx__mult;
		this.Max_(max);
	}
	/** Number of items added since construction or the last Clear(). */
	public int Len() {return itm_len;}
	/** Backing array; item {@code i} starts at slot {@code i * Idx__mult}. The array is replaced when Add() grows it or Clear() shrinks it. */
	public int[] Data_ary() {return data_ary;}
	private void Max_(int len) {
		this.data_max = len * Idx__mult;
		this.data_ary = new int[data_max];
		this.itm_len = 0;
	}
	/** Removes all items. The array is reused if it never grew; otherwise it is reallocated at the original capacity. */
	public void Clear() {
		if (data_max == data_max_orig)
			itm_len = 0;
		else
			Max_(data_max_orig / Idx__mult);
	}
	/**
	 * Appends an item, doubling the backing array when it is full.
	 * @return uid of the new item, which is its 0-based index
	 */
	public int Add(int dom_tid, int src_bgn, int src_end) {
		int data_idx = itm_len * Idx__mult;
		if (data_idx == data_max) {	// full; grow
			int new_data_max = data_max == 0 ? Idx__mult : data_max * 2;
			int[] new_data_ary = new int[new_data_max];
			System.arraycopy(data_ary, 0, new_data_ary, 0, data_max);	// copy existing items into the NEW array; copying data_ary onto itself dropped all earlier items
			this.data_ary = new_data_ary;
			this.data_max = new_data_max;
		}
		int dom_uid = itm_len;
		data_ary[data_idx + Idx_dom_uid] = dom_uid;
		data_ary[data_idx + Idx_dom_tid] = dom_tid;
		data_ary[data_idx + Idx_src_bgn] = src_bgn;
		data_ary[data_idx + Idx_src_end] = src_end;
		++itm_len;
		return dom_uid;
	}
	public static final int
	  Idx_dom_uid		=  0
	, Idx_dom_tid		=  1
	, Idx_src_bgn		=  2
	, Idx_src_end		=  3
	, Idx__mult			=  4
	;
}
|
191
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser.java
Normal file
191
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser.java
Normal file
@ -0,0 +1,191 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.primitives.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
public class Mwh_doc_parser {
|
||||
private final Mwh_doc_mgr dom_mgr = new Mwh_doc_mgr(16);
|
||||
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
|
||||
private final List_adp nde_stack = List_adp_.new_();
|
||||
private byte[] src; private int src_end;
|
||||
private Mwh_doc_wkr wkr;
|
||||
private Hash_adp_bry nde_regy;
|
||||
private int txt_bgn, nde_uid;
|
||||
private Xop_xnde_tag cur_nde; private int cur_nde_tid;
|
||||
public void Parse(Mwh_doc_wkr wkr, byte[] src, int src_bgn, int src_end) {
|
||||
this.wkr = wkr; this.src = src; this.src_end = src_end;
|
||||
this.nde_regy = wkr.Nde_regy();
|
||||
nde_stack.Clear();
|
||||
int pos = txt_bgn = src_bgn;
|
||||
nde_uid = cur_nde_tid = -1;
|
||||
cur_nde = null;
|
||||
while (pos < src_end) {
|
||||
if (src[pos] == Byte_ascii.Angle_bgn) // "<": possible nde start
|
||||
pos = Parse_nde(pos);
|
||||
else // else, just increment
|
||||
++pos;
|
||||
}
|
||||
if (src_end != txt_bgn) wkr.On_txt_end(this, src, cur_nde_tid, txt_bgn, pos);
|
||||
}
|
||||
private int Parse_nde(int pos) {
|
||||
int nde_end_tid = Nde_end_tid__invalid;
|
||||
boolean nde_is_head = true;
|
||||
int nde_bgn = pos;
|
||||
++pos;
|
||||
int name_bgn = pos;
|
||||
int name_end = pos;
|
||||
while (pos < src_end) {
|
||||
byte b = src[pos];
|
||||
switch (b) {
|
||||
// valid chars for name
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Dot: case Byte_ascii.Dash: case Byte_ascii.Underline: case Byte_ascii.Colon: // XML allowed punctuation
|
||||
case Byte_ascii.Dollar:// MW: handles <br$2>;
|
||||
++pos;
|
||||
break;
|
||||
// comment check
|
||||
case Byte_ascii.Bang:
|
||||
boolean comment_found = false;
|
||||
if (name_bgn == pos && Bry_.Eq(src, pos + 1, pos + 3, Comment_bgn)) {
|
||||
int comment_end_pos = Bry_find_.Find_fwd(src, Comment_end, pos + 3);
|
||||
if (comment_end_pos != Bry_find_.Not_found) {
|
||||
nde_end_tid = Nde_end_tid__comment;
|
||||
pos = comment_end_pos + 3;
|
||||
comment_found = true;
|
||||
}
|
||||
}
|
||||
if (!comment_found)
|
||||
return pos;
|
||||
else
|
||||
break;
|
||||
// invalid char; not a node; treat as text; EX: "<!@#", "< /b>"
|
||||
default:
|
||||
return pos;
|
||||
// slash -> either "</b>" or "<b/>"
|
||||
case Byte_ascii.Slash:
|
||||
if (name_bgn == pos) { // "</"; EX: "</b>"
|
||||
nde_is_head = false;
|
||||
++name_bgn;
|
||||
++pos;
|
||||
continue;
|
||||
}
|
||||
else { // check for "/>"; NOTE: <pre/a>, <pre//> are allowed
|
||||
name_end = pos;
|
||||
++pos;
|
||||
if (pos == src_end) return pos; // end of doc; treat as text; EX: "<b/EOS"
|
||||
if (src[pos] == Byte_ascii.Gt) {
|
||||
nde_end_tid = Nde_end_tid__inline;
|
||||
++pos;
|
||||
}
|
||||
else
|
||||
nde_end_tid = Nde_end_tid__slash;
|
||||
}
|
||||
break;
|
||||
// stops "name"
|
||||
case Byte_ascii.Gt:
|
||||
nde_end_tid = Nde_end_tid__gt;
|
||||
name_end = pos;
|
||||
++pos;
|
||||
break;
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
nde_end_tid = Nde_end_tid__ws;
|
||||
name_end = pos;
|
||||
break;
|
||||
case Byte_ascii.Backslash: // MW: allows "<br\>" -> "<br/>"
|
||||
nde_end_tid = Nde_end_tid__backslash;
|
||||
name_end = pos;
|
||||
break;
|
||||
}
|
||||
if (nde_end_tid != Nde_end_tid__invalid) break;
|
||||
}
|
||||
// get name
|
||||
Xop_xnde_tag nde_itm = null;
|
||||
if (nde_end_tid != Nde_end_tid__comment) {
|
||||
nde_itm = (Xop_xnde_tag)nde_regy.Get_by_mid(src, name_bgn, name_end);
|
||||
if (nde_itm == null) return pos; // not a known nde; exit
|
||||
}
|
||||
if (txt_bgn != nde_bgn) { // notify txt
|
||||
wkr.On_txt_end(this, src, cur_nde_tid, txt_bgn, nde_bgn);
|
||||
txt_bgn = pos;
|
||||
}
|
||||
if (nde_is_head) {
|
||||
wkr.On_nde_head_bgn(this, src, cur_nde_tid, name_bgn, name_end);
|
||||
switch (nde_end_tid) {
|
||||
case Nde_end_tid__comment:
|
||||
wkr.On_comment_end(this, src, cur_nde_tid, nde_bgn, pos);
|
||||
break;
|
||||
case Nde_end_tid__ws:
|
||||
case Nde_end_tid__slash:
|
||||
case Nde_end_tid__backslash: // handled above
|
||||
pos = atr_parser.Parse(wkr, nde_uid, cur_nde_tid, src, pos, src_end);
|
||||
nde_end_tid = atr_parser.Nde_end_tid();
|
||||
txt_bgn = pos;
|
||||
break;
|
||||
}
|
||||
switch (nde_end_tid) {
|
||||
case Nde_end_tid__inline:
|
||||
wkr.On_nde_head_end(this, src, cur_nde_tid, nde_bgn, pos, Bool_.Y);
|
||||
txt_bgn = pos;
|
||||
break;
|
||||
case Nde_end_tid__gt:
|
||||
wkr.On_nde_head_end(this, src, cur_nde_tid, nde_bgn, pos, Bool_.N);
|
||||
txt_bgn = pos;
|
||||
if ( nde_itm != null
|
||||
&& !nde_itm.Single_only_html() // ignore <b>
|
||||
&& (cur_nde == null || !cur_nde.Xtn()) // <pre> ignores inner
|
||||
) {
|
||||
if (cur_nde != null)
|
||||
nde_stack.Add(cur_nde);
|
||||
this.cur_nde = nde_itm;
|
||||
this.cur_nde_tid = nde_itm.Id();
|
||||
}
|
||||
break;
|
||||
case Nde_end_tid__ws:
|
||||
case Nde_end_tid__slash:
|
||||
case Nde_end_tid__backslash: break; // handled above
|
||||
}
|
||||
nde_uid = dom_mgr.Add(Mwh_doc_itm.Itm_tid__nde_head, nde_bgn, pos);
|
||||
}
|
||||
else {
|
||||
switch (nde_end_tid) {
|
||||
case Nde_end_tid__gt:
|
||||
wkr.On_nde_tail_end(this, src, cur_nde_tid, nde_bgn, pos);
|
||||
txt_bgn = pos;
|
||||
if (nde_itm.Id() == cur_nde_tid) {
|
||||
cur_nde = (Xop_xnde_tag)List_adp_.Pop_or(nde_stack, null);
|
||||
cur_nde_tid = cur_nde == null ? -1 : cur_nde.Id();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
public static final int Nde_end_tid__invalid = 0, Nde_end_tid__gt = 1, Nde_end_tid__ws = 2, Nde_end_tid__inline = 3, Nde_end_tid__slash = 4, Nde_end_tid__backslash = 5, Nde_end_tid__comment = 6;
|
||||
private static final byte[] Comment_bgn = Bry_.new_a7("--"), Comment_end = Bry_.new_a7("-->");
|
||||
}
|
73
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser_fxt.java
Normal file
73
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser_fxt.java
Normal file
@ -0,0 +1,73 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
class Mwh_doc_parser_fxt {
|
||||
private final Bry_bfr expd_bfr = Bry_bfr.new_(), actl_bfr = Bry_bfr.new_();
|
||||
private final Mwh_doc_parser parser = new Mwh_doc_parser();
|
||||
private final Mwh_doc_wkr__itm_bldr wkr = new Mwh_doc_wkr__itm_bldr();
|
||||
public Mwh_doc_itm Make_txt (String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__txt , -1, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_txt (String raw, int nde_tid) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__txt , nde_tid, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_comment (String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__comment , -1, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_nde_head(String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_head , -1, Bry_.new_u8(raw));}
|
||||
public Mwh_doc_itm Make_nde_tail(String raw) {return new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_tail , -1, Bry_.new_u8(raw));}
|
||||
public void Test_parse(String raw, Mwh_doc_itm... expd) {
|
||||
Mwh_doc_itm[] actl = Exec_parse(raw);
|
||||
Test_print(expd, actl);
|
||||
}
|
||||
public Mwh_doc_itm[] Exec_parse(String raw) {
|
||||
byte[] bry = Bry_.new_u8(raw);
|
||||
parser.Parse(wkr, bry, 0, bry.length);
|
||||
return wkr.To_atr_ary();
|
||||
}
|
||||
public void Test_print(Mwh_doc_itm[] expd_ary, Mwh_doc_itm[] actl_ary) {
|
||||
int expd_len = expd_ary.length;
|
||||
int actl_len = actl_ary.length;
|
||||
int len = expd_len > actl_len ? expd_len : actl_len;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
To_bfr(expd_bfr, i < expd_len ? expd_ary[i] : null, actl_bfr, i < actl_len ? actl_ary[i] : null);
|
||||
}
|
||||
Tfds.Eq_str_lines(expd_bfr.Xto_str_and_clear(), actl_bfr.Xto_str_and_clear());
|
||||
}
|
||||
private void To_bfr(Bry_bfr expd_bfr, Mwh_doc_itm expd_itm, Bry_bfr actl_bfr, Mwh_doc_itm actl_itm) {
|
||||
To_bfr__main(expd_bfr, expd_itm); To_bfr__main(actl_bfr, actl_itm);
|
||||
if (expd_itm != null && expd_itm.Nde_tid() != -1) {
|
||||
To_bfr__nde_tid(expd_bfr, expd_itm); To_bfr__nde_tid(actl_bfr, actl_itm);
|
||||
}
|
||||
}
|
||||
private void To_bfr__main(Bry_bfr bfr, Mwh_doc_itm itm) {
|
||||
if (itm == null) return;
|
||||
bfr.Add_str_a7("itm_tid:").Add_int_variable(itm.Itm_tid()).Add_byte_nl();
|
||||
bfr.Add_str_a7("txt:").Add(itm.Itm_bry()).Add_byte_nl();
|
||||
}
|
||||
private void To_bfr__nde_tid(Bry_bfr bfr, Mwh_doc_itm itm) {
|
||||
if (itm == null) return;
|
||||
bfr.Add_str_a7("nde_tid:").Add_int_variable(itm.Nde_tid()).Add_byte_nl();
|
||||
}
|
||||
}
|
||||
class Mwh_doc_wkr__itm_bldr implements Mwh_doc_wkr {
|
||||
private final List_adp list = List_adp_.new_();
|
||||
public Hash_adp_bry Nde_regy() {return nde_regy;} private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw();
|
||||
public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {}
|
||||
public void On_txt_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__txt , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
public void On_nde_head_bgn (Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) {}
|
||||
public void On_nde_head_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_head , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
public void On_nde_tail_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__nde_tail , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {list.Add(new Mwh_doc_itm(Mwh_doc_itm.Itm_tid__comment , nde_tid, Bry_.Mid(src, itm_bgn, itm_end)));}
|
||||
|
||||
public Mwh_doc_itm[] To_atr_ary() {return (Mwh_doc_itm[])list.To_ary_and_clear(Mwh_doc_itm.class);}
|
||||
}
|
60
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser_tst.java
Normal file
60
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser_tst.java
Normal file
@ -0,0 +1,60 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.parsers.xndes.*;
|
||||
public class Mwh_doc_parser_tst {
|
||||
private final Mwh_doc_parser_fxt fxt = new Mwh_doc_parser_fxt();
|
||||
@Test public void Text__basic() {fxt.Test_parse("abc" , fxt.Make_txt("abc"));}
|
||||
@Test public void Comment() {fxt.Test_parse("a<!--b-->c" , fxt.Make_txt("a"), fxt.Make_comment("<!--b-->"), fxt.Make_txt("c"));}
|
||||
@Test public void Fail__inline_eos() {fxt.Test_parse("a<b/" , fxt.Make_txt("a<b/"));}
|
||||
@Test public void Fail__unknown() {fxt.Test_parse("a<bc/>d" , fxt.Make_txt("a<bc/>d"));}
|
||||
@Test public void Node__inline() {fxt.Test_parse("a<b/>c" , fxt.Make_txt("a"), fxt.Make_nde_head("<b/>") , fxt.Make_txt("c"));}
|
||||
@Test public void Node__pair() {fxt.Test_parse("a<b>c</b>d" , fxt.Make_txt("a"), fxt.Make_nde_head("<b>") , fxt.Make_txt("c"), fxt.Make_nde_tail("</b>"), fxt.Make_txt("d"));}
|
||||
@Test public void Atrs__pair() {
|
||||
fxt.Test_parse("<div id='1'>a</div>"
|
||||
, fxt.Make_nde_head("<div id='1'>")
|
||||
, fxt.Make_txt("a")
|
||||
, fxt.Make_nde_tail("</div>"));
|
||||
}
|
||||
@Test public void Atrs__inline() {
|
||||
fxt.Test_parse("a<div id='1'/>b"
|
||||
, fxt.Make_txt("a")
|
||||
, fxt.Make_nde_head("<div id='1'/>")
|
||||
, fxt.Make_txt("b"));
|
||||
}
|
||||
@Test public void Node__single_only() {
|
||||
fxt.Test_parse("<b>a<br>b</b>c"
|
||||
, fxt.Make_nde_head("<b>")
|
||||
, fxt.Make_txt("a", Xop_xnde_tag_.Tid_b)
|
||||
, fxt.Make_nde_head("<br>")
|
||||
, fxt.Make_txt("b", Xop_xnde_tag_.Tid_b) // <b> not <br>
|
||||
, fxt.Make_nde_tail("</b>")
|
||||
, fxt.Make_txt("c", Xop_xnde_tag_.Tid__null)
|
||||
);
|
||||
}
|
||||
@Test public void Node__pre() {
|
||||
fxt.Test_parse("<pre>a<div>b</pre>c"
|
||||
, fxt.Make_nde_head("<pre>")
|
||||
, fxt.Make_txt("a", Xop_xnde_tag_.Tid_pre)
|
||||
, fxt.Make_nde_head("<div>")
|
||||
, fxt.Make_txt("b", Xop_xnde_tag_.Tid_pre) // <pre> not <div>
|
||||
, fxt.Make_nde_tail("</pre>")
|
||||
, fxt.Make_txt("c", Xop_xnde_tag_.Tid__null)
|
||||
);
|
||||
}
|
||||
}
|
27
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr.java
Normal file
27
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr.java
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public interface Mwh_doc_wkr {
|
||||
Hash_adp_bry Nde_regy();
|
||||
void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx);
|
||||
void On_txt_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end);
|
||||
void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end);
|
||||
void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline);
|
||||
void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end);
|
||||
void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end);
|
||||
}
|
31
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr_.java
Normal file
31
400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr_.java
Normal file
@ -0,0 +1,31 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
public class Mwh_doc_wkr_ {
|
||||
public static Hash_adp_bry Nde_regy__mw() {
|
||||
Xop_xnde_tag[] ary = Xop_xnde_tag_.Ary;
|
||||
int len = ary.length;
|
||||
Hash_adp_bry rv = Hash_adp_bry.ci_a7();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xop_xnde_tag itm = ary[i];
|
||||
rv.Add(itm.Name_bry(), itm);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
@ -24,7 +24,7 @@ public class Xop_lnki_tkn extends Xop_tkn_itm_base {
|
||||
public void Tkn_tid_to_txt() {tkn_tid = Xop_tkn_itm_.Tid_txt;}
|
||||
public int Ns_id() {return ns_id;} public Xop_lnki_tkn Ns_id_(int v) {ns_id = v; return this;} private int ns_id;
|
||||
public Xoa_ttl Ttl() {return ttl;} public Xop_lnki_tkn Ttl_(Xoa_ttl v) {ttl = v; return this;} private Xoa_ttl ttl;
|
||||
public byte Lnki_type() {return lnki_type;} public Xop_lnki_tkn Lnki_type_(byte v) {lnki_type = (byte)Enm_.Add_int(lnki_type, v); return this;} private byte lnki_type = Xop_lnki_type.Id_null;
|
||||
public byte Lnki_type() {return lnki_type;} public Xop_lnki_tkn Lnki_type_(byte v) {lnki_type = (byte)Bitmask_.Add_int(lnki_type, v); return this;} private byte lnki_type = Xop_lnki_type.Id_null;
|
||||
public int Tail_bgn() {return tail_bgn;} public Xop_lnki_tkn Tail_bgn_(int v) {tail_bgn = v; return this;} private int tail_bgn = -1;
|
||||
public int Tail_end() {return tail_end;} public Xop_lnki_tkn Tail_end_(int v) {tail_end = v; return this;} private int tail_end = -1;
|
||||
public byte Border() {return border;} public Xop_lnki_tkn Border_(byte v) {border = v; return this;} private byte border = Bool_.__byte;
|
||||
|
@ -19,44 +19,44 @@ package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.
|
||||
public class Xop_lnki_type {
|
||||
public static final byte Id_null = 0, Id_none = 1, Id_frameless = 2, Id_frame = 4, Id_thumb = 8;
|
||||
public static boolean Id_is_thumbable(byte id) {
|
||||
return ( Enm_.Has_int(id, Id_thumb) // for purposes of displaying images on page, thumb and frame both create a thumb box
|
||||
|| Enm_.Has_int(id, Id_frame)
|
||||
return ( Bitmask_.Has_int(id, Id_thumb) // for purposes of displaying images on page, thumb and frame both create a thumb box
|
||||
|| Bitmask_.Has_int(id, Id_frame)
|
||||
);
|
||||
}
|
||||
public static boolean Id_defaults_to_thumb(byte id) { // assuming original of 400,200
|
||||
if ( Enm_.Has_int(id, Id_thumb) // [[File:A.png|thumb]] -> 220,-1
|
||||
|| Enm_.Has_int(id, Id_frameless) // [[File:A.png|frameless]] -> 220,-1
|
||||
if ( Bitmask_.Has_int(id, Id_thumb) // [[File:A.png|thumb]] -> 220,-1
|
||||
|| Bitmask_.Has_int(id, Id_frameless) // [[File:A.png|frameless]] -> 220,-1
|
||||
)
|
||||
return true;
|
||||
else if ( Enm_.Has_int(id, Id_frame) // [[File:A.png|frame]] -> 400,200 (frame is always default size)
|
||||
else if ( Bitmask_.Has_int(id, Id_frame) // [[File:A.png|frame]] -> 400,200 (frame is always default size)
|
||||
|| id == Id_null // [[File:A.png]] -> 400,200 (default to original size)
|
||||
|| Enm_.Has_int(id, Id_none) // TODO: deprecate; NOTE: still used by one test; DATE:2015-08-03
|
||||
|| Bitmask_.Has_int(id, Id_none) // TODO: deprecate; NOTE: still used by one test; DATE:2015-08-03
|
||||
)
|
||||
return false;
|
||||
else // should not happen
|
||||
throw Err_.new_unhandled(id);
|
||||
}
|
||||
public static boolean Id_limits_large_size(byte id) {// Linker.php|makeThumbLink2|Do not present an image bigger than the source, for bitmap-style images; assuming original of 400,200
|
||||
if ( Enm_.Has_int(id, Id_thumb) // [[File:A.png|600px|thumb]] -> 400,200
|
||||
|| Enm_.Has_int(id, Id_frameless) // [[File:A.png|600px|frameless]] -> 400,200
|
||||
|| Enm_.Has_int(id, Id_frame) // [[File:A.png|600px|frame]] -> 400,200 (frame is always default size)
|
||||
if ( Bitmask_.Has_int(id, Id_thumb) // [[File:A.png|600px|thumb]] -> 400,200
|
||||
|| Bitmask_.Has_int(id, Id_frameless) // [[File:A.png|600px|frameless]] -> 400,200
|
||||
|| Bitmask_.Has_int(id, Id_frame) // [[File:A.png|600px|frame]] -> 400,200 (frame is always default size)
|
||||
)
|
||||
return true;
|
||||
else if ( id == Id_null // [[File:A.png|600px]] -> 600,400; uses orig file of 400,200, but <img> tag src_width / src_height set to 600,400
|
||||
|| Enm_.Has_int(id, Id_none) // TODO: deprecate; NOTE: leaving in b/c of above failed-deprecate; DATE:2015-08-03
|
||||
|| Bitmask_.Has_int(id, Id_none) // TODO: deprecate; NOTE: leaving in b/c of above failed-deprecate; DATE:2015-08-03
|
||||
)
|
||||
return false;
|
||||
else // should not happen;
|
||||
throw Err_.new_unhandled(id);
|
||||
}
|
||||
public static boolean Id_supports_upright(byte id) {// REF:Linker.php|makeImageLink;if ( isset( $fp['thumbnail'] ) || isset( $fp['manualthumb'] ) || isset( $fp['framed'] ) || isset( $fp['frameless'] ) || !$hp['width'] ) DATE:2014-05-22
|
||||
if ( Enm_.Has_int(id, Id_thumb)
|
||||
|| Enm_.Has_int(id, Id_frameless)
|
||||
|| Enm_.Has_int(id, Id_frame)
|
||||
if ( Bitmask_.Has_int(id, Id_thumb)
|
||||
|| Bitmask_.Has_int(id, Id_frameless)
|
||||
|| Bitmask_.Has_int(id, Id_frame)
|
||||
)
|
||||
return true;
|
||||
else if ( id == Id_null
|
||||
|| Enm_.Has_int(id, Id_none)
|
||||
|| Bitmask_.Has_int(id, Id_none)
|
||||
)
|
||||
return false;
|
||||
else // should not happen;
|
||||
|
144
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_lang.java
Normal file
144
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_converter_lang.java
Normal file
@ -0,0 +1,144 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
public class Vnt_converter_lang {
|
||||
private final Bry_bfr bfr = Bry_bfr.new_();
|
||||
private int max_depth = 32;
|
||||
private byte[] src; private int src_len;
|
||||
private boolean convert_needed;
|
||||
private int pos;
|
||||
private final Vnt_converter_rule converter_rule = new Vnt_converter_rule();
|
||||
private Xol_convert_mgr convert_mgr; private Xol_vnt_regy vnt_regy; // private Xol_vnt_mgr vnt_mgr; // private Xol_vnt_itm vnt_itm;
|
||||
private final Mwh_doc_parser doc_parser = new Mwh_doc_parser();
|
||||
private final Vnt_html_doc_wkr html_convert_wkr;
|
||||
private final Bry_bfr tmp_convert_bfr = Bry_bfr.new_();
|
||||
public Vnt_converter_lang(Xol_convert_mgr convert_mgr, Xol_vnt_regy vnt_regy) {
|
||||
this.html_convert_wkr = new Vnt_html_doc_wkr(convert_mgr);
|
||||
this.convert_mgr = convert_mgr; this.vnt_regy = vnt_regy;
|
||||
}
|
||||
public byte[] Converted_title() {return converted_title;} private byte[] converted_title;
|
||||
public byte[] Parse(Xol_vnt_itm vnt_itm, byte[] src) {// REF.MW:/languages/LanguageConverter.php!recursiveConvertTopLevel
|
||||
this.converted_title = null;
|
||||
converter_rule.Init(this, vnt_regy, vnt_itm);
|
||||
this.converted_title = null;
|
||||
int markup_count = 0;
|
||||
this.pos = 0;
|
||||
this.convert_needed = true; // false for sr lang; SEE:LanguageSr.php !$this->guessVariant(src, vnt);
|
||||
this.src = src; this.src_len = src.length;
|
||||
while (pos < src_len) {
|
||||
int curly_bgn = Bry_find_.Find_fwd(src, Bry__curly_bgn, pos, src_len);
|
||||
if (curly_bgn == Bry_find_.Not_found) { // No more markup, append final segment
|
||||
if (markup_count == 0) return src; // no markups found; just return original
|
||||
Add_output(vnt_itm, convert_needed, src, pos, src_len);
|
||||
return bfr.Xto_bry_and_clear();
|
||||
}
|
||||
Add_output(vnt_itm, convert_needed, src, pos, curly_bgn); // Markup found; append segment
|
||||
pos = curly_bgn; // Advance position
|
||||
++markup_count;
|
||||
Parse_recursive(vnt_itm, 1); // Do recursive conversion
|
||||
}
|
||||
return bfr.Xto_bry_and_clear();
|
||||
}
|
||||
private void Parse_recursive(Xol_vnt_itm vnt_itm, int depth) {
|
||||
pos += 2; // skip "-{"
|
||||
boolean warning_done = false;
|
||||
int bgn_pos = pos;
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
Object o = trie.Match_bgn_w_byte(b, src,pos, src_len);
|
||||
if (o == null) { // char;
|
||||
++pos;
|
||||
continue;
|
||||
}
|
||||
switch (((Byte_obj_val)o).Val()) {
|
||||
case Tid__curly_bgn:
|
||||
if (depth >= max_depth) {
|
||||
bfr.Add(Bry__curly_bgn);
|
||||
if (!warning_done) {
|
||||
bfr.Add_str("<span class=\"error\">");
|
||||
// wfMessage('language-converter-depth-warning')->numParams($this->mMaxDepth)->inContentLanguage()->text()
|
||||
bfr.Add_str("</span>");
|
||||
warning_done = true;
|
||||
}
|
||||
pos += 2; // skip "-{"
|
||||
continue;
|
||||
}
|
||||
bgn_pos = pos;
|
||||
Parse_recursive(vnt_itm, depth + 1); // Recursively parse another rule
|
||||
break;
|
||||
case Tid__curly_end:
|
||||
converter_rule.Parse(src, bgn_pos, pos);
|
||||
Apply_manual_conv(converter_rule);
|
||||
bfr.Add(converter_rule.Display());
|
||||
pos += 2;
|
||||
return;
|
||||
default: throw Err_.new_unhandled(-1); // never happens
|
||||
}
|
||||
}
|
||||
if (pos < src_len) { // Unclosed rule
|
||||
bfr.Add(Bry__curly_bgn);
|
||||
Auto_convert(bfr, vnt_itm, src, pos, src_len);
|
||||
}
|
||||
pos = src_len;
|
||||
}
|
||||
private void Add_output(Xol_vnt_itm vnt_itm, boolean convert_needed, byte[] src, int bgn, int end) {
|
||||
if (end - bgn == 0) return;
|
||||
if (convert_needed) {
|
||||
Auto_convert(bfr, vnt_itm, src, bgn, end);
|
||||
}
|
||||
else
|
||||
bfr.Add_mid(src, bgn, end);
|
||||
}
|
||||
public byte[] Auto_convert(Xol_vnt_itm vnt_itm, byte[] src) {
|
||||
Auto_convert(tmp_convert_bfr, vnt_itm, src, 0, src.length);
|
||||
return tmp_convert_bfr.Xto_bry_and_clear();
|
||||
}
|
||||
private void Auto_convert(Bry_bfr bfr, Xol_vnt_itm vnt_itm, byte[] src, int bgn, int end) {
|
||||
html_convert_wkr.Init(bfr, vnt_itm.Idx());
|
||||
doc_parser.Parse(html_convert_wkr, src, bgn, end);
|
||||
}
|
||||
private void Apply_manual_conv(Vnt_converter_rule rule) {
|
||||
this.converted_title = rule.Title();
|
||||
byte action = rule.Action();
|
||||
Vnt_rule_undi_mgr cnv_tbl = rule.Cnv_tbl();
|
||||
int len = cnv_tbl.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Vnt_rule_undi_grp grp = cnv_tbl.Get_at(i);
|
||||
byte[] grp_key = grp.Vnt();
|
||||
Xol_vnt_itm vnt_itm = vnt_regy.Get_by(grp_key); if (vnt_itm == null) continue;
|
||||
int grp_len = grp.Len();
|
||||
Xol_convert_wkr wkr = convert_mgr.Converter_ary()[vnt_itm.Idx()];
|
||||
for (int j = 0; j < grp_len; ++j) {
|
||||
Vnt_rule_undi_itm itm = grp.Get_at(j);
|
||||
if (action == Byte_ascii.Plus) {
|
||||
wkr.Add(itm.Src(), itm.Trg());
|
||||
}
|
||||
else if (action == Byte_ascii.Dash)
|
||||
wkr.Del(itm.Src());
|
||||
}
|
||||
}
|
||||
}
|
||||
private static final byte Tid__curly_bgn = 1, Tid__curly_end = 2;
|
||||
private static final byte[] Bry__curly_bgn = Bry_.new_a7("-{"), Bry__curly_end = Bry_.new_a7("}-");
|
||||
private static final Btrie_fast_mgr trie = Btrie_fast_mgr.cs()
|
||||
.Add_bry_byte(Bry__curly_bgn, Tid__curly_bgn)
|
||||
.Add_bry_byte(Bry__curly_end, Tid__curly_end);
|
||||
}
|
@ -0,0 +1,53 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
|
||||
public class Vnt_converter_lang__html__tst { // REF: https://www.mediawiki.org/wiki/Writing_systems/Syntax
|
||||
private final Vnt_converter_lang_fxt fxt = new Vnt_converter_lang_fxt();
|
||||
private String rule;
|
||||
@Before public void init() {
|
||||
rule = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-";
|
||||
}
|
||||
@Test public void Node() {
|
||||
fxt.Test_parse(rule + "hk<span>hk</span>hk", "cn<span>cn</span>cn");
|
||||
}
|
||||
@Test public void Attribs() {
|
||||
fxt.Test_parse(rule + "<span class='hk'>hk</span>", "<span class='hk'>cn</span>");
|
||||
}
|
||||
@Test public void Attribs__title() {
|
||||
fxt.Test_parse(rule + "<span title='hk'>hk</span>", "<span title='cn'>cn</span>");
|
||||
}
|
||||
@Test public void Attribs__alt() {
|
||||
fxt.Test_parse(rule + "<span alt='hk'>hk</span>", "<span alt='cn'>cn</span>");
|
||||
}
|
||||
@Test public void Attribs__skip_url() {
|
||||
fxt.Test_parse(rule + "<span alt='http://hk.org'>hk</span>", "<span alt='http://hk.org'>cn</span>");
|
||||
}
|
||||
@Test public void Node__style() {
|
||||
fxt.Test_parse(rule + "hk<script>hk</script>hk", "cn<script>hk</script>cn");
|
||||
}
|
||||
@Test public void Node__code() {
|
||||
fxt.Test_parse(rule + "hk<code>hk</code>hk", "cn<code>hk</code>cn");
|
||||
}
|
||||
@Test public void Node__pre() {
|
||||
fxt.Test_parse(rule + "hk<pre>hk</pre>hk", "cn<pre>hk</pre>cn");
|
||||
}
|
||||
@Test public void Node__pre__nested() {
|
||||
fxt.Test_parse(rule + "hk<pre><span>hk</span></pre>hk", "cn<pre><span>hk</span></pre>cn");
|
||||
}
|
||||
}
|
@ -0,0 +1,117 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
|
||||
public class Vnt_converter_lang__syntax__tst { // REF: https://www.mediawiki.org/wiki/Writing_systems/Syntax
|
||||
private final Vnt_converter_lang_fxt fxt = new Vnt_converter_lang_fxt();
|
||||
@Test public void Bidi() {
|
||||
String text = "-{zh-hans:a;zh-hant:b}-";
|
||||
fxt.Test_parse_many(text, "a", "zh-hans", "zh-cn", "zh-sg", "zh");
|
||||
fxt.Test_parse_many(text, "b", "zh-hant", "zh-hk", "zh-tw");
|
||||
}
|
||||
@Test public void Undi() {
|
||||
String text = "-{H|cn_k=>zh-cn:cn_v}-cn_k";
|
||||
fxt.Test_parse_many(text, "cn_k", "zh", "zh-hans", "zh-hant", "zh-hk", "zh-my", "zh-mo", "zh-sg", "zh-tw");
|
||||
fxt.Test_parse_many(text, "cn_v", "zh-cn");
|
||||
}
|
||||
@Test public void Raw() {
|
||||
fxt.Test_parse_many("-{a}-", "a", "zh-hans", "zh-cn", "zh-sg", "zh", "zh-hant", "zh-hk", "zh-tw");
|
||||
fxt.Test_parse_many("-{R|a}-", "a", "zh-hans", "zh-cn", "zh-sg", "zh", "zh-hant", "zh-hk", "zh-tw");
|
||||
}
|
||||
@Test public void Hide() {
|
||||
String text = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-cn hk tw";
|
||||
fxt.Test_parse_many(text, "cn cn cn", "zh-cn", "zh-sg");
|
||||
fxt.Test_parse_many(text, "hk hk hk", "zh-hk");
|
||||
fxt.Test_parse_many(text, "tw tw tw", "zh-tw");
|
||||
fxt.Test_parse_many(text, "cn hk tw", "zh", "zh-hans", "zh-hant");
|
||||
}
|
||||
@Test public void Aout() {
|
||||
String text = "-{A|zh-cn:cn;zh-hk:hk;zh-tw:tw}- cn hk tw";
|
||||
fxt.Test_parse_many(text, "cn cn cn cn", "zh-cn", "zh-sg");
|
||||
fxt.Test_parse_many(text, "hk hk hk hk", "zh-hk");
|
||||
fxt.Test_parse_many(text, "tw tw tw tw", "zh-tw");
|
||||
fxt.Test_parse_many(text, "cn cn hk tw", "zh", "zh-hans");
|
||||
fxt.Test_parse_many(text, "tw cn hk tw", "zh-hant");
|
||||
fxt.Test_parse_many("h-{}-k", "hk", "zh-cn"); // semi-disabled
|
||||
}
|
||||
@Test public void Del() {
|
||||
String text = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-cn hk tw-{-|zh-cn:cn;zh-hk:hk;zh-tw:tw}- cn hk tw";
|
||||
fxt.Test_parse_many(text, "cn cn cn cn hk tw", "zh-cn", "zh-sg");
|
||||
fxt.Test_parse_many(text, "hk hk hk cn hk tw", "zh-hk");
|
||||
fxt.Test_parse_many(text, "tw tw tw cn hk tw", "zh-tw");
|
||||
fxt.Test_parse_many(text, "cn hk tw cn hk tw", "zh", "zh-hans", "zh-hant");
|
||||
}
|
||||
@Test public void Title() {
|
||||
fxt.Test_parse_title("-{}-", null, "", "zh-cn");
|
||||
String text = "-{T|zh-cn:cn;zh-hk:hk;zh-tw:tw}-cn hk tw";
|
||||
fxt.Test_parse_title(text, "cn", "cn hk tw", "zh-cn");
|
||||
fxt.Test_parse_title(text, "cn", "cn hk tw", "zh-sg");
|
||||
fxt.Test_parse_title(text, "hk", "cn hk tw", "zh-hk");
|
||||
fxt.Test_parse_title(text, "tw", "cn hk tw", "zh-tw");
|
||||
fxt.Test_parse_title(text, "cn", "cn hk tw", "zh-hans");
|
||||
fxt.Test_parse_title(text, "tw", "cn hk tw", "zh-hant");
|
||||
fxt.Test_parse_title(text, null, "cn hk tw", "zh");
|
||||
}
|
||||
@Test public void Descrip() {
|
||||
String text = "-{D|zh-cn:cn;zh-hk:hk;zh-tw:tw}-";
|
||||
fxt.Test_parse_many(text, "ZH-CN:cn;ZH-HK:hk;ZH-TW:tw;", "zh", "zh-hans", "zh-hant", "zh-cn", "zh-hk", "zh-my", "zh-mo", "zh-sg", "zh-tw");
|
||||
}
|
||||
@Test public void Mixture() {
|
||||
String text = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}--{zh;zh-hans;zh-hant|cn hk tw}- -{zh;zh-cn;zh-hk;zh-tw|cn hk tw}-";
|
||||
fxt.Test_parse_many(text, "cn hk tw cn cn cn", "zh-cn", "zh-sg", "zh-hans");
|
||||
fxt.Test_parse_many(text, "cn hk tw hk hk hk", "zh-hk");
|
||||
fxt.Test_parse_many(text, "cn hk tw tw tw tw", "zh-tw", "zh-hant");
|
||||
fxt.Test_parse_many(text, "cn hk tw cn hk tw", "zh");
|
||||
}
|
||||
@Test public void Descrip__undi() {fxt.Test_parse("-{D|cn_k=>zh-cn:cn_v;hk_k=>zh-hk:hk_v}-", "cn_k⇒ZH-CN:cn_v;hk_k⇒ZH-HK:hk_v;");}
|
||||
@Test public void Descrip__mixd() {fxt.Test_parse("-{D|zh-tw:tw_v;cn_k=>zh-cn:cn_v;hk_k=>zh-hk:hk_v;zh-mo:mo_v}-", "ZH-TW:tw_v;ZH-MO:mo_v;cn_k⇒ZH-CN:cn_v;hk_k⇒ZH-HK:hk_v;");}
|
||||
}
|
||||
class Vnt_converter_lang_fxt {
|
||||
private final Vnt_converter_lang converter_lang;
|
||||
private final Xol_convert_mgr convert_mgr = new Xol_convert_mgr();
|
||||
private final Xol_vnt_regy vnt_regy = Xol_vnt_regy_fxt.new_chinese();
|
||||
private Xol_vnt_itm vnt_itm;
|
||||
public Vnt_converter_lang_fxt() {
|
||||
converter_lang = new Vnt_converter_lang(convert_mgr, vnt_regy);
|
||||
convert_mgr.Init(vnt_regy);
|
||||
Init_cur("zh-cn");
|
||||
}
|
||||
public void Init_cur(String vnt) {
|
||||
byte[] cur_vnt = Bry_.new_a7(vnt);
|
||||
this.vnt_itm = vnt_regy.Get_by(cur_vnt);
|
||||
convert_mgr.Cur_vnt_(cur_vnt);
|
||||
}
|
||||
public void Test_parse(String raw, String expd) {
|
||||
Tfds.Eq_str(expd, String_.new_u8(converter_lang.Parse(vnt_itm, Bry_.new_u8(raw))));
|
||||
}
|
||||
public void Test_parse_many(String raw, String expd, String... vnts) {
|
||||
int len = vnts.length;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
String vnt_key = vnts[i];
|
||||
Init_cur(vnt_key);
|
||||
Xol_vnt_itm vnt = vnt_regy.Get_by(Bry_.new_a7(vnt_key));
|
||||
Tfds.Eq_str(expd, String_.new_u8(converter_lang.Parse(vnt, Bry_.new_u8(raw))), vnt_key);
|
||||
}
|
||||
}
|
||||
public void Test_parse_title(String raw, String expd_title, String expd_text, String vnt_key) {
|
||||
Init_cur(vnt_key);
|
||||
Xol_vnt_itm vnt = vnt_regy.Get_by(Bry_.new_a7(vnt_key));
|
||||
Tfds.Eq_str(expd_text, String_.new_u8(converter_lang.Parse(vnt, Bry_.new_u8(raw))), vnt_key);
|
||||
Tfds.Eq_str(expd_title, converter_lang.Converted_title());
|
||||
}
|
||||
}
|
@ -17,42 +17,170 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
class Vnt_converter_rule { // REF.MW: /languages/LanguageConverter.php|ConverterRule
|
||||
private final byte[] src;
|
||||
private final int src_bgn, src_end;
|
||||
private int pipe_pos = -1;
|
||||
public Vnt_converter_rule(byte[] src, int src_bgn, int src_end) {
|
||||
this.src = src; this.src_bgn = src_bgn; this.src_end = src_end;
|
||||
private final Vnt_flag_parser flag_parser = new Vnt_flag_parser(); private final Vnt_flag_code_mgr flag_codes = new Vnt_flag_code_mgr(); private final Vnt_flag_lang_mgr flag_langs = new Vnt_flag_lang_mgr();
|
||||
private final Vnt_rule_parser rule_parser = new Vnt_rule_parser(); private final Vnt_rule_undi_mgr rule_undis = new Vnt_rule_undi_mgr(); private final Vnt_rule_bidi_mgr rule_bidis = new Vnt_rule_bidi_mgr();
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr.new_();
|
||||
private final Ordered_hash cnv_marked_hash = Ordered_hash_.new_bry_();
|
||||
private Vnt_converter_lang converter;
|
||||
private Xol_vnt_regy vnt_regy; private Xol_vnt_itm vnt_itm; private byte[] vnt_key;
|
||||
private byte[] rule_raw;
|
||||
public byte[] Display() {return display;} private byte[] display;
|
||||
public byte[] Title() {return title;} private byte[] title;
|
||||
public byte Action() {return action;} private byte action;
|
||||
public Vnt_rule_undi_mgr Cnv_tbl() {return cnv_tbl;} private final Vnt_rule_undi_mgr cnv_tbl = new Vnt_rule_undi_mgr();
|
||||
public void Init(Vnt_converter_lang converter, Xol_vnt_regy vnt_regy, Xol_vnt_itm vnt_itm) {
|
||||
this.converter = converter;
|
||||
this.vnt_regy = vnt_regy; this.vnt_itm = vnt_itm; this.vnt_key = vnt_itm.Key();
|
||||
rule_parser.Init(vnt_regy);
|
||||
}
|
||||
public void Parse() {
|
||||
}
|
||||
public void Parse_flags(Vnt_flag_parser parser) {
|
||||
this.pipe_pos = Bry_find_.Find_fwd(src, Byte_ascii.Pipe, src_bgn, src_end);
|
||||
if (pipe_pos != Bry_find_.Not_found) // "|" found; EX: -{A|}-
|
||||
parser.Parse(src, src_bgn, pipe_pos);
|
||||
int flag_count = parser.Count();
|
||||
if (flag_count == 0) parser.Set_y(Vnt_flag_itm_.Tid_show);
|
||||
else if (parser.Limit_if_exists(Vnt_flag_itm_.Tid_raw)) {}
|
||||
else if (parser.Limit_if_exists(Vnt_flag_itm_.Tid_name)) {}
|
||||
else if (parser.Limit_if_exists(Vnt_flag_itm_.Tid_del)) {}
|
||||
else if (flag_count == 1 && parser.Get(Vnt_flag_itm_.Tid_title)) parser.Set_y(Vnt_flag_itm_.Tid_macro);
|
||||
else if (parser.Get(Vnt_flag_itm_.Tid_macro)) {
|
||||
boolean exists_d = parser.Get(Vnt_flag_itm_.Tid_descrip);
|
||||
boolean exists_t = parser.Get(Vnt_flag_itm_.Tid_title);
|
||||
parser.Clear();
|
||||
parser.Set_y_many(Vnt_flag_itm_.Tid_all, Vnt_flag_itm_.Tid_macro);
|
||||
if (exists_d) parser.Set_y(Vnt_flag_itm_.Tid_descrip);
|
||||
if (exists_t) parser.Set_y(Vnt_flag_itm_.Tid_title);
|
||||
public void Parse(byte[] src, int src_bgn, int src_end) {
|
||||
this.display = this.title = null;
|
||||
this.action = Byte_ascii.Null;
|
||||
int pipe_pos = Bry_find_.Find_fwd(src, Byte_ascii.Pipe, src_bgn, src_end);
|
||||
flag_parser.Parse(flag_codes, flag_langs, vnt_regy, src, src_bgn, pipe_pos);
|
||||
int rule_bgn = pipe_pos == -1 ? src_bgn : pipe_pos + 1;
|
||||
this.rule_raw = Bry_.Mid(src, rule_bgn, src_end);
|
||||
int flag_langs_count = flag_langs.Count();
|
||||
if (flag_langs_count > 0) { // vnts exist in flag; EX: -{zh-hans;zh-hant|text}-
|
||||
if (flag_langs.Has(vnt_key))
|
||||
rule_raw = converter.Auto_convert(vnt_itm, rule_raw); // convert rule text to current language; EX:-{|convert}-
|
||||
else {
|
||||
byte[][] fallbacks = vnt_itm.Fallback_ary();
|
||||
int fallbacks_len = fallbacks.length;
|
||||
for (int i = 0; i < fallbacks_len; ++i) {
|
||||
byte[] fallback = fallbacks[i];
|
||||
if (flag_langs.Has(fallback)) {
|
||||
Xol_vnt_itm fallback_itm = (Xol_vnt_itm)vnt_regy.Get_by(fallback);
|
||||
rule_raw = converter.Auto_convert(fallback_itm, rule_raw);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
flag_codes.Limit(Vnt_flag_code_.Tid_raw);
|
||||
}
|
||||
else {
|
||||
if (parser.Get(Vnt_flag_itm_.Tid_add))
|
||||
parser.Set_y_many(Vnt_flag_itm_.Tid_all, Vnt_flag_itm_.Tid_show);
|
||||
if (parser.Get(Vnt_flag_itm_.Tid_descrip))
|
||||
parser.Set_n(Vnt_flag_itm_.Tid_show);
|
||||
parser.Limit_if_exists_vnts(); // try to find flags like "zh-hans", "zh-hant"; allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
|
||||
rule_parser.Clear(rule_undis, rule_bidis, rule_raw);
|
||||
if (!flag_codes.Get(Vnt_flag_code_.Tid_raw) && !flag_codes.Get(Vnt_flag_code_.Tid_name)) {
|
||||
rule_parser.Parse(src, rule_bgn, src_end);
|
||||
}
|
||||
if (rule_undis.Has_none() && rule_bidis.Has_none()) {
|
||||
if ( flag_codes.Get(Vnt_flag_code_.Tid_add)
|
||||
|| flag_codes.Get(Vnt_flag_code_.Tid_del)
|
||||
) { // fill all variants if text in -{A/H/-|text} without rules
|
||||
for (int i = 0; i < flag_langs_count; ++i) {
|
||||
Xol_vnt_itm itm = flag_langs.Get_at(i);
|
||||
rule_bidis.Set(itm.Key(), rule_raw);
|
||||
}
|
||||
}
|
||||
else if ( !flag_codes.Get(Vnt_flag_code_.Tid_name)
|
||||
&& !flag_codes.Get(Vnt_flag_code_.Tid_title)
|
||||
) {
|
||||
flag_codes.Limit(Vnt_flag_code_.Tid_raw);
|
||||
}
|
||||
}
|
||||
int flag_count = Vnt_flag_code_.Tid__max;
|
||||
for (int flag = 0; flag < flag_count; ++flag) {
|
||||
if (!flag_codes.Get(flag)) continue;
|
||||
switch (flag) {
|
||||
case Vnt_flag_code_.Tid_raw: display = rule_parser.Raw(); break; // if we don't do content convert, still strip the -{}- tags
|
||||
case Vnt_flag_code_.Tid_name: // process N flag: output current variant name
|
||||
byte[] vnt_key_trim = Bry_.Trim(rule_parser.Raw());
|
||||
Xol_vnt_itm vnt_itm_trim = vnt_regy.Get_by(vnt_key_trim);
|
||||
display = vnt_itm_trim == null ? display = Bry_.Empty : vnt_itm_trim.Name();
|
||||
break;
|
||||
case Vnt_flag_code_.Tid_descrip: display = Make_descrip(); break; // process D flag: output rules description
|
||||
case Vnt_flag_code_.Tid_hide: display = Bry_.Empty; break; // process H,- flag or T only: output nothing
|
||||
case Vnt_flag_code_.Tid_del: display = Bry_.Empty; action = Byte_ascii.Dash; break;
|
||||
case Vnt_flag_code_.Tid_add: display = Bry_.Empty; action = Byte_ascii.Plus; break;
|
||||
case Vnt_flag_code_.Tid_show: display = Make_converted(vnt_itm); break;
|
||||
case Vnt_flag_code_.Tid_title: display = Bry_.Empty; title = Make_title(vnt_itm); break;
|
||||
default: break; // ignore unknown flags (but see error case below)
|
||||
}
|
||||
}
|
||||
if (display == null)
|
||||
display = Bry_.Add(Bry__error_bgn, Bry__error_end); // wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
|
||||
Make_conv_tbl();
|
||||
}
|
||||
private void Make_conv_tbl() {
|
||||
if (rule_undis.Has_none() && rule_bidis.Has_none()) return; // Special case optimisation
|
||||
cnv_tbl.Clear(); cnv_marked_hash.Clear();
|
||||
int vnt_regy_len = vnt_regy.Len();
|
||||
for (int i = 0; i < vnt_regy_len; ++i) {
|
||||
Xol_vnt_itm vnt = vnt_regy.Get_at(i);
|
||||
byte[] vnt_key = vnt.Key();
|
||||
// bidi: fill in missing variants with fallbacks
|
||||
byte[] bidi_bry = rule_bidis.Get_text_by_key_or_null(vnt_key);
|
||||
if (bidi_bry == null) {
|
||||
bidi_bry = rule_bidis.Get_text_by_ary_or_null(vnt.Fallback_ary());
|
||||
if (bidi_bry != null) rule_bidis.Set(vnt_key, bidi_bry);
|
||||
}
|
||||
if (bidi_bry != null) {
|
||||
int marked_len = cnv_marked_hash.Count();
|
||||
for (int j = 0; j < marked_len; ++j) {
|
||||
Xol_vnt_itm marked_itm = (Xol_vnt_itm)cnv_marked_hash.Get_at(j);
|
||||
byte[] marked_key = marked_itm.Key();
|
||||
byte[] marked_bry = rule_bidis.Get_text_by_key_or_null(marked_key);
|
||||
byte[] cur_bidi_bry = rule_bidis.Get_text_by_key_or_null(vnt_key);
|
||||
if (vnt.Dir() == Xol_vnt_dir_.Tid__bi)
|
||||
cnv_tbl.Set(vnt_key, marked_bry, cur_bidi_bry);
|
||||
if (marked_itm.Dir() == Xol_vnt_dir_.Tid__bi)
|
||||
cnv_tbl.Set(marked_key, cur_bidi_bry, marked_bry);
|
||||
}
|
||||
cnv_marked_hash.Add(vnt_key, vnt);
|
||||
}
|
||||
// undi: fill to convert tables
|
||||
byte[] undi_bry = rule_undis.Get_text_by_key_or_null(vnt_key);
|
||||
if (vnt.Dir() != Xol_vnt_dir_.Tid__none && undi_bry != null) {
|
||||
Vnt_rule_undi_grp undi_grp = rule_undis.Get_by(vnt_key);
|
||||
int undi_grp_len = undi_grp.Len();
|
||||
for (int j = 0; j < undi_grp_len; ++j) {
|
||||
Vnt_rule_undi_itm undi_itm = undi_grp.Get_at(j);
|
||||
cnv_tbl.Set(vnt_key, undi_itm.Src(), undi_itm.Trg());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Parse_rules(Vnt_rule_parser parser) {
|
||||
parser.Parse(src, src_bgn, src_end);
|
||||
private byte[] Make_descrip() {
|
||||
int len = rule_bidis.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Vnt_rule_bidi_itm bidi_itm = rule_bidis.Get_at(i);
|
||||
Xol_vnt_itm vnt_itm = vnt_regy.Get_by(bidi_itm.Vnt());
|
||||
tmp_bfr.Add(vnt_itm.Name()).Add_byte_colon().Add(bidi_itm.Text()).Add_byte_semic();
|
||||
}
|
||||
len = rule_undis.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Vnt_rule_undi_grp undi_grp = rule_undis.Get_at(i);
|
||||
int sub_len = undi_grp.Len();
|
||||
for (int j = 0; j < sub_len; ++j) {
|
||||
Vnt_rule_undi_itm undi_itm = (Vnt_rule_undi_itm)undi_grp.Get_at(j);
|
||||
Xol_vnt_itm undi_vnt = vnt_regy.Get_by(undi_grp.Vnt());
|
||||
tmp_bfr.Add(undi_itm.Src()).Add(Bry__undi_spr).Add(undi_vnt.Name()).Add_byte_colon().Add(undi_itm.Trg()).Add_byte_semic();
|
||||
}
|
||||
}
|
||||
return tmp_bfr.Xto_bry_and_clear();
|
||||
}
|
||||
private byte[] Make_title(Xol_vnt_itm vnt) {
|
||||
if (vnt.Idx() == 0) { // for mainLanguageCode; EX: "zh"
|
||||
byte[] rv = rule_bidis.Get_text_by_key_or_null(vnt.Key());
|
||||
return rv == null ? rule_undis.Get_text_by_key_or_null(vnt.Key()) : rv;
|
||||
}
|
||||
else
|
||||
return Make_converted(vnt);
|
||||
}
|
||||
private byte[] Make_converted(Xol_vnt_itm vnt) {
|
||||
if (rule_bidis.Len() == 0 && rule_undis.Len() == 0) return rule_raw;
|
||||
byte[] rv = rule_bidis.Get_text_by_key_or_null(vnt.Key()); // display current variant in bidirectional array
|
||||
if (rv == null) rv = rule_bidis.Get_text_by_ary_or_null(vnt.Fallback_ary()); // or display current variant in fallbacks
|
||||
if (rv == null) rv = rule_undis.Get_text_by_key_or_null(vnt.Key()); // or display current variant in unidirectional array
|
||||
if (rv == null && vnt.Dir() == Xol_vnt_dir_.Tid__none) { // or display first text under disable manual convert
|
||||
rv = (rule_bidis.Len() > 0) ? rule_bidis.Get_text_at(0) : rule_undis.Get_text_at(0);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private final static byte[]
|
||||
Bry__error_bgn = Bry_.new_a7("<span class=\"error\">")
|
||||
, Bry__error_end = Bry_.new_a7("</span>")
|
||||
, Bry__undi_spr = Bry_.new_u8("⇒")
|
||||
;
|
||||
}
|
||||
|
@ -16,37 +16,35 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
class Vnt_flag_itm_ {
|
||||
class Vnt_flag_code_ {
|
||||
public static final int
|
||||
Tid_unknown = 0
|
||||
, Tid_show = 1 // S: EX: -{S|zh-hans:A;zh-hant:B}- -> "A"
|
||||
, Tid_all = 2 // +: EX: -{+|zh-hans:A;zh-hant:B}- -> "A"
|
||||
, Tid_err = 3 // E: EX: -{E|zh-hans:A;zh-hant:B}- -> "A"
|
||||
, Tid_add = 4 // A: add and output; EX: -{A|zh-hans:A;zh-hant:B}- -> "A"
|
||||
, Tid_title = 5 // T: page_title; EX: -{T|zh-hans:A;zh-hant:B}- -> ""
|
||||
, Tid_raw = 6 // R: raw: no convert; EX: -{R|zh-hans:A;zh-hant:B}- -> "zh-hans:A;zh-hant:B"
|
||||
, Tid_descrip = 7 // D: describe; EX: -{D|zh-hans:A;zh-hant:B}- -> "简体:A;繁體:B;" (简体=Simplified;繁體=Traditional)
|
||||
, Tid_del = 8 // -: remove; EX: -{-|zh-hans:A;zh-hant:B}- -> ""
|
||||
, Tid_macro = 9 // H: macro; EX: -{H|zh-hans:A;zh-hant:B}- -> ""
|
||||
, Tid_name = 10 // N: EX: -{N|zh-hans:A;zh-hant:B}- -> ""
|
||||
, Tid_lang = 11 // vnt: EX: -{zh-hant|B}- -> "B"
|
||||
, Tid__max = 12
|
||||
Tid_add = 0 // +: EX: -{+|zh-hans:A;zh-hant:B}- -> "A"
|
||||
, Tid_del = 1 // -: remove; EX: -{-|zh-hans:A;zh-hant:B}- -> ""
|
||||
, Tid_aout = 2 // A: Add and output; EX: -{A|zh-hans:A;zh-hant:B}- -> "A"
|
||||
, Tid_hide = 3 // H: Hide macro; EX: -{H|zh-hans:A;zh-hant:B}- -> ""
|
||||
, Tid_raw = 4 // R: Raw: no convert; EX: -{R|zh-hans:A;zh-hant:B}- -> "zh-hans:A;zh-hant:B"
|
||||
, Tid_show = 5 // S: Show EX: -{S|zh-hans:A;zh-hant:B}- -> "A"
|
||||
, Tid_descrip = 6 // D: Describe; EX: -{D|zh-hans:A;zh-hant:B}- -> "简体:A;繁體:B;" (简体=Simplified;繁體=Traditional)
|
||||
, Tid_name = 7 // N: variant Name EX: -{N|zh-hans:A;zh-hant:B}- -> ""
|
||||
, Tid_title = 8 // T: page Title; EX: -{T|zh-hans:A;zh-hant:B}- -> ""
|
||||
, Tid_err = 9 // E: Error EX: -{E|zh-hans:A;zh-hant:B}- -> "A"
|
||||
, Tid__max = 10
|
||||
;
|
||||
private static final String[] Tid__names = new String[]
|
||||
{ "unknown", "show", "all", "err", "add", "title"
|
||||
, "raw", "descrip", "del", "macro", "name", "lang"
|
||||
{ "+", "-", "A", "H", "R"
|
||||
, "S", "D", "N", "T", "E"
|
||||
};
|
||||
public static String To_name(int tid) {return Tid__names[tid];}
|
||||
public static String To_str(int tid) {return Tid__names[tid];}
|
||||
public static final Hash_adp_bry Regy = Hash_adp_bry.ci_a7() // NOTE: match either lc or uc; EX: -{D}- or -{d}-;
|
||||
.Add_byte_int(Byte_ascii.Ltr_S , Tid_show)
|
||||
.Add_byte_int(Byte_ascii.Plus , Tid_all)
|
||||
.Add_byte_int(Byte_ascii.Ltr_E , Tid_err)
|
||||
.Add_byte_int(Byte_ascii.Ltr_A , Tid_add)
|
||||
.Add_byte_int(Byte_ascii.Ltr_T , Tid_title)
|
||||
.Add_byte_int(Byte_ascii.Ltr_R , Tid_raw)
|
||||
.Add_byte_int(Byte_ascii.Ltr_D , Tid_descrip)
|
||||
.Add_byte_int(Byte_ascii.Plus , Tid_add)
|
||||
.Add_byte_int(Byte_ascii.Dash , Tid_del)
|
||||
.Add_byte_int(Byte_ascii.Ltr_H , Tid_macro)
|
||||
.Add_byte_int(Byte_ascii.Ltr_A , Tid_aout)
|
||||
.Add_byte_int(Byte_ascii.Ltr_H , Tid_hide)
|
||||
.Add_byte_int(Byte_ascii.Ltr_R , Tid_raw)
|
||||
.Add_byte_int(Byte_ascii.Ltr_S , Tid_show)
|
||||
.Add_byte_int(Byte_ascii.Ltr_D , Tid_descrip)
|
||||
.Add_byte_int(Byte_ascii.Ltr_N , Tid_name)
|
||||
.Add_byte_int(Byte_ascii.Ltr_T , Tid_title)
|
||||
.Add_byte_int(Byte_ascii.Ltr_E , Tid_err)
|
||||
;
|
||||
}
|
56
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_code_mgr.java
Normal file
56
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_code_mgr.java
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
class Vnt_flag_code_mgr {
|
||||
private final boolean[] ary = new boolean[Ary_len]; private final static int Ary_len = Vnt_flag_code_.Tid__max;
|
||||
public int Count() {return count;} private int count = 0;
|
||||
public boolean Get(int tid) {return ary[tid];}
|
||||
public void Clear() {
|
||||
count = 0;
|
||||
for (int i = 0; i < Ary_len; ++i)
|
||||
ary[i] = false;
|
||||
}
|
||||
public void Add(int tid) {
|
||||
this.Set_y(tid);
|
||||
++count;
|
||||
}
|
||||
public void Set_y(int tid) {ary[tid] = Bool_.Y;}
|
||||
public void Set_y_many(int... vals) {
|
||||
int len = vals.length;
|
||||
for (int i = 0; i < len; ++i)
|
||||
ary[vals[i]] = Bool_.Y;
|
||||
}
|
||||
public void Set_n(int tid) {ary[tid] = Bool_.N;}
|
||||
public void Limit(int tid) {
|
||||
for (int i = 0; i < Ary_len; ++i)
|
||||
ary[i] = i == tid;
|
||||
}
|
||||
public boolean Limit_if_exists(int tid) {
|
||||
boolean exists = ary[tid]; if (!exists) return false;
|
||||
this.Limit(tid);
|
||||
return true;
|
||||
}
|
||||
public void To_bfr__dbg(Bry_bfr bfr) {
|
||||
for (int i = 0; i < Ary_len; ++i) {
|
||||
if (ary[i]) {
|
||||
if (bfr.Len_gt_0()) bfr.Add_byte_semic();
|
||||
bfr.Add_str_a7(Vnt_flag_code_.To_str(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
35
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_lang_mgr.java
Normal file
35
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_lang_mgr.java
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
class Vnt_flag_lang_mgr {
|
||||
private final Ordered_hash regy = Ordered_hash_.new_bry_();
|
||||
public int Count() {return regy.Count();}
|
||||
public boolean Has(byte[] vnt) {return regy.Has(vnt);}
|
||||
public void Clear() {regy.Clear();}
|
||||
public void Add(Xol_vnt_itm itm) {regy.Add(itm.Key(), itm);}
|
||||
public Xol_vnt_itm Get_at(int i) {return (Xol_vnt_itm)regy.Get_at(i);}
|
||||
public void To_bfr__dbg(Bry_bfr bfr) {
|
||||
int len = regy.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xol_vnt_itm itm = (Xol_vnt_itm)regy.Get_at(i);
|
||||
if (bfr.Len_gt_0()) bfr.Add_byte_semic();
|
||||
bfr.Add(itm.Key());
|
||||
}
|
||||
}
|
||||
}
|
@ -16,52 +16,48 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
class Vnt_flag_parser implements gplx.core.brys.Bry_split_wkr {
|
||||
private final Hash_adp_bry flag_regy = Vnt_flag_itm_.Regy;
|
||||
private final Hash_adp_bry vnt_regy = Hash_adp_bry.cs();
|
||||
private final boolean[] flag_ary = new boolean[Vnt_flag_itm_.Tid__max];
|
||||
private int count = 0;
|
||||
public int Count() {return count;}
|
||||
public boolean Get(int tid) {return flag_ary[tid];}
|
||||
public void Set_y(int tid) {flag_ary[tid] = Bool_.Y;}
|
||||
public void Set_y_many(int... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; ++i)
|
||||
flag_ary[ary[i]] = Bool_.Y;
|
||||
}
|
||||
public void Set_n(int tid) {flag_ary[tid] = Bool_.N;}
|
||||
public void Limit(int tid) {
|
||||
for (int i = 0; i < Vnt_flag_itm_.Tid__max; ++i) {
|
||||
if (i != tid) flag_ary[i] = false;
|
||||
private final Hash_adp_bry codes_regy = Vnt_flag_code_.Regy;
|
||||
private Vnt_flag_code_mgr codes; private Vnt_flag_lang_mgr langs;
|
||||
private Xol_vnt_regy vnt_regy;
|
||||
public void Parse(Vnt_flag_code_mgr codes, Vnt_flag_lang_mgr langs, Xol_vnt_regy vnt_regy, byte[] src, int src_bgn, int src_end) {
|
||||
this.codes = codes; this.langs = langs; this.vnt_regy = vnt_regy;
|
||||
codes.Clear(); langs.Clear();
|
||||
if (src_end != Bry_find_.Not_found) // "|" found; EX: -{A|}-
|
||||
Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Semic, true, this);
|
||||
int codes_count = codes.Count(), langs_count = langs.Count();
|
||||
if (codes_count == 0) codes.Set_y(Vnt_flag_code_.Tid_show);
|
||||
else if (codes.Limit_if_exists(Vnt_flag_code_.Tid_raw)) {}
|
||||
else if (codes.Limit_if_exists(Vnt_flag_code_.Tid_name)) {}
|
||||
else if (codes.Limit_if_exists(Vnt_flag_code_.Tid_del)) {}
|
||||
else if (codes_count == 1 && codes.Get(Vnt_flag_code_.Tid_title)) codes.Set_y(Vnt_flag_code_.Tid_hide);
|
||||
else if (codes.Get(Vnt_flag_code_.Tid_hide)) {
|
||||
boolean exists_d = codes.Get(Vnt_flag_code_.Tid_descrip);
|
||||
boolean exists_t = codes.Get(Vnt_flag_code_.Tid_title);
|
||||
codes.Clear();
|
||||
codes.Set_y_many(Vnt_flag_code_.Tid_add, Vnt_flag_code_.Tid_hide);
|
||||
if (exists_d) codes.Set_y(Vnt_flag_code_.Tid_descrip);
|
||||
if (exists_t) codes.Set_y(Vnt_flag_code_.Tid_title);
|
||||
}
|
||||
}
|
||||
public boolean Limit_if_exists(int tid) {
|
||||
boolean exists = flag_ary[tid]; if (!exists) return false;
|
||||
for (int i = 0; i < Vnt_flag_itm_.Tid__max; ++i) {
|
||||
if (i != tid) flag_ary[i] = false;
|
||||
else {
|
||||
if (codes.Get(Vnt_flag_code_.Tid_aout))
|
||||
codes.Set_y_many(Vnt_flag_code_.Tid_add, Vnt_flag_code_.Tid_show);
|
||||
if (codes.Get(Vnt_flag_code_.Tid_descrip))
|
||||
codes.Set_n(Vnt_flag_code_.Tid_show);
|
||||
if (langs_count > 0)
|
||||
codes.Clear();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
public boolean Limit_if_exists_vnts() {
|
||||
return false;
|
||||
}
|
||||
public void Clear() {
|
||||
count = 0;
|
||||
for (int i = 0; i < Vnt_flag_itm_.Tid__max; ++i)
|
||||
flag_ary[i] = false;
|
||||
}
|
||||
public void Parse(byte[] src, int src_bgn, int src_end) {
|
||||
this.Clear();
|
||||
Bry_split_.Split(src, Byte_ascii.Semic, true, this);
|
||||
}
|
||||
public int Split(byte[] src, int itm_bgn, int itm_end) {
|
||||
int flag_tid = flag_regy.Get_as_int_or(src, itm_bgn, itm_end, -1);
|
||||
if (flag_tid == -1) {
|
||||
int vnt_tid = vnt_regy.Get_as_int_or(src, itm_bgn, itm_end, -1);
|
||||
if (vnt_tid == -1) return Bry_split_.Rv__ok; // unknown flag; ignore
|
||||
int flag_tid = codes_regy.Get_as_int_or(src, itm_bgn, itm_end, -1);
|
||||
if (flag_tid == -1) { // try to find flags like "zh-hans", "zh-hant"; allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
|
||||
Xol_vnt_itm vnt_itm = vnt_regy.Get_by(src, itm_bgn, itm_end);
|
||||
if (vnt_itm == null) return Bry_split_.Rv__ok; // unknown flag; ignore
|
||||
langs.Add(vnt_itm);
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
flag_ary[flag_tid] = true;
|
||||
++count;
|
||||
codes.Add(flag_tid);
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
}
|
||||
|
55
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_parser_tst.java
Normal file
55
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_flag_parser_tst.java
Normal file
@ -0,0 +1,55 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.langs.vnts.*;
|
||||
public class Vnt_flag_parser_tst {
|
||||
private final Vnt_flag_parser_fxt fxt = new Vnt_flag_parser_fxt();
|
||||
@Test public void Basic() {fxt.Test_parse("D" , "D");}
|
||||
@Test public void Multiple() {fxt.Test_parse("+;S;E" , "+;S;E");}
|
||||
@Test public void Ws() {fxt.Test_parse(" + ; S ; E " , "+;S;E");}
|
||||
@Test public void None() {fxt.Test_parse("" , "S");}
|
||||
@Test public void Wrong() {fxt.Test_parse("XYZ" , "S");}
|
||||
@Test public void Raw__limit() {fxt.Test_parse("R;S" , "R");}
|
||||
@Test public void Name__limit() {fxt.Test_parse("N;S" , "N");}
|
||||
@Test public void Del_limit() {fxt.Test_parse("-;S" , "-");}
|
||||
@Test public void Title__also_macro_y() {fxt.Test_parse("T" , "H;T");}
|
||||
@Test public void Title__also_macro_n() {fxt.Test_parse("T;S" , "S;T");}
|
||||
@Test public void Hide__remove_all() {fxt.Test_parse("H;S" , "+;H");}
|
||||
@Test public void Hide__keep_descrip() {fxt.Test_parse("H;S;D" , "+;H;D");}
|
||||
@Test public void Hide__keep_title() {fxt.Test_parse("H;S;T" , "+;H;T");}
|
||||
@Test public void Aout__also_show_all() {fxt.Test_parse("A" , "+;A;S");}
|
||||
@Test public void Descrip__remove_show() {fxt.Test_parse("D;S" , "D");}
|
||||
@Test public void Aout_w_descrip() {fxt.Test_parse("A;D;S" , "+;A;D");}
|
||||
@Test public void Lang__one() {fxt.Test_parse("zh-hans" , "S;zh-hans");}
|
||||
@Test public void Lang__many() {fxt.Test_parse("zh-cn;zh-hk" , "S;zh-cn;zh-hk");}
|
||||
@Test public void Lang__many__ws() {fxt.Test_parse(" zh-cn ; zh-hk " , "S;zh-cn;zh-hk");}
|
||||
@Test public void Lang__zap__codes() {fxt.Test_parse("+;S;zh-hans;" , "zh-hans");}
|
||||
}
|
||||
class Vnt_flag_parser_fxt {
|
||||
private final Vnt_flag_parser parser = new Vnt_flag_parser();
|
||||
private final Vnt_flag_code_mgr codes = new Vnt_flag_code_mgr(); private final Vnt_flag_lang_mgr langs = new Vnt_flag_lang_mgr();
|
||||
private final Xol_vnt_regy vnt_regy = Xol_vnt_regy_fxt.new_chinese();
|
||||
private final Bry_bfr bfr = Bry_bfr.new_();
|
||||
public void Test_parse(String raw, String expd) {
|
||||
byte[] src = Bry_.new_u8(raw);
|
||||
parser.Parse(codes, langs, vnt_regy, src, 0, src.length);
|
||||
codes.To_bfr__dbg(bfr);
|
||||
langs.To_bfr__dbg(bfr);
|
||||
Tfds.Eq_str(expd, bfr.Xto_str_and_clear());
|
||||
}
|
||||
}
|
74
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_html_doc_wkr.java
Normal file
74
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_html_doc_wkr.java
Normal file
@ -0,0 +1,74 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.xndes.*;
|
||||
import gplx.xowa.langs.vnts.converts.*;
|
||||
import gplx.xowa.html.*;
|
||||
class Vnt_html_doc_wkr implements Mwh_doc_wkr {
|
||||
private final Hash_adp_bry atr_hash = Hash_adp_bry.ci_a7();
|
||||
private Bry_bfr bfr;
|
||||
private final Xol_convert_mgr convert_mgr; private int convert_vnt_idx;
|
||||
public Vnt_html_doc_wkr(Xol_convert_mgr convert_mgr) {
|
||||
this.convert_mgr = convert_mgr;
|
||||
atr_hash.Add_many_str("title", "alt");
|
||||
}
|
||||
public Hash_adp_bry Nde_regy() {return nde_regy;} private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw();
|
||||
public void Init(Bry_bfr bfr, int convert_vnt_idx) {this.bfr = bfr; this.convert_vnt_idx = convert_vnt_idx;}
|
||||
public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {
|
||||
int val_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
|
||||
int val_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
|
||||
if ( atr_hash.Get_by_mid(key_bry, 0, key_bry.length) == null // title, alt
|
||||
|| !key_exists
|
||||
|| Bry_find_.Find_fwd(src, Bry__url_frag, val_bgn, val_end) != Bry_find_.Not_found
|
||||
) { // handle name-only attribs like "<span title>"
|
||||
int atr_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_bgn];
|
||||
int atr_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_end];
|
||||
bfr.Add_mid(src, atr_bgn, atr_end);
|
||||
}
|
||||
else {
|
||||
bfr.Add_byte_space();
|
||||
bfr.Add(key_bry);
|
||||
bfr.Add_byte(Byte_ascii.Eq);
|
||||
byte quote_byte = Mwh_atr_itm.Calc_qte_byte(itm_ary, itm_idx);
|
||||
bfr.Add_byte(quote_byte);
|
||||
bfr.Add(convert_mgr.Convert_text(convert_vnt_idx, src, val_bgn, val_end));
|
||||
bfr.Add_byte(quote_byte);
|
||||
}
|
||||
}
|
||||
public void On_txt_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {
|
||||
switch (nde_tid) {
|
||||
case Xop_xnde_tag_.Tid_code:
|
||||
case Xop_xnde_tag_.Tid_script:
|
||||
case Xop_xnde_tag_.Tid_pre:
|
||||
bfr.Add_mid(src, itm_bgn, itm_end);
|
||||
break;
|
||||
default:
|
||||
bfr.Add(convert_mgr.Convert_text(convert_vnt_idx, src, itm_bgn, itm_end));
|
||||
break;
|
||||
}
|
||||
}
|
||||
public void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) {
|
||||
bfr.Add_byte(Byte_ascii.Angle_bgn).Add_mid(src, key_bgn, key_end); // EX: "<span"
|
||||
}
|
||||
public void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline) {
|
||||
bfr.Add(inline ? Xoh_consts.__inline : Xoh_consts.__end); // add "/>" or ">"
|
||||
}
|
||||
public void On_nde_tail_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {bfr.Add_mid(src, itm_bgn, itm_end);}
|
||||
public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {bfr.Add_mid(src, itm_bgn, itm_end);}
|
||||
private static final byte[] Bry__url_frag = Bry_.new_a7("://"); // REF.MW: if ( !strpos( $attr, '://' ) ) {
|
||||
}
|
@ -1,110 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
public class Vnt_language_converter {
|
||||
private final Bry_bfr bfr = Bry_bfr.new_();
|
||||
private int max_depth = 32;
|
||||
private byte[] src; private int src_len;
|
||||
private boolean convert_needed;
|
||||
private int pos;
|
||||
public byte[] Parse(byte[] vnt, byte[] src) {// REF.MW:/languages/LanguageConverter.php!recursiveConvertTopLevel
|
||||
synchronized (bfr) {
|
||||
int markup_count = 0;
|
||||
this.pos = 0;
|
||||
this.convert_needed = false; // for sr lang; SEE:LanguageSr.php !$this->guessVariant(src, vnt);
|
||||
this.src = src; this.src_len = src.length;
|
||||
while (pos < src_len) {
|
||||
int curly_bgn = Bry_find_.Find_fwd(src, Bry__curly_bgn, pos, src_len);
|
||||
if (curly_bgn == Bry_find_.Not_found) { // No more markup, append final segment
|
||||
if (markup_count == 0) return src; // no markups found; just return original
|
||||
Add_output(vnt, convert_needed, src, pos, src_len);
|
||||
return bfr.Xto_bry_and_clear();
|
||||
}
|
||||
bfr.Add_mid(src, pos, curly_bgn); // Markup found; append segment
|
||||
Add_output(vnt, convert_needed, src, pos, src_len);
|
||||
pos = curly_bgn; // Advance position
|
||||
++markup_count;
|
||||
Parse_recursive(vnt, pos, 1); // Do recursive conversion
|
||||
}
|
||||
return bfr.Xto_bry_and_clear();
|
||||
}
|
||||
}
|
||||
private void Parse_recursive(byte[] vnt, int pos, int depth) {
|
||||
pos += 2; // skip "-{"
|
||||
boolean warning_done = false;
|
||||
// $inner = '';
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
Object o = trie.Match_bgn_w_byte(b, src,pos, src_len);
|
||||
if (o == null) { // char;
|
||||
++pos;
|
||||
continue;
|
||||
}
|
||||
int new_pos = trie.Match_pos(); // Markup found; Append initial segment
|
||||
bfr.Add_mid(src, pos, new_pos);
|
||||
pos = new_pos; // Advance position
|
||||
switch (((Byte_obj_val)o).Val()) {
|
||||
case Tid__curly_bgn:
|
||||
if (depth >= max_depth) {
|
||||
bfr.Add(Bry__curly_bgn);
|
||||
if (!warning_done) {
|
||||
bfr.Add_str("<span class=\"error\">");
|
||||
// wfMessage('language-converter-depth-warning')->numParams($this->mMaxDepth)->inContentLanguage()->text()
|
||||
bfr.Add_str("</span>");
|
||||
warning_done = true;
|
||||
}
|
||||
pos += 2; // skip "-{"
|
||||
continue;
|
||||
}
|
||||
Parse_recursive(vnt, pos, depth + 1); // Recursively parse another rule
|
||||
break;
|
||||
case Tid__curly_end:
|
||||
pos += 2;
|
||||
/*
|
||||
// Apply the rule
|
||||
$rule = new ConverterRule($inner, $this);
|
||||
$rule->parse($variant);
|
||||
$this->applyManualConv($rule);
|
||||
return $rule->getDisplay();
|
||||
*/
|
||||
return;
|
||||
default: throw Err_.new_unhandled(-1); // never happens
|
||||
}
|
||||
}
|
||||
if (pos < src_len) { // Unclosed rule
|
||||
byte[] frag = Auto_convert(vnt, src, pos, src_len);
|
||||
bfr.Add(Bry__curly_bgn).Add(frag);
|
||||
}
|
||||
pos = src_len;
|
||||
}
|
||||
private void Add_output(byte[] vnt, boolean convert_needed, byte[] src, int pos, int src_len) {
|
||||
if (convert_needed) {
|
||||
byte[] frag = Auto_convert(vnt, src, pos, src_len);
|
||||
bfr.Add(frag);
|
||||
}
|
||||
else
|
||||
bfr.Add_mid(src, pos, src_len);
|
||||
}
|
||||
private byte[] Auto_convert(byte[] vnt, byte[] src, int bgn, int end) {return src;}
|
||||
private static final byte Tid__curly_bgn = 1, Tid__curly_end = 2;
|
||||
private static final byte[] Bry__curly_bgn = Bry_.new_a7("-{"), Bry__curly_end = Bry_.new_a7("}-");
|
||||
private static final Btrie_fast_mgr trie = Btrie_fast_mgr.cs()
|
||||
.Add_bry_byte(Bry__curly_bgn, Tid__curly_bgn)
|
||||
.Add_bry_byte(Bry__curly_end, Tid__curly_end);
|
||||
}
|
68
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_bidi_mgr.java
Normal file
68
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_bidi_mgr.java
Normal file
@ -0,0 +1,68 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
class Vnt_rule_bidi_mgr {
|
||||
private final Ordered_hash hash = Ordered_hash_.new_bry_();
|
||||
public int Len() {return hash.Count();}
|
||||
public boolean Has_none() {return hash.Count() == 0;}
|
||||
public void Clear() {hash.Clear();}
|
||||
public Vnt_rule_bidi_itm Get_at(int i) {return (Vnt_rule_bidi_itm)hash.Get_at(i);}
|
||||
public Vnt_rule_bidi_itm Get_by(byte[] k) {return (Vnt_rule_bidi_itm)hash.Get_by(k);}
|
||||
public byte[] Get_text_by_ary_or_null(byte[]... ary) {
|
||||
int len = ary.length;
|
||||
byte[] rv = null;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
byte[] itm = ary[i];
|
||||
Vnt_rule_bidi_itm bidi_itm = (Vnt_rule_bidi_itm)hash.Get_by(itm); if (bidi_itm == null) continue;
|
||||
rv = Get_text_by_key_or_null(bidi_itm.Vnt());
|
||||
if (rv != null) return rv;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public byte[] Get_text_by_key_or_null(byte[] vnt) {
|
||||
Vnt_rule_bidi_itm rv = (Vnt_rule_bidi_itm)hash.Get_by(vnt);
|
||||
return rv == null ? null : rv.Text();
|
||||
}
|
||||
public byte[] Get_text_at(int i) {
|
||||
Vnt_rule_bidi_itm itm = (Vnt_rule_bidi_itm)hash.Get_at(i);
|
||||
return itm == null ? null : itm.Text();
|
||||
}
|
||||
public void Set(byte[] vnt, byte[] text) {
|
||||
Vnt_rule_bidi_itm itm = (Vnt_rule_bidi_itm)hash.Get_by(vnt);
|
||||
if (itm == null) {
|
||||
itm = new Vnt_rule_bidi_itm(vnt, text);
|
||||
hash.Add(vnt, itm);
|
||||
}
|
||||
else
|
||||
itm.Text_(text);
|
||||
}
|
||||
public void To_bry__dbg(Bry_bfr bfr) {
|
||||
int len = hash.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
if (i != 0) bfr.Add_byte_nl();
|
||||
Vnt_rule_bidi_itm itm = (Vnt_rule_bidi_itm)hash.Get_at(i);
|
||||
bfr.Add(itm.Vnt()).Add_byte_colon().Add(itm.Text());
|
||||
}
|
||||
}
|
||||
}
|
||||
// One bidirectional rule entry: a fixed variant key with replaceable text.
class Vnt_rule_bidi_itm {
  private final byte[] vnt;
  private byte[] text;

  public Vnt_rule_bidi_itm(byte[] vnt, byte[] text) {
    this.vnt = vnt;
    this.text = text;
  }
  public byte[] Vnt() {
    return vnt;
  }
  public byte[] Text() {
    return text;
  }
  public void Text_(byte[] v) {
    this.text = v;
  }
}
|
@ -17,45 +17,76 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
class Vnt_rule_parser implements gplx.core.brys.Bry_split_wkr {
|
||||
// private final Btrie_slim_mgr vnt_trie = Btrie_slim_mgr.ci_a7();
|
||||
public void Parse(byte[] src, int src_bgn, int src_end) {
|
||||
Bry_split_.Split(src, Byte_ascii.Semic, false, this); // trim=false for "&#entity;" check below
|
||||
private final Btrie_slim_mgr vnt_trie = Btrie_slim_mgr.ci_a7();
|
||||
private Vnt_rule_undi_mgr undis; private Vnt_rule_bidi_mgr bidis;
|
||||
private int src_end, src_len; private byte[] rule_raw;
|
||||
public byte[] Raw() {return rule_raw;}
|
||||
public void Init(Xol_vnt_regy vnt_regy) {
|
||||
this.vnt_trie.Clear();
|
||||
int len = vnt_regy.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xol_vnt_itm itm = (Xol_vnt_itm)vnt_regy.Get_at(i);
|
||||
vnt_trie.Add_obj(itm.Key(), itm);
|
||||
}
|
||||
}
|
||||
public int Split(byte[] src, int itm_bgn, int itm_end) {
|
||||
int html_entity_pos = Bry_find_.Find_bwd_while_alphanum(src, itm_bgn);
|
||||
if (Bry_.Eq(src, html_entity_pos - 2, html_entity_pos, Bry__html_entity)) return Bry_split_.Rv__extend; // reject "&#entity;"; EX: " zh-hans;"
|
||||
/*
|
||||
itm_bgn = skip fwd for ws;
|
||||
itm_bgn = skip fwd for "=>"
|
||||
Object vnt_obj = vnt_trie.Match_bgn(src, itm_bgn, itm_end); if (vnt_obj == null) return Bry_split_.Rv__extend; // reject ";not_variant"; EX: ";border" in "zh-hans:<span style='color:blue;border:1px;'>;zh-hant:"
|
||||
itm_end = skip bwd for ws
|
||||
// val = trim( val[0] );
|
||||
// trg = trim( val[1] );
|
||||
// $u = explode( '=>', val, 2 );
|
||||
// // if trg is empty, strtr() could return a wrong result
|
||||
// if ( count( $u ) == 1 && trg && in_array( val, $variants ) ) {
|
||||
// bidi_ary[val] = trg;
|
||||
// } elseif ( count( $u ) == 2 ) {
|
||||
// $from = trim( $u[0] );
|
||||
// val = trim( $u[1] );
|
||||
// if ( array_key_exists( val, $unidtable )
|
||||
// && !is_array( $unidtable[val] )
|
||||
// && trg
|
||||
// && in_array( val, $variants ) ) {
|
||||
// $unidtable[val] = array( $from => trg );
|
||||
// } elseif ( trg && in_array( val, $variants ) ) {
|
||||
// $unidtable[val][$from] = trg;
|
||||
// }
|
||||
// }
|
||||
// // syntax error, pass
|
||||
// if ( !isset( $this->mConverter->mVariantNames[val] ) ) {
|
||||
// bidi_ary = array();
|
||||
// $unidtable = array();
|
||||
// break;
|
||||
// }
|
||||
*/
|
||||
public void Clear(Vnt_rule_undi_mgr undis, Vnt_rule_bidi_mgr bidis, byte[] rule_raw) {
|
||||
this.undis = undis; this.bidis = bidis;
|
||||
undis.Clear(); bidis.Clear();
|
||||
this.rule_raw = rule_raw;
|
||||
}
|
||||
public void Parse(byte[] src, int src_bgn, int src_end) {
|
||||
this.src_end = src_end; this.src_len = src.length;
|
||||
Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Semic, false, this); // trim=false for "&#entity;" check below
|
||||
}
|
||||
public int Split(byte[] src, int itm_bgn, int itm_end) { // macro=>zh-hans:text;
|
||||
int html_entity_pos = Bry_find_.Find_bwd_while_alphanum(src, itm_end);
|
||||
byte html_entity_byte = src[html_entity_pos];
|
||||
if (html_entity_byte == Byte_ascii.Hash) html_entity_byte = src[html_entity_pos - 2]; // skip #; EX: {
|
||||
if (html_entity_byte == Byte_ascii.Amp) return Bry_split_.Rv__extend; // reject "&#entity;"; EX: " zh-hans;"
|
||||
if (itm_end != src_end) {
|
||||
int nxt_lang_bgn = Bry_find_.Find_fwd(src, Bry__bidi_dlm, itm_end + 1, src_len); // look for next "=>"
|
||||
if (nxt_lang_bgn == Bry_find_.Not_found)
|
||||
nxt_lang_bgn = Bry_find_.Find_fwd_while_ws(src, itm_end + 1, src_len); // skip any ws after end ";"; EX: "a:1; b:2"; NOTE: +1 to skip semic;
|
||||
else
|
||||
nxt_lang_bgn += 2;
|
||||
int nxt_lang_end = Bry_find_.Find_fwd(src, Byte_ascii.Colon, nxt_lang_bgn, src_len); // get colon;
|
||||
if (nxt_lang_end != Bry_find_.Not_found) {
|
||||
nxt_lang_end = Bry_find_.Find_bwd__skip_ws(src, nxt_lang_end, src_len); // trim
|
||||
if (vnt_trie.Match_bgn(src, nxt_lang_bgn, nxt_lang_end) == null) return Bry_split_.Rv__extend; // reject ";not_variant"; EX: ";border" in "zh-hans:<span style='color:blue;border:1px;'>;zh-hant:"
|
||||
}
|
||||
}
|
||||
int undi_bgn = Bry_find_.Find_fwd_while_ws(src, itm_bgn, itm_end); // skip any ws after bgn ";"; EX: " a=>b:c;"
|
||||
int undi_end = Bry_find_.Find_fwd(src, Bry__bidi_dlm, undi_bgn, itm_end); // look for "=>"
|
||||
int lang_bgn = undi_bgn; // default lang_bgn to undi_bgn; assumes no bidi found
|
||||
if (undi_end != Bry_find_.Not_found) { // "=>" found; bidi exists
|
||||
lang_bgn = Bry_find_.Find_fwd_while_ws(src, undi_end + 2, itm_end); // set lang_bgn after => and gobble up ws
|
||||
undi_end = Bry_find_.Find_bwd__skip_ws(src, undi_end, undi_bgn); // trim ws from end of bd;
|
||||
}
|
||||
Object vnt_obj = vnt_trie.Match_bgn(src, lang_bgn, itm_end);
|
||||
if (vnt_obj == null) {
|
||||
return (itm_bgn == 0) ? Bry_split_.Rv__cancel : Bry_split_.Rv__extend; // if 1st item; cancel rest; otherwise, extend
|
||||
}
|
||||
int lang_end = vnt_trie.Match_pos();
|
||||
int text_bgn = Bry_find_.Find_fwd_while_ws(src, lang_end, itm_end); if (src[text_bgn] != Byte_ascii.Colon) return Bry_split_.Rv__extend;
|
||||
++text_bgn;
|
||||
Xol_vnt_itm vnt_itm = (Xol_vnt_itm)vnt_obj;
|
||||
byte[] vnt_key = vnt_itm.Key();
|
||||
byte[] text_bry = Bry_.Mid_w_trim(src, text_bgn, itm_end);
|
||||
if (undi_end == Bry_find_.Not_found)
|
||||
bidis.Set(vnt_key, text_bry);
|
||||
else {
|
||||
byte[] undi_bry = Bry_.Mid(src, undi_bgn, undi_end);
|
||||
if (itm_end - text_bgn > 0)
|
||||
undis.Set(vnt_key, undi_bry, text_bry);
|
||||
}
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
private static final byte[] Bry__html_entity = Bry_.new_a7("&#");
|
||||
public void To_bry__dbg(Bry_bfr bfr) {
|
||||
undis.To_bry__dbg(bfr);
|
||||
if (bfr.Len_gt_0()) bfr.Add_byte_nl();
|
||||
bidis.To_bry__dbg(bfr);
|
||||
}
|
||||
private static final byte[] Bry__bidi_dlm = Bry_.new_a7("=>");
|
||||
}
|
||||
|
@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Vnt_rule_parser__bidi_tst {
|
||||
private final Vnt_rule_parser_fxt fxt = new Vnt_rule_parser_fxt();
|
||||
@Test public void Basic() {fxt.Test_parse("x1:v1;" , "x1:v1");}
|
||||
@Test public void Ws() {fxt.Test_parse(" x1 : v1 ;" , "x1:v1");}
|
||||
@Test public void Entity() {fxt.Test_parse("x1:a x2:b;x2:b;" , "x1:a x2:b" , "x2:b");}
|
||||
@Test public void Unknown__nth() {fxt.Test_parse("x1:a;wx2:b;x2:b;" , "x1:a;wx2:b" , "x2:b");}
|
||||
@Test public void Unknown__1st() {fxt.Test_parse("wx1:a;x1:b;" , "");}
|
||||
}
|
@ -0,0 +1,24 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Vnt_rule_parser__undi_tst {
|
||||
private final Vnt_rule_parser_fxt fxt = new Vnt_rule_parser_fxt();
|
||||
@Test public void One() {fxt.Test_parse("k1=>x1:v1;" , "x1:k1=v1");}
|
||||
@Test public void Many() {fxt.Test_parse("k1=>x1:v1;k2=>x2:v2;" , "x1:k1=v1", "x2:k2=v2");}
|
||||
}
|
37
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser_fxt.java
Normal file
37
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_parser_fxt.java
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
class Vnt_rule_parser_fxt {
|
||||
private final Vnt_rule_parser parser = new Vnt_rule_parser(); private final Vnt_rule_undi_mgr undis = new Vnt_rule_undi_mgr(); private final Vnt_rule_bidi_mgr bidis = new Vnt_rule_bidi_mgr();
|
||||
private final Bry_bfr bfr = Bry_bfr.new_(255);
|
||||
public Vnt_rule_parser_fxt() {
|
||||
Xol_vnt_regy vnt_regy = new Xol_vnt_regy();
|
||||
vnt_regy.Add(Bry_.new_a7("x1"), Bry_.new_a7("lang1"));
|
||||
vnt_regy.Add(Bry_.new_a7("x2"), Bry_.new_a7("lang2"));
|
||||
vnt_regy.Add(Bry_.new_a7("x3"), Bry_.new_a7("lang3"));
|
||||
parser.Init(vnt_regy);
|
||||
}
|
||||
public void Test_parse(String raw, String... expd_ary) {
|
||||
byte[] src = Bry_.new_u8(raw);
|
||||
parser.Clear(undis, bidis, src);
|
||||
parser.Parse(src, 0, src.length);
|
||||
parser.To_bry__dbg(bfr);
|
||||
Tfds.Eq_str_lines(String_.Concat_lines_nl_skip_last(expd_ary), bfr.Xto_str_and_clear());
|
||||
}
|
||||
}
|
80
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_undi_mgr.java
Normal file
80
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_rule_undi_mgr.java
Normal file
@ -0,0 +1,80 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
class Vnt_rule_undi_mgr {
|
||||
private final Ordered_hash hash = Ordered_hash_.new_bry_();
|
||||
public int Len() {return hash.Count();}
|
||||
public boolean Has_none() {return hash.Count() == 0;}
|
||||
public void Clear() {hash.Clear();}
|
||||
public Vnt_rule_undi_grp Get_at(int i) {return (Vnt_rule_undi_grp)hash.Get_at(i);}
|
||||
public Vnt_rule_undi_grp Get_by(byte[] key) {return (Vnt_rule_undi_grp)hash.Get_by(key);}
|
||||
public byte[] Get_text_by_key_or_null(byte[] key) {
|
||||
Vnt_rule_undi_grp grp = (Vnt_rule_undi_grp)hash.Get_by(key); if (grp == null) return null;
|
||||
return grp.Len() == 0 ? null : grp.Get_at(0).Trg(); // REF.MW: $disp = $disp[0];
|
||||
}
|
||||
public byte[] Get_text_at(int i) {
|
||||
Vnt_rule_undi_grp grp = (Vnt_rule_undi_grp)hash.Get_at(i); if (grp == null) return null;
|
||||
return grp.Len() == 0 ? null : grp.Get_at(0).Trg();
|
||||
}
|
||||
public Vnt_rule_undi_grp Set(byte[] vnt, byte[] src, byte[] trg) {
|
||||
Vnt_rule_undi_grp grp = (Vnt_rule_undi_grp)hash.Get_by(vnt);
|
||||
if (grp == null) {
|
||||
grp = new Vnt_rule_undi_grp(vnt);
|
||||
hash.Add(vnt, grp);
|
||||
}
|
||||
grp.Set(src, trg);
|
||||
return grp;
|
||||
}
|
||||
public void To_bry__dbg(Bry_bfr bfr) {
|
||||
int len = hash.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
if (i != 0) bfr.Add_byte_nl();
|
||||
Vnt_rule_undi_grp grp = (Vnt_rule_undi_grp)hash.Get_at(i);
|
||||
bfr.Add(grp.Vnt()).Add_byte_colon();
|
||||
grp.To_bry__dbg(bfr);
|
||||
}
|
||||
}
|
||||
}
|
||||
class Vnt_rule_undi_grp {
|
||||
private final Ordered_hash hash = Ordered_hash_.new_bry_();
|
||||
public Vnt_rule_undi_grp(byte[] vnt) {this.vnt = vnt;}
|
||||
public int Len() {return hash.Count();}
|
||||
public Vnt_rule_undi_itm Get_at(int i) {return (Vnt_rule_undi_itm)hash.Get_at(i);}
|
||||
public byte[] Vnt() {return vnt;} private final byte[] vnt;
|
||||
public Vnt_rule_undi_itm Set(byte[] src, byte[] trg) {
|
||||
Vnt_rule_undi_itm itm = (Vnt_rule_undi_itm)hash.Get_by(src);
|
||||
if (itm == null) {
|
||||
itm = new Vnt_rule_undi_itm(src, trg);
|
||||
hash.Add(src, itm);
|
||||
}
|
||||
return itm;
|
||||
}
|
||||
public void To_bry__dbg(Bry_bfr bfr) {
|
||||
int len = hash.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Vnt_rule_undi_itm itm = (Vnt_rule_undi_itm)hash.Get_at(i);
|
||||
bfr.Add(itm.Src()).Add_byte_eq().Add(itm.Trg());
|
||||
}
|
||||
}
|
||||
}
|
||||
// One unidirectional rule: a fixed source with a replaceable target.
class Vnt_rule_undi_itm {
  private final byte[] src;
  private byte[] trg;

  public Vnt_rule_undi_itm(byte[] src, byte[] trg) {
    this.src = src;
    this.trg = trg;
  }
  public byte[] Src() {
    return src;
  }
  public byte[] Trg() {
    return trg;
  }
  public void Trg_(byte[] v) {
    this.trg = v;
  }
}
|
@ -25,7 +25,7 @@ class Xop_vnt_lang_bldr { // performant way of building langs; EX: -{zh;zh-hans;
|
||||
public void Add(byte[] key) {
|
||||
Xol_vnt_itm vnt = vnt_regy.Get_by(key); if (vnt == null) return; // ignore invalid vnts; EX: -{zh;zhx}-
|
||||
int vnt_mask = vnt.Mask__vnt();
|
||||
this.rslt_mask = (rslt_mask == 0) ? vnt_mask : Enm_.Flip_int(true, rslt_mask, vnt_mask);
|
||||
this.rslt_mask = (rslt_mask == 0) ? vnt_mask : Bitmask_.Flip_int(true, rslt_mask, vnt_mask);
|
||||
}
|
||||
public Xop_vnt_flag Bld() {
|
||||
return (rslt_mask == 0) ? Xop_vnt_flag_.Flag_unknown : Xop_vnt_flag.new_lang(rslt_mask);
|
||||
|
@ -53,7 +53,7 @@ class Xop_vnt_tkn_mok {
|
||||
}
|
||||
public Xop_vnt_tkn_mok Flags_none_() {flags_list.Clear(); return this;}
|
||||
public Xop_vnt_tkn_mok Flags_unknown_(String... v) {flags_list.Add(Xop_vnt_flag_.Flag_unknown); return this;}
|
||||
public Xop_vnt_tkn_mok Flags_langs_(int... ary) {flags_list.Add(Xop_vnt_flag.new_lang(Enm_.Add_int_ary(ary))); return this;}
|
||||
public Xop_vnt_tkn_mok Flags_langs_(int... ary) {flags_list.Add(Xop_vnt_flag.new_lang(Bitmask_.Add_int_ary(ary))); return this;}
|
||||
public Xop_vnt_tkn_mok Flags_codes_(String... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
@ -122,7 +122,7 @@ class Xop_vnt_lxr_fxt {
|
||||
int itm_mask = itm.Mask();
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
int mask = gplx.core.brys.Bit_.Get_flag(i);
|
||||
if (Enm_.Has_int(mask, itm_mask)) {
|
||||
if (Bitmask_.Has_int(mask, itm_mask)) {
|
||||
Xol_vnt_itm vnt = vnt_regy.Get_at(i);
|
||||
bfr.Add(vnt.Key()).Add_byte(Byte_ascii.Semic);
|
||||
}
|
||||
|
@ -18,7 +18,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xop_xatr_parser_tst {
|
||||
@Test public void Kv_quote_double() {fxt.tst_("a=\"b\"", fxt.new_atr_("a", "b"));} private Xop_xatr_parser_fxt fxt = new Xop_xatr_parser_fxt();
|
||||
private final Xop_xatr_parser_fxt fxt = new Xop_xatr_parser_fxt();
|
||||
@Test public void Kv_quote_double() {fxt.tst_("a=\"b\"", fxt.new_atr_("a", "b"));}
|
||||
@Test public void Kv_quote_single() {fxt.tst_("a='b'", fxt.new_atr_("a", "b"));}
|
||||
@Test public void Kv_quote_none() {fxt.tst_("a=b", fxt.new_atr_("a", "b"));}
|
||||
@Test public void Kv_empty() {fxt.tst_("a=''", fxt.new_atr_("a", ""));}
|
||||
|
@ -49,12 +49,12 @@ class Xop_xatr_whitelist_fxt {
|
||||
public void Clear() {
|
||||
if (whitelist_mgr == null) whitelist_mgr = new Xop_xatr_whitelist_mgr().Ini();
|
||||
} private Xop_xatr_whitelist_mgr whitelist_mgr;
|
||||
public void Whitelist(byte tag_id, String key_str, boolean expd) {
|
||||
public void Whitelist(int tag_id, String key_str, boolean expd) {
|
||||
byte[] key_bry = Bry_.new_a7(key_str);
|
||||
atr_itm.Key_rng_(0, key_bry.length);
|
||||
Tfds.Eq(expd, whitelist_mgr.Chk(tag_id, key_bry, atr_itm), key_str);
|
||||
} private Xop_xatr_itm atr_itm = new Xop_xatr_itm(0, 0);
|
||||
public void Whitelist(byte tag_id, String key_str, String val_str, boolean expd) {
|
||||
public void Whitelist(int tag_id, String key_str, String val_str, boolean expd) {
|
||||
byte[] key_bry = Bry_.new_a7(key_str);
|
||||
atr_itm.Key_rng_(0, key_bry.length);
|
||||
atr_itm.Val_bry_(Bry_.new_a7(val_str));
|
||||
|
@ -22,32 +22,33 @@ public class Xop_xnde_tag {
|
||||
this.id = id;
|
||||
this.name_bry = Bry_.new_a7(name_str);
|
||||
this.name_str = name_str;
|
||||
name_len = name_bry.length;
|
||||
xtn_end_tag = Bry_.Add(Xop_xnde_tag_.XtnEndTag_bgn, name_bry); // always force endtag; needed for <noinclude>
|
||||
xtn_end_tag_tmp = new byte[xtn_end_tag.length]; Array_.Copy(xtn_end_tag, xtn_end_tag_tmp);
|
||||
this.name_len = name_bry.length;
|
||||
this.xtn_end_tag = Bry_.Add(Xop_xnde_tag_.Xtn_end_tag_bgn, name_bry); // always force endtag; needed for <noinclude>
|
||||
this.xtn_end_tag_tmp = new byte[xtn_end_tag.length]; Array_.Copy(xtn_end_tag, xtn_end_tag_tmp);
|
||||
}
|
||||
public int Id() {return id;} public Xop_xnde_tag Id_(int v) {id = v; return this;} private int id;
|
||||
public byte[] Name_bry() {return name_bry;} private byte[] name_bry;
|
||||
public String Name_str() {return name_str;} private String name_str;
|
||||
public int Name_len() {return name_len;} private int name_len;
|
||||
public int Id() {return id;} private final int id;
|
||||
public byte[] Name_bry() {return name_bry;} private final byte[] name_bry;
|
||||
public String Name_str() {return name_str;} private final String name_str;
|
||||
public int Name_len() {return name_len;} private final int name_len;
|
||||
public byte[] Xtn_end_tag() {return xtn_end_tag;} private final byte[] xtn_end_tag;
|
||||
public byte[] Xtn_end_tag_tmp() {return xtn_end_tag_tmp;} private final byte[] xtn_end_tag_tmp;
|
||||
public boolean Xtn() {return xtn;} public Xop_xnde_tag Xtn_() {xtn = true; return this;} private boolean xtn;
|
||||
public boolean Xtn_mw() {return xtn_mw;} public Xop_xnde_tag Xtn_mw_() {xtn_mw = true; xtn = true; return this;} private boolean xtn_mw; // NOTE: Xtn_mw_() marks both xtn and xtn_mw as true
|
||||
public byte[] XtnEndTag() {return xtn_end_tag;} private byte[] xtn_end_tag;
|
||||
public byte[] XtnEndTag_tmp() {return xtn_end_tag_tmp;} private byte[] xtn_end_tag_tmp;
|
||||
public int BgnNdeMode() {return bgnNdeMode;} private int bgnNdeMode = Xop_xnde_tag_.BgnNdeMode_normal;
|
||||
public Xop_xnde_tag BgnNdeMode_inline_() {bgnNdeMode = Xop_xnde_tag_.BgnNdeMode_inline; return this;}
|
||||
public int EndNdeMode() {return endNdeMode;} private int endNdeMode = Xop_xnde_tag_.EndNdeMode_normal;
|
||||
public Xop_xnde_tag EndNdeMode_inline_() {endNdeMode = Xop_xnde_tag_.EndNdeMode_inline; return this;}
|
||||
public Xop_xnde_tag EndNdeMode_escape_() {endNdeMode = Xop_xnde_tag_.EndNdeMode_escape; return this;}
|
||||
public boolean SingleOnly() {return singleOnly;} public Xop_xnde_tag SingleOnly_() {singleOnly = true; return this;} private boolean singleOnly;
|
||||
public boolean TblSub() {return tblSub;} public Xop_xnde_tag TblSub_() {tblSub = true; return this;} private boolean tblSub;
|
||||
public int Bgn_nde_mode() {return bgn_nde_mode;} private int bgn_nde_mode = Xop_xnde_tag_.Bgn_nde_mode_normal;
|
||||
public Xop_xnde_tag Bgn_nde_mode_inline_() {bgn_nde_mode = Xop_xnde_tag_.Bgn_nde_mode_inline; return this;}
|
||||
public int End_nde_mode() {return end_nde_mode;} private int end_nde_mode = Xop_xnde_tag_.End_nde_mode_normal;
|
||||
public Xop_xnde_tag End_nde_mode_inline_() {end_nde_mode = Xop_xnde_tag_.End_nde_mode_inline; return this;}
|
||||
public Xop_xnde_tag End_nde_mode_escape_() {end_nde_mode = Xop_xnde_tag_.End_nde_mode_escape; return this;}
|
||||
public boolean Single_only() {return single_only;} public Xop_xnde_tag Single_only_() {single_only = true; return this;} private boolean single_only;
|
||||
public boolean Tbl_sub() {return tbl_sub;} public Xop_xnde_tag Tbl_sub_() {tbl_sub = true; return this;} private boolean tbl_sub;
|
||||
public boolean Restricted() {return restricted;} public Xop_xnde_tag Restricted_() {restricted = true; return this;} private boolean restricted;
|
||||
public boolean NoInline() {return noInline;} public Xop_xnde_tag NoInline_() {noInline = true; return this;} private boolean noInline;
|
||||
public boolean No_inline() {return no_inline;} public Xop_xnde_tag No_inline_() {no_inline = true; return this;} private boolean no_inline;
|
||||
public boolean Inline_by_backslash() {return inline_by_backslash;} public Xop_xnde_tag Inline_by_backslash_() {inline_by_backslash = true; return this;} private boolean inline_by_backslash;
|
||||
public boolean Section() {return section;} public Xop_xnde_tag Section_() {section = true; return this;} private boolean section;
|
||||
public boolean Repeat_ends() {return repeat_ends;} public Xop_xnde_tag Repeat_ends_() {repeat_ends = true; return this;} private boolean repeat_ends;
|
||||
public boolean Repeat_mids() {return repeat_mids;} public Xop_xnde_tag Repeat_mids_() {repeat_mids = true; return this;} private boolean repeat_mids;
|
||||
public boolean Empty_ignored() {return empty_ignored;} public Xop_xnde_tag Empty_ignored_() {empty_ignored = true; return this;} private boolean empty_ignored;
|
||||
public boolean Single_only_html() {return single_only_html;} public Xop_xnde_tag Single_only_html_() {single_only_html = true; return this;} private boolean single_only_html;
|
||||
public boolean Raw() {return raw;} public Xop_xnde_tag Raw_() {raw = true; return this;} private boolean raw;
|
||||
public static final byte Block_noop = 0, Block_bgn = 1, Block_end = 2;
|
||||
public byte Block_open() {return block_open;} private byte block_open = Block_noop;
|
||||
|
@ -18,12 +18,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.langs.*;
|
||||
public class Xop_xnde_tag_ {
|
||||
public static final int EndNdeMode_normal = 0, EndNdeMode_inline = 1, EndNdeMode_escape = 2; // escape is for hr which does not support </hr>
|
||||
public static final int BgnNdeMode_normal = 0, BgnNdeMode_inline = 1;
|
||||
public static final int End_nde_mode_normal = 0, End_nde_mode_inline = 1, End_nde_mode_escape = 2; // escape is for hr which does not support </hr>
|
||||
public static final int Bgn_nde_mode_normal = 0, Bgn_nde_mode_inline = 1;
|
||||
public static final byte[] Name_onlyinclude = Bry_.new_a7("onlyinclude");
|
||||
public static final byte[] XtnEndTag_bgn = Bry_.new_a7("</");//, XtnEndTag_end = Bry_.new_a7(">");
|
||||
public static final byte
|
||||
Tid_b = 0
|
||||
public static final byte[] Xtn_end_tag_bgn = Bry_.new_a7("</");//, Xtn_end_tag_end = Bry_.new_a7(">");
|
||||
public static final int
|
||||
Tid__null = -1
|
||||
, Tid_b = 0
|
||||
, Tid_strong = 1
|
||||
, Tid_i = 2
|
||||
, Tid_em = 3
|
||||
@ -147,64 +148,64 @@ public class Xop_xnde_tag_ {
|
||||
return rv;
|
||||
}
|
||||
public static final Xop_xnde_tag
|
||||
Tag_b = new_(Tid_b, "b").NoInline_()
|
||||
, Tag_strong = new_(Tid_strong, "strong").NoInline_()
|
||||
, Tag_i = new_(Tid_i, "i").NoInline_()
|
||||
, Tag_em = new_(Tid_em, "em").NoInline_()
|
||||
, Tag_cite = new_(Tid_cite, "cite").NoInline_()
|
||||
, Tag_dfn = new_(Tid_dfn, "dfn").NoInline_()
|
||||
, Tag_var = new_(Tid_var, "var").NoInline_()
|
||||
, Tag_u = new_(Tid_u, "u").NoInline_().Repeat_ends_() // PAGE:en.b:Textbook_of_Psychiatry/Alcoholism_and_Psychoactive_Substance_Use_Disorders; DATE:2014-09-05
|
||||
, Tag_ins = new_(Tid_ins, "ins").NoInline_()
|
||||
, Tag_abbr = new_(Tid_abbr, "abbr").NoInline_()
|
||||
, Tag_strike = new_(Tid_strike, "strike").NoInline_()
|
||||
, Tag_del = new_(Tid_del, "del").NoInline_()
|
||||
, Tag_s = new_(Tid_s, "s").NoInline_()
|
||||
, Tag_sub = new_(Tid_sub, "sub").NoInline_()
|
||||
, Tag_sup = new_(Tid_sup, "sup").NoInline_()
|
||||
, Tag_big = new_(Tid_big, "big").NoInline_()
|
||||
, Tag_small = new_(Tid_small, "small").NoInline_()
|
||||
, Tag_code = new_(Tid_code, "code").NoInline_().Repeat_ends_()
|
||||
, Tag_tt = new_(Tid_tt, "tt").NoInline_().Repeat_ends_()
|
||||
, Tag_kbd = new_(Tid_kbd, "kbd").NoInline_()
|
||||
, Tag_samp = new_(Tid_samp, "samp").NoInline_()
|
||||
, Tag_blockquote = new_(Tid_blockquote, "blockquote").NoInline_().Repeat_mids_().Section_().Block_open_bgn_().Block_close_end_() // NOTE: should be open_end_, but leaving for now; DATE:2014-03-11; added Repeat_mids_(); PAGE:en.w:Ring_a_Ring_o'_Roses DATE:2014-06-26
|
||||
, Tag_pre = new_(Tid_pre, "pre").NoInline_().Section_().Xtn_().Raw_().Block_open_bgn_().Block_close_end_().Ignore_empty_().Xtn_skips_template_args_()
|
||||
, Tag_font = new_(Tid_font, "font").NoInline_()
|
||||
, Tag_center = new_(Tid_center, "center").NoInline_().Block_open_end_().Block_close_end_() // removed .Repeat_ends_(); added Nest_(); EX: w:Burr Truss; DATE:2012-12-12
|
||||
, Tag_p = new_(Tid_p, "p").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
Tag_b = new_(Tid_b, "b").No_inline_()
|
||||
, Tag_strong = new_(Tid_strong, "strong").No_inline_()
|
||||
, Tag_i = new_(Tid_i, "i").No_inline_()
|
||||
, Tag_em = new_(Tid_em, "em").No_inline_()
|
||||
, Tag_cite = new_(Tid_cite, "cite").No_inline_()
|
||||
, Tag_dfn = new_(Tid_dfn, "dfn").No_inline_()
|
||||
, Tag_var = new_(Tid_var, "var").No_inline_()
|
||||
, Tag_u = new_(Tid_u, "u").No_inline_().Repeat_ends_() // PAGE:en.b:Textbook_of_Psychiatry/Alcoholism_and_Psychoactive_Substance_Use_Disorders; DATE:2014-09-05
|
||||
, Tag_ins = new_(Tid_ins, "ins").No_inline_()
|
||||
, Tag_abbr = new_(Tid_abbr, "abbr").No_inline_()
|
||||
, Tag_strike = new_(Tid_strike, "strike").No_inline_()
|
||||
, Tag_del = new_(Tid_del, "del").No_inline_()
|
||||
, Tag_s = new_(Tid_s, "s").No_inline_()
|
||||
, Tag_sub = new_(Tid_sub, "sub").No_inline_()
|
||||
, Tag_sup = new_(Tid_sup, "sup").No_inline_()
|
||||
, Tag_big = new_(Tid_big, "big").No_inline_()
|
||||
, Tag_small = new_(Tid_small, "small").No_inline_()
|
||||
, Tag_code = new_(Tid_code, "code").No_inline_().Repeat_ends_()
|
||||
, Tag_tt = new_(Tid_tt, "tt").No_inline_().Repeat_ends_()
|
||||
, Tag_kbd = new_(Tid_kbd, "kbd").No_inline_()
|
||||
, Tag_samp = new_(Tid_samp, "samp").No_inline_()
|
||||
, Tag_blockquote = new_(Tid_blockquote, "blockquote").No_inline_().Repeat_mids_().Section_().Block_open_bgn_().Block_close_end_() // NOTE: should be open_end_, but leaving for now; DATE:2014-03-11; added Repeat_mids_(); PAGE:en.w:Ring_a_Ring_o'_Roses DATE:2014-06-26
|
||||
, Tag_pre = new_(Tid_pre, "pre").No_inline_().Section_().Xtn_().Raw_().Block_open_bgn_().Block_close_end_().Ignore_empty_().Xtn_skips_template_args_()
|
||||
, Tag_font = new_(Tid_font, "font").No_inline_()
|
||||
, Tag_center = new_(Tid_center, "center").No_inline_().Block_open_end_().Block_close_end_() // removed .Repeat_ends_(); added Nest_(); EX: w:Burr Truss; DATE:2012-12-12
|
||||
, Tag_p = new_(Tid_p, "p").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_span = new_(Tid_span, "span").Section_()
|
||||
, Tag_div = new_(Tid_div, "div").Section_().Block_open_end_().Block_close_end_()
|
||||
, Tag_hr = new_(Tid_hr, "hr").SingleOnly_().BgnNdeMode_inline_().Inline_by_backslash_().EndNdeMode_escape_().Section_().Block_close_end_()
|
||||
, Tag_br = new_(Tid_br, "br").SingleOnly_().BgnNdeMode_inline_().Inline_by_backslash_().EndNdeMode_inline_().Section_()
|
||||
, Tag_h1 = new_(Tid_h1, "h1").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h2 = new_(Tid_h2, "h2").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h3 = new_(Tid_h3, "h3").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h4 = new_(Tid_h4, "h4").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h5 = new_(Tid_h5, "h5").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h6 = new_(Tid_h6, "h6").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_hr = new_(Tid_hr, "hr").Single_only_().Single_only_html_().Bgn_nde_mode_inline_().Inline_by_backslash_().End_nde_mode_escape_().Section_().Block_close_end_()
|
||||
, Tag_br = new_(Tid_br, "br").Single_only_().Single_only_html_().Bgn_nde_mode_inline_().Inline_by_backslash_().End_nde_mode_inline_().Section_()
|
||||
, Tag_h1 = new_(Tid_h1, "h1").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h2 = new_(Tid_h2, "h2").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h3 = new_(Tid_h3, "h3").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h4 = new_(Tid_h4, "h4").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h5 = new_(Tid_h5, "h5").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_h6 = new_(Tid_h6, "h6").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_li = new_(Tid_li, "li").Repeat_mids_().Empty_ignored_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_dt = new_(Tid_dt, "dt").Repeat_mids_()
|
||||
, Tag_dd = new_(Tid_dd, "dd").Repeat_mids_()
|
||||
, Tag_ol = new_(Tid_ol, "ol").NoInline_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_ul = new_(Tid_ul, "ul").NoInline_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_dl = new_(Tid_dl, "dl").NoInline_()
|
||||
, Tag_table = new_(Tid_table, "table").NoInline_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_tr = new_(Tid_tr, "tr").TblSub_().Block_open_bgn_().Block_open_end_()
|
||||
, Tag_td = new_(Tid_td, "td").TblSub_().Block_open_end_().Block_close_bgn_()
|
||||
, Tag_th = new_(Tid_th, "th").TblSub_().Block_open_end_().Block_close_bgn_()
|
||||
, Tag_ol = new_(Tid_ol, "ol").No_inline_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_ul = new_(Tid_ul, "ul").No_inline_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_dl = new_(Tid_dl, "dl").No_inline_()
|
||||
, Tag_table = new_(Tid_table, "table").No_inline_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_tr = new_(Tid_tr, "tr").Tbl_sub_().Block_open_bgn_().Block_open_end_()
|
||||
, Tag_td = new_(Tid_td, "td").Tbl_sub_().Block_open_end_().Block_close_bgn_()
|
||||
, Tag_th = new_(Tid_th, "th").Tbl_sub_().Block_open_end_().Block_close_bgn_()
|
||||
, Tag_thead = new_(Tid_thead, "thead")
|
||||
, Tag_tfoot = new_(Tid_tfoot, "tfoot")
|
||||
, Tag_tbody = new_(Tid_tbody, "tbody")
|
||||
, Tag_caption = new_(Tid_caption, "caption").NoInline_().TblSub_()
|
||||
, Tag_caption = new_(Tid_caption, "caption").No_inline_().Tbl_sub_()
|
||||
, Tag_colgroup = new_(Tid_colgroup, "colgroup")
|
||||
, Tag_col = new_(Tid_col, "col")
|
||||
, Tag_a = new_(Tid_a, "a").Restricted_()
|
||||
, Tag_img = new_(Tid_img, "img").Restricted_() // NOTE: was .Xtn() DATE:2014-11-06
|
||||
, Tag_ruby = new_(Tid_ruby, "ruby").NoInline_()
|
||||
, Tag_rt = new_(Tid_rt, "rt").NoInline_()
|
||||
, Tag_rb = new_(Tid_rb, "rb").NoInline_()
|
||||
, Tag_rp = new_(Tid_rp, "rp").NoInline_()
|
||||
, Tag_img = new_(Tid_img, "img").Single_only_html_().Restricted_() // NOTE: was .Xtn() DATE:2014-11-06
|
||||
, Tag_ruby = new_(Tid_ruby, "ruby").No_inline_()
|
||||
, Tag_rt = new_(Tid_rt, "rt").No_inline_()
|
||||
, Tag_rb = new_(Tid_rb, "rb").No_inline_()
|
||||
, Tag_rp = new_(Tid_rp, "rp").No_inline_()
|
||||
, Tag_includeonly = new_(Tid_includeonly, "includeonly")
|
||||
, Tag_noinclude = new_(Tid_noinclude, "noinclude")
|
||||
, Tag_onlyinclude = new_(Tid_onlyinclude, "onlyinclude")
|
||||
@ -245,8 +246,8 @@ public class Xop_xnde_tag_ {
|
||||
, Tag_bdi = new_(Tid_bdi, "bdi")
|
||||
, Tag_data = new_(Tid_data, "data")
|
||||
, Tag_mark = new_(Tid_mark, "mark")
|
||||
, Tag_wbr = new_(Tid_wbr, "wbr").SingleOnly_()
|
||||
, Tag_bdo = new_(Tid_bdo, "bdo").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_wbr = new_(Tid_wbr, "wbr").Single_only_().Single_only_html_()
|
||||
, Tag_bdo = new_(Tid_bdo, "bdo").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
|
||||
, Tag_listing_buy = new_(Tid_listing_buy, "buy").Xtn_mw_()
|
||||
, Tag_listing_do = new_(Tid_listing_do, "do").Xtn_mw_()
|
||||
, Tag_listing_drink = new_(Tid_listing_drink, "drink").Xtn_mw_()
|
||||
|
@ -22,11 +22,11 @@ public class Xop_xnde_tag_lang {
|
||||
lang_code = Int_obj_ref.new_(lang_code_int);
|
||||
this.name_str = name_str;
|
||||
this.name_bry = Bry_.new_u8(name_str);
|
||||
this.xtnEndTag_tmp = Bry_.Add(Xop_xnde_tag_.XtnEndTag_bgn, name_bry);
|
||||
this.xtnEndTag_tmp = Bry_.Add(Xop_xnde_tag_.Xtn_end_tag_bgn, name_bry);
|
||||
}
|
||||
public Int_obj_ref Lang_code() {return lang_code;} private Int_obj_ref lang_code;
|
||||
public String Name_str() {return name_str;} private String name_str;
|
||||
public byte[] Name_bry() {return name_bry;} private byte[] name_bry;
|
||||
public byte[] XtnEndTag_tmp() {return xtnEndTag_tmp;} private byte[] xtnEndTag_tmp;
|
||||
public byte[] Xtn_end_tag_tmp() {return xtnEndTag_tmp;} private byte[] xtnEndTag_tmp;
|
||||
public static final Xop_xnde_tag_lang _ = new Xop_xnde_tag_lang(-1, String_.Empty);
|
||||
}
|
||||
|
@ -106,7 +106,7 @@ public class Xop_xnde_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
|
||||
this.Subs_get(i).Tmpl_evaluate(ctx, src, caller, bfr);
|
||||
bfr.Add_mid(src, tag_close_bgn, tag_close_end); // write tag_end
|
||||
if (tag_close_bgn == Int_.Min_value) {// xtn is unclosed; add a </xtn> else rest of page will be gobbled; PAGE:en.w:Provinces_and_territories_of_Canada DATE:2014-11-13
|
||||
bfr.Add(tag.XtnEndTag());
|
||||
bfr.Add(tag.Xtn_end_tag());
|
||||
bfr.Add(Byte_ascii.Gt_bry);
|
||||
}
|
||||
}
|
||||
|
@ -71,10 +71,10 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
++atrs_bgn_pos; // set bgn_pos to be after ws
|
||||
break;
|
||||
case Byte_ascii.Slash: case Byte_ascii.Gt:
|
||||
case Byte_ascii.Slash: case Byte_ascii.Angle_end:
|
||||
++atrs_bgn_pos; // set bgn_pos to be after char
|
||||
break;
|
||||
case Byte_ascii.Backslash:
|
||||
case Byte_ascii.Backslash: // NOTE: MW treats \ as /; EX: <br\>" -> "<br/>
|
||||
++tag_end_pos;
|
||||
break;
|
||||
case Byte_ascii.Dollar:// handles <br$2>;
|
||||
@ -246,7 +246,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
}
|
||||
}
|
||||
int end_rhs = -1, findPos = gtPos;
|
||||
byte[] end_bry = Xop_xnde_tag_.Tag_noinclude.XtnEndTag(); int end_bry_len = end_bry.length;
|
||||
byte[] end_bry = Xop_xnde_tag_.Tag_noinclude.Xtn_end_tag(); int end_bry_len = end_bry.length;
|
||||
if (tag_is_closing) // </noinclude>; no end tag to search for; DATE:2014-05-02
|
||||
end_rhs = gtPos;
|
||||
else { // <noinclude>; search for end tag
|
||||
@ -281,7 +281,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
break;
|
||||
case Byte_ascii.Backslash: // allow <br\>; EX:w:Mosquito
|
||||
if (tag.Inline_by_backslash())
|
||||
src[tag_end_pos] = Byte_ascii.Slash;
|
||||
src[tag_end_pos] = Byte_ascii.Slash;
|
||||
break;
|
||||
case Byte_ascii.Gt: // ">" "normal" tag; noop
|
||||
break;
|
||||
@ -323,7 +323,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
|
||||
boolean tag_ignore = false;
|
||||
int tagId = tag.Id();
|
||||
if (tagId == Xop_xnde_tag_.Tid_table || tag.TblSub()) { // tbl tag; EX: <table>,<tr>,<td>,<th>
|
||||
if (tagId == Xop_xnde_tag_.Tid_table || tag.Tbl_sub()) { // tbl tag; EX: <table>,<tr>,<td>,<th>
|
||||
Tblw_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, gtPos + 1, tagId, atrs_bgn, atrs_end);
|
||||
return gtPos + 1;
|
||||
}
|
||||
@ -338,8 +338,8 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
else if (tagId == prv_xnde_tagId && tag.Repeat_mids()) { // EX: "<li>a<li>b" -> "<li>a</li><li>b"
|
||||
End_tag(ctx, root, prv_xnde, src, src_len, bgn_pos - 1, bgn_pos - 1, tagId, true, tag);
|
||||
}
|
||||
else if (tag.SingleOnly()) inline = true; // <br></br> not allowed; convert <br> to <br/> </br> will be escaped
|
||||
else if (tag.NoInline() && inline) {
|
||||
else if (tag.Single_only()) inline = true; // <br></br> not allowed; convert <br> to <br/> </br> will be escaped
|
||||
else if (tag.No_inline() && inline) {
|
||||
Xop_xnde_tkn xnde_inline = Xnde_bgn(ctx, tkn_mkr, root, tag, Xop_xnde_tkn.CloseMode_open, src, bgn_pos, open_tag_end, atrs_bgn, atrs_end, atrs);
|
||||
End_tag(ctx, root, xnde_inline, src, src_len, bgn_pos, gtPos, tagId, false, tag);
|
||||
ctx.Msg_log().Add_itm_none(Xop_xnde_log.No_inline, src, bgn_pos, gtPos);
|
||||
@ -347,7 +347,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
}
|
||||
Xop_xnde_tkn xnde = null;
|
||||
xnde = Xnde_bgn(ctx, tkn_mkr, root, tag, inline ? Xop_xnde_tkn.CloseMode_inline : Xop_xnde_tkn.CloseMode_open, src, bgn_pos, open_tag_end, atrs_bgn, atrs_end, atrs);
|
||||
if (!inline && tag.BgnNdeMode() != Xop_xnde_tag_.BgnNdeMode_inline)
|
||||
if (!inline && tag.Bgn_nde_mode() != Xop_xnde_tag_.Bgn_nde_mode_inline)
|
||||
ctx.Stack_add(xnde);
|
||||
if (tag_ignore)
|
||||
xnde.Tag_visible_(false);
|
||||
@ -414,7 +414,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
Xop_xnde_tkn bgn_nde = (Xop_xnde_tkn)ctx.Stack_get(prv_xnde_pos);
|
||||
int bgn_tag_id = bgn_nde == null ? -1 : bgn_nde.Tag().Id();
|
||||
|
||||
int end_nde_mode = end_tag.EndNdeMode();
|
||||
int end_nde_mode = end_tag.End_nde_mode();
|
||||
boolean force_end_tag_to_match_bgn_tag = false;
|
||||
switch (bgn_tag_id) {
|
||||
case Xop_xnde_tag_.Tid_sub: if (end_tag_id == Xop_xnde_tag_.Tid_sup) force_end_tag_to_match_bgn_tag = true; break;
|
||||
@ -426,7 +426,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
end_tag_id = bgn_tag_id;
|
||||
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Sub_sup_swapped, src, bgn_pos, cur_pos);
|
||||
}
|
||||
if (end_tag_id == Xop_xnde_tag_.Tid_table || end_tag.TblSub()) {
|
||||
if (end_tag_id == Xop_xnde_tag_.Tid_table || end_tag.Tbl_sub()) {
|
||||
Tblw_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, end_tag_id);
|
||||
return cur_pos;
|
||||
}
|
||||
@ -437,10 +437,10 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
return cur_pos;
|
||||
}
|
||||
switch (end_nde_mode) {
|
||||
case Xop_xnde_tag_.EndNdeMode_inline: // PATCH.WP: allows </br>, </br/> and many other variants
|
||||
case Xop_xnde_tag_.End_nde_mode_inline: // PATCH.WP: allows </br>, </br/> and many other variants
|
||||
Xnde_bgn(ctx, tkn_mkr, root, end_tag, Xop_xnde_tkn.CloseMode_inline, src, bgn_pos, cur_pos, Int_.Min_value, Int_.Min_value, null); // NOTE: atrs is null b/c </br> will never have atrs
|
||||
return cur_pos;
|
||||
case Xop_xnde_tag_.EndNdeMode_escape: // handle </hr>
|
||||
case Xop_xnde_tag_.End_nde_mode_escape: // handle </hr>
|
||||
ctx.Lxr_make_(false);
|
||||
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Escaped_xnde, src, bgn_pos, cur_pos - 1);
|
||||
return cur_pos;
|
||||
@ -576,13 +576,13 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
xnde.Tag_close_rng_(open_end, open_end); // NOTE: inline tag, so set TagClose to open_end; should noop
|
||||
}
|
||||
else {
|
||||
byte[] close_bry = tag.XtnEndTag_tmp(); // get tmp bry (so as not to new)
|
||||
byte[] close_bry = tag.Xtn_end_tag_tmp(); // get tmp bry (so as not to new)
|
||||
if (tag.Langs() != null) { // cur tag has langs; EX:<section>; DATE:2014-07-18
|
||||
Xop_xnde_tag_lang tag_lang = tag.Langs_get(ctx.Lang().Case_mgr(), ctx.Cur_page().Lang().Lang_id(), src, name_bgn, name_end);
|
||||
if (tag_lang == null) // tag does not match lang; EX:<trecho> and lang=de;
|
||||
return ctx.Lxr_make_txt_(open_end);
|
||||
if (tag_lang != Xop_xnde_tag_lang._) // tag matches; note Xop_xnde_tag_lang._ is a wildcard match; EX:<section>
|
||||
close_bry = tag_lang.XtnEndTag_tmp();
|
||||
close_bry = tag_lang.Xtn_end_tag_tmp();
|
||||
}
|
||||
int src_offset = open_bgn - 1; // open bgn to start at <; -2 to ignore </ ; +1 to include <
|
||||
int close_ary_len = close_bry.length;
|
||||
|
@ -113,7 +113,7 @@ public class Xoue_user implements Xou_user, GfoEvMgrOwner, GfoInvkAble {
|
||||
) continue;
|
||||
byte[] dir_name_as_bry = Bry_.new_u8(name);
|
||||
Xow_xwiki_itm xwiki = Available_add(usr_wiki, dir_name_as_bry);
|
||||
if (xwiki != null) // Add_full can return null if adding invalid lang; should not apply here, but guard against null ref
|
||||
if (xwiki != null) // Add_full can return null if adding invalid lang; should not apply here, but guard against null ref
|
||||
xwiki.Offline_(true); // mark xwiki as offline; needed for available wikis sidebar; DATE:2014-09-21
|
||||
app.Setup_mgr().Maint_mgr().Wiki_mgr().Add(dir_name_as_bry);
|
||||
}
|
||||
|
@ -152,14 +152,14 @@ public class Xow_xwiki_mgr implements GfoInvkAble {
|
||||
default: domain_str = String_.Format("{0}.{1}.org", lang_key_str, wiki_name); break; // EX: en.wiktionary.org
|
||||
}
|
||||
byte[] domain_bry = Bry_.new_u8(domain_str);
|
||||
Xowe_wiki lang_wiki = wiki.Appe().Wiki_mgr().Get_by_key_or_null(domain_bry);
|
||||
boolean offline_exists = lang_wiki != null;
|
||||
// Xowe_wiki lang_wiki = wiki.Appe().Wiki_mgr().Get_by_key_or_null(domain_bry); // DELETE: causes commons to show; DATE:2015-09-23
|
||||
// boolean offline_exists = lang_wiki != null;
|
||||
String fmt = String_.Format("http://" + domain_str + "/wiki/~{0}");
|
||||
int aliases_len = wiki_itm.Aliases().length;
|
||||
for (int j = 0; j < aliases_len; j++) {
|
||||
byte[] alias = wiki_itm.Aliases()[j];
|
||||
if (wiki.Ns_mgr().Names_get_or_null(alias, 0, alias.length) != null) continue; // NOTE: do not add xwiki if alias matches namespace; EX: en.wiktionary.org has ns of "Wiktionary"; do not add alias of "wiktionary"; note that wikipedia does have an alias to wiktionary
|
||||
Xow_xwiki_itm xwiki = Xow_xwiki_itm.new_(alias, Bry_.new_u8(fmt), lang_id, domain_tid, domain_bry).Offline_(offline_exists); // NOTE: domain_tid must be used, not wiki.Domain_tid; DATE:2014-09-14
|
||||
Xow_xwiki_itm xwiki = Xow_xwiki_itm.new_(alias, Bry_.new_u8(fmt), lang_id, domain_tid, domain_bry); // .Offline_(offline_exists); // NOTE: domain_tid must be used, not wiki.Domain_tid; DATE:2014-09-14
|
||||
Add_itm(xwiki, null);
|
||||
}
|
||||
}
|
||||
@ -180,11 +180,11 @@ public class Xow_xwiki_mgr implements GfoInvkAble {
|
||||
Xoac_lang_itm lang = (Xoac_lang_itm)langs.Get_at(i);
|
||||
String domain_str = String_.Format("{0}.{1}.org", String_.new_u8(lang.Key_bry()), wiki_tid_name_str); // EX: fr.wikipedia.org
|
||||
byte[] domain_bry = Bry_.new_u8(domain_str);
|
||||
Xowe_wiki lang_wiki = wiki.Appe().Wiki_mgr().Get_by_key_or_null(domain_bry);
|
||||
boolean offline_exists = lang_wiki != null;
|
||||
// Xowe_wiki lang_wiki = wiki.Appe().Wiki_mgr().Get_by_key_or_null(domain_bry); // DELETE: causes commons to show; DATE:2015-09-23
|
||||
// boolean offline_exists = lang_wiki != null;
|
||||
String url_fmt = String_.Format("http://" + domain_str + "/wiki/~{0}");
|
||||
int lang_id = Xol_lang_itm_.Get_by_key(lang.Key_bry()).Id();
|
||||
Xow_xwiki_itm xwiki = Xow_xwiki_itm.new_(lang.Key_bry(), Bry_.new_u8(url_fmt), lang_id, domain_tid, domain_bry).Offline_(offline_exists);
|
||||
Xow_xwiki_itm xwiki = Xow_xwiki_itm.new_(lang.Key_bry(), Bry_.new_u8(url_fmt), lang_id, domain_tid, domain_bry); // .Offline_(offline_exists);
|
||||
Add_itm(xwiki, lang);
|
||||
}
|
||||
lang_mgr.Grps_sort();
|
||||
|
@ -61,7 +61,7 @@ public class Scrib_lib_text implements Scrib_lib {
|
||||
// if (Type_adp_.Eq(itm_type, typeof(KeyVal[]))) itm_as_kvy = (KeyVal[])itm;
|
||||
// else if (Type_adp_.Is_array(itm_type)) itm_as_ary = Array_.cast(itm);
|
||||
// int flags = args.Cast_int_or(1, 0);
|
||||
// if (itm_as_kvy != null && !Enm_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys))
|
||||
// if (itm_as_kvy != null && !Bitmask_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys))
|
||||
// itm_as_kvy = json_util.Reindex_arrays(itm_as_kvy, true);
|
||||
// byte[] rv = null;
|
||||
// if (itm_as_kvy != null)
|
||||
@ -82,7 +82,7 @@ public class Scrib_lib_text implements Scrib_lib {
|
||||
synchronized (reindex_data) {
|
||||
if ( itm_as_kvy != null
|
||||
&& itm_as_kvy.length > 0
|
||||
&& !Enm_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys)
|
||||
&& !Bitmask_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys)
|
||||
) {
|
||||
json_util.Reindex_arrays(reindex_data, itm_as_kvy, true);
|
||||
if (reindex_data.Rv_is_kvy()) {
|
||||
@ -107,12 +107,12 @@ public class Scrib_lib_text implements Scrib_lib {
|
||||
byte[] json = args.Pull_bry(0);
|
||||
int flags = args.Cast_int_or(1, 0);
|
||||
int opts = Scrib_lib_text__json_util.Opt__force_assoc;
|
||||
if (Enm_.Has_int(flags, Scrib_lib_text__json_util.Flag__try_fixing))
|
||||
opts = Enm_.Add_int(opts, Scrib_lib_text__json_util.Flag__try_fixing);
|
||||
if (Bitmask_.Has_int(flags, Scrib_lib_text__json_util.Flag__try_fixing))
|
||||
opts = Bitmask_.Add_int(opts, Scrib_lib_text__json_util.Flag__try_fixing);
|
||||
synchronized (procs) {
|
||||
byte rv_tid = json_util.Decode(core.App().Utl__json_parser(), json, opts);
|
||||
if (rv_tid == Bool_.__byte) throw Err_.new_("scribunto", "mw.text.jsonEncode: Unable to decode String " + String_.new_u8(json));
|
||||
if (rv_tid == Bool_.Y_byte && !(Enm_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys))) {
|
||||
if (rv_tid == Bool_.Y_byte && !(Bitmask_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys))) {
|
||||
KeyVal[] rv_as_kvy = (KeyVal[])json_util.Decode_rslt_as_nde();
|
||||
synchronized (reindex_data) {
|
||||
json_util.Reindex_arrays(reindex_data, rv_as_kvy, false);
|
||||
|
@ -230,7 +230,7 @@ public class Scrib_lib_wikibase_srl_tst {
|
||||
);
|
||||
}
|
||||
@Test public void Claims_time() {
|
||||
fxt.Init_prop(fxt.Wdata_fxt().Make_claim_time(2, "2001-02-03 04:05:06"));
|
||||
fxt.Init_prop(fxt.Wdata_fxt().Make_claim_time(2, "2001-02-03 04:05:06", 9));
|
||||
fxt.Test
|
||||
( "claims:"
|
||||
, " P2:"
|
||||
@ -241,7 +241,7 @@ public class Scrib_lib_wikibase_srl_tst {
|
||||
, " type:'time'"
|
||||
, " value:"
|
||||
, " time:'+00000002001-02-03T04:05:06Z'"
|
||||
, " precision:'11'"
|
||||
, " precision:'9'"
|
||||
, " before:'0'"
|
||||
, " after:'0'"
|
||||
, " timezone:'0'"
|
||||
@ -341,7 +341,7 @@ public class Scrib_lib_wikibase_srl_tst {
|
||||
, " type:'time'"
|
||||
, " value:"
|
||||
, " time:'+00000002001-02-03T04:05:06Z'"
|
||||
, " precision:'11'"
|
||||
, " precision:'14'"
|
||||
, " before:'0'"
|
||||
, " after:'0'"
|
||||
, " timezone:'0'"
|
||||
|
@ -68,10 +68,10 @@ class Scrib_lib_wikibase_srl_visitor implements Wdata_claim_visitor {
|
||||
private static KeyVal[] Time_value(Wdata_claim_itm_time itm) {
|
||||
KeyVal[] rv = new KeyVal[6];
|
||||
rv[0] = KeyVal_.new_(Wdata_dict_value_time.Str_time , String_.new_a7(itm.Time()));
|
||||
rv[1] = KeyVal_.new_(Wdata_dict_value_time.Str_precision , Wdata_dict_value_time.Val_precision_int); // NOTE: must return int, not str; DATE:2014-02-18
|
||||
rv[2] = KeyVal_.new_(Wdata_dict_value_time.Str_before , Wdata_dict_value_time.Val_before_int);
|
||||
rv[3] = KeyVal_.new_(Wdata_dict_value_time.Str_after , Wdata_dict_value_time.Val_after_int);
|
||||
rv[4] = KeyVal_.new_(Wdata_dict_value_time.Str_timezone , Wdata_dict_value_time.Val_timezone_str);
|
||||
rv[1] = KeyVal_.new_(Wdata_dict_value_time.Str_precision , itm.Precision_int()); // NOTE: must return int, not str; DATE:2014-02-18
|
||||
rv[2] = KeyVal_.new_(Wdata_dict_value_time.Str_before , itm.Before_int());
|
||||
rv[3] = KeyVal_.new_(Wdata_dict_value_time.Str_after , itm.After_int());
|
||||
rv[4] = KeyVal_.new_(Wdata_dict_value_time.Str_timezone , Wdata_dict_value_time.Val_timezone_str); // ASSUME: always 0 b/c UTF?; DATE:2015-09-21
|
||||
rv[5] = KeyVal_.new_(Wdata_dict_value_time.Str_calendarmodel , Wdata_dict_value_time.Val_calendarmodel_str);
|
||||
return rv;
|
||||
}
|
||||
@ -80,13 +80,13 @@ class Scrib_lib_wikibase_srl_visitor implements Wdata_claim_visitor {
|
||||
rv[0] = KeyVal_.new_(Scrib_lib_wikibase_srl.Key_type, Wdata_dict_val_tid.Str_globecoordinate);
|
||||
rv[1] = KeyVal_.new_(Scrib_lib_wikibase_srl.Key_value, Globecoordinate_value(itm));
|
||||
}
|
||||
private static KeyVal[] Globecoordinate_value(Wdata_claim_itm_globecoordinate itm) {
|
||||
private static KeyVal[] Globecoordinate_value(Wdata_claim_itm_globecoordinate itm) {
|
||||
KeyVal[] rv = new KeyVal[5];
|
||||
rv[0] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_latitude , Double_.parse(String_.new_a7(itm.Lat())));
|
||||
rv[1] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_longitude , Double_.parse(String_.new_a7(itm.Lng())));
|
||||
rv[2] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_altitude , null);
|
||||
rv[3] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_globe , Wdata_dict_value_globecoordinate.Val_globe_dflt_str);
|
||||
rv[4] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_precision , .00001d);
|
||||
rv[2] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_altitude , String_.new_u8(itm.Alt()));
|
||||
rv[3] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_globe , String_.new_u8(itm.Glb()));
|
||||
rv[4] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_precision , itm.Prc_as_num().To_double());
|
||||
return rv;
|
||||
}
|
||||
public void Visit_system(Wdata_claim_itm_system itm) {
|
||||
|
@ -57,7 +57,7 @@ public class Wdata_wiki_mgr_fxt {
|
||||
public Wdata_claim_itm_core Make_claim_quantity(int pid, String amount, String unit, String ubound, String lbound) {return new Wdata_claim_itm_quantity(pid, Wdata_dict_snak_tid.Tid_value, Bry_.new_a7(amount), Bry_.new_a7(unit), Bry_.new_a7(ubound), Bry_.new_a7(lbound));}
|
||||
public Wdata_claim_itm_core Make_claim_entity_qid(int pid, int val) {return new Wdata_claim_itm_entity(pid, Wdata_dict_snak_tid.Tid_value, Wdata_dict_value_entity_tid.Tid_item, Int_.Xto_bry(val));}
|
||||
public Wdata_claim_itm_core Make_claim_entity_pid(int pid, int val) {return new Wdata_claim_itm_entity(pid, Wdata_dict_snak_tid.Tid_value, Wdata_dict_value_entity_tid.Tid_property, Int_.Xto_bry(val));}
|
||||
public Wdata_claim_itm_core Make_claim_geo(int pid, String lon, String lat) {return Make_claim_geo(pid, lon, lat, ".000277777", null, "Q2");}
|
||||
public Wdata_claim_itm_core Make_claim_geo(int pid, String lon, String lat) {return Make_claim_geo(pid, lon, lat, ".00001", null, "http://www.wikidata.org/entity/Q2");}
|
||||
public Wdata_claim_itm_core Make_claim_geo(int pid, String lon, String lat, String prc, String alt, String glb) {
|
||||
return new Wdata_claim_itm_globecoordinate(pid, Wdata_dict_snak_tid.Tid_value, Bry_.new_a7(lat), Bry_.new_a7(lon), Bry_.new_a7(alt), Bry_.new_a7(prc), Bry_.new_a7(glb));
|
||||
}
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user