1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00
This commit is contained in:
gnosygnu 2015-09-27 23:04:13 -04:00
parent fa70c05354
commit 8e18af05b6
84 changed files with 2795 additions and 507 deletions

View File

@ -0,0 +1,40 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx;
// Static helpers that treat an int / byte as a set of bit flags.
public class Bitmask_ {
  // Returns true when every bit set in find is also set in val. NOTE: find == 0 always returns true.
  public static boolean Has_int(int val, int find) {return find == (val & find);}
  // Turns the bits of find on (enable == true) or off (enable == false) in val and returns the result.
  //   enable : all bits of find are set; bits already present are left alone
  //   disable: bits are cleared only when ALL bits of find are present; otherwise val is returned unchanged
  public static int Flip_int(boolean enable, int val, int find) {
    if (enable)
      return val | find;              // OR, not XOR: XOR toggled the overlapping bits off when find was only partially present; EX: (3, 6) gave 5 instead of 7
    boolean has = find == (val & find);
    return has ? val ^ find : val;    // all bits of find are present, so XOR clears exactly those bits
  }
  // Returns the union of the bits in lhs and rhs.
  public static int Add_int(int lhs, int rhs) {return lhs | rhs;}
  // Returns the union of the bits of every item in ary; returns 0 for an empty array.
  public static int Add_int_ary(int... ary) {
    int rv = 0;
    for (int itm : ary)
      rv |= itm;                      // same as enabling itm in rv; also correct for overlapping multi-bit items
    return rv;
  }
  // Byte variant of Has_int: true when every bit set in find is also set in val.
  public static boolean Has_byte(byte val, byte find) {return find == (val & find);}
  // Byte variant of Add_int: returns the union of the bits in flag and itm.
  public static byte Add_byte(byte flag, byte itm) {return (byte)(flag | itm);}
}

View File

@ -18,25 +18,5 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx;
// Static helpers for enums and for ints / bytes used as sets of bit flags.
public class Enm_ {
  // Returns the ordinal of enm; enm must be a java enum constant (the cast throws ClassCastException otherwise).
  public static int To_int(Object enm) {return Ordinal_lang(enm);}
  // Returns true when every bit set in find is also set in val. NOTE: find == 0 always returns true.
  public static boolean Has_int(int val, int find) {return find == (val & find);}
  // Returns the union of the bits in lhs and rhs.
  public static int Add_int(int lhs, int rhs) {return lhs | rhs;}
  // Returns the union of the bits of every item in ary; returns 0 for an empty array.
  public static int Add_int_ary(int... ary) {
    int rv = 0;
    for (int itm : ary)
      rv |= itm;                      // same as enabling itm in rv; also correct for overlapping multi-bit items
    return rv;
  }
  // Turns the bits of find on (enable == true) or off (enable == false) in val and returns the result.
  //   enable : all bits of find are set; bits already present are left alone
  //   disable: bits are cleared only when ALL bits of find are present; otherwise val is returned unchanged
  public static int Flip_int(boolean enable, int val, int find) {
    if (enable)
      return val | find;              // OR, not XOR: XOR toggled the overlapping bits off when find was only partially present; EX: (3, 6) gave 5 instead of 7
    boolean has = find == (val & find);
    return has ? val ^ find : val;    // all bits of find are present, so XOR clears exactly those bits
  }
  // Byte variant of Has_int: true when every bit set in find is also set in val.
  public static boolean Has_byte(byte val, byte find) {return find == (val & find);}
  // Byte variant of Add_int: returns the union of the bits in flag and itm.
  public static byte Add_byte(byte flag, byte itm) {return (byte)(flag | itm);}
  // Wildcard cast instead of the raw Enum type; avoids a rawtypes warning without changing behavior.
  private static int Ordinal_lang(Object v) {return ((Enum<?>)v).ordinal();}
}

View File

@ -162,6 +162,21 @@ public class Bry_find_ {
}
return end;
}
// Walks backwards from (end - 1) down to bgn, skipping space, tab, newline and carriage-return bytes.
// Returns the position just after the last non-whitespace byte found, or bgn when only whitespace was seen.
// Returns Bry_find_.Not_found when end is negative or greater than src.length.
public static int Find_bwd__skip_ws(byte[] src, int end, int bgn) {
  int len = src.length;
  // NOTE(review): when end == src.length the method returns end immediately, so nothing is skipped in that case; confirm this is intended
  if (end == len) return end;
  if (end < 0 || end > len) return Bry_find_.Not_found;
  int idx = end - 1;  // end is exclusive, so the first byte examined is the one before it
  while (idx >= bgn) {
    byte b = src[idx];
    boolean is_ws = b == Byte_ascii.Space || b == Byte_ascii.Tab || b == Byte_ascii.Nl || b == Byte_ascii.Cr;
    if (!is_ws)
      return idx + 1; // first non-ws byte when scanning backwards; return the position after it
    --idx;
  }
  return bgn;
}
public static int Find_bwd_while(byte[] src, int cur, int end, byte while_byte) {
--cur;
while (true) {
@ -295,6 +310,7 @@ public class Bry_find_ {
}
public static int Find_bwd_while_alphanum(byte[] src, int cur) {return Find_bwd_while_alphanum(src, cur, -1);}
public static int Find_bwd_while_alphanum(byte[] src, int cur, int end) {
--cur;
while (cur > end) {
switch (src[cur]) {
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:

View File

@ -23,16 +23,17 @@ public class Bry_split_ {
public static byte[][] Split(byte[] src, byte dlm, boolean trim) {
synchronized (thread_lock) {
Bry_split_wkr__to_ary wkr = Bry_split_wkr__to_ary.I;
Split(src, dlm, trim, wkr);
Split(src, 0, src == null ? 0 : src.length, dlm, trim, wkr);
return wkr.To_ary();
}
}
public static void Split(byte[] src, byte dlm, boolean trim, Bry_split_wkr wkr) {
if (src == null) return;
int src_len = src.length, pos = 0; if (src_len == 0) return;
public static int Split(byte[] src, int src_bgn, int src_end, byte dlm, boolean trim, Bry_split_wkr wkr) {
if (src == null || src_end - src_bgn < 1) return 0;
int pos = src_bgn;
int itm_bgn = -1, itm_end = -1;
int count = 0;
while (true) {
boolean pos_is_last = pos == src_len;
boolean pos_is_last = pos == src_end;
byte b = pos_is_last ? dlm : src[pos];
int nxt_pos = pos + 1;
boolean process = true;
@ -51,9 +52,9 @@ public class Bry_split_ {
else {
int rv = wkr.Split(src, itm_bgn, itm_end);
switch (rv) {
case Rv__ok: break;
case Rv__ok: ++count; break;
case Rv__extend: reset = false; break;
case Rv__cancel: pos_is_last = true; break;
case Rv__cancel: return count;
default: throw Err_.new_unhandled(rv);
}
}
@ -67,6 +68,7 @@ public class Bry_split_ {
if (pos_is_last) break;
pos = nxt_pos;
}
return count;
}
public static byte[][] Split(byte[] src, byte[] dlm) {
if (Bry_.Len_eq_0(src)) return Bry_.Ary_empty;
@ -115,12 +117,16 @@ public class Bry_split_ {
// Split worker that copies each split item into an internal list; To_ary hands the collected items back and clears the list.
// Access to the list is guarded by synchronizing on the list itself.
class Bry_split_wkr__to_ary implements gplx.core.brys.Bry_split_wkr {
  private final List_adp list = List_adp_.new_();
  // Stores the bytes between itm_bgn and itm_end (Bry_.Empty when the range is empty) and always returns Rv__ok.
  public int Split(byte[] src, int itm_bgn, int itm_end) {
    synchronized (list) {
      byte[] itm;
      if (itm_bgn == itm_end)
        itm = Bry_.Empty;
      else
        itm = Bry_.Mid(src, itm_bgn, itm_end);
      list.Add(itm);
    }
    return Bry_split_.Rv__ok;
  }
  // Returns the collected items as a byte[][] via To_ary_and_clear.
  public byte[][] To_ary() {
    synchronized (list) {
      Object ary = list.To_ary_and_clear(byte[].class);
      return (byte[][])ary;
    }
  }
  // Shared instance; the constructor is package-private.
  public static final Bry_split_wkr__to_ary I = new Bry_split_wkr__to_ary();
  Bry_split_wkr__to_ary() {}
}

View File

@ -35,11 +35,30 @@ public class Bry_split__tst {
fxt.Test_Split(" a b | c d " , Byte_ascii.Pipe, Bool_.Y, "a b", "c d");
fxt.Test_Split(" a \n b " , Byte_ascii.Nl , Bool_.N, " a ", " b "); // ws as dlm
fxt.Test_Split(" a \n b " , Byte_ascii.Nl , Bool_.Y, "a", "b"); // ws as dlm; trim
fxt.Test_Split("a|extend|b" , Byte_ascii.Pipe, Bool_.Y, "a", "extend|b"); // extend
fxt.Test_Split("extend|a" , Byte_ascii.Pipe, Bool_.Y, "extend|a"); // extend
fxt.Test_Split("a|cancel|b" , Byte_ascii.Pipe, Bool_.Y, "a"); // cancel
}
}
class Bry_split__fxt {
private final Bry_split_wkr__example wkr = new Bry_split_wkr__example();
public void Test_Split(String raw_str, byte dlm, boolean trim, String... expd) {
byte[][] actl_ary = Bry_split_.Split(Bry_.new_a7(raw_str), dlm, trim);
byte[] src = Bry_.new_a7(raw_str);
Bry_split_.Split(src, 0, src.length, dlm, trim, wkr);
byte[][] actl_ary = wkr.To_ary();
Tfds.Eq_ary_str(expd, String_.Ary(actl_ary));
}
}
// Test worker: items equal to "extend" / "cancel" return Rv__extend / Rv__cancel and are not stored; every other item is stored and returns Rv__ok.
class Bry_split_wkr__example implements gplx.core.brys.Bry_split_wkr {
  private final List_adp list = List_adp_.new_();
  public int Split(byte[] src, int itm_bgn, int itm_end) {
    byte[] itm;
    if (itm_bgn == itm_end)
      itm = Bry_.Empty;
    else
      itm = Bry_.Mid(src, itm_bgn, itm_end);
    boolean is_extend = Bry_.Eq(itm, Bry_.new_a7("extend"));
    if (is_extend) return Bry_split_.Rv__extend;
    boolean is_cancel = Bry_.Eq(itm, Bry_.new_a7("cancel"));
    if (is_cancel) return Bry_split_.Rv__cancel;
    list.Add(itm);
    return Bry_split_.Rv__ok;
  }
  // Returns the stored items as a byte[][] via To_ary_and_clear.
  public byte[][] To_ary() {
    Object ary = list.To_ary_and_clear(byte[].class);
    return (byte[][])ary;
  }
}

View File

@ -50,6 +50,13 @@ public class List_adp_ {
list.Del_at(last_idx);
return rv;
}
// Removes and returns the last item of list; returns or (and leaves list untouched) when list is empty.
public static Object Pop_or(List_adp list, Object or) {
  int last_idx = list.Count() - 1;
  if (last_idx < 0) return or;  // empty list: nothing to pop
  Object popped = list.Get_at(last_idx);
  list.Del_at(last_idx);
  return popped;
}
public static void DisposeAll(List_adp list) {
for (int i = 0; i < list.Count(); i++)
((RlsAble)list.Get_at(i)).Rls();

View File

@ -138,7 +138,7 @@ public abstract class List_adp_base implements List_adp, GfoInvkAble {
public String To_str() {
Bry_bfr bfr = Bry_bfr.new_();
for (int i = 0; i < count; ++i)
bfr.Add_obj(list[i]);
bfr.Add_str_u8(Object_.Xto_str_strict_or_null_mark(list[i])).Add_byte_nl();
return bfr.Xto_str_and_clear();
}
private void BoundsChk(int bgn, int end, int len) {

View File

@ -166,6 +166,7 @@ public class Tfds { // URL:doc/gplx.tfds/Tfds.txt
private static final DateAdp time0 = DateAdp_.parse_gplx("2001-01-01 00:00:00.000");
private static DateAdp nowTime; // NOTE: cannot set to time0 due to static initialization;
public static void WriteText(String text) {Console_adp__sys.I.Write_str(text);}
public static void Write(byte[] s, int b, int e) {Write(Bry_.Mid(s, b, e));}
public static void Write() {Write("tmp");}
public static void Write(Object... ary) {
String_bldr sb = String_bldr_.new_();

View File

@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.gfui; import gplx.*;
public class GfuiBorderEdge {
public int Val() {return val;} int val;
public boolean Has(GfuiBorderEdge comp) {return Enm_.Has_int(val, comp.val);}
public boolean Has(GfuiBorderEdge comp) {return Bitmask_.Has_int(val, comp.val);}
public GfuiBorderEdge Add(GfuiBorderEdge comp) {
return new GfuiBorderEdge(comp.val + val);
}

View File

@ -32,7 +32,7 @@ public class IptEventType_ {
if (ary.length == 0) return IptEventType_.None;
int newVal = ary[0].Val();
for (int i = 1; i < ary.length; i++)
newVal = Enm_.Flip_int(true, newVal, ary[i].Val());
newVal = Bitmask_.Flip_int(true, newVal, ary[i].Val());
return getOrNew_(newVal);
}
static IptEventType getOrNew_(int v) {
@ -46,7 +46,7 @@ public class IptEventType_ {
}
@gplx.Internal protected static boolean Has(IptEventType val, IptEventType find) {
if (find == IptEventType_.None && val != IptEventType_.None) return false; // check .None manually b/c 0 is identity when BitShifting
return Enm_.Has_int(val.Val(), find.Val());
return Bitmask_.Has_int(val.Val(), find.Val());
}
public static IptEventType default_(IptArg[] args) {
IptEventType rv = IptEventType_.None;

View File

@ -23,7 +23,7 @@ public class IptKey implements IptArg {
public boolean Eq(IptArg comp) {return String_.Eq(key, comp.Key());}
public String XtoUiStr() {return IptKeyStrMgr._.To_str(this);}
public IptKey Add(IptKey comp) {return IptKey_.add_(this, comp);}
public boolean Mod_shift() {return Enm_.Has_int(val, IptKey_.Shift.Val());}
public boolean Mod_ctrl() {return Enm_.Has_int(val, IptKey_.Ctrl.Val());}
public boolean Mod_alt() {return Enm_.Has_int(val, IptKey_.Alt.Val());}
public boolean Mod_shift() {return Bitmask_.Has_int(val, IptKey_.Shift.Val());}
public boolean Mod_ctrl() {return Bitmask_.Has_int(val, IptKey_.Ctrl.Val());}
public boolean Mod_alt() {return Bitmask_.Has_int(val, IptKey_.Alt.Val());}
}

View File

@ -28,7 +28,7 @@ public class IptKey_ {
if (ary.length == 0) return IptKey_.None;
int newVal = ary[0].Val();
for (int i = 1; i < ary.length; i++)
newVal = Enm_.Flip_int(true, newVal, ary[i].Val());
newVal = Bitmask_.Flip_int(true, newVal, ary[i].Val());
return get_or_new_(newVal);
}
public static IptKey api_(int val) {
@ -138,9 +138,9 @@ public class IptKey_ {
}
public static String To_str(int val) {
String mod_str = "", rv = "";
boolean mod_c = Enm_.Has_int(val, IptKey_.Ctrl.Val()); if (mod_c) {mod_str += "c"; val = Enm_.Flip_int(Bool_.N, val, IptKey_.Ctrl.Val());}
boolean mod_a = Enm_.Has_int(val, IptKey_.Alt.Val()); if (mod_a) {mod_str += "a"; val = Enm_.Flip_int(Bool_.N, val, IptKey_.Alt.Val());}
boolean mod_s = Enm_.Has_int(val, IptKey_.Shift.Val()); if (mod_s) {mod_str += "s"; val = Enm_.Flip_int(Bool_.N, val, IptKey_.Shift.Val());}
boolean mod_c = Bitmask_.Has_int(val, IptKey_.Ctrl.Val()); if (mod_c) {mod_str += "c"; val = Bitmask_.Flip_int(Bool_.N, val, IptKey_.Ctrl.Val());}
boolean mod_a = Bitmask_.Has_int(val, IptKey_.Alt.Val()); if (mod_a) {mod_str += "a"; val = Bitmask_.Flip_int(Bool_.N, val, IptKey_.Alt.Val());}
boolean mod_s = Bitmask_.Has_int(val, IptKey_.Shift.Val()); if (mod_s) {mod_str += "s"; val = Bitmask_.Flip_int(Bool_.N, val, IptKey_.Shift.Val());}
if (String_.Len_gt_0(mod_str)) {
rv = "mod." + mod_str;
if (val == 0) return rv; // handle modifiers only, like "mod.cs"; else will be "mod.cs+key.#0"

View File

@ -30,7 +30,7 @@ class GfuiWinKeyCmdMgr implements GfuiWinOpenAble, GfoInvkAble, GfoEvObj {
int keyVal = iptData.Key().Val();
GfuiElem sender = GfuiElem_.as_(iptData.Sender());
if (GfuiTextBox_.as_(sender) != null // is sender textBox?
&& !Enm_.Has_int(keyVal, IptKey_.Alt.Val()) // does key not have alt
&& !Bitmask_.Has_int(keyVal, IptKey_.Alt.Val()) // does key not have alt
) return false; // ignore keys from textbox if they do not have alt
List_adp elemList = (List_adp)listHash.Get_by(keyVal); if (elemList == null) return false;
for (int i = 0; i < elemList.Count(); i++) {

View File

@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.gfui;
import gplx.Bitmask_;
import gplx.Byte_ascii;
import gplx.Enm_;
import gplx.GfoEvMgr_;
@ -108,12 +109,12 @@ class Swt_lnr_key implements KeyListener {
case 327680: val = IptKey_.Insert.Val(); break;
}
if (Has_ctrl(ev.stateMask)) val |= IptKey_.KeyCode_Ctrl;
if (Enm_.Has_int(ev.stateMask, IptKey_.KeyCode_Shift)) val |= IptKey_.KeyCode_Alt;
if (Enm_.Has_int(ev.stateMask, IptKey_.KeyCode_Ctrl)) val |= IptKey_.KeyCode_Shift;
if (Bitmask_.Has_int(ev.stateMask, IptKey_.KeyCode_Shift)) val |= IptKey_.KeyCode_Alt;
if (Bitmask_.Has_int(ev.stateMask, IptKey_.KeyCode_Ctrl)) val |= IptKey_.KeyCode_Shift;
// Tfds.Write(String_.Format("val={4} keyCode={0} stateMask={1} keyLocation={2} character={3}", ev.keyCode, ev.stateMask, ev.keyLocation, ev.character, val));
return IptEvtDataKey.int_(val);
}
public static boolean Has_ctrl(int val) {return Enm_.Has_int(val, IptKey_.KeyCode_Alt);} // NOTE:SWT's ctrl constant is different from SWING's
public static boolean Has_ctrl(int val) {return Bitmask_.Has_int(val, IptKey_.KeyCode_Alt);} // NOTE:SWT's ctrl constant is different from SWING's
}
class Swt_lnr_mouse implements MouseListener {
public Swt_lnr_mouse(GxwElem elem) {this.elem = elem;} GxwElem elem;

View File

@ -18,19 +18,25 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.core.primitives; import gplx.*; import gplx.core.*;
public class Int_pool {
private final List_adp available_list = List_adp_.new_(); private int available_len;
// private final Bry_bfr dbg_bfr = Bry_bfr.new_();
private int uid_max = -1;
public void Clear() {
synchronized (available_list) {
available_list.Clear();
available_len = 0;
uid_max = -1;
}
}
public int Get_next() {
synchronized (available_list) {
if (available_len == 0)
if (available_len == 0) {
// dbg_bfr.Add_str("+:u:").Add_int_variable(uid_max + 1).Add_byte_nl();
return ++uid_max;
}
else {
Int_obj_val val = (Int_obj_val)List_adp_.Pop_last(available_list);
--available_len;
// dbg_bfr.Add_str("+:a:").Add_int_variable(val.Val()).Add_byte_nl();
return val.Val();
}
}
@ -40,17 +46,22 @@ public class Int_pool {
synchronized (available_list) {
if (available_len == 0 && v == uid_max) {
--this.uid_max;
// dbg_bfr.Add_str("-:m:").Add_int_variable(v).Add_byte_nl();
return;
}
if (available_len == uid_max) {
available_list.Add(Int_obj_val.new_(v));
available_list.Sort();
for (int i = 0; i < available_len; ++i) {
Int_obj_val itm = (Int_obj_val)available_list.Get_at(i);
if (i != itm.Val()) throw Err_.new_("core", "available_list out of order", "contents", available_list.To_str());
if (i != itm.Val())
throw Err_.new_("core", "available_list out of order", "contents", available_list.To_str());
}
// dbg_bfr.Add_str("-:c:").Add_int_variable(v).Add_byte_nl();
this.Clear();
}
else {
// dbg_bfr.Add_str("-:a:").Add_int_variable(v).Add_byte_nl();
available_list.Add(Int_obj_val.new_(v));
++available_len;
}

View File

@ -52,6 +52,14 @@ public class Int_pool_tst {
tstr.Exec_del(2);
tstr.Test_get(0);
}
// PURPOSE: take ids 0, 1, 2 and then delete them in the order 1, 2, 0 (i.e. not the reverse of allocation).
// The test has no explicit assertion after the deletes; presumably it passes when the final Exec_del does not throw -- TODO confirm against Int_pool.Del
@Test public void Del__out_of_order_2() {
tstr.Test_get(0);
tstr.Test_get(1);
tstr.Test_get(2);
tstr.Exec_del(1);
tstr.Exec_del(2);
tstr.Exec_del(0);
}
}
class Int_pool_tstr {
private final Int_pool pool = new Int_pool();

View File

@ -127,7 +127,7 @@ public class Gfui_bnd_parser {
switch (sym_tkn.Tid()) {
case Gfui_bnd_tkn.Tid_sym_plus: // EX: Ctrl + A
if (mod_adj != Mod_val_null) { // if mod, just update mod_val and exit
mod_val = Enm_.Flip_int(true, mod_val, mod_adj);
mod_val = Bitmask_.Flip_int(true, mod_val, mod_adj);
return;
}
break;

View File

@ -58,7 +58,7 @@ public class Xoa_app_ {
}
}
public static final String Name = "xowa";
public static final String Version = "2.9.3.1";
public static final String Version = "2.9.4.1";
public static String Build_date = "2012-12-30 00:00:00";
public static String Op_sys_str;
public static String User_agent = "";

View File

@ -35,7 +35,7 @@ public class Xof_img_size {
&& !Xop_lnki_type.Id_is_thumbable(lnki_type) // not thumb which is implicitly 220; PAGE:en.w:Edward_Snowden; DATE:2015-08-17
)
lnki_w = orig_w; // use original size; EX:[[File:A.ogv]] -> [[File:A.ogv|550px]] where 550px is orig_w; DATE:2015-08-07
if (Enm_.Has_int(lnki_type, Xop_lnki_type.Id_frame) // frame: always return orig size; Linker.php!makeThumbLink2; // Use image dimensions, don't scale
if (Bitmask_.Has_int(lnki_type, Xop_lnki_type.Id_frame) // frame: always return orig size; Linker.php!makeThumbLink2; // Use image dimensions, don't scale
&& lnki_h == Null) { // unless lnki_h specified; DATE:2013-12-22
html_w = file_w = orig_w;
html_h = file_h = orig_h;

View File

@ -76,7 +76,7 @@ public class Xof_img_size_tst {
fxt.Lnki_type_(Xop_lnki_type.Id_frame).Lnki_ext_(Xof_ext_.Id_png).Lnki_(200, 200).Orig_(2038, 1529).Test_html(200, 150, Bool_.N);
}
@Test public void Frame_and_thumb(){ // PURPOSE: frame and thumb should be treated as frame; Enm.Has(val, Id_frame) vs val == Id_frame; PAGE:en.w:History_of_Western_Civilization; DATE:2015-04-16
fxt.Lnki_type_(Enm_.Add_byte(Xop_lnki_type.Id_frame, Xop_lnki_type.Id_thumb)).Lnki_(200, -1).Test_html(400, 200, Bool_.Y); // mut return same as Lnki_lt_orig_frame above
fxt.Lnki_type_(Bitmask_.Add_byte(Xop_lnki_type.Id_frame, Xop_lnki_type.Id_thumb)).Lnki_(200, -1).Test_html(400, 200, Bool_.Y); // mut return same as Lnki_lt_orig_frame above
}
@Test public void Video__use_orig_w(){ // PURPOSE: video should use orig_w; DATE:2015-08-07
fxt.Lnki_type_(Xop_lnki_type.Id_none).Lnki_ext_(Xof_ext_.Id_ogv).Lnki_(-1, -1).Orig_(500, 250).Test_html(500, 250, Bool_.N);

View File

@ -20,11 +20,11 @@ public class Xof_patch_upright_tid_ {
public static final int Tid_unpatched = 0, Tid_use_thumb_w = 1, Tid_fix_default = 2;
public static final int Tid_all = Tid_use_thumb_w | Tid_fix_default;
public static int Merge(boolean use_thumb_w, boolean fix_default) {
if (use_thumb_w && fix_default) return Enm_.Add_int(Tid_use_thumb_w, Tid_fix_default);
if (use_thumb_w && fix_default) return Bitmask_.Add_int(Tid_use_thumb_w, Tid_fix_default);
else if (use_thumb_w) return Tid_use_thumb_w;
else if (fix_default) return Tid_fix_default;
else return Tid_unpatched;
}
public static boolean Split_use_thumb_w(int tid) {return Enm_.Has_int(tid, Tid_use_thumb_w);}
public static boolean Split_fix_default(int tid) {return Enm_.Has_int(tid, Tid_fix_default);}
public static boolean Split_use_thumb_w(int tid) {return Bitmask_.Has_int(tid, Tid_use_thumb_w);}
public static boolean Split_fix_default(int tid) {return Bitmask_.Has_int(tid, Tid_fix_default);}
}

View File

@ -24,6 +24,7 @@ public class Xoh_consts {
;
public static final byte[]
__end = Bry_.new_a7(">")
, __inline = Bry_.new_a7("/>")
, __end_quote = Bry_.new_a7("\">")
, __inline_quote = Bry_.new_a7("\"/>")
, Space_2 = Bry_.new_a7(" ")

View File

@ -76,8 +76,8 @@ public class Xoh_page_wtr_wkr implements Bry_fmtr_arg {
byte[] page_display = Xoh_page_wtr_wkr_.Bld_page_name(tmp_bfr, page_ttl, page.Html_data().Display_ttl());
Xol_vnt_mgr vnt_mgr = wiki.Lang().Vnt_mgr();
if (vnt_mgr.Enabled()) { // VNT
page_name = vnt_mgr.Convert_mgr().Convert_text(wiki, page_name);
page_display = vnt_mgr.Convert_mgr().Convert_text(wiki, page_display);
page_name = vnt_mgr.Convert_mgr().Convert_text(page_name);
page_display = vnt_mgr.Convert_mgr().Convert_text(page_display);
}
fmtr.Bld_bfr_many(html_bfr
, root_dir_bry, Xoa_app_.Version, Xoa_app_.Build_date, app.Tcp_server().Running_str()
@ -155,7 +155,7 @@ public class Xoh_page_wtr_wkr implements Bry_fmtr_arg {
}
Xol_vnt_mgr vnt_mgr = wiki.Lang().Vnt_mgr();
if (vnt_mgr.Enabled()) // VNT
bfr.Add(vnt_mgr.Convert_mgr().Convert_text(wiki, bfr.Xto_bry_and_clear()));
bfr.Add(vnt_mgr.Convert_mgr().Convert_text(bfr.Xto_bry_and_clear()));
}
private void Write_body_pre(Bry_bfr bfr, Xoae_app app, Xowe_wiki wiki, byte[] data_raw, Bry_bfr tmp_bfr) {
Xoh_html_wtr_escaper.Escape(app.Parser_amp_mgr(), tmp_bfr, data_raw, 0, data_raw.length, false, false);

View File

@ -44,7 +44,7 @@ class Xohd_page_srl_itm__html_module implements Xohd_page_srl_itm {
public int Load(Xog_page hpg, byte[] bry, int bry_len, int itm_bgn, Int_obj_ref count_ref) {
itm_bgn += 2; // skip bin_int_abrv of [1, 0]
byte flag = bry[itm_bgn];
hpg.Head_mgr().Init(Enm_.Has_byte(flag, Tid_math), Enm_.Has_byte(flag, Tid_imap), Enm_.Has_byte(flag, Tid_packed), Enm_.Has_byte(flag, Tid_hiero));
hpg.Head_mgr().Init(Bitmask_.Has_byte(flag, Tid_math), Bitmask_.Has_byte(flag, Tid_imap), Bitmask_.Has_byte(flag, Tid_packed), Bitmask_.Has_byte(flag, Tid_hiero));
return 3;
}
public void Save(Xog_page hpg, Bry_bfr bfr) {
@ -60,10 +60,10 @@ class Xohd_page_srl_itm__html_module implements Xohd_page_srl_itm {
}
public static byte Calc_flag(boolean math, boolean imap, boolean packed, boolean hiero) {
byte rv = 0;
if (math) rv = Enm_.Add_byte(rv, Tid_math);
if (imap) rv = Enm_.Add_byte(rv, Tid_imap);
if (packed) rv = Enm_.Add_byte(rv, Tid_packed);
if (hiero) rv = Enm_.Add_byte(rv, Tid_hiero);
if (math) rv = Bitmask_.Add_byte(rv, Tid_math);
if (imap) rv = Bitmask_.Add_byte(rv, Tid_imap);
if (packed) rv = Bitmask_.Add_byte(rv, Tid_packed);
if (hiero) rv = Bitmask_.Add_byte(rv, Tid_hiero);
return rv;
}
private static final byte // SERIALIZED; only supports 8 different types

View File

@ -156,13 +156,13 @@ public class Xoh_head_mgr implements Bry_fmtr_arg {
boolean enabled = itm.Enabled();
if (enabled) {
int flag = itms[i].Flags();
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__css_include)) list__css_include.Add(itm);
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__css_text)) list__css_text.Add(itm);
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__js_include)) list__js_include.Add(itm);
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__js_head_global)) list__js_head_global.Add(itm);
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__js_head_script)) list__js_head_script.Add(itm);
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__js_tail_script)) list__js_tail_script.Add(itm);
if (Enm_.Has_int(flag, Xoh_head_itm__base.Flag__js_window_onload)) list__js_window_onload.Add(itm);
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__css_include)) list__css_include.Add(itm);
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__css_text)) list__css_text.Add(itm);
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__js_include)) list__js_include.Add(itm);
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__js_head_global)) list__js_head_global.Add(itm);
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__js_head_script)) list__js_head_script.Add(itm);
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__js_tail_script)) list__js_tail_script.Add(itm);
if (Bitmask_.Has_int(flag, Xoh_head_itm__base.Flag__js_window_onload)) list__js_window_onload.Add(itm);
}
}
}

View File

@ -215,11 +215,11 @@ public class Xoh_file_wtr__basic {
return scratch_bfr.Xto_bry_and_clear();
}
private static byte[] Arg_anchor_title(Bry_bfr tmp_bfr, byte[] src, Xop_lnki_tkn lnki, byte[] lnki_ttl, Xoh_lnki_title_fmtr anchor_title_wkr) {
if ( Enm_.Has_int(lnki.Lnki_type(), Xop_lnki_type.Id_thumb)
|| Enm_.Has_int(lnki.Lnki_type(), Xop_lnki_type.Id_frame) // If the image is a thumb, do not add a title / alt, even if a caption is available
if ( Bitmask_.Has_int(lnki.Lnki_type(), Xop_lnki_type.Id_thumb)
|| Bitmask_.Has_int(lnki.Lnki_type(), Xop_lnki_type.Id_frame) // If the image is a thumb, do not add a title / alt, even if a caption is available
)
return Bry_.Empty;
else if ( Enm_.Has_int(lnki.Lnki_type(), Xop_lnki_type.Id_frameless)) { // If the image is frameless, add the caption as a title / alt. If no caption is available, do not add a title / alt
else if ( Bitmask_.Has_int(lnki.Lnki_type(), Xop_lnki_type.Id_frameless)) { // If the image is frameless, add the caption as a title / alt. If no caption is available, do not add a title / alt
}
Xop_tkn_itm anchor_title_tkn = lnki.Caption_tkn();
if (anchor_title_tkn == Xop_tkn_null.Null_tkn) return Bry_.Empty; // no caption; return empty; (do not use lnki); DATE:2013-12-31

View File

@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
// Ids for a variant's conversion direction, plus parsing of the textual form ("disable", "unidirectional", "bidirectional").
public class Xol_vnt_dir_ {
  public static final int Tid__none = 0;
  public static final int Tid__uni = 1;
  public static final int Tid__bi = 2;
  // NOTE: the keys must be declared before hash, because static initializers run in textual order
  private static final byte[] Bry__none = Bry_.new_a7("disable");
  private static final byte[] Bry__uni = Bry_.new_a7("unidirectional");
  private static final byte[] Bry__bi = Bry_.new_a7("bidirectional");
  private static final Hash_adp_bry hash = Hash_adp_bry.cs()
    .Add_bry_int(Bry__none, Tid__none)
    .Add_bry_int(Bry__uni, Tid__uni)
    .Add_bry_int(Bry__bi, Tid__bi);
  // Looks v up in hash with Tid__none as the "or" value; presumably unknown keys therefore yield Tid__none -- confirm against Hash_adp_bry.Get_as_int_or
  public static int Parse(byte[] v) {
    return hash.Get_as_int_or(v, Tid__none);
  }
}

View File

@ -18,27 +18,33 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import gplx.xowa.langs.vnts.converts.*;
public class Xol_vnt_itm implements GfoInvkAble {
public Xol_vnt_itm(byte[] key, byte[] name, int mask__vnt) {
this.key = key; this.name = name; this.mask__vnt = mask__vnt;
public Xol_vnt_itm(int idx, byte[] key, byte[] name, int mask__vnt) {
this.idx = idx; this.key = key; this.name = name; this.mask__vnt = mask__vnt;
this.convert_wkr = new Xol_convert_wkr(key);
}
public int Idx() {return idx;} private final int idx; // EX: 2
public byte[] Key() {return key;} private final byte[] key; // EX: zh-cn
public byte[] Name() {return name;} private final byte[] name; // EX: 大陆简体
public boolean Visible() {return visible;} private boolean visible = true; // visible in menu
public byte[][] Fallback_ary() {return fallback_ary;} private byte[][] fallback_ary = Bry_.Ary_empty; // EX: zh-hans|zh
public int Dir() {return dir;} private int dir = Xol_vnt_dir_.Tid__bi; // EX: "bidirectional"
public int Mask__vnt() {return mask__vnt;} private final int mask__vnt; // EX: 8
public int Mask__fallbacks() {return mask_fallbacks;} private int mask_fallbacks; // EX: 11 for zh,zh-hans,zh-cn
public byte[][] Convert_ary() {return convert_ary;} private byte[][] convert_ary = Bry_.Ary_empty; // EX: zh-hans|zh-cn
public Xol_convert_wkr Convert_wkr() {return convert_wkr;} private final Xol_convert_wkr convert_wkr;
public void Visible_(boolean v) {this.visible = v;}
public void Convert_ary_(byte[][] v) {convert_ary = v;}
public void Init(int dir, byte[][] fallback_ary) {
this.dir = dir; this.fallback_ary = fallback_ary;
}
public void Mask__fallbacks__calc(Xol_vnt_regy regy, byte[][] ary) {
this.mask_fallbacks = regy.Mask__calc(Bry_.Ary_add(Bry_.Ary(key), ary));// NOTE: must add lang.key which is not part of fallback; EX: "zh-cn" has fallback of "zh-hans", but chain should calc "zh-cn","zh-hans"
}
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
if (ctx.Match(k, Invk_fallbacks_)) fallback_ary = Bry_split_.Split(m.ReadBry("v"), Byte_ascii.Pipe);
else if (ctx.Match(k, Invk_converts_)) convert_ary = Bry_split_.Split(m.ReadBry("v"), Byte_ascii.Pipe);
else if (ctx.Match(k, Invk_dir_)) dir = Xol_vnt_dir_.Parse(m.ReadBry("v"));
else return GfoInvkAble_.Rv_unhandled;
return this;
} private static final String Invk_fallbacks_ = "fallbacks_", Invk_converts_ = "converts_";
} private static final String Invk_fallbacks_ = "fallbacks_", Invk_converts_ = "converts_", Invk_dir_ = "dir_";
}

View File

@ -18,8 +18,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import gplx.xowa.parsers.vnts.*;
class Xol_vnt_itm_sorter__rule implements gplx.lists.ComparerAble {
private Ordered_hash hash;
public void Sort(Ordered_hash hash, Xop_vnt_rule_tkn[] ary) {
private Hash_adp hash;
public void Sort(Hash_adp hash, Xop_vnt_rule_tkn[] ary) {
synchronized (hash) {
this.hash = hash;
Array_.Sort(ary, this);

View File

@ -19,17 +19,20 @@ package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xow
import gplx.core.btries.*;
import gplx.xowa.parsers.vnts.*;
public class Xol_vnt_regy {
private final Ordered_hash hash = Ordered_hash_.new_bry_(); private int hash_len;
private final Hash_adp_bry hash = Hash_adp_bry.ci_a7(); private int hash_len;
private final List_adp list = List_adp_.new_();
public Btrie_slim_mgr Trie() {return trie;} private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7();
public int Len() {return hash.Count();}
public boolean Has(byte[] k) {return hash.Has(k);}
public Xol_vnt_itm Get_at(int i) {return (Xol_vnt_itm)hash.Get_at(i);}
public Xol_vnt_itm Get_at(int i) {return (Xol_vnt_itm)list.Get_at(i);}
public Xol_vnt_itm Get_by(byte[] k) {return (Xol_vnt_itm)hash.Get_by(k);}
public void Clear() {hash.Clear(); trie.Clear(); hash_len = 0;}
public Xol_vnt_itm Get_by(byte[] s, int b, int e) {return (Xol_vnt_itm)hash.Get_by_mid(s, b, e);}
public void Clear() {hash.Clear(); list.Clear(); trie.Clear(); hash_len = 0;}
public Xol_vnt_itm Add(byte[] key, byte[] name) {
int mask = gplx.core.brys.Bit_.Get_flag(hash_len);
Xol_vnt_itm itm = new Xol_vnt_itm(key, name, mask);
Xol_vnt_itm itm = new Xol_vnt_itm(hash_len, key, name, mask);
hash.Add(key, itm);
list.Add(itm);
trie.Add_obj(key, itm);
hash_len = hash.Count();
return itm;
@ -41,15 +44,15 @@ public class Xol_vnt_regy {
byte[] key = ary[i];
Xol_vnt_itm itm = (Xol_vnt_itm)hash.Get_by(key); if (itm == null) continue; // handle bad vnt from user input; EX: -{zh;bad|text}-
int itm_mask = itm.Mask__vnt();
rv = rv == 0 ? itm_mask : Enm_.Flip_int(true, rv, itm_mask);
rv = rv == 0 ? itm_mask : Bitmask_.Flip_int(true, rv, itm_mask);
}
return rv;
}
public boolean Mask__match_any(int lhs, int rhs) { // EX: match "zh-cn|zh-hans|zh-hant" against "zh|zh-hans|zh-hant"
for (int i = 0; i < hash_len; ++i) {
int mask = gplx.core.brys.Bit_.Get_flag(i); // 1,2,4,8
if (Enm_.Has_int(lhs, mask)) { // lhs has mask; EX: for lhs=6, mask=1 -> 'n'; mask=2 -> 'y'
if (Enm_.Has_int(rhs, mask)) // if rhs does not have mask, return false;
if (Bitmask_.Has_int(lhs, mask)) { // lhs has mask; EX: for lhs=6, mask=1 -> 'n'; mask=2 -> 'y'
if (Bitmask_.Has_int(rhs, mask)) // if rhs does not have mask, return false;
return true;
}
}

View File

@ -0,0 +1,63 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import gplx.xowa.parsers.vnts.*;
// Test fixture for Xol_vnt_regy; every instance works against a registry built by new_chinese().
public class Xol_vnt_regy_fxt {
  private final Xol_vnt_regy mgr = new_chinese();
  // Language chain used by the tests: zh-cn, zh-hans, zh-hant, zh.
  public String[] Make_lang_chain_cn() {
    return String_.Ary("zh-cn", "zh-hans", "zh-hant", "zh");
  }
  // Asserts that matching each vnt chain against the lang chain yields expd.
  public void Test_match_any(boolean expd, String[] lang_chain, String[]... vnt_chain_ary) {
    int lang_flag = mgr.Mask__calc(Bry_.Ary(lang_chain));
    for (String[] vnt_chain : vnt_chain_ary) { // EX: -{zh;zh-hans;zh-hant}-
      int vnt_flag = mgr.Mask__calc(Bry_.Ary(vnt_chain));
      boolean actl = mgr.Mask__match_any(vnt_flag, lang_flag);
      String msg = String_.Concat_with_str(";", vnt_chain) + "<>" + String_.Concat_with_str(";", lang_chain);
      Tfds.Eq(expd, actl, msg);
    }
  }
  // Asserts that the mask calculated for ary equals expd.
  public void Test_calc(String[] ary, int expd) {
    int actl = mgr.Mask__calc(Bry_.Ary(ary));
    Tfds.Eq(expd, actl);
  }
  // Wraps each vnt in a rule token, sorts the tokens through the registry, and compares the resulting order to expd.
  // NOTE: vnt_ary is overwritten with the sorted order.
  public void Test_sort(String[] vnt_ary, String[] expd) {
    int vnt_len = vnt_ary.length;
    Xop_vnt_rule_tkn[] rule_ary = new Xop_vnt_rule_tkn[vnt_len];
    int idx = 0;
    while (idx < vnt_len) {
      byte[] vnt_bry = Bry_.new_u8(vnt_ary[idx]);
      rule_ary[idx] = new Xop_vnt_rule_tkn(Bry_.Empty, vnt_bry, gplx.xowa.parsers.Xop_tkn_itm_.Ary_empty);
      ++idx;
    }
    mgr.Mask__sort(rule_ary);
    idx = 0;
    while (idx < vnt_len) {
      vnt_ary[idx] = String_.new_u8(rule_ary[idx].Rule_lang());
      ++idx;
    }
    Tfds.Eq_ary_str(expd, vnt_ary);
  }
  // Builds a registry holding the nine Chinese variants with their direction and fallbacks.
  public static Xol_vnt_regy new_chinese() { // REF.MW:/languages/classes/LanguageZh.php|LanguageZh|__construct
    Xol_vnt_regy regy = new Xol_vnt_regy();
    new_chinese_vnt(regy, "zh"      , Xol_vnt_dir_.Tid__none, "zh-hans", "zh-hant", "zh-cn", "zh-tw", "zh-hk", "zh-sg", "zh-mo", "zh-my");
    new_chinese_vnt(regy, "zh-hans" , Xol_vnt_dir_.Tid__uni , "zh-cn", "zh-sg", "zh-my");
    new_chinese_vnt(regy, "zh-hant" , Xol_vnt_dir_.Tid__uni , "zh-tw", "zh-hk", "zh-mo");
    new_chinese_vnt(regy, "zh-cn"   , Xol_vnt_dir_.Tid__bi  , "zh-hans", "zh-sg", "zh-my");
    new_chinese_vnt(regy, "zh-hk"   , Xol_vnt_dir_.Tid__bi  , "zh-hant", "zh-mo", "zh-tw");
    new_chinese_vnt(regy, "zh-my"   , Xol_vnt_dir_.Tid__bi  , "zh-hans", "zh-sg", "zh-cn");
    new_chinese_vnt(regy, "zh-mo"   , Xol_vnt_dir_.Tid__bi  , "zh-hant", "zh-hk", "zh-tw");
    new_chinese_vnt(regy, "zh-sg"   , Xol_vnt_dir_.Tid__bi  , "zh-hans", "zh-cn", "zh-my");
    new_chinese_vnt(regy, "zh-tw"   , Xol_vnt_dir_.Tid__bi  , "zh-hant", "zh-hk", "zh-mo");
    return regy;
  }
  // Registers one variant (name = Bry_.Ucase__all of the key bytes) and initializes its direction and fallbacks.
  private static void new_chinese_vnt(Xol_vnt_regy regy, String key, int dir, String... fallbacks) {
    byte[] key_bry = Bry_.new_u8(key);
    regy.Add(key_bry, Bry_.Ucase__all(key_bry)).Init(dir, Bry_.Ary(fallbacks));
  }
}

View File

@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import org.junit.*; import gplx.xowa.parsers.vnts.*;
import org.junit.*;
public class Xol_vnt_regy_tst {
private final Xol_vnt_regy_fxt fxt = new Xol_vnt_regy_fxt();
@Test public void Calc() {
@ -51,34 +51,3 @@ public class Xol_vnt_regy_tst {
fxt.Test_sort(String_.Ary("zh", "zh-hans", "zh-cn" ) , String_.Ary("zh-cn", "zh-hans", "zh"));
}
}
// Test fixture for Xol_vnt_regy; the constructor registers every key in Xop_vnt_parser_fxt.Vnts_chinese with an empty name.
class Xol_vnt_regy_fxt {
  private final Xol_vnt_regy mgr = new Xol_vnt_regy();
  public Xol_vnt_regy_fxt() {
    String[] vnt_keys = Xop_vnt_parser_fxt.Vnts_chinese;
    int vnt_keys_len = vnt_keys.length;
    for (int i = 0; i < vnt_keys_len; ++i)
      mgr.Add(Bry_.new_u8(vnt_keys[i]), Bry_.Empty);
  }
  // Language chain used by the tests: zh-cn, zh-hans, zh-hant, zh.
  public String[] Make_lang_chain_cn() {
    return String_.Ary("zh-cn", "zh-hans", "zh-hant", "zh");
  }
  // Asserts that matching each vnt chain against the lang chain yields expd.
  public void Test_match_any(boolean expd, String[] lang_chain, String[]... vnt_chain_ary) {
    int lang_flag = mgr.Mask__calc(Bry_.Ary(lang_chain));
    for (String[] vnt_chain : vnt_chain_ary) { // EX: -{zh;zh-hans;zh-hant}-
      int vnt_flag = mgr.Mask__calc(Bry_.Ary(vnt_chain));
      boolean actl = mgr.Mask__match_any(vnt_flag, lang_flag);
      String msg = String_.Concat_with_str(";", vnt_chain) + "<>" + String_.Concat_with_str(";", lang_chain);
      Tfds.Eq(expd, actl, msg);
    }
  }
  // Asserts that the mask calculated for ary equals expd.
  public void Test_calc(String[] ary, int expd) {
    int actl = mgr.Mask__calc(Bry_.Ary(ary));
    Tfds.Eq(expd, actl);
  }
  // Wraps each vnt in a rule token, sorts the tokens through the registry, and compares the resulting order to expd.
  // NOTE: vnt_ary is overwritten with the sorted order.
  public void Test_sort(String[] vnt_ary, String[] expd) {
    int vnt_len = vnt_ary.length;
    Xop_vnt_rule_tkn[] rule_ary = new Xop_vnt_rule_tkn[vnt_len];
    int idx = 0;
    while (idx < vnt_len) {
      byte[] vnt_bry = Bry_.new_u8(vnt_ary[idx]);
      rule_ary[idx] = new Xop_vnt_rule_tkn(Bry_.Empty, vnt_bry, gplx.xowa.parsers.Xop_tkn_itm_.Ary_empty);
      ++idx;
    }
    mgr.Mask__sort(rule_ary);
    idx = 0;
    while (idx < vnt_len) {
      vnt_ary[idx] = String_.new_u8(rule_ary[idx].Rule_lang());
      ++idx;
    }
    Tfds.Eq_ary_str(expd, vnt_ary);
  }
}

View File

@ -44,10 +44,11 @@ public class Xol_convert_mgr {
if (new_wkr_idx == -1) throw Err_.new_("lang.vnt", "unknown vnt", "key", cur_vnt);
this.cur_wkr_idx = new_wkr_idx;
}
public byte[] Convert_text(Xowe_wiki wiki, byte[] src) {return Convert_text(wiki, src, 0, src.length);}
public byte[] Convert_text(Xowe_wiki wiki, byte[] src, int bgn, int end) {
Bry_bfr tmp_bfr = wiki.Utl__bfr_mkr().Get_m001();
Xol_convert_wkr converter = wkr_ary[cur_wkr_idx];
// Converts the whole of src by delegating to the (src, bgn, end) overload with 0 and src.length.
public byte[] Convert_text(byte[] src) {
  int src_len = src.length;
  return Convert_text(src, 0, src_len);
}
// Converts the bgn / end range of src with the worker at cur_wkr_idx.
public byte[] Convert_text(byte[] src, int bgn, int end) {
  return Convert_text(cur_wkr_idx, src, bgn, end);
}
// Converts the bgn / end range of src with the worker stored at wkr_ary[vnt_idx].
// The result is built in a buffer obtained from Xoa_app_.Utl__bfr_mkr() and returned via To_bry_and_rls().
public byte[] Convert_text(int vnt_idx, byte[] src, int bgn, int end) {
  Bry_bfr bfr = Xoa_app_.Utl__bfr_mkr().Get_m001();
  wkr_ary[vnt_idx].Convert_text(bfr, src, bgn, end);
  byte[] rv = bfr.To_bry_and_rls();
  return rv;
}

View File

@ -21,6 +21,8 @@ public class Xol_convert_wkr {
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
// Creates a worker identified by key.
public Xol_convert_wkr(byte[] key) {
  this.key = key;
}
private final byte[] key;
// Returns the key this worker was constructed with.
public byte[] Key() {
  return key;
}
// Registers trg in the trie under src.
public void Add(byte[] src, byte[] trg) {
  trie.Add_obj(src, trg);
}
// Removes the trie entry registered under src.
public void Del(byte[] src) {
  trie.Del(src);
}
// Converts the whole of src into bfr by delegating to the (bfr, src, bgn, end) overload with 0 and src.length.
public boolean Convert_text(Bry_bfr bfr, byte[] src) {
  int src_len = src.length;
  return Convert_text(bfr, src, 0, src_len);
}
public boolean Convert_text(Bry_bfr bfr, byte[] src, int bgn, int end) {
int pos = bgn;
@ -47,7 +49,7 @@ public class Xol_convert_wkr {
pos = trie.Match_pos();
}
}
if (!matched) bfr.Add(src); // no convert; make sure to add back src, else bfr will be blank
if (!matched) bfr.Add_mid(src, bgn, end); // no convert; make sure to add back src, else bfr will be blank
return matched;
}
public void Rebuild(Xol_convert_regy regy, byte[][] ary) {

View File

@ -0,0 +1,86 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
// One parsed attribute: positions into src for the whole attribute, its key and its value, plus optional
// pre-built key / val byte arrays. Also holds the constants and static helpers for the packed "atr_utl" int
// (quote type in the low 3 bits; valid / repeated / key_exists / val_made flags in bits 8, 16, 32, 64).
public class Mwh_atr_itm {
  private static final int Qte_tid__mask = (1 << 3) - 1; // low 3 bits of atr_utl
  private final byte[] src;
  private final boolean valid;
  private final boolean key_exists;
  private final boolean repeated;
  private int atr_bgn;
  private int atr_end;
  private final int key_bgn;
  private final int key_end;
  private byte[] key_bry;
  private byte key_tid;
  private final int val_bgn;
  private final int val_end;
  private byte[] val_bry;
  private final int eql_pos;
  private final int qte_tid;

  public Mwh_atr_itm
    ( byte[] src, boolean valid, boolean repeated, boolean key_exists, int atr_bgn, int atr_end
    , int key_bgn, int key_end, byte[] key_bry
    , int val_bgn, int val_end, byte[] val_bry
    , int eql_pos, int qte_tid
    ) {
    this.src = src;
    this.valid = valid;
    this.repeated = repeated;
    this.key_exists = key_exists;
    this.atr_bgn = atr_bgn;
    this.atr_end = atr_end;
    this.key_bgn = key_bgn;
    this.key_end = key_end;
    this.key_bry = key_bry;
    this.val_bgn = val_bgn;
    this.val_end = val_end;
    this.val_bry = val_bry;
    this.eql_pos = eql_pos;
    this.qte_tid = qte_tid;
  }

  // plain accessors
  public byte[] Src() {return src;}
  public boolean Valid() {return valid;}
  public boolean Key_exists() {return key_exists;}
  public boolean Repeated() {return repeated;}
  public int Atr_bgn() {return atr_bgn;}
  public int Atr_end() {return atr_end;}
  public int Key_bgn() {return key_bgn;}
  public int Key_end() {return key_end;}
  public byte[] Key_bry() {return key_bry;}
  public byte Key_tid() {return key_tid;}
  public Mwh_atr_itm Key_tid_(byte v) {
    key_tid = v;
    return this;
  }
  public int Val_bgn() {return val_bgn;}
  public int Val_end() {return val_end;}
  public byte[] Val_bry() {return val_bry;}
  public int Eql_pos() {return eql_pos;}
  public int Qte_tid() {return qte_tid;}

  // Overwrites the attribute's bgn / end positions; returns this for chaining.
  public Mwh_atr_itm Atr_rng(int bgn, int end) {
    this.atr_bgn = bgn;
    this.atr_end = end;
    return this;
  }

  // value converters
  public String Val_as_str() {
    byte[] bry = Val_as_bry();
    return String_.new_u8(bry);
  }
  // Returns val_bry, building it from src[val_bgn..val_end] on first use. NOTE: val_bry is cached
  public byte[] Val_as_bry() {
    if (val_bry == null)
      val_bry = Bry_.Mid(src, val_bgn, val_end);
    return val_bry;
  }
  public byte[] Val_as_bry__blank_to_null() {
    byte[] rv = Val_as_bry();
    if (Bry_.Len_eq_0(rv)) return null;
    return rv;
  }
  // Parses the value as an int, falling back to "or". Reads straight from src (lax parse) when val_bry has not been built.
  public int Val_as_int_or(int or) {
    if (val_bry == null)
      return Bry_.To_int_or__lax(src, val_bgn, val_end, or);
    return Bry_.To_int_or(val_bry, or);
  }
  public boolean Val_as_bool_by_int() {
    int val = Val_as_int_or(0);
    return val == 1;
  }
  public boolean Val_as_bool() {
    byte[] lcase = Bry_.Lcase__all(Val_as_bry());
    return Bry_.Eq(lcase, Bool_.True_bry);
  }

  public static final Mwh_atr_itm[] Ary_empty = new Mwh_atr_itm[0];
  // NOTE: id order is important (original note; the reason is not visible in this file)
  public static final int Atr_tid__invalid = 1, Atr_tid__repeat = 2, Atr_tid__pair = 4, Atr_tid__name = 8;
  public static final int Qte_tid__none = 0, Qte_tid__apos = 1, Qte_tid__qute = 2;
  public static final int Mask__qte__none = 0, Mask__qte__apos = 1, Mask__qte_qute = 2;
  public static final int
    Mask__valid      = 8
  , Mask__repeated   = 16
  , Mask__key_exists = 32
  , Mask__val_made   = 64
  ;
  public static final boolean Mask__valid__n = false, Mask__valid__y = true;
  public static final boolean Mask__key_exists__n = false, Mask__key_exists__y = true;
  public static final boolean Mask__repeated__n = false, Mask__repeated__y = true;
  public static final boolean Mask__val_made__n = false, Mask__val_made__y = true;

  // Packs the quote type and the four flags into one int by OR-ing the matching Mask__* bit for each true flag.
  public static int Calc_atr_utl(int qte_tid, boolean valid, boolean repeated, boolean key_exists, boolean val_made) {
    return qte_tid
      | (valid      ? Mask__valid      : 0)
      | (repeated   ? Mask__repeated   : 0)
      | (key_exists ? Mask__key_exists : 0)
      | (val_made   ? Mask__val_made   : 0)
      ;
  }
  // Extracts the quote type (low 3 bits) from a packed atr_utl value.
  public static int Calc_qte_tid(int val) {
    return val & Qte_tid__mask;
  }
  // Reads the atr_utl slot of the item starting at idx and maps its quote type to a quote byte:
  // Apos for Qte_tid__apos; Quote for every other value.
  public static byte Calc_qte_byte(int[] data_ary, int idx) {
    int qte_tid = Calc_qte_tid(data_ary[idx + Mwh_atr_mgr.Idx_atr_utl]);
    if (qte_tid == Qte_tid__apos) return Byte_ascii.Apos;
    return Byte_ascii.Quote;
  }
//	public static final byte Key_tid_generic = 0, Key_tid_id = 1, Key_tid_style = 2, Key_tid_role = 3;
}

View File

@ -0,0 +1,98 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.brys.*;
// Growable, array-backed store of attributes.
// Each attribute occupies Idx__mult consecutive ints in data_ary (offsets are the Idx_* constants below)
// and Text__mult consecutive slots in text_ary (slot 0 = key bytes; slot 1 = value bytes when supplied).
public class Mwh_atr_mgr {
  private final int data_max_orig; // size of data_ary requested at construction; Clear() shrinks back to it
  // max: number of attributes to pre-allocate room for
  public Mwh_atr_mgr(int max) {
    this.data_max_orig = max * Idx__mult;
    this.Max_(max);
  }
  public int Len() {return itm_len;} private int itm_len;
  public int[] Data_ary() {return data_ary;} private int[] data_ary; private int data_max;
  public byte[][] Text_ary() {return text_ary;} private byte[][] text_ary;
  // (Re)allocates both arrays for len attributes and drops all stored items.
  private void Max_(int len) {
    this.data_max = len * Idx__mult;
    this.data_ary = new int[data_max];
    this.text_ary = new byte[len * Text__mult][];
    this.itm_len = 0;
  }
  // Forgets all items. Arrays are reused when still at their original size; otherwise they are reallocated at that size.
  public void Clear() {
    if (data_max == data_max_orig)
      itm_len = 0;
    else
      Max_(data_max_orig / Idx__mult);
  }
  // Appends one attribute and returns its index (0-based position among the items added since the last clear).
  // key_bry is always stored; val_bry is stored only when non-null, in which case the val_made flag is set in atr_utl.
  public int Add(int nde_uid, int nde_tid, boolean valid, boolean repeated, boolean key_exists, int atr_bgn, int atr_end, int key_bgn, int key_end, byte[] key_bry, int eql_pos, int qte_tid, int val_bgn, int val_end, byte[] val_bry) {
    int data_idx = itm_len * Idx__mult;
    if (data_idx == data_max) { // arrays are full; double them (or allocate room for 1 item if empty)
      int new_data_max = data_max == 0 ? Idx__mult : data_max * 2;
      int[] new_data_ary = new int[new_data_max];
      // FIX: copy existing items into the NEW array; previously data_ary was copied onto itself,
      // so all stored int data was lost once new_data_ary replaced data_ary
      System.arraycopy(data_ary, 0, new_data_ary, 0, data_max);
      this.data_ary = new_data_ary;
      int text_max = text_ary.length;
      int new_text_max = data_max == 0 ? Text__mult : text_max * 2;
      byte[][] new_text_ary = new byte[new_text_max][];
      System.arraycopy(text_ary, 0, new_text_ary, 0, text_max);
      this.text_ary = new_text_ary;
      this.data_max = new_data_max;
    }
    boolean val_made = false;
    int text_idx = itm_len * Text__mult;
    text_ary[text_idx] = key_bry;
    if (val_bry != null) {
      text_ary[text_idx + 1] = val_bry;
      val_made = true;
    }
    data_ary[data_idx + Idx_nde_uid] = nde_uid;
    data_ary[data_idx + Idx_nde_tid] = nde_tid;
    data_ary[data_idx + Idx_atr_utl] = Mwh_atr_itm.Calc_atr_utl(qte_tid, valid, repeated, key_exists, val_made);
    data_ary[data_idx + Idx_atr_bgn] = atr_bgn;
    data_ary[data_idx + Idx_atr_end] = atr_end;
    data_ary[data_idx + Idx_key_bgn] = key_bgn;
    data_ary[data_idx + Idx_key_end] = key_end;
    data_ary[data_idx + Idx_val_bgn] = val_bgn;
    data_ary[data_idx + Idx_val_end] = val_end;
    data_ary[data_idx + Idx_eql_pos] = eql_pos;
    return itm_len++;
  }
  // Overwrites the atr_utl slot of item atr_uid with Atr_tid__repeat, keeping only the bit selected by Atr_utl__val_bry_exists.
  // NOTE(review): Add() writes atr_utl via Mwh_atr_itm.Calc_atr_utl, where bit 16 is Mask__repeated and bit 64 is Mask__val_made;
  // here 2 (Atr_tid__repeat) and 16 (Atr_utl__val_bry_exists) look like an older layout, so this probably neither sets the
  // repeated flag nor preserves val_made -- confirm against callers before changing.
  public void Set_repeated(int atr_uid) {
    int atr_utl_idx = (atr_uid * Idx__mult) + Idx_atr_utl;
    int atr_utl = data_ary[atr_utl_idx];
    int val_bry_exists = atr_utl & Atr_utl__val_bry_exists;
    data_ary[atr_utl_idx] = Mwh_atr_itm.Atr_tid__repeat | val_bry_exists;
  }
  // offsets of each field within one item's slice of data_ary; Idx__mult is the slice length
  public static final int
    Idx_nde_uid = 0
  , Idx_nde_tid = 1
  , Idx_atr_utl = 2
  , Idx_atr_bgn = 3
  , Idx_atr_end = 4
  , Idx_key_bgn = 5
  , Idx_key_end = 6
  , Idx_val_bgn = 7
  , Idx_val_end = 8
  , Idx_eql_pos = 9
  , Idx__mult = 10
  ;
  public static final int Text__mult = 2; // text_ary slots per item: key, val
  public static final int Atr_utl__val_bry_exists = 16;
}

View File

@ -0,0 +1,39 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
// Checks that Mwh_atr_itm.Calc_atr_utl packs the quote type and the four flags into the expected int.
public class Mwh_atr_mgr_tst {
  private final Mwh_atr_mgr_fxt fxt = new Mwh_atr_mgr_fxt();
  @Test public void Atr_utl_make() {
    // key="val"
    fxt.Test_atr_utl_make
    ( Mwh_atr_itm.Qte_tid__qute
    , Mwh_atr_itm.Mask__valid__y
    , Mwh_atr_itm.Mask__repeated__n
    , Mwh_atr_itm.Mask__key_exists__y
    , Mwh_atr_itm.Mask__val_made__n
    , 42  // 2 + 8 + 32
    );
    // key=val key=v<nowiki/>al
    fxt.Test_atr_utl_make
    ( Mwh_atr_itm.Qte_tid__none
    , Mwh_atr_itm.Mask__valid__y
    , Mwh_atr_itm.Mask__repeated__y
    , Mwh_atr_itm.Mask__key_exists__y
    , Mwh_atr_itm.Mask__val_made__y
    , 120 // 0 + 8 + 16 + 32 + 64
    );
  }
}
// Fixture: packs the inputs with Mwh_atr_itm.Calc_atr_utl, then verifies the packed int and that each input can be read back from it.
class Mwh_atr_mgr_fxt {
  public void Test_atr_utl_make(int qte_tid, boolean valid, boolean repeated, boolean key_exists, boolean val_made, int expd) {
    int packed = Mwh_atr_itm.Calc_atr_utl(qte_tid, valid, repeated, key_exists, val_made);
    Tfds.Eq_int(expd, packed);
    Tfds.Eq_int(qte_tid, Mwh_atr_itm.Calc_qte_tid(packed));
    Tfds.Eq_bool(valid, Has_mask(packed, Mwh_atr_itm.Mask__valid));
    Tfds.Eq_bool(repeated, Has_mask(packed, Mwh_atr_itm.Mask__repeated));
    Tfds.Eq_bool(key_exists, Has_mask(packed, Mwh_atr_itm.Mask__key_exists));
    Tfds.Eq_bool(val_made, Has_mask(packed, Mwh_atr_itm.Mask__val_made));
  }
  // true when every bit of mask is set in val
  private static boolean Has_mask(int val, int mask) {
    return (val & mask) == mask;
  }
}

View File

@ -0,0 +1,457 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.primitives.*;
import gplx.xowa.parsers.xndes.*; // for brys: <nowiki>, <noinclude>, <includeonly>, <onlyinclude>
public class Mwh_atr_parser { // REF.MW:Sanitizer.php|decodeTagAttributes;MW_ATTRIBS_REGEX
private static final byte Area__invalid = 0, Area__atr_limbo = 1, Area__key = 2, Area__eql_limbo = 3, Area__val_limbo = 4, Area__val_quote = 5, Area__val_naked = 6;
private final Hash_adp_bry repeated_atrs_hash = Hash_adp_bry.ci_a7(); // ASCII:xnde_atrs
private final Mwh_atr_mgr atr_mgr = new Mwh_atr_mgr(16);
private final Bry_bfr key_bfr = Bry_bfr.new_(), val_bfr = Bry_bfr.new_();
private byte area = Area__atr_limbo;
private int atr_bgn = -1, key_bgn = -1, key_end = -1, eql_pos = -1, val_bgn = -1, val_end = -1;
private byte qte_byte = Byte_ascii.Null;
private boolean key_bfr_on = false, val_bfr_on = false, ws_is_before_val = false;
private int nde_uid, nde_tid;
private final Bry_obj_ref bry_ref = Bry_obj_ref.null_();
// Returns the parser's single Bry_obj_ref instance.
public Bry_obj_ref Bry_obj() {
  return bry_ref;
}
private int nde_end_tid;
// Returns the current value of nde_end_tid.
public int Nde_end_tid() {
  return nde_end_tid;
}
// Scans src one byte at a time from src_bgn towards src_end as the attribute text of a node.
// The scan is a state machine over "area" (atr_limbo -> key -> eql_limbo / val_limbo -> val_quote / val_naked; invalid on bad input).
// Finished (or invalid) attributes are handed to Make, which is defined outside this excerpt (presumably it records them in atr_mgr; TODO confirm).
// When ">" or "/>" is met in Area__atr_limbo, nde_end_tid is set to Nde_end_tid__gt / Nde_end_tid__inline and the loop ends.
// After the scan, every item in atr_mgr is reported through wkr.On_atr_each; then atr_mgr and repeated_atrs_hash are cleared.
//   wkr     : receives one On_atr_each call per item in atr_mgr
//   nde_uid : stored in this.nde_uid
//   nde_tid : stored in this.nde_tid and passed to wkr.On_atr_each
//   src     : bytes being scanned; src_bgn is the first position read; src_end is the position at which the scan finishes
// Returns pos as it stands when the loop exits (but see NOTE(review) in the Slash case of Area__atr_limbo).
public int Parse(Mwh_doc_wkr wkr, int nde_uid, int nde_tid, byte[] src, int src_bgn, int src_end) {
this.nde_uid = nde_uid; this.nde_tid = nde_tid;
this.nde_end_tid = Mwh_doc_parser.Nde_end_tid__invalid;
area = Area__atr_limbo;
boolean prv_is_ws = false;
int pos = src_bgn;
boolean loop = true;
while (loop) {
// end of input: flush whatever attribute is pending, then leave the loop (or resume after an unclosed quote)
if (pos == src_end) {
if (area == Area__val_quote) { // quote still open
int reset_pos = Bry_find_.Find_fwd(src, Byte_ascii.Space, val_bgn, src_end); // try to find 1st space within quote; EX:"a='b c=d" should try to reset at c=d
boolean reset_found = reset_pos != Bry_find_.Not_found;
area = Area__invalid; val_end = reset_found ? reset_pos : src_end;
Make(src, val_end); // create invalid atr
if (reset_found) { // space found; resume from text after space; EX: "a='b c=d"; PAGE:en.w:Aubervilliers DATE:2014-06-25
pos = Bry_find_.Find_fwd_while_not_ws(src, reset_pos, src_end); // skip ws
atr_bgn = -1;
area = Area__atr_limbo;
val_bfr.Clear();
val_bfr_on = false;
ws_is_before_val = false;
// pos was set explicitly above; continue skips the ++pos at the bottom of the loop
continue;
}
else
break;
}
else {
if (area == Area__val_limbo) // NOTE: handle dangling "k=" else will be "k"; EX: <a b=> x> <a b>; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31
area = Area__invalid;
if (atr_bgn != -1) { // atr_bgn will be -1 if atrs ends on quoted (EX:"a='b'"); else, pending atr that needs to be processed; EX: "a=b" b wil be in bfr
val_end = src_end;
Make(src, src_end);
}
break;
}
}
else if (pos > src_end)
break;
byte b = src[pos];
switch (area) {
case Area__atr_limbo: // 1st area after node_name or attribute
switch (b) {
// gt -> stop iterating
case Byte_ascii.Gt:
nde_end_tid = Mwh_doc_parser.Nde_end_tid__gt;
loop = false;
break;
// slash -> check for "/>" or " / "
case Byte_ascii.Slash:
int nxt_pos = pos + 1;
if (nxt_pos == src_end) {
// NOTE(review): this returns an Nde_end_tid constant from a method whose other exit returns pos, and the
// assignment to the local pos just before it has no effect. It also bypasses the On_atr_each loop and
// atr_mgr.Clear() / repeated_atrs_hash.Clear() below. Looks unintended -- confirm.
pos = nxt_pos;
return Mwh_doc_parser.Nde_end_tid__invalid;
}
else if (src[nxt_pos] == Byte_ascii.Gt) {
nde_end_tid = Mwh_doc_parser.Nde_end_tid__inline;
pos = nxt_pos;
loop = false;
}
else {
area = Area__invalid; atr_bgn = pos;
}
break;
// ws -> ignore; skip any ws in atr_limbo; note that once a non-ws char is encountered, it will immediately go into another area
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
if (atr_bgn == -1) atr_bgn = pos;
break;
// alphanum -> enter Area__key
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
case Byte_ascii.Colon:
area = Area__key;
if (atr_bgn == -1) atr_bgn = pos;
key_bgn = pos;
break;
// lt -> check for <nowiki>
case Byte_ascii.Lt: // handle "<nowiki>"
int gt_pos = Xnde_find_gt(src, pos, src_end);
if (gt_pos == Bry_find_.Not_found) {
area = Area__invalid;
atr_bgn = pos;
}
else
pos = gt_pos; // position after ">"; note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
break;
// rest -> invalid
default: // quote and other non-valid key characters are invalid until next space; EX: "<span 'key_cannot_be_quoted' id='123'"
area = Area__invalid; atr_bgn = pos;
break;
}
break;
case Area__invalid:
switch (b) {
// ws -> src_end invalid area
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
Make(src, pos);
area = Area__atr_limbo;
break;
// rest -> continue eating up invalid chars
default:
break;
}
break;
case Area__key:
switch (b) {
// alphanum -> valid key chars
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
case Byte_ascii.Colon: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Underline:
// key_bfr is only used once an embedded xnde forced a manual copy (see Lt case); otherwise the key is just a src range
if (key_bfr_on) key_bfr.Add_byte(b);
break;
// ws -> src_end key
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
area = Area__eql_limbo;
key_end = pos;
break;
// eq -> src_end key; skip Area_eq and go to Area_val_bgn
case Byte_ascii.Eq:
area = Area__val_limbo;
key_end = eql_pos = pos;
break;
// lt -> check for <nowiki>
case Byte_ascii.Lt:
int gt_pos = Xnde_find_gt(src, pos, src_end);
if (gt_pos == Bry_find_.Not_found) // "<" should not be in key; EX: "ke<y"
area = Area__invalid;
else {
if (!key_bfr_on) {key_bfr.Add_mid(src, key_bgn, pos); key_bfr_on = true;}
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
}
break;
// rest -> enter invalid
default:
area = Area__invalid;
break;
}
break;
case Area__eql_limbo:
switch (b) {
// ws -> skip
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: // skip ws
if (key_end == -1) { // EX: "a = b"; key_end != -1 b/c 1st \s sets key_end; EX: "a b = c"; key_end
val_end = pos - 1;
Make(src, pos);
area = Area__atr_limbo;
// continue skips the ++pos at the bottom, so the same byte is re-read in Area__atr_limbo
continue;
}
break;
// eq -> enter Area__eq
case Byte_ascii.Eq:
eql_pos = pos;
area = Area__val_limbo;
break;
// rest -> make atr and enter limbo
case Byte_ascii.Quote: case Byte_ascii.Apos: // FUTURE: previous word was key
default: // NOTE: added this late; xml_parser was not handling "line start=3" DATE:2013-07-03
val_end = pos - 1;
Make(src, pos);
area = Area__atr_limbo;
// continue skips the ++pos at the bottom, so the same byte is re-read in Area__atr_limbo
continue;
}
break;
case Area__val_limbo:
switch (b) {
// ws -> skip
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:
ws_is_before_val = true;
break;
// quote -> enter Area_val_quote
case Byte_ascii.Quote: case Byte_ascii.Apos:
area = Area__val_quote; qte_byte = b; prv_is_ws = false;
val_bgn = pos + 1;
break;
// alphanum -> enter Area_val_raw
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
case Byte_ascii.Colon:
case Byte_ascii.Hash:
area = Area__val_naked;
val_bgn = pos;
break;
// lt -> check for <nowiki>
case Byte_ascii.Lt:
int gt_pos = Xnde_find_gt(src, pos, src_end);
if (gt_pos == Bry_find_.Not_found)
area = Area__invalid;
else
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
break;
// rest -> ignore (?)
default:
break;
}
break;
case Area__val_quote: { // EX: "'val' " in "key = 'val'"
switch (b) {
// quote: check if same as opening quote
case Byte_ascii.Quote: case Byte_ascii.Apos:
if (qte_byte == b) { // quote closes val
val_end = pos;
Make(src, pos + 1); // NOTE: set atr_end *after* quote
}
else { // quote is just char; EX: title="1 o'clock" or title='The "C" way'
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
}
break;
// lt -> check for <nowiki>; EX: <span title='ab<nowiki>c</nowiki>de'>
case Byte_ascii.Lt:
if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init
int gt_pos = Xnde_find_gt(src, pos, src_end);
if (gt_pos == Bry_find_.Not_found)
// area = Area__invalid; // DELETE: 2012-11-13; unpaired < should not mark atr invalid; EX: style='margin:1em<f'
val_bfr.Add_byte(Byte_ascii.Lt);
else
pos = gt_pos; // note that there is ++pos below and loop will continue at gt_pos + 1 (next character after)
prv_is_ws = false;
break;
// ws -> convert all ws to \s; only allow 1 ws at any point in time
case Byte_ascii.Nl: case Byte_ascii.Tab: case Byte_ascii.Cr: // REF.MW:Sanitizer.php|decodeTagAttributes $value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
case Byte_ascii.Space:
if (!val_bfr_on) {val_bfr.Add_mid(src, val_bgn, pos); val_bfr_on = true;} // INLINE: val_bfr.init
if (prv_is_ws) {} // noop; only allow one ws at a time; EX: "a b" -> "a b"; "a\n\nb" -> "a b"
else {
prv_is_ws = true; val_bfr.Add_byte(Byte_ascii.Space);
}
break;
// rest -> add to val
default:
prv_is_ws = false; if (val_bfr_on) val_bfr.Add_byte(b); // INLINE: add char
break;
}
break;
}
case Area__val_naked: // no quotes; EX:a=bcd
switch (b) {
// alphanum -> continue reading
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent:
case Byte_ascii.Amp: case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star:
case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Slash:
case Byte_ascii.Colon: case Byte_ascii.Semic: case Byte_ascii.Gt:
case Byte_ascii.Question: case Byte_ascii.At: case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end:
case Byte_ascii.Pow: case Byte_ascii.Underline: case Byte_ascii.Tick:
case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde:
break;
// ws -> src_end atr
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl:
val_end = pos;
Make(src, pos);
break;
case Byte_ascii.Eq: // EX:"a= b=c" or "a=b=c"; PAGE:en.w:2013_in_American_television
if (ws_is_before_val) { // "a= b=c"; discard 1st and resume at 2nd
int old_val_bgn = val_bgn;
area = Area__invalid; Make(src, val_bgn); // invalidate cur atr; EX:"a="
atr_bgn = key_bgn = old_val_bgn; // reset atr / key to new atr; EX: "b"
key_end = pos;
area = Area__val_limbo; // set area to val_bgn (basically, put after =)
}
else // "a=b=c"; discard all
area = Area__invalid;
break;
case Byte_ascii.Lt:
val_end = pos;
Make(src, pos);
--pos; // NOTE: --pos to include "<" as part of next atr; above ws excludes from next atr
break;
default:
area = Area__invalid;
break;
}
break;
}
++pos;
}
// iterate atrs and notify
// each item's flags are unpacked from its atr_utl int; the manually-built value is read from text_ary only when val_made is set
int len = atr_mgr.Len();
int[] data_ary = atr_mgr.Data_ary();
byte[][] text_ary = atr_mgr.Text_ary();
for (int j = 0; j < len; ++j) {
int itm_idx = j * Mwh_atr_mgr.Idx__mult;
byte[] key_bry = text_ary[j * Mwh_atr_mgr.Text__mult];
byte[] val_bry_manual = null;
int atr_utl = data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_utl];
boolean atr_valid = (atr_utl & Mwh_atr_itm.Mask__valid) == Mwh_atr_itm.Mask__valid;
boolean repeated = (atr_utl & Mwh_atr_itm.Mask__repeated) == Mwh_atr_itm.Mask__repeated;
boolean key_exists = (atr_utl & Mwh_atr_itm.Mask__key_exists) == Mwh_atr_itm.Mask__key_exists;
boolean val_made = (atr_utl & Mwh_atr_itm.Mask__val_made) == Mwh_atr_itm.Mask__val_made;
if (val_made)
val_bry_manual = text_ary[(j * Mwh_atr_mgr.Text__mult) + 1];
wkr.On_atr_each(this, src, nde_tid, atr_valid, repeated, key_exists, key_bry, val_bry_manual, data_ary, itm_idx);
}
// reset per-call state so the next Parse starts with an empty attribute store
atr_mgr.Clear();
repeated_atrs_hash.Clear();
return pos;
}
// Looks up the tag name found between pos and the next ">" in xnde_hash.
// - src: bytes being parsed
// - pos: position just after "<"; one leading "/" is skipped so that "</name>" is handled like "<name>"
// - end: exclusive upper bound for the search
// Returns the position just after the matched name, or Bry_find_.Not_found if there is no ">" or the name is not registered.
// Side effect: bry_ref is reset to null on entry and set to the lookup result (which may be null).
public int Xnde_find_gt_find(byte[] src, int pos, int end) {
    bry_ref.Val_(null);
    if (pos >= end) return Bry_find_.Not_found;         // nothing to scan; also prevents reading src[pos] outside of the requested range
    if (src[pos] == Byte_ascii.Slash && pos + 1 < end)  // if </ move pos to after /
        ++pos;
    int gt_pos = Bry_find_.Find_fwd(src, Byte_ascii.Gt, pos, end);
    if (gt_pos == Bry_find_.Not_found) return Bry_find_.Not_found;
    byte[] bry = (byte[])xnde_hash.Get_by_mid(src, pos, gt_pos);
    bry_ref.Val_(bry);
    return bry == null ? Bry_find_.Not_found : bry.length + pos;
}
// Finds the ">" that closes a registered tag (see xnde_hash) which starts at lt_pos.
// - src: bytes being parsed
// - lt_pos: position of "<"
// - end: exclusive upper bound for the search
// After the tag name, only whitespace and at most one "/" may precede the ">".
// Returns the position of ">", or Bry_find_.Not_found.
private int Xnde_find_gt(byte[] src, int lt_pos, int end) {
    int pos = lt_pos + 1;
    if (pos >= end) {                                   // "<" is last byte in range; nothing follows it
        bry_ref.Val_(null);                             // keep same side effect as Xnde_find_gt_find, which resets bry_ref before searching
        return Bry_find_.Not_found;
    }
    if (src[pos] == Byte_ascii.Slash && pos + 1 < end)  // skip "/" of "</"
        ++pos;
    int match_pos = Xnde_find_gt_find(src, pos, end);
    if (match_pos == Bry_find_.Not_found) return Bry_find_.Not_found;
    boolean slash_found = false;
    for (int i = match_pos; i < end; i++) {
        switch (src[i]) {
            case Byte_ascii.Gt: return i;
            case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab:  // skip any ws
                break;
            case Byte_ascii.Slash:
                if (slash_found) return Bry_find_.Not_found;                 // only allow one slash
                slash_found = true;
                break;
            default:                                                          // any other byte means this is not a well-formed tag
                return Bry_find_.Not_found;
        }
    }
    return Bry_find_.Not_found;                                               // reached end without finding ">"
}
// Finalizes the attribute that is currently being scanned: registers it with atr_mgr, flags an earlier
// attribute with the same key as repeated, and resets all per-attribute state for the next attribute.
// - src: bytes being parsed; used to extract the key when it was not accumulated in key_bfr
// - atr_end: end position recorded for the attribute
private void Make(byte[] src, int atr_end) {
    // calc final values for atr
    boolean key_exists = false;
    byte[] key_bry = null, val_bry = null;
    boolean atr_valid = true;
    if (area != Area__invalid) {
        if (key_bgn != -1 && val_bgn != -1)         // key && val exists; EX: "<input id='123'>"
            key_exists = true;
        else {                                      // not a pair; EX: "<input checked>"
            if (key_end == -1) key_end = val_end;   // NOTE: key_end == -1 when eos; EX: "a" would have key_bgn = 0; key_end = -1; val_end = 1 DATE:2014-07-03
            val_bgn = val_end = -1;
        }
        key_bry = key_bfr_on ? key_bfr.Xto_bry_and_clear() : Bry_.Mid(src, key_bgn, key_end);  // always make key_bry; needed for repeated_atrs as well as key_tid
        if (val_bfr_on) val_bry = val_bfr.Xto_bry_and_clear();
    }
    else {
        atr_valid = false;
        key_bry = Bry_.Empty;
        key_bfr.Clear();
        val_bfr.Clear();                            // discard any buffered val too; else bytes of this invalid atr could be prepended to the val of the next atr
        if (val_bgn == -1) val_bgn = atr_bgn;
    }
    int qte_tid = Mwh_atr_itm.Mask__qte__none;
    if (qte_byte != Byte_ascii.Null)
        qte_tid = qte_byte == Byte_ascii.Quote ? Mwh_atr_itm.Mask__qte_qute : Mwh_atr_itm.Mask__qte__apos;
    int atr_uid = atr_mgr.Add(nde_uid, nde_tid, atr_valid, false, key_exists, atr_bgn, atr_end, key_bgn, key_end, key_bry, eql_pos, qte_tid, val_bgn, val_end, val_bry);
    // handle repeated atrs: the earlier atr with the same key is flagged as repeated; the hash then tracks the latest one
    if (atr_valid) {
        int repeated_uid = repeated_atrs_hash.Get_as_int_or(key_bry, -1);
        if (repeated_uid != -1) {
            repeated_atrs_hash.Del(key_bry);
            atr_mgr.Set_repeated(repeated_uid);
        }
        repeated_atrs_hash.Add_bry_int(key_bry, atr_uid);
    }
    // reset temp variables
    area = Area__atr_limbo; qte_byte = Byte_ascii.Null;
    atr_bgn = key_bgn = val_bgn = key_end = val_end = eql_pos = -1;
    key_bfr_on = val_bfr_on = ws_is_before_val = false;
}
// Registry of tag names looked up by Xnde_find_gt_find: nowiki, noinclude, includeonly, onlyinclude.
// NOTE(review): built with Hash_adp_bry.ci_a7(); presumably a case-insensitive ASCII-keyed hash -- confirm against Hash_adp_bry
private static final Hash_adp_bry xnde_hash = Hash_adp_bry.ci_a7()
.Add_bry_bry(Xop_xnde_tag_.Tag_nowiki.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_noinclude.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_includeonly.Name_bry())
.Add_bry_bry(Xop_xnde_tag_.Tag_onlyinclude.Name_bry())
;
// Sentinel for a key type id that is not known
public static final int Key_tid__unknown = -1;
}

View File

@ -0,0 +1,99 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
// Test fixture for Mwh_atr_parser: builds expected Mwh_atr_itm objects, runs the parser through a collecting worker,
// and compares expected vs actual by rendering both to text.
class Mwh_atr_parser_fxt {
    private final Bry_bfr expd_bfr = Bry_bfr.new_(), actl_bfr = Bry_bfr.new_();
    private final Mwh_atr_parser parser = new Mwh_atr_parser();
    private final Mwh_doc_wkr__atr_bldr wkr = new Mwh_doc_wkr__atr_bldr();
    // expected itm for a valid key-val pair; positions are left at -1 so that ranges are not compared
    public Mwh_atr_itm Make_pair(String key, String val) {return new Mwh_atr_itm(Bry_.Empty, Bool_.Y, Bool_.N, Bool_.Y, -1, -1, -1, -1, Bry_.new_u8(key)  , -1, -1, Bry_.new_u8(val)  , -1, -1);}
    // expected itm for a valid key without a val
    public Mwh_atr_itm Make_name(String key)             {return new Mwh_atr_itm(Bry_.Empty, Bool_.Y, Bool_.N, Bool_.N, -1, -1, -1, -1, Bry_.new_u8(key)  , -1, -1, null              , -1, -1);}
    // expected itm for an invalid atr covering bgn to end
    public Mwh_atr_itm Make_fail(int bgn, int end)       {return new Mwh_atr_itm(Bry_.Empty, Bool_.N, Bool_.N, Bool_.N, bgn, end, -1, -1, null            , -1, -1, null              , -1, -1);}
    // checks Val_as_int_or on an itm whose val is the entire raw String
    public void Test_val_as_int(String raw, int expd) {
        byte[] src = Bry_.new_u8(raw);
        Mwh_atr_itm itm = new Mwh_atr_itm(src, true, false, false, 0, src.length, -1, -1, null, 0, src.length, src, -1, -1);
        Tfds.Eq_int(expd, itm.Val_as_int_or(-1));
    }
    // parses raw and compares the resulting atrs to expd
    public void Test_parse(String raw, Mwh_atr_itm... expd) {
        Mwh_atr_itm[] actl = Exec_parse(raw);
        Test_print(expd, actl);
    }
    private Mwh_atr_itm[] Exec_parse(String raw) {
        byte[] bry = Bry_.new_u8(raw);
        parser.Parse(wkr, -1, -1, bry, 0, bry.length);
        return wkr.To_atr_ary();
    }
    // renders both arrays side by side; the shorter array is padded with null so that extra / missing itms show up as line differences
    public void Test_print(Mwh_atr_itm[] expd_ary, Mwh_atr_itm[] actl_ary) {
        int expd_len = expd_ary.length;
        int actl_len = actl_ary.length;
        int len = expd_len > actl_len ? expd_len : actl_len;
        for (int i = 0; i < len; ++i) {
            To_bfr(expd_bfr, i < expd_len ? expd_ary[i] : null, actl_bfr, i < actl_len ? actl_ary[i] : null);
        }
        Tfds.Eq_str_lines(expd_bfr.Xto_str_and_clear(), actl_bfr.Xto_str_and_clear());
    }
    private void To_bfr(Bry_bfr expd_bfr, Mwh_atr_itm expd_itm, Bry_bfr actl_bfr, Mwh_atr_itm actl_itm) {
        To_bfr__main(expd_bfr, expd_itm);
        To_bfr__main(actl_bfr, actl_itm);
        To_bfr__head(expd_bfr, expd_itm);
        To_bfr__head(actl_bfr, actl_itm);
        if (expd_itm != null && expd_itm.Atr_bgn() != -1) {  // expd_itm is null when more itms were parsed than expected; rng is only compared when the expectation specifies one
            To_bfr__atr_rng(expd_bfr, expd_itm);
            To_bfr__atr_rng(actl_bfr, actl_itm);
        }
    }
    private void To_bfr__head(Bry_bfr bfr, Mwh_atr_itm itm) {
        if (itm == null) return;
        bfr.Add_str_a7("head:").Add_yn(itm.Valid()).Add_byte_semic().Add_yn(itm.Repeated()).Add_byte_semic().Add_yn(itm.Key_exists()).Add_byte_nl();
    }
    private void To_bfr__main(Bry_bfr bfr, Mwh_atr_itm itm) {
        if (itm == null) return;
        if (itm.Valid()) {  // invalid itms print no key / val; they are compared by head (and rng, when specified) only
            bfr.Add_str_a7("key:").Add(itm.Key_bry()).Add_byte_nl();
            bfr.Add_str_a7("val:").Add(itm.Val_as_bry()).Add_byte_nl();
        }
    }
    private void To_bfr__atr_rng(Bry_bfr bfr, Mwh_atr_itm itm) {
        if (itm == null) return;
        bfr.Add_str_a7("rng:").Add_int_variable(itm.Atr_bgn()).Add_byte_semic().Add_int_variable(itm.Atr_end()).Add_byte_nl();
    }
}
// Mwh_doc_wkr used by tests: turns every On_atr_each notification into a Mwh_atr_itm and collects them; all other callbacks are ignored.
class Mwh_doc_wkr__atr_bldr implements Mwh_doc_wkr {
    private final List_adp list = List_adp_.new_();
    public Hash_adp_bry Nde_regy() {return null;}
    public void On_atr_each(Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] data_ary, int itm_idx) {
        // the quote type is packed into the "utl" slot; Calc_qte_tid extracts it
        int qte_tid = Mwh_atr_itm.Calc_qte_tid(data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_utl]);
        list.Add(new Mwh_atr_itm
        ( src, valid, repeated, key_exists
        , data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_bgn], data_ary[itm_idx + Mwh_atr_mgr.Idx_atr_end]
        , data_ary[itm_idx + Mwh_atr_mgr.Idx_key_bgn], data_ary[itm_idx + Mwh_atr_mgr.Idx_key_end], key_bry
        , data_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn], data_ary[itm_idx + Mwh_atr_mgr.Idx_val_end], val_bry_manual
        , data_ary[itm_idx + Mwh_atr_mgr.Idx_eql_pos], qte_tid
        ));
    }
    public void On_txt_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
    public void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) {}
    public void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline) {}
    public void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
    public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
    // returns the collected atrs and empties the list so the worker can be reused by the next test
    public Mwh_atr_itm[] To_atr_ary() {
        return (Mwh_atr_itm[])list.To_ary_and_clear(Mwh_atr_itm.class);
    }
}

View File

@ -0,0 +1,63 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
// Tests for Mwh_atr_parser. Each test parses a raw attribute String through Mwh_atr_parser_fxt and lists the expected atrs:
// Make_pair = valid key with val; Make_name = valid key without val; Make_fail = invalid atr with the given bgn / end range.
public class Mwh_atr_parser_tst {
private final Mwh_atr_parser_fxt fxt = new Mwh_atr_parser_fxt();
// valid key-val pairs
@Test public void Pair__quote__double() {fxt.Test_parse("a=\"b\"" , fxt.Make_pair("a" , "b"));}
@Test public void Pair__quote__single() {fxt.Test_parse("a='b'" , fxt.Make_pair("a" , "b"));}
@Test public void Pair__quote__none() {fxt.Test_parse("a=b" , fxt.Make_pair("a" , "b"));}
@Test public void Pair__empty() {fxt.Test_parse("a=''" , fxt.Make_pair("a" , ""));}
@Test public void Pair__key_w_underline() {fxt.Test_parse("a_b=c" , fxt.Make_pair("a_b" , "c"));}
// keys without vals
@Test public void Name__quote__none() {fxt.Test_parse("b" , fxt.Make_name("b"));}
@Test public void Name__ws() {fxt.Test_parse(" b " , fxt.Make_name("b"));} // PURPOSE:discovered while writing test for ref's "lower-alpha" DATE:2014-07-03
@Test public void Name__mult() {fxt.Test_parse("a b1 c" , fxt.Make_name("a"), fxt.Make_name("b1"), fxt.Make_name("c"));}
// invalid atrs; parsing is expected to resume at the next atr
@Test public void Fail__key_w_plus() {fxt.Test_parse("a+b" , fxt.Make_fail(0, 3));}
@Test public void Fail__key_w_plus__many() {fxt.Test_parse("a+b c=d" , fxt.Make_fail(0, 3) , fxt.Make_pair("c", "d"));}
@Test public void Fail__val_w_plus() {fxt.Test_parse("a=b+c" , fxt.Make_fail(0, 5));}
@Test public void Fail__recover() {fxt.Test_parse("* a=b" , fxt.Make_fail(0, 1) , fxt.Make_pair("a", "b"));} // PURPOSE: * is invalid, but should not stop parsing of a=b
@Test public void Fail__incomplete() {fxt.Test_parse("a= c=d" , fxt.Make_fail(0, 3) , fxt.Make_pair("c", "d"));} // PURPOSE: discard xatr if incomplete and followed by valid atr; PAGE:en.w:2013_in_American_television DATE:2014-09-25
@Test public void Fail__incomplete_2() {fxt.Test_parse("a=c=d" , fxt.Make_fail(0, 5));} // PURPOSE: variation of above; per MW regex, missing space invalidates entire attribute; DATE:2014-09-25
@Test public void Fail__incomplete_pair() {fxt.Test_parse("a= b=" , fxt.Make_fail(0, 3) , fxt.Make_fail(3, 5));} // PURPOSE: "b=" should be invalid not a kv of "b" = "b"; PAGE:en.s:Notes_by_the_Way/Chapter_2; DATE:2015-01-31
// dangling quotes
@Test public void Dangling_eos() {fxt.Test_parse("a='b' c='d" , fxt.Make_pair("a", "b") , fxt.Make_fail(5, 10));} // PURPOSE: handle dangling quote at eos; PAGE:en.w:Aubervilliers DATE:2014-06-25
@Test public void Dangling_bos() {fxt.Test_parse("a='b c=d" , fxt.Make_fail(0, 4) , fxt.Make_pair("c", "d"));}// PURPOSE: handle dangling quote at bos; resume at next valid atr; PAGE:en.w:Aubervilliers DATE:2014-06-25
// whitespace around and between atrs
@Test public void Ws__ini() {fxt.Test_parse(" a='b'" , fxt.Make_pair("a", "b").Atr_rng(0, 6));}
@Test public void Ws__end() {fxt.Test_parse(" a='b' c='d'" , fxt.Make_pair("a", "b").Atr_rng(0, 6), fxt.Make_pair("c", "d").Atr_rng(6, 12));}
@Test public void Ws() {fxt.Test_parse("a = 'b'" , fxt.Make_pair("a", "b"));} // PURPOSE: fix wherein multiple space was causing "a=a"; PAGE:fr.s:La_Sculpture_dans_les_cimetières_de_Paris/Père-Lachaise; DATE:2014-01-18
// multiple atrs
@Test public void Many__quote__apos() {fxt.Test_parse("a='b' c='d' e='f'" , fxt.Make_pair("a", "b"), fxt.Make_pair("c", "d"), fxt.Make_pair("e", "f"));}
@Test public void Many__naked() {fxt.Test_parse("a=b c=d e=f" , fxt.Make_pair("a", "b"), fxt.Make_pair("c", "d"), fxt.Make_pair("e", "f"));}
// whitespace and apos inside quoted vals
@Test public void Val__ws__nl() {fxt.Test_parse("a='b\nc'" , fxt.Make_pair("a", "b c"));}
@Test public void Val__ws__mult() {fxt.Test_parse("a='b c'" , fxt.Make_pair("a", "b c"));}
@Test public void Val__ws__mult_mult() {fxt.Test_parse("a='b c d'" , fxt.Make_pair("a", "b c d"));} // PURPOSE: fix wherein 1st-gobble gobbled rest of spaces (was b cd)
@Test public void Val__apos() {fxt.Test_parse("a=\"b c'd\"" , fxt.Make_pair("a", "b c'd"));} // PURPOSE: fix wherein apos was gobbled up; PAGE:en.s:Alice's_Adventures_in_Wonderland; DATE:2013-11-22
@Test public void Val__apos_2() {fxt.Test_parse("a=\"b'c d\"" , fxt.Make_pair("a", "b'c d"));} // PURPOSE: fix wherein apos was causing "'b'c d"; PAGE:en.s:Grimm's_Household_Tales,_Volume_1; DATE:2013-12-22
// <nowiki> inside keys, vals and quotes
@Test public void Nowiki__val() {fxt.Test_parse("a=<nowiki>'b'</nowiki>" , fxt.Make_pair("a", "b").Atr_rng(0, 13));}
@Test public void Nowiki__key() {fxt.Test_parse("<nowiki>a=b</nowiki>" , fxt.Make_pair("a", "b").Atr_rng(8, 11));}
@Test public void Nowiki__key_2() {fxt.Test_parse("a<nowiki>b</nowiki>c=d" , fxt.Make_pair("abc", "d").Atr_rng(0, 22));}
@Test public void Nowiki__key_3() {fxt.Test_parse("a<nowiki>=</nowiki>\"b\"" , fxt.Make_pair("a", "b").Atr_rng(0, 22));} // EX:fr.w:{{Portail|Transpédia|Californie}}
@Test public void Nowiki__quote() {fxt.Test_parse("a=\"b<nowiki>c</nowiki>d<nowiki>e</nowiki>f\"", fxt.Make_pair("a", "bcdef"));}
// val conversion
@Test public void Val__as_int() {fxt.Test_val_as_int("-123" , -123);}
}

View File

@ -0,0 +1,25 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
// Immutable holder for one document item: the item type (one of the Itm_tid__ constants), a node type id, and the item's bytes.
class Mwh_doc_itm {
    // item types
    public static final int Itm_tid__txt = 0;
    public static final int Itm_tid__nde_head = 1;
    public static final int Itm_tid__nde_tail = 2;
    public static final int Itm_tid__comment = 3;

    private final int itm_tid;
    private final int nde_tid;
    private final byte[] itm_bry;

    public Mwh_doc_itm(int itm_tid, int nde_tid, byte[] itm_bry) {
        this.itm_tid = itm_tid;
        this.nde_tid = nde_tid;
        this.itm_bry = itm_bry;
    }
    // type of item; see Itm_tid__ constants
    public int Itm_tid() {
        return itm_tid;
    }
    // bytes of item; the array passed to the constructor is returned as-is (no copy)
    public byte[] Itm_bry() {
        return itm_bry;
    }
    // node type id passed to the constructor
    public int Nde_tid() {
        return nde_tid;
    }
}

View File

@ -0,0 +1,62 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
// Stores document items in one flat int[]: each item occupies Idx__mult consecutive slots (uid, tid, src_bgn, src_end).
// The array doubles when full; Clear shrinks it back to the size requested at construction.
class Mwh_doc_mgr {
    private final int data_max_orig;    // slot count requested at construction; Clear shrinks back to this
    // max: number of items (not slots) to allocate up front
    public Mwh_doc_mgr(int max) {
        this.data_max_orig = max * Idx__mult;
        this.Max_(max);
    }
    // number of items added since construction / last Clear
    public int Len() {return itm_len;} private int itm_len;
    // backing array; only the first Len() * Idx__mult slots are meaningful; the reference changes when the array grows or shrinks
    public int[] Data_ary() {return data_ary;} private int[] data_ary; private int data_max;
    // allocates a fresh array for len items and resets the count
    private void Max_(int len) {
        this.data_max = len * Idx__mult;
        this.data_ary = new int[data_max];
        this.itm_len = 0;
    }
    // removes all items; reuses the array if it never grew, else reallocates at the original size
    public void Clear() {
        if (data_max == data_max_orig)
            itm_len = 0;
        else
            Max_(data_max_orig / Idx__mult);
    }
    // appends an item and returns its uid (its 0-based index)
    public int Add(int dom_tid, int src_bgn, int src_end) {
        int data_idx = itm_len * Idx__mult;
        if (data_idx == data_max) {     // array is full; double it (or start at one item if created with max = 0)
            int new_data_max = data_max == 0 ? Idx__mult : data_max * 2;
            int[] new_data_ary = new int[new_data_max];
            System.arraycopy(data_ary, 0, new_data_ary, 0, data_max);  // copy existing items into the NEW array; was copying data_ary onto itself, which dropped all earlier items
            this.data_ary = new_data_ary;
            this.data_max = new_data_max;
        }
        int dom_uid = itm_len;
        data_ary[data_idx + Idx_dom_uid] = dom_uid;
        data_ary[data_idx + Idx_dom_tid] = dom_tid;
        data_ary[data_idx + Idx_src_bgn] = src_bgn;
        data_ary[data_idx + Idx_src_end] = src_end;
        ++itm_len;
        return dom_uid;
    }
    // slot offsets within one item; Idx__mult is the number of slots per item
    public static final int
      Idx_dom_uid = 0
    , Idx_dom_tid = 1
    , Idx_src_bgn = 2
    , Idx_src_end = 3
    , Idx__mult = 4
    ;
}

View File

@ -0,0 +1,191 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.primitives.*;
import gplx.xowa.parsers.xndes.*;
// Scans a byte[] for html-like nodes and comments and notifies a Mwh_doc_wkr of each text run, node head, node tail and comment.
// Only names found in the worker's Nde_regy are treated as nodes; anything else stays part of the surrounding text.
// Attributes of node heads are delegated to Mwh_atr_parser.
public class Mwh_doc_parser {
    private final Mwh_doc_mgr dom_mgr = new Mwh_doc_mgr(16);
    private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
    private final List_adp nde_stack = List_adp_.new_();   // enclosing nodes of cur_nde
    private byte[] src; private int src_end;
    private Mwh_doc_wkr wkr;
    private Hash_adp_bry nde_regy;
    private int txt_bgn, nde_uid;                           // txt_bgn: start of text that has not yet been reported to wkr
    private Xop_xnde_tag cur_nde; private int cur_nde_tid;  // innermost open node; its id is passed to wkr as nde_tid (-1 if none)
    // Parses src from src_bgn to src_end, notifying wkr of each item found.
    public void Parse(Mwh_doc_wkr wkr, byte[] src, int src_bgn, int src_end) {
        this.wkr = wkr; this.src = src; this.src_end = src_end;
        this.nde_regy = wkr.Nde_regy();
        nde_stack.Clear();
        dom_mgr.Clear();                                    // start each parse with an empty dom; else items accumulate across calls and uids continue from previous parse
        int pos = txt_bgn = src_bgn;
        nde_uid = cur_nde_tid = -1;
        cur_nde = null;
        while (pos < src_end) {
            if (src[pos] == Byte_ascii.Angle_bgn)           // "<": possible nde start
                pos = Parse_nde(pos);
            else                                            // else, just increment
                ++pos;
        }
        if (src_end != txt_bgn) wkr.On_txt_end(this, src, cur_nde_tid, txt_bgn, pos);  // report trailing text
    }
    // Tries to parse a node or comment starting at "<".
    // Returns the position at which the main loop should resume. If the bytes do not form a known node,
    // nothing is reported and txt_bgn is left alone, so the bytes stay part of the pending text.
    private int Parse_nde(int pos) {
        int nde_end_tid = Nde_end_tid__invalid;
        boolean nde_is_head = true;
        int nde_bgn = pos;
        ++pos;
        int name_bgn = pos;
        int name_end = pos;
        while (pos < src_end) {
            byte b = src[pos];
            switch (b) {
                // valid chars for name
                case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
                case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
                case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
                case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
                case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
                case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
                case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
                case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
                case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
                case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
                case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
                case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
                case Byte_ascii.Dot: case Byte_ascii.Dash: case Byte_ascii.Underline: case Byte_ascii.Colon:   // XML allowed punctuation
                case Byte_ascii.Dollar:// MW: handles <br$2>;
                    ++pos;
                    break;
                // comment check
                case Byte_ascii.Bang:
                    boolean comment_found = false;
                    if (name_bgn == pos && Bry_.Eq(src, pos + 1, pos + 3, Comment_bgn)) {
                        int comment_end_pos = Bry_find_.Find_fwd(src, Comment_end, pos + 3);
                        if (comment_end_pos != Bry_find_.Not_found) {
                            nde_end_tid = Nde_end_tid__comment;
                            pos = comment_end_pos + 3;      // move to after "-->"
                            comment_found = true;
                        }
                    }
                    if (!comment_found)
                        return pos;
                    else
                        break;
                // invalid char; not a node; treat as text; EX: "<!@#", "< /b>"
                default:
                    return pos;
                // slash -> either "</b>" or "<b/>"
                case Byte_ascii.Slash:
                    if (name_bgn == pos) {  // "</"; EX: "</b>"
                        nde_is_head = false;
                        ++name_bgn;
                        ++pos;
                        continue;
                    }
                    else {                  // check for "/>"; NOTE: <pre/a>, <pre//> are allowed
                        name_end = pos;
                        ++pos;
                        if (pos == src_end) return pos; // end of doc; treat as text; EX: "<b/EOS"
                        if (src[pos] == Byte_ascii.Gt) {
                            nde_end_tid = Nde_end_tid__inline;
                            ++pos;
                        }
                        else
                            nde_end_tid = Nde_end_tid__slash;
                    }
                    break;
                // stops "name"
                case Byte_ascii.Gt:
                    nde_end_tid = Nde_end_tid__gt;
                    name_end = pos;
                    ++pos;
                    break;
                case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
                    nde_end_tid = Nde_end_tid__ws;
                    name_end = pos;
                    break;
                case Byte_ascii.Backslash:  // MW: allows "<br\>" -> "<br/>"
                    nde_end_tid = Nde_end_tid__backslash;
                    name_end = pos;
                    break;
            }
            if (nde_end_tid != Nde_end_tid__invalid) break;
        }
        // get name
        Xop_xnde_tag nde_itm = null;
        if (nde_end_tid != Nde_end_tid__comment) {
            nde_itm = (Xop_xnde_tag)nde_regy.Get_by_mid(src, name_bgn, name_end);
            if (nde_itm == null) return pos;    // not a known nde; exit
        }
        if (txt_bgn != nde_bgn) {   // notify txt
            wkr.On_txt_end(this, src, cur_nde_tid, txt_bgn, nde_bgn);
            txt_bgn = pos;
        }
        if (nde_is_head) {
            wkr.On_nde_head_bgn(this, src, cur_nde_tid, name_bgn, name_end);
            switch (nde_end_tid) {
                case Nde_end_tid__comment:
                    wkr.On_comment_end(this, src, cur_nde_tid, nde_bgn, pos);
                    txt_bgn = pos;  // comment is consumed; else a comment with no pending text before it would be reported again as text; EX: "<!--b-->c"
                    break;
                case Nde_end_tid__ws:
                case Nde_end_tid__slash:
                case Nde_end_tid__backslash:    // name is followed by atrs; parse them and let atr_parser report how the head ended
                    pos = atr_parser.Parse(wkr, nde_uid, cur_nde_tid, src, pos, src_end);
                    nde_end_tid = atr_parser.Nde_end_tid();
                    txt_bgn = pos;
                    break;
            }
            switch (nde_end_tid) {
                case Nde_end_tid__inline:
                    wkr.On_nde_head_end(this, src, cur_nde_tid, nde_bgn, pos, Bool_.Y);
                    txt_bgn = pos;
                    break;
                case Nde_end_tid__gt:
                    wkr.On_nde_head_end(this, src, cur_nde_tid, nde_bgn, pos, Bool_.N);
                    txt_bgn = pos;
                    if (    nde_itm != null
                        &&  !nde_itm.Single_only_html()             // ignore <b>
                        &&  (cur_nde == null || !cur_nde.Xtn())     // <pre> ignores inner
                        ) {
                        if (cur_nde != null)
                            nde_stack.Add(cur_nde);                 // remember enclosing nde so that it can be restored at tail
                        this.cur_nde = nde_itm;
                        this.cur_nde_tid = nde_itm.Id();
                    }
                    break;
                case Nde_end_tid__ws:
                case Nde_end_tid__slash:
                case Nde_end_tid__backslash: break; // handled above
            }
            nde_uid = dom_mgr.Add(Mwh_doc_itm.Itm_tid__nde_head, nde_bgn, pos);
        }
        else {
            switch (nde_end_tid) {
                case Nde_end_tid__gt:
                    wkr.On_nde_tail_end(this, src, cur_nde_tid, nde_bgn, pos);
                    txt_bgn = pos;
                    if (nde_itm.Id() == cur_nde_tid) {  // tail closes current nde; restore enclosing nde
                        cur_nde = (Xop_xnde_tag)List_adp_.Pop_or(nde_stack, null);
                        cur_nde_tid = cur_nde == null ? -1 : cur_nde.Id();
                    }
                    break;
            }
        }
        return pos;
    }
    // how a node's name ended
    public static final int Nde_end_tid__invalid = 0, Nde_end_tid__gt = 1, Nde_end_tid__ws = 2, Nde_end_tid__inline = 3, Nde_end_tid__slash = 4, Nde_end_tid__backslash = 5, Nde_end_tid__comment = 6;
    private static final byte[] Comment_bgn = Bry_.new_a7("--"), Comment_end = Bry_.new_a7("-->");
}

View File

@ -0,0 +1,73 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
// Test fixture for Mwh_doc_parser: builds expected Mwh_doc_itm objects, runs the parser through a collecting worker,
// and compares expected vs actual by rendering both to text.
class Mwh_doc_parser_fxt {
    private final Bry_bfr expd_bfr = Bry_bfr.new_(), actl_bfr = Bry_bfr.new_();
    private final Mwh_doc_parser parser = new Mwh_doc_parser();
    private final Mwh_doc_wkr__itm_bldr wkr = new Mwh_doc_wkr__itm_bldr();
    // factories for expected itms; nde_tid of -1 means "do not compare nde_tid"
    public Mwh_doc_itm Make_txt(String raw)                 {return New_itm(Mwh_doc_itm.Itm_tid__txt, -1, raw);}
    public Mwh_doc_itm Make_txt(String raw, int nde_tid)    {return New_itm(Mwh_doc_itm.Itm_tid__txt, nde_tid, raw);}
    public Mwh_doc_itm Make_comment(String raw)             {return New_itm(Mwh_doc_itm.Itm_tid__comment, -1, raw);}
    public Mwh_doc_itm Make_nde_head(String raw)            {return New_itm(Mwh_doc_itm.Itm_tid__nde_head, -1, raw);}
    public Mwh_doc_itm Make_nde_tail(String raw)            {return New_itm(Mwh_doc_itm.Itm_tid__nde_tail, -1, raw);}
    private Mwh_doc_itm New_itm(int itm_tid, int nde_tid, String raw) {
        return new Mwh_doc_itm(itm_tid, nde_tid, Bry_.new_u8(raw));
    }
    // parses raw and compares the resulting itms to expd
    public void Test_parse(String raw, Mwh_doc_itm... expd) {
        Test_print(expd, Exec_parse(raw));
    }
    public Mwh_doc_itm[] Exec_parse(String raw) {
        byte[] raw_bry = Bry_.new_u8(raw);
        parser.Parse(wkr, raw_bry, 0, raw_bry.length);
        return wkr.To_atr_ary();
    }
    // renders both arrays side by side; the shorter array is padded with null so that extra / missing itms show up as line differences
    public void Test_print(Mwh_doc_itm[] expd_ary, Mwh_doc_itm[] actl_ary) {
        int expd_len = expd_ary.length, actl_len = actl_ary.length;
        int max_len = actl_len;
        if (expd_len > actl_len) max_len = expd_len;
        for (int idx = 0; idx < max_len; ++idx) {
            Mwh_doc_itm expd_itm = idx < expd_len ? expd_ary[idx] : null;
            Mwh_doc_itm actl_itm = idx < actl_len ? actl_ary[idx] : null;
            To_bfr(expd_bfr, expd_itm, actl_bfr, actl_itm);
        }
        String expd_str = expd_bfr.Xto_str_and_clear();
        String actl_str = actl_bfr.Xto_str_and_clear();
        Tfds.Eq_str_lines(expd_str, actl_str);
    }
    private void To_bfr(Bry_bfr expd, Mwh_doc_itm expd_itm, Bry_bfr actl, Mwh_doc_itm actl_itm) {
        To_bfr__main(expd, expd_itm);
        To_bfr__main(actl, actl_itm);
        if (expd_itm == null || expd_itm.Nde_tid() == -1) return;  // nde_tid is only compared when the expectation specifies one
        To_bfr__nde_tid(expd, expd_itm);
        To_bfr__nde_tid(actl, actl_itm);
    }
    private void To_bfr__main(Bry_bfr bfr, Mwh_doc_itm itm) {
        if (itm != null) {
            bfr.Add_str_a7("itm_tid:").Add_int_variable(itm.Itm_tid()).Add_byte_nl();
            bfr.Add_str_a7("txt:").Add(itm.Itm_bry()).Add_byte_nl();
        }
    }
    private void To_bfr__nde_tid(Bry_bfr bfr, Mwh_doc_itm itm) {
        if (itm != null)
            bfr.Add_str_a7("nde_tid:").Add_int_variable(itm.Nde_tid()).Add_byte_nl();
    }
}
// Mwh_doc_wkr used by tests: records every text run, node head, node tail and comment as a Mwh_doc_itm; atr and head-bgn notifications are ignored.
class Mwh_doc_wkr__itm_bldr implements Mwh_doc_wkr {
    private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw();
    private final List_adp list = List_adp_.new_();
    public Hash_adp_bry Nde_regy() {return nde_regy;}
    public void On_atr_each(Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {}
    public void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) {}
    public void On_txt_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {
        Add_itm(Mwh_doc_itm.Itm_tid__txt, src, nde_tid, itm_bgn, itm_end);
    }
    public void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline) {
        Add_itm(Mwh_doc_itm.Itm_tid__nde_head, src, nde_tid, itm_bgn, itm_end);
    }
    public void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {
        Add_itm(Mwh_doc_itm.Itm_tid__nde_tail, src, nde_tid, itm_bgn, itm_end);
    }
    public void On_comment_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {
        Add_itm(Mwh_doc_itm.Itm_tid__comment, src, nde_tid, itm_bgn, itm_end);
    }
    // returns the collected itms and empties the list so the worker can be reused by the next test
    public Mwh_doc_itm[] To_atr_ary() {
        return (Mwh_doc_itm[])list.To_ary_and_clear(Mwh_doc_itm.class);
    }
    // records one itm whose bytes are src from itm_bgn to itm_end
    private void Add_itm(int itm_tid, byte[] src, int nde_tid, int itm_bgn, int itm_end) {
        byte[] itm_bry = Bry_.Mid(src, itm_bgn, itm_end);
        list.Add(new Mwh_doc_itm(itm_tid, nde_tid, itm_bry));
    }
}

View File

@ -0,0 +1,60 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*; import gplx.xowa.parsers.xndes.*;
// Tests for Mwh_doc_parser. Each test parses a raw String through Mwh_doc_parser_fxt and lists the expected itms in order:
// Make_txt = text run; Make_nde_head / Make_nde_tail = node head / tail; Make_comment = comment.
// Make_txt with a 2nd arg also checks the node type id reported with that text.
public class Mwh_doc_parser_tst {
private final Mwh_doc_parser_fxt fxt = new Mwh_doc_parser_fxt();
@Test public void Text__basic() {fxt.Test_parse("abc" , fxt.Make_txt("abc"));}
@Test public void Comment() {fxt.Test_parse("a<!--b-->c" , fxt.Make_txt("a"), fxt.Make_comment("<!--b-->"), fxt.Make_txt("c"));}
// input that does not form a known node is expected to stay text
@Test public void Fail__inline_eos() {fxt.Test_parse("a<b/" , fxt.Make_txt("a<b/"));}
@Test public void Fail__unknown() {fxt.Test_parse("a<bc/>d" , fxt.Make_txt("a<bc/>d"));}
@Test public void Node__inline() {fxt.Test_parse("a<b/>c" , fxt.Make_txt("a"), fxt.Make_nde_head("<b/>") , fxt.Make_txt("c"));}
@Test public void Node__pair() {fxt.Test_parse("a<b>c</b>d" , fxt.Make_txt("a"), fxt.Make_nde_head("<b>") , fxt.Make_txt("c"), fxt.Make_nde_tail("</b>"), fxt.Make_txt("d"));}
// node heads with atrs are expected to be reported as one head itm
@Test public void Atrs__pair() {
fxt.Test_parse("<div id='1'>a</div>"
, fxt.Make_nde_head("<div id='1'>")
, fxt.Make_txt("a")
, fxt.Make_nde_tail("</div>"));
}
@Test public void Atrs__inline() {
fxt.Test_parse("a<div id='1'/>b"
, fxt.Make_txt("a")
, fxt.Make_nde_head("<div id='1'/>")
, fxt.Make_txt("b"));
}
// <br> is not expected to become the current node; text after it still reports <b>
@Test public void Node__single_only() {
fxt.Test_parse("<b>a<br>b</b>c"
, fxt.Make_nde_head("<b>")
, fxt.Make_txt("a", Xop_xnde_tag_.Tid_b)
, fxt.Make_nde_head("<br>")
, fxt.Make_txt("b", Xop_xnde_tag_.Tid_b) // <b> not <br>
, fxt.Make_nde_tail("</b>")
, fxt.Make_txt("c", Xop_xnde_tag_.Tid__null)
);
}
// nodes inside <pre> are not expected to become the current node; text after <div> still reports <pre>
@Test public void Node__pre() {
fxt.Test_parse("<pre>a<div>b</pre>c"
, fxt.Make_nde_head("<pre>")
, fxt.Make_txt("a", Xop_xnde_tag_.Tid_pre)
, fxt.Make_nde_head("<div>")
, fxt.Make_txt("b", Xop_xnde_tag_.Tid_pre) // <pre> not <div>
, fxt.Make_nde_tail("</pre>")
, fxt.Make_txt("c", Xop_xnde_tag_.Tid__null)
);
}
}

View File

@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
// Callback contract for HTML-document parsing: one On_* hook per item kind, plus an accessor for a node registry.
// Every hook receives the source bytes (src) and an int nde_tid.
// NOTE(review): presumably Mwh_doc_parser / Mwh_atr_parser invoke these hooks while scanning src, and nde_tid identifies the enclosing node -- confirm against the parsers.
public interface Mwh_doc_wkr {
// hash of nodes; NOTE(review): presumably used by the parser to recognize tag names (Mwh_doc_wkr_.Nde_regy__mw builds one keyed by Name_bry) -- confirm
Hash_adp_bry Nde_regy();
// attribute hook; the flags and key / value arrays describe a single attribute. NOTE(review): exact meaning of val_bry_manual / itm_ary / itm_idx is not visible here -- see Mwh_atr_parser
void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx);
// text hook; itm_bgn / itm_end are positions in src
void On_txt_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end);
// node-head hooks; the "bgn" hook gets the key positions (key_bgn / key_end), the "end" hook gets the item positions and whether the node is inline
void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end);
void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline);
// node-tail and comment hooks; itm_bgn / itm_end are positions in src
void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end);
void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end);
}

View File

@ -0,0 +1,31 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.parsers.xndes.*;
// Static helpers for Mwh_doc_wkr implementations.
public class Mwh_doc_wkr_ {
	// Builds a ci_a7 hash holding every tag in Xop_xnde_tag_.Ary, keyed by the tag's Name_bry.
	public static Hash_adp_bry Nde_regy__mw() {
		Xop_xnde_tag[] tags = Xop_xnde_tag_.Ary;
		Hash_adp_bry regy = Hash_adp_bry.ci_a7();
		for (Xop_xnde_tag tag : tags)
			regy.Add(tag.Name_bry(), tag);
		return regy;
	}
}

View File

@ -24,7 +24,7 @@ public class Xop_lnki_tkn extends Xop_tkn_itm_base {
public void Tkn_tid_to_txt() {tkn_tid = Xop_tkn_itm_.Tid_txt;}
public int Ns_id() {return ns_id;} public Xop_lnki_tkn Ns_id_(int v) {ns_id = v; return this;} private int ns_id;
public Xoa_ttl Ttl() {return ttl;} public Xop_lnki_tkn Ttl_(Xoa_ttl v) {ttl = v; return this;} private Xoa_ttl ttl;
public byte Lnki_type() {return lnki_type;} public Xop_lnki_tkn Lnki_type_(byte v) {lnki_type = (byte)Enm_.Add_int(lnki_type, v); return this;} private byte lnki_type = Xop_lnki_type.Id_null;
public byte Lnki_type() {return lnki_type;} public Xop_lnki_tkn Lnki_type_(byte v) {lnki_type = (byte)Bitmask_.Add_int(lnki_type, v); return this;} private byte lnki_type = Xop_lnki_type.Id_null;
public int Tail_bgn() {return tail_bgn;} public Xop_lnki_tkn Tail_bgn_(int v) {tail_bgn = v; return this;} private int tail_bgn = -1;
public int Tail_end() {return tail_end;} public Xop_lnki_tkn Tail_end_(int v) {tail_end = v; return this;} private int tail_end = -1;
public byte Border() {return border;} public Xop_lnki_tkn Border_(byte v) {border = v; return this;} private byte border = Bool_.__byte;

View File

@ -19,44 +19,44 @@ package gplx.xowa.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.
public class Xop_lnki_type {
public static final byte Id_null = 0, Id_none = 1, Id_frameless = 2, Id_frame = 4, Id_thumb = 8;
public static boolean Id_is_thumbable(byte id) {
return ( Enm_.Has_int(id, Id_thumb) // for purposes of displaying images on page, thumb and frame both create a thumb box
|| Enm_.Has_int(id, Id_frame)
return ( Bitmask_.Has_int(id, Id_thumb) // for purposes of displaying images on page, thumb and frame both create a thumb box
|| Bitmask_.Has_int(id, Id_frame)
);
}
public static boolean Id_defaults_to_thumb(byte id) { // assuming original of 400,200
if ( Enm_.Has_int(id, Id_thumb) // [[File:A.png|thumb]] -> 220,-1
|| Enm_.Has_int(id, Id_frameless) // [[File:A.png|frameless]] -> 220,-1
if ( Bitmask_.Has_int(id, Id_thumb) // [[File:A.png|thumb]] -> 220,-1
|| Bitmask_.Has_int(id, Id_frameless) // [[File:A.png|frameless]] -> 220,-1
)
return true;
else if ( Enm_.Has_int(id, Id_frame) // [[File:A.png|frame]] -> 400,200 (frame is always default size)
else if ( Bitmask_.Has_int(id, Id_frame) // [[File:A.png|frame]] -> 400,200 (frame is always default size)
|| id == Id_null // [[File:A.png]] -> 400,200 (default to original size)
|| Enm_.Has_int(id, Id_none) // TODO: deprecate; NOTE: still used by one test; DATE:2015-08-03
|| Bitmask_.Has_int(id, Id_none) // TODO: deprecate; NOTE: still used by one test; DATE:2015-08-03
)
return false;
else // should not happen
throw Err_.new_unhandled(id);
}
public static boolean Id_limits_large_size(byte id) {// Linker.php|makeThumbLink2|Do not present an image bigger than the source, for bitmap-style images; assuming original of 400,200
if ( Enm_.Has_int(id, Id_thumb) // [[File:A.png|600px|thumb]] -> 400,200
|| Enm_.Has_int(id, Id_frameless) // [[File:A.png|600px|frameless]] -> 400,200
|| Enm_.Has_int(id, Id_frame) // [[File:A.png|600px|frame]] -> 400,200 (frame is always default size)
if ( Bitmask_.Has_int(id, Id_thumb) // [[File:A.png|600px|thumb]] -> 400,200
|| Bitmask_.Has_int(id, Id_frameless) // [[File:A.png|600px|frameless]] -> 400,200
|| Bitmask_.Has_int(id, Id_frame) // [[File:A.png|600px|frame]] -> 400,200 (frame is always default size)
)
return true;
else if ( id == Id_null // [[File:A.png|600px]] -> 600,400; uses orig file of 400,200, but <img> tag src_width / src_height set to 600,400
|| Enm_.Has_int(id, Id_none) // TODO: deprecate; NOTE: leaving in b/c of above failed-deprecate; DATE:2015-08-03
|| Bitmask_.Has_int(id, Id_none) // TODO: deprecate; NOTE: leaving in b/c of above failed-deprecate; DATE:2015-08-03
)
return false;
else // should not happen;
throw Err_.new_unhandled(id);
}
public static boolean Id_supports_upright(byte id) {// REF:Linker.php|makeImageLink;if ( isset( $fp['thumbnail'] ) || isset( $fp['manualthumb'] ) || isset( $fp['framed'] ) || isset( $fp['frameless'] ) || !$hp['width'] ) DATE:2014-05-22
if ( Enm_.Has_int(id, Id_thumb)
|| Enm_.Has_int(id, Id_frameless)
|| Enm_.Has_int(id, Id_frame)
if ( Bitmask_.Has_int(id, Id_thumb)
|| Bitmask_.Has_int(id, Id_frameless)
|| Bitmask_.Has_int(id, Id_frame)
)
return true;
else if ( id == Id_null
|| Enm_.Has_int(id, Id_none)
|| Bitmask_.Has_int(id, Id_none)
)
return false;
else // should not happen;

View File

@ -0,0 +1,144 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.core.primitives.*;
import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
import gplx.xowa.parsers.htmls.*;
// Converts text that contains manual conversion rules ("-{ ... }-") for one language variant.
// Text outside rules is auto-converted through Mwh_doc_parser / Vnt_html_doc_wkr; each rule is handed to Vnt_converter_rule
// and replaced by that rule's Display(). REF.MW:/languages/LanguageConverter.php
public class Vnt_converter_lang {
	private final Bry_bfr bfr = Bry_bfr.new_();	// top-level output of Parse
	private int max_depth = 32;					// deepest "-{" nesting that is still parsed as a rule
	private byte[] src; private int src_len;
	private boolean convert_needed;
	private int pos;							// cursor into src; shared by Parse and Parse_recursive
	private final Vnt_converter_rule converter_rule = new Vnt_converter_rule();
	private Xol_convert_mgr convert_mgr; private Xol_vnt_regy vnt_regy;
	private final Mwh_doc_parser doc_parser = new Mwh_doc_parser();
	private final Vnt_html_doc_wkr html_convert_wkr;
	private final Bry_bfr tmp_convert_bfr = Bry_bfr.new_();
	public Vnt_converter_lang(Xol_convert_mgr convert_mgr, Xol_vnt_regy vnt_regy) {
		this.html_convert_wkr = new Vnt_html_doc_wkr(convert_mgr);
		this.convert_mgr = convert_mgr; this.vnt_regy = vnt_regy;
	}
	// title set by the most recently applied rule of the last Parse; null if no rule was applied
	public byte[] Converted_title() {return converted_title;} private byte[] converted_title;
	// Converts src for vnt_itm. Returns src itself (same array) when it contains no "-{" at all.
	public byte[] Parse(Xol_vnt_itm vnt_itm, byte[] src) {// REF.MW:/languages/LanguageConverter.php!recursiveConvertTopLevel
		this.converted_title = null;
		converter_rule.Init(this, vnt_regy, vnt_itm);
		int markup_count = 0;
		this.pos = 0;
		this.convert_needed = true;	// false for sr lang; SEE:LanguageSr.php !$this->guessVariant(src, vnt);
		this.src = src; this.src_len = src.length;
		while (pos < src_len) {
			int curly_bgn = Bry_find_.Find_fwd(src, Bry__curly_bgn, pos, src_len);
			if (curly_bgn == Bry_find_.Not_found) {						// No more markup, append final segment
				if (markup_count == 0) return src;						// no markups found; just return original
				Add_output(vnt_itm, convert_needed, src, pos, src_len);
				return bfr.Xto_bry_and_clear();
			}
			Add_output(vnt_itm, convert_needed, src, pos, curly_bgn);	// Markup found; append segment
			pos = curly_bgn;											// Advance position
			++markup_count;
			bfr.Add(Parse_recursive(vnt_itm, 1));						// Do recursive conversion
		}
		return bfr.Xto_bry_and_clear();
	}
	// Parses the rule whose "-{" starts at pos and returns the bytes that replace it; leaves pos after the rule.
	// Nested rules are parsed first and their output is spliced into the text of the enclosing rule.
	// An unclosed rule yields "-{" followed by the auto-converted remainder. REF.MW:LanguageConverter.php!recursiveConvertRule
	private byte[] Parse_recursive(Xol_vnt_itm vnt_itm, int depth) {
		pos += 2;	// skip "-{"
		boolean warning_done = false;
		int rule_bgn = pos;		// start of rule text in src
		int seg_bgn = pos;		// start of the src segment not yet copied to inner
		byte[] inner = null;	// rule text with nested output spliced in; null while the rule is still a plain slice of src
		while (pos < src_len) {
			Object o = trie.Match_bgn_w_byte(src[pos], src, pos, src_len);
			if (o == null) {	// char;
				++pos;
				continue;
			}
			switch (((Byte_obj_val)o).Val()) {
				case Tid__curly_bgn:
					inner = Concat_mid(inner, src, seg_bgn, pos);
					if (depth >= max_depth) {	// too deep: keep "-{" as literal text and warn once
						inner = Bry_.Add(inner, Bry__curly_bgn);
						if (!warning_done) {
							// wfMessage('language-converter-depth-warning')->numParams($this->mMaxDepth)->inContentLanguage()->text()
							inner = Bry_.Add(inner, Bry__depth_warning);
							warning_done = true;
						}
						pos += 2;	// skip "-{"
					}
					else
						inner = Bry_.Add(inner, Parse_recursive(vnt_itm, depth + 1));	// Recursively parse another rule
					seg_bgn = pos;
					break;
				case Tid__curly_end:
					if (inner == null)	// no nesting: parse the rule in place
						converter_rule.Parse(src, rule_bgn, pos);
					else {
						inner = Concat_mid(inner, src, seg_bgn, pos);
						converter_rule.Parse(inner, 0, inner.length);
					}
					Apply_manual_conv(converter_rule);
					pos += 2;	// skip "}-"
					return converter_rule.Display();
				default: throw Err_.new_unhandled(-1);	// never happens
			}
		}
		// Unclosed rule: loop ran to end of src without finding "}-"; output "-{" and the auto-converted remainder
		byte[] tail = Concat_mid(inner, src, seg_bgn, src_len);
		pos = src_len;
		return tail.length == 0 ? Bry__curly_bgn : Bry_.Add(Bry__curly_bgn, Auto_convert(vnt_itm, tail));
	}
	// Returns lhs followed by the bytes of src between bgn and end; lhs may be null
	private static byte[] Concat_mid(byte[] lhs, byte[] src, int bgn, int end) {
		byte[] mid = Bry_.Mid(src, bgn, end);
		return lhs == null ? mid : Bry_.Add(lhs, mid);
	}
	// Appends the bytes of src between bgn and end to the output, auto-converting them when convert_needed
	private void Add_output(Xol_vnt_itm vnt_itm, boolean convert_needed, byte[] src, int bgn, int end) {
		if (end - bgn == 0) return;
		if (convert_needed) {
			Auto_convert(bfr, vnt_itm, src, bgn, end);
		}
		else
			bfr.Add_mid(src, bgn, end);
	}
	// Auto-converts all of src for vnt_itm and returns the result as a new array
	public byte[] Auto_convert(Xol_vnt_itm vnt_itm, byte[] src) {
		Auto_convert(tmp_convert_bfr, vnt_itm, src, 0, src.length);
		return tmp_convert_bfr.Xto_bry_and_clear();
	}
	private void Auto_convert(Bry_bfr bfr, Xol_vnt_itm vnt_itm, byte[] src, int bgn, int end) {
		html_convert_wkr.Init(bfr, vnt_itm.Idx());
		doc_parser.Parse(html_convert_wkr, src, bgn, end);
	}
	// Records the rule's title and applies its conversion table: "+" adds each pair to the variant's converter; "-" removes it
	private void Apply_manual_conv(Vnt_converter_rule rule) {
		this.converted_title = rule.Title();
		byte action = rule.Action();
		Vnt_rule_undi_mgr cnv_tbl = rule.Cnv_tbl();
		int len = cnv_tbl.Len();
		for (int i = 0; i < len; ++i) {
			Vnt_rule_undi_grp grp = cnv_tbl.Get_at(i);
			byte[] grp_key = grp.Vnt();
			Xol_vnt_itm vnt_itm = vnt_regy.Get_by(grp_key); if (vnt_itm == null) continue;
			int grp_len = grp.Len();
			Xol_convert_wkr wkr = convert_mgr.Converter_ary()[vnt_itm.Idx()];
			for (int j = 0; j < grp_len; ++j) {
				Vnt_rule_undi_itm itm = grp.Get_at(j);
				if (action == Byte_ascii.Plus) {
					wkr.Add(itm.Src(), itm.Trg());
				}
				else if (action == Byte_ascii.Dash)
					wkr.Del(itm.Src());
			}
		}
	}
	private static final byte Tid__curly_bgn = 1, Tid__curly_end = 2;
	private static final byte[] Bry__curly_bgn = Bry_.new_a7("-{"), Bry__curly_end = Bry_.new_a7("}-");
	private static final byte[] Bry__depth_warning = Bry_.new_a7("<span class=\"error\"></span>");
	private static final Btrie_fast_mgr trie = Btrie_fast_mgr.cs()
		.Add_bry_byte(Bry__curly_bgn, Tid__curly_bgn)
		.Add_bry_byte(Bry__curly_end, Tid__curly_end);
}

View File

@ -0,0 +1,53 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
// Checks how Vnt_converter_lang treats HTML in converted text; every case is prefixed with the same hidden rule (cn / hk / tw)
public class Vnt_converter_lang__html__tst { // REF: https://www.mediawiki.org/wiki/Writing_systems/Syntax
	private final Vnt_converter_lang_fxt fxt = new Vnt_converter_lang_fxt();
	private String rule;
	@Before public void init() {
		rule = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-";
	}
	// text inside and outside an ordinary node is expected to be converted
	@Test public void Node() {
		String raw = rule + "hk<span>hk</span>hk";
		fxt.Test_parse(raw, "cn<span>cn</span>cn");
	}
	// class attribute value is expected to be left as is
	@Test public void Attribs() {
		String raw = rule + "<span class='hk'>hk</span>";
		fxt.Test_parse(raw, "<span class='hk'>cn</span>");
	}
	// title attribute value is expected to be converted
	@Test public void Attribs__title() {
		String raw = rule + "<span title='hk'>hk</span>";
		fxt.Test_parse(raw, "<span title='cn'>cn</span>");
	}
	// alt attribute value is expected to be converted
	@Test public void Attribs__alt() {
		String raw = rule + "<span alt='hk'>hk</span>";
		fxt.Test_parse(raw, "<span alt='cn'>cn</span>");
	}
	// alt attribute value holding a url is expected to be left as is
	@Test public void Attribs__skip_url() {
		String raw = rule + "<span alt='http://hk.org'>hk</span>";
		fxt.Test_parse(raw, "<span alt='http://hk.org'>cn</span>");
	}
	// text inside <script> is expected to be left as is
	@Test public void Node__style() {
		String raw = rule + "hk<script>hk</script>hk";
		fxt.Test_parse(raw, "cn<script>hk</script>cn");
	}
	// text inside <code> is expected to be left as is
	@Test public void Node__code() {
		String raw = rule + "hk<code>hk</code>hk";
		fxt.Test_parse(raw, "cn<code>hk</code>cn");
	}
	// text inside <pre> is expected to be left as is
	@Test public void Node__pre() {
		String raw = rule + "hk<pre>hk</pre>hk";
		fxt.Test_parse(raw, "cn<pre>hk</pre>cn");
	}
	// text inside a node nested in <pre> is expected to be left as is
	@Test public void Node__pre__nested() {
		String raw = rule + "hk<pre><span>hk</span></pre>hk";
		fxt.Test_parse(raw, "cn<pre><span>hk</span></pre>cn");
	}
}

View File

@ -0,0 +1,117 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
// Exercises the "-{flags|rules}-" syntax of Vnt_converter_lang, one test per flag.
// Test_parse_many(raw, expd, vnts...) asserts that raw parses to expd for each listed variant key.
public class Vnt_converter_lang__syntax__tst { // REF: https://www.mediawiki.org/wiki/Writing_systems/Syntax
private final Vnt_converter_lang_fxt fxt = new Vnt_converter_lang_fxt();
// no flag, variant:text pairs; each variant is expected to show the text of its own group (hans vs hant)
@Test public void Bidi() {
String text = "-{zh-hans:a;zh-hant:b}-";
fxt.Test_parse_many(text, "a", "zh-hans", "zh-cn", "zh-sg", "zh");
fxt.Test_parse_many(text, "b", "zh-hant", "zh-hk", "zh-tw");
}
// "src=>variant:trg" rule; only zh-cn is expected to convert the trailing "cn_k"
@Test public void Undi() {
String text = "-{H|cn_k=>zh-cn:cn_v}-cn_k";
fxt.Test_parse_many(text, "cn_k", "zh", "zh-hans", "zh-hant", "zh-hk", "zh-my", "zh-mo", "zh-sg", "zh-tw");
fxt.Test_parse_many(text, "cn_v", "zh-cn");
}
// rule text without variant pairs, with or without the R flag, is expected to be output as is
@Test public void Raw() {
fxt.Test_parse_many("-{a}-", "a", "zh-hans", "zh-cn", "zh-sg", "zh", "zh-hant", "zh-hk", "zh-tw");
fxt.Test_parse_many("-{R|a}-", "a", "zh-hans", "zh-cn", "zh-sg", "zh", "zh-hant", "zh-hk", "zh-tw");
}
// H flag: the rule itself outputs nothing, but is expected to convert the text that follows it
@Test public void Hide() {
String text = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-cn hk tw";
fxt.Test_parse_many(text, "cn cn cn", "zh-cn", "zh-sg");
fxt.Test_parse_many(text, "hk hk hk", "zh-hk");
fxt.Test_parse_many(text, "tw tw tw", "zh-tw");
fxt.Test_parse_many(text, "cn hk tw", "zh", "zh-hans", "zh-hant");
}
// A flag: like H, but the rule is also expected to output the text for the current variant
@Test public void Aout() {
String text = "-{A|zh-cn:cn;zh-hk:hk;zh-tw:tw}- cn hk tw";
fxt.Test_parse_many(text, "cn cn cn cn", "zh-cn", "zh-sg");
fxt.Test_parse_many(text, "hk hk hk hk", "zh-hk");
fxt.Test_parse_many(text, "tw tw tw tw", "zh-tw");
fxt.Test_parse_many(text, "cn cn hk tw", "zh", "zh-hans");
fxt.Test_parse_many(text, "tw cn hk tw", "zh-hant");
fxt.Test_parse_many("h-{}-k", "hk", "zh-cn"); // semi-disabled
}
// "-" flag: text after the second rule is expected to no longer be converted by the first rule
@Test public void Del() {
String text = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-cn hk tw-{-|zh-cn:cn;zh-hk:hk;zh-tw:tw}- cn hk tw";
fxt.Test_parse_many(text, "cn cn cn cn hk tw", "zh-cn", "zh-sg");
fxt.Test_parse_many(text, "hk hk hk cn hk tw", "zh-hk");
fxt.Test_parse_many(text, "tw tw tw cn hk tw", "zh-tw");
fxt.Test_parse_many(text, "cn hk tw cn hk tw", "zh", "zh-hans", "zh-hant");
}
// T flag: rule outputs nothing and leaves following text unconverted; the variant's text is expected in Converted_title
@Test public void Title() {
fxt.Test_parse_title("-{}-", null, "", "zh-cn");
String text = "-{T|zh-cn:cn;zh-hk:hk;zh-tw:tw}-cn hk tw";
fxt.Test_parse_title(text, "cn", "cn hk tw", "zh-cn");
fxt.Test_parse_title(text, "cn", "cn hk tw", "zh-sg");
fxt.Test_parse_title(text, "hk", "cn hk tw", "zh-hk");
fxt.Test_parse_title(text, "tw", "cn hk tw", "zh-tw");
fxt.Test_parse_title(text, "cn", "cn hk tw", "zh-hans");
fxt.Test_parse_title(text, "tw", "cn hk tw", "zh-hant");
fxt.Test_parse_title(text, null, "cn hk tw", "zh");
}
// D flag: rule is expected to output a description of its pairs, identical for every variant
@Test public void Descrip() {
String text = "-{D|zh-cn:cn;zh-hk:hk;zh-tw:tw}-";
fxt.Test_parse_many(text, "ZH-CN:cn;ZH-HK:hk;ZH-TW:tw;", "zh", "zh-hans", "zh-hant", "zh-cn", "zh-hk", "zh-my", "zh-mo", "zh-sg", "zh-tw");
}
// variant keys used as flags: text is expected to be converted only for the listed variants (or variants falling back to them)
@Test public void Mixture() {
String text = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}--{zh;zh-hans;zh-hant|cn hk tw}- -{zh;zh-cn;zh-hk;zh-tw|cn hk tw}-";
fxt.Test_parse_many(text, "cn hk tw cn cn cn", "zh-cn", "zh-sg", "zh-hans");
fxt.Test_parse_many(text, "cn hk tw hk hk hk", "zh-hk");
fxt.Test_parse_many(text, "cn hk tw tw tw tw", "zh-tw", "zh-hant");
fxt.Test_parse_many(text, "cn hk tw cn hk tw", "zh");
}
// D flag with unidirectional pairs: src and variant name are expected to be joined by "⇒"; bidirectional pairs are listed first
@Test public void Descrip__undi() {fxt.Test_parse("-{D|cn_k=>zh-cn:cn_v;hk_k=>zh-hk:hk_v}-", "cn_k⇒ZH-CN:cn_v;hk_k⇒ZH-HK:hk_v;");}
@Test public void Descrip__mixd() {fxt.Test_parse("-{D|zh-tw:tw_v;cn_k=>zh-cn:cn_v;hk_k=>zh-hk:hk_v;zh-mo:mo_v}-", "ZH-TW:tw_v;ZH-MO:mo_v;cn_k⇒ZH-CN:cn_v;hk_k⇒ZH-HK:hk_v;");}
}
// Test fixture: wires a Vnt_converter_lang to the chinese variant registry; the current variant starts as "zh-cn"
class Vnt_converter_lang_fxt {
	private final Vnt_converter_lang converter_lang;
	private final Xol_convert_mgr convert_mgr = new Xol_convert_mgr();
	private final Xol_vnt_regy vnt_regy = Xol_vnt_regy_fxt.new_chinese();
	private Xol_vnt_itm vnt_itm;
	public Vnt_converter_lang_fxt() {
		converter_lang = new Vnt_converter_lang(convert_mgr, vnt_regy);
		convert_mgr.Init(vnt_regy);
		Init_cur("zh-cn");
	}
	// makes vnt the current variant for both the fixture and the convert_mgr
	public void Init_cur(String vnt) {
		byte[] vnt_bry = Bry_.new_a7(vnt);
		this.vnt_itm = vnt_regy.Get_by(vnt_bry);
		convert_mgr.Cur_vnt_(vnt_bry);
	}
	// parses raw with the current variant and compares against expd
	public void Test_parse(String raw, String expd) {
		byte[] actl = converter_lang.Parse(vnt_itm, Bry_.new_u8(raw));
		Tfds.Eq_str(expd, String_.new_u8(actl));
	}
	// parses raw once per variant key; every variant must produce expd
	public void Test_parse_many(String raw, String expd, String... vnts) {
		for (String vnt_key : vnts) {
			Init_cur(vnt_key);
			Xol_vnt_itm vnt = vnt_regy.Get_by(Bry_.new_a7(vnt_key));
			byte[] actl = converter_lang.Parse(vnt, Bry_.new_u8(raw));
			Tfds.Eq_str(expd, String_.new_u8(actl), vnt_key);
		}
	}
	// parses raw for vnt_key, then compares both the text and the title captured by the converter
	public void Test_parse_title(String raw, String expd_title, String expd_text, String vnt_key) {
		Init_cur(vnt_key);
		Xol_vnt_itm vnt = vnt_regy.Get_by(Bry_.new_a7(vnt_key));
		byte[] actl = converter_lang.Parse(vnt, Bry_.new_u8(raw));
		Tfds.Eq_str(expd_text, String_.new_u8(actl), vnt_key);
		Tfds.Eq_str(expd_title, converter_lang.Converted_title());
	}
}

View File

@ -17,42 +17,170 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.core.primitives.*;
import gplx.xowa.langs.vnts.*;
class Vnt_converter_rule { // REF.MW: /languages/LanguageConverter.php|ConverterRule
private final byte[] src;
private final int src_bgn, src_end;
private int pipe_pos = -1;
public Vnt_converter_rule(byte[] src, int src_bgn, int src_end) {
this.src = src; this.src_bgn = src_bgn; this.src_end = src_end;
}
public void Parse() {
}
public void Parse_flags(Vnt_flag_parser parser) {
this.pipe_pos = Bry_find_.Find_fwd(src, Byte_ascii.Pipe, src_bgn, src_end);
if (pipe_pos != Bry_find_.Not_found) // "|" found; EX: -{A|}-
parser.Parse(src, src_bgn, pipe_pos);
int flag_count = parser.Count();
if (flag_count == 0) parser.Set_y(Vnt_flag_itm_.Tid_show);
else if (parser.Limit_if_exists(Vnt_flag_itm_.Tid_raw)) {}
else if (parser.Limit_if_exists(Vnt_flag_itm_.Tid_name)) {}
else if (parser.Limit_if_exists(Vnt_flag_itm_.Tid_del)) {}
else if (flag_count == 1 && parser.Get(Vnt_flag_itm_.Tid_title)) parser.Set_y(Vnt_flag_itm_.Tid_macro);
else if (parser.Get(Vnt_flag_itm_.Tid_macro)) {
boolean exists_d = parser.Get(Vnt_flag_itm_.Tid_descrip);
boolean exists_t = parser.Get(Vnt_flag_itm_.Tid_title);
parser.Clear();
parser.Set_y_many(Vnt_flag_itm_.Tid_all, Vnt_flag_itm_.Tid_macro);
if (exists_d) parser.Set_y(Vnt_flag_itm_.Tid_descrip);
if (exists_t) parser.Set_y(Vnt_flag_itm_.Tid_title);
private final Vnt_flag_parser flag_parser = new Vnt_flag_parser(); private final Vnt_flag_code_mgr flag_codes = new Vnt_flag_code_mgr(); private final Vnt_flag_lang_mgr flag_langs = new Vnt_flag_lang_mgr();
private final Vnt_rule_parser rule_parser = new Vnt_rule_parser(); private final Vnt_rule_undi_mgr rule_undis = new Vnt_rule_undi_mgr(); private final Vnt_rule_bidi_mgr rule_bidis = new Vnt_rule_bidi_mgr();
private final Bry_bfr tmp_bfr = Bry_bfr.new_();
private final Ordered_hash cnv_marked_hash = Ordered_hash_.new_bry_();
private Vnt_converter_lang converter;
private Xol_vnt_regy vnt_regy; private Xol_vnt_itm vnt_itm; private byte[] vnt_key;
private byte[] rule_raw;
public byte[] Display() {return display;} private byte[] display;
public byte[] Title() {return title;} private byte[] title;
public byte Action() {return action;} private byte action;
public Vnt_rule_undi_mgr Cnv_tbl() {return cnv_tbl;} private final Vnt_rule_undi_mgr cnv_tbl = new Vnt_rule_undi_mgr();
// Binds the rule to a converter, a variant registry and the current variant; also hands the registry to rule_parser
public void Init(Vnt_converter_lang converter, Xol_vnt_regy vnt_regy, Xol_vnt_itm vnt_itm) {
	this.converter = converter;
	this.vnt_regy = vnt_regy;
	this.vnt_itm = vnt_itm;
	this.vnt_key = vnt_itm.Key();	// cached key of the current variant
	rule_parser.Init(vnt_regy);
}
public void Parse(byte[] src, int src_bgn, int src_end) {
this.display = this.title = null;
this.action = Byte_ascii.Null;
int pipe_pos = Bry_find_.Find_fwd(src, Byte_ascii.Pipe, src_bgn, src_end);
flag_parser.Parse(flag_codes, flag_langs, vnt_regy, src, src_bgn, pipe_pos);
int rule_bgn = pipe_pos == -1 ? src_bgn : pipe_pos + 1;
this.rule_raw = Bry_.Mid(src, rule_bgn, src_end);
int flag_langs_count = flag_langs.Count();
if (flag_langs_count > 0) { // vnts exist in flag; EX: -{zh-hans;zh-hant|text}-
if (flag_langs.Has(vnt_key))
rule_raw = converter.Auto_convert(vnt_itm, rule_raw); // convert rule text to current language; EX:-{|convert}-
else {
if (parser.Get(Vnt_flag_itm_.Tid_add))
parser.Set_y_many(Vnt_flag_itm_.Tid_all, Vnt_flag_itm_.Tid_show);
if (parser.Get(Vnt_flag_itm_.Tid_descrip))
parser.Set_n(Vnt_flag_itm_.Tid_show);
parser.Limit_if_exists_vnts(); // try to find flags like "zh-hans", "zh-hant"; allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
byte[][] fallbacks = vnt_itm.Fallback_ary();
int fallbacks_len = fallbacks.length;
for (int i = 0; i < fallbacks_len; ++i) {
byte[] fallback = fallbacks[i];
if (flag_langs.Has(fallback)) {
Xol_vnt_itm fallback_itm = (Xol_vnt_itm)vnt_regy.Get_by(fallback);
rule_raw = converter.Auto_convert(fallback_itm, rule_raw);
break;
}
}
public void Parse_rules(Vnt_rule_parser parser) {
parser.Parse(src, src_bgn, src_end);
}
flag_codes.Limit(Vnt_flag_code_.Tid_raw);
}
rule_parser.Clear(rule_undis, rule_bidis, rule_raw);
if (!flag_codes.Get(Vnt_flag_code_.Tid_raw) && !flag_codes.Get(Vnt_flag_code_.Tid_name)) {
rule_parser.Parse(src, rule_bgn, src_end);
}
if (rule_undis.Has_none() && rule_bidis.Has_none()) {
if ( flag_codes.Get(Vnt_flag_code_.Tid_add)
|| flag_codes.Get(Vnt_flag_code_.Tid_del)
) { // fill all variants if text in -{A/H/-|text} without rules
for (int i = 0; i < flag_langs_count; ++i) {
Xol_vnt_itm itm = flag_langs.Get_at(i);
rule_bidis.Set(itm.Key(), rule_raw);
}
}
else if ( !flag_codes.Get(Vnt_flag_code_.Tid_name)
&& !flag_codes.Get(Vnt_flag_code_.Tid_title)
) {
flag_codes.Limit(Vnt_flag_code_.Tid_raw);
}
}
int flag_count = Vnt_flag_code_.Tid__max;
for (int flag = 0; flag < flag_count; ++flag) {
if (!flag_codes.Get(flag)) continue;
switch (flag) {
case Vnt_flag_code_.Tid_raw: display = rule_parser.Raw(); break; // if we don't do content convert, still strip the -{}- tags
case Vnt_flag_code_.Tid_name: // process N flag: output current variant name
byte[] vnt_key_trim = Bry_.Trim(rule_parser.Raw());
Xol_vnt_itm vnt_itm_trim = vnt_regy.Get_by(vnt_key_trim);
display = vnt_itm_trim == null ? display = Bry_.Empty : vnt_itm_trim.Name();
break;
case Vnt_flag_code_.Tid_descrip: display = Make_descrip(); break; // process D flag: output rules description
case Vnt_flag_code_.Tid_hide: display = Bry_.Empty; break; // process H,- flag or T only: output nothing
case Vnt_flag_code_.Tid_del: display = Bry_.Empty; action = Byte_ascii.Dash; break;
case Vnt_flag_code_.Tid_add: display = Bry_.Empty; action = Byte_ascii.Plus; break;
case Vnt_flag_code_.Tid_show: display = Make_converted(vnt_itm); break;
case Vnt_flag_code_.Tid_title: display = Bry_.Empty; title = Make_title(vnt_itm); break;
default: break; // ignore unknown flags (but see error case below)
}
}
if (display == null)
display = Bry_.Add(Bry__error_bgn, Bry__error_end); // wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
Make_conv_tbl();
}
// Rebuilds cnv_tbl (the table exposed by Cnv_tbl) from the bidi / undi rules parsed for the current rule.
// The table is always reset first, including when the rule has no bidi / undi entries; otherwise the table
// of the previous rule would still be returned by Cnv_tbl. REF.MW:ConverterRule|generateConvTable
private void Make_conv_tbl() {
	cnv_tbl.Clear(); cnv_marked_hash.Clear();
	if (rule_undis.Has_none() && rule_bidis.Has_none()) return;	// Special case optimisation
	int vnt_regy_len = vnt_regy.Len();
	for (int i = 0; i < vnt_regy_len; ++i) {
		Xol_vnt_itm vnt = vnt_regy.Get_at(i);
		byte[] vnt_key = vnt.Key();
		// bidi: fill in missing variants with fallbacks
		byte[] bidi_bry = rule_bidis.Get_text_by_key_or_null(vnt_key);
		if (bidi_bry == null) {
			bidi_bry = rule_bidis.Get_text_by_ary_or_null(vnt.Fallback_ary());
			if (bidi_bry != null) rule_bidis.Set(vnt_key, bidi_bry);
		}
		if (bidi_bry != null) {
			// pair this variant with every variant already marked; each bidirectional side gets a mapping to its own text
			int marked_len = cnv_marked_hash.Count();
			for (int j = 0; j < marked_len; ++j) {
				Xol_vnt_itm marked_itm = (Xol_vnt_itm)cnv_marked_hash.Get_at(j);
				byte[] marked_key = marked_itm.Key();
				byte[] marked_bry = rule_bidis.Get_text_by_key_or_null(marked_key);
				byte[] cur_bidi_bry = rule_bidis.Get_text_by_key_or_null(vnt_key);
				if (vnt.Dir() == Xol_vnt_dir_.Tid__bi)
					cnv_tbl.Set(vnt_key, marked_bry, cur_bidi_bry);
				if (marked_itm.Dir() == Xol_vnt_dir_.Tid__bi)
					cnv_tbl.Set(marked_key, cur_bidi_bry, marked_bry);
			}
			cnv_marked_hash.Add(vnt_key, vnt);
		}
		// undi: fill to convert tables
		byte[] undi_bry = rule_undis.Get_text_by_key_or_null(vnt_key);
		if (vnt.Dir() != Xol_vnt_dir_.Tid__none && undi_bry != null) {
			Vnt_rule_undi_grp undi_grp = rule_undis.Get_by(vnt_key);
			int undi_grp_len = undi_grp.Len();
			for (int j = 0; j < undi_grp_len; ++j) {
				Vnt_rule_undi_itm undi_itm = undi_grp.Get_at(j);
				cnv_tbl.Set(vnt_key, undi_itm.Src(), undi_itm.Trg());
			}
		}
	}
}
// Builds the description shown for the D flag: "name:text;" per bidi item, then "src<spr>name:trg;" per undi item
private byte[] Make_descrip() {
	int bidi_len = rule_bidis.Len();
	for (int bidi_idx = 0; bidi_idx < bidi_len; ++bidi_idx) {
		Vnt_rule_bidi_itm bidi = rule_bidis.Get_at(bidi_idx);
		Xol_vnt_itm bidi_vnt = vnt_regy.Get_by(bidi.Vnt());
		tmp_bfr.Add(bidi_vnt.Name())
			.Add_byte_colon()
			.Add(bidi.Text())
			.Add_byte_semic();
	}
	int grp_len = rule_undis.Len();
	for (int grp_idx = 0; grp_idx < grp_len; ++grp_idx) {
		Vnt_rule_undi_grp grp = rule_undis.Get_at(grp_idx);
		int itm_len = grp.Len();
		for (int itm_idx = 0; itm_idx < itm_len; ++itm_idx) {
			Vnt_rule_undi_itm undi = grp.Get_at(itm_idx);
			Xol_vnt_itm grp_vnt = vnt_regy.Get_by(grp.Vnt());
			tmp_bfr.Add(undi.Src())
				.Add(Bry__undi_spr)
				.Add(grp_vnt.Name())
				.Add_byte_colon()
				.Add(undi.Trg())
				.Add_byte_semic();
		}
	}
	return tmp_bfr.Xto_bry_and_clear();
}
// Title for the T flag: variant at Idx 0 (mainLanguageCode; EX: "zh") only uses text keyed directly by its own key; all others use Make_converted
private byte[] Make_title(Xol_vnt_itm vnt) {
	if (vnt.Idx() != 0) return Make_converted(vnt);
	byte[] bidi_text = rule_bidis.Get_text_by_key_or_null(vnt.Key());
	if (bidi_text != null) return bidi_text;
	return rule_undis.Get_text_by_key_or_null(vnt.Key());
}
// Picks the text to display for vnt; returns rule_raw when the rule has no bidi / undi entries, and may return null when nothing matches
private byte[] Make_converted(Xol_vnt_itm vnt) {
	if (rule_bidis.Len() == 0 && rule_undis.Len() == 0) return rule_raw;
	byte[] text = rule_bidis.Get_text_by_key_or_null(vnt.Key());		// display current variant in bidirectional array
	if (text != null) return text;
	text = rule_bidis.Get_text_by_ary_or_null(vnt.Fallback_ary());		// or display current variant in fallbacks
	if (text != null) return text;
	text = rule_undis.Get_text_by_key_or_null(vnt.Key());				// or display current variant in unidirectional array
	if (text != null) return text;
	if (vnt.Dir() != Xol_vnt_dir_.Tid__none) return null;
	// or display first text under disable manual convert
	if (rule_bidis.Len() > 0)
		return rule_bidis.Get_text_at(0);
	return rule_undis.Get_text_at(0);
}
// Byte constants used when building rule output.
// Bry__error_bgn / Bry__error_end wrap the (currently empty) error message shown when a rule produces no display.
// Bry__undi_spr separates source text and variant name in Make_descrip; written as an escape so the character survives re-encoding.
private final static byte[]
  Bry__error_bgn = Bry_.new_a7("<span class=\"error\">")
, Bry__error_end = Bry_.new_a7("</span>")
, Bry__undi_spr = Bry_.new_u8("\u21D2")	// "⇒"; EX: "cn_k⇒ZH-CN:cn_v;"
;
}

View File

@ -16,37 +16,35 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
class Vnt_flag_itm_ {
class Vnt_flag_code_ {
public static final int
Tid_unknown = 0
, Tid_show = 1 // S: EX: -{S|zh-hans:A;zh-hant:B}- -> "A"
, Tid_all = 2 // +: EX: -{+|zh-hans:A;zh-hant:B}- -> "A"
, Tid_err = 3 // E: EX: -{E|zh-hans:A;zh-hant:B}- -> "A"
, Tid_add = 4 // A: add and output; EX: -{A|zh-hans:A;zh-hant:B}- -> "A"
, Tid_title = 5 // T: page_title; EX: -{T|zh-hans:A;zh-hant:B}- -> ""
, Tid_raw = 6 // R: raw: no convert; EX: -{R|zh-hans:A;zh-hant:B}- -> "zh-hans:A;zh-hant:B"
, Tid_descrip = 7 // D: describe; EX: -{D|zh-hans:A;zh-hant:B}- -> "简体A繁體B" (简体=Simplified;繁體=Traditional)
, Tid_del = 8 // -: remove; EX: -{-|zh-hans:A;zh-hant:B}- -> ""
, Tid_macro = 9 // H: macro; EX: -{H|zh-hans:A;zh-hant:B}- -> ""
, Tid_name = 10 // N: EX: -{N|zh-hans:A;zh-hant:B}- -> ""
, Tid_lang = 11 // vnt: EX: -{zh-hant|B}- -> "B"
, Tid__max = 12
Tid_add = 0 // +: EX: -{+|zh-hans:A;zh-hant:B}- -> "A"
, Tid_del = 1 // -: remove; EX: -{-|zh-hans:A;zh-hant:B}- -> ""
, Tid_aout = 2 // A: Add and output; EX: -{A|zh-hans:A;zh-hant:B}- -> "A"
, Tid_hide = 3 // H: Hide macro; EX: -{H|zh-hans:A;zh-hant:B}- -> ""
, Tid_raw = 4 // R: Raw: no convert; EX: -{R|zh-hans:A;zh-hant:B}- -> "zh-hans:A;zh-hant:B"
, Tid_show = 5 // S: Show EX: -{S|zh-hans:A;zh-hant:B}- -> "A"
, Tid_descrip = 6 // D: Describe; EX: -{D|zh-hans:A;zh-hant:B}- -> "简体A繁體B" (简体=Simplified;繁體=Traditional)
, Tid_name = 7 // N: variant Name EX: -{N|zh-hans:A;zh-hant:B}- -> ""
, Tid_title = 8 // T: page Title; EX: -{T|zh-hans:A;zh-hant:B}- -> ""
, Tid_err = 9 // E: Error EX: -{E|zh-hans:A;zh-hant:B}- -> "A"
, Tid__max = 10
;
private static final String[] Tid__names = new String[]
{ "unknown", "show", "all", "err", "add", "title"
, "raw", "descrip", "del", "macro", "name", "lang"
{ "+", "-", "A", "H", "R"
, "S", "D", "N", "T", "E"
};
public static String To_name(int tid) {return Tid__names[tid];}
public static String To_str(int tid) {return Tid__names[tid];}
public static final Hash_adp_bry Regy = Hash_adp_bry.ci_a7() // NOTE: match either lc or uc; EX: -{D}- or -{d}-;
.Add_byte_int(Byte_ascii.Ltr_S , Tid_show)
.Add_byte_int(Byte_ascii.Plus , Tid_all)
.Add_byte_int(Byte_ascii.Ltr_E , Tid_err)
.Add_byte_int(Byte_ascii.Ltr_A , Tid_add)
.Add_byte_int(Byte_ascii.Ltr_T , Tid_title)
.Add_byte_int(Byte_ascii.Ltr_R , Tid_raw)
.Add_byte_int(Byte_ascii.Ltr_D , Tid_descrip)
.Add_byte_int(Byte_ascii.Plus , Tid_add)
.Add_byte_int(Byte_ascii.Dash , Tid_del)
.Add_byte_int(Byte_ascii.Ltr_H , Tid_macro)
.Add_byte_int(Byte_ascii.Ltr_A , Tid_aout)
.Add_byte_int(Byte_ascii.Ltr_H , Tid_hide)
.Add_byte_int(Byte_ascii.Ltr_R , Tid_raw)
.Add_byte_int(Byte_ascii.Ltr_S , Tid_show)
.Add_byte_int(Byte_ascii.Ltr_D , Tid_descrip)
.Add_byte_int(Byte_ascii.Ltr_N , Tid_name)
.Add_byte_int(Byte_ascii.Ltr_T , Tid_title)
.Add_byte_int(Byte_ascii.Ltr_E , Tid_err)
;
}

View File

@ -0,0 +1,56 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
// Tracks which flag codes (Vnt_flag_code_.Tid_*) are switched on, plus how many distinct codes were registered via Add.
// Count() reflects Add calls only; Set_y, Set_y_many, Set_n and Limit change flags without touching the count.
class Vnt_flag_code_mgr {
	private final static int Ary_len = Vnt_flag_code_.Tid__max;
	private final boolean[] ary = new boolean[Ary_len];	// one slot per Tid; true = code is on
	private int count = 0;									// distinct codes registered by Add since last Clear
	// Number of distinct codes registered by Add since the last Clear.
	public int Count() {return count;}
	// True if the code is currently on.
	public boolean Get(int tid) {return ary[tid];}
	// Switches every code off and resets the count.
	public void Clear() {
		count = 0;
		for (int i = 0; i < Ary_len; ++i)
			ary[i] = false;
	}
	// Registers a code: switches it on and counts it.
	// A code that is already on is not counted again, so a repeated flag (EX: "T;T") still counts as one;
	// otherwise checks such as "exactly one code and it is T" would fail for repeated flags.
	public void Add(int tid) {
		if (ary[tid]) return;
		ary[tid] = Bool_.Y;
		++count;
	}
	// Switches one code on without changing the count.
	public void Set_y(int tid) {ary[tid] = Bool_.Y;}
	// Switches several codes on without changing the count.
	public void Set_y_many(int... vals) {
		int len = vals.length;
		for (int i = 0; i < len; ++i)
			ary[vals[i]] = Bool_.Y;
	}
	// Switches one code off without changing the count.
	public void Set_n(int tid) {ary[tid] = Bool_.N;}
	// Leaves only the given code on; every other code is switched off.
	public void Limit(int tid) {
		for (int i = 0; i < Ary_len; ++i)
			ary[i] = i == tid;
	}
	// If the given code is on, switches all other codes off and returns true; otherwise changes nothing and returns false.
	public boolean Limit_if_exists(int tid) {
		boolean exists = ary[tid]; if (!exists) return false;
		this.Limit(tid);
		return true;
	}
	// Debug output: appends the name of every code that is on, in Tid order, separated by ";".
	// A ";" is also written first if the buffer already has content.
	public void To_bfr__dbg(Bry_bfr bfr) {
		for (int i = 0; i < Ary_len; ++i) {
			if (ary[i]) {
				if (bfr.Len_gt_0()) bfr.Add_byte_semic();
				bfr.Add_str_a7(Vnt_flag_code_.To_str(i));
			}
		}
	}
}

View File

@ -0,0 +1,35 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.langs.vnts.*;
// Ordered set of the variants that were named as flags, keyed by variant key; EX: "zh-hans" in "-{zh-hans;zh-hant|XXXX}-"
class Vnt_flag_lang_mgr {
	private final Ordered_hash regy = Ordered_hash_.new_bry_();
	// Number of distinct variants registered.
	public int Count() {return regy.Count();}
	// True if a variant with the given key was registered.
	public boolean Has(byte[] vnt) {return regy.Has(vnt);}
	// Removes all registered variants.
	public void Clear() {regy.Clear();}
	// Registers a variant under its key.
	// A variant that is listed more than once (EX: "zh-hans;zh-hans") is kept once, in its first position;
	// this avoids handing a duplicate key to the ordered hash (NOTE(review): presumably rejected by Ordered_hash.Add -- confirm)
	// and follows the same find-then-add pattern as Vnt_rule_bidi_mgr.Set.
	public void Add(Xol_vnt_itm itm) {
		byte[] key = itm.Key();
		if (regy.Has(key)) return;
		regy.Add(key, itm);
	}
	// Returns the variant at the given position, in registration order.
	public Xol_vnt_itm Get_at(int i) {return (Xol_vnt_itm)regy.Get_at(i);}
	// Debug output: appends every variant key in registration order, separated by ";".
	// A ";" is also written first if the buffer already has content.
	public void To_bfr__dbg(Bry_bfr bfr) {
		int len = regy.Count();
		for (int i = 0; i < len; ++i) {
			Xol_vnt_itm itm = (Xol_vnt_itm)regy.Get_at(i);
			if (bfr.Len_gt_0()) bfr.Add_byte_semic();
			bfr.Add(itm.Key());
		}
	}
}

View File

@ -16,52 +16,48 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.langs.vnts.*;
class Vnt_flag_parser implements gplx.core.brys.Bry_split_wkr {
private final Hash_adp_bry flag_regy = Vnt_flag_itm_.Regy;
private final Hash_adp_bry vnt_regy = Hash_adp_bry.cs();
private final boolean[] flag_ary = new boolean[Vnt_flag_itm_.Tid__max];
private int count = 0;
public int Count() {return count;}
public boolean Get(int tid) {return flag_ary[tid];}
public void Set_y(int tid) {flag_ary[tid] = Bool_.Y;}
public void Set_y_many(int... ary) {
int len = ary.length;
for (int i = 0; i < len; ++i)
flag_ary[ary[i]] = Bool_.Y;
private final Hash_adp_bry codes_regy = Vnt_flag_code_.Regy;
private Vnt_flag_code_mgr codes; private Vnt_flag_lang_mgr langs;
private Xol_vnt_regy vnt_regy;
public void Parse(Vnt_flag_code_mgr codes, Vnt_flag_lang_mgr langs, Xol_vnt_regy vnt_regy, byte[] src, int src_bgn, int src_end) {
this.codes = codes; this.langs = langs; this.vnt_regy = vnt_regy;
codes.Clear(); langs.Clear();
if (src_end != Bry_find_.Not_found) // "|" found; EX: -{A|}-
Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Semic, true, this);
int codes_count = codes.Count(), langs_count = langs.Count();
if (codes_count == 0) codes.Set_y(Vnt_flag_code_.Tid_show);
else if (codes.Limit_if_exists(Vnt_flag_code_.Tid_raw)) {}
else if (codes.Limit_if_exists(Vnt_flag_code_.Tid_name)) {}
else if (codes.Limit_if_exists(Vnt_flag_code_.Tid_del)) {}
else if (codes_count == 1 && codes.Get(Vnt_flag_code_.Tid_title)) codes.Set_y(Vnt_flag_code_.Tid_hide);
else if (codes.Get(Vnt_flag_code_.Tid_hide)) {
boolean exists_d = codes.Get(Vnt_flag_code_.Tid_descrip);
boolean exists_t = codes.Get(Vnt_flag_code_.Tid_title);
codes.Clear();
codes.Set_y_many(Vnt_flag_code_.Tid_add, Vnt_flag_code_.Tid_hide);
if (exists_d) codes.Set_y(Vnt_flag_code_.Tid_descrip);
if (exists_t) codes.Set_y(Vnt_flag_code_.Tid_title);
}
public void Set_n(int tid) {flag_ary[tid] = Bool_.N;}
public void Limit(int tid) {
for (int i = 0; i < Vnt_flag_itm_.Tid__max; ++i) {
if (i != tid) flag_ary[i] = false;
else {
if (codes.Get(Vnt_flag_code_.Tid_aout))
codes.Set_y_many(Vnt_flag_code_.Tid_add, Vnt_flag_code_.Tid_show);
if (codes.Get(Vnt_flag_code_.Tid_descrip))
codes.Set_n(Vnt_flag_code_.Tid_show);
if (langs_count > 0)
codes.Clear();
}
}
public boolean Limit_if_exists(int tid) {
boolean exists = flag_ary[tid]; if (!exists) return false;
for (int i = 0; i < Vnt_flag_itm_.Tid__max; ++i) {
if (i != tid) flag_ary[i] = false;
}
return true;
}
public boolean Limit_if_exists_vnts() {
return false;
}
public void Clear() {
count = 0;
for (int i = 0; i < Vnt_flag_itm_.Tid__max; ++i)
flag_ary[i] = false;
}
public void Parse(byte[] src, int src_bgn, int src_end) {
this.Clear();
Bry_split_.Split(src, Byte_ascii.Semic, true, this);
}
public int Split(byte[] src, int itm_bgn, int itm_end) {
int flag_tid = flag_regy.Get_as_int_or(src, itm_bgn, itm_end, -1);
if (flag_tid == -1) {
int vnt_tid = vnt_regy.Get_as_int_or(src, itm_bgn, itm_end, -1);
if (vnt_tid == -1) return Bry_split_.Rv__ok; // unknown flag; ignore
int flag_tid = codes_regy.Get_as_int_or(src, itm_bgn, itm_end, -1);
if (flag_tid == -1) { // try to find flags like "zh-hans", "zh-hant"; allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
Xol_vnt_itm vnt_itm = vnt_regy.Get_by(src, itm_bgn, itm_end);
if (vnt_itm == null) return Bry_split_.Rv__ok; // unknown flag; ignore
langs.Add(vnt_itm);
return Bry_split_.Rv__ok;
}
flag_ary[flag_tid] = true;
++count;
codes.Add(flag_tid);
return Bry_split_.Rv__ok;
}
}

View File

@ -0,0 +1,55 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*; import gplx.xowa.langs.vnts.*;
// Tests for Vnt_flag_parser: each case parses a ";"-separated flag string and compares the resulting state.
// The expected string is built by the fixture: the codes that are on (in Tid order, so "+", "-", "A", "H", "R", "S", "D", "N", "T", "E"),
// followed by the variant keys that were recognized, all joined by ";".
public class Vnt_flag_parser_tst {
private final Vnt_flag_parser_fxt fxt = new Vnt_flag_parser_fxt();
// basic parsing: single flag, multiple flags, surrounding whitespace
@Test public void Basic() {fxt.Test_parse("D" , "D");}
@Test public void Multiple() {fxt.Test_parse("+;S;E" , "+;S;E");}
@Test public void Ws() {fxt.Test_parse(" + ; S ; E " , "+;S;E");}
// no usable flag: empty or unknown input defaults to "S"
@Test public void None() {fxt.Test_parse("" , "S");}
@Test public void Wrong() {fxt.Test_parse("XYZ" , "S");}
// "R", "N" and "-" each remove every other flag
@Test public void Raw__limit() {fxt.Test_parse("R;S" , "R");}
@Test public void Name__limit() {fxt.Test_parse("N;S" , "N");}
@Test public void Del_limit() {fxt.Test_parse("-;S" , "-");}
// "T" on its own also turns on "H"; "T" with another flag does not
@Test public void Title__also_macro_y() {fxt.Test_parse("T" , "H;T");}
@Test public void Title__also_macro_n() {fxt.Test_parse("T;S" , "S;T");}
// "H" is replaced by "+;H"; other flags are dropped except "D" and "T"
@Test public void Hide__remove_all() {fxt.Test_parse("H;S" , "+;H");}
@Test public void Hide__keep_descrip() {fxt.Test_parse("H;S;D" , "+;H;D");}
@Test public void Hide__keep_title() {fxt.Test_parse("H;S;T" , "+;H;T");}
// "A" also turns on "+" and "S"; "D" turns off "S"
@Test public void Aout__also_show_all() {fxt.Test_parse("A" , "+;A;S");}
@Test public void Descrip__remove_show() {fxt.Test_parse("D;S" , "D");}
@Test public void Aout_w_descrip() {fxt.Test_parse("A;D;S" , "+;A;D");}
// variant keys used as flags: with no code flag, "S" is defaulted; with code flags, the codes are cleared and only the variants remain
@Test public void Lang__one() {fxt.Test_parse("zh-hans" , "S;zh-hans");}
@Test public void Lang__many() {fxt.Test_parse("zh-cn;zh-hk" , "S;zh-cn;zh-hk");}
@Test public void Lang__many__ws() {fxt.Test_parse(" zh-cn ; zh-hk " , "S;zh-cn;zh-hk");}
@Test public void Lang__zap__codes() {fxt.Test_parse("+;S;zh-hans;" , "zh-hans");}
}
// Fixture for Vnt_flag_parser tests: parses a raw flag string against the chinese variant registry
// and compares a debug dump of the parsed codes, followed by the parsed variants, with the expected string.
class Vnt_flag_parser_fxt {
	private final Vnt_flag_parser parser = new Vnt_flag_parser();
	private final Vnt_flag_code_mgr codes = new Vnt_flag_code_mgr();
	private final Vnt_flag_lang_mgr langs = new Vnt_flag_lang_mgr();
	private final Xol_vnt_regy vnt_regy = Xol_vnt_regy_fxt.new_chinese();
	private final Bry_bfr bfr = Bry_bfr.new_();
	public void Test_parse(String raw, String expd) {
		byte[] raw_bry = Bry_.new_u8(raw);
		int raw_len = raw_bry.length;
		parser.Parse(codes, langs, vnt_regy, raw_bry, 0, raw_len);
		// codes first, then variants, into the same buffer
		codes.To_bfr__dbg(bfr);
		langs.To_bfr__dbg(bfr);
		String actl = bfr.Xto_str_and_clear();
		Tfds.Eq_str(expd, actl);
	}
}

View File

@ -0,0 +1,74 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.xndes.*;
import gplx.xowa.langs.vnts.converts.*;
import gplx.xowa.html.*;
// Mwh_doc_wkr that writes the parsed html to a buffer, passing text and the values of "title" / "alt" attributes
// through convert_mgr for the variant idx set by Init. Everything else is copied from src unchanged.
class Vnt_html_doc_wkr implements Mwh_doc_wkr {
	private static final byte[] Bry__url_frag = Bry_.new_a7("://"); // REF.MW: if ( !strpos( $attr, '://' ) ) {
	private final Hash_adp_bry atr_hash = Hash_adp_bry.ci_a7();		// names of the attributes whose values are converted
	private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw();
	private final Xol_convert_mgr convert_mgr;
	private Bry_bfr bfr;
	private int convert_vnt_idx;
	public Vnt_html_doc_wkr(Xol_convert_mgr convert_mgr) {
		this.convert_mgr = convert_mgr;
		atr_hash.Add_many_str("title", "alt");
	}
	public Hash_adp_bry Nde_regy() {return nde_regy;}
	// Sets the output buffer and the variant idx used by all later callbacks.
	public void Init(Bry_bfr bfr, int convert_vnt_idx) {
		this.bfr = bfr;
		this.convert_vnt_idx = convert_vnt_idx;
	}
	// Writes one attribute. The value is converted only when the key is "title" / "alt", the attribute has a value
	// (not name-only like "<span title>") and the value contains no "://"; otherwise the attribute is copied verbatim.
	public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {
		int val_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
		int val_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
		boolean convertible
			=  atr_hash.Get_by_mid(key_bry, 0, key_bry.length) != null										// title, alt
			&& key_exists
			&& Bry_find_.Find_fwd(src, Bry__url_frag, val_bgn, val_end) == Bry_find_.Not_found;
		if (!convertible) {
			int atr_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_bgn];
			int atr_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_end];
			bfr.Add_mid(src, atr_bgn, atr_end);
			return;
		}
		// rebuild the attribute as: space, key, "=", quote, converted value, quote
		bfr.Add_byte_space();
		bfr.Add(key_bry).Add_byte(Byte_ascii.Eq);
		byte qte = Mwh_atr_itm.Calc_qte_byte(itm_ary, itm_idx);
		bfr.Add_byte(qte).Add(convert_mgr.Convert_text(convert_vnt_idx, src, val_bgn, val_end)).Add_byte(qte);
	}
	// Writes a run of text: copied as-is inside code / script / pre nodes, converted everywhere else.
	public void On_txt_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {
		boolean copy_raw
			=  nde_tid == Xop_xnde_tag_.Tid_code
			|| nde_tid == Xop_xnde_tag_.Tid_script
			|| nde_tid == Xop_xnde_tag_.Tid_pre;
		if (copy_raw)
			bfr.Add_mid(src, itm_bgn, itm_end);
		else
			bfr.Add(convert_mgr.Convert_text(convert_vnt_idx, src, itm_bgn, itm_end));
	}
	public void On_nde_head_bgn(Mwh_doc_parser mgr, byte[] src, int nde_tid, int key_bgn, int key_end) {
		bfr.Add_byte(Byte_ascii.Angle_bgn);		// EX: "<span"
		bfr.Add_mid(src, key_bgn, key_end);
	}
	public void On_nde_head_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end, boolean inline) {
		// add "/>" or ">"
		if (inline)
			bfr.Add(Xoh_consts.__inline);
		else
			bfr.Add(Xoh_consts.__end);
	}
	public void On_nde_tail_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {
		bfr.Add_mid(src, itm_bgn, itm_end);
	}
	public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {
		bfr.Add_mid(src, itm_bgn, itm_end);
	}
}

View File

@ -1,110 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.core.primitives.*;
// Unfinished port of MediaWiki's LanguageConverter scan for "-{ ... }-" markup.
// Parse walks src looking for "-{" and appends output to a shared buffer; Parse_recursive scans for nested "-{" / "}-" tokens.
// Rule application is not implemented (the MediaWiki code is only quoted in a comment) and Auto_convert returns its input unchanged.
public class Vnt_language_converter {
private final Bry_bfr bfr = Bry_bfr.new_();
private int max_depth = 32;
private byte[] src; private int src_len;
private boolean convert_needed;
private int pos;
// Returns src itself when it contains no "-{"; otherwise returns the bytes accumulated in the buffer.
// Holds a lock on the shared buffer for the whole call.
public byte[] Parse(byte[] vnt, byte[] src) {// REF.MW:/languages/LanguageConverter.php!recursiveConvertTopLevel
synchronized (bfr) {
int markup_count = 0;
this.pos = 0;
// always false in this class, so Add_output always copies bytes without calling Auto_convert
this.convert_needed = false; // for sr lang; SEE:LanguageSr.php !$this->guessVariant(src, vnt);
this.src = src; this.src_len = src.length;
while (pos < src_len) {
int curly_bgn = Bry_find_.Find_fwd(src, Bry__curly_bgn, pos, src_len);
if (curly_bgn == Bry_find_.Not_found) { // No more markup, append final segment
if (markup_count == 0) return src; // no markups found; just return original
Add_output(vnt, convert_needed, src, pos, src_len);
return bfr.Xto_bry_and_clear();
}
// NOTE(review): the next two statements both append starting at pos: first up to curly_bgn, then up to src_len.
// This looks like the segment is written twice, with the second write running past the markup -- confirm intent.
bfr.Add_mid(src, pos, curly_bgn); // Markup found; append segment
Add_output(vnt, convert_needed, src, pos, src_len);
pos = curly_bgn; // Advance position
++markup_count;
// NOTE(review): Parse_recursive receives pos by value and returns nothing, so the pos field stays at curly_bgn after this call;
// the next iteration then searches from the same offset -- looks like this loop cannot make progress; confirm.
Parse_recursive(vnt, pos, 1); // Do recursive conversion
}
return bfr.Xto_bry_and_clear();
}
}
// Scans from just after a "-{" for nested "-{" and "}-" tokens, recursing on "-{" until max_depth is reached.
// The pos parameter shadows the pos field; changes made here are local to this call.
private void Parse_recursive(byte[] vnt, int pos, int depth) {
pos += 2; // skip "-{"
boolean warning_done = false;
// $inner = '';
while (pos < src_len) {
byte b = src[pos];
Object o = trie.Match_bgn_w_byte(b, src,pos, src_len);
if (o == null) { // char;
++pos;
continue;
}
// NOTE(review): presumably Match_pos is the offset just after the matched token, in which case the Add_mid below
// writes the token itself rather than the text that preceded it -- confirm against Btrie_fast_mgr.
int new_pos = trie.Match_pos(); // Markup found; Append initial segment
bfr.Add_mid(src, pos, new_pos);
pos = new_pos; // Advance position
switch (((Byte_obj_val)o).Val()) {
case Tid__curly_bgn:
if (depth >= max_depth) {
bfr.Add(Bry__curly_bgn);
// the warning span is written empty; the message lookup is only quoted from MediaWiki below
if (!warning_done) {
bfr.Add_str("<span class=\"error\">");
// wfMessage('language-converter-depth-warning')->numParams($this->mMaxDepth)->inContentLanguage()->text()
bfr.Add_str("</span>");
warning_done = true;
}
pos += 2; // skip "-{"
continue;
}
Parse_recursive(vnt, pos, depth + 1); // Recursively parse another rule
break;
case Tid__curly_end:
pos += 2;
/*
// Apply the rule
$rule = new ConverterRule($inner, $this);
$rule->parse($variant);
$this->applyManualConv($rule);
return $rule->getDisplay();
*/
return;
default: throw Err_.new_unhandled(-1); // never happens
}
}
// NOTE(review): the loop above only falls through once pos >= src_len (its other exits are return / throw),
// so this condition appears to be always false here and the unclosed-rule output is never written.
if (pos < src_len) { // Unclosed rule
byte[] frag = Auto_convert(vnt, src, pos, src_len);
bfr.Add(Bry__curly_bgn).Add(frag);
}
pos = src_len;
}
// Appends src[pos..src_len) to the buffer; when convert_needed is true the appended bytes come from Auto_convert instead.
private void Add_output(byte[] vnt, boolean convert_needed, byte[] src, int pos, int src_len) {
if (convert_needed) {
byte[] frag = Auto_convert(vnt, src, pos, src_len);
bfr.Add(frag);
}
else
bfr.Add_mid(src, pos, src_len);
}
// Placeholder: returns the whole src array unchanged; vnt, bgn and end are ignored.
private byte[] Auto_convert(byte[] vnt, byte[] src, int bgn, int end) {return src;}
private static final byte Tid__curly_bgn = 1, Tid__curly_end = 2;
private static final byte[] Bry__curly_bgn = Bry_.new_a7("-{"), Bry__curly_end = Bry_.new_a7("}-");
// maps the "-{" and "}-" tokens to their Tid
private static final Btrie_fast_mgr trie = Btrie_fast_mgr.cs()
.Add_bry_byte(Bry__curly_bgn, Tid__curly_bgn)
.Add_bry_byte(Bry__curly_end, Tid__curly_end);
}

View File

@ -0,0 +1,68 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
// Ordered table of bidirectional rule items (variant key -> text), kept in the order the variants were first set.
class Vnt_rule_bidi_mgr {
	private final Ordered_hash hash = Ordered_hash_.new_bry_();
	// Number of variants that have text.
	public int Len() {return hash.Count();}
	// True if no variant has text.
	public boolean Has_none() {return hash.Count() == 0;}
	// Removes all items.
	public void Clear() {hash.Clear();}
	// Returns the item at the given position, in insertion order.
	public Vnt_rule_bidi_itm Get_at(int i) {return (Vnt_rule_bidi_itm)hash.Get_at(i);}
	// Returns the item for the given variant key, or null if there is none.
	public Vnt_rule_bidi_itm Get_by(byte[] k) {return (Vnt_rule_bidi_itm)hash.Get_by(k);}
	// Returns the text of the first variant in ary that has an item with non-null text; null if there is none.
	// A null ary (no variants to try) also returns null instead of failing.
	public byte[] Get_text_by_ary_or_null(byte[]... ary) {
		if (ary == null) return null;
		int len = ary.length;
		for (int i = 0; i < len; ++i) {
			Vnt_rule_bidi_itm itm = (Vnt_rule_bidi_itm)hash.Get_by(ary[i]); if (itm == null) continue;
			byte[] text = itm.Text();	// read the text off the item already found; no need to look it up again by its own key
			if (text != null) return text;
		}
		return null;
	}
	// Returns the text for the given variant key, or null if the variant has no item.
	public byte[] Get_text_by_key_or_null(byte[] vnt) {
		Vnt_rule_bidi_itm rv = (Vnt_rule_bidi_itm)hash.Get_by(vnt);
		return rv == null ? null : rv.Text();
	}
	// Returns the text of the item at the given position, or null if the lookup yields no item.
	public byte[] Get_text_at(int i) {
		Vnt_rule_bidi_itm itm = (Vnt_rule_bidi_itm)hash.Get_at(i);
		return itm == null ? null : itm.Text();
	}
	// Sets the text for a variant: adds a new item on first use, else overwrites the text of the existing item (position is kept).
	public void Set(byte[] vnt, byte[] text) {
		Vnt_rule_bidi_itm itm = (Vnt_rule_bidi_itm)hash.Get_by(vnt);
		if (itm == null) {
			itm = new Vnt_rule_bidi_itm(vnt, text);
			hash.Add(vnt, itm);
		}
		else
			itm.Text_(text);
	}
	// Debug output: one "variant:text" line per item, in insertion order, separated by newlines.
	public void To_bry__dbg(Bry_bfr bfr) {
		int len = hash.Count();
		for (int i = 0; i < len; ++i) {
			if (i != 0) bfr.Add_byte_nl();
			Vnt_rule_bidi_itm itm = (Vnt_rule_bidi_itm)hash.Get_at(i);
			bfr.Add(itm.Vnt()).Add_byte_colon().Add(itm.Text());
		}
	}
}
// One bidirectional rule entry: a fixed variant key and its replaceable text.
// Arrays are stored and returned by reference; no copies are made.
class Vnt_rule_bidi_itm {
	private final byte[] vnt;	// variant key; set once
	private byte[] text;		// text for the variant; can be overwritten through Text_
	public Vnt_rule_bidi_itm(byte[] vnt, byte[] text) {
		this.vnt = vnt;
		this.text = text;
	}
	public byte[] Vnt() {
		return vnt;
	}
	public byte[] Text() {
		return text;
	}
	public void Text_(byte[] v) {
		text = v;
	}
}

View File

@ -17,45 +17,76 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
import gplx.xowa.langs.vnts.*;
class Vnt_rule_parser implements gplx.core.brys.Bry_split_wkr {
// private final Btrie_slim_mgr vnt_trie = Btrie_slim_mgr.ci_a7();
public void Parse(byte[] src, int src_bgn, int src_end) {
Bry_split_.Split(src, Byte_ascii.Semic, false, this); // trim=false for "&#entity;" check below
private final Btrie_slim_mgr vnt_trie = Btrie_slim_mgr.ci_a7();
private Vnt_rule_undi_mgr undis; private Vnt_rule_bidi_mgr bidis;
private int src_end, src_len; private byte[] rule_raw;
public byte[] Raw() {return rule_raw;}
public void Init(Xol_vnt_regy vnt_regy) {
this.vnt_trie.Clear();
int len = vnt_regy.Len();
for (int i = 0; i < len; ++i) {
Xol_vnt_itm itm = (Xol_vnt_itm)vnt_regy.Get_at(i);
vnt_trie.Add_obj(itm.Key(), itm);
}
}
public void Clear(Vnt_rule_undi_mgr undis, Vnt_rule_bidi_mgr bidis, byte[] rule_raw) {
this.undis = undis; this.bidis = bidis;
undis.Clear(); bidis.Clear();
this.rule_raw = rule_raw;
}
public void Parse(byte[] src, int src_bgn, int src_end) {
this.src_end = src_end; this.src_len = src.length;
Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Semic, false, this); // trim=false for "&#entity;" check below
}
public int Split(byte[] src, int itm_bgn, int itm_end) { // macro=>zh-hans:text;
int html_entity_pos = Bry_find_.Find_bwd_while_alphanum(src, itm_end);
byte html_entity_byte = src[html_entity_pos];
if (html_entity_byte == Byte_ascii.Hash) html_entity_byte = src[html_entity_pos - 2]; // skip #; EX: &#123;
if (html_entity_byte == Byte_ascii.Amp) return Bry_split_.Rv__extend; // reject "&#entity;"; EX: "&nbsp;zh-hans;"
if (itm_end != src_end) {
int nxt_lang_bgn = Bry_find_.Find_fwd(src, Bry__bidi_dlm, itm_end + 1, src_len); // look for next "=>"
if (nxt_lang_bgn == Bry_find_.Not_found)
nxt_lang_bgn = Bry_find_.Find_fwd_while_ws(src, itm_end + 1, src_len); // skip any ws after end ";"; EX: "a:1; b:2"; NOTE: +1 to skip semic;
else
nxt_lang_bgn += 2;
int nxt_lang_end = Bry_find_.Find_fwd(src, Byte_ascii.Colon, nxt_lang_bgn, src_len); // get colon;
if (nxt_lang_end != Bry_find_.Not_found) {
nxt_lang_end = Bry_find_.Find_bwd__skip_ws(src, nxt_lang_end, src_len); // trim
if (vnt_trie.Match_bgn(src, nxt_lang_bgn, nxt_lang_end) == null) return Bry_split_.Rv__extend; // reject ";not_variant"; EX: ";border" in "zh-hans:<span style='color:blue;border:1px;'>;zh-hant:"
}
}
int undi_bgn = Bry_find_.Find_fwd_while_ws(src, itm_bgn, itm_end); // skip any ws after bgn ";"; EX: " a=>b:c;"
int undi_end = Bry_find_.Find_fwd(src, Bry__bidi_dlm, undi_bgn, itm_end); // look for "=>"
int lang_bgn = undi_bgn; // default lang_bgn to undi_bgn; assumes no bidi found
if (undi_end != Bry_find_.Not_found) { // "=>" found; bidi exists
lang_bgn = Bry_find_.Find_fwd_while_ws(src, undi_end + 2, itm_end); // set lang_bgn after => and gobble up ws
undi_end = Bry_find_.Find_bwd__skip_ws(src, undi_end, undi_bgn); // trim ws from end of bd;
}
Object vnt_obj = vnt_trie.Match_bgn(src, lang_bgn, itm_end);
if (vnt_obj == null) {
return (itm_bgn == 0) ? Bry_split_.Rv__cancel : Bry_split_.Rv__extend; // if 1st item; cancel rest; otherwise, extend
}
int lang_end = vnt_trie.Match_pos();
int text_bgn = Bry_find_.Find_fwd_while_ws(src, lang_end, itm_end); if (src[text_bgn] != Byte_ascii.Colon) return Bry_split_.Rv__extend;
++text_bgn;
Xol_vnt_itm vnt_itm = (Xol_vnt_itm)vnt_obj;
byte[] vnt_key = vnt_itm.Key();
byte[] text_bry = Bry_.Mid_w_trim(src, text_bgn, itm_end);
if (undi_end == Bry_find_.Not_found)
bidis.Set(vnt_key, text_bry);
else {
byte[] undi_bry = Bry_.Mid(src, undi_bgn, undi_end);
if (itm_end - text_bgn > 0)
undis.Set(vnt_key, undi_bry, text_bry);
}
public int Split(byte[] src, int itm_bgn, int itm_end) {
int html_entity_pos = Bry_find_.Find_bwd_while_alphanum(src, itm_bgn);
if (Bry_.Eq(src, html_entity_pos - 2, html_entity_pos, Bry__html_entity)) return Bry_split_.Rv__extend; // reject "&#entity;"; EX: "&nbsp;zh-hans;"
/*
itm_bgn = skip fwd for ws;
itm_bgn = skip fwd for "=>"
Object vnt_obj = vnt_trie.Match_bgn(src, itm_bgn, itm_end); if (vnt_obj == null) return Bry_split_.Rv__extend; // reject ";not_variant"; EX: ";border" in "zh-hans:<span style='color:blue;border:1px;'>;zh-hant:"
itm_end = skip bwd for ws
// val = trim( val[0] );
// trg = trim( val[1] );
// $u = explode( '=>', val, 2 );
// // if trg is empty, strtr() could return a wrong result
// if ( count( $u ) == 1 && trg && in_array( val, $variants ) ) {
// bidi_ary[val] = trg;
// } elseif ( count( $u ) == 2 ) {
// $from = trim( $u[0] );
// val = trim( $u[1] );
// if ( array_key_exists( val, $unidtable )
// && !is_array( $unidtable[val] )
// && trg
// && in_array( val, $variants ) ) {
// $unidtable[val] = array( $from => trg );
// } elseif ( trg && in_array( val, $variants ) ) {
// $unidtable[val][$from] = trg;
// }
// }
// // syntax error, pass
// if ( !isset( $this->mConverter->mVariantNames[val] ) ) {
// bidi_ary = array();
// $unidtable = array();
// break;
// }
*/
return Bry_split_.Rv__ok;
}
private static final byte[] Bry__html_entity = Bry_.new_a7("&#");
public void To_bry__dbg(Bry_bfr bfr) {
undis.To_bry__dbg(bfr);
if (bfr.Len_gt_0()) bfr.Add_byte_nl();
bidis.To_bry__dbg(bfr);
}
private static final byte[] Bry__bidi_dlm = Bry_.new_a7("=>");
}

View File

@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
// Tests for Vnt_rule_parser with bidirectional rules ("variant:text;").
// The fixture registers the variants "x1", "x2" and "x3"; each expected string is one line of the parser's debug dump.
public class Vnt_rule_parser__bidi_tst {
private final Vnt_rule_parser_fxt fxt = new Vnt_rule_parser_fxt();
@Test public void Basic() {fxt.Test_parse("x1:v1;" , "x1:v1");}
// whitespace around the variant, the text and the ";" is trimmed
@Test public void Ws() {fxt.Test_parse(" x1 : v1 ;" , "x1:v1");}
// the ";" that ends an html entity ("&nbsp;") does not end the rule; the text runs on to the next ";"
@Test public void Entity() {fxt.Test_parse("x1:a&nbsp;x2:b;x2:b;" , "x1:a&nbsp;x2:b" , "x2:b");}
// a ";" followed by something that is not a variant ("wx2") does not end the rule either
@Test public void Unknown__nth() {fxt.Test_parse("x1:a;wx2:b;x2:b;" , "x1:a;wx2:b" , "x2:b");}
// when the very first item does not start with a variant, nothing is parsed
@Test public void Unknown__1st() {fxt.Test_parse("wx1:a;x1:b;" , "");}
}

View File

@ -0,0 +1,24 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
// Tests for Vnt_rule_parser with unidirectional rules ("source=>variant:text;").
// The fixture registers the variants "x1", "x2" and "x3"; each expected string is one line of the parser's debug dump,
// which these cases show as "variant:source=text".
public class Vnt_rule_parser__undi_tst {
private final Vnt_rule_parser_fxt fxt = new Vnt_rule_parser_fxt();
@Test public void One() {fxt.Test_parse("k1=>x1:v1;" , "x1:k1=v1");}
@Test public void Many() {fxt.Test_parse("k1=>x1:v1;k2=>x2:v2;" , "x1:k1=v1", "x2:k2=v2");}
}

View File

@ -0,0 +1,37 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.langs.vnts.*;
// Fixture for Vnt_rule_parser tests: sets up a parser that knows the variants "x1", "x2" and "x3",
// parses a raw rule string and compares the parser's debug dump with the expected lines.
class Vnt_rule_parser_fxt {
	private final Vnt_rule_parser parser = new Vnt_rule_parser();
	private final Vnt_rule_undi_mgr undis = new Vnt_rule_undi_mgr();
	private final Vnt_rule_bidi_mgr bidis = new Vnt_rule_bidi_mgr();
	private final Bry_bfr bfr = Bry_bfr.new_(255);
	public Vnt_rule_parser_fxt() {
		String[] keys  = new String[] {"x1", "x2", "x3"};
		String[] names = new String[] {"lang1", "lang2", "lang3"};
		Xol_vnt_regy regy = new Xol_vnt_regy();
		for (int i = 0; i < keys.length; ++i)
			regy.Add(Bry_.new_a7(keys[i]), Bry_.new_a7(names[i]));
		parser.Init(regy);
	}
	public void Test_parse(String raw, String... expd_ary) {
		byte[] raw_bry = Bry_.new_u8(raw);
		parser.Clear(undis, bidis, raw_bry);		// also resets both rule tables
		parser.Parse(raw_bry, 0, raw_bry.length);
		parser.To_bry__dbg(bfr);
		String expd = String_.Concat_lines_nl_skip_last(expd_ary);
		String actl = bfr.Xto_str_and_clear();
		Tfds.Eq_str_lines(expd, actl);
	}
}

View File

@ -0,0 +1,80 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Collection of unidirectional rule groups; one Vnt_rule_undi_grp per variant key, held in an Ordered_hash. */
class Vnt_rule_undi_mgr {
	private final Ordered_hash hash = Ordered_hash_.new_bry_();

	/** Number of variant groups currently stored. */
	public int Len() {
		return hash.Count();
	}
	/** True when no variant group has been stored. */
	public boolean Has_none() {
		return hash.Count() == 0;
	}
	/** Removes all groups. */
	public void Clear() {
		hash.Clear();
	}
	/** Group at position i of the underlying hash. */
	public Vnt_rule_undi_grp Get_at(int i) {
		return (Vnt_rule_undi_grp)hash.Get_at(i);
	}
	/** Group stored under the variant key, as returned by the hash lookup. */
	public Vnt_rule_undi_grp Get_by(byte[] key) {
		return (Vnt_rule_undi_grp)hash.Get_by(key);
	}
	/** Trg of the first item in the group stored under key; null if the lookup yields null or the group is empty. */
	public byte[] Get_text_by_key_or_null(byte[] key) {
		return First_trg_or_null((Vnt_rule_undi_grp)hash.Get_by(key));	// REF.MW: $disp = $disp[0];
	}
	/** Trg of the first item in the group at position i; null if the lookup yields null or the group is empty. */
	public byte[] Get_text_at(int i) {
		return First_trg_or_null((Vnt_rule_undi_grp)hash.Get_at(i));
	}
	/**
	 * Registers a src / trg pair under the given variant, creating the variant's group on first use.
	 * @return the group that received the pair
	 */
	public Vnt_rule_undi_grp Set(byte[] vnt, byte[] src, byte[] trg) {
		Vnt_rule_undi_grp rv = (Vnt_rule_undi_grp)hash.Get_by(vnt);
		boolean grp_missing = rv == null;
		if (grp_missing) {
			rv = new Vnt_rule_undi_grp(vnt);
			hash.Add(vnt, rv);
		}
		rv.Set(src, trg);
		return rv;
	}
	/** Debug dump: one line per group, written as the variant key, a colon, then the group's own dump. */
	public void To_bry__dbg(Bry_bfr bfr) {
		int grp_count = hash.Count();
		for (int idx = 0; idx < grp_count; ++idx) {
			if (idx > 0) bfr.Add_byte_nl();	// newline between groups, none after the last
			Vnt_rule_undi_grp cur = (Vnt_rule_undi_grp)hash.Get_at(idx);
			bfr.Add(cur.Vnt()).Add_byte_colon();
			cur.To_bry__dbg(bfr);
		}
	}
	// shared by both Get_text methods: first item's trg, or null for a null / empty group
	private static byte[] First_trg_or_null(Vnt_rule_undi_grp grp) {
		if (grp == null) return null;
		if (grp.Len() == 0) return null;
		return grp.Get_at(0).Trg();
	}
}
/** Unidirectional rules for a single variant: items keyed by their src bytes in an Ordered_hash. */
class Vnt_rule_undi_grp {
	private final Ordered_hash hash = Ordered_hash_.new_bry_();
	public Vnt_rule_undi_grp(byte[] vnt) {this.vnt = vnt;}
	/** Number of src / trg items in this group. */
	public int Len() {return hash.Count();}
	/** Item at position i of the underlying hash. */
	public Vnt_rule_undi_itm Get_at(int i) {return (Vnt_rule_undi_itm)hash.Get_at(i);}
	/** Variant key this group belongs to. */
	public byte[] Vnt() {return vnt;} private final byte[] vnt;
	/**
	 * Sets the trg for src: adds a new item when src is unknown, else replaces the existing item's trg.
	 * Previously a repeated src kept its first trg and the new value was silently dropped.
	 * @return the item now holding trg for src
	 */
	public Vnt_rule_undi_itm Set(byte[] src, byte[] trg) {
		Vnt_rule_undi_itm itm = (Vnt_rule_undi_itm)hash.Get_by(src);
		if (itm == null) {
			itm = new Vnt_rule_undi_itm(src, trg);
			hash.Add(src, itm);
		}
		else
			itm.Trg_(trg);	// last rule wins; REF.MW: $unidtable[$v][$from] = $to;
		return itm;
	}
	/** Debug dump: each item written as src, '=', trg; items are written back-to-back with no separator. */
	public void To_bry__dbg(Bry_bfr bfr) {
		int len = hash.Count();
		for (int i = 0; i < len; ++i) {
			Vnt_rule_undi_itm itm = (Vnt_rule_undi_itm)hash.Get_at(i);
			bfr.Add(itm.Src()).Add_byte_eq().Add(itm.Trg());
		}
	}
}
/** A single unidirectional rule: a fixed src and a trg that can be replaced after construction. */
class Vnt_rule_undi_itm {
	private final byte[] src;
	private byte[] trg;

	public Vnt_rule_undi_itm(byte[] src, byte[] trg) {
		this.src = src;
		this.trg = trg;
	}
	/** Source bytes given at construction; the array is returned as-is (not copied). */
	public byte[] Src() {
		return src;
	}
	/** Current target bytes; the array is returned as-is (not copied). */
	public byte[] Trg() {
		return trg;
	}
	/** Replaces the target bytes. */
	public void Trg_(byte[] v) {
		trg = v;
	}
}

View File

@ -25,7 +25,7 @@ class Xop_vnt_lang_bldr { // performant way of building langs; EX: -{zh;zh-hans;
public void Add(byte[] key) {
Xol_vnt_itm vnt = vnt_regy.Get_by(key); if (vnt == null) return; // ignore invalid vnts; EX: -{zh;zhx}-
int vnt_mask = vnt.Mask__vnt();
this.rslt_mask = (rslt_mask == 0) ? vnt_mask : Enm_.Flip_int(true, rslt_mask, vnt_mask);
this.rslt_mask = (rslt_mask == 0) ? vnt_mask : Bitmask_.Flip_int(true, rslt_mask, vnt_mask);
}
public Xop_vnt_flag Bld() {
return (rslt_mask == 0) ? Xop_vnt_flag_.Flag_unknown : Xop_vnt_flag.new_lang(rslt_mask);

View File

@ -53,7 +53,7 @@ class Xop_vnt_tkn_mok {
}
public Xop_vnt_tkn_mok Flags_none_() {flags_list.Clear(); return this;}
public Xop_vnt_tkn_mok Flags_unknown_(String... v) {flags_list.Add(Xop_vnt_flag_.Flag_unknown); return this;}
public Xop_vnt_tkn_mok Flags_langs_(int... ary) {flags_list.Add(Xop_vnt_flag.new_lang(Enm_.Add_int_ary(ary))); return this;}
public Xop_vnt_tkn_mok Flags_langs_(int... ary) {flags_list.Add(Xop_vnt_flag.new_lang(Bitmask_.Add_int_ary(ary))); return this;}
public Xop_vnt_tkn_mok Flags_codes_(String... ary) {
int len = ary.length;
for (int i = 0; i < len; i++) {
@ -122,7 +122,7 @@ class Xop_vnt_lxr_fxt {
int itm_mask = itm.Mask();
for (int i = 0; i < 32; ++i) {
int mask = gplx.core.brys.Bit_.Get_flag(i);
if (Enm_.Has_int(mask, itm_mask)) {
if (Bitmask_.Has_int(mask, itm_mask)) {
Xol_vnt_itm vnt = vnt_regy.Get_at(i);
bfr.Add(vnt.Key()).Add_byte(Byte_ascii.Semic);
}

View File

@ -18,7 +18,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*; import gplx.core.tests.*;
public class Xop_xatr_parser_tst {
@Test public void Kv_quote_double() {fxt.tst_("a=\"b\"", fxt.new_atr_("a", "b"));} private Xop_xatr_parser_fxt fxt = new Xop_xatr_parser_fxt();
private final Xop_xatr_parser_fxt fxt = new Xop_xatr_parser_fxt();
@Test public void Kv_quote_double() {fxt.tst_("a=\"b\"", fxt.new_atr_("a", "b"));}
@Test public void Kv_quote_single() {fxt.tst_("a='b'", fxt.new_atr_("a", "b"));}
@Test public void Kv_quote_none() {fxt.tst_("a=b", fxt.new_atr_("a", "b"));}
@Test public void Kv_empty() {fxt.tst_("a=''", fxt.new_atr_("a", ""));}

View File

@ -49,12 +49,12 @@ class Xop_xatr_whitelist_fxt {
public void Clear() {
if (whitelist_mgr == null) whitelist_mgr = new Xop_xatr_whitelist_mgr().Ini();
} private Xop_xatr_whitelist_mgr whitelist_mgr;
public void Whitelist(byte tag_id, String key_str, boolean expd) {
public void Whitelist(int tag_id, String key_str, boolean expd) {
byte[] key_bry = Bry_.new_a7(key_str);
atr_itm.Key_rng_(0, key_bry.length);
Tfds.Eq(expd, whitelist_mgr.Chk(tag_id, key_bry, atr_itm), key_str);
} private Xop_xatr_itm atr_itm = new Xop_xatr_itm(0, 0);
public void Whitelist(byte tag_id, String key_str, String val_str, boolean expd) {
public void Whitelist(int tag_id, String key_str, String val_str, boolean expd) {
byte[] key_bry = Bry_.new_a7(key_str);
atr_itm.Key_rng_(0, key_bry.length);
atr_itm.Val_bry_(Bry_.new_a7(val_str));

View File

@ -22,32 +22,33 @@ public class Xop_xnde_tag {
this.id = id;
this.name_bry = Bry_.new_a7(name_str);
this.name_str = name_str;
name_len = name_bry.length;
xtn_end_tag = Bry_.Add(Xop_xnde_tag_.XtnEndTag_bgn, name_bry); // always force endtag; needed for <noinclude>
xtn_end_tag_tmp = new byte[xtn_end_tag.length]; Array_.Copy(xtn_end_tag, xtn_end_tag_tmp);
this.name_len = name_bry.length;
this.xtn_end_tag = Bry_.Add(Xop_xnde_tag_.Xtn_end_tag_bgn, name_bry); // always force endtag; needed for <noinclude>
this.xtn_end_tag_tmp = new byte[xtn_end_tag.length]; Array_.Copy(xtn_end_tag, xtn_end_tag_tmp);
}
public int Id() {return id;} public Xop_xnde_tag Id_(int v) {id = v; return this;} private int id;
public byte[] Name_bry() {return name_bry;} private byte[] name_bry;
public String Name_str() {return name_str;} private String name_str;
public int Name_len() {return name_len;} private int name_len;
public int Id() {return id;} private final int id;
public byte[] Name_bry() {return name_bry;} private final byte[] name_bry;
public String Name_str() {return name_str;} private final String name_str;
public int Name_len() {return name_len;} private final int name_len;
public byte[] Xtn_end_tag() {return xtn_end_tag;} private final byte[] xtn_end_tag;
public byte[] Xtn_end_tag_tmp() {return xtn_end_tag_tmp;} private final byte[] xtn_end_tag_tmp;
public boolean Xtn() {return xtn;} public Xop_xnde_tag Xtn_() {xtn = true; return this;} private boolean xtn;
public boolean Xtn_mw() {return xtn_mw;} public Xop_xnde_tag Xtn_mw_() {xtn_mw = true; xtn = true; return this;} private boolean xtn_mw; // NOTE: Xtn_mw_() marks both xtn and xtn_mw as true
public byte[] XtnEndTag() {return xtn_end_tag;} private byte[] xtn_end_tag;
public byte[] XtnEndTag_tmp() {return xtn_end_tag_tmp;} private byte[] xtn_end_tag_tmp;
public int BgnNdeMode() {return bgnNdeMode;} private int bgnNdeMode = Xop_xnde_tag_.BgnNdeMode_normal;
public Xop_xnde_tag BgnNdeMode_inline_() {bgnNdeMode = Xop_xnde_tag_.BgnNdeMode_inline; return this;}
public int EndNdeMode() {return endNdeMode;} private int endNdeMode = Xop_xnde_tag_.EndNdeMode_normal;
public Xop_xnde_tag EndNdeMode_inline_() {endNdeMode = Xop_xnde_tag_.EndNdeMode_inline; return this;}
public Xop_xnde_tag EndNdeMode_escape_() {endNdeMode = Xop_xnde_tag_.EndNdeMode_escape; return this;}
public boolean SingleOnly() {return singleOnly;} public Xop_xnde_tag SingleOnly_() {singleOnly = true; return this;} private boolean singleOnly;
public boolean TblSub() {return tblSub;} public Xop_xnde_tag TblSub_() {tblSub = true; return this;} private boolean tblSub;
public int Bgn_nde_mode() {return bgn_nde_mode;} private int bgn_nde_mode = Xop_xnde_tag_.Bgn_nde_mode_normal;
public Xop_xnde_tag Bgn_nde_mode_inline_() {bgn_nde_mode = Xop_xnde_tag_.Bgn_nde_mode_inline; return this;}
public int End_nde_mode() {return end_nde_mode;} private int end_nde_mode = Xop_xnde_tag_.End_nde_mode_normal;
public Xop_xnde_tag End_nde_mode_inline_() {end_nde_mode = Xop_xnde_tag_.End_nde_mode_inline; return this;}
public Xop_xnde_tag End_nde_mode_escape_() {end_nde_mode = Xop_xnde_tag_.End_nde_mode_escape; return this;}
public boolean Single_only() {return single_only;} public Xop_xnde_tag Single_only_() {single_only = true; return this;} private boolean single_only;
public boolean Tbl_sub() {return tbl_sub;} public Xop_xnde_tag Tbl_sub_() {tbl_sub = true; return this;} private boolean tbl_sub;
public boolean Restricted() {return restricted;} public Xop_xnde_tag Restricted_() {restricted = true; return this;} private boolean restricted;
public boolean NoInline() {return noInline;} public Xop_xnde_tag NoInline_() {noInline = true; return this;} private boolean noInline;
public boolean No_inline() {return no_inline;} public Xop_xnde_tag No_inline_() {no_inline = true; return this;} private boolean no_inline;
public boolean Inline_by_backslash() {return inline_by_backslash;} public Xop_xnde_tag Inline_by_backslash_() {inline_by_backslash = true; return this;} private boolean inline_by_backslash;
public boolean Section() {return section;} public Xop_xnde_tag Section_() {section = true; return this;} private boolean section;
public boolean Repeat_ends() {return repeat_ends;} public Xop_xnde_tag Repeat_ends_() {repeat_ends = true; return this;} private boolean repeat_ends;
public boolean Repeat_mids() {return repeat_mids;} public Xop_xnde_tag Repeat_mids_() {repeat_mids = true; return this;} private boolean repeat_mids;
public boolean Empty_ignored() {return empty_ignored;} public Xop_xnde_tag Empty_ignored_() {empty_ignored = true; return this;} private boolean empty_ignored;
public boolean Single_only_html() {return single_only_html;} public Xop_xnde_tag Single_only_html_() {single_only_html = true; return this;} private boolean single_only_html;
public boolean Raw() {return raw;} public Xop_xnde_tag Raw_() {raw = true; return this;} private boolean raw;
public static final byte Block_noop = 0, Block_bgn = 1, Block_end = 2;
public byte Block_open() {return block_open;} private byte block_open = Block_noop;

View File

@ -18,12 +18,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.langs.*;
public class Xop_xnde_tag_ {
public static final int EndNdeMode_normal = 0, EndNdeMode_inline = 1, EndNdeMode_escape = 2; // escape is for hr which does not support </hr>
public static final int BgnNdeMode_normal = 0, BgnNdeMode_inline = 1;
public static final int End_nde_mode_normal = 0, End_nde_mode_inline = 1, End_nde_mode_escape = 2; // escape is for hr which does not support </hr>
public static final int Bgn_nde_mode_normal = 0, Bgn_nde_mode_inline = 1;
public static final byte[] Name_onlyinclude = Bry_.new_a7("onlyinclude");
public static final byte[] XtnEndTag_bgn = Bry_.new_a7("</");//, XtnEndTag_end = Bry_.new_a7(">");
public static final byte
Tid_b = 0
public static final byte[] Xtn_end_tag_bgn = Bry_.new_a7("</");//, Xtn_end_tag_end = Bry_.new_a7(">");
public static final int
Tid__null = -1
, Tid_b = 0
, Tid_strong = 1
, Tid_i = 2
, Tid_em = 3
@ -147,64 +148,64 @@ public class Xop_xnde_tag_ {
return rv;
}
public static final Xop_xnde_tag
Tag_b = new_(Tid_b, "b").NoInline_()
, Tag_strong = new_(Tid_strong, "strong").NoInline_()
, Tag_i = new_(Tid_i, "i").NoInline_()
, Tag_em = new_(Tid_em, "em").NoInline_()
, Tag_cite = new_(Tid_cite, "cite").NoInline_()
, Tag_dfn = new_(Tid_dfn, "dfn").NoInline_()
, Tag_var = new_(Tid_var, "var").NoInline_()
, Tag_u = new_(Tid_u, "u").NoInline_().Repeat_ends_() // PAGE:en.b:Textbook_of_Psychiatry/Alcoholism_and_Psychoactive_Substance_Use_Disorders; DATE:2014-09-05
, Tag_ins = new_(Tid_ins, "ins").NoInline_()
, Tag_abbr = new_(Tid_abbr, "abbr").NoInline_()
, Tag_strike = new_(Tid_strike, "strike").NoInline_()
, Tag_del = new_(Tid_del, "del").NoInline_()
, Tag_s = new_(Tid_s, "s").NoInline_()
, Tag_sub = new_(Tid_sub, "sub").NoInline_()
, Tag_sup = new_(Tid_sup, "sup").NoInline_()
, Tag_big = new_(Tid_big, "big").NoInline_()
, Tag_small = new_(Tid_small, "small").NoInline_()
, Tag_code = new_(Tid_code, "code").NoInline_().Repeat_ends_()
, Tag_tt = new_(Tid_tt, "tt").NoInline_().Repeat_ends_()
, Tag_kbd = new_(Tid_kbd, "kbd").NoInline_()
, Tag_samp = new_(Tid_samp, "samp").NoInline_()
, Tag_blockquote = new_(Tid_blockquote, "blockquote").NoInline_().Repeat_mids_().Section_().Block_open_bgn_().Block_close_end_() // NOTE: should be open_end_, but leaving for now; DATE:2014-03-11; added Repeat_mids_(); PAGE:en.w:Ring_a_Ring_o'_Roses DATE:2014-06-26
, Tag_pre = new_(Tid_pre, "pre").NoInline_().Section_().Xtn_().Raw_().Block_open_bgn_().Block_close_end_().Ignore_empty_().Xtn_skips_template_args_()
, Tag_font = new_(Tid_font, "font").NoInline_()
, Tag_center = new_(Tid_center, "center").NoInline_().Block_open_end_().Block_close_end_() // removed .Repeat_ends_(); added Nest_(); EX: w:Burr Truss; DATE:2012-12-12
, Tag_p = new_(Tid_p, "p").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
Tag_b = new_(Tid_b, "b").No_inline_()
, Tag_strong = new_(Tid_strong, "strong").No_inline_()
, Tag_i = new_(Tid_i, "i").No_inline_()
, Tag_em = new_(Tid_em, "em").No_inline_()
, Tag_cite = new_(Tid_cite, "cite").No_inline_()
, Tag_dfn = new_(Tid_dfn, "dfn").No_inline_()
, Tag_var = new_(Tid_var, "var").No_inline_()
, Tag_u = new_(Tid_u, "u").No_inline_().Repeat_ends_() // PAGE:en.b:Textbook_of_Psychiatry/Alcoholism_and_Psychoactive_Substance_Use_Disorders; DATE:2014-09-05
, Tag_ins = new_(Tid_ins, "ins").No_inline_()
, Tag_abbr = new_(Tid_abbr, "abbr").No_inline_()
, Tag_strike = new_(Tid_strike, "strike").No_inline_()
, Tag_del = new_(Tid_del, "del").No_inline_()
, Tag_s = new_(Tid_s, "s").No_inline_()
, Tag_sub = new_(Tid_sub, "sub").No_inline_()
, Tag_sup = new_(Tid_sup, "sup").No_inline_()
, Tag_big = new_(Tid_big, "big").No_inline_()
, Tag_small = new_(Tid_small, "small").No_inline_()
, Tag_code = new_(Tid_code, "code").No_inline_().Repeat_ends_()
, Tag_tt = new_(Tid_tt, "tt").No_inline_().Repeat_ends_()
, Tag_kbd = new_(Tid_kbd, "kbd").No_inline_()
, Tag_samp = new_(Tid_samp, "samp").No_inline_()
, Tag_blockquote = new_(Tid_blockquote, "blockquote").No_inline_().Repeat_mids_().Section_().Block_open_bgn_().Block_close_end_() // NOTE: should be open_end_, but leaving for now; DATE:2014-03-11; added Repeat_mids_(); PAGE:en.w:Ring_a_Ring_o'_Roses DATE:2014-06-26
, Tag_pre = new_(Tid_pre, "pre").No_inline_().Section_().Xtn_().Raw_().Block_open_bgn_().Block_close_end_().Ignore_empty_().Xtn_skips_template_args_()
, Tag_font = new_(Tid_font, "font").No_inline_()
, Tag_center = new_(Tid_center, "center").No_inline_().Block_open_end_().Block_close_end_() // removed .Repeat_ends_(); added Nest_(); EX: w:Burr Truss; DATE:2012-12-12
, Tag_p = new_(Tid_p, "p").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_span = new_(Tid_span, "span").Section_()
, Tag_div = new_(Tid_div, "div").Section_().Block_open_end_().Block_close_end_()
, Tag_hr = new_(Tid_hr, "hr").SingleOnly_().BgnNdeMode_inline_().Inline_by_backslash_().EndNdeMode_escape_().Section_().Block_close_end_()
, Tag_br = new_(Tid_br, "br").SingleOnly_().BgnNdeMode_inline_().Inline_by_backslash_().EndNdeMode_inline_().Section_()
, Tag_h1 = new_(Tid_h1, "h1").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h2 = new_(Tid_h2, "h2").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h3 = new_(Tid_h3, "h3").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h4 = new_(Tid_h4, "h4").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h5 = new_(Tid_h5, "h5").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h6 = new_(Tid_h6, "h6").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_hr = new_(Tid_hr, "hr").Single_only_().Single_only_html_().Bgn_nde_mode_inline_().Inline_by_backslash_().End_nde_mode_escape_().Section_().Block_close_end_()
, Tag_br = new_(Tid_br, "br").Single_only_().Single_only_html_().Bgn_nde_mode_inline_().Inline_by_backslash_().End_nde_mode_inline_().Section_()
, Tag_h1 = new_(Tid_h1, "h1").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h2 = new_(Tid_h2, "h2").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h3 = new_(Tid_h3, "h3").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h4 = new_(Tid_h4, "h4").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h5 = new_(Tid_h5, "h5").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_h6 = new_(Tid_h6, "h6").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_li = new_(Tid_li, "li").Repeat_mids_().Empty_ignored_().Block_open_bgn_().Block_close_end_()
, Tag_dt = new_(Tid_dt, "dt").Repeat_mids_()
, Tag_dd = new_(Tid_dd, "dd").Repeat_mids_()
, Tag_ol = new_(Tid_ol, "ol").NoInline_().Block_open_bgn_().Block_close_end_()
, Tag_ul = new_(Tid_ul, "ul").NoInline_().Block_open_bgn_().Block_close_end_()
, Tag_dl = new_(Tid_dl, "dl").NoInline_()
, Tag_table = new_(Tid_table, "table").NoInline_().Block_open_bgn_().Block_close_end_()
, Tag_tr = new_(Tid_tr, "tr").TblSub_().Block_open_bgn_().Block_open_end_()
, Tag_td = new_(Tid_td, "td").TblSub_().Block_open_end_().Block_close_bgn_()
, Tag_th = new_(Tid_th, "th").TblSub_().Block_open_end_().Block_close_bgn_()
, Tag_ol = new_(Tid_ol, "ol").No_inline_().Block_open_bgn_().Block_close_end_()
, Tag_ul = new_(Tid_ul, "ul").No_inline_().Block_open_bgn_().Block_close_end_()
, Tag_dl = new_(Tid_dl, "dl").No_inline_()
, Tag_table = new_(Tid_table, "table").No_inline_().Block_open_bgn_().Block_close_end_()
, Tag_tr = new_(Tid_tr, "tr").Tbl_sub_().Block_open_bgn_().Block_open_end_()
, Tag_td = new_(Tid_td, "td").Tbl_sub_().Block_open_end_().Block_close_bgn_()
, Tag_th = new_(Tid_th, "th").Tbl_sub_().Block_open_end_().Block_close_bgn_()
, Tag_thead = new_(Tid_thead, "thead")
, Tag_tfoot = new_(Tid_tfoot, "tfoot")
, Tag_tbody = new_(Tid_tbody, "tbody")
, Tag_caption = new_(Tid_caption, "caption").NoInline_().TblSub_()
, Tag_caption = new_(Tid_caption, "caption").No_inline_().Tbl_sub_()
, Tag_colgroup = new_(Tid_colgroup, "colgroup")
, Tag_col = new_(Tid_col, "col")
, Tag_a = new_(Tid_a, "a").Restricted_()
, Tag_img = new_(Tid_img, "img").Restricted_() // NOTE: was .Xtn() DATE:2014-11-06
, Tag_ruby = new_(Tid_ruby, "ruby").NoInline_()
, Tag_rt = new_(Tid_rt, "rt").NoInline_()
, Tag_rb = new_(Tid_rb, "rb").NoInline_()
, Tag_rp = new_(Tid_rp, "rp").NoInline_()
, Tag_img = new_(Tid_img, "img").Single_only_html_().Restricted_() // NOTE: was .Xtn() DATE:2014-11-06
, Tag_ruby = new_(Tid_ruby, "ruby").No_inline_()
, Tag_rt = new_(Tid_rt, "rt").No_inline_()
, Tag_rb = new_(Tid_rb, "rb").No_inline_()
, Tag_rp = new_(Tid_rp, "rp").No_inline_()
, Tag_includeonly = new_(Tid_includeonly, "includeonly")
, Tag_noinclude = new_(Tid_noinclude, "noinclude")
, Tag_onlyinclude = new_(Tid_onlyinclude, "onlyinclude")
@ -245,8 +246,8 @@ public class Xop_xnde_tag_ {
, Tag_bdi = new_(Tid_bdi, "bdi")
, Tag_data = new_(Tid_data, "data")
, Tag_mark = new_(Tid_mark, "mark")
, Tag_wbr = new_(Tid_wbr, "wbr").SingleOnly_()
, Tag_bdo = new_(Tid_bdo, "bdo").NoInline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_wbr = new_(Tid_wbr, "wbr").Single_only_().Single_only_html_()
, Tag_bdo = new_(Tid_bdo, "bdo").No_inline_().Section_().Block_open_bgn_().Block_close_end_()
, Tag_listing_buy = new_(Tid_listing_buy, "buy").Xtn_mw_()
, Tag_listing_do = new_(Tid_listing_do, "do").Xtn_mw_()
, Tag_listing_drink = new_(Tid_listing_drink, "drink").Xtn_mw_()

View File

@ -22,11 +22,11 @@ public class Xop_xnde_tag_lang {
lang_code = Int_obj_ref.new_(lang_code_int);
this.name_str = name_str;
this.name_bry = Bry_.new_u8(name_str);
this.xtnEndTag_tmp = Bry_.Add(Xop_xnde_tag_.XtnEndTag_bgn, name_bry);
this.xtnEndTag_tmp = Bry_.Add(Xop_xnde_tag_.Xtn_end_tag_bgn, name_bry);
}
public Int_obj_ref Lang_code() {return lang_code;} private Int_obj_ref lang_code;
public String Name_str() {return name_str;} private String name_str;
public byte[] Name_bry() {return name_bry;} private byte[] name_bry;
public byte[] XtnEndTag_tmp() {return xtnEndTag_tmp;} private byte[] xtnEndTag_tmp;
public byte[] Xtn_end_tag_tmp() {return xtnEndTag_tmp;} private byte[] xtnEndTag_tmp;
public static final Xop_xnde_tag_lang _ = new Xop_xnde_tag_lang(-1, String_.Empty);
}

View File

@ -106,7 +106,7 @@ public class Xop_xnde_tkn extends Xop_tkn_itm_base implements Xop_tblw_tkn {
this.Subs_get(i).Tmpl_evaluate(ctx, src, caller, bfr);
bfr.Add_mid(src, tag_close_bgn, tag_close_end); // write tag_end
if (tag_close_bgn == Int_.Min_value) {// xtn is unclosed; add a </xtn> else rest of page will be gobbled; PAGE:en.w:Provinces_and_territories_of_Canada DATE:2014-11-13
bfr.Add(tag.XtnEndTag());
bfr.Add(tag.Xtn_end_tag());
bfr.Add(Byte_ascii.Gt_bry);
}
}

View File

@ -71,10 +71,10 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
++atrs_bgn_pos; // set bgn_pos to be after ws
break;
case Byte_ascii.Slash: case Byte_ascii.Gt:
case Byte_ascii.Slash: case Byte_ascii.Angle_end:
++atrs_bgn_pos; // set bgn_pos to be after char
break;
case Byte_ascii.Backslash:
case Byte_ascii.Backslash: // NOTE: MW treats \ as /; EX: <br\>" -> "<br/>
++tag_end_pos;
break;
case Byte_ascii.Dollar:// handles <br$2>;
@ -246,7 +246,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
}
int end_rhs = -1, findPos = gtPos;
byte[] end_bry = Xop_xnde_tag_.Tag_noinclude.XtnEndTag(); int end_bry_len = end_bry.length;
byte[] end_bry = Xop_xnde_tag_.Tag_noinclude.Xtn_end_tag(); int end_bry_len = end_bry.length;
if (tag_is_closing) // </noinclude>; no end tag to search for; DATE:2014-05-02
end_rhs = gtPos;
else { // <noinclude>; search for end tag
@ -323,7 +323,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
boolean tag_ignore = false;
int tagId = tag.Id();
if (tagId == Xop_xnde_tag_.Tid_table || tag.TblSub()) { // tbl tag; EX: <table>,<tr>,<td>,<th>
if (tagId == Xop_xnde_tag_.Tid_table || tag.Tbl_sub()) { // tbl tag; EX: <table>,<tr>,<td>,<th>
Tblw_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, gtPos + 1, tagId, atrs_bgn, atrs_end);
return gtPos + 1;
}
@ -338,8 +338,8 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
else if (tagId == prv_xnde_tagId && tag.Repeat_mids()) { // EX: "<li>a<li>b" -> "<li>a</li><li>b"
End_tag(ctx, root, prv_xnde, src, src_len, bgn_pos - 1, bgn_pos - 1, tagId, true, tag);
}
else if (tag.SingleOnly()) inline = true; // <br></br> not allowed; convert <br> to <br/> </br> will be escaped
else if (tag.NoInline() && inline) {
else if (tag.Single_only()) inline = true; // <br></br> not allowed; convert <br> to <br/> </br> will be escaped
else if (tag.No_inline() && inline) {
Xop_xnde_tkn xnde_inline = Xnde_bgn(ctx, tkn_mkr, root, tag, Xop_xnde_tkn.CloseMode_open, src, bgn_pos, open_tag_end, atrs_bgn, atrs_end, atrs);
End_tag(ctx, root, xnde_inline, src, src_len, bgn_pos, gtPos, tagId, false, tag);
ctx.Msg_log().Add_itm_none(Xop_xnde_log.No_inline, src, bgn_pos, gtPos);
@ -347,7 +347,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
Xop_xnde_tkn xnde = null;
xnde = Xnde_bgn(ctx, tkn_mkr, root, tag, inline ? Xop_xnde_tkn.CloseMode_inline : Xop_xnde_tkn.CloseMode_open, src, bgn_pos, open_tag_end, atrs_bgn, atrs_end, atrs);
if (!inline && tag.BgnNdeMode() != Xop_xnde_tag_.BgnNdeMode_inline)
if (!inline && tag.Bgn_nde_mode() != Xop_xnde_tag_.Bgn_nde_mode_inline)
ctx.Stack_add(xnde);
if (tag_ignore)
xnde.Tag_visible_(false);
@ -414,7 +414,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
Xop_xnde_tkn bgn_nde = (Xop_xnde_tkn)ctx.Stack_get(prv_xnde_pos);
int bgn_tag_id = bgn_nde == null ? -1 : bgn_nde.Tag().Id();
int end_nde_mode = end_tag.EndNdeMode();
int end_nde_mode = end_tag.End_nde_mode();
boolean force_end_tag_to_match_bgn_tag = false;
switch (bgn_tag_id) {
case Xop_xnde_tag_.Tid_sub: if (end_tag_id == Xop_xnde_tag_.Tid_sup) force_end_tag_to_match_bgn_tag = true; break;
@ -426,7 +426,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
end_tag_id = bgn_tag_id;
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Sub_sup_swapped, src, bgn_pos, cur_pos);
}
if (end_tag_id == Xop_xnde_tag_.Tid_table || end_tag.TblSub()) {
if (end_tag_id == Xop_xnde_tag_.Tid_table || end_tag.Tbl_sub()) {
Tblw_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, end_tag_id);
return cur_pos;
}
@ -437,10 +437,10 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
return cur_pos;
}
switch (end_nde_mode) {
case Xop_xnde_tag_.EndNdeMode_inline: // PATCH.WP: allows </br>, </br/> and many other variants
case Xop_xnde_tag_.End_nde_mode_inline: // PATCH.WP: allows </br>, </br/> and many other variants
Xnde_bgn(ctx, tkn_mkr, root, end_tag, Xop_xnde_tkn.CloseMode_inline, src, bgn_pos, cur_pos, Int_.Min_value, Int_.Min_value, null); // NOTE: atrs is null b/c </br> will never have atrs
return cur_pos;
case Xop_xnde_tag_.EndNdeMode_escape: // handle </hr>
case Xop_xnde_tag_.End_nde_mode_escape: // handle </hr>
ctx.Lxr_make_(false);
ctx.Msg_log().Add_itm_none(Xop_xnde_log.Escaped_xnde, src, bgn_pos, cur_pos - 1);
return cur_pos;
@ -576,13 +576,13 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
xnde.Tag_close_rng_(open_end, open_end); // NOTE: inline tag, so set TagClose to open_end; should noop
}
else {
byte[] close_bry = tag.XtnEndTag_tmp(); // get tmp bry (so as not to new)
byte[] close_bry = tag.Xtn_end_tag_tmp(); // get tmp bry (so as not to new)
if (tag.Langs() != null) { // cur tag has langs; EX:<section>; DATE:2014-07-18
Xop_xnde_tag_lang tag_lang = tag.Langs_get(ctx.Lang().Case_mgr(), ctx.Cur_page().Lang().Lang_id(), src, name_bgn, name_end);
if (tag_lang == null) // tag does not match lang; EX:<trecho> and lang=de;
return ctx.Lxr_make_txt_(open_end);
if (tag_lang != Xop_xnde_tag_lang._) // tag matches; note Xop_xnde_tag_lang._ is a wildcard match; EX:<section>
close_bry = tag_lang.XtnEndTag_tmp();
close_bry = tag_lang.Xtn_end_tag_tmp();
}
int src_offset = open_bgn - 1; // open bgn to start at <; -2 to ignore </ ; +1 to include <
int close_ary_len = close_bry.length;

View File

@ -152,14 +152,14 @@ public class Xow_xwiki_mgr implements GfoInvkAble {
default: domain_str = String_.Format("{0}.{1}.org", lang_key_str, wiki_name); break; // EX: en.wiktionary.org
}
byte[] domain_bry = Bry_.new_u8(domain_str);
Xowe_wiki lang_wiki = wiki.Appe().Wiki_mgr().Get_by_key_or_null(domain_bry);
boolean offline_exists = lang_wiki != null;
// Xowe_wiki lang_wiki = wiki.Appe().Wiki_mgr().Get_by_key_or_null(domain_bry); // DELETE: causes commons to show; DATE:2015-09-23
// boolean offline_exists = lang_wiki != null;
String fmt = String_.Format("http://" + domain_str + "/wiki/~{0}");
int aliases_len = wiki_itm.Aliases().length;
for (int j = 0; j < aliases_len; j++) {
byte[] alias = wiki_itm.Aliases()[j];
if (wiki.Ns_mgr().Names_get_or_null(alias, 0, alias.length) != null) continue; // NOTE: do not add xwiki if alias matches namespace; EX: en.wiktionary.org has ns of "Wiktionary"; do not add alias of "wiktionary"; note that wikipedia does have an alias to wiktionary
Xow_xwiki_itm xwiki = Xow_xwiki_itm.new_(alias, Bry_.new_u8(fmt), lang_id, domain_tid, domain_bry).Offline_(offline_exists); // NOTE: domain_tid must be used, not wiki.Domain_tid; DATE:2014-09-14
Xow_xwiki_itm xwiki = Xow_xwiki_itm.new_(alias, Bry_.new_u8(fmt), lang_id, domain_tid, domain_bry); // .Offline_(offline_exists); // NOTE: domain_tid must be used, not wiki.Domain_tid; DATE:2014-09-14
Add_itm(xwiki, null);
}
}
@ -180,11 +180,11 @@ public class Xow_xwiki_mgr implements GfoInvkAble {
Xoac_lang_itm lang = (Xoac_lang_itm)langs.Get_at(i);
String domain_str = String_.Format("{0}.{1}.org", String_.new_u8(lang.Key_bry()), wiki_tid_name_str); // EX: fr.wikipedia.org
byte[] domain_bry = Bry_.new_u8(domain_str);
Xowe_wiki lang_wiki = wiki.Appe().Wiki_mgr().Get_by_key_or_null(domain_bry);
boolean offline_exists = lang_wiki != null;
// Xowe_wiki lang_wiki = wiki.Appe().Wiki_mgr().Get_by_key_or_null(domain_bry); // DELETE: causes commons to show; DATE:2015-09-23
// boolean offline_exists = lang_wiki != null;
String url_fmt = String_.Format("http://" + domain_str + "/wiki/~{0}");
int lang_id = Xol_lang_itm_.Get_by_key(lang.Key_bry()).Id();
Xow_xwiki_itm xwiki = Xow_xwiki_itm.new_(lang.Key_bry(), Bry_.new_u8(url_fmt), lang_id, domain_tid, domain_bry).Offline_(offline_exists);
Xow_xwiki_itm xwiki = Xow_xwiki_itm.new_(lang.Key_bry(), Bry_.new_u8(url_fmt), lang_id, domain_tid, domain_bry); // .Offline_(offline_exists);
Add_itm(xwiki, lang);
}
lang_mgr.Grps_sort();

View File

@ -61,7 +61,7 @@ public class Scrib_lib_text implements Scrib_lib {
// if (Type_adp_.Eq(itm_type, typeof(KeyVal[]))) itm_as_kvy = (KeyVal[])itm;
// else if (Type_adp_.Is_array(itm_type)) itm_as_ary = Array_.cast(itm);
// int flags = args.Cast_int_or(1, 0);
// if (itm_as_kvy != null && !Enm_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys))
// if (itm_as_kvy != null && !Bitmask_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys))
// itm_as_kvy = json_util.Reindex_arrays(itm_as_kvy, true);
// byte[] rv = null;
// if (itm_as_kvy != null)
@ -82,7 +82,7 @@ public class Scrib_lib_text implements Scrib_lib {
synchronized (reindex_data) {
if ( itm_as_kvy != null
&& itm_as_kvy.length > 0
&& !Enm_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys)
&& !Bitmask_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys)
) {
json_util.Reindex_arrays(reindex_data, itm_as_kvy, true);
if (reindex_data.Rv_is_kvy()) {
@ -107,12 +107,12 @@ public class Scrib_lib_text implements Scrib_lib {
byte[] json = args.Pull_bry(0);
int flags = args.Cast_int_or(1, 0);
int opts = Scrib_lib_text__json_util.Opt__force_assoc;
if (Enm_.Has_int(flags, Scrib_lib_text__json_util.Flag__try_fixing))
opts = Enm_.Add_int(opts, Scrib_lib_text__json_util.Flag__try_fixing);
if (Bitmask_.Has_int(flags, Scrib_lib_text__json_util.Flag__try_fixing))
opts = Bitmask_.Add_int(opts, Scrib_lib_text__json_util.Flag__try_fixing);
synchronized (procs) {
byte rv_tid = json_util.Decode(core.App().Utl__json_parser(), json, opts);
if (rv_tid == Bool_.__byte) throw Err_.new_("scribunto", "mw.text.jsonEncode: Unable to decode String " + String_.new_u8(json));
if (rv_tid == Bool_.Y_byte && !(Enm_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys))) {
if (rv_tid == Bool_.Y_byte && !(Bitmask_.Has_int(flags, Scrib_lib_text__json_util.Flag__preserve_keys))) {
KeyVal[] rv_as_kvy = (KeyVal[])json_util.Decode_rslt_as_nde();
synchronized (reindex_data) {
json_util.Reindex_arrays(reindex_data, rv_as_kvy, false);

View File

@ -230,7 +230,7 @@ public class Scrib_lib_wikibase_srl_tst {
);
}
@Test public void Claims_time() {
fxt.Init_prop(fxt.Wdata_fxt().Make_claim_time(2, "2001-02-03 04:05:06"));
fxt.Init_prop(fxt.Wdata_fxt().Make_claim_time(2, "2001-02-03 04:05:06", 9));
fxt.Test
( "claims:"
, " P2:"
@ -241,7 +241,7 @@ public class Scrib_lib_wikibase_srl_tst {
, " type:'time'"
, " value:"
, " time:'+00000002001-02-03T04:05:06Z'"
, " precision:'11'"
, " precision:'9'"
, " before:'0'"
, " after:'0'"
, " timezone:'0'"
@ -341,7 +341,7 @@ public class Scrib_lib_wikibase_srl_tst {
, " type:'time'"
, " value:"
, " time:'+00000002001-02-03T04:05:06Z'"
, " precision:'11'"
, " precision:'14'"
, " before:'0'"
, " after:'0'"
, " timezone:'0'"

View File

@ -68,10 +68,10 @@ class Scrib_lib_wikibase_srl_visitor implements Wdata_claim_visitor {
// Builds the 6-entry KeyVal array for a time claim's value: time, precision, before, after, timezone, calendarmodel.
// NOTE(review): this span is taken from a diff hunk, so slots 1-4 are each assigned twice:
// first with Wdata_dict_value_time.Val_* constants, then (slots 1-3) with values read from itm.
// As written, the later assignment to each slot is the one that remains in the returned array.
private static KeyVal[] Time_value(Wdata_claim_itm_time itm) {
KeyVal[] rv = new KeyVal[6];
rv[0] = KeyVal_.new_(Wdata_dict_value_time.Str_time , String_.new_a7(itm.Time()));
// earlier form of slots 1-4: fixed constants, independent of itm
rv[1] = KeyVal_.new_(Wdata_dict_value_time.Str_precision , Wdata_dict_value_time.Val_precision_int); // NOTE: must return int, not str; DATE:2014-02-18
rv[2] = KeyVal_.new_(Wdata_dict_value_time.Str_before , Wdata_dict_value_time.Val_before_int);
rv[3] = KeyVal_.new_(Wdata_dict_value_time.Str_after , Wdata_dict_value_time.Val_after_int);
rv[4] = KeyVal_.new_(Wdata_dict_value_time.Str_timezone , Wdata_dict_value_time.Val_timezone_str);
// later form of slots 1-4: precision / before / after come from the claim itself; timezone stays a constant
rv[1] = KeyVal_.new_(Wdata_dict_value_time.Str_precision , itm.Precision_int()); // NOTE: must return int, not str; DATE:2014-02-18
rv[2] = KeyVal_.new_(Wdata_dict_value_time.Str_before , itm.Before_int());
rv[3] = KeyVal_.new_(Wdata_dict_value_time.Str_after , itm.After_int());
rv[4] = KeyVal_.new_(Wdata_dict_value_time.Str_timezone , Wdata_dict_value_time.Val_timezone_str); // ASSUME: always 0 b/c UTC?; DATE:2015-09-21
// calendarmodel is always the constant; it is not read from itm
rv[5] = KeyVal_.new_(Wdata_dict_value_time.Str_calendarmodel , Wdata_dict_value_time.Val_calendarmodel_str);
return rv;
}
@ -84,9 +84,9 @@ class Scrib_lib_wikibase_srl_visitor implements Wdata_claim_visitor {
KeyVal[] rv = new KeyVal[5];
rv[0] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_latitude , Double_.parse(String_.new_a7(itm.Lat())));
rv[1] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_longitude , Double_.parse(String_.new_a7(itm.Lng())));
rv[2] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_altitude , null);
rv[3] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_globe , Wdata_dict_value_globecoordinate.Val_globe_dflt_str);
rv[4] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_precision , .00001d);
rv[2] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_altitude , String_.new_u8(itm.Alt()));
rv[3] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_globe , String_.new_u8(itm.Glb()));
rv[4] = KeyVal_.new_(Wdata_dict_value_globecoordinate.Str_precision , itm.Prc_as_num().To_double());
return rv;
}
public void Visit_system(Wdata_claim_itm_system itm) {

View File

@ -57,7 +57,7 @@ public class Wdata_wiki_mgr_fxt {
// Test-fixture factory: returns a Wdata_claim_itm_quantity for property pid with snak type Tid_value; amount, unit, ubound and lbound are each converted with Bry_.new_a7 before being passed on.
public Wdata_claim_itm_core Make_claim_quantity(int pid, String amount, String unit, String ubound, String lbound) {return new Wdata_claim_itm_quantity(pid, Wdata_dict_snak_tid.Tid_value, Bry_.new_a7(amount), Bry_.new_a7(unit), Bry_.new_a7(ubound), Bry_.new_a7(lbound));}
// Test-fixture factory: returns a Wdata_claim_itm_entity for property pid with snak type Tid_value and entity type Tid_item; val is converted with Int_.Xto_bry.
public Wdata_claim_itm_core Make_claim_entity_qid(int pid, int val) {return new Wdata_claim_itm_entity(pid, Wdata_dict_snak_tid.Tid_value, Wdata_dict_value_entity_tid.Tid_item, Int_.Xto_bry(val));}
// Test-fixture factory: same as Make_claim_entity_qid except the entity type is Tid_property instead of Tid_item.
public Wdata_claim_itm_core Make_claim_entity_pid(int pid, int val) {return new Wdata_claim_itm_entity(pid, Wdata_dict_snak_tid.Tid_value, Wdata_dict_value_entity_tid.Tid_property, Int_.Xto_bry(val));}
// Convenience overload: delegates to the 6-arg Make_claim_geo with default precision, a null altitude and a default globe.
// NOTE(review): the two lines below declare the same signature; they appear to be the pre-change and post-change sides of a diff hunk.
// The first uses precision ".000277777" and globe "Q2"; the second uses precision ".00001" and globe "http://www.wikidata.org/entity/Q2".
public Wdata_claim_itm_core Make_claim_geo(int pid, String lon, String lat) {return Make_claim_geo(pid, lon, lat, ".000277777", null, "Q2");}
public Wdata_claim_itm_core Make_claim_geo(int pid, String lon, String lat) {return Make_claim_geo(pid, lon, lat, ".00001", null, "http://www.wikidata.org/entity/Q2");}
// Test-fixture factory: returns a Wdata_claim_itm_globecoordinate for property pid with snak type Tid_value.
// Argument order differs from the parameter list: parameters are (lon, lat, prc, alt, glb) but the constructor receives (lat, lon, alt, prc, glb).
// NOTE(review): the 3-arg overload passes alt as null, so Bry_.new_a7 is presumably expected to accept null -- confirm.
public Wdata_claim_itm_core Make_claim_geo(int pid, String lon, String lat, String prc, String alt, String glb) {
return new Wdata_claim_itm_globecoordinate(pid, Wdata_dict_snak_tid.Tid_value, Bry_.new_a7(lat), Bry_.new_a7(lon), Bry_.new_a7(alt), Bry_.new_a7(prc), Bry_.new_a7(glb));
}