1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Source: Restore broken commit

This commit is contained in:
gnosygnu
2017-02-06 22:14:55 -05:00
parent 938beac9f9
commit 3bfeb94b43
4380 changed files with 328018 additions and 0 deletions

View File

@@ -0,0 +1,204 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import gplx.core.primitives.*; import gplx.core.btries.*;
/**
 * Formats digit runs with language-specific group delimiters ({@link #Fmt(byte[])}) and
 * rewrites the delimiters of an already-formatted number ({@link #Raw(byte, byte[])}).
 * Group rules are registered with {@link #Grps_add(Xol_num_grp)}; the decimal delimiter with {@link #Dec_dlm_(byte[])}.
 * All calls share one scratch buffer (tmp) and one worker cache, both held as instance fields.
 */
public class Xol_num_fmtr_base implements Gfo_invk {
	private final Btrie_fast_mgr dlm_trie = Btrie_fast_mgr.cs(); private final Btrie_rv trv = new Btrie_rv();
	private Xol_num_grp[] grp_ary = Xol_num_grp.Ary_empty; int grp_ary_len;
	int cache_len = 16;	// declared before cache because the cache initializer reads it
	private Gfo_num_fmt_wkr[] cache = new Gfo_num_fmt_wkr[cache_len];	// allocated eagerly; previously null until Clear() was called, which made Fmt fail on a fresh instance
	private Bry_bfr tmp = Bry_bfr_.New();
	/** True until the first call to Grps_add; Clear does not reset it. */
	public boolean Standard() {return standard;} private boolean standard = true;
	/** Decimal delimiter written by Fmt / Raw; the setter also registers the delimiter in the trie used by Raw. */
	public byte[] Dec_dlm() {return dec_dlm;} public Xol_num_fmtr_base Dec_dlm_(byte[] v) {this.dec_dlm = v; dlm_trie.Add_bry_byte(v, Raw_tid_dec); return this;} private byte[] dec_dlm = Dec_dlm_default;
	private byte[] grp_dlm;	// delimiter of the most recent group whose delimiter was new to dlm_trie; null until then
	/**
	 * Rewrites the delimiters found in src.
	 * @param tid one of the Tid_* constants; only Tid_raw is treated specially
	 * @param src formatted number bytes
	 * @return for Tid_raw: group delimiters removed and the decimal delimiter replaced by ".";
	 *         otherwise: group delimiters copied as-is and the decimal delimiter replaced by dec_dlm
	 */
	public byte[] Raw(byte tid, byte[] src) {
		int src_len = src.length;
		for (int i = 0; i < src_len; i++) {
			byte b = src[i];
			Object o = dlm_trie.Match_at(trv, src, i, src_len);
			if (o == null)	// no delimiter at this position; copy the byte
				tmp.Add_byte(b);
			else {
				byte dlm_tid = ((Byte_obj_val)o).Val();
				int dlm_match_pos = trv.Pos();	// used below as the exclusive end of the matched delimiter
				switch (dlm_tid) {
					case Raw_tid_dec:
						if (tid == Tid_raw)
							tmp.Add_byte(Byte_ascii.Dot);	// NOTE: dec_dlm is always outputted as dot, not regional dec_spr; EX: for dewiki, 12,34 -> 12.34
						else
							tmp.Add(dec_dlm);
						break;
					case Raw_tid_grp: {
						if (tid == Tid_raw) {}	// never add grp_sep for raw
						else					// add raw grp_spr
							tmp.Add_mid(src, i, dlm_match_pos);
						break;
					}
				}
				i = dlm_match_pos - 1;	// NOTE: handle multi-byte delims; -1 offsets the loop's i++
			}
		}
		return tmp.To_bry_and_clear();
	}
	/** Formats the decimal string form of val; see {@link #Fmt(byte[])}. */
	public byte[] Fmt(int val) {return Fmt(Bry_.new_a7(Int_.To_str(val)));}
	/**
	 * Formats every digit run in src that precedes a decimal point. SEE: DOC_1:Fmt
	 * A "." that directly ends a digit run is written as dec_dlm; from then on all digits are copied literally.
	 * A "," is written as grp_dlm (or literally when no group delimiter has been registered); other bytes are copied.
	 * @param src bytes to format
	 * @return formatted bytes taken from the shared scratch buffer
	 */
	public byte[] Fmt(byte[] src) {
		int src_len = src.length;
		int num_bgn = -1, dec_pos = -1;
		for (int i = 0; i < src_len; i++) {
			byte b = src[i];
			switch (b) {
				case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
				case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
					if (dec_pos == -1) {	// no decimal seen
						if (num_bgn == -1)	// num_bgn hasn't started
							num_bgn = i;	// set num_bgn
					}
					else					// decimal seen; add rest of src literally
						tmp.Add_byte(b);
					break;
				default:	// non-number; includes alpha chars, as well as ".", "," and other potential separators
					if (num_bgn != -1) {	// number started; format group; EX: 1234. -> 1,234.
						Gfo_num_fmt_wkr wkr = Get_or_new(i - num_bgn);
						wkr.Fmt(src, num_bgn, i, tmp);
						num_bgn = dec_pos = -1;	// reset vars
						if (b == Byte_ascii.Dot	// current char is "."; NOTE: all languages treat "." as decimal separator for parse; EX: for de, "1.23" is "1,23" DATE:2013-10-21
							//|| Bry_.Has_at_bgn(src, dec_dlm, i, src_len)
							) {	// current char is language's decimal delimiter; note this can be "," or any other multi-byte separator
							dec_pos = i;
							// i += dec_dlm.length - 1;
							tmp.Add(dec_dlm);
							continue;
						}
					}
					if (b == Byte_ascii.Comma && grp_dlm != null)
						tmp.Add(grp_dlm);
					else	// also reached for "," when grp_dlm was never assigned; keep the literal comma instead of passing null to the buffer
						tmp.Add_byte(b);
					break;
			}
		}
		if (num_bgn != -1) {	// digits left unprocessed
			Gfo_num_fmt_wkr wkr = Get_or_new(src_len - num_bgn);
			wkr.Fmt(src, num_bgn, src_len, tmp);
		}
		return tmp.To_bry_and_clear();
	}
	/** Returns the worker for digit runs of length src_len; workers for lengths below cache_len are cached and reused. */
	private Gfo_num_fmt_wkr Get_or_new(int src_len) {
		Gfo_num_fmt_wkr rv = null;
		if (src_len < cache_len) {
			rv = cache[src_len];
			if (rv != null) return rv;
		}
		rv = new Gfo_num_fmt_wkr(grp_ary, grp_ary_len, src_len);
		if (src_len < cache_len) cache[src_len] = rv;
		return rv;
	}
	public Xol_num_grp Grps_get_last()		{return grp_ary[grp_ary_len - 1];}
	public Xol_num_grp Grps_get(int i)		{return grp_ary[i];}
	public int Grps_len()					{return grp_ary_len;}
	/**
	 * Appends a group definition, marks the formatter as non-standard and registers any
	 * not-yet-known group delimiter in the trie used by Raw.
	 */
	public void Grps_add(Xol_num_grp dat_itm) {
		standard = false;
		this.grp_ary = (Xol_num_grp[])Array_.Resize(grp_ary, grp_ary_len + 1);
		grp_ary[grp_ary_len] = dat_itm;
		grp_ary_len = grp_ary.length;
		cache = new Gfo_num_fmt_wkr[cache_len];	// cached workers were built from the previous group list; drop them
		for (int i = 0; i < grp_ary_len; i++) {
			Xol_num_grp itm = grp_ary[i];
			byte[] itm_dlm = itm.Dlm();
			Object o = dlm_trie.Match_exact(itm_dlm, 0, itm_dlm.length);	// check for existing Object
			if (o == null) {
				dlm_trie.Add_bry_byte(itm_dlm, Raw_tid_grp);
				grp_dlm = itm_dlm;
			}
		}
	}
	/** Removes all groups, cached workers and trie entries. dec_dlm, grp_dlm and standard are left as they are. */
	public Xol_num_fmtr_base Clear() {
		this.grp_ary = Xol_num_grp.Ary_empty;
		grp_ary_len = 0;
		cache = new Gfo_num_fmt_wkr[cache_len];
		dlm_trie.Clear();
		return this;
	}
	/** Script entry point: dispatches "dec_dlm_", "clear" and "grps_add"; returns Gfo_invk_.Rv_unhandled for any other key. */
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if		(ctx.Match(k, Invk_dec_dlm_))		this.Dec_dlm_(m.ReadBry("v"));	// NOTE: must call mutator
		else if	(ctx.Match(k, Invk_clear))			this.Clear();
		else if	(ctx.Match(k, Invk_grps_add))		this.Grps_add(new Xol_num_grp(m.ReadBry("dlm"), m.ReadInt("digits"), m.ReadYn("repeat")));
		else	return Gfo_invk_.Rv_unhandled;
		return this;
	}
	public static final String Invk_dec_dlm_ = "dec_dlm_", Invk_clear = "clear", Invk_grps_add = "grps_add";
	private static final byte Raw_tid_dec = 0, Raw_tid_grp = 1;	// trie payloads: which kind of delimiter matched
	private static final byte[] Dec_dlm_default = new byte[] {Byte_ascii.Dot};
	public static final byte[] Grp_dlm_default = new byte[] {Byte_ascii.Comma};
	public static final byte Tid_format = 0, Tid_raw = 1, Tid_nosep = 2;
}
/**
 * Pre-computed separator layout for digit runs of one specific length.
 * The constructor derives, from the group definitions, the offsets at which a delimiter
 * must be emitted; {@link #Fmt} then replays that layout over a concrete digit run.
 */
class Gfo_num_fmt_wkr {
	/**
	 * Copies src[bgn..end) into bb, emitting the pre-computed delimiters in front of the matching digits.
	 * @param src source bytes
	 * @param bgn index of the first digit
	 * @param end index after the last digit
	 * @param bb  buffer receiving the formatted digits
	 */
	public void Fmt(byte[] src, int bgn, int end, Bry_bfr bb) {
		if (itm_max == 0) {bb.Add_mid(src, bgn, end); return;}	// NOTE: small numbers (<=3) will have a 0-len ary
		int cur_idx = itm_max - 1;	// itm_ary is filled right-to-left, so the last entry is the left-most separator
		Gfo_num_fmt_bldr cur = itm_ary[cur_idx];
		int cur_pos = cur.Pos();
		for (int i = bgn; i < end; i++) {
			if (i == cur_pos + bgn) {	// separator belongs in front of this digit
				cur.Gen(bb);
				if (cur_idx > 0) cur = itm_ary[--cur_idx];
				cur_pos = cur.Pos();
			}
			bb.Add_byte(src[i]);
		}
	}
	/**
	 * Builds the separator layout by consuming group definitions from the right end of the number.
	 * @param grp_ary     group definitions; index 0 describes the right-most group
	 * @param grp_ary_len number of usable entries in grp_ary
	 * @param src_len     number of digits in the runs this worker will format
	 */
	public Gfo_num_fmt_wkr(Xol_num_grp[] grp_ary, int grp_ary_len, int src_len) {
		itm_ary = new Gfo_num_fmt_bldr[src_len];	// default to src_len; will resize below;
		int src_pos = src_len, dat_idx = 0, dat_repeat = -1;
		while (true) {
			if (dat_idx == grp_ary_len) {			// no more itms left
				if (dat_repeat == -1) break;		// nothing to repeat (empty list or no repeating group); leave remaining digits ungrouped instead of indexing grp_ary[-1]
				dat_idx = dat_repeat;				// return to repeat
			}
			Xol_num_grp dat = grp_ary[dat_idx];
			int dat_digits = dat.Digits();
			if (dat_digits < 1) break;				// a non-positive group size would never consume digits and would overrun itm_ary
			src_pos -= dat_digits;
			if (src_pos < 1) break;					// no more digits needed; stop
			byte[] dat_dlm = dat.Dlm();
			itm_ary[itm_max++] = dat_dlm.length == 1 ? new Gfo_num_fmt_bldr_one(src_pos, dat_dlm[0]) : (Gfo_num_fmt_bldr)new Gfo_num_fmt_bldr_many(src_pos, dat_dlm);
			if (dat.Repeat() && dat_repeat == -1) dat_repeat = dat_idx;	// remember the first repeating group
			++dat_idx;
		}
		itm_ary = (Gfo_num_fmt_bldr[])Array_.Resize(itm_ary, itm_max);
	}
	private Gfo_num_fmt_bldr[] itm_ary; private int itm_max;
}
// One delimiter to be emitted while formatting a digit run.
interface Gfo_num_fmt_bldr {
// Offset, relative to the start of the digit run, of the digit the delimiter is written in front of.
int Pos();
// Appends the delimiter to bb.
void Gen(Bry_bfr bb);
}
/** Delimiter builder for a single-byte delimiter. */
class Gfo_num_fmt_bldr_one implements Gfo_num_fmt_bldr {
	private int pos;
	private byte b;

	public Gfo_num_fmt_bldr_one(int pos, byte b) {
		this.pos = pos;
		this.b = b;
	}

	/** Offset within the digit run at which the delimiter is emitted. */
	public int Pos() {
		return pos;
	}

	/** Appends the delimiter byte to bb. */
	public void Gen(Bry_bfr bb) {
		bb.Add_byte(b);
	}
}
/** Delimiter builder for a delimiter held as a byte array (used when the delimiter is not exactly one byte long). */
class Gfo_num_fmt_bldr_many implements Gfo_num_fmt_bldr {
	private int pos;
	private byte[] ary;

	public Gfo_num_fmt_bldr_many(int pos, byte[] ary) {
		this.pos = pos;
		this.ary = ary;
	}

	/** Offset within the digit run at which the delimiter is emitted. */
	public int Pos() {
		return pos;
	}

	/** Appends all delimiter bytes to bb. */
	public void Gen(Bry_bfr bb) {
		bb.Add(ary);
	}
}
/*
DOC_1:Fmt
. MediaWiki does the following (from Language.php|commafy)
.. split the number by digitGroupingPattern: ###,###,### -> 3,3,3
.. use regex to search for number groups
.. for each number group, format with "," and "."
.. replace final result with the language's decimal / grouping entry from separatorTransformTable
. XOWA does the following
.. iterate over bytes until non-number reached
.. take all seen numbers and format according to lang
*/

View File

@@ -0,0 +1,118 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import org.junit.*;
// Table-driven tests for Xol_num_fmtr_base: each test configures a decimal delimiter plus
// one or more digit groups via ini_, then checks Fmt (tst_Fmt) and / or Raw (tst_Raw) output.
public class Xol_num_fmtr_base_tst {
Xol_num_fmtr_base mgr = new Xol_num_fmtr_base();
// Clear groups, worker cache and delimiter trie before every test.
@Before public void init() {mgr.Clear();}
// Mixed input: letters, existing separators, multiple dots, leading minus.
@Test public void Outliers() {
ini_(".", dat_(",", 3));
tst_Fmt("1234a1234" , "1,234a1,234");
tst_Fmt("1234abc1234" , "1,234abc1,234");
tst_Fmt("1234,1234" , "1,234,1,234");
tst_Fmt("1234.1234" , "1,234.1234");
tst_Fmt("1234." , "1,234.");
tst_Fmt("1234.1234.1234.1234" , "1,234.1234.1234.1234");
tst_Fmt("-1234567" , "-1,234,567");
tst_Fmt("1,234,567" , "1,234,567");
}
// Groups of 3 separated by ","; decimal ".".
@Test public void English() {
ini_(".", dat_(",", 3));
tst_Fmt("123" , "123");
tst_Fmt("1234" , "1,234");
tst_Fmt("12345678" , "12,345,678");
tst_Fmt("12345678901234567890" , "12,345,678,901,234,567,890");
tst_Raw("1,234.12" , "1234.12");
}
// Groups of 3 separated by a space character; decimal ",".
@Test public void French() {
ini_(",", dat_(" ", 3));
tst_Fmt("123" , "123");
tst_Fmt("1234" , "1 234");
tst_Fmt("12345678" , "12 345 678");
tst_Fmt("12345678901234567890" , "12 345 678 901 234 567 890");
tst_Fmt("1234,5678" , "1 234 5 678"); // NOTE: nbsp here; also, nbsp is repeated. see dewiki and {{formatnum:1234,56}}
}
// Two repeating groups of 3 that alternate "." and ",".
@Test public void Croatia() {
ini_(",", dat_(".", 3), dat_(",", 3));
tst_Fmt("123" , "123");
tst_Fmt("1234" , "1.234");
tst_Fmt("12345678" , "12,345.678");
tst_Fmt("12345678901234567890" , "12,345.678,901.234,567.890");
}
// Two non-repeating groups ("," then "'") followed by a repeating "," group.
@Test public void Mexico() {
ini_(".", dat_(",", 3, false), dat_("'", 3, false), dat_(",", 3));
tst_Fmt("123" , "123");
tst_Fmt("1234" , "1,234");
tst_Fmt("12345678" , "12'345,678");
tst_Fmt("12345678901234567890" , "12,345,678,901,234'567,890");
tst_Raw("12'345,678.90" , "12345678.90");
}
// Groups of 4.
@Test public void China() {
ini_(".", dat_(",", 4));
tst_Fmt("123" , "123");
tst_Fmt("1234" , "1234");
tst_Fmt("12345678" , "1234,5678");
tst_Fmt("12345678901234567890" , "1234,5678,9012,3456,7890");
}
// One non-repeating group of 3, then repeating groups of 2.
@Test public void Hindi() {
ini_(".", dat_(",", 3, false), dat_(",", 2));
tst_Fmt("123" , "123");
tst_Fmt("1234" , "1,234");
tst_Fmt("12345678" , "1,23,45,678");
tst_Fmt("12345678901234567890" , "1,23,45,67,89,01,23,45,67,890");
}
// Repeating cycle of group sizes 3, 2, 2.
@Test public void India() {
ini_(".", dat_(",", 3), dat_(",", 2), dat_(",", 2));
tst_Fmt("123" , "123");
tst_Fmt("1234" , "1,234");
tst_Fmt("12345678" , "1,23,45,678");
tst_Fmt("12345678901234567890" , "1,23,456,78,90,123,45,67,890");
}
// Multi-byte delimiter used for both the decimal and the group delimiter.
@Test public void MiddleDot() {
ini_("·", dat_("·", 3));
tst_Fmt("123" , "123");
tst_Fmt("1234" , "1·234");
tst_Fmt("12345678" , "12·345·678");
tst_Fmt("12345678901234567890" , "12·345·678·901·234·567·890");
tst_Fmt("1234·5678" , "1·234·5·678");// NOTE: middle-dot is repeated. see dewiki and {{formatnum:1234,5678}}
tst_Raw("1234·5678" , "1234.5678");
}
// Builds a repeating group definition.
Xol_num_grp dat_(String dlm, int digits) {return new Xol_num_grp(Bry_.new_u8(dlm), digits, true);}
// Builds a group definition with an explicit repeat flag.
Xol_num_grp dat_(String dlm, int digits, boolean repeat) {return new Xol_num_grp(Bry_.new_u8(dlm), digits, repeat);}
// Asserts that Fmt(val) equals expd.
private void tst_Fmt(String val, String expd) {Tfds.Eq(expd, String_.new_u8(mgr.Fmt(Bry_.new_u8(val))));}
// Asserts that Raw(Tid_raw, val) equals expd.
private void tst_Raw(String val, String expd) {Tfds.Eq(expd, String_.new_u8(mgr.Raw(Xol_num_fmtr_base.Tid_raw, Bry_.new_u8(val))));}
// Sets the decimal delimiter, then registers the groups in the given order.
private void ini_(String dec_dlm, Xol_num_grp... ary) {
mgr.Dec_dlm_(Bry_.new_u8(dec_dlm));
int ary_len = ary.length;
for (int i = 0; i < ary_len; i++)
mgr.Grps_add(ary[i]);
}
}
/*
'france' ' 3#' ',0%' // 1 234 567,89
'spain' '.3#' "'0%" // 1.234.567'89
'germany' '.3#' ",0%" // 1.234.567,89
'italy' ''3#' ",0%" // 1'234'567,89
'en-us' ',3#' '.0%' // 1,234,567.89
'en-sa' ',3#' '\u00120%' // 1,234,567·89
'croatia' ',3#*' '.3#*' ',0%' // 1,234.567,890.123,45
'china' ',4$' // 123,4567.89
'mexico' ',3#*' "'3#" ',3#' // 1'234,567.89
'hindi' ",2#*" ',3#' // 1,23,45,678.9
'india' ',2#*' ',2#*' ',3#*' // 1,245,67,89,012
*/

View File

@@ -0,0 +1,26 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
/**
 * Value holder describing one digit group: the delimiter written in front of the group,
 * the number of digits in the group, and whether the group definition repeats.
 */
public class Xol_num_grp {
	public static final Xol_num_grp[] Ary_empty = new Xol_num_grp[0];
	/** Group of 3 digits delimited by a comma, repeating. */
	public static final Xol_num_grp Default = new Xol_num_grp(new byte[] {Byte_ascii.Comma}, 3, true);

	private byte[] dlm;
	private int digits;
	private boolean repeat;

	public Xol_num_grp(byte[] dlm, int digits, boolean repeat) {
		this.dlm = dlm;
		this.digits = digits;
		this.repeat = repeat;
	}

	/** Delimiter bytes. */
	public byte[] Dlm() {
		return dlm;
	}

	/** Number of digits in the group. */
	public int Digits() {
		return digits;
	}

	/** Whether the group definition repeats. */
	public boolean Repeat() {
		return repeat;
	}
}

View File

@@ -0,0 +1,81 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
/**
 * Inserts "," between groups of three digits, mirroring MediaWiki's
 * preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $number ) ).
 * Also stores the language's digit grouping pattern.
 */
public class Xol_num_grp_fmtr {
	private static final byte[] Digit_grouping_pattern_normal = Bry_.new_a7("###,###,###");
	private byte[] digit_grouping_pattern;

	/** True when no pattern is set or the pattern equals "###,###,###". */
	public boolean Mode_is_regx() {
		if (digit_grouping_pattern == null) return true;
		return Bry_.Eq(digit_grouping_pattern, Digit_grouping_pattern_normal);
	}
	public byte[] Digit_grouping_pattern() {
		return digit_grouping_pattern;
	}
	public void Digit_grouping_pattern_(byte[] v) {
		digit_grouping_pattern = v;
	}
	public void Clear() {
		digit_grouping_pattern = null;
	}
	/**
	 * Groups every digit run longer than three digits; digits directly after a "." are left alone.
	 * NOTE: specific code to handle the MediaWiki regex quoted on the class; DATE:2014-04-15
	 * @param bfr scratch buffer; only written to once a change is needed
	 * @param src bytes to format
	 * @return src itself when nothing was grouped; otherwise the contents of bfr
	 */
	public byte[] Fmt_regx(Bry_bfr bfr, byte[] src) {
		final int grp_len = 3;
		final int src_len = src.length;
		boolean changed = false;	// stays false (and bfr untouched) until the first run that needs grouping
		int pos = 0;
		while (pos != src_len) {
			byte cur = src[pos];
			switch (cur) {
				case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
				case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9: {
					int run_end = Bry_find_.Find_fwd_while_num(src, pos, src_len);
					int run_len = run_end - pos;
					if (run_len > grp_len) {
						if (!changed) {	// first change: flush everything seen so far
							bfr.Add_mid(src, 0, pos);
							changed = true;
						}
						Fmt_grp(bfr, src, pos, run_end, run_len, grp_len);
					}
					else if (changed) {
						bfr.Add_mid(src, pos, run_end);
					}
					pos = run_end;
					break;
				}
				case Byte_ascii.Dot: {
					int frac_end = Bry_find_.Find_fwd_while_num(src, pos + 1, src_len);	// +1 to skip dot
					if (changed) {
						bfr.Add_mid(src, pos, frac_end);
					}
					pos = frac_end;
					break;
				}
				default: {
					if (changed) {
						bfr.Add_byte(cur);
					}
					++pos;
					break;
				}
			}
		}
		if (!changed) return src;
		return bfr.To_bry_and_clear();
	}
	/** Writes src[bgn..end) to bfr with a comma in front of every group of grp_len digits, counted from the right. */
	private void Fmt_grp(Bry_bfr bfr, byte[] src, int bgn, int end, int len, int grp_len) {
		int first_sep = bgn + (len % grp_len);	// 5 digit number will have first_sep at bgn + 2; 12345 -> 12,345
		for (int i = bgn; i < end; i++) {
			boolean on_boundary = (i - first_sep) % grp_len == 0;	// also true for i == first_sep
			if (i != bgn && on_boundary) {	// never format at bgn; necessary for even multiples of grp_len (6, 9)
				bfr.Add_byte(Byte_ascii.Comma);	// MW: hard-coded
			}
			bfr.Add_byte(src[i]);
		}
	}
}

View File

@@ -0,0 +1,54 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import org.junit.*;
// Table-driven tests for Xol_num_grp_fmtr.Fmt_regx (comma grouping in threes).
public class Xol_num_grp_fmtr_tst {
@Before public void init() {fxt.Reset();} private Xol_num_grp_fmtr_fxt fxt = new Xol_num_grp_fmtr_fxt();
// Plain integers of increasing length, including the empty string.
@Test public void Num() {
fxt.Test_fmt_regx("" , "");
fxt.Test_fmt_regx("1" , "1");
fxt.Test_fmt_regx("12" , "12");
fxt.Test_fmt_regx("123" , "123");
fxt.Test_fmt_regx("1234" , "1,234");
fxt.Test_fmt_regx("12345" , "12,345");
fxt.Test_fmt_regx("123456" , "123,456");
fxt.Test_fmt_regx("1234567" , "1,234,567");
fxt.Test_fmt_regx("1234567890" , "1,234,567,890");
}
// Digits after the decimal point are never grouped.
@Test public void Dec() {
fxt.Test_fmt_regx("1.9876" , "1.9876");
fxt.Test_fmt_regx("1234.9876" , "1,234.9876");
}
// A leading minus sign is copied through.
@Test public void Neg() {
fxt.Test_fmt_regx("-1234.5678" , "-1,234.5678");
}
// Non-digit characters split the input into independently grouped digit runs.
@Test public void Char() {
fxt.Test_fmt_regx("1,234" , "1,234");
fxt.Test_fmt_regx("1a2345" , "1a2,345");
fxt.Test_fmt_regx("1234a5678b2345c.3456d7890e3210.f5432", "1,234a5,678b2,345c.3456d7,890e3,210.f5,432");
}
}
/** Test fixture: runs Xol_num_grp_fmtr.Fmt_regx on a string and compares the result with the expected string. */
class Xol_num_grp_fmtr_fxt {
	private Xol_num_grp_fmtr grouper = new Xol_num_grp_fmtr();
	private Bry_bfr bfr = Bry_bfr_.New();

	/** No per-test state to reset. */
	public void Reset() {
	}

	/** Formats raw and asserts that the result equals expd. */
	public void Test_fmt_regx(String raw, String expd) {
		byte[] src = Bry_.new_a7(raw);
		byte[] formatted = grouper.Fmt_regx(bfr, src);
		String actl = String_.new_u8(formatted);
		Tfds.Eq(expd, actl);
	}
}

View File

@@ -0,0 +1,75 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
/**
 * Per-language number formatter: groups digits (Commafy), then applies the language's
 * separator and digit transform tables. Raw applies the tables in the reverse direction.
 */
public class Xol_num_mgr implements Gfo_invk {
	public static final String
	  Invk_clear = "clear", Invk_separators = "separators"
	, Invk_digits = "digits", Invk_digit_grouping_pattern = "digit_grouping_pattern", Invk_digit_grouping_pattern_ = "digit_grouping_pattern_";
	public static final byte[]
	  Separators_key__grp = new byte[]{Byte_ascii.Comma}
	, Separators_key__dec = new byte[]{Byte_ascii.Dot}
	;
	private static final byte[] Comma_bry = Bry_.new_a7(",");

	protected Bry_bfr tmp_bfr = Bry_bfr_.Reset(32);
	private boolean digits_translate;	// set once the "digits" key has been invoked
	private Xol_num_grp_fmtr num_grp_fmtr = new Xol_num_grp_fmtr();
	private Xol_transform_mgr separators_mgr = new Xol_transform_mgr();
	private Xol_transform_mgr digits_mgr = new Xol_transform_mgr();

	public Xol_num_grp_fmtr Num_grp_fmtr() {
		return num_grp_fmtr;
	}
	public Xol_transform_mgr Separators_mgr() {
		return separators_mgr;
	}
	public Xol_transform_mgr Digits_mgr() {
		return digits_mgr;
	}
	/** Reverses digit and separator transforms (value-to-key direction), then removes every ",". */
	public byte[] Raw(byte[] num) {
		byte[] rv = num;
		if (digits_translate) {
			rv = digits_mgr.Replace(tmp_bfr, rv, false);
		}
		rv = separators_mgr.Replace(tmp_bfr, rv, false);
		return Bry_.Replace_safe(tmp_bfr, rv, Comma_bry, Bry_.Empty);
	}
	public byte[] Format_num_no_separators(byte[] num) {
		return Format_num(num, true);
	}
	public byte[] Format_num_by_long(long val) {
		return Format_num(Bry_.new_a7(Long_.To_str(val)));
	}
	public byte[] Format_num_by_decimal(Decimal_adp val) {
		return Format_num(Bry_.new_a7(val.To_str()));
	}
	public byte[] Format_num(int val) {
		return Format_num(Bry_.new_a7(Int_.To_str(val)));
	}
	public byte[] Format_num(byte[] num) {
		return Format_num(num, false);
	}
	/**
	 * Formats num for display.
	 * @param skip_commafy when true, neither digit grouping nor the separator transform is applied
	 */
	public byte[] Format_num(byte[] num, boolean skip_commafy) {
		byte[] rv = num;
		if (!skip_commafy) {
			rv = separators_mgr.Replace(tmp_bfr, Commafy(rv), true);
		}
		if (digits_translate) {
			rv = digits_mgr.Replace(tmp_bfr, rv, true);
		}
		return rv;
	}
	/** Groups digits; overridden by language-specific subclasses. Returns an empty array for null input. */
	@gplx.Virtual public byte[] Commafy(byte[] num_bry) {
		if (num_bry == null) return Bry_.Empty;	// MW: if ( $number === null ) return '';
		boolean regx_mode = num_grp_fmtr.Mode_is_regx();
		if (!regx_mode) {
			// NOTE: for now, return same as ###,###,###; only affects 12 languages; current implementation is bad; https://bugzilla.wikimedia.org/show_bug.cgi?id=63977
			return num_grp_fmtr.Fmt_regx(tmp_bfr, num_bry);
		}
		return num_grp_fmtr.Fmt_regx(tmp_bfr, num_bry);
	}
	/** Clears both transform tables and the digit grouping pattern. */
	public Xol_num_mgr Clear() {
		digits_mgr.Clear();
		separators_mgr.Clear();
		num_grp_fmtr.Clear();
		return this;
	}
	/** Script entry point; returns Gfo_invk_.Rv_unhandled for unknown keys. */
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if (ctx.Match(k, Invk_clear)) {
			this.Clear();
			return this;
		}
		if (ctx.Match(k, Invk_separators)) {
			return separators_mgr;
		}
		if (ctx.Match(k, Invk_digits)) {	// NOTE: only langes with a digit_transform_table will call digits; DATE:2014-05-28
			digits_translate = true;
			return digits_mgr;
		}
		if (ctx.Match(k, Invk_digit_grouping_pattern)) {
			return String_.new_u8(num_grp_fmtr.Digit_grouping_pattern());
		}
		if (ctx.Match(k, Invk_digit_grouping_pattern_)) {
			num_grp_fmtr.Digit_grouping_pattern_(m.ReadBry("v"));
			return this;
		}
		return Gfo_invk_.Rv_unhandled;
	}
}

View File

@@ -0,0 +1,40 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
/** Factory that picks the Xol_num_mgr variant for a language id. */
public class Xol_num_mgr_ {
	/**
	 * @param lang_id one of the Xol_lang_stub_.Id_* constants
	 * @return a new manager: the commafy_5 variant, the no-op variant, or the default one
	 */
	public static Xol_num_mgr new_by_lang_id(int lang_id) {
		Xol_num_mgr rv;
		switch (lang_id) {
			case Xol_lang_stub_.Id_be_tarask:
			case Xol_lang_stub_.Id_bg:
			case Xol_lang_stub_.Id_ru:
			case Xol_lang_stub_.Id_pl:
			case Xol_lang_stub_.Id_uk:
			case Xol_lang_stub_.Id_es:
			case Xol_lang_stub_.Id_et:
			case Xol_lang_stub_.Id_hy:
			case Xol_lang_stub_.Id_kaa:
			case Xol_lang_stub_.Id_kk_cyrl:
			case Xol_lang_stub_.Id_ksh:
			// case Xol_lang_stub_.Id_ku_ku:
				rv = new Xol_num_mgr__commafy_5();
				break;
			case Xol_lang_stub_.Id_km:
			case Xol_lang_stub_.Id_my:
				rv = new Xol_num_mgr__noop();
				break;
			default:
				rv = new Xol_num_mgr();
				break;
		}
		return rv;
	}
}

View File

@@ -0,0 +1,44 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
/**
 * Number manager that leaves short numbers ungrouped: input matching
 * preg_match( '/^-?\d{1,4}(\.\d+)?$/', $_ ) is returned unchanged; everything else gets the default grouping.
 */
class Xol_num_mgr__commafy_5 extends Xol_num_mgr {
	@Override public byte[] Commafy(byte[] num) {
		if (Bry_.Len_eq_0(num)) return num;								// bounds check
		if (num.length == 1 && num[0] == Byte_ascii.Dash) return num;	// lone "-"; bounds check
		if (Is_short_num(num)) return num;
		return this.Num_grp_fmtr().Fmt_regx(tmp_bfr, num);				// otherwise do default grouping; '/(\d{3})(?=\d)(?!\d*\.)/', '$1,'
	}
	/** True when num is an optional "-", 1-4 digits, and optionally "." followed by at least one digit, with nothing after. Expects a non-empty num that is not a lone "-". */
	private static boolean Is_short_num(byte[] num) {
		int len = num.length;
		int bgn = num[0] == Byte_ascii.Dash ? 1 : 0;	// skip negative sign
		if (!Byte_ascii.Is_num(num[bgn])) return false;
		int int_end = Bry_find_.Find_fwd_while_num(num, bgn, len);
		if (int_end - bgn >= 5) return false;			// 5 or more digits
		if (int_end == len) return true;				// no decimal
		if (num[int_end] != Byte_ascii.Dot) return false;
		if (int_end == len - 1) return false;			// dot at end does not match the regx
		int frac_end = Bry_find_.Find_fwd_while_num(num, int_end + 1, len);
		return frac_end == len;							// only numbers after dot
	}
}
/** Number manager whose Commafy performs no digit grouping. */
class Xol_num_mgr__noop extends Xol_num_mgr {
	/** Returns num unchanged. */
	@Override public byte[] Commafy(byte[] num) {
		return num;
	}
}

View File

@@ -0,0 +1,54 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import gplx.core.btries.*;
/**
 * Two-way byte-string transform table (key to value and value to key),
 * with an ordered hash of the registered pairs for lookup by key or index.
 */
public class Xol_transform_mgr implements Gfo_invk {
	public static final String Invk_set = "set", Invk_clear = "clear";

	private Btrie_fast_mgr trie_k_to_v = Btrie_fast_mgr.cs();
	private Btrie_fast_mgr trie_v_to_k = Btrie_fast_mgr.cs();
	private Ordered_hash hash = Ordered_hash_.New_bry();
	private boolean empty = true;	// lets Replace return early when nothing was ever Set

	public void Clear() {
		hash.Clear();
		trie_k_to_v.Clear();
		trie_v_to_k.Clear();
		empty = true;
	}
	public int Len() {
		return hash.Count();
	}
	public Keyval Get_at(int i) {
		return (Keyval)hash.Get_at(i);
	}
	/**
	 * Returns the value registered for k, or k itself when there is none.
	 * NOTE: return self; note that MW defaults "." and "," to self, even though MessagesLa.php only specifies ","; i.e.: always return something for "."; DATE:2014-05-13
	 */
	public byte[] Get_val_or_self(byte[] k) {
		Keyval found = (Keyval)hash.Get_by(k);
		if (found == null) return k;
		return (byte[])found.Val();
	}
	/** Registers k -> v in both directions; an existing hash entry for k is replaced. */
	public Xol_transform_mgr Set(byte[] k, byte[] v) {
		trie_k_to_v.Add(k, v);
		trie_v_to_k.Add(v, k);
		hash.Del(k);
		hash.Add(k, Keyval_.new_(String_.new_u8(k), v));
		empty = false;
		return this;
	}
	/**
	 * Applies the table to src.
	 * @param k_to_v true to replace keys with values; false to replace values with keys
	 * @return src itself when the table is empty or src is null / zero-length; otherwise the trie's replacement result
	 */
	public byte[] Replace(Bry_bfr tmp_bfr, byte[] src, boolean k_to_v) {
		if (empty) return src;
		if (src == null) return src;
		int src_len = src.length;
		if (src_len == 0) return src;
		Btrie_fast_mgr trie = trie_v_to_k;
		if (k_to_v) {
			trie = trie_k_to_v;
		}
		return trie.Replace(tmp_bfr, src, 0, src_len);
	}
	/** Script entry point for "set" and "clear"; returns Gfo_invk_.Rv_unhandled for unknown keys. */
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if (ctx.Match(k, Invk_set)) {
			Set(m.ReadBry("k"), m.ReadBry("v"));
			return this;
		}
		if (ctx.Match(k, Invk_clear)) {
			Clear();
			return this;
		}
		return Gfo_invk_.Rv_unhandled;
	}
}