mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.7.2.1
This commit is contained in:
204
400_xowa/src/gplx/xowa/langs/numbers/Xol_num_fmtr_base.java
Normal file
204
400_xowa/src/gplx/xowa/langs/numbers/Xol_num_fmtr_base.java
Normal file
@@ -0,0 +1,204 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
public class Xol_num_fmtr_base implements GfoInvkAble {
|
||||
private Btrie_fast_mgr dlm_trie = Btrie_fast_mgr.cs_();
|
||||
private Xol_num_grp[] grp_ary = Xol_num_grp.Ary_empty; int grp_ary_len;
|
||||
private Gfo_num_fmt_wkr[] cache; int cache_len = 16;
|
||||
private Bry_bfr tmp = Bry_bfr.new_();
|
||||
public boolean Standard() {return standard;} private boolean standard = true;
|
||||
public byte[] Dec_dlm() {return dec_dlm;} public Xol_num_fmtr_base Dec_dlm_(byte[] v) {this.dec_dlm = v; dlm_trie.Add_bry_bval(v, Raw_tid_dec); return this;} private byte[] dec_dlm = Dec_dlm_default;
|
||||
private byte[] grp_dlm;
|
||||
public byte[] Raw(byte tid, byte[] src) {
|
||||
int src_len = src.length;
|
||||
for (int i = 0; i < src_len; i++) {
|
||||
byte b = src[i];
|
||||
Object o = dlm_trie.Match_bgn(src, i, src_len);
|
||||
if (o == null)
|
||||
tmp.Add_byte(b);
|
||||
else {
|
||||
byte dlm_tid = ((Byte_obj_val)o).Val();
|
||||
int dlm_match_pos = dlm_trie.Match_pos();
|
||||
switch (dlm_tid) {
|
||||
case Raw_tid_dec:
|
||||
if (tid == Tid_raw)
|
||||
tmp.Add_byte(Byte_ascii.Dot); // NOTE: dec_dlm is always outputted as dot, not regional dec_spr; EX: for dewiki, 12,34 -> 12.34
|
||||
else
|
||||
tmp.Add(dec_dlm);
|
||||
break;
|
||||
case Raw_tid_grp: {
|
||||
if (tid == Tid_raw) {} // never add grp_sep for raw
|
||||
else // add raw grp_spr
|
||||
tmp.Add_mid(src, i, dlm_match_pos);
|
||||
break;
|
||||
}
|
||||
}
|
||||
i = dlm_match_pos - 1; // NOTE: handle multi-byte delims
|
||||
}
|
||||
}
|
||||
return tmp.Xto_bry_and_clear();
|
||||
}
|
||||
public byte[] Fmt(int val) {return Fmt(Bry_.new_a7(Int_.Xto_str(val)));}
|
||||
public byte[] Fmt(byte[] src) { // SEE: DOC_1:Fmt
|
||||
int src_len = src.length;
|
||||
int num_bgn = -1, dec_pos = -1;
|
||||
for (int i = 0; i < src_len; i++) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
if (dec_pos == -1) { // no decimal seen
|
||||
if (num_bgn == -1) // num_bgn hasn't started
|
||||
num_bgn = i; // set num_bgn
|
||||
}
|
||||
else // decimal seen; add rest of src literally
|
||||
tmp.Add_byte(b);
|
||||
break;
|
||||
default: // non-number; includes alpha chars, as well as ".", "," and other potential separators
|
||||
if (num_bgn != -1) { // number started; format group; EX: 1234. -> 1,234.
|
||||
Gfo_num_fmt_wkr wkr = Get_or_new(i - num_bgn);
|
||||
wkr.Fmt(src, num_bgn, i, tmp);
|
||||
num_bgn = dec_pos = -1; // reset vars
|
||||
if (b == Byte_ascii.Dot // current char is "."; NOTE: all languages treat "." as decimal separator for parse; EX: for de, "1.23" is "1,23" DATE:2013-10-21
|
||||
//|| Bry_.Has_at_bgn(src, dec_dlm, i, src_len)
|
||||
) { // current char is languages's decimal delimiter; note this can be "," or any other multi-byte separator
|
||||
dec_pos = i;
|
||||
// i += dec_dlm.length - 1;
|
||||
tmp.Add(dec_dlm);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (b == Byte_ascii.Comma)
|
||||
tmp.Add(grp_dlm);
|
||||
else
|
||||
tmp.Add_byte(b);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (num_bgn != -1) { // digits left unprocessed
|
||||
Gfo_num_fmt_wkr wkr = Get_or_new(src_len - num_bgn);
|
||||
wkr.Fmt(src, num_bgn, src_len, tmp);
|
||||
}
|
||||
return tmp.Xto_bry_and_clear();
|
||||
}
|
||||
private Gfo_num_fmt_wkr Get_or_new(int src_len) {
|
||||
Gfo_num_fmt_wkr rv = null;
|
||||
if (src_len < cache_len) {
|
||||
rv = cache[src_len];
|
||||
if (rv != null) return rv;
|
||||
}
|
||||
rv = new Gfo_num_fmt_wkr(grp_ary, grp_ary_len, src_len);
|
||||
if (src_len < cache_len) cache[src_len] = rv;
|
||||
return rv;
|
||||
}
|
||||
public Xol_num_grp Grps_get_last() {return grp_ary[grp_ary_len - 1];}
|
||||
public Xol_num_grp Grps_get(int i) {return grp_ary[i];}
|
||||
public int Grps_len() {return grp_ary_len;}
|
||||
public void Grps_add(Xol_num_grp dat_itm) {
|
||||
standard = false;
|
||||
this.grp_ary = (Xol_num_grp[])Array_.Resize(grp_ary, grp_ary_len + 1);
|
||||
grp_ary[grp_ary_len] = dat_itm;
|
||||
grp_ary_len = grp_ary.length;
|
||||
for (int i = 0; i < grp_ary_len; i++) {
|
||||
Xol_num_grp itm = grp_ary[i];
|
||||
byte[] itm_dlm = itm.Dlm();
|
||||
Object o = dlm_trie.Match_exact(itm_dlm, 0, itm_dlm.length); // check for existing Object
|
||||
if (o == null) {
|
||||
dlm_trie.Add_bry_bval(itm_dlm, Raw_tid_grp);
|
||||
grp_dlm = itm_dlm;
|
||||
}
|
||||
}
|
||||
}
|
||||
public Xol_num_fmtr_base Clear() {
|
||||
this.grp_ary = Xol_num_grp.Ary_empty;
|
||||
grp_ary_len = 0;
|
||||
cache = new Gfo_num_fmt_wkr[cache_len];
|
||||
dlm_trie.Clear();
|
||||
return this;
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_dec_dlm_)) this.Dec_dlm_(m.ReadBry("v")); // NOTE: must call mutator
|
||||
else if (ctx.Match(k, Invk_clear)) this.Clear();
|
||||
else if (ctx.Match(k, Invk_grps_add)) this.Grps_add(new Xol_num_grp(m.ReadBry("dlm"), m.ReadInt("digits"), m.ReadYn("repeat")));
|
||||
else return GfoInvkAble_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
public static final String Invk_dec_dlm_ = "dec_dlm_", Invk_clear = "clear", Invk_grps_add = "grps_add";
|
||||
private static final byte Raw_tid_dec = 0, Raw_tid_grp = 1;
|
||||
private static final byte[] Dec_dlm_default = new byte[] {Byte_ascii.Dot};
|
||||
public static final byte[] Grp_dlm_default = new byte[] {Byte_ascii.Comma};
|
||||
public static final byte Tid_format = 0, Tid_raw = 1, Tid_nosep = 2;
|
||||
}
|
||||
class Gfo_num_fmt_wkr {
|
||||
public void Fmt(byte[] src, int bgn, int end, Bry_bfr bb) {
|
||||
if (itm_max == 0) {bb.Add_mid(src, bgn, end); return;}; // NOTE: small numbers (<=3) will have a 0-len ary
|
||||
int cur_idx = itm_max - 1;
|
||||
Gfo_num_fmt_bldr cur = itm_ary[cur_idx];
|
||||
int cur_pos = cur.Pos();
|
||||
for (int i = bgn; i < end; i++) {
|
||||
if (i == cur_pos + bgn) {
|
||||
cur.Gen(bb);
|
||||
if (cur_idx > 0) cur = itm_ary[--cur_idx];
|
||||
cur_pos = cur.Pos();
|
||||
}
|
||||
bb.Add_byte(src[i]);
|
||||
}
|
||||
}
|
||||
public Gfo_num_fmt_wkr(Xol_num_grp[] grp_ary, int grp_ary_len, int src_len) {
|
||||
itm_ary = new Gfo_num_fmt_bldr[src_len]; // default to src_len; will resize below;
|
||||
int src_pos = src_len, dat_idx = 0, dat_repeat = -1;
|
||||
while (true) {
|
||||
if (dat_idx == grp_ary_len) dat_idx = dat_repeat; // no more itms left; return to repeat
|
||||
Xol_num_grp dat = grp_ary[dat_idx];
|
||||
src_pos -= dat.Digits();
|
||||
if (src_pos < 1) break; // no more digits needed; stop
|
||||
byte[] dat_dlm = dat.Dlm();
|
||||
itm_ary[itm_max++] = dat_dlm.length == 1 ? new Gfo_num_fmt_bldr_one(src_pos, dat_dlm[0]) : (Gfo_num_fmt_bldr)new Gfo_num_fmt_bldr_many(src_pos, dat_dlm);
|
||||
if (dat.Repeat() && dat_repeat == -1) dat_repeat = dat_idx;
|
||||
++dat_idx;
|
||||
}
|
||||
itm_ary = (Gfo_num_fmt_bldr[])Array_.Resize(itm_ary, itm_max);
|
||||
}
|
||||
private Gfo_num_fmt_bldr[] itm_ary; private int itm_max;
|
||||
}
|
||||
interface Gfo_num_fmt_bldr {
|
||||
int Pos();
|
||||
void Gen(Bry_bfr bb);
|
||||
}
|
||||
class Gfo_num_fmt_bldr_one implements Gfo_num_fmt_bldr {
|
||||
public int Pos() {return pos;} private int pos;
|
||||
public void Gen(Bry_bfr bb) {bb.Add_byte(b);}
|
||||
public Gfo_num_fmt_bldr_one(int pos, byte b) {this.pos = pos; this.b = b;} private byte b;
|
||||
}
|
||||
class Gfo_num_fmt_bldr_many implements Gfo_num_fmt_bldr {
|
||||
public int Pos() {return pos;} private int pos;
|
||||
public void Gen(Bry_bfr bb) {bb.Add(ary);}
|
||||
public Gfo_num_fmt_bldr_many(int pos, byte[] ary) {this.pos = pos; this.ary = ary;} private byte[] ary;
|
||||
}
|
||||
/*
|
||||
DOC_1:Fmt
|
||||
. mediawiki does the following (from Language.php|commafy)
|
||||
.. split the number by digitGroupingPattern: ###,###,### -> 3,3,3
|
||||
.. use regex to search for number groups
|
||||
.. for each number group, format with "," and "."
|
||||
.. replace final result with language's decimal / grouping entry from separatorTransformTable
|
||||
. XOWA does the following
|
||||
.. iterate over bytes until non-number reached
|
||||
.. take all seen numbers and format according to lang
|
||||
*/
|
||||
118
400_xowa/src/gplx/xowa/langs/numbers/Xol_num_fmtr_base_tst.java
Normal file
118
400_xowa/src/gplx/xowa/langs/numbers/Xol_num_fmtr_base_tst.java
Normal file
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import org.junit.*;
|
||||
public class Xol_num_fmtr_base_tst {
|
||||
Xol_num_fmtr_base mgr = new Xol_num_fmtr_base();
|
||||
@Before public void init() {mgr.Clear();}
|
||||
@Test public void Outliers() {
|
||||
ini_(".", dat_(",", 3));
|
||||
tst_Fmt("1234a1234" , "1,234a1,234");
|
||||
tst_Fmt("1234abc1234" , "1,234abc1,234");
|
||||
tst_Fmt("1234,1234" , "1,234,1,234");
|
||||
tst_Fmt("1234.1234" , "1,234.1234");
|
||||
tst_Fmt("1234." , "1,234.");
|
||||
tst_Fmt("1234.1234.1234.1234" , "1,234.1234.1234.1234");
|
||||
tst_Fmt("-1234567" , "-1,234,567");
|
||||
tst_Fmt("1,234,567" , "1,234,567");
|
||||
}
|
||||
@Test public void English() {
|
||||
ini_(".", dat_(",", 3));
|
||||
tst_Fmt("123" , "123");
|
||||
tst_Fmt("1234" , "1,234");
|
||||
tst_Fmt("12345678" , "12,345,678");
|
||||
tst_Fmt("12345678901234567890" , "12,345,678,901,234,567,890");
|
||||
tst_Raw("1,234.12" , "1234.12");
|
||||
}
|
||||
@Test public void French() {
|
||||
ini_(",", dat_(" ", 3));
|
||||
tst_Fmt("123" , "123");
|
||||
tst_Fmt("1234" , "1 234");
|
||||
tst_Fmt("12345678" , "12 345 678");
|
||||
tst_Fmt("12345678901234567890" , "12 345 678 901 234 567 890");
|
||||
tst_Fmt("1234,5678" , "1 234 5 678"); // NOTE: nbsp here; also, nbsp is repeated. see dewiki and {{formatnum:1234,56}}
|
||||
}
|
||||
@Test public void Croatia() {
|
||||
ini_(",", dat_(".", 3), dat_(",", 3));
|
||||
tst_Fmt("123" , "123");
|
||||
tst_Fmt("1234" , "1.234");
|
||||
tst_Fmt("12345678" , "12,345.678");
|
||||
tst_Fmt("12345678901234567890" , "12,345.678,901.234,567.890");
|
||||
}
|
||||
@Test public void Mexico() {
|
||||
ini_(".", dat_(",", 3, false), dat_("'", 3, false), dat_(",", 3));
|
||||
tst_Fmt("123" , "123");
|
||||
tst_Fmt("1234" , "1,234");
|
||||
tst_Fmt("12345678" , "12'345,678");
|
||||
tst_Fmt("12345678901234567890" , "12,345,678,901,234'567,890");
|
||||
tst_Raw("12'345,678.90" , "12345678.90");
|
||||
}
|
||||
@Test public void China() {
|
||||
ini_(".", dat_(",", 4));
|
||||
tst_Fmt("123" , "123");
|
||||
tst_Fmt("1234" , "1234");
|
||||
tst_Fmt("12345678" , "1234,5678");
|
||||
tst_Fmt("12345678901234567890" , "1234,5678,9012,3456,7890");
|
||||
}
|
||||
@Test public void Hindi() {
|
||||
ini_(".", dat_(",", 3, false), dat_(",", 2));
|
||||
tst_Fmt("123" , "123");
|
||||
tst_Fmt("1234" , "1,234");
|
||||
tst_Fmt("12345678" , "1,23,45,678");
|
||||
tst_Fmt("12345678901234567890" , "1,23,45,67,89,01,23,45,67,890");
|
||||
}
|
||||
@Test public void India() {
|
||||
ini_(".", dat_(",", 3), dat_(",", 2), dat_(",", 2));
|
||||
tst_Fmt("123" , "123");
|
||||
tst_Fmt("1234" , "1,234");
|
||||
tst_Fmt("12345678" , "1,23,45,678");
|
||||
tst_Fmt("12345678901234567890" , "1,23,456,78,90,123,45,67,890");
|
||||
}
|
||||
@Test public void MiddleDot() {
|
||||
ini_("·", dat_("·", 3));
|
||||
tst_Fmt("123" , "123");
|
||||
tst_Fmt("1234" , "1·234");
|
||||
tst_Fmt("12345678" , "12·345·678");
|
||||
tst_Fmt("12345678901234567890" , "12·345·678·901·234·567·890");
|
||||
tst_Fmt("1234·5678" , "1·234·5·678");// NOTE: middle-dot is repeated. see dewiki and {{formatnum:1234,5678}}
|
||||
tst_Raw("1234·5678" , "1234.5678");
|
||||
}
|
||||
Xol_num_grp dat_(String dlm, int digits) {return new Xol_num_grp(Bry_.new_u8(dlm), digits, true);}
|
||||
Xol_num_grp dat_(String dlm, int digits, boolean repeat) {return new Xol_num_grp(Bry_.new_u8(dlm), digits, repeat);}
|
||||
private void tst_Fmt(String val, String expd) {Tfds.Eq(expd, String_.new_u8(mgr.Fmt(Bry_.new_u8(val))));}
|
||||
private void tst_Raw(String val, String expd) {Tfds.Eq(expd, String_.new_u8(mgr.Raw(Xol_num_fmtr_base.Tid_raw, Bry_.new_u8(val))));}
|
||||
private void ini_(String dec_dlm, Xol_num_grp... ary) {
|
||||
mgr.Dec_dlm_(Bry_.new_u8(dec_dlm));
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++)
|
||||
mgr.Grps_add(ary[i]);
|
||||
}
|
||||
}
|
||||
/*
|
||||
'france' ' 3#' ',0%' // 1 234 567,89
|
||||
'spain' '.3#' "'0%" // 1.234.567'89
|
||||
'germany' '.3#' ",0%" // 1.234.567,89
|
||||
'italy' ''3#' ",0%" // 1'234'567,89
|
||||
'en-us' ',3#' '.0%' // 1,234,567.89
|
||||
'en-sa' ',3#' '\u00120%' // 1,234,567·89
|
||||
'croatia' ',3#*' '.3#*' ',0%' // 1,234.567,890.123,45
|
||||
'china' ',4$' // 123,4567.89
|
||||
'mexico' ',3#*' "'3#" ',3#' // 1'234,567.89
|
||||
'hindi' ",2#*" ',3#' // 1,23,45,678.9
|
||||
'india' ',2#*' ',2#*' ',3#*' // 1,245,67,89,012
|
||||
*/
|
||||
26
400_xowa/src/gplx/xowa/langs/numbers/Xol_num_grp.java
Normal file
26
400_xowa/src/gplx/xowa/langs/numbers/Xol_num_grp.java
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
public class Xol_num_grp {
|
||||
public Xol_num_grp(byte[] dlm, int digits, boolean repeat) {this.dlm = dlm; this.digits = digits; this.repeat = repeat;}
|
||||
public byte[] Dlm() {return dlm;} private byte[] dlm;
|
||||
public int Digits() {return digits;} private int digits;
|
||||
public boolean Repeat() {return repeat;} private boolean repeat;
|
||||
public static final Xol_num_grp[] Ary_empty = new Xol_num_grp[0];
|
||||
public static final Xol_num_grp Default = new Xol_num_grp(new byte[] {Byte_ascii.Comma}, 3, true);
|
||||
}
|
||||
81
400_xowa/src/gplx/xowa/langs/numbers/Xol_num_grp_fmtr.java
Normal file
81
400_xowa/src/gplx/xowa/langs/numbers/Xol_num_grp_fmtr.java
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
public class Xol_num_grp_fmtr {
|
||||
public boolean Mode_is_regx() {return digit_grouping_pattern == null || Bry_.Eq(digit_grouping_pattern, Digit_grouping_pattern_normal);}
|
||||
public byte[] Digit_grouping_pattern() {return digit_grouping_pattern;} public void Digit_grouping_pattern_(byte[] v) {digit_grouping_pattern = v;} private byte[] digit_grouping_pattern;
|
||||
public void Clear() {digit_grouping_pattern = null;}
|
||||
public byte[] Fmt_regx(Bry_bfr bfr, byte[] src) {// NOTE: specific code to handle preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $number ) ) );"; DATE:2014-04-15
|
||||
int src_len = src.length;
|
||||
int bgn = 0;
|
||||
int pos = bgn;
|
||||
boolean dirty = false;
|
||||
int grp_len = 3;
|
||||
while (true) {
|
||||
if (pos == src_len) break;
|
||||
byte b = src[pos];
|
||||
switch (b) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9: {
|
||||
int num_end = Bry_finder.Find_fwd_while_num(src, pos, src_len);
|
||||
int num_len = num_end - pos;
|
||||
if (num_len > grp_len) {
|
||||
if (!dirty) {
|
||||
bfr.Add_mid(src, bgn, pos);
|
||||
dirty = true;
|
||||
}
|
||||
Fmt_grp(bfr, src, pos, num_end, num_len, grp_len);
|
||||
}
|
||||
else {
|
||||
if (dirty)
|
||||
bfr.Add_mid(src, pos, num_end);
|
||||
}
|
||||
pos = num_end;
|
||||
break;
|
||||
}
|
||||
case Byte_ascii.Dot: {
|
||||
int num_end = Bry_finder.Find_fwd_while_num(src, pos + 1, src_len); // +1 to skip dot
|
||||
if (dirty)
|
||||
bfr.Add_mid(src, pos, num_end);
|
||||
pos = num_end;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
if (dirty)
|
||||
bfr.Add_byte(b);
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return dirty ? bfr.Xto_bry_and_clear() : src;
|
||||
}
|
||||
private void Fmt_grp(Bry_bfr bfr, byte[] src, int bgn, int end, int len, int grp_len) {
|
||||
int seg_0 = bgn + (len % grp_len); // 5 digit number will have seg_0 of 2; 12345 -> 12,345
|
||||
for (int i = bgn; i < end; i++) {
|
||||
if ( i != bgn // never format at bgn; necessary for even multiples of grp_len (6, 9)
|
||||
&& ( i == seg_0 // seg_0
|
||||
|| (i - seg_0) % grp_len == 0 // seg_n
|
||||
)
|
||||
) {
|
||||
bfr.Add_byte(Byte_ascii.Comma); // MW: hard-coded
|
||||
}
|
||||
bfr.Add_byte(src[i]);
|
||||
}
|
||||
}
|
||||
private static final byte[] Digit_grouping_pattern_normal = Bry_.new_a7("###,###,###");
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import org.junit.*;
|
||||
public class Xol_num_grp_fmtr_tst {
|
||||
@Before public void init() {fxt.Reset();} private Xol_num_grp_fmtr_fxt fxt = new Xol_num_grp_fmtr_fxt();
|
||||
@Test public void Num() {
|
||||
fxt.Test_fmt_regx("" , "");
|
||||
fxt.Test_fmt_regx("1" , "1");
|
||||
fxt.Test_fmt_regx("12" , "12");
|
||||
fxt.Test_fmt_regx("123" , "123");
|
||||
fxt.Test_fmt_regx("1234" , "1,234");
|
||||
fxt.Test_fmt_regx("12345" , "12,345");
|
||||
fxt.Test_fmt_regx("123456" , "123,456");
|
||||
fxt.Test_fmt_regx("1234567" , "1,234,567");
|
||||
fxt.Test_fmt_regx("1234567890" , "1,234,567,890");
|
||||
}
|
||||
@Test public void Dec() {
|
||||
fxt.Test_fmt_regx("1.9876" , "1.9876");
|
||||
fxt.Test_fmt_regx("1234.9876" , "1,234.9876");
|
||||
}
|
||||
@Test public void Neg() {
|
||||
fxt.Test_fmt_regx("-1234.5678" , "-1,234.5678");
|
||||
}
|
||||
@Test public void Char() {
|
||||
fxt.Test_fmt_regx("1,234" , "1,234");
|
||||
fxt.Test_fmt_regx("1a2345" , "1a2,345");
|
||||
fxt.Test_fmt_regx("1234a5678b2345c.3456d7890e3210.f5432", "1,234a5,678b2,345c.3456d7,890e3,210.f5,432");
|
||||
}
|
||||
}
|
||||
class Xol_num_grp_fmtr_fxt {
|
||||
private Xol_num_grp_fmtr grouper = new Xol_num_grp_fmtr();
|
||||
private Bry_bfr bfr = Bry_bfr.new_();
|
||||
public void Reset() {}
|
||||
public void Test_fmt_regx(String raw, String expd) {
|
||||
byte[] actl = grouper.Fmt_regx(bfr, Bry_.new_a7(raw));
|
||||
Tfds.Eq(expd, String_.new_u8(actl));
|
||||
}
|
||||
}
|
||||
73
400_xowa/src/gplx/xowa/langs/numbers/Xol_num_mgr.java
Normal file
73
400_xowa/src/gplx/xowa/langs/numbers/Xol_num_mgr.java
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
public class Xol_num_mgr implements GfoInvkAble {
|
||||
private boolean digits_translate;
|
||||
protected Bry_bfr tmp_bfr = Bry_bfr.reset_(32);
|
||||
private static final byte[] Comma_bry = Bry_.new_a7(",");
|
||||
public Xol_num_grp_fmtr Num_grp_fmtr() {return num_grp_fmtr;} private Xol_num_grp_fmtr num_grp_fmtr = new Xol_num_grp_fmtr();
|
||||
public Xol_transform_mgr Separators_mgr() {return separators_mgr;} private Xol_transform_mgr separators_mgr = new Xol_transform_mgr();
|
||||
public Xol_transform_mgr Digits_mgr() {return digits_mgr;} private Xol_transform_mgr digits_mgr = new Xol_transform_mgr();
|
||||
public byte[] Raw(byte[] num) {
|
||||
if (digits_translate)
|
||||
num = digits_mgr.Replace(tmp_bfr, num, false);
|
||||
num = separators_mgr.Replace(tmp_bfr, num, false);
|
||||
num = Bry_.Replace_safe(tmp_bfr, num, Comma_bry, Bry_.Empty);
|
||||
return num;
|
||||
}
|
||||
public byte[] Format_num_no_separators(byte[] num) {return Format_num(num, true);}
|
||||
public byte[] Format_num(int val) {return Format_num(Bry_.new_a7(Int_.Xto_str(val)));}
|
||||
public byte[] Format_num(byte[] num) {return Format_num(num, false);}
|
||||
public byte[] Format_num(byte[] num, boolean skip_commafy) {
|
||||
if (!skip_commafy) {
|
||||
num = Commafy(num);
|
||||
num = separators_mgr.Replace(tmp_bfr, num, true);
|
||||
}
|
||||
if (digits_translate)
|
||||
num = digits_mgr.Replace(tmp_bfr, num, true);
|
||||
return num;
|
||||
}
|
||||
@gplx.Virtual public byte[] Commafy(byte[] num_bry) {
|
||||
if (num_bry == null) return Bry_.Empty; // MW: if ( $number === null ) return '';
|
||||
if (num_grp_fmtr.Mode_is_regx())
|
||||
return num_grp_fmtr.Fmt_regx(tmp_bfr, num_bry);
|
||||
else // NOTE: for now, return same as ###,###,###; only affects 12 languages; current implementation is bad; https://bugzilla.wikimedia.org/show_bug.cgi?id=63977
|
||||
return num_grp_fmtr.Fmt_regx(tmp_bfr, num_bry);
|
||||
}
|
||||
public Xol_num_mgr Clear() {
|
||||
digits_mgr.Clear();
|
||||
separators_mgr.Clear();
|
||||
num_grp_fmtr.Clear();
|
||||
return this;
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_clear)) this.Clear();
|
||||
else if (ctx.Match(k, Invk_separators)) return separators_mgr;
|
||||
else if (ctx.Match(k, Invk_digits)) {digits_translate = true; return digits_mgr;} // NOTE: only langes with a digit_transform_table will call digits; DATE:2014-05-28
|
||||
else if (ctx.Match(k, Invk_digit_grouping_pattern)) return String_.new_u8(num_grp_fmtr.Digit_grouping_pattern());
|
||||
else if (ctx.Match(k, Invk_digit_grouping_pattern_)) num_grp_fmtr.Digit_grouping_pattern_(m.ReadBry("v"));
|
||||
else return GfoInvkAble_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
public static final String Invk_clear = "clear", Invk_separators = "separators"
|
||||
, Invk_digits = "digits", Invk_digit_grouping_pattern = "digit_grouping_pattern", Invk_digit_grouping_pattern_ = "digit_grouping_pattern_";
|
||||
public static final byte[]
|
||||
Separators_key__grp = new byte[]{Byte_ascii.Comma}
|
||||
, Separators_key__dec = new byte[]{Byte_ascii.Dot}
|
||||
;
|
||||
}
|
||||
40
400_xowa/src/gplx/xowa/langs/numbers/Xol_num_mgr_.java
Normal file
40
400_xowa/src/gplx/xowa/langs/numbers/Xol_num_mgr_.java
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
public class Xol_num_mgr_ {
|
||||
public static Xol_num_mgr new_by_lang_id(int lang_id) {
|
||||
switch (lang_id) {
|
||||
case Xol_lang_itm_.Id_be_tarask:
|
||||
case Xol_lang_itm_.Id_bg:
|
||||
case Xol_lang_itm_.Id_ru:
|
||||
case Xol_lang_itm_.Id_pl:
|
||||
case Xol_lang_itm_.Id_uk:
|
||||
case Xol_lang_itm_.Id_es:
|
||||
case Xol_lang_itm_.Id_et:
|
||||
case Xol_lang_itm_.Id_hy:
|
||||
case Xol_lang_itm_.Id_kaa:
|
||||
case Xol_lang_itm_.Id_kk_cyrl:
|
||||
case Xol_lang_itm_.Id_ksh:
|
||||
// case Xol_lang_itm_.Id_ku_ku:
|
||||
return new Xol_num_mgr__commafy_5();
|
||||
case Xol_lang_itm_.Id_km:
|
||||
case Xol_lang_itm_.Id_my: return new Xol_num_mgr__noop();
|
||||
default: return new Xol_num_mgr();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
class Xol_num_mgr__commafy_5 extends Xol_num_mgr { @Override public byte[] Commafy(byte[] num) {
|
||||
if (Bry_.Len_eq_0(num)) return num; // bounds check
|
||||
int num_len = num.length;
|
||||
int num_bgn = 0;
|
||||
byte b = num[num_bgn];
|
||||
if (b == Byte_ascii.Dash) {
|
||||
if (num_len == 1) return num; // bounds check
|
||||
b = num[++num_bgn]; // skip negative sign
|
||||
}
|
||||
if (Byte_ascii.Is_num(b)) { // check for preg_match( '/^-?\d{1,4}(\.\d+)?$/', $_ )
|
||||
int num_end = Bry_finder.Find_fwd_while_num(num, num_bgn, num_len);
|
||||
if (num_end - num_bgn < 5) { // 1-4 digits
|
||||
if (num_end == num_len) return num; // no decimal; exit
|
||||
b = num[num_end];
|
||||
if ( b == Byte_ascii.Dot
|
||||
&& num_end != num_len - 1) { // if dot at end, then no match on above regx; fall-thru to below
|
||||
num_end = Bry_finder.Find_fwd_while_num(num, num_end + 1, num_len);
|
||||
if (num_end == num_len) return num; // only numbers after dot; matches regx;
|
||||
}
|
||||
}
|
||||
}
|
||||
return this.Num_grp_fmtr().Fmt_regx(tmp_bfr, num); // otherwise do default grouping; '/(\d{3})(?=\d)(?!\d*\.)/', '$1,'
|
||||
}
|
||||
}
|
||||
class Xol_num_mgr__noop extends Xol_num_mgr { @Override public byte[] Commafy(byte[] num) {return num;}
|
||||
}
|
||||
54
400_xowa/src/gplx/xowa/langs/numbers/Xol_transform_mgr.java
Normal file
54
400_xowa/src/gplx/xowa/langs/numbers/Xol_transform_mgr.java
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.numbers; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xol_transform_mgr implements GfoInvkAble {
|
||||
private Btrie_fast_mgr trie_k_to_v = Btrie_fast_mgr.cs_();
|
||||
private Btrie_fast_mgr trie_v_to_k = Btrie_fast_mgr.cs_();
|
||||
private Ordered_hash hash = Ordered_hash_.new_bry_();
|
||||
private boolean empty = true;
|
||||
public void Clear() {hash.Clear(); trie_k_to_v.Clear(); trie_v_to_k.Clear(); empty = true;}
|
||||
public int Len() {return hash.Count();}
|
||||
public KeyVal Get_at(int i) {return (KeyVal)hash.Get_at(i);}
|
||||
public byte[] Get_val_or_self(byte[] k) { // NOTE: return self; note that MW defaults "." and "," to self, even though MessagesLa.php only specifies ","; i.e.: always return something for "."; DATE:2014-05-13
|
||||
KeyVal kv = (KeyVal)hash.Get_by(k);
|
||||
return kv == null ? k : (byte[])kv.Val();
|
||||
}
|
||||
public Xol_transform_mgr Set(byte[] k, byte[] v) {
|
||||
trie_k_to_v.Add(k, v);
|
||||
trie_v_to_k.Add(v, k);
|
||||
KeyVal kv = KeyVal_.new_(String_.new_u8(k), v);
|
||||
hash.Del(k);
|
||||
hash.Add(k, kv);
|
||||
empty = false;
|
||||
return this;
|
||||
}
|
||||
public byte[] Replace(Bry_bfr tmp_bfr, byte[] src, boolean k_to_v) {
|
||||
if (empty || src == null) return src;
|
||||
int src_len = src.length; if (src_len == 0) return src;
|
||||
Btrie_fast_mgr trie = k_to_v ? trie_k_to_v : trie_v_to_k;
|
||||
return trie.Replace(tmp_bfr, src, 0, src_len);
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_set)) Set(m.ReadBry("k"), m.ReadBry("v"));
|
||||
else if (ctx.Match(k, Invk_clear)) Clear();
|
||||
else return GfoInvkAble_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
public static final String Invk_set = "set", Invk_clear = "clear";
|
||||
}
|
||||
Reference in New Issue
Block a user