From 21e90cd4791a155575aa8f50932f152f8992b756 Mon Sep 17 00:00:00 2001
From: gnosygnu
Date: Tue, 8 Nov 2016 10:23:23 -0500
Subject: [PATCH] Wikibase: Parse numbers with fractions correctly; do not convert to zero

Quantity amounts, lower bounds and upper bounds were parsed with
Bry_.To_long_or(.., 0), i.e. as whole numbers with 0 as the fallback value.
Parse them as Decimal_adp instead, after Normalize_for_decimal has removed
the leading "+" and any commas, and format them with the new
Xol_num_mgr.Format_num_by_decimal.

Tests: Wdata_wiki_mgr_tst.normalize_for_decimal and
Wdata_pf_property__basic__tst.Quantity__decimal.
---
 .../gplx/xowa/langs/numbers/Xol_num_mgr.java  |  7 ++-
 .../xtns/wbases/Wdata_prop_val_visitor.java   | 56 ++++++++++++++-----
 .../xowa/xtns/wbases/Wdata_wiki_mgr_tst.java  | 14 ++++-
 .../pfuncs/Wdata_pf_property__basic__tst.java |  5 ++
 4 files changed, 64 insertions(+), 18 deletions(-)

diff --git a/400_xowa/src/gplx/xowa/langs/numbers/Xol_num_mgr.java b/400_xowa/src/gplx/xowa/langs/numbers/Xol_num_mgr.java
index aa21d5aa7..4f871b781 100644
--- a/400_xowa/src/gplx/xowa/langs/numbers/Xol_num_mgr.java
+++ b/400_xowa/src/gplx/xowa/langs/numbers/Xol_num_mgr.java
@@ -31,9 +31,10 @@ public class Xol_num_mgr implements Gfo_invk {
 		return num;
 	}
 	public byte[] Format_num_no_separators(byte[] num) {return Format_num(num, true);}
-	public byte[] Format_num_by_long(long val) {return Format_num(Bry_.new_a7(Long_.To_str(val)));}
-	public byte[] Format_num(int val)          {return Format_num(Bry_.new_a7(Int_.To_str(val)));}
-	public byte[] Format_num(byte[] num)       {return Format_num(num, false);}
+	public byte[] Format_num_by_long(long val)          {return Format_num(Bry_.new_a7(Long_.To_str(val)));}
+	public byte[] Format_num_by_decimal(Decimal_adp val){return Format_num(Bry_.new_a7(val.To_str()));}
+	public byte[] Format_num(int val)                   {return Format_num(Bry_.new_a7(Int_.To_str(val)));}
+	public byte[] Format_num(byte[] num)                {return Format_num(num, false);}
 	public byte[] Format_num(byte[] num, boolean skip_commafy) {
 		if (!skip_commafy) {
 			num = Commafy(num);
diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_prop_val_visitor.java b/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_prop_val_visitor.java
index 7077765a5..41ef0807d 100644
--- a/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_prop_val_visitor.java
+++ b/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_prop_val_visitor.java
@@ -58,25 +58,25 @@ public class Wdata_prop_val_visitor implements Wbase_claim_visitor {
 	public void Visit_quantity(Wbase_claim_quantity itm) {Write_quantity(bfr, wdata_mgr, lang, itm.Amount(), itm.Lbound(), itm.Ubound(), itm.Unit());}
 	public static void Write_quantity(Bry_bfr bfr, Wdata_wiki_mgr wdata_mgr, Xol_lang_itm lang, byte[] val_bry, byte[] lo_bry, byte[] hi_bry, byte[] unit) {
 		// get val, lo, hi
-		long val = Bry_.To_long_or(val_bry, Byte_ascii.Comma_bry, 0, val_bry.length, 0); // NOTE: must cast to long for large numbers; EX:{{#property:P1082}} PAGE:en.w:Earth; DATE:2015-08-02
-		long lo = Bry_.To_long_or(lo_bry, Byte_ascii.Comma_bry, 0, lo_bry.length, 0);
-		long hi = Bry_.To_long_or(hi_bry, Byte_ascii.Comma_bry, 0, hi_bry.length, 0);
+		Decimal_adp val = Decimal_adp_.parse(String_.new_u8(Normalize_for_decimal(val_bry))); // NOTE: parse as decimal so fractions are kept; was long (chosen for large numbers; EX:{{#property:P1082}} PAGE:en.w:Earth; DATE:2015-08-02)
+		Decimal_adp lo = Decimal_adp_.parse(String_.new_u8(Normalize_for_decimal(lo_bry)));
+		Decimal_adp hi = Decimal_adp_.parse(String_.new_u8(Normalize_for_decimal(hi_bry)));
 
 		// fmt val
-		if (lo == val && hi == val) // lo, hi, val are same; print val only;
-			bfr.Add(lang.Num_mgr().Format_num_by_long(val)); // amount; EX: 1,234
+		if (lo.Eq(hi) && hi.Eq(val))// lo, hi, val are same; print val only;
+			bfr.Add(lang.Num_mgr().Format_num_by_decimal(val)); // amount; EX: 1,234
 		else {
-			long lo_dif = val - lo;
-			long hi_dif = hi - val;
-			if (lo_dif == hi_dif) { // lo_dif, hi_dif are same; print val±dif
-				bfr.Add(lang.Num_mgr().Format_num_by_long(val)); // amount; EX: 1,234
-				bfr.Add(Bry__quantity_margin_of_error); // symbol: EX: ±
-				bfr.Add(lang.Num_mgr().Format_num_by_long(lo_dif)); // amount; EX: 4
+			Decimal_adp lo_dif = val.Subtract(lo);
+			Decimal_adp hi_dif = hi.Subtract(val);
+			if (lo_dif.Eq(hi_dif)) { // lo_dif, hi_dif are same; print val±dif
+				bfr.Add(lang.Num_mgr().Format_num_by_decimal(val)); // amount; EX: 1,234
+				bfr.Add(Bry__quantity_margin_of_error); // symbol: EX: ±
+				bfr.Add(lang.Num_mgr().Format_num_by_decimal(lo_dif)); // amount; EX: 4
 			}
 			else { // lo_dif, hi_dif are diff; print lo - hi; this may not be what MW does
-				bfr.Add(lang.Num_mgr().Format_num_by_long(lo)); // lo; EX: 1,230
-				bfr.Add_byte(Byte_ascii.Dash); // dash: EX: -
-				bfr.Add(lang.Num_mgr().Format_num_by_long(hi)); // hi; EX: 1,238
+				bfr.Add(lang.Num_mgr().Format_num_by_decimal(lo)); // lo; EX: 1,230
+				bfr.Add_byte(Byte_ascii.Dash); // dash: EX: -
+				bfr.Add(lang.Num_mgr().Format_num_by_decimal(hi)); // hi; EX: 1,238
 			}
 		}
 
@@ -91,6 +91,34 @@ public class Wdata_prop_val_visitor implements Wbase_claim_visitor {
 			bfr.Add(entity_doc.Label_list__get_or_fallback(lang));
 		}
 	}
+	public static byte[] Normalize_for_decimal(byte[] bry) { // remove leading "+" and any commas; was Bry_.To_long_or(val_bry, Byte_ascii.Comma_bry, 0, val_bry.length, 0)
+		Bry_bfr bfr = null; // created lazily; stays null when bry has no plus / comma
+		int len = bry.length;
+		for (int i = 0; i < len; i++) {
+			byte b = bry[i];
+			switch (b) {
+				case Byte_ascii.Plus:
+					if (i == 0) {
+						if (bfr == null) bfr = Bry_bfr_.New(); // empty bfr: plus is dropped and all later bytes get copied
+					}
+					else {
+						throw Err_.new_wo_type("invalid decimal format; plus must be at start of String", "raw", bry);
+					}
+					break;
+				case Byte_ascii.Comma:
+					if (bfr == null) { // first byte that must be removed; switch from "reuse bry" to "copy into bfr"
+						bfr = Bry_bfr_.New();
+						bfr.Add_mid(bry, 0, i); // keep everything before this comma
+					}
+					break;
+				default:
+					if (bfr != null) // only copy once a plus / comma has been seen
+						bfr.Add_byte(b);
+					break;
+			}
+		}
+		return bfr == null ? bry : bfr.To_bry_and_clear(); // nothing removed: return the original array itself
+	}
 	public void Visit_globecoordinate(Wbase_claim_globecoordinate itm) {Write_geo(bfr, wdata_mgr, lang, itm.Lat(), itm.Lng());}
 	public static void Write_geo(Bry_bfr bfr, Wdata_wiki_mgr wdata_mgr, Xol_lang_itm lang, byte[] lat, byte[] lng) {
 		bfr.Add(lat);
diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr_tst.java b/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr_tst.java
index a802eed31..cb9535506 100644
--- a/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr_tst.java
+++ b/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr_tst.java
@@ -16,8 +16,9 @@ You should have received a copy of the GNU Affero General Public License
 along with this program. If not, see .
 */
 package gplx.xowa.xtns.wbases; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
-import org.junit.*; import gplx.xowa.xtns.wbases.imports.*;
+import org.junit.*; import gplx.core.tests.*; import gplx.xowa.xtns.wbases.imports.*;
 public class Wdata_wiki_mgr_tst {
+	private final Wdata_prop_val_visitor__fxt fxt = new Wdata_prop_val_visitor__fxt();
 	@Test public void Basic() {
 		Wdata_wiki_mgr_fxt fxt = new Wdata_wiki_mgr_fxt().Init();
 		fxt.Init_links_add("enwiki", "Q1", "Q1_en");
@@ -54,4 +55,15 @@ public class Wdata_wiki_mgr_tst {
 		, ""
 		));
 	}
+	@Test public void normalize_for_decimal() {
+		fxt.Test__normalize_for_decimal("1234"   , "1234"); // basic
+		fxt.Test__normalize_for_decimal("+1234"  , "1234"); // plus
+		fxt.Test__normalize_for_decimal("1,234"  , "1234"); // comma
+		fxt.Test__normalize_for_decimal("+1,234" , "1234"); // both
+	}
+}
+class Wdata_prop_val_visitor__fxt {
+	public void Test__normalize_for_decimal(String raw, String expd) {
+		Gftest.Eq__str(expd, Wdata_prop_val_visitor.Normalize_for_decimal(Bry_.new_u8(raw)), raw);
+	}
 }
diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/pfuncs/Wdata_pf_property__basic__tst.java b/400_xowa/src/gplx/xowa/xtns/wbases/pfuncs/Wdata_pf_property__basic__tst.java
index ee9fb631d..922b2b2a8 100644
--- a/400_xowa/src/gplx/xowa/xtns/wbases/pfuncs/Wdata_pf_property__basic__tst.java
+++ b/400_xowa/src/gplx/xowa/xtns/wbases/pfuncs/Wdata_pf_property__basic__tst.java
@@ -81,6 +81,11 @@ public class Wdata_pf_property__basic__tst {
 		fxt.Init__docs__add(wdoc);
 		fxt.Test_parse("{{#property:p1}}", "1,234±2 meter");
 	}
+	@Test public void Quantity__decimal() {
+		fxt.Init_links_add("enwiki", "Test_page", "q1");
+		fxt.Init__docs__add(fxt.doc_("q1", fxt.Make_claim_quantity(1, "+1234.50", "meter", "+1236.75", "+1232.25")));
+		fxt.Test_parse("{{#property:p1}}", "1,234.5±2.25 meter"); // val - lo == hi - val == 2.25, so the val±dif form is printed
+	}
 	@Test public void Monolingualtext() {
 		fxt.Init_links_add("enwiki", "Test_page", "q1");
 		fxt.Init__docs__add(fxt.doc_("q1", fxt.Make_claim_monolingual(1, "la", "Lorem ipsum dolor sit amet")));