Parser: Require coefficient for scientific notation [#795]

staging
gnosygnu 4 years ago
parent d3896bf547
commit 5f2e9c7514

@ -1,6 +1,6 @@
/* /*
XOWA: the XOWA Offline Wiki Application XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3, XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0. or alternatively under the terms of the Apache License Version 2.0.
@ -13,173 +13,181 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/ */
package gplx.core.primitives; import gplx.*; import gplx.core.*; package gplx.core.primitives;
public class Gfo_number_parser {
public int Rv_as_int() {return (int)num_val;} private long num_val = 0; import gplx.Byte_ascii;
public long Rv_as_long() {return num_val;} import gplx.Decimal_adp;
public Decimal_adp Rv_as_dec() {return dec_val == null ? Decimal_adp_.long_(num_val) : dec_val;} private Decimal_adp dec_val = null; import gplx.Decimal_adp_;
public boolean Is_int() {return dec_val == null && (num_val >= Int_.Min_value && num_val <= Int_.Max_value);} import gplx.Int_;
public boolean Has_err() {return has_err;} private boolean has_err; import gplx.Math_;
public boolean Has_frac() {return has_frac;} private boolean has_frac;
public boolean Hex_enabled() {return hex_enabled;} public Gfo_number_parser Hex_enabled_(boolean v) {hex_enabled = v; return this;} private boolean hex_enabled; public class Gfo_number_parser {
public Gfo_number_parser Ignore_chars_(byte[] v) {this.ignore_chars = v; return this;} private byte[] ignore_chars; public int Rv_as_int() {return (int)num_val;} private long num_val = 0;
public Gfo_number_parser Ignore_space_at_end_y_() {this.ignore_space_at_end = true; return this;} private boolean ignore_space_at_end; public long Rv_as_long() {return num_val;}
public void Clear() { public Decimal_adp Rv_as_dec() {return dec_val == null ? Decimal_adp_.long_(num_val) : dec_val;} private Decimal_adp dec_val = null;
ignore_chars = null; public boolean Is_int() {return dec_val == null && (num_val >= Int_.Min_value && num_val <= Int_.Max_value);}
} public boolean Has_err() {return has_err;} private boolean has_err;
public Gfo_number_parser Parse(byte[] src) {return Parse(src, 0, src.length);} public boolean Has_frac() {return has_frac;} private boolean has_frac;
public Gfo_number_parser Parse(byte[] ary, int bgn, int end) { public boolean Hex_enabled() {return hex_enabled;} public Gfo_number_parser Hex_enabled_(boolean v) {hex_enabled = v; return this;} private boolean hex_enabled;
int loop_bgn = end - 1, loop_end = bgn - 1, exp_multiplier = 1, factor = 10; public Gfo_number_parser Ignore_chars_(byte[] v) {this.ignore_chars = v; return this;} private byte[] ignore_chars;
long multiplier = 1, frc_multiplier = 1; public Gfo_number_parser Ignore_space_at_end_y_() {this.ignore_space_at_end = true; return this;} private boolean ignore_space_at_end;
num_val = 0; dec_val = null; boolean comma_nil = true; public void Clear() {
long frc_int = 0; ignore_chars = null;
has_err = false; has_frac = false; boolean has_exp = false, has_neg = false, exp_neg = false, has_plus = false, has_num = false; }
boolean input_is_hex = false; public Gfo_number_parser Parse(byte[] src) {return Parse(src, 0, src.length);}
if (hex_enabled) { public Gfo_number_parser Parse(byte[] ary, int bgn, int end) {
if (loop_end + 2 < end) { // ArrayOutOfBounds check int loop_bgn = end - 1, loop_end = bgn - 1, exp_multiplier = 1, factor = 10;
byte b_2 = ary[loop_end + 2]; long multiplier = 1, frc_multiplier = 1;
switch (b_2) { num_val = 0; dec_val = null; boolean comma_nil = true;
case Byte_ascii.Ltr_x: long frc_int = 0;
case Byte_ascii.Ltr_X: // is 2nd char x? has_err = false; has_frac = false; boolean has_exp = false, has_neg = false, exp_neg = false, has_plus = false, has_num = false;
if (ary[loop_end + 1] == Byte_ascii.Num_0) { // is 1st char 0? boolean input_is_hex = false;
factor = 16; if (hex_enabled) {
input_is_hex = true; if (loop_end + 2 < end) { // ArrayOutOfBounds check
} byte b_2 = ary[loop_end + 2];
break; switch (b_2) {
default: case Byte_ascii.Ltr_x:
break; case Byte_ascii.Ltr_X: // is 2nd char x?
} if (ary[loop_end + 1] == Byte_ascii.Num_0) { // is 1st char 0?
} factor = 16;
} input_is_hex = true;
for (int i = loop_bgn; i > loop_end; i--) { }
byte cur = ary[i]; break;
switch (cur) { default:
case Byte_ascii.Num_0: break;
case Byte_ascii.Num_1: }
case Byte_ascii.Num_2: }
case Byte_ascii.Num_3: }
case Byte_ascii.Num_4: for (int i = loop_bgn; i > loop_end; i--) {
case Byte_ascii.Num_5: byte cur = ary[i];
case Byte_ascii.Num_6: switch (cur) {
case Byte_ascii.Num_7: case Byte_ascii.Num_0:
case Byte_ascii.Num_8: case Byte_ascii.Num_1:
case Byte_ascii.Num_9: case Byte_ascii.Num_2:
num_val += (cur - Byte_ascii.Num_0) * multiplier; case Byte_ascii.Num_3:
multiplier *= factor; case Byte_ascii.Num_4:
has_num = true; case Byte_ascii.Num_5:
break; case Byte_ascii.Num_6:
case Byte_ascii.Dot: case Byte_ascii.Num_7:
if (has_frac) return Has_err_y_(); case Byte_ascii.Num_8:
frc_int = num_val; case Byte_ascii.Num_9:
num_val = 0; num_val += (cur - Byte_ascii.Num_0) * multiplier;
frc_multiplier = multiplier; multiplier *= factor;
multiplier = 1; has_num = true;
has_frac = true; break;
break; case Byte_ascii.Dot:
case Byte_ascii.Comma: if (has_frac) return Has_err_y_();
if (comma_nil) frc_int = num_val;
comma_nil = false; num_val = 0;
else frc_multiplier = multiplier;
return Has_err_y_(); multiplier = 1;
break; has_frac = true;
case Byte_ascii.Dash: break;
if (has_neg) return Has_err_y_(); case Byte_ascii.Comma:
has_neg = true; if (comma_nil)
break; comma_nil = false;
case Byte_ascii.Space: else
if (i == bgn) {} // space at bgn return Has_err_y_();
else if (i == end - 1 && ignore_space_at_end) {} // ignore space at end; DATE:2015-04-29 break;
else case Byte_ascii.Dash:
return Has_err_y_(); if (has_neg) return Has_err_y_();
break; has_neg = true;
case Byte_ascii.Plus: break;
if (has_plus) return Has_err_y_(); case Byte_ascii.Space:
has_plus = true; if (i == bgn) {} // space at bgn
break; else if (i == end - 1 && ignore_space_at_end) {} // ignore space at end; DATE:2015-04-29
case Byte_ascii.Ltr_e: else
case Byte_ascii.Ltr_E: return Has_err_y_();
if (input_is_hex) { break;
num_val += 14 * multiplier; // NOTE: 14=value of e/E case Byte_ascii.Plus:
multiplier *= factor; if (has_plus) return Has_err_y_();
has_num = true; has_plus = true;
} break;
else { case Byte_ascii.Ltr_e:
if (has_exp) return Has_err_y_(); case Byte_ascii.Ltr_E:
exp_neg = has_neg; if (input_is_hex) {
exp_multiplier = (int)Math_.Pow(10, num_val); num_val += 14 * multiplier; // NOTE: 14=value of e/E
num_val = 0; multiplier *= factor;
multiplier = 1; has_num = true;
has_exp = true; }
has_neg = false; else {
has_plus = false; // allow +1E+2 if (has_exp) return Has_err_y_();
} exp_neg = has_neg;
break; exp_multiplier = (int)Math_.Pow(10, num_val);
case Byte_ascii.Ltr_A: num_val = 0;
case Byte_ascii.Ltr_B: multiplier = 1;
case Byte_ascii.Ltr_C: has_exp = true;
case Byte_ascii.Ltr_D: has_neg = false;
case Byte_ascii.Ltr_F: has_plus = false; // allow +1E+2
if (input_is_hex) { has_num = false; // 2020-09-07|ISSUE#:795|scientific notation requires coefficient; set has_num to false which will fail below if no coefficient
num_val += (cur - Byte_ascii.Ltr_A + 10) * multiplier; }
multiplier *= factor; break;
has_num = true; case Byte_ascii.Ltr_A:
} case Byte_ascii.Ltr_B:
else case Byte_ascii.Ltr_C:
return Has_err_y_(); case Byte_ascii.Ltr_D:
break; case Byte_ascii.Ltr_F:
case Byte_ascii.Ltr_a: if (input_is_hex) {
case Byte_ascii.Ltr_b: num_val += (cur - Byte_ascii.Ltr_A + 10) * multiplier;
case Byte_ascii.Ltr_c: multiplier *= factor;
case Byte_ascii.Ltr_d: has_num = true;
case Byte_ascii.Ltr_f: }
if (input_is_hex) { else
num_val += (cur - Byte_ascii.Ltr_a + 10) * multiplier; return Has_err_y_();
multiplier *= factor; break;
has_num = true; case Byte_ascii.Ltr_a:
} case Byte_ascii.Ltr_b:
else case Byte_ascii.Ltr_c:
return Has_err_y_(); case Byte_ascii.Ltr_d:
break; case Byte_ascii.Ltr_f:
case Byte_ascii.Ltr_x: if (input_is_hex) {
case Byte_ascii.Ltr_X: num_val += (cur - Byte_ascii.Ltr_a + 10) * multiplier;
if (input_is_hex) multiplier *= factor;
return (factor == 16) ? this : Has_err_y_(); // check for '0x' has_num = true;
else }
return Has_err_y_(); else
default: return Has_err_y_();
if (ignore_chars != null) { break;
int ignore_chars_len = ignore_chars.length; case Byte_ascii.Ltr_x:
boolean ignored = false; case Byte_ascii.Ltr_X:
for (int j = 0; j < ignore_chars_len; ++j) { if (input_is_hex)
if (cur == ignore_chars[j]) { return (factor == 16) ? this : Has_err_y_(); // check for '0x'
ignored = true; else
break; return Has_err_y_();
} default:
} if (ignore_chars != null) {
if (ignored) continue; int ignore_chars_len = ignore_chars.length;
} boolean ignored = false;
return Has_err_y_(); for (int j = 0; j < ignore_chars_len; ++j) {
} if (cur == ignore_chars[j]) {
} ignored = true;
if (!has_num) return Has_err_y_(); // handles situations wherein just symbols; EX: "+", ".", "-.", " , " etc. break;
if (has_frac) { }
long full_val = (((num_val * frc_multiplier) + frc_int)); }
if (has_neg) full_val *= -1; if (ignored) continue;
if (has_exp) { }
if (exp_neg) frc_multiplier *= exp_multiplier; // divide, so apply to frc return Has_err_y_();
else full_val *= exp_multiplier; // multiply, so apply to full_val }
} }
dec_val = Decimal_adp_.divide_(full_val, frc_multiplier); if (!has_num) return Has_err_y_(); // handles situations wherein just symbols; EX: "+", ".", "-.", " , " etc.
} if (has_frac) {
else { long full_val = (((num_val * frc_multiplier) + frc_int));
if (has_neg) num_val *= -1; if (has_neg) full_val *= -1;
if (has_exp) { if (has_exp) {
num_val = exp_neg if (exp_neg) frc_multiplier *= exp_multiplier; // divide, so apply to frc
? num_val / exp_multiplier else full_val *= exp_multiplier; // multiply, so apply to full_val
: num_val * exp_multiplier; }
} dec_val = Decimal_adp_.divide_(full_val, frc_multiplier);
} }
return this; else {
} if (has_neg) num_val *= -1;
private Gfo_number_parser Has_err_y_() {has_err = true; return this;} if (has_exp) {
} num_val = exp_neg
? num_val / exp_multiplier
: num_val * exp_multiplier;
}
}
return this;
}
private Gfo_number_parser Has_err_y_() {has_err = true; return this;}
}

@ -1,6 +1,6 @@
/* /*
XOWA: the XOWA Offline Wiki Application XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3, XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0. or alternatively under the terms of the Apache License Version 2.0.
@ -13,95 +13,103 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/ */
package gplx.core.primitives; import gplx.*; import gplx.core.*; package gplx.core.primitives;
import org.junit.*;
public class Gfo_number_parser_tst { import gplx.Bry_;
@Before public void init() {fxt.Clear();} private final Gfo_number_parser_fxt fxt = new Gfo_number_parser_fxt(); import gplx.Decimal_adp;
@Test public void Integer() { import gplx.Decimal_adp_;
fxt.Test_int("1", 1); import gplx.Tfds;
fxt.Test_int("1234", 1234); import org.junit.Before;
fxt.Test_int("1234567890", 1234567890); import org.junit.Test;
fxt.Test_int("-1234", -1234);
fxt.Test_int("+1", 1); public class Gfo_number_parser_tst {
fxt.Test_int("00001", 1); @Before public void init() {fxt.Clear();} private final Gfo_number_parser_fxt fxt = new Gfo_number_parser_fxt();
} @Test public void Integer() {
@Test public void Long() { fxt.Test_int("1", 1);
fxt.Test_long("9876543210", 9876543210L); fxt.Test_int("1234", 1234);
} fxt.Test_int("1234567890", 1234567890);
@Test public void Decimal() { fxt.Test_int("-1234", -1234);
fxt.Test_dec("1.23", Decimal_adp_.parse("1.23")); fxt.Test_int("+1", 1);
fxt.Test_dec("1.023", Decimal_adp_.parse("1.023")); fxt.Test_int("00001", 1);
fxt.Test_dec("-1.23", Decimal_adp_.parse("-1.23")); }
} @Test public void Long() {
@Test public void Double_long() { fxt.Test_long("9876543210", 9876543210L);
fxt.Test_dec(".42190046219457", Decimal_adp_.parse(".42190046219457")); }
} @Test public void Decimal() {
@Test public void Exponent() { fxt.Test_dec("1.23", Decimal_adp_.parse("1.23"));
fxt.Test_int("1E2", 100); fxt.Test_dec("1.023", Decimal_adp_.parse("1.023"));
fxt.Test_dec("1.234E2", Decimal_adp_.parse("123.4")); fxt.Test_dec("-1.23", Decimal_adp_.parse("-1.23"));
fxt.Test_dec("1.234E-2", Decimal_adp_.parse(".01234")); }
fxt.Test_dec("123.4E-2", Decimal_adp_.parse("1.234")); @Test public void Double_long() {
fxt.Test_dec("+6.0E-3", Decimal_adp_.parse(".006")); fxt.Test_dec(".42190046219457", Decimal_adp_.parse(".42190046219457"));
} }
@Test public void Err() { @Test public void Exponent() {
fxt.Test_err("+", true); fxt.Test_int("1E2", 100);
fxt.Test_err("-", true); fxt.Test_dec("1.234E2", Decimal_adp_.parse("123.4"));
fxt.Test_err("a", true); fxt.Test_dec("1.234E-2", Decimal_adp_.parse(".01234"));
fxt.Test_err("1-2", false); fxt.Test_dec("123.4E-2", Decimal_adp_.parse("1.234"));
fxt.Test_err("1..1", true); fxt.Test_dec("+6.0E-3", Decimal_adp_.parse(".006"));
fxt.Test_err("1,,1", true); fxt.Test_err("e24", true); // 2020-09-07|ISSUE#:795|scientific notation requires coefficient
fxt.Test_err("1", false); }
} @Test public void Err() {
@Test public void Hex() { fxt.Test_err("+", true);
fxt.Test_hex("0x1" , 1); fxt.Test_err("-", true);
fxt.Test_hex("0xF" , 15); fxt.Test_err("a", true);
fxt.Test_hex("0x20" , 32); fxt.Test_err("1-2", false);
fxt.Test_hex("x20" , 0, false); fxt.Test_err("1..1", true);
fxt.Test_hex("d" , 0, false); // PURPOSE: d was being converted to 13; no.w:Hovedbanen; DATE:2014-04-13 fxt.Test_err("1,,1", true);
} fxt.Test_err("1", false);
@Test public void Ignore() { }
fxt.Init_ignore("\n\t"); @Test public void Hex() {
fxt.Test_int("1" , 1); fxt.Test_hex("0x1" , 1);
fxt.Test_int("1\n" , 1); fxt.Test_hex("0xF" , 15);
fxt.Test_int("1\t" , 1); fxt.Test_hex("0x20" , 32);
fxt.Test_int("1\n2" , 12); fxt.Test_hex("x20" , 0, false);
fxt.Test_err("1\r" , true); fxt.Test_hex("d" , 0, false); // PURPOSE: d was being converted to 13; no.w:Hovedbanen; DATE:2014-04-13
} }
} @Test public void Ignore() {
class Gfo_number_parser_fxt { fxt.Init_ignore("\n\t");
private final Gfo_number_parser parser = new Gfo_number_parser(); fxt.Test_int("1" , 1);
public void Clear() {parser.Clear();} fxt.Test_int("1\n" , 1);
public void Init_ignore(String chars) {parser.Ignore_chars_(Bry_.new_a7(chars));} fxt.Test_int("1\t" , 1);
public void Test_int(String raw, int expd) { fxt.Test_int("1\n2" , 12);
byte[] raw_bry = Bry_.new_a7(raw); fxt.Test_err("1\r" , true);
int actl = parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_int(); }
Tfds.Eq(expd, actl, raw); }
} class Gfo_number_parser_fxt {
public void Test_long(String raw, long expd) { private final Gfo_number_parser parser = new Gfo_number_parser();
byte[] raw_bry = Bry_.new_a7(raw); public void Clear() {parser.Clear();}
Tfds.Eq(expd, parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_long(), raw); public void Init_ignore(String chars) {parser.Ignore_chars_(Bry_.new_a7(chars));}
} public void Test_int(String raw, int expd) {
public void Test_dec(String raw, Decimal_adp expd) { byte[] raw_bry = Bry_.new_a7(raw);
byte[] raw_bry = Bry_.new_a7(raw); int actl = parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_int();
Decimal_adp actl = parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_dec(); Tfds.Eq(expd, actl, raw);
Tfds.Eq(expd.To_double(), actl.To_double(), raw); }
} public void Test_long(String raw, long expd) {
public void Test_err(String raw, boolean expd) { byte[] raw_bry = Bry_.new_a7(raw);
byte[] raw_bry = Bry_.new_a7(raw); Tfds.Eq(expd, parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_long(), raw);
boolean actl = parser.Parse(raw_bry, 0, raw_bry.length).Has_err(); }
Tfds.Eq(expd, actl, raw); public void Test_dec(String raw, Decimal_adp expd) {
} byte[] raw_bry = Bry_.new_a7(raw);
public void Test_hex(String raw, int expd_val) {Test_hex(raw, expd_val, true);} Decimal_adp actl = parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_dec();
public void Test_hex(String raw, int expd_val, boolean expd_pass) { Tfds.Eq(expd.To_double(), actl.To_double(), raw);
parser.Hex_enabled_(true); }
byte[] raw_bry = Bry_.new_a7(raw); public void Test_err(String raw, boolean expd) {
int actl = parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_int(); byte[] raw_bry = Bry_.new_a7(raw);
if (expd_pass) { boolean actl = parser.Parse(raw_bry, 0, raw_bry.length).Has_err();
Tfds.Eq(expd_val, actl, raw); Tfds.Eq(expd, actl, raw);
Tfds.Eq(true, !parser.Has_err()); }
} public void Test_hex(String raw, int expd_val) {Test_hex(raw, expd_val, true);}
else public void Test_hex(String raw, int expd_val, boolean expd_pass) {
Tfds.Eq(false, !parser.Has_err()); parser.Hex_enabled_(true);
parser.Hex_enabled_(false); byte[] raw_bry = Bry_.new_a7(raw);
} int actl = parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_int();
} if (expd_pass) {
Tfds.Eq(expd_val, actl, raw);
Tfds.Eq(true, !parser.Has_err());
}
else
Tfds.Eq(false, !parser.Has_err());
parser.Hex_enabled_(false);
}
}

Loading…
Cancel
Save