Parser: Require coefficient for scientific notation [#795]

staging
gnosygnu 4 years ago
parent d3896bf547
commit 5f2e9c7514

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,173 +13,181 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.core.primitives; import gplx.*; import gplx.core.*;
public class Gfo_number_parser {
public int Rv_as_int() {return (int)num_val;} private long num_val = 0;
public long Rv_as_long() {return num_val;}
public Decimal_adp Rv_as_dec() {return dec_val == null ? Decimal_adp_.long_(num_val) : dec_val;} private Decimal_adp dec_val = null;
public boolean Is_int() {return dec_val == null && (num_val >= Int_.Min_value && num_val <= Int_.Max_value);}
public boolean Has_err() {return has_err;} private boolean has_err;
public boolean Has_frac() {return has_frac;} private boolean has_frac;
public boolean Hex_enabled() {return hex_enabled;} public Gfo_number_parser Hex_enabled_(boolean v) {hex_enabled = v; return this;} private boolean hex_enabled;
public Gfo_number_parser Ignore_chars_(byte[] v) {this.ignore_chars = v; return this;} private byte[] ignore_chars;
public Gfo_number_parser Ignore_space_at_end_y_() {this.ignore_space_at_end = true; return this;} private boolean ignore_space_at_end;
public void Clear() {
ignore_chars = null;
}
public Gfo_number_parser Parse(byte[] src) {return Parse(src, 0, src.length);}
public Gfo_number_parser Parse(byte[] ary, int bgn, int end) {
int loop_bgn = end - 1, loop_end = bgn - 1, exp_multiplier = 1, factor = 10;
long multiplier = 1, frc_multiplier = 1;
num_val = 0; dec_val = null; boolean comma_nil = true;
long frc_int = 0;
has_err = false; has_frac = false; boolean has_exp = false, has_neg = false, exp_neg = false, has_plus = false, has_num = false;
boolean input_is_hex = false;
if (hex_enabled) {
if (loop_end + 2 < end) { // ArrayOutOfBounds check
byte b_2 = ary[loop_end + 2];
switch (b_2) {
case Byte_ascii.Ltr_x:
case Byte_ascii.Ltr_X: // is 2nd char x?
if (ary[loop_end + 1] == Byte_ascii.Num_0) { // is 1st char 0?
factor = 16;
input_is_hex = true;
}
break;
default:
break;
}
}
}
for (int i = loop_bgn; i > loop_end; i--) {
byte cur = ary[i];
switch (cur) {
case Byte_ascii.Num_0:
case Byte_ascii.Num_1:
case Byte_ascii.Num_2:
case Byte_ascii.Num_3:
case Byte_ascii.Num_4:
case Byte_ascii.Num_5:
case Byte_ascii.Num_6:
case Byte_ascii.Num_7:
case Byte_ascii.Num_8:
case Byte_ascii.Num_9:
num_val += (cur - Byte_ascii.Num_0) * multiplier;
multiplier *= factor;
has_num = true;
break;
case Byte_ascii.Dot:
if (has_frac) return Has_err_y_();
frc_int = num_val;
num_val = 0;
frc_multiplier = multiplier;
multiplier = 1;
has_frac = true;
break;
case Byte_ascii.Comma:
if (comma_nil)
comma_nil = false;
else
return Has_err_y_();
break;
case Byte_ascii.Dash:
if (has_neg) return Has_err_y_();
has_neg = true;
break;
case Byte_ascii.Space:
if (i == bgn) {} // space at bgn
else if (i == end - 1 && ignore_space_at_end) {} // ignore space at end; DATE:2015-04-29
else
return Has_err_y_();
break;
case Byte_ascii.Plus:
if (has_plus) return Has_err_y_();
has_plus = true;
break;
case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_E:
if (input_is_hex) {
num_val += 14 * multiplier; // NOTE: 14=value of e/E
multiplier *= factor;
has_num = true;
}
else {
if (has_exp) return Has_err_y_();
exp_neg = has_neg;
exp_multiplier = (int)Math_.Pow(10, num_val);
num_val = 0;
multiplier = 1;
has_exp = true;
has_neg = false;
has_plus = false; // allow +1E+2
}
break;
case Byte_ascii.Ltr_A:
case Byte_ascii.Ltr_B:
case Byte_ascii.Ltr_C:
case Byte_ascii.Ltr_D:
case Byte_ascii.Ltr_F:
if (input_is_hex) {
num_val += (cur - Byte_ascii.Ltr_A + 10) * multiplier;
multiplier *= factor;
has_num = true;
}
else
return Has_err_y_();
break;
case Byte_ascii.Ltr_a:
case Byte_ascii.Ltr_b:
case Byte_ascii.Ltr_c:
case Byte_ascii.Ltr_d:
case Byte_ascii.Ltr_f:
if (input_is_hex) {
num_val += (cur - Byte_ascii.Ltr_a + 10) * multiplier;
multiplier *= factor;
has_num = true;
}
else
return Has_err_y_();
break;
case Byte_ascii.Ltr_x:
case Byte_ascii.Ltr_X:
if (input_is_hex)
return (factor == 16) ? this : Has_err_y_(); // check for '0x'
else
return Has_err_y_();
default:
if (ignore_chars != null) {
int ignore_chars_len = ignore_chars.length;
boolean ignored = false;
for (int j = 0; j < ignore_chars_len; ++j) {
if (cur == ignore_chars[j]) {
ignored = true;
break;
}
}
if (ignored) continue;
}
return Has_err_y_();
}
}
if (!has_num) return Has_err_y_(); // handles situations wherein just symbols; EX: "+", ".", "-.", " , " etc.
if (has_frac) {
long full_val = (((num_val * frc_multiplier) + frc_int));
if (has_neg) full_val *= -1;
if (has_exp) {
if (exp_neg) frc_multiplier *= exp_multiplier; // divide, so apply to frc
else full_val *= exp_multiplier; // multiply, so apply to full_val
}
dec_val = Decimal_adp_.divide_(full_val, frc_multiplier);
}
else {
if (has_neg) num_val *= -1;
if (has_exp) {
num_val = exp_neg
? num_val / exp_multiplier
: num_val * exp_multiplier;
}
}
return this;
}
private Gfo_number_parser Has_err_y_() {has_err = true; return this;}
}
package gplx.core.primitives;
import gplx.Byte_ascii;
import gplx.Decimal_adp;
import gplx.Decimal_adp_;
import gplx.Int_;
import gplx.Math_;
public class Gfo_number_parser {
public int Rv_as_int() {return (int)num_val;} private long num_val = 0;
public long Rv_as_long() {return num_val;}
public Decimal_adp Rv_as_dec() {return dec_val == null ? Decimal_adp_.long_(num_val) : dec_val;} private Decimal_adp dec_val = null;
public boolean Is_int() {return dec_val == null && (num_val >= Int_.Min_value && num_val <= Int_.Max_value);}
public boolean Has_err() {return has_err;} private boolean has_err;
public boolean Has_frac() {return has_frac;} private boolean has_frac;
public boolean Hex_enabled() {return hex_enabled;} public Gfo_number_parser Hex_enabled_(boolean v) {hex_enabled = v; return this;} private boolean hex_enabled;
public Gfo_number_parser Ignore_chars_(byte[] v) {this.ignore_chars = v; return this;} private byte[] ignore_chars;
public Gfo_number_parser Ignore_space_at_end_y_() {this.ignore_space_at_end = true; return this;} private boolean ignore_space_at_end;
public void Clear() {
ignore_chars = null;
}
public Gfo_number_parser Parse(byte[] src) {return Parse(src, 0, src.length);}
public Gfo_number_parser Parse(byte[] ary, int bgn, int end) {
int loop_bgn = end - 1, loop_end = bgn - 1, exp_multiplier = 1, factor = 10;
long multiplier = 1, frc_multiplier = 1;
num_val = 0; dec_val = null; boolean comma_nil = true;
long frc_int = 0;
has_err = false; has_frac = false; boolean has_exp = false, has_neg = false, exp_neg = false, has_plus = false, has_num = false;
boolean input_is_hex = false;
if (hex_enabled) {
if (loop_end + 2 < end) { // ArrayOutOfBounds check
byte b_2 = ary[loop_end + 2];
switch (b_2) {
case Byte_ascii.Ltr_x:
case Byte_ascii.Ltr_X: // is 2nd char x?
if (ary[loop_end + 1] == Byte_ascii.Num_0) { // is 1st char 0?
factor = 16;
input_is_hex = true;
}
break;
default:
break;
}
}
}
for (int i = loop_bgn; i > loop_end; i--) {
byte cur = ary[i];
switch (cur) {
case Byte_ascii.Num_0:
case Byte_ascii.Num_1:
case Byte_ascii.Num_2:
case Byte_ascii.Num_3:
case Byte_ascii.Num_4:
case Byte_ascii.Num_5:
case Byte_ascii.Num_6:
case Byte_ascii.Num_7:
case Byte_ascii.Num_8:
case Byte_ascii.Num_9:
num_val += (cur - Byte_ascii.Num_0) * multiplier;
multiplier *= factor;
has_num = true;
break;
case Byte_ascii.Dot:
if (has_frac) return Has_err_y_();
frc_int = num_val;
num_val = 0;
frc_multiplier = multiplier;
multiplier = 1;
has_frac = true;
break;
case Byte_ascii.Comma:
if (comma_nil)
comma_nil = false;
else
return Has_err_y_();
break;
case Byte_ascii.Dash:
if (has_neg) return Has_err_y_();
has_neg = true;
break;
case Byte_ascii.Space:
if (i == bgn) {} // space at bgn
else if (i == end - 1 && ignore_space_at_end) {} // ignore space at end; DATE:2015-04-29
else
return Has_err_y_();
break;
case Byte_ascii.Plus:
if (has_plus) return Has_err_y_();
has_plus = true;
break;
case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_E:
if (input_is_hex) {
num_val += 14 * multiplier; // NOTE: 14=value of e/E
multiplier *= factor;
has_num = true;
}
else {
if (has_exp) return Has_err_y_();
exp_neg = has_neg;
exp_multiplier = (int)Math_.Pow(10, num_val);
num_val = 0;
multiplier = 1;
has_exp = true;
has_neg = false;
has_plus = false; // allow +1E+2
has_num = false; // 2020-09-07|ISSUE#:795|scientific notation requires coefficient; set has_num to false which will fail below if no coefficient
}
break;
case Byte_ascii.Ltr_A:
case Byte_ascii.Ltr_B:
case Byte_ascii.Ltr_C:
case Byte_ascii.Ltr_D:
case Byte_ascii.Ltr_F:
if (input_is_hex) {
num_val += (cur - Byte_ascii.Ltr_A + 10) * multiplier;
multiplier *= factor;
has_num = true;
}
else
return Has_err_y_();
break;
case Byte_ascii.Ltr_a:
case Byte_ascii.Ltr_b:
case Byte_ascii.Ltr_c:
case Byte_ascii.Ltr_d:
case Byte_ascii.Ltr_f:
if (input_is_hex) {
num_val += (cur - Byte_ascii.Ltr_a + 10) * multiplier;
multiplier *= factor;
has_num = true;
}
else
return Has_err_y_();
break;
case Byte_ascii.Ltr_x:
case Byte_ascii.Ltr_X:
if (input_is_hex)
return (factor == 16) ? this : Has_err_y_(); // check for '0x'
else
return Has_err_y_();
default:
if (ignore_chars != null) {
int ignore_chars_len = ignore_chars.length;
boolean ignored = false;
for (int j = 0; j < ignore_chars_len; ++j) {
if (cur == ignore_chars[j]) {
ignored = true;
break;
}
}
if (ignored) continue;
}
return Has_err_y_();
}
}
if (!has_num) return Has_err_y_(); // handles situations wherein just symbols; EX: "+", ".", "-.", " , " etc.
if (has_frac) {
long full_val = (((num_val * frc_multiplier) + frc_int));
if (has_neg) full_val *= -1;
if (has_exp) {
if (exp_neg) frc_multiplier *= exp_multiplier; // divide, so apply to frc
else full_val *= exp_multiplier; // multiply, so apply to full_val
}
dec_val = Decimal_adp_.divide_(full_val, frc_multiplier);
}
else {
if (has_neg) num_val *= -1;
if (has_exp) {
num_val = exp_neg
? num_val / exp_multiplier
: num_val * exp_multiplier;
}
}
return this;
}
private Gfo_number_parser Has_err_y_() {has_err = true; return this;}
}

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,95 +13,103 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.core.primitives; import gplx.*; import gplx.core.*;
import org.junit.*;
public class Gfo_number_parser_tst {
@Before public void init() {fxt.Clear();} private final Gfo_number_parser_fxt fxt = new Gfo_number_parser_fxt();
@Test public void Integer() {
fxt.Test_int("1", 1);
fxt.Test_int("1234", 1234);
fxt.Test_int("1234567890", 1234567890);
fxt.Test_int("-1234", -1234);
fxt.Test_int("+1", 1);
fxt.Test_int("00001", 1);
}
@Test public void Long() {
fxt.Test_long("9876543210", 9876543210L);
}
@Test public void Decimal() {
fxt.Test_dec("1.23", Decimal_adp_.parse("1.23"));
fxt.Test_dec("1.023", Decimal_adp_.parse("1.023"));
fxt.Test_dec("-1.23", Decimal_adp_.parse("-1.23"));
}
@Test public void Double_long() {
fxt.Test_dec(".42190046219457", Decimal_adp_.parse(".42190046219457"));
}
@Test public void Exponent() {
fxt.Test_int("1E2", 100);
fxt.Test_dec("1.234E2", Decimal_adp_.parse("123.4"));
fxt.Test_dec("1.234E-2", Decimal_adp_.parse(".01234"));
fxt.Test_dec("123.4E-2", Decimal_adp_.parse("1.234"));
fxt.Test_dec("+6.0E-3", Decimal_adp_.parse(".006"));
}
@Test public void Err() {
fxt.Test_err("+", true);
fxt.Test_err("-", true);
fxt.Test_err("a", true);
fxt.Test_err("1-2", false);
fxt.Test_err("1..1", true);
fxt.Test_err("1,,1", true);
fxt.Test_err("1", false);
}
@Test public void Hex() {
fxt.Test_hex("0x1" , 1);
fxt.Test_hex("0xF" , 15);
fxt.Test_hex("0x20" , 32);
fxt.Test_hex("x20" , 0, false);
fxt.Test_hex("d" , 0, false); // PURPOSE: d was being converted to 13; no.w:Hovedbanen; DATE:2014-04-13
}
@Test public void Ignore() {
fxt.Init_ignore("\n\t");
fxt.Test_int("1" , 1);
fxt.Test_int("1\n" , 1);
fxt.Test_int("1\t" , 1);
fxt.Test_int("1\n2" , 12);
fxt.Test_err("1\r" , true);
}
}
class Gfo_number_parser_fxt {
private final Gfo_number_parser parser = new Gfo_number_parser();
public void Clear() {parser.Clear();}
public void Init_ignore(String chars) {parser.Ignore_chars_(Bry_.new_a7(chars));}
public void Test_int(String raw, int expd) {
byte[] raw_bry = Bry_.new_a7(raw);
int actl = parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_int();
Tfds.Eq(expd, actl, raw);
}
public void Test_long(String raw, long expd) {
byte[] raw_bry = Bry_.new_a7(raw);
Tfds.Eq(expd, parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_long(), raw);
}
public void Test_dec(String raw, Decimal_adp expd) {
byte[] raw_bry = Bry_.new_a7(raw);
Decimal_adp actl = parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_dec();
Tfds.Eq(expd.To_double(), actl.To_double(), raw);
}
public void Test_err(String raw, boolean expd) {
byte[] raw_bry = Bry_.new_a7(raw);
boolean actl = parser.Parse(raw_bry, 0, raw_bry.length).Has_err();
Tfds.Eq(expd, actl, raw);
}
public void Test_hex(String raw, int expd_val) {Test_hex(raw, expd_val, true);}
public void Test_hex(String raw, int expd_val, boolean expd_pass) {
parser.Hex_enabled_(true);
byte[] raw_bry = Bry_.new_a7(raw);
int actl = parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_int();
if (expd_pass) {
Tfds.Eq(expd_val, actl, raw);
Tfds.Eq(true, !parser.Has_err());
}
else
Tfds.Eq(false, !parser.Has_err());
parser.Hex_enabled_(false);
}
}
package gplx.core.primitives;
import gplx.Bry_;
import gplx.Decimal_adp;
import gplx.Decimal_adp_;
import gplx.Tfds;
import org.junit.Before;
import org.junit.Test;
public class Gfo_number_parser_tst {
@Before public void init() {fxt.Clear();} private final Gfo_number_parser_fxt fxt = new Gfo_number_parser_fxt();
@Test public void Integer() {
fxt.Test_int("1", 1);
fxt.Test_int("1234", 1234);
fxt.Test_int("1234567890", 1234567890);
fxt.Test_int("-1234", -1234);
fxt.Test_int("+1", 1);
fxt.Test_int("00001", 1);
}
@Test public void Long() {
fxt.Test_long("9876543210", 9876543210L);
}
@Test public void Decimal() {
fxt.Test_dec("1.23", Decimal_adp_.parse("1.23"));
fxt.Test_dec("1.023", Decimal_adp_.parse("1.023"));
fxt.Test_dec("-1.23", Decimal_adp_.parse("-1.23"));
}
@Test public void Double_long() {
fxt.Test_dec(".42190046219457", Decimal_adp_.parse(".42190046219457"));
}
@Test public void Exponent() {
fxt.Test_int("1E2", 100);
fxt.Test_dec("1.234E2", Decimal_adp_.parse("123.4"));
fxt.Test_dec("1.234E-2", Decimal_adp_.parse(".01234"));
fxt.Test_dec("123.4E-2", Decimal_adp_.parse("1.234"));
fxt.Test_dec("+6.0E-3", Decimal_adp_.parse(".006"));
fxt.Test_err("e24", true); // 2020-09-07|ISSUE#:795|scientific notation requires coefficient
}
@Test public void Err() {
fxt.Test_err("+", true);
fxt.Test_err("-", true);
fxt.Test_err("a", true);
fxt.Test_err("1-2", false);
fxt.Test_err("1..1", true);
fxt.Test_err("1,,1", true);
fxt.Test_err("1", false);
}
@Test public void Hex() {
fxt.Test_hex("0x1" , 1);
fxt.Test_hex("0xF" , 15);
fxt.Test_hex("0x20" , 32);
fxt.Test_hex("x20" , 0, false);
fxt.Test_hex("d" , 0, false); // PURPOSE: d was being converted to 13; no.w:Hovedbanen; DATE:2014-04-13
}
@Test public void Ignore() {
fxt.Init_ignore("\n\t");
fxt.Test_int("1" , 1);
fxt.Test_int("1\n" , 1);
fxt.Test_int("1\t" , 1);
fxt.Test_int("1\n2" , 12);
fxt.Test_err("1\r" , true);
}
}
class Gfo_number_parser_fxt {
private final Gfo_number_parser parser = new Gfo_number_parser();
public void Clear() {parser.Clear();}
public void Init_ignore(String chars) {parser.Ignore_chars_(Bry_.new_a7(chars));}
public void Test_int(String raw, int expd) {
byte[] raw_bry = Bry_.new_a7(raw);
int actl = parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_int();
Tfds.Eq(expd, actl, raw);
}
public void Test_long(String raw, long expd) {
byte[] raw_bry = Bry_.new_a7(raw);
Tfds.Eq(expd, parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_long(), raw);
}
public void Test_dec(String raw, Decimal_adp expd) {
byte[] raw_bry = Bry_.new_a7(raw);
Decimal_adp actl = parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_dec();
Tfds.Eq(expd.To_double(), actl.To_double(), raw);
}
public void Test_err(String raw, boolean expd) {
byte[] raw_bry = Bry_.new_a7(raw);
boolean actl = parser.Parse(raw_bry, 0, raw_bry.length).Has_err();
Tfds.Eq(expd, actl, raw);
}
public void Test_hex(String raw, int expd_val) {Test_hex(raw, expd_val, true);}
public void Test_hex(String raw, int expd_val, boolean expd_pass) {
parser.Hex_enabled_(true);
byte[] raw_bry = Bry_.new_a7(raw);
int actl = parser.Parse(raw_bry, 0, raw_bry.length).Rv_as_int();
if (expd_pass) {
Tfds.Eq(expd_val, actl, raw);
Tfds.Eq(true, !parser.Has_err());
}
else
Tfds.Eq(false, !parser.Has_err());
parser.Hex_enabled_(false);
}
}

Loading…
Cancel
Save