1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

'v3.9.4.1'

This commit is contained in:
gnosygnu
2016-09-25 22:05:47 -04:00
parent 35d78f6106
commit e3b393650d
211 changed files with 3148 additions and 2197 deletions

View File

@@ -0,0 +1,22 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.sql_dumps; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.strings.*;
/**
 * Callback invoked by {@code Xosql_dump_parser} each time it finishes reading the value of a
 * field that the caller registered interest in.
 */
public interface Xosql_dump_cbk {
/**
 * Receives one field value.
 *
 * @param fld_idx id of the field; the parser passes the field's uid, which is the position of
 *                the field's key in the key list given to the parser's constructor
 * @param src     byte array holding the value
 * @param val_bgn index in {@code src} where the value begins (the parser passes 0)
 * @param val_end index in {@code src} where the value ends (the parser passes the value length)
 */
// NOTE(review): the parser clears its value buffer right after this call returns, so src is
// presumably only valid for the duration of the call -- copy the bytes if they must be retained; confirm.
void On_fld_done(int fld_idx, byte[] src, int val_bgn, int val_end);
}

View File

@@ -0,0 +1,160 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.sql_dumps; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.flds.*; import gplx.core.ios.*; import gplx.core.ios.streams.*;
/**
 * State-machine parser for SQL dump files that hold a "CREATE TABLE" definition followed by one
 * or more "INSERT INTO ... VALUES (...),(...);" statements.
 *
 * The table definition is read first to learn the order of the table's fields. Each row value is
 * then matched by position to a table field; values of fields whose key was passed to the
 * constructor are reported through {@link Xosql_dump_cbk#On_fld_done}, all other values are dropped.
 */
public class Xosql_dump_parser {
	private Xosql_dump_cbk cbk;
	private Io_url src_fil; private int src_rdr_bfr_len = 8 * Io_mgr.Len_mb;
	private Xosql_fld_hash cbk_flds;	// fields requested by the caller; uid = position in cbk_keys
	private Ordered_hash tbl_flds;		// fields of the table def, in table order; built during Parse
	/**
	 * @param cbk      receives the value of every requested field
	 * @param cbk_keys names of the table fields to report; the position of a key in this list is
	 *                 the fld_idx handed to the callback
	 */
	public Xosql_dump_parser(Xosql_dump_cbk cbk, String... cbk_keys) {
		this.cbk = cbk;
		this.cbk_flds = Xosql_fld_hash.New(cbk_keys);
	}
	/** Sets the dump file to read; must be called before {@link #Parse}. */
	public void Src_fil_(Io_url v) {this.src_fil = v;}
	/**
	 * Reads the source file and fires the callback for each requested field of each row.
	 * The reader is always released, even when parsing fails.
	 *
	 * @param usr_dlg not used by the current implementation
	 */
	public void Parse(Gfo_usr_dlg usr_dlg) {
		Io_buffer_rdr rdr = Io_buffer_rdr.Null;
		try {
			// init bfrs, rdr
			Bry_bfr val_bfr = Bry_bfr_.New();
			rdr = Io_buffer_rdr.new_(Io_stream_rdr_.new_by_url_(src_fil), src_rdr_bfr_len);
			byte[] bfr = rdr.Bfr(); int bfr_len = rdr.Bfr_len(), fld_idx = 0, cur_pos = 0;
			// the table def is looked up in the reader's current buffer only
			this.tbl_flds = Identify_flds(cbk_flds, bfr);

			// init fld_rdr
			Gfo_fld_rdr fld_rdr = Gfo_fld_rdr.sql_();
			byte[] decode_regy = fld_rdr.Escape_decode();

			byte mode_prv = Mode__sql_bgn; byte mode = Mode__sql_bgn;
			boolean reading_file = true;
			while (reading_file) {
				// refill when fewer than 256 bytes remain in the buffer and the file has more data;
				// the margin could be 0, but erring on the side of simplicity
				if (cur_pos + 256 > bfr_len && rdr.Fil_pos() != rdr.Fil_len()) {
					rdr.Bfr_load_from(cur_pos);
					cur_pos = 0;
					bfr = rdr.Bfr();
					bfr_len = rdr.Bfr_len();
				}
				if (cur_pos == bfr_len) break;	// all data consumed
				byte b = bfr[cur_pos];
				switch (mode) {
					case Mode__sql_bgn:	// skip over header to next "INSERT INTO ... VALUES ("
						cur_pos = Bry_find_.Find_fwd(bfr, Bry_insert_into, cur_pos);
						if (cur_pos == Bry_find_.Not_found || cur_pos > bfr_len) {reading_file = false; continue;}	// no more INSERTs; stop
						cur_pos = Bry_find_.Find_fwd(bfr, Bry_values, cur_pos);
						if (cur_pos == Bry_find_.Not_found || cur_pos > bfr_len) throw Err_.new_wo_type("VALUES not found");	// something went wrong;
						mode = Mode__fld;	// Bry_values includes the opening "(", so the 1st row has already begun
						cur_pos += Bry_values.length;
						break;
					case Mode__row_bgn:	// assert "("
						switch (b) {
							case Byte_ascii.Paren_bgn:	mode = Mode__fld; break;
							default:					throw Err_.new_unhandled(mode);
						}
						++cur_pos;
						break;
					case Mode__row_end:	// handle 1st char after ")";
						switch (b) {
							case Byte_ascii.Nl:			break;							// ignore \n
							case Byte_ascii.Comma:		mode = Mode__row_bgn; break;	// handle ","; EX: "(1),(2)"
							case Byte_ascii.Semic:		mode = Mode__sql_bgn; break;	// handle ";"; EX: "(1);INSERT INTO"
							default:					throw Err_.new_unhandled(mode);
						}
						++cur_pos;
						break;
					case Mode__fld:	// handle fld chars; EX: "(1,'ab')"
						switch (b) {
							case Byte_ascii.Space:		// ws: skip; EX: "(1 , 2)"; "(1,\n2)"
							case Byte_ascii.Nl:
								break;
							case Byte_ascii.Apos:		// apos: switch modes; NOTE: never escape apos by doubling; will fail for empty fields; EX: ", '', ''"; DATE:2013-07-06
								mode = Mode__quote;
								break;
							case Byte_ascii.Backslash:	// backslash: switch modes;
								mode_prv = mode;
								mode = Mode__escape;
								break;
							case Byte_ascii.Comma:		// comma: end fld
								Commit_fld(fld_idx++, val_bfr);
								break;
							case Byte_ascii.Paren_end:	// paren_end: end fld and row
								Commit_fld(fld_idx++, val_bfr);
								fld_idx = 0;
								mode = Mode__row_end;
								break;
							default:					// all other chars; add to val_bfr
								val_bfr.Add_byte(b);
								break;
						}
						++cur_pos;
						break;
					case Mode__quote:	// add to val_bfr until quote encountered; also, handle backslashes;
						switch (b) {
							case Byte_ascii.Apos:		mode = Mode__fld; break;
							case Byte_ascii.Backslash:	mode_prv = mode; mode = Mode__escape; break;
							default:					val_bfr.Add_byte(b); break;
						}
						++cur_pos;
						break;
					case Mode__escape:	// get escape_val from decode_regy; if unknown, just add original
						// Java bytes are signed: a byte >= 0x80 (EX: part of a UTF-8 sequence) would be a negative
						// array index. Mask to 0..255 and treat anything outside the registry as an unknown escape.
						int escape_idx = b & 0xFF;
						byte escape_val = escape_idx < decode_regy.length ? decode_regy[escape_idx] : Byte_ascii.Null;
						if (escape_val == Byte_ascii.Null)
							val_bfr.Add_byte(Byte_ascii.Backslash).Add_byte(b);
						else
							val_bfr.Add_byte(escape_val);
						mode = mode_prv;	// switch back to prv_mode
						++cur_pos;
						break;
					default:	throw Err_.new_unhandled(mode);
				}
			}
		}
		finally {rdr.Rls();}
	}
	/**
	 * Ends the current field: reports the buffered value to the callback when the field at
	 * position {@code fld_idx} in the table def was requested, then empties the buffer.
	 */
	private void Commit_fld(int fld_idx, Bry_bfr val_bfr) {
		Xosql_fld_itm fld = (Xosql_fld_itm)tbl_flds.Get_at(fld_idx);	// handle new flds added by MW, but not supported by XO; EX:hiddencat and pp_sortkey; DATE:2014-04-28
		if (fld.Uid() != Int_.Max_value)	// Max_value marks a table field that no callback key matched
			cbk.On_fld_done(fld.Uid(), val_bfr.Bfr(), 0, val_bfr.Len());
		val_bfr.Clear();
	}
	/**
	 * Parses the table def in {@code raw} and copies the uid of each requested field onto the
	 * matching table field. Table fields without a match keep the uid given by the table parser.
	 *
	 * @return the table fields, sorted
	 */
	private static Ordered_hash Identify_flds(Xosql_fld_hash cbk_hash, byte[] raw) {
		// parse tbl def
		Xosql_tbl_parser tbl_parser = new Xosql_tbl_parser();
		Ordered_hash tbl_flds = tbl_parser.Parse(raw);

		// loop over tbl_flds
		int len = tbl_flds.Len();
		for (int i = 0; i < len; ++i) {
			Xosql_fld_itm tbl_itm = (Xosql_fld_itm)tbl_flds.Get_at(i);
			// get cbk_itm
			Xosql_fld_itm cbk_itm = cbk_hash.Get_by_key(tbl_itm.Key());
			if (cbk_itm == null) continue;	// table field not requested by the caller; leave its uid untouched
			// set tbl_def's uid to cbk_itm's uid
			tbl_itm.Uid_(cbk_itm.Uid());
		}
		tbl_flds.Sort();
		return tbl_flds;
	}
	/** Overrides the read-buffer length (default is 8 * Io_mgr.Len_mb). */
	public Xosql_dump_parser Src_rdr_bfr_len_(int v) {src_rdr_bfr_len = v; return this;}	// TEST:
	private static final byte[] Bry_insert_into = Bry_.new_a7("INSERT INTO "), Bry_values = Bry_.new_a7(" VALUES (");
	private static final byte Mode__sql_bgn = 0, Mode__row_bgn = 1, Mode__row_end = 2, Mode__fld = 3, Mode__quote = 4, Mode__escape = 5;
}

View File

@@ -0,0 +1,88 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.sql_dumps; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*; import gplx.core.ios.*; import gplx.core.tests.*;
/**
 * Tests for Xosql_dump_parser. Each test declares the table's fields, the subset of fields to
 * report, an INSERT statement, and the expected callback output ("|" after each value, one
 * line per row).
 */
public class Xosql_dump_parser__tst {
	private final Xosql_dump_parser__fxt fxt = new Xosql_dump_parser__fxt();
	@Test public void One() {
		String sql = "INSERT INTO 'tbl_1' VALUES (1,2);";
		String expd = "2|";
		fxt.Init(String_.Ary("c1", "c2"), "c2").Test__parse(sql, expd);
	}
	@Test public void Many() {
		String sql = "INSERT INTO 'tbl_1' VALUES (1,2),(3,4),(5,6);";
		String expd = "2|\n4|\n6|";
		fxt.Init(String_.Ary("c1", "c2"), "c2").Test__parse(sql, expd);
	}
	@Test public void Quote_basic() {
		String sql = "INSERT INTO 'tbl_1' VALUES (1,'a','b');";
		String expd = "a|b|";
		fxt.Init(String_.Ary("c1", "c2", "c3"), "c2", "c3").Test__parse(sql, expd);
	}
	@Test public void Escape_backslash() {
		// the dump text holds two backslashes, which decode to one
		String sql = "INSERT INTO 'tbl_1' VALUES (1,'a\\\\b','c');";
		String expd = "a\\b|c|";
		fxt.Init(String_.Ary("c1", "c2", "c3"), "c2", "c3").Test__parse(sql, expd);
	}
	@Test public void Escape_quote() {
		// NOTE(review): the dump text holds a bare double quote with no backslash in front of it,
		// so this does not go through the escape path -- confirm whether that is intended
		String sql = "INSERT INTO 'tbl_1' VALUES (1,'a\"b','c');";
		String expd = "a\"b|c|";
		fxt.Init(String_.Ary("c1", "c2", "c3"), "c2", "c3").Test__parse(sql, expd);
	}
	@Test public void Fld_paren_end() {
		// parens inside a quoted value must not end the row
		String sql = "INSERT INTO 'tbl_1' VALUES (1,'Психостимуляторы_(лекарственные_средства)','c');";
		String expd = "Психостимуляторы_(лекарственные_средства)|c|";
		fxt.Init(String_.Ary("c1", "c2", "c3"), "c2", "c3").Test__parse(sql, expd);
	}
	@Test public void Insert_multiple() {
		String sql = "INSERT INTO 'tbl_1' VALUES (1,2);INSERT INTO 'tbl_1' VALUES (3,4)";
		String expd = "2|\n4|";
		fxt.Init(String_.Ary("c1", "c2"), "c2").Test__parse(sql, expd);
	}
}
/**
 * Test fixture for Xosql_dump_parser: writes a small dump (table def + caller-supplied INSERT)
 * to "mem/test.sql", parses it, and compares what the callback collected with the expectation.
 */
class Xosql_dump_parser__fxt {
	private Xosql_dump_parser parser;
	private Xosql_dump_cbk__test cbk;
	private String[] tbl_flds;
	/**
	 * Starts a new scenario.
	 *
	 * @param tbl_flds field names to put in the generated CREATE TABLE
	 * @param cbk_flds field names the parser should report
	 */
	public Xosql_dump_parser__fxt Init(String[] tbl_flds, String... cbk_flds) {
		this.tbl_flds = tbl_flds;
		cbk = new Xosql_dump_cbk__test();
		parser = new Xosql_dump_parser(cbk, cbk_flds);
		return this;
	}
	/** Saves the dump, runs the parser over it, and asserts the collected output equals {@code expd}. */
	public void Test__parse(String raw_str, String expd) {
		Io_url src_fil = Io_url_.new_fil_("mem/test.sql");
		byte[] dump = Make_dump(tbl_flds, raw_str);
		Io_mgr.Instance.SaveFilBry(src_fil, dump);

		parser.Src_fil_(src_fil);
		parser.Parse(Gfo_usr_dlg_.Test());

		Gftest.Eq__str(expd, cbk.To_bry_and_clear());
	}
	/** Builds the dump bytes: a CREATE TABLE with one backticked field per line, a UNIQUE KEY line, then {@code insert}. */
	private byte[] Make_dump(String[] tbl_flds, String insert) {
		Bry_bfr bfr = Bry_bfr_.New();
		bfr.Add_str_a7("CREATE TABLE tbl_0 (");
		for (String fld : tbl_flds) {
			// one line per field; EX: "\n`c1`,"
			bfr.Add_byte_nl();
			bfr.Add_byte(Byte_ascii.Tick);
			bfr.Add_str_a7(fld);
			bfr.Add_byte(Byte_ascii.Tick);
			bfr.Add_byte_comma();
		}
		bfr.Add_str_a7("\nUNIQUE KEY idx_0 (fld_0));\n");
		bfr.Add_str_u8(insert);
		return bfr.To_bry_and_clear();
	}
}
/**
 * Test callback that records every reported value as "value|" and begins a new line whenever
 * the incoming field id is not greater than the previous one.
 */
class Xosql_dump_cbk__test implements Xosql_dump_cbk {
	private final Bry_bfr bfr = Bry_bfr_.New();
	private int prv_idx = -1;	// id of the last field received; -1 until the 1st field arrives
	/** Drops all recorded output and forgets the last field id. */
	public void Clear() {
		bfr.Clear();
		prv_idx = -1;
	}
	public void On_fld_done(int fld_idx, byte[] src, int val_bgn, int val_end) {
		// a field id that does not increase marks a new row; never emit a separator before the 1st field
		boolean row_changed = prv_idx != -1 && fld_idx <= prv_idx;
		if (row_changed)
			bfr.Add_byte_nl();
		bfr.Add_mid(src, val_bgn, val_end).Add_byte_pipe();
		prv_idx = fld_idx;
	}
	/** Returns the recorded output and empties the buffer. */
	public byte[] To_bry_and_clear() {
		return bfr.To_bry_and_clear();
	}
}

View File

@@ -0,0 +1,55 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.sql_dumps; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
/**
 * One field of a SQL table: a key (the field name as bytes), a uid, and an idx.
 * Items compare by idx.
 */
class Xosql_fld_itm implements gplx.CompareAble {
	private final byte[] key;
	private int uid;
	private int idx;
	public Xosql_fld_itm(int uid, byte[] key, int idx) {
		this.uid = uid;
		this.key = key;
		this.idx = idx;
	}
	public byte[] Key() {return this.key;}
	public int Uid() {return this.uid;}
	public void Uid_(int v) {this.uid = v;}
	public int Idx() {return this.idx;}
	public void Idx_(int v) {this.idx = v;}
	/** Orders items by idx; {@code obj} must be a Xosql_fld_itm. */
	public int compareTo(Object obj) {
		Xosql_fld_itm that = (Xosql_fld_itm)obj;
		return Int_.Compare(this.idx, that.idx);
	}
}
/** Collection of Xosql_fld_itm that can be read by key or by position. */
class Xosql_fld_hash {
	private final Ordered_hash hash = Ordered_hash_.New_bry();
	private int hash_len;	// copy of hash.Len(), refreshed on every Add
	public int Len() {return hash.Len();}
	public Xosql_fld_itm Get_by_key(byte[] k) {
		return (Xosql_fld_itm)hash.Get_by(k);
	}
	/** Returns the item at position {@code i}, or null when {@code i} is out of range. */
	public Xosql_fld_itm Get_by_idx_or_null(int i) {
		if (i <= -1 || i >= hash_len) return null;
		return (Xosql_fld_itm)hash.Get_at(i);
	}
	public void Add(Xosql_fld_itm itm) {
		hash.Add(itm.Key(), itm);
		hash_len = hash.Len();
	}
	public void Sort() {hash.Sort();}
	/**
	 * Creates a hash with one item per key. Each item's uid is the key's position in
	 * {@code keys} and its idx is -1. NOTE: keys must be passed in uid order.
	 */
	public static Xosql_fld_hash New(String[] keys) {
		Xosql_fld_hash rv = new Xosql_fld_hash();
		int uid = 0;
		for (String key : keys) {
			rv.Add(new Xosql_fld_itm(uid, Bry_.new_u8(key), -1));
			++uid;
		}
		return rv;
	}
}

View File

@@ -0,0 +1,61 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.sql_dumps; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.ios.*;
/**
 * Extracts the field names of a table from the "CREATE TABLE" statement of a SQL dump.
 * Only backtick-quoted names on the lines between the "CREATE TABLE" line and the first
 * "PRIMARY KEY" (or, failing that, "UNIQUE KEY") line are read.
 */
class Xosql_tbl_parser {
	/**
	 * Parses the table def found in {@code raw}.
	 *
	 * @param raw bytes containing a "CREATE TABLE" statement
	 * @return fields keyed by name; each item has uid Int_.Max_value and an idx equal to its
	 *         order of appearance (0-based)
	 * @throws an Err_ error when "CREATE TABLE", the new line after it, or the key line cannot be found
	 */
	public Ordered_hash Parse(byte[] raw) {
		Ordered_hash rv = Ordered_hash_.New_bry();

		// get bgn of fields def; assume after "CREATE TABLE"
		int bgn = Bry_find_.Find_fwd(raw, Tkn__create_table);
		if (bgn == Bry_find_.Not_found) throw Err_.new_wo_type("could not find 'CREATE TABLE'");
		bgn = Bry_find_.Find_fwd(raw, Byte_ascii.Nl, bgn);
		if (bgn == Bry_find_.Not_found) throw Err_.new_wo_type("could not find new line after 'CREATE TABLE'");
		bgn += 1; // position after character

		// get end of fields def; assume before "PRIMARY KEY" or "UNIQUE KEY"
		// search from bgn so that text before the table def can never yield an end that precedes bgn
		int end = Bry_find_.Find_fwd(raw, Tkn__primary_key, bgn);
		if (end == Bry_find_.Not_found) {	// as of 2016-07, en.w:categorylinks no longer has UNIQUE KEY; try PRIMARY KEY; DATE:2016-07-08
			end = Bry_find_.Find_fwd(raw, Tkn__unique_index, bgn);
			if (end == Bry_find_.Not_found) throw Err_.new_wo_type("could not find 'UNIQUE KEY' or 'PRIMARY KEY'");
		}
		// back up to the new line that starts the key line; check end (not bgn), as end is what Find_bwd just set
		end = Bry_find_.Find_bwd(raw, Byte_ascii.Nl, end);
		if (end == Bry_find_.Not_found) throw Err_.new_wo_type("could not find new line before 'UNIQUE KEY'");

		// do parse
		Parse_flds(rv, Bry_.Mid(raw, bgn, end));
		return rv;
	}
	/** Adds one item to {@code rv} for every line of {@code raw} that holds a backtick-quoted name. */
	private void Parse_flds(Ordered_hash rv, byte[] raw) {
		byte[][] lines = Bry_split_.Split(raw, Byte_ascii.Nl);
		int lines_len = lines.length;
		int fld_idx = 0;
		for (int i = 0; i < lines_len; i++) {
			byte[] line = lines[i];
			// get fld bgn / end; EX: "`fld_1`"
			int bgn = Bry_find_.Find_fwd(line, Byte_ascii.Tick);
			if (bgn == Bry_find_.Not_found) continue; // skip blank lines
			bgn += Int_.Const_position_after_char;
			int end = Bry_find_.Find_fwd(line, Byte_ascii.Tick, bgn);
			if (end == Bry_find_.Not_found) continue; // skip lines without a closing tick

			// add fld; uid is a placeholder, idx is the order of appearance
			byte[] key = Bry_.Mid(line, bgn, end);
			rv.Add(key, new Xosql_fld_itm(Int_.Max_value, key, fld_idx++));
		}
	}
	private static final byte[]
	  Tkn__create_table		= Bry_.new_a7("CREATE TABLE")
	, Tkn__unique_index		= Bry_.new_a7("UNIQUE KEY")
	, Tkn__primary_key		= Bry_.new_a7("PRIMARY KEY")
	;
}

View File

@@ -0,0 +1,64 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.sql_dumps; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*; import gplx.core.tests.*;
/** Tests for Xosql_tbl_parser: the field list must end at the UNIQUE KEY or PRIMARY KEY line. */
public class Xosql_tbl_parser__tst {
	private final Xosql_tbl_parser__fxt fxt = new Xosql_tbl_parser__fxt();
	@Test public void Basic() {
		String tbl_def = String_.Concat_lines_nl
		( "ignore"
		, "CREATE TABLE tbl_0 ("
		, " `fld_2` int,"
		, " `fld_1` int,"
		, " `fld_0` int,"
		, " UNIQUE KEY idx_0 (fld_2)"
		, ");"
		);
		fxt.Exec__parse(tbl_def);
		Test__three_flds();
	}
	@Test public void Primary_key() {
		String tbl_def = String_.Concat_lines_nl
		( "ignore"
		, "CREATE TABLE tbl_0 ("
		, " `fld_2` int,"
		, " `fld_1` int,"
		, " `fld_0` int,"
		, " PRIMARY KEY idx_0 (fld_2)"
		, ");"
		);
		fxt.Exec__parse(tbl_def);
		Test__three_flds();
	}
	// shared checks: 3 fields, each idx is its order of appearance; an unknown field yields -1
	private void Test__three_flds() {
		fxt.Test__count(3);
		fxt.Test__get("fld_0", 2);
		fxt.Test__get("fld_1", 1);
		fxt.Test__get("fld_2", 0);
		fxt.Test__get("fld_3", -1);
	}
}
/** Test fixture for Xosql_tbl_parser: parses a table def and exposes assertions on the result. */
class Xosql_tbl_parser__fxt {
	private final Xosql_tbl_parser parser = new Xosql_tbl_parser();
	private Ordered_hash tbl_flds;
	/** Parses {@code v} and keeps the resulting fields for the Test__ methods. */
	public void Exec__parse(String v) {
		byte[] raw = Bry_.new_a7(v);
		this.tbl_flds = parser.Parse(raw);
	}
	/** Asserts the number of parsed fields. */
	public void Test__count(int expd) {
		Gftest.Eq__int(expd, tbl_flds.Len());
	}
	/** Asserts the idx of the field named {@code key}; a missing field is compared as Bry_find_.Not_found. */
	public void Test__get(String key, int expd) {
		Xosql_fld_itm actl_itm = (Xosql_fld_itm)tbl_flds.Get_by(Bry_.new_u8(key));
		int actl = Bry_find_.Not_found;
		if (actl_itm != null)
			actl = actl_itm.Idx();
		Gftest.Eq__int(expd, actl);
	}
}