1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-09-13 21:54:44 -04:00
parent 2145f6382c
commit 5fe27b5b3b
649 changed files with 4726 additions and 3432 deletions

View File

@@ -0,0 +1,162 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.flds.*; import gplx.ios.*;
/**
 * Reads a SQL dump file and turns the rows of its "INSERT INTO ... VALUES (...)" statements
 * into text lines that are appended to files supplied by the target url generator.
 *
 * Typical setup, as exposed by the setters below: Src_fil_ (file to read), Trg_fil_gen_ (where
 * output goes), one of Flds_req_ (fields chosen by name) or Flds_req_idx_ (fields chosen by
 * position), optionally Fld_cmd_ (custom per-field handling), then Parse.
 *
 * Only fields that were requested are written; all other fields of a row are dropped.
 */
public class Sql_file_parser {
	/** Source SQL file that Parse reads. */
	public Io_url Src_fil() {return src_fil;} public Sql_file_parser Src_fil_(Io_url v) {src_fil = v; return this;} Io_url src_fil;
	/** Length passed to Io_buffer_rdr.new_ when opening the source file; defaults to 8 * Io_mgr.Len_mb. */
	public int Src_len() {return src_len;} public Sql_file_parser Src_len_(int v) {src_len = v; return this;} private int src_len = 8 * Io_mgr.Len_mb;
	/** Generator that supplies the url of each output file (one url per flush). */
	public Io_url_gen Trg_fil_gen() {return trg_fil_gen;} public Sql_file_parser Trg_fil_gen_(Io_url_gen v) {trg_fil_gen = v; return this;} Io_url_gen trg_fil_gen;
	/** Output buffer threshold: once the pending output is longer than this, Commit_row flushes it to the next target url; defaults to 4 * Io_mgr.Len_mb. */
	public int Trg_len() {return trg_len;} public Sql_file_parser Trg_len_(int v) {trg_len = v; return this;} private int trg_len = 4 * Io_mgr.Len_mb;
	// flds_all is indexed by a field's position within a row; a null slot means "field not requested; skip it"
	private Sql_fld_itm[] flds_all; private int flds_all_len;
	Gfo_fld_rdr sql_parser = Gfo_fld_rdr.sql_(); Gfo_fld_wtr fld_wtr = Gfo_fld_wtr.xowa_();
	// states of the byte-by-byte state machine in Parse
	static final byte Mode_sql_bgn = 0, Mode_row_bgn = 1, Mode_row_end = 2, Mode_fld = 3, Mode_quote = 4, Mode_escape = 5;
	/** Optional custom handler invoked for every requested field; when null, Commit_fld writes the field through fld_wtr. */
	public Sql_file_parser Fld_cmd_(Sql_file_parser_cmd v) {this.fld_cmd = v; return this;} Sql_file_parser_cmd fld_cmd;
	/** Requests fields by name; the names are resolved to positions by Identify_flds when Parse starts. */
	public Sql_file_parser Flds_req_(byte[]... v) {flds_req = v; return this;} private byte[][] flds_req;
	/**
	 * Requests fields by position.
	 * @param flds_all_len total number of field slots to allocate
	 * @param idxs positions of the fields to keep; each must be less than flds_all_len
	 * @return this parser, for chaining
	 */
	public Sql_file_parser Flds_req_idx_(int flds_all_len, int... idxs) {
		new_flds_all(flds_all_len);
		int len = idxs.length;
		for (int i = 0; i < len; i++) {
			int idx = idxs[i];
			Sql_fld_itm itm = new Sql_fld_itm(idx, Bry_.Empty);	// no name is known when selecting by position, so the key is empty
			flds_all[idx] = itm;
		}
		return this;
	}
	/**
	 * Resolves the field names in flds_req to positions by parsing the table definition found in raw.
	 * Throws if a requested field is not present in the parsed definition.
	 */
	private void Identify_flds(byte[] raw) {
		Sql_fld_mgr fld_mgr = new Sql_fld_mgr().Parse(raw);
		new_flds_all(fld_mgr.Count());
		int len = flds_req.length;
		for (int i = 0; i < len; i++) {
			byte[] fld = flds_req[i];
			Sql_fld_itm itm = fld_mgr.Get_by_key(fld); if (itm == null) throw Err_.new_wo_type("could not find field", "fld", fld);
			flds_all[itm.Idx()] = itm;
		}
	}
	private void new_flds_all(int len) {
		this.flds_all_len = len;	// NOTE: must set flds_all_len, else Commit_fld will not be correct;
		this.flds_all = new Sql_fld_itm[len];
	}
	/**
	 * Parses the source file and appends the converted rows to the target files.
	 * The reader is always released, even when parsing throws.
	 * @param usr_dlg receives a progress message each time an output file is flushed
	 */
	public void Parse(Gfo_usr_dlg usr_dlg) {
		Io_buffer_rdr rdr = Io_buffer_rdr.Null;
		try {
			rdr = Io_buffer_rdr.new_(gplx.ios.Io_stream_rdr_.new_by_url_(src_fil), src_len);
			Bry_bfr fil_bfr = Bry_bfr.new_(), val_bfr = Bry_bfr.new_();	// fil_bfr: pending output; val_bfr: bytes of the field currently being read
			byte[] bfr = rdr.Bfr(); int bfr_len = rdr.Bfr_len(), fld_idx = 0, cur_pos = 0;
			if (flds_req != null) Identify_flds(bfr);	// fields were requested by name; resolve them against the first buffer
			byte mode = Mode_sql_bgn; byte[] decode_regy = sql_parser.Escape_decode();
			boolean reading_file = true; byte mode_prv = Mode_sql_bgn;	// mode_prv: mode to return to after an escape sequence
			Sql_file_parser_data data = new Sql_file_parser_data();
			while (reading_file) {
				if (cur_pos + 256 > bfr_len && rdr.Fil_pos() != rdr.Fil_len()) {	// buffer 256 characters; can be 0, but erring on side of simplicity
					rdr.Bfr_load_from(cur_pos);
					cur_pos = 0;
					bfr = rdr.Bfr();
					bfr_len = rdr.Bfr_len();
				}
				if (cur_pos == bfr_len) break;	// nothing left to read
				byte b = bfr[cur_pos];
				switch (mode) {
					case Mode_sql_bgn:	// between statements: skip ahead to the next "INSERT INTO ... VALUES ("
						cur_pos = Bry_finder.Find_fwd(bfr, Bry_insert_into, cur_pos);
						if (cur_pos == Bry_.NotFound || cur_pos > bfr_len) {reading_file = false; continue;}	// no more inserts; "continue" re-tests the loop condition and exits
						cur_pos = Bry_finder.Find_fwd(bfr, Bry_values, cur_pos);
						if (cur_pos == Bry_.NotFound || cur_pos > bfr_len) throw Err_.new_wo_type("VALUES not found");	// something went wrong;
						mode = Mode_fld;	// Bry_values already ends with "(", so the first row has begun
						cur_pos += Bry_values.length;
						break;
					case Mode_row_bgn:	// after "," between rows: only "(" is accepted
						switch (b) {
							case Byte_ascii.Paren_bgn:		mode = Mode_fld; break;
							default:						throw Err_.new_unhandled(mode);
						}
						++cur_pos;
						break;
					case Mode_row_end:	// after ")": "," starts another row; ";" ends the statement
						switch (b) {
							case Byte_ascii.Nl:				break;	// ignore \n
							case Byte_ascii.Comma:			mode = Mode_row_bgn; break;
							case Byte_ascii.Semic:			mode = Mode_sql_bgn; break;
							default:						throw Err_.new_unhandled(mode);
						}
						++cur_pos;
						break;
					case Mode_fld:	// inside a row, outside of quotes
						switch (b) {
							case Byte_ascii.Apos:			mode = Mode_quote; break;	// NOTE: never escape apos by doubling; will fail for empty fields; EX: ", '', ''"; DATE:2013-07-06
							case Byte_ascii.Backslash:		mode_prv = mode; mode = Mode_escape; break;
							case Byte_ascii.Space: case Byte_ascii.Nl: break;	// unquoted whitespace is not part of the value
							case Byte_ascii.Comma:
								Commit_fld(fld_idx++, val_bfr, fil_bfr, data);
								break;
							case Byte_ascii.Paren_end:	// end of row: commit the last field, then the row unless it was cancelled
								Commit_fld(fld_idx++, val_bfr, fil_bfr, data);
								if (!data.Cancel_row())
									Commit_row(usr_dlg, fil_bfr);
								fld_idx = 0;
								mode = Mode_row_end;
								break;
							default:						val_bfr.Add_byte(b); break;
						}
						++cur_pos;
						break;
					case Mode_quote:	// inside '...': everything is literal except the closing apos and backslash escapes
						switch (b) {
							case Byte_ascii.Apos:			mode = Mode_fld; break;
							case Byte_ascii.Backslash:		mode_prv = mode; mode = Mode_escape; break;
							default:						val_bfr.Add_byte(b); break;
						}
						++cur_pos;
						break;
					case Mode_escape:	// byte immediately after a backslash
						// Java bytes are signed: mask to 0..255 so that a byte >= 0x80 does not become a negative array index.
						// NOTE(review): assumes Escape_decode() has an entry for every unsigned byte value (256 entries) -- confirm
						byte escape_val = decode_regy[b & 0xFF];
						if (escape_val == Byte_ascii.Null)	{val_bfr.Add_byte(Byte_ascii.Backslash).Add_byte(b);}	// not a known escape; keep backslash and byte as-is
						else								val_bfr.Add_byte(escape_val);
						mode = mode_prv;
						++cur_pos;
						break;
					default:							throw Err_.new_unhandled(mode);
				}
			}
			Io_mgr.I.AppendFilByt(trg_fil_gen.Nxt_url(), fil_bfr.Xto_bry_and_clear());	// flush whatever is still pending
		}
		finally {rdr.Rls();}
	}
	/** Terminates the current output row with a new-line and flushes the output buffer to the next target url once it is longer than trg_len. */
	private void Commit_row(Gfo_usr_dlg usr_dlg, Bry_bfr fil_bfr) {
		fil_bfr.Add_byte(Byte_ascii.Nl);
		if (fil_bfr.Len() > trg_len) {
			Io_url trg_fil = trg_fil_gen.Nxt_url();
			usr_dlg.Prog_one(GRP_KEY, "make", "making ~{0}", trg_fil.NameAndExt());
			Io_mgr.I.AppendFilByt(trg_fil, fil_bfr.Xto_bry_and_clear());
		}
	}
	/**
	 * Finishes the field at position fld_idx: a requested field is either written through fld_wtr or
	 * handed to fld_cmd; an unrequested field is dropped. val_bfr is cleared in both cases.
	 * Note that data's cancel flag is reset before every requested field is handled.
	 */
	private void Commit_fld(int fld_idx, Bry_bfr val_bfr, Bry_bfr fil_bfr, Sql_file_parser_data data) {
		Sql_fld_itm fld = fld_idx < flds_all_len ? flds_all[fld_idx] : null;	// handle new flds added by MW, but not supported by XO; EX:hiddencat and pp_sortkey; DATE:2014-04-28
		if (fld != null) {
			data.Cancel_row_n_();
			if (fld_cmd == null) {	// no custom cmd; assume append;
				fld_wtr.Bfr_(fil_bfr);
				fld_wtr.Write_bry_escape_fld(val_bfr.Bfr(), 0, val_bfr.Len());
			}
			else
				fld_cmd.Exec(val_bfr.Bfr(), fld.Key(), fld_idx, 0, val_bfr.Len(), fil_bfr, data);
		}
		val_bfr.Clear();
	}
	private static final byte[] Bry_insert_into = Bry_.new_a7("INSERT INTO "), Bry_values = Bry_.new_a7(" VALUES (");
	private static final String GRP_KEY = "xowa.bldr.sql";
}

View File

@@ -0,0 +1,43 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.strings.*;
/**
 * Callback that Sql_file_parser invokes for each requested field of a row, in place of its
 * default "append the escaped field to the output" behavior.
 */
public interface Sql_file_parser_cmd {
/**
 * Handles one field value.
 * @param src      byte array holding the field value
 * @param fld_key  key (name) of the field; the parser passes the key of its Sql_fld_itm
 * @param fld_idx  position of the field within the row
 * @param fld_bgn  start position of the value within src (the parser passes 0)
 * @param fld_end  end position of the value within src (the parser passes the value's length)
 * @param file_bfr output buffer for the current row; implementations write their output here
 * @param data     per-row state; exposes a cancel flag that the parser checks before committing the row
 */
void Exec(byte[] src, byte[] fld_key, int fld_idx, int fld_bgn, int fld_end, Bry_bfr file_bfr, Sql_file_parser_data data);
}
/**
 * Field command that copies each field to the output followed by a pipe, while tracking the
 * longest field length seen and remembering every field longer than a configurable threshold.
 */
class Sql_file_parser_cmd_max_len implements Sql_file_parser_cmd {
	List_adp log = List_adp_.new_();	// values whose length exceeded log_len, in the order encountered
	private int log_len = 141;
	private int max_len;

	/** Sets the length above which a field value is remembered for Log_print. */
	public void Log_len_(int v) {
		log_len = v;
	}
	/** Longest field length passed to Exec so far. */
	public int Max_len() {
		return max_len;
	}
	/** Saves the remembered values to url, one "length|value" line per value. */
	public void Log_print(Io_url url) {
		String_bldr sb = String_bldr_.new_();
		int pos = 0;
		while (pos < log.Count()) {
			String val = (String)log.Get_at(pos);
			String line = String_.Len(val) + "|" + val + "\n";
			sb.Add(line);
			pos++;
		}
		Io_mgr.I.SaveFilStr(url, sb.To_str());
	}
	/** Updates the statistics for the field in src[fld_bgn..fld_end), then appends the field and a pipe to file_bfr. */
	public void Exec(byte[] src, byte[] fld_key, int fld_idx, int fld_bgn, int fld_end, Bry_bfr file_bfr, Sql_file_parser_data data) {
		int cur_len = fld_end - fld_bgn;
		max_len = cur_len > max_len ? cur_len : max_len;
		boolean too_long = cur_len > log_len;
		if (too_long)
			log.Add(String_.new_u8(src, fld_bgn, fld_end));
		file_bfr.Add_mid(src, fld_bgn, fld_end);
		file_bfr.Add_byte(Byte_ascii.Pipe);
	}
}

View File

@@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
/**
 * Mutable per-row state shared between Sql_file_parser and its field commands.
 * Currently holds a single flag: whether the row being built should be cancelled.
 */
public class Sql_file_parser_data {
	private boolean cancel_row;	// false until Cancel_row_y_ is called

	/** True when the current row has been marked as cancelled. */
	public boolean Cancel_row() {
		return cancel_row;
	}
	/** Clears the cancel flag; returns this for chaining. */
	public Sql_file_parser_data Cancel_row_n_() {
		this.cancel_row = false;
		return this;
	}
	/** Sets the cancel flag; returns this for chaining. */
	public Sql_file_parser_data Cancel_row_y_() {
		this.cancel_row = true;
		return this;
	}
}

View File

@@ -0,0 +1,73 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*; import gplx.ios.*;
/**
 * Tests for Sql_file_parser: each case selects fields by position, parses one small SQL string
 * and compares the generated output text.
 */
public class Sql_file_parser_tst {
	Sql_file_parser_fxt fxt = new Sql_file_parser_fxt();
	@Before public void init() {
		fxt.Clear();
	}
	// one row; keep only the 2nd of 2 fields
	@Test public void One() {
		fxt.Init_flds_req_idx_(2, 1);
		fxt.Test_parse("INSERT INTO 'tbl_1' VALUES (1,2);", "2|\n");
	}
	// several rows in one INSERT produce one output line each
	@Test public void Many() {
		fxt.Init_flds_req_idx_(2, 1);
		fxt.Test_parse("INSERT INTO 'tbl_1' VALUES (1,2),(3,4),(5,6);", "2|\n4|\n6|\n");
	}
	// quoted values are written without their quotes
	@Test public void Quote_basic() {
		fxt.Init_flds_req_idx_(3, 1, 2);
		fxt.Test_parse("INSERT INTO 'tbl_1' VALUES (1,'a','b');", "a|b|\n");
	}
	// a pipe inside a value is escaped in the output
	@Test public void Escape_pipe() {
		fxt.Init_flds_req_idx_(3, 1, 2);
		fxt.Test_parse("INSERT INTO 'tbl_1' VALUES (1,'a|b','c');", "a~pb|c|\n");
	}
	// SQL "\n" escape
	@Test public void Escape_nl() {
		fxt.Init_flds_req_idx_(3, 1, 2);
		fxt.Test_parse("INSERT INTO 'tbl_1' VALUES (1,'a\\nb','c');", "a~nb|c|\n");
	}
	// SQL "\t" escape
	@Test public void Escape_tab() {
		fxt.Init_flds_req_idx_(3, 1, 2);
		fxt.Test_parse("INSERT INTO 'tbl_1' VALUES (1,'a\\tb','c');", "a~tb|c|\n");
	}
	// SQL "\\" escape yields a single backslash
	@Test public void Escape_backslash() {
		fxt.Init_flds_req_idx_(3, 1, 2);
		fxt.Test_parse("INSERT INTO 'tbl_1' VALUES (1,'a\\\\b','c');", "a\\b|c|\n");
	}
	// a double quote inside a single-quoted value passes through unchanged
	@Test public void Escape_quote() {
		fxt.Init_flds_req_idx_(3, 1, 2);
		fxt.Test_parse("INSERT INTO 'tbl_1' VALUES (1,'a\"b','c');", "a\"b|c|\n");
	}
	// parentheses inside a quoted (and non-ASCII) value must not end the row
	@Test public void Fld_paren_end() {
		fxt.Init_flds_req_idx_(3, 1, 2);
		fxt.Test_parse("INSERT INTO 'tbl_1' VALUES (1,'Психостимуляторы_(лекарственные_средства)','c');", "Психостимуляторы_(лекарственные_средства)|c|\n");
	}
	// two INSERT statements in one file; the second has no trailing semicolon
	@Test public void Insert_multiple() {
		fxt.Init_flds_req_idx_(2, 1);
		fxt.Test_parse("INSERT INTO 'tbl_1' VALUES (1,2);INSERT INTO 'tbl_1' VALUES (3,4)", "2|\n4|\n");
	}
	// a custom field command receives the fields; afterwards it reports the longest length seen
	@Test public void Cmds() {
		Sql_file_parser_cmd_max_len max_len_cmd = new Sql_file_parser_cmd_max_len();
		fxt.Init_flds_req_idx_(2, 1);
		fxt.Init_cmd_(max_len_cmd);
		fxt.Test_parse("INSERT INTO 'tbl_1' VALUES (1,'a'),(3,'abc');", "a|\nabc|\n");
		Tfds.Eq(3, max_len_cmd.Max_len());
	}
}
/**
 * Test fixture for Sql_file_parser: owns a parser configured with small (Io_mgr.Len_kb) source
 * and target lengths and runs it against files in the in-memory file system.
 */
class Sql_file_parser_fxt {
	Sql_file_parser parser = new Sql_file_parser().Src_len_(Io_mgr.Len_kb).Trg_len_(Io_mgr.Len_kb);

	/** Re-initializes the in-memory io engine. */
	public Sql_file_parser_fxt Clear() {
		Io_mgr.I.InitEngine_mem();
		return this;
	}
	/** Selects the fields to keep by position; see Sql_file_parser.Flds_req_idx_. */
	public Sql_file_parser_fxt Init_flds_req_idx_(int flds_all_len, int... idxs) {
		parser.Flds_req_idx_(flds_all_len, idxs);
		return this;
	}
	/** Installs a custom field command on the parser. */
	public Sql_file_parser_fxt Init_cmd_(Sql_file_parser_cmd cmd) {
		parser.Fld_cmd_(cmd);
		return this;
	}
	/** Saves raw_str as the source file, parses it, and checks that the target file's content equals expd. */
	public void Test_parse(String raw_str, String expd) {
		Io_url sql_url = Io_url_.new_fil_("mem/test.sql");
		Io_mgr.I.SaveFilBry(sql_url, Bry_.new_u8(raw_str));
		Io_url csv_url = Io_url_.new_fil_("mem/test.csv");
		parser.Src_fil_(sql_url);
		parser.Trg_fil_gen_(Io_url_gen_.fil_(csv_url));
		parser.Parse(Gfo_usr_dlg_.Test());
		byte[] output = Io_mgr.I.LoadFilBry(csv_url);
		String actl = String_.new_u8(output);
		Tfds.Eq(expd, actl);
	}
}

View File

@@ -0,0 +1,60 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.ios.*;
class Sql_fld_mgr {
public int Count() {return hash.Count();}
public Sql_fld_itm Get_by_key(String fld) {return Get_by_key(Bry_.new_u8(fld));}
public Sql_fld_itm Get_by_key(byte[] fld) {
return (Sql_fld_itm)hash.Get_by(fld);
} private Ordered_hash hash = Ordered_hash_.new_bry_();
public Sql_fld_mgr Parse(byte[] raw) {
hash.Clear();
int bgn = Bry_finder.Find_fwd(raw, Tkn_create_table); if (bgn == Bry_.NotFound) throw Err_.new_wo_type("could not find 'CREATE TABLE'");
bgn = Bry_finder.Find_fwd(raw, Byte_ascii.Nl, bgn); if (bgn == Bry_.NotFound) throw Err_.new_wo_type("could not find new line after 'CREATE TABLE'");
bgn += Int_.Const_position_after_char;
int end = Bry_finder.Find_fwd(raw, Tkn_unique_index); if (end == Bry_.NotFound) throw Err_.new_wo_type("could not find 'UNIQUE KEY'");
end = Bry_finder.Find_bwd(raw, Byte_ascii.Nl, end); if (bgn == Bry_.NotFound) throw Err_.new_wo_type("could not find new line before 'UNIQUE KEY'");
Parse_lines(Bry_.Mid(raw, bgn, end));
return this;
}
private void Parse_lines(byte[] raw) {
byte[][] lines = Bry_.Split(raw, Byte_ascii.Nl);
int lines_len = lines.length;
int fld_idx = 0;
for (int i = 0; i < lines_len; i++) {
byte[] line = lines[i];
int bgn = Bry_finder.Find_fwd(line, Byte_ascii.Tick); if (bgn == Bry_.NotFound) continue; // skip blank lines
bgn += Int_.Const_position_after_char;
int end = Bry_finder.Find_fwd(line, Byte_ascii.Tick, bgn); if (end == Bry_.NotFound) continue; // skip blank lines
byte[] key = Bry_.Mid(line, bgn, end);
Sql_fld_itm fld = new Sql_fld_itm(fld_idx++, key);
hash.Add(fld.Key(), fld);
}
}
private static final byte[]
Tkn_create_table = Bry_.new_a7("CREATE TABLE")
, Tkn_unique_index = Bry_.new_a7("UNIQUE KEY")
;
public static final int Not_found = -1;
}
/** A field of a SQL table: its position within a row and its name as bytes. */
class Sql_fld_itm {
	private int idx;
	private byte[] key;

	public Sql_fld_itm(int idx, byte[] key) {
		this.key = key;
		this.idx = idx;
	}
	/** Position of the field within a row. */
	public int Idx() {
		return this.idx;
	}
	/** Name of the field; the same array instance that was passed to the constructor. */
	public byte[] Key() {
		return this.key;
	}
}

View File

@@ -0,0 +1,47 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*;
/** Tests for Sql_fld_mgr: parses a small CREATE TABLE statement and checks the field positions. */
public class Sql_fld_mgr_tst {
	Sql_fld_mgr_fxt fxt = new Sql_fld_mgr_fxt();
	// fields are numbered in order of appearance; an unknown field reports -1
	@Test public void Basic() {
		String sql = String_.Concat_lines_nl
		( "ignore"
		, "CREATE TABLE tbl_0 ("
		, "  `fld_2` int,"
		, "  `fld_1` int,"
		, "  `fld_0` int,"
		, "  UNIQUE KEY idx_0 (fld_2)"
		, ");"
		);
		fxt.Exec_parse(sql);
		fxt.Test_count(3);
		fxt.Exec_get("fld_0", 2);
		fxt.Exec_get("fld_1", 1);
		fxt.Exec_get("fld_2", 0);
		fxt.Exec_get("fld_3", -1);
	}
}
/** Test fixture wrapping a single Sql_fld_mgr. */
class Sql_fld_mgr_fxt {
	Sql_fld_mgr fld_mgr = new Sql_fld_mgr();

	/** Parses v with the wrapped manager. */
	public void Exec_parse(String v) {
		byte[] raw = Bry_.new_a7(v);
		fld_mgr.Parse(raw);
	}
	/** Checks that key resolves to position expd; a missing field is compared as Sql_fld_mgr.Not_found. */
	public void Exec_get(String key, int expd) {
		Sql_fld_itm found = fld_mgr.Get_by_key(key);
		int actl;
		if (found != null)
			actl = found.Idx();
		else
			actl = Sql_fld_mgr.Not_found;
		Tfds.Eq(expd, actl);
	}
	/** Checks the number of fields found by the last parse. */
	public void Test_count(int expd) {
		Tfds.Eq(expd, fld_mgr.Count());
	}
}