Import: Do not assume same database schema for all Wikimedia wikis

v3.3.4
gnosygnu 8 years ago
parent 50a063c8de
commit df45117d43

@ -60,15 +60,16 @@ public class Xobldr__image__create extends Xob_itm_dump_base implements Xob_cmd,
case Fld_img_bits: cur_bits = Bry_.To_int_or(src, val_bgn, val_end, -1); break;
case Fld_img_media_type: cur_media_type = Bry_.Mid(src, val_bgn, val_end); break;
case Fld_img_minor_mime: cur_minor_mime = Bry_.Mid(src, val_bgn, val_end); break;
case Fld_img_timestamp: cur_timestamp = Bry_.Mid(src, val_bgn, val_end);
cur_ext_id = Calc_ext_id(show_issues ? app.Usr_dlg() : Gfo_usr_dlg_.Noop, cur_ttl, cur_media_type, cur_minor_mime, cur_width, cur_height);
tbl_image.Insert(stmt, cur_ttl, cur_media_type, cur_minor_mime, cur_size, cur_width, cur_height, cur_bits, cur_ext_id, cur_timestamp);
++commit_count;
if ((commit_count % 10000) == 0) {
usr_dlg.Prog_many("", "", "committing: count=~{0} last=~{1}", commit_count, String_.new_u8(cur_ttl));
conn.Txn_sav();
}
break;
case Fld_img_timestamp: cur_timestamp = Bry_.Mid(src, val_bgn, val_end); break;
}
}
// Row-completion callback: computes cur_ext_id from the values buffered for the row,
// inserts the row into tbl_image, and calls conn.Txn_sav() on every 10000th row.
public void On_row_done() {
	cur_ext_id = Calc_ext_id
		( show_issues ? app.Usr_dlg() : Gfo_usr_dlg_.Noop
		, cur_ttl, cur_media_type, cur_minor_mime, cur_width, cur_height
		);
	tbl_image.Insert
		( stmt, cur_ttl, cur_media_type, cur_minor_mime, cur_size
		, cur_width, cur_height, cur_bits, cur_ext_id, cur_timestamp
		);

	commit_count += 1;
	if ((commit_count % 10000) != 0) return;	// only report progress / save at the 10000-row interval

	usr_dlg.Prog_many("", "", "committing: count=~{0} last=~{1}", commit_count, String_.new_u8(cur_ttl));
	conn.Txn_sav();
}
// No work is performed in Cmd_end for this command; the body is empty.
public void Cmd_end() {}

@ -20,7 +20,7 @@ import gplx.dbs.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.sql_dumps.*;
public class Imglnk_bldr_cmd extends Xob_sql_dump_base implements Xosql_dump_cbk {
private Imglnk_bldr_mgr mgr;
private int tmp_page_id;
private int tmp_page_id; private byte[] tmp_il_to;
private int rows = 0;
// Creates the command: runs Cmd_ctor with the given builder and wiki, then sets make_fil_len to Io_mgr.Len_mb.
public Imglnk_bldr_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
	this.Cmd_ctor(bldr, wiki);
	this.make_fil_len = Io_mgr.Len_mb;
}
@ -33,12 +33,13 @@ public class Imglnk_bldr_cmd extends Xob_sql_dump_base implements Xosql_dump_cbk
public void On_fld_done(int fld_idx, byte[] src, int val_bgn, int val_end) {
switch (fld_idx) {
case Fld__il_from: this.tmp_page_id = Bry_.To_int_or(src, val_bgn, val_end, -1); break;
case Fld__il_to:
mgr.Tmp_tbl().Insert_by_batch(tmp_page_id, Bry_.Mid(src, val_bgn, val_end));
if (++rows % 100000 == 0) usr_dlg.Prog_many("", "", "reading row ~{0}", Int_.To_str_fmt(rows, "#,##0"));
break;
case Fld__il_to: this.tmp_il_to = Bry_.Mid(src, val_bgn, val_end); break;
}
} private static final byte Fld__il_from = 0, Fld__il_to = 1;
// Row-completion callback: batches the buffered (tmp_page_id, tmp_il_to) pair into the temp table
// and reports progress on every 100000th row.
public void On_row_done() {
	mgr.Tmp_tbl().Insert_by_batch(tmp_page_id, tmp_il_to);

	rows += 1;
	if (rows % 100000 != 0) return;
	usr_dlg.Prog_many("", "", "reading row ~{0}", Int_.To_str_fmt(rows, "#,##0"));
}
@Override public void Cmd_end() {
if (fail) return;
mgr.On_cmd_end();

@ -21,7 +21,7 @@ import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.xowa.wikis.data.*; import
public class Pglnk_bldr_cmd extends Xob_sql_dump_base implements Xosql_dump_cbk {
private Db_conn conn;
private Pglnk_page_link_temp_tbl temp_tbl;
private int tmp_src_id, tmp_trg_ns;
private int tmp_src_id, tmp_trg_ns; private byte[] tmp_trg_ttl;
private int rows = 0;
// Creates the command: runs Cmd_ctor with the given builder and wiki, then sets make_fil_len to Io_mgr.Len_mb.
public Pglnk_bldr_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
	this.Cmd_ctor(bldr, wiki);
	this.make_fil_len = Io_mgr.Len_mb;
}
// Key identifying the dump type handled by this command.
public static final String Dump_type_key = "pagelinks";
// Returns Dump_type_key as the SQL file name for this command.
@Override public String Sql_file_name() {
	return Dump_type_key;
}
@ -53,13 +53,13 @@ public class Pglnk_bldr_cmd extends Xob_sql_dump_base implements Xosql_dump_cbk
switch (fld_idx) {
case Fld__pl_from: this.tmp_src_id = Bry_.To_int_or(src, val_bgn, val_end, -1); break;
case Fld__pl_namespace: this.tmp_trg_ns = Bry_.To_int_or(src, val_bgn, val_end, -1); break;
case Fld__pl_title:
byte[] tmp_trg_ttl = Bry_.Mid(src, val_bgn, val_end);
temp_tbl.Insert(tmp_src_id, tmp_trg_ns, tmp_trg_ttl);
if (++rows % 100000 == 0) usr_dlg.Prog_many("", "", "reading row ~{0}", Int_.To_str_fmt(rows, "#,##0"));
break;
case Fld__pl_title: this.tmp_trg_ttl = Bry_.Mid(src, val_bgn, val_end); break;
}
}
// Row-completion callback: inserts the buffered (tmp_src_id, tmp_trg_ns, tmp_trg_ttl) triple into temp_tbl
// and reports progress on every 100000th row.
public void On_row_done() {
	temp_tbl.Insert(tmp_src_id, tmp_trg_ns, tmp_trg_ttl);

	rows += 1;
	boolean report_progress = (rows % 100000) == 0;
	if (report_progress) {
		usr_dlg.Prog_many("", "", "reading row ~{0}", Int_.To_str_fmt(rows, "#,##0"));
	}
}
private static final byte Fld__pl_from = 0, Fld__pl_namespace = 1, Fld__pl_title = 2;
private static final String Sql__page_link__make = String_.Concat_lines_nl_skip_last
( "INSERT INTO page_link (src_id, trg_id, trg_count)"

@ -42,11 +42,12 @@ public class Xob_catlink_cmd extends Xob_sql_dump_base implements Xosql_dump_cbk
case Fld__cl_timestamp: this.tmp_timestamp = Bry_.Mid(src, val_bgn, val_end); break;
case Fld__cl_sortkey_prefix: this.tmp_sortkey_prefix = Bry_.Mid(src, val_bgn, val_end); break;
case Fld__cl_collation: this.tmp_collation = Bry_.Mid(src, val_bgn, val_end); break;
case Fld__cl_type: this.tmp_type = Bry_.Mid(src, val_bgn, val_end);
mgr.On_cmd_row(tmp_page_id, tmp_ctg_ttl, tmp_sortkey, tmp_timestamp, tmp_sortkey_prefix, tmp_collation, tmp_type);
break;
case Fld__cl_type: this.tmp_type = Bry_.Mid(src, val_bgn, val_end); break;
}
}
// Row-completion callback: forwards the field values buffered for this row to mgr.On_cmd_row.
public void On_row_done() {
	mgr.On_cmd_row
		( tmp_page_id
		, tmp_ctg_ttl
		, tmp_sortkey
		, tmp_timestamp
		, tmp_sortkey_prefix
		, tmp_collation
		, tmp_type
		);
}
private static final byte Fld__cl_from = 0, Fld__cl_to = 1, Fld__cl_sortkey = 2, Fld__cl_timestamp = 3, Fld__cl_sortkey_prefix = 4, Fld__cl_collation = 5, Fld__cl_type = 6;
public static final String BLDR_CMD_KEY = "wiki.categorylinks";

@ -42,14 +42,14 @@ public class Xob_pageprop_cmd extends Xob_sql_dump_base implements Xosql_dump_cb
switch (fld_idx) {
case Fld__pp_page: this.tmp_id = Bry_.To_int_or(src, val_bgn, val_end, -1); break;
case Fld__pp_propname: this.tmp_key_is_hiddencat = Bry_.Eq(src, val_bgn, val_end, Key_hiddencat); break;
case Fld__pp_value:
if (tmp_key_is_hiddencat)
tbl.Insert_cmd_by_batch(tmp_id);
if (++rows % 10000 == 0) usr_dlg.Prog_many("", "", "parsing pageprops sql: row=~{0}", Int_.To_str_fmt(rows, "#,##0"));
break;
}
}
private static final byte Fld__pp_page = 0, Fld__pp_propname = 1, Fld__pp_value = 2;
// Row-completion callback: batches tmp_id into tbl only when the row's key matched Key_hiddencat;
// the row counter advances for every row, and progress is reported on every 10000th row.
public void On_row_done() {
	if (tmp_key_is_hiddencat) {
		tbl.Insert_cmd_by_batch(tmp_id);
	}

	rows += 1;
	if (rows % 10000 == 0) {
		usr_dlg.Prog_many("", "", "parsing pageprops sql: row=~{0}", Int_.To_str_fmt(rows, "#,##0"));
	}
}
private static final byte Fld__pp_page = 0, Fld__pp_propname = 1;
public static final String BLDR_CMD_KEY = "wiki.page_props";
@Override public String Cmd_key() {return BLDR_CMD_KEY;}

@ -19,4 +19,5 @@ package gplx.xowa.bldrs.sql_dumps; import gplx.*; import gplx.xowa.*; import gpl
import gplx.core.strings.*;
/**
 * Callback for code that consumes values parsed from a SQL dump.
 * Implementations are notified of each field value and, separately, of the end of each row.
 */
public interface Xosql_dump_cbk {
/**
 * Receives one field value.
 * @param fld_idx index of the field (implementers in this code base compare it against 0-based field constants)
 * @param src     byte buffer that holds the value
 * @param val_bgn start position of the value in src
 * @param val_end end position of the value in src
 *                NOTE(review): presumably exclusive, since implementers pass it straight to Bry_.Mid / Bry_.To_int_or -- confirm
 */
void On_fld_done(int fld_idx, byte[] src, int val_bgn, int val_end);
/**
 * Signals that the current row is complete.
 * NOTE(review): Xosql_dump_parser appears to call this right after committing the last field at the row's closing paren -- confirm against the parser.
 */
void On_row_done();
}

@ -95,6 +95,7 @@ public class Xosql_dump_parser {
break;
case Byte_ascii.Paren_end: // paren_end: end fld and row
Commit_fld(fld_idx++, val_bfr);
cbk.On_row_done();
fld_idx = 0;
mode = Mode__row_end;
break;

@ -85,5 +85,6 @@ class Xosql_dump_cbk__test implements Xosql_dump_cbk {
bfr.Add_mid(src, val_bgn, val_end).Add_byte_pipe();
prv_idx = fld_idx;
}
// Row completion is ignored by this test callback; the body is empty.
public void On_row_done() {}
// Returns the result of bfr.To_bry_and_clear(), i.e. whatever the callback has accumulated in bfr.
public byte[] To_bry_and_clear() {
	byte[] rv = bfr.To_bry_and_clear();
	return rv;
}
}

@ -28,7 +28,9 @@ public abstract class Xob_sql_dump_base extends Xob_itm_dump_base implements Xob
public Io_url_gen Make_url_gen() {return make_url_gen;} private Io_url_gen make_url_gen;
public abstract String Sql_file_name();
protected abstract Xosql_dump_parser New_parser();
public void Cmd_init(Xob_bldr bldr) {}
public void Cmd_init(Xob_bldr bldr) {
}
public void Cmd_bgn(Xob_bldr bldr) {
this.Init_dump(this.Cmd_key());
make_url_gen = Io_url_gen_.dir_(temp_dir.GenSubDir("make"));

Loading…
Cancel
Save