1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Embeddable: Create core dbs in proper subdirectory

This commit is contained in:
gnosygnu
2017-10-23 20:50:50 -04:00
parent 1336d44f34
commit 66877212bf
4537 changed files with 311750 additions and 0 deletions

View File

@@ -13,3 +13,63 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.wmdumps.imglinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.wmdumps.*;
import gplx.dbs.*;
/**
 * Template for running a bulk database command over a uid range in fixed-size windows.
 *
 * <p>{@link #Exec()} asks the subclass for the maximum uid and the window size, calls
 * {@code Bulk_bgn} once, calls {@code Bulk_run} once per window, and finally calls {@code Bulk_end} once.
 * The first window is {@code (-1, uid_rng)}; both bounds then advance by {@code uid_rng} per step.
 * Whether a bound is inclusive or exclusive is decided by the subclass's {@code Bulk_run}.
 */
public abstract class Db_bulk_cmd_base {
  /**
   * Runs the bulk command across all windows up to {@code Get_uid_max()}.
   *
   * @throws IllegalArgumentException if {@code Get_uid_rng()} is not positive; the loop could never advance
   */
  public void Exec() {
    int uid_max = Get_uid_max();
    int uid_rng = Get_uid_rng();
    // a zero or negative range would never move uid_bgn past uid_max; fail before any txn is opened
    if (uid_rng <= 0)
      throw new IllegalArgumentException("uid range must be positive; uid_rng=" + uid_rng);
    // track bounds as long so that values near Integer.MAX_VALUE cannot wrap around and loop forever
    long uid_bgn = -1, uid_end = uid_rng;
    this.Bulk_bgn();
    while (uid_bgn <= uid_max) {
      // uid_bgn is <= uid_max here, so it always fits in an int; uid_end is clamped to the int range
      Bulk_run((int)uid_bgn, (int)Math.min(uid_end, (long)Integer.MAX_VALUE));
      uid_bgn += uid_rng;
      uid_end += uid_rng;
    }
    this.Bulk_end();
  }
  /** Returns the highest uid that must be covered by a window. */
  protected abstract int Get_uid_max();
  /** Returns the window size; must be positive. */
  protected abstract int Get_uid_rng();
  /** Called once before the first window. */
  protected abstract void Bulk_bgn();
  /** Called once after the last window. */
  protected abstract void Bulk_end();
  /** Processes one window; {@code uid_end - uid_bgn} equals the range unless the end was clamped. */
  protected abstract void Bulk_run(int uid_bgn, int uid_end);
}
/**
 * Bulk command that sets {@code img_wiki} and {@code img_id} on rows of {@code img_link_tmp}, one uid
 * window at a time, by looking up namespace-6 pages in an attached page database.
 *
 * NOTE(review): the SQL below targets "img_link_tmp", whereas Imglnk_tmp_tbl in this package creates
 * "imglnk_tmp", and the only visible callers (Imglnk_bldr_mgr.On_cmd_end) are commented out.
 * Confirm the table name before re-enabling this command.
 */
class Imglnk_bulk_cmd__img_id extends Db_bulk_cmd_base {
  private final Db_conn conn;
  private final Db_attach_mgr attach_mgr;
  private final int img_wiki;   // 0 when the wiki is local; 1 otherwise
  private String sql;           // resolved UPDATE template; built in Bulk_bgn
  /**
   * @param conn connection on which the UPDATE statements are executed
   * @param wiki_is_local selects the value written to img_wiki (0 if true, 1 if false)
   * @param wiki wiki whose page table connection is attached as "page_db"
   */
  public Imglnk_bulk_cmd__img_id(Db_conn conn, boolean wiki_is_local, Xowe_wiki wiki) {
    this.conn = conn;
    Db_attach_itm page_db_itm = new Db_attach_itm("page_db", wiki.Data__core_mgr().Db__core().Tbl__page().Conn());
    this.attach_mgr = new Db_attach_mgr(conn, page_db_itm);
    if (wiki_is_local)
      this.img_wiki = 0;
    else
      this.img_wiki = 1;
    // xowa.wiki.image.sqlite3
    // INSERT INTO img_link (img_name, img_wiki) SELECT img_name, Count(img_name) FROM img_link_tmp GROUP BY img_name
  }
  /** Highest img_uid in img_link_tmp; -1 is the fallback passed to the select. */
  @Override protected int Get_uid_max() {
    return conn.Exec_select_as_int("SELECT Max(img_uid) FROM img_link_tmp", -1);
  }
  /** Number of uids covered by each window. */
  @Override protected int Get_uid_rng() {
    return 10000;
  }
  /** Builds the UPDATE template, attaches the page database and opens the transaction. */
  @Override protected void Bulk_bgn() {
    // {0} = img_wiki, {1} = window start (exclusive), {2} = window end (inclusive)
    String template = String_.Concat_lines_nl_skip_last // ANSI.Y
    ( "UPDATE img_link_tmp"
    , "SET img_wiki = {0}"
    , ", img_id = (SELECT p.page_id FROM <page_db>page p WHERE p.page_namespace = 6 AND p.page_title = img_link_tmp.img_name)"
    , "WHERE img_uid > {1} AND img_uid <= {2}"
    , "AND img_name IN (SELECT p.page_title FROM <page_db>page p WHERE p.page_namespace = 6 AND p.page_title = img_link_tmp.img_name)"
    );
    this.sql = attach_mgr.Resolve_sql(template);
    attach_mgr.Attach();
    conn.Txn_bgn("imglnk.bulk");
  }
  /** Closes the transaction and detaches the page database. */
  @Override protected void Bulk_end() {
    conn.Txn_end();
    attach_mgr.Detach();
  }
  /** Executes the UPDATE for one uid window. */
  @Override protected void Bulk_run(int uid_bgn, int uid_end) {
    String msg = String_.Format("updating img_link_tmp; wiki={0} uid={1}", img_wiki, uid_bgn);
    String cmd = String_.Format(sql, img_wiki, uid_bgn, uid_end);
    conn.Exec_sql(msg, cmd);
  }
  /** Convenience entry point: constructs the command and runs it over all windows. */
  public static void Bulk_exec(Db_conn conn, boolean wiki_is_local, Xowe_wiki wiki) {
    Imglnk_bulk_cmd__img_id cmd = new Imglnk_bulk_cmd__img_id(conn, wiki_is_local, wiki);
    cmd.Exec();
  }
}

View File

@@ -13,3 +13,15 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.wmdumps.imglinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.wmdumps.*;
import gplx.xowa.bldrs.wkrs.*;
import gplx.xowa.addons.bldrs.wmdumps.pagelinks.bldrs.*;
/** Addon registration for the image-links builder; exposes its single builder command. */
public class Imglnk_addon implements Xoax_addon_itm, Xoax_addon_itm__bldr {
  /** Key under which this addon is identified. */
  public String Addon__key() {
    return "xowa.builds.imglinks";
  }
  /** Returns a new one-element array holding the image-links command prototype. */
  public Xob_cmd[] Bldr_cmds() {
    Xob_cmd[] cmds = new Xob_cmd[1];
    cmds[0] = Imglnk_bldr_cmd.Prototype;
    return cmds;
  }
}

View File

@@ -13,3 +13,38 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.wmdumps.imglinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.wmdumps.*;
import gplx.dbs.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.sql_dumps.*;
/**
 * Builder command for the "imagelinks" sql dump: each parsed row (il_from, il_to) is inserted
 * into the temp table owned by {@link Imglnk_bldr_mgr}, and the manager is finalized at command end.
 */
public class Imglnk_bldr_cmd extends Xob_sql_dump_base implements Xosql_dump_cbk {
  // field indexes follow the order of the names given to the parser in New_parser
  private static final byte Fld__il_from = 0, Fld__il_to = 1;
  public static final String BLDR_CMD_KEY = "wiki.imagelinks";
  public static final Xob_cmd Prototype = new Imglnk_bldr_cmd(null, null);

  private Imglnk_bldr_mgr mgr;
  private int tmp_page_id;      // il_from of the row being read; -1 if it did not parse as an int
  private byte[] tmp_il_to;     // il_to of the row being read
  private int rows = 0;         // rows read so far; drives the progress message

  public Imglnk_bldr_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
    this.Cmd_ctor(bldr, wiki);
    this.make_fil_len = Io_mgr.Len_mb;
  }
  @Override public String Cmd_key() {
    return BLDR_CMD_KEY;
  }
  @Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {
    return new Imglnk_bldr_cmd(bldr, wiki);
  }
  @Override public String Sql_file_name() {
    return "imagelinks";
  }
  @Override protected Xosql_dump_parser New_parser() {
    return new Xosql_dump_parser(this, "il_from", "il_to");
  }
  /** Creates the manager for the command's wiki before parsing starts. */
  @Override public void Cmd_bgn_hook(Xob_bldr bldr, Xosql_dump_parser parser) {
    mgr = new Imglnk_bldr_mgr(wiki);
  }
  /** Captures one field of the current row; fields other than il_from / il_to are ignored. */
  public void On_fld_done(int fld_idx, byte[] src, int val_bgn, int val_end) {
    if (fld_idx == Fld__il_from) {
      this.tmp_page_id = Bry_.To_int_or(src, val_bgn, val_end, -1);
    }
    else if (fld_idx == Fld__il_to) {
      this.tmp_il_to = Bry_.Mid(src, val_bgn, val_end);
    }
  }
  /** Inserts the captured row and reports progress every 100,000 rows. */
  public void On_row_done() {
    mgr.Tmp_tbl().Insert_by_batch(tmp_page_id, tmp_il_to);
    rows++;
    if (rows % 100000 != 0) return;
    usr_dlg.Prog_many("", "", "reading row ~{0}", Int_.To_str_fmt(rows, "#,##0"));
  }
  /** Finalizes the manager unless the command is flagged as failed. */
  @Override public void Cmd_end() {
    if (!fail)
      mgr.On_cmd_end();
  }
}

View File

@@ -13,3 +13,37 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.wmdumps.imglinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.wmdumps.*;
import gplx.dbs.*;
import gplx.xowa.bldrs.*;
import gplx.xowa.files.repos.*;
/**
 * Owns the img_link database connection for one wiki: prepares the temp table for row inserts
 * on construction, and builds the imglnk_reg table from it in {@link #On_cmd_end()}.
 */
class Imglnk_bldr_mgr {
  private final Db_conn conn;
  private final Xowe_wiki wiki;
  private Imglnk_tmp_tbl tmp_tbl;
  /** Asserts the wiki is initialized, remakes and creates the temp table, then begins the insert batch. */
  public Imglnk_bldr_mgr(Xowe_wiki wiki) {
    wiki.Init_assert();
    this.wiki = wiki;
    this.conn = Xob_db_file.New__img_link(wiki).Conn();
    this.tmp_tbl = new Imglnk_tmp_tbl(conn);
    Prepare_tmp_tbl();
  }
  public Imglnk_tmp_tbl Tmp_tbl() {
    return tmp_tbl;
  }
  /** Ends the temp-table insert batch, indexes it, then populates the registry table. */
  public void On_cmd_end() {
    Finish_tmp_tbl();
    Build_reg_tbl();
    // Imglnk_bulk_cmd__img_id.Bulk_exec(conn, Bool_.Y, wiki);
    // Imglnk_bulk_cmd__img_id.Bulk_exec(conn, Bool_.N, wiki.Appe().Wiki_mgr().Wiki_commons());
  }
  // remake + create the temp table and open its insert batch
  private void Prepare_tmp_tbl() {
    conn.Meta_tbl_remake(tmp_tbl);
    tmp_tbl.Create_tbl();
    tmp_tbl.Insert_bgn();
  }
  // finalize txn; create idx
  private void Finish_tmp_tbl() {
    tmp_tbl.Insert_end();
    tmp_tbl.Create_idx__img_ttl();
  }
  // create reg_tbl; insert rows for the local wiki first, then for commons
  private void Build_reg_tbl() {
    Imglnk_reg_tbl reg_tbl = new Imglnk_reg_tbl(conn);
    conn.Meta_tbl_remake(reg_tbl);
    reg_tbl.Create_idx__src_ttl();
    reg_tbl.Insert(conn, Xof_repo_tid_.Tid__local, wiki);
    reg_tbl.Insert(conn, Xof_repo_tid_.Tid__remote, wiki.Appe().Wiki_mgr().Wiki_commons());
    reg_tbl.Create_idx__trg_ttl();
  }
}

View File

@@ -13,3 +13,64 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.wmdumps.imglinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.wmdumps.*;
import gplx.dbs.*;
import gplx.xowa.bldrs.*;
/**
 * Table wrapper for "imglnk_reg": one row per linked image name (img_src), holding the resolved target
 * name (img_trg), the repo it was found in (img_repo) and how many temp rows referenced it (img_count).
 */
public class Imglnk_reg_tbl implements Db_tbl {
  private final String tbl_name = "imglnk_reg";
  private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
  private final String fld__img_src, fld__img_trg, fld__img_repo;
  private final Db_conn conn;
  private Db_stmt select_by_ttl_stmt;   // created on first use; released in Rls
  /** Declares the table's fields and registers this table with the connection for release. */
  public Imglnk_reg_tbl(Db_conn conn) {
    this.conn = conn;
    this.fld__img_src  = flds.Add_str("img_src", 255);
    this.fld__img_trg  = flds.Add_str("img_trg", 255);
    this.fld__img_repo = flds.Add_byte("img_repo");
    flds.Add_int("img_count");
    conn.Rls_reg(this);
  }
  public Db_conn Conn() {
    return conn;
  }
  public String Tbl_name() {
    return tbl_name;
  }
  public String Fld__img_src() {
    return fld__img_src;
  }
  public void Create_tbl() {
    conn.Meta_tbl_create(Dbmeta_tbl_itm.New(tbl_name, flds));
  }
  /** Creates the index on (img_src, img_repo). */
  public void Create_idx__src_ttl() {
    conn.Meta_idx_create(tbl_name, fld__img_src, fld__img_src, fld__img_repo);
  }
  /** Creates the index on (img_trg, img_repo). */
  public void Create_idx__trg_ttl() {
    conn.Meta_idx_create(tbl_name, fld__img_trg, fld__img_trg, fld__img_repo);
  }
  /** Returns the cached select-by-img_src statement, creating it on first call. */
  public Db_stmt Select_by_ttl_stmt() {
    if (select_by_ttl_stmt != null) return select_by_ttl_stmt;
    select_by_ttl_stmt = conn.Stmt_select(tbl_name, flds, fld__img_src);
    return select_by_ttl_stmt;
  }
  /**
   * Inserts registry rows for temp-table names not yet registered: first names that match a redirect
   * in the wiki's redirect db, then names that match an image in the wiki's image db.
   *
   * @param conn connection the attach manager executes on (shadows the instance field)
   * @param repo_id value written to img_repo for every inserted row
   * @param wiki wiki whose redirect and image databases are attached
   */
  public void Insert(Db_conn conn, byte repo_id, Xowe_wiki wiki) {
    String repo_id_str = Byte_.To_str(repo_id);
    Db_attach_mgr attach_mgr = new Db_attach_mgr(conn);
    Insert_redirects(attach_mgr, repo_id_str, wiki);
    Insert_directs(attach_mgr, repo_id_str, wiki);
  }
  // names that are redirect sources: img_trg is the redirect's target title
  private void Insert_redirects(Db_attach_mgr attach_mgr, String repo_id_str, Xowe_wiki wiki) {
    Xob_db_file redirect_db = Xob_db_file.New__wiki_redirect(wiki.Fsys_mgr().Root_dir());
    attach_mgr.Conn_links_(new Db_attach_itm("redirect_db", redirect_db.Conn()));
    String redirect_sql = String_.Concat_lines_nl_skip_last // ANSI.Y
    ( "INSERT INTO imglnk_reg (img_src, img_trg, img_repo, img_count)"
    , "SELECT  ilt.img_name, r.trg_ttl, " + repo_id_str + ", Count(ilt.img_name)"
    , "FROM    imglnk_tmp ilt"
    , "        JOIN <redirect_db>redirect r ON ilt.img_name = r.src_ttl"
    , "        LEFT JOIN imglnk_reg il ON il.img_src = ilt.img_name"
    , "WHERE   il.img_src IS NULL"
    , "GROUP BY ilt.img_name"
    );
    attach_mgr.Exec_sql_w_msg("imglnk_reg.insert.redirect: repo=" + repo_id_str, redirect_sql);
  }
  // names that exist directly in the image table: img_trg equals img_src
  private void Insert_directs(Db_attach_mgr attach_mgr, String repo_id_str, Xowe_wiki wiki) {
    Xob_db_file image_db = Xob_db_file.New__wiki_image(wiki.Fsys_mgr().Root_dir());
    attach_mgr.Conn_links_(new Db_attach_itm("image_db", image_db.Conn()));
    String direct_sql = String_.Concat_lines_nl_skip_last // ANSI.Y
    ( "INSERT INTO imglnk_reg (img_src, img_trg, img_repo, img_count)"
    , "SELECT  ilt.img_name, ilt.img_name, " + repo_id_str + ", Count(ilt.img_name)"
    , "FROM    imglnk_tmp ilt"
    , "        JOIN <image_db>image i ON i.img_name = ilt.img_name"
    , "        LEFT JOIN imglnk_reg il ON il.img_src = ilt.img_name"
    , "WHERE   il.img_src IS NULL"
    , "GROUP BY ilt.img_name"
    );
    attach_mgr.Exec_sql_w_msg("imglnk_reg.insert.direct: repo=" + repo_id_str, direct_sql);
  }
  /** Releases the cached select statement, if any. */
  public void Rls() {
    select_by_ttl_stmt = Db_stmt_.Rls(select_by_ttl_stmt);
  }
}

View File

@@ -13,3 +13,40 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.wmdumps.imglinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.wmdumps.*;
import gplx.dbs.*;
/**
 * Table wrapper for "imglnk_tmp": staging rows (page_id, img_name) read from the imagelinks dump,
 * plus img_wiki / img_id columns that default to -1.
 */
public class Imglnk_tmp_tbl implements Db_tbl {
  private final String tbl_name = "imglnk_tmp"; private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
  private final String fld__page_id, fld__img_name;
  private final Db_conn conn;
  private Db_stmt stmt__insert;   // non-null only between Insert_bgn and Insert_end / Rls
  /** Declares the table's fields and registers this table with the connection for release. */
  public Imglnk_tmp_tbl(Db_conn conn) {
    this.conn = conn;
    flds.Add_int_pkey_autonum("img_uid");
    this.fld__page_id = flds.Add_int("page_id");
    this.fld__img_name = flds.Add_str("img_name", 255);
    flds.Add_int_dflt("img_wiki", -1);
    flds.Add_int_dflt("img_id", -1);
    conn.Rls_reg(this);
  }
  public Db_conn Conn() {return conn;}
  public String Tbl_name() {return tbl_name;}
  public void Create_tbl() {conn.Meta_tbl_create(Dbmeta_tbl_itm.New(tbl_name, flds));}
  /** Prepares the insert statement for (page_id, img_name) and opens a transaction. */
  public void Insert_bgn() {
    stmt__insert = conn.Stmt_insert(tbl_name, fld__page_id, fld__img_name);
    conn.Txn_bgn(tbl_name);
  }
  /** Inserts one row; must be called between Insert_bgn and Insert_end. */
  public void Insert_by_batch(int page_id, byte[] img_name) {
    stmt__insert.Clear().Val_int(fld__page_id, page_id).Val_bry_as_str(fld__img_name, img_name).Exec_insert();
  }
  /** Ends the transaction and releases the insert statement. */
  public void Insert_end() {
    conn.Txn_end();
    // Release through Db_stmt_.Rls and store its result, as Rls() below and Pglnk_page_link_temp_tbl.Insert_end do,
    // so the field no longer points at a released statement and the registered Rls() does not release it twice.
    // NOTE(review): assumes Db_stmt_.Rls tolerates null and returns null — confirm against Db_stmt_.
    stmt__insert = Db_stmt_.Rls(stmt__insert);
  }
  /** Creates the index on img_name. */
  public void Create_idx__img_ttl() {
    conn.Meta_idx_create(tbl_name, fld__img_name, fld__img_name);
  }
  /** Releases the insert statement, if any. */
  public void Rls() {
    stmt__insert = Db_stmt_.Rls(stmt__insert);
  }
}

View File

@@ -13,3 +13,62 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.wmdumps.imglinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.wmdumps.*;
import gplx.dbs.*; import gplx.xowa.bldrs.*;
import gplx.xowa.files.origs.*; import gplx.xowa.files.repos.*;
/**
 * Orig worker backed by the img_link database: resolves a file title to an {@code Xof_orig_itm},
 * caching both hits and misses in an in-memory hash keyed by title.
 */
public class Xof_orig_wkr__img_links implements Xof_orig_wkr {
  private final Hash_adp_bry hash = Hash_adp_bry.cs();   // title -> item, or Missing for known misses
  private Db_conn imglnk_conn;                           // opened on first db load
  public Xof_orig_wkr__img_links(Xowe_wiki wiki) {
    this.wiki = wiki;
  }
  public byte Tid() {return Xof_orig_wkr_.Tid_xowa_img_links;}
  /**
   * Returns the item for {@code ttl}, loading it from the db on first request.
   * {@code list_idx} and {@code list_len} are not used by this implementation.
   *
   * @return the cached or loaded item, or {@code Xof_orig_itm.Null} when the title is not found
   */
  public Xof_orig_itm Find_as_itm(byte[] ttl, int list_idx, int list_len) {
    Xof_orig_itm rv = (Xof_orig_itm)hash.Get_by(ttl);
    if      (rv == Missing) return Xof_orig_itm.Null;
    else if (rv == null)    rv = Load_from_db(ttl);
    return rv == Missing ? Xof_orig_itm.Null : rv;
  }
  public void Find_by_list(Ordered_hash rv, List_adp itms) {throw Err_.new_unimplemented();}
  /** Not supported by this worker; always returns false. */
  public boolean Add_orig(byte repo, byte[] page, int ext_id, int w, int h, byte[] redirect) {return false;}
  public void Db_txn_save() {}
  public void Db_rls() {}
  public Xowe_wiki Wiki() {return wiki;} private final Xowe_wiki wiki;
  /** Returns the imglnk_reg table wrapper, created on first call from the current imglnk_conn. */
  public Imglnk_reg_tbl Tbl__imglnk_reg() {
    if (tbl__imglnk_reg == null)
      this.tbl__imglnk_reg = new Imglnk_reg_tbl(imglnk_conn);
    return tbl__imglnk_reg;
  } private Imglnk_reg_tbl tbl__imglnk_reg;
  /** Returns the cached image-select statement for {@code repo}, creating it on first call; {@code repo} indexes a 2-slot array. */
  public Db_stmt Stmt__image__select(byte repo, Xowe_wiki wiki) {
    Db_stmt rv = stmt__image__select[repo];
    if (rv == null) {
      rv = Make__stmt__image__select(repo, wiki);
      stmt__image__select[repo] = rv;
    }
    return rv;
  } private Db_stmt[] stmt__image__select = new Db_stmt[2];
  // builds a select on the wiki's image db, keyed by img_name
  private Db_stmt Make__stmt__image__select(byte repo, Xowe_wiki wiki) {
    Xob_db_file image_db = Xob_db_file.New__wiki_image(wiki.Fsys_mgr().Root_dir());
    return image_db.Conn().Stmt_select
    ( "image"
    , String_.Ary("img_media_type", "img_minor_mime", "img_size", "img_width", "img_height", "img_bits", "img_ext_id", "img_timestamp")
    , String_.Ary("img_name")
    );
  }
  /** Adds a preloaded item to the cache under its title. */
  public void Add_by_db(Xof_orig_itm itm) {
    hash.Add(itm.Ttl(), itm);
  }
  // loads one title from the db and caches the result (Missing when not found)
  private Xof_orig_itm Load_from_db(byte[] ttl) {
    synchronized (hash) {  // LOCK:orig_wkr is shared by multiple threads; NullPointerException on statement sometimes when concurrent; DATE:2016-09-03
      // re-check under the lock: the caller's miss was observed outside it, so another thread may have
      // loaded and cached this title in the meantime; without this, both threads would query and Add the same key
      Xof_orig_itm rv = (Xof_orig_itm)hash.Get_by(ttl);
      if (rv != null) return rv;
      if (imglnk_conn == null)
        imglnk_conn = Xob_db_file.New__img_link(wiki).Conn();
      rv = Xof_orig_wkr__img_links_.Load_itm(this, imglnk_conn, wiki, ttl);
      if (rv == Xof_orig_itm.Null)
        rv = Missing;
      hash.Add(ttl, rv);
      return rv;
    }
  }
  // sentinel cached for titles that were looked up and not found; compared by identity
  private static final Xof_orig_itm Missing = new Xof_orig_itm(Byte_.Max_value_127, Bry_.Empty, -1, -1, -1, Bry_.Empty);
}

View File

@@ -13,3 +13,79 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.wmdumps.imglinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.wmdumps.*;
import gplx.dbs.*;
import gplx.xowa.bldrs.*;
import gplx.xowa.files.repos.*; import gplx.xowa.files.origs.*;
/** Static loaders that read orig items for {@link Xof_orig_wkr__img_links} from the img_link and image databases. */
public class Xof_orig_wkr__img_links_ {
  /** Preloads every registered image into {@code wkr}: local-repo rows first, then remote-repo (commons) rows. */
  public static void Load_all(Xof_orig_wkr__img_links wkr) {
    Xowe_wiki wiki = wkr.Wiki();
    Db_conn conn = Xob_db_file.New__img_link(wiki).Conn();
    Load_all_by_wiki(wkr, conn, Xof_repo_tid_.Tid__local , wiki);
    Load_all_by_wiki(wkr, conn, Xof_repo_tid_.Tid__remote, wiki.Appe().Wiki_mgr().Wiki_commons());
  }
  /**
   * Loads one title: reads its repo and target name from imglnk_reg, then reads the image row for the
   * target from the local wiki or from commons, depending on the repo.
   *
   * @return the loaded item, or {@code Xof_orig_itm.Null} when either lookup finds no row
   */
  public static Xof_orig_itm Load_itm(Xof_orig_wkr__img_links wkr, Db_conn conn, Xowe_wiki wiki, byte[] ttl) {
    Imglnk_reg_tbl imglnk_reg_tbl = wkr.Tbl__imglnk_reg();
    Db_rdr rdr = imglnk_reg_tbl.Select_by_ttl_stmt().Clear().Crt_bry_as_str("img_src", ttl).Exec_select__rls_manual();
    byte img_repo = Byte_.Max_value_127;
    byte[] img_trg = null;
    try {
      if (rdr.Move_next()) {
        img_repo = rdr.Read_byte("img_repo");
        img_trg = rdr.Read_bry_by_str("img_trg");
      }
      else  // ttl missing; EX:</*_File:Chehov_v_serpuhove11.JPG; DATE:2016-08-10
        return Xof_orig_itm.Null;
    } finally {rdr.Rls();}
    Xowe_wiki image_wiki = img_repo == Xof_repo_tid_.Tid__local ? wiki : wiki.Appe().Wiki_mgr().Wiki_commons();
    return Load_itm_by_wiki(wkr, conn, image_wiki, img_repo, ttl, img_trg);
  }
  // reads the image row for img_trg via the worker's cached statement; conn is not used here
  private static Xof_orig_itm Load_itm_by_wiki(Xof_orig_wkr__img_links wkr, Db_conn conn, Xowe_wiki wiki, byte repo_id, byte[] img_src, byte[] img_trg) {
    Db_stmt stmt = wkr.Stmt__image__select(repo_id, wiki);
    Db_rdr rdr = stmt.Clear().Crt_bry_as_str("img_name", img_trg).Exec_select__rls_manual();
    try {
      return rdr.Move_next()
        ? new Xof_orig_itm
        ( repo_id
        , img_src  // NOTE: was originally (incorrectly) img_trg; PAGE:en.v:Ani; DATE:2016-10-18
        , rdr.Read_int("img_ext_id")
        , rdr.Read_int("img_width")
        , rdr.Read_int("img_height")
        , img_trg  // NOTE: was originally (incorrectly) img_src; PAGE:en.v:Ani; DATE:2016-10-18
        )
        : Xof_orig_itm.Null;
    } finally {rdr.Rls();}
  }
  // joins imglnk_reg to the attached image db for one repo and adds every row to the worker's cache
  private static void Load_all_by_wiki(Xof_orig_wkr__img_links rv, Db_conn conn, byte repo_id, Xowe_wiki wiki) {
    String sql = String_.Concat_lines_nl_skip_last  // ANSI.Y
    ( "SELECT  ilr.img_repo, ilr.img_src, i.img_media_type, i.img_minor_mime, i.img_size, i.img_width, i.img_height, i.img_bits, i.img_ext_id, i.img_timestamp, ilr.img_trg AS img_redirect"
    , "FROM    imglnk_reg ilr"
    , "        JOIN <img_db>image i ON ilr.img_trg = i.img_name"
    , "WHERE   ilr.img_repo = " + repo_id
    );
    Xob_db_file img_db = Xob_db_file.New__wiki_image(wiki.Fsys_mgr().Root_dir());
    Db_attach_mgr attach_mgr = new Db_attach_mgr(conn, new Db_attach_itm("img_db", img_db.Conn()));
    sql = attach_mgr.Resolve_sql(sql);
    attach_mgr.Attach();
    try {  // always detach once attached, even if preparing or reading the query throws
      int count = 0;
      Db_rdr rdr = conn.Stmt_sql(sql).Exec_select__rls_auto();
      try {
        while (rdr.Move_next()) {
          rv.Add_by_db(new Xof_orig_itm
          ( rdr.Read_byte("img_repo")
          , rdr.Read_bry_by_str("img_src")
          , rdr.Read_int("img_ext_id")
          , rdr.Read_int("img_width")
          , rdr.Read_int("img_height")
          , rdr.Read_bry_by_str("img_redirect")
          ));
          if ((++count % 10000) == 0)
            Gfo_usr_dlg_.Instance.Prog_many("", "", "loading img_links.orig: rows=~{0}", count);
        }
      } finally {rdr.Rls();}
    } finally {
      attach_mgr.Detach();
    }
  }
}

View File

@@ -13,3 +13,15 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.wmdumps.pagelinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.wmdumps.*;
import gplx.xowa.bldrs.wkrs.*;
import gplx.xowa.addons.bldrs.wmdumps.pagelinks.bldrs.*;
/** Addon registration for the page-links builder; exposes its single builder command. */
public class Xoax_builds_pagelinks_addon implements Xoax_addon_itm, Xoax_addon_itm__bldr {
  /** Key under which this addon is identified. */
  public String Addon__key() {
    return "xowa.builds.pagelinks";
  }
  /** Returns a new one-element array holding the page-links command prototype. */
  public Xob_cmd[] Bldr_cmds() {
    Xob_cmd[] cmds = new Xob_cmd[1];
    cmds[0] = Pglnk_bldr_cmd.Prototype;
    return cmds;
  }
}

View File

@@ -13,3 +13,64 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.wmdumps.pagelinks.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.wmdumps.*; import gplx.xowa.addons.bldrs.wmdumps.pagelinks.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.sql_dumps.*;
import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.xowa.wikis.data.*; import gplx.xowa.addons.bldrs.wmdumps.pagelinks.dbs.*;
/**
 * Builder command for the "pagelinks" sql dump: rows (pl_from, pl_namespace, pl_title) are staged in
 * page_link_temp, then resolved against the wiki's page table into page_link at command end.
 */
public class Pglnk_bldr_cmd extends Xob_sql_dump_base implements Xosql_dump_cbk {
  public static final String Dump_type_key = "pagelinks";
  public static final String BLDR_CMD_KEY = "wiki.page_link";
  // field indexes follow the order of the names given to the parser in New_parser
  private static final byte Fld__pl_from = 0, Fld__pl_namespace = 1, Fld__pl_title = 2;
  // resolves staged (trg_ns, trg_ttl) to page ids and counts links per (src_id, page_id)
  private static final String Sql__page_link__make = String_.Concat_lines_nl_skip_last
  ( "INSERT INTO page_link (src_id, trg_id, trg_count)"
  , "SELECT  pl.src_id"
  , ",       p.page_id"
  , ",       Count(p.page_id)"
  , "FROM    page_link_temp pl"
  , "        JOIN <page_db>page p ON pl.trg_ns = p.page_namespace AND pl.trg_ttl = p.page_title"
  , "GROUP BY pl.src_id, p.page_id"
  );
  public static final Xob_cmd Prototype = new Pglnk_bldr_cmd(null, null);

  private Db_conn conn;
  private Pglnk_page_link_temp_tbl temp_tbl;
  private int tmp_src_id, tmp_trg_ns;   // pl_from / pl_namespace of the current row; -1 if not an int
  private byte[] tmp_trg_ttl;           // pl_title of the current row
  private int rows = 0;                 // rows read so far; drives the progress message

  public Pglnk_bldr_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
    this.Cmd_ctor(bldr, wiki);
    this.make_fil_len = Io_mgr.Len_mb;
  }
  @Override public String Cmd_key() {
    return BLDR_CMD_KEY;
  }
  @Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {
    return new Pglnk_bldr_cmd(bldr, wiki);
  }
  @Override public String Sql_file_name() {
    return Dump_type_key;
  }
  @Override protected Xosql_dump_parser New_parser() {
    return new Xosql_dump_parser(this, "pl_from", "pl_namespace", "pl_title");
  }
  /** Opens the page_link db, recreates the staging table and begins the insert batch. */
  @Override public void Cmd_bgn_hook(Xob_bldr bldr, Xosql_dump_parser parser) {
    wiki.Init_assert();
    Xob_db_file page_link_db = Xob_db_file.New__page_link(wiki);
    this.conn = page_link_db.Conn();
    this.temp_tbl = new Pglnk_page_link_temp_tbl(conn);
    String temp_tbl_name = temp_tbl.Tbl_name();
    conn.Meta_tbl_delete(temp_tbl_name);
    temp_tbl.Create_tbl();
    temp_tbl.Insert_bgn();
  }
  /**
   * Unless the command is flagged as failed: finishes staging, rebuilds page_link from the staged rows,
   * drops the staging table, creates both indexes and vacuums the database.
   */
  @Override public void Cmd_end() {
    if (fail) return;
    // close out staging
    temp_tbl.Insert_end();
    temp_tbl.Create_idx();
    // recreate the final table and fill it from staging joined to the page table
    Pglnk_page_link_tbl actl_tbl = new Pglnk_page_link_tbl(conn);
    conn.Meta_tbl_delete(actl_tbl.Tbl_name());
    actl_tbl.Create_tbl();
    Db_attach_itm page_db = new Db_attach_itm("page_db", wiki.Data__core_mgr().Db__core().Conn());
    Db_attach_mgr attach_mgr = new Db_attach_mgr(conn, page_db);
    attach_mgr.Exec_sql_w_msg("updating page_link", Sql__page_link__make);
    // staging is no longer needed; index and compact
    conn.Meta_tbl_delete(temp_tbl.Tbl_name());
    actl_tbl.Create_idx__src_trg();
    actl_tbl.Create_idx__trg_src();
    conn.Env_vacuum();
  }
  /** Captures one field of the current row; other field indexes are ignored. */
  public void On_fld_done(int fld_idx, byte[] src, int val_bgn, int val_end) {
    if (fld_idx == Fld__pl_from) {
      this.tmp_src_id = Bry_.To_int_or(src, val_bgn, val_end, -1);
    }
    else if (fld_idx == Fld__pl_namespace) {
      this.tmp_trg_ns = Bry_.To_int_or(src, val_bgn, val_end, -1);
    }
    else if (fld_idx == Fld__pl_title) {
      this.tmp_trg_ttl = Bry_.Mid(src, val_bgn, val_end);
    }
  }
  /** Stages the captured row and reports progress every 100,000 rows. */
  public void On_row_done() {
    temp_tbl.Insert(tmp_src_id, tmp_trg_ns, tmp_trg_ttl);
    rows++;
    if (rows % 100000 != 0) return;
    usr_dlg.Prog_many("", "", "reading row ~{0}", Int_.To_str_fmt(rows, "#,##0"));
  }
}

View File

@@ -13,3 +13,23 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.wmdumps.pagelinks.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.wmdumps.*; import gplx.xowa.addons.bldrs.wmdumps.pagelinks.*;
import gplx.core.ios.*; import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.xowa.wikis.dbs.*; import gplx.dbs.cfgs.*;
/** Table wrapper for "page_link": (src_id, trg_id, trg_count) with unique indexes in both directions. */
public class Pglnk_page_link_tbl implements Rls_able {
  private final String tbl_name = "page_link";
  private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
  private final String fld_src_id, fld_trg_id;
  private final Db_conn conn;
  /** Declares the table's fields and registers this table with the connection for release. */
  public Pglnk_page_link_tbl(Db_conn conn) {
    this.conn = conn;
    this.fld_src_id = flds.Add_int("src_id");
    this.fld_trg_id = flds.Add_int("trg_id");
    flds.Add_int("trg_count");
    conn.Rls_reg(this);
  }
  public Db_conn Conn() {
    return conn;
  }
  public String Tbl_name() {
    return tbl_name;
  }
  public void Create_tbl() {
    conn.Meta_tbl_create(Dbmeta_tbl_itm.New(tbl_name, flds));
  }
  /** Creates the unique index "src_trg" on (src_id, trg_id). */
  public void Create_idx__src_trg() {
    Dbmeta_idx_itm idx = Dbmeta_idx_itm.new_unique_by_tbl(tbl_name, "src_trg", fld_src_id, fld_trg_id);
    conn.Meta_idx_create(Gfo_usr_dlg_.Instance, idx);
  }
  /** Creates the unique index "trg_src" on (trg_id, src_id). */
  public void Create_idx__trg_src() {
    Dbmeta_idx_itm idx = Dbmeta_idx_itm.new_unique_by_tbl(tbl_name, "trg_src", fld_trg_id, fld_src_id);
    conn.Meta_idx_create(Gfo_usr_dlg_.Instance, idx);
  }
  /** Nothing to release; this table holds no statements. */
  public void Rls() {
  }
}

View File

@@ -13,3 +13,31 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.wmdumps.pagelinks.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.wmdumps.*; import gplx.xowa.addons.bldrs.wmdumps.pagelinks.*;
import gplx.core.ios.*; import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.xowa.wikis.dbs.*; import gplx.dbs.cfgs.*;
/** Table wrapper for "page_link_temp": staging rows (uid, src_id, trg_ns, trg_ttl) read from the pagelinks dump. */
public class Pglnk_page_link_temp_tbl implements Rls_able {
  private final String tbl_name = "page_link_temp";
  private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
  private final String fld_src_id, fld_trg_ns, fld_trg_ttl;
  private final Db_conn conn;
  private Db_stmt stmt_insert;   // created in Insert_bgn or on first Insert; released in Insert_end / Rls
  /** Declares the table's fields and registers this table with the connection for release. */
  public Pglnk_page_link_temp_tbl(Db_conn conn) {
    this.conn = conn;
    flds.Add_int_pkey_autonum("uid");
    this.fld_src_id  = flds.Add_int("src_id");
    this.fld_trg_ns  = flds.Add_int("trg_ns");
    this.fld_trg_ttl = flds.Add_str("trg_ttl", 255);
    conn.Rls_reg(this);
  }
  public Db_conn Conn() {
    return conn;
  }
  public String Tbl_name() {
    return tbl_name;
  }
  public void Create_tbl() {
    conn.Meta_tbl_create(Dbmeta_tbl_itm.New(tbl_name, flds));
  }
  /** Creates the non-unique index "main" on (src_id, trg_ns, trg_ttl). */
  public void Create_idx() {
    Dbmeta_idx_itm idx = Dbmeta_idx_itm.new_normal_by_tbl(tbl_name, "main", fld_src_id, fld_trg_ns, fld_trg_ttl);
    conn.Meta_idx_create(Gfo_usr_dlg_.Instance, idx);
  }
  /** Opens the bulk-insert transaction, then prepares the insert statement. */
  public void Insert_bgn() {
    conn.Txn_bgn("page_link__insert_bulk");
    stmt_insert = New_insert_stmt();
  }
  /** Ends the transaction, then releases the insert statement. */
  public void Insert_end() {
    conn.Txn_end();
    stmt_insert = Db_stmt_.Rls(stmt_insert);
  }
  /** Inserts one staged row, preparing the insert statement first if none is currently held. */
  public void Insert(int src_id, int trg_ns, byte[] trg_ttl) {
    if (stmt_insert == null)
      stmt_insert = New_insert_stmt();
    stmt_insert.Clear()
      .Val_int(fld_src_id, src_id)
      .Val_int(fld_trg_ns, trg_ns)
      .Val_bry_as_str(fld_trg_ttl, trg_ttl)
      .Exec_insert();
  }
  /** Releases the insert statement, if any. */
  public void Rls() {
    stmt_insert = Db_stmt_.Rls(stmt_insert);
  }
  // prepares an insert statement over this table's field list
  private Db_stmt New_insert_stmt() {
    return conn.Stmt_insert(tbl_name, flds);
  }
}