diff --git a/400_xowa/src/gplx/xowa/addons/Xoax_addon_mgr.java b/400_xowa/src/gplx/xowa/addons/Xoax_addon_mgr.java
index cc3f0632e..5186a1ae3 100644
--- a/400_xowa/src/gplx/xowa/addons/Xoax_addon_mgr.java
+++ b/400_xowa/src/gplx/xowa/addons/Xoax_addon_mgr.java
@@ -47,6 +47,7 @@ public class Xoax_addon_mgr {
, new gplx.xowa.addons.wikis.pages.randoms .Rndm_addon()
, new gplx.xowa.addons.bldrs.hdumps.diffs .Dumpdiff_addon()
, new gplx.xowa.addons.wikis.ctgs.bldrs .Xoax_ctg_bldr_addon()
+ , new gplx.xowa.xtns.wbases.imports .Xowb_bldr_addon()
// specials
, new gplx.xowa.addons.wikis.registrys .Wiki_registry_addon()
diff --git a/400_xowa/src/gplx/xowa/bldrs/Xob_cmd_keys.java b/400_xowa/src/gplx/xowa/bldrs/Xob_cmd_keys.java
index bb1101587..f1d07bc3a 100644
--- a/400_xowa/src/gplx/xowa/bldrs/Xob_cmd_keys.java
+++ b/400_xowa/src/gplx/xowa/bldrs/Xob_cmd_keys.java
@@ -30,7 +30,6 @@ public class Xob_cmd_keys {
, Key_util_xml_dump = "util.xml_dump"
, Key_util_random = "util.random"
, Key_util_delete = "util.delete"
- , Key_wbase_json_dump = "wbase.json_dump"
, Key_wbase_qid = "wbase.qid" // "text.wdata.qid"
, Key_wbase_pid = "wbase.pid" // "text.wdata.pid"
, Key_wbase_db = "wbase.db" // "wiki.wdata_db"
diff --git a/400_xowa/src/gplx/xowa/bldrs/Xob_cmd_mgr.java b/400_xowa/src/gplx/xowa/bldrs/Xob_cmd_mgr.java
index 9c8d636aa..f3daed932 100644
--- a/400_xowa/src/gplx/xowa/bldrs/Xob_cmd_mgr.java
+++ b/400_xowa/src/gplx/xowa/bldrs/Xob_cmd_mgr.java
@@ -50,7 +50,6 @@ public class Xob_cmd_mgr implements Gfo_invk {
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_delete)) return Add(new Xob_delete_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_download)) return Add(new Xob_download_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_xml_dump)) return Add(new Xob_xml_dumper_cmd(bldr, wiki));
- else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_json_dump)) return Add(new Xob_wbase_json_dump_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_qid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_qid_sql().Ctor(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_pid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_pid_sql().Ctor(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_db)) return Add(new Xob_wdata_db_cmd(bldr, wiki));
diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wbase_json_dump_parser.java b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wbase_json_dump_parser.java
deleted file mode 100644
index 84daa37af..000000000
--- a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wbase_json_dump_parser.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
-XOWA: the XOWA Offline Wiki Application
-Copyright (C) 2012 gnosygnu@gmail.com
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as
-published by the Free Software Foundation, either version 3 of the
-License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see .
-*/
-package gplx.xowa.xtns.wbases.imports; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*;
-import gplx.core.ios.*; import gplx.core.ios.streams.*;
-import gplx.xowa.bldrs.*;
-import gplx.xowa.wikis.data.tbls.*;
-class Xob_wbase_json_dump_parser {
- private final Gfo_usr_dlg usr_dlg; private final Xoae_app app; private final Xob_bldr bldr; private final Xowe_wiki wiki;
- private final Xob_wbase_json_dump_db dump_db;
- private final Io_stream_unzip_mgr unzip_mgr;
- public Xob_wbase_json_dump_parser(Xob_bldr bldr, Xowe_wiki wiki) {
- this.bldr = bldr; this.wiki = wiki;
- this.app = bldr.App(); this.usr_dlg = app.Usr_dlg();
- this.dump_db = new Xob_wbase_json_dump_db(bldr, wiki);
- this.unzip_mgr = new Io_stream_unzip_mgr(app.Setup_mgr().Dump_mgr().Import_bz2_by_stdout(), app.Prog_mgr().App_decompress_bz2_by_stdout(), String_.Ary(".bz2", ".gz", ".zip"));
- }
- public void Parse(Io_url src_fil) {
- byte[] json_bgn = Bry_.new_a7("[\n"), id_bgn = Bry_.new_a7("{\"id\":");
- String prog_fmt = "reading ~{0} MB: ~{1} ~{2}";
- Io_stream_rdr stream_rdr = Io_stream_rdr_mgr.Get_rdr_or_null(src_fil, wiki.Fsys_mgr().Root_dir(), unzip_mgr, "*wikidata-*-all.json", "*wikidata-*-all.json.gz");
- if (stream_rdr == null) {usr_dlg.Warn_many("", "", "wbase.import:file not found: src_dir=~{0}", wiki.Fsys_mgr().Root_dir()); return;}
- Io_buffer_rdr buffer_rdr = Io_buffer_rdr.new_(stream_rdr, 10 * Io_mgr.Len_mb); long buffer_rdr_len = buffer_rdr.Fil_len();
- try {
- Io_url stream_rdr_url = stream_rdr.Url();
- int page_bgn = Bry_find_.Find_fwd(buffer_rdr.Bfr(), id_bgn);
- if (page_bgn == Bry_find_.Not_found) {usr_dlg.Warn_many("", "", "wbase.import:initial id not found: url=~{0}", stream_rdr_url.Raw()); return;}
- if (!Bry_.Match(buffer_rdr.Bfr(), 0, page_bgn, json_bgn)) {usr_dlg.Warn_many("", "", "wbase.import:doc_bgn is not '[\n': url=~{0}", stream_rdr_url.Raw()); return;}
- Xowd_page_itm page = new Xowd_page_itm();
- dump_db.Parse_bgn(stream_rdr.Len(), stream_rdr.Url().NameAndExt());
- while (true) {
- int cur_pos = Extract_page(page, buffer_rdr, page_bgn);
- if (cur_pos == -1) break;
- if (cur_pos < page_bgn)
- bldr.Print_prog_msg(buffer_rdr.Fil_pos(), buffer_rdr_len, 1, prog_fmt, Int_.To_str_pad_bgn_zero((int)(buffer_rdr.Fil_pos() / Io_mgr.Len_mb), Int_.DigitCount((int)(buffer_rdr.Fil_len() / Io_mgr.Len_mb))), "", page.Ttl_page_db());
- page_bgn = cur_pos;
- }
- dump_db.Parse_end();
- }
- catch (Exception e) {
- String msg = usr_dlg.Warn_many("", "", "dump_rdr:error while reading; url=~{0} err=~{1}", src_fil.Raw(), Err_.Message_lang(e));
- throw Err_.new_wo_type(msg);
- }
- finally {buffer_rdr.Rls();}
- }
- private int Extract_page(Xowd_page_itm page, Io_buffer_rdr rdr, int page_bgn) {
- int pos = page_bgn;
- byte[] bry = rdr.Bfr();
- int bry_len = rdr.Bfr_len();
- while (true) {
- if (pos == bry_len) {
- rdr.Bfr_load_from(page_bgn); // refill src from pos;
- bry_len = rdr.Bfr_len();
- pos -= page_bgn;
- page_bgn = 0;
- }
- byte b = Byte_.Zero;
- boolean exit = false;
- if (pos < bry_len)
- b = bry[pos];
- else {
- b = Byte_ascii.Nl;
- pos = bry_len;
- exit = true;
- }
- if (b == Byte_ascii.Nl) {
- byte[] json_bry = Bry_.Mid(bry, page_bgn, pos);
- if (json_bry.length == 1 && json_bry[0] == Byte_ascii.Brack_end) return -1;
- if (exit) return -1;
- dump_db.Parse_cmd(json_bry);
- return pos + 1;
- }
- ++pos;
- }
- }
-}
diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xowb_bldr_addon.java b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xowb_bldr_addon.java
new file mode 100644
index 000000000..7ac33f3f3
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xowb_bldr_addon.java
@@ -0,0 +1,29 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.xtns.wbases.imports; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*;
+import gplx.xowa.addons.*;
+import gplx.xowa.bldrs.wkrs.*;
+public class Xowb_bldr_addon implements Xoax_addon_itm, Xoax_addon_itm__bldr {
+  // key reported for this addon
+  public String Addon__key() {return "xowa.builds.wikibase";}
+  // builder cmds contributed by this addon: only the wikibase json-dump cmd prototype
+  public Xob_cmd[] Bldr_cmds() {
+    Xob_cmd json_dump = gplx.xowa.xtns.wbases.imports.json.Xowb_json_dump_cmd.Prototype;
+    return new Xob_cmd[] {json_dump};
+  }
+}
diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Io_stream_rdr_mgr.java b/400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Io_stream_rdr_mgr.java
similarity index 93%
rename from 400_xowa/src/gplx/xowa/xtns/wbases/imports/Io_stream_rdr_mgr.java
rename to 400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Io_stream_rdr_mgr.java
index f33f0b301..e410275fa 100644
--- a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Io_stream_rdr_mgr.java
+++ b/400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Io_stream_rdr_mgr.java
@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-package gplx.xowa.xtns.wbases.imports; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*;
+package gplx.xowa.xtns.wbases.imports.json; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.imports.*;
import gplx.core.ios.*; import gplx.core.ios.streams.*; import gplx.core.criterias.*; import gplx.core.envs.*;
class Io_stream_rdr_mgr {
public static Io_stream_rdr Get_rdr_or_null(Io_url src_fil, Io_url src_dir, Io_stream_unzip_mgr unzip_mgr, String... filter_ary) {
diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wbase_json_dump_cmd.java b/400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Xowb_json_dump_cmd.java
similarity index 51%
rename from 400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wbase_json_dump_cmd.java
rename to 400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Xowb_json_dump_cmd.java
index fce46d25c..5d3a854d4 100644
--- a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wbase_json_dump_cmd.java
+++ b/400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Xowb_json_dump_cmd.java
@@ -15,24 +15,26 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-package gplx.xowa.xtns.wbases.imports; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*;
+package gplx.xowa.xtns.wbases.imports.json; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.imports.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
-public class Xob_wbase_json_dump_cmd implements Xob_cmd {
- private final Xob_wbase_json_dump_parser json_dump_parser;
+public class Xowb_json_dump_cmd extends Xob_cmd__base { // bldr cmd "wbase.json_dump": runs Xowb_json_dump_parser on src_fil
+ private final Xowb_json_dump_parser json_dump_parser;
private Io_url src_fil;
- public Xob_wbase_json_dump_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
- this.json_dump_parser = new Xob_wbase_json_dump_parser(bldr, wiki);
+ public Xowb_json_dump_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);
+ this.json_dump_parser = new Xowb_json_dump_parser(bldr, wiki); // parser ctor only stores bldr / wiki
}
- public String Cmd_key() {return Xob_cmd_keys.Key_wbase_json_dump;}
- public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
- public void Cmd_run() {json_dump_parser.Parse(src_fil);}
- public void Cmd_init(Xob_bldr bldr) {}
- public void Cmd_bgn(Xob_bldr bldr) {}
- public void Cmd_end() {}
- public void Cmd_term() {}
- public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
+ @Override public void Cmd_run() { // parse the dump at src_fil; src_fil stays null unless set through Invk
+ json_dump_parser.Parse(src_fil);
+ }
+
+ @Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) { // "src_fil_" stores the url read from arg "v"; any other key returns Rv_unhandled
if (ctx.Match(k, Invk_src_fil_)) this.src_fil = m.ReadIoUrl("v");
else return Gfo_invk_.Rv_unhandled;
return this;
} private static final String Invk_src_fil_ = "src_fil_";
+
+ public static final String BLDR_CMD_KEY = "wbase.json_dump"; // same key string as the removed Xob_cmd_keys.Key_wbase_json_dump
+ @Override public String Cmd_key() {return BLDR_CMD_KEY;}
+ public static final Xob_cmd Prototype = new Xowb_json_dump_cmd(null, null); // built with null bldr / wiki; returned by Xowb_bldr_addon.Bldr_cmds
+ @Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return new Xowb_json_dump_cmd(bldr, wiki);} // new cmd for the given bldr / wiki
}
diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wbase_json_dump_db.java b/400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Xowb_json_dump_db.java
similarity index 68%
rename from 400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wbase_json_dump_db.java
rename to 400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Xowb_json_dump_db.java
index 5cbcce7bd..bdce66c3c 100644
--- a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wbase_json_dump_db.java
+++ b/400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Xowb_json_dump_db.java
@@ -15,61 +15,69 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-package gplx.xowa.xtns.wbases.imports; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*;
+package gplx.xowa.xtns.wbases.imports.json; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.imports.*;
import gplx.core.ios.*;
import gplx.langs.jsons.*;
-import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.sqls.*;
-import gplx.xowa.wikis.nss.*;
-import gplx.xowa.wikis.*; import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
-import gplx.xowa.apps.apis.xowa.bldrs.imports.*;
-import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.parsers.*;
-class Xob_wbase_json_dump_db {
- private final Gfo_usr_dlg usr_dlg; private final Xoae_app app; private final Xowe_wiki wiki; private final Xob_bldr bldr;
+import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.sqls.*; import gplx.xowa.apps.apis.xowa.bldrs.imports.*;
+import gplx.xowa.wikis.*; import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
+import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.parsers.*;
+class Xowb_json_dump_db { // writes one page row + zipped text per json doc and forwards the parsed doc to the qid / pid cmds
+ private final Xoae_app app; private final Gfo_usr_dlg usr_dlg; private final Xowe_wiki wiki; private final Xob_bldr bldr;
private final Json_parser json_parser;
private final Xob_wdata_pid_sql pid_cmd = new Xob_wdata_pid_sql(); private final Xob_wdata_qid_sql qid_cmd = new Xob_wdata_qid_sql();
- private Xowd_page_tbl page_tbl;
- private Xob_ns_to_db_mgr ns_to_db_mgr;
- private DateAdp page_modified_on;
- private Xow_db_mgr db_mgr;
- private Xowd_page_tbl page_core_tbl;
+ private Xow_ns_mgr ns_mgr; private Xow_db_mgr db_mgr;
+ private Xowd_page_tbl page_tbl; private Xob_ns_to_db_mgr ns_to_db_mgr;
private Io_stream_zip_mgr text_zip_mgr; private byte text_zip_tid;
- private Xow_ns_mgr ns_mgr;
- public Xob_wbase_json_dump_db(Xob_bldr bldr, Xowe_wiki wiki) {
+ private DateAdp page_modified_on; // set once in Parse_all_bgn; used as modified date for every created page
+ private int page_id = 0, page_count_main = 0; // page_id: last id passed to Create_page; page_count_main: number of qid docs
+ public Xowb_json_dump_db(Xob_bldr bldr, Xowe_wiki wiki) { // NOTE: dereferences bldr immediately, so bldr must not be null
this.app = bldr.App(); this.usr_dlg = app.Usr_dlg(); this.wiki = wiki; this.bldr = bldr;
this.json_parser = bldr.App().Wiki_mgr().Wdata_mgr().Jdoc_parser();
this.ns_mgr = wiki.Ns_mgr();
}
- public void Parse_bgn(long src_fil_len, String src_fil_name) {
+ public void Parse_all_bgn(long src_fil_len, String src_fil_name) { // call once before the first Parse_doc
+ // load wiki
Xowe_wiki_.Create(wiki, src_fil_len, src_fil_name);
this.db_mgr = wiki.Data__core_mgr();
this.page_tbl = db_mgr.Tbl__page();
pid_cmd.Cmd_ctor(bldr, wiki); qid_cmd.Cmd_ctor(bldr, wiki);
+
+ // create ns_mgr: default namespaces plus the wikidata property ns
wiki.Ns_mgr().Add_defaults();
wiki.Ns_mgr().Add_new(Wdata_wiki_mgr.Ns_property, Wdata_wiki_mgr.Ns_property_name);
wiki.Ns_mgr().Init();
+
+ // init ns_map
Xoapi_import import_cfg = app.Api_root().Bldr().Wiki().Import();
this.ns_to_db_mgr = new Xob_ns_to_db_mgr(new Xob_ns_to_db_wkr__text(), db_mgr, import_cfg.Text_db_max());
- this.text_zip_mgr = wiki.Utl__zip_mgr(); text_zip_tid = import_cfg.Zip_tid_text();
byte[] ns_file_map = import_cfg.New_ns_file_map(src_fil_len);
Xob_ns_file_itm.Init_ns_bldr_data(Xow_db_file_.Tid__text, wiki.Ns_mgr(), ns_file_map);
+
+ // start import
+ this.text_zip_mgr = wiki.Utl__zip_mgr(); this.text_zip_tid = import_cfg.Zip_tid_text();
this.page_modified_on = Datetime_now.Get();
- this.page_core_tbl = db_mgr.Tbl__page();
page_tbl.Insert_bgn();
qid_cmd.Page_wkr__bgn();
pid_cmd.Pid_bgn();
}
- private int page_id = 0, page_count_main = 0;
- public void Parse_cmd(byte[] json_bry) {
+ public void Parse_doc(byte[] json_bry) { // json_bry: raw bytes of one json doc; warns and skips the doc if it cannot be parsed or has no "id"
+ // parse to jdoc
Json_doc jdoc = json_parser.Parse(json_bry);
if (jdoc == null) {usr_dlg.Warn_many("", "", "wbase.json_dump:json is invalid: json=~{0}", json_bry); return;}
- byte[] id = jdoc.Get_val_as_bry_or(id_key, null);
+
+ // extract id; ids starting with 'Q' go to the main ns, all others to the property ns
+ byte[] id = jdoc.Get_val_as_bry_or(Bry__id_key, null);
if (id == null) {usr_dlg.Warn_many("", "", "wbase.json_dump:id is invalid: json=~{0}", json_bry); return;}
boolean jdoc_is_qid = Bry_.Has_at_bgn(id, Byte_ascii.Ltr_Q, 0);
Xow_ns ns = jdoc_is_qid ? ns_mgr.Ns_main() : ns_mgr.Ids_get_or_null(Wdata_wiki_mgr.Ns_property);
+
+ // create page entry; random_int is the ns's running page count. NOTE(review): ns is not null-checked although Ids_get_or_null may return null - confirm
int random_int = ns.Count() + 1; ns.Count_(random_int);
byte[] json_zip = text_zip_mgr.Zip(text_zip_tid, json_bry);
Xow_db_file text_db = ns_to_db_mgr.Get_by_ns(ns.Bldr_data(), json_zip.length);
- db_mgr.Create_page(page_core_tbl, text_db.Tbl__text(), ++page_id, ns.Id(), id, Bool_.N, page_modified_on, json_zip, json_bry.length, random_int, text_db.Id(), -1);
+ db_mgr.Create_page(page_tbl, text_db.Tbl__text(), ++page_id, ns.Id(), id, Bool_.N, page_modified_on, json_zip, json_bry.length, random_int, text_db.Id(), -1);
+
+ // pass jdoc to the qid or pid cmd; qid docs are also counted for site_stats
if (jdoc_is_qid) {
qid_cmd.Parse_jdoc(jdoc);
++page_count_main;
@@ -77,16 +85,18 @@ class Xob_wbase_json_dump_db {
else
pid_cmd.Parse_jdoc(jdoc);
}
- public void Parse_end() {
+ public void Parse_all_end() { // call once after the last Parse_doc: ends inserts, builds page index, saves stats / ns / modified date
page_tbl.Insert_end();
page_tbl.Create_idx();
qid_cmd.Qid_end();
pid_cmd.Pid_end();
ns_to_db_mgr.Rls_all();
+
+ // cleanup core
Xow_db_file db_core = db_mgr.Db__core();
db_core.Tbl__site_stats().Update(page_count_main, page_id, ns_mgr.Ns_file().Count()); // save page stats
- db_core.Tbl__ns().Insert(ns_mgr); // save ns
+ db_core.Tbl__ns().Insert(ns_mgr); // save ns
db_mgr.Tbl__cfg().Insert_str(Xow_cfg_consts.Grp__wiki_init, Xow_cfg_consts.Key__init__modified_latest, page_modified_on.XtoStr_fmt(DateAdp_.Fmt_iso8561_date_time));
}
- private static final byte[] id_key = Bry_.new_a7("id");
+ private static final byte[] Bry__id_key = Bry_.new_a7("id");
}
diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Xowb_json_dump_parser.java b/400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Xowb_json_dump_parser.java
new file mode 100644
index 000000000..9c7ea0af9
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Xowb_json_dump_parser.java
@@ -0,0 +1,87 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.xtns.wbases.imports.json; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.imports.*;
+import gplx.core.ios.*; import gplx.core.ios.streams.*;
+import gplx.xowa.bldrs.*;
+import gplx.xowa.wikis.data.tbls.*;
+class Xowb_json_dump_parser {
+  // reads a wikidata json dump one line at a time and passes each line (one json doc) to Xowb_json_dump_db
+  private final Xob_bldr bldr; private final Xowe_wiki wiki;
+  public Xowb_json_dump_parser(Xob_bldr bldr, Xowe_wiki wiki) {
+    this.bldr = bldr; this.wiki = wiki;
+  }
+  // imports json_dump_file into wiki; warns and returns if no dump is found or it does not start with "[\n{"; read errors are logged and rethrown via Err_
+  public void Parse(Io_url json_dump_file) {
+    // init; helpers are built per call, so the ctor only stores its args
+    Xoae_app app = bldr.App(); Gfo_usr_dlg usr_dlg = app.Usr_dlg();
+    Xowb_json_dump_db dump_db = new Xowb_json_dump_db(bldr, wiki);
+    Io_stream_unzip_mgr unzip_mgr = new Io_stream_unzip_mgr(app.Setup_mgr().Dump_mgr().Import_bz2_by_stdout(), app.Prog_mgr().App_decompress_bz2_by_stdout(), String_.Ary(".bz2", ".gz", ".zip"));
+    // open buffer from file
+    Io_stream_rdr stream = Io_stream_rdr_mgr.Get_rdr_or_null(json_dump_file, wiki.Fsys_mgr().Root_dir(), unzip_mgr, "*wikidata-*-all.json", "*wikidata-*-all.json.gz");
+    if (stream == null) {usr_dlg.Warn_many("", "", "wbase.import:file not found: src_dir=~{0}", wiki.Fsys_mgr().Root_dir()); return;}
+    Io_buffer_rdr buffer = Io_buffer_rdr.new_(stream, 10 * Io_mgr.Len_mb);
+    try {
+      // set page_bgn
+      if (!Bry_.Match(buffer.Bfr(), 0, 3, Bry_.new_a7("[\n{"))) {usr_dlg.Warn_many("", "", "wbase.import:doc_bgn is not '[\n': url=~{0}", stream.Url().Raw()); return;} // validate file; if schema ever changes this will fail
+      int page_bgn = 2; // 2="[\n"
+      // read file and create pages for each json item
+      dump_db.Parse_all_bgn(stream.Len(), stream.Url().NameAndExt());
+      Xowd_page_itm page = new Xowd_page_itm();
+      while (true) {
+        int cur_pos = Parse_doc(dump_db, buffer, page, page_bgn);
+        if (cur_pos == -1) break;
+        if (cur_pos < page_bgn) // can only happen after Parse_doc refilled the buffer, so progress is printed at most once per refill
+          bldr.Print_prog_msg(buffer.Fil_pos(), buffer.Fil_len(), 1, "reading ~{0} MB: ~{1} ~{2}", Int_.To_str_pad_bgn_zero((int)(buffer.Fil_pos() / Io_mgr.Len_mb), Int_.DigitCount((int)(buffer.Fil_len() / Io_mgr.Len_mb))), "", page.Ttl_page_db());
+        page_bgn = cur_pos;
+      }
+      dump_db.Parse_all_end();
+    }
+    catch (Exception e) {
+      String msg = usr_dlg.Warn_many("", "", "dump_rdr:error while reading; url=~{0} err=~{1}", json_dump_file.Raw(), Err_.Message_lang(e));
+      throw Err_.new_wo_type(msg);
+    }
+    finally {buffer.Rls();}
+  }
+  // parses the doc starting at page_bgn; returns the buffer pos just after its nl, or -1 when the closing "]" line is reached
+  private int Parse_doc(Xowb_json_dump_db dump_db, Io_buffer_rdr rdr, Xowd_page_itm page, int page_bgn) {
+    // init
+    int pos = page_bgn;
+    byte[] bry = rdr.Bfr();
+    int bry_len = rdr.Bfr_len();
+    while (true) {// loop 1 byte at a time until nl
+      if (pos == bry_len) { // refill if at end of 10 MB bfr
+        rdr.Bfr_load_from(page_bgn);
+        bry_len = rdr.Bfr_len();
+        pos -= page_bgn;
+        page_bgn = 0;
+        if (pos >= bry_len) { // refill added no bytes: dump is truncated, lacks the final nl, or the doc is larger than the bfr
+          if (pos == 1 && bry[0] == Byte_ascii.Brack_end) return -1; // dump ends with "]" but no trailing nl; treat as normal EOF
+          throw Err_.new_wo_type("wbase.import:no more data but json doc is not terminated by nl; pending_len=" + pos);
+        }
+      }
+      // read byte; parse if nl; otherwise move to next byte
+      byte b = bry[pos]; // in bounds: pos < bry_len is guaranteed by the refill check above
+      if (b == Byte_ascii.Nl) {
+        if (pos - page_bgn == 1 && bry[page_bgn] == Byte_ascii.Brack_end) return -1; // EOF; note that json dump ends with "]\n"
+        dump_db.Parse_doc(Bry_.Mid(bry, page_bgn, pos));
+        return pos + 1;
+      }
+      else ++pos;
+    }
+  }
+}