From aaa8ebadd782e240ea5db5844c30704e335d89fd Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Sun, 9 Apr 2017 22:08:54 -0400 Subject: [PATCH] Download_central: Add support for wikitext downloads --- 400_xowa/src/gplx/xowa/Xoa_app_.java | 4 ++-- .../xowa/addons/bldrs/centrals/mgrs/Xobc_skip_mgr.java | 1 + .../addons/bldrs/centrals/tasks/Xobc_task_regy__work.java | 8 ++++---- .../addons/bldrs/exports/packs/files/Pack_file_mgr.java | 4 ++-- .../xowa/addons/bldrs/exports/packs/files/Pack_hash.java | 2 ++ .../xowa/addons/bldrs/exports/packs/files/Pack_list.java | 7 +++++-- 400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser.java | 4 ++++ 7 files changed, 20 insertions(+), 10 deletions(-) diff --git a/400_xowa/src/gplx/xowa/Xoa_app_.java b/400_xowa/src/gplx/xowa/Xoa_app_.java index 1ba7bc9ca..e8d0988d9 100644 --- a/400_xowa/src/gplx/xowa/Xoa_app_.java +++ b/400_xowa/src/gplx/xowa/Xoa_app_.java @@ -30,8 +30,8 @@ public class Xoa_app_ { } } public static final String Name = "xowa"; - public static final int Version_id = 523; - public static final String Version = "4.5.2.1703"; + public static final int Version_id = 524; + public static final String Version = "4.5.3.1703"; public static String Build_date = "2012-12-30 00:00:00"; public static String Build_date_fmt = "yyyy-MM-dd HH:mm:ss"; public static String Op_sys_str; diff --git a/400_xowa/src/gplx/xowa/addons/bldrs/centrals/mgrs/Xobc_skip_mgr.java b/400_xowa/src/gplx/xowa/addons/bldrs/centrals/mgrs/Xobc_skip_mgr.java index c904292da..b2253d45c 100644 --- a/400_xowa/src/gplx/xowa/addons/bldrs/centrals/mgrs/Xobc_skip_mgr.java +++ b/400_xowa/src/gplx/xowa/addons/bldrs/centrals/mgrs/Xobc_skip_mgr.java @@ -28,6 +28,7 @@ public class Xobc_skip_mgr implements Gfo_invk { } public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) { if (ctx.Match(k, Cfg__namespaces_category)) category_enabled = m.ReadBool("v"); + else if (ctx.Match(k, Cfg__namespaces_category)) category_enabled = m.ReadBool("v"); else return Gfo_invk_.Rv_unhandled; return this; } diff --git a/400_xowa/src/gplx/xowa/addons/bldrs/centrals/tasks/Xobc_task_regy__work.java b/400_xowa/src/gplx/xowa/addons/bldrs/centrals/tasks/Xobc_task_regy__work.java index 966924b0c..94b53b2cd 100644 --- a/400_xowa/src/gplx/xowa/addons/bldrs/centrals/tasks/Xobc_task_regy__work.java +++ b/400_xowa/src/gplx/xowa/addons/bldrs/centrals/tasks/Xobc_task_regy__work.java @@ -47,10 +47,10 @@ public class Xobc_task_regy__work extends Xobc_task_regy__base { } public void Run_task(Xobc_task_itm task, Xobc_cmd_itm cmd) { // if task marked for skip, launch skip-cmd on separate thread and exit; -// if (task_mgr.Skip_mgr().Should_skip(task_mgr.Data_db().Tbl__import_step().Select_one(cmd.Step_id()))) { -// thread_mgr.Add("skip_" + cmd.Cmd_uid(), Thread_adp_.Start_by_key("skip_xobc: " + cmd.Cmd_name(), new Xobc_task_skip(this, cmd), "")); -// return; -// } + if (task_mgr.Skip_mgr().Should_skip(task_mgr.Data_db().Tbl__import_step().Select_one(cmd.Step_id()))) { + thread_mgr.Add("skip_" + cmd.Cmd_uid(), Thread_adp_.Start_by_key("skip_xobc: " + cmd.Cmd_name(), new Xobc_task_skip(this, cmd), "")); + return; + } task.Task_status_(gplx.core.progs.Gfo_prog_ui_.Status__working); task_mgr.Send_json("xo.bldr.work.prog__start__recv", task.Save_to(Gfobj_nde.New())); diff --git a/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_file_mgr.java b/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_file_mgr.java index 740fb07a0..c02fde03e 100644 --- a/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_file_mgr.java +++ b/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_file_mgr.java @@ -49,12 +49,12 @@ public class Pack_file_mgr { } // build tasks - if (cfg.Pack_text()) // right now, only for wikidata - Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__text, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__wiki__text, Xobc_import_type.Tid__wiki__ctg, Xobc_import_type.Tid__wiki__wbase); if (cfg.Pack_html()) Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__html, Xobc_import_type.Tid__wiki__core, Xobc_import_type.Tid__wiki__srch, Xobc_import_type.Tid__wiki__html, Xobc_import_type.Tid__wiki__ctg, Xobc_import_type.Tid__wiki__lucene); if (cfg.Pack_file()) Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__file, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__file__data); // , Xobc_import_type.Tid__fsdb__delete + if (cfg.Pack_text()) // right now, only for wikidata + Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__text, Xobc_import_type.Tid__wiki__text, Xobc_import_type.Tid__wiki__wbase); // , Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__wiki__ctg if (cfg.Pack_custom()) Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, cfg.Pack_custom_name(), Xobc_import_type.Tid__misc); bc_conn.Txn_end(); diff --git a/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_hash.java b/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_hash.java index 951f825d5..8311d9c4a 100644 --- a/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_hash.java +++ b/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_hash.java @@ -27,6 +27,8 @@ class Pack_hash { hash.Add(list_tid, list); } Pack_itm itm = new Pack_itm(list_tid, pack_url, raw_urls); + if (list.Has(pack_url)) + return itm; list.Add(itm); return itm; } diff --git a/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_list.java b/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_list.java index 9885d165f..d9f953427 100644 --- a/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_list.java +++ b/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_list.java @@ -15,11 +15,14 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.addons.bldrs.exports.packs.files; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.exports.*; import gplx.xowa.addons.bldrs.exports.packs.*; class Pack_list { - private final List_adp list = List_adp_.New(); + private final Ordered_hash list = Ordered_hash_.New(); public Pack_list(int tid) {this.tid = tid;} public int Tid() {return tid;} private final int tid; public int Len() {return list.Len();} public Pack_itm Get_at(int i) {return (Pack_itm)list.Get_at(i);} - public void Add(Pack_itm itm) {list.Add(itm);} + public void Add(Pack_itm itm) {list.Add(itm.Zip_url().Raw(), itm);} public void Clear() {list.Clear();} + public boolean Has(Io_url url) { + return list.Has(url.Raw()); + } } diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser.java index 799e6ce2b..de68bc806 100644 --- a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser.java +++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_parser.java @@ -29,7 +29,11 @@ public class Mwh_doc_parser { public void Parse(Mwh_doc_wkr wkr, byte[] src, int src_bgn, int src_end) { this.wkr = wkr; this.src = src; this.src_end = src_end; this.nde_regy = wkr.Nde_regy(); + + // clear nde_stack.Clear(); + dom_mgr.Clear(); // must clear, or NegativeArraySizeException during mass_parse; DATE:2017-04-09 + int pos = txt_bgn = src_bgn; nde_uid = cur_nde_tid = -1; cur_nde = null;