mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
'v3.7.4.1'
This commit is contained in:
@@ -145,7 +145,7 @@ public abstract class Xob_dump_mgr_base extends Xob_itm_basic_base implements Xo
|
||||
byte[] page_src = page.Text();
|
||||
if (page_src != null) // some pages have no text; ignore them else null ref; PAGE: it.d:miercuri DATE:2015-12-05
|
||||
Exec_pg_itm_hook(ns_ord, ns, page, page_src);
|
||||
ctx.App().Utl__bfr_mkr().Clear_fail_check(); // make sure all bfrs are released
|
||||
ctx.Wiki().Utl__bfr_mkr().Clear_fail_check(); // make sure all bfrs are released
|
||||
if (ctx.Wiki().Cache_mgr().Tmpl_result_cache().Count() > 50000)
|
||||
ctx.Wiki().Cache_mgr().Tmpl_result_cache().Clear();
|
||||
++exec_count;
|
||||
@@ -159,7 +159,7 @@ public abstract class Xob_dump_mgr_base extends Xob_itm_basic_base implements Xo
|
||||
}
|
||||
catch (Exception exc) {
|
||||
bldr.Usr_dlg().Warn_many("", "", "parse failed: wiki=~{0} ttl=~{1} err=~{2}", wiki.Domain_str(), page.Ttl_full_db(), Err_.Message_gplx_log(exc));
|
||||
ctx.App().Utl__bfr_mkr().Clear();
|
||||
ctx.Wiki().Utl__bfr_mkr().Clear();
|
||||
this.Free();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,6 +69,7 @@ public abstract class Xob_categorylinks_base extends Xob_sql_dump_base implement
|
||||
// Finishes this categorylinks cmd: runs the merger over the dumped files, switches off the
// wiki's "importing categories" flag, then removes the source .sql dump file(s).
@Override public void Cmd_end() {
|
||||
// Hand the dump files to Xobdc_merger.Basic, using the "sort" subdir of temp_dir as work area,
// Xoctg_link_sql_sorter as the sorter, a noop key generator, and the cmd built by Make_sort_cmd.
// NOTE(review): presumably this sorts/merges the dumped rows and feeds them to the make cmd — confirm in Xobdc_merger.
Xobdc_merger.Basic(bldr.Usr_dlg(), dump_url_gen, temp_dir.GenSubDir("sort"), sort_mem_len, Xoctg_link_sql_sorter.Instance, Io_line_rdr_key_gen_.noop, Make_sort_cmd(sql_parser));
|
||||
// Importing is over: reset the flag on the wiki's html mgr.
wiki.Html_mgr().Importing_ctgs_(Bool_.N);
|
||||
// Delete the downloaded dump for this cmd from the wiki root dir; Delete_sql_files matches on
// Sql_file_name() + ".sql" with extensions ".gz" and ".sql".
gplx.xowa.bldrs.wkrs.Xob_io_utl_.Delete_sql_files(wiki.Fsys_mgr().Root_dir(), this.Sql_file_name());
|
||||
}
|
||||
private static final byte[] Fld_cl_from = Bry_.new_a7("cl_from"), Fld_cl_to = Bry_.new_a7("cl_to"), Fld_cl_timestamp = Bry_.new_a7("cl_timestamp"), Fld_cl_collation = Bry_.new_a7("cl_collation"), Fld_cl_sortkey = Bry_.new_a7("cl_sortkey"), Fld_cl_type = Bry_.new_a7("cl_type");
|
||||
private static final byte[] Collation_uca = Bry_.new_a7("uca"), Sortkey_space = new byte[] {Byte_ascii.Space};
|
||||
|
||||
@@ -29,7 +29,7 @@ public abstract class Xob_ctg_v1_base extends Xob_itm_dump_base implements Xobd_
|
||||
// Accessor for the worker's hook table; the backing field is created once via Ordered_hash_.New_bry().
// NOTE(review): Wkr_bgn calls wkr_hooks_add with the language's ns names/aliases, which presumably fills this hash — confirm.
public Ordered_hash Wkr_hooks() {return wkr_hooks;} private Ordered_hash wkr_hooks = Ordered_hash_.New_bry();
|
||||
public void Wkr_bgn(Xob_bldr bldr) {
|
||||
this.Init_dump(this.Wkr_key(), wiki.Tdb_fsys_mgr().Site_dir().GenSubDir(Xotdb_dir_info_.Name_category));
|
||||
Bry_bfr tmp_bfr = bldr.App().Utl__bfr_mkr().Get_b512();
|
||||
Bry_bfr tmp_bfr = wiki.Utl__bfr_mkr().Get_b512();
|
||||
Xol_lang_itm lang = wiki.Lang();
|
||||
wkr_hooks_add(tmp_bfr, lang.Ns_names());
|
||||
wkr_hooks_add(tmp_bfr, lang.Ns_aliases());
|
||||
|
||||
@@ -27,7 +27,7 @@ class Xob_ctg_v1_sql_make implements Io_make_cmd {
|
||||
private Gfo_fld_rdr fld_rdr = Gfo_fld_rdr.xowa_(); private Xob_tmp_wtr sql_wtr; private Gfo_usr_dlg usr_dlg; private boolean is_first = true;
|
||||
private byte[] prv_ctg_name = Bry_.Empty; private int prv_page_id = 0;
|
||||
private Xowe_wiki wiki; private Xodb_mgr_sql db_mgr; private int page_count = 0; private int progress_interval = 10000;
|
||||
private final Bry_fmtr fmtr = Bry_fmtr.new_("(~{page_id},'~{cat_name}','','','','','~{cat_type}')\n", "page_id", "cat_name", "cat_type");
|
||||
private final Bry_fmtr fmtr = Bry_fmtr.new_("(~{page_id},'~{cat_name}','','','','','~{cat_type}')\n", "page_id", "cat_name", "cat_type");
|
||||
// Keeps the wiki and caches its sql db mgr (wiki.Db_mgr_as_sql()) for later use by this make cmd.
public Xob_ctg_v1_sql_make(Xowe_wiki wiki) {this.wiki = wiki; db_mgr = wiki.Db_mgr_as_sql();}
|
||||
// No-op setter: the url argument is not stored or used; returns this instance so calls can be chained.
public Io_sort_cmd Make_dir_(Io_url v) {return this;}	// ignore
|
||||
public void Sort_bgn() {
|
||||
@@ -57,10 +57,10 @@ class Xob_ctg_v1_sql_make implements Io_make_cmd {
|
||||
sql_wtr.Flush(usr_dlg);
|
||||
db_mgr.Category_version_update(true);
|
||||
}
|
||||
private static final byte[] Sql_hdr = Bry_.new_a7("INSERT INTO 'categorylinks' VALUES");
|
||||
public static final String Url_sql = "xowa_categorylinks.sql";
|
||||
private static final byte[] Sql_hdr = Bry_.new_a7("INSERT INTO 'categorylinks' VALUES");
|
||||
public static final String Url_sql = "xowa_categorylinks.sql";
|
||||
private static byte[] Escape_for_sql(Xowe_wiki wiki, byte[] bry) {
|
||||
Bry_bfr bfr = wiki.Appe().Utl__bfr_mkr().Get_b512();
|
||||
Bry_bfr bfr = wiki.Utl__bfr_mkr().Get_b512();
|
||||
int len = bry.length;
|
||||
boolean dirty = false;
|
||||
for (int i = 0; i < len; i++) {
|
||||
|
||||
@@ -38,12 +38,10 @@ public class Xoctg_hiddencat_parser_sql extends Xoctg_hiddencat_parser_base {
|
||||
tbl.Update_end();
|
||||
if (!Env_.Mode_testing()) // NOTE: do not delete when testing
|
||||
Io_mgr.Instance.DeleteDirDeep(wiki.Fsys_mgr().Tmp_dir()); // delete /wiki/wiki_name/tmp
|
||||
Io_url[] sql_files = Io_mgr.Instance.QueryDir_args(wiki.Fsys_mgr().Root_dir()).FilPath_("*.sql.gz").ExecAsUrlAry();
|
||||
int len = sql_files.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
Io_url sql_file = sql_files[i];
|
||||
Io_mgr.Instance.DeleteFil(sql_file);
|
||||
}
|
||||
Io_mgr.Instance.DeleteFil_args(wiki.Fsys_mgr().Root_dir().GenSubFil("xowa_categorylinks.sql")).MissingFails_off().Exec();
|
||||
|
||||
// cleanup; delete files;
|
||||
Io_url wiki_root_dir = wiki.Fsys_mgr().Root_dir();
|
||||
gplx.xowa.bldrs.wkrs.Xob_io_utl_.Delete_sql_files(wiki_root_dir, this.Sql_file_name());
|
||||
Io_mgr.Instance.DeleteFil_args(wiki_root_dir.GenSubFil("xowa_categorylinks.sql")).MissingFails_off().Exec();
|
||||
}
|
||||
}
|
||||
|
||||
60
400_xowa/src/gplx/xowa/bldrs/wkrs/Xob_io_utl_.java
Normal file
60
400_xowa/src/gplx/xowa/bldrs/wkrs/Xob_io_utl_.java
Normal file
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
public class Xob_io_utl_ {
|
||||
public static void Delete_sql_files(Io_url wiki_dir, String sql_file_name) {
|
||||
Delete_by_wildcard(wiki_dir, sql_file_name + ".sql", ".gz", ".sql");
|
||||
}
|
||||
public static void Delete_by_wildcard(Io_url dir, String name_pattern, String... ext_ary) {
|
||||
List_adp list = Find_by_wildcard(Io_mgr.Instance.QueryDir_args(dir).ExecAsUrlAry(), name_pattern, ext_ary);
|
||||
int len = list.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Io_url url = (Io_url)list.Get_at(i);
|
||||
Io_mgr.Instance.DeleteFil(url);
|
||||
}
|
||||
}
|
||||
public static Io_url Find_nth_by_wildcard_or_null(Io_url dir, String name_pattern, String... ext_ary) {
|
||||
return Find_nth_by_wildcard_or_null(Io_mgr.Instance.QueryDir_args(dir).ExecAsUrlAry(), name_pattern, ext_ary);
|
||||
}
|
||||
public static Io_url Find_nth_by_wildcard_or_null(Io_url[] fil_ary, String name_pattern, String... ext_ary) {
|
||||
List_adp list = Find_by_wildcard(fil_ary, name_pattern, ext_ary);
|
||||
int list_len = list.Len();
|
||||
return list_len == 0 ? null : (Io_url)list.Get_at(list_len - 1);
|
||||
}
|
||||
public static List_adp Find_by_wildcard(Io_url[] fil_ary, String name_pattern, String... ext_ary) {
|
||||
List_adp rv = List_adp_.New();
|
||||
|
||||
// create ext_hash
|
||||
Ordered_hash ext_hash = Ordered_hash_.New();
|
||||
for (String ext : ext_ary)
|
||||
ext_hash.Add(ext, ext);
|
||||
|
||||
// iterate fil_ary
|
||||
for (Io_url fil : fil_ary) {
|
||||
// file matches pattern
|
||||
if ( name_pattern == Pattern__wilcard // empty String means match anything
|
||||
|| String_.Has(fil.NameAndExt(), name_pattern)) { // name has name_pattern; EX: "enwiki-latest-pages-articles-current.xml" and "pagelinks"
|
||||
if ( ext_hash.Len() == 0 // empty hash means match any ext
|
||||
|| ext_hash.Has(fil.Ext())) // ext exists in hash
|
||||
rv.Add(fil);
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static final String Pattern__wilcard = String_.Empty;
|
||||
}
|
||||
43
400_xowa/src/gplx/xowa/bldrs/wkrs/Xob_io_utl__tst.java
Normal file
43
400_xowa/src/gplx/xowa/bldrs/wkrs/Xob_io_utl__tst.java
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xob_io_utl__tst {
|
||||
private final Xob_io_utl__fxt fxt = new Xob_io_utl__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__match(String_.Ary("a.txt", "b.txt", "c.txt"), "b", String_.Ary(".txt"), "b.txt");
|
||||
}
|
||||
@Test public void Include__ext() {// PURPOSE: handle calls like "a.sql", ".sql", ".gz"
|
||||
fxt.Test__match(String_.Ary("a.txt", "b.txt", "c.txt"), "b.txt", String_.Ary(".txt"), "b.txt");
|
||||
}
|
||||
@Test public void Dupe__pick_last() {
|
||||
fxt.Test__match(String_.Ary("b0.txt", "b1.txt", "b2.txt"), "b", String_.Ary(".txt"), "b2.txt");
|
||||
}
|
||||
@Test public void Ext() {
|
||||
fxt.Test__match(String_.Ary("b.txt", "b.png", "b.xml"), "b", String_.Ary(".xml", ".bz2"), "b.xml");
|
||||
}
|
||||
@Test public void Ext__dupes() {
|
||||
fxt.Test__match(String_.Ary("b.txt", "b.png", "b.xml"), "b", String_.Ary(".txt", ".xml"), "b.xml");
|
||||
}
|
||||
}
|
||||
class Xob_io_utl__fxt {
|
||||
public void Test__match(String[] path_ary, String name_pattern, String[] ext_ary, String expd) {
|
||||
Io_url actl = Xob_io_utl_.Find_nth_by_wildcard_or_null(Io_url_.Ary(path_ary), name_pattern, ext_ary);
|
||||
Gftest.Eq__str(expd, actl == null ? "<<NULL>>" : actl.Raw());
|
||||
}
|
||||
}
|
||||
@@ -29,7 +29,7 @@ public abstract class Xob_sql_dump_base extends Xob_itm_dump_base implements Xob
|
||||
this.Init_dump(this.Cmd_key());
|
||||
make_url_gen = Io_url_gen_.dir_(temp_dir.GenSubDir("make"));
|
||||
if (src_fil == null) {
|
||||
src_fil = Xotdb_fsys_mgr.Find_file_or_null(wiki.Fsys_mgr().Root_dir(), "*" + Sql_file_name() + "*", ".gz", ".sql");
|
||||
src_fil = Xob_io_utl_.Find_nth_by_wildcard_or_null(wiki.Fsys_mgr().Root_dir(), Sql_file_name() + ".sql", ".gz", ".sql");
|
||||
if (src_fil == null) {
|
||||
String msg = String_.Format(".sql file not found in dir.\nPlease download the file for your wiki from dumps.wikimedia.org.\nfile={0} dir={1}", Sql_file_name(), wiki.Fsys_mgr().Root_dir());
|
||||
app.Usr_dlg().Warn_many("", "", msg);
|
||||
|
||||
@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.bldrs.xmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.ios.*; import gplx.core.ios.streams.*; import gplx.core.envs.*;
|
||||
import gplx.xowa.wikis.ctgs.*; import gplx.xowa.wikis.tdbs.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
public class Xob_import_cfg {
|
||||
public Xob_import_cfg(Xowe_wiki wiki) {this.wiki = wiki;} private Xowe_wiki wiki; private boolean src_fil_is_bz2 = true;
|
||||
public byte Category_version() {return category_version;} public Xob_import_cfg Category_version_(byte v) {category_version = v; return this;} private byte category_version = Xoa_ctg_mgr.Version_1;
|
||||
@@ -34,7 +35,8 @@ public class Xob_import_cfg {
|
||||
}
|
||||
public Io_stream_rdr Src_rdr() {
|
||||
if (src_fil_xml == null && src_fil_bz2 == null) { // will usually be null; non-null when user specifies src through command-line
|
||||
Io_url url = Xotdb_fsys_mgr.Find_file_or_fail(wiki.Fsys_mgr().Root_dir(), "*", ".xml", ".bz2");
|
||||
Io_url url = Xob_io_utl_.Find_nth_by_wildcard_or_null(wiki.Fsys_mgr().Root_dir(), Xob_io_utl_.Pattern__wilcard, ".xml", ".bz2");
|
||||
if (url == null) throw Err_.new_wo_type("could not find any .xml or .bz2 file", "dir", wiki.Fsys_mgr().Root_dir().Raw());
|
||||
if (String_.Eq(url.Ext(), ".xml")) Src_fil_xml_(url);
|
||||
else Src_fil_bz2_(url);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user