1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

'v3.7.4.1'

This commit is contained in:
gnosygnu
2016-07-25 21:59:51 -04:00
parent 7a851a41a5
commit 8e91ac0bc4
175 changed files with 2079 additions and 933 deletions

View File

@@ -145,7 +145,7 @@ public abstract class Xob_dump_mgr_base extends Xob_itm_basic_base implements Xo
byte[] page_src = page.Text();
if (page_src != null) // some pages have no text; ignore them else null ref; PAGE: it.d:miercuri DATE:2015-12-05
Exec_pg_itm_hook(ns_ord, ns, page, page_src);
ctx.App().Utl__bfr_mkr().Clear_fail_check(); // make sure all bfrs are released
ctx.Wiki().Utl__bfr_mkr().Clear_fail_check(); // make sure all bfrs are released
if (ctx.Wiki().Cache_mgr().Tmpl_result_cache().Count() > 50000)
ctx.Wiki().Cache_mgr().Tmpl_result_cache().Clear();
++exec_count;
@@ -159,7 +159,7 @@ public abstract class Xob_dump_mgr_base extends Xob_itm_basic_base implements Xo
}
catch (Exception exc) {
bldr.Usr_dlg().Warn_many("", "", "parse failed: wiki=~{0} ttl=~{1} err=~{2}", wiki.Domain_str(), page.Ttl_full_db(), Err_.Message_gplx_log(exc));
ctx.App().Utl__bfr_mkr().Clear();
ctx.Wiki().Utl__bfr_mkr().Clear();
this.Free();
}
}

View File

@@ -69,6 +69,7 @@ public abstract class Xob_categorylinks_base extends Xob_sql_dump_base implement
// Runs at the end of the categorylinks command: hands the "sort" temp subdir to Xobdc_merger.Basic, sets the wiki's Importing_ctgs flag to Bool_.N, and deletes this command's sql dump files from the wiki root dir.
@Override public void Cmd_end() {
// NOTE(review): presumably merges the sorted temp files using the cmd built by Make_sort_cmd; confirm against Xobdc_merger
Xobdc_merger.Basic(bldr.Usr_dlg(), dump_url_gen, temp_dir.GenSubDir("sort"), sort_mem_len, Xoctg_link_sql_sorter.Instance, Io_line_rdr_key_gen_.noop, Make_sort_cmd(sql_parser));
// NOTE(review): Bool_.N presumably marks that category importing is finished; confirm
wiki.Html_mgr().Importing_ctgs_(Bool_.N);
// cleanup; delete files in the wiki root dir whose name contains "{Sql_file_name()}.sql" and whose ext is ".gz" or ".sql"
gplx.xowa.bldrs.wkrs.Xob_io_utl_.Delete_sql_files(wiki.Fsys_mgr().Root_dir(), this.Sql_file_name());
}
private static final byte[] Fld_cl_from = Bry_.new_a7("cl_from"), Fld_cl_to = Bry_.new_a7("cl_to"), Fld_cl_timestamp = Bry_.new_a7("cl_timestamp"), Fld_cl_collation = Bry_.new_a7("cl_collation"), Fld_cl_sortkey = Bry_.new_a7("cl_sortkey"), Fld_cl_type = Bry_.new_a7("cl_type");
private static final byte[] Collation_uca = Bry_.new_a7("uca"), Sortkey_space = new byte[] {Byte_ascii.Space};

View File

@@ -29,7 +29,7 @@ public abstract class Xob_ctg_v1_base extends Xob_itm_dump_base implements Xobd_
public Ordered_hash Wkr_hooks() {return wkr_hooks;} private Ordered_hash wkr_hooks = Ordered_hash_.New_bry();
public void Wkr_bgn(Xob_bldr bldr) {
this.Init_dump(this.Wkr_key(), wiki.Tdb_fsys_mgr().Site_dir().GenSubDir(Xotdb_dir_info_.Name_category));
Bry_bfr tmp_bfr = bldr.App().Utl__bfr_mkr().Get_b512();
Bry_bfr tmp_bfr = wiki.Utl__bfr_mkr().Get_b512();
Xol_lang_itm lang = wiki.Lang();
wkr_hooks_add(tmp_bfr, lang.Ns_names());
wkr_hooks_add(tmp_bfr, lang.Ns_aliases());

View File

@@ -27,7 +27,7 @@ class Xob_ctg_v1_sql_make implements Io_make_cmd {
private Gfo_fld_rdr fld_rdr = Gfo_fld_rdr.xowa_(); private Xob_tmp_wtr sql_wtr; private Gfo_usr_dlg usr_dlg; private boolean is_first = true;
private byte[] prv_ctg_name = Bry_.Empty; private int prv_page_id = 0;
private Xowe_wiki wiki; private Xodb_mgr_sql db_mgr; private int page_count = 0; private int progress_interval = 10000;
private final Bry_fmtr fmtr = Bry_fmtr.new_("(~{page_id},'~{cat_name}','','','','','~{cat_type}')\n", "page_id", "cat_name", "cat_type");
private final Bry_fmtr fmtr = Bry_fmtr.new_("(~{page_id},'~{cat_name}','','','','','~{cat_type}')\n", "page_id", "cat_name", "cat_type");
public Xob_ctg_v1_sql_make(Xowe_wiki wiki) {this.wiki = wiki; db_mgr = wiki.Db_mgr_as_sql();}
public Io_sort_cmd Make_dir_(Io_url v) {return this;} // ignore
public void Sort_bgn() {
@@ -57,10 +57,10 @@ class Xob_ctg_v1_sql_make implements Io_make_cmd {
sql_wtr.Flush(usr_dlg);
db_mgr.Category_version_update(true);
}
private static final byte[] Sql_hdr = Bry_.new_a7("INSERT INTO 'categorylinks' VALUES");
public static final String Url_sql = "xowa_categorylinks.sql";
private static final byte[] Sql_hdr = Bry_.new_a7("INSERT INTO 'categorylinks' VALUES");
public static final String Url_sql = "xowa_categorylinks.sql";
private static byte[] Escape_for_sql(Xowe_wiki wiki, byte[] bry) {
Bry_bfr bfr = wiki.Appe().Utl__bfr_mkr().Get_b512();
Bry_bfr bfr = wiki.Utl__bfr_mkr().Get_b512();
int len = bry.length;
boolean dirty = false;
for (int i = 0; i < len; i++) {

View File

@@ -38,12 +38,10 @@ public class Xoctg_hiddencat_parser_sql extends Xoctg_hiddencat_parser_base {
tbl.Update_end();
if (!Env_.Mode_testing()) // NOTE: do not delete when testing
Io_mgr.Instance.DeleteDirDeep(wiki.Fsys_mgr().Tmp_dir()); // delete /wiki/wiki_name/tmp
Io_url[] sql_files = Io_mgr.Instance.QueryDir_args(wiki.Fsys_mgr().Root_dir()).FilPath_("*.sql.gz").ExecAsUrlAry();
int len = sql_files.length;
for (int i = 0; i < len; i++) {
Io_url sql_file = sql_files[i];
Io_mgr.Instance.DeleteFil(sql_file);
}
Io_mgr.Instance.DeleteFil_args(wiki.Fsys_mgr().Root_dir().GenSubFil("xowa_categorylinks.sql")).MissingFails_off().Exec();
// cleanup; delete files;
Io_url wiki_root_dir = wiki.Fsys_mgr().Root_dir();
gplx.xowa.bldrs.wkrs.Xob_io_utl_.Delete_sql_files(wiki_root_dir, this.Sql_file_name());
Io_mgr.Instance.DeleteFil_args(wiki_root_dir.GenSubFil("xowa_categorylinks.sql")).MissingFails_off().Exec();
}
}

View File

@@ -0,0 +1,60 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
/**
 * Builder file helpers: find or delete files by a name substring plus a set of accepted extensions.
 */
public class Xob_io_utl_ {
	/**
	 * Deletes the sql dump files for one table from a wiki dir.
	 * Matches files whose name contains "{sql_file_name}.sql" and whose extension is ".gz" or ".sql".
	 */
	public static void Delete_sql_files(Io_url wiki_dir, String sql_file_name) {
		Delete_by_wildcard(wiki_dir, sql_file_name + ".sql", ".gz", ".sql");
	}
	/**
	 * Deletes every file directly listed for dir that matches name_pattern and ext_ary.
	 * Matching rules are those of {@link #Find_by_wildcard}.
	 */
	public static void Delete_by_wildcard(Io_url dir, String name_pattern, String... ext_ary) {
		List_adp list = Find_by_wildcard(Io_mgr.Instance.QueryDir_args(dir).ExecAsUrlAry(), name_pattern, ext_ary);
		int len = list.Len();
		for (int i = 0; i < len; ++i) {
			Io_url url = (Io_url)list.Get_at(i);
			Io_mgr.Instance.DeleteFil(url);
		}
	}
	/** Queries dir for its files and returns the last one that matches, or null if none match. */
	public static Io_url Find_nth_by_wildcard_or_null(Io_url dir, String name_pattern, String... ext_ary) {
		return Find_nth_by_wildcard_or_null(Io_mgr.Instance.QueryDir_args(dir).ExecAsUrlAry(), name_pattern, ext_ary);
	}
	/** Returns the last entry of fil_ary (in array order) that matches, or null if none match. */
	public static Io_url Find_nth_by_wildcard_or_null(Io_url[] fil_ary, String name_pattern, String... ext_ary) {
		List_adp list = Find_by_wildcard(fil_ary, name_pattern, ext_ary);
		int list_len = list.Len();
		return list_len == 0 ? null : (Io_url)list.Get_at(list_len - 1);
	}
	/**
	 * Filters fil_ary, keeping array order.
	 * A file is kept when (a) name_pattern equals Pattern__wilcard or its NameAndExt contains name_pattern,
	 * and (b) ext_ary is empty or its Ext is one of ext_ary.
	 * @param fil_ary       candidate files
	 * @param name_pattern  substring to look for in the file name; Pattern__wilcard matches anything
	 * @param ext_ary       accepted extensions including the dot; EX: ".sql", ".gz"; none means any ext
	 * @return list of matching Io_url items; empty when nothing matches
	 */
	public static List_adp Find_by_wildcard(Io_url[] fil_ary, String name_pattern, String... ext_ary) {
		List_adp rv = List_adp_.New();
		// create ext_hash; skip exts already registered so a repeated ext in ext_ary is harmless
		// NOTE(review): Ordered_hash.Add presumably rejects duplicate keys; the Has guard avoids relying on that
		Ordered_hash ext_hash = Ordered_hash_.New();
		for (String ext : ext_ary) {
			if (!ext_hash.Has(ext))
				ext_hash.Add(ext, ext);
		}
		// compare by value, not by reference: "==" on Strings only matches the identical object
		boolean match_any_name = String_.Eq(name_pattern, Pattern__wilcard);	// empty String means match anything
		boolean match_any_ext = ext_hash.Len() == 0;							// empty hash means match any ext
		// iterate fil_ary
		for (Io_url fil : fil_ary) {
			// name must have name_pattern; EX: "enwiki-latest-pages-articles-current.xml" and "pagelinks"
			if (!match_any_name && !String_.Has(fil.NameAndExt(), name_pattern)) continue;
			// ext must exist in hash
			if (match_any_ext || ext_hash.Has(fil.Ext()))
				rv.Add(fil);
		}
		return rv;
	}
	/** Name pattern that matches every file. NOTE: identifier is misspelt ("wilcard") but is kept as-is because callers reference it. */
	public static final String Pattern__wilcard = String_.Empty;
}

View File

@@ -0,0 +1,43 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*; import gplx.core.tests.*;
/** Tests for Xob_io_utl_.Find_nth_by_wildcard_or_null over an in-memory array of file urls. */
public class Xob_io_utl__tst {
	private final Xob_io_utl__fxt fxt = new Xob_io_utl__fxt();
	// one file name contains the pattern
	@Test public void Basic() {
		String[] files = String_.Ary("a.txt", "b.txt", "c.txt");
		String[] exts = String_.Ary(".txt");
		fxt.Test__match(files, "b", exts, "b.txt");
	}
	// PURPOSE: pattern may itself carry the ext; handle calls like "a.sql", ".sql", ".gz"
	@Test public void Include__ext() {
		String[] files = String_.Ary("a.txt", "b.txt", "c.txt");
		String[] exts = String_.Ary(".txt");
		fxt.Test__match(files, "b.txt", exts, "b.txt");
	}
	// several names contain the pattern; the last one in array order is returned
	@Test public void Dupe__pick_last() {
		String[] files = String_.Ary("b0.txt", "b1.txt", "b2.txt");
		String[] exts = String_.Ary(".txt");
		fxt.Test__match(files, "b", exts, "b2.txt");
	}
	// all names contain the pattern, but only one file has an accepted ext
	@Test public void Ext() {
		String[] files = String_.Ary("b.txt", "b.png", "b.xml");
		String[] exts = String_.Ary(".xml", ".bz2");
		fxt.Test__match(files, "b", exts, "b.xml");
	}
	// two files have an accepted ext; the later one in array order is returned
	@Test public void Ext__dupes() {
		String[] files = String_.Ary("b.txt", "b.png", "b.xml");
		String[] exts = String_.Ary(".txt", ".xml");
		fxt.Test__match(files, "b", exts, "b.xml");
	}
}
/** Fixture: runs Find_nth_by_wildcard_or_null over raw path strings and compares the resulting raw url. */
class Xob_io_utl__fxt {
	/**
	 * @param path_ary      raw paths that are converted to Io_url items
	 * @param name_pattern  name substring passed to the finder
	 * @param ext_ary       accepted extensions passed to the finder
	 * @param expd          expected raw url of the match; "<<NULL>>" when no match is expected
	 */
	public void Test__match(String[] path_ary, String name_pattern, String[] ext_ary, String expd) {
		Io_url[] urls = Io_url_.Ary(path_ary);
		Io_url found = Xob_io_utl_.Find_nth_by_wildcard_or_null(urls, name_pattern, ext_ary);
		String actl;
		if (found == null)
			actl = "<<NULL>>";	// placeholder so that a missing match shows up as a readable String mismatch
		else
			actl = found.Raw();
		Gftest.Eq__str(expd, actl);
	}
}

View File

@@ -29,7 +29,7 @@ public abstract class Xob_sql_dump_base extends Xob_itm_dump_base implements Xob
this.Init_dump(this.Cmd_key());
make_url_gen = Io_url_gen_.dir_(temp_dir.GenSubDir("make"));
if (src_fil == null) {
src_fil = Xotdb_fsys_mgr.Find_file_or_null(wiki.Fsys_mgr().Root_dir(), "*" + Sql_file_name() + "*", ".gz", ".sql");
src_fil = Xob_io_utl_.Find_nth_by_wildcard_or_null(wiki.Fsys_mgr().Root_dir(), Sql_file_name() + ".sql", ".gz", ".sql");
if (src_fil == null) {
String msg = String_.Format(".sql file not found in dir.\nPlease download the file for your wiki from dumps.wikimedia.org.\nfile={0} dir={1}", Sql_file_name(), wiki.Fsys_mgr().Root_dir());
app.Usr_dlg().Warn_many("", "", msg);

View File

@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.bldrs.xmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.ios.*; import gplx.core.ios.streams.*; import gplx.core.envs.*;
import gplx.xowa.wikis.ctgs.*; import gplx.xowa.wikis.tdbs.*;
import gplx.xowa.bldrs.wkrs.*;
public class Xob_import_cfg {
public Xob_import_cfg(Xowe_wiki wiki) {this.wiki = wiki;} private Xowe_wiki wiki; private boolean src_fil_is_bz2 = true;
public byte Category_version() {return category_version;} public Xob_import_cfg Category_version_(byte v) {category_version = v; return this;} private byte category_version = Xoa_ctg_mgr.Version_1;
@@ -34,7 +35,8 @@ public class Xob_import_cfg {
}
public Io_stream_rdr Src_rdr() {
if (src_fil_xml == null && src_fil_bz2 == null) { // will usually be null; non-null when user specifies src through command-line
Io_url url = Xotdb_fsys_mgr.Find_file_or_fail(wiki.Fsys_mgr().Root_dir(), "*", ".xml", ".bz2");
Io_url url = Xob_io_utl_.Find_nth_by_wildcard_or_null(wiki.Fsys_mgr().Root_dir(), Xob_io_utl_.Pattern__wilcard, ".xml", ".bz2");
if (url == null) throw Err_.new_wo_type("could not find any .xml or .bz2 file", "dir", wiki.Fsys_mgr().Root_dir().Raw());
if (String_.Eq(url.Ext(), ".xml")) Src_fil_xml_(url);
else Src_fil_bz2_(url);
}