mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Html: Move get_elem_val to xo.elem
This commit is contained in:
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
public class Xoi_cmd_dumpfile_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoi_cmd_dumpfile_fxt fxt = new Xoi_cmd_dumpfile_fxt();
|
||||
@Test public void Bz2__unzip() {
|
||||
fxt .Exec_parse_msg("mem/en.wikipedia.org/fil.xml.bz2", "", "unzip")
|
||||
.Test_domain("en.wikipedia.org")
|
||||
.Test_vals("mem/en.wikipedia.org/fil.xml.bz2", "mem/en.wikipedia.org/fil.xml", true)
|
||||
;
|
||||
}
|
||||
@Test public void Bz2__unzip__assert_xml_ext() { // xml ext relies on removing ".bz2" from ".xml.bz2"; if just ".bz2" add an ".xml"
|
||||
fxt .Exec_parse_msg("mem/en.wikipedia.org/fil.bz2", "", "unzip")
|
||||
.Test_vals("mem/en.wikipedia.org/fil.bz2", "mem/en.wikipedia.org/fil.xml", true)
|
||||
;
|
||||
}
|
||||
@Test public void Bz2__direct() {
|
||||
fxt .Exec_parse_msg("mem/en.wikipedia.org/fil.bz2", "", "")
|
||||
.Test_vals("mem/en.wikipedia.org/fil.bz2", null, false)
|
||||
;
|
||||
}
|
||||
@Test public void Xml__unzip_n() {
|
||||
fxt .Exec_parse_msg("mem/en.wikipedia.org/fil.xml", "", "")
|
||||
.Test_vals(null, "mem/en.wikipedia.org/fil.xml", false)
|
||||
;
|
||||
}
|
||||
@Test public void Xml__unzip_y() {
|
||||
fxt .Exec_parse_msg("mem/en.wikipedia.org/fil.xml", "", "")
|
||||
.Test_vals(null, "mem/en.wikipedia.org/fil.xml", false)
|
||||
;
|
||||
}
|
||||
}
|
||||
class Xoi_cmd_dumpfile_fxt {
|
||||
public void Clear() {
|
||||
dumpfile.Clear();
|
||||
} private Xoi_cmd_dumpfile dumpfile = new Xoi_cmd_dumpfile();
|
||||
public Xoi_cmd_dumpfile_fxt Exec_parse_msg(String url, String domain, String args) {
|
||||
GfoMsg m = GfoMsg_.new_parse_("").Add("url", url).Add("domain", domain).Add("args", args);
|
||||
dumpfile.Parse_msg(m);
|
||||
return this;
|
||||
}
|
||||
public Xoi_cmd_dumpfile_fxt Test_vals(String expd_bz2, String expd_xml, boolean expd_unzip) {
|
||||
Eq_url(expd_bz2, dumpfile.Bz2_url());
|
||||
Eq_url(expd_xml, dumpfile.Xml_url());
|
||||
Tfds.Eq(expd_unzip, dumpfile.Bz2_unzip());
|
||||
return this;
|
||||
}
|
||||
public Xoi_cmd_dumpfile_fxt Test_domain(String expd_domain) {
|
||||
Tfds.Eq(expd_domain, String_.new_u8(dumpfile.Domain()));
|
||||
return this;
|
||||
}
|
||||
private void Eq_url(String expd, Io_url actl) {
|
||||
if (expd == null && actl == null) return;
|
||||
else if (expd != null && actl != null) {
|
||||
Tfds.Eq(expd, actl.Raw());
|
||||
}
|
||||
else if (expd == null) throw Err_.new_wo_type("actl should be null", "expd", expd);
|
||||
else if (actl == null) throw Err_.new_wo_type("actl should not be null", "expd", expd);
|
||||
}
|
||||
}
|
||||
128
400_xowa/src/gplx/xowa/bldrs/installs/Xoi_cmd_wiki_tst.java
Normal file
128
400_xowa/src/gplx/xowa/bldrs/installs/Xoi_cmd_wiki_tst.java
Normal file
@@ -0,0 +1,128 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
import gplx.core.consoles.*;
|
||||
import gplx.core.brys.args.*; import gplx.core.threads.*; import gplx.xowa.bldrs.setups.maints.*; import gplx.xowa.xtns.wbases.imports.*;
|
||||
import gplx.xowa.wikis.domains.*;
|
||||
import gplx.xowa.bldrs.wms.*; import gplx.xowa.bldrs.wms.dumps.*;
|
||||
public class Xoi_cmd_wiki_tst {
|
||||
@Test public void Run() { // MAINT:2016-12-04
|
||||
// Bld_import_list(Xow_domain_regy.All);
|
||||
// Bld_cfg_files(Xow_domain_regy.All); // NOTE: remember to carry over the wikisource / page / index commands from the existing xowa_build_cfg.gfs; also, only run the xowa_build_cfg.gfs once; DATE:2013-10-15; last run: DATE:2014-09-09
|
||||
}
|
||||
public void Bld_import_list(String... ary) {
|
||||
int ary_len = ary.length;
|
||||
Bry_bfr bfr = Bry_bfr_.Reset(255);
|
||||
Wmf_latest_parser parser = new Wmf_latest_parser();
|
||||
Bfr_arg__time time_fmtr = new Bfr_arg__time();
|
||||
for (int i = 0; i < ary_len; i++)
|
||||
Bld_import_list_itm2(bfr, parser, time_fmtr, ary, i);
|
||||
Io_mgr.Instance.SaveFilStr("C:\\xowa\\user\\temp.txt", bfr.To_str());
|
||||
}
|
||||
private void Bld_import_list_itm2(Bry_bfr bfr, Wmf_latest_parser parser, Bfr_arg__time time_fmtr, String[] ary, int i) {
|
||||
String domain_str = ary[i];
|
||||
byte[] domain_bry = Bry_.new_a7(domain_str);
|
||||
Xow_domain_itm domain_itm = Xow_domain_itm_.parse(domain_bry);
|
||||
byte[] wmf_key_bry = Bry_.Replace(Xow_abrv_wm_.To_abrv(domain_itm), Byte_ascii.Dash, Byte_ascii.Underline);
|
||||
String wmf_key = String_.new_u8(wmf_key_bry);
|
||||
String url = "https://dumps.wikimedia.org/" + wmf_key + "/latest";
|
||||
byte[] latest_html = null;
|
||||
for (int j = 0; j < 5; ++j) {
|
||||
latest_html = Io_mgr.Instance.DownloadFil_args("", Io_url_.Empty).Exec_as_bry(url);
|
||||
if (latest_html != null) break;
|
||||
Tfds.Dbg("fail|" + domain_str + "|" + url);
|
||||
if (j == 4) return;
|
||||
}
|
||||
parser.Parse(latest_html);
|
||||
Xowm_dump_file dump_file = new Xowm_dump_file(domain_str, "latest", Xowm_dump_type_.Str__pages_articles);
|
||||
dump_file.Server_url_(Xowm_dump_file_.Server_wmf_https);
|
||||
byte[] pages_articles_key = Bry_.new_a7(wmf_key + "-latest-pages-articles.xml.bz2");
|
||||
Wmf_latest_itm latest_itm = parser.Get_by(pages_articles_key);
|
||||
if (latest_itm == null) {Tfds.Dbg("missing|" + domain_str + "|" + url); return;} // NOTE: commonswiki missing entry for commonswiki-latest-pages-articles.xml.bz2 DATE:2016-05-01
|
||||
Tfds.Dbg("pass|" + domain_str + "|" + url);
|
||||
bfr.Add(domain_bry).Add_byte_pipe();
|
||||
bfr.Add_str_u8(dump_file.File_url()).Add_byte_pipe();
|
||||
bfr.Add(Xow_domain_tid_.Get_type_as_bry(domain_itm.Domain_type_id())).Add_byte_pipe();
|
||||
long src_size = latest_itm.Size();
|
||||
bfr.Add_long_variable(src_size).Add_byte_pipe();
|
||||
bfr.Add_str_a7(gplx.core.ios.Io_size_.To_str(src_size)).Add_byte_pipe();
|
||||
time_fmtr.Seconds_(Math_.Div_safe_as_long(src_size, 1000000)).Bfr_arg__add(bfr);
|
||||
bfr.Add_byte_pipe();
|
||||
bfr.Add_str_a7(latest_itm.Date().XtoStr_fmt_yyyy_MM_dd_HH_mm());
|
||||
bfr.Add_byte_pipe();
|
||||
bfr.Add_str_a7(dump_file.Dump_date());
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
/*
|
||||
private void Bld_import_list_itm(Bry_bfr bfr, Xowm_dump_file dump_file, Bry_fmtr_arg_time time_fmtr, String[] ary, int i) {
|
||||
String itm = ary[i];
|
||||
dump_file.Ctor(itm, "latest", Xowm_dump_type_.Str__pages_articles);
|
||||
int count = 0;
|
||||
while (count++ < 1) {
|
||||
dump_file.Server_url_(Xowm_dump_file_.Server_wmf);
|
||||
if (dump_file.Connect()) break;
|
||||
Tfds.WriteText(String_.Format("retrying: {0} {1}\n", count, dump_file.File_modified()));
|
||||
Thread_adp_.Sleep(15000); // wait for connection to reset
|
||||
}
|
||||
if (count == 10) {
|
||||
Tfds.WriteText(String_.Format("failed: {0}\n", dump_file.File_url()));
|
||||
return;
|
||||
}
|
||||
else
|
||||
Tfds.WriteText(String_.Format("passed: {0}\n", itm));
|
||||
bfr.Add_str(itm).Add_byte_pipe();
|
||||
bfr.Add_str(dump_file.File_url()).Add_byte_pipe();
|
||||
bfr.Add(Xow_domain_tid_.Get_type_as_bry(dump_file.Wiki_type().Wiki_tid())).Add_byte_pipe();
|
||||
// Xol_lang_stub lang_itm = Xol_lang_stub_.Get_by_key(wiki_type.Lang_key());
|
||||
// if (lang_itm == null) lang_itm = Xol_lang_stub_.Get_by_key(Xol_lang_itm_.Key_en); // commons, species, meta, etc will have no lang
|
||||
// bfr.Add(lang_itm.Local_name()).Add_byte_pipe();
|
||||
// bfr.Add(lang_itm.Canonical_name()).Add_byte_pipe();
|
||||
long src_size = dump_file.File_len();
|
||||
bfr.Add_long_variable(src_size).Add_byte_pipe();
|
||||
bfr.Add_str(gplx.core.ios.Io_size_.To_str(src_size)).Add_byte_pipe();
|
||||
time_fmtr.Seconds_(Math_.Div_safe_as_long(src_size, 1000000)).XferAry(bfr, 0);
|
||||
bfr.Add_byte_pipe();
|
||||
bfr.Add_str(dump_file.File_modified().XtoStr_fmt_yyyy_MM_dd_HH_mm());
|
||||
bfr.Add_byte_pipe();
|
||||
// bfr.Add_str(String_.Concat_with_obj(",", (Object[])dump_file.Dump_available_dates()));
|
||||
// bfr.Add_byte_pipe();
|
||||
bfr.Add_str(dump_file.Dump_date());
|
||||
bfr.Add_byte_nl();
|
||||
Thread_adp_.Sleep(1000);
|
||||
}
|
||||
*/
|
||||
public void Bld_cfg_files(String... ary) {
|
||||
Bry_bfr bfr = Bry_bfr_.Reset(255);
|
||||
gplx.xowa.bldrs.wiki_cfgs.Xoi_wiki_props_api api = new gplx.xowa.bldrs.wiki_cfgs.Xoi_wiki_props_api();
|
||||
gplx.xowa.bldrs.wiki_cfgs.Xoi_wiki_props_wiki wiki = new gplx.xowa.bldrs.wiki_cfgs.Xoi_wiki_props_wiki();
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
String wiki_domain = ary[i];
|
||||
try {
|
||||
byte[] xml = api.Exec_api(api.Api_src(wiki_domain));
|
||||
wiki.Wiki_domain_(Bry_.new_a7(wiki_domain));
|
||||
api.Parse(wiki, String_.new_u8(xml));
|
||||
api.Build_cfg(bfr, wiki);
|
||||
}
|
||||
catch (Exception e) {
|
||||
Console_adp__sys.Instance.Write_str_w_nl(Err_.Message_gplx_full(e));
|
||||
}
|
||||
}
|
||||
bfr.Add_str_a7("app.bldr.wiki_cfg_bldr.run;").Add_byte_nl();
|
||||
Io_mgr.Instance.SaveFilStr("C:\\user\\xowa_build_cfg.gfs", bfr.To_str());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
public class Xoi_mirror_parser_tst {
|
||||
@Test public void Basic() {
|
||||
Tst_parse(String_.Concat_lines_nl
|
||||
( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
|
||||
, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">"
|
||||
, "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">"
|
||||
, "<head>"
|
||||
, "<title>Index of /simplewiki/</title>"
|
||||
, "<link rel=\"stylesheet\" type=\"text/css\" href=\"/pub/misc/lighttpd-white-dir.css\" />"
|
||||
, "</head>"
|
||||
, "<body>"
|
||||
, "<h2>Index of /simplewiki/</h2>"
|
||||
, "<div class=\"list\">"
|
||||
, "<table summary=\"Directory Listing\" cellpadding=\"0\" cellspacing=\"0\">"
|
||||
, "<thead><tr><th class=\"n\">Name</th><th class=\"m\">Last Modified</th><th class=\"s\">Size</th><th class=\"t\">Type</th></tr></thead>"
|
||||
, "<tbody>"
|
||||
, "<tr><td class=\"n\"><a href=\"../\">Parent Directory</a>/</td><td class=\"m\"> </td><td class=\"s\">- </td><td class=\"t\">Directory</td></tr>"
|
||||
, "<tr><td class=\"n\"><a href=\"20120516/\">20120516</a>/</td><td class=\"m\">2012-May-17 01:04:39</td><td class=\"s\">- </td><td class=\"t\">Directory</td></tr>"
|
||||
, "<tr><td class=\"n\"><a href=\"20121220/\">20121220</a>/</td><td class=\"m\">2012-Dec-20 20:15:55</td><td class=\"s\">- </td><td class=\"t\">Directory</td></tr>"
|
||||
, "<tr><td class=\"n\"><a href=\"20130214/\">20130214</a>/</td><td class=\"m\">2013-Feb-14 06:28:41</td><td class=\"s\">- </td><td class=\"t\">Directory</td></tr>"
|
||||
, "<tr><td class=\"n\"><a href=\"latest/\">latest</a>/</td><td class=\"m\">2013-Feb-14 06:28:41</td><td class=\"s\">- </td><td class=\"t\">Directory</td></tr>"
|
||||
, "</tbody>"
|
||||
, "</table>"
|
||||
, "</div>"
|
||||
, "<div class=\"foot\">lighttpd</div>"
|
||||
, "</body>"
|
||||
, "</html>"
|
||||
), String_.Ary("20120516", "20121220", "20130214", "latest"));
|
||||
}
|
||||
@Test public void Find_last_lte() {
|
||||
Tst_find_last_lte(String_.Ary("20120516", "20121220", "20130214", "latest"), "20130101", "20121220");
|
||||
Tst_find_last_lte(String_.Ary("20120516", "20121220", "20130214", "latest"), "20120101", "");
|
||||
}
|
||||
private void Tst_parse(String raw, String[] expd) {
|
||||
Xoi_mirror_parser parser = new Xoi_mirror_parser();
|
||||
Tfds.Eq_ary_str(expd, parser.Parse(raw));
|
||||
}
|
||||
private void Tst_find_last_lte(String[] ary, String comp, String expd) {
|
||||
Tfds.Eq(expd, Xoi_mirror_parser.Find_last_lte(ary, comp));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user