mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.5.2.1
This commit is contained in:
@@ -1,257 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.ios.*; import gplx.xowa.html.*;
|
||||
import gplx.xowa.wikis.*; import gplx.xowa.wikis.data.*;
|
||||
/**
 * Generates the html support files for a wiki: xowa_common.css, xowa_wiki.css and logo.png.
 *
 * Files are written to the wiki's html dir. Depending on configuration they are read from the
 * wiki's database, scraped / downloaded from the live site, or copied from the bundled failover dir.
 */
public class Xoa_css_extractor {
	// configuration; each setter returns this so that calls can be chained
	public IoEngine_xrg_downloadFil Download_xrg() {return download_xrg;} private IoEngine_xrg_downloadFil download_xrg = Io_mgr._.DownloadFil_args("", Io_url_.Null);
	public Xoa_css_extractor Wiki_domain_(byte[] v) {wiki_domain = v; return this;} private byte[] wiki_domain;
	public Xoa_css_extractor Usr_dlg_(Gfo_usr_dlg v) {usr_dlg = v; return this;} private Gfo_usr_dlg usr_dlg;
	public Xoa_css_extractor Failover_dir_(Io_url v) {failover_dir = v; return this;} private Io_url failover_dir;
	public Xoa_css_extractor Wiki_html_dir_(Io_url v) {wiki_html_dir = v; return this;} private Io_url wiki_html_dir;
	public Xoa_css_extractor Mainpage_url_(String v) {mainpage_url = v; return this;} private String mainpage_url;
	public Xoa_css_extractor Protocol_prefix_(String v) {protocol_prefix = v; return this;} private String protocol_prefix = "https:";// NOTE: changed from http to https; DATE:2015-02-17
	public Xoa_css_extractor Page_fetcher_(Xow_page_fetcher v) {page_fetcher = v; return this;} private Xow_page_fetcher page_fetcher;
	public Xoa_css_extractor Css_img_downloader_(Xoa_css_img_downloader v) {this.css_img_downloader = v; return this;} private Xoa_css_img_downloader css_img_downloader;
	public Xoa_css_extractor Opt_download_css_common_(boolean v) {opt_download_css_common = v; return this;} private boolean opt_download_css_common;
	public Xoa_css_extractor Url_encoder_(Url_encoder v) {url_encoder = v; return this;} private Url_encoder url_encoder;
	public Xoa_css_extractor Wiki_code_(byte[] v) {this.wiki_code = v; return this;} private byte[] wiki_code = null;
	private byte[] mainpage_html; private boolean lang_is_ltr = true;

	/** Wires the extractor to the app: user dialog, download worker, css image downloader, failover dir and url encoder. */
	public void Init_by_app(Xoae_app app) {
		this.usr_dlg = app.Usr_dlg();
		Xof_download_wkr download_wkr = app.Wmf_mgr().Download_wkr();
		this.download_xrg = download_wkr.Download_xrg();
		css_img_downloader = new Xoa_css_img_downloader().Ctor(usr_dlg, download_wkr, Bry_.new_utf8_(protocol_prefix));
		failover_dir = app.Fsys_mgr().Bin_any_dir().GenSubDir_nest("html", "xowa", "import");
		url_encoder = Xoa_app_.Utl__encoder_mgr().Url();
	}
	/**
	 * Registers the css urls with the wiki's page writer and, if xowa_wiki.css does not exist yet, generates the css files.
	 * Generation is tried from the wiki database first (only when css_key is not null), then from the live site (only for edit wikis).
	 * Any exception is logged and the bundled common css is copied instead.
	 *
	 * @param wiki    wiki to install css for
	 * @param css_key key passed to the database lookup; null skips the database
	 */
	public void Install(Xow_wiki wiki, String css_key) {
		try {
			this.wiki_html_dir = wiki.App().Fsys_mgr().Wiki_css_dir(wiki.Domain_str());	// EX: /xowa/user/anonymous/wiki/en.wikipedia.org
			Io_url css_comm_fil = wiki_html_dir.GenSubFil(Css_common_name);
			Io_url css_wiki_fil = wiki_html_dir.GenSubFil(Css_wiki_name);
			wiki.Html__page_wtr_mgr().Init_css_urls(css_comm_fil, css_wiki_fil);
			if (wiki.Domain_tid() == Xow_domain_.Tid_int_home || Env_.Mode_testing()) return;	// NOTE: do not download if home_wiki; also needed for TEST
			if (Io_mgr._.ExistsFil(css_wiki_fil)) return;	// css file exists; nothing to generate
			wiki.App().Usr_dlg().Log_many("", "", "generating css for '~{0}'", wiki.Domain_str());
			if (css_key != null) {
				if (Install_by_db(wiki, wiki_html_dir, css_key)) return;
			}
			if (wiki.Type_is_edit())
				this.Install_by_wmf((Xowe_wiki)wiki, wiki_html_dir);
		}
		catch (Exception e) {	// if error, failover; paranoia catch for outliers like bad network connectivity fail, or MediaWiki: message not existing; DATE:2013-11-21
			wiki.App().Usr_dlg().Warn_many("", "", "failed while trying to generate css; failing over; wiki='~{0}' err=~{1}", wiki.Domain_str(), Err_.Message_gplx(e));
			Css_common_failover();	// only failover xowa_common.css; xowa_wiki.css comes from MediaWiki:Common.css / Vector.css
		}
	}
	/** Configures the extractor from the wiki, downloads the main page, then builds common css, wiki css and logo in that order. */
	private void Install_by_wmf(Xowe_wiki wiki, Io_url wiki_html_dir) {
		opt_download_css_common = wiki.Appe().Setup_mgr().Dump_mgr().Css_commons_download();
		if (!wiki.Appe().User().Cfg_mgr().Security_mgr().Web_access_enabled()) opt_download_css_common = false;	// if !web_access_enabled, don't download
		this.wiki_domain = wiki.Domain_bry();
		mainpage_url = "https://" + wiki.Domain_str();	// NOTE: cannot reuse protocol_prefix b/c "//" needs to be added manually; protocol_prefix is used for logo and images which have form of "//domain/image.png"; changed to https; DATE:2015-02-17
		if (page_fetcher == null) page_fetcher = new Xow_page_fetcher_wiki();
		page_fetcher.Wiki_(wiki);
		this.wiki_html_dir = wiki_html_dir;
		this.lang_is_ltr = wiki.Lang().Dir_ltr();
		this.wiki_code = wiki.Domain_abrv();
		mainpage_html = Mainpage_download_html();
		Css_common_setup();
		Css_wiki_setup();
		Logo_setup();
	}
	/**
	 * Extracts the css files from the wiki's core database.
	 *
	 * @return false if the core db manager or its props are missing, the schema is version 1,
	 *         or the cfg table does not flag a css table; true after extraction was requested
	 */
	private boolean Install_by_db(Xow_wiki wiki, Io_url wiki_html_dir, String css_key) {
		Xowd_db_mgr core_db_mgr = wiki.Data_mgr__core_mgr();
		if (	core_db_mgr == null
			||	core_db_mgr.Props() == null
			||	core_db_mgr.Props().Schema_is_1()
			||	!core_db_mgr.Tbl__cfg().Select_yn_or(Xow_cfg_consts.Grp__wiki_schema, Xowd_db_file_schema_props.Key__tbl_css_core, Bool_.N)
			) return false;
		Xowd_db_file core_db = core_db_mgr.Db__core();
		gplx.xowa.html.css.Xowd_css_core_mgr.Get(core_db.Tbl__css_core(), core_db.Tbl__css_file(), wiki_html_dir, css_key);
		return true;
	}
	/** Creates xowa_common.css: scraped from the live site when downloading is enabled; otherwise copied from the failover dir. */
	public void Css_common_setup() {
		if (opt_download_css_common)
			Css_common_download();
		else
			Css_common_failover();
	}
	/** Copies the failover common css into the wiki's html dir and runs the css image downloader over the copy. */
	private void Css_common_failover() {
		Io_url trg_fil = wiki_html_dir.GenSubFil(Css_common_name);
		Io_mgr._.CopyFil(Css_common_failover_url(), trg_fil, true);
		css_img_downloader.Chk(wiki_domain, trg_fil);
	}
	/**
	 * Scrapes the common css from the main page; copies the failover file if scraping produced nothing.
	 * NOTE(review): Css_scrape_setup already performs its own failover copy and image check, so both branches
	 * below repeat work; the failure branch also overwrites the already-checked copy. Left as-is; confirm intent.
	 */
	private void Css_common_download() {
		Io_url trg_fil = wiki_html_dir.GenSubFil(Css_common_name);
		boolean css_stylesheet_common_missing = !Css_scrape_setup();
		if (css_stylesheet_common_missing)
			Io_mgr._.CopyFil(Css_common_failover_url(), trg_fil, true);
		else
			css_img_downloader.Chk(wiki_domain, trg_fil);
	}
	/** Returns the wiki-specific override css if one exists in the failover dir; else the generic ltr / rtl file. */
	private Io_url Css_common_failover_url() {
		Io_url css_commons_url = failover_dir.GenSubDir("xowa_common_override").GenSubFil_ary("xowa_common_", String_.new_utf8_(wiki_code), ".css");
		if (Io_mgr._.ExistsFil(css_commons_url)) return css_commons_url;	// specific css exists for wiki; use it; EX: xowa_common_wiki_mediawikiwiki.css
		return failover_dir.GenSubFil(lang_is_ltr ? Css_common_name_ltr : Css_common_name_rtl);
	}
	/** Creates xowa_wiki.css from the wiki's MediaWiki:Common.css and MediaWiki:Vector.css pages unless the file already exists. */
	public void Css_wiki_setup() {
		Io_url trg_fil = wiki_html_dir.GenSubFil(Css_wiki_name);
		if (Io_mgr._.ExistsFil(trg_fil)) return;	// don't download if already there
		boolean css_stylesheet_wiki_missing = !Css_wiki_generate(trg_fil);
		if (css_stylesheet_wiki_missing)
			Failover(trg_fil);
		else
			css_img_downloader.Chk(wiki_domain, trg_fil);
	}
	/**
	 * Concatenates the Common.css and Vector.css sections, swaps the tab entity for a real tab and saves the result.
	 * Always returns true; when neither page exists, an empty file is written.
	 */
	private boolean Css_wiki_generate(Io_url trg_fil) {
		Bry_bfr bfr = Bry_bfr.new_();
		Css_wiki_generate_section(bfr, Ttl_common_css);
		Css_wiki_generate_section(bfr, Ttl_vector_css);
		byte[] bry = bfr.Xto_bry_and_clear();
		bry = Bry_.Replace(bry, gplx.xowa.bldrs.xmls.Xob_xml_parser_.Bry_tab_ent, gplx.xowa.bldrs.xmls.Xob_xml_parser_.Bry_tab);
		Io_mgr._.SaveFilBry(trg_fil, bry);
		return true;
	}	private static final byte[] Ttl_common_css = Bry_.new_ascii_("Common.css"), Ttl_vector_css = Bry_.new_ascii_("Vector.css");
	/** Appends one MediaWiki-namespace page to bfr, preceded by a header comment; returns false if the page could not be fetched. */
	private boolean Css_wiki_generate_section(Bry_bfr bfr, byte[] ttl) {
		byte[] page = page_fetcher.Fetch(Xow_ns_.Id_mediawiki, ttl);
		if (page == null) return false;
		if (bfr.Len() != 0) bfr.Add_byte_nl().Add_byte_nl();	// add "\n\n" between sections; !=0 checks against first
		Css_wiki_section_hdr.Bld_bfr_many(bfr, ttl);			// add "/*XOWA:MediaWiki:Common.css*/\n"
		bfr.Add(page);											// add page
		return true;
	}	static final Bry_fmtr Css_wiki_section_hdr = Bry_fmtr.new_("/*XOWA:MediaWiki:~{ttl}*/\n", "ttl");
	/** Creates logo.png in the wiki's html dir unless it already exists; copies the failover logo if it cannot be obtained. */
	public void Logo_setup() {
		Io_url logo_url = wiki_html_dir.GenSubFil("logo.png");
		if (Io_mgr._.ExistsFil(logo_url)) return;	// don't download if already there
		boolean logo_missing = !Logo_download(logo_url);
		if (logo_missing)
			Failover(logo_url);
	}
	/**
	 * Downloads the logo referenced by the main page. If the main page has no logo reference,
	 * tries to copy the file named by the ".mw-wiki-logo" rule in xowa_common.css instead.
	 *
	 * @return true if trg_fil was downloaded or copied
	 */
	private boolean Logo_download(Io_url trg_fil) {
		String src_fil = Logo_find_src();
		if (src_fil == null) {
			if (Logo_copy_from_css(trg_fil)) return true;
			usr_dlg.Warn_many("", "", "failed to extract logo: trg_fil=~{0};", trg_fil.Raw());
			return false;
		}
		String log_msg = usr_dlg.Prog_many("", "", "downloading logo: '~{0}'", src_fil);
		boolean rv = download_xrg.Prog_fmt_hdr_(log_msg).Src_(src_fil).Trg_(trg_fil).Exec();
		if (!rv)
			usr_dlg.Warn_many("", "", "failed to download logo: src_url=~{0};", src_fil);
		return rv;
	}
	/** Finds the ".mw-wiki-logo" background-image in xowa_common.css and copies that file (relative to the html dir) to trg_fil. */
	private boolean Logo_copy_from_css(Io_url trg_fil) {
		Io_url commons_file = wiki_html_dir.GenSubFil(Css_common_name);
		byte[] commons_src = Io_mgr._.LoadFilBry(commons_file);
		int bgn_pos = Bry_finder.Find_fwd(commons_src, Bry_mw_wiki_logo); if (bgn_pos == Bry_finder.Not_found) return false;
		bgn_pos += Bry_mw_wiki_logo.length;
		int end_pos = Bry_finder.Find_fwd(commons_src, Byte_ascii.Quote, bgn_pos + 1); if (end_pos == Bry_finder.Not_found) return false;
		byte[] src_bry = Bry_.Mid(commons_src, bgn_pos, end_pos);
		if (Op_sys.Cur().Tid_is_wnt())	// css uses "/"; convert to "\" for windows paths
			src_bry = Bry_.Replace(src_bry, Byte_ascii.Slash, Byte_ascii.Backslash);
		Io_url src_fil = wiki_html_dir.GenSubFil(String_.new_utf8_(src_bry));
		Io_mgr._.CopyFil(src_fil, trg_fil, true);
		return true;
	}	private static final byte[] Bry_mw_wiki_logo = Bry_.new_ascii_(".mw-wiki-logo{background-image:url(\"");
	/**
	 * Extracts the logo url from the downloaded main page: the text between "background-image: url(" and the next ")"
	 * that follows the p-logo div.
	 *
	 * @return protocol_prefix + url, or null if the main page is missing or any of the markers cannot be found
	 */
	private String Logo_find_src() {
		if (mainpage_html == null) return null;
		int main_page_html_len = mainpage_html.length;
		int logo_bgn = Bry_finder.Find_fwd(mainpage_html, Logo_find_bgn, 0);					if (logo_bgn == Bry_.NotFound) return null;
		logo_bgn += Logo_find_bgn.length;
		logo_bgn = Bry_finder.Find_fwd(mainpage_html, Logo_find_end, logo_bgn);					if (logo_bgn == Bry_.NotFound) return null;
		logo_bgn += Logo_find_end.length;
		int logo_end = Bry_finder.Find_fwd(mainpage_html, Byte_ascii.Paren_end, logo_bgn, main_page_html_len);	if (logo_end == Bry_.NotFound) return null;	// FIX: check logo_end; previously re-checked logo_bgn, so an unclosed "url(" passed NotFound to Bry_.Mid
		byte[] logo_bry = Bry_.Mid(mainpage_html, logo_bgn, logo_end);
		return protocol_prefix + String_.new_utf8_(logo_bry);
	}
	private static final byte[] Logo_find_bgn = Bry_.new_ascii_("<div id=\"p-logo\""), Logo_find_end = Bry_.new_ascii_("background-image: url(");
	/** Downloads and caches the main page html; returns true if the download produced content. */
	public boolean Mainpage_download() {
		mainpage_html = Mainpage_download_html();
		return mainpage_html != null;
	}
	/** Downloads the html of the main page (or of /wiki/Q2 for wikidata); logs a warning and returns null on failure. */
	private byte[] Mainpage_download_html() {
		String main_page_url_temp = mainpage_url;
		if (Bry_.Eq(wiki_domain, Xow_domain_.Domain_bry_wikidata))	// if wikidata, download css for a Q* page; Main_Page has less css; DATE:2014-09-30
			main_page_url_temp = main_page_url_temp + "/wiki/Q2";
		String log_msg = usr_dlg.Prog_many("", "main_page.download", "downloading main page for '~{0}'", main_page_url_temp);
		byte[] main_page_html = download_xrg.Prog_fmt_hdr_(log_msg).Exec_as_bry(main_page_url_temp);
		if (main_page_html == null) usr_dlg.Warn_many("", "", "failed to download main_page: src_url=~{0};", main_page_url_temp);
		return main_page_html;
	}
	/** Copies the file with the same name as trg_fil from the failover dir to trg_fil. */
	private void Failover(Io_url trg_fil) {
		usr_dlg.Note_many("", "", "copying failover file: trg_fil=~{0};", trg_fil.Raw());
		Io_mgr._.CopyFil(failover_dir.GenSubFil(trg_fil.NameAndExt()), trg_fil, true);
	}
	/**
	 * Scrapes the stylesheets linked from the main page into xowa_common.css and localizes their images.
	 * If nothing could be scraped, the failover common css is installed instead.
	 *
	 * @return true if scraped css was saved; false if the failover file was used
	 */
	public boolean Css_scrape_setup() {
		Io_url trg_fil = wiki_html_dir.GenSubFil(Css_common_name);
		// NOTE: do not skip when trg_fil already exists; else main_page is not scraped for all stylesheet links; simple.d: fails; DATE:2014-02-11
		byte[] css_url = Css_scrape();
		if (css_url == null) {
			Css_common_failover();
			return false;
		}
		else {
			Io_mgr._.SaveFilBry(trg_fil, css_url);
			css_img_downloader.Chk(wiki_domain, trg_fil);
			return true;
		}
	}
	/** Returns the concatenated css of all stylesheets linked from the main page; null if there is no main page or no stylesheet link. */
	private byte[] Css_scrape() {
		if (mainpage_html == null) return null;
		String[] css_urls = Css_scrape_urls(mainpage_html); if (css_urls.length == 0) return null;
		return Css_scrape_download(css_urls);
	}
	/** Collects the href of every stylesheet link tag in raw, html-unescaped, url-decoded and prefixed with protocol_prefix. */
	private String[] Css_scrape_urls(byte[] raw) {
		ListAdp rv = ListAdp_.new_();
		int raw_len = raw.length;
		int prv_pos = 0;
		int css_find_bgn_len = Css_find_bgn.length;
		byte[] protocol_prefix_bry = Bry_.new_utf8_(protocol_prefix);
		while (true) {
			int url_bgn = Bry_finder.Find_fwd(raw, Css_find_bgn, prv_pos); 				if (url_bgn == Bry_.NotFound) break;	// nothing left; stop
			url_bgn += css_find_bgn_len;
			int url_end = Bry_finder.Find_fwd(raw, Byte_ascii.Quote, url_bgn, raw_len); if (url_end == Bry_.NotFound) {usr_dlg.Warn_many("", "main_page.css_parse", "could not find css; pos='~{0}' text='~{1}'", url_bgn, String_.new_utf8_len_safe_(raw, url_bgn, url_bgn + 32)); break;}
			byte[] css_url_bry = Bry_.Mid(raw, url_bgn, url_end);
			css_url_bry = Bry_.Replace(css_url_bry, Css_amp_find, Css_amp_repl);	// &amp; -> &
			css_url_bry = url_encoder.Decode(css_url_bry);							// %2C -> %7C -> |
			css_url_bry = Bry_.Add(protocol_prefix_bry, css_url_bry);
			rv.Add(String_.new_utf8_(css_url_bry));
			prv_pos = url_end;
		}
		return rv.XtoStrAry();
	}	private static final byte[] Css_find_bgn = Bry_.new_ascii_("<link rel=\"stylesheet\" href=\""), Css_amp_find = Bry_.new_ascii_("&amp;"), Css_amp_repl = Bry_.new_ascii_("&");	// FIX: Css_amp_find restored to the html entity; it had been flattened to "&", which made the replace a no-op
	/** Downloads each url and concatenates the results, each preceded by a comment naming its url; failed downloads are skipped. */
	private byte[] Css_scrape_download(String[] css_urls) {
		int css_urls_len = css_urls.length;
		Bry_bfr tmp_bfr = Bry_bfr.new_();
		for (int i = 0; i < css_urls_len; i++) {
			String css_url = css_urls[i];
			usr_dlg.Prog_many("", "main_page.css_download", "downloading css for '~{0}'", css_url);
			download_xrg.Prog_fmt_hdr_(css_url);
			byte[] css_bry = download_xrg.Exec_as_bry(css_url); if (css_bry == null) continue;	// css not found; continue
			tmp_bfr.Add(Xoa_css_img_downloader.Bry_comment_bgn).Add_str(css_url).Add(Xoa_css_img_downloader.Bry_comment_end).Add_byte_nl();
			tmp_bfr.Add(css_bry).Add_byte_nl().Add_byte_nl();
		}
		return tmp_bfr.Xto_bry_and_clear();
	}
	public static final String Css_common_name = "xowa_common.css", Css_wiki_name = "xowa_wiki.css"
		, Css_common_name_ltr = "xowa_common_ltr.css", Css_common_name_rtl = "xowa_common_rtl.css";
}
|
||||
@@ -1,130 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*; import gplx.ios.*;
|
||||
public class Xoa_css_extractor_basic_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoa_css_extractor_fxt fxt = new Xoa_css_extractor_fxt();
|
||||
@Test public void Logo_download() {
|
||||
fxt.Init_fil("mem/http/en.wikipedia.org" , Xoa_css_extractor_fxt.Main_page_html);
|
||||
fxt.Init_fil("mem/http/wiki.png" , "download");
|
||||
fxt.Exec_logo_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/logo.png", "download");
|
||||
}
|
||||
@Test public void Logo_download_mw_wiki_logo() {
|
||||
fxt.Init_fil("mem/http/en.wikipedia.org" , "");
|
||||
fxt.Init_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/wiki.png" , "download");
|
||||
fxt.Init_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css" , ".mw-wiki-logo{background-image:url(\"wiki.png\");");
|
||||
fxt.Exec_logo_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/logo.png" , "download");
|
||||
}
|
||||
@Test public void Logo_failover() {
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/logo.png" , "failover");
|
||||
fxt.Exec_logo_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/logo.png", "failover");
|
||||
}
|
||||
@Test public void Css_common_download_failover() {
|
||||
fxt.Css_installer().Opt_download_css_common_(true);
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_ltr.css", "failover");
|
||||
fxt.Exec_css_common_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
|
||||
}
|
||||
@Test public void Css_common_copy() {
|
||||
fxt.Css_installer().Opt_download_css_common_(false);
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_ltr.css", "failover");
|
||||
fxt.Exec_css_common_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
|
||||
}
|
||||
@Test public void Css_common_copy_specific_wiki() { // PURPOSE: css for specific wiki
|
||||
fxt.Css_installer().Opt_download_css_common_(false).Wiki_code_(Bry_.new_ascii_("enwiki"));
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_override/xowa_common_enwiki.css", "failover");
|
||||
fxt.Exec_css_common_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
|
||||
}
|
||||
@Test public void Css_scrape_download() {
|
||||
fxt.Css_installer().Url_encoder_(Url_encoder.new_http_url_());
|
||||
fxt.Init_fil("mem/http/en.wikipedia.org" , Xoa_css_extractor_fxt.Main_page_html);
|
||||
fxt.Init_fil("mem/http/en.wikipedia.org/common.css" , "download");
|
||||
fxt.Init_fil("mem/http/www/a&0|b,c" , "data=css_0");
|
||||
fxt.Init_fil("mem/http/www/a&1|b,c" , "data=css_1");
|
||||
fxt.Exec_css_mainpage_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", String_.Concat_lines_nl
|
||||
( "/*XOWA:mem/http/www/a&0|b,c*/"
|
||||
, "data=css_0"
|
||||
, ""
|
||||
, "/*XOWA:mem/http/www/a&1|b,c*/"
|
||||
, "data=css_1"
|
||||
));
|
||||
}
|
||||
@Test public void Css_scrape_failover() {
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_ltr.css", "failover");
|
||||
fxt.Exec_css_mainpage_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
|
||||
}
|
||||
}
|
||||
/** Test fixture: builds a Xoa_css_extractor wired to the in-memory file system and offers init / exec / assert helpers. */
class Xoa_css_extractor_fxt {
	private Xow_page_fetcher_mok page_fetcher;
	private Xoa_css_extractor css_installer;

	/** Resets the in-memory file system and creates a fresh extractor that reads "http" files from mem/http/. */
	public void Clear() {
		Io_mgr._.InitEngine_mem();
		Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_base.test_();

		css_installer = new Xoa_css_extractor();
		css_installer.Download_xrg().Trg_engine_key_(IoEngine_.MemKey);
		css_installer.Usr_dlg_(usr_dlg);
		css_installer.Wiki_domain_(Bry_.new_ascii_("en.wikipedia.org"));
		css_installer.Protocol_prefix_("mem/http/");
		css_installer.Mainpage_url_("mem/http/en.wikipedia.org");
		css_installer.Failover_dir_(Io_url_.new_any_("mem/xowa/bin/any/html/xowa/import/"));	// "mem/xowa/user/anonymous/wiki/home/html/"
		css_installer.Wiki_html_dir_(Io_url_.new_any_("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/"));

		page_fetcher = new Xow_page_fetcher_mok();
		css_installer.Page_fetcher_(page_fetcher);

		Xoa_css_img_downloader img_downloader = new Xoa_css_img_downloader();
		img_downloader.Ctor(usr_dlg, new Xof_download_wkr_test(), Bry_.new_ascii_("mem/http/"));
		css_installer.Css_img_downloader_(img_downloader);
	}
	public Xoa_css_extractor Css_installer() {
		return css_installer;
	}
	/** Registers a page with the mock page fetcher. */
	public void Init_page(int ns_id, String ttl, String text) {
		byte[] ttl_bry = Bry_.new_ascii_(ttl);
		byte[] text_bry = Bry_.new_ascii_(text);
		page_fetcher.Add(ns_id, ttl_bry, text_bry);
	}
	public void Init_fil_empty(String url) {
		Init_fil(url, "");
	}
	/** Creates a file with the given text. */
	public void Init_fil(String url, String text) {
		Io_mgr._.SaveFilStr(url, text);
	}
	/** Asserts that the file at url has the expected text. */
	public void Test_fil(String url, String expd) {
		String actl = Io_mgr._.LoadFilStr(Io_url_.new_any_(url));
		Tfds.Eq_str_lines(expd, actl);
	}
	public void Exec_logo_setup() {
		css_installer.Mainpage_download();
		css_installer.Logo_setup();
	}
	public void Exec_css_common_setup() {
		css_installer.Mainpage_download();
		css_installer.Css_common_setup();
	}
	public void Exec_css_wiki_setup() {
		css_installer.Css_wiki_setup();
	}
	public void Exec_css_mainpage_setup() {
		css_installer.Mainpage_download();
		css_installer.Css_scrape_setup();
	}
	// sample main page: two stylesheet links and a p-logo div
	// NOTE(review): the "&" in the hrefs was presumably an html-escaped ampersand before this text was extracted; confirm against upstream
	public static String Main_page_html = String_.Concat_lines_nl
	(	"<html>"
	,	" <head>"
	,	" <link rel=\"stylesheet\" href=\"www/a&0%7Cb%2Cc\" />"
	,	" <link rel=\"stylesheet\" href=\"www/a&1%7Cb%2Cc\" />"
	,	" </head>"
	,	" <body>"
	,	" <div id=\"p-logo\" role=\"banner\"><a style=\"background-image: url(wiki.png);\""
	,	" </body>"
	,	"</html>"
	);
}
|
||||
@@ -1,46 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*; import gplx.ios.*;
|
||||
public class Xoa_css_extractor_wiki_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoa_css_extractor_fxt fxt = new Xoa_css_extractor_fxt();
|
||||
@Test public void Css_wiki_generate() {
|
||||
fxt.Init_page(Xow_ns_.Id_mediawiki, "Common.css" , "css_0");
|
||||
fxt.Init_page(Xow_ns_.Id_mediawiki, "Vector.css" , "css_1");
|
||||
fxt.Exec_css_wiki_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_wiki.css", String_.Concat_lines_nl
|
||||
( "/*XOWA:MediaWiki:Common.css*/"
|
||||
, "css_0"
|
||||
, ""
|
||||
, "/*XOWA:MediaWiki:Vector.css*/"
|
||||
, "css_1"
|
||||
));
|
||||
}
|
||||
@Test public void Css_wiki_missing() {
|
||||
fxt.Exec_css_wiki_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_wiki.css", "");
|
||||
}
|
||||
@Test public void Css_wiki_tab() { // PURPOSE: swap out 	 for xdat files
|
||||
fxt.Init_page(Xow_ns_.Id_mediawiki, "Common.css" , "a	b");
|
||||
fxt.Exec_css_wiki_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_wiki.css", String_.Concat_lines_nl
|
||||
( "/*XOWA:MediaWiki:Common.css*/"
|
||||
, "a\tb"
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -1,185 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
public class Xoa_css_img_downloader {
|
||||
public Xoa_css_img_downloader Ctor(Gfo_usr_dlg usr_dlg, Xof_download_wkr download_wkr, byte[] stylesheet_prefix) {
|
||||
this.usr_dlg = usr_dlg; this.download_wkr = download_wkr; this.stylesheet_prefix = stylesheet_prefix;
|
||||
return this;
|
||||
} private Gfo_usr_dlg usr_dlg; private Xof_download_wkr download_wkr;
|
||||
/** Sets the prefix prepended to imported stylesheet urls. TEST: setter exposed b/c tests can handle "mem/" but not "//mem" */
public Xoa_css_img_downloader Stylesheet_prefix_(byte[] v) {
	this.stylesheet_prefix = v;
	return this;
}
private byte[] stylesheet_prefix;
|
||||
/**
 * Rewrites the urls in css_fil (see Convert_to_local_urls), downloads the collected files into the
 * directory that holds css_fil, then saves the rewritten css back over css_fil.
 *
 * @param wiki_domain domain that is prefixed with Bry_fwd_slashes to resolve relative urls
 * @param css_fil     css file to process in place
 */
public void Chk(byte[] wiki_domain, Io_url css_fil) {
	ListAdp download_list = ListAdp_.new_();
	byte[] css_orig = Io_mgr._.LoadFilBry(css_fil);
	byte[] css_local = Convert_to_local_urls(Bry_.Add(Bry_fwd_slashes, wiki_domain), css_orig, download_list);
	Download_fils(css_fil.OwnerDir(), download_list.XtoStrAry());
	Io_mgr._.SaveFilBry(css_fil, css_local);
}
|
||||
public byte[] Convert_to_local_urls(byte[] rel_url_prefix, byte[] src, ListAdp list) {
|
||||
try {
|
||||
int src_len = src.length;
|
||||
int prv_pos = 0;
|
||||
Bry_bfr bfr = Bry_bfr.new_(src_len);
|
||||
HashAdp img_hash = HashAdp_.new_bry_();
|
||||
while (true) {
|
||||
int url_pos = Bry_finder.Find_fwd(src, Bry_url, prv_pos);
|
||||
if (url_pos == Bry_.NotFound) {bfr.Add_mid(src, prv_pos, src_len); break;} // no more "url("; exit;
|
||||
int bgn_pos = url_pos + Bry_url_len; // set bgn_pos after "url("
|
||||
byte bgn_byte = src[bgn_pos];
|
||||
byte end_byte = Byte_ascii.Nil;
|
||||
boolean quoted = true;
|
||||
switch (bgn_byte) { // find end_byte
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos: // quoted; end_byte is ' or "
|
||||
end_byte = bgn_byte;
|
||||
++bgn_pos;
|
||||
break;
|
||||
default: // not quoted; end byte is ")"
|
||||
end_byte = Byte_ascii.Paren_end;
|
||||
quoted = false;
|
||||
break;
|
||||
}
|
||||
int end_pos = Bry_finder.Find_fwd(src, end_byte, bgn_pos, src_len);
|
||||
if (end_pos == Bry_.NotFound) { // unclosed "url("; exit since nothing else will be found
|
||||
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.end_missing", "could not find end_sequence for 'url(': bgn='~{0}' end='~{1}'", prv_pos, String_.new_utf8_len_safe_(src, prv_pos, prv_pos + 25));
|
||||
bfr.Add_mid(src, prv_pos, src_len);
|
||||
break;
|
||||
}
|
||||
if (end_pos - bgn_pos == 0) { // empty; "url()"; ignore
|
||||
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.empty", "'url(' is empty: bgn='~{0}' end='~{1}'", prv_pos, String_.new_utf8_len_safe_(src, prv_pos, prv_pos + 25));
|
||||
bfr.Add_mid(src, prv_pos, bgn_pos);
|
||||
prv_pos = bgn_pos;
|
||||
continue;
|
||||
}
|
||||
byte[] img_raw = Bry_.Mid(src, bgn_pos, end_pos); int img_raw_len = img_raw.length;
|
||||
if (Bry_.HasAtBgn(img_raw, Bry_data_image, 0, img_raw_len)) { // base64
|
||||
bfr.Add_mid(src, prv_pos, end_pos); // nothing to download; just add entire String
|
||||
prv_pos = end_pos;
|
||||
continue;
|
||||
}
|
||||
int import_url_end = Import_url_chk(rel_url_prefix, src, src_len, prv_pos, url_pos, img_raw, bfr); // check for embedded stylesheets via @import tag
|
||||
if (import_url_end != Bry_.NotFound) {
|
||||
prv_pos = import_url_end;
|
||||
continue;
|
||||
}
|
||||
byte[] img_cleaned = Clean_img_url(img_raw, img_raw_len);
|
||||
if (img_cleaned == null) { // could not clean img
|
||||
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.clean_failed", "could not extract valid http src: bgn='~{0}' end='~{1}'", prv_pos, String_.new_utf8_(img_raw));
|
||||
bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue;
|
||||
}
|
||||
if (!img_hash.Has(img_cleaned)) {// only add unique items for download;
|
||||
img_hash.AddKeyVal(img_cleaned);
|
||||
list.Add(String_.new_utf8_(img_cleaned));
|
||||
}
|
||||
img_cleaned = Replace_invalid_chars(Bry_.Copy(img_cleaned)); // NOTE: must call ByteAry.Copy else img_cleaned will change *inside* hash
|
||||
bfr.Add_mid(src, prv_pos, bgn_pos);
|
||||
if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
|
||||
bfr.Add(img_cleaned);
|
||||
if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
|
||||
prv_pos = end_pos;
|
||||
}
|
||||
return bfr.Xto_bry_and_clear();
|
||||
}
|
||||
catch (Exception e) {
|
||||
usr_dlg.Warn_many("", "", "failed to convert local_urls: ~{0} ~{1}", String_.new_utf8_(rel_url_prefix), Err_.Message_gplx(e));
|
||||
return src;
|
||||
}
|
||||
}
|
||||
public static byte[] Import_url_build(byte[] stylesheet_prefix, byte[] rel_url_prefix, byte[] css_url) {
|
||||
return Bry_.HasAtBgn(css_url, Bry_http_protocol) // css_url already starts with "http"; return self; PAGE:tr.n:Main_Page; DATE:2014-06-04
|
||||
? css_url
|
||||
: Bry_.Add(stylesheet_prefix, css_url)
|
||||
;
|
||||
}
|
||||
private int Import_url_chk(byte[] rel_url_prefix, byte[] src, int src_len, int old_pos, int find_bgn, byte[] url_raw, Bry_bfr bfr) {
|
||||
if (find_bgn < Bry_import_len) return Bry_.NotFound;
|
||||
if (!Bry_.Match(src, find_bgn - Bry_import_len, find_bgn, Bry_import)) return Bry_.NotFound;
|
||||
byte[] css_url = url_raw; int css_url_len = css_url.length;
|
||||
if (css_url_len > 0 && css_url[0] == Byte_ascii.Slash) { // css_url starts with "/"; EX: "/page" or "//site/page" DATE:2014-02-03
|
||||
if (css_url_len > 1 && css_url[1] != Byte_ascii.Slash) // skip if css_url starts with "//"; EX: "//site/page"
|
||||
css_url = Bry_.Add(rel_url_prefix, css_url); // "/w/a.css" -> "//en.wikipedia.org/w/a.css"
|
||||
}
|
||||
css_url = Bry_.Replace(css_url, Byte_ascii.Space, Byte_ascii.Underline); // NOTE: must replace spaces with underlines else download will fail; EX:https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
|
||||
byte[] css_src_bry = Import_url_build(stylesheet_prefix, rel_url_prefix, css_url);
|
||||
String css_src_str = String_.new_utf8_(css_src_bry);
|
||||
download_wkr.Download_xrg().Prog_fmt_hdr_(usr_dlg.Log_many(GRP_KEY, "logo.download", "downloading import for '~{0}'", css_src_str));
|
||||
byte[] css_trg_bry = download_wkr.Download_xrg().Exec_as_bry(css_src_str);
|
||||
if (css_trg_bry == null) {
|
||||
usr_dlg.Warn_many("", "", "could not import css: url=~{0}", css_src_str);
|
||||
return Bry_.NotFound; // css not found
|
||||
}
|
||||
bfr.Add_mid(src, old_pos, find_bgn - Bry_import_len).Add_byte_nl();
|
||||
bfr.Add(Bry_comment_bgn).Add(css_url).Add(Bry_comment_end).Add_byte_nl();
|
||||
if (Bry_finder.Find_fwd(css_url, Wikisource_dynimg_ttl) != -1) css_trg_bry = Bry_.Replace(css_trg_bry, Wikisource_dynimg_find, Wikisource_dynimg_repl); // FreedImg hack; PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06
|
||||
bfr.Add(css_trg_bry).Add_byte_nl();
|
||||
bfr.Add_byte_nl();
|
||||
int semic_pos = Bry_finder.Find_fwd(src, Byte_ascii.Semic, find_bgn + url_raw.length, src_len);
|
||||
return semic_pos + Int_.Const_dlm_len;
|
||||
}
|
||||
private static final byte[]
|
||||
Wikisource_dynimg_ttl = Bry_.new_ascii_("en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css")
|
||||
, Wikisource_dynimg_find = Bry_.new_ascii_(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
|
||||
, Wikisource_dynimg_repl = Bry_.new_ascii_(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], /*XOWA:handle file:// paths which will have /commons.wikimedia.org/ but not /wikipedia/ */ .freedImg img[src*=\"wikimedia\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
|
||||
;
|
||||
public byte[] Clean_img_url(byte[] raw, int raw_len) {
|
||||
int pos_bgn = 0;
|
||||
if (Bry_.HasAtBgn(raw, Bry_fwd_slashes, 0, raw_len)) pos_bgn = Bry_fwd_slashes.length;
|
||||
if (Bry_.HasAtBgn(raw, Bry_http, 0, raw_len)) pos_bgn = Bry_http.length;
|
||||
int pos_slash = Bry_finder.Find_fwd(raw, Byte_ascii.Slash, pos_bgn, raw_len);
|
||||
if (pos_slash == Bry_.NotFound) return null; // first segment is site_name; at least one slash must be present for image name; EX: site.org/img_name.jpg
|
||||
if (pos_slash == raw_len - 1) return null; // "site.org/" is invalid
|
||||
int pos_end = raw_len;
|
||||
int pos_question = Bry_finder.Find_bwd(raw, Byte_ascii.Question);
|
||||
if (pos_question != Bry_.NotFound)
|
||||
pos_end = pos_question; // remove query params; EX: img_name?key=val
|
||||
return Bry_.Mid(raw, pos_bgn, pos_end);
|
||||
}
|
||||
/**
 * Downloads each entry of ary into a file nested under css_dir, skipping entries whose target file already exists.
 * Each entry is requested as "http://" + entry.
 */
private void Download_fils(Io_url css_dir, String[] ary) {
	for (String src : ary) {
		String trg_frag = Op_sys.Cur().Fsys_http_frag_to_url_str(Replace_invalid_chars_str(src));
		Io_url trg = css_dir.GenSubFil_nest(trg_frag);
		boolean already_downloaded = Io_mgr._.ExistsFil(trg);
		if (!already_downloaded) {
			download_wkr.Download(true, "http://" + src, trg, "download: " + src); // ILN
		}
	}
}
|
||||
/** String wrapper around Replace_invalid_chars: converts raw_str to bytes, replaces the invalid chars, and converts back to a String. */
String Replace_invalid_chars_str(String raw_str) {
	byte[] raw_bry = Bry_.new_utf8_(raw_str);
	byte[] cleaned_bry = Replace_invalid_chars(raw_bry);
	return String_.new_utf8_(cleaned_bry);
}
|
||||
byte[] Replace_invalid_chars(byte[] raw_bry) {
|
||||
int raw_len = raw_bry.length;
|
||||
for (int i = 0; i < raw_len; i++) { // convert invalid wnt chars to underscores
|
||||
byte b = raw_bry[i];
|
||||
switch (b) {
|
||||
//case Byte_ascii.Slash:
|
||||
case Byte_ascii.Backslash: case Byte_ascii.Colon: case Byte_ascii.Asterisk: case Byte_ascii.Question:
|
||||
case Byte_ascii.Quote: case Byte_ascii.Lt: case Byte_ascii.Gt: case Byte_ascii.Pipe:
|
||||
raw_bry[i] = Byte_ascii.Underline;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return raw_bry;
|
||||
}
|
||||
private static final byte[]
|
||||
Bry_url = Bry_.new_ascii_("url("), Bry_data_image = Bry_.new_ascii_("data:image/")
|
||||
, Bry_http = Bry_.new_ascii_("http://"), Bry_fwd_slashes = Bry_.new_ascii_("//"), Bry_import = Bry_.new_ascii_("@import ")
|
||||
, Bry_http_protocol = Bry_.new_ascii_("http")
|
||||
;
|
||||
public static final byte[]
|
||||
Bry_comment_bgn = Bry_.new_ascii_("/*XOWA:"), Bry_comment_end = Bry_.new_ascii_("*/");
|
||||
private static final int Bry_url_len = Bry_url.length, Bry_import_len = Bry_import.length;
|
||||
static final String GRP_KEY = "xowa.wikis.init.css";
|
||||
}
|
||||
@@ -1,183 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xoa_css_img_downloader_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoa_css_img_downloader_fxt fxt = new Xoa_css_img_downloader_fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test_css_convert
|
||||
( "x {url(\"//site/a.jpg\")} y {url(\"//site/b.jpg\")}"
|
||||
, "x {url(\"site/a.jpg\")} y {url(\"site/b.jpg\")}"
|
||||
, "site/a.jpg"
|
||||
, "site/b.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Unquoted() {
|
||||
fxt.Test_css_convert
|
||||
( "x {url(//site/a.jpg)}"
|
||||
, "x {url(\"site/a.jpg\")}"
|
||||
, "site/a.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Http() {
|
||||
fxt.Test_css_convert
|
||||
( "x {url(http://site/a.jpg)}"
|
||||
, "x {url(\"site/a.jpg\")}"
|
||||
, "site/a.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Base64() {
|
||||
fxt.Test_css_convert
|
||||
( "x {url(\"//site/a.jpg\")} y {url(\"data:image/png;base64,BASE64DATA;ABC=\")} z {}"
|
||||
, "x {url(\"site/a.jpg\")} y {url(\"data:image/png;base64,BASE64DATA;ABC=\")} z {}"
|
||||
, "site/a.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Exc_missing_quote() {
|
||||
fxt.Test_css_convert
|
||||
( "x {url(\"//site/a.jpg\")} y {url(\"//site/b.jpg} z {}"
|
||||
, "x {url(\"site/a.jpg\")} y {url(\"//site/b.jpg} z {}"
|
||||
, "site/a.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Exc_empty() {
|
||||
fxt.Test_css_convert
|
||||
( "x {url(\"//site/a.jpg\")} y {url(\"\"} z {}"
|
||||
, "x {url(\"site/a.jpg\")} y {url(\"\"} z {}"
|
||||
, "site/a.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Exc_name_only() {
|
||||
fxt.Test_css_convert
|
||||
( "x {url(\"//site/a.jpg\")} y {url(\"b.jpg\"} z {}"
|
||||
, "x {url(\"site/a.jpg\")} y {url(\"b.jpg\"} z {}"
|
||||
, "site/a.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Repeat() {// PURPOSE.fix: exact same item was being added literally
|
||||
fxt.Test_css_convert
|
||||
( "x {url(\"//site/a.jpg?a=b\")} y {url(\"//site/a.jpg?a=b\"}"
|
||||
, "x {url(\"site/a.jpg\")} y {url(\"site/a.jpg\"}"
|
||||
, "site/a.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Clean_basic() {fxt.Test_clean_img_url("//site/a.jpg" , "site/a.jpg");}
|
||||
@Test public void Clean_query() {fxt.Test_clean_img_url("//site/a.jpg?key=val" , "site/a.jpg");}
|
||||
@Test public void Clean_dir() {fxt.Test_clean_img_url("//site/a/b/c.jpg?key=val" , "site/a/b/c.jpg");}
|
||||
@Test public void Clean_exc_site_only() {fxt.Test_clean_img_url("//site" , null);}
|
||||
@Test public void Clean_exc_site_only_2() {fxt.Test_clean_img_url("//site/" , null);}
|
||||
@Test public void Import_url() {
|
||||
Io_mgr._.InitEngine_mem();
|
||||
Io_mgr._.SaveFilStr("mem/www/b.css", "imported_css");
|
||||
fxt.Test_css_convert
|
||||
( "x @import url(\"mem/www/b.css\") screen; z"
|
||||
, String_.Concat_lines_nl
|
||||
( "x "
|
||||
, "/*XOWA:mem/www/b.css*/"
|
||||
, "imported_css"
|
||||
, ""
|
||||
, " z"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Import_url_make() {
|
||||
fxt.Test_import_url("a.org/b" , "http:a.org/b"); // add "stylesheet_prefix"
|
||||
fxt.Test_import_url("http://a.org" , "http://a.org"); // unless it starts with http
|
||||
fxt.Test_import_url("https://a.org" , "https://a.org"); // unless starts with https EX:: handle @import(https://...); PAGE:tr.n:Main_Page; DATE:2014-06-04
|
||||
}
|
||||
@Test public void Import_url_relative() { // PURPOSE: if directory, add domain; "/a/b.css" -> "//domain/a/b.css"; DATE:2014-02-03
|
||||
Io_mgr._.InitEngine_mem();
|
||||
Io_mgr._.SaveFilStr("mem/en.wikipedia.org/www/b.css", "imported_css");
|
||||
fxt.Test_css_convert
|
||||
( "x @import url(\"/www/b.css\") screen; z" // starts with "/"
|
||||
, String_.Concat_lines_nl
|
||||
( "x "
|
||||
, "/*XOWA:mem/en.wikipedia.org/www/b.css*/"
|
||||
, "imported_css"
|
||||
, ""
|
||||
, " z"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Import_url_relative_skip() { // PURPOSE: if rel path, skip; "//site/a/b.css"; DATE:2014-02-03
|
||||
fxt.Downloader().Stylesheet_prefix_(Bry_.new_utf8_("mem")); // stylesheet prefix prefix defaults to ""; set to "mem", else test will try to retrieve "//url" which will fail
|
||||
Io_mgr._.InitEngine_mem();
|
||||
Io_mgr._.SaveFilStr("mem//en.wikipedia.org/a/b.css", "imported_css");
|
||||
fxt.Test_css_convert
|
||||
( "x @import url(\"//en.wikipedia.org/a/b.css\") screen; z" // starts with "//"
|
||||
, String_.Concat_lines_nl
|
||||
( "x "
|
||||
, "/*XOWA://en.wikipedia.org/a/b.css*/"
|
||||
, "imported_css"
|
||||
, ""
|
||||
, " z"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Import_url_space() { // PURPOSE: some css has spaces; replace with underlines else fails when downloaded; EX: https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
|
||||
Io_mgr._.InitEngine_mem();
|
||||
Io_mgr._.SaveFilStr("mem/www/b_c.css", "imported_css");
|
||||
fxt.Test_css_convert
|
||||
( "x @import url(\"mem/www/b c.css\") screen; z"
|
||||
, String_.Concat_lines_nl
|
||||
( "x "
|
||||
, "/*XOWA:mem/www/b_c.css*/"
|
||||
, "imported_css"
|
||||
, ""
|
||||
, " z"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Wikisource_freedimg() { // PURPOSE: check that "wikimedia" is replaced for FreedImg hack; PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06
|
||||
fxt.Downloader().Stylesheet_prefix_(Bry_.new_utf8_("mem")); // stylesheet prefix prefix defaults to ""; set to "mem", else test will try to retrieve "//url" which will fail
|
||||
Io_mgr._.InitEngine_mem();
|
||||
Io_mgr._.SaveFilStr("mem//en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css", ".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {");
|
||||
fxt.Test_css_convert
|
||||
( "x @import url(\"//en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css\") screen; z" // starts with "//"
|
||||
, String_.Concat_lines_nl
|
||||
( "x "
|
||||
, "/*XOWA://en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css*/"
|
||||
, ".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], /*XOWA:handle file:// paths which will have /commons.wikimedia.org/ but not /wikipedia/ */ .freedImg img[src*=\"wikimedia\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {"
|
||||
, ""
|
||||
, " z"
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
class Xoa_css_img_downloader_fxt {
|
||||
public Xoa_css_img_downloader Downloader() {return downloader;} private Xoa_css_img_downloader downloader;
|
||||
public void Clear() {
|
||||
downloader = new Xoa_css_img_downloader();
|
||||
downloader.Ctor(Gfo_usr_dlg_base.test_(), new Xof_download_wkr_test(), Bry_.Empty);
|
||||
}
|
||||
public void Test_css_convert(String raw, String expd, String... expd_img_ary) {
|
||||
ListAdp actl_img_list = ListAdp_.new_();
|
||||
byte[] actl_bry = downloader.Convert_to_local_urls(Bry_.new_ascii_("mem/en.wikipedia.org"), Bry_.new_utf8_(raw), actl_img_list);
|
||||
Tfds.Eq_str_lines(expd, String_.new_utf8_(actl_bry));
|
||||
Tfds.Eq_ary_str(expd_img_ary, actl_img_list.XtoStrAry());
|
||||
}
|
||||
public void Test_clean_img_url(String raw_str, String expd) {
|
||||
byte[] raw = Bry_.new_ascii_(raw_str);
|
||||
byte[] actl = downloader.Clean_img_url(raw, raw.length);
|
||||
Tfds.Eq(expd, actl == null ? null : String_.new_ascii_(actl));
|
||||
}
|
||||
public void Test_import_url(String raw, String expd) {
|
||||
byte[] actl = Xoa_css_img_downloader.Import_url_build(Bry_.new_ascii_("http:"), Bry_.new_ascii_("//en.wikipedia.org"), Bry_.new_utf8_(raw));
|
||||
Tfds.Eq(expd, String_.new_utf8_(actl));
|
||||
}
|
||||
}
|
||||
@@ -19,7 +19,7 @@ package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
import gplx.brys.*; import gplx.core.threads.*; import gplx.xowa.wikis.*; import gplx.xowa.setup.maints.*; import gplx.xowa.xtns.wdatas.imports.*;
|
||||
public class Xoi_cmd_wiki_tst {
|
||||
@Test public void Run() { // MAIN
|
||||
@Test public void Run() { // MAINT
|
||||
// Bld_import_list(Xow_wmf_api_mgr.Wikis);
|
||||
// Bld_cfg_files(Xow_wmf_api_mgr.Wikis); // NOTE: remember to carry over the wikisource / page / index commands from the existing xowa_build_cfg.gfs; also, only run the xowa_build_cfg.gfs once; DATE:2013-10-15; last run: DATE:2014-09-09
|
||||
}
|
||||
|
||||
@@ -1,242 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
/** Pairs an http url (fixed at construction) with a file url that is assigned later through File_url_. */
class Xoi_css_offline_itm {
	private final byte[] http_url;
	private byte[] file_url;
	public Xoi_css_offline_itm(byte[] http_url) {
		this.http_url = http_url;
	}
	public byte[] Http_url() {
		return http_url;
	}
	public byte[] File_url() {
		return file_url;
	}
	public void File_url_(byte[] v) {
		this.file_url = v;
	}
}
|
||||
class Xoi_css_url_info {
|
||||
// private Gfo_usr_dlg usr_dlg;
|
||||
public int Bgn_pos() {return bgn_pos;} public void Bgn_pos_(int v) {bgn_pos = v;} private int bgn_pos;
|
||||
public int End_pos() {return end_pos;} public void End_pos_(int v) {end_pos = v;} private int end_pos;
|
||||
public boolean Found() {return found;} public void Found_(boolean v) {found = v;} private boolean found;
|
||||
public void Init(Gfo_usr_dlg usr_dlg) {
|
||||
// this.usr_dlg = usr_dlg;
|
||||
}
|
||||
public void Clear() {
|
||||
bgn_pos = end_pos = -1;
|
||||
found = false;
|
||||
}
|
||||
public void Quote_data(byte end_byte, boolean quoted) {
|
||||
}
|
||||
public Xoi_css_url_info Rslt_fail(int end_pos, String fmt, Object... args) {
|
||||
this.end_pos = end_pos;
|
||||
// if (bgn_pos == src_len) {usr_dlg.Warn_many("", "", "eos after 'url(': bgn=~{bgn}", tkn_bgn); return Bry_finder.Not_found;}
|
||||
return this;
|
||||
}
|
||||
public Xoi_css_url_info Rslt_pass(int end_pos) {
|
||||
this.end_pos = end_pos;
|
||||
return this;
|
||||
}
|
||||
public Xoi_css_url_info Rslt_pass(int end_pos, byte[] url_clean) {
|
||||
this.end_pos = end_pos;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
class Xoi_css_offline_mgr {
|
||||
private Bry_bfr bfr;
|
||||
private byte[] src;
|
||||
private int src_len, pos;
|
||||
private OrderedHash download_queue;
|
||||
private Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Null;
|
||||
public void Offline(Bry_bfr bfr, OrderedHash download_queue, Object download_wkr, byte[] src) {
|
||||
this.bfr = bfr;
|
||||
this.download_queue = download_queue;
|
||||
this.src = src; this.src_len = src.length;
|
||||
this.pos = 0;
|
||||
while (true) {
|
||||
boolean last = pos == src_len;
|
||||
byte b = last ? Byte_ascii.NewLine : src[pos];
|
||||
Object o = tkns_trie.Match_bgn_w_byte(b, src, pos, src_len);
|
||||
if (o == null) {
|
||||
bfr.Add_byte(b);
|
||||
++pos;
|
||||
}
|
||||
else {
|
||||
byte tkn_tid = ((Byte_obj_val)o).Val();
|
||||
int match_pos = tkns_trie.Match_pos();
|
||||
int nxt_pos = -1;
|
||||
switch (tkn_tid) {
|
||||
case Tkn_url: nxt_pos = Process_url(pos, match_pos); break;
|
||||
case Tkn_import: nxt_pos = Process_import(pos, match_pos); break;
|
||||
}
|
||||
pos = nxt_pos;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
// "//id.wikibooks.org/w/index.php?title=MediaWiki:Common.css&oldid=43393&action=raw&ctype=text/css";
|
||||
private int Process_import(int tkn_bgn, int tkn_end) { // @import
|
||||
// get url
|
||||
// if null, add to bfr and exit;
|
||||
// else download, and recursively call self
|
||||
// int bgn_pos = Bry_finder.Find_fwd(src, end_byte, bgn_pos, src_len);
|
||||
// int end_pos = Bry_finder.Find_fwd(src, end_byte, bgn_pos, src_len);
|
||||
return -1;
|
||||
}
|
||||
private int Process_url(int tkn_bgn, int tkn_end) { // " url"
|
||||
int bgn_pos = Bry_finder.Find_fwd_while_ws(src, tkn_end, src_len); // skip any ws after " url("
|
||||
if (bgn_pos == src_len) {usr_dlg.Warn_many("", "", "eos after 'url(': bgn=~{bgn}", tkn_bgn); return Bry_finder.Not_found;}
|
||||
byte end_byte = src[bgn_pos]; boolean quoted = true;
|
||||
switch (end_byte) {
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos: // quoted; increment position
|
||||
++bgn_pos;
|
||||
break;
|
||||
default: // not quoted; end byte is ")"
|
||||
end_byte = Byte_ascii.Paren_end;
|
||||
quoted = false;
|
||||
break;
|
||||
}
|
||||
int end_pos = Bry_finder.Find_fwd(src, end_byte, bgn_pos, src_len);
|
||||
if (end_pos == Bry_.NotFound) { // unclosed "url("; exit since nothing else will be found
|
||||
usr_dlg.Warn_many("", "", "could not end_byte for 'url(': bgn='~{0}' end='~{1}'", bgn_pos, String_.new_utf8_len_safe_(src, tkn_bgn, tkn_bgn + 25));
|
||||
bfr.Add_mid(src, tkn_bgn, src_len);
|
||||
return Bry_finder.Not_found;
|
||||
}
|
||||
if (end_pos - bgn_pos == 0) { // empty; "url()"; ignore
|
||||
usr_dlg.Warn_many("", "", "'url(' is empty: bgn='~{0}' end='~{1}'", tkn_bgn, String_.new_utf8_len_safe_(src, tkn_bgn, tkn_bgn + 25));
|
||||
return end_pos;
|
||||
}
|
||||
byte[] url_raw = Bry_.Mid(src, bgn_pos, end_pos); int url_raw_len = url_raw.length;
|
||||
if (Bry_.HasAtBgn(url_raw, Bry_data_image, 0, url_raw_len)) { // base64
|
||||
++end_pos; // include end_byte;
|
||||
bfr.Add_mid(src, tkn_bgn, end_pos); // nothing to download; just add entire String
|
||||
return end_pos;
|
||||
}
|
||||
byte[] url_cleaned = Clean_url(url_raw, url_raw_len);
|
||||
if (url_cleaned == null) { // could not clean url
|
||||
usr_dlg.Warn_many("", "", "could not extract valid url: bgn='~{0}' end='~{1}'", tkn_bgn, String_.new_utf8_(url_raw));
|
||||
bfr.Add_mid(src, tkn_bgn, bgn_pos);
|
||||
return bgn_pos;
|
||||
}
|
||||
Xoi_css_offline_itm url_itm = (Xoi_css_offline_itm)download_queue.Fetch(url_cleaned);
|
||||
if (url_itm == null) { // only add unique items for download;
|
||||
url_itm = new Xoi_css_offline_itm(url_cleaned);
|
||||
download_queue.Add(url_cleaned, url_itm);
|
||||
}
|
||||
byte[] file_url = Replace_invalid_chars(Bry_.Copy(url_cleaned)); // NOTE: must call ByteAry.Copy else url_cleaned will change *inside* bry
|
||||
url_itm.File_url_(file_url);
|
||||
bfr.Add_mid(src, tkn_bgn, tkn_end);
|
||||
if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
|
||||
bfr.Add(file_url);
|
||||
if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
|
||||
return end_pos;
|
||||
}
|
||||
public static Xoi_css_url_info Process_url(byte[] src, int src_len, int tkn_bgn, int tkn_end, Xoi_css_url_info inf) { // " url"
|
||||
inf.Clear();
|
||||
int bgn_pos = Bry_finder.Find_fwd_while_ws(src, tkn_end, src_len); // skip any ws after " url("
|
||||
if (bgn_pos == src_len) return inf.Rslt_fail(src_len, "eos after 'url(': bgn=~{bgn}");
|
||||
byte end_byte = src[bgn_pos]; boolean quoted = true;
|
||||
switch (end_byte) {
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos: // quoted; increment position
|
||||
++bgn_pos;
|
||||
break;
|
||||
default: // not quoted; end byte is ")"
|
||||
end_byte = Byte_ascii.Paren_end;
|
||||
quoted = false;
|
||||
break;
|
||||
}
|
||||
inf.Quote_data(end_byte, quoted);
|
||||
int end_pos = Bry_finder.Find_fwd(src, end_byte, bgn_pos, src_len);
|
||||
if (end_pos == Bry_.NotFound) { // unclosed "url("; exit since nothing else will be found
|
||||
return inf.Rslt_fail(src_len, "could not end_byte for 'url(': bgn='~{0}' end='~{1}'");
|
||||
}
|
||||
if (end_pos - bgn_pos == 0) { // empty; "url()"; ignore
|
||||
return inf.Rslt_fail(end_pos + 1, "'url(' is empty: bgn='~{0}' end='~{1}'");
|
||||
}
|
||||
byte[] url_raw = Bry_.Mid(src, bgn_pos, end_pos); int url_raw_len = url_raw.length;
|
||||
if (Bry_.HasAtBgn(url_raw, Bry_data_image, 0, url_raw_len)) { // base64
|
||||
return inf.Rslt_pass(end_pos + 1); // nothing to download; just add entire String
|
||||
}
|
||||
byte[] url_cleaned = Clean_url(url_raw, url_raw_len);
|
||||
if (url_cleaned == null) // could not clean url
|
||||
return inf.Rslt_fail(bgn_pos, "could not extract valid url: bgn='~{0}' end='~{1}'");
|
||||
return inf.Rslt_pass(end_pos, url_cleaned);
|
||||
// Xoi_css_offline_itm url_itm = (Xoi_css_offline_itm)download_queue.Fetch(url_cleaned);
|
||||
// if (url_itm == null) { // only add unique items for download;
|
||||
// url_itm = new Xoi_css_offline_itm(url_cleaned);
|
||||
// download_queue.Add(url_cleaned, url_itm);
|
||||
// }
|
||||
// byte[] file_url = Replace_invalid_chars(Bry_.Copy(url_cleaned)); // NOTE: must call ByteAry.Copy else url_cleaned will change *inside* bry
|
||||
// url_itm.File_url_(file_url);
|
||||
// bfr.Add_mid(src, tkn_bgn, tkn_end);
|
||||
// if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
|
||||
// bfr.Add(file_url);
|
||||
// if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
|
||||
// return inf;
|
||||
}
|
||||
public static byte[] Clean_url(byte[] raw, int len) { // return "site/img.png" if "//site/img.png" or "http://site/img.png", "img.png?key=val"
|
||||
int bgn = 0;
|
||||
if (Bry_.HasAtBgn(raw, Bry_fwd_slashes , 0, len)) bgn = Bry_fwd_slashes.length; // skip if starts with "//"
|
||||
else if (Bry_.HasAtBgn(raw, Bry_http , 0, len)) bgn = Bry_http.length; // skip if starts with "http://"
|
||||
else if (Bry_.HasAtBgn(raw, Bry_https , 0, len)) bgn = Bry_https.length; // skip if starts with "https://"
|
||||
int slash_pos = Bry_finder.Find_fwd(raw, Byte_ascii.Slash, bgn, len); // find 1st slash
|
||||
if ( slash_pos == Bry_finder.Not_found // no slashes; must have at least 1 slash to have 2 segments; EX: site.org/img.png
|
||||
|| slash_pos == len - 1 // first slash is last char; EX: "site.org/"
|
||||
)
|
||||
return null; // invalid
|
||||
int end = len;
|
||||
int question_pos = Bry_finder.Find_bwd(raw, Byte_ascii.Question);
|
||||
if (question_pos != Bry_finder.Not_found) // url has query String; EX:site.org/img.png?key=val
|
||||
end = question_pos; // remove query String
|
||||
return Bry_.Mid(raw, bgn, end);
|
||||
}
|
||||
public static byte[] Replace_invalid_chars(byte[] src) {
|
||||
int len = src.length;
|
||||
for (int i = 0; i < len; i++) { // convert invalid wnt chars to underscores
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
//case Byte_ascii.Slash:
|
||||
case Byte_ascii.Backslash: case Byte_ascii.Colon: case Byte_ascii.Asterisk: case Byte_ascii.Question:
|
||||
case Byte_ascii.Quote: case Byte_ascii.Lt: case Byte_ascii.Gt: case Byte_ascii.Pipe:
|
||||
src[i] = Byte_ascii.Underline;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return src;
|
||||
}
|
||||
|
||||
public static final byte[] Tkn_url_bry = Bry_.new_ascii_(" url(");
|
||||
private static final byte Tkn_import = 1, Tkn_url = 2;
|
||||
private static final Btrie_slim_mgr tkns_trie = Btrie_slim_mgr.ci_ascii_()
|
||||
.Add_str_byte("@import" , Tkn_import)
|
||||
.Add_bry_bval(Tkn_url_bry , Tkn_url)
|
||||
;
|
||||
private static final byte[]
|
||||
Bry_data_image = Bry_.new_ascii_("data:image/")
|
||||
, Bry_http = Bry_.new_ascii_("http://")
|
||||
, Bry_https = Bry_.new_ascii_("https://")
|
||||
, Bry_fwd_slashes = Bry_.new_ascii_("//")
|
||||
// , Bry_http_protocol = Bry_.new_ascii_("http"), Bry_url = Bry_.new_ascii_("url("), Bry_import = Bry_.new_ascii_("@import ")
|
||||
;
|
||||
public static final byte[]
|
||||
Bry_comment_bgn = Bry_.new_ascii_("/*XOWA:")
|
||||
, Bry_comment_end = Bry_.new_ascii_("*/")
|
||||
;
|
||||
// private static final int Bry_url_len = Bry_url.length, Bry_import_len = Bry_import.length;
|
||||
}
|
||||
// class Io_download_itm {
|
||||
// public byte[] Src_url() {return src_url;} public void Src_url_(byte[] v) {src_url = v;} private byte[] src_url;
|
||||
// public Io_url Trg_url() {return trg_url;} public void Trg_url_(Io_url v) {trg_url = v;} private Io_url trg_url;
|
||||
// public String Download_err() {return download_err;} public void Download_err_(String v) {download_err = v;} private String download_err;
|
||||
// }
|
||||
// interface Io_download_mgr {
|
||||
// void Download(Io_download_itm itm);
|
||||
// }
|
||||
@@ -1,52 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xoi_css_offline_mgr_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoi_css_offline_mgr_fxt fxt = new Xoi_css_offline_mgr_fxt();
|
||||
@Test public void Basic() {
|
||||
// fxt.Test_extract_url_warn("url(" , "end of stream");
|
||||
// fxt.Test_extract_url_warn("url(abc" , "end not found");
|
||||
// fxt.Test_extract_url_warn("url()" , "url is empty");
|
||||
fxt.Test_extract_url_pass("url('a/b')" , "url is empty");
|
||||
}
|
||||
}
|
||||
class Xoi_css_offline_mgr_fxt {
|
||||
private Xoi_css_url_info info = new Xoi_css_url_info();
|
||||
private Gfo_usr_dlg usr_dlg = null;
|
||||
// private Xoi_css_offline_mgr mgr;
|
||||
public void Clear() {
|
||||
info.Init(usr_dlg);
|
||||
// mgr = new Xoi_css_offline_mgr();
|
||||
}
|
||||
public void Test_extract_url_warn(String raw, String err) {
|
||||
}
|
||||
public void Test_extract_url_pass(String src_str, String expd) {
|
||||
byte[] src = Bry_.new_utf8_(src_str);
|
||||
Test_extract_url(src);
|
||||
// String actl = String_.new_ascii_(src, info.Bgn_pos(), info.End_pos());
|
||||
// Tfds.Eq(expd, actl);
|
||||
}
|
||||
private void Test_extract_url(byte[] src) {
|
||||
int src_len = src.length;
|
||||
info.Clear();
|
||||
int tkn_bgn = Bry_finder.Find_fwd(src, Xoi_css_offline_mgr.Tkn_url_bry, 0, src_len);
|
||||
int tkn_end = tkn_bgn + Xoi_css_offline_mgr.Tkn_url_bry.length;
|
||||
Xoi_css_offline_mgr.Process_url(src, src_len, tkn_bgn, tkn_end, info);
|
||||
}
|
||||
}
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.xowa.wikis.data.tbls.*;
|
||||
public interface Xow_page_fetcher {
|
||||
Xow_page_fetcher Wiki_(Xowe_wiki v);
|
||||
byte[] Fetch(int ns_id, byte[] ttl);
|
||||
}
|
||||
class Xow_page_fetcher_wiki implements Xow_page_fetcher {
|
||||
public Xow_page_fetcher Wiki_(Xowe_wiki v) {this.wiki = v; return this;} private Xowe_wiki wiki;
|
||||
public byte[] Fetch(int ns_id, byte[] ttl_bry) {
|
||||
Xoa_ttl ttl = Xoa_ttl.parse_(wiki, ns_id, ttl_bry);
|
||||
Xoae_page page = wiki.Data_mgr().Get_page(ttl, false); // go through data_mgr in case of redirects
|
||||
return page.Missing() ? null : page.Data_raw();
|
||||
}
|
||||
}
|
||||
class Xow_page_fetcher_mok implements Xow_page_fetcher {
|
||||
public Xow_page_fetcher Wiki_(Xowe_wiki v) {return this;}
|
||||
public void Clear() {pages.Clear();} private HashAdp pages = HashAdp_.new_();
|
||||
public void Add(int ns_id, byte[] ttl, byte[] text) {
|
||||
Xowd_page_itm page = new Xowd_page_itm().Ns_id_(ns_id).Ttl_page_db_(ttl).Text_(text);
|
||||
pages.Add(Make_key(ns_id, ttl), page);
|
||||
}
|
||||
public byte[] Fetch(int ns_id, byte[] ttl) {
|
||||
Xowd_page_itm rv = (Xowd_page_itm)pages.Fetch(Make_key(ns_id, ttl));
|
||||
return rv == null ? null : rv.Text();
|
||||
}
|
||||
String Make_key(int ns_id, byte[] ttl) {return Int_.Xto_str(ns_id) + "|" + String_.new_utf8_(ttl);}
|
||||
}
|
||||
Reference in New Issue
Block a user