mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.7.2.1
This commit is contained in:
265
400_xowa/src/gplx/xowa/bldrs/css/Xoa_css_extractor.java
Normal file
265
400_xowa/src/gplx/xowa/bldrs/css/Xoa_css_extractor.java
Normal file
@@ -0,0 +1,265 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.ios.*; import gplx.xowa.html.*;
|
||||
import gplx.xowa.wikis.*; import gplx.xowa.wikis.data.*;
|
||||
import gplx.xowa.files.downloads.*;
|
||||
public class Xoa_css_extractor {
|
||||
public IoEngine_xrg_downloadFil Download_xrg() {return download_xrg;} private IoEngine_xrg_downloadFil download_xrg = Io_mgr.I.DownloadFil_args("", Io_url_.Empty);
|
||||
public Xoa_css_extractor Wiki_domain_(byte[] v) {wiki_domain = v; return this;} private byte[] wiki_domain;
|
||||
public Xoa_css_extractor Usr_dlg_(Gfo_usr_dlg v) {usr_dlg = v; return this;} private Gfo_usr_dlg usr_dlg;
|
||||
public Xoa_css_extractor Failover_dir_(Io_url v) {failover_dir = v; return this;} private Io_url failover_dir;
|
||||
public Xoa_css_extractor Wiki_html_dir_(Io_url v) {wiki_html_dir = v; return this;} private Io_url wiki_html_dir;
|
||||
public Xoa_css_extractor Mainpage_url_(String v) {mainpage_url = v; return this;} private String mainpage_url;
|
||||
public Xoa_css_extractor Protocol_prefix_(String v) {protocol_prefix = v; return this;} private String protocol_prefix = "https:";// NOTE: changed from http to https; DATE:2015-02-17
|
||||
public Xoa_css_extractor Page_fetcher_(Xow_page_fetcher v) {page_fetcher = v; return this;} private Xow_page_fetcher page_fetcher;
|
||||
public Xoa_css_extractor Css_img_downloader_(Xoa_css_img_downloader v) {this.css_img_downloader = v; return this;} private Xoa_css_img_downloader css_img_downloader;
|
||||
public Xoa_css_extractor Opt_download_css_common_(boolean v) {opt_download_css_common = v; return this;} private boolean opt_download_css_common;
|
||||
public Xoa_css_extractor Url_encoder_(Url_encoder v) {url_encoder = v; return this;} private Url_encoder url_encoder;
|
||||
public Xoa_css_extractor Wiki_code_(byte[] v) {this.wiki_code = v; return this;} private byte[] wiki_code = null;
|
||||
private byte[] mainpage_html; private boolean lang_is_ltr = true;
|
||||
public void Init_by_app(Xoae_app app) {
|
||||
this.usr_dlg = app.Usr_dlg();
|
||||
Xof_download_wkr download_wkr = app.Wmf_mgr().Download_wkr();
|
||||
this.download_xrg = download_wkr.Download_xrg();
|
||||
css_img_downloader = new Xoa_css_img_downloader().Ctor(usr_dlg, download_wkr, Bry_.new_u8(protocol_prefix));
|
||||
failover_dir = app.Fsys_mgr().Bin_any_dir().GenSubDir_nest("html", "xowa", "import");
|
||||
url_encoder = Xoa_app_.Utl__encoder_mgr().Http_url();
|
||||
}
|
||||
public void Install(Xow_wiki wiki, String css_key) {
|
||||
try {
|
||||
this.wiki_html_dir = wiki.App().Fsys_mgr().Wiki_css_dir(wiki.Domain_str()); // EX: /xowa/user/anonymous/wiki/en.wikipedia.org
|
||||
Io_url css_comm_fil = wiki_html_dir.GenSubFil(Css_common_name);
|
||||
Io_url css_wiki_fil = wiki_html_dir.GenSubFil(Css_wiki_name);
|
||||
wiki.Html__page_wtr_mgr().Init_css_urls(css_comm_fil, css_wiki_fil);
|
||||
if (wiki.Domain_tid() == Xow_domain_type_.Tid_home || Env_.Mode_testing()) return; // NOTE: do not download if home_wiki; also needed for TEST
|
||||
if (Io_mgr.I.ExistsFil(css_wiki_fil)) return; // css file exists; nothing to generate
|
||||
if (wiki.Html__css_installing()) return;
|
||||
wiki.Html__css_installing_(true);
|
||||
wiki.App().Usr_dlg().Log_many("", "", "generating css for '~{0}'", wiki.Domain_str());
|
||||
if (css_key != null) {
|
||||
if (Install_by_db(wiki, wiki_html_dir, css_key)) return;
|
||||
}
|
||||
if (wiki.Type_is_edit())
|
||||
this.Install_by_wmf((Xowe_wiki)wiki, wiki_html_dir);
|
||||
wiki.Html__css_installing_(false);
|
||||
}
|
||||
catch (Exception e) { // if error, failover; paranoia catch for outliers like bad network connectivity fail, or MediaWiki: message not existing; DATE:2013-11-21
|
||||
wiki.App().Usr_dlg().Warn_many("", "", "failed to get css; failing over; wiki='~{0}' err=~{1}", wiki.Domain_str(), Err_.Message_gplx(e));
|
||||
Css_common_failover(); // only failover xowa_common.css; xowa_wiki.css comes from MediaWiki:Common.css / Vector.css
|
||||
wiki.Html__css_installing_(false);
|
||||
}
|
||||
}
|
||||
private void Install_by_wmf(Xowe_wiki wiki, Io_url wiki_html_dir) {
|
||||
opt_download_css_common = wiki.Appe().Setup_mgr().Dump_mgr().Css_commons_download();
|
||||
if (!wiki.Appe().Usere().Cfg_mgr().Security_mgr().Web_access_enabled()) opt_download_css_common = false; // if !web_access_enabled, don't download
|
||||
this.wiki_domain = wiki.Domain_bry();
|
||||
mainpage_url = "https://" + wiki.Domain_str(); // NOTE: cannot reuse protocol_prefix b/c "//" needs to be added manually; protocol_prefix is used for logo and images which have form of "//domain/image.png"; changed to https; DATE:2015-02-17
|
||||
if (page_fetcher == null) page_fetcher = new Xow_page_fetcher_wiki();
|
||||
page_fetcher.Wiki_(wiki);
|
||||
this.wiki_html_dir = wiki_html_dir;
|
||||
this.lang_is_ltr = wiki.Lang().Dir_ltr();
|
||||
this.wiki_code = wiki.Domain_abrv();
|
||||
mainpage_html = Mainpage_download_html();
|
||||
Css_common_setup();
|
||||
Css_wiki_setup();
|
||||
Logo_setup();
|
||||
}
|
||||
private boolean Install_by_db(Xow_wiki wiki, Io_url wiki_html_dir, String css_key) {
|
||||
Xowd_db_mgr core_db_mgr = wiki.Data__core_mgr();
|
||||
if ( core_db_mgr == null
|
||||
|| core_db_mgr.Props() == null
|
||||
|| core_db_mgr.Props().Schema_is_1()
|
||||
|| !core_db_mgr.Tbl__cfg().Select_yn_or(Xow_cfg_consts.Grp__wiki_schema, Xowd_db_file_schema_props.Key__tbl_css_core, Bool_.N)
|
||||
) {
|
||||
Xoa_app_.Usr_dlg().Warn_many("", "", "css.db not found; wiki=~{0} css_dir=~{1}", wiki.Domain_str(), wiki_html_dir.Raw());
|
||||
return false;
|
||||
}
|
||||
Xowd_db_file core_db = core_db_mgr.Db__core();
|
||||
return gplx.xowa.html.css.Xowd_css_core_mgr.Get(core_db.Tbl__css_core(), core_db.Tbl__css_file(), wiki_html_dir, css_key);
|
||||
}
|
||||
public void Css_common_setup() {
|
||||
if (opt_download_css_common)
|
||||
Css_common_download();
|
||||
else
|
||||
Css_common_failover();
|
||||
}
|
||||
private void Css_common_failover() {
|
||||
Io_url trg_fil = wiki_html_dir.GenSubFil(Css_common_name);
|
||||
Io_mgr.I.CopyFil(Css_common_failover_url(), trg_fil, true);
|
||||
css_img_downloader.Chk(wiki_domain, trg_fil);
|
||||
}
|
||||
private void Css_common_download() {
|
||||
boolean css_stylesheet_common_missing = true;
|
||||
Io_url trg_fil = wiki_html_dir.GenSubFil(Css_common_name);
|
||||
css_stylesheet_common_missing = !Css_scrape_setup();
|
||||
if (css_stylesheet_common_missing)
|
||||
Io_mgr.I.CopyFil(Css_common_failover_url(), trg_fil, true);
|
||||
else
|
||||
css_img_downloader.Chk(wiki_domain, trg_fil);
|
||||
}
|
||||
private Io_url Css_common_failover_url() {
|
||||
Io_url css_commons_url = failover_dir.GenSubDir("xowa_common_override").GenSubFil_ary("xowa_common_", String_.new_u8(wiki_code), ".css");
|
||||
if (Io_mgr.I.ExistsFil(css_commons_url)) return css_commons_url; // specific css exists for wiki; use it; EX: xowa_common_wiki_mediawikiwiki.css
|
||||
return failover_dir.GenSubFil(lang_is_ltr ? Css_common_name_ltr : Css_common_name_rtl);
|
||||
}
|
||||
public void Css_wiki_setup() {
|
||||
boolean css_stylesheet_wiki_missing = true;
|
||||
Io_url trg_fil = wiki_html_dir.GenSubFil(Css_wiki_name);
|
||||
if (Io_mgr.I.ExistsFil(trg_fil)) return; // don't download if already there
|
||||
css_stylesheet_wiki_missing = !Css_wiki_generate(trg_fil);
|
||||
if (css_stylesheet_wiki_missing)
|
||||
Failover(trg_fil);
|
||||
else
|
||||
css_img_downloader.Chk(wiki_domain, trg_fil);
|
||||
}
|
||||
private boolean Css_wiki_generate(Io_url trg_fil) {
|
||||
Bry_bfr bfr = Bry_bfr.new_();
|
||||
Css_wiki_generate_section(bfr, Ttl_common_css);
|
||||
Css_wiki_generate_section(bfr, Ttl_vector_css);
|
||||
byte[] bry = bfr.Xto_bry_and_clear();
|
||||
bry = Bry_.Replace(bry, gplx.xowa.bldrs.xmls.Xob_xml_parser_.Bry_tab_ent, gplx.xowa.bldrs.xmls.Xob_xml_parser_.Bry_tab);
|
||||
Io_mgr.I.SaveFilBry(trg_fil, bry);
|
||||
return true;
|
||||
} private static final byte[] Ttl_common_css = Bry_.new_a7("Common.css"), Ttl_vector_css = Bry_.new_a7("Vector.css");
|
||||
private boolean Css_wiki_generate_section(Bry_bfr bfr, byte[] ttl) {
|
||||
byte[] page = page_fetcher.Get_by(Xow_ns_.Id_mediawiki, ttl);
|
||||
if (page == null) return false;
|
||||
if (bfr.Len() != 0) bfr.Add_byte_nl().Add_byte_nl(); // add "\n\n" between sections; !=0 checks against first
|
||||
Css_wiki_section_hdr.Bld_bfr_many(bfr, ttl); // add "/*XOWA:MediaWiki:Common.css*/\n"
|
||||
bfr.Add(page); // add page
|
||||
return true;
|
||||
} static final Bry_fmtr Css_wiki_section_hdr = Bry_fmtr.new_("/*XOWA:MediaWiki:~{ttl}*/\n", "ttl");
|
||||
public void Logo_setup() {
|
||||
boolean logo_missing = true;
|
||||
Io_url logo_url = wiki_html_dir.GenSubFil("logo.png");
|
||||
if (Io_mgr.I.ExistsFil(logo_url)) return; // don't download if already there
|
||||
logo_missing = !Logo_download(logo_url);
|
||||
if (logo_missing)
|
||||
Failover(logo_url);
|
||||
}
|
||||
private boolean Logo_download(Io_url trg_fil) {
|
||||
String src_fil = Logo_find_src();
|
||||
if (src_fil == null) {
|
||||
if (Logo_copy_from_css(trg_fil)) return true;
|
||||
usr_dlg.Warn_many("", "", "failed to extract logo: trg_fil=~{0};", trg_fil.Raw());
|
||||
return false;
|
||||
}
|
||||
String log_msg = usr_dlg.Prog_many("", "", "downloading logo: '~{0}'", src_fil);
|
||||
boolean rv = download_xrg.Prog_fmt_hdr_(log_msg).Src_(src_fil).Trg_(trg_fil).Exec();
|
||||
if (!rv)
|
||||
usr_dlg.Warn_many("", "", "failed to download logo: src_url=~{0};", src_fil);
|
||||
return rv;
|
||||
}
|
||||
private boolean Logo_copy_from_css(Io_url trg_fil) {
|
||||
Io_url commons_file = wiki_html_dir.GenSubFil(Css_common_name);
|
||||
byte[] commons_src = Io_mgr.I.LoadFilBry(commons_file);
|
||||
int bgn_pos = Bry_finder.Find_fwd(commons_src, Bry_mw_wiki_logo); if (bgn_pos == Bry_finder.Not_found) return false;
|
||||
bgn_pos += Bry_mw_wiki_logo.length;
|
||||
int end_pos = Bry_finder.Find_fwd(commons_src, Byte_ascii.Quote, bgn_pos + 1); if (end_pos == Bry_finder.Not_found) return false;
|
||||
byte[] src_bry = Bry_.Mid(commons_src, bgn_pos, end_pos);
|
||||
src_bry = Xob_url_fixer.Fix(wiki_domain, src_bry, src_bry.length);
|
||||
if (wiki_html_dir.Info().DirSpr_byte() == Byte_ascii.Backslash)
|
||||
src_bry = Bry_.Replace(src_bry, Byte_ascii.Slash, Byte_ascii.Backslash);
|
||||
Io_url src_fil = wiki_html_dir.GenSubFil(String_.new_u8(src_bry));
|
||||
Io_mgr.I.CopyFil(src_fil, trg_fil, true);
|
||||
return true;
|
||||
} private static final byte[] Bry_mw_wiki_logo = Bry_.new_a7(".mw-wiki-logo{background-image:url(\"");
|
||||
private String Logo_find_src() {
|
||||
if (mainpage_html == null) return null;
|
||||
int main_page_html_len = mainpage_html.length;
|
||||
int logo_bgn = Bry_finder.Find_fwd(mainpage_html, Logo_find_bgn, 0); if (logo_bgn == Bry_.NotFound) return null;
|
||||
logo_bgn += Logo_find_bgn.length;
|
||||
logo_bgn = Bry_finder.Find_fwd(mainpage_html, Logo_find_end, logo_bgn); if (logo_bgn == Bry_.NotFound) return null;
|
||||
logo_bgn += Logo_find_end.length;
|
||||
int logo_end = Bry_finder.Find_fwd(mainpage_html, Byte_ascii.Paren_end, logo_bgn, main_page_html_len); if (logo_bgn == Bry_.NotFound) return null;
|
||||
byte[] logo_bry = Bry_.Mid(mainpage_html, logo_bgn, logo_end);
|
||||
return protocol_prefix + String_.new_u8(logo_bry);
|
||||
}
|
||||
private static final byte[] Logo_find_bgn = Bry_.new_a7("<div id=\"p-logo\""), Logo_find_end = Bry_.new_a7("background-image: url(");
|
||||
public boolean Mainpage_download() {
|
||||
mainpage_html = Mainpage_download_html();
|
||||
return mainpage_html != null;
|
||||
}
|
||||
private byte[] Mainpage_download_html() {
|
||||
String main_page_url_temp = mainpage_url;
|
||||
if (Bry_.Eq(wiki_domain, Xow_domain_.Domain_bry_wikidata)) // if wikidata, download css for a Q* page; Main_Page has less css; DATE:2014-09-30
|
||||
main_page_url_temp = main_page_url_temp + "/wiki/Q2";
|
||||
String log_msg = usr_dlg.Prog_many("", "main_page.download", "downloading main page for '~{0}'", main_page_url_temp);
|
||||
byte[] main_page_html = download_xrg.Prog_fmt_hdr_(log_msg).Exec_as_bry(main_page_url_temp);
|
||||
if (main_page_html == null) usr_dlg.Warn_many("", "", "failed to download main_page: src_url=~{0};", main_page_url_temp);
|
||||
return main_page_html;
|
||||
}
|
||||
private void Failover(Io_url trg_fil) {
|
||||
usr_dlg.Note_many("", "", "copying failover file: trg_fil=~{0};", trg_fil.Raw());
|
||||
Io_mgr.I.CopyFil(failover_dir.GenSubFil(trg_fil.NameAndExt()), trg_fil, true);
|
||||
}
|
||||
public boolean Css_scrape_setup() {
|
||||
Io_url trg_fil = wiki_html_dir.GenSubFil(Css_common_name);
|
||||
// if (Io_mgr.I.ExistsFil(trg_fil)) return; // don't download if already there; DELETED: else main_page is not scraped for all stylesheet links; simple.d: fails; DATE:2014-02-11
|
||||
byte[] css_url = Css_scrape();
|
||||
if (css_url == null) {
|
||||
Css_common_failover();
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
Io_mgr.I.SaveFilBry(trg_fil, css_url);
|
||||
css_img_downloader.Chk(wiki_domain, trg_fil);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
private byte[] Css_scrape() {
|
||||
if (mainpage_html == null) return null;
|
||||
String[] css_urls = Css_scrape_urls(mainpage_html); if (css_urls.length == 0) return null;
|
||||
return Css_scrape_download(css_urls);
|
||||
}
|
||||
private String[] Css_scrape_urls(byte[] raw) {
|
||||
List_adp rv = List_adp_.new_();
|
||||
int raw_len = raw.length;
|
||||
int prv_pos = 0;
|
||||
int css_find_bgn_len = Css_find_bgn.length;
|
||||
byte[] protocol_prefix_bry = Bry_.new_u8(protocol_prefix);
|
||||
while (true) {
|
||||
int url_bgn = Bry_finder.Find_fwd(raw, Css_find_bgn, prv_pos); if (url_bgn == Bry_.NotFound) break; // nothing left; stop
|
||||
url_bgn += css_find_bgn_len;
|
||||
int url_end = Bry_finder.Find_fwd(raw, Byte_ascii.Quote, url_bgn, raw_len); if (url_end == Bry_.NotFound) {usr_dlg.Warn_many("", "main_page.css_parse", "could not find css; pos='~{0}' text='~{1}'", url_bgn, String_.new_u8_by_len(raw, url_bgn, url_bgn + 32)); break;}
|
||||
byte[] css_url_bry = Bry_.Mid(raw, url_bgn, url_end);
|
||||
css_url_bry = Bry_.Replace(css_url_bry, Css_amp_find, Css_amp_repl); // & -> &
|
||||
css_url_bry = url_encoder.Decode(css_url_bry); // %2C -> %7C -> |
|
||||
css_url_bry = Bry_.Add(protocol_prefix_bry, css_url_bry);
|
||||
rv.Add(String_.new_u8(css_url_bry));
|
||||
prv_pos = url_end;
|
||||
}
|
||||
return rv.To_str_ary();
|
||||
} private static final byte[] Css_find_bgn = Bry_.new_a7("<link rel=\"stylesheet\" href=\""), Css_amp_find = Bry_.new_a7("&"), Css_amp_repl = Bry_.new_a7("&");
|
||||
private byte[] Css_scrape_download(String[] css_urls) {
|
||||
int css_urls_len = css_urls.length;
|
||||
Bry_bfr tmp_bfr = Bry_bfr.new_();
|
||||
for (int i = 0; i < css_urls_len; i++) {
|
||||
String css_url = css_urls[i];
|
||||
usr_dlg.Prog_many("", "main_page.css_download", "downloading css for '~{0}'", css_url);
|
||||
download_xrg.Prog_fmt_hdr_(css_url);
|
||||
byte[] css_bry = download_xrg.Exec_as_bry(css_url); if (css_bry == null) continue; // css not found; continue
|
||||
tmp_bfr.Add(Xoa_css_img_downloader.Bry_comment_bgn).Add_str(css_url).Add(Xoa_css_img_downloader.Bry_comment_end).Add_byte_nl();
|
||||
tmp_bfr.Add(css_bry).Add_byte_nl().Add_byte_nl();
|
||||
}
|
||||
return tmp_bfr.Xto_bry_and_clear();
|
||||
}
|
||||
public static final String Css_common_name = "xowa_common.css", Css_wiki_name = "xowa_wiki.css"
|
||||
, Css_common_name_ltr = "xowa_common_ltr.css", Css_common_name_rtl = "xowa_common_rtl.css";
|
||||
}
|
||||
@@ -0,0 +1,130 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*; import gplx.ios.*; import gplx.xowa.wikis.data.*; import gplx.xowa.files.downloads.*;
|
||||
public class Xoa_css_extractor_basic_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoa_css_extractor_fxt fxt = new Xoa_css_extractor_fxt();
|
||||
@Test public void Logo_download() {
|
||||
fxt.Init_fil("mem/http/en.wikipedia.org" , Xoa_css_extractor_fxt.Main_page_html);
|
||||
fxt.Init_fil("mem/http/wiki.png" , "download");
|
||||
fxt.Exec_logo_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/logo.png", "download");
|
||||
}
|
||||
@Test public void Logo_download_mw_wiki_logo() {
|
||||
fxt.Init_fil("mem/http/en.wikipedia.org" , "");
|
||||
fxt.Init_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/a/wiki.png" , "download");
|
||||
fxt.Init_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css" , ".mw-wiki-logo{background-image:url(\"//a/wiki.png\");");
|
||||
fxt.Exec_logo_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/logo.png" , "download");
|
||||
}
|
||||
@Test public void Logo_failover() {
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/logo.png" , "failover");
|
||||
fxt.Exec_logo_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/logo.png", "failover");
|
||||
}
|
||||
@Test public void Css_common_download_failover() {
|
||||
fxt.Css_installer().Opt_download_css_common_(true);
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_ltr.css", "failover");
|
||||
fxt.Exec_css_common_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
|
||||
}
|
||||
@Test public void Css_common_copy() {
|
||||
fxt.Css_installer().Opt_download_css_common_(false);
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_ltr.css", "failover");
|
||||
fxt.Exec_css_common_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
|
||||
}
|
||||
@Test public void Css_common_copy_specific_wiki() { // PURPOSE: css for specific wiki
|
||||
fxt.Css_installer().Opt_download_css_common_(false).Wiki_code_(Bry_.new_a7("enwiki"));
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_override/xowa_common_enwiki.css", "failover");
|
||||
fxt.Exec_css_common_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
|
||||
}
|
||||
@Test public void Css_scrape_download() {
|
||||
fxt.Css_installer().Url_encoder_(Url_encoder.new_http_url_());
|
||||
fxt.Init_fil("mem/http/en.wikipedia.org" , Xoa_css_extractor_fxt.Main_page_html);
|
||||
fxt.Init_fil("mem/http/en.wikipedia.org/common.css" , "download");
|
||||
fxt.Init_fil("mem/http/www/a&0|b,c" , "data=css_0");
|
||||
fxt.Init_fil("mem/http/www/a&1|b,c" , "data=css_1");
|
||||
fxt.Exec_css_mainpage_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", String_.Concat_lines_nl
|
||||
( "/*XOWA:mem/http/www/a&0|b,c*/"
|
||||
, "data=css_0"
|
||||
, ""
|
||||
, "/*XOWA:mem/http/www/a&1|b,c*/"
|
||||
, "data=css_1"
|
||||
));
|
||||
}
|
||||
@Test public void Css_scrape_failover() {
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_ltr.css", "failover");
|
||||
fxt.Exec_css_mainpage_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
|
||||
}
|
||||
}
|
||||
class Xoa_css_extractor_fxt {
|
||||
public void Clear() {
|
||||
Io_mgr.I.InitEngine_mem();
|
||||
Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Test();
|
||||
css_installer = new Xoa_css_extractor();
|
||||
css_installer.Download_xrg().Trg_engine_key_(IoEngine_.MemKey);
|
||||
css_installer
|
||||
.Usr_dlg_(usr_dlg)
|
||||
.Wiki_domain_(Bry_.new_a7("en.wikipedia.org"))
|
||||
.Protocol_prefix_("mem/http/")
|
||||
.Mainpage_url_("mem/http/en.wikipedia.org")
|
||||
.Failover_dir_(Io_url_.new_any_("mem/xowa/bin/any/html/xowa/import/")) // "mem/xowa/user/anonymous/wiki/home/html/"
|
||||
.Wiki_html_dir_(Io_url_.new_any_("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/"))
|
||||
;
|
||||
page_fetcher = new Xow_page_fetcher_test();
|
||||
css_installer.Page_fetcher_(page_fetcher);
|
||||
Xoa_css_img_downloader css_img_downloader = new Xoa_css_img_downloader();
|
||||
css_img_downloader.Ctor(usr_dlg, new Xof_download_wkr_test(), Bry_.new_a7("mem/http/"));
|
||||
css_installer.Css_img_downloader_(css_img_downloader);
|
||||
} private Xow_page_fetcher_test page_fetcher;
|
||||
public Xoa_css_extractor Css_installer() {return css_installer;} private Xoa_css_extractor css_installer;
|
||||
public void Init_page(int ns_id, String ttl, String text) {
|
||||
page_fetcher.Add(ns_id, Bry_.new_a7(ttl), Bry_.new_a7(text));
|
||||
}
|
||||
public void Init_fil_empty(String url) {Init_fil(url, "");}
|
||||
public void Init_fil(String url, String text) {Io_mgr.I.SaveFilStr(url, text);}
|
||||
public void Test_fil(String url, String expd) {Tfds.Eq_str_lines(expd, Io_mgr.I.LoadFilStr(Io_url_.new_any_(url)));}
|
||||
public void Exec_logo_setup() {
|
||||
css_installer.Mainpage_download();
|
||||
css_installer.Logo_setup();
|
||||
}
|
||||
public void Exec_css_common_setup() {
|
||||
css_installer.Mainpage_download();
|
||||
css_installer.Css_common_setup();
|
||||
}
|
||||
public void Exec_css_wiki_setup() {css_installer.Css_wiki_setup();}
|
||||
public void Exec_css_mainpage_setup() {
|
||||
css_installer.Mainpage_download();
|
||||
css_installer.Css_scrape_setup();
|
||||
}
|
||||
public static String Main_page_html = String_.Concat_lines_nl
|
||||
( "<html>"
|
||||
, " <head>"
|
||||
, " <link rel=\"stylesheet\" href=\"www/a&0%7Cb%2Cc\" />"
|
||||
, " <link rel=\"stylesheet\" href=\"www/a&1%7Cb%2Cc\" />"
|
||||
, " </head>"
|
||||
, " <body>"
|
||||
, " <div id=\"p-logo\" role=\"banner\"><a style=\"background-image: url(wiki.png);\""
|
||||
, " </body>"
|
||||
, "</html>"
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*; import gplx.ios.*;
|
||||
public class Xoa_css_extractor_wiki_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoa_css_extractor_fxt fxt = new Xoa_css_extractor_fxt();
|
||||
@Test public void Css_wiki_generate() {
|
||||
fxt.Init_page(Xow_ns_.Id_mediawiki, "Common.css" , "css_0");
|
||||
fxt.Init_page(Xow_ns_.Id_mediawiki, "Vector.css" , "css_1");
|
||||
fxt.Exec_css_wiki_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_wiki.css", String_.Concat_lines_nl
|
||||
( "/*XOWA:MediaWiki:Common.css*/"
|
||||
, "css_0"
|
||||
, ""
|
||||
, "/*XOWA:MediaWiki:Vector.css*/"
|
||||
, "css_1"
|
||||
));
|
||||
}
|
||||
@Test public void Css_wiki_missing() {
|
||||
fxt.Exec_css_wiki_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_wiki.css", "");
|
||||
}
|
||||
@Test public void Css_wiki_tab() { // PURPOSE: swap out 	 for xdat files
|
||||
fxt.Init_page(Xow_ns_.Id_mediawiki, "Common.css" , "a	b");
|
||||
fxt.Exec_css_wiki_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_wiki.css", String_.Concat_lines_nl
|
||||
( "/*XOWA:MediaWiki:Common.css*/"
|
||||
, "a\tb"
|
||||
));
|
||||
}
|
||||
}
|
||||
191
400_xowa/src/gplx/xowa/bldrs/css/Xoa_css_img_downloader.java
Normal file
191
400_xowa/src/gplx/xowa/bldrs/css/Xoa_css_img_downloader.java
Normal file
@@ -0,0 +1,191 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.xowa.files.downloads.*;
|
||||
public class Xoa_css_img_downloader {
|
||||
private byte[] wiki_domain;
|
||||
public Xoa_css_img_downloader Ctor(Gfo_usr_dlg usr_dlg, Xof_download_wkr download_wkr, byte[] stylesheet_prefix) {
|
||||
this.usr_dlg = usr_dlg; this.download_wkr = download_wkr; this.stylesheet_prefix = stylesheet_prefix;
|
||||
return this;
|
||||
} private Gfo_usr_dlg usr_dlg; private Xof_download_wkr download_wkr;
|
||||
public Xoa_css_img_downloader Stylesheet_prefix_(byte[] v) {stylesheet_prefix = v; return this;} private byte[] stylesheet_prefix; // TEST: setter exposed b/c tests can handle "mem/" but not "//mem"
|
||||
public void Chk(byte[] wiki_domain, Io_url css_fil) {
|
||||
this.wiki_domain = wiki_domain;
|
||||
List_adp img_list = List_adp_.new_();
|
||||
byte[] old_bry = Io_mgr.I.LoadFilBry(css_fil);
|
||||
byte[] rel_url_prefix = Bry_.Add(Bry_fwd_slashes, wiki_domain);
|
||||
byte[] new_bry = Convert_to_local_urls(rel_url_prefix, old_bry, img_list);
|
||||
Io_url img_dir = css_fil.OwnerDir();
|
||||
Download_fils(img_dir, img_list.To_str_ary());
|
||||
Io_mgr.I.SaveFilBry(css_fil, new_bry);
|
||||
}
|
||||
public byte[] Convert_to_local_urls(byte[] rel_url_prefix, byte[] src, List_adp list) {
|
||||
try {
|
||||
int src_len = src.length;
|
||||
int prv_pos = 0;
|
||||
Bry_bfr bfr = Bry_bfr.new_(src_len);
|
||||
Hash_adp img_hash = Hash_adp_bry.cs_();
|
||||
while (true) {
|
||||
int url_pos = Bry_finder.Find_fwd(src, Bry_url, prv_pos);
|
||||
if (url_pos == Bry_.NotFound) {bfr.Add_mid(src, prv_pos, src_len); break;} // no more "url("; exit;
|
||||
int bgn_pos = url_pos + Bry_url_len; // set bgn_pos after "url("
|
||||
byte bgn_byte = src[bgn_pos];
|
||||
byte end_byte = Byte_ascii.Nil;
|
||||
boolean quoted = true;
|
||||
switch (bgn_byte) { // find end_byte
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos: // quoted; end_byte is ' or "
|
||||
end_byte = bgn_byte;
|
||||
++bgn_pos;
|
||||
break;
|
||||
default: // not quoted; end byte is ")"
|
||||
end_byte = Byte_ascii.Paren_end;
|
||||
quoted = false;
|
||||
break;
|
||||
}
|
||||
int end_pos = Bry_finder.Find_fwd(src, end_byte, bgn_pos, src_len);
|
||||
if (end_pos == Bry_.NotFound) { // unclosed "url("; exit since nothing else will be found
|
||||
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.end_missing", "could not find end_sequence for 'url(': bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8_by_len(src, prv_pos, prv_pos + 25));
|
||||
bfr.Add_mid(src, prv_pos, src_len);
|
||||
break;
|
||||
}
|
||||
if (end_pos - bgn_pos == 0) { // empty; "url()"; ignore
|
||||
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.empty", "'url(' is empty: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8_by_len(src, prv_pos, prv_pos + 25));
|
||||
bfr.Add_mid(src, prv_pos, bgn_pos);
|
||||
prv_pos = bgn_pos;
|
||||
continue;
|
||||
}
|
||||
byte[] img_raw = Bry_.Mid(src, bgn_pos, end_pos); int img_raw_len = img_raw.length;
|
||||
if (Bry_.Has_at_bgn(img_raw, Bry_data_image, 0, img_raw_len)) { // base64
|
||||
bfr.Add_mid(src, prv_pos, end_pos); // nothing to download; just add entire String
|
||||
prv_pos = end_pos;
|
||||
continue;
|
||||
}
|
||||
int import_url_end = Import_url_chk(rel_url_prefix, src, src_len, prv_pos, url_pos, img_raw, bfr); // check for embedded stylesheets via @import tag
|
||||
if (import_url_end != Bry_.NotFound) {
|
||||
prv_pos = import_url_end;
|
||||
continue;
|
||||
}
|
||||
byte[] img_cleaned = Xob_url_fixer.Fix(wiki_domain, img_raw, img_raw_len);
|
||||
if (img_cleaned == null) { // could not clean img
|
||||
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.clean_failed", "could not extract valid http src: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8(img_raw));
|
||||
bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue;
|
||||
}
|
||||
if (!img_hash.Has(img_cleaned)) {// only add unique items for download;
|
||||
img_hash.Add_as_key_and_val(img_cleaned);
|
||||
list.Add(String_.new_u8(img_cleaned));
|
||||
}
|
||||
img_cleaned = Replace_invalid_chars(Bry_.Copy(img_cleaned)); // NOTE: must call ByteAry.Copy else img_cleaned will change *inside* hash
|
||||
bfr.Add_mid(src, prv_pos, bgn_pos);
|
||||
if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
|
||||
bfr.Add(img_cleaned);
|
||||
if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
|
||||
prv_pos = end_pos;
|
||||
}
|
||||
return bfr.Xto_bry_and_clear();
|
||||
}
|
||||
catch (Exception e) {
|
||||
usr_dlg.Warn_many("", "", "failed to convert local_urls: ~{0} ~{1}", String_.new_u8(rel_url_prefix), Err_.Message_gplx(e));
|
||||
return src;
|
||||
}
|
||||
}
|
||||
public static byte[] Import_url_build(byte[] stylesheet_prefix, byte[] rel_url_prefix, byte[] css_url) {
|
||||
return Bry_.Has_at_bgn(css_url, Bry_http_protocol) // css_url already starts with "http"; return self; PAGE:tr.n:Main_Page; DATE:2014-06-04
|
||||
? css_url
|
||||
: Bry_.Add(stylesheet_prefix, css_url)
|
||||
;
|
||||
}
|
||||
private int Import_url_chk(byte[] rel_url_prefix, byte[] src, int src_len, int old_pos, int find_bgn, byte[] url_raw, Bry_bfr bfr) {
|
||||
if (find_bgn < Bry_import_len) return Bry_.NotFound;
|
||||
if (!Bry_.Match(src, find_bgn - Bry_import_len, find_bgn, Bry_import)) return Bry_.NotFound;
|
||||
byte[] css_url = url_raw; int css_url_len = css_url.length;
|
||||
if (css_url_len > 0 && css_url[0] == Byte_ascii.Slash) { // css_url starts with "/"; EX: "/page" or "//site/page" DATE:2014-02-03
|
||||
if (css_url_len > 1 && css_url[1] != Byte_ascii.Slash) // skip if css_url starts with "//"; EX: "//site/page"
|
||||
css_url = Bry_.Add(rel_url_prefix, css_url); // "/w/a.css" -> "//en.wikipedia.org/w/a.css"
|
||||
}
|
||||
css_url = Bry_.Replace(css_url, Byte_ascii.Space, Byte_ascii.Underline); // NOTE: must replace spaces with underlines else download will fail; EX:https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
|
||||
byte[] css_src_bry = Import_url_build(stylesheet_prefix, rel_url_prefix, css_url);
|
||||
String css_src_str = String_.new_u8(css_src_bry);
|
||||
download_wkr.Download_xrg().Prog_fmt_hdr_(usr_dlg.Log_many(GRP_KEY, "logo.download", "downloading import for '~{0}'", css_src_str));
|
||||
byte[] css_trg_bry = download_wkr.Download_xrg().Exec_as_bry(css_src_str);
|
||||
if (css_trg_bry == null) {
|
||||
usr_dlg.Warn_many("", "", "could not import css: url=~{0}", css_src_str);
|
||||
return Bry_.NotFound; // css not found
|
||||
}
|
||||
bfr.Add_mid(src, old_pos, find_bgn - Bry_import_len).Add_byte_nl();
|
||||
bfr.Add(Bry_comment_bgn).Add(css_url).Add(Bry_comment_end).Add_byte_nl();
|
||||
if (Bry_finder.Find_fwd(css_url, Wikisource_dynimg_ttl) != -1) css_trg_bry = Bry_.Replace(css_trg_bry, Wikisource_dynimg_find, Wikisource_dynimg_repl); // FreedImg hack; PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06
|
||||
bfr.Add(css_trg_bry).Add_byte_nl();
|
||||
bfr.Add_byte_nl();
|
||||
int semic_pos = Bry_finder.Find_fwd(src, Byte_ascii.Semic, find_bgn + url_raw.length, src_len);
|
||||
return semic_pos + Int_.Const_dlm_len;
|
||||
}
|
||||
// Constants for the Wikisource "FreedImg" hack applied in Import_url_chk:
// when the imported css url contains Wikisource_dynimg_ttl, every occurrence of
// Wikisource_dynimg_find in the downloaded css is replaced with Wikisource_dynimg_repl,
// which adds an extra selector matching "wikimedia" in the img src.
private static final byte[]
  Wikisource_dynimg_ttl = Bry_.new_a7("en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css")
, Wikisource_dynimg_find = Bry_.new_a7(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
, Wikisource_dynimg_repl = Bry_.new_a7(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], /*XOWA:handle file:// paths which will have /commons.wikimedia.org/ but not /wikipedia/ */ .freedImg img[src*=\"wikimedia\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
;
|
||||
public byte[] Clean_img_url(byte[] raw, int raw_len) {
|
||||
int pos_bgn = 0;
|
||||
if (Bry_.Has_at_bgn(raw, Bry_fwd_slashes, 0, raw_len)) pos_bgn = Bry_fwd_slashes.length;
|
||||
if (Bry_.Has_at_bgn(raw, Bry_http, 0, raw_len)) pos_bgn = Bry_http.length;
|
||||
int pos_slash = Bry_finder.Find_fwd(raw, Byte_ascii.Slash, pos_bgn, raw_len);
|
||||
if (pos_slash == Bry_.NotFound) return null; // first segment is site_name; at least one slash must be present for image name; EX: site.org/img_name.jpg
|
||||
if (pos_slash == raw_len - 1) return null; // "site.org/" is invalid
|
||||
int pos_end = raw_len;
|
||||
int pos_question = Bry_finder.Find_bwd(raw, Byte_ascii.Question);
|
||||
if (pos_question != Bry_.NotFound)
|
||||
pos_end = pos_question; // remove query params; EX: img_name?key=val
|
||||
return Bry_.Mid(raw, pos_bgn, pos_end);
|
||||
}
|
||||
private void Download_fils(Io_url css_dir, String[] ary) {
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
String src = ary[i];
|
||||
Io_url trg = css_dir.GenSubFil_nest(Op_sys.Cur().Fsys_http_frag_to_url_str(Replace_invalid_chars_str(src)));
|
||||
if (Io_mgr.I.ExistsFil(trg)) continue;
|
||||
download_wkr.Download(true, "https://" + src, trg, "download: " + src); // ILN
|
||||
if (Io_mgr.I.QueryFil(trg).Size() == 0) { // warn if 0 byte files downloaded; DATE:2015-07-06
|
||||
Xoa_app_.Usr_dlg().Warn_many("", "", "css.download; 0 byte file downloaded; file=~{0}", trg.Raw());
|
||||
}
|
||||
}
|
||||
}
|
||||
/** String variant of Replace_invalid_chars: converts to utf8 bytes, replaces, and converts back. */
String Replace_invalid_chars_str(String raw_str) {
  byte[] raw_bry = Bry_.new_u8(raw_str);
  byte[] rv_bry = Replace_invalid_chars(raw_bry);
  return String_.new_u8(rv_bry);
}
|
||||
byte[] Replace_invalid_chars(byte[] raw_bry) {
|
||||
int raw_len = raw_bry.length;
|
||||
for (int i = 0; i < raw_len; i++) { // convert invalid wnt chars to underscores
|
||||
byte b = raw_bry[i];
|
||||
switch (b) {
|
||||
//case Byte_ascii.Slash:
|
||||
case Byte_ascii.Backslash: case Byte_ascii.Colon: case Byte_ascii.Star: case Byte_ascii.Question:
|
||||
case Byte_ascii.Quote: case Byte_ascii.Lt: case Byte_ascii.Gt: case Byte_ascii.Pipe:
|
||||
raw_bry[i] = Byte_ascii.Underline;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return raw_bry;
|
||||
}
|
||||
// Byte sequences matched against css source and urls.
// Bry_import includes the trailing space: Import_url_chk requires "@import " immediately before "url(".
// Bry_http_protocol is "http" without "://" so that Import_url_build's check also matches "https".
private static final byte[]
  Bry_url = Bry_.new_a7("url("), Bry_data_image = Bry_.new_a7("data:image/")
, Bry_http = Bry_.new_a7("http://"), Bry_fwd_slashes = Bry_.new_a7("//"), Bry_import = Bry_.new_a7("@import ")
, Bry_http_protocol = Bry_.new_a7("http")
;
// Delimiters of the css comment that Import_url_chk writes before each inlined stylesheet.
public static final byte[]
  Bry_comment_bgn = Bry_.new_a7("/*XOWA:"), Bry_comment_end = Bry_.new_a7("*/");
// Cached lengths of Bry_url and Bry_import.
private static final int Bry_url_len = Bry_url.length, Bry_import_len = Bry_import.length;
// Group key passed to usr_dlg calls in this class.
static final String GRP_KEY = "xowa.wikis.init.css";
|
||||
}
|
||||
183
400_xowa/src/gplx/xowa/bldrs/css/Xoa_css_img_downloader_tst.java
Normal file
183
400_xowa/src/gplx/xowa/bldrs/css/Xoa_css_img_downloader_tst.java
Normal file
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*; import gplx.xowa.files.downloads.*;
|
||||
// Tests for Xoa_css_img_downloader: conversion of "url(...)" references to local paths, url cleaning, and inlining of imported stylesheets.
// Test_css_convert args: raw css, expected converted css, then the expected list of urls queued for download.
public class Xoa_css_img_downloader_tst {
  @Before public void init() {fxt.Clear();} private Xoa_css_img_downloader_fxt fxt = new Xoa_css_img_downloader_fxt();
  // two quoted urls: leading "//" is removed and both are listed for download
  @Test public void Basic() {
    fxt.Test_css_convert
    ( "x {url(\"//site/a.jpg\")} y {url(\"//site/b.jpg\")}"
    , "x {url(\"site/a.jpg\")} y {url(\"site/b.jpg\")}"
    , "site/a.jpg"
    , "site/b.jpg"
    );
  }
  // unquoted url: output is wrapped in double quotes
  @Test public void Unquoted() {
    fxt.Test_css_convert
    ( "x {url(//site/a.jpg)}"
    , "x {url(\"site/a.jpg\")}"
    , "site/a.jpg"
    );
  }
  // "http://" prefix is removed like "//"
  @Test public void Http() {
    fxt.Test_css_convert
    ( "x {url(http://site/a.jpg)}"
    , "x {url(\"site/a.jpg\")}"
    , "site/a.jpg"
    );
  }
  // "data:image/" urls are copied through unchanged and not listed for download
  @Test public void Base64() {
    fxt.Test_css_convert
    ( "x {url(\"//site/a.jpg\")} y {url(\"data:image/png;base64,BASE64DATA;ABC=\")} z {}"
    , "x {url(\"site/a.jpg\")} y {url(\"data:image/png;base64,BASE64DATA;ABC=\")} z {}"
    , "site/a.jpg"
    );
  }
  // second url has no closing quote: it is left as-is
  @Test public void Exc_missing_quote() {
    fxt.Test_css_convert
    ( "x {url(\"//site/a.jpg\")} y {url(\"//site/b.jpg} z {}"
    , "x {url(\"site/a.jpg\")} y {url(\"//site/b.jpg} z {}"
    , "site/a.jpg"
    );
  }
  // second url is empty: it is left as-is
  @Test public void Exc_empty() {
    fxt.Test_css_convert
    ( "x {url(\"//site/a.jpg\")} y {url(\"\"} z {}"
    , "x {url(\"site/a.jpg\")} y {url(\"\"} z {}"
    , "site/a.jpg"
    );
  }
//	@Test public void Exc_name_only() {	// COMMENTED: not sure how to handle "b.jpg" (automatically add "current" path?); RESTORE: when example found
//		fxt.Test_css_convert
//		( "x {url(\"//site/a.jpg\")} y {url(\"b.jpg\"} z {}"
//		, "x {url(\"site/a.jpg\")} y {url(\"b.jpg\"} z {}"
//		, "site/a.jpg"
//		);
//	}
  @Test public void Repeat() {// PURPOSE.fix: exact same item was being added literally
    fxt.Test_css_convert
    ( "x {url(\"//site/a.jpg?a=b\")} y {url(\"//site/a.jpg?a=b\"}"
    , "x {url(\"site/a.jpg\")} y {url(\"site/a.jpg\"}"
    , "site/a.jpg"
    );
  }
  // Clean_img_url: prefix and query are removed; urls without an image name yield null
  @Test public void Clean_basic() {fxt.Test_clean_img_url("//site/a.jpg" , "site/a.jpg");}
  @Test public void Clean_query() {fxt.Test_clean_img_url("//site/a.jpg?key=val" , "site/a.jpg");}
  @Test public void Clean_dir() {fxt.Test_clean_img_url("//site/a/b/c.jpg?key=val" , "site/a/b/c.jpg");}
  @Test public void Clean_exc_site_only() {fxt.Test_clean_img_url("//site" , null);}
  @Test public void Clean_exc_site_only_2() {fxt.Test_clean_img_url("//site/" , null);}
  // import rule is replaced by a comment naming the url followed by the content of the imported file
  @Test public void Import_url() {
    Io_mgr.I.InitEngine_mem();
    Io_mgr.I.SaveFilStr("mem/www/b.css", "imported_css");
    fxt.Test_css_convert
    ( "x @import url(\"mem/www/b.css\") screen; z"
    , String_.Concat_lines_nl
    ( "x "
    , "/*XOWA:mem/www/b.css*/"
    , "imported_css"
    , ""
    , " z"
    )
    );
  }
  @Test public void Import_url_make() {
    fxt.Test_import_url("a.org/b" , "http:a.org/b"); // add "stylesheet_prefix"
    fxt.Test_import_url("http://a.org" , "http://a.org"); // unless it starts with http
    fxt.Test_import_url("https://a.org" , "https://a.org"); // unless starts with https EX:: handle @import(https://...); PAGE:tr.n:Main_Page; DATE:2014-06-04
  }
  @Test public void Import_url_relative() { // PURPOSE: if directory, add domain; "/a/b.css" -> "//domain/a/b.css"; DATE:2014-02-03
    Io_mgr.I.InitEngine_mem();
    Io_mgr.I.SaveFilStr("mem/en.wikipedia.org/www/b.css", "imported_css");
    fxt.Test_css_convert
    ( "x @import url(\"/www/b.css\") screen; z" // starts with "/"
    , String_.Concat_lines_nl
    ( "x "
    , "/*XOWA:mem/en.wikipedia.org/www/b.css*/"
    , "imported_css"
    , ""
    , " z"
    )
    );
  }
  @Test public void Import_url_relative_skip() { // PURPOSE: if rel path, skip; "//site/a/b.css"; DATE:2014-02-03
    fxt.Downloader().Stylesheet_prefix_(Bry_.new_u8("mem")); // stylesheet prefix prefix defaults to ""; set to "mem", else test will try to retrieve "//url" which will fail
    Io_mgr.I.InitEngine_mem();
    Io_mgr.I.SaveFilStr("mem//en.wikipedia.org/a/b.css", "imported_css");
    fxt.Test_css_convert
    ( "x @import url(\"//en.wikipedia.org/a/b.css\") screen; z" // starts with "//"
    , String_.Concat_lines_nl
    ( "x "
    , "/*XOWA://en.wikipedia.org/a/b.css*/"
    , "imported_css"
    , ""
    , " z"
    )
    );
  }
  @Test public void Import_url_space() { // PURPOSE: some css has spaces; replace with underlines else fails when downloaded; EX: https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
    Io_mgr.I.InitEngine_mem();
    Io_mgr.I.SaveFilStr("mem/www/b_c.css", "imported_css");
    fxt.Test_css_convert
    ( "x @import url(\"mem/www/b c.css\") screen; z"
    , String_.Concat_lines_nl
    ( "x "
    , "/*XOWA:mem/www/b_c.css*/"
    , "imported_css"
    , ""
    , " z"
    )
    );
  }
  @Test public void Wikisource_freedimg() { // PURPOSE: check that "wikimedia" is replaced for FreedImg hack; PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06
    fxt.Downloader().Stylesheet_prefix_(Bry_.new_u8("mem")); // stylesheet prefix prefix defaults to ""; set to "mem", else test will try to retrieve "//url" which will fail
    Io_mgr.I.InitEngine_mem();
    Io_mgr.I.SaveFilStr("mem//en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css", ".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {");
    fxt.Test_css_convert
    ( "x @import url(\"//en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css\") screen; z" // starts with "//"
    , String_.Concat_lines_nl
    ( "x "
    , "/*XOWA://en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css*/"
    , ".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], /*XOWA:handle file:// paths which will have /commons.wikimedia.org/ but not /wikipedia/ */ .freedImg img[src*=\"wikimedia\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {"
    , ""
    , " z"
    )
    );
  }
}
|
||||
class Xoa_css_img_downloader_fxt {
|
||||
public Xoa_css_img_downloader Downloader() {return downloader;} private Xoa_css_img_downloader downloader;
|
||||
public void Clear() {
|
||||
downloader = new Xoa_css_img_downloader();
|
||||
downloader.Ctor(Gfo_usr_dlg_.Test(), new Xof_download_wkr_test(), Bry_.Empty);
|
||||
}
|
||||
public void Test_css_convert(String raw, String expd, String... expd_img_ary) {
|
||||
List_adp actl_img_list = List_adp_.new_();
|
||||
byte[] actl_bry = downloader.Convert_to_local_urls(Bry_.new_a7("mem/en.wikipedia.org"), Bry_.new_u8(raw), actl_img_list);
|
||||
Tfds.Eq_str_lines(expd, String_.new_u8(actl_bry));
|
||||
Tfds.Eq_ary_str(expd_img_ary, actl_img_list.To_str_ary());
|
||||
}
|
||||
public void Test_clean_img_url(String raw_str, String expd) {
|
||||
byte[] raw = Bry_.new_a7(raw_str);
|
||||
byte[] actl = downloader.Clean_img_url(raw, raw.length);
|
||||
Tfds.Eq(expd, actl == null ? null : String_.new_a7(actl));
|
||||
}
|
||||
public void Test_import_url(String raw, String expd) {
|
||||
byte[] actl = Xoa_css_img_downloader.Import_url_build(Bry_.new_a7("http:"), Bry_.new_a7("//en.wikipedia.org"), Bry_.new_u8(raw));
|
||||
Tfds.Eq(expd, String_.new_u8(actl));
|
||||
}
|
||||
}
|
||||
56
400_xowa/src/gplx/xowa/bldrs/css/Xob_css_parser.java
Normal file
56
400_xowa/src/gplx/xowa/bldrs/css/Xob_css_parser.java
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
class Xob_css_parser {
|
||||
private final Bry_bfr bfr = Bry_bfr.new_(255);
|
||||
private final Xob_mirror_mgr mgr;
|
||||
private final Xob_css_parser__url url_parser; private final Xob_css_parser__import import_parser;
|
||||
public Xob_css_parser(Xob_mirror_mgr mgr) {
|
||||
this.mgr = mgr;
|
||||
this.url_parser = new Xob_css_parser__url(mgr.Site_url());
|
||||
this.import_parser = new Xob_css_parser__import(url_parser);
|
||||
}
|
||||
public void Parse(byte[] src) {
|
||||
int src_len = src.length; int pos = 0;
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
Object o = tkns_trie.Match_bgn_w_byte(b, src, pos, src_len);
|
||||
if (o == null) {
|
||||
bfr.Add_byte(b);
|
||||
++pos;
|
||||
}
|
||||
else {
|
||||
byte tkn_tid = ((Byte_obj_val)o).Val();
|
||||
int match_pos = tkns_trie.Match_pos();
|
||||
Xob_css_tkn__base tkn = null;
|
||||
switch (tkn_tid) {
|
||||
case Tkn_url: tkn = url_parser.Parse(src, src_len, pos, match_pos); break;
|
||||
case Tkn_import: tkn = import_parser.Parse(src, src_len, pos, match_pos); break;
|
||||
}
|
||||
tkn.Process(mgr);
|
||||
pos = tkn.Write(bfr, src);
|
||||
}
|
||||
}
|
||||
}
|
||||
private static final byte Tkn_import = 1, Tkn_url = 2;
|
||||
private static final Btrie_slim_mgr tkns_trie = Btrie_slim_mgr.ci_ascii_()
|
||||
.Add_str_byte("@import" , Tkn_import)
|
||||
.Add_str_byte(" url(" , Tkn_url)
|
||||
;
|
||||
}
|
||||
43
400_xowa/src/gplx/xowa/bldrs/css/Xob_css_parser__import.java
Normal file
43
400_xowa/src/gplx/xowa/bldrs/css/Xob_css_parser__import.java
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.xowa.files.downloads.*;
|
||||
class Xob_css_parser__import {
|
||||
// // "//id.wikibooks.org/w/index.php?title=MediaWiki:Common.css&oldid=43393&action=raw&ctype=text/css";
|
||||
private final Xob_css_parser__url url_parser;
|
||||
public Xob_css_parser__import(Xob_css_parser__url url_parser) {this.url_parser = url_parser;}
|
||||
public Xob_css_tkn__base Parse(byte[] src, int src_len, int tkn_bgn, int tkn_end) { // " @import"
|
||||
int bgn_pos = Bry_finder.Find_fwd_while_ws(src, tkn_end, src_len); // skip any ws after " @import"
|
||||
if (bgn_pos == src_len) return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.import:EOS after import; bgn=~{0}", tkn_bgn);
|
||||
if (!Bry_.Has_at_bgn(src, Tkn_url_bry, bgn_pos, src_len)) return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.import:url missing; bgn=~{0}", tkn_bgn);
|
||||
tkn_end = bgn_pos + Tkn_url_bry.length;
|
||||
Xob_css_tkn__base frag = url_parser.Parse(src, src_len, bgn_pos, tkn_end);
|
||||
if (frag.Tid() != Xob_css_tkn__url.Tid_url) return Xob_css_tkn__warn.new_(tkn_bgn, frag.Pos_end(), "mirror.parser.import:url invalid; bgn=~{0}", tkn_bgn);
|
||||
Xob_css_tkn__url url_frag = (Xob_css_tkn__url)frag;
|
||||
byte[] src_url = url_frag.Src_url();
|
||||
src_url = Bry_.Replace(src_url, Byte_ascii.Space, Byte_ascii.Underline); // NOTE: must replace spaces with underlines else download will fail; EX:https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
|
||||
int semic_pos = Bry_finder.Find_fwd(src, Byte_ascii.Semic, frag.Pos_end(), src_len);
|
||||
return Xob_css_tkn__import.new_(tkn_bgn, semic_pos + 1, src_url, url_frag.Trg_url(), url_frag.Quote_byte());
|
||||
}
|
||||
private static final byte[] Tkn_url_bry = Bry_.new_a7("url(");
|
||||
public static final byte[]
|
||||
Wikisource_dynimg_ttl = Bry_.new_a7("en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css")
|
||||
, Wikisource_dynimg_find = Bry_.new_a7(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
|
||||
, Wikisource_dynimg_repl = Bry_.new_a7(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], /*XOWA:handle file:// paths which will have /commons.wikimedia.org/ but not /wikipedia/ */ .freedImg img[src*=\"wikimedia\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
|
||||
;
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
// Tests for Xob_css_parser__import.
// Test_parse_import args: source, expected written output.
// Test_parse_warn args: source, expected written output, substring expected in the warning message.
public class Xob_css_parser__import_tst {
  @Before public void init() {fxt.Clear();} private Xob_css_parser__import_fxt fxt = new Xob_css_parser__import_fxt();
  @Test public void Basic() {fxt.Test_parse_import (" @import url(//site/a.png)" , " @import url('site/a.png')");}
  @Test public void Warn_eos() {fxt.Test_parse_warn (" @import" , " @import" , "EOS");}
  @Test public void Warn_missing() {fxt.Test_parse_warn (" @import ('//site/a.png')" , " @import" , "missing");} // no "url("
  @Test public void Warn_invalid() {fxt.Test_parse_warn (" @import url('//site')" , " @import url('//site')" , "invalid");} // invalid
}
|
||||
class Xob_css_parser__import_fxt extends Xob_css_parser__url_fxt { private Xob_css_parser__import import_parser;
|
||||
@Override public void Clear() {
|
||||
super.Clear();
|
||||
this.import_parser = new Xob_css_parser__import(url_parser);
|
||||
}
|
||||
@Override protected void Exec_parse_hook() {
|
||||
this.cur_frag = import_parser.Parse(src_bry, src_bry.length, 0, 8); // 8=" @import".length
|
||||
}
|
||||
public void Test_parse_import(String src_str, String expd) {
|
||||
Exec_parse(src_str, Xob_css_tkn__base.Tid_import, expd);
|
||||
}
|
||||
}
|
||||
58
400_xowa/src/gplx/xowa/bldrs/css/Xob_css_parser__url.java
Normal file
58
400_xowa/src/gplx/xowa/bldrs/css/Xob_css_parser__url.java
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
class Xob_css_parser__url {
|
||||
private final byte[] site;
|
||||
public Xob_css_parser__url(byte[] site) {this.site = site;}
|
||||
public Xob_css_tkn__base Parse(byte[] src, int src_len, int tkn_bgn, int tkn_end) { // " url"
|
||||
int bgn_pos = Bry_finder.Find_fwd_while_ws(src, tkn_end, src_len); // skip any ws after " url("
|
||||
if (bgn_pos == src_len) return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:EOS; bgn=~{0}", tkn_bgn);
|
||||
byte end_byte = src[bgn_pos]; // note that first non-ws byte should determine end_byte
|
||||
byte quote_byte = end_byte;
|
||||
switch (end_byte) {
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos: // quoted; increment position; EX: ' url("a.png")'
|
||||
++bgn_pos;
|
||||
break;
|
||||
default: // not quoted; end byte is ")"; EX: ' url(a.png)'
|
||||
end_byte = Byte_ascii.Paren_end;
|
||||
quote_byte = Byte_ascii.Nil;
|
||||
break;
|
||||
}
|
||||
int end_pos = Bry_finder.Find_fwd(src, end_byte, bgn_pos, src_len);
|
||||
if (end_pos == Bry_.NotFound) // unclosed "url("; exit since nothing else will be found
|
||||
return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:dangling; bgn=~{0} excerpt=~{1}", bgn_pos, String_.new_u8_by_len(src, tkn_bgn, tkn_bgn + 128));
|
||||
if (end_pos - bgn_pos == 0) // empty; "url()"; ignore
|
||||
return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:empty; bgn=~{0} excerpt=~{1}", bgn_pos, String_.new_u8_by_len(src, tkn_bgn, tkn_bgn + 128));
|
||||
byte[] url_orig = Bry_.Mid(src, bgn_pos, end_pos); int url_orig_len = url_orig.length;
|
||||
++end_pos; // increment end_pos so rv will be after it;
|
||||
if ( end_byte != Byte_ascii.Paren_end) { // end_byte is apos / quote
|
||||
if ( end_pos < src_len
|
||||
&& src[end_pos] == Byte_ascii.Paren_end)
|
||||
++end_pos;
|
||||
else
|
||||
return Xob_css_tkn__warn.new_(tkn_bgn, end_pos, "mirror.parser.url:base64 dangling; bgn=~{0} excerpt=~{1}", bgn_pos, String_.new_u8(url_orig));
|
||||
}
|
||||
if (Bry_.Has_at_bgn(url_orig, Bry_data_image)) // base64
|
||||
return Xob_css_tkn__base64.new_(tkn_bgn, end_pos);
|
||||
byte[] src_url = Xob_url_fixer.Fix(site, url_orig, url_orig_len);
|
||||
if (src_url == null) // could not convert
|
||||
return Xob_css_tkn__warn.new_(tkn_bgn, end_pos, "mirror.parser.url:invalid url; bgn=~{0} excerpt=~{1}", tkn_bgn, String_.new_u8(url_orig));
|
||||
return Xob_css_tkn__url.new_(tkn_bgn, end_pos, src_url, quote_byte);
|
||||
}
|
||||
private static final byte[] Bry_data_image = Bry_.new_a7("data:image/");
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
// Tests for Xob_css_parser__url.
// Test_parse_url / Test_parse_base64 args: source, expected written output.
// Test_parse_warn args: source, expected written output, substring expected in the warning message.
public class Xob_css_parser__url_tst {
  @Before public void init() {fxt.Clear();} private Xob_css_parser__url_fxt fxt = new Xob_css_parser__url_fxt();
  // unquoted urls are written with apos; quoted urls keep their own quote char
  @Test public void Quote_none() {fxt.Test_parse_url(" url(//site/A.png) b" , " url('site/A.png')");}
  @Test public void Quote_apos() {fxt.Test_parse_url(" url('//site/A.png') b" , " url('site/A.png')");}
  @Test public void Quote_quote() {fxt.Test_parse_url(" url(\"//site/A.png\") b" , " url(\"site/A.png\")");}
  @Test public void Base64() {fxt.Test_parse_base64(" url('data:image/png;base64,BASE64DATA;ABC=') b", " url('data:image/png;base64,BASE64DATA;ABC=')");}
  // closing quote is not followed by ")"
  @Test public void Base64_dangling() {fxt.Test_parse_warn(" url('data:image/png;base64,BASE64DATA;ABC=' ", " url('data:image/png;base64,BASE64DATA;ABC='", "base64 dangling");}
  @Test public void Warn_eos() {fxt.Test_parse_warn(" url(" , " url(" , "EOS");}
  @Test public void Warn_dangling() {fxt.Test_parse_warn(" url(a" , " url(" , "dangling");}
  @Test public void Warn_empty() {fxt.Test_parse_warn(" url()" , " url(" , "empty");}
  @Test public void Warn_site() {fxt.Test_parse_warn(" url('//site')" , " url('//site')" , "invalid");}
}
|
||||
class Xob_css_parser__url_fxt {
|
||||
protected Xob_css_parser__url url_parser; private final Bry_bfr bfr = Bry_bfr.new_(32);
|
||||
protected Xob_css_tkn__base cur_frag; protected byte[] src_bry;
|
||||
@gplx.Virtual public void Clear() {
|
||||
url_parser = new Xob_css_parser__url(Bry_.new_a7("site"));
|
||||
}
|
||||
protected void Exec_parse(String src_str, int expd_tid, String expd_str) {
|
||||
this.src_bry = Bry_.new_u8(src_str);
|
||||
this.Exec_parse_hook();
|
||||
cur_frag.Write(bfr, src_bry);
|
||||
String actl_str = bfr.Xto_str_and_clear();
|
||||
Tfds.Eq(expd_tid, cur_frag.Tid(), "wrong tid; expd={0}, actl={1}", expd_tid, cur_frag.Tid());
|
||||
Tfds.Eq(expd_str, actl_str);
|
||||
}
|
||||
@gplx.Virtual protected void Exec_parse_hook() {
|
||||
this.cur_frag = url_parser.Parse(src_bry, src_bry.length, 0, 5); // 5=" url(".length
|
||||
}
|
||||
public void Test_parse_url(String src_str, String expd) {
|
||||
Exec_parse(src_str, Xob_css_tkn__base.Tid_url, expd);
|
||||
}
|
||||
public void Test_parse_base64(String src_str, String expd) {
|
||||
Exec_parse(src_str, Xob_css_tkn__base.Tid_base64, expd);
|
||||
}
|
||||
public void Test_parse_warn(String src_str, String expd, String warn) {
|
||||
Exec_parse(src_str, Xob_css_tkn__base.Tid_warn, expd);
|
||||
Xob_css_tkn__warn sub_frag = (Xob_css_tkn__warn)cur_frag;
|
||||
Tfds.Eq(true, String_.Has(sub_frag.Fail_msg(), warn));
|
||||
}
|
||||
}
|
||||
117
400_xowa/src/gplx/xowa/bldrs/css/Xob_css_tkn__base.java
Normal file
117
400_xowa/src/gplx/xowa/bldrs/css/Xob_css_tkn__base.java
Normal file
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
// Base class for tokens produced by the css parsers.
// A token records its type id (one of the Tid_ constants) and the span [pos_bgn, pos_end) it covers in the source.
abstract class Xob_css_tkn__base {
  // Sets the type id and source span; called by the static new_ factories of the subclasses.
  public void Init(int tid, int pos_bgn, int pos_end) {
    this.tid = tid; this.pos_bgn = pos_bgn; this.pos_end = pos_end;
  }
  public int Tid() {return tid;} protected int tid;
  public int Pos_bgn() {return pos_bgn;} protected int pos_bgn;
  public int Pos_end() {return pos_end;} protected int pos_end;
  // Hook run against the mirror mgr before the token is written; does nothing unless overridden.
  @gplx.Virtual public void Process(Xob_mirror_mgr mgr) {}
  // Writes the token's output to bfr and returns the source position at which the caller resumes.
  public abstract int Write(Bry_bfr bfr, byte[] src);
  public static final int Tid_warn = 1, Tid_base64 = 2, Tid_url = 3, Tid_import = 4;
}
|
||||
class Xob_css_tkn__warn extends Xob_css_tkn__base {
|
||||
public String Fail_msg() {return fail_msg;} private String fail_msg;
|
||||
@Override public void Process(Xob_mirror_mgr mgr) {
|
||||
mgr.Usr_dlg().Warn_many("", "", fail_msg);
|
||||
}
|
||||
@Override public int Write(Bry_bfr bfr, byte[] src) {
|
||||
bfr.Add_mid(src, pos_bgn, pos_end);
|
||||
return pos_end;
|
||||
}
|
||||
public static Xob_css_tkn__warn new_(int pos_bgn, int pos_end, String fmt, Object... fmt_args) {
|
||||
Xob_css_tkn__warn rv = new Xob_css_tkn__warn();
|
||||
rv.Init(Tid_warn, pos_bgn, pos_end);
|
||||
rv.fail_msg = String_.Format(fmt, fmt_args);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
class Xob_css_tkn__base64 extends Xob_css_tkn__base {
|
||||
@Override public int Write(Bry_bfr bfr, byte[] src) {
|
||||
bfr.Add_mid(src, pos_bgn, pos_end);
|
||||
return pos_end;
|
||||
}
|
||||
public static Xob_css_tkn__base64 new_(int pos_bgn, int pos_end) {
|
||||
Xob_css_tkn__base64 rv = new Xob_css_tkn__base64();
|
||||
rv.Init(Tid_base64, pos_bgn, pos_end);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
class Xob_css_tkn__url extends Xob_css_tkn__base {
|
||||
public byte Quote_byte() {return quote_byte;} private byte quote_byte;
|
||||
public byte[] Src_url() {return src_url;} private byte[] src_url;
|
||||
public byte[] Trg_url() {return trg_url;} private byte[] trg_url;
|
||||
@Override public void Process(Xob_mirror_mgr mgr) {
|
||||
mgr.File_hash().Add_if_dupe_use_1st(src_url, new Xobc_download_itm(Xobc_download_itm.Tid_file, String_.new_u8(src_url), trg_url));
|
||||
}
|
||||
@Override public int Write(Bry_bfr bfr, byte[] src) {
|
||||
byte quote = quote_byte; if (quote == Byte_ascii.Nil) quote = Byte_ascii.Apos;
|
||||
bfr.Add_str_a7(" url("); // EX: ' url('
|
||||
bfr.Add_byte(quote).Add(trg_url).Add_byte(quote); // EX: '"a.png"'
|
||||
bfr.Add_byte(Byte_ascii.Paren_end); // EX: ')'
|
||||
return pos_end;
|
||||
}
|
||||
public static Xob_css_tkn__url new_(int pos_bgn, int pos_end, byte[] src_url, byte quote_byte) {
|
||||
Xob_css_tkn__url rv = new Xob_css_tkn__url();
|
||||
rv.Init(Tid_url, pos_bgn, pos_end);
|
||||
rv.src_url = src_url; rv.trg_url = To_fsys(src_url); rv.quote_byte = quote_byte;
|
||||
return rv;
|
||||
}
|
||||
public static byte[] To_fsys(byte[] src) {
|
||||
if (!Op_sys.Cur().Tid_is_wnt()) return src;
|
||||
src = Bry_.Copy(src); // NOTE: must call ByteAry.Copy else url_actl will change *inside* bry
|
||||
int len = src.length;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Slash:
|
||||
case Byte_ascii.Backslash:
|
||||
break;
|
||||
case Byte_ascii.Lt: case Byte_ascii.Gt: case Byte_ascii.Colon: case Byte_ascii.Pipe: case Byte_ascii.Question: case Byte_ascii.Star: case Byte_ascii.Quote:
|
||||
src[i] = Byte_ascii.Underline;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return src;
|
||||
}
|
||||
}
|
||||
class Xob_css_tkn__import extends Xob_css_tkn__base {
|
||||
public byte Quote_byte() {return quote_byte;} private byte quote_byte;
|
||||
public byte[] Src_url() {return src_url;} private byte[] src_url;
|
||||
public byte[] Trg_url() {return trg_url;} private byte[] trg_url;
|
||||
@Override public void Process(Xob_mirror_mgr mgr) {
|
||||
mgr.Code_add(src_url);
|
||||
}
|
||||
@Override public int Write(Bry_bfr bfr, byte[] src) {
|
||||
byte quote = quote_byte; if (quote == Byte_ascii.Nil) quote = Byte_ascii.Apos;
|
||||
bfr.Add_str_a7(" @import url("); // EX: ' @import url('
|
||||
bfr.Add_byte(quote).Add(trg_url).Add_byte(quote); // EX: '"a.png"'
|
||||
bfr.Add_byte(Byte_ascii.Paren_end); // EX: ')'
|
||||
return pos_end;
|
||||
}
|
||||
public static Xob_css_tkn__import new_(int pos_bgn, int pos_end, byte[] src_url, byte[] trg_url, byte quote_byte) {
|
||||
Xob_css_tkn__import rv = new Xob_css_tkn__import();
|
||||
rv.Init(Tid_import, pos_bgn, pos_end);
|
||||
rv.src_url = src_url; rv.trg_url = trg_url; rv.quote_byte = quote_byte;
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
59
400_xowa/src/gplx/xowa/bldrs/css/Xob_mirror_mgr.java
Normal file
59
400_xowa/src/gplx/xowa/bldrs/css/Xob_mirror_mgr.java
Normal file
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.ios.*; import gplx.xowa.files.downloads.*;
|
||||
public class Xob_mirror_mgr {
|
||||
private final Xof_download_wkr download_wkr; private final Xob_css_parser css_parser;
|
||||
private final byte[] page_url; private final Io_url fsys_root;
|
||||
public Xob_mirror_mgr(Gfo_usr_dlg usr_dlg, Xof_download_wkr download_wkr, byte[] site_url, byte[] page_url, Io_url fsys_root) {
|
||||
this.usr_dlg = usr_dlg; this.download_wkr = download_wkr;
|
||||
this.site_url = site_url; this.page_url = page_url; this.fsys_root = fsys_root;
|
||||
this.css_parser = new Xob_css_parser(this);
|
||||
}
|
||||
public Gfo_usr_dlg Usr_dlg() {return usr_dlg;} private final Gfo_usr_dlg usr_dlg;
|
||||
public byte[] Site_url() {return site_url;} private final byte[] site_url;
|
||||
public void Code_add(byte[] src_url) {
|
||||
byte[] trg_url = Xob_css_tkn__url.To_fsys(src_url);
|
||||
code_hash.Add_if_dupe_use_1st(src_url, new Xobc_download_itm(Xobc_download_itm.Tid_css, String_.new_u8(src_url), trg_url));
|
||||
}
|
||||
public Ordered_hash Code_hash() {return code_hash;} private final Ordered_hash code_hash = Ordered_hash_.new_();
|
||||
public Ordered_hash File_hash() {return file_hash;} private final Ordered_hash file_hash = Ordered_hash_.new_();
|
||||
public void Exec() {
|
||||
usr_dlg.Plog_many("", "", "html_mirror:download.root_page; url=~{0}", page_url);
|
||||
IoEngine_xrg_downloadFil download_xrg = download_wkr.Download_xrg();
|
||||
css_parser.Parse(download_xrg.Exec_as_bry(String_.new_u8(page_url)));
|
||||
while (true) {
|
||||
Xobc_download_itm[] code_ary = (Xobc_download_itm[])code_hash.To_ary_and_clear(Xobc_download_itm.class);
|
||||
int code_ary_len = code_ary.length;
|
||||
if (code_ary_len == 0) break;
|
||||
for (int i = 0; i < code_ary_len; ++i) {
|
||||
Xobc_download_itm code = code_ary[i];
|
||||
byte[] code_src = download_xrg.Exec_as_bry(code.Http_str());
|
||||
Io_mgr.I.SaveFilBry(fsys_root.Gen_sub_path_for_os(String_.new_u8(code.Fsys_url())), code_src);
|
||||
css_parser.Parse(code_src);
|
||||
}
|
||||
}
|
||||
Xobc_download_itm[] file_ary = (Xobc_download_itm[])file_hash.To_ary_and_clear(Xobc_download_itm.class);
|
||||
int file_ary_len = file_ary.length;
|
||||
for (int i = 0; i < file_ary_len; ++i) {
|
||||
Xobc_download_itm file = file_ary[i];
|
||||
download_xrg.Init(file.Http_str(), Io_url_.new_fil_(fsys_root.Gen_sub_path_for_os(String_.new_u8(file.Fsys_url()))));
|
||||
download_xrg.Exec();
|
||||
}
|
||||
}
|
||||
}
|
||||
63
400_xowa/src/gplx/xowa/bldrs/css/Xob_mirror_mgr_tst.java
Normal file
63
400_xowa/src/gplx/xowa/bldrs/css/Xob_mirror_mgr_tst.java
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
import gplx.xowa.files.downloads.*;
|
||||
public class Xob_mirror_mgr_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xob_mirror_mgr_fxt fxt = new Xob_mirror_mgr_fxt();
|
||||
@Test public void Download_1() {
|
||||
fxt.Fsys().Init_fil("mem/http/enwiki/file/a.png");
|
||||
fxt.Fsys().Init_fil("mem/http/enwiki/wiki/Main_Page", "url('//enwiki/wiki/a.png')");
|
||||
// fxt.Test_css();
|
||||
// fxt.Fsys().Test_fil("url('//enwiki/wiki/a.png')", "url('enwiki/wiki/a.png')"); // remove "//"
|
||||
// fxt.Fsys().Test_fil("mem/fsys/enwiki/file/a.png");
|
||||
}
|
||||
}
|
||||
class Xob_mirror_mgr_fxt {
|
||||
// private Xob_mirror_mgr mirror_mgr;
|
||||
public Io_fsys_fxt Fsys() {return fsys;} private final Io_fsys_fxt fsys = new Io_fsys_fxt();
|
||||
public void Clear() {
|
||||
fsys.Clear();
|
||||
// mirror_mgr = new Xob_mirror_mgr(Gfo_usr_dlg_.Noop, new Xof_download_wkr_test(), Bry_.new_a7("mem/http/enwiki"), Bry_.new_a7("mem/http/enwiki/wiki/Main_Page"), Io_url_.new_dir_("mem/fsys"));
|
||||
}
|
||||
public void Test_css(String raw, String expd) {
|
||||
// byte[] raw_bry = Bry_.new_u8(raw);
|
||||
// mirror_mgr.Exec();
|
||||
}
|
||||
}
|
||||
class Io_fsys_fxt {
|
||||
public void Clear() {
|
||||
Io_mgr.I.InitEngine_mem();
|
||||
}
|
||||
public void Init_fil(String url_str) {
|
||||
Io_url url = Io_url_.new_fil_(url_str);
|
||||
Init_fil(url, url.NameAndExt());
|
||||
}
|
||||
public void Init_fil(String url_str, String text) {Init_fil(Io_url_.new_fil_(url_str), text);}
|
||||
public void Init_fil(Io_url url, String text) {
|
||||
Io_mgr.I.SaveFilStr(url, text);
|
||||
}
|
||||
public void Test_fil(String url_str) {
|
||||
Io_url url = Io_url_.new_fil_(url_str);
|
||||
Test_fil(url, url.NameAndExt());
|
||||
}
|
||||
public void Test_fil(String url, String expd) {Test_fil(Io_url_.new_fil_(url), expd);}
|
||||
public void Test_fil(Io_url url, String expd) {
|
||||
Tfds.Eq_str_lines(expd, Io_mgr.I.LoadFilStr(url));
|
||||
}
|
||||
}
|
||||
99
400_xowa/src/gplx/xowa/bldrs/css/Xob_url_fixer.java
Normal file
99
400_xowa/src/gplx/xowa/bldrs/css/Xob_url_fixer.java
Normal file
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
class Xob_url_fixer {
|
||||
public static byte[] Fix(byte[] site, byte[] src, int src_len) { // return "site/img.png" if "//site/img.png" or "http://site/img.png"; also, handle "img.png?key=val"
|
||||
int bgn = 0; int bgn_tkn_tid = 0;
|
||||
Object o = Xob_url_fixer_tkn.Bgn_trie().Match_bgn(src, bgn, src_len);
|
||||
if (o != null) {
|
||||
Xob_url_fixer_tkn tkn = (Xob_url_fixer_tkn)o;
|
||||
bgn_tkn_tid = tkn.Tid();
|
||||
switch (bgn_tkn_tid) {
|
||||
case Xob_url_fixer_tkn.Tid_bgn_slash_2:
|
||||
case Xob_url_fixer_tkn.Tid_bgn_http:
|
||||
case Xob_url_fixer_tkn.Tid_bgn_https:
|
||||
bgn = tkn.Raw_len(); // remove "//", "http://", "https://"
|
||||
break;
|
||||
case Xob_url_fixer_tkn.Tid_bgn_slash_1: // convert "/a" to "site/a"
|
||||
src = Bry_.Add(site, src);
|
||||
src_len = src.length;
|
||||
break;
|
||||
}
|
||||
}
|
||||
int pos = bgn, end = src_len; boolean no_slashes = true;
|
||||
Btrie_slim_mgr mid_trie = Xob_url_fixer_tkn.Mid_trie();
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
o = mid_trie.Match_bgn_w_byte(b, src, pos, src_len);
|
||||
if (o != null) {
|
||||
Xob_url_fixer_tkn tkn = (Xob_url_fixer_tkn)o;
|
||||
switch (tkn.Tid()) {
|
||||
case Xob_url_fixer_tkn.Tid_mid_slash: if (no_slashes) no_slashes = false; break;
|
||||
case Xob_url_fixer_tkn.Tid_mid_question: end = pos; pos = src_len; break;
|
||||
case Xob_url_fixer_tkn.Tid_mid_rel_1:
|
||||
case Xob_url_fixer_tkn.Tid_mid_rel_2:
|
||||
Bry_bfr tmp_bfr = Bry_bfr.new_(src_len);
|
||||
byte[] to_rel_root = Bry_.Mid(src, bgn, pos);
|
||||
byte[] to_rel_qry = Bry_.Mid(src, pos, src_len);
|
||||
src = gplx.xowa.xtns.pfuncs.ttls.Pfunc_rel2abs.Rel2abs(tmp_bfr, to_rel_qry, to_rel_root, Int_obj_ref.neg1_());
|
||||
bgn = pos = 0;
|
||||
end = src_len = src.length;
|
||||
no_slashes = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
if (no_slashes) return null; // invalid; EX: "//site"
|
||||
return Bry_.Mid(src, bgn, end);
|
||||
}
|
||||
}
|
||||
class Xob_url_fixer_tkn {
|
||||
public Xob_url_fixer_tkn(int tid, byte[] raw) {this.tid = tid; this.raw = raw; this.raw_len = raw.length;}
|
||||
public int Tid() {return tid;} private int tid;
|
||||
public byte[] Raw() {return raw;} private byte[] raw;
|
||||
public int Raw_len() {return raw_len;} private int raw_len;
|
||||
public static Xob_url_fixer_tkn new_(int tid, String raw) {return new Xob_url_fixer_tkn(tid, Bry_.new_u8(raw));}
|
||||
|
||||
private static void trie_add(Btrie_slim_mgr trie, int tid, String s) {trie.Add_obj(s, new_(tid, s));}
|
||||
public static final int Tid_bgn_slash_1 = 1, Tid_bgn_slash_2 = 2, Tid_bgn_http = 3, Tid_bgn_https = 4;
|
||||
private static Btrie_slim_mgr bgn_trie;
|
||||
public static Btrie_slim_mgr Bgn_trie() {
|
||||
if (bgn_trie == null) {
|
||||
bgn_trie = Btrie_slim_mgr.ci_ascii_();
|
||||
trie_add(bgn_trie, Tid_bgn_slash_1 , "/");
|
||||
trie_add(bgn_trie, Tid_bgn_slash_2 , "//");
|
||||
trie_add(bgn_trie, Tid_bgn_http , "http://");
|
||||
trie_add(bgn_trie, Tid_bgn_https , "https://");
|
||||
}
|
||||
return bgn_trie;
|
||||
}
|
||||
public static final int Tid_mid_rel_1 = 1, Tid_mid_rel_2 = 2, Tid_mid_slash = 3, Tid_mid_question = 4;
|
||||
private static Btrie_slim_mgr mid_trie;
|
||||
public static Btrie_slim_mgr Mid_trie() {
|
||||
if (mid_trie == null) {
|
||||
mid_trie = Btrie_slim_mgr.ci_ascii_();
|
||||
trie_add(mid_trie, Tid_mid_rel_1 , "/../");
|
||||
trie_add(mid_trie, Tid_mid_rel_2 , "/./");
|
||||
trie_add(mid_trie, Tid_mid_slash , "/");
|
||||
trie_add(mid_trie, Tid_mid_question , "?");
|
||||
}
|
||||
return mid_trie;
|
||||
}
|
||||
}
|
||||
42
400_xowa/src/gplx/xowa/bldrs/css/Xob_url_fixer_tst.java
Normal file
42
400_xowa/src/gplx/xowa/bldrs/css/Xob_url_fixer_tst.java
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
public class Xob_url_fixer_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xob_url_fixer_fxt fxt = new Xob_url_fixer_fxt();
|
||||
@Test public void Slash2() {fxt.Test_fix("//site/a.png" , "site/a.png");}
|
||||
@Test public void Http() {fxt.Test_fix("http://site/a.png" , "site/a.png");}
|
||||
@Test public void Https() {fxt.Test_fix("https://site/a.png" , "site/a.png");}
|
||||
@Test public void Qarg() {fxt.Test_fix("//site/a.png?key=val" , "site/a.png");}
|
||||
@Test public void Qarg_dir() {fxt.Test_fix("//site/a/b/c.png?key=val" , "site/a/b/c.png");}
|
||||
@Test public void Root() {fxt.Test_fix("/a/b.png" , "site/a/b.png");} // EX:/static/images/project-logos/wikivoyage.png; DATE:2015-05-09
|
||||
@Test public void Rel_dot2() {fxt.Test_fix("//site/a/../b/c.png" , "site/b/c.png");} // DATE:2015-05-09
|
||||
@Test public void Rel_dot2_mult() {fxt.Test_fix("//site/a/../b/../c/d.png" , "site/c/d.png");} // DATE:2015-05-09
|
||||
@Test public void Rel_dot1() {fxt.Test_fix("//site/a/./b/c.png" , "site/a/b/c.png");} // DATE:2015-05-09
|
||||
@Test public void Site_only() {fxt.Test_fix("//site" , null);}
|
||||
}
|
||||
class Xob_url_fixer_fxt {
|
||||
public void Site_(String v) {site_bry = Bry_.new_u8(v);} private byte[] site_bry;
|
||||
public void Clear() {
|
||||
this.Site_("site");
|
||||
}
|
||||
public void Test_fix(String raw, String expd) {
|
||||
byte[] raw_bry = Bry_.new_u8(raw);
|
||||
Tfds.Eq(expd, String_.new_u8(Xob_url_fixer.Fix(site_bry, raw_bry, raw_bry.length)));
|
||||
}
|
||||
}
|
||||
25
400_xowa/src/gplx/xowa/bldrs/css/Xobc_download_itm.java
Normal file
25
400_xowa/src/gplx/xowa/bldrs/css/Xobc_download_itm.java
Normal file
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
// Immutable item describing one download: a type id, the url to download from (http_str)
// and the bytes of the local path to save to (fsys_url).
class Xobc_download_itm {
	public static final int
	  Tid_file	= 1
	, Tid_html	= 2
	, Tid_css	= 3
	;
	private final int tid;
	private final String http_str;
	private final byte[] fsys_url;

	public Xobc_download_itm(int tid, String http_str, byte[] fsys_url) {
		this.tid = tid;
		this.http_str = http_str;
		this.fsys_url = fsys_url;
	}
	public int Tid()			{return tid;}
	public String Http_str()	{return http_str;}
	public byte[] Fsys_url()	{return fsys_url;}
}
|
||||
Reference in New Issue
Block a user