mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.9.3.1
This commit is contained in:
@@ -17,6 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.ios.*; import gplx.xowa.html.*;
|
||||
import gplx.langs.htmls.encoders.*;
|
||||
import gplx.xowa.nss.*;
|
||||
import gplx.xowa.wikis.*; import gplx.xowa.wikis.domains.*; import gplx.xowa.wikis.data.*;
|
||||
import gplx.xowa.files.downloads.*;
|
||||
import gplx.core.net.*;
|
||||
@@ -171,9 +173,9 @@ public class Xoa_css_extractor {
|
||||
private boolean Logo_copy_from_css(Io_url trg_fil) {
|
||||
Io_url commons_file = wiki_html_dir.GenSubFil(Css_common_name);
|
||||
byte[] commons_src = Io_mgr.I.LoadFilBry(commons_file);
|
||||
int bgn_pos = Bry_finder.Find_fwd(commons_src, Bry_mw_wiki_logo); if (bgn_pos == Bry_finder.Not_found) return false;
|
||||
int bgn_pos = Bry_find_.Find_fwd(commons_src, Bry_mw_wiki_logo); if (bgn_pos == Bry_find_.Not_found) return false;
|
||||
bgn_pos += Bry_mw_wiki_logo.length;
|
||||
int end_pos = Bry_finder.Find_fwd(commons_src, Byte_ascii.Quote, bgn_pos + 1); if (end_pos == Bry_finder.Not_found) return false;
|
||||
int end_pos = Bry_find_.Find_fwd(commons_src, Byte_ascii.Quote, bgn_pos + 1); if (end_pos == Bry_find_.Not_found) return false;
|
||||
byte[] src_bry = Bry_.Mid(commons_src, bgn_pos, end_pos);
|
||||
src_bry = Xob_url_fixer.Fix(wiki_domain, src_bry, src_bry.length);
|
||||
if (wiki_html_dir.Info().DirSpr_byte() == Byte_ascii.Backslash)
|
||||
@@ -185,11 +187,11 @@ public class Xoa_css_extractor {
|
||||
// Locates the logo url inside mainpage_html and returns it with protocol_prefix prepended.
// Steps: find Logo_find_bgn from position 0, then find Logo_find_end after it, then take the bytes from just past
// Logo_find_end up to the next Byte_ascii.Paren_end.
// Returns null when mainpage_html is null or when either marker is not found.
// NOTE(review): this span is a rendered diff; each changed statement is listed twice -- the Bry_finder line is the
// pre-commit text and the Bry_find_ line right after it is the post-commit text.
private String Logo_find_src() {
|
||||
if (mainpage_html == null) return null;
|
||||
int main_page_html_len = mainpage_html.length;
|
||||
int logo_bgn = Bry_finder.Find_fwd(mainpage_html, Logo_find_bgn, 0); if (logo_bgn == Bry_.NotFound) return null;
|
||||
int logo_bgn = Bry_find_.Find_fwd(mainpage_html, Logo_find_bgn, 0); if (logo_bgn == Bry_.NotFound) return null;
|
||||
// move past the begin marker so the next search starts after it
logo_bgn += Logo_find_bgn.length;
|
||||
logo_bgn = Bry_finder.Find_fwd(mainpage_html, Logo_find_end, logo_bgn); if (logo_bgn == Bry_.NotFound) return null;
|
||||
logo_bgn = Bry_find_.Find_fwd(mainpage_html, Logo_find_end, logo_bgn); if (logo_bgn == Bry_.NotFound) return null;
|
||||
// move past the end marker; logo_bgn now points at the first byte of the url
logo_bgn += Logo_find_end.length;
|
||||
// NOTE(review): the guard on the logo_end statement tests logo_bgn, not logo_end. logo_bgn was already checked and
// then advanced, so this guard presumably never fires and a missing ')' would pass logo_end == Bry_.NotFound into
// Bry_.Mid below -- confirm, and consider testing logo_end instead.
int logo_end = Bry_finder.Find_fwd(mainpage_html, Byte_ascii.Paren_end, logo_bgn, main_page_html_len); if (logo_bgn == Bry_.NotFound) return null;
|
||||
int logo_end = Bry_find_.Find_fwd(mainpage_html, Byte_ascii.Paren_end, logo_bgn, main_page_html_len); if (logo_bgn == Bry_.NotFound) return null;
|
||||
byte[] logo_bry = Bry_.Mid(mainpage_html, logo_bgn, logo_end);
|
||||
return protocol_prefix + String_.new_u8(logo_bry);
|
||||
}
|
||||
@@ -238,12 +240,13 @@ public class Xoa_css_extractor {
|
||||
byte[] protocol_prefix_bry = Bry_.new_u8(protocol_prefix);
|
||||
Gfo_url gfo_url = new Gfo_url();
|
||||
while (true) {
|
||||
int url_bgn = Bry_finder.Find_fwd(raw, Css_find_bgn, prv_pos); if (url_bgn == Bry_.NotFound) break; // nothing left; stop
|
||||
int url_bgn = Bry_find_.Find_fwd(raw, Css_find_bgn, prv_pos); if (url_bgn == Bry_.NotFound) break; // nothing left; stop
|
||||
url_bgn += css_find_bgn_len;
|
||||
int url_end = Bry_finder.Find_fwd(raw, Byte_ascii.Quote, url_bgn, raw_len); if (url_end == Bry_.NotFound) {usr_dlg.Warn_many("", "main_page.css_parse", "could not find css; pos='~{0}' text='~{1}'", url_bgn, String_.new_u8_by_len(raw, url_bgn, url_bgn + 32)); break;}
|
||||
int url_end = Bry_find_.Find_fwd(raw, Byte_ascii.Quote, url_bgn, raw_len); if (url_end == Bry_.NotFound) {usr_dlg.Warn_many("", "main_page.css_parse", "could not find css; pos='~{0}' text='~{1}'", url_bgn, String_.new_u8__by_len(raw, url_bgn, url_bgn + 32)); break;}
|
||||
byte[] css_url_bry = Bry_.Mid(raw, url_bgn, url_end);
|
||||
css_url_bry = Bry_.Replace(css_url_bry, Css_amp_find, Css_amp_repl); // & -> &
|
||||
css_url_bry = url_encoder.Decode(css_url_bry); // %2C -> %7C -> |
|
||||
css_url_bry = Xoa_css_extractor.Url_root_fix(wiki_domain, css_url_bry);
|
||||
url_parser.Parse(gfo_url, css_url_bry, 0, css_url_bry.length);
|
||||
if ( gfo_url.Protocol_tid() == Gfo_protocol_itm.Tid_relative_1 // if rel url, add protocol_prefix DATE:2015-08-01
|
||||
|| (Env_.Mode_testing() && gfo_url.Protocol_tid() == Gfo_protocol_itm.Tid_unknown)) // TEST:
|
||||
@@ -266,6 +269,15 @@ public class Xoa_css_extractor {
|
||||
}
|
||||
return tmp_bfr.Xto_bry_and_clear();
|
||||
}
|
||||
// Turns a root-relative url (one leading "/") into an absolute one by concatenating
// Xoh_href_.Bry__https (presumably the "https://" prefix -- confirm), the domain, and the url.
// domain: bytes placed between the https prefix and the url
// url: raw url bytes
// Returns the url unchanged when it is shorter than 3 bytes, does not start with "/", or starts with "//".
private static byte[] Url_root_fix(byte[] domain, byte[] url) {// DATE:2015-09-20
|
||||
if (url.length < 3) return url; // urls shorter than 3 bytes are returned as-is; NOTE(review): only url[0] and url[1] are read below, so a 2-byte url such as "/w" is also skipped -- confirm whether "< 2" was intended
|
||||
if ( url[0] == Byte_ascii.Slash // starts with "/" EX: "/w/api.php"
|
||||
&& url[1] != Byte_ascii.Slash // but not "//"; EX: "//en.wikipedia.org"
|
||||
)
|
||||
return Bry_.Add(gplx.xowa.html.hrefs.Xoh_href_.Bry__https, domain, url);
|
||||
else
|
||||
return url;
|
||||
}
|
||||
public static final String Css_common_name = "xowa_common.css", Css_wiki_name = "xowa_wiki.css"
|
||||
, Css_common_name_ltr = "xowa_common_ltr.css", Css_common_name_rtl = "xowa_common_rtl.css";
|
||||
}
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*; import gplx.ios.*; import gplx.xowa.wikis.data.*; import gplx.xowa.files.downloads.*;
|
||||
import org.junit.*; import gplx.ios.*; import gplx.langs.htmls.encoders.*; import gplx.xowa.wikis.data.*; import gplx.xowa.files.downloads.*;
|
||||
public class Xoa_css_extractor_basic_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoa_css_extractor_fxt fxt = new Xoa_css_extractor_fxt();
|
||||
@Test public void Logo_download() {
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*; import gplx.ios.*;
|
||||
import org.junit.*; import gplx.ios.*; import gplx.xowa.nss.*;
|
||||
public class Xoa_css_extractor_wiki_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoa_css_extractor_fxt fxt = new Xoa_css_extractor_fxt();
|
||||
@Test public void Css_wiki_generate() {
|
||||
|
||||
@@ -41,7 +41,7 @@ public class Xoa_css_img_downloader {
|
||||
Bry_bfr bfr = Bry_bfr.new_(src_len);
|
||||
Hash_adp img_hash = Hash_adp_bry.cs();
|
||||
while (true) {
|
||||
int url_pos = Bry_finder.Find_fwd(src, Bry_url, prv_pos);
|
||||
int url_pos = Bry_find_.Find_fwd(src, Bry_url, prv_pos);
|
||||
if (url_pos == Bry_.NotFound) {bfr.Add_mid(src, prv_pos, src_len); break;} // no more "url("; exit;
|
||||
int bgn_pos = url_pos + Bry_url_len; // set bgn_pos after "url("
|
||||
byte bgn_byte = src[bgn_pos];
|
||||
@@ -57,14 +57,14 @@ public class Xoa_css_img_downloader {
|
||||
quoted = false;
|
||||
break;
|
||||
}
|
||||
int end_pos = Bry_finder.Find_fwd(src, end_byte, bgn_pos, src_len);
|
||||
int end_pos = Bry_find_.Find_fwd(src, end_byte, bgn_pos, src_len);
|
||||
if (end_pos == Bry_.NotFound) { // unclosed "url("; exit since nothing else will be found
|
||||
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.end_missing", "could not find end_sequence for 'url(': bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8_by_len(src, prv_pos, prv_pos + 25));
|
||||
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.end_missing", "could not find end_sequence for 'url(': bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25));
|
||||
bfr.Add_mid(src, prv_pos, src_len);
|
||||
break;
|
||||
}
|
||||
if (end_pos - bgn_pos == 0) { // empty; "url()"; ignore
|
||||
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.empty", "'url(' is empty: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8_by_len(src, prv_pos, prv_pos + 25));
|
||||
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.empty", "'url(' is empty: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25));
|
||||
bfr.Add_mid(src, prv_pos, bgn_pos);
|
||||
prv_pos = bgn_pos;
|
||||
continue;
|
||||
@@ -128,10 +128,10 @@ public class Xoa_css_img_downloader {
|
||||
}
|
||||
bfr.Add_mid(src, old_pos, find_bgn - Bry_import_len).Add_byte_nl();
|
||||
bfr.Add(Bry_comment_bgn).Add(css_url).Add(Bry_comment_end).Add_byte_nl();
|
||||
if (Bry_finder.Find_fwd(css_url, Wikisource_dynimg_ttl) != -1) css_trg_bry = Bry_.Replace(css_trg_bry, Wikisource_dynimg_find, Wikisource_dynimg_repl); // FreedImg hack; PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06
|
||||
if (Bry_find_.Find_fwd(css_url, Wikisource_dynimg_ttl) != -1) css_trg_bry = Bry_.Replace(css_trg_bry, Wikisource_dynimg_find, Wikisource_dynimg_repl); // FreedImg hack; PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06
|
||||
bfr.Add(css_trg_bry).Add_byte_nl();
|
||||
bfr.Add_byte_nl();
|
||||
int semic_pos = Bry_finder.Find_fwd(src, Byte_ascii.Semic, find_bgn + url_raw.length, src_len);
|
||||
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, find_bgn + url_raw.length, src_len);
|
||||
return semic_pos + Int_.Const_dlm_len;
|
||||
}
|
||||
private static final byte[]
|
||||
@@ -143,11 +143,11 @@ public class Xoa_css_img_downloader {
|
||||
int pos_bgn = 0;
|
||||
if (Bry_.Has_at_bgn(raw, Bry_fwd_slashes, 0, raw_len)) pos_bgn = Bry_fwd_slashes.length;
|
||||
if (Bry_.Has_at_bgn(raw, Bry_http, 0, raw_len)) pos_bgn = Bry_http.length;
|
||||
int pos_slash = Bry_finder.Find_fwd(raw, Byte_ascii.Slash, pos_bgn, raw_len);
|
||||
int pos_slash = Bry_find_.Find_fwd(raw, Byte_ascii.Slash, pos_bgn, raw_len);
|
||||
if (pos_slash == Bry_.NotFound) return null; // first segment is site_name; at least one slash must be present for image name; EX: site.org/img_name.jpg
|
||||
if (pos_slash == raw_len - 1) return null; // "site.org/" is invalid
|
||||
int pos_end = raw_len;
|
||||
int pos_question = Bry_finder.Find_bwd(raw, Byte_ascii.Question);
|
||||
int pos_question = Bry_find_.Find_bwd(raw, Byte_ascii.Question);
|
||||
if (pos_question != Bry_.NotFound)
|
||||
pos_end = pos_question; // remove query params; EX: img_name?key=val
|
||||
return Bry_.Mid(raw, pos_bgn, pos_end);
|
||||
|
||||
@@ -22,7 +22,7 @@ class Xob_css_parser__import {
|
||||
private final Xob_css_parser__url url_parser;
|
||||
public Xob_css_parser__import(Xob_css_parser__url url_parser) {this.url_parser = url_parser;}
|
||||
public Xob_css_tkn__base Parse(byte[] src, int src_len, int tkn_bgn, int tkn_end) { // " @import"
|
||||
int bgn_pos = Bry_finder.Find_fwd_while_ws(src, tkn_end, src_len); // skip any ws after " @import"
|
||||
int bgn_pos = Bry_find_.Find_fwd_while_ws(src, tkn_end, src_len); // skip any ws after " @import"
|
||||
if (bgn_pos == src_len) return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.import:EOS after import; bgn=~{0}", tkn_bgn);
|
||||
if (!Bry_.Has_at_bgn(src, Tkn_url_bry, bgn_pos, src_len)) return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.import:url missing; bgn=~{0}", tkn_bgn);
|
||||
tkn_end = bgn_pos + Tkn_url_bry.length;
|
||||
@@ -31,7 +31,7 @@ class Xob_css_parser__import {
|
||||
Xob_css_tkn__url url_frag = (Xob_css_tkn__url)frag;
|
||||
byte[] src_url = url_frag.Src_url();
|
||||
src_url = Bry_.Replace(src_url, Byte_ascii.Space, Byte_ascii.Underline); // NOTE: must replace spaces with underlines else download will fail; EX:https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
|
||||
int semic_pos = Bry_finder.Find_fwd(src, Byte_ascii.Semic, frag.Pos_end(), src_len);
|
||||
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, frag.Pos_end(), src_len);
|
||||
return Xob_css_tkn__import.new_(tkn_bgn, semic_pos + 1, src_url, url_frag.Trg_url(), url_frag.Quote_byte());
|
||||
}
|
||||
private static final byte[] Tkn_url_bry = Bry_.new_a7("url(");
|
||||
|
||||
@@ -20,7 +20,7 @@ class Xob_css_parser__url {
|
||||
private final byte[] site;
|
||||
public Xob_css_parser__url(byte[] site) {this.site = site;}
|
||||
public Xob_css_tkn__base Parse(byte[] src, int src_len, int tkn_bgn, int tkn_end) { // " url"
|
||||
int bgn_pos = Bry_finder.Find_fwd_while_ws(src, tkn_end, src_len); // skip any ws after " url("
|
||||
int bgn_pos = Bry_find_.Find_fwd_while_ws(src, tkn_end, src_len); // skip any ws after " url("
|
||||
if (bgn_pos == src_len) return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:EOS; bgn=~{0}", tkn_bgn);
|
||||
byte end_byte = src[bgn_pos]; // note that first non-ws byte should determine end_byte
|
||||
byte quote_byte = end_byte;
|
||||
@@ -33,11 +33,11 @@ class Xob_css_parser__url {
|
||||
quote_byte = Byte_ascii.Null;
|
||||
break;
|
||||
}
|
||||
int end_pos = Bry_finder.Find_fwd(src, end_byte, bgn_pos, src_len);
|
||||
int end_pos = Bry_find_.Find_fwd(src, end_byte, bgn_pos, src_len);
|
||||
if (end_pos == Bry_.NotFound) // unclosed "url("; exit since nothing else will be found
|
||||
return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:dangling; bgn=~{0} excerpt=~{1}", bgn_pos, String_.new_u8_by_len(src, tkn_bgn, tkn_bgn + 128));
|
||||
return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:dangling; bgn=~{0} excerpt=~{1}", bgn_pos, String_.new_u8__by_len(src, tkn_bgn, tkn_bgn + 128));
|
||||
if (end_pos - bgn_pos == 0) // empty; "url()"; ignore
|
||||
return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:empty; bgn=~{0} excerpt=~{1}", bgn_pos, String_.new_u8_by_len(src, tkn_bgn, tkn_bgn + 128));
|
||||
return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:empty; bgn=~{0} excerpt=~{1}", bgn_pos, String_.new_u8__by_len(src, tkn_bgn, tkn_bgn + 128));
|
||||
byte[] url_orig = Bry_.Mid(src, bgn_pos, end_pos); int url_orig_len = url_orig.length;
|
||||
++end_pos; // increment end_pos so rv will be after it;
|
||||
if ( end_byte != Byte_ascii.Paren_end) { // end_byte is apos / quote
|
||||
|
||||
Reference in New Issue
Block a user