mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
'v3.9.4.1'
This commit is contained in:
@@ -16,9 +16,9 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.pages.syncs.core.loaders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*; import gplx.xowa.addons.wikis.pages.syncs.core.*;
|
||||
import gplx.core.brys.*;
|
||||
import gplx.core.brys.*; import gplx.core.btries.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.htmls.*; import gplx.langs.htmls.docs.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.imgs.atrs.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.files.repos.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.files.repos.*; import gplx.xowa.files.imgs.*;
|
||||
import gplx.xowa.wikis.domains.*;
|
||||
import gplx.xowa.addons.wikis.pages.syncs.core.parsers.*;
|
||||
public class Xosync_page_loader {
|
||||
@@ -36,6 +36,7 @@ public class Xosync_page_loader {
|
||||
|
||||
// loop for all <img>
|
||||
int pos = 0;
|
||||
Btrie_rv trv = new Btrie_rv();
|
||||
while (true) {
|
||||
// get next "<img>"
|
||||
Gfh_tag img_tag = tag_rdr.Tag__find_fwd_head(pos, src_len, Gfh_tag_.Id__img);
|
||||
@@ -52,22 +53,16 @@ public class Xosync_page_loader {
|
||||
// do simple replace for @src
|
||||
Gfh_atr img_src_atr = img_tag.Atrs__get_by_or_fail(Gfh_atr_.Bry__src);
|
||||
byte[] img_src_val = img_src_atr.Val();
|
||||
img_src_val = Bry_.Replace(img_src_val, Xosync_img_src_parser.Bry__xowa_src_bgn, wiki.App().Fsys_mgr().File_dir().To_http_file_bry());
|
||||
|
||||
// parse other atrs for fsdb
|
||||
img_src_parser.Parse(err_wkr, hctx, wiki.Domain_bry(), img_src_atr.Val_bgn(), img_src_atr.Val_end());
|
||||
if (img_src_parser.File_ttl_bry() != null) { // NOTE: need to skip images that don't follow MW image format ("commons.wikimedia.org/thumb/7/70/A.png"); for example, math images
|
||||
Xof_fsdb_itm img = hpg.Img_mgr().Make_img(false);
|
||||
byte[] file_ttl_bry = gplx.langs.htmls.encoders.Gfo_url_encoder_.Http_url.Decode(img_src_parser.File_ttl_bry());
|
||||
img.Init_by_wm_parse(hctx.Wiki__domain_itm().Abrv_xo(), img_src_parser.Repo_is_commons(), img_src_parser.File_is_orig(), file_ttl_bry, img_src_parser.File_w(), img_src_parser.File_time(), img_src_parser.File_page());
|
||||
hctx.File__url_bldr().Init_by_root(img_src_parser.Repo_is_commons() ? hctx.Fsys__file__comm() : hctx.Fsys__file__wiki(), Bool_.N, Byte_ascii.Slash, Bool_.N, Bool_.N, 4);
|
||||
hctx.File__url_bldr().Init_by_itm(img_src_parser.File_is_orig() ? Xof_repo_itm_.Mode_orig : Xof_repo_itm_.Mode_thumb, file_ttl_bry, Xof_file_wkr_.Md5(file_ttl_bry), Xof_ext_.new_by_ttl_(file_ttl_bry), img_src_parser.File_w(), img_src_parser.File_time(), img_src_parser.File_page());
|
||||
img.Orig_repo_name_(img_src_parser.Repo_is_commons() ? Xow_domain_itm_.Bry__commons : wiki.Domain_bry());
|
||||
Io_url html_view_url = hctx.File__url_bldr().Xto_url_by_http();
|
||||
img.Init_at_gallery_end(img_tag.Atrs__get_as_int(Gfh_atr_.Bry__width), img_tag.Atrs__get_as_int(Gfh_atr_.Bry__height), html_view_url, html_view_url);
|
||||
|
||||
Xosync_hdoc_parser.Write_img_tag(tmp_bfr, img_tag, img_src_val, img.Html_uid());
|
||||
byte path_tid = Xosync_img_src_parser.Src_xo_trie.Match_byte_or(trv, img_src_val, Xosync_img_src_parser.Path__unknown);
|
||||
switch (path_tid) {
|
||||
case Xosync_img_src_parser.Path__file:
|
||||
Add_img(wiki, hpg, img_tag, img_src_atr, img_src_val, path_tid, Xosync_img_src_parser.Bry__xowa_file, wiki.App().Fsys_mgr().File_dir().To_http_file_bry());
|
||||
break;
|
||||
case Xosync_img_src_parser.Path__math:
|
||||
Add_img(wiki, hpg, img_tag, img_src_atr, img_src_val, path_tid, Xosync_img_src_parser.Bry__xowa_math, wiki.App().Fsys_mgr().File_dir().GenSubDir_nest("math").To_http_file_bry());
|
||||
break;
|
||||
}
|
||||
|
||||
pos = img_tag.Src_end();
|
||||
}
|
||||
|
||||
@@ -76,4 +71,41 @@ public class Xosync_page_loader {
|
||||
hpg.Db().Html().Html_bry_(src);
|
||||
return src;
|
||||
}
|
||||
private Xof_fsdb_itm Add_img(Xow_wiki wiki, Xoh_page hpg, Gfh_tag img_tag, Gfh_atr img_src_atr, byte[] img_src_val, byte path_tid, byte[] src_find, byte[] src_repl) {
|
||||
// replace "xowa:/file" with "file:////xowa/file/"
|
||||
img_src_val = Bry_.Replace(img_src_val, src_find, src_repl);
|
||||
|
||||
// parse src
|
||||
img_src_parser.Parse(err_wkr, hctx, wiki.Domain_bry(), img_src_atr.Val_bgn(), img_src_atr.Val_end());
|
||||
if (img_src_parser.File_ttl_bry() == null) return null; // skip images that don't follow format of "commons.wikimedia.org/thumb/7/70/A.png"; for example, enlarge buttons
|
||||
|
||||
// create img
|
||||
Xof_fsdb_itm img = hpg.Img_mgr().Make_img(false);
|
||||
|
||||
// use repo_tid to get fsys_root, orig_repo_name
|
||||
byte repo_tid = img_src_parser.Repo_tid();
|
||||
byte[] orig_repo_name = null, fsys_root = null;
|
||||
switch (repo_tid) {
|
||||
case Xof_repo_tid_.Tid__remote: fsys_root = hctx.Fsys__file__comm(); orig_repo_name = Xow_domain_itm_.Bry__commons; break;
|
||||
case Xof_repo_tid_.Tid__local: fsys_root = hctx.Fsys__file__wiki(); orig_repo_name = wiki.Domain_bry(); break;
|
||||
case Xof_repo_tid_.Tid__math: fsys_root = hctx.Fsys__file__math(); orig_repo_name = Xof_repo_tid_.Bry__math; break;
|
||||
}
|
||||
|
||||
// set vals
|
||||
img.Orig_repo_name_(orig_repo_name);
|
||||
byte[] file_ttl_bry = gplx.langs.htmls.encoders.Gfo_url_encoder_.Http_url.Decode(img_src_parser.File_ttl_bry());
|
||||
Xof_ext file_ext = Xosync_img_src_parser.Ext_by_ttl(file_ttl_bry, repo_tid);
|
||||
img.Init_by_wm_parse(hctx.Wiki__domain_itm().Abrv_xo(), img_src_parser.Repo_is_commons(), img_src_parser.File_is_orig(), file_ttl_bry, file_ext, img_src_parser.File_w(), img_src_parser.File_time(), img_src_parser.File_page());
|
||||
|
||||
// recalc src based on "file:////xowa/file/"
|
||||
hctx.File__url_bldr().Init_by_repo(repo_tid, fsys_root, Bool_.N, Byte_ascii.Slash, Bool_.N, Bool_.N, 4);
|
||||
hctx.File__url_bldr().Init_by_itm(img_src_parser.File_is_orig() ? Xof_img_mode_.Tid__orig : Xof_img_mode_.Tid__thumb, file_ttl_bry, Xof_file_wkr_.Md5(file_ttl_bry), Xof_ext_.new_by_ttl_(file_ttl_bry), img_src_parser.File_w(), img_src_parser.File_time(), img_src_parser.File_page());
|
||||
Io_url html_view_url = hctx.File__url_bldr().Xto_url_by_http();
|
||||
|
||||
// if (path_tid == Xosync_img_src_parser.Path__file)
|
||||
img.Init_at_gallery_end(img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__width,0), img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__height, 0), html_view_url, html_view_url);
|
||||
|
||||
Xosync_hdoc_parser.Write_img_tag(tmp_bfr, img_tag, img_src_val, img.Html_uid());
|
||||
return img;
|
||||
}
|
||||
}
|
||||
@@ -38,9 +38,10 @@ public class Xosync_page_loader__fxt {
|
||||
Gftest.Eq__ary__lines(expd, hpg.Db().Html().Html_bry(), "converted html");
|
||||
return this;
|
||||
}
|
||||
public Xof_fsdb_itm Make__fsdb(boolean repo_is_commons, boolean file_is_thumb, String file_ttl, int file_w, double file_time, int file_page) {
|
||||
public Xof_fsdb_itm Make__fsdb(boolean repo_is_commons, boolean file_is_orig, String file_ttl, String orig_ext_str, int file_w, double file_time, int file_page) {
|
||||
Xof_fsdb_itm itm = new Xof_fsdb_itm();
|
||||
itm.Init_by_wm_parse(wiki.Domain_itm().Abrv_xo(), repo_is_commons, file_is_thumb, Bry_.new_u8(file_ttl), file_w, file_time, file_page);
|
||||
Xof_ext orig_ext = Xof_ext_.new_by_ext_(Bry_.new_u8(orig_ext_str));
|
||||
itm.Init_by_wm_parse(wiki.Domain_itm().Abrv_xo(), repo_is_commons, file_is_orig, Bry_.new_u8(file_ttl), orig_ext, file_w, file_time, file_page);
|
||||
return itm;
|
||||
}
|
||||
public Xosync_page_loader__fxt Test__fsdb(Xof_fsdb_itm expd) {
|
||||
|
||||
@@ -20,10 +20,22 @@ import org.junit.*;
|
||||
import gplx.langs.htmls.*;
|
||||
public class Xosync_page_loader__tst {
|
||||
@Before public void init() {fxt.Clear();} private final Xosync_page_loader__fxt fxt = new Xosync_page_loader__fxt();
|
||||
@Test public void Basic() {
|
||||
@Test public void File() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("a<img src='xowa:/file/commons.wikimedia.org/thumb/4/a/6/9/Commons-logo.svg/12px.png' width='12' height='20'>b"))
|
||||
.Test__html(Gfh_utl.Replace_apos("a<img id='xoimg_0' src='file:///mem/xowa/file/commons.wikimedia.org/thumb/4/a/6/9/Commons-logo.svg/12px.png' width='12' height='20'>b"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.N, "Commons-logo.svg", 12, -1, -1))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.N, "Commons-logo.svg", "svg", 12, -1, -1))
|
||||
;
|
||||
}
|
||||
@Test public void Math() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("a<img src='xowa:/math/596f8baf206a81478afd4194b44138715dc1a05c' width='12' height='20'>b"))
|
||||
.Test__html(Gfh_utl.Replace_apos("a<img id='xoimg_0' src='file:///mem/xowa/file/math/596f8baf206a81478afd4194b44138715dc1a05c' width='12' height='20'>b"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.Y, "596f8baf206a81478afd4194b44138715dc1a05c", "svg", -1, -1, -1))
|
||||
;
|
||||
}
|
||||
@Test public void Ogg() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("a<img src='xowa:/file/commons.wikimedia.org/thumb/4/2/7/e/A.ogg/320px.jpg'>b"))
|
||||
.Test__html(Gfh_utl.Replace_apos("a<img id='xoimg_0' src='file:///mem/xowa/file/commons.wikimedia.org/thumb/4/2/7/e/A.ogg/320px.jpg'>b"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.N, "A.ogg", "ogv", 320, -1, -1))
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ public class Xosync_hdoc_parser__err__tst {
|
||||
@Before public void init() {fxt.Clear();} private final Xosync_hdoc_parser__fxt fxt = new Xosync_hdoc_parser__fxt();
|
||||
@Test public void Url_does_not_start_with_upload_wikimedia_org() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='//fail/wikipedia/commons/thumb/7/70/A.png/220px-A.png'>"))
|
||||
.Test__html(Gfh_utl.Replace_apos("<!--wm.parse:url does not start with //upload.wikimedia.org--><img src='//fail/wikipedia/commons/thumb/7/70/A.png/220px-A.png'>"));
|
||||
.Test__html(Gfh_utl.Replace_apos("<!--wm.parse:img src does not start with known sequence--><img src='//fail/wikipedia/commons/thumb/7/70/A.png/220px-A.png'>"));
|
||||
}
|
||||
@Test public void Unknown_repo() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='//upload.wikimedia.org/wiktionary/fr/thumb/7/70/A.png/220px-A.png'>"))
|
||||
|
||||
@@ -21,14 +21,14 @@ import gplx.langs.htmls.*;
|
||||
public class Xosync_hdoc_parser__file__tst {
|
||||
@Before public void init() {fxt.Clear();} private final Xosync_hdoc_parser__fxt fxt = new Xosync_hdoc_parser__fxt();
|
||||
@Test public void Commons__thumb() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='//upload.wikimedia.org/wikipedia/commons/thumb/7/70/A.png/220px-A.png'>"))
|
||||
.Test__html(Gfh_utl.Replace_apos("<img src='xowa:/file/commons.wikimedia.org/thumb/7/0/1/c/A.png/220px.png'>"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.N, "A.png", 220, -1, -1));
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='//upload.wikimedia.org/wikipedia/commons/thumb/7/70/A.png/320px-A.png'>"))
|
||||
.Test__html(Gfh_utl.Replace_apos("<img src='xowa:/file/commons.wikimedia.org/thumb/7/0/1/c/A.png/320px.png'>"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.N, "A.png", 320, -1, -1));
|
||||
}
|
||||
@Test public void Url_encoded() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='//upload.wikimedia.org/wikipedia/commons/thumb/9/91/A%2CB.png/220px-A%2CB.png'>"))
|
||||
.Test__html(Gfh_utl.Replace_apos("<img src='xowa:/file/commons.wikimedia.org/thumb/9/1/0/8/A%2CB.png/220px.png'>"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.N, "A,B.png", 220, -1, -1));
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='//upload.wikimedia.org/wikipedia/commons/thumb/9/91/A%2CB.png/320px-A%2CB.png'>"))
|
||||
.Test__html(Gfh_utl.Replace_apos("<img src='xowa:/file/commons.wikimedia.org/thumb/9/1/0/8/A%2CB.png/320px.png'>"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.N, "A,B.png", 320, -1, -1));
|
||||
}
|
||||
@Test public void Local__orig() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='//upload.wikimedia.org/wikipedia/en/7/70/A.png'>"))
|
||||
@@ -36,11 +36,23 @@ public class Xosync_hdoc_parser__file__tst {
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.N, Bool_.Y, "A.png", -1, -1, -1));
|
||||
}
|
||||
@Test public void Svg() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='//upload.wikimedia.org/wikipedia/commons/thumb/4/4a/Commons-logo.svg/12px-Commons-logo.svg.png'>"))
|
||||
.Test__html(Gfh_utl.Replace_apos("<img src='xowa:/file/commons.wikimedia.org/thumb/4/a/6/9/Commons-logo.svg/12px.png'>"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.N, "Commons-logo.svg", 12, -1, -1));
|
||||
// https://upload.wikimedia.org/wikipedia/commons/thumb/f/fc/Papilio_dardanus_emerging.ogg/320px--Papilio_dardanus_emerging.ogg.jpg
|
||||
// https://upload.wikimedia.org/wikipedia/commons/thumb/3/30/Clip_from_My_Man_Godfrey.ogg/240px-seek%3D67-Clip_from_My_Man_Godfrey.ogg.jpg
|
||||
// https://upload.wikimedia.org/wikipedia/commons/thumb/7/7a/PL_Henryk_Sienkiewicz-Pisma_zapomniane_i_niewydane.djvu/page6-250px-PL_Henryk_Sienkiewicz-Pisma_zapomniane_i_niewydane.djvu.jpg
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='//upload.wikimedia.org/wikipedia/commons/thumb/7/75/A.svg/12px-A.svg.png'>"))
|
||||
.Test__html(Gfh_utl.Replace_apos("<img src='xowa:/file/commons.wikimedia.org/thumb/7/5/9/a/A.svg/12px.png'>"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.N, "A.svg", 12, -1, -1));
|
||||
}
|
||||
@Test public void Ogg() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='//upload.wikimedia.org/wikipedia/commons/thumb/4/42/A.ogg/320px--A.ogg.jpg'>"))
|
||||
.Test__html(Gfh_utl.Replace_apos("<img src='xowa:/file/commons.wikimedia.org/thumb/4/2/7/e/A.ogg/320px.jpg'>"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.N, "A.ogg", "ogv", 320, -1, -1));
|
||||
}
|
||||
@Test public void Ogg__time() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='//upload.wikimedia.org/wikipedia/commons/thumb/4/42/A.ogg/320px-seek%3D1.2-A.ogg.jpg'>"))
|
||||
.Test__html(Gfh_utl.Replace_apos("<img src='xowa:/file/commons.wikimedia.org/thumb/4/2/7/e/A.ogg/320px-1.2.jpg'>"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.N, "A.ogg", "ogv", 320, 1.2, -1));
|
||||
}
|
||||
@Test public void Pdf__page() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='//upload.wikimedia.org/wikipedia/commons/thumb/7/76/A.djvu/page1-320px-A.djvu.jpg'>"))
|
||||
.Test__html(Gfh_utl.Replace_apos("<img src='xowa:/file/commons.wikimedia.org/thumb/7/6/9/a/A.djvu/320px-1.jpg'>"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.N, "A.djvu", 320, -1, 1));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,9 +39,15 @@ public class Xosync_hdoc_parser__fxt {
|
||||
Gftest.Eq__ary__lines(expd, hpg.Db().Html().Html_bry(), "converted html");
|
||||
return this;
|
||||
}
|
||||
public Xof_fsdb_itm Make__fsdb(boolean repo_is_commons, boolean file_is_thumb, String file_ttl, int file_w, double file_time, int file_page) {
|
||||
public Xof_fsdb_itm Make__fsdb(boolean repo_is_commons, boolean file_is_orig, String file_ttl, int file_w, double file_time, int file_page) {
|
||||
return Make__fsdb(repo_is_commons, file_is_orig, file_ttl, Xof_ext_.new_by_ttl_(Bry_.new_u8(file_ttl)), file_w, file_time, file_page);
|
||||
}
|
||||
public Xof_fsdb_itm Make__fsdb(boolean repo_is_commons, boolean file_is_orig, String file_ttl, String file_ext, int file_w, double file_time, int file_page) {
|
||||
return Make__fsdb(repo_is_commons, file_is_orig, file_ttl, Xof_ext_.new_by_ext_(Bry_.new_u8(file_ext)), file_w, file_time, file_page);
|
||||
}
|
||||
public Xof_fsdb_itm Make__fsdb(boolean repo_is_commons, boolean file_is_orig, String file_ttl, Xof_ext file_ext, int file_w, double file_time, int file_page) {
|
||||
Xof_fsdb_itm itm = new Xof_fsdb_itm();
|
||||
itm.Init_by_wm_parse(wiki.Domain_itm().Abrv_xo(), repo_is_commons, file_is_thumb, Bry_.new_u8(file_ttl), file_w, file_time, file_page);
|
||||
itm.Init_by_wm_parse(wiki.Domain_itm().Abrv_xo(), repo_is_commons, file_is_orig, Bry_.new_u8(file_ttl), file_ext, file_w, file_time, file_page);
|
||||
return itm;
|
||||
}
|
||||
public Xosync_hdoc_parser__fxt Test__fsdb(Xof_fsdb_itm expd) {
|
||||
@@ -54,9 +60,10 @@ public class Xosync_hdoc_parser__fxt {
|
||||
return tmp_bfr.To_str_and_clear();
|
||||
}
|
||||
private static void To_bfr(Bry_bfr bfr, Xof_fsdb_itm itm) {
|
||||
bfr.Add_str_a7(itm.Orig_repo_id() == Xof_repo_itm_.Repo_remote ? "remote" : "local").Add_byte_pipe();
|
||||
bfr.Add_str_a7(itm.Orig_repo_id() == Xof_repo_tid_.Tid__remote ? "remote" : "local").Add_byte_pipe();
|
||||
bfr.Add_str_a7(itm.File_is_orig() ? "orig" : "thumb").Add_byte_pipe();
|
||||
bfr.Add(itm.Orig_ttl()).Add_byte_pipe();
|
||||
bfr.Add(itm.Orig_ext().Ext()).Add_byte_pipe();
|
||||
bfr.Add_int_variable(itm.File_w()).Add_byte_pipe();
|
||||
bfr.Add_double(itm.Lnki_time()).Add_byte_pipe();
|
||||
bfr.Add_int_variable(itm.Lnki_page()).Add_byte_pipe();
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.pages.syncs.core.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*; import gplx.xowa.addons.wikis.pages.syncs.core.*;
|
||||
import org.junit.*;
|
||||
import gplx.langs.htmls.*;
|
||||
public class Xosync_hdoc_parser__misc__tst {
|
||||
@Before public void init() {fxt.Clear();} private final Xosync_hdoc_parser__fxt fxt = new Xosync_hdoc_parser__fxt();
|
||||
@Test public void Math() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='https://wikimedia.org/api/rest_v1/media/math/render/svg/596f8baf206a81478afd4194b44138715dc1a05c' class='mwe-math-fallback-image-inline' aria-hidden='true' style='vertical-align: -2.005ex; width:16.822ex; height:6.176ex;' alt='R_{H}=a\\left({\\frac {m}{3M}}\\right)^{\\frac {1}{3}}'>"))
|
||||
.Test__html(Gfh_utl.Replace_apos("<img src='xowa:/math/596f8baf206a81478afd4194b44138715dc1a05c.svg' class='mwe-math-fallback-image-inline' aria-hidden='true' style='vertical-align: -2.005ex; width:16.822ex; height:6.176ex;' alt='R_{H}=a\\left({\\frac {m}{3M}}\\right)^{\\frac {1}{3}}'>"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.Y, "596f8baf206a81478afd4194b44138715dc1a05c.svg", -1, -1, -1));
|
||||
}
|
||||
}
|
||||
@@ -34,14 +34,6 @@ public class Xosync_hdoc_parser__tst {
|
||||
, "</h2>"
|
||||
));
|
||||
}
|
||||
@Test public void File() {
|
||||
fxt.Exec__parse(Gfh_utl.Replace_apos("<img src='//upload.wikimedia.org/wikipedia/commons/thumb/4/4a/Commons-logo.svg/12px-Commons-logo.svg.png'>"))
|
||||
.Test__html(Gfh_utl.Replace_apos("<img src='xowa:/file/commons.wikimedia.org/thumb/4/a/6/9/Commons-logo.svg/12px.png'>"))
|
||||
.Test__fsdb(fxt.Make__fsdb(Bool_.Y, Bool_.N, "Commons-logo.svg", 12, -1, -1));
|
||||
// https://upload.wikimedia.org/wikipedia/commons/thumb/f/fc/Papilio_dardanus_emerging.ogg/320px--Papilio_dardanus_emerging.ogg.jpg
|
||||
// https://upload.wikimedia.org/wikipedia/commons/thumb/3/30/Clip_from_My_Man_Godfrey.ogg/240px-seek%3D67-Clip_from_My_Man_Godfrey.ogg.jpg
|
||||
// https://upload.wikimedia.org/wikipedia/commons/thumb/7/7a/PL_Henryk_Sienkiewicz-Pisma_zapomniane_i_niewydane.djvu/page6-250px-PL_Henryk_Sienkiewicz-Pisma_zapomniane_i_niewydane.djvu.jpg
|
||||
}
|
||||
// @Test public void Smoke() {
|
||||
// fxt.Exec__parse(Io_mgr.Instance.LoadFilStr("C:\\xowa\\dev\\wm.updater.src.html"));
|
||||
// Io_mgr.Instance.SaveFilBry("C:\\xowa\\dev\\wm.updater.trg.html", fxt.Hdoc().Converted());
|
||||
|
||||
@@ -16,16 +16,18 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.pages.syncs.core.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*; import gplx.xowa.addons.wikis.pages.syncs.core.*;
|
||||
import gplx.core.brys.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.files.repos.*;
|
||||
import gplx.core.brys.*; import gplx.core.btries.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.files.repos.*; import gplx.xowa.files.imgs.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import gplx.xowa.wikis.domains.*;
|
||||
public class Xosync_img_src_parser {
|
||||
private final Bry_rdr rdr = new Bry_rdr().Dflt_dlm_(Byte_ascii.Slash);
|
||||
private final Xof_url_bldr url_bldr = Xof_url_bldr.new_v2();
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
private final byte[] wiki_abrv_commons;
|
||||
|
||||
private Xoh_hdoc_ctx hctx;
|
||||
private byte path_tid;
|
||||
private byte[] img_src_bgn_local, img_src_bgn_remote;
|
||||
private byte[] page_url, repo_local;
|
||||
private byte[] raw;
|
||||
@@ -33,6 +35,7 @@ public class Xosync_img_src_parser {
|
||||
public boolean Repo_is_commons() {return repo_is_commons;} private boolean repo_is_commons;
|
||||
public byte[] File_ttl_bry() {return file_ttl_bry;} private byte[] file_ttl_bry;
|
||||
public boolean File_is_orig() {return file_is_orig;} private boolean file_is_orig;
|
||||
public Xof_ext File_ext() {return file_ext;} private Xof_ext file_ext;
|
||||
public int File_w() {return file_w;} private int file_w;
|
||||
public double File_time() {return file_time;} private double file_time;
|
||||
public int File_page() {return file_page;} private int file_page;
|
||||
@@ -40,14 +43,16 @@ public class Xosync_img_src_parser {
|
||||
|
||||
public Xosync_img_src_parser() {
|
||||
rdr.Err_wkr().Fail_throws_err_(false);
|
||||
img_src_bgn_remote = tmp_bfr.Add(Bry__xowa_src_bgn).Add(Xow_domain_itm_.Bry__commons).Add_byte_slash().To_bry_and_clear();
|
||||
img_src_bgn_remote = tmp_bfr.Add(Bry__xowa_file).Add(Xow_domain_itm_.Bry__commons).Add_byte_slash().To_bry_and_clear();
|
||||
wiki_abrv_commons = Xow_abrv_xo_.To_bry(Xow_domain_itm_.Bry__commons);
|
||||
}
|
||||
public void Init_by_page(Xoh_hdoc_ctx hctx) {
|
||||
this.hctx = hctx;
|
||||
this.page_url = hctx.Page__url();
|
||||
this.path_tid = Path__unknown;
|
||||
this.repo_local = To_wmf_repo_or_null(tmp_bfr, hctx.Wiki__domain_itm());
|
||||
if (repo_local == null) Gfo_usr_dlg_.Instance.Warn_many("", "", "unsupported wmf repo; domain=~{0}", hctx.Wiki__domain_itm().Domain_bry());
|
||||
img_src_bgn_local = tmp_bfr.Add(Bry__xowa_src_bgn).Add(hctx.Wiki__domain_bry()).Add_byte_slash().To_bry_and_clear(); // EX: "xowa:/file/en.wikipedia.org/"
|
||||
img_src_bgn_local = tmp_bfr.Add(Bry__xowa_file).Add(hctx.Wiki__domain_bry()).Add_byte_slash().To_bry_and_clear(); // EX: "xowa:/file/en.wikipedia.org/"
|
||||
}
|
||||
public boolean Parse(byte[] raw) {
|
||||
// init
|
||||
@@ -60,8 +65,14 @@ public class Xosync_img_src_parser {
|
||||
rdr.Init_by_src(raw);
|
||||
|
||||
// check "//upload.wikimedia.org/" at bgn
|
||||
if (!rdr.Is(Bry__upload_wikimedia_org)) return Fail("url does not start with //upload.wikimedia.org");
|
||||
|
||||
this.path_tid = rdr.Chk_or(path_trie, Path__unknown);
|
||||
switch (path_tid) {
|
||||
case Path__file: return Parse_file(raw_len);
|
||||
case Path__math: return Parse_math(raw_len);
|
||||
default: return Fail("img src does not start with known sequence");
|
||||
}
|
||||
}
|
||||
private boolean Parse_file(int raw_len) {
|
||||
// get repo: either "wikipedia/commons/" or "wiki_type/wiki_lang/"; EX:"wiktionary/fr"
|
||||
if (rdr.Is(Bry__repo_remote))
|
||||
this.repo_is_commons = true;
|
||||
@@ -80,35 +91,74 @@ public class Xosync_img_src_parser {
|
||||
int file_ttl_end = rdr.Find_fwd_lr_or(Byte_ascii.Slash, raw_len);
|
||||
file_ttl_bry = Bry_.Mid(raw, file_ttl_bgn, file_ttl_end);
|
||||
file_ttl_bry = gplx.langs.htmls.encoders.Gfo_url_encoder_.Http_url.Decode(file_ttl_bry); // NOTE: @src is always url-encoded; file_ttl_bry is un-encoded (for MD5, database lookups, etc.)
|
||||
this.file_ext = Xof_ext_.new_by_ttl_(file_ttl_bry);
|
||||
if (file_ext.Id_is_ogg()) file_ext = Xof_ext_.new_by_id_(Xof_ext_.Id_ogv);
|
||||
|
||||
// if thumb, get width, time, page
|
||||
|
||||
// if thumb, get file_w, file_time, file_page
|
||||
if (!file_is_orig) {
|
||||
// if "page", then file_page exists; EX: // "page1-320px"
|
||||
if (rdr.Is(Bry__page)) {
|
||||
int file_page_bgn = rdr.Pos();
|
||||
int file_page_end = rdr.Find_fwd_lr(Byte_ascii.Dash);
|
||||
file_page = Bry_.To_int_or_fail(raw, file_page_bgn, file_page_end);
|
||||
}
|
||||
|
||||
// get file_w; EX: "320px-"
|
||||
int file_w_bgn = rdr.Pos();
|
||||
int file_w_end = rdr.Find_fwd_lr(Bry__px);
|
||||
if (file_w_end == -1) return Fail("missing px");
|
||||
file_w = Bry_.To_int_or(raw, file_w_bgn, file_w_end, -1);
|
||||
if (file_w == -1) return Fail("invalid file_w");
|
||||
|
||||
// get time via "-seek%3D"; EX: "320px-seek%3D67-"
|
||||
int seek_end = rdr.Find_fwd_rr(Bry__seek);
|
||||
if (seek_end != Bry_find_.Not_found) {
|
||||
int file_time_bgn = rdr.Pos();
|
||||
int file_time_end = rdr.Find_fwd_lr(Byte_ascii.Dash);
|
||||
file_time = Bry_.To_double(raw, file_time_bgn, file_time_end);
|
||||
}
|
||||
}
|
||||
|
||||
// make image
|
||||
Xof_fsdb_itm itm = new Xof_fsdb_itm();
|
||||
itm.Init_by_wm_parse(hctx.Wiki__domain_itm().Abrv_xo(), repo_is_commons, file_is_orig, file_ttl_bry, file_w, file_time, file_page);
|
||||
hctx.Page().Hdump_mgr().Imgs().Add(itm);
|
||||
|
||||
Add_img(hctx.Wiki__domain_itm().Abrv_xo());
|
||||
return true;
|
||||
}
|
||||
private boolean Parse_math(int raw_len) {
|
||||
// set file_ttl_bry to rest of src + ".svg"; EX: "https://wikimedia.org/api/rest_v1/media/math/render/svg/596f8baf206a81478afd4194b44138715dc1a05c" -> "596f8baf206a81478afd4194b44138715dc1a05c.svg"
|
||||
this.file_ttl_bry = Bry_.Add(Bry_.Mid(raw, rdr.Pos(), raw_len), Byte_ascii.Dot_bry, Xof_ext_.Bry_svg);
|
||||
this.repo_is_commons = true;
|
||||
this.file_is_orig = true;
|
||||
this.file_ext = Xof_ext_.new_by_id_(Xof_ext_.Id_svg);
|
||||
|
||||
Add_img(wiki_abrv_commons);
|
||||
return true;
|
||||
}
|
||||
private void Add_img(byte[] wiki_abrv) {
|
||||
Xof_fsdb_itm itm = new Xof_fsdb_itm();
|
||||
hctx.Page().Hdump_mgr().Imgs().Add(itm);
|
||||
itm.Init_by_wm_parse(wiki_abrv, repo_is_commons, file_is_orig, file_ttl_bry, file_ext, file_w, file_time, file_page);
|
||||
}
|
||||
public byte[] To_bry() {
|
||||
To_bfr(tmp_bfr);
|
||||
switch (path_tid) {
|
||||
case Path__file: To_bfr_file(tmp_bfr); break;
|
||||
case Path__math: To_bfr_math(tmp_bfr); break;
|
||||
}
|
||||
return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
public void To_bfr(Bry_bfr bfr) { // EX:'xowa:file/commons.wikimedia.org/thumb/7/0/1/c/A.png/220px.png'
|
||||
// init repo; either "xowa:file/commons.wikimedia.org" or "xowa:file/en.wikipedia.org"
|
||||
url_bldr.Init_by_root(repo_is_commons ? img_src_bgn_remote : img_src_bgn_local, Bool_.N, Byte_ascii.Slash, Bool_.N, Bool_.N, 4);
|
||||
private void To_bfr_file(Bry_bfr bfr) { // EX:'xowa:/file/commons.wikimedia.org/thumb/7/0/1/c/A.png/220px.png'
|
||||
// init repo; either "xowa:/file/commons.wikimedia.org" or "xowa:/file/en.wikipedia.org"
|
||||
byte repo_tid = repo_is_commons ? Xof_repo_tid_.Tid__remote : Xof_repo_tid_.Tid__local;
|
||||
byte[] fsys_root = repo_is_commons ? img_src_bgn_remote : img_src_bgn_local;
|
||||
url_bldr.Init_by_repo(repo_tid, fsys_root, Bool_.N, Byte_ascii.Slash, Bool_.N, Bool_.N, 4);
|
||||
|
||||
// set other props and generate url;
|
||||
url_bldr.Init_by_itm(file_is_orig ? Xof_repo_itm_.Mode_orig : Xof_repo_itm_.Mode_thumb, gplx.langs.htmls.encoders.Gfo_url_encoder_.Http_url.Encode(file_ttl_bry), Xof_file_wkr_.Md5(file_ttl_bry), Xof_ext_.new_by_ttl_(file_ttl_bry), file_w, file_time, file_page);
|
||||
url_bldr.Init_by_itm(file_is_orig ? Xof_img_mode_.Tid__orig : Xof_img_mode_.Tid__thumb, gplx.langs.htmls.encoders.Gfo_url_encoder_.Http_url.Encode(file_ttl_bry), Xof_file_wkr_.Md5(file_ttl_bry), Xof_ext_.new_by_ttl_(file_ttl_bry), file_w, file_time, file_page);
|
||||
bfr.Add(url_bldr.Xto_bry());
|
||||
}
|
||||
private void To_bfr_math(Bry_bfr bfr) { // EX:'xowa:/math/596f8baf206a81478afd4194b44138715dc1a05c
|
||||
bfr.Add(Bry__xowa_math).Add(file_ttl_bry);
|
||||
}
|
||||
private void Clear() {
|
||||
this.file_ttl_bry = null;
|
||||
this.repo_is_commons = false;
|
||||
@@ -136,12 +186,23 @@ public class Xosync_img_src_parser {
|
||||
}
|
||||
|
||||
private static final byte[]
|
||||
Bry__upload_wikimedia_org = Bry_.new_a7("//upload.wikimedia.org/")
|
||||
, Bry__repo_remote = Bry_.new_a7("wikipedia/commons/")
|
||||
Bry__repo_remote = Bry_.new_a7("wikipedia/commons/")
|
||||
, Bry__thumb = Bry_.new_a7("thumb/")
|
||||
, Bry__px = Bry_.new_a7("px")
|
||||
, Bry__seek = Bry_.new_a7("-seek%3D")
|
||||
, Bry__page = Bry_.new_a7("page")
|
||||
;
|
||||
public static final byte Path__unknown = 0, Path__file = 1, Path__math = 2;
|
||||
private final Btrie_slim_mgr path_trie = Btrie_slim_mgr.cs()
|
||||
.Add_str_byte("//upload.wikimedia.org/", Path__file)
|
||||
.Add_str_byte("https://wikimedia.org/api/rest_v1/media/math/render/svg/", Path__math)
|
||||
;
|
||||
|
||||
public static final byte[] Bry__xowa_file = Bry_.new_a7("xowa:/file/"), Bry__xowa_math = Bry_.new_a7("xowa:/math/");
|
||||
public static Btrie_slim_mgr Src_xo_trie = Btrie_slim_mgr.cs()
|
||||
.Add_bry_byte(Bry__xowa_file, Path__file)
|
||||
.Add_bry_byte(Bry__xowa_math, Path__math)
|
||||
;
|
||||
public static final byte[] Bry__xowa_src_bgn = Bry_.new_a7("xowa:/file/");
|
||||
|
||||
private static byte[] To_wmf_repo_or_null(Bry_bfr bfr, Xow_domain_itm domain_itm) {
|
||||
// add type; EX: "fr.wiktionary.org" -> "wiktionary/"
|
||||
@@ -164,4 +225,10 @@ public class Xosync_img_src_parser {
|
||||
bfr.Add(domain_itm.Lang_orig_key()).Add_byte_slash();
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
public static Xof_ext Ext_by_ttl(byte[] file_ttl_bry, byte repo_tid) {
|
||||
Xof_ext rv = Xof_ext_.new_by_ttl_(file_ttl_bry);
|
||||
if (rv.Id_is_ogg()) rv = Xof_ext_.new_by_id_(Xof_ext_.Id_ogv);
|
||||
if (repo_tid == Xof_repo_tid_.Tid__math) rv = Xof_ext_.new_by_id_(Xof_ext_.Id_svg);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user