1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-09-29 06:50:50 +00:00

Embeddable: Add page loader

This commit is contained in:
gnosygnu 2016-11-10 15:41:24 -05:00
parent 9c95e2d470
commit 08d6cc3eb9
7 changed files with 130 additions and 18 deletions

View File

@ -0,0 +1,21 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.parsers.mediawikis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.parsers.*;
/**
 * Callback through which a caller supplies wikitext for pages by title.
 * An instance may be passed to Xop_mediawiki_mgr.Make(domain_str, loader).
 */
public interface Xop_mediawiki_loader {
/**
 * Returns the wikitext for the given page.
 *
 * @param page page title; NOTE(review): appears to be the full title including namespace (EX: "Template:Bold") -- confirm against callers
 * @return wikitext for the page; NOTE(review): behavior for unknown pages (null vs. empty) is not specified here -- confirm with implementers
 */
String LoadWikitext(String page);
}

View File

@ -32,8 +32,9 @@ public class Xop_mediawiki_mgr {
, gplx.xowa.apps.boots.Xoa_cmd_arg_mgr.Bin_dir_name() , gplx.xowa.apps.boots.Xoa_cmd_arg_mgr.Bin_dir_name()
); );
} }
public Xop_mediawiki_wkr Make(String domain_str) { public Xop_mediawiki_wkr Make(String domain_str) {return Make(domain_str, null);}
public Xop_mediawiki_wkr Make(String domain_str, Xop_mediawiki_loader loader) {
Xowe_wiki wiki = (Xowe_wiki)app.Wiki_mgr().Make(Bry_.new_u8(domain_str), app.Fsys_mgr().Wiki_dir()); Xowe_wiki wiki = (Xowe_wiki)app.Wiki_mgr().Make(Bry_.new_u8(domain_str), app.Fsys_mgr().Wiki_dir());
return new Xop_mediawiki_wkr(wiki); return new Xop_mediawiki_wkr(wiki, loader);
} }
} }

View File

@ -17,11 +17,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
package gplx.xowa.addons.parsers.mediawikis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.parsers.*; package gplx.xowa.addons.parsers.mediawikis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.parsers.*;
import gplx.xowa.wikis.*; import gplx.xowa.parsers.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.wikis.*; import gplx.xowa.parsers.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.htmls.core.htmls.*;
import gplx.xowa.wikis.caches.*;
public class Xop_mediawiki_wkr { public class Xop_mediawiki_wkr {
private final Xowe_wiki wiki; private final Xowe_wiki wiki;
private final Bry_bfr tmp_bfr = Bry_bfr_.New(); private final Bry_bfr tmp_bfr = Bry_bfr_.New();
public Xop_mediawiki_wkr(Xowe_wiki wiki) { public Xop_mediawiki_wkr(Xowe_wiki wiki, Xop_mediawiki_loader loader) {
this.wiki = wiki; this.wiki = wiki;
this.Loader_(loader);
}
public void Loader_(Xop_mediawiki_loader loader) {
if (loader != null)
wiki.Cache_mgr().Page_cache().Load_wkr_(new Xow_page_cache_wkr__embeddable(loader));
} }
public String Parse(String page, String wikitext) { public String Parse(String page, String wikitext) {
Xoa_ttl ttl = wiki.Ttl_parse(Bry_.new_u8(page)); Xoa_ttl ttl = wiki.Ttl_parse(Bry_.new_u8(page));
@ -37,7 +43,6 @@ public class Xop_mediawiki_wkr {
pctx.Clear_all(); pctx.Clear_all();
parser_mgr.Parse(wpg, true); parser_mgr.Parse(wpg, true);
// write to html // write to html
boolean is_wikitext = Xow_page_tid.Identify(wpg.Wiki().Domain_tid(), ttl.Ns().Id(), ttl.Page_db()) == Xow_page_tid.Tid_wikitext; boolean is_wikitext = Xow_page_tid.Identify(wpg.Wiki().Domain_tid(), ttl.Ns().Id(), ttl.Page_db()) == Xow_page_tid.Tid_wikitext;
byte[] orig_bry = Bry_.Empty; byte[] orig_bry = Bry_.Empty;

View File

@ -20,20 +20,33 @@ import org.junit.*; import gplx.core.tests.*;
public class Xop_mediawiki_wkr__tst { public class Xop_mediawiki_wkr__tst {
private final Xop_mediawiki_wkr__fxt fxt = new Xop_mediawiki_wkr__fxt(); private final Xop_mediawiki_wkr__fxt fxt = new Xop_mediawiki_wkr__fxt();
@Test public void Basic() { @Test public void Basic() {
fxt.Init__wkr("en.wikipedia.org"); fxt.Init__wkr("en.wikipedia.org", null);
fxt.Test__parse("Page_1", "''{{PAGENAME}}''" fxt.Test__parse("Page_1", "''{{PAGENAME}}''"
, "<p><i>Page 1</i>" , "<p><i>Page 1</i>"
, "</p>" , "</p>"
); );
} }
@Test public void Template() {
fxt.Init__wkr("en.wikipedia.org", new Xop_mediawiki_loader__mock());
fxt.Test__parse("Page_1", "{{bold}}"
, "<p><b>bold</b>"
, "</p>"
);
}
} }
class Xop_mediawiki_wkr__fxt { class Xop_mediawiki_wkr__fxt {
private final Xop_mediawiki_mgr mgr = new Xop_mediawiki_mgr("mem/xowa/wiki/en.wikipedia.org/"); private final Xop_mediawiki_mgr mgr = new Xop_mediawiki_mgr("mem/xowa/wiki/en.wikipedia.org/");
private Xop_mediawiki_wkr wkr; private Xop_mediawiki_wkr wkr;
public void Init__wkr(String wiki) { public void Init__wkr(String wiki, Xop_mediawiki_loader cbk) {
this.wkr = mgr.Make(wiki); this.wkr = mgr.Make(wiki, cbk);
} }
public void Test__parse(String page, String wtxt, String... expd) { public void Test__parse(String page, String wtxt, String... expd) {
Gftest.Eq__ary__lines(String_.Concat_lines_nl_skip_last(expd), wkr.Parse(page, wtxt), "parse failed; wtxt={0}", wtxt); Gftest.Eq__ary__lines(String_.Concat_lines_nl_skip_last(expd), wkr.Parse(page, wtxt), "parse failed; wtxt={0}", wtxt);
} }
} }
/** Test double for Xop_mediawiki_loader: serves canned wikitext for "Template:Bold" and a fixed placeholder for every other title. */
class Xop_mediawiki_loader__mock implements Xop_mediawiki_loader {
	public String LoadWikitext(String page) {
		// only the "Template:Bold" title has dedicated wikitext; everything else maps to "text"
		return String_.Eq(page, "Template:Bold") ? "'''bold'''" : "text";
	}
}

View File

@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.parsers.mediawikis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.parsers.*;
/**
 * Adapts an Xop_mediawiki_loader callback to the Xow_page_cache_wkr interface:
 * converts the byte[] title to a String, asks the callback for wikitext,
 * and converts the returned wikitext back to byte[].
 */
class Xow_page_cache_wkr__embeddable implements gplx.xowa.wikis.caches.Xow_page_cache_wkr {
	private final Xop_mediawiki_loader cbk;
	/**
	 * @param cbk callback that supplies wikitext for a page title
	 */
	public Xow_page_cache_wkr__embeddable(Xop_mediawiki_loader cbk) {
		this.cbk = cbk;
	}
	/**
	 * Loads the page's wikitext through the callback.
	 *
	 * @param full_db page title as UTF-8 bytes
	 * @return wikitext as UTF-8 bytes, or null when the callback returns null
	 */
	public byte[] Get_page_or_null(byte[] full_db) {
		String wikitext = cbk.LoadWikitext(String_.new_u8(full_db));
		// honor the "or_null" contract: pass a null from the callback through as null
		// instead of handing it to the String-to-bytes conversion
		return wikitext == null ? null : Bry_.new_u8(wikitext);
	}
}

View File

@ -19,7 +19,10 @@ package gplx.xowa.wikis.caches; import gplx.*; import gplx.xowa.*; import gplx.x
public class Xow_page_cache { public class Xow_page_cache {
private final Xowe_wiki wiki; private final Xowe_wiki wiki;
private final Hash_adp_bry cache = Hash_adp_bry.cs(); // NOTE: wiki titles are not case-sensitive when ns is "1st-letter" (EX: w:earth an w:Earth); in these cases, two entries will be stored private final Hash_adp_bry cache = Hash_adp_bry.cs(); // NOTE: wiki titles are not case-sensitive when ns is "1st-letter" (EX: w:earth an w:Earth); in these cases, two entries will be stored
private Xow_page_cache_wkr load_wkr;
public Xow_page_cache(Xowe_wiki wiki) {this.wiki = wiki;} public Xow_page_cache(Xowe_wiki wiki) {this.wiki = wiki;}
public Xow_page_cache_wkr Load_wkr() {return load_wkr;}
public void Load_wkr_(Xow_page_cache_wkr v) {this.load_wkr = v;}
public byte[] Get_or_load_as_src(Xoa_ttl ttl) { public byte[] Get_or_load_as_src(Xoa_ttl ttl) {
Xow_page_cache_itm rv = Get_or_load_as_itm(ttl); Xow_page_cache_itm rv = Get_or_load_as_itm(ttl);
return rv == null ? null : rv.Wtxt__direct(); return rv == null ? null : rv.Wtxt__direct();
@ -30,11 +33,33 @@ public class Xow_page_cache {
public Xow_page_cache_itm Get_or_load_as_itm(Xoa_ttl ttl) { public Xow_page_cache_itm Get_or_load_as_itm(Xoa_ttl ttl) {
byte[] ttl_full_db = ttl.Full_db(); byte[] ttl_full_db = ttl.Full_db();
Xow_page_cache_itm rv = (Xow_page_cache_itm)cache.Get_by_bry(ttl_full_db); Xow_page_cache_itm rv = (Xow_page_cache_itm)cache.Get_by_bry(ttl_full_db);
if (rv == Xow_page_cache_itm.Missing) return null; if (rv == Xow_page_cache_itm.Missing) {
return null;
}
else if (rv == null) { else if (rv == null) {
return Load_page(ttl, ttl_full_db);
}
return rv;
}
private Xow_page_cache_itm Load_page(Xoa_ttl ttl, byte[] ttl_full_db) {
Xow_page_cache_itm rv = null;
Xoa_ttl page_ttl = ttl;
boolean page_exists = false;
byte[] page_text = null;
byte[] page_redirect_from = null;
if (load_wkr != null) {
page_text = load_wkr.Get_page_or_null(ttl_full_db);
page_exists = page_text != null;
}
if (page_text == null) {
Xoae_page page = wiki.Data_mgr().Load_page_by_ttl(ttl); // NOTE: do not call Db_mgr.Load_page; need to handle redirects Xoae_page page = wiki.Data_mgr().Load_page_by_ttl(ttl); // NOTE: do not call Db_mgr.Load_page; need to handle redirects
if (page.Db().Page().Exists()) { page_ttl = page.Ttl();
rv = new Xow_page_cache_itm(page.Ttl(), page.Db().Text().Text_bry(), page.Redirect_trail().Itms__get_wtxt_at_0th_or_null()); page_text = page.Db().Text().Text_bry();
page_exists = page.Db().Page().Exists();
page_redirect_from = page.Redirect_trail().Itms__get_wtxt_at_0th_or_null();
}
if (page_exists) {
rv = new Xow_page_cache_itm(page_ttl, page_text, page_redirect_from);
synchronized (this) { // LOCK:high-usage;DATE:2016-07-14 synchronized (this) { // LOCK:high-usage;DATE:2016-07-14
cache.Add_bry_obj(ttl_full_db, rv); cache.Add_bry_obj(ttl_full_db, rv);
} }
@ -45,7 +70,6 @@ public class Xow_page_cache {
rv = null; rv = null;
} }
} }
}
return rv; return rv;
} }
public Xow_page_cache_itm Get_or_load_as_itm_2(Xoa_ttl ttl) { // NOTE: same as Get_or_load_as_itm, but handles redirects to missing pages; DATE:2016-05-02 public Xow_page_cache_itm Get_or_load_as_itm_2(Xoa_ttl ttl) { // NOTE: same as Get_or_load_as_itm, but handles redirects to missing pages; DATE:2016-05-02

View File

@ -0,0 +1,21 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.wikis.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.wikis.*;
/**
 * Pluggable source of page text for Xow_page_cache; installed with Xow_page_cache.Load_wkr_.
 */
public interface Xow_page_cache_wkr {
/**
 * Returns the text of a page, or null.
 *
 * @param full_db page title bytes; Xow_page_cache passes the result of ttl.Full_db()
 * @return page text, or null; when null is returned, Xow_page_cache loads the page through wiki.Data_mgr().Load_page_by_ttl instead
 */
byte[] Get_page_or_null(byte[] full_db);
}