1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Embeddable: Create core dbs in proper subdirectory

This commit is contained in:
gnosygnu
2017-10-23 20:50:50 -04:00
parent 1336d44f34
commit 66877212bf
4537 changed files with 311750 additions and 0 deletions

View File

@@ -13,3 +13,7 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.parsers.mediawikis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.parsers.*;
/**
 * Callback through which a host application supplies page text to the embeddable parser.
 */
public interface Xop_mediawiki_loader {
	/**
	 * Loads the wikitext of one page.
	 *
	 * @param page title of the page to load; the embeddable page-cache worker passes the
	 *             full db title (EX: "Template:Bold")
	 * @return wikitext of the page
	 */
	String LoadWikitext(String page);
}

View File

@@ -13,3 +13,54 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.parsers.mediawikis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.parsers.*;
/**
 * Entry point of the embeddable parser: builds one {@link Xoae_app} rooted at a directory
 * and hands out one {@link Xop_mediawiki_wkr} per wiki domain.
 */
public class Xop_mediawiki_mgr {
	private final Xoae_app app;
	private boolean mode_is_prod;

	/**
	 * @param root_str     root directory of the app; the "wiki", "file" and "user" directories are generated below it
	 * @param mode_is_prod true to register sqlite, init the environment and init the app;
	 *                     false skips those steps (the unit-test fixture passes false)
	 */
	public Xop_mediawiki_mgr(String root_str, boolean mode_is_prod) {
		this.mode_is_prod = mode_is_prod;

		// a console dialog becomes the process-wide usr_dlg instance
		Gfo_usr_dlg console_dlg = Xoa_app_.New__usr_dlg__console();
		Gfo_usr_dlg_.Instance = console_dlg;

		Io_url root_url = Io_url_.new_dir_(root_str);
		if (mode_is_prod) {
			gplx.dbs.Db_conn_bldr.Instance.Reg_default_sqlite();
			// must call Init else unit_testing will be true
			gplx.core.envs.Env_.Init_swt(String_.Ary_empty, Type_.Type_by_obj(this));
		}

		this.app = new Xoae_app
			( console_dlg
			, gplx.xowa.apps.Xoa_app_mode.Itm_cmd
			, root_url
			, root_url.GenSubDir("wiki")
			, root_url.GenSubDir("file")
			, root_url.GenSubDir("user")
			, root_url.GenSubDir_nest("user", "anonymous", "wiki")
			, gplx.xowa.apps.boots.Xoa_cmd_arg_mgr.Bin_dir_name()
			);

		if (mode_is_prod) {
			app.Init_by_app();
			// must set to Launch, else wiki.init_needed will never be false; DATE:2017-01-26
			app.Stage_(gplx.xowa.apps.Xoa_stage_.Tid_launch);
		}
	}

	/** Same as {@link #Make(String, Xop_mediawiki_loader)} with a null loader. */
	public Xop_mediawiki_wkr Make(String domain_str) {
		return Make(domain_str, null);
	}

	/**
	 * Makes the wiki for a domain and wraps it in a worker.
	 *
	 * @param domain_str wiki domain (EX: "en.wikipedia.org"); also used as the name of the wiki's subdirectory
	 * @param loader     callback that supplies page wikitext; may be null
	 * @return new worker bound to the wiki
	 */
	public Xop_mediawiki_wkr Make(String domain_str, Xop_mediawiki_loader loader) {
		byte[] domain_bry = Bry_.new_u8(domain_str);
		Xowe_wiki wiki = (Xowe_wiki)app.Wiki_mgr().Make(domain_bry, app.Fsys_mgr().Wiki_dir().GenSubDir(domain_str));
		if (mode_is_prod)
			Init_wiki_for_prod(wiki);
		return new Xop_mediawiki_wkr(wiki, loader);
	}

	/** Creates a manager in production mode. */
	public static Xop_mediawiki_mgr New(String root_str) {
		return new Xop_mediawiki_mgr(root_str, true);
	}

	// prod-only setup of a newly made wiki: mark embeddable, init, create core data on first use, set file modes
	private static void Init_wiki_for_prod(Xowe_wiki wiki) {
		// must mark wiki as embeddable, else orig_mgr will load wkrs which will download images DATE:2017-10-23
		wiki.Embeddable_enabled_(true);
		wiki.Init_by_wiki();

		// init setup data; xowa_cfg|interwikimap and ns_msg; only run if core file does not exist; DATE:2017-10-23
		boolean core_fil_missing = gplx.xowa.wikis.data.Xow_db_file__core_.Find_core_fil_or_null(wiki) == null;
		if (core_fil_missing) {
			Xowe_wiki_.Create(wiki, 0, "embeddeable_parser");
			// load interwikimap et al from WM API
			wiki.App().Site_cfg_mgr().Load(wiki);
			// save ns to xowa_ns
			wiki.Db_mgr_as_sql().Core_data_mgr().Db__core().Tbl__ns().Insert(wiki.Ns_mgr());
		}

		// must set to version_2 else video files will use old v1 Meta_code; DATE:2017-01-26
		wiki.File_mgr().Version_2_y_();
		// must set to mass_parse mode, else will use old v1 Meta_code for xfer_itm and url_bldr; DATE:2017-01-26
		wiki.File_mgr().Fsdb_mode().Tid__v2__mp__y_();
	}
}

View File

@@ -13,3 +13,69 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.parsers.mediawikis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.parsers.*;
import gplx.xowa.wikis.*; import gplx.xowa.parsers.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.htmls.core.htmls.*;
import gplx.xowa.wikis.caches.*;
import gplx.xowa.addons.wikis.ctgs.htmls.pageboxs.*;
/**
 * Parses wikitext to html for one wiki, optionally pulling other pages through a {@link Xop_mediawiki_loader}.
 */
public class Xop_mediawiki_wkr {
	private final Xowe_wiki wiki;
	private final Bry_bfr tmp_bfr = Bry_bfr_.New();

	/**
	 * @param wiki   wiki that owns the parser, caches and html writers used by this worker
	 * @param loader callback that supplies page wikitext; may be null, in which case no load-worker is installed
	 */
	public Xop_mediawiki_wkr(Xowe_wiki wiki, Xop_mediawiki_loader loader) {
		this.wiki = wiki;
		this.Loader_(loader);
	}

	/** Installs a page-cache load-worker backed by {@code loader}; does nothing when {@code loader} is null. */
	public void Loader_(Xop_mediawiki_loader loader) {
		if (loader == null) return;
		wiki.Cache_mgr().Load_wkr_(new Xow_page_cache_wkr__embeddable(wiki, loader));
	}

	/** Clears the template-result cache, frees page-cache memory, terminates the Scribunto core and clears the wikidata mgr. */
	public void Free_memory() {
		wiki.Cache_mgr().Tmpl_result_cache().Clear();
		wiki.Cache_mgr().Free_mem__page();
		wiki.Parser_mgr().Scrib().Core_term();
		wiki.Appe().Wiki_mgr().Wdata_mgr().Clear();
	}

	/** Deletes one page, identified by its title, from the wiki's page cache. */
	public void Clear_cache(String page) {
		byte[] page_bry = Bry_.new_u8(page);
		Xoa_ttl ttl = wiki.Ttl_parse(page_bry);
		wiki.Cache_mgr().Page_cache().Del(ttl.Full_db());
	}

	/**
	 * Parses wikitext in the context of a page and returns the generated html.
	 *
	 * @param page     title of the page the wikitext belongs to
	 * @param wikitext wikitext to parse
	 * @return html (followed by the category pagebox when categories were found) for wikitext pages;
	 *         an empty string for pages not identified as wikitext
	 */
	public String Parse(String page, String wikitext) {
		Xoa_ttl ttl = wiki.Ttl_parse(Bry_.new_u8(page));
		byte[] src = Bry_.new_u8(wikitext);
		Xoae_page wpg = Xoae_page.New(wiki, ttl);
		wpg.Db().Text().Text_bry_(src);

		// parse page with a cleared ctx
		Xow_parser_mgr parser_mgr = wiki.Parser_mgr();
		Xop_ctx pctx = parser_mgr.Ctx();
		pctx.Clear_all();
		parser_mgr.Parse(wpg, true);

		boolean is_wikitext = Xow_page_tid.Tid_wikitext == Xow_page_tid.Identify(wpg.Wiki().Domain_tid(), ttl.Ns().Id(), ttl.Page_db());
		if (!is_wikitext) {
			// not wikitext; EX: pages in MediaWiki: ns; DATE:2016-09-12
			wpg.Db().Html().Html_bry_(wpg.Db().Text().Text_bry());
			// NOTE(review): page html is set to the raw text, but the returned String is empty -- confirm this is intended
			return String_.new_u8(Bry_.Empty);
		}
		return String_.new_u8(Write_html(wpg, pctx));
	}

	// writes page html and category pagebox into tmp_bfr, stores the result on the page and returns it
	private byte[] Write_html(Xoae_page wpg, Xop_ctx pctx) {
		wiki.Html_mgr().Page_wtr_mgr().Wkr(Xopg_page_.Tid_read).Write_hdump(tmp_bfr, pctx, Xoh_wtr_ctx.Hdump, wpg);
		Write_ctgs(wpg);
		byte[] html = tmp_bfr.To_bry_and_clear();
		wpg.Db().Html().Html_bry_(html);
		return html;
	}

	// appends the category pagebox to tmp_bfr; skipped if no categories were found while parsing wikitext
	private void Write_ctgs(Xoae_page wpg) {
		int ctgs_len = wpg.Wtxt().Ctgs__len();
		if (ctgs_len <= 0) return;

		Xoctg_pagebox_itm[] itms = new Xoctg_pagebox_itm[ctgs_len];
		int idx = 0;
		while (idx < ctgs_len) {
			itms[idx] = new Xoctg_pagebox_itm(wpg.Wtxt().Ctgs__get_at(idx));
			idx++;
		}
		wiki.Ctg__pagebox_wtr().Write_pagebox(tmp_bfr, wiki, wpg, itms);
	}
}

View File

@@ -13,3 +13,42 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.parsers.mediawikis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.parsers.*;
import org.junit.*; import gplx.core.tests.*;
/** Tests for {@link Xop_mediawiki_wkr}: parsing with and without a loader. */
public class Xop_mediawiki_wkr__tst {
	private final Xop_mediawiki_wkr__fxt fxt = new Xop_mediawiki_wkr__fxt();

	// reset the global usr_dlg to Noop; constructing Xop_mediawiki_mgr sets it to a console dialog
	@After
	public void term() {
		Gfo_usr_dlg_.Instance = Gfo_usr_dlg_.Noop;
	}

	// no loader: {{PAGENAME}} is resolved from the page title
	@Test
	public void Basic() {
		String[] expd =
			{ "<p><i>Page 1</i>"
			, "</p>"
			};
		fxt.Init__wkr("en.wikipedia.org", null);
		fxt.Test__parse("Page_1", "''{{PAGENAME}}''", expd);
	}

	// mock loader: text of Template:Bold is supplied by the loader
	@Test
	public void Template() {
		String[] expd =
			{ "<p><b>bold</b>"
			, "</p>"
			};
		fxt.Init__wkr("en.wikipedia.org", new Xop_mediawiki_loader__mock());
		fxt.Test__parse("Page_1", "{{bold}}", expd);
	}
}
/** Test fixture: owns a non-prod {@link Xop_mediawiki_mgr} and the worker under test. */
class Xop_mediawiki_wkr__fxt {
	private final Xop_mediawiki_mgr mgr;
	private Xop_mediawiki_wkr wkr;

	public Xop_mediawiki_wkr__fxt() {
		// mgr is created first, then the in-memory db is registered as the default
		this.mgr = new Xop_mediawiki_mgr("mem/xowa/wiki/en.wikipedia.org/", false);
		gplx.dbs.Db_conn_bldr.Instance.Reg_default_mem();
	}

	/** Makes the worker for {@code wiki}; {@code cbk} may be null. */
	public void Init__wkr(String wiki, Xop_mediawiki_loader cbk) {
		this.wkr = mgr.Make(wiki, cbk);
	}

	/** Parses {@code wtxt} as {@code page} and compares the html against the expected lines. */
	public void Test__parse(String page, String wtxt, String... expd) {
		String expd_html = String_.Concat_lines_nl_skip_last(expd);
		String actl_html = wkr.Parse(page, wtxt);
		Gftest.Eq__ary__lines(expd_html, actl_html, "parse failed; wtxt={0}", wtxt);
	}
}
/** Mock loader: returns bold wikitext for "Template:Bold" and "text" for any other page. */
class Xop_mediawiki_loader__mock implements Xop_mediawiki_loader {
	public String LoadWikitext(String page) {
		boolean is_bold_tmpl = String_.Eq(page, "Template:Bold");
		return is_bold_tmpl ? "'''bold'''" : "text";
	}
}

View File

@@ -13,3 +13,33 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.parsers.mediawikis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.parsers.*;
import gplx.xowa.parsers.utils.*;
/**
 * Page-cache worker for the embeddable parser: gets page text from the host's
 * {@link Xop_mediawiki_loader} instead of a wiki database, and follows redirects.
 */
class Xow_page_cache_wkr__embeddable implements gplx.xowa.wikis.caches.Xow_page_cache_wkr {
	// max number of loads per request; bounds redirect chains and redirect cycles
	private static final int Redirect_loops_max = 5;
	private final Xop_mediawiki_loader cbk;
	private final Xop_redirect_mgr redirect_mgr;

	/**
	 * @param wiki wiki used to build the redirect mgr
	 * @param cbk  host callback that supplies wikitext for a page title
	 */
	public Xow_page_cache_wkr__embeddable(Xowe_wiki wiki, Xop_mediawiki_loader cbk) {
		this.cbk = cbk;
		this.redirect_mgr = new Xop_redirect_mgr(wiki);
	}

	/**
	 * Loads the wikitext of a page, following redirects.
	 *
	 * @param full_db full db title of the page (EX: "Template:Bold")
	 * @return wikitext of the page or of its final redirect target; the wikitext of the last
	 *         page loaded if the redirect chain is longer than the loop limit; null if the
	 *         loader returns null for the page or for a redirect target
	 */
	public byte[] Get_page_or_null(byte[] full_db) {
		byte[] wikitext = null;
		// loop to handle redirects; DATE:2017-05-29
		for (int loops = 0; loops < Redirect_loops_max; loops++) {
			String loaded = cbk.LoadWikitext(String_.new_u8(full_db));
			// loader has no text for this title; report "not found" instead of passing null to the byte conversion / redirect parser
			if (loaded == null) return null;

			wikitext = Bry_.new_u8(loaded);
			Xoa_ttl redirect_ttl = redirect_mgr.Extract_redirect(wikitext);
			// not a redirect; exit loop
			if (redirect_ttl == null) break;

			// redirect; update title and continue
			full_db = redirect_ttl.Full_db();
		}
		return wikitext;
	}
}