HTML Databases: Show redlinks for htxt [#320]

pull/620/head
gnosygnu 5 years ago
parent 4c47bb8793
commit cd56234e28

@ -14,21 +14,54 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.langs.htmls.docs.*;
import gplx.xowa.wikis.ttls.*;
import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*;
import gplx.langs.htmls.docs.*; import gplx.langs.htmls.encoders.*;
import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*; import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*;
import gplx.xowa.wikis.ttls.*;
public class Xoh_hdoc_wkr__make implements Xoh_hdoc_wkr {
private Xoh_hzip_bfr bfr; private Xoh_page hpg; private Xoh_hdoc_ctx hctx; private byte[] src;
private final Xoh_hdr_wtr wkr__hdr = new Xoh_hdr_wtr();
private final Xoh_img_wtr wkr__img = new Xoh_img_wtr();
private final Xoh_img_wtr wkr__img = new Xoh_img_wtr();
private int html_uid;
// called at the start of each page: keeps references to the output buffer, page, context and source,
// and restarts the link counter at 0. src_bgn / src_end are accepted but not used here.
public void On_new_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
	this.html_uid = 0;
	this.src = src;
	this.hctx = hctx;
	this.hpg = hpg;
	this.bfr = bfr;
}
// text handler: nothing to transform, so the given range of src is handed straight to the buffer
public void On_txt(int rng_bgn, int rng_end) {
	bfr.Add_mid(src, rng_bgn, rng_end);
}
// escape handler: the hzip escape byte ((byte)27) is not expected in a document,
// but if one is reported, its hook is written to the buffer as-is
public void On_escape(gplx.xowa.htmls.core.wkrs.escapes.Xoh_escape_data data) {
	bfr.Add(data.Hook());
}
// xml-node handler: a regular node is written unchanged, using the source range reported by the parser
public void On_xnde(gplx.xowa.htmls.core.wkrs.xndes.Xoh_xnde_parser data) {
	int xnde_bgn = data.Src_bgn();
	int xnde_end = data.Src_end();
	bfr.Add_mid(src, xnde_bgn, xnde_end);
}
// <a> handler: registers the link target for redlink processing and rewrites "<a " as "<a id="xolnki_N" "
public void On_lnki(gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_data data) {
	// anchor-type href (the "#" case): write the node unchanged and register nothing
	boolean href_is_anch = data.Href_itm().Tid() == Xoh_anch_href_data.Tid__anch;
	if (href_is_anch) {
		bfr.Add_mid(src, data.Src_bgn(), data.Src_end());
		return;
	}

	// register the target title; html_uid becomes the uid of the registered item,
	// or keeps its current value when Lnki_redlink_reg registers nothing
	this.html_uid = Lnki_redlink_reg(hpg, hctx, data.Href_itm().Ttl_page_db(), html_uid);

	int tag_bgn = data.Src_bgn();
	int atrs_bgn = tag_bgn + 3; // +3 to skip over "<a "

	// unexpected node start: write it unchanged and warn
	if (!Bry_.Match(src, tag_bgn, atrs_bgn, Bry__a__bgn)) {
		bfr.Add_mid(src, tag_bgn, data.Src_end());
		Gfo_usr_dlg_.Instance.Warn_many("", "", "anchor hook should start with <a; url=~{0}", hpg.Url_bry_safe());
		return;
	}

	// write the id-carrying opening, the uid, the closing quote and a space; then the rest of the original node
	bfr.Add(Bry__a__id);
	bfr.Add_int_variable(html_uid);
	bfr.Add_byte_quote().Add_byte_space();
	bfr.Add_mid(src, atrs_bgn, data.Src_end());
}
// NOTE(review): these one-line handlers repeat the names of the multi-line handlers that precede them;
// presumably they are the pre-change forms kept by the diff view -- confirm against the repository.
// Each one writes its source range (or, for the escape, its hook) to bfr without modification.
public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
public void On_escape (gplx.xowa.htmls.core.wkrs.escapes.Xoh_escape_data data) {bfr.Add(data.Hook());}
public void On_xnde (gplx.xowa.htmls.core.wkrs.xndes.Xoh_xnde_parser data) {bfr.Add_mid(src, data.Src_bgn(), data.Src_end());}
public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_data data) {bfr.Add_mid(src, data.Src_bgn(), data.Src_end());}
public void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_data data) {
Xoh_img_data img_data = (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_data)data.Img_data();
bfr.Add_mid(src, data.Src_bgn(), img_data.Src_bgn());
@ -50,4 +83,18 @@ public class Xoh_hdoc_wkr__make implements Xoh_hdoc_wkr {
}
return true;
}
// Adds a redlink entry for href_bry to the page's redlink list and returns the html_uid of the new entry.
// Returns the html_uid argument unchanged when hctx is in diff mode, or when anything in the registration throws
// (the failure is logged as a warning rather than propagated).
public static int Lnki_redlink_reg(Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] href_bry, int html_uid) {
	// PERF: don't do redlinks during hzip_diff
	if (hctx.Mode_is_diff())
		return html_uid;

	int rv = html_uid;	// stays as passed in unless registration completes
	try {
		// decode the href, parse it to a title, wrap the title in a list item and add it to the page's list
		Xoa_ttl ttl = hpg.Wiki().Ttl_parse(Gfo_url_encoder_.Href.Decode(href_bry));
		Xopg_lnki_itm__hdump itm = new Xopg_lnki_itm__hdump(ttl);
		hpg.Html_data().Redlink_list().Add(itm);
		rv = itm.Html_uid();
	}
	catch (Exception e) {
		Gfo_log_.Instance.Warn("failed to add lnki to redlinks", "page", hpg.Url_bry_safe(), "href_bry", href_bry, "e", Err_.Message_gplx_log(e));
	}
	return rv;
}
// byte sequences used by On_lnki: Bry__a__bgn is the expected start of an anchor node ("<a ");
// Bry__a__id is written in its place and opens the id attribute, to be followed by the uid and a closing quote
private static final byte[] Bry__a__bgn = Bry_.new_a7("<a "), Bry__a__id = Bry_.new_a7("<a id=\"xolnki_");
}

@ -0,0 +1,58 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.core.tests.*;
import gplx.xowa.files.caches.*;
import gplx.langs.htmls.*; import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.makes.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.wikis.pages.*;
import gplx.xowa.wikis.pages.lnkis.*;
public class Xoh_htxt_fxt {
private final Xowe_wiki wiki;
private final Xop_fxt parser_fxt = new Xop_fxt();
private final Xoh_page hpg = new Xoh_page();
private final Xoh_make_mgr make_mgr = new Xoh_make_mgr();
// Builds the fixture around the parser fixture's wiki: sets up the repo, initialises the wiki's hdump manager,
// switches the parser fixture's hctx to hdump with Hzip__none, and constructs hpg for the main-page title.
public Xoh_htxt_fxt() {
// reuse the wiki owned by the parser fixture
this.wiki = parser_fxt.Wiki();
Xoa_app_fxt.repo2_(parser_fxt.App(), wiki); // needed else will be old "mem/wiki/repo/trg/thumb/" instead of standard "mem/file/en.wikipedia.org/thumb/"
wiki.Html__hdump_mgr().Init_by_db(parser_fxt.Wiki());
// Hzip__none: NOTE(review): presumably selects uncompressed (htxt) output -- confirm against Xoh_hzip_dict_
parser_fxt.Hctx_(Xoh_wtr_ctx.Hdump_by_hzip_tid(Xoh_hzip_dict_.Hzip__none));
// page under test uses a blank url and the main-page title; NOTE(review): meaning of the trailing 1 is not visible here
hpg.Ctor_by_hview(wiki, Xoa_url.blank(), parser_fxt.Wiki().Ttl_parse(Xoa_page_.Main_page_bry), 1);
}
// wiki taken from the parser fixture in the constructor
public Xow_wiki Wiki() {return wiki;}
// the page object that Test__decode__raw parses into and Test__hpg__redlinks inspects
public Xoa_page Page() {return hpg;}
// clears the page object (delegates to hpg.Clear())
public void Clear() {hpg.Clear();}
// convenience overload: expects the decoded html to equal the htxt that was passed in
public void Test__decode(String htxt) {
	Test__decode(htxt, htxt);
}
// runs both strings through Gfh_utl.Replace_apos, then compares via Test__decode__raw
// NOTE(review): Replace_apos presumably lets tests write ' in place of " -- confirm against Gfh_utl
public void Test__decode(String htxt, String html) {
	Test__decode__raw(Gfh_utl.Replace_apos(htxt), Gfh_utl.Replace_apos(html));
}
// parses htxt into hpg with make_mgr and asserts that the produced html matches expd line by line;
// the page's section manager is cleared before parsing
public void Test__decode__raw(String htxt, String expd) {
	hpg.Section_mgr().Clear();
	byte[] htxt_bry = Bry_.new_u8(htxt);
	byte[] html_bry = make_mgr.Parse(htxt_bry, hpg, hpg.Wiki());
	String html_str = String_.new_u8(html_bry);
	Tfds.Eq_str_lines(expd, html_str);
}
// asserts that the titles in the page's redlink list, in list order, equal expd_ttls
public void Test__hpg__redlinks(String... expd_ttls) {
	Xopg_lnki_list redlinks = hpg.Html_data().Redlink_list();
	String[] actl_ttls = new String[redlinks.Len()];
	for (int idx = 0; idx < actl_ttls.length; idx++)
		actl_ttls[idx] = redlinks.Get_at(idx).Ttl().Full_db_as_str();
	Gftest.Eq__ary(expd_ttls, actl_ttls);
}
public static String Escape(String v) {return String_.Replace(v, "~", "");}
}

@ -22,6 +22,7 @@ public class Xoh_escape_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
// returns the hook bytes held in this worker's hook field
public byte[] Hook() {return hook;} private byte[] hook;
// returns the dictionary key for the escape worker (Xoh_hzip_dict_.Key__escape)
public String Key() {return Xoh_hzip_dict_.Key__escape;}
public Gfo_poolable_itm Encode1(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
// escapes an escape byte; should not happen, since (byte)27 doesn't exist in an html document, but just to be sure
Xoh_escape_data data = (Xoh_escape_data)data_obj;
bfr.Add(hook); // EX: 1,0
bfr.Add(data.Hook()); // EX: 2

@ -0,0 +1,32 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import org.junit.*;
// Checks how <a> nodes in htxt are decoded: which ones receive an "xolnki_" id and which titles land in the page's redlink list.
public class Xoh_lnki_htxt__tst {
	private final Xoh_htxt_fxt fxt = new Xoh_htxt_fxt();

	// two page links: each gets an id attribute and both titles are recorded, in order
	@Test public void Redlink__basic() {
		String htxt = "<a href='/wiki/A'>a</a> <a href='/wiki/B'>b</a>";
		String html = "<a id='xolnki_2' href='/wiki/A'>a</a> <a id='xolnki_3' href='/wiki/B'>b</a>";
		fxt.Test__decode(htxt, html);
		fxt.Test__hpg__redlinks("A", "B");
	}

	// anchor-only link: output equals input (single-arg overload) and nothing is recorded
	@Test public void Redlink__anchor() {
		fxt.Test__decode("<a href='#A'>a</a>");
		fxt.Test__hpg__redlinks();
	}
}

@ -124,14 +124,7 @@ public class Xoh_lnki_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
href_bry = tmp_bfr.To_bry_and_clear();
// generate stub for redlink
if ( !hctx.Mode_is_diff()) { // PERF: don't do redlinks during hzip_diff
try {
Xoa_ttl ttl = hpg.Wiki().Ttl_parse(Gfo_url_encoder_.Href.Decode(href_bry));
Xopg_lnki_itm__hdump lnki_itm = new Xopg_lnki_itm__hdump(ttl);
hpg.Html_data().Redlink_list().Add(lnki_itm);
html_uid = lnki_itm.Html_uid();
} catch (Exception e) {Gfo_log_.Instance.Warn("failed to add lnki to redlinks", "page", hpg.Url_bry_safe(), "href_bry", href_bry, "e", Err_.Message_gplx_log(e));}
}
html_uid = Xoh_hdoc_wkr__make.Lnki_redlink_reg(hpg, hctx, href_bry, html_uid);
break;
}
byte[] capt_bry = Xoh_lnki_hzip_.Bld_capt(tmp_bfr, href_type, text_type, capt_has_ns, capt_cs0_tid, ns_bry, src, text_0_bgn, text_0_end, src, text_1_bgn, text_1_end);

@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import gplx.xowa.wikis.pages.lnkis.*;
class Xopg_lnki_itm__hdump implements Xopg_lnki_itm {
public class Xopg_lnki_itm__hdump implements Xopg_lnki_itm {
// creates an item for the given title
public Xopg_lnki_itm__hdump(Xoa_ttl ttl) {this.ttl = ttl;}
// title passed to the constructor
public Xoa_ttl Ttl() {return ttl;} private final Xoa_ttl ttl;
// getter and setter for html_uid; NOTE(review): presumably set via Html_uid_ when the item is added to a redlink list -- confirm against Xopg_lnki_list
public int Html_uid() {return html_uid;} private int html_uid; public void Html_uid_(int v) {html_uid = v;}

@ -35,8 +35,8 @@ class Xoh_file_wtr__hdump__fxt {
public Xoh_file_wtr__hdump__fxt() {
fxt.Reset();
// default to hzip
fxt.Hctx_(Xoh_wtr_ctx.Hdump_by_hzip_tid(Xoh_hzip_dict_.Hzip__v1));
// default to none
fxt.Hctx_(Xoh_wtr_ctx.Hdump_by_hzip_tid(Xoh_hzip_dict_.Hzip__none));
// create file_fx
this.file_fxt = Xof_file_fxt.new_all(fxt.Wiki());

Loading…
Cancel
Save