mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
HTML Databases: Show redlinks for htxt [#320]
This commit is contained in:
parent
4c47bb8793
commit
cd56234e28
@ -14,21 +14,54 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.langs.htmls.docs.*;
|
||||
import gplx.xowa.wikis.ttls.*;
|
||||
import gplx.xowa.htmls.core.hzips.*;
|
||||
import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*;
|
||||
import gplx.langs.htmls.docs.*; import gplx.langs.htmls.encoders.*;
|
||||
import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*; import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*;
|
||||
import gplx.xowa.wikis.ttls.*;
|
||||
public class Xoh_hdoc_wkr__make implements Xoh_hdoc_wkr {
|
||||
private Xoh_hzip_bfr bfr; private Xoh_page hpg; private Xoh_hdoc_ctx hctx; private byte[] src;
|
||||
private final Xoh_hdr_wtr wkr__hdr = new Xoh_hdr_wtr();
|
||||
private final Xoh_img_wtr wkr__img = new Xoh_img_wtr();
|
||||
private final Xoh_img_wtr wkr__img = new Xoh_img_wtr();
|
||||
private int html_uid;
|
||||
public void On_new_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
|
||||
this.bfr = bfr; this.hpg = hpg; this.hctx = hctx; this.src = src;
|
||||
this.html_uid = 0;
|
||||
}
|
||||
public void On_txt(int rng_bgn, int rng_end) {
|
||||
// text; just add it
|
||||
bfr.Add_mid(src, rng_bgn, rng_end);
|
||||
}
|
||||
public void On_escape(gplx.xowa.htmls.core.wkrs.escapes.Xoh_escape_data data) {
|
||||
// hzip escape byte ((byte)27); should never happen but if it does, add it
|
||||
bfr.Add(data.Hook());
|
||||
}
|
||||
public void On_xnde(gplx.xowa.htmls.core.wkrs.xndes.Xoh_xnde_parser data) {
|
||||
// regular xml node; just add it
|
||||
bfr.Add_mid(src, data.Src_bgn(), data.Src_end());
|
||||
}
|
||||
public void On_lnki(gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_data data) {
|
||||
// <a> node
|
||||
// handle "#"
|
||||
if (data.Href_itm().Tid() == Xoh_anch_href_data.Tid__anch) {
|
||||
bfr.Add_mid(src, data.Src_bgn(), data.Src_end());
|
||||
return;
|
||||
}
|
||||
|
||||
// increment html_uid and add "id=xolnki_"
|
||||
byte[] ttl_bry = data.Href_itm().Ttl_page_db();
|
||||
this.html_uid = Lnki_redlink_reg(hpg, hctx, ttl_bry, html_uid);
|
||||
int src_bgn_lhs = data.Src_bgn();
|
||||
int src_bgn_rhs = src_bgn_lhs + 3; // +3 to skip over "<a "
|
||||
if (Bry_.Match(src, src_bgn_lhs, src_bgn_rhs, Bry__a__bgn)) {
|
||||
bfr.Add(Bry__a__id);
|
||||
bfr.Add_int_variable(html_uid);
|
||||
bfr.Add_byte_quote().Add_byte_space();
|
||||
bfr.Add_mid(src, src_bgn_rhs, data.Src_end());
|
||||
}
|
||||
else {
|
||||
bfr.Add_mid(src, data.Src_bgn(), data.Src_end());
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "anchor hook should start with <a; url=~{0}", hpg.Url_bry_safe());
|
||||
}
|
||||
}
|
||||
// NOTE(review): single-line On_txt; a multi-line On_txt with the same body appears earlier in this listing -- presumably this is the pre-change side of the diff
public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
|
||||
// NOTE(review): single-line On_escape; same body as the multi-line On_escape earlier in this listing -- presumably the pre-change side of the diff
public void On_escape (gplx.xowa.htmls.core.wkrs.escapes.Xoh_escape_data data) {bfr.Add(data.Hook());}
|
||||
// NOTE(review): single-line On_xnde; same body as the multi-line On_xnde earlier in this listing -- presumably the pre-change side of the diff
public void On_xnde (gplx.xowa.htmls.core.wkrs.xndes.Xoh_xnde_parser data) {bfr.Add_mid(src, data.Src_bgn(), data.Src_end());}
|
||||
// NOTE(review): single-line On_lnki that writes the <a> node unchanged (no id attribute, no redlink registration); the multi-line On_lnki earlier in this listing does more -- presumably this is the pre-change side of the diff
public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_data data) {bfr.Add_mid(src, data.Src_bgn(), data.Src_end());}
|
||||
public void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_data data) {
|
||||
Xoh_img_data img_data = (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_data)data.Img_data();
|
||||
bfr.Add_mid(src, data.Src_bgn(), img_data.Src_bgn());
|
||||
@ -50,4 +83,18 @@ public class Xoh_hdoc_wkr__make implements Xoh_hdoc_wkr {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
public static int Lnki_redlink_reg(Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] href_bry, int html_uid) {
|
||||
if (hctx.Mode_is_diff()) return html_uid; // PERF: don't do redlinks during hzip_diff
|
||||
try {
|
||||
Xoa_ttl ttl = hpg.Wiki().Ttl_parse(Gfo_url_encoder_.Href.Decode(href_bry));
|
||||
Xopg_lnki_itm__hdump lnki_itm = new Xopg_lnki_itm__hdump(ttl);
|
||||
hpg.Html_data().Redlink_list().Add(lnki_itm);
|
||||
return lnki_itm.Html_uid();
|
||||
}
|
||||
catch (Exception e) {
|
||||
Gfo_log_.Instance.Warn("failed to add lnki to redlinks", "page", hpg.Url_bry_safe(), "href_bry", href_bry, "e", Err_.Message_gplx_log(e));
|
||||
return html_uid;
|
||||
}
|
||||
}
|
||||
private static final byte[] Bry__a__bgn = Bry_.new_a7("<a "), Bry__a__id = Bry_.new_a7("<a id=\"xolnki_");
|
||||
}
|
||||
|
58
400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_htxt_fxt.java
Normal file
58
400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_htxt_fxt.java
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.core.tests.*;
|
||||
import gplx.xowa.files.caches.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.makes.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.wikis.pages.*;
|
||||
import gplx.xowa.wikis.pages.lnkis.*;
|
||||
public class Xoh_htxt_fxt {
|
||||
private final Xowe_wiki wiki;
|
||||
private final Xop_fxt parser_fxt = new Xop_fxt();
|
||||
private final Xoh_page hpg = new Xoh_page();
|
||||
private final Xoh_make_mgr make_mgr = new Xoh_make_mgr();
|
||||
public Xoh_htxt_fxt() {
|
||||
this.wiki = parser_fxt.Wiki();
|
||||
Xoa_app_fxt.repo2_(parser_fxt.App(), wiki); // needed else will be old "mem/wiki/repo/trg/thumb/" instead of standard "mem/file/en.wikipedia.org/thumb/"
|
||||
wiki.Html__hdump_mgr().Init_by_db(parser_fxt.Wiki());
|
||||
parser_fxt.Hctx_(Xoh_wtr_ctx.Hdump_by_hzip_tid(Xoh_hzip_dict_.Hzip__none));
|
||||
hpg.Ctor_by_hview(wiki, Xoa_url.blank(), parser_fxt.Wiki().Ttl_parse(Xoa_page_.Main_page_bry), 1);
|
||||
}
|
||||
public Xow_wiki Wiki() {return wiki;}
|
||||
public Xoa_page Page() {return hpg;}
|
||||
public void Clear() {hpg.Clear();}
|
||||
public void Test__decode(String htxt) {Test__decode(htxt, htxt);}
|
||||
public void Test__decode(String htxt, String html) {
|
||||
htxt = Gfh_utl.Replace_apos(htxt);
|
||||
html = Gfh_utl.Replace_apos(html);
|
||||
Test__decode__raw(htxt, html);
|
||||
}
|
||||
public void Test__decode__raw(String htxt, String expd) {
|
||||
hpg.Section_mgr().Clear();
|
||||
byte[] actl = make_mgr.Parse(Bry_.new_u8(htxt), hpg, hpg.Wiki());
|
||||
Tfds.Eq_str_lines(expd, String_.new_u8(actl));
|
||||
}
|
||||
public void Test__hpg__redlinks(String... expd_ttls) {
|
||||
Xopg_lnki_list actl_list = hpg.Html_data().Redlink_list();
|
||||
int len = actl_list.Len();
|
||||
String[] actl_ttls = new String[len];
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xopg_lnki_itm actl_itm = actl_list.Get_at(i);
|
||||
actl_ttls[i] = actl_itm.Ttl().Full_db_as_str();
|
||||
}
|
||||
Gftest.Eq__ary(expd_ttls, actl_ttls);
|
||||
}
|
||||
public static String Escape(String v) {return String_.Replace(v, "~", "");}
|
||||
}
|
@ -22,6 +22,7 @@ public class Xoh_escape_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
// getter plus backing field for this worker's hook bytes; Encode1 writes hook to the buffer ahead of the data's own hook
public byte[] Hook() {return hook;} private byte[] hook;
|
||||
// key of this worker: always Xoh_hzip_dict_.Key__escape
public String Key() {return Xoh_hzip_dict_.Key__escape;}
|
||||
public Gfo_poolable_itm Encode1(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
|
||||
// escapes an escape byte; should not happen, since (byte)27 doesn't exist in an html document, but just to be sure
|
||||
Xoh_escape_data data = (Xoh_escape_data)data_obj;
|
||||
bfr.Add(hook); // EX: 1,0
|
||||
bfr.Add(data.Hook()); // EX: 2
|
||||
|
@ -0,0 +1,32 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import org.junit.*;
|
||||
public class Xoh_lnki_htxt__tst {
|
||||
private final Xoh_htxt_fxt fxt = new Xoh_htxt_fxt();
|
||||
@Test public void Redlink__basic() {
|
||||
fxt.Test__decode
|
||||
( "<a href='/wiki/A'>a</a> <a href='/wiki/B'>b</a>"
|
||||
, "<a id='xolnki_2' href='/wiki/A'>a</a> <a id='xolnki_3' href='/wiki/B'>b</a>");
|
||||
fxt.Test__hpg__redlinks("A", "B");
|
||||
}
|
||||
@Test public void Redlink__anchor() {
|
||||
fxt.Test__decode
|
||||
( "<a href='#A'>a</a>"
|
||||
, "<a href='#A'>a</a>");
|
||||
fxt.Test__hpg__redlinks();
|
||||
}
|
||||
}
|
@ -124,14 +124,7 @@ public class Xoh_lnki_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
href_bry = tmp_bfr.To_bry_and_clear();
|
||||
|
||||
// generate stub for redlink
|
||||
if ( !hctx.Mode_is_diff()) { // PERF: don't do redlinks during hzip_diff
|
||||
try {
|
||||
Xoa_ttl ttl = hpg.Wiki().Ttl_parse(Gfo_url_encoder_.Href.Decode(href_bry));
|
||||
Xopg_lnki_itm__hdump lnki_itm = new Xopg_lnki_itm__hdump(ttl);
|
||||
hpg.Html_data().Redlink_list().Add(lnki_itm);
|
||||
html_uid = lnki_itm.Html_uid();
|
||||
} catch (Exception e) {Gfo_log_.Instance.Warn("failed to add lnki to redlinks", "page", hpg.Url_bry_safe(), "href_bry", href_bry, "e", Err_.Message_gplx_log(e));}
|
||||
}
|
||||
html_uid = Xoh_hdoc_wkr__make.Lnki_redlink_reg(hpg, hctx, href_bry, html_uid);
|
||||
break;
|
||||
}
|
||||
byte[] capt_bry = Xoh_lnki_hzip_.Bld_capt(tmp_bfr, href_type, text_type, capt_has_ns, capt_cs0_tid, ns_bry, src, text_0_bgn, text_0_end, src, text_1_bgn, text_1_end);
|
||||
|
@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import gplx.xowa.wikis.pages.lnkis.*;
|
||||
class Xopg_lnki_itm__hdump implements Xopg_lnki_itm {
|
||||
public class Xopg_lnki_itm__hdump implements Xopg_lnki_itm {
|
||||
// creates an item for the given title; the title is stored as-is
public Xopg_lnki_itm__hdump(Xoa_ttl ttl) {this.ttl = ttl;}
|
||||
// title passed to the constructor (final; never reassigned)
public Xoa_ttl Ttl() {return ttl;} private final Xoa_ttl ttl;
|
||||
// getter, backing field, and setter for the item's html uid; the field has no explicit initializer
public int Html_uid() {return html_uid;} private int html_uid; public void Html_uid_(int v) {html_uid = v;}
|
||||
|
@ -35,8 +35,8 @@ class Xoh_file_wtr__hdump__fxt {
|
||||
public Xoh_file_wtr__hdump__fxt() {
|
||||
fxt.Reset();
|
||||
|
||||
// default to hzip
|
||||
fxt.Hctx_(Xoh_wtr_ctx.Hdump_by_hzip_tid(Xoh_hzip_dict_.Hzip__v1));
|
||||
// default to none
|
||||
fxt.Hctx_(Xoh_wtr_ctx.Hdump_by_hzip_tid(Xoh_hzip_dict_.Hzip__none));
|
||||
|
||||
// create file_fx
|
||||
this.file_fxt = Xof_file_fxt.new_all(fxt.Wiki());
|
||||
|
Loading…
Reference in New Issue
Block a user