From cd56234e281079c95afad775cb4da2ba70c03783 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Sun, 3 Feb 2019 23:48:23 -0500 Subject: [PATCH] HTML Databases: Show redlinks for htxt [#320] --- .../htmls/core/wkrs/Xoh_hdoc_wkr__make.java | 65 ++++++++++++++++--- .../xowa/htmls/core/wkrs/Xoh_htxt_fxt.java | 58 +++++++++++++++++ .../core/wkrs/escapes/Xoh_escape_hzip.java | 1 + .../core/wkrs/lnkis/Xoh_lnki_htxt__tst.java | 32 +++++++++ .../htmls/core/wkrs/lnkis/Xoh_lnki_hzip.java | 9 +-- .../core/wkrs/lnkis/Xopg_lnki_itm__hdump.java | 2 +- .../lnkis/htmls/Xoh_file_wtr__hdump__tst.java | 4 +- 7 files changed, 151 insertions(+), 20 deletions(-) create mode 100644 400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_htxt_fxt.java create mode 100644 400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_htxt__tst.java diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hdoc_wkr__make.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hdoc_wkr__make.java index 1323bc1e0..44d476639 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hdoc_wkr__make.java +++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hdoc_wkr__make.java @@ -14,21 +14,54 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; -import gplx.langs.htmls.docs.*; -import gplx.xowa.wikis.ttls.*; -import gplx.xowa.htmls.core.hzips.*; -import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*; +import gplx.langs.htmls.docs.*; import gplx.langs.htmls.encoders.*; +import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*; import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*; +import gplx.xowa.wikis.ttls.*; public class Xoh_hdoc_wkr__make implements Xoh_hdoc_wkr { private Xoh_hzip_bfr bfr; private Xoh_page hpg; private Xoh_hdoc_ctx hctx; private byte[] src; private final Xoh_hdr_wtr wkr__hdr = new Xoh_hdr_wtr(); - private final Xoh_img_wtr wkr__img = new Xoh_img_wtr(); + private final Xoh_img_wtr wkr__img = new Xoh_img_wtr(); + private int html_uid; public void On_new_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) { this.bfr = bfr; this.hpg = hpg; this.hctx = hctx; this.src = src; + this.html_uid = 0; + } + public void On_txt(int rng_bgn, int rng_end) { + // text; just add it + bfr.Add_mid(src, rng_bgn, rng_end); + } + public void On_escape(gplx.xowa.htmls.core.wkrs.escapes.Xoh_escape_data data) { + // hzip escape byte ((byte)27); should never happen but if it does, add it + bfr.Add(data.Hook()); + } + public void On_xnde(gplx.xowa.htmls.core.wkrs.xndes.Xoh_xnde_parser data) { + // regular xml node; just add it + bfr.Add_mid(src, data.Src_bgn(), data.Src_end()); + } + public void On_lnki(gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_data data) { + // node + // handle "#" + if (data.Href_itm().Tid() == Xoh_anch_href_data.Tid__anch) { + bfr.Add_mid(src, data.Src_bgn(), data.Src_end()); + return; + } + + // increment html_uid and add "id=xolnki_" + byte[] ttl_bry = data.Href_itm().Ttl_page_db(); + this.html_uid = Lnki_redlink_reg(hpg, hctx, ttl_bry, html_uid); + int src_bgn_lhs = data.Src_bgn(); + int src_bgn_rhs = src_bgn_lhs + 3; // +3 to skip over "a b" + , "a b"); + fxt.Test__hpg__redlinks("A", "B"); + } + @Test public void Redlink__anchor() { + fxt.Test__decode + ( "a" + , "a"); + fxt.Test__hpg__redlinks(); + } +} diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_hzip.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_hzip.java index 889f2bb3c..41186df91 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_hzip.java +++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_hzip.java @@ -124,14 +124,7 @@ public class Xoh_lnki_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm { href_bry = tmp_bfr.To_bry_and_clear(); // generate stub for redlink - if ( !hctx.Mode_is_diff()) { // PERF: don't do redlinks during hzip_diff - try { - Xoa_ttl ttl = hpg.Wiki().Ttl_parse(Gfo_url_encoder_.Href.Decode(href_bry)); - Xopg_lnki_itm__hdump lnki_itm = new Xopg_lnki_itm__hdump(ttl); - hpg.Html_data().Redlink_list().Add(lnki_itm); - html_uid = lnki_itm.Html_uid(); - } catch (Exception e) {Gfo_log_.Instance.Warn("failed to add lnki to redlinks", "page", hpg.Url_bry_safe(), "href_bry", href_bry, "e", Err_.Message_gplx_log(e));} - } + html_uid = Xoh_hdoc_wkr__make.Lnki_redlink_reg(hpg, hctx, href_bry, html_uid); break; } byte[] capt_bry = Xoh_lnki_hzip_.Bld_capt(tmp_bfr, href_type, text_type, capt_has_ns, capt_cs0_tid, ns_bry, src, text_0_bgn, text_0_end, src, text_1_bgn, text_1_end); diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xopg_lnki_itm__hdump.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xopg_lnki_itm__hdump.java index 2af9d2f12..dc8fb60ac 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xopg_lnki_itm__hdump.java +++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xopg_lnki_itm__hdump.java @@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.wikis.pages.lnkis.*; -class Xopg_lnki_itm__hdump implements Xopg_lnki_itm { +public class Xopg_lnki_itm__hdump implements Xopg_lnki_itm { public Xopg_lnki_itm__hdump(Xoa_ttl ttl) {this.ttl = ttl;} public Xoa_ttl Ttl() {return ttl;} private final Xoa_ttl ttl; public int Html_uid() {return html_uid;} private int html_uid; public void Html_uid_(int v) {html_uid = v;} diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/htmls/Xoh_file_wtr__hdump__tst.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/htmls/Xoh_file_wtr__hdump__tst.java index 63c2569bc..f112bc41b 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/htmls/Xoh_file_wtr__hdump__tst.java +++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/htmls/Xoh_file_wtr__hdump__tst.java @@ -35,8 +35,8 @@ class Xoh_file_wtr__hdump__fxt { public Xoh_file_wtr__hdump__fxt() { fxt.Reset(); - // default to hzip - fxt.Hctx_(Xoh_wtr_ctx.Hdump_by_hzip_tid(Xoh_hzip_dict_.Hzip__v1)); + // default to none + fxt.Hctx_(Xoh_wtr_ctx.Hdump_by_hzip_tid(Xoh_hzip_dict_.Hzip__none)); // create file_fx this.file_fxt = Xof_file_fxt.new_all(fxt.Wiki());