mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.12.1.1
This commit is contained in:
@@ -55,6 +55,7 @@ public class Xoh_page implements Xoa_page {
|
||||
}
|
||||
public Xoh_page Ctor_by_page(Bry_bfr tmp_bfr, Xoae_page page) {
|
||||
this.page_id = page.Revision_data().Id();
|
||||
this.wiki = page.Wiki();
|
||||
this.body = page.Hdump_data().Body();
|
||||
this.page_url = page.Url();
|
||||
Xopg_html_data html_data = page.Html_data();
|
||||
|
||||
@@ -153,11 +153,11 @@ public class Xoh_page_wtr_wkr {
|
||||
wiki.Html_mgr().Html_wtr().Write_all(tidy_bfr, page.Wikie().Parser_mgr().Ctx(), hctx, page.Root().Data_mid(), page.Root());
|
||||
|
||||
// if [[Category]], render rest of html (Subcategories; Pages; Files); note that a category may have other html which requires wikitext processing
|
||||
if (ns_id == Xow_ns_.Tid__category) wiki.Html_mgr().Ns_ctg().Bld_html(wiki, page, tidy_bfr);
|
||||
if (ns_id == Xow_ns_.Tid__category) wiki.Html_mgr().Ns_ctg().Bld_html(wiki, page, hctx, tidy_bfr);
|
||||
|
||||
// tidy html
|
||||
gplx.xowa.htmls.core.htmls.tidy.Xoh_tidy_mgr tidy_mgr = app.Html_mgr().Tidy_mgr();
|
||||
if (tidy_mgr.Enabled()) tidy_mgr.Run_tidy_html(page, tidy_bfr);
|
||||
if (tidy_mgr.Enabled()) tidy_mgr.Run_tidy_html(page, tidy_bfr, !hctx.Mode_is_hdump());
|
||||
|
||||
// add back to main bfr
|
||||
bfr.Add_bfr_and_clear(tidy_bfr);
|
||||
|
||||
@@ -20,20 +20,20 @@ import gplx.core.brys.fmtrs.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.msgs.*;
|
||||
public class Xohp_ctg_grp_mgr {
|
||||
final Bry_fmtr grp_fmtr = Bry_fmtr.new_(String_.Concat_lines_nl_skip_last
|
||||
( "<div id=\"catlinks\" class=\"catlinks\">"
|
||||
, " <div id=\"mw-normal-catlinks\" class=\"mw-normal-catlinks\">"
|
||||
, " ~{grp_lbl}"
|
||||
, " <ul>~{grp_itms}"
|
||||
, " </ul>"
|
||||
, " </div>"
|
||||
, "</div>"
|
||||
( "<div id=\"catlinks\" class=\"catlinks\">"
|
||||
, "<div id=\"mw-normal-catlinks\" class=\"mw-normal-catlinks\">"
|
||||
, "~{grp_lbl}"
|
||||
, "<ul>~{grp_itms}"
|
||||
, "</ul>"
|
||||
, "</div>"
|
||||
, "</div>"
|
||||
), "grp_lbl", "grp_itms")
|
||||
;
|
||||
final Bry_fmtr itm_fmtr = Bry_fmtr.new_(String_.Concat_lines_nl_skip_last
|
||||
( ""
|
||||
, " <li>"
|
||||
, " <a href=\"~{itm_href}\" class=\"internal\" title=\"~{itm_title}\">~{itm_text}</a>"
|
||||
, " </li>"
|
||||
( ""
|
||||
, "<li>"
|
||||
, "<a href=\"~{itm_href}\" class=\"internal\" title=\"~{itm_title}\">~{itm_text}</a>"
|
||||
, "</li>"
|
||||
), "itm_href", "itm_title", "itm_text"
|
||||
);
|
||||
Xoh_ctg_itm_fmtr itm_mgr = new Xoh_ctg_itm_fmtr();
|
||||
|
||||
@@ -23,17 +23,17 @@ public class Xohp_ctg_grp_mgr_tst {
|
||||
@Test public void Basic() {
|
||||
fxt.Init_ctgs("A", "B").Test_html(String_.Concat_lines_nl
|
||||
( "<div id=\"catlinks\" class=\"catlinks\">"
|
||||
, " <div id=\"mw-normal-catlinks\" class=\"mw-normal-catlinks\">"
|
||||
, " Categories"
|
||||
, " <ul>"
|
||||
, " <li>"
|
||||
, " <a href=\"/wiki/Category:A\" class=\"internal\" title=\"A\">A</a>"
|
||||
, " </li>"
|
||||
, " <li>"
|
||||
, " <a href=\"/wiki/Category:B\" class=\"internal\" title=\"B\">B</a>"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </div>"
|
||||
, "<div id=\"mw-normal-catlinks\" class=\"mw-normal-catlinks\">"
|
||||
, "Categories"
|
||||
, "<ul>"
|
||||
, "<li>"
|
||||
, "<a href=\"/wiki/Category:A\" class=\"internal\" title=\"A\">A</a>"
|
||||
, "</li>"
|
||||
, "<li>"
|
||||
, "<a href=\"/wiki/Category:B\" class=\"internal\" title=\"B\">B</a>"
|
||||
, "</li>"
|
||||
, "</ul>"
|
||||
, "</div>"
|
||||
, "</div>"
|
||||
));
|
||||
}
|
||||
|
||||
@@ -26,10 +26,10 @@ public class Xow_html_mgr implements GfoInvkAble {
|
||||
Xoae_app app = wiki.Appe();
|
||||
page_wtr_mgr = new Xoh_page_wtr_mgr(app.Gui_mgr().Kit().Tid() != gplx.gfui.Gfui_kit_.Swing_tid); // reverse logic to handle swt,drd but not mem
|
||||
Io_url file_dir = app.Fsys_mgr().Bin_xowa_file_dir().GenSubDir_nest("mediawiki.file");
|
||||
img_media_play_btn = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys.Encode_to_file_protocol(file_dir.GenSubFil("play.png"));
|
||||
img_media_info_btn = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys.Encode_to_file_protocol(file_dir.GenSubFil("info.png"));
|
||||
img_thumb_magnify = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys.Encode_to_file_protocol(file_dir.GenSubFil("magnify-clip.png"));
|
||||
img_xowa_protocol = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys.Encode_to_file_protocol(app.Fsys_mgr().Bin_xowa_file_dir().GenSubFil_nest("app.general", "xowa_exec.png"));
|
||||
img_media_play_btn = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys_lnx.Encode_to_file_protocol(file_dir.GenSubFil("play.png"));
|
||||
img_media_info_btn = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys_lnx.Encode_to_file_protocol(file_dir.GenSubFil("info.png"));
|
||||
img_thumb_magnify = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys_lnx.Encode_to_file_protocol(file_dir.GenSubFil("magnify-clip.png"));
|
||||
img_xowa_protocol = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys_lnx.Encode_to_file_protocol(app.Fsys_mgr().Bin_xowa_file_dir().GenSubFil_nest("app.general", "xowa_exec.png"));
|
||||
portal_mgr = new Xow_portal_mgr(wiki);
|
||||
imgs_mgr = new Xoh_imgs_mgr(this);
|
||||
module_mgr = new Xow_module_mgr(wiki);
|
||||
|
||||
@@ -23,24 +23,27 @@ public class Xow_hdump_mgr {
|
||||
private final Xoh_page tmp_hpg = new Xoh_page(); private final Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
|
||||
private final Io_stream_zip_mgr zip_mgr = new Io_stream_zip_mgr();
|
||||
public Xow_hdump_mgr(Xow_wiki wiki) {
|
||||
this.save_mgr = new Xow_hdump_mgr__save(wiki, hzip_mgr, zip_mgr, tmp_hpg, tmp_bfr);
|
||||
this.save_mgr = new Xow_hdump_mgr__save(wiki, hzip_mgr, zip_mgr, tmp_hpg);
|
||||
this.load_mgr = new Xow_hdump_mgr__load(wiki, hzip_mgr, zip_mgr, tmp_hpg, tmp_bfr);
|
||||
}
|
||||
public Xow_hdump_mgr__save Save_mgr() {return save_mgr;} private Xow_hdump_mgr__save save_mgr;
|
||||
public Xow_hdump_mgr__load Load_mgr() {return load_mgr;} private Xow_hdump_mgr__load load_mgr;
|
||||
public Xoh_hzip_mgr Hzip_mgr() {return hzip_mgr;} private final Xoh_hzip_mgr hzip_mgr = new Xoh_hzip_mgr();
|
||||
public void Init_by_db(Xow_wiki wiki) {
|
||||
byte default_zip_tid = gplx.core.ios.Io_stream_.Tid_raw;
|
||||
boolean default_hzip_enable = false;
|
||||
byte dflt_zip_tid = gplx.core.ios.Io_stream_.Tid_raw;
|
||||
boolean dflt_hzip_enable = false;
|
||||
boolean mode_is_b256 = false;
|
||||
if (wiki.Data__core_mgr() != null) { // TEST: handle null data mgr
|
||||
Xowd_core_db_props props = wiki.Data__core_mgr().Props();
|
||||
default_zip_tid = props.Zip_tid_html();
|
||||
default_hzip_enable = props.Hzip_enabled();
|
||||
dflt_zip_tid = props.Zip_tid_html();
|
||||
dflt_hzip_enable = props.Hzip_enabled();
|
||||
// dflt_hzip_enable = props.Hzip_enabled();
|
||||
// mode_is_b256 = true;
|
||||
}
|
||||
Init_by_db(default_zip_tid, default_hzip_enable);
|
||||
Init_by_db(dflt_zip_tid, dflt_hzip_enable, mode_is_b256);
|
||||
}
|
||||
public void Init_by_db(byte default_zip_tid, boolean default_hzip_enable) {
|
||||
int dflt_hzip_tid = default_hzip_enable ? Xoh_hzip_dict_.Hzip__v1 : Xoh_hzip_dict_.Hzip__none;
|
||||
save_mgr.Init_by_db(default_zip_tid, dflt_hzip_tid);
|
||||
// Configures the save manager's defaults.
// dflt_zip_tid is forwarded unchanged; dflt_hzip_enable is mapped to Xoh_hzip_dict_.Hzip__v1 (true) or Xoh_hzip_dict_.Hzip__none (false).
// NOTE(review): mode_is_b256 is accepted but never read here -- Bool_.N is always forwarded to save_mgr, so callers that pass true get no effect.
// This looks like b256 mode being held back on purpose; confirm before wiring the parameter through.
public void Init_by_db(byte dflt_zip_tid, boolean dflt_hzip_enable, boolean mode_is_b256) {
|
||||
int dflt_hzip_tid = dflt_hzip_enable ? Xoh_hzip_dict_.Hzip__v1 : Xoh_hzip_dict_.Hzip__none;
|
||||
save_mgr.Init_by_db(dflt_zip_tid, dflt_hzip_tid, Bool_.N);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ public class Xow_hdump_mgr__load {
|
||||
private final Xoh_page tmp_hpg; private final Bry_bfr tmp_bfr; private final Xowd_page_itm tmp_dbpg = new Xowd_page_itm();
|
||||
public Xow_hdump_mgr__load(Xow_wiki wiki, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, Xoh_page tmp_hpg, Bry_bfr tmp_bfr) {
|
||||
this.wiki = wiki; this.hzip_mgr = hzip_mgr; this.zip_mgr = zip_mgr; this.tmp_hpg = tmp_hpg; this.tmp_bfr = tmp_bfr;
|
||||
this.make_mgr = new Xoh_make_mgr(wiki.App().Usr_dlg(), wiki.App().Fsys_mgr(), gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys, wiki.Domain_bry());
|
||||
this.make_mgr = new Xoh_make_mgr(wiki.App().Usr_dlg(), wiki.App().Fsys_mgr(), gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys_lnx, wiki.Domain_bry());
|
||||
}
|
||||
public Xoh_make_mgr Make_mgr() {return make_mgr;} private final Xoh_make_mgr make_mgr;
|
||||
public void Load(Xoae_page wpg) {
|
||||
@@ -45,13 +45,12 @@ public class Xow_hdump_mgr__load {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// Sets the diff flag on the shared hzip context, decodes src into bfr, then returns bfr's contents and clears it.
// The flag is set on hzip_mgr's context and is not restored afterwards.
public byte[] Decode_as_bry(Bry_bfr bfr, Xoh_page hpg, byte[] src, boolean mode_is_diff) {
	hzip_mgr.Hctx().Mode_is_diff_(mode_is_diff);
	hzip_mgr.Decode(bfr, wiki, hpg, src);
	byte[] decoded = bfr.To_bry_and_clear();
	return decoded;
}
|
||||
private byte[] Parse(Xoh_page hpg, int zip_tid, int hzip_tid, byte[] src) {
|
||||
if (zip_tid > gplx.core.ios.Io_stream_.Tid_raw)
|
||||
src = zip_mgr.Unzip((byte)zip_tid, src);
|
||||
if (hzip_tid == Xoh_hzip_dict_.Hzip__v1) {
|
||||
hzip_mgr.Decode(tmp_bfr.Clear(), wiki, hpg, src);
|
||||
src = tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
if (hzip_tid == Xoh_hzip_dict_.Hzip__v1)
|
||||
src = Decode_as_bry(tmp_bfr.Clear(), hpg, src, Bool_.N);
|
||||
return src;
|
||||
}
|
||||
private void Fill_page(Xoae_page wpg, Xoh_page hpg) {
|
||||
|
||||
@@ -117,7 +117,7 @@ class Xodb_hdump_mgr__base_fxt {
|
||||
wiki = fxt.Wiki();
|
||||
page = wiki.Parser_mgr().Ctx().Cur_page();
|
||||
hdump_mgr = wiki.Html__hdump_mgr();
|
||||
hdump_mgr.Init_by_db(gplx.core.ios.Io_stream_.Tid_raw, false);
|
||||
hdump_mgr.Init_by_db(gplx.core.ios.Io_stream_.Tid_raw, false, false);
|
||||
}
|
||||
fxt.Reset();
|
||||
page.Revision_data().Id_(0);
|
||||
|
||||
@@ -16,16 +16,19 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
|
||||
import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.heads.*;
|
||||
import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.heads.*;
|
||||
import gplx.core.ios.*; import gplx.core.primitives.*; import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.pages.*;
|
||||
public class Xow_hdump_mgr__save {
|
||||
private final Xow_wiki wiki; private final Xoh_hzip_mgr hzip_mgr; private final Io_stream_zip_mgr zip_mgr;
|
||||
private final Xoh_page tmp_hpg; private final Bry_bfr tmp_bfr; private Bool_obj_ref html_db_is_new = Bool_obj_ref.n_();
|
||||
private final Xoh_page tmp_hpg; private final Xoh_hzip_bfr tmp_bfr = Xoh_hzip_bfr.New_txt(32); private Bool_obj_ref html_db_is_new = Bool_obj_ref.n_();
|
||||
private int dflt_zip_tid, dflt_hzip_tid;
|
||||
public Xow_hdump_mgr__save(Xow_wiki wiki, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, Xoh_page tmp_hpg, Bry_bfr tmp_bfr) {
|
||||
this.wiki = wiki; this.hzip_mgr = hzip_mgr; this.zip_mgr = zip_mgr; this.tmp_hpg = tmp_hpg; this.tmp_bfr = tmp_bfr;
|
||||
public Xow_hdump_mgr__save(Xow_wiki wiki, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, Xoh_page tmp_hpg) {
|
||||
this.wiki = wiki; this.hzip_mgr = hzip_mgr; this.zip_mgr = zip_mgr; this.tmp_hpg = tmp_hpg;
|
||||
}
|
||||
public void Init_by_db(int dflt_zip_tid, int dflt_hzip_tid) {this.dflt_zip_tid = dflt_zip_tid; this.dflt_hzip_tid = dflt_hzip_tid;}
|
||||
public void Init_by_db(int dflt_zip_tid, int dflt_hzip_tid, boolean mode_is_b256) {
|
||||
this.dflt_zip_tid = dflt_zip_tid; this.dflt_hzip_tid = dflt_hzip_tid; tmp_bfr.Mode_is_b256_(mode_is_b256);
|
||||
}
|
||||
public byte[] Src_as_hzip() {return src_as_hzip;} private byte[] src_as_hzip;
|
||||
public int Save(Xoae_page page) {
|
||||
synchronized (tmp_hpg) {
|
||||
Bld_hdump(page);
|
||||
@@ -45,11 +48,9 @@ public class Xow_hdump_mgr__save {
|
||||
wiki.Html__wtr_mgr().Wkr(Xopg_page_.Tid_read).Write_body(tmp_bfr, Xoh_wtr_ctx.Hdump, page); // save as hdump_fmt
|
||||
page.Hdump_data().Body_(tmp_bfr.To_bry_and_clear());
|
||||
}
|
||||
private static byte[] Write(Bry_bfr bfr, Xow_wiki wiki, Xoh_page hpg, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, int zip_tid, int hzip_tid, byte[] src) {
|
||||
if (hzip_tid == Xoh_hzip_dict_.Hzip__v1) {
|
||||
hzip_mgr.Encode(bfr.Clear(), wiki, hpg, src);
|
||||
src = bfr.To_bry_and_clear();
|
||||
}
|
||||
private byte[] Write(Xoh_hzip_bfr bfr, Xow_wiki wiki, Xoh_page hpg, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, int zip_tid, int hzip_tid, byte[] src) {
|
||||
if (hzip_tid != Xoh_hzip_dict_.Hzip__none) src = hzip_mgr.Encode_as_bry((Xoh_hzip_bfr)bfr.Clear(), wiki, hpg, src);
|
||||
src_as_hzip = src;
|
||||
if (zip_tid > gplx.core.ios.Io_stream_.Tid_raw)
|
||||
src = zip_mgr.Zip((byte)zip_tid, src);
|
||||
return src;
|
||||
|
||||
@@ -16,22 +16,26 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.core.brys.*;
|
||||
import gplx.dbs.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.dbs.*; import gplx.xowa.htmls.core.hzips.*;
|
||||
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.data.*;
|
||||
import gplx.xowa.apps.apis.xowa.bldrs.imports.*;
|
||||
public class Xob_hdump_bldr implements GfoInvkAble {
|
||||
private boolean enabled, hzip_enabled, hzip_compare;
|
||||
private boolean enabled, hzip_enabled, hzip_diff, hzip_b256; private byte zip_tid = Byte_.Max_value_127;
|
||||
private Xowe_wiki wiki; private Xow_hdump_mgr hdump_mgr;
|
||||
private Xob_ns_to_db_mgr ns_to_db_mgr; int prv_row_len = 0;
|
||||
private Hzip_stat_tbl stats_tbl; private Hzip_stat_itm tmp_stat_itm;
|
||||
private final Xoh_page tmp_hpg = new Xoh_page(); private final Bry_bfr tmp_bfr = Bry_bfr.reset_(Io_mgr.Len_mb);
|
||||
private Xoh_stat_tbl stats_tbl; private Xoh_stat_itm tmp_stat_itm;
|
||||
private final Xoh_page tmp_hpg = new Xoh_page(); private final Bry_bfr tmp_bfr = Bry_bfr.new_();
|
||||
private boolean op_sys_is_wnt;
|
||||
public boolean Init(Xowe_wiki wiki, Db_conn make_conn) {
|
||||
if (!enabled) return false;
|
||||
this.wiki = wiki; this.hdump_mgr = wiki.Html__hdump_mgr(); this.tmp_stat_itm = hdump_mgr.Hzip_mgr().Hctx().Bicode__stat();
|
||||
this.stats_tbl = new Hzip_stat_tbl(make_conn);
|
||||
this.op_sys_is_wnt = gplx.core.envs.Op_sys.Cur().Tid_is_wnt();
|
||||
this.wiki = wiki; this.hdump_mgr = wiki.Html__hdump_mgr(); this.tmp_stat_itm = hdump_mgr.Hzip_mgr().Hctx().Hzip__stat();
|
||||
this.stats_tbl = new Xoh_stat_tbl(make_conn);
|
||||
Xoapi_import import_cfg = wiki.Appe().Api_root().Bldr().Wiki().Import();
|
||||
hdump_mgr.Init_by_db(import_cfg.Zip_tid_html(), hzip_enabled);
|
||||
if (zip_tid == Byte_.Max_value_127) zip_tid = import_cfg.Zip_tid_html();
|
||||
hdump_mgr.Init_by_db(zip_tid, hzip_enabled, hzip_b256);
|
||||
Xowd_db_mgr core_data_mgr = wiki.Db_mgr_as_sql().Core_data_mgr();
|
||||
this.ns_to_db_mgr = new Xob_ns_to_db_mgr(new Xob_ns_to_db_wkr__html(core_data_mgr.Db__core()), core_data_mgr, import_cfg.Html_db_max());
|
||||
Xob_ns_file_itm.Init_ns_bldr_data(Xowd_db_file_.Tid_html_data, wiki.Ns_mgr(), gplx.xowa.apps.apis.xowa.bldrs.imports.Xoapi_import.Ns_file_map__each);
|
||||
@@ -40,13 +44,11 @@ public class Xob_hdump_bldr implements GfoInvkAble {
|
||||
public void Insert(Xoae_page page) {
|
||||
page.File_queue().Clear(); // need to reset uid to 0, else xowa_file_# will resume from last
|
||||
wiki.Html_mgr().Page_wtr_mgr().Wkr(Xopg_page_.Tid_read).Write_body(tmp_bfr, Xoh_wtr_ctx.Hdump, page); // write to html in hdump mode
|
||||
byte[] html_orig_bry = tmp_bfr.To_bry_and_clear();
|
||||
page.Hdump_data().Body_(html_orig_bry); // write to body bry
|
||||
byte[] orig_bry = tmp_bfr.To_bry_and_clear();
|
||||
page.Hdump_data().Body_(orig_bry); // write to body bry
|
||||
Xowd_db_file html_db = ns_to_db_mgr.Get_by_ns(page.Ttl().Ns().Bldr_data(), prv_row_len); // get html_db
|
||||
this.prv_row_len = hdump_mgr.Save_mgr().Save(tmp_hpg.Ctor_by_page(tmp_bfr, page), html_db, true); // save to db
|
||||
if (hzip_compare) {
|
||||
// Compare(html_orig_bry, hdump_mgr.Save_mgr());
|
||||
}
|
||||
if (hzip_diff) Hzip_exec(orig_bry);
|
||||
stats_tbl.Insert(tmp_hpg, tmp_stat_itm, page.Root().Root_src().length, tmp_hpg.Body().length, prv_row_len); // save stats
|
||||
}
|
||||
public void Bld_term() {this.Commit(); ns_to_db_mgr.Rls_all();}
|
||||
@@ -54,12 +56,21 @@ public class Xob_hdump_bldr implements GfoInvkAble {
|
||||
ns_to_db_mgr.Commit();
|
||||
// wiki_db_mgr.Tbl__cfg().Update_long(Cfg_grp_hdump_make, Cfg_itm_hdump_size, hdump_db_size); // update cfg; should happen after commit entries
|
||||
}
|
||||
private void Hzip_exec(byte[] orig_bry) {
|
||||
byte[] expd_bry = op_sys_is_wnt ? Bry_.Replace(tmp_bfr, orig_bry, Byte_ascii.Cr_lf_bry, Byte_ascii.Nl_bry) : orig_bry;
|
||||
byte[] actl_bry = hdump_mgr.Load_mgr().Decode_as_bry(tmp_bfr, tmp_hpg, hdump_mgr.Save_mgr().Src_as_hzip(), Bool_.Y);
|
||||
byte[][] diff = Bry_diff_.Diff_1st_line(expd_bry, actl_bry);
|
||||
if (diff != null)
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", String_.Format("hzip diff: page={0} lhs='{1}' rhs='{2}'", tmp_hpg.Url_bry_safe(), diff[0], diff[1]));
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_enabled_)) enabled = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_zip_tid_)) zip_tid = m.ReadByte("v");
|
||||
else if (ctx.Match(k, Invk_hzip_enabled_)) hzip_enabled = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_hzip_compare_)) hzip_compare = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_hzip_diff_)) hzip_diff = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_hzip_b256_)) hzip_b256 = m.ReadYn("v");
|
||||
else return GfoInvkAble_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
private static final String Invk_enabled_ = "enabled_", Invk_hzip_enabled_ = "hzip_enabled_", Invk_hzip_compare_ = "hzip_compare_";
|
||||
private static final String Invk_enabled_ = "enabled_", Invk_zip_tid_ = "zip_tid_", Invk_hzip_enabled_ = "hzip_enabled_", Invk_hzip_diff_ = "hzip_diff_", Invk_hzip_b256_ = "hzip_b256_";
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.dbs.*;
|
||||
class Xob_link_dump_tbl implements RlsAble {
|
||||
class Xob_link_dump_tbl implements Rls_able {
|
||||
public static final String Tbl_name = "link_dump"; private static final Db_meta_fld_list flds = Db_meta_fld_list.new_();
|
||||
public static final String
|
||||
Fld_uid = flds.Add_int_pkey_autonum("uid")
|
||||
|
||||
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.dbs.*; import gplx.core.brys.*;
|
||||
public class Xoh_page_tbl implements RlsAble {
|
||||
public class Xoh_page_tbl implements Rls_able {
|
||||
private final String tbl_name = "html"; private final Db_meta_fld_list flds = Db_meta_fld_list.new_();
|
||||
private final String fld_page_id, fld_head_flag, fld_body_flag, fld_display_ttl, fld_content_sub, fld_sidebar_div, fld_body;
|
||||
private final Db_conn conn; private Db_stmt stmt_select, stmt_insert, stmt_delete, stmt_update;
|
||||
|
||||
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.dbs.*;
|
||||
public class Xoh_redlink_tbl implements RlsAble {
|
||||
public class Xoh_redlink_tbl implements Rls_able {
|
||||
private final String tbl_name = "html_redlink"; private final Db_meta_fld_list flds = Db_meta_fld_list.new_();
|
||||
private final String fld_page_id, fld_redlink_uids;
|
||||
private final Db_conn conn; private Db_stmt stmt_select, stmt_insert, stmt_delete, stmt_update;
|
||||
|
||||
@@ -17,12 +17,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
public class Xoh_wtr_ctx {
|
||||
Xoh_wtr_ctx(byte mode) {this.mode = mode;} private byte mode;
|
||||
Xoh_wtr_ctx(int mode) {this.mode = mode;}
|
||||
public int Mode() {return mode;} private final int mode;
|
||||
public boolean Mode_is_alt() {return mode == Mode_alt;}
|
||||
public boolean Mode_is_display_title() {return mode == Mode_display_title;}
|
||||
public boolean Mode_is_popup() {return mode == Mode_popup;}
|
||||
public boolean Mode_is_hdump() {return mode == Mode_hdump;}
|
||||
public static final byte Mode_basic = 0, Mode_alt = 1, Mode_display_title = 2, Mode_popup = 3, Mode_hdump = 4;
|
||||
public static final int Mode_basic = 0, Mode_alt = 1, Mode_display_title = 2, Mode_popup = 3, Mode_hdump = 4;
|
||||
public static final Xoh_wtr_ctx
|
||||
Basic = new Xoh_wtr_ctx(Mode_basic)
|
||||
, Alt = new Xoh_wtr_ctx(Mode_alt)
|
||||
|
||||
@@ -37,9 +37,10 @@ public class Xoh_tidy_mgr implements GfoInvkAble {
|
||||
: (Xoh_tidy_wkr)wkr_tidy
|
||||
;
|
||||
}
|
||||
public void Run_tidy_html(Xoae_page page, Bry_bfr bfr) {
|
||||
public void Run_tidy_html(Xoae_page page, Bry_bfr bfr, boolean indent) {
|
||||
if (bfr.Len_eq_0()) return; // document is empty; do not exec b/c tidy will never generate files for 0 len files, and previous file will remain; DATE:2014-06-04
|
||||
Tidy_wrap(bfr);
|
||||
wkr.Indent_(indent);
|
||||
wkr.Exec_tidy(page, bfr);
|
||||
Tidy_unwrap(bfr);
|
||||
}
|
||||
|
||||
@@ -18,5 +18,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.htmls.core.htmls.tidy; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*;
|
||||
// Contract for a worker that tidies generated html.
public interface Xoh_tidy_wkr {
|
||||
// Type id of the implementation; EX: the null worker returns Xoh_tidy_wkr_.Tid_null
byte Tid();
|
||||
// Sets whether tidied output should be indented; Xoh_tidy_mgr.Run_tidy_html calls this before each Exec_tidy
void Indent_(boolean v);
|
||||
// Tidies the html held in bfr for the given page; the process-based worker writes its result back into the same bfr
void Exec_tidy(Xoae_page page, Bry_bfr bfr);
|
||||
}
|
||||
|
||||
@@ -38,5 +38,6 @@ public class Xoh_tidy_wkr_ {
|
||||
}
|
||||
// No-op tidy worker: reports Tid_null and leaves the buffer untouched.
class Xoh_tidy_wkr_null implements Xoh_tidy_wkr {
|
||||
public byte Tid() {return Xoh_tidy_wkr_.Tid_null;}
|
||||
// intentionally empty: there is no tidy engine to configure
public void Indent_(boolean v) {}
|
||||
// intentionally empty: bfr is left exactly as passed in
public void Exec_tidy(Xoae_page page, Bry_bfr bfr) {}
|
||||
}
|
||||
|
||||
@@ -29,6 +29,7 @@ class Xoh_tidy_wkr_jtidy implements Xoh_tidy_wkr {
|
||||
public void tidy_init() {
|
||||
long bgn = Env_.TickCount();
|
||||
wtr = new ByteArrayOutputStream();
|
||||
System.setProperty("line.separator", "\n");
|
||||
tidy = new Tidy(); // obtain a new Tidy instance
|
||||
tidy.setInputEncoding("UTF-8"); // -utf8
|
||||
tidy.setOutputEncoding("UTF-8"); // -utf8
|
||||
@@ -55,6 +56,10 @@ class Xoh_tidy_wkr_jtidy implements Xoh_tidy_wkr {
|
||||
public void Init_by_app(Xoae_app app) {
|
||||
this.app = app;
|
||||
}
|
||||
public void Indent_(boolean v) {
|
||||
if (tidy == null) tidy_init(); // lazy create to skip tests
|
||||
tidy.setIndentContent(v);
|
||||
}
|
||||
public void Exec_tidy(Xoae_page page, Bry_bfr bfr) {
|
||||
if (tidy == null) tidy_init(); // lazy create to skip tests
|
||||
// int bfr_len = bfr.Len();
|
||||
|
||||
@@ -27,24 +27,26 @@ public class Xoh_tidy_wkr_tidy extends Process_adp implements Xoh_tidy_wkr { pr
|
||||
tidy_target = v.GenSubFil("tidy_target.html");
|
||||
return super.Tmp_dir_(v);
|
||||
}
|
||||
// Records the requested indent setting as "y" / "n" in the static Indent_val.
// NOTE(review): Indent_val is static, so the value is shared by every instance of this worker.
// NOTE(review): Args_fmt concatenates Indent_val once in its static initializer; assigning Indent_val here does not change that already-built string.
// Unless the args are rebuilt somewhere not visible here, "--indent" stays at its initial "y" -- verify.
public void Indent_(boolean v) {Indent_val = v ? "y" : "n";}
|
||||
public void Exec_tidy(Xoae_page page, Bry_bfr bfr) {
|
||||
int bfr_len = bfr.Len();
|
||||
long bgn = Env_.TickCount();
|
||||
Io_mgr.Instance.SaveFilBfr(tidy_source, bfr); // saves bfr to source; clears bfr
|
||||
this.Run(tidy_source.Raw(), tidy_target.Raw()); // converts source to target
|
||||
this.Run(tidy_source.Raw(), tidy_target.Raw()); // converts source to target
|
||||
Io_mgr.Instance.LoadFilBryByBfr(tidy_target, bfr); // loads bfr by target
|
||||
if (bfr.Len_eq_0()) // something went wrong; load from source
|
||||
if (bfr.Len_eq_0()) // something went wrong; load from source
|
||||
Io_mgr.Instance.LoadFilBryByBfr(tidy_source, bfr); // loads bfr by target
|
||||
app.Usr_dlg().Log_many("", "", "tidy exec; elapsed=~{0} len=~{1}", Env_.TickCount_elapsed_in_frac(bgn), bfr_len);
|
||||
}
|
||||
public static final String Args_fmt = String_.Concat // see https://meta.wikimedia.org/wiki/Data_dumps; missing numeric-entities:yes; enclose-text: yes
|
||||
private static String Indent_val = "y";
|
||||
public static String Args_fmt = String_.Concat // see https://meta.wikimedia.org/wiki/Data_dumps; missing numeric-entities:yes; enclose-text: yes
|
||||
( "-utf8" // default is ascii
|
||||
, " --force-output y" // always generate output; do not fail on error
|
||||
, " --quiet y" // suppress command-line header
|
||||
, " --tidy-mark n" // do not add tidy watermark
|
||||
, " --doctype ''''" // set to empty else some wikis will show paragraph text with little vertical gap; PAGE:tr.b:
|
||||
, " --wrap 0" // default is 80; do not limit lines to 80 chars
|
||||
, " --indent y" // indent block levels
|
||||
, " --indent ", Indent_val // indent block levels
|
||||
, " --quote-nbsp y" // preserve nbsp as entities; do not convert to Unicode character 160
|
||||
, " --literal-attributes y" // do not alter whitespace chars in attributes
|
||||
, " --wrap-attributes n" // do not line-wrap attribute values (assume tidy will try to take a="b\nc" and change to a="b c" which may cause some fidelity issues?)
|
||||
|
||||
@@ -35,7 +35,7 @@ public class Xoh_lnki_bldr {
|
||||
public Xoh_lnki_bldr Id_(byte[] v) {this.id = Html_utl.Escape_for_atr_val_as_bry(tmp_bfr, Byte_ascii.Apos, v); return this;}
|
||||
public Xoh_lnki_bldr Href_(Xow_wiki wiki, byte[] bry) {return Href_(wiki.Domain_bry(), wiki.Ttl_parse(bry));}
|
||||
public Xoh_lnki_bldr Href_(byte[] domain_bry, Xoa_ttl ttl) {
|
||||
href_wtr.Build_to_bfr(tmp_bfr, app, domain_bry, ttl, Bool_.Y);
|
||||
href_wtr.Build_to_bfr(tmp_bfr, app, Xoh_wtr_ctx.Mode_popup, domain_bry, ttl);
|
||||
this.href = tmp_bfr.To_bry_and_clear();
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.htmls.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.htmls.hrefs.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.htmls.hrefs.*; import gplx.xowa.htmls.core.htmls.*;
|
||||
public class Xoh_lnki_wtr_utl {
|
||||
private final Xoa_app app; private final Xow_wiki wiki; private final Xoh_href_wtr href_wtr; private final Bry_bfr tmp_bfr = Bry_bfr.new_(255);
|
||||
public Xoh_lnki_wtr_utl(Xow_wiki wiki, Xoh_href_wtr href_wtr) {
|
||||
@@ -25,7 +25,7 @@ public class Xoh_lnki_wtr_utl {
|
||||
}
|
||||
public byte[] Bld_href(byte[] page) {return Bld_href(wiki.Domain_bry(), wiki.Ttl_parse(page));}
|
||||
public byte[] Bld_href(byte[] domain_bry, Xoa_ttl ttl) {
|
||||
href_wtr.Build_to_bfr(tmp_bfr, app, domain_bry, ttl, Bool_.Y);
|
||||
href_wtr.Build_to_bfr(tmp_bfr, app, Xoh_wtr_ctx.Mode_popup, domain_bry, ttl);
|
||||
return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
public byte[] Bld_title(byte[] text) {
|
||||
|
||||
21
400_xowa/src/gplx/xowa/htmls/core/hzips/Xoh_hzip_dict.java
Normal file
21
400_xowa/src/gplx/xowa/htmls/core/hzips/Xoh_hzip_dict.java
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
// Placeholder: declares no members in this revision. (The hzip constants live in Xoh_hzip_dict_.)
public class Xoh_hzip_dict {
|
||||
|
||||
}
|
||||
@@ -20,36 +20,15 @@ import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
public class Xoh_hzip_dict_ {
|
||||
public static final byte Escape = Byte_.By_int(27); // SERIALIZED: 27=escape byte
|
||||
public static final byte[] Escape_bry = Bry_.new_ints(27); // SERIALIZED
|
||||
private static final byte Base85_ascii = 33;
|
||||
public static final byte // SERIALIZED
|
||||
Tid__space = 0 + Base85_ascii
|
||||
, Tid__hdr = 1 + Base85_ascii
|
||||
, Tid__lnke = 2 + Base85_ascii
|
||||
, Tid__lnki = 3 + Base85_ascii
|
||||
, Tid__img = 4 + Base85_ascii
|
||||
, Tid__thm = 5 + Base85_ascii
|
||||
, Tid__gly = 6 + Base85_ascii
|
||||
, Tid__escape = 84 + Base85_ascii
|
||||
;
|
||||
public static final byte[]
|
||||
Bry__escape = Bry_.new_ints(Escape, Tid__escape)
|
||||
, Bry__space = Bry_.new_ints(Escape, Tid__space)
|
||||
, Bry__hdr = Bry_.new_ints(Escape, Tid__hdr)
|
||||
, Bry__lnke = Bry_.new_ints(Escape, Tid__lnke)
|
||||
, Bry__lnki = Bry_.new_ints(Escape, Tid__lnki)
|
||||
, Bry__img = Bry_.new_ints(Escape, Tid__img)
|
||||
, Bry__thm = Bry_.new_ints(Escape, Tid__thm)
|
||||
, Bry__gly = Bry_.new_ints(Escape, Tid__gly)
|
||||
;
|
||||
public static final String
|
||||
Key__escape = "escape"
|
||||
, Key__space = "space"
|
||||
, Key__hdr = "hdr"
|
||||
, Key__lnke = "lnke"
|
||||
, Key__lnki = "lnki"
|
||||
, Key__img = "img"
|
||||
, Key__thm = "thm"
|
||||
, Key__gly = "gly"
|
||||
, Key__xnde = "xnde"
|
||||
;
|
||||
public static final int Hzip__none = 0, Hzip__v1 = 1;
|
||||
}
|
||||
|
||||
148
400_xowa/src/gplx/xowa/htmls/core/hzips/Xoh_hzip_int.java
Normal file
148
400_xowa/src/gplx/xowa/htmls/core/hzips/Xoh_hzip_int.java
Normal file
@@ -0,0 +1,148 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.core.primitives.*; import gplx.core.encoders.*;
|
||||
public class Xoh_hzip_int {
|
||||
private boolean mode_is_b256; private byte pad_byte; private byte[] prefix_ary;
|
||||
public Xoh_hzip_int Mode_is_b256_(boolean v) {
|
||||
mode_is_b256 = v;
|
||||
if (mode_is_b256) {
|
||||
pad_byte = Byte_.Zero;
|
||||
prefix_ary = prefix_ary__b256;
|
||||
}
|
||||
else {
|
||||
pad_byte = Byte_ascii.Bang;
|
||||
prefix_ary = prefix_ary__b085;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public void Encode(int reqd_len, Bry_bfr bfr, int val) {
|
||||
int calc_len = Calc_len(mode_is_b256, val);
|
||||
int full_len = Full_len(mode_is_b256, val, calc_len, reqd_len, B256__pow__ary);
|
||||
int hdr_adj = full_len == calc_len || full_len == reqd_len ? 0 : 1;
|
||||
int bfr_len = bfr.Len();
|
||||
bfr.Add_byte_repeat(pad_byte, full_len); // fill with 0s; asserts that underlying array will be large enough for following write
|
||||
byte[] bfr_bry = bfr.Bfr(); // NOTE: set bry reference here b/c Add_byte_repeat may create a new one
|
||||
if (mode_is_b256)
|
||||
Set_bry(val, bfr_bry, bfr_len + hdr_adj, reqd_len, calc_len, pad_byte, B256__pow__ary);
|
||||
else
|
||||
Base85_.Set_bry(val, bfr_bry, bfr_len + hdr_adj, reqd_len); // calc base85 val for val; EX: 7224 -> "uu"
|
||||
if (hdr_adj == 1)
|
||||
bfr_bry[bfr_len] = prefix_ary[full_len]; // write the hdr_byte; EX: 256 -> 253, 1, 0 where 253 is the hdr_byte
|
||||
}
|
||||
public int Decode(int reqd_len, byte[] src, int src_len, int src_bgn, Int_obj_ref pos_ref) {
|
||||
int radix = 256; byte offset = Byte_.Zero;
|
||||
boolean hdr_byte_exists = false;
|
||||
int full_len = 1; // default to 1
|
||||
byte b0 = src[src_bgn];
|
||||
if (mode_is_b256) {
|
||||
switch (b0) {
|
||||
case prefix__b256__2: full_len = 2; hdr_byte_exists = true; break;
|
||||
case prefix__b256__3: full_len = 3; hdr_byte_exists = true; break;
|
||||
case prefix__b256__4: full_len = 4; hdr_byte_exists = true; break;
|
||||
case prefix__b256__5: full_len = 5; hdr_byte_exists = true; break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
radix = 85; offset = Byte_ascii.Bang;
|
||||
switch (b0) {
|
||||
case Byte_ascii.Curly_bgn : full_len = 3; hdr_byte_exists = true; break;
|
||||
case Byte_ascii.Pipe : full_len = 4; hdr_byte_exists = true; break;
|
||||
case Byte_ascii.Curly_end : full_len = 5; hdr_byte_exists = true; break;
|
||||
case Byte_ascii.Tilde : full_len = 6; hdr_byte_exists = true; break;
|
||||
}
|
||||
}
|
||||
if (full_len < reqd_len) full_len = reqd_len; // len should be padded
|
||||
int src_end = src_bgn + full_len;
|
||||
pos_ref.Val_(src_end);
|
||||
if (hdr_byte_exists) ++src_bgn;
|
||||
return To_int_by_bry(src, src_bgn, src_end, offset, radix);
|
||||
}
|
||||
private static int Calc_len(boolean mode_is_b256, int v) {
|
||||
if (mode_is_b256) {
|
||||
if (v < B256__max__expd__1) return 1;
|
||||
else if (v < B256__max__expd__2) return 2;
|
||||
else if (v < B256__max__expd__3) return 3;
|
||||
else return 4;
|
||||
}
|
||||
else
|
||||
return Base85_.Bry_len(v);
|
||||
}
|
||||
private static int Full_len(boolean mode_is_b256, int v, int calc_len, int reqd_len, int[] pow_ary) {
|
||||
int reqd_max = v;
|
||||
if (mode_is_b256) {
|
||||
reqd_max = B256__pow__ary[reqd_len]; // EX: if reqd_len = 2, then reqd_max = 65536
|
||||
int hdr_byte_adj = 1; // default to hdr_byte
|
||||
if ( calc_len == reqd_len // only do this check if calc_len == reqd_len; i.e.: reqd_len = 2; only want to check values that would be represented with two digits where 1st digit might be 252-255; EX: 64512 is "252, 0" but 252 is reserverd; instead "253, 252, 0"
|
||||
&& v < (reqd_max - (4 * B256__pow__ary[calc_len - 1])) // calculates if current value will produce a 252-255 range in the 1st byte; note that 4 is for 255-252
|
||||
) {
|
||||
hdr_byte_adj = 0;
|
||||
}
|
||||
return calc_len + hdr_byte_adj;
|
||||
}
|
||||
else {
|
||||
reqd_max = Base85_.Pow85[reqd_len];
|
||||
if (v < reqd_max) return reqd_len;
|
||||
if (v < Base85_.Pow85[2]) return 3;
|
||||
else if (v < Base85_.Pow85[3]) return 4;
|
||||
else if (v < Base85_.Pow85[4]) return 5;
|
||||
else return 6;
|
||||
}
|
||||
}
|
||||
private static void Set_bry(int val, byte[] src, int src_bgn, int reqd_len, int calc_len, byte pad_byte, int[] pow_ary) {
|
||||
int val_len = -1, pad_len = -1;
|
||||
boolean pad = calc_len < reqd_len;
|
||||
if (pad) {
|
||||
val_len = reqd_len;
|
||||
pad_len = reqd_len - calc_len;
|
||||
}
|
||||
else {
|
||||
val_len = calc_len;
|
||||
pad_len = 0;
|
||||
}
|
||||
if (pad) {
|
||||
for (int i = 0; i < pad_len; i++) // fill src with pad_len
|
||||
src[i + src_bgn] = pad_byte;
|
||||
}
|
||||
for (int i = val_len - pad_len; i > 0; --i) {
|
||||
int div = pow_ary[i - 1];
|
||||
byte tmp = (byte)(val / div);
|
||||
src[src_bgn + val_len - i] = (byte)(tmp + pad_byte);
|
||||
val -= tmp * div;
|
||||
}
|
||||
}
|
||||
private static int To_int_by_bry(byte[] src, int bgn, int end, byte offset, int radix) {
|
||||
int rv = 0, factor = 1;
|
||||
for (int i = end - 1; i >= bgn; --i) {
|
||||
rv += ((src[i] & 0xFF) - offset) * factor; // PATCH.JAVA:need to convert to unsigned byte
|
||||
factor *= radix;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private static final int
|
||||
B256__max__expd__1 = 256 // 256
|
||||
, B256__max__expd__2 = 65536 // 65,536
|
||||
, B256__max__expd__3 = 16777216 // 16,777,216
|
||||
;
|
||||
private static final int[] B256__pow__ary = new int[] {1, B256__max__expd__1, B256__max__expd__2, B256__max__expd__3, Int_.Max_value};
|
||||
private static final byte prefix__b256__2 = (byte)(252 & 0xFF), prefix__b256__3 = (byte)(253 & 0xFF), prefix__b256__4 = (byte)(254 & 0xFF), prefix__b256__5 = (byte)(255 & 0xFF);
|
||||
private static final byte[]
|
||||
prefix_ary__b256 = new byte[] {0, 0, prefix__b256__2, prefix__b256__3, prefix__b256__4, prefix__b256__5}
|
||||
, prefix_ary__b085 = new byte[] {0, 0, 0, Byte_ascii.Curly_bgn, Byte_ascii.Pipe, Byte_ascii.Curly_end, Byte_ascii.Tilde}
|
||||
;
|
||||
}
|
||||
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import org.junit.*;
|
||||
public class Xoh_hzip_int_tst {
|
||||
private final Xoh_hzip_int_fxt fxt = new Xoh_hzip_int_fxt();
|
||||
@Test public void B256__reqd__1() {
|
||||
fxt.Init__b256();
|
||||
fxt.Test__b256(1, 0 , 0);
|
||||
fxt.Test__b256(1, 252 , 252, 252);
|
||||
fxt.Test__b256(1, 253 , 252, 253);
|
||||
fxt.Test__b256(1, 254 , 252, 254);
|
||||
fxt.Test__b256(1, 255 , 252, 255);
|
||||
fxt.Test__b256(1, 256 , 253, 1, 0);
|
||||
fxt.Test__b256(1, 65535 , 253, 255, 255);
|
||||
fxt.Test__b256(1, 65536 , 254, 1, 0, 0);
|
||||
fxt.Test__b256(1, 16777216 , 255, 1, 0, 0, 0);
|
||||
}
|
||||
@Test public void B256__reqd__2() {
|
||||
fxt.Init__b256();
|
||||
fxt.Test__b256(2, 0 , 0, 0);
|
||||
fxt.Test__b256(2, 252 , 0, 252);
|
||||
fxt.Test__b256(2, 253 , 0, 253);
|
||||
fxt.Test__b256(2, 254 , 0, 254);
|
||||
fxt.Test__b256(2, 255 , 0, 255);
|
||||
fxt.Test__b256(2, 256 , 1, 0);
|
||||
fxt.Test__b256(2, 64511 , 251, 255);
|
||||
fxt.Test__b256(2, 64512 , 253, 252, 0);
|
||||
fxt.Test__b256(2, 65535 , 253, 255, 255);
|
||||
fxt.Test__b256(2, 65536 , 254, 1, 0, 0);
|
||||
fxt.Test__b256(2, 16777216 , 255, 1, 0, 0, 0);
|
||||
}
|
||||
@Test public void B085__reqd__1() {
|
||||
fxt.Init__b085();
|
||||
fxt.Test__b085(1, 0, "!");
|
||||
fxt.Test__b085(1, 84, "u");
|
||||
fxt.Test__b085(1, 85, "{\"!");
|
||||
fxt.Test__b085(1, 7225, "|\"!!");
|
||||
fxt.Test__b085(1, 614125, "}\"!!!");
|
||||
fxt.Test__b085(1, 52200625, "~\"!!!!");
|
||||
}
|
||||
@Test public void B085__reqd__2() {
|
||||
fxt.Init__b085();
|
||||
fxt.Test__b085(2, 0, "!!");
|
||||
fxt.Test__b085(2, 84, "!u");
|
||||
fxt.Test__b085(2, 85, "\"!");
|
||||
fxt.Test__b085(2, 7225, "|\"!!");
|
||||
fxt.Test__b085(2, 614125, "}\"!!!");
|
||||
fxt.Test__b085(2, 52200625, "~\"!!!!");
|
||||
}
|
||||
}
|
||||
class Xoh_hzip_int_fxt {
|
||||
private final Bry_bfr bfr = Bry_bfr.new_();
|
||||
private final gplx.core.primitives.Int_obj_ref count_ref = gplx.core.primitives.Int_obj_ref.neg1_();
|
||||
private final Xoh_hzip_int hzint = new Xoh_hzip_int();
|
||||
public void Init__b256() {hzint.Mode_is_b256_(Bool_.Y);}
|
||||
public void Init__b085() {hzint.Mode_is_b256_(Bool_.N);}
|
||||
public void Test__b256(int reqd, int val, int... expd_ints) {
|
||||
hzint.Encode(reqd, bfr, val);
|
||||
byte[] actl = bfr.To_bry_and_clear();
|
||||
byte[] expd = Byte_.Ary_by_ints(expd_ints);
|
||||
Tfds.Eq_ary(expd, actl, Int_.To_str(val));
|
||||
Tfds.Eq(val, hzint.Decode(reqd, actl, actl.length, 0, count_ref));
|
||||
}
|
||||
public void Test__b085(int reqd, int val, String expd) {
|
||||
hzint.Encode(reqd, bfr, val);
|
||||
byte[] actl = bfr.To_bry_and_clear();
|
||||
Tfds.Eq(expd, String_.new_u8(actl));
|
||||
Tfds.Eq(val, hzint.Decode(reqd, actl, actl.length, 0, count_ref));
|
||||
}
|
||||
}
|
||||
@@ -16,43 +16,57 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.btries.*; import gplx.xowa.wikis.ttls.*;
|
||||
import gplx.langs.htmls.parsers.*;
|
||||
import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
public class Xoh_hzip_mgr {
|
||||
private final Xoh_hdoc_parser hdoc_parser = new Xoh_hdoc_parser(new Xoh_hdoc_wkr__hzip());
|
||||
import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.btries.*; import gplx.core.threads.poolables.*;
|
||||
import gplx.langs.htmls.parsers.*; import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import gplx.xowa.wikis.ttls.*;
|
||||
public class Xoh_hzip_mgr implements Xoh_hzip_wkr {
|
||||
private final Xoh_hdoc_wkr hdoc_wkr = new Xoh_hdoc_wkr__hzip();
|
||||
private final Xoh_hdoc_parser hdoc_parser;
|
||||
private final Bry_rdr rdr = new Bry_rdr().Dflt_dlm_(Xoh_hzip_dict_.Escape);
|
||||
public Xoh_hzip_mgr() {this.hdoc_parser = new Xoh_hdoc_parser(hdoc_wkr);}
|
||||
public String Key() {return "root";}
|
||||
public byte[] Hook() {return hook;} private byte[] hook;
|
||||
public Xoh_hdoc_ctx Hctx() {return hctx;} private final Xoh_hdoc_ctx hctx = new Xoh_hdoc_ctx();
|
||||
public void Init_by_app(Xoa_app app) {hctx.Init_by_app(app);}
|
||||
public void Encode(Bry_bfr bfr, Xow_wiki wiki, Xoh_page hpg, byte[] src) {
|
||||
public byte[] Encode_as_bry(Xoh_hzip_bfr bfr, Xow_wiki wiki, Xoh_page hpg, byte[] src) {Encode(bfr, wiki, hpg, src); return bfr.To_bry_and_clear();}
|
||||
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {throw Err_.new_unimplemented();}
|
||||
public void Encode(Xoh_hzip_bfr bfr, Xow_wiki wiki, Xoh_page hpg, byte[] src) {
|
||||
hctx.Init_by_page(wiki, hpg.Url_bry_safe());
|
||||
hdoc_parser.Parse(bfr, hpg, hctx, src);
|
||||
}
|
||||
public void Decode(Bry_bfr bfr, Xow_wiki wiki, Xoh_page hpg, byte[] src) {
|
||||
byte[] page_url = hpg.Url_bry_safe();
|
||||
byte[] page_url = hpg.Url_bry_safe(); int src_len = src.length;
|
||||
hctx.Init_by_page(wiki, page_url);
|
||||
int pos = 0, txt_bgn = -1, src_len = src.length;
|
||||
rdr.Init_by_page(page_url, src, src_len);
|
||||
while (pos < src_len) {
|
||||
if (src[pos] == Xoh_hzip_dict_.Escape) {
|
||||
if (txt_bgn != -1) {bfr.Add_mid(src, txt_bgn, pos); txt_bgn = -1;} // handle pending txt
|
||||
int nxt_pos = pos + 1; if (nxt_pos == src_len) break; // handle escape at end of document
|
||||
Xoh_hzip_wkr wkr = hctx.Mkr().Hzip__wkr(src[nxt_pos]);
|
||||
try {
|
||||
rdr.Init_by_hook(wkr.Key(), pos, pos + 2);
|
||||
wkr.Decode(bfr, Bool_.Y, hctx, hpg, rdr, src, pos);
|
||||
pos = rdr.Pos();
|
||||
} catch (Exception e) {
|
||||
wkr.Pool__rls();
|
||||
gplx.langs.htmls.Html_utl.Log(e, "hzip decode failed", hpg.Url_bry_safe(), src, pos);
|
||||
pos += 2; // 2: skip escape and hook
|
||||
}
|
||||
}
|
||||
else {
|
||||
Decode(bfr, hdoc_wkr, hctx, hpg, Bool_.Y, rdr, src, 0, src_len);
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
|
||||
int pos = src_bgn, txt_bgn = -1;
|
||||
while (true) {
|
||||
if (pos == src_end) break;
|
||||
byte b = src[pos];
|
||||
Object o = hctx.Wkr_mkr().Get(b, src, pos, src_end);
|
||||
if (o == null) {
|
||||
if (txt_bgn == -1) txt_bgn = pos;
|
||||
++pos;
|
||||
}
|
||||
else {
|
||||
if (txt_bgn != -1) {bfr.Add_mid(src, txt_bgn, pos); txt_bgn = -1;} // handle pending txt
|
||||
Xoh_hzip_wkr wkr = (Xoh_hzip_wkr)o;
|
||||
try {
|
||||
rdr.Init_by_sect(wkr.Key(), pos, pos + 2);
|
||||
wkr.Decode(bfr, hdoc_wkr, hctx, hpg, Bool_.Y, rdr, src, pos, src_end);
|
||||
pos = rdr.Pos();
|
||||
} catch (Exception e) {
|
||||
gplx.langs.htmls.Html_utl.Log(e, "hzip decode failed", hpg.Url_bry_safe(), src, pos);
|
||||
pos += 2; // 2: skip escape and hook
|
||||
}
|
||||
finally {wkr.Pool__rls();}
|
||||
}
|
||||
}
|
||||
if (txt_bgn != -1) bfr.Add_mid(src, txt_bgn, src_len);
|
||||
}
|
||||
if (txt_bgn != -1) bfr.Add_mid(src, txt_bgn, src_end);
|
||||
return src_end;
|
||||
}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_hzip_mgr rv = new Xoh_hzip_mgr(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
|
||||
}
|
||||
|
||||
@@ -16,9 +16,11 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.core.brys.*;
|
||||
import gplx.core.brys.*; import gplx.core.threads.poolables.*;
|
||||
import gplx.xowa.htmls.core.wkrs.*;
|
||||
public interface Xoh_hzip_wkr extends gplx.core.threads.poolables.Gfo_poolable_itm {
|
||||
String Key();
|
||||
int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx ctx, Xoh_page hpg, Bry_rdr parser, byte[] src, int hook_bgn);
|
||||
byte[] Hook();
|
||||
Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj);
|
||||
int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.core.btries.*; import gplx.core.threads.poolables.*;
|
||||
import gplx.xowa.htmls.core.wkrs.escapes.*;
|
||||
import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.lnkes.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
|
||||
import gplx.xowa.htmls.core.wkrs.imgs.*; import gplx.xowa.htmls.core.wkrs.thms.*; import gplx.xowa.htmls.core.wkrs.glys.*;
|
||||
public class Xoh_hzip_wkr_mgr {
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
private Gfo_poolable_mgr mkr__escape, mkr__lnke, mkr__lnki, mkr__hdr, mkr__img, mkr__thm, mkr__gly;
|
||||
public Xoh_escape_hzip Mw__escape() {return (Xoh_escape_hzip) mkr__escape.Get_fast();}
|
||||
public Xoh_hdr_hzip Mw__hdr() {return (Xoh_hdr_hzip) mkr__hdr.Get_fast();}
|
||||
public Xoh_lnke_hzip Mw__lnke() {return (Xoh_lnke_hzip) mkr__lnke.Get_fast();}
|
||||
public Xoh_lnki_hzip Mw__lnki() {return (Xoh_lnki_hzip) mkr__lnki.Get_fast();}
|
||||
public Xoh_img_hzip Mw__img() {return (Xoh_img_hzip) mkr__img.Get_fast();}
|
||||
public Xoh_thm_hzip Mw__thm() {return (Xoh_thm_hzip) mkr__thm.Get_fast();}
|
||||
public Xoh_gly_hzip Mw__gly() {return (Xoh_gly_hzip) mkr__gly.Get_fast();}
|
||||
public void Init() {
|
||||
this.Reg_all(false, Byte_ascii.Escape);
|
||||
}
|
||||
public Xoh_hzip_wkr Get(byte b, byte[] src, int src_bgn, int src_end) {
|
||||
Object mgr_obj = trie.Match_bgn_w_byte(b, src, src_bgn, src_end); if (mgr_obj == null) return null;
|
||||
Gfo_poolable_mgr mgr = (Gfo_poolable_mgr)mgr_obj;
|
||||
return (Xoh_hzip_wkr)mgr.Get_fast();
|
||||
}
|
||||
private void Reg_all(boolean mode_is_b256, int escape__mw) {
|
||||
mkr__escape = Reg(New_hook_len2(mode_is_b256, escape__mw, 84), new Xoh_escape_hzip());
|
||||
mkr__hdr = Reg(New_hook_len2(mode_is_b256, escape__mw, 1), new Xoh_hdr_hzip());
|
||||
mkr__lnke = Reg(New_hook_len2(mode_is_b256, escape__mw, 2), new Xoh_lnke_hzip());
|
||||
mkr__lnki = Reg(New_hook_len2(mode_is_b256, escape__mw, 3), new Xoh_lnki_hzip());
|
||||
mkr__img = Reg(New_hook_len2(mode_is_b256, escape__mw, 4), new Xoh_img_hzip());
|
||||
mkr__thm = Reg(New_hook_len2(mode_is_b256, escape__mw, 5), new Xoh_thm_hzip());
|
||||
mkr__gly = Reg(New_hook_len2(mode_is_b256, escape__mw, 6), new Xoh_gly_hzip());
|
||||
}
|
||||
private Gfo_poolable_mgr Reg(byte[] hook, Gfo_poolable_itm proto) {
|
||||
Gfo_poolable_mgr rv = Gfo_poolable_mgr_.New(1, 32, proto, Object_.Ary(hook));
|
||||
trie.Add_obj(hook, rv);
|
||||
return rv;
|
||||
}
|
||||
private static byte[] New_hook_len2(boolean mode_is_b256, int b0, int b1) {
|
||||
return Bry_.new_ints(b0, mode_is_b256 ? b1 : b1 + Byte_ascii.Bang);
|
||||
}
|
||||
}
|
||||
@@ -16,13 +16,14 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
public class Hzip_stat_itm {
|
||||
import gplx.xowa.htmls.core.wkrs.lnkes.*;
|
||||
public class Xoh_stat_itm {
|
||||
public void Clear() {
|
||||
a_rhs = lnki_text_n = lnki_text_y = lnke__free = lnke__auto = lnke__text = 0;
|
||||
hdr_1 = hdr_2 = hdr_3 = hdr_4 = hdr_5 = hdr_6 = 0;
|
||||
img_full = 0;
|
||||
space = 0;
|
||||
escape = 0;
|
||||
Bry_.Clear(escape_bry);
|
||||
}
|
||||
public int A_rhs() {return a_rhs;} public void A_rhs_add() {++a_rhs;} private int a_rhs;
|
||||
public int Lnki_text_n() {return lnki_text_n;} public void Lnki_text_n_add() {++lnki_text_n;} private int lnki_text_n;
|
||||
@@ -38,7 +39,7 @@ public class Hzip_stat_itm {
|
||||
public int Hdr_5() {return hdr_5;} private int hdr_5;
|
||||
public int Hdr_6() {return hdr_6;} private int hdr_6;
|
||||
public int Space() {return space;} public void Space_add(int v) {space += v;} private int space;
|
||||
public int Escape() {return escape;} public void Escape_add_one() {++escape;} private int escape;
|
||||
public byte[] Escape_bry() {return escape_bry;} public void Escape_add(byte v) {escape_bry[v] += 1;} private final byte[] escape_bry = new byte[256];
|
||||
public void Hdr_add(int hdr_num) {
|
||||
switch (hdr_num) {
|
||||
case 1: ++hdr_1; break;
|
||||
@@ -50,4 +51,13 @@ public class Hzip_stat_itm {
|
||||
default: throw Err_.new_unhandled(hdr_num);
|
||||
}
|
||||
}
|
||||
public void Lnki_add(int orig_len, int hzip_len, int flag) {
|
||||
}
|
||||
public void Lnke_add(byte lnke_type) {
|
||||
switch (lnke_type) {
|
||||
case Xoh_lnke_dict_.Type__free: ++lnke__free; break;
|
||||
case Xoh_lnke_dict_.Type__auto: ++lnke__auto; break;
|
||||
case Xoh_lnke_dict_.Type__text: ++lnke__text; break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.dbs.*; import gplx.dbs.engines.sqlite.*; import gplx.xowa.htmls.core.makes.imgs.*; import gplx.xowa.htmls.core.hzips.*;
|
||||
import gplx.xowa.wikis.pages.*;
|
||||
public class Hzip_stat_tbl implements RlsAble {
|
||||
public class Xoh_stat_tbl implements Rls_able {
|
||||
private static final String tbl_name = "hdump_stats"; private static final Db_meta_fld_list flds = Db_meta_fld_list.new_();
|
||||
private static final String
|
||||
fld_page_id = flds.Add_int_pkey("page_id"), fld_wtxt_len = flds.Add_int("wtxt_len"), fld_row_orig_len = flds.Add_int("row_orig_len"), fld_row_zip_len = flds.Add_int("row_zip_len")
|
||||
@@ -30,7 +30,7 @@ public class Hzip_stat_tbl implements RlsAble {
|
||||
, fld_img_full = flds.Add_int("img_full")
|
||||
;
|
||||
private final Db_conn conn; private Db_stmt stmt_insert;
|
||||
public Hzip_stat_tbl(Db_conn conn) {
|
||||
public Xoh_stat_tbl(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
this.Create_tbl();
|
||||
conn.Stmt_delete(tbl_name).Exec_delete(); // always zap table
|
||||
@@ -40,7 +40,7 @@ public class Hzip_stat_tbl implements RlsAble {
|
||||
public void Rls() {
|
||||
stmt_insert = Db_stmt_.Rls(stmt_insert);
|
||||
}
|
||||
public void Insert(Xoh_page hpg, Hzip_stat_itm hzip, int wtxt_len, int row_orig_len, int row_zip_len) {
|
||||
public void Insert(Xoh_page hpg, Xoh_stat_itm hzip, int wtxt_len, int row_orig_len, int row_zip_len) {
|
||||
Xopg_module_mgr js_mgr = hpg.Head_mgr();
|
||||
if (stmt_insert == null) stmt_insert = conn.Stmt_insert(tbl_name, flds);
|
||||
stmt_insert.Clear()
|
||||
@@ -23,7 +23,7 @@ import gplx.xowa.wikis.domains.*; import gplx.xowa.wikis.ttls.*; import gplx.xow
|
||||
import gplx.xowa.htmls.core.wkrs.*;
|
||||
import gplx.xowa.htmls.core.hzips.*;
|
||||
public class Xoh_make_mgr {
|
||||
private final Bry_bfr bfr = Bry_bfr.reset_(255), tmp_bfr = Bry_bfr.reset_(255); private final Bry_rdr_old bry_rdr = new Bry_rdr_old(); private Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Instance;
|
||||
private final Xoh_hzip_bfr bfr = Xoh_hzip_bfr.New_txt(255); private final Bry_bfr tmp_bfr = Bry_bfr.reset_(255); private final Bry_rdr_old bry_rdr = new Bry_rdr_old(); private Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Instance;
|
||||
private Xoh_cfg_file cfg_file; private final Xof_url_bldr url_bldr = Xof_url_bldr.new_v2(); private Xoh_file_html_fmtr__base html_fmtr;
|
||||
private final byte[] root_dir, file_dir; private byte[] file_dir_comm, file_dir_wiki, hiero_img_dir; private final byte[] wiki_domain;
|
||||
private final Bry_rdr parser = new Bry_rdr();
|
||||
|
||||
@@ -16,7 +16,8 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.xowa.htmls.core.wkrs.mkrs.*; import gplx.xowa.htmls.core.hzips.*;
|
||||
import gplx.xowa.apps.urls.*;
|
||||
import gplx.xowa.htmls.core.hzips.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.apps.fsys.*;
|
||||
import gplx.xowa.wikis.domains.*; import gplx.xowa.wikis.ttls.*; import gplx.xowa.apps.metas.*;
|
||||
public class Xoh_hdoc_ctx {
|
||||
@@ -27,29 +28,34 @@ public class Xoh_hdoc_ctx {
|
||||
public Xoa_app App() {return app;} private Xoa_app app;
|
||||
public byte[] Wiki__domain_bry() {return wiki__domain_bry;} private byte[] wiki__domain_bry;
|
||||
public Xow_ttl_parser Wiki__ttl_parser() {return wiki__ttl_parser;} private Xow_ttl_parser wiki__ttl_parser;
|
||||
public Xoa_url_parser Wiki__url_parser() {return wiki__url_parser;} private Xoa_url_parser wiki__url_parser;
|
||||
public boolean Xwiki_mgr__missing(byte[] domain){return app.Xwiki_mgr__missing(domain);}
|
||||
public Xoa_file_mgr File__mgr() {return file__mgr;} private final Xoa_file_mgr file__mgr = new Xoa_file_mgr();
|
||||
public Xof_url_bldr File__url_bldr() {return file__url_bldr;} private Xof_url_bldr file__url_bldr = new Xof_url_bldr();
|
||||
public Xoh_hdoc_mkr Mkr() {return mkr;} private Xoh_hdoc_mkr mkr = new Xoh_hdoc_mkr();
|
||||
public byte[] Page__url() {return page__url;} private byte[] page__url;
|
||||
public Hzip_stat_itm Bicode__stat() {return bicode__stat;} private final Hzip_stat_itm bicode__stat = new Hzip_stat_itm();
|
||||
public Xof_url_bldr File__url_bldr() {return file__url_bldr;} private final Xof_url_bldr file__url_bldr = new Xof_url_bldr();
|
||||
public Xoh_hzip_wkr_mgr Wkr_mkr() {return wkr_mgr;} private final Xoh_hzip_wkr_mgr wkr_mgr = new Xoh_hzip_wkr_mgr();
|
||||
public byte[] Page__url() {return page__url;} private byte[] page__url;
|
||||
public Xoh_stat_itm Hzip__stat() {return hzip__stat;} private final Xoh_stat_itm hzip__stat = new Xoh_stat_itm();
|
||||
public int Lnki__uid__nxt() {return ++lnki__uid;} private int lnki__uid; // NOTE: should be 0, but for historical reasons, 1st lnki starts at 2; EX: id='xowa_lnki_2'
|
||||
public boolean Mode_is_diff() {return mode_is_diff;} private boolean mode_is_diff; public void Mode_is_diff_(boolean v) {mode_is_diff = v;}
|
||||
public void Init_by_app(Xoa_app app) {
|
||||
Xoa_fsys_mgr fsys_mgr = app.Fsys_mgr();
|
||||
this.app = app;
|
||||
this.fsys__root = fsys_mgr.Root_dir().To_http_file_bry();
|
||||
this.fsys__file = fsys_mgr.File_dir().To_http_file_bry();
|
||||
this.fsys__file__comm = Bry_.Add(fsys__file, Xow_domain_itm_.Bry__commons, Byte_ascii.Slash_bry);
|
||||
wkr_mgr.Init();
|
||||
}
|
||||
public void Init_by_page(Xow_wiki wiki, byte[] page_url) {
|
||||
if (fsys__root == null) Init_by_app(wiki.App()); // LAZY INIT
|
||||
this.wiki__url_parser = wiki.Utl__url_parser();
|
||||
this.wiki__ttl_parser = wiki;
|
||||
this.wiki__domain_bry = wiki.Domain_bry();
|
||||
this.fsys__file__wiki = Bry_.Add(fsys__file, wiki__domain_bry, Byte_ascii.Slash_bry);
|
||||
this.page__url = page_url;
|
||||
this.Clear();
|
||||
}
|
||||
}
|
||||
private void Clear() {
|
||||
bicode__stat.Clear();
|
||||
hzip__stat.Clear();
|
||||
this.lnki__uid = 1; // NOTE: should be 0, but for historical reasons, 1st lnki starts at 2; EX: id='xowa_lnki_2'
|
||||
}
|
||||
public static final int Invalid = -1;
|
||||
|
||||
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.langs.htmls.parsers.*; import gplx.xowa.wikis.ttls.*;
|
||||
import gplx.xowa.htmls.core.makes.*; import gplx.xowa.htmls.core.wkrs.tags.*; import gplx.xowa.htmls.core.wkrs.txts.*; import gplx.xowa.htmls.core.wkrs.escapes.*; import gplx.xowa.htmls.core.wkrs.spaces.*;
|
||||
import gplx.xowa.htmls.core.makes.*; import gplx.xowa.htmls.core.wkrs.tags.*; import gplx.xowa.htmls.core.wkrs.txts.*; import gplx.xowa.htmls.core.wkrs.escapes.*;
|
||||
public class Xoh_hdoc_parser {
|
||||
private final Xoh_hdoc_wkr hdoc_wkr;
|
||||
private final Html_doc_parser hdoc_parser;
|
||||
@@ -27,11 +27,10 @@ public class Xoh_hdoc_parser {
|
||||
this.tag_parser = new Xoh_tag_parser(hdoc_wkr);
|
||||
this.hdoc_parser = new Html_doc_parser(new Xoh_txt_parser(hdoc_wkr)
|
||||
, tag_parser
|
||||
, new Xoh_escape_parser(hdoc_wkr)
|
||||
, new Xoh_space_parser(hdoc_wkr)
|
||||
, new Xoh_escape_data(hdoc_wkr)
|
||||
);
|
||||
}
|
||||
public void Parse(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src) {
|
||||
public void Parse(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src) {
|
||||
int src_len = src.length;
|
||||
tag_parser.Init(hctx, src, 0, src_len);
|
||||
hdoc_wkr.On_new_page(bfr, hpg, hctx, src, 0, src_len);
|
||||
|
||||
@@ -19,14 +19,13 @@ package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gpl
|
||||
import gplx.langs.htmls.parsers.*;
|
||||
import gplx.xowa.wikis.ttls.*;
|
||||
public interface Xoh_hdoc_wkr {
|
||||
void On_new_page(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end);
|
||||
void On_new_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end);
|
||||
void On_escape (int rng_bgn, int rng_end);
|
||||
void On_txt (int rng_bgn, int rng_end);
|
||||
void On_space (int rng_bgn, int rng_end);
|
||||
void On_hdr (gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser arg);
|
||||
void On_lnke (gplx.xowa.htmls.core.wkrs.lnkes.Xoh_lnke_parser arg);
|
||||
void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_parser arg);
|
||||
void On_img (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_parser arg);
|
||||
void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_parser arg);
|
||||
void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_parser arg);
|
||||
void On_hdr (gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser parser);
|
||||
void On_lnke (gplx.xowa.htmls.core.wkrs.lnkes.Xoh_lnke_parser parser);
|
||||
void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_parser parser);
|
||||
void On_img (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_parser parser);
|
||||
void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_parser parser);
|
||||
void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_parser parser);
|
||||
}
|
||||
|
||||
@@ -19,19 +19,19 @@ package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gpl
|
||||
import gplx.langs.htmls.parsers.*; import gplx.xowa.htmls.core.hzips.*;
|
||||
import gplx.xowa.wikis.ttls.*;
|
||||
public class Xoh_hdoc_wkr__hzip implements Xoh_hdoc_wkr {
|
||||
private final Hzip_stat_itm stat_itm = new Hzip_stat_itm();
|
||||
private Bry_bfr bfr; private Xoh_hdoc_ctx hctx; private byte[] src; private int src_end;
|
||||
public void On_new_page(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
|
||||
this.bfr = bfr; this.hctx = hctx; this.src = src; this.src_end = src_end;
|
||||
private final Xoh_stat_itm stat_itm = new Xoh_stat_itm();
|
||||
private Xoh_hzip_bfr bfr; private Xoh_hdoc_ctx hctx; private byte[] src;
|
||||
private Xoh_page hpg;
|
||||
public void On_new_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
|
||||
this.bfr = bfr; this.hpg = hpg; this.hctx = hctx; this.src = src;
|
||||
stat_itm.Clear();
|
||||
}
|
||||
public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
|
||||
public void On_escape (int rng_bgn, int rng_end) {hctx.Mkr().Escape__hzip().Encode(bfr, stat_itm).Pool__rls();}
|
||||
public void On_space (int rng_bgn, int rng_end) {hctx.Mkr().Space__hzip().Encode(bfr, stat_itm, src, src_end, rng_bgn, rng_end).Pool__rls();}
|
||||
public void On_hdr (gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser arg) {hctx.Mkr().Hdr__hzip().Encode(bfr, stat_itm, src, arg).Pool__rls();}
|
||||
public void On_lnke (gplx.xowa.htmls.core.wkrs.lnkes.Xoh_lnke_parser arg) {hctx.Mkr().Lnke__hzip().Encode(bfr, stat_itm, src, arg).Pool__rls();}
|
||||
public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_parser arg) {hctx.Mkr().Lnki__hzip().Encode(bfr, hctx, stat_itm, src, arg).Pool__rls();}
|
||||
public void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_parser arg) {hctx.Mkr().Thm__hzip().Encode(bfr, this, stat_itm, src, arg).Pool__rls();}
|
||||
public void On_img (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_parser arg) {hctx.Mkr().Img__hzip().Encode(bfr, stat_itm, src, arg, Bool_.Y).Pool__rls();}
|
||||
public void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_parser arg) {hctx.Mkr().Gly__hzip().Encode(bfr, stat_itm, src, arg).Pool__rls();}
|
||||
public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
|
||||
public void On_escape (int rng_bgn, int rng_end) {hctx.Wkr_mkr().Mw__escape().Encode(bfr, this, hctx, hpg, Bool_.Y, src, null).Pool__rls();}
|
||||
public void On_hdr (gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser parser) {hctx.Wkr_mkr().Mw__hdr().Encode(bfr, this, hctx, hpg, Bool_.Y, src, parser).Pool__rls();}
|
||||
public void On_lnke (gplx.xowa.htmls.core.wkrs.lnkes.Xoh_lnke_parser parser) {hctx.Wkr_mkr().Mw__lnke().Encode(bfr, this, hctx, hpg, Bool_.Y, src, parser).Pool__rls();}
|
||||
public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_parser parser) {hctx.Wkr_mkr().Mw__lnki().Encode(bfr, this, hctx, hpg, Bool_.Y, src, parser).Pool__rls();}
|
||||
public void On_img (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_parser parser) {hctx.Wkr_mkr().Mw__img().Encode(bfr, this, hctx, hpg, Bool_.Y, src, parser).Pool__rls();}
|
||||
public void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_parser parser) {hctx.Wkr_mkr().Mw__thm().Encode(bfr, this, hctx, hpg, Bool_.Y, src, parser).Pool__rls();}
|
||||
public void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_parser parser) {hctx.Wkr_mkr().Mw__gly().Encode(bfr, this, hctx, hpg, Bool_.Y, src, parser).Pool__rls();}
|
||||
}
|
||||
|
||||
@@ -21,19 +21,18 @@ import gplx.xowa.wikis.ttls.*;
|
||||
import gplx.xowa.htmls.core.hzips.*;
|
||||
import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*;
|
||||
public class Xoh_hdoc_wkr__make implements Xoh_hdoc_wkr {
|
||||
private Bry_bfr bfr; private Xoh_page hpg; private Xoh_hdoc_ctx hctx; private byte[] src;
|
||||
private Xoh_hzip_bfr bfr; private Xoh_page hpg; private Xoh_hdoc_ctx hctx; private byte[] src;
|
||||
private final Xoh_hdr_make wkr__hdr = new Xoh_hdr_make();
|
||||
private final Xoh_img_bldr wkr__img = new Xoh_img_bldr();
|
||||
public void On_new_page(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
|
||||
public void On_new_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
|
||||
this.bfr = bfr; this.hpg = hpg; this.hctx = hctx; this.src = src;
|
||||
}
|
||||
public void On_escape (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
|
||||
public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
|
||||
public void On_space (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
|
||||
public void On_lnke (gplx.xowa.htmls.core.wkrs.lnkes.Xoh_lnke_parser arg) {bfr.Add_mid(src, arg.Rng_bgn(), arg.Rng_end());}
|
||||
public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_parser arg) {bfr.Add_mid(src, arg.Rng_bgn(), arg.Rng_end());}
|
||||
public void On_hdr (gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser arg) {wkr__hdr.Make(bfr, hpg, src, arg);}
|
||||
public void On_img (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_parser arg) {wkr__img.Make_by_parse(bfr, hpg, hctx, src, arg);}
|
||||
public void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_parser arg) {bfr.Add_mid(src, arg.Rng_bgn(), arg.Rng_end());}
|
||||
public void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_parser arg) {}
|
||||
public void On_escape (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
|
||||
public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
|
||||
public void On_lnke (gplx.xowa.htmls.core.wkrs.lnkes.Xoh_lnke_parser parser) {bfr.Add_mid(src, parser.Src_bgn(), parser.Src_end());}
|
||||
public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_parser parser) {bfr.Add_mid(src, parser.Src_bgn(), parser.Src_end());}
|
||||
public void On_hdr (gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser parser) {wkr__hdr.Make(bfr, hpg, src, parser);}
|
||||
public void On_img (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_parser parser) {wkr__img.Make_by_parse(bfr, hpg, hctx, src, parser);}
|
||||
public void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_parser parser) {bfr.Add_mid(src, parser.Src_bgn(), parser.Src_end());}
|
||||
public void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_parser parser) {}
|
||||
}
|
||||
|
||||
38
400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hzip_bfr.java
Normal file
38
400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hzip_bfr.java
Normal file
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.xowa.htmls.core.hzips.*;
|
||||
public class Xoh_hzip_bfr extends Bry_bfr { private final Xoh_hzip_int hzint = new Xoh_hzip_int();
|
||||
private final byte stop_byte;
|
||||
public Xoh_hzip_bfr(int bfr_max, boolean mode_is_b256, byte stop_byte) {
|
||||
this.Init(bfr_max);
|
||||
this.stop_byte = stop_byte;
|
||||
Mode_is_b256_(mode_is_b256);
|
||||
}
|
||||
public Xoh_hzip_bfr Mode_is_b256_(boolean mode_is_b256) {
|
||||
hzint.Mode_is_b256_(mode_is_b256);
|
||||
return this;
|
||||
}
|
||||
public Xoh_hzip_bfr Add_hzip_bry(byte[] bry) {Add(bry); Add_byte(stop_byte); return this;}
|
||||
public Xoh_hzip_bfr Add_hzip_mid(byte[] src, int bgn, int end) {Add_mid(src, bgn, end); Add_byte(stop_byte); return this;}
|
||||
public Xoh_hzip_bfr Add_hzip_int(int reqd, int val) {
|
||||
hzint.Encode(reqd, this, val);
|
||||
return this;
|
||||
}
|
||||
public static Xoh_hzip_bfr New_txt(int bfr_max) {return new Xoh_hzip_bfr(bfr_max, Bool_.N, gplx.xowa.htmls.core.hzips.Xoh_hzip_dict_.Escape);}
|
||||
}
|
||||
@@ -15,24 +15,29 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.xowa.htmls.core.hzips.*;
|
||||
public class Xoh_hzip_fxt {
|
||||
private final Xowe_wiki wiki;
|
||||
private final Xop_fxt parser_fxt = new Xop_fxt();
|
||||
private final Bry_bfr bfr = Bry_bfr.new_();
|
||||
private final Xoh_hzip_bfr bfr = Xoh_hzip_bfr.New_txt(32);
|
||||
private final Xoh_hzip_mgr hzip_mgr;
|
||||
private final Xoh_page hpg = new Xoh_page();
|
||||
private boolean mode_is_b256;
|
||||
public Xoh_hzip_fxt() {
|
||||
Xowe_wiki wiki = parser_fxt.Wiki();
|
||||
this.wiki = parser_fxt.Wiki();
|
||||
Xoa_app_fxt.repo2_(parser_fxt.App(), wiki); // needed else will be old "mem/wiki/repo/trg/thumb/" instead of standard "mem/file/en.wikipedia.org/thumb/"
|
||||
wiki.Html__hdump_mgr().Init_by_db(parser_fxt.Wiki());
|
||||
this.hzip_mgr = parser_fxt.Wiki().Html__hdump_mgr().Hzip_mgr();
|
||||
hpg.Init(wiki, Xoa_url.blank(), parser_fxt.Wiki().Ttl_parse(Xoa_page_.Main_page_bry), 1);
|
||||
}
|
||||
public Xow_wiki Wiki() {return parser_fxt.Wiki();}
|
||||
public Xow_wiki Wiki() {return wiki;}
|
||||
public Xoh_hzip_fxt Init_mode_is_b256_(boolean v) {bfr.Mode_is_b256_(v); mode_is_b256 = v; return this;}
|
||||
public Xoh_hzip_fxt Init_mode_diff_y_() {hzip_mgr.Hctx().Mode_is_diff_(Bool_.Y); return this;}
|
||||
public void Clear() {hpg.Clear();}
|
||||
public Xowe_wiki Prep_create_wiki(String alias, String domain) {
|
||||
public void Init_wiki_installed(String domain) {parser_fxt.Init_xwiki_add_user_(domain);}
|
||||
public Xowe_wiki Init_wiki_alias(String alias, String domain) {
|
||||
Xowe_wiki rv = Xoa_app_fxt.wiki_(parser_fxt.App(), domain);
|
||||
parser_fxt.Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_u8(alias), Bry_.new_u8(domain), null);
|
||||
return rv;
|
||||
@@ -74,7 +79,7 @@ public class Xoh_hzip_fxt {
|
||||
}
|
||||
public void Exec_write_to_fsys(Io_url dir, String fil) {
|
||||
try {
|
||||
Bry_bfr bfr = Bry_bfr.new_();
|
||||
Xoh_hzip_bfr bfr = Xoh_hzip_bfr.New_txt(32).Mode_is_b256_(mode_is_b256);
|
||||
Gfo_usr_dlg_.Test__show__init();
|
||||
hzip_mgr.Encode(bfr, parser_fxt.Wiki(), hpg, Io_mgr.Instance.LoadFilBry(dir.GenSubFil(fil)));
|
||||
Gfo_usr_dlg_.Test__show__term();
|
||||
@@ -82,7 +87,9 @@ public class Xoh_hzip_fxt {
|
||||
Io_mgr.Instance.SaveFilBry(dir.GenSubFil(fil).GenNewExt(".hzip.html"), actl);
|
||||
Gfo_usr_dlg_.Test__show__init();
|
||||
gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_hzip.Md5_depth = 4;
|
||||
hzip_mgr.Hctx().Mode_is_diff_(Bool_.Y);
|
||||
hzip_mgr.Decode(bfr, parser_fxt.Wiki(), hpg, actl);
|
||||
hzip_mgr.Hctx().Mode_is_diff_(Bool_.N);
|
||||
gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_hzip.Md5_depth = 2;
|
||||
Gfo_usr_dlg_.Test__show__term();
|
||||
Io_mgr.Instance.SaveFilBry(dir.GenSubFil(fil).GenNewExt(".hzip.decode.html"), bfr.To_bry_and_clear());
|
||||
@@ -18,9 +18,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
|
||||
import gplx.core.brys.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
public abstract class Xoh_itm_parser_fxt_base {
|
||||
private final Bry_rdr rdr = new Bry_rdr();
|
||||
public abstract class Xoh_itm_parser_fxt {
|
||||
private final Xoae_app app;
|
||||
private final Xowe_wiki wiki;
|
||||
private final Bry_err_wkr err_wkr = new Bry_err_wkr();
|
||||
protected byte[] src; protected int src_len;
|
||||
protected final Xoh_hdoc_ctx hctx = new Xoh_hdoc_ctx();
|
||||
public Xoh_itm_parser_fxt() {
|
||||
this.app = Xoa_app_fxt.app_();
|
||||
this.wiki = Xoa_app_fxt.wiki_tst_(app);
|
||||
hctx.Init_by_app(app);
|
||||
}
|
||||
private Xoh_itm_parser Parser() {return Parser_get();}
|
||||
public abstract Xoh_itm_parser Parser_get();
|
||||
public void Test__parse__fail(String src_str, String expd) {
|
||||
@@ -32,8 +40,9 @@ public abstract class Xoh_itm_parser_fxt_base {
|
||||
}
|
||||
public void Exec_parse(String src_str) {
|
||||
this.src = Bry_.new_u8(src_str); this.src_len = src.length;
|
||||
rdr.Init_by_page(Xoa_page_.Main_page_bry, src, src_len);
|
||||
Exec_parse_hook(rdr, 0, src_len);
|
||||
hctx.Init_by_page(wiki, Xoa_page_.Main_page_bry);
|
||||
err_wkr.Init_by_page(Xoa_page_.Main_page_str, src);
|
||||
Exec_parse_hook(err_wkr, hctx, 0, src_len);
|
||||
}
|
||||
public abstract void Exec_parse_hook(Bry_rdr owner_rdr, int src_bgn, int src_end);
|
||||
public abstract void Exec_parse_hook(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, int src_bgn, int src_end);
|
||||
}
|
||||
@@ -18,9 +18,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.htmls.core.wkrs.escapes; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
import gplx.xowa.htmls.core.hzips.*;
|
||||
public class Xoh_escape_parser implements Html_doc_wkr {
|
||||
public class Xoh_escape_data implements Html_doc_wkr {
|
||||
private final Xoh_hdoc_wkr wkr;
|
||||
public Xoh_escape_parser(Xoh_hdoc_wkr wkr) {this.wkr = wkr;}
|
||||
public Xoh_escape_data(Xoh_hdoc_wkr wkr) {this.wkr = wkr;}
|
||||
public byte[] Hook() {return Xoh_hzip_dict_.Escape_bry;}
|
||||
public int Parse(byte[] src, int src_bgn, int src_end, int pos) {
|
||||
int rv = pos + 1;
|
||||
@@ -19,18 +19,24 @@ package gplx.xowa.htmls.core.wkrs.escapes; import gplx.*; import gplx.xowa.*; im
|
||||
import gplx.core.brys.*; import gplx.core.threads.poolables.*;
|
||||
import gplx.xowa.htmls.core.hzips.*;
|
||||
public class Xoh_escape_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
private byte escape_byte;
|
||||
public byte[] Hook() {return hook;} private byte[] hook;
|
||||
public String Key() {return Xoh_hzip_dict_.Key__escape;}
|
||||
public Xoh_escape_hzip Encode(Bry_bfr bfr, Hzip_stat_itm stat_itm) {
|
||||
stat_itm.Escape_add_one();
|
||||
bfr.Add(Xoh_hzip_dict_.Bry__escape);
|
||||
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
|
||||
bfr.Add(hook);
|
||||
hctx.Hzip__stat().Escape_add(escape_byte);
|
||||
return this;
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx ctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
|
||||
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
|
||||
bfr.Add_byte(Xoh_hzip_dict_.Escape);
|
||||
return rdr.Pos();
|
||||
}
|
||||
public int Pool__idx() {return pool_idx;} private int pool_idx;
|
||||
public void Pool__clear (Object[] args) {}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_escape_hzip rv = new Xoh_escape_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {
|
||||
Xoh_escape_hzip rv = new Xoh_escape_hzip();
|
||||
rv.pool_mgr = mgr; rv.pool_idx = idx;
|
||||
rv.hook = (byte[])args[0];
|
||||
rv.escape_byte = rv.hook[0];
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,33 +23,15 @@ import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.ttls.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.files.repos.*;
|
||||
public class Xoh_gly_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
public String Key() {return Xoh_hzip_dict_.Key__gly;}
|
||||
public Xoh_gly_hzip Encode(Bry_bfr bfr, Hzip_stat_itm stat_itm, byte[] src, Xoh_gly_grp_parser arg) {
|
||||
bfr.Add_mid(src, arg.Rng_bgn(), arg.Rng_end());
|
||||
public byte[] Hook() {return hook;} private byte[] hook;
|
||||
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
|
||||
Xoh_gly_grp_parser data = (Xoh_gly_grp_parser)data_obj;
|
||||
bfr.Add_mid(src, data.Rng_bgn(), data.Rng_end());
|
||||
return this;
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx hctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
|
||||
return hook_bgn;
|
||||
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
|
||||
return src_bgn + 2;
|
||||
}
|
||||
public int Pool__idx() {return pool_idx;} private int pool_idx;
|
||||
public void Pool__clear (Object[] args) {}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_gly_hzip rv = new Xoh_gly_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
|
||||
// private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_( 1, 1, 1, 1 , 1, 1, 1, 1 , 2, 1, 1, 1 , 1, 2, 2);
|
||||
// private static final int // SERIALIZED
|
||||
// Flag__file__w_diff_from_html = 0
|
||||
// , Flag__file__time_exists = 1
|
||||
// , Flag__file__page_exists = 2
|
||||
// , Flag__file__is_orig = 3
|
||||
// , Flag__file__repo_is_local = 4
|
||||
// , Flag__file__src_exists = 5
|
||||
// , Flag__img__cls_other_exists = 6
|
||||
// , Flag__anch__ns_is_image = 7
|
||||
// , Flag__anch__cls_tid = 8 // none, image
|
||||
// , Flag__anch__ns_id_needs_saving = 9
|
||||
// , Flag__img__alt_diff_from_anch_title = 10
|
||||
// , Flag__anch__href_diff_file = 11
|
||||
// , Flag__anch__title_missing = 12
|
||||
// , Flag__img__cls_tid = 13 // none, thumbimage, thumbborder
|
||||
// , Flag__anch__href_tid = 14 // wiki, site, anch, inet
|
||||
// ;
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_gly_hzip rv = new Xoh_gly_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
|
||||
}
|
||||
|
||||
@@ -20,43 +20,63 @@ import gplx.core.brys.*; import gplx.core.threads.poolables.*; import gplx.xowa.
|
||||
import gplx.xowa.htmls.core.hzips.*;
|
||||
public class Xoh_hdr_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
public String Key() {return Xoh_hzip_dict_.Key__hdr;}
|
||||
public Xoh_hdr_hzip Encode(Bry_bfr bfr, Hzip_stat_itm stat_itm, byte[] src, Xoh_hdr_parser arg) {
|
||||
int level = arg.Hdr_level();
|
||||
stat_itm.Hdr_add(level);
|
||||
bfr.Add(Xoh_hzip_dict_.Bry__hdr); // add hook
|
||||
bfr.Add_int_digits(1, level); // add level; EX: 2 in <h2>
|
||||
bfr.Add_mid(src, arg.Capt_bgn(), arg.Capt_end()).Add_byte(Xoh_hzip_dict_.Escape); // add caption
|
||||
bfr.Add_safe(arg.Anch_bry()); // add anchor
|
||||
bfr.Add_byte(Xoh_hzip_dict_.Escape); // add escape
|
||||
public byte[] Hook() {return hook;} private byte[] hook;
|
||||
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
|
||||
Xoh_hdr_parser data = (Xoh_hdr_parser)data_obj;
|
||||
boolean capt_rhs_exists = flag_bldr.Set_as_bool (Flag__capt_rhs_exists , data.Capt_rhs_exists());
|
||||
boolean anch_is_diff = flag_bldr.Set_as_bool (Flag__anch_is_diff , data.Anch_is_diff());
|
||||
int hdr_level = flag_bldr.Set_as_int (Flag__hdr_level , data.Hdr_level());
|
||||
|
||||
bfr.Add(hook);
|
||||
bfr.Add_hzip_int(1, flag_bldr.Encode());
|
||||
bfr.Add_hzip_mid(src, data.Capt_bgn(), data.Capt_end()); // add caption
|
||||
if (anch_is_diff) bfr.Add_hzip_mid(src, data.Anch_bgn(), data.Anch_end()); // add anchor
|
||||
if (capt_rhs_exists) bfr.Add_hzip_mid(src, data.Capt_rhs_bgn(), data.Capt_rhs_end());// add capt_rhs
|
||||
|
||||
hctx.Hzip__stat().Hdr_add(hdr_level);
|
||||
return this;
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx ctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
|
||||
byte level = rdr.Read_byte();
|
||||
int capt_bgn = rdr.Pos();
|
||||
int capt_end = rdr.Find_fwd_lr(Xoh_hzip_dict_.Escape);
|
||||
int anch_bgn = rdr.Pos();
|
||||
int anch_end = rdr.Find_fwd_lr(Xoh_hzip_dict_.Escape);
|
||||
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
|
||||
int flag = rdr.Read_int_by_base85(1); flag_bldr.Decode(flag);
|
||||
boolean capt_rhs_exists = flag_bldr.Get_as_bool(Flag__capt_rhs_exists);
|
||||
boolean anch_is_diff = flag_bldr.Get_as_bool(Flag__anch_is_diff);
|
||||
byte hdr_level = flag_bldr.Get_as_byte(Flag__hdr_level);
|
||||
|
||||
bfr.Add(Bry__hdr__0).Add_byte(level);
|
||||
int capt_bgn = rdr.Pos(); int capt_end = rdr.Find_fwd_lr();
|
||||
int anch_bgn = -1, anch_end = -1;
|
||||
if (anch_is_diff) {
|
||||
anch_bgn = rdr.Pos(); anch_end = rdr.Find_fwd_lr();
|
||||
}
|
||||
byte[] capt_manual_end = capt_rhs_exists ? rdr.Read_bry_to() : null;
|
||||
|
||||
bfr.Add(Bry__hdr__0).Add_byte_as_a7(hdr_level);
|
||||
bfr.Add(Bry__hdr__1);
|
||||
if (anch_end > anch_bgn)
|
||||
if (anch_is_diff)
|
||||
bfr.Add_mid (src, anch_bgn, anch_end);
|
||||
else
|
||||
bfr.Add_mid_w_swap (src, capt_bgn, capt_end, Byte_ascii.Space, Byte_ascii.Underline);
|
||||
bfr.Add(Bry__hdr__2);
|
||||
bfr.Add_mid(src, capt_bgn, capt_end);
|
||||
bfr.Add(Bry__hdr__3).Add_byte(level);
|
||||
bfr.Add(Bry__hdr__3);
|
||||
if (capt_rhs_exists)
|
||||
bfr.Add(capt_manual_end);
|
||||
bfr.Add(Bry__hdr__4).Add_byte_as_a7(hdr_level);
|
||||
bfr.Add_byte(Byte_ascii.Angle_end);
|
||||
return rdr.Pos();
|
||||
}
|
||||
private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_ (1, 1, 3);
|
||||
private static final int // SERIALIZED
|
||||
Flag__capt_rhs_exists = 0
|
||||
, Flag__anch_is_diff = 1
|
||||
, Flag__hdr_level = 2
|
||||
;
|
||||
private static final byte[]
|
||||
Bry__hdr__0 = Bry_.new_a7("<h")
|
||||
, Bry__hdr__1 = Bry_.new_a7(">\n <span class=\"mw-headline\" id=\"")
|
||||
, Bry__hdr__1 = Bry_.new_a7("><span class=\"mw-headline\" id=\"")
|
||||
, Bry__hdr__2 = Bry_.new_a7("\">")
|
||||
, Bry__hdr__3 = Bry_.new_a7("</span>\n</h")
|
||||
, Bry__hdr__3 = Bry_.new_a7("</span>")
|
||||
, Bry__hdr__4 = Bry_.new_a7("</h")
|
||||
;
|
||||
public int Pool__idx() {return pool_idx;} private int pool_idx;
|
||||
public void Pool__clear (Object[] args) {}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_hdr_hzip rv = new Xoh_hdr_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_hdr_hzip rv = new Xoh_hdr_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
|
||||
}
|
||||
|
||||
@@ -21,46 +21,53 @@ public class Xoh_hdr_hzip_tst {
|
||||
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt();
|
||||
@Test public void Same() {
|
||||
fxt.Test__bicode(String_.Concat_lines_nl_skip_last
|
||||
( "~\"6A~~"
|
||||
( "~\"'A~"
|
||||
, "a"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h6>"
|
||||
, " <span class='mw-headline' id='A'>A</span>"
|
||||
, "</h6>"
|
||||
( "<h6><span class='mw-headline' id='A'>A</span></h6>"
|
||||
, "a"
|
||||
));
|
||||
}
|
||||
@Test public void Diff() {
|
||||
fxt.Test__bicode(String_.Concat_lines_nl_skip_last
|
||||
( "~\"2<i>A</i>~A~"
|
||||
( "~\"+<i>A</i>~A~"
|
||||
, "a"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>"
|
||||
, " <span class='mw-headline' id='A'><i>A</i></span>"
|
||||
, "</h2>"
|
||||
( "<h2><span class='mw-headline' id='A'><i>A</i></span></h2>"
|
||||
, "a"
|
||||
));
|
||||
}
|
||||
@Test public void Diff_by_underscore() {
|
||||
fxt.Test__bicode(String_.Concat_lines_nl_skip_last
|
||||
( "~\"2A 1~~"
|
||||
( "~\"#A 1~"
|
||||
, "a"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>"
|
||||
, " <span class='mw-headline' id='A_1'>A 1</span>"
|
||||
, "</h2>"
|
||||
( "<h2><span class='mw-headline' id='A_1'>A 1</span></h2>"
|
||||
, "a"
|
||||
));
|
||||
}
|
||||
@Test public void Diff_by_lnki() {
|
||||
fxt.Test__bicode(String_.Concat_lines_nl_skip_last
|
||||
( "~\"+<a href=\"/wiki/Category:A\" title=\"Category:A\">Category:A</a>~Category:A~"
|
||||
, "a"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2><span class='mw-headline' id='Category:A'><a href='/wiki/Category:A' title='Category:A'>Category:A</a></span></h2>"
|
||||
, "a"
|
||||
));
|
||||
}
|
||||
@Test public void Same_w_underscore() {
|
||||
fxt.Test__bicode(String_.Concat_lines_nl_skip_last
|
||||
( "~\"2A_1~~"
|
||||
( "~\"#A_1~"
|
||||
, "a"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2>"
|
||||
, " <span class='mw-headline' id='A_1'>A_1</span>"
|
||||
, "</h2>"
|
||||
( "<h2><span class='mw-headline' id='A_1'>A_1</span></h2>"
|
||||
, "a"
|
||||
));
|
||||
}
|
||||
@Test public void Tidy__bad_end() {
|
||||
fxt.Test__bicode(
|
||||
"~\"?A~AB~B~"
|
||||
, "<h6><span class='mw-headline' id='AB'>A</span>B</h6>"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,14 +22,14 @@ public class Xoh_hdr_make {
|
||||
public void Make(Bry_bfr bfr, Xoh_page hpg, byte[] src, Xoh_hdr_parser arg) {
|
||||
// , int rng_bgn, int rng_end, int level, int capt_bgn, int capt_end, byte[] anch
|
||||
// register section
|
||||
int rng_bgn = arg.Rng_bgn(), rng_end = arg.Rng_end();
|
||||
int rng_bgn = arg.Src_bgn(), rng_end = arg.Src_end();
|
||||
int level = arg.Hdr_level();
|
||||
Xoh_section_mgr section_mgr = hpg.Section_mgr();
|
||||
int section_len = section_mgr.Len();
|
||||
if (section_len != 0) // guard against -1 index; should not happen
|
||||
section_mgr.Set_content(section_len - 1, src, rng_bgn - 2); // -2 to skip "\n\n"
|
||||
byte[] capt = Bry_.Mid(src, arg.Capt_bgn(), arg.Capt_end());
|
||||
byte[] anch = arg.Anch_bry();
|
||||
byte[] anch = Bry_.Mid(src, arg.Anch_bgn(), arg.Anch_end());
|
||||
if (anch == null) anch = Bry_.Replace(capt, Byte_ascii.Space, Byte_ascii.Underline);
|
||||
hpg.Section_mgr().Add(section_len, level, anch, capt).Content_bgn_(rng_end + 1); // +1 to skip "\n"
|
||||
bfr.Add_mid(src, rng_bgn, rng_end);
|
||||
|
||||
@@ -18,26 +18,38 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.htmls.core.wkrs.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
public class Xoh_hdr_parser {
|
||||
// gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser
|
||||
public int Rng_bgn() {return rng_bgn;} private int rng_bgn;
|
||||
public int Rng_end() {return rng_end;} private int rng_end;
|
||||
public int Src_bgn() {return src_bgn;} private int src_bgn;
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
public int Hdr_level() {return hdr_level;} private int hdr_level;
|
||||
public int Anch_bgn() {return anch_bgn;} private int anch_bgn;
|
||||
public int Anch_end() {return anch_end;} private int anch_end;
|
||||
public boolean Anch_is_diff() {return anch_is_diff;} private boolean anch_is_diff;
|
||||
public int Capt_bgn() {return capt_bgn;} private int capt_bgn;
|
||||
public int Capt_end() {return capt_end;} private int capt_end;
|
||||
public byte[] Anch_bry() {return anch_bry;} private byte[] anch_bry;
|
||||
public int Parse(Xoh_hdoc_wkr wkr, byte[] src, Html_tag_rdr rdr, int hdr_level, int rng_bgn, Html_tag span) {// <h2><span class='mw-headline' id='A_1'>A 1</span></h2>
|
||||
this.rng_bgn = rng_bgn; this.hdr_level = hdr_level;
|
||||
Html_atr anch_atr = span.Atrs__get_by_or_fail(Html_atr_.Bry__id);
|
||||
int anch_bgn = anch_atr.Val_bgn(), anch_end = anch_atr.Val_end();
|
||||
this.capt_bgn = span.Src_end();
|
||||
rdr.Tag__move_fwd_tail(hdr_level); // find </h2> not </span> since <span> can be nested, but <h2> cannot
|
||||
this.capt_end = rdr.Tag__peek_bwd_tail(Html_tag_.Id__span).Src_bgn(); // get </span> before </h2>
|
||||
this.anch_bry = null;
|
||||
if (!Bry_.Match_w_swap(src, capt_bgn, capt_end, src, anch_bgn, anch_end, Byte_ascii.Space, Byte_ascii.Underline))
|
||||
this.anch_bry = Bry_.Mid(src, anch_bgn, anch_end); // anch is different than capt; occurs with html and dupe-anchors; EX: "==<i>A</i>==" -> id='A'
|
||||
this.rng_end = rdr.Pos();
|
||||
wkr.On_hdr(this);
|
||||
return rng_end;
|
||||
public int Capt_rhs_bgn() {return capt_rhs_bgn;} private int capt_rhs_bgn;
|
||||
public int Capt_rhs_end() {return capt_rhs_end;} private int capt_rhs_end;
|
||||
public boolean Capt_rhs_exists() {return capt_rhs_exists;} private boolean capt_rhs_exists;
|
||||
public void Clear() {
|
||||
this.anch_bgn = anch_end = capt_bgn = capt_end = capt_rhs_bgn = capt_rhs_end -1;
|
||||
this.anch_is_diff = capt_rhs_exists = false;
|
||||
}
|
||||
public boolean Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Html_tag_rdr tag_rdr, byte[] src, Html_tag hdr_head, Html_tag span_head) {
|
||||
this.Clear();
|
||||
this.src_bgn = hdr_head.Src_bgn(); this.hdr_level = hdr_head.Name_id();
|
||||
Html_atr anch_atr = span_head.Atrs__get_by_or_fail(Html_atr_.Bry__id);
|
||||
this.anch_bgn = anch_atr.Val_bgn(); this.anch_end = anch_atr.Val_end();
|
||||
this.capt_bgn = span_head.Src_end();
|
||||
Html_tag hdr_tail = tag_rdr.Tag__move_fwd_tail(hdr_level); // find </h2> not </span_head> since <span_head> can be nested, but <h2> cannot
|
||||
Html_tag span_tail = tag_rdr.Tag__peek_bwd_tail(Html_tag_.Id__span); // get </span_head> before </h2>
|
||||
this.capt_end = span_tail.Src_bgn();
|
||||
if (span_tail.Src_end() != hdr_tail.Src_bgn()) {
|
||||
capt_rhs_exists = true;
|
||||
capt_rhs_bgn = span_tail.Src_end(); capt_rhs_end = hdr_tail.Src_bgn();
|
||||
}
|
||||
this.anch_is_diff = !Bry_.Match_w_swap(src, capt_bgn, capt_end, src, anch_bgn, anch_end, Byte_ascii.Space, Byte_ascii.Underline); // anch is different than capt; occurs with html and dupe-anchors; EX: "==<i>A</i>==" -> id='A'
|
||||
this.src_end = tag_rdr.Pos();
|
||||
hdoc_wkr.On_hdr(this);
|
||||
return true;
|
||||
}
|
||||
public static final byte[] Bry__class__mw_headline = Bry_.new_a7("mw-headline");
|
||||
}
|
||||
|
||||
@@ -24,20 +24,20 @@ public class Xoh_img_bldr {
|
||||
public Xoh_img_wtr Wtr() {return wtr;} private final Xoh_img_wtr wtr = new Xoh_img_wtr();
|
||||
public Xof_fsdb_itm Fsdb_itm() {return fsdb_itm;} private Xof_fsdb_itm fsdb_itm;
|
||||
public void Make_by_parse(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, Xoh_img_parser arg) {
|
||||
Make( bfr, hpg, hctx, src, arg.Img_src().File_ttl_bry(), arg.Img_xoimg()
|
||||
, arg.Anch_href().Rel_nofollow_exists(), arg.Anch_href().Atr(), arg.Anch_cls().Atr(), arg.Anch_title()
|
||||
Make( bfr, hpg, hctx, src, arg.Img_src().File_ttl_bry(), arg.Img_xoimg(), arg.Img_xoimg().Val_dat_exists()
|
||||
, arg.Rel_nofollow_exists(), arg.Anch_href().Atr(), arg.Anch_cls().Atr(), arg.Anch_title()
|
||||
, arg.Img_w(), arg.Img_h(), arg.Img_src().Atr(), arg.Img_cls().Atr(), arg.Img_alt());
|
||||
wtr.Bfr_arg__add(bfr);
|
||||
}
|
||||
public void Make(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, byte[] lnki_ttl, Xoh_img_xoimg_parser img_xoimg
|
||||
public void Make(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, byte[] lnki_ttl, Xoh_img_xoimg_parser img_xoimg, boolean img_xoimg_exists
|
||||
, boolean anch_rel_is_nofollow, Bfr_arg anch_href, Bfr_arg anch_cls, Bfr_arg anch_ttl
|
||||
, int img_w, int img_h, Bfr_arg img_src, Bfr_arg img_cls, Bfr_arg img_alt) {
|
||||
wtr.Clear();
|
||||
this.fsdb_itm = hpg.Img_mgr().Make_img();
|
||||
if (img_xoimg.Val_dat_exists()) {
|
||||
if (img_xoimg_exists) {
|
||||
fsdb_itm.Init_at_lnki(Xof_exec_tid.Tid_wiki_page, hpg.Wiki().Domain_itm().Abrv_xo(), lnki_ttl, img_xoimg.Lnki_type(), img_xoimg.Lnki_upright(), img_xoimg.Lnki_w(), img_xoimg.Lnki_h(), img_xoimg.Lnki_time(), img_xoimg.Lnki_page(), Xof_patch_upright_tid_.Tid_all);
|
||||
hctx.File__mgr().Check_cache(fsdb_itm);
|
||||
wtr.Img_xoimg_(src, img_xoimg.Val_bgn(), img_xoimg.Val_end());
|
||||
wtr.Img_xoimg_(img_xoimg);
|
||||
wtr.Img_src_empty_().Img_w_(0).Img_h_(0);
|
||||
}
|
||||
else if (img_w != -1) {
|
||||
@@ -45,6 +45,8 @@ public class Xoh_img_bldr {
|
||||
}
|
||||
if (anch_rel_is_nofollow) wtr.Anch_rel_nofollow_();
|
||||
wtr.Anch_href_(anch_href).Anch_cls_(anch_cls).Anch_title_(anch_ttl).Anch_xowa_title_(lnki_ttl);
|
||||
wtr.Img_id_(Xoh_img_mgr.Bry__html_uid, fsdb_itm.Html_uid()).Img_alt_(img_alt).Img_cls_(img_cls);
|
||||
if (!hctx.Mode_is_diff())
|
||||
wtr.Img_id_(Xoh_img_mgr.Bry__html_uid, fsdb_itm.Html_uid());
|
||||
wtr.Img_alt_(img_alt).Img_cls_(img_cls);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,15 +17,19 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.imgs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.threads.poolables.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*; import gplx.xowa.htmls.hrefs.*; import gplx.xowa.htmls.core.hzips.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*; import gplx.langs.htmls.encoders.*;
|
||||
import gplx.xowa.htmls.hrefs.*; import gplx.xowa.htmls.core.hzips.*;
|
||||
import gplx.xowa.htmls.core.wkrs.bfr_args.*; import gplx.xowa.htmls.core.wkrs.imgs.atrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*; import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*;
|
||||
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.ttls.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.files.repos.*;
|
||||
public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
public String Key() {return Xoh_hzip_dict_.Key__img;}
|
||||
private final Xoh_img_xoimg_parser xoimg_parser = new Xoh_img_xoimg_parser();
|
||||
public byte[] Hook() {return hook;} private byte[] hook;
|
||||
private final Xoh_img_xoimg_parser xoimg_parser = new Xoh_img_xoimg_parser();
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr.new_(32);
|
||||
public Xoh_img_bldr Bldr() {return bldr;} private Xoh_img_bldr bldr = new Xoh_img_bldr();
|
||||
public Bfr_arg__href Anch_href_arg() {return anch_href_arg;} private final Bfr_arg__href anch_href_arg = new Bfr_arg__href();
|
||||
public Xoh_img_xoimg_hzip Xoimg() {return xoimg;} private final Xoh_img_xoimg_hzip xoimg = new Xoh_img_xoimg_hzip();
|
||||
private final Bry_obj_ref
|
||||
anch_cls_mid = Bry_obj_ref.New_empty()
|
||||
, anch_title_mid = Bry_obj_ref.New_empty()
|
||||
@@ -34,30 +38,30 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
, img_src_mid = Bry_obj_ref.New_empty()
|
||||
, img_cls_mid = Bry_obj_ref.New_empty()
|
||||
;
|
||||
public Xoh_img_hzip Encode(Bry_bfr bfr, Hzip_stat_itm stat_itm, byte[] src, Xoh_img_parser arg, boolean write_hdr) {
|
||||
// img_map: <img id="xowa_file_img_100" alt="" src="file:///J:/xowa/file/commons.wikimedia.org/orig/b/8/a/7/Solar_System_Template_Final.png" width="666" height="36" usemap="#imagemap_1_1">
|
||||
Xoh_anch_href_parser anch_href = arg.Anch_href();
|
||||
Bry_obj_ref anch_page = arg.Anch_page();
|
||||
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
|
||||
Xoh_img_parser data = (Xoh_img_parser)data_obj;
|
||||
Xoh_anch_href_itm anch_href = data.Anch_href();
|
||||
Bry_obj_ref anch_page = data.Anch_page();
|
||||
byte anch_href_tid = anch_href.Tid();
|
||||
boolean anch__ns_id_needs_saving = anch_href.Tid_has_ns() && anch_href.Page_ns_id() != Xow_ns_.Tid__file;
|
||||
Html_atr anch_title = arg.Anch_title();
|
||||
Xoa_ttl anch_href_ttl = anch_href.Page_ttl();
|
||||
Xoh_img_xoimg_parser img_xoimg = arg.Img_xoimg();
|
||||
Xoh_img_cls_parser img_cls = arg.Img_cls();
|
||||
Xoh_img_src_parser img_src = arg.Img_src();
|
||||
boolean img__alt_diff_from_anch_title = arg.Img_alt__diff__anch_title();
|
||||
boolean anch__ns_is_custom = anch_href.Ttl_ns_custom() != null;
|
||||
boolean anch__ns_id_needs_saving = Xoh_anch_href_itm.Ns_exists(anch_href.Tid()) && anch_href.Ttl_ns_id() != Xow_ns_.Tid__file;
|
||||
Html_atr anch_title = data.Anch_title();
|
||||
Xoh_img_xoimg_parser img_xoimg = data.Img_xoimg();
|
||||
Xoh_img_cls_parser img_cls = data.Img_cls();
|
||||
Xoh_img_src_parser img_src = data.Img_src();
|
||||
boolean img__alt_diff_from_anch_title = data.Img_alt__diff__anch_title();
|
||||
boolean file__src_exists = !img_xoimg.Val_dat_exists();
|
||||
boolean anch_href_diff_file = !Bry_.Match(anch_page.Val(), anch_page.Val_bgn(), anch_page.Val_end(), anch_href.Page_ttl().Page_db());
|
||||
boolean anch_href_diff_file = !Bry_.Match(anch_page.Val(), anch_page.Val_bgn(), anch_page.Val_end(), anch_href.Ttl_page_db());
|
||||
|
||||
flag_bldr.Set(Flag__file__w_diff_from_html , file__src_exists && arg.Img_w__diff__file_w());
|
||||
flag_bldr.Set(Flag__file__w_diff_from_html , file__src_exists && data.Img_w__diff__file_w());
|
||||
flag_bldr.Set(Flag__file__time_exists , file__src_exists && img_src.File_time_exists());
|
||||
flag_bldr.Set(Flag__file__page_exists , file__src_exists && img_src.File_page_exists());
|
||||
flag_bldr.Set(Flag__file__is_orig , file__src_exists && img_src.File_is_orig());
|
||||
flag_bldr.Set(Flag__file__repo_is_local , file__src_exists && !img_src.Repo_is_commons());
|
||||
flag_bldr.Set(Flag__file__src_exists , file__src_exists);
|
||||
flag_bldr.Set(Flag__img__cls_other_exists , img_cls.Other_exists());
|
||||
flag_bldr.Set(Flag__anch__ns_is_image , anch_href.Page_ns_id_is_image());
|
||||
flag_bldr.Set(Flag__anch__cls_tid , arg.Anch_cls().Tid());
|
||||
flag_bldr.Set(Flag__anch__ns_is_custom , anch__ns_is_custom);
|
||||
flag_bldr.Set(Flag__anch__cls_tid , data.Anch_cls().Tid());
|
||||
flag_bldr.Set(Flag__anch__ns_id_needs_saving , anch__ns_id_needs_saving);
|
||||
flag_bldr.Set(Flag__img__alt_diff_from_anch_title , img__alt_diff_from_anch_title);
|
||||
flag_bldr.Set(Flag__anch__href_diff_file , anch_href_diff_file);
|
||||
@@ -66,43 +70,52 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
flag_bldr.Set(Flag__anch__href_tid , anch_href.Tid());
|
||||
// Tfds.Dbg(flag_bldr.Encode(), Array_.To_str(flag_bldr.Val_ary()));
|
||||
|
||||
if (write_hdr) bfr.Add(Xoh_hzip_dict_.Bry__img);
|
||||
if (wkr_is_root) bfr.Add(hook);
|
||||
Xoh_hzip_int_.Encode(2, bfr, flag_bldr.Encode());
|
||||
if (anch_href_tid == Xoh_anch_href_parser.Tid__inet)
|
||||
anch_href_mid.Mid_(src, anch_href.Val_bgn(), anch_href.Val_end());
|
||||
else
|
||||
anch_href_mid.Val_(anch_href_ttl.Page_db());
|
||||
switch (anch_href_tid) {
|
||||
case Xoh_anch_href_itm.Tid__inet:
|
||||
anch_href_mid.Mid_(src, anch_href.Rng_bgn(), anch_href.Rng_end());
|
||||
break;
|
||||
case Xoh_anch_href_itm.Tid__site:
|
||||
anch_href_mid.Val_(tmp_bfr.Add_mid(src, anch_href.Site_bgn(), anch_href.Site_end()).Add_byte(Byte_ascii.Pipe).Add(anch_href.Ttl_page_db()).To_bry_and_clear());
|
||||
break;
|
||||
case Xoh_anch_href_itm.Tid__wiki:
|
||||
case Xoh_anch_href_itm.Tid__anch:
|
||||
anch_href_mid.Val_(anch_href.Ttl_page_db());
|
||||
break;
|
||||
}
|
||||
bfr.Add_bry_ref_obj(anch_href_mid);
|
||||
bfr.Add_byte(Xoh_hzip_dict_.Escape);
|
||||
if (anch_href_diff_file) {
|
||||
arg.Anch_page().Bfr_arg__add(bfr);
|
||||
data.Anch_page().Bfr_arg__add(bfr);
|
||||
bfr.Add_byte(Xoh_hzip_dict_.Escape);
|
||||
}
|
||||
switch (anch_href_tid) {
|
||||
case Xoh_anch_href_parser.Tid__anch:
|
||||
case Xoh_anch_href_parser.Tid__inet:
|
||||
case Xoh_anch_href_itm.Tid__anch:
|
||||
case Xoh_anch_href_itm.Tid__inet:
|
||||
break;
|
||||
case Xoh_anch_href_parser.Tid__wiki:
|
||||
case Xoh_anch_href_parser.Tid__site:
|
||||
case Xoh_anch_href_itm.Tid__wiki:
|
||||
case Xoh_anch_href_itm.Tid__site:
|
||||
if (anch__ns_id_needs_saving)
|
||||
Xoh_lnki_dict_.Ns_encode(bfr, anch_href_ttl.Ns().Id());
|
||||
Xoh_lnki_dict_.Ns_encode(bfr, anch_href.Ttl_ns_id());
|
||||
break;
|
||||
}
|
||||
if (anch__ns_is_custom) bfr.Add(data.Anch_href().Ttl_ns_custom()).Add_byte(Xoh_hzip_dict_.Escape);
|
||||
if (file__src_exists) {
|
||||
Xoh_hzip_int_.Encode(2, bfr, Xoh_hzip_int_.Neg_1_adj + arg.Img_w());
|
||||
Xoh_hzip_int_.Encode(2, bfr, Xoh_hzip_int_.Neg_1_adj + arg.Img_h());
|
||||
if (arg.Img_w__diff__file_w()) Xoh_hzip_int_.Encode(2, bfr, Xoh_hzip_int_.Neg_1_adj + img_src.File_w());
|
||||
Xoh_hzip_int_.Encode(2, bfr, Xoh_hzip_int_.Neg_1_adj + data.Img_w());
|
||||
Xoh_hzip_int_.Encode(2, bfr, Xoh_hzip_int_.Neg_1_adj + data.Img_h());
|
||||
if (data.Img_w__diff__file_w()) Xoh_hzip_int_.Encode(2, bfr, Xoh_hzip_int_.Neg_1_adj + img_src.File_w());
|
||||
if (img_src.File_time_exists()) Xoh_hzip_int_.Encode(1, bfr, Xoh_hzip_int_.Neg_1_adj + img_src.File_time());
|
||||
if (img_src.File_page_exists()) Xoh_hzip_int_.Encode(1, bfr, Xoh_hzip_int_.Neg_1_adj + img_src.File_page());
|
||||
}
|
||||
else
|
||||
bfr.Add_mid(src, img_xoimg.Val_bgn(), img_xoimg.Val_end()).Add_byte(Xoh_hzip_dict_.Escape);
|
||||
xoimg.Encode(bfr, hctx.Hzip__stat(), src, img_xoimg);
|
||||
if (anch_title.Val_dat_exists()) bfr.Add_mid(src, anch_title.Val_bgn(), anch_title.Val_end()).Add_byte(Xoh_hzip_dict_.Escape);
|
||||
if (img__alt_diff_from_anch_title) bfr.Add_mid(src, arg.Img_alt().Val_bgn(), arg.Img_alt().Val_end()).Add_byte(Xoh_hzip_dict_.Escape);
|
||||
if (img_cls.Other_exists()) bfr.Add_mid(src, img_cls.Other_bgn(), img_cls.Other_end()).Add_byte(Xoh_hzip_dict_.Escape);
|
||||
if (img__alt_diff_from_anch_title) bfr.Add_mid(src, data.Img_alt().Val_bgn(), data.Img_alt().Val_end()).Add_byte(Xoh_hzip_dict_.Escape);
|
||||
if (img_cls.Other_exists()) bfr.Add_mid(src, img_cls.Other_bgn(), img_cls.Other_end()).Add_byte(Xoh_hzip_dict_.Escape);
|
||||
return this;
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx hctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
|
||||
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
|
||||
// decode rdr
|
||||
int flag = rdr.Read_int_by_base85(2);
|
||||
flag_bldr.Decode(flag);
|
||||
@@ -110,7 +123,7 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
boolean file__repo_is_local = flag_bldr.Get_as_bool(Flag__file__repo_is_local);
|
||||
boolean file__src_exists = flag_bldr.Get_as_bool(Flag__file__src_exists);
|
||||
boolean img__cls_other_exists = flag_bldr.Get_as_bool(Flag__img__cls_other_exists);
|
||||
boolean anch__ns_is_image = flag_bldr.Get_as_bool(Flag__anch__ns_is_image);
|
||||
boolean anch__ns_is_custom = flag_bldr.Get_as_bool(Flag__anch__ns_is_custom);
|
||||
boolean anch__ns_id_needs_saving = flag_bldr.Get_as_bool(Flag__anch__ns_id_needs_saving);
|
||||
int anch__cls_tid = flag_bldr.Get_as_int(Flag__anch__cls_tid);
|
||||
boolean img__alt_diff_from_anch_title = flag_bldr.Get_as_bool(Flag__img__alt_diff_from_anch_title);
|
||||
@@ -119,21 +132,36 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
int img_cls = flag_bldr.Get_as_int(Flag__img__cls_tid);
|
||||
int anch__href_tid = flag_bldr.Get_as_int(Flag__anch__href_tid);
|
||||
byte[] page_db = rdr.Read_bry_to();
|
||||
byte[] site_bry = null;
|
||||
switch (anch__href_tid) {
|
||||
case Xoh_anch_href_itm.Tid__anch:
|
||||
case Xoh_anch_href_itm.Tid__inet:
|
||||
case Xoh_anch_href_itm.Tid__wiki:
|
||||
break;
|
||||
case Xoh_anch_href_itm.Tid__site:
|
||||
int pipe_pos = Bry_find_.Find_fwd(page_db, Byte_ascii.Pipe);
|
||||
site_bry = Bry_.Mid(page_db, 0, pipe_pos);
|
||||
page_db = Bry_.Mid(page_db, pipe_pos + 1);
|
||||
break;
|
||||
}
|
||||
byte[] file_db = page_db;
|
||||
if (anch_href_diff_file) file_db = rdr.Read_bry_to();
|
||||
int anch_href_ns = -1;
|
||||
if (anch__ns_id_needs_saving)
|
||||
anch_href_ns = Xoh_lnki_dict_.Ns_decode(rdr);
|
||||
int img_xoimg_bgn = -1, img_xoimg_end = -1, img_w = -1, img_h = -1, file_time = -1, file_page = -1;
|
||||
int ns_custom_bgn = -1, ns_custom_end = -1;
|
||||
if (anch__ns_is_custom) {
|
||||
ns_custom_bgn = rdr.Pos();
|
||||
ns_custom_end = rdr.Find_fwd_lr();
|
||||
}
|
||||
int img_w = -1, img_h = -1, file_time = -1, file_page = -1;
|
||||
xoimg_parser.Clear();
|
||||
if (file__src_exists) {
|
||||
img_w = rdr.Read_int_by_base85(2) - Xoh_hzip_int_.Neg_1_adj;
|
||||
img_h = rdr.Read_int_by_base85(2) - Xoh_hzip_int_.Neg_1_adj;
|
||||
}
|
||||
else {
|
||||
img_xoimg_bgn = rdr.Pos();
|
||||
img_xoimg_end = rdr.Find_fwd_lr();
|
||||
xoimg_parser.Parse(rdr, src, img_xoimg_bgn, img_xoimg_end);
|
||||
xoimg.Decode(bfr, hctx, hpg, rdr, src, xoimg_parser);
|
||||
}
|
||||
int anch_title_bgn = -1, anch_title_end = -1;
|
||||
if (!anch__title_missing) {
|
||||
@@ -156,22 +184,24 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
|
||||
// transform values
|
||||
boolean anch_rel_is_nofollow = false;
|
||||
if (anch__ns_id_needs_saving) {
|
||||
Xoa_ttl anch_href_ttl = hctx.Wiki__ttl_parser().Ttl_parse(anch_href_ns, page_db);
|
||||
anch_href_arg.Set_by_page(anch_href_ttl.Full_db());
|
||||
if (anch__href_tid == Xoh_anch_href_itm.Tid__inet) {
|
||||
Gfo_url_encoder_.Href.Encode(tmp_bfr, page_db);
|
||||
anch_rel_is_nofollow = true;
|
||||
}
|
||||
else {
|
||||
if (anch__href_tid == Xoh_anch_href_parser.Tid__inet) {
|
||||
anch_href_arg.Set_by_raw(gplx.langs.htmls.encoders.Gfo_url_encoder_.Href.Encode(page_db));
|
||||
anch_rel_is_nofollow = true;
|
||||
if (anch__href_tid == Xoh_anch_href_itm.Tid__site)
|
||||
tmp_bfr.Add(Xoh_href_.Bry__site).Add(site_bry);
|
||||
if (anch__ns_id_needs_saving) {
|
||||
Xoa_ttl anch_href_ttl = hctx.Wiki__ttl_parser().Ttl_parse(anch_href_ns, page_db);
|
||||
tmp_bfr.Add(Xoh_href_.Bry__wiki).Add(anch_href_ttl.Full_url());
|
||||
}
|
||||
else {
|
||||
if (anch__ns_is_image)
|
||||
anch_href_arg.Set_by_raw(Bry_.Add(gplx.xowa.htmls.hrefs.Xoh_href_.Bry__wiki, gplx.xowa.wikis.nss.Xow_ns_.Alias__image__bry, Byte_ascii.Colon_bry, gplx.langs.htmls.encoders.Gfo_url_encoder_.Href.Encode(page_db)));
|
||||
else
|
||||
anch_href_arg.Set_by_file(gplx.langs.htmls.encoders.Gfo_url_encoder_.Href.Encode(page_db));
|
||||
byte[] ns_bry = anch__ns_is_custom ? Bry_.Mid(src, ns_custom_bgn, ns_custom_end) : Xow_ns_.Bry__file;
|
||||
tmp_bfr.Add(Xoh_href_.Bry__wiki).Add(ns_bry).Add_byte_colon();
|
||||
Gfo_url_encoder_.Href.Encode(tmp_bfr, page_db);
|
||||
}
|
||||
}
|
||||
anch_href_arg.Set_by_raw(tmp_bfr.To_bry_and_clear());
|
||||
// NOTE: src must go underneath ttl
|
||||
Xof_url_bldr url_bldr = hctx.File__url_bldr();
|
||||
url_bldr.Init_by_root(file__repo_is_local ? hctx.Fsys__file__wiki() : hctx.Fsys__file__comm(), Byte_ascii.Slash, false, false, Md5_depth);
|
||||
@@ -179,20 +209,18 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
byte[] img_src = url_bldr.Xto_bry();
|
||||
|
||||
anch_cls_mid.Val_(Xoh_anch_cls_.To_val(anch__cls_tid));
|
||||
anch_title_mid.Mid_(src, anch_title_bgn, anch_title_end); if (anch_title_bgn == anch_title_end) anch_title_mid.Val_(null);
|
||||
if (!anch__title_missing) anch_title_mid.Mid_(src, anch_title_bgn, anch_title_end); else {anch_title_mid.Val_(null);} // if (anch_title_bgn == anch_title_end) anch_title_mid.Val_(null);
|
||||
img_alt_mid.Mid_(src, img_alt_bgn, img_alt_end); if (img_alt_mid.Val_is_empty()) img_alt_mid.Val_(Bry_.Empty);
|
||||
img_src_mid.Val_(img_src);
|
||||
img_cls_mid.Val_(Xoh_img_cls_.To_val_or_null(img_cls, img_cls_other));
|
||||
|
||||
bldr.Make(bfr, hpg, hctx, src, file_db, xoimg_parser, anch_rel_is_nofollow, anch_href_arg, anch_cls_mid, anch_title_mid, img_w, img_h, img_src_mid, img_cls_mid, img_alt_mid);
|
||||
if (write_to_bfr) bldr.Wtr().Bfr_arg__add(bfr);
|
||||
bldr.Make(bfr, hpg, hctx, src, file_db, xoimg_parser, !file__src_exists, anch_rel_is_nofollow, anch_href_arg, anch_cls_mid, anch_title_mid, img_w, img_h, img_src_mid, img_cls_mid, img_alt_mid);
|
||||
if (wkr_is_root) bldr.Wtr().Bfr_arg__add(bfr);
|
||||
|
||||
return rv;
|
||||
}
|
||||
public int Pool__idx() {return pool_idx;} private int pool_idx;
|
||||
public void Pool__clear (Object[] args) {}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_img_hzip rv = new Xoh_img_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_img_hzip rv = new Xoh_img_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
|
||||
public static int Md5_depth = 2;
|
||||
private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_( 1, 1, 1, 1 , 1, 1, 1, 1 , 2, 1, 1, 1 , 1, 2, 2);
|
||||
private static final int // SERIALIZED
|
||||
@@ -203,7 +231,7 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
, Flag__file__repo_is_local = 4
|
||||
, Flag__file__src_exists = 5
|
||||
, Flag__img__cls_other_exists = 6
|
||||
, Flag__anch__ns_is_image = 7
|
||||
, Flag__anch__ns_is_custom = 7
|
||||
, Flag__anch__cls_tid = 8 // none, image
|
||||
, Flag__anch__ns_id_needs_saving = 9
|
||||
, Flag__img__alt_diff_from_anch_title = 10
|
||||
|
||||
@@ -22,32 +22,55 @@ public class Xoh_img_hzip__dump__tst {
|
||||
@Before public void Clear() {fxt.Clear();}
|
||||
@Test public void Basic() { // [[File:A.png|border|class=other|220px|abc]]
|
||||
fxt.Test__bicode
|
||||
( "~%!!A.png~0|220|110|0.5|-1|-1~abc~"
|
||||
, "<a href='/wiki/File:A.png' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
( "~%!!A.png~)#Sabc~"
|
||||
, "<a href='/wiki/File:A.png' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
);
|
||||
}
|
||||
@Test public void Anch() { // [[File:A.png#b|abc]] // NOTE(review): wikitext example uses "#b" but the expected href below uses "#file" -- confirm which is intended
|
||||
fxt.Test__bicode
|
||||
( "~%\"<A.png#file~A.png~)#Sabc~" // hzip form: href page (with anchor) is stored first, then the file title because it differs from the href
|
||||
, "<a href='/wiki/File:A.png#file' class='image' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
);
|
||||
}
|
||||
@Test public void Link__cs() { // [[File:A.png|link=File:a.ogg|abc]]
|
||||
fxt.Test__bicode
|
||||
( "~%!Aa.ogg~A.png~)#Sabc~"
|
||||
, "<a href='/wiki/File:a.ogg' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
);
|
||||
}
|
||||
@Test public void Href__encoding() { // [[File:Aéb.png|abc]]
|
||||
fxt.Test__bicode
|
||||
( "~%!!Aéb.png~0|220|110|0.5|-1|-1~abc~"
|
||||
, "<a href='/wiki/File:A%C3%A9b.png' class='image' title='abc' xowa_title='Aéb.png'><img id='xoimg_0' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
( "~%!qAéb.png~)#Sabc~"
|
||||
, "<a href='/wiki/File:A%C3%A9b.png' class='image' xowa_title='Aéb.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
);
|
||||
}
|
||||
@Test public void Href__encoding__link() { // [[File:Aéb.png|abc|link=Aéb]]
|
||||
fxt.Test__bicode
|
||||
( "~%#gAéb~Aéb.png~#)#Sabc~"
|
||||
, "<a href='/wiki/A%C3%A9b' class='image' xowa_title='Aéb.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
);
|
||||
}
|
||||
@Test public void Href__apos() { // [[File:A'b.png|border|link=A'b_link|A'b_capt]]
|
||||
String html = "<a href=\"/wiki/A%27b_link\" class=\"image\" xowa_title=\"A'b.png\"><img id=\"xoimg_0\" data-xoimg=\"0|220|-1|-1|-1|-1\" src=\"\" width=\"0\" height=\"0\" class=\"thumbborder\" alt=\"A'b_capt\"></a>";
|
||||
fxt.Test__bicode_raw("~%#oA'b_link~A'b.png~#)#SA'b_capt~", html, html);
|
||||
}
|
||||
@Test public void Link__wm__n() { // [[File:A.png|link=http://a.org|abc]]
|
||||
fxt.Test__bicode
|
||||
( "~%!Dhttp://a.org~A.png~0|220|110|0.5|-1|-1~abc~"
|
||||
, "<a href='http://a.org' rel='nofollow' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
( "~%!Dhttp://a.org~A.png~)#Sabc~"
|
||||
, "<a href='http://a.org' rel='nofollow' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
);
|
||||
}
|
||||
// @Test public void Link__wm__y() { // [[File:A.png|link=//en.wiktionary.org/wiki/A|abc]]
|
||||
// fxt.Test__bicode
|
||||
// ( "~%!i=!!!!A~abc~"
|
||||
// , "<a href='/site/en.wiktionary.org/wiki/A' class='image' title='abc'><img id='xoimg_0' alt='abc'></a>"
|
||||
// );
|
||||
// }
|
||||
@Test public void Link__wm__y() { // [[File:A.png|link=http://en.wiktionary.org/wiki/Special:Search/A|abc]]
|
||||
fxt.Test__bicode
|
||||
( "~%\"men.wiktionary.org|Search/A~A.png~\")#Sabc~" // site href is stored as "site|page"; matches the Tid__site branch of Xoh_img_hzip.Encode which joins them with a pipe
|
||||
, "<a href='/site/en.wiktionary.org/wiki/Special:Search/A' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
);
|
||||
}
|
||||
// lhs='<a href="/site/en.wiktionary.org/wiki/Special:Search/A" class="image" title="B" xowa_title="Commons-logo.svg"><img data-xoimg="0|40|40|-1|-1|-1" src="" width="0" height="0" alt="B"></a>
|
||||
@Test public void Href__image() { // [[Image:A.png|abc]]
|
||||
fxt.Test__bicode
|
||||
( "~%-%A.png~0|220|110|0.5|-1|-1~abc~"
|
||||
, "<a href='/wiki/Image:A.png' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
( "~%-%A.png~Image~)#Sabc~"
|
||||
, "<a href='/wiki/Image:A.png' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
);
|
||||
}
|
||||
@Test public void Missing() { // PURPOSE: bad dump shouldn't write corrupt data
|
||||
|
||||
@@ -18,12 +18,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.htmls.core.wkrs.imgs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import gplx.core.primitives.*; import gplx.core.brys.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*; import gplx.xowa.htmls.core.wkrs.imgs.atrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*; import gplx.xowa.htmls.core.wkrs.lnkis.htmls.*;
|
||||
import gplx.xowa.wikis.domains.*;
|
||||
import gplx.xowa.files.*;
|
||||
public class Xoh_img_parser {
|
||||
public class Xoh_img_parser {
|
||||
private byte[] src;
|
||||
public int Rng_bgn() {return rng_bgn;} private int rng_bgn;
|
||||
public int Rng_end() {return rng_end;} private int rng_end;
|
||||
public Xoh_anch_href_parser Anch_href() {return anch_href;} private Xoh_anch_href_parser anch_href = new Xoh_anch_href_parser();
|
||||
public int Src_bgn() {return src_bgn;} private int src_bgn;
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
public Xoh_anch_href_itm Anch_href() {return anch_href;} private Xoh_anch_href_itm anch_href = new Xoh_anch_href_itm();
|
||||
public Xoh_anch_cls_parser Anch_cls() {return anch_cls;} private Xoh_anch_cls_parser anch_cls = new Xoh_anch_cls_parser();
|
||||
public Html_atr Anch_title() {return anch_title;} private Html_atr anch_title;
|
||||
public Bry_obj_ref Anch_page() {return anch_page;} private Bry_obj_ref anch_page = Bry_obj_ref.New_empty();
|
||||
@@ -35,29 +36,46 @@ public class Xoh_img_parser {
|
||||
public int Img_w() {return img_w;} private int img_w;
|
||||
public int Img_h() {return img_h;} private int img_h;
|
||||
public boolean Img_w__diff__file_w() {return img_w != img_src.File_w();}
|
||||
public int Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, byte[] src, Html_tag_rdr tag_rdr, Html_tag anch_head) {
|
||||
this.src = src; Bry_rdr rdr = tag_rdr.Rdr();
|
||||
this.rng_bgn = anch_head.Src_bgn(); // <a
|
||||
if (!anch_href.Parse(rdr, hctx.App(), hctx.Wiki__ttl_parser(), anch_head)) return Xoh_hdoc_ctx.Invalid; // href='/wiki/File:A.png'
|
||||
if (!anch_cls.Parse(rdr, src, anch_head)) return Xoh_hdoc_ctx.Invalid; // class='image'
|
||||
this.anch_title = anch_head.Atrs__get_by_or_empty(Html_atr_.Bry__title); // title='abc'
|
||||
Html_atr xowa_title = anch_head.Atrs__get_by_or_empty(Bry__atr__xowa_title); // xowa_title='A.png'
|
||||
public boolean Rel_nofollow_exists() { // true only when the href has a site part and that site's domain type is Xow_domain_tid_.Int__other; answer is cached in rel_nofollow_exists
|
||||
if (anch_href.Site_exists()) {
|
||||
if (rel_nofollow_exists == Bool_.__byte) { // still at the value assigned by Clear(); compute once and cache
|
||||
Xow_domain_itm itm = Xow_domain_itm_.parse(Bry_.Mid(src, anch_href.Site_bgn(), anch_href.Site_end())); // parse the site segment of the href out of src
|
||||
rel_nofollow_exists = itm.Domain_type_id() == Xow_domain_tid_.Int__other ? Bool_.Y_byte : Bool_.N_byte;
|
||||
}
|
||||
return rel_nofollow_exists == Bool_.Y_byte;
|
||||
}
|
||||
else
|
||||
return false; // href has no site part; cache is not consulted
|
||||
} private byte rel_nofollow_exists; // cached result: Bool_.__byte until computed, then Bool_.Y_byte or Bool_.N_byte
|
||||
private void Clear() { // resets per-parse state; Parse calls this before reading the anchor tag
|
||||
this.rel_nofollow_exists = Bool_.__byte; // drop cached answer so Rel_nofollow_exists() recomputes for the next href
|
||||
this.src_bgn = src_end = img_w = img_h = -1; // -1 for both source positions and both image dimensions
|
||||
this.anch_title = this.img_alt = Html_atr.Noop; // both attributes start as Html_atr.Noop until Parse assigns them
|
||||
}
|
||||
public boolean Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, byte[] src, Html_tag_rdr tag_rdr, Html_tag anch_head) {
|
||||
this.Clear();
|
||||
this.src = src; Bry_err_wkr err_wkr = tag_rdr.Err_wkr();
|
||||
this.src_bgn = anch_head.Src_bgn(); // <a
|
||||
if (!anch_href.Parse(err_wkr, hctx, anch_head)) return false; // href='/wiki/File:A.png'
|
||||
if (!anch_cls.Parse(err_wkr, src, anch_head)) return false; // class='image'
|
||||
this.anch_title = anch_head.Atrs__get_by_or_empty(Html_atr_.Bry__title); // title='abc'
|
||||
Html_atr xowa_title = anch_head.Atrs__get_by_or_empty(Bry__atr__xowa_title); // xowa_title='A.png'
|
||||
if (xowa_title.Val_dat_exists()) anch_page.Val_(xowa_title.Val());
|
||||
Html_tag img_tag = tag_rdr.Tag__move_fwd_head().Chk_id(Html_tag_.Id__img); // <img
|
||||
img_xoimg_parser.Parse(rdr, src, img_tag); // data-xoimg='...'
|
||||
this.img_w = img_tag.Atrs__get_as_int_or(Html_atr_.Bry__width, Xof_img_size.Size__neg1); // width='220'
|
||||
this.img_h = img_tag.Atrs__get_as_int_or(Html_atr_.Bry__height, Xof_img_size.Size__neg1); // height='110'
|
||||
this.img_alt = img_tag.Atrs__get_by_or_empty(Html_atr_.Bry__alt); // alt='File:A.png'
|
||||
img_cls.Parse(rdr, src, img_tag); // class='thumbborder'
|
||||
if (!img_src.Parse(rdr, hctx.Wiki__domain_bry(), img_tag)) return Xoh_hdoc_ctx.Invalid; // src='...'
|
||||
Html_tag img_tag = tag_rdr.Tag__move_fwd_head().Chk_id(Html_tag_.Id__img); // <img
|
||||
img_xoimg_parser.Parse(err_wkr, src, img_tag); // data-xoimg='...'
|
||||
this.img_w = img_tag.Atrs__get_as_int_or(Html_atr_.Bry__width, Xof_img_size.Size__neg1); // width='220'
|
||||
this.img_h = img_tag.Atrs__get_as_int_or(Html_atr_.Bry__height, Xof_img_size.Size__neg1); // height='110'
|
||||
this.img_alt = img_tag.Atrs__get_by_or_empty(Html_atr_.Bry__alt); // alt='File:A.png'
|
||||
img_cls.Parse(err_wkr, src, img_tag); // class='thumbborder'
|
||||
if (!img_src.Parse(err_wkr, hctx.Wiki__domain_bry(), img_tag)) return false; // src='...'
|
||||
if (anch_page.Val_is_empty()) {
|
||||
anch_page.Val_(img_src.File_ttl_bry());
|
||||
if (anch_page.Val_is_empty())
|
||||
anch_page.Val_(anch_href.Page_ttl().Page_db());
|
||||
anch_page.Val_(anch_href.Ttl_page_db());
|
||||
}
|
||||
Html_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__a); // </a>
|
||||
this.rng_end = anch_tail.Src_end();
|
||||
return rng_end;
|
||||
Html_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__a); // </a>
|
||||
this.src_end = anch_tail.Src_end();
|
||||
return true;
|
||||
}
|
||||
public static final byte[]
|
||||
Bry__cls__anch__image = Bry_.new_a7("image")
|
||||
|
||||
@@ -53,7 +53,7 @@ public class Xoh_img_wtr extends gplx.core.brys.Bfr_arg_base {
|
||||
public Xoh_img_wtr Img_id_(byte[] prefix, int uid) {img_id.Set_by_arg(img_id_val.Set(prefix, uid)); return this;}
|
||||
public Xoh_img_wtr Img_w_(int v) {img_w.Set_by_int(v); return this;}
|
||||
public Xoh_img_wtr Img_h_(int v) {img_h.Set_by_int(v); return this;}
|
||||
public Xoh_img_wtr Img_xoimg_(byte[] src, int bgn, int end) {img_xoimg.Set_by_mid(src, bgn, end); return this;}
|
||||
public Xoh_img_wtr Img_xoimg_(Bfr_arg v) {img_xoimg.Set_by_arg(v); return this;}
|
||||
public Xoh_img_wtr Img_cls_(Bfr_arg v) {img_cls.Set_by_arg(v); return this;}
|
||||
public Xoh_img_wtr Img_src_(Bfr_arg v) {img_src.Set_by_arg(v); return this;}
|
||||
public Xoh_img_wtr Img_src_empty_() {img_src.Set_by_bry(Bry_.Empty); return this;}
|
||||
|
||||
@@ -22,7 +22,7 @@ public class Xoh_anch_cls_parser {
|
||||
private final Bry_rdr rdr = new Bry_rdr();
|
||||
public byte Tid() {return tid;} private byte tid;
|
||||
public Html_atr Atr() {return atr;} private Html_atr atr;
|
||||
public boolean Parse(Bry_rdr owner_rdr, byte[] src, Html_tag tag) {
|
||||
public boolean Parse(Bry_err_wkr err_wkr, byte[] src, Html_tag tag) {
|
||||
this.atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__class); // EX: class='image'
|
||||
int src_bgn = atr.Val_bgn(); int src_end = atr.Val_end();
|
||||
if (src_bgn == -1) {
|
||||
@@ -30,7 +30,7 @@ public class Xoh_anch_cls_parser {
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
rdr.Init_by_sub(owner_rdr, "anch.cls", src_bgn, src_end);
|
||||
rdr.Init_by_wkr(err_wkr, "anch.cls", src_bgn, src_end);
|
||||
this.tid = rdr.Chk_or(Xoh_anch_cls_.Trie, Byte_ascii.Max_7_bit);
|
||||
return tid != Byte_ascii.Max_7_bit;
|
||||
}
|
||||
|
||||
@@ -25,17 +25,17 @@ public class Xoh_img_cls_parser {
|
||||
public int Other_end() {return other_end;} private int other_end;
|
||||
public boolean Other_exists() {return other_end > other_bgn;}
|
||||
public Html_atr Atr() {return atr;} private Html_atr atr;
|
||||
public void Parse(Bry_rdr owner_rdr, byte[] src, Html_tag tag) {
|
||||
public void Parse(Bry_err_wkr err_wkr, byte[] src, Html_tag tag) {
|
||||
this.atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__class); // EX: class='thumbborder'
|
||||
Parse(owner_rdr, src, atr.Val_bgn(), atr.Val_end());
|
||||
Parse(err_wkr, src, atr.Val_bgn(), atr.Val_end());
|
||||
}
|
||||
private void Parse(Bry_rdr owner_rdr, byte[] src, int src_bgn, int src_end) {
|
||||
private void Parse(Bry_err_wkr err_wkr, byte[] src, int src_bgn, int src_end) {
|
||||
if (src_bgn == -1) {
|
||||
this.cls_tid = Xoh_img_cls_.Tid__none;
|
||||
this.other_bgn = this.other_end = -1;
|
||||
return;
|
||||
}
|
||||
rdr.Init_by_sub(owner_rdr, "img.cls", src_bgn, src_end);
|
||||
rdr.Init_by_wkr(err_wkr, "img.cls", src_bgn, src_end);
|
||||
this.cls_tid = rdr.Chk(Xoh_img_cls_.Trie);
|
||||
if (rdr.Is(Byte_ascii.Space)) {
|
||||
this.other_bgn = rdr.Pos();
|
||||
|
||||
@@ -20,7 +20,7 @@ import gplx.core.brys.*; import gplx.core.btries.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
import gplx.xowa.wikis.domains.*;
|
||||
public class Xoh_img_src_parser implements Xoh_itm_parser {
|
||||
private final Bry_rdr rdr = new Bry_rdr(); private byte[] src;
|
||||
private final Bry_rdr rdr = new Bry_rdr().Dflt_dlm_(Byte_ascii.Slash); private byte[] src;
|
||||
public void Fail_throws_err_(boolean v) {rdr.Fail_throws_err_(v);}// TEST
|
||||
public int Val_bgn() {return val_bgn;} private int val_bgn;
|
||||
public int Val_end() {return val_end;} private int val_end;
|
||||
@@ -45,18 +45,18 @@ public class Xoh_img_src_parser implements Xoh_itm_parser {
|
||||
file_ttl_bry = null;
|
||||
atr = null;
|
||||
}
|
||||
public boolean Parse(Bry_rdr owner_rdr, byte[] domain_bry, Html_tag tag) {
|
||||
public boolean Parse(Bry_err_wkr err_wkr, byte[] domain_bry, Html_tag tag) {
|
||||
this.Clear();
|
||||
this.atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__src);
|
||||
if (!atr.Val_dat_exists()) return true; // empty src; just return true;
|
||||
return Parse(owner_rdr, domain_bry, atr.Val_bgn(), atr.Val_end());
|
||||
return Parse(err_wkr, domain_bry, atr.Val_bgn(), atr.Val_end());
|
||||
}
|
||||
public boolean Parse(Bry_rdr owner_rdr, byte[] domain_bry, int val_bgn, int val_end) { // EX: src="file:///C:/xowa/file/commons.wikimedia.org/thumb/7/0/1/2/A.png/220px.png"
|
||||
public boolean Parse(Bry_err_wkr err_wkr, byte[] domain_bry, int val_bgn, int val_end) { // EX: src="file:///C:/xowa/file/commons.wikimedia.org/thumb/7/0/1/2/A.png/220px.png"
|
||||
this.Clear();
|
||||
this.src = owner_rdr.Src();
|
||||
this.src = err_wkr.Src();
|
||||
this.val_bgn = val_bgn; this.val_end = val_end;
|
||||
if (val_end == val_bgn) return true; // empty src; just return true;
|
||||
file_w = file_time = file_page = -1;
|
||||
rdr.Init_by_sub(owner_rdr, "img.src.xowa", val_bgn, val_end).Dflt_dlm_(Byte_ascii.Slash);
|
||||
rdr.Init_by_wkr(err_wkr, "img.src.xowa", val_bgn, val_end);
|
||||
rdr.Fail_throws_err_(Bool_.N);
|
||||
repo_bgn = rdr.Find_fwd_rr(Bry__file); // skip past /file/; EX: "file:///J:/xowa/file/commons.wikimedia.org/"
|
||||
if (repo_bgn == -1) return false;
|
||||
|
||||
@@ -36,8 +36,7 @@ public class Xoh_img_src_parser_tst {
|
||||
// fxt.Test__parse__fail("file:///C:/xowa/file/en.wiktionary.org/orig/7/0/A.png", "repo must be commons or self: repo='en.wiktionary.org' ctx='Main_Page' wkr='img.src.xowa' excerpt='file:///C:/xowa/file/en.wiktionary.org/orig/7/0/A.png'");
|
||||
// }
|
||||
}
|
||||
class Xoh_img_src_parser_fxt extends Xoh_itm_parser_fxt_base {
|
||||
private final Xoh_img_src_parser parser = new Xoh_img_src_parser();
|
||||
class Xoh_img_src_parser_fxt extends Xoh_itm_parser_fxt { private final Xoh_img_src_parser parser = new Xoh_img_src_parser();
|
||||
@Override public Xoh_itm_parser Parser_get() {return parser;}
|
||||
public void Test__parse(String src_str, String expd_repo, boolean expd_file_is_orig, String expd_file, int expd_w, int expd_time, int expd_page) {
|
||||
Exec_parse(src_str);
|
||||
@@ -48,7 +47,7 @@ class Xoh_img_src_parser_fxt extends Xoh_itm_parser_fxt_base {
|
||||
Tfds.Eq_int(expd_time, parser.File_time());
|
||||
Tfds.Eq_int(expd_page, parser.File_page());
|
||||
}
|
||||
@Override public void Exec_parse_hook(Bry_rdr owner_rdr, int src_bgn, int src_end) {
|
||||
parser.Parse(owner_rdr, Xow_domain_itm_.Bry__enwiki, src_bgn, src_end);
|
||||
@Override public void Exec_parse_hook(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, int src_bgn, int src_end) {
|
||||
parser.Parse(err_wkr, Xow_domain_itm_.Bry__enwiki, src_bgn, src_end);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.imgs.atrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*;
|
||||
import gplx.core.brys.*;
|
||||
import gplx.xowa.htmls.core.hzips.*;
|
||||
import gplx.xowa.parsers.lnkis.*; import gplx.xowa.files.*;
|
||||
public class Xoh_img_xoimg_hzip {
|
||||
public void Encode(Bry_bfr bfr, Xoh_stat_itm stat_itm, byte[] src, Xoh_img_xoimg_parser arg) {
|
||||
boolean page_exists = arg.Lnki_page() != Xof_lnki_page.Null;
|
||||
boolean time_exists = arg.Lnki_time() != Xof_lnki_time.Null;
|
||||
boolean upright_exists = arg.Lnki_upright() != Xof_img_size.Upright_null;
|
||||
boolean height_exists = arg.Lnki_h() != Xof_img_size.Size__neg1;
|
||||
boolean width_exists = arg.Lnki_w() != Xof_img_size.Size__neg1;
|
||||
flag_bldr.Set(Flag__page_exists , page_exists);
|
||||
flag_bldr.Set(Flag__time_exists , time_exists);
|
||||
flag_bldr.Set(Flag__upright_exists , upright_exists);
|
||||
flag_bldr.Set(Flag__height_exists , height_exists);
|
||||
flag_bldr.Set(Flag__width_exists , width_exists);
|
||||
flag_bldr.Set(Flag__lnki_type , arg.Lnki_type());
|
||||
Xoh_hzip_int_.Encode(1, bfr, flag_bldr.Encode());
|
||||
if (width_exists) Xoh_hzip_int_.Encode(2, bfr, arg.Lnki_w());
|
||||
if (height_exists) Xoh_hzip_int_.Encode(2, bfr, arg.Lnki_h());
|
||||
if (upright_exists) bfr.Add_double(arg.Lnki_upright()).Add_byte(Xoh_hzip_dict_.Escape);
|
||||
if (time_exists) bfr.Add_double(arg.Lnki_time()).Add_byte(Xoh_hzip_dict_.Escape);
|
||||
if (page_exists) Xoh_hzip_int_.Encode(2, bfr, arg.Lnki_page());
|
||||
}
|
||||
public void Decode(Bry_bfr bfr, Xoh_hdoc_ctx hctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, Xoh_img_xoimg_parser arg) {
|
||||
int flag = rdr.Read_int_by_base85(1);
|
||||
flag_bldr.Decode(flag);
|
||||
boolean page_exists = flag_bldr.Get_as_bool(Flag__page_exists);
|
||||
boolean time_exists = flag_bldr.Get_as_bool(Flag__time_exists);
|
||||
boolean upright_exists = flag_bldr.Get_as_bool(Flag__upright_exists);
|
||||
boolean height_exists = flag_bldr.Get_as_bool(Flag__height_exists);
|
||||
boolean width_exists = flag_bldr.Get_as_bool(Flag__width_exists);
|
||||
byte tid = flag_bldr.Get_as_byte(Flag__lnki_type);
|
||||
int w = width_exists ? rdr.Read_int_by_base85(2) : Xof_img_size.Size__neg1;
|
||||
int h = height_exists ? rdr.Read_int_by_base85(2) : Xof_img_size.Size__neg1;
|
||||
double upright = upright_exists ? rdr.Read_double_to(Xoh_hzip_dict_.Escape) : Xof_img_size.Upright_null;
|
||||
double time = time_exists ? rdr.Read_double_to(Xoh_hzip_dict_.Escape) : Xof_lnki_time.Null;
|
||||
int page = page_exists ? rdr.Read_int_by_base85(2) : Xof_lnki_page.Null;
|
||||
arg.Set(tid, w, h, upright, time, page);
|
||||
}
|
||||
private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_( 1, 1 , 1, 1, 1, 3);
|
||||
private static final int // SERIALIZED
|
||||
Flag__page_exists = 0
|
||||
, Flag__time_exists = 1
|
||||
, Flag__upright_exists = 2
|
||||
, Flag__height_exists = 3
|
||||
, Flag__width_exists = 4 // none, thumbimage, thumbborder
|
||||
, Flag__lnki_type = 5 // null, none, frameless, frame, thumb; gplx.xowa.parsers.lnkis.Xop_lnki_type
|
||||
;
|
||||
}
|
||||
@@ -18,8 +18,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.htmls.core.wkrs.imgs.atrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*;
|
||||
import gplx.core.brys.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
public class Xoh_img_xoimg_parser {
|
||||
private final Bry_rdr rdr = new Bry_rdr();
|
||||
public class Xoh_img_xoimg_parser implements Bfr_arg {
|
||||
private final Bry_rdr rdr = new Bry_rdr().Dflt_dlm_(Byte_ascii.Pipe);
|
||||
public int Val_bgn() {return val_bgn;} private int val_bgn;
|
||||
public int Val_end() {return val_end;} private int val_end;
|
||||
public boolean Val_dat_exists() {return val_end > val_bgn;}
|
||||
@@ -32,15 +32,23 @@ public class Xoh_img_xoimg_parser {
|
||||
public void Clear() {
|
||||
val_bgn = val_end = -1;
|
||||
}
|
||||
public void Parse(Bry_rdr owner_rdr, byte[] src, Html_tag tag) {
|
||||
Html_atr atr = tag.Atrs__get_by_or_empty(Bry__name);
|
||||
Parse(owner_rdr, src, atr.Val_bgn(), atr.Val_end());
|
||||
public void Set(byte tid, int w, int h, double upright, double time, int page) {
|
||||
this.lnki_type = tid;
|
||||
this.lnki_w = w;
|
||||
this.lnki_h = h;
|
||||
this.lnki_upright = upright;
|
||||
this.lnki_time = time;
|
||||
this.lnki_page = page;
|
||||
}
|
||||
public void Parse(Bry_rdr owner_rdr, byte[] src, int src_bgn, int src_end) {
|
||||
public void Parse(Bry_err_wkr err_wkr, byte[] src, Html_tag tag) {
|
||||
Html_atr atr = tag.Atrs__get_by_or_empty(Bry__name);
|
||||
Parse(err_wkr, src, atr.Val_bgn(), atr.Val_end());
|
||||
}
|
||||
public void Parse(Bry_err_wkr err_wkr, byte[] src, int src_bgn, int src_end) {
|
||||
if (src_bgn == -1)
|
||||
this.Clear();
|
||||
else {
|
||||
rdr.Init_by_sub(owner_rdr, "img.xoimg", src_bgn, src_end).Dflt_dlm_(Byte_ascii.Pipe);
|
||||
rdr.Init_by_wkr(err_wkr, "img.xoimg", src_bgn, src_end);
|
||||
this.val_bgn = src_bgn;
|
||||
this.val_end = src_end;
|
||||
this.lnki_type = (byte)(rdr.Read_byte_to() - Byte_ascii.Num_0);
|
||||
@@ -51,6 +59,16 @@ public class Xoh_img_xoimg_parser {
|
||||
this.lnki_page = rdr.Read_int_to();
|
||||
}
|
||||
}
|
||||
public void Bfr_arg__clear() {}
|
||||
public boolean Bfr_arg__exists() {return true;}
|
||||
// Writes the six lnki values to bfr as text, pipe-separated, in the order type|w|h|upright|time|page (no trailing pipe).
// The order and the pipe delimiter look like the inverse of Parse in this class, whose rdr is built with Dflt_dlm_(Byte_ascii.Pipe).
// NOTE(review): Parse derives lnki_type from a single byte minus '0', while this writes it with Add_int_variable;
// presumably lnki_type is always a single digit -- confirm against Xop_lnki_type.
public void Bfr_arg__add(Bry_bfr bfr) {
|
||||
bfr.Add_int_variable(lnki_type).Add_byte_pipe();
|
||||
bfr.Add_int_variable(lnki_w).Add_byte_pipe();
|
||||
bfr.Add_int_variable(lnki_h).Add_byte_pipe();
|
||||
bfr.Add_double(lnki_upright).Add_byte_pipe();
|
||||
bfr.Add_double(lnki_time).Add_byte_pipe();
|
||||
bfr.Add_int_variable(lnki_page);
|
||||
}
|
||||
public static final byte[]
|
||||
Bry__name = Bry_.new_a7("data-xoimg")
|
||||
, Bry__html = Bry_.new_a7("\" data-xoimg=\"")
|
||||
|
||||
@@ -30,8 +30,8 @@ public class Xoh_lnke_dict_ {
|
||||
, Html__rhs_end = Bry_.new_a7("\">")
|
||||
;
|
||||
public static byte[]
|
||||
Html__rel__nofollow = Bry_.new_a7("nofollow")
|
||||
, Html__cls__external = Bry_.new_a7("external")
|
||||
Html__rel__nofollow = Bry_.new_a7("nofollow")
|
||||
, Html__cls__external = Bry_.new_a7("external")
|
||||
;
|
||||
public static final Hash_adp_bry Hash = Hash_adp_bry.ci_a7()
|
||||
.Add_bry_byte(Html__class__free, Type__free)
|
||||
|
||||
@@ -26,7 +26,7 @@ public class Xoh_lnke_html {
|
||||
byte lnke_type = Calc_type(lnke);
|
||||
if (!hctx.Mode_is_alt()) { // do not write "<a ...>" if mode is alt
|
||||
bfr.Add(Xoh_consts.A_bgn);
|
||||
if (Write_href(bfr, ctx, src, lnke, href_bgn, href_end, proto_is_xowa))
|
||||
if (Write_href(bfr, hctx, ctx, src, lnke, href_bgn, href_end, proto_is_xowa))
|
||||
bfr.Add(Xoh_lnke_dict_.Html__atr__0).Add(Xoh_lnke_dict_.To_html_class(lnke_type));
|
||||
bfr.Add(Xoh_lnke_dict_.Html__rhs_end);
|
||||
}
|
||||
@@ -37,9 +37,9 @@ public class Xoh_lnke_html {
|
||||
bfr.Add(Xoh_consts.A_end);
|
||||
}
|
||||
}
|
||||
public boolean Write_href(Bry_bfr bfr, Xop_ctx ctx, byte[] src, Xop_lnke_tkn lnke, int href_bgn, int href_end, boolean proto_is_xowa) {
|
||||
public boolean Write_href(Bry_bfr bfr, Xoh_wtr_ctx hctx, Xop_ctx ctx, byte[] src, Xop_lnke_tkn lnke, int href_bgn, int href_end, boolean proto_is_xowa) {
|
||||
byte[] lnke_xwiki_wiki = lnke.Lnke_xwiki_wiki();
|
||||
if (lnke_xwiki_wiki == null) {
|
||||
if (lnke_xwiki_wiki == null || hctx.Mode_is_hdump()) { // if hdump, never write xwiki format (/site/); always write in url format (https:); note that xwiki is set when wiki is installed locally
|
||||
if (lnke.Lnke_relative()) { // relative; EX: //a.org
|
||||
bfr.Add(ctx.Wiki().Utl__url_parser().Url_parser().Relative_url_protocol_bry()).Add_mid(src, href_bgn, href_end);
|
||||
return true;
|
||||
@@ -62,7 +62,7 @@ public class Xoh_lnke_html {
|
||||
.Add(href_encoder.Encode(lnke.Lnke_xwiki_page())); // NOTE: must encode page; EX:%22%3D -> '">' which will end attribute; PAGE:en.w:List_of_Category_A_listed_buildings_in_West_Lothian DATE:2014-07-15
|
||||
if (lnke.Lnke_xwiki_qargs() != null)
|
||||
Gfo_qarg_mgr.Concat_bfr(bfr, href_encoder, lnke.Lnke_xwiki_qargs()); // NOTE: must encode args
|
||||
return ctx.Wiki().App().Xwiki_mgr__missing(lnke_xwiki_wiki);
|
||||
return ctx.Wiki().App().Xwiki_mgr__missing(lnke_xwiki_wiki); // write "external" if hdump or xwiki is missing
|
||||
}
|
||||
}
|
||||
public void Write_caption(Bry_bfr bfr, Xoh_html_wtr html_wtr, Xoh_wtr_ctx hctx, Xop_ctx ctx, byte[] src, Xop_lnke_tkn lnke, int href_bgn, int href_end, boolean proto_is_xowa) {
|
||||
|
||||
@@ -32,4 +32,15 @@ public class Xoh_lnke_html__basic__tst {
|
||||
fxt.Wiki().Sys_cfg().Xowa_proto_enabled_(false);
|
||||
fxt.Test_parse_page_wiki_str("[xowa-cmd:\"a\" b]" , "[xowa-cmd:"a" b]"); // protocol is disabled: literalize String (i.e.: don't make it an anchor)
|
||||
}
|
||||
// PURPOSE: a protocol-relative external link to another wiki is rendered in one of two forms:
//   (a) https form with rel='nofollow' class='external text'
//   (b) xwiki form ("/site/<domain>/wiki/<page>") with no rel / class
// (b) is only expected once the wiki has been added as a user xwiki AND the writer is not in hdump mode.
// NOTE: the steps below are order-dependent; each call changes fixture state for the next assertion.
@Test public void Xwiki() {
|
||||
String wtxt = "[//en.wiktionary.org/wiki/A B]";
|
||||
String html_https = "<a href='https://en.wiktionary.org/wiki/A' rel='nofollow' class='external text'>B</a>";
|
||||
String html_xwiki = "<a href='/site/en.wiktionary.org/wiki/A'>B</a>";
|
||||
fxt.Test__parse__wtxt_to_html(wtxt, html_https); // https b/c wiki not installed
|
||||
fxt.Init_xwiki_add_user_("en.wiktionary.org");
|
||||
fxt.Test__parse__wtxt_to_html(wtxt, html_xwiki); // xwiki b/c wiki installed
|
||||
fxt.Hctx_(gplx.xowa.htmls.core.htmls.Xoh_wtr_ctx.Hdump);
|
||||
fxt.Test__parse__wtxt_to_html(wtxt, html_https); // https b/c hdump, even though wiki installed
|
||||
fxt.Hctx_(gplx.xowa.htmls.core.htmls.Xoh_wtr_ctx.Basic); // presumably restores the default ctx so later tests are unaffected -- confirm
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,71 +20,50 @@ import gplx.core.brys.*; import gplx.core.threads.poolables.*; import gplx.xowa.
|
||||
import gplx.langs.htmls.*; import gplx.xowa.htmls.core.hzips.*;
|
||||
public class Xoh_lnke_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
public String Key() {return Xoh_hzip_dict_.Key__lnke;}
|
||||
public Xoh_lnke_hzip Encode(Bry_bfr bfr, Hzip_stat_itm stat_itm, byte[] src, Xoh_lnke_parser arg) {
|
||||
byte anch_cls_type = arg.Anch_cls_type();
|
||||
boolean auto_exists = arg.Auto_id() != -1;
|
||||
boolean text_exists = arg.Capt_end() != -1;
|
||||
flag_bldr.Set(Flag__auto_exists , auto_exists);
|
||||
flag_bldr.Set(Flag__text_exists , text_exists);
|
||||
flag_bldr.Set(Flag__anch_cls , anch_cls_type);
|
||||
public byte[] Hook() {return hook;} private byte[] hook;
|
||||
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
|
||||
Xoh_lnke_parser data = (Xoh_lnke_parser)data_obj;
|
||||
boolean auto_exists = flag_bldr.Set_as_bool(Flag__auto_exists , data.Auto_id() != -1);
|
||||
boolean capt_exists = flag_bldr.Set_as_bool(Flag__capt_exists , data.Capt_exists());
|
||||
byte lnke_tid = flag_bldr.Set_as_byte(Flag__lnke_tid , data.Lnke_tid());
|
||||
|
||||
switch (anch_cls_type) {
|
||||
case Xoh_lnke_dict_.Type__free: stat_itm.Lnke__free__add();break;
|
||||
case Xoh_lnke_dict_.Type__auto: stat_itm.Lnke__auto__add(); break;
|
||||
case Xoh_lnke_dict_.Type__text: stat_itm.Lnke__text__add(); break;
|
||||
}
|
||||
bfr.Add(hook);
|
||||
bfr.Add_hzip_int(1, flag_bldr.Encode()); // add flag
|
||||
bfr.Add_hzip_mid(src, data.Href_bgn(), data.Href_end()); // add href
|
||||
if (auto_exists) bfr.Add_hzip_int(1, data.Auto_id()); // add autonumber
|
||||
if (capt_exists) bfr.Add_hzip_mid(src, data.Capt_bgn(), data.Capt_end()); // add caption
|
||||
|
||||
bfr.Add(Xoh_hzip_dict_.Bry__lnke); // add hook
|
||||
Xoh_hzip_int_.Encode(1, bfr, flag_bldr.Encode()); // add flag
|
||||
bfr.Add_mid(src, arg.Href_bgn(), arg.Href_end()); // add href
|
||||
bfr.Add_byte(Xoh_hzip_dict_.Escape);
|
||||
if (auto_exists)
|
||||
Xoh_hzip_int_.Encode(1, bfr, arg.Auto_id());
|
||||
else if (text_exists) {
|
||||
bfr.Add_mid(src, arg.Capt_bgn(), arg.Capt_end()); // add capt
|
||||
bfr.Add_byte(Xoh_hzip_dict_.Escape);
|
||||
}
|
||||
hctx.Hzip__stat().Lnke_add(lnke_tid);
|
||||
return this;
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx ctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
|
||||
int flag = rdr.Read_int_by_base85(1);
|
||||
flag_bldr.Decode(flag);
|
||||
boolean auto_exists = flag_bldr.Get_as_bool(Flag__auto_exists);
|
||||
boolean text_exists = flag_bldr.Get_as_bool(Flag__text_exists);
|
||||
byte anch_cls_type = flag_bldr.Get_as_byte(Flag__anch_cls);
|
||||
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
|
||||
int flag = rdr.Read_int_by_base85(1); flag_bldr.Decode(flag);
|
||||
boolean auto_exists = flag_bldr.Get_as_bool(Flag__auto_exists);
|
||||
boolean capt_exists = flag_bldr.Get_as_bool(Flag__capt_exists);
|
||||
byte lnke_tid = flag_bldr.Get_as_byte(Flag__lnke_tid);
|
||||
|
||||
int href_bgn = rdr.Pos();
|
||||
int href_end = rdr.Find_fwd_lr();
|
||||
int href_bgn = rdr.Pos(); int href_end = rdr.Find_fwd_lr();
|
||||
int auto_id = -1, capt_bgn = -1, capt_end = -1;
|
||||
if (auto_exists)
|
||||
auto_id = rdr.Read_int_by_base85(1);
|
||||
else if (text_exists) {
|
||||
capt_bgn = rdr.Pos();
|
||||
capt_end = rdr.Find_fwd_lr();
|
||||
}
|
||||
if (auto_exists) auto_id = rdr.Read_int_by_base85(1);
|
||||
if (capt_exists) {capt_bgn = rdr.Pos(); capt_end = rdr.Find_fwd_lr();}
|
||||
int rv = rdr.Pos();
|
||||
|
||||
bfr.Add(Html_bldr_.Bry__a_lhs_w_href);
|
||||
bfr.Add_mid(src, href_bgn, href_end);
|
||||
bfr.Add(Xoh_lnke_dict_.Html__atr__0).Add(Xoh_lnke_dict_.To_html_class(anch_cls_type)).Add(Xoh_lnke_dict_.Html__rhs_end);
|
||||
if (auto_exists)
|
||||
bfr.Add_byte(Byte_ascii.Brack_bgn).Add_int_variable(auto_id).Add_byte(Byte_ascii.Brack_end);
|
||||
else if (text_exists)
|
||||
bfr.Add_mid(src, capt_bgn, capt_end);
|
||||
else
|
||||
bfr.Add_mid(src, href_bgn, href_end);
|
||||
bfr.Add(Xoh_lnke_dict_.Html__atr__0).Add(Xoh_lnke_dict_.To_html_class(lnke_tid)).Add(Xoh_lnke_dict_.Html__rhs_end);
|
||||
if (auto_exists) bfr.Add_byte(Byte_ascii.Brack_bgn).Add_int_variable(auto_id).Add_byte(Byte_ascii.Brack_end);
|
||||
else if (capt_exists) bfr.Add_mid(src, capt_bgn, capt_end);
|
||||
else bfr.Add_mid(src, href_bgn, href_end);
|
||||
bfr.Add(Html_bldr_.Bry__a_rhs);
|
||||
|
||||
return rv;
|
||||
}
|
||||
public int Pool__idx() {return pool_idx;} private int pool_idx;
|
||||
public void Pool__clear (Object[] args) {}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_lnke_hzip rv = new Xoh_lnke_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_lnke_hzip rv = new Xoh_lnke_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
|
||||
private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_ (1, 1, 2);
|
||||
private static final int // SERIALIZED
|
||||
Flag__auto_exists = 0
|
||||
, Flag__text_exists = 1
|
||||
, Flag__anch_cls = 2 // "free", "autonumber", "text"
|
||||
, Flag__capt_exists = 1
|
||||
, Flag__lnke_tid = 2 // "free", "autonumber", "text"
|
||||
;
|
||||
}
|
||||
|
||||
@@ -22,18 +22,33 @@ public class Xoh_lnke_hzip_tst {
|
||||
@Test public void Free() {
|
||||
fxt.Test__bicode("~#!http://a.org~", Xoh_lnke_html__hdump__tst.Html__free);
|
||||
}
|
||||
@Test public void Free__ws_at_end() {
|
||||
fxt.Test__bicode("~#%https://a.org/. ~https://a.org/.~", "<a href='https://a.org/. ' rel='nofollow' class='external free'>https://a.org/.</a>");
|
||||
}
|
||||
@Test public void Auto() {
|
||||
fxt.Test__bicode("~#*http://a.org~\"", Xoh_lnke_html__hdump__tst.Html__auto);
|
||||
}
|
||||
@Test public void Text() {
|
||||
fxt.Test__bicode("~#'http://a.org~a~", Xoh_lnke_html__hdump__tst.Html__text);
|
||||
}
|
||||
@Test public void Text__tidy() { // PURPOSE:handle reparenting of html elements by HTML tidy EX:<font color="red">[http://a.org]</font>; DATE:2015-08-25
|
||||
@Test public void Auto__tidy() { // PURPOSE:handle reparenting of html elements by HTML tidy EX:<font color="red">[http://a.org]</font>; DATE:2015-08-25
|
||||
fxt.Test__bicode
|
||||
( "~#&http://a.org~<font color=\"red\">[123]</font>~"
|
||||
, "<a href=\"http://a.org\" rel=\"nofollow\" class=\"external autonumber\"><font color=\"red\">[123]</font></a>"
|
||||
);
|
||||
}
|
||||
@Test public void Auto__invalid_number() {
|
||||
String html = "<a href='http://a.org' rel='nofollow' class='external autonumber'>[abc]</a>";
|
||||
fxt.Test__bicode("~#&http://a.org~[abc]~", html);
|
||||
}
|
||||
@Test public void Text() {
|
||||
fxt.Test__bicode("~#'http://a.org~a~", Xoh_lnke_html__hdump__tst.Html__text);
|
||||
}
|
||||
// @Test public void Xwiki__exists() {
|
||||
// String hzip = "~#'https://en.wiktionary.org/wiki/A~A~";
|
||||
// String html_https = "<a href='https://en.wiktionary.org/wiki/A' rel='nofollow' class='external text'>A</a>";
|
||||
// String html_xwiki = "<a href='/site/en.wiktionary.org/wiki/A'>A</a>";
|
||||
// fxt.Test__bicode(hzip, html_https);
|
||||
// fxt.Init_wiki_installed("en.wiktionary.org");
|
||||
// fxt.Test__decode(hzip, html_xwiki);
|
||||
// }
|
||||
@Test public void Fail__href() {
|
||||
String html = "<a rel='nofollow' class='external autonumber'>a</a>";
|
||||
fxt.Test__encode__fail(html, html);
|
||||
@@ -42,8 +57,4 @@ public class Xoh_lnke_hzip_tst {
|
||||
String html = "<a href='http://a.org' rel='nofollow' class='external invalid'>a</a>";
|
||||
fxt.Test__encode__fail(html, html);
|
||||
}
|
||||
@Test public void Fail__auto() {
|
||||
String html = "<a href='http://a.org' rel='nofollow' class='external autonumber'>[abc]</a>";
|
||||
fxt.Test__encode__fail(html, html);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,40 +20,53 @@ import gplx.core.brys.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*; import gplx.xowa.htmls.hrefs.*;
|
||||
public class Xoh_lnke_parser {
|
||||
private final Bry_rdr rdr = new Bry_rdr();
|
||||
public int Rng_bgn() {return rng_bgn;} private int rng_bgn;
|
||||
public int Rng_end() {return rng_end;} private int rng_end;
|
||||
public byte Anch_cls_type() {return anch_cls_type;} private byte anch_cls_type;
|
||||
public int Src_bgn() {return src_bgn;} private int src_bgn;
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
public byte Lnke_tid() {return lnke_tid;} private byte lnke_tid;
|
||||
public int Auto_id() {return auto_id;} private int auto_id;
|
||||
public int Href_bgn() {return href_bgn;} private int href_bgn;
|
||||
public int Href_end() {return href_end;} private int href_end;
|
||||
public int Capt_bgn() {return capt_bgn;} private int capt_bgn;
|
||||
public int Capt_end() {return capt_end;} private int capt_end;
|
||||
public boolean Capt_exists() {return capt_exists;} private boolean capt_exists;
|
||||
private void Clear() {
|
||||
anch_cls_type = Byte_ascii.Max_7_bit;
|
||||
auto_id = rng_bgn = rng_end = href_bgn = href_end = capt_bgn = capt_end = -1;
|
||||
lnke_tid = Byte_ascii.Max_7_bit;
|
||||
capt_exists = false;
|
||||
src_bgn = src_end = href_bgn = href_end = capt_bgn = capt_end = auto_id = -1;
|
||||
}
|
||||
public int Parse(Xoh_hdoc_wkr hdoc_wkr, Html_tag_rdr tag_rdr, Html_tag anch_head) {
|
||||
public boolean Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Html_tag_rdr tag_rdr, byte[] src, Html_tag anch_head) {
|
||||
this.Clear();
|
||||
this.rng_bgn = anch_head.Src_bgn();
|
||||
rdr.Init_by_hook("lnke", rng_bgn, rng_bgn);
|
||||
this.src_bgn = anch_head.Src_bgn();
|
||||
rdr.Init_by_sect("lnke", src_bgn, src_bgn);
|
||||
Html_atr href_atr = anch_head.Atrs__get_by_or_fail(Html_atr_.Bry__href); // get href; "EX: href='http://a.org'"
|
||||
this.href_bgn = href_atr.Val_bgn(); this.href_end = href_atr.Val_end();
|
||||
this.anch_cls_type = anch_head.Atrs__cls_find_or_fail(Xoh_lnke_dict_.Hash); // get type by class; EX: "class='external free'"
|
||||
boolean capt_exists = false;
|
||||
switch (anch_cls_type) {
|
||||
case Xoh_lnke_dict_.Type__text: capt_exists = true; break;
|
||||
this.lnke_tid = anch_head.Atrs__cls_find_or_fail(Xoh_lnke_dict_.Hash); // get type by class; EX: "class='external free'"
|
||||
this.capt_bgn = anch_head.Src_end();
|
||||
Html_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__a); // find '</a>'
|
||||
this.capt_end = anch_tail.Src_bgn();
|
||||
switch (lnke_tid) {
|
||||
case Xoh_lnke_dict_.Type__free:
|
||||
if (!Bry_.Match(src, href_bgn, href_end, src, capt_bgn, capt_end)) // EX: <a href='https://a.org/. ' rel='nofollow' class='external free'>https://a.org/.</a>
|
||||
capt_exists = true;
|
||||
break;
|
||||
case Xoh_lnke_dict_.Type__text:
|
||||
capt_exists = true;
|
||||
break;
|
||||
case Xoh_lnke_dict_.Type__auto:
|
||||
if (tag_rdr.Read_and_move(Byte_ascii.Brack_bgn)) // HTML tidy can reparent lnkes in strange ways; DATE:2015-08-25
|
||||
this.auto_id = tag_rdr.Read_int_to(Byte_ascii.Brack_end); // extract int; EX: "<a ...>[123]</a>"
|
||||
if ( src[capt_bgn] == Byte_ascii.Brack_bgn // is capt surround by bracks; EX: "[123]"
|
||||
&& src[capt_end - 1] == Byte_ascii.Brack_end) {
|
||||
int tmp_id = Bry_.To_int_or(src, capt_bgn + 1, capt_end - 1, -1); // extract int; EX: "<a ...>[123]</a>"
|
||||
if (tmp_id == -1) // HTML tidy can reparent lnkes in strange ways; EX: "<a ...><b>[123]</b></a>" DATE:2015-08-25
|
||||
capt_exists = true;
|
||||
else
|
||||
auto_id = tmp_id;
|
||||
}
|
||||
else
|
||||
capt_exists = true;
|
||||
break;
|
||||
}
|
||||
if (capt_exists) this.capt_bgn = anch_head.Src_end();
|
||||
Html_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__a); // find '</a>'
|
||||
if (capt_exists) this.capt_end = anch_tail.Src_bgn();
|
||||
this.rng_end = anch_tail.Src_end();
|
||||
this.src_end = anch_tail.Src_end();
|
||||
hdoc_wkr.On_lnke(this);
|
||||
return rng_end;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,7 +21,6 @@ import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
import gplx.xowa.htmls.core.wkrs.bfr_args.*;
|
||||
public class Xoh_lnke_wtr extends gplx.core.brys.Bfr_arg_base {
|
||||
private final Bfr_arg[] arg_ary;
|
||||
private final Bfr_arg__indent indent = new Bfr_arg__indent();
|
||||
private final Bfr_arg__html_atr
|
||||
anch_href = new Bfr_arg__html_atr(Html_atr_.Bry__href)
|
||||
, anch_rel = new Bfr_arg__html_atr(Html_atr_.Bry__rel)
|
||||
@@ -29,9 +28,8 @@ public class Xoh_lnke_wtr extends gplx.core.brys.Bfr_arg_base {
|
||||
;
|
||||
private final Bfr_arg__wrapper anch_capt = new Bfr_arg__wrapper();
|
||||
public Xoh_lnke_wtr() {
|
||||
arg_ary = new Bfr_arg[] {indent, anch_href, anch_rel, anch_cls, anch_capt};
|
||||
arg_ary = new Bfr_arg[] {anch_href, anch_rel, anch_cls, anch_capt};
|
||||
}
|
||||
public Xoh_lnke_wtr Indent_(int v) {indent.Set(v); return this;}
|
||||
public Xoh_lnke_wtr Anch_href_(byte[] src, int bgn, int end) {anch_href.Set_by_mid(src, bgn, end); return this;}
|
||||
public Xoh_lnke_wtr Anch_rel_y_() {anch_rel.Set_by_bry(Xoh_lnke_dict_.Html__rel__nofollow); return this;}
|
||||
public Xoh_lnke_wtr Anch_cls_(byte[]... ary) {anch_cls.Set_by_ary(ary); return this;}
|
||||
@@ -45,6 +43,6 @@ public class Xoh_lnke_wtr extends gplx.core.brys.Bfr_arg_base {
|
||||
fmtr.Bld_bfr_many(bfr, (Object[])arg_ary);
|
||||
}
|
||||
private static final Bry_fmtr fmtr = Bry_fmtr.new_
|
||||
( "~{indent}<a~{anch_href}~{anch_rel}~{anch_cls}>~{anch_capt}</a>"
|
||||
, "indent", "anch_href", "anch_rel", "anch_cls", "anch_capt");
|
||||
( "<a~{anch_href}~{anch_rel}~{anch_cls}>~{anch_capt}</a>"
|
||||
, "anch_href", "anch_rel", "anch_cls", "anch_capt");
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
// using gplx.core.btries;
|
||||
// using gplx.xowa.htmls.core.hzips;
|
||||
// public class Xoh_hzip_href {
|
||||
// public void Save(Bry_bfr bfr, Hzip_stat_itm stats, byte[] src, int src_len, int bgn, int pos, byte bgn_quote) {
|
||||
// public void Save(Bry_bfr bfr, Xoh_stat_itm stats, byte[] src, int src_len, int bgn, int pos, byte bgn_quote) {
|
||||
//// // ignore anchors; EX: "#a"
|
||||
//// int proto_bgn = pos;
|
||||
//// int proto_end = Bry_find_.Find_fwd(src, Byte_ascii.Colon, proto_bgn, src_len);
|
||||
@@ -19,6 +19,7 @@ package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; impo
|
||||
import gplx.core.brys.*;
|
||||
import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*;
|
||||
public class Xoh_lnki_dict_ {
|
||||
public static void Ns_encode(Xoh_hzip_bfr bfr, int ns_id) {bfr.Add_hzip_int(1, ns_id + 2);}
|
||||
public static void Ns_encode(Bry_bfr bfr, int ns_id) {
|
||||
gplx.xowa.htmls.core.hzips.Xoh_hzip_int_.Encode(1, bfr, ns_id + 2);
|
||||
}
|
||||
|
||||
@@ -19,18 +19,13 @@ package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; impo
|
||||
import org.junit.*; import gplx.xowa.htmls.core.makes.tests.*;
|
||||
public class Xoh_lnki_html__hdump__tst {
|
||||
private final Xoh_make_fxt fxt = new Xoh_make_fxt();
|
||||
public static final String
|
||||
Html__same = "<a href='/wiki/A' id='xolnki_2' title='A'>A</a>"
|
||||
, Html__diff = "<a href='/wiki/A' id='xolnki_2' title='A'>b</a>"
|
||||
, Html__trail = "<a href='/wiki/A' id='xolnki_2' title='A'>Ab</a>"
|
||||
, Html__xwiki = "<a href='/site/en.wiktionary.org/wiki/a' id='xolnki_2' title='a'>wikt:a</a>"
|
||||
;
|
||||
@Before public void init() {fxt.Clear();}
|
||||
@Test public void Same() {fxt.Test__html("[[A]]" , Html__same);}
|
||||
@Test public void Diff() {fxt.Test__html("[[A|b]]" , Html__diff);}
|
||||
@Test public void Trail() {fxt.Test__html("[[A]]b" , Html__trail);}
|
||||
@Test public void Same() {fxt.Test__html("[[A]]" , "<a href='/wiki/A' title='A'>A</a>");}
|
||||
@Test public void Diff() {fxt.Test__html("[[A|b]]" , "<a href='/wiki/A' title='A'>b</a>");}
|
||||
@Test public void Trail() {fxt.Test__html("[[A]]b" , "<a href='/wiki/A' title='A'>Ab</a>");}
|
||||
@Test public void Xwiki() {
|
||||
fxt.Parser_fxt().Init_xwiki_add_wiki_and_user_("wikt", "en.wiktionary.org");
|
||||
fxt.Test__html("[[wikt:a]]", Html__xwiki);
|
||||
fxt.Test__html("[[wikt:a]]", "<a href='https://en.wiktionary.org/wiki/a' title='a'>wikt:a</a>");
|
||||
}
|
||||
@Test public void Anch() {fxt.Test__html("[[#a]]" , "<a href='#a'>#a</a>");}
|
||||
}
|
||||
|
||||
@@ -17,153 +17,186 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.btries.*; import gplx.core.encoders.*; import gplx.core.threads.poolables.*;
|
||||
import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*;
|
||||
import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*; import gplx.langs.htmls.encoders.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.htmls.hrefs.*; import gplx.xowa.wikis.ttls.*;
|
||||
import gplx.xowa.wikis.nss.*; import gplx.xowa.parsers.lnkis.*;
|
||||
// Hzip worker for lnki anchors (<a href=...>): Encode writes a compact form (hook, flag, text segments) and
// Decode reads that form back and regenerates the <a> element.
// NOTE(review): this span is a rendered diff hunk; two generations of Encode / Decode / Pool__* / flag constants
// are shown interleaved (e.g. both Xoh_anch_href_parser and Xoh_anch_href_itm variants) -- confirm which lines
// belong to the current file before changing any code here.
public class Xoh_lnki_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
// scratch buffer; Decode fills it and drains it with To_bry_and_clear
private final Bry_bfr tmp_bfr = Bry_bfr.new_(32);
|
||||
public String Key() {return Xoh_hzip_dict_.Key__lnki;}
|
||||
public Xoh_lnki_hzip Encode(Bry_bfr bfr, Xoh_hdoc_ctx hctx, Hzip_stat_itm stat_itm, byte[] src, Xoh_lnki_parser arg) {
|
||||
byte text_type = arg.Text_type();
|
||||
Xoh_anch_href_parser anch_href_parser = arg.Href_parser();
|
||||
int page_ns_id = anch_href_parser.Page_ns_id();
|
||||
boolean page_ns_id_is_not_main = page_ns_id != Xow_ns_.Tid__main;
|
||||
int href_type = anch_href_parser.Tid();
|
||||
flag_bldr.Set(Flag__ns_is_not_main , page_ns_id_is_not_main);
|
||||
flag_bldr.Set(Flag__href_type , href_type);
|
||||
flag_bldr.Set(Flag__text_type , text_type);
|
||||
public byte[] Hook() {return hook;} private byte[] hook;
|
||||
// Encode: sets the nine flags from the parsed lnki, then writes hook, the flag int, the optional site / ns /
// custom-ns segments, the text segment(s) for the text type, and the title when title_tid is Title__diff;
// finally records source length, encoded length and flag in the hzip stats. Returns this.
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
|
||||
Xoh_lnki_parser data = (Xoh_lnki_parser)data_obj;
|
||||
Xoh_anch_href_itm href = data.Href_itm();
|
||||
int ns_id = href.Ttl_ns_id(); ;
|
||||
flag_bldr.Set_as_bool(Flag__title_missing_ns , data.Title_missing_ns());
|
||||
flag_bldr.Set_as_bool(Flag__ttl_is_main_page , href.Ttl_is_main_page());
|
||||
boolean ns_custom_exists= flag_bldr.Set_as_bool(Flag__ns_custom_exists , href.Ttl_ns_custom() != null);
|
||||
int title_tid = flag_bldr.Set_as_int(Flag__title_tid , href.Tid() == Xoh_anch_href_itm.Tid__anch ? Xoh_lnki_parser.Title__href : data.Title_tid()); // anchs never have title, so don't bother setting flag;
|
||||
flag_bldr.Set_as_bool(Flag__capt_has_ns , data.Capt_has_ns());
|
||||
boolean ns_is_not_main = flag_bldr.Set_as_bool(Flag__ns_is_not_main , ns_id != Xow_ns_.Tid__main);
|
||||
int href_type = flag_bldr.Set_as_int(Flag__href_type , href.Tid());
|
||||
flag_bldr.Set_as_int(Flag__capt_cs0_tid , data.Capt_itm().Cs0_tid());
|
||||
byte text_type = flag_bldr.Set_as_byte(Flag__text_type , data.Text_tid());
|
||||
// Tfds.Dbg(flag_bldr.Encode(), Array_.To_str(flag_bldr.Val_ary()), text_type);
|
||||
|
||||
bfr.Add(Xoh_hzip_dict_.Bry__lnki);
|
||||
Xoh_hzip_int_.Encode(1, bfr, flag_bldr.Encode());
|
||||
if (page_ns_id_is_not_main)
|
||||
Xoh_lnki_dict_.Ns_encode(bfr, page_ns_id);
|
||||
if (href_type == Xoh_anch_href_parser.Tid__site)
|
||||
bfr.Add_mid(src, anch_href_parser.Site_bgn(), anch_href_parser.Site_end()).Add_byte(Xoh_hzip_dict_.Escape);
|
||||
// remember the buffer length so the encoded size can be reported to the stats at the end
int bfr_bgn = bfr.Len();
|
||||
int flag = flag_bldr.Encode();
|
||||
bfr.Add(hook);
|
||||
bfr.Add_hzip_int(1, flag);
|
||||
if (href_type == Xoh_anch_href_itm.Tid__site) bfr.Add_hzip_mid(src, href.Site_bgn(), href.Site_end());
|
||||
if (ns_is_not_main) Xoh_lnki_dict_.Ns_encode(bfr, ns_id);
|
||||
if (ns_custom_exists) bfr.Add_hzip_bry(href.Ttl_ns_custom());
|
||||
switch (text_type) {
|
||||
case Xoh_anch_capt_parser.Tid__href:
|
||||
case Xoh_anch_capt_parser.Tid__href_pipe:
|
||||
stat_itm.Lnki_text_n_add();
|
||||
bfr.Add_mid(arg.Href_bry(), arg.Href_bgn(), arg.Href_end());
|
||||
bfr.Add_byte(Xoh_hzip_dict_.Escape);
|
||||
case Xoh_anch_capt_itm.Tid__same:
|
||||
bfr.Add_hzip_mid(data.Href_src(), data.Href_bgn(), data.Href_end());
|
||||
break;
|
||||
case Xoh_anch_capt_parser.Tid__capt:
|
||||
case Xoh_anch_capt_parser.Tid__href_trail:
|
||||
case Xoh_anch_capt_parser.Tid__capt_short:
|
||||
stat_itm.Lnki_text_y_add();
|
||||
bfr.Add_mid(arg.Href_bry(), arg.Href_bgn(), arg.Href_end());
|
||||
bfr.Add_byte(Xoh_hzip_dict_.Escape);
|
||||
bfr.Add_mid(arg.Capt_bry(), arg.Capt_bgn(), arg.Capt_end());
|
||||
bfr.Add_byte(Xoh_hzip_dict_.Escape);
|
||||
case Xoh_anch_capt_itm.Tid__diff:
|
||||
case Xoh_anch_capt_itm.Tid__more:
|
||||
case Xoh_anch_capt_itm.Tid__less:
|
||||
bfr.Add_hzip_mid(data.Text_0_src(), data.Text_0_bgn(), data.Text_0_end());
|
||||
bfr.Add_hzip_mid(data.Text_1_src(), data.Text_1_bgn(), data.Text_1_end());
|
||||
break;
|
||||
}
|
||||
if (title_tid == Xoh_lnki_parser.Title__diff) bfr.Add_hzip_mid(src, data.Title_bgn(), data.Title_end());
|
||||
|
||||
hctx.Hzip__stat().Lnki_add(data.Src_end() - data.Src_bgn(), bfr.Len() - bfr_bgn, flag);
|
||||
return this;
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx hctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
|
||||
int flag = rdr.Read_int_by_base85(1);
|
||||
flag_bldr.Decode(flag);
|
||||
boolean page_ns_id_is_not_main = flag_bldr.Get_as_bool(Flag__ns_is_not_main);
|
||||
// Decode: reads the flag, then the site / ns / custom-ns / text / title segments in the order Encode wrote them,
// rebuilds href, caption and title, writes the <a> element to bfr, and returns the reader position.
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
|
||||
int flag = rdr.Read_int_by_base85(1); flag_bldr.Decode(flag);
|
||||
boolean title_missing_ns = flag_bldr.Get_as_bool(Flag__title_missing_ns);
|
||||
boolean ttl_is_main_page = flag_bldr.Get_as_bool(Flag__ttl_is_main_page);
|
||||
boolean ns_custom_exists = flag_bldr.Get_as_bool(Flag__ns_custom_exists);
|
||||
byte title_tid = flag_bldr.Get_as_byte(Flag__title_tid);
|
||||
boolean capt_has_ns = flag_bldr.Get_as_bool(Flag__capt_has_ns);
|
||||
boolean ns_is_not_main = flag_bldr.Get_as_bool(Flag__ns_is_not_main);
|
||||
byte href_type = flag_bldr.Get_as_byte(Flag__href_type);
|
||||
int capt_cs0_tid = flag_bldr.Get_as_int(Flag__capt_cs0_tid);
|
||||
byte text_type = flag_bldr.Get_as_byte(Flag__text_type);
|
||||
|
||||
int ns_id = page_ns_id_is_not_main ? Xoh_lnki_dict_.Ns_decode(rdr) : Xow_ns_.Tid__main;
|
||||
int site_bgn = -1, site_end = -1;
|
||||
if (href_type == Xoh_anch_href_parser.Tid__site) {
|
||||
site_bgn = rdr.Pos();
|
||||
site_end = rdr.Find_fwd_lr();
|
||||
}
|
||||
int href_bgn = rdr.Pos();
|
||||
int href_end = rdr.Find_fwd_lr();
|
||||
int capt_bgn = -1, capt_end = -1;
|
||||
int site_bgn = -1, site_end = -1; if (href_type == Xoh_anch_href_itm.Tid__site) {site_bgn = rdr.Pos(); site_end = rdr.Find_fwd_lr();}
|
||||
int ns_id = ns_is_not_main ? Xoh_lnki_dict_.Ns_decode(rdr) : Xow_ns_.Tid__main;
|
||||
byte[] ns_custom_bry = ns_custom_exists ? rdr.Read_bry_to() : null;
|
||||
int text_0_bgn = rdr.Pos(); int text_0_end = rdr.Find_fwd_lr();
|
||||
int text_1_bgn = -1, text_1_end = -1;
|
||||
switch (text_type) {
|
||||
case Xoh_anch_capt_parser.Tid__capt:
|
||||
case Xoh_anch_capt_parser.Tid__capt_short:
|
||||
case Xoh_anch_capt_parser.Tid__href_trail:
|
||||
capt_bgn = rdr.Pos();
|
||||
capt_end = rdr.Find_fwd_lr();
|
||||
case Xoh_anch_capt_itm.Tid__diff: case Xoh_anch_capt_itm.Tid__less: case Xoh_anch_capt_itm.Tid__more:
|
||||
text_1_bgn = rdr.Pos(); text_1_end = rdr.Find_fwd_lr();
|
||||
break;
|
||||
}
|
||||
byte[] href_bry = null;
|
||||
if (text_type == Xoh_anch_capt_parser.Tid__capt_short)
|
||||
href_bry = Bry_.Add(Bry_.Mid(src, href_bgn, href_end), Bry_.Mid(src, capt_bgn, capt_end));
|
||||
else
|
||||
href_bry = Bry_.Mid(src, href_bgn, href_end);
|
||||
byte[] title_bry = null;
|
||||
Xoa_ttl ttl = null;
|
||||
if (href_type != Xoh_anch_href_parser.Tid__anch) {
|
||||
switch (href_type) {
|
||||
case Xoh_anch_href_parser.Tid__site:
|
||||
Xow_ttl_parser ttl_parser = hctx.App().Wiki_mgri().Get_by_key_or_make_init_n(Bry_.Mid(src, site_bgn, site_end));
|
||||
ttl = ttl_parser.Ttl_parse(ns_id, href_bry);
|
||||
href_bry = gplx.langs.htmls.encoders.Gfo_url_encoder_.Href_qarg.Encode(ttl.Full_db());
|
||||
title_bry = ttl.Full_txt();
|
||||
break;
|
||||
case Xoh_anch_href_parser.Tid__wiki:
|
||||
ttl = hctx.Wiki__ttl_parser().Ttl_parse(ns_id, href_bry); if (ttl == null) rdr.Fail("invalid ttl", String_.Empty, String_.new_u8(href_bry));
|
||||
href_bry = ttl.Full_db_w_anch();
|
||||
href_bry = gplx.langs.htmls.encoders.Gfo_url_encoder_.Href.Encode(href_bry); // encode for href; EX: "/wiki/A's" -> "/wiki/A&27s"
|
||||
title_bry = ttl.Full_txt();
|
||||
break;
|
||||
case Xoh_anch_href_parser.Tid__inet:
|
||||
title_bry = href_bry = gplx.langs.htmls.encoders.Gfo_url_encoder_.Href_qarg.Encode(href_bry);
|
||||
break;
|
||||
// title is only stored in the stream when title_tid is Title__diff; other title kinds are derived further down
byte[] title_bry = title_tid == Xoh_lnki_parser.Title__diff ? rdr.Read_bry_to() : null;
|
||||
// for Tid__less the href is text_0 followed by text_1; for every other text type it is text_0 alone
byte[] href_bry = text_type == Xoh_anch_capt_itm.Tid__less
|
||||
? tmp_bfr.Add_mid(src, text_0_bgn, text_0_end).Add_mid(src, text_1_bgn, text_1_end).To_bry_and_clear()
|
||||
: Bry_.Mid(src, text_0_bgn, text_0_end);
|
||||
byte[] ns_bry = null;
|
||||
switch (href_type) {
|
||||
case Xoh_anch_href_itm.Tid__anch: break;
|
||||
case Xoh_anch_href_itm.Tid__inet: break; //href_bry = Gfo_url_encoder_.Href_qarg.Encode(href_bry); break;
|
||||
case Xoh_anch_href_itm.Tid__wiki:
|
||||
case Xoh_anch_href_itm.Tid__site:
|
||||
if (ns_custom_exists) {
|
||||
ns_bry = ns_custom_bry;
|
||||
tmp_bfr.Add(Xoa_ttl.Replace_spaces(ns_bry)).Add_byte_colon(); // NOTE: Replace_space to handle ns_custom_bry like "Image talk"
|
||||
}
|
||||
else {
|
||||
if (ns_id == Xow_ns_.Tid__main) {
|
||||
if (ttl_is_main_page)
|
||||
href_bry = Bry_.Empty;
|
||||
}
|
||||
else {
|
||||
Xow_ns ns = hctx.Wiki__ttl_parser().Ns_mgr().Ids_get_or_null(ns_id); if (ns == null) rdr.Err_wkr().Fail("invalid ns_id", "ns_id", ns_id);
|
||||
ns_bry = ns.Name_ui();
|
||||
tmp_bfr.Add(ns.Name_db()).Add_byte_colon();
|
||||
}
|
||||
}
|
||||
// tmp_bfr may already hold "ns:" at this point; the encoded page part is appended after it
Gfo_url_encoder encoder = href_type == Xoh_anch_href_itm.Tid__wiki ? Gfo_url_encoder_.Href : Gfo_url_encoder_.Href_qarg;
|
||||
encoder.Encode(tmp_bfr, href_bry); // encode for href; EX: "/wiki/A's" -> "/wiki/A%27s"
|
||||
href_bry = tmp_bfr.To_bry_and_clear();
|
||||
break;
|
||||
}
|
||||
byte[] capt_bry = Xoh_lnki_hzip_.Bld_capt(tmp_bfr, href_type, text_type, capt_has_ns, capt_cs0_tid, ns_bry, src, text_0_bgn, text_0_end, src, text_1_bgn, text_1_end);
|
||||
if (href_type != Xoh_anch_href_itm.Tid__anch) {
|
||||
switch (title_tid) {
|
||||
case Xoh_lnki_parser.Title__missing: title_bry = null; break;
|
||||
case Xoh_lnki_parser.Title__diff: break;
|
||||
case Xoh_lnki_parser.Title__href: title_bry = Gfo_url_encoder_.Href.Decode(href_bry); break;
|
||||
case Xoh_lnki_parser.Title__capt: title_bry = !capt_has_ns && !title_missing_ns && ns_bry != null ? Bry_.Add(ns_bry, Byte_ascii.Colon_bry, capt_bry) : capt_bry; break;
|
||||
}
|
||||
}
|
||||
|
||||
// gen html
|
||||
bfr.Add(Html_bldr_.Bry__a_lhs_w_href);
|
||||
switch (href_type) {
|
||||
case Xoh_anch_href_parser.Tid__anch:
|
||||
case Xoh_anch_href_itm.Tid__anch:
|
||||
bfr.Add_byte(Byte_ascii.Hash); // "#"
|
||||
break;
|
||||
case Xoh_anch_href_parser.Tid__site:
|
||||
case Xoh_anch_href_itm.Tid__site:
|
||||
bfr.Add(Xoh_href_.Bry__site).Add_mid(src, site_bgn, site_end);
|
||||
bfr.Add(Xoh_href_.Bry__wiki);
|
||||
break;
|
||||
case Xoh_anch_href_parser.Tid__wiki:
|
||||
case Xoh_anch_href_itm.Tid__wiki:
|
||||
bfr.Add(Xoh_href_.Bry__wiki);
|
||||
break;
|
||||
}
|
||||
bfr.Add(href_bry);
|
||||
bfr.Add(Html_bldr_.Bry__id__nth).Add_str_a7(gplx.xowa.parsers.lnkis.redlinks.Xopg_redlink_lnki_list.Lnki_id_prefix).Add_int_variable(hctx.Lnki__uid__nxt());
|
||||
if (href_type != Xoh_anch_href_parser.Tid__anch) {
|
||||
bfr.Add(Html_bldr_.Bry__title__nth);
|
||||
Html_utl.Escape_html_to_bfr(bfr, title_bry, 0, title_bry.length, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.N);
|
||||
// the id attribute is only written when the context is not in diff mode
if (!hctx.Mode_is_diff())
|
||||
bfr.Add(Html_bldr_.Bry__id__nth).Add_str_a7(gplx.xowa.parsers.lnkis.redlinks.Xopg_redlink_lnki_list.Lnki_id_prefix).Add_int_variable(hctx.Lnki__uid__nxt());
|
||||
if ( href_type != Xoh_anch_href_itm.Tid__anch) { // anchs never have title;
|
||||
if (title_bry != null) {
|
||||
bfr.Add(Html_bldr_.Bry__title__nth);
|
||||
Html_utl.Escape_html_to_bfr(bfr, title_bry, 0, title_bry.length, Bool_.N, Bool_.N, Bool_.N, Bool_.Y, Bool_.N);
|
||||
}
|
||||
}
|
||||
bfr.Add(Html_bldr_.Bry__lhs_end_head_w_quote);
|
||||
if ( href_type == Xoh_anch_href_parser.Tid__anch
|
||||
&& text_type != Xoh_anch_capt_parser.Tid__capt )
|
||||
bfr.Add_byte(Byte_ascii.Hash);
|
||||
switch (text_type) {
|
||||
case Xoh_anch_capt_parser.Tid__href:
|
||||
if (ns_id == Xow_ns_.Tid__main)
|
||||
bfr.Add_mid(src, href_bgn, href_end);
|
||||
else
|
||||
bfr.Add(ttl.Full_txt());
|
||||
break;
|
||||
case Xoh_anch_capt_parser.Tid__href_pipe:
|
||||
bfr.Add_mid(src, href_bgn, href_end);
|
||||
break;
|
||||
case Xoh_anch_capt_parser.Tid__capt:
|
||||
bfr.Add_mid(src, capt_bgn, capt_end);
|
||||
break;
|
||||
case Xoh_anch_capt_parser.Tid__href_trail:
|
||||
bfr.Add_mid(src, href_bgn, href_end);
|
||||
bfr.Add_mid(src, capt_bgn, capt_end);
|
||||
break;
|
||||
case Xoh_anch_capt_parser.Tid__capt_short:
|
||||
bfr.Add_mid(src, href_bgn, href_end);
|
||||
break;
|
||||
}
|
||||
bfr.Add(capt_bry);
|
||||
bfr.Add(Html_bldr_.Bry__a_rhs);
|
||||
return rdr.Pos();
|
||||
}
|
||||
public int Pool__idx() {return pool_idx;} private int pool_idx;
|
||||
public void Pool__clear (Object[] args) {}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_lnki_hzip rv = new Xoh_lnki_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
|
||||
private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_ (1, 2, 3);
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
|
||||
// Pool__make: creates a new instance bound to the pool manager and slot; args[0] supplies the hook bytes
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_lnki_hzip rv = new Xoh_lnki_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
|
||||
// nine arguments, one per Flag__* index 0-8 below (presumably per-flag widths -- confirm against Int_flag_bldr)
private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_ (1 , 1, 1, 2, 1 , 1, 2, 2, 2);
|
||||
private static final int // SERIALIZED
|
||||
Flag__ns_is_not_main = 0
|
||||
, Flag__href_type = 1 // "wiki", "site", "anch", "inet"
|
||||
, Flag__text_type = 2 // "href", "capt", "href_trail", "capt_short", "href_pipe"
|
||||
// NOTE(review): the trailing comment on Flag__title_missing_ns is identical to the one on Flag__ttl_is_main_page
// and may be a copy-paste; in Decode this flag stops the ns from being prefixed to a caption-derived title -- confirm.
Flag__title_missing_ns = 0 // [[c:]] -> "/site/commons.wikimedia.org/wiki/"
|
||||
, Flag__ttl_is_main_page = 1 // [[c:]] -> "/site/commons.wikimedia.org/wiki/"
|
||||
, Flag__ns_custom_exists = 2 // [[c:category:a]] -> "/site/commons.wikimedia.org/wiki/category:a"
|
||||
, Flag__title_tid = 3 // href, capt, diff, empty; [//en.wikipedia.org] where en.w is local
|
||||
, Flag__capt_has_ns = 4 // "A" vs "Help:A"
|
||||
, Flag__ns_is_not_main = 5
|
||||
, Flag__href_type = 6 // "wiki", "site", "anch", "inet"
|
||||
, Flag__capt_cs0_tid = 7 // exact, lower, upper
|
||||
, Flag__text_type = 8 // "same", "diff", "more", "less"
|
||||
;
|
||||
}
|
||||
// Static helper for Xoh_lnki_hzip: assembles the caption bytes of a decoded lnki.
class Xoh_lnki_hzip_ {
|
||||
// Builds the caption in tmp_bfr and returns it (tmp_bfr is cleared by To_bry_and_clear).
// Order of assembly:
//   1. "#" when the href is an anchor and the text type is not Tid__diff
//   2. ns_bry + ":" when capt_has_ns is set and ns_bry is not null
//   3. for any text type other than Tid__diff: the first byte of text_0 in lower / upper case when capt_cs0 asks for it
//      (that byte is consumed by advancing text_0_bgn, so step 4 does not emit it again)
//   4. text_0 for Tid__same / Tid__less, text_1 for Tid__diff, text_0 followed by text_1 for Tid__more
// text_0_* indexes into text_0_src; text_1_* indexes into capt_src.
// NOTE(review): the Cs0__lower / Cs0__upper branches read text_0_src[text_0_bgn] without checking
// text_0_bgn < text_0_end -- confirm the encoder never sets those values for an empty text_0.
public static byte[] Bld_capt(Bry_bfr tmp_bfr, byte href_type, byte text_type, boolean capt_has_ns, int capt_cs0, byte[] ns_bry, byte[] text_0_src, int text_0_bgn, int text_0_end, byte[] capt_src, int text_1_bgn, int text_1_end) {
|
||||
if ( href_type == Xoh_anch_href_itm.Tid__anch
|
||||
&& text_type != Xoh_anch_capt_itm.Tid__diff )
|
||||
tmp_bfr.Add_byte(Byte_ascii.Hash);
|
||||
if (capt_has_ns && ns_bry != null)
|
||||
tmp_bfr.Add(ns_bry).Add_byte_colon();
|
||||
switch (text_type) {
|
||||
case Xoh_anch_capt_itm.Tid__diff: break;
|
||||
default:
|
||||
switch (capt_cs0) {
|
||||
case Xoh_anch_capt_itm.Cs0__exact: break;
|
||||
case Xoh_anch_capt_itm.Cs0__lower: tmp_bfr.Add_byte(Byte_ascii.Case_lower(text_0_src[text_0_bgn++]));break;
|
||||
case Xoh_anch_capt_itm.Cs0__upper: tmp_bfr.Add_byte(Byte_ascii.Case_upper(text_0_src[text_0_bgn++]));break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
switch (text_type) {
|
||||
case Xoh_anch_capt_itm.Tid__same:
|
||||
case Xoh_anch_capt_itm.Tid__less:
|
||||
tmp_bfr.Add_mid(text_0_src, text_0_bgn, text_0_end);
|
||||
break;
|
||||
case Xoh_anch_capt_itm.Tid__diff:
|
||||
tmp_bfr.Add_mid(capt_src, text_1_bgn, text_1_end);
|
||||
break;
|
||||
case Xoh_anch_capt_itm.Tid__more:
|
||||
tmp_bfr.Add_mid(text_0_src, text_0_bgn, text_0_end);
|
||||
tmp_bfr.Add_mid(capt_src, text_1_bgn, text_1_end);
|
||||
break;
|
||||
}
|
||||
return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import org.junit.*;
|
||||
// Hzip tests for anchor-only links ("#a") and for absolute urls (file:///, https://).
// Each case pairs an hzip string (first argument) with the html it corresponds to (second argument);
// the fixture is created with Init_mode_diff_y_().
public class Xoh_lnki_hzip__anch__tst {
|
||||
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_();
|
||||
@Test public void Basic() { // EX: [[#a]]
|
||||
fxt.Test__bicode("~$Ba~#a~", "<a href='#a'>#a</a>");
|
||||
}
|
||||
@Test public void Capt() { // EX: [[#a|b]]
|
||||
fxt.Test__bicode("~$Ba~b~", "<a href='#a'>b</a>");
|
||||
}
|
||||
@Test public void Capt_similar() { // EX: [[#a|a]]
|
||||
fxt.Test__bicode("~$Ba~a~", "<a href='#a'>a</a>");
|
||||
}
|
||||
@Test public void Error() { // EX: [[#a|b]]; make sure bad title character does not cause error
|
||||
fxt.Test__bicode("~$Ba|b~#a|b~", "<a href='#a|b'>#a|b</a>"); // NOTE: the "|" should be url-encoded
|
||||
}
|
||||
// absolute file url: title is the same as the href
@Test public void Inet__file() {
|
||||
fxt.Test__bicode("~$Rfile:///C://A.png~b~", "<a href='file:///C://A.png' title='file:///C://A.png'>b</a>");
|
||||
}
|
||||
// absolute https url with a percent-encoded href; the title ("Aæe") differs from the href and is carried as its own trailing segment
@Test public void Inet__enc() {
|
||||
fxt.Test__bicode("~${'Thttps://simple.wikisource.org/wiki/A%C3%A6e~b~Aæe~", "<a href='https://simple.wikisource.org/wiki/A%C3%A6e' title='Aæe'>b</a>");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import org.junit.*;
|
||||
// Hzip tests for links whose caption differs from the page name.
// Each case pairs an hzip string (first argument) with the html it corresponds to (second argument);
// the fixture is created with Init_mode_diff_y_().
public class Xoh_lnki_hzip__diff__tst {
|
||||
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_();
|
||||
@Test public void Diff__basic() { // EX: [[A|b]]
|
||||
fxt.Test__bicode("~$\"A~b~" , "<a href='/wiki/A' title='A'>b</a>");
|
||||
}
|
||||
// caption differs from the page name only in the case of the first letter, so the hzip holds a single text segment
@Test public void Diff__cs__lo() { // EX: [[A|a]]
|
||||
fxt.Test__bicode("~$%A~" , "<a href='/wiki/A' title='A'>a</a>");
|
||||
}
|
||||
@Test public void Diff__page_w_anch() { // EX: [[A#b|b]]
|
||||
fxt.Test__bicode("~${'$A#b~b~A~", "<a href='/wiki/A#b' title='A'>b</a>");
|
||||
}
|
||||
// caption contains a nested <a>; the nested element sits inside the caption segment of the outer hzip string
@Test public void Capt__nest() { // EX: [[A|B[[C|C1]]D]]
|
||||
fxt.Test__bicode
|
||||
( "~$\"A~B<a href=\"/wiki/C\" title=\"C\">C1</a>D~"
|
||||
, "<a href='/wiki/A' title='A'>B<a href='/wiki/C' title='C'>C1</a>D</a>"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import org.junit.*;
|
||||
// Hzip tests for links into non-main namespaces (Help, Help talk), unknown prefixes (Fake:) and ns aliases (Image).
// Each case pairs an hzip string (first argument) with the html it corresponds to (second argument);
// the fixture is created with Init_mode_diff_y_().
public class Xoh_lnki_hzip__ns__tst {
|
||||
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_();
|
||||
@Test public void Ns__same() { // EX: [[Help:A]]
|
||||
fxt.Test__bicode("~${#7/A~", "<a href='/wiki/Help:A' title='Help:A'>Help:A</a>");
|
||||
}
|
||||
@Test public void Ns__diff() { // EX: [[Help:A_b|c]]
|
||||
fxt.Test__bicode("~$b/A b~c~", "<a href='/wiki/Help:A_b' title='Help:A b'>c</a>");
|
||||
}
|
||||
@Test public void Ns__more() { // EX: [[Help:A|a b]]
|
||||
fxt.Test__bicode("~$g/A~ b~", "<a href='/wiki/Help:A' title='Help:A'>a b</a>");
|
||||
}
|
||||
@Test public void Ns__less() { // EX: [[Help:A_b|a]]
|
||||
fxt.Test__bicode("~$h/A~ b~", "<a href='/wiki/Help:A_b' title='Help:A b'>a</a>");
|
||||
}
|
||||
@Test public void Ns__talk() { // EX: [[Help talk:A b]]
|
||||
fxt.Test__bicode("~${#70A b~", "<a href='/wiki/Help_talk:A_b' title='Help talk:A b'>Help talk:A b</a>");
|
||||
}
|
||||
@Test public void Ns__talk__diff() { // EX: [[Help talk:A b|cde]]
|
||||
fxt.Test__bicode("~$b0A b~cde~", "<a href='/wiki/Help_talk:A_b' title='Help talk:A b'>cde</a>");
|
||||
}
|
||||
@Test public void Ns__under() { // EX: [[Help_talk:A_b]]; rare; just make sure codec can handle it;
|
||||
fxt.Test__bicode("~$b0A b~Help_talk:A_b~", "<a href='/wiki/Help_talk:A_b' title='Help talk:A b'>Help_talk:A_b</a>");
|
||||
}
|
||||
@Test public void Ns__pipe() { // EX: [[Help:A|]]
|
||||
fxt.Test__bicode("~$a/A~", "<a href='/wiki/Help:A' title='Help:A'>A</a>");
|
||||
}
|
||||
@Test public void Ns__pipe_w_words() { // EX: [[Help:A b|]]
|
||||
fxt.Test__bicode("~$a/A b~", "<a href='/wiki/Help:A_b' title='Help:A b'>A b</a>");
|
||||
}
|
||||
@Test public void Ns__anch() { // EX: [[Help:A_b#c|a]]
|
||||
fxt.Test__bicode("~${'j/A~ b#c~Help:A b~", "<a href='/wiki/Help:A_b#c' title='Help:A b'>a</a>");
|
||||
}
|
||||
@Test public void Ns__anch__alias() { // EX: [[Image:A.png#b|c]]
|
||||
fxt.Test__bicode("~${3h)Image~A.png#b~c~Image:A.png~", "<a href='/wiki/Image:A.png#b' title='Image:A.png'>c</a>");
|
||||
}
|
||||
// "Fake" is not a namespace, so "Fake:A" is carried whole in the text segment
@Test public void Fake__ns() { // EX: [[Fake:A]]
|
||||
fxt.Test__bicode("~$!Fake:A~", "<a href='/wiki/Fake:A' title='Fake:A'>Fake:A</a>");
|
||||
}
|
||||
// alias cases: the alias text ("Image", "Image talk") is carried as its own segment in the hzip string
@Test public void Alias__basic() { // EX: [[Image:A|B]]
|
||||
fxt.Test__bicode("~${-f)Image~A~B~", "<a href='/wiki/Image:A' title='Image:A'>B</a>");
|
||||
}
|
||||
@Test public void Alias__talk() { // EX: [[Image talk:A]]
|
||||
fxt.Test__bicode("~${/;*Image talk~Human-woman.png~", "<a href='/wiki/Image_talk:Human-woman.png' title='Image talk:Human-woman.png'>Image talk:Human-woman.png</a>");
|
||||
}
|
||||
@Test public void Alias__words() { // EX: [[Image:A b]]
|
||||
fxt.Test__bicode("~${/;)Image~A b~", "<a href='/wiki/Image:A_b' title='Image:A b'>Image:A b</a>");
|
||||
}
|
||||
@Test public void Alias__url_encoding() { // EX: [[Image:Aü.png|b]]
|
||||
fxt.Test__bicode("~${-f)Image~Aü.png~b~", "<a href='/wiki/Image:A%C3%BC.png' title='Image:Aü.png'>b</a>");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import org.junit.*;
|
||||
// Hzip tests for main-namespace links whose caption equals the page name, extends it ("more") or is a prefix of it ("less").
// Each case pairs an hzip string (first argument) with the html it corresponds to;
// the fixture is created with Init_mode_diff_y_().
public class Xoh_lnki_hzip__same__tst {
|
||||
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_();
|
||||
// NOTE(review): this case calls Test__decode while the others call Test__bicode -- confirm that is intentional
@Test public void Same__basic() { // EX: [[A]]
|
||||
fxt.Test__decode("~$!A~", "<a href='/wiki/A' title='A'>A</a>");
|
||||
}
|
||||
// html uses double quotes because the title itself contains an apostrophe
@Test public void Same__encoded() { // EX: [[A's]]
|
||||
String html = "<a href=\"/wiki/A%27s\" title=\"A's\">A's</a>";
|
||||
fxt.Test__bicode_raw("~$!A's~", html, html);
|
||||
}
|
||||
@Test public void Same__encoded__anch() { // EX: [[A#90.51]]
|
||||
fxt.Test__bicode("~${$%A~#90.51~", "<a href='/wiki/A#90.51' title='A'>A</a>");
|
||||
}
|
||||
@Test public void Same__encoded__anch__nbsp() { // EX: [[A# B|abc]]
|
||||
fxt.Test__bicode("~${$#A#.C2.A0B~abc~", "<a href='/wiki/A#.C2.A0B' title='abc'>abc</a>");
|
||||
}
|
||||
@Test public void Same__amp() { // EX: [[A&b]]
|
||||
fxt.Test__bicode("~${$#A&b~A&b~", "<a href='/wiki/A%26b' title='A&b'>A&b</a>");
|
||||
}
|
||||
@Test public void More__basic() { // EX: [[A]]b
|
||||
fxt.Test__bicode("~$#A~b~", "<a href='/wiki/A' title='A'>Ab</a>");
|
||||
}
|
||||
@Test public void Less__cs__eq() { // EX: [[Ab|A]]
|
||||
fxt.Test__bicode("~$$A~b~", "<a href='/wiki/Ab' title='Ab'>A</a>");
|
||||
}
|
||||
@Test public void Less__cs__lo() { // EX: [[Ab|a]]
|
||||
fxt.Test__bicode("~$(A~b~", "<a href='/wiki/Ab' title='Ab'>a</a>");
|
||||
}
|
||||
@Test public void Less__ns__cs() { // EX: [[Help:Ab_c|ab]]; make sure ns is added correctly, not "aHelp:b"
|
||||
fxt.Test__bicode("~$h/Ab~ c~", "<a href='/wiki/Help:Ab_c' title='Help:Ab c'>ab</a>");
|
||||
}
|
||||
// anchor carrying an xowa_title attribute: the same string is passed as both hzip and html, i.e. it is left unchanged
@Test public void Ignore__audio() {
|
||||
String html = "<a href=\"file:///\" xowa_title=\"A.ogg\">a</a>";
|
||||
fxt.Test__bicode(html, html);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import org.junit.*;
|
||||
// Hzip tests for links to another wiki ("/site/<domain>/wiki/..." hrefs).
// Each case pairs an hzip string (first argument) with the html it corresponds to (second argument);
// in every hzip string the site domain is the first segment after the flag.
// The fixture is created with Init_mode_diff_y_().
public class Xoh_lnki_hzip__site__tst {
|
||||
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_();
|
||||
@Test public void Basic() { // EX: [[wikt:A]]
|
||||
fxt.Test__bicode("~${$3en.wiktionary.org~A~wikt:A~" , "<a href='/site/en.wiktionary.org/wiki/A' title='wikt:A'>wikt:A</a>");
|
||||
}
|
||||
@Test public void Capt__lower() { // EX: [[wikt:A|a]]
|
||||
fxt.Test__bicode("~$5en.wiktionary.org~A~" , "<a href='/site/en.wiktionary.org/wiki/A' title='A'>a</a>");
|
||||
}
|
||||
@Test public void Capt__upper() { // EX: [[wikt:a|A]]
|
||||
fxt.Test__bicode("~$9en.wiktionary.org~a~" , "<a href='/site/en.wiktionary.org/wiki/a' title='a'>A</a>");
|
||||
}
|
||||
@Test public void Ns__href() { // EX: [[wikt:help:a]]
|
||||
fxt.Test__bicode("~${a2en.wiktionary.org~/help~a~wikt:help:a~" , "<a href='/site/en.wiktionary.org/wiki/help:a' title='wikt:help:a'>wikt:help:a</a>");
|
||||
}
|
||||
@Test public void Ns__capt() { // EX: [[wikt:help:a|b]]
|
||||
fxt.Test__bicode("~${d3en.wiktionary.org~/help~a~b~wikt:help:a~" , "<a href='/site/en.wiktionary.org/wiki/help:a' title='wikt:help:a'>b</a>"); // MW: also adds class="extiw"
|
||||
}
|
||||
@Test public void Ns__anch() { // EX: [[wikt:Help:A#b]]
|
||||
fxt.Test__bicode("~${'sen.wiktionary.org~/A#b~Help:A~" , "<a href='/site/en.wiktionary.org/wiki/Help:A#b' title='Help:A'>A#b</a>");
|
||||
}
|
||||
@Test public void Ns__more() { // EX: [[wikt:Help:A]]b
|
||||
fxt.Test__bicode("~${#Ien.wiktionary.org~/A~b~" , "<a href='/site/en.wiktionary.org/wiki/Help:A' title='Help:A'>Help:Ab</a>");
|
||||
}
|
||||
@Test public void Ns__more__name() { // EX: [[wikt:Help:A|Ab|]]
|
||||
fxt.Test__bicode("~$sen.wiktionary.org~/A~b~" , "<a href='/site/en.wiktionary.org/wiki/Help:A' title='Help:A'>Ab</a>");
|
||||
}
|
||||
@Test public void Ns__url_encoding() { // EX: [[wikt:Category:A & B|]]
|
||||
fxt.Test__bicode("~${$sen.wiktionary.org~1A & B~A & B~" , "<a href='/site/en.wiktionary.org/wiki/Category:A_%26_B' title='Category:A & B'>A & B</a>");
|
||||
}
|
||||
// expected html here has no title attribute
@Test public void Less__eq() { // EX: [[wikt:Ab|A]]
|
||||
fxt.Test__bicode("~${*7en.wiktionary.org~A~b~" , "<a href='/site/en.wiktionary.org/wiki/Ab'>A</a>");
|
||||
}
|
||||
@Test public void Less__lo() { // EX: [[wikt:Ab|a]]
|
||||
fxt.Test__bicode("~$8en.wiktionary.org~A~b~" , "<a href='/site/en.wiktionary.org/wiki/Ab' title='Ab'>a</a>");
|
||||
}
|
||||
@Test public void Less__hi() { // EX: [[wikt:ab|A]]
|
||||
fxt.Test__bicode("~$<en.wiktionary.org~a~b~" , "<a href='/site/en.wiktionary.org/wiki/ab' title='ab'>A</a>");
|
||||
}
|
||||
@Test public void More__hi() { // EX: [[wikt:a|Ab]]
|
||||
fxt.Test__bicode("~$;en.wiktionary.org~a~b~" , "<a href='/site/en.wiktionary.org/wiki/a' title='a'>Ab</a>");
|
||||
}
|
||||
@Test public void Encode__lnki() { // EX: [[wikt:eorðe|eorðe]]
|
||||
fxt.Test__bicode("~$1en.wiktionary.org~eorðe~" , "<a href='/site/en.wiktionary.org/wiki/eor%C3%B0e' title='eorðe'>eorðe</a>");
|
||||
}
|
||||
// @Test public void Encode__lnke() { // EX: [//en.wiktionary.org/wiki/eorðe eorðe]; NOTE:MW inconsistently does not URL-encode external links (but does URL-encode internal ones)
|
||||
// fxt.Test__bicode("~$)en.wiktionary.org~eorðe~" , "<a href='/site/en.wiktionary.org/wiki/eorðe'>eorðe</a>");
|
||||
// }
|
||||
@Test public void Lnke__ns() {
|
||||
fxt.Test__bicode("~$qen.wiktionary.org~/a~" , "<a href='/site/en.wiktionary.org/wiki/Help:a' title='Help:a'>a</a>");
|
||||
}
|
||||
// expected html here has no title attribute
@Test public void Qarg_lnke() { // EX: [//en.wiktionary.org/wiki/A?b=c d]
|
||||
fxt.Test__bicode("~${*5en.wiktionary.org~A?b=c~d~" , "<a href='/site/en.wiktionary.org/wiki/A?b=c'>d</a>");
|
||||
}
|
||||
@Test public void Qarg_lnki() { // EX: [[wikt:A?b=c|d]]
|
||||
fxt.Test__bicode("~$2en.wiktionary.org~A?b=c~d~" , "<a href='/site/en.wiktionary.org/wiki/A?b=c' title='A?b=c'>d</a>"); // NOTE: mw encodes as A%3Fb%3Dc
|
||||
}
|
||||
// empty page name: the href ends at "/wiki/" and the page segment in the hzip string is empty ("~~")
@Test public void Main_page() { // EX: [[wikt:]]
|
||||
fxt.Test__bicode("~${<<en.wiktionary.org~~wikt:~" , "<a href='/site/en.wiktionary.org/wiki/' title='wikt:'>wikt:</a>");
|
||||
}
|
||||
}
|
||||
@@ -1,113 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import org.junit.*; import gplx.xowa.htmls.core.hzips.*;
|
||||
public class Xoh_lnki_hzip_tst {
|
||||
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt();
|
||||
@Test public void Href__basic() {
|
||||
fxt.Test__bicode("~$!A~", Xoh_lnki_html__hdump__tst.Html__same);
|
||||
}
|
||||
@Test public void Href__case_diff() {
|
||||
fxt.Test__bicode("~$!a~", "<a href='/wiki/A' id='xolnki_2' title='A'>a</a>");
|
||||
}
|
||||
@Test public void Href__url_encoded() {
|
||||
String html = "<a href=\"/wiki/A%27s\" id=\"xolnki_2\" title=\"A's\">A's</a>";
|
||||
fxt.Test__bicode_raw("~$!A's~", html, html);
|
||||
}
|
||||
@Test public void Ns__same() { // EX: [[Help:A]]
|
||||
fxt.Test__bicode("~$A/A~", "<a href='/wiki/Help:A' id='xolnki_2' title='Help:A'>Help:A</a>");
|
||||
}
|
||||
@Test public void Ns__diff() { // EX: [[Help:A_b|c]]
|
||||
fxt.Test__bicode("~$B/A_b~c~", "<a href='/wiki/Help:A_b' id='xolnki_2' title='Help:A b'>c</a>");
|
||||
}
|
||||
@Test public void Ns__space() { // EX: [[Help talk:A b]]
|
||||
fxt.Test__bicode("~$A0A b~", "<a href='/wiki/Help_talk:A_b' id='xolnki_2' title='Help talk:A b'>Help talk:A b</a>");
|
||||
}
|
||||
@Test public void Ns__under() { // EX: [[Help_talk:A_b]]; rare; just make sure codec can handle it;
|
||||
fxt.Test__bicode("~$B0A_b~Help_talk:A_b~", "<a href='/wiki/Help_talk:A_b' id='xolnki_2' title='Help talk:A b'>Help_talk:A_b</a>");
|
||||
}
|
||||
@Test public void Ns__pipe() { // EX: [[Help:A|]]
|
||||
fxt.Test__bicode("~$E/A~", "<a href='/wiki/Help:A' id='xolnki_2' title='Help:A'>A</a>");
|
||||
}
|
||||
@Test public void Ns__pipe_w_words() { // EX: [[Help:A b|]]
|
||||
fxt.Test__bicode("~$E/A b~", "<a href='/wiki/Help:A_b' id='xolnki_2' title='Help:A b'>A b</a>");
|
||||
}
|
||||
@Test public void Anch__same() {
|
||||
fxt.Test__bicode("~$2a~#a~", "<a href='#a' id='xolnki_2'>#a</a>");
|
||||
}
|
||||
@Test public void Anch__diff() {
|
||||
fxt.Test__bicode("~$2a~b~", "<a href='#a' id='xolnki_2'>b</a>");
|
||||
}
|
||||
@Test public void Anch__diff__starts_w_same() {
|
||||
fxt.Test__bicode("~$2a~a~", "<a href='#a' id='xolnki_2'>a</a>");
|
||||
}
|
||||
@Test public void Capt__basic() { // EX: [[A|b]]
|
||||
fxt.Test__bicode("~$\"A~b~", Xoh_lnki_html__hdump__tst.Html__diff);
|
||||
}
|
||||
@Test public void Capt__page_w_anch() { // Ex: [[A#b|c]]
|
||||
fxt.Test__bicode("~$\"A#b~b~", "<a href='/wiki/A#b' id='xolnki_2' title='A'>b</a>");
|
||||
}
|
||||
@Test public void Capt__nest() {
|
||||
fxt.Test__bicode
|
||||
( "~$\"A~<a href=\"/wiki/C\" id=\"xolnki_3\" title=\"C\">C1</a>D~"
|
||||
, "<a href=\"/wiki/A\" id=\"xolnki_2\" title=\"A\"><a href=\"/wiki/C\" id=\"xolnki_3\" title=\"C\">C1</a>D</a>"
|
||||
);
|
||||
}
|
||||
@Test public void Capt__reparent() { // PURPOSE: PAGE:en.w:Abyssal_plain; DATE:2015-06-02; DELETE: not needed in new dump format;
|
||||
fxt.Test__bicode
|
||||
( "$\"A<font color=\"white\">A1</font>"
|
||||
, "<a href=\"/wiki/A\" id=\"xolnki_2\" title=\"A\"><font color='white'>A1</font></a>"
|
||||
);
|
||||
}
|
||||
@Test public void Capt__xwiki() {
|
||||
Xow_wiki wiki = fxt.Prep_create_wiki("wikt", "en.wiktionary.org");
|
||||
wiki.Ns_mgr().Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
|
||||
fxt.Test__bicode("$*en.wiktionary.orgawikt:a", Xoh_lnki_html__hdump__tst.Html__xwiki);
|
||||
}
|
||||
@Test public void Capt__xwiki__qarg() {
|
||||
Xow_wiki wiki = fxt.Prep_create_wiki("wikt", "en.wiktionary.org");
|
||||
wiki.Ns_mgr().Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
|
||||
fxt.Test__bicode("$*en.wiktionary.orga?action=editwikt:a?action=edit", "<a href='/site/en.wiktionary.org/wiki/a?action=edit' id='xolnki_2' title='a?action=edit'>wikt:a?action=edit</a>");
|
||||
}
|
||||
@Test public void Capt__xwiki__encode() {
|
||||
Xow_wiki wiki = fxt.Prep_create_wiki("wikt", "en.wiktionary.org");
|
||||
wiki.Ns_mgr().Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
|
||||
fxt.Test__bicode("$)en.wiktionary.orgeorðe", "<a href='/site/en.wiktionary.org/wiki/eor%C3%B0e' id='xolnki_2' title='eorðe'>eorðe</a>");
|
||||
}
|
||||
@Test public void Trail__basic() {
|
||||
fxt.Test__bicode("~$#A~b~", Xoh_lnki_html__hdump__tst.Html__trail);
|
||||
}
|
||||
@Test public void Short__basic() {
|
||||
fxt.Test__bicode("~$$A~b~", "<a href='/wiki/Ab' id='xolnki_2' title='Ab'>A</a>");
|
||||
}
|
||||
@Test public void Short__case() {
|
||||
fxt.Test__bicode("~$$a~b~", "<a href='/wiki/Ab' id='xolnki_2' title='Ab'>a</a>");
|
||||
}
|
||||
@Test public void Site__main_page() {
|
||||
fxt.Test__bicode("~$)en.wikipedia.org~Main Page~"
|
||||
, "<a href='/site/en.wikipedia.org/wiki/' id='xolnki_2' title='Main Page'>Main Page</a>"
|
||||
, "<a href='/site/en.wikipedia.org/wiki/Main_Page' id='xolnki_2' title='Main Page'>Main Page</a>"
|
||||
);
|
||||
}
|
||||
@Test public void Site__qarg() {
|
||||
fxt.Test__bicode("~$*en.wikipedia.org~A?b=c~d~", "<a href='/site/en.wikipedia.org/wiki/A?b=c' id='xolnki_2' title='A?b=c'>d</a>");
|
||||
}
|
||||
@Test public void Inet__file() {
|
||||
fxt.Test__bicode("~$:file:///C://A.png~b~", "<a href='file:///C://A.png' id='xolnki_2' title='file:///C://A.png'>b</a>");
|
||||
}
|
||||
}
|
||||
@@ -20,90 +20,128 @@ import gplx.core.brys.*; import gplx.langs.htmls.*; import gplx.langs.htmls.pars
|
||||
import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*;
|
||||
import gplx.xowa.wikis.ttls.*; import gplx.xowa.wikis.nss.*;
|
||||
public class Xoh_lnki_parser {
|
||||
private final Xoh_anch_capt_parser capt_parser = new Xoh_anch_capt_parser();
|
||||
private byte[] src;
|
||||
private int href_ns_id; private byte[] href_ns_name; private int href_ns_name_len;
|
||||
private byte[] capt_src; private int capt_bgn, capt_end;
|
||||
private final Bry_rdr rdr = new Bry_rdr();
|
||||
public int Rng_bgn() {return rng_bgn;} private int rng_bgn;
|
||||
public int Rng_end() {return rng_end;} private int rng_end;
|
||||
public byte Text_type() {return text_type;} private byte text_type;
|
||||
public byte[] Href_bry() {return href_bry;} private byte[] href_bry;
|
||||
public int Href_bgn() {return href_bgn;} private int href_bgn;
|
||||
public int Href_end() {return href_end;} private int href_end;
|
||||
public byte[] Capt_bry() {return capt_bry;} private byte[] capt_bry;
|
||||
public int Capt_bgn() {return capt_bgn;} private int capt_bgn;
|
||||
public int Capt_end() {return capt_end;} private int capt_end;
|
||||
public Xoh_anch_href_parser Href_parser() {return href_parser;} private final Xoh_anch_href_parser href_parser = new Xoh_anch_href_parser();
|
||||
public int Parse(Xoh_hdoc_wkr wkr, Xoh_hdoc_ctx hctx, byte[] src, Html_tag_rdr tag_rdr, Html_tag anch_head, Xow_ttl_parser ttl_parser) {// <a href="/wiki/A" title="A">b</a>
|
||||
this.rng_bgn = anch_head.Src_bgn();
|
||||
rdr.Init_by_sub(tag_rdr.Rdr(), "lnki", rng_bgn, src.length);
|
||||
href_parser.Parse(rdr, hctx.App(), hctx.Wiki__ttl_parser(), anch_head); // href='/wiki/A'
|
||||
// get href
|
||||
this.href_bry = src;
|
||||
this.href_bgn = href_parser.Page_bgn(); this.href_end = href_parser.Page_end();
|
||||
Xoa_ttl href_ttl = null; Xow_ns href_ns = null;
|
||||
int href_ns_id = Xow_ns_.Tid__main; boolean href_cs_tid_1st = true;
|
||||
switch (href_parser.Tid()) {
|
||||
case Xoh_anch_href_parser.Tid__anch:
|
||||
case Xoh_anch_href_parser.Tid__inet:
|
||||
break;
|
||||
default:
|
||||
href_ttl = href_parser.Page_ttl();
|
||||
href_ns = href_ttl.Ns();
|
||||
href_ns_id = href_ns.Id();
|
||||
href_cs_tid_1st = href_ttl.Ns().Case_match() == Xow_ns_case_.Tid__1st;
|
||||
this.href_bry = href_parser.Page_bry();
|
||||
public int Src_bgn() {return src_bgn;} private int src_bgn;
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
public boolean Capt_has_ns() {return capt_has_ns;} private boolean capt_has_ns;
|
||||
public byte Text_tid() {return text_tid;} private byte text_tid;
|
||||
public byte[] Text_0_src() {return text_0_src;} private byte[] text_0_src;
|
||||
public int Text_0_bgn() {return text_0_bgn;} private int text_0_bgn;
|
||||
public int Text_0_end() {return text_0_end;} private int text_0_end;
|
||||
public byte[] Text_1_src() {return text_1_src;} private byte[] text_1_src;
|
||||
public int Text_1_bgn() {return text_1_bgn;} private int text_1_bgn;
|
||||
public int Text_1_end() {return text_1_end;} private int text_1_end;
|
||||
public byte[] Href_src() {return href_src;} private byte[] href_src;
|
||||
public int Href_bgn() {return href_bgn;} private int href_bgn;
|
||||
public int Href_end() {return href_end;} private int href_end;
|
||||
public boolean Title_missing_ns() {return title_missing_ns;} private boolean title_missing_ns;
|
||||
public int Title_tid() {return title_tid;} private int title_tid;
|
||||
public int Title_bgn() {return title_bgn;} private int title_bgn;
|
||||
public int Title_end() {return title_end;} private int title_end;
|
||||
public Xoh_anch_href_itm Href_itm() {return href_itm;} private final Xoh_anch_href_itm href_itm = new Xoh_anch_href_itm();
|
||||
public Xoh_anch_capt_itm Capt_itm() {return capt_itm;} private final Xoh_anch_capt_itm capt_itm = new Xoh_anch_capt_itm();
|
||||
// Resets all per-link state before a new <a> tag is parsed.
// - src, href_src and capt_src all start out pointing at the same byte array
// - flags are cleared; the namespace defaults to main with no namespace name captured
// - every offset is set to -1 and title_tid starts as Title__href
private void Init(byte[] src) {
|
||||
this.src = href_src = capt_src = src;
|
||||
capt_has_ns = title_missing_ns = false;
|
||||
href_ns_id = Xow_ns_.Tid__main; href_ns_name = null; href_ns_name_len = 0;
|
||||
href_bgn = href_end = capt_bgn = capt_end = title_bgn = title_end = -1;
|
||||
title_tid = Title__href;
|
||||
}
|
||||
// Parses one anchor, starting from its already-read opening tag, and reports the result to hdoc_wkr.
//   hdoc_wkr  : receives this parser via On_lnki once href, caption and title have been parsed
//   hctx      : passed through to Parse_href
//   tag_rdr   : supplies the error worker for rdr and is handed to Parse_capt
//   src       : source bytes; rdr is initialized over src_bgn .. src.length
//   anch_head : the opening <a> tag; its Src_bgn becomes src_bgn
// Always returns true.
public boolean Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Html_tag_rdr tag_rdr, byte[] src, Html_tag anch_head) {
|
||||
Init(src);
|
||||
this.src_bgn = anch_head.Src_bgn();
|
||||
rdr.Init_by_wkr(tag_rdr.Err_wkr(), "lnki", src_bgn, src.length);
|
||||
// NOTE(review): the title attribute is fetched before Parse_href / Parse_capt run; presumably anch_head should not be queried after tag_rdr is handed to Parse_capt -- confirm before reordering
Html_atr title_atr = anch_head.Atrs__get_by_or_empty(Html_atr_.Bry__title);
|
||||
Parse_href(hctx, anch_head);
|
||||
Parse_capt(tag_rdr, anch_head);
|
||||
// Parse_title compares against href_bgn / href_end and capt_bgn / capt_end, so it must run after the two calls above
Parse_title(title_atr);
|
||||
hdoc_wkr.On_lnki(this);
|
||||
return true;
|
||||
}
|
||||
private void Parse_href(Xoh_hdoc_ctx hctx, Html_tag anch_head) {
|
||||
href_itm.Parse(rdr.Err_wkr(), hctx, anch_head);
|
||||
this.href_bgn = href_itm.Ttl_bgn(); this.href_end = href_itm.Ttl_end();
|
||||
switch (href_itm.Tid()) {
|
||||
case Xoh_anch_href_itm.Tid__wiki: case Xoh_anch_href_itm.Tid__site:
|
||||
this.href_ns_id = href_itm.Ttl_ns_id();
|
||||
this.href_src = href_itm.Ttl_full_txt();
|
||||
this.href_bgn = 0;
|
||||
this.href_end = href_bry.length;
|
||||
this.href_end = href_src.length;
|
||||
if (href_ns_id != Xow_ns_.Tid__main) { // not main; try to remove template name;
|
||||
int colon_pos = Bry_find_.Find_fwd(href_src, Byte_ascii.Colon, href_bgn, href_end);
|
||||
this.href_ns_name = Xoa_ttl.Replace_unders(Bry_.Mid(href_src, href_bgn, colon_pos + 1)); // EX: 11="Template talk:"
|
||||
this.href_ns_name_len = href_ns_name.length;
|
||||
}
|
||||
break;
|
||||
}
|
||||
// get capt
|
||||
this.capt_bry = src;
|
||||
}
|
||||
private void Parse_capt(Html_tag_rdr tag_rdr, Html_tag anch_head) {
|
||||
this.capt_bgn = anch_head.Src_end(); // capt starts after <a>
|
||||
Html_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__a); // </a>
|
||||
this.capt_end = anch_tail.Src_bgn(); // get capt between "<a>" and "</a>
|
||||
this.rng_end = anch_tail.Src_end();
|
||||
boolean capt_bgn_has_ns = true;
|
||||
this.src_end = anch_tail.Src_end();
|
||||
// skip ns in href / capt
|
||||
if (href_ns_id != Xow_ns_.Tid__main) { // not main; try to remove template name;
|
||||
int colon_pos = Bry_find_.Find_fwd(href_bry, Byte_ascii.Colon, href_bgn, href_end);
|
||||
byte[] ns_name = Xoa_ttl.Replace_unders(Bry_.Mid(href_bry, href_bgn, colon_pos + 1)); // EX: 11="Template talk:"
|
||||
int ns_name_len = ns_name.length;
|
||||
int ns_name_end = capt_bgn + ns_name_len;
|
||||
href_bgn += ns_name_len; // skip ns_name for href; EX: "Help:A" -> "A"; "Help" will be saved as encoded num
|
||||
if (Bry_.Match(src, capt_bgn, ns_name_end, ns_name, 0, ns_name_len)) // href matches capt; EX: [[Help:A]] -> <a href='/wiki/Help:A'>Help:A</a>
|
||||
capt_bgn = ns_name_end;
|
||||
else
|
||||
capt_bgn_has_ns = false;
|
||||
int capt_bgn_wo_ns = capt_bgn + href_ns_name_len;
|
||||
href_bgn += href_ns_name_len; // skip ns_name for href; EX: "Help:A" -> "A"; "Help" will be saved as encoded number
|
||||
if (Bry_.Match(capt_src, capt_bgn, capt_bgn_wo_ns, href_ns_name)) { // capt matches ns_name; EX: <a href='/wiki/Help:A'>Help:A</a> -> "Help:A" matches "Help:"
|
||||
capt_bgn = capt_bgn_wo_ns; // skip ns; "Help:"
|
||||
capt_has_ns = true;
|
||||
}
|
||||
}
|
||||
if (href_parser.Tid() == Xoh_anch_href_parser.Tid__anch)
|
||||
this.text_type = Xoh_anch_capt_parser.Tid__capt;
|
||||
else
|
||||
this.text_type = capt_parser.Parse(rdr, capt_bgn_has_ns, href_cs_tid_1st, href_bry, href_bgn, href_end, src, capt_bgn, capt_end);
|
||||
int split_pos = capt_parser.Split_pos();
|
||||
switch (text_type) {
|
||||
case Xoh_anch_capt_parser.Tid__capt: // nothing to do; href / capt already set above
|
||||
// get text splits
|
||||
this.text_tid = href_itm.Tid() == Xoh_anch_href_itm.Tid__anch
|
||||
? Xoh_anch_capt_itm.Tid__diff
|
||||
: capt_itm.Parse(rdr, capt_has_ns, href_src, href_bgn, href_end, src, capt_bgn, capt_end);
|
||||
int split_pos = capt_itm.Split_pos();
|
||||
this.text_0_src = href_src; this.text_0_bgn = href_bgn; this.text_0_end = href_end;
|
||||
this.text_1_src = capt_src; this.text_1_bgn = capt_bgn; this.text_1_end = capt_end;
|
||||
switch (text_tid) {
|
||||
case Xoh_anch_capt_itm.Tid__same:
|
||||
// case Xoh_anch_capt_itm.Tid__href_pipe:
|
||||
case Xoh_anch_capt_itm.Tid__diff: // nothing to do; href / capt already set above
|
||||
break;
|
||||
case Xoh_anch_capt_parser.Tid__href: // redefine href to capt since both href and capt are same except for case-sensitivity / underscores; EX: [[a]], [[A b]]
|
||||
case Xoh_anch_capt_parser.Tid__href_pipe:
|
||||
this.href_bry = src;
|
||||
this.href_bgn = capt_bgn;
|
||||
this.href_end = capt_end;
|
||||
case Xoh_anch_capt_itm.Tid__more:
|
||||
this.text_1_bgn = split_pos;
|
||||
break;
|
||||
case Xoh_anch_capt_parser.Tid__href_trail:
|
||||
this.href_bry = src;
|
||||
this.href_bgn = capt_bgn;
|
||||
this.href_end = split_pos;
|
||||
this.capt_bgn = split_pos;
|
||||
break;
|
||||
case Xoh_anch_capt_parser.Tid__capt_short:
|
||||
int tmp_capt_bgn = capt_bgn, tmp_capt_end = capt_end;
|
||||
this.capt_bry = href_bry;
|
||||
this.capt_bgn = split_pos;
|
||||
this.capt_end = href_end;
|
||||
this.href_bry = src;
|
||||
this.href_bgn = tmp_capt_bgn;
|
||||
this.href_end = tmp_capt_end;
|
||||
case Xoh_anch_capt_itm.Tid__less:
|
||||
this.text_0_end = split_pos;
|
||||
this.text_1_src = href_src;
|
||||
this.text_1_bgn = split_pos;
|
||||
this.text_1_end = href_end;
|
||||
break;
|
||||
}
|
||||
wkr.On_lnki(this);
|
||||
return rng_end;
|
||||
}
|
||||
// Classifies the title attribute relative to the href text and the caption text.
// Sets title_bgn / title_end from the attribute value, optionally moves title_bgn past the namespace name,
// and stores one of Title__missing / Title__href / Title__capt / Title__diff in title_tid.
private void Parse_title(Html_atr title_atr) {
|
||||
// Tfds.Dbg(Bry_.Mid(href_src, href_bgn, href_end), Bry_.Mid(src, capt_bgn, capt_end), Bry_.Mid(src, title_bgn, title_end));
|
||||
title_bgn = title_atr.Val_bgn(); title_end = title_atr.Val_end();
|
||||
// NOTE(review): this namespace check runs before the title_end == -1 check below; if a missing title also
// yields title_bgn == -1, Bry_.Match is called with a negative offset and title_missing_ns may be set for a
// title that does not exist -- confirm Bry_.Match tolerates that range
if (href_ns_name != null) { // ns_name exists
|
||||
int title_bgn_wo_ns = title_bgn + href_ns_name_len;
|
||||
if (Bry_.Match(src, title_bgn, title_bgn_wo_ns, href_ns_name)) // title matches ns_name;
|
||||
title_bgn = title_bgn_wo_ns; // skip ns; "Help:"
|
||||
else
|
||||
title_missing_ns = true;
|
||||
}
|
||||
// title_end of -1 is treated as "no title attribute"
if (title_end == -1)
|
||||
title_tid = Title__missing;
|
||||
else {
|
||||
// href comparison is tried first, so a title that equals both href and caption is reported as Title__href
if (Bry_.Match(src, title_bgn, title_end, href_src, href_bgn, href_end))
|
||||
title_tid = Title__href;
|
||||
else if (Bry_.Match(src, title_bgn, title_end, src, capt_bgn, capt_end))
|
||||
title_tid = Title__capt;
|
||||
else {
|
||||
title_tid = Title__diff;
|
||||
if (href_ns_name != null) title_bgn = title_atr.Val_bgn(); // since title is different, add back ns_name; EX: "<a href='/wiki/Help:A_b#c' title='Help:A b'>a</a>"; title should be "Help:A b", not "A b"
|
||||
}
|
||||
}
|
||||
}
|
||||
// Values stored in title_tid by Parse_title; they describe how the title attribute relates to the link's other text.
// NOTE(review): "SERIALIZED" presumably means these numbers are written to stored data and must not be renumbered -- confirm
public static final int // SERIALIZED
|
||||
Title__href = 0 // title bytes match the href text (also the initial value set by Init)
|
||||
, Title__capt = 1 // title bytes match the caption text
|
||||
, Title__diff = 2 // title matches neither href nor caption
|
||||
, Title__missing = 3 // title_end was -1
|
||||
;
|
||||
}
|
||||
|
||||
@@ -18,44 +18,65 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis.anchs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
|
||||
import gplx.core.brys.*;
|
||||
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.ttls.*;
|
||||
public class Xoh_anch_capt_parser {
|
||||
public int Split_pos() {return split_pos;} private int split_pos;
|
||||
public byte Parse(Bry_rdr owner_rdr, boolean capt_bgn_has_ns, boolean cs_tid_1st, byte[] href_bry, int href_bgn, int href_end, byte[] capt_bry, int capt_bgn, int capt_end) {
|
||||
this.split_pos = -1;
|
||||
public class Xoh_anch_capt_itm {
|
||||
public int Cs0_tid() {return cs0_tid;} private int cs0_tid;
|
||||
public int Split_pos() {return split_pos;} private int split_pos;
|
||||
public byte Parse(Bry_rdr owner_rdr, boolean ns_name_exists, byte[] href_bry, int href_bgn, int href_end, byte[] capt_bry, int capt_bgn, int capt_end) {
|
||||
this.cs0_tid = Cs0__exact; this.split_pos = -1;
|
||||
// do compare
|
||||
int href_len = href_end - href_bgn;
|
||||
int capt_len = capt_end - capt_bgn;
|
||||
for (int i = 0; i < capt_len; ++i) {
|
||||
if (i == href_len) { // ran out of href; mark as trail; EX: [[A]]s -> href="A"; capt="As"
|
||||
split_pos = i + capt_bgn;
|
||||
return Tid__href_trail;
|
||||
return Tid__more;
|
||||
}
|
||||
byte href_byte = href_bry[i + href_bgn];
|
||||
byte capt_byte = capt_bry[i + capt_bgn];
|
||||
if (href_byte == capt_byte) continue;
|
||||
if ( i == 0 // ignore case if 1st letter and ns is Tid__1st; EX: [[earth]] -> href="Earth"; capt="earth"
|
||||
&& cs_tid_1st
|
||||
&& capt_byte >= Byte_ascii.Ltr_a && capt_byte <= Byte_ascii.Ltr_z
|
||||
&& (capt_byte - href_byte) == 32
|
||||
)
|
||||
continue;
|
||||
if (i == 0) { // ignore case if 1st letter and ns is Tid__1st; EX: [[earth]] -> href="Earth"; capt="earth"
|
||||
if ( capt_byte == href_byte) {
|
||||
cs0_tid = Cs0__exact;
|
||||
continue;
|
||||
}
|
||||
else if(href_byte >= Byte_ascii.Ltr_A && href_byte <= Byte_ascii.Ltr_Z
|
||||
&& capt_byte - href_byte == 32
|
||||
) {
|
||||
cs0_tid = Cs0__lower;
|
||||
continue;
|
||||
}
|
||||
else if(href_byte >= Byte_ascii.Ltr_a && href_byte <= Byte_ascii.Ltr_z
|
||||
&& href_byte - capt_byte == 32
|
||||
) {
|
||||
cs0_tid = Cs0__upper;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (href_byte == capt_byte) continue;
|
||||
if ( capt_byte == Byte_ascii.Space // ignore " " vs "_"
|
||||
&& href_byte == Byte_ascii.Underline
|
||||
)
|
||||
continue;
|
||||
return Tid__capt; // bytes still diff; return diff
|
||||
this.cs0_tid = Cs0__exact;
|
||||
return Tid__diff; // bytes still diff; return diff
|
||||
}
|
||||
if (capt_len == href_len) // all bytes same and capt_len == href_len; must be same
|
||||
return capt_bgn_has_ns ? Tid__href : Tid__href_pipe;
|
||||
return Tid__same;// : Tid__href_pipe;
|
||||
else { // capt < href; EX: [[A_(b)|A]] -> href="A_(b)"; capt = "A"
|
||||
split_pos = capt_len + href_bgn;
|
||||
return Tid__capt_short;
|
||||
return Tid__less;
|
||||
}
|
||||
}
|
||||
public static final byte // SERIALIZED
|
||||
Tid__href = 0 // [[A]] -> "A|A" -> "A|"
|
||||
, Tid__capt = 1 // [[A|b]] -> "A|b" -> "A|b"
|
||||
, Tid__href_trail = 2 // [[A]]s -> "A|As" -> "A|s"
|
||||
, Tid__capt_short = 3 // [[A_(b)|A]] -> "A_(b)|A" -> "A|_(b)"
|
||||
, Tid__href_pipe = 4 // [[Help:A|]] -> "Help:A|A" -> "A|"
|
||||
Tid__same = 0 // [[A]] -> "A|A" -> "A|"
|
||||
, Tid__diff = 1 // [[A|b]] -> "A|b" -> "A|b"
|
||||
, Tid__more = 2 // [[A]]s -> "A|As" -> "A|s"
|
||||
, Tid__less = 3 // [[A_(b)|A]] -> "A_(b)|A" -> "A|_(b)"
|
||||
// , Tid__href_pipe = 4 // [[Help:A|]] -> "Help:A|A" -> "A|"
|
||||
;
|
||||
// Values stored in cs0_tid by Parse; they describe how the first caption byte relates to the first href byte.
// NOTE(review): "SERIALIZED" presumably means these numbers are written to stored data and must not be renumbered -- confirm
public static final int // SERIALIZED
|
||||
Cs0__exact = 0 // default; also restored when Parse returns Tid__diff
|
||||
, Cs0__lower = 1 // [[A|a]] -> "A|a" -> "A"; href starts with A-Z and caption starts with the same letter in lower case
|
||||
, Cs0__upper = 2 // [[a|A]] -> "a|A" -> "a"; href starts with a-z and caption starts with the same letter in upper case
|
||||
;
|
||||
}
|
||||
@@ -17,25 +17,25 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis.anchs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
|
||||
import org.junit.*; import gplx.core.brys.*; import gplx.langs.htmls.parsers.*;
|
||||
public class Xoh_anch_capt_parser_tst {
|
||||
private final Xoh_anch_capt_parser_fxt fxt = new Xoh_anch_capt_parser_fxt();
|
||||
@Test public void Basic__same() {fxt.Test__match("Abc" , "Abc", Xoh_anch_capt_parser.Tid__href);}
|
||||
@Test public void Basic__diff() {fxt.Test__match("Abc" , "ABC", Xoh_anch_capt_parser.Tid__capt);}
|
||||
@Test public void Space__same() {fxt.Test__match("A_b" , "A b", Xoh_anch_capt_parser.Tid__href);}
|
||||
@Test public void Case__same() {fxt.Test__match("Abc" , "abc", Xoh_anch_capt_parser.Tid__href);}
|
||||
@Test public void Case__reverse() {fxt.Test__match("abc" , "Abc", Xoh_anch_capt_parser.Tid__capt);}
|
||||
@Test public void Case__disabled() {
|
||||
fxt.Wiki().Ns_mgr().Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
|
||||
fxt.Test__match("Abcde", "abcde", Xoh_anch_capt_parser.Tid__capt);
|
||||
}
|
||||
@Test public void Ns__href() {fxt.Test__match("Help_talk:Ab" , "Help talk:Ab" , Xoh_anch_capt_parser.Tid__href);}
|
||||
@Test public void Capt_trail() {fxt.Test__match("A" , "Abc" , Xoh_anch_capt_parser.Tid__href_trail, 1);}
|
||||
@Test public void Href_trail() {fxt.Test__match("Ab" , "A" , Xoh_anch_capt_parser.Tid__capt_short, 1);}
|
||||
public class Xoh_anch_capt_itm_tst {
|
||||
private final Xoh_anch_capt_itm_fxt fxt = new Xoh_anch_capt_itm_fxt();
|
||||
@Test public void Basic__same() {fxt.Test__match("Abc" , "Abc", Xoh_anch_capt_itm.Tid__same);}
|
||||
@Test public void Basic__diff() {fxt.Test__match("Abc" , "ABC", Xoh_anch_capt_itm.Tid__diff);}
|
||||
@Test public void Space__same() {fxt.Test__match("A_b" , "A b", Xoh_anch_capt_itm.Tid__same);}
|
||||
@Test public void Case__same() {fxt.Test__match("Abc" , "abc", Xoh_anch_capt_itm.Tid__same);}
|
||||
// @Test public void Case__reverse() {fxt.Test__match("abc" , "Abc", Xoh_anch_capt_itm.Tid__diff);}
|
||||
// @Test public void Case__disabled() {
|
||||
// fxt.Wiki().Ns_mgr().Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
|
||||
// fxt.Test__match("Abcde", "abcde", Xoh_anch_capt_itm.Tid__diff);
|
||||
// }
|
||||
@Test public void Ns__href() {fxt.Test__match("Help_talk:Ab" , "Help talk:Ab" , Xoh_anch_capt_itm.Tid__same);}
|
||||
@Test public void Capt_trail() {fxt.Test__match("A" , "Abc" , Xoh_anch_capt_itm.Tid__more, 1);}
|
||||
@Test public void Href_trail() {fxt.Test__match("Ab" , "A" , Xoh_anch_capt_itm.Tid__less, 1);}
|
||||
}
|
||||
class Xoh_anch_capt_parser_fxt {
|
||||
private final Xoh_anch_capt_parser matcher = new Xoh_anch_capt_parser();
|
||||
class Xoh_anch_capt_itm_fxt {
|
||||
private final Xoh_anch_capt_itm matcher = new Xoh_anch_capt_itm();
|
||||
private final Bry_rdr rdr = new Bry_rdr();
|
||||
public Xoh_anch_capt_parser_fxt() {
|
||||
public Xoh_anch_capt_itm_fxt() {
|
||||
Xoae_app app = Xoa_app_fxt.app_();
|
||||
this.wiki = Xoa_app_fxt.wiki_tst_(app);
|
||||
}
|
||||
@@ -45,8 +45,8 @@ class Xoh_anch_capt_parser_fxt {
|
||||
byte[] page_bry = Bry_.new_u8(page_str);
|
||||
byte[] capt_bry = Bry_.new_u8(capt_str);
|
||||
Xoa_ttl href_ttl = wiki.Ttl_parse(page_bry);
|
||||
boolean cs_tid_1st = href_ttl.Ns().Case_match() == gplx.xowa.wikis.nss.Xow_ns_case_.Tid__1st;
|
||||
Tfds.Eq_int(expd_tid , matcher.Parse(rdr.Init_by_page(Bry_.Empty, page_bry, page_bry.length), Bool_.Y, cs_tid_1st, page_bry, 0, page_bry.length, capt_bry, 0, capt_bry.length));
|
||||
boolean ns_is_cs = href_ttl.Ns().Case_match() == gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all;
|
||||
Tfds.Eq_int(expd_tid , matcher.Parse(rdr.Init_by_page(Bry_.Empty, page_bry, page_bry.length), ns_is_cs, page_bry, 0, page_bry.length, capt_bry, 0, capt_bry.length));
|
||||
Tfds.Eq_int(expd_trail_bgn , matcher.Split_pos());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis.anchs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
|
||||
import gplx.core.brys.*; import gplx.core.btries.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*; import gplx.langs.htmls.encoders.*;
|
||||
import gplx.xowa.wikis.ttls.*; import gplx.xowa.wikis.nss.*;
|
||||
// Parses the href value of an anchor tag and classifies it as one of four kinds (see the Tid__ constants):
//   wiki : href="/wiki/A"
//   site : href="/site/en.wikipedia.org/wiki/A"
//   anch : href="#A"
//   inet : anything else, including the empty String
// After Parse, the accessors expose the byte offsets of the href, the site segment and the title,
// plus the url-decoded title split into namespace id and page name.
public class Xoh_anch_href_itm implements Xoh_itm_parser {
|
||||
// reader over the href bytes; default delimiter is "/" so that Find_fwd_lr can step over path segments
private final Bry_rdr rdr = new Bry_rdr().Dflt_dlm_(Byte_ascii.Slash);
|
||||
// private final Xoa_url tmp_url = Xoa_url.blank();
|
||||
public void Fail_throws_err_(boolean v) {rdr.Fail_throws_err_(v);}// TEST
|
||||
// href attribute taken from the tag; only assigned by the Parse overload that receives a tag
public Html_atr Atr() {return atr;} private Html_atr atr;
|
||||
// one of the Tid__ constants; NOTE(review): Clear() sets this to Tid__wiki, so it still reads Tid__wiki after a Parse that returned false
public byte Tid() {return tid;} private byte tid;
|
||||
// bounds of the href value as passed to Parse; -1 until a Parse gets past the "no href" check
public int Rng_bgn() {return rng_bgn;} private int rng_bgn;
|
||||
public int Rng_end() {return rng_end;} private int rng_end;
|
||||
// bounds of the site segment; only assigned in the "/site/" branch, otherwise both stay -1
public int Site_bgn() {return site_bgn;} private int site_bgn;
|
||||
public int Site_end() {return site_end;} private int site_end;
|
||||
public boolean Site_exists() {return site_end > site_bgn;}
|
||||
// url-decoded title bytes between ttl_bgn and ttl_end; Bry_.Empty when the title is empty; null until Parse_ttl runs
public byte[] Ttl_full_txt() {return ttl_full_txt;} private byte[] ttl_full_txt;
|
||||
// for wiki / site hrefs: title without a recognized non-main namespace prefix, passed through Xoa_ttl.Replace_spaces; for anch / inet: same as ttl_full_txt
public byte[] Ttl_page_db() {return ttl_page_db;} private byte[] ttl_page_db;
|
||||
// NOTE(review): ttl_page_db is null after Clear() and stays null when Parse returns false (no href); calling this in that state throws NullPointerException
public boolean Ttl_is_main_page() {return ttl_page_db.length == 0;}
|
||||
// namespace id found by name lookup on the text before the first ":"; Xow_ns_.Tid__main when none was recognized
public int Ttl_ns_id() {return ttl_ns_id;} private int ttl_ns_id;
|
||||
// namespace text exactly as written in the href, set only when it does not match the namespace's Name_ui(); otherwise null
public byte[] Ttl_ns_custom() {return ttl_ns_custom;} private byte[] ttl_ns_custom;
|
||||
// bounds of the title portion of the href within the source bytes
public int Ttl_bgn() {return ttl_bgn;} private int ttl_bgn;
|
||||
public int Ttl_end() {return ttl_end;} private int ttl_end;
|
||||
// Resets every parse result: tid to Tid__wiki, all offsets to -1, all byte arrays to null, namespace to main.
// atr is not reset here.
private void Clear() {
|
||||
tid = Tid__wiki;
|
||||
rng_bgn = rng_end = site_bgn = site_end = ttl_bgn = ttl_end = -1;
|
||||
ttl_full_txt = ttl_page_db = ttl_ns_custom = null;
|
||||
ttl_ns_id = Xow_ns_.Tid__main;
|
||||
}
|
||||
// Parses the href attribute of the given tag; stores the attribute in atr and delegates to the range-based overload.
// Returns false when that overload sees a begin offset of -1.
public boolean Parse(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, Html_tag tag) {
|
||||
this.atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__href);
|
||||
return Parse(err_wkr, hctx, atr.Val_bgn(), atr.Val_end());
|
||||
}
|
||||
// Parses the href value located at rng_bgn .. rng_end of err_wkr.Src().
//   err_wkr : supplies the source bytes and is used to initialize rdr
//   hctx    : supplies the title parser used by Parse_ttl
// Returns false (with all state cleared) when rng_bgn is -1; otherwise returns true after setting tid, offsets and title fields.
public boolean Parse(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, int rng_bgn, int rng_end) {
|
||||
this.Clear();
|
||||
if (rng_bgn == -1) return false; // no href; return; EX: <a/> vs <a href='a.org'/>
|
||||
rdr.Init_by_wkr(err_wkr, "href", rng_bgn, rng_end);
|
||||
this.rng_bgn = rng_bgn; this.rng_end = rng_end;
|
||||
byte[] src = err_wkr.Src();
|
||||
if (rng_end == rng_bgn) { // handle empty String separately; EX: href=""
|
||||
tid = Tid__inet;
|
||||
// zero-length title range; Parse_ttl will set the title fields to Bry_.Empty
ttl_bgn = ttl_end = 0;
|
||||
}
|
||||
else {
|
||||
ttl_end = rng_end;
|
||||
// dispatch on the first byte of the href; every branch ends in break, so the position of "default" does not affect behavior
switch (src[rng_bgn]) {
|
||||
case Byte_ascii.Hash:
|
||||
tid = Tid__anch;
|
||||
ttl_bgn = rng_bgn + 1; // position ttl_bgn after #
|
||||
break;
|
||||
default:
|
||||
Parse_inet(hctx, src);
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
rdr.Move_by_one(); // skip "/"
|
||||
// the trie only knows "wiki/" and "site/"; NOTE(review): presumably Chk fails via rdr's error handling for any other first segment -- confirm
if (rdr.Chk(trie) == Tid__site) { // EX: "/site/en.wikipedia.org/wiki/A"
|
||||
tid = Tid__site;
|
||||
site_bgn = rdr.Pos();
|
||||
// NOTE(review): presumably returns the position of the next "/" (rdr's default delimiter) and moves past it -- confirm
site_end = rdr.Find_fwd_lr();
|
||||
// NOTE(review): presumably asserts that "wiki/" follows the site segment and advances past it -- confirm
rdr.Chk(Bry__wiki);
|
||||
}
|
||||
else
|
||||
tid = Tid__wiki;
|
||||
// title starts wherever the reader stopped; this line runs for both the site and the wiki case
ttl_bgn = rdr.Pos();
|
||||
break;
|
||||
}
|
||||
}
|
||||
Parse_ttl(hctx.Wiki__ttl_parser(), src);
|
||||
return true;
|
||||
}
|
||||
// Marks the href as Tid__inet and treats the whole href as the title (ttl_bgn = rng_bgn).
// The commented-out lines are a disabled attempt to recognize urls via a url parser; hctx and src are not used by the live code.
private void Parse_inet(Xoh_hdoc_ctx hctx, byte[] src) {
|
||||
// hctx.Wiki__url_parser().Parse(tmp_url, src);
|
||||
// Tfds.Write(tmp_url.Tid());
|
||||
// if (tmp_url.Tid() == Xoa_url_.Tid_page) {
|
||||
// Tfds.Write(tmp_url.Wiki_bry());
|
||||
// Tfds.Write(tmp_url.Page_bry());
|
||||
// }
|
||||
// else {
|
||||
tid = Tid__inet;
|
||||
ttl_bgn = rng_bgn;
|
||||
// }
|
||||
}
|
||||
// Fills ttl_full_txt, ttl_page_db, ttl_ns_id and ttl_ns_custom from the bytes at ttl_bgn .. ttl_end.
//   ttl_parser : supplies the namespace manager used to look up the text before the first ":"
//   src        : source bytes containing the href
// Throws via Err_.new_unhandled when tid is not one of the four Tid__ constants.
private void Parse_ttl(Xow_ttl_parser ttl_parser, byte[] src) {
|
||||
boolean ttl_is_empty = ttl_end - ttl_bgn == 0; // NOTE: ttl can be empty; EX: "href='/site/en.wikipedia.org/wiki/'" "href='/wiki/'"
|
||||
if (ttl_is_empty) {
|
||||
ttl_full_txt = ttl_page_db = Bry_.Empty;
|
||||
}
|
||||
else {
|
||||
ttl_full_txt = Gfo_url_encoder_.Href_wo_anchor.Decode(src, ttl_bgn, ttl_end);
|
||||
switch (tid) {
|
||||
// anchors and external urls have no namespace; the decoded text is used as-is
case Xoh_anch_href_itm.Tid__anch:
|
||||
case Xoh_anch_href_itm.Tid__inet:
|
||||
ttl_ns_id = Xow_ns_.Tid__main;
|
||||
ttl_page_db = ttl_full_txt;
|
||||
break;
|
||||
case Xoh_anch_href_itm.Tid__wiki:
|
||||
case Xoh_anch_href_itm.Tid__site:
|
||||
int ttl_full_len = ttl_full_txt.length;
|
||||
int colon_pos = Bry_find_.Find_fwd(ttl_full_txt, Byte_ascii.Colon, 0, ttl_full_len);
|
||||
// default: whole title is the page name; narrowed below only if a non-main namespace is recognized
ttl_page_db = ttl_full_txt;
|
||||
if (colon_pos != Bry_find_.Not_found) {
|
||||
Xow_ns_mgr ns_mgr = ttl_parser.Ns_mgr();
|
||||
Object ns_obj = ns_mgr.Names_get_or_null(ttl_full_txt, 0, colon_pos);
|
||||
if (ns_obj != null) {
|
||||
Xow_ns ns = (Xow_ns)ns_obj;
|
||||
if (ns.Id() != Xow_ns_.Tid__main) {
|
||||
ttl_ns_id = ns.Id();
|
||||
ttl_page_db = Bry_.Mid(ttl_full_txt, colon_pos + 1, ttl_full_len);
|
||||
// keep the namespace text as written when it differs from the namespace's Name_ui(); NOTE(review): presumably covers aliases and case variants -- confirm
if (!Bry_.Match(ttl_full_txt, 0, colon_pos, ns.Name_ui()))
|
||||
ttl_ns_custom = Bry_.Mid(ttl_full_txt, 0, colon_pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
// applied whether or not a namespace was stripped
ttl_page_db = Xoa_ttl.Replace_spaces(ttl_page_db);
|
||||
break;
|
||||
default: throw Err_.new_unhandled(tid);
|
||||
}
|
||||
}
|
||||
}
|
||||
// href kinds reported by Tid()
public static final byte
|
||||
Tid__wiki = 0 // EX: href="/wiki/A"
|
||||
, Tid__site = 1 // EX: href="/site/en.wikipedia.org/wiki/A"
|
||||
, Tid__anch = 2 // EX: href="#A"
|
||||
, Tid__inet = 3 // EX: href="https://a.org/A"
|
||||
;
|
||||
// path segments expected directly after the leading "/" of an href
private static final byte[] Bry__site = Bry_.new_a7("site/"), Bry__wiki = Bry_.new_a7("wiki/");
|
||||
// maps the first path segment to its Tid__ value
private static final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7()
|
||||
.Add_bry_byte(Bry__wiki, Tid__wiki)
|
||||
.Add_bry_byte(Bry__site, Tid__site)
|
||||
;
|
||||
// Returns true for Tid__wiki and Tid__site, false for Tid__anch and Tid__inet; throws via Err_.new_unhandled for any other value.
public static boolean Ns_exists(byte tid) {
|
||||
switch (tid) {
|
||||
case Tid__wiki: case Tid__site: return true;
|
||||
case Tid__anch: case Tid__inet: return false;
|
||||
default: throw Err_.new_unhandled(tid);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -17,8 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis.anchs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
|
||||
import org.junit.*; import gplx.core.brys.*; import gplx.xowa.wikis.ttls.*;
|
||||
public class Xoh_anch_href_parser_tst {
|
||||
private final Xoh_anch_href_parser_fxt fxt = new Xoh_anch_href_parser_fxt();
|
||||
public class Xoh_anch_href_itm_tst {
|
||||
private final Xoh_anch_href_itm_fxt fxt = new Xoh_anch_href_itm_fxt();
|
||||
@Test public void Site() {
|
||||
fxt.Test__parse("/site/A/wiki/B", "A", "B");
|
||||
}
|
||||
@@ -34,28 +34,24 @@ public class Xoh_anch_href_parser_tst {
|
||||
@Test public void Inet() {
|
||||
fxt.Test__parse("http://a.org", "", "http://a.org");
|
||||
}
|
||||
// @Test public void Inet__mw() {
|
||||
// fxt.Test__parse("https://en.wikipedia.org/wiki/A", "en.wikipedia.org", "A");
|
||||
// }
|
||||
@Test public void Fail__1st_seg_must_be_site_or_wiki() {
|
||||
fxt.Test__parse__fail("/fail/A", "failed trie check: mid='fail/A' ctx='Main_Page' wkr='anch.href' excerpt=/fail/A");
|
||||
fxt.Test__parse__fail("/fail/A", "failed trie check: mid='fail/A' page='Main_Page' sect='href' text=/fail/A");
|
||||
}
|
||||
@Test public void Fail__2nd_seg_must_be_wiki() {
|
||||
fxt.Test__parse__fail("/site/A/B/C", "failed check: chk='wiki/' ctx='Main_Page' wkr='anch.href' excerpt=/site/A/B/C");
|
||||
fxt.Test__parse__fail("/site/A/B/C", "failed check: chk='wiki/' page='Main_Page' sect='href' text=/site/A/B/C");
|
||||
}
|
||||
}
|
||||
class Xoh_anch_href_parser_fxt extends Xoh_itm_parser_fxt_base {
|
||||
private final Xoae_app app;
|
||||
private final Xoh_anch_href_parser parser = new Xoh_anch_href_parser();
|
||||
private final Xow_ttl_parser ttl_parser;
|
||||
public Xoh_anch_href_parser_fxt() {
|
||||
this.app = Xoa_app_fxt.app_();
|
||||
ttl_parser = Xoa_app_fxt.wiki_tst_(app);
|
||||
}
|
||||
class Xoh_anch_href_itm_fxt extends Xoh_itm_parser_fxt { private final Xoh_anch_href_itm parser = new Xoh_anch_href_itm();
|
||||
@Override public Xoh_itm_parser Parser_get() {return parser;}
|
||||
public void Test__parse(String src_str, String expd_site, String expd_page) {
|
||||
Exec_parse(src_str);
|
||||
Tfds.Eq_str(expd_site, parser.Site_bgn() == -1 ? "" : String_.new_u8(src, parser.Site_bgn(), parser.Site_end()));
|
||||
Tfds.Eq_str(expd_page, String_.new_u8(src, parser.Page_bgn(), parser.Page_end()));
|
||||
Tfds.Eq_str(expd_page, String_.new_u8(src, parser.Ttl_bgn(), parser.Ttl_end()));
|
||||
}
|
||||
@Override public void Exec_parse_hook(Bry_rdr owner_rdr, int src_bgn, int src_end) {
|
||||
parser.Parse(owner_rdr, app, ttl_parser, src_bgn, src_end);
|
||||
@Override public void Exec_parse_hook(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, int src_bgn, int src_end) {
|
||||
parser.Parse(err_wkr, hctx, src_bgn, src_end);
|
||||
}
|
||||
}
|
||||
@@ -1,135 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis.anchs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
|
||||
import gplx.core.brys.*; import gplx.core.btries.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
import gplx.xowa.wikis.ttls.*; import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.domains.*; import gplx.xowa.apps.metas.*;
|
||||
public class Xoh_anch_href_parser implements Xoh_itm_parser {
|
||||
private byte[] page_bry; private Xoa_ttl page_ttl; private Xoa_app app; private Xow_ttl_parser ttl_parser;
|
||||
private final Bry_rdr rdr = new Bry_rdr().Dflt_dlm_(Byte_ascii.Slash);
|
||||
public void Fail_throws_err_(boolean v) {rdr.Fail_throws_err_(v);}// TEST
|
||||
public Html_atr Atr() {return atr;} private Html_atr atr;
|
||||
public byte Tid() {return tid;} private byte tid;
|
||||
public boolean Tid_has_ns() {return tid_has_ns;} private boolean tid_has_ns;
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int Val_bgn() {return val_bgn;} private int val_bgn;
|
||||
public int Val_end() {return val_end;} private int val_end;
|
||||
public int Site_bgn() {return site_bgn;} private int site_bgn;
|
||||
public int Site_end() {return site_end;} private int site_end;
|
||||
public boolean Site_exists() {return site_end > site_bgn;}
|
||||
public boolean Rel_nofollow_exists() {
|
||||
if (Site_exists()) {
|
||||
if (rel_nofollow_exists == Bool_.__byte) {
|
||||
Xow_domain_itm itm = Xow_domain_itm_.parse(Bry_.Mid(src, site_bgn, site_end));
|
||||
rel_nofollow_exists = itm.Domain_type_id() == Xow_domain_tid_.Int__other ? Bool_.Y_byte : Bool_.N_byte;
|
||||
}
|
||||
return rel_nofollow_exists == Bool_.Y_byte;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
} private byte rel_nofollow_exists;
|
||||
public int Page_bgn() {return page_bgn;} private int page_bgn;
|
||||
public int Page_end() {return page_end;} private int page_end;
|
||||
public byte[] Page_bry() {
|
||||
if (page_bry == null) {
|
||||
if (page_end - page_bgn == 0) // NOTE: href="/site/en.wikipedia.org/wiki/" can be null
|
||||
page_bry = Xoa_page_.Main_page_bry;
|
||||
else
|
||||
page_bry = gplx.langs.htmls.encoders.Gfo_url_encoder_.Href.Decode(src, page_bgn, page_end);
|
||||
}
|
||||
return page_bry;
|
||||
}
|
||||
public Xoa_ttl Page_ttl() {
|
||||
if (page_ttl == null) {
|
||||
page_bry = this.Page_bry();
|
||||
if (site_bgn != -1)
|
||||
ttl_parser = app.Wiki_mgri().Get_by_key_or_make_init_n(Bry_.Mid(src, site_bgn, site_end));
|
||||
page_ttl = ttl_parser.Ttl_parse(page_bry);
|
||||
page_bry = page_ttl.Full_db_w_anch();
|
||||
}
|
||||
return page_ttl;
|
||||
}
|
||||
public int Page_ns_id() {
|
||||
switch (tid) {
|
||||
case Xoh_anch_href_parser.Tid__anch:
|
||||
case Xoh_anch_href_parser.Tid__inet: return Xow_ns_.Tid__main; // for purposes of hzip/make, assume main_ns
|
||||
case Xoh_anch_href_parser.Tid__wiki:
|
||||
case Xoh_anch_href_parser.Tid__site: return this.Page_ttl().Ns().Id();
|
||||
default: throw Err_.new_unhandled(tid);
|
||||
}
|
||||
}
|
||||
public boolean Page_ns_id_is_image() {return this.Page_ns_id() == Xow_ns_.Tid__file && Bry_.Has_at_bgn(page_bry, Xow_ns_.Alias__image__bry);}
|
||||
public boolean Parse(Bry_rdr owner_rdr, Xoa_app app, Xow_ttl_parser ttl_parser, Html_tag tag) {
|
||||
this.atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__href);
|
||||
return Parse(owner_rdr, app, ttl_parser, atr.Val_bgn(), atr.Val_end());
|
||||
}
|
||||
public boolean Parse(Bry_rdr owner_rdr, Xoa_app app, Xow_ttl_parser ttl_parser, int href_bgn, int href_end) {
|
||||
if (href_bgn == -1) return false;
|
||||
rdr.Init_by_sub(owner_rdr, "anch.href", href_bgn, href_end);
|
||||
rel_nofollow_exists = Bool_.__byte;
|
||||
site_bgn = site_end = page_bgn = page_end = -1; this.src = owner_rdr.Src();
|
||||
tid = Tid__wiki;
|
||||
page_bry = null; page_ttl = null;
|
||||
this.val_bgn = href_bgn; this.val_end = href_end;
|
||||
this.src = owner_rdr.Src(); this.ttl_parser = ttl_parser; this.app = app;
|
||||
if (val_end == val_bgn) {
|
||||
tid = Tid__inet;
|
||||
page_bgn = page_end = 0;
|
||||
return true; // handle empty String separately; EX: href=""
|
||||
}
|
||||
int pos = href_bgn;
|
||||
switch (src[pos]) {
|
||||
case Byte_ascii.Hash:
|
||||
tid = Tid__anch; tid_has_ns = Bool_.N;
|
||||
page_bgn = pos + 1; // position page_bgn after #
|
||||
page_end = val_end; // anch ends at EOS
|
||||
break;
|
||||
default:
|
||||
tid = Tid__inet; tid_has_ns = Bool_.N;
|
||||
page_bgn = pos; // position page_bgn after #
|
||||
page_end = val_end; // anch ends at EOS
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
rdr.Move_by_one(); // skip "/"
|
||||
if (rdr.Chk(trie) == Tid__site) { // EX: "/site/wiki/A"
|
||||
tid = Tid__site; tid_has_ns = Bool_.Y;
|
||||
site_bgn = rdr.Pos();
|
||||
site_end = rdr.Find_fwd_lr();
|
||||
rdr.Chk(Bry__wiki);
|
||||
}
|
||||
else {
|
||||
tid = Tid__wiki; tid_has_ns = Bool_.Y;
|
||||
}
|
||||
page_bgn = rdr.Pos();
|
||||
page_end = rdr.Src_end();
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
public static final byte
|
||||
Tid__wiki = 0 // EX: href="/wiki/A"
|
||||
, Tid__site = 1 // EX: href="/site/en.wikipedia.org/wiki/A"
|
||||
, Tid__anch = 2 // EX: href="#A"
|
||||
, Tid__inet = 3 // EX: href="https://a.org/A"
|
||||
;
|
||||
private static final byte[] Bry__site = Bry_.new_a7("site/"), Bry__wiki = Bry_.new_a7("wiki/");
|
||||
private static final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7()
|
||||
.Add_bry_byte(Bry__wiki, Tid__wiki)
|
||||
.Add_bry_byte(Bry__site, Tid__site)
|
||||
;
|
||||
}
|
||||
@@ -17,7 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
|
||||
import gplx.core.brys.*; import gplx.core.brys.fmtrs.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.htmls.core.makes.*; import gplx.xowa.htmls.core.wkrs.imgs.atrs.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.htmls.core.wkrs.bfr_args.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.htmls.core.makes.*; import gplx.xowa.htmls.core.wkrs.imgs.atrs.*;
|
||||
import gplx.xowa.htmls.core.htmls.*;
|
||||
import gplx.xowa.parsers.lnkis.*;
|
||||
public class Xoh_file_html_fmtr__base implements Xoh_file_img_wkr {
|
||||
@@ -33,7 +34,7 @@ public class Xoh_file_html_fmtr__base implements Xoh_file_img_wkr {
|
||||
, "</a>"
|
||||
), "a_href", "a_xowa_title", "html"
|
||||
);
|
||||
@gplx.Virtual public void Html_full_img(Bry_bfr tmp_bfr, Xoh_wtr_ctx hctx, Xoae_page page, Xof_file_itm xfer_itm, int uid
|
||||
@gplx.Virtual public void Html_full_img(Bry_bfr tmp_bfr, Xoh_wtr_ctx hctx, Xoae_page page, byte[] src, Xof_file_itm xfer_itm, int uid
|
||||
, byte[] a_href, byte a_cls, byte a_rel, byte[] a_title, byte[] a_xowa_title
|
||||
, int img_w, int img_h, byte[] img_src, byte[] img_alt, byte img_cls, byte[] img_cls_other
|
||||
) {
|
||||
@@ -46,21 +47,27 @@ public class Xoh_file_html_fmtr__base implements Xoh_file_img_wkr {
|
||||
+ "<img id=\"xowa_file_img_~{uid}\" alt=\"~{img_alt}\"~{img_core}~{img_class} /></a>"
|
||||
, "uid", "a_href", "a_class", "a_rel", "a_title", "a_xowa_title", "img_core", "img_alt", "img_class"
|
||||
);
|
||||
|
||||
@gplx.Virtual public void Html_thumb_core(Bry_bfr tmp_bfr, int uid, byte[] div1_halign, int div2_width, byte[] div2_content) {
|
||||
@gplx.Virtual public void Html_thumb_core(Bry_bfr tmp_bfr, boolean mode_is_hdump, int uid, byte[] div1_halign, int div2_width, byte[] div2_content) {
|
||||
scratch_bfr.Add(Bry_style_bgn);
|
||||
scratch_bfr.Add_int_variable(div2_width);
|
||||
scratch_bfr.Add(Bry_style_end);
|
||||
fmtr_thumb_core.Bld_bfr_many(tmp_bfr, uid, div1_halign, scratch_bfr.To_bry_and_clear(), div2_content);
|
||||
} private static final byte[] Bry_style_bgn = Bry_.new_a7("style=\"width:"), Bry_style_end = Bry_.new_a7("px;\"");
|
||||
thumb_div_id_atr.Bfr_arg__clear();
|
||||
if (!mode_is_hdump)
|
||||
thumb_div_id_atr.Set_by_arg(thum_div_id_val.Set(Bry__id, uid));
|
||||
fmtr_thumb_core.Bld_bfr_many(tmp_bfr, thumb_div_id_atr, div1_halign, scratch_bfr.To_bry_and_clear(), div2_content);
|
||||
}
|
||||
private static final byte[] Bry_style_bgn = Bry_.new_a7("style=\"width:"), Bry_style_end = Bry_.new_a7("px;\"");
|
||||
private final Bfr_arg__html_atr thumb_div_id_atr = new Bfr_arg__html_atr(Html_atr_.Bry__id);
|
||||
private final Bfr_arg__id thum_div_id_val = new Bfr_arg__id();
|
||||
private final byte[] Bry__id = Bry_.new_a7("xowa_file_div_");
|
||||
protected Bry_fmtr fmtr_thumb_core = Bry_fmtr.new_(String_.Concat_lines_nl_skip_last // REF.MW: Linker.php|makeImageLink2
|
||||
( "<div class=\"thumb t~{div1_halign}\">"
|
||||
, " <div id=\"xowa_file_div_~{uid}\" class=\"thumbinner\" ~{style}>"
|
||||
, "~{div2_content}"
|
||||
, " <div~{div_id} class=\"thumbinner\" ~{style}>"
|
||||
, "~{div2_content}"
|
||||
, " </div>"
|
||||
, "</div>"
|
||||
, ""
|
||||
), "uid", "div1_halign", "style", "div2_content"
|
||||
), "div_id", "div1_halign", "style", "div2_content"
|
||||
);
|
||||
public byte[] Html_thumb_part_img(Bry_bfr tmp_bfr, Xoae_page page, Xof_file_itm xfer_itm, Xop_lnki_tkn lnki, int uid, byte[] a_href, byte[] img_src, byte[] img_alt) {
|
||||
Html_thumb_part_img(tmp_bfr, page, xfer_itm, uid, a_href, lnki.Ttl().Page_txt(), xfer_itm.Html_w(), xfer_itm.Html_h(), img_src, img_alt);
|
||||
@@ -71,10 +78,7 @@ public class Xoh_file_html_fmtr__base implements Xoh_file_img_wkr {
|
||||
}
|
||||
private Bry_fmtr fmtr_thumb_part_img = Bry_fmtr.new_(String_.Concat_lines_nl_skip_last
|
||||
( ""
|
||||
, " <div>"
|
||||
, " <a href=\"~{a_href}\" class=\"image\" title=\"~{a_title}\">"
|
||||
, " <img id=\"xowa_file_img_~{uid}\"~{img_core} alt=\"~{img_alt}\" />"
|
||||
, " </a>"
|
||||
, " <div><a href=\"~{a_href}\" class=\"image\" title=\"~{a_title}\"><img id=\"xowa_file_img_~{uid}\"~{img_core} alt=\"~{img_alt}\" /></a>"
|
||||
, " </div>"
|
||||
), "uid", "a_href", "a_title", "img_core", "img_alt");
|
||||
|
||||
@@ -146,4 +150,7 @@ public class Xoh_file_html_fmtr__base implements Xoh_file_img_wkr {
|
||||
), "uid", "a_width", "a_max_width", "a_href", "a_xowa_title", "img_src");
|
||||
|
||||
public static final Xoh_file_html_fmtr__base Base = new Xoh_file_html_fmtr__base();
|
||||
public static byte[] Escape_xowa_title(byte[] lnki_ttl) {
|
||||
return gplx.langs.htmls.encoders.Gfo_url_encoder_.Href_quotes.Encode(lnki_ttl); // must encode xowa_title, particularly quotes; EX: xowa_title="A"b.png"; PAGE:en.w:Earth DATE:2015-11-27
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,11 +20,12 @@ import gplx.core.brys.fmtrs.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.htmls.core.makes.*;
|
||||
import gplx.xowa.htmls.core.wkrs.imgs.atrs.*;
|
||||
import gplx.xowa.parsers.lnkis.*;
|
||||
public class Xoh_file_html_fmtr__hdump extends Xoh_file_html_fmtr__base {
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr.reset_(128);
|
||||
@Override public void Html_full_img(Bry_bfr bfr, gplx.xowa.htmls.core.htmls.Xoh_wtr_ctx hctx, Xoae_page page, Xof_file_itm xfer_itm, int uid, byte[] a_href, byte a_cls, byte a_rel, byte[] a_title, byte[] a_xowa_title, int img_w, int img_h, byte[] img_src, byte[] img_alt, byte img_cls, byte[] img_cls_other) {
|
||||
@Override public void Html_full_img(Bry_bfr bfr, gplx.xowa.htmls.core.htmls.Xoh_wtr_ctx hctx, Xoae_page page, byte[] src, Xof_file_itm xfer_itm, int uid, byte[] a_href, byte a_cls, byte a_rel, byte[] a_title, byte[] a_xowa_title, int img_w, int img_h, byte[] img_src, byte[] img_alt, byte img_cls, byte[] img_cls_other) {
|
||||
tmp_bfr.Add_str_a7(" data-xoimg=\"");
|
||||
tmp_bfr.Add_int_digits(1, xfer_itm.Lnki_type()).Add_byte_pipe();
|
||||
tmp_bfr.Add_int_digits(1, Xop_lnki_type.To_tid(xfer_itm.Lnki_type())).Add_byte_pipe();
|
||||
tmp_bfr.Add_int_variable(xfer_itm.Lnki_w()).Add_byte_pipe();
|
||||
tmp_bfr.Add_int_variable(xfer_itm.Lnki_h()).Add_byte_pipe();
|
||||
tmp_bfr.Add_double(xfer_itm.Lnki_upright()).Add_byte_pipe();
|
||||
@@ -34,7 +35,7 @@ public class Xoh_file_html_fmtr__hdump extends Xoh_file_html_fmtr__base {
|
||||
// , a_href, Xoh_lnki_consts.A_cls_to_bry(a_cls), Xoh_lnki_consts.A_rel_to_bry(a_rel), a_title
|
||||
// , img_alt, tmp_bfr.To_bry_and_clear(), arg_img_core.Init(uid, img_src, img_w, img_h), Xoh_img_cls_.To_html(img_cls, img_cls_other));
|
||||
fmtr__img__full.Bld_bfr_many(bfr
|
||||
, a_href, Xoh_lnki_consts.A_cls_to_bry(a_cls), Xoh_lnki_consts.A_rel_to_bry(a_rel), a_title, Xoa_ttl.Replace_spaces(a_xowa_title)
|
||||
, a_href, Xoh_lnki_consts.A_cls_to_bry(a_cls), Xoh_lnki_consts.A_rel_to_bry(a_rel), a_title, a_xowa_title
|
||||
, img_alt, tmp_bfr.To_bry_and_clear(), arg_img_core.Init(uid, Bry_.Empty, 0, 0), Xoh_img_cls_.To_html(img_cls, img_cls_other));
|
||||
}
|
||||
private Bry_fmtr fmtr__img__full = Bry_fmtr.new_
|
||||
|
||||
@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.htmls.core.htmls.*;
|
||||
public interface Xoh_file_img_wkr {
|
||||
void Html_full_img(Bry_bfr tmp_bfr, Xoh_wtr_ctx hctx, Xoae_page page, Xof_file_itm xfer_itm, int uid
|
||||
void Html_full_img(Bry_bfr tmp_bfr, Xoh_wtr_ctx hctx, Xoae_page page, byte[] src, Xof_file_itm xfer_itm, int uid
|
||||
, byte[] a_href, byte a_cls, byte a_rel, byte[] a_title, byte[] a_xowa_title
|
||||
, int img_w, int img_h, byte[] img_src, byte[] img_alt, byte img_cls, byte[] img_cls_other
|
||||
);
|
||||
|
||||
@@ -16,12 +16,10 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
|
||||
import gplx.core.brys.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.core.brys.*; import gplx.core.bits.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.encoders.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.makes.imgs.*; import gplx.xowa.htmls.core.wkrs.imgs.atrs.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.msgs.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.files.*;
|
||||
import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.makes.imgs.*; import gplx.xowa.htmls.core.wkrs.imgs.atrs.*;
|
||||
import gplx.xowa.wikis.nss.*; import gplx.xowa.files.*;
|
||||
import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.tmpls.*;
|
||||
public class Xoh_file_wtr__basic {
|
||||
private final Xowe_wiki wiki; private final Xow_html_mgr html_mgr; private final Xoh_html_wtr html_wtr; private final Bry_bfr_mkr bfr_mkr; private final Bry_bfr scratch_bfr = Bry_bfr.reset_(Io_mgr.Len_kb);
|
||||
@@ -95,7 +93,7 @@ public class Xoh_file_wtr__basic {
|
||||
private void Write_file_audio(Bry_bfr bfr, Xop_ctx ctx, Xoh_wtr_ctx hctx, byte[] src, Xop_lnki_tkn lnki, int uid, int div_width, byte[] lnki_halign_bry, byte[] lnki_href, byte[] img_orig_src, byte[] alt) {
|
||||
byte[] content = Arg_content_audio(lnki, ctx, hctx, src, uid, lnki_href, img_orig_src, alt);
|
||||
if (lnki.Media_icon())
|
||||
html_fmtr.Html_thumb_core(bfr, uid, lnki_halign_bry, div_width, content);
|
||||
html_fmtr.Html_thumb_core(bfr, hctx.Mode_is_hdump(), uid, lnki_halign_bry, div_width, content);
|
||||
else
|
||||
bfr.Add(content);
|
||||
}
|
||||
@@ -104,7 +102,7 @@ public class Xoh_file_wtr__basic {
|
||||
boolean video_is_thumb = Xop_lnki_type.Id_defaults_to_thumb(lnki.Lnki_type());
|
||||
byte[] content = Arg_content_video(ctx, hctx, src, lnki, xfer_itm, uid, video_is_thumb, lnki_href, img_view_src, img_orig_src, alt);
|
||||
if (video_is_thumb)
|
||||
html_fmtr.Html_thumb_core(bfr, uid, lnki_halign_bry, div_width, content);
|
||||
html_fmtr.Html_thumb_core(bfr, hctx.Mode_is_hdump(), uid, lnki_halign_bry, div_width, content);
|
||||
else
|
||||
bfr.Add(content);
|
||||
}
|
||||
@@ -119,7 +117,7 @@ public class Xoh_file_wtr__basic {
|
||||
if (lnki_is_thumbable) { // is "thumb"
|
||||
if (bfr.Len() > 0) bfr.Add_byte_nl();
|
||||
byte[] content = Arg_content_thumb(lnki_file_wkr, ctx, hctx, src, lnki, xfer_itm, uid, lnki_href, img_view_src, img_orig_src, alt, lnki_ttl, anchor_title);
|
||||
html_fmtr.Html_thumb_core(bfr, uid, lnki_halign_bry, div_width, content);
|
||||
html_fmtr.Html_thumb_core(bfr, hctx.Mode_is_hdump(), uid, lnki_halign_bry, div_width, content);
|
||||
}
|
||||
else {
|
||||
if ( cfg_alt_defaults_to_caption
|
||||
@@ -139,15 +137,15 @@ public class Xoh_file_wtr__basic {
|
||||
byte img_cls_tid = lnki.Border() == Bool_.Y_byte ? Xoh_img_cls_.Tid__thumbborder : Xoh_img_cls_.Tid__none;
|
||||
byte[] img_cls_other = lnki.Lnki_cls(); // PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3; DATE:2014-09-06
|
||||
if (lnki_link_tkn == Arg_nde_tkn.Null) // full
|
||||
lnki_file_wkr.Html_full_img(bfr, hctx, page, xfer_itm, uid, lnki_href, Xoh_lnki_consts.Tid_a_cls_image, Xoh_lnki_consts.Tid_a_rel_none, anchor_title, lnki_ttl, xfer_itm.Html_w(), xfer_itm.Html_h(), img_view_src, alt, img_cls_tid, img_cls_other);
|
||||
lnki_file_wkr.Html_full_img(bfr, hctx, page, src, xfer_itm, uid, lnki_href, Xoh_lnki_consts.Tid_a_cls_image, Xoh_lnki_consts.Tid_a_rel_none, anchor_title, Xoh_file_html_fmtr__base.Escape_xowa_title(lnki_ttl), xfer_itm.Html_w(), xfer_itm.Html_h(), img_view_src, alt, img_cls_tid, img_cls_other);
|
||||
else { // thumb
|
||||
Arg_itm_tkn link_tkn = lnki_link_tkn.Val_tkn();
|
||||
byte[] link_ref = link_tkn.Dat_to_bry(src);
|
||||
byte[] link_ref_new = tmp_link_parser.Parse(tmp_bfr, tmp_url, wiki, link_ref, lnki_href);
|
||||
link_ref = link_ref_new == null ? lnki_href: link_ref_new; // if parse fails, then assign to lnki_href; EX:link={{{1}}}
|
||||
link_ref = gplx.langs.htmls.encoders.Gfo_url_encoder_.Href_quotes.Encode(link_ref); // must encode quotes; PAGE:en.w:List_of_cultural_heritage_sites_in_Punjab,_Pakistan; DATE:2014-07-16
|
||||
if (Bry_.Len_gt_0(tmp_link_parser.Html_xowa_ttl())) lnki_ttl = tmp_link_parser.Html_xowa_ttl();
|
||||
lnki_file_wkr.Html_full_img(bfr, hctx, page, xfer_itm, uid, link_ref, tmp_link_parser.Html_anchor_cls(), tmp_link_parser.Html_anchor_rel(), anchor_title, lnki_ttl, xfer_itm.Html_w(), xfer_itm.Html_h(), img_view_src, alt, img_cls_tid, img_cls_other);
|
||||
byte[] link_arg = Xoa_ttl.Replace_spaces(link_tkn.Dat_to_bry(src)); // replace spaces with unders, else "/wiki/File:A b.ogg" instead of "A_b.ogg"; DATE:2015-11-27
|
||||
byte[] link_arg_html = tmp_link_parser.Parse(tmp_bfr, tmp_url, wiki, link_arg, lnki_href);
|
||||
link_arg = link_arg_html == null ? lnki_href: link_arg_html; // if parse fails, then assign to lnki_href; EX:link={{{1}}}
|
||||
link_arg = Gfo_url_encoder_.Href_qarg.Encode(link_arg); // must encode quotes; PAGE:en.w:List_of_cultural_heritage_sites_in_Punjab,_Pakistan; DATE:2014-07-16
|
||||
// if (Bry_.Len_gt_0(tmp_link_parser.Html_xowa_ttl())) lnki_ttl = tmp_link_parser.Html_xowa_ttl(); // DELETE: not sure why this is here; breaks test; DATE:2015-11-28
|
||||
lnki_file_wkr.Html_full_img(bfr, hctx, page, src, xfer_itm, uid, link_arg, tmp_link_parser.Html_anchor_cls(), tmp_link_parser.Html_anchor_rel(), anchor_title, Xoh_file_html_fmtr__base.Escape_xowa_title(lnki_ttl), xfer_itm.Html_w(), xfer_itm.Html_h(), img_view_src, alt, img_cls_tid, img_cls_other);
|
||||
}
|
||||
if (div_align_exists) bfr.Add(Html_tag_.Div_rhs); // close div from above
|
||||
}
|
||||
@@ -158,7 +156,7 @@ public class Xoh_file_wtr__basic {
|
||||
byte[] lnki_alt_html = wiki.Html_mgr().Imgs_mgr().Alt_in_caption().Val() ? Arg_alt_html(ctx, hctx, src, lnki) : Bry_.Empty;
|
||||
byte img_cls_tid = xfer_itm.File_exists() ? Xoh_img_cls_.Tid__thumbimage : Xoh_img_cls_.Tid__none;
|
||||
Bry_bfr tmp_bfr = bfr_mkr.Get_k004();
|
||||
lnki_file_wkr.Html_full_img(tmp_bfr, hctx, page, xfer_itm, uid, lnki_href, Xoh_lnki_consts.Tid_a_cls_image, Xoh_lnki_consts.Tid_a_rel_none, anchor_title, lnki_ttl, xfer_itm.Html_w(), xfer_itm.Html_h(), view_src, lnki_alt_text, img_cls_tid, Xoh_img_cls_.Bry__none);
|
||||
lnki_file_wkr.Html_full_img(tmp_bfr, hctx, page, src, xfer_itm, uid, lnki_href, Xoh_lnki_consts.Tid_a_cls_image, Xoh_lnki_consts.Tid_a_rel_none, anchor_title, Xoh_file_html_fmtr__base.Escape_xowa_title(lnki_ttl), xfer_itm.Html_w(), xfer_itm.Html_h(), view_src, lnki_alt_text, img_cls_tid, Xoh_img_cls_.Bry__none);
|
||||
byte[] thumb = tmp_bfr.To_bry_and_clear();
|
||||
html_fmtr.Html_thumb_file_image(tmp_bfr, thumb, Arg_caption_div(ctx, hctx, src, lnki, uid, img_orig_src, lnki_href), lnki_alt_html);
|
||||
return tmp_bfr.To_bry_and_rls();
|
||||
|
||||
@@ -40,9 +40,9 @@ public class Xoh_file_wtr_audio_video_tst {
|
||||
( "[[File:A.ogg]]", String_.Concat_lines_nl_skip_last
|
||||
( " <div id=\"xowa_media_div\">"
|
||||
, " <div>"
|
||||
, " <a href=\"/wiki/File:A.ogg\" class=\"image\" title=\"A.ogg\">"
|
||||
, " <img id=\"xowa_file_img_0\" src=\"file:///mem/wiki/repo/trg/orig/4/2/A.ogg\" width=\"220\" height=\"-1\" alt=\"\" />" // note that src still exists (needed for clicking)
|
||||
, " </a>"
|
||||
+ "<a href=\"/wiki/File:A.ogg\" class=\"image\" title=\"A.ogg\">"
|
||||
+ "<img id=\"xowa_file_img_0\" src=\"file:///mem/wiki/repo/trg/orig/4/2/A.ogg\" width=\"220\" height=\"-1\" alt=\"\" />" // note that src still exists (needed for clicking)
|
||||
+ "</a>"
|
||||
, " </div>"
|
||||
, " <div>"
|
||||
, " <a id=\"xowa_file_play_0\" href=\"file:///mem/wiki/repo/trg/orig/4/2/A.ogg\" xowa_title=\"A.ogg\" class=\"xowa_anchor_button\" style=\"width:218px;max-width:220px;\">"
|
||||
@@ -117,9 +117,9 @@ public class Xoh_file_wtr_audio_video_tst {
|
||||
( "[[File:A.ogv|400px|a|alt=b]]", String_.Concat_lines_nl_skip_last
|
||||
( " <div id=\"xowa_media_div\">"
|
||||
, " <div>"
|
||||
, " <a href=\"/wiki/File:A.ogv\" class=\"image\" title=\"A.ogv\">"
|
||||
, " <img id=\"xowa_file_img_0\" src=\"file:///\" width=\"400\" height=\"0\" alt=\"b\" />"
|
||||
, " </a>"
|
||||
+ "<a href=\"/wiki/File:A.ogv\" class=\"image\" title=\"A.ogv\">"
|
||||
+ "<img id=\"xowa_file_img_0\" src=\"file:///\" width=\"400\" height=\"0\" alt=\"b\" />"
|
||||
+ "</a>"
|
||||
, " </div>"
|
||||
, " <div>"
|
||||
, " <a id=\"xowa_file_play_0\" href=\"file:///mem/wiki/repo/trg/orig/d/0/A.ogv\" xowa_title=\"A.ogv\" class=\"xowa_anchor_button\" style=\"width:398px;max-width:400px;\">"
|
||||
@@ -171,9 +171,9 @@ public class Xoh_file_wtr_audio_video_tst {
|
||||
, " <div id=\"xowa_file_div_0\" class=\"thumbinner\" style=\"width:220px;\">" // NOTE:220px is default w for "non-found" thumb; DATE:2014-09-24
|
||||
, " <div id=\"xowa_media_div\">"
|
||||
, " <div>"
|
||||
, " <a href=\"/wiki/File:A.ogv\" class=\"image\" title=\"A.ogv\">"
|
||||
, " <img id=\"xowa_file_img_0\" src=\"file:///\" width=\"400\" height=\"0\" alt=\"b\" />"
|
||||
, " </a>"
|
||||
+ "<a href=\"/wiki/File:A.ogv\" class=\"image\" title=\"A.ogv\">"
|
||||
+ "<img id=\"xowa_file_img_0\" src=\"file:///\" width=\"400\" height=\"0\" alt=\"b\" />"
|
||||
+ "</a>"
|
||||
, " </div>"
|
||||
, " <div>"
|
||||
, " <a id=\"xowa_file_play_0\" href=\"file:///mem/wiki/repo/trg/orig/d/0/A.ogv\" xowa_title=\"A.ogv\" class=\"xowa_anchor_button\" style=\"width:398px;max-width:400px;\">"
|
||||
@@ -205,9 +205,9 @@ public class Xoh_file_wtr_audio_video_tst {
|
||||
, " <div id=\"xowa_file_div_0\" class=\"thumbinner\" style=\"width:220px;\">" // NOTE:220px is default w for "non-found" thumb; DATE:2014-09-24
|
||||
, " <div id=\"xowa_media_div\">"
|
||||
, " <div>"
|
||||
, " <a href=\"/wiki/File:A.webm\" class=\"image\" title=\"A.webm\">"
|
||||
, " <img id=\"xowa_file_img_0\" src=\"file:///\" width=\"400\" height=\"0\" alt=\"b\" />"
|
||||
, " </a>"
|
||||
+ "<a href=\"/wiki/File:A.webm\" class=\"image\" title=\"A.webm\">"
|
||||
+ "<img id=\"xowa_file_img_0\" src=\"file:///\" width=\"400\" height=\"0\" alt=\"b\" />"
|
||||
+ "</a>"
|
||||
, " </div>"
|
||||
, " <div>"
|
||||
, " <a id=\"xowa_file_play_0\" href=\"file:///mem/wiki/repo/trg/orig/3/4/A.webm\" xowa_title=\"A.webm\" class=\"xowa_anchor_button\" style=\"width:398px;max-width:400px;\">"
|
||||
|
||||
@@ -29,6 +29,13 @@ public class Xoh_file_wtr_basic_tst {
|
||||
));
|
||||
fxt.Wtr_cfg().Lnki__title_(false);
|
||||
}
|
||||
@Test public void Xowa_title__quotes() { // PURPOSE: xowa_title should encode quotes DATE:2015-11-27
|
||||
fxt.Test_parse_page_wiki_str
|
||||
( "[[File:A%22b.png]]"
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<a href=\"/wiki/File:A%22b.png\" class=\"image\" xowa_title=\"A%22b.png\"><img id=\"xowa_file_img_0\" alt=\"\" src=\"file:///mem/wiki/repo/trg/orig/d/4/A%22b.png\" width=\"0\" height=\"0\" /></a>"
|
||||
));
|
||||
}
|
||||
@Test public void Img_embed() {
|
||||
fxt.Test_parse_page_wiki_str("[[File:A.png|9x8px|alt=abc]]", Xop_fxt.html_img_none("File:A.png", "abc", "file:///mem/wiki/repo/trg/thumb/7/0/A.png/9px.png", "A.png"));
|
||||
}
|
||||
@@ -238,7 +245,7 @@ public class Xoh_file_wtr_basic_tst {
|
||||
fxt.Wtr_cfg().Lnki__title_(true);
|
||||
fxt.Test_parse_page_all_str
|
||||
( "[[File:A.png|\n{|\n|-\n|b\n|}\n]]"
|
||||
, "<a href=\"/wiki/File:A.png\" class=\"image\" title=\"b \" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\" b \" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>"
|
||||
, "<a href=\"/wiki/File:A.png\" class=\"image\" title=\"b \" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\" b \" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>"
|
||||
);
|
||||
fxt.Wtr_cfg().Lnki__title_(false);
|
||||
}
|
||||
@@ -248,7 +255,7 @@ public class Xoh_file_wtr_basic_tst {
|
||||
fxt.Test_parse_page_all_str
|
||||
( "[[File:A.png|b\nc]]"
|
||||
, String_.Concat_lines_nl
|
||||
( "<p><a href=\"/wiki/File:A.png\" class=\"image\" title=\"b c\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"b c\" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>"
|
||||
( "<p><a href=\"/wiki/File:A.png\" class=\"image\" title=\"b c\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"b c\" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>"
|
||||
, "</p>"
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
|
||||
@@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.lnkis.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.xowa.parsers.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.tmpls.*;
|
||||
public class Xoh_lnki_title_fmtr extends gplx.core.brys.Bfr_arg_base {
|
||||
public Xoh_lnki_title_fmtr Set(byte[] src, Xop_tkn_itm tkn) {this.src = src; this.tkn = tkn; return this;}
|
||||
@@ -36,8 +37,10 @@ public class Xoh_lnki_title_fmtr extends gplx.core.brys.Bfr_arg_base {
|
||||
if (tkn_as_lnki.Caption_exists())
|
||||
Bld_recurse(bfr, tkn_as_lnki.Caption_tkn());
|
||||
else {
|
||||
if (tkn_as_lnki.Ttl() != null) // guard against invalid ttls
|
||||
bfr.Add(tkn_as_lnki.Ttl().Page_txt());
|
||||
if (tkn_as_lnki.Ttl() != null) { // guard against invalid ttls
|
||||
byte[] ttl_bry = tkn_as_lnki.Ttl().Page_txt();
|
||||
Write_atr_text(bfr, ttl_bry, 0, ttl_bry.length); // handle titles with quotes; PAGE:s.w:Styx_(band) DATE:2015-11-29
|
||||
}
|
||||
}
|
||||
if (tkn_as_lnki.Tail_bgn() != -1)
|
||||
bfr.Add_mid(src, tkn_as_lnki.Tail_bgn(), tkn_as_lnki.Tail_end());
|
||||
@@ -63,12 +66,13 @@ public class Xoh_lnki_title_fmtr extends gplx.core.brys.Bfr_arg_base {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Tab: // NOTE: escape ws so that it renders correctly in tool tips
|
||||
case Byte_ascii.Quote: case Byte_ascii.Lt: case Byte_ascii.Gt: case Byte_ascii.Amp: // NOTE: escape possible javascript injection characters
|
||||
bfr.Add(Escape_bgn);
|
||||
bfr.Add_int_variable(b);
|
||||
bfr.Add_byte(Byte_ascii.Semic);
|
||||
bfr.Add_byte_space();
|
||||
break;
|
||||
default: bfr.Add_byte(b); break;
|
||||
case Byte_ascii.Quote: bfr.Add(Html_entity_.Quote_bry); break;
|
||||
case Byte_ascii.Lt: bfr.Add(Html_entity_.Lt_bry); break;
|
||||
case Byte_ascii.Gt: bfr.Add(Html_entity_.Gt_bry); break;
|
||||
case Byte_ascii.Amp: bfr.Add(Html_entity_.Amp_bry); break;
|
||||
default: bfr.Add_byte(b); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,8 +23,11 @@ public class Xoh_lnki_title_fmtr_tst {
|
||||
fxt.Test_parse("a b c", "a b c");
|
||||
fxt.Test_parse("a ''b'' c", "a b c");
|
||||
fxt.Test_parse("a <i>b</i> c", "a b c");
|
||||
fxt.Test_parse("a\nb", "a b");
|
||||
fxt.Test_parse("a\"b", "a"b");
|
||||
fxt.Test_parse("a\nb", "a b");
|
||||
fxt.Test_parse("a\"b", "a"b");
|
||||
}
|
||||
@Test public void Lnki__quotes() { // PURPOSE: handle titles with quotes; PAGE:s.w:Styx_(band) DATE:2015-11-29
|
||||
fxt.Test_parse("[[A\"B]]", "A"B");
|
||||
}
|
||||
}
|
||||
class Xoh_lnki_title_fmtr_fxt {
|
||||
|
||||
@@ -41,6 +41,7 @@ public class Xoh_lnki_wtr {
|
||||
redlinks_mgr = page.Redlink_lnki_list(); // NOTE: need to set redlinks_mgr, else toc parse may fail; EX:pl.d:head_sth_off;DATE:2014-05-07
|
||||
file_wtr.Init_by_page(hctx, page);
|
||||
this.history_mgr = app.Usere().History_mgr();
|
||||
if (hctx.Mode_is_hdump()) cfg.Lnki__id_(false);
|
||||
}
|
||||
public void Write(Bry_bfr bfr, Xoh_wtr_ctx hctx, byte[] src, Xop_lnki_tkn lnki) {
|
||||
Xoa_ttl lnki_ttl = lnki.Ttl();
|
||||
@@ -63,7 +64,7 @@ public class Xoh_lnki_wtr {
|
||||
redlinks_mgr.Lnki_add(lnki);
|
||||
boolean stage_is_alt = hctx.Mode_is_alt();
|
||||
switch (lnki.Ns_id()) {
|
||||
case Xow_ns_.Tid__media: if (!stage_is_alt) file_wtr.Write_or_queue(bfr, page, ctx, hctx, src, lnki); return; // NOTE: literal ":" has no effect; PAGE:en.w:Beethoven and [[:Media:De-Ludwig_van_Beethoven.ogg|listen]]
|
||||
case Xow_ns_.Tid__media: if (!stage_is_alt) file_wtr.Write_or_queue(bfr, page, ctx, hctx, src, lnki); return; // NOTE: literal ":" has no effect; PAGE:en.w:Beethoven and [[:Media:De-Ludwig_van_Beethoven.ogg|listen]]
|
||||
case Xow_ns_.Tid__file: if (!literal_link && !stage_is_alt) {file_wtr.Write_or_queue(bfr, page, ctx, hctx, src, lnki); return;} break;
|
||||
case Xow_ns_.Tid__category: if (!literal_link) {page.Html_data().Ctgs_add(lnki.Ttl()); return;} break;
|
||||
}
|
||||
@@ -100,7 +101,7 @@ public class Xoh_lnki_wtr {
|
||||
Write_caption(bfr, ctx, hctx, src, lnki, ttl_bry, true, caption_wkr);
|
||||
else {
|
||||
bfr.Add(Xoh_consts.A_bgn); // '<a href="'
|
||||
app.Html__href_wtr().Build_to_bfr(bfr, app, wiki.Domain_bry(), lnki_ttl, hctx.Mode_is_popup()); // '/wiki/A'
|
||||
app.Html__href_wtr().Build_to_bfr(bfr, app, hctx.Mode(), wiki.Domain_bry(), lnki_ttl); // '/wiki/A'
|
||||
if (cfg.Lnki__id()) {
|
||||
int lnki_html_id = lnki.Html_uid();
|
||||
if (lnki_html_id > Lnki_id_ignore) // html_id=0 for skipped lnkis; EX:anchors and interwiki
|
||||
@@ -108,9 +109,12 @@ public class Xoh_lnki_wtr {
|
||||
.Add_int_variable(lnki_html_id); // '1234'
|
||||
}
|
||||
if (cfg.Lnki__title()) {
|
||||
bfr .Add(Xoh_consts.A_bgn_lnki_0); // '" title=\"'
|
||||
byte[] lnki_title_bry = lnki_ttl.Full_txt(); // 'Abcd' NOTE: use Full_txt to (a) replace underscores with spaces; (b) get title casing; EX:[[roman_empire]] -> Roman empire; (c) include ns_name; EX: Help:A -> "title='Help:A'" not "title='A'"; DATE:2015-11-16
|
||||
Html_utl.Escape_html_to_bfr(bfr, lnki_title_bry, 0, lnki_title_bry.length, Bool_.N, Bool_.N, Bool_.N, Bool_.Y, Bool_.N); // escape title; DATE:2014-10-27
|
||||
byte[] title_bry = lnki_ttl.Full_txt(); // NOTE: use Full_txt to (a) replace underscores with spaces; (b) get title casing; EX:[[roman_empire]] -> Roman empire; (c) include ns_name; EX: Help:A -> "title='Help:A'" not "title='A'"; DATE:2015-11-16
|
||||
int title_len = title_bry.length;
|
||||
if (title_len > 0) {
|
||||
bfr .Add(Xoh_consts.A_bgn_lnki_0); // '" title=\"'
|
||||
Html_utl.Escape_html_to_bfr(bfr, title_bry, 0, title_len, Bool_.N, Bool_.N, Bool_.N, Bool_.Y, Bool_.N); // escape title; DATE:2014-10-27
|
||||
}
|
||||
}
|
||||
if (!hctx.Mode_is_hdump()) { // don't write visited for hdump
|
||||
if (cfg.Lnki__visited()
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.mkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import gplx.core.threads.poolables.*;
|
||||
import gplx.xowa.htmls.core.hzips.*;
|
||||
import gplx.xowa.htmls.core.wkrs.escapes.*; import gplx.xowa.htmls.core.wkrs.spaces.*;
|
||||
import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.lnkes.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
|
||||
import gplx.xowa.htmls.core.wkrs.imgs.*; import gplx.xowa.htmls.core.wkrs.thms.*; import gplx.xowa.htmls.core.wkrs.glys.*;
|
||||
public class Xoh_hdoc_mkr {
|
||||
private Gfo_poolable_mgr
|
||||
pool__escape__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_escape_hzip())
|
||||
, pool__space__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_space_hzip())
|
||||
, pool__hdr__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_hdr_hzip())
|
||||
, pool__lnke__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_lnke_hzip())
|
||||
, pool__lnki__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_lnki_hzip())
|
||||
, pool__img__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_img_hzip())
|
||||
, pool__thm__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_thm_hzip())
|
||||
, pool__gly__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_gly_hzip())
|
||||
;
|
||||
public Xoh_hzip_wkr Hzip__wkr(byte tid) {
|
||||
switch (tid) {
|
||||
case Xoh_hzip_dict_.Tid__escape: return Escape__hzip();
|
||||
case Xoh_hzip_dict_.Tid__space: return Space__hzip();
|
||||
case Xoh_hzip_dict_.Tid__hdr: return Hdr__hzip();
|
||||
case Xoh_hzip_dict_.Tid__lnke: return Lnke__hzip();
|
||||
case Xoh_hzip_dict_.Tid__lnki: return Lnki__hzip();
|
||||
case Xoh_hzip_dict_.Tid__img: return Img__hzip();
|
||||
case Xoh_hzip_dict_.Tid__thm: return Thm__hzip();
|
||||
case Xoh_hzip_dict_.Tid__gly: return Gly__hzip();
|
||||
default: throw Err_.new_unhandled(tid);
|
||||
}
|
||||
}
|
||||
public Xoh_escape_hzip Escape__hzip() {return (Xoh_escape_hzip) pool__escape__hzip.Get_fast();}
|
||||
public Xoh_space_hzip Space__hzip() {return (Xoh_space_hzip) pool__space__hzip.Get_fast();}
|
||||
public Xoh_hdr_hzip Hdr__hzip() {return (Xoh_hdr_hzip) pool__hdr__hzip.Get_fast();}
|
||||
public Xoh_lnke_hzip Lnke__hzip() {return (Xoh_lnke_hzip) pool__lnke__hzip.Get_fast();}
|
||||
public Xoh_lnki_hzip Lnki__hzip() {return (Xoh_lnki_hzip) pool__lnki__hzip.Get_fast();}
|
||||
public Xoh_img_hzip Img__hzip() {return (Xoh_img_hzip) pool__img__hzip.Get_fast();}
|
||||
public Xoh_thm_hzip Thm__hzip() {return (Xoh_thm_hzip) pool__thm__hzip.Get_fast();}
|
||||
public Xoh_gly_hzip Gly__hzip() {return (Xoh_gly_hzip) pool__gly__hzip.Get_fast();}
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.spaces; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import gplx.core.brys.*; import gplx.core.threads.poolables.*; import gplx.xowa.wikis.ttls.*;
|
||||
import gplx.xowa.htmls.core.hzips.*;
|
||||
public class Xoh_space_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
public String Key() {return Xoh_hzip_dict_.Key__space;}
|
||||
public Xoh_space_hzip Encode(Bry_bfr bfr, Hzip_stat_itm stat_itm, byte[] src, int src_end, int rng_bgn, int rng_end) {
|
||||
int space_len = Bry_find_.Find_fwd_while(src, rng_end, src_end, Byte_ascii.Space) - rng_bgn;
|
||||
stat_itm.Space_add(space_len);
|
||||
bfr.Add(Xoh_hzip_dict_.Bry__space);
|
||||
Xoh_hzip_int_.Encode(1, bfr, space_len);
|
||||
return this;
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx ctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
|
||||
int space_len = rdr.Read_int_by_base85(1);
|
||||
bfr.Add_byte_repeat(Byte_ascii.Space, space_len);
|
||||
return rdr.Pos();
|
||||
}
|
||||
public int Pool__idx() {return pool_idx;} private int pool_idx;
|
||||
public void Pool__clear (Object[] args) {}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_space_hzip rv = new Xoh_space_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
|
||||
}
|
||||
@@ -1,41 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.spaces; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import org.junit.*; import gplx.xowa.htmls.core.hzips.*;
|
||||
public class Xoh_space_hzip_tst {
|
||||
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt();
|
||||
@Test public void Len__8() {
|
||||
fxt.Test__bicode("~!)", " ");
|
||||
}
|
||||
@Test public void Len__85() {
|
||||
fxt.Test__bicode("~!{\"!", String_.Repeat(" ", 85));
|
||||
}
|
||||
@Test public void Many() {
|
||||
fxt.Test__bicode(gplx.langs.htmls.Html_utl.Replace_apos(String_.Concat_lines_nl_skip_last
|
||||
( " <div id='bodyContent' class='mw-body-content'>"
|
||||
, "~!%<div id='siteSub'>a</div>"
|
||||
, "~!%<div id='contentSub'></div>"
|
||||
, "</div>"
|
||||
)), String_.Concat_lines_nl_skip_last
|
||||
( " <div id='bodyContent' class='mw-body-content'>"
|
||||
, " <div id='siteSub'>a</div>"
|
||||
, " <div id='contentSub'></div>"
|
||||
, "</div>"
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.htmls.core.wkrs.spaces; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
import gplx.xowa.htmls.core.hzips.*;
|
||||
public class Xoh_space_parser implements Html_doc_wkr {
|
||||
private final Xoh_hdoc_wkr wkr;
|
||||
public Xoh_space_parser(Xoh_hdoc_wkr wkr) {this.wkr = wkr;}
|
||||
public byte[] Hook() {return Hook_bry;}
|
||||
public int Parse(byte[] src, int src_bgn, int src_end, int pos) {
|
||||
int rng_end = Bry_find_.Find_fwd_while(src, pos + Hook_len, src_end, Byte_ascii.Space);
|
||||
wkr.On_space(pos, rng_end);
|
||||
return rng_end;
|
||||
}
|
||||
private static final byte[] Hook_bry = Bry_.new_a7(" ");
|
||||
private static final int Hook_len = Hook_bry.length;
|
||||
}
|
||||
@@ -23,15 +23,16 @@ import gplx.xowa.wikis.ttls.*;
|
||||
public class Xoh_tag_parser implements Html_doc_wkr {
|
||||
private final Xoh_hdoc_wkr hdoc_wkr;
|
||||
private final Html_tag_rdr tag_rdr = new Html_tag_rdr();
|
||||
private final Xoh_hdr_parser wkr__hdr = new Xoh_hdr_parser();
|
||||
private final Xoh_lnki_parser wkr__lnki = new Xoh_lnki_parser(); private final Xoh_lnke_parser wkr__lnke = new Xoh_lnke_parser();
|
||||
private final Xoh_img_parser wkr__img = new Xoh_img_parser(); private final Xoh_thm_parser wkr__thm = new Xoh_thm_parser();
|
||||
private final Xoh_gly_grp_parser wkr__gly = new Xoh_gly_grp_parser();
|
||||
private Xoh_hdoc_ctx hctx;
|
||||
private final Xoh_hdr_parser wkr__hdr = new Xoh_hdr_parser();
|
||||
private final Xoh_lnki_parser wkr__lnki = new Xoh_lnki_parser(); private final Xoh_lnke_parser wkr__lnke = new Xoh_lnke_parser();
|
||||
private final Xoh_img_parser wkr__img = new Xoh_img_parser(); private final Xoh_thm_parser wkr__thm = new Xoh_thm_parser();
|
||||
private final Xoh_gly_grp_parser wkr__gly = new Xoh_gly_grp_parser();
|
||||
public byte[] Hook() {return Byte_ascii.Angle_bgn_bry;}
|
||||
public Xoh_tag_parser(Xoh_hdoc_wkr hdoc_wkr) {this.hdoc_wkr = hdoc_wkr;}
|
||||
public void Init(Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
|
||||
this.hctx = hctx; tag_rdr.Init(hctx.Page__url(), src, src_bgn, src_end);
|
||||
this.hctx = hctx;
|
||||
tag_rdr.Init(hctx.Page__url(), src, src_bgn, src_end);
|
||||
}
|
||||
public int Parse(byte[] src, int src_bgn, int src_end, int pos) {
|
||||
tag_rdr.Pos_(pos);
|
||||
@@ -44,31 +45,34 @@ public class Xoh_tag_parser implements Html_doc_wkr {
|
||||
int cur_name_id = cur.Name_id();
|
||||
switch (cur_name_id) {
|
||||
case Html_tag_.Id__h2: case Html_tag_.Id__h3: case Html_tag_.Id__h4: case Html_tag_.Id__h5: case Html_tag_.Id__h6:
|
||||
int hdr_tag_bgn = cur.Src_bgn();
|
||||
nxt = tag_rdr.Tag__peek_fwd_head();
|
||||
if ( nxt.Name_id() == Html_tag_.Id__span
|
||||
&& nxt.Atrs__match_pair(Html_atr_.Bry__class , Xoh_hdr_parser.Bry__class__mw_headline)) {
|
||||
return wkr__hdr.Parse(hdoc_wkr, src, tag_rdr, cur_name_id, hdr_tag_bgn, nxt);
|
||||
if (wkr__hdr.Parse(hdoc_wkr, hctx, tag_rdr, src, cur, nxt)) return wkr__hdr.Src_end();
|
||||
}
|
||||
break;
|
||||
case Html_tag_.Id__a:
|
||||
nxt = tag_rdr.Tag__peek_fwd_head();
|
||||
if (nxt.Name_id() == Html_tag_.Id__img) {
|
||||
int rv = wkr__img.Parse(hdoc_wkr, hctx, src, tag_rdr, cur);
|
||||
if (rv != Xoh_hdoc_ctx.Invalid) {
|
||||
if (wkr__img.Parse(hdoc_wkr, hctx, src, tag_rdr, cur)) {
|
||||
hdoc_wkr.On_img(wkr__img);
|
||||
return rv;
|
||||
return wkr__img.Src_end();
|
||||
}
|
||||
}
|
||||
else if (cur.Atrs__match_pair(Html_atr_.Bry__rel , Xoh_lnke_dict_.Html__rel__nofollow))
|
||||
return wkr__lnke.Parse(hdoc_wkr, tag_rdr, cur);
|
||||
else
|
||||
return wkr__lnki.Parse(hdoc_wkr, hctx, src, tag_rdr, cur, hctx.Wiki__ttl_parser());
|
||||
else if (cur.Atrs__match_pair(Html_atr_.Bry__rel , Xoh_lnke_dict_.Html__rel__nofollow)) {
|
||||
if (wkr__lnke.Parse(hdoc_wkr, hctx, tag_rdr, src, cur)) return wkr__lnke.Src_end();
|
||||
}
|
||||
else if (cur.Atrs__get_by_or_empty(Xoh_img_parser.Bry__atr__xowa_title).Val_dat_exists()) {}
|
||||
else {
|
||||
if (wkr__lnki.Parse(hdoc_wkr, hctx, tag_rdr, src, cur)) return wkr__lnki.Src_end();
|
||||
}
|
||||
break;
|
||||
case Html_tag_.Id__div:
|
||||
if (cur.Atrs__cls_has(Xoh_thm_parser.Atr__class__thumb)) {
|
||||
int rv = wkr__thm.Parse(hdoc_wkr, hctx, src, tag_rdr, cur);
|
||||
if (rv != Xoh_hdoc_ctx.Invalid) return rv;
|
||||
if (wkr__thm.Parse(hdoc_wkr, hctx, src, tag_rdr, cur)) return wkr__thm.Src_end();
|
||||
}
|
||||
else if (cur.Atrs__match_pair(Html_atr_.Bry__id, Xoh_thm_parser.Atr__id__xowa_media_div)) {
|
||||
tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__div);
|
||||
}
|
||||
break;
|
||||
case Html_tag_.Id__ul:
|
||||
|
||||
@@ -25,7 +25,8 @@ public class Xoh_thm_bldr {
|
||||
public void Make(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int div_0_align, int div_1_width, boolean div_2_alt_exists, byte[] img_alt, Xoh_img_bldr img_bldr, Bfr_arg div_2_href, Bfr_arg div_2_capt) {
|
||||
wtr.Clear();
|
||||
wtr.Div_0_align_(div_0_align);
|
||||
wtr.Div_1_id_(img_bldr.Fsdb_itm().Html_uid());
|
||||
if (!hctx.Mode_is_diff())
|
||||
wtr.Div_1_id_(img_bldr.Fsdb_itm().Html_uid());
|
||||
wtr.Div_1_width_(div_1_width);
|
||||
wtr.Div_1_img_(img_bldr.Wtr());
|
||||
wtr.Div_2_href_(div_2_href);
|
||||
|
||||
@@ -23,8 +23,8 @@ public class Xoh_thm_html_tst {
|
||||
// fxt.Expd_itms_xfers(fxt.Make_xfer("A.png", 0, 0, 0, Bool_.Y, Xof_ext_.Id_png));
|
||||
fxt.Test__html("[[File:A.png|thumb|test_caption]]", String_.Concat_lines_nl_skip_last
|
||||
( "<div class='thumb tright'>"
|
||||
, " <div id='xowa_file_div_0' class='thumbinner' style='width:220px;'>"
|
||||
, " <a href='/wiki/File:A.png' class='image' xowa_title='A.png'><img data-xoimg='8|-1|-1|-1|-1|-1' src='' width='0' height='0' alt=''/></a>"
|
||||
, " <div class='thumbinner' style='width:220px;'>"
|
||||
, " <a href='/wiki/File:A.png' class='image' xowa_title='A.png'><img data-xoimg='4|-1|-1|-1|-1|-1' src='' width='0' height='0' alt=''/></a>"
|
||||
, " <div class='thumbcaption'>"
|
||||
, " <div class='magnify'>"
|
||||
, " <a href='/wiki/File:A.png' class='internal' title='Enlarge'>"
|
||||
|
||||
@@ -23,29 +23,30 @@ public class Xoh_thm_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
private final Xoh_thm_bldr bldr = new Xoh_thm_bldr();
|
||||
private final Bry_obj_ref div_2_capt = Bry_obj_ref.New_empty();
|
||||
public String Key() {return Xoh_hzip_dict_.Key__thm;}
|
||||
public Xoh_thm_hzip Encode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Hzip_stat_itm stat_itm, byte[] src, Xoh_thm_parser arg) {
|
||||
if (!arg.Rng_valid()) {
|
||||
bfr.Add_mid(src, arg.Rng_bgn(), arg.Rng_end());
|
||||
public byte[] Hook() {return hook;} private byte[] hook;
|
||||
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
|
||||
Xoh_thm_parser data = (Xoh_thm_parser)data_obj;
|
||||
if (!data.Rng_valid()) {
|
||||
bfr.Add_mid(src, data.Src_bgn(), data.Src_end());
|
||||
return this;
|
||||
}
|
||||
Xoh_thm_caption_parser div_2_capt_parser = arg.Capt_parser();
|
||||
int div_1_width = arg.Div_1_width(); boolean div_1_width_exists = div_1_width != 220;
|
||||
boolean div_2_alt_exists = arg.Capt_parser().Alt_div_exists();
|
||||
flag_bldr.Set(Flag__div_2_alt_exists , div_2_alt_exists);
|
||||
flag_bldr.Set(Flag__div_1_width_exists , div_1_width_exists);
|
||||
flag_bldr.Set(Flag__div_0_align , arg.Div_0_align());
|
||||
bfr.Add(Xoh_hzip_dict_.Bry__thm);
|
||||
Xoh_thm_caption_parser div_2_capt_parser = data.Capt_parser();
|
||||
int div_1_width = data.Div_1_width(); ;
|
||||
boolean div_2_alt_exists = flag_bldr.Set_as_bool(Flag__div_2_alt_exists , data.Capt_parser().Alt_div_exists());
|
||||
boolean div_1_width_exists = flag_bldr.Set_as_bool(Flag__div_1_width_exists , div_1_width != 220);
|
||||
flag_bldr.Set_as_byte(Flag__div_0_align , data.Div_0_align());
|
||||
|
||||
bfr.Add(hook);
|
||||
Xoh_hzip_int_.Encode(1, bfr, flag_bldr.Encode());
|
||||
if (div_1_width_exists) Xoh_hzip_int_.Encode(2, bfr, div_1_width);
|
||||
bfr.Add_mid(src, div_2_capt_parser.Capt_bgn(), div_2_capt_parser.Capt_end()).Add_byte(Xoh_hzip_dict_.Escape);
|
||||
if (div_2_capt_parser.Capt_exists()) bfr.Add_mid(src, div_2_capt_parser.Capt_bgn(), div_2_capt_parser.Capt_end());
|
||||
bfr.Add_byte(Xoh_hzip_dict_.Escape);
|
||||
if (div_2_alt_exists) bfr.Add_mid(src, div_2_capt_parser.Alt_div_bgn(), div_2_capt_parser.Alt_div_end()).Add_byte(Xoh_hzip_dict_.Escape);
|
||||
img_hzip.Encode(bfr, stat_itm, src, arg.Img_parser(), Bool_.N);
|
||||
img_hzip.Encode(bfr, hdoc_wkr, hctx, hpg, Bool_.N, src, data.Img_parser());
|
||||
return this;
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx hctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
|
||||
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
|
||||
int flag = rdr.Read_int_by_base85(1);
|
||||
int capt_bgn = rdr.Pos();
|
||||
int capt_end = rdr.Find_fwd_lr();
|
||||
int rv = rdr.Pos();
|
||||
|
||||
flag_bldr.Decode(flag);
|
||||
@@ -54,16 +55,16 @@ public class Xoh_thm_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
|
||||
int div_0_align = flag_bldr.Get_as_int(Flag__div_0_align);
|
||||
int div_1_width = 220;
|
||||
if (div_1_width_exists) div_1_width = rdr.Read_int_by_base85(2);
|
||||
int capt_bgn = rdr.Pos();
|
||||
int capt_end = rdr.Find_fwd_lr();
|
||||
div_2_capt.Mid_(src, capt_bgn, capt_end);
|
||||
byte[] div_2_alt_bry = div_2_alt_exists ? rdr.Read_bry_to() : Bry_.Empty;
|
||||
img_hzip.Decode(bfr, Bool_.N, hctx, hpg, rdr, src, rv);
|
||||
img_hzip.Decode(bfr, hdoc_wkr, hctx, hpg, Bool_.N, rdr, src, rdr.Pos(), src_end);
|
||||
bldr.Make(bfr, hpg, hctx, src, div_0_align, div_1_width, div_2_alt_exists, div_2_alt_bry, img_hzip.Bldr(), img_hzip.Anch_href_arg(), div_2_capt);
|
||||
return rv;
|
||||
}
|
||||
public int Pool__idx() {return pool_idx;} private int pool_idx;
|
||||
public void Pool__clear (Object[] args) {}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_thm_hzip rv = new Xoh_thm_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
|
||||
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
|
||||
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_thm_hzip rv = new Xoh_thm_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
|
||||
private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_(1, 1, 3);
|
||||
private static final int // SERIALIZED
|
||||
Flag__div_2_alt_exists = 0
|
||||
|
||||
@@ -19,56 +19,64 @@ package gplx.xowa.htmls.core.wkrs.thms; import gplx.*; import gplx.xowa.*; impor
|
||||
import org.junit.*; import gplx.xowa.htmls.core.hzips.*;
|
||||
public class Xoh_thm_hzip_tst {
|
||||
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt();
|
||||
private String Html__image = String_.Concat_lines_nl_skip_last
|
||||
( "<div class='thumb tleft'>"
|
||||
, " <div id='xothm_0' class='thumbinner' style='width:220px;'>"
|
||||
, " <a href='/wiki/File:A.png' class='image' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' class='thumbimage' alt='abc'></a>"
|
||||
, " <div class='thumbcaption'>"
|
||||
, " <div class='magnify'>"
|
||||
, " <a href='/wiki/File:A.png' class='internal' title='Enlarge'><img src='file:///mem/xowa/bin/any/xowa/file/mediawiki.file/magnify-clip.png' width='15' height='11' alt=''></a>"
|
||||
, " </div>abc"
|
||||
, " </div>"
|
||||
, " <hr>"
|
||||
, " <div class='thumbcaption'>"
|
||||
, " abc"
|
||||
, " </div>"
|
||||
, " </div>"
|
||||
, "</div>"
|
||||
)
|
||||
, Html__video = String_.Replace(String_.Concat_lines_nl_skip_last
|
||||
( "<div class='thumb tright'>"
|
||||
, " <div id='xowa_file_div_3' class='thumbinner' style='width:220px;'>"
|
||||
, " <div id='xowa_media_div'>"
|
||||
, " <div>"
|
||||
, " <a href='/wiki/File:a.ogv' class='image' title='a.ogv'><img id='xowa_file_img_3' src='file:///' width='-1' height='-1' alt=''></a>"
|
||||
, " </div>"
|
||||
, " <div>"
|
||||
, " <a id='xowa_file_play_3' href='file:///' xowa_title='a.ogv' class='xowa_anchor_button' style='width:218px;max-width:220px;'><img src='file:///C:/xowa/bin/any/xowa/file/mediawiki.file/play.png' width='22' height='22' alt='Play sound'></a>"
|
||||
, " </div>"
|
||||
, " </div>"
|
||||
, " <div class='thumbcaption'>"
|
||||
, " <div class='magnify'>"
|
||||
, " <a href='/wiki/File:a.ogv' class='@gplx.Internal protected' title='Enlarge'><img src='file:///C:/xowa/bin/any/xowa/file/mediawiki.file/magnify-clip.png' width='15' height='11' alt=''></a>"
|
||||
, " </div>Moscow (Russian Empire) in 1908"
|
||||
, " </div>"
|
||||
, " </div>"
|
||||
, "</div>"
|
||||
), "'", "\"")
|
||||
;
|
||||
@Before public void setup() {fxt.Clear();}
|
||||
@Test public void Image() {
|
||||
fxt.Test__bicode("~&3abc~abc~!uA.png~0|220|110|0.5|-1|-1~abc~", Html__image);
|
||||
fxt.Test__bicode("~&3abc~abc~!uA.png~)#Sabc~", String_.Concat_lines_nl_skip_last
|
||||
( "<div class='thumb tleft'>"
|
||||
, "<div id='xothm_0' class='thumbinner' style='width:220px;'><a href='/wiki/File:A.png' class='image' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' class='thumbimage' alt='abc'></a> "
|
||||
, "<div class='thumbcaption'>"
|
||||
, "<div class='magnify'><a href='/wiki/File:A.png' class='internal' title='Enlarge'><img src='file:///mem/xowa/bin/any/xowa/file/mediawiki.file/magnify-clip.png' width='15' height='11' alt=''></a></div>"
|
||||
, "abc</div>"
|
||||
, "<hr>"
|
||||
, "<div class='thumbcaption'>abc</div>"
|
||||
, "</div>"
|
||||
, "</div>"
|
||||
));
|
||||
}
|
||||
@Test public void No_capt() {
|
||||
fxt.Test__bicode("~&#~!%A.png~)#S~", String_.Concat_lines_nl_skip_last
|
||||
( "<div class='thumb tleft'>"
|
||||
, "<div id='xothm_0' class='thumbinner' style='width:220px;'><a href='/wiki/File:A.png' class='image' title='' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' class='thumbimage' alt=''></a> "
|
||||
, "<div class='thumbcaption'>"
|
||||
, "<div class='magnify'><a href='/wiki/File:A.png' class='internal' title='Enlarge'><img src='file:///mem/xowa/bin/any/xowa/file/mediawiki.file/magnify-clip.png' width='15' height='11' alt=''></a></div>"
|
||||
, "</div>"
|
||||
, "</div>"
|
||||
, "</div>"
|
||||
));
|
||||
}
|
||||
@Test public void Video() {
|
||||
fxt.Test__bicode(Html__video, Html__video);
|
||||
String html = String_.Replace(String_.Concat_lines_nl_skip_last
|
||||
( "<div class='thumb tright'>"
|
||||
, "<div id='xowa_file_div_3' class='thumbinner' style='width:220px;'>"
|
||||
, "<div id='xowa_media_div'>"
|
||||
, "<div>"
|
||||
, "<a href='/wiki/File:a.ogv' class='image' title='a.ogv'><img id='xowa_file_img_3' src='file:///' width='-1' height='-1' alt=''></a>"
|
||||
, "</div>"
|
||||
, "<div>"
|
||||
, "<a id='xowa_file_play_3' href='file:///' xowa_title='a.ogv' class='xowa_anchor_button' style='width:218px;max-width:220px;'><img src='file:///C:/xowa/bin/any/xowa/file/mediawiki.file/play.png' width='22' height='22' alt='Play sound'></a>"
|
||||
, "</div>"
|
||||
, "</div>"
|
||||
, "<div class='thumbcaption'>"
|
||||
, "<div class='magnify'>"
|
||||
, "<a href='/wiki/File:a.ogv' class='@gplx.Internal protected' title='Enlarge'><img src='file:///C:/xowa/bin/any/xowa/file/mediawiki.file/magnify-clip.png' width='15' height='11' alt=''></a>"
|
||||
, "</div>Moscow (Russian Empire) in 1908"
|
||||
, "</div>"
|
||||
, "</div>"
|
||||
, "</div>"
|
||||
), "'", "\"")
|
||||
;
|
||||
fxt.Test__bicode(html, html);
|
||||
}
|
||||
@Test public void Dump() {
|
||||
Xowe_wiki en_d = fxt.Init_wiki_alias("wikt", "en.wiktionary.org");
|
||||
gplx.xowa.wikis.nss.Xow_ns_mgr ns_mgr = en_d.Ns_mgr();
|
||||
ns_mgr.Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
|
||||
|
||||
fxt.Wiki().Ns_mgr().Aliases_add(gplx.xowa.wikis.nss.Xow_ns_.Tid__portal, "WP");
|
||||
fxt.Wiki().Ns_mgr().Init();
|
||||
|
||||
fxt.Init_mode_is_b256_(Bool_.N);
|
||||
fxt.Exec_write_to_fsys(Io_url_.new_dir_("J:\\xowa\\dev_rls\\html\\"), "temp_earth_xo.html");
|
||||
fxt.Init_mode_is_b256_(Bool_.N);
|
||||
}
|
||||
// @Test public void Dump() {
|
||||
// Xowe_wiki en_d = fxt.Prep_create_wiki("wikt", "en.wiktionary.org");
|
||||
// gplx.xowa.wikis.nss.Xow_ns_mgr ns_mgr = en_d.Ns_mgr();
|
||||
// ns_mgr.Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
|
||||
//
|
||||
// fxt.Wiki().Ns_mgr().Aliases_add(gplx.xowa.wikis.nss.Xow_ns_.Tid__portal, "WP");
|
||||
// fxt.Wiki().Ns_mgr().Init();
|
||||
//
|
||||
// fxt.Exec_write_to_fsys(Io_url_.new_dir_("D:\\xowa\\dev_rls\\html\\"), "temp_earth_xo.html");
|
||||
// }
|
||||
}
|
||||
|
||||
@@ -20,30 +20,34 @@ import gplx.core.brys.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*; import gplx.langs.htmls.parsers.styles.*;
|
||||
import gplx.xowa.htmls.core.wkrs.thms.divs.*; import gplx.xowa.htmls.core.wkrs.imgs.*;
|
||||
public class Xoh_thm_parser implements Html_atr_style_wkr {
|
||||
// Read-only accessors, each declared on the same line as its backing field.
// The int / byte / boolean fields below are all assigned inside Parse; img_parser and capt_parser are created once per instance.
// NOTE(review): this listing is a rendered diff without +/- markers. The Rng_bgn/Rng_end pair and the Src_bgn/Src_end pair
// presumably belong to different revisions of the class (removed vs. added side) -- confirm against the repository.
public int Rng_bgn() {return rng_bgn;} private int rng_bgn;
|
||||
public int Rng_end() {return rng_end;} private int rng_end;
|
||||
public int Src_bgn() {return src_bgn;} private int src_bgn;
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
// set to false at the start of the image / caption step in Parse and to true only when that step completes
public boolean Rng_valid() {return rng_valid;} private boolean rng_valid;
|
||||
// result of the class lookup against Xop_lnki_align_h_.Hash done in Parse
public byte Div_0_align() {return div_0_align;} private byte div_0_align;
|
||||
// reset to -1 in Parse just before the style attribute of the inner div is handed to Html_atr_style_parser_
public int Div_1_width() {return div_1_width;} private int div_1_width;
|
||||
public Xoh_img_parser Img_parser() {return img_parser;} private final Xoh_img_parser img_parser = new Xoh_img_parser();
|
||||
public Xoh_thm_caption_parser Capt_parser() {return capt_parser;} private final Xoh_thm_caption_parser capt_parser = new Xoh_thm_caption_parser();
|
||||
// Parses one thumbnail ("thm") block that starts at the supplied outer div head tag and reports it via hdoc_wkr.On_thm(this).
// Steps visible below: record the start position from the head tag's Src_bgn(); resolve div_0_align from the tag's class
// via Xop_lnki_align_h_.Hash; reset div_1_width to -1 and pass the inner div tag plus this object to Html_atr_style_parser_.Parse;
// run img_parser and then capt_parser on the following tags; consume two closing div tags; record the end position.
// NOTE(review): this span is a rendered diff hunk whose +/- markers were lost, so two variants of the method are interleaved:
//   - `int Parse(..., Html_tag div_0)`, which stores rng_bgn / rng_end and returns rng_end;
//   - `boolean Parse(..., Html_tag div_0_head)`, which stores src_bgn / src_end and returns false on failure, true otherwise.
// Presumably the int variant is the removed side and the boolean variant the added side -- confirm against the repository.
public int Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, byte[] src, Html_tag_rdr tag_rdr, Html_tag div_0) {
|
||||
tag_rdr.Rdr().Init_by_hook("thm", div_0.Src_bgn(), div_0.Src_bgn());
|
||||
this.rng_bgn = div_0.Src_bgn();
|
||||
this.div_0_align = div_0.Atrs__cls_find_or_fail(gplx.xowa.parsers.lnkis.Xop_lnki_align_h_.Hash);
|
||||
Html_tag div_1 = tag_rdr.Tag__move_fwd_head(); // <div class='thumbinner'>
|
||||
public boolean Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, byte[] src, Html_tag_rdr tag_rdr, Html_tag div_0_head) {
|
||||
tag_rdr.Err_wkr().Init_by_sect("thm", div_0_head.Src_bgn());
|
||||
this.src_bgn = div_0_head.Src_bgn();
|
||||
this.div_0_align = div_0_head.Atrs__cls_find_or_fail(gplx.xowa.parsers.lnkis.Xop_lnki_align_h_.Hash);
|
||||
// the search for the inner div head is bounded by the end of div_0's head tag and the start of the peeked closing div
Html_tag div_0_tail = tag_rdr.Tag__peek_fwd_tail(Html_tag_.Id__div); // </div>
|
||||
Html_tag div_1_head = tag_rdr.Tag__find_fwd_head(div_0_head.Src_end(), div_0_tail.Src_bgn(), Html_tag_.Id__div); // <div class='thumbinner'>
|
||||
// give up when the tag that came back is not a div (presumably the reader's "not found" result -- confirm)
if (div_1_head.Name_id() != Html_tag_.Id__div) return false;
|
||||
// tag_rdr.Pos_(div_1_head.Src_end());
|
||||
tag_rdr.Tag__move_fwd_head().Chk_id(Html_tag_.Id__div);
|
||||
this.div_1_width = -1;
|
||||
Html_atr_style_parser_.Parse(div_1, this); // " style='120px'"
|
||||
Html_atr_style_parser_.Parse(div_1_head, this); // " style='120px'"
|
||||
rng_valid = false;
|
||||
if (img_parser.Parse(hdoc_wkr, hctx, src, tag_rdr, tag_rdr.Tag__move_fwd_head()) != Xoh_hdoc_ctx.Invalid) { // <a>
|
||||
capt_parser.Parse(hdoc_wkr, tag_rdr, src, tag_rdr.Tag__move_fwd_head()); // <div>
|
||||
if (img_parser.Parse(hdoc_wkr, hctx, src, tag_rdr, tag_rdr.Tag__move_fwd_head())) { // <a>
|
||||
if (!capt_parser.Parse(hdoc_wkr, tag_rdr, src, tag_rdr.Tag__move_fwd_head())) return false; // <div>
|
||||
rng_valid = true;
|
||||
}
|
||||
tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__div); // </div> for div_1
|
||||
Html_tag div_0_tail = tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__div); // </div> for div_0
|
||||
this.rng_end = div_0_tail.Src_end();
|
||||
tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__div);
|
||||
tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__div);
|
||||
// in this variant the end position is taken from the reader's current position after both closing divs were consumed
this.src_end = tag_rdr.Pos();
|
||||
hdoc_wkr.On_thm(this);
|
||||
return rng_end;
|
||||
return true;
|
||||
}
|
||||
public boolean On_atr(byte[] src, int atr_idx, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end) {
|
||||
if (Bry_.Match(src, key_bgn, key_end, Html_atr_style_.Bry__width))
|
||||
@@ -51,7 +55,8 @@ public class Xoh_thm_parser implements Html_atr_style_wkr {
|
||||
return true;
|
||||
}
|
||||
// byte[] constants built with Bry_.new_a7 from the literals "thumb", "thumbinner" and "xowa_media_div".
// NOTE(review): Atr__class__thumb and Atr__class__thumbinner each appear twice because the diff's before / after lines are
// shown together without +/- markers; the two copies are textually identical here, so the change was presumably
// whitespace-only, with Atr__id__xowa_media_div being the one new entry -- confirm against the repository.
public static final byte[]
|
||||
Atr__class__thumb = Bry_.new_a7("thumb")
|
||||
, Atr__class__thumbinner = Bry_.new_a7("thumbinner")
|
||||
Atr__class__thumb = Bry_.new_a7("thumb")
|
||||
, Atr__class__thumbinner = Bry_.new_a7("thumbinner")
|
||||
, Atr__id__xowa_media_div = Bry_.new_a7("xowa_media_div")
|
||||
;
|
||||
}
|
||||
|
||||
@@ -45,10 +45,13 @@ public class Xoh_thm_wtr extends gplx.core.brys.Bfr_arg_base {
|
||||
alt_fmtr.Bld_bfr_many(tmp_bfr, img_alt_bry);
|
||||
div_2_alt.Set(tmp_bfr.To_bry_and_clear());
|
||||
}
|
||||
else
|
||||
div_2_alt.Set(Bry_.Empty);
|
||||
return this;
|
||||
}
|
||||
// Resets the writer's template arguments and returns this instance so calls can be chained.
// NOTE(review): two revisions are interleaved here (diff markers lost). The 8-argument Bfr_arg_.Clear call is presumably
// the removed side; the 6-argument call plus the assignment of Bfr_arg_.Noop to div_1_img and div_2_capt is presumably
// the added side -- confirm against the repository.
public Xoh_thm_wtr Clear() {
|
||||
Bfr_arg_.Clear(div_0_align, div_1_id, div_1_width, div_1_img, div_2_href, div_2_magnify, div_2_capt, div_2_alt);
|
||||
Bfr_arg_.Clear(div_0_align, div_1_id, div_1_width, div_2_href, div_2_magnify, div_2_alt);
|
||||
// div_1_img and div_2_capt are not passed to Clear in this variant; they are pointed at Bfr_arg_.Noop instead
div_1_img = div_2_capt = Bfr_arg_.Noop;
|
||||
return this;
|
||||
}
|
||||
@Override public void Bfr_arg__add(Bry_bfr bfr) {
|
||||
@@ -57,21 +60,16 @@ public class Xoh_thm_wtr extends gplx.core.brys.Bfr_arg_base {
|
||||
// Prefix__div_id: byte[] form of the literal "xothm_" (by its name, a prefix for div ids -- usage is not visible in this hunk).
public static final byte[] Prefix__div_id = Bry_.new_a7("xothm_");
|
||||
// fmtr: template for the thumbnail markup -- an outer "thumb" div containing a "thumbinner" div, which in turn holds the
// image argument and a "thumbcaption" div with the "magnify" link. Each ~{key} placeholder corresponds to one of the eight
// keys listed after the template lines.
// NOTE(review): the indented template lines and the compact ones that follow them cover the same markup; they are
// presumably the removed and added sides of the diff shown together (markers lost) -- confirm against the repository.
private static final Bry_fmtr fmtr = Bry_fmtr.new_(String_.Concat_lines_nl_skip_last
|
||||
( "<div class=\"thumb t~{div_0_align}\">"
|
||||
, " <div~{div_1_id} class=\"thumbinner\" style=\"width:~{div_1_width}px;\">"
|
||||
, " ~{div_1_img}"
|
||||
, " <div class=\"thumbcaption\">"
|
||||
, " <div class=\"magnify\">"
|
||||
, " <a~{div_2_href} class=\"internal\" title=\"Enlarge\"><img src=\"~{div_2_magnify}\" width=\"15\" height=\"11\" alt=\"\"></a>"
|
||||
, " </div>~{div_2_capt}"
|
||||
, " </div>~{div_2_alt}"
|
||||
, " </div>"
|
||||
, "<div~{div_1_id} class=\"thumbinner\" style=\"width:~{div_1_width}px;\">~{div_1_img} " // NOTE: trailing space is intentional; matches jtidy behavior
|
||||
, "<div class=\"thumbcaption\">"
|
||||
, "<div class=\"magnify\"><a~{div_2_href} class=\"internal\" title=\"Enlarge\"><img src=\"~{div_2_magnify}\" width=\"15\" height=\"11\" alt=\"\"></a></div>"
|
||||
, "~{div_2_capt}</div>~{div_2_alt}"
|
||||
, "</div>"
|
||||
, "</div>"
|
||||
), "div_0_align", "div_1_id", "div_1_width", "div_1_img", "div_2_href", "div_2_magnify", "div_2_capt", "div_2_alt");
|
||||
// alt_fmtr: template with the single key "alt" -- an <hr> followed by a "thumbcaption" div wrapping ~{alt}.
// The first template line is an empty string (presumably so the joined text starts with a line break -- confirm
// against String_.Concat_lines_nl_skip_last).
// NOTE(review): the indented lines and the compact lines after them are presumably the removed and added sides of the
// diff shown together (markers lost) -- confirm against the repository.
private static final Bry_fmtr alt_fmtr = Bry_fmtr.new_(String_.Concat_lines_nl_skip_last
|
||||
( ""
|
||||
, " <hr>"
|
||||
, " <div class=\"thumbcaption\">"
|
||||
, " ~{alt}"
|
||||
, " </div>"
|
||||
, "<hr>"
|
||||
, "<div class=\"thumbcaption\">~{alt}</div>"
|
||||
), "alt");
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user