1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

v2.12.1.1

This commit is contained in:
gnosygnu
2015-12-06 23:12:52 -05:00
parent 097e6c7f80
commit 9509363f46
337 changed files with 3473 additions and 1917 deletions

View File

@@ -55,6 +55,7 @@ public class Xoh_page implements Xoa_page {
}
public Xoh_page Ctor_by_page(Bry_bfr tmp_bfr, Xoae_page page) {
this.page_id = page.Revision_data().Id();
this.wiki = page.Wiki();
this.body = page.Hdump_data().Body();
this.page_url = page.Url();
Xopg_html_data html_data = page.Html_data();

View File

@@ -153,11 +153,11 @@ public class Xoh_page_wtr_wkr {
wiki.Html_mgr().Html_wtr().Write_all(tidy_bfr, page.Wikie().Parser_mgr().Ctx(), hctx, page.Root().Data_mid(), page.Root());
// if [[Category]], render rest of html (Subcategories; Pages; Files); note that a category may have other html which requires wikitext processing
if (ns_id == Xow_ns_.Tid__category) wiki.Html_mgr().Ns_ctg().Bld_html(wiki, page, tidy_bfr);
if (ns_id == Xow_ns_.Tid__category) wiki.Html_mgr().Ns_ctg().Bld_html(wiki, page, hctx, tidy_bfr);
// tidy html
gplx.xowa.htmls.core.htmls.tidy.Xoh_tidy_mgr tidy_mgr = app.Html_mgr().Tidy_mgr();
if (tidy_mgr.Enabled()) tidy_mgr.Run_tidy_html(page, tidy_bfr);
if (tidy_mgr.Enabled()) tidy_mgr.Run_tidy_html(page, tidy_bfr, !hctx.Mode_is_hdump());
// add back to main bfr
bfr.Add_bfr_and_clear(tidy_bfr);

View File

@@ -20,20 +20,20 @@ import gplx.core.brys.fmtrs.*;
import gplx.xowa.langs.*; import gplx.xowa.langs.msgs.*;
public class Xohp_ctg_grp_mgr {
final Bry_fmtr grp_fmtr = Bry_fmtr.new_(String_.Concat_lines_nl_skip_last
( "<div id=\"catlinks\" class=\"catlinks\">"
, " <div id=\"mw-normal-catlinks\" class=\"mw-normal-catlinks\">"
, " ~{grp_lbl}"
, " <ul>~{grp_itms}"
, " </ul>"
, " </div>"
, "</div>"
( "<div id=\"catlinks\" class=\"catlinks\">"
, "<div id=\"mw-normal-catlinks\" class=\"mw-normal-catlinks\">"
, "~{grp_lbl}"
, "<ul>~{grp_itms}"
, "</ul>"
, "</div>"
, "</div>"
), "grp_lbl", "grp_itms")
;
final Bry_fmtr itm_fmtr = Bry_fmtr.new_(String_.Concat_lines_nl_skip_last
( ""
, " <li>"
, " <a href=\"~{itm_href}\" class=\"internal\" title=\"~{itm_title}\">~{itm_text}</a>"
, " </li>"
( ""
, "<li>"
, "<a href=\"~{itm_href}\" class=\"internal\" title=\"~{itm_title}\">~{itm_text}</a>"
, "</li>"
), "itm_href", "itm_title", "itm_text"
);
Xoh_ctg_itm_fmtr itm_mgr = new Xoh_ctg_itm_fmtr();

View File

@@ -23,17 +23,17 @@ public class Xohp_ctg_grp_mgr_tst {
@Test public void Basic() {
fxt.Init_ctgs("A", "B").Test_html(String_.Concat_lines_nl
( "<div id=\"catlinks\" class=\"catlinks\">"
, " <div id=\"mw-normal-catlinks\" class=\"mw-normal-catlinks\">"
, " Categories"
, " <ul>"
, " <li>"
, " <a href=\"/wiki/Category:A\" class=\"internal\" title=\"A\">A</a>"
, " </li>"
, " <li>"
, " <a href=\"/wiki/Category:B\" class=\"internal\" title=\"B\">B</a>"
, " </li>"
, " </ul>"
, " </div>"
, "<div id=\"mw-normal-catlinks\" class=\"mw-normal-catlinks\">"
, "Categories"
, "<ul>"
, "<li>"
, "<a href=\"/wiki/Category:A\" class=\"internal\" title=\"A\">A</a>"
, "</li>"
, "<li>"
, "<a href=\"/wiki/Category:B\" class=\"internal\" title=\"B\">B</a>"
, "</li>"
, "</ul>"
, "</div>"
, "</div>"
));
}

View File

@@ -26,10 +26,10 @@ public class Xow_html_mgr implements GfoInvkAble {
Xoae_app app = wiki.Appe();
page_wtr_mgr = new Xoh_page_wtr_mgr(app.Gui_mgr().Kit().Tid() != gplx.gfui.Gfui_kit_.Swing_tid); // reverse logic to handle swt,drd but not mem
Io_url file_dir = app.Fsys_mgr().Bin_xowa_file_dir().GenSubDir_nest("mediawiki.file");
img_media_play_btn = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys.Encode_to_file_protocol(file_dir.GenSubFil("play.png"));
img_media_info_btn = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys.Encode_to_file_protocol(file_dir.GenSubFil("info.png"));
img_thumb_magnify = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys.Encode_to_file_protocol(file_dir.GenSubFil("magnify-clip.png"));
img_xowa_protocol = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys.Encode_to_file_protocol(app.Fsys_mgr().Bin_xowa_file_dir().GenSubFil_nest("app.general", "xowa_exec.png"));
img_media_play_btn = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys_lnx.Encode_to_file_protocol(file_dir.GenSubFil("play.png"));
img_media_info_btn = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys_lnx.Encode_to_file_protocol(file_dir.GenSubFil("info.png"));
img_thumb_magnify = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys_lnx.Encode_to_file_protocol(file_dir.GenSubFil("magnify-clip.png"));
img_xowa_protocol = gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys_lnx.Encode_to_file_protocol(app.Fsys_mgr().Bin_xowa_file_dir().GenSubFil_nest("app.general", "xowa_exec.png"));
portal_mgr = new Xow_portal_mgr(wiki);
imgs_mgr = new Xoh_imgs_mgr(this);
module_mgr = new Xow_module_mgr(wiki);

View File

@@ -23,24 +23,27 @@ public class Xow_hdump_mgr {
private final Xoh_page tmp_hpg = new Xoh_page(); private final Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
private final Io_stream_zip_mgr zip_mgr = new Io_stream_zip_mgr();
public Xow_hdump_mgr(Xow_wiki wiki) {
this.save_mgr = new Xow_hdump_mgr__save(wiki, hzip_mgr, zip_mgr, tmp_hpg, tmp_bfr);
this.save_mgr = new Xow_hdump_mgr__save(wiki, hzip_mgr, zip_mgr, tmp_hpg);
this.load_mgr = new Xow_hdump_mgr__load(wiki, hzip_mgr, zip_mgr, tmp_hpg, tmp_bfr);
}
public Xow_hdump_mgr__save Save_mgr() {return save_mgr;} private Xow_hdump_mgr__save save_mgr;
public Xow_hdump_mgr__load Load_mgr() {return load_mgr;} private Xow_hdump_mgr__load load_mgr;
public Xoh_hzip_mgr Hzip_mgr() {return hzip_mgr;} private final Xoh_hzip_mgr hzip_mgr = new Xoh_hzip_mgr();
public void Init_by_db(Xow_wiki wiki) {
byte default_zip_tid = gplx.core.ios.Io_stream_.Tid_raw;
boolean default_hzip_enable = false;
byte dflt_zip_tid = gplx.core.ios.Io_stream_.Tid_raw;
boolean dflt_hzip_enable = false;
boolean mode_is_b256 = false;
if (wiki.Data__core_mgr() != null) { // TEST: handle null data mgr
Xowd_core_db_props props = wiki.Data__core_mgr().Props();
default_zip_tid = props.Zip_tid_html();
default_hzip_enable = props.Hzip_enabled();
dflt_zip_tid = props.Zip_tid_html();
dflt_hzip_enable = props.Hzip_enabled();
// dflt_hzip_enable = props.Hzip_enabled();
// mode_is_b256 = true;
}
Init_by_db(default_zip_tid, default_hzip_enable);
Init_by_db(dflt_zip_tid, dflt_hzip_enable, mode_is_b256);
}
public void Init_by_db(byte default_zip_tid, boolean default_hzip_enable) {
int dflt_hzip_tid = default_hzip_enable ? Xoh_hzip_dict_.Hzip__v1 : Xoh_hzip_dict_.Hzip__none;
save_mgr.Init_by_db(default_zip_tid, dflt_hzip_tid);
// Configures the save mgr with the default zip type and the hzip type derived from dflt_hzip_enable.
// dflt_zip_tid     : zip type id forwarded unchanged to save_mgr
// dflt_hzip_enable : true selects Xoh_hzip_dict_.Hzip__v1; false selects Xoh_hzip_dict_.Hzip__none
// mode_is_b256     : NOTE(review): accepted but not forwarded; Bool_.N is always passed to save_mgr below -- confirm whether b256 mode is intentionally disabled
public void Init_by_db(byte dflt_zip_tid, boolean dflt_hzip_enable, boolean mode_is_b256) {
int dflt_hzip_tid = dflt_hzip_enable ? Xoh_hzip_dict_.Hzip__v1 : Xoh_hzip_dict_.Hzip__none;
save_mgr.Init_by_db(dflt_zip_tid, dflt_hzip_tid, Bool_.N);
}
}

View File

@@ -25,7 +25,7 @@ public class Xow_hdump_mgr__load {
private final Xoh_page tmp_hpg; private final Bry_bfr tmp_bfr; private final Xowd_page_itm tmp_dbpg = new Xowd_page_itm();
public Xow_hdump_mgr__load(Xow_wiki wiki, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, Xoh_page tmp_hpg, Bry_bfr tmp_bfr) {
this.wiki = wiki; this.hzip_mgr = hzip_mgr; this.zip_mgr = zip_mgr; this.tmp_hpg = tmp_hpg; this.tmp_bfr = tmp_bfr;
this.make_mgr = new Xoh_make_mgr(wiki.App().Usr_dlg(), wiki.App().Fsys_mgr(), gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys, wiki.Domain_bry());
this.make_mgr = new Xoh_make_mgr(wiki.App().Usr_dlg(), wiki.App().Fsys_mgr(), gplx.langs.htmls.encoders.Gfo_url_encoder_.Fsys_lnx, wiki.Domain_bry());
}
public Xoh_make_mgr Make_mgr() {return make_mgr;} private final Xoh_make_mgr make_mgr;
public void Load(Xoae_page wpg) {
@@ -45,13 +45,12 @@ public class Xow_hdump_mgr__load {
return true;
}
}
public byte[] Decode_as_bry(Bry_bfr bfr, Xoh_page hpg, byte[] src, boolean mode_is_diff) {hzip_mgr.Hctx().Mode_is_diff_(mode_is_diff); hzip_mgr.Decode(bfr, wiki, hpg, src); return bfr.To_bry_and_clear();}
private byte[] Parse(Xoh_page hpg, int zip_tid, int hzip_tid, byte[] src) {
if (zip_tid > gplx.core.ios.Io_stream_.Tid_raw)
src = zip_mgr.Unzip((byte)zip_tid, src);
if (hzip_tid == Xoh_hzip_dict_.Hzip__v1) {
hzip_mgr.Decode(tmp_bfr.Clear(), wiki, hpg, src);
src = tmp_bfr.To_bry_and_clear();
}
if (hzip_tid == Xoh_hzip_dict_.Hzip__v1)
src = Decode_as_bry(tmp_bfr.Clear(), hpg, src, Bool_.N);
return src;
}
private void Fill_page(Xoae_page wpg, Xoh_page hpg) {

View File

@@ -117,7 +117,7 @@ class Xodb_hdump_mgr__base_fxt {
wiki = fxt.Wiki();
page = wiki.Parser_mgr().Ctx().Cur_page();
hdump_mgr = wiki.Html__hdump_mgr();
hdump_mgr.Init_by_db(gplx.core.ios.Io_stream_.Tid_raw, false);
hdump_mgr.Init_by_db(gplx.core.ios.Io_stream_.Tid_raw, false, false);
}
fxt.Reset();
page.Revision_data().Id_(0);

View File

@@ -16,16 +16,19 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.heads.*;
import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.heads.*;
import gplx.core.ios.*; import gplx.core.primitives.*; import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.pages.*;
public class Xow_hdump_mgr__save {
private final Xow_wiki wiki; private final Xoh_hzip_mgr hzip_mgr; private final Io_stream_zip_mgr zip_mgr;
private final Xoh_page tmp_hpg; private final Bry_bfr tmp_bfr; private Bool_obj_ref html_db_is_new = Bool_obj_ref.n_();
private final Xoh_page tmp_hpg; private final Xoh_hzip_bfr tmp_bfr = Xoh_hzip_bfr.New_txt(32); private Bool_obj_ref html_db_is_new = Bool_obj_ref.n_();
private int dflt_zip_tid, dflt_hzip_tid;
public Xow_hdump_mgr__save(Xow_wiki wiki, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, Xoh_page tmp_hpg, Bry_bfr tmp_bfr) {
this.wiki = wiki; this.hzip_mgr = hzip_mgr; this.zip_mgr = zip_mgr; this.tmp_hpg = tmp_hpg; this.tmp_bfr = tmp_bfr;
public Xow_hdump_mgr__save(Xow_wiki wiki, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, Xoh_page tmp_hpg) {
this.wiki = wiki; this.hzip_mgr = hzip_mgr; this.zip_mgr = zip_mgr; this.tmp_hpg = tmp_hpg;
}
public void Init_by_db(int dflt_zip_tid, int dflt_hzip_tid) {this.dflt_zip_tid = dflt_zip_tid; this.dflt_hzip_tid = dflt_hzip_tid;}
public void Init_by_db(int dflt_zip_tid, int dflt_hzip_tid, boolean mode_is_b256) {
this.dflt_zip_tid = dflt_zip_tid; this.dflt_hzip_tid = dflt_hzip_tid; tmp_bfr.Mode_is_b256_(mode_is_b256);
}
public byte[] Src_as_hzip() {return src_as_hzip;} private byte[] src_as_hzip;
public int Save(Xoae_page page) {
synchronized (tmp_hpg) {
Bld_hdump(page);
@@ -45,11 +48,9 @@ public class Xow_hdump_mgr__save {
wiki.Html__wtr_mgr().Wkr(Xopg_page_.Tid_read).Write_body(tmp_bfr, Xoh_wtr_ctx.Hdump, page); // save as hdump_fmt
page.Hdump_data().Body_(tmp_bfr.To_bry_and_clear());
}
private static byte[] Write(Bry_bfr bfr, Xow_wiki wiki, Xoh_page hpg, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, int zip_tid, int hzip_tid, byte[] src) {
if (hzip_tid == Xoh_hzip_dict_.Hzip__v1) {
hzip_mgr.Encode(bfr.Clear(), wiki, hpg, src);
src = bfr.To_bry_and_clear();
}
private byte[] Write(Xoh_hzip_bfr bfr, Xow_wiki wiki, Xoh_page hpg, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, int zip_tid, int hzip_tid, byte[] src) {
if (hzip_tid != Xoh_hzip_dict_.Hzip__none) src = hzip_mgr.Encode_as_bry((Xoh_hzip_bfr)bfr.Clear(), wiki, hpg, src);
src_as_hzip = src;
if (zip_tid > gplx.core.ios.Io_stream_.Tid_raw)
src = zip_mgr.Zip((byte)zip_tid, src);
return src;

View File

@@ -16,22 +16,26 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.core.brys.*;
import gplx.dbs.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.dbs.*; import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.data.*;
import gplx.xowa.apps.apis.xowa.bldrs.imports.*;
public class Xob_hdump_bldr implements GfoInvkAble {
private boolean enabled, hzip_enabled, hzip_compare;
private boolean enabled, hzip_enabled, hzip_diff, hzip_b256; private byte zip_tid = Byte_.Max_value_127;
private Xowe_wiki wiki; private Xow_hdump_mgr hdump_mgr;
private Xob_ns_to_db_mgr ns_to_db_mgr; int prv_row_len = 0;
private Hzip_stat_tbl stats_tbl; private Hzip_stat_itm tmp_stat_itm;
private final Xoh_page tmp_hpg = new Xoh_page(); private final Bry_bfr tmp_bfr = Bry_bfr.reset_(Io_mgr.Len_mb);
private Xoh_stat_tbl stats_tbl; private Xoh_stat_itm tmp_stat_itm;
private final Xoh_page tmp_hpg = new Xoh_page(); private final Bry_bfr tmp_bfr = Bry_bfr.new_();
private boolean op_sys_is_wnt;
public boolean Init(Xowe_wiki wiki, Db_conn make_conn) {
if (!enabled) return false;
this.wiki = wiki; this.hdump_mgr = wiki.Html__hdump_mgr(); this.tmp_stat_itm = hdump_mgr.Hzip_mgr().Hctx().Bicode__stat();
this.stats_tbl = new Hzip_stat_tbl(make_conn);
this.op_sys_is_wnt = gplx.core.envs.Op_sys.Cur().Tid_is_wnt();
this.wiki = wiki; this.hdump_mgr = wiki.Html__hdump_mgr(); this.tmp_stat_itm = hdump_mgr.Hzip_mgr().Hctx().Hzip__stat();
this.stats_tbl = new Xoh_stat_tbl(make_conn);
Xoapi_import import_cfg = wiki.Appe().Api_root().Bldr().Wiki().Import();
hdump_mgr.Init_by_db(import_cfg.Zip_tid_html(), hzip_enabled);
if (zip_tid == Byte_.Max_value_127) zip_tid = import_cfg.Zip_tid_html();
hdump_mgr.Init_by_db(zip_tid, hzip_enabled, hzip_b256);
Xowd_db_mgr core_data_mgr = wiki.Db_mgr_as_sql().Core_data_mgr();
this.ns_to_db_mgr = new Xob_ns_to_db_mgr(new Xob_ns_to_db_wkr__html(core_data_mgr.Db__core()), core_data_mgr, import_cfg.Html_db_max());
Xob_ns_file_itm.Init_ns_bldr_data(Xowd_db_file_.Tid_html_data, wiki.Ns_mgr(), gplx.xowa.apps.apis.xowa.bldrs.imports.Xoapi_import.Ns_file_map__each);
@@ -40,13 +44,11 @@ public class Xob_hdump_bldr implements GfoInvkAble {
public void Insert(Xoae_page page) {
page.File_queue().Clear(); // need to reset uid to 0, else xowa_file_# will resume from last
wiki.Html_mgr().Page_wtr_mgr().Wkr(Xopg_page_.Tid_read).Write_body(tmp_bfr, Xoh_wtr_ctx.Hdump, page); // write to html in hdump mode
byte[] html_orig_bry = tmp_bfr.To_bry_and_clear();
page.Hdump_data().Body_(html_orig_bry); // write to body bry
byte[] orig_bry = tmp_bfr.To_bry_and_clear();
page.Hdump_data().Body_(orig_bry); // write to body bry
Xowd_db_file html_db = ns_to_db_mgr.Get_by_ns(page.Ttl().Ns().Bldr_data(), prv_row_len); // get html_db
this.prv_row_len = hdump_mgr.Save_mgr().Save(tmp_hpg.Ctor_by_page(tmp_bfr, page), html_db, true); // save to db
if (hzip_compare) {
// Compare(html_orig_bry, hdump_mgr.Save_mgr());
}
if (hzip_diff) Hzip_exec(orig_bry);
stats_tbl.Insert(tmp_hpg, tmp_stat_itm, page.Root().Root_src().length, tmp_hpg.Body().length, prv_row_len); // save stats
}
public void Bld_term() {this.Commit(); ns_to_db_mgr.Rls_all();}
@@ -54,12 +56,21 @@ public class Xob_hdump_bldr implements GfoInvkAble {
ns_to_db_mgr.Commit();
// wiki_db_mgr.Tbl__cfg().Update_long(Cfg_grp_hdump_make, Cfg_itm_hdump_size, hdump_db_size); // update cfg; should happen after commit entries
}
// Decodes the hzip bytes currently held by the save mgr (in diff mode) and compares them against the original html;
// logs a warning with the two sides returned by Bry_diff_.Diff_1st_line when it reports a difference.
private void Hzip_exec(byte[] orig_bry) {
	// expected html: on Windows, CR+LF is replaced with LF before comparing
	byte[] expd_bry = orig_bry;
	if (op_sys_is_wnt)
		expd_bry = Bry_.Replace(tmp_bfr, orig_bry, Byte_ascii.Cr_lf_bry, Byte_ascii.Nl_bry);

	// actual html: decode the hzip bytes with mode_is_diff = y
	byte[] hzip_bry = hdump_mgr.Save_mgr().Src_as_hzip();
	byte[] actl_bry = hdump_mgr.Load_mgr().Decode_as_bry(tmp_bfr, tmp_hpg, hzip_bry, Bool_.Y);

	// null means nothing to report
	byte[][] diff = Bry_diff_.Diff_1st_line(expd_bry, actl_bry);
	if (diff == null) return;
	String msg = String_.Format("hzip diff: page={0} lhs='{1}' rhs='{2}'", tmp_hpg.Url_bry_safe(), diff[0], diff[1]);
	Gfo_usr_dlg_.Instance.Warn_many("", "", msg);
}
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
if (ctx.Match(k, Invk_enabled_)) enabled = m.ReadYn("v");
else if (ctx.Match(k, Invk_zip_tid_)) zip_tid = m.ReadByte("v");
else if (ctx.Match(k, Invk_hzip_enabled_)) hzip_enabled = m.ReadYn("v");
else if (ctx.Match(k, Invk_hzip_compare_)) hzip_compare = m.ReadYn("v");
else if (ctx.Match(k, Invk_hzip_diff_)) hzip_diff = m.ReadYn("v");
else if (ctx.Match(k, Invk_hzip_b256_)) hzip_b256 = m.ReadYn("v");
else return GfoInvkAble_.Rv_unhandled;
return this;
}
private static final String Invk_enabled_ = "enabled_", Invk_hzip_enabled_ = "hzip_enabled_", Invk_hzip_compare_ = "hzip_compare_";
private static final String Invk_enabled_ = "enabled_", Invk_zip_tid_ = "zip_tid_", Invk_hzip_enabled_ = "hzip_enabled_", Invk_hzip_diff_ = "hzip_diff_", Invk_hzip_b256_ = "hzip_b256_";
}

View File

@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.dbs.*;
class Xob_link_dump_tbl implements RlsAble {
class Xob_link_dump_tbl implements Rls_able {
public static final String Tbl_name = "link_dump"; private static final Db_meta_fld_list flds = Db_meta_fld_list.new_();
public static final String
Fld_uid = flds.Add_int_pkey_autonum("uid")

View File

@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.dbs.*; import gplx.core.brys.*;
public class Xoh_page_tbl implements RlsAble {
public class Xoh_page_tbl implements Rls_able {
private final String tbl_name = "html"; private final Db_meta_fld_list flds = Db_meta_fld_list.new_();
private final String fld_page_id, fld_head_flag, fld_body_flag, fld_display_ttl, fld_content_sub, fld_sidebar_div, fld_body;
private final Db_conn conn; private Db_stmt stmt_select, stmt_insert, stmt_delete, stmt_update;

View File

@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.dbs.*;
public class Xoh_redlink_tbl implements RlsAble {
public class Xoh_redlink_tbl implements Rls_able {
private final String tbl_name = "html_redlink"; private final Db_meta_fld_list flds = Db_meta_fld_list.new_();
private final String fld_page_id, fld_redlink_uids;
private final Db_conn conn; private Db_stmt stmt_select, stmt_insert, stmt_delete, stmt_update;

View File

@@ -17,12 +17,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
public class Xoh_wtr_ctx {
Xoh_wtr_ctx(byte mode) {this.mode = mode;} private byte mode;
Xoh_wtr_ctx(int mode) {this.mode = mode;}
public int Mode() {return mode;} private final int mode;
public boolean Mode_is_alt() {return mode == Mode_alt;}
public boolean Mode_is_display_title() {return mode == Mode_display_title;}
public boolean Mode_is_popup() {return mode == Mode_popup;}
public boolean Mode_is_hdump() {return mode == Mode_hdump;}
public static final byte Mode_basic = 0, Mode_alt = 1, Mode_display_title = 2, Mode_popup = 3, Mode_hdump = 4;
public static final int Mode_basic = 0, Mode_alt = 1, Mode_display_title = 2, Mode_popup = 3, Mode_hdump = 4;
public static final Xoh_wtr_ctx
Basic = new Xoh_wtr_ctx(Mode_basic)
, Alt = new Xoh_wtr_ctx(Mode_alt)

View File

@@ -37,9 +37,10 @@ public class Xoh_tidy_mgr implements GfoInvkAble {
: (Xoh_tidy_wkr)wkr_tidy
;
}
public void Run_tidy_html(Xoae_page page, Bry_bfr bfr) {
public void Run_tidy_html(Xoae_page page, Bry_bfr bfr, boolean indent) {
if (bfr.Len_eq_0()) return; // document is empty; do not exec b/c tidy will never generate files for 0 len files, and previous file will remain; DATE:2014-06-04
Tidy_wrap(bfr);
wkr.Indent_(indent);
wkr.Exec_tidy(page, bfr);
Tidy_unwrap(bfr);
}

View File

@@ -18,5 +18,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.htmls.core.htmls.tidy; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*;
// Contract for a worker that tidies html; Xoh_tidy_mgr.Run_tidy_html calls Indent_ and then Exec_tidy on the active worker.
public interface Xoh_tidy_wkr {
// id of the worker implementation; EX: Xoh_tidy_wkr_.Tid_null for the no-op worker
byte Tid();
// sets whether the tidied output should be indented
void Indent_(boolean v);
// runs tidy for the given page over the html held in bfr; the process-based worker visible in this commit reloads bfr from the tidy output file
void Exec_tidy(Xoae_page page, Bry_bfr bfr);
}

View File

@@ -38,5 +38,6 @@ public class Xoh_tidy_wkr_ {
}
// No-op tidy worker: reports Tid_null and leaves the buffer untouched.
class Xoh_tidy_wkr_null implements Xoh_tidy_wkr {
public byte Tid() {return Xoh_tidy_wkr_.Tid_null;}
public void Indent_(boolean v) {}	// nothing to configure
public void Exec_tidy(Xoae_page page, Bry_bfr bfr) {}	// bfr is not modified
}

View File

@@ -29,6 +29,7 @@ class Xoh_tidy_wkr_jtidy implements Xoh_tidy_wkr {
public void tidy_init() {
long bgn = Env_.TickCount();
wtr = new ByteArrayOutputStream();
System.setProperty("line.separator", "\n");
tidy = new Tidy(); // obtain a new Tidy instance
tidy.setInputEncoding("UTF-8"); // -utf8
tidy.setOutputEncoding("UTF-8"); // -utf8
@@ -55,6 +56,10 @@ class Xoh_tidy_wkr_jtidy implements Xoh_tidy_wkr {
public void Init_by_app(Xoae_app app) {
this.app = app;
}
public void Indent_(boolean v) {
if (tidy == null) tidy_init(); // lazy create to skip tests
tidy.setIndentContent(v);
}
public void Exec_tidy(Xoae_page page, Bry_bfr bfr) {
if (tidy == null) tidy_init(); // lazy create to skip tests
// int bfr_len = bfr.Len();

View File

@@ -27,24 +27,26 @@ public class Xoh_tidy_wkr_tidy extends Process_adp implements Xoh_tidy_wkr { pr
tidy_target = v.GenSubFil("tidy_target.html");
return super.Tmp_dir_(v);
}
// Stores the value for tidy's "--indent" option: "y" when v is true, else "n".
// NOTE(review): Indent_val is static, so the setting is shared by all instances.
// NOTE(review): Args_fmt is concatenated from Indent_val once in its static initializer; assigning Indent_val here does not change Args_fmt -- confirm that the args are rebuilt elsewhere, else this setter has no effect.
public void Indent_(boolean v) {Indent_val = v ? "y" : "n";}
public void Exec_tidy(Xoae_page page, Bry_bfr bfr) {
int bfr_len = bfr.Len();
long bgn = Env_.TickCount();
Io_mgr.Instance.SaveFilBfr(tidy_source, bfr); // saves bfr to source; clears bfr
this.Run(tidy_source.Raw(), tidy_target.Raw()); // converts source to target
this.Run(tidy_source.Raw(), tidy_target.Raw()); // converts source to target
Io_mgr.Instance.LoadFilBryByBfr(tidy_target, bfr); // loads bfr by target
if (bfr.Len_eq_0()) // something went wrong; load from source
if (bfr.Len_eq_0()) // something went wrong; load from source
Io_mgr.Instance.LoadFilBryByBfr(tidy_source, bfr); // loads bfr by target
app.Usr_dlg().Log_many("", "", "tidy exec; elapsed=~{0} len=~{1}", Env_.TickCount_elapsed_in_frac(bgn), bfr_len);
}
public static final String Args_fmt = String_.Concat // see https://meta.wikimedia.org/wiki/Data_dumps; missing numeric-entities:yes; enclose-text: yes
private static String Indent_val = "y";
public static String Args_fmt = String_.Concat // see https://meta.wikimedia.org/wiki/Data_dumps; missing numeric-entities:yes; enclose-text: yes
( "-utf8" // default is ascii
, " --force-output y" // always generate output; do not fail on error
, " --quiet y" // suppress command-line header
, " --tidy-mark n" // do not add tidy watermark
, " --doctype ''''" // set to empty else some wikis will show paragraph text with little vertical gap; PAGE:tr.b:
, " --wrap 0" // default is 80; do not limit lines to 80 chars
, " --indent y" // indent block levels
, " --indent ", Indent_val // indent block levels
, " --quote-nbsp y" // preserve nbsp as entities; do not convert to Unicode character 160
, " --literal-attributes y" // do not alter whitespace chars in attributes
, " --wrap-attributes n" // do not line-wrap attribute values (assume tidy will try to take a="b\nc" and change to a="b c" which may cause some fidelity issues?)

View File

@@ -35,7 +35,7 @@ public class Xoh_lnki_bldr {
public Xoh_lnki_bldr Id_(byte[] v) {this.id = Html_utl.Escape_for_atr_val_as_bry(tmp_bfr, Byte_ascii.Apos, v); return this;}
public Xoh_lnki_bldr Href_(Xow_wiki wiki, byte[] bry) {return Href_(wiki.Domain_bry(), wiki.Ttl_parse(bry));}
public Xoh_lnki_bldr Href_(byte[] domain_bry, Xoa_ttl ttl) {
href_wtr.Build_to_bfr(tmp_bfr, app, domain_bry, ttl, Bool_.Y);
href_wtr.Build_to_bfr(tmp_bfr, app, Xoh_wtr_ctx.Mode_popup, domain_bry, ttl);
this.href = tmp_bfr.To_bry_and_clear();
return this;
}

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.htmls.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*;
import gplx.langs.htmls.*; import gplx.xowa.htmls.hrefs.*;
import gplx.langs.htmls.*; import gplx.xowa.htmls.hrefs.*; import gplx.xowa.htmls.core.htmls.*;
public class Xoh_lnki_wtr_utl {
private final Xoa_app app; private final Xow_wiki wiki; private final Xoh_href_wtr href_wtr; private final Bry_bfr tmp_bfr = Bry_bfr.new_(255);
public Xoh_lnki_wtr_utl(Xow_wiki wiki, Xoh_href_wtr href_wtr) {
@@ -25,7 +25,7 @@ public class Xoh_lnki_wtr_utl {
}
public byte[] Bld_href(byte[] page) {return Bld_href(wiki.Domain_bry(), wiki.Ttl_parse(page));}
public byte[] Bld_href(byte[] domain_bry, Xoa_ttl ttl) {
href_wtr.Build_to_bfr(tmp_bfr, app, domain_bry, ttl, Bool_.Y);
href_wtr.Build_to_bfr(tmp_bfr, app, Xoh_wtr_ctx.Mode_popup, domain_bry, ttl);
return tmp_bfr.To_bry_and_clear();
}
public byte[] Bld_title(byte[] text) {

View File

@@ -0,0 +1,21 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
// Placeholder class: declares no members in this commit.
public class Xoh_hzip_dict {
}

View File

@@ -20,36 +20,15 @@ import gplx.core.primitives.*; import gplx.core.btries.*;
public class Xoh_hzip_dict_ {
public static final byte Escape = Byte_.By_int(27); // SERIALIZED: 27=escape byte
public static final byte[] Escape_bry = Bry_.new_ints(27); // SERIALIZED
private static final byte Base85_ascii = 33;
public static final byte // SERIALIZED
Tid__space = 0 + Base85_ascii
, Tid__hdr = 1 + Base85_ascii
, Tid__lnke = 2 + Base85_ascii
, Tid__lnki = 3 + Base85_ascii
, Tid__img = 4 + Base85_ascii
, Tid__thm = 5 + Base85_ascii
, Tid__gly = 6 + Base85_ascii
, Tid__escape = 84 + Base85_ascii
;
public static final byte[]
Bry__escape = Bry_.new_ints(Escape, Tid__escape)
, Bry__space = Bry_.new_ints(Escape, Tid__space)
, Bry__hdr = Bry_.new_ints(Escape, Tid__hdr)
, Bry__lnke = Bry_.new_ints(Escape, Tid__lnke)
, Bry__lnki = Bry_.new_ints(Escape, Tid__lnki)
, Bry__img = Bry_.new_ints(Escape, Tid__img)
, Bry__thm = Bry_.new_ints(Escape, Tid__thm)
, Bry__gly = Bry_.new_ints(Escape, Tid__gly)
;
public static final String
Key__escape = "escape"
, Key__space = "space"
, Key__hdr = "hdr"
, Key__lnke = "lnke"
, Key__lnki = "lnki"
, Key__img = "img"
, Key__thm = "thm"
, Key__gly = "gly"
, Key__xnde = "xnde"
;
public static final int Hzip__none = 0, Hzip__v1 = 1;
}

View File

@@ -0,0 +1,148 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.core.primitives.*; import gplx.core.encoders.*;
public class Xoh_hzip_int {
private boolean mode_is_b256; private byte pad_byte; private byte[] prefix_ary;
public Xoh_hzip_int Mode_is_b256_(boolean v) {
mode_is_b256 = v;
if (mode_is_b256) {
pad_byte = Byte_.Zero;
prefix_ary = prefix_ary__b256;
}
else {
pad_byte = Byte_ascii.Bang;
prefix_ary = prefix_ary__b085;
}
return this;
}
public void Encode(int reqd_len, Bry_bfr bfr, int val) {
int calc_len = Calc_len(mode_is_b256, val);
int full_len = Full_len(mode_is_b256, val, calc_len, reqd_len, B256__pow__ary);
int hdr_adj = full_len == calc_len || full_len == reqd_len ? 0 : 1;
int bfr_len = bfr.Len();
bfr.Add_byte_repeat(pad_byte, full_len); // fill with 0s; asserts that underlying array will be large enough for following write
byte[] bfr_bry = bfr.Bfr(); // NOTE: set bry reference here b/c Add_byte_repeat may create a new one
if (mode_is_b256)
Set_bry(val, bfr_bry, bfr_len + hdr_adj, reqd_len, calc_len, pad_byte, B256__pow__ary);
else
Base85_.Set_bry(val, bfr_bry, bfr_len + hdr_adj, reqd_len); // calc base85 val for val; EX: 7224 -> "uu"
if (hdr_adj == 1)
bfr_bry[bfr_len] = prefix_ary[full_len]; // write the hdr_byte; EX: 256 -> 253, 1, 0 where 253 is the hdr_byte
}
public int Decode(int reqd_len, byte[] src, int src_len, int src_bgn, Int_obj_ref pos_ref) {
int radix = 256; byte offset = Byte_.Zero;
boolean hdr_byte_exists = false;
int full_len = 1; // default to 1
byte b0 = src[src_bgn];
if (mode_is_b256) {
switch (b0) {
case prefix__b256__2: full_len = 2; hdr_byte_exists = true; break;
case prefix__b256__3: full_len = 3; hdr_byte_exists = true; break;
case prefix__b256__4: full_len = 4; hdr_byte_exists = true; break;
case prefix__b256__5: full_len = 5; hdr_byte_exists = true; break;
}
}
else {
radix = 85; offset = Byte_ascii.Bang;
switch (b0) {
case Byte_ascii.Curly_bgn : full_len = 3; hdr_byte_exists = true; break;
case Byte_ascii.Pipe : full_len = 4; hdr_byte_exists = true; break;
case Byte_ascii.Curly_end : full_len = 5; hdr_byte_exists = true; break;
case Byte_ascii.Tilde : full_len = 6; hdr_byte_exists = true; break;
}
}
if (full_len < reqd_len) full_len = reqd_len; // len should be padded
int src_end = src_bgn + full_len;
pos_ref.Val_(src_end);
if (hdr_byte_exists) ++src_bgn;
return To_int_by_bry(src, src_bgn, src_end, offset, radix);
}
private static int Calc_len(boolean mode_is_b256, int v) {
if (mode_is_b256) {
if (v < B256__max__expd__1) return 1;
else if (v < B256__max__expd__2) return 2;
else if (v < B256__max__expd__3) return 3;
else return 4;
}
else
return Base85_.Bry_len(v);
}
private static int Full_len(boolean mode_is_b256, int v, int calc_len, int reqd_len, int[] pow_ary) {
int reqd_max = v;
if (mode_is_b256) {
reqd_max = B256__pow__ary[reqd_len]; // EX: if reqd_len = 2, then reqd_max = 65536
int hdr_byte_adj = 1; // default to hdr_byte
if ( calc_len == reqd_len // only do this check if calc_len == reqd_len; i.e.: reqd_len = 2; only want to check values that would be represented with two digits where 1st digit might be 252-255; EX: 64512 is "252, 0" but 252 is reserverd; instead "253, 252, 0"
&& v < (reqd_max - (4 * B256__pow__ary[calc_len - 1])) // calculates if current value will produce a 252-255 range in the 1st byte; note that 4 is for 255-252
) {
hdr_byte_adj = 0;
}
return calc_len + hdr_byte_adj;
}
else {
reqd_max = Base85_.Pow85[reqd_len];
if (v < reqd_max) return reqd_len;
if (v < Base85_.Pow85[2]) return 3;
else if (v < Base85_.Pow85[3]) return 4;
else if (v < Base85_.Pow85[4]) return 5;
else return 6;
}
}
/**
 * Writes {@code val} into {@code src} starting at {@code src_bgn} as {@code calc_len} digits,
 * most-significant first, left-padded with {@code pad_byte} up to {@code reqd_len} when
 * {@code calc_len} is shorter.
 *
 * @param val       value to write
 * @param src       destination array; the caller must have sized it to hold the output
 * @param src_bgn   index in {@code src} of the first byte written
 * @param reqd_len  minimum number of bytes to emit
 * @param calc_len  number of digits {@code val} occupies
 * @param pad_byte  byte used for padding; it is also added to every digit (the digit offset)
 * @param pow_ary   powers of the radix; {@code pow_ary[i]} is the weight of digit position {@code i}
 */
private static void Set_bry(int val, byte[] src, int src_bgn, int reqd_len, int calc_len, byte pad_byte, int[] pow_ary) {
	int pad_len = calc_len < reqd_len ? reqd_len - calc_len : 0;
	int val_len = calc_len + pad_len;	// total bytes written: the larger of calc_len and reqd_len

	// leading pad bytes
	for (int i = 0; i < pad_len; i++)
		src[src_bgn + i] = pad_byte;

	// digits, most-significant first
	int rem = val;
	for (int i = calc_len; i > 0; --i) {
		int div = pow_ary[i - 1];
		// keep the quotient as an int: narrowing it to a signed byte first makes digits >= 128 negative,
		// which corrupts the remainder arithmetic (the written bytes were only right via wraparound)
		int digit = rem / div;
		src[src_bgn + val_len - i] = (byte)(digit + pad_byte);
		rem -= digit * div;
	}
}
/**
 * Parses the digits in {@code src[bgn..end)} (most-significant first) into an int.
 * Each byte is read as unsigned, has {@code offset} subtracted, and is weighted by {@code radix}.
 * Returns 0 when the range is empty.
 */
private static int To_int_by_bry(byte[] src, int bgn, int end, byte offset, int radix) {
	int acc = 0;
	// Horner evaluation, left to right; yields the same int as summing digit * radix^position
	for (int pos = bgn; pos < end; ++pos) {
		int digit = (src[pos] & 0xFF) - offset;	// PATCH.JAVA:need to convert to unsigned byte
		acc = acc * radix + digit;
	}
	return acc;
}
// base-256 thresholds (256^1, 256^2, 256^3); Calc_len compares a value against these to pick 1, 2, 3 or 4 digits
private static final int
B256__max__expd__1 = 256 // 256
, B256__max__expd__2 = 65536 // 65,536
, B256__max__expd__3 = 16777216 // 16,777,216
;
// powers of 256 indexed by exponent; index 4 holds Int_.Max_value because 256^4 does not fit in an int
private static final int[] B256__pow__ary = new int[] {1, B256__max__expd__1, B256__max__expd__2, B256__max__expd__3, Int_.Max_value};
// reserved base-256 lead bytes (252-255); the numeric suffix is the full_len that Decode assigns when it sees that byte (header byte included)
private static final byte prefix__b256__2 = (byte)(252 & 0xFF), prefix__b256__3 = (byte)(253 & 0xFF), prefix__b256__4 = (byte)(254 & 0xFF), prefix__b256__5 = (byte)(255 & 0xFF);
// header-byte tables laid out so that index N holds the header byte which Decode maps to full_len N; unused low indexes are 0
// NOTE(review): presumably indexed by full length on the encode side -- the encoder is not visible here; confirm
private static final byte[]
prefix_ary__b256 = new byte[] {0, 0, prefix__b256__2, prefix__b256__3, prefix__b256__4, prefix__b256__5}
, prefix_ary__b085 = new byte[] {0, 0, 0, Byte_ascii.Curly_bgn, Byte_ascii.Pipe, Byte_ascii.Curly_end, Byte_ascii.Tilde}
;
}

View File

@@ -0,0 +1,86 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import org.junit.*;
// Round-trip tests for Xoh_hzip_int: each case encodes a value with a required minimum length,
// compares the bytes against the expectation, then decodes them back (see Xoh_hzip_int_fxt).
public class Xoh_hzip_int_tst {
private final Xoh_hzip_int_fxt fxt = new Xoh_hzip_int_fxt();
// base-256, minimum length 1: args are (reqd_len, value, expected bytes...)
@Test public void B256__reqd__1() {
fxt.Init__b256();
fxt.Test__b256(1, 0 , 0);
// 252-255 are reserved as header bytes, so a single-byte value in that range is written as header 252 + value
fxt.Test__b256(1, 252 , 252, 252);
fxt.Test__b256(1, 253 , 252, 253);
fxt.Test__b256(1, 254 , 252, 254);
fxt.Test__b256(1, 255 , 252, 255);
// values longer than the required length get a header byte: 253 for 2 digits, 254 for 3, 255 for 4
fxt.Test__b256(1, 256 , 253, 1, 0);
fxt.Test__b256(1, 65535 , 253, 255, 255);
fxt.Test__b256(1, 65536 , 254, 1, 0, 0);
fxt.Test__b256(1, 16777216 , 255, 1, 0, 0, 0);
}
// base-256, minimum length 2: short values are zero-padded on the left and need no header
@Test public void B256__reqd__2() {
fxt.Init__b256();
fxt.Test__b256(2, 0 , 0, 0);
fxt.Test__b256(2, 252 , 0, 252);
fxt.Test__b256(2, 253 , 0, 253);
fxt.Test__b256(2, 254 , 0, 254);
fxt.Test__b256(2, 255 , 0, 255);
fxt.Test__b256(2, 256 , 1, 0);
// 64511 is the largest 2-digit value whose 1st byte (251) is not reserved
fxt.Test__b256(2, 64511 , 251, 255);
// from 64512 the 1st byte would be 252 or higher, so a 253 header is written first
fxt.Test__b256(2, 64512 , 253, 252, 0);
fxt.Test__b256(2, 65535 , 253, 255, 255);
fxt.Test__b256(2, 65536 , 254, 1, 0, 0);
fxt.Test__b256(2, 16777216 , 255, 1, 0, 0, 0);
}
// base-85, minimum length 1: digits are offset by '!'; longer values are prefixed with '{', '|', '}' or '~'
@Test public void B085__reqd__1() {
fxt.Init__b085();
fxt.Test__b085(1, 0, "!");
fxt.Test__b085(1, 84, "u");
fxt.Test__b085(1, 85, "{\"!");
fxt.Test__b085(1, 7225, "|\"!!");
fxt.Test__b085(1, 614125, "}\"!!!");
fxt.Test__b085(1, 52200625, "~\"!!!!");
}
// base-85, minimum length 2: values below 85^2 are written as exactly two digits with no header
@Test public void B085__reqd__2() {
fxt.Init__b085();
fxt.Test__b085(2, 0, "!!");
fxt.Test__b085(2, 84, "!u");
fxt.Test__b085(2, 85, "\"!");
fxt.Test__b085(2, 7225, "|\"!!");
fxt.Test__b085(2, 614125, "}\"!!!");
fxt.Test__b085(2, 52200625, "~\"!!!!");
}
}
/** Test fixture for Xoh_hzip_int: encodes a value, checks the bytes, then checks that decoding restores the value. */
class Xoh_hzip_int_fxt {
	private final Bry_bfr bfr = Bry_bfr.new_();
	private final gplx.core.primitives.Int_obj_ref count_ref = gplx.core.primitives.Int_obj_ref.neg1_();
	private final Xoh_hzip_int hzint = new Xoh_hzip_int();

	/** Switches the encoder under test to base-256. */
	public void Init__b256() {
		hzint.Mode_is_b256_(Bool_.Y);
	}
	/** Switches the encoder under test to base-85. */
	public void Init__b085() {
		hzint.Mode_is_b256_(Bool_.N);
	}
	/** Encodes {@code val} and compares the output with {@code expd_ints}, given as unsigned byte values. */
	public void Test__b256(int reqd, int val, int... expd_ints) {
		byte[] encoded = Encode_to_bry(reqd, val);
		Tfds.Eq_ary(Byte_.Ary_by_ints(expd_ints), encoded, Int_.To_str(val));
		Check_decode(reqd, val, encoded);
	}
	/** Encodes {@code val} and compares the output, read as a UTF-8 string, with {@code expd}. */
	public void Test__b085(int reqd, int val, String expd) {
		byte[] encoded = Encode_to_bry(reqd, val);
		Tfds.Eq(expd, String_.new_u8(encoded));
		Check_decode(reqd, val, encoded);
	}
	// runs the encoder into the shared buffer and drains it, leaving the buffer empty for the next case
	private byte[] Encode_to_bry(int reqd, int val) {
		hzint.Encode(reqd, bfr, val);
		return bfr.To_bry_and_clear();
	}
	// decodes from position 0 and expects the original value back
	private void Check_decode(int reqd, int val, byte[] encoded) {
		Tfds.Eq(val, hzint.Decode(reqd, encoded, encoded.length, 0, count_ref));
	}
}

View File

@@ -16,43 +16,57 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.btries.*; import gplx.xowa.wikis.ttls.*;
import gplx.langs.htmls.parsers.*;
import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.wkrs.*;
public class Xoh_hzip_mgr {
private final Xoh_hdoc_parser hdoc_parser = new Xoh_hdoc_parser(new Xoh_hdoc_wkr__hzip());
import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.btries.*; import gplx.core.threads.poolables.*;
import gplx.langs.htmls.parsers.*; import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.wkrs.*;
import gplx.xowa.wikis.ttls.*;
public class Xoh_hzip_mgr implements Xoh_hzip_wkr {
private final Xoh_hdoc_wkr hdoc_wkr = new Xoh_hdoc_wkr__hzip();
private final Xoh_hdoc_parser hdoc_parser;
private final Bry_rdr rdr = new Bry_rdr().Dflt_dlm_(Xoh_hzip_dict_.Escape);
public Xoh_hzip_mgr() {this.hdoc_parser = new Xoh_hdoc_parser(hdoc_wkr);}
public String Key() {return "root";}
public byte[] Hook() {return hook;} private byte[] hook;
public Xoh_hdoc_ctx Hctx() {return hctx;} private final Xoh_hdoc_ctx hctx = new Xoh_hdoc_ctx();
public void Init_by_app(Xoa_app app) {hctx.Init_by_app(app);}
public void Encode(Bry_bfr bfr, Xow_wiki wiki, Xoh_page hpg, byte[] src) {
public byte[] Encode_as_bry(Xoh_hzip_bfr bfr, Xow_wiki wiki, Xoh_page hpg, byte[] src) {Encode(bfr, wiki, hpg, src); return bfr.To_bry_and_clear();}
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {throw Err_.new_unimplemented();}
public void Encode(Xoh_hzip_bfr bfr, Xow_wiki wiki, Xoh_page hpg, byte[] src) {
hctx.Init_by_page(wiki, hpg.Url_bry_safe());
hdoc_parser.Parse(bfr, hpg, hctx, src);
}
public void Decode(Bry_bfr bfr, Xow_wiki wiki, Xoh_page hpg, byte[] src) {
byte[] page_url = hpg.Url_bry_safe();
byte[] page_url = hpg.Url_bry_safe(); int src_len = src.length;
hctx.Init_by_page(wiki, page_url);
int pos = 0, txt_bgn = -1, src_len = src.length;
rdr.Init_by_page(page_url, src, src_len);
while (pos < src_len) {
if (src[pos] == Xoh_hzip_dict_.Escape) {
if (txt_bgn != -1) {bfr.Add_mid(src, txt_bgn, pos); txt_bgn = -1;} // handle pending txt
int nxt_pos = pos + 1; if (nxt_pos == src_len) break; // handle escape at end of document
Xoh_hzip_wkr wkr = hctx.Mkr().Hzip__wkr(src[nxt_pos]);
try {
rdr.Init_by_hook(wkr.Key(), pos, pos + 2);
wkr.Decode(bfr, Bool_.Y, hctx, hpg, rdr, src, pos);
pos = rdr.Pos();
} catch (Exception e) {
wkr.Pool__rls();
gplx.langs.htmls.Html_utl.Log(e, "hzip decode failed", hpg.Url_bry_safe(), src, pos);
pos += 2; // 2: skip escape and hook
}
}
else {
Decode(bfr, hdoc_wkr, hctx, hpg, Bool_.Y, rdr, src, 0, src_len);
}
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
int pos = src_bgn, txt_bgn = -1;
while (true) {
if (pos == src_end) break;
byte b = src[pos];
Object o = hctx.Wkr_mkr().Get(b, src, pos, src_end);
if (o == null) {
if (txt_bgn == -1) txt_bgn = pos;
++pos;
}
else {
if (txt_bgn != -1) {bfr.Add_mid(src, txt_bgn, pos); txt_bgn = -1;} // handle pending txt
Xoh_hzip_wkr wkr = (Xoh_hzip_wkr)o;
try {
rdr.Init_by_sect(wkr.Key(), pos, pos + 2);
wkr.Decode(bfr, hdoc_wkr, hctx, hpg, Bool_.Y, rdr, src, pos, src_end);
pos = rdr.Pos();
} catch (Exception e) {
gplx.langs.htmls.Html_utl.Log(e, "hzip decode failed", hpg.Url_bry_safe(), src, pos);
pos += 2; // 2: skip escape and hook
}
finally {wkr.Pool__rls();}
}
}
if (txt_bgn != -1) bfr.Add_mid(src, txt_bgn, src_len);
}
if (txt_bgn != -1) bfr.Add_mid(src, txt_bgn, src_end);
return src_end;
}
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_hzip_mgr rv = new Xoh_hzip_mgr(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
}

View File

@@ -16,9 +16,11 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.core.brys.*;
import gplx.core.brys.*; import gplx.core.threads.poolables.*;
import gplx.xowa.htmls.core.wkrs.*;
public interface Xoh_hzip_wkr extends gplx.core.threads.poolables.Gfo_poolable_itm {
String Key();
int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx ctx, Xoh_page hpg, Bry_rdr parser, byte[] src, int hook_bgn);
byte[] Hook();
Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj);
int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end);
}

View File

@@ -0,0 +1,58 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.core.btries.*; import gplx.core.threads.poolables.*;
import gplx.xowa.htmls.core.wkrs.escapes.*;
import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.lnkes.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
import gplx.xowa.htmls.core.wkrs.imgs.*; import gplx.xowa.htmls.core.wkrs.thms.*; import gplx.xowa.htmls.core.wkrs.glys.*;
/**
 * Registry of hzip workers. Each worker type has its own pool, keyed in a trie by a 2-byte hook
 * (escape byte + id byte). Init() must run before any Mw__* accessor or Get is used, since the pools are created there.
 */
public class Xoh_hzip_wkr_mgr {
	private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
	private Gfo_poolable_mgr mkr__escape, mkr__lnke, mkr__lnki, mkr__hdr, mkr__img, mkr__thm, mkr__gly;

	// typed accessors: each takes an item from the matching pool
	public Xoh_escape_hzip Mw__escape() {
		return (Xoh_escape_hzip)mkr__escape.Get_fast();
	}
	public Xoh_hdr_hzip Mw__hdr() {
		return (Xoh_hdr_hzip)mkr__hdr.Get_fast();
	}
	public Xoh_lnke_hzip Mw__lnke() {
		return (Xoh_lnke_hzip)mkr__lnke.Get_fast();
	}
	public Xoh_lnki_hzip Mw__lnki() {
		return (Xoh_lnki_hzip)mkr__lnki.Get_fast();
	}
	public Xoh_img_hzip Mw__img() {
		return (Xoh_img_hzip)mkr__img.Get_fast();
	}
	public Xoh_thm_hzip Mw__thm() {
		return (Xoh_thm_hzip)mkr__thm.Get_fast();
	}
	public Xoh_gly_hzip Mw__gly() {
		return (Xoh_gly_hzip)mkr__gly.Get_fast();
	}

	/** Creates and registers all worker pools using the non-b256 hook form with Byte_ascii.Escape as the lead byte. */
	public void Init() {
		this.Reg_all(false, Byte_ascii.Escape);
	}
	/** Looks up the hook starting at {@code src_bgn}; returns a pooled worker for it, or null when no hook matches. */
	public Xoh_hzip_wkr Get(byte b, byte[] src, int src_bgn, int src_end) {
		Object match = trie.Match_bgn_w_byte(b, src, src_bgn, src_end);
		return match == null
			? null
			: (Xoh_hzip_wkr)((Gfo_poolable_mgr)match).Get_fast();
	}
	// registers one pool per worker type; the 2nd hook byte is the worker's id
	private void Reg_all(boolean mode_is_b256, int escape__mw) {
		mkr__escape = Reg(New_hook_len2(mode_is_b256, escape__mw, 84), new Xoh_escape_hzip());
		mkr__hdr    = Reg(New_hook_len2(mode_is_b256, escape__mw,  1), new Xoh_hdr_hzip());
		mkr__lnke   = Reg(New_hook_len2(mode_is_b256, escape__mw,  2), new Xoh_lnke_hzip());
		mkr__lnki   = Reg(New_hook_len2(mode_is_b256, escape__mw,  3), new Xoh_lnki_hzip());
		mkr__img    = Reg(New_hook_len2(mode_is_b256, escape__mw,  4), new Xoh_img_hzip());
		mkr__thm    = Reg(New_hook_len2(mode_is_b256, escape__mw,  5), new Xoh_thm_hzip());
		mkr__gly    = Reg(New_hook_len2(mode_is_b256, escape__mw,  6), new Xoh_gly_hzip());
	}
	// builds a pool around the prototype (the hook is handed over as the pool's args) and indexes the pool by its hook
	private Gfo_poolable_mgr Reg(byte[] hook, Gfo_poolable_itm proto) {
		Gfo_poolable_mgr pool = Gfo_poolable_mgr_.New(1, 32, proto, Object_.Ary(hook));
		trie.Add_obj(hook, pool);
		return pool;
	}
	// 2-byte hook: b0, then b1 as-is for b256 or shifted by Byte_ascii.Bang otherwise
	private static byte[] New_hook_len2(boolean mode_is_b256, int b0, int b1) {
		int id_byte = b1;
		if (!mode_is_b256) id_byte = b1 + Byte_ascii.Bang;
		return Bry_.new_ints(b0, id_byte);
	}
}

View File

@@ -16,13 +16,14 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
public class Hzip_stat_itm {
import gplx.xowa.htmls.core.wkrs.lnkes.*;
public class Xoh_stat_itm {
public void Clear() {
a_rhs = lnki_text_n = lnki_text_y = lnke__free = lnke__auto = lnke__text = 0;
hdr_1 = hdr_2 = hdr_3 = hdr_4 = hdr_5 = hdr_6 = 0;
img_full = 0;
space = 0;
escape = 0;
Bry_.Clear(escape_bry);
}
public int A_rhs() {return a_rhs;} public void A_rhs_add() {++a_rhs;} private int a_rhs;
public int Lnki_text_n() {return lnki_text_n;} public void Lnki_text_n_add() {++lnki_text_n;} private int lnki_text_n;
@@ -38,7 +39,7 @@ public class Hzip_stat_itm {
public int Hdr_5() {return hdr_5;} private int hdr_5;
public int Hdr_6() {return hdr_6;} private int hdr_6;
public int Space() {return space;} public void Space_add(int v) {space += v;} private int space;
public int Escape() {return escape;} public void Escape_add_one() {++escape;} private int escape;
public byte[] Escape_bry() {return escape_bry;} public void Escape_add(byte v) {escape_bry[v] += 1;} private final byte[] escape_bry = new byte[256];
public void Hdr_add(int hdr_num) {
switch (hdr_num) {
case 1: ++hdr_1; break;
@@ -50,4 +51,13 @@ public class Hzip_stat_itm {
default: throw Err_.new_unhandled(hdr_num);
}
}
public void Lnki_add(int orig_len, int hzip_len, int flag) {
}
public void Lnke_add(byte lnke_type) {
switch (lnke_type) {
case Xoh_lnke_dict_.Type__free: ++lnke__free; break;
case Xoh_lnke_dict_.Type__auto: ++lnke__auto; break;
case Xoh_lnke_dict_.Type__text: ++lnke__text; break;
}
}
}

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.dbs.*; import gplx.dbs.engines.sqlite.*; import gplx.xowa.htmls.core.makes.imgs.*; import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.wikis.pages.*;
public class Hzip_stat_tbl implements RlsAble {
public class Xoh_stat_tbl implements Rls_able {
private static final String tbl_name = "hdump_stats"; private static final Db_meta_fld_list flds = Db_meta_fld_list.new_();
private static final String
fld_page_id = flds.Add_int_pkey("page_id"), fld_wtxt_len = flds.Add_int("wtxt_len"), fld_row_orig_len = flds.Add_int("row_orig_len"), fld_row_zip_len = flds.Add_int("row_zip_len")
@@ -30,7 +30,7 @@ public class Hzip_stat_tbl implements RlsAble {
, fld_img_full = flds.Add_int("img_full")
;
private final Db_conn conn; private Db_stmt stmt_insert;
public Hzip_stat_tbl(Db_conn conn) {
public Xoh_stat_tbl(Db_conn conn) {
this.conn = conn;
this.Create_tbl();
conn.Stmt_delete(tbl_name).Exec_delete(); // always zap table
@@ -40,7 +40,7 @@ public class Hzip_stat_tbl implements RlsAble {
public void Rls() {
stmt_insert = Db_stmt_.Rls(stmt_insert);
}
public void Insert(Xoh_page hpg, Hzip_stat_itm hzip, int wtxt_len, int row_orig_len, int row_zip_len) {
public void Insert(Xoh_page hpg, Xoh_stat_itm hzip, int wtxt_len, int row_orig_len, int row_zip_len) {
Xopg_module_mgr js_mgr = hpg.Head_mgr();
if (stmt_insert == null) stmt_insert = conn.Stmt_insert(tbl_name, flds);
stmt_insert.Clear()

View File

@@ -23,7 +23,7 @@ import gplx.xowa.wikis.domains.*; import gplx.xowa.wikis.ttls.*; import gplx.xow
import gplx.xowa.htmls.core.wkrs.*;
import gplx.xowa.htmls.core.hzips.*;
public class Xoh_make_mgr {
private final Bry_bfr bfr = Bry_bfr.reset_(255), tmp_bfr = Bry_bfr.reset_(255); private final Bry_rdr_old bry_rdr = new Bry_rdr_old(); private Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Instance;
private final Xoh_hzip_bfr bfr = Xoh_hzip_bfr.New_txt(255); private final Bry_bfr tmp_bfr = Bry_bfr.reset_(255); private final Bry_rdr_old bry_rdr = new Bry_rdr_old(); private Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Instance;
private Xoh_cfg_file cfg_file; private final Xof_url_bldr url_bldr = Xof_url_bldr.new_v2(); private Xoh_file_html_fmtr__base html_fmtr;
private final byte[] root_dir, file_dir; private byte[] file_dir_comm, file_dir_wiki, hiero_img_dir; private final byte[] wiki_domain;
private final Bry_rdr parser = new Bry_rdr();

View File

@@ -16,7 +16,8 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.xowa.htmls.core.wkrs.mkrs.*; import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.apps.urls.*;
import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.files.*; import gplx.xowa.apps.fsys.*;
import gplx.xowa.wikis.domains.*; import gplx.xowa.wikis.ttls.*; import gplx.xowa.apps.metas.*;
public class Xoh_hdoc_ctx {
@@ -27,29 +28,34 @@ public class Xoh_hdoc_ctx {
public Xoa_app App() {return app;} private Xoa_app app;
public byte[] Wiki__domain_bry() {return wiki__domain_bry;} private byte[] wiki__domain_bry;
public Xow_ttl_parser Wiki__ttl_parser() {return wiki__ttl_parser;} private Xow_ttl_parser wiki__ttl_parser;
public Xoa_url_parser Wiki__url_parser() {return wiki__url_parser;} private Xoa_url_parser wiki__url_parser;
public boolean Xwiki_mgr__missing(byte[] domain){return app.Xwiki_mgr__missing(domain);}
public Xoa_file_mgr File__mgr() {return file__mgr;} private final Xoa_file_mgr file__mgr = new Xoa_file_mgr();
public Xof_url_bldr File__url_bldr() {return file__url_bldr;} private Xof_url_bldr file__url_bldr = new Xof_url_bldr();
public Xoh_hdoc_mkr Mkr() {return mkr;} private Xoh_hdoc_mkr mkr = new Xoh_hdoc_mkr();
public byte[] Page__url() {return page__url;} private byte[] page__url;
public Hzip_stat_itm Bicode__stat() {return bicode__stat;} private final Hzip_stat_itm bicode__stat = new Hzip_stat_itm();
public Xof_url_bldr File__url_bldr() {return file__url_bldr;} private final Xof_url_bldr file__url_bldr = new Xof_url_bldr();
public Xoh_hzip_wkr_mgr Wkr_mkr() {return wkr_mgr;} private final Xoh_hzip_wkr_mgr wkr_mgr = new Xoh_hzip_wkr_mgr();
public byte[] Page__url() {return page__url;} private byte[] page__url;
public Xoh_stat_itm Hzip__stat() {return hzip__stat;} private final Xoh_stat_itm hzip__stat = new Xoh_stat_itm();
public int Lnki__uid__nxt() {return ++lnki__uid;} private int lnki__uid; // NOTE: should be 0, but for historical reasons, 1st lnki starts at 2; EX: id='xowa_lnki_2'
public boolean Mode_is_diff() {return mode_is_diff;} private boolean mode_is_diff; public void Mode_is_diff_(boolean v) {mode_is_diff = v;}
public void Init_by_app(Xoa_app app) {
Xoa_fsys_mgr fsys_mgr = app.Fsys_mgr();
this.app = app;
this.fsys__root = fsys_mgr.Root_dir().To_http_file_bry();
this.fsys__file = fsys_mgr.File_dir().To_http_file_bry();
this.fsys__file__comm = Bry_.Add(fsys__file, Xow_domain_itm_.Bry__commons, Byte_ascii.Slash_bry);
wkr_mgr.Init();
}
public void Init_by_page(Xow_wiki wiki, byte[] page_url) {
if (fsys__root == null) Init_by_app(wiki.App()); // LAZY INIT
this.wiki__url_parser = wiki.Utl__url_parser();
this.wiki__ttl_parser = wiki;
this.wiki__domain_bry = wiki.Domain_bry();
this.fsys__file__wiki = Bry_.Add(fsys__file, wiki__domain_bry, Byte_ascii.Slash_bry);
this.page__url = page_url;
this.Clear();
}
}
private void Clear() {
bicode__stat.Clear();
hzip__stat.Clear();
this.lnki__uid = 1; // NOTE: should be 0, but for historical reasons, 1st lnki starts at 2; EX: id='xowa_lnki_2'
}
public static final int Invalid = -1;

View File

@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.langs.htmls.parsers.*; import gplx.xowa.wikis.ttls.*;
import gplx.xowa.htmls.core.makes.*; import gplx.xowa.htmls.core.wkrs.tags.*; import gplx.xowa.htmls.core.wkrs.txts.*; import gplx.xowa.htmls.core.wkrs.escapes.*; import gplx.xowa.htmls.core.wkrs.spaces.*;
import gplx.xowa.htmls.core.makes.*; import gplx.xowa.htmls.core.wkrs.tags.*; import gplx.xowa.htmls.core.wkrs.txts.*; import gplx.xowa.htmls.core.wkrs.escapes.*;
public class Xoh_hdoc_parser {
private final Xoh_hdoc_wkr hdoc_wkr;
private final Html_doc_parser hdoc_parser;
@@ -27,11 +27,10 @@ public class Xoh_hdoc_parser {
this.tag_parser = new Xoh_tag_parser(hdoc_wkr);
this.hdoc_parser = new Html_doc_parser(new Xoh_txt_parser(hdoc_wkr)
, tag_parser
, new Xoh_escape_parser(hdoc_wkr)
, new Xoh_space_parser(hdoc_wkr)
, new Xoh_escape_data(hdoc_wkr)
);
}
public void Parse(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src) {
public void Parse(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src) {
int src_len = src.length;
tag_parser.Init(hctx, src, 0, src_len);
hdoc_wkr.On_new_page(bfr, hpg, hctx, src, 0, src_len);

View File

@@ -19,14 +19,13 @@ package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gpl
import gplx.langs.htmls.parsers.*;
import gplx.xowa.wikis.ttls.*;
public interface Xoh_hdoc_wkr {
void On_new_page(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end);
void On_new_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end);
void On_escape (int rng_bgn, int rng_end);
void On_txt (int rng_bgn, int rng_end);
void On_space (int rng_bgn, int rng_end);
void On_hdr (gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser arg);
void On_lnke (gplx.xowa.htmls.core.wkrs.lnkes.Xoh_lnke_parser arg);
void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_parser arg);
void On_img (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_parser arg);
void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_parser arg);
void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_parser arg);
void On_hdr (gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser parser);
void On_lnke (gplx.xowa.htmls.core.wkrs.lnkes.Xoh_lnke_parser parser);
void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_parser parser);
void On_img (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_parser parser);
void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_parser parser);
void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_parser parser);
}

View File

@@ -19,19 +19,19 @@ package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gpl
import gplx.langs.htmls.parsers.*; import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.wikis.ttls.*;
public class Xoh_hdoc_wkr__hzip implements Xoh_hdoc_wkr {
private final Hzip_stat_itm stat_itm = new Hzip_stat_itm();
private Bry_bfr bfr; private Xoh_hdoc_ctx hctx; private byte[] src; private int src_end;
public void On_new_page(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
this.bfr = bfr; this.hctx = hctx; this.src = src; this.src_end = src_end;
private final Xoh_stat_itm stat_itm = new Xoh_stat_itm();
private Xoh_hzip_bfr bfr; private Xoh_hdoc_ctx hctx; private byte[] src;
private Xoh_page hpg;
public void On_new_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
this.bfr = bfr; this.hpg = hpg; this.hctx = hctx; this.src = src;
stat_itm.Clear();
}
public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
public void On_escape (int rng_bgn, int rng_end) {hctx.Mkr().Escape__hzip().Encode(bfr, stat_itm).Pool__rls();}
public void On_space (int rng_bgn, int rng_end) {hctx.Mkr().Space__hzip().Encode(bfr, stat_itm, src, src_end, rng_bgn, rng_end).Pool__rls();}
public void On_hdr (gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser arg) {hctx.Mkr().Hdr__hzip().Encode(bfr, stat_itm, src, arg).Pool__rls();}
public void On_lnke (gplx.xowa.htmls.core.wkrs.lnkes.Xoh_lnke_parser arg) {hctx.Mkr().Lnke__hzip().Encode(bfr, stat_itm, src, arg).Pool__rls();}
public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_parser arg) {hctx.Mkr().Lnki__hzip().Encode(bfr, hctx, stat_itm, src, arg).Pool__rls();}
public void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_parser arg) {hctx.Mkr().Thm__hzip().Encode(bfr, this, stat_itm, src, arg).Pool__rls();}
public void On_img (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_parser arg) {hctx.Mkr().Img__hzip().Encode(bfr, stat_itm, src, arg, Bool_.Y).Pool__rls();}
public void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_parser arg) {hctx.Mkr().Gly__hzip().Encode(bfr, stat_itm, src, arg).Pool__rls();}
public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
public void On_escape (int rng_bgn, int rng_end) {hctx.Wkr_mkr().Mw__escape().Encode(bfr, this, hctx, hpg, Bool_.Y, src, null).Pool__rls();}
public void On_hdr (gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser parser) {hctx.Wkr_mkr().Mw__hdr().Encode(bfr, this, hctx, hpg, Bool_.Y, src, parser).Pool__rls();}
public void On_lnke (gplx.xowa.htmls.core.wkrs.lnkes.Xoh_lnke_parser parser) {hctx.Wkr_mkr().Mw__lnke().Encode(bfr, this, hctx, hpg, Bool_.Y, src, parser).Pool__rls();}
public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_parser parser) {hctx.Wkr_mkr().Mw__lnki().Encode(bfr, this, hctx, hpg, Bool_.Y, src, parser).Pool__rls();}
public void On_img (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_parser parser) {hctx.Wkr_mkr().Mw__img().Encode(bfr, this, hctx, hpg, Bool_.Y, src, parser).Pool__rls();}
public void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_parser parser) {hctx.Wkr_mkr().Mw__thm().Encode(bfr, this, hctx, hpg, Bool_.Y, src, parser).Pool__rls();}
public void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_parser parser) {hctx.Wkr_mkr().Mw__gly().Encode(bfr, this, hctx, hpg, Bool_.Y, src, parser).Pool__rls();}
}

View File

@@ -21,19 +21,18 @@ import gplx.xowa.wikis.ttls.*;
import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*;
public class Xoh_hdoc_wkr__make implements Xoh_hdoc_wkr {
private Bry_bfr bfr; private Xoh_page hpg; private Xoh_hdoc_ctx hctx; private byte[] src;
private Xoh_hzip_bfr bfr; private Xoh_page hpg; private Xoh_hdoc_ctx hctx; private byte[] src;
private final Xoh_hdr_make wkr__hdr = new Xoh_hdr_make();
private final Xoh_img_bldr wkr__img = new Xoh_img_bldr();
public void On_new_page(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
public void On_new_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
this.bfr = bfr; this.hpg = hpg; this.hctx = hctx; this.src = src;
}
public void On_escape (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
public void On_space (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
public void On_lnke (gplx.xowa.htmls.core.wkrs.lnkes.Xoh_lnke_parser arg) {bfr.Add_mid(src, arg.Rng_bgn(), arg.Rng_end());}
public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_parser arg) {bfr.Add_mid(src, arg.Rng_bgn(), arg.Rng_end());}
public void On_hdr (gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser arg) {wkr__hdr.Make(bfr, hpg, src, arg);}
public void On_img (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_parser arg) {wkr__img.Make_by_parse(bfr, hpg, hctx, src, arg);}
public void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_parser arg) {bfr.Add_mid(src, arg.Rng_bgn(), arg.Rng_end());}
public void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_parser arg) {}
public void On_escape (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
public void On_lnke (gplx.xowa.htmls.core.wkrs.lnkes.Xoh_lnke_parser parser) {bfr.Add_mid(src, parser.Src_bgn(), parser.Src_end());}
public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_parser parser) {bfr.Add_mid(src, parser.Src_bgn(), parser.Src_end());}
public void On_hdr (gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser parser) {wkr__hdr.Make(bfr, hpg, src, parser);}
public void On_img (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_parser parser) {wkr__img.Make_by_parse(bfr, hpg, hctx, src, parser);}
public void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_parser parser) {bfr.Add_mid(src, parser.Src_bgn(), parser.Src_end());}
public void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_parser parser) {}
}

View File

@@ -0,0 +1,38 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.xowa.htmls.core.hzips.*;
/**
 * Bry_bfr with hzip helpers: appends byte runs terminated by a stop byte, and appends ints
 * through an Xoh_hzip_int encoder whose radix mode (b256 or not) is set on this buffer.
 */
public class Xoh_hzip_bfr extends Bry_bfr {
	private final Xoh_hzip_int hzint = new Xoh_hzip_int();
	private final byte stop_byte;

	public Xoh_hzip_bfr(int bfr_max, boolean mode_is_b256, byte stop_byte) {
		this.Init(bfr_max);
		this.stop_byte = stop_byte;
		this.Mode_is_b256_(mode_is_b256);
	}
	/** Sets the int-encoder mode; returns this buffer for chaining. */
	public Xoh_hzip_bfr Mode_is_b256_(boolean mode_is_b256) {
		hzint.Mode_is_b256_(mode_is_b256);
		return this;
	}
	/** Appends all of {@code bry}, then the stop byte. */
	public Xoh_hzip_bfr Add_hzip_bry(byte[] bry) {
		this.Add(bry);
		this.Add_byte(stop_byte);
		return this;
	}
	/** Appends {@code src} between {@code bgn} and {@code end} via Add_mid, then the stop byte. */
	public Xoh_hzip_bfr Add_hzip_mid(byte[] src, int bgn, int end) {
		this.Add_mid(src, bgn, end);
		this.Add_byte(stop_byte);
		return this;
	}
	/** Appends {@code val} through the int encoder, passing {@code reqd} as its required length. */
	public Xoh_hzip_bfr Add_hzip_int(int reqd, int val) {
		hzint.Encode(reqd, this, val);
		return this;
	}
	/** Factory: buffer in non-b256 mode that uses Xoh_hzip_dict_.Escape as its stop byte. */
	public static Xoh_hzip_bfr New_txt(int bfr_max) {
		return new Xoh_hzip_bfr(bfr_max, Bool_.N, gplx.xowa.htmls.core.hzips.Xoh_hzip_dict_.Escape);
	}
}

View File

@@ -15,24 +15,29 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.hzips; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.langs.htmls.*;
import gplx.xowa.htmls.core.hzips.*;
public class Xoh_hzip_fxt {
private final Xowe_wiki wiki;
private final Xop_fxt parser_fxt = new Xop_fxt();
private final Bry_bfr bfr = Bry_bfr.new_();
private final Xoh_hzip_bfr bfr = Xoh_hzip_bfr.New_txt(32);
private final Xoh_hzip_mgr hzip_mgr;
private final Xoh_page hpg = new Xoh_page();
private boolean mode_is_b256;
public Xoh_hzip_fxt() {
Xowe_wiki wiki = parser_fxt.Wiki();
this.wiki = parser_fxt.Wiki();
Xoa_app_fxt.repo2_(parser_fxt.App(), wiki); // needed else will be old "mem/wiki/repo/trg/thumb/" instead of standard "mem/file/en.wikipedia.org/thumb/"
wiki.Html__hdump_mgr().Init_by_db(parser_fxt.Wiki());
this.hzip_mgr = parser_fxt.Wiki().Html__hdump_mgr().Hzip_mgr();
hpg.Init(wiki, Xoa_url.blank(), parser_fxt.Wiki().Ttl_parse(Xoa_page_.Main_page_bry), 1);
}
public Xow_wiki Wiki() {return parser_fxt.Wiki();}
public Xow_wiki Wiki() {return wiki;}
public Xoh_hzip_fxt Init_mode_is_b256_(boolean v) {bfr.Mode_is_b256_(v); mode_is_b256 = v; return this;}
public Xoh_hzip_fxt Init_mode_diff_y_() {hzip_mgr.Hctx().Mode_is_diff_(Bool_.Y); return this;}
public void Clear() {hpg.Clear();}
public Xowe_wiki Prep_create_wiki(String alias, String domain) {
public void Init_wiki_installed(String domain) {parser_fxt.Init_xwiki_add_user_(domain);}
public Xowe_wiki Init_wiki_alias(String alias, String domain) {
Xowe_wiki rv = Xoa_app_fxt.wiki_(parser_fxt.App(), domain);
parser_fxt.Wiki().Xwiki_mgr().Add_by_atrs(Bry_.new_u8(alias), Bry_.new_u8(domain), null);
return rv;
@@ -74,7 +79,7 @@ public class Xoh_hzip_fxt {
}
public void Exec_write_to_fsys(Io_url dir, String fil) {
try {
Bry_bfr bfr = Bry_bfr.new_();
Xoh_hzip_bfr bfr = Xoh_hzip_bfr.New_txt(32).Mode_is_b256_(mode_is_b256);
Gfo_usr_dlg_.Test__show__init();
hzip_mgr.Encode(bfr, parser_fxt.Wiki(), hpg, Io_mgr.Instance.LoadFilBry(dir.GenSubFil(fil)));
Gfo_usr_dlg_.Test__show__term();
@@ -82,7 +87,9 @@ public class Xoh_hzip_fxt {
Io_mgr.Instance.SaveFilBry(dir.GenSubFil(fil).GenNewExt(".hzip.html"), actl);
Gfo_usr_dlg_.Test__show__init();
gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_hzip.Md5_depth = 4;
hzip_mgr.Hctx().Mode_is_diff_(Bool_.Y);
hzip_mgr.Decode(bfr, parser_fxt.Wiki(), hpg, actl);
hzip_mgr.Hctx().Mode_is_diff_(Bool_.N);
gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_hzip.Md5_depth = 2;
Gfo_usr_dlg_.Test__show__term();
Io_mgr.Instance.SaveFilBry(dir.GenSubFil(fil).GenNewExt(".hzip.decode.html"), bfr.To_bry_and_clear());

View File

@@ -18,9 +18,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
import gplx.core.brys.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
public abstract class Xoh_itm_parser_fxt_base {
private final Bry_rdr rdr = new Bry_rdr();
public abstract class Xoh_itm_parser_fxt {
private final Xoae_app app;
private final Xowe_wiki wiki;
private final Bry_err_wkr err_wkr = new Bry_err_wkr();
protected byte[] src; protected int src_len;
protected final Xoh_hdoc_ctx hctx = new Xoh_hdoc_ctx();
public Xoh_itm_parser_fxt() {
this.app = Xoa_app_fxt.app_();
this.wiki = Xoa_app_fxt.wiki_tst_(app);
hctx.Init_by_app(app);
}
private Xoh_itm_parser Parser() {return Parser_get();}
public abstract Xoh_itm_parser Parser_get();
public void Test__parse__fail(String src_str, String expd) {
@@ -32,8 +40,9 @@ public abstract class Xoh_itm_parser_fxt_base {
}
public void Exec_parse(String src_str) {
this.src = Bry_.new_u8(src_str); this.src_len = src.length;
rdr.Init_by_page(Xoa_page_.Main_page_bry, src, src_len);
Exec_parse_hook(rdr, 0, src_len);
hctx.Init_by_page(wiki, Xoa_page_.Main_page_bry);
err_wkr.Init_by_page(Xoa_page_.Main_page_str, src);
Exec_parse_hook(err_wkr, hctx, 0, src_len);
}
public abstract void Exec_parse_hook(Bry_rdr owner_rdr, int src_bgn, int src_end);
public abstract void Exec_parse_hook(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, int src_bgn, int src_end);
}

View File

@@ -18,9 +18,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.htmls.core.wkrs.escapes; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
import gplx.xowa.htmls.core.hzips.*;
public class Xoh_escape_parser implements Html_doc_wkr {
public class Xoh_escape_data implements Html_doc_wkr {
private final Xoh_hdoc_wkr wkr;
public Xoh_escape_parser(Xoh_hdoc_wkr wkr) {this.wkr = wkr;}
public Xoh_escape_data(Xoh_hdoc_wkr wkr) {this.wkr = wkr;}
public byte[] Hook() {return Xoh_hzip_dict_.Escape_bry;}
public int Parse(byte[] src, int src_bgn, int src_end, int pos) {
int rv = pos + 1;

View File

@@ -19,18 +19,24 @@ package gplx.xowa.htmls.core.wkrs.escapes; import gplx.*; import gplx.xowa.*; im
import gplx.core.brys.*; import gplx.core.threads.poolables.*;
import gplx.xowa.htmls.core.hzips.*;
public class Xoh_escape_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
private byte escape_byte;
public byte[] Hook() {return hook;} private byte[] hook;
public String Key() {return Xoh_hzip_dict_.Key__escape;}
public Xoh_escape_hzip Encode(Bry_bfr bfr, Hzip_stat_itm stat_itm) {
stat_itm.Escape_add_one();
bfr.Add(Xoh_hzip_dict_.Bry__escape);
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
bfr.Add(hook);
hctx.Hzip__stat().Escape_add(escape_byte);
return this;
}
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx ctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
bfr.Add_byte(Xoh_hzip_dict_.Escape);
return rdr.Pos();
}
public int Pool__idx() {return pool_idx;} private int pool_idx;
public void Pool__clear (Object[] args) {}
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_escape_hzip rv = new Xoh_escape_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
public Gfo_poolable_itm	Pool__make	(Gfo_poolable_mgr mgr, int idx, Object[] args) {
	// build a new pooled instance bound to its owning pool and slot
	Xoh_escape_hzip itm = new Xoh_escape_hzip();
	itm.pool_mgr = mgr;
	itm.pool_idx = idx;
	// args[0] carries the hook bytes; the first hook byte is kept separately as escape_byte
	byte[] hook_bry = (byte[])args[0];
	itm.hook = hook_bry;
	itm.escape_byte = hook_bry[0];
	return itm;
}
}

View File

@@ -23,33 +23,15 @@ import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.ttls.*;
import gplx.xowa.files.*; import gplx.xowa.files.repos.*;
public class Xoh_gly_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
public String Key() {return Xoh_hzip_dict_.Key__gly;}
public Xoh_gly_hzip Encode(Bry_bfr bfr, Hzip_stat_itm stat_itm, byte[] src, Xoh_gly_grp_parser arg) {
bfr.Add_mid(src, arg.Rng_bgn(), arg.Rng_end());
public byte[] Hook() {return hook;} private byte[] hook;
// Encodes a gallery group by copying its source range to bfr as-is: data_obj is cast to Xoh_gly_grp_parser and
// src[Rng_bgn..Rng_end) is appended via Add_mid; no hook or flags are written by this method.
// hdoc_wkr, hctx, hpg and wkr_is_root are not used here. Returns this.
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
Xoh_gly_grp_parser data = (Xoh_gly_grp_parser)data_obj;
bfr.Add_mid(src, data.Rng_bgn(), data.Rng_end());
return this;
}
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx hctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
return hook_bgn;
// Writes nothing to bfr and reads nothing from rdr; only returns a position 2 bytes past src_bgn.
// NOTE(review): the "+ 2" presumably skips a 2-byte hook -- confirm against the hook registered for Key__gly.
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
return src_bgn + 2;
}
public int Pool__idx() {return pool_idx;} private int pool_idx;
public void Pool__clear (Object[] args) {}
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_gly_hzip rv = new Xoh_gly_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
// private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_( 1, 1, 1, 1 , 1, 1, 1, 1 , 2, 1, 1, 1 , 1, 2, 2);
// private static final int // SERIALIZED
// Flag__file__w_diff_from_html = 0
// , Flag__file__time_exists = 1
// , Flag__file__page_exists = 2
// , Flag__file__is_orig = 3
// , Flag__file__repo_is_local = 4
// , Flag__file__src_exists = 5
// , Flag__img__cls_other_exists = 6
// , Flag__anch__ns_is_image = 7
// , Flag__anch__cls_tid = 8 // none, image
// , Flag__anch__ns_id_needs_saving = 9
// , Flag__img__alt_diff_from_anch_title = 10
// , Flag__anch__href_diff_file = 11
// , Flag__anch__title_missing = 12
// , Flag__img__cls_tid = 13 // none, thumbimage, thumbborder
// , Flag__anch__href_tid = 14 // wiki, site, anch, inet
// ;
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_gly_hzip rv = new Xoh_gly_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
}

View File

@@ -20,43 +20,63 @@ import gplx.core.brys.*; import gplx.core.threads.poolables.*; import gplx.xowa.
import gplx.xowa.htmls.core.hzips.*;
public class Xoh_hdr_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
public String Key() {return Xoh_hzip_dict_.Key__hdr;}
public Xoh_hdr_hzip Encode(Bry_bfr bfr, Hzip_stat_itm stat_itm, byte[] src, Xoh_hdr_parser arg) {
int level = arg.Hdr_level();
stat_itm.Hdr_add(level);
bfr.Add(Xoh_hzip_dict_.Bry__hdr); // add hook
bfr.Add_int_digits(1, level); // add level; EX: 2 in <h2>
bfr.Add_mid(src, arg.Capt_bgn(), arg.Capt_end()).Add_byte(Xoh_hzip_dict_.Escape); // add caption
bfr.Add_safe(arg.Anch_bry()); // add anchor
bfr.Add_byte(Xoh_hzip_dict_.Escape); // add escape
public byte[] Hook() {return hook;} private byte[] hook;
// Serializes one parsed header (data_obj must be a Xoh_hdr_parser) into bfr.
// Written in this order: hook, flag int, caption, [anchor if anch_is_diff], [capt_rhs if capt_rhs_exists];
// each Add_hzip_mid call is the buffer's "range + stop byte" writer, so the variable-length parts are delimited.
// hdoc_wkr, hpg and wkr_is_root are not used here. Returns this.
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
Xoh_hdr_parser data = (Xoh_hdr_parser)data_obj;
// load the three flag slots; the Set_as_* calls also hand back the value so it can be reused below
boolean capt_rhs_exists = flag_bldr.Set_as_bool (Flag__capt_rhs_exists , data.Capt_rhs_exists());
boolean anch_is_diff = flag_bldr.Set_as_bool (Flag__anch_is_diff , data.Anch_is_diff());
int hdr_level = flag_bldr.Set_as_int (Flag__hdr_level , data.Hdr_level());
bfr.Add(hook);
// all three flags are packed into one int and written with reqd = 1
bfr.Add_hzip_int(1, flag_bldr.Encode());
bfr.Add_hzip_mid(src, data.Capt_bgn(), data.Capt_end()); // add caption
if (anch_is_diff) bfr.Add_hzip_mid(src, data.Anch_bgn(), data.Anch_end()); // add anchor
if (capt_rhs_exists) bfr.Add_hzip_mid(src, data.Capt_rhs_bgn(), data.Capt_rhs_end());// add capt_rhs
// record the header level in the hzip stats
hctx.Hzip__stat().Hdr_add(hdr_level);
return this;
}
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx ctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
byte level = rdr.Read_byte();
int capt_bgn = rdr.Pos();
int capt_end = rdr.Find_fwd_lr(Xoh_hzip_dict_.Escape);
int anch_bgn = rdr.Pos();
int anch_end = rdr.Find_fwd_lr(Xoh_hzip_dict_.Escape);
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
int flag = rdr.Read_int_by_base85(1); flag_bldr.Decode(flag);
boolean capt_rhs_exists = flag_bldr.Get_as_bool(Flag__capt_rhs_exists);
boolean anch_is_diff = flag_bldr.Get_as_bool(Flag__anch_is_diff);
byte hdr_level = flag_bldr.Get_as_byte(Flag__hdr_level);
bfr.Add(Bry__hdr__0).Add_byte(level);
int capt_bgn = rdr.Pos(); int capt_end = rdr.Find_fwd_lr();
int anch_bgn = -1, anch_end = -1;
if (anch_is_diff) {
anch_bgn = rdr.Pos(); anch_end = rdr.Find_fwd_lr();
}
byte[] capt_manual_end = capt_rhs_exists ? rdr.Read_bry_to() : null;
bfr.Add(Bry__hdr__0).Add_byte_as_a7(hdr_level);
bfr.Add(Bry__hdr__1);
if (anch_end > anch_bgn)
if (anch_is_diff)
bfr.Add_mid (src, anch_bgn, anch_end);
else
bfr.Add_mid_w_swap (src, capt_bgn, capt_end, Byte_ascii.Space, Byte_ascii.Underline);
bfr.Add(Bry__hdr__2);
bfr.Add_mid(src, capt_bgn, capt_end);
bfr.Add(Bry__hdr__3).Add_byte(level);
bfr.Add(Bry__hdr__3);
if (capt_rhs_exists)
bfr.Add(capt_manual_end);
bfr.Add(Bry__hdr__4).Add_byte_as_a7(hdr_level);
bfr.Add_byte(Byte_ascii.Angle_end);
return rdr.Pos();
}
private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_ (1, 1, 3);
private static final int // SERIALIZED
Flag__capt_rhs_exists = 0
, Flag__anch_is_diff = 1
, Flag__hdr_level = 2
;
private static final byte[]
Bry__hdr__0 = Bry_.new_a7("<h")
, Bry__hdr__1 = Bry_.new_a7(">\n <span class=\"mw-headline\" id=\"")
, Bry__hdr__1 = Bry_.new_a7("><span class=\"mw-headline\" id=\"")
, Bry__hdr__2 = Bry_.new_a7("\">")
, Bry__hdr__3 = Bry_.new_a7("</span>\n</h")
, Bry__hdr__3 = Bry_.new_a7("</span>")
, Bry__hdr__4 = Bry_.new_a7("</h")
;
public int Pool__idx() {return pool_idx;} private int pool_idx;
public void Pool__clear (Object[] args) {}
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_hdr_hzip rv = new Xoh_hdr_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_hdr_hzip rv = new Xoh_hdr_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
}

View File

@@ -21,46 +21,53 @@ public class Xoh_hdr_hzip_tst {
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt();
@Test public void Same() {
fxt.Test__bicode(String_.Concat_lines_nl_skip_last
( "~\"6A~~"
( "~\"'A~"
, "a"
), String_.Concat_lines_nl_skip_last
( "<h6>"
, " <span class='mw-headline' id='A'>A</span>"
, "</h6>"
( "<h6><span class='mw-headline' id='A'>A</span></h6>"
, "a"
));
}
@Test public void Diff() {
fxt.Test__bicode(String_.Concat_lines_nl_skip_last
( "~\"2<i>A</i>~A~"
( "~\"+<i>A</i>~A~"
, "a"
), String_.Concat_lines_nl_skip_last
( "<h2>"
, " <span class='mw-headline' id='A'><i>A</i></span>"
, "</h2>"
( "<h2><span class='mw-headline' id='A'><i>A</i></span></h2>"
, "a"
));
}
@Test public void Diff_by_underscore() {
fxt.Test__bicode(String_.Concat_lines_nl_skip_last
( "~\"2A 1~~"
( "~\"#A 1~"
, "a"
), String_.Concat_lines_nl_skip_last
( "<h2>"
, " <span class='mw-headline' id='A_1'>A 1</span>"
, "</h2>"
( "<h2><span class='mw-headline' id='A_1'>A 1</span></h2>"
, "a"
));
}
@Test public void Diff_by_lnki() {
fxt.Test__bicode(String_.Concat_lines_nl_skip_last
( "~\"+<a href=\"/wiki/Category:A\" title=\"Category:A\">Category:A</a>~Category:A~"
, "a"
), String_.Concat_lines_nl_skip_last
( "<h2><span class='mw-headline' id='Category:A'><a href='/wiki/Category:A' title='Category:A'>Category:A</a></span></h2>"
, "a"
));
}
@Test public void Same_w_underscore() {
fxt.Test__bicode(String_.Concat_lines_nl_skip_last
( "~\"2A_1~~"
( "~\"#A_1~"
, "a"
), String_.Concat_lines_nl_skip_last
( "<h2>"
, " <span class='mw-headline' id='A_1'>A_1</span>"
, "</h2>"
( "<h2><span class='mw-headline' id='A_1'>A_1</span></h2>"
, "a"
));
}
@Test public void Tidy__bad_end() {
fxt.Test__bicode(
"~\"?A~AB~B~"
, "<h6><span class='mw-headline' id='AB'>A</span>B</h6>"
);
}
}

View File

@@ -22,14 +22,14 @@ public class Xoh_hdr_make {
public void Make(Bry_bfr bfr, Xoh_page hpg, byte[] src, Xoh_hdr_parser arg) {
// , int rng_bgn, int rng_end, int level, int capt_bgn, int capt_end, byte[] anch
// register section
int rng_bgn = arg.Rng_bgn(), rng_end = arg.Rng_end();
int rng_bgn = arg.Src_bgn(), rng_end = arg.Src_end();
int level = arg.Hdr_level();
Xoh_section_mgr section_mgr = hpg.Section_mgr();
int section_len = section_mgr.Len();
if (section_len != 0) // guard against -1 index; should not happen
section_mgr.Set_content(section_len - 1, src, rng_bgn - 2); // -2 to skip "\n\n"
byte[] capt = Bry_.Mid(src, arg.Capt_bgn(), arg.Capt_end());
byte[] anch = arg.Anch_bry();
byte[] anch = Bry_.Mid(src, arg.Anch_bgn(), arg.Anch_end());
if (anch == null) anch = Bry_.Replace(capt, Byte_ascii.Space, Byte_ascii.Underline);
hpg.Section_mgr().Add(section_len, level, anch, capt).Content_bgn_(rng_end + 1); // +1 to skip "\n"
bfr.Add_mid(src, rng_bgn, rng_end);

View File

@@ -18,26 +18,38 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.htmls.core.wkrs.hdrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
public class Xoh_hdr_parser {
// gplx.xowa.htmls.core.wkrs.hdrs.Xoh_hdr_parser
public int Rng_bgn() {return rng_bgn;} private int rng_bgn;
public int Rng_end() {return rng_end;} private int rng_end;
public int Src_bgn() {return src_bgn;} private int src_bgn;
public int Src_end() {return src_end;} private int src_end;
public int Hdr_level() {return hdr_level;} private int hdr_level;
public int Anch_bgn() {return anch_bgn;} private int anch_bgn;
public int Anch_end() {return anch_end;} private int anch_end;
public boolean Anch_is_diff() {return anch_is_diff;} private boolean anch_is_diff;
public int Capt_bgn() {return capt_bgn;} private int capt_bgn;
public int Capt_end() {return capt_end;} private int capt_end;
public byte[] Anch_bry() {return anch_bry;} private byte[] anch_bry;
public int Parse(Xoh_hdoc_wkr wkr, byte[] src, Html_tag_rdr rdr, int hdr_level, int rng_bgn, Html_tag span) {// <h2><span class='mw-headline' id='A_1'>A 1</span></h2>
this.rng_bgn = rng_bgn; this.hdr_level = hdr_level;
Html_atr anch_atr = span.Atrs__get_by_or_fail(Html_atr_.Bry__id);
int anch_bgn = anch_atr.Val_bgn(), anch_end = anch_atr.Val_end();
this.capt_bgn = span.Src_end();
rdr.Tag__move_fwd_tail(hdr_level); // find </h2> not </span> since <span> can be nested, but <h2> cannot
this.capt_end = rdr.Tag__peek_bwd_tail(Html_tag_.Id__span).Src_bgn(); // get </span> before </h2>
this.anch_bry = null;
if (!Bry_.Match_w_swap(src, capt_bgn, capt_end, src, anch_bgn, anch_end, Byte_ascii.Space, Byte_ascii.Underline))
this.anch_bry = Bry_.Mid(src, anch_bgn, anch_end); // anch is different than capt; occurs with html and dupe-anchors; EX: "==<i>A</i>==" -> id='A'
this.rng_end = rdr.Pos();
wkr.On_hdr(this);
return rng_end;
public int Capt_rhs_bgn() {return capt_rhs_bgn;} private int capt_rhs_bgn;
public int Capt_rhs_end() {return capt_rhs_end;} private int capt_rhs_end;
public boolean Capt_rhs_exists() {return capt_rhs_exists;} private boolean capt_rhs_exists;
public void Clear() {
	// reset all offsets to -1 (not set) and both flags to false so values from a previous Parse cannot leak into the next one
	// NOTE: the final "= -1" must be an assignment; "capt_rhs_end -1" would store (capt_rhs_end - 1) in every field
	this.anch_bgn = anch_end = capt_bgn = capt_end = capt_rhs_bgn = capt_rhs_end = -1;
	this.anch_is_diff = capt_rhs_exists = false;
}
// Parses one header element; EX: <h2><span class='mw-headline' id='A_1'>A 1</span></h2>
// hdr_head is the opening <hN> tag and span_head the opening <span> tag; tag_rdr is advanced to the matching </hN>.
// Records src range, header level, anchor (id attribute) range, caption range and any text between </span> and </hN>,
// then notifies hdoc_wkr.On_hdr(this). Always returns true.
// hctx is not used here.
public boolean Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Html_tag_rdr tag_rdr, byte[] src, Html_tag hdr_head, Html_tag span_head) {
this.Clear();
// the tag's name id doubles as the header level and is also the id passed to Tag__move_fwd_tail below
this.src_bgn = hdr_head.Src_bgn(); this.hdr_level = hdr_head.Name_id();
Html_atr anch_atr = span_head.Atrs__get_by_or_fail(Html_atr_.Bry__id);
this.anch_bgn = anch_atr.Val_bgn(); this.anch_end = anch_atr.Val_end();
// caption starts right after the opening <span ...>
this.capt_bgn = span_head.Src_end();
Html_tag hdr_tail = tag_rdr.Tag__move_fwd_tail(hdr_level); // find </h2> not </span> since <span> can be nested, but <h2> cannot
Html_tag span_tail = tag_rdr.Tag__peek_bwd_tail(Html_tag_.Id__span); // get </span> before </h2>
this.capt_end = span_tail.Src_bgn();
// anything between </span> and </hN> is kept as capt_rhs; EX: <h6><span ...>A</span>B</h6> -> "B"
if (span_tail.Src_end() != hdr_tail.Src_bgn()) {
capt_rhs_exists = true;
capt_rhs_bgn = span_tail.Src_end(); capt_rhs_end = hdr_tail.Src_bgn();
}
// NOTE(review): Match_w_swap presumably treats Space and Underline as interchangeable when comparing caption to id -- confirm in Bry_
this.anch_is_diff = !Bry_.Match_w_swap(src, capt_bgn, capt_end, src, anch_bgn, anch_end, Byte_ascii.Space, Byte_ascii.Underline); // anch is different than capt; occurs with html and dupe-anchors; EX: "==<i>A</i>==" -> id='A'
this.src_end = tag_rdr.Pos();
hdoc_wkr.On_hdr(this);
return true;
}
public static final byte[] Bry__class__mw_headline = Bry_.new_a7("mw-headline");
}

View File

@@ -24,20 +24,20 @@ public class Xoh_img_bldr {
public Xoh_img_wtr Wtr() {return wtr;} private final Xoh_img_wtr wtr = new Xoh_img_wtr();
public Xof_fsdb_itm Fsdb_itm() {return fsdb_itm;} private Xof_fsdb_itm fsdb_itm;
public void Make_by_parse(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, Xoh_img_parser arg) {
Make( bfr, hpg, hctx, src, arg.Img_src().File_ttl_bry(), arg.Img_xoimg()
, arg.Anch_href().Rel_nofollow_exists(), arg.Anch_href().Atr(), arg.Anch_cls().Atr(), arg.Anch_title()
Make( bfr, hpg, hctx, src, arg.Img_src().File_ttl_bry(), arg.Img_xoimg(), arg.Img_xoimg().Val_dat_exists()
, arg.Rel_nofollow_exists(), arg.Anch_href().Atr(), arg.Anch_cls().Atr(), arg.Anch_title()
, arg.Img_w(), arg.Img_h(), arg.Img_src().Atr(), arg.Img_cls().Atr(), arg.Img_alt());
wtr.Bfr_arg__add(bfr);
}
public void Make(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, byte[] lnki_ttl, Xoh_img_xoimg_parser img_xoimg
public void Make(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, byte[] lnki_ttl, Xoh_img_xoimg_parser img_xoimg, boolean img_xoimg_exists
, boolean anch_rel_is_nofollow, Bfr_arg anch_href, Bfr_arg anch_cls, Bfr_arg anch_ttl
, int img_w, int img_h, Bfr_arg img_src, Bfr_arg img_cls, Bfr_arg img_alt) {
wtr.Clear();
this.fsdb_itm = hpg.Img_mgr().Make_img();
if (img_xoimg.Val_dat_exists()) {
if (img_xoimg_exists) {
fsdb_itm.Init_at_lnki(Xof_exec_tid.Tid_wiki_page, hpg.Wiki().Domain_itm().Abrv_xo(), lnki_ttl, img_xoimg.Lnki_type(), img_xoimg.Lnki_upright(), img_xoimg.Lnki_w(), img_xoimg.Lnki_h(), img_xoimg.Lnki_time(), img_xoimg.Lnki_page(), Xof_patch_upright_tid_.Tid_all);
hctx.File__mgr().Check_cache(fsdb_itm);
wtr.Img_xoimg_(src, img_xoimg.Val_bgn(), img_xoimg.Val_end());
wtr.Img_xoimg_(img_xoimg);
wtr.Img_src_empty_().Img_w_(0).Img_h_(0);
}
else if (img_w != -1) {
@@ -45,6 +45,8 @@ public class Xoh_img_bldr {
}
if (anch_rel_is_nofollow) wtr.Anch_rel_nofollow_();
wtr.Anch_href_(anch_href).Anch_cls_(anch_cls).Anch_title_(anch_ttl).Anch_xowa_title_(lnki_ttl);
wtr.Img_id_(Xoh_img_mgr.Bry__html_uid, fsdb_itm.Html_uid()).Img_alt_(img_alt).Img_cls_(img_cls);
if (!hctx.Mode_is_diff())
wtr.Img_id_(Xoh_img_mgr.Bry__html_uid, fsdb_itm.Html_uid());
wtr.Img_alt_(img_alt).Img_cls_(img_cls);
}
}

View File

@@ -17,15 +17,19 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.imgs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.threads.poolables.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*; import gplx.xowa.htmls.hrefs.*; import gplx.xowa.htmls.core.hzips.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*; import gplx.langs.htmls.encoders.*;
import gplx.xowa.htmls.hrefs.*; import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.htmls.core.wkrs.bfr_args.*; import gplx.xowa.htmls.core.wkrs.imgs.atrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*; import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*;
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.ttls.*;
import gplx.xowa.files.*; import gplx.xowa.files.repos.*;
public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
public String Key() {return Xoh_hzip_dict_.Key__img;}
private final Xoh_img_xoimg_parser xoimg_parser = new Xoh_img_xoimg_parser();
public byte[] Hook() {return hook;} private byte[] hook;
private final Xoh_img_xoimg_parser xoimg_parser = new Xoh_img_xoimg_parser();
private final Bry_bfr tmp_bfr = Bry_bfr.new_(32);
public Xoh_img_bldr Bldr() {return bldr;} private Xoh_img_bldr bldr = new Xoh_img_bldr();
public Bfr_arg__href Anch_href_arg() {return anch_href_arg;} private final Bfr_arg__href anch_href_arg = new Bfr_arg__href();
public Xoh_img_xoimg_hzip Xoimg() {return xoimg;} private final Xoh_img_xoimg_hzip xoimg = new Xoh_img_xoimg_hzip();
private final Bry_obj_ref
anch_cls_mid = Bry_obj_ref.New_empty()
, anch_title_mid = Bry_obj_ref.New_empty()
@@ -34,30 +38,30 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
, img_src_mid = Bry_obj_ref.New_empty()
, img_cls_mid = Bry_obj_ref.New_empty()
;
public Xoh_img_hzip Encode(Bry_bfr bfr, Hzip_stat_itm stat_itm, byte[] src, Xoh_img_parser arg, boolean write_hdr) {
// img_map: <img id="xowa_file_img_100" alt="" src="file:///J:/xowa/file/commons.wikimedia.org/orig/b/8/a/7/Solar_System_Template_Final.png" width="666" height="36" usemap="#imagemap_1_1">
Xoh_anch_href_parser anch_href = arg.Anch_href();
Bry_obj_ref anch_page = arg.Anch_page();
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
Xoh_img_parser data = (Xoh_img_parser)data_obj;
Xoh_anch_href_itm anch_href = data.Anch_href();
Bry_obj_ref anch_page = data.Anch_page();
byte anch_href_tid = anch_href.Tid();
boolean anch__ns_id_needs_saving = anch_href.Tid_has_ns() && anch_href.Page_ns_id() != Xow_ns_.Tid__file;
Html_atr anch_title = arg.Anch_title();
Xoa_ttl anch_href_ttl = anch_href.Page_ttl();
Xoh_img_xoimg_parser img_xoimg = arg.Img_xoimg();
Xoh_img_cls_parser img_cls = arg.Img_cls();
Xoh_img_src_parser img_src = arg.Img_src();
boolean img__alt_diff_from_anch_title = arg.Img_alt__diff__anch_title();
boolean anch__ns_is_custom = anch_href.Ttl_ns_custom() != null;
boolean anch__ns_id_needs_saving = Xoh_anch_href_itm.Ns_exists(anch_href.Tid()) && anch_href.Ttl_ns_id() != Xow_ns_.Tid__file;
Html_atr anch_title = data.Anch_title();
Xoh_img_xoimg_parser img_xoimg = data.Img_xoimg();
Xoh_img_cls_parser img_cls = data.Img_cls();
Xoh_img_src_parser img_src = data.Img_src();
boolean img__alt_diff_from_anch_title = data.Img_alt__diff__anch_title();
boolean file__src_exists = !img_xoimg.Val_dat_exists();
boolean anch_href_diff_file = !Bry_.Match(anch_page.Val(), anch_page.Val_bgn(), anch_page.Val_end(), anch_href.Page_ttl().Page_db());
boolean anch_href_diff_file = !Bry_.Match(anch_page.Val(), anch_page.Val_bgn(), anch_page.Val_end(), anch_href.Ttl_page_db());
flag_bldr.Set(Flag__file__w_diff_from_html , file__src_exists && arg.Img_w__diff__file_w());
flag_bldr.Set(Flag__file__w_diff_from_html , file__src_exists && data.Img_w__diff__file_w());
flag_bldr.Set(Flag__file__time_exists , file__src_exists && img_src.File_time_exists());
flag_bldr.Set(Flag__file__page_exists , file__src_exists && img_src.File_page_exists());
flag_bldr.Set(Flag__file__is_orig , file__src_exists && img_src.File_is_orig());
flag_bldr.Set(Flag__file__repo_is_local , file__src_exists && !img_src.Repo_is_commons());
flag_bldr.Set(Flag__file__src_exists , file__src_exists);
flag_bldr.Set(Flag__img__cls_other_exists , img_cls.Other_exists());
flag_bldr.Set(Flag__anch__ns_is_image , anch_href.Page_ns_id_is_image());
flag_bldr.Set(Flag__anch__cls_tid , arg.Anch_cls().Tid());
flag_bldr.Set(Flag__anch__ns_is_custom , anch__ns_is_custom);
flag_bldr.Set(Flag__anch__cls_tid , data.Anch_cls().Tid());
flag_bldr.Set(Flag__anch__ns_id_needs_saving , anch__ns_id_needs_saving);
flag_bldr.Set(Flag__img__alt_diff_from_anch_title , img__alt_diff_from_anch_title);
flag_bldr.Set(Flag__anch__href_diff_file , anch_href_diff_file);
@@ -66,43 +70,52 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
flag_bldr.Set(Flag__anch__href_tid , anch_href.Tid());
// Tfds.Dbg(flag_bldr.Encode(), Array_.To_str(flag_bldr.Val_ary()));
if (write_hdr) bfr.Add(Xoh_hzip_dict_.Bry__img);
if (wkr_is_root) bfr.Add(hook);
Xoh_hzip_int_.Encode(2, bfr, flag_bldr.Encode());
if (anch_href_tid == Xoh_anch_href_parser.Tid__inet)
anch_href_mid.Mid_(src, anch_href.Val_bgn(), anch_href.Val_end());
else
anch_href_mid.Val_(anch_href_ttl.Page_db());
switch (anch_href_tid) {
case Xoh_anch_href_itm.Tid__inet:
anch_href_mid.Mid_(src, anch_href.Rng_bgn(), anch_href.Rng_end());
break;
case Xoh_anch_href_itm.Tid__site:
anch_href_mid.Val_(tmp_bfr.Add_mid(src, anch_href.Site_bgn(), anch_href.Site_end()).Add_byte(Byte_ascii.Pipe).Add(anch_href.Ttl_page_db()).To_bry_and_clear());
break;
case Xoh_anch_href_itm.Tid__wiki:
case Xoh_anch_href_itm.Tid__anch:
anch_href_mid.Val_(anch_href.Ttl_page_db());
break;
}
bfr.Add_bry_ref_obj(anch_href_mid);
bfr.Add_byte(Xoh_hzip_dict_.Escape);
if (anch_href_diff_file) {
arg.Anch_page().Bfr_arg__add(bfr);
data.Anch_page().Bfr_arg__add(bfr);
bfr.Add_byte(Xoh_hzip_dict_.Escape);
}
switch (anch_href_tid) {
case Xoh_anch_href_parser.Tid__anch:
case Xoh_anch_href_parser.Tid__inet:
case Xoh_anch_href_itm.Tid__anch:
case Xoh_anch_href_itm.Tid__inet:
break;
case Xoh_anch_href_parser.Tid__wiki:
case Xoh_anch_href_parser.Tid__site:
case Xoh_anch_href_itm.Tid__wiki:
case Xoh_anch_href_itm.Tid__site:
if (anch__ns_id_needs_saving)
Xoh_lnki_dict_.Ns_encode(bfr, anch_href_ttl.Ns().Id());
Xoh_lnki_dict_.Ns_encode(bfr, anch_href.Ttl_ns_id());
break;
}
if (anch__ns_is_custom) bfr.Add(data.Anch_href().Ttl_ns_custom()).Add_byte(Xoh_hzip_dict_.Escape);
if (file__src_exists) {
Xoh_hzip_int_.Encode(2, bfr, Xoh_hzip_int_.Neg_1_adj + arg.Img_w());
Xoh_hzip_int_.Encode(2, bfr, Xoh_hzip_int_.Neg_1_adj + arg.Img_h());
if (arg.Img_w__diff__file_w()) Xoh_hzip_int_.Encode(2, bfr, Xoh_hzip_int_.Neg_1_adj + img_src.File_w());
Xoh_hzip_int_.Encode(2, bfr, Xoh_hzip_int_.Neg_1_adj + data.Img_w());
Xoh_hzip_int_.Encode(2, bfr, Xoh_hzip_int_.Neg_1_adj + data.Img_h());
if (data.Img_w__diff__file_w()) Xoh_hzip_int_.Encode(2, bfr, Xoh_hzip_int_.Neg_1_adj + img_src.File_w());
if (img_src.File_time_exists()) Xoh_hzip_int_.Encode(1, bfr, Xoh_hzip_int_.Neg_1_adj + img_src.File_time());
if (img_src.File_page_exists()) Xoh_hzip_int_.Encode(1, bfr, Xoh_hzip_int_.Neg_1_adj + img_src.File_page());
}
else
bfr.Add_mid(src, img_xoimg.Val_bgn(), img_xoimg.Val_end()).Add_byte(Xoh_hzip_dict_.Escape);
xoimg.Encode(bfr, hctx.Hzip__stat(), src, img_xoimg);
if (anch_title.Val_dat_exists()) bfr.Add_mid(src, anch_title.Val_bgn(), anch_title.Val_end()).Add_byte(Xoh_hzip_dict_.Escape);
if (img__alt_diff_from_anch_title) bfr.Add_mid(src, arg.Img_alt().Val_bgn(), arg.Img_alt().Val_end()).Add_byte(Xoh_hzip_dict_.Escape);
if (img_cls.Other_exists()) bfr.Add_mid(src, img_cls.Other_bgn(), img_cls.Other_end()).Add_byte(Xoh_hzip_dict_.Escape);
if (img__alt_diff_from_anch_title) bfr.Add_mid(src, data.Img_alt().Val_bgn(), data.Img_alt().Val_end()).Add_byte(Xoh_hzip_dict_.Escape);
if (img_cls.Other_exists()) bfr.Add_mid(src, img_cls.Other_bgn(), img_cls.Other_end()).Add_byte(Xoh_hzip_dict_.Escape);
return this;
}
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx hctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
// decode rdr
int flag = rdr.Read_int_by_base85(2);
flag_bldr.Decode(flag);
@@ -110,7 +123,7 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
boolean file__repo_is_local = flag_bldr.Get_as_bool(Flag__file__repo_is_local);
boolean file__src_exists = flag_bldr.Get_as_bool(Flag__file__src_exists);
boolean img__cls_other_exists = flag_bldr.Get_as_bool(Flag__img__cls_other_exists);
boolean anch__ns_is_image = flag_bldr.Get_as_bool(Flag__anch__ns_is_image);
boolean anch__ns_is_custom = flag_bldr.Get_as_bool(Flag__anch__ns_is_custom);
boolean anch__ns_id_needs_saving = flag_bldr.Get_as_bool(Flag__anch__ns_id_needs_saving);
int anch__cls_tid = flag_bldr.Get_as_int(Flag__anch__cls_tid);
boolean img__alt_diff_from_anch_title = flag_bldr.Get_as_bool(Flag__img__alt_diff_from_anch_title);
@@ -119,21 +132,36 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
int img_cls = flag_bldr.Get_as_int(Flag__img__cls_tid);
int anch__href_tid = flag_bldr.Get_as_int(Flag__anch__href_tid);
byte[] page_db = rdr.Read_bry_to();
byte[] site_bry = null;
switch (anch__href_tid) {
case Xoh_anch_href_itm.Tid__anch:
case Xoh_anch_href_itm.Tid__inet:
case Xoh_anch_href_itm.Tid__wiki:
break;
case Xoh_anch_href_itm.Tid__site:
int pipe_pos = Bry_find_.Find_fwd(page_db, Byte_ascii.Pipe);
site_bry = Bry_.Mid(page_db, 0, pipe_pos);
page_db = Bry_.Mid(page_db, pipe_pos + 1);
break;
}
byte[] file_db = page_db;
if (anch_href_diff_file) file_db = rdr.Read_bry_to();
int anch_href_ns = -1;
if (anch__ns_id_needs_saving)
anch_href_ns = Xoh_lnki_dict_.Ns_decode(rdr);
int img_xoimg_bgn = -1, img_xoimg_end = -1, img_w = -1, img_h = -1, file_time = -1, file_page = -1;
int ns_custom_bgn = -1, ns_custom_end = -1;
if (anch__ns_is_custom) {
ns_custom_bgn = rdr.Pos();
ns_custom_end = rdr.Find_fwd_lr();
}
int img_w = -1, img_h = -1, file_time = -1, file_page = -1;
xoimg_parser.Clear();
if (file__src_exists) {
img_w = rdr.Read_int_by_base85(2) - Xoh_hzip_int_.Neg_1_adj;
img_h = rdr.Read_int_by_base85(2) - Xoh_hzip_int_.Neg_1_adj;
}
else {
img_xoimg_bgn = rdr.Pos();
img_xoimg_end = rdr.Find_fwd_lr();
xoimg_parser.Parse(rdr, src, img_xoimg_bgn, img_xoimg_end);
xoimg.Decode(bfr, hctx, hpg, rdr, src, xoimg_parser);
}
int anch_title_bgn = -1, anch_title_end = -1;
if (!anch__title_missing) {
@@ -156,22 +184,24 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
// transform values
boolean anch_rel_is_nofollow = false;
if (anch__ns_id_needs_saving) {
Xoa_ttl anch_href_ttl = hctx.Wiki__ttl_parser().Ttl_parse(anch_href_ns, page_db);
anch_href_arg.Set_by_page(anch_href_ttl.Full_db());
if (anch__href_tid == Xoh_anch_href_itm.Tid__inet) {
Gfo_url_encoder_.Href.Encode(tmp_bfr, page_db);
anch_rel_is_nofollow = true;
}
else {
if (anch__href_tid == Xoh_anch_href_parser.Tid__inet) {
anch_href_arg.Set_by_raw(gplx.langs.htmls.encoders.Gfo_url_encoder_.Href.Encode(page_db));
anch_rel_is_nofollow = true;
if (anch__href_tid == Xoh_anch_href_itm.Tid__site)
tmp_bfr.Add(Xoh_href_.Bry__site).Add(site_bry);
if (anch__ns_id_needs_saving) {
Xoa_ttl anch_href_ttl = hctx.Wiki__ttl_parser().Ttl_parse(anch_href_ns, page_db);
tmp_bfr.Add(Xoh_href_.Bry__wiki).Add(anch_href_ttl.Full_url());
}
else {
if (anch__ns_is_image)
anch_href_arg.Set_by_raw(Bry_.Add(gplx.xowa.htmls.hrefs.Xoh_href_.Bry__wiki, gplx.xowa.wikis.nss.Xow_ns_.Alias__image__bry, Byte_ascii.Colon_bry, gplx.langs.htmls.encoders.Gfo_url_encoder_.Href.Encode(page_db)));
else
anch_href_arg.Set_by_file(gplx.langs.htmls.encoders.Gfo_url_encoder_.Href.Encode(page_db));
byte[] ns_bry = anch__ns_is_custom ? Bry_.Mid(src, ns_custom_bgn, ns_custom_end) : Xow_ns_.Bry__file;
tmp_bfr.Add(Xoh_href_.Bry__wiki).Add(ns_bry).Add_byte_colon();
Gfo_url_encoder_.Href.Encode(tmp_bfr, page_db);
}
}
anch_href_arg.Set_by_raw(tmp_bfr.To_bry_and_clear());
// NOTE: src must go underneath ttl
Xof_url_bldr url_bldr = hctx.File__url_bldr();
url_bldr.Init_by_root(file__repo_is_local ? hctx.Fsys__file__wiki() : hctx.Fsys__file__comm(), Byte_ascii.Slash, false, false, Md5_depth);
@@ -179,20 +209,18 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
byte[] img_src = url_bldr.Xto_bry();
anch_cls_mid.Val_(Xoh_anch_cls_.To_val(anch__cls_tid));
anch_title_mid.Mid_(src, anch_title_bgn, anch_title_end); if (anch_title_bgn == anch_title_end) anch_title_mid.Val_(null);
if (!anch__title_missing) anch_title_mid.Mid_(src, anch_title_bgn, anch_title_end); else {anch_title_mid.Val_(null);} // if (anch_title_bgn == anch_title_end) anch_title_mid.Val_(null);
img_alt_mid.Mid_(src, img_alt_bgn, img_alt_end); if (img_alt_mid.Val_is_empty()) img_alt_mid.Val_(Bry_.Empty);
img_src_mid.Val_(img_src);
img_cls_mid.Val_(Xoh_img_cls_.To_val_or_null(img_cls, img_cls_other));
bldr.Make(bfr, hpg, hctx, src, file_db, xoimg_parser, anch_rel_is_nofollow, anch_href_arg, anch_cls_mid, anch_title_mid, img_w, img_h, img_src_mid, img_cls_mid, img_alt_mid);
if (write_to_bfr) bldr.Wtr().Bfr_arg__add(bfr);
bldr.Make(bfr, hpg, hctx, src, file_db, xoimg_parser, !file__src_exists, anch_rel_is_nofollow, anch_href_arg, anch_cls_mid, anch_title_mid, img_w, img_h, img_src_mid, img_cls_mid, img_alt_mid);
if (wkr_is_root) bldr.Wtr().Bfr_arg__add(bfr);
return rv;
}
public int Pool__idx() {return pool_idx;} private int pool_idx;
public void Pool__clear (Object[] args) {}
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_img_hzip rv = new Xoh_img_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_img_hzip rv = new Xoh_img_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
public static int Md5_depth = 2;
private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_( 1, 1, 1, 1 , 1, 1, 1, 1 , 2, 1, 1, 1 , 1, 2, 2);
private static final int // SERIALIZED
@@ -203,7 +231,7 @@ public class Xoh_img_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
, Flag__file__repo_is_local = 4
, Flag__file__src_exists = 5
, Flag__img__cls_other_exists = 6
, Flag__anch__ns_is_image = 7
, Flag__anch__ns_is_custom = 7
, Flag__anch__cls_tid = 8 // none, image
, Flag__anch__ns_id_needs_saving = 9
, Flag__img__alt_diff_from_anch_title = 10

View File

@@ -22,32 +22,55 @@ public class Xoh_img_hzip__dump__tst {
@Before public void Clear() {fxt.Clear();}
@Test public void Basic() { // [[File:A.png|border|class=other|220px|abc]]
fxt.Test__bicode
( "~%!!A.png~0|220|110|0.5|-1|-1~abc~"
, "<a href='/wiki/File:A.png' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
( "~%!!A.png~)#Sabc~"
, "<a href='/wiki/File:A.png' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
);
}
@Test public void Anch() { // [[File:A.png#b|abc]]
fxt.Test__bicode
( "~%\"<A.png#file~A.png~)#Sabc~"
, "<a href='/wiki/File:A.png#file' class='image' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
);
}
@Test public void Link__cs() { // [[File:A.png|link=File:a.ogg|abc]]
fxt.Test__bicode
( "~%!Aa.ogg~A.png~)#Sabc~"
, "<a href='/wiki/File:a.ogg' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
);
}
@Test public void Href__encoding() { // [[File:Aéb.png|abc]]
fxt.Test__bicode
( "~%!!Aéb.png~0|220|110|0.5|-1|-1~abc~"
, "<a href='/wiki/File:A%C3%A9b.png' class='image' title='abc' xowa_title='Aéb.png'><img id='xoimg_0' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
( "~%!qAéb.png~)#Sabc~"
, "<a href='/wiki/File:A%C3%A9b.png' class='image' xowa_title='Aéb.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
);
}
@Test public void Href__encoding__link() { // [[File:Aéb.png|abc|link=Aéb]]
fxt.Test__bicode
( "~%#gAéb~Aéb.png~#)#Sabc~"
, "<a href='/wiki/A%C3%A9b' class='image' xowa_title='Aéb.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
);
}
@Test public void Href__apos() { // [[File:A'b.png|border|link=A'b_link|A'b_capt]]
String html = "<a href=\"/wiki/A%27b_link\" class=\"image\" xowa_title=\"A'b.png\"><img id=\"xoimg_0\" data-xoimg=\"0|220|-1|-1|-1|-1\" src=\"\" width=\"0\" height=\"0\" class=\"thumbborder\" alt=\"A'b_capt\"></a>";
fxt.Test__bicode_raw("~%#oA'b_link~A'b.png~#)#SA'b_capt~", html, html);
}
@Test public void Link__wm__n() { // [[File:A.png|link=http://a.org|abc]]
fxt.Test__bicode
( "~%!Dhttp://a.org~A.png~0|220|110|0.5|-1|-1~abc~"
, "<a href='http://a.org' rel='nofollow' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
( "~%!Dhttp://a.org~A.png~)#Sabc~"
, "<a href='http://a.org' rel='nofollow' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
);
}
// @Test public void Link__wm__y() { // [[File:A.png|link=//en.wiktionary.org/wiki/A|abc]]
// fxt.Test__bicode
// ( "~%!i=!!!!A~abc~"
// , "<a href='/site/en.wiktionary.org/wiki/A' class='image' title='abc'><img id='xoimg_0' alt='abc'></a>"
// );
// }
@Test public void Link__wm__y() { // [[File:A.png|link=http://en.wiktionary.org/wiki/Special:Search/A|abc]]
// image whose link targets another wiki; the expected html uses the "/site/<domain>/wiki/..." href form
fxt.Test__bicode
( "~%\"men.wiktionary.org|Search/A~A.png~\")#Sabc~"
, "<a href='/site/en.wiktionary.org/wiki/Special:Search/A' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
);
}
// lhs='<a href="/site/en.wiktionary.org/wiki/Special:Search/A" class="image" title="B" xowa_title="Commons-logo.svg"><img data-xoimg="0|40|40|-1|-1|-1" src="" width="0" height="0" alt="B"></a>
@Test public void Href__image() { // [[Image:A.png|abc]]
fxt.Test__bicode
( "~%-%A.png~0|220|110|0.5|-1|-1~abc~"
, "<a href='/wiki/Image:A.png' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
( "~%-%A.png~Image~)#Sabc~"
, "<a href='/wiki/Image:A.png' class='image' title='abc' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' alt='abc'></a>"
);
}
@Test public void Missing() { // PURPOSE: bad dump shouldn't write corrupt data

View File

@@ -18,12 +18,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.htmls.core.wkrs.imgs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import gplx.core.primitives.*; import gplx.core.brys.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*; import gplx.xowa.htmls.core.wkrs.imgs.atrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*; import gplx.xowa.htmls.core.wkrs.lnkis.htmls.*;
import gplx.xowa.wikis.domains.*;
import gplx.xowa.files.*;
public class Xoh_img_parser {
public class Xoh_img_parser {
private byte[] src;
public int Rng_bgn() {return rng_bgn;} private int rng_bgn;
public int Rng_end() {return rng_end;} private int rng_end;
public Xoh_anch_href_parser Anch_href() {return anch_href;} private Xoh_anch_href_parser anch_href = new Xoh_anch_href_parser();
public int Src_bgn() {return src_bgn;} private int src_bgn;
public int Src_end() {return src_end;} private int src_end;
public Xoh_anch_href_itm Anch_href() {return anch_href;} private Xoh_anch_href_itm anch_href = new Xoh_anch_href_itm();
public Xoh_anch_cls_parser Anch_cls() {return anch_cls;} private Xoh_anch_cls_parser anch_cls = new Xoh_anch_cls_parser();
public Html_atr Anch_title() {return anch_title;} private Html_atr anch_title;
public Bry_obj_ref Anch_page() {return anch_page;} private Bry_obj_ref anch_page = Bry_obj_ref.New_empty();
@@ -35,29 +36,46 @@ public class Xoh_img_parser {
public int Img_w() {return img_w;} private int img_w;
public int Img_h() {return img_h;} private int img_h;
public boolean Img_w__diff__file_w() {return img_w != img_src.File_w();}
public int Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, byte[] src, Html_tag_rdr tag_rdr, Html_tag anch_head) {
this.src = src; Bry_rdr rdr = tag_rdr.Rdr();
this.rng_bgn = anch_head.Src_bgn(); // <a
if (!anch_href.Parse(rdr, hctx.App(), hctx.Wiki__ttl_parser(), anch_head)) return Xoh_hdoc_ctx.Invalid; // href='/wiki/File:A.png'
if (!anch_cls.Parse(rdr, src, anch_head)) return Xoh_hdoc_ctx.Invalid; // class='image'
this.anch_title = anch_head.Atrs__get_by_or_empty(Html_atr_.Bry__title); // title='abc'
Html_atr xowa_title = anch_head.Atrs__get_by_or_empty(Bry__atr__xowa_title); // xowa_title='A.png'
// Returns true when the anchor's href names a site whose domain type is "other".
// The answer is computed at most once per parse and cached in rel_nofollow_exists;
// Bool_.__byte marks the cache as "not yet computed".
public boolean Rel_nofollow_exists() {
	// hrefs without a site segment never report nofollow
	if (!anch_href.Site_exists()) return false;

	// cache is empty: resolve the site's domain and remember the result
	if (rel_nofollow_exists == Bool_.__byte) {
		byte[] site_bry = Bry_.Mid(src, anch_href.Site_bgn(), anch_href.Site_end());
		Xow_domain_itm domain_itm = Xow_domain_itm_.parse(site_bry);
		boolean domain_is_other = domain_itm.Domain_type_id() == Xow_domain_tid_.Int__other;
		rel_nofollow_exists = domain_is_other ? Bool_.Y_byte : Bool_.N_byte;
	}
	return rel_nofollow_exists == Bool_.Y_byte;
}	private byte rel_nofollow_exists;
// Resets per-parse state so values from a previous Parse call cannot leak into the next one.
private void Clear() {
	// tri-state cache for Rel_nofollow_exists: back to "not yet computed"
	this.rel_nofollow_exists = Bool_.__byte;

	// positions and sizes: -1 means "not set"
	this.img_h = -1;
	this.img_w = -1;
	this.src_end = -1;
	this.src_bgn = -1;

	// attributes default to the shared no-op instance
	this.img_alt = Html_atr.Noop;
	this.anch_title = Html_atr.Noop;
}
public boolean Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, byte[] src, Html_tag_rdr tag_rdr, Html_tag anch_head) {
this.Clear();
this.src = src; Bry_err_wkr err_wkr = tag_rdr.Err_wkr();
this.src_bgn = anch_head.Src_bgn(); // <a
if (!anch_href.Parse(err_wkr, hctx, anch_head)) return false; // href='/wiki/File:A.png'
if (!anch_cls.Parse(err_wkr, src, anch_head)) return false; // class='image'
this.anch_title = anch_head.Atrs__get_by_or_empty(Html_atr_.Bry__title); // title='abc'
Html_atr xowa_title = anch_head.Atrs__get_by_or_empty(Bry__atr__xowa_title); // xowa_title='A.png'
if (xowa_title.Val_dat_exists()) anch_page.Val_(xowa_title.Val());
Html_tag img_tag = tag_rdr.Tag__move_fwd_head().Chk_id(Html_tag_.Id__img); // <img
img_xoimg_parser.Parse(rdr, src, img_tag); // data-xoimg='...'
this.img_w = img_tag.Atrs__get_as_int_or(Html_atr_.Bry__width, Xof_img_size.Size__neg1); // width='220'
this.img_h = img_tag.Atrs__get_as_int_or(Html_atr_.Bry__height, Xof_img_size.Size__neg1); // height='110'
this.img_alt = img_tag.Atrs__get_by_or_empty(Html_atr_.Bry__alt); // alt='File:A.png'
img_cls.Parse(rdr, src, img_tag); // class='thumbborder'
if (!img_src.Parse(rdr, hctx.Wiki__domain_bry(), img_tag)) return Xoh_hdoc_ctx.Invalid; // src='...'
Html_tag img_tag = tag_rdr.Tag__move_fwd_head().Chk_id(Html_tag_.Id__img); // <img
img_xoimg_parser.Parse(err_wkr, src, img_tag); // data-xoimg='...'
this.img_w = img_tag.Atrs__get_as_int_or(Html_atr_.Bry__width, Xof_img_size.Size__neg1); // width='220'
this.img_h = img_tag.Atrs__get_as_int_or(Html_atr_.Bry__height, Xof_img_size.Size__neg1); // height='110'
this.img_alt = img_tag.Atrs__get_by_or_empty(Html_atr_.Bry__alt); // alt='File:A.png'
img_cls.Parse(err_wkr, src, img_tag); // class='thumbborder'
if (!img_src.Parse(err_wkr, hctx.Wiki__domain_bry(), img_tag)) return false; // src='...'
if (anch_page.Val_is_empty()) {
anch_page.Val_(img_src.File_ttl_bry());
if (anch_page.Val_is_empty())
anch_page.Val_(anch_href.Page_ttl().Page_db());
anch_page.Val_(anch_href.Ttl_page_db());
}
Html_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__a); // </a>
this.rng_end = anch_tail.Src_end();
return rng_end;
Html_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__a); // </a>
this.src_end = anch_tail.Src_end();
return true;
}
public static final byte[]
Bry__cls__anch__image = Bry_.new_a7("image")

View File

@@ -53,7 +53,7 @@ public class Xoh_img_wtr extends gplx.core.brys.Bfr_arg_base {
public Xoh_img_wtr Img_id_(byte[] prefix, int uid) {img_id.Set_by_arg(img_id_val.Set(prefix, uid)); return this;}
public Xoh_img_wtr Img_w_(int v) {img_w.Set_by_int(v); return this;}
public Xoh_img_wtr Img_h_(int v) {img_h.Set_by_int(v); return this;}
public Xoh_img_wtr Img_xoimg_(byte[] src, int bgn, int end) {img_xoimg.Set_by_mid(src, bgn, end); return this;}
public Xoh_img_wtr Img_xoimg_(Bfr_arg v) {img_xoimg.Set_by_arg(v); return this;}
public Xoh_img_wtr Img_cls_(Bfr_arg v) {img_cls.Set_by_arg(v); return this;}
public Xoh_img_wtr Img_src_(Bfr_arg v) {img_src.Set_by_arg(v); return this;}
public Xoh_img_wtr Img_src_empty_() {img_src.Set_by_bry(Bry_.Empty); return this;}

View File

@@ -22,7 +22,7 @@ public class Xoh_anch_cls_parser {
private final Bry_rdr rdr = new Bry_rdr();
public byte Tid() {return tid;} private byte tid;
public Html_atr Atr() {return atr;} private Html_atr atr;
public boolean Parse(Bry_rdr owner_rdr, byte[] src, Html_tag tag) {
public boolean Parse(Bry_err_wkr err_wkr, byte[] src, Html_tag tag) {
this.atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__class); // EX: class='image'
int src_bgn = atr.Val_bgn(); int src_end = atr.Val_end();
if (src_bgn == -1) {
@@ -30,7 +30,7 @@ public class Xoh_anch_cls_parser {
return false;
}
else {
rdr.Init_by_sub(owner_rdr, "anch.cls", src_bgn, src_end);
rdr.Init_by_wkr(err_wkr, "anch.cls", src_bgn, src_end);
this.tid = rdr.Chk_or(Xoh_anch_cls_.Trie, Byte_ascii.Max_7_bit);
return tid != Byte_ascii.Max_7_bit;
}

View File

@@ -25,17 +25,17 @@ public class Xoh_img_cls_parser {
public int Other_end() {return other_end;} private int other_end;
public boolean Other_exists() {return other_end > other_bgn;}
public Html_atr Atr() {return atr;} private Html_atr atr;
public void Parse(Bry_rdr owner_rdr, byte[] src, Html_tag tag) {
public void Parse(Bry_err_wkr err_wkr, byte[] src, Html_tag tag) {
this.atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__class); // EX: class='thumbborder'
Parse(owner_rdr, src, atr.Val_bgn(), atr.Val_end());
Parse(err_wkr, src, atr.Val_bgn(), atr.Val_end());
}
private void Parse(Bry_rdr owner_rdr, byte[] src, int src_bgn, int src_end) {
private void Parse(Bry_err_wkr err_wkr, byte[] src, int src_bgn, int src_end) {
if (src_bgn == -1) {
this.cls_tid = Xoh_img_cls_.Tid__none;
this.other_bgn = this.other_end = -1;
return;
}
rdr.Init_by_sub(owner_rdr, "img.cls", src_bgn, src_end);
rdr.Init_by_wkr(err_wkr, "img.cls", src_bgn, src_end);
this.cls_tid = rdr.Chk(Xoh_img_cls_.Trie);
if (rdr.Is(Byte_ascii.Space)) {
this.other_bgn = rdr.Pos();

View File

@@ -20,7 +20,7 @@ import gplx.core.brys.*; import gplx.core.btries.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
import gplx.xowa.wikis.domains.*;
public class Xoh_img_src_parser implements Xoh_itm_parser {
private final Bry_rdr rdr = new Bry_rdr(); private byte[] src;
private final Bry_rdr rdr = new Bry_rdr().Dflt_dlm_(Byte_ascii.Slash); private byte[] src;
public void Fail_throws_err_(boolean v) {rdr.Fail_throws_err_(v);}// TEST
public int Val_bgn() {return val_bgn;} private int val_bgn;
public int Val_end() {return val_end;} private int val_end;
@@ -45,18 +45,18 @@ public class Xoh_img_src_parser implements Xoh_itm_parser {
file_ttl_bry = null;
atr = null;
}
public boolean Parse(Bry_rdr owner_rdr, byte[] domain_bry, Html_tag tag) {
public boolean Parse(Bry_err_wkr err_wkr, byte[] domain_bry, Html_tag tag) {
this.Clear();
this.atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__src);
if (!atr.Val_dat_exists()) return true; // empty src; just return true;
return Parse(owner_rdr, domain_bry, atr.Val_bgn(), atr.Val_end());
return Parse(err_wkr, domain_bry, atr.Val_bgn(), atr.Val_end());
}
public boolean Parse(Bry_rdr owner_rdr, byte[] domain_bry, int val_bgn, int val_end) { // EX: src="file:///C:/xowa/file/commons.wikimedia.org/thumb/7/0/1/2/A.png/220px.png"
public boolean Parse(Bry_err_wkr err_wkr, byte[] domain_bry, int val_bgn, int val_end) { // EX: src="file:///C:/xowa/file/commons.wikimedia.org/thumb/7/0/1/2/A.png/220px.png"
this.Clear();
this.src = owner_rdr.Src();
this.src = err_wkr.Src();
this.val_bgn = val_bgn; this.val_end = val_end;
if (val_end == val_bgn) return true; // empty src; just return true;
file_w = file_time = file_page = -1;
rdr.Init_by_sub(owner_rdr, "img.src.xowa", val_bgn, val_end).Dflt_dlm_(Byte_ascii.Slash);
rdr.Init_by_wkr(err_wkr, "img.src.xowa", val_bgn, val_end);
rdr.Fail_throws_err_(Bool_.N);
repo_bgn = rdr.Find_fwd_rr(Bry__file); // skip past /file/; EX: "file:///J:/xowa/file/commons.wikimedia.org/"
if (repo_bgn == -1) return false;

View File

@@ -36,8 +36,7 @@ public class Xoh_img_src_parser_tst {
// fxt.Test__parse__fail("file:///C:/xowa/file/en.wiktionary.org/orig/7/0/A.png", "repo must be commons or self: repo='en.wiktionary.org' ctx='Main_Page' wkr='img.src.xowa' excerpt='file:///C:/xowa/file/en.wiktionary.org/orig/7/0/A.png'");
// }
}
class Xoh_img_src_parser_fxt extends Xoh_itm_parser_fxt_base {
private final Xoh_img_src_parser parser = new Xoh_img_src_parser();
class Xoh_img_src_parser_fxt extends Xoh_itm_parser_fxt { private final Xoh_img_src_parser parser = new Xoh_img_src_parser();
@Override public Xoh_itm_parser Parser_get() {return parser;}
public void Test__parse(String src_str, String expd_repo, boolean expd_file_is_orig, String expd_file, int expd_w, int expd_time, int expd_page) {
Exec_parse(src_str);
@@ -48,7 +47,7 @@ class Xoh_img_src_parser_fxt extends Xoh_itm_parser_fxt_base {
Tfds.Eq_int(expd_time, parser.File_time());
Tfds.Eq_int(expd_page, parser.File_page());
}
@Override public void Exec_parse_hook(Bry_rdr owner_rdr, int src_bgn, int src_end) {
parser.Parse(owner_rdr, Xow_domain_itm_.Bry__enwiki, src_bgn, src_end);
@Override public void Exec_parse_hook(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, int src_bgn, int src_end) {
parser.Parse(err_wkr, Xow_domain_itm_.Bry__enwiki, src_bgn, src_end);
}
}

View File

@@ -0,0 +1,67 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.imgs.atrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*;
import gplx.core.brys.*;
import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.parsers.lnkis.*; import gplx.xowa.files.*;
// Compact serializer for the six data-xoimg values (type, w, h, upright, time, page).
// Layout: one base85 flag char, followed only by the values whose "exists" flag is set,
// always in the order w, h, upright, time, page.
public class Xoh_img_xoimg_hzip {
	// Appends the compact form of "arg" to bfr.
	// NOTE: stat_itm and src are accepted but not read by this method.
	public void Encode(Bry_bfr bfr, Xoh_stat_itm stat_itm, byte[] src, Xoh_img_xoimg_parser arg) {
		// a value is only written when it differs from its "missing" sentinel
		boolean has_page    = arg.Lnki_page()    != Xof_lnki_page.Null;
		boolean has_time    = arg.Lnki_time()    != Xof_lnki_time.Null;
		boolean has_upright = arg.Lnki_upright() != Xof_img_size.Upright_null;
		boolean has_h       = arg.Lnki_h()       != Xof_img_size.Size__neg1;
		boolean has_w       = arg.Lnki_w()       != Xof_img_size.Size__neg1;

		// pack the presence flags and the lnki type into a single int
		flag_bldr.Set(Flag__page_exists    , has_page);
		flag_bldr.Set(Flag__time_exists    , has_time);
		flag_bldr.Set(Flag__upright_exists , has_upright);
		flag_bldr.Set(Flag__height_exists  , has_h);
		flag_bldr.Set(Flag__width_exists   , has_w);
		flag_bldr.Set(Flag__lnki_type      , arg.Lnki_type());

		// flag first; Decode reads it back with Read_int_by_base85(1)
		Xoh_hzip_int_.Encode(1, bfr, flag_bldr.Encode());

		// optional values; the order here must mirror the read order in Decode
		if (has_w) {
			Xoh_hzip_int_.Encode(2, bfr, arg.Lnki_w());
		}
		if (has_h) {
			Xoh_hzip_int_.Encode(2, bfr, arg.Lnki_h());
		}
		if (has_upright) {	// doubles are written as text and terminated by the escape byte
			bfr.Add_double(arg.Lnki_upright()).Add_byte(Xoh_hzip_dict_.Escape);
		}
		if (has_time) {
			bfr.Add_double(arg.Lnki_time()).Add_byte(Xoh_hzip_dict_.Escape);
		}
		if (has_page) {
			Xoh_hzip_int_.Encode(2, bfr, arg.Lnki_page());
		}
	}
	// Reads the compact form from rdr and stores the six values on "arg" via arg.Set.
	// NOTE: bfr, hctx, hpg and src are accepted but not read by this method.
	public void Decode(Bry_bfr bfr, Xoh_hdoc_ctx hctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, Xoh_img_xoimg_parser arg) {
		// unpack the flag char
		int packed_flag = rdr.Read_int_by_base85(1);
		flag_bldr.Decode(packed_flag);
		boolean has_page    = flag_bldr.Get_as_bool(Flag__page_exists);
		boolean has_time    = flag_bldr.Get_as_bool(Flag__time_exists);
		boolean has_upright = flag_bldr.Get_as_bool(Flag__upright_exists);
		boolean has_h       = flag_bldr.Get_as_bool(Flag__height_exists);
		boolean has_w       = flag_bldr.Get_as_bool(Flag__width_exists);
		byte lnki_type      = flag_bldr.Get_as_byte(Flag__lnki_type);

		// start every value at its "missing" sentinel; overwrite only if it was serialized
		int lnki_w          = Xof_img_size.Size__neg1;
		int lnki_h          = Xof_img_size.Size__neg1;
		double lnki_upright = Xof_img_size.Upright_null;
		double lnki_time    = Xof_lnki_time.Null;
		int lnki_page       = Xof_lnki_page.Null;

		// reads advance rdr, so they must happen in the same order Encode wrote them
		if (has_w)       lnki_w       = rdr.Read_int_by_base85(2);
		if (has_h)       lnki_h       = rdr.Read_int_by_base85(2);
		if (has_upright) lnki_upright = rdr.Read_double_to(Xoh_hzip_dict_.Escape);
		if (has_time)    lnki_time    = rdr.Read_double_to(Xoh_hzip_dict_.Escape);
		if (has_page)    lnki_page    = rdr.Read_int_by_base85(2);

		arg.Set(lnki_type, lnki_w, lnki_h, lnki_upright, lnki_time, lnki_page);
	}
	// one size entry per flag index below (presumably the number of values / bits each slot may hold -- TODO confirm against Int_flag_bldr)
	private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_(1, 1, 1, 1, 1, 3);
	// SERIALIZED: these indexes are baked into the encoded flag char; renumbering would presumably break previously encoded data
	private static final int Flag__page_exists    = 0;	// lnki_page != Xof_lnki_page.Null
	private static final int Flag__time_exists    = 1;	// lnki_time != Xof_lnki_time.Null
	private static final int Flag__upright_exists = 2;	// lnki_upright != Xof_img_size.Upright_null
	private static final int Flag__height_exists  = 3;	// lnki_h != Xof_img_size.Size__neg1
	private static final int Flag__width_exists   = 4;	// lnki_w != Xof_img_size.Size__neg1
	private static final int Flag__lnki_type      = 5;	// null, none, frameless, frame, thumb; gplx.xowa.parsers.lnkis.Xop_lnki_type
}

View File

@@ -18,8 +18,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.htmls.core.wkrs.imgs.atrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*;
import gplx.core.brys.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
public class Xoh_img_xoimg_parser {
private final Bry_rdr rdr = new Bry_rdr();
public class Xoh_img_xoimg_parser implements Bfr_arg {
private final Bry_rdr rdr = new Bry_rdr().Dflt_dlm_(Byte_ascii.Pipe);
public int Val_bgn() {return val_bgn;} private int val_bgn;
public int Val_end() {return val_end;} private int val_end;
public boolean Val_dat_exists() {return val_end > val_bgn;}
@@ -32,15 +32,23 @@ public class Xoh_img_xoimg_parser {
public void Clear() {
val_bgn = val_end = -1;
}
public void Parse(Bry_rdr owner_rdr, byte[] src, Html_tag tag) {
Html_atr atr = tag.Atrs__get_by_or_empty(Bry__name);
Parse(owner_rdr, src, atr.Val_bgn(), atr.Val_end());
// Overwrites all six data-xoimg values at once, bypassing Parse.
// Does not touch val_bgn / val_end; only the lnki_* fields are assigned.
public void Set(byte tid, int w, int h, double upright, double time, int page) {
	// kind of lnki
	this.lnki_type    = tid;

	// requested dimensions and scale
	this.lnki_w       = w;
	this.lnki_h       = h;
	this.lnki_upright = upright;

	// position inside the file: time offset and page number
	this.lnki_time    = time;
	this.lnki_page    = page;
}
public void Parse(Bry_rdr owner_rdr, byte[] src, int src_bgn, int src_end) {
public void Parse(Bry_err_wkr err_wkr, byte[] src, Html_tag tag) {
Html_atr atr = tag.Atrs__get_by_or_empty(Bry__name);
Parse(err_wkr, src, atr.Val_bgn(), atr.Val_end());
}
public void Parse(Bry_err_wkr err_wkr, byte[] src, int src_bgn, int src_end) {
if (src_bgn == -1)
this.Clear();
else {
rdr.Init_by_sub(owner_rdr, "img.xoimg", src_bgn, src_end).Dflt_dlm_(Byte_ascii.Pipe);
rdr.Init_by_wkr(err_wkr, "img.xoimg", src_bgn, src_end);
this.val_bgn = src_bgn;
this.val_end = src_end;
this.lnki_type = (byte)(rdr.Read_byte_to() - Byte_ascii.Num_0);
@@ -51,6 +59,16 @@ public class Xoh_img_xoimg_parser {
this.lnki_page = rdr.Read_int_to();
}
}
// Bfr_arg member: intentionally a no-op; this method resets nothing (use Clear() to reset val_bgn / val_end)
public void Bfr_arg__clear() {}
// Bfr_arg member: always returns true; presumably this tells the caller there is always a value to write -- TODO confirm against Bfr_arg callers
public boolean Bfr_arg__exists() {return true;}
// Bfr_arg member: writes the six values as a pipe-delimited list: "type|w|h|upright|time|page"
// Same field order that Parse reads; no trailing pipe after the last value
public void Bfr_arg__add(Bry_bfr bfr) {
bfr.Add_int_variable(lnki_type).Add_byte_pipe();
bfr.Add_int_variable(lnki_w).Add_byte_pipe();
bfr.Add_int_variable(lnki_h).Add_byte_pipe();
bfr.Add_double(lnki_upright).Add_byte_pipe();
bfr.Add_double(lnki_time).Add_byte_pipe();
bfr.Add_int_variable(lnki_page); // last value: no delimiter
}
public static final byte[]
Bry__name = Bry_.new_a7("data-xoimg")
, Bry__html = Bry_.new_a7("\" data-xoimg=\"")

View File

@@ -30,8 +30,8 @@ public class Xoh_lnke_dict_ {
, Html__rhs_end = Bry_.new_a7("\">")
;
public static byte[]
Html__rel__nofollow = Bry_.new_a7("nofollow")
, Html__cls__external = Bry_.new_a7("external")
Html__rel__nofollow = Bry_.new_a7("nofollow")
, Html__cls__external = Bry_.new_a7("external")
;
public static final Hash_adp_bry Hash = Hash_adp_bry.ci_a7()
.Add_bry_byte(Html__class__free, Type__free)

View File

@@ -26,7 +26,7 @@ public class Xoh_lnke_html {
byte lnke_type = Calc_type(lnke);
if (!hctx.Mode_is_alt()) { // do not write "<a ...>" if mode is alt
bfr.Add(Xoh_consts.A_bgn);
if (Write_href(bfr, ctx, src, lnke, href_bgn, href_end, proto_is_xowa))
if (Write_href(bfr, hctx, ctx, src, lnke, href_bgn, href_end, proto_is_xowa))
bfr.Add(Xoh_lnke_dict_.Html__atr__0).Add(Xoh_lnke_dict_.To_html_class(lnke_type));
bfr.Add(Xoh_lnke_dict_.Html__rhs_end);
}
@@ -37,9 +37,9 @@ public class Xoh_lnke_html {
bfr.Add(Xoh_consts.A_end);
}
}
public boolean Write_href(Bry_bfr bfr, Xop_ctx ctx, byte[] src, Xop_lnke_tkn lnke, int href_bgn, int href_end, boolean proto_is_xowa) {
public boolean Write_href(Bry_bfr bfr, Xoh_wtr_ctx hctx, Xop_ctx ctx, byte[] src, Xop_lnke_tkn lnke, int href_bgn, int href_end, boolean proto_is_xowa) {
byte[] lnke_xwiki_wiki = lnke.Lnke_xwiki_wiki();
if (lnke_xwiki_wiki == null) {
if (lnke_xwiki_wiki == null || hctx.Mode_is_hdump()) { // if hdump, never write xwiki format (/site/); always write in url format (https:); note that xwiki is set when wiki is installed locally
if (lnke.Lnke_relative()) { // relative; EX: //a.org
bfr.Add(ctx.Wiki().Utl__url_parser().Url_parser().Relative_url_protocol_bry()).Add_mid(src, href_bgn, href_end);
return true;
@@ -62,7 +62,7 @@ public class Xoh_lnke_html {
.Add(href_encoder.Encode(lnke.Lnke_xwiki_page())); // NOTE: must encode page; EX:%22%3D -> '">' which will end attribute; PAGE:en.w:List_of_Category_A_listed_buildings_in_West_Lothian DATE:2014-07-15
if (lnke.Lnke_xwiki_qargs() != null)
Gfo_qarg_mgr.Concat_bfr(bfr, href_encoder, lnke.Lnke_xwiki_qargs()); // NOTE: must encode args
return ctx.Wiki().App().Xwiki_mgr__missing(lnke_xwiki_wiki);
return ctx.Wiki().App().Xwiki_mgr__missing(lnke_xwiki_wiki); // write "external" if hdump or xwiki is missing
}
}
public void Write_caption(Bry_bfr bfr, Xoh_html_wtr html_wtr, Xoh_wtr_ctx hctx, Xop_ctx ctx, byte[] src, Xop_lnke_tkn lnke, int href_bgn, int href_end, boolean proto_is_xowa) {

View File

@@ -32,4 +32,15 @@ public class Xoh_lnke_html__basic__tst {
fxt.Wiki().Sys_cfg().Xowa_proto_enabled_(false);
fxt.Test_parse_page_wiki_str("[xowa-cmd:\"a\" b]" , "[xowa-cmd:&quot;a&quot; b]"); // protocol is disabled: literalize String (i.e.: don't make it an anchor)
}
// PURPOSE: a protocol-relative lnke to another wiki is written as a url or as "/site/" depending on xwiki registration and html mode
@Test public void Xwiki() {
	String src = "[//en.wiktionary.org/wiki/A B]";
	String expd_as_url   = "<a href='https://en.wiktionary.org/wiki/A' rel='nofollow' class='external text'>B</a>";
	String expd_as_xwiki = "<a href='/site/en.wiktionary.org/wiki/A'>B</a>";

	// 1: wiki not registered -> external https url
	fxt.Test__parse__wtxt_to_html(src, expd_as_url);

	// 2: wiki registered as xwiki -> internal "/site/" href
	fxt.Init_xwiki_add_user_("en.wiktionary.org");
	fxt.Test__parse__wtxt_to_html(src, expd_as_xwiki);

	// 3: hdump mode -> external https url again, even though the wiki is still registered
	fxt.Hctx_(gplx.xowa.htmls.core.htmls.Xoh_wtr_ctx.Hdump);
	fxt.Test__parse__wtxt_to_html(src, expd_as_url);

	// put the fixture back into Basic mode
	fxt.Hctx_(gplx.xowa.htmls.core.htmls.Xoh_wtr_ctx.Basic);
}
}

View File

@@ -20,71 +20,50 @@ import gplx.core.brys.*; import gplx.core.threads.poolables.*; import gplx.xowa.
import gplx.langs.htmls.*; import gplx.xowa.htmls.core.hzips.*;
public class Xoh_lnke_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
public String Key() {return Xoh_hzip_dict_.Key__lnke;}
public Xoh_lnke_hzip Encode(Bry_bfr bfr, Hzip_stat_itm stat_itm, byte[] src, Xoh_lnke_parser arg) {
byte anch_cls_type = arg.Anch_cls_type();
boolean auto_exists = arg.Auto_id() != -1;
boolean text_exists = arg.Capt_end() != -1;
flag_bldr.Set(Flag__auto_exists , auto_exists);
flag_bldr.Set(Flag__text_exists , text_exists);
flag_bldr.Set(Flag__anch_cls , anch_cls_type);
public byte[] Hook() {return hook;} private byte[] hook;
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
Xoh_lnke_parser data = (Xoh_lnke_parser)data_obj;
boolean auto_exists = flag_bldr.Set_as_bool(Flag__auto_exists , data.Auto_id() != -1);
boolean capt_exists = flag_bldr.Set_as_bool(Flag__capt_exists , data.Capt_exists());
byte lnke_tid = flag_bldr.Set_as_byte(Flag__lnke_tid , data.Lnke_tid());
switch (anch_cls_type) {
case Xoh_lnke_dict_.Type__free: stat_itm.Lnke__free__add();break;
case Xoh_lnke_dict_.Type__auto: stat_itm.Lnke__auto__add(); break;
case Xoh_lnke_dict_.Type__text: stat_itm.Lnke__text__add(); break;
}
bfr.Add(hook);
bfr.Add_hzip_int(1, flag_bldr.Encode()); // add flag
bfr.Add_hzip_mid(src, data.Href_bgn(), data.Href_end()); // add href
if (auto_exists) bfr.Add_hzip_int(1, data.Auto_id()); // add autonumber
if (capt_exists) bfr.Add_hzip_mid(src, data.Capt_bgn(), data.Capt_end()); // add caption
bfr.Add(Xoh_hzip_dict_.Bry__lnke); // add hook
Xoh_hzip_int_.Encode(1, bfr, flag_bldr.Encode()); // add flag
bfr.Add_mid(src, arg.Href_bgn(), arg.Href_end()); // add href
bfr.Add_byte(Xoh_hzip_dict_.Escape);
if (auto_exists)
Xoh_hzip_int_.Encode(1, bfr, arg.Auto_id());
else if (text_exists) {
bfr.Add_mid(src, arg.Capt_bgn(), arg.Capt_end()); // add capt
bfr.Add_byte(Xoh_hzip_dict_.Escape);
}
hctx.Hzip__stat().Lnke_add(lnke_tid);
return this;
}
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx ctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
int flag = rdr.Read_int_by_base85(1);
flag_bldr.Decode(flag);
boolean auto_exists = flag_bldr.Get_as_bool(Flag__auto_exists);
boolean text_exists = flag_bldr.Get_as_bool(Flag__text_exists);
byte anch_cls_type = flag_bldr.Get_as_byte(Flag__anch_cls);
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
int flag = rdr.Read_int_by_base85(1); flag_bldr.Decode(flag);
boolean auto_exists = flag_bldr.Get_as_bool(Flag__auto_exists);
boolean capt_exists = flag_bldr.Get_as_bool(Flag__capt_exists);
byte lnke_tid = flag_bldr.Get_as_byte(Flag__lnke_tid);
int href_bgn = rdr.Pos();
int href_end = rdr.Find_fwd_lr();
int href_bgn = rdr.Pos(); int href_end = rdr.Find_fwd_lr();
int auto_id = -1, capt_bgn = -1, capt_end = -1;
if (auto_exists)
auto_id = rdr.Read_int_by_base85(1);
else if (text_exists) {
capt_bgn = rdr.Pos();
capt_end = rdr.Find_fwd_lr();
}
if (auto_exists) auto_id = rdr.Read_int_by_base85(1);
if (capt_exists) {capt_bgn = rdr.Pos(); capt_end = rdr.Find_fwd_lr();}
int rv = rdr.Pos();
bfr.Add(Html_bldr_.Bry__a_lhs_w_href);
bfr.Add_mid(src, href_bgn, href_end);
bfr.Add(Xoh_lnke_dict_.Html__atr__0).Add(Xoh_lnke_dict_.To_html_class(anch_cls_type)).Add(Xoh_lnke_dict_.Html__rhs_end);
if (auto_exists)
bfr.Add_byte(Byte_ascii.Brack_bgn).Add_int_variable(auto_id).Add_byte(Byte_ascii.Brack_end);
else if (text_exists)
bfr.Add_mid(src, capt_bgn, capt_end);
else
bfr.Add_mid(src, href_bgn, href_end);
bfr.Add(Xoh_lnke_dict_.Html__atr__0).Add(Xoh_lnke_dict_.To_html_class(lnke_tid)).Add(Xoh_lnke_dict_.Html__rhs_end);
if (auto_exists) bfr.Add_byte(Byte_ascii.Brack_bgn).Add_int_variable(auto_id).Add_byte(Byte_ascii.Brack_end);
else if (capt_exists) bfr.Add_mid(src, capt_bgn, capt_end);
else bfr.Add_mid(src, href_bgn, href_end);
bfr.Add(Html_bldr_.Bry__a_rhs);
return rv;
}
public int Pool__idx() {return pool_idx;} private int pool_idx;
public void Pool__clear (Object[] args) {}
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_lnke_hzip rv = new Xoh_lnke_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_lnke_hzip rv = new Xoh_lnke_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_ (1, 1, 2);
private static final int // SERIALIZED
Flag__auto_exists = 0
, Flag__text_exists = 1
, Flag__anch_cls = 2 // "free", "autonumber", "text"
, Flag__capt_exists = 1
, Flag__lnke_tid = 2 // "free", "autonumber", "text"
;
}

View File

@@ -22,18 +22,33 @@ public class Xoh_lnke_hzip_tst {
@Test public void Free() {
fxt.Test__bicode("~#!http://a.org~", Xoh_lnke_html__hdump__tst.Html__free);
}
@Test public void Free__ws_at_end() {
fxt.Test__bicode("~#%https://a.org/. ~https://a.org/.~", "<a href='https://a.org/. ' rel='nofollow' class='external free'>https://a.org/.</a>");
}
@Test public void Auto() {
fxt.Test__bicode("~#*http://a.org~\"", Xoh_lnke_html__hdump__tst.Html__auto);
}
@Test public void Text() {
fxt.Test__bicode("~#'http://a.org~a~", Xoh_lnke_html__hdump__tst.Html__text);
}
@Test public void Text__tidy() { // PURPOSE:handle reparenting of html elements by HTML tidy EX:<font color="red">[http://a.org]</font>; DATE:2015-08-25
@Test public void Auto__tidy() { // PURPOSE:handle reparenting of html elements by HTML tidy EX:<font color="red">[http://a.org]</font>; DATE:2015-08-25
fxt.Test__bicode
( "~#&http://a.org~<font color=\"red\">[123]</font>~"
, "<a href=\"http://a.org\" rel=\"nofollow\" class=\"external autonumber\"><font color=\"red\">[123]</font></a>"
);
}
@Test public void Auto__invalid_number() {
String html = "<a href='http://a.org' rel='nofollow' class='external autonumber'>[abc]</a>";
fxt.Test__bicode("~#&http://a.org~[abc]~", html);
}
@Test public void Text() {
fxt.Test__bicode("~#'http://a.org~a~", Xoh_lnke_html__hdump__tst.Html__text);
}
// @Test public void Xwiki__exists() {
// String hzip = "~#'https://en.wiktionary.org/wiki/A~A~";
// String html_https = "<a href='https://en.wiktionary.org/wiki/A' rel='nofollow' class='external text'>A</a>";
// String html_xwiki = "<a href='/site/en.wiktionary.org/wiki/A'>A</a>";
// fxt.Test__bicode(hzip, html_https);
// fxt.Init_wiki_installed("en.wiktionary.org");
// fxt.Test__decode(hzip, html_xwiki);
// }
@Test public void Fail__href() {
String html = "<a rel='nofollow' class='external autonumber'>a</a>";
fxt.Test__encode__fail(html, html);
@@ -42,8 +57,4 @@ public class Xoh_lnke_hzip_tst {
String html = "<a href='http://a.org' rel='nofollow' class='external invalid'>a</a>";
fxt.Test__encode__fail(html, html);
}
@Test public void Fail__auto() {
String html = "<a href='http://a.org' rel='nofollow' class='external autonumber'>[abc]</a>";
fxt.Test__encode__fail(html, html);
}
}

View File

@@ -20,40 +20,53 @@ import gplx.core.brys.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*; import gplx.xowa.htmls.hrefs.*;
public class Xoh_lnke_parser {
private final Bry_rdr rdr = new Bry_rdr();
public int Rng_bgn() {return rng_bgn;} private int rng_bgn;
public int Rng_end() {return rng_end;} private int rng_end;
public byte Anch_cls_type() {return anch_cls_type;} private byte anch_cls_type;
public int Src_bgn() {return src_bgn;} private int src_bgn;
public int Src_end() {return src_end;} private int src_end;
public byte Lnke_tid() {return lnke_tid;} private byte lnke_tid;
public int Auto_id() {return auto_id;} private int auto_id;
public int Href_bgn() {return href_bgn;} private int href_bgn;
public int Href_end() {return href_end;} private int href_end;
public int Capt_bgn() {return capt_bgn;} private int capt_bgn;
public int Capt_end() {return capt_end;} private int capt_end;
public boolean Capt_exists() {return capt_exists;} private boolean capt_exists;
private void Clear() {
anch_cls_type = Byte_ascii.Max_7_bit;
auto_id = rng_bgn = rng_end = href_bgn = href_end = capt_bgn = capt_end = -1;
lnke_tid = Byte_ascii.Max_7_bit;
capt_exists = false;
src_bgn = src_end = href_bgn = href_end = capt_bgn = capt_end = auto_id = -1;
}
public int Parse(Xoh_hdoc_wkr hdoc_wkr, Html_tag_rdr tag_rdr, Html_tag anch_head) {
public boolean Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Html_tag_rdr tag_rdr, byte[] src, Html_tag anch_head) {
this.Clear();
this.rng_bgn = anch_head.Src_bgn();
rdr.Init_by_hook("lnke", rng_bgn, rng_bgn);
this.src_bgn = anch_head.Src_bgn();
rdr.Init_by_sect("lnke", src_bgn, src_bgn);
Html_atr href_atr = anch_head.Atrs__get_by_or_fail(Html_atr_.Bry__href); // get href; "EX: href='http://a.org'"
this.href_bgn = href_atr.Val_bgn(); this.href_end = href_atr.Val_end();
this.anch_cls_type = anch_head.Atrs__cls_find_or_fail(Xoh_lnke_dict_.Hash); // get type by class; EX: "class='external free'"
boolean capt_exists = false;
switch (anch_cls_type) {
case Xoh_lnke_dict_.Type__text: capt_exists = true; break;
this.lnke_tid = anch_head.Atrs__cls_find_or_fail(Xoh_lnke_dict_.Hash); // get type by class; EX: "class='external free'"
this.capt_bgn = anch_head.Src_end();
Html_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__a); // find '</a>'
this.capt_end = anch_tail.Src_bgn();
switch (lnke_tid) {
case Xoh_lnke_dict_.Type__free:
if (!Bry_.Match(src, href_bgn, href_end, src, capt_bgn, capt_end)) // EX: <a href='https://a.org/. ' rel='nofollow' class='external free'>https://a.org/.</a>
capt_exists = true;
break;
case Xoh_lnke_dict_.Type__text:
capt_exists = true;
break;
case Xoh_lnke_dict_.Type__auto:
if (tag_rdr.Read_and_move(Byte_ascii.Brack_bgn)) // HTML tidy can reparent lnkes in strange ways; DATE:2015-08-25
this.auto_id = tag_rdr.Read_int_to(Byte_ascii.Brack_end); // extract int; EX: "<a ...>[123]</a>"
if ( src[capt_bgn] == Byte_ascii.Brack_bgn // is capt surround by bracks; EX: "[123]"
&& src[capt_end - 1] == Byte_ascii.Brack_end) {
int tmp_id = Bry_.To_int_or(src, capt_bgn + 1, capt_end - 1, -1); // extract int; EX: "<a ...>[123]</a>"
if (tmp_id == -1) // HTML tidy can reparent lnkes in strange ways; EX: "<a ...><b>[123]</b></a>" DATE:2015-08-25
capt_exists = true;
else
auto_id = tmp_id;
}
else
capt_exists = true;
break;
}
if (capt_exists) this.capt_bgn = anch_head.Src_end();
Html_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__a); // find '</a>'
if (capt_exists) this.capt_end = anch_tail.Src_bgn();
this.rng_end = anch_tail.Src_end();
this.src_end = anch_tail.Src_end();
hdoc_wkr.On_lnke(this);
return rng_end;
return true;
}
}

View File

@@ -21,7 +21,6 @@ import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
import gplx.xowa.htmls.core.wkrs.bfr_args.*;
public class Xoh_lnke_wtr extends gplx.core.brys.Bfr_arg_base {
private final Bfr_arg[] arg_ary;
private final Bfr_arg__indent indent = new Bfr_arg__indent();
private final Bfr_arg__html_atr
anch_href = new Bfr_arg__html_atr(Html_atr_.Bry__href)
, anch_rel = new Bfr_arg__html_atr(Html_atr_.Bry__rel)
@@ -29,9 +28,8 @@ public class Xoh_lnke_wtr extends gplx.core.brys.Bfr_arg_base {
;
private final Bfr_arg__wrapper anch_capt = new Bfr_arg__wrapper();
public Xoh_lnke_wtr() {
arg_ary = new Bfr_arg[] {indent, anch_href, anch_rel, anch_cls, anch_capt};
arg_ary = new Bfr_arg[] {anch_href, anch_rel, anch_cls, anch_capt};
}
public Xoh_lnke_wtr Indent_(int v) {indent.Set(v); return this;}
public Xoh_lnke_wtr Anch_href_(byte[] src, int bgn, int end) {anch_href.Set_by_mid(src, bgn, end); return this;}
public Xoh_lnke_wtr Anch_rel_y_() {anch_rel.Set_by_bry(Xoh_lnke_dict_.Html__rel__nofollow); return this;}
public Xoh_lnke_wtr Anch_cls_(byte[]... ary) {anch_cls.Set_by_ary(ary); return this;}
@@ -45,6 +43,6 @@ public class Xoh_lnke_wtr extends gplx.core.brys.Bfr_arg_base {
fmtr.Bld_bfr_many(bfr, (Object[])arg_ary);
}
private static final Bry_fmtr fmtr = Bry_fmtr.new_
( "~{indent}<a~{anch_href}~{anch_rel}~{anch_cls}>~{anch_capt}</a>"
, "indent", "anch_href", "anch_rel", "anch_cls", "anch_capt");
( "<a~{anch_href}~{anch_rel}~{anch_cls}>~{anch_capt}</a>"
, "anch_href", "anch_rel", "anch_cls", "anch_capt");
}

View File

@@ -19,7 +19,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
// using gplx.core.btries;
// using gplx.xowa.htmls.core.hzips;
// public class Xoh_hzip_href {
// public void Save(Bry_bfr bfr, Hzip_stat_itm stats, byte[] src, int src_len, int bgn, int pos, byte bgn_quote) {
// public void Save(Bry_bfr bfr, Xoh_stat_itm stats, byte[] src, int src_len, int bgn, int pos, byte bgn_quote) {
//// // ignore anchors; EX: "#a"
//// int proto_bgn = pos;
//// int proto_end = Bry_find_.Find_fwd(src, Byte_ascii.Colon, proto_bgn, src_len);

View File

@@ -19,6 +19,7 @@ package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; impo
import gplx.core.brys.*;
import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*;
public class Xoh_lnki_dict_ {
public static void Ns_encode(Xoh_hzip_bfr bfr, int ns_id) {bfr.Add_hzip_int(1, ns_id + 2);}
public static void Ns_encode(Bry_bfr bfr, int ns_id) {
gplx.xowa.htmls.core.hzips.Xoh_hzip_int_.Encode(1, bfr, ns_id + 2);
}

View File

@@ -19,18 +19,13 @@ package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; impo
import org.junit.*; import gplx.xowa.htmls.core.makes.tests.*;
public class Xoh_lnki_html__hdump__tst {
private final Xoh_make_fxt fxt = new Xoh_make_fxt();
public static final String
Html__same = "<a href='/wiki/A' id='xolnki_2' title='A'>A</a>"
, Html__diff = "<a href='/wiki/A' id='xolnki_2' title='A'>b</a>"
, Html__trail = "<a href='/wiki/A' id='xolnki_2' title='A'>Ab</a>"
, Html__xwiki = "<a href='/site/en.wiktionary.org/wiki/a' id='xolnki_2' title='a'>wikt:a</a>"
;
@Before public void init() {fxt.Clear();}
@Test public void Same() {fxt.Test__html("[[A]]" , Html__same);}
@Test public void Diff() {fxt.Test__html("[[A|b]]" , Html__diff);}
@Test public void Trail() {fxt.Test__html("[[A]]b" , Html__trail);}
@Test public void Same() {fxt.Test__html("[[A]]" , "<a href='/wiki/A' title='A'>A</a>");}
@Test public void Diff() {fxt.Test__html("[[A|b]]" , "<a href='/wiki/A' title='A'>b</a>");}
@Test public void Trail() {fxt.Test__html("[[A]]b" , "<a href='/wiki/A' title='A'>Ab</a>");}
@Test public void Xwiki() {
fxt.Parser_fxt().Init_xwiki_add_wiki_and_user_("wikt", "en.wiktionary.org");
fxt.Test__html("[[wikt:a]]", Html__xwiki);
fxt.Test__html("[[wikt:a]]", "<a href='https://en.wiktionary.org/wiki/a' title='a'>wikt:a</a>");
}
@Test public void Anch() {fxt.Test__html("[[#a]]" , "<a href='#a'>#a</a>");}
}

View File

@@ -17,153 +17,186 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.btries.*; import gplx.core.encoders.*; import gplx.core.threads.poolables.*;
import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*;
import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*; import gplx.langs.htmls.encoders.*;
import gplx.langs.htmls.*; import gplx.xowa.htmls.hrefs.*; import gplx.xowa.wikis.ttls.*;
import gplx.xowa.wikis.nss.*; import gplx.xowa.parsers.lnkis.*;
public class Xoh_lnki_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
private final Bry_bfr tmp_bfr = Bry_bfr.new_(32);
public String Key() {return Xoh_hzip_dict_.Key__lnki;}
public Xoh_lnki_hzip Encode(Bry_bfr bfr, Xoh_hdoc_ctx hctx, Hzip_stat_itm stat_itm, byte[] src, Xoh_lnki_parser arg) {
byte text_type = arg.Text_type();
Xoh_anch_href_parser anch_href_parser = arg.Href_parser();
int page_ns_id = anch_href_parser.Page_ns_id();
boolean page_ns_id_is_not_main = page_ns_id != Xow_ns_.Tid__main;
int href_type = anch_href_parser.Tid();
flag_bldr.Set(Flag__ns_is_not_main , page_ns_id_is_not_main);
flag_bldr.Set(Flag__href_type , href_type);
flag_bldr.Set(Flag__text_type , text_type);
public byte[] Hook() {return hook;} private byte[] hook;
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
Xoh_lnki_parser data = (Xoh_lnki_parser)data_obj;
Xoh_anch_href_itm href = data.Href_itm();
int ns_id = href.Ttl_ns_id(); ;
flag_bldr.Set_as_bool(Flag__title_missing_ns , data.Title_missing_ns());
flag_bldr.Set_as_bool(Flag__ttl_is_main_page , href.Ttl_is_main_page());
boolean ns_custom_exists= flag_bldr.Set_as_bool(Flag__ns_custom_exists , href.Ttl_ns_custom() != null);
int title_tid = flag_bldr.Set_as_int(Flag__title_tid , href.Tid() == Xoh_anch_href_itm.Tid__anch ? Xoh_lnki_parser.Title__href : data.Title_tid()); // anchs never have title, so don't bother setting flag;
flag_bldr.Set_as_bool(Flag__capt_has_ns , data.Capt_has_ns());
boolean ns_is_not_main = flag_bldr.Set_as_bool(Flag__ns_is_not_main , ns_id != Xow_ns_.Tid__main);
int href_type = flag_bldr.Set_as_int(Flag__href_type , href.Tid());
flag_bldr.Set_as_int(Flag__capt_cs0_tid , data.Capt_itm().Cs0_tid());
byte text_type = flag_bldr.Set_as_byte(Flag__text_type , data.Text_tid());
// Tfds.Dbg(flag_bldr.Encode(), Array_.To_str(flag_bldr.Val_ary()), text_type);
bfr.Add(Xoh_hzip_dict_.Bry__lnki);
Xoh_hzip_int_.Encode(1, bfr, flag_bldr.Encode());
if (page_ns_id_is_not_main)
Xoh_lnki_dict_.Ns_encode(bfr, page_ns_id);
if (href_type == Xoh_anch_href_parser.Tid__site)
bfr.Add_mid(src, anch_href_parser.Site_bgn(), anch_href_parser.Site_end()).Add_byte(Xoh_hzip_dict_.Escape);
int bfr_bgn = bfr.Len();
int flag = flag_bldr.Encode();
bfr.Add(hook);
bfr.Add_hzip_int(1, flag);
if (href_type == Xoh_anch_href_itm.Tid__site) bfr.Add_hzip_mid(src, href.Site_bgn(), href.Site_end());
if (ns_is_not_main) Xoh_lnki_dict_.Ns_encode(bfr, ns_id);
if (ns_custom_exists) bfr.Add_hzip_bry(href.Ttl_ns_custom());
switch (text_type) {
case Xoh_anch_capt_parser.Tid__href:
case Xoh_anch_capt_parser.Tid__href_pipe:
stat_itm.Lnki_text_n_add();
bfr.Add_mid(arg.Href_bry(), arg.Href_bgn(), arg.Href_end());
bfr.Add_byte(Xoh_hzip_dict_.Escape);
case Xoh_anch_capt_itm.Tid__same:
bfr.Add_hzip_mid(data.Href_src(), data.Href_bgn(), data.Href_end());
break;
case Xoh_anch_capt_parser.Tid__capt:
case Xoh_anch_capt_parser.Tid__href_trail:
case Xoh_anch_capt_parser.Tid__capt_short:
stat_itm.Lnki_text_y_add();
bfr.Add_mid(arg.Href_bry(), arg.Href_bgn(), arg.Href_end());
bfr.Add_byte(Xoh_hzip_dict_.Escape);
bfr.Add_mid(arg.Capt_bry(), arg.Capt_bgn(), arg.Capt_end());
bfr.Add_byte(Xoh_hzip_dict_.Escape);
case Xoh_anch_capt_itm.Tid__diff:
case Xoh_anch_capt_itm.Tid__more:
case Xoh_anch_capt_itm.Tid__less:
bfr.Add_hzip_mid(data.Text_0_src(), data.Text_0_bgn(), data.Text_0_end());
bfr.Add_hzip_mid(data.Text_1_src(), data.Text_1_bgn(), data.Text_1_end());
break;
}
if (title_tid == Xoh_lnki_parser.Title__diff) bfr.Add_hzip_mid(src, data.Title_bgn(), data.Title_end());
hctx.Hzip__stat().Lnki_add(data.Src_end() - data.Src_bgn(), bfr.Len() - bfr_bgn, flag);
return this;
}
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx hctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
int flag = rdr.Read_int_by_base85(1);
flag_bldr.Decode(flag);
boolean page_ns_id_is_not_main = flag_bldr.Get_as_bool(Flag__ns_is_not_main);
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
int flag = rdr.Read_int_by_base85(1); flag_bldr.Decode(flag);
boolean title_missing_ns = flag_bldr.Get_as_bool(Flag__title_missing_ns);
boolean ttl_is_main_page = flag_bldr.Get_as_bool(Flag__ttl_is_main_page);
boolean ns_custom_exists = flag_bldr.Get_as_bool(Flag__ns_custom_exists);
byte title_tid = flag_bldr.Get_as_byte(Flag__title_tid);
boolean capt_has_ns = flag_bldr.Get_as_bool(Flag__capt_has_ns);
boolean ns_is_not_main = flag_bldr.Get_as_bool(Flag__ns_is_not_main);
byte href_type = flag_bldr.Get_as_byte(Flag__href_type);
int capt_cs0_tid = flag_bldr.Get_as_int(Flag__capt_cs0_tid);
byte text_type = flag_bldr.Get_as_byte(Flag__text_type);
int ns_id = page_ns_id_is_not_main ? Xoh_lnki_dict_.Ns_decode(rdr) : Xow_ns_.Tid__main;
int site_bgn = -1, site_end = -1;
if (href_type == Xoh_anch_href_parser.Tid__site) {
site_bgn = rdr.Pos();
site_end = rdr.Find_fwd_lr();
}
int href_bgn = rdr.Pos();
int href_end = rdr.Find_fwd_lr();
int capt_bgn = -1, capt_end = -1;
int site_bgn = -1, site_end = -1; if (href_type == Xoh_anch_href_itm.Tid__site) {site_bgn = rdr.Pos(); site_end = rdr.Find_fwd_lr();}
int ns_id = ns_is_not_main ? Xoh_lnki_dict_.Ns_decode(rdr) : Xow_ns_.Tid__main;
byte[] ns_custom_bry = ns_custom_exists ? rdr.Read_bry_to() : null;
int text_0_bgn = rdr.Pos(); int text_0_end = rdr.Find_fwd_lr();
int text_1_bgn = -1, text_1_end = -1;
switch (text_type) {
case Xoh_anch_capt_parser.Tid__capt:
case Xoh_anch_capt_parser.Tid__capt_short:
case Xoh_anch_capt_parser.Tid__href_trail:
capt_bgn = rdr.Pos();
capt_end = rdr.Find_fwd_lr();
case Xoh_anch_capt_itm.Tid__diff: case Xoh_anch_capt_itm.Tid__less: case Xoh_anch_capt_itm.Tid__more:
text_1_bgn = rdr.Pos(); text_1_end = rdr.Find_fwd_lr();
break;
}
byte[] href_bry = null;
if (text_type == Xoh_anch_capt_parser.Tid__capt_short)
href_bry = Bry_.Add(Bry_.Mid(src, href_bgn, href_end), Bry_.Mid(src, capt_bgn, capt_end));
else
href_bry = Bry_.Mid(src, href_bgn, href_end);
byte[] title_bry = null;
Xoa_ttl ttl = null;
if (href_type != Xoh_anch_href_parser.Tid__anch) {
switch (href_type) {
case Xoh_anch_href_parser.Tid__site:
Xow_ttl_parser ttl_parser = hctx.App().Wiki_mgri().Get_by_key_or_make_init_n(Bry_.Mid(src, site_bgn, site_end));
ttl = ttl_parser.Ttl_parse(ns_id, href_bry);
href_bry = gplx.langs.htmls.encoders.Gfo_url_encoder_.Href_qarg.Encode(ttl.Full_db());
title_bry = ttl.Full_txt();
break;
case Xoh_anch_href_parser.Tid__wiki:
ttl = hctx.Wiki__ttl_parser().Ttl_parse(ns_id, href_bry); if (ttl == null) rdr.Fail("invalid ttl", String_.Empty, String_.new_u8(href_bry));
href_bry = ttl.Full_db_w_anch();
href_bry = gplx.langs.htmls.encoders.Gfo_url_encoder_.Href.Encode(href_bry); // encode for href; EX: "/wiki/A's" -> "/wiki/A&27s"
title_bry = ttl.Full_txt();
break;
case Xoh_anch_href_parser.Tid__inet:
title_bry = href_bry = gplx.langs.htmls.encoders.Gfo_url_encoder_.Href_qarg.Encode(href_bry);
break;
byte[] title_bry = title_tid == Xoh_lnki_parser.Title__diff ? rdr.Read_bry_to() : null;
byte[] href_bry = text_type == Xoh_anch_capt_itm.Tid__less
? tmp_bfr.Add_mid(src, text_0_bgn, text_0_end).Add_mid(src, text_1_bgn, text_1_end).To_bry_and_clear()
: Bry_.Mid(src, text_0_bgn, text_0_end);
byte[] ns_bry = null;
switch (href_type) {
case Xoh_anch_href_itm.Tid__anch: break;
case Xoh_anch_href_itm.Tid__inet: break; //href_bry = Gfo_url_encoder_.Href_qarg.Encode(href_bry); break;
case Xoh_anch_href_itm.Tid__wiki:
case Xoh_anch_href_itm.Tid__site:
if (ns_custom_exists) {
ns_bry = ns_custom_bry;
tmp_bfr.Add(Xoa_ttl.Replace_spaces(ns_bry)).Add_byte_colon(); // NOTE: Replace_space to handle ns_custom_bry like "Image talk"
}
else {
if (ns_id == Xow_ns_.Tid__main) {
if (ttl_is_main_page)
href_bry = Bry_.Empty;
}
else {
Xow_ns ns = hctx.Wiki__ttl_parser().Ns_mgr().Ids_get_or_null(ns_id); if (ns == null) rdr.Err_wkr().Fail("invalid ns_id", "ns_id", ns_id);
ns_bry = ns.Name_ui();
tmp_bfr.Add(ns.Name_db()).Add_byte_colon();
}
}
Gfo_url_encoder encoder = href_type == Xoh_anch_href_itm.Tid__wiki ? Gfo_url_encoder_.Href : Gfo_url_encoder_.Href_qarg;
encoder.Encode(tmp_bfr, href_bry); // encode for href; EX: "/wiki/A's" -> "/wiki/A&27s"
href_bry = tmp_bfr.To_bry_and_clear();
break;
}
byte[] capt_bry = Xoh_lnki_hzip_.Bld_capt(tmp_bfr, href_type, text_type, capt_has_ns, capt_cs0_tid, ns_bry, src, text_0_bgn, text_0_end, src, text_1_bgn, text_1_end);
if (href_type != Xoh_anch_href_itm.Tid__anch) {
switch (title_tid) {
case Xoh_lnki_parser.Title__missing: title_bry = null; break;
case Xoh_lnki_parser.Title__diff: break;
case Xoh_lnki_parser.Title__href: title_bry = Gfo_url_encoder_.Href.Decode(href_bry); break;
case Xoh_lnki_parser.Title__capt: title_bry = !capt_has_ns && !title_missing_ns && ns_bry != null ? Bry_.Add(ns_bry, Byte_ascii.Colon_bry, capt_bry) : capt_bry; break;
}
}
// gen html
bfr.Add(Html_bldr_.Bry__a_lhs_w_href);
switch (href_type) {
case Xoh_anch_href_parser.Tid__anch:
case Xoh_anch_href_itm.Tid__anch:
bfr.Add_byte(Byte_ascii.Hash); // "#"
break;
case Xoh_anch_href_parser.Tid__site:
case Xoh_anch_href_itm.Tid__site:
bfr.Add(Xoh_href_.Bry__site).Add_mid(src, site_bgn, site_end);
bfr.Add(Xoh_href_.Bry__wiki);
break;
case Xoh_anch_href_parser.Tid__wiki:
case Xoh_anch_href_itm.Tid__wiki:
bfr.Add(Xoh_href_.Bry__wiki);
break;
}
bfr.Add(href_bry);
bfr.Add(Html_bldr_.Bry__id__nth).Add_str_a7(gplx.xowa.parsers.lnkis.redlinks.Xopg_redlink_lnki_list.Lnki_id_prefix).Add_int_variable(hctx.Lnki__uid__nxt());
if (href_type != Xoh_anch_href_parser.Tid__anch) {
bfr.Add(Html_bldr_.Bry__title__nth);
Html_utl.Escape_html_to_bfr(bfr, title_bry, 0, title_bry.length, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.N);
if (!hctx.Mode_is_diff())
bfr.Add(Html_bldr_.Bry__id__nth).Add_str_a7(gplx.xowa.parsers.lnkis.redlinks.Xopg_redlink_lnki_list.Lnki_id_prefix).Add_int_variable(hctx.Lnki__uid__nxt());
if ( href_type != Xoh_anch_href_itm.Tid__anch) { // anchs never have title;
if (title_bry != null) {
bfr.Add(Html_bldr_.Bry__title__nth);
Html_utl.Escape_html_to_bfr(bfr, title_bry, 0, title_bry.length, Bool_.N, Bool_.N, Bool_.N, Bool_.Y, Bool_.N);
}
}
bfr.Add(Html_bldr_.Bry__lhs_end_head_w_quote);
if ( href_type == Xoh_anch_href_parser.Tid__anch
&& text_type != Xoh_anch_capt_parser.Tid__capt )
bfr.Add_byte(Byte_ascii.Hash);
switch (text_type) {
case Xoh_anch_capt_parser.Tid__href:
if (ns_id == Xow_ns_.Tid__main)
bfr.Add_mid(src, href_bgn, href_end);
else
bfr.Add(ttl.Full_txt());
break;
case Xoh_anch_capt_parser.Tid__href_pipe:
bfr.Add_mid(src, href_bgn, href_end);
break;
case Xoh_anch_capt_parser.Tid__capt:
bfr.Add_mid(src, capt_bgn, capt_end);
break;
case Xoh_anch_capt_parser.Tid__href_trail:
bfr.Add_mid(src, href_bgn, href_end);
bfr.Add_mid(src, capt_bgn, capt_end);
break;
case Xoh_anch_capt_parser.Tid__capt_short:
bfr.Add_mid(src, href_bgn, href_end);
break;
}
bfr.Add(capt_bry);
bfr.Add(Html_bldr_.Bry__a_rhs);
return rdr.Pos();
}
public int Pool__idx() {return pool_idx;} private int pool_idx;
public void Pool__clear (Object[] args) {}
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_lnki_hzip rv = new Xoh_lnki_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_ (1, 2, 3);
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_lnki_hzip rv = new Xoh_lnki_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_ (1 , 1, 1, 2, 1 , 1, 2, 2, 2);
private static final int // SERIALIZED
Flag__ns_is_not_main = 0
, Flag__href_type = 1 // "wiki", "site", "anch", "inet"
, Flag__text_type = 2 // "href", "capt", "href_trail", "capt_short", "href_pipe"
Flag__title_missing_ns = 0 // [[c:]] -> "/site/commons.wikimedia.org/wiki/"
, Flag__ttl_is_main_page = 1 // [[c:]] -> "/site/commons.wikimedia.org/wiki/"
, Flag__ns_custom_exists = 2 // [[c:category:a]] -> "/site/commons.wikimedia.org/wiki/category:a"
, Flag__title_tid = 3 // href, capt, diff, empty; [//en.wikipedia.org] where en.w is local
, Flag__capt_has_ns = 4 // "A" vs "Help:A"
, Flag__ns_is_not_main = 5
, Flag__href_type = 6 // "wiki", "site", "anch", "inet"
, Flag__capt_cs0_tid = 7 // exact, lower, upper
, Flag__text_type = 8 // "same", "diff", "more", "less"
;
}
class Xoh_lnki_hzip_ {
	/**
	 * Rebuilds the caption bytes of a lnki from the pieces stored in the hzip stream.
	 *
	 * The caption is assembled in this order:
	 *   1. "#"          : only for anchor hrefs whose text_type is not "diff"
	 *   2. ns_bry + ":" : only when capt_has_ns is set and ns_bry is not null
	 *   3. re-cased 1st byte of text_0 : only when text_type is not "diff" and capt_cs0 is lower / upper
	 *   4. text_0 and / or text_1, selected by text_type
	 *        same, less : text_0
	 *        diff       : text_1
	 *        more       : text_0 followed by text_1
	 *
	 * @param tmp_bfr      scratch buffer; receives the caption and is cleared on return
	 * @param href_type    one of the Xoh_anch_href_itm.Tid__* values
	 * @param text_type    one of the Xoh_anch_capt_itm.Tid__* values
	 * @param capt_has_ns  whether the caption starts with the namespace name
	 * @param capt_cs0     one of the Xoh_anch_capt_itm.Cs0__* values; casing applied to the 1st byte of text_0
	 * @param ns_bry       namespace name to prepend; may be null
	 * @param text_0_src   bytes holding text_0; read between text_0_bgn and text_0_end
	 * @param capt_src     bytes holding text_1; read between text_1_bgn and text_1_end
	 * @return             caption bytes taken from tmp_bfr
	 */
	public static byte[] Bld_capt(Bry_bfr tmp_bfr, byte href_type, byte text_type, boolean capt_has_ns, int capt_cs0, byte[] ns_bry, byte[] text_0_src, int text_0_bgn, int text_0_end, byte[] capt_src, int text_1_bgn, int text_1_end) {
		boolean capt_is_diff = text_type == Xoh_anch_capt_itm.Tid__diff;
		boolean capt_is_more = text_type == Xoh_anch_capt_itm.Tid__more;
		boolean capt_is_same_or_less
			=  text_type == Xoh_anch_capt_itm.Tid__same
			|| text_type == Xoh_anch_capt_itm.Tid__less;

		// anchors: caption derived from href gets the leading "#"; a "diff" caption is written as-is
		if (!capt_is_diff && href_type == Xoh_anch_href_itm.Tid__anch) {
			tmp_bfr.Add_byte(Byte_ascii.Hash);
		}

		// namespace prefix; EX: "Help:"
		if (ns_bry != null && capt_has_ns) {
			tmp_bfr.Add(ns_bry).Add_byte_colon();
		}

		// 1st byte of text_0 may differ only by case; emit the re-cased byte and skip it in text_0
		if (!capt_is_diff) {
			if (capt_cs0 == Xoh_anch_capt_itm.Cs0__lower) {
				tmp_bfr.Add_byte(Byte_ascii.Case_lower(text_0_src[text_0_bgn]));
				text_0_bgn++;
			}
			else if (capt_cs0 == Xoh_anch_capt_itm.Cs0__upper) {
				tmp_bfr.Add_byte(Byte_ascii.Case_upper(text_0_src[text_0_bgn]));
				text_0_bgn++;
			}
		}

		// text_0 is used by "same", "less" and "more"; text_1 is used by "diff" and "more"
		if (capt_is_same_or_less || capt_is_more) {
			tmp_bfr.Add_mid(text_0_src, text_0_bgn, text_0_end);
		}
		if (capt_is_diff || capt_is_more) {
			tmp_bfr.Add_mid(capt_src, text_1_bgn, text_1_end);
		}
		return tmp_bfr.To_bry_and_clear();
	}
}

View File

@@ -0,0 +1,40 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import org.junit.*;
public class Xoh_lnki_hzip__anch__tst {
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_();
@Test public void Basic() { // EX: [[#a]]
fxt.Test__bicode("~$Ba~#a~", "<a href='#a'>#a</a>");
}
@Test public void Capt() { // EX: [[#a|b]]
fxt.Test__bicode("~$Ba~b~", "<a href='#a'>b</a>");
}
@Test public void Capt_similar() { // EX: [[#a|a]]
fxt.Test__bicode("~$Ba~a~", "<a href='#a'>a</a>");
}
@Test public void Error() { // EX: [[#a|b]]; make sure bad title character does not cause error
fxt.Test__bicode("~$Ba|b~#a|b~", "<a href='#a|b'>#a|b</a>"); // NOTE: the "|" should be url-encoded
}
@Test public void Inet__file() {
fxt.Test__bicode("~$Rfile:///C://A.png~b~", "<a href='file:///C://A.png' title='file:///C://A.png'>b</a>");
}
@Test public void Inet__enc() {
fxt.Test__bicode("~${'Thttps://simple.wikisource.org/wiki/A%C3%A6e~b~Aæe~", "<a href='https://simple.wikisource.org/wiki/A%C3%A6e' title='Aæe'>b</a>");
}
}

View File

@@ -0,0 +1,37 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import org.junit.*;
/** Round-trip ("bicode") tests for lnkis whose caption differs from the page title; fixture runs in diff mode. */
public class Xoh_lnki_hzip__diff__tst {
	private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_();

	/** EX: [[A|b]] */
	@Test public void Diff__basic() {
		String hzip = "~$\"A~b~";
		String html = "<a href='/wiki/A' title='A'>b</a>";
		fxt.Test__bicode(hzip, html);
	}

	/** EX: [[A|a]]; caption differs from title only by case of 1st char, so no caption text is stored */
	@Test public void Diff__cs__lo() {
		String hzip = "~$%A~";
		String html = "<a href='/wiki/A' title='A'>a</a>";
		fxt.Test__bicode(hzip, html);
	}

	/** EX: [[A#b|b]]; href has an anchor, and title ("A") is stored separately */
	@Test public void Diff__page_w_anch() {
		String hzip = "~${'$A#b~b~A~";
		String html = "<a href='/wiki/A#b' title='A'>b</a>";
		fxt.Test__bicode(hzip, html);
	}

	/** EX: [[A|B[[C|C1]]D]]; caption contains a nested anchor */
	@Test public void Capt__nest() {
		String hzip = "~$\"A~B<a href=\"/wiki/C\" title=\"C\">C1</a>D~";
		String html = "<a href='/wiki/A' title='A'>B<a href='/wiki/C' title='C'>C1</a>D</a>";
		fxt.Test__bicode(hzip, html);
	}
}

View File

@@ -0,0 +1,70 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import org.junit.*;
// hzip tests for lnkis whose href carries a namespace (Help, Help talk), a namespace alias (Image) or an unknown prefix (Fake).
// Each case passes an hzip string and the matching html to Xoh_hzip_fxt.Test__bicode.
public class Xoh_lnki_hzip__ns__tst {
  private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_();
  @Test public void Ns__same() {            // EX: [[Help:A]]
    String hzip = "~${#7/A~";
    String html = "<a href='/wiki/Help:A' title='Help:A'>Help:A</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__diff() {            // EX: [[Help:A_b|c]]
    String hzip = "~$b/A b~c~";
    String html = "<a href='/wiki/Help:A_b' title='Help:A b'>c</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__more() {            // EX: [[Help:A|a b]]
    String hzip = "~$g/A~ b~";
    String html = "<a href='/wiki/Help:A' title='Help:A'>a b</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__less() {            // EX: [[Help:A_b|a]]
    String hzip = "~$h/A~ b~";
    String html = "<a href='/wiki/Help:A_b' title='Help:A b'>a</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__talk() {            // EX: [[Help talk:A b]]
    String hzip = "~${#70A b~";
    String html = "<a href='/wiki/Help_talk:A_b' title='Help talk:A b'>Help talk:A b</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__talk__diff() {      // EX: [[Help talk:A b|cde]]
    String hzip = "~$b0A b~cde~";
    String html = "<a href='/wiki/Help_talk:A_b' title='Help talk:A b'>cde</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__under() {           // EX: [[Help_talk:A_b]]; rare; just make sure codec can handle it;
    String hzip = "~$b0A b~Help_talk:A_b~";
    String html = "<a href='/wiki/Help_talk:A_b' title='Help talk:A b'>Help_talk:A_b</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__pipe() {            // EX: [[Help:A|]]
    String hzip = "~$a/A~";
    String html = "<a href='/wiki/Help:A' title='Help:A'>A</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__pipe_w_words() {    // EX: [[Help:A b|]]
    String hzip = "~$a/A b~";
    String html = "<a href='/wiki/Help:A_b' title='Help:A b'>A b</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__anch() {            // EX: [[Help:A_b#c|a]]
    String hzip = "~${'j/A~ b#c~Help:A b~";
    String html = "<a href='/wiki/Help:A_b#c' title='Help:A b'>a</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__anch__alias() {     // EX: [[Image:A.png#b|c]]
    String hzip = "~${3h)Image~A.png#b~c~Image:A.png~";
    String html = "<a href='/wiki/Image:A.png#b' title='Image:A.png'>c</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Fake__ns() {            // EX: [[Fake:A]]
    String hzip = "~$!Fake:A~";
    String html = "<a href='/wiki/Fake:A' title='Fake:A'>Fake:A</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Alias__basic() {        // EX: [[Image:A|B]]
    String hzip = "~${-f)Image~A~B~";
    String html = "<a href='/wiki/Image:A' title='Image:A'>B</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Alias__talk() {         // EX: [[Image talk:Human-woman.png]]
    String hzip = "~${/;*Image talk~Human-woman.png~";
    String html = "<a href='/wiki/Image_talk:Human-woman.png' title='Image talk:Human-woman.png'>Image talk:Human-woman.png</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Alias__words() {        // EX: [[Image:A b]]
    String hzip = "~${/;)Image~A b~";
    String html = "<a href='/wiki/Image:A_b' title='Image:A b'>Image:A b</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Alias__url_encoding() { // EX: [[Image:Aü.png|b]]
    String hzip = "~${-f)Image~Aü.png~b~";
    String html = "<a href='/wiki/Image:A%C3%BC.png' title='Image:Aü.png'>b</a>";
    fxt.Test__bicode(hzip, html);
  }
}

View File

@@ -0,0 +1,54 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import org.junit.*;
// hzip tests for lnkis whose caption starts with (or equals) the href page: same text, caption with trailing text, caption shorter than href.
// Most cases use Xoh_hzip_fxt.Test__bicode; Same__basic uses Test__decode and Same__encoded uses Test__bicode_raw.
public class Xoh_lnki_hzip__same__tst {
  private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_();
  @Test public void Same__basic() {                 // EX: [[A]]
    String hzip = "~$!A~";
    String html = "<a href='/wiki/A' title='A'>A</a>";
    fxt.Test__decode(hzip, html);
  }
  @Test public void Same__encoded() {               // EX: [[A's]]
    String hzip = "~$!A's~";
    String raw_html = "<a href=\"/wiki/A%27s\" title=\"A's\">A's</a>";
    fxt.Test__bicode_raw(hzip, raw_html, raw_html);
  }
  @Test public void Same__encoded__anch() {         // EX: [[A#90.51]]
    String hzip = "~${$%A~#90.51~";
    String html = "<a href='/wiki/A#90.51' title='A'>A</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Same__encoded__anch__nbsp() {   // EX: [[A#&nbsp;B|abc]]
    String hzip = "~${$#A#.C2.A0B~abc~";
    String html = "<a href='/wiki/A#.C2.A0B' title='abc'>abc</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Same__amp() {                   // EX: [[A&b]]
    String hzip = "~${$#A&b~A&amp;b~";
    String html = "<a href='/wiki/A%26b' title='A&amp;b'>A&amp;b</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void More__basic() {                 // EX: [[A]]b
    String hzip = "~$#A~b~";
    String html = "<a href='/wiki/A' title='A'>Ab</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Less__cs__eq() {                // EX: [[Ab|A]]
    String hzip = "~$$A~b~";
    String html = "<a href='/wiki/Ab' title='Ab'>A</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Less__cs__lo() {                // EX: [[Ab|a]]
    String hzip = "~$(A~b~";
    String html = "<a href='/wiki/Ab' title='Ab'>a</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Less__ns__cs() {                // EX: [[Help:Ab_c|ab]]; make sure ns is added correctly, not "aHelp:b"
    String hzip = "~$h/Ab~ c~";
    String html = "<a href='/wiki/Help:Ab_c' title='Help:Ab c'>ab</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ignore__audio() {               // same string is passed as both arguments
    String audio_html = "<a href=\"file:///\" xowa_title=\"A.ogg\">a</a>";
    fxt.Test__bicode(audio_html, audio_html);
  }
}

View File

@@ -0,0 +1,79 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import org.junit.*;
// hzip tests for lnkis whose href points at another wiki ("/site/en.wiktionary.org/wiki/...").
// Each case passes an hzip string and the matching html to Xoh_hzip_fxt.Test__bicode.
public class Xoh_lnki_hzip__site__tst {
  private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_();
  @Test public void Basic() {               // EX: [[wikt:A]]
    String hzip = "~${$3en.wiktionary.org~A~wikt:A~";
    String html = "<a href='/site/en.wiktionary.org/wiki/A' title='wikt:A'>wikt:A</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Capt__lower() {         // EX: [[wikt:A|a]]
    String hzip = "~$5en.wiktionary.org~A~";
    String html = "<a href='/site/en.wiktionary.org/wiki/A' title='A'>a</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Capt__upper() {         // EX: [[wikt:a|A]]
    String hzip = "~$9en.wiktionary.org~a~";
    String html = "<a href='/site/en.wiktionary.org/wiki/a' title='a'>A</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__href() {            // EX: [[wikt:help:a]]
    String hzip = "~${a2en.wiktionary.org~/help~a~wikt:help:a~";
    String html = "<a href='/site/en.wiktionary.org/wiki/help:a' title='wikt:help:a'>wikt:help:a</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__capt() {            // EX: [[wikt:help:a|b]]; MW: also adds class="extiw"
    String hzip = "~${d3en.wiktionary.org~/help~a~b~wikt:help:a~";
    String html = "<a href='/site/en.wiktionary.org/wiki/help:a' title='wikt:help:a'>b</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__anch() {            // EX: [[wikt:Help:A#b]]
    String hzip = "~${'sen.wiktionary.org~/A#b~Help:A~";
    String html = "<a href='/site/en.wiktionary.org/wiki/Help:A#b' title='Help:A'>A#b</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__more() {            // EX: [[wikt:Help:A]]b
    String hzip = "~${#Ien.wiktionary.org~/A~b~";
    String html = "<a href='/site/en.wiktionary.org/wiki/Help:A' title='Help:A'>Help:Ab</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__more__name() {      // EX: [[wikt:Help:A|Ab|]]
    String hzip = "~$sen.wiktionary.org~/A~b~";
    String html = "<a href='/site/en.wiktionary.org/wiki/Help:A' title='Help:A'>Ab</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Ns__url_encoding() {    // EX: [[wikt:Category:A & B|]]
    String hzip = "~${$sen.wiktionary.org~1A & B~A &amp; B~";
    String html = "<a href='/site/en.wiktionary.org/wiki/Category:A_%26_B' title='Category:A &amp; B'>A &amp; B</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Less__eq() {            // EX: [[wikt:Ab|A]]; html has no title attribute
    String hzip = "~${*7en.wiktionary.org~A~b~";
    String html = "<a href='/site/en.wiktionary.org/wiki/Ab'>A</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Less__lo() {            // EX: [[wikt:Ab|a]]
    String hzip = "~$8en.wiktionary.org~A~b~";
    String html = "<a href='/site/en.wiktionary.org/wiki/Ab' title='Ab'>a</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Less__hi() {            // EX: [[wikt:ab|A]]
    String hzip = "~$<en.wiktionary.org~a~b~";
    String html = "<a href='/site/en.wiktionary.org/wiki/ab' title='ab'>A</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void More__hi() {            // EX: [[wikt:a|Ab]]
    String hzip = "~$;en.wiktionary.org~a~b~";
    String html = "<a href='/site/en.wiktionary.org/wiki/a' title='a'>Ab</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Encode__lnki() {        // EX: [[wikt:eorðe|eorðe]]
    String hzip = "~$1en.wiktionary.org~eorðe~";
    String html = "<a href='/site/en.wiktionary.org/wiki/eor%C3%B0e' title='eorðe'>eorðe</a>";
    fxt.Test__bicode(hzip, html);
  }
  // DISABLED: EX: [//en.wiktionary.org/wiki/eorðe eorðe]; NOTE:MW inconsistently does not URL-encode external links (but does URL-encode internal ones)
  // @Test public void Encode__lnke() {
  //   fxt.Test__bicode("~$)en.wiktionary.org~eorðe~" , "<a href='/site/en.wiktionary.org/wiki/eorðe'>eorðe</a>");
  // }
  @Test public void Lnke__ns() {
    String hzip = "~$qen.wiktionary.org~/a~";
    String html = "<a href='/site/en.wiktionary.org/wiki/Help:a' title='Help:a'>a</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Qarg_lnke() {           // EX: [//en.wiktionary.org/wiki/A?b=c d]
    String hzip = "~${*5en.wiktionary.org~A?b=c~d~";
    String html = "<a href='/site/en.wiktionary.org/wiki/A?b=c'>d</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Qarg_lnki() {           // EX: [[wikt:A?b=c|d]]; NOTE: mw encodes as A%3Fb%3Dc
    String hzip = "~$2en.wiktionary.org~A?b=c~d~";
    String html = "<a href='/site/en.wiktionary.org/wiki/A?b=c' title='A?b=c'>d</a>";
    fxt.Test__bicode(hzip, html);
  }
  @Test public void Main_page() {           // EX: [[wikt:]]
    String hzip = "~${<<en.wiktionary.org~~wikt:~";
    String html = "<a href='/site/en.wiktionary.org/wiki/' title='wikt:'>wikt:</a>";
    fxt.Test__bicode(hzip, html);
  }
}

View File

@@ -1,113 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import org.junit.*; import gplx.xowa.htmls.core.hzips.*;
public class Xoh_lnki_hzip_tst {
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt();
@Test public void Href__basic() {
fxt.Test__bicode("~$!A~", Xoh_lnki_html__hdump__tst.Html__same);
}
@Test public void Href__case_diff() {
fxt.Test__bicode("~$!a~", "<a href='/wiki/A' id='xolnki_2' title='A'>a</a>");
}
@Test public void Href__url_encoded() {
String html = "<a href=\"/wiki/A%27s\" id=\"xolnki_2\" title=\"A's\">A's</a>";
fxt.Test__bicode_raw("~$!A's~", html, html);
}
@Test public void Ns__same() { // EX: [[Help:A]]
fxt.Test__bicode("~$A/A~", "<a href='/wiki/Help:A' id='xolnki_2' title='Help:A'>Help:A</a>");
}
@Test public void Ns__diff() { // EX: [[Help:A_b|c]]
fxt.Test__bicode("~$B/A_b~c~", "<a href='/wiki/Help:A_b' id='xolnki_2' title='Help:A b'>c</a>");
}
@Test public void Ns__space() { // EX: [[Help talk:A b]]
fxt.Test__bicode("~$A0A b~", "<a href='/wiki/Help_talk:A_b' id='xolnki_2' title='Help talk:A b'>Help talk:A b</a>");
}
@Test public void Ns__under() { // EX: [[Help_talk:A_b]]; rare; just make sure codec can handle it;
fxt.Test__bicode("~$B0A_b~Help_talk:A_b~", "<a href='/wiki/Help_talk:A_b' id='xolnki_2' title='Help talk:A b'>Help_talk:A_b</a>");
}
@Test public void Ns__pipe() { // EX: [[Help:A|]]
fxt.Test__bicode("~$E/A~", "<a href='/wiki/Help:A' id='xolnki_2' title='Help:A'>A</a>");
}
@Test public void Ns__pipe_w_words() { // EX: [[Help:A b|]]
fxt.Test__bicode("~$E/A b~", "<a href='/wiki/Help:A_b' id='xolnki_2' title='Help:A b'>A b</a>");
}
@Test public void Anch__same() {
fxt.Test__bicode("~$2a~#a~", "<a href='#a' id='xolnki_2'>#a</a>");
}
@Test public void Anch__diff() {
fxt.Test__bicode("~$2a~b~", "<a href='#a' id='xolnki_2'>b</a>");
}
@Test public void Anch__diff__starts_w_same() {
fxt.Test__bicode("~$2a~a~", "<a href='#a' id='xolnki_2'>a</a>");
}
@Test public void Capt__basic() { // EX: [[A|b]]
fxt.Test__bicode("~$\"A~b~", Xoh_lnki_html__hdump__tst.Html__diff);
}
@Test public void Capt__page_w_anch() { // Ex: [[A#b|c]]
fxt.Test__bicode("~$\"A#b~b~", "<a href='/wiki/A#b' id='xolnki_2' title='A'>b</a>");
}
@Test public void Capt__nest() {
fxt.Test__bicode
( "~$\"A~<a href=\"/wiki/C\" id=\"xolnki_3\" title=\"C\">C1</a>D~"
, "<a href=\"/wiki/A\" id=\"xolnki_2\" title=\"A\"><a href=\"/wiki/C\" id=\"xolnki_3\" title=\"C\">C1</a>D</a>"
);
}
@Test public void Capt__reparent() { // PURPOSE: PAGE:en.w:Abyssal_plain; DATE:2015-06-02; DELETE: not needed in new dump format;
fxt.Test__bicode
( "$\"A<font color=\"white\">A1</font>"
, "<a href=\"/wiki/A\" id=\"xolnki_2\" title=\"A\"><font color='white'>A1</font></a>"
);
}
@Test public void Capt__xwiki() {
Xow_wiki wiki = fxt.Prep_create_wiki("wikt", "en.wiktionary.org");
wiki.Ns_mgr().Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
fxt.Test__bicode("$*en.wiktionary.orgawikt:a", Xoh_lnki_html__hdump__tst.Html__xwiki);
}
@Test public void Capt__xwiki__qarg() {
Xow_wiki wiki = fxt.Prep_create_wiki("wikt", "en.wiktionary.org");
wiki.Ns_mgr().Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
fxt.Test__bicode("$*en.wiktionary.orga?action=editwikt:a?action=edit", "<a href='/site/en.wiktionary.org/wiki/a?action=edit' id='xolnki_2' title='a?action=edit'>wikt:a?action=edit</a>");
}
@Test public void Capt__xwiki__encode() {
Xow_wiki wiki = fxt.Prep_create_wiki("wikt", "en.wiktionary.org");
wiki.Ns_mgr().Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
fxt.Test__bicode("$)en.wiktionary.orgeorðe", "<a href='/site/en.wiktionary.org/wiki/eor%C3%B0e' id='xolnki_2' title='eorðe'>eorðe</a>");
}
@Test public void Trail__basic() {
fxt.Test__bicode("~$#A~b~", Xoh_lnki_html__hdump__tst.Html__trail);
}
@Test public void Short__basic() {
fxt.Test__bicode("~$$A~b~", "<a href='/wiki/Ab' id='xolnki_2' title='Ab'>A</a>");
}
@Test public void Short__case() {
fxt.Test__bicode("~$$a~b~", "<a href='/wiki/Ab' id='xolnki_2' title='Ab'>a</a>");
}
@Test public void Site__main_page() {
fxt.Test__bicode("~$)en.wikipedia.org~Main Page~"
, "<a href='/site/en.wikipedia.org/wiki/' id='xolnki_2' title='Main Page'>Main Page</a>"
, "<a href='/site/en.wikipedia.org/wiki/Main_Page' id='xolnki_2' title='Main Page'>Main Page</a>"
);
}
@Test public void Site__qarg() {
fxt.Test__bicode("~$*en.wikipedia.org~A?b=c~d~", "<a href='/site/en.wikipedia.org/wiki/A?b=c' id='xolnki_2' title='A?b=c'>d</a>");
}
@Test public void Inet__file() {
fxt.Test__bicode("~$:file:///C://A.png~b~", "<a href='file:///C://A.png' id='xolnki_2' title='file:///C://A.png'>b</a>");
}
}

View File

@@ -20,90 +20,128 @@ import gplx.core.brys.*; import gplx.langs.htmls.*; import gplx.langs.htmls.pars
import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*;
import gplx.xowa.wikis.ttls.*; import gplx.xowa.wikis.nss.*;
// Parses one <a> element into href / caption / title spans and reports it through Xoh_hdoc_wkr.On_lnki.
// NOTE(review): this listing comes from a diff view; it mixes lines of the previous single Parse method (Rng_* / Href_bry / Capt_bry / href_parser / capt_parser)
// with the current Init / Parse / Parse_href / Parse_capt / Parse_title methods. The comments added below describe the current methods only.
public class Xoh_lnki_parser {
private final Xoh_anch_capt_parser capt_parser = new Xoh_anch_capt_parser();
private byte[] src;
private int href_ns_id; private byte[] href_ns_name; private int href_ns_name_len;
private byte[] capt_src; private int capt_bgn, capt_end;
private final Bry_rdr rdr = new Bry_rdr();
public int Rng_bgn() {return rng_bgn;} private int rng_bgn;
public int Rng_end() {return rng_end;} private int rng_end;
public byte Text_type() {return text_type;} private byte text_type;
public byte[] Href_bry() {return href_bry;} private byte[] href_bry;
public int Href_bgn() {return href_bgn;} private int href_bgn;
public int Href_end() {return href_end;} private int href_end;
public byte[] Capt_bry() {return capt_bry;} private byte[] capt_bry;
public int Capt_bgn() {return capt_bgn;} private int capt_bgn;
public int Capt_end() {return capt_end;} private int capt_end;
public Xoh_anch_href_parser Href_parser() {return href_parser;} private final Xoh_anch_href_parser href_parser = new Xoh_anch_href_parser();
public int Parse(Xoh_hdoc_wkr wkr, Xoh_hdoc_ctx hctx, byte[] src, Html_tag_rdr tag_rdr, Html_tag anch_head, Xow_ttl_parser ttl_parser) {// <a href="/wiki/A" title="A">b</a>
this.rng_bgn = anch_head.Src_bgn();
rdr.Init_by_sub(tag_rdr.Rdr(), "lnki", rng_bgn, src.length);
href_parser.Parse(rdr, hctx.App(), hctx.Wiki__ttl_parser(), anch_head); // href='/wiki/A'
// get href
this.href_bry = src;
this.href_bgn = href_parser.Page_bgn(); this.href_end = href_parser.Page_end();
Xoa_ttl href_ttl = null; Xow_ns href_ns = null;
int href_ns_id = Xow_ns_.Tid__main; boolean href_cs_tid_1st = true;
switch (href_parser.Tid()) {
case Xoh_anch_href_parser.Tid__anch:
case Xoh_anch_href_parser.Tid__inet:
break;
default:
href_ttl = href_parser.Page_ttl();
href_ns = href_ttl.Ns();
href_ns_id = href_ns.Id();
href_cs_tid_1st = href_ttl.Ns().Case_match() == Xow_ns_case_.Tid__1st;
this.href_bry = href_parser.Page_bry();
public int Src_bgn() {return src_bgn;} private int src_bgn;
public int Src_end() {return src_end;} private int src_end;
public boolean Capt_has_ns() {return capt_has_ns;} private boolean capt_has_ns;
public byte Text_tid() {return text_tid;} private byte text_tid;
public byte[] Text_0_src() {return text_0_src;} private byte[] text_0_src;
public int Text_0_bgn() {return text_0_bgn;} private int text_0_bgn;
public int Text_0_end() {return text_0_end;} private int text_0_end;
public byte[] Text_1_src() {return text_1_src;} private byte[] text_1_src;
public int Text_1_bgn() {return text_1_bgn;} private int text_1_bgn;
public int Text_1_end() {return text_1_end;} private int text_1_end;
public byte[] Href_src() {return href_src;} private byte[] href_src;
public int Href_bgn() {return href_bgn;} private int href_bgn;
public int Href_end() {return href_end;} private int href_end;
public boolean Title_missing_ns() {return title_missing_ns;} private boolean title_missing_ns;
public int Title_tid() {return title_tid;} private int title_tid;
public int Title_bgn() {return title_bgn;} private int title_bgn;
public int Title_end() {return title_end;} private int title_end;
public Xoh_anch_href_itm Href_itm() {return href_itm;} private final Xoh_anch_href_itm href_itm = new Xoh_anch_href_itm();
public Xoh_anch_capt_itm Capt_itm() {return capt_itm;} private final Xoh_anch_capt_itm capt_itm = new Xoh_anch_capt_itm();
// Resets per-anchor state: src / href_src / capt_src all point at the given src; both ns flags are false; ns defaults to main with no ns name;
// every href / capt / title position is -1; title_tid defaults to Title__href.
private void Init(byte[] src) {
this.src = href_src = capt_src = src;
capt_has_ns = title_missing_ns = false;
href_ns_id = Xow_ns_.Tid__main; href_ns_name = null; href_ns_name_len = 0;
href_bgn = href_end = capt_bgn = capt_end = title_bgn = title_end = -1;
title_tid = Title__href;
}
// Parses the anchor whose opening tag is anch_head: resets state, parses href, then caption (which moves tag_rdr to the closing </a>), then title,
// and finally hands this parser to hdoc_wkr.On_lnki. Always returns true.
public boolean Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Html_tag_rdr tag_rdr, byte[] src, Html_tag anch_head) {
Init(src);
this.src_bgn = anch_head.Src_bgn();
rdr.Init_by_wkr(tag_rdr.Err_wkr(), "lnki", src_bgn, src.length);
Html_atr title_atr = anch_head.Atrs__get_by_or_empty(Html_atr_.Bry__title);
Parse_href(hctx, anch_head);
Parse_capt(tag_rdr, anch_head);
Parse_title(title_atr);
hdoc_wkr.On_lnki(this);
return true;
}
// Delegates href parsing to href_itm. For wiki / site hrefs, href_src becomes the title's full text (range 0..length) and,
// when the ns is not main, the ns name up to and including the colon is captured in href_ns_name so that Parse_capt / Parse_title can skip it.
private void Parse_href(Xoh_hdoc_ctx hctx, Html_tag anch_head) {
href_itm.Parse(rdr.Err_wkr(), hctx, anch_head);
this.href_bgn = href_itm.Ttl_bgn(); this.href_end = href_itm.Ttl_end();
switch (href_itm.Tid()) {
case Xoh_anch_href_itm.Tid__wiki: case Xoh_anch_href_itm.Tid__site:
this.href_ns_id = href_itm.Ttl_ns_id();
this.href_src = href_itm.Ttl_full_txt();
this.href_bgn = 0;
this.href_end = href_bry.length;
this.href_end = href_src.length;
if (href_ns_id != Xow_ns_.Tid__main) { // not main; try to remove template name;
int colon_pos = Bry_find_.Find_fwd(href_src, Byte_ascii.Colon, href_bgn, href_end);
this.href_ns_name = Xoa_ttl.Replace_unders(Bry_.Mid(href_src, href_bgn, colon_pos + 1)); // EX: 11="Template talk:"
this.href_ns_name_len = href_ns_name.length;
}
break;
}
// get capt
this.capt_bry = src;
}
// Locates the caption (text between the end of the opening <a> and the start of </a>), strips a leading ns name from href and, if present, from the caption,
// then asks capt_itm how caption and href relate and exposes the result as two text ranges:
//   same / diff: text_0 = href, text_1 = caption
//   more       : text_0 = href, text_1 = caption from split_pos onwards
//   less       : text_0 = href up to split_pos, text_1 = rest of href (split_pos .. href_end)
// Anchor-only hrefs (Tid__anch) are always treated as Tid__diff.
private void Parse_capt(Html_tag_rdr tag_rdr, Html_tag anch_head) {
this.capt_bgn = anch_head.Src_end(); // capt starts after <a>
Html_tag anch_tail = tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__a); // </a>
this.capt_end = anch_tail.Src_bgn(); // get capt between "<a>" and "</a>
this.rng_end = anch_tail.Src_end();
boolean capt_bgn_has_ns = true;
this.src_end = anch_tail.Src_end();
// skip ns in href / capt
if (href_ns_id != Xow_ns_.Tid__main) { // not main; try to remove template name;
int colon_pos = Bry_find_.Find_fwd(href_bry, Byte_ascii.Colon, href_bgn, href_end);
byte[] ns_name = Xoa_ttl.Replace_unders(Bry_.Mid(href_bry, href_bgn, colon_pos + 1)); // EX: 11="Template talk:"
int ns_name_len = ns_name.length;
int ns_name_end = capt_bgn + ns_name_len;
href_bgn += ns_name_len; // skip ns_name for href; EX: "Help:A" -> "A"; "Help" will be saved as encoded num
if (Bry_.Match(src, capt_bgn, ns_name_end, ns_name, 0, ns_name_len)) // href matches capt; EX: [[Help:A]] -> <a href='/wiki/Help:A'>Help:A</a>
capt_bgn = ns_name_end;
else
capt_bgn_has_ns = false;
int capt_bgn_wo_ns = capt_bgn + href_ns_name_len;
href_bgn += href_ns_name_len; // skip ns_name for href; EX: "Help:A" -> "A"; "Help" will be saved as encoded number
if (Bry_.Match(capt_src, capt_bgn, capt_bgn_wo_ns, href_ns_name)) { // capt matches ns_name; EX: <a href='/wiki/Help:A'>Help:A</a> -> "Help:A" matches "Help:"
capt_bgn = capt_bgn_wo_ns; // skip ns; "Help:"
capt_has_ns = true;
}
}
if (href_parser.Tid() == Xoh_anch_href_parser.Tid__anch)
this.text_type = Xoh_anch_capt_parser.Tid__capt;
else
this.text_type = capt_parser.Parse(rdr, capt_bgn_has_ns, href_cs_tid_1st, href_bry, href_bgn, href_end, src, capt_bgn, capt_end);
int split_pos = capt_parser.Split_pos();
switch (text_type) {
case Xoh_anch_capt_parser.Tid__capt: // nothing to do; href / capt already set above
// get text splits
this.text_tid = href_itm.Tid() == Xoh_anch_href_itm.Tid__anch
? Xoh_anch_capt_itm.Tid__diff
: capt_itm.Parse(rdr, capt_has_ns, href_src, href_bgn, href_end, src, capt_bgn, capt_end);
int split_pos = capt_itm.Split_pos();
this.text_0_src = href_src; this.text_0_bgn = href_bgn; this.text_0_end = href_end;
this.text_1_src = capt_src; this.text_1_bgn = capt_bgn; this.text_1_end = capt_end;
switch (text_tid) {
case Xoh_anch_capt_itm.Tid__same:
// case Xoh_anch_capt_itm.Tid__href_pipe:
case Xoh_anch_capt_itm.Tid__diff: // nothing to do; href / capt already set above
break;
case Xoh_anch_capt_parser.Tid__href: // redefine href to capt since both href and capt are same except for case-sensitivity / underscores; EX: [[a]], [[A b]]
case Xoh_anch_capt_parser.Tid__href_pipe:
this.href_bry = src;
this.href_bgn = capt_bgn;
this.href_end = capt_end;
case Xoh_anch_capt_itm.Tid__more:
this.text_1_bgn = split_pos;
break;
case Xoh_anch_capt_parser.Tid__href_trail:
this.href_bry = src;
this.href_bgn = capt_bgn;
this.href_end = split_pos;
this.capt_bgn = split_pos;
break;
case Xoh_anch_capt_parser.Tid__capt_short:
int tmp_capt_bgn = capt_bgn, tmp_capt_end = capt_end;
this.capt_bry = href_bry;
this.capt_bgn = split_pos;
this.capt_end = href_end;
this.href_bry = src;
this.href_bgn = tmp_capt_bgn;
this.href_end = tmp_capt_end;
case Xoh_anch_capt_itm.Tid__less:
this.text_0_end = split_pos;
this.text_1_src = href_src;
this.text_1_bgn = split_pos;
this.text_1_end = href_end;
break;
}
wkr.On_lnki(this);
return rng_end;
}
// Classifies the title attribute: Title__missing when its Val_end() is -1, Title__href when its text equals the (ns-stripped) href,
// Title__capt when it equals the caption, otherwise Title__diff. When an ns name was captured from the href, a matching prefix is skipped in the title first;
// title_missing_ns is set when the title does not start with that ns name.
// NOTE(review): the ns-prefix match runs before the title_end == -1 check, so it also runs for an absent title attribute; confirm Bry_.Match tolerates that range.
private void Parse_title(Html_atr title_atr) {
// Tfds.Dbg(Bry_.Mid(href_src, href_bgn, href_end), Bry_.Mid(src, capt_bgn, capt_end), Bry_.Mid(src, title_bgn, title_end));
title_bgn = title_atr.Val_bgn(); title_end = title_atr.Val_end();
if (href_ns_name != null) { // ns_name exists
int title_bgn_wo_ns = title_bgn + href_ns_name_len;
if (Bry_.Match(src, title_bgn, title_bgn_wo_ns, href_ns_name)) // title matches ns_name;
title_bgn = title_bgn_wo_ns; // skip ns; "Help:"
else
title_missing_ns = true;
}
if (title_end == -1)
title_tid = Title__missing;
else {
if (Bry_.Match(src, title_bgn, title_end, href_src, href_bgn, href_end))
title_tid = Title__href;
else if (Bry_.Match(src, title_bgn, title_end, src, capt_bgn, capt_end))
title_tid = Title__capt;
else {
title_tid = Title__diff;
if (href_ns_name != null) title_bgn = title_atr.Val_bgn(); // since title is different, add back ns_name; EX: "<a href='/wiki/Help:A_b#c' title='Help:A b'>a</a>"; title should be "Help:A b", not "A b"
}
}
}
// Values of Title_tid(); marked as serialized by the original author, so the numeric values should presumably not be renumbered.
public static final int // SERIALIZED
Title__href = 0
, Title__capt = 1
, Title__diff = 2
, Title__missing = 3
;
}

View File

@@ -18,44 +18,65 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.htmls.core.wkrs.lnkis.anchs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
import gplx.core.brys.*;
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.ttls.*;
// Compares a caption against its href byte by byte and classifies the relation as Tid__same / Tid__diff / Tid__more / Tid__less.
// NOTE(review): this listing comes from a diff view; lines of the previous Xoh_anch_capt_parser are interleaved with the current Xoh_anch_capt_itm.
// The comments added here describe Xoh_anch_capt_itm.
//   Split_pos(): -1 unless the result is Tid__more (offset into capt_bry where the extra caption text starts)
//                or Tid__less (offset into href_bry where the part of the href not covered by the caption starts).
//   Cs0_tid()  : relation of the 1st bytes; Cs0__lower = href is A-Z and capt is the same letter in lowercase; Cs0__upper = href is a-z and capt is the same letter in uppercase;
//                otherwise Cs0__exact. It is reset to Cs0__exact whenever the result is Tid__diff.
//   A space in the caption is treated as equal to an underscore in the href.
// NOTE(review): in the lines shown, the new Parse does not read owner_rdr or ns_name_exists.
public class Xoh_anch_capt_parser {
public int Split_pos() {return split_pos;} private int split_pos;
public byte Parse(Bry_rdr owner_rdr, boolean capt_bgn_has_ns, boolean cs_tid_1st, byte[] href_bry, int href_bgn, int href_end, byte[] capt_bry, int capt_bgn, int capt_end) {
this.split_pos = -1;
public class Xoh_anch_capt_itm {
public int Cs0_tid() {return cs0_tid;} private int cs0_tid;
public int Split_pos() {return split_pos;} private int split_pos;
public byte Parse(Bry_rdr owner_rdr, boolean ns_name_exists, byte[] href_bry, int href_bgn, int href_end, byte[] capt_bry, int capt_bgn, int capt_end) {
this.cs0_tid = Cs0__exact; this.split_pos = -1;
// do compare
int href_len = href_end - href_bgn;
int capt_len = capt_end - capt_bgn;
for (int i = 0; i < capt_len; ++i) {
if (i == href_len) { // ran out of href; mark as trail; EX: [[A]]s -> href="A"; capt="As"
split_pos = i + capt_bgn;
return Tid__href_trail;
return Tid__more;
}
byte href_byte = href_bry[i + href_bgn];
byte capt_byte = capt_bry[i + capt_bgn];
if (href_byte == capt_byte) continue;
if ( i == 0 // ignore case if 1st letter and ns is Tid__1st; EX: [[earth]] -> href="Earth"; capt="earth"
&& cs_tid_1st
&& capt_byte >= Byte_ascii.Ltr_a && capt_byte <= Byte_ascii.Ltr_z
&& (capt_byte - href_byte) == 32
)
continue;
if (i == 0) { // ignore case if 1st letter and ns is Tid__1st; EX: [[earth]] -> href="Earth"; capt="earth"
if ( capt_byte == href_byte) {
cs0_tid = Cs0__exact;
continue;
}
else if(href_byte >= Byte_ascii.Ltr_A && href_byte <= Byte_ascii.Ltr_Z
&& capt_byte - href_byte == 32
) {
cs0_tid = Cs0__lower;
continue;
}
else if(href_byte >= Byte_ascii.Ltr_a && href_byte <= Byte_ascii.Ltr_z
&& href_byte - capt_byte == 32
) {
cs0_tid = Cs0__upper;
continue;
}
}
else
if (href_byte == capt_byte) continue;
if ( capt_byte == Byte_ascii.Space // ignore " " vs "_"
&& href_byte == Byte_ascii.Underline
)
continue;
return Tid__capt; // bytes still diff; return diff
this.cs0_tid = Cs0__exact;
return Tid__diff; // bytes still diff; return diff
}
if (capt_len == href_len) // all bytes same and capt_len == href_len; must be same
return capt_bgn_has_ns ? Tid__href : Tid__href_pipe;
return Tid__same;// : Tid__href_pipe;
else { // capt < href; EX: [[A_(b)|A]] -> href="A_(b)"; capt = "A"
split_pos = capt_len + href_bgn;
return Tid__capt_short;
return Tid__less;
}
}
public static final byte // SERIALIZED
Tid__href = 0 // [[A]] -> "A|A" -> "A|"
, Tid__capt = 1 // [[A|b]] -> "A|b" -> "A|b"
, Tid__href_trail = 2 // [[A]]s -> "A|As" -> "A|s"
, Tid__capt_short = 3 // [[A_(b)|A]] -> "A_(b)|A" -> "A|_(b)"
, Tid__href_pipe = 4 // [[Help:A|]] -> "Help:A|A" -> "A|"
Tid__same = 0 // [[A]] -> "A|A" -> "A|"
, Tid__diff = 1 // [[A|b]] -> "A|b" -> "A|b"
, Tid__more = 2 // [[A]]s -> "A|As" -> "A|s"
, Tid__less = 3 // [[A_(b)|A]] -> "A_(b)|A" -> "A|_(b)"
// , Tid__href_pipe = 4 // [[Help:A|]] -> "Help:A|A" -> "A|"
;
public static final int // SERIALIZED
Cs0__exact = 0
, Cs0__lower = 1 // [[A|a]] -> "A|a" -> "A"
, Cs0__upper = 2 // [[a|A]] -> "a|A" -> "a"
;
}

View File

@@ -17,25 +17,25 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis.anchs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
import org.junit.*; import gplx.core.brys.*; import gplx.langs.htmls.parsers.*;
// Tests for the href-vs-caption classifier; each case calls fxt.Test__match with two strings, an expected Tid and, for the last two cases, an extra int.
// NOTE(review): this listing comes from a diff view; lines of the previous Xoh_anch_capt_parser_tst are interleaved with the current Xoh_anch_capt_itm_tst.
// NOTE(review): argument order is presumably (href page, caption, expected tid, expected split pos) -- the Test__match signature is not visible here; confirm in the fixture.
// In the current version Case__reverse and Case__disabled are commented out.
public class Xoh_anch_capt_parser_tst {
private final Xoh_anch_capt_parser_fxt fxt = new Xoh_anch_capt_parser_fxt();
@Test public void Basic__same() {fxt.Test__match("Abc" , "Abc", Xoh_anch_capt_parser.Tid__href);}
@Test public void Basic__diff() {fxt.Test__match("Abc" , "ABC", Xoh_anch_capt_parser.Tid__capt);}
@Test public void Space__same() {fxt.Test__match("A_b" , "A b", Xoh_anch_capt_parser.Tid__href);}
@Test public void Case__same() {fxt.Test__match("Abc" , "abc", Xoh_anch_capt_parser.Tid__href);}
@Test public void Case__reverse() {fxt.Test__match("abc" , "Abc", Xoh_anch_capt_parser.Tid__capt);}
@Test public void Case__disabled() {
fxt.Wiki().Ns_mgr().Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
fxt.Test__match("Abcde", "abcde", Xoh_anch_capt_parser.Tid__capt);
}
@Test public void Ns__href() {fxt.Test__match("Help_talk:Ab" , "Help talk:Ab" , Xoh_anch_capt_parser.Tid__href);}
@Test public void Capt_trail() {fxt.Test__match("A" , "Abc" , Xoh_anch_capt_parser.Tid__href_trail, 1);}
@Test public void Href_trail() {fxt.Test__match("Ab" , "A" , Xoh_anch_capt_parser.Tid__capt_short, 1);}
public class Xoh_anch_capt_itm_tst {
private final Xoh_anch_capt_itm_fxt fxt = new Xoh_anch_capt_itm_fxt();
@Test public void Basic__same() {fxt.Test__match("Abc" , "Abc", Xoh_anch_capt_itm.Tid__same);}
@Test public void Basic__diff() {fxt.Test__match("Abc" , "ABC", Xoh_anch_capt_itm.Tid__diff);}
@Test public void Space__same() {fxt.Test__match("A_b" , "A b", Xoh_anch_capt_itm.Tid__same);}
@Test public void Case__same() {fxt.Test__match("Abc" , "abc", Xoh_anch_capt_itm.Tid__same);}
// @Test public void Case__reverse() {fxt.Test__match("abc" , "Abc", Xoh_anch_capt_itm.Tid__diff);}
// @Test public void Case__disabled() {
// fxt.Wiki().Ns_mgr().Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
// fxt.Test__match("Abcde", "abcde", Xoh_anch_capt_itm.Tid__diff);
// }
@Test public void Ns__href() {fxt.Test__match("Help_talk:Ab" , "Help talk:Ab" , Xoh_anch_capt_itm.Tid__same);}
@Test public void Capt_trail() {fxt.Test__match("A" , "Abc" , Xoh_anch_capt_itm.Tid__more, 1);}
@Test public void Href_trail() {fxt.Test__match("Ab" , "A" , Xoh_anch_capt_itm.Tid__less, 1);}
}
class Xoh_anch_capt_parser_fxt {
private final Xoh_anch_capt_parser matcher = new Xoh_anch_capt_parser();
class Xoh_anch_capt_itm_fxt {
private final Xoh_anch_capt_itm matcher = new Xoh_anch_capt_itm();
private final Bry_rdr rdr = new Bry_rdr();
public Xoh_anch_capt_parser_fxt() {
public Xoh_anch_capt_itm_fxt() {
Xoae_app app = Xoa_app_fxt.app_();
this.wiki = Xoa_app_fxt.wiki_tst_(app);
}
@@ -45,8 +45,8 @@ class Xoh_anch_capt_parser_fxt {
byte[] page_bry = Bry_.new_u8(page_str);
byte[] capt_bry = Bry_.new_u8(capt_str);
Xoa_ttl href_ttl = wiki.Ttl_parse(page_bry);
boolean cs_tid_1st = href_ttl.Ns().Case_match() == gplx.xowa.wikis.nss.Xow_ns_case_.Tid__1st;
Tfds.Eq_int(expd_tid , matcher.Parse(rdr.Init_by_page(Bry_.Empty, page_bry, page_bry.length), Bool_.Y, cs_tid_1st, page_bry, 0, page_bry.length, capt_bry, 0, capt_bry.length));
boolean ns_is_cs = href_ttl.Ns().Case_match() == gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all;
Tfds.Eq_int(expd_tid , matcher.Parse(rdr.Init_by_page(Bry_.Empty, page_bry, page_bry.length), ns_is_cs, page_bry, 0, page_bry.length, capt_bry, 0, capt_bry.length));
Tfds.Eq_int(expd_trail_bgn , matcher.Split_pos());
}
}

View File

@@ -0,0 +1,154 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis.anchs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
import gplx.core.brys.*; import gplx.core.btries.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*; import gplx.langs.htmls.encoders.*;
import gplx.xowa.wikis.ttls.*; import gplx.xowa.wikis.nss.*;
/**
 * Parses the value of an anchor's href attribute and exposes the pieces found in it.
 *
 * Four shapes are recognized (see the Tid__ constants): "/wiki/A", "/site/domain/wiki/A", "#A",
 * and anything else, which is classified as Tid__inet. An instance is reusable: every call to
 * Parse resets the state left by the previous call before doing anything else.
 */
public class Xoh_anch_href_itm implements Xoh_itm_parser {
	// ---- constants -------------------------------------------------------------------------
	public static final byte
	  Tid__wiki = 0	// EX: href="/wiki/A"
	, Tid__site = 1	// EX: href="/site/en.wikipedia.org/wiki/A"
	, Tid__anch = 2	// EX: href="#A"
	, Tid__inet = 3	// EX: href="https://a.org/A"
	;
	// NOTE: the two byte arrays must be declared before the trie because the trie's initializer reads them
	private static final byte[] Bry__site = Bry_.new_a7("site/"), Bry__wiki = Bry_.new_a7("wiki/");
	private static final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7()
		.Add_bry_byte(Bry__wiki, Tid__wiki)
		.Add_bry_byte(Bry__site, Tid__site)
		;

	// ---- state -----------------------------------------------------------------------------
	private final Bry_rdr rdr = new Bry_rdr().Dflt_dlm_(Byte_ascii.Slash);
	private Html_atr atr;
	private byte tid;
	private int rng_bgn, rng_end;
	private int site_bgn, site_end;
	private int ttl_bgn, ttl_end;
	private int ttl_ns_id;
	private byte[] ttl_full_txt, ttl_page_db, ttl_ns_custom;

	// ---- accessors -------------------------------------------------------------------------
	/** Forwards the flag to the internal reader; marked as a test hook in the original source. */
	public void Fail_throws_err_(boolean v) {
		rdr.Fail_throws_err_(v);
	}
	/** The href attribute captured by the tag-based Parse overload. */
	public Html_atr Atr()				{return atr;}
	/** One of the Tid__ constants. */
	public byte Tid()					{return tid;}
	/** Start of the href value in the source; -1 when nothing was parsed. */
	public int Rng_bgn()				{return rng_bgn;}
	/** End of the href value in the source; -1 when nothing was parsed. */
	public int Rng_end()				{return rng_end;}
	/** Start of the site segment; -1 unless the href is of the "/site/" form. */
	public int Site_bgn()				{return site_bgn;}
	/** End of the site segment; -1 unless the href is of the "/site/" form. */
	public int Site_end()				{return site_end;}
	/** True when a non-empty site segment was recorded. */
	public boolean Site_exists()		{return site_end > site_bgn;}
	/** Decoded title text, including any namespace prefix. */
	public byte[] Ttl_full_txt()		{return ttl_full_txt;}
	/** Title with a recognized non-main namespace prefix removed; for wiki / site hrefs it has also been passed through Xoa_ttl.Replace_spaces. */
	public byte[] Ttl_page_db()			{return ttl_page_db;}
	/** True when the page part of the title is empty. */
	public boolean Ttl_is_main_page()	{return ttl_page_db.length == 0;}
	/** Id of the namespace found in the title; Xow_ns_.Tid__main when none was found. */
	public int Ttl_ns_id()				{return ttl_ns_id;}
	/** The namespace prefix exactly as written in the href, set only when it differs from the namespace's Name_ui(); else null. */
	public byte[] Ttl_ns_custom()		{return ttl_ns_custom;}
	/** Start of the title segment in the source. */
	public int Ttl_bgn()				{return ttl_bgn;}
	/** End of the title segment in the source. */
	public int Ttl_end()				{return ttl_end;}

	// ---- parsing ---------------------------------------------------------------------------
	/** Puts every parse result back to its default: wiki type, main namespace, offsets at -1, byte arrays null. */
	private void Clear() {
		tid = Tid__wiki;
		ttl_ns_id = Xow_ns_.Tid__main;
		rng_bgn = -1; rng_end = -1;
		site_bgn = -1; site_end = -1;
		ttl_bgn = -1; ttl_end = -1;
		ttl_full_txt = null;
		ttl_page_db = null;
		ttl_ns_custom = null;
	}
	/** Looks up the href attribute on the tag, remembers it, and parses its value range. */
	public boolean Parse(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, Html_tag tag) {
		this.atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__href);
		return Parse(err_wkr, hctx, atr.Val_bgn(), atr.Val_end());
	}
	/**
	 * Parses the href found at [rng_bgn, rng_end) of err_wkr.Src().
	 *
	 * @return false when rng_bgn is -1 (the anchor has no href; EX: <a/>); true otherwise
	 */
	public boolean Parse(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, int rng_bgn, int rng_end) {
		this.Clear();
		if (rng_bgn == -1) return false;	// no href; EX: <a/> vs <a href='a.org'/>
		rdr.Init_by_wkr(err_wkr, "href", rng_bgn, rng_end);
		this.rng_bgn = rng_bgn;
		this.rng_end = rng_end;
		byte[] src = err_wkr.Src();
		if (rng_bgn == rng_end) {			// empty value is handled on its own; EX: href=""
			tid = Tid__inet;
			ttl_bgn = 0;
			ttl_end = 0;
		}
		else {
			ttl_end = rng_end;				// in every form the title runs to the end of the value
			byte first_byte = src[rng_bgn];
			if (first_byte == Byte_ascii.Hash) {
				tid = Tid__anch;
				ttl_bgn = rng_bgn + 1;		// title starts after the "#"
			}
			else if (first_byte == Byte_ascii.Slash) {
				rdr.Move_by_one();			// step over the leading "/"
				if (rdr.Chk(trie) == Tid__site) {	// EX: "/site/wiki/A"
					tid = Tid__site;
					site_bgn = rdr.Pos();
					site_end = rdr.Find_fwd_lr();
					rdr.Chk(Bry__wiki);		// the segment after the site must be "wiki/"
				}
				else {
					tid = Tid__wiki;
				}
				ttl_bgn = rdr.Pos();
			}
			else {
				Parse_inet(hctx, src);
			}
		}
		Parse_ttl(hctx.Wiki__ttl_parser(), src);
		return true;
	}
	/**
	 * Classifies the href as Tid__inet and treats the whole value as the title.
	 * NOTE: the original source carried a disabled experiment here that ran the value through
	 * hctx.Wiki__url_parser() to detect page urls; neither argument is used at present.
	 */
	private void Parse_inet(Xoh_hdoc_ctx hctx, byte[] src) {
		tid = Tid__inet;
		ttl_bgn = rng_bgn;
	}
	/** Decodes the title range and, for wiki / site hrefs, splits off a recognized namespace prefix. */
	private void Parse_ttl(Xow_ttl_parser ttl_parser, byte[] src) {
		// NOTE: ttl can be empty; EX: "href='/site/en.wikipedia.org/wiki/'" "href='/wiki/'"
		if (ttl_end == ttl_bgn) {
			ttl_full_txt = ttl_page_db = Bry_.Empty;
			return;
		}
		ttl_full_txt = Gfo_url_encoder_.Href_wo_anchor.Decode(src, ttl_bgn, ttl_end);
		if (tid == Xoh_anch_href_itm.Tid__anch || tid == Xoh_anch_href_itm.Tid__inet) {
			// no namespace lookup for these; the full text is the page
			ttl_ns_id = Xow_ns_.Tid__main;
			ttl_page_db = ttl_full_txt;
			return;
		}
		if (tid != Xoh_anch_href_itm.Tid__wiki && tid != Xoh_anch_href_itm.Tid__site)
			throw Err_.new_unhandled(tid);

		int full_len = ttl_full_txt.length;
		int colon_idx = Bry_find_.Find_fwd(ttl_full_txt, Byte_ascii.Colon, 0, full_len);
		ttl_page_db = ttl_full_txt;
		if (colon_idx != Bry_find_.Not_found) {
			Object found = ttl_parser.Ns_mgr().Names_get_or_null(ttl_full_txt, 0, colon_idx);
			if (found != null) {
				Xow_ns ns = (Xow_ns)found;
				int ns_id = ns.Id();
				if (ns_id != Xow_ns_.Tid__main) {
					ttl_ns_id = ns_id;
					ttl_page_db = Bry_.Mid(ttl_full_txt, colon_idx + 1, full_len);
					// keep the prefix as written only when it is not the namespace's Name_ui()
					if (!Bry_.Match(ttl_full_txt, 0, colon_idx, ns.Name_ui()))
						ttl_ns_custom = Bry_.Mid(ttl_full_txt, 0, colon_idx);
				}
			}
		}
		ttl_page_db = Xoa_ttl.Replace_spaces(ttl_page_db);
	}
	/** True for the href types that Parse_ttl searches for a namespace prefix (wiki, site); false for anch and inet. */
	public static boolean Ns_exists(byte tid) {
		if (tid == Tid__wiki || tid == Tid__site) return true;
		if (tid == Tid__anch || tid == Tid__inet) return false;
		throw Err_.new_unhandled(tid);
	}
}

View File

@@ -17,8 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis.anchs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
import org.junit.*; import gplx.core.brys.*; import gplx.xowa.wikis.ttls.*;
public class Xoh_anch_href_parser_tst {
private final Xoh_anch_href_parser_fxt fxt = new Xoh_anch_href_parser_fxt();
public class Xoh_anch_href_itm_tst {
private final Xoh_anch_href_itm_fxt fxt = new Xoh_anch_href_itm_fxt();
@Test public void Site() {
fxt.Test__parse("/site/A/wiki/B", "A", "B");
}
@@ -34,28 +34,24 @@ public class Xoh_anch_href_parser_tst {
@Test public void Inet() {
fxt.Test__parse("http://a.org", "", "http://a.org");
}
// @Test public void Inet__mw() {
// fxt.Test__parse("https://en.wikipedia.org/wiki/A", "en.wikipedia.org", "A");
// }
@Test public void Fail__1st_seg_must_be_site_or_wiki() {
fxt.Test__parse__fail("/fail/A", "failed trie check: mid='fail/A' ctx='Main_Page' wkr='anch.href' excerpt=/fail/A");
fxt.Test__parse__fail("/fail/A", "failed trie check: mid='fail/A' page='Main_Page' sect='href' text=/fail/A");
}
@Test public void Fail__2nd_seg_must_be_wiki() {
fxt.Test__parse__fail("/site/A/B/C", "failed check: chk='wiki/' ctx='Main_Page' wkr='anch.href' excerpt=/site/A/B/C");
fxt.Test__parse__fail("/site/A/B/C", "failed check: chk='wiki/' page='Main_Page' sect='href' text=/site/A/B/C");
}
}
class Xoh_anch_href_parser_fxt extends Xoh_itm_parser_fxt_base {
private final Xoae_app app;
private final Xoh_anch_href_parser parser = new Xoh_anch_href_parser();
private final Xow_ttl_parser ttl_parser;
public Xoh_anch_href_parser_fxt() {
this.app = Xoa_app_fxt.app_();
ttl_parser = Xoa_app_fxt.wiki_tst_(app);
}
class Xoh_anch_href_itm_fxt extends Xoh_itm_parser_fxt { private final Xoh_anch_href_itm parser = new Xoh_anch_href_itm();
@Override public Xoh_itm_parser Parser_get() {return parser;}
public void Test__parse(String src_str, String expd_site, String expd_page) {
Exec_parse(src_str);
Tfds.Eq_str(expd_site, parser.Site_bgn() == -1 ? "" : String_.new_u8(src, parser.Site_bgn(), parser.Site_end()));
Tfds.Eq_str(expd_page, String_.new_u8(src, parser.Page_bgn(), parser.Page_end()));
Tfds.Eq_str(expd_page, String_.new_u8(src, parser.Ttl_bgn(), parser.Ttl_end()));
}
@Override public void Exec_parse_hook(Bry_rdr owner_rdr, int src_bgn, int src_end) {
parser.Parse(owner_rdr, app, ttl_parser, src_bgn, src_end);
@Override public void Exec_parse_hook(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, int src_bgn, int src_end) {
parser.Parse(err_wkr, hctx, src_bgn, src_end);
}
}

View File

@@ -1,135 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis.anchs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
import gplx.core.brys.*; import gplx.core.btries.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
import gplx.xowa.wikis.ttls.*; import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.domains.*; import gplx.xowa.apps.metas.*;
/**
 * Parses the value of an anchor's href attribute into type, site range and page range.
 *
 * Four shapes are recognized (see the Tid__ constants): "/wiki/A", "/site/domain/wiki/A", "#A",
 * and anything else, which is classified as Tid__inet. Page_bry, Page_ttl and Rel_nofollow_exists
 * are computed on first use and cached until the next Parse.
 *
 * An instance is reusable. Every Parse call begins by resetting all per-href state, so that
 * neither an anchor without an href nor an empty href can report values from the previous anchor.
 */
public class Xoh_anch_href_parser implements Xoh_itm_parser {
	private byte[] page_bry; private Xoa_ttl page_ttl; private Xoa_app app; private Xow_ttl_parser ttl_parser;
	private final Bry_rdr rdr = new Bry_rdr().Dflt_dlm_(Byte_ascii.Slash);
	public void Fail_throws_err_(boolean v) {rdr.Fail_throws_err_(v);}// TEST
	/** The href attribute captured by the tag-based Parse overload. */
	public Html_atr Atr() {return atr;} private Html_atr atr;
	/** One of the Tid__ constants. */
	public byte Tid() {return tid;} private byte tid;
	/** True for wiki / site hrefs; false for anch / inet hrefs and after a Parse that found no href. */
	public boolean Tid_has_ns() {return tid_has_ns;} private boolean tid_has_ns;
	/** Source bytes of the reader passed to the last Parse call that found an href. */
	public byte[] Src() {return src;} private byte[] src;
	/** Start of the href value; -1 when the last Parse found no href. */
	public int Val_bgn() {return val_bgn;} private int val_bgn;
	/** End of the href value; -1 when the last Parse found no href. */
	public int Val_end() {return val_end;} private int val_end;
	/** Start of the site segment; -1 unless the href is of the "/site/" form. */
	public int Site_bgn() {return site_bgn;} private int site_bgn;
	/** End of the site segment; -1 unless the href is of the "/site/" form. */
	public int Site_end() {return site_end;} private int site_end;
	/** True when a non-empty site segment was recorded. */
	public boolean Site_exists() {return site_end > site_bgn;}
	/**
	 * True when a site segment exists and its parsed domain has type Xow_domain_tid_.Int__other.
	 * The domain is parsed once per href and the answer is cached in rel_nofollow_exists.
	 */
	public boolean Rel_nofollow_exists() {
		if (Site_exists()) {
			if (rel_nofollow_exists == Bool_.__byte) {	// not yet calculated for this href
				Xow_domain_itm itm = Xow_domain_itm_.parse(Bry_.Mid(src, site_bgn, site_end));
				rel_nofollow_exists = itm.Domain_type_id() == Xow_domain_tid_.Int__other ? Bool_.Y_byte : Bool_.N_byte;
			}
			return rel_nofollow_exists == Bool_.Y_byte;
		}
		else
			return false;
	}	private byte rel_nofollow_exists;
	/** Start of the page segment in the source. */
	public int Page_bgn() {return page_bgn;} private int page_bgn;
	/** End of the page segment in the source. */
	public int Page_end() {return page_end;} private int page_end;
	/** Decoded page segment, or Xoa_page_.Main_page_bry when the segment is empty; cached after the first call. */
	public byte[] Page_bry() {
		if (page_bry == null) {
			if (page_end - page_bgn == 0)	// NOTE: page can be empty; EX: href="/site/en.wikipedia.org/wiki/"
				page_bry = Xoa_page_.Main_page_bry;
			else
				page_bry = gplx.langs.htmls.encoders.Gfo_url_encoder_.Href.Decode(src, page_bgn, page_end);
		}
		return page_bry;
	}
	/**
	 * Title parsed from Page_bry; cached after the first call.
	 * When a site segment was recorded, the title is parsed by the wiki obtained from app for that site
	 * instead of by the parser passed to Parse. As a side effect page_bry is replaced with the title's Full_db_w_anch().
	 */
	public Xoa_ttl Page_ttl() {
		if (page_ttl == null) {
			page_bry = this.Page_bry();
			if (site_bgn != -1)
				ttl_parser = app.Wiki_mgri().Get_by_key_or_make_init_n(Bry_.Mid(src, site_bgn, site_end));
			page_ttl = ttl_parser.Ttl_parse(page_bry);
			page_bry = page_ttl.Full_db_w_anch();
		}
		return page_ttl;
	}
	/** Namespace id of the page: main for anch / inet hrefs, else the namespace of Page_ttl. */
	public int Page_ns_id() {
		switch (tid) {
			case Xoh_anch_href_parser.Tid__anch:
			case Xoh_anch_href_parser.Tid__inet:	return Xow_ns_.Tid__main;	// for purposes of hzip/make, assume main_ns
			case Xoh_anch_href_parser.Tid__wiki:
			case Xoh_anch_href_parser.Tid__site:	return this.Page_ttl().Ns().Id();
			default:								throw Err_.new_unhandled(tid);
		}
	}
	/** True when the page is in the file namespace and page_bry starts with Xow_ns_.Alias__image__bry. */
	public boolean Page_ns_id_is_image() {return this.Page_ns_id() == Xow_ns_.Tid__file && Bry_.Has_at_bgn(page_bry, Xow_ns_.Alias__image__bry);}
	/** Resets everything derived from one href so that a reused instance never reports values from a previous anchor. */
	private void Clear() {
		tid = Tid__wiki;
		tid_has_ns = Bool_.N;
		rel_nofollow_exists = Bool_.__byte;
		val_bgn = val_end = site_bgn = site_end = page_bgn = page_end = -1;
		page_bry = null; page_ttl = null;
	}
	/** Looks up the href attribute on the tag, remembers it, and parses its value range. */
	public boolean Parse(Bry_rdr owner_rdr, Xoa_app app, Xow_ttl_parser ttl_parser, Html_tag tag) {
		this.atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__href);
		return Parse(owner_rdr, app, ttl_parser, atr.Val_bgn(), atr.Val_end());
	}
	/**
	 * Parses the href found at [href_bgn, href_end) of owner_rdr.Src().
	 *
	 * @return false when href_bgn is -1 (no href on the anchor); true otherwise
	 */
	public boolean Parse(Bry_rdr owner_rdr, Xoa_app app, Xow_ttl_parser ttl_parser, int href_bgn, int href_end) {
		this.Clear();	// reset before the early exits below; else tid_has_ns / site / page ranges of the previous href survive
		if (href_bgn == -1) return false;
		rdr.Init_by_sub(owner_rdr, "anch.href", href_bgn, href_end);
		this.src = owner_rdr.Src(); this.ttl_parser = ttl_parser; this.app = app;
		this.val_bgn = href_bgn; this.val_end = href_end;
		if (val_end == val_bgn) {
			tid = Tid__inet;	// tid_has_ns stays false from Clear
			page_bgn = page_end = 0;
			return true;		// handle empty String separately; EX: href=""
		}
		int pos = href_bgn;
		switch (src[pos]) {
			case Byte_ascii.Hash:
				tid = Tid__anch; tid_has_ns = Bool_.N;
				page_bgn = pos + 1;		// position page_bgn after #
				page_end = val_end;		// anch ends at end of href value
				break;
			default:
				tid = Tid__inet; tid_has_ns = Bool_.N;
				page_bgn = pos;			// entire href value is the page
				page_end = val_end;
				break;
			case Byte_ascii.Slash:
				rdr.Move_by_one();		// skip "/"
				if (rdr.Chk(trie) == Tid__site) {	// EX: "/site/wiki/A"
					tid = Tid__site; tid_has_ns = Bool_.Y;
					site_bgn = rdr.Pos();
					site_end = rdr.Find_fwd_lr();
					rdr.Chk(Bry__wiki);	// the segment after the site must be "wiki/"
				}
				else {
					tid = Tid__wiki; tid_has_ns = Bool_.Y;
				}
				page_bgn = rdr.Pos();
				page_end = rdr.Src_end();
				break;
		}
		return true;
	}
	public static final byte
	  Tid__wiki = 0	// EX: href="/wiki/A"
	, Tid__site = 1	// EX: href="/site/en.wikipedia.org/wiki/A"
	, Tid__anch = 2	// EX: href="#A"
	, Tid__inet = 3	// EX: href="https://a.org/A"
	;
	private static final byte[] Bry__site = Bry_.new_a7("site/"), Bry__wiki = Bry_.new_a7("wiki/");
	private static final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7()
		.Add_bry_byte(Bry__wiki, Tid__wiki)
		.Add_bry_byte(Bry__site, Tid__site)
		;
}

View File

@@ -17,7 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
import gplx.core.brys.*; import gplx.core.brys.fmtrs.*;
import gplx.xowa.files.*; import gplx.xowa.htmls.core.makes.*; import gplx.xowa.htmls.core.wkrs.imgs.atrs.*;
import gplx.langs.htmls.*; import gplx.xowa.htmls.core.wkrs.bfr_args.*;
import gplx.xowa.files.*; import gplx.xowa.htmls.core.makes.*; import gplx.xowa.htmls.core.wkrs.imgs.atrs.*;
import gplx.xowa.htmls.core.htmls.*;
import gplx.xowa.parsers.lnkis.*;
public class Xoh_file_html_fmtr__base implements Xoh_file_img_wkr {
@@ -33,7 +34,7 @@ public class Xoh_file_html_fmtr__base implements Xoh_file_img_wkr {
, "</a>"
), "a_href", "a_xowa_title", "html"
);
@gplx.Virtual public void Html_full_img(Bry_bfr tmp_bfr, Xoh_wtr_ctx hctx, Xoae_page page, Xof_file_itm xfer_itm, int uid
@gplx.Virtual public void Html_full_img(Bry_bfr tmp_bfr, Xoh_wtr_ctx hctx, Xoae_page page, byte[] src, Xof_file_itm xfer_itm, int uid
, byte[] a_href, byte a_cls, byte a_rel, byte[] a_title, byte[] a_xowa_title
, int img_w, int img_h, byte[] img_src, byte[] img_alt, byte img_cls, byte[] img_cls_other
) {
@@ -46,21 +47,27 @@ public class Xoh_file_html_fmtr__base implements Xoh_file_img_wkr {
+ "<img id=\"xowa_file_img_~{uid}\" alt=\"~{img_alt}\"~{img_core}~{img_class} /></a>"
, "uid", "a_href", "a_class", "a_rel", "a_title", "a_xowa_title", "img_core", "img_alt", "img_class"
);
@gplx.Virtual public void Html_thumb_core(Bry_bfr tmp_bfr, int uid, byte[] div1_halign, int div2_width, byte[] div2_content) {
@gplx.Virtual public void Html_thumb_core(Bry_bfr tmp_bfr, boolean mode_is_hdump, int uid, byte[] div1_halign, int div2_width, byte[] div2_content) {
scratch_bfr.Add(Bry_style_bgn);
scratch_bfr.Add_int_variable(div2_width);
scratch_bfr.Add(Bry_style_end);
fmtr_thumb_core.Bld_bfr_many(tmp_bfr, uid, div1_halign, scratch_bfr.To_bry_and_clear(), div2_content);
} private static final byte[] Bry_style_bgn = Bry_.new_a7("style=\"width:"), Bry_style_end = Bry_.new_a7("px;\"");
thumb_div_id_atr.Bfr_arg__clear();
if (!mode_is_hdump)
thumb_div_id_atr.Set_by_arg(thum_div_id_val.Set(Bry__id, uid));
fmtr_thumb_core.Bld_bfr_many(tmp_bfr, thumb_div_id_atr, div1_halign, scratch_bfr.To_bry_and_clear(), div2_content);
}
private static final byte[] Bry_style_bgn = Bry_.new_a7("style=\"width:"), Bry_style_end = Bry_.new_a7("px;\"");
private final Bfr_arg__html_atr thumb_div_id_atr = new Bfr_arg__html_atr(Html_atr_.Bry__id);
private final Bfr_arg__id thum_div_id_val = new Bfr_arg__id();
private final byte[] Bry__id = Bry_.new_a7("xowa_file_div_");
protected Bry_fmtr fmtr_thumb_core = Bry_fmtr.new_(String_.Concat_lines_nl_skip_last // REF.MW: Linker.php|makeImageLink2
( "<div class=\"thumb t~{div1_halign}\">"
, " <div id=\"xowa_file_div_~{uid}\" class=\"thumbinner\" ~{style}>"
, "~{div2_content}"
, " <div~{div_id} class=\"thumbinner\" ~{style}>"
, "~{div2_content}"
, " </div>"
, "</div>"
, ""
), "uid", "div1_halign", "style", "div2_content"
), "div_id", "div1_halign", "style", "div2_content"
);
public byte[] Html_thumb_part_img(Bry_bfr tmp_bfr, Xoae_page page, Xof_file_itm xfer_itm, Xop_lnki_tkn lnki, int uid, byte[] a_href, byte[] img_src, byte[] img_alt) {
Html_thumb_part_img(tmp_bfr, page, xfer_itm, uid, a_href, lnki.Ttl().Page_txt(), xfer_itm.Html_w(), xfer_itm.Html_h(), img_src, img_alt);
@@ -71,10 +78,7 @@ public class Xoh_file_html_fmtr__base implements Xoh_file_img_wkr {
}
private Bry_fmtr fmtr_thumb_part_img = Bry_fmtr.new_(String_.Concat_lines_nl_skip_last
( ""
, " <div>"
, " <a href=\"~{a_href}\" class=\"image\" title=\"~{a_title}\">"
, " <img id=\"xowa_file_img_~{uid}\"~{img_core} alt=\"~{img_alt}\" />"
, " </a>"
, " <div><a href=\"~{a_href}\" class=\"image\" title=\"~{a_title}\"><img id=\"xowa_file_img_~{uid}\"~{img_core} alt=\"~{img_alt}\" /></a>"
, " </div>"
), "uid", "a_href", "a_title", "img_core", "img_alt");
@@ -146,4 +150,7 @@ public class Xoh_file_html_fmtr__base implements Xoh_file_img_wkr {
), "uid", "a_width", "a_max_width", "a_href", "a_xowa_title", "img_src");
public static final Xoh_file_html_fmtr__base Base = new Xoh_file_html_fmtr__base();
public static byte[] Escape_xowa_title(byte[] lnki_ttl) {
return gplx.langs.htmls.encoders.Gfo_url_encoder_.Href_quotes.Encode(lnki_ttl); // must encode xowa_title, particularly quotes; EX: xowa_title="A"b.png"; PAGE:en.w:Earth DATE:2015-11-27
}
}

View File

@@ -20,11 +20,12 @@ import gplx.core.brys.fmtrs.*;
import gplx.langs.htmls.*;
import gplx.xowa.files.*; import gplx.xowa.htmls.core.makes.*;
import gplx.xowa.htmls.core.wkrs.imgs.atrs.*;
import gplx.xowa.parsers.lnkis.*;
public class Xoh_file_html_fmtr__hdump extends Xoh_file_html_fmtr__base {
private final Bry_bfr tmp_bfr = Bry_bfr.reset_(128);
@Override public void Html_full_img(Bry_bfr bfr, gplx.xowa.htmls.core.htmls.Xoh_wtr_ctx hctx, Xoae_page page, Xof_file_itm xfer_itm, int uid, byte[] a_href, byte a_cls, byte a_rel, byte[] a_title, byte[] a_xowa_title, int img_w, int img_h, byte[] img_src, byte[] img_alt, byte img_cls, byte[] img_cls_other) {
@Override public void Html_full_img(Bry_bfr bfr, gplx.xowa.htmls.core.htmls.Xoh_wtr_ctx hctx, Xoae_page page, byte[] src, Xof_file_itm xfer_itm, int uid, byte[] a_href, byte a_cls, byte a_rel, byte[] a_title, byte[] a_xowa_title, int img_w, int img_h, byte[] img_src, byte[] img_alt, byte img_cls, byte[] img_cls_other) {
tmp_bfr.Add_str_a7(" data-xoimg=\"");
tmp_bfr.Add_int_digits(1, xfer_itm.Lnki_type()).Add_byte_pipe();
tmp_bfr.Add_int_digits(1, Xop_lnki_type.To_tid(xfer_itm.Lnki_type())).Add_byte_pipe();
tmp_bfr.Add_int_variable(xfer_itm.Lnki_w()).Add_byte_pipe();
tmp_bfr.Add_int_variable(xfer_itm.Lnki_h()).Add_byte_pipe();
tmp_bfr.Add_double(xfer_itm.Lnki_upright()).Add_byte_pipe();
@@ -34,7 +35,7 @@ public class Xoh_file_html_fmtr__hdump extends Xoh_file_html_fmtr__base {
// , a_href, Xoh_lnki_consts.A_cls_to_bry(a_cls), Xoh_lnki_consts.A_rel_to_bry(a_rel), a_title
// , img_alt, tmp_bfr.To_bry_and_clear(), arg_img_core.Init(uid, img_src, img_w, img_h), Xoh_img_cls_.To_html(img_cls, img_cls_other));
fmtr__img__full.Bld_bfr_many(bfr
, a_href, Xoh_lnki_consts.A_cls_to_bry(a_cls), Xoh_lnki_consts.A_rel_to_bry(a_rel), a_title, Xoa_ttl.Replace_spaces(a_xowa_title)
, a_href, Xoh_lnki_consts.A_cls_to_bry(a_cls), Xoh_lnki_consts.A_rel_to_bry(a_rel), a_title, a_xowa_title
, img_alt, tmp_bfr.To_bry_and_clear(), arg_img_core.Init(uid, Bry_.Empty, 0, 0), Xoh_img_cls_.To_html(img_cls, img_cls_other));
}
private Bry_fmtr fmtr__img__full = Bry_fmtr.new_

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.htmls.core.wkrs.lnkis.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
import gplx.xowa.files.*; import gplx.xowa.htmls.core.htmls.*;
public interface Xoh_file_img_wkr {
void Html_full_img(Bry_bfr tmp_bfr, Xoh_wtr_ctx hctx, Xoae_page page, Xof_file_itm xfer_itm, int uid
void Html_full_img(Bry_bfr tmp_bfr, Xoh_wtr_ctx hctx, Xoae_page page, byte[] src, Xof_file_itm xfer_itm, int uid
, byte[] a_href, byte a_cls, byte a_rel, byte[] a_title, byte[] a_xowa_title
, int img_w, int img_h, byte[] img_src, byte[] img_alt, byte img_cls, byte[] img_cls_other
);

View File

@@ -16,12 +16,10 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
import gplx.core.brys.*;
import gplx.langs.htmls.*;
import gplx.core.brys.*; import gplx.core.bits.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.encoders.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.makes.imgs.*; import gplx.xowa.htmls.core.wkrs.imgs.atrs.*;
import gplx.xowa.langs.*; import gplx.xowa.langs.msgs.*;
import gplx.xowa.wikis.nss.*;
import gplx.xowa.files.*;
import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.makes.imgs.*; import gplx.xowa.htmls.core.wkrs.imgs.atrs.*;
import gplx.xowa.wikis.nss.*; import gplx.xowa.files.*;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.tmpls.*;
public class Xoh_file_wtr__basic {
private final Xowe_wiki wiki; private final Xow_html_mgr html_mgr; private final Xoh_html_wtr html_wtr; private final Bry_bfr_mkr bfr_mkr; private final Bry_bfr scratch_bfr = Bry_bfr.reset_(Io_mgr.Len_kb);
@@ -95,7 +93,7 @@ public class Xoh_file_wtr__basic {
private void Write_file_audio(Bry_bfr bfr, Xop_ctx ctx, Xoh_wtr_ctx hctx, byte[] src, Xop_lnki_tkn lnki, int uid, int div_width, byte[] lnki_halign_bry, byte[] lnki_href, byte[] img_orig_src, byte[] alt) {
byte[] content = Arg_content_audio(lnki, ctx, hctx, src, uid, lnki_href, img_orig_src, alt);
if (lnki.Media_icon())
html_fmtr.Html_thumb_core(bfr, uid, lnki_halign_bry, div_width, content);
html_fmtr.Html_thumb_core(bfr, hctx.Mode_is_hdump(), uid, lnki_halign_bry, div_width, content);
else
bfr.Add(content);
}
@@ -104,7 +102,7 @@ public class Xoh_file_wtr__basic {
boolean video_is_thumb = Xop_lnki_type.Id_defaults_to_thumb(lnki.Lnki_type());
byte[] content = Arg_content_video(ctx, hctx, src, lnki, xfer_itm, uid, video_is_thumb, lnki_href, img_view_src, img_orig_src, alt);
if (video_is_thumb)
html_fmtr.Html_thumb_core(bfr, uid, lnki_halign_bry, div_width, content);
html_fmtr.Html_thumb_core(bfr, hctx.Mode_is_hdump(), uid, lnki_halign_bry, div_width, content);
else
bfr.Add(content);
}
@@ -119,7 +117,7 @@ public class Xoh_file_wtr__basic {
if (lnki_is_thumbable) { // is "thumb"
if (bfr.Len() > 0) bfr.Add_byte_nl();
byte[] content = Arg_content_thumb(lnki_file_wkr, ctx, hctx, src, lnki, xfer_itm, uid, lnki_href, img_view_src, img_orig_src, alt, lnki_ttl, anchor_title);
html_fmtr.Html_thumb_core(bfr, uid, lnki_halign_bry, div_width, content);
html_fmtr.Html_thumb_core(bfr, hctx.Mode_is_hdump(), uid, lnki_halign_bry, div_width, content);
}
else {
if ( cfg_alt_defaults_to_caption
@@ -139,15 +137,15 @@ public class Xoh_file_wtr__basic {
byte img_cls_tid = lnki.Border() == Bool_.Y_byte ? Xoh_img_cls_.Tid__thumbborder : Xoh_img_cls_.Tid__none;
byte[] img_cls_other = lnki.Lnki_cls(); // PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3; DATE:2014-09-06
if (lnki_link_tkn == Arg_nde_tkn.Null) // full
lnki_file_wkr.Html_full_img(bfr, hctx, page, xfer_itm, uid, lnki_href, Xoh_lnki_consts.Tid_a_cls_image, Xoh_lnki_consts.Tid_a_rel_none, anchor_title, lnki_ttl, xfer_itm.Html_w(), xfer_itm.Html_h(), img_view_src, alt, img_cls_tid, img_cls_other);
lnki_file_wkr.Html_full_img(bfr, hctx, page, src, xfer_itm, uid, lnki_href, Xoh_lnki_consts.Tid_a_cls_image, Xoh_lnki_consts.Tid_a_rel_none, anchor_title, Xoh_file_html_fmtr__base.Escape_xowa_title(lnki_ttl), xfer_itm.Html_w(), xfer_itm.Html_h(), img_view_src, alt, img_cls_tid, img_cls_other);
else { // thumb
Arg_itm_tkn link_tkn = lnki_link_tkn.Val_tkn();
byte[] link_ref = link_tkn.Dat_to_bry(src);
byte[] link_ref_new = tmp_link_parser.Parse(tmp_bfr, tmp_url, wiki, link_ref, lnki_href);
link_ref = link_ref_new == null ? lnki_href: link_ref_new; // if parse fails, then assign to lnki_href; EX:link={{{1}}}
link_ref = gplx.langs.htmls.encoders.Gfo_url_encoder_.Href_quotes.Encode(link_ref); // must encode quotes; PAGE:en.w:List_of_cultural_heritage_sites_in_Punjab,_Pakistan; DATE:2014-07-16
if (Bry_.Len_gt_0(tmp_link_parser.Html_xowa_ttl())) lnki_ttl = tmp_link_parser.Html_xowa_ttl();
lnki_file_wkr.Html_full_img(bfr, hctx, page, xfer_itm, uid, link_ref, tmp_link_parser.Html_anchor_cls(), tmp_link_parser.Html_anchor_rel(), anchor_title, lnki_ttl, xfer_itm.Html_w(), xfer_itm.Html_h(), img_view_src, alt, img_cls_tid, img_cls_other);
byte[] link_arg = Xoa_ttl.Replace_spaces(link_tkn.Dat_to_bry(src)); // replace spaces with unders, else "/wiki/File:A b.ogg" instead of "A_b.ogg"; DATE:2015-11-27
byte[] link_arg_html = tmp_link_parser.Parse(tmp_bfr, tmp_url, wiki, link_arg, lnki_href);
link_arg = link_arg_html == null ? lnki_href: link_arg_html; // if parse fails, then assign to lnki_href; EX:link={{{1}}}
link_arg = Gfo_url_encoder_.Href_qarg.Encode(link_arg); // must encode quotes; PAGE:en.w:List_of_cultural_heritage_sites_in_Punjab,_Pakistan; DATE:2014-07-16
// if (Bry_.Len_gt_0(tmp_link_parser.Html_xowa_ttl())) lnki_ttl = tmp_link_parser.Html_xowa_ttl(); // DELETE: not sure why this is here; breaks test; DATE:2015-11-28
lnki_file_wkr.Html_full_img(bfr, hctx, page, src, xfer_itm, uid, link_arg, tmp_link_parser.Html_anchor_cls(), tmp_link_parser.Html_anchor_rel(), anchor_title, Xoh_file_html_fmtr__base.Escape_xowa_title(lnki_ttl), xfer_itm.Html_w(), xfer_itm.Html_h(), img_view_src, alt, img_cls_tid, img_cls_other);
}
if (div_align_exists) bfr.Add(Html_tag_.Div_rhs); // close div from above
}
@@ -158,7 +156,7 @@ public class Xoh_file_wtr__basic {
byte[] lnki_alt_html = wiki.Html_mgr().Imgs_mgr().Alt_in_caption().Val() ? Arg_alt_html(ctx, hctx, src, lnki) : Bry_.Empty;
byte img_cls_tid = xfer_itm.File_exists() ? Xoh_img_cls_.Tid__thumbimage : Xoh_img_cls_.Tid__none;
Bry_bfr tmp_bfr = bfr_mkr.Get_k004();
lnki_file_wkr.Html_full_img(tmp_bfr, hctx, page, xfer_itm, uid, lnki_href, Xoh_lnki_consts.Tid_a_cls_image, Xoh_lnki_consts.Tid_a_rel_none, anchor_title, lnki_ttl, xfer_itm.Html_w(), xfer_itm.Html_h(), view_src, lnki_alt_text, img_cls_tid, Xoh_img_cls_.Bry__none);
lnki_file_wkr.Html_full_img(tmp_bfr, hctx, page, src, xfer_itm, uid, lnki_href, Xoh_lnki_consts.Tid_a_cls_image, Xoh_lnki_consts.Tid_a_rel_none, anchor_title, Xoh_file_html_fmtr__base.Escape_xowa_title(lnki_ttl), xfer_itm.Html_w(), xfer_itm.Html_h(), view_src, lnki_alt_text, img_cls_tid, Xoh_img_cls_.Bry__none);
byte[] thumb = tmp_bfr.To_bry_and_clear();
html_fmtr.Html_thumb_file_image(tmp_bfr, thumb, Arg_caption_div(ctx, hctx, src, lnki, uid, img_orig_src, lnki_href), lnki_alt_html);
return tmp_bfr.To_bry_and_rls();

View File

@@ -40,9 +40,9 @@ public class Xoh_file_wtr_audio_video_tst {
( "[[File:A.ogg]]", String_.Concat_lines_nl_skip_last
( " <div id=\"xowa_media_div\">"
, " <div>"
, " <a href=\"/wiki/File:A.ogg\" class=\"image\" title=\"A.ogg\">"
, " <img id=\"xowa_file_img_0\" src=\"file:///mem/wiki/repo/trg/orig/4/2/A.ogg\" width=\"220\" height=\"-1\" alt=\"\" />" // note that src still exists (needed for clicking)
, " </a>"
+ "<a href=\"/wiki/File:A.ogg\" class=\"image\" title=\"A.ogg\">"
+ "<img id=\"xowa_file_img_0\" src=\"file:///mem/wiki/repo/trg/orig/4/2/A.ogg\" width=\"220\" height=\"-1\" alt=\"\" />" // note that src still exists (needed for clicking)
+ "</a>"
, " </div>"
, " <div>"
, " <a id=\"xowa_file_play_0\" href=\"file:///mem/wiki/repo/trg/orig/4/2/A.ogg\" xowa_title=\"A.ogg\" class=\"xowa_anchor_button\" style=\"width:218px;max-width:220px;\">"
@@ -117,9 +117,9 @@ public class Xoh_file_wtr_audio_video_tst {
( "[[File:A.ogv|400px|a|alt=b]]", String_.Concat_lines_nl_skip_last
( " <div id=\"xowa_media_div\">"
, " <div>"
, " <a href=\"/wiki/File:A.ogv\" class=\"image\" title=\"A.ogv\">"
, " <img id=\"xowa_file_img_0\" src=\"file:///\" width=\"400\" height=\"0\" alt=\"b\" />"
, " </a>"
+ "<a href=\"/wiki/File:A.ogv\" class=\"image\" title=\"A.ogv\">"
+ "<img id=\"xowa_file_img_0\" src=\"file:///\" width=\"400\" height=\"0\" alt=\"b\" />"
+ "</a>"
, " </div>"
, " <div>"
, " <a id=\"xowa_file_play_0\" href=\"file:///mem/wiki/repo/trg/orig/d/0/A.ogv\" xowa_title=\"A.ogv\" class=\"xowa_anchor_button\" style=\"width:398px;max-width:400px;\">"
@@ -171,9 +171,9 @@ public class Xoh_file_wtr_audio_video_tst {
, " <div id=\"xowa_file_div_0\" class=\"thumbinner\" style=\"width:220px;\">" // NOTE:220px is default w for "non-found" thumb; DATE:2014-09-24
, " <div id=\"xowa_media_div\">"
, " <div>"
, " <a href=\"/wiki/File:A.ogv\" class=\"image\" title=\"A.ogv\">"
, " <img id=\"xowa_file_img_0\" src=\"file:///\" width=\"400\" height=\"0\" alt=\"b\" />"
, " </a>"
+ "<a href=\"/wiki/File:A.ogv\" class=\"image\" title=\"A.ogv\">"
+ "<img id=\"xowa_file_img_0\" src=\"file:///\" width=\"400\" height=\"0\" alt=\"b\" />"
+ "</a>"
, " </div>"
, " <div>"
, " <a id=\"xowa_file_play_0\" href=\"file:///mem/wiki/repo/trg/orig/d/0/A.ogv\" xowa_title=\"A.ogv\" class=\"xowa_anchor_button\" style=\"width:398px;max-width:400px;\">"
@@ -205,9 +205,9 @@ public class Xoh_file_wtr_audio_video_tst {
, " <div id=\"xowa_file_div_0\" class=\"thumbinner\" style=\"width:220px;\">" // NOTE:220px is default w for "non-found" thumb; DATE:2014-09-24
, " <div id=\"xowa_media_div\">"
, " <div>"
, " <a href=\"/wiki/File:A.webm\" class=\"image\" title=\"A.webm\">"
, " <img id=\"xowa_file_img_0\" src=\"file:///\" width=\"400\" height=\"0\" alt=\"b\" />"
, " </a>"
+ "<a href=\"/wiki/File:A.webm\" class=\"image\" title=\"A.webm\">"
+ "<img id=\"xowa_file_img_0\" src=\"file:///\" width=\"400\" height=\"0\" alt=\"b\" />"
+ "</a>"
, " </div>"
, " <div>"
, " <a id=\"xowa_file_play_0\" href=\"file:///mem/wiki/repo/trg/orig/3/4/A.webm\" xowa_title=\"A.webm\" class=\"xowa_anchor_button\" style=\"width:398px;max-width:400px;\">"

View File

@@ -29,6 +29,13 @@ public class Xoh_file_wtr_basic_tst {
));
fxt.Wtr_cfg().Lnki__title_(false);
}
@Test public void Xowa_title__quotes() { // PURPOSE: xowa_title should encode quotes DATE:2015-11-27
fxt.Test_parse_page_wiki_str
( "[[File:A%22b.png]]"
, String_.Concat_lines_nl_skip_last
( "<a href=\"/wiki/File:A%22b.png\" class=\"image\" xowa_title=\"A%22b.png\"><img id=\"xowa_file_img_0\" alt=\"\" src=\"file:///mem/wiki/repo/trg/orig/d/4/A%22b.png\" width=\"0\" height=\"0\" /></a>"
));
}
// Parses a File lnki with an explicit size ("9x8px") and alt text ("abc").
// The expected html is built by Xop_fxt.html_img_none from the href title, the alt, the thumb src ("9px.png" under the thumb repo path) and the xowa_title.
@Test public void Img_embed() {
fxt.Test_parse_page_wiki_str("[[File:A.png|9x8px|alt=abc]]", Xop_fxt.html_img_none("File:A.png", "abc", "file:///mem/wiki/repo/trg/thumb/7/0/A.png/9px.png", "A.png"));
}
@@ -238,7 +245,7 @@ public class Xoh_file_wtr_basic_tst {
fxt.Wtr_cfg().Lnki__title_(true);
fxt.Test_parse_page_all_str
( "[[File:A.png|\n{|\n|-\n|b\n|}\n]]"
, "<a href=\"/wiki/File:A.png\" class=\"image\" title=\"b&#10;\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\" b \" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>"
, "<a href=\"/wiki/File:A.png\" class=\"image\" title=\"b \" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\" b \" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>"
);
fxt.Wtr_cfg().Lnki__title_(false);
}
@@ -248,7 +255,7 @@ public class Xoh_file_wtr_basic_tst {
fxt.Test_parse_page_all_str
( "[[File:A.png|b\nc]]"
, String_.Concat_lines_nl
( "<p><a href=\"/wiki/File:A.png\" class=\"image\" title=\"b&#10;c\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"b c\" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>"
( "<p><a href=\"/wiki/File:A.png\" class=\"image\" title=\"b c\" xowa_title=\"A.png\"><img id=\"xowa_file_img_0\" alt=\"b c\" src=\"file:///mem/wiki/repo/trg/orig/7/0/A.png\" width=\"0\" height=\"0\" /></a>"
, "</p>"
));
fxt.Init_para_n_();

View File

@@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.lnkis.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
import gplx.langs.htmls.*;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.tmpls.*;
public class Xoh_lnki_title_fmtr extends gplx.core.brys.Bfr_arg_base {
public Xoh_lnki_title_fmtr Set(byte[] src, Xop_tkn_itm tkn) {this.src = src; this.tkn = tkn; return this;}
@@ -36,8 +37,10 @@ public class Xoh_lnki_title_fmtr extends gplx.core.brys.Bfr_arg_base {
if (tkn_as_lnki.Caption_exists())
Bld_recurse(bfr, tkn_as_lnki.Caption_tkn());
else {
if (tkn_as_lnki.Ttl() != null) // guard against invalid ttls
bfr.Add(tkn_as_lnki.Ttl().Page_txt());
if (tkn_as_lnki.Ttl() != null) { // guard against invalid ttls
byte[] ttl_bry = tkn_as_lnki.Ttl().Page_txt();
Write_atr_text(bfr, ttl_bry, 0, ttl_bry.length); // handle titles with quotes; PAGE:s.w:Styx_(band) DATE:2015-11-29
}
}
if (tkn_as_lnki.Tail_bgn() != -1)
bfr.Add_mid(src, tkn_as_lnki.Tail_bgn(), tkn_as_lnki.Tail_end());
@@ -63,12 +66,13 @@ public class Xoh_lnki_title_fmtr extends gplx.core.brys.Bfr_arg_base {
byte b = src[i];
switch (b) {
case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Tab: // NOTE: escape ws so that it renders correctly in tool tips
case Byte_ascii.Quote: case Byte_ascii.Lt: case Byte_ascii.Gt: case Byte_ascii.Amp: // NOTE: escape possible javascript injection characters
bfr.Add(Escape_bgn);
bfr.Add_int_variable(b);
bfr.Add_byte(Byte_ascii.Semic);
bfr.Add_byte_space();
break;
default: bfr.Add_byte(b); break;
case Byte_ascii.Quote: bfr.Add(Html_entity_.Quote_bry); break;
case Byte_ascii.Lt: bfr.Add(Html_entity_.Lt_bry); break;
case Byte_ascii.Gt: bfr.Add(Html_entity_.Gt_bry); break;
case Byte_ascii.Amp: bfr.Add(Html_entity_.Amp_bry); break;
default: bfr.Add_byte(b); break;
}
}
}

View File

@@ -23,8 +23,11 @@ public class Xoh_lnki_title_fmtr_tst {
fxt.Test_parse("a b c", "a b c");
fxt.Test_parse("a ''b'' c", "a b c");
fxt.Test_parse("a <i>b</i> c", "a b c");
fxt.Test_parse("a\nb", "a&#10;b");
fxt.Test_parse("a\"b", "a&#34;b");
fxt.Test_parse("a\nb", "a b");
fxt.Test_parse("a\"b", "a&quot;b");
}
// A caption-less lnki whose title contains a double-quote: the expected output is the title text with the quote written as the "&quot;" entity.
@Test public void Lnki__quotes() { // PURPOSE: handle titles with quotes; PAGE:s.w:Styx_(band) DATE:2015-11-29
fxt.Test_parse("[[A\"B]]", "A&quot;B");
}
}
class Xoh_lnki_title_fmtr_fxt {

View File

@@ -41,6 +41,7 @@ public class Xoh_lnki_wtr {
redlinks_mgr = page.Redlink_lnki_list(); // NOTE: need to set redlinks_mgr, else toc parse may fail; EX:pl.d:head_sth_off;DATE:2014-05-07
file_wtr.Init_by_page(hctx, page);
this.history_mgr = app.Usere().History_mgr();
if (hctx.Mode_is_hdump()) cfg.Lnki__id_(false);
}
public void Write(Bry_bfr bfr, Xoh_wtr_ctx hctx, byte[] src, Xop_lnki_tkn lnki) {
Xoa_ttl lnki_ttl = lnki.Ttl();
@@ -63,7 +64,7 @@ public class Xoh_lnki_wtr {
redlinks_mgr.Lnki_add(lnki);
boolean stage_is_alt = hctx.Mode_is_alt();
switch (lnki.Ns_id()) {
case Xow_ns_.Tid__media: if (!stage_is_alt) file_wtr.Write_or_queue(bfr, page, ctx, hctx, src, lnki); return; // NOTE: literal ":" has no effect; PAGE:en.w:Beethoven and [[:Media:De-Ludwig_van_Beethoven.ogg|listen]]
case Xow_ns_.Tid__media: if (!stage_is_alt) file_wtr.Write_or_queue(bfr, page, ctx, hctx, src, lnki); return; // NOTE: literal ":" has no effect; PAGE:en.w:Beethoven and [[:Media:De-Ludwig_van_Beethoven.ogg|listen]]
case Xow_ns_.Tid__file: if (!literal_link && !stage_is_alt) {file_wtr.Write_or_queue(bfr, page, ctx, hctx, src, lnki); return;} break;
case Xow_ns_.Tid__category: if (!literal_link) {page.Html_data().Ctgs_add(lnki.Ttl()); return;} break;
}
@@ -100,7 +101,7 @@ public class Xoh_lnki_wtr {
Write_caption(bfr, ctx, hctx, src, lnki, ttl_bry, true, caption_wkr);
else {
bfr.Add(Xoh_consts.A_bgn); // '<a href="'
app.Html__href_wtr().Build_to_bfr(bfr, app, wiki.Domain_bry(), lnki_ttl, hctx.Mode_is_popup()); // '/wiki/A'
app.Html__href_wtr().Build_to_bfr(bfr, app, hctx.Mode(), wiki.Domain_bry(), lnki_ttl); // '/wiki/A'
if (cfg.Lnki__id()) {
int lnki_html_id = lnki.Html_uid();
if (lnki_html_id > Lnki_id_ignore) // html_id=0 for skipped lnkis; EX:anchors and interwiki
@@ -108,9 +109,12 @@ public class Xoh_lnki_wtr {
.Add_int_variable(lnki_html_id); // '1234'
}
if (cfg.Lnki__title()) {
bfr .Add(Xoh_consts.A_bgn_lnki_0); // '" title=\"'
byte[] lnki_title_bry = lnki_ttl.Full_txt(); // 'Abcd' NOTE: use Full_txt to (a) replace underscores with spaces; (b) get title casing; EX:[[roman_empire]] -> Roman empire; (c) include ns_name; EX: Help:A -> "title='Help:A'" not "title='A'"; DATE:2015-11-16
Html_utl.Escape_html_to_bfr(bfr, lnki_title_bry, 0, lnki_title_bry.length, Bool_.N, Bool_.N, Bool_.N, Bool_.Y, Bool_.N); // escape title; DATE:2014-10-27
byte[] title_bry = lnki_ttl.Full_txt(); // NOTE: use Full_txt to (a) replace underscores with spaces; (b) get title casing; EX:[[roman_empire]] -> Roman empire; (c) include ns_name; EX: Help:A -> "title='Help:A'" not "title='A'"; DATE:2015-11-16
int title_len = title_bry.length;
if (title_len > 0) {
bfr .Add(Xoh_consts.A_bgn_lnki_0); // '" title=\"'
Html_utl.Escape_html_to_bfr(bfr, title_bry, 0, title_len, Bool_.N, Bool_.N, Bool_.N, Bool_.Y, Bool_.N); // escape title; DATE:2014-10-27
}
}
if (!hctx.Mode_is_hdump()) { // don't write visited for hdump
if (cfg.Lnki__visited()

View File

@@ -1,56 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.mkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import gplx.core.threads.poolables.*;
import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.htmls.core.wkrs.escapes.*; import gplx.xowa.htmls.core.wkrs.spaces.*;
import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.lnkes.*; import gplx.xowa.htmls.core.wkrs.lnkis.*;
import gplx.xowa.htmls.core.wkrs.imgs.*; import gplx.xowa.htmls.core.wkrs.thms.*; import gplx.xowa.htmls.core.wkrs.glys.*;
public class Xoh_hdoc_mkr {
private Gfo_poolable_mgr
pool__escape__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_escape_hzip())
, pool__space__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_space_hzip())
, pool__hdr__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_hdr_hzip())
, pool__lnke__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_lnke_hzip())
, pool__lnki__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_lnki_hzip())
, pool__img__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_img_hzip())
, pool__thm__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_thm_hzip())
, pool__gly__hzip = Gfo_poolable_mgr_.New(1, 32, new Xoh_gly_hzip())
;
public Xoh_hzip_wkr Hzip__wkr(byte tid) {
switch (tid) {
case Xoh_hzip_dict_.Tid__escape: return Escape__hzip();
case Xoh_hzip_dict_.Tid__space: return Space__hzip();
case Xoh_hzip_dict_.Tid__hdr: return Hdr__hzip();
case Xoh_hzip_dict_.Tid__lnke: return Lnke__hzip();
case Xoh_hzip_dict_.Tid__lnki: return Lnki__hzip();
case Xoh_hzip_dict_.Tid__img: return Img__hzip();
case Xoh_hzip_dict_.Tid__thm: return Thm__hzip();
case Xoh_hzip_dict_.Tid__gly: return Gly__hzip();
default: throw Err_.new_unhandled(tid);
}
}
public Xoh_escape_hzip Escape__hzip() {return (Xoh_escape_hzip) pool__escape__hzip.Get_fast();}
public Xoh_space_hzip Space__hzip() {return (Xoh_space_hzip) pool__space__hzip.Get_fast();}
public Xoh_hdr_hzip Hdr__hzip() {return (Xoh_hdr_hzip) pool__hdr__hzip.Get_fast();}
public Xoh_lnke_hzip Lnke__hzip() {return (Xoh_lnke_hzip) pool__lnke__hzip.Get_fast();}
public Xoh_lnki_hzip Lnki__hzip() {return (Xoh_lnki_hzip) pool__lnki__hzip.Get_fast();}
public Xoh_img_hzip Img__hzip() {return (Xoh_img_hzip) pool__img__hzip.Get_fast();}
public Xoh_thm_hzip Thm__hzip() {return (Xoh_thm_hzip) pool__thm__hzip.Get_fast();}
public Xoh_gly_hzip Gly__hzip() {return (Xoh_gly_hzip) pool__gly__hzip.Get_fast();}
}

View File

@@ -1,39 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.spaces; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import gplx.core.brys.*; import gplx.core.threads.poolables.*; import gplx.xowa.wikis.ttls.*;
import gplx.xowa.htmls.core.hzips.*;
/**
 * Hzip worker for runs of spaces.
 * Encode writes the space hook followed by the run length; Decode reads that length back and emits the same number of spaces.
 */
public class Xoh_space_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
	private Gfo_poolable_mgr pool_mgr;	// pool that created this item; set in Pool__make
	private int pool_idx;				// index handed back to pool_mgr on release

	public String Key() {
		return Xoh_hzip_dict_.Key__space;
	}

	/**
	 * Writes the compressed form of a run of spaces to bfr.
	 * The run is measured from rng_bgn to the position returned by scanning forward over spaces starting at rng_end.
	 * The length is also reported to stat_itm. Returns this.
	 */
	public Xoh_space_hzip Encode(Bry_bfr bfr, Hzip_stat_itm stat_itm, byte[] src, int src_end, int rng_bgn, int rng_end) {
		int spaces_end = Bry_find_.Find_fwd_while(src, rng_end, src_end, Byte_ascii.Space);
		int run_len = spaces_end - rng_bgn;
		stat_itm.Space_add(run_len);
		bfr.Add(Xoh_hzip_dict_.Bry__space);
		Xoh_hzip_int_.Encode(1, bfr, run_len);
		return this;
	}

	/**
	 * Reads the run length from rdr and appends that many spaces to bfr.
	 * Returns the reader position after the read; the other parameters are not used by this worker.
	 */
	public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx ctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
		int run_len = rdr.Read_int_by_base85(1);
		bfr.Add_byte_repeat(Byte_ascii.Space, run_len);
		return rdr.Pos();
	}

	// Gfo_poolable_itm members
	public int Pool__idx() {
		return pool_idx;
	}
	public void Pool__clear(Object[] args) {
		// no per-use state to reset
	}
	public void Pool__rls() {
		pool_mgr.Rls_fast(pool_idx);
	}
	public Gfo_poolable_itm Pool__make(Gfo_poolable_mgr mgr, int idx, Object[] args) {
		Xoh_space_hzip itm = new Xoh_space_hzip();
		itm.pool_mgr = mgr;
		itm.pool_idx = idx;
		return itm;
	}
}

View File

@@ -1,41 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.spaces; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import org.junit.*; import gplx.xowa.htmls.core.hzips.*;
// Tests for the space hzip worker: each case passes a compressed string and its html counterpart to Xoh_hzip_fxt.Test__bicode.
// NOTE(review): the space runs inside these literals look whitespace-collapsed in this view (e.g. Len__8 shows a single space); verify against the repository before relying on them.
public class Xoh_space_hzip_tst {
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt();
// run of spaces whose length fits in the single-character form "~!)"
@Test public void Len__8() {
fxt.Test__bicode("~!)", " ");
}
// run of 85 spaces (built with String_.Repeat); the expected compressed form is longer: "~!{\"!"
@Test public void Len__85() {
fxt.Test__bicode("~!{\"!", String_.Repeat(" ", 85));
}
// several indented lines in one document: the leading spaces of the 2nd and 3rd lines are replaced by "~!%" in the compressed form, while the 1st line is expected unchanged
// apostrophes in the compressed literal are converted by Html_utl.Replace_apos before comparison
@Test public void Many() {
fxt.Test__bicode(gplx.langs.htmls.Html_utl.Replace_apos(String_.Concat_lines_nl_skip_last
( " <div id='bodyContent' class='mw-body-content'>"
, "~!%<div id='siteSub'>a</div>"
, "~!%<div id='contentSub'></div>"
, "</div>"
)), String_.Concat_lines_nl_skip_last
( " <div id='bodyContent' class='mw-body-content'>"
, " <div id='siteSub'>a</div>"
, " <div id='contentSub'></div>"
, "</div>"
));
}
}

View File

@@ -1,32 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.core.wkrs.spaces; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
import gplx.xowa.htmls.core.hzips.*;
/**
 * Html_doc_wkr that is hooked on Hook_bry and reports runs of spaces to a Xoh_hdoc_wkr.
 */
public class Xoh_space_parser implements Html_doc_wkr {
	private static final byte[] Hook_bry = Bry_.new_a7(" ");
	private static final int Hook_len = Hook_bry.length;

	private final Xoh_hdoc_wkr wkr;	// receives On_space callbacks

	public Xoh_space_parser(Xoh_hdoc_wkr wkr) {
		this.wkr = wkr;
	}

	public byte[] Hook() {
		return Hook_bry;
	}

	/**
	 * Called with pos at the start of the hook.
	 * Skips the hook, scans forward over any further spaces, reports the range (pos, end of run) to the worker, and returns the end of the run.
	 */
	public int Parse(byte[] src, int src_bgn, int src_end, int pos) {
		int scan_bgn = pos + Hook_len;
		int spaces_end = Bry_find_.Find_fwd_while(src, scan_bgn, src_end, Byte_ascii.Space);
		wkr.On_space(pos, spaces_end);
		return spaces_end;
	}
}

View File

@@ -23,15 +23,16 @@ import gplx.xowa.wikis.ttls.*;
public class Xoh_tag_parser implements Html_doc_wkr {
private final Xoh_hdoc_wkr hdoc_wkr;
private final Html_tag_rdr tag_rdr = new Html_tag_rdr();
private final Xoh_hdr_parser wkr__hdr = new Xoh_hdr_parser();
private final Xoh_lnki_parser wkr__lnki = new Xoh_lnki_parser(); private final Xoh_lnke_parser wkr__lnke = new Xoh_lnke_parser();
private final Xoh_img_parser wkr__img = new Xoh_img_parser(); private final Xoh_thm_parser wkr__thm = new Xoh_thm_parser();
private final Xoh_gly_grp_parser wkr__gly = new Xoh_gly_grp_parser();
private Xoh_hdoc_ctx hctx;
private final Xoh_hdr_parser wkr__hdr = new Xoh_hdr_parser();
private final Xoh_lnki_parser wkr__lnki = new Xoh_lnki_parser(); private final Xoh_lnke_parser wkr__lnke = new Xoh_lnke_parser();
private final Xoh_img_parser wkr__img = new Xoh_img_parser(); private final Xoh_thm_parser wkr__thm = new Xoh_thm_parser();
private final Xoh_gly_grp_parser wkr__gly = new Xoh_gly_grp_parser();
public byte[] Hook() {return Byte_ascii.Angle_bgn_bry;}
public Xoh_tag_parser(Xoh_hdoc_wkr hdoc_wkr) {this.hdoc_wkr = hdoc_wkr;}
public void Init(Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
this.hctx = hctx; tag_rdr.Init(hctx.Page__url(), src, src_bgn, src_end);
this.hctx = hctx;
tag_rdr.Init(hctx.Page__url(), src, src_bgn, src_end);
}
public int Parse(byte[] src, int src_bgn, int src_end, int pos) {
tag_rdr.Pos_(pos);
@@ -44,31 +45,34 @@ public class Xoh_tag_parser implements Html_doc_wkr {
int cur_name_id = cur.Name_id();
switch (cur_name_id) {
case Html_tag_.Id__h2: case Html_tag_.Id__h3: case Html_tag_.Id__h4: case Html_tag_.Id__h5: case Html_tag_.Id__h6:
int hdr_tag_bgn = cur.Src_bgn();
nxt = tag_rdr.Tag__peek_fwd_head();
if ( nxt.Name_id() == Html_tag_.Id__span
&& nxt.Atrs__match_pair(Html_atr_.Bry__class , Xoh_hdr_parser.Bry__class__mw_headline)) {
return wkr__hdr.Parse(hdoc_wkr, src, tag_rdr, cur_name_id, hdr_tag_bgn, nxt);
if (wkr__hdr.Parse(hdoc_wkr, hctx, tag_rdr, src, cur, nxt)) return wkr__hdr.Src_end();
}
break;
case Html_tag_.Id__a:
nxt = tag_rdr.Tag__peek_fwd_head();
if (nxt.Name_id() == Html_tag_.Id__img) {
int rv = wkr__img.Parse(hdoc_wkr, hctx, src, tag_rdr, cur);
if (rv != Xoh_hdoc_ctx.Invalid) {
if (wkr__img.Parse(hdoc_wkr, hctx, src, tag_rdr, cur)) {
hdoc_wkr.On_img(wkr__img);
return rv;
return wkr__img.Src_end();
}
}
else if (cur.Atrs__match_pair(Html_atr_.Bry__rel , Xoh_lnke_dict_.Html__rel__nofollow))
return wkr__lnke.Parse(hdoc_wkr, tag_rdr, cur);
else
return wkr__lnki.Parse(hdoc_wkr, hctx, src, tag_rdr, cur, hctx.Wiki__ttl_parser());
else if (cur.Atrs__match_pair(Html_atr_.Bry__rel , Xoh_lnke_dict_.Html__rel__nofollow)) {
if (wkr__lnke.Parse(hdoc_wkr, hctx, tag_rdr, src, cur)) return wkr__lnke.Src_end();
}
else if (cur.Atrs__get_by_or_empty(Xoh_img_parser.Bry__atr__xowa_title).Val_dat_exists()) {}
else {
if (wkr__lnki.Parse(hdoc_wkr, hctx, tag_rdr, src, cur)) return wkr__lnki.Src_end();
}
break;
case Html_tag_.Id__div:
if (cur.Atrs__cls_has(Xoh_thm_parser.Atr__class__thumb)) {
int rv = wkr__thm.Parse(hdoc_wkr, hctx, src, tag_rdr, cur);
if (rv != Xoh_hdoc_ctx.Invalid) return rv;
if (wkr__thm.Parse(hdoc_wkr, hctx, src, tag_rdr, cur)) return wkr__thm.Src_end();
}
else if (cur.Atrs__match_pair(Html_atr_.Bry__id, Xoh_thm_parser.Atr__id__xowa_media_div)) {
tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__div);
}
break;
case Html_tag_.Id__ul:

View File

@@ -25,7 +25,8 @@ public class Xoh_thm_bldr {
public void Make(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int div_0_align, int div_1_width, boolean div_2_alt_exists, byte[] img_alt, Xoh_img_bldr img_bldr, Bfr_arg div_2_href, Bfr_arg div_2_capt) {
wtr.Clear();
wtr.Div_0_align_(div_0_align);
wtr.Div_1_id_(img_bldr.Fsdb_itm().Html_uid());
if (!hctx.Mode_is_diff())
wtr.Div_1_id_(img_bldr.Fsdb_itm().Html_uid());
wtr.Div_1_width_(div_1_width);
wtr.Div_1_img_(img_bldr.Wtr());
wtr.Div_2_href_(div_2_href);

View File

@@ -23,8 +23,8 @@ public class Xoh_thm_html_tst {
// fxt.Expd_itms_xfers(fxt.Make_xfer("A.png", 0, 0, 0, Bool_.Y, Xof_ext_.Id_png));
fxt.Test__html("[[File:A.png|thumb|test_caption]]", String_.Concat_lines_nl_skip_last
( "<div class='thumb tright'>"
, " <div id='xowa_file_div_0' class='thumbinner' style='width:220px;'>"
, " <a href='/wiki/File:A.png' class='image' xowa_title='A.png'><img data-xoimg='8|-1|-1|-1|-1|-1' src='' width='0' height='0' alt=''/></a>"
, " <div class='thumbinner' style='width:220px;'>"
, " <a href='/wiki/File:A.png' class='image' xowa_title='A.png'><img data-xoimg='4|-1|-1|-1|-1|-1' src='' width='0' height='0' alt=''/></a>"
, " <div class='thumbcaption'>"
, " <div class='magnify'>"
, " <a href='/wiki/File:A.png' class='internal' title='Enlarge'>"

View File

@@ -23,29 +23,30 @@ public class Xoh_thm_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
private final Xoh_thm_bldr bldr = new Xoh_thm_bldr();
private final Bry_obj_ref div_2_capt = Bry_obj_ref.New_empty();
public String Key() {return Xoh_hzip_dict_.Key__thm;}
public Xoh_thm_hzip Encode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Hzip_stat_itm stat_itm, byte[] src, Xoh_thm_parser arg) {
if (!arg.Rng_valid()) {
bfr.Add_mid(src, arg.Rng_bgn(), arg.Rng_end());
public byte[] Hook() {return hook;} private byte[] hook;
public Gfo_poolable_itm Encode(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
Xoh_thm_parser data = (Xoh_thm_parser)data_obj;
if (!data.Rng_valid()) {
bfr.Add_mid(src, data.Src_bgn(), data.Src_end());
return this;
}
Xoh_thm_caption_parser div_2_capt_parser = arg.Capt_parser();
int div_1_width = arg.Div_1_width(); boolean div_1_width_exists = div_1_width != 220;
boolean div_2_alt_exists = arg.Capt_parser().Alt_div_exists();
flag_bldr.Set(Flag__div_2_alt_exists , div_2_alt_exists);
flag_bldr.Set(Flag__div_1_width_exists , div_1_width_exists);
flag_bldr.Set(Flag__div_0_align , arg.Div_0_align());
bfr.Add(Xoh_hzip_dict_.Bry__thm);
Xoh_thm_caption_parser div_2_capt_parser = data.Capt_parser();
int div_1_width = data.Div_1_width(); ;
boolean div_2_alt_exists = flag_bldr.Set_as_bool(Flag__div_2_alt_exists , data.Capt_parser().Alt_div_exists());
boolean div_1_width_exists = flag_bldr.Set_as_bool(Flag__div_1_width_exists , div_1_width != 220);
flag_bldr.Set_as_byte(Flag__div_0_align , data.Div_0_align());
bfr.Add(hook);
Xoh_hzip_int_.Encode(1, bfr, flag_bldr.Encode());
if (div_1_width_exists) Xoh_hzip_int_.Encode(2, bfr, div_1_width);
bfr.Add_mid(src, div_2_capt_parser.Capt_bgn(), div_2_capt_parser.Capt_end()).Add_byte(Xoh_hzip_dict_.Escape);
if (div_2_capt_parser.Capt_exists()) bfr.Add_mid(src, div_2_capt_parser.Capt_bgn(), div_2_capt_parser.Capt_end());
bfr.Add_byte(Xoh_hzip_dict_.Escape);
if (div_2_alt_exists) bfr.Add_mid(src, div_2_capt_parser.Alt_div_bgn(), div_2_capt_parser.Alt_div_end()).Add_byte(Xoh_hzip_dict_.Escape);
img_hzip.Encode(bfr, stat_itm, src, arg.Img_parser(), Bool_.N);
img_hzip.Encode(bfr, hdoc_wkr, hctx, hpg, Bool_.N, src, data.Img_parser());
return this;
}
public int Decode(Bry_bfr bfr, boolean write_to_bfr, Xoh_hdoc_ctx hctx, Xoh_page hpg, Bry_rdr rdr, byte[] src, int hook_bgn) {
public int Decode(Bry_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, Bry_rdr rdr, byte[] src, int src_bgn, int src_end) {
int flag = rdr.Read_int_by_base85(1);
int capt_bgn = rdr.Pos();
int capt_end = rdr.Find_fwd_lr();
int rv = rdr.Pos();
flag_bldr.Decode(flag);
@@ -54,16 +55,16 @@ public class Xoh_thm_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
int div_0_align = flag_bldr.Get_as_int(Flag__div_0_align);
int div_1_width = 220;
if (div_1_width_exists) div_1_width = rdr.Read_int_by_base85(2);
int capt_bgn = rdr.Pos();
int capt_end = rdr.Find_fwd_lr();
div_2_capt.Mid_(src, capt_bgn, capt_end);
byte[] div_2_alt_bry = div_2_alt_exists ? rdr.Read_bry_to() : Bry_.Empty;
img_hzip.Decode(bfr, Bool_.N, hctx, hpg, rdr, src, rv);
img_hzip.Decode(bfr, hdoc_wkr, hctx, hpg, Bool_.N, rdr, src, rdr.Pos(), src_end);
bldr.Make(bfr, hpg, hctx, src, div_0_align, div_1_width, div_2_alt_exists, div_2_alt_bry, img_hzip.Bldr(), img_hzip.Anch_href_arg(), div_2_capt);
return rv;
}
public int Pool__idx() {return pool_idx;} private int pool_idx;
public void Pool__clear (Object[] args) {}
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_thm_hzip rv = new Xoh_thm_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; return rv;}
public void Pool__rls () {pool_mgr.Rls_fast(pool_idx);} private Gfo_poolable_mgr pool_mgr; private int pool_idx;
public Gfo_poolable_itm Pool__make (Gfo_poolable_mgr mgr, int idx, Object[] args) {Xoh_thm_hzip rv = new Xoh_thm_hzip(); rv.pool_mgr = mgr; rv.pool_idx = idx; rv.hook = (byte[])args[0]; return rv;}
private final Int_flag_bldr flag_bldr = new Int_flag_bldr().Pow_ary_bld_(1, 1, 3);
private static final int // SERIALIZED
Flag__div_2_alt_exists = 0

View File

@@ -19,56 +19,64 @@ package gplx.xowa.htmls.core.wkrs.thms; import gplx.*; import gplx.xowa.*; impor
import org.junit.*; import gplx.xowa.htmls.core.hzips.*;
public class Xoh_thm_hzip_tst {
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt();
private String Html__image = String_.Concat_lines_nl_skip_last
( "<div class='thumb tleft'>"
, " <div id='xothm_0' class='thumbinner' style='width:220px;'>"
, " <a href='/wiki/File:A.png' class='image' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' class='thumbimage' alt='abc'></a>"
, " <div class='thumbcaption'>"
, " <div class='magnify'>"
, " <a href='/wiki/File:A.png' class='internal' title='Enlarge'><img src='file:///mem/xowa/bin/any/xowa/file/mediawiki.file/magnify-clip.png' width='15' height='11' alt=''></a>"
, " </div>abc"
, " </div>"
, " <hr>"
, " <div class='thumbcaption'>"
, " abc"
, " </div>"
, " </div>"
, "</div>"
)
, Html__video = String_.Replace(String_.Concat_lines_nl_skip_last
( "<div class='thumb tright'>"
, " <div id='xowa_file_div_3' class='thumbinner' style='width:220px;'>"
, " <div id='xowa_media_div'>"
, " <div>"
, " <a href='/wiki/File:a.ogv' class='image' title='a.ogv'><img id='xowa_file_img_3' src='file:///' width='-1' height='-1' alt=''></a>"
, " </div>"
, " <div>"
, " <a id='xowa_file_play_3' href='file:///' xowa_title='a.ogv' class='xowa_anchor_button' style='width:218px;max-width:220px;'><img src='file:///C:/xowa/bin/any/xowa/file/mediawiki.file/play.png' width='22' height='22' alt='Play sound'></a>"
, " </div>"
, " </div>"
, " <div class='thumbcaption'>"
, " <div class='magnify'>"
, " <a href='/wiki/File:a.ogv' class='@gplx.Internal protected' title='Enlarge'><img src='file:///C:/xowa/bin/any/xowa/file/mediawiki.file/magnify-clip.png' width='15' height='11' alt=''></a>"
, " </div>Moscow (Russian Empire) in 1908"
, " </div>"
, " </div>"
, "</div>"
), "'", "\"")
;
@Before public void setup() {fxt.Clear();}
@Test public void Image() {
fxt.Test__bicode("~&3abc~abc~!uA.png~0|220|110|0.5|-1|-1~abc~", Html__image);
fxt.Test__bicode("~&3abc~abc~!uA.png~)#Sabc~", String_.Concat_lines_nl_skip_last
( "<div class='thumb tleft'>"
, "<div id='xothm_0' class='thumbinner' style='width:220px;'><a href='/wiki/File:A.png' class='image' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' class='thumbimage' alt='abc'></a> "
, "<div class='thumbcaption'>"
, "<div class='magnify'><a href='/wiki/File:A.png' class='internal' title='Enlarge'><img src='file:///mem/xowa/bin/any/xowa/file/mediawiki.file/magnify-clip.png' width='15' height='11' alt=''></a></div>"
, "abc</div>"
, "<hr>"
, "<div class='thumbcaption'>abc</div>"
, "</div>"
, "</div>"
));
}
// Thumb without caption text: in the expected html the thumbcaption div holds only the magnify div, and the image anchor carries an empty title / alt.
// NOTE(review): whitespace inside the expected html (e.g. the trailing space after "</a>") is significant to the comparison; keep the literals byte-exact.
@Test public void No_capt() {
fxt.Test__bicode("~&#~!%A.png~)#S~", String_.Concat_lines_nl_skip_last
( "<div class='thumb tleft'>"
, "<div id='xothm_0' class='thumbinner' style='width:220px;'><a href='/wiki/File:A.png' class='image' title='' xowa_title='A.png'><img id='xoimg_0' data-xoimg='0|220|-1|-1|-1|-1' src='' width='0' height='0' class='thumbimage' alt=''></a> "
, "<div class='thumbcaption'>"
, "<div class='magnify'><a href='/wiki/File:A.png' class='internal' title='Enlarge'><img src='file:///mem/xowa/bin/any/xowa/file/mediawiki.file/magnify-clip.png' width='15' height='11' alt=''></a></div>"
, "</div>"
, "</div>"
, "</div>"
));
}
@Test public void Video() {
fxt.Test__bicode(Html__video, Html__video);
String html = String_.Replace(String_.Concat_lines_nl_skip_last
( "<div class='thumb tright'>"
, "<div id='xowa_file_div_3' class='thumbinner' style='width:220px;'>"
, "<div id='xowa_media_div'>"
, "<div>"
, "<a href='/wiki/File:a.ogv' class='image' title='a.ogv'><img id='xowa_file_img_3' src='file:///' width='-1' height='-1' alt=''></a>"
, "</div>"
, "<div>"
, "<a id='xowa_file_play_3' href='file:///' xowa_title='a.ogv' class='xowa_anchor_button' style='width:218px;max-width:220px;'><img src='file:///C:/xowa/bin/any/xowa/file/mediawiki.file/play.png' width='22' height='22' alt='Play sound'></a>"
, "</div>"
, "</div>"
, "<div class='thumbcaption'>"
, "<div class='magnify'>"
, "<a href='/wiki/File:a.ogv' class='@gplx.Internal protected' title='Enlarge'><img src='file:///C:/xowa/bin/any/xowa/file/mediawiki.file/magnify-clip.png' width='15' height='11' alt=''></a>"
, "</div>Moscow (Russian Empire) in 1908"
, "</div>"
, "</div>"
, "</div>"
), "'", "\"")
;
fxt.Test__bicode(html, html);
}
// Developer utility rather than an assertion-based test: sets up wikis / namespaces on the fixture and then calls Exec_write_to_fsys against a hard-coded local directory.
// NOTE(review): the "J:\\xowa\\dev_rls\\html\\" path is machine-specific; this will presumably not work on other machines -- confirm before enabling in automated runs.
@Test public void Dump() {
// register an "en.wiktionary.org" wiki under alias "wikt" and set its main namespace to Tid__all case-matching
Xowe_wiki en_d = fxt.Init_wiki_alias("wikt", "en.wiktionary.org");
gplx.xowa.wikis.nss.Xow_ns_mgr ns_mgr = en_d.Ns_mgr();
ns_mgr.Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
// add "WP" as an alias for the portal namespace id on the fixture's wiki, then re-init the ns_mgr
fxt.Wiki().Ns_mgr().Aliases_add(gplx.xowa.wikis.nss.Xow_ns_.Tid__portal, "WP");
fxt.Wiki().Ns_mgr().Init();
fxt.Init_mode_is_b256_(Bool_.N);
fxt.Exec_write_to_fsys(Io_url_.new_dir_("J:\\xowa\\dev_rls\\html\\"), "temp_earth_xo.html");
// NOTE(review): this passes Bool_.N again, same as before the write; if the intent was to restore a different mode, confirm the value
fxt.Init_mode_is_b256_(Bool_.N);
}
// @Test public void Dump() {
// Xowe_wiki en_d = fxt.Prep_create_wiki("wikt", "en.wiktionary.org");
// gplx.xowa.wikis.nss.Xow_ns_mgr ns_mgr = en_d.Ns_mgr();
// ns_mgr.Ns_main().Case_match_(gplx.xowa.wikis.nss.Xow_ns_case_.Tid__all);
//
// fxt.Wiki().Ns_mgr().Aliases_add(gplx.xowa.wikis.nss.Xow_ns_.Tid__portal, "WP");
// fxt.Wiki().Ns_mgr().Init();
//
// fxt.Exec_write_to_fsys(Io_url_.new_dir_("D:\\xowa\\dev_rls\\html\\"), "temp_earth_xo.html");
// }
}

View File

@@ -20,30 +20,34 @@ import gplx.core.brys.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*; import gplx.langs.htmls.parsers.styles.*;
import gplx.xowa.htmls.core.wkrs.thms.divs.*; import gplx.xowa.htmls.core.wkrs.imgs.*;
public class Xoh_thm_parser implements Html_atr_style_wkr {
public int Rng_bgn() {return rng_bgn;} private int rng_bgn;
public int Rng_end() {return rng_end;} private int rng_end;
public int Src_bgn() {return src_bgn;} private int src_bgn;
public int Src_end() {return src_end;} private int src_end;
public boolean Rng_valid() {return rng_valid;} private boolean rng_valid;
public byte Div_0_align() {return div_0_align;} private byte div_0_align;
public int Div_1_width() {return div_1_width;} private int div_1_width;
public Xoh_img_parser Img_parser() {return img_parser;} private final Xoh_img_parser img_parser = new Xoh_img_parser();
public Xoh_thm_caption_parser Capt_parser() {return capt_parser;} private final Xoh_thm_caption_parser capt_parser = new Xoh_thm_caption_parser();
public int Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, byte[] src, Html_tag_rdr tag_rdr, Html_tag div_0) {
tag_rdr.Rdr().Init_by_hook("thm", div_0.Src_bgn(), div_0.Src_bgn());
this.rng_bgn = div_0.Src_bgn();
this.div_0_align = div_0.Atrs__cls_find_or_fail(gplx.xowa.parsers.lnkis.Xop_lnki_align_h_.Hash);
Html_tag div_1 = tag_rdr.Tag__move_fwd_head(); // <div class='thumbinner'>
public boolean Parse(Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, byte[] src, Html_tag_rdr tag_rdr, Html_tag div_0_head) {
tag_rdr.Err_wkr().Init_by_sect("thm", div_0_head.Src_bgn());
this.src_bgn = div_0_head.Src_bgn();
this.div_0_align = div_0_head.Atrs__cls_find_or_fail(gplx.xowa.parsers.lnkis.Xop_lnki_align_h_.Hash);
Html_tag div_0_tail = tag_rdr.Tag__peek_fwd_tail(Html_tag_.Id__div); // </div>
Html_tag div_1_head = tag_rdr.Tag__find_fwd_head(div_0_head.Src_end(), div_0_tail.Src_bgn(), Html_tag_.Id__div); // <div class='thumbinner'>
if (div_1_head.Name_id() != Html_tag_.Id__div) return false;
// tag_rdr.Pos_(div_1_head.Src_end());
tag_rdr.Tag__move_fwd_head().Chk_id(Html_tag_.Id__div);
this.div_1_width = -1;
Html_atr_style_parser_.Parse(div_1, this); // " style='120px'"
Html_atr_style_parser_.Parse(div_1_head, this); // " style='120px'"
rng_valid = false;
if (img_parser.Parse(hdoc_wkr, hctx, src, tag_rdr, tag_rdr.Tag__move_fwd_head()) != Xoh_hdoc_ctx.Invalid) { // <a>
capt_parser.Parse(hdoc_wkr, tag_rdr, src, tag_rdr.Tag__move_fwd_head()); // <div>
if (img_parser.Parse(hdoc_wkr, hctx, src, tag_rdr, tag_rdr.Tag__move_fwd_head())) { // <a>
if (!capt_parser.Parse(hdoc_wkr, tag_rdr, src, tag_rdr.Tag__move_fwd_head())) return false; // <div>
rng_valid = true;
}
tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__div); // </div> for div_1
Html_tag div_0_tail = tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__div); // </div> for div_0
this.rng_end = div_0_tail.Src_end();
tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__div);
tag_rdr.Tag__move_fwd_tail(Html_tag_.Id__div);
this.src_end = tag_rdr.Pos();
hdoc_wkr.On_thm(this);
return rng_end;
return true;
}
public boolean On_atr(byte[] src, int atr_idx, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end) {
if (Bry_.Match(src, key_bgn, key_end, Html_atr_style_.Bry__width))
@@ -51,7 +55,8 @@ public class Xoh_thm_parser implements Html_atr_style_wkr {
return true;
}
public static final byte[]
Atr__class__thumb = Bry_.new_a7("thumb")
, Atr__class__thumbinner = Bry_.new_a7("thumbinner")
Atr__class__thumb = Bry_.new_a7("thumb")
, Atr__class__thumbinner = Bry_.new_a7("thumbinner")
, Atr__id__xowa_media_div = Bry_.new_a7("xowa_media_div")
;
}

View File

@@ -45,10 +45,13 @@ public class Xoh_thm_wtr extends gplx.core.brys.Bfr_arg_base {
alt_fmtr.Bld_bfr_many(tmp_bfr, img_alt_bry);
div_2_alt.Set(tmp_bfr.To_bry_and_clear());
}
else
div_2_alt.Set(Bry_.Empty);
return this;
}
public Xoh_thm_wtr Clear() {
Bfr_arg_.Clear(div_0_align, div_1_id, div_1_width, div_1_img, div_2_href, div_2_magnify, div_2_capt, div_2_alt);
Bfr_arg_.Clear(div_0_align, div_1_id, div_1_width, div_2_href, div_2_magnify, div_2_alt);
div_1_img = div_2_capt = Bfr_arg_.Noop;
return this;
}
@Override public void Bfr_arg__add(Bry_bfr bfr) {
@@ -57,21 +60,16 @@ public class Xoh_thm_wtr extends gplx.core.brys.Bfr_arg_base {
public static final byte[] Prefix__div_id = Bry_.new_a7("xothm_");
private static final Bry_fmtr fmtr = Bry_fmtr.new_(String_.Concat_lines_nl_skip_last
( "<div class=\"thumb t~{div_0_align}\">"
, " <div~{div_1_id} class=\"thumbinner\" style=\"width:~{div_1_width}px;\">"
, " ~{div_1_img}"
, " <div class=\"thumbcaption\">"
, " <div class=\"magnify\">"
, " <a~{div_2_href} class=\"internal\" title=\"Enlarge\"><img src=\"~{div_2_magnify}\" width=\"15\" height=\"11\" alt=\"\"></a>"
, " </div>~{div_2_capt}"
, " </div>~{div_2_alt}"
, " </div>"
, "<div~{div_1_id} class=\"thumbinner\" style=\"width:~{div_1_width}px;\">~{div_1_img} " // NOTE: trailing space is intentional; matches jtidy behavior
, "<div class=\"thumbcaption\">"
, "<div class=\"magnify\"><a~{div_2_href} class=\"internal\" title=\"Enlarge\"><img src=\"~{div_2_magnify}\" width=\"15\" height=\"11\" alt=\"\"></a></div>"
, "~{div_2_capt}</div>~{div_2_alt}"
, "</div>"
, "</div>"
), "div_0_align", "div_1_id", "div_1_width", "div_1_img", "div_2_href", "div_2_magnify", "div_2_capt", "div_2_alt");
private static final Bry_fmtr alt_fmtr = Bry_fmtr.new_(String_.Concat_lines_nl_skip_last
( ""
, " <hr>"
, " <div class=\"thumbcaption\">"
, " ~{alt}"
, " </div>"
, "<hr>"
, "<div class=\"thumbcaption\">~{alt}</div>"
), "alt");
}

Some files were not shown because too many files have changed in this diff Show More