1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-08-03 00:10:03 -04:00
parent 9d63f03b3d
commit 34c34f227c
514 changed files with 4972 additions and 3910 deletions

View File

@@ -16,13 +16,14 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.net.*;
import gplx.xowa.html.*; import gplx.xowa.html.hrefs.*;
import gplx.xowa.net.*;
import gplx.xowa.urls.*;
public class Xoh_lnke_wtr {
private Xoae_app app;
public Xoh_lnke_wtr(Xowe_wiki wiki) {this.app = wiki.Appe();}
// private Xoae_app app;
public Xoh_lnke_wtr(Xowe_wiki wiki) {}// this.app = wiki.Appe();}
public void Write_all(Bry_bfr bfr, Xoh_html_wtr html_wtr, Xoh_wtr_ctx hctx, Xop_ctx ctx, byte[] src, Xop_lnke_tkn lnke) {
int lnke_bgn = lnke.Lnke_bgn(), lnke_end = lnke.Lnke_end(); boolean proto_is_xowa = lnke.Proto_tid() == Xoo_protocol_itm.Tid_xowa;
int lnke_bgn = lnke.Lnke_bgn(), lnke_end = lnke.Lnke_end(); boolean proto_is_xowa = lnke.Proto_tid() == Gfo_protocol_itm.Tid_xowa;
if (!hctx.Mode_is_alt()) { // write href, unless mode is alt
if (hctx.Mode_is_hdump()) {
if (lnke.Lnke_typ() == Xop_lnke_tkn.Lnke_typ_text)
@@ -52,7 +53,7 @@ public class Xoh_lnke_wtr {
byte[] lnke_xwiki_wiki = lnke.Lnke_xwiki_wiki();
if (lnke_xwiki_wiki == null) {
if (lnke.Lnke_relative()) { // relative; EX: //a.org
bfr.Add(app.Utl__url_parser().Url_parser().Relative_url_protocol_bry()).Add_mid(src, lnke_bgn, lnke_end);
bfr.Add(ctx.Wiki().Utl__url_parser().Url_parser().Relative_url_protocol_bry()).Add_mid(src, lnke_bgn, lnke_end);
return true;
}
else { // xowa or regular; EX: http://a.org
@@ -69,10 +70,10 @@ public class Xoh_lnke_wtr {
}
else { // xwiki
Url_encoder href_encoder = Xoa_app_.Utl__encoder_mgr().Href_quotes();
bfr.Add(Xoh_href_parser.Href_site_bry).Add(lnke_xwiki_wiki).Add(Xoh_href_parser.Href_wiki_bry)
bfr.Add(Xoh_href_.Bry__site).Add(lnke_xwiki_wiki).Add(Xoh_href_.Bry__wiki)
.Add(href_encoder.Encode(lnke.Lnke_xwiki_page())); // NOTE: must encode page; EX:%22%3D -> '">' which will end attribute; PAGE:en.w:List_of_Category_A_listed_buildings_in_West_Lothian DATE:2014-07-15
if (lnke.Lnke_xwiki_qargs() != null)
Xoa_url_arg_hash.Concat_bfr(bfr, href_encoder, lnke.Lnke_xwiki_qargs()); // NOTE: must encode args
Gfo_qarg_mgr.Concat_bfr(bfr, href_encoder, lnke.Lnke_xwiki_qargs()); // NOTE: must encode args
return false;
}
}

View File

@@ -16,20 +16,20 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.xowa.net.*;
import gplx.core.btries.*; import gplx.core.net.*;
public class Xop_lnke_lxr implements Xop_lxr {
Xop_lnke_lxr(byte lnke_typ, byte[] protocol, byte tid) {this.lnke_typ = lnke_typ; this.protocol = protocol; this.tid = tid;} private byte lnke_typ; byte[] protocol; byte tid;
public byte Lxr_tid() {return Xop_lxr_.Tid_lnke_bgn;}
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
Xoo_protocol_itm[] ary = Xoo_protocol_itm.Ary();
Gfo_protocol_itm[] ary = Gfo_protocol_itm.Ary();
int ary_len = ary.length;
for (int i = 0; i < ary_len; i++) {
Xoo_protocol_itm itm = ary[i];
Gfo_protocol_itm itm = ary[i];
Ctor_lxr_add(core_trie, itm.Key_w_colon_bry(), itm.Tid());
}
core_trie.Add(Bry_relative_1, new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Xoa_consts.Url_relative_prefix, Xoo_protocol_itm.Tid_relative_1));
core_trie.Add(Bry_relative_2, new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Xoa_consts.Url_relative_prefix, Xoo_protocol_itm.Tid_relative_2));
Ctor_lxr_add(core_trie, Bry_.new_a7("xowa-cmd"), Xoo_protocol_itm.Tid_xowa);
core_trie.Add(Bry_relative_1, new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Xoa_consts.Url_relative_prefix, Gfo_protocol_itm.Tid_relative_1));
core_trie.Add(Bry_relative_2, new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, Xoa_consts.Url_relative_prefix, Gfo_protocol_itm.Tid_relative_2));
Ctor_lxr_add(core_trie, Bry_.new_a7("xowa-cmd"), Gfo_protocol_itm.Tid_xowa);
} private static final byte[] Bry_relative_1 = Bry_.new_a7("[//"), Bry_relative_2 = Bry_.new_a7("[[//");
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
private void Ctor_lxr_add(Btrie_fast_mgr core_trie, byte[] protocol_bry, byte tid) {
@@ -37,7 +37,7 @@ public class Xop_lnke_lxr implements Xop_lxr {
core_trie.Add(Bry_.Add(Byte_ascii.Brack_bgn, protocol_bry) , new Xop_lnke_lxr(Xop_lnke_tkn.Lnke_typ_brack, protocol_bry, tid));
}
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
if (this.tid == Xoo_protocol_itm.Tid_xowa && !ctx.Wiki().Sys_cfg().Xowa_proto_enabled()) return ctx.Lxr_make_txt_(cur_pos);
if (this.tid == Gfo_protocol_itm.Tid_xowa && !ctx.Wiki().Sys_cfg().Xowa_proto_enabled()) return ctx.Lxr_make_txt_(cur_pos);
return ctx.Lnke().MakeTkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, protocol, tid, lnke_typ);
}
public static final Xop_lnke_lxr _ = new Xop_lnke_lxr(); Xop_lnke_lxr() {}

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.parsers.lnkes.*;
import gplx.core.net.*;
public class Xop_lnke_tkn extends Xop_tkn_itm_base {//20111222
public static final byte Lnke_typ_null = 0, Lnke_typ_brack = 1, Lnke_typ_text = 2, Lnke_typ_brack_dangling = 3;
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_lnke;}
@@ -25,8 +25,8 @@ public class Xop_lnke_tkn extends Xop_tkn_itm_base {//20111222
public byte[] Lnke_site() {return lnke_site;} public Xop_lnke_tkn Lnke_site_(byte[] v) {lnke_site = v; return this;} private byte[] lnke_site;
public byte[] Lnke_xwiki_wiki() {return lnke_xwiki_wiki;} private byte[] lnke_xwiki_wiki;
public byte[] Lnke_xwiki_page() {return lnke_xwiki_page;} private byte[] lnke_xwiki_page;
public Gfo_url_arg[] Lnke_xwiki_qargs() {return lnke_xwiki_qargs;} Gfo_url_arg[] lnke_xwiki_qargs;
public void Lnke_xwiki_(byte[] wiki, byte[] page, Gfo_url_arg[] args) {this.lnke_xwiki_wiki = wiki; this.lnke_xwiki_page = page; this.lnke_xwiki_qargs = args;}
public Gfo_qarg_itm[] Lnke_xwiki_qargs() {return lnke_xwiki_qargs;} Gfo_qarg_itm[] lnke_xwiki_qargs;
public void Lnke_xwiki_(byte[] wiki, byte[] page, Gfo_qarg_itm[] args) {this.lnke_xwiki_wiki = wiki; this.lnke_xwiki_page = page; this.lnke_xwiki_qargs = args;}
public int Lnke_bgn() {return lnke_bgn;} private int lnke_bgn;
public int Lnke_end() {return lnke_end;} private int lnke_end;
public Xop_lnke_tkn Lnke_rng_(int bgn, int end) {lnke_bgn = bgn; lnke_end = end; return this;}

View File

@@ -16,9 +16,11 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.apps.progs.*; import gplx.xowa.net.*; import gplx.xowa.wikis.xwikis.*;
import gplx.core.net.*; import gplx.xowa.urls.*;
import gplx.xowa.apps.progs.*; import gplx.xowa.wikis.xwikis.*;
public class Xop_lnke_wkr implements Xop_ctx_wkr {
public void Ctor_ctx(Xop_ctx ctx) {url_parser = ctx.App().Utl__url_parser().Url_parser();} Gfo_url_parser url_parser; Gfo_url_site_data site_data = new Gfo_url_site_data(); Xoa_url_parser xo_url_parser = new Xoa_url_parser(); Xoa_url xo_url_parser_url = Xoa_url.blank_();
public void Ctor_ctx(Xop_ctx ctx) {url_parser = ctx.Wiki().Utl__url_parser().Url_parser();} Gfo_url_parser url_parser; Gfo_url_site_data site_data = new Gfo_url_site_data();
private Xoa_url xo_url_parser_url = Xoa_url.blank();
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {}
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
public boolean Dangling_goes_on_stack() {return dangling_goes_on_stack;} public void Dangling_goes_on_stack_(boolean v) {dangling_goes_on_stack = v;} private boolean dangling_goes_on_stack;
@@ -38,7 +40,7 @@ public class Xop_lnke_wkr implements Xop_ctx_wkr {
)
return ctx.Lxr_make_txt_(cur_pos - 1); // -1 to ignore ":" in making text colon; needed to process ":" for list like "; attl: b" PAGE:de.w:Mord_(Deutschland)#Besonders_verwerfliche_Begehungsweise; DATE:2015-01-09
if (ctx.Stack_get_typ(Xop_tkn_itm_.Tid_lnke) != null) return ctx.Lxr_make_txt_(cur_pos); // no nested lnke; return cur lnke as text; EX: "[irc://a irc://b]" -> "<a href='irc:a'>irc:b</a>"
if (proto_tid == Xoo_protocol_itm.Tid_xowa) return Make_tkn_xowa(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, protocol, proto_tid, lnke_type);
if (proto_tid == Gfo_protocol_itm.Tid_xowa) return Make_tkn_xowa(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, protocol, proto_tid, lnke_type);
// HACK: need to disable lnke if enclosing type is lnki and (1) arg is "link=" or (2) in 1st arg; basically, only enable for caption tkns (and preferably, thumb only) (which should be neither 1 or 2)
if (ctx.Cur_tkn_tid() == Xop_tkn_itm_.Tid_lnki && lnke_type == Xop_lnke_tkn.Lnke_typ_text) {
@@ -129,7 +131,7 @@ public class Xop_lnke_wkr implements Xop_ctx_wkr {
lnke_type = Xop_lnke_tkn.Lnke_typ_brack_dangling;
return ctx.Lxr_make_txt_(lnke_end); // textify lnk; EX: [irc://a\n] textifies "[irc://a"
default:
lnke_bgn += proto_tid == Xoo_protocol_itm.Tid_relative_2 ? 2 : 1; // if Tid_relative_2, then starts with [[; adjust by 2; EX:"[[//en" should have lnke_bgn at "//en", not "[//en"
lnke_bgn += proto_tid == Gfo_protocol_itm.Tid_relative_2 ? 2 : 1; // if Tid_relative_2, then starts with [[; adjust by 2; EX:"[[//en" should have lnke_bgn at "//en", not "[//en"
lnke_type = Xop_lnke_tkn.Lnke_typ_brack;
break;
}
@@ -148,7 +150,7 @@ public class Xop_lnke_wkr implements Xop_ctx_wkr {
}
}
}
if (proto_tid == Xoo_protocol_itm.Tid_relative_2) // for "[[//", add "["; rest of code handles "[//" normally, but still want to include literal "["; DATE:2013-02-02
if (proto_tid == Gfo_protocol_itm.Tid_relative_2) // for "[[//", add "["; rest of code handles "[//" normally, but still want to include literal "["; DATE:2013-02-02
ctx.Subs_add(root, tkn_mkr.Txt(lnke_bgn - 1, lnke_bgn));
url_parser.Parse_site_fast(site_data, src, lnke_bgn, lnke_end);
int site_bgn = site_data.Site_bgn(), site_end = site_data.Site_end();
@@ -162,9 +164,12 @@ public class Xop_lnke_wkr implements Xop_ctx_wkr {
Xop_lnke_tkn tkn = tkn_mkr.Lnke(bgn_pos, brack_end_pos, protocol, proto_tid, lnke_type, lnke_bgn, lnke_end);
tkn.Lnke_relative_(site_data.Rel());
Xow_xwiki_itm xwiki = ctx.App().Usere().Wiki().Xwiki_mgr().Get_by_mid(src, site_bgn, site_end); // NOTE: check User_wiki.Xwiki_mgr, not App.Wiki_mgr() b/c only it is guaranteed to know all wikis on system
if (xwiki != null) { // lnke is to an xwiki; EX: [http://en.wikipedia.org/A a]
if ( xwiki != null // lnke is to an xwiki; EX: [http://en.wikipedia.org/A a]
&& Byte_.In(proto_tid, Gfo_protocol_itm.Tid_relative_1, Gfo_protocol_itm.Tid_relative_2, Gfo_protocol_itm.Tid_http, Gfo_protocol_itm.Tid_https) // only consider http / https; ignore mailto and others; PAGE:uk.w:Маскалі; DATE:2015-07-28
&& Bry_.Match(src, site_bgn, site_end, xwiki.Domain_bry()) // only consider full domains, not alliases; EX: [http://w/b] should not match alias of w for en.wikipedia.org
) {
Xowe_wiki wiki = ctx.Wiki();
Xoa_url_parser.Parse_url(xo_url_parser_url, ctx.App(), wiki, src, lnke_bgn, lnke_end, false);
xo_url_parser_url = wiki.Utl__url_parser().Parse(src, lnke_bgn, lnke_end);
byte[] xwiki_wiki = xo_url_parser_url.Wiki_bry();
byte[] xwiki_page = xo_url_parser_url.Page_bry();
byte[] ttl_bry = xo_url_parser_url.Page_bry();
@@ -173,7 +178,7 @@ public class Xop_lnke_wkr implements Xop_ctx_wkr {
xwiki_wiki = ttl.Wik_itm().Domain_bry();
xwiki_page = ttl.Page_url();
}
tkn.Lnke_xwiki_(xwiki_wiki, xwiki_page, xo_url_parser_url.Args());
tkn.Lnke_xwiki_(xwiki_wiki, xwiki_page, xo_url_parser_url.Qargs_ary());
}
ctx.Subs_add(root, tkn);
if (lnke_type == Xop_lnke_tkn.Lnke_typ_brack) {

View File

@@ -84,11 +84,11 @@ public class Xop_lnke_wkr_brack_tst {
@Test public void Encode_relative() { // PURPOSE: counterpart to Encode_xwiki; DATE:2014-07-15
fxt.Test_parse_page_wiki_str // encode page
( "[//a.org/%22%3E_A B]"
, "<a href=\"http://a.org/%22%3E_A\" class=\"external text\" rel=\"nofollow\">B</a>" // '%22%3E' not '">'
, "<a href=\"https://a.org/%22%3E_A\" class=\"external text\" rel=\"nofollow\">B</a>" // '%22%3E' not '">'
);
fxt.Test_parse_page_wiki_str // encode args
( "[//a.org/A?b=%22%3E_C D]"
, "<a href=\"http://a.org/A?b=%22%3E_C\" class=\"external text\" rel=\"nofollow\">D</a>" // '%22%3E' not '">'
, "<a href=\"https://a.org/A?b=%22%3E_C\" class=\"external text\" rel=\"nofollow\">D</a>" // '%22%3E' not '">'
);
}
}

View File

@@ -25,7 +25,7 @@ public class Xop_lnke_wkr_relative_tst {
);
}
@Test public void Relative_external() {
fxt.Test_parse_page_wiki_str("[//www.a.org a]", "<a href=\"http://www.a.org\" class=\"external text\" rel=\"nofollow\">a</a>");
fxt.Test_parse_page_wiki_str("[//www.a.org a]", "<a href=\"https://www.a.org\" class=\"external text\" rel=\"nofollow\">a</a>");
}
@Test public void Relative_internal() {
fxt.Init_xwiki_add_user_("en.wikipedia.org");

View File

@@ -29,7 +29,7 @@ public class Xop_lnke_wkr_text_tst {
fxt.Test_parse_page_wiki("irc://a b c", fxt.tkn_lnke_(0, 7).Lnke_rng_(0, 7), fxt.tkn_space_(7, 8), fxt.tkn_txt_(8, 9), fxt.tkn_space_(9, 10), fxt.tkn_txt_(10, 11));
}
@Test public void Text_before_ascii() { // PURPOSE: free form external urls should not match if preceded by letters; EX:de.w:Sylvie_und_Bruno; DATE:2014-05-11
fxt.Ctx().Lang().Case_mgr_utf8_();
fxt.Ctx().Lang().Case_mgr_u8_();
String expd_lnke_html = "<a href=\"tel:a\" class=\"external text\" rel=\"nofollow\">tel:a</a>";
fxt.Test_parse_page_wiki_str("titel:a" , "titel:a");
fxt.Test_parse_page_wiki_str(" tel:a" , " " + expd_lnke_html);

View File

@@ -40,4 +40,16 @@ public class Xop_lnke_wkr_xwiki_tst {
fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_a7("en.wikipedia.org"), Bry_.new_a7("en.wikipedia.org"));
fxt.Test_parse_page_wiki_str("[http://en.wikipedia.org/wiki/A?action=edit a]", "<a href=\"/site/en.wikipedia.org/wiki/A?action=edit\">a</a>");
}
@Test public void Ignore_proto() { // PURPOSE: links with other protocols (EX: mailto) must be written as ordinary external links; PAGE:uk.w:Маскалі; DATE:2015-07-28
	String wtxt = "[mailto:a b]";
	// expected: href keeps the original "mailto:a" target and the anchor carries the usual external-link attributes
	String expd_html = "<a href=\"mailto:a\" class=\"external text\" rel=\"nofollow\">b</a>";
	fxt.Test_parse_page_wiki_str(wtxt, expd_html);
}
@Test public void Ignore_alias() { // PURPOSE: fictitious example to make sure aliases are not subbed for domains; DATE:2015-07-28
	// register "w" alongside "en.wikipedia.org" in the user xwiki list (presumably alias -> domain; confirm against fixture)
	fxt.Init_xwiki_add_user_("w", "en.wikipedia.org");
	String wtxt = "[https://w/b c]";
	// expected: href stays "https://w/b"; it must not be rewritten to point at en.wikipedia.org
	String expd_html = "<a href=\"https://w/b\" class=\"external text\" rel=\"nofollow\">c</a>";
	fxt.Test_parse_page_wiki_str(wtxt, expd_html);
}
@Test public void Xwiki__qargs() { // PURPOSE: fix null ref error; PAGE:en.w:Wikipedia:Template_standardisation/demometa DATE:2015-08-02
	fxt.Init_xwiki_add_user_("en.wikipedia.org");
	// source url uses "&#61;" in place of "=" in its query string; there is no caption, so the link is auto-numbered "[1]"
	String wtxt = "[http://en.wikipedia.org/w/index.php?action&#61;edit&preload&#61;Template:Afd2+starter&editintro&#61;Template:Afd3+starter&title&#61;Wikipedia:Articles+for+deletion/Template_standardisation/demometa]";
	// NOTE(review): expected href pins the output as currently generated; the test's stated purpose is the null ref, not the exact query-arg formatting
	String expd_html = "<a href=\"/site/en.wikipedia.org/wiki/index.php?action=&#61;edit=&preload=&#61;Template:Afd2+starter=&editintro=&#61;Template:Afd3+starter=&title=&=\">[1]</a>";
	fxt.Test_parse_page_wiki_str(wtxt, expd_html);
}
}