Gui: Strip `about:` from links during `Copy` [#823]

master
gnosygnu 3 years ago
parent 1a6a203cfd
commit 4187dc4a76

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2021 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,80 +13,94 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.htmls.hrefs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
import gplx.core.btries.*; import gplx.core.primitives.*;
public class Xoh_href_gui_utl {
	// type digit that prefixes the text handed to Html_extract_text; EX: "0|", "1|selected_text", "2|/wiki/A"
	private static final byte Text_tid_none = 0, Text_tid_text = 1, Text_tid_href = 2;
	// kinds of xowa href recognized by href_trie
	private static final byte Href_tid_wiki = 1, Href_tid_site = 2, Href_tid_anch = 3;
	// maps the leading bytes of an href to one of the Href_tid_* values
	private static final Btrie_slim_mgr href_trie = Btrie_slim_mgr.cs()
		.Add_bry_byte(Xoh_href_.Bry__site, Href_tid_site)
		.Add_bry_byte(Xoh_href_.Bry__wiki, Href_tid_wiki)
		.Add_bry_byte(Xoh_href_.Bry__anch, Href_tid_anch)
		;

	// Converts "{type}|{payload}" into the text to hand back to the caller
	// . type 0: returns ""
	// . type 1: returns payload as-is
	// . type 2: payload is an href; a leading "file://" is ignored and the rest is resolved against site / page
	// . any other type throws Err_.new_unhandled
	public static String Html_extract_text(String site, String page, String text_str) {
		final int payload_bgn = 2; // everything before this offset is the "{type}|" prefix
		byte[] raw = Bry_.new_u8(text_str);
		int raw_len = raw.length;
		int kind = Byte_ascii.To_a7_int(raw[0]);

		if (kind == Text_tid_none) return "";
		if (kind == Text_tid_text) return String_.new_u8(raw, payload_bgn, raw_len);
		if (kind != Text_tid_href) throw Err_.new_unhandled(kind);

		// href: step over "file://" when present, then classify whatever follows
		int link_bgn = Bry_.Has_at_bgn(raw, Xoh_href_.Bry__file, payload_bgn, raw_len)
			? payload_bgn + Xoh_href_.Len__file
			: payload_bgn;
		Byte_obj_val match = (Byte_obj_val)href_trie.Match_bgn(raw, link_bgn, raw_len);
		if (match != null) {
			switch (match.Val()) {
				case Href_tid_site:
					// drop the 6 chars of "/site/"; what remains already starts with the wiki domain
					return String_.new_u8(raw, link_bgn + 6, raw_len);
				case Href_tid_wiki:
					// "/wiki/Page" -> "{site}/wiki/Page"
					return site + String_.new_u8(raw, link_bgn, raw_len);
				case Href_tid_anch:
					// "#anchor" -> "{site}/wiki/{page}#anchor"
					return site + "/wiki/" + page + String_.new_u8(raw, link_bgn, raw_len);
				default:
					break;
			}
		}
		// not an xowa link (EX: "http://a.org"): return the payload untouched, including any "file://"
		return String_.new_u8(raw, payload_bgn, raw_len);
	}

	// Strips "file://" from the start of str when a known xowa link follows it; otherwise returns str unchanged
	public static String Standardize_xowa_link(String str) {
		byte[] src = Bry_.new_u8(str);
		int strip_len = Skip_start_of_xowa_link(src, src.length, 0);
		if (strip_len == 0) return str;
		return String_.Mid(str, strip_len);
	}

	// Returns the position just after "file://" when src has "file://" at bgn followed by a known xowa link; else returns bgn
	private static int Skip_start_of_xowa_link(byte[] src, int src_len, int bgn) {
		boolean has_file_prefix = Bry_.Has_at_bgn(src, Xoh_href_.Bry__file, bgn, src_len);
		if (!has_file_prefix) return bgn;

		int after_file = bgn + Xoh_href_.Len__file;
		Object found = href_trie.Match_bgn(src, after_file, src_len);
		if (found == null) return bgn; // unknown link type: leave "file://" in place

		switch (((Byte_obj_val)found).Val()) {
			case Href_tid_site:
			case Href_tid_wiki:
			case Href_tid_anch:
				return after_file;
			default:
				throw Err_.new_unhandled(found);
		}
	}
}
/*
NOTE_1:
. swt/mozilla treats text differently in href="{text}" when content_editable=n; occurs in LocationListener.changing
http://a.org -> http://a.org does nothing
A -> file:///A adds "file:///"
/wiki/A -> file:///wiki/A adds "file://"
Category:A -> Category:A noops; Category is assumed to be protocol?
//en.wiktionary.org/wiki/a -> file:///wiki/a strips out site name and prepends "file://"; no idea why
. so, to handle the above, the code does the following
http://a.org -> http://a.org does nothing; nothing needed
A -> /wiki/A always prepend /wiki/
Category:A -> /wiki/Category:A always prepend /wiki/
//en.wiktionary.org/wiki/A -> /site/en.wiktionary.org/wiki/A always transform relative url to /site/
. the href will still come here as file:///wiki/A or file:///site/en.wiktionary.org/wiki/A.
. however, the file:// can be lopped off and discarded and the rest of the href will fall into one of the following cases
.. /wiki/
.. /site/
.. /xcmd/
.. #
.. anything else -> assume to be really a file:// url; EX: file://C/dir/fil.txt -> C/dir/fil.txt
. the other advantage of this approach is that this proc can be reused outside of swt calls; i.e.: it can parse both "file:///wiki/A" and "/wiki/A"
*/
package gplx.xowa.htmls.hrefs;
import gplx.Bry_;
import gplx.Byte_ascii;
import gplx.Err_;
import gplx.String_;
import gplx.core.btries.Btrie_slim_mgr;
import gplx.core.primitives.Byte_obj_val;
import gplx.gfui.kits.swts.Swt_html_utl;
public class Xoh_href_gui_utl {
	// Converts "{type}|{payload}" into the text to hand back to the caller
	// . type 0: returns ""
	// . type 1: returns payload as-is
	// . type 2: payload is an href; it is passed through Swt_html_utl.NormalizeSwtUrl, a leading "file://" is
	//   standardized away, and the rest is resolved against site / page
	// . any other type throws Err_.new_unhandled
	// . null or empty text_str returns "" (previously failed with an array-index error on text_bry[0])
	public static String Html_extract_text(String site, String page, String text_str) {
		if (text_str == null || text_str.isEmpty()) return ""; // nothing to extract; treat like "0|"
		byte[] text_bry = Bry_.new_u8(text_str);
		int text_len = text_bry.length;
		int text_tid = Byte_ascii.To_a7_int(text_bry[0]);
		switch (text_tid) {
			case Text_tid_none: return ""; // "0"
			case Text_tid_text: return String_.new_u8(text_bry, 2, text_len); // 2 to skip "1|"
			case Text_tid_href: break; // handled below
			default: throw Err_.new_unhandled(text_tid);
		}

		// 2 to skip "2|"; the prefix is ascii, so the char offset can be applied to text_str directly
		String href_str = String_.Mid(text_str, 2);
		href_str = Swt_html_utl.NormalizeSwtUrl(href_str);
		if (String_.Has_at_bgn(href_str, Xoh_href_.Str__file))
			href_str = Standardize_xowa_link(href_str); // skip "file://"

		// match against the utf8 bytes, and bound the match by the byte count;
		// String.length() counts utf16 chars, which is a different unit whenever the href has non-ascii text
		byte[] href_bry = Bry_.new_u8(href_str);
		Byte_obj_val href_tid = (Byte_obj_val)href_trie.Match_bgn(href_bry, 0, href_bry.length);
		if (href_tid != null) {
			switch (href_tid.Val()) {
				case Href_tid_wiki: return site + href_str; // "/wiki/A" -> "{site}/wiki/A"
				case Href_tid_site: return String_.Mid(href_str, 6); // 6 to skip "/site/"
				case Href_tid_anch: return site + "/wiki/" + page + href_str; // "#a" -> "{site}/wiki/{page}#a"
			}
		}
		return href_str; // handles "http://" text as well as any fall-thru from above
	}

	// Strips the start of a "file://" link; returns str unchanged when nothing is to be skipped
	// . "file:///wiki/A" -> "/wiki/A"; "file://#A" -> "#A"; "file:///C:/dir/fil.png" -> "C:/dir/fil.png"
	// . NOTE: skip is a byte offset applied as a char offset; this relies on the skipped prefix
	//   ("file://" plus an optional "/") being ascii
	public static String Standardize_xowa_link(String str) {
		byte[] bry = Bry_.new_u8(str);
		int skip = Skip_start_of_xowa_link(bry, bry.length, 0);
		return skip == 0 ? str : String_.Mid(str, skip);
	}

	// Returns the position at which the usable part of the link starts
	// . no "file://" at bgn                  -> bgn
	// . "file://" + known xowa link          -> position after "file://"
	// . "file://" + "/" (real file url)      -> position after "file:///"
	// . "file://" + anything else            -> bgn
	private static int Skip_start_of_xowa_link(byte[] src, int src_len, int bgn) {
		if (!Bry_.Has_at_bgn(src, Xoh_href_.Bry__file, bgn, src_len)) return bgn; // does not start with "file://"
		int pos = bgn + Xoh_href_.Len__file; // skip "file://"
		Object tid_obj = href_trie.Match_bgn(src, pos, src_len);
		if (tid_obj == null) {
			if (pos < src_len && src[pos] == Byte_ascii.Slash) { // handle "file:///C:/dir/fil.png"
				return pos + 1;
			}
			return bgn; // if not a known xowa link, return original bgn;
		}
		switch (((Byte_obj_val)tid_obj).Val()) {
			case Href_tid_site:
			case Href_tid_wiki:
			case Href_tid_anch:
				return pos;
			default:
				throw Err_.new_unhandled(tid_obj);
		}
	}

	// type digit that prefixes the text handed to Html_extract_text
	private static final byte Text_tid_none = 0, Text_tid_text = 1, Text_tid_href = 2;
	// kinds of xowa href recognized by href_trie
	private static final byte Href_tid_wiki = 1, Href_tid_site = 2, Href_tid_anch = 3;
	// maps the leading bytes of an href to one of the Href_tid_* values
	private static final Btrie_slim_mgr href_trie = Btrie_slim_mgr.cs()
		.Add_bry_byte(Xoh_href_.Bry__site , Href_tid_site)
		.Add_bry_byte(Xoh_href_.Bry__wiki , Href_tid_wiki)
		.Add_bry_byte(Xoh_href_.Bry__anch , Href_tid_anch)
		;
}
/*
NOTE_1:
. swt/mozilla treats text differently in href="{text}" when content_editable=n; occurs in LocationListener.changing
http://a.org -> http://a.org does nothing
A -> file:///A adds "file:///"
/wiki/A -> file:///wiki/A adds "file://"
Category:A -> Category:A noops; Category is assumed to be protocol?
//en.wiktionary.org/wiki/a -> file:///wiki/a strips out site name and prepends "file://"; no idea why
. so, to handle the above, the code does the following
http://a.org -> http://a.org does nothing; nothing needed
A -> /wiki/A always prepend /wiki/
Category:A -> /wiki/Category:A always prepend /wiki/
//en.wiktionary.org/wiki/A -> /site/en.wiktionary.org/wiki/A always transform relative url to /site/
. the href will still come here as file:///wiki/A or file:///site/en.wiktionary.org/wiki/A.
. however, the file:// can be lopped off and discarded and the rest of the href will fall into one of the following cases
.. /wiki/
.. /site/
.. /xcmd/
.. #
.. anything else -> assume to be really a file:// url; EX: file:///C:/dir/fil.png -> C:/dir/fil.png
. the other advantage of this approach is that this proc can be reused outside of swt calls; i.e.: it can parse both "file:///wiki/A" and "/wiki/A"
*/

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2021 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,54 +13,66 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.htmls.hrefs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
import org.junit.*;
import gplx.core.primitives.*; import gplx.xowa.htmls.hrefs.*; import gplx.xowa.guis.views.*;
public class Xoh_href_gui_utl_tst {
	private Xoh_href_gui_utl_fxt fxt = new Xoh_href_gui_utl_fxt();

	// reset the fixture's current wiki / page before every test
	@Before
	public void init() {
		fxt.Clear();
	}

	// types "0|" and "1|", plus a type "2|" href that is not an xowa link
	@Test
	public void Extract_href__text() {
		fxt.Test_extract_href("0|", "");
		fxt.Test_extract_href("1|selected_text", "selected_text");
		fxt.Test_extract_href("2|http://a.org", "http://a.org");
	}

	// hrefs that arrive with a "file://" prefix
	@Test
	public void Extract_href__file() {
		fxt.Test_extract_href("2|file:///site/en.wiktionary.org/wiki/Page_1", "en.wiktionary.org/wiki/Page_1");
		fxt.Test_extract_href("2|file:///wiki/Page_2", "en.wikipedia.org/wiki/Page_2");
		fxt.Test_extract_href("2|file://#anchor", "en.wikipedia.org/wiki/Page_0#anchor");
	}

	// same hrefs as above, but without the "file://" prefix
	@Test
	public void Extract_href__internal() {
		fxt.Test_extract_href("2|/site/en.wiktionary.org/wiki/Page_1", "en.wiktionary.org/wiki/Page_1");
		fxt.Test_extract_href("2|/wiki/Page_2", "en.wikipedia.org/wiki/Page_2");
		fxt.Test_extract_href("2|#anchor", "en.wikipedia.org/wiki/Page_0#anchor");
	}

	@Test
	public void Html_window_vpos_parse() {
		fxt.Test_Html_window_vpos_parse("0|0,1,2", "0", "'0','1','2'");
		// check that invalid path doesn't fail; DATE:2014-04-05
		fxt.Test_Html_window_vpos_parse("org.eclipse.swt.SWTException: Permission denied for <file://> to get property Selection.rangeCount", null, null);
	}

	@Test
	public void Standardize_xowa_link() {
		fxt.Test_standardize_xowa_link("file:///site/en.wikipedia.org/wiki/A", "/site/en.wikipedia.org/wiki/A");
		fxt.Test_standardize_xowa_link("file:///wiki/A", "/wiki/A");
		fxt.Test_standardize_xowa_link("file://#A", "#A");
	}
}
// Test fixture: holds the "current" wiki / page and wraps the calls under test
class Xoh_href_gui_utl_fxt {
	private String cur_wiki;
	private String cur_page;
	// reused output holders for Xog_html_itm.Html_window_vpos_parse
	private String_obj_ref scroll_top = String_obj_ref.null_(), node_path = String_obj_ref.null_();

	// resets the current wiki / page to the defaults the tests expect
	public void Clear() {
		cur_wiki = "en.wikipedia.org";
		cur_page = "Page_0";
	}
	public String Cur_wiki() {return cur_wiki;}
	public Xoh_href_gui_utl_fxt Cur_wiki_(String v) {cur_wiki = v; return this;}
	public String Cur_page() {return cur_page;}
	public Xoh_href_gui_utl_fxt Cur_page_(String v) {cur_page = v; return this;}

	// asserts that Html_extract_text, called with the current wiki / page, returns expd
	public void Test_extract_href(String text_str, String expd) {
		Tfds.Eq(expd, Xoh_href_gui_utl.Html_extract_text(cur_wiki, cur_page, text_str));
	}
	// asserts the scroll_top / node_path that Html_window_vpos_parse writes into the holders
	public void Test_Html_window_vpos_parse(String raw, String expd_scroll_top, String expd_node_path) {
		scroll_top.Val_null_(); node_path.Val_null_();
		Xog_html_itm.Html_window_vpos_parse(raw, scroll_top, node_path);
		// third arg is presumably the failure message (compare Tfds.Eq_str below); pass a fixed label
		// instead of the expected value, which can be null
		Tfds.Eq(expd_scroll_top, scroll_top.Val(), "scroll_top");
		Tfds.Eq(expd_node_path, node_path.Val(), "node_path");
	}
	// asserts that Standardize_xowa_link turns raw into expd
	public void Test_standardize_xowa_link(String raw, String expd) {
		Tfds.Eq_str(expd, Xoh_href_gui_utl.Standardize_xowa_link(raw), "standardize");
	}
}
package gplx.xowa.htmls.hrefs;
import gplx.Tfds;
import gplx.core.primitives.String_obj_ref;
import gplx.xowa.guis.views.Xog_html_itm;
import org.junit.Before;
import org.junit.Test;
public class Xoh_href_gui_utl_tst {
	private Xoh_href_gui_utl_fxt fxt = new Xoh_href_gui_utl_fxt();

	// reset the fixture's current wiki / page before every test
	@Before
	public void init() {
		fxt.Clear();
	}

	// types "0|" and "1|", plus a type "2|" href that is not an xowa link
	@Test
	public void Extract_href__text() {
		fxt.Test_extract_text("0|", "");
		fxt.Test_extract_text("1|selected_text", "selected_text");
		fxt.Test_extract_text("2|http://a.org", "http://a.org");
	}

	// hrefs that arrive with a "file://" prefix
	@Test
	public void Extract_href__file() {
		fxt.Test_extract_text("2|file:///site/en.wiktionary.org/wiki/Page_1", "en.wiktionary.org/wiki/Page_1");
		fxt.Test_extract_text("2|file:///wiki/Page_2", "en.wikipedia.org/wiki/Page_2");
		fxt.Test_extract_text("2|file://#anchor", "en.wikipedia.org/wiki/Page_0#anchor");
	}

	// same hrefs as above, but without the "file://" prefix
	@Test
	public void Extract_href__internal() {
		fxt.Test_extract_text("2|/site/en.wiktionary.org/wiki/Page_1", "en.wiktionary.org/wiki/Page_1");
		fxt.Test_extract_text("2|/wiki/Page_2", "en.wikipedia.org/wiki/Page_2");
		fxt.Test_extract_text("2|#anchor", "en.wikipedia.org/wiki/Page_0#anchor");
	}

	@Test
	public void Html_window_vpos_parse() {
		fxt.Test_Html_window_vpos_parse("0|0,1,2", "0", "'0','1','2'");
		// check that invalid path doesn't fail; DATE:2014-04-05
		fxt.Test_Html_window_vpos_parse("org.eclipse.swt.SWTException: Permission denied for <file://> to get property Selection.rangeCount", null, null);
	}

	@Test
	public void Standardize_xowa_link() {
		fxt.Test_standardize_xowa_link("file:///site/en.wikipedia.org/wiki/A", "/site/en.wikipedia.org/wiki/A");
		fxt.Test_standardize_xowa_link("file:///wiki/A", "/wiki/A");
		fxt.Test_standardize_xowa_link("file://#A", "#A");
		fxt.Test_standardize_xowa_link("file:///C:/dir/fil.png", "C:/dir/fil.png");
	}

	// 2021-01-03|ISSUE#:823|Copy fails for links `about:/wiki/PAGE_NAME` or `about:/site/WIKI_NAME/wiki/PAGE_NAME`
	@Test
	public void Swt() {
		fxt.Test_extract_text("2|about:/site/en.wiktionary.org/wiki/Page_1", "en.wiktionary.org/wiki/Page_1");
		fxt.Test_extract_text("2|about:/wiki/Page_2", "en.wikipedia.org/wiki/Page_2");
		fxt.Test_extract_text("2|about:#anchor", "en.wikipedia.org/wiki/Page_0#anchor");
		fxt.Test_extract_text("2|about:file:///C:/dir/fil.png", "C:/dir/fil.png");
	}
}
// Test fixture: holds the "current" wiki / page and wraps the calls under test
class Xoh_href_gui_utl_fxt {
	private String cur_wiki;
	private String cur_page;
	// reused output holders for Xog_html_itm.Html_window_vpos_parse
	private String_obj_ref scroll_top = String_obj_ref.null_(), node_path = String_obj_ref.null_();

	// resets the current wiki / page to the defaults the tests expect
	public void Clear() {
		cur_wiki = "en.wikipedia.org";
		cur_page = "Page_0";
	}
	public String Cur_wiki() {return cur_wiki;}
	public Xoh_href_gui_utl_fxt Cur_wiki_(String v) {cur_wiki = v; return this;}
	public String Cur_page() {return cur_page;}
	public Xoh_href_gui_utl_fxt Cur_page_(String v) {cur_page = v; return this;}

	// asserts that Html_extract_text, called with the current wiki / page, returns expd
	public void Test_extract_text(String text_str, String expd) {
		Tfds.Eq(expd, Xoh_href_gui_utl.Html_extract_text(cur_wiki, cur_page, text_str));
	}
	// asserts the scroll_top / node_path that Html_window_vpos_parse writes into the holders
	public void Test_Html_window_vpos_parse(String raw, String expd_scroll_top, String expd_node_path) {
		scroll_top.Val_null_(); node_path.Val_null_();
		Xog_html_itm.Html_window_vpos_parse(raw, scroll_top, node_path);
		// third arg is presumably the failure message (compare Tfds.Eq_str below); pass a fixed label
		// instead of the expected value, which can be null
		Tfds.Eq(expd_scroll_top, scroll_top.Val(), "scroll_top");
		Tfds.Eq(expd_node_path, node_path.Val(), "node_path");
	}
	// asserts that Standardize_xowa_link turns raw into expd
	public void Test_standardize_xowa_link(String raw, String expd) {
		Tfds.Eq_str(expd, Xoh_href_gui_utl.Standardize_xowa_link(raw), "standardize");
	}
}

Loading…
Cancel
Save