Gui: Strip `about:` from links during `Copy` [#823]

master
gnosygnu 3 years ago
parent 1a6a203cfd
commit 4187dc4a76

@ -1,6 +1,6 @@
/* /*
XOWA: the XOWA Offline Wiki Application XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com Copyright (C) 2012-2021 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3, XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0. or alternatively under the terms of the Apache License Version 2.0.
@ -13,80 +13,94 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/ */
package gplx.xowa.htmls.hrefs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; package gplx.xowa.htmls.hrefs;
import gplx.core.btries.*; import gplx.core.primitives.*;
public class Xoh_href_gui_utl { import gplx.Bry_;
public static String Html_extract_text(String site, String page, String text_str) { import gplx.Byte_ascii;
byte[] text_bry = Bry_.new_u8(text_str); import gplx.Err_;
int text_len = text_bry.length; import gplx.String_;
int text_tid = Byte_ascii.To_a7_int(text_bry[0]); import gplx.core.btries.Btrie_slim_mgr;
switch (text_tid) { import gplx.core.primitives.Byte_obj_val;
case Text_tid_none: return ""; // "0" import gplx.gfui.kits.swts.Swt_html_utl;
case Text_tid_text: return String_.new_u8(text_bry, 2, text_len); // 2 to skip "1|"
case Text_tid_href: break; // fall through to below public class Xoh_href_gui_utl {
default: throw Err_.new_unhandled(text_tid); public static String Html_extract_text(String site, String page, String text_str) {
} byte[] text_bry = Bry_.new_u8(text_str);
int href_bgn = 2; // 2 to skip "2|" int text_len = text_bry.length;
if (Bry_.Has_at_bgn(text_bry, Xoh_href_.Bry__file, href_bgn, text_len)) int text_tid = Byte_ascii.To_a7_int(text_bry[0]);
href_bgn += Xoh_href_.Len__file; // skip "file://" switch (text_tid) {
Byte_obj_val href_tid = (Byte_obj_val)href_trie.Match_bgn(text_bry, href_bgn, text_len); case Text_tid_none: return ""; // "0"
if (href_tid != null) { case Text_tid_text: return String_.new_u8(text_bry, 2, text_len); // 2 to skip "1|"
switch (href_tid.Val()) { case Text_tid_href: break; // fall through to below
case Href_tid_wiki: return site + String_.new_u8(text_bry, href_bgn, text_len); default: throw Err_.new_unhandled(text_tid);
case Href_tid_site: return String_.new_u8(text_bry, href_bgn + 6, text_len); // +6 to skip "site/" }
case Href_tid_anch: return site + "/wiki/" + page + String_.new_u8(text_bry, href_bgn, text_len); String href_str = String_.Mid(String_.new_u8(text_bry), 2);
} href_str = Swt_html_utl.NormalizeSwtUrl(href_str);
} if (String_.Has_at_bgn(href_str, Xoh_href_.Str__file))
return String_.new_u8(text_bry, 2, text_len); // 2 to skip "2|"; handles "http://" text as well as any fall-thru from above href_str = Standardize_xowa_link(href_str); // skip "file://"
} Byte_obj_val href_tid = (Byte_obj_val)href_trie.Match_bgn(Bry_.new_u8(href_str), 0, href_str.length());
public static String Standardize_xowa_link(String str) { if (href_tid != null) {
byte[] bry = Bry_.new_u8(str); switch (href_tid.Val()) {
int skip = Skip_start_of_xowa_link(bry, bry.length, 0); case Href_tid_wiki: return site + href_str;
return skip == 0 ? str : String_.Mid(str, skip); case Href_tid_site: return String_.Mid(href_str, 6); // +6 to skip "site/"
} case Href_tid_anch: return site + "/wiki/" + page + href_str;
private static int Skip_start_of_xowa_link(byte[] src, int src_len, int bgn) { }
if (!Bry_.Has_at_bgn(src, Xoh_href_.Bry__file, bgn, src_len)) return bgn; // does not start with "file://" }
int pos = bgn + Xoh_href_.Len__file; // skip "file://" return href_str;
Object tid_obj = href_trie.Match_bgn(src, pos, src_len); }
if (tid_obj == null) { public static String Standardize_xowa_link(String str) {
return bgn; // if not a known xowa link, return original bgn; byte[] bry = Bry_.new_u8(str);
} int skip = Skip_start_of_xowa_link(bry, bry.length, 0);
switch (((Byte_obj_val)tid_obj).Val()) { return skip == 0 ? str : String_.Mid(str, skip);
case Href_tid_site: return pos; }
case Href_tid_wiki: return pos; private static int Skip_start_of_xowa_link(byte[] src, int src_len, int bgn) {
case Href_tid_anch: return pos; if (!Bry_.Has_at_bgn(src, Xoh_href_.Bry__file, bgn, src_len)) return bgn; // does not start with "file://"
default: throw Err_.new_unhandled(tid_obj); int pos = bgn + Xoh_href_.Len__file; // skip "file://"
} Object tid_obj = href_trie.Match_bgn(src, pos, src_len);
} if (tid_obj == null) {
private static final byte Text_tid_none = 0, Text_tid_text = 1, Text_tid_href = 2; if (src_len - pos > 0 && src[pos] == Byte_ascii.Slash) { // handle "file:///C:/dir/fil.png"
private static final byte Href_tid_wiki = 1, Href_tid_site = 2, Href_tid_anch = 3; return pos + 1;
private static final Btrie_slim_mgr href_trie = Btrie_slim_mgr.cs() }
.Add_bry_byte(Xoh_href_.Bry__site , Href_tid_site) else {
.Add_bry_byte(Xoh_href_.Bry__wiki , Href_tid_wiki) return bgn; // if not a known xowa link, return original bgn;
.Add_bry_byte(Xoh_href_.Bry__anch , Href_tid_anch) }
; }
} switch (((Byte_obj_val)tid_obj).Val()) {
/* case Href_tid_site: return pos;
NOTE_1: case Href_tid_wiki: return pos;
. swt/mozilla treats text differently in href="{text}" when content_editable=n; occurs in LocationListener.changing case Href_tid_anch: return pos;
http://a.org -> http://a.org does nothing default: throw Err_.new_unhandled(tid_obj);
A -> file:///A adds "file:///" }
/wiki/A -> file:///wiki/A adds "file://" }
Category:A -> Category:A noops; Category is assumed to be protocol? private static final byte Text_tid_none = 0, Text_tid_text = 1, Text_tid_href = 2;
//en.wiktionary.org/wiki/a -> file:///wiki/a strips out site name and prepends "file://"; no idea why private static final byte Href_tid_wiki = 1, Href_tid_site = 2, Href_tid_anch = 3;
private static final Btrie_slim_mgr href_trie = Btrie_slim_mgr.cs()
. so, to handle the above, the code does the following .Add_bry_byte(Xoh_href_.Bry__site , Href_tid_site)
http://a.org -> http://a.org does nothing; nothing needed .Add_bry_byte(Xoh_href_.Bry__wiki , Href_tid_wiki)
A -> /wiki/A always prepend /wiki/ .Add_bry_byte(Xoh_href_.Bry__anch , Href_tid_anch)
Category:A -> /wiki/Category:A always prepend /wiki/ ;
//en.wiktionary.org/wiki/A -> /site/en.wiktionary.org/wiki/A always transform relative url to /site/ }
/*
. the href will still come here as file:///wiki/A or file:///site/en.wiktionary.org/wiki/A. NOTE_1:
. however, the file:// can be lopped off and discarded and the rest of the href will fall into one of the following cases . swt/mozilla treats text differently in href="{text}" when content_editable=n; occurs in LocationListener.changing
.. /wiki/ http://a.org -> http://a.org does nothing
.. /site/ A -> file:///A adds "file:///"
.. /xcmd/ /wiki/A -> file:///wiki/A adds "file://"
.. # Category:A -> Category:A noops; Category is assumed to be protocol?
.. anything else -> assume to be really a file:// url; EX: file://C/dir/fil.txt -> C/dir/fil.txt //en.wiktionary.org/wiki/a -> file:///wiki/a strips out site name and prepends "file://"; no idea why
. the other advantage of this approach is that this proc can be reused outside of swt calls; i.e.: it can parse both "file:///wiki/A" and "/wiki/A"
*/ . so, to handle the above, the code does the following
http://a.org -> http://a.org does nothing; nothing needed
A -> /wiki/A always prepend /wiki/
Category:A -> /wiki/Category:A always prepend /wiki/
//en.wiktionary.org/wiki/A -> /site/en.wiktionary.org/wiki/A always transform relative url to /site/
. the href will still come here as file:///wiki/A or file:///site/en.wiktionary.org/wiki/A.
. however, the file:// can be lopped off and discarded and the rest of the href will fall into one of the following cases
.. /wiki/
.. /site/
.. /xcmd/
.. #
.. anything else -> assume to be really a file:// url; EX: file://C/dir/fil.txt -> C/dir/fil.txt
. the other advantage of this approach is that this proc can be reused outside of swt calls; i.e.: it can parse both "file:///wiki/A" and "/wiki/A"
*/

@ -1,6 +1,6 @@
/* /*
XOWA: the XOWA Offline Wiki Application XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com Copyright (C) 2012-2021 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3, XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0. or alternatively under the terms of the Apache License Version 2.0.
@ -13,54 +13,66 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/ */
package gplx.xowa.htmls.hrefs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; package gplx.xowa.htmls.hrefs;
import org.junit.*;
import gplx.core.primitives.*; import gplx.xowa.htmls.hrefs.*; import gplx.xowa.guis.views.*; import gplx.Tfds;
public class Xoh_href_gui_utl_tst { import gplx.core.primitives.String_obj_ref;
@Before public void init() {fxt.Clear();} private Xoh_href_gui_utl_fxt fxt = new Xoh_href_gui_utl_fxt(); import gplx.xowa.guis.views.Xog_html_itm;
@Test public void Extract_href__text() { import org.junit.Before;
fxt.Test_extract_href("0|" , ""); import org.junit.Test;
fxt.Test_extract_href("1|selected_text" , "selected_text");
fxt.Test_extract_href("2|http://a.org" , "http://a.org"); public class Xoh_href_gui_utl_tst {
} @Before public void init() {fxt.Clear();} private Xoh_href_gui_utl_fxt fxt = new Xoh_href_gui_utl_fxt();
@Test public void Extract_href__file() { @Test public void Extract_href__text() {
fxt.Test_extract_href("2|file:///site/en.wiktionary.org/wiki/Page_1" , "en.wiktionary.org/wiki/Page_1"); fxt.Test_extract_text("0|" , "");
fxt.Test_extract_href("2|file:///wiki/Page_2" , "en.wikipedia.org/wiki/Page_2"); fxt.Test_extract_text("1|selected_text" , "selected_text");
fxt.Test_extract_href("2|file://#anchor" , "en.wikipedia.org/wiki/Page_0#anchor"); fxt.Test_extract_text("2|http://a.org" , "http://a.org");
} }
@Test public void Extract_href__internal() { @Test public void Extract_href__file() {
fxt.Test_extract_href("2|/site/en.wiktionary.org/wiki/Page_1" , "en.wiktionary.org/wiki/Page_1"); fxt.Test_extract_text("2|file:///site/en.wiktionary.org/wiki/Page_1" , "en.wiktionary.org/wiki/Page_1");
fxt.Test_extract_href("2|/wiki/Page_2" , "en.wikipedia.org/wiki/Page_2"); fxt.Test_extract_text("2|file:///wiki/Page_2" , "en.wikipedia.org/wiki/Page_2");
fxt.Test_extract_href("2|#anchor" , "en.wikipedia.org/wiki/Page_0#anchor"); fxt.Test_extract_text("2|file://#anchor" , "en.wikipedia.org/wiki/Page_0#anchor");
} }
@Test public void Html_window_vpos_parse() { @Test public void Extract_href__internal() {
fxt.Test_Html_window_vpos_parse("0|0,1,2", "0", "'0','1','2'"); fxt.Test_extract_text("2|/site/en.wiktionary.org/wiki/Page_1" , "en.wiktionary.org/wiki/Page_1");
fxt.Test_Html_window_vpos_parse("org.eclipse.swt.SWTException: Permission denied for <file://> to get property Selection.rangeCount", null, null); // check that invalid path doesn't fail; DATE:2014-04-05 fxt.Test_extract_text("2|/wiki/Page_2" , "en.wikipedia.org/wiki/Page_2");
} fxt.Test_extract_text("2|#anchor" , "en.wikipedia.org/wiki/Page_0#anchor");
@Test public void Standardize_xowa_link() { }
fxt.Test_standardize_xowa_link("file:///site/en.wikipedia.org/wiki/A" , "/site/en.wikipedia.org/wiki/A"); @Test public void Html_window_vpos_parse() {
fxt.Test_standardize_xowa_link("file:///wiki/A" , "/wiki/A"); fxt.Test_Html_window_vpos_parse("0|0,1,2", "0", "'0','1','2'");
fxt.Test_standardize_xowa_link("file://#A" , "#A"); fxt.Test_Html_window_vpos_parse("org.eclipse.swt.SWTException: Permission denied for <file://> to get property Selection.rangeCount", null, null); // check that invalid path doesn't fail; DATE:2014-04-05
} }
} @Test public void Standardize_xowa_link() {
class Xoh_href_gui_utl_fxt { fxt.Test_standardize_xowa_link("file:///site/en.wikipedia.org/wiki/A" , "/site/en.wikipedia.org/wiki/A");
public void Clear() { fxt.Test_standardize_xowa_link("file:///wiki/A" , "/wiki/A");
cur_wiki = "en.wikipedia.org"; fxt.Test_standardize_xowa_link("file://#A" , "#A");
cur_page = "Page_0"; fxt.Test_standardize_xowa_link("file:///C:/dir/fil.png" , "C:/dir/fil.png");
} }
public String Cur_wiki() {return cur_wiki;} public Xoh_href_gui_utl_fxt Cur_wiki_(String v) {cur_wiki = v; return this;} private String cur_wiki; @Test public void Swt() { // 2021-01-03|ISSUE#:823|Copy fails for links `about:/wiki/PAGE_NAME` or `about:/site/WIKI_NAME/wiki/PAGE_NAME`
public String Cur_page() {return cur_page;} public Xoh_href_gui_utl_fxt Cur_page_(String v) {cur_page = v; return this;} private String cur_page; fxt.Test_extract_text("2|about:/site/en.wiktionary.org/wiki/Page_1" , "en.wiktionary.org/wiki/Page_1");
public void Test_extract_href(String text_str, String expd) { fxt.Test_extract_text("2|about:/wiki/Page_2" , "en.wikipedia.org/wiki/Page_2");
Tfds.Eq(expd, Xoh_href_gui_utl.Html_extract_text(cur_wiki, cur_page, text_str)); fxt.Test_extract_text("2|about:#anchor" , "en.wikipedia.org/wiki/Page_0#anchor");
} fxt.Test_extract_text("2|about:file:///C:/dir/fil.png" , "C:/dir/fil.png");
private String_obj_ref scroll_top = String_obj_ref.null_(), node_path = String_obj_ref.null_(); }
public void Test_Html_window_vpos_parse(String raw, String expd_scroll_top, String expd_node_path) { }
scroll_top.Val_null_(); node_path.Val_null_(); class Xoh_href_gui_utl_fxt {
Xog_html_itm.Html_window_vpos_parse(raw, scroll_top, node_path); public void Clear() {
Tfds.Eq(expd_scroll_top, scroll_top.Val(), expd_scroll_top); cur_wiki = "en.wikipedia.org";
Tfds.Eq(expd_node_path, node_path.Val(), expd_node_path); cur_page = "Page_0";
} }
public void Test_standardize_xowa_link(String raw, String expd) { public String Cur_wiki() {return cur_wiki;} public Xoh_href_gui_utl_fxt Cur_wiki_(String v) {cur_wiki = v; return this;} private String cur_wiki;
Tfds.Eq_str(expd, Xoh_href_gui_utl.Standardize_xowa_link(raw), "standardize"); public String Cur_page() {return cur_page;} public Xoh_href_gui_utl_fxt Cur_page_(String v) {cur_page = v; return this;} private String cur_page;
} public void Test_extract_text(String text_str, String expd) {
} Tfds.Eq(expd, Xoh_href_gui_utl.Html_extract_text(cur_wiki, cur_page, text_str));
}
private String_obj_ref scroll_top = String_obj_ref.null_(), node_path = String_obj_ref.null_();
public void Test_Html_window_vpos_parse(String raw, String expd_scroll_top, String expd_node_path) {
scroll_top.Val_null_(); node_path.Val_null_();
Xog_html_itm.Html_window_vpos_parse(raw, scroll_top, node_path);
Tfds.Eq(expd_scroll_top, scroll_top.Val(), expd_scroll_top);
Tfds.Eq(expd_node_path, node_path.Val(), expd_node_path);
}
public void Test_standardize_xowa_link(String raw, String expd) {
Tfds.Eq_str(expd, Xoh_href_gui_utl.Standardize_xowa_link(raw), "standardize");
}
}

Loading…
Cancel
Save