Gui: Strip about: from links during Copy [#823]

2025-12-06 22:52:03 +00:00 · 2021-01-05 02:06:20 -05:00 · 2021-01-05 02:06:20 -05:00 · 4187dc4a76
commit 4187dc4a76
parent 1a6a203cfd
2 changed files with 156 additions and 130 deletions
--- a/400_xowa/src/gplx/xowa/htmls/hrefs/Xoh_href_gui_utl.java
+++ b/400_xowa/src/gplx/xowa/htmls/hrefs/Xoh_href_gui_utl.java
@ -1,6 +1,6 @@
 /*
 XOWA: the XOWA Offline Wiki Application
-Copyright (C) 2012-2017 gnosygnu@gmail.com
+Copyright (C) 2012-2021 gnosygnu@gmail.com

 XOWA is licensed under the terms of the General Public License (GPL) Version 3,
 or alternatively under the terms of the Apache License Version 2.0.
@ -13,80 +13,94 @@ The terms of each license can be found in the source code repository:
 GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
 Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
 */
-package gplx.xowa.htmls.hrefs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
-import gplx.core.btries.*; import gplx.core.primitives.*;
-public class Xoh_href_gui_utl {
-	public static String Html_extract_text(String site, String page, String text_str) {
-		byte[] text_bry = Bry_.new_u8(text_str);
-		int text_len = text_bry.length;
-		int text_tid = Byte_ascii.To_a7_int(text_bry[0]);
-		switch (text_tid) {
-			case Text_tid_none: return "";	// "0"
-			case Text_tid_text: return String_.new_u8(text_bry, 2, text_len);	// 2 to skip "1|"
-			case Text_tid_href: break;	// fall through to below
-			default:			throw Err_.new_unhandled(text_tid);
-		}
-		int href_bgn = 2;							// 2 to skip "2|"
-		if (Bry_.Has_at_bgn(text_bry, Xoh_href_.Bry__file, href_bgn, text_len))
-			href_bgn += Xoh_href_.Len__file;	// skip "file://"
-		Byte_obj_val href_tid = (Byte_obj_val)href_trie.Match_bgn(text_bry, href_bgn, text_len);
-		if (href_tid != null) {
-			switch (href_tid.Val()) {
-				case Href_tid_wiki:			return site + String_.new_u8(text_bry, href_bgn, text_len);		
-				case Href_tid_site:			return String_.new_u8(text_bry, href_bgn + 6, text_len);			// +6 to skip "site/"
-				case Href_tid_anch:			return site + "/wiki/" + page + String_.new_u8(text_bry, href_bgn, text_len);
-			}
-		}
-		return String_.new_u8(text_bry, 2, text_len);	// 2 to skip "2|"; handles "http://" text as well as any fall-thru from above
-	}
-	public static String Standardize_xowa_link(String str) {
-		byte[] bry = Bry_.new_u8(str);
-		int skip = Skip_start_of_xowa_link(bry, bry.length, 0);
-		return skip == 0 ? str : String_.Mid(str, skip);
-	}
-	private static int Skip_start_of_xowa_link(byte[] src, int src_len, int bgn) {
-		if (!Bry_.Has_at_bgn(src, Xoh_href_.Bry__file, bgn, src_len)) return bgn;	// does not start with "file://"
-		int pos = bgn + Xoh_href_.Len__file;	// skip "file://"
-		Object tid_obj = href_trie.Match_bgn(src, pos, src_len);
-		if (tid_obj == null) {
-			return bgn; // if not a known xowa link, return original bgn; 
-		}
-		switch (((Byte_obj_val)tid_obj).Val()) {
-			case Href_tid_site:			return pos;
-			case Href_tid_wiki:			return pos;
-			case Href_tid_anch:			return pos;
-			default:					throw Err_.new_unhandled(tid_obj);
-		}
-	}
-	private static final byte Text_tid_none = 0, Text_tid_text = 1, Text_tid_href = 2;
-	private static final byte Href_tid_wiki = 1, Href_tid_site = 2, Href_tid_anch = 3;
-	private static final Btrie_slim_mgr href_trie = Btrie_slim_mgr.cs()
-	.Add_bry_byte(Xoh_href_.Bry__site		, Href_tid_site)
-	.Add_bry_byte(Xoh_href_.Bry__wiki		, Href_tid_wiki)
-	.Add_bry_byte(Xoh_href_.Bry__anch		, Href_tid_anch)
-	;
-}
-/*
-NOTE_1:
-. swt/mozilla treats text differently in href="{text}" when content_editable=n; occurs in LocationListener.changing
-http://a.org						-> http://a.org								does nothing
-A									-> file:///A								adds "file:///"
-/wiki/A								-> file:///wiki/A							adds "file://"
-Category:A							-> Category:A								noops; Category is assumed to be protocol?
-//en.wiktionary.org/wiki/a			-> file:///wiki/a							strips out site name and prepends "file://"; no idea why
-
-. so, to handle the above, the code does the following
-http://a.org						-> http://a.org								does nothing; nothing needed
-A									-> /wiki/A									always prepend /wiki/
-Category:A							-> /wiki/Category:A							always prepend /wiki/
-//en.wiktionary.org/wiki/A			-> /site/en.wiktionary.org/wiki/A			always transform relative url to /site/
-
-. the href will still come here as file:///wiki/A or file:///site/en.wiktionary.org/wiki/A.
-. however, the file:// can be lopped off and discarded and the rest of the href will fall into one of the following cases
-.. /wiki/
-.. /site/
-.. /xcmd/
-.. #
-.. anything else -> assume to be really a file:// url; EX: file://C/dir/fil.txt -> C/dir/fil.txt
-. the other advantage of this approach is that this proc can be reused outside of swt calls; i.e.: it can parse both "file:///wiki/A" and "/wiki/A"
-*/
+package gplx.xowa.htmls.hrefs;
+
+import gplx.Bry_;
+import gplx.Byte_ascii;
+import gplx.Err_;
+import gplx.String_;
+import gplx.core.btries.Btrie_slim_mgr;
+import gplx.core.primitives.Byte_obj_val;
+import gplx.gfui.kits.swts.Swt_html_utl;
+
+public class Xoh_href_gui_utl {	// static helpers that convert raw href / selection text from the GUI browser into copyable XOWA links
+	public static String Html_extract_text(String site, String page, String text_str) {	// text_str is "{tid}|{payload}"; tid: 0=none, 1=selected text, 2=href; site / page are only used to expand "/wiki/" and "#" hrefs
+		byte[] text_bry = Bry_.new_u8(text_str);
+		int text_len = text_bry.length;
+		int text_tid = Byte_ascii.To_a7_int(text_bry[0]);	// NOTE(review): reads byte 0 unguarded; presumably callers never pass "" -- confirm
+		switch (text_tid) {
+			case Text_tid_none: return "";	// "0"
+			case Text_tid_text: return String_.new_u8(text_bry, 2, text_len);	// 2 to skip "1|"
+			case Text_tid_href: break;	// leave the switch and continue with the href handling below
+			default:			throw Err_.new_unhandled(text_tid);
+		}
+		String href_str = String_.Mid(String_.new_u8(text_bry), 2);	// 2 to skip "2|"
+		href_str = Swt_html_utl.NormalizeSwtUrl(href_str);	// presumably strips the SWT "about:" prefix (ISSUE#:823); implementation not visible here -- confirm
+		if (String_.Has_at_bgn(href_str, Xoh_href_.Str__file))
+			href_str = Standardize_xowa_link(href_str);	// strip "file://" for xowa links, or "file:///" for local file paths
+		Byte_obj_val href_tid = (Byte_obj_val)href_trie.Match_bgn(Bry_.new_u8(href_str), 0, href_str.length());	// NOTE(review): end bound is the char count, not the byte count; these differ for non-ASCII hrefs -- TODO confirm harmless for a prefix match
+		if (href_tid != null) {
+			switch (href_tid.Val()) {
+				case Href_tid_wiki:			return site + href_str;	// EX: "/wiki/A" -> "{site}/wiki/A"
+				case Href_tid_site:			return String_.Mid(href_str, 6);			// 6 to skip "/site/"
+				case Href_tid_anch:			return site + "/wiki/" + page + href_str;	// EX: "#a" -> "{site}/wiki/{page}#a"
+			}
+		}
+		return href_str;	// handles "http://" text as well as any fall-thru from above
+	}
+	public static String Standardize_xowa_link(String str) {	// returns str without the leading part that Skip_start_of_xowa_link says to skip; returns str itself when nothing is skipped
+		byte[] bry = Bry_.new_u8(str);
+		int skip = Skip_start_of_xowa_link(bry, bry.length, 0);
+		return skip == 0 ? str : String_.Mid(str, skip);
+	}
+	private static int Skip_start_of_xowa_link(byte[] src, int src_len, int bgn) {	// returns the index of the first byte to keep, or bgn when nothing should be skipped
+		if (!Bry_.Has_at_bgn(src, Xoh_href_.Bry__file, bgn, src_len)) return bgn;	// does not start with "file://"
+		int pos = bgn + Xoh_href_.Len__file;	// skip "file://"
+		Object tid_obj = href_trie.Match_bgn(src, pos, src_len);
+		if (tid_obj == null) {
+			if (src_len - pos > 0 && src[pos] == Byte_ascii.Slash) { // handle "file:///C:/dir/fil.png"
+				return pos + 1;	// also skip the third slash so the result is "C:/dir/fil.png"
+			}
+			else {
+				return bgn; // if not a known xowa link, return original bgn;
+			}
+		}
+		switch (((Byte_obj_val)tid_obj).Val()) {	// every known xowa prefix keeps its own leading text; only "file://" is dropped
+			case Href_tid_site:			return pos;
+			case Href_tid_wiki:			return pos;
+			case Href_tid_anch:			return pos;
+			default:					throw Err_.new_unhandled(tid_obj);
+		}
+	}
+	private static final byte Text_tid_none = 0, Text_tid_text = 1, Text_tid_href = 2;	// leading digit of text_str in Html_extract_text
+	private static final byte Href_tid_wiki = 1, Href_tid_site = 2, Href_tid_anch = 3;	// values stored in href_trie
+	private static final Btrie_slim_mgr href_trie = Btrie_slim_mgr.cs()	// maps a known href prefix to its Href_tid_*; prefixes are presumably "/site/", "/wiki/" and "#" (defined in Xoh_href_) -- confirm
+	.Add_bry_byte(Xoh_href_.Bry__site		, Href_tid_site)
+	.Add_bry_byte(Xoh_href_.Bry__wiki		, Href_tid_wiki)
+	.Add_bry_byte(Xoh_href_.Bry__anch		, Href_tid_anch)
+	;
+}
+/*
+NOTE_1:
+. swt/mozilla treats text differently in href="{text}" when content_editable=n; occurs in LocationListener.changing
+http://a.org						-> http://a.org								does nothing
+A									-> file:///A								adds "file:///"
+/wiki/A								-> file:///wiki/A							adds "file://"
+Category:A							-> Category:A								noops; Category is assumed to be protocol?
+//en.wiktionary.org/wiki/a			-> file:///wiki/a							strips out site name and prepends "file://"; no idea why
+
+. so, to handle the above, the code does the following
+http://a.org						-> http://a.org								does nothing; nothing needed
+A									-> /wiki/A									always prepend /wiki/
+Category:A							-> /wiki/Category:A							always prepend /wiki/
+//en.wiktionary.org/wiki/A			-> /site/en.wiktionary.org/wiki/A			always transform relative url to /site/
+
+. the href will still come here as file:///wiki/A or file:///site/en.wiktionary.org/wiki/A.
+. however, the file:// can be lopped off and discarded and the rest of the href will fall into one of the following cases
+.. /wiki/
+.. /site/
+.. /xcmd/
+.. #
+.. anything else -> assume to be really a file:// url; EX: file:///C:/dir/fil.txt -> C:/dir/fil.txt
+. the other advantage of this approach is that this proc can be reused outside of swt calls; i.e.: it can parse both "file:///wiki/A" and "/wiki/A"
+*/
--- a/400_xowa/src/gplx/xowa/htmls/hrefs/Xoh_href_gui_utl_tst.java
+++ b/400_xowa/src/gplx/xowa/htmls/hrefs/Xoh_href_gui_utl_tst.java
@ -1,6 +1,6 @@
 /*
 XOWA: the XOWA Offline Wiki Application
-Copyright (C) 2012-2017 gnosygnu@gmail.com
+Copyright (C) 2012-2021 gnosygnu@gmail.com

 XOWA is licensed under the terms of the General Public License (GPL) Version 3,
 or alternatively under the terms of the Apache License Version 2.0.
@ -13,54 +13,66 @@ The terms of each license can be found in the source code repository:
 GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
 Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
 */
-package gplx.xowa.htmls.hrefs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
-import org.junit.*;
-import gplx.core.primitives.*; import gplx.xowa.htmls.hrefs.*; import gplx.xowa.guis.views.*;
-public class Xoh_href_gui_utl_tst {
-	@Before public void init() {fxt.Clear();} private Xoh_href_gui_utl_fxt fxt = new Xoh_href_gui_utl_fxt();
-	@Test   public void Extract_href__text() {
-		fxt.Test_extract_href("0|"													, "");
-		fxt.Test_extract_href("1|selected_text"										, "selected_text");
-		fxt.Test_extract_href("2|http://a.org"										, "http://a.org");
-	}
-	@Test   public void Extract_href__file() {
-		fxt.Test_extract_href("2|file:///site/en.wiktionary.org/wiki/Page_1"		, "en.wiktionary.org/wiki/Page_1");
-		fxt.Test_extract_href("2|file:///wiki/Page_2"								, "en.wikipedia.org/wiki/Page_2");
-		fxt.Test_extract_href("2|file://#anchor"									, "en.wikipedia.org/wiki/Page_0#anchor");
-	}
-	@Test   public void Extract_href__internal() {
-		fxt.Test_extract_href("2|/site/en.wiktionary.org/wiki/Page_1"				, "en.wiktionary.org/wiki/Page_1");
-		fxt.Test_extract_href("2|/wiki/Page_2"										, "en.wikipedia.org/wiki/Page_2");
-		fxt.Test_extract_href("2|#anchor"											, "en.wikipedia.org/wiki/Page_0#anchor");
-	}
-	@Test  public void Html_window_vpos_parse() {
-		fxt.Test_Html_window_vpos_parse("0|0,1,2", "0", "'0','1','2'");
-		fxt.Test_Html_window_vpos_parse("org.eclipse.swt.SWTException: Permission denied for <file://> to get property Selection.rangeCount", null, null);	// check that invalid path doesn't fail; DATE:2014-04-05
-	}
-	@Test   public void Standardize_xowa_link() {
-		fxt.Test_standardize_xowa_link("file:///site/en.wikipedia.org/wiki/A"	, "/site/en.wikipedia.org/wiki/A");
-		fxt.Test_standardize_xowa_link("file:///wiki/A"							, "/wiki/A");
-		fxt.Test_standardize_xowa_link("file://#A"								, "#A");
-	}
-}
-class Xoh_href_gui_utl_fxt {
-	public void Clear() {
-		cur_wiki = "en.wikipedia.org";
-		cur_page = "Page_0";
-	}
-	public String Cur_wiki() {return cur_wiki;} public Xoh_href_gui_utl_fxt Cur_wiki_(String v) {cur_wiki = v; return this;} private String cur_wiki;
-	public String Cur_page() {return cur_page;} public Xoh_href_gui_utl_fxt Cur_page_(String v) {cur_page = v; return this;} private String cur_page;
-	public void Test_extract_href(String text_str, String expd) {
-		Tfds.Eq(expd, Xoh_href_gui_utl.Html_extract_text(cur_wiki, cur_page, text_str));
-	}
-	private String_obj_ref scroll_top = String_obj_ref.null_(), node_path = String_obj_ref.null_();
-	public void Test_Html_window_vpos_parse(String raw, String expd_scroll_top, String expd_node_path) {
-		scroll_top.Val_null_(); node_path.Val_null_();
-		Xog_html_itm.Html_window_vpos_parse(raw, scroll_top, node_path);
-		Tfds.Eq(expd_scroll_top, scroll_top.Val(), expd_scroll_top);
-		Tfds.Eq(expd_node_path, node_path.Val(), expd_node_path);
-	}
-	public void Test_standardize_xowa_link(String raw, String expd) {
-		Tfds.Eq_str(expd, Xoh_href_gui_utl.Standardize_xowa_link(raw), "standardize");
-	}
-}
+package gplx.xowa.htmls.hrefs;
+
+import gplx.Tfds;
+import gplx.core.primitives.String_obj_ref;
+import gplx.xowa.guis.views.Xog_html_itm;
+import org.junit.Before;
+import org.junit.Test;
+
+public class Xoh_href_gui_utl_tst {	// tests for Xoh_href_gui_utl; fixture defaults are wiki "en.wikipedia.org" and page "Page_0"
+	@Before public void init() {fxt.Clear();} private Xoh_href_gui_utl_fxt fxt = new Xoh_href_gui_utl_fxt();
+	@Test public void Extract_href__text() {	// tid 0 (none), tid 1 (selected text), and tid 2 with an absolute http url
+		fxt.Test_extract_text("0|"                                                 , "");
+		fxt.Test_extract_text("1|selected_text"                                    , "selected_text");
+		fxt.Test_extract_text("2|http://a.org"                                     , "http://a.org");
+	}
+	@Test public void Extract_href__file() {	// hrefs that arrive with a "file://" prefix
+		fxt.Test_extract_text("2|file:///site/en.wiktionary.org/wiki/Page_1"       , "en.wiktionary.org/wiki/Page_1");
+		fxt.Test_extract_text("2|file:///wiki/Page_2"                              , "en.wikipedia.org/wiki/Page_2");
+		fxt.Test_extract_text("2|file://#anchor"                                   , "en.wikipedia.org/wiki/Page_0#anchor");
+	}
+	@Test public void Extract_href__internal() {	// same hrefs as above but without the "file://" prefix
+		fxt.Test_extract_text("2|/site/en.wiktionary.org/wiki/Page_1"              , "en.wiktionary.org/wiki/Page_1");
+		fxt.Test_extract_text("2|/wiki/Page_2"                                     , "en.wikipedia.org/wiki/Page_2");
+		fxt.Test_extract_text("2|#anchor"                                          , "en.wikipedia.org/wiki/Page_0#anchor");
+	}
+	@Test  public void Html_window_vpos_parse() {
+		fxt.Test_Html_window_vpos_parse("0|0,1,2", "0", "'0','1','2'");
+		fxt.Test_Html_window_vpos_parse("org.eclipse.swt.SWTException: Permission denied for <file://> to get property Selection.rangeCount", null, null);	// check that invalid path doesn't fail; DATE:2014-04-05
+	}
+	@Test public void Standardize_xowa_link() {	// "file://" is stripped from xowa links; "file:///" is stripped from a local file path
+		fxt.Test_standardize_xowa_link("file:///site/en.wikipedia.org/wiki/A"        , "/site/en.wikipedia.org/wiki/A");
+		fxt.Test_standardize_xowa_link("file:///wiki/A"                              , "/wiki/A");
+		fxt.Test_standardize_xowa_link("file://#A"                                   , "#A");
+		fxt.Test_standardize_xowa_link("file:///C:/dir/fil.png"                      , "C:/dir/fil.png");
+	}
+	@Test public void Swt() { // 2021-01-03|ISSUE#:823|Copy fails for links `about:/wiki/PAGE_NAME` or `about:/site/WIKI_NAME/wiki/PAGE_NAME`
+		fxt.Test_extract_text("2|about:/site/en.wiktionary.org/wiki/Page_1"          , "en.wiktionary.org/wiki/Page_1");
+		fxt.Test_extract_text("2|about:/wiki/Page_2"                                 , "en.wikipedia.org/wiki/Page_2");
+		fxt.Test_extract_text("2|about:#anchor"                                      , "en.wikipedia.org/wiki/Page_0#anchor");
+		fxt.Test_extract_text("2|about:file:///C:/dir/fil.png"                       , "C:/dir/fil.png");
+	}
+}
+class Xoh_href_gui_utl_fxt {	// test fixture: holds the current wiki / page and wraps the calls under test with assertions
+	public void Clear() {	// reset to the defaults that the expected values in the tests rely on
+		cur_wiki = "en.wikipedia.org";
+		cur_page = "Page_0";
+	}
+	public String Cur_wiki() {return cur_wiki;} public Xoh_href_gui_utl_fxt Cur_wiki_(String v) {cur_wiki = v; return this;} private String cur_wiki;
+	public String Cur_page() {return cur_page;} public Xoh_href_gui_utl_fxt Cur_page_(String v) {cur_page = v; return this;} private String cur_page;
+	public void Test_extract_text(String text_str, String expd) {	// asserts Html_extract_text(cur_wiki, cur_page, text_str) equals expd
+		Tfds.Eq(expd, Xoh_href_gui_utl.Html_extract_text(cur_wiki, cur_page, text_str));
+	}
+	private String_obj_ref scroll_top = String_obj_ref.null_(), node_path = String_obj_ref.null_();	// out-params reused across Test_Html_window_vpos_parse calls
+	public void Test_Html_window_vpos_parse(String raw, String expd_scroll_top, String expd_node_path) {	// parses raw and checks both out-params; null expected values mean the parse left them unset
+		scroll_top.Val_null_(); node_path.Val_null_();	// clear results left over from a previous call
+		Xog_html_itm.Html_window_vpos_parse(raw, scroll_top, node_path);
+		Tfds.Eq(expd_scroll_top, scroll_top.Val(), expd_scroll_top);
+		Tfds.Eq(expd_node_path, node_path.Val(), expd_node_path);
+	}
+	public void Test_standardize_xowa_link(String raw, String expd) {	// asserts Standardize_xowa_link(raw) equals expd
+		Tfds.Eq_str(expd, Xoh_href_gui_utl.Standardize_xowa_link(raw), "standardize");
+	}
+}