mirror of
https://github.com/gnosygnu/xowa.git
synced 2025-05-31 22:44:34 +00:00
Gui: Strip about:
from links during Copy
[#823]
This commit is contained in:
parent
1a6a203cfd
commit
4187dc4a76
@ -1,6 +1,6 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
Copyright (C) 2012-2021 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
@ -13,80 +13,94 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.htmls.hrefs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
public class Xoh_href_gui_utl {
|
||||
public static String Html_extract_text(String site, String page, String text_str) {
|
||||
byte[] text_bry = Bry_.new_u8(text_str);
|
||||
int text_len = text_bry.length;
|
||||
int text_tid = Byte_ascii.To_a7_int(text_bry[0]);
|
||||
switch (text_tid) {
|
||||
case Text_tid_none: return ""; // "0"
|
||||
case Text_tid_text: return String_.new_u8(text_bry, 2, text_len); // 2 to skip "1|"
|
||||
case Text_tid_href: break; // fall through to below
|
||||
default: throw Err_.new_unhandled(text_tid);
|
||||
}
|
||||
int href_bgn = 2; // 2 to skip "2|"
|
||||
if (Bry_.Has_at_bgn(text_bry, Xoh_href_.Bry__file, href_bgn, text_len))
|
||||
href_bgn += Xoh_href_.Len__file; // skip "file://"
|
||||
Byte_obj_val href_tid = (Byte_obj_val)href_trie.Match_bgn(text_bry, href_bgn, text_len);
|
||||
if (href_tid != null) {
|
||||
switch (href_tid.Val()) {
|
||||
case Href_tid_wiki: return site + String_.new_u8(text_bry, href_bgn, text_len);
|
||||
case Href_tid_site: return String_.new_u8(text_bry, href_bgn + 6, text_len); // +6 to skip "site/"
|
||||
case Href_tid_anch: return site + "/wiki/" + page + String_.new_u8(text_bry, href_bgn, text_len);
|
||||
}
|
||||
}
|
||||
return String_.new_u8(text_bry, 2, text_len); // 2 to skip "2|"; handles "http://" text as well as any fall-thru from above
|
||||
}
|
||||
public static String Standardize_xowa_link(String str) {
|
||||
byte[] bry = Bry_.new_u8(str);
|
||||
int skip = Skip_start_of_xowa_link(bry, bry.length, 0);
|
||||
return skip == 0 ? str : String_.Mid(str, skip);
|
||||
}
|
||||
private static int Skip_start_of_xowa_link(byte[] src, int src_len, int bgn) {
|
||||
if (!Bry_.Has_at_bgn(src, Xoh_href_.Bry__file, bgn, src_len)) return bgn; // does not start with "file://"
|
||||
int pos = bgn + Xoh_href_.Len__file; // skip "file://"
|
||||
Object tid_obj = href_trie.Match_bgn(src, pos, src_len);
|
||||
if (tid_obj == null) {
|
||||
return bgn; // if not a known xowa link, return original bgn;
|
||||
}
|
||||
switch (((Byte_obj_val)tid_obj).Val()) {
|
||||
case Href_tid_site: return pos;
|
||||
case Href_tid_wiki: return pos;
|
||||
case Href_tid_anch: return pos;
|
||||
default: throw Err_.new_unhandled(tid_obj);
|
||||
}
|
||||
}
|
||||
private static final byte Text_tid_none = 0, Text_tid_text = 1, Text_tid_href = 2;
|
||||
private static final byte Href_tid_wiki = 1, Href_tid_site = 2, Href_tid_anch = 3;
|
||||
private static final Btrie_slim_mgr href_trie = Btrie_slim_mgr.cs()
|
||||
.Add_bry_byte(Xoh_href_.Bry__site , Href_tid_site)
|
||||
.Add_bry_byte(Xoh_href_.Bry__wiki , Href_tid_wiki)
|
||||
.Add_bry_byte(Xoh_href_.Bry__anch , Href_tid_anch)
|
||||
;
|
||||
}
|
||||
/*
|
||||
NOTE_1:
|
||||
. swt/mozilla treats text differently in href="{text}" when content_editable=n; occurs in LocationListener.changing
|
||||
http://a.org -> http://a.org does nothing
|
||||
A -> file:///A adds "file:///"
|
||||
/wiki/A -> file:///wiki/A adds "file://"
|
||||
Category:A -> Category:A noops; Category is assumed to be protocol?
|
||||
//en.wiktionary.org/wiki/a -> file:///wiki/a strips out site name and prepends "file://"; no idea why
|
||||
|
||||
. so, to handle the above, the code does the following
|
||||
http://a.org -> http://a.org does nothing; nothing needed
|
||||
A -> /wiki/A always prepend /wiki/
|
||||
Category:A -> /wiki/Category:A always prepend /wiki/
|
||||
//en.wiktionary.org/wiki/A -> /site/en.wiktionary.org/wiki/A always transform relative url to /site/
|
||||
|
||||
. the href will still come here as file:///wiki/A or file:///site/en.wiktionary.org/wiki/A.
|
||||
. however, the file:// can be lopped off and discarded and the rest of the href will fall into one of the following cases
|
||||
.. /wiki/
|
||||
.. /site/
|
||||
.. /xcmd/
|
||||
.. #
|
||||
.. anything else -> assume to be really a file:// url; EX: file://C/dir/fil.txt -> C/dir/fil.txt
|
||||
. the other advantage of this approach is that this proc can be reused outside of swt calls; i.e.: it can parse both "file:///wiki/A" and "/wiki/A"
|
||||
*/
|
||||
package gplx.xowa.htmls.hrefs;
|
||||
|
||||
import gplx.Bry_;
|
||||
import gplx.Byte_ascii;
|
||||
import gplx.Err_;
|
||||
import gplx.String_;
|
||||
import gplx.core.btries.Btrie_slim_mgr;
|
||||
import gplx.core.primitives.Byte_obj_val;
|
||||
import gplx.gfui.kits.swts.Swt_html_utl;
|
||||
|
||||
public class Xoh_href_gui_utl {
|
||||
public static String Html_extract_text(String site, String page, String text_str) {
|
||||
byte[] text_bry = Bry_.new_u8(text_str);
|
||||
int text_len = text_bry.length;
|
||||
int text_tid = Byte_ascii.To_a7_int(text_bry[0]);
|
||||
switch (text_tid) {
|
||||
case Text_tid_none: return ""; // "0"
|
||||
case Text_tid_text: return String_.new_u8(text_bry, 2, text_len); // 2 to skip "1|"
|
||||
case Text_tid_href: break; // fall through to below
|
||||
default: throw Err_.new_unhandled(text_tid);
|
||||
}
|
||||
String href_str = String_.Mid(String_.new_u8(text_bry), 2);
|
||||
href_str = Swt_html_utl.NormalizeSwtUrl(href_str);
|
||||
if (String_.Has_at_bgn(href_str, Xoh_href_.Str__file))
|
||||
href_str = Standardize_xowa_link(href_str); // skip "file://"
|
||||
Byte_obj_val href_tid = (Byte_obj_val)href_trie.Match_bgn(Bry_.new_u8(href_str), 0, href_str.length());
|
||||
if (href_tid != null) {
|
||||
switch (href_tid.Val()) {
|
||||
case Href_tid_wiki: return site + href_str;
|
||||
case Href_tid_site: return String_.Mid(href_str, 6); // +6 to skip "site/"
|
||||
case Href_tid_anch: return site + "/wiki/" + page + href_str;
|
||||
}
|
||||
}
|
||||
return href_str;
|
||||
}
|
||||
public static String Standardize_xowa_link(String str) {
|
||||
byte[] bry = Bry_.new_u8(str);
|
||||
int skip = Skip_start_of_xowa_link(bry, bry.length, 0);
|
||||
return skip == 0 ? str : String_.Mid(str, skip);
|
||||
}
|
||||
private static int Skip_start_of_xowa_link(byte[] src, int src_len, int bgn) {
|
||||
if (!Bry_.Has_at_bgn(src, Xoh_href_.Bry__file, bgn, src_len)) return bgn; // does not start with "file://"
|
||||
int pos = bgn + Xoh_href_.Len__file; // skip "file://"
|
||||
Object tid_obj = href_trie.Match_bgn(src, pos, src_len);
|
||||
if (tid_obj == null) {
|
||||
if (src_len - pos > 0 && src[pos] == Byte_ascii.Slash) { // handle "file:///C:/dir/fil.png"
|
||||
return pos + 1;
|
||||
}
|
||||
else {
|
||||
return bgn; // if not a known xowa link, return original bgn;
|
||||
}
|
||||
}
|
||||
switch (((Byte_obj_val)tid_obj).Val()) {
|
||||
case Href_tid_site: return pos;
|
||||
case Href_tid_wiki: return pos;
|
||||
case Href_tid_anch: return pos;
|
||||
default: throw Err_.new_unhandled(tid_obj);
|
||||
}
|
||||
}
|
||||
private static final byte Text_tid_none = 0, Text_tid_text = 1, Text_tid_href = 2;
|
||||
private static final byte Href_tid_wiki = 1, Href_tid_site = 2, Href_tid_anch = 3;
|
||||
private static final Btrie_slim_mgr href_trie = Btrie_slim_mgr.cs()
|
||||
.Add_bry_byte(Xoh_href_.Bry__site , Href_tid_site)
|
||||
.Add_bry_byte(Xoh_href_.Bry__wiki , Href_tid_wiki)
|
||||
.Add_bry_byte(Xoh_href_.Bry__anch , Href_tid_anch)
|
||||
;
|
||||
}
|
||||
/*
|
||||
NOTE_1:
|
||||
. swt/mozilla treats text differently in href="{text}" when content_editable=n; occurs in LocationListener.changing
|
||||
http://a.org -> http://a.org does nothing
|
||||
A -> file:///A adds "file:///"
|
||||
/wiki/A -> file:///wiki/A adds "file://"
|
||||
Category:A -> Category:A noops; Category is assumed to be protocol?
|
||||
//en.wiktionary.org/wiki/a -> file:///wiki/a strips out site name and prepends "file://"; no idea why
|
||||
|
||||
. so, to handle the above, the code does the following
|
||||
http://a.org -> http://a.org does nothing; nothing needed
|
||||
A -> /wiki/A always prepend /wiki/
|
||||
Category:A -> /wiki/Category:A always prepend /wiki/
|
||||
//en.wiktionary.org/wiki/A -> /site/en.wiktionary.org/wiki/A always transform relative url to /site/
|
||||
|
||||
. the href will still come here as file:///wiki/A or file:///site/en.wiktionary.org/wiki/A.
|
||||
. however, the file:// can be lopped off and discarded and the rest of the href will fall into one of the following cases
|
||||
.. /wiki/
|
||||
.. /site/
|
||||
.. /xcmd/
|
||||
.. #
|
||||
.. anything else -> assume to be really a file:// url; EX: file://C/dir/fil.txt -> C/dir/fil.txt
|
||||
. the other advantage of this approach is that this proc can be reused outside of swt calls; i.e.: it can parse both "file:///wiki/A" and "/wiki/A"
|
||||
*/
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
Copyright (C) 2012-2021 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
@ -13,54 +13,66 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.htmls.hrefs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
|
||||
import org.junit.*;
|
||||
import gplx.core.primitives.*; import gplx.xowa.htmls.hrefs.*; import gplx.xowa.guis.views.*;
|
||||
public class Xoh_href_gui_utl_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoh_href_gui_utl_fxt fxt = new Xoh_href_gui_utl_fxt();
|
||||
@Test public void Extract_href__text() {
|
||||
fxt.Test_extract_href("0|" , "");
|
||||
fxt.Test_extract_href("1|selected_text" , "selected_text");
|
||||
fxt.Test_extract_href("2|http://a.org" , "http://a.org");
|
||||
}
|
||||
@Test public void Extract_href__file() {
|
||||
fxt.Test_extract_href("2|file:///site/en.wiktionary.org/wiki/Page_1" , "en.wiktionary.org/wiki/Page_1");
|
||||
fxt.Test_extract_href("2|file:///wiki/Page_2" , "en.wikipedia.org/wiki/Page_2");
|
||||
fxt.Test_extract_href("2|file://#anchor" , "en.wikipedia.org/wiki/Page_0#anchor");
|
||||
}
|
||||
@Test public void Extract_href__internal() {
|
||||
fxt.Test_extract_href("2|/site/en.wiktionary.org/wiki/Page_1" , "en.wiktionary.org/wiki/Page_1");
|
||||
fxt.Test_extract_href("2|/wiki/Page_2" , "en.wikipedia.org/wiki/Page_2");
|
||||
fxt.Test_extract_href("2|#anchor" , "en.wikipedia.org/wiki/Page_0#anchor");
|
||||
}
|
||||
@Test public void Html_window_vpos_parse() {
|
||||
fxt.Test_Html_window_vpos_parse("0|0,1,2", "0", "'0','1','2'");
|
||||
fxt.Test_Html_window_vpos_parse("org.eclipse.swt.SWTException: Permission denied for <file://> to get property Selection.rangeCount", null, null); // check that invalid path doesn't fail; DATE:2014-04-05
|
||||
}
|
||||
@Test public void Standardize_xowa_link() {
|
||||
fxt.Test_standardize_xowa_link("file:///site/en.wikipedia.org/wiki/A" , "/site/en.wikipedia.org/wiki/A");
|
||||
fxt.Test_standardize_xowa_link("file:///wiki/A" , "/wiki/A");
|
||||
fxt.Test_standardize_xowa_link("file://#A" , "#A");
|
||||
}
|
||||
}
|
||||
class Xoh_href_gui_utl_fxt {
|
||||
public void Clear() {
|
||||
cur_wiki = "en.wikipedia.org";
|
||||
cur_page = "Page_0";
|
||||
}
|
||||
public String Cur_wiki() {return cur_wiki;} public Xoh_href_gui_utl_fxt Cur_wiki_(String v) {cur_wiki = v; return this;} private String cur_wiki;
|
||||
public String Cur_page() {return cur_page;} public Xoh_href_gui_utl_fxt Cur_page_(String v) {cur_page = v; return this;} private String cur_page;
|
||||
public void Test_extract_href(String text_str, String expd) {
|
||||
Tfds.Eq(expd, Xoh_href_gui_utl.Html_extract_text(cur_wiki, cur_page, text_str));
|
||||
}
|
||||
private String_obj_ref scroll_top = String_obj_ref.null_(), node_path = String_obj_ref.null_();
|
||||
public void Test_Html_window_vpos_parse(String raw, String expd_scroll_top, String expd_node_path) {
|
||||
scroll_top.Val_null_(); node_path.Val_null_();
|
||||
Xog_html_itm.Html_window_vpos_parse(raw, scroll_top, node_path);
|
||||
Tfds.Eq(expd_scroll_top, scroll_top.Val(), expd_scroll_top);
|
||||
Tfds.Eq(expd_node_path, node_path.Val(), expd_node_path);
|
||||
}
|
||||
public void Test_standardize_xowa_link(String raw, String expd) {
|
||||
Tfds.Eq_str(expd, Xoh_href_gui_utl.Standardize_xowa_link(raw), "standardize");
|
||||
}
|
||||
}
|
||||
package gplx.xowa.htmls.hrefs;
|
||||
|
||||
import gplx.Tfds;
|
||||
import gplx.core.primitives.String_obj_ref;
|
||||
import gplx.xowa.guis.views.Xog_html_itm;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
public class Xoh_href_gui_utl_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoh_href_gui_utl_fxt fxt = new Xoh_href_gui_utl_fxt();
|
||||
@Test public void Extract_href__text() {
|
||||
fxt.Test_extract_text("0|" , "");
|
||||
fxt.Test_extract_text("1|selected_text" , "selected_text");
|
||||
fxt.Test_extract_text("2|http://a.org" , "http://a.org");
|
||||
}
|
||||
@Test public void Extract_href__file() {
|
||||
fxt.Test_extract_text("2|file:///site/en.wiktionary.org/wiki/Page_1" , "en.wiktionary.org/wiki/Page_1");
|
||||
fxt.Test_extract_text("2|file:///wiki/Page_2" , "en.wikipedia.org/wiki/Page_2");
|
||||
fxt.Test_extract_text("2|file://#anchor" , "en.wikipedia.org/wiki/Page_0#anchor");
|
||||
}
|
||||
@Test public void Extract_href__internal() {
|
||||
fxt.Test_extract_text("2|/site/en.wiktionary.org/wiki/Page_1" , "en.wiktionary.org/wiki/Page_1");
|
||||
fxt.Test_extract_text("2|/wiki/Page_2" , "en.wikipedia.org/wiki/Page_2");
|
||||
fxt.Test_extract_text("2|#anchor" , "en.wikipedia.org/wiki/Page_0#anchor");
|
||||
}
|
||||
@Test public void Html_window_vpos_parse() {
|
||||
fxt.Test_Html_window_vpos_parse("0|0,1,2", "0", "'0','1','2'");
|
||||
fxt.Test_Html_window_vpos_parse("org.eclipse.swt.SWTException: Permission denied for <file://> to get property Selection.rangeCount", null, null); // check that invalid path doesn't fail; DATE:2014-04-05
|
||||
}
|
||||
@Test public void Standardize_xowa_link() {
|
||||
fxt.Test_standardize_xowa_link("file:///site/en.wikipedia.org/wiki/A" , "/site/en.wikipedia.org/wiki/A");
|
||||
fxt.Test_standardize_xowa_link("file:///wiki/A" , "/wiki/A");
|
||||
fxt.Test_standardize_xowa_link("file://#A" , "#A");
|
||||
fxt.Test_standardize_xowa_link("file:///C:/dir/fil.png" , "C:/dir/fil.png");
|
||||
}
|
||||
@Test public void Swt() { // 2021-01-03|ISSUE#:823|Copy fails for links `about:/wiki/PAGE_NAME` or `about:/site/WIKI_NAME/wiki/PAGE_NAME`
|
||||
fxt.Test_extract_text("2|about:/site/en.wiktionary.org/wiki/Page_1" , "en.wiktionary.org/wiki/Page_1");
|
||||
fxt.Test_extract_text("2|about:/wiki/Page_2" , "en.wikipedia.org/wiki/Page_2");
|
||||
fxt.Test_extract_text("2|about:#anchor" , "en.wikipedia.org/wiki/Page_0#anchor");
|
||||
fxt.Test_extract_text("2|about:file:///C:/dir/fil.png" , "C:/dir/fil.png");
|
||||
}
|
||||
}
|
||||
class Xoh_href_gui_utl_fxt {
|
||||
public void Clear() {
|
||||
cur_wiki = "en.wikipedia.org";
|
||||
cur_page = "Page_0";
|
||||
}
|
||||
public String Cur_wiki() {return cur_wiki;} public Xoh_href_gui_utl_fxt Cur_wiki_(String v) {cur_wiki = v; return this;} private String cur_wiki;
|
||||
public String Cur_page() {return cur_page;} public Xoh_href_gui_utl_fxt Cur_page_(String v) {cur_page = v; return this;} private String cur_page;
|
||||
public void Test_extract_text(String text_str, String expd) {
|
||||
Tfds.Eq(expd, Xoh_href_gui_utl.Html_extract_text(cur_wiki, cur_page, text_str));
|
||||
}
|
||||
private String_obj_ref scroll_top = String_obj_ref.null_(), node_path = String_obj_ref.null_();
|
||||
public void Test_Html_window_vpos_parse(String raw, String expd_scroll_top, String expd_node_path) {
|
||||
scroll_top.Val_null_(); node_path.Val_null_();
|
||||
Xog_html_itm.Html_window_vpos_parse(raw, scroll_top, node_path);
|
||||
Tfds.Eq(expd_scroll_top, scroll_top.Val(), expd_scroll_top);
|
||||
Tfds.Eq(expd_node_path, node_path.Val(), expd_node_path);
|
||||
}
|
||||
public void Test_standardize_xowa_link(String raw, String expd) {
|
||||
Tfds.Eq_str(expd, Xoh_href_gui_utl.Standardize_xowa_link(raw), "standardize");
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user