1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

HTTP_Server: Add initial implementation for post-processing HTML to change to /fsys/ etc [#524]

This commit is contained in:
gnosygnu
2019-08-03 10:00:40 -04:00
parent a51906bf4b
commit a1da02a99f
16 changed files with 416 additions and 14 deletions

View File

@@ -19,7 +19,7 @@ import gplx.core.primitives.*; import gplx.core.net.*; import gplx.langs.htmls.e
import gplx.xowa.apps.*;
import gplx.xowa.htmls.js.*;
import gplx.xowa.wikis.pages.*;
class Http_server_wkr implements Gfo_invk {
public class Http_server_wkr implements Gfo_invk {
private final int uid;
private final Http_server_mgr server_mgr;
private final Http_server_wtr server_wtr;
@@ -150,9 +150,12 @@ class Http_server_wkr implements Gfo_invk {
return this;
} public static final String Invk_run = "run";
private static final byte[]
Url__home = Bry_.new_a7("/"), Url__fsys = Bry_.new_a7("/fsys/")
Url__home = Bry_.new_a7("/")
, Url__exec = Bry_.new_a7("/exec/"), Url__exec_2 = Bry_.new_a7("/xowa-cmd:")
;
public static final byte[]
Url__fsys = Bry_.new_a7("/fsys/")
;
private static final int Url__fsys_len = Url__fsys.length;
}
class Xosrv_http_wkr_ {

View File

@@ -0,0 +1,138 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.apps.servers.http.hdocs; import gplx.*; import gplx.xowa.*; import gplx.xowa.apps.*; import gplx.xowa.apps.servers.*; import gplx.xowa.apps.servers.http.*;
import gplx.core.btries.*; import gplx.core.primitives.*; import gplx.core.net.*;
import gplx.langs.htmls.docs.*; import gplx.xowa.htmls.hrefs.*;
import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*; import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*;
import gplx.xowa.htmls.core.wkrs.addons.forms.*;
class Xoh_hdoc_wkr__http_server implements Xoh_hdoc_wkr {
private Bry_bfr bfr; private Xoh_page hpg; private byte[] src;
private byte[] root_http_dir; // EX: file:///C:/xowa/
public void On_page_bgn(Bry_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
this.bfr = bfr; this.hpg = hpg; this.src = src;
if (root_http_dir == null) {
this.root_http_dir = hpg.Wiki().App().Fsys_mgr().Http_root().To_http_file_bry();
href_trie.Add_bry_byte(root_http_dir, Tid__fsys);
}
}
public void On_page_end() {}
public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
public void On_escape (gplx.xowa.htmls.core.wkrs.escapes.Xoh_escape_data data) {bfr.Add(data.Hook());}
public void On_xnde (gplx.xowa.htmls.core.wkrs.xndes.Xoh_xnde_parser data) {bfr.Add_mid(src, data.Src_bgn(), data.Src_end());}
public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_data data) {
// get atr (with null checks)
Xoh_anch_href_data href_itm = data.Href_itm();
if (href_itm == null) {
Gfo_usr_dlg_.Instance.Warn_many("", "", "lnki missing href; page=~{0} src=~{1}", hpg.Url_bry_safe(), Bry_.Mid(src, data.Src_bgn(), data.Src_end()));
return;
}
Add_href(data.Src_bgn(), data.Src_end(), href_itm.Atr().Val_bgn(), href_itm.Atr().Val_end());
}
public boolean On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_data data) {
// NOTE: not parsing thm b/c Xoh_thm_data does not expose the <a> in the magnify div
// In addition, On_thm would also need to parse any <a> or <img> in the caption
return false;
}
public void On_gly(gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_data data) {
bfr.Add_mid(src, data.Src_bgn(), data.Src_end());
}
public boolean Process_parse(Xoh_data_itm data) {
switch (data.Tid()) {
case Xoh_hzip_dict_.Tid__img: {
Xoh_img_data img_data = (Xoh_img_data)data;
Xoh_anch_href_data anch_href = img_data.Anch_href();
Add_href(data.Src_bgn(), anch_href.Rng_end(), anch_href.Atr().Val_bgn(), anch_href.Atr().Val_end());
Add_href(anch_href.Rng_end(), data.Src_end(), img_data.Img_src().Src_bgn(), img_data.Img_src().Src_end());
return true;
}
// case Xoh_hzip_dict_.Tid__img_bare:
// case Xoh_hzip_dict_.Tid__media:
// Gfo_usr_dlg_.Instance.Warn_many("", "", "htxt_wkr does not support media; url=~{0}", hpg.Url_bry_safe());
// return false;
case Xoh_hzip_dict_.Tid__form:
Xoh_form_data form_data = (Xoh_form_data)data;
Gfh_atr action_atr = form_data.Action_atr();
bfr.Add_mid(src, form_data.Src_bgn(), action_atr.Val_bgn());
if (Bry_.Match(src, action_atr.Val_bgn(), action_atr.Val_bgn() + Xoh_href_.Bry__wiki.length, Xoh_href_.Bry__wiki)) {
bfr.Add_byte_slash().Add(hpg.Wiki().Domain_bry());
}
bfr.Add_mid(src, action_atr.Val_bgn(), form_data.Src_end());
return true;
case Xoh_hzip_dict_.Tid__hdr:
case Xoh_hzip_dict_.Tid__toc:
case Xoh_hzip_dict_.Tid__lnke:
default:
bfr.Add_mid(src, data.Src_bgn(), data.Src_end());
break;
}
return true;
}
private void Add_href(int itm_bgn, int itm_end, int href_bgn, int href_end) {
// add everything up to href_bgn
bfr.Add_mid(src, itm_bgn, href_bgn);
// now "fix" href
Btrie_rv trv = new Btrie_rv();
Object tid_obj = href_trie.Match_at(trv, src, href_bgn, href_end);
if (tid_obj != null) {
byte tid = ((Byte_obj_val)tid_obj).Val();
switch (tid) {
case Tid__wiki:
bfr.Add_byte_slash().Add(hpg.Wiki().Domain_bry());
break;
case Tid__xcmd:
bfr.Add_str_a7("/exec/");
href_bgn = trv.Pos();
break;
case Tid__site:
href_bgn = trv.Pos();
break;
case Tid__fsys:
bfr.Add(Http_server_wkr.Url__fsys);
href_bgn = trv.Pos();
break;
case Tid__fsys_bug:
bfr.Add(Http_server_wkr.Url__fsys);
href_bgn = trv.Pos() - 5; // 5 = "file/".length
break;
default:
throw Err_.new_unhandled_default(tid);
}
}
// add remainder of href_val
bfr.Add_mid(src, href_bgn, href_end);
// add everything after href
bfr.Add_mid(src, href_end, itm_end);
}
public static final byte[] Path_lnxusr_xowa_file = Bry_.new_a7("file:////home/lnxusr/xowa/file/");
private static final byte[] Bry__site = Bry_.new_a7("/site");
private static final byte
Tid__wiki = 1
, Tid__xcmd = 2
, Tid__site = 3
, Tid__fsys = 4
, Tid__fsys_bug = 5
;
private static final Btrie_slim_mgr href_trie = Btrie_slim_mgr.ci_u8()
.Add_bry_byte(Xoh_href_.Bry__wiki , Tid__wiki)
.Add_bry_byte(Gfo_protocol_itm.Bry_xcmd , Tid__xcmd)
.Add_bry_byte(Bry__site , Tid__site)
.Add_bry_byte(Path_lnxusr_xowa_file , Tid__fsys_bug)
;
}

View File

@@ -0,0 +1,192 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.apps.servers.http.hdocs; import gplx.*; import gplx.xowa.*; import gplx.xowa.apps.*; import gplx.xowa.apps.servers.*; import gplx.xowa.apps.servers.http.*;
import org.junit.*; import gplx.core.tests.*;
import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.wkrs.*;
import gplx.xowa.htmls.core.makes.*;
/** Checks the url rewrites which Xoh_hdoc_wkr__http_server applies to page html. */
public class Xoh_hdoc_wkr__http_server__tst {
	private final Xoh_hdoc_wkr__http_server__fxt fxt = new Xoh_hdoc_wkr__http_server__fxt();

	// route warnings to the test console while a test runs; restore the no-op dialog afterwards
	@Before public void init() {
		Gfo_usr_dlg_.Instance = Gfo_usr_dlg_.Test_console();
	}
	@After public void term() {
		Gfo_usr_dlg_.Instance = Gfo_usr_dlg_.Noop;
	}

	// "/wiki/" href in double quotes gets the wiki domain prepended
	@Test public void Wiki_quot() {
		String src  = "<a id=\"id1\" href=\"/wiki/A\">abc</a>";
		String expd = "<a id=\"id1\" href=\"/en.wikipedia.org/wiki/A\">abc</a>";
		fxt.Test(src, expd);
	}
	// same as Wiki_quot, but href is in single quotes
	@Test public void Wiki_apos() {
		String src  = "<a id=\"id1\" href='/wiki/A'>abc</a>";
		String expd = "<a id=\"id1\" href='/en.wikipedia.org/wiki/A'>abc</a>";
		fxt.Test(src, expd);
	}
	// disabled: anchor nested inside another anchor's caption
//	@Test public void Lnki_caption() {
//		fxt.Test
//		( "<a id=\"id1\" href='/wiki/A'>abc <a href='/wiki/B'</a>lmn</a> xyz</a>"
//		, "<a id=\"id1\" href='/en.wikipedia.org/wiki/A'>abc <a href='/en.wikipedia.org/wiki/B'</a>lmn</a> xyz</a>"
//		);
//	}
	// "xowa-cmd:" becomes "/exec/"
	@Test public void Xcmd() {
		String src  = "<a id=\"id1\" href=\"xowa-cmd:a\">abc</a>";
		String expd = "<a id=\"id1\" href=\"/exec/a\">abc</a>";
		fxt.Test(src, expd);
	}
	// leading "/site" is removed
	@Test public void Site() {
		String src  = "<a id=\"id1\" href=\"/site/en.wikipedia.org/wiki/Special:Search/A\">abc</a>";
		String expd = "<a id=\"id1\" href=\"/en.wikipedia.org/wiki/Special:Search/A\">abc</a>";
		fxt.Test(src, expd);
	}
	// NOTE: also used by Special:ItemByTitle; EX: "<form method=\"get\" action=\"//www.wikidata.org/wiki/Special:ItemByTitle\" name=\"itembytitle\" id=\"wb-itembytitle-form1\">"
	@Test public void Action() {
		String src  = "<form id=\"searchform\" action=\"/wiki/SearchUrl\">abc</form>";
		String expd = "<form id=\"searchform\" action=\"/en.wikipedia.org/wiki/SearchUrl\">abc</form>";
		fxt.Test(src, expd);
	}
	// file url under the app root becomes "/fsys/"
	@Test public void Fsys() {
		String src  = "<a id=\"id1\" href=\"file:///mem/xowa/file/A.png\">abc</a>";
		String expd = "<a id=\"id1\" href=\"/fsys/file/A.png\">abc</a>";
		fxt.Test(src, expd);
	}
	// 2019-05 enwiki embedded build machine's path
	@Test public void Fsys_bug() {
		String src  = "<a id=\"id1\" href=\"file:////home/lnxusr/xowa/file/A.png\">abc</a>";
		String expd = "<a id=\"id1\" href=\"/fsys/file/A.png\">abc</a>";
		fxt.Test(src, expd);
	}
	// image: both the anchor href and the img src are rewritten
	@Test public void Fsys_img() {
		String src  = "<a href='/wiki/File:A.jpg' class='image'><img src=\"file:///mem/xowa/file/commons.wikimedia.org/thumb/7/0/A.png/128px.png\">abc</img></a>";
		String expd = "<a href='/en.wikipedia.org/wiki/File:A.jpg' class='image'><img src=\"/fsys/file/commons.wikimedia.org/thumb/7/0/A.png/128px.png\">abc</img></a>";
		fxt.Test(src, expd);
	}
	// thumb div: image anchor, img src and magnify anchor are all rewritten
	@Test public void Fsys_div() {
		String src = String_.Concat_lines_nl
		( "<div class='thumb tleft'>"
		, " <div id='xowa_file_div_2' class='thumbinner' style='width:128px;'>"
		, " <a href='/wiki/File:A.jpg' class='image' xowa_title='A.jpg'><img id='xoimg_2' alt='' src='file:///mem/xowa/file/commons.wikimedia.org/thumb/7/0/A.png/128px.png' width='128' height='100' class='thumbimage'></a> "
		, " <div class='thumbcaption'>"
		, " <div class='magnify'>"
		, " <a href='/wiki/File:A.jpg' class='@gplx.Internal protected' title='Enlarge'></a>"
		, " </div>"
		, " thumb_caption"
		, " </div>"
		, " </div>"
		, "</div>"
		);
		String expd = String_.Concat_lines_nl
		( "<div class='thumb tleft'>"
		, " <div id='xowa_file_div_2' class='thumbinner' style='width:128px;'>"
		, " <a href='/en.wikipedia.org/wiki/File:A.jpg' class='image' xowa_title='A.jpg'><img id='xoimg_2' alt='' src='/fsys/file/commons.wikimedia.org/thumb/7/0/A.png/128px.png' width='128' height='100' class='thumbimage'></a> "
		, " <div class='thumbcaption'>"
		, " <div class='magnify'>"
		, " <a href='/en.wikipedia.org/wiki/File:A.jpg' class='@gplx.Internal protected' title='Enlarge'></a>"
		, " </div>"
		, " thumb_caption"
		, " </div>"
		, " </div>"
		, "</div>"
		);
		fxt.Test(src, expd);
	}
	// gallery: html is expected to come back exactly as it went in, so one string serves as both input and expectation
	@Test public void Fsys_gallery() {
		String html = String_.Concat_lines_nl_skip_last
		( "<ul id=\"xowa_gallery_ul_0\" class=\"gallery mw-gallery-traditional\">"
		, " <li id=\"xowa_gallery_li_0\" class=\"gallerybox\" style=\"width:235px;\">"
		, " <div style=\"width:235px;\">"
		, " <div class=\"thumb\" style=\"width:230px;\">"
		, " <div style=\"margin:15px auto;\">"
		, " <a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xoimg_0\" alt=\"\" src=\"file:///mem/xowa/file/commons.wikimedia.org/thumb/7/0/A.png/200px.png\" width=\"200\" height=\"300\" /></a>"
		, " </div>"
		, " </div>"
		, " <div class=\"gallerytext\"><p><i>a1</i>"
		, "</p>"
		, ""
		, " </div>"
		, " </div>"
		, " </li>"
		, " <li id=\"xowa_gallery_li_1\" class=\"gallerybox\" style=\"width:235px;\">"
		, " <div style=\"width:235px;\">"
		, " <div class=\"thumb\" style=\"width:230px;\">"
		, " <div style=\"margin:15px auto;\">"
		, " <a href=\"/wiki/File:B.png\" class=\"image\" xowa_title=\"B.png\"><img id=\"xoimg_1\" alt=\"\" src=\"file:///mem/xowa/file/commons.wikimedia.org/thumb/5/7/B.png/200px.png\" width=\"200\" height=\"300\" /></a>"
		, " </div>"
		, " </div>"
		, " <div class=\"gallerytext\"><p><i>b1</i>"
		, "</p>"
		, ""
		, " </div>"
		, " </div>"
		, " </li>"
		, "</ul>"
		);
		fxt.Test(html, html);
	}
}
/** Test fixture: runs html through a Xoh_make_mgr backed by Xoh_hdoc_wkr__http_server and compares the result. */
class Xoh_hdoc_wkr__http_server__fxt {
	private final Xow_wiki wiki;
	private final Xoh_page hpg;
	private final Xoh_make_mgr hdoc_mgr = Xoh_make_mgr.New(new Xoh_hdoc_wkr__http_server());

	/** Builds a test wiki and a page ("Test_Page", id 123) for the parser to run against. */
	public Xoh_hdoc_wkr__http_server__fxt() {
		this.wiki = Xop_fxt.New_app_html().Wiki();

		byte[] ttl_bry = Bry_.new_u8("Test_Page");
		Xoa_url url = Xoa_url.New(wiki.Domain_bry(), ttl_bry);
		Xoa_ttl ttl = wiki.Ttl_parse(ttl_bry);

		this.hpg = new Xoh_page();
		hpg.Ctor_by_hview(wiki, url, ttl, 123);
	}

	/** Parses src_str and checks line-by-line that the output equals expd. */
	public void Test(String src_str, String expd) {
		byte[] actl_bry = hdoc_mgr.Parse(Bry_.new_u8(src_str), wiki, hpg);
		Gftest.Eq__ary__lines(expd, String_.new_u8(actl_bry));
	}
}