1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

HTML Databases: Make plain-text parser thread-safe; Simplify code; Fix gallery not working [#320]

This commit is contained in:
gnosygnu
2019-01-06 21:27:33 -05:00
parent 2b4320b302
commit 42d15b726c
18 changed files with 91 additions and 137 deletions

View File

@@ -16,7 +16,6 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.core.btries.*;
public class Gfh_doc_parser {
private final Btrie_rv trv = new Btrie_rv();
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
private final Gfh_txt_wkr txt_wkr;
public Gfh_doc_parser(Gfh_txt_wkr txt_wkr, Gfh_doc_wkr... wkr_ary) {
@@ -27,6 +26,7 @@ public class Gfh_doc_parser {
public void Parse(byte[] page_url, byte[] src, int src_bgn, int src_end) {
int txt_bgn = -1;
int pos = src_bgn;
Btrie_rv trv = new Btrie_rv();
while (pos < src_end) {
Object o = trie.Match_at(trv, src, pos, src_end);
if (o == null) { // not a known hook; add to txt

View File

@@ -26,10 +26,11 @@ public class Gfh_tag_rdr {
// Returns the current value of the src_end field; that field is assigned by Init and Src_rng_.
public int Src_end() {return src_end;} private int src_end;
// Returns this reader's Bry_err_wkr; the instance is created once with the reader (final field) and is re-pointed by Init via Init_by_page.
public Bry_err_wkr Err_wkr() {return err_wkr;} private final Bry_err_wkr err_wkr = new Bry_err_wkr();
// Adds the (tag_name, tag_id) pair to name_hash and returns this reader so that calls can be chained.
public Gfh_tag_rdr Reg(String tag_name, int tag_id) {name_hash.Add_str_int(tag_name, tag_id); return this;}
// Points the reader at a new source buffer.
//   ctx_name : converted to a String with String_.new_u8 and handed to err_wkr.Init_by_page together with src
//   src      : buffer to read; stored in this.src
//   src_bgn  : stored as the starting read position (this.pos)
//   src_end  : stored in this.src_end
// NOTE(review): two signature lines follow because this text is a diff view with its +/- markers stripped.
// The `void` line appears to be the pre-change signature; the `Gfh_tag_rdr` line together with `return this;`
// appears to be the post-change version that allows call chaining -- confirm against the real file.
public void Init(byte[] ctx_name, byte[] src, int src_bgn, int src_end) {
public Gfh_tag_rdr Init(byte[] ctx_name, byte[] src, int src_bgn, int src_end) {
this.src = src; this.pos = src_bgn; this.src_end = src_end;
// re-initialize tag__eos with every position argument set to src_end, id Gfh_tag_.Id__eos and an empty name
// NOTE(review): the meaning of the two Bool_.N flags is not visible here -- check Gfh_tag.Init
tag__eos.Init(this, src, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Gfh_tag_.Id__eos, Bry_.Empty);
err_wkr.Init_by_page(String_.new_u8(ctx_name), src);
return this;
}
public void Src_rng_(int src_bgn, int src_end) {
this.pos = src_bgn; this.src_end = src_end;