mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
HTML Databases: Make plain-text parser thread-safe; Simplify code; Fix gallery not working [#320]
This commit is contained in:
@@ -16,7 +16,6 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Gfh_doc_parser {
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
private final Gfh_txt_wkr txt_wkr;
|
||||
public Gfh_doc_parser(Gfh_txt_wkr txt_wkr, Gfh_doc_wkr... wkr_ary) {
|
||||
@@ -27,6 +26,7 @@ public class Gfh_doc_parser {
|
||||
public void Parse(byte[] page_url, byte[] src, int src_bgn, int src_end) {
|
||||
int txt_bgn = -1;
|
||||
int pos = src_bgn;
|
||||
Btrie_rv trv = new Btrie_rv();
|
||||
while (pos < src_end) {
|
||||
Object o = trie.Match_at(trv, src, pos, src_end);
|
||||
if (o == null) { // not a known hook; add to txt
|
||||
|
||||
@@ -26,10 +26,11 @@ public class Gfh_tag_rdr {
|
||||
// Accessor for the src_end field, which is declared on this same line.
public int Src_end() {return src_end;} private int src_end;
|
||||
// Accessor for the err_wkr field; the field is final and is assigned a new Bry_err_wkr in its declaration on this same line.
public Bry_err_wkr Err_wkr() {return err_wkr;} private final Bry_err_wkr err_wkr = new Bry_err_wkr();
|
||||
// Adds the tag_name / tag_id pair to name_hash via Add_str_int, then returns this reader.
public Gfh_tag_rdr Reg(String tag_name, int tag_id) {name_hash.Add_str_int(tag_name, tag_id); return this;}
|
||||
public void Init(byte[] ctx_name, byte[] src, int src_bgn, int src_end) {
|
||||
// Points this reader at a new source buffer and range, re-initializes tag__eos and err_wkr, and returns this reader.
//   ctx_name: bytes converted with String_.new_u8 and handed to err_wkr.Init_by_page
//   src:      source bytes stored on this reader
//   src_bgn:  value assigned to pos
//   src_end:  value assigned to this.src_end
public Gfh_tag_rdr Init(byte[] ctx_name, byte[] src, int src_bgn, int src_end) {
|
||||
// Store the buffer, set pos to src_bgn, and record src_end.
this.src = src; this.pos = src_bgn; this.src_end = src_end;
|
||||
// Initialize tag__eos with src_end for all four int arguments, id Gfh_tag_.Id__eos, and an empty byte array.
tag__eos.Init(this, src, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Gfh_tag_.Id__eos, Bry_.Empty);
|
||||
// Initialize the error worker with the ctx_name string and the source bytes.
err_wkr.Init_by_page(String_.new_u8(ctx_name), src);
|
||||
// NOTE(review): returning this presumably allows call chaining — confirm against callers.
return this;
|
||||
}
|
||||
public void Src_rng_(int src_bgn, int src_end) {
|
||||
this.pos = src_bgn; this.src_end = src_end;
|
||||
|
||||
Reference in New Issue
Block a user