mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Embeddable: Fix if_exists
This commit is contained in:
@@ -1,61 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public class Gfh_class_ {
|
||||
public static boolean Has(byte[] src, int src_bgn, int src_end, byte[] cls) {
|
||||
int cls_bgn = src_bgn;
|
||||
int pos = src_bgn;
|
||||
while (true) {
|
||||
boolean pos_is_last = pos == src_end;
|
||||
byte b = pos_is_last ? Byte_ascii.Space : src[pos];
|
||||
if (b == Byte_ascii.Space) {
|
||||
if (cls_bgn != -1) {
|
||||
if (Bry_.Match(src, cls_bgn, pos, cls))return true;
|
||||
cls_bgn = -1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (cls_bgn == -1) cls_bgn = pos;
|
||||
}
|
||||
if (pos_is_last) break;
|
||||
++pos;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
public static byte Find_1st(byte[] src, int src_bgn, int src_end, Hash_adp_bry hash) {
|
||||
int cls_bgn = src_bgn;
|
||||
int pos = src_bgn;
|
||||
while (true) {
|
||||
boolean pos_is_last = pos == src_end;
|
||||
byte b = pos_is_last ? Byte_ascii.Space : src[pos];
|
||||
if (b == Byte_ascii.Space) {
|
||||
if (cls_bgn != -1) {
|
||||
byte rv = hash.Get_as_byte_or(src, cls_bgn, pos, Byte_.Max_value_127);
|
||||
if (rv != Byte_.Max_value_127) return rv;
|
||||
cls_bgn = -1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (cls_bgn == -1) cls_bgn = pos;
|
||||
}
|
||||
if (pos_is_last) break;
|
||||
++pos;
|
||||
}
|
||||
return Byte_.Max_value_127;
|
||||
}
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import org.junit.*;
|
||||
public class Gfh_class__tst {
|
||||
private final Gfh_class__fxt fxt = new Gfh_class__fxt();
|
||||
@Test public void Has() {
|
||||
fxt.Test__has__y("a b c", "a", "b", "c");
|
||||
fxt.Test__has__n("a b c", "d");
|
||||
fxt.Test__has__n("ab", "a");
|
||||
}
|
||||
@Test public void Cls__has__hash() {
|
||||
Hash_adp_bry hash = fxt.Make_hash("x", "y", "z");
|
||||
fxt.Test__find_1st(hash, 0, "x");
|
||||
fxt.Test__find_1st(hash, 2, "z");
|
||||
fxt.Test__find_1st(hash, 0, "a x b");
|
||||
fxt.Test__find_1st(hash, 0, "a b x");
|
||||
fxt.Test__find_1st(hash, Byte_.Max_value_127, "a");
|
||||
fxt.Test__find_1st(hash, Byte_.Max_value_127, "xyz");
|
||||
}
|
||||
}
|
||||
class Gfh_class__fxt {
|
||||
public void Test__has__y(String src, String... ary) {Test__has(Bool_.Y, src, ary);}
|
||||
public void Test__has__n(String src, String... ary) {Test__has(Bool_.N, src, ary);}
|
||||
public void Test__has(boolean expd, String src, String... ary) {
|
||||
byte[] src_bry = Bry_.new_u8(src);
|
||||
for (String itm : ary) {
|
||||
byte[] itm_bry = Bry_.new_u8(itm);
|
||||
Tfds.Eq_bool(expd, Gfh_class_.Has(src_bry, 0, src_bry.length, itm_bry), itm);
|
||||
}
|
||||
}
|
||||
public Hash_adp_bry Make_hash(String... ary) {
|
||||
Hash_adp_bry rv = Hash_adp_bry.ci_a7();
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; ++i)
|
||||
rv.Add_bry_byte(Bry_.new_u8(ary[i]), (byte)i);
|
||||
return rv;
|
||||
}
|
||||
public void Test__find_1st(Hash_adp_bry hash, int expd, String src) {
|
||||
byte[] src_bry = Bry_.new_u8(src);
|
||||
Tfds.Eq_byte((byte)expd, Gfh_class_.Find_1st(src_bry, 0, src_bry.length, hash), src);
|
||||
}
|
||||
}
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.langs.htmls.docs.*;
|
||||
public class Gfh_class_parser_ {
|
||||
public static void Parse(Gfh_tag tag, Gfh_class_parser_wkr wkr) {
|
||||
Gfh_atr atr = tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__class);
|
||||
if (atr.Val_dat_exists())
|
||||
Parse(tag.Src(), atr.Val_bgn(), atr.Val_end(), wkr);
|
||||
}
|
||||
public static void Parse(byte[] src, int src_bgn, int src_end, Gfh_class_parser_wkr wkr) {
|
||||
int atr_idx = 0, tmp_bgn = -1, tmp_end = -1;
|
||||
int pos = src_bgn;
|
||||
while (true) {
|
||||
boolean pos_is_last = pos == src_end;
|
||||
byte b = pos_is_last ? Byte_ascii.Space : src[pos];
|
||||
switch (b) {
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
if (tmp_bgn != -1) { // ignore empty atrs
|
||||
if (!wkr.On_cls(src, atr_idx, src_bgn, src_end, tmp_bgn, tmp_end))
|
||||
pos_is_last = true;
|
||||
}
|
||||
++atr_idx; tmp_bgn = -1; tmp_end = -1;
|
||||
break;
|
||||
default:
|
||||
if (tmp_bgn == -1) tmp_bgn = pos;
|
||||
tmp_end = pos + 1;
|
||||
break;
|
||||
}
|
||||
if (pos_is_last) break;
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import org.junit.*;
|
||||
public class Gfh_class_parser__tst {
|
||||
private final Gfh_class_parser__fxt fxt = new Gfh_class_parser__fxt();
|
||||
@Test public void Basic() {fxt.Test__parse("v1" , "v1");}
|
||||
@Test public void Many() {fxt.Test__parse("v1 v2" , "v1", "v2");}
|
||||
}
|
||||
class Gfh_class_parser__fxt {
|
||||
private final Gfh_class_wkr__list wkr = new Gfh_class_wkr__list();
|
||||
public void Test__parse(String src_str, String... expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
String[] actl = wkr.Parse(src_bry, 0, src_bry.length);
|
||||
Tfds.Eq_ary_str(expd, actl);
|
||||
}
|
||||
}
|
||||
class Gfh_class_wkr__list implements Gfh_class_parser_wkr {
|
||||
private final List_adp list = List_adp_.New();
|
||||
public boolean On_cls(byte[] src, int atr_idx, int atr_bgn, int atr_end, int val_bgn, int val_end) {
|
||||
String s = String_.new_u8(src, val_bgn, val_end);
|
||||
list.Add(s); //
|
||||
return true;
|
||||
}
|
||||
public String[] Parse(byte[] src, int src_bgn, int src_end) {
|
||||
Gfh_class_parser_.Parse(src, src_bgn, src_end, this);
|
||||
return (String[])list.To_ary_and_clear(String.class);
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
/** Callback invoked by Gfh_class_parser_ for each class name found in a class attribute value. */
public interface Gfh_class_parser_wkr {
	/**
	 * Handles one class name.
	 *
	 * @param src     bytes being parsed
	 * @param atr_idx index counter maintained by the parser for this token
	 * @param atr_bgn start of the whole range being parsed
	 * @param atr_end end of the whole range being parsed
	 * @param val_bgn start of this class name in {@code src}
	 * @param val_end end (exclusive) of this class name in {@code src}
	 * @return true to keep parsing; false to make the parser stop
	 */
	boolean On_cls(byte[] src, int atr_idx, int atr_bgn, int atr_end, int val_bgn, int val_end);
}
|
||||
@@ -1,47 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public class Gfh_atr implements gplx.core.brys.Bfr_arg {
|
||||
public Gfh_atr(int idx, int atr_bgn, int atr_end, byte[] key, byte[] val, byte[] src, int val_bgn, int val_end) {
|
||||
this.idx = idx; this.atr_bgn = atr_bgn; this.atr_end = atr_end; this.key = key; this.val = val;
|
||||
this.src = src; this.val_bgn = val_bgn; this.val_end = val_end;
|
||||
}
|
||||
public byte[] Src() {return src;} private final byte[] src;
|
||||
public int Idx() {return idx;} private final int idx;
|
||||
public int Atr_bgn() {return atr_bgn;} private final int atr_bgn;
|
||||
public int Atr_end() {return atr_end;} private final int atr_end;
|
||||
public byte[] Key() {return key;} private final byte[] key;
|
||||
public int Val_bgn() {return val_bgn;} private final int val_bgn;
|
||||
public int Val_end() {return val_end;} private final int val_end;
|
||||
public boolean Val_dat_exists() {return val_end != -1;}
|
||||
public boolean Val_dat_missing() {return val_end == -1;}
|
||||
public byte[] Val() {
|
||||
if (val == null)
|
||||
val = Bry_.Mid(src, val_bgn, val_end);
|
||||
return val;
|
||||
} private byte[] val;
|
||||
public void Html__add(Bry_bfr bfr) {
|
||||
if (val_end > val_bgn)
|
||||
bfr.Add_mid(src, val_bgn, val_end);
|
||||
}
|
||||
public void Bfr_arg__add(Bry_bfr bfr) {
|
||||
if (Val_dat_exists())
|
||||
bfr.Add_mid(src, val_bgn, val_end);
|
||||
}
|
||||
public static final Gfh_atr Noop = new Gfh_atr(-1, -1, -1, Bry_.Empty, Bry_.Empty, Bry_.Empty, -1, -1);
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Gfh_doc_parser {
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
private final Gfh_txt_wkr txt_wkr;
|
||||
public Gfh_doc_parser(Gfh_txt_wkr txt_wkr, Gfh_doc_wkr... wkr_ary) {
|
||||
this.txt_wkr = txt_wkr;
|
||||
for (Gfh_doc_wkr wkr : wkr_ary)
|
||||
trie.Add_obj(wkr.Hook(), wkr);
|
||||
}
|
||||
public void Parse(byte[] page_url, byte[] src, int src_bgn, int src_end) {
|
||||
int txt_bgn = -1;
|
||||
int pos = src_bgn;
|
||||
while (pos < src_end) {
|
||||
Object o = trie.Match_at(trv, src, pos, src_end);
|
||||
if (o == null) { // not a known hook; add to txt
|
||||
if (txt_bgn == -1) txt_bgn = pos;
|
||||
++pos;
|
||||
}
|
||||
else { // known hook
|
||||
if (txt_bgn != -1) { // txt pending; handle it
|
||||
txt_wkr.Parse(txt_bgn, pos);
|
||||
txt_bgn = -1;
|
||||
}
|
||||
Gfh_doc_wkr wkr = (Gfh_doc_wkr)o;
|
||||
try {pos = wkr.Parse(src, src_bgn, src_end, pos);}
|
||||
catch (Exception e) {
|
||||
Gfh_utl.Log(e, "html parse failed", page_url, src, pos);
|
||||
txt_bgn = pos; // set txt_bgn to hook_bgn which is "pos"; i.e.: txt resumes from start of failed hook
|
||||
pos = trv.Pos(); // set pos to hook_end
|
||||
}
|
||||
}
|
||||
}
|
||||
if (txt_bgn != -1) txt_wkr.Parse(txt_bgn, src_end); // handle add pending txt at EOS
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
/** Worker that Gfh_doc_parser invokes when its hook bytes are found in the source. */
public interface Gfh_doc_wkr {
	/** Returns the byte sequence that triggers this worker. */
	byte[] Hook();

	/**
	 * Handles the hook found at {@code pos}.
	 *
	 * @param src     bytes being parsed
	 * @param src_bgn start of the range being parsed
	 * @param src_end end of the range being parsed
	 * @param pos     position at which the hook matched
	 * @return the position from which the parser continues scanning
	 */
	int Parse(byte[] src, int src_bgn, int src_end, int pos);
}
|
||||
@@ -1,164 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.xowa.parsers.htmls.*; import gplx.langs.htmls.styles.*; import gplx.langs.htmls.clses.*;
|
||||
public class Gfh_tag implements Mwh_atr_wkr {
|
||||
private Gfh_tag_rdr tag_rdr;
|
||||
private Ordered_hash atrs_hash; private boolean atrs_null; private int atrs_bgn, atrs_end;
|
||||
private final Gfh_style_wkr__val_as_int style_wkr = new Gfh_style_wkr__val_as_int();
|
||||
public Gfh_tag Init(Gfh_tag_rdr tag_rdr, byte[] src, boolean tag_is_tail, boolean tag_is_inline, int src_bgn, int src_end, int atrs_bgn, int atrs_end, int name_id, byte[] name_bry) {
|
||||
this.tag_rdr = tag_rdr; this.src = src; this.atrs_null = true;
|
||||
this.tag_is_tail = tag_is_tail; this.tag_is_inline = tag_is_inline;
|
||||
this.atrs_bgn = atrs_bgn; this.atrs_end = atrs_end;
|
||||
this.name_id = name_id; this.name_bry = name_bry; this.src_bgn = src_bgn; this.src_end = src_end;
|
||||
return this;
|
||||
}
|
||||
public Gfh_tag Copy() {
|
||||
Gfh_tag rv = new Gfh_tag().Init(tag_rdr, src, tag_is_tail, tag_is_inline, src_bgn, src_end, atrs_bgn, atrs_end, name_id, name_bry);
|
||||
rv.atrs_null = false;
|
||||
rv.atrs_hash = Copy(atrs_hash);
|
||||
return rv;
|
||||
}
|
||||
public int Name_id() {return name_id;} private int name_id;
|
||||
public boolean Tid_is_comment() {return name_id == Gfh_tag_.Id__comment;}
|
||||
public byte[] Name_bry() {return name_bry;} private byte[] name_bry;
|
||||
public Gfh_tag Chk_name_or_fail(int chk) {
|
||||
if (!Chk_name(chk)) tag_rdr.Err_wkr().Fail("name_id chk failed", "expecting", Gfh_tag_.To_str(chk));
|
||||
return this;
|
||||
}
|
||||
public boolean Chk_name(int chk) {
|
||||
return ( chk == name_id
|
||||
|| (name_id != Gfh_tag_.Id__eos && Int_.In(chk, Gfh_tag_.Id__any, Gfh_tag_.Id__comment)));
|
||||
}
|
||||
public boolean Chk(int chk_name, byte[] chk_cls) {return name_id == chk_name && Atrs__cls_has(chk_cls);}
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int Src_bgn() {return src_bgn;} private int src_bgn;
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
public boolean Src_exists() {return src_end > src_bgn;} // NOTE: only true if EOS where src_end == src_bgn == src_len
|
||||
public boolean Tag_is_tail() {return tag_is_tail;} private boolean tag_is_tail;
|
||||
public boolean Tag_is_inline() {return tag_is_inline;} private boolean tag_is_inline;
|
||||
public Ordered_hash Atrs__hash() {if (atrs_null) Atrs__make(); return atrs_hash;}
|
||||
public int Atrs__len() {if (atrs_null) Atrs__make(); return atrs_hash.Count();}
|
||||
public boolean Atrs__match_pair(byte[] key, byte[] val) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key);
|
||||
return rv == null ? false : Bry_.Eq(val, rv.Val());
|
||||
}
|
||||
public boolean Atrs__cls_has(byte[] val) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(Gfh_atr_.Bry__class); if (rv == null) return false;
|
||||
byte[] rv_val = rv.Val();
|
||||
return Gfh_class_.Has(rv_val, 0, rv_val.length, val);
|
||||
}
|
||||
public boolean Atrs__cls_eq(byte[] val) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(Gfh_atr_.Bry__class); if (rv == null) return false;
|
||||
return Bry_.Eq(val, rv.Val());
|
||||
}
|
||||
public byte Atrs__cls_find_or_fail(Hash_adp_bry hash) {
|
||||
byte rv = Atrs__cls_find_or(hash, Byte_.Max_value_127); if (rv == Byte_.Max_value_127) tag_rdr.Err_wkr().Fail("cls missing");
|
||||
return rv;
|
||||
}
|
||||
public byte Atrs__cls_find_or(Hash_adp_bry hash, byte or) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr cls_atr = (Gfh_atr)atrs_hash.Get_by(Gfh_atr_.Bry__class); if (cls_atr == null) return or;
|
||||
byte rv = Gfh_class_.Find_1st(src, cls_atr.Val_bgn(), cls_atr.Val_end(), hash); if (rv == Byte_.Max_value_127) return or;
|
||||
return rv;
|
||||
}
|
||||
public int Atrs__style_get_as_int(byte[] key) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(Gfh_atr_.Bry__style); if (rv == null) return -1;
|
||||
byte[] rv_val = rv.Val();
|
||||
return style_wkr.Parse(rv_val, 0, rv_val.length, key);
|
||||
}
|
||||
public boolean Atrs__has(byte[] key) {
|
||||
if (atrs_null) Atrs__make();
|
||||
return atrs_hash.Get_by(key) != null;
|
||||
}
|
||||
public byte[] Atrs__get_as_bry(byte[] key) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key);
|
||||
return rv == null ? Bry_.Empty : rv.Val();
|
||||
}
|
||||
public int Atrs__get_as_int(byte[] key) {
|
||||
int rv = Atrs__get_as_int_or(key, Int_.Min_value); if (rv == Int_.Min_value) tag_rdr.Err_wkr().Fail("atr missing", "key", key);
|
||||
return rv;
|
||||
}
|
||||
public int Atrs__get_as_int_or(byte[] key, int or) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key); if (rv == null) return or;
|
||||
return Bry_.To_int_or(src, rv.Val_bgn(), rv.Val_end(), or);
|
||||
}
|
||||
public double Atrs__get_as_double_or(byte[] key, double or) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key); if (rv == null) return or;
|
||||
return Bry_.To_double_or(src, rv.Val_bgn(), rv.Val_end(), or);
|
||||
}
|
||||
public Gfh_atr Atrs__get_at(int i) {return (Gfh_atr)atrs_hash.Get_at(i);}
|
||||
public Gfh_atr Atrs__get_by_or_fail(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.Y);}
|
||||
public Gfh_atr Atrs__get_by_or_empty(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.N);}
|
||||
public Gfh_atr Atrs__get_by_or_fail(byte[] key, boolean fail_if_null) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key);
|
||||
if (rv == null) {
|
||||
if (fail_if_null) tag_rdr.Err_wkr().Fail("atr missing", "key", key);
|
||||
else return Gfh_atr.Noop;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public String Atrs__print() {
|
||||
if (atrs_null) Atrs__make();
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
int len = atrs_hash.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Gfh_atr atr = (Gfh_atr)atrs_hash.Get_at(i);
|
||||
bfr.Add(atr.Key()).Add_byte_eq().Add(atr.Val()).Add_byte_nl();
|
||||
}
|
||||
return bfr.To_str();
|
||||
}
|
||||
private void Atrs__make() {
|
||||
atrs_null = false;
|
||||
if (atrs_hash == null) atrs_hash = Ordered_hash_.New_bry();
|
||||
else atrs_hash.Clear();
|
||||
tag_rdr.Atrs__make(this, atrs_bgn, atrs_end);
|
||||
}
|
||||
public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {
|
||||
if (!valid) return;
|
||||
byte[] val_bry = val_bry_manual;
|
||||
int val_bgn = -1, val_end = -1;
|
||||
int atr_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_bgn];
|
||||
int atr_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_end];
|
||||
if (key_exists) {
|
||||
val_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
|
||||
val_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
|
||||
}
|
||||
else
|
||||
val_bry = key_bry;
|
||||
Gfh_atr atr = new Gfh_atr(atrs_hash.Count(), atr_bgn, atr_end, key_bry, val_bry, src, val_bgn, val_end);
|
||||
atrs_hash.Add(key_bry, atr);
|
||||
}
|
||||
private static Ordered_hash Copy(Ordered_hash src) {
|
||||
Ordered_hash rv = Ordered_hash_.New();
|
||||
int len = src.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Gfh_atr atr = (Gfh_atr)src.Get_at(i);
|
||||
rv.Add(atr.Key(), atr);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
@@ -1,360 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.btries.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
public class Gfh_tag_rdr {
|
||||
private final Hash_adp_bry name_hash;
|
||||
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
|
||||
private final Gfh_tag tag__tmp__move = new Gfh_tag(), tag__tmp__peek = new Gfh_tag(), tag__eos = new Gfh_tag(), tag__comment = new Gfh_tag();
|
||||
private final Int_obj_ref tmp_depth = Int_obj_ref.New_zero();
|
||||
/** Creates a reader that resolves tag names through {@code name_hash}. */
Gfh_tag_rdr(Hash_adp_bry name_hash) {
	this.name_hash = name_hash;
}
|
||||
private byte[] src;
/** Returns the bytes currently being read. */
public byte[] Src() {
	return src;
}
|
||||
private int src_end;
/** Returns the end position of the range being read. */
public int Src_end() {
	return src_end;
}
|
||||
private final Bry_err_wkr err_wkr = new Bry_err_wkr();
/** Returns the error worker used to report failures for this reader. */
public Bry_err_wkr Err_wkr() {
	return err_wkr;
}
|
||||
/** Registers {@code tag_name} under {@code tag_id} in the name hash. Returns this for chaining. */
public Gfh_tag_rdr Reg(String tag_name, int tag_id) {
	name_hash.Add_str_int(tag_name, tag_id);
	return this;
}
|
||||
public void Init(byte[] ctx_name, byte[] src, int src_bgn, int src_end) {
|
||||
this.src = src; this.pos = src_bgn; this.src_end = src_end;
|
||||
tag__eos.Init(this, src, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Gfh_tag_.Id__eos, Bry_.Empty);
|
||||
err_wkr.Init_by_page(String_.new_u8(ctx_name), src);
|
||||
}
|
||||
public void Src_rng_(int src_bgn, int src_end) {
|
||||
this.pos = src_bgn; this.src_end = src_end;
|
||||
}
|
||||
private int pos;
/** Returns the current read position. */
public int Pos() {
	return pos;
}
|
||||
/** Sets the current read position. */
public void Pos_(int v) {
	this.pos = v;
}
|
||||
/** Parses the attributes in {@code src[head_bgn..head_end)} of the reader's current source, reporting each to {@code atr_wkr}. */
public void Atrs__make(Mwh_atr_wkr atr_wkr, int head_bgn, int head_end) {
	atr_parser.Parse(atr_wkr, -1, -1, src, head_bgn, head_end);
}
|
||||
/** Finds the next head tag of any name after the current position and moves past it. */
public Gfh_tag Tag__move_fwd_head() {
	return Tag__find(Bool_.Y, Bool_.N, Bool_.N, pos, src_end, Gfh_tag_.Id__any);
}
|
||||
/** Finds the next head tag with id {@code match_name_id} after the current position and moves past it. */
public Gfh_tag Tag__move_fwd_head(int match_name_id) {
	return Tag__find(Bool_.Y, Bool_.N, Bool_.N, pos, src_end, match_name_id);
}
|
||||
/** Finds the next tail tag with id {@code match_name_id} after the current position and moves past it. */
public Gfh_tag Tag__move_fwd_tail(int match_name_id) {
	return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, pos, src_end, match_name_id);
}
|
||||
/** Finds the next head tag of any name after the current position without moving. */
public Gfh_tag Tag__peek_fwd_head() {
	return Tag__find(Bool_.N, Bool_.N, Bool_.N, pos, src_end, Gfh_tag_.Id__any);
}
|
||||
/** Finds the next head tag with id {@code match_name_id} after the current position without moving. */
public Gfh_tag Tag__peek_fwd_head(int match_name_id) {
	return Tag__find(Bool_.N, Bool_.N, Bool_.N, pos, src_end, match_name_id);
}
|
||||
/** Finds the next tail tag with id {@code match_name_id} after the current position without moving. */
public Gfh_tag Tag__peek_fwd_tail(int match_name_id) {
	return Tag__find(Bool_.N, Bool_.N, Bool_.Y, pos, src_end, match_name_id);
}
|
||||
/** Searches backward from the current position for a tail tag with id {@code match_name_id}, without moving. */
public Gfh_tag Tag__peek_bwd_tail(int match_name_id) {
	return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, pos, src_end, match_name_id);
}
|
||||
/**
 * Searches backward from the current position for a tag of any name, without moving.
 * NOTE(review): despite the "head" name this passes tail=Y, the same flags as Tag__peek_bwd_tail;
 * confirm against callers whether tail=N was intended.
 */
public Gfh_tag Tag__peek_bwd_head() {
	return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, pos, src_end, Gfh_tag_.Id__any);
}
|
||||
/** Finds the first head tag with id {@code match_name_id} scanning forward from {@code bgn} to {@code end}, without moving. */
public Gfh_tag Tag__find_fwd_head(int bgn, int end, int match_name_id) {
	return Tag__find(Bool_.N, Bool_.N, Bool_.N, bgn, end, match_name_id);
}
|
||||
private Gfh_tag Tag__find(boolean move, boolean bwd, boolean tail, int rng_bgn, int rng_end, int match_name_id) {
|
||||
int tmp = rng_bgn;
|
||||
int stop_pos = rng_end; int adj = 1;
|
||||
if (bwd) {
|
||||
stop_pos = -1;
|
||||
adj = -1;
|
||||
--tmp; // subtract 1 from tmp; needed when pos is at src_len, else array error below
|
||||
}
|
||||
tmp_depth.Val_zero_();
|
||||
Gfh_tag rv = null;
|
||||
while (tmp != stop_pos) {
|
||||
if (src[tmp] == Byte_ascii.Angle_bgn) {
|
||||
rv = Tag__extract(move, tail, match_name_id, tmp);
|
||||
if (rv.Name_id() == Gfh_tag_.Id__comment) { // ignore comments DATE:2016-06-25
|
||||
tmp = rv.Src_end();
|
||||
rv = null; // null rv, else rv will still be comment and may get returned to caller
|
||||
continue;
|
||||
}
|
||||
if (Tag__match(move, bwd, tail, match_name_id, tmp_depth, rv))
|
||||
break;
|
||||
else {
|
||||
tmp = bwd ? rv.Src_bgn() - 1 : rv.Src_end();
|
||||
rv = null;
|
||||
}
|
||||
}
|
||||
else
|
||||
tmp += adj;
|
||||
}
|
||||
if (rv == null) {
|
||||
if (move && tail && !bwd)
|
||||
err_wkr.Fail("move tag fwd failed", "tag_name", Gfh_tag_.To_str(match_name_id));
|
||||
else
|
||||
return Tag__eos(rng_bgn);
|
||||
}
|
||||
if (move) pos = rv.Src_end();
|
||||
return rv;
|
||||
}
|
||||
private boolean Tag__match(boolean move, boolean bwd, boolean tail, int match_name_id, Int_obj_ref depth_obj, Gfh_tag tag) {
|
||||
int tag_name_id = tag.Name_id();
|
||||
if ( tag_name_id != match_name_id // tag doesn't match requested
|
||||
&& match_name_id != Gfh_tag_.Id__any // requested is not wildcard
|
||||
) return false;
|
||||
if (tag_name_id == Gfh_tag_.Id__comment) return true; // ignore comments
|
||||
int depth = depth_obj.Val();
|
||||
boolean tag_is_tail = tag.Tag_is_tail();
|
||||
if (tail == tag_is_tail) {
|
||||
if (depth == 0)
|
||||
return true;
|
||||
else {
|
||||
if (match_name_id == tag_name_id)
|
||||
depth_obj.Val_add(-1);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!bwd && tail && !tag_is_tail && !tag.Tag_is_inline()) {
|
||||
if (match_name_id == tag_name_id)
|
||||
depth_obj.Val_add(1);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
/** Finds the next tail tag named {@code find_tag_bry} after the current position and moves past it. */
public Gfh_tag Tag__move_fwd_tail(byte[] find_tag_bry) {
	return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, pos, src_end, find_tag_bry);
}
|
||||
/** Finds the next tail tag named {@code find_tag_bry} after the current position without moving. */
public Gfh_tag Tag__peek_fwd_tail(byte[] find_tag_bry) {
	return Tag__find(Bool_.N, Bool_.N, Bool_.Y, pos, src_end, find_tag_bry);
}
|
||||
private Gfh_tag Tag__find(boolean move, boolean bwd, boolean tail, int rng_bgn, int rng_end, byte[] find_tag_bry) {
|
||||
int tmp = rng_bgn;
|
||||
int stop_pos = rng_end; int adj = 1;
|
||||
if (bwd) {
|
||||
stop_pos = -1;
|
||||
adj = -1;
|
||||
--tmp; // subtract 1 from tmp; needed when pos is at src_len, else array error below
|
||||
}
|
||||
tmp_depth.Val_zero_();
|
||||
Gfh_tag rv = null;
|
||||
while (tmp != stop_pos) {
|
||||
if (src[tmp] == Byte_ascii.Angle_bgn) {
|
||||
rv = Tag__extract(move, tail, find_tag_bry, tmp);
|
||||
if (Bry_.Eq(rv.Name_bry(), Gfh_tag_.Bry__xowa_comment)) { // ignore comments DATE:2016-06-25
|
||||
tmp = rv.Src_end();
|
||||
rv = null; // null rv, else rv will still be comment and may get returned to caller
|
||||
continue;
|
||||
}
|
||||
if (Tag__match(move, bwd, tail, find_tag_bry, tmp_depth, rv))
|
||||
break;
|
||||
else {
|
||||
tmp = bwd ? rv.Src_bgn() - 1 : rv.Src_end();
|
||||
rv = null;
|
||||
}
|
||||
}
|
||||
else
|
||||
tmp += adj;
|
||||
}
|
||||
if (rv == null) {
|
||||
if (move && tail && !bwd)
|
||||
err_wkr.Fail("move tag fwd failed", "tag_name", find_tag_bry);
|
||||
else
|
||||
return Tag__eos(rng_bgn);
|
||||
}
|
||||
if (move) pos = rv.Src_end();
|
||||
return rv;
|
||||
}
|
||||
private boolean Tag__match(boolean move, boolean bwd, boolean tail, byte[] find_tag_bry, Int_obj_ref depth_obj, Gfh_tag tag) {
|
||||
byte[] cur_tag_bry = tag.Name_bry();
|
||||
if ( !Bry_.Eq(cur_tag_bry, find_tag_bry) // tag doesn't match requested
|
||||
&& find_tag_bry != Gfh_tag_.Bry__xowa_any // requested is not wildcard
|
||||
) return false;
|
||||
if (cur_tag_bry == Gfh_tag_.Bry__xowa_comment) return true; // ignore comments
|
||||
int depth = depth_obj.Val();
|
||||
boolean tag_is_tail = tag.Tag_is_tail();
|
||||
if (tail == tag_is_tail) {
|
||||
if (depth == 0)
|
||||
return true;
|
||||
else {
|
||||
if (Bry_.Eq(cur_tag_bry, find_tag_bry))
|
||||
depth_obj.Val_add(-1);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!bwd && tail && !tag_is_tail && !tag.Tag_is_inline()) {
|
||||
if (Bry_.Eq(cur_tag_bry, find_tag_bry))
|
||||
depth_obj.Val_add(1);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
public Gfh_tag Tag__extract(boolean move, boolean tail, byte[] find_tag_bry, int tag_bgn) {
|
||||
int name_bgn = tag_bgn + 1; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "<EOS"
|
||||
byte name_0 = src[name_bgn];
|
||||
boolean cur_is_tail = false;
|
||||
switch (name_0) {
|
||||
case Byte_ascii.Bang:
|
||||
if (Bry_.Match(src, name_bgn + 1, name_bgn + 3, Bry__comment__mid)) // skip comment; EX: "<!"
|
||||
return Tag__comment(tag_bgn);
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
++name_bgn; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "</EOS"
|
||||
name_0 = src[name_bgn];
|
||||
cur_is_tail = true;
|
||||
break;
|
||||
}
|
||||
int name_end = -1, atrs_end = -1, tag_end = -1, name_pos = name_bgn;
|
||||
byte name_byte = name_0; boolean inline = false;
|
||||
boolean loop = true;
|
||||
while (true) {
|
||||
switch (name_byte) {
|
||||
case Byte_ascii.Angle_end: // EX: "<a>"
|
||||
name_end = atrs_end = name_pos;
|
||||
tag_end = name_end + 1;
|
||||
loop = false;
|
||||
break;
|
||||
case Byte_ascii.Slash: // EX: "<a/>"
|
||||
name_end = name_pos;
|
||||
tag_end = name_pos + 1; if (tag_end == src_end) return Tag__eos(tag_bgn);// EX: "<a/EOS"
|
||||
if (src[tag_end] == Byte_ascii.Angle_end) {
|
||||
atrs_end = name_end;
|
||||
inline = true;
|
||||
loop = false;
|
||||
++tag_end; // move tag_end after >
|
||||
}
|
||||
else {
|
||||
name_end = tag_end = -1;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
name_end = name_pos;
|
||||
loop = false;
|
||||
break;
|
||||
}
|
||||
if (!loop) break;
|
||||
++name_pos; if (name_pos == src_end) return Tag__eos(tag_bgn); // EX: "<abEOS"
|
||||
name_byte = src[name_pos];
|
||||
}
|
||||
if (tag_end == -1) {
|
||||
tag_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, name_end, src_end);
|
||||
if (tag_end == Bry_find_.Not_found) return Tag__eos(tag_bgn);
|
||||
int prv_pos = tag_end - 1;
|
||||
if (src[prv_pos] == Byte_ascii.Slash) {
|
||||
atrs_end = prv_pos;
|
||||
inline = true;
|
||||
}
|
||||
else
|
||||
atrs_end = tag_end;
|
||||
++tag_end; // position after ">"
|
||||
}
|
||||
Gfh_tag tmp = move ? tag__tmp__move : tag__tmp__peek;
|
||||
return tmp.Init(this, src, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end, Gfh_tag_.Id__unknown, Bry_.Mid(src, name_bgn, name_end));
|
||||
}
|
||||
public Gfh_tag Tag__extract(boolean move, boolean tail, int match_name_id, int tag_bgn) {
|
||||
int name_bgn = tag_bgn + 1; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "<EOS"
|
||||
byte name_0 = src[name_bgn];
|
||||
boolean cur_is_tail = false;
|
||||
switch (name_0) {
|
||||
case Byte_ascii.Bang:
|
||||
if (Bry_.Match(src, name_bgn + 1, name_bgn + 3, Bry__comment__mid)) // skip comment; EX: "<!"
|
||||
return Tag__comment(tag_bgn);
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
++name_bgn; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "</EOS"
|
||||
name_0 = src[name_bgn];
|
||||
cur_is_tail = true;
|
||||
break;
|
||||
}
|
||||
int name_end = -1, atrs_end = -1, tag_end = -1, name_pos = name_bgn;
|
||||
byte name_byte = name_0; boolean inline = false;
|
||||
boolean loop = true;
|
||||
while (true) {
|
||||
switch (name_byte) {
|
||||
case Byte_ascii.Angle_end: // EX: "<a>"
|
||||
name_end = atrs_end = name_pos;
|
||||
tag_end = name_end + 1;
|
||||
loop = false;
|
||||
break;
|
||||
case Byte_ascii.Slash: // EX: "<a/>"
|
||||
name_end = name_pos;
|
||||
tag_end = name_pos + 1; if (tag_end == src_end) return Tag__eos(tag_bgn);// EX: "<a/EOS"
|
||||
if (src[tag_end] == Byte_ascii.Angle_end) {
|
||||
atrs_end = name_end;
|
||||
inline = true;
|
||||
loop = false;
|
||||
++tag_end; // move tag_end after >
|
||||
}
|
||||
else {
|
||||
name_end = tag_end = -1;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
name_end = name_pos;
|
||||
loop = false;
|
||||
break;
|
||||
}
|
||||
if (!loop) break;
|
||||
++name_pos; if (name_pos == src_end) return Tag__eos(tag_bgn); // EX: "<abEOS"
|
||||
name_byte = src[name_pos];
|
||||
}
|
||||
if (tag_end == -1) {
|
||||
tag_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, name_end, src_end);
|
||||
if (tag_end == Bry_find_.Not_found) return Tag__eos(tag_bgn);
|
||||
int prv_pos = tag_end - 1;
|
||||
if (src[prv_pos] == Byte_ascii.Slash) {
|
||||
atrs_end = prv_pos;
|
||||
inline = true;
|
||||
}
|
||||
else
|
||||
atrs_end = tag_end;
|
||||
++tag_end; // position after ">"
|
||||
}
|
||||
Gfh_tag tmp = move ? tag__tmp__move : tag__tmp__peek;
|
||||
return tmp.Init(this, src, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end
|
||||
, name_hash.Get_as_int_or(src, name_bgn, name_end, -1) // TODO_OLD: change from -1 to Unknown
|
||||
, Bry_.Mid(src, name_bgn, name_end));
|
||||
}
|
||||
public boolean Read_and_move(byte match) {
|
||||
byte b = src[pos];
|
||||
if (b == match) {
|
||||
++pos;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
public int Read_int_to(byte to_char) {
|
||||
int rv = Read_int_to(to_char, Int_.Max_value); if (rv == Int_.Max_value) err_wkr.Fail("invalid int", "pos", pos);
|
||||
return rv;
|
||||
}
|
||||
public int Read_int_to(byte to_char, int or_int) {
|
||||
int bgn = pos;
|
||||
int rv = 0;
|
||||
int negative = 1;
|
||||
while (pos < src_end) {
|
||||
byte b = src[pos++];
|
||||
switch (b) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
rv = (rv * 10) + (b - Byte_ascii.Num_0);
|
||||
break;
|
||||
case Byte_ascii.Dash:
|
||||
if (negative == -1) // 2nd negative
|
||||
return or_int; // return or_int
|
||||
else // 1st negative
|
||||
negative = -1; // flag negative
|
||||
break;
|
||||
default: {
|
||||
boolean match = b == to_char;
|
||||
if (to_char == Byte_ascii.Null) {// hack for Read_int_to_non_num
|
||||
--pos;
|
||||
match = true;
|
||||
}
|
||||
return match ? rv * negative : or_int;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bgn == pos ? or_int : rv * negative;
|
||||
}
|
||||
private Gfh_tag Tag__comment(int tag_bgn) {
|
||||
int tag_end = Bry_find_.Move_fwd(src, gplx.langs.htmls.Gfh_tag_.Comm_end, tag_bgn, src_end); if (tag_end == Bry_find_.Not_found) tag_end = src_end;
|
||||
return tag__comment.Init(this, src, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Gfh_tag_.Id__comment, Bry_.Empty);
|
||||
}
|
||||
private Gfh_tag Tag__eos(int tag_bgn) {
|
||||
int tag_end = tag_bgn + 255; if (tag_end > src_end) tag_end = src_end;
|
||||
return tag__comment.Init(this, src, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Gfh_tag_.Id__eos, Bry_.Empty);
|
||||
}
|
||||
// the two dashes that follow "<!" in a comment opener; matched by Tag__extract to detect "<!--"
private static final byte[] Bry__comment__mid = Bry_.new_a7("--");
|
||||
// Creates a reader whose tag names are resolved against Gfh_tag_.Hash.
public static Gfh_tag_rdr New__html() {
  return new Gfh_tag_rdr(Gfh_tag_.Hash);
}
|
||||
// Creates a reader with a fresh Hash_adp_bry.cs() name hash (presumably case-sensitive and initially empty; verify against Hash_adp_bry).
public static Gfh_tag_rdr New__custom() {
  return new Gfh_tag_rdr(Hash_adp_bry.cs());
}
|
||||
}
|
||||
@@ -1,80 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import org.junit.*;
|
||||
public class Gfh_tag_rdr_tst {
|
||||
private final Gfh_tag_rdr_fxt fxt = new Gfh_tag_rdr_fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Init("1<div id='1'>2</div>3<div id='2'>4</div>5<div id='3'>6</div>7");
|
||||
fxt.Test__move_fwd_head("<div id='1'>"); fxt.Test__pos("2");
|
||||
fxt.Test__peek_fwd_head("<div id='2'>"); fxt.Test__pos("2");
|
||||
fxt.Test__move_fwd_head("<div id='2'>"); fxt.Test__pos("4");
|
||||
fxt.Test__peek_bwd_tail("</div>3") ; fxt.Test__pos("4");
|
||||
}
|
||||
@Test public void Comment() {
|
||||
fxt.Init("1<!--2-->3<!--4-->5<div id='1'>6</div>");
|
||||
fxt.Test__move_fwd_head(Gfh_tag_.Id__any , "<div id='1'>") ; fxt.Test__pos("6");
|
||||
}
|
||||
@Test public void Meta() {
|
||||
fxt.Init("<!DOCTYPE html>1<div id='1'>2</div>3");
|
||||
fxt.Test__move_fwd_head(Gfh_tag_.Id__div , "<div id='1'>") ; fxt.Test__pos("2");
|
||||
}
|
||||
@Test public void Recursive__same_tags() {
|
||||
fxt.Init("1<a>2<a>3</a>4</a>5");
|
||||
fxt.Test__move_fwd_head(Gfh_tag_.Id__a , "<a>") ; fxt.Test__pos("2");
|
||||
fxt.Test__move_fwd_tail(Gfh_tag_.Id__a , "</a>") ; fxt.Test__pos("5");
|
||||
}
|
||||
@Test public void Recursive__diff_tags() {
|
||||
fxt.Init("1<div>2<a>3<img/>4</a>5</div>6");
|
||||
fxt.Test__move_fwd_head(Gfh_tag_.Id__div , "<div>") ; fxt.Test__pos("2");
|
||||
fxt.Test__move_fwd_tail(Gfh_tag_.Id__div , "</div>") ; fxt.Test__pos("6");
|
||||
}
|
||||
@Test public void Inline() {
|
||||
fxt.Init("1<br/>2");
|
||||
fxt.Test__move_fwd_head(Gfh_tag_.Id__br , "<br/>") ; fxt.Test__pos("2");
|
||||
}
|
||||
}
|
||||
class Gfh_tag_rdr_fxt {
|
||||
private final Gfh_tag_rdr rdr = Gfh_tag_rdr.New__html();
|
||||
public void Init(String src_str) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
rdr.Init(Bry_.Empty, src_bry, 0, src_bry.length);
|
||||
}
|
||||
public void Test__move_fwd_head(String expd) {Test__move_fwd_head(Gfh_tag_.Id__any, expd);}
|
||||
public void Test__move_fwd_head(int match_name_id, String expd) {
|
||||
Gfh_tag actl_tag = rdr.Tag__move_fwd_head(match_name_id).Chk_name_or_fail(match_name_id);
|
||||
Tfds.Eq_str(expd, String_.new_u8(rdr.Src(), actl_tag.Src_bgn(), actl_tag.Src_end()));
|
||||
}
|
||||
public void Test__move_fwd_tail(int match_name_id, String expd) {
|
||||
Gfh_tag actl_tag = rdr.Tag__move_fwd_tail(match_name_id);
|
||||
Tfds.Eq_str(expd, String_.new_u8(rdr.Src(), actl_tag.Src_bgn(), actl_tag.Src_end()));
|
||||
}
|
||||
public void Test__peek_fwd_head(String expd) {
|
||||
Gfh_tag actl_tag = rdr.Tag__peek_fwd_head();
|
||||
Tfds.Eq_str(expd, String_.new_u8(rdr.Src(), actl_tag.Src_bgn(), actl_tag.Src_end()));
|
||||
}
|
||||
public void Test__peek_bwd_tail(String expd_str) {
|
||||
byte[] expd_bry = Bry_.new_u8(expd_str);
|
||||
Gfh_tag actl_tag = rdr.Tag__peek_bwd_tail(-1);
|
||||
Tfds.Eq_bry(expd_bry, Bry_.Mid(rdr.Src(), actl_tag.Src_bgn(), actl_tag.Src_bgn() + expd_bry.length));
|
||||
}
|
||||
public void Test__pos(String expd_str) {
|
||||
byte[] expd_bry = Bry_.new_u8(expd_str);
|
||||
Tfds.Eq_bry(expd_bry, Bry_.Mid(rdr.Src(), rdr.Pos(), rdr.Pos() + expd_bry.length));
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
// Callback that receives a range of positions to parse.
// NOTE(review): presumably the range indexes into the html source being read and rng_end is exclusive; confirm against callers.
public interface Gfh_txt_wkr {
  // rng_bgn / rng_end: bounds of the range to parse
  void Parse(int rng_bgn, int rng_end);
}
|
||||
Reference in New Issue
Block a user