mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.10.3.1
This commit is contained in:
189
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_convert_lang.java
Normal file
189
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_convert_lang.java
Normal file
@@ -0,0 +1,189 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*; import gplx.dbs.*;
|
||||
import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
public class Vnt_convert_lang {
|
||||
private final Xol_convert_mgr convert_mgr; private final Xol_vnt_regy vnt_regy;
// NOTE: converter_rule is not final; Log__init rebuilds it so that the rule (and the parsers it initializes) receive the live log_mgr
private Vnt_convert_rule converter_rule; private final Vnt_html_doc_wkr html_convert_wkr; private final Mwh_doc_parser doc_parser = new Mwh_doc_parser();
private final Bry_bfr bfr = Bry_bfr.new_(255), tmp_frame_bfr = Bry_bfr.new_(255), tmp_convert_bfr = Bry_bfr.new_(255);
private byte[] src; private int src_len; private int pos;	// current parse state; set by Parse_bry
private Vnt_log_mgr log_mgr; private int tag_bgn, tag_end;	// log_mgr is null until Log__init is called; tag_bgn / tag_end are set by Is_inside_tag
// Builds a converter for the given convert_mgr / vnt_regy. Logging is off until Log__init is called.
public Vnt_convert_lang(Xol_convert_mgr convert_mgr, Xol_vnt_regy vnt_regy) {
	this.convert_mgr = convert_mgr; this.vnt_regy = vnt_regy;
	this.html_convert_wkr = new Vnt_html_doc_wkr(convert_mgr, vnt_regy);
	this.converter_rule = new Vnt_convert_rule(this, vnt_regy, log_mgr);	// log_mgr is null at this point; rule is rebuilt in Log__init
}
// Title produced by the most recently applied rule; reset to null by Parse_page
public byte[] Converted_title() {return converted_title;} private byte[] converted_title;
// Enables logging: creates the log manager, binds it to the db conn, and rebuilds the rule so it logs too.
public void Log__init(Db_conn conn) {
	log_mgr = new Vnt_log_mgr();
	log_mgr.Init_by_db(conn, vnt_regy);
	// FIX: the rule created in the constructor captured a null log_mgr; recreate it with the real one
	this.converter_rule = new Vnt_convert_rule(this, vnt_regy, log_mgr);
}
|
||||
public byte[] Parse_page(Xol_vnt_itm vnt_itm, int page_id, byte[] src) {// REF.MW:/languages/LanguageConverter.php!recursiveConvertTopLevel
|
||||
if (log_mgr != null) log_mgr.Init_by_page(page_id);
|
||||
this.converted_title = null;
|
||||
return Parse_bry(vnt_itm, src);
|
||||
}
|
||||
public byte[] Parse_bry(Xol_vnt_itm vnt_itm, byte[] src) {
|
||||
boolean convert_needed = true; // false for sr lang; SEE:LanguageSr.php !$this->guessVariant(src, vnt);
|
||||
this.pos = 0;
|
||||
this.src = src; this.src_len = src.length;
|
||||
while (pos < src_len) {
|
||||
int curly_bgn = Bry_find_.Find_fwd(src, Bry__curly_bgn, pos, src_len);
|
||||
if (curly_bgn == Bry_find_.Not_found) { // No more markup, append final segment
|
||||
Add_output(vnt_itm, convert_needed, src, pos, src_len);
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
boolean inside_tag = Is_inside_tag(pos, curly_bgn);
|
||||
if (inside_tag) {
|
||||
Add_output(vnt_itm, convert_needed, src, pos, tag_bgn); // Markup found; append segment
|
||||
Auto_convert(bfr, vnt_itm, src, tag_bgn, tag_end);
|
||||
pos = tag_end;
|
||||
}
|
||||
else {
|
||||
Add_output(vnt_itm, convert_needed, src, pos, curly_bgn); // Markup found; append segment
|
||||
pos = curly_bgn; // Advance position
|
||||
bfr.Add(Parse_recursive(tmp_frame_bfr, vnt_itm, 1)); // Do recursive conversion
|
||||
}
|
||||
}
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
private boolean Is_inside_tag(int prev_pos, int curly_bgn) {
|
||||
if ( curly_bgn == 0 // -{ starts at BOS; EX: "-{A}-"
|
||||
|| curly_bgn == prev_pos // -{ starts after last pair; EX: "-{A}--{B}-"
|
||||
) return false;
|
||||
int cur = curly_bgn - 1;
|
||||
tag_bgn = tag_end = -1;
|
||||
boolean loop = true;
|
||||
while (loop) { // scan bwd for <
|
||||
byte b = src[cur];
|
||||
switch (b) {
|
||||
case Byte_ascii.Angle_bgn: tag_bgn = cur; loop = false; break;
|
||||
case Byte_ascii.Angle_end: return false; // ">" found; "-{}-" not inside tag
|
||||
default: --cur; break;
|
||||
}
|
||||
if (cur == prev_pos - 1) break;
|
||||
}
|
||||
if (tag_bgn == -1) return false; // no "<" found;
|
||||
loop = true;
|
||||
cur = curly_bgn + 1; // TODO: resume at }-
|
||||
while (loop) { // scan fwd for >
|
||||
byte b = src[cur];
|
||||
switch (b) {
|
||||
case Byte_ascii.Angle_bgn: return false; // "<" found; "-{}-" not inside tag
|
||||
case Byte_ascii.Angle_end: tag_end = cur + 1; return true;
|
||||
default: ++cur; break;
|
||||
}
|
||||
if (cur == src_len) break;
|
||||
}
|
||||
return false; // no ">" foud
|
||||
}
|
||||
private byte[] Parse_recursive(Bry_bfr frame_bfr, Xol_vnt_itm vnt_itm, int depth) {
|
||||
pos += 2; // skip "-{"
|
||||
boolean warning_done = false;
|
||||
boolean frame_bfr_dirty = false;
|
||||
int bgn_pos = pos;
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
Object o = trie.Match_bgn_w_byte(b, src,pos, src_len);
|
||||
if (o == null) { // char;
|
||||
++pos;
|
||||
continue;
|
||||
}
|
||||
switch (((Byte_obj_val)o).Val()) {
|
||||
case Tid__curly_bgn:
|
||||
frame_bfr.Add_mid(src, bgn_pos, pos); // add everything from bgn of frame to cur pos; EX: "a" in "-{a-{b}-c}-"
|
||||
frame_bfr_dirty = true;
|
||||
if (depth >= max_depth) {
|
||||
pos += 2; // skip "-{"
|
||||
frame_bfr.Add(Bry__curly_bgn);
|
||||
if (!warning_done) {
|
||||
frame_bfr.Add_str("<span class=\"error\">max depth");
|
||||
// wfMessage('language-converter-depth-warning')->numParams($this->mMaxDepth)->inContentLanguage()->text()
|
||||
frame_bfr.Add_str("</span>");
|
||||
warning_done = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
frame_bfr.Add(Parse_recursive(Bry_bfr.new_(16), vnt_itm, depth + 1)); // Recursively parse another rule
|
||||
bgn_pos = pos;
|
||||
break;
|
||||
case Tid__curly_end:
|
||||
if (frame_bfr_dirty) { // recursive; use frame_bfr
|
||||
frame_bfr.Add_mid(src, bgn_pos, pos); // add everything from bgn of frame to cur pos; EX: "a" in "-{a-{b}-c}-"
|
||||
byte[] frame_bry = frame_bfr.To_bry_and_clear();
|
||||
converter_rule.Parse(vnt_itm, frame_bry, 0, frame_bry.length);
|
||||
}
|
||||
else // not recursive
|
||||
converter_rule.Parse(vnt_itm, src, bgn_pos, pos);
|
||||
Apply_manual_conv(converter_rule);
|
||||
pos += 2;
|
||||
return converter_rule.Display();
|
||||
default: throw Err_.new_unhandled(-1); // never happens
|
||||
}
|
||||
}
|
||||
Auto_convert(frame_bfr, vnt_itm, src, bgn_pos, src_len); // Unclosed rule
|
||||
pos = src_len;
|
||||
return Bry_.Add(Bry__curly_bgn, frame_bfr.To_bry_and_clear());
|
||||
}
|
||||
private void Add_output(Xol_vnt_itm vnt_itm, boolean convert_needed, byte[] src, int bgn, int end) {
|
||||
if (end - bgn == 0) return;
|
||||
if (convert_needed) {
|
||||
Auto_convert(bfr, vnt_itm, src, bgn, end);
|
||||
}
|
||||
else
|
||||
bfr.Add_mid(src, bgn, end);
|
||||
}
|
||||
public byte[] Auto_convert(Xol_vnt_itm vnt_itm, byte[] src) {
|
||||
Auto_convert(tmp_convert_bfr, vnt_itm, src, 0, src.length);
|
||||
return tmp_convert_bfr.To_bry_and_clear();
|
||||
}
|
||||
private void Auto_convert(Bry_bfr bfr, Xol_vnt_itm vnt_itm, byte[] src, int bgn, int end) {
|
||||
html_convert_wkr.Init(bfr, vnt_itm);
|
||||
doc_parser.Parse(html_convert_wkr, src, bgn, end);
|
||||
}
|
||||
private void Apply_manual_conv(Vnt_convert_rule rule) {
|
||||
this.converted_title = rule.Title();
|
||||
byte action = rule.Action();
|
||||
Vnt_rule_undi_mgr cnv_tbl = rule.Cnv_tbl();
|
||||
int len = cnv_tbl.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Vnt_rule_undi_grp grp = cnv_tbl.Get_at(i);
|
||||
byte[] grp_key = grp.Vnt();
|
||||
Xol_vnt_itm vnt_itm = vnt_regy.Get_by(grp_key); if (vnt_itm == null) continue;
|
||||
int grp_len = grp.Len();
|
||||
Xol_convert_wkr wkr = convert_mgr.Converter_ary()[vnt_itm.Idx()];
|
||||
for (int j = 0; j < grp_len; ++j) {
|
||||
Vnt_rule_undi_itm itm = grp.Get_at(j);
|
||||
if (action == Byte_ascii.Plus) {
|
||||
wkr.Add(itm.Src(), itm.Trg());
|
||||
}
|
||||
else if (action == Byte_ascii.Dash)
|
||||
wkr.Del(itm.Src());
|
||||
}
|
||||
}
|
||||
}
|
||||
private static final byte Tid__curly_bgn = 1, Tid__curly_end = 2;
|
||||
private static final byte[] Bry__curly_bgn = Bry_.new_a7("-{"), Bry__curly_end = Bry_.new_a7("}-");
|
||||
private static final Btrie_fast_mgr trie = Btrie_fast_mgr.cs()
|
||||
.Add_bry_byte(Bry__curly_bgn, Tid__curly_bgn)
|
||||
.Add_bry_byte(Bry__curly_end, Tid__curly_end);
|
||||
public static final byte[] Bry__armor_bgn = Bry_.new_a7("-{R|"), Bry__armor_end = Bry_.new_a7("}-");
|
||||
private static final int max_depth = 32;
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
|
||||
public class Vnt_convert_lang__html__tst { // REF: https://www.mediawiki.org/wiki/Writing_systems/Syntax
|
||||
private final Vnt_convert_lang_fxt fxt = new Vnt_convert_lang_fxt();
|
||||
private String rule;
|
||||
@Before public void init() {
|
||||
rule = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-";
|
||||
fxt.Clear();
|
||||
}
|
||||
@Test public void Node() {
|
||||
fxt.Test_parse(rule + "hk<span>hk</span>hk", "cn<span>cn</span>cn");
|
||||
}
|
||||
@Test public void Attribs() {
|
||||
fxt.Test_parse(rule + "<span class='hk'>hk</span>", "<span class='hk'>cn</span>");
|
||||
}
|
||||
@Test public void Attribs__title() {
|
||||
fxt.Test_parse(rule + "<span title='hk'>hk</span>", "<span title='cn'>cn</span>");
|
||||
}
|
||||
@Test public void Attribs__alt() {
|
||||
fxt.Test_parse(rule + "<span alt='hk'>hk</span>", "<span alt='cn'>cn</span>");
|
||||
}
|
||||
@Test public void Attribs__alt_w_embedded_vnt() { // PURPOSE: handle embedded variants inside attribute tags; PAGE:sr.n:Проглашени_победници_„Вики_воли_Земљу" DATE:2015-10-13
|
||||
fxt.Test_parse(rule + "<img id='hk' alt='hk-{hk}-hk' src='hk'>", "<img id='hk' alt='cnhkcn' src='hk'>");
|
||||
}
|
||||
@Test public void Attribs__skip_url() {
|
||||
fxt.Test_parse(rule + "<span alt='http://hk.org'>hk</span>", "<span alt='http://hk.org'>cn</span>");
|
||||
}
|
||||
@Test public void Node__script() {
|
||||
fxt.Test_parse(rule + "hk<script>hk</script>hk", "cn<script>hk</script>cn");
|
||||
}
|
||||
@Test public void Node__code() {
|
||||
fxt.Test_parse(rule + "hk<code>hk</code>hk", "cn<code>hk</code>cn");
|
||||
}
|
||||
@Test public void Node__pre() {
|
||||
fxt.Test_parse(rule + "hk<pre>hk</pre>hk", "cn<pre>hk</pre>cn");
|
||||
}
|
||||
@Test public void Node__pre__nested() {
|
||||
fxt.Test_parse(rule + "hk<pre><span>hk</span></pre>hk", "cn<pre><span>hk</span></pre>cn");
|
||||
}
|
||||
@Test public void Recursive__deep() {
|
||||
fxt.Test_parse("a-{b-{c-{d}-e}-f}-g", "abcdefg");
|
||||
}
|
||||
@Test public void Recursive__many() {
|
||||
fxt.Test_parse("a-{b-{c}-d-{e}-f}-g", "abcdefg");
|
||||
}
|
||||
@Test public void Recursive__unclosed() {
|
||||
fxt.Test_parse("a-{b-{c", "a-{b-{c");
|
||||
}
|
||||
@Test public void Recursive__unclosed_2() {
|
||||
fxt.Test_parse("a-{b-{c}-", "a-{bc");
|
||||
}
|
||||
@Test public void Recursive__failed() { // PURPOSE: handle out of bounds exception; PAGE:zh.w:重庆市 ;DATE:2015-10-01
|
||||
fxt.Test_parse("-{zh-cn:a-{b}-c; zh-tw:d-{e}-f; }-", "abc");
|
||||
}
|
||||
@Test public void Unclosed() {
|
||||
fxt.Test_parse("a-{bc", "a-{bc");
|
||||
}
|
||||
@Test public void Entity__body() {
|
||||
fxt.Test_parse("-{H|zh-cn:nbsp-cn;zh-hk:nbsp;}-" + " nbsp", " nbsp-cn");
|
||||
}
|
||||
@Test public void Entity__atr() {
|
||||
fxt.Test_parse("-{H|zh-cn:nbsp-cn;zh-hk:nbsp;}-" + "<div title='nbsp nbsp'/>" , "<div title='nbsp-cn nbsp-cn'/>");
|
||||
}
|
||||
@Test public void Node__example() {
|
||||
fxt.Test_parse(rule + String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|A<br />"
|
||||
, "|B<br/>"
|
||||
, "<span style=''>-{zh-hans:C;zh-hant:D;}-</span>"
|
||||
, "|}")
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|A<br />"
|
||||
, "|B<br/>"
|
||||
, "<span style=''>C</span>"
|
||||
, "|}"
|
||||
));
|
||||
}
|
||||
@Test public void Attribs__title__w_vnt() {
|
||||
fxt.Init_cur("zh-tw");
|
||||
fxt.Test_parse("<span title=\"-{zh-cn:cn;zh-hant:hk;}-cn\" style=\"color:red;\">cn</span>", "<span title=\"hkcn\" style=\"color:red;\">cn</span>"); // cn not converted to hk
|
||||
}
|
||||
}
|
||||
@@ -17,8 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
|
||||
public class Vnt_converter_lang__syntax__tst { // REF: https://www.mediawiki.org/wiki/Writing_systems/Syntax
|
||||
private final Vnt_converter_lang_fxt fxt = new Vnt_converter_lang_fxt();
|
||||
public class Vnt_convert_lang__syntax__tst { // REF: https://www.mediawiki.org/wiki/Writing_systems/Syntax
|
||||
private final Vnt_convert_lang_fxt fxt = new Vnt_convert_lang_fxt();
|
||||
@Test public void Bidi() {
|
||||
String text = "-{zh-hans:a;zh-hant:b}-";
|
||||
fxt.Test_parse_many(text, "a", "zh-hans", "zh-cn", "zh-sg", "zh");
|
||||
@@ -81,23 +81,26 @@ public class Vnt_converter_lang__syntax__tst { // REF: https://www.mediawiki.org
|
||||
@Test public void Descrip__undi() {fxt.Test_parse("-{D|cn_k=>zh-cn:cn_v;hk_k=>zh-hk:hk_v}-", "cn_k⇒ZH-CN:cn_v;hk_k⇒ZH-HK:hk_v;");}
|
||||
@Test public void Descrip__mixd() {fxt.Test_parse("-{D|zh-tw:tw_v;cn_k=>zh-cn:cn_v;hk_k=>zh-hk:hk_v;zh-mo:mo_v}-", "ZH-TW:tw_v;ZH-MO:mo_v;cn_k⇒ZH-CN:cn_v;hk_k⇒ZH-HK:hk_v;");}
|
||||
}
|
||||
class Vnt_converter_lang_fxt {
|
||||
private final Vnt_converter_lang converter_lang;
|
||||
class Vnt_convert_lang_fxt {
|
||||
private final Vnt_convert_lang converter_lang;
|
||||
private final Xol_convert_mgr convert_mgr = new Xol_convert_mgr();
|
||||
private final Xol_vnt_regy vnt_regy = Xol_vnt_regy_fxt.new_chinese();
|
||||
private Xol_vnt_itm vnt_itm;
|
||||
public Vnt_converter_lang_fxt() {
|
||||
converter_lang = new Vnt_converter_lang(convert_mgr, vnt_regy);
|
||||
public Vnt_convert_lang_fxt() {
|
||||
converter_lang = new Vnt_convert_lang(convert_mgr, vnt_regy);
|
||||
this.Clear();
|
||||
}
|
||||
public void Clear() {
|
||||
convert_mgr.Init(vnt_regy);
|
||||
Init_cur("zh-cn");
|
||||
}
|
||||
public void Init_cur(String vnt) {
|
||||
public Vnt_convert_lang_fxt Init_cur(String vnt) {
|
||||
byte[] cur_vnt = Bry_.new_a7(vnt);
|
||||
this.vnt_itm = vnt_regy.Get_by(cur_vnt);
|
||||
convert_mgr.Cur_vnt_(cur_vnt);
|
||||
return this;
|
||||
}
|
||||
public void Test_parse(String raw, String expd) {
|
||||
Tfds.Eq_str(expd, String_.new_u8(converter_lang.Parse(vnt_itm, Bry_.new_u8(raw))));
|
||||
Tfds.Eq_str(expd, String_.new_u8(converter_lang.Parse_page(vnt_itm, -1, Bry_.new_u8(raw))));
|
||||
}
|
||||
public void Test_parse_many(String raw, String expd, String... vnts) {
|
||||
int len = vnts.length;
|
||||
@@ -105,13 +108,13 @@ class Vnt_converter_lang_fxt {
|
||||
String vnt_key = vnts[i];
|
||||
Init_cur(vnt_key);
|
||||
Xol_vnt_itm vnt = vnt_regy.Get_by(Bry_.new_a7(vnt_key));
|
||||
Tfds.Eq_str(expd, String_.new_u8(converter_lang.Parse(vnt, Bry_.new_u8(raw))), vnt_key);
|
||||
Tfds.Eq_str(expd, String_.new_u8(converter_lang.Parse_page(vnt, -1, Bry_.new_u8(raw))), vnt_key);
|
||||
}
|
||||
}
|
||||
public void Test_parse_title(String raw, String expd_title, String expd_text, String vnt_key) {
|
||||
Init_cur(vnt_key);
|
||||
Xol_vnt_itm vnt = vnt_regy.Get_by(Bry_.new_a7(vnt_key));
|
||||
Tfds.Eq_str(expd_text, String_.new_u8(converter_lang.Parse(vnt, Bry_.new_u8(raw))), vnt_key);
|
||||
Tfds.Eq_str(expd_text, String_.new_u8(converter_lang.Parse_page(vnt, -1, Bry_.new_u8(raw))), vnt_key);
|
||||
Tfds.Eq_str(expd_title, converter_lang.Converted_title());
|
||||
}
|
||||
}
|
||||
@@ -18,24 +18,27 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
class Vnt_converter_rule { // REF.MW: /languages/LanguageConverter.php|ConverterRule
|
||||
class Vnt_convert_rule { // REF.MW: /languages/LanguageConverter.php|ConverterRule
|
||||
private final Vnt_flag_parser flag_parser = new Vnt_flag_parser(); private final Vnt_flag_code_mgr flag_codes = new Vnt_flag_code_mgr(); private final Vnt_flag_lang_mgr flag_langs = new Vnt_flag_lang_mgr();
|
||||
private final Vnt_rule_parser rule_parser = new Vnt_rule_parser(); private final Vnt_rule_undi_mgr rule_undis = new Vnt_rule_undi_mgr(); private final Vnt_rule_bidi_mgr rule_bidis = new Vnt_rule_bidi_mgr();
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr.new_();
|
||||
private final Ordered_hash cnv_marked_hash = Ordered_hash_.new_bry_();
|
||||
private Vnt_converter_lang converter;
|
||||
private Xol_vnt_regy vnt_regy; private Xol_vnt_itm vnt_itm; private byte[] vnt_key;
|
||||
private final Ordered_hash cnv_marked_hash = Ordered_hash_.New_bry();
|
||||
private Vnt_convert_lang converter;
|
||||
private Xol_vnt_regy vnt_regy; private byte[] vnt_key;
|
||||
private Vnt_log_mgr log_mgr;
|
||||
private byte[] rule_raw;
|
||||
public byte[] Display() {return display;} private byte[] display;
|
||||
public byte[] Title() {return title;} private byte[] title;
|
||||
public byte Action() {return action;} private byte action;
|
||||
public Vnt_rule_undi_mgr Cnv_tbl() {return cnv_tbl;} private final Vnt_rule_undi_mgr cnv_tbl = new Vnt_rule_undi_mgr();
|
||||
public void Init(Vnt_converter_lang converter, Xol_vnt_regy vnt_regy, Xol_vnt_itm vnt_itm) {
|
||||
this.converter = converter;
|
||||
this.vnt_regy = vnt_regy; this.vnt_itm = vnt_itm; this.vnt_key = vnt_itm.Key();
|
||||
rule_parser.Init(vnt_regy);
|
||||
public Vnt_convert_rule(Vnt_convert_lang converter, Xol_vnt_regy vnt_regy, Vnt_log_mgr log_mgr) {
|
||||
this.converter = converter; this.log_mgr = log_mgr;
|
||||
this.vnt_regy = vnt_regy;
|
||||
flag_parser.Init(log_mgr);
|
||||
rule_parser.Init(log_mgr, vnt_regy);
|
||||
}
|
||||
public void Parse(byte[] src, int src_bgn, int src_end) {
|
||||
public void Parse(Xol_vnt_itm vnt_itm, byte[] src, int src_bgn, int src_end) {
|
||||
this.vnt_key = vnt_itm.Key();
|
||||
this.display = this.title = null;
|
||||
this.action = Byte_ascii.Null;
|
||||
int pipe_pos = Bry_find_.Find_fwd(src, Byte_ascii.Pipe, src_bgn, src_end);
|
||||
@@ -61,9 +64,9 @@ class Vnt_converter_rule { // REF.MW: /languages/LanguageConverter.php|Converter
|
||||
flag_codes.Limit(Vnt_flag_code_.Tid_raw);
|
||||
}
|
||||
rule_parser.Clear(rule_undis, rule_bidis, rule_raw);
|
||||
if (!flag_codes.Get(Vnt_flag_code_.Tid_raw) && !flag_codes.Get(Vnt_flag_code_.Tid_name)) {
|
||||
if (!flag_codes.Get(Vnt_flag_code_.Tid_raw) && !flag_codes.Get(Vnt_flag_code_.Tid_name))
|
||||
rule_parser.Parse(src, rule_bgn, src_end);
|
||||
}
|
||||
if (log_mgr != null) log_mgr.Log_rule(src_bgn, src_end, Bry_.Mid(src, src_bgn, src_end), flag_codes, flag_langs, rule_undis, rule_bidis);
|
||||
if (rule_undis.Has_none() && rule_bidis.Has_none()) {
|
||||
if ( flag_codes.Get(Vnt_flag_code_.Tid_add)
|
||||
|| flag_codes.Get(Vnt_flag_code_.Tid_del)
|
||||
@@ -158,7 +161,7 @@ class Vnt_converter_rule { // REF.MW: /languages/LanguageConverter.php|Converter
|
||||
tmp_bfr.Add(undi_itm.Src()).Add(Bry__undi_spr).Add(undi_vnt.Name()).Add_byte_colon().Add(undi_itm.Trg()).Add_byte_semic();
|
||||
}
|
||||
}
|
||||
return tmp_bfr.Xto_bry_and_clear();
|
||||
return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
private byte[] Make_title(Xol_vnt_itm vnt) {
|
||||
if (vnt.Idx() == 0) { // for mainLanguageCode; EX: "zh"
|
||||
@@ -179,7 +182,7 @@ class Vnt_converter_rule { // REF.MW: /languages/LanguageConverter.php|Converter
|
||||
return rv;
|
||||
}
|
||||
private final static byte[]
|
||||
Bry__error_bgn = Bry_.new_a7("<span class=\"error\">")
|
||||
Bry__error_bgn = Bry_.new_a7("<span class=\"error\">vnt error")
|
||||
, Bry__error_end = Bry_.new_a7("</span>")
|
||||
, Bry__undi_spr = Bry_.new_u8("⇒")
|
||||
;
|
||||
@@ -1,144 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
public class Vnt_converter_lang {
|
||||
private final Bry_bfr bfr = Bry_bfr.new_();
|
||||
private int max_depth = 32;
|
||||
private byte[] src; private int src_len;
|
||||
private boolean convert_needed;
|
||||
private int pos;
|
||||
private final Vnt_converter_rule converter_rule = new Vnt_converter_rule();
|
||||
private Xol_convert_mgr convert_mgr; private Xol_vnt_regy vnt_regy; // private Xol_vnt_mgr vnt_mgr; // private Xol_vnt_itm vnt_itm;
|
||||
private final Mwh_doc_parser doc_parser = new Mwh_doc_parser();
|
||||
private final Vnt_html_doc_wkr html_convert_wkr;
|
||||
private final Bry_bfr tmp_convert_bfr = Bry_bfr.new_();
|
||||
public Vnt_converter_lang(Xol_convert_mgr convert_mgr, Xol_vnt_regy vnt_regy) {
|
||||
this.html_convert_wkr = new Vnt_html_doc_wkr(convert_mgr);
|
||||
this.convert_mgr = convert_mgr; this.vnt_regy = vnt_regy;
|
||||
}
|
||||
public byte[] Converted_title() {return converted_title;} private byte[] converted_title;
|
||||
public byte[] Parse(Xol_vnt_itm vnt_itm, byte[] src) {// REF.MW:/languages/LanguageConverter.php!recursiveConvertTopLevel
|
||||
this.converted_title = null;
|
||||
converter_rule.Init(this, vnt_regy, vnt_itm);
|
||||
this.converted_title = null;
|
||||
int markup_count = 0;
|
||||
this.pos = 0;
|
||||
this.convert_needed = true; // false for sr lang; SEE:LanguageSr.php !$this->guessVariant(src, vnt);
|
||||
this.src = src; this.src_len = src.length;
|
||||
while (pos < src_len) {
|
||||
int curly_bgn = Bry_find_.Find_fwd(src, Bry__curly_bgn, pos, src_len);
|
||||
if (curly_bgn == Bry_find_.Not_found) { // No more markup, append final segment
|
||||
if (markup_count == 0) return src; // no markups found; just return original
|
||||
Add_output(vnt_itm, convert_needed, src, pos, src_len);
|
||||
return bfr.Xto_bry_and_clear();
|
||||
}
|
||||
Add_output(vnt_itm, convert_needed, src, pos, curly_bgn); // Markup found; append segment
|
||||
pos = curly_bgn; // Advance position
|
||||
++markup_count;
|
||||
Parse_recursive(vnt_itm, 1); // Do recursive conversion
|
||||
}
|
||||
return bfr.Xto_bry_and_clear();
|
||||
}
|
||||
private void Parse_recursive(Xol_vnt_itm vnt_itm, int depth) {
|
||||
pos += 2; // skip "-{"
|
||||
boolean warning_done = false;
|
||||
int bgn_pos = pos;
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
Object o = trie.Match_bgn_w_byte(b, src,pos, src_len);
|
||||
if (o == null) { // char;
|
||||
++pos;
|
||||
continue;
|
||||
}
|
||||
switch (((Byte_obj_val)o).Val()) {
|
||||
case Tid__curly_bgn:
|
||||
if (depth >= max_depth) {
|
||||
bfr.Add(Bry__curly_bgn);
|
||||
if (!warning_done) {
|
||||
bfr.Add_str("<span class=\"error\">");
|
||||
// wfMessage('language-converter-depth-warning')->numParams($this->mMaxDepth)->inContentLanguage()->text()
|
||||
bfr.Add_str("</span>");
|
||||
warning_done = true;
|
||||
}
|
||||
pos += 2; // skip "-{"
|
||||
continue;
|
||||
}
|
||||
bgn_pos = pos;
|
||||
Parse_recursive(vnt_itm, depth + 1); // Recursively parse another rule
|
||||
break;
|
||||
case Tid__curly_end:
|
||||
converter_rule.Parse(src, bgn_pos, pos);
|
||||
Apply_manual_conv(converter_rule);
|
||||
bfr.Add(converter_rule.Display());
|
||||
pos += 2;
|
||||
return;
|
||||
default: throw Err_.new_unhandled(-1); // never happens
|
||||
}
|
||||
}
|
||||
if (pos < src_len) { // Unclosed rule
|
||||
bfr.Add(Bry__curly_bgn);
|
||||
Auto_convert(bfr, vnt_itm, src, pos, src_len);
|
||||
}
|
||||
pos = src_len;
|
||||
}
|
||||
private void Add_output(Xol_vnt_itm vnt_itm, boolean convert_needed, byte[] src, int bgn, int end) {
|
||||
if (end - bgn == 0) return;
|
||||
if (convert_needed) {
|
||||
Auto_convert(bfr, vnt_itm, src, bgn, end);
|
||||
}
|
||||
else
|
||||
bfr.Add_mid(src, bgn, end);
|
||||
}
|
||||
public byte[] Auto_convert(Xol_vnt_itm vnt_itm, byte[] src) {
|
||||
Auto_convert(tmp_convert_bfr, vnt_itm, src, 0, src.length);
|
||||
return tmp_convert_bfr.Xto_bry_and_clear();
|
||||
}
|
||||
private void Auto_convert(Bry_bfr bfr, Xol_vnt_itm vnt_itm, byte[] src, int bgn, int end) {
|
||||
html_convert_wkr.Init(bfr, vnt_itm.Idx());
|
||||
doc_parser.Parse(html_convert_wkr, src, bgn, end);
|
||||
}
|
||||
private void Apply_manual_conv(Vnt_converter_rule rule) {
|
||||
this.converted_title = rule.Title();
|
||||
byte action = rule.Action();
|
||||
Vnt_rule_undi_mgr cnv_tbl = rule.Cnv_tbl();
|
||||
int len = cnv_tbl.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Vnt_rule_undi_grp grp = cnv_tbl.Get_at(i);
|
||||
byte[] grp_key = grp.Vnt();
|
||||
Xol_vnt_itm vnt_itm = vnt_regy.Get_by(grp_key); if (vnt_itm == null) continue;
|
||||
int grp_len = grp.Len();
|
||||
Xol_convert_wkr wkr = convert_mgr.Converter_ary()[vnt_itm.Idx()];
|
||||
for (int j = 0; j < grp_len; ++j) {
|
||||
Vnt_rule_undi_itm itm = grp.Get_at(j);
|
||||
if (action == Byte_ascii.Plus) {
|
||||
wkr.Add(itm.Src(), itm.Trg());
|
||||
}
|
||||
else if (action == Byte_ascii.Dash)
|
||||
wkr.Del(itm.Src());
|
||||
}
|
||||
}
|
||||
}
|
||||
private static final byte Tid__curly_bgn = 1, Tid__curly_end = 2;
|
||||
private static final byte[] Bry__curly_bgn = Bry_.new_a7("-{"), Bry__curly_end = Bry_.new_a7("}-");
|
||||
private static final Btrie_fast_mgr trie = Btrie_fast_mgr.cs()
|
||||
.Add_bry_byte(Bry__curly_bgn, Tid__curly_bgn)
|
||||
.Add_bry_byte(Bry__curly_end, Tid__curly_end);
|
||||
}
|
||||
@@ -1,53 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
|
||||
public class Vnt_converter_lang__html__tst { // REF: https://www.mediawiki.org/wiki/Writing_systems/Syntax
|
||||
private final Vnt_converter_lang_fxt fxt = new Vnt_converter_lang_fxt();
|
||||
private String rule;
|
||||
@Before public void init() {
|
||||
rule = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-";
|
||||
}
|
||||
@Test public void Node() {
|
||||
fxt.Test_parse(rule + "hk<span>hk</span>hk", "cn<span>cn</span>cn");
|
||||
}
|
||||
@Test public void Attribs() {
|
||||
fxt.Test_parse(rule + "<span class='hk'>hk</span>", "<span class='hk'>cn</span>");
|
||||
}
|
||||
@Test public void Attribs__title() {
|
||||
fxt.Test_parse(rule + "<span title='hk'>hk</span>", "<span title='cn'>cn</span>");
|
||||
}
|
||||
@Test public void Attribs__alt() {
|
||||
fxt.Test_parse(rule + "<span alt='hk'>hk</span>", "<span alt='cn'>cn</span>");
|
||||
}
|
||||
@Test public void Attribs__skip_url() {
|
||||
fxt.Test_parse(rule + "<span alt='http://hk.org'>hk</span>", "<span alt='http://hk.org'>cn</span>");
|
||||
}
|
||||
@Test public void Node__style() {
|
||||
fxt.Test_parse(rule + "hk<script>hk</script>hk", "cn<script>hk</script>cn");
|
||||
}
|
||||
@Test public void Node__code() {
|
||||
fxt.Test_parse(rule + "hk<code>hk</code>hk", "cn<code>hk</code>cn");
|
||||
}
|
||||
@Test public void Node__pre() {
|
||||
fxt.Test_parse(rule + "hk<pre>hk</pre>hk", "cn<pre>hk</pre>cn");
|
||||
}
|
||||
@Test public void Node__pre__nested() {
|
||||
fxt.Test_parse(rule + "hk<pre><span>hk</span></pre>hk", "cn<pre><span>hk</span></pre>cn");
|
||||
}
|
||||
}
|
||||
@@ -18,11 +18,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
class Vnt_flag_lang_mgr {
|
||||
private final Ordered_hash regy = Ordered_hash_.new_bry_();
|
||||
private final Ordered_hash regy = Ordered_hash_.New_bry();
|
||||
public int Count() {return regy.Count();}
|
||||
public boolean Has(byte[] vnt) {return regy.Has(vnt);}
|
||||
public void Clear() {regy.Clear();}
|
||||
public void Add(Xol_vnt_itm itm) {regy.Add(itm.Key(), itm);}
|
||||
public void Add(Xol_vnt_itm itm) {regy.Add_if_dupe_use_1st(itm.Key(), itm);}
|
||||
public Xol_vnt_itm Get_at(int i) {return (Xol_vnt_itm)regy.Get_at(i);}
|
||||
public void To_bfr__dbg(Bry_bfr bfr) {
|
||||
int len = regy.Count();
|
||||
|
||||
@@ -21,6 +21,10 @@ class Vnt_flag_parser implements gplx.core.brys.Bry_split_wkr {
|
||||
private final Hash_adp_bry codes_regy = Vnt_flag_code_.Regy;
|
||||
private Vnt_flag_code_mgr codes; private Vnt_flag_lang_mgr langs;
|
||||
private Xol_vnt_regy vnt_regy;
|
||||
private Vnt_log_mgr log_mgr;
|
||||
public void Init(Vnt_log_mgr log_mgr) {
|
||||
this.log_mgr = log_mgr;
|
||||
}
|
||||
public void Parse(Vnt_flag_code_mgr codes, Vnt_flag_lang_mgr langs, Xol_vnt_regy vnt_regy, byte[] src, int src_bgn, int src_end) {
|
||||
this.codes = codes; this.langs = langs; this.vnt_regy = vnt_regy;
|
||||
codes.Clear(); langs.Clear();
|
||||
@@ -54,6 +58,7 @@ class Vnt_flag_parser implements gplx.core.brys.Bry_split_wkr {
|
||||
if (flag_tid == -1) { // try to find flags like "zh-hans", "zh-hant"; allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
|
||||
Xol_vnt_itm vnt_itm = vnt_regy.Get_by(src, itm_bgn, itm_end);
|
||||
if (vnt_itm == null) return Bry_split_.Rv__ok; // unknown flag; ignore
|
||||
if (log_mgr != null) log_mgr.Log_lang(vnt_itm, Vnt_log_mgr.Scope__lang);
|
||||
langs.Add(vnt_itm);
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
|
||||
@@ -38,6 +38,7 @@ public class Vnt_flag_parser_tst {
|
||||
@Test public void Lang__one() {fxt.Test_parse("zh-hans" , "S;zh-hans");}
|
||||
@Test public void Lang__many() {fxt.Test_parse("zh-cn;zh-hk" , "S;zh-cn;zh-hk");}
|
||||
@Test public void Lang__many__ws() {fxt.Test_parse(" zh-cn ; zh-hk " , "S;zh-cn;zh-hk");}
|
||||
@Test public void Lang__many__dupe() {fxt.Test_parse("zh-cn;zh-cn" , "S;zh-cn");}
|
||||
@Test public void Lang__zap__codes() {fxt.Test_parse("+;S;zh-hans;" , "zh-hans");}
|
||||
}
|
||||
class Vnt_flag_parser_fxt {
|
||||
@@ -50,6 +51,6 @@ class Vnt_flag_parser_fxt {
|
||||
parser.Parse(codes, langs, vnt_regy, src, 0, src.length);
|
||||
codes.To_bfr__dbg(bfr);
|
||||
langs.To_bfr__dbg(bfr);
|
||||
Tfds.Eq_str(expd, bfr.Xto_str_and_clear());
|
||||
Tfds.Eq_str(expd, bfr.To_str_and_clear());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,39 +16,46 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.xndes.*;
|
||||
import gplx.xowa.langs.vnts.converts.*;
|
||||
import gplx.xowa.html.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.amps.*;
|
||||
import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
|
||||
import gplx.xowa.htmls.*;
|
||||
class Vnt_html_doc_wkr implements Mwh_doc_wkr {
|
||||
private final Hash_adp_bry atr_hash = Hash_adp_bry.ci_a7();
|
||||
private final Xol_convert_mgr convert_mgr; private final Xol_vnt_regy vnt_regy;
|
||||
private Vnt_convert_lang atr_converter;
|
||||
private Xol_vnt_itm vnt_itm; private int convert_vnt_idx;
|
||||
private Bry_bfr bfr;
|
||||
private final Xol_convert_mgr convert_mgr; private int convert_vnt_idx;
|
||||
public Vnt_html_doc_wkr(Xol_convert_mgr convert_mgr) {
|
||||
this.convert_mgr = convert_mgr;
|
||||
public Vnt_html_doc_wkr(Xol_convert_mgr convert_mgr, Xol_vnt_regy vnt_regy) {
|
||||
this.convert_mgr = convert_mgr; this.vnt_regy = vnt_regy;
|
||||
atr_hash.Add_many_str("title", "alt");
|
||||
}
|
||||
public Hash_adp_bry Nde_regy() {return nde_regy;} private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw();
|
||||
public void Init(Bry_bfr bfr, int convert_vnt_idx) {this.bfr = bfr; this.convert_vnt_idx = convert_vnt_idx;}
|
||||
public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {
|
||||
int val_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
|
||||
int val_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
|
||||
if ( atr_hash.Get_by_mid(key_bry, 0, key_bry.length) == null // title, alt
|
||||
|| !key_exists
|
||||
|| Bry_find_.Find_fwd(src, Bry__url_frag, val_bgn, val_end) != Bry_find_.Not_found
|
||||
) { // handle name-only attribs like "<span title>"
|
||||
public void Init(Bry_bfr bfr, Xol_vnt_itm vnt_itm) {this.bfr = bfr; this.vnt_itm = vnt_itm; this.convert_vnt_idx = vnt_itm.Idx();}
|
||||
public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {
|
||||
boolean literal = true;
|
||||
if (atr_hash.Get_by_mid(key_bry, 0, key_bry.length) != null) { // title, alt
|
||||
int val_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
|
||||
int val_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
|
||||
if (Bry_find_.Find_fwd(src, Bry__url_frag, val_bgn, val_end) == Bry_find_.Not_found) { // do not convert if urls are present
|
||||
literal = false;
|
||||
byte[] val_bry = val_bry_manual == null ? Bry_.Mid(src, val_bgn, val_end) : val_bry_manual;
|
||||
if (atr_converter == null) atr_converter = new Vnt_convert_lang(convert_mgr, vnt_regy);// NOTE: late instantiation, or else StackOverflow error
|
||||
val_bry = atr_converter.Parse_bry(vnt_itm, val_bry);
|
||||
bfr.Add_byte_space();
|
||||
bfr.Add(key_bry);
|
||||
bfr.Add_byte(Byte_ascii.Eq);
|
||||
byte quote_byte = Mwh_atr_itm_.Calc_qte_byte(itm_ary, itm_idx);
|
||||
bfr.Add_byte(quote_byte);
|
||||
bfr.Add(val_bry);
|
||||
bfr.Add_byte(quote_byte);
|
||||
}
|
||||
}
|
||||
if (literal) {
|
||||
int atr_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_bgn];
|
||||
int atr_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_end];
|
||||
bfr.Add_mid(src, atr_bgn, atr_end);
|
||||
}
|
||||
else {
|
||||
bfr.Add_byte_space();
|
||||
bfr.Add(key_bry);
|
||||
bfr.Add_byte(Byte_ascii.Eq);
|
||||
byte quote_byte = Mwh_atr_itm.Calc_qte_byte(itm_ary, itm_idx);
|
||||
bfr.Add_byte(quote_byte);
|
||||
bfr.Add(convert_mgr.Convert_text(convert_vnt_idx, src, val_bgn, val_end));
|
||||
bfr.Add_byte(quote_byte);
|
||||
}
|
||||
}
|
||||
public void On_txt_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {
|
||||
switch (nde_tid) {
|
||||
@@ -70,5 +77,6 @@ class Vnt_html_doc_wkr implements Mwh_doc_wkr {
|
||||
}
|
||||
public void On_nde_tail_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {bfr.Add_mid(src, itm_bgn, itm_end);}
|
||||
public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {bfr.Add_mid(src, itm_bgn, itm_end);}
|
||||
public void On_entity_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {bfr.Add_mid(src, itm_bgn, itm_end);}
|
||||
private static final byte[] Bry__url_frag = Bry_.new_a7("://"); // REF.MW: if ( !strpos( $attr, '://' ) ) {
|
||||
}
|
||||
|
||||
58
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_log_mgr.java
Normal file
58
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_log_mgr.java
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
class Vnt_log_mgr {
|
||||
private int uid;
|
||||
private int page_id, rule_idx;
|
||||
private Xol_vnt_regy vnt_regy;
|
||||
private int[] vnt_ary = new int[10];
|
||||
private Vnt_log_tbl tbl;
|
||||
public void Init_by_db(Db_conn conn, Xol_vnt_regy vnt_regy) {
|
||||
this.vnt_regy = vnt_regy;
|
||||
this.tbl = new Vnt_log_tbl(conn);
|
||||
tbl.Create_tbl();
|
||||
this.uid = 0;
|
||||
this.page_id = 0;
|
||||
}
|
||||
public void Init_by_page(int page_id) {
|
||||
this.page_id = page_id;
|
||||
this.rule_idx = -1;
|
||||
for (int i = 0; i < 10; ++i)
|
||||
vnt_ary[i] = 0;
|
||||
}
|
||||
public void Log_lang(byte[] vnt, int scope) {Log_lang(vnt_regy.Get_by(vnt), scope);}
|
||||
public void Log_lang(Xol_vnt_itm itm, int scope) {
|
||||
int idx = itm.Idx();
|
||||
int val = vnt_ary[idx];
|
||||
vnt_ary[idx] = val == 0 ? scope : val | scope;
|
||||
}
|
||||
public void Log_rule(int src_bgn, int src_end, byte[] src_txt, Vnt_flag_code_mgr flag_codes, Vnt_flag_lang_mgr flag_langs, Vnt_rule_undi_mgr rule_undis, Vnt_rule_bidi_mgr rule_bidis) {
|
||||
tbl.Insert(uid, page_id, ++rule_idx
|
||||
, flag_codes.Count(), flag_langs.Count(), rule_undis.Len(), rule_bidis.Len()
|
||||
, flag_codes.Get(Vnt_flag_code_.Tid_add), flag_codes.Get(Vnt_flag_code_.Tid_del), flag_codes.Get(Vnt_flag_code_.Tid_aout), flag_codes.Get(Vnt_flag_code_.Tid_hide), flag_codes.Get(Vnt_flag_code_.Tid_raw), flag_codes.Get(Vnt_flag_code_.Tid_show), flag_codes.Get(Vnt_flag_code_.Tid_descrip), flag_codes.Get(Vnt_flag_code_.Tid_name), flag_codes.Get(Vnt_flag_code_.Tid_title), flag_codes.Get(Vnt_flag_code_.Tid_err)
|
||||
, vnt_ary[0], vnt_ary[1], vnt_ary[2], vnt_ary[3], vnt_ary[4], vnt_ary[5], vnt_ary[6], vnt_ary[7], vnt_ary[8], vnt_ary[9]
|
||||
, src_bgn, src_end, src_txt
|
||||
);
|
||||
}
|
||||
public void Rls() {
|
||||
tbl.Rls();
|
||||
}
|
||||
public static final int Scope__lang = 1, Scope__undi = 2, Scope__bidi = 4;
|
||||
}
|
||||
106
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_log_tbl.java
Normal file
106
400_xowa/src/gplx/xowa/parsers/vnts/Vnt_log_tbl.java
Normal file
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.dbs.*;
|
||||
public class Vnt_log_tbl implements RlsAble {
|
||||
private final String tbl_name = "log_vnt"; private final Db_meta_fld_list flds = Db_meta_fld_list.new_();
|
||||
private final String fld_uid, fld_page_id, fld_rule_idx
|
||||
, fld_flag_count, fld_lang_count, fld_undi_count, fld_bidi_count
|
||||
, fld_flag_add, fld_flag_del, fld_flag_aout, fld_flag_hide, fld_flag_raw, fld_flag_show, fld_flag_descrip, fld_flag_name, fld_flag_title, fld_flag_err
|
||||
, fld_vnt_0, fld_vnt_1, fld_vnt_2, fld_vnt_3, fld_vnt_4, fld_vnt_5, fld_vnt_6, fld_vnt_7, fld_vnt_8, fld_vnt_9
|
||||
, fld_src_bgn, fld_src_end, fld_src_txt;
|
||||
private Db_stmt stmt_insert;
|
||||
public Vnt_log_tbl(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
this.fld_uid = flds.Add_int("uid");
|
||||
this.fld_page_id = flds.Add_int("page_id");
|
||||
this.fld_rule_idx = flds.Add_int("rule_idx");
|
||||
this.fld_flag_count = flds.Add_int("flag_count");
|
||||
this.fld_lang_count = flds.Add_int("lang_count");
|
||||
this.fld_undi_count = flds.Add_int("undi_count");
|
||||
this.fld_bidi_count = flds.Add_int("bidi_count");
|
||||
this.fld_flag_add = flds.Add_int("flag_add");
|
||||
this.fld_flag_del = flds.Add_int("flag_del");
|
||||
this.fld_flag_aout = flds.Add_int("flag_aout");
|
||||
this.fld_flag_hide = flds.Add_int("flag_hide");
|
||||
this.fld_flag_raw = flds.Add_int("flag_raw");
|
||||
this.fld_flag_show = flds.Add_int("flag_show");
|
||||
this.fld_flag_descrip = flds.Add_int("flag_descrip");
|
||||
this.fld_flag_name = flds.Add_int("flag_name");
|
||||
this.fld_flag_title = flds.Add_int("flag_title");
|
||||
this.fld_flag_err = flds.Add_int("flag_err");
|
||||
this.fld_vnt_0 = flds.Add_int("vnt_0");
|
||||
this.fld_vnt_1 = flds.Add_int("vnt_1");
|
||||
this.fld_vnt_2 = flds.Add_int("vnt_2");
|
||||
this.fld_vnt_3 = flds.Add_int("vnt_3");
|
||||
this.fld_vnt_4 = flds.Add_int("vnt_4");
|
||||
this.fld_vnt_5 = flds.Add_int("vnt_5");
|
||||
this.fld_vnt_6 = flds.Add_int("vnt_6");
|
||||
this.fld_vnt_7 = flds.Add_int("vnt_7");
|
||||
this.fld_vnt_8 = flds.Add_int("vnt_8");
|
||||
this.fld_vnt_9 = flds.Add_int("vnt_9");
|
||||
this.fld_src_bgn = flds.Add_int("src_bgn");
|
||||
this.fld_src_end = flds.Add_int("src_end");
|
||||
this.fld_src_txt = flds.Add_text("src_txt");
|
||||
conn.Rls_reg(this);
|
||||
}
|
||||
public Db_conn Conn() {return conn;} private final Db_conn conn;
|
||||
public void Rls() {
|
||||
stmt_insert = Db_stmt_.Rls(stmt_insert);
|
||||
}
|
||||
public void Create_tbl() {conn.Ddl_create_tbl(Db_meta_tbl.new_(tbl_name, flds));}
|
||||
public void Insert(int uid, int page_id, int rule_idx, int flag_count, int lang_count, int undi_count, int bidi_count
|
||||
, boolean flag_add, boolean flag_del, boolean flag_aout, boolean flag_hide, boolean flag_raw, boolean flag_show, boolean flag_descrip, boolean flag_name, boolean flag_title, boolean flag_err
|
||||
, int vnt_0, int vnt_1, int vnt_2, int vnt_3, int vnt_4, int vnt_5, int vnt_6, int vnt_7, int vnt_8, int vnt_9
|
||||
, int src_bgn, int src_end, byte[] src_txt
|
||||
) {
|
||||
if (stmt_insert == null) stmt_insert = conn.Stmt_insert(tbl_name, flds);
|
||||
stmt_insert.Clear()
|
||||
.Val_int(fld_uid, uid)
|
||||
.Val_int(fld_page_id, page_id)
|
||||
.Val_int(fld_rule_idx, rule_idx)
|
||||
.Val_int(fld_flag_count, flag_count)
|
||||
.Val_int(fld_lang_count, lang_count)
|
||||
.Val_int(fld_undi_count, undi_count)
|
||||
.Val_int(fld_bidi_count, bidi_count)
|
||||
.Val_int_by_bool(fld_flag_add, flag_add)
|
||||
.Val_int_by_bool(fld_flag_del, flag_del)
|
||||
.Val_int_by_bool(fld_flag_aout, flag_aout)
|
||||
.Val_int_by_bool(fld_flag_hide, flag_hide)
|
||||
.Val_int_by_bool(fld_flag_raw, flag_raw)
|
||||
.Val_int_by_bool(fld_flag_show, flag_show)
|
||||
.Val_int_by_bool(fld_flag_descrip, flag_descrip)
|
||||
.Val_int_by_bool(fld_flag_name, flag_name)
|
||||
.Val_int_by_bool(fld_flag_title, flag_title)
|
||||
.Val_int_by_bool(fld_flag_err, flag_err)
|
||||
.Val_int(fld_vnt_0, vnt_0)
|
||||
.Val_int(fld_vnt_1, vnt_1)
|
||||
.Val_int(fld_vnt_2, vnt_2)
|
||||
.Val_int(fld_vnt_3, vnt_3)
|
||||
.Val_int(fld_vnt_4, vnt_4)
|
||||
.Val_int(fld_vnt_5, vnt_5)
|
||||
.Val_int(fld_vnt_6, vnt_6)
|
||||
.Val_int(fld_vnt_7, vnt_7)
|
||||
.Val_int(fld_vnt_8, vnt_8)
|
||||
.Val_int(fld_vnt_9, vnt_9)
|
||||
.Val_int(fld_src_bgn, src_bgn)
|
||||
.Val_int(fld_src_end, src_end)
|
||||
.Val_bry_as_str(fld_src_txt, src_txt)
|
||||
.Exec_insert();
|
||||
}
|
||||
}
|
||||
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
class Vnt_rule_bidi_mgr {
|
||||
private final Ordered_hash hash = Ordered_hash_.new_bry_();
|
||||
private final Ordered_hash hash = Ordered_hash_.New_bry();
|
||||
public int Len() {return hash.Count();}
|
||||
public boolean Has_none() {return hash.Count() == 0;}
|
||||
public void Clear() {hash.Clear();}
|
||||
|
||||
@@ -21,9 +21,11 @@ import gplx.xowa.langs.vnts.*;
|
||||
class Vnt_rule_parser implements gplx.core.brys.Bry_split_wkr {
|
||||
private final Btrie_slim_mgr vnt_trie = Btrie_slim_mgr.ci_a7();
|
||||
private Vnt_rule_undi_mgr undis; private Vnt_rule_bidi_mgr bidis;
|
||||
private int src_end, src_len; private byte[] rule_raw;
|
||||
private int src_end; private byte[] rule_raw;
|
||||
public byte[] Raw() {return rule_raw;}
|
||||
public void Init(Xol_vnt_regy vnt_regy) {
|
||||
private Vnt_log_mgr log_mgr;
|
||||
public void Init(Vnt_log_mgr log_mgr, Xol_vnt_regy vnt_regy) {
|
||||
this.log_mgr = log_mgr;
|
||||
this.vnt_trie.Clear();
|
||||
int len = vnt_regy.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
@@ -37,23 +39,23 @@ class Vnt_rule_parser implements gplx.core.brys.Bry_split_wkr {
|
||||
this.rule_raw = rule_raw;
|
||||
}
|
||||
public void Parse(byte[] src, int src_bgn, int src_end) {
|
||||
this.src_end = src_end; this.src_len = src.length;
|
||||
this.src_end = src_end;
|
||||
Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Semic, false, this); // trim=false for "&#entity;" check below
|
||||
}
|
||||
public int Split(byte[] src, int itm_bgn, int itm_end) { // macro=>zh-hans:text;
|
||||
int html_entity_pos = Bry_find_.Find_bwd_while_alphanum(src, itm_end);
|
||||
byte html_entity_byte = src[html_entity_pos];
|
||||
if (html_entity_byte == Byte_ascii.Hash) html_entity_byte = src[html_entity_pos - 2]; // skip #; EX: {
|
||||
if (html_entity_byte == Byte_ascii.Amp) return Bry_split_.Rv__extend; // reject "&#entity;"; EX: " zh-hans;"
|
||||
if (html_entity_byte == Byte_ascii.Hash) html_entity_byte = src[html_entity_pos - 2]; // skip #; EX: {
|
||||
if (html_entity_byte == Byte_ascii.Amp) return Bry_split_.Rv__extend; // reject "&#entity;"; EX: " zh-hans;"
|
||||
if (itm_end != src_end) {
|
||||
int nxt_lang_bgn = Bry_find_.Find_fwd(src, Bry__bidi_dlm, itm_end + 1, src_len); // look for next "=>"
|
||||
int nxt_lang_bgn = Bry_find_.Find_fwd(src, Bry__bidi_dlm, itm_end + 1, src_end); // look for next "=>"
|
||||
if (nxt_lang_bgn == Bry_find_.Not_found)
|
||||
nxt_lang_bgn = Bry_find_.Find_fwd_while_ws(src, itm_end + 1, src_len); // skip any ws after end ";"; EX: "a:1; b:2"; NOTE: +1 to skip semic;
|
||||
nxt_lang_bgn = Bry_find_.Find_fwd_while_ws(src, itm_end + 1, src_end); // skip any ws after end ";"; EX: "a:1; b:2"; NOTE: +1 to skip semic;
|
||||
else
|
||||
nxt_lang_bgn += 2;
|
||||
int nxt_lang_end = Bry_find_.Find_fwd(src, Byte_ascii.Colon, nxt_lang_bgn, src_len); // get colon;
|
||||
int nxt_lang_end = Bry_find_.Find_fwd(src, Byte_ascii.Colon, nxt_lang_bgn, src_end); // get colon;
|
||||
if (nxt_lang_end != Bry_find_.Not_found) {
|
||||
nxt_lang_end = Bry_find_.Find_bwd__skip_ws(src, nxt_lang_end, src_len); // trim
|
||||
nxt_lang_end = Bry_find_.Find_bwd__skip_ws(src, nxt_lang_end, src_end); // trim
|
||||
if (vnt_trie.Match_bgn(src, nxt_lang_bgn, nxt_lang_end) == null) return Bry_split_.Rv__extend; // reject ";not_variant"; EX: ";border" in "zh-hans:<span style='color:blue;border:1px;'>;zh-hant:"
|
||||
}
|
||||
}
|
||||
@@ -65,21 +67,24 @@ class Vnt_rule_parser implements gplx.core.brys.Bry_split_wkr {
|
||||
undi_end = Bry_find_.Find_bwd__skip_ws(src, undi_end, undi_bgn); // trim ws from end of bd;
|
||||
}
|
||||
Object vnt_obj = vnt_trie.Match_bgn(src, lang_bgn, itm_end);
|
||||
if (vnt_obj == null) {
|
||||
if (vnt_obj == null)
|
||||
return (itm_bgn == 0) ? Bry_split_.Rv__cancel : Bry_split_.Rv__extend; // if 1st item; cancel rest; otherwise, extend
|
||||
}
|
||||
int lang_end = vnt_trie.Match_pos();
|
||||
int text_bgn = Bry_find_.Find_fwd_while_ws(src, lang_end, itm_end); if (src[text_bgn] != Byte_ascii.Colon) return Bry_split_.Rv__extend;
|
||||
++text_bgn;
|
||||
Xol_vnt_itm vnt_itm = (Xol_vnt_itm)vnt_obj;
|
||||
byte[] vnt_key = vnt_itm.Key();
|
||||
byte[] text_bry = Bry_.Mid_w_trim(src, text_bgn, itm_end);
|
||||
if (undi_end == Bry_find_.Not_found)
|
||||
if (undi_end == Bry_find_.Not_found) {
|
||||
if (log_mgr != null) log_mgr.Log_lang(vnt_itm, Vnt_log_mgr.Scope__bidi);
|
||||
bidis.Set(vnt_key, text_bry);
|
||||
}
|
||||
else {
|
||||
byte[] undi_bry = Bry_.Mid(src, undi_bgn, undi_end);
|
||||
if (itm_end - text_bgn > 0)
|
||||
if (itm_end - text_bgn > 0) {
|
||||
if (log_mgr != null) log_mgr.Log_lang(vnt_itm, Vnt_log_mgr.Scope__undi);
|
||||
undis.Set(vnt_key, undi_bry, text_bry);
|
||||
}
|
||||
}
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
|
||||
@@ -25,13 +25,13 @@ class Vnt_rule_parser_fxt {
|
||||
vnt_regy.Add(Bry_.new_a7("x1"), Bry_.new_a7("lang1"));
|
||||
vnt_regy.Add(Bry_.new_a7("x2"), Bry_.new_a7("lang2"));
|
||||
vnt_regy.Add(Bry_.new_a7("x3"), Bry_.new_a7("lang3"));
|
||||
parser.Init(vnt_regy);
|
||||
parser.Init(null, vnt_regy);
|
||||
}
|
||||
public void Test_parse(String raw, String... expd_ary) {
|
||||
byte[] src = Bry_.new_u8(raw);
|
||||
parser.Clear(undis, bidis, src);
|
||||
parser.Parse(src, 0, src.length);
|
||||
parser.To_bry__dbg(bfr);
|
||||
Tfds.Eq_str_lines(String_.Concat_lines_nl_skip_last(expd_ary), bfr.Xto_str_and_clear());
|
||||
Tfds.Eq_str_lines(String_.Concat_lines_nl_skip_last(expd_ary), bfr.To_str_and_clear());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
class Vnt_rule_undi_mgr {
|
||||
private final Ordered_hash hash = Ordered_hash_.new_bry_();
|
||||
private final Ordered_hash hash = Ordered_hash_.New_bry();
|
||||
public int Len() {return hash.Count();}
|
||||
public boolean Has_none() {return hash.Count() == 0;}
|
||||
public void Clear() {hash.Clear();}
|
||||
@@ -51,7 +51,7 @@ class Vnt_rule_undi_mgr {
|
||||
}
|
||||
}
|
||||
class Vnt_rule_undi_grp {
|
||||
private final Ordered_hash hash = Ordered_hash_.new_bry_();
|
||||
private final Ordered_hash hash = Ordered_hash_.New_bry();
|
||||
public Vnt_rule_undi_grp(byte[] vnt) {this.vnt = vnt;}
|
||||
public int Len() {return hash.Count();}
|
||||
public Vnt_rule_undi_itm Get_at(int i) {return (Vnt_rule_undi_itm)hash.Get_at(i);}
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
// Token for the "=>" delimiter.
// The constructor records the begin/end positions through Tkn_ini_pos (first argument false),
// and Tkn_tid reports Xop_tkn_itm_.Tid_vnt_eqgt as the token type.
public class Xop_vnt_eqgt_tkn extends Xop_tkn_itm_base { // "=>"
|
||||
public Xop_vnt_eqgt_tkn(int bgn, int end) {this.Tkn_ini_pos(false, bgn, end);}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_vnt_eqgt;}
|
||||
}
|
||||
@@ -1,75 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xop_vnt_flag {
|
||||
public Xop_vnt_flag(int tid) {this.tid = tid; this.mask = 0;}
|
||||
public Xop_vnt_flag(int tid, int mask) {this.tid = tid; this.mask = mask;}
|
||||
public int Tid() {return tid;} private final int tid;
|
||||
public int Mask() {return mask;} private final int mask;
|
||||
public static Xop_vnt_flag new_lang(int mask) {return new Xop_vnt_flag(Xop_vnt_flag_.Tid_lang, mask);}
|
||||
}
|
||||
class Xop_vnt_flag_ { // REF.MW: /languages/LanguageConverter.php
|
||||
public static final Xop_vnt_flag[] Ary_empty = new Xop_vnt_flag[0];
|
||||
public static final int
|
||||
Tid_unknown = 0
|
||||
, Tid_show = 1 // EX: -{S|zh-hans:A;zh-hant:B}- -> "A"
|
||||
, Tid_all = 2 // EX: -{+|zh-hans:A;zh-hant:B}- -> "A"
|
||||
, Tid_err = 3 // EX: -{E|zh-hans:A;zh-hant:B}- -> "A"
|
||||
, Tid_add = 4 // add and output; EX: -{A|zh-hans:A;zh-hant:B}- -> "A"
|
||||
, Tid_title = 5 // page_title; EX: -{T|zh-hans:A;zh-hant:B}- -> ""
|
||||
, Tid_raw = 6 // raw: no convert; EX: -{R|zh-hans:A;zh-hant:B}- -> "zh-hans:A;zh-hant:B"
|
||||
, Tid_descrip = 7 // describe; EX: -{D|zh-hans:A;zh-hant:B}- -> "简体:A;繁體:B;" (简体=Simplified;繁體=Traditional)
|
||||
, Tid_del = 8 // remove; EX: -{-|zh-hans:A;zh-hant:B}- -> ""
|
||||
, Tid_macro = 9 // macro; EX: -{H|zh-hans:A;zh-hant:B}- -> ""
|
||||
, Tid_name = 10 // EX: -{N|zh-hans:A;zh-hant:B}- -> ""
|
||||
, Tid_lang = 11 // EX: -{zh-hant|B}- -> "B"
|
||||
, Tid_len = 12
|
||||
;
|
||||
public static final byte Tid__max = 12;
|
||||
private static final String[] Tid__names = new String[]
|
||||
{ "unknown", "show", "all", "err", "add", "title"
|
||||
, "raw", "descrip", "del", "macro", "name", "lang"
|
||||
};
|
||||
public static String To_name(int tid) {return Tid__names[tid];}
|
||||
public static final Xop_vnt_flag
|
||||
Flag_unknown = new Xop_vnt_flag(Tid_unknown)
|
||||
, Flag_show = new Xop_vnt_flag(Tid_show)
|
||||
, Flag_all = new Xop_vnt_flag(Tid_all)
|
||||
, Flag_err = new Xop_vnt_flag(Tid_err)
|
||||
, Flag_add = new Xop_vnt_flag(Tid_add)
|
||||
, Flag_title = new Xop_vnt_flag(Tid_title)
|
||||
, Flag_raw = new Xop_vnt_flag(Tid_raw)
|
||||
, Flag_descrip = new Xop_vnt_flag(Tid_descrip)
|
||||
, Flag_del = new Xop_vnt_flag(Tid_del)
|
||||
, Flag_macro = new Xop_vnt_flag(Tid_macro)
|
||||
, Flag_name = new Xop_vnt_flag(Tid_name)
|
||||
;
|
||||
public static final Btrie_fast_mgr Trie = Btrie_fast_mgr.ci_a7() // NOTE: match either lc or uc; EX: -{D}- or -{d}-; // NOTE:ci.ascii:MW_const.en; flag keys; EX: -{S|a}-
|
||||
.Add(Byte_ascii.Ltr_S , Xop_vnt_flag_.Flag_show)
|
||||
.Add(Byte_ascii.Plus , Xop_vnt_flag_.Flag_all)
|
||||
.Add(Byte_ascii.Ltr_E , Xop_vnt_flag_.Flag_err)
|
||||
.Add(Byte_ascii.Ltr_A , Xop_vnt_flag_.Flag_add)
|
||||
.Add(Byte_ascii.Ltr_T , Xop_vnt_flag_.Flag_title)
|
||||
.Add(Byte_ascii.Ltr_R , Xop_vnt_flag_.Flag_raw)
|
||||
.Add(Byte_ascii.Ltr_D , Xop_vnt_flag_.Flag_descrip)
|
||||
.Add(Byte_ascii.Dash , Xop_vnt_flag_.Flag_del)
|
||||
.Add(Byte_ascii.Ltr_H , Xop_vnt_flag_.Flag_macro)
|
||||
.Add(Byte_ascii.Ltr_N , Xop_vnt_flag_.Flag_name)
|
||||
;
|
||||
}
|
||||
@@ -1,92 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
class Xop_vnt_flag_parser {
|
||||
private final Xop_vnt_lang_bldr flag_lang_bldr;
|
||||
private final Xol_vnt_regy vnt_regy;
|
||||
public Xop_vnt_flag_parser(Xol_vnt_mgr vnt_mgr) {this.flag_lang_bldr = new Xop_vnt_lang_bldr(vnt_mgr); this.vnt_regy = vnt_mgr.Regy();}
|
||||
public int Rslt_tkn_pos() {return rslt_tkn_pos;} private int rslt_tkn_pos;
|
||||
public int Rslt_pipe_last() {return rslt_pipe_last;} private int rslt_pipe_last;
|
||||
public Xop_vnt_flag[] Rslt_flags() {return rslt_flags;} private Xop_vnt_flag[] rslt_flags;
|
||||
public void Parse(Xowe_wiki wiki, Xoa_url page_url, Xop_vnt_tkn vnt_tkn, int pipe_tkn_count, byte[] src) {
|
||||
flag_lang_bldr.Clear();
|
||||
int rv_idx = 0;
|
||||
int subs_len = vnt_tkn.Subs_len();
|
||||
this.rslt_flags = new Xop_vnt_flag[pipe_tkn_count];
|
||||
this.rslt_tkn_pos = 0;
|
||||
Bry_bfr flag_bfr = wiki.Utl__bfr_mkr().Get_b128();
|
||||
boolean loop = true;
|
||||
while (loop) {
|
||||
Xop_tkn_itm sub = vnt_tkn.Subs_get(rslt_tkn_pos);
|
||||
switch (sub.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_space: case Xop_tkn_itm_.Tid_tab: case Xop_tkn_itm_.Tid_newLine: break; // skip ws
|
||||
case Xop_tkn_itm_.Tid_txt: flag_bfr.Add_mid(src, sub.Src_bgn(), sub.Src_end()); break; // just add text
|
||||
case Xop_tkn_itm_.Tid_pipe:
|
||||
rslt_flags[rv_idx++] = Parse_flag_bry(flag_bfr.Xto_bry_and_clear());
|
||||
if (rv_idx == pipe_tkn_count) {
|
||||
loop = false;
|
||||
rslt_pipe_last = sub.Src_end();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
wiki.Appe().Usr_dlg().Log_many("", "", "unknown tkn in vnt flag; url=~{0} tid=~{1} txt=~{2}", page_url.To_str(), sub.Tkn_tid(), String_.new_u8(src, sub.Src_bgn(), sub.Src_end()));
|
||||
flag_bfr.Add_mid(src, sub.Src_bgn(), sub.Src_end());
|
||||
break;
|
||||
}
|
||||
++rslt_tkn_pos;
|
||||
if (rslt_tkn_pos == subs_len) break;
|
||||
}
|
||||
flag_bfr.Mkr_rls();
|
||||
}
|
||||
private Xop_vnt_flag Parse_flag_bry(byte[] bry) {
|
||||
int bry_len = bry.length; if (bry_len == 0) return Xop_vnt_flag_.Flag_unknown; // EX: exit early if 0 len, else trie will fail; EX: "-{|}-"
|
||||
Object flag_obj = flag_trie.Match_exact(bry, 0, bry_len);
|
||||
return flag_obj == null
|
||||
? Parse_flag_vnts(bry, bry_len) // unknown tid sequence; either (a) "lang" cmd ("-{zh-hans;zh-hant|a}-") or (b) invalid cmd ("-{X|a}-")
|
||||
: (Xop_vnt_flag)flag_obj; // known flag; check that next non_ws is |
|
||||
}
|
||||
private Xop_vnt_flag Parse_flag_vnts(byte[] bry, int bry_len) {
|
||||
int vnt_pos = 0;
|
||||
boolean loop = true;
|
||||
Btrie_slim_mgr trie = vnt_regy.Trie();
|
||||
while (loop) {
|
||||
boolean last = false, valid = true;
|
||||
Object vnt_obj = trie.Match_bgn(bry, vnt_pos, bry_len);
|
||||
if (vnt_obj == null) break; // no more vnts found; stop
|
||||
vnt_pos = trie.Match_pos(); // update pos to end of vnt
|
||||
int semic_pos = Bry_find_.Find_fwd_while_not_ws(bry, vnt_pos, bry_len);
|
||||
if (semic_pos == bry_len) // note that Find_fwd_non_ws will return bry_len if no non-ws found;
|
||||
last = true;
|
||||
else { // char found; make sure it is semic
|
||||
if (bry[semic_pos] != Byte_ascii.Semic) // invalid vnt; ignore; EX: -{zh-hansx|}-
|
||||
valid = false;
|
||||
vnt_pos = semic_pos + 1; // update pos to after semic
|
||||
if (vnt_pos == bry_len) last = true; // EX: "a;"
|
||||
}
|
||||
if (valid)
|
||||
flag_lang_bldr.Add(((Xol_vnt_itm)vnt_obj).Key());
|
||||
else // invalid entry clears list; EX: -{zh-hans;zh-bad}-
|
||||
flag_lang_bldr.Clear();
|
||||
if (last) break;
|
||||
}
|
||||
return flag_lang_bldr.Bld();
|
||||
}
|
||||
private static final Btrie_fast_mgr flag_trie = Xop_vnt_flag_.Trie;
|
||||
}
|
||||
@@ -1,77 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.html.*; import gplx.xowa.langs.vnts.*;
|
||||
public class Xop_vnt_html_wtr {
|
||||
public static void Write(Bry_bfr bfr, Xoh_html_wtr html_wtr, Xop_ctx ctx, Xoh_wtr_ctx hctx, Xoae_page page, byte[] src, Xop_vnt_tkn vnt) {
|
||||
Xol_vnt_mgr vnt_mgr = ctx.Wiki().Lang().Vnt_mgr(); Xol_vnt_regy vnt_regy = vnt_mgr.Regy();
|
||||
byte[] cur_lang_vnt = vnt_mgr.Cur_key();
|
||||
Xop_vnt_rule_tkn[] rules = vnt.Vnt_rules(); if (rules == null) return; // shouldn't happen, but guard anyway
|
||||
int rules_len = rules.length;
|
||||
switch (vnt.Vnt_cmd()) {
|
||||
case Xop_vnt_html_wtr.Cmd_empty: break; // nothing: ""
|
||||
case Xop_vnt_html_wtr.Cmd_error: // original token; "-{A}-"
|
||||
bfr.Add_mid(src, vnt.Src_bgn(), vnt.Src_end());
|
||||
break;
|
||||
case Xop_vnt_html_wtr.Cmd_literal: { // val only; "A"
|
||||
Xop_vnt_rule_tkn rule_0 = rules[0]; // Cmd_calc guarantees there will always be 1 item
|
||||
html_wtr.Write_tkn_ary(bfr, ctx, hctx, src, rule_0.Rule_subs());
|
||||
break;
|
||||
}
|
||||
case Xop_vnt_html_wtr.Cmd_bidi: { // matching rule: "A" if zh-hans; -{zh-hans:A}-
|
||||
Xop_vnt_rule_tkn rule = Get_rule_by_key(vnt_mgr, vnt_regy, rules, rules_len, cur_lang_vnt);
|
||||
if (rule != null) html_wtr.Write_tkn_ary(bfr, ctx, hctx, src, rule.Rule_subs());
|
||||
break;
|
||||
}
|
||||
case Xop_vnt_html_wtr.Cmd_lang: { // matching lang: "A" if zh-hans; -{zh-hans|A}-
|
||||
Xop_vnt_rule_tkn rule_0 = rules[0]; // Cmd_calc guarantees there will always be 1 rule
|
||||
Xop_vnt_flag flag_0 = vnt.Vnt_flags()[0]; // parse guarantees there will always be 1 flag
|
||||
if (vnt_regy.Mask__match_any(flag_0.Mask(), vnt_mgr.Cur_itm().Mask__fallbacks()))
|
||||
html_wtr.Write_tkn_ary(bfr, ctx, hctx, src, rule_0.Rule_subs());
|
||||
break;
|
||||
}
|
||||
case Xop_vnt_html_wtr.Cmd_raw: { // raw; everything between last flag and }-: "-{R|zh-hans:A;zh-hant:B}- -> "zh-hans:A;zh-hant:B"
|
||||
bfr.Add_mid(src, vnt.Vnt_pipe_idx_last(), vnt.Src_end() - 2);
|
||||
break;
|
||||
}
|
||||
case Xop_vnt_html_wtr.Cmd_descrip: { // descrip; similar to raw, but use localized lang
|
||||
// bfr.Add_mid(src, vnt.Vnt_pipe_idx_last(), vnt.Src_end() - 2);
|
||||
break;
|
||||
}
|
||||
case Xop_vnt_html_wtr.Cmd_title: break; // title: ignore; already handled during parse; DATE:2014-08-29
|
||||
}
|
||||
}
|
||||
public static Xop_vnt_rule_tkn Get_rule_by_key(Xol_vnt_mgr vnt_mgr, Xol_vnt_regy vnt_regy, Xop_vnt_rule_tkn[] rules, int rules_len, byte[] cur_lang_vnt) {
|
||||
vnt_regy.Mask__sort(rules); // sort to put more specific in front; EX: -{zh-hans:A;zh-cn:B}- should be "B", not "A"
|
||||
for (int i = 0; i < rules_len; i++) {
|
||||
Xop_vnt_rule_tkn rule = rules[i];
|
||||
if (vnt_regy.Mask__match_any(vnt_regy.Mask__calc(rule.Rule_lang()), vnt_mgr.Cur_itm().Mask__fallbacks())) return rule;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
public static final byte
|
||||
Cmd_error = 0 // eror -> output literal; EX: "-{some_unknown_error}-" -> "-{some_unknown_error}-"
|
||||
, Cmd_empty = 1 // empty -> output nothing; EX: "-{}-" -> ""
|
||||
, Cmd_literal = 2 // literal EX: "-{A}-" -> "A"
|
||||
, Cmd_bidi = 3 // bidi EX: "-{zh-hans:A;zh-hant:B}-" -> "A" if zh-hans; "B" if zh-hant
|
||||
, Cmd_lang = 4 // lang EX: "-{zh-hans|A}-" -> "A" if zh-hans; "" if zh-hant
|
||||
, Cmd_raw = 5 // raw; text in -{}- EX: "-{R|zh-hans:A;zh-hant:B}- -> "zh-hans:A;zh-hant:B"
|
||||
, Cmd_descrip = 6 // describe; output rules EX: "-{D|zh-hans:A;zh-hant:B}- -> "简体:A;繁體:B;"
|
||||
, Cmd_title = 7 // title; change title EX: "-{T|zh-hans:A;zh-hant:B}- -> "A" as display title
|
||||
;
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
class Xop_vnt_lang_bldr { // performant way of building langs; EX: -{zh;zh-hans;zh-cn|B}-
|
||||
private final Xol_vnt_regy vnt_regy;
|
||||
private int rslt_mask;
|
||||
public Xop_vnt_lang_bldr(Xol_vnt_mgr vnt_mgr) {this.vnt_regy = vnt_mgr.Regy();}
|
||||
public void Clear() {rslt_mask = 0;}
|
||||
public void Add(byte[] key) {
|
||||
Xol_vnt_itm vnt = vnt_regy.Get_by(key); if (vnt == null) return; // ignore invalid vnts; EX: -{zh;zhx}-
|
||||
int vnt_mask = vnt.Mask__vnt();
|
||||
this.rslt_mask = (rslt_mask == 0) ? vnt_mask : Bitmask_.Flip_int(true, rslt_mask, vnt_mask);
|
||||
}
|
||||
public Xop_vnt_flag Bld() {
|
||||
return (rslt_mask == 0) ? Xop_vnt_flag_.Flag_unknown : Xop_vnt_flag.new_lang(rslt_mask);
|
||||
}
|
||||
}
|
||||
@@ -1,73 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
class Xop_vnt_lxr_bgn implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_vnt_bgn;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Xop_vnt_lxr_.Hook_bgn, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
ctx.Subs_add_and_stack(root, tkn_mkr.Vnt(bgn_pos, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public static final Xop_vnt_lxr_bgn _ = new Xop_vnt_lxr_bgn(); Xop_vnt_lxr_bgn() {}
|
||||
}
|
||||
class Xop_vnt_lxr_end implements Xop_lxr {
|
||||
private Xop_vnt_flag_parser flag_parser;
|
||||
private Xop_vnt_rules_parser rule_parser;
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_vnt_end;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
|
||||
core_trie.Add(Xop_vnt_lxr_.Hook_end, this);
|
||||
Xol_vnt_mgr vnt_mgr = wiki.Lang().Vnt_mgr();
|
||||
flag_parser = new Xop_vnt_flag_parser(vnt_mgr);
|
||||
rule_parser = new Xop_vnt_rules_parser(vnt_mgr);
|
||||
}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
int stack_pos = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_vnt);
|
||||
if (stack_pos == Xop_ctx.Stack_not_found) return ctx.Lxr_make_txt_(cur_pos); // "}-" found but no "-{" in stack;
|
||||
Xop_vnt_tkn vnt_tkn = (Xop_vnt_tkn)ctx.Stack_pop_til(root, src, stack_pos, false, bgn_pos, cur_pos, Xop_tkn_itm_.Tid_vnt);
|
||||
Xowe_wiki wiki = ctx.Wiki();
|
||||
try {
|
||||
vnt_tkn.Src_end_(cur_pos);
|
||||
vnt_tkn.Subs_move(root);
|
||||
Xop_vnt_flag[] vnt_flag_ary = Xop_vnt_flag_.Ary_empty;
|
||||
int rule_subs_bgn = 0;
|
||||
int pipe_tkn_count = vnt_tkn.Vnt_pipe_tkn_count();
|
||||
if (pipe_tkn_count > 0) {
|
||||
flag_parser.Parse(wiki, ctx.Cur_page().Url(), vnt_tkn, pipe_tkn_count, src);
|
||||
vnt_flag_ary = flag_parser.Rslt_flags();
|
||||
rule_subs_bgn = flag_parser.Rslt_tkn_pos();
|
||||
vnt_tkn.Vnt_pipe_idx_last_(flag_parser.Rslt_pipe_last());
|
||||
}
|
||||
vnt_tkn.Vnt_flags_(vnt_flag_ary);
|
||||
Xop_vnt_rule_tkn[] rules = rule_parser.Parse(ctx, vnt_tkn, src, rule_subs_bgn);
|
||||
vnt_tkn.Vnt_rules_(rules);
|
||||
vnt_tkn.Vnt_cmd_calc(wiki, ctx.Cur_page(), ctx, src);
|
||||
}
|
||||
catch (Exception e) {
|
||||
ctx.App().Usr_dlg().Warn_many("", "", "vnt.parse failed: page=~{0} src=~{1} err=~{2}", ctx.Cur_page().Ttl().Raw(), String_.new_u8(src, bgn_pos, cur_pos), Err_.Message_gplx_full(e));
|
||||
if (vnt_tkn != null)
|
||||
root.Subs_add(tkn_mkr.Bry_mid(src, vnt_tkn.Src_bgn(), cur_pos));
|
||||
}
|
||||
return cur_pos;
|
||||
}
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
public class Xop_vnt_lxr_ {
	public static final byte[]
	  Hook_bgn = new byte[] {Byte_ascii.Dash, Byte_ascii.Curly_bgn}		// "-{"
	, Hook_end = new byte[] {Byte_ascii.Curly_end, Byte_ascii.Dash}		// "}-"
	;
	/**
	 * Registers the three vnt lexers (eqgt, bgn, end) with the wiki's wtxt lexer trie.
	 * Does nothing when Hook_bgn already matches in that trie.
	 * NOTE: the lexers are not added to the tmpl trie.
	 */
	public static void Init(Xowe_wiki wiki) {
		Btrie_fast_mgr wtxt_trie = wiki.Parser_mgr().Main().Wtxt_lxr_mgr().Trie();
		Object registered = wtxt_trie.Match_bgn(Hook_bgn, 0, Hook_bgn.length);
		if (registered != null) return;
		Xop_vnt_lxr_eqgt._.Init_by_wiki(wiki, wtxt_trie);
		Xop_vnt_lxr_bgn._.Init_by_wiki(wiki, wtxt_trie);
		new Xop_vnt_lxr_end().Init_by_wiki(wiki, wtxt_trie);
	}
}
|
||||
class Xop_vnt_lxr_eqgt implements Xop_lxr {
|
||||
public int Lxr_tid() {return Xop_lxr_.Tid_vnt_eqgt;}
|
||||
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {core_trie.Add(Hook, this);}
|
||||
public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
ctx.Subs_add_and_stack(root, tkn_mkr.Vnt_eqgt(bgn_pos, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
public void Term(Btrie_fast_mgr core_trie) {}
|
||||
public static final byte[] Hook = new byte[] {Byte_ascii.Eq, Byte_ascii.Gt};
|
||||
public static final Xop_vnt_lxr_eqgt _ = new Xop_vnt_lxr_eqgt(); Xop_vnt_lxr_eqgt() {}
|
||||
}
|
||||
@@ -1,79 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xop_vnt_parser__html__tst { // NOTE: cur_vnt is zh-cn
|
||||
private final Xop_vnt_parser_fxt fxt = new Xop_vnt_parser_fxt();
|
||||
@Before public void init() {fxt.Clear();}
|
||||
@Test public void Literal() {fxt.Test_parse("-{A}-", "A");}
|
||||
@Test public void Bidi_fwd() {fxt.Test_parse("-{zh-cn:B;zh-hans:A}-", "B");}
|
||||
@Test public void Bidi_bwd() {fxt.Test_parse("-{zh-hans:A;zh-cn:B}-", "B");}
|
||||
@Test public void Empty() {fxt.Test_parse("a-{}-b", "ab");}
|
||||
@Test public void Unknown_empty() {fxt.Test_parse("a-{|}-c", "ac");}
|
||||
@Test public void Unknown_text() {fxt.Test_parse("a-{|b}-c", "abc");}
|
||||
@Test public void Unknown_flag() {fxt.Test_parse("a-{x|b}-c", "abc");}
|
||||
@Test public void Lang_y() {fxt.Test_parse("-{zh-hant|A}-", "A");}
|
||||
@Test public void Lang_n() {fxt.Test_parse("-{zh-sg|A}-", "");}
|
||||
@Test public void Raw() {fxt.Test_parse("-{R|zh-hans:A;}-", "zh-hans:A;");}
|
||||
// @Test public void Descrip() {fxt.Test_parse("-{D|zh-hans:A;}-", "zh-hans:A");}
|
||||
@Test public void Tmpl() {
|
||||
fxt.Parser_fxt().Init_page_create("Template:A", "B");
|
||||
fxt.Test_parse("-{{{A}}}-", "B");
|
||||
}
|
||||
@Test public void Tmpl_arg_4() { // PURPOSE: handle "-{" + "{{{"
|
||||
fxt.Parser_fxt().Init_page_create("Template:A", "-{{{{1}}}}-");
|
||||
fxt.Test_parse("{{A|B}}", "B"); // -{ {{{1}}} }- -> -{B}- -> B
|
||||
}
|
||||
@Test public void Tmpl_arg_3() { // PURPOSE: handle "-" + "{{{"; PAGE:sr.w:ДНК; EX:<span id="interwiki-{{{1}}}-fa"></span> DATE:2014-07-03
|
||||
fxt.Parser_fxt().Init_page_create("Template:A", "-{{{1}}}-");
|
||||
fxt.Test_parse("{{A|B}}", "-B-");
|
||||
}
|
||||
@Test public void Parser_function() {
|
||||
fxt.Test_parse("-{{{#expr:1}}}-", "1");
|
||||
}
|
||||
@Test public void Ignore() {
|
||||
fxt.Test_parse("-{{#expr:1}}-", "-1-");
|
||||
}
|
||||
@Test public void Expr() {
|
||||
fxt.Parser_fxt().Init_page_create("Template:A", "{{#expr: 0-{{{1|2}}}}}");
|
||||
fxt.Test_parse("{{A}}", "-2");
|
||||
}
|
||||
@Test public void Invalid() { // PURPOSE: invalid flags should cause vnt to render text only; DATE:2014-04-10
|
||||
fxt.Test_parse("-{:a|b}-", "b");
|
||||
}
|
||||
@Test public void Macro_ignore() { // PURPOSE: ignore macro (implement later); EX:zh.v:西安; Template:pagebanner; DATE:2014-05-03
|
||||
fxt.Test_parse("-{H|zh-cn:亚琛; zh-tw:阿亨;}-", "");
|
||||
}
|
||||
@Test public void Title() { // PURPOSE: implement title; PAGE:zh.w:Help:進階字詞轉換處理 DATE:2014-08-29
|
||||
fxt.Test_parse("-{T|zh-hant:A;zh-hans:B}-", "");
|
||||
Tfds.Eq("A", String_.new_u8(fxt.Parser_fxt().Page().Html_data().Display_ttl_vnt()));
|
||||
}
|
||||
// @Test public void Disabled() {
|
||||
// Xop_fxt fxt = new Xop_fxt();
|
||||
// fxt.Wiki().Vnt_mgr().Set(null, null);
|
||||
// fxt.Test_parse_page_all_str("a-{b}-c", "a-{b}-c");
|
||||
// }
|
||||
// @Test public void Enabled() {
|
||||
// Xoae_app app = Xoa_app_fxt.app_();
|
||||
// Xol_lang lang = new Xol_lang(app, Bry_.new_a7("zh"));
|
||||
// Xowe_wiki wiki = Xoa_app_fxt.wiki_(app, "zh.wikipedia.org", lang);
|
||||
// Xop_fxt fxt = new Xop_fxt(app, wiki);
|
||||
// fxt.Test_parse_page_all_str("a-{b}-c", "ac");
|
||||
// fxt.Wiki().Vnt_mgr().Set(null, null); // set it back to null for other tests
|
||||
// }
|
||||
}
|
||||
@@ -1,153 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.parsers.miscs.*;
|
||||
public class Xop_vnt_parser__tkn__basic__tst {
|
||||
private final Xop_vnt_lxr_fxt fxt = new Xop_vnt_lxr_fxt();
|
||||
@Before public void init() {fxt.Clear();}
|
||||
@Test public void Add__flag__basic() {fxt.Test_parse("-{A|b}-" , fxt.vnt_().Flags_codes_("A").Rule_("b"));}
|
||||
@Test public void Add__flag__ws() {fxt.Test_parse("-{ A |b}-" , fxt.vnt_().Flags_codes_("A").Rule_("b"));}
|
||||
@Test public void Add__rule__semic_n() {fxt.Test_parse("-{A|zh-hans:bcd}-" , fxt.vnt_().Flags_codes_("A").Rule_("zh-hans", "bcd"));}
|
||||
@Test public void Add__rule__semic_y() {fxt.Test_parse("-{A|zh-hans:bcd;}-" , fxt.vnt_().Flags_codes_("A").Rule_("zh-hans", "bcd"));}
|
||||
@Test public void Add__rule__semic_empty() {fxt.Test_parse("-{A|zh-hans:bcd;;}-" , fxt.vnt_().Flags_codes_("A").Rule_("zh-hans", "bcd"));}
|
||||
@Test public void Add__rule__ws() {fxt.Test_parse("-{A|zh-hans : b c ;}-" , fxt.vnt_().Flags_codes_("A").Rule_("zh-hans", "b c"));}
|
||||
@Test public void Add__rule__many() {fxt.Test_parse("-{A|zh-hans:b;zh-hant:c}-" , fxt.vnt_().Flags_codes_("A").Rule_("zh-hans", "b").Rule_("zh-hant", "c"));}
|
||||
@Test public void Langs__flag__semic_n() {fxt.Test_parse("-{zh-hans;zh-hant|b}-" , fxt.vnt_().Flags_langs_(Xop_vnt_tkn_mok.Mask__hans, Xop_vnt_tkn_mok.Mask__hant).Rule_("b"));}
|
||||
@Test public void Langs__flag__semic_y() {fxt.Test_parse("-{zh-hans;zh-hant;|b}-" , fxt.vnt_().Flags_langs_(Xop_vnt_tkn_mok.Mask__hans, Xop_vnt_tkn_mok.Mask__hant).Rule_("b"));}
|
||||
@Test public void Langs__flag__ws() {fxt.Test_parse("-{ zh-hans ; zh-hant ; |b}-" , fxt.vnt_().Flags_langs_(Xop_vnt_tkn_mok.Mask__hans, Xop_vnt_tkn_mok.Mask__hant).Rule_("b"));}
|
||||
@Test public void Langs__unknown__1st() {fxt.Test_parse("-{ zh-hans x ; zh-hant ; |b}-" , fxt.vnt_().Flags_unknown_().Rule_("b"));}
|
||||
@Test public void Langs__unknown__nth() {fxt.Test_parse("-{ zh-hans ; zh-hant x; |b}-" , fxt.vnt_().Flags_unknown_().Rule_("b"));}
|
||||
@Test public void Langs__unknown__all() {fxt.Test_parse("-{ zh-hans x ; zh-hant x;|b}-" , fxt.vnt_().Flags_unknown_().Rule_("b"));}
|
||||
@Test public void Multiple() {fxt.Test_parse("-{A|D|E|b}-" , fxt.vnt_().Flags_codes_("A", "D", "E").Rule_("b"));}
|
||||
@Test public void Unknown__flag_only() {fxt.Test_parse("-{a}-" , fxt.vnt_().Flags_none_().Rule_("a"));}
|
||||
@Test public void Unknown__flag_w_text() {fxt.Test_parse("-{ A x |b}-" , fxt.vnt_().Flags_unknown_().Rule_("b"));}
|
||||
@Test public void Bidi__basic() {fxt.Test_parse("-{zh-hans:a;zh-hant:b}-" , fxt.vnt_().Flags_none_().Rule_("zh-hans", "a").Rule_("zh-hant", "b"));}
|
||||
@Test public void Bidi__invalid__1st() {fxt.Test_parse("-{zh-x:x;zh-hans:a;zh-hant:b}-" , fxt.vnt_().Flags_none_().Rule_("zh-x:x;zh-hans:a;zh-hant:b"));}
|
||||
@Test public void Bidi__invalid__nth() {fxt.Test_parse("-{zh-hans:a;zh-x:x;zh-hant:b}-" , fxt.vnt_().Flags_none_().Rule_("zh-hans", "a;zh-x:x").Rule_("zh-hant", "b"));}
|
||||
// @Test public void Bidi__html() {
|
||||
// fxt.Test_parse("-{zh-cn:<span class='border:1px;text-align:center;'>text1</span>;zh-tw:<span class='border:1px;text-align:center;'>tex21</span>;}-"
|
||||
// , fxt.vnt_().Flags_none_().Rule_("zh-hans", "a;zh-x:x").Rule_("zh-hant", "b"));
|
||||
// }
|
||||
}
|
||||
class Xop_vnt_tkn_mok {
|
||||
private final List_adp rules_list = List_adp_.new_(), flags_list = List_adp_.new_();
|
||||
private Xop_vnt_flag[] flags;
|
||||
public Xop_vnt_flag[] Flags() {
|
||||
if (flags == null) flags = (Xop_vnt_flag[])flags_list.To_ary(Xop_vnt_flag.class);
|
||||
return flags;
|
||||
}
|
||||
public Xop_vnt_tkn_mok Flags_none_() {flags_list.Clear(); return this;}
|
||||
public Xop_vnt_tkn_mok Flags_unknown_(String... v) {flags_list.Add(Xop_vnt_flag_.Flag_unknown); return this;}
|
||||
public Xop_vnt_tkn_mok Flags_langs_(int... ary) {flags_list.Add(Xop_vnt_flag.new_lang(Bitmask_.Add_int_ary(ary))); return this;}
|
||||
public Xop_vnt_tkn_mok Flags_codes_(String... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte[] bry = Bry_.new_a7(ary[i]);
|
||||
Xop_vnt_flag flag = (Xop_vnt_flag)Xop_vnt_flag_.Trie.Match_bgn(bry, 0, bry.length);
|
||||
flags_list.Add(flag);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public Xop_vnt_rule_tkn[] Rules() {
|
||||
if (rules == null) rules = (Xop_vnt_rule_tkn[])rules_list.To_ary(Xop_vnt_rule_tkn.class);
|
||||
return rules;
|
||||
} private Xop_vnt_rule_tkn[] rules;
|
||||
public Xop_vnt_tkn_mok Rule_(String rule) {return Rule_(Xop_vnt_rule_tkn.Null_lang, rule);}
|
||||
public Xop_vnt_tkn_mok Rule_(byte[] lang, String rule) {return Rule_(Xop_vnt_rule_tkn.Null_macro, lang, new Xop_bry_tkn(-1, -1, Bry_.new_u8(rule)));}
|
||||
public Xop_vnt_tkn_mok Rule_(String lang, String rule) {return Rule_(Xop_vnt_rule_tkn.Null_macro, Bry_.new_a7(lang), new Xop_bry_tkn(-1, -1, Bry_.new_u8(rule)));}
|
||||
public Xop_vnt_tkn_mok Rule_(String macro, String lang, String rule) {return Rule_(Bry_.new_a7(macro), Bry_.new_a7(lang), new Xop_bry_tkn(-1, -1, Bry_.new_u8(rule)));}
|
||||
public Xop_vnt_tkn_mok Rule_(byte[] macro, byte[] lang, Xop_tkn_itm... tkns) {rules_list.Add(new Xop_vnt_rule_tkn(macro, lang, tkns)); return this;}
|
||||
public static final int Mask__hans = 2, Mask__hant = 4;
|
||||
}
|
||||
class Xop_vnt_lxr_fxt {
|
||||
private Xop_fxt fxt;
|
||||
private Xol_vnt_regy vnt_regy;
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr.new_();
|
||||
public Xop_vnt_lxr_fxt Clear() {
|
||||
Xoae_app app = Xoa_app_fxt.app_();
|
||||
Xowe_wiki wiki = Xoa_app_fxt.wiki_(app, "zh.wikipedia.org");
|
||||
fxt = new Xop_fxt(app, wiki);
|
||||
Xop_vnt_lxr_fxt.Init_vnt_mgr(wiki.Lang().Vnt_mgr(), "zh-hans", "zh-hant", "zh-cn");
|
||||
Xop_vnt_lxr_.Init(wiki);
|
||||
this.vnt_regy = wiki.Lang().Vnt_mgr().Regy();
|
||||
return this;
|
||||
}
|
||||
public Xop_vnt_tkn_mok vnt_() {return new Xop_vnt_tkn_mok();}
|
||||
public static void Init_vnt_mgr(Xol_vnt_mgr vnt_mgr, String... vnts_str) {
|
||||
byte[][] vnts_bry = Bry_.Ary(vnts_str);
|
||||
int vnts_bry_len = vnts_bry.length;
|
||||
for (int i = 0; i < vnts_bry_len; i++)
|
||||
vnt_mgr.Regy__get_or_new(vnts_bry[i]);
|
||||
vnt_mgr.Convert_mgr().Init(vnt_mgr.Regy());
|
||||
}
|
||||
public Xop_vnt_lxr_fxt Test_parse(String raw, Xop_vnt_tkn_mok expd) {
|
||||
byte[] raw_bry = Bry_.new_u8(raw);
|
||||
Xop_root_tkn root = fxt.Exec_parse_page_all_as_root(raw_bry);
|
||||
Xop_vnt_tkn actl = (Xop_vnt_tkn)root.Subs_get(0);
|
||||
Test_vnt_tkn(raw_bry, expd, actl);
|
||||
return this;
|
||||
}
|
||||
private void Test_vnt_tkn(byte[] raw_bry, Xop_vnt_tkn_mok expd, Xop_vnt_tkn actl) {
|
||||
Tfds.Eq(Vnt_flag_ary_to_str(tmp_bfr, expd.Flags()), Vnt_flag_ary_to_str(tmp_bfr, actl.Vnt_flags()), "flags");
|
||||
Tfds.Eq_str_lines(Vnt_rule_ary_to_str(tmp_bfr, raw_bry, expd.Rules()), Vnt_rule_ary_to_str(tmp_bfr, raw_bry, actl.Vnt_rules()), "rules");
|
||||
}
|
||||
private String Vnt_flag_ary_to_str(Bry_bfr bfr, Xop_vnt_flag[] ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xop_vnt_flag itm = ary[i];
|
||||
int itm_tid = itm.Tid();
|
||||
if (itm_tid == Xop_vnt_flag_.Tid_lang)
|
||||
Vnt_flag_lang_to_bfr(bfr, itm);
|
||||
else
|
||||
bfr.Add_str(Xop_vnt_flag_.To_name(itm_tid)).Add_byte(Byte_ascii.Semic);
|
||||
}
|
||||
return bfr.Xto_str_and_clear();
|
||||
}
|
||||
private void Vnt_flag_lang_to_bfr(Bry_bfr bfr, Xop_vnt_flag itm) {
|
||||
int itm_mask = itm.Mask();
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
int mask = gplx.core.brys.Bit_.Get_flag(i);
|
||||
if (Bitmask_.Has_int(mask, itm_mask)) {
|
||||
Xol_vnt_itm vnt = vnt_regy.Get_at(i);
|
||||
bfr.Add(vnt.Key()).Add_byte(Byte_ascii.Semic);
|
||||
}
|
||||
}
|
||||
}
|
||||
private String Vnt_rule_ary_to_str(Bry_bfr bfr, byte[] src, Xop_vnt_rule_tkn[] ary) {
|
||||
if (ary == null) return "";
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xop_vnt_rule_tkn itm = ary[i];
|
||||
if (itm.Rule_macro() != Xop_vnt_rule_tkn.Null_macro) // macro exists
|
||||
bfr.Add(itm.Rule_macro()).Add_str("=>");
|
||||
if (itm.Rule_lang() != Xop_vnt_rule_tkn.Null_lang) // lang exists
|
||||
bfr.Add(itm.Rule_lang()).Add_byte(Byte_ascii.Colon);
|
||||
Xop_tkn_itm[] subs = itm.Rule_subs();
|
||||
int subs_len = subs.length;
|
||||
for (int j = 0; j < subs_len; j++) {
|
||||
Xop_tkn_itm sub = subs[j];
|
||||
if (sub.Tkn_tid() == Xop_tkn_itm_.Tid_bry) // tests uses Xop_tkn_bry
|
||||
bfr.Add(((Xop_bry_tkn)sub).Val());
|
||||
else
|
||||
bfr.Add_mid(src, sub.Src_bgn(), sub.Src_end());
|
||||
}
|
||||
bfr.Add_byte(Byte_ascii.Semic).Add_byte_nl();
|
||||
}
|
||||
return bfr.Xto_str_and_clear();
|
||||
}
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.parsers.miscs.*;
|
||||
public class Xop_vnt_parser__tkn__macro__tst {
|
||||
private final Xop_vnt_lxr_fxt fxt = new Xop_vnt_lxr_fxt();
|
||||
@Before public void init() {fxt.Clear();}
|
||||
@Test public void Basic() {
|
||||
fxt.Test_parse("-{H|A1=>zh-hans:B1}-", fxt.vnt_().Flags_codes_("H")
|
||||
.Rule_("A1", "zh-hans", "B1"));
|
||||
}
|
||||
@Test public void Many() {
|
||||
fxt.Test_parse("-{H|A1=>zh-hans:B1;A2=>zh-hant:B2;A3=>zh-cn:B3;}-", fxt.vnt_().Flags_codes_("H")
|
||||
.Rule_("A1", "zh-hans", "B1")
|
||||
.Rule_("A2", "zh-hant", "B2")
|
||||
.Rule_("A3", "zh-cn" , "B3")
|
||||
);
|
||||
}
|
||||
@Test public void Mixed() {
|
||||
fxt.Test_parse("-{H|A1=>zh-hans:B1;zh-hant:B2;A3=>zh-cn:B3}-"
|
||||
, fxt.vnt_().Flags_codes_("H")
|
||||
.Rule_("A1" , "zh-hans" , "B1")
|
||||
.Rule_( "zh-hant" , "B2")
|
||||
.Rule_("A3", "zh-cn" , "B3")
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.vnts.*;
|
||||
public class Xop_vnt_parser_fxt {
|
||||
public Xop_fxt Parser_fxt() {return fxt;} private Xop_fxt fxt;
|
||||
public Xop_vnt_parser_fxt Clear() {
|
||||
Xoae_app app = Xoa_app_fxt.app_();
|
||||
Xowe_wiki wiki = Xoa_app_fxt.wiki_(app, "zh.wikipedia.org");
|
||||
fxt = new Xop_fxt(app, wiki);
|
||||
Xol_lang lang = wiki.Lang();
|
||||
lang.Fallback_bry_(Bry_.new_a7("zh-cn,zh-hans,zh-hant"));
|
||||
Xop_vnt_parser_fxt.Vnt_mgr__init(lang.Vnt_mgr(), 3, Vnts_chinese);
|
||||
Xop_vnt_lxr_.Init(wiki);
|
||||
return this;
|
||||
}
|
||||
public Xop_vnt_parser_fxt Test_parse(String raw, String expd) {
|
||||
fxt.Test_parse_page_all_str(raw, expd);
|
||||
return this;
|
||||
}
|
||||
public static void Vnt_mgr__init(Xol_vnt_mgr vnt_mgr, int cur_idx, String[] ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xol_vnt_itm itm = vnt_mgr.Regy__get_or_new(Bry_.new_a7(ary[i]));
|
||||
vnt_mgr.Lang().Lang_mgr().Get_by_key_or_load(itm.Key()).Fallback_bry_(Bry_.new_a7("zh-hans,zh-hant"));
|
||||
}
|
||||
vnt_mgr.Init_end();
|
||||
vnt_mgr.Cur_vnt_(Bry_.new_a7(ary[cur_idx]));
|
||||
}
|
||||
public static final String[] Vnts_chinese = String_.Ary("zh", "zh-hans", "zh-hant", "zh-cn", "zh-hk", "zh-mo", "zh-sg", "zh-tw");
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
public class Xop_vnt_rule_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_vnt_rule_tkn(byte[] rule_macro, byte[] rule_lang, Xop_tkn_itm[] rule_subs) {
|
||||
this.rule_macro = rule_macro; this.rule_lang = rule_lang; this.rule_subs = rule_subs;
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_vnt_rule;}
|
||||
public byte[] Rule_macro() {return rule_macro;} private final byte[] rule_macro;
|
||||
public byte[] Rule_lang() {return rule_lang;} private final byte[] rule_lang;
|
||||
public Xop_tkn_itm[] Rule_subs() {return rule_subs;} private final Xop_tkn_itm[] rule_subs;
|
||||
public static final byte[] Null_lang = null, Null_macro = null;
|
||||
}
|
||||
@@ -1,172 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
import gplx.xowa.parsers.miscs.*;
|
||||
class Xop_vnt_rules_parser {
|
||||
private final Btrie_slim_mgr trie;
|
||||
private final List_adp rules_list = List_adp_.new_(), text_tkns = List_adp_.new_();
|
||||
private int tmp_area; private byte[] tmp_macro, tmp_lang; private int tmp_key_bgn = -1;
|
||||
private int text_tkns__last_text_tkn; private boolean check_for_ws_at_bgn = false; private int semic_pos;
|
||||
private Xop_tkn_mkr tkn_mkr;
|
||||
public Xop_vnt_rules_parser(Xol_vnt_mgr vnt_mgr) {this.trie = Xop_vnt_rules_parser_.new_trie(vnt_mgr.Regy());}
|
||||
public void Clear() {
|
||||
rules_list.Clear(); text_tkns.Clear();
|
||||
this.tmp_area = Area_key;
|
||||
this.tmp_macro = tmp_lang = null;
|
||||
this.tmp_key_bgn = -1;
|
||||
this.text_tkns__last_text_tkn = 0;
|
||||
this.check_for_ws_at_bgn = false;
|
||||
this.semic_pos = -1;
|
||||
}
|
||||
public Xop_vnt_rule_tkn[] Parse(Xop_ctx ctx, Xop_vnt_tkn vnt_tkn, byte[] src, int subs_bgn) {// parse for macro;lang;text; EX: -{macro1=>lang1:text1;macro2=>lang2:text2;}
|
||||
synchronized (rules_list) {
|
||||
this.Clear();
|
||||
this.tkn_mkr = ctx.Tkn_mkr();
|
||||
int subs_len = vnt_tkn.Subs_len(); int subs_idx = subs_bgn;
|
||||
boolean valid = true;
|
||||
while (subs_idx < subs_len) {
|
||||
if (!Parse_tkn(vnt_tkn, src, subs_idx)) {
|
||||
valid = false;
|
||||
break;
|
||||
}
|
||||
++subs_idx;
|
||||
}
|
||||
if (valid)
|
||||
Make_rule(); // make rules for any pending items; EX: "-{A|text}-"; "text" is unclosed by semic and would need to be processed
|
||||
if (rules_list.Count() == 0) { // no rules made; assume text is just literal; EX: "-{Abc}-"
|
||||
Xop_tkn_itm[] rule_subs = Xop_vnt_rules_parser_.Get_subs_as_ary(vnt_tkn, subs_bgn, subs_len);
|
||||
rules_list.Add(new Xop_vnt_rule_tkn(Xop_vnt_rule_tkn.Null_macro, Xop_vnt_rule_tkn.Null_lang, rule_subs));
|
||||
}
|
||||
return (Xop_vnt_rule_tkn[])rules_list.To_ary_and_clear(Xop_vnt_rule_tkn.class);
|
||||
}
|
||||
}
|
||||
private boolean Parse_tkn(Xop_vnt_tkn vnt_tkn, byte[] src, int subs_idx) {
|
||||
Xop_tkn_itm sub = vnt_tkn.Subs_get(subs_idx);
|
||||
boolean add_to_text_tkns = true, sub_is_middle_ws = false;
|
||||
switch (sub.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_colon:
|
||||
switch (tmp_area) {
|
||||
case Area_lang_found: // colon should only follow lang; EX: -{zh-hant:text}-
|
||||
if (tmp_lang == null) { // if tmp_lang already exists, ignore colon; treat as part of text; EX: -{zh-hant:text1:text2}-
|
||||
tmp_lang = Bry_.Mid_w_trim(src, tmp_key_bgn, sub.Src_bgn());
|
||||
tmp_area = Area_text;
|
||||
add_to_text_tkns = false; // do not add ":"
|
||||
check_for_ws_at_bgn = true;
|
||||
}
|
||||
break;
|
||||
default: // colon found after unknown lang; EX: "-{zh-hant:a;zh-x:b}-"
|
||||
if (rules_list.Count() == 0) return false; // invalid lang at start renders entire rule invalid;
|
||||
Xop_vnt_rule_tkn rule = (Xop_vnt_rule_tkn)List_adp_.Pop(rules_list); // invalid rule in middle just adds text to previous rule
|
||||
Xop_tkn_itm[] text_tkns_ary = rule.Rule_subs();
|
||||
for (Xop_tkn_itm itm : text_tkns_ary)
|
||||
text_tkns.Add(itm);
|
||||
tmp_macro = rule.Rule_macro();
|
||||
tmp_lang = rule.Rule_lang();
|
||||
text_tkns.Add(tkn_mkr.Bry_raw(semic_pos, sub.Src_end(), Bry_.Mid_w_trim(src, semic_pos, sub.Src_end())));
|
||||
tmp_area = Area_text;
|
||||
add_to_text_tkns = false;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_space: case Xop_tkn_itm_.Tid_tab: case Xop_tkn_itm_.Tid_newLine: // skip ws
|
||||
if (check_for_ws_at_bgn) add_to_text_tkns = false; // prevents ws at bgn from being added to text_tkns; EX: " a"
|
||||
else sub_is_middle_ws = true;
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_vnt_eqgt:
|
||||
if (tmp_area == Area_key) {
|
||||
tmp_macro = Bry_.Mid_w_trim(src, tmp_key_bgn, sub.Src_bgn());
|
||||
add_to_text_tkns = false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_txt:
|
||||
add_to_text_tkns = Parse_bry(sub, src, sub.Src_bgn(), sub.Src_end());
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_bry:
|
||||
byte[] bry = ((Xop_bry_tkn)sub).Val();
|
||||
add_to_text_tkns = Parse_bry(sub, bry, 0, bry.length);
|
||||
break;
|
||||
}
|
||||
if (sub_is_middle_ws)
|
||||
text_tkns__last_text_tkn = text_tkns.Count(); // ws found in middle or end; update text_tkns__last_text_tkn; EX: "a "
|
||||
else
|
||||
text_tkns__last_text_tkn = -1; // not a middle_ws; set to -1; EX: "a b"
|
||||
if (tmp_area == Area_text && add_to_text_tkns) { // tmp_area is text && not ":", "=>", or leading "\s"
|
||||
text_tkns.Add(sub);
|
||||
check_for_ws_at_bgn = false; // stop checking for ws at bgn
|
||||
}
|
||||
return true;
|
||||
}
|
||||
private boolean Parse_bry(Xop_tkn_itm sub, byte[] src, int src_bgn, int src_end) {// NOTE: parse byte-per-byte b/c ";" is not a tkn; note that class is designed around this behavior, particulary tmp_key_bgn
|
||||
int cur_pos = src_bgn;
|
||||
while (cur_pos < src_end) {
|
||||
if (tmp_key_bgn == -1) tmp_key_bgn = cur_pos;
|
||||
byte b = src[cur_pos];
|
||||
Object itm_obj = trie.Match_bgn_w_byte(b, src, cur_pos, src_end);
|
||||
if (itm_obj == null) // not a lang or semic; ignore
|
||||
++cur_pos;
|
||||
else {
|
||||
Xop_vnt_rule_trie_itm itm = (Xop_vnt_rule_trie_itm)itm_obj;
|
||||
int new_pos = trie.Match_pos();
|
||||
switch (itm.Tid()) {
|
||||
case Xop_vnt_rule_trie_itm.Tid_lang: // lang; EX: "zh-hant"; only process inside Area_key;
|
||||
if (tmp_area == Area_key) {
|
||||
int nxt_pos = Bry_find_.Find_fwd_while_space_or_tab(src, new_pos, src_end); // skip any ws at end
|
||||
if (nxt_pos == src_end) { // eos for tkn; valid; EX: "zh-hant", "zh-hant "
|
||||
tmp_key_bgn = cur_pos;
|
||||
tmp_area = Area_lang_found;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Xop_vnt_rule_trie_itm.Tid_semic: // delimiter for multiple langs; EX: -{zh-hans:A;zh-cn:B}-
|
||||
semic_pos = cur_pos;
|
||||
switch (tmp_area) {
|
||||
case Area_text: // in Area_text; make rule;
|
||||
text_tkns.Add(tkn_mkr.Bry_raw(src_bgn, cur_pos, Bry_.Mid_w_trim(src, src_bgn, cur_pos))); // add everything up to ";"; EX: "text1;zh-hant"; add "text1"
|
||||
Make_rule();
|
||||
break;
|
||||
case Area_key: // ignore; empty semic's; EX: "zh-hant:a;;"
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
cur_pos = new_pos;
|
||||
}
|
||||
}
|
||||
return tmp_area == Area_text;
|
||||
}
|
||||
private void Make_rule() {// called after ";" or before "}-"
|
||||
int text_tkns_len = text_tkns.Count();
|
||||
if ( tmp_area != Area_text // stil in key area; EX: -{text;}-
|
||||
|| text_tkns_len == 0 // no text tkns; EX: -{zh-hans:;}
|
||||
) return;
|
||||
if (text_tkns__last_text_tkn != -1) text_tkns.Del_range(text_tkns__last_text_tkn, text_tkns.Count() - 1); // ignore trailing ws tkns
|
||||
Xop_tkn_itm[] rule_subs = (Xop_tkn_itm[])text_tkns.To_ary_and_clear(Xop_tkn_itm.class);
|
||||
Xop_vnt_rule_tkn rule = new Xop_vnt_rule_tkn(tmp_macro, tmp_lang, rule_subs);
|
||||
rules_list.Add(rule);
|
||||
tmp_macro = null;
|
||||
tmp_lang = null;
|
||||
tmp_area = Area_key;
|
||||
tmp_key_bgn = -1;
|
||||
}
|
||||
private static final int Area_key = 1, Area_lang_found = 2, Area_text = 3;
|
||||
}
|
||||
@@ -1,60 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.langs.vnts.*;
|
||||
class Xop_vnt_rules_parser_ {
|
||||
public static Btrie_slim_mgr new_trie(Xol_vnt_regy regy) {
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.ci_a7(); // NOTE:ci.ascii:MW_const.en; lang variant name; EX:zh-hans
|
||||
int len = regy.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xol_vnt_itm itm = regy.Get_at(i);
|
||||
byte[] key = itm.Key();
|
||||
rv.Add_obj(key, Xop_vnt_rule_trie_itm.lang_(key));
|
||||
}
|
||||
rv.Add_obj(";", Xop_vnt_rule_trie_itm.Dlm_semic);
|
||||
return rv;
|
||||
}
|
||||
public static Xop_tkn_itm[] Get_subs_as_ary(Xop_tkn_itm owner, int bgn, int end) {
|
||||
int len = end - bgn;
|
||||
Xop_tkn_itm[] rv = new Xop_tkn_itm[len];
|
||||
for (int i = bgn; i < end; i++)
|
||||
rv[i - bgn] = owner.Subs_get(i);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
/**
 * Value stored in the rules-parser trie: either the ";" delimiter or a lang variant key.
 * Instances are immutable; Dlm_semic is a single shared instance, so the fields are final.
 */
class Xop_vnt_rule_trie_itm {
	public static final byte Tid_semic = 1, Tid_lang = 2;
	/** Shared itm for the ";" delimiter; it has no lang. */
	public static final Xop_vnt_rule_trie_itm Dlm_semic = new Xop_vnt_rule_trie_itm(Tid_semic, null);

	private final byte tid;
	private final byte[] lang;

	/**
	 * @param tid  Tid_semic or Tid_lang
	 * @param lang variant key for Tid_lang; null for Tid_semic. The array is stored as-is (not copied).
	 */
	public Xop_vnt_rule_trie_itm(byte tid, byte[] lang) {
		this.tid = tid;
		this.lang = lang;
	}

	/** Creates a lang itm for the given variant key; EX: "zh-hans" */
	public static Xop_vnt_rule_trie_itm lang_(byte[] lang) {return new Xop_vnt_rule_trie_itm(Tid_lang, lang);}

	/** Type of the itm: Tid_semic or Tid_lang. */
	public byte Tid() {return tid;}

	/** Variant key, or null for the ";" itm. */
	public byte[] Lang() {return lang;}
}
|
||||
/*
|
||||
-{flags|lang:rule}- EX: -{A|zh-hant:a}-
|
||||
-{lang:rule;lang:rule}- EX: -{zh-hans:a;zh-hant:b}-
|
||||
-{lang;lang|rule}- EX: -{zh-hans;zh-hant|XXXX}-
|
||||
-{rule}- EX: -{a}-
|
||||
-{flags|from=>variant:to;}- EX: -{H|HUGEBLOCK=>zh-cn:macro;}-
|
||||
-{lang:data_0;data_1;}- EX: -{zh-hans:<span style='border:solid;color:blue;'>;zh-hant:b}-
|
||||
. where data_0 and data_1 are actually one itm: ";" is only a delimiter when it is followed by a variant_code, and data_1 is not one
|
||||
-{zh-hans:a-{zh-hans:b}-c}-
|
||||
*/
|
||||
@@ -1,71 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.html.*; import gplx.xowa.langs.vnts.*;
|
||||
public class Xop_vnt_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_vnt_tkn(int bgn, int end) {
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
vnt_pipe_idx_last = bgn + Xop_vnt_lxr_.Hook_bgn.length; // default last pipe to pos after -{
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_vnt;}
|
||||
public int Vnt_pipe_tkn_count() {return vnt_pipe_tkn_count;}
|
||||
public Xop_vnt_tkn Vnt_pipe_tkn_count_add_() {++vnt_pipe_tkn_count; return this;} private int vnt_pipe_tkn_count;
|
||||
public int Vnt_pipe_idx_last() {return vnt_pipe_idx_last;} public Xop_vnt_tkn Vnt_pipe_idx_last_(int v) {vnt_pipe_idx_last = v; return this;} private int vnt_pipe_idx_last = -1;
|
||||
public Xop_vnt_flag[] Vnt_flags() {return vnt_flags;} public Xop_vnt_tkn Vnt_flags_(Xop_vnt_flag[] v) {vnt_flags = v; return this;} private Xop_vnt_flag[] vnt_flags;
|
||||
public Xop_vnt_rule_tkn[] Vnt_rules() {return vnt_rules;} public Xop_vnt_tkn Vnt_rules_(Xop_vnt_rule_tkn[] v) {vnt_rules = v; return this;} private Xop_vnt_rule_tkn[] vnt_rules;
|
||||
public byte Vnt_cmd() {return vnt_cmd;} private byte vnt_cmd;
|
||||
public void Vnt_cmd_calc(Xowe_wiki wiki, Xoae_page page, Xop_ctx ctx, byte[] src) {
|
||||
int flags_len = vnt_flags.length;
|
||||
int rules_len = vnt_rules.length;
|
||||
if (flags_len == 0) { // no flags; either literal ("-{A}-") or bidi ("-{zh-hans:A;zh-hant:B}-");
|
||||
if (rules_len == 0) vnt_cmd = Xop_vnt_html_wtr.Cmd_empty;
|
||||
else {
|
||||
Xop_vnt_rule_tkn rule_0 = vnt_rules[0];
|
||||
if ( rules_len == 1 // only one rule
|
||||
&& rule_0.Rule_lang() == Xop_vnt_rule_tkn.Null_lang // no lang; EX: -{A}-
|
||||
)
|
||||
vnt_cmd = Xop_vnt_html_wtr.Cmd_literal;
|
||||
else // bidi: either one rule which has lang ("-{zh-hans:A}-") or more than one rule (which can't be literal)
|
||||
vnt_cmd = Xop_vnt_html_wtr.Cmd_bidi;
|
||||
}
|
||||
}
|
||||
else if (flags_len == 1){ // 1 flag; common case
|
||||
Xop_vnt_flag flag_0 = vnt_flags[0];
|
||||
switch (flag_0.Tid()) {
|
||||
case Xop_vnt_flag_.Tid_lang : vnt_cmd = Xop_vnt_html_wtr.Cmd_lang; break;
|
||||
case Xop_vnt_flag_.Tid_raw : vnt_cmd = Xop_vnt_html_wtr.Cmd_raw; break;
|
||||
case Xop_vnt_flag_.Tid_descrip : vnt_cmd = Xop_vnt_html_wtr.Cmd_descrip; break;
|
||||
case Xop_vnt_flag_.Tid_unknown : vnt_cmd = Xop_vnt_html_wtr.Cmd_literal; break; // flag is unknown; output text as literal; EX: "-{|a}-"; "-{X|a}-"
|
||||
case Xop_vnt_flag_.Tid_macro : vnt_cmd = Xop_vnt_html_wtr.Cmd_empty; break; // TODO: implement macro; ignore for now; DATE:2014-05-03
|
||||
case Xop_vnt_flag_.Tid_title: { // title; same as {{DISPLAYTITLE}} but variant aware; PAGE:zh.w:Help:進階字詞轉換處理 DATE:2014-08-29
|
||||
vnt_cmd = Xop_vnt_html_wtr.Cmd_title;
|
||||
Xol_vnt_mgr vnt_mgr = wiki.Lang().Vnt_mgr();
|
||||
byte[] cur_lang_vnt = vnt_mgr.Cur_key();
|
||||
Xop_vnt_rule_tkn rule = Xop_vnt_html_wtr.Get_rule_by_key(vnt_mgr, vnt_mgr.Regy(), vnt_rules, vnt_rules.length, cur_lang_vnt);
|
||||
if (rule != null) {
|
||||
Bry_bfr tmp_bfr = wiki.Utl__bfr_mkr().Get_b512();
|
||||
wiki.Html_mgr().Html_wtr().Write_tkn_ary(tmp_bfr, ctx, Xoh_wtr_ctx.Alt, src, rule.Rule_subs());
|
||||
byte[] display_ttl = tmp_bfr.To_bry_and_rls();
|
||||
page.Html_data().Display_ttl_vnt_(display_ttl);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user