1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

'v3.8.2.1'

This commit is contained in:
gnosygnu
2016-08-07 21:36:50 -04:00
parent b0fdf78a41
commit e4a2af026b
165 changed files with 2534 additions and 1247 deletions

View File

@@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers; import gplx.*; import gplx.xowa.*;
import gplx.langs.htmls.entitys.*;
import gplx.xowa.parsers.apos.*; import gplx.xowa.parsers.amps.*; import gplx.xowa.parsers.lnkes.*; import gplx.xowa.parsers.hdrs.*; import gplx.xowa.parsers.lists.*; import gplx.xowa.parsers.tblws.*;
import gplx.xowa.parsers.paras.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.tmpls.*; import gplx.xowa.parsers.miscs.*; import gplx.xowa.parsers.vnts.*; import gplx.xowa.xtns.cites.*;
public class Xop_tkn_mkr {
@@ -25,7 +26,7 @@ public class Xop_tkn_mkr {
public Xop_space_tkn Space_mutable(int bgn, int end) {return new Xop_space_tkn(false, bgn, end);}
public Xop_apos_tkn Apos(int bgn, int end
, int aposLen, int typ, int cmd, int lit_apos) {return new Xop_apos_tkn(bgn, end, aposLen, typ, cmd, lit_apos);}
public Xop_tkn_itm Amp_txt(int bgn, int end, Xop_amp_trie_itm itm) {return new Xop_amp_tkn_ent(bgn, end, itm);}
public Xop_tkn_itm Amp_txt(int bgn, int end, Gfh_entity_itm itm) {return new Xop_amp_tkn_ent(bgn, end, itm);}
public Xop_tkn_itm Amp_num(int bgn, int end, int val_int, byte[] val_bry) {return new Xop_amp_tkn_num(bgn, end, val_int, val_bry);}
public Xop_tkn_itm Amp_num(int bgn, int end, int val_int) {return new Xop_amp_tkn_num(bgn, end, val_int, gplx.core.intls.Utf16_.Encode_int_to_bry(val_int));}
public Xop_nl_tkn NewLine(int bgn, int end, byte nl_typ, int nl_len) {return new Xop_nl_tkn(bgn, end, nl_typ, nl_len);}

View File

@@ -22,9 +22,10 @@ import gplx.xowa.xtns.pfuncs.exprs.*; import gplx.xowa.xtns.math.*;
// Holder for reusable scratch objects used during parsing.
// Each accessor returns the single instance created when this manager is constructed,
// so callers share (and must reset) the returned object rather than allocating a new one.
public class Xop_tmp_mgr {
  public Xof_math_itm Math_itm() {return math_itm;} private final Xof_math_itm math_itm = new Xof_math_itm();
  public Xof_xfer_itm Xfer_itm() {return xfer_itm;} private final Xof_xfer_itm xfer_itm = new Xof_xfer_itm();
  // two independent number parsers, both with hex enabled; declared exactly once each (duplicate declarations do not compile)
  public Gfo_number_parser Pfunc_num_parser_0() {return num_parser_0;} private final Gfo_number_parser num_parser_0 = new Gfo_number_parser().Hex_enabled_(true);
  public Gfo_number_parser Pfunc_num_parser_1() {return num_parser_1;} private final Gfo_number_parser num_parser_1 = new Gfo_number_parser().Hex_enabled_(true);
  public Pfunc_expr_shunter Expr_shunter() {return expr_shunter;} private final Pfunc_expr_shunter expr_shunter = new Pfunc_expr_shunter();
  public Btrie_slim_mgr Xnde__xtn_end() {return xnde__xtn_end;} private final Btrie_slim_mgr xnde__xtn_end = Btrie_slim_mgr.ci_a7(); // NOTE:ci.ascii:MW_const.en; listed XML node names are en
  public Btrie_rv Xnde__trv() {return xnde__trv;} private final Btrie_rv xnde__trv = new Btrie_rv();
  // starts at zero (Int_obj_ref.New_zero); callers are expected to reset it before reuse -- see Val_zero_() usage in Xop_lnki_wkr_
  public Int_obj_ref Pfunc_rel2abs() {return pfunc_rel2abs;} private final Int_obj_ref pfunc_rel2abs = Int_obj_ref.New_zero();
}

View File

@@ -17,17 +17,18 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
import gplx.langs.htmls.entitys.*;
public class Xop_amp_mgr { // TS
private static final Btrie_rv trv = new Btrie_rv();
public Btrie_slim_mgr Amp_trie() {return amp_trie;} private final Btrie_slim_mgr amp_trie = Xop_amp_trie.Instance;
public Btrie_slim_mgr Amp_trie() {return amp_trie;} private final Btrie_slim_mgr amp_trie = Gfh_entity_trie.Instance;
public Xop_amp_mgr_rslt Parse_tkn(Xop_tkn_mkr tkn_mkr, byte[] src, int src_len, int amp_pos, int bgn) {
int fail_pos = amp_pos + 1; // default to fail pos which is after &
// check amp_trie; EX: 'lt'
Xop_amp_mgr_rslt rv = new Xop_amp_mgr_rslt();
Xop_amp_trie_itm itm; int cur;
Gfh_entity_itm itm; int cur;
synchronized (trv) {
itm = (Xop_amp_trie_itm)amp_trie.Match_at(trv, src, bgn, src_len);
itm = (Gfh_entity_itm)amp_trie.Match_at(trv, src, bgn, src_len);
cur = trv.Pos();
}
@@ -39,15 +40,15 @@ public class Xop_amp_mgr { // TS
// check itm
switch (itm.Tid()) {
// letters; EX: '&lt;'
case Xop_amp_trie_itm.Tid_name_std:
case Xop_amp_trie_itm.Tid_name_xowa:
case Gfh_entity_itm.Tid_name_std:
case Gfh_entity_itm.Tid_name_xowa:
rv.Pos_(cur);
rv.Tkn_(tkn_mkr.Amp_txt(amp_pos, cur, itm));
return rv;
// numbers; EX: '&#123;' '&#x123'
case Xop_amp_trie_itm.Tid_num_hex:
case Xop_amp_trie_itm.Tid_num_dec:
boolean ncr_is_hex = itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
case Gfh_entity_itm.Tid_num_hex:
case Gfh_entity_itm.Tid_num_dec:
boolean ncr_is_hex = itm.Tid() == Gfh_entity_itm.Tid_num_hex;
boolean pass = Parse_ncr(rv, ncr_is_hex, src, src_len, amp_pos, cur);
if (pass) { // NOTE: do not set rv.Pos_(); will be set by Parse_ncr
rv.Tkn_(tkn_mkr.Amp_num(amp_pos, rv.Pos(), rv.Val()));
@@ -119,16 +120,16 @@ public class Xop_amp_mgr { // TS
}
bfr.Add_mid(src, 0, pos);
}
Xop_amp_trie_itm amp_itm = (Xop_amp_trie_itm)amp_obj;
Gfh_entity_itm amp_itm = (Gfh_entity_itm)amp_obj;
switch (amp_itm.Tid()) {
case Xop_amp_trie_itm.Tid_name_std:
case Xop_amp_trie_itm.Tid_name_xowa:
case Gfh_entity_itm.Tid_name_std:
case Gfh_entity_itm.Tid_name_xowa:
bfr.Add(amp_itm.U8_bry());
pos = amp_pos;
break;
case Xop_amp_trie_itm.Tid_num_hex:
case Xop_amp_trie_itm.Tid_num_dec:
boolean ncr_is_hex = amp_itm.Tid() == Xop_amp_trie_itm.Tid_num_hex;
case Gfh_entity_itm.Tid_num_hex:
case Gfh_entity_itm.Tid_num_dec:
boolean ncr_is_hex = amp_itm.Tid() == Gfh_entity_itm.Tid_num_hex;
int int_bgn = amp_pos;
if (amp_rv == null)
amp_rv = new Xop_amp_mgr_rslt();

View File

@@ -16,16 +16,17 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.langs.htmls.entitys.*;
public class Xop_amp_tkn_ent extends Xop_tkn_itm_base {
private Xop_amp_trie_itm html_ref_itm;
public Xop_amp_tkn_ent(int bgn, int end, Xop_amp_trie_itm html_ref_itm) {
private Gfh_entity_itm html_ref_itm;
public Xop_amp_tkn_ent(int bgn, int end, Gfh_entity_itm html_ref_itm) {
this.html_ref_itm = html_ref_itm;
this.Tkn_ini_pos(false, bgn, end);
}
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_html_ref;}
public int Char_int() {return html_ref_itm.Char_int();}
public byte[] Xml_name_bry() {return html_ref_itm.Xml_name_bry();}
public boolean Itm_is_custom() {return html_ref_itm.Tid() == Xop_amp_trie_itm.Tid_name_xowa;}
public boolean Itm_is_custom() {return html_ref_itm.Tid() == Gfh_entity_itm.Tid_name_xowa;}
public void Print_ncr(Bry_bfr bfr) {html_ref_itm.Print_ncr(bfr);}
public void Print_literal(Bry_bfr bfr) {html_ref_itm.Print_literal(bfr);}
}

View File

@@ -1,317 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
// Static registry of HTML entity names -> Xop_amp_trie_itm, exposed as a single shared trie (Instance).
// Keys are stored without the leading "&" (see Add_name); numeric-reference prefixes "#", "#x", "#X" are also registered (see Add_prefix).
public class Xop_amp_trie { // TS
public static final String // NOTE: top_define; entities needed for <nowiki> escaping
Str__xowa_lt = "&xowa_lt;"
, Str__xowa_brack_bgn = "&xowa_brack_bgn;"
, Str__xowa_brack_end = "&xowa_brack_end;"
, Str__xowa_pipe = "&xowa_pipe;"
, Str__xowa_apos = "&xowa_apos;"
, Str__xowa_colon = "&xowa_colon;"
, Str__xowa_underline = "&xowa_underline;"
, Str__xowa_asterisk = "&xowa_asterisk;"
, Str__xowa_space = "&xowa_space;"
, Str__xowa_nl = "&xowa_nl;"
, Str__xowa_dash = "&xowa_dash;"
;
// shared trie built once at class-init; the no-arg ctor on the same line is non-public
public static final Btrie_slim_mgr Instance = New(); Xop_amp_trie() {}
// Builds the trie: xowa-specific names first, then the standard named entities, then the numeric prefixes.
private static Btrie_slim_mgr New() {// REF.MW: Sanitizer|$wgHtmlEntities; NOTE:added apos
Btrie_slim_mgr rv = Btrie_slim_mgr.cs(); // presumably "cs" = case-sensitive; names below differ only by case (EX: "&Aacute;" vs "&aacute;")
// xowa-specific names (tid_is_xowa = Y); code points are the ASCII chars they stand for; EX: 60 = "<"
Add_name(rv, Bool_.Y, 60, Str__xowa_lt);
Add_name(rv, Bool_.Y, 91, Str__xowa_brack_bgn);
Add_name(rv, Bool_.Y, 93, Str__xowa_brack_end);
Add_name(rv, Bool_.Y, 124, Str__xowa_pipe);
Add_name(rv, Bool_.Y, 39, Str__xowa_apos);
Add_name(rv, Bool_.Y, 58, Str__xowa_colon);
Add_name(rv, Bool_.Y, 95, Str__xowa_underline);
Add_name(rv, Bool_.Y, 42, Str__xowa_asterisk);
Add_name(rv, Bool_.Y, 32, Str__xowa_space);
Add_name(rv, Bool_.Y, 10, Str__xowa_nl);
Add_name(rv, Bool_.Y, 45, Str__xowa_dash);
// standard named entities (tid_is_xowa = N): code point, then full name including "&" and ";"
Add_name(rv, Bool_.N, 39, "&apos;");
Add_name(rv, Bool_.N, 193, "&Aacute;");
Add_name(rv, Bool_.N, 225, "&aacute;");
Add_name(rv, Bool_.N, 194, "&Acirc;");
Add_name(rv, Bool_.N, 226, "&acirc;");
Add_name(rv, Bool_.N, 180, "&acute;");
Add_name(rv, Bool_.N, 198, "&AElig;");
Add_name(rv, Bool_.N, 230, "&aelig;");
Add_name(rv, Bool_.N, 192, "&Agrave;");
Add_name(rv, Bool_.N, 224, "&agrave;");
Add_name(rv, Bool_.N, 8501, "&alefsym;");
Add_name(rv, Bool_.N, 913, "&Alpha;");
Add_name(rv, Bool_.N, 945, "&alpha;");
Add_name(rv, Bool_.N, 38, "&amp;");
Add_name(rv, Bool_.N, 8743, "&and;");
Add_name(rv, Bool_.N, 8736, "&ang;");
Add_name(rv, Bool_.N, 197, "&Aring;");
Add_name(rv, Bool_.N, 229, "&aring;");
Add_name(rv, Bool_.N, 8776, "&asymp;");
Add_name(rv, Bool_.N, 195, "&Atilde;");
Add_name(rv, Bool_.N, 227, "&atilde;");
Add_name(rv, Bool_.N, 196, "&Auml;");
Add_name(rv, Bool_.N, 228, "&auml;");
Add_name(rv, Bool_.N, 8222, "&bdquo;");
Add_name(rv, Bool_.N, 914, "&Beta;");
Add_name(rv, Bool_.N, 946, "&beta;");
Add_name(rv, Bool_.N, 166, "&brvbar;");
Add_name(rv, Bool_.N, 8226, "&bull;");
Add_name(rv, Bool_.N, 8745, "&cap;");
Add_name(rv, Bool_.N, 199, "&Ccedil;");
Add_name(rv, Bool_.N, 231, "&ccedil;");
Add_name(rv, Bool_.N, 184, "&cedil;");
Add_name(rv, Bool_.N, 162, "&cent;");
Add_name(rv, Bool_.N, 935, "&Chi;");
Add_name(rv, Bool_.N, 967, "&chi;");
Add_name(rv, Bool_.N, 710, "&circ;");
Add_name(rv, Bool_.N, 9827, "&clubs;");
Add_name(rv, Bool_.N, 8773, "&cong;");
Add_name(rv, Bool_.N, 169, "&copy;");
Add_name(rv, Bool_.N, 8629, "&crarr;");
Add_name(rv, Bool_.N, 8746, "&cup;");
Add_name(rv, Bool_.N, 164, "&curren;");
Add_name(rv, Bool_.N, 8224, "&dagger;");
Add_name(rv, Bool_.N, 8225, "&Dagger;");
Add_name(rv, Bool_.N, 8595, "&darr;");
Add_name(rv, Bool_.N, 8659, "&dArr;");
Add_name(rv, Bool_.N, 176, "&deg;");
Add_name(rv, Bool_.N, 916, "&Delta;");
Add_name(rv, Bool_.N, 948, "&delta;");
Add_name(rv, Bool_.N, 9830, "&diams;");
Add_name(rv, Bool_.N, 247, "&divide;");
Add_name(rv, Bool_.N, 201, "&Eacute;");
Add_name(rv, Bool_.N, 233, "&eacute;");
Add_name(rv, Bool_.N, 202, "&Ecirc;");
Add_name(rv, Bool_.N, 234, "&ecirc;");
Add_name(rv, Bool_.N, 200, "&Egrave;");
Add_name(rv, Bool_.N, 232, "&egrave;");
Add_name(rv, Bool_.N, 8709, "&empty;");
Add_name(rv, Bool_.N, 8195, "&emsp;");
Add_name(rv, Bool_.N, 8194, "&ensp;");
Add_name(rv, Bool_.N, 917, "&Epsilon;");
Add_name(rv, Bool_.N, 949, "&epsilon;");
Add_name(rv, Bool_.N, 8801, "&equiv;");
Add_name(rv, Bool_.N, 919, "&Eta;");
Add_name(rv, Bool_.N, 951, "&eta;");
Add_name(rv, Bool_.N, 208, "&ETH;");
Add_name(rv, Bool_.N, 240, "&eth;");
Add_name(rv, Bool_.N, 203, "&Euml;");
Add_name(rv, Bool_.N, 235, "&euml;");
Add_name(rv, Bool_.N, 8364, "&euro;");
Add_name(rv, Bool_.N, 8707, "&exist;");
Add_name(rv, Bool_.N, 402, "&fnof;");
Add_name(rv, Bool_.N, 8704, "&forall;");
Add_name(rv, Bool_.N, 189, "&frac12;");
Add_name(rv, Bool_.N, 188, "&frac14;");
Add_name(rv, Bool_.N, 190, "&frac34;");
Add_name(rv, Bool_.N, 8260, "&frasl;");
Add_name(rv, Bool_.N, 915, "&Gamma;");
Add_name(rv, Bool_.N, 947, "&gamma;");
Add_name(rv, Bool_.N, 8805, "&ge;");
Add_name(rv, Bool_.N, 62, "&gt;");
Add_name(rv, Bool_.N, 8596, "&harr;");
Add_name(rv, Bool_.N, 8660, "&hArr;");
Add_name(rv, Bool_.N, 9829, "&hearts;");
Add_name(rv, Bool_.N, 8230, "&hellip;");
Add_name(rv, Bool_.N, 205, "&Iacute;");
Add_name(rv, Bool_.N, 237, "&iacute;");
Add_name(rv, Bool_.N, 206, "&Icirc;");
Add_name(rv, Bool_.N, 238, "&icirc;");
Add_name(rv, Bool_.N, 161, "&iexcl;");
Add_name(rv, Bool_.N, 204, "&Igrave;");
Add_name(rv, Bool_.N, 236, "&igrave;");
Add_name(rv, Bool_.N, 8465, "&image;");
Add_name(rv, Bool_.N, 8734, "&infin;");
Add_name(rv, Bool_.N, 8747, "&int;");
Add_name(rv, Bool_.N, 921, "&Iota;");
Add_name(rv, Bool_.N, 953, "&iota;");
Add_name(rv, Bool_.N, 191, "&iquest;");
Add_name(rv, Bool_.N, 8712, "&isin;");
Add_name(rv, Bool_.N, 207, "&Iuml;");
Add_name(rv, Bool_.N, 239, "&iuml;");
Add_name(rv, Bool_.N, 922, "&Kappa;");
Add_name(rv, Bool_.N, 954, "&kappa;");
Add_name(rv, Bool_.N, 923, "&Lambda;");
Add_name(rv, Bool_.N, 955, "&lambda;");
Add_name(rv, Bool_.N, 9001, "&lang;");
Add_name(rv, Bool_.N, 171, "&laquo;");
Add_name(rv, Bool_.N, 8592, "&larr;");
Add_name(rv, Bool_.N, 8656, "&lArr;");
Add_name(rv, Bool_.N, 8968, "&lceil;");
Add_name(rv, Bool_.N, 8220, "&ldquo;");
Add_name(rv, Bool_.N, 8804, "&le;");
Add_name(rv, Bool_.N, 8970, "&lfloor;");
Add_name(rv, Bool_.N, 8727, "&lowast;");
Add_name(rv, Bool_.N, 9674, "&loz;");
Add_name(rv, Bool_.N, 8206, "&lrm;");
Add_name(rv, Bool_.N, 8249, "&lsaquo;");
Add_name(rv, Bool_.N, 8216, "&lsquo;");
Add_name(rv, Bool_.N, 60, "&lt;");
Add_name(rv, Bool_.N, 175, "&macr;");
Add_name(rv, Bool_.N, 8212, "&mdash;");
Add_name(rv, Bool_.N, 181, "&micro;");
Add_name(rv, Bool_.N, 183, "&middot;");
Add_name(rv, Bool_.N, 8722, "&minus;");
Add_name(rv, Bool_.N, 924, "&Mu;");
Add_name(rv, Bool_.N, 956, "&mu;");
Add_name(rv, Bool_.N, 8711, "&nabla;");
Add_name(rv, Bool_.N, 160, "&nbsp;");
Add_name(rv, Bool_.N, 8211, "&ndash;");
Add_name(rv, Bool_.N, 8800, "&ne;");
Add_name(rv, Bool_.N, 8715, "&ni;");
Add_name(rv, Bool_.N, 172, "&not;");
Add_name(rv, Bool_.N, 8713, "&notin;");
Add_name(rv, Bool_.N, 8836, "&nsub;");
Add_name(rv, Bool_.N, 209, "&Ntilde;");
Add_name(rv, Bool_.N, 241, "&ntilde;");
Add_name(rv, Bool_.N, 925, "&Nu;");
Add_name(rv, Bool_.N, 957, "&nu;");
Add_name(rv, Bool_.N, 211, "&Oacute;");
Add_name(rv, Bool_.N, 243, "&oacute;");
Add_name(rv, Bool_.N, 212, "&Ocirc;");
Add_name(rv, Bool_.N, 244, "&ocirc;");
Add_name(rv, Bool_.N, 338, "&OElig;");
Add_name(rv, Bool_.N, 339, "&oelig;");
Add_name(rv, Bool_.N, 210, "&Ograve;");
Add_name(rv, Bool_.N, 242, "&ograve;");
Add_name(rv, Bool_.N, 8254, "&oline;");
Add_name(rv, Bool_.N, 937, "&Omega;");
Add_name(rv, Bool_.N, 969, "&omega;");
Add_name(rv, Bool_.N, 927, "&Omicron;");
Add_name(rv, Bool_.N, 959, "&omicron;");
Add_name(rv, Bool_.N, 8853, "&oplus;");
Add_name(rv, Bool_.N, 8744, "&or;");
Add_name(rv, Bool_.N, 170, "&ordf;");
Add_name(rv, Bool_.N, 186, "&ordm;");
Add_name(rv, Bool_.N, 216, "&Oslash;");
Add_name(rv, Bool_.N, 248, "&oslash;");
Add_name(rv, Bool_.N, 213, "&Otilde;");
Add_name(rv, Bool_.N, 245, "&otilde;");
Add_name(rv, Bool_.N, 8855, "&otimes;");
Add_name(rv, Bool_.N, 214, "&Ouml;");
Add_name(rv, Bool_.N, 246, "&ouml;");
Add_name(rv, Bool_.N, 182, "&para;");
Add_name(rv, Bool_.N, 8706, "&part;");
Add_name(rv, Bool_.N, 8240, "&permil;");
Add_name(rv, Bool_.N, 8869, "&perp;");
Add_name(rv, Bool_.N, 934, "&Phi;");
Add_name(rv, Bool_.N, 966, "&phi;");
Add_name(rv, Bool_.N, 928, "&Pi;");
Add_name(rv, Bool_.N, 960, "&pi;");
Add_name(rv, Bool_.N, 982, "&piv;");
Add_name(rv, Bool_.N, 177, "&plusmn;");
Add_name(rv, Bool_.N, 163, "&pound;");
Add_name(rv, Bool_.N, 8242, "&prime;");
Add_name(rv, Bool_.N, 8243, "&Prime;");
Add_name(rv, Bool_.N, 8719, "&prod;");
Add_name(rv, Bool_.N, 8733, "&prop;");
Add_name(rv, Bool_.N, 936, "&Psi;");
Add_name(rv, Bool_.N, 968, "&psi;");
Add_name(rv, Bool_.N, 34, "&quot;");
Add_name(rv, Bool_.N, 8730, "&radic;");
Add_name(rv, Bool_.N, 9002, "&rang;");
Add_name(rv, Bool_.N, 187, "&raquo;");
Add_name(rv, Bool_.N, 8594, "&rarr;");
Add_name(rv, Bool_.N, 8658, "&rArr;");
Add_name(rv, Bool_.N, 8969, "&rceil;");
Add_name(rv, Bool_.N, 8221, "&rdquo;");
Add_name(rv, Bool_.N, 8476, "&real;");
Add_name(rv, Bool_.N, 174, "&reg;");
Add_name(rv, Bool_.N, 8971, "&rfloor;");
Add_name(rv, Bool_.N, 929, "&Rho;");
Add_name(rv, Bool_.N, 961, "&rho;");
Add_name(rv, Bool_.N, 8207, "&rlm;");
Add_name(rv, Bool_.N, 8250, "&rsaquo;");
Add_name(rv, Bool_.N, 8217, "&rsquo;");
Add_name(rv, Bool_.N, 8218, "&sbquo;");
Add_name(rv, Bool_.N, 352, "&Scaron;");
Add_name(rv, Bool_.N, 353, "&scaron;");
Add_name(rv, Bool_.N, 8901, "&sdot;");
Add_name(rv, Bool_.N, 167, "&sect;");
Add_name(rv, Bool_.N, 173, "&shy;");
Add_name(rv, Bool_.N, 931, "&Sigma;");
Add_name(rv, Bool_.N, 963, "&sigma;");
Add_name(rv, Bool_.N, 962, "&sigmaf;");
Add_name(rv, Bool_.N, 8764, "&sim;");
Add_name(rv, Bool_.N, 9824, "&spades;");
Add_name(rv, Bool_.N, 8834, "&sub;");
Add_name(rv, Bool_.N, 8838, "&sube;");
Add_name(rv, Bool_.N, 8721, "&sum;");
Add_name(rv, Bool_.N, 8835, "&sup;");
Add_name(rv, Bool_.N, 185, "&sup1;");
Add_name(rv, Bool_.N, 178, "&sup2;");
Add_name(rv, Bool_.N, 179, "&sup3;");
Add_name(rv, Bool_.N, 8839, "&supe;");
Add_name(rv, Bool_.N, 223, "&szlig;");
Add_name(rv, Bool_.N, 932, "&Tau;");
Add_name(rv, Bool_.N, 964, "&tau;");
Add_name(rv, Bool_.N, 8756, "&there4;");
Add_name(rv, Bool_.N, 920, "&Theta;");
Add_name(rv, Bool_.N, 952, "&theta;");
Add_name(rv, Bool_.N, 977, "&thetasym;");
Add_name(rv, Bool_.N, 8201, "&thinsp;");
Add_name(rv, Bool_.N, 222, "&THORN;");
Add_name(rv, Bool_.N, 254, "&thorn;");
Add_name(rv, Bool_.N, 732, "&tilde;");
Add_name(rv, Bool_.N, 215, "&times;");
Add_name(rv, Bool_.N, 8482, "&trade;");
Add_name(rv, Bool_.N, 218, "&Uacute;");
Add_name(rv, Bool_.N, 250, "&uacute;");
Add_name(rv, Bool_.N, 8593, "&uarr;");
Add_name(rv, Bool_.N, 8657, "&uArr;");
Add_name(rv, Bool_.N, 219, "&Ucirc;");
Add_name(rv, Bool_.N, 251, "&ucirc;");
Add_name(rv, Bool_.N, 217, "&Ugrave;");
Add_name(rv, Bool_.N, 249, "&ugrave;");
Add_name(rv, Bool_.N, 168, "&uml;");
Add_name(rv, Bool_.N, 978, "&upsih;");
Add_name(rv, Bool_.N, 933, "&Upsilon;");
Add_name(rv, Bool_.N, 965, "&upsilon;");
Add_name(rv, Bool_.N, 220, "&Uuml;");
Add_name(rv, Bool_.N, 252, "&uuml;");
Add_name(rv, Bool_.N, 8472, "&weierp;");
Add_name(rv, Bool_.N, 926, "&Xi;");
Add_name(rv, Bool_.N, 958, "&xi;");
Add_name(rv, Bool_.N, 221, "&Yacute;");
Add_name(rv, Bool_.N, 253, "&yacute;");
Add_name(rv, Bool_.N, 165, "&yen;");
Add_name(rv, Bool_.N, 376, "&Yuml;");
Add_name(rv, Bool_.N, 255, "&yuml;");
Add_name(rv, Bool_.N, 918, "&Zeta;");
Add_name(rv, Bool_.N, 950, "&zeta;");
Add_name(rv, Bool_.N, 8205, "&zwj;");
Add_name(rv, Bool_.N, 8204, "&zwnj;");
// numeric character reference prefixes; both "#x" and "#X" are registered as hex
Add_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#x");
Add_prefix(rv, Xop_amp_trie_itm.Tid_num_hex, "#X");
Add_prefix(rv, Xop_amp_trie_itm.Tid_num_dec, "#");
return rv;
}
// Registers one named entity.
// tid_is_xowa: Y -> Tid_name_xowa; N -> Tid_name_std
// char_int: code point the entity stands for; xml_name_str: full name including "&" and ";"
private static void Add_name(Btrie_slim_mgr trie, boolean tid_is_xowa, int char_int, String xml_name_str) {
byte itm_tid = tid_is_xowa ? Xop_amp_trie_itm.Tid_name_xowa : Xop_amp_trie_itm.Tid_name_std;
byte[] xml_name_bry = Bry_.new_a7(xml_name_str);
byte[] key = Bry_.Mid(xml_name_bry, 1, xml_name_bry.length); // ignore & for purpose of trie; EX: "amp;"; NOTE: must keep trailing ";" else "&amp " will be valid;
trie.Add_obj(key, new Xop_amp_trie_itm(itm_tid, char_int, xml_name_bry));
}
// Registers a numeric-reference prefix; the itm carries Char_int_null and the prefix bytes are used as both trie key and the itm's xml_name_bry
private static void Add_prefix(Btrie_slim_mgr trie, byte prefix_type, String prefix) {
byte[] prefix_ary = Bry_.new_u8(prefix);
Xop_amp_trie_itm itm = new Xop_amp_trie_itm(prefix_type, Xop_amp_trie_itm.Char_int_null, prefix_ary);
trie.Add_obj(prefix_ary, itm);
}
}

View File

@@ -1,57 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.amps; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.langs.htmls.*; import gplx.xowa.htmls.core.wkrs.lnkis.htmls.*;
// Immutable description of one entry in the entity trie: its type id, code point, encoded bytes and xml name.
public class Xop_amp_trie_itm { // TS
  public static final byte Tid_name_std = 1;
  public static final byte Tid_name_xowa = 2;
  public static final byte Tid_num_hex = 3;
  public static final byte Tid_num_dec = 4;
  public static final int Char_int_null = -1;

  private final byte tid;
  private final int char_int;        // val; EX: 160
  private final byte[] u8_bry;       // encoded form of char_int; EX: new byte[] {194, 160}; (C2, A0)
  private final byte[] xml_name_bry; // EX: "&nbsp;"
  private final int key_name_len;    // EX: "nbsp".Len

  // tid: one of the Tid_* constants; char_int: code point (or Char_int_null); xml_name_bry: name bytes as registered
  public Xop_amp_trie_itm(byte tid, int char_int, byte[] xml_name_bry) {
    this.tid = tid;
    this.char_int = char_int;
    this.u8_bry = gplx.core.intls.Utf16_.Encode_int_to_bry(char_int);
    this.xml_name_bry = xml_name_bry;
    int amp_and_semic_len = 2; // 2 for & and ;
    this.key_name_len = xml_name_bry.length - amp_and_semic_len;
  }

  public byte Tid() {
    return tid;
  }
  public int Char_int() {
    return char_int;
  }
  public byte[] U8_bry() {
    return u8_bry;
  }
  public byte[] Xml_name_bry() {
    return xml_name_bry;
  }
  public int Key_name_len() {
    return key_name_len;
  }

  // Writes the entity as a numeric character reference; EX: "&#160;"
  // The four markup-significant chars are written by name instead.
  public void Print_ncr(Bry_bfr bfr) {
    boolean write_by_name
      =  char_int == Byte_ascii.Lt
      || char_int == Byte_ascii.Gt
      || char_int == Byte_ascii.Quote
      || char_int == Byte_ascii.Amp;
    if (write_by_name) {
      bfr.Add(xml_name_bry);                    // NOTE: never write actual char; EX: "&lt;" should be written as "&lt;", not "<"
      return;
    }
    bfr.Add(Xoh_lnki_title_fmtr.Escape_bgn);    // &#
    bfr.Add_int_variable(char_int);             // 160
    bfr.Add_byte(Byte_ascii.Semic);             // ;
  }

  // Writes the entity as its literal encoded bytes; EX: "[" not "&#91;"
  // The four markup-significant chars stay escaped; MW does same; DATE:2014-11-07
  public void Print_literal(Bry_bfr bfr) {
    if (char_int == Byte_ascii.Lt) {
      bfr.Add(Gfh_entity_.Lt_bry);              // NOTE: never write actual char; EX: "&lt;" should be written as "&lt;", not "<"
    }
    else if (char_int == Byte_ascii.Gt) {
      bfr.Add(Gfh_entity_.Gt_bry);
    }
    else if (char_int == Byte_ascii.Quote) {
      bfr.Add(Gfh_entity_.Quote_bry);
    }
    else if (char_int == Byte_ascii.Amp) {
      bfr.Add(Gfh_entity_.Amp_bry);
    }
    else {
      bfr.Add(u8_bry);                          // write literal
    }
  }
}

View File

@@ -50,7 +50,7 @@ public class Xop_lnki_tkn extends Xop_tkn_itm_base implements gplx.xowa.wikis.pa
public int Pipe_count() {return pipe_count;} private int pipe_count;
public boolean Pipe_count_is_zero() {return pipe_count++ == 0;}
public boolean Xtn_sites_link() {return xtn_sites_link;} public void Xtn_sites_link_(boolean v) {xtn_sites_link = v;} private boolean xtn_sites_link;
public Xoh_file_img_wkr Lnki_file_wkr() {return lnki_file_wkr;} public void Lnki_file_wkr_(Xoh_file_img_wkr v) {lnki_file_wkr = v;} private Xoh_file_img_wkr lnki_file_wkr;
public Xoh_file_fmtr Lnki_file_wkr() {return lnki_file_wkr;} public void Lnki_file_wkr_(Xoh_file_fmtr v) {lnki_file_wkr = v;} private Xoh_file_fmtr lnki_file_wkr;
public byte[] Target;
public byte[] Ttl_ary() {
return ttl.ForceLiteralLink() || ns_id != Xow_ns_.Tid__main // if [[:]] or non-main (Category, Template)

View File

@@ -22,7 +22,6 @@ import gplx.xowa.wikis.nss.*;
import gplx.xowa.wikis.*; import gplx.xowa.xtns.pfuncs.ttls.*; import gplx.xowa.xtns.relatedSites.*;
import gplx.xowa.parsers.tmpls.*; import gplx.xowa.wikis.pages.lnkis.*;
public class Xop_lnki_wkr_ {
private static final Int_obj_ref rel2abs_tid = Int_obj_ref.New_zero();
public static final int Invalidate_lnki_len = 128;
public static int Invalidate_lnki(Xop_ctx ctx, byte[] src, Xop_root_tkn root, Xop_lnki_tkn lnki, int cur_pos) {
lnki.Tkn_tid_to_txt(); // convert initial "[[" to text; note that this lnki has no pipes as pipe_lxr does similar check; EX: [[<invalid>]]; DATE:2014-03-26
@@ -48,7 +47,8 @@ public class Xop_lnki_wkr_ {
if (page_ttl.Ns().Subpages_enabled()
&& Pfunc_rel2abs.Rel2abs_ttl(ttl_bry, 0, ttl_bry_len)) { // Linker.php|normalizeSubpageLink
Bry_bfr tmp_bfr = ctx.Wiki().Utl__bfr_mkr().Get_b512();
byte[] new_bry = Pfunc_rel2abs.Rel2abs(tmp_bfr, ttl_bry, page_ttl.Raw(), rel2abs_tid.Val_zero_());
Int_obj_ref rel2abs_tid = ctx.Tmp_mgr().Pfunc_rel2abs().Val_zero_();
byte[] new_bry = Pfunc_rel2abs.Rel2abs(tmp_bfr, ttl_bry, page_ttl.Raw(), rel2abs_tid);
lnki.Subpage_tid_(rel2abs_tid.Val());
lnki.Subpage_slash_at_end_(Bry_.Get_at_end(ttl_bry) == Byte_ascii.Slash);
ttl_bry = new_bry;

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.tmpls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.langs.htmls.*; import gplx.xowa.parsers.amps.*;
import gplx.core.btries.*; import gplx.langs.htmls.entitys.*; import gplx.xowa.parsers.amps.*;
public class Nowiki_escape_itm {
public Nowiki_escape_itm(byte[] src, byte[] trg) {this.src = src; this.trg = trg; this.src_adj = src.length - 1;}
private int src_adj;
@@ -48,17 +48,17 @@ public class Nowiki_escape_itm {
// Builds the trie of chars to escape inside <nowiki>: each key is a single wikitext-significant char and
// each value is the matching "&xowa_*;" entity name. Every key is registered exactly once, using the
// Gfh_entity_trie constants (registering the same keys a second time via another constant set is redundant).
private static Btrie_slim_mgr New_trie() {
  byte[] pre_bry = new byte[] {Byte_ascii.Nl, Byte_ascii.Space}; // NOTE: must go before New_trie
  Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
  New_trie_itm(rv, Byte_ascii.Lt_bry            , Gfh_entity_trie.Str__xowa_lt);
  New_trie_itm(rv, Byte_ascii.Brack_bgn_bry     , Gfh_entity_trie.Str__xowa_brack_bgn);
  New_trie_itm(rv, Byte_ascii.Brack_end_bry     , Gfh_entity_trie.Str__xowa_brack_end); // PAGE:en.w: Tall_poppy_syndrome DATE:2014-07-23
  New_trie_itm(rv, Byte_ascii.Pipe_bry          , Gfh_entity_trie.Str__xowa_pipe);
  New_trie_itm(rv, Byte_ascii.Apos_bry          , Gfh_entity_trie.Str__xowa_apos);      // NOTE: for backward compatibility, use &apos; note that amp_wkr will turn &apos; -> &#39 but &#39 -> '; DATE:2014-07-03
  New_trie_itm(rv, Byte_ascii.Colon_bry         , Gfh_entity_trie.Str__xowa_colon);
  New_trie_itm(rv, Byte_ascii.Underline_bry     , Gfh_entity_trie.Str__xowa_underline);
  New_trie_itm(rv, Byte_ascii.Star_bry          , Gfh_entity_trie.Str__xowa_asterisk);
  New_trie_itm(rv, Byte_ascii.Dash_bry          , Gfh_entity_trie.Str__xowa_dash);      // needed to handle "|<nowiki>-</nowiki>"; PAGE:de.w:Liste_von_Vereinen_und_Vereinigungen_von_Gläubigen_(römisch-katholische_Kirche) DATE:2015-01-08
  New_trie_itm(rv, Byte_ascii.Space_bry         , Gfh_entity_trie.Str__xowa_space);
  New_trie_itm(rv, Byte_ascii.Nl_bry            , Gfh_entity_trie.Str__xowa_nl);
  New_trie_itm(rv, pre_bry                      , pre_bry);                             // "\n " is registered with itself as the target
  return rv;
}

View File

@@ -17,6 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.xowa.parsers.amps.*; import gplx.core.log_msgs.*;
import gplx.langs.htmls.entitys.*;
public class Xop_sanitizer {
private Btrie_slim_mgr trie = Btrie_slim_mgr.cs(), amp_trie;
private Xop_amp_mgr amp_mgr;
@@ -71,18 +72,18 @@ public class Xop_sanitizer {
++pos;
}
else {
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)amp_obj;
Gfh_entity_itm itm = (Gfh_entity_itm)amp_obj;
byte itm_tid = itm.Tid();
switch (itm_tid) {
case Xop_amp_trie_itm.Tid_name_std:
case Xop_amp_trie_itm.Tid_name_xowa:
case Gfh_entity_itm.Tid_name_std:
case Gfh_entity_itm.Tid_name_xowa:
bfr.Add(itm.U8_bry());
pos += itm.Key_name_len() + 1; // 1 for trailing ";"; EX: for "&nbsp; ", (a) pos is at "&", (b) "nbsp" is Key_name_len, (c) ";" needs + 1
break;
case Xop_amp_trie_itm.Tid_num_dec:
case Xop_amp_trie_itm.Tid_num_hex:
case Gfh_entity_itm.Tid_num_dec:
case Gfh_entity_itm.Tid_num_hex:
Xop_amp_mgr_rslt rv = new Xop_amp_mgr_rslt();
amp_mgr.Parse_ncr(rv, itm_tid == Xop_amp_trie_itm.Tid_num_hex, src, end, pos - 1, pos + itm.Xml_name_bry().length);
amp_mgr.Parse_ncr(rv, itm_tid == Gfh_entity_itm.Tid_num_hex, src, end, pos - 1, pos + itm.Xml_name_bry().length);
if (rv.Pass())
bfr.Add_u8_int(rv.Val());
else

View File

@@ -17,7 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*; import gplx.core.envs.*; import gplx.xowa.apps.progs.*;
import gplx.xowa.wikis.domains.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.strings.*; import gplx.langs.htmls.*;
import gplx.xowa.wikis.domains.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.strings.*;
import gplx.langs.htmls.entitys.*;
import gplx.xowa.parsers.logs.*; import gplx.xowa.parsers.tblws.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.miscs.*; import gplx.xowa.parsers.htmls.*;
public class Xop_xnde_wkr implements Xop_ctx_wkr {
public void Ctor_ctx(Xop_ctx ctx) {}
@@ -97,6 +98,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
case Byte_ascii.Percent: // EX:<ref%s>; PAGE:pl.w:Scynk_nadrzewny; DATE:2016-08-07
tag_obj = null;
break;
}
@@ -319,7 +321,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
if ( page.Html_data().Html_restricted()
&& page.Wiki().Domain_tid() != Xow_domain_tid_.Int__home) {
int end_pos = gtPos + 1;
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, end_pos, Bry_.Add(gplx.langs.htmls.Gfh_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, end_pos)))); // +1 to skip <
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, end_pos, Bry_.Add(Gfh_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, end_pos)))); // +1 to skip <
return end_pos;
}
}
@@ -479,7 +481,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
}
}
if (end_tag.Restricted()) // restricted tags (like <script>) are not placed on stack; for now, just write it out
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, cur_pos, Bry_.Add(gplx.langs.htmls.Gfh_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, cur_pos)))); // +1 to skip <
ctx.Subs_add(root, tkn_mkr.Bry_raw(bgn_pos, cur_pos, Bry_.Add(Gfh_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, cur_pos)))); // +1 to skip <
else {
if (pre2_pending) {
pre2_pending = false;

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.xndes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
public class Xop_xnde_wkr__err_misc_tst {
private final Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
@After public void term() {fxt.Init_para_n_();}
@Test public void Error_br_removed() {
fxt.Init_para_y_();
@@ -187,4 +187,7 @@ public class Xop_xnde_wkr__err_misc_tst {
@Test public void Img_should_not_be_xtn() { // PURPOSE:<img> marked as .xtn; unclosed <img> was escaping rest of text; PAGE:de.w:Wikipedia:Technik/Archiv/2014 DATE:2014-11-06
fxt.Test_parse_page_all_str("<img>''a''", "&lt;img><i>a</i>");
}
@Test public void Invalid__percent() { // PURPOSE: invalidate xml tags with %; EX:<ref%s>; PAGE:pl.w:Scynk_nadrzewny; DATE:2016-08-07
fxt.Test_parse_page_all_str("<b%>a</b>", "&lt;b%&gt;a</b>"); // NOTE: should be literally printed as <b%>, not transformed to <b>
}
}