mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Mw_parse: Mass checkin of various mediawiki parse files
This commit is contained in:
parent
6a5c114998
commit
cef2d7e2f6
@ -114,4 +114,9 @@ public class Array_ {
|
||||
Set_at(trg, i, Get_at(add, i - srcLen));
|
||||
return trg;
|
||||
}
|
||||
public static Object Clone(Object src) {
|
||||
Object trg = Create(Component_type(src), Len(src));
|
||||
Copy(src, trg);
|
||||
return trg;
|
||||
}
|
||||
}
|
||||
|
@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx;
|
||||
import java.lang.*;
|
||||
import gplx.core.brys.*; import gplx.core.primitives.*; import gplx.core.ios.*;
|
||||
import gplx.langs.htmls.entitys.*;
|
||||
public class Bry_ {
|
||||
public static final String Cls_val_name = "byte[]";
|
||||
public static final byte[] Empty = new byte[0];
|
||||
@ -62,6 +63,7 @@ public class Bry_ {
|
||||
public static byte[] new_u8(String str) {
|
||||
try {
|
||||
int str_len = str.length();
|
||||
if (str_len == 0) return Bry_.Empty;
|
||||
int bry_len = new_u8__by_len(str, str_len);
|
||||
byte[] rv = new byte[bry_len];
|
||||
new_u8__write(str, str_len, rv, 0);
|
||||
@ -365,7 +367,7 @@ public class Bry_ {
|
||||
|| (end < bgn)
|
||||
)
|
||||
return or;
|
||||
return Mid(src, bgn, src.length);
|
||||
return bgn == src_len ? Bry_.Empty : Mid(src, bgn, src_len);
|
||||
}
|
||||
public static byte[] Mid(byte[] src, int bgn, int end) {
|
||||
try {
|
||||
@ -1001,6 +1003,33 @@ public class Bry_ {
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static byte[] Xcase__build__all(Bry_bfr tmp, boolean upper, byte[] src) {
|
||||
if (src == null) return null;
|
||||
int src_bgn = 0;
|
||||
int src_end = src.length;
|
||||
int lbound = 96, ubound = 123;
|
||||
if (!upper) {
|
||||
lbound = 64; ubound = 91;
|
||||
}
|
||||
|
||||
boolean dirty = false;
|
||||
for (int i = src_bgn; i < src_end; i++) {
|
||||
byte b = src[i];
|
||||
if (b > lbound && b < ubound) {
|
||||
if (!dirty) {
|
||||
dirty = true;
|
||||
tmp.Add_mid(src, src_bgn, i);
|
||||
}
|
||||
if (upper)
|
||||
b -= 32;
|
||||
else
|
||||
b += 32;
|
||||
}
|
||||
if (dirty)
|
||||
tmp.Add_byte(b);
|
||||
}
|
||||
return dirty ? tmp.To_bry_and_clear() : src;
|
||||
}
|
||||
public static byte[] Ucase__1st(byte[] src) {return Xcase__1st(Bool_.Y, src);}
|
||||
public static byte[] Lcase__1st(byte[] src) {return Xcase__1st(Bool_.N, src);}
|
||||
private static byte[] Xcase__1st(boolean upper, byte[] src) {
|
||||
@ -1076,4 +1105,71 @@ public class Bry_ {
|
||||
public static byte[] Replace_nl_w_tab(byte[] src, int bgn, int end) {
|
||||
return Bry_.Replace(Bry_.Mid(src, bgn, end), Byte_ascii.Nl, Byte_ascii.Tab);
|
||||
}
|
||||
public static byte[] Escape_html(byte[] src) {
|
||||
return Escape_html(null, src, 0, src.length);
|
||||
}
|
||||
public static byte[] Escape_html(Bry_bfr bfr, byte[] src, int src_bgn, int src_end) { // uses PHP rules for htmlspecialchars; REF.PHP:http://php.net/manual/en/function.htmlspecialchars.php
|
||||
boolean dirty = false;
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
boolean called_by_bry = bfr == null;
|
||||
|
||||
// loop over chars
|
||||
while (true) {
|
||||
// if EOS, exit
|
||||
if (cur == src_end) {
|
||||
if (dirty) {
|
||||
bfr.Add_mid(src, prv, src_end);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// check current byte if escaped
|
||||
byte b = src[cur];
|
||||
byte[] escaped = null;
|
||||
switch (b) {
|
||||
case Byte_ascii.Amp: escaped = Gfh_entity_.Amp_bry; break;
|
||||
case Byte_ascii.Quote: escaped = Gfh_entity_.Quote_bry; break;
|
||||
case Byte_ascii.Apos: escaped = Gfh_entity_.Apos_num_bry; break;
|
||||
case Byte_ascii.Lt: escaped = Gfh_entity_.Lt_bry; break;
|
||||
case Byte_ascii.Gt: escaped = Gfh_entity_.Gt_bry; break;
|
||||
}
|
||||
|
||||
// not escaped; increment and continue
|
||||
if (escaped == null) {
|
||||
cur++;
|
||||
continue;
|
||||
}
|
||||
// escaped
|
||||
else {
|
||||
dirty = true;
|
||||
if (bfr == null) bfr = Bry_bfr_.New();
|
||||
|
||||
if (prv < cur)
|
||||
bfr.Add_mid(src, prv, cur);
|
||||
bfr.Add(escaped);
|
||||
cur++;
|
||||
prv = cur;
|
||||
}
|
||||
}
|
||||
|
||||
if (dirty) {
|
||||
if (called_by_bry)
|
||||
return bfr.To_bry_and_clear();
|
||||
else
|
||||
return null;
|
||||
}
|
||||
else {
|
||||
if (called_by_bry) {
|
||||
if (src_bgn == 0 && src_end == src.length)
|
||||
return src;
|
||||
else
|
||||
return Bry_.Mid(src, src_bgn, src_end);
|
||||
}
|
||||
else {
|
||||
bfr.Add_mid(src, src_bgn, src_end);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -279,8 +279,13 @@ public class Bry__tst {
|
||||
@Test public void Repeat_bry() {
|
||||
fxt.Test__repeat_bry("abc" , 3, "abcabcabc");
|
||||
}
|
||||
@Test public void Xcase__build__all() {
|
||||
fxt.Test__xcase__build__all(Bool_.N, "abc", "abc");
|
||||
fxt.Test__xcase__build__all(Bool_.N, "aBc", "abc");
|
||||
}
|
||||
}
|
||||
class Bry__fxt {
|
||||
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
public void Test_trim_end(String raw, byte trim, String expd) {
|
||||
byte[] raw_bry = Bry_.new_a7(raw);
|
||||
Tfds.Eq(expd, String_.new_u8(Bry_.Trim_end(raw_bry, trim, raw_bry.length)));
|
||||
@ -298,4 +303,7 @@ class Bry__fxt {
|
||||
public void Test__repeat_bry(String s, int count, String expd) {
|
||||
Gftest.Eq__str(expd, Bry_.Repeat_bry(Bry_.new_u8(s), count));
|
||||
}
|
||||
public void Test__xcase__build__all(boolean upper, String src, String expd) {
|
||||
Gftest.Eq__str(expd, Bry_.Xcase__build__all(tmp, upper, Bry_.new_u8(src)));
|
||||
}
|
||||
}
|
||||
|
@ -297,35 +297,21 @@ public class Bry_bfr {
|
||||
Add_mid(val, bgn, end);
|
||||
return this;
|
||||
}
|
||||
public Bry_bfr Add_bry_escape_html(byte[] val) {return Add_bry_escape_html(val, 0, val.length);}
|
||||
public Bry_bfr Add_bry_escape_html(byte[] val, int bgn, int end) { // uses PHP rules for htmlspecialchars; REF.PHP:http://php.net/manual/en/function.htmlspecialchars.php
|
||||
boolean clean = true;
|
||||
for (int i = bgn; i < end; ++i) {
|
||||
byte[] escaped = null;
|
||||
byte b = val[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Amp: escaped = Gfh_entity_.Amp_bry; break;
|
||||
case Byte_ascii.Quote: escaped = Gfh_entity_.Quote_bry; break;
|
||||
case Byte_ascii.Apos: escaped = Gfh_entity_.Apos_num_bry; break;
|
||||
case Byte_ascii.Lt: escaped = Gfh_entity_.Lt_bry; break;
|
||||
case Byte_ascii.Gt: escaped = Gfh_entity_.Gt_bry; break;
|
||||
public Bry_bfr Add_bry_many(byte[]... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte[] bry = ary[i];
|
||||
if (bry != null && bry.length > 0)
|
||||
this.Add(bry);
|
||||
}
|
||||
if (escaped == null && clean) {
|
||||
continue;
|
||||
return this;
|
||||
}
|
||||
else {
|
||||
if (clean) {
|
||||
clean = false;
|
||||
this.Add_mid(val, bgn, i);
|
||||
public Bry_bfr Add_bry_escape_html(byte[] val) {
|
||||
if (val == null) return this;
|
||||
return Add_bry_escape_html(val, 0, val.length);
|
||||
}
|
||||
if (escaped == null)
|
||||
this.Add_byte(b);
|
||||
else
|
||||
this.Add(escaped);
|
||||
}
|
||||
}
|
||||
if (clean)
|
||||
Add_mid(val, bgn, end);
|
||||
public Bry_bfr Add_bry_escape_html(byte[] val, int bgn, int end) {
|
||||
Bry_.Escape_html(this, val, bgn, end);
|
||||
return this;
|
||||
}
|
||||
public Bry_bfr Add_str_u8_w_nl(String s) {Add_str_u8(s); return Add_byte_nl();}
|
||||
@ -542,6 +528,30 @@ public class Bry_bfr {
|
||||
this.Del_by(count);
|
||||
return this;
|
||||
}
|
||||
public Bry_bfr Trim_end_ws() {
|
||||
if (bfr_len == 0) return this;
|
||||
int count = 0;
|
||||
for (int i = bfr_len - 1; i > -1; --i) {
|
||||
byte b = bfr[i];
|
||||
if (Trim_end_ws_ary[b])
|
||||
++count;
|
||||
else
|
||||
break;
|
||||
}
|
||||
if (count > 0)
|
||||
this.Del_by(count);
|
||||
return this;
|
||||
}
|
||||
private static final boolean[] Trim_end_ws_ary = Trim_end_ws_new();
|
||||
private static boolean[] Trim_end_ws_new() {
|
||||
boolean[] rv = new boolean[256];
|
||||
rv[32] = true;
|
||||
rv[ 9] = true;
|
||||
rv[10] = true;
|
||||
rv[13] = true;
|
||||
rv[11] = true;
|
||||
return rv;
|
||||
}
|
||||
public Bry_bfr Concat_skip_empty(byte[] dlm, byte[]... ary) {
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
|
@ -245,6 +245,13 @@ public class Bry_find_ {
|
||||
cur += while_len;
|
||||
}
|
||||
}
|
||||
public static int Find_fwd_while_in(byte[] src, int cur, int end, boolean[] while_ary) {
|
||||
while (cur < end) {
|
||||
if (cur == end || !while_ary[src[cur]]) return cur;
|
||||
cur++;
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static int Find_fwd_until(byte[] src, int cur, int end, byte until_byte) {
|
||||
while (true) {
|
||||
if ( cur == end
|
||||
|
@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx;
|
||||
import org.junit.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Bry_find__tst {
|
||||
private Bry_find__fxt fxt = new Bry_find__fxt();
|
||||
@Test public void Find_fwd() {
|
||||
@ -59,6 +59,10 @@ public class Bry_find__tst {
|
||||
fxt.Test_Trim_bwd_space_tab("" , 0);
|
||||
fxt.Test_Trim_bwd_space_tab(" \t" , 0);
|
||||
}
|
||||
@Test public void Find_fwd_while_in() {
|
||||
boolean[] while_ary = fxt.Init__find_fwd_while_in(Byte_ascii.Space, Byte_ascii.Tab, Byte_ascii.Nl);
|
||||
fxt.Test__find_fwd_while_in(" \t\na", while_ary, 3);
|
||||
}
|
||||
}
|
||||
class Bry_find__fxt {
|
||||
public void Test_Find_fwd(String src, String lkp, int bgn, int expd) {Tfds.Eq(expd, Bry_find_.Find_fwd(Bry_.new_u8(src), Bry_.new_u8(lkp), bgn));}
|
||||
@ -74,4 +78,15 @@ class Bry_find__fxt {
|
||||
int actl = Bry_find_.Trim_fwd_space_tab(raw_bry, 0, raw_bry.length);
|
||||
Tfds.Eq(expd, actl, raw_str);
|
||||
}
|
||||
public boolean[] Init__find_fwd_while_in(byte... ary) {
|
||||
boolean[] rv = new boolean[256];
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++)
|
||||
rv[ary[i]] = true;
|
||||
return rv;
|
||||
}
|
||||
public void Test__find_fwd_while_in(String src, boolean[] ary, int expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src);
|
||||
Gftest.Eq__int(expd, Bry_find_.Find_fwd_while_in(src_bry, 0, src_bry.length, ary));
|
||||
}
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ public class Bry_split_ {
|
||||
boolean reset = true;
|
||||
if (itm_bgn == -1) {
|
||||
if (pos_is_last) {} // skip dlm at bgn / end; EX: "a,"
|
||||
else {wkr.Split(src, itm_bgn, itm_end);} // else, process "empty" dlm; EX: ",a"
|
||||
else {wkr.Split(src, pos, pos );} // else, process "empty" dlm; EX: ",a"
|
||||
}
|
||||
else {
|
||||
int rv = wkr.Split(src, itm_bgn, itm_end);
|
||||
|
@ -43,6 +43,9 @@ public class Bry_split__tst {
|
||||
fxt.Test_split("a|b|c|d" , 2, 6, "|", "b", "c");
|
||||
fxt.Test_split("a|b|c|d" , 2, 4, "|", "b");
|
||||
}
|
||||
@Test public void Empty() {
|
||||
fxt.Test_split("a\n\nb" , Byte_ascii.Nl, Bool_.N, "a", "", "b");
|
||||
}
|
||||
@Test public void Split_w_max() {
|
||||
fxt.Test__split_w_max("a|b|c|d" , Byte_ascii.Pipe, 2, "a", "b"); // max is less
|
||||
fxt.Test__split_w_max("a" , Byte_ascii.Pipe, 2, "a", null); // max is more
|
||||
|
@ -109,6 +109,14 @@ public class Btrie_slim_mgr implements Btrie_mgr {
|
||||
Add_obj(Bry_.new_u8(ary[i]), bval);
|
||||
return this;
|
||||
}
|
||||
public Btrie_slim_mgr Add_many_str(String... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte[] itm = Bry_.new_u8(ary[i]);
|
||||
Add_obj(itm, itm);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public Btrie_slim_mgr Add_many_int(int val, String... ary) {return Add_many_int(val, Bry_.Ary(ary));}
|
||||
public Btrie_slim_mgr Add_many_int(int val, byte[]... ary) {
|
||||
int len = ary.length;
|
||||
|
@ -87,11 +87,31 @@ public class Hex_utl_ {
|
||||
public static void Write(byte[] bry, int bgn, int end, int val) {
|
||||
for (int i = end - 1; i > bgn - 1; i--) {
|
||||
int b = val % 16;
|
||||
bry[i] = To_byte(b);
|
||||
bry[i] = To_byte_ucase(b);
|
||||
val /= 16;
|
||||
if (val == 0) break;
|
||||
}
|
||||
}
|
||||
public static void Write_bfr(Bry_bfr bfr, boolean lcase, int val) {
|
||||
// count bytes
|
||||
int val_len = 0;
|
||||
int tmp = val;
|
||||
while (true) {
|
||||
tmp /= 16;
|
||||
val_len++;
|
||||
if (tmp == 0) break;
|
||||
}
|
||||
|
||||
// fill bytes from right to left
|
||||
int hex_bgn = bfr.Len();
|
||||
bfr.Add_byte_repeat(Byte_ascii.Null, val_len);
|
||||
byte[] bry = bfr.Bfr();
|
||||
for (int i = 0; i < val_len; i++) {
|
||||
int b = val % 16;
|
||||
bry[hex_bgn + val_len - i - 1] = lcase ? To_byte_lcase(b) : To_byte_ucase(b);
|
||||
val /= 16;
|
||||
}
|
||||
}
|
||||
public static boolean Is_hex_many(byte... ary) {
|
||||
for (byte itm : ary) {
|
||||
switch (itm) {
|
||||
@ -123,7 +143,7 @@ public class Hex_utl_ {
|
||||
default: throw Err_.new_parse("hexstring", Int_.To_str(val));
|
||||
}
|
||||
}
|
||||
private static byte To_byte(int v) {
|
||||
private static byte To_byte_ucase(int v) {
|
||||
switch (v) {
|
||||
case 0: return Byte_ascii.Num_0; case 1: return Byte_ascii.Num_1; case 2: return Byte_ascii.Num_2; case 3: return Byte_ascii.Num_3; case 4: return Byte_ascii.Num_4;
|
||||
case 5: return Byte_ascii.Num_5; case 6: return Byte_ascii.Num_6; case 7: return Byte_ascii.Num_7; case 8: return Byte_ascii.Num_8; case 9: return Byte_ascii.Num_9;
|
||||
|
@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.encoders; import gplx.*; import gplx.core.*;
|
||||
import org.junit.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Hex_utl__tst {
|
||||
private final Hex_utl__fxt fxt = new Hex_utl__fxt();
|
||||
@Test public void To_int() {
|
||||
@ -46,6 +46,15 @@ public class Hex_utl__tst {
|
||||
fxt.Test__write("[00000000]", 1, 9, 15, "[0000000F]");
|
||||
fxt.Test__write("[00000000]", 1, 9, 255, "[000000FF]");
|
||||
}
|
||||
@Test public void Write_bfr() {
|
||||
fxt.Test__write_bfr(Bool_.Y, 0, "0");
|
||||
fxt.Test__write_bfr(Bool_.Y, 15, "f");
|
||||
fxt.Test__write_bfr(Bool_.Y, 16, "10");
|
||||
fxt.Test__write_bfr(Bool_.Y, 32, "20");
|
||||
fxt.Test__write_bfr(Bool_.Y, 255, "ff");
|
||||
fxt.Test__write_bfr(Bool_.Y, 256, "100");
|
||||
fxt.Test__write_bfr(Bool_.Y, Int_.Max_value, "7fffffff");
|
||||
}
|
||||
}
|
||||
class Hex_utl__fxt {
|
||||
public void Test__write(String s, int bgn, int end, int val, String expd) {
|
||||
@ -63,6 +72,11 @@ class Hex_utl__fxt {
|
||||
String actl = Hex_utl_.To_str(val, pad);
|
||||
Tfds.Eq(expd, actl);
|
||||
}
|
||||
private final Bry_bfr bfr = Bry_bfr_.New();
|
||||
public void Test__write_bfr(boolean lcase, int val, String expd) {
|
||||
Hex_utl_.Write_bfr(bfr, lcase, val);
|
||||
Gftest.Eq__str(expd, bfr.To_str_and_clear());
|
||||
}
|
||||
// public void Test__encode_bry(int val, int pad, String expd) {
|
||||
// String actl = Hex_utl_.To_str(val, pad);
|
||||
// Tfds.Eq(expd, actl);
|
||||
|
@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.core.primitives.*;
|
||||
public class Php_preg_ {
|
||||
public static byte[][] Split(Int_list list, byte[] src, int src_bgn, int src_end, byte[] dlm, boolean extend) {
|
||||
@ -27,7 +28,7 @@ public class Php_preg_ {
|
||||
while (true) {
|
||||
if (i == src_end) break;
|
||||
int dlm_end = i + dlm_len;
|
||||
if (dlm_end < src_end && Bry_.Eq(src, i, dlm_end, dlm)) {
|
||||
if (dlm_end <= src_end && Bry_.Eq(src, i, dlm_end, dlm)) {
|
||||
if (extend) {
|
||||
dlm_end = Bry_find_.Find_fwd_while(src, i, src_end, dlm_nth);
|
||||
}
|
||||
@ -42,13 +43,33 @@ public class Php_preg_ {
|
||||
|
||||
// create brys
|
||||
int rv_len = list.Len() - 1;
|
||||
if (rv_len == 1) return null;
|
||||
if (rv_len == 1) {
|
||||
list.Clear();
|
||||
return null;
|
||||
}
|
||||
if (list.Get_at(list.Len() - 2) == src_end) { // if 2nd to last elem == src_end, then last item is Bry_.Empty; ignore it; EX: "a''" -> "a", "''" x> "a", "''", ""
|
||||
rv_len--;
|
||||
}
|
||||
byte[][] rv = new byte[rv_len][];
|
||||
for (i = 0; i < rv_len; i += 2) {
|
||||
rv[i ] = Bry_.Mid(src, list.Get_at(i + 0), list.Get_at(i + 1));
|
||||
if (i + 1 == rv_len) break;
|
||||
rv[i + 1] = Bry_.Mid(src, list.Get_at(i + 1), list.Get_at(i + 2));
|
||||
}
|
||||
list.Clear();
|
||||
return rv;
|
||||
}
|
||||
public static Object Match(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
|
||||
int cur = src_bgn;
|
||||
while (cur < src_end) {
|
||||
byte b = src[cur];
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
|
||||
if (o == null)
|
||||
cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
else {
|
||||
return o;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -21,11 +21,12 @@ public class Php_preg___tst {
|
||||
private final Php_preg___fxt fxt = new Php_preg___fxt();
|
||||
@Test public void Basic() {fxt.Test__split("a''b''c" , "''", Bool_.Y, "a", "''", "b", "''", "c");}
|
||||
@Test public void Extend() {fxt.Test__split("a'''b'''c" , "''", Bool_.Y, "a", "'''", "b", "'''", "c");}
|
||||
@Test public void Eos() {fxt.Test__split("a''" , "''", Bool_.Y, "a", "''");}
|
||||
}
|
||||
class Php_preg___fxt {
|
||||
private final gplx.core.primitives.Int_list rv = new gplx.core.primitives.Int_list();
|
||||
public void Test__split(String src, String dlm, boolean extend, String... expd) {Test__split(src, 0, String_.Len(src), dlm, extend, expd);}
|
||||
public void Test__split(String src, int src_bgn, int src_end, String dlm, boolean extend, String... expd) {
|
||||
gplx.core.primitives.Int_list rv = new gplx.core.primitives.Int_list();
|
||||
byte[][] actl = Php_preg_.Split(rv, Bry_.new_u8(src), src_bgn, src_end, Bry_.new_u8(dlm), extend);
|
||||
Gftest.Eq__ary(expd, String_.Ary(actl), "find_failed");
|
||||
}
|
||||
|
@ -16,7 +16,11 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Php_str_ {
|
||||
public static int Strpos(byte[] src, byte find, int bgn, int end) {
|
||||
return Bry_find_.Find_fwd(src, find, bgn, end);
|
||||
}
|
||||
public static byte[] Substr(byte[] src, int bgn) {return Substr(src, bgn, src.length);}
|
||||
public static byte[] Substr(byte[] src, int bgn, int len) {
|
||||
int src_len = src.length;
|
||||
@ -29,12 +33,24 @@ public class Php_str_ {
|
||||
public static byte Substr_byte(byte[] src, int bgn) {return Substr_byte(src, bgn, src.length);}
|
||||
public static byte Substr_byte(byte[] src, int bgn, int len) {
|
||||
int src_len = src.length;
|
||||
if (src_len == 0) return Byte_ascii.Null;
|
||||
if (bgn < 0) bgn = src_len + bgn; // handle negative
|
||||
if (bgn < 0) bgn = 0; // handle out of bounds; EX: ("a", -1, -1)
|
||||
int end = len < 0 ? src_len + len : bgn + len;
|
||||
if (end > src.length) end = src.length;; // handle out of bounds;
|
||||
return src[bgn];
|
||||
}
|
||||
public static int Strspn_fwd__ary(byte[] src, boolean[] find, int bgn, int max, int src_len) {
|
||||
if (max == -1) max = src_len;
|
||||
int rv = 0;
|
||||
for (int i = bgn; i < src_len; i++) {
|
||||
if (find[src[i]] && rv < max)
|
||||
rv++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static int Strspn_fwd__byte(byte[] src, byte find, int bgn, int max, int src_len) {
|
||||
if (max == -1) max = src_len;
|
||||
int rv = 0;
|
||||
@ -91,4 +107,31 @@ public class Php_str_ {
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static byte[] Strtr(byte[] src, Btrie_slim_mgr trie, Bry_bfr tmp, Btrie_rv trv) {
|
||||
boolean dirty = false;
|
||||
int src_bgn = 0;
|
||||
int src_end = src.length;
|
||||
int i = src_bgn;
|
||||
|
||||
while (true) {
|
||||
if (i == src_end) break;
|
||||
byte b = src[i];
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, i, src_end);
|
||||
if (o == null) {
|
||||
if (dirty) {
|
||||
tmp.Add_byte(b);
|
||||
}
|
||||
i++;
|
||||
}
|
||||
else {
|
||||
if (!dirty) {
|
||||
dirty = true;
|
||||
tmp.Add_mid(src, 0, i);
|
||||
}
|
||||
tmp.Add((byte[])o);
|
||||
i = trv.Pos();
|
||||
}
|
||||
}
|
||||
return dirty ? tmp.To_bry_and_clear() : src;
|
||||
}
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*;
|
||||
public class Php_str___tst {
|
||||
private final Php_str___fxt fxt = new Php_str___fxt();
|
||||
@Test public void Strspn_fwd__byte() {
|
||||
@ -43,6 +43,14 @@ public class Php_str___tst {
|
||||
fxt.Test__substr("abcde" , -1, "e");
|
||||
fxt.Test__substr("abcde" , -3, -1, "cd");
|
||||
}
|
||||
@Test public void Strtr() {
|
||||
fxt.Init__strtr_by_trie("01", "89", "02", "79");
|
||||
fxt.Test__strtr_by_trie("abc" , "abc"); // found=none
|
||||
fxt.Test__strtr_by_trie("ab_01_cd" , "ab_89_cd"); // found=one
|
||||
fxt.Test__strtr_by_trie("ab_01_cd_02_ef", "ab_89_cd_79_ef"); // found=many
|
||||
fxt.Test__strtr_by_trie("01_ab" , "89_ab"); // BOS
|
||||
fxt.Test__strtr_by_trie("ab_01" , "ab_89"); // EOS
|
||||
}
|
||||
}
|
||||
class Php_str___fxt {
|
||||
public void Test__strspn_fwd__byte(String src_str, byte find, int bgn, int max, int expd) {
|
||||
@ -63,4 +71,17 @@ class Php_str___fxt {
|
||||
public void Test__substr(String src_str, int bgn, int len, String expd) {
|
||||
Gftest.Eq__str(expd, Php_str_.Substr(Bry_.new_u8(src_str), bgn, len));
|
||||
}
|
||||
private Btrie_slim_mgr strtr_trie;
|
||||
public void Init__strtr_by_trie(String... kvs) {
|
||||
if (strtr_trie == null) strtr_trie = Btrie_slim_mgr.cs();
|
||||
int len = kvs.length;
|
||||
for (int i = 0; i < len; i += 2) {
|
||||
strtr_trie.Add_str_str(kvs[i], kvs[i + 1]);
|
||||
}
|
||||
}
|
||||
public void Test__strtr_by_trie(String src, String expd) {
|
||||
Bry_bfr tmp = Bry_bfr_.New();
|
||||
Btrie_rv trv = new Btrie_rv();
|
||||
Gftest.Eq__str(expd, Php_str_.Strtr(Bry_.new_u8(src), strtr_trie, tmp, trv));
|
||||
}
|
||||
}
|
||||
|
@ -132,6 +132,67 @@ public class Xoa_ttl { // PAGE:en.w:http://en.wikipedia.org/wiki/Help:Link; REF.
|
||||
return Bry_.Mid(full_txt, page_bgn, ques_pos == Bry_find_.Not_found ? full_txt_len : ques_pos);
|
||||
}
|
||||
|
||||
public byte[] Get_prefixed_text() {return Full_txt_wo_qarg();}
|
||||
public byte[] Get_prefixed_db_key() {return Full_db();}
|
||||
public boolean Has_fragment() {return anch_bgn != -1;}
|
||||
public byte[] Get_fragment() {return Anch_txt();}
|
||||
public byte[] Get_link_url(byte[] query, boolean query2, boolean proto) {
|
||||
// if ( $this->isExternal() || $proto !== false ) {
|
||||
// $ret = $this->getFullURL( $query, $query2, $proto );
|
||||
// }
|
||||
// else if ( $this->getPrefixedText() === '' && $this->hasFragment() ) {
|
||||
// $ret = $this->getFragmentForURL();
|
||||
// }
|
||||
// else {
|
||||
// $ret = $this->getLocalURL( $query, $query2 ) . $this->getFragmentForURL();
|
||||
// }
|
||||
return Bry_.Add(gplx.xowa.htmls.hrefs.Xoh_href_.Bry__wiki, this.Full_db_w_anch());
|
||||
}
|
||||
public boolean Is_always_known() {
|
||||
// $isKnown = null;
|
||||
|
||||
/**
|
||||
* Allows overriding default behavior for determining if a page exists.
|
||||
* If $isKnown is kept as null, regular checks happen. If it's
|
||||
* a boolean, this value is returned by the isKnown method.
|
||||
*
|
||||
* @since 1.20
|
||||
*
|
||||
* @param Title $title
|
||||
* @param boolean|null $isKnown
|
||||
*/
|
||||
// Hooks::run( 'TitleIsAlwaysKnown', [ $this, &$isKnown ] );
|
||||
//
|
||||
// if ( !is_null( $isKnown ) ) {
|
||||
// return $isKnown;
|
||||
// }
|
||||
//
|
||||
// if ( $this->isExternal() ) {
|
||||
// return true; // any interwiki link might be viewable, for all we know
|
||||
// }
|
||||
//
|
||||
// switch ( $this->mNamespace ) {
|
||||
// case NS_MEDIA:
|
||||
// case NS_FILE:
|
||||
// // file exists, possibly in a foreign repo
|
||||
// return (boolean)wfFindFile( $this );
|
||||
// case NS_SPECIAL:
|
||||
// // valid special page
|
||||
// return SpecialPageFactory::exists( $this->getDBkey() );
|
||||
// case NS_MAIN:
|
||||
// // selflink, possibly with fragment
|
||||
// return $this->mDbkeyform == '';
|
||||
// case NS_MEDIAWIKI:
|
||||
// // known system message
|
||||
// return $this->hasSourceText() !== false;
|
||||
// default:
|
||||
// return false;
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean Is_external() {return this.wik_bgn != -1;}
|
||||
|
||||
public static final byte Subpage_spr = Byte_ascii.Slash; // EX: A/B/C
|
||||
public static final Xoa_ttl Null = null;
|
||||
|
||||
|
@ -46,3 +46,12 @@ public class Xocfg_dflt_mgr {
|
||||
gfs_mgr.Run_url(url);
|
||||
}
|
||||
}
|
||||
class Xocfg_dflt_itm__static implements Gfo_invk {
|
||||
private final String val;
|
||||
public Xocfg_dflt_itm__static(String val) {
|
||||
this.val = val;
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
return val;
|
||||
}
|
||||
}
|
||||
|
@ -17,6 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.updates.files; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.updates.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.xowa.files.*;
|
||||
public class Xodel_small_cmd extends Xob_cmd__base {
|
||||
public Xodel_small_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
|
||||
private final int[] ext_max_ary = Xobldr__fsdb_db__delete_small_files_.New_ext_max_ary();
|
||||
@ -31,3 +32,20 @@ public class Xodel_small_cmd extends Xob_cmd__base {
|
||||
public static final Xob_cmd Prototype = new Xodel_small_cmd(null, null);
|
||||
@Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return new Xodel_small_cmd(bldr, wiki);}
|
||||
}
|
||||
class Xobldr__fsdb_db__delete_small_files_ {
|
||||
public static int[] New_ext_max_ary() {
|
||||
int[] rv = new int[Xof_ext_.Id__max];
|
||||
Ext_max_(rv, 35, Xof_ext_.Id_svg);
|
||||
Ext_max_(rv, 40, Xof_ext_.Id_gif);
|
||||
Ext_max_(rv, 100, Xof_ext_.Id_png, Xof_ext_.Id_jpg, Xof_ext_.Id_jpeg);
|
||||
Ext_max_(rv, 500, Xof_ext_.Id_tif, Xof_ext_.Id_tiff);
|
||||
Ext_max_(rv, 500, Xof_ext_.Id_xcf);
|
||||
Ext_max_(rv, 1000, Xof_ext_.Id_bmp);
|
||||
Ext_max_(rv, 700, Xof_ext_.Id_webm);
|
||||
Ext_max_(rv, 1000, Xof_ext_.Id_ogv);
|
||||
Ext_max_(rv, 400, Xof_ext_.Id_pdf);
|
||||
Ext_max_(rv, 700, Xof_ext_.Id_djvu);
|
||||
return rv;
|
||||
}
|
||||
private static void Ext_max_(int[] ary, int max, int... exts) {for (int ext : exts) ary[ext] = max;}
|
||||
}
|
||||
|
@ -19,6 +19,7 @@ package gplx.xowa.addons.bldrs.updates.files; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.xowa.bldrs.*;
|
||||
import gplx.fsdb.*; import gplx.fsdb.meta.*; import gplx.xowa.files.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
class Xodel_small_mgr {
|
||||
public void Exec(Xowe_wiki wiki, int[] ext_max_ary) {
|
||||
wiki.Init_assert();
|
||||
@ -53,20 +54,3 @@ class Xodel_small_mgr {
|
||||
);
|
||||
}
|
||||
}
|
||||
class Xobldr__fsdb_db__delete_small_files_ {
|
||||
public static int[] New_ext_max_ary() {
|
||||
int[] rv = new int[Xof_ext_.Id__max];
|
||||
Ext_max_(rv, 35, Xof_ext_.Id_svg);
|
||||
Ext_max_(rv, 40, Xof_ext_.Id_gif);
|
||||
Ext_max_(rv, 100, Xof_ext_.Id_png, Xof_ext_.Id_jpg, Xof_ext_.Id_jpeg);
|
||||
Ext_max_(rv, 500, Xof_ext_.Id_tif, Xof_ext_.Id_tiff);
|
||||
Ext_max_(rv, 500, Xof_ext_.Id_xcf);
|
||||
Ext_max_(rv, 1000, Xof_ext_.Id_bmp);
|
||||
Ext_max_(rv, 700, Xof_ext_.Id_webm);
|
||||
Ext_max_(rv, 1000, Xof_ext_.Id_ogv);
|
||||
Ext_max_(rv, 400, Xof_ext_.Id_pdf);
|
||||
Ext_max_(rv, 700, Xof_ext_.Id_djvu);
|
||||
return rv;
|
||||
}
|
||||
private static void Ext_max_(int[] ary, int max, int... exts) {for (int ext : exts) ary[ext] = max;}
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ public class Xol_lnki_trail_mgr implements Gfo_invk {
|
||||
public Xol_lnki_trail_mgr(Xol_lang_itm lang) {}
|
||||
public void Clear() {trie.Clear();}
|
||||
public int Count() {return trie.Count();}
|
||||
public Btrie_slim_mgr Trie() {return trie;} Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
public Btrie_slim_mgr Trie() {return trie;} private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
public void Add(byte[] v) {trie.Add_obj(v, v);}
|
||||
public void Del(byte[] v) {trie.Del(v);}
|
||||
private void Add(String... ary) {
|
||||
|
819
400_xowa/src/gplx/xowa/mws/Xomw_linker.java
Normal file
819
400_xowa/src/gplx/xowa/mws/Xomw_linker.java
Normal file
@ -0,0 +1,819 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.mws.htmls.*;
|
||||
public class Xomw_linker {
|
||||
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
private final Linker_rel_splitter splitter = new Linker_rel_splitter();
|
||||
private final Xomw_html_utl html_utl = new Xomw_html_utl();
|
||||
private byte[] wg_title = null;
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final byte[][] split_trail_rv = new byte[2][];
|
||||
private Btrie_slim_mgr split_trail_trie;
|
||||
private static final byte[] Atr__class = Bry_.new_a7("class"), Atr__rel = Bry_.new_a7("rel"), Atr__href = Bry_.new_a7("href"), Rel__nofollow = Bry_.new_a7("nofollow");
|
||||
public void Init_by_wiki(Btrie_slim_mgr trie) {
|
||||
this.split_trail_trie = trie;
|
||||
}
|
||||
// /**
|
||||
// * This function returns an HTML link to the given target. It serves a few
|
||||
// * purposes:
|
||||
// * 1) If $target is a Title, the correct URL to link to will be figured
|
||||
// * out automatically.
|
||||
// * 2) It automatically adds the usual classes for various types of link
|
||||
// * targets: "new" for red links, "stub" for short articles, etc.
|
||||
// * 3) It escapes all attribute values safely so there's no risk of XSS.
|
||||
// * 4) It provides a default tooltip if the target is a Title (the page
|
||||
// * name of the target).
|
||||
// * link() replaces the old functions in the makeLink() family.
|
||||
// *
|
||||
// * @since 1.18 Method exists since 1.16 as non-static, made static in 1.18.
|
||||
// * @deprecated since 1.28, use MediaWiki\Linker\LinkRenderer instead
|
||||
// *
|
||||
// * @param Title $target Can currently only be a Title, but this may
|
||||
// * change to support Images, literal URLs, etc.
|
||||
// * @param String $html The HTML contents of the <a> element, i.e.,
|
||||
// * the link text. This is raw HTML and will not be escaped. If null,
|
||||
// * defaults to the prefixed text of the Title; or if the Title is just a
|
||||
// * fragment, the contents of the fragment.
|
||||
// * @param array $customAttribs A key => value array of extra HTML attributes,
|
||||
// * such as title and class. (href is ignored.) Classes will be
|
||||
// * merged with the default classes, while other attributes will replace
|
||||
// * default attributes. All passed attribute values will be HTML-escaped.
|
||||
// * A false attribute value means to suppress that attribute.
|
||||
// * @param array $query The query String to append to the URL
|
||||
// * you're linking to, in key => value array form. Query keys and values
|
||||
// * will be URL-encoded.
|
||||
// * @param String|array $options String or array of strings:
|
||||
// * 'known': Page is known to exist, so don't check if it does.
|
||||
// * 'broken': Page is known not to exist, so don't check if it does.
|
||||
// * 'noclasses': Don't add any classes automatically (includes "new",
|
||||
// * "stub", "mw-redirect", "extiw"). Only use the class attribute
|
||||
// * provided, if any, so you get a simple blue link with no funny i-
|
||||
// * cons.
|
||||
// * 'forcearticlepath': Use the article path always, even with a querystring.
|
||||
// * Has compatibility issues on some setups, so avoid wherever possible.
|
||||
// * 'http': Force a full URL with http:// as the scheme.
|
||||
// * 'https': Force a full URL with https:// as the scheme.
|
||||
// * 'stubThreshold' => (int): Stub threshold to use when determining link classes.
|
||||
// * @return String HTML <a> attribute
|
||||
// */
|
||||
// public static function link(
|
||||
// $target, $html = null, $customAttribs = [], $query = [], $options = []
|
||||
// ) {
|
||||
// if ( !$target instanceof Title ) {
|
||||
// wfWarn( __METHOD__ . ': Requires $target to be a Title Object.', 2 );
|
||||
// return "<!-- ERROR -->$html";
|
||||
// }
|
||||
//
|
||||
// if ( is_string( $query ) ) {
|
||||
// // some functions within core using this still hand over query strings
|
||||
// wfDeprecated( __METHOD__ . ' with parameter $query as String (should be array)', '1.20' );
|
||||
// $query = wfCgiToArray( $query );
|
||||
// }
|
||||
//
|
||||
// $services = MediaWikiServices::getInstance();
|
||||
// $options = (array)$options;
|
||||
// if ( $options ) {
|
||||
// // Custom options, create new LinkRenderer
|
||||
// if ( !isset( $options['stubThreshold'] ) ) {
|
||||
// $defaultLinkRenderer = $services->getLinkRenderer();
|
||||
// $options['stubThreshold'] = $defaultLinkRenderer->getStubThreshold();
|
||||
// }
|
||||
// $linkRenderer = $services->getLinkRendererFactory()
|
||||
// ->createFromLegacyOptions( $options );
|
||||
// } else {
|
||||
// $linkRenderer = $services->getLinkRenderer();
|
||||
// }
|
||||
//
|
||||
// if ( $html !== null ) {
|
||||
// $text = new HtmlArmor( $html );
|
||||
// } else {
|
||||
// $text = $html; // null
|
||||
// }
|
||||
// if ( in_array( 'known', $options, true ) ) {
|
||||
// return $linkRenderer->makeKnownLink( $target, $text, $customAttribs, $query );
|
||||
// } elseif ( in_array( 'broken', $options, true ) ) {
|
||||
// return $linkRenderer->makeBrokenLink( $target, $text, $customAttribs, $query );
|
||||
// } elseif ( in_array( 'noclasses', $options, true ) ) {
|
||||
// return $linkRenderer->makePreloadedLink( $target, $text, '', $customAttribs, $query );
|
||||
// } else {
|
||||
// return $linkRenderer->makeLink( $target, $text, $customAttribs, $query );
|
||||
// }
|
||||
// }
|
||||
public void Make_self_link_obj(Bry_bfr bfr, Xoa_ttl nt, byte[] html, byte[] query, byte[] trail, byte[] prefix) {
|
||||
// MW.HOOK:SelfLinkBegin
|
||||
if (html == Bry_.Empty) {
|
||||
html = tmp.Add_bry_escape_html(nt.Get_prefixed_text()).To_bry_and_clear();
|
||||
}
|
||||
byte[] inside = Bry_.Empty;
|
||||
byte[][] split_trail = Split_trail(trail);
|
||||
inside = split_trail[0];
|
||||
trail = split_trail[1];
|
||||
bfr.Add_str_a7("<strong class=\"selflink\">");
|
||||
bfr.Add_bry_many(prefix, html, inside);
|
||||
bfr.Add_str_a7("</strong>");
|
||||
bfr.Add(trail);
|
||||
}
|
||||
public void Make_external_link(Bry_bfr bfr, byte[] url, byte[] text, boolean escape, byte[] link_type, Xomwh_atr_mgr attribs, byte[] title) {
|
||||
tmp.Add_str_a7("external");
|
||||
if (link_type != null) {
|
||||
tmp.Add_byte_space().Add(link_type);
|
||||
}
|
||||
Xomwh_atr_itm cls_itm = attribs.Get_by_or_make(Atr__class);
|
||||
if (cls_itm.Val() != null) {
|
||||
tmp.Add(cls_itm.Val());
|
||||
}
|
||||
cls_itm.Val_(tmp.To_bry_and_clear());
|
||||
|
||||
if (escape)
|
||||
text = tmp.Add_bry_escape_html(text).To_bry_and_clear();
|
||||
|
||||
if (title == null)
|
||||
title = wg_title;
|
||||
|
||||
byte[] new_rel = Get_external_link_rel(url, title);
|
||||
Xomwh_atr_itm cur_rel_atr = attribs.Get_by_or_make(Atr__rel);
|
||||
if (cur_rel_atr.Val() == null) {
|
||||
cur_rel_atr.Val_(new_rel);
|
||||
}
|
||||
else {
|
||||
// Merge the rel attributes.
|
||||
byte[] cur_rel = cur_rel_atr.Val();
|
||||
Bry_split_.Split(new_rel, 0, new_rel.length, Byte_ascii.Space, Bool_.N, splitter); // $newRels = explode( ' ', $newRel );
|
||||
Bry_split_.Split(cur_rel, 0, cur_rel.length, Byte_ascii.Space, Bool_.N, splitter); // $oldRels = explode( ' ', $attribs['rel'] );
|
||||
cur_rel_atr.Val_(splitter.To_bry()); // $attribs['rel'] = implode( ' ', $combined );
|
||||
}
|
||||
//$link = '';
|
||||
//$success = Hooks::run( 'LinkerMakeExternalLink',
|
||||
// [ &$url, &$text, &$link, &$attribs, $linktype ] );
|
||||
//if ( !$success ) {
|
||||
// wfDebug( "Hook LinkerMakeExternalLink changed the output of link "
|
||||
// . "with url {$url} and text {$text} to {$link}\n", true );
|
||||
// return $link;
|
||||
//}
|
||||
attribs.Set(Atr__href, url);
|
||||
|
||||
html_utl.Raw_element(bfr, Bry_.new_a7("a"), attribs, text);
|
||||
}
|
||||
private byte[] Get_external_link_rel(byte[] url, byte[] title) {
|
||||
// global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
|
||||
// $ns = $title ? $title->getNamespace() : false;
|
||||
// if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
|
||||
// && !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
|
||||
// ) {
|
||||
return Rel__nofollow;
|
||||
// }
|
||||
// return null;
|
||||
}
|
||||
public void Normalize_subpage_link(Xomw_linker__normalize_subpage_link rv, Xoa_ttl context_title, byte[] target, byte[] text) {
|
||||
// Valid link forms:
|
||||
// Foobar -- normal
|
||||
// :Foobar -- override special treatment of prefix (images, language links)
|
||||
// /Foobar -- convert to CurrentPage/Foobar
|
||||
// /Foobar/ -- convert to CurrentPage/Foobar, strip the initial and final / from text
|
||||
// ../ -- convert to CurrentPage, from CurrentPage/CurrentSubPage
|
||||
// ../Foobar -- convert to CurrentPage/Foobar,
|
||||
// (from CurrentPage/CurrentSubPage)
|
||||
// ../Foobar/ -- convert to CurrentPage/Foobar, use 'Foobar' as text
|
||||
// (from CurrentPage/CurrentSubPage)
|
||||
|
||||
byte[] ret = target; // default return value is no change
|
||||
|
||||
// Some namespaces don't allow subpages,
|
||||
// so only perform processing if subpages are allowed
|
||||
if (context_title != null && context_title.Ns().Subpages_enabled()) {
|
||||
int hash = Bry_find_.Find_fwd(target, Byte_ascii.Hash);
|
||||
byte[] suffix = null;
|
||||
if (hash != Bry_find_.Not_found) {
|
||||
suffix = Bry_.Mid(target, hash);
|
||||
target = Bry_.Mid(target, 0, hash);
|
||||
}
|
||||
else {
|
||||
suffix = Bry_.Empty;
|
||||
}
|
||||
// bug 7425
|
||||
target = Bry_.Trim(target);
|
||||
// Look at the first character
|
||||
if (target != Bry_.Empty && target[0] == Byte_ascii.Slash) {
|
||||
// / at end means we don't want the slash to be shown
|
||||
int target_len = target.length;
|
||||
int trailing_slashes_bgn = Bry_find_.Find_bwd_while(target, target_len, 0, Byte_ascii.Slash) + 1;
|
||||
byte[] no_slash = null;
|
||||
if (trailing_slashes_bgn != target_len) {
|
||||
no_slash = target = Bry_.Mid(target, 1, trailing_slashes_bgn);
|
||||
}
|
||||
else {
|
||||
no_slash = Bry_.Mid(target, 1);
|
||||
}
|
||||
|
||||
ret = Bry_.Add(context_title.Get_prefixed_text(), Byte_ascii.Slash_bry, Bry_.Trim(no_slash), suffix);
|
||||
if (text == Bry_.Empty) {
|
||||
text = Bry_.Add(target, suffix);
|
||||
} // this might be changed for ugliness reasons
|
||||
}
|
||||
else {
|
||||
// check for .. subpage backlinks
|
||||
int dot2_count = 0;
|
||||
byte[] dot2_stripped = target;
|
||||
while (Bry_.Match(dot2_stripped, 0, 3, Bry__dot2)) {
|
||||
++dot2_count;
|
||||
dot2_stripped = Bry_.Mid(dot2_stripped, 3);
|
||||
}
|
||||
if (dot2_count > 0) {
|
||||
byte[][] exploded = Bry_split_.Split(context_title.Get_prefixed_text(), Byte_ascii.Slash);
|
||||
int exploded_len = exploded.length;
|
||||
if (exploded_len > dot2_count) { // not allowed to go below top level page
|
||||
// PORTED: ret = implode('/', array_slice($exploded, 0, -dot2_count));
|
||||
int implode_len = exploded_len - dot2_count;
|
||||
for (int i = 0; i < implode_len; i++) {
|
||||
if (i != 0) tmp.Add_byte(Byte_ascii.Slash);
|
||||
tmp.Add(exploded[i]);
|
||||
}
|
||||
// / at the end means don't show full path
|
||||
if (Bry_.Has_at_end(dot2_stripped, Byte_ascii.Slash)) {
|
||||
dot2_stripped = Bry_.Mid(dot2_stripped, 0, dot2_stripped.length - 1);
|
||||
if (text == Bry_.Empty) {
|
||||
text = Bry_.Add(dot2_stripped, suffix);
|
||||
}
|
||||
}
|
||||
dot2_stripped = Bry_.Trim(dot2_stripped);
|
||||
if (dot2_stripped != Bry_.Empty) {
|
||||
tmp.Add_bry_many(Byte_ascii.Slash_bry, dot2_stripped);
|
||||
}
|
||||
tmp.Add(suffix);
|
||||
ret = tmp.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rv.Init(ret, text);
|
||||
}
|
||||
public byte[][] Split_trail(byte[] trail) {
|
||||
int cur = 0;
|
||||
int src_end = trail.length;
|
||||
while (true) {
|
||||
Object o = split_trail_trie.Match_at(trv, trail, cur, src_end);
|
||||
if (o == null) break;
|
||||
byte[] bry = (byte[])o;
|
||||
cur += bry.length;
|
||||
}
|
||||
|
||||
if (cur == 0) { // no trail
|
||||
split_trail_rv[0] = null;
|
||||
split_trail_rv[1] = trail;
|
||||
}
|
||||
else {
|
||||
split_trail_rv[0] = Bry_.Mid(trail, 0, cur);
|
||||
split_trail_rv[1] = Bry_.Mid(trail, cur, src_end);
|
||||
}
|
||||
return split_trail_rv;
|
||||
}
|
||||
public void Make_image(Bry_bfr bfr, Xoa_ttl title, byte[] options, boolean holders) {
|
||||
// Check if the options text is of the form "options|alt text"
|
||||
// Options are:
|
||||
// * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
|
||||
// * left no resizing, just left align. label is used for alt= only
|
||||
// * right same, but right aligned
|
||||
// * none same, but not aligned
|
||||
// * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox
|
||||
// * center center the image
|
||||
// * frame Keep original image size, no magnify-button.
|
||||
// * framed Same as "frame"
|
||||
// * frameless like 'thumb' but without a frame. Keeps user preferences for width
|
||||
// * upright reduce width for upright images, rounded to full __0 px
|
||||
// * border draw a 1px border around the image
|
||||
// * alt Text for HTML alt attribute (defaults to empty)
|
||||
// * class Set a class for img node
|
||||
// * link Set the target of the image link. Can be external, interwiki, or local
|
||||
// vertical-align values (no % or length right now):
|
||||
// * baseline
|
||||
// * sub
|
||||
// * super
|
||||
// * top
|
||||
// * text-top
|
||||
// * middle
|
||||
// * bottom
|
||||
// * text-bottom
|
||||
|
||||
// Protect LanguageConverter markup when splitting into parts
|
||||
// $parts = StringUtils::delimiterExplode(
|
||||
// '-{', '}-', '|', $options, true /* allow nesting */
|
||||
// );
|
||||
|
||||
// Give extensions a chance to select the file revision for us
|
||||
// $options = [];
|
||||
// $descQuery = false;
|
||||
// MW.HOOK:BeforeParserFetchFileAndTitle
|
||||
|
||||
// Fetch and register the file (file title may be different via hooks)
|
||||
// list($file, $title) = $this->fetchFileAndTitle($title, $options);
|
||||
|
||||
// Get parameter map
|
||||
// $handler = $file ? $file->getHandler() : false;
|
||||
|
||||
// list($paramMap, $mwArray) = $this->getImageParams($handler);
|
||||
|
||||
// if (!$file) {
|
||||
// $this->addTrackingCategory('broken-file-category');
|
||||
// }
|
||||
|
||||
// Process the input parameters
|
||||
// $caption = '';
|
||||
// $params = [ 'frame' => [], 'handler' => [],
|
||||
// 'horizAlign' => [], 'vertAlign' => [] ];
|
||||
// $seenformat = false;
|
||||
// foreach ($parts as $part) {
|
||||
// $part = trim($part);
|
||||
// list($magicName, $value) = $mwArray->matchVariableStartToEnd($part);
|
||||
// $validated = false;
|
||||
// if (isset($paramMap[$magicName])) {
|
||||
// list($type, $paramName) = $paramMap[$magicName];
|
||||
|
||||
// Special case; width and height come in one variable together
|
||||
// if ($type === 'handler' && $paramName === 'width') {
|
||||
// $parsedWidthParam = $this->parseWidthParam($value);
|
||||
// if (isset($parsedWidthParam['width'])) {
|
||||
// $width = $parsedWidthParam['width'];
|
||||
// if ($handler->validateParam('width', $width)) {
|
||||
// $params[$type]['width'] = $width;
|
||||
// $validated = true;
|
||||
// }
|
||||
// }
|
||||
// if (isset($parsedWidthParam['height'])) {
|
||||
// $height = $parsedWidthParam['height'];
|
||||
// if ($handler->validateParam('height', $height)) {
|
||||
// $params[$type]['height'] = $height;
|
||||
// $validated = true;
|
||||
// }
|
||||
// }
|
||||
// else no validation -- T15436
|
||||
// } else {
|
||||
// if ($type === 'handler') {
|
||||
// // Validate handler parameter
|
||||
// $validated = $handler->validateParam($paramName, $value);
|
||||
// } else {
|
||||
// // Validate @gplx.Internal protected parameters
|
||||
// switch ($paramName) {
|
||||
// case 'manualthumb':
|
||||
// case 'alt':
|
||||
// case 'class':
|
||||
// @todo FIXME: Possibly check validity here for
|
||||
// manualthumb? downstream behavior seems odd with
|
||||
// missing manual thumbs.
|
||||
// $validated = true;
|
||||
// $value = $this->stripAltText($value, $holders);
|
||||
// break;
|
||||
// case 'link':
|
||||
// $chars = self::EXT_LINK_URL_CLASS;
|
||||
// $addr = self::EXT_LINK_ADDR;
|
||||
// $prots = $this->mUrlProtocols;
|
||||
// if ($value === '') {
|
||||
// $paramName = 'no-link';
|
||||
// $value = true;
|
||||
// $validated = true;
|
||||
// } elseif (preg_match("/^((?i)$prots)/", $value)) {
|
||||
// if (preg_match("/^((?i)$prots)$addr$chars*$/u", $value, $m)) {
|
||||
// $paramName = 'link-url';
|
||||
// $this->mOutput->addExternalLink($value);
|
||||
// if ($this->mOptions->getExternalLinkTarget()) {
|
||||
// $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
|
||||
// }
|
||||
// $validated = true;
|
||||
// }
|
||||
// } else {
|
||||
// $linkTitle = Title::newFromText($value);
|
||||
// if ($linkTitle) {
|
||||
// $paramName = 'link-title';
|
||||
// $value = $linkTitle;
|
||||
// $this->mOutput->addLink($linkTitle);
|
||||
// $validated = true;
|
||||
// }
|
||||
// }
|
||||
// break;
|
||||
// case 'frameless':
|
||||
// case 'framed':
|
||||
// case 'thumbnail':
|
||||
// // use first appearing option, discard others.
|
||||
// $validated = !$seenformat;
|
||||
// $seenformat = true;
|
||||
// break;
|
||||
// default:
|
||||
// // Most other things appear to be empty or numeric...
|
||||
// $validated = ($value === false || is_numeric(trim($value)));
|
||||
// }
|
||||
// }
|
||||
|
||||
// if ($validated) {
|
||||
// $params[$type][$paramName] = $value;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// if (!$validated) {
|
||||
// $caption = $part;
|
||||
// }
|
||||
// }
|
||||
|
||||
// Process alignment parameters
|
||||
// if ($params['horizAlign']) {
|
||||
// $params['frame']['align'] = key($params['horizAlign']);
|
||||
// }
|
||||
// if ($params['vertAlign']) {
|
||||
// $params['frame']['valign'] = key($params['vertAlign']);
|
||||
// }
|
||||
|
||||
// $params['frame']['caption'] = $caption;
|
||||
|
||||
// Will the image be presented in a frame, with the caption below?
|
||||
// $imageIsFramed = isset($params['frame']['frame'])
|
||||
// || isset($params['frame']['framed'])
|
||||
// || isset($params['frame']['thumbnail'])
|
||||
// || isset($params['frame']['manualthumb']);
|
||||
|
||||
// In the old days, [[Image:Foo|text...]] would set alt text. Later it
|
||||
// came to also set the caption, ordinary text after the image -- which
|
||||
// makes no sense, because that just repeats the text multiple times in
|
||||
// screen readers. It *also* came to set the title attribute.
|
||||
// Now that we have an alt attribute, we should not set the alt text to
|
||||
// equal the caption: that's worse than useless, it just repeats the
|
||||
// text. This is the framed/thumbnail case. If there's no caption, we
|
||||
// use the unnamed parameter for alt text as well, just for the time be-
|
||||
// ing, if the unnamed param is set and the alt param is not.
|
||||
// For the future, we need to figure out if we want to tweak this more,
|
||||
// e.g., introducing a title= parameter for the title; ignoring the un-
|
||||
// named parameter entirely for images without a caption; adding an ex-
|
||||
// plicit caption= parameter and preserving the old magic unnamed para-
|
||||
// meter for BC; ...
|
||||
// if ($imageIsFramed) { // Framed image
|
||||
// if ($caption === '' && !isset($params['frame']['alt'])) {
|
||||
// // No caption or alt text, add the filename as the alt text so
|
||||
// // that screen readers at least get some description of the image
|
||||
// $params['frame']['alt'] = $title->getText();
|
||||
// }
|
||||
// Do not set $params['frame']['title'] because tooltips don't make sense
|
||||
// for framed images
|
||||
// } else { // Inline image
|
||||
// if (!isset($params['frame']['alt'])) {
|
||||
// // No alt text, use the "caption" for the alt text
|
||||
// if ($caption !== '') {
|
||||
// $params['frame']['alt'] = $this->stripAltText($caption, $holders);
|
||||
// } else {
|
||||
// // No caption, fall back to using the filename for the
|
||||
// // alt text
|
||||
// $params['frame']['alt'] = $title->getText();
|
||||
// }
|
||||
// }
|
||||
// Use the "caption" for the tooltip text
|
||||
// $params['frame']['title'] = $this->stripAltText($caption, $holders);
|
||||
// }
|
||||
|
||||
// MW.HOOK:ParserMakeImageParams
|
||||
|
||||
// Linker does the rest
|
||||
// $time = isset($options['time']) ? $options['time'] : false;
|
||||
// $ret = Linker::makeImageLink($this, $title, $file, $params['frame'], $params['handler'],
|
||||
// $time, $descQuery, $this->mOptions->getThumbSize());
|
||||
|
||||
// Give the handler a chance to modify the parser Object
|
||||
// if ($handler) {
|
||||
// $handler->parserTransformHook($this, $file);
|
||||
// }
|
||||
|
||||
// return $ret;
|
||||
}
|
||||
// public function getImageParams($handler) {
|
||||
// if ($handler) {
|
||||
// $handlerClass = get_class($handler);
|
||||
// }
|
||||
// else {
|
||||
// $handlerClass = '';
|
||||
// }
|
||||
// if (!isset($this->mImageParams[$handlerClass])) {
|
||||
// Initialise static lists
|
||||
// static $internalParamNames = [
|
||||
// 'horizAlign' => [ 'left', 'right', 'center', 'none' ],
|
||||
// 'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
|
||||
// 'bottom', 'text-bottom' ],
|
||||
// 'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
|
||||
// 'upright', 'border', 'link', 'alt', 'class' ],
|
||||
// ];
|
||||
// static $internalParamMap;
|
||||
// if (!$internalParamMap) {
|
||||
// $internalParamMap = [];
|
||||
// foreach ($internalParamNames as $type => $names) {
|
||||
// foreach ($names as $name) {
|
||||
// $magicName = str_replace('-', '_', "img_$name");
|
||||
// $internalParamMap[$magicName] = [ $type, $name ];
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// Add handler params
|
||||
// $paramMap = $internalParamMap;
|
||||
// if ($handler) {
|
||||
// $handlerParamMap = $handler->getParamMap();
|
||||
// foreach ($handlerParamMap as $magic => $paramName) {
|
||||
// $paramMap[$magic] = [ 'handler', $paramName ];
|
||||
// }
|
||||
// }
|
||||
// $this->mImageParams[$handlerClass] = $paramMap;
|
||||
// $this->mImageParamsMagicArray[$handlerClass] = new MagicWordArray(array_keys($paramMap));
|
||||
// }
|
||||
// return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
|
||||
// }
|
||||
// /**
|
||||
// * Make HTML for a thumbnail including image, border and caption
|
||||
// * @param Title $title
|
||||
// * @param File|boolean $file File Object or false if it doesn't exist
|
||||
// * @param String $label
|
||||
// * @param String $alt
|
||||
// * @param String $align
|
||||
// * @param array $params
|
||||
// * @param boolean $framed
|
||||
// * @param String $manualthumb
|
||||
// * @return String
|
||||
// */
|
||||
// public static function makeThumbLinkObj( Title $title, $file, $label = '', $alt,
|
||||
// $align = 'right', $params = [], $framed = false, $manualthumb = ""
|
||||
// ) {
|
||||
// $frameParams = [
|
||||
// 'alt' => $alt,
|
||||
// 'caption' => $label,
|
||||
// 'align' => $align
|
||||
// ];
|
||||
// if ( $framed ) {
|
||||
// $frameParams['framed'] = true;
|
||||
// }
|
||||
// if ( $manualthumb ) {
|
||||
// $frameParams['manualthumb'] = $manualthumb;
|
||||
// }
|
||||
// return self::makeThumbLink2( $title, $file, $frameParams, $params );
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @param Title $title
|
||||
// * @param File $file
|
||||
// * @param array $frameParams
|
||||
// * @param array $handlerParams
|
||||
// * @param boolean $time
|
||||
// * @param String $query
|
||||
// * @return String
|
||||
// */
|
||||
// public static function makeThumbLink2( Title $title, $file, $frameParams = [],
|
||||
// $handlerParams = [], $time = false, $query = ""
|
||||
// ) {
|
||||
// $exists = $file && $file->exists();
|
||||
//
|
||||
// $page = isset( $handlerParams['page'] ) ? $handlerParams['page'] : false;
|
||||
// if ( !isset( $frameParams['align'] ) ) {
|
||||
// $frameParams['align'] = 'right';
|
||||
// }
|
||||
// if ( !isset( $frameParams['alt'] ) ) {
|
||||
// $frameParams['alt'] = '';
|
||||
// }
|
||||
// if ( !isset( $frameParams['title'] ) ) {
|
||||
// $frameParams['title'] = '';
|
||||
// }
|
||||
// if ( !isset( $frameParams['caption'] ) ) {
|
||||
// $frameParams['caption'] = '';
|
||||
// }
|
||||
//
|
||||
// if ( empty( $handlerParams['width'] ) ) {
|
||||
// // Reduce width for upright images when parameter 'upright' is used
|
||||
// $handlerParams['width'] = isset( $frameParams['upright'] ) ? 130 : 180;
|
||||
// }
|
||||
// $thumb = false;
|
||||
// $noscale = false;
|
||||
// $manualthumb = false;
|
||||
//
|
||||
// if ( !$exists ) {
|
||||
// $outerWidth = $handlerParams['width'] + 2;
|
||||
// } else {
|
||||
// if ( isset( $frameParams['manualthumb'] ) ) {
|
||||
// # Use manually specified thumbnail
|
||||
// $manual_title = Title::makeTitleSafe( NS_FILE, $frameParams['manualthumb'] );
|
||||
// if ( $manual_title ) {
|
||||
// $manual_img = wfFindFile( $manual_title );
|
||||
// if ( $manual_img ) {
|
||||
// $thumb = $manual_img->getUnscaledThumb( $handlerParams );
|
||||
// $manualthumb = true;
|
||||
// } else {
|
||||
// $exists = false;
|
||||
// }
|
||||
// }
|
||||
// } elseif ( isset( $frameParams['framed'] ) ) {
|
||||
// // Use image dimensions, don't scale
|
||||
// $thumb = $file->getUnscaledThumb( $handlerParams );
|
||||
// $noscale = true;
|
||||
// } else {
|
||||
// # Do not present an image bigger than the source, for bitmap-style images
|
||||
// # This is a hack to maintain compatibility with arbitrary pre-1.10 behavior
|
||||
// $srcWidth = $file->getWidth( $page );
|
||||
// if ( $srcWidth && !$file->mustRender() && $handlerParams['width'] > $srcWidth ) {
|
||||
// $handlerParams['width'] = $srcWidth;
|
||||
// }
|
||||
// $thumb = $file->transform( $handlerParams );
|
||||
// }
|
||||
//
|
||||
// if ( $thumb ) {
|
||||
// $outerWidth = $thumb->getWidth() + 2;
|
||||
// } else {
|
||||
// $outerWidth = $handlerParams['width'] + 2;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// # ThumbnailImage::toHtml() already adds page= onto the end of DjVu URLs
|
||||
// # So we don't need to pass it here in $query. However, the URL for the
|
||||
// # zoom icon still needs it, so we make a unique query for it. See bug 14771
|
||||
// $url = $title->getLocalURL( $query );
|
||||
// if ( $page ) {
|
||||
// $url = wfAppendQuery( $url, [ 'page' => $page ] );
|
||||
// }
|
||||
// if ( $manualthumb
|
||||
// && !isset( $frameParams['link-title'] )
|
||||
// && !isset( $frameParams['link-url'] )
|
||||
// && !isset( $frameParams['no-link'] ) ) {
|
||||
// $frameParams['link-url'] = $url;
|
||||
// }
|
||||
//
|
||||
// $s = "<div class=\"thumb t{$frameParams['align']}\">"
|
||||
// . "<div class=\"thumbinner\" style=\"width:{$outerWidth}px;\">";
|
||||
//
|
||||
// if ( !$exists ) {
|
||||
// $s .= self::makeBrokenImageLinkObj( $title, $frameParams['title'], '', '', '', $time == true );
|
||||
// $zoomIcon = '';
|
||||
// } elseif ( !$thumb ) {
|
||||
// $s .= wfMessage( 'thumbnail_error', '' )->escaped();
|
||||
// $zoomIcon = '';
|
||||
// } else {
|
||||
// if ( !$noscale && !$manualthumb ) {
|
||||
// self::processResponsiveImages( $file, $thumb, $handlerParams );
|
||||
// }
|
||||
// $params = [
|
||||
// 'alt' => $frameParams['alt'],
|
||||
// 'title' => $frameParams['title'],
|
||||
// 'img-class' => ( isset( $frameParams['class'] ) && $frameParams['class'] !== ''
|
||||
// ? $frameParams['class'] . ' '
|
||||
// : '' ) . 'thumbimage'
|
||||
// ];
|
||||
// $params = self::getImageLinkMTOParams( $frameParams, $query ) + $params;
|
||||
// $s .= $thumb->toHtml( $params );
|
||||
// if ( isset( $frameParams['framed'] ) ) {
|
||||
// $zoomIcon = "";
|
||||
// } else {
|
||||
// $zoomIcon = Html::rawElement( 'div', [ 'class' => 'magnify' ],
|
||||
// Html::rawElement( 'a', [
|
||||
// 'href' => $url,
|
||||
// 'class' => 'internal',
|
||||
// 'title' => wfMessage( 'thumbnail-more' )->text() ],
|
||||
// "" ) );
|
||||
// }
|
||||
// }
|
||||
// $s .= ' <div class="thumbcaption">' . $zoomIcon . $frameParams['caption'] . "</div></div></div>";
|
||||
// return str_replace( "\n", ' ', $s );
|
||||
// }
|
||||
// /**
|
||||
// * Make a "broken" link to an image
|
||||
// *
|
||||
// * @since 1.16.3
|
||||
// * @param Title $title
|
||||
// * @param String $label Link label (plain text)
|
||||
// * @param String $query Query String
|
||||
// * @param String $unused1 Unused parameter kept for b/c
|
||||
// * @param String $unused2 Unused parameter kept for b/c
|
||||
// * @param boolean $time A file of a certain timestamp was requested
|
||||
// * @return String
|
||||
// */
|
||||
// public static function makeBrokenImageLinkObj( $title, $label = '',
|
||||
// $query = '', $unused1 = '', $unused2 = '', $time = false
|
||||
// ) {
|
||||
// if ( !$title instanceof Title ) {
|
||||
// wfWarn( __METHOD__ . ': Requires $title to be a Title Object.' );
|
||||
// return "<!-- ERROR -->" . htmlspecialchars( $label );
|
||||
// }
|
||||
//
|
||||
// global $wgEnableUploads, $wgUploadMissingFileUrl, $wgUploadNavigationUrl;
|
||||
// if ( $label == '' ) {
|
||||
// $label = $title->getPrefixedText();
|
||||
// }
|
||||
// $encLabel = htmlspecialchars( $label );
|
||||
// $currentExists = $time ? ( wfFindFile( $title ) != false ) : false;
|
||||
//
|
||||
// if ( ( $wgUploadMissingFileUrl || $wgUploadNavigationUrl || $wgEnableUploads )
|
||||
// && !$currentExists
|
||||
// ) {
|
||||
// $redir = RepoGroup::singleton()->getLocalRepo()->checkRedirect( $title );
|
||||
//
|
||||
// if ( $redir ) {
|
||||
// // We already know it's a redirect, so mark it
|
||||
// // accordingly
|
||||
// return self::link(
|
||||
// $title,
|
||||
// $encLabel,
|
||||
// [ 'class' => 'mw-redirect' ],
|
||||
// wfCgiToArray( $query ),
|
||||
// [ 'known', 'noclasses' ]
|
||||
// );
|
||||
// }
|
||||
//
|
||||
// $href = self::getUploadUrl( $title, $query );
|
||||
//
|
||||
// return '<a href="' . htmlspecialchars( $href ) . '" class="new" title="' .
|
||||
// htmlspecialchars( $title->getPrefixedText(), ENT_QUOTES ) . '">' .
|
||||
// $encLabel . '</a>';
|
||||
// }
|
||||
//
|
||||
// return self::link( $title, $encLabel, [], wfCgiToArray( $query ), [ 'known', 'noclasses' ] );
|
||||
// }
|
||||
// /**
|
||||
// * Create a direct link to a given uploaded file.
|
||||
// *
|
||||
// * @since 1.16.3
|
||||
// * @param Title $title
|
||||
// * @param String $html Pre-sanitized HTML
|
||||
// * @param String $time MW timestamp of file creation time
|
||||
// * @return String HTML
|
||||
// */
|
||||
// public static function makeMediaLinkObj( $title, $html = '', $time = false ) {
|
||||
// $img = wfFindFile( $title, [ 'time' => $time ] );
|
||||
// return self::makeMediaLinkFile( $title, $img, $html );
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Create a direct link to a given uploaded file.
|
||||
// * This will make a broken link if $file is false.
|
||||
// *
|
||||
// * @since 1.16.3
|
||||
// * @param Title $title
|
||||
// * @param File|boolean $file File Object or false
|
||||
// * @param String $html Pre-sanitized HTML
|
||||
// * @return String HTML
|
||||
// *
|
||||
// * @todo Handle invalid or missing images better.
|
||||
// */
|
||||
// public static function makeMediaLinkFile( Title $title, $file, $html = '' ) {
|
||||
// if ( $file && $file->exists() ) {
|
||||
// $url = $file->getUrl();
|
||||
// $class = 'internal';
|
||||
// } else {
|
||||
// $url = self::getUploadUrl( $title );
|
||||
// $class = 'new';
|
||||
// }
|
||||
//
|
||||
// $alt = $title->getText();
|
||||
// if ( $html == '' ) {
|
||||
// $html = $alt;
|
||||
// }
|
||||
//
|
||||
// $ret = '';
|
||||
// $attribs = [
|
||||
// 'href' => $url,
|
||||
// 'class' => $class,
|
||||
// 'title' => $alt
|
||||
// ];
|
||||
//
|
||||
// if ( !Hooks::run( 'LinkerMakeMediaLinkFile',
|
||||
// [ $title, $file, &$html, &$attribs, &$ret ] ) ) {
|
||||
// wfDebug( "Hook LinkerMakeMediaLinkFile changed the output of link "
|
||||
// . "with url {$url} and text {$html} to {$ret}\n", true );
|
||||
// return $ret;
|
||||
// }
|
||||
//
|
||||
// return Html::rawElement( 'a', $attribs, $html );
|
||||
// }
|
||||
// "../" prefix; Normalize_subpage_link counts and strips each leading occurrence, going up one path segment of the context title per occurrence
private static final byte[] Bry__dot2 = Bry_.new_a7("../");
|
||||
}
|
||||
class Linker_rel_splitter implements gplx.core.brys.Bry_split_wkr {
|
||||
private final Hash_adp_bry hash = Hash_adp_bry.cs();
|
||||
private final Bry_bfr bfr = Bry_bfr_.New();
|
||||
public int Split(byte[] src, int itm_bgn, int itm_end) { // $combined = array_unique( array_merge( $newRels, $oldRels ) );
|
||||
byte[] val = (byte[])hash.Get_by_mid(src, itm_bgn, itm_end);
|
||||
if (val == null) {
|
||||
val = Bry_.Mid(src, itm_bgn, itm_end);
|
||||
hash.Add_as_key_and_val(val);
|
||||
if (bfr.Len_gt_0()) bfr.Add_byte_space();
|
||||
bfr.Add(val);
|
||||
}
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
public byte[] To_bry() {
|
||||
hash.Clear();
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
}
|
@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
/**
 * Mutable holder for a link / text pair.
 * Both members are public and are overwritten together by {@link #Init}.
 */
public class Xomw_linker__normalize_subpage_link {
	public byte[] link, text;

	/**
	 * Stores both values and returns this same instance so the call can be chained.
	 * Neither array is copied.
	 */
	public Xomw_linker__normalize_subpage_link Init(byte[] link, byte[] text) {
		this.text = text;
		this.link = link;
		return this;
	}
}
|
@ -0,0 +1,43 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xomw_linker__normalize_subpage_link__tst {
|
||||
private final Xomw_linker__normalize_subpage_link__fxt fxt = new Xomw_linker__normalize_subpage_link__fxt();
|
||||
@Test public void None() {fxt.Test__normalize_subpage_link("A/B/C" , "Z" , "" , "Z" , "");}
|
||||
@Test public void Hash() {fxt.Test__normalize_subpage_link("A/B/C" , "/Y#Z" , "" , "A/B/C/Y#Z" , "/Y#Z");}
|
||||
@Test public void Slash__basic() {fxt.Test__normalize_subpage_link("A/B/C" , "/Z" , "" , "A/B/C/Z" , "/Z");}
|
||||
@Test public void Slash__slash() {fxt.Test__normalize_subpage_link("A/B/C" , "/Z/" , "" , "A/B/C/Z" , "Z");}
|
||||
@Test public void Dot2__empty() {fxt.Test__normalize_subpage_link("A/B/C" , "../" , "" , "A/B" , "");}
|
||||
@Test public void Dot2__many() {fxt.Test__normalize_subpage_link("A/B/C" , "../../Z" , "z1" , "A/Z" , "z1");}
|
||||
@Test public void Dot2__trailing() {fxt.Test__normalize_subpage_link("A/B/C" , "../../Z/" , "" , "A/Z" , "Z");}
|
||||
}
|
||||
class Xomw_linker__normalize_subpage_link__fxt {
|
||||
private final Xomw_linker mgr = new Xomw_linker();
|
||||
private final Xowe_wiki wiki;
|
||||
private final Xomw_linker__normalize_subpage_link normalize_subpage_link = new Xomw_linker__normalize_subpage_link();
|
||||
public Xomw_linker__normalize_subpage_link__fxt() {
|
||||
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||
this.wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
}
|
||||
public void Test__normalize_subpage_link(String page_title_str, String link, String text, String expd_link, String expd_text) {
|
||||
mgr.Normalize_subpage_link(normalize_subpage_link, wiki.Ttl_parse(Bry_.new_u8(page_title_str)), Bry_.new_u8(link), Bry_.new_u8(text));
|
||||
Gftest.Eq__str(expd_link, String_.new_u8(normalize_subpage_link.link));
|
||||
Gftest.Eq__str(expd_text, String_.new_u8(normalize_subpage_link.text));
|
||||
}
|
||||
}
|
@ -0,0 +1,39 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*;
|
||||
public class Xomw_linker__split_trail__tst {
|
||||
private final Xomw_linker__split_trail__fxt fxt = new Xomw_linker__split_trail__fxt();
|
||||
@Test public void Basic() {fxt.Test__split_trail("abc def" , "abc" , " def");}
|
||||
@Test public void None() {fxt.Test__split_trail(" abc" , null , " abc");}
|
||||
}
|
||||
class Xomw_linker__split_trail__fxt {
|
||||
private final Xomw_linker linker = new Xomw_linker();
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
public Xomw_linker__split_trail__fxt() {
|
||||
String[] ary = new String[] {"a", "b", "c", "d", "e", "f"};
|
||||
for (String itm : ary)
|
||||
trie.Add_str_str(itm, itm);
|
||||
linker.Init_by_wiki(trie);
|
||||
}
|
||||
public void Test__split_trail(String trail_str, String expd_inside, String expd_trail) {
|
||||
byte[][] split_trail = linker.Split_trail(Bry_.new_u8(trail_str));
|
||||
Gftest.Eq__str(expd_inside, String_.new_u8(split_trail[0]));
|
||||
Gftest.Eq__str(expd_trail , String_.new_u8(split_trail[1]));
|
||||
}
|
||||
}
|
538
400_xowa/src/gplx/xowa/mws/Xomw_sanitizer.java
Normal file
538
400_xowa/src/gplx/xowa/mws/Xomw_sanitizer.java
Normal file
@ -0,0 +1,538 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.encoders.*; import gplx.langs.htmls.entitys.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
import gplx.xowa.mws.parsers.*;
|
||||
public class Xomw_sanitizer {
|
||||
private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr();
|
||||
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
|
||||
public void Fix_tag_attributes(Bry_bfr bfr, byte[] tag_name, byte[] atrs) {
|
||||
atr_bldr.Atrs__clear();
|
||||
atr_parser.Parse(atr_bldr, -1, -1, atrs, 0, atrs.length);
|
||||
int len = atr_bldr.Atrs__len();
|
||||
|
||||
// PORTED: Sanitizer.php|safeEncodeTagAttributes
|
||||
for (int i = 0; i < len; i++) {
|
||||
// $encAttribute = htmlspecialchars( $attribute );
|
||||
// $encValue = Sanitizer::safeEncodeAttribute( $value );
|
||||
// $attribs[] = "$encAttribute=\"$encValue\"";
|
||||
Mwh_atr_itm itm = atr_bldr.Atrs__get_at(i);
|
||||
bfr.Add_byte_space(); // "return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';"
|
||||
bfr.Add_bry_escape_html(itm.Key_bry(), itm.Key_bgn(), itm.Key_end());
|
||||
bfr.Add_byte_eq().Add_byte_quote();
|
||||
bfr.Add(itm.Val_as_bry()); // TODO.XO:Sanitizer::encode
|
||||
bfr.Add_byte_quote();
|
||||
}
|
||||
}
|
||||
public void Normalize_char_references(Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
Normalize_char_references(bfr, Bool_.N, src, src_bgn, src_end);
|
||||
}
|
||||
public byte[] Normalize_char_references(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end) {
|
||||
// assert static structs
|
||||
if (Normalize__dec == null) {
|
||||
synchronized (Xomw_sanitizer.class) {
|
||||
html_entities = Html_entities_new();
|
||||
Normalize__dec = Bool_ary_bldr.New_u8().Set_rng(Byte_ascii.Num_0, Byte_ascii.Num_9).To_ary();
|
||||
Normalize__hex = Bool_ary_bldr.New_u8()
|
||||
.Set_rng(Byte_ascii.Num_0, Byte_ascii.Num_9)
|
||||
.Set_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z)
|
||||
.Set_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z)
|
||||
.To_ary();
|
||||
Normalize__ent = Bool_ary_bldr.New_u8()
|
||||
.Set_rng(Byte_ascii.Num_0, Byte_ascii.Num_9)
|
||||
.Set_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z)
|
||||
.Set_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z)
|
||||
.Set_rng(128, 255)
|
||||
.To_ary();
|
||||
}
|
||||
}
|
||||
|
||||
// XO.BRY_BFR
|
||||
boolean dirty = false;
|
||||
int cur = src_bgn;
|
||||
boolean called_by_bry = bfr == null;
|
||||
|
||||
while (true) {
|
||||
// search for "&"
|
||||
int find_bgn = Bry_find_.Find_fwd(src, Byte_ascii.Amp, cur);
|
||||
if (find_bgn == Bry_find_.Not_found) { // "&" not found; exit
|
||||
if (dirty)
|
||||
bfr.Add_mid(src, cur, src_end);
|
||||
break;
|
||||
}
|
||||
int ent_bgn = find_bgn + 1; // +1 to skip &
|
||||
|
||||
// get regex; (a) dec (	); (b) hex (ÿ); (c) entity (α);
|
||||
boolean[] regex = null;
|
||||
// check for #;
|
||||
if (ent_bgn < src_end && src[ent_bgn] == Byte_ascii.Hash) {
|
||||
ent_bgn++;
|
||||
if (ent_bgn < src_end) {
|
||||
byte nxt = src[ent_bgn];
|
||||
// check for x
|
||||
if (nxt == Byte_ascii.Ltr_X || nxt == Byte_ascii.Ltr_x) {
|
||||
ent_bgn++;
|
||||
regex = Normalize__hex;
|
||||
}
|
||||
}
|
||||
if (regex == null)
|
||||
regex = Normalize__dec;
|
||||
}
|
||||
else {
|
||||
regex = Normalize__ent;
|
||||
}
|
||||
|
||||
// keep looping until invalid regex
|
||||
int ent_end = ent_bgn;
|
||||
byte b = Byte_ascii.Null;
|
||||
for (int i = ent_bgn; i < src_end; i++) {
|
||||
b = src[i];
|
||||
if (regex[b])
|
||||
ent_end++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
// mark dirty; can optimize later by checking if "<" already exists
|
||||
dirty = true;
|
||||
if (bfr == null) bfr = Bry_bfr_.New();
|
||||
bfr.Add_mid(src, cur, find_bgn); // add everything before &
|
||||
|
||||
// invalid <- regex ended, but not at semic
|
||||
if (b != Byte_ascii.Semic) {
|
||||
bfr.Add(Gfh_entity_.Amp_bry); // transform "&" to "&"
|
||||
cur = find_bgn + 1; // position after "&"
|
||||
continue;
|
||||
}
|
||||
|
||||
// do normalization
|
||||
byte[] name = Bry_.Mid(src, ent_bgn, ent_end);
|
||||
boolean ret = false;
|
||||
if (regex == Normalize__ent) {
|
||||
Normalize_entity(bfr, name);
|
||||
ret = true;
|
||||
}
|
||||
else if (regex == Normalize__dec) {
|
||||
ret = Dec_char_reference(bfr, name);
|
||||
}
|
||||
else if (regex == Normalize__hex) {
|
||||
ret = Hex_char_reference(bfr, name);
|
||||
}
|
||||
if (!ret) {
|
||||
bfr.Add(Gfh_entity_.Amp_bry); // transform "&" to "&"
|
||||
bfr.Add_bry_escape_html(src, find_bgn + 1, ent_end + 1); // "find_bgn + 1" to start after "&"; "ent_end + 1" to include ";"
|
||||
}
|
||||
|
||||
cur = ent_end + 1; // +1 to position after ";"
|
||||
}
|
||||
|
||||
// XO.BRY_BFR
|
||||
if (dirty) {
|
||||
if (called_by_bry)
|
||||
return bfr.To_bry_and_clear();
|
||||
else
|
||||
return Bry_.Empty;
|
||||
}
|
||||
else {
|
||||
if (called_by_bry) {
|
||||
if (src_bgn == 0 && src_end == src.length)
|
||||
return src;
|
||||
else
|
||||
return Bry_.Mid(src, src_bgn, src_end);
|
||||
}
|
||||
else {
|
||||
if (lone_bfr)
|
||||
bfr.Add_mid(src, src_bgn, src_end);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD,
|
||||
// return the equivalent numeric entity reference (except for the core <
|
||||
// > & "). If the entity is a MediaWiki-specific alias, returns
|
||||
// the HTML equivalent. Otherwise, returns HTML-escaped text of
|
||||
// pseudo-entity source (eg &foo;)
|
||||
private void Normalize_entity(Bry_bfr bfr, byte[] name) {
|
||||
Object o = html_entities.Get_by_bry(name);
|
||||
if (o == null) {
|
||||
bfr.Add_str_a7("&").Add(name).Add_byte_semic();
|
||||
}
|
||||
else {
|
||||
Xomw_html_ent entity = (Xomw_html_ent)o;
|
||||
bfr.Add(entity.html);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean Dec_char_reference(Bry_bfr bfr, byte[] codepoint) {
|
||||
int point = Bry_.To_int_or(codepoint, -1);
|
||||
if (Validate_codepoint(point)) {
|
||||
bfr.Add_str_a7("&#").Add_int_variable(point).Add_byte_semic();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean Hex_char_reference(Bry_bfr bfr, byte[] codepoint) {
|
||||
int point = Hex_utl_.Parse_or(codepoint, -1);
|
||||
if (Validate_codepoint(point)) {
|
||||
bfr.Add_str_a7("&#x");
|
||||
Hex_utl_.Write_bfr(bfr, Bool_.Y, point); // sprintf( '&#x%x;', $point )
|
||||
bfr.Add_byte_semic();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean Validate_codepoint(int codepoint) {
|
||||
// U+000C is valid in HTML5 but not allowed in XML.
|
||||
// U+000D is valid in XML but not allowed in HTML5.
|
||||
// U+007F - U+009F are disallowed in HTML5 (control characters).
|
||||
return codepoint == 0x09
|
||||
|| codepoint == 0x0a
|
||||
|| (codepoint >= 0x20 && codepoint <= 0x7e)
|
||||
|| (codepoint >= 0xa0 && codepoint <= 0xd7ff)
|
||||
|| (codepoint >= 0xe000 && codepoint <= 0xfffd)
|
||||
|| (codepoint >= 0x10000 && codepoint <= 0x10ffff);
|
||||
}
|
||||
|
||||
private static boolean[] Normalize__dec, Normalize__hex, Normalize__ent;
|
||||
private static Hash_adp_bry html_entities;
|
||||
private static Hash_adp_bry Html_entities_new() {
|
||||
Bry_bfr tmp = Bry_bfr_.New();
|
||||
Hash_adp_bry rv = Hash_adp_bry.cs();
|
||||
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__alias, -1, "רלמ", "‏");
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__alias, -1, "رلم", "‏");
|
||||
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__char, 60, "lt", "<");
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__char, 62, "gt", ">");
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__char, 38, "amp", "&");
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__char, 34, "quot", """);
|
||||
|
||||
// List of all named character entities defined in HTML 4.01
|
||||
// https://www.w3.org/TR/html4/sgml/entities.html
|
||||
// As well as ' which is only defined starting in XHTML1.
|
||||
Html_entities_set(rv, tmp, "Aacute" , 193);
|
||||
Html_entities_set(rv, tmp, "aacute" , 225);
|
||||
Html_entities_set(rv, tmp, "Acirc" , 194);
|
||||
Html_entities_set(rv, tmp, "acirc" , 226);
|
||||
Html_entities_set(rv, tmp, "acute" , 180);
|
||||
Html_entities_set(rv, tmp, "AElig" , 198);
|
||||
Html_entities_set(rv, tmp, "aelig" , 230);
|
||||
Html_entities_set(rv, tmp, "Agrave" , 192);
|
||||
Html_entities_set(rv, tmp, "agrave" , 224);
|
||||
Html_entities_set(rv, tmp, "alefsym" , 8501);
|
||||
Html_entities_set(rv, tmp, "Alpha" , 913);
|
||||
Html_entities_set(rv, tmp, "alpha" , 945);
|
||||
Html_entities_set(rv, tmp, "amp" , 38); // XO: identical to Type__char entry; note that Type__char should be evaluated first
|
||||
Html_entities_set(rv, tmp, "and" , 8743);
|
||||
Html_entities_set(rv, tmp, "ang" , 8736);
|
||||
Html_entities_set(rv, tmp, "apos" , 39); // New in XHTML & HTML 5; avoid in output for compatibility with IE.
|
||||
Html_entities_set(rv, tmp, "Aring" , 197);
|
||||
Html_entities_set(rv, tmp, "aring" , 229);
|
||||
Html_entities_set(rv, tmp, "asymp" , 8776);
|
||||
Html_entities_set(rv, tmp, "Atilde" , 195);
|
||||
Html_entities_set(rv, tmp, "atilde" , 227);
|
||||
Html_entities_set(rv, tmp, "Auml" , 196);
|
||||
Html_entities_set(rv, tmp, "auml" , 228);
|
||||
Html_entities_set(rv, tmp, "bdquo" , 8222);
|
||||
Html_entities_set(rv, tmp, "Beta" , 914);
|
||||
Html_entities_set(rv, tmp, "beta" , 946);
|
||||
Html_entities_set(rv, tmp, "brvbar" , 166);
|
||||
Html_entities_set(rv, tmp, "bull" , 8226);
|
||||
Html_entities_set(rv, tmp, "cap" , 8745);
|
||||
Html_entities_set(rv, tmp, "Ccedil" , 199);
|
||||
Html_entities_set(rv, tmp, "ccedil" , 231);
|
||||
Html_entities_set(rv, tmp, "cedil" , 184);
|
||||
Html_entities_set(rv, tmp, "cent" , 162);
|
||||
Html_entities_set(rv, tmp, "Chi" , 935);
|
||||
Html_entities_set(rv, tmp, "chi" , 967);
|
||||
Html_entities_set(rv, tmp, "circ" , 710);
|
||||
Html_entities_set(rv, tmp, "clubs" , 9827);
|
||||
Html_entities_set(rv, tmp, "cong" , 8773);
|
||||
Html_entities_set(rv, tmp, "copy" , 169);
|
||||
Html_entities_set(rv, tmp, "crarr" , 8629);
|
||||
Html_entities_set(rv, tmp, "cup" , 8746);
|
||||
Html_entities_set(rv, tmp, "curren" , 164);
|
||||
Html_entities_set(rv, tmp, "dagger" , 8224);
|
||||
Html_entities_set(rv, tmp, "Dagger" , 8225);
|
||||
Html_entities_set(rv, tmp, "darr" , 8595);
|
||||
Html_entities_set(rv, tmp, "dArr" , 8659);
|
||||
Html_entities_set(rv, tmp, "deg" , 176);
|
||||
Html_entities_set(rv, tmp, "Delta" , 916);
|
||||
Html_entities_set(rv, tmp, "delta" , 948);
|
||||
Html_entities_set(rv, tmp, "diams" , 9830);
|
||||
Html_entities_set(rv, tmp, "divide" , 247);
|
||||
Html_entities_set(rv, tmp, "Eacute" , 201);
|
||||
Html_entities_set(rv, tmp, "eacute" , 233);
|
||||
Html_entities_set(rv, tmp, "Ecirc" , 202);
|
||||
Html_entities_set(rv, tmp, "ecirc" , 234);
|
||||
Html_entities_set(rv, tmp, "Egrave" , 200);
|
||||
Html_entities_set(rv, tmp, "egrave" , 232);
|
||||
Html_entities_set(rv, tmp, "empty" , 8709);
|
||||
Html_entities_set(rv, tmp, "emsp" , 8195);
|
||||
Html_entities_set(rv, tmp, "ensp" , 8194);
|
||||
Html_entities_set(rv, tmp, "Epsilon" , 917);
|
||||
Html_entities_set(rv, tmp, "epsilon" , 949);
|
||||
Html_entities_set(rv, tmp, "equiv" , 8801);
|
||||
Html_entities_set(rv, tmp, "Eta" , 919);
|
||||
Html_entities_set(rv, tmp, "eta" , 951);
|
||||
Html_entities_set(rv, tmp, "ETH" , 208);
|
||||
Html_entities_set(rv, tmp, "eth" , 240);
|
||||
Html_entities_set(rv, tmp, "Euml" , 203);
|
||||
Html_entities_set(rv, tmp, "euml" , 235);
|
||||
Html_entities_set(rv, tmp, "euro" , 8364);
|
||||
Html_entities_set(rv, tmp, "exist" , 8707);
|
||||
Html_entities_set(rv, tmp, "fnof" , 402);
|
||||
Html_entities_set(rv, tmp, "forall" , 8704);
|
||||
Html_entities_set(rv, tmp, "frac12" , 189);
|
||||
Html_entities_set(rv, tmp, "frac14" , 188);
|
||||
Html_entities_set(rv, tmp, "frac34" , 190);
|
||||
Html_entities_set(rv, tmp, "frasl" , 8260);
|
||||
Html_entities_set(rv, tmp, "Gamma" , 915);
|
||||
Html_entities_set(rv, tmp, "gamma" , 947);
|
||||
Html_entities_set(rv, tmp, "ge" , 8805);
|
||||
Html_entities_set(rv, tmp, "gt" , 62);
|
||||
Html_entities_set(rv, tmp, "harr" , 8596);
|
||||
Html_entities_set(rv, tmp, "hArr" , 8660);
|
||||
Html_entities_set(rv, tmp, "hearts" , 9829);
|
||||
Html_entities_set(rv, tmp, "hellip" , 8230);
|
||||
Html_entities_set(rv, tmp, "Iacute" , 205);
|
||||
Html_entities_set(rv, tmp, "iacute" , 237);
|
||||
Html_entities_set(rv, tmp, "Icirc" , 206);
|
||||
Html_entities_set(rv, tmp, "icirc" , 238);
|
||||
Html_entities_set(rv, tmp, "iexcl" , 161);
|
||||
Html_entities_set(rv, tmp, "Igrave" , 204);
|
||||
Html_entities_set(rv, tmp, "igrave" , 236);
|
||||
Html_entities_set(rv, tmp, "image" , 8465);
|
||||
Html_entities_set(rv, tmp, "infin" , 8734);
|
||||
Html_entities_set(rv, tmp, "int" , 8747);
|
||||
Html_entities_set(rv, tmp, "Iota" , 921);
|
||||
Html_entities_set(rv, tmp, "iota" , 953);
|
||||
Html_entities_set(rv, tmp, "iquest" , 191);
|
||||
Html_entities_set(rv, tmp, "isin" , 8712);
|
||||
Html_entities_set(rv, tmp, "Iuml" , 207);
|
||||
Html_entities_set(rv, tmp, "iuml" , 239);
|
||||
Html_entities_set(rv, tmp, "Kappa" , 922);
|
||||
Html_entities_set(rv, tmp, "kappa" , 954);
|
||||
Html_entities_set(rv, tmp, "Lambda" , 923);
|
||||
Html_entities_set(rv, tmp, "lambda" , 955);
|
||||
Html_entities_set(rv, tmp, "lang" , 9001);
|
||||
Html_entities_set(rv, tmp, "laquo" , 171);
|
||||
Html_entities_set(rv, tmp, "larr" , 8592);
|
||||
Html_entities_set(rv, tmp, "lArr" , 8656);
|
||||
Html_entities_set(rv, tmp, "lceil" , 8968);
|
||||
Html_entities_set(rv, tmp, "ldquo" , 8220);
|
||||
Html_entities_set(rv, tmp, "le" , 8804);
|
||||
Html_entities_set(rv, tmp, "lfloor" , 8970);
|
||||
Html_entities_set(rv, tmp, "lowast" , 8727);
|
||||
Html_entities_set(rv, tmp, "loz" , 9674);
|
||||
Html_entities_set(rv, tmp, "lrm" , 8206);
|
||||
Html_entities_set(rv, tmp, "lsaquo" , 8249);
|
||||
Html_entities_set(rv, tmp, "lsquo" , 8216);
|
||||
Html_entities_set(rv, tmp, "lt" , 60);
|
||||
Html_entities_set(rv, tmp, "macr" , 175);
|
||||
Html_entities_set(rv, tmp, "mdash" , 8212);
|
||||
Html_entities_set(rv, tmp, "micro" , 181);
|
||||
Html_entities_set(rv, tmp, "middot" , 183);
|
||||
Html_entities_set(rv, tmp, "minus" , 8722);
|
||||
Html_entities_set(rv, tmp, "Mu" , 924);
|
||||
Html_entities_set(rv, tmp, "mu" , 956);
|
||||
Html_entities_set(rv, tmp, "nabla" , 8711);
|
||||
Html_entities_set(rv, tmp, "nbsp" , 160);
|
||||
Html_entities_set(rv, tmp, "ndash" , 8211);
|
||||
Html_entities_set(rv, tmp, "ne" , 8800);
|
||||
Html_entities_set(rv, tmp, "ni" , 8715);
|
||||
Html_entities_set(rv, tmp, "not" , 172);
|
||||
Html_entities_set(rv, tmp, "notin" , 8713);
|
||||
Html_entities_set(rv, tmp, "nsub" , 8836);
|
||||
Html_entities_set(rv, tmp, "Ntilde" , 209);
|
||||
Html_entities_set(rv, tmp, "ntilde" , 241);
|
||||
Html_entities_set(rv, tmp, "Nu" , 925);
|
||||
Html_entities_set(rv, tmp, "nu" , 957);
|
||||
Html_entities_set(rv, tmp, "Oacute" , 211);
|
||||
Html_entities_set(rv, tmp, "oacute" , 243);
|
||||
Html_entities_set(rv, tmp, "Ocirc" , 212);
|
||||
Html_entities_set(rv, tmp, "ocirc" , 244);
|
||||
Html_entities_set(rv, tmp, "OElig" , 338);
|
||||
Html_entities_set(rv, tmp, "oelig" , 339);
|
||||
Html_entities_set(rv, tmp, "Ograve" , 210);
|
||||
Html_entities_set(rv, tmp, "ograve" , 242);
|
||||
Html_entities_set(rv, tmp, "oline" , 8254);
|
||||
Html_entities_set(rv, tmp, "Omega" , 937);
|
||||
Html_entities_set(rv, tmp, "omega" , 969);
|
||||
Html_entities_set(rv, tmp, "Omicron" , 927);
|
||||
Html_entities_set(rv, tmp, "omicron" , 959);
|
||||
Html_entities_set(rv, tmp, "oplus" , 8853);
|
||||
Html_entities_set(rv, tmp, "or" , 8744);
|
||||
Html_entities_set(rv, tmp, "ordf" , 170);
|
||||
Html_entities_set(rv, tmp, "ordm" , 186);
|
||||
Html_entities_set(rv, tmp, "Oslash" , 216);
|
||||
Html_entities_set(rv, tmp, "oslash" , 248);
|
||||
Html_entities_set(rv, tmp, "Otilde" , 213);
|
||||
Html_entities_set(rv, tmp, "otilde" , 245);
|
||||
Html_entities_set(rv, tmp, "otimes" , 8855);
|
||||
Html_entities_set(rv, tmp, "Ouml" , 214);
|
||||
Html_entities_set(rv, tmp, "ouml" , 246);
|
||||
Html_entities_set(rv, tmp, "para" , 182);
|
||||
Html_entities_set(rv, tmp, "part" , 8706);
|
||||
Html_entities_set(rv, tmp, "permil" , 8240);
|
||||
Html_entities_set(rv, tmp, "perp" , 8869);
|
||||
Html_entities_set(rv, tmp, "Phi" , 934);
|
||||
Html_entities_set(rv, tmp, "phi" , 966);
|
||||
Html_entities_set(rv, tmp, "Pi" , 928);
|
||||
Html_entities_set(rv, tmp, "pi" , 960);
|
||||
Html_entities_set(rv, tmp, "piv" , 982);
|
||||
Html_entities_set(rv, tmp, "plusmn" , 177);
|
||||
Html_entities_set(rv, tmp, "pound" , 163);
|
||||
Html_entities_set(rv, tmp, "prime" , 8242);
|
||||
Html_entities_set(rv, tmp, "Prime" , 8243);
|
||||
Html_entities_set(rv, tmp, "prod" , 8719);
|
||||
Html_entities_set(rv, tmp, "prop" , 8733);
|
||||
Html_entities_set(rv, tmp, "Psi" , 936);
|
||||
Html_entities_set(rv, tmp, "psi" , 968);
|
||||
Html_entities_set(rv, tmp, "quot" , 34);
|
||||
Html_entities_set(rv, tmp, "radic" , 8730);
|
||||
Html_entities_set(rv, tmp, "rang" , 9002);
|
||||
Html_entities_set(rv, tmp, "raquo" , 187);
|
||||
Html_entities_set(rv, tmp, "rarr" , 8594);
|
||||
Html_entities_set(rv, tmp, "rArr" , 8658);
|
||||
Html_entities_set(rv, tmp, "rceil" , 8969);
|
||||
Html_entities_set(rv, tmp, "rdquo" , 8221);
|
||||
Html_entities_set(rv, tmp, "real" , 8476);
|
||||
Html_entities_set(rv, tmp, "reg" , 174);
|
||||
Html_entities_set(rv, tmp, "rfloor" , 8971);
|
||||
Html_entities_set(rv, tmp, "Rho" , 929);
|
||||
Html_entities_set(rv, tmp, "rho" , 961);
|
||||
Html_entities_set(rv, tmp, "rlm" , 8207);
|
||||
Html_entities_set(rv, tmp, "rsaquo" , 8250);
|
||||
Html_entities_set(rv, tmp, "rsquo" , 8217);
|
||||
Html_entities_set(rv, tmp, "sbquo" , 8218);
|
||||
Html_entities_set(rv, tmp, "Scaron" , 352);
|
||||
Html_entities_set(rv, tmp, "scaron" , 353);
|
||||
Html_entities_set(rv, tmp, "sdot" , 8901);
|
||||
Html_entities_set(rv, tmp, "sect" , 167);
|
||||
Html_entities_set(rv, tmp, "shy" , 173);
|
||||
Html_entities_set(rv, tmp, "Sigma" , 931);
|
||||
Html_entities_set(rv, tmp, "sigma" , 963);
|
||||
Html_entities_set(rv, tmp, "sigmaf" , 962);
|
||||
Html_entities_set(rv, tmp, "sim" , 8764);
|
||||
Html_entities_set(rv, tmp, "spades" , 9824);
|
||||
Html_entities_set(rv, tmp, "sub" , 8834);
|
||||
Html_entities_set(rv, tmp, "sube" , 8838);
|
||||
Html_entities_set(rv, tmp, "sum" , 8721);
|
||||
Html_entities_set(rv, tmp, "sup" , 8835);
|
||||
Html_entities_set(rv, tmp, "sup1" , 185);
|
||||
Html_entities_set(rv, tmp, "sup2" , 178);
|
||||
Html_entities_set(rv, tmp, "sup3" , 179);
|
||||
Html_entities_set(rv, tmp, "supe" , 8839);
|
||||
Html_entities_set(rv, tmp, "szlig" , 223);
|
||||
Html_entities_set(rv, tmp, "Tau" , 932);
|
||||
Html_entities_set(rv, tmp, "tau" , 964);
|
||||
Html_entities_set(rv, tmp, "there4" , 8756);
|
||||
Html_entities_set(rv, tmp, "Theta" , 920);
|
||||
Html_entities_set(rv, tmp, "theta" , 952);
|
||||
Html_entities_set(rv, tmp, "thetasym" , 977);
|
||||
Html_entities_set(rv, tmp, "thinsp" , 8201);
|
||||
Html_entities_set(rv, tmp, "THORN" , 222);
|
||||
Html_entities_set(rv, tmp, "thorn" , 254);
|
||||
Html_entities_set(rv, tmp, "tilde" , 732);
|
||||
Html_entities_set(rv, tmp, "times" , 215);
|
||||
Html_entities_set(rv, tmp, "trade" , 8482);
|
||||
Html_entities_set(rv, tmp, "Uacute" , 218);
|
||||
Html_entities_set(rv, tmp, "uacute" , 250);
|
||||
Html_entities_set(rv, tmp, "uarr" , 8593);
|
||||
Html_entities_set(rv, tmp, "uArr" , 8657);
|
||||
Html_entities_set(rv, tmp, "Ucirc" , 219);
|
||||
Html_entities_set(rv, tmp, "ucirc" , 251);
|
||||
Html_entities_set(rv, tmp, "Ugrave" , 217);
|
||||
Html_entities_set(rv, tmp, "ugrave" , 249);
|
||||
Html_entities_set(rv, tmp, "uml" , 168);
|
||||
Html_entities_set(rv, tmp, "upsih" , 978);
|
||||
Html_entities_set(rv, tmp, "Upsilon" , 933);
|
||||
Html_entities_set(rv, tmp, "upsilon" , 965);
|
||||
Html_entities_set(rv, tmp, "Uuml" , 220);
|
||||
Html_entities_set(rv, tmp, "uuml" , 252);
|
||||
Html_entities_set(rv, tmp, "weierp" , 8472);
|
||||
Html_entities_set(rv, tmp, "Xi" , 926);
|
||||
Html_entities_set(rv, tmp, "xi" , 958);
|
||||
Html_entities_set(rv, tmp, "Yacute" , 221);
|
||||
Html_entities_set(rv, tmp, "yacute" , 253);
|
||||
Html_entities_set(rv, tmp, "yen" , 165);
|
||||
Html_entities_set(rv, tmp, "Yuml" , 376);
|
||||
Html_entities_set(rv, tmp, "yuml" , 255);
|
||||
Html_entities_set(rv, tmp, "Zeta" , 918);
|
||||
Html_entities_set(rv, tmp, "zeta" , 950);
|
||||
Html_entities_set(rv, tmp, "zwj" , 8205);
|
||||
Html_entities_set(rv, tmp, "zwnj" , 8204);
|
||||
return rv;
|
||||
}
|
||||
private static void Html_entities_set(Hash_adp_bry rv, Bry_bfr tmp, String name_str, int code) {
|
||||
byte[] html_bry = tmp.Add_str_a7("&#").Add_int_variable(code).Add_byte_semic().To_bry_and_clear();
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__entity, code, name_str, html_bry);
|
||||
}
|
||||
private static void Html_entities_set(Hash_adp_bry rv, byte type, int code, String name_str, String html_str) {Html_entities_set(rv, type, code, name_str, Bry_.new_u8(html_str));}
|
||||
private static void Html_entities_set(Hash_adp_bry rv, byte type, int code, String name_str, byte[] html_bry) {
|
||||
byte[] name_bry = Bry_.new_u8(name_str);
|
||||
rv.Add_if_dupe_use_1st(name_bry, new Xomw_html_ent(type, code, name_bry, html_bry)); // Add_dupe needed b/c "lt" and co. are added early; ignore subsequent call
|
||||
}
|
||||
}
|
||||
/** Immutable record describing one entity: its kind, codepoint, name and replacement bytes. */
class Xomw_html_ent {
	// kinds of entry
	public static final byte Type__null = 0;
	public static final byte Type__alias = 1;
	public static final byte Type__char = 2;
	public static final byte Type__entity = 3;

	public final byte type;		// one of the Type__ constants
	public final int code;		// codepoint passed by the creator
	public final byte[] name;	// entity name
	public final byte[] html;	// bytes written in place of the entity

	/** Stores the four values as given; the arrays are not copied. */
	public Xomw_html_ent(byte type, int code, byte[] name, byte[] html) {
		this.html = html;
		this.name = name;
		this.code = code;
		this.type = type;
	}
}
|
||||
/** Fluent builder for a boolean[] used as a lookup table indexed by int. */
class Bool_ary_bldr {
	private final boolean[] ary;

	/** Creates a builder for a table with the given number of slots, all false. */
	public Bool_ary_bldr(int len) {
		ary = new boolean[len];
	}

	/** Creates a builder with 256 slots. */
	public static Bool_ary_bldr New_u8() {
		return new Bool_ary_bldr(256);
	}

	/** Sets each listed index to true. */
	public Bool_ary_bldr Set_many(int... v) {
		for (int idx : v) {
			ary[idx] = true;
		}
		return this;
	}

	/** Sets every index from bgn to end (both inclusive) to true; does nothing when bgn is greater than end. */
	public Bool_ary_bldr Set_rng(int bgn, int end) {
		int idx = bgn;
		while (idx <= end) {
			ary[idx] = true;
			idx++;
		}
		return this;
	}

	/** Returns the underlying array itself (not a copy). */
	public boolean[] To_ary() {
		return ary;
	}
}
|
44
400_xowa/src/gplx/xowa/mws/Xomw_sanitizer__tst.java
Normal file
44
400_xowa/src/gplx/xowa/mws/Xomw_sanitizer__tst.java
Normal file
@ -0,0 +1,44 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*;
|
||||
public class Xomw_sanitizer__tst {
|
||||
private final Xomw_sanitizer__fxt fxt = new Xomw_sanitizer__fxt();
|
||||
@Test public void Text() {fxt.Test__normalize_char_references("abc" , "abc");}
|
||||
@Test public void Dec() {fxt.Test__normalize_char_references("" , "&#08;");}
|
||||
@Test public void Dec__invalid() {fxt.Test__normalize_char_references("	" , "	");}
|
||||
@Test public void Hex() {fxt.Test__normalize_char_references("ÿ" , "ÿ");}
|
||||
@Test public void Entity() {fxt.Test__normalize_char_references("α" , "α");}
|
||||
@Test public void Entity__lt() {fxt.Test__normalize_char_references("<" , "<");}
|
||||
@Test public void Invalid() {fxt.Test__normalize_char_references("&(invalid);" , "&(invalid);");}
|
||||
@Test public void Many() {
|
||||
fxt.Test__normalize_char_references
|
||||
( "a 	 b α c ÿ d &(invalid); e"
|
||||
, "a 	 b α c ÿ d &(invalid); e"
|
||||
);
|
||||
}
|
||||
}
|
||||
class Xomw_sanitizer__fxt {
|
||||
private final Xomw_sanitizer sanitizer = new Xomw_sanitizer();
|
||||
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
public void Test__normalize_char_references(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
sanitizer.Normalize_char_references(tmp, Bool_.Y, src_bry, 0, src_bry.length);
|
||||
Gftest.Eq__str(expd, tmp.To_str_and_clear());
|
||||
}
|
||||
}
|
26
400_xowa/src/gplx/xowa/mws/htmls/Xomw_html_elem.java
Normal file
26
400_xowa/src/gplx/xowa/mws/htmls/Xomw_html_elem.java
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
// Value holder for a single HTML element; currently it only carries the element's name.
public class Xomw_html_elem {
	private final byte[] name; // EX: "a", "div", "img"
	public Xomw_html_elem(byte[] name) {
		this.name = name;
	}
	// Returns the name passed to the constructor (same array, not a copy).
	public byte[] Name() {
		return this.name;
	}

	// XO: disabled; kept for reference
	// private static final Hash_adp_bry void_elements = Hash_adp_bry.cs().Add_many_str("area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr");
}
|
267
400_xowa/src/gplx/xowa/mws/htmls/Xomw_html_utl.java
Normal file
267
400_xowa/src/gplx/xowa/mws/htmls/Xomw_html_utl.java
Normal file
@ -0,0 +1,267 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.langs.phps.utls.*;
|
||||
public class Xomw_html_utl {
|
||||
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
public void Raw_element(Bry_bfr bfr, byte[] element, Xomwh_atr_mgr attribs, byte[] contents) {
|
||||
Bry_.Lcase__all(element); // XO:lcase element
|
||||
|
||||
Open_element__lcased(bfr, element, attribs);
|
||||
if (void_elements.Has(element)) {
|
||||
bfr.Del_by_1().Add(Bry__elem__lhs__inl);
|
||||
}
|
||||
else {
|
||||
bfr.Add(contents);
|
||||
Close_element__lcased(bfr, element);
|
||||
}
|
||||
}
|
||||
private void Open_element__lcased(Bry_bfr bfr, byte[] element, Xomwh_atr_mgr attribs) {
|
||||
// This is not required in HTML5, but let's do it anyway, for
|
||||
// consistency and better compression.
|
||||
// $element = strtolower($element); // XO:handled by callers
|
||||
|
||||
// Remove invalid input types
|
||||
if (Bry_.Eq(element, Tag__input)) {
|
||||
// PORTED.HEADER:valid_input_types
|
||||
byte[] type_atr_val = attribs.Get_val_or_null(Atr__type);
|
||||
if (type_atr_val != null && !valid_input_types.Has(type_atr_val)) {
|
||||
attribs.Del(Atr__type);
|
||||
}
|
||||
}
|
||||
|
||||
// According to standard the default type for <button> elements is "submit".
|
||||
// Depending on compatibility mode IE might use "button", instead.
|
||||
// We enforce the standard "submit".
|
||||
if (Bry_.Eq(element, Tag__button) && attribs.Get_val_or_null(Atr__type) == null) {
|
||||
attribs.Set(Atr__type, Val__type__submit);
|
||||
}
|
||||
|
||||
bfr.Add_byte(Byte_ascii.Angle_bgn).Add(element);
|
||||
Expand_attributes(bfr, attribs); // TODO.XO:self::dropDefaults($element, $attribs)
|
||||
bfr.Add_byte(Byte_ascii.Angle_end);
|
||||
}
|
||||
public void Expand_attributes(Bry_bfr bfr, Xomwh_atr_mgr atrs) {
|
||||
int len = atrs.Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xomwh_atr_itm atr = (Xomwh_atr_itm)atrs.Get_at(i);
|
||||
byte[] key = atr.Key_bry();
|
||||
byte[] val = atr.Val();
|
||||
|
||||
// Support intuitive [ 'checked' => true/false ] form
|
||||
if (val == null) { // TESTME
|
||||
continue;
|
||||
}
|
||||
|
||||
// For boolean attributes, support [ 'foo' ] instead of
|
||||
// requiring [ 'foo' => 'meaningless' ].
|
||||
boolean bool_attrib = bool_attribs.Has(val);
|
||||
if (atr.Key_int() != -1 && bool_attrib) {
|
||||
key = val;
|
||||
}
|
||||
|
||||
// Not technically required in HTML5 but we'd like consistency
|
||||
// and better compression anyway.
|
||||
key = Bry_.Xcase__build__all(tmp, Bool_.N, key);
|
||||
|
||||
// PORTED.HEADER:$spaceSeparatedListAttributes
|
||||
|
||||
// Specific features for attributes that allow a list of space-separated values
|
||||
if (space_separated_list_attributes.Has(key)) {
|
||||
// Apply some normalization and remove duplicates
|
||||
|
||||
// Convert into correct array. Array can contain space-separated
|
||||
// values. Implode/explode to get those into the main array as well.
|
||||
// if (is_array($value)) {
|
||||
// If input wasn't an array, we can skip this step
|
||||
// $newValue = [];
|
||||
// foreach ($value as $k => $v) {
|
||||
// if (is_string($v)) {
|
||||
// String values should be normal `array('foo')`
|
||||
// Just append them
|
||||
// if (!isset($value[$v])) {
|
||||
// As a special case don't set 'foo' if a
|
||||
// separate 'foo' => true/false exists in the array
|
||||
// keys should be authoritative
|
||||
// $newValue[] = $v;
|
||||
// }
|
||||
// }
|
||||
// elseif ($v) {
|
||||
// If the value is truthy but not a String this is likely
|
||||
// an [ 'foo' => true ], falsy values don't add strings
|
||||
// $newValue[] = $k;
|
||||
// }
|
||||
// }
|
||||
// $value = implode(' ', $newValue);
|
||||
// }
|
||||
// $value = explode(' ', $value);
|
||||
|
||||
// Normalize spacing by fixing up cases where people used
|
||||
// more than 1 space and/or a trailing/leading space
|
||||
// $value = array_diff($value, [ '', ' ' ]);
|
||||
|
||||
// Remove duplicates and create the String
|
||||
// $value = implode(' ', array_unique($value));
|
||||
}
|
||||
// DELETE
|
||||
// elseif (is_array($value)) {
|
||||
// throw new MWException("HTML attribute $key can not contain a list of values");
|
||||
// }
|
||||
|
||||
if (bool_attrib) {
|
||||
bfr.Add_byte_space().Add(key).Add(Bry__atr__val__empty); // $ret .= " $key=\"\"";
|
||||
}
|
||||
else {
|
||||
// PORTED.HEADER:atr_val_encodings
|
||||
val = Php_str_.Strtr(val, atr_val_encodings, tmp, trv);
|
||||
bfr.Add_byte_space().Add(key).Add(Bry__atr__val__quote).Add(val).Add_byte_quote();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void Close_element__lcased(Bry_bfr bfr, byte[] element) {
|
||||
bfr.Add(Bry__elem__rhs__bgn).Add(element).Add_byte(Byte_ascii.Angle_end); // EX: "</", element, ">";
|
||||
}
|
||||
private static final byte[]
|
||||
Bry__elem__lhs__inl = Bry_.new_a7("/>")
|
||||
, Bry__elem__rhs__bgn = Bry_.new_a7("</")
|
||||
, Bry__atr__val__quote = Bry_.new_a7("=\"")
|
||||
, Bry__atr__val__empty = Bry_.new_a7("=\"\"")
|
||||
|
||||
, Tag__input = Bry_.new_a7("input")
|
||||
, Tag__button = Bry_.new_a7("button")
|
||||
, Atr__type = Bry_.new_a7("type")
|
||||
, Val__type__submit = Bry_.new_a7("submit")
|
||||
;
|
||||
|
||||
// List of void elements from HTML5, section 8.1.2 as of 2016-09-19
|
||||
private static final Hash_adp_bry void_elements = Hash_adp_bry.cs().Add_many_str
|
||||
(
|
||||
"area",
|
||||
"super",
|
||||
"br",
|
||||
"col",
|
||||
"embed",
|
||||
"hr",
|
||||
"img",
|
||||
"input",
|
||||
"keygen",
|
||||
"link",
|
||||
"meta",
|
||||
"param",
|
||||
"source",
|
||||
"track",
|
||||
"wbr"
|
||||
);
|
||||
|
||||
// Boolean attributes, which may have the value omitted entirely. Manually
|
||||
// collected from the HTML5 spec as of 2011-08-12.
|
||||
private static final Hash_adp_bry bool_attribs = Hash_adp_bry.ci_a7().Add_many_str(
|
||||
"async",
|
||||
"autofocus",
|
||||
"autoplay",
|
||||
"checked",
|
||||
"controls",
|
||||
"default",
|
||||
"defer",
|
||||
"disabled",
|
||||
"formnovalidate",
|
||||
"hidden",
|
||||
"ismap",
|
||||
// "itemscope", //XO:duplicate; added below
|
||||
"loop",
|
||||
"multiple",
|
||||
"muted",
|
||||
"novalidate",
|
||||
"open",
|
||||
"pubdate",
|
||||
"final ",
|
||||
"required",
|
||||
"reversed",
|
||||
"scoped",
|
||||
"seamless",
|
||||
"selected",
|
||||
"truespeed",
|
||||
"typemustmatch",
|
||||
// HTML5 Microdata
|
||||
"itemscope"
|
||||
);
|
||||
|
||||
private static final Btrie_slim_mgr atr_val_encodings = Btrie_slim_mgr.cs()
|
||||
// Apparently we need to entity-encode \n, \r, \t, although the
|
||||
// spec doesn't mention that. Since we're doing strtr() anyway,
|
||||
// we may as well not call htmlspecialchars().
|
||||
// @todo FIXME: Verify that we actually need to
|
||||
// escape \n\r\t here, and explain why, exactly.
|
||||
// We could call Sanitizer::encodeAttribute() for this, but we
|
||||
// don't because we're stubborn and like our marginal savings on
|
||||
// byte size from not having to encode unnecessary quotes.
|
||||
// The only difference between this transform and the one by
|
||||
// Sanitizer::encodeAttribute() is ' is not encoded.
|
||||
.Add_str_str("&" , "&")
|
||||
.Add_str_str("\"" , """)
|
||||
.Add_str_str(">" , ">")
|
||||
// '<' allegedly allowed per spec
|
||||
// but breaks some tools if not escaped.
|
||||
.Add_str_str("<" , "<")
|
||||
.Add_str_str("\n" , " ")
|
||||
.Add_str_str("\r" , " ")
|
||||
.Add_str_str("\t" , "	");
|
||||
|
||||
// https://www.w3.org/TR/html401/index/attributes.html ("space-separated")
|
||||
// https://www.w3.org/TR/html5/index.html#attributes-1 ("space-separated")
|
||||
private static final Hash_adp_bry space_separated_list_attributes = Hash_adp_bry.ci_a7().Add_many_str(
|
||||
"class", // html4, html5
|
||||
"accesskey", // as of html5, multiple space-separated values allowed
|
||||
// html4-spec doesn't document rel= as space-separated
|
||||
// but has been used like that and is now documented as such
|
||||
// in the html5-spec.
|
||||
"rel"
|
||||
);
|
||||
|
||||
private static final Hash_adp_bry valid_input_types = Hash_adp_bry.ci_a7().Add_many_str(
|
||||
// Remove invalid input types
|
||||
"hidden",
|
||||
"text",
|
||||
"password",
|
||||
"checkbox",
|
||||
"radio",
|
||||
"file",
|
||||
"submit",
|
||||
"image",
|
||||
"reset",
|
||||
"button",
|
||||
|
||||
// HTML input types
|
||||
"datetime",
|
||||
"datetime-local",
|
||||
"date",
|
||||
"month",
|
||||
"time",
|
||||
"week",
|
||||
"number",
|
||||
"range",
|
||||
"email",
|
||||
"url",
|
||||
"search",
|
||||
"tel",
|
||||
"color"
|
||||
);
|
||||
}
|
@ -0,0 +1,39 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xomw_html_utl__expand_attributes__tst {
|
||||
private final Xomw_html_utl__expand_attributes__fxt fxt = new Xomw_html_utl__expand_attributes__fxt();
|
||||
@Test public void Basic() {fxt.Test__expand_attributes(" a=\"b\"", "a", "b");}
|
||||
}
|
||||
class Xomw_html_utl__expand_attributes__fxt {
|
||||
private final Xomw_html_utl utl = new Xomw_html_utl();
|
||||
private final Bry_bfr bfr = Bry_bfr_.New();
|
||||
public void Test__expand_attributes(String expd, String... kvs) {
|
||||
Xomwh_atr_mgr atrs = new Xomwh_atr_mgr();
|
||||
int kvs_len = kvs.length;
|
||||
for (int i = 0; i < kvs_len; i += 2) {
|
||||
byte[] key = Bry_.new_a7(kvs[i]);
|
||||
byte[] val = Bry_.new_a7(kvs[i + 1]);
|
||||
Xomwh_atr_itm itm = new Xomwh_atr_itm(-1, key, val);
|
||||
atrs.Add(itm);
|
||||
}
|
||||
utl.Expand_attributes(bfr, atrs);
|
||||
Gftest.Eq__str(expd, bfr.To_str_and_clear());
|
||||
}
|
||||
}
|
@ -15,13 +15,15 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.apps.cfgs.mgrs.dflts; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.apps.*; import gplx.xowa.addons.apps.cfgs.*; import gplx.xowa.addons.apps.cfgs.mgrs.*;
|
||||
class Xocfg_dflt_itm__static implements Gfo_invk {
|
||||
private final String val;
|
||||
public Xocfg_dflt_itm__static(String val) {
|
||||
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
public class Xomwh_atr_itm {
|
||||
public Xomwh_atr_itm(int key_int, byte[] key, byte[] val) {
|
||||
this.key_int = key_int;
|
||||
this.key_bry = key;
|
||||
this.val = val;
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
return val;
|
||||
}
|
||||
public int Key_int() {return key_int;} private int key_int;
|
||||
public byte[] Key_bry() {return key_bry;} private byte[] key_bry;
|
||||
public byte[] Val() {return val;} private byte[] val;
|
||||
public void Val_(byte[] v) {this.val = v;}
|
||||
}
|
53
400_xowa/src/gplx/xowa/mws/htmls/Xomwh_atr_mgr.java
Normal file
53
400_xowa/src/gplx/xowa/mws/htmls/Xomwh_atr_mgr.java
Normal file
@ -0,0 +1,53 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
public class Xomwh_atr_mgr {
|
||||
private final Ordered_hash hash = Ordered_hash_.New();
|
||||
public int Len() {return hash.Len();}
|
||||
public Xomwh_atr_itm Get_at(int i) {return (Xomwh_atr_itm)hash.Get_at(i);}
|
||||
public Xomwh_atr_mgr Clear() {hash.Clear(); return this;}
|
||||
public void Add(byte[] key, byte[] val) {hash.Add(key, new Xomwh_atr_itm(-1, key, val));}
|
||||
public void Add(Xomwh_atr_itm itm) {hash.Add(itm.Key_bry(), itm);}
|
||||
public void Del(byte[] key) {hash.Del(key);}
|
||||
public void Set(byte[] key, byte[] val) {
|
||||
Xomwh_atr_itm atr = Get_by_or_make(key);
|
||||
atr.Val_(val);
|
||||
}
|
||||
public void Add_or_set(Xomwh_atr_itm src) {
|
||||
Xomwh_atr_itm trg = (Xomwh_atr_itm)hash.Get_by(src.Key_bry());
|
||||
if (trg == null)
|
||||
this.Add(src);
|
||||
else
|
||||
trg.Val_(src.Val());
|
||||
}
|
||||
public Xomwh_atr_itm Get_by_or_null(byte[] k) {
|
||||
return (Xomwh_atr_itm)hash.Get_by(k);
|
||||
}
|
||||
public Xomwh_atr_itm Get_by_or_make(byte[] k) {
|
||||
Xomwh_atr_itm rv = (Xomwh_atr_itm)hash.Get_by(k);
|
||||
if (rv == null) {
|
||||
rv = new Xomwh_atr_itm(-1, k, null);
|
||||
Add(rv);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public byte[] Get_val_or_null(byte[] k) {
|
||||
Xomwh_atr_itm atr = (Xomwh_atr_itm)hash.Get_by(k);
|
||||
return atr == null ? null : atr.Val();
|
||||
}
|
||||
}
|
137
400_xowa/src/gplx/xowa/mws/linkers/Xomw_link_renderer.java
Normal file
137
400_xowa/src/gplx/xowa/mws/linkers/Xomw_link_renderer.java
Normal file
@ -0,0 +1,137 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.linkers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.xowa.mws.htmls.*;
|
||||
public class Xomw_link_renderer {
|
||||
private boolean expand_urls = false;
|
||||
private final Xomw_html_utl html_utl = new Xomw_html_utl();
|
||||
private final Xomwh_atr_mgr attribs = new Xomwh_atr_mgr();
|
||||
|
||||
// If you have already looked up the proper CSS classes using LinkRenderer::getLinkClasses()
|
||||
// or some other method, use this to avoid looking it up again.
|
||||
public void Make_preloaded_link(Bry_bfr bfr, Xoa_ttl target, byte[] text, byte[] classes, Xomwh_atr_mgr extra_atrs, byte[] query) {
|
||||
// IGNORE: $this->runBeginHook --> 'HtmlPageLinkRendererBegin', 'LinkBegin'
|
||||
|
||||
// $target = $this->normalizeTarget( $target ); // normalizeSpecialPage
|
||||
byte[] url = Get_link_url(target, query);
|
||||
attribs.Clear();
|
||||
attribs.Add(Gfh_atr_.Bry__href, url); // NOTE: add url 1st; MW does attribs["url", url] + attribs + extra_attribs
|
||||
if (classes.length > 0) // XO:do not bother adding if empty
|
||||
attribs.Add(Gfh_atr_.Bry__class, classes);
|
||||
byte[] prefixed_text = target.Get_prefixed_text();
|
||||
if (prefixed_text != Bry_.Empty) {
|
||||
attribs.Add(Gfh_atr_.Bry__title, prefixed_text);
|
||||
}
|
||||
|
||||
int extra_atrs_len = extra_atrs.Len();
|
||||
for (int i = 0; i < extra_atrs_len; i++) {
|
||||
attribs.Add_or_set(extra_atrs.Get_at(i));
|
||||
}
|
||||
|
||||
if (text == null) {
|
||||
text = this.Get_link_text(target);
|
||||
}
|
||||
|
||||
Build_a_element(bfr, target,text, attribs, true);
|
||||
}
|
||||
private void Build_a_element(Bry_bfr bfr, Xoa_ttl target, byte[] text, Xomwh_atr_mgr attribs, boolean is_known) {
|
||||
// IGNORE: if ( !Hooks::run( 'HtmlPageLinkRendererEnd',
|
||||
|
||||
byte[] html = text;
|
||||
// $html = HtmlArmor::getHtml( $text );
|
||||
|
||||
// IGNORE: if ( Hooks::isRegistered( 'LinkEnd' ) ) {
|
||||
|
||||
html_utl.Raw_element(bfr, Gfh_tag_.Bry__a, attribs, html);
|
||||
}
|
||||
private byte[] Get_link_url(Xoa_ttl target, byte[] query) {
|
||||
// TODO: Use a LinkTargetResolver service instead of Title
|
||||
// if ( $this->forceArticlePath ) {
|
||||
// $realQuery = $query;
|
||||
// $query = [];
|
||||
// }
|
||||
// else {
|
||||
// $realQuery = [];
|
||||
// }
|
||||
byte[] url = target.Get_link_url(query, false, expand_urls);
|
||||
|
||||
// if ( $this->forceArticlePath && $realQuery ) {
|
||||
// $url = wfAppendQuery( $url, $realQuery );
|
||||
// }
|
||||
return url;
|
||||
}
|
||||
private byte[] Get_link_text(Xoa_ttl target) {
|
||||
byte[] prefixed_text = target.Get_prefixed_text();
|
||||
// If the target is just a fragment, with no title, we return the fragment
|
||||
// text. Otherwise, we return the title text itself.
|
||||
if (prefixed_text == Bry_.Empty && target.Has_fragment()) {
|
||||
return target.Get_fragment();
|
||||
}
|
||||
return prefixed_text;
|
||||
}
|
||||
// private function normalizeTarget( LinkTarget $target ) {
|
||||
// return Linker::normaliseSpecialPage( $target );
|
||||
// }
|
||||
// public static function normaliseSpecialPage( LinkTarget $target ) {
|
||||
// if ( $target->getNamespace() == NS_SPECIAL && !$target->isExternal() ) {
|
||||
// list( $name, $subpage ) = SpecialPageFactory::resolveAlias( $target->getDBkey() );
|
||||
// if ( !$name ) {
|
||||
// return $target;
|
||||
// }
|
||||
// $ret = SpecialPage::getTitleValueFor( $name, $subpage, $target->getFragment() );
|
||||
// return $ret;
|
||||
// } else {
|
||||
// return $target;
|
||||
// }
|
||||
// }
|
||||
private static final byte[] Bry__classes__extiw = Bry_.new_a7("extiw");
|
||||
public void Make_known_link(Bry_bfr bfr, Xoa_ttl target, byte[] text, Xomwh_atr_mgr extra_atrs, byte[] query) {
|
||||
byte[] classes = Bry_.Empty;
|
||||
if (target.Is_external()) {
|
||||
classes = Bry__classes__extiw;
|
||||
}
|
||||
byte[] colour = Get_link_classes(target);
|
||||
if (colour != Bry_.Empty) {
|
||||
classes = Bry_.Add(classes, Byte_ascii.Space_bry, colour);
|
||||
}
|
||||
|
||||
Make_preloaded_link(bfr, target, text, classes, extra_atrs, query);
|
||||
}
|
||||
public byte[] Get_link_classes(Xoa_ttl target) {
|
||||
// Make sure the target is in the cache
|
||||
// $id = $this->linkCache->addLinkObj( $target );
|
||||
// if ( $id == 0 ) {
|
||||
// // Doesn't exist
|
||||
// return '';
|
||||
// }
|
||||
|
||||
// if ( $this->linkCache->getGoodLinkFieldObj( $target, 'redirect' ) ) {
|
||||
// Page is a redirect
|
||||
// return 'mw-redirect';
|
||||
// }
|
||||
// elseif ( $this->stubThreshold > 0 && MWNamespace::isContent( $target->getNamespace() )
|
||||
// && $this->linkCache->getGoodLinkFieldObj( $target, 'length' ) < $this->stubThreshold
|
||||
// ) {
|
||||
// Page is a stub
|
||||
// return 'stub';
|
||||
// }
|
||||
|
||||
return Bry_.Empty;
|
||||
}
|
||||
}
|
584
400_xowa/src/gplx/xowa/mws/parsers/Xomw_block_level_pass.java
Normal file
584
400_xowa/src/gplx/xowa/mws/parsers/Xomw_block_level_pass.java
Normal file
@ -0,0 +1,584 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.langs.phps.utls.*;
|
||||
public class Xomw_block_level_pass {
|
||||
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private boolean in_pre, dt_open;
|
||||
private int last_section;
|
||||
private byte[] find_colon_no_links__before, find_colon_no_links__after;
|
||||
|
||||
public void Do_block_levels(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, boolean line_start) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
// XO.STATIC
|
||||
if (block_chars_ary == null) {
|
||||
synchronized (Type_adp_.ClassOf_obj(this)) {
|
||||
block_chars_ary = Block_chars_ary__new();
|
||||
open_match_trie = Btrie_slim_mgr.ci_a7().Add_many_str
|
||||
("<table", "<h1", "<h2", "<h3", "<h4", "<h5", "<h6", "<pre", "<tr", "<p", "<ul", "<ol", "<dl", "<li", "</tr", "</td", "</th");
|
||||
close_match_trie = Btrie_slim_mgr.ci_a7().Add_many_str
|
||||
( "</table", "</h1", "</h2", "</h3", "</h4", "</h5", "</h6", "<td", "<th", "<blockquote", "</blockquote", "<div", "</div", "<hr"
|
||||
, "</pre", "</p", "</mw:", Xomw_strip_state.Str__marker_bgn + "-pre", "</li", "</ul", "</ol", "</dl", "<center", "</center");
|
||||
blockquote_trie = Btrie_slim_mgr.ci_a7().Add_many_str("<blockquote", "</blockquote");
|
||||
pre_trie = Btrie_slim_mgr.ci_a7().Add_str_int("<pre", Pre__bgn).Add_str_int("</pre", Pre__end);
|
||||
}
|
||||
}
|
||||
|
||||
// Parsing through the text line by line. The main thing
|
||||
// happening here is handling of block-level elements p, pre,
|
||||
// and making lists from lines starting with * # : etc.
|
||||
byte[] last_prefix = Bry_.Empty;
|
||||
bfr.Clear();
|
||||
this.dt_open = false;
|
||||
boolean in_block_elem = false;
|
||||
int prefix_len = 0;
|
||||
byte para_stack = Para_stack__none;
|
||||
boolean in_blockquote = false;
|
||||
this.in_pre = false;
|
||||
this.last_section = Last_section__none;
|
||||
byte[] prefix2 = null;
|
||||
|
||||
// PORTED.SPLIT: $textLines = StringUtils::explode("\n", $text);
|
||||
int line_bgn = src_bgn;
|
||||
while (line_bgn < src_end) {
|
||||
int line_end = Bry_find_.Find_fwd(src, Byte_ascii.Nl, line_bgn);
|
||||
if (line_end == Bry_find_.Not_found)
|
||||
line_end = src_end;
|
||||
|
||||
// Fix up line_start
|
||||
if (!line_start) {
|
||||
bfr.Add_mid(src, line_bgn, line_end);
|
||||
line_start = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// * = ul
|
||||
// # = ol
|
||||
// ; = dt
|
||||
// : = dd
|
||||
int last_prefix_len = last_prefix.length;
|
||||
|
||||
// PORTED: pre_close_match = preg_match('/<\\/pre/i', $oLine); pre_open_match = preg_match('/<pre/i', $oLine);
|
||||
int pre_cur = line_bgn;
|
||||
boolean pre_close_match = false;
|
||||
boolean pre_open_match = false;
|
||||
|
||||
while (true) {
|
||||
if (pre_cur >= line_end)
|
||||
break;
|
||||
Object o = pre_trie.Match_at(trv, src, pre_cur, line_end);
|
||||
if (o == null)
|
||||
pre_cur++;
|
||||
else {
|
||||
int pre_tid = (int)o;
|
||||
if (pre_tid == Pre__bgn)
|
||||
pre_open_match = true;
|
||||
else if (pre_tid == Pre__end)
|
||||
pre_close_match = true;
|
||||
pre_cur = trv.Pos();
|
||||
}
|
||||
}
|
||||
|
||||
byte[] prefix = null, t = null;
|
||||
// If not in a <pre> element, scan for and figure out what prefixes are there.
|
||||
if (!in_pre) {
|
||||
// Multiple prefixes may abut each other for nested lists.
|
||||
prefix_len = Php_str_.Strspn_fwd__ary(src, block_chars_ary, line_bgn, line_end, line_end); // strspn($oLine, '*#:;');
|
||||
prefix = Php_str_.Substr(src, line_bgn, prefix_len);
|
||||
|
||||
// eh?
|
||||
// ; and : are both from definition-lists, so they're equivalent
|
||||
// for the purposes of determining whether or not we need to open/close
|
||||
// elements.
|
||||
// substr( $inputLine, $prefixLength );
|
||||
prefix2 = Bry_.Replace(prefix, Byte_ascii.Semic, Byte_ascii.Colon);
|
||||
t = Bry_.Mid(src, line_bgn + prefix_len, line_end);
|
||||
in_pre = pre_open_match;
|
||||
}
|
||||
else {
|
||||
// Don't interpret any other prefixes in preformatted text
|
||||
prefix_len = 0;
|
||||
prefix = prefix2 = Bry_.Empty;
|
||||
t = Bry_.Mid(src, line_bgn, line_end);
|
||||
}
|
||||
|
||||
// List generation
|
||||
byte[] term = null, t2 = null;
|
||||
int common_prefix_len = -1;
|
||||
if (prefix_len > 0 && Bry_.Eq(last_prefix, prefix2)) {
|
||||
// Same as the last item, so no need to deal with nesting or opening stuff
|
||||
bfr.Add(Next_item(Php_str_.Substr_byte(prefix, -1)));
|
||||
para_stack = Para_stack__none;
|
||||
|
||||
if (prefix_len > 0 && prefix[prefix_len - 1] == Byte_ascii.Semic) {
|
||||
// The one nasty exception: definition lists work like this:
|
||||
// ; title : definition text
|
||||
// So we check for : in the remainder text to split up the
|
||||
// title and definition, without b0rking links.
|
||||
term = t2 = Bry_.Empty;
|
||||
if (Find_colon_no_links(t, term, t2) != Bry_find_.Not_found) {
|
||||
term = find_colon_no_links__before;
|
||||
t2 = find_colon_no_links__after;
|
||||
t = t2;
|
||||
bfr.Add(term).Add(Next_item(Byte_ascii.Colon));
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (prefix_len > 0 || last_prefix_len > 0) {
|
||||
// We need to open or close prefixes, or both.
|
||||
|
||||
// Either open or close a level...
|
||||
common_prefix_len = Get_common(prefix, last_prefix);
|
||||
para_stack = Para_stack__none;
|
||||
|
||||
// Close all the prefixes which aren't shared.
|
||||
while (common_prefix_len < last_prefix_len) {
|
||||
bfr.Add(Close_list(last_prefix[last_prefix_len - 1]));
|
||||
last_prefix_len--;
|
||||
}
|
||||
|
||||
// Continue the current prefix if appropriate.
|
||||
if (prefix_len <= common_prefix_len && common_prefix_len > 0) {
|
||||
bfr.Add(Next_item(prefix[common_prefix_len - 1]));
|
||||
}
|
||||
|
||||
// Open prefixes where appropriate.
|
||||
if (Bry_.Len_gt_0(last_prefix) && prefix_len > common_prefix_len) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
while (prefix_len > common_prefix_len) {
|
||||
byte c = Php_str_.Substr_byte(prefix, common_prefix_len, 1);
|
||||
bfr.Add(Open_list(c));
|
||||
|
||||
if (c == Byte_ascii.Semic) {
|
||||
// @todo FIXME: This is dupe of code above
|
||||
if (Find_colon_no_links(t, term, t2) != Bry_find_.Not_found) {
|
||||
term = find_colon_no_links__before;
|
||||
t2 = find_colon_no_links__after;
|
||||
t = t2;
|
||||
bfr.Add(term).Add(Next_item(Byte_ascii.Colon));
|
||||
}
|
||||
}
|
||||
++common_prefix_len;
|
||||
}
|
||||
if (prefix_len == 0 && Bry_.Len_gt_0(last_prefix)) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
last_prefix = prefix2;
|
||||
}
|
||||
|
||||
// If we have no prefixes, go to paragraph mode.
|
||||
if (0 == prefix_len) {
|
||||
// No prefix (not in list)--go to paragraph mode
|
||||
// XXX: use a stack for nestable elements like span, table and div
|
||||
int t_len = t.length;
|
||||
boolean open_match = Php_preg_.Match(open_match_trie, trv, t, 0, t_len) != null;
|
||||
boolean close_match = Php_preg_.Match(close_match_trie, trv, t, 0, t_len) != null;
|
||||
|
||||
if (open_match || close_match) {
|
||||
para_stack = Para_stack__none;
|
||||
// @todo bug 5718: paragraph closed
|
||||
bfr.Add(Close_paragraph());
|
||||
if (pre_open_match && !pre_close_match) {
|
||||
in_pre = true;
|
||||
}
|
||||
int bq_offset = 0;
|
||||
// PORTED:preg_match('/<(\\/?)blockquote[\s>]/i', t, $bqMatch, PREG_OFFSET_CAPTURE, $bq_offset)
|
||||
while (true) {
|
||||
Object o = Php_preg_.Match(blockquote_trie, trv, t, bq_offset, t_len);
|
||||
if (o == null) { // no more blockquotes found; exit
|
||||
break;
|
||||
}
|
||||
else {
|
||||
byte[] bq_bry = (byte[])o;
|
||||
in_blockquote = bq_bry[1] != Byte_ascii.Slash; // is this a close tag?
|
||||
bq_offset = trv.Pos();
|
||||
}
|
||||
}
|
||||
in_block_elem = !close_match;
|
||||
}
|
||||
else if (!in_block_elem && !in_pre) {
|
||||
if ( Php_str_.Substr_byte(t, 0) == Byte_ascii.Space
|
||||
&& (last_section == Last_section__pre || Bry_.Trim(t) != Bry_.Empty)
|
||||
&& !in_blockquote
|
||||
) {
|
||||
// pre
|
||||
if (last_section != Last_section__pre) {
|
||||
para_stack = Para_stack__none;
|
||||
bfr.Add(Close_paragraph()).Add(Gfh_tag_.Pre_lhs);
|
||||
last_section = Last_section__pre;
|
||||
}
|
||||
t = Bry_.Mid(t, 1);
|
||||
}
|
||||
else {
|
||||
// paragraph
|
||||
if (Bry_.Trim(t) == Bry_.Empty) {
|
||||
if (para_stack != Para_stack__none) {
|
||||
Para_stack_bfr(bfr, para_stack);
|
||||
bfr.Add_str_a7("<br />");
|
||||
para_stack = Para_stack__none;
|
||||
last_section = Last_section__para;
|
||||
}
|
||||
else {
|
||||
if (last_section != Last_section__para) {
|
||||
bfr.Add(Close_paragraph());
|
||||
last_section = Last_section__none;
|
||||
para_stack = Para_stack__bgn;
|
||||
}
|
||||
else {
|
||||
para_stack = Para_stack__mid;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (para_stack != Para_stack__none) {
|
||||
Para_stack_bfr(bfr, para_stack);
|
||||
para_stack = Para_stack__none;
|
||||
last_section = Last_section__para;
|
||||
}
|
||||
else if (last_section != Last_section__para) {
|
||||
bfr.Add(Close_paragraph()).Add(Gfh_tag_.P_lhs);
|
||||
this.last_section = Last_section__para;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// somewhere above we forget to get out of pre block (bug 785)
|
||||
if (pre_close_match && in_pre) {
|
||||
in_pre = false;
|
||||
}
|
||||
if (para_stack == Para_stack__none) {
|
||||
bfr.Add(t);
|
||||
if (prefix_len == 0) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
}
|
||||
|
||||
line_bgn = line_end + 1;
|
||||
}
|
||||
|
||||
while (prefix_len > 0) {
|
||||
bfr.Add(Close_list(prefix2[prefix_len - 1]));
|
||||
prefix_len--;
|
||||
if (prefix_len > 0) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
}
|
||||
if (last_section != Last_section__none) {
|
||||
bfr.Add(last_section == Last_section__para ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs);
|
||||
last_section = Last_section__none;
|
||||
}
|
||||
}
|
||||
// If a pre or p is open, return the corresponding close tag and update
|
||||
// the state. If no tag is open, return an empty String.
|
||||
public byte[] Close_paragraph() {
|
||||
byte[] result = Bry_.Empty;
|
||||
if (last_section != Last_section__none) {
|
||||
tmp.Add(last_section == Last_section__para ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs);
|
||||
result = tmp.Add_byte_nl().To_bry_and_clear();
|
||||
}
|
||||
in_pre = false;
|
||||
last_section = Last_section__none;
|
||||
return result;
|
||||
}
|
||||
|
||||
// getCommon() returns the length of the longest common substring
|
||||
// of both arguments, starting at the beginning of both.
|
||||
private int Get_common(byte[] st1, byte[] st2) {
|
||||
int st1_len = st1.length, st2_len = st2.length;
|
||||
int shorter = st1_len < st2_len ? st1_len : st2_len;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < shorter; i++) {
|
||||
if (st1[i] != st2[i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
// Open the list item element identified by the prefix character.
|
||||
private byte[] Open_list(byte c) {
|
||||
byte[] result = Close_paragraph();
|
||||
|
||||
if (c == Byte_ascii.Star)
|
||||
result = tmp.Add(result).Add_str_a7("<ul><li>").To_bry_and_clear();
|
||||
else if (c == Byte_ascii.Hash)
|
||||
result = tmp.Add(result).Add_str_a7("<ol><li>").To_bry_and_clear();
|
||||
else if (c == Byte_ascii.Hash)
|
||||
result = tmp.Add(result).Add_str_a7("<dl><dd>").To_bry_and_clear();
|
||||
else if (c == Byte_ascii.Semic) {
|
||||
result = tmp.Add(result).Add_str_a7("<dl><dt>").To_bry_and_clear();
|
||||
dt_open = true;
|
||||
}
|
||||
else
|
||||
result = tmp.Add_str_a7("<!-- ERR 1 -->").To_bry_and_clear();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Close the current list item and open the next one.
|
||||
private byte[] Next_item(byte c) {
|
||||
if (c == Byte_ascii.Star || c == Byte_ascii.Hash) {
|
||||
return tmp.Add_str_a7("</li>\n<li>").To_bry_and_clear();
|
||||
}
|
||||
else if (c == Byte_ascii.Colon || c == Byte_ascii.Semic) {
|
||||
byte[] close = tmp.Add_str_a7("</dd>\n").To_bry_and_clear();
|
||||
if (dt_open) {
|
||||
close = tmp.Add_str_a7("</dt>\n").To_bry_and_clear();
|
||||
}
|
||||
if (c == Byte_ascii.Semic) {
|
||||
dt_open = true;
|
||||
return tmp.Add(close).Add_str_a7("<dt>").To_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
dt_open = false;
|
||||
return tmp.Add(close).Add_str_a7("<dd>").To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
return tmp.Add_str_a7("<!-- ERR 2 -->").To_bry_and_clear();
|
||||
}
|
||||
|
||||
// Close the current list item identified by the prefix character.
|
||||
private byte[] Close_list(byte c) {
|
||||
byte[] text = null;
|
||||
if (c == Byte_ascii.Star) {
|
||||
text = Bry_.new_a7("</li></ul>");
|
||||
}
|
||||
else if (c == Byte_ascii.Hash) {
|
||||
text = Bry_.new_a7("</li></ol>");
|
||||
}
|
||||
else if (c == Byte_ascii.Colon) {
|
||||
if (dt_open) {
|
||||
dt_open = false;
|
||||
text = Bry_.new_a7("</dt></dl>");
|
||||
}
|
||||
else {
|
||||
text = Bry_.new_a7("</dd></dl>");
|
||||
}
|
||||
}
|
||||
else {
|
||||
return Bry_.new_a7("<!-- ERR 3 -->");
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
// Split up a String on ':', ignoring any occurrences inside tags
|
||||
// to prevent illegal overlapping.
|
||||
private int Find_colon_no_links(byte[] str, byte[] before, byte[] after) {
|
||||
int len = str.length;
|
||||
int colon_pos = Php_str_.Strpos(str, Byte_ascii.Colon, 0, len);
|
||||
if (colon_pos == Bry_find_.Not_found) {
|
||||
// Nothing to find!
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
|
||||
int lt_pos = Php_str_.Strpos(str, Byte_ascii.Angle_bgn, 0, len);
|
||||
if (lt_pos == Bry_find_.Not_found || lt_pos > colon_pos) {
|
||||
// Easy; no tag nesting to worry about
|
||||
find_colon_no_links__before = Php_str_.Substr(str, 0, colon_pos);
|
||||
find_colon_no_links__after = Php_str_.Substr(str, colon_pos + 1);
|
||||
return colon_pos;
|
||||
}
|
||||
|
||||
// Ugly state machine to walk through avoiding tags.
|
||||
int state = COLON_STATE_TEXT;
|
||||
int level = 0;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte c = str[i];
|
||||
|
||||
switch (state) {
|
||||
case COLON_STATE_TEXT:
|
||||
switch (c) {
|
||||
case Byte_ascii.Angle_bgn:
|
||||
// Could be either a <start> tag or an </end> tag
|
||||
state = COLON_STATE_TAGSTART;
|
||||
break;
|
||||
case Byte_ascii.Colon:
|
||||
if (level == 0) {
|
||||
// We found it!
|
||||
find_colon_no_links__before = Php_str_.Substr(str, 0, i);
|
||||
find_colon_no_links__after = Php_str_.Substr(str, i + 1);
|
||||
return i;
|
||||
}
|
||||
// Embedded in a tag; don't break it.
|
||||
break;
|
||||
default:
|
||||
// Skip ahead looking for something interesting
|
||||
colon_pos = Php_str_.Strpos(str, Byte_ascii.Colon, i, len);
|
||||
if (colon_pos == Bry_find_.Not_found) {
|
||||
// Nothing else interesting
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
lt_pos = Php_str_.Strpos(str, Byte_ascii.Angle_bgn, i, len);
|
||||
if (level == 0) {
|
||||
if (lt_pos == Bry_find_.Not_found || colon_pos < lt_pos) {
|
||||
// We found it!
|
||||
find_colon_no_links__before = Php_str_.Substr(str, 0, colon_pos);
|
||||
find_colon_no_links__after = Php_str_.Substr(str, colon_pos + 1);
|
||||
return i;
|
||||
}
|
||||
}
|
||||
if (lt_pos == Bry_find_.Not_found) {
|
||||
// Nothing else interesting to find; abort!
|
||||
// We're nested, but there's no close tags left. Abort!
|
||||
i = len; // break 2
|
||||
break;
|
||||
}
|
||||
// Skip ahead to next tag start
|
||||
i = lt_pos;
|
||||
state = COLON_STATE_TAGSTART;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_TAG:
|
||||
// In a <tag>
|
||||
switch (c) {
|
||||
case Byte_ascii.Angle_bgn:
|
||||
level++;
|
||||
state = COLON_STATE_TEXT;
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
// Slash may be followed by >?
|
||||
state = COLON_STATE_TAGSLASH;
|
||||
break;
|
||||
default:
|
||||
// ignore
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_TAGSTART:
|
||||
switch (c) {
|
||||
case Byte_ascii.Slash:
|
||||
state = COLON_STATE_CLOSETAG;
|
||||
break;
|
||||
case Byte_ascii.Bang:
|
||||
state = COLON_STATE_COMMENT;
|
||||
break;
|
||||
case Byte_ascii.Angle_bgn:
|
||||
// Illegal early close? This shouldn't happen D:
|
||||
state = COLON_STATE_TEXT;
|
||||
break;
|
||||
default:
|
||||
state = COLON_STATE_TAG;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_CLOSETAG:
|
||||
// In a </tag>
|
||||
if (c == Byte_ascii.Angle_bgn) {
|
||||
level--;
|
||||
if (level < 0) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; too many close tags");
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
state = COLON_STATE_TEXT;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_TAGSLASH:
|
||||
if (c == Byte_ascii.Angle_bgn) {
|
||||
// Yes, a self-closed tag <blah/>
|
||||
state = COLON_STATE_TEXT;
|
||||
}
|
||||
else {
|
||||
// Probably we're jumping the gun, and this is an attribute
|
||||
state = COLON_STATE_TAG;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_COMMENT:
|
||||
if (c == Byte_ascii.Dash) {
|
||||
state = COLON_STATE_COMMENTDASH;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_COMMENTDASH:
|
||||
if (c == Byte_ascii.Dash) {
|
||||
state = COLON_STATE_COMMENTDASHDASH;
|
||||
}
|
||||
else {
|
||||
state = COLON_STATE_COMMENT;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_COMMENTDASHDASH:
|
||||
if (c == Byte_ascii.Angle_bgn) {
|
||||
state = COLON_STATE_TEXT;
|
||||
}
|
||||
else {
|
||||
state = COLON_STATE_COMMENT;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw Err_.new_wo_type("State machine error");
|
||||
}
|
||||
}
|
||||
if (level > 0) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; not enough close tags (level ~{0}, state ~{1})", level, state);
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
private static final int
|
||||
COLON_STATE_TEXT = 0
|
||||
, COLON_STATE_TAG = 1
|
||||
, COLON_STATE_TAGSTART = 2
|
||||
, COLON_STATE_CLOSETAG = 3
|
||||
, COLON_STATE_TAGSLASH = 4
|
||||
, COLON_STATE_COMMENT = 5
|
||||
, COLON_STATE_COMMENTDASH = 6
|
||||
, COLON_STATE_COMMENTDASHDASH = 7
|
||||
;
|
||||
private static final byte
|
||||
Last_section__none = 0 // ''
|
||||
, Last_section__para = 1 // p
|
||||
, Last_section__pre = 2 // pre
|
||||
;
|
||||
private static final byte
|
||||
Para_stack__none = 0 // false
|
||||
, Para_stack__bgn = 1 // <p>
|
||||
, Para_stack__mid = 2 // </p><p>
|
||||
;
|
||||
private static final int Pre__bgn = 0, Pre__end = 1;
|
||||
private static Btrie_slim_mgr pre_trie;
|
||||
private static boolean[] block_chars_ary;
|
||||
private static boolean[] Block_chars_ary__new() {
|
||||
boolean[] rv = new boolean[256];
|
||||
rv[Byte_ascii.Star] = true;
|
||||
rv[Byte_ascii.Hash] = true;
|
||||
rv[Byte_ascii.Colon] = true;
|
||||
rv[Byte_ascii.Semic] = true;
|
||||
return rv;
|
||||
}
|
||||
private static Btrie_slim_mgr open_match_trie, close_match_trie, blockquote_trie;
|
||||
private static void Para_stack_bfr(Bry_bfr bfr, int id) {
|
||||
switch (id) {
|
||||
case Para_stack__bgn: bfr.Add_str_a7("<p>"); break;
|
||||
case Para_stack__mid: bfr.Add_str_a7("</p><p>"); break;
|
||||
default: throw Err_.new_unhandled_default(id);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,42 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
import gplx.xowa.mws.linkers.*;
|
||||
public class Xomw_block_level_pass__tst {
|
||||
private final Xomw_block_level_pass__fxt fxt = new Xomw_block_level_pass__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__do_block_levels(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<p>a"
|
||||
, "</p>"
|
||||
));
|
||||
}
|
||||
}
|
||||
class Xomw_block_level_pass__fxt {
|
||||
private final Xomw_block_level_pass block_level_pass = new Xomw_block_level_pass();
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private boolean apos = true;
|
||||
public void Test__do_block_levels(String src, String expd) {
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
block_level_pass.Do_block_levels(pctx, pbfr.Init(Bry_.new_u8(src)), true);
|
||||
Gftest.Eq__str(expd, pbfr.Rslt().To_str_and_clear());
|
||||
}
|
||||
}
|
250
400_xowa/src/gplx/xowa/mws/parsers/Xomw_link_holders.java
Normal file
250
400_xowa/src/gplx/xowa/mws/parsers/Xomw_link_holders.java
Normal file
@ -0,0 +1,250 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.xowa.mws.*;
|
||||
import gplx.xowa.mws.htmls.*;
|
||||
import gplx.xowa.mws.linkers.*;
|
||||
public class Xomw_link_holders {
|
||||
private final Xomw_link_renderer link_renderer;
|
||||
private final Bry_bfr tmp;
|
||||
private int link_id = 0; // MOVED:Parser.php
|
||||
private final Xomw_link_holder_list internals = new Xomw_link_holder_list();
|
||||
private final Xomwh_atr_mgr extra_atrs = new Xomwh_atr_mgr();
|
||||
public Xomw_link_holders(Xomw_link_renderer link_renderer, Bry_bfr tmp) {
|
||||
this.link_renderer = link_renderer;
|
||||
this.tmp = tmp;
|
||||
}
|
||||
public void Clear() {
|
||||
internals.Clear();
|
||||
|
||||
link_id = 0;
|
||||
}
|
||||
public void Make_holder(Bry_bfr bfr, Xoa_ttl nt, byte[] text, byte[][] query, byte[] trail, byte[] prefix) {
|
||||
if (nt == null) {
|
||||
// Fail gracefully
|
||||
bfr.Add_str_a7("<!-- ERROR -->").Add(prefix).Add(text).Add(trail);
|
||||
}
|
||||
else {
|
||||
// Separate the link trail from the rest of the link
|
||||
// list( $inside, $trail ) = Linker::splitTrail( $trail );
|
||||
byte[] inside = Bry_.Empty;
|
||||
|
||||
Xomw_link_holder_item entry = new Xomw_link_holder_item(nt, tmp.Add_bry_many(prefix, text, inside).To_bry_and_clear(), query);
|
||||
|
||||
boolean is_external = false; // $nt->isExternal()
|
||||
if (is_external) {
|
||||
// Use a globally unique ID to keep the objects mergable
|
||||
// $key = $this->parent->nextLinkID();
|
||||
// $this->interwikis[$key] = $entry;
|
||||
// $retVal = "<!--IWLINK $key-->{$trail}";
|
||||
}
|
||||
else {
|
||||
int key = link_id++;
|
||||
internals.Add(key, entry);
|
||||
bfr.Add(Bry__link__bgn).Add_int_variable(key).Add(Gfh_tag_.Comm_end).Add(trail); // "<!--LINK $ns:$key-->{$trail}";
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Test__add(Xoa_ttl ttl, byte[] capt) {
|
||||
int key = link_id++;
|
||||
Xomw_link_holder_item item = new Xomw_link_holder_item(ttl, capt, Bry_.Ary_empty);
|
||||
internals.Add(key, item);
|
||||
}
|
||||
public void Replace(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
this.Replace_internal(pbfr);
|
||||
// $this->replaceInterwiki( $text );
|
||||
}
|
||||
private void Replace_internal(Xomw_parser_bfr pbfr) {
|
||||
if (internals.Len() == 0)
|
||||
return;
|
||||
|
||||
// $colours = [];
|
||||
// $linkCache = LinkCache::singleton();
|
||||
// $output = $this->parent->getOutput();
|
||||
// $linkRenderer = $this->parent->getLinkRenderer();
|
||||
|
||||
// $linkcolour_ids = [];
|
||||
|
||||
// SKIP:Replace_internals does db lookup to identify redlinks;
|
||||
|
||||
// Construct search and replace arrays
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
int cur = src_bgn;
|
||||
int prv = 0;
|
||||
while (true) {
|
||||
int link_bgn = Bry_find_.Find_fwd(src, Bry__link__bgn, cur, src_end);
|
||||
if (link_bgn == Bry_find_.Not_found) {
|
||||
bfr.Add_mid(src, prv, src_end);
|
||||
break;
|
||||
}
|
||||
int key_bgn = link_bgn + Bry__link__bgn.length;
|
||||
int key_end = Bry_find_.Find_fwd_while_num(src, key_bgn, src_end);
|
||||
int link_key = Bry_.To_int_or(src, key_bgn, key_end, -1);
|
||||
Xomw_link_holder_item item = internals.Get_by(link_key);
|
||||
|
||||
// $pdbk = $entry['pdbk'];
|
||||
// $title = $entry['title'];
|
||||
// $query = isset( $entry['query'] ) ? $entry['query'] : [];
|
||||
// $key = "$ns:$index";
|
||||
// $searchkey = "<!--LINK $key-->";
|
||||
// $displayText = $entry['text'];
|
||||
// if ( isset( $entry['selflink'] ) ) {
|
||||
// $replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query );
|
||||
// continue;
|
||||
// }
|
||||
// if ( $displayText === '' ) {
|
||||
// $displayText = null;
|
||||
// } else {
|
||||
// $displayText = new HtmlArmor( $displayText );
|
||||
// }
|
||||
// if ( !isset( $colours[$pdbk] ) ) {
|
||||
// $colours[$pdbk] = 'new';
|
||||
// }
|
||||
// $attribs = [];
|
||||
// if ( $colours[$pdbk] == 'new' ) {
|
||||
// $linkCache->addBadLinkObj( $title );
|
||||
// $output->addLink( $title, 0 );
|
||||
// $link = $linkRenderer->makeBrokenLink(
|
||||
// $title, $displayText, $attribs, $query
|
||||
// );
|
||||
// } else {
|
||||
// $link = $linkRenderer->makePreloadedLink(
|
||||
// $title, $displayText, $colours[$pdbk], $attribs, $query
|
||||
// );
|
||||
// }
|
||||
|
||||
bfr.Add_mid(src, prv, link_bgn);
|
||||
link_renderer.Make_preloaded_link(bfr, item.Title(), item.Text(), Bry_.Empty, extra_atrs, Bry_.Empty);
|
||||
cur = key_end + Gfh_tag_.Comm_end_len;
|
||||
prv = cur;
|
||||
}
|
||||
}
|
||||
// private void Replace_internal__db() {
|
||||
// // Generate query
|
||||
// $lb = new LinkBatch();
|
||||
// $lb->setCaller( __METHOD__ );
|
||||
//
|
||||
// foreach ( $this->internals as $ns => $entries ) {
|
||||
// foreach ( $entries as $entry ) {
|
||||
// /** @var Title $title */
|
||||
// $title = $entry['title'];
|
||||
// $pdbk = $entry['pdbk'];
|
||||
//
|
||||
// # Skip invalid entries.
|
||||
// # Result will be ugly, but prevents crash.
|
||||
// if ( is_null( $title ) ) {
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// # Check if it's a static known link, e.g. interwiki
|
||||
// if ( $title->isAlwaysKnown() ) {
|
||||
// $colours[$pdbk] = '';
|
||||
// } elseif ( $ns == NS_SPECIAL ) {
|
||||
// $colours[$pdbk] = 'new';
|
||||
// } else {
|
||||
// $id = $linkCache->getGoodLinkID( $pdbk );
|
||||
// if ( $id != 0 ) {
|
||||
// $colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
|
||||
// $output->addLink( $title, $id );
|
||||
// $linkcolour_ids[$id] = $pdbk;
|
||||
// } elseif ( $linkCache->isBadLink( $pdbk ) ) {
|
||||
// $colours[$pdbk] = 'new';
|
||||
// } else {
|
||||
// # Not in the link cache, add it to the query
|
||||
// $lb->addObj( $title );
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// if ( !$lb->isEmpty() ) {
|
||||
// $fields = array_merge(
|
||||
// LinkCache::getSelectFields(),
|
||||
// [ 'page_namespace', 'page_title' ]
|
||||
// );
|
||||
//
|
||||
// $res = $dbr->select(
|
||||
// 'page',
|
||||
// $fields,
|
||||
// $lb->constructSet( 'page', $dbr ),
|
||||
// __METHOD__
|
||||
// );
|
||||
//
|
||||
// # Fetch data and form into an associative array
|
||||
// # non-existent = broken
|
||||
// foreach ( $res as $s ) {
|
||||
// $title = Title::makeTitle( $s->page_namespace, $s->page_title );
|
||||
// $pdbk = $title->getPrefixedDBkey();
|
||||
// $linkCache->addGoodLinkObjFromRow( $title, $s );
|
||||
// $output->addLink( $title, $s->page_id );
|
||||
// $colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
|
||||
// // add id to the extension todolist
|
||||
// $linkcolour_ids[$s->page_id] = $pdbk;
|
||||
// }
|
||||
// unset( $res );
|
||||
// }
|
||||
// if ( count( $linkcolour_ids ) ) {
|
||||
// // pass an array of page_ids to an extension
|
||||
// Hooks::run( 'GetLinkColours', [ $linkcolour_ids, &$colours ] );
|
||||
// }
|
||||
//
|
||||
// # Do a second query for different language variants of links and categories
|
||||
// if ( $wgContLang->hasVariants() ) {
|
||||
// $this->doVariants( $colours );
|
||||
// }
|
||||
// }
|
||||
|
||||
private static final byte[] Bry__link__bgn = Bry_.new_a7("<!--LINK ");
|
||||
}
|
||||
class Xomw_link_holder_list {
|
||||
private int ary_len = 0, ary_max = 128;
|
||||
private Xomw_link_holder_item[] ary = new Xomw_link_holder_item[128];
|
||||
public int Len() {return ary_len;}
|
||||
public void Clear() {
|
||||
ary_len = 0;
|
||||
if (ary_max > 128)
|
||||
ary = new Xomw_link_holder_item[128];
|
||||
}
|
||||
public void Add(int key, Xomw_link_holder_item item) {
|
||||
if (key >= ary_max) {
|
||||
int new_max = ary_max * 2;
|
||||
ary = (Xomw_link_holder_item[])Array_.Resize(ary, new_max);
|
||||
ary_max = new_max;
|
||||
}
|
||||
ary[key] = item;
|
||||
ary_len++;
|
||||
}
|
||||
public Xomw_link_holder_item Get_by(int key) {return ary[key];}
|
||||
}
|
||||
class Xomw_link_holder_item {
|
||||
public Xomw_link_holder_item(Xoa_ttl title, byte[] text, byte[][] query) {
|
||||
this.title = title;
|
||||
this.text = text;
|
||||
this.query = query;
|
||||
}
|
||||
public Xoa_ttl Title() {return title;} private final Xoa_ttl title;
|
||||
public byte[] Text() {return text;} private final byte[] text;
|
||||
public byte[] Pdbk() {return title.Get_prefixed_db_key();}
|
||||
public byte[][] Query() {return query;} private final byte[][] query;
|
||||
}
|
@ -0,0 +1,45 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
import gplx.xowa.mws.linkers.*;
|
||||
public class Xomw_link_holders__tst {
|
||||
private final Xomw_link_holders__fxt fxt = new Xomw_link_holders__fxt();
|
||||
@Test public void Replace__basic() {
|
||||
fxt.Init__add("A", "a");
|
||||
fxt.Test__replace("a <!--LINK 0--> b", "a <a href='/wiki/A' title='A'>a</a> b");
|
||||
}
|
||||
}
|
||||
class Xomw_link_holders__fxt {
|
||||
private final Xomw_link_holders holders = new Xomw_link_holders(new Xomw_link_renderer(), Bry_bfr_.New());
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private final Xowe_wiki wiki;
|
||||
private boolean apos = true;
|
||||
public Xomw_link_holders__fxt() {
|
||||
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||
this.wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
}
|
||||
public void Init__add(String ttl, String capt) {
|
||||
holders.Test__add(wiki.Ttl_parse(Bry_.new_u8(ttl)), Bry_.new_u8(capt));
|
||||
}
|
||||
public void Test__replace(String src, String expd) {
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
holders.Replace(new Xomw_parser_ctx(), pbfr.Init(Bry_.new_u8(src)));
|
||||
Gftest.Eq__str(expd, pbfr.Rslt().To_str_and_clear());
|
||||
}
|
||||
}
|
27
400_xowa/src/gplx/xowa/mws/parsers/Xomw_output_type.java
Normal file
27
400_xowa/src/gplx/xowa/mws/parsers/Xomw_output_type.java
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
// Type ids for the parser's output modes.
public class Xomw_output_type {
	public static final byte Tid__html = 1;			// like parse()
	public static final byte Tid__wiki = 2;			// like preSaveTransform()
	public static final byte Tid__preprocess = 3;	// like preprocess()
	// NOTE(review): shares the value 3 with Tid__preprocess; presumably mirrors MediaWiki's OT_MSG -- confirm before changing
	public static final byte Tid__msg = 3;
	public static final byte Tid__plain = 4;		// like extractSections() - portions of the original are returned unchanged.
}
|
257
400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser.java
Normal file
257
400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser.java
Normal file
@ -0,0 +1,257 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.core.btries.*; import gplx.core.net.*;
|
||||
import gplx.xowa.mws.parsers.prepros.*; import gplx.xowa.mws.parsers.headings.*;
|
||||
import gplx.xowa.mws.parsers.quotes.*; import gplx.xowa.mws.parsers.tables.*; import gplx.xowa.mws.parsers.hrs.*; import gplx.xowa.mws.parsers.nbsps.*;
|
||||
import gplx.xowa.mws.parsers.lnkes.*; import gplx.xowa.mws.parsers.lnkis.*;
|
||||
import gplx.xowa.mws.utls.*; import gplx.xowa.mws.linkers.*;
|
||||
public class Xomw_parser {
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
private final Xomw_table_wkr table_wkr;
|
||||
private final Xomw_hr_wkr hr_wkr = new Xomw_hr_wkr();
|
||||
private final Xomw_lnke_wkr lnke_wkr;
|
||||
private final Xomw_nbsp_wkr nbsp_wkr = new Xomw_nbsp_wkr();
|
||||
private final Xomw_block_level_pass block_wkr = new Xomw_block_level_pass();
|
||||
private final Xomw_heading_wkr heading_wkr = new Xomw_heading_wkr();
|
||||
private final Xomw_link_renderer link_renderer = new Xomw_link_renderer();
|
||||
private final Xomw_link_holders holders;
|
||||
private final Xomw_heading_cbk__html heading_wkr_cbk;
|
||||
private final Btrie_slim_mgr protocols_trie;
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private int marker_index = 0;
|
||||
// private final Xomw_prepro_wkr prepro_wkr = new Xomw_prepro_wkr();
|
||||
public Xomw_strip_state Strip_state() {return strip_state;} private final Xomw_strip_state strip_state = new Xomw_strip_state();
|
||||
public Xomw_sanitizer Sanitizer() {return sanitizer;} private final Xomw_sanitizer sanitizer = new Xomw_sanitizer();
|
||||
public Xomw_linker Linker() {return linker;} private final Xomw_linker linker = new Xomw_linker();
|
||||
public Bry_bfr Tmp() {return tmp;} private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
public Xomw_quote_wkr Quote_wkr() {return quote_wkr;} private final Xomw_quote_wkr quote_wkr;
|
||||
public Xomw_lnki_wkr Lnki_wkr() {return lnki_wkr;} private final Xomw_lnki_wkr lnki_wkr;
|
||||
public boolean Output_type__wiki() {return output_type__wiki;} private final boolean output_type__wiki = false;
|
||||
public Xomw_parser() {
|
||||
this.protocols_trie = Xomw_parser.Protocols__dflt();
|
||||
this.holders = new Xomw_link_holders(link_renderer, tmp);
|
||||
this.table_wkr = new Xomw_table_wkr(this);
|
||||
this.quote_wkr = new Xomw_quote_wkr(this);
|
||||
this.lnke_wkr = new Xomw_lnke_wkr(this);
|
||||
this.lnki_wkr = new Xomw_lnki_wkr(this, holders, link_renderer, protocols_trie);
|
||||
this.heading_wkr_cbk = new Xomw_heading_cbk__html();
|
||||
}
|
||||
public void Init_by_wiki(Xowe_wiki wiki) {
|
||||
linker.Init_by_wiki(wiki.Lang().Lnki_trail_mgr().Trie());
|
||||
lnke_wkr.Init_by_wiki(protocols_trie);
|
||||
lnki_wkr.Init_by_wiki(wiki);
|
||||
}
|
||||
public void Internal_parse(Xomw_parser_bfr pbfr, byte[] text) {
|
||||
pbfr.Init(text);
|
||||
// $origText = text;
|
||||
|
||||
// MW.HOOK:ParserBeforeInternalParse
|
||||
|
||||
// if ($frame) {
|
||||
// use frame depth to infer how include/noinclude tags should be handled
|
||||
// depth=0 means this is the top-level document; otherwise it's an included document
|
||||
// boolean for_inclusion = false;
|
||||
// if (!$frame->depth) {
|
||||
// $flag = 0;
|
||||
// } else {
|
||||
// $flag = Parser::PTD_FOR_INCLUSION;
|
||||
// }
|
||||
// text = prepro_wkr.Preprocess_to_xml(text, for_inclusion);
|
||||
// text = $frame->expand($dom);
|
||||
// } else {
|
||||
// // if $frame is not provided, then use old-style replaceVariables
|
||||
// text = $this->replaceVariables(text);
|
||||
// }
|
||||
|
||||
// MW.HOOK:InternalParseBeforeSanitize
|
||||
// text = Sanitizer::removeHTMLtags(
|
||||
// text,
|
||||
// [ &$this, 'attributeStripCallback' ],
|
||||
// false,
|
||||
// array_keys($this->mTransparentTagHooks),
|
||||
// [],
|
||||
// [ &$this, 'addTrackingCategory' ]
|
||||
// );
|
||||
// MW.HOOK:InternalParseBeforeLinks
|
||||
|
||||
// Tables need to come after variable replacement for things to work
|
||||
// properly; putting them before other transformations should keep
|
||||
// exciting things like link expansions from showing up in surprising
|
||||
// places.
|
||||
table_wkr.Do_table_stuff(pctx, pbfr);
|
||||
hr_wkr.Replace_hrs(pctx, pbfr);
|
||||
|
||||
// text = $this->doDoubleUnderscore(text);
|
||||
|
||||
heading_wkr.Do_headings(pctx, pbfr, heading_wkr_cbk);
|
||||
lnki_wkr.Replace_internal_links(pctx, pbfr);
|
||||
quote_wkr.Do_all_quotes(pctx, pbfr);
|
||||
lnke_wkr.Replace_external_links(pctx, pbfr);
|
||||
|
||||
// replaceInternalLinks may sometimes leave behind
|
||||
// absolute URLs, which have to be masked to hide them from replaceExternalLinks
|
||||
Xomw_parser_bfr_.Replace(pbfr, Bry__marker__noparse, Bry_.Empty);
|
||||
|
||||
// $text = $this->doMagicLinks($text);
|
||||
// $text = $this->formatHeadings($text, $origText, $isMain);
|
||||
}
|
||||
|
||||
public void Internal_parse_half_parsed(Xomw_parser_bfr pbfr, boolean is_main, boolean line_start) {
|
||||
strip_state.Unstrip_general(pbfr);
|
||||
|
||||
// MW.HOOK:ParserAfterUnstrip
|
||||
|
||||
// Clean up special characters, only run once, next-to-last before doBlockLevels
|
||||
nbsp_wkr.Do_nbsp(pctx, pbfr);
|
||||
|
||||
block_wkr.Do_block_levels(pctx, pbfr, line_start);
|
||||
|
||||
lnki_wkr.Replace_link_holders(pctx, pbfr);
|
||||
|
||||
// The input doesn't get language converted if
|
||||
// a) It's disabled
|
||||
// b) Content isn't converted
|
||||
// c) It's a conversion table
|
||||
// d) it is an interface message (which is in the user language)
|
||||
// if ( !( $this->mOptions->getDisableContentConversion()
|
||||
// || isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
|
||||
// ) {
|
||||
// if ( !$this->mOptions->getInterfaceMessage() ) {
|
||||
// // The position of the convert() call should not be changed. it
|
||||
// // assumes that the links are all replaced and the only thing left
|
||||
// // is the <nowiki> mark.
|
||||
// $text = $this->getConverterLanguage()->convert( $text );
|
||||
// }
|
||||
// }
|
||||
|
||||
strip_state.Unstrip_nowiki(pbfr);
|
||||
|
||||
// MW.HOOK:ParserBeforeTidy
|
||||
|
||||
// $text = $this->replaceTransparentTags( $text );
|
||||
strip_state.Unstrip_general(pbfr);
|
||||
|
||||
sanitizer.Normalize_char_references(pbfr);
|
||||
|
||||
// if ( MWTidy::isEnabled() ) {
|
||||
// if ( $this->mOptions->getTidy() ) {
|
||||
// $text = MWTidy::tidy( $text );
|
||||
// }
|
||||
// }
|
||||
// else {
|
||||
// attempt to sanitize at least some nesting problems
|
||||
// (T4702 and quite a few others)
|
||||
// $tidyregs = [
|
||||
// // ''Something [http://www.cool.com cool''] -->
|
||||
// // <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
|
||||
// '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
|
||||
// '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
|
||||
// // fix up an anchor inside another anchor, only
|
||||
// // at least for a single single nested link (T5695)
|
||||
// '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
|
||||
// '\\1\\2</a>\\3</a>\\1\\4</a>',
|
||||
// // fix div inside inline elements- doBlockLevels won't wrap a line which
|
||||
// // contains a div, so fix it up here; replace
|
||||
// // div with escaped text
|
||||
// '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
|
||||
// '\\1\\3<div\\5>\\6</div>\\8\\9',
|
||||
// // remove empty italic or bold tag pairs, some
|
||||
// // introduced by rules above
|
||||
// '/<([bi])><\/\\1>/' => '',
|
||||
// ];
|
||||
|
||||
// $text = preg_replace(
|
||||
// array_keys( $tidyregs ),
|
||||
// array_values( $tidyregs ),
|
||||
// $text );
|
||||
// }
|
||||
|
||||
// MW.HOOK:ParserAfterTidy
|
||||
}
|
||||
public byte[] Armor_links(Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
|
||||
// PORTED:preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/', self::MARKER_PREFIX . "NOPARSE$1", $text )
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
boolean dirty = false;
|
||||
boolean called_by_bry = trg == null;
|
||||
while (true) {
|
||||
// exit if EOS
|
||||
if (cur == src_end) {
|
||||
// if dirty, add rest of String
|
||||
if (dirty)
|
||||
trg.Add_mid(src, prv, src_end);
|
||||
break;
|
||||
}
|
||||
|
||||
// check if cur matches protocol
|
||||
Object protocol_obj = protocols_trie.Match_at(trv, src, cur, src_end);
|
||||
// no match; continue
|
||||
if (protocol_obj == null) {
|
||||
cur++;
|
||||
}
|
||||
// match; add to bfr
|
||||
else {
|
||||
dirty = true;
|
||||
byte[] protocol_bry = (byte[])protocol_obj;
|
||||
if (called_by_bry) trg = Bry_bfr_.New();
|
||||
trg.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__noparse, protocol_bry);
|
||||
cur += protocol_bry.length;
|
||||
prv = cur;
|
||||
}
|
||||
}
|
||||
if (called_by_bry) {
|
||||
if (dirty)
|
||||
return trg.To_bry_and_clear();
|
||||
else {
|
||||
if (src_bgn == 0 && src_end == src.length)
|
||||
return src;
|
||||
else
|
||||
return Bry_.Mid(src, src_bgn, src_end);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (dirty)
|
||||
return null;
|
||||
else {
|
||||
trg.Add_mid(src, src_bgn, src_end);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
public byte[] Insert_strip_item(byte[] text) {
|
||||
tmp.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__strip_state_item);
|
||||
tmp.Add_int_variable(marker_index);
|
||||
tmp.Add(Xomw_strip_state.Bry__marker__end);
|
||||
byte[] marker = tmp.To_bry_and_clear();
|
||||
marker_index++;
|
||||
strip_state.Add_general(marker, text);
|
||||
return marker;
|
||||
}
|
||||
private static final byte[] Bry__strip_state_item = Bry_.new_a7("-item-"), Bry__noparse = Bry_.new_a7("NOPARSE");
|
||||
private static final byte[] Bry__marker__noparse = Bry_.Add(Xomw_strip_state.Bry__marker__bgn, Bry__noparse);
|
||||
public static Btrie_slim_mgr Protocols__dflt() {
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.ci_a7();
|
||||
Gfo_protocol_itm[] ary = Gfo_protocol_itm.Ary();
|
||||
for (Gfo_protocol_itm itm : ary) {
|
||||
byte[] key = itm.Text_bry(); // EX: "https://"
|
||||
rv.Add_obj(key, key);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
72
400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser__tst.java
Normal file
72
400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser__tst.java
Normal file
@ -0,0 +1,72 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_parser__tst {
|
||||
private final Xomw_parser__fxt fxt = new Xomw_parser__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "== heading_1 =="
|
||||
, "para_1"
|
||||
, "== heading_2 =="
|
||||
, "para_2"
|
||||
, "-----"
|
||||
, "{|"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "|}"
|
||||
, "''italics''"
|
||||
, "[https://a.org b]"
|
||||
, "[[A|abc]]"
|
||||
, "a »b«  !important c"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2> heading_1 </h2>"
|
||||
, "<p>para_1"
|
||||
, "</p>"
|
||||
, "<h2> heading_2 </h2>"
|
||||
, "<p>para_2"
|
||||
, "</p>"
|
||||
, "<hr />"
|
||||
, "<table>"
|
||||
, ""
|
||||
, "<tr>"
|
||||
, "<td>a"
|
||||
, "</td></tr></table>"
|
||||
, "<p><i>italics</i>"
|
||||
, "<a class=\"external text\" rel=\"nofollow\" href=\"https://a.org\">b</a>"
|
||||
, "<a href=\"/wiki/A\" title=\"A\">abc</a>"
|
||||
, "a »b«  !important c"
|
||||
, "</p>"
|
||||
));
|
||||
}
|
||||
}
|
||||
class Xomw_parser__fxt {
|
||||
private final Xomw_parser mgr = new Xomw_parser();
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
public Xomw_parser__fxt() {
|
||||
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||
Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
mgr.Init_by_wiki(wiki);
|
||||
}
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
mgr.Internal_parse(pbfr, src_bry);
|
||||
mgr.Internal_parse_half_parsed(pbfr, true, true);
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
48
400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser_bfr.java
Normal file
48
400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser_bfr.java
Normal file
@ -0,0 +1,48 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
public class Xomw_parser_bfr { // manages 2 bfrs to eliminate multiple calls to new memory allocations ("return bfr.To_bry_and_clear()")
|
||||
private final Bry_bfr bfr_1 = Bry_bfr_.New(), bfr_2 = Bry_bfr_.New();
|
||||
private Bry_bfr src, trg;
|
||||
public Xomw_parser_bfr() {
|
||||
this.src = bfr_1;
|
||||
this.trg = bfr_2;
|
||||
}
|
||||
public Bry_bfr Src() {return src;}
|
||||
public Bry_bfr Trg() {return trg;}
|
||||
public Bry_bfr Rslt() {return src;}
|
||||
public Xomw_parser_bfr Init(byte[] text) {
|
||||
// resize each bfr once by guessing that html_len = text_len * 2
|
||||
int text_len = text.length;
|
||||
int html_len = text_len * 2;
|
||||
src.Resize(html_len);
|
||||
trg.Resize(html_len);
|
||||
|
||||
// clear and add
|
||||
src.Clear();
|
||||
trg.Clear();
|
||||
src.Add(text);
|
||||
return this;
|
||||
}
|
||||
public void Switch() {
|
||||
Bry_bfr tmp = src;
|
||||
this.src = trg;
|
||||
this.trg = tmp;
|
||||
trg.Clear();
|
||||
}
|
||||
}
|
69
400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser_bfr_.java
Normal file
69
400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser_bfr_.java
Normal file
@ -0,0 +1,69 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
public class Xomw_parser_bfr_ {
|
||||
public static void Replace(Xomw_parser_bfr pbfr, byte[] find, byte[] repl) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
|
||||
if (Replace(bfr, Bool_.N, src, src_bgn, src_end, find, repl) != null)
|
||||
pbfr.Switch();
|
||||
}
|
||||
private static byte[] Replace(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) {
|
||||
boolean dirty = false;
|
||||
int cur = src_bgn;
|
||||
boolean called_by_bry = bfr == null;
|
||||
|
||||
while (true) {
|
||||
int find_bgn = Bry_find_.Find_fwd(src, find, cur);
|
||||
if (find_bgn == Bry_find_.Not_found) {
|
||||
if (dirty)
|
||||
bfr.Add_mid(src, cur, src_end);
|
||||
break;
|
||||
}
|
||||
if (called_by_bry) bfr = Bry_bfr_.New();
|
||||
bfr.Add_mid(src, cur, find_bgn);
|
||||
cur += find.length;
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
if (dirty) {
|
||||
if (called_by_bry)
|
||||
return bfr.To_bry_and_clear();
|
||||
else
|
||||
return Bry_.Empty;
|
||||
}
|
||||
else {
|
||||
if (called_by_bry) {
|
||||
if (src_bgn == 0 && src_end == src.length)
|
||||
return src;
|
||||
else
|
||||
return Bry_.Mid(src, src_bgn, src_end);
|
||||
}
|
||||
else {
|
||||
if (lone_bfr)
|
||||
bfr.Add_mid(src, src_bgn, src_end);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -15,13 +15,13 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
import gplx.xowa.parsers.mws.utils.*;
|
||||
import gplx.xowa.parsers.uniqs.*;
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
public class Xomw_parser_ctx {
|
||||
public Xomw_sanitizer_mgr Sanitizer() {return sanitizer;} private final Xomw_sanitizer_mgr sanitizer = new Xomw_sanitizer_mgr();
|
||||
public Xop_uniq_mgr Uniq_mgr() {return uniq_mgr;} private final Xop_uniq_mgr uniq_mgr = new Xop_uniq_mgr();
|
||||
public Xoa_ttl Page_title() {return page_title;} private Xoa_ttl page_title;
|
||||
|
||||
public void Init_by_page(Xoa_ttl page_title) {
|
||||
this.page_title = page_title;
|
||||
}
|
||||
|
||||
public static final int Pos__bos = -1;
|
||||
}
|
139
400_xowa/src/gplx/xowa/mws/parsers/Xomw_strip_state.java
Normal file
139
400_xowa/src/gplx/xowa/mws/parsers/Xomw_strip_state.java
Normal file
@ -0,0 +1,139 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xomw_strip_state { // REF.MW:/parser/StripState.php
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final Bry_bfr tmp_1 = Bry_bfr_.New();
|
||||
private final Bry_bfr tmp_2 = Bry_bfr_.New();
|
||||
private boolean tmp_2_used = false;
|
||||
private int general_len, nowiki_len;
|
||||
public void Clear() {
|
||||
trie.Clear();
|
||||
general_len = nowiki_len = 0;
|
||||
tmp_2_used = false;
|
||||
}
|
||||
public void Add_general(byte[] marker, byte[] val) {Add_item(Tid__general, marker, val);}
|
||||
public void Add_nowiki (byte[] marker, byte[] val) {Add_item(Tid__nowiki, marker, val);}
|
||||
public void Add_item(byte tid, byte[] marker, byte[] val) {
|
||||
trie.Add_obj(marker, new Xomw_strip_item(tid, marker, val));
|
||||
if (tid == Tid__general)
|
||||
general_len++;
|
||||
else
|
||||
nowiki_len++;
|
||||
}
|
||||
public byte[] Unstrip_general(byte[] text) {return Unstrip(Tid__general, text);}
|
||||
public byte[] Unstrip_nowiki (byte[] text) {return Unstrip(Tid__nowiki , text);}
|
||||
public byte[] Unstrip_both (byte[] text) {return Unstrip(Tid__both , text);}
|
||||
public byte[] Unstrip(byte tid, byte[] text) {
|
||||
boolean dirty = Unstrip(tid, tmp_1, text, 0, text.length);
|
||||
return dirty ? tmp_1.To_bry_and_clear() : text;
|
||||
}
|
||||
public void Unstrip_general(Xomw_parser_bfr pbfr) {Unstrip(Tid__general, pbfr);}
|
||||
public void Unstrip_nowiki (Xomw_parser_bfr pbfr) {Unstrip(Tid__nowiki , pbfr);}
|
||||
public void Unstrip_both (Xomw_parser_bfr pbfr) {Unstrip(Tid__both , pbfr);}
|
||||
private boolean Unstrip(byte tid, Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
boolean dirty = Unstrip(tid, pbfr.Trg(), src, 0, src_bfr.Len());
|
||||
if (dirty)
|
||||
pbfr.Switch();
|
||||
return dirty;
|
||||
}
|
||||
private boolean Unstrip(byte tid, Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
|
||||
// exit early if no items for type
|
||||
if ((tid & Tid__general) == Tid__general) {
|
||||
if (general_len == 0)
|
||||
return false;
|
||||
}
|
||||
else if ((tid & Tid__nowiki) == Tid__nowiki) {
|
||||
if (nowiki_len == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
boolean dirty = false;
|
||||
// loop over each src char
|
||||
while (true) {
|
||||
// EOS: exit
|
||||
if (cur == src_end) {
|
||||
if (dirty) // add remainder if dirty
|
||||
trg.Add_mid(src, prv, src_end);
|
||||
break;
|
||||
}
|
||||
|
||||
// check if current pos matches strip state
|
||||
Object o = trie.Match_at(trv, src, cur, src_end);
|
||||
if (o != null) { // match
|
||||
Xomw_strip_item item = (Xomw_strip_item)o;
|
||||
byte item_tid = item.Tid();
|
||||
if ((tid & item_tid) == item_tid) { // check if types match
|
||||
// get bfr for recursion
|
||||
Bry_bfr nested_bfr = null;
|
||||
boolean tmp_2_release = false;
|
||||
if (tmp_2_used) {
|
||||
nested_bfr = Bry_bfr_.New();
|
||||
}
|
||||
else {
|
||||
nested_bfr = tmp_2;
|
||||
tmp_2_used = true;
|
||||
tmp_2_release = true;
|
||||
}
|
||||
|
||||
// recurse
|
||||
byte[] item_val = item.Val();
|
||||
if (Unstrip(tid, nested_bfr, item_val, 0, item_val.length))
|
||||
item_val = nested_bfr.To_bry_and_clear();
|
||||
if (tmp_2_release)
|
||||
tmp_2_used = false;
|
||||
|
||||
// add to trg
|
||||
trg.Add_mid(src, prv, cur);
|
||||
trg.Add(item_val);
|
||||
|
||||
// update vars
|
||||
dirty = true;
|
||||
cur += item.Key().length;
|
||||
prv = cur;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
cur++;
|
||||
}
|
||||
return dirty;
|
||||
}
|
||||
public static final String Str__marker_bgn = "\u007f'\"`UNIQ-";
|
||||
public static final byte[]
|
||||
Bry__marker__bgn = Bry_.new_a7(Str__marker_bgn)
|
||||
, Bry__marker__end = Bry_.new_a7("-QINU`\"'\u007f")
|
||||
;
|
||||
public static final byte Tid__general = 1, Tid__nowiki = 2, Tid__both = 3;
|
||||
}
|
||||
/** Immutable strip-state entry: item type, marker (key) and the content the marker stands for (val). */
class Xomw_strip_item {
	private final byte tid;
	private final byte[] key;
	private final byte[] val;
	public Xomw_strip_item(byte tid, byte[] key, byte[] val) {
		this.tid = tid;
		this.key = key;
		this.val = val;
	}
	/** Item type. */
	public byte Tid() {
		return tid;
	}
	/** Marker; returned without copying. */
	public byte[] Key() {
		return key;
	}
	/** Content; returned without copying. */
	public byte[] Val() {
		return val;
	}
}
|
@ -0,0 +1,44 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xomw_strip_state__tst {
|
||||
private final Xomw_strip_state__fxt fxt = new Xomw_strip_state__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
|
||||
fxt.Test__nostrip(Xomw_strip_state.Tid__nowiki , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b");
|
||||
fxt.Test__unstrip(Xomw_strip_state.Tid__general, "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
|
||||
fxt.Test__unstrip(Xomw_strip_state.Tid__both , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
|
||||
}
|
||||
@Test public void Recurse() {
|
||||
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
|
||||
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-2-QINU`\"'\u007f", "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f");
|
||||
fxt.Test__unstrip(Xomw_strip_state.Tid__general, "a \u007f'\"`UNIQ-key-2-QINU`\"'\u007f b", "a val-1 b");
|
||||
}
|
||||
}
|
||||
class Xomw_strip_state__fxt {
|
||||
private final Xomw_strip_state strip_state = new Xomw_strip_state();
|
||||
public void Init__add(byte tid, String marker, String val) {
|
||||
strip_state.Add_item(tid, Bry_.new_u8(marker), Bry_.new_u8(val));
|
||||
}
|
||||
public void Test__nostrip(byte tid, String src) {Test__unstrip(tid, src, src);}
|
||||
public void Test__unstrip(byte tid, String src, String expd) {
|
||||
byte[] actl = strip_state.Unstrip(tid, Bry_.new_u8(src));
|
||||
Gftest.Eq__str(expd, String_.new_u8(actl));
|
||||
}
|
||||
}
|
@ -0,0 +1,84 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
class Xomw_doubleunder_wkr {
|
||||
public boolean show_toc;
|
||||
public boolean force_toc_position;
|
||||
public boolean output__no_gallery ;
|
||||
public Xomw_doubleunder_data doubleunderscore_data = new Xomw_doubleunder_data();
|
||||
private void Match_and_remove(byte[] text, Xomw_doubleunder_data doubleunderscore_data) {
|
||||
doubleunderscore_data.Reset();
|
||||
}
|
||||
public void Do_double_underscore(byte[] text) {
|
||||
// The position of __TOC__ needs to be recorded
|
||||
// $mw = MagicWord::get( 'toc' );
|
||||
// if ( $mw->match( $text ) ) {
|
||||
this.show_toc = true;
|
||||
this.force_toc_position = true;
|
||||
|
||||
// Set a placeholder. At the end we'll fill it in with the TOC.
|
||||
// $text = $mw->replace( '<!--MWTOC-->', $text, 1 );
|
||||
|
||||
// Only keep the first one.
|
||||
// $text = $mw->replace( '', $text );
|
||||
// }
|
||||
|
||||
// Now match and remove the rest of them
|
||||
// $mwa = MagicWord::getDoubleUnderscoreArray();
|
||||
Match_and_remove(text, doubleunderscore_data);
|
||||
|
||||
if (doubleunderscore_data.no_gallery) {
|
||||
output__no_gallery = true;
|
||||
}
|
||||
if (doubleunderscore_data.no_toc && !force_toc_position) {
|
||||
this.show_toc = false;
|
||||
}
|
||||
if ( doubleunderscore_data.hidden_cat
|
||||
// && $this->mTitle->getNamespace() == NS_CATEGORY
|
||||
) {
|
||||
//$this->addTrackingCategory( 'hidden-category-category' );
|
||||
}
|
||||
// (T10068) Allow control over whether robots index a page.
|
||||
// __INDEX__ always overrides __NOINDEX__, see T16899
|
||||
if (doubleunderscore_data.no_index // && $this->mTitle->canUseNoindex()
|
||||
) {
|
||||
// $this->mOutput->setIndexPolicy( 'noindex' );
|
||||
// $this->addTrackingCategory( 'noindex-category' );
|
||||
}
|
||||
if (doubleunderscore_data.index //&& $this->mTitle->canUseNoindex()
|
||||
) {
|
||||
// $this->mOutput->setIndexPolicy( 'index' );
|
||||
// $this->addTrackingCategory( 'index-category' );
|
||||
}
|
||||
|
||||
// Cache all double underscores in the database
|
||||
// foreach ( $this->mDoubleUnderscores as $key => $val ) {
|
||||
// $this->mOutput->setProperty( $key, '' );
|
||||
// }
|
||||
}
|
||||
}
|
||||
/** Flags read by Xomw_doubleunder_wkr, one per double-underscore word; all false after Reset. */
class Xomw_doubleunder_data {
	public boolean no_gallery;
	public boolean no_toc;
	public boolean hidden_cat;
	public boolean no_index;
	public boolean index;
	/** Sets every flag back to false. */
	public void Reset() {
		index = false;
		no_index = false;
		hidden_cat = false;
		no_toc = false;
		no_gallery = false;
	}
}
|
@ -0,0 +1,22 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public interface Xomw_heading_cbk {
|
||||
void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr);
|
||||
void On_src_done(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr);
|
||||
}
|
@ -15,10 +15,14 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
public class Xomw_hdr_cbk__html implements Xomw_hdr_cbk {
|
||||
public Bry_bfr Bfr() {return bfr;} private final Bry_bfr bfr = Bry_bfr_.New();
|
||||
public void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_hdr_wkr wkr) {
|
||||
package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public class Xomw_heading_cbk__html implements Xomw_heading_cbk {
|
||||
public Bry_bfr Bfr() {return bfr;} private Bry_bfr bfr;
|
||||
public Xomw_heading_cbk__html Bfr_(Bry_bfr bfr) {
|
||||
this.bfr = bfr;
|
||||
return this;
|
||||
}
|
||||
public void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {
|
||||
// add from txt_bgn to hdr_bgn; EX: "abc\n==A==\n"; "\n==" seen -> add "abc"
|
||||
byte[] src = wkr.Src();
|
||||
int hdr_bgn = wkr.Hdr_bgn(), txt_bgn = wkr.Txt_bgn();
|
||||
@ -34,7 +38,7 @@ public class Xomw_hdr_cbk__html implements Xomw_hdr_cbk {
|
||||
bfr.Add_mid(wkr.Src(), wkr.Hdr_lhs_end(), wkr.Hdr_rhs_bgn());
|
||||
bfr.Add(Tag__rhs).Add_int_digits(1, hdr_num).Add(Byte_ascii.Angle_end_bry); // </h2>
|
||||
}
|
||||
public void On_src_done(Xomw_parser_ctx pctx, Xomw_hdr_wkr wkr) {
|
||||
public void On_src_done(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {
|
||||
// add from txt_bgn to EOS;
|
||||
byte[] src = wkr.Src();
|
||||
int txt_bgn = wkr.Txt_bgn(), src_end = wkr.Src_end();
|
@ -15,11 +15,11 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xomw_hdr_wkr {
|
||||
public class Xomw_heading_wkr {
|
||||
private Xomw_parser_ctx pctx;
|
||||
private Xomw_hdr_cbk cbk;
|
||||
private Xomw_heading_cbk cbk;
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
public int Txt_bgn() {return txt_bgn;} private int txt_bgn;
|
||||
@ -30,13 +30,27 @@ public class Xomw_hdr_wkr {
|
||||
public int Hdr_lhs_end() {return hdr_lhs_end;} private int hdr_lhs_end;
|
||||
public int Hdr_rhs_bgn() {return hdr_rhs_bgn;} private int hdr_rhs_bgn;
|
||||
public int Hdr_rhs_end() {return hdr_rhs_end;} private int hdr_rhs_end;
|
||||
public void Parse(Xomw_parser_ctx pctx, byte[] src, int src_bgn, int src_end, Xomw_hdr_cbk cbk) { // REF.MW: /includes/parser/Parser.php|doHeadings
|
||||
public void Do_headings(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, Xomw_heading_cbk__html cbk) {
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src_bry = src_bfr.Bfr();
|
||||
int src_end = src_bfr.Len();
|
||||
cbk.Bfr_(pbfr.Trg());
|
||||
pbfr.Switch();
|
||||
Parse(pctx, src_bry, 0, src_end, cbk);
|
||||
}
|
||||
public void Parse(Xomw_parser_ctx pctx, byte[] src, int src_bgn, int src_end, Xomw_heading_cbk cbk) { // REF.MW: /includes/parser/Parser.php|doHeadings
|
||||
// init members
|
||||
this.pctx = pctx;
|
||||
this.src = src;
|
||||
this.src_end = src_end;
|
||||
this.cbk = cbk;
|
||||
|
||||
// PORTED:
|
||||
// for ( $i = 6; $i >= 1; --$i ) {
|
||||
// $h = str_repeat( '=', $i );
|
||||
// $text = preg_replace( "/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text );
|
||||
// }
|
||||
|
||||
// do loop
|
||||
int pos = src_bgn;
|
||||
this.txt_bgn = pos == Xomw_parser_ctx.Pos__bos ? 0 : pos;
|
||||
@ -92,7 +106,3 @@ public class Xomw_hdr_wkr {
|
||||
return nl_rhs;
|
||||
}
|
||||
}
|
||||
// for ( $i = 6; $i >= 1; --$i ) {
|
||||
// $h = str_repeat( '=', $i );
|
||||
// $text = preg_replace( "/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text );
|
||||
// }
|
@ -15,10 +15,10 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_hdr_wkr_tst {
|
||||
private final Xomw_hdr_wkr_fxt fxt = new Xomw_hdr_wkr_fxt();
|
||||
public class Xomw_heading_wkr__tst {
|
||||
private final Xomw_heading_wkr__fxt fxt = new Xomw_heading_wkr__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__parse("==A==" , "<h2>A</h2>");
|
||||
fxt.Test__parse("abc\n==A==\ndef" , "abc\n<h2>A</h2>\ndef");
|
||||
@ -28,10 +28,11 @@ public class Xomw_hdr_wkr_tst {
|
||||
fxt.Test__parse("abc\n==" , "abc\n<h1></h1>");
|
||||
}
|
||||
}
|
||||
class Xomw_hdr_wkr_fxt {
|
||||
private final Xomw_hdr_wkr wkr = new Xomw_hdr_wkr();
|
||||
private final Xomw_hdr_cbk__html cbk = new Xomw_hdr_cbk__html();
|
||||
class Xomw_heading_wkr__fxt {
|
||||
private final Xomw_heading_wkr wkr = new Xomw_heading_wkr();
|
||||
private final Xomw_heading_cbk__html cbk = new Xomw_heading_cbk__html().Bfr_(Bry_bfr_.New());
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Parse(pctx, src_bry, -1, src_bry.length, cbk);
|
81
400_xowa/src/gplx/xowa/mws/parsers/hrs/Xomw_hr_wkr.java
Normal file
81
400_xowa/src/gplx/xowa/mws/parsers/hrs/Xomw_hr_wkr.java
Normal file
@ -0,0 +1,81 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.hrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.langs.phps.utls.*;
|
||||
public class Xomw_hr_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private Bry_bfr bfr;
|
||||
public void Replace_hrs(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) { // REF.MW: text = preg_replace('/(^|\n)-----*/', '\\1<hr />', text);
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
this.bfr = pbfr.Trg();
|
||||
|
||||
boolean dirty = false;
|
||||
|
||||
// do separate check for "-----" at start of String;
|
||||
int cur = 0;
|
||||
if (Bry_.Eq(src, 0, Len__wtxt__hr__bos, Bry__wtxt__hr__bos)) {
|
||||
cur = Replace_hr(Bool_.N, src, src_bgn, src_end, 0, Len__wtxt__hr__bos);
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
// loop
|
||||
while (true) {
|
||||
// find next "\n-----"
|
||||
int find_bgn = Bry_find_.Find_fwd(src, Bry__wtxt__hr__mid, cur, src_end);
|
||||
|
||||
// nothing found; exit
|
||||
if (find_bgn == Bry_find_.Not_found) {
|
||||
if (dirty) {
|
||||
bfr.Add_mid(src, cur, src_end);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// something found
|
||||
cur = Replace_hr(Bool_.Y, src, cur, src_end, find_bgn, Len__wtxt__hr__mid);
|
||||
dirty = true;
|
||||
}
|
||||
if (dirty)
|
||||
pbfr.Switch();
|
||||
}
|
||||
private int Replace_hr(boolean mid, byte[] src, int cur, int src_end, int find_bgn, int tkn_len) {
|
||||
// something found; add to bfr
|
||||
if (mid) {
|
||||
bfr.Add_mid(src, cur, find_bgn); // add everything before "\n-----"
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
bfr.Add(Bry__html__hr);
|
||||
|
||||
// set dirty / cur and continue
|
||||
cur = find_bgn + tkn_len;
|
||||
cur = Bry_find_.Find_fwd_while(src, cur, src_end, Byte_ascii.Dash); // gobble up trailing "-"; the "*" in "-----*" from the regex above
|
||||
return cur;
|
||||
}
|
||||
private static final byte[]
|
||||
Bry__wtxt__hr__mid = Bry_.new_a7("\n-----")
|
||||
, Bry__wtxt__hr__bos = Bry_.new_a7("-----")
|
||||
, Bry__html__hr = Bry_.new_a7("<hr />")
|
||||
;
|
||||
private static final int
|
||||
Len__wtxt__hr__mid = Bry__wtxt__hr__mid.length
|
||||
, Len__wtxt__hr__bos = Bry__wtxt__hr__bos.length
|
||||
;
|
||||
}
|
36
400_xowa/src/gplx/xowa/mws/parsers/hrs/Xomw_hr_wkr__tst.java
Normal file
36
400_xowa/src/gplx/xowa/mws/parsers/hrs/Xomw_hr_wkr__tst.java
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.hrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_hr_wkr__tst {
|
||||
private final Xomw_hr_wkr__fxt fxt = new Xomw_hr_wkr__fxt();
|
||||
@Test public void Basic() {fxt.Test__parse("a\n-----b" , "a\n<hr />b");}
|
||||
@Test public void Extend() {fxt.Test__parse("a\n------b" , "a\n<hr />b");}
|
||||
@Test public void Not_found() {fxt.Test__parse("a\n----b" , "a\n----b");}
|
||||
@Test public void Bos() {fxt.Test__parse("-----a" , "<hr />a");}
|
||||
@Test public void Bos_and_mid() {fxt.Test__parse("-----a\n-----b" , "<hr />a\n<hr />b");}
|
||||
}
|
||||
class Xomw_hr_wkr__fxt {
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private final Xomw_hr_wkr wkr = new Xomw_hr_wkr();
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Replace_hrs(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
282
400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java
Normal file
282
400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java
Normal file
@ -0,0 +1,282 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
import gplx.langs.phps.utls.*;
|
||||
import gplx.xowa.mws.htmls.*;
|
||||
// TODO.XO: add proto-rel; EX: [//a.org b]
|
||||
public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private final Bry_bfr tmp;
|
||||
private Btrie_slim_mgr protocol_trie; private final Btrie_rv trv = new Btrie_rv();
|
||||
private int autonumber;
|
||||
private final Xomw_linker linker;
|
||||
private final Xomwh_atr_mgr attribs = new Xomwh_atr_mgr();
|
||||
public Xomw_lnke_wkr(Xomw_parser mgr) {
|
||||
this.tmp = mgr.Tmp();
|
||||
this.linker = mgr.Linker();
|
||||
}
|
||||
public void Init_by_wiki(Btrie_slim_mgr protocol_trie) {
|
||||
this.protocol_trie = protocol_trie;
|
||||
}
|
||||
public void Replace_external_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
int cur = src_bgn;
|
||||
this.autonumber = 1;
|
||||
|
||||
// find regex
|
||||
int prv = 0;
|
||||
while (true) {
|
||||
// PORTED.BGN: $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
|
||||
|
||||
// $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
|
||||
// self::EXT_LINK_ADDR .
|
||||
// self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
|
||||
//
|
||||
// REGEX: "[" + "protocol" + "url-char"* + "space"* + "text"* + "]";
|
||||
// protocol -> ((?i)' . $this->mUrlProtocols . ') -> "http://", "HTTps://"
|
||||
// url-char* -> (EXT_LINK_ADDR . EXT_LINK_URL_CLASS*) -> "255.255.255.255", "a.b.c"; NOTE: "http:///" is valid
|
||||
// space* -> \p{Zs}*
|
||||
// text -> ([^\]\\x00-\\x08\\x0a-\\x1F]*?) -> "abcd"
|
||||
// NOTE: /S=extra analysis of pattern /u = unicode support; REF.MW:http://php.net/manual/en/reference.pcre.pattern.modifiers.php
|
||||
|
||||
// Simplified expression to match an IPv4 or IPv6 address, or
|
||||
// at least one character of a host name (embeds EXT_LINK_URL_CLASS)
|
||||
// static final EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
|
||||
//
|
||||
// REGEX: "IPv4" | "IPv6" | "url-char"
|
||||
// IPv4 -> [0-9.]+ -> "255."
|
||||
// IPv6 -> \\[(?i:[0-9a-f:.]+)\\] -> "2001:"
|
||||
// url-char -> [^][<>"\\x00-\\x20\\x7F\p{Zs}] -> "abcde"
|
||||
|
||||
// Constants needed for external link processing
|
||||
// Everything except bracket, space, or control characters
|
||||
// \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
|
||||
// as well as U+3000 is IDEOGRAPHIC SPACE for T21052
|
||||
// static final EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
|
||||
//
|
||||
// REGEX: NOT [ "symbols" | "control" | "whitespace" ]
|
||||
// symbols -> ^][<>"
|
||||
// control -> \\x00-\\x20\\x7F
|
||||
// whitespace -> \p{Zs}
|
||||
|
||||
// search for "["
|
||||
int lnke_bgn = Bry_find_.Find_fwd(src, Byte_ascii.Brack_bgn, cur, src_end);
|
||||
if (lnke_bgn == Bry_find_.Not_found) {
|
||||
bfr.Add_mid(src, cur, src_end);
|
||||
break; // no more "["; stop
|
||||
}
|
||||
|
||||
// check for protocol; EX: "https://"
|
||||
cur = lnke_bgn + 1;
|
||||
int url_bgn = cur;
|
||||
Object protocol_bry = protocol_trie.Match_at(trv, src, cur, src_end);
|
||||
if (protocol_bry == null) {
|
||||
bfr.Add_mid(src, prv, cur);
|
||||
prv = cur;
|
||||
continue;// unknown protocol; ignore "["
|
||||
}
|
||||
cur += ((byte[])protocol_bry).length;
|
||||
|
||||
// check for one-or-more url chars; [^][<>"\\x00-\\x20\\x7F\p{Zs}]
|
||||
int domain_bgn = cur;
|
||||
while (true) {
|
||||
byte b = src[cur];
|
||||
Object url_char_byte = invalid_url_chars_trie.Match_at_w_b0(trv, b, src, cur, src_end);
|
||||
if (url_char_byte == null)
|
||||
cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
else
|
||||
break;
|
||||
}
|
||||
if (cur - domain_bgn == 0) {
|
||||
bfr.Add_mid(src, prv, cur);
|
||||
prv = cur;
|
||||
continue; // no chars found; invalid; EX: "[https://"abcde"]"
|
||||
}
|
||||
int url_end = cur;
|
||||
|
||||
// get ws (if any)
|
||||
int ws_bgn = -1;
|
||||
while (true) {
|
||||
Object space_byte = space_chars_trie.Match_at(trv, src, cur, src_end);
|
||||
if (space_byte == null) break;
|
||||
if (ws_bgn == -1) ws_bgn = cur;
|
||||
cur += ((Int_obj_val)space_byte).Val();
|
||||
}
|
||||
|
||||
// get text (if any)
|
||||
int text_bgn = -1, text_end = -1;
|
||||
while (true) {
|
||||
byte b = src[cur];
|
||||
Object invalid_text_char = invalid_text_chars_trie.Match_at_w_b0(trv, b, src, cur, src_end);
|
||||
if (invalid_text_char != null) break;
|
||||
if (text_bgn == -1) text_bgn = cur;
|
||||
cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
text_end = cur;
|
||||
}
|
||||
|
||||
// check for "]"
|
||||
if (src[cur] != Byte_ascii.Brack_end) {
|
||||
bfr.Add_mid(src, prv, cur);
|
||||
prv = cur;
|
||||
continue;
|
||||
}
|
||||
cur++;
|
||||
// PORTED.END: $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
|
||||
|
||||
// The characters '<' and '>' (which were escaped by
|
||||
// removeHTMLtags()) should not be included in
|
||||
// URLs, per RFC 2396.
|
||||
// TODO.XO:
|
||||
//$m2 = [];
|
||||
//if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
|
||||
// $text = substr( $url, $m2[0][1] ) . ' ' . $text;
|
||||
// $url = substr( $url, 0, $m2[0][1] );
|
||||
//}
|
||||
|
||||
// If the link text is an image URL, replace it with an <img> tag
|
||||
// This happened by accident in the original parser, but some people used it extensively
|
||||
// TODO.XO:
|
||||
//$img = $this->maybeMakeExternalImage( $text );
|
||||
//if ( $img !== false ) {
|
||||
// $text = $img;
|
||||
//}
|
||||
//
|
||||
//$dtrail = '';
|
||||
|
||||
// Set linktype for CSS - if URL==text, link is essentially free
|
||||
boolean text_missing = text_bgn == -1;
|
||||
byte[] link_type = text_missing ? Link_type__free : Link_type__text;
|
||||
|
||||
// No link text, e.g. [http://domain.tld/some.link]
|
||||
if (text_missing) {
|
||||
// Autonumber; EX: "[123]"
|
||||
tmp.Add_byte(Byte_ascii.Brack_bgn);
|
||||
tmp.Add_int_variable(autonumber++); // TODO.XO:$langObj->formatNum( ++$this->mAutonumber );
|
||||
tmp.Add_byte(Byte_ascii.Brack_end);
|
||||
link_type = Link_type__autonumber;
|
||||
}
|
||||
else {
|
||||
// Have link text, e.g. [http://domain.tld/some.link text]s
|
||||
// Check for trail
|
||||
// TODO.XO:
|
||||
// list( $dtrail, $trail ) = Linker::splitTrail( $trail );
|
||||
}
|
||||
|
||||
// TODO.XO:
|
||||
// $text = $this->getConverterLanguage()->markNoConversion( $text );
|
||||
|
||||
// TODO.XO:
|
||||
// $url = Sanitizer::cleanUrl( $url );
|
||||
|
||||
bfr.Add_mid(src, prv, lnke_bgn);
|
||||
prv = cur;
|
||||
// Use the encoded URL
|
||||
// This means that users can paste URLs directly into the text
|
||||
// Funny characters like <EFBFBD> aren't valid in URLs anyway
|
||||
// This was changed in August 2004
|
||||
// TODO.XO:getExternalLinkAttribs
|
||||
attribs.Clear();
|
||||
linker.Make_external_link(bfr, Bry_.Mid(src, url_bgn, url_end), Bry_.Mid(src, text_bgn, text_end), Bool_.N, link_type, attribs, Bry_.Empty);
|
||||
|
||||
// Register link in the output Object.
|
||||
// Replace unnecessary URL escape codes with the referenced character
|
||||
// This prevents spammers from hiding links from the filters
|
||||
// $pasteurized = self::normalizeLinkUrl( $url );
|
||||
// $this->mOutput->addExternalLink( $pasteurized );
|
||||
}
|
||||
}
|
||||
// public function getExternalLinkAttribs( $url ) {
|
||||
// $attribs = [];
|
||||
// $rel = self::getExternalLinkRel( $url, $this->mTitle );
|
||||
//
|
||||
// $target = $this->mOptions->getExternalLinkTarget();
|
||||
// if ( $target ) {
|
||||
// $attribs['target'] = $target;
|
||||
// if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
|
||||
// // T133507. New windows can navigate parent cross-origin.
|
||||
// // Including noreferrer due to lacking browser
|
||||
// // support of noopener. Eventually noreferrer should be removed.
|
||||
// if ( $rel !== '' ) {
|
||||
// $rel .= ' ';
|
||||
// }
|
||||
// $rel .= 'noreferrer noopener';
|
||||
// }
|
||||
// }
|
||||
// $attribs['rel'] = $rel;
|
||||
// return $attribs;
|
||||
// }
|
||||
// public static function getExternalLinkRel( $url = false, $title = null ) {
|
||||
// global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
|
||||
// $ns = $title ? $title->getNamespace() : false;
|
||||
// if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
|
||||
// && !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
|
||||
// ) {
|
||||
// return 'nofollow';
|
||||
// }
|
||||
// return null;
|
||||
// }
|
||||
|
||||
private static final byte[]
|
||||
Link_type__free = Bry_.new_a7("free")
|
||||
, Link_type__text = Bry_.new_a7("text")
|
||||
, Link_type__autonumber = Bry_.new_a7("autonumber")
|
||||
;
|
||||
|
||||
private static final Btrie_slim_mgr
|
||||
invalid_url_chars_trie = New__invalid_url_chars_trie()
|
||||
, space_chars_trie = New__space_chars_trie()
|
||||
, invalid_text_chars_trie = New__invalid_text_chars_trie()
|
||||
;
|
||||
private static Btrie_slim_mgr New__invalid_url_chars_trie() { // REGEX:[^][<>"\\x00-\\x20\\x7F\p{Zs}]; NOTE: val is just a marker
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
|
||||
rv.Add_str_byte__many(Byte_.Zero, "[", "]", "<", ">", "\"");
|
||||
for (byte i = 0; i < 33; i++) {
|
||||
rv.Add_bry_byte(new byte[] {i}, Byte_.Zero);
|
||||
}
|
||||
rv.Add_bry_byte(Bry_.New_by_ints(127), Byte_.Zero); // x7F
|
||||
rv.Add_bry_byte(Bry_.New_by_ints(227, 128, 128), Byte_.Zero); // \p{Zs} // e3 80 80; https://phabricator.wikimedia.org/T21052
|
||||
return rv;
|
||||
}
|
||||
private static Btrie_slim_mgr New__space_chars_trie() { // REGEX:\p{Zs}; NOTE: val is key.length
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
|
||||
New__trie_itm__by_len(rv, 32);
|
||||
New__trie_itm__by_len(rv, 227, 128, 128); // \p{Zs} // e3 80 80; https://phabricator.wikimedia.org/T21052
|
||||
return rv;
|
||||
}
|
||||
private static Btrie_slim_mgr New__invalid_text_chars_trie() { // REGEX:([^\]\\x00-\\x08\\x0a-\\x1F]*?); NOTE: val is key.length
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
|
||||
New__trie_itm__by_len(rv, Byte_ascii.Brack_end);
|
||||
for (int i = 0; i <= 8; i++) { // x00-x08
|
||||
New__trie_itm__by_len(rv, i);
|
||||
}
|
||||
for (int i = 10; i <= 31; i++) { // x0a-x1F
|
||||
New__trie_itm__by_len(rv, i);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private static void New__trie_itm__by_len(Btrie_slim_mgr mgr, int... ary) {
|
||||
mgr.Add_obj(Bry_.New_by_ints(ary), new Int_obj_val(ary.length));
|
||||
}
|
||||
}
|
@ -0,0 +1,56 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_lnke_wkr__tst {
|
||||
private final Xomw_lnke_wkr__fxt fxt = new Xomw_lnke_wkr__fxt();
|
||||
@Test public void Basic() {fxt.Test__parse("[https://a.org b]" , "<a class='external text' rel='nofollow' href='https://a.org'>b</a>");}
|
||||
@Test public void Invaild__protocol() {fxt.Test__parse("[httpz:a.org]" , "[httpz:a.org]");}
|
||||
@Test public void Invaild__protocol_slash() {fxt.Test__parse("[https:a.org]" , "[https:a.org]");}
|
||||
@Test public void Invaild__urlchars__0() {fxt.Test__parse("[https://]" , "[https://]");}
|
||||
@Test public void Invaild__urlchars__bad() {fxt.Test__parse("[https://\"]" , "[https://\"]");}
|
||||
@Test public void Many() {
|
||||
fxt.Test__parse(String_.Concat_lines_nl_apos_skip_last
|
||||
( "a"
|
||||
, "[https://b.org c]"
|
||||
, "d"
|
||||
, "[https://e.org f]"
|
||||
, "g"
|
||||
), String_.Concat_lines_nl_apos_skip_last
|
||||
( "a"
|
||||
, "<a class='external text' rel='nofollow' href='https://b.org'>c</a>"
|
||||
, "d"
|
||||
, "<a class='external text' rel='nofollow' href='https://e.org'>f</a>"
|
||||
, "g"
|
||||
));
|
||||
}
|
||||
}
|
||||
class Xomw_lnke_wkr__fxt {
|
||||
private final Xomw_lnke_wkr wkr = new Xomw_lnke_wkr(new Xomw_parser());
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private boolean apos = true;
|
||||
public Xomw_lnke_wkr__fxt() {
|
||||
wkr.Init_by_wiki(Xomw_parser.Protocols__dflt());
|
||||
}
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Replace_external_links(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
462
400_xowa/src/gplx/xowa/mws/parsers/lnkis/Xomw_lnki_wkr.java
Normal file
462
400_xowa/src/gplx/xowa/mws/parsers/lnkis/Xomw_lnki_wkr.java
Normal file
@ -0,0 +1,462 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
import gplx.langs.phps.utls.*;
|
||||
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.xwikis.*;
|
||||
import gplx.xowa.mws.parsers.*; import gplx.xowa.mws.parsers.quotes.*;
|
||||
import gplx.xowa.mws.htmls.*; import gplx.xowa.mws.linkers.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
import gplx.xowa.parsers.uniqs.*;
|
||||
public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private final Xomw_link_holders holders;
|
||||
private final Xomw_linker linker;
|
||||
private final Xomw_link_renderer link_renderer;
|
||||
// private final Btrie_slim_mgr protocols_trie;
|
||||
private final Xomw_quote_wkr quote_wkr;
|
||||
private final Xomw_strip_state strip_state;
|
||||
private Xow_wiki wiki;
|
||||
private Xoa_ttl page_title;
|
||||
private final Xomw_linker__normalize_subpage_link normalize_subpage_link = new Xomw_linker__normalize_subpage_link();
|
||||
private final Bry_bfr tmp;
|
||||
private final Xomw_parser parser;
|
||||
private final Xomwh_atr_mgr extra_atrs = new Xomwh_atr_mgr();
|
||||
// Stores the passed-in collaborators and pulls the linker, quote worker, temp buffer and strip state from the parser.
// NOTE: protocols_trie is accepted but currently not stored (assignment is commented out).
public Xomw_lnki_wkr(Xomw_parser parser, Xomw_link_holders holders, Xomw_link_renderer link_renderer, Btrie_slim_mgr protocols_trie) {
  // collaborators passed in directly
  this.parser = parser;
  this.holders = holders;
  this.link_renderer = link_renderer;
  // this.protocols_trie = protocols_trie;

  // collaborators obtained from the parser
  this.linker      = parser.Linker();
  this.quote_wkr   = parser.Quote_wkr();
  this.tmp         = parser.Tmp();
  this.strip_state = parser.Strip_state();
}
|
||||
public void Init_by_wiki(Xow_wiki wiki) {
|
||||
this.wiki = wiki;
|
||||
if (title_chars_for_lnki == null) {
|
||||
title_chars_for_lnki = (boolean[])Array_.Clone(Xomw_ttl_utl.Title_chars_valid());
|
||||
// the % is needed to support urlencoded titles as well
|
||||
title_chars_for_lnki[Byte_ascii.Hash] = true;
|
||||
title_chars_for_lnki[Byte_ascii.Percent] = true;
|
||||
}
|
||||
}
|
||||
public void Clear_state() {
|
||||
holders.Clear();
|
||||
}
|
||||
public void Replace_internal_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
this.page_title = pctx.Page_title();
|
||||
|
||||
Replace_internal_links(bfr, src, src_bgn, src_end);
|
||||
}
|
||||
public void Replace_internal_links(Bry_bfr bfr, byte[] src, int src_bgn, int src_end) {
|
||||
// PORTED: regex for tc move to header; e1 and e1_img moved to code
|
||||
// split the entire text String on occurrences of [[
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
while (true) {
|
||||
int lnki_bgn = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__bgn, cur, src_end); // $a = StringUtils::explode('[[', ' ' . $s);
|
||||
if (lnki_bgn == Bry_find_.Not_found) { // no more "[["; stop loop
|
||||
bfr.Add_mid(src, cur, src_end);
|
||||
break;
|
||||
}
|
||||
cur = lnki_bgn + 2; // 2="[[".length
|
||||
|
||||
// IGNORE: handles strange split logic of adding space to String; "$s = substr($s, 1);"
|
||||
|
||||
// TODO.XO:lnke_bgn; EX: b[[A]]
|
||||
// $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
|
||||
// $e2 = null;
|
||||
// if ($useLinkPrefixExtension) {
|
||||
// // Match the end of a line for a word that's not followed by whitespace,
|
||||
// // e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
|
||||
// global $wgContLang;
|
||||
// $charset = $wgContLang->linkPrefixCharset();
|
||||
// $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
|
||||
// }
|
||||
|
||||
// IGNORE: throw new MWException(__METHOD__ . ": \$this->mTitle is null\n");
|
||||
|
||||
// $nottalk = !$this->mTitle->isTalkPage();
|
||||
|
||||
// TODO.XO:lnke_bgn
|
||||
byte[] prefix = Bry_.Empty;
|
||||
//if ($useLinkPrefixExtension) {
|
||||
// $m = [];
|
||||
// if (preg_match($e2, $s, $m)) {
|
||||
// $first_prefix = $m[2];
|
||||
// } else {
|
||||
// $first_prefix = false;
|
||||
// }
|
||||
//} else {
|
||||
// $prefix = '';
|
||||
//}
|
||||
|
||||
// IGNORE: "Check for excessive memory usage"
|
||||
|
||||
// TODO.XO:lnke_bgn; EX: b[[A]]
|
||||
//if ($useLinkPrefixExtension) {
|
||||
// if (preg_match($e2, $s, $m)) {
|
||||
// $prefix = $m[2];
|
||||
// $s = $m[1];
|
||||
// } else {
|
||||
// $prefix = '';
|
||||
// }
|
||||
// // first link
|
||||
// if ($first_prefix) {
|
||||
// $prefix = $first_prefix;
|
||||
// $first_prefix = false;
|
||||
// }
|
||||
//}
|
||||
|
||||
// PORTED.BGN: if (preg_match($e1, $line, $m)) && else if (preg_match($e1_img, $line, $m))
|
||||
// NOTE: both e1 and e1_img are effectively the same; e1_img allows nested "[["; EX: "[[A|b[[c]]d]]" will stop at "[[A|b"
|
||||
int ttl_bgn = cur;
|
||||
int ttl_end = Xomw_ttl_utl.Find_fwd_while_title(src, cur, src_end, title_chars_for_lnki);
|
||||
cur = ttl_end;
|
||||
int capt_bgn = -1, capt_end = -1;
|
||||
int nxt_lnki = -1;
|
||||
|
||||
boolean might_be_img = false;
|
||||
if (ttl_end > ttl_bgn) { // at least one valid title-char found; check for "|" or "]]" EX: "[[a"
|
||||
byte nxt_byte = src[ttl_end];
|
||||
if (nxt_byte == Byte_ascii.Pipe) { // handles lnki with capt ([[A|a]])and lnki with file ([[File:A.png|b|c|d]])
|
||||
cur = ttl_end + 1;
|
||||
|
||||
// find next "[["
|
||||
nxt_lnki = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__bgn, cur, src_end);
|
||||
if (nxt_lnki == Bry_find_.Not_found)
|
||||
nxt_lnki = src_end;
|
||||
|
||||
// find end "]]"
|
||||
capt_bgn = cur;
|
||||
capt_end = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__end, cur, nxt_lnki);
|
||||
if (capt_end == Bry_find_.Not_found) {
|
||||
capt_end = nxt_lnki;
|
||||
cur = nxt_lnki;
|
||||
might_be_img = true;
|
||||
}
|
||||
else {
|
||||
cur = capt_end + Bry__wtxt__lnki__end.length;
|
||||
}
|
||||
}
|
||||
else if (Bry_.Match(src, ttl_end, ttl_end + 2, Bry__wtxt__lnki__end)) { // handles simple lnki; EX: [[A]]
|
||||
cur = ttl_end + 2;
|
||||
}
|
||||
else {
|
||||
ttl_end = -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
ttl_end = -1;
|
||||
if (ttl_end == -1) { // either (a) no valid title-chars ("[[<") or (b) title char, but has stray "]" ("[[a]b]]")
|
||||
// Invalid form; output directly
|
||||
bfr.Add_mid(src, cur, src_end);
|
||||
continue;
|
||||
}
|
||||
// PORTED.END: if (preg_match($e1, $line, $m)) && else if (preg_match($e1_img, $line, $m))
|
||||
|
||||
byte[] text = Bry_.Mid(src, capt_bgn, capt_end);
|
||||
byte[] trail = Bry_.Empty;
|
||||
if (!might_be_img) {
|
||||
// If we get a ] at the beginning of $m[3] that means we have a link that's something like:
|
||||
// [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
|
||||
// the real problem is with the $e1 regex
|
||||
// See T1500.
|
||||
// Still some problems for cases where the ] is meant to be outside punctuation,
|
||||
// and no image is in sight. See T4095.
|
||||
// if ($text !== ''
|
||||
// && substr($m[3], 0, 1) === ']'
|
||||
// && strpos($text, '[') !== false
|
||||
// ) {
|
||||
// $text .= ']'; // so that replaceExternalLinks($text) works later
|
||||
// $m[3] = substr($m[3], 1);
|
||||
// }
|
||||
|
||||
// fix up urlencoded title texts
|
||||
// if (strpos($m[1], '%') !== false) {
|
||||
// // Should anchors '#' also be rejected?
|
||||
// $m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
|
||||
// }
|
||||
// $trail = $m[3];
|
||||
}
|
||||
else {
|
||||
// Invalid, but might be an image with a link in its caption
|
||||
// $text = $m[2];
|
||||
// if (strpos($m[1], '%') !== false) {
|
||||
// $m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
|
||||
// }
|
||||
// $trail = "";
|
||||
}
|
||||
|
||||
byte[] orig_link = Bry_.Mid(src, ttl_bgn, ttl_end);
|
||||
|
||||
// TODO.XO: handle "[[http://a.org]]"
|
||||
// Don't allow @gplx.Internal protected links to pages containing
|
||||
// PROTO: where PROTO is a valid URL protocol; these
|
||||
// should be external links.
|
||||
// if (preg_match('/^(?i:' . $this->mUrlProtocols . ')/', $origLink)) {
|
||||
// $s .= $prefix . '[[' . $line;
|
||||
// continue;
|
||||
// }
|
||||
|
||||
byte[] link = orig_link;
|
||||
boolean no_force = orig_link[0] != Byte_ascii.Colon;
|
||||
if (!no_force) {
|
||||
// Strip off leading ':'
|
||||
link = Bry_.Mid(link, 1);
|
||||
}
|
||||
Xoa_ttl nt = wiki.Ttl_parse(link);
|
||||
|
||||
// Make subpage if necessary
|
||||
boolean subpages_enabled = nt.Ns().Subpages_enabled();
|
||||
if (subpages_enabled) {
|
||||
Maybe_do_subpage_link(normalize_subpage_link, orig_link, text);
|
||||
link = normalize_subpage_link.link;
|
||||
text = normalize_subpage_link.text;
|
||||
nt = wiki.Ttl_parse(link);
|
||||
}
|
||||
// IGNORE: handled in rewrite above
|
||||
// else {
|
||||
// link = orig_link;
|
||||
// }
|
||||
|
||||
byte[] unstrip = strip_state.Unstrip_nowiki(link);
|
||||
if (!Bry_.Eq(unstrip, link))
|
||||
nt = wiki.Ttl_parse(unstrip);
|
||||
if (nt == null) {
|
||||
bfr.Add_mid(src, prv, lnki_bgn + 2); // $s .= $prefix . '[[' . $line;
|
||||
cur = lnki_bgn + 2;
|
||||
prv = cur;
|
||||
continue;
|
||||
}
|
||||
|
||||
Xow_ns ns = nt.Ns();
|
||||
Xow_xwiki_itm iw = nt.Wik_itm();
|
||||
|
||||
if (might_be_img) { // if this is actually an invalid link
|
||||
if (ns.Id_is_file() && no_force) { // but might be an image
|
||||
boolean found = false;
|
||||
// while (true) {
|
||||
// // look at the next 'line' to see if we can close it there
|
||||
// a->next();
|
||||
// next_line = a->current();
|
||||
// if (next_line === false || next_line === null) {
|
||||
// break;
|
||||
// }
|
||||
// m = explode(']]', next_line, 3);
|
||||
// if (count(m) == 3) {
|
||||
// // the first ]] closes the inner link, the second the image
|
||||
// found = true;
|
||||
// text .= "[[{m[0]}]]{m[1]}";
|
||||
// trail = m[2];
|
||||
// break;
|
||||
// } else if (count(m) == 2) {
|
||||
// // if there's exactly one ]] that's fine, we'll keep looking
|
||||
// text .= "[[{m[0]}]]{m[1]}";
|
||||
// } else {
|
||||
// // if next_line is invalid too, we need look no further
|
||||
// text .= '[[' . next_line;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
if (!found) {
|
||||
// we couldn't find the end of this imageLink, so output it raw
|
||||
// but don't ignore what might be perfectly normal links in the text we've examined
|
||||
Bry_bfr nested = wiki.Utl__bfr_mkr().Get_b128();
|
||||
this.Replace_internal_links(nested, text, 0, text.length);
|
||||
nested.Mkr_rls();
|
||||
bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
|
||||
// note: no trail, because without an end, there *is* no trail
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else { // it's not an image, so output it raw
|
||||
bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
|
||||
// note: no trail, because without an end, there *is* no trail
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
boolean was_blank = text.length == 0;
|
||||
if (was_blank) {
|
||||
text = link;
|
||||
}
|
||||
else {
|
||||
// T6598 madness. Handle the quotes only if they come from the alternate part
|
||||
// [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
|
||||
// [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
|
||||
// -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
|
||||
text = quote_wkr.Do_quotes(tmp, text);
|
||||
}
|
||||
|
||||
// Link not escaped by : , create the various objects
|
||||
// if (no_force && !nt->wasLocalInterwiki()) {
|
||||
// Interwikis
|
||||
// if (
|
||||
// iw && this->mOptions->getInterwikiMagic() && nottalk && (
|
||||
// Language::fetchLanguageName(iw, null, 'mw') ||
|
||||
// in_array(iw, wgExtraInterlanguageLinkPrefixes)
|
||||
// )
|
||||
// ) {
|
||||
// T26502: filter duplicates
|
||||
// if (!isset(this->mLangLinkLanguages[iw])) {
|
||||
// this->mLangLinkLanguages[iw] = true;
|
||||
// this->mOutput->addLanguageLink(nt->getFullText());
|
||||
// }
|
||||
//
|
||||
// s = rtrim(s . prefix);
|
||||
// s .= trim(trail, "\n") == '' ? '': prefix . trail;
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
if (ns.Id_is_file()) {
|
||||
// if (!wfIsBadImage(nt->getDBkey(), this->mTitle)) {
|
||||
// if (wasblank) {
|
||||
// // if no parameters were passed, text
|
||||
// // becomes something like "File:Foo.png",
|
||||
// // which we don't want to pass on to the
|
||||
// // image generator
|
||||
// text = '';
|
||||
// } else {
|
||||
// // recursively parse links inside the image caption
|
||||
// // actually, this will parse them in any other parameters, too,
|
||||
// // but it might be hard to fix that, and it doesn't matter ATM
|
||||
// text = this->replaceExternalLinks(text);
|
||||
// holders->merge(this->replaceInternalLinks2(text));
|
||||
// }
|
||||
// // cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
|
||||
// s .= prefix . this->armorLinks(
|
||||
// this->makeImage(nt, text, holders)) . trail;
|
||||
// continue;
|
||||
// }
|
||||
}
|
||||
else if (ns.Id_is_ctg()) {
|
||||
bfr.Trim_end_ws(); // s = rtrim(s . "\n"); // T2087
|
||||
|
||||
if (was_blank) {
|
||||
// sortkey = this->getDefaultSort();
|
||||
}
|
||||
else {
|
||||
// sortkey = text;
|
||||
}
|
||||
// sortkey = Sanitizer::decodeCharReferences(sortkey);
|
||||
// sortkey = str_replace("\n", '', sortkey);
|
||||
// sortkey = this->getConverterLanguage()->convertCategoryKey(sortkey);
|
||||
// this->mOutput->addCategory(nt->getDBkey(), sortkey);
|
||||
//
|
||||
// Strip the whitespace Category links produce, see T2087
|
||||
// s .= trim(prefix . trail, "\n") == '' ? '' : prefix . trail;
|
||||
|
||||
continue;
|
||||
}
|
||||
// }
|
||||
|
||||
// Self-link checking. For some languages, variants of the title are checked in
|
||||
// LinkHolderArray::doVariants() to allow batching the existence checks necessary
|
||||
// for linking to a different variant.
|
||||
if (!ns.Id_is_special() && nt.Eq_full_db(page_title) && !nt.Has_fragment()) {
|
||||
bfr.Add(prefix);
|
||||
linker.Make_self_link_obj(bfr, nt, text, Bry_.Empty, trail, Bry_.Empty);
|
||||
continue;
|
||||
}
|
||||
|
||||
// NS_MEDIA is a pseudo-namespace for linking directly to a file
|
||||
// @todo FIXME: Should do batch file existence checks, see comment below
|
||||
if (ns.Id_is_media()) {
|
||||
// Give extensions a chance to select the file revision for us
|
||||
// options = [];
|
||||
// descQuery = false;
|
||||
// MW.HOOK:BeforeParserFetchFileAndTitle
|
||||
// Fetch and register the file (file title may be different via hooks)
|
||||
// list(file, nt) = this->fetchFileAndTitle(nt, options);
|
||||
// Cloak with NOPARSE to avoid replacement in replaceExternalLinks
|
||||
// s .= prefix . this->armorLinks(
|
||||
// Linker::makeMediaLinkFile(nt, file, text)) . trail;
|
||||
// continue;
|
||||
}
|
||||
|
||||
// Some titles, such as valid special pages or files in foreign repos, should
|
||||
// be shown as bluelinks even though they're not included in the page table
|
||||
// @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
|
||||
// batch file existence checks for NS_FILE and NS_MEDIA
|
||||
bfr.Add_mid(src, prv, lnki_bgn);
|
||||
prv = cur;
|
||||
if (iw == null && nt.Is_always_known()) {
|
||||
// this->mOutput->addLink(nt);
|
||||
Make_known_link_holder(bfr, nt, text, trail, prefix);
|
||||
}
|
||||
else {
|
||||
// Links will be added to the output link list after checking
|
||||
holders.Make_holder(bfr, nt, text, Bry_.Ary_empty, trail, prefix);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Resolves a possible subpage link by delegating to linker.Normalize_subpage_link with this worker's page_title.
// The outcome is written into rv; the caller reads rv.link and rv.text afterwards.
public void Maybe_do_subpage_link(Xomw_linker__normalize_subpage_link rv, byte[] target, byte[] text) {
|
||||
linker.Normalize_subpage_link(rv, page_title, target, text);
|
||||
}
|
||||
// Second pass over the parser buffer: delegates to holders.Replace(pctx, pbfr).
// The fixture in this commit calls it after Replace_internal_links to turn "<!--LINK n-->" holders into html.
public void Replace_link_holders(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
holders.Replace(pctx, pbfr);
|
||||
}
|
||||
public void Make_known_link_holder(Bry_bfr bfr, Xoa_ttl nt, byte[] text, byte[] trail, byte[] prefix) {
|
||||
byte[][] split_trail = linker.Split_trail(trail);
|
||||
byte[] inside = split_trail[0];
|
||||
trail = split_trail[1];
|
||||
|
||||
if (text == Bry_.Empty) {
|
||||
text = Bry_.Escape_html(nt.Get_prefixed_text());
|
||||
}
|
||||
|
||||
// PORTED:new HtmlArmor( "$prefix$text$inside" )
|
||||
tmp.Add_bry_escape_html(prefix);
|
||||
tmp.Add_bry_escape_html(text);
|
||||
tmp.Add_bry_escape_html(inside);
|
||||
text = tmp.To_bry_and_clear();
|
||||
|
||||
link_renderer.Make_known_link(bfr, nt, text, extra_atrs, Bry_.Empty);
|
||||
byte[] link = bfr.To_bry_and_clear();
|
||||
parser.Armor_links(bfr, link, 0, link.length);
|
||||
bfr.Add(trail);
|
||||
}
|
||||
|
||||
// NOTE(review): not assigned in the visible part of this class; presumably a per-byte table of characters allowed in link titles -- confirm where it is initialized.
private static boolean[] title_chars_for_lnki;
|
||||
// Wikitext delimiters that open ("[[") and close ("]]") an internal link.
private static final byte[] Bry__wtxt__lnki__bgn = Bry_.new_a7("[["), Bry__wtxt__lnki__end = Bry_.new_a7("]]");
|
||||
|
||||
// $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
|
||||
//
|
||||
// REGEX: "title-char"(1+) + "pipe"(0-1) + "]]"(0-1) + "other chars up to next [["
|
||||
// title-char -> ([{$tc}]+)
|
||||
// pipe -> (?:\\|(.+?))?
|
||||
// ]] -> ?]]
|
||||
// other chars... -> (.*)
|
||||
|
||||
// $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
|
||||
//
|
||||
// REGEX: "title-char"(1+) + "pipe"(0-1) + "other chars up to next [["
|
||||
// title-char -> ([{$tc}]+)
|
||||
// pipe -> \\|
|
||||
// other chars... -> (.*)
|
||||
}
|
@ -0,0 +1,63 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_lnki_wkr__tst {
|
||||
private final Xomw_lnki_wkr__fxt fxt = new Xomw_lnki_wkr__fxt();
|
||||
@Before public void init() {fxt.Clear();}
|
||||
// @Test public void Basic() {fxt.Test__parse("[[A]]" , "<!--LINK 0-->");}
|
||||
@Test public void Text() {fxt.Test__parse("a [[A]] z" , "a <!--LINK 0--> z");}
|
||||
@Test public void Capt() {fxt.Test__parse("a [[A|a]] z" , "a <!--LINK 0--> z");}
|
||||
// @Test public void Text() {fxt.Test__parse("a [[A]] z" , "a <a href='/wiki/A' title='A'>A</a> z");}
|
||||
// @Test public void Capt() {fxt.Test__parse("a [[A|a]] z" , "a <a href='/wiki/A' title='A'>a</a> z");}
|
||||
// @Test public void Text() {fxt.Test__parse("a [[A]] z" , "a <!--LINK 0--> z");}
|
||||
// @Test public void Invalid__char() {fxt.Test__parse("[[<A>]]" , "[[<A>]]");}
|
||||
@Test public void Self() {fxt.Test__to_html("[[Page_1]]" , "<strong class='selflink'>Page_1</strong>");}
|
||||
}
|
||||
class Xomw_lnki_wkr__fxt {
|
||||
private final Xomw_lnki_wkr wkr;
|
||||
private final Xomw_parser_ctx pctx;
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private boolean apos = true;
|
||||
public Xomw_lnki_wkr__fxt() {
|
||||
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||
Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
Xomw_parser parser = new Xomw_parser();
|
||||
wkr = parser.Lnki_wkr();
|
||||
parser.Init_by_wiki(wiki);
|
||||
|
||||
pctx = new Xomw_parser_ctx();
|
||||
pctx.Init_by_page(wiki.Ttl_parse(Bry_.new_a7("Page_1")));
|
||||
}
|
||||
public void Clear() {
|
||||
wkr.Clear_state();
|
||||
}
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Replace_internal_links(pctx, pbfr.Init(src_bry));
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
public void Test__to_html(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Replace_internal_links(pctx, pbfr.Init(src_bry));
|
||||
wkr.Replace_link_holders(pctx, pbfr);
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
@ -0,0 +1,331 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.magiclinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.core.net.*;
|
||||
import gplx.langs.phps.utls.*;
|
||||
// public class Xomw_magiclinks_wkr {
|
||||
// private final Btrie_slim_mgr regex_trie = Btrie_slim_mgr.ci_a7(); // NOTE: must be ci to handle protocols; EX: "https:" and "HTTPS:"
|
||||
// private final Btrie_rv trv = new Btrie_rv();
|
||||
// public Xomw_magiclinks_wkr() {
|
||||
// }
|
||||
// private static byte[] Tag__anch__rhs, Prefix__rfc, Prefix__pmid;
|
||||
//
|
||||
// private static final byte Space__tab = 1, Space__nbsp_ent = 2, Space__nbsp_dec = 3, Space__nbsp_hex = 4;
|
||||
// private static Btrie_slim_mgr space_trie;
|
||||
// // static final SPACE_NOT_NL = '(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
|
||||
//// public void Test() {
|
||||
//// regex.Add("\t", Space__tab);
|
||||
//// regex.Add(" ", Space__nbsp__ent);
|
||||
//// regex.Add(Regex.Make("&#").Star("0").Add("160;"), Space__nbsp__dec);
|
||||
//// regex.Add(Regex.Make("&#").Brack("X", "x").Star("0").Brack("A", "a").Add("0"), Space__nbsp__hex);
|
||||
//// }
|
||||
// public int Find_fwd_space(byte[] src, int cur, int src_end) {
|
||||
// return -1;
|
||||
// }
|
||||
//
|
||||
// private static final byte Regex__anch = 1, Regex__elem = 2, Regex__free = 3, Regex__rfc = 5, Regex__isbn = 6, Regex__pmid = 7;
|
||||
// public void Init_by_wiki() {
|
||||
// regex_trie.Add_str_byte("<a", Regex__anch);
|
||||
// regex_trie.Add_str_byte("<" , Regex__elem);
|
||||
//
|
||||
// Gfo_protocol_itm[] protocol_ary = Gfo_protocol_itm.Ary();
|
||||
// int protocol_len = protocol_ary.length;
|
||||
// for (int i = 0; i < protocol_len; i++) {
|
||||
// Gfo_protocol_itm itm = protocol_ary[i];
|
||||
// regex_trie.Add_bry_byte(itm.Key_w_colon_bry(), Regex__free);
|
||||
// }
|
||||
// regex_trie.Add_str_byte("RFC " , Regex__rfc);
|
||||
// regex_trie.Add_str_byte("PMID " , Regex__rfc);
|
||||
// regex_trie.Add_str_byte("ISBN ", Regex__rfc);
|
||||
//
|
||||
// if (Tag__anch__rhs == null) {
|
||||
// synchronized (Type_adp_.ClassOf_obj(this)) {
|
||||
// Tag__anch__rhs = Bry_.new_a7("</a>");
|
||||
// Prefix__rfc = Bry_.new_a7("RFC");
|
||||
// Prefix__pmid = Bry_.new_a7("PMID");
|
||||
// space_trie = Btrie_slim_mgr.ci_a7()
|
||||
// .Add_str_byte("\t", Space__tab)
|
||||
// .Add_str_byte(" ", Space__nbsp_ent)
|
||||
// .Add_str_byte("&#", Space__nbsp_dec)
|
||||
// .Add_str_byte("&x", Space__nbsp_hex)
|
||||
// ;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // Replace special strings like "ISBN xxx" and "RFC xxx" with
|
||||
// // magic external links.
|
||||
// public void Do_magic_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
// // XO.PBFR
|
||||
// Bry_bfr src_bfr = pbfr.Src();
|
||||
// byte[] src = src_bfr.Bfr();
|
||||
// int src_bgn = 0;
|
||||
// int src_end = src_bfr.Len();
|
||||
// Bry_bfr bfr = pbfr.Trg();
|
||||
//
|
||||
// int cur = src_bgn;
|
||||
// int prv = cur;
|
||||
// boolean dirty = true;
|
||||
// while (true) {
|
||||
// if (cur == src_end) {
|
||||
// if (dirty)
|
||||
// bfr.Add_mid(src, prv, src_end);
|
||||
// break;
|
||||
// }
|
||||
//
|
||||
// byte b = src[cur];
|
||||
// Object o = regex_trie.Match_at_w_b0(trv, b, src, cur, src_end);
|
||||
// // current byte doesn't look like magiclink; continue;
|
||||
// if (o == null) {
|
||||
// cur++;
|
||||
// continue;
|
||||
// }
|
||||
// // looks like magiclink; do additional processing
|
||||
// byte regex_tid = ((Byte_obj_ref)o).Val();
|
||||
// int trv_pos = trv.Pos();
|
||||
// int nxt_pos = trv_pos;
|
||||
// boolean regex_valid = true;
|
||||
// switch (regex_tid) {
|
||||
// case Regex__anch: // (<a[ \t\r\n>].*?</a>) | // m[1]: Skip link text
|
||||
// if (trv_pos < src_end) {
|
||||
// // find ws in "[ \t\r\n>]"
|
||||
// byte ws_byte = src[cur];
|
||||
// switch (ws_byte) {
|
||||
// case Byte_ascii.Space:
|
||||
// case Byte_ascii.Tab:
|
||||
// case Byte_ascii.Cr:
|
||||
// case Byte_ascii.Nl:
|
||||
// break;
|
||||
// default:
|
||||
// regex_valid = false;
|
||||
// break;
|
||||
// }
|
||||
// if (regex_valid) {
|
||||
// // find </a>
|
||||
// nxt_pos++;
|
||||
// int anch_end = Bry_find_.Find_fwd(src, Tag__anch__rhs, nxt_pos, src_end);
|
||||
// if (anch_end == Bry_find_.Not_found) {
|
||||
// regex_valid = false;
|
||||
// }
|
||||
// else {
|
||||
// cur = anch_end + Tag__anch__rhs.length;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// else {
|
||||
// regex_valid = false;
|
||||
// }
|
||||
// break;
|
||||
// case Regex__elem: // (<.*?>) | // m[2]: Skip stuff inside
|
||||
// // just find ">"
|
||||
// int elem_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, nxt_pos, src_end);
|
||||
// if (elem_end == Bry_find_.Not_found)
|
||||
// regex_valid = false;
|
||||
// else
|
||||
// cur = elem_end + 1;
|
||||
// break;
|
||||
// case Regex__free:
|
||||
// // addr; urlchar
|
||||
// break;
|
||||
// case Regex__rfc:
|
||||
// case Regex__pmid:
|
||||
// // byte[] prefix = regex == Regex__rfc ? Prefix__rfc : Prefix__pmid;
|
||||
// // match previous for case sensitivity
|
||||
//// if (Bry_.Eq(src, trv_pos - prefix.length - 1, trv_pos - 1, prefix)) {
|
||||
////
|
||||
//// }
|
||||
//// else {
|
||||
//// regex_valid = false;
|
||||
//// }
|
||||
// break;
|
||||
// }
|
||||
//
|
||||
//// '!(?: // Start cases
|
||||
//// (<a[ \t\r\n>].*?</a>) | // m[1]: Skip link text
|
||||
//// (<.*?>) | // m[2]: Skip stuff inside
|
||||
//// // HTML elements' . "
|
||||
//// (\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
|
||||
//// // m[4]: Post-protocol path
|
||||
//// \b(?:RFC|PMID) $spaces // m[5]: RFC or PMID, capture number
|
||||
//// ([0-9]+)\b |
|
||||
//// \bISBN $spaces ( // m[6]: ISBN, capture number
|
||||
//// (?: 97[89] $spdash?)? // optional 13-digit ISBN prefix
|
||||
//// (?: [0-9] $spdash?){9} // 9 digits with opt. delimiters
|
||||
//// [0-9Xx] // check digit
|
||||
//// )\b
|
||||
//
|
||||
// }
|
||||
// if (dirty)
|
||||
// pbfr.Switch();
|
||||
|
||||
// $prots = wfUrlProtocolsWithoutProtRel();
|
||||
// $urlChar = self::EXT_LINK_URL_CLASS;
|
||||
// $addr = self::EXT_LINK_ADDR;
|
||||
// $space = self::SPACE_NOT_NL; // non-newline space
|
||||
// $spdash = "(?:-|$space)"; // a dash or a non-newline space
|
||||
// $spaces = "$space++"; // possessive match of 1 or more spaces
|
||||
// $text = preg_replace_callback(
|
||||
// '!(?: // Start cases
|
||||
// (<a[ \t\r\n>].*?</a>) | // m[1]: Skip link text
|
||||
// (<.*?>) | // m[2]: Skip stuff inside
|
||||
// // HTML elements' . "
|
||||
// (\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
|
||||
// // m[4]: Post-protocol path
|
||||
// \b(?:RFC|PMID) $spaces // m[5]: RFC or PMID, capture number
|
||||
// ([0-9]+)\b |
|
||||
// \bISBN $spaces ( // m[6]: ISBN, capture number
|
||||
// (?: 97[89] $spdash?)? // optional 13-digit ISBN prefix
|
||||
// (?: [0-9] $spdash?){9} // 9 digits with opt. delimiters
|
||||
// [0-9Xx] // check digit
|
||||
// )\b
|
||||
// )!xu", [ &$this, 'magicLinkCallback' ], $text);
|
||||
// return $text;
|
||||
// }
|
||||
|
||||
// public function magicLinkCallback($m) {
|
||||
// if (isset($m[1]) && $m[1] !== '') {
|
||||
// // Skip anchor
|
||||
// return $m[0];
|
||||
// } else if (isset($m[2]) && $m[2] !== '') {
|
||||
// // Skip HTML element
|
||||
// return $m[0];
|
||||
// } else if (isset($m[3]) && $m[3] !== '') {
|
||||
// // Free external link
|
||||
// return $this->makeFreeExternalLink($m[0], strlen($m[4]));
|
||||
// } else if (isset($m[5]) && $m[5] !== '') {
|
||||
// // RFC or PMID
|
||||
// if (substr($m[0], 0, 3) === 'RFC') {
|
||||
// if (!$this->mOptions->getMagicRFCLinks()) {
|
||||
// return $m[0];
|
||||
// }
|
||||
// $keyword = 'RFC';
|
||||
// $urlmsg = 'rfcurl';
|
||||
// $cssClass = 'mw-magiclink-rfc';
|
||||
// $trackingCat = 'magiclink-tracking-rfc';
|
||||
// $id = $m[5];
|
||||
// } else if (substr($m[0], 0, 4) === 'PMID') {
|
||||
// if (!$this->mOptions->getMagicPMIDLinks()) {
|
||||
// return $m[0];
|
||||
// }
|
||||
// $keyword = 'PMID';
|
||||
// $urlmsg = 'pubmedurl';
|
||||
// $cssClass = 'mw-magiclink-pmid';
|
||||
// $trackingCat = 'magiclink-tracking-pmid';
|
||||
// $id = $m[5];
|
||||
// } else {
|
||||
// throw new MWException(__METHOD__ . ': unrecognised match type "' .
|
||||
// substr($m[0], 0, 20) . '"');
|
||||
// }
|
||||
// $url = wfMessage($urlmsg, $id)->inContentLanguage()->text();
|
||||
// $this->addTrackingCategory($trackingCat);
|
||||
// return Linker::makeExternalLink($url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle);
|
||||
// } else if (isset($m[6]) && $m[6] !== ''
|
||||
// && $this->mOptions->getMagicISBNLinks()
|
||||
// ) {
|
||||
// // ISBN
|
||||
// $isbn = $m[6];
|
||||
// $space = self::SPACE_NOT_NL; // non-newline space
|
||||
// $isbn = preg_replace("/$space/", ' ', $isbn);
|
||||
// $num = strtr($isbn, [
|
||||
// '-' => '',
|
||||
// ' ' => '',
|
||||
// 'x' => 'X',
|
||||
// ]);
|
||||
// $this->addTrackingCategory('magiclink-tracking-isbn');
|
||||
// return $this->getLinkRenderer()->makeKnownLink(
|
||||
// SpecialPage::getTitleFor('Booksources', $num),
|
||||
// "ISBN $isbn",
|
||||
// [
|
||||
// 'class' => 'internal mw-magiclink-isbn',
|
||||
// 'title' => false // suppress title attribute
|
||||
// ]
|
||||
// );
|
||||
// } else {
|
||||
// return $m[0];
|
||||
// }
|
||||
|
||||
// Make a free external link, given a user-supplied URL
|
||||
// public void Make_free_external_link(byte[] url, int num_post_proto) {
|
||||
// byte[] trail = Bry_.Empty;
|
||||
|
||||
// The characters '<' and '>' (which were escaped by
|
||||
// removeHTMLtags()) should not be included in
|
||||
// URLs, per RFC 2396.
|
||||
// Make &nbsp; terminate a URL as well (bug T84937)
|
||||
// $m2 = [];
|
||||
// if (preg_match(
|
||||
// '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
|
||||
// $url,
|
||||
// $m2,
|
||||
// PREG_OFFSET_CAPTURE
|
||||
// )) {
|
||||
// trail = substr($url, $m2[0][1]) . $trail;
|
||||
// $url = substr($url, 0, $m2[0][1]);
|
||||
// }
|
||||
|
||||
// Move trailing punctuation to $trail
|
||||
// $sep = ',;\.:!?';
|
||||
// If there is no left bracket, then consider right brackets fair game too
|
||||
// if (strpos($url, '(') === false) {
|
||||
// $sep .= ')';
|
||||
// }
|
||||
|
||||
// $urlRev = strrev($url);
|
||||
// $numSepChars = strspn($urlRev, $sep);
|
||||
// Don't break a trailing HTML entity by moving the ; into $trail
|
||||
// This is in hot code, so use substr_compare to avoid having to
|
||||
// create a new String Object for the comparison
|
||||
// if ($numSepChars && substr_compare($url, ";", -$numSepChars, 1) === 0) {
|
||||
// more optimization: instead of running preg_match with a $
|
||||
// anchor, which can be slow, do the match on the reversed
|
||||
// String starting at the desired offset.
|
||||
// un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
|
||||
// if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars)) {
|
||||
// $numSepChars--;
|
||||
// }
|
||||
// }
|
||||
// if ($numSepChars) {
|
||||
// $trail = substr($url, -$numSepChars) . $trail;
|
||||
// $url = substr($url, 0, -$numSepChars);
|
||||
// }
|
||||
|
||||
// Verify that we still have a real URL after trail removal, and
|
||||
// not just lone protocol
|
||||
// if (strlen($trail) >= $numPostProto) {
|
||||
// return $url . $trail;
|
||||
// }
|
||||
|
||||
// $url = Sanitizer::cleanUrl($url);
|
||||
|
||||
// Is this an external image?
|
||||
// $text = $this->maybeMakeExternalImage($url);
|
||||
// if ($text === false) {
|
||||
// Not an image, make a link
|
||||
// $text = Linker::makeExternalLink($url,
|
||||
// $this->getConverterLanguage()->markNoConversion($url, true),
|
||||
// true, 'free',
|
||||
// $this->getExternalLinkAttribs($url), $this->mTitle);
|
||||
// Register it in the output Object...
|
||||
// Replace unnecessary URL escape codes with their equivalent characters
|
||||
// $pasteurized = self::normalizeLinkUrl($url);
|
||||
// $this->mOutput->addExternalLink($pasteurized);
|
||||
// }
|
||||
// return $text . $trail;
|
||||
// }
|
||||
// }
|
||||
// }
|
134
400_xowa/src/gplx/xowa/mws/parsers/nbsps/Xomw_nbsp_wkr.java
Normal file
134
400_xowa/src/gplx/xowa/mws/parsers/nbsps/Xomw_nbsp_wkr.java
Normal file
@ -0,0 +1,134 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.nbsps; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xomw_nbsp_wkr {
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
public void Do_nbsp(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
// PORTED:
|
||||
// Clean up special characters, only run once, next-to-last before doBlockLevels
|
||||
// $fixtags = [
|
||||
// // French spaces, last one Guillemet-left
|
||||
// // only if there is something before the space
|
||||
// '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 ',
|
||||
// // french spaces, Guillemet-right
|
||||
// '/(\\302\\253) /' => '\\1 ',
|
||||
// '/ (!\s*important)/' => ' \\1', // Beware of CSS magic word !important, T13874.
|
||||
// ];
|
||||
// $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
|
||||
if (trie == null) {
|
||||
synchronized (this.getClass()) {
|
||||
trie = Btrie_slim_mgr.cs();
|
||||
Trie__add(trie, Tid__space_lhs, " ?");
|
||||
Trie__add(trie, Tid__space_lhs, " :");
|
||||
Trie__add(trie, Tid__space_lhs, " ;");
|
||||
Trie__add(trie, Tid__space_lhs, " !");
|
||||
Trie__add(trie, Tid__space_lhs, " »");
|
||||
Trie__add(trie, Tid__space_rhs, "« ");
|
||||
Trie__add(trie, Tid__important, " !");
|
||||
}
|
||||
}
|
||||
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
boolean dirty = true;
|
||||
// search forward for...
|
||||
// "\s" before ? : ; ! % 302,273; EX: "a :"
|
||||
// "\s" after 302,253
|
||||
// "&160;!\simportant"
|
||||
while (true) {
|
||||
if (cur == src_end) {
|
||||
if (dirty)
|
||||
bfr.Add_mid(src, prv, src_end);
|
||||
break;
|
||||
}
|
||||
Object o = trie.Match_at(trv, src, cur, src_end);
|
||||
if (o == null) {
|
||||
cur++;
|
||||
continue;
|
||||
}
|
||||
Xomw_nbsp_itm itm = (Xomw_nbsp_itm)o;
|
||||
|
||||
// '/ (!\s*important)/' => ' \\1'
|
||||
byte itm_tid = itm.Tid();
|
||||
int important_end = -1;
|
||||
if (itm_tid == Tid__important) {
|
||||
int space_bgn = cur + itm.Key().length;
|
||||
int space_end = Bry_find_.Find_fwd_while(src, space_bgn, src_end, Byte_ascii.Space);
|
||||
important_end = space_end + Bry__important.length;
|
||||
if (!Bry_.Match(src, space_end, important_end, Bry__important)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
dirty = true;
|
||||
bfr.Add_mid(src, prv, cur);
|
||||
switch (itm_tid) {
|
||||
case Tid__space_lhs:
|
||||
bfr.Add_bry_many(Bry__nbsp, itm.Val());
|
||||
break;
|
||||
case Tid__space_rhs:
|
||||
bfr.Add_bry_many(itm.Val(), Bry__nbsp);
|
||||
break;
|
||||
case Tid__important:
|
||||
bfr.Add(Bry__important__repl);
|
||||
break;
|
||||
}
|
||||
cur += itm.Key().length;
|
||||
prv = cur;
|
||||
}
|
||||
if (dirty)
|
||||
pbfr.Switch();
|
||||
}
|
||||
private static final byte Tid__space_lhs = 0, Tid__space_rhs = 1, Tid__important = 2;
|
||||
private static Btrie_slim_mgr trie;
|
||||
private static void Trie__add(Btrie_slim_mgr trie, byte tid, String key_str) {
|
||||
byte[] key_bry = Bry_.new_u8(key_str);
|
||||
byte[] val_bry = null;
|
||||
switch (tid) {
|
||||
case Tid__space_lhs:
|
||||
val_bry = Bry_.Mid(key_bry, 1);
|
||||
break;
|
||||
case Tid__space_rhs:
|
||||
val_bry = Bry_.Mid(key_bry, 0, key_bry.length - 1);
|
||||
break;
|
||||
case Tid__important:
|
||||
val_bry = key_bry;
|
||||
break;
|
||||
}
|
||||
Xomw_nbsp_itm itm = new Xomw_nbsp_itm(tid, key_bry, val_bry);
|
||||
trie.Add_obj(key_bry, itm);
|
||||
}
|
||||
private static final byte[] Bry__nbsp = Bry_.new_a7(" "), Bry__important = Bry_.new_a7("important"), Bry__important__repl = Bry_.new_a7(" !");
|
||||
}
|
||||
// Immutable trie entry used by Xomw_nbsp_wkr: a type id, the key that was matched and the bytes to re-emit.
class Xomw_nbsp_itm {
	private final byte tid;
	private final byte[] key;
	private final byte[] val;

	public Xomw_nbsp_itm(byte tid, byte[] key, byte[] val) {
		this.tid = tid;
		this.key = key;
		this.val = val;
	}

	// type id of the entry
	public byte Tid() {
		return tid;
	}

	// key bytes as registered in the trie (the same array instance, not a copy)
	public byte[] Key() {
		return key;
	}

	// value bytes (the same array instance, not a copy)
	public byte[] Val() {
		return val;
	}
}
|
@ -0,0 +1,40 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.nbsps; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_nbsp_wkr__tst {
|
||||
private final Xomw_nbsp_wkr__fxt fxt = new Xomw_nbsp_wkr__fxt();
|
||||
@Test public void Noop() {fxt.Test__parse("abc" , "abc");}
|
||||
@Test public void Space_lhs__colon() {fxt.Test__parse("a :b c" , "a :b c");}
|
||||
@Test public void Space_lhs__laquo() {fxt.Test__parse("a »b c" , "a »b c");}
|
||||
@Test public void Space_rhs() {fxt.Test__parse("a« b c" , "a« b c");}
|
||||
@Test public void Important() {fxt.Test__parse("a  ! important b" , "a ! important b");}
|
||||
}
|
||||
class Xomw_nbsp_wkr__fxt {
|
||||
private final Xomw_nbsp_wkr wkr = new Xomw_nbsp_wkr();
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private boolean apos = true;
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
pbfr.Init(src_bry);
|
||||
wkr.Do_nbsp(pctx, pbfr);
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
@ -15,8 +15,9 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
public interface Xomw_hdr_cbk {
|
||||
void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_hdr_wkr wkr);
|
||||
void On_src_done(Xomw_parser_ctx pctx, Xomw_hdr_wkr wkr);
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
/** Frame placeholder; expansion is not implemented yet. */
public class Xomw_frame_itm {
  /**
   * Stub: currently ignores its argument and always returns null.
   *
   * @param ttl title bytes to expand (unused for now)
   * @return always null
   */
  public byte[] Expand(byte[] ttl) {
    return null;
  }
}
|
564
400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_frame_wkr.java
Normal file
564
400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_frame_wkr.java
Normal file
@ -0,0 +1,564 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
// public class Xomw_frame_wkr { // THREAD.UNSAFE: caching for repeated calls
|
||||
// private final Xomw_parser parser;
|
||||
// public Xomw_frame_wkr(Xomw_parser parser) {
|
||||
// this.parser = parser;
|
||||
// }
|
||||
// \\ Replace magic variables, templates, and template arguments
|
||||
// \\ with the appropriate text. Templates are substituted recursively,
|
||||
// \\ taking care to avoid infinite loops.
|
||||
// \\
|
||||
// \\ Note that the substitution depends on value of $mOutputType:
|
||||
// \\ self::OT_WIKI: only {{subst:}} templates
|
||||
// \\ self::OT_PREPROCESS: templates but not extension tags
|
||||
// \\ self::OT_HTML: all templates and extension tags
|
||||
// \\
|
||||
// \\ @param String $text The text to transform
|
||||
// \\ @param boolean|PPFrame $frame Object describing the arguments passed to the
|
||||
// \\ template. Arguments may also be provided as an associative array, as
|
||||
// \\ was the usual case before MW1.12. Providing arguments this way may be
|
||||
// \\ useful for extensions wishing to perform variable replacement
|
||||
// \\ explicitly.
|
||||
// \\ @param boolean $argsOnly Only do argument (triple-brace) expansion, not
|
||||
// \\ double-brace expansion.
|
||||
// \\ @return String
|
||||
// public function replaceVariables($text, $frame = false, $argsOnly = false) {
|
||||
// // Is there any text? Also, Prevent too big inclusions!
|
||||
// $textSize = strlen($text);
|
||||
// if ($textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize()) {
|
||||
// return $text;
|
||||
// }
|
||||
//
|
||||
// if ($frame == false) {
|
||||
// $frame = $this->getPreprocessor()->newFrame();
|
||||
// } elseif (!($frame instanceof PPFrame)) {
|
||||
// wfDebug(__METHOD__ . " called using plain parameters instead of "
|
||||
// . "a PPFrame instance. Creating custom frame.\n");
|
||||
// $frame = $this->getPreprocessor()->newCustomFrame($frame);
|
||||
// }
|
||||
//
|
||||
// $dom = $this->preprocessToDom($text);
|
||||
// $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
|
||||
// $text = $frame->expand($dom, $flags);
|
||||
//
|
||||
// return $text;
|
||||
// }
|
||||
//
|
||||
// \\ Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
|
||||
// public static function createAssocArgs($args) {
|
||||
// $assocArgs = [];
|
||||
// $index = 1;
|
||||
// foreach ($args as $arg) {
|
||||
// $eqpos = strpos($arg, '=');
|
||||
// if ($eqpos == false) {
|
||||
// $assocArgs[$index++] = $arg;
|
||||
// } else {
|
||||
// $name = trim(substr($arg, 0, $eqpos));
|
||||
// $value = trim(substr($arg, $eqpos + 1));
|
||||
// if ($value == false) {
|
||||
// $value = '';
|
||||
// }
|
||||
// if ($name != false) {
|
||||
// $assocArgs[$name] = $value;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return $assocArgs;
|
||||
// }
|
||||
|
||||
// \\ Return the text of a template, after recursively
|
||||
// \\ replacing any variables or templates within the template.
|
||||
// \\
|
||||
// \\ @param array $piece The parts of the template
|
||||
// \\ $piece['title']: the title, i.e. the part before the |
|
||||
// \\ $piece['parts']: the parameter array
|
||||
// \\ $piece['lineStart']: whether the brace was at the start of a line
|
||||
// \\ @param PPFrame $frame The current frame, contains template arguments
|
||||
// \\ @throws Exception
|
||||
// \\ @return String The text of the template
|
||||
// public void Brace_substitution(Xomw_prepro_node__template piece, Xomw_frame_itm frame) {
|
||||
// // Flags
|
||||
//
|
||||
// // $text has been filled
|
||||
// boolean found = false;
|
||||
// // wiki markup in $text should be escaped
|
||||
// boolean nowiki = false;
|
||||
// // $text is HTML, armour it against wikitext transformation
|
||||
// boolean is_html = false;
|
||||
// // Force interwiki transclusion to be done in raw mode not rendered
|
||||
// boolean force_raw_interwiki = false;
|
||||
// // $text is a DOM node needing expansion in a child frame
|
||||
// boolean is_child_obj = false;
|
||||
// // $text is a DOM node needing expansion in the current frame
|
||||
// boolean is_local_obj = false;
|
||||
//
|
||||
// // Title Object, where $text came from
|
||||
// byte[] title = null;
|
||||
//
|
||||
// // $part1 is the bit before the first |, and must contain only title characters.
|
||||
// // Various prefixes will be stripped from it later.
|
||||
// byte[] title_with_spaces = frame.Expand(piece.Title());
|
||||
// byte[] part1 = Bry_.Trim(title_with_spaces);
|
||||
// byte[] title_text = null;
|
||||
//
|
||||
// // Original title text preserved for various purposes
|
||||
// byte[] originalTitle = part1;
|
||||
//
|
||||
// // $args is a list of argument nodes, starting from index 0, not including $part1
|
||||
// // @todo FIXME: If piece['parts'] is null then the call to getLength()
|
||||
// // below won't work b/c this $args isn't an Object
|
||||
// Xomw_prepro_node__part[] args = (null == piece.Parts()) ? null : piece.Parts();
|
||||
//
|
||||
// byte[] profile_section = null; // profile templates
|
||||
//
|
||||
// Tfds.Write(nowiki, is_html, force_raw_interwiki, is_child_obj, is_local_obj, title, title_text, profile_section);
|
||||
// // SUBST
|
||||
// if (!found) {
|
||||
// String subst_match = null; // $this->mSubstWords->matchStartAndRemove($part1);
|
||||
// boolean literal = false;
|
||||
//
|
||||
// // Possibilities for substMatch: "subst", "safesubst" or FALSE
|
||||
// // Decide whether to expand template or keep wikitext as-is.
|
||||
// if (parser.Output_type__wiki()) {
|
||||
// if (subst_match == null) {
|
||||
// literal = true; // literal when in PST with no prefix
|
||||
// }
|
||||
// else {
|
||||
// literal = false; // expand when in PST with subst: or safesubst:
|
||||
// }
|
||||
// }
|
||||
// else {
|
||||
// if (subst_match == "subst") {
|
||||
// literal = true; // literal when not in PST with plain subst:
|
||||
// }
|
||||
// else {
|
||||
// literal = false; // expand when not in PST with safesubst: or no prefix
|
||||
// }
|
||||
// }
|
||||
// if (literal) {
|
||||
//// $text = $frame->virtualBracketedImplode('{{', '|', '}}', title_with_spaces, $args);
|
||||
// is_local_obj = true;
|
||||
// found = true;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // Variables
|
||||
// if (!found && args.length == 0) {
|
||||
//// $id = $this->mVariables->matchStartToEnd($part1);
|
||||
//// if ($id != false) {
|
||||
//// $text = $this->getVariableValue($id, $frame);
|
||||
//// if (MagicWord::getCacheTTL($id) > -1) {
|
||||
//// $this->mOutput->updateCacheExpiry(MagicWord::getCacheTTL($id));
|
||||
//// }
|
||||
// found = true;
|
||||
//// }
|
||||
// }
|
||||
//
|
||||
// // MSG, MSGNW and RAW
|
||||
// if (!found) {
|
||||
// // Check for MSGNW:
|
||||
//// $mwMsgnw = MagicWord::get('msgnw');
|
||||
//// if ($mwMsgnw->matchStartAndRemove($part1)) {
|
||||
// nowiki = true;
|
||||
//// }
|
||||
//// else {
|
||||
// // Remove obsolete MSG:
|
||||
//// $mwMsg = MagicWord::get('msg');
|
||||
//// $mwMsg->matchStartAndRemove($part1);
|
||||
//// }
|
||||
//
|
||||
// // Check for RAW:
|
||||
//// $mwRaw = MagicWord::get('raw');
|
||||
//// if ($mwRaw->matchStartAndRemove($part1)) {
|
||||
//// force_raw_interwiki = true;
|
||||
//// }
|
||||
// }
|
||||
|
||||
// Parser functions
|
||||
// if (!found) {
|
||||
// $colonPos = strpos($part1, ':');
|
||||
// if ($colonPos != false) {
|
||||
// $func = substr($part1, 0, $colonPos);
|
||||
// $funcArgs = [ trim(substr($part1, $colonPos + 1)) ];
|
||||
// $argsLength = $args->getLength();
|
||||
// for ($i = 0; $i < $argsLength; $i++) {
|
||||
// $funcArgs[] = $args->item($i);
|
||||
// }
|
||||
// try {
|
||||
// $result = $this->callParserFunction($frame, $func, $funcArgs);
|
||||
// } catch (Exception $ex) {
|
||||
// throw $ex;
|
||||
// }
|
||||
|
||||
// The interface for parser functions allows for extracting
|
||||
// flags into the local scope. Extract any forwarded flags
|
||||
// here.
|
||||
// extract($result);
|
||||
// }
|
||||
// }
|
||||
|
||||
// Finish mangling title and then check for loops.
|
||||
// Set title to a Title Object and $title_text to the PDBK
|
||||
// if (!found) {
|
||||
// $ns = NS_TEMPLATE;
|
||||
// Split the title into page and subpage
|
||||
// $subpage = '';
|
||||
// $relative = $this->maybeDoSubpageLink($part1, $subpage);
|
||||
// if ($part1 != $relative) {
|
||||
// $part1 = $relative;
|
||||
// $ns = $this->mTitle->getNamespace();
|
||||
// }
|
||||
// title = Title::newFromText($part1, $ns);
|
||||
// if (title) {
|
||||
// $title_text = title->getPrefixedText();
|
||||
// // Check for language variants if the template is not found
|
||||
// if ($this->getConverterLanguage()->hasVariants() && title->getArticleID() == 0) {
|
||||
// $this->getConverterLanguage()->findVariantLink($part1, title, true);
|
||||
// }
|
||||
// // Do recursion depth check
|
||||
// $limit = $this->mOptions->getMaxTemplateDepth();
|
||||
// if ($frame->depth >= $limit) {
|
||||
// found = true;
|
||||
// $text = '<span class="error">'
|
||||
// . wfMessage('parser-template-recursion-depth-warning')
|
||||
// ->numParams($limit)->inContentLanguage()->text()
|
||||
// . '</span>';
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// Load from database
|
||||
// if (!found && title) {
|
||||
// $profile_section = $this->mProfiler->scopedProfileIn(title->getPrefixedDBkey());
|
||||
// if (!title->isExternal()) {
|
||||
// if (title->isSpecialPage()
|
||||
// && $this->mOptions->getAllowSpecialInclusion()
|
||||
// && $this->ot['html']
|
||||
// ) {
|
||||
// $specialPage = SpecialPageFactory::getPage(title->getDBkey());
|
||||
// // Pass the template arguments as URL parameters.
|
||||
// // "uselang" will have no effect since the Language Object
|
||||
// // is forced to the one defined in ParserOptions.
|
||||
// $pageArgs = [];
|
||||
// $argsLength = $args->getLength();
|
||||
// for ($i = 0; $i < $argsLength; $i++) {
|
||||
// $bits = $args->item($i)->splitArg();
|
||||
// if (strval($bits['index']) == '') {
|
||||
// $name = trim($frame->expand($bits['name'], PPFrame::STRIP_COMMENTS));
|
||||
// $value = trim($frame->expand($bits['value']));
|
||||
// $pageArgs[$name] = $value;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // Create a new context to execute the special page
|
||||
// $context = new RequestContext;
|
||||
// $context->setTitle(title);
|
||||
// $context->setRequest(new FauxRequest($pageArgs));
|
||||
// if ($specialPage && $specialPage->maxIncludeCacheTime() == 0) {
|
||||
// $context->setUser($this->getUser());
|
||||
// } else {
|
||||
// // If this page is cached, then we better not be per user.
|
||||
// $context->setUser(User::newFromName('127.0.0.1', false));
|
||||
// }
|
||||
// $context->setLanguage($this->mOptions->getUserLangObj());
|
||||
// $ret = SpecialPageFactory::capturePath(
|
||||
// title, $context, $this->getLinkRenderer());
|
||||
// if ($ret) {
|
||||
// $text = $context->getOutput()->getHTML();
|
||||
// $this->mOutput->addOutputPageMetadata($context->getOutput());
|
||||
// found = true;
|
||||
// is_html = true;
|
||||
// if ($specialPage && $specialPage->maxIncludeCacheTime() != false) {
|
||||
// $this->mOutput->updateRuntimeAdaptiveExpiry(
|
||||
// $specialPage->maxIncludeCacheTime()
|
||||
// );
|
||||
// }
|
||||
// }
|
||||
// } elseif (MWNamespace::isNonincludable(title->getNamespace())) {
|
||||
// found = false; // access denied
|
||||
// wfDebug(__METHOD__ . ": template inclusion denied for " .
|
||||
// title->getPrefixedDBkey() . "\n");
|
||||
// } else {
|
||||
// list($text, title) = $this->getTemplateDom(title);
|
||||
// if ($text != false) {
|
||||
// found = true;
|
||||
// is_child_obj = true;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // If the title is valid but undisplayable, make a link to it
|
||||
// if (!found && ($this->ot['html'] || $this->ot['pre'])) {
|
||||
// $text = "[[:$title_text]]";
|
||||
// found = true;
|
||||
// }
|
||||
// } elseif (title->isTrans()) {
|
||||
// // Interwiki transclusion
|
||||
// if ($this->ot['html'] && !force_raw_interwiki) {
|
||||
// $text = $this->interwikiTransclude(title, 'render');
|
||||
// is_html = true;
|
||||
// } else {
|
||||
// $text = $this->interwikiTransclude(title, 'raw');
|
||||
// // Preprocess it like a template
|
||||
// $text = $this->preprocessToDom($text, self::PTD_FOR_INCLUSION);
|
||||
// is_child_obj = true;
|
||||
// }
|
||||
// found = true;
|
||||
// }
|
||||
//
|
||||
// // Do infinite loop check
|
||||
// // This has to be done after redirect resolution to avoid infinite loops via redirects
|
||||
// if (!$frame->loopCheck(title)) {
|
||||
// found = true;
|
||||
// $text = '<span class="error">'
|
||||
// . wfMessage('parser-template-loop-warning', $title_text)->inContentLanguage()->text()
|
||||
// . '</span>';
|
||||
// wfDebug(__METHOD__ . ": template loop broken at '$title_text'\n");
|
||||
// }
|
||||
// }
|
||||
|
||||
// If we haven't found text to substitute by now, we're done
|
||||
// Recover the source wikitext and return it
|
||||
// if (!found) {
|
||||
// $text = $frame->virtualBracketedImplode('{{', '|', '}}', title_with_spaces, $args);
|
||||
// if ($profile_section) {
|
||||
// $this->mProfiler->scopedProfileOut($profile_section);
|
||||
// }
|
||||
// return [ 'Object' => $text ];
|
||||
// }
|
||||
|
||||
// Expand DOM-style return values in a child frame
|
||||
// if (is_child_obj) {
|
||||
// // Clean up argument array
|
||||
// $newFrame = $frame->newChild($args, title);
|
||||
//
|
||||
// if (nowiki) {
|
||||
// $text = $newFrame->expand($text, PPFrame::RECOVER_ORIG);
|
||||
// } elseif ($title_text != false && $newFrame->isEmpty()) {
|
||||
// // Expansion is eligible for the empty-frame cache
|
||||
// $text = $newFrame->cachedExpand($title_text, $text);
|
||||
// } else {
|
||||
// // Uncached expansion
|
||||
// $text = $newFrame->expand($text);
|
||||
// }
|
||||
// }
|
||||
// if (is_local_obj && nowiki) {
|
||||
// $text = $frame->expand($text, PPFrame::RECOVER_ORIG);
|
||||
// is_local_obj = false;
|
||||
// }
|
||||
|
||||
// if ($profile_section) {
|
||||
// $this->mProfiler->scopedProfileOut($profile_section);
|
||||
// }
|
||||
|
||||
// Replace raw HTML by a placeholder
|
||||
// if (is_html) {
|
||||
// $text = $this->insertStripItem($text);
|
||||
// } elseif (nowiki && ($this->ot['html'] || $this->ot['pre'])) {
|
||||
// // Escape nowiki-style return values
|
||||
// $text = wfEscapeWikiText($text);
|
||||
// } elseif (is_string($text)
|
||||
// && !$piece['lineStart']
|
||||
// && preg_match('/^(?:{\\||:|;|#|\*)/', $text)
|
||||
// ) {
|
||||
// // T2529: if the template begins with a table or block-level
|
||||
// // element, it should be treated as beginning a new line.
|
||||
// // This behavior is somewhat controversial.
|
||||
// $text = "\n" . $text;
|
||||
// }
|
||||
|
||||
// if (is_string($text) && !$this->incrementIncludeSize('post-expand', strlen($text))) {
|
||||
// // Error, oversize inclusion
|
||||
// if ($title_text != false) {
|
||||
// // Make a working, properly escaped link if possible (T25588)
|
||||
// $text = "[[:$title_text]]";
|
||||
// } else {
|
||||
// // This will probably not be a working link, but at least it may
|
||||
// // provide some hint of where the problem is
|
||||
// preg_replace('/^:/', '', $originalTitle);
|
||||
// $text = "[[:$originalTitle]]";
|
||||
// }
|
||||
// $text .= $this->insertStripItem('<!-- WARNING: template omitted, '
|
||||
// . 'post-expand include size too large -->');
|
||||
// $this->limitationWarn('post-expand-template-inclusion');
|
||||
// }
|
||||
//
|
||||
// if (is_local_obj) {
|
||||
// $ret = [ 'Object' => $text ];
|
||||
// } else {
|
||||
// $ret = [ 'text' => $text ];
|
||||
// }
|
||||
|
||||
// return $ret;
|
||||
// }
|
||||
|
||||
// \\ Triple brace replacement -- used for template arguments
|
||||
// public function argSubstitution($piece, $frame) {
|
||||
//
|
||||
// $error = false;
|
||||
// $parts = $piece['parts'];
|
||||
// $nameWithSpaces = $frame->expand($piece['title']);
|
||||
// $argName = trim($nameWithSpaces);
|
||||
// $Object = false;
|
||||
// $text = $frame->getArgument($argName);
|
||||
// if ($text == false && $parts->getLength() > 0
|
||||
// && ($this->ot['html']
|
||||
// || $this->ot['pre']
|
||||
// || ($this->ot['wiki'] && $frame->isTemplate())
|
||||
// )
|
||||
// ) {
|
||||
// // No match in frame, use the supplied default
|
||||
// $Object = $parts->item(0)->getChildren();
|
||||
// }
|
||||
// if (!$this->incrementIncludeSize('arg', strlen($text))) {
|
||||
// $error = '<!-- WARNING: argument omitted, expansion size too large -->';
|
||||
// $this->limitationWarn('post-expand-template-argument');
|
||||
// }
|
||||
//
|
||||
// if ($text == false && $Object == false) {
|
||||
// // No match anywhere
|
||||
// $Object = $frame->virtualBracketedImplode('{{{', '|', '}}}', $nameWithSpaces, $parts);
|
||||
// }
|
||||
// if ($error != false) {
|
||||
// $text .= $error;
|
||||
// }
|
||||
// if ($Object != false) {
|
||||
// $ret = [ 'Object' => $Object ];
|
||||
// } else {
|
||||
// $ret = [ 'text' => $text ];
|
||||
// }
|
||||
//
|
||||
// return $ret;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// \\ Return the text to be used for a given extension tag.
|
||||
// \\ This is the ghost of strip().
|
||||
// \\
|
||||
// \\ @param array $params Associative array of parameters:
|
||||
// \\ name PPNode for the tag name
|
||||
// \\ attr PPNode for unparsed text where tag attributes are thought to be
|
||||
// \\ attributes Optional associative array of parsed attributes
|
||||
// \\ inner Contents of extension element
|
||||
// \\ noClose Original text did not have a close tag
|
||||
// \\ @param PPFrame $frame
|
||||
// \\
|
||||
// \\ @throws MWException
|
||||
// \\ @return String
|
||||
// \\/
|
||||
// public function extensionSubstitution($params, $frame) {
|
||||
// static $errorStr = '<span class="error">';
|
||||
// static $errorLen = 20;
|
||||
//
|
||||
// $name = $frame->expand($params['name']);
|
||||
// if (substr($name, 0, $errorLen) == $errorStr) {
|
||||
// // Probably expansion depth or node count exceeded. Just punt the
|
||||
// // error up.
|
||||
// return $name;
|
||||
// }
|
||||
//
|
||||
// $attrText = !isset($params['attr']) ? null : $frame->expand($params['attr']);
|
||||
// if (substr($attrText, 0, $errorLen) == $errorStr) {
|
||||
// // See above
|
||||
// return $attrText;
|
||||
// }
|
||||
//
|
||||
// // We can't safely check if the expansion for $content resulted in an
|
||||
// // error, because the content could happen to be the error String
|
||||
// // (T149622).
|
||||
// $content = !isset($params['inner']) ? null : $frame->expand($params['inner']);
|
||||
//
|
||||
// $marker = self::MARKER_PREFIX . "-$name-"
|
||||
// . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX;
|
||||
//
|
||||
// $isFunctionTag = isset($this->mFunctionTagHooks[strtolower($name)]) &&
|
||||
// ($this->ot['html'] || $this->ot['pre']);
|
||||
// if ($isFunctionTag) {
|
||||
// $markerType = 'none';
|
||||
// } else {
|
||||
// $markerType = 'general';
|
||||
// }
|
||||
// if ($this->ot['html'] || $isFunctionTag) {
|
||||
// $name = strtolower($name);
|
||||
// $attributes = Sanitizer::decodeTagAttributes($attrText);
|
||||
// if (isset($params['attributes'])) {
|
||||
// $attributes = $attributes + $params['attributes'];
|
||||
// }
|
||||
//
|
||||
// if (isset($this->mTagHooks[$name])) {
|
||||
// // Workaround for PHP bug 35229 and similar
|
||||
// if (!is_callable($this->mTagHooks[$name])) {
|
||||
// throw new MWException("Tag hook for $name is not callable\n");
|
||||
// }
|
||||
// $output = call_user_func_array($this->mTagHooks[$name],
|
||||
// [ $content, $attributes, $this, $frame ]);
|
||||
// } elseif (isset($this->mFunctionTagHooks[$name])) {
|
||||
// list($callback,) = $this->mFunctionTagHooks[$name];
|
||||
// if (!is_callable($callback)) {
|
||||
// throw new MWException("Tag hook for $name is not callable\n");
|
||||
// }
|
||||
//
|
||||
// $output = call_user_func_array($callback, [ &$this, $frame, $content, $attributes ]);
|
||||
// } else {
|
||||
// $output = '<span class="error">Invalid tag extension name: ' .
|
||||
// htmlspecialchars($name) . '</span>';
|
||||
// }
|
||||
//
|
||||
// if (is_array($output)) {
|
||||
// // Extract flags to local scope (to override $markerType)
|
||||
// $flags = $output;
|
||||
// $output = $flags[0];
|
||||
// unset($flags[0]);
|
||||
// extract($flags);
|
||||
// }
|
||||
// } else {
|
||||
// if (is_null($attrText)) {
|
||||
// $attrText = '';
|
||||
// }
|
||||
// if (isset($params['attributes'])) {
|
||||
// foreach ($params['attributes'] as $attrName => $attrValue) {
|
||||
// $attrText .= ' ' . htmlspecialchars($attrName) . '="' .
|
||||
// htmlspecialchars($attrValue) . '"';
|
||||
// }
|
||||
// }
|
||||
// if ($content == null) {
|
||||
// $output = "<$name$attrText/>";
|
||||
// } else {
|
||||
// $close = is_null($params['close']) ? '' : $frame->expand($params['close']);
|
||||
// if (substr($close, 0, $errorLen) == $errorStr) {
|
||||
// // See above
|
||||
// return $close;
|
||||
// }
|
||||
// $output = "<$name$attrText>$content$close";
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if ($markerType == 'none') {
|
||||
// return $output;
|
||||
// } elseif ($markerType == 'nowiki') {
|
||||
// $this->mStripState->addNoWiki($marker, $output);
|
||||
// } elseif ($markerType == 'general') {
|
||||
// $this->mStripState->addGeneral($marker, $output);
|
||||
// } else {
|
||||
// throw new MWException(__METHOD__ . ': invalid marker type');
|
||||
// }
|
||||
// return $marker;
|
||||
// }
|
||||
// }
|
@ -0,0 +1,98 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public interface Xomw_prepro_node {
|
||||
int Subs__len();
|
||||
Xomw_prepro_node Subs__get_at(int i);
|
||||
void Subs__add(Xomw_prepro_node sub);
|
||||
void To_xml(Bry_bfr bfr);
|
||||
}
|
||||
class Xomw_prepro_node__text extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__text(byte[] bry) {
|
||||
this.bry = bry;
|
||||
}
|
||||
public byte[] Bry() {return bry;} protected final byte[] bry;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add(bry);
|
||||
}
|
||||
}
|
||||
class Xomw_prepro_node__comment extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__comment(byte[] bry) {
|
||||
this.bry = bry;
|
||||
}
|
||||
public byte[] Bry() {return bry;} protected final byte[] bry;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add_str_a7("<comment>");
|
||||
bfr.Add(bry);
|
||||
bfr.Add_str_a7("</comment>");
|
||||
}
|
||||
}
|
||||
class Xomw_prepro_node__ext extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__ext(byte[] name, byte[] attr, byte[] inner, byte[] close) {
|
||||
this.name = name;
|
||||
this.attr = attr;
|
||||
this.inner = inner;
|
||||
this.close = close;
|
||||
}
|
||||
public byte[] Name() {return name;} private final byte[] name;
|
||||
public byte[] Attr() {return attr;} private final byte[] attr;
|
||||
public byte[] Inner() {return inner;} private final byte[] inner;
|
||||
public byte[] Close() {return close;} private final byte[] close;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add_str_a7("<ext>");
|
||||
bfr.Add_str_a7("<name>").Add(name).Add_str_a7("</name>");
|
||||
bfr.Add_str_a7("<atr>").Add(attr).Add_str_a7("</atr>");
|
||||
bfr.Add_str_a7("<inner>").Add(inner).Add_str_a7("</inner>");
|
||||
bfr.Add_str_a7("<close>").Add(close).Add_str_a7("</close>");
|
||||
bfr.Add_str_a7("</ext>");
|
||||
}
|
||||
}
|
||||
class Xomw_prepro_node__heading extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__heading(int heading_index, int title_index, byte[] text) {
|
||||
this.heading_index = heading_index;
|
||||
this.title_index = title_index;
|
||||
this.text = text;
|
||||
}
|
||||
public int Heading_index() {return heading_index;} private final int heading_index;
|
||||
public int Title_index() {return title_index;} private final int title_index;
|
||||
public byte[] Text() {return text;} private final byte[] text;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add_str_a7("<h ");
|
||||
bfr.Add_str_a7(" level=\"").Add_int_variable(heading_index);
|
||||
bfr.Add_str_a7("\" i=\"").Add_int_variable(title_index);
|
||||
bfr.Add_str_a7("\">");
|
||||
bfr.Add(text);
|
||||
bfr.Add_str_a7("</h>");
|
||||
}
|
||||
}
|
||||
class Xomw_prepro_node__tplarg extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__tplarg(byte[] title, Xomw_prepro_node__part[] parts) {
|
||||
this.title = title; this.parts = parts;
|
||||
}
|
||||
public byte[] Title() {return title;} private final byte[] title;
|
||||
public Xomw_prepro_node__part[] Parts() {return parts;} private final Xomw_prepro_node__part[] parts;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add_str_a7("<tplarg>");
|
||||
bfr.Add_str_a7("<title>").Add(title);
|
||||
bfr.Add_str_a7("</title>");
|
||||
for (Xomw_prepro_node__part part : parts)
|
||||
part.To_xml(bfr);
|
||||
|
||||
bfr.Add_str_a7("</tplarg>");
|
||||
}
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public abstract class Xomw_prepro_node__base implements Xomw_prepro_node {
|
||||
private List_adp subs;
|
||||
public int Subs__len() {return subs == null ? 0 : subs.Len();}
|
||||
public Xomw_prepro_node Subs__get_at(int i) {return subs == null ? null : (Xomw_prepro_node)subs.Get_at(i);}
|
||||
public void Subs__add(Xomw_prepro_node sub) {
|
||||
if (subs == null) subs = List_adp_.New();
|
||||
subs.Add(sub);
|
||||
}
|
||||
public abstract void To_xml(Bry_bfr bfr);
|
||||
}
|
@ -0,0 +1,45 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public class Xomw_prepro_node__part extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__part(int idx, byte[] key, byte[] val) {
|
||||
this.idx = idx;
|
||||
this.key = key;
|
||||
this.val = val;
|
||||
}
|
||||
public int Idx() {return idx;} private final int idx;
|
||||
public byte[] Key() {return key;} private final byte[] key;
|
||||
public byte[] Val() {return val;} private final byte[] val;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add_str_a7("<part>");
|
||||
bfr.Add_str_a7("<name");
|
||||
if (idx > 0) {
|
||||
bfr.Add_str_a7(" index=\"").Add_int_variable(idx).Add_str_a7("\" />");
|
||||
}
|
||||
else {
|
||||
bfr.Add_str_a7(">");
|
||||
bfr.Add(key);
|
||||
bfr.Add_str_a7("</name>");
|
||||
bfr.Add_str_a7("=");
|
||||
}
|
||||
bfr.Add_str_a7("<value>");
|
||||
bfr.Add(val);
|
||||
bfr.Add_str_a7("</value>");
|
||||
bfr.Add_str_a7("</part>");
|
||||
}
|
||||
}
|
@ -0,0 +1,36 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public class Xomw_prepro_node__template extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__template(byte[] title, Xomw_prepro_node__part[] parts, int line_start) {
|
||||
this.title = title; this.parts = parts; this.line_start = line_start;
|
||||
}
|
||||
public byte[] Title() {return title;} private final byte[] title;
|
||||
public Xomw_prepro_node__part[] Parts() {return parts;} private final Xomw_prepro_node__part[] parts;
|
||||
public int Line_start() {return line_start;} private final int line_start;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add_str_a7("<template");
|
||||
if (line_start > 0) bfr.Add_str_a7(" lineStart=\"").Add_int_variable(line_start).Add_byte_quote();
|
||||
bfr.Add_byte(Byte_ascii.Angle_end);
|
||||
bfr.Add_str_a7("<title>").Add(title);
|
||||
bfr.Add_str_a7("</title>");
|
||||
for (Xomw_prepro_node__part part : parts)
|
||||
part.To_xml(bfr);
|
||||
bfr.Add_str_a7("</template>");
|
||||
}
|
||||
}
|
@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
class Xomw_prepro_rule {
|
||||
public Xomw_prepro_rule(byte[] bgn, byte[] end, int min, int max, int[] names) {
|
||||
this.bgn = bgn;
|
@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
class Xomw_prepro_stack {
|
||||
public List_adp stack = List_adp_.New();
|
||||
public Xomw_prepro_piece top;
|
@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.langs.phps.utls.*;
|
||||
public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
|
@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_prepro_wkr__tst {
|
||||
private final Xomw_prepro_wkr__fxt fxt = new Xomw_prepro_wkr__fxt();
|
||||
@ -55,6 +55,9 @@ public class Xomw_prepro_wkr__tst {
|
||||
@Test public void Tplarg() {
|
||||
fxt.Test__parse("a{{{b}}}c", "<root>a<tplarg><title>b</title></tplarg>c</root>");
|
||||
}
|
||||
@Test public void Tplarg__dflt() {
|
||||
fxt.Test__parse("a{{{b|c}}}d", "<root>a<tplarg><title>b</title><part><name index=\"1\" /><value>c</value></part></tplarg>d</root>");
|
||||
}
|
||||
@Test public void Comment() {
|
||||
fxt.Test__parse("a<!--b-->c", "<root>a<comment><!--b--></comment>c</root>");
|
||||
}
|
@ -15,26 +15,53 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
package gplx.xowa.mws.parsers.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.langs.phps.utls.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
import gplx.core.primitives.*;
|
||||
public class Xomw_quote_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private final Bry_bfr bfr = Bry_bfr_.New();
|
||||
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
public class Xomw_quote_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private Bry_bfr tmp;
|
||||
private final Int_list apos_pos_ary = new Int_list(32);
|
||||
public byte[] Do_all_quotes(byte[] src) {
|
||||
Bry_split_.Split(src, 0, src.length, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode( "\n", $text );
|
||||
public Xomw_quote_wkr(Xomw_parser mgr) {
|
||||
this.tmp = mgr.Tmp();
|
||||
}
|
||||
public void Do_all_quotes(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
int cur = src_bgn;
|
||||
int line_bgn = cur;
|
||||
while (true) {
|
||||
int line_end = Bry_find_.Find_fwd(src, Byte_ascii.Nl, line_bgn, src_end);
|
||||
if (line_end == Bry_find_.Not_found) {
|
||||
line_end = src_end;
|
||||
}
|
||||
Do_quotes(bfr, Bool_.Y, src, line_bgn, line_end);
|
||||
if (line_end == src_end)
|
||||
break;
|
||||
else
|
||||
line_bgn = line_end + 1; // 1=\n.length
|
||||
}
|
||||
// Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode( "\n", $text );
|
||||
if (bfr.Match_end_byt(Byte_ascii.Nl))
|
||||
bfr.Del_by_1(); // REF.MW: $outtext = substr( $outtext, 0, -1 );
|
||||
apos_pos_ary.Clear();
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
private static final byte[] Wtxt__apos = Bry_.new_a7("''");
|
||||
public int Split(byte[] src, int itm_bgn, int itm_end) {
|
||||
byte[][] arr = Php_preg_.Split(apos_pos_ary, src, itm_bgn, itm_end, Wtxt__apos, Bool_.Y); // PORTED.REGX: arr = preg_split("/(''+)/", text, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||
public byte[] Do_quotes(Bry_bfr tmp, byte[] src) {
|
||||
boolean found = Do_quotes(tmp, Bool_.N, src, 0, src.length);
|
||||
return found ? tmp.To_bry_and_clear() : src;
|
||||
}
|
||||
private boolean Do_quotes(Bry_bfr bfr, boolean all_quotes_mode, byte[] src, int line_bgn, int line_end) {
|
||||
byte[][] arr = Php_preg_.Split(apos_pos_ary, src, line_bgn, line_end, Wtxt__apos, Bool_.Y); // PORTED.REGX: arr = preg_split("/(''+)/", text, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||
if (arr == null) {
|
||||
bfr.Add_mid(src, itm_bgn, itm_end).Add_byte_nl();
|
||||
return Bry_split_.Rv__ok;
|
||||
if (all_quotes_mode) {
|
||||
bfr.Add_mid(src, line_bgn, line_end).Add_byte_nl();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
int arr_len = arr.length;
|
||||
|
||||
@ -226,7 +253,7 @@ public class Xomw_quote_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
bfr.Add_str_a7("<b><i>").Add_bfr_and_clear(tmp).Add_str_a7("</i></b>");
|
||||
}
|
||||
bfr.Add_byte_nl();
|
||||
return Bry_split_.Rv__ok;
|
||||
return true;
|
||||
}
|
||||
private static final int
|
||||
State__empty = 0
|
||||
@ -236,4 +263,5 @@ public class Xomw_quote_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
, State__ib = 4
|
||||
, State__both = 5
|
||||
;
|
||||
private static final byte[] Wtxt__apos = Bry_.new_a7("''");
|
||||
}
|
@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
package gplx.xowa.mws.parsers.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_quote_wkr__tst {
|
||||
private final Xomw_quote_wkr__fxt fxt = new Xomw_quote_wkr__fxt();
|
||||
@ -32,12 +32,14 @@ public class Xomw_quote_wkr__tst {
|
||||
@Test public void Dangling__b() {fxt.Test__parse("a'''b" , "a<b>b</b>");} // COVERS: "if (state == State__b || state == State__ib)"
|
||||
@Test public void Dangling__i() {fxt.Test__parse("a''b" , "a<i>b</i>");} // COVERS: "if (state == State__i || state == State__bi || state == State__ib)"
|
||||
@Test public void Dangling__lone(){fxt.Test__parse("a'''''b" , "a<b><i>b</i></b>");} // COVERS: "There might be lonely ''''', so make sure we have a buffer"
|
||||
@Test public void Nl__text() {fxt.Test__parse("a\nb''c''d\n\ne" , "a\nb<i>c</i>d\n\ne");}
|
||||
}
|
||||
class Xomw_quote_wkr__fxt {
|
||||
private final Xomw_quote_wkr wkr = new Xomw_quote_wkr();
|
||||
private final Xomw_quote_wkr wkr = new Xomw_quote_wkr(new Xomw_parser());
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
byte[] actl = wkr.Do_all_quotes(src_bry);
|
||||
Tfds.Eq_str_lines(expd, String_.new_u8(actl), src_str);
|
||||
wkr.Do_all_quotes(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
@ -15,12 +15,14 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
package gplx.xowa.mws.parsers.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.langs.phps.utls.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
import gplx.xowa.parsers.mws.utils.*; import gplx.xowa.parsers.uniqs.*;
|
||||
import gplx.xowa.mws.utls.*; import gplx.xowa.parsers.uniqs.*;
|
||||
public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private final Bry_bfr bfr = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New();
|
||||
private final Bry_bfr tmp;
|
||||
private Bry_bfr bfr;
|
||||
private final Xomw_sanitizer sanitizer; private final Xomw_strip_state strip_state;
|
||||
private final List_adp
|
||||
td_history = List_adp_.New() // Is currently a td tag open?
|
||||
, last_tag_history = List_adp_.New() // Save history of last lag activated (td, th or caption)
|
||||
@ -30,14 +32,22 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
;
|
||||
private int indent_level = 0; // indent level of the table
|
||||
private byte[] first_2 = new byte[2];
|
||||
private Xomw_sanitizer_mgr sanitizer;
|
||||
private Xop_uniq_mgr uniq_mgr;
|
||||
public byte[] Do_table_stuff(Xomw_parser_ctx ctx, byte[] src) {
|
||||
this.sanitizer = ctx.Sanitizer();
|
||||
this.uniq_mgr = ctx.Uniq_mgr();
|
||||
public Xomw_table_wkr(Xomw_parser parser) {
|
||||
this.tmp = parser.Tmp();
|
||||
this.sanitizer = parser.Sanitizer();
|
||||
this.strip_state = parser.Strip_state();
|
||||
}
|
||||
public void Do_table_stuff(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
this.bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
indent_level = 0;
|
||||
|
||||
Bry_split_.Split(src, 0, src.length, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode("\n", $text);
|
||||
Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode("\n", $text);
|
||||
|
||||
// Closing open td, tr && table
|
||||
while (td_history.Len() > 0) {
|
||||
@ -62,9 +72,8 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
if ( bfr.Len() == Len__tb__empty
|
||||
&& Bry_.Eq(bfr.Bfr(), 0, Len__tb__empty, Html__tb__empty)) {
|
||||
bfr.Clear();
|
||||
return Bry_.Empty;
|
||||
return;
|
||||
}
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
public int Split(byte[] src, int itm_bgn, int itm_end) {
|
||||
byte[] out_line = Bry_.Mid(src, itm_bgn, itm_end); // MW: "$outLine"
|
||||
@ -78,7 +87,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
|
||||
byte first_char = line[0];
|
||||
first_2[0] = line[0];
|
||||
if (line_len > 1) first_2[1] = line[1];
|
||||
first_2[1] = line_len == 1 ? Byte_ascii.Null : line[1];
|
||||
|
||||
// PORTED: preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches)
|
||||
byte[] tblw_atrs = null;
|
||||
@ -94,15 +103,15 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
// First check if we are starting a new table
|
||||
indent_level = colons_end;
|
||||
|
||||
tblw_atrs = uniq_mgr.Convert(tblw_atrs);
|
||||
tblw_atrs = strip_state.Unstrip_both(tblw_atrs);
|
||||
|
||||
// PORTED: out_line = str_repeat('<dl><dd>', $indent_level) . "<table{atrs}>";
|
||||
for (int j = 0; j < indent_level; j++)
|
||||
tmp_bfr.Add(Html__dl__bgn);
|
||||
tmp_bfr.Add_str_a7("<table");
|
||||
sanitizer.Fix_tag_attributes(tmp_bfr, Name__table, tblw_atrs);
|
||||
tmp_bfr.Add_byte(Byte_ascii.Angle_end);
|
||||
out_line = tmp_bfr.To_bry_and_clear();
|
||||
tmp.Add(Html__dl__bgn);
|
||||
tmp.Add_str_a7("<table");
|
||||
sanitizer.Fix_tag_attributes(tmp, Name__table, tblw_atrs);
|
||||
tmp.Add_byte(Byte_ascii.Angle_end);
|
||||
out_line = tmp.To_bry_and_clear();
|
||||
td_history.Add(false);
|
||||
last_tag_history.Add(Bry_.Empty);
|
||||
tr_history.Add(false);
|
||||
@ -116,35 +125,35 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
}
|
||||
else if (Bry_.Eq(first_2, Wtxt__tb__end)) {
|
||||
// We are ending a table
|
||||
line = tmp_bfr.Add_str_a7("</table>").Add_mid(line, 2, line.length).To_bry_and_clear();
|
||||
line = tmp.Add_str_a7("</table>").Add_mid(line, 2, line.length).To_bry_and_clear();
|
||||
byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
|
||||
|
||||
if (!Php_ary_.Pop_bool_or_n(has_opened_tr)) {
|
||||
line = tmp_bfr.Add_str_a7("<tr><td></td></tr>").Add(line).To_bry_and_clear();
|
||||
line = tmp.Add_str_a7("<tr><td></td></tr>").Add(line).To_bry_and_clear();
|
||||
}
|
||||
|
||||
if (Php_ary_.Pop_bool_or_n(tr_history)) {
|
||||
line = tmp_bfr.Add_str_a7("</tr>").Add(line).To_bry_and_clear();
|
||||
line = tmp.Add_str_a7("</tr>").Add(line).To_bry_and_clear();
|
||||
}
|
||||
|
||||
if (Php_ary_.Pop_bool_or_n(td_history)) {
|
||||
line = tmp_bfr.Add_str_a7("</").Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(line).To_bry_and_clear();
|
||||
line = tmp.Add_str_a7("</").Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(line).To_bry_and_clear();
|
||||
}
|
||||
Php_ary_.Pop_bry_or_null(tr_attributes);
|
||||
// PORTED:$outLine = $line . str_repeat( '</dd></dl>', $indent_level );
|
||||
tmp_bfr.Add(line);
|
||||
tmp.Add(line);
|
||||
for (int j = 0; j < indent_level; j++)
|
||||
tmp_bfr.Add(Html__dl__end);
|
||||
out_line = tmp_bfr.To_bry_and_clear();
|
||||
tmp.Add(Html__dl__end);
|
||||
out_line = tmp.To_bry_and_clear();
|
||||
}
|
||||
else if (Bry_.Eq(first_2, Wtxt__tr)) {
|
||||
// Now we have a table row
|
||||
line = Bry_.Mid(line, 2); // PORTED: $line = preg_replace('#^\|-+#', '', $line);
|
||||
|
||||
// Whats after the tag is now only attributes
|
||||
byte[] atrs = uniq_mgr.Unstrip_both(line);
|
||||
sanitizer.Fix_tag_attributes(tmp_bfr, Name__tr, atrs);
|
||||
atrs = tmp_bfr.To_bry_and_clear();
|
||||
byte[] atrs = strip_state.Unstrip_both(line);
|
||||
sanitizer.Fix_tag_attributes(tmp, Name__tr, atrs);
|
||||
atrs = tmp.To_bry_and_clear();
|
||||
|
||||
Php_ary_.Pop_bry_or_null(tr_attributes);
|
||||
tr_attributes.Add(atrs);
|
||||
@ -159,7 +168,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
}
|
||||
|
||||
if (Php_ary_.Pop_bool_or_n(td_history)) {
|
||||
line = tmp_bfr.Add_str_a7("</").Add(last_tag).Add_byte(Byte_ascii.Gt).Add(line).To_bry_and_clear();
|
||||
line = tmp.Add_str_a7("</").Add(last_tag).Add_byte(Byte_ascii.Gt).Add(line).To_bry_and_clear();
|
||||
}
|
||||
|
||||
out_line = line;
|
||||
@ -181,13 +190,14 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
|
||||
// Implies both are valid for table headings.
|
||||
if (first_char == Byte_ascii.Bang) {
|
||||
Xomw_string_utils.Replace_markup(line, 0, line.length, Wtxt__th2, Wtxt__td2); // $line = StringUtils::replaceMarkup('!!', '||', $line);
|
||||
Xomw_string_utl.Replace_markup(line, 0, line.length, Wtxt__th2, Wtxt__td2); // $line = StringUtils::replaceMarkup('!!', '||', $line);
|
||||
}
|
||||
|
||||
// Split up multiple cells on the same line.
|
||||
// FIXME : This can result in improper nesting of tags processed
|
||||
// by earlier parser steps.
|
||||
byte[][] cells = Bry_split_.Split(line, Wtxt__td2);
|
||||
if (cells.length == 0) cells = Cells__empty; // handle "\n|\n" which should still generate "<tr><td></td></tr>", not ""; see TEST
|
||||
|
||||
out_line = Bry_.Empty;
|
||||
|
||||
@ -200,7 +210,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
if (first_char != Byte_ascii.Plus) {
|
||||
byte[] tr_after = Php_ary_.Pop_bry_or_null(tr_attributes);
|
||||
if (!Php_ary_.Pop_bool_or_n(tr_history)) {
|
||||
previous = tmp_bfr.Add_str_a7("<tr").Add(tr_after).Add_str_a7(">\n").To_bry_and_clear();
|
||||
previous = tmp.Add_str_a7("<tr").Add(tr_after).Add_str_a7(">\n").To_bry_and_clear();
|
||||
}
|
||||
tr_history.Add(true);
|
||||
tr_attributes.Add(Bry_.Empty);
|
||||
@ -211,7 +221,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
|
||||
|
||||
if (Php_ary_.Pop_bool_or_n(td_history)) {
|
||||
previous = tmp_bfr.Add_str_a7("</").Add(last_tag).Add_str_a7(">\n").Add(previous).To_bry_and_clear();
|
||||
previous = tmp.Add_str_a7("</").Add(last_tag).Add_str_a7(">\n").Add(previous).To_bry_and_clear();
|
||||
}
|
||||
|
||||
if (first_char == Byte_ascii.Pipe) {
|
||||
@ -237,17 +247,17 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
byte[] cell_data_0 = cell_data[0];
|
||||
byte[] cell_data_1 = cell_data[1];
|
||||
if (Bry_find_.Find_fwd(cell_data_0, Wtxt__lnki__bgn) != Bry_find_.Not_found) {
|
||||
cell = tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell).To_bry_and_clear();
|
||||
cell = tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell).To_bry_and_clear();
|
||||
}
|
||||
else if (cell_data_1 == null) {
|
||||
cell = tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell_data_0).To_bry_and_clear();
|
||||
cell = tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell_data_0).To_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
byte[] atrs = uniq_mgr.Unstrip_both(cell_data_0);
|
||||
tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag);
|
||||
sanitizer.Fix_tag_attributes(tmp_bfr, last_tag, atrs);
|
||||
tmp_bfr.Add_byte(Byte_ascii.Angle_end).Add(cell_data_1);
|
||||
cell = tmp_bfr.To_bry_and_clear();
|
||||
byte[] atrs = strip_state.Unstrip_both(cell_data_0);
|
||||
tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag);
|
||||
sanitizer.Fix_tag_attributes(tmp, last_tag, atrs);
|
||||
tmp.Add_byte(Byte_ascii.Angle_end).Add(cell_data_1);
|
||||
cell = tmp.To_bry_and_clear();
|
||||
}
|
||||
|
||||
out_line = Bry_.Add(out_line, cell);
|
||||
@ -278,4 +288,5 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
, Html__tb__empty = Bry_.new_a7("<table>\n<tr><td></td></tr>\n</table>")
|
||||
;
|
||||
private static final int Len__tb__empty = Html__tb__empty.length;
|
||||
private static final byte[][] Cells__empty = new byte[][] {Bry_.Empty};
|
||||
}
|
@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
package gplx.xowa.mws.parsers.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_table_wkr__tst {
|
||||
private final Xomw_table_wkr__fxt fxt = new Xomw_table_wkr__fxt();
|
||||
@ -101,13 +101,29 @@ public class Xomw_table_wkr__tst {
|
||||
, "<tr><td></td></tr></table>"
|
||||
));
|
||||
}
|
||||
}
|
||||
class Xomw_table_wkr__fxt {
|
||||
private final Xomw_parser_ctx ctx = new Xomw_parser_ctx();
|
||||
private final Xomw_table_wkr wkr = new Xomw_table_wkr();
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
byte[] actl = wkr.Do_table_stuff(ctx, src_bry);
|
||||
Tfds.Eq_str_lines(expd, String_.new_u8(actl), src_str);
|
||||
@Test public void Td__empty() { // PURPOSE: handles (a) failure due to "first_2" array not handling "\n|\n"; (b) missing <tr><td></td></tr>
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|-"
|
||||
, "|"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, ""
|
||||
, "<tr>"
|
||||
, "<td>"
|
||||
, "</td></tr></table>"
|
||||
));
|
||||
}
|
||||
}
|
||||
class Xomw_table_wkr__fxt {
|
||||
private final Xomw_parser_bfr parser_bfr = new Xomw_parser_bfr();
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
private final Xomw_table_wkr wkr = new Xomw_table_wkr(new Xomw_parser());
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
parser_bfr.Init(src_bry);
|
||||
wkr.Do_table_stuff(pctx, parser_bfr);
|
||||
Tfds.Eq_str_lines(expd, parser_bfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
@ -15,8 +15,8 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
public class Xomw_string_utils {
|
||||
package gplx.xowa.mws.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
public class Xomw_string_utl {
|
||||
public static void Replace_markup(byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) { // REF:/includes/libs/StringUtils.php|replaceMarkup
|
||||
// PORTED: avoiding multiple regex calls / String creations
|
||||
// $placeholder = "\x00";
|
@ -15,10 +15,10 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_string_utils__tst {
|
||||
private final Xomw_string_utils__fxt fxt = new Xomw_string_utils__fxt();
|
||||
package gplx.xowa.mws.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xomw_string_utl__tst {
|
||||
private final Xomw_string_utl__fxt fxt = new Xomw_string_utl__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__replace_markup("a!!b" , "!!", "||", "a||b");
|
||||
}
|
||||
@ -38,10 +38,10 @@ public class Xomw_string_utils__tst {
|
||||
fxt.Test__replace_markup("a!!b<!!>!!>!!c" , "!!", "||", "a||b<!!>||>||c"); // NOTE: should probably be "!!>!!>", but unmatched ">" are escaped to ">"
|
||||
}
|
||||
}
|
||||
class Xomw_string_utils__fxt {
|
||||
class Xomw_string_utl__fxt {
|
||||
public void Test__replace_markup(String src_str, String find, String repl, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
Xomw_string_utils.Replace_markup(src_bry, 0, src_bry.length, Bry_.new_a7(find), Bry_.new_a7(repl));
|
||||
Tfds.Eq_str(expd, src_bry);
|
||||
Xomw_string_utl.Replace_markup(src_bry, 0, src_bry.length, Bry_.new_a7(find), Bry_.new_a7(repl));
|
||||
Gftest.Eq__str(expd, src_bry);
|
||||
}
|
||||
}
|
120
400_xowa/src/gplx/xowa/mws/utls/Xomw_ttl_utl.java
Normal file
120
400_xowa/src/gplx/xowa/mws/utls/Xomw_ttl_utl.java
Normal file
@ -0,0 +1,120 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
public class Xomw_ttl_utl {
|
||||
// REF.MW: DefaultSettings.php
|
||||
// Allowed title characters -- regex character class
|
||||
// Don't change this unless you know what you're doing
|
||||
//
|
||||
// Problematic punctuation:
|
||||
// - []{}|# Are needed for link syntax, never enable these
|
||||
// - <> Causes problems with HTML escaping, don't use
|
||||
// - % Enabled by default, minor problems with path to query rewrite rules, see below
|
||||
// - + Enabled by default, but doesn't work with path to query rewrite rules,
|
||||
// corrupted by apache
|
||||
// - ? Enabled by default, but doesn't work with path to PATH_INFO rewrites
|
||||
//
|
||||
// All three of these punctuation problems can be avoided by using an alias,
|
||||
// instead of a rewrite rule of either variety.
|
||||
//
|
||||
// The problem with % is that when using a path to query rewrite rule, URLs are
|
||||
// double-unescaped: once by Apache's path conversion code, and again by PHP. So
|
||||
// %253F, for example, becomes "?". Our code does not double-escape to compensate
|
||||
// for this, indeed double escaping would break if the double-escaped title was
|
||||
// passed in the query String rather than the path. This is a minor security issue
|
||||
// because articles can be created such that they are hard to view or edit.
|
||||
//
|
||||
// In some rare cases you may wish to remove + for compatibility with old links.
|
||||
//
|
||||
// Theoretically 0x80-0x9F of ISO 8859-1 should be disallowed, but
|
||||
// this breaks interlanguage links
|
||||
// $wgLegalTitleChars = " %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF+";
|
||||
//
|
||||
// REGEX:
|
||||
// without-backslash escaping --> \s%!"$&'()*,-./0-9:;=?@A-Z\^_`a-z~x80-xFF+
|
||||
// rearranged
|
||||
// letters --> 0-9A-Za-z
|
||||
// unicode-chars --> x80-xFF
|
||||
// symbols --> \s%!"$&'()*,-./:;=?@\^_`~+"
|
||||
// deliberately ignores
|
||||
// control chars: 00-31,127
|
||||
// []{}|#<>
|
||||
public static int Find_fwd_while_title(byte[] src, int src_bgn, int src_end, boolean[] valid) {
|
||||
int cur = src_bgn;
|
||||
while (true) {
|
||||
if (cur == src_end) break;
|
||||
byte b = src[cur];
|
||||
int b_len = gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
if (b_len == 1) { // ASCII
|
||||
if (valid[b]) // valid; EX: "a0A B&$"
|
||||
cur++;
|
||||
else // invalid; EX: "<title>"
|
||||
break;
|
||||
}
|
||||
else { // Multi-byte UTF8; NOTE: all sequences are valid
|
||||
cur += b_len;
|
||||
}
|
||||
}
|
||||
return cur;
|
||||
}
|
||||
private static boolean[] title_chars_valid;
|
||||
public static boolean[] Title_chars_valid() {
|
||||
if (title_chars_valid == null) {
|
||||
title_chars_valid = new boolean[128];
|
||||
// add num and alpha
|
||||
for (int i = Byte_ascii.Num_0; i <= Byte_ascii.Num_9; i++)
|
||||
title_chars_valid[i] = true;
|
||||
for (int i = Byte_ascii.Ltr_A; i <= Byte_ascii.Ltr_Z; i++)
|
||||
title_chars_valid[i] = true;
|
||||
for (int i = Byte_ascii.Ltr_a; i <= Byte_ascii.Ltr_z; i++)
|
||||
title_chars_valid[i] = true;
|
||||
|
||||
// add symbols: \s%!"$&'()*,-./:;=?@\^_`~+"
|
||||
byte[] symbols = new byte[]
|
||||
{ Byte_ascii.Space
|
||||
, Byte_ascii.Percent
|
||||
, Byte_ascii.Bang
|
||||
, Byte_ascii.Quote
|
||||
, Byte_ascii.Amp
|
||||
, Byte_ascii.Apos
|
||||
, Byte_ascii.Paren_bgn
|
||||
, Byte_ascii.Paren_end
|
||||
, Byte_ascii.Star
|
||||
, Byte_ascii.Comma
|
||||
, Byte_ascii.Dash
|
||||
, Byte_ascii.Dot
|
||||
, Byte_ascii.Slash
|
||||
, Byte_ascii.Colon
|
||||
, Byte_ascii.Semic
|
||||
, Byte_ascii.Eq
|
||||
, Byte_ascii.Question
|
||||
, Byte_ascii.At
|
||||
, Byte_ascii.Backslash
|
||||
, Byte_ascii.Pow
|
||||
, Byte_ascii.Underline
|
||||
, Byte_ascii.Tick
|
||||
, Byte_ascii.Tilde
|
||||
, Byte_ascii.Plus
|
||||
};
|
||||
int symbols_len = symbols.length;
|
||||
for (int i = 0; i < symbols_len; i++)
|
||||
title_chars_valid[symbols[i]] = true;
|
||||
}
|
||||
return title_chars_valid;
|
||||
}
|
||||
}
|
30
400_xowa/src/gplx/xowa/mws/utls/Xomw_ttl_utl__tst.java
Normal file
30
400_xowa/src/gplx/xowa/mws/utls/Xomw_ttl_utl__tst.java
Normal file
@ -0,0 +1,30 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xomw_ttl_utl__tst {
|
||||
private final Xomw_ttl_utl__fxt fxt = new Xomw_ttl_utl__fxt();
|
||||
@Test public void Alphanum() {fxt.Test__find_fwd_while_title("0aB" , 3);}
|
||||
@Test public void Angle() {fxt.Test__find_fwd_while_title("0a<" , 2);}
|
||||
}
|
||||
class Xomw_ttl_utl__fxt {
|
||||
public void Test__find_fwd_while_title(String src_str, int expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
Gftest.Eq__int(expd, Xomw_ttl_utl.Find_fwd_while_title(src_bry, 0, src_bry.length, Xomw_ttl_utl.Title_chars_valid()));
|
||||
}
|
||||
}
|
@ -16,10 +16,10 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs.sections; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.hdrs.*;
|
||||
import gplx.xowa.parsers.mws.*; import gplx.xowa.parsers.mws.wkrs.*;
|
||||
import gplx.xowa.mws.parsers.*; import gplx.xowa.mws.parsers.headings.*;
|
||||
import gplx.xowa.addons.htmls.tocs.*; import gplx.xowa.htmls.core.htmls.tidy.*;
|
||||
class Xop_section_list implements Xomw_hdr_cbk {
|
||||
private final Xomw_hdr_wkr hdr_wkr = new Xomw_hdr_wkr();
|
||||
class Xop_section_list implements Xomw_heading_cbk {
|
||||
private final Xomw_heading_wkr hdr_wkr = new Xomw_heading_wkr();
|
||||
private final Ordered_hash hash = Ordered_hash_.New_bry();
|
||||
private final Xoh_toc_mgr toc_mgr = new Xoh_toc_mgr();
|
||||
private byte[] src;
|
||||
@ -92,7 +92,7 @@ class Xop_section_list implements Xomw_hdr_cbk {
|
||||
|
||||
return new int[] {src_bgn, src_end};
|
||||
}
|
||||
public void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_hdr_wkr wkr) {
|
||||
public void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {
|
||||
// get key by taking everything between ==; EX: "== abc ==" -> " abc "
|
||||
byte[] src = wkr.Src();
|
||||
int hdr_txt_bgn = wkr.Hdr_lhs_end();
|
||||
@ -117,5 +117,5 @@ class Xop_section_list implements Xomw_hdr_cbk {
|
||||
Xop_section_itm itm = new Xop_section_itm(hash.Count(), num, key, wkr.Hdr_bgn(), wkr.Hdr_end());
|
||||
hash.Add(key, itm);
|
||||
}
|
||||
public void On_src_done(Xomw_parser_ctx pctx, Xomw_hdr_wkr wkr) {}
|
||||
public void On_src_done(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {}
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.hdrs.sections; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.hdrs.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.xowa.parsers.mws.*; import gplx.xowa.parsers.mws.wkrs.*; import gplx.xowa.parsers.hdrs.*; import gplx.xowa.htmls.core.htmls.tidy.*;
|
||||
import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*; import gplx.xowa.parsers.hdrs.*; import gplx.xowa.htmls.core.htmls.tidy.*;
|
||||
public class Xop_section_mgr implements Gfo_invk {
|
||||
private Xoae_app app; private Xowe_wiki wiki;
|
||||
private Xow_tidy_mgr_interface tidy_mgr;
|
||||
|
@ -1,261 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.blocks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
import gplx.langs.phps.utls.*;
|
||||
public class Xomw_block_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private final Bry_bfr bfr = Bry_bfr_.New();
|
||||
private byte[] last_prefix, last_section;
|
||||
private boolean line_start, dt_open, in_block_elem, para_stack, in_blockquote, in_pre = false;
|
||||
private int prefix_len;
|
||||
private int src_len;
|
||||
public byte[] Do_block_levels(byte[] src, boolean line_start) {
|
||||
this.src_len = src.length;
|
||||
this.line_start = line_start;
|
||||
// Parsing through the text line by line. The main thing
|
||||
// happening here is handling of block-level elements p, pre,
|
||||
// and making lists from lines starting with * # : etc.
|
||||
this.last_prefix = Bry_.Empty;
|
||||
bfr.Clear();
|
||||
this.dt_open = this.in_block_elem = false;
|
||||
this.prefix_len = 0;
|
||||
this.para_stack = false;
|
||||
this.in_blockquote = false;
|
||||
|
||||
// PORTED.SPLIT: $textLines = StringUtils::explode("\n", $text);
|
||||
Bry_split_.Split(src, 0, src_len, Byte_ascii.Nl, Bool_.N, this);
|
||||
|
||||
while (prefix_len > 0) {
|
||||
// bfr .= this.closeList(prefix2[prefix_len - 1]);
|
||||
prefix_len--;
|
||||
if (prefix_len > 0) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
}
|
||||
if (Bry_.Len_gt_0(last_section)) {
|
||||
bfr.Add_str_a7("</").Add(last_section).Add_str_a7(">");
|
||||
this.last_section = Bry_.Empty;
|
||||
}
|
||||
|
||||
if (dt_open || in_block_elem || para_stack || in_blockquote || in_pre) {
|
||||
}
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
public int Split(byte[] src, int itm_bgn, int itm_end) {
|
||||
// Fix up line_start
|
||||
if (!line_start) {
|
||||
bfr.Add_mid(src, itm_bgn, itm_end);
|
||||
line_start = true;
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
|
||||
// * = ul
|
||||
// # = ol
|
||||
// ; = dt
|
||||
// : = dd
|
||||
int last_prefix_len = last_prefix.length;
|
||||
boolean pre_close_match = false; //preg_match('/<\\/pre/i', $oLine);
|
||||
boolean pre_open_match = false; //preg_match('/<pre/i', $oLine);
|
||||
byte[] prefix = null, prefix2 = null, t = null;
|
||||
// If not in a <pre> element, scan for and figure out what prefixes are there.
|
||||
if (!in_pre) {
|
||||
// Multiple prefixes may abut each other for nested lists.
|
||||
prefix_len = 0;// strspn($oLine, '*#:;');
|
||||
prefix = Php_str_.Substr(src, itm_bgn, prefix_len);
|
||||
|
||||
// eh?
|
||||
// ; and : are both from definition-lists, so they're equivalent
|
||||
// for the purposes of determining whether or not we need to open/close
|
||||
// elements.
|
||||
prefix2 = Bry_.Replace(prefix, Byte_ascii.Semic, Byte_ascii.Colon);
|
||||
t = Bry_.Mid(src, itm_bgn + prefix_len, itm_end);
|
||||
// this.in_pre = (boolean)pre_open_match;
|
||||
}
|
||||
else {
|
||||
// Don't interpret any other prefixes in preformatted text
|
||||
prefix_len = 0;
|
||||
prefix = prefix2 = Bry_.Empty;
|
||||
t = Bry_.Mid(src, itm_bgn, itm_end);
|
||||
}
|
||||
|
||||
// List generation
|
||||
byte[] term = null, t2 = null;
|
||||
int common_prefix_len = -1;
|
||||
if (prefix_len > 0 && Bry_.Eq(last_prefix, prefix2)) {
|
||||
// Same as the last item, so no need to deal with nesting or opening stuff
|
||||
// bfr .= this.nextItem(substr(prefix, -1));
|
||||
para_stack = false;
|
||||
|
||||
if (prefix_len > 0 && prefix[prefix_len - 1] == Byte_ascii.Semic) {
|
||||
// The one nasty exception: definition lists work like this:
|
||||
// ; title : definition text
|
||||
// So we check for : in the remainder text to split up the
|
||||
// title and definition, without b0rking links.
|
||||
term = t2 = Bry_.Empty;
|
||||
// if (this.findColonNoLinks(t, term, t2) !== false) {
|
||||
t = t2;
|
||||
bfr.Add(term); // . this.nextItem(':');
|
||||
// }
|
||||
}
|
||||
}
|
||||
else if (prefix_len > 0 || last_prefix_len > 0) {
|
||||
// We need to open or close prefixes, or both.
|
||||
|
||||
// Either open or close a level...
|
||||
// common_prefix_len = this.getCommon(prefix, last_prefix);
|
||||
para_stack = false;
|
||||
|
||||
// Close all the prefixes which aren't shared.
|
||||
while (common_prefix_len < last_prefix_len) {
|
||||
// bfr .= this.closeList(last_prefix[last_prefix_len - 1]);
|
||||
last_prefix_len--;
|
||||
}
|
||||
//
|
||||
// Continue the current prefix if appropriate.
|
||||
if (prefix_len <= common_prefix_len && common_prefix_len > 0) {
|
||||
// bfr .= this.nextItem(prefix[common_prefix_len - 1]);
|
||||
}
|
||||
|
||||
// Open prefixes where appropriate.
|
||||
if (Bry_.Len_gt_0(last_prefix) && prefix_len > common_prefix_len) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
while (prefix_len > common_prefix_len) {
|
||||
// $char = substr(prefix, common_prefix_len, 1);
|
||||
// bfr .= this.openList($char);
|
||||
//
|
||||
// if (';' == $char) {
|
||||
// // @todo FIXME: This is dupe of code above
|
||||
// if (this.findColonNoLinks(t, term, t2) !== false) {
|
||||
// t = t2;
|
||||
// bfr .= term . this.nextItem(':');
|
||||
// }
|
||||
// }
|
||||
++common_prefix_len;
|
||||
}
|
||||
if (prefix_len == 0 && Bry_.Len_gt_0(last_prefix)) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
last_prefix = prefix2;
|
||||
}
|
||||
|
||||
// If we have no prefixes, go to paragraph mode.
|
||||
if (0 == prefix_len) {
|
||||
// No prefix (not in list)--go to paragraph mode
|
||||
// XXX: use a stack for nestable elements like span, table and div
|
||||
boolean open_match = false, close_match = false;
|
||||
// open_match = preg_match(
|
||||
// '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|'
|
||||
// . '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS',
|
||||
// t
|
||||
// );
|
||||
// close_match = preg_match(
|
||||
// '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'
|
||||
// . '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|'
|
||||
// . self::MARKER_PREFIX
|
||||
// . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS',
|
||||
// t
|
||||
// );
|
||||
|
||||
if (open_match || close_match) {
|
||||
para_stack = false;
|
||||
// @todo bug 5718: paragraph closed
|
||||
// bfr .= this.closeParagraph();
|
||||
if (pre_open_match && !pre_close_match) {
|
||||
this.in_pre = true;
|
||||
}
|
||||
// $bqOffset = 0;
|
||||
// while (preg_match('/<(\\/?)blockquote[\s>]/i', t,
|
||||
// $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset)
|
||||
// ) {
|
||||
// in_blockquote = !$bqMatch[1][0]; // is this a close tag?
|
||||
// $bqOffset = $bqMatch[0][1] + strlen($bqMatch[0][0]);
|
||||
// }
|
||||
in_block_elem = !close_match;
|
||||
}
|
||||
else if (!in_block_elem && !this.in_pre) {
|
||||
if ( Byte_ascii.Space == t[0]
|
||||
// && (last_section == 'pre' || trim(t) != '')
|
||||
&& !in_blockquote
|
||||
) {
|
||||
// pre
|
||||
// if (this.last_section !== 'pre') {
|
||||
para_stack = false;
|
||||
// bfr .= this.closeParagraph() . '<pre>';
|
||||
// this.last_section = 'pre';
|
||||
// }
|
||||
t = Bry_.Mid(t, 1);
|
||||
}
|
||||
else {
|
||||
// paragraph
|
||||
// if (trim(t) == '') {
|
||||
if (para_stack) {
|
||||
// bfr .= para_stack . '<br />';
|
||||
para_stack = false;
|
||||
// this.last_section = 'p';
|
||||
}
|
||||
else {
|
||||
// if (this.last_section !== 'p') {
|
||||
// bfr .= this.closeParagraph();
|
||||
// this.last_section = '';
|
||||
// para_stack = '<p>';
|
||||
// }
|
||||
// else {
|
||||
// para_stack = '</p><p>';
|
||||
// }
|
||||
}
|
||||
// }
|
||||
// else {
|
||||
if (para_stack) {
|
||||
// bfr .= para_stack;
|
||||
para_stack = false;
|
||||
// this.last_section = 'p';
|
||||
}
|
||||
// else if (this.last_section !== 'p') {
|
||||
// bfr .= this.closeParagraph() . '<p>';
|
||||
// this.last_section = 'p';
|
||||
// }
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
// somewhere above we forget to get out of pre block (bug 785)
|
||||
if (pre_close_match && this.in_pre) {
|
||||
this.in_pre = false;
|
||||
}
|
||||
if (para_stack == false) {
|
||||
bfr.Add(t);
|
||||
if (prefix_len == 0) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
}
|
||||
|
||||
if (last_prefix_len == -1 || common_prefix_len == -1) {
|
||||
}
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
// private static final int
|
||||
// Para_stack_none = 0 // false
|
||||
// , Para_stack_bgn = 1 // <p>
|
||||
// , Para_stack_mid = 2 // </p><p>
|
||||
// ;
|
||||
// private static final byte
|
||||
// Mode_none = 0 // ''
|
||||
// , Mode_para = 1 // p
|
||||
// , Mode_pre = 2 // pre
|
||||
// ;
|
||||
}
|
@ -1,41 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
public class Xomw_sanitizer_mgr {
|
||||
private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr();
|
||||
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
|
||||
public void Fix_tag_attributes(Bry_bfr bfr, byte[] tag_name, byte[] atrs) {
|
||||
atr_bldr.Atrs__clear();
|
||||
atr_parser.Parse(atr_bldr, -1, -1, atrs, 0, atrs.length);
|
||||
int len = atr_bldr.Atrs__len();
|
||||
|
||||
// PORTED: Sanitizer.php|safeEncodeTagAttributes
|
||||
for (int i = 0; i < len; i++) {
|
||||
// $encAttribute = htmlspecialchars( $attribute );
|
||||
// $encValue = Sanitizer::safeEncodeAttribute( $value );
|
||||
// $attribs[] = "$encAttribute=\"$encValue\"";
|
||||
Mwh_atr_itm itm = atr_bldr.Atrs__get_at(i);
|
||||
bfr.Add_byte_space(); // "return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';"
|
||||
bfr.Add_bry_escape_html(itm.Key_bry(), itm.Key_bgn(), itm.Key_end());
|
||||
bfr.Add_byte_eq().Add_byte_quote();
|
||||
bfr.Add(itm.Val_as_bry()); // TODO.XO:Sanitizer::encode
|
||||
bfr.Add_byte_quote();
|
||||
}
|
||||
}
|
||||
}
|
@ -22,6 +22,7 @@ public class Xop_uniq_mgr { // REF.MW:/parser/StripState.php
|
||||
private final Bry_bfr key_bfr = Bry_bfr_.New_w_size(32);
|
||||
private int idx = -1;
|
||||
public void Clear() {idx = -1; general_trie.Clear();}
|
||||
public byte[] Get(byte[] key) {return (byte[])general_trie.Match_exact(key, 0, key.length);}
|
||||
public byte[] Add(byte[] val) { // "<b>" -> "\u007fUNIQ-item-1--QINU\u007f"
|
||||
byte[] key = key_bfr
|
||||
.Add(Bry__uniq__add__bgn)
|
||||
@ -30,10 +31,6 @@ public class Xop_uniq_mgr { // REF.MW:/parser/StripState.php
|
||||
general_trie.Add_bry_bry(key, val);
|
||||
return key;
|
||||
}
|
||||
public byte[] Get(byte[] key) {return (byte[])general_trie.Match_exact(key, 0, key.length);}
|
||||
public byte[] Unstrip_both(byte[] src) {
|
||||
return Convert(src);
|
||||
}
|
||||
public byte[] Convert(byte[] src) {
|
||||
if (general_trie.Count() == 0) return src;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user