mirror of https://github.com/gnosygnu/xowa
parent
5f1609a869
commit
f8fcb553d5
@ -0,0 +1,41 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||||
|
import gplx.xowa.parsers.htmls.*;
|
||||||
|
public class Xomw_sanitizer_mgr {
|
||||||
|
private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr();
|
||||||
|
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
|
||||||
|
public void Fix_tag_attributes(Bry_bfr bfr, byte[] tag_name, byte[] atrs) {
|
||||||
|
atr_bldr.Atrs__clear();
|
||||||
|
atr_parser.Parse(atr_bldr, -1, -1, atrs, 0, atrs.length);
|
||||||
|
int len = atr_bldr.Atrs__len();
|
||||||
|
|
||||||
|
// PORTED: Sanitizer.php|safeEncodeTagAttributes
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
// $encAttribute = htmlspecialchars( $attribute );
|
||||||
|
// $encValue = Sanitizer::safeEncodeAttribute( $value );
|
||||||
|
// $attribs[] = "$encAttribute=\"$encValue\"";
|
||||||
|
Mwh_atr_itm itm = atr_bldr.Atrs__get_at(i);
|
||||||
|
bfr.Add_byte_space(); // "return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';"
|
||||||
|
bfr.Add_bry_escape_html(itm.Key_bry(), itm.Key_bgn(), itm.Key_end());
|
||||||
|
bfr.Add_byte_eq().Add_byte_quote();
|
||||||
|
bfr.Add(itm.Val_as_bry()); // TODO.XO:Sanitizer::encode
|
||||||
|
bfr.Add_byte_quote();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,62 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||||
|
public class Xomw_string_utils {
|
||||||
|
public static void Replace_markup(byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) { // REF:/includes/libs/StringUtils.php|replaceMarkup
|
||||||
|
// PORTED: avoiding multiple regex calls / String creations
|
||||||
|
// $placeholder = "\x00";
|
||||||
|
|
||||||
|
// Remove placeholder instances
|
||||||
|
// $text = str_replace( $placeholder, '', $text );
|
||||||
|
|
||||||
|
// Replace instances of the separator inside HTML-like tags with the placeholder
|
||||||
|
// $replacer = new DoubleReplacer( $search, $placeholder );
|
||||||
|
// $cleaned = StringUtils::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text );
|
||||||
|
|
||||||
|
// Explode, then put the replaced separators back in
|
||||||
|
// $cleaned = str_replace( $search, $replace, $cleaned );
|
||||||
|
// $text = str_replace( $placeholder, $search, $cleaned );
|
||||||
|
|
||||||
|
// if same length find / repl, do in-place replacement; EX: "!!" -> "||"
|
||||||
|
int find_len = find.length;
|
||||||
|
int repl_len = repl.length;
|
||||||
|
if (find_len != repl_len) throw Err_.new_wo_type("find and repl should be same length");
|
||||||
|
|
||||||
|
byte find_0 = find[0];
|
||||||
|
byte dlm_bgn = Byte_ascii.Angle_bgn;
|
||||||
|
byte dlm_end = Byte_ascii.Angle_end;
|
||||||
|
boolean repl_active = true;
|
||||||
|
|
||||||
|
// loop every char in array
|
||||||
|
for (int i = src_bgn; i < src_end; i++) {
|
||||||
|
byte b = src[i];
|
||||||
|
if ( b == find_0
|
||||||
|
&& Bry_.Match(src, i + 1, i + find_len, find, 1, find_len)
|
||||||
|
&& repl_active
|
||||||
|
) {
|
||||||
|
Bry_.Set(src, i, i + find_len, repl);
|
||||||
|
}
|
||||||
|
else if (b == dlm_bgn) {
|
||||||
|
repl_active = false;
|
||||||
|
}
|
||||||
|
else if (b == dlm_end) {
|
||||||
|
repl_active = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,47 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||||
|
import org.junit.*;
|
||||||
|
public class Xomw_string_utils__tst {
|
||||||
|
private final Xomw_string_utils__fxt fxt = new Xomw_string_utils__fxt();
|
||||||
|
@Test public void Basic() {
|
||||||
|
fxt.Test__replace_markup("a!!b" , "!!", "||", "a||b");
|
||||||
|
}
|
||||||
|
@Test public void Missing() {
|
||||||
|
fxt.Test__replace_markup("abcd" , "!!", "||", "abcd");
|
||||||
|
}
|
||||||
|
@Test public void Eos() {
|
||||||
|
fxt.Test__replace_markup("a!!" , "!!", "||", "a||");
|
||||||
|
}
|
||||||
|
@Test public void Ignore() {
|
||||||
|
fxt.Test__replace_markup("a!!b<!!>!!c" , "!!", "||", "a||b<!!>||c");
|
||||||
|
}
|
||||||
|
@Test public void Ignore__asym__lhs() {
|
||||||
|
fxt.Test__replace_markup("a!!b<!!<!!>!!c" , "!!", "||", "a||b<!!<!!>||c");
|
||||||
|
}
|
||||||
|
@Test public void Ignore__asym__rhs() {
|
||||||
|
fxt.Test__replace_markup("a!!b<!!>!!>!!c" , "!!", "||", "a||b<!!>||>||c"); // NOTE: should probably be "!!>!!>", but unmatched ">" are escaped to ">"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Xomw_string_utils__fxt {
|
||||||
|
public void Test__replace_markup(String src_str, String find, String repl, String expd) {
|
||||||
|
byte[] src_bry = Bry_.new_u8(src_str);
|
||||||
|
Xomw_string_utils.Replace_markup(src_bry, 0, src_bry.length, Bry_.new_a7(find), Bry_.new_a7(repl));
|
||||||
|
Tfds.Eq_str(expd, src_bry);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in new issue