mirror of https://github.com/gnosygnu/xowa
parent
5f1609a869
commit
f8fcb553d5
@ -0,0 +1,41 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
public class Xomw_sanitizer_mgr {
|
||||
private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr();
|
||||
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
|
||||
public void Fix_tag_attributes(Bry_bfr bfr, byte[] tag_name, byte[] atrs) {
|
||||
atr_bldr.Atrs__clear();
|
||||
atr_parser.Parse(atr_bldr, -1, -1, atrs, 0, atrs.length);
|
||||
int len = atr_bldr.Atrs__len();
|
||||
|
||||
// PORTED: Sanitizer.php|safeEncodeTagAttributes
|
||||
for (int i = 0; i < len; i++) {
|
||||
// $encAttribute = htmlspecialchars( $attribute );
|
||||
// $encValue = Sanitizer::safeEncodeAttribute( $value );
|
||||
// $attribs[] = "$encAttribute=\"$encValue\"";
|
||||
Mwh_atr_itm itm = atr_bldr.Atrs__get_at(i);
|
||||
bfr.Add_byte_space(); // "return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';"
|
||||
bfr.Add_bry_escape_html(itm.Key_bry(), itm.Key_bgn(), itm.Key_end());
|
||||
bfr.Add_byte_eq().Add_byte_quote();
|
||||
bfr.Add(itm.Val_as_bry()); // TODO.XO:Sanitizer::encode
|
||||
bfr.Add_byte_quote();
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,62 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
public class Xomw_string_utils {
|
||||
public static void Replace_markup(byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) { // REF:/includes/libs/StringUtils.php|replaceMarkup
|
||||
// PORTED: avoiding multiple regex calls / String creations
|
||||
// $placeholder = "\x00";
|
||||
|
||||
// Remove placeholder instances
|
||||
// $text = str_replace( $placeholder, '', $text );
|
||||
|
||||
// Replace instances of the separator inside HTML-like tags with the placeholder
|
||||
// $replacer = new DoubleReplacer( $search, $placeholder );
|
||||
// $cleaned = StringUtils::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text );
|
||||
|
||||
// Explode, then put the replaced separators back in
|
||||
// $cleaned = str_replace( $search, $replace, $cleaned );
|
||||
// $text = str_replace( $placeholder, $search, $cleaned );
|
||||
|
||||
// if same length find / repl, do in-place replacement; EX: "!!" -> "||"
|
||||
int find_len = find.length;
|
||||
int repl_len = repl.length;
|
||||
if (find_len != repl_len) throw Err_.new_wo_type("find and repl should be same length");
|
||||
|
||||
byte find_0 = find[0];
|
||||
byte dlm_bgn = Byte_ascii.Angle_bgn;
|
||||
byte dlm_end = Byte_ascii.Angle_end;
|
||||
boolean repl_active = true;
|
||||
|
||||
// loop every char in array
|
||||
for (int i = src_bgn; i < src_end; i++) {
|
||||
byte b = src[i];
|
||||
if ( b == find_0
|
||||
&& Bry_.Match(src, i + 1, i + find_len, find, 1, find_len)
|
||||
&& repl_active
|
||||
) {
|
||||
Bry_.Set(src, i, i + find_len, repl);
|
||||
}
|
||||
else if (b == dlm_bgn) {
|
||||
repl_active = false;
|
||||
}
|
||||
else if (b == dlm_end) {
|
||||
repl_active = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,47 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_string_utils__tst {
|
||||
private final Xomw_string_utils__fxt fxt = new Xomw_string_utils__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__replace_markup("a!!b" , "!!", "||", "a||b");
|
||||
}
|
||||
@Test public void Missing() {
|
||||
fxt.Test__replace_markup("abcd" , "!!", "||", "abcd");
|
||||
}
|
||||
@Test public void Eos() {
|
||||
fxt.Test__replace_markup("a!!" , "!!", "||", "a||");
|
||||
}
|
||||
@Test public void Ignore() {
|
||||
fxt.Test__replace_markup("a!!b<!!>!!c" , "!!", "||", "a||b<!!>||c");
|
||||
}
|
||||
@Test public void Ignore__asym__lhs() {
|
||||
fxt.Test__replace_markup("a!!b<!!<!!>!!c" , "!!", "||", "a||b<!!<!!>||c");
|
||||
}
|
||||
@Test public void Ignore__asym__rhs() {
|
||||
fxt.Test__replace_markup("a!!b<!!>!!>!!c" , "!!", "||", "a||b<!!>||>||c"); // NOTE: should probably be "!!>!!>", but unmatched ">" are escaped to ">"
|
||||
}
|
||||
}
|
||||
class Xomw_string_utils__fxt {
|
||||
public void Test__replace_markup(String src_str, String find, String repl, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
Xomw_string_utils.Replace_markup(src_bry, 0, src_bry.length, Bry_.new_a7(find), Bry_.new_a7(repl));
|
||||
Tfds.Eq_str(expd, src_bry);
|
||||
}
|
||||
}
|
Loading…
Reference in new issue