diff --git a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr__atr_bldr.java b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr__atr_bldr.java index c90d100f6..96841e299 100644 --- a/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr__atr_bldr.java +++ b/400_xowa/src/gplx/xowa/parsers/htmls/Mwh_doc_wkr__atr_bldr.java @@ -39,5 +39,9 @@ public class Mwh_doc_wkr__atr_bldr implements Mwh_doc_wkr { public void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {} public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {} public void On_entity_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {} + public Mwh_atr_itm[] To_atr_ary() {return (Mwh_atr_itm[])list.To_ary_and_clear(Mwh_atr_itm.class);} + public int Atrs__len() {return list.Len();} + public Mwh_atr_itm Atrs__get_at(int i) {return (Mwh_atr_itm)list.Get_at(i);} + public void Atrs__clear() {list.Clear();} } \ No newline at end of file diff --git a/400_xowa/src/gplx/xowa/parsers/mws/Xomw_parser_ctx.java b/400_xowa/src/gplx/xowa/parsers/mws/Xomw_parser_ctx.java index d0636f8b9..60ebb57c7 100644 --- a/400_xowa/src/gplx/xowa/parsers/mws/Xomw_parser_ctx.java +++ b/400_xowa/src/gplx/xowa/parsers/mws/Xomw_parser_ctx.java @@ -16,8 +16,12 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx.xowa.parsers.mws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; +import gplx.xowa.parsers.htmls.*; +import gplx.xowa.parsers.mws.utils.*; +import gplx.xowa.parsers.uniqs.*; public class Xomw_parser_ctx { - public Xomw_parser_ctx() { - } + public Xomw_sanitizer_mgr Sanitizer() {return sanitizer;} private final Xomw_sanitizer_mgr sanitizer = new Xomw_sanitizer_mgr(); + public Xop_uniq_mgr Uniq_mgr() {return uniq_mgr;} private final Xop_uniq_mgr uniq_mgr = new Xop_uniq_mgr(); + public static final int Pos__bos = -1; } diff --git a/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr.java b/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr.java index 478125dbc..a4553c98d 100644 --- a/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr.java +++ b/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr.java @@ -17,29 +17,27 @@ along with this program. If not, see . */ package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; import gplx.langs.phps.utls.*; +import gplx.xowa.parsers.htmls.*; +import gplx.xowa.parsers.mws.utils.*; import gplx.xowa.parsers.uniqs.*; public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls private final Bry_bfr bfr = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New(); private final List_adp - td_history = List_adp_.New() - , last_tag_history = List_adp_.New() - , tr_history = List_adp_.New() - , tr_attributes = List_adp_.New() - , has_opened_tr = List_adp_.New() + td_history = List_adp_.New() // Is currently a td tag open? + , last_tag_history = List_adp_.New() // Save history of last lag activated (td, th or caption) + , tr_history = List_adp_.New() // Is currently a tr tag open? + , tr_attributes = List_adp_.New() // history of tr attributes + , has_opened_tr = List_adp_.New() // Did this table open a element? ; private int indent_level = 0; // indent level of the table private byte[] first_2 = new byte[2]; - public byte[] Do_table_stuff(byte[] src) { + private Xomw_sanitizer_mgr sanitizer; + private Xop_uniq_mgr uniq_mgr; + public byte[] Do_table_stuff(Xomw_parser_ctx ctx, byte[] src) { + this.sanitizer = ctx.Sanitizer(); + this.uniq_mgr = ctx.Uniq_mgr(); indent_level = 0; - - // PORTED:member variables - // $td_history = []; // Is currently a td tag open? - // $last_tag_history = []; // Save history of last lag activated (td, th or caption) - // $tr_history = []; // Is currently a tr tag open? - // $tr_attributes = []; // history of tr attributes - // $has_opened_tr = []; // Did this table open a element? - - // PORTED.SPLIT: $lines = StringUtils::explode("\n", $text); - Bry_split_.Split(src, 0, src.length, Byte_ascii.Nl, Bool_.N, this); + + Bry_split_.Split(src, 0, src.length, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode("\n", $text); // Closing open td, tr && table while (td_history.Len() > 0) { @@ -61,8 +59,8 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U } // special case: don't return empty table - if ( bfr.Len() == Len__special_case - && Bry_.Eq(bfr.Bfr(), 0, Len__special_case, Bry__special_case)) { + if ( bfr.Len() == Len__tb__empty + && Bry_.Eq(bfr.Bfr(), 0, Len__tb__empty, Html__tb__empty)) { bfr.Clear(); return Bry_.Empty; } @@ -70,39 +68,41 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U } public int Split(byte[] src, int itm_bgn, int itm_end) { byte[] out_line = Bry_.Mid(src, itm_bgn, itm_end); // MW: "$outLine" - byte[] line = Bry_.Trim(out_line); // MW: "$line" + byte[] line = Bry_.Trim(out_line); // MW: "$line" - int line_temp_len = line.length; - if (line_temp_len == 0) { // empty line, go to next line + int line_len = line.length; + if (line_len == 0) { // empty line, go to next line bfr.Add(out_line).Add_byte_nl(); return Bry_split_.Rv__ok; } byte first_char = line[0]; first_2[0] = line[0]; - if (line_temp_len > 1) first_2[1] = line[1]; + if (line_len > 1) first_2[1] = line[1]; // PORTED: preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches) byte[] tblw_atrs = null; boolean tblw_bgn_found = false; - int colons_end = Bry_find_.Find_fwd_while(src, 0, line_temp_len, Byte_ascii.Colon); - int tblw_bgn = Bry_find_.Find_fwd_while(line, colons_end, line_temp_len, Byte_ascii.Space); + int colons_end = Bry_find_.Find_fwd_while(src, 0, line_len, Byte_ascii.Colon); + int tblw_bgn = Bry_find_.Find_fwd_while(line, colons_end, line_len, Byte_ascii.Space); int tblw_atrs_bgn = tblw_bgn + 2; - if (Bry_.Eq(line, tblw_bgn, tblw_atrs_bgn, Bry__tblw_bgn)) { + if (Bry_.Eq(line, tblw_bgn, tblw_atrs_bgn, Wtxt__tb__bgn)) { tblw_bgn_found = true; - tblw_atrs = (tblw_atrs_bgn == line_temp_len) ? Bry_.Empty : Bry_.Mid(line, tblw_atrs_bgn, line_temp_len); + tblw_atrs = (tblw_atrs_bgn == line_len) ? Bry_.Empty : Bry_.Mid(line, tblw_atrs_bgn, line_len); } if (tblw_bgn_found) { // First check if we are starting a new table indent_level = colons_end; -// atrs = $this->mStripState->unstripBoth(matches[2]); -// atrs = Sanitizer::fixTagAttributes(attributes, 'table'); + tblw_atrs = uniq_mgr.Convert(tblw_atrs); // PORTED: out_line = str_repeat('
', $indent_level) . ""; for (int j = 0; j < indent_level; j++) - tmp_bfr.Add(Bry__dl_dd); - out_line = tmp_bfr.Add_str_a7("").Add_mid(line, 2, line.length).To_bry_and_clear(); byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history); @@ -134,17 +134,18 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U // PORTED:$outLine = $line . str_repeat( '
', $indent_level ); tmp_bfr.Add(line); for (int j = 0; j < indent_level; j++) - tmp_bfr.Add(Bry__dl_dd_end); + tmp_bfr.Add(Html__dl__end); out_line = tmp_bfr.To_bry_and_clear(); } - else if (Bry_.Eq(first_2, Bry__tr)) { + else if (Bry_.Eq(first_2, Wtxt__tr)) { // Now we have a table row line = Bry_.Mid(line, 2); // PORTED: $line = preg_replace('#^\|-+#', '', $line); // Whats after the tag is now only attributes - byte[] atrs = line; -// atrs = $this->mStripState->unstripBoth(line); -// atrs = Sanitizer::fixTagAttributes(attributes, 'tr'); + byte[] atrs = uniq_mgr.Unstrip_both(line); + sanitizer.Fix_tag_attributes(tmp_bfr, Name__tr, atrs); + atrs = tmp_bfr.To_bry_and_clear(); + Php_ary_.Pop_bry_or_null(tr_attributes); tr_attributes.Add(atrs); @@ -154,7 +155,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U has_opened_tr.Add(true); if (Php_ary_.Pop_bool_or_n(tr_history)) { - line = Bry__elem_end__tr; + line = Html__tr__end; } if (Php_ary_.Pop_bool_or_n(td_history)) { @@ -168,25 +169,25 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U } else if ( first_char == Byte_ascii.Pipe || first_char == Byte_ascii.Bang - || Bry_.Eq(first_2, Bry__th) + || Bry_.Eq(first_2, Wtxt__caption) ) { // This might be cell elements, td, th or captions - if (Bry_.Eq(first_2, Bry__th)) { - first_char = Byte_ascii.Pipe; + if (Bry_.Eq(first_2, Wtxt__caption)) { + first_char = Byte_ascii.Plus; line = Bry_.Mid(line, 2); } else { line = Bry_.Mid(line, 1); } // Implies both are valid for table headings. - if (first_char == Byte_ascii.Nl) { -// $line = StringUtils::replaceMarkup('!!', '||', $line); + if (first_char == Byte_ascii.Bang) { + Xomw_string_utils.Replace_markup(line, 0, line.length, Wtxt__th2, Wtxt__td2); // $line = StringUtils::replaceMarkup('!!', '||', $line); } // Split up multiple cells on the same line. // FIXME : This can result in improper nesting of tags processed // by earlier parser steps. - byte[][] cells = Bry_split_.Split(line, Bry__td2); + byte[][] cells = Bry_split_.Split(line, Wtxt__td2); out_line = Bry_.Empty; @@ -214,13 +215,13 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U } if (first_char == Byte_ascii.Pipe) { - last_tag = Bry__tag__td; + last_tag = Name__td; } else if (first_char == Byte_ascii.Bang) { - last_tag = Bry__tag__th; + last_tag = Name__th; } else if (first_char == Byte_ascii.Plus) { - last_tag = Bry__tag__caption; + last_tag = Name__caption; } else { last_tag = Bry_.Empty; @@ -235,16 +236,18 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U // be mistaken as delimiting cell parameters byte[] cell_data_0 = cell_data[0]; byte[] cell_data_1 = cell_data[1]; - if (Bry_find_.Find_fwd(cell_data_0, Bry__lnki) != Bry_find_.Not_found) { + if (Bry_find_.Find_fwd(cell_data_0, Wtxt__lnki__bgn) != Bry_find_.Not_found) { cell = tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell).To_bry_and_clear(); } else if (cell_data_1 == null) { cell = tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell_data_0).To_bry_and_clear(); } else { -// atrs = $this->mStripState->unstripBoth(cell_data_0); -// atrs = Sanitizer::fixTagAttributes(attributes, $last_tag); - cell = tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add(cell_data_0).Add_byte(Byte_ascii.Angle_end).Add(cell_data_1).To_bry_and_clear(); + byte[] atrs = uniq_mgr.Unstrip_both(cell_data_0); + tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag); + sanitizer.Fix_tag_attributes(tmp_bfr, last_tag, atrs); + tmp_bfr.Add_byte(Byte_ascii.Angle_end).Add(cell_data_1); + cell = tmp_bfr.To_bry_and_clear(); } out_line = Bry_.Add(out_line, cell); @@ -255,13 +258,24 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U return Bry_split_.Rv__ok; } private static final byte[] - Bry__tblw_bgn = Bry_.new_a7("{|"), Bry__tblw_end = Bry_.new_a7("|}"), Bry__tr = Bry_.new_a7("|-"), Bry__th = Bry_.new_a7("|+"), Bry__td2 = Bry_.new_a7("||") - , Bry__lnki = Bry_.new_a7("[[") - , Bry__special_case = Bry_.new_a7("\n\n
") - , Bry__tag__td = Bry_.new_a7("td"), Bry__tag__th = Bry_.new_a7("th"), Bry__tag__caption = Bry_.new_a7("caption") - , Bry__elem_end__tr = Bry_.new_a7("") - , Bry__dl_dd = Bry_.new_a7("
") - , Bry__dl_dd_end = Bry_.new_a7("
") + Wtxt__tb__bgn = Bry_.new_a7("{|") + , Wtxt__tb__end = Bry_.new_a7("|}") + , Wtxt__tr = Bry_.new_a7("|-") + , Wtxt__caption = Bry_.new_a7("|+") + , Wtxt__th2 = Bry_.new_a7("!!") + , Wtxt__td2 = Bry_.new_a7("||") + , Wtxt__lnki__bgn = Bry_.new_a7("[[") + + , Name__table = Bry_.new_a7("table") + , Name__tr = Bry_.new_a7("tr") + , Name__td = Bry_.new_a7("td") + , Name__th = Bry_.new_a7("th") + , Name__caption = Bry_.new_a7("caption") + + , Html__tr__end = Bry_.new_a7("") + , Html__dl__bgn = Bry_.new_a7("
") + , Html__dl__end = Bry_.new_a7("
") + , Html__tb__empty = Bry_.new_a7("\n\n
") ; - private static final int Len__special_case = Bry__special_case.length; + private static final int Len__tb__empty = Html__tb__empty.length; } diff --git a/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr__tst.java b/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr__tst.java index 71a073c60..b73d7072f 100644 --- a/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr__tst.java +++ b/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr__tst.java @@ -33,6 +33,44 @@ public class Xomw_table_wkr__tst { , "" )); } + @Test public void Tb__atrs() { + fxt.Test__parse(String_.Concat_lines_nl_skip_last + ( "{|id='1'" + , "|-" + , "|a" + , "|}" + ), String_.Concat_lines_nl_skip_last + ( "" + , "" + , "" + , "
a" + , "
" + )); + } + @Test public void Tc__atrs() { + fxt.Test__parse(String_.Concat_lines_nl_skip_last + ( "{|" + , "|+id='1'|a" + , "|}" + ), String_.Concat_lines_nl_skip_last + ( "" + , "
a" + , "
" + )); + } + @Test public void Th__double() { + fxt.Test__parse(String_.Concat_lines_nl_skip_last + ( "{|" + , "!a!!b" + , "|}" + ), String_.Concat_lines_nl_skip_last + ( "" + , "" + , "" + , "
ab" + , "
" + )); + } @Test public void Blank() { // COVERS: "empty line, go to next line" fxt.Test__parse(String_.Concat_lines_nl_skip_last ( " " @@ -40,7 +78,7 @@ public class Xomw_table_wkr__tst { ( " " )); } - @Test public void Indent() { + @Test public void Tb__indent() { fxt.Test__parse(String_.Concat_lines_nl_skip_last ( "::{|" , "|-" @@ -54,7 +92,7 @@ public class Xomw_table_wkr__tst { , "" )); } - @Test public void End__no_rows() { // COVERS: "if (has_opened_tr.Len() == 0) {" + @Test public void Tb__empty() { // COVERS: "if (has_opened_tr.Len() == 0) {" fxt.Test__parse(String_.Concat_lines_nl_skip_last ( "{|" , "|}" @@ -65,10 +103,11 @@ public class Xomw_table_wkr__tst { } } class Xomw_table_wkr__fxt { + private final Xomw_parser_ctx ctx = new Xomw_parser_ctx(); private final Xomw_table_wkr wkr = new Xomw_table_wkr(); public void Test__parse(String src_str, String expd) { byte[] src_bry = Bry_.new_u8(src_str); - byte[] actl = wkr.Do_table_stuff(src_bry); + byte[] actl = wkr.Do_table_stuff(ctx, src_bry); Tfds.Eq_str_lines(expd, String_.new_u8(actl), src_str); } } diff --git a/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_sanitizer_mgr.java b/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_sanitizer_mgr.java new file mode 100644 index 000000000..1842bae56 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_sanitizer_mgr.java @@ -0,0 +1,41 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; +import gplx.xowa.parsers.htmls.*; +public class Xomw_sanitizer_mgr { + private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr(); + private final Mwh_atr_parser atr_parser = new Mwh_atr_parser(); + public void Fix_tag_attributes(Bry_bfr bfr, byte[] tag_name, byte[] atrs) { + atr_bldr.Atrs__clear(); + atr_parser.Parse(atr_bldr, -1, -1, atrs, 0, atrs.length); + int len = atr_bldr.Atrs__len(); + + // PORTED: Sanitizer.php|safeEncodeTagAttributes + for (int i = 0; i < len; i++) { + // $encAttribute = htmlspecialchars( $attribute ); + // $encValue = Sanitizer::safeEncodeAttribute( $value ); + // $attribs[] = "$encAttribute=\"$encValue\""; + Mwh_atr_itm itm = atr_bldr.Atrs__get_at(i); + bfr.Add_byte_space(); // "return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';" + bfr.Add_bry_escape_html(itm.Key_bry(), itm.Key_bgn(), itm.Key_end()); + bfr.Add_byte_eq().Add_byte_quote(); + bfr.Add(itm.Val_as_bry()); // TODO.XO:Sanitizer::encode + bfr.Add_byte_quote(); + } + } +} diff --git a/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_string_utils.java b/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_string_utils.java new file mode 100644 index 000000000..b969ee57b --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_string_utils.java @@ -0,0 +1,62 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; +public class Xomw_string_utils { + public static void Replace_markup(byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) { // REF:/includes/libs/StringUtils.php|replaceMarkup + // PORTED: avoiding multiple regex calls / String creations + // $placeholder = "\x00"; + + // Remove placeholder instances + // $text = str_replace( $placeholder, '', $text ); + + // Replace instances of the separator inside HTML-like tags with the placeholder + // $replacer = new DoubleReplacer( $search, $placeholder ); + // $cleaned = StringUtils::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text ); + + // Explode, then put the replaced separators back in + // $cleaned = str_replace( $search, $replace, $cleaned ); + // $text = str_replace( $placeholder, $search, $cleaned ); + + // if same length find / repl, do in-place replacement; EX: "!!" -> "||" + int find_len = find.length; + int repl_len = repl.length; + if (find_len != repl_len) throw Err_.new_wo_type("find and repl should be same length"); + + byte find_0 = find[0]; + byte dlm_bgn = Byte_ascii.Angle_bgn; + byte dlm_end = Byte_ascii.Angle_end; + boolean repl_active = true; + + // loop every char in array + for (int i = src_bgn; i < src_end; i++) { + byte b = src[i]; + if ( b == find_0 + && Bry_.Match(src, i + 1, i + find_len, find, 1, find_len) + && repl_active + ) { + Bry_.Set(src, i, i + find_len, repl); + } + else if (b == dlm_bgn) { + repl_active = false; + } + else if (b == dlm_end) { + repl_active = true; + } + } + } +} diff --git a/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_string_utils__tst.java b/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_string_utils__tst.java new file mode 100644 index 000000000..7b5b9c3be --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_string_utils__tst.java @@ -0,0 +1,47 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; +import org.junit.*; +public class Xomw_string_utils__tst { + private final Xomw_string_utils__fxt fxt = new Xomw_string_utils__fxt(); + @Test public void Basic() { + fxt.Test__replace_markup("a!!b" , "!!", "||", "a||b"); + } + @Test public void Missing() { + fxt.Test__replace_markup("abcd" , "!!", "||", "abcd"); + } + @Test public void Eos() { + fxt.Test__replace_markup("a!!" , "!!", "||", "a||"); + } + @Test public void Ignore() { + fxt.Test__replace_markup("a!!b!!c" , "!!", "||", "a||b||c"); + } + @Test public void Ignore__asym__lhs() { + fxt.Test__replace_markup("a!!b!!c" , "!!", "||", "a||b||c"); + } + @Test public void Ignore__asym__rhs() { + fxt.Test__replace_markup("a!!b!!>!!c" , "!!", "||", "a||b||>||c"); // NOTE: should probably be "!!>!!>", but unmatched ">" are escaped to ">" + } +} +class Xomw_string_utils__fxt { + public void Test__replace_markup(String src_str, String find, String repl, String expd) { + byte[] src_bry = Bry_.new_u8(src_str); + Xomw_string_utils.Replace_markup(src_bry, 0, src_bry.length, Bry_.new_a7(find), Bry_.new_a7(repl)); + Tfds.Eq_str(expd, src_bry); + } +} diff --git a/400_xowa/src/gplx/xowa/parsers/uniqs/Xop_uniq_mgr.java b/400_xowa/src/gplx/xowa/parsers/uniqs/Xop_uniq_mgr.java index 154a2e6a9..4b2553d7b 100644 --- a/400_xowa/src/gplx/xowa/parsers/uniqs/Xop_uniq_mgr.java +++ b/400_xowa/src/gplx/xowa/parsers/uniqs/Xop_uniq_mgr.java @@ -31,6 +31,9 @@ public class Xop_uniq_mgr { // REF.MW:/parser/StripState.php return key; } public byte[] Get(byte[] key) {return (byte[])general_trie.Match_exact(key, 0, key.length);} + public byte[] Unstrip_both(byte[] src) { + return Convert(src); + } public byte[] Convert(byte[] src) { if (general_trie.Count() == 0) return src;