From 305c2f976239bb712b458b720ab10b3b50a2af88 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Thu, 12 Jan 2017 10:20:17 -0500 Subject: [PATCH] Mw_parse.Table: Add more implementation --- 100_core/src/gplx/Bry_split_.java | 21 ++++ 100_core/src/gplx/Bry_split__tst.java | 10 +- 100_core/src/gplx/String_.java | 6 +- 100_core/src/gplx/core/tests/Gftest.java | 2 +- .../Xomw_table_wkr.java} | 101 +++++++++++------- .../mws/tables/Xomw_table_wkr__tst.java | 43 ++++++++ 6 files changed, 138 insertions(+), 45 deletions(-) rename 400_xowa/src/gplx/xowa/parsers/mws/{tblws/Xomw_tblw_wkr.java => tables/Xomw_table_wkr.java} (67%) create mode 100644 400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr__tst.java diff --git a/100_core/src/gplx/Bry_split_.java b/100_core/src/gplx/Bry_split_.java index c5e629af0..096dcfa13 100644 --- a/100_core/src/gplx/Bry_split_.java +++ b/100_core/src/gplx/Bry_split_.java @@ -116,6 +116,27 @@ public class Bry_split_ { } return (byte[][])rv.To_ary(byte[].class); } + public static byte[][] Split_w_max(byte[] src, byte dlm, int max) { + byte[][] rv = new byte[max][]; + int src_len = src.length; + int rv_idx = 0; + int itm_bgn = 0; + int src_pos = 0; + while (true) { + boolean is_last = src_pos == src_len; + byte b = is_last ? dlm : src[src_pos]; + if (b == dlm) { + rv[rv_idx++] = Bry_.Mid(src, itm_bgn, src_pos); + itm_bgn = src_pos + 1; + } + if (is_last || rv_idx == max) + break; + else + src_pos++; + } + return rv; + } + public static final int Rv__ok = 0, Rv__extend = 1, Rv__cancel = 2; } class Bry_split_wkr__to_ary implements gplx.core.brys.Bry_split_wkr { diff --git a/100_core/src/gplx/Bry_split__tst.java b/100_core/src/gplx/Bry_split__tst.java index 68d677da8..a77bb678d 100644 --- a/100_core/src/gplx/Bry_split__tst.java +++ b/100_core/src/gplx/Bry_split__tst.java @@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . 
*/ package gplx; -import org.junit.*; +import org.junit.*; import gplx.core.tests.*; public class Bry_split__tst { private final Bry_split__fxt fxt = new Bry_split__fxt(); @Test public void Split() { @@ -43,6 +43,11 @@ public class Bry_split__tst { fxt.Test_split("a|b|c|d" , 2, 6, "|", "b", "c"); fxt.Test_split("a|b|c|d" , 2, 4, "|", "b"); } + @Test public void Split_w_max() { + fxt.Test__split_w_max("a|b|c|d" , Byte_ascii.Pipe, 2, "a", "b"); // max is less + fxt.Test__split_w_max("a" , Byte_ascii.Pipe, 2, "a", null); // max is more + fxt.Test__split_w_max("|" , Byte_ascii.Pipe, 2, "", ""); // empty itms + } } class Bry_split__fxt { private final Bry_split_wkr__example wkr = new Bry_split_wkr__example(); @@ -55,6 +60,9 @@ class Bry_split__fxt { public void Test_split(String src, int src_bgn, int src_end, String dlm, String... expd) { Tfds.Eq_ary_str(Bry_.Ary(expd), Bry_split_.Split(Bry_.new_u8(src), src_bgn, src_end, Bry_.new_u8(dlm))); } + public void Test__split_w_max(String src, byte dlm, int max, String... expd) { + Gftest.Eq__ary(expd, String_.Ary(Bry_split_.Split_w_max(Bry_.new_u8(src), dlm, max))); + } } class Bry_split_wkr__example implements gplx.core.brys.Bry_split_wkr { private final List_adp list = List_adp_.New(); diff --git a/100_core/src/gplx/String_.java b/100_core/src/gplx/String_.java index f962ead18..86e202890 100644 --- a/100_core/src/gplx/String_.java +++ b/100_core/src/gplx/String_.java @@ -488,8 +488,10 @@ public class String_ { if (ary == null) return String_.Ary_empty; int ary_len = ary.length; String[] rv = new String[ary_len]; - for (int i = 0; i < ary_len; i++) - rv[i] = String_.new_u8(ary[i]); + for (int i = 0; i < ary_len; i++) { + byte[] itm = ary[i]; + rv[i] = itm == null ? 
null : String_.new_u8(itm); + } return rv; } public static String [] Ary_filter(String[] src, String[] filter) { diff --git a/100_core/src/gplx/core/tests/Gftest.java b/100_core/src/gplx/core/tests/Gftest.java index d050cfd41..cd69b40e5 100644 --- a/100_core/src/gplx/core/tests/Gftest.java +++ b/100_core/src/gplx/core/tests/Gftest.java @@ -153,7 +153,7 @@ public class Gftest { if (idx < len) { switch (type_id) { case Type_adp_.Tid__bool: bfr.Add_yn(Bool_.Cast(Array_.Get_at(ary, idx))); break; - case Type_adp_.Tid__bry: bfr.Add((byte[])Array_.Get_at(ary, idx)); break; + case Type_adp_.Tid__bry: bfr.Add_safe((byte[])Array_.Get_at(ary, idx)); break; case Type_adp_.Tid__long: bfr.Add_long_variable(Long_.cast(Array_.Get_at(ary, idx))); break; case Type_adp_.Tid__int: bfr.Add_int_variable(Int_.cast(Array_.Get_at(ary, idx))); break; default: throw Err_.new_unhandled_default(type_id); diff --git a/400_xowa/src/gplx/xowa/parsers/mws/tblws/Xomw_tblw_wkr.java b/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr.java similarity index 67% rename from 400_xowa/src/gplx/xowa/parsers/mws/tblws/Xomw_tblw_wkr.java rename to 400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr.java index d71e0fc4d..bff7ea69d 100644 --- a/400_xowa/src/gplx/xowa/parsers/mws/tblws/Xomw_tblw_wkr.java +++ b/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr.java @@ -15,8 +15,8 @@ GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . 
*/ -package gplx.xowa.parsers.mws.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; -public class Xomw_tblw_wkr { +package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; +public class Xomw_table_wkr { private final Bry_bfr bfr = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New(); private final List_adp td_history = List_adp_.New() @@ -26,10 +26,12 @@ public class Xomw_tblw_wkr { , has_opened_tr = List_adp_.New() ; private static final byte[] - Bry__tblw_end = Bry_.new_a7("|}"), Bry__tr = Bry_.new_a7("|-"), Bry__th = Bry_.new_a7("|+") + Bry__tblw_bgn = Bry_.new_a7("{|"), Bry__tblw_end = Bry_.new_a7("|}"), Bry__tr = Bry_.new_a7("|-"), Bry__th = Bry_.new_a7("|+"), Bry__td2 = Bry_.new_a7("||") + , Bry__lnki = Bry_.new_a7("[[") , Bry__special_case = Bry_.new_a7("\n\n
") , Bry__tag__td = Bry_.new_a7("td"), Bry__tag__th = Bry_.new_a7("th"), Bry__tag__caption = Bry_.new_a7("caption") , Bry__elem_end__tr = Bry_.new_a7("") + , Bry__dl_dd = Bry_.new_a7("
") ; private static final int Len__special_case = Bry__special_case.length; public byte[] Do_table_stuff(byte[] src) { @@ -60,17 +62,27 @@ public class Xomw_tblw_wkr { chars_2[0] = line[0]; if (line_len > 1) chars_2[1] = line[1]; - boolean is_indented_table = false; + // PORTED: preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches) + byte[] colon_atrs = null; + int colons_end = Bry_find_.Find_fwd(src, Byte_ascii.Pipe, 0, line_len); + if (colons_end > 0) { + int atrs_bgn = Bry_find_.Find_fwd_while(line, colons_end, line_len, Byte_ascii.Space); + if (Bry_.Eq(line, atrs_bgn, atrs_bgn + 2, Bry__tblw_bgn)) { + colon_atrs = Bry_.Mid(line, atrs_bgn, line_len); + } + } // ":*" , "\s*" , "{|" , ".*" - if (is_indented_table) { -// if (preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches)) { -// // First check if we are starting a new table -// $indent_level = strlen(matches[1]); -// -// $attributes = $this->mStripState->unstripBoth(matches[2]); -// $attributes = Sanitizer::fixTagAttributes(attributes, 'table'); -// -// line_orig = str_repeat('
', $indent_level) . ""; + if (colon_atrs != null) { + // First check if we are starting a new table + indent_level = colons_end; + +// atrs = $this->mStripState->unstripBoth(matches[2]); +// atrs = Sanitizer::fixTagAttributes(attributes, 'table'); + + // PORTED: line_orig = str_repeat('
', $indent_level) . ""; + for (int j = 0; j < indent_level; j++) + tmp_bfr.Add(Bry__dl_dd); + line_orig = tmp_bfr.Add_str_a7("
"), indent_level)).To_bry_and_clear(); } else if (Bry_.Eq(chars_2, Bry__tr)) { -// // Now we have a table row -// $line = preg_replace('#^\|-+#', '', $line); -// -// // Whats after the tag is now only attributes -// $attributes = $this->mStripState->unstripBoth(line); -// $attributes = Sanitizer::fixTagAttributes(attributes, 'tr'); + // Now we have a table row + line = Bry_.Mid(line, 2); // PORTED: $line = preg_replace('#^\|-+#', '', $line); + + // Whats after the tag is now only attributes + byte[] atrs = Bry_.Empty; +// atrs = $this->mStripState->unstripBoth(line); +// atrs = Sanitizer::fixTagAttributes(attributes, 'tr'); List_adp_.Pop(tr_attributes); -// array_push(tr_attributes, $attributes); + tr_attributes.Add(atrs); line = Bry_.Empty; byte[] last_tag = (byte[])List_adp_.Pop(last_tag_history); @@ -145,19 +158,21 @@ public class Xomw_tblw_wkr { // Implies both are valid for table headings. if (char_0 == Byte_ascii.Nl) { - // $line = StringUtils::replaceMarkup('!!', '||', $line); +// $line = StringUtils::replaceMarkup('!!', '||', $line); } // Split up multiple cells on the same line. // FIXME : This can result in improper nesting of tags processed // by earlier parser steps. 
-// $cells = explode('||', $line); + byte[][] cells = Bry_split_.Split(line, Bry__td2); line_orig = Bry_.Empty; byte[] previous = null; // Loop through each table cell -// foreach (cells as $cell) { + int cells_len = cells.length; + for (int j = 0; j < cells_len; j++) { + byte[] cell = cells[j]; previous = Bry_.Empty; if (char_0 != Byte_ascii.Plus) { byte[] tr_after = (byte[])List_adp_.Pop(tr_attributes); @@ -190,25 +205,29 @@ public class Xomw_tblw_wkr { } last_tag_history.Add(last_tag); -// -// // A cell could contain both parameters and data -// $cell_data = explode('|', $cell, 2); -// -// // Bug 553: Note that a '|' inside an invalid link should not -// // be mistaken as delimiting cell parameters -// if (strpos(cell_data[0], '[[') !== false) { -// $cell = "{$previous}<{$last_tag}>{$cell}"; -// } else if (count(cell_data) == 1) { -// $cell = "{$previous}<{$last_tag}>{$cell_data[0]}"; -// } else { -// $attributes = $this->mStripState->unstripBoth(cell_data[0]); -// $attributes = Sanitizer::fixTagAttributes(attributes, $last_tag); -// $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}"; -// } -// -// line_orig = Bry_.Add(line_orig, $cell); + + // A cell could contain both parameters and data + byte[][] cell_data = Bry_split_.Split_w_max(cell, Byte_ascii.Pipe, 2); + + // Bug 553: Note that a '|' inside an invalid link should not + // be mistaken as delimiting cell parameters + byte[] cell_data_0 = cell_data[0]; + byte[] cell_data_1 = cell_data[1]; + if (Bry_find_.Find_fwd(cell_data_0, Bry__lnki) != Bry_find_.Not_found) { + cell = tmp_bfr.Add(previous).Add_str_a7("<").Add(last_tag).Add_str_a7(">").Add(cell).To_bry_and_clear(); + } + else if (cell_data_1 == null) { + cell = tmp_bfr.Add(previous).Add_str_a7("<").Add(last_tag).Add_str_a7(">").Add(cell_data_0).To_bry_and_clear(); + } + else { +// atrs = $this->mStripState->unstripBoth(cell_data[0]); +// atrs = Sanitizer::fixTagAttributes(attributes, $last_tag); + cell = 
tmp_bfr.Add(previous).Add_str_a7("<").Add(last_tag).Add_str_a7(">").Add(cell_data_1).To_bry_and_clear(); + } + + line_orig = Bry_.Add(line_orig, cell); td_history.Add(true); -// } + } } bfr.Add(line_orig).Add_byte_nl(); } diff --git a/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr__tst.java b/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr__tst.java new file mode 100644 index 000000000..0a969ff1c --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr__tst.java @@ -0,0 +1,43 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; +import org.junit.*; +public class Xomw_table_wkr__tst { + private final Xomw_table_wkr__fxt fxt = new Xomw_table_wkr__fxt(); + @Test public void Table() { + fxt.Test__parse(String_.Concat_lines_nl_skip_last + ( "{|" + , "|-" + , "|a" + , "|}" + ), String_.Concat_lines_nl_skip_last + ( "{|" + , "|-" + , "|a" + , "|}" + )); + } +} +class Xomw_table_wkr__fxt { + private final Xomw_table_wkr wkr = new Xomw_table_wkr(); + public void Test__parse(String src_str, String expd) { + byte[] src_bry = Bry_.new_u8(src_str); + byte[] actl = wkr.Do_table_stuff(src_bry); + Tfds.Eq_str_lines(expd, String_.new_u8(actl), src_str); + } +}