1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-09-30 07:20:52 +00:00

Mw_parse.Table: Add more implementation

This commit is contained in:
gnosygnu 2017-01-12 10:20:17 -05:00
parent b35a45657c
commit 305c2f9762
6 changed files with 138 additions and 45 deletions

View File

@ -116,6 +116,27 @@ public class Bry_split_ {
} }
return (byte[][])rv.To_ary(byte[].class); return (byte[][])rv.To_ary(byte[].class);
} }
/**
 * Splits {@code src} on the single-byte delimiter {@code dlm} and returns the first {@code max} items.
 *
 * The returned array always has length {@code max} (or 0 when {@code max} is less than 1):
 * - if {@code src} holds fewer than {@code max} items, the unfilled trailing slots are {@code null}
 * - if {@code src} holds more than {@code max} items, scanning stops after the {@code max}-th item
 *   and the rest of {@code src} is discarded
 *
 * NOTE(review): PHP's {@code explode($dlm, $src, $max)} keeps the unsplit remainder in the last item;
 * this method drops it instead ("a|b|c|d" with max=2 gives "a", "b"). The unit test pins that behavior,
 * so it is preserved here -- confirm whether callers ported from PHP need the remainder.
 *
 * @param src bytes to split; must not be null
 * @param dlm delimiter byte
 * @param max maximum number of items to return; values less than 1 yield an empty array
 * @return array of items, each produced by {@code Bry_.Mid} (presumably a copy of the given range -- confirm)
 */
public static byte[][] Split_w_max(byte[] src, byte dlm, int max) {
	// guard: "new byte[max][]" throws for negative max, and the first store would overflow a 0-length array
	if (max < 1) return new byte[0][];

	byte[][] rv = new byte[max][];
	int src_len = src.length;
	int rv_idx = 0;
	int itm_bgn = 0;
	for (int src_pos = 0; ; src_pos++) {
		boolean is_last = src_pos == src_len;
		// end-of-src acts as a virtual delimiter so that the final item is also emitted
		if (is_last || src[src_pos] == dlm) {
			rv[rv_idx++] = Bry_.Mid(src, itm_bgn, src_pos);
			itm_bgn = src_pos + 1;	// next item starts just after the delimiter
		}
		// stop at end-of-src, or as soon as all slots are filled
		if (is_last || rv_idx == max) break;
	}
	return rv;
}
public static final int Rv__ok = 0, Rv__extend = 1, Rv__cancel = 2; public static final int Rv__ok = 0, Rv__extend = 1, Rv__cancel = 2;
} }
class Bry_split_wkr__to_ary implements gplx.core.brys.Bry_split_wkr { class Bry_split_wkr__to_ary implements gplx.core.brys.Bry_split_wkr {

View File

@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
package gplx; package gplx;
import org.junit.*; import org.junit.*; import gplx.core.tests.*;
public class Bry_split__tst { public class Bry_split__tst {
private final Bry_split__fxt fxt = new Bry_split__fxt(); private final Bry_split__fxt fxt = new Bry_split__fxt();
@Test public void Split() { @Test public void Split() {
@ -43,6 +43,11 @@ public class Bry_split__tst {
fxt.Test_split("a|b|c|d" , 2, 6, "|", "b", "c"); fxt.Test_split("a|b|c|d" , 2, 6, "|", "b", "c");
fxt.Test_split("a|b|c|d" , 2, 4, "|", "b"); fxt.Test_split("a|b|c|d" , 2, 4, "|", "b");
} }
@Test public void Split_w_max() {
	byte pipe = Byte_ascii.Pipe;
	// src holds more items than max: only the first two are expected
	fxt.Test__split_w_max("a|b|c|d", pipe, 2, "a", "b");
	// src holds fewer items than max: the unfilled slot is expected to be null
	fxt.Test__split_w_max("a", pipe, 2, "a", null);
	// src is a lone delimiter: two empty items are expected
	fxt.Test__split_w_max("|", pipe, 2, "", "");
}
} }
class Bry_split__fxt { class Bry_split__fxt {
private final Bry_split_wkr__example wkr = new Bry_split_wkr__example(); private final Bry_split_wkr__example wkr = new Bry_split_wkr__example();
@ -55,6 +60,9 @@ class Bry_split__fxt {
public void Test_split(String src, int src_bgn, int src_end, String dlm, String... expd) { public void Test_split(String src, int src_bgn, int src_end, String dlm, String... expd) {
Tfds.Eq_ary_str(Bry_.Ary(expd), Bry_split_.Split(Bry_.new_u8(src), src_bgn, src_end, Bry_.new_u8(dlm))); Tfds.Eq_ary_str(Bry_.Ary(expd), Bry_split_.Split(Bry_.new_u8(src), src_bgn, src_end, Bry_.new_u8(dlm)));
} }
// Splits src with Bry_split_.Split_w_max and compares the items, converted to Strings, against expd.
public void Test__split_w_max(String src, byte dlm, int max, String... expd) {
	byte[] src_bry = Bry_.new_u8(src);
	byte[][] actl_brys = Bry_split_.Split_w_max(src_bry, dlm, max);
	String[] actl_strs = String_.Ary(actl_brys);
	Gftest.Eq__ary(expd, actl_strs);
}
} }
class Bry_split_wkr__example implements gplx.core.brys.Bry_split_wkr { class Bry_split_wkr__example implements gplx.core.brys.Bry_split_wkr {
private final List_adp list = List_adp_.New(); private final List_adp list = List_adp_.New();

View File

@ -488,8 +488,10 @@ public class String_ {
if (ary == null) return String_.Ary_empty; if (ary == null) return String_.Ary_empty;
int ary_len = ary.length; int ary_len = ary.length;
String[] rv = new String[ary_len]; String[] rv = new String[ary_len];
for (int i = 0; i < ary_len; i++) for (int i = 0; i < ary_len; i++) {
rv[i] = String_.new_u8(ary[i]); byte[] itm = ary[i];
rv[i] = itm == null ? null : String_.new_u8(itm);
}
return rv; return rv;
} }
public static String [] Ary_filter(String[] src, String[] filter) { public static String [] Ary_filter(String[] src, String[] filter) {

View File

@ -153,7 +153,7 @@ public class Gftest {
if (idx < len) { if (idx < len) {
switch (type_id) { switch (type_id) {
case Type_adp_.Tid__bool: bfr.Add_yn(Bool_.Cast(Array_.Get_at(ary, idx))); break; case Type_adp_.Tid__bool: bfr.Add_yn(Bool_.Cast(Array_.Get_at(ary, idx))); break;
case Type_adp_.Tid__bry: bfr.Add((byte[])Array_.Get_at(ary, idx)); break; case Type_adp_.Tid__bry: bfr.Add_safe((byte[])Array_.Get_at(ary, idx)); break;
case Type_adp_.Tid__long: bfr.Add_long_variable(Long_.cast(Array_.Get_at(ary, idx))); break; case Type_adp_.Tid__long: bfr.Add_long_variable(Long_.cast(Array_.Get_at(ary, idx))); break;
case Type_adp_.Tid__int: bfr.Add_int_variable(Int_.cast(Array_.Get_at(ary, idx))); break; case Type_adp_.Tid__int: bfr.Add_int_variable(Int_.cast(Array_.Get_at(ary, idx))); break;
default: throw Err_.new_unhandled_default(type_id); default: throw Err_.new_unhandled_default(type_id);

View File

@ -15,8 +15,8 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
package gplx.xowa.parsers.mws.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
public class Xomw_tblw_wkr { public class Xomw_table_wkr {
private final Bry_bfr bfr = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New(); private final Bry_bfr bfr = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New();
private final List_adp private final List_adp
td_history = List_adp_.New() td_history = List_adp_.New()
@ -26,10 +26,12 @@ public class Xomw_tblw_wkr {
, has_opened_tr = List_adp_.New() , has_opened_tr = List_adp_.New()
; ;
private static final byte[] private static final byte[]
Bry__tblw_end = Bry_.new_a7("|}"), Bry__tr = Bry_.new_a7("|-"), Bry__th = Bry_.new_a7("|+") Bry__tblw_bgn = Bry_.new_a7("{|"), Bry__tblw_end = Bry_.new_a7("|}"), Bry__tr = Bry_.new_a7("|-"), Bry__th = Bry_.new_a7("|+"), Bry__td2 = Bry_.new_a7("||")
, Bry__lnki = Bry_.new_a7("[[")
, Bry__special_case = Bry_.new_a7("<table>\n<tr><td></td></tr>\n</table>") , Bry__special_case = Bry_.new_a7("<table>\n<tr><td></td></tr>\n</table>")
, Bry__tag__td = Bry_.new_a7("td"), Bry__tag__th = Bry_.new_a7("th"), Bry__tag__caption = Bry_.new_a7("caption") , Bry__tag__td = Bry_.new_a7("td"), Bry__tag__th = Bry_.new_a7("th"), Bry__tag__caption = Bry_.new_a7("caption")
, Bry__elem_end__tr = Bry_.new_a7("</tr>") , Bry__elem_end__tr = Bry_.new_a7("</tr>")
, Bry__dl_dd = Bry_.new_a7("<dl><dd>")
; ;
private static final int Len__special_case = Bry__special_case.length; private static final int Len__special_case = Bry__special_case.length;
public byte[] Do_table_stuff(byte[] src) { public byte[] Do_table_stuff(byte[] src) {
@ -60,17 +62,27 @@ public class Xomw_tblw_wkr {
chars_2[0] = line[0]; chars_2[0] = line[0];
if (line_len > 1) chars_2[1] = line[1]; if (line_len > 1) chars_2[1] = line[1];
boolean is_indented_table = false; // PORTED: preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches)
byte[] colon_atrs = null;
int colons_end = Bry_find_.Find_fwd(src, Byte_ascii.Pipe, 0, line_len);
if (colons_end > 0) {
int atrs_bgn = Bry_find_.Find_fwd_while(line, colons_end, line_len, Byte_ascii.Space);
if (Bry_.Eq(line, atrs_bgn, atrs_bgn + 2, Bry__tblw_bgn)) {
colon_atrs = Bry_.Mid(line, atrs_bgn, line_len);
}
}
// ":*" , "\s*" , "{|" , ".*" // ":*" , "\s*" , "{|" , ".*"
if (is_indented_table) { if (colon_atrs != null) {
// if (preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches)) { // First check if we are starting a new table
// // First check if we are starting a new table indent_level = colons_end;
// $indent_level = strlen(matches[1]);
// // atrs = $this->mStripState->unstripBoth(matches[2]);
// $attributes = $this->mStripState->unstripBoth(matches[2]); // atrs = Sanitizer::fixTagAttributes(attributes, 'table');
// $attributes = Sanitizer::fixTagAttributes(attributes, 'table');
// // PORTED: line_orig = str_repeat('<dl><dd>', $indent_level) . "<table{atrs}>";
// line_orig = str_repeat('<dl><dd>', $indent_level) . "<table{$attributes}>"; for (int j = 0; j < indent_level; j++)
tmp_bfr.Add(Bry__dl_dd);
line_orig = tmp_bfr.Add_str_a7("<table").Add(colon_atrs).Add_byte(Byte_ascii.Angle_end).To_bry_and_clear();
td_history.Add(false); td_history.Add(false);
last_tag_history.Add(Bry_.Empty); last_tag_history.Add(Bry_.Empty);
tr_history.Add(false); tr_history.Add(false);
@ -104,14 +116,15 @@ public class Xomw_tblw_wkr {
line_orig = tmp_bfr.Add(line).Add(Bry_.Repeat_bry(Bry_.new_a7("</dd></dl>"), indent_level)).To_bry_and_clear(); line_orig = tmp_bfr.Add(line).Add(Bry_.Repeat_bry(Bry_.new_a7("</dd></dl>"), indent_level)).To_bry_and_clear();
} }
else if (Bry_.Eq(chars_2, Bry__tr)) { else if (Bry_.Eq(chars_2, Bry__tr)) {
// // Now we have a table row // Now we have a table row
// $line = preg_replace('#^\|-+#', '', $line); line = Bry_.Mid(line, 2); // PORTED: $line = preg_replace('#^\|-+#', '', $line);
//
// // Whats after the tag is now only attributes // Whats after the tag is now only attributes
// $attributes = $this->mStripState->unstripBoth(line); byte[] atrs = Bry_.Empty;
// $attributes = Sanitizer::fixTagAttributes(attributes, 'tr'); // atrs = $this->mStripState->unstripBoth(line);
// atrs = Sanitizer::fixTagAttributes(attributes, 'tr');
List_adp_.Pop(tr_attributes); List_adp_.Pop(tr_attributes);
// array_push(tr_attributes, $attributes); tr_attributes.Add(atrs);
line = Bry_.Empty; line = Bry_.Empty;
byte[] last_tag = (byte[])List_adp_.Pop(last_tag_history); byte[] last_tag = (byte[])List_adp_.Pop(last_tag_history);
@ -145,19 +158,21 @@ public class Xomw_tblw_wkr {
// Implies both are valid for table headings. // Implies both are valid for table headings.
if (char_0 == Byte_ascii.Nl) { if (char_0 == Byte_ascii.Nl) {
// $line = StringUtils::replaceMarkup('!!', '||', $line); // $line = StringUtils::replaceMarkup('!!', '||', $line);
} }
// Split up multiple cells on the same line. // Split up multiple cells on the same line.
// FIXME : This can result in improper nesting of tags processed // FIXME : This can result in improper nesting of tags processed
// by earlier parser steps. // by earlier parser steps.
// $cells = explode('||', $line); byte[][] cells = Bry_split_.Split(line, Bry__td2);
line_orig = Bry_.Empty; line_orig = Bry_.Empty;
byte[] previous = null; byte[] previous = null;
// Loop through each table cell // Loop through each table cell
// foreach (cells as $cell) { int cells_len = cells.length;
for (int j = 0; j < cells_len; i++) {
byte[] cell = cells[j];
previous = Bry_.Empty; previous = Bry_.Empty;
if (char_0 != Byte_ascii.Plus) { if (char_0 != Byte_ascii.Plus) {
byte[] tr_after = (byte[])List_adp_.Pop(tr_attributes); byte[] tr_after = (byte[])List_adp_.Pop(tr_attributes);
@ -190,25 +205,29 @@ public class Xomw_tblw_wkr {
} }
last_tag_history.Add(last_tag); last_tag_history.Add(last_tag);
//
// // A cell could contain both parameters and data // A cell could contain both parameters and data
// $cell_data = explode('|', $cell, 2); byte[][] cell_data = Bry_split_.Split_w_max(cell, Byte_ascii.Pipe, 2);
//
// // Bug 553: Note that a '|' inside an invalid link should not // Bug 553: Note that a '|' inside an invalid link should not
// // be mistaken as delimiting cell parameters // be mistaken as delimiting cell parameters
// if (strpos(cell_data[0], '[[') !== false) { byte[] cell_data_0 = cell_data[0];
// $cell = "{$previous}<{$last_tag}>{$cell}"; byte[] cell_data_1 = cell_data[1];
// } else if (count(cell_data) == 1) { if (Bry_find_.Find_fwd(cell_data_0, Bry__lnki) != Bry_find_.Not_found) {
// $cell = "{$previous}<{$last_tag}>{$cell_data[0]}"; cell = tmp_bfr.Add(previous).Add_str_a7("<").Add(last_tag).Add_str_a7(">").Add(cell).To_bry_and_clear();
// } else { }
// $attributes = $this->mStripState->unstripBoth(cell_data[0]); else if (cell_data_1 == null) {
// $attributes = Sanitizer::fixTagAttributes(attributes, $last_tag); cell = tmp_bfr.Add(previous).Add_str_a7("<").Add(last_tag).Add_str_a7(">").Add(cell_data_0).To_bry_and_clear();
// $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}"; }
// } else {
// // atrs = $this->mStripState->unstripBoth(cell_data[0]);
// line_orig = Bry_.Add(line_orig, $cell); // atrs = Sanitizer::fixTagAttributes(attributes, $last_tag);
cell = tmp_bfr.Add(previous).Add_str_a7("<").Add(last_tag).Add_str_a7(">").Add(cell_data_1).To_bry_and_clear();
}
line_orig = Bry_.Add(line_orig, cell);
td_history.Add(true); td_history.Add(true);
// } }
} }
bfr.Add(line_orig).Add_byte_nl(); bfr.Add(line_orig).Add_byte_nl();
} }

View File

@ -0,0 +1,43 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
import org.junit.*;
// Tests for Xomw_table_wkr.Do_table_stuff, driven through Xomw_table_wkr__fxt.
public class Xomw_table_wkr__tst {
	private final Xomw_table_wkr__fxt fxt = new Xomw_table_wkr__fxt();
	@Test public void Table() {
		// minimal table: open, row, one cell, close
		String src = String_.Concat_lines_nl_skip_last("{|", "|-", "|a", "|}");
		// NOTE(review): expected text is identical to the raw wikitext input; this looks like a placeholder -- confirm the intended output
		String expd = String_.Concat_lines_nl_skip_last("{|", "|-", "|a", "|}");
		fxt.Test__parse(src, expd);
	}
}
// Test fixture: owns one Xomw_table_wkr and checks its output for a given source String.
class Xomw_table_wkr__fxt {
	private final Xomw_table_wkr wkr = new Xomw_table_wkr();
	// Runs Do_table_stuff on src_str and compares the result to expd; src_str is also passed as the 3rd arg of Eq_str_lines (presumably the failure message -- confirm).
	public void Test__parse(String src_str, String expd) {
		byte[] actl_bry = wkr.Do_table_stuff(Bry_.new_u8(src_str));
		String actl_str = String_.new_u8(actl_bry);
		Tfds.Eq_str_lines(expd, actl_str, src_str);
	}
}