mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Mw_parse.Table: Add more implementation
This commit is contained in:
parent
b35a45657c
commit
305c2f9762
@ -116,6 +116,27 @@ public class Bry_split_ {
|
|||||||
}
|
}
|
||||||
return (byte[][])rv.To_ary(byte[].class);
|
return (byte[][])rv.To_ary(byte[].class);
|
||||||
}
|
}
|
||||||
|
public static byte[][] Split_w_max(byte[] src, byte dlm, int max) {
|
||||||
|
byte[][] rv = new byte[max][];
|
||||||
|
int src_len = src.length;
|
||||||
|
int rv_idx = 0;
|
||||||
|
int itm_bgn = 0;
|
||||||
|
int src_pos = 0;
|
||||||
|
while (true) {
|
||||||
|
boolean is_last = src_pos == src_len;
|
||||||
|
byte b = is_last ? dlm : src[src_pos];
|
||||||
|
if (b == dlm) {
|
||||||
|
rv[rv_idx++] = Bry_.Mid(src, itm_bgn, src_pos);
|
||||||
|
itm_bgn = src_pos + 1;
|
||||||
|
}
|
||||||
|
if (is_last || rv_idx == max)
|
||||||
|
break;
|
||||||
|
else
|
||||||
|
src_pos++;
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
public static final int Rv__ok = 0, Rv__extend = 1, Rv__cancel = 2;
|
public static final int Rv__ok = 0, Rv__extend = 1, Rv__cancel = 2;
|
||||||
}
|
}
|
||||||
class Bry_split_wkr__to_ary implements gplx.core.brys.Bry_split_wkr {
|
class Bry_split_wkr__to_ary implements gplx.core.brys.Bry_split_wkr {
|
||||||
|
@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
|||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
package gplx;
|
package gplx;
|
||||||
import org.junit.*;
|
import org.junit.*; import gplx.core.tests.*;
|
||||||
public class Bry_split__tst {
|
public class Bry_split__tst {
|
||||||
private final Bry_split__fxt fxt = new Bry_split__fxt();
|
private final Bry_split__fxt fxt = new Bry_split__fxt();
|
||||||
@Test public void Split() {
|
@Test public void Split() {
|
||||||
@ -43,6 +43,11 @@ public class Bry_split__tst {
|
|||||||
fxt.Test_split("a|b|c|d" , 2, 6, "|", "b", "c");
|
fxt.Test_split("a|b|c|d" , 2, 6, "|", "b", "c");
|
||||||
fxt.Test_split("a|b|c|d" , 2, 4, "|", "b");
|
fxt.Test_split("a|b|c|d" , 2, 4, "|", "b");
|
||||||
}
|
}
|
||||||
|
@Test public void Split_w_max() {
|
||||||
|
fxt.Test__split_w_max("a|b|c|d" , Byte_ascii.Pipe, 2, "a", "b"); // max is less
|
||||||
|
fxt.Test__split_w_max("a" , Byte_ascii.Pipe, 2, "a", null); // max is more
|
||||||
|
fxt.Test__split_w_max("|" , Byte_ascii.Pipe, 2, "", ""); // empty itms
|
||||||
|
}
|
||||||
}
|
}
|
||||||
class Bry_split__fxt {
|
class Bry_split__fxt {
|
||||||
private final Bry_split_wkr__example wkr = new Bry_split_wkr__example();
|
private final Bry_split_wkr__example wkr = new Bry_split_wkr__example();
|
||||||
@ -55,6 +60,9 @@ class Bry_split__fxt {
|
|||||||
public void Test_split(String src, int src_bgn, int src_end, String dlm, String... expd) {
|
public void Test_split(String src, int src_bgn, int src_end, String dlm, String... expd) {
|
||||||
Tfds.Eq_ary_str(Bry_.Ary(expd), Bry_split_.Split(Bry_.new_u8(src), src_bgn, src_end, Bry_.new_u8(dlm)));
|
Tfds.Eq_ary_str(Bry_.Ary(expd), Bry_split_.Split(Bry_.new_u8(src), src_bgn, src_end, Bry_.new_u8(dlm)));
|
||||||
}
|
}
|
||||||
|
public void Test__split_w_max(String src, byte dlm, int max, String... expd) {
|
||||||
|
Gftest.Eq__ary(expd, String_.Ary(Bry_split_.Split_w_max(Bry_.new_u8(src), dlm, max)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
class Bry_split_wkr__example implements gplx.core.brys.Bry_split_wkr {
|
class Bry_split_wkr__example implements gplx.core.brys.Bry_split_wkr {
|
||||||
private final List_adp list = List_adp_.New();
|
private final List_adp list = List_adp_.New();
|
||||||
|
@ -488,8 +488,10 @@ public class String_ {
|
|||||||
if (ary == null) return String_.Ary_empty;
|
if (ary == null) return String_.Ary_empty;
|
||||||
int ary_len = ary.length;
|
int ary_len = ary.length;
|
||||||
String[] rv = new String[ary_len];
|
String[] rv = new String[ary_len];
|
||||||
for (int i = 0; i < ary_len; i++)
|
for (int i = 0; i < ary_len; i++) {
|
||||||
rv[i] = String_.new_u8(ary[i]);
|
byte[] itm = ary[i];
|
||||||
|
rv[i] = itm == null ? null : String_.new_u8(itm);
|
||||||
|
}
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
public static String [] Ary_filter(String[] src, String[] filter) {
|
public static String [] Ary_filter(String[] src, String[] filter) {
|
||||||
|
@ -153,7 +153,7 @@ public class Gftest {
|
|||||||
if (idx < len) {
|
if (idx < len) {
|
||||||
switch (type_id) {
|
switch (type_id) {
|
||||||
case Type_adp_.Tid__bool: bfr.Add_yn(Bool_.Cast(Array_.Get_at(ary, idx))); break;
|
case Type_adp_.Tid__bool: bfr.Add_yn(Bool_.Cast(Array_.Get_at(ary, idx))); break;
|
||||||
case Type_adp_.Tid__bry: bfr.Add((byte[])Array_.Get_at(ary, idx)); break;
|
case Type_adp_.Tid__bry: bfr.Add_safe((byte[])Array_.Get_at(ary, idx)); break;
|
||||||
case Type_adp_.Tid__long: bfr.Add_long_variable(Long_.cast(Array_.Get_at(ary, idx))); break;
|
case Type_adp_.Tid__long: bfr.Add_long_variable(Long_.cast(Array_.Get_at(ary, idx))); break;
|
||||||
case Type_adp_.Tid__int: bfr.Add_int_variable(Int_.cast(Array_.Get_at(ary, idx))); break;
|
case Type_adp_.Tid__int: bfr.Add_int_variable(Int_.cast(Array_.Get_at(ary, idx))); break;
|
||||||
default: throw Err_.new_unhandled_default(type_id);
|
default: throw Err_.new_unhandled_default(type_id);
|
||||||
|
@ -15,8 +15,8 @@ GNU Affero General Public License for more details.
|
|||||||
You should have received a copy of the GNU Affero General Public License
|
You should have received a copy of the GNU Affero General Public License
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
package gplx.xowa.parsers.mws.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||||
public class Xomw_tblw_wkr {
|
public class Xomw_table_wkr {
|
||||||
private final Bry_bfr bfr = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New();
|
private final Bry_bfr bfr = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New();
|
||||||
private final List_adp
|
private final List_adp
|
||||||
td_history = List_adp_.New()
|
td_history = List_adp_.New()
|
||||||
@ -26,10 +26,12 @@ public class Xomw_tblw_wkr {
|
|||||||
, has_opened_tr = List_adp_.New()
|
, has_opened_tr = List_adp_.New()
|
||||||
;
|
;
|
||||||
private static final byte[]
|
private static final byte[]
|
||||||
Bry__tblw_end = Bry_.new_a7("|}"), Bry__tr = Bry_.new_a7("|-"), Bry__th = Bry_.new_a7("|+")
|
Bry__tblw_bgn = Bry_.new_a7("{|"), Bry__tblw_end = Bry_.new_a7("|}"), Bry__tr = Bry_.new_a7("|-"), Bry__th = Bry_.new_a7("|+"), Bry__td2 = Bry_.new_a7("||")
|
||||||
|
, Bry__lnki = Bry_.new_a7("[[")
|
||||||
, Bry__special_case = Bry_.new_a7("<table>\n<tr><td></td></tr>\n</table>")
|
, Bry__special_case = Bry_.new_a7("<table>\n<tr><td></td></tr>\n</table>")
|
||||||
, Bry__tag__td = Bry_.new_a7("td"), Bry__tag__th = Bry_.new_a7("th"), Bry__tag__caption = Bry_.new_a7("caption")
|
, Bry__tag__td = Bry_.new_a7("td"), Bry__tag__th = Bry_.new_a7("th"), Bry__tag__caption = Bry_.new_a7("caption")
|
||||||
, Bry__elem_end__tr = Bry_.new_a7("</tr>")
|
, Bry__elem_end__tr = Bry_.new_a7("</tr>")
|
||||||
|
, Bry__dl_dd = Bry_.new_a7("<dl><dd>")
|
||||||
;
|
;
|
||||||
private static final int Len__special_case = Bry__special_case.length;
|
private static final int Len__special_case = Bry__special_case.length;
|
||||||
public byte[] Do_table_stuff(byte[] src) {
|
public byte[] Do_table_stuff(byte[] src) {
|
||||||
@ -60,17 +62,27 @@ public class Xomw_tblw_wkr {
|
|||||||
chars_2[0] = line[0];
|
chars_2[0] = line[0];
|
||||||
if (line_len > 1) chars_2[1] = line[1];
|
if (line_len > 1) chars_2[1] = line[1];
|
||||||
|
|
||||||
boolean is_indented_table = false;
|
// PORTED: preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches)
|
||||||
|
byte[] colon_atrs = null;
|
||||||
|
int colons_end = Bry_find_.Find_fwd(src, Byte_ascii.Pipe, 0, line_len);
|
||||||
|
if (colons_end > 0) {
|
||||||
|
int atrs_bgn = Bry_find_.Find_fwd_while(line, colons_end, line_len, Byte_ascii.Space);
|
||||||
|
if (Bry_.Eq(line, atrs_bgn, atrs_bgn + 2, Bry__tblw_bgn)) {
|
||||||
|
colon_atrs = Bry_.Mid(line, atrs_bgn, line_len);
|
||||||
|
}
|
||||||
|
}
|
||||||
// ":*" , "\s*" , "{|" , ".*"
|
// ":*" , "\s*" , "{|" , ".*"
|
||||||
if (is_indented_table) {
|
if (colon_atrs != null) {
|
||||||
// if (preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches)) {
|
// First check if we are starting a new table
|
||||||
// // First check if we are starting a new table
|
indent_level = colons_end;
|
||||||
// $indent_level = strlen(matches[1]);
|
|
||||||
//
|
// atrs = $this->mStripState->unstripBoth(matches[2]);
|
||||||
// $attributes = $this->mStripState->unstripBoth(matches[2]);
|
// atrs = Sanitizer::fixTagAttributes(attributes, 'table');
|
||||||
// $attributes = Sanitizer::fixTagAttributes(attributes, 'table');
|
|
||||||
//
|
// PORTED: line_orig = str_repeat('<dl><dd>', $indent_level) . "<table{atrs}>";
|
||||||
// line_orig = str_repeat('<dl><dd>', $indent_level) . "<table{$attributes}>";
|
for (int j = 0; j < indent_level; j++)
|
||||||
|
tmp_bfr.Add(Bry__dl_dd);
|
||||||
|
line_orig = tmp_bfr.Add_str_a7("<table").Add(colon_atrs).Add_byte(Byte_ascii.Angle_end).To_bry_and_clear();
|
||||||
td_history.Add(false);
|
td_history.Add(false);
|
||||||
last_tag_history.Add(Bry_.Empty);
|
last_tag_history.Add(Bry_.Empty);
|
||||||
tr_history.Add(false);
|
tr_history.Add(false);
|
||||||
@ -104,14 +116,15 @@ public class Xomw_tblw_wkr {
|
|||||||
line_orig = tmp_bfr.Add(line).Add(Bry_.Repeat_bry(Bry_.new_a7("</dd></dl>"), indent_level)).To_bry_and_clear();
|
line_orig = tmp_bfr.Add(line).Add(Bry_.Repeat_bry(Bry_.new_a7("</dd></dl>"), indent_level)).To_bry_and_clear();
|
||||||
}
|
}
|
||||||
else if (Bry_.Eq(chars_2, Bry__tr)) {
|
else if (Bry_.Eq(chars_2, Bry__tr)) {
|
||||||
// // Now we have a table row
|
// Now we have a table row
|
||||||
// $line = preg_replace('#^\|-+#', '', $line);
|
line = Bry_.Mid(line, 2); // PORTED: $line = preg_replace('#^\|-+#', '', $line);
|
||||||
//
|
|
||||||
// // Whats after the tag is now only attributes
|
// Whats after the tag is now only attributes
|
||||||
// $attributes = $this->mStripState->unstripBoth(line);
|
byte[] atrs = Bry_.Empty;
|
||||||
// $attributes = Sanitizer::fixTagAttributes(attributes, 'tr');
|
// atrs = $this->mStripState->unstripBoth(line);
|
||||||
|
// atrs = Sanitizer::fixTagAttributes(attributes, 'tr');
|
||||||
List_adp_.Pop(tr_attributes);
|
List_adp_.Pop(tr_attributes);
|
||||||
// array_push(tr_attributes, $attributes);
|
tr_attributes.Add(atrs);
|
||||||
|
|
||||||
line = Bry_.Empty;
|
line = Bry_.Empty;
|
||||||
byte[] last_tag = (byte[])List_adp_.Pop(last_tag_history);
|
byte[] last_tag = (byte[])List_adp_.Pop(last_tag_history);
|
||||||
@ -145,19 +158,21 @@ public class Xomw_tblw_wkr {
|
|||||||
|
|
||||||
// Implies both are valid for table headings.
|
// Implies both are valid for table headings.
|
||||||
if (char_0 == Byte_ascii.Nl) {
|
if (char_0 == Byte_ascii.Nl) {
|
||||||
// $line = StringUtils::replaceMarkup('!!', '||', $line);
|
// $line = StringUtils::replaceMarkup('!!', '||', $line);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Split up multiple cells on the same line.
|
// Split up multiple cells on the same line.
|
||||||
// FIXME : This can result in improper nesting of tags processed
|
// FIXME : This can result in improper nesting of tags processed
|
||||||
// by earlier parser steps.
|
// by earlier parser steps.
|
||||||
// $cells = explode('||', $line);
|
byte[][] cells = Bry_split_.Split(line, Bry__td2);
|
||||||
|
|
||||||
line_orig = Bry_.Empty;
|
line_orig = Bry_.Empty;
|
||||||
|
|
||||||
byte[] previous = null;
|
byte[] previous = null;
|
||||||
// Loop through each table cell
|
// Loop through each table cell
|
||||||
// foreach (cells as $cell) {
|
int cells_len = cells.length;
|
||||||
|
for (int j = 0; j < cells_len; i++) {
|
||||||
|
byte[] cell = cells[j];
|
||||||
previous = Bry_.Empty;
|
previous = Bry_.Empty;
|
||||||
if (char_0 != Byte_ascii.Plus) {
|
if (char_0 != Byte_ascii.Plus) {
|
||||||
byte[] tr_after = (byte[])List_adp_.Pop(tr_attributes);
|
byte[] tr_after = (byte[])List_adp_.Pop(tr_attributes);
|
||||||
@ -190,25 +205,29 @@ public class Xomw_tblw_wkr {
|
|||||||
}
|
}
|
||||||
|
|
||||||
last_tag_history.Add(last_tag);
|
last_tag_history.Add(last_tag);
|
||||||
//
|
|
||||||
// // A cell could contain both parameters and data
|
// A cell could contain both parameters and data
|
||||||
// $cell_data = explode('|', $cell, 2);
|
byte[][] cell_data = Bry_split_.Split_w_max(cell, Byte_ascii.Pipe, 2);
|
||||||
//
|
|
||||||
// // Bug 553: Note that a '|' inside an invalid link should not
|
// Bug 553: Note that a '|' inside an invalid link should not
|
||||||
// // be mistaken as delimiting cell parameters
|
// be mistaken as delimiting cell parameters
|
||||||
// if (strpos(cell_data[0], '[[') !== false) {
|
byte[] cell_data_0 = cell_data[0];
|
||||||
// $cell = "{$previous}<{$last_tag}>{$cell}";
|
byte[] cell_data_1 = cell_data[1];
|
||||||
// } else if (count(cell_data) == 1) {
|
if (Bry_find_.Find_fwd(cell_data_0, Bry__lnki) != Bry_find_.Not_found) {
|
||||||
// $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
|
cell = tmp_bfr.Add(previous).Add_str_a7("<").Add(last_tag).Add_str_a7(">").Add(cell).To_bry_and_clear();
|
||||||
// } else {
|
}
|
||||||
// $attributes = $this->mStripState->unstripBoth(cell_data[0]);
|
else if (cell_data_1 == null) {
|
||||||
// $attributes = Sanitizer::fixTagAttributes(attributes, $last_tag);
|
cell = tmp_bfr.Add(previous).Add_str_a7("<").Add(last_tag).Add_str_a7(">").Add(cell_data_0).To_bry_and_clear();
|
||||||
// $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
|
}
|
||||||
// }
|
else {
|
||||||
//
|
// atrs = $this->mStripState->unstripBoth(cell_data[0]);
|
||||||
// line_orig = Bry_.Add(line_orig, $cell);
|
// atrs = Sanitizer::fixTagAttributes(attributes, $last_tag);
|
||||||
|
cell = tmp_bfr.Add(previous).Add_str_a7("<").Add(last_tag).Add_str_a7(">").Add(cell_data_1).To_bry_and_clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
line_orig = Bry_.Add(line_orig, cell);
|
||||||
td_history.Add(true);
|
td_history.Add(true);
|
||||||
// }
|
}
|
||||||
}
|
}
|
||||||
bfr.Add(line_orig).Add_byte_nl();
|
bfr.Add(line_orig).Add_byte_nl();
|
||||||
}
|
}
|
@ -0,0 +1,43 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||||
|
import org.junit.*;
|
||||||
|
public class Xomw_table_wkr__tst {
|
||||||
|
private final Xomw_table_wkr__fxt fxt = new Xomw_table_wkr__fxt();
|
||||||
|
@Test public void Table() {
|
||||||
|
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||||
|
( "{|"
|
||||||
|
, "|-"
|
||||||
|
, "|a"
|
||||||
|
, "|}"
|
||||||
|
), String_.Concat_lines_nl_skip_last
|
||||||
|
( "{|"
|
||||||
|
, "|-"
|
||||||
|
, "|a"
|
||||||
|
, "|}"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Xomw_table_wkr__fxt {
|
||||||
|
private final Xomw_table_wkr wkr = new Xomw_table_wkr();
|
||||||
|
public void Test__parse(String src_str, String expd) {
|
||||||
|
byte[] src_bry = Bry_.new_u8(src_str);
|
||||||
|
byte[] actl = wkr.Do_table_stuff(src_bry);
|
||||||
|
Tfds.Eq_str_lines(expd, String_.new_u8(actl), src_str);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user