Mw_parse.Table: Add tests for basic functionality

pull/620/head
gnosygnu 8 years ago
parent d22c5e5f3d
commit 5f1609a869

@ -448,7 +448,14 @@ public class Bry_ {
} }
if (all_ws) return Bry_.Empty; if (all_ws) return Bry_.Empty;
} }
return Bry_.Mid(src, txt_bgn, txt_end);
if ( bgn == 0 && end == src.length // Trim is called on entire bry, not subset
&& bgn == txt_bgn && end == txt_end // Trim hasn't trimmed anything
) {
return src;
}
else
return Bry_.Mid(src, txt_bgn, txt_end);
} }
public static byte[] Trim_end(byte[] v, byte trim, int end) { public static byte[] Trim_end(byte[] v, byte trim, int end) {
boolean trimmed = false; boolean trimmed = false;

@ -0,0 +1,22 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
// Static helpers that pop a value from a List_adp and cast it to a concrete type.
// Both delegate to List_adp_.Pop_or, passing the fallback value as the second argument.
// NOTE(review): presumably Pop_or removes and returns the last item, and returns the
// fallback when the list is empty (mirroring PHP's array_pop) -- confirm against List_adp_.
public class Php_ary_ {
	// Pops from "list" with a fallback of false; the result is cast to a primitive boolean.
	public static boolean Pop_bool_or_n(List_adp list) {
		boolean popped = (boolean)List_adp_.Pop_or(list, false);
		return popped;
	}
	// Pops from "list" with a fallback of null; the result is cast to byte[].
	public static byte[] Pop_bry_or_null(List_adp list) {
		byte[] popped = (byte[])List_adp_.Pop_or(list, null);
		return popped;
	}
}

@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
package gplx.langs.phps; import gplx.*; import gplx.langs.*; package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
public class Php_str_ { public class Php_str_ {
public static byte[] Substr(byte[] src, int bgn, int len) {return Bry_.Mid(src, bgn, bgn + len);} public static byte[] Substr(byte[] src, int bgn, int len) {return Bry_.Mid(src, bgn, bgn + len);}
public static int Strspn_fwd__byte(byte[] src, byte find, int bgn, int max, int src_len) { public static int Strspn_fwd__byte(byte[] src, byte find, int bgn, int max, int src_len) {

@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
package gplx.langs.phps; import gplx.*; import gplx.langs.*; package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
import org.junit.*; import gplx.core.tests.*; import org.junit.*; import gplx.core.tests.*;
public class Php_str___tst { public class Php_str___tst {
private final Php_str___fxt fxt = new Php_str___fxt(); private final Php_str___fxt fxt = new Php_str___fxt();

@ -29,6 +29,7 @@ public class Xomp_prog_mgr {
this.perf_interval = perf_interval; this.perf_interval = perf_interval;
this.perf_url = perf_url; this.perf_url = perf_url;
this.prog_bgn = this.prog_prv = this.perf_prv = gplx.core.envs.System_.Ticks(); this.prog_bgn = this.prog_prv = this.perf_prv = gplx.core.envs.System_.Ticks();
Io_mgr.Instance.DeleteFil(perf_url);
} }
public void Mark_done(int id) { public void Mark_done(int id) {
synchronized (thread_lock) { synchronized (thread_lock) {

@ -24,7 +24,7 @@ import gplx.xowa.htmls.core.dbs.*;
import gplx.xowa.addons.wikis.pages.syncs.wmapis.*; import gplx.xowa.addons.wikis.pages.syncs.wmapis.*;
import gplx.xowa.addons.wikis.pages.syncs.core.parsers.*; import gplx.xowa.addons.wikis.pages.syncs.core.parsers.*;
public class Xosync_update_mgr { public class Xosync_update_mgr {
private final Xoh_hzip_bfr bfr = new Xoh_hzip_bfr(Io_mgr.Len_mb, Bool_.N, Byte_.Max_value_127); private final Xoh_hzip_bfr bfr = new Xoh_hzip_bfr(Io_mgr.Len_kb, Bool_.N, Byte_.Max_value_127);
private final Gfh_doc_parser hdoc_parser_mgr; private final Gfh_doc_parser hdoc_parser_mgr;
private final Xoh_hdoc_ctx hctx = new Xoh_hdoc_ctx(); private final Xoh_hdoc_ctx hctx = new Xoh_hdoc_ctx();
private final Xosync_hdoc_wtr hdoc_bldr = new Xosync_hdoc_wtr(); private final Xosync_hdoc_wtr hdoc_bldr = new Xosync_hdoc_wtr();

@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
package gplx.xowa.parsers.mws.blocks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; package gplx.xowa.parsers.mws.blocks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
import gplx.langs.phps.*; import gplx.langs.phps.utls.*;
public class Xomw_block_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls public class Xomw_block_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
private final Bry_bfr bfr = Bry_bfr_.New(); private final Bry_bfr bfr = Bry_bfr_.New();
private byte[] last_prefix, last_section; private byte[] last_prefix, last_section;

@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
import gplx.core.btries.*; import gplx.core.btries.*;
import gplx.langs.phps.*; import gplx.langs.phps.utls.*;
public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
private final Bry_bfr tmp_bfr = Bry_bfr_.New(); private final Bry_bfr tmp_bfr = Bry_bfr_.New();
private final List_adp comments_list = List_adp_.New(); private final List_adp comments_list = List_adp_.New();

@ -16,7 +16,8 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
public class Xomw_table_wkr { import gplx.langs.phps.utls.*;
public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
private final Bry_bfr bfr = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New(); private final Bry_bfr bfr = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New();
private final List_adp private final List_adp
td_history = List_adp_.New() td_history = List_adp_.New()
@ -25,19 +26,10 @@ public class Xomw_table_wkr {
, tr_attributes = List_adp_.New() , tr_attributes = List_adp_.New()
, has_opened_tr = List_adp_.New() , has_opened_tr = List_adp_.New()
; ;
private static final byte[] private int indent_level = 0; // indent level of the table
Bry__tblw_bgn = Bry_.new_a7("{|"), Bry__tblw_end = Bry_.new_a7("|}"), Bry__tr = Bry_.new_a7("|-"), Bry__th = Bry_.new_a7("|+"), Bry__td2 = Bry_.new_a7("||") private byte[] first_2 = new byte[2];
, Bry__lnki = Bry_.new_a7("[[")
, Bry__special_case = Bry_.new_a7("<table>\n<tr><td></td></tr>\n</table>")
, Bry__tag__td = Bry_.new_a7("td"), Bry__tag__th = Bry_.new_a7("th"), Bry__tag__caption = Bry_.new_a7("caption")
, Bry__elem_end__tr = Bry_.new_a7("</tr>")
, Bry__dl_dd = Bry_.new_a7("<dl><dd>")
;
private static final int Len__special_case = Bry__special_case.length;
public byte[] Do_table_stuff(byte[] src) { public byte[] Do_table_stuff(byte[] src) {
int src_len = src.length; indent_level = 0;
byte[][] lines = Bry_split_.Split(src, 0, src_len, Byte_ascii.Nl_bry); // PORTED: $lines = StringUtils::explode("\n", $text);
int lines_len = lines.length;
// PORTED:member variables // PORTED:member variables
// $td_history = []; // Is currently a td tag open? // $td_history = []; // Is currently a td tag open?
@ -46,214 +38,230 @@ public class Xomw_table_wkr {
// $tr_attributes = []; // history of tr attributes // $tr_attributes = []; // history of tr attributes
// $has_opened_tr = []; // Did this table open a <tr> element? // $has_opened_tr = []; // Did this table open a <tr> element?
int indent_level = 0; // indent level of the table // PORTED.SPLIT: $lines = StringUtils::explode("\n", $text);
byte[] chars_2 = new byte[2]; Bry_split_.Split(src, 0, src.length, Byte_ascii.Nl, Bool_.N, this);
for (int i = 0; i < lines_len; i++) {
byte[] line_orig = lines[i];
byte[] line = Bry_.Trim(line_orig);
int line_len = line.length; // Closing open td, tr && table
if (line_len == 0) { // empty line, go to next line while (td_history.Len() > 0) {
bfr.Add(line_orig).Add_byte_nl(); if (Php_ary_.Pop_bool_or_n(td_history)) {
continue; bfr.Add_str_a7("</td>\n");
}
if (Php_ary_.Pop_bool_or_n(tr_history)) {
bfr.Add_str_a7("</tr>\n");
} }
if (!Php_ary_.Pop_bool_or_n(has_opened_tr)) {
bfr.Add_str_a7("<tr><td></td></tr>\n");
}
bfr.Add_str_a7("</table>\n");
}
byte char_0 = line[0]; // Remove trailing line-ending (b/c)
chars_2[0] = line[0]; if (bfr.Get_at_last_or_nil_if_empty() == Byte_ascii.Nl) {
if (line_len > 1) chars_2[1] = line[1]; bfr.Del_by_1();
}
// PORTED: preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches) // special case: don't return empty table
byte[] colon_atrs = null; if ( bfr.Len() == Len__special_case
int colons_end = Bry_find_.Find_fwd(src, Byte_ascii.Pipe, 0, line_len); && Bry_.Eq(bfr.Bfr(), 0, Len__special_case, Bry__special_case)) {
if (colons_end > 0) { bfr.Clear();
int atrs_bgn = Bry_find_.Find_fwd_while(line, colons_end, line_len, Byte_ascii.Space); return Bry_.Empty;
if (Bry_.Eq(line, atrs_bgn, atrs_bgn + 2, Bry__tblw_bgn)) { }
colon_atrs = Bry_.Mid(line, atrs_bgn, line_len); return bfr.To_bry_and_clear();
} }
} public int Split(byte[] src, int itm_bgn, int itm_end) {
// ":*" , "\s*" , "{|" , ".*" byte[] out_line = Bry_.Mid(src, itm_bgn, itm_end); // MW: "$outLine"
if (colon_atrs != null) { byte[] line = Bry_.Trim(out_line); // MW: "$line"
// First check if we are starting a new table
indent_level = colons_end;
// atrs = $this->mStripState->unstripBoth(matches[2]); int line_temp_len = line.length;
// atrs = Sanitizer::fixTagAttributes(attributes, 'table'); if (line_temp_len == 0) { // empty line, go to next line
bfr.Add(out_line).Add_byte_nl();
return Bry_split_.Rv__ok;
}
// PORTED: line_orig = str_repeat('<dl><dd>', $indent_level) . "<table{atrs}>"; byte first_char = line[0];
for (int j = 0; j < indent_level; j++) first_2[0] = line[0];
tmp_bfr.Add(Bry__dl_dd); if (line_temp_len > 1) first_2[1] = line[1];
line_orig = tmp_bfr.Add_str_a7("<table").Add(colon_atrs).Add_byte(Byte_ascii.Angle_end).To_bry_and_clear();
td_history.Add(false);
last_tag_history.Add(Bry_.Empty);
tr_history.Add(false);
tr_attributes.Add(Bry_.Empty);
has_opened_tr.Add(false);
}
else if (td_history.Len() == 0) {
// Don't do any of the following
bfr.Add(line_orig).Add_byte_nl();
continue;
}
else if (Bry_.Eq(chars_2, Bry__tblw_end)) {
// We are ending a table
line = tmp_bfr.Add_str_a7("</table>").Add_mid(line, 2, line.length).To_bry_and_clear();
byte[] last_tag = (byte[])List_adp_.Pop(last_tag_history);
if (has_opened_tr.Len() == 0) { // PORTED: preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches)
line = tmp_bfr.Add_str_a7("<tr><td></td></tr>").Add(line).To_bry_and_clear(); byte[] tblw_atrs = null;
} boolean tblw_bgn_found = false;
int colons_end = Bry_find_.Find_fwd_while(src, 0, line_temp_len, Byte_ascii.Colon);
int tblw_bgn = Bry_find_.Find_fwd_while(line, colons_end, line_temp_len, Byte_ascii.Space);
int tblw_atrs_bgn = tblw_bgn + 2;
if (Bry_.Eq(line, tblw_bgn, tblw_atrs_bgn, Bry__tblw_bgn)) {
tblw_bgn_found = true;
tblw_atrs = (tblw_atrs_bgn == line_temp_len) ? Bry_.Empty : Bry_.Mid(line, tblw_atrs_bgn, line_temp_len);
}
if (tblw_bgn_found) {
// First check if we are starting a new table
indent_level = colons_end;
if (tr_history.Len() > 0) { // atrs = $this->mStripState->unstripBoth(matches[2]);
List_adp_.Pop(tr_history); // atrs = Sanitizer::fixTagAttributes(attributes, 'table');
line = tmp_bfr.Add_str_a7("</tr>").Add(line).To_bry_and_clear();
}
if (td_history.Len() > 0) { // PORTED: out_line = str_repeat('<dl><dd>', $indent_level) . "<table{atrs}>";
List_adp_.Pop(td_history); for (int j = 0; j < indent_level; j++)
line = tmp_bfr.Add_str_a7("</").Add(last_tag).Add_str_a7(">").Add(line).To_bry_and_clear(); tmp_bfr.Add(Bry__dl_dd);
} out_line = tmp_bfr.Add_str_a7("<table").Add(tblw_atrs).Add_byte(Byte_ascii.Angle_end).To_bry_and_clear();
List_adp_.Pop(tr_attributes); td_history.Add(false);
line_orig = tmp_bfr.Add(line).Add(Bry_.Repeat_bry(Bry_.new_a7("</dd></dl>"), indent_level)).To_bry_and_clear(); last_tag_history.Add(Bry_.Empty);
tr_history.Add(false);
tr_attributes.Add(Bry_.Empty);
has_opened_tr.Add(false);
}
else if (td_history.Len() == 0) {
// Don't do any of the following
bfr.Add(out_line).Add_byte_nl();
return Bry_split_.Rv__ok;
}
else if (Bry_.Eq(first_2, Bry__tblw_end)) {
// We are ending a table
line = tmp_bfr.Add_str_a7("</table>").Add_mid(line, 2, line.length).To_bry_and_clear();
byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
if (!Php_ary_.Pop_bool_or_n(has_opened_tr)) {
line = tmp_bfr.Add_str_a7("<tr><td></td></tr>").Add(line).To_bry_and_clear();
} }
else if (Bry_.Eq(chars_2, Bry__tr)) {
// Now we have a table row
line = Bry_.Mid(line, 2); // PORTED: $line = preg_replace('#^\|-+#', '', $line);
// Whats after the tag is now only attributes if (Php_ary_.Pop_bool_or_n(tr_history)) {
byte[] atrs = Bry_.Empty; line = tmp_bfr.Add_str_a7("</tr>").Add(line).To_bry_and_clear();
// atrs = $this->mStripState->unstripBoth(line); }
// atrs = Sanitizer::fixTagAttributes(attributes, 'tr');
List_adp_.Pop(tr_attributes);
tr_attributes.Add(atrs);
line = Bry_.Empty; if (Php_ary_.Pop_bool_or_n(td_history)) {
byte[] last_tag = (byte[])List_adp_.Pop(last_tag_history); line = tmp_bfr.Add_str_a7("</").Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(line).To_bry_and_clear();
List_adp_.Pop(has_opened_tr); }
has_opened_tr.Add(true); Php_ary_.Pop_bry_or_null(tr_attributes);
// PORTED:$outLine = $line . str_repeat( '</dd></dl>', $indent_level );
tmp_bfr.Add(line);
for (int j = 0; j < indent_level; j++)
tmp_bfr.Add(Bry__dl_dd_end);
out_line = tmp_bfr.To_bry_and_clear();
}
else if (Bry_.Eq(first_2, Bry__tr)) {
// Now we have a table row
line = Bry_.Mid(line, 2); // PORTED: $line = preg_replace('#^\|-+#', '', $line);
if ((boolean)List_adp_.Pop(tr_history)) { // Whats after the tag is now only attributes
line = Bry__elem_end__tr; byte[] atrs = line;
} // atrs = $this->mStripState->unstripBoth(line);
// atrs = Sanitizer::fixTagAttributes(attributes, 'tr');
Php_ary_.Pop_bry_or_null(tr_attributes);
tr_attributes.Add(atrs);
if ((boolean)List_adp_.Pop(td_history)) { line = Bry_.Empty;
line = tmp_bfr.Add_str_a7("</").Add(last_tag).Add_byte(Byte_ascii.Gt).Add(line).To_bry_and_clear(); byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
} Php_ary_.Pop_bool_or_n(has_opened_tr);
has_opened_tr.Add(true);
line_orig = line; if (Php_ary_.Pop_bool_or_n(tr_history)) {
tr_history.Add(false); line = Bry__elem_end__tr;
td_history.Add(false);
last_tag_history.Add(Bry_.Empty);
} }
else if ( char_0 == Byte_ascii.Pipe
|| char_0 == Byte_ascii.Bang
|| Bry_.Eq(chars_2, Bry__th)
) {
// This might be cell elements, td, th or captions
if (Bry_.Eq(chars_2, Bry__th)) {
char_0 = Byte_ascii.Pipe;
line = Bry_.Mid(line, 2);
} else {
line = Bry_.Mid(line, 1);
}
// Implies both are valid for table headings. if (Php_ary_.Pop_bool_or_n(td_history)) {
if (char_0 == Byte_ascii.Nl) { line = tmp_bfr.Add_str_a7("</").Add(last_tag).Add_byte(Byte_ascii.Gt).Add(line).To_bry_and_clear();
// $line = StringUtils::replaceMarkup('!!', '||', $line); }
}
// Split up multiple cells on the same line. out_line = line;
// FIXME : This can result in improper nesting of tags processed tr_history.Add(false);
// by earlier parser steps. td_history.Add(false);
byte[][] cells = Bry_split_.Split(line, Bry__td2); last_tag_history.Add(Bry_.Empty);
}
else if ( first_char == Byte_ascii.Pipe
|| first_char == Byte_ascii.Bang
|| Bry_.Eq(first_2, Bry__th)
) {
// This might be cell elements, td, th or captions
if (Bry_.Eq(first_2, Bry__th)) {
first_char = Byte_ascii.Pipe;
line = Bry_.Mid(line, 2);
} else {
line = Bry_.Mid(line, 1);
}
line_orig = Bry_.Empty; // Implies both are valid for table headings.
if (first_char == Byte_ascii.Nl) {
// $line = StringUtils::replaceMarkup('!!', '||', $line);
}
byte[] previous = null; // Split up multiple cells on the same line.
// Loop through each table cell // FIXME : This can result in improper nesting of tags processed
int cells_len = cells.length; // by earlier parser steps.
for (int j = 0; j < cells_len; i++) { byte[][] cells = Bry_split_.Split(line, Bry__td2);
byte[] cell = cells[j];
previous = Bry_.Empty;
if (char_0 != Byte_ascii.Plus) {
byte[] tr_after = (byte[])List_adp_.Pop(tr_attributes);
if (!(boolean)List_adp_.Pop(tr_history)) {
previous = tmp_bfr.Add_str_a7("<tr").Add(tr_after).Add_str_a7(">\n").To_bry_and_clear();
}
tr_history.Add(true);
tr_attributes.Add(Bry_.Empty);
List_adp_.Pop(has_opened_tr);
has_opened_tr.Add(true);
}
byte[] last_tag = (byte[])List_adp_.Pop(last_tag_history); out_line = Bry_.Empty;
if ((boolean)List_adp_.Pop(td_history)) { byte[] previous = null;
previous = tmp_bfr.Add_str_a7("</").Add(last_tag).Add_str_a7(">\n").Add(previous).To_bry_and_clear(); // Loop through each table cell
int cells_len = cells.length;
for (int j = 0; j < cells_len; j++) {
byte[] cell = cells[j];
previous = Bry_.Empty;
if (first_char != Byte_ascii.Plus) {
byte[] tr_after = Php_ary_.Pop_bry_or_null(tr_attributes);
if (!Php_ary_.Pop_bool_or_n(tr_history)) {
previous = tmp_bfr.Add_str_a7("<tr").Add(tr_after).Add_str_a7(">\n").To_bry_and_clear();
} }
tr_history.Add(true);
tr_attributes.Add(Bry_.Empty);
Php_ary_.Pop_bool_or_n(has_opened_tr);
has_opened_tr.Add(true);
}
if (char_0 == Byte_ascii.Pipe) { byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
last_tag = Bry__tag__td;
} if (Php_ary_.Pop_bool_or_n(td_history)) {
else if (char_0 == Byte_ascii.Bang) { previous = tmp_bfr.Add_str_a7("</").Add(last_tag).Add_str_a7(">\n").Add(previous).To_bry_and_clear();
last_tag = Bry__tag__th; }
}
else if (char_0 == Byte_ascii.Plus) {
last_tag = Bry__tag__caption;
}
else {
last_tag = Bry_.Empty;
}
last_tag_history.Add(last_tag); if (first_char == Byte_ascii.Pipe) {
last_tag = Bry__tag__td;
}
else if (first_char == Byte_ascii.Bang) {
last_tag = Bry__tag__th;
}
else if (first_char == Byte_ascii.Plus) {
last_tag = Bry__tag__caption;
}
else {
last_tag = Bry_.Empty;
}
// A cell could contain both parameters and data last_tag_history.Add(last_tag);
byte[][] cell_data = Bry_split_.Split_w_max(cell, Byte_ascii.Pipe, 2);
// Bug 553: Note that a '|' inside an invalid link should not // A cell could contain both parameters and data
// be mistaken as delimiting cell parameters byte[][] cell_data = Bry_split_.Split_w_max(cell, Byte_ascii.Pipe, 2);
byte[] cell_data_0 = cell_data[0];
byte[] cell_data_1 = cell_data[1];
if (Bry_find_.Find_fwd(cell_data_0, Bry__lnki) != Bry_find_.Not_found) {
cell = tmp_bfr.Add(previous).Add_str_a7("<").Add(last_tag).Add_str_a7(">").Add(cell).To_bry_and_clear();
}
else if (cell_data_1 == null) {
cell = tmp_bfr.Add(previous).Add_str_a7("<").Add(last_tag).Add_str_a7(">").Add(cell_data_0).To_bry_and_clear();
}
else {
// atrs = $this->mStripState->unstripBoth(cell_data[0]);
// atrs = Sanitizer::fixTagAttributes(attributes, $last_tag);
cell = tmp_bfr.Add(previous).Add_str_a7("<").Add(last_tag).Add_str_a7(">").Add(cell_data_1).To_bry_and_clear();
}
line_orig = Bry_.Add(line_orig, cell); // Bug 553: Note that a '|' inside an invalid link should not
td_history.Add(true); // be mistaken as delimiting cell parameters
byte[] cell_data_0 = cell_data[0];
byte[] cell_data_1 = cell_data[1];
if (Bry_find_.Find_fwd(cell_data_0, Bry__lnki) != Bry_find_.Not_found) {
cell = tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell).To_bry_and_clear();
}
else if (cell_data_1 == null) {
cell = tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell_data_0).To_bry_and_clear();
}
else {
// atrs = $this->mStripState->unstripBoth(cell_data_0);
// atrs = Sanitizer::fixTagAttributes(attributes, $last_tag);
cell = tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add(cell_data_0).Add_byte(Byte_ascii.Angle_end).Add(cell_data_1).To_bry_and_clear();
} }
}
bfr.Add(line_orig).Add_byte_nl();
}
// Closing open td, tr && table out_line = Bry_.Add(out_line, cell);
while (td_history.Len() > 0) { td_history.Add(true);
if (tr_history.Len() > 0) {
List_adp_.Del_at_last(tr_history);
bfr.Add_str_a7("</td>\n");
} }
if (has_opened_tr.Len() == 0) {
bfr.Add_str_a7("<tr><td></td></tr>\n");
}
bfr.Add_str_a7("</table>\n");
}
// Remove trailing line-ending (b/c)
if (bfr.Get_at_last_or_nil_if_empty() == Byte_ascii.Nl) {
bfr.Del_by_1();
}
// special case: don't return empty table
if ( bfr.Len() == Len__special_case
&& Bry_.Eq(bfr.Bfr(), 0, Len__special_case, Bry__special_case)) {
return Bry_.Empty;
} }
return bfr.To_bry_and_clear(); bfr.Add(out_line).Add_byte_nl();
return Bry_split_.Rv__ok;
} }
private static final byte[]
Bry__tblw_bgn = Bry_.new_a7("{|"), Bry__tblw_end = Bry_.new_a7("|}"), Bry__tr = Bry_.new_a7("|-"), Bry__th = Bry_.new_a7("|+"), Bry__td2 = Bry_.new_a7("||")
, Bry__lnki = Bry_.new_a7("[[")
, Bry__special_case = Bry_.new_a7("<table>\n<tr><td></td></tr>\n</table>")
, Bry__tag__td = Bry_.new_a7("td"), Bry__tag__th = Bry_.new_a7("th"), Bry__tag__caption = Bry_.new_a7("caption")
, Bry__elem_end__tr = Bry_.new_a7("</tr>")
, Bry__dl_dd = Bry_.new_a7("<dl><dd>")
, Bry__dl_dd_end = Bry_.new_a7("</dd></dl>")
;
private static final int Len__special_case = Bry__special_case.length;
} }

@ -19,19 +19,50 @@ package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import
import org.junit.*; import org.junit.*;
public class Xomw_table_wkr__tst { public class Xomw_table_wkr__tst {
private final Xomw_table_wkr__fxt fxt = new Xomw_table_wkr__fxt(); private final Xomw_table_wkr__fxt fxt = new Xomw_table_wkr__fxt();
@Test public void Table() { @Test public void Basic() {
fxt.Test__parse(String_.Concat_lines_nl_skip_last fxt.Test__parse(String_.Concat_lines_nl_skip_last
( "{|" ( "{|"
, "|-" , "|-"
, "|a" , "|a"
, "|}" , "|}"
), String_.Concat_lines_nl_skip_last ), String_.Concat_lines_nl_skip_last
( "{|" ( "<table>"
, ""
, "<tr>"
, "<td>a"
, "</td></tr></table>"
));
}
@Test public void Blank() { // COVERS: "empty line, go to next line"
fxt.Test__parse(String_.Concat_lines_nl_skip_last
( " "
), String_.Concat_lines_nl_skip_last
( " "
));
}
@Test public void Indent() {
fxt.Test__parse(String_.Concat_lines_nl_skip_last
( "::{|"
, "|-" , "|-"
, "|a" , "|a"
, "|}" , "|}"
), String_.Concat_lines_nl_skip_last
( "<dl><dd><dl><dd><table>"
, ""
, "<tr>"
, "<td>a"
, "</td></tr></table></dd></dl></dd></dl>"
)); ));
} }
@Test public void End__no_rows() { // COVERS: "if (has_opened_tr.Len() == 0) {"
fxt.Test__parse(String_.Concat_lines_nl_skip_last
( "{|"
, "|}"
), String_.Concat_lines_nl_skip_last
( "<table>"
, "<tr><td></td></tr></table>"
));
}
} }
class Xomw_table_wkr__fxt { class Xomw_table_wkr__fxt {
private final Xomw_table_wkr wkr = new Xomw_table_wkr(); private final Xomw_table_wkr wkr = new Xomw_table_wkr();

Loading…
Cancel
Save