mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Mw_parse.Table: Support attributes
This commit is contained in:
parent
5f1609a869
commit
f8fcb553d5
@ -39,5 +39,9 @@ public class Mwh_doc_wkr__atr_bldr implements Mwh_doc_wkr {
|
||||
// Intentionally empty: this worker takes no action on the node-tail-end callback.
public void On_nde_tail_end(Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
// Intentionally empty: this worker takes no action on the comment-end callback.
public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
// Intentionally empty: this worker takes no action on the entity-end callback.
public void On_entity_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {}
|
||||
|
||||
// Returns the collected items as a Mwh_atr_itm[] by delegating to list.To_ary_and_clear.
// NOTE(review): per that method's name the list is presumably emptied as a side effect -- confirm against List_adp.
public Mwh_atr_itm[] To_atr_ary() {return (Mwh_atr_itm[])list.To_ary_and_clear(Mwh_atr_itm.class);}
|
||||
// Returns list.Len(), i.e. the number of items currently held in the backing list.
public int Atrs__len() {return list.Len();}
|
||||
// Returns the item at index i of the backing list, cast to Mwh_atr_itm; no range check is done here.
public Mwh_atr_itm Atrs__get_at(int i) {return (Mwh_atr_itm)list.Get_at(i);}
|
||||
// Empties the backing list (delegates to list.Clear) so the builder can be reused for another parse.
public void Atrs__clear() {list.Clear();}
|
||||
}
|
@ -16,8 +16,12 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
import gplx.xowa.parsers.mws.utils.*;
|
||||
import gplx.xowa.parsers.uniqs.*;
|
||||
public class Xomw_parser_ctx {
|
||||
public Xomw_parser_ctx() {
|
||||
}
|
||||
public Xomw_sanitizer_mgr Sanitizer() {return sanitizer;} private final Xomw_sanitizer_mgr sanitizer = new Xomw_sanitizer_mgr();
|
||||
public Xop_uniq_mgr Uniq_mgr() {return uniq_mgr;} private final Xop_uniq_mgr uniq_mgr = new Xop_uniq_mgr();
|
||||
|
||||
public static final int Pos__bos = -1;
|
||||
}
|
||||
|
@ -17,29 +17,27 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
import gplx.langs.phps.utls.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
import gplx.xowa.parsers.mws.utils.*; import gplx.xowa.parsers.uniqs.*;
|
||||
public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private final Bry_bfr bfr = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New();
|
||||
private final List_adp
|
||||
td_history = List_adp_.New()
|
||||
, last_tag_history = List_adp_.New()
|
||||
, tr_history = List_adp_.New()
|
||||
, tr_attributes = List_adp_.New()
|
||||
, has_opened_tr = List_adp_.New()
|
||||
td_history = List_adp_.New() // Is currently a td tag open?
|
||||
, last_tag_history = List_adp_.New() // Save history of last tag activated (td, th or caption)
|
||||
, tr_history = List_adp_.New() // Is currently a tr tag open?
|
||||
, tr_attributes = List_adp_.New() // history of tr attributes
|
||||
, has_opened_tr = List_adp_.New() // Did this table open a <tr> element?
|
||||
;
|
||||
private int indent_level = 0; // indent level of the table
|
||||
private byte[] first_2 = new byte[2];
|
||||
public byte[] Do_table_stuff(byte[] src) {
|
||||
private Xomw_sanitizer_mgr sanitizer;
|
||||
private Xop_uniq_mgr uniq_mgr;
|
||||
public byte[] Do_table_stuff(Xomw_parser_ctx ctx, byte[] src) {
|
||||
this.sanitizer = ctx.Sanitizer();
|
||||
this.uniq_mgr = ctx.Uniq_mgr();
|
||||
indent_level = 0;
|
||||
|
||||
// PORTED:member variables
|
||||
// $td_history = []; // Is currently a td tag open?
|
||||
// $last_tag_history = []; // Save history of last lag activated (td, th or caption)
|
||||
// $tr_history = []; // Is currently a tr tag open?
|
||||
// $tr_attributes = []; // history of tr attributes
|
||||
// $has_opened_tr = []; // Did this table open a <tr> element?
|
||||
|
||||
// PORTED.SPLIT: $lines = StringUtils::explode("\n", $text);
|
||||
Bry_split_.Split(src, 0, src.length, Byte_ascii.Nl, Bool_.N, this);
|
||||
|
||||
Bry_split_.Split(src, 0, src.length, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode("\n", $text);
|
||||
|
||||
// Closing open td, tr && table
|
||||
while (td_history.Len() > 0) {
|
||||
@ -61,8 +59,8 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
}
|
||||
|
||||
// special case: don't return empty table
|
||||
if ( bfr.Len() == Len__special_case
|
||||
&& Bry_.Eq(bfr.Bfr(), 0, Len__special_case, Bry__special_case)) {
|
||||
if ( bfr.Len() == Len__tb__empty
|
||||
&& Bry_.Eq(bfr.Bfr(), 0, Len__tb__empty, Html__tb__empty)) {
|
||||
bfr.Clear();
|
||||
return Bry_.Empty;
|
||||
}
|
||||
@ -70,39 +68,41 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
}
|
||||
public int Split(byte[] src, int itm_bgn, int itm_end) {
|
||||
byte[] out_line = Bry_.Mid(src, itm_bgn, itm_end); // MW: "$outLine"
|
||||
byte[] line = Bry_.Trim(out_line); // MW: "$line"
|
||||
byte[] line = Bry_.Trim(out_line); // MW: "$line"
|
||||
|
||||
int line_temp_len = line.length;
|
||||
if (line_temp_len == 0) { // empty line, go to next line
|
||||
int line_len = line.length;
|
||||
if (line_len == 0) { // empty line, go to next line
|
||||
bfr.Add(out_line).Add_byte_nl();
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
|
||||
byte first_char = line[0];
|
||||
first_2[0] = line[0];
|
||||
if (line_temp_len > 1) first_2[1] = line[1];
|
||||
if (line_len > 1) first_2[1] = line[1];
|
||||
|
||||
// PORTED: preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches)
|
||||
byte[] tblw_atrs = null;
|
||||
boolean tblw_bgn_found = false;
|
||||
int colons_end = Bry_find_.Find_fwd_while(src, 0, line_temp_len, Byte_ascii.Colon);
|
||||
int tblw_bgn = Bry_find_.Find_fwd_while(line, colons_end, line_temp_len, Byte_ascii.Space);
|
||||
int colons_end = Bry_find_.Find_fwd_while(src, 0, line_len, Byte_ascii.Colon);
|
||||
int tblw_bgn = Bry_find_.Find_fwd_while(line, colons_end, line_len, Byte_ascii.Space);
|
||||
int tblw_atrs_bgn = tblw_bgn + 2;
|
||||
if (Bry_.Eq(line, tblw_bgn, tblw_atrs_bgn, Bry__tblw_bgn)) {
|
||||
if (Bry_.Eq(line, tblw_bgn, tblw_atrs_bgn, Wtxt__tb__bgn)) {
|
||||
tblw_bgn_found = true;
|
||||
tblw_atrs = (tblw_atrs_bgn == line_temp_len) ? Bry_.Empty : Bry_.Mid(line, tblw_atrs_bgn, line_temp_len);
|
||||
tblw_atrs = (tblw_atrs_bgn == line_len) ? Bry_.Empty : Bry_.Mid(line, tblw_atrs_bgn, line_len);
|
||||
}
|
||||
if (tblw_bgn_found) {
|
||||
// First check if we are starting a new table
|
||||
indent_level = colons_end;
|
||||
|
||||
// atrs = $this->mStripState->unstripBoth(matches[2]);
|
||||
// atrs = Sanitizer::fixTagAttributes(attributes, 'table');
|
||||
tblw_atrs = uniq_mgr.Convert(tblw_atrs);
|
||||
|
||||
// PORTED: out_line = str_repeat('<dl><dd>', $indent_level) . "<table{atrs}>";
|
||||
for (int j = 0; j < indent_level; j++)
|
||||
tmp_bfr.Add(Bry__dl_dd);
|
||||
out_line = tmp_bfr.Add_str_a7("<table").Add(tblw_atrs).Add_byte(Byte_ascii.Angle_end).To_bry_and_clear();
|
||||
tmp_bfr.Add(Html__dl__bgn);
|
||||
tmp_bfr.Add_str_a7("<table");
|
||||
sanitizer.Fix_tag_attributes(tmp_bfr, Name__table, tblw_atrs);
|
||||
tmp_bfr.Add_byte(Byte_ascii.Angle_end);
|
||||
out_line = tmp_bfr.To_bry_and_clear();
|
||||
td_history.Add(false);
|
||||
last_tag_history.Add(Bry_.Empty);
|
||||
tr_history.Add(false);
|
||||
@ -114,7 +114,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
bfr.Add(out_line).Add_byte_nl();
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
else if (Bry_.Eq(first_2, Bry__tblw_end)) {
|
||||
else if (Bry_.Eq(first_2, Wtxt__tb__end)) {
|
||||
// We are ending a table
|
||||
line = tmp_bfr.Add_str_a7("</table>").Add_mid(line, 2, line.length).To_bry_and_clear();
|
||||
byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
|
||||
@ -134,17 +134,18 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
// PORTED:$outLine = $line . str_repeat( '</dd></dl>', $indent_level );
|
||||
tmp_bfr.Add(line);
|
||||
for (int j = 0; j < indent_level; j++)
|
||||
tmp_bfr.Add(Bry__dl_dd_end);
|
||||
tmp_bfr.Add(Html__dl__end);
|
||||
out_line = tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
else if (Bry_.Eq(first_2, Bry__tr)) {
|
||||
else if (Bry_.Eq(first_2, Wtxt__tr)) {
|
||||
// Now we have a table row
|
||||
line = Bry_.Mid(line, 2); // PORTED: $line = preg_replace('#^\|-+#', '', $line);
|
||||
|
||||
// Whats after the tag is now only attributes
|
||||
byte[] atrs = line;
|
||||
// atrs = $this->mStripState->unstripBoth(line);
|
||||
// atrs = Sanitizer::fixTagAttributes(attributes, 'tr');
|
||||
byte[] atrs = uniq_mgr.Unstrip_both(line);
|
||||
sanitizer.Fix_tag_attributes(tmp_bfr, Name__tr, atrs);
|
||||
atrs = tmp_bfr.To_bry_and_clear();
|
||||
|
||||
Php_ary_.Pop_bry_or_null(tr_attributes);
|
||||
tr_attributes.Add(atrs);
|
||||
|
||||
@ -154,7 +155,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
has_opened_tr.Add(true);
|
||||
|
||||
if (Php_ary_.Pop_bool_or_n(tr_history)) {
|
||||
line = Bry__elem_end__tr;
|
||||
line = Html__tr__end;
|
||||
}
|
||||
|
||||
if (Php_ary_.Pop_bool_or_n(td_history)) {
|
||||
@ -168,25 +169,25 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
}
|
||||
else if ( first_char == Byte_ascii.Pipe
|
||||
|| first_char == Byte_ascii.Bang
|
||||
|| Bry_.Eq(first_2, Bry__th)
|
||||
|| Bry_.Eq(first_2, Wtxt__caption)
|
||||
) {
|
||||
// This might be cell elements, td, th or captions
|
||||
if (Bry_.Eq(first_2, Bry__th)) {
|
||||
first_char = Byte_ascii.Pipe;
|
||||
if (Bry_.Eq(first_2, Wtxt__caption)) {
|
||||
first_char = Byte_ascii.Plus;
|
||||
line = Bry_.Mid(line, 2);
|
||||
} else {
|
||||
line = Bry_.Mid(line, 1);
|
||||
}
|
||||
|
||||
// Implies both are valid for table headings.
|
||||
if (first_char == Byte_ascii.Nl) {
|
||||
// $line = StringUtils::replaceMarkup('!!', '||', $line);
|
||||
if (first_char == Byte_ascii.Bang) {
|
||||
Xomw_string_utils.Replace_markup(line, 0, line.length, Wtxt__th2, Wtxt__td2); // $line = StringUtils::replaceMarkup('!!', '||', $line);
|
||||
}
|
||||
|
||||
// Split up multiple cells on the same line.
|
||||
// FIXME : This can result in improper nesting of tags processed
|
||||
// by earlier parser steps.
|
||||
byte[][] cells = Bry_split_.Split(line, Bry__td2);
|
||||
byte[][] cells = Bry_split_.Split(line, Wtxt__td2);
|
||||
|
||||
out_line = Bry_.Empty;
|
||||
|
||||
@ -214,13 +215,13 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
}
|
||||
|
||||
if (first_char == Byte_ascii.Pipe) {
|
||||
last_tag = Bry__tag__td;
|
||||
last_tag = Name__td;
|
||||
}
|
||||
else if (first_char == Byte_ascii.Bang) {
|
||||
last_tag = Bry__tag__th;
|
||||
last_tag = Name__th;
|
||||
}
|
||||
else if (first_char == Byte_ascii.Plus) {
|
||||
last_tag = Bry__tag__caption;
|
||||
last_tag = Name__caption;
|
||||
}
|
||||
else {
|
||||
last_tag = Bry_.Empty;
|
||||
@ -235,16 +236,18 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
// be mistaken as delimiting cell parameters
|
||||
byte[] cell_data_0 = cell_data[0];
|
||||
byte[] cell_data_1 = cell_data[1];
|
||||
if (Bry_find_.Find_fwd(cell_data_0, Bry__lnki) != Bry_find_.Not_found) {
|
||||
if (Bry_find_.Find_fwd(cell_data_0, Wtxt__lnki__bgn) != Bry_find_.Not_found) {
|
||||
cell = tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell).To_bry_and_clear();
|
||||
}
|
||||
else if (cell_data_1 == null) {
|
||||
cell = tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell_data_0).To_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
// atrs = $this->mStripState->unstripBoth(cell_data_0);
|
||||
// atrs = Sanitizer::fixTagAttributes(attributes, $last_tag);
|
||||
cell = tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add(cell_data_0).Add_byte(Byte_ascii.Angle_end).Add(cell_data_1).To_bry_and_clear();
|
||||
byte[] atrs = uniq_mgr.Unstrip_both(cell_data_0);
|
||||
tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag);
|
||||
sanitizer.Fix_tag_attributes(tmp_bfr, last_tag, atrs);
|
||||
tmp_bfr.Add_byte(Byte_ascii.Angle_end).Add(cell_data_1);
|
||||
cell = tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
|
||||
out_line = Bry_.Add(out_line, cell);
|
||||
@ -255,13 +258,24 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
private static final byte[]
|
||||
Bry__tblw_bgn = Bry_.new_a7("{|"), Bry__tblw_end = Bry_.new_a7("|}"), Bry__tr = Bry_.new_a7("|-"), Bry__th = Bry_.new_a7("|+"), Bry__td2 = Bry_.new_a7("||")
|
||||
, Bry__lnki = Bry_.new_a7("[[")
|
||||
, Bry__special_case = Bry_.new_a7("<table>\n<tr><td></td></tr>\n</table>")
|
||||
, Bry__tag__td = Bry_.new_a7("td"), Bry__tag__th = Bry_.new_a7("th"), Bry__tag__caption = Bry_.new_a7("caption")
|
||||
, Bry__elem_end__tr = Bry_.new_a7("</tr>")
|
||||
, Bry__dl_dd = Bry_.new_a7("<dl><dd>")
|
||||
, Bry__dl_dd_end = Bry_.new_a7("</dd></dl>")
|
||||
Wtxt__tb__bgn = Bry_.new_a7("{|")
|
||||
, Wtxt__tb__end = Bry_.new_a7("|}")
|
||||
, Wtxt__tr = Bry_.new_a7("|-")
|
||||
, Wtxt__caption = Bry_.new_a7("|+")
|
||||
, Wtxt__th2 = Bry_.new_a7("!!")
|
||||
, Wtxt__td2 = Bry_.new_a7("||")
|
||||
, Wtxt__lnki__bgn = Bry_.new_a7("[[")
|
||||
|
||||
, Name__table = Bry_.new_a7("table")
|
||||
, Name__tr = Bry_.new_a7("tr")
|
||||
, Name__td = Bry_.new_a7("td")
|
||||
, Name__th = Bry_.new_a7("th")
|
||||
, Name__caption = Bry_.new_a7("caption")
|
||||
|
||||
, Html__tr__end = Bry_.new_a7("</tr>")
|
||||
, Html__dl__bgn = Bry_.new_a7("<dl><dd>")
|
||||
, Html__dl__end = Bry_.new_a7("</dd></dl>")
|
||||
, Html__tb__empty = Bry_.new_a7("<table>\n<tr><td></td></tr>\n</table>")
|
||||
;
|
||||
private static final int Len__special_case = Bry__special_case.length;
|
||||
private static final int Len__tb__empty = Html__tb__empty.length;
|
||||
}
|
||||
|
@ -33,6 +33,44 @@ public class Xomw_table_wkr__tst {
|
||||
, "</td></tr></table>"
|
||||
));
|
||||
}
|
||||
@Test public void Tb__atrs() {
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "{|id='1'"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table id=\"1\">"
|
||||
, ""
|
||||
, "<tr>"
|
||||
, "<td>a"
|
||||
, "</td></tr></table>"
|
||||
));
|
||||
}
|
||||
@Test public void Tc__atrs() {
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|+id='1'|a"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, "<caption id=\"1\">a"
|
||||
, "</caption><tr><td></td></tr></table>"
|
||||
));
|
||||
}
|
||||
@Test public void Th__double() {
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "!a!!b"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, "<tr>"
|
||||
, "<th>a</th>"
|
||||
, "<th>b"
|
||||
, "</th></tr></table>"
|
||||
));
|
||||
}
|
||||
@Test public void Blank() { // COVERS: "empty line, go to next line"
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( " "
|
||||
@ -40,7 +78,7 @@ public class Xomw_table_wkr__tst {
|
||||
( " "
|
||||
));
|
||||
}
|
||||
@Test public void Indent() {
|
||||
@Test public void Tb__indent() {
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "::{|"
|
||||
, "|-"
|
||||
@ -54,7 +92,7 @@ public class Xomw_table_wkr__tst {
|
||||
, "</td></tr></table></dd></dl></dd></dl>"
|
||||
));
|
||||
}
|
||||
@Test public void End__no_rows() { // COVERS: "if (has_opened_tr.Len() == 0) {"
|
||||
@Test public void Tb__empty() { // COVERS: "if (has_opened_tr.Len() == 0) {"
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "{|"
|
||||
, "|}"
|
||||
@ -65,10 +103,11 @@ public class Xomw_table_wkr__tst {
|
||||
}
|
||||
}
|
||||
class Xomw_table_wkr__fxt {
|
||||
private final Xomw_parser_ctx ctx = new Xomw_parser_ctx();
|
||||
private final Xomw_table_wkr wkr = new Xomw_table_wkr();
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
byte[] actl = wkr.Do_table_stuff(src_bry);
|
||||
byte[] actl = wkr.Do_table_stuff(ctx, src_bry);
|
||||
Tfds.Eq_str_lines(expd, String_.new_u8(actl), src_str);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,41 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
public class Xomw_sanitizer_mgr {
|
||||
private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr();
|
||||
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
|
||||
public void Fix_tag_attributes(Bry_bfr bfr, byte[] tag_name, byte[] atrs) {
|
||||
atr_bldr.Atrs__clear();
|
||||
atr_parser.Parse(atr_bldr, -1, -1, atrs, 0, atrs.length);
|
||||
int len = atr_bldr.Atrs__len();
|
||||
|
||||
// PORTED: Sanitizer.php|safeEncodeTagAttributes
|
||||
for (int i = 0; i < len; i++) {
|
||||
// $encAttribute = htmlspecialchars( $attribute );
|
||||
// $encValue = Sanitizer::safeEncodeAttribute( $value );
|
||||
// $attribs[] = "$encAttribute=\"$encValue\"";
|
||||
Mwh_atr_itm itm = atr_bldr.Atrs__get_at(i);
|
||||
bfr.Add_byte_space(); // "return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';"
|
||||
bfr.Add_bry_escape_html(itm.Key_bry(), itm.Key_bgn(), itm.Key_end());
|
||||
bfr.Add_byte_eq().Add_byte_quote();
|
||||
bfr.Add(itm.Val_as_bry()); // TODO.XO:Sanitizer::encode
|
||||
bfr.Add_byte_quote();
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,62 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
public class Xomw_string_utils {
|
||||
public static void Replace_markup(byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) { // REF:/includes/libs/StringUtils.php|replaceMarkup
|
||||
// PORTED: avoiding multiple regex calls / String creations
|
||||
// $placeholder = "\x00";
|
||||
|
||||
// Remove placeholder instances
|
||||
// $text = str_replace( $placeholder, '', $text );
|
||||
|
||||
// Replace instances of the separator inside HTML-like tags with the placeholder
|
||||
// $replacer = new DoubleReplacer( $search, $placeholder );
|
||||
// $cleaned = StringUtils::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text );
|
||||
|
||||
// Explode, then put the replaced separators back in
|
||||
// $cleaned = str_replace( $search, $replace, $cleaned );
|
||||
// $text = str_replace( $placeholder, $search, $cleaned );
|
||||
|
||||
// if same length find / repl, do in-place replacement; EX: "!!" -> "||"
|
||||
int find_len = find.length;
|
||||
int repl_len = repl.length;
|
||||
if (find_len != repl_len) throw Err_.new_wo_type("find and repl should be same length");
|
||||
|
||||
byte find_0 = find[0];
|
||||
byte dlm_bgn = Byte_ascii.Angle_bgn;
|
||||
byte dlm_end = Byte_ascii.Angle_end;
|
||||
boolean repl_active = true;
|
||||
|
||||
// loop every char in array
|
||||
for (int i = src_bgn; i < src_end; i++) {
|
||||
byte b = src[i];
|
||||
if ( b == find_0
|
||||
&& Bry_.Match(src, i + 1, i + find_len, find, 1, find_len)
|
||||
&& repl_active
|
||||
) {
|
||||
Bry_.Set(src, i, i + find_len, repl);
|
||||
}
|
||||
else if (b == dlm_bgn) {
|
||||
repl_active = false;
|
||||
}
|
||||
else if (b == dlm_end) {
|
||||
repl_active = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,47 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_string_utils__tst {
|
||||
private final Xomw_string_utils__fxt fxt = new Xomw_string_utils__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__replace_markup("a!!b" , "!!", "||", "a||b");
|
||||
}
|
||||
@Test public void Missing() {
|
||||
fxt.Test__replace_markup("abcd" , "!!", "||", "abcd");
|
||||
}
|
||||
@Test public void Eos() {
|
||||
fxt.Test__replace_markup("a!!" , "!!", "||", "a||");
|
||||
}
|
||||
@Test public void Ignore() {
|
||||
fxt.Test__replace_markup("a!!b<!!>!!c" , "!!", "||", "a||b<!!>||c");
|
||||
}
|
||||
@Test public void Ignore__asym__lhs() {
|
||||
fxt.Test__replace_markup("a!!b<!!<!!>!!c" , "!!", "||", "a||b<!!<!!>||c");
|
||||
}
|
||||
@Test public void Ignore__asym__rhs() {
|
||||
fxt.Test__replace_markup("a!!b<!!>!!>!!c" , "!!", "||", "a||b<!!>||>||c"); // NOTE: should probably be "!!>!!>", but unmatched ">" are escaped to ">"
|
||||
}
|
||||
}
|
||||
class Xomw_string_utils__fxt {
|
||||
public void Test__replace_markup(String src_str, String find, String repl, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
Xomw_string_utils.Replace_markup(src_bry, 0, src_bry.length, Bry_.new_a7(find), Bry_.new_a7(repl));
|
||||
Tfds.Eq_str(expd, src_bry);
|
||||
}
|
||||
}
|
@ -31,6 +31,9 @@ public class Xop_uniq_mgr { // REF.MW:/parser/StripState.php
|
||||
return key;
|
||||
}
|
||||
// Looks up key (its full length) in general_trie via Match_exact and returns the result cast to byte[].
// NOTE(review): presumably null when the key is not registered -- confirm against the trie's Match_exact.
public byte[] Get(byte[] key) {return (byte[])general_trie.Match_exact(key, 0, key.length);}
|
||||
public byte[] Unstrip_both(byte[] src) {
|
||||
return Convert(src);
|
||||
}
|
||||
public byte[] Convert(byte[] src) {
|
||||
if (general_trie.Count() == 0) return src;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user