mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v1.7.4.1
This commit is contained in:
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.apps.fsys.*; import gplx.xowa.wikis.*; import gplx.xowa.xtns.*; import gplx.html.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.apps.fsys.*; import gplx.xowa.wikis.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.strings.*; import gplx.html.*;
|
||||
import gplx.xowa.parsers.logs.*;
|
||||
public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
@@ -431,18 +431,6 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
Tblw_end(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, end_tag_id);
|
||||
return cur_pos;
|
||||
}
|
||||
byte wlst_tid = Xop_list_tkn_.List_itmTyp_null;
|
||||
switch (end_tag_id) {
|
||||
case Xop_xnde_tag_.Tid_ul: wlst_tid = Xop_list_tkn_.List_itmTyp_ul; break;
|
||||
case Xop_xnde_tag_.Tid_ol: wlst_tid = Xop_list_tkn_.List_itmTyp_ol; break;
|
||||
case Xop_xnde_tag_.Tid_dd: wlst_tid = Xop_list_tkn_.List_itmTyp_dd; break;
|
||||
case Xop_xnde_tag_.Tid_dt: wlst_tid = Xop_list_tkn_.List_itmTyp_dt; break;
|
||||
}
|
||||
if ( wlst_tid != Xop_list_tkn_.List_itmTyp_null // end tag is list; EX: </ul>
|
||||
&& ctx.List().List_dirty() // wlst is in effect; EX: *a
|
||||
) {
|
||||
ctx.List().AutoClose(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, tkn_mkr.List_end(cur_pos, wlst_tid)); // xnde can close wlst; PAGE:en.w:Bristol_Bullfinch DATE:2014-06-24
|
||||
}
|
||||
if (end_tag.Empty_ignored() && ctx.Empty_ignored() // emulate TidyHtml logic for pruning empty tags; EX: "<li> </li>" -> "")
|
||||
&& bgn_nde != null) { // bgn_nde will be null if only end_nde; EX:WP:Sukhoi Su-47; "* </li>"
|
||||
ctx.Empty_ignore(root, bgn_nde.Tkn_sub_idx());
|
||||
@@ -487,14 +475,17 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
}
|
||||
if (end_tag.Restricted()) // restricted tags (like <script>) are not placed on stack; for now, just write it out
|
||||
ctx.Subs_add(root, tkn_mkr.Bry(bgn_pos, cur_pos, Bry_.Add(gplx.html.Html_entity_.Lt_bry, Bry_.Mid(src, bgn_pos + 1, cur_pos)))); // +1 to skip <
|
||||
else {
|
||||
else {
|
||||
if (pre2_pending) {
|
||||
// ctx.Subs_add(root, tkn_mkr.Bry(bgn_pos, cur_pos, src));
|
||||
pre2_pending = false;
|
||||
return ctx.Lxr_make_txt_(cur_pos);
|
||||
}
|
||||
else
|
||||
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, cur_pos, Xop_ignore_tkn.Ignore_tid_xnde_dangling));
|
||||
else {
|
||||
if (end_tag.Xtn()) // if xtn end tag, ignore it; tidy / browser doesn't know about xtn_tags like "</poem>" so these need to be hidden, else they will show; DATE:2014-07-22
|
||||
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, cur_pos, Xop_ignore_tkn.Ignore_tid_xnde_dangling));
|
||||
else // regular tag; show it; depend on tidy to clean up; DATE:2014-07-22
|
||||
ctx.Subs_add(root, tkn_mkr.Bry(src, bgn_pos, cur_pos));
|
||||
}
|
||||
}
|
||||
ctx.Para().Process_block__xnde(end_tag, end_tag.Block_close());
|
||||
|
||||
@@ -549,10 +540,10 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
return found ? rv : Bry_.NotFound;
|
||||
}
|
||||
private int Find_xtn_end_lhs(Xop_ctx ctx, Xop_xnde_tag tag, byte[] src, int src_len, int open_bgn, int open_end, byte[] close_bry) {
|
||||
int tag_bgn = open_bgn - Pf_tag.Xtag_len;
|
||||
int tag_bgn = open_bgn - Pfunc_tag.Xtag_len;
|
||||
if (tag_bgn > -1
|
||||
&& Bry_.Eq(Pf_tag.Xtag_bgn_lhs, src, tag_bgn, tag_bgn + Pf_tag.Xtag_bgn_lhs.length)) // xtn created by tag
|
||||
return Find_xtn_end_tag(ctx, src, src_len, open_end, close_bry, tag_bgn + Pf_tag.Xtag_bgn);
|
||||
&& Bry_.Eq(Pfunc_tag.Xtag_bgn_lhs, src, tag_bgn, tag_bgn + Pfunc_tag.Xtag_bgn_lhs.length)) // xtn created by tag
|
||||
return Find_xtn_end_tag(ctx, src, src_len, open_end, close_bry, tag_bgn + Pfunc_tag.Xtag_bgn);
|
||||
else { // search rest of String for case-insensitive name; NOTE: used to do CS first, then fall-back on CI; DATE:2013-12-02
|
||||
xtn_end_tag_trie.Clear();
|
||||
xtn_end_tag_trie.Add_obj(close_bry, close_bry);
|
||||
@@ -569,9 +560,9 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
int tag_id = Bry_.Xto_int_or(src, tag_bgn, tag_bgn + 10, -1);
|
||||
if (tag_id == -1) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not extract int: page=~{0}", ctx.Cur_page().Url().Xto_full_str_safe()); return Bry_finder.Not_found;}
|
||||
Bry_bfr tmp = ctx.Wiki().Utl_bry_bfr_mkr().Get_b128();
|
||||
tmp.Add(Pf_tag.Xtag_end_lhs).Add_int_pad_bgn(Byte_ascii.Num_0, 10, tag_id).Add(Pf_tag.Xtag_rhs);
|
||||
tmp.Add(Pfunc_tag.Xtag_end_lhs).Add_int_pad_bgn(Byte_ascii.Num_0, 10, tag_id).Add(Pfunc_tag.Xtag_rhs);
|
||||
byte[] tag_end = tmp.Mkr_rls().XtoAryAndClear();
|
||||
int rv = Bry_finder.Find_fwd(src, tag_end, open_end + Pf_tag.Xtag_rhs.length);
|
||||
int rv = Bry_finder.Find_fwd(src, tag_end, open_end + Pfunc_tag.Xtag_rhs.length);
|
||||
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find end: page=~{0}", ctx.Cur_page().Url().Xto_full_str_safe()); return Bry_finder.Not_found;}
|
||||
rv = Bry_finder.Find_bwd(src, Byte_ascii.Lt, rv - 1);
|
||||
if (rv == Bry_finder.Not_found) {ctx.App().Usr_dlg().Warn_many("", "", "parser.xtn: could not find <: page=~{0}", ctx.Cur_page().Url().Xto_full_str_safe()); return Bry_finder.Not_found;}
|
||||
|
||||
@@ -66,14 +66,14 @@ public class Xop_xnde_wkr__basic_tst {
|
||||
fxt.Test_parse_page_wiki("<ref name=a/b/>", fxt.tkn_xnde_(0, 15).Atrs_rng_(5, 13));
|
||||
}
|
||||
@Test public void Escaped() {
|
||||
fxt.Init_log_(Xop_xnde_log.Escaped_xnde).Test_parse_page_wiki("<div></span></div>", fxt.tkn_xnde_(0, 18).Subs_(fxt.tkn_ignore_(5, 12, Xop_ignore_tkn.Ignore_tid_xnde_dangling)));
|
||||
fxt.Init_log_(Xop_xnde_log.Escaped_xnde).Test_parse_page_wiki("<div></span></div>", fxt.tkn_xnde_(0, 18).Subs_(fxt.tkn_bry_(5, 12)));// TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
}
|
||||
@Test public void Nest() {// REVISIT: 2nd <b> should be converted to </b>; other </b> ignored; WHEN: with example
|
||||
fxt.Init_log_(Xop_xnde_log.Invalid_nest, Xop_xnde_log.Escaped_xnde).Test_parse_page_wiki("a<b>b<b>c</b>d</b>e"
|
||||
, fxt.tkn_txt_ ( 0, 1)
|
||||
, fxt.tkn_xnde_ ( 1, 13).Subs_(fxt.tkn_txt_(4, 9))
|
||||
, fxt.tkn_txt_ (13, 14)
|
||||
, fxt.tkn_ignore_(14, 18, Xop_ignore_tkn.Ignore_tid_xnde_dangling)
|
||||
, fxt.tkn_bry_(14, 18) // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
, fxt.tkn_txt_ (18, 19)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
import org.junit.*; import gplx.xowa.parsers.lists.*;
|
||||
public class Xop_xnde_wkr__err_dangling_tst {
|
||||
private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void term() {fxt.Init_para_n_();}
|
||||
|
||||
@@ -37,7 +37,7 @@ public class Xop_xnde_wkr__err_malformed_tst {
|
||||
));
|
||||
}
|
||||
@Test public void Incomplete_tag_div() { // PURPOSE: handle broken tags; EX: <div a </div> -> <div a; DATE:2014-02-03
|
||||
fxt.Test_parse_page_all_str("<div a </div>", "<div a "); // note that "<div a " is escaped (not considered xnde; while "</div>" is dropped (dangling end xndes are ignored)
|
||||
fxt.Test_parse_page_all_str("<div a </div>", "<div a </div>"); // note that "<div a " is escaped (not considered xnde; while "</div>" is literally printed; // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
}
|
||||
@Test public void Incomplete_tag_ref() {// PURPOSE: invalid tag shouldn't break parser; EX:w:Cullen_(surname); "http://www.surnamedb.com/Surname/Cullen<ref"
|
||||
fxt.Test_parse_page_all_str("a<ref", "a<ref");
|
||||
@@ -56,7 +56,7 @@ public class Xop_xnde_wkr__err_malformed_tst {
|
||||
, "</div>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<div>"
|
||||
, "<table><center>"
|
||||
, "<table><center></div>" // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
, " <tr>"
|
||||
, " <td>"
|
||||
, " </td>"
|
||||
|
||||
@@ -40,7 +40,7 @@ public class Xop_xnde_wkr__err_misc_tst {
|
||||
));
|
||||
fxt.Init_para_n_();
|
||||
}
|
||||
@Test public void Div_should_not_pop_past_td() { // PURPOSE: extra </div> should not close <div> that is outside of <td>; PAGE:en.w:Rome and Ankara
|
||||
@Test public void Div_should_not_pop_past_td() { // PURPOSE: extra </div> should not close <div> that is outside of <td>; PAGE:en.w:Rome en.w:Ankara
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
@@ -94,6 +94,7 @@ public class Xop_xnde_wkr__err_misc_tst {
|
||||
, "<p>b"
|
||||
, "</p>"
|
||||
, "</div>"
|
||||
, "</div>" // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
, " </td>"
|
||||
, " <td>"
|
||||
, "<div>"
|
||||
|
||||
@@ -62,18 +62,18 @@ public class Xop_xnde_wkr__li_tst {
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Empty_ignored_error() { // EX:WP:Sukhoi Su-47; "* </li>" causes error b/c </li> tries to close non-existent node
|
||||
@Test public void Empty_ignored_error() { // PAGE:en.w:Sukhoi_Su-47; "* </li>" causes error b/c </li> tries to close non-existent node
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "* a"
|
||||
, "* </li>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li> a"
|
||||
, " </li>"
|
||||
, " <li> "
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
( "* a"
|
||||
, "* </li>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<ul>"
|
||||
, " <li> a"
|
||||
, " </li>"
|
||||
, " <li> </li>" // TIDY.dangling: tidy will correct dangling node; DATE:2014-07-22
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
}
|
||||
@Test public void Insert_nl() {// PURPOSE: <li> should always be separated by nl, or else items will merge, creating long horizontal scroll bar; EX:w:Music
|
||||
fxt.Init_para_y_();
|
||||
|
||||
@@ -45,7 +45,7 @@ public class Xop_xnde_wkr__nowiki_tst {
|
||||
( "a<h1>b<h6>c"
|
||||
));
|
||||
}
|
||||
@Test public void Lnke() { // PAGE:en.w:Doomsday argument; <nowiki>[0, 1]</nowiki>
|
||||
@Test public void Lnke() { // PAGE:en.w:Doomsday_argument; <nowiki>[0, 1]</nowiki>
|
||||
fxt.Test_parse_page_wiki_str("a <nowiki>[0, 1]</nowiki> b", "a [0, 1] b"); // NOTE: not "0" + Byte_.XtoStr(160) + "1"; depend on browser to translate
|
||||
}
|
||||
@Test public void Xatrs_val_text() {
|
||||
@@ -104,4 +104,9 @@ public class Xop_xnde_wkr__nowiki_tst {
|
||||
@Test public void Code() { // PURPOSE.fix:HtmlNcr-escaped refs were being ignored; caused by HtmlTidy fix for frwiki templates;DATE:2013-06-27
|
||||
fxt.Test_parse_page_all_str("<code><nowiki>|:</nowiki></code>", "<code>|:</code>");
|
||||
}
|
||||
@Test public void Brack_end() { // PURPOSE: check that "]" is escaped; PAGE:en.w:Tall_poppy_syndrome; DATE:2014-07-23
|
||||
fxt.Test_parse_page_all_str
|
||||
( "<nowiki>[</nowiki>[[A]]<nowiki>]</nowiki>"
|
||||
, "[<a href=\"/wiki/A\">A</a>]"); // was showing up as [[[A]]]
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user