1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2025-05-31 22:44:34 +00:00

Lst: Handle keys with whitespace [#720]

This commit is contained in:
gnosygnu 2020-05-10 09:37:04 -04:00
parent 994d405a26
commit 49f8d4b000
2 changed files with 122 additions and 55 deletions

View File

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,75 +13,81 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.lst; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
import org.junit.*;
package gplx.xowa.xtns.lst;
import gplx.Io_mgr;
import gplx.String_;
import gplx.xowa.Xop_fxt;
import org.junit.Before;
import org.junit.Test;
public class Lst_pfunc_lst_tst {
// fixture shared by every test in this class
private Lst_pfunc_lst_fxt fxt = new Lst_pfunc_lst_fxt();
// reset the fixture before each test runs
@Before public void init() {
	fxt.Clear();
}
@Test public void Bgn_only() {
@Test public void Bgn_only() {
	// single begin / end pair: expect only the text between the two markers
	String pageText = "a<section begin=key0/>val0<section end=key0/> b";
	String expected = "val0";
	fxt.Clear().Page_txt_(pageText).Test_lst("{{#lst:section_test|key0}}", expected);
}
@Test public void Multiple() {
@Test public void Multiple() {
	// the same key marks two separate sections: expect both bodies, in page order
	String pageText = "a<section begin=key0/>val00<section end=key0/> b<section begin=key0/> val01<section end=key0/> c";
	String expected = "val00 val01";
	fxt.Clear().Page_txt_(pageText).Test_lst("{{#lst:section_test|key0}}", expected);
}
@Test public void Range() {
@Test public void Range() {
	// two keys are passed (key0 and key2): expect everything from the begin of key0 through the end of key2
	String pageText = "a<section begin=key0/>val0<section end=key0/> b<section begin=key1/> val1<section end=key1/> c<section begin=key2/> val2<section end=key2/> d";
	String expected = "val0 b val1 c val2";
	fxt.Clear().Page_txt_(pageText)
		.Test_lst("{{#lst:section_test|key0|key2}}", expected);
}
@Test public void Nest() {
@Test public void Nest() {
	// key00 is nested inside key0: requesting key0 is expected to include the nested section's text
	String pageText = "<section begin=key0/>val0<section begin=key00/> val00<section end=key00/><section end=key0/>";
	String expected = "val0 val00";
	fxt.Clear().Page_txt_(pageText).Test_lst("{{#lst:section_test|key0}}", expected);
}
@Test public void Wikitext() { // PURPOSE: assert section is expanded to html
@Test public void Wikitext() {
	// PURPOSE: assert that wikitext inside the section is expanded to html
	String pageText = "a<section begin=key0/>''val0''<section end=key0/> b";
	String expected = "<i>val0</i>";
	fxt.Clear().Page_txt_(pageText).Test_lst("{{#lst:section_test|key0}}", expected);
}
@Test public void Refs_ignored() { // PURPOSE: assert that nearby refs are ignored
@Test public void Refs_ignored() {
	// PURPOSE: assert that a ref outside the section (ref2) is ignored; only ref1 is expected in the references list
	String pageText = "a<section begin=key0/>val0<ref>ref1</ref><section end=key0/> b <ref>ref2</ref>";
	fxt.Clear().Page_txt_(pageText).Test_lst
	( "{{#lst:section_test|key0}}<references/>"
	, String_.Concat_lines_nl
		( "val0<sup id=\"cite_ref-0\" class=\"reference\"><a href=\"#cite_note-0\">[1]</a></sup><ol class=\"references\">"
		, "<li id=\"cite_note-0\"><span class=\"mw-cite-backlink\"><a href=\"#cite_ref-0\">^</a></span> <span class=\"reference-text\">ref1</span></li>"
		, "</ol>"
		)
	);
}
@Test public void Missing_bgn_end() {
@Test public void Missing_bgn_end() {
	// no keys are passed to #lst: the expected output is the page text without the section tags
	String pageText = "a<section bgn=key0/> b<section end=key0/> c";
	String expected = "a b c";
	fxt.Page_txt_(pageText);
	fxt.Clear().Test_lst("{{#lst:section_test}}", expected);
}
@Test public void Missing_bgn() {
@Test public void Missing_bgn() {
	// begin key is empty and only the end key is given: the expected output stops at the end marker
	String pageText = "a<section bgn=key0/> b<section end=key0/> c";
	String expected = "a b";
	fxt.Page_txt_(pageText);
	fxt.Clear().Test_lst("{{#lst:section_test||key0}}", expected);
}
@Test public void Missing_end() {
@Test public void Missing_end() {
	// the page has no end marker for key0 (the only end marker is for key1)
	String pageText = "a <section begin=key0/>val0<section end=key1/> b";
	// end is missing; read to end
	String expected = "val0 b";
	fxt.Page_txt_(pageText);
	fxt.Clear().Test_lst("{{#lst:section_test|key0}}", expected);
}
@Test public void Missing_end_noinclude() { // EX: de.wikisource.org/wiki/Versuch_einer_mokscha-mordwinischen_Grammatik/Mokscha-Texte; Seite:Ahlqvist_Forschungen_auf_dem_Gebiete_der_ural-altaischen_Sprachen_I.pdf/111
@Test public void Missing_end_noinclude() {
	// EX: de.wikisource.org/wiki/Versuch_einer_mokscha-mordwinischen_Grammatik/Mokscha-Texte; Seite:Ahlqvist_Forschungen_auf_dem_Gebiete_der_ural-altaischen_Sprachen_I.pdf/111
	String pageText = "a <section begin=key0/>val0<section end=key1/> b<noinclude>c</noinclude>";
	// end is missing; the trailing noinclude content is not expected in the output
	String expected = "val0 b";
	fxt.Page_txt_(pageText);
	fxt.Clear().Test_lst("{{#lst:section_test|key0}}", expected);
}
@Test public void Missing_bgn_dupe() {
@Test public void Missing_bgn_dupe() {
	// a second end=key0 appears without a matching begin=key0: only the first section is expected
	String pageText = "a <section begin=key0/>val0<section end=key0/> b<section begin=key1/>val1<section end=key0/>";
	String expected = "val0";
	fxt.Page_txt_(pageText);
	fxt.Clear().Test_lst("{{#lst:section_test|key0}}", expected);
}
@Test public void Nowiki() { // PURPOSE.fix: <nowiki> was creating incorrect sections; DATE:2013-07-11
@Test public void Nowiki() {
	// PURPOSE.fix: <nowiki> was creating incorrect sections; DATE:2013-07-11
	String pageText = "a<nowiki>''c''</nowiki><section begin=key0/>val0<section end=key0/> b";
	String expected = "val0";
	fxt.Clear().Page_txt_(pageText).Test_lst("{{#lst:section_test|key0}}", expected);
}
@Test public void Fullpagename() { // PURPOSE.fix: lst creates its own ctx; make sure ctx has same page_name of calling page (Test page) not default (Main page); DATE:2013-07-11
@Test public void Fullpagename() {
	// PURPOSE.fix: lst creates its own ctx; make sure ctx has same page_name of calling page (Test page) not default (Main page); DATE:2013-07-11
	String pageText = "a <section begin=key0/>{{FULLPAGENAME}}<section end=key0/> b";
	String expected = "Test page";
	fxt.Clear().Page_txt_(pageText).Test_lst("{{#lst:section_test|key0}}", expected);
}
@Test public void Nested_forbid_recursion() { // PURPOSE: forbid recursive calls; DATE:2014-02-09
@Test public void Nested_forbid_recursion() {
	// PURPOSE: forbid recursive calls; DATE:2014-02-09
	// NOTE: the page body ends with a recursive #lst call to the page itself
	String selfReferencingPage = "<section begin=key_0 />a<section end=key_0 />{{#lst:Sub_0|key_0}}";
	String expected = "a";
	fxt.Fxt().Init_page_create("Sub_0", selfReferencingPage);
	fxt.Fxt().Test_parse_page_all_str("{{#lst:Sub_0|key_0}}", expected);
}
@Test public void Nested_allow() { // PURPOSE: allow nested calls; DATE:2014-02-09
@Test public void Nested_allow() {
	// PURPOSE: allow nested calls; DATE:2014-02-09
	// Sub_0's section transcludes Template:Sub_1, which carries its own section tags
	String templateText = "<section begin=key_1 />b<section end=key_1 />";
	String pageText = "<section begin=key_0 />a{{Sub_1}}<section end=key_0 />";
	String expected = "ab";
	fxt.Fxt().Init_page_create("Template:Sub_1", templateText);
	fxt.Fxt().Init_page_create("Sub_0", pageText);
	fxt.Fxt().Test_parse_page_all_str("{{#lst:Sub_0|key_0}}", expected);
}
@Test public void Nested_recursion() { // PURPOSE: allow nested calls; it.s:Main_Page; DATE:2014-02-09
@Test public void Nested_recursion() {
	// PURPOSE: allow nested calls; it.s:Main_Page; DATE:2014-02-09
	// Sub_0 -> Template:Sub_1 -> #section on page Sub_1, which uses the same key name (key_0)
	String innerPageText = "<section begin=key_0 />b<section end=key_0 />";
	String templateText = "{{#section:Sub_1|key_0}}";
	String outerPageText = "<section begin=key_0 />a{{Sub_1}}<section end=key_0 />";
	String expected = "ab";
	fxt.Fxt().Init_page_create("Sub_1", innerPageText);
	fxt.Fxt().Init_page_create("Template:Sub_1", templateText);
	fxt.Fxt().Init_page_create("Sub_0", outerPageText);
	fxt.Fxt().Test_parse_page_all_str("{{#section:Sub_0|key_0}}", expected);
}
@Test public void Nested__ref() { // PURPOSE: handle tags; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-02
@Test public void Nested__ref() { // PURPOSE: handle tags; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-02
fxt.Fxt().Init_page_create("Template:TagTemplate", "<ref>xyz</ref>");
fxt.Fxt().Init_page_create("PoemPage", "<poem>A{{TagTemplate}}B</poem>");
fxt.Fxt().Test_parse_page_all_str("{{#section:PoemPage}}<references/>", String_.Replace(String_.Concat_lines_nl_skip_last
@ -95,7 +101,7 @@ public class Lst_pfunc_lst_tst {
, ""
), "'", "\""));
}
@Test public void Nested__ref_poem() { // PURPOSE: handle tags; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-02
@Test public void Nested__ref_poem() { // PURPOSE: handle tags; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-02
fxt.Fxt().Init_page_create("Template:TagTemplate", "{{#tag:ref|abc<poem>def</poem>xyz}}");
fxt.Fxt().Init_page_create("PoemPage", String_.Concat_lines_nl_skip_last
( "<poem>A{{TagTemplate}}"
@ -118,6 +124,15 @@ public class Lst_pfunc_lst_tst {
, ""
), "'", "\""));
}
@Test public void Whitespace() {
	// NOTE: parse attribs with whitespace; EX: `bgn=a b`; ISSUE#:720; DATE:2020-05-09
	String wikitext = "{{#lst:section_test|x y}}";
	String expected = "b";

	// unquoted key containing a space
	String singleAttribPage = "a<section begin=x y/>b<section end=x y/> c";
	fxt.Clear().Page_txt_(singleAttribPage);
	fxt.Test_lst(wikitext, expected);

	// parse multiple attributes: the key is followed by another, unrelated attribute
	String multiAttribPage = "a<section begin=x y invalid=z/>b<section end=x y invalid=z/> c";
	fxt.Page_txt_(multiAttribPage);
	fxt.Clear().Test_lst(wikitext, expected);
}
}
class Lst_pfunc_lst_fxt {
public Lst_pfunc_lst_fxt Clear() {

View File

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,10 +13,29 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.lst; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
import gplx.core.primitives.*;
import gplx.xowa.langs.*; import gplx.xowa.htmls.core.htmls.*;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*;
package gplx.xowa.xtns.lst;
import gplx.Bry_;
import gplx.Bry_bfr;
import gplx.Bry_find_;
import gplx.Byte_ascii;
import gplx.Hash_adp_bry;
import gplx.core.primitives.Byte_obj_val;
import gplx.xowa.Xoae_app;
import gplx.xowa.Xoae_page;
import gplx.xowa.Xowe_wiki;
import gplx.xowa.htmls.core.htmls.Xoh_html_wtr;
import gplx.xowa.htmls.core.htmls.Xoh_wtr_ctx;
import gplx.xowa.langs.Xol_lang_itm;
import gplx.xowa.langs.Xol_lang_stub_;
import gplx.xowa.parsers.Xop_ctx;
import gplx.xowa.parsers.Xop_root_tkn;
import gplx.xowa.parsers.htmls.Mwh_atr_itm;
import gplx.xowa.parsers.htmls.Mwh_atr_itm_owner1;
import gplx.xowa.parsers.xndes.Xop_xnde_tkn;
import gplx.xowa.xtns.Xox_xnde;
import gplx.xowa.xtns.Xox_xnde_;
public class Lst_section_nde implements Xox_xnde, Mwh_atr_itm_owner1 {
private byte[] section_name;
// returns the section name stored on this node
public byte[] Section_name() {
	return section_name;
}
public void Xatr__set(Xowe_wiki wiki, byte[] src, Mwh_atr_itm xatr, Object xatr_id_obj) {
@ -24,7 +43,40 @@ public class Lst_section_nde implements Xox_xnde, Mwh_atr_itm_owner1 {
byte xatr_id = ((Byte_obj_val)xatr_id_obj).Val();
switch (xatr_id) {
case Xatr_name: case Xatr_bgn: case Xatr_end:
section_name = xatr.Val_as_bry(); name_tid = xatr_id; break;
// remember which attribute (name / begin / end) supplied the section name
name_tid = xatr_id;
int valBgn = xatr.Val_bgn();
// inspect the byte immediately before the value to tell quoted from unquoted values
byte prv = src[valBgn - 1];
if (prv == '"' || prv == '\'') {
	// previous byte is a quote: use the value as parsed by the standard xoHtmlParser
	section_name = xatr.Val_as_bry();
}
else {
	// previous byte is not a quote (= or whitespace)
	// NOTE: parse attribs with whitespace; EX: `bgn=a b`; ISSUE#:720; DATE:2020-05-09
	// MW has different logic specific to LST: REF.MW:https://github.com/wikimedia/mediawiki-extensions-LabeledSectionTransclusion/blob/master/includes/LabeledSectionTransclusion.php#L128-L144
	int srcLen = src.length;
	int valEnd = -1;
	boolean scanning = true;
	// scan forward from the start of the value until the first terminator byte
	for (int valPos = valBgn; scanning && valPos < srcLen; valPos++) {
		switch (src[valPos]) {
			case '/': // majority occurrence; EX: <section begin=a b />
			case '>': // should not happen, but just in case; EX: <section begin=a b ></section>
				valEnd = valPos;
				scanning = false;
				break;
			case '=': // may not happen, but this is what regex allows; EX: <section begin=a b someOtherAttribute=c d></section>
				// the value ends at the whitespace that precedes the next attribute's key
				valEnd = Bry_find_.Find_bwd_ws(src, valPos, valBgn);
				scanning = false;
				break;
		}
	}
	if (valEnd < valBgn) {
		// no usable end was found: either no terminator exists before the end of src,
		// or '=' was reached and the backward whitespace search did not yield a position at / after valBgn
		// (presumably a negative not-found value -- confirm against Bry_find_); EX: <section begin=a=b/>
		// fall back to the standard parser's value rather than calling Bry_.Mid with an end before the start
		section_name = xatr.Val_as_bry();
	}
	else {
		section_name = Bry_.Trim(Bry_.Mid(src, valBgn, valEnd));
	}
}
break;
}
}
private Xop_xnde_tkn xnde;
// returns the xnde token stored on this node
public Xop_xnde_tkn Xnde() {
	return xnde;
}