1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2025-05-30 14:04:56 +00:00

Lst: Handle keys with whitespace [#720]

This commit is contained in:
gnosygnu 2020-05-10 09:37:04 -04:00
parent 994d405a26
commit 49f8d4b000
2 changed files with 122 additions and 55 deletions

View File

@ -1,87 +1,93 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.lst; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
import org.junit.*;
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.lst;
import gplx.Io_mgr;
import gplx.String_;
import gplx.xowa.Xop_fxt;
import org.junit.Before;
import org.junit.Test;
public class Lst_pfunc_lst_tst {
@Before public void init() {fxt.Clear();} private Lst_pfunc_lst_fxt fxt = new Lst_pfunc_lst_fxt();
@Test public void Bgn_only() {
@Test public void Bgn_only() {
fxt.Clear().Page_txt_("a<section begin=key0/>val0<section end=key0/> b").Test_lst("{{#lst:section_test|key0}}", "val0");
}
@Test public void Multiple() {
@Test public void Multiple() {
fxt.Clear().Page_txt_("a<section begin=key0/>val00<section end=key0/> b<section begin=key0/> val01<section end=key0/> c").Test_lst("{{#lst:section_test|key0}}", "val00 val01");
}
@Test public void Range() {
@Test public void Range() {
fxt.Clear().Page_txt_("a<section begin=key0/>val0<section end=key0/> b<section begin=key1/> val1<section end=key1/> c<section begin=key2/> val2<section end=key2/> d")
.Test_lst("{{#lst:section_test|key0|key2}}", "val0 b val1 c val2");
}
@Test public void Nest() {
@Test public void Nest() {
fxt.Clear().Page_txt_("<section begin=key0/>val0<section begin=key00/> val00<section end=key00/><section end=key0/>").Test_lst("{{#lst:section_test|key0}}", "val0 val00");
}
@Test public void Wikitext() { // PURPOSE: assert section is expanded to html
@Test public void Wikitext() { // PURPOSE: assert section is expanded to html
fxt.Clear().Page_txt_("a<section begin=key0/>''val0''<section end=key0/> b").Test_lst("{{#lst:section_test|key0}}", "<i>val0</i>");
}
@Test public void Refs_ignored() { // PURPOSE: assert that nearby refs are ignored
@Test public void Refs_ignored() { // PURPOSE: assert that nearby refs are ignored
fxt.Clear().Page_txt_("a<section begin=key0/>val0<ref>ref1</ref><section end=key0/> b <ref>ref2</ref>").Test_lst("{{#lst:section_test|key0}}<references/>", String_.Concat_lines_nl
( "val0<sup id=\"cite_ref-0\" class=\"reference\"><a href=\"#cite_note-0\">[1]</a></sup><ol class=\"references\">"
, "<li id=\"cite_note-0\"><span class=\"mw-cite-backlink\"><a href=\"#cite_ref-0\">^</a></span> <span class=\"reference-text\">ref1</span></li>"
, "</ol>"
));
}
@Test public void Missing_bgn_end() {
@Test public void Missing_bgn_end() {
fxt.Page_txt_("a<section bgn=key0/> b<section end=key0/> c");
fxt.Clear().Test_lst("{{#lst:section_test}}", "a b c");
}
@Test public void Missing_bgn() {
@Test public void Missing_bgn() {
fxt.Page_txt_("a<section bgn=key0/> b<section end=key0/> c");
fxt.Clear().Test_lst("{{#lst:section_test||key0}}", "a b");
}
@Test public void Missing_end() {
@Test public void Missing_end() {
fxt.Page_txt_("a <section begin=key0/>val0<section end=key1/> b");
fxt.Clear().Test_lst("{{#lst:section_test|key0}}", "val0 b"); // end is missing; read to end;
}
@Test public void Missing_end_noinclude() { // EX: de.wikisource.org/wiki/Versuch_einer_mokscha-mordwinischen_Grammatik/Mokscha-Texte; Seite:Ahlqvist_Forschungen_auf_dem_Gebiete_der_ural-altaischen_Sprachen_I.pdf/111
@Test public void Missing_end_noinclude() { // EX: de.wikisource.org/wiki/Versuch_einer_mokscha-mordwinischen_Grammatik/Mokscha-Texte; Seite:Ahlqvist_Forschungen_auf_dem_Gebiete_der_ural-altaischen_Sprachen_I.pdf/111
fxt.Page_txt_("a <section begin=key0/>val0<section end=key1/> b<noinclude>c</noinclude>");
fxt.Clear().Test_lst("{{#lst:section_test|key0}}", "val0 b"); // end is missing; ignore noinclude
}
@Test public void Missing_bgn_dupe() {
@Test public void Missing_bgn_dupe() {
fxt.Page_txt_("a <section begin=key0/>val0<section end=key0/> b<section begin=key1/>val1<section end=key0/>");
fxt.Clear().Test_lst("{{#lst:section_test|key0}}", "val0");
}
@Test public void Nowiki() { // PURPOSE.fix: <nowiki> was creating incorrect sections; DATE:2013-07-11
@Test public void Nowiki() { // PURPOSE.fix: <nowiki> was creating incorrect sections; DATE:2013-07-11
fxt.Clear().Page_txt_("a<nowiki>''c''</nowiki><section begin=key0/>val0<section end=key0/> b").Test_lst("{{#lst:section_test|key0}}", "val0");
}
@Test public void Fullpagename() { // PURPOSE.fix: lst creates its own ctx; make sure ctx has same page_name of calling page (Test page) not default (Main page); DATE:2013-07-11
@Test public void Fullpagename() { // PURPOSE.fix: lst creates its own ctx; make sure ctx has same page_name of calling page (Test page) not default (Main page); DATE:2013-07-11
fxt.Clear().Page_txt_("a <section begin=key0/>{{FULLPAGENAME}}<section end=key0/> b").Test_lst("{{#lst:section_test|key0}}", "Test page");
}
@Test public void Nested_forbid_recursion() { // PURPOSE: forbid recursive calls; DATE:2014-02-09
@Test public void Nested_forbid_recursion() { // PURPOSE: forbid recursive calls; DATE:2014-02-09
fxt.Fxt().Init_page_create("Sub_0", "<section begin=key_0 />a<section end=key_0 />{{#lst:Sub_0|key_0}}"); // NOTE: recursive call to self
fxt.Fxt().Test_parse_page_all_str("{{#lst:Sub_0|key_0}}", "a");
}
@Test public void Nested_allow() { // PURPOSE: allow nested calls; DATE:2014-02-09
@Test public void Nested_allow() { // PURPOSE: allow nested calls; DATE:2014-02-09
fxt.Fxt().Init_page_create("Template:Sub_1", "<section begin=key_1 />b<section end=key_1 />");
fxt.Fxt().Init_page_create("Sub_0", "<section begin=key_0 />a{{Sub_1}}<section end=key_0 />");
fxt.Fxt().Test_parse_page_all_str("{{#lst:Sub_0|key_0}}", "ab");
}
@Test public void Nested_recursion() { // PURPOSE: allow nested calls; it.s:Main_Page; DATE:2014-02-09
@Test public void Nested_recursion() { // PURPOSE: allow nested calls; it.s:Main_Page; DATE:2014-02-09
fxt.Fxt().Init_page_create("Sub_1", "<section begin=key_0 />b<section end=key_0 />");
fxt.Fxt().Init_page_create("Template:Sub_1", "{{#section:Sub_1|key_0}}");
fxt.Fxt().Init_page_create("Sub_0", "<section begin=key_0 />a{{Sub_1}}<section end=key_0 />");
fxt.Fxt().Test_parse_page_all_str("{{#section:Sub_0|key_0}}", "ab");
}
@Test public void Nested__ref() { // PURPOSE: handle tags; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-02
@Test public void Nested__ref() { // PURPOSE: handle tags; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-02
fxt.Fxt().Init_page_create("Template:TagTemplate", "<ref>xyz</ref>");
fxt.Fxt().Init_page_create("PoemPage", "<poem>A{{TagTemplate}}B</poem>");
fxt.Fxt().Test_parse_page_all_str("{{#section:PoemPage}}<references/>", String_.Replace(String_.Concat_lines_nl_skip_last
@ -95,7 +101,7 @@ public class Lst_pfunc_lst_tst {
, ""
), "'", "\""));
}
@Test public void Nested__ref_poem() { // PURPOSE: handle tags; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-02
@Test public void Nested__ref_poem() { // PURPOSE: handle tags; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-02
fxt.Fxt().Init_page_create("Template:TagTemplate", "{{#tag:ref|abc<poem>def</poem>xyz}}");
fxt.Fxt().Init_page_create("PoemPage", String_.Concat_lines_nl_skip_last
( "<poem>A{{TagTemplate}}"
@ -118,6 +124,15 @@ public class Lst_pfunc_lst_tst {
, ""
), "'", "\""));
}
@Test public void Whitespace() {
// NOTE: parse attribs with whitespace; EX: `bgn=a b`; ISSUE#:720; DATE:2020-05-09
fxt.Clear().Page_txt_("a<section begin=x y/>b<section end=x y/> c");
fxt.Test_lst("{{#lst:section_test|x y}}", "b");
// parse multiple attributes
fxt.Page_txt_("a<section begin=x y invalid=z/>b<section end=x y invalid=z/> c");
fxt.Clear().Test_lst("{{#lst:section_test|x y}}", "b");
}
}
class Lst_pfunc_lst_fxt {
public Lst_pfunc_lst_fxt Clear() {

View File

@ -1,22 +1,41 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.lst; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
import gplx.core.primitives.*;
import gplx.xowa.langs.*; import gplx.xowa.htmls.core.htmls.*;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*;
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.lst;
import gplx.Bry_;
import gplx.Bry_bfr;
import gplx.Bry_find_;
import gplx.Byte_ascii;
import gplx.Hash_adp_bry;
import gplx.core.primitives.Byte_obj_val;
import gplx.xowa.Xoae_app;
import gplx.xowa.Xoae_page;
import gplx.xowa.Xowe_wiki;
import gplx.xowa.htmls.core.htmls.Xoh_html_wtr;
import gplx.xowa.htmls.core.htmls.Xoh_wtr_ctx;
import gplx.xowa.langs.Xol_lang_itm;
import gplx.xowa.langs.Xol_lang_stub_;
import gplx.xowa.parsers.Xop_ctx;
import gplx.xowa.parsers.Xop_root_tkn;
import gplx.xowa.parsers.htmls.Mwh_atr_itm;
import gplx.xowa.parsers.htmls.Mwh_atr_itm_owner1;
import gplx.xowa.parsers.xndes.Xop_xnde_tkn;
import gplx.xowa.xtns.Xox_xnde;
import gplx.xowa.xtns.Xox_xnde_;
public class Lst_section_nde implements Xox_xnde, Mwh_atr_itm_owner1 {
public byte[] Section_name() {return section_name;} private byte[] section_name;
public void Xatr__set(Xowe_wiki wiki, byte[] src, Mwh_atr_itm xatr, Object xatr_id_obj) {
@ -24,7 +43,40 @@ public class Lst_section_nde implements Xox_xnde, Mwh_atr_itm_owner1 {
byte xatr_id = ((Byte_obj_val)xatr_id_obj).Val();
switch (xatr_id) {
case Xatr_name: case Xatr_bgn: case Xatr_end:
section_name = xatr.Val_as_bry(); name_tid = xatr_id; break;
name_tid = xatr_id;
int valBgn = xatr.Val_bgn();
byte b = src[valBgn - 1];
// previous byte is a quote
if (b == '"' || b == '\'') {
// then use standard xoHtmlParser
section_name = xatr.Val_as_bry();
}
// previous byte is not a quote (= or whitespace)
else {
// NOTE: parse attribs with whitespace; EX: `bgn=a b`; ISSUE#:720; DATE:2020-05-09
// MW has different logic specific to LST: REF.MW:https://github.com/wikimedia/mediawiki-extensions-LabeledSectionTransclusion/blob/master/includes/LabeledSectionTransclusion.php#L128-L144
int srcLen = src.length;
int valPos = valBgn;
int valEnd = -1;
while (valPos < srcLen) {
b = src[valPos];
switch (b) {
case '/': // majority occurrence; EX: <section begin=a b />
case '>': // should not happen, but just in case; EX: <section begin=a b ></section>
valEnd = valPos;
valPos = srcLen;
break;
case '=':// may not happen, but this is what regex allows; EX: <section begin=a b someOtherAttribute=c d></section>
valEnd = Bry_find_.Find_bwd_ws(src, valPos, valBgn);
valPos = srcLen;
break;
}
valPos++;
}
section_name = Bry_.Trim(Bry_.Mid(src, valBgn, valEnd));
}
break;
}
}
public Xop_xnde_tkn Xnde() {return xnde;} private Xop_xnde_tkn xnde;