From 49f8d4b0003b017fd50170e9860183f8bc7f65d7 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Sun, 10 May 2020 09:37:04 -0400 Subject: [PATCH] Lst: Handle keys with whitespace [#720] --- .../gplx/xowa/xtns/lst/Lst_pfunc_lst_tst.java | 85 ++++++++++------- .../gplx/xowa/xtns/lst/Lst_section_nde.java | 92 +++++++++++++++---- 2 files changed, 122 insertions(+), 55 deletions(-) diff --git a/400_xowa/src/gplx/xowa/xtns/lst/Lst_pfunc_lst_tst.java b/400_xowa/src/gplx/xowa/xtns/lst/Lst_pfunc_lst_tst.java index 3ca20329e..d6d6d02b1 100644 --- a/400_xowa/src/gplx/xowa/xtns/lst/Lst_pfunc_lst_tst.java +++ b/400_xowa/src/gplx/xowa/xtns/lst/Lst_pfunc_lst_tst.java @@ -1,87 +1,93 @@ -/* -XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com - -XOWA is licensed under the terms of the General Public License (GPL) Version 3, -or alternatively under the terms of the Apache License Version 2.0. - -You may use XOWA according to either of these licenses as is most appropriate -for your project on a case-by-case basis. - -The terms of each license can be found in the source code repository: - -GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt -Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt -*/ -package gplx.xowa.xtns.lst; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; -import org.junit.*; +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012-2020 gnosygnu@gmail.com + +XOWA is licensed under the terms of the General Public License (GPL) Version 3, +or alternatively under the terms of the Apache License Version 2.0. + +You may use XOWA according to either of these licenses as is most appropriate +for your project on a case-by-case basis. + +The terms of each license can be found in the source code repository: + +GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt +Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt +*/ +package gplx.xowa.xtns.lst; + +import gplx.Io_mgr; +import gplx.String_; +import gplx.xowa.Xop_fxt; +import org.junit.Before; +import org.junit.Test; + public class Lst_pfunc_lst_tst { @Before public void init() {fxt.Clear();} private Lst_pfunc_lst_fxt fxt = new Lst_pfunc_lst_fxt(); - @Test public void Bgn_only() { + @Test public void Bgn_only() { fxt.Clear().Page_txt_("a
val0
b").Test_lst("{{#lst:section_test|key0}}", "val0"); } - @Test public void Multiple() { + @Test public void Multiple() { fxt.Clear().Page_txt_("a
val00
b
val01
c").Test_lst("{{#lst:section_test|key0}}", "val00 val01"); } - @Test public void Range() { + @Test public void Range() { fxt.Clear().Page_txt_("a
val0
b
val1
c
val2
d") .Test_lst("{{#lst:section_test|key0|key2}}", "val0 b val1 c val2"); } - @Test public void Nest() { + @Test public void Nest() { fxt.Clear().Page_txt_("
val0
val00
").Test_lst("{{#lst:section_test|key0}}", "val0 val00"); } - @Test public void Wikitext() { // PURPOSE: assert section is expanded to html + @Test public void Wikitext() { // PURPOSE: assert section is expanded to html fxt.Clear().Page_txt_("a
''val0''
b").Test_lst("{{#lst:section_test|key0}}", "val0"); } - @Test public void Refs_ignored() { // PURPOSE: assert that nearby refs are ignored + @Test public void Refs_ignored() { // PURPOSE: assert that nearby refs are ignored fxt.Clear().Page_txt_("a
val0ref1
b ref2").Test_lst("{{#lst:section_test|key0}}", String_.Concat_lines_nl ( "val0[1]
    " , "
  1. ^ ref1
  2. " , "
" )); } - @Test public void Missing_bgn_end() { + @Test public void Missing_bgn_end() { fxt.Page_txt_("a
b
c"); fxt.Clear().Test_lst("{{#lst:section_test}}", "a b c"); } - @Test public void Missing_bgn() { + @Test public void Missing_bgn() { fxt.Page_txt_("a
b
c"); fxt.Clear().Test_lst("{{#lst:section_test||key0}}", "a b"); } - @Test public void Missing_end() { + @Test public void Missing_end() { fxt.Page_txt_("a
val0
b"); fxt.Clear().Test_lst("{{#lst:section_test|key0}}", "val0 b"); // end is missing; read to end; } - @Test public void Missing_end_noinclude() { // EX: de.wikisource.org/wiki/Versuch_einer_mokscha-mordwinischen_Grammatik/Mokscha-Texte; Seite:Ahlqvist_Forschungen_auf_dem_Gebiete_der_ural-altaischen_Sprachen_I.pdf/111 + @Test public void Missing_end_noinclude() { // EX: de.wikisource.org/wiki/Versuch_einer_mokscha-mordwinischen_Grammatik/Mokscha-Texte; Seite:Ahlqvist_Forschungen_auf_dem_Gebiete_der_ural-altaischen_Sprachen_I.pdf/111 fxt.Page_txt_("a
val0
bc"); fxt.Clear().Test_lst("{{#lst:section_test|key0}}", "val0 b"); // end is missing; ignore noinclude } - @Test public void Missing_bgn_dupe() { + @Test public void Missing_bgn_dupe() { fxt.Page_txt_("a
val0
b
val1
"); fxt.Clear().Test_lst("{{#lst:section_test|key0}}", "val0"); } - @Test public void Nowiki() { // PURPOSE.fix: was creating incorrect sections; DATE:2013-07-11 + @Test public void Nowiki() { // PURPOSE.fix: was creating incorrect sections; DATE:2013-07-11 fxt.Clear().Page_txt_("a''c''
val0
b").Test_lst("{{#lst:section_test|key0}}", "val0"); } - @Test public void Fullpagename() { // PURPOSE.fix: lst creates its own ctx; make sure ctx has same page_name of calling page (Test page) not default (Main page); DATE:2013-07-11 + @Test public void Fullpagename() { // PURPOSE.fix: lst creates its own ctx; make sure ctx has same page_name of calling page (Test page) not default (Main page); DATE:2013-07-11 fxt.Clear().Page_txt_("a
{{FULLPAGENAME}}
b").Test_lst("{{#lst:section_test|key0}}", "Test page"); } - @Test public void Nested_forbid_recursion() { // PURPOSE: forbid recursive calls; DATE:2014-02-09 + @Test public void Nested_forbid_recursion() { // PURPOSE: forbid recursive calls; DATE:2014-02-09 fxt.Fxt().Init_page_create("Sub_0", "
a
{{#lst:Sub_0|key_0}}"); // NOTE: recursive call to self fxt.Fxt().Test_parse_page_all_str("{{#lst:Sub_0|key_0}}", "a"); } - @Test public void Nested_allow() { // PURPOSE: allow nested calls; DATE:2014-02-09 + @Test public void Nested_allow() { // PURPOSE: allow nested calls; DATE:2014-02-09 fxt.Fxt().Init_page_create("Template:Sub_1", "
b
"); fxt.Fxt().Init_page_create("Sub_0", "
a{{Sub_1}}
"); fxt.Fxt().Test_parse_page_all_str("{{#lst:Sub_0|key_0}}", "ab"); } - @Test public void Nested_recursion() { // PURPOSE: allow nested calls; it.s:Main_Page; DATE:2014-02-09 + @Test public void Nested_recursion() { // PURPOSE: allow nested calls; it.s:Main_Page; DATE:2014-02-09 fxt.Fxt().Init_page_create("Sub_1", "
b
"); fxt.Fxt().Init_page_create("Template:Sub_1", "{{#section:Sub_1|key_0}}"); fxt.Fxt().Init_page_create("Sub_0", "
a{{Sub_1}}
"); fxt.Fxt().Test_parse_page_all_str("{{#section:Sub_0|key_0}}", "ab"); } - @Test public void Nested__ref() { // PURPOSE: handle tags; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-02 + @Test public void Nested__ref() { // PURPOSE: handle tags; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-02 fxt.Fxt().Init_page_create("Template:TagTemplate", "xyz"); fxt.Fxt().Init_page_create("PoemPage", "A{{TagTemplate}}B"); fxt.Fxt().Test_parse_page_all_str("{{#section:PoemPage}}", String_.Replace(String_.Concat_lines_nl_skip_last @@ -95,7 +101,7 @@ public class Lst_pfunc_lst_tst { , "" ), "'", "\"")); } - @Test public void Nested__ref_poem() { // PURPOSE: handle tags; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-02 + @Test public void Nested__ref_poem() { // PURPOSE: handle tags; PAGE:it.s:La_Secchia_rapita/Canto_primo DATE:2015-12-02 fxt.Fxt().Init_page_create("Template:TagTemplate", "{{#tag:ref|abcdefxyz}}"); fxt.Fxt().Init_page_create("PoemPage", String_.Concat_lines_nl_skip_last ( "A{{TagTemplate}}" @@ -118,6 +124,15 @@ public class Lst_pfunc_lst_tst { , "" ), "'", "\"")); } + @Test public void Whitespace() { + // NOTE: parse attribs with whitespace; EX: `bgn=a b`; ISSUE#:720; DATE:2020-05-09 + fxt.Clear().Page_txt_("a
b
c"); + fxt.Test_lst("{{#lst:section_test|x y}}", "b"); + + // parse multiple attributes + fxt.Page_txt_("a
b
c"); + fxt.Clear().Test_lst("{{#lst:section_test|x y}}", "b"); + } } class Lst_pfunc_lst_fxt { public Lst_pfunc_lst_fxt Clear() { diff --git a/400_xowa/src/gplx/xowa/xtns/lst/Lst_section_nde.java b/400_xowa/src/gplx/xowa/xtns/lst/Lst_section_nde.java index a0f9eb9c8..e2a4252e9 100644 --- a/400_xowa/src/gplx/xowa/xtns/lst/Lst_section_nde.java +++ b/400_xowa/src/gplx/xowa/xtns/lst/Lst_section_nde.java @@ -1,22 +1,41 @@ -/* -XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com - -XOWA is licensed under the terms of the General Public License (GPL) Version 3, -or alternatively under the terms of the Apache License Version 2.0. - -You may use XOWA according to either of these licenses as is most appropriate -for your project on a case-by-case basis. - -The terms of each license can be found in the source code repository: - -GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt -Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt -*/ -package gplx.xowa.xtns.lst; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; -import gplx.core.primitives.*; -import gplx.xowa.langs.*; import gplx.xowa.htmls.core.htmls.*; -import gplx.xowa.parsers.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*; +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012-2020 gnosygnu@gmail.com + +XOWA is licensed under the terms of the General Public License (GPL) Version 3, +or alternatively under the terms of the Apache License Version 2.0. + +You may use XOWA according to either of these licenses as is most appropriate +for your project on a case-by-case basis. + +The terms of each license can be found in the source code repository: + +GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt +Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt +*/ +package gplx.xowa.xtns.lst; + +import gplx.Bry_; +import gplx.Bry_bfr; +import gplx.Bry_find_; +import gplx.Byte_ascii; +import gplx.Hash_adp_bry; +import gplx.core.primitives.Byte_obj_val; +import gplx.xowa.Xoae_app; +import gplx.xowa.Xoae_page; +import gplx.xowa.Xowe_wiki; +import gplx.xowa.htmls.core.htmls.Xoh_html_wtr; +import gplx.xowa.htmls.core.htmls.Xoh_wtr_ctx; +import gplx.xowa.langs.Xol_lang_itm; +import gplx.xowa.langs.Xol_lang_stub_; +import gplx.xowa.parsers.Xop_ctx; +import gplx.xowa.parsers.Xop_root_tkn; +import gplx.xowa.parsers.htmls.Mwh_atr_itm; +import gplx.xowa.parsers.htmls.Mwh_atr_itm_owner1; +import gplx.xowa.parsers.xndes.Xop_xnde_tkn; +import gplx.xowa.xtns.Xox_xnde; +import gplx.xowa.xtns.Xox_xnde_; + public class Lst_section_nde implements Xox_xnde, Mwh_atr_itm_owner1 { public byte[] Section_name() {return section_name;} private byte[] section_name; public void Xatr__set(Xowe_wiki wiki, byte[] src, Mwh_atr_itm xatr, Object xatr_id_obj) { @@ -24,7 +43,40 @@ public class Lst_section_nde implements Xox_xnde, Mwh_atr_itm_owner1 { byte xatr_id = ((Byte_obj_val)xatr_id_obj).Val(); switch (xatr_id) { case Xatr_name: case Xatr_bgn: case Xatr_end: - section_name = xatr.Val_as_bry(); name_tid = xatr_id; break; + name_tid = xatr_id; + + int valBgn = xatr.Val_bgn(); + byte b = src[valBgn - 1]; + // previous byte is a quote + if (b == '"' || b == '\'') { + // then use standard xoHtmlParser + section_name = xatr.Val_as_bry(); + } + // previous byte is not a quote (= or whitespace) + else { + // NOTE: parse attribs with whitespace; EX: `bgn=a b`; ISSUE#:720; DATE:2020-05-09 + // MW has different logic specific to LST: REF.MW:https://github.com/wikimedia/mediawiki-extensions-LabeledSectionTransclusion/blob/master/includes/LabeledSectionTransclusion.php#L128-L144 + int srcLen = src.length; + int valPos = valBgn; + int valEnd = -1; + while (valPos < srcLen) { + b = src[valPos]; + switch (b) { + case '/': // majority occurrence; EX:
+ case '>': // should not happen, but just in case; EX:
+ valEnd = valPos; + valPos = srcLen; + break; + case '=':// may not happen, but this is what regex allows; EX:
+ valEnd = Bry_find_.Find_bwd_ws(src, valPos, valBgn); + valPos = srcLen; + break; + } + valPos++; + } + section_name = Bry_.Trim(Bry_.Mid(src, valBgn, valEnd)); + } + break; } } public Xop_xnde_tkn Xnde() {return xnde;} private Xop_xnde_tkn xnde;