Parser: Add namespace to Template title [#784]

staging
gnosygnu 4 years ago
parent 0aced904a5
commit 174e93cbfa

@ -0,0 +1,113 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2020 https://github.com/desb42
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.langs.cases;
import gplx.Bool_;
import gplx.Bry_;
import gplx.Bry_bfr;
import gplx.Bry_bfr_;
/**
 * Case conversion for UTF-8 byte arrays; the per-character mapping is delegated to {@code Xol_case_cvt_}.
 *
 * <p>Wherever a method takes both {@code pos} and {@code src_len}, {@code src_len} is the exclusive end
 * index of the text inside {@code src} (typically {@code src.length}), not a byte count measured from {@code pos}.
 *
 * <p>Several methods overwrite bytes of {@code src} in place when the converted character has the same
 * byte length as the original; see the individual method notes.
 */
public class Xol_case_cvt {
	/** Uppercases the character at {@code pos}; {@code reuse} is forwarded to {@link #Up_low_1st(byte[], int, int, boolean, boolean)}. */
	public static byte[] Upper_1st(byte[] src, int pos, int src_len, boolean reuse) {return Up_low_1st(src, pos, src_len, Bool_.Y, reuse);}
	/** Uppercases the character at {@code pos}, reusing {@code src} where possible. */
	public static byte[] Upper_1st(byte[] src, int pos, int src_len) {return Up_low_1st(src, pos, src_len, Bool_.Y);}
	/** Lowercases the character at {@code pos}, reusing {@code src} where possible. */
	public static byte[] Lower_1st(byte[] src, int pos, int src_len) {return Up_low_1st(src, pos, src_len, Bool_.N);}
	/** Same as the 5-arg overload with {@code reuse} = true. */
	public static byte[] Up_low_1st(byte[] src, int pos, int src_len, boolean upper) {
		return Up_low_1st(src, pos, src_len, upper, Bool_.Y);
	}
	/**
	 * Converts the case of the single character that starts at {@code pos}.
	 *
	 * @param src     UTF-8 bytes
	 * @param pos     index of the 1st byte of the character to convert
	 * @param src_len exclusive end index of the text in {@code src}
	 * @param upper   true to uppercase; false to lowercase
	 * @param reuse   true: convert inside {@code src} (which may be overwritten and returned);
	 *                false: work on a copy so that {@code src} is never modified
	 * @return {@code Bry_.Empty} if there is no character at {@code pos} or it is truncated; else the converted bytes.
	 *         With {@code reuse} = false the result holds only {@code src[pos..src_len)}; the bytes before {@code pos}
	 *         are not included. NOTE(review): the reuse path keeps those bytes; confirm the difference is intended.
	 */
	public static byte[] Up_low_1st(byte[] src, int pos, int src_len, boolean upper, boolean reuse) {
		if (src_len == 0 || pos >= src_len) return Bry_.Empty; // no char at pos
		byte b = src[pos];
		int b_len = gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
		if (b_len > src_len - pos) return Bry_.Empty; // bad unicode: char would run past src_len; must measure from pos, not from 0
		if (reuse) {
			return Upper_Lower_1st(src, pos, src_len, b_len, upper);
		}
		// caller does not want src touched: convert a private copy of src[pos..src_len)
		int copyLen = src_len - pos;
		byte[] copy = new byte[copyLen];
		System.arraycopy(src, pos, copy, 0, copyLen);
		return Upper_Lower_1st(copy, 0, copyLen, b_len, upper);
	}
	/** Uppercases the {@code b_len}-byte character at {@code pos}; may overwrite {@code src} in place. */
	public static byte[] Upper_1st(byte[] src, int pos, int src_len, int b_len) {return Upper_Lower_1st(src, pos, src_len, b_len, Bool_.Y);}
	/** Lowercases the {@code b_len}-byte character at {@code pos}; may overwrite {@code src} in place. */
	public static byte[] Lower_1st(byte[] src, int pos, int src_len, int b_len) {return Upper_Lower_1st(src, pos, src_len, b_len, Bool_.N);}
	/**
	 * Converts the {@code b_len}-byte character at {@code pos}.
	 * Returns {@code src} itself when nothing changes or when the replacement has the same byte length
	 * (in which case {@code src} is overwritten); otherwise returns a new array built from
	 * {@code src[0..pos)}, the replacement, and {@code src[pos + b_len..src_len)}.
	 */
	private static byte[] Upper_Lower_1st(byte[] src, int pos, int src_len, int b_len, boolean upper) {
		byte[] cased = upper
			? Xol_case_cvt_.Upper(src, pos, b_len)
			: Xol_case_cvt_.Lower(src, pos, b_len);
		if (cased == Xol_case_cvt_.byte_NOCHANGE) // identity check against the shared "no change" marker
			return src;
		if (cased.length == b_len) {
			System.arraycopy(cased, 0, src, pos, b_len);
			return src;
		}
		// replacement has a different byte length; need to rebuild the byte string
		Bry_bfr tmp_bfr = Bry_bfr_.New();
		tmp_bfr.Add_mid(src, 0, pos);
		tmp_bfr.Add(cased);
		tmp_bfr.Add_mid(src, pos + b_len, src_len);
		return tmp_bfr.To_bry_and_clear();
	}
	/** Uppercases every character in {@code src[0..src_len)}; see {@link #Case_cvt(byte[], int, boolean)}. */
	public static byte[] Uppercase(byte[] src, int src_len) {return Case_cvt(src, src_len, Bool_.Y);}
	/** Lowercases every character in {@code src[0..src_len)}; see {@link #Case_cvt(byte[], int, boolean)}. */
	public static byte[] Lowercase(byte[] src, int src_len) {return Case_cvt(src, src_len, Bool_.N);}
	/**
	 * Converts the case of every character in {@code src[0..src_len)}.
	 *
	 * <p>Same-length replacements are written straight into {@code src}. A buffer is only created once a
	 * replacement with a different byte length is met; from then on all output goes to the buffer.
	 * {@code src} may therefore be modified even when a new array is returned.
	 *
	 * @return {@code src} if every replacement had the same byte length (or nothing changed); else a new array
	 */
	public static byte[] Case_cvt(byte[] src, int src_len, boolean upper) {
		Bry_bfr tmp_bfr = null; // created lazily
		int pos = 0;
		while (pos < src_len) {
			byte b = src[pos];
			int b_len = gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
			if (b_len > src_len - pos) {
				// truncated trailing char: keep the remaining bytes unchanged instead of handing the mapper a range past src_len
				if (tmp_bfr != null)
					tmp_bfr.Add_mid(src, pos, src_len);
				break;
			}
			byte[] cased = upper
				? Xol_case_cvt_.Upper(src, pos, b_len)
				: Xol_case_cvt_.Lower(src, pos, b_len);
			boolean changed = cased != Xol_case_cvt_.byte_NOCHANGE;
			if (tmp_bfr != null) {
				// already rebuilding: every char goes to the buffer
				if (changed)
					tmp_bfr.Add(cased);
				else if (b_len == 1)
					tmp_bfr.Add_byte(b);
				else
					tmp_bfr.Add_mid(src, pos, pos + b_len);
			}
			else if (changed) {
				if (cased.length == b_len) {
					System.arraycopy(cased, 0, src, pos, b_len);
				} else {
					// need to rebuild the byte string; everything before pos is already converted inside src
					tmp_bfr = Bry_bfr_.New();
					tmp_bfr.Add_mid(src, 0, pos);
					tmp_bfr.Add(cased);
				}
			}
			pos += b_len;
		}
		return tmp_bfr == null ? src : tmp_bfr.To_bry_and_clear();
	}
}

File diff suppressed because it is too large Load Diff

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,59 +13,83 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.parsers.tmpls; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.xowa.wikis.nss.*;
public class Xot_defn_tmpl_ {
	// Creates a temporary invk for a template call: assigns its frame title, then copies each arg of orig after evaluating it against caller; SEE:NOTE_1
	public static Xot_invk CopyNew(Xop_ctx ctx, Xot_defn_tmpl orig_defn, Xot_invk orig, Xot_invk caller, byte[] src, int frame_ns, byte[] frame_ttl) {
		Xop_tkn_mkr mkr = ctx.Tkn_mkr();
		byte[] defn_src = orig_defn.Data_raw();
		Xowe_wiki wiki = ctx.Wiki();
		Xot_invk_temp invk = Xot_invk_temp.New(orig.Defn_tid(), ctx.Page().Ttl().Page_txt(), orig.Name_tkn(), defn_src, caller.Src_bgn(), caller.Src_end());

		// frame title: uppercase 1st char; EX:{{navbox -> "Template:Navbox"; PAGE:en.w:Achilles DATE:2014-06-21
		byte[] ttl = wiki.Lang().Case_mgr().Case_reuse_1st_upper(frame_ttl);
		// always use spaces; DATE:2014-08-14
		ttl = Xoa_ttl.Replace_unders(ttl);
		if (frame_ns == Xow_ns_.Tid__template) {
			// always prepend "Template:"; DATE:2014-06-13; must be local language; Russian "Шаблон" not English "Template"; PAGE:ru.w:Королевство_Нидерландов DATE:2016-11-23
			byte[] ns_prefix = wiki.Ns_mgr().Ns_template().Name_db_w_colon();
			ttl = Bry_.Add(ns_prefix, Xoa_ttl.Replace_unders(ttl));
		}
		invk.Frame_ttl_(ttl);

		int args_len = orig.Args_len();
		boolean prv_args_parsing = ctx.Tmpl_args_parsing();
		ctx.Tmpl_args_parsing_(true);
		int arg_idx = 0;
		while (arg_idx < args_len) {
			Arg_nde_tkn old_arg = orig.Args_get_by_idx(arg_idx);
			Arg_nde_tkn new_arg = mkr.ArgNde(-1, 0);
			if (!old_arg.KeyTkn_exists()) {
				// not a key, so add to idx_hash; DATE:2014-07-23
				invk.Args_add_by_idx(new_arg);
			}
			else {
				Arg_itm_tkn old_key = old_arg.Key_tkn();
				new_arg.Key_tkn_(Make_itm(false, ctx, mkr, src, old_key, caller, old_arg));
				// key is the evaluated text; was originally Bry_.Mid(caller.Src(), key_tkn.Dat_bgn(), key_tkn.Dat_end()) which was wrong; caused {{{increment}}} instead of "increment"
				invk.Args_add_by_key(new_arg.Key_tkn().Dat_ary(), new_arg);
			}
			new_arg.Val_tkn_(Make_itm(true, ctx, mkr, src, old_arg.Val_tkn(), caller, old_arg));
			invk.Args_add(new_arg);
			arg_idx++;
		}
		ctx.Tmpl_args_parsing_(prv_args_parsing); // put back the flag value read before the loop
		return invk;
	}
	// Evaluates every sub-token of orig into one byte array and wraps it in a new arg item
	private static Arg_itm_tkn Make_itm(boolean val_tkn, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, byte[] src, Arg_itm_tkn orig, Xot_invk caller, Arg_nde_tkn orig_arg) {
		int len = orig.Subs_len();
		Bry_bfr bfr = Bry_bfr_.New();
		int idx = 0;
		while (idx < len) {
			orig.Subs_get(idx).Tmpl_evaluate(ctx, src, caller, bfr);
			idx++;
		}
		Arg_itm_tkn itm = tkn_mkr.ArgItm(-1, -1); // was -1, 0; DATE:2013-04-10
		byte[] dat;
		if (orig_arg.KeyTkn_exists() && val_tkn) {
			// must trim the val if key exists; DUPE:TRIM_IF_KEY; PAGE:en.w:Coord in Chernobyl disaster, Sahara
			dat = bfr.To_bry_and_clear_and_trim();
		}
		else {
			dat = bfr.To_bry_and_clear();
		}
		itm.Dat_ary_(dat);
		return itm;
	}
}
/*
NOTE_1: Creates an invk_temp from an invk
page {{test_1|a}}
test_1 {{test_2|{{{1|nil_1}}}}}
test_2 {{{1|nil_2}}}
page : invk_temp gets created for {{test_1|a}} where name=test_1 and arg1=a
test_1 : invk_temp gets created for {{test_2|{{{1|nil_1}}}}}
1) create the invk_temp tkn, with name=test_2
2) copy the args and resolve; in this case -> {{test_2|a}}
now we can use the invk_temp to call test_2 (and so on if needed)
*/
package gplx.xowa.parsers.tmpls;
import gplx.Bool_;
import gplx.Bry_;
import gplx.Bry_bfr;
import gplx.Bry_bfr_;
import gplx.xowa.Xoa_ttl;
import gplx.xowa.Xowe_wiki;
import gplx.xowa.langs.cases.Xol_case_cvt;
import gplx.xowa.langs.cases.Xol_case_mgr;
import gplx.xowa.parsers.Xop_ctx;
import gplx.xowa.parsers.Xop_tkn_mkr;
import gplx.xowa.wikis.nss.Xow_ns_;
/** Builds the temporary invk that is used to evaluate one template call; SEE:NOTE_1 */
public class Xot_defn_tmpl_ {
	/**
	 * Creates an {@code Xot_invk_temp} for {@code orig}: sets its frame title and copies every arg of
	 * {@code orig}, with each key / val evaluated against {@code caller}.
	 *
	 * @param ctx       parser context; supplies the token maker, wiki and current page; its Tmpl_args_parsing flag is set to true while args are copied, then set back to the value read before the loop
	 * @param orig_defn template definition; its raw data is passed to the new invk
	 * @param orig      the invk being copied (name token, args)
	 * @param caller    the invk that args are evaluated against; also supplies Src_bgn / Src_end
	 * @param src       bytes handed to {@code Tmpl_evaluate} for each arg sub-token
	 * @param frame_ns  namespace id used to prefix the frame title
	 * @param frame_ttl title without namespace; not modified by this method
	 * @return the new invk
	 */
	public static Xot_invk CopyNew(Xop_ctx ctx, Xot_defn_tmpl orig_defn, Xot_invk orig, Xot_invk caller, byte[] src, int frame_ns, byte[] frame_ttl) { // SEE:NOTE_1
		Xop_tkn_mkr tkn_mkr = ctx.Tkn_mkr();
		byte[] orig_src = orig_defn.Data_raw();
		Xowe_wiki wiki = ctx.Wiki();
		Xot_invk_temp rv = Xot_invk_temp.New(orig.Defn_tid(), ctx.Page().Ttl().Page_txt(), orig.Name_tkn(), orig_src, caller.Src_bgn(), caller.Src_end());
		rv.Frame_ttl_(Make_frame_ttl(wiki, frame_ns, frame_ttl));
		int orig_args_len = orig.Args_len();
		boolean tmpl_args_parsing_orig = ctx.Tmpl_args_parsing();
		ctx.Tmpl_args_parsing_(true);
		for (int i = 0; i < orig_args_len; i++) {
			Arg_nde_tkn orig_arg = orig.Args_get_by_idx(i);
			Arg_nde_tkn copy_arg = tkn_mkr.ArgNde(-1, 0);
			if (orig_arg.KeyTkn_exists()) {
				Arg_itm_tkn key_tkn = orig_arg.Key_tkn();
				copy_arg.Key_tkn_(Make_itm(false, ctx, tkn_mkr, src, key_tkn, caller, orig_arg));
				rv.Args_add_by_key(copy_arg.Key_tkn().Dat_ary(), copy_arg); // NOTE: was originally Bry_.Mid(caller.Src(), key_tkn.Dat_bgn(), key_tkn.Dat_end()) which was wrong; caused {{{increment}}} instead of "increment"
			}
			else
				rv.Args_add_by_idx(copy_arg); // NOTE: not a key, so add to idx_hash; DATE:2014-07-23
			copy_arg.Val_tkn_(Make_itm(true, ctx, tkn_mkr, src, orig_arg.Val_tkn(), caller, orig_arg));
			rv.Args_add(copy_arg);
		}
		ctx.Tmpl_args_parsing_(tmpl_args_parsing_orig);
		return rv;
	}
	/** Returns the frame title: 1st char uppercased, underscores replaced, and the namespace name prepended for non-main namespaces. */
	private static byte[] Make_frame_ttl(Xowe_wiki wiki, int frame_ns, byte[] frame_ttl) {
		// DATE:2014-06-21: always uppercase 1st; EX:{{navbox -> "Template:Navbox"; PAGE:en.w:Achilles
		// DATE:2020-08-09: ISSUE#:784; uppercase non-ascii chars; NOTE: do not reuse byte array, else will cause Xot_defn_trace tests to fail
		frame_ttl = Xol_case_cvt.Upper_1st(frame_ttl, 0, frame_ttl.length, Bool_.N);
		// DATE:2014-08-14: always use spaces
		frame_ttl = Xoa_ttl.Replace_unders(frame_ttl);
		// DATE:2014-06-13: always prepend "Template:" to frame_ttl
		// DATE:2016-11-23: must be local language; Russian "Шаблон" not English "Template"; PAGE:ru.w:Королевство_Нидерландов
		// DATE:2020-08-09: ISSUE#:784; apply to all non-main namespaces; PAGE:en.w:Wikipedia:Wikipedia_Signpost/2015-07-15/Op-ed
		if (frame_ns == Xow_ns_.Tid__main)
			return frame_ttl;
		// Ids_get_or_null may, as its name says, return null (presumably when the wiki does not define frame_ns);
		// leave the title unprefixed in that case instead of failing with a NullPointerException.
		// NOTE: the lookup is repeated below so that no additional type needs to be imported here
		if (wiki.Ns_mgr().Ids_get_or_null(frame_ns) == null)
			return frame_ttl;
		byte[] nsBry = wiki.Ns_mgr().Ids_get_or_null(frame_ns).Name_db_w_colon();
		return Bry_.Add(nsBry, frame_ttl);
	}
	/**
	 * Evaluates all sub-tokens of {@code orig} into one byte array and stores it in a new arg item.
	 * The bytes are trimmed only when building the val ({@code val_tkn} = true) of an arg that has a key.
	 */
	private static Arg_itm_tkn Make_itm(boolean val_tkn, Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, byte[] src, Arg_itm_tkn orig, Xot_invk caller, Arg_nde_tkn orig_arg) {
		int subs_len = orig.Subs_len();
		Bry_bfr arg_bfr = Bry_bfr_.New();
		for (int i = 0; i < subs_len; i++)
			orig.Subs_get(i).Tmpl_evaluate(ctx, src, caller, arg_bfr);
		Arg_itm_tkn rv = tkn_mkr.ArgItm(-1, -1); // NOTE: was -1, 0; DATE:2013-04-10
		byte[] rv_ary = orig_arg.KeyTkn_exists() && val_tkn ? arg_bfr.To_bry_and_clear_and_trim() : arg_bfr.To_bry_and_clear(); // NOTE: must trim if key_exists; DUPE:TRIM_IF_KEY; PAGE:en.w:Coord in Chernobyl disaster, Sahara
		rv.Dat_ary_(rv_ary);
		return rv;
	}
}
/*
NOTE_1: Creates an invk_temp from an invk
page {{test_1|a}}
test_1 {{test_2|{{{1|nil_1}}}}}
test_2 {{{1|nil_2}}}
page : invk_temp gets created for {{test_1|a}} where name=test_1 and arg1=a
test_1 : invk_temp gets created for {{test_2|{{{1|nil_1}}}}}
1) create the invk_temp tkn, with name=test_2
2) copy the args and resolve; in this case -> {{test_2|a}}
now we can use the invk_temp to call test_2 (and so on if needed)
*/

@ -0,0 +1,69 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.parsers.tmpls;
import gplx.Bry_;
import gplx.core.tests.Gftest;
import gplx.xowa.Xop_fxt;
import gplx.xowa.Xowe_wiki;
import gplx.xowa.parsers.Xop_ctx;
import gplx.xowa.wikis.nss.Xow_ns_;
import org.junit.Test;
// Checks the frame title that Xot_defn_tmpl_.CopyNew assigns to the new invk, for several namespace ids and title forms.
// Each test passes (namespace id, input title, expected frame title) to the fixture.
public class Xot_defn_tmpl_Test {
private final Xot_defn_tmpl__fxt fxt = new Xot_defn_tmpl__fxt();
// main namespace: expected title has no namespace prefix; only the 1st letter is uppercased
@Test
public void CopyNewMain() {
fxt.Test_CopyNew_FrameTtl(Xow_ns_.Tid__main, "a", "A");
}
// a non-ASCII 1st letter is expected to be uppercased as well
@Test
public void CopyNewUppercaseNonAscii() {
fxt.Test_CopyNew_FrameTtl(Xow_ns_.Tid__main, "à", "À");
}
// underscores in the title are expected to become spaces; the expected namespace prefix keeps its underscore
@Test
public void CopyNewSpaces() {
fxt.Test_CopyNew_FrameTtl(Xow_ns_.Tid__help_talk, "a_b", "Help_talk:A b");
}
// template namespace: expected title is prefixed with "Template:"
@Test
public void CopyNewTemplate() {
fxt.Test_CopyNew_FrameTtl(Xow_ns_.Tid__template, "a", "Template:A");
}
// another non-main namespace (project): expected title is prefixed with "Wikipedia:"
@Test
public void CopyNewNonMain() {
fxt.Test_CopyNew_FrameTtl(Xow_ns_.Tid__project, "a", "Wikipedia:A");
}
}
// Fixture for Xot_defn_tmpl_Test: calls Xot_defn_tmpl_.CopyNew and compares the resulting frame title
class Xot_defn_tmpl__fxt {
	private final Xop_fxt fxt = new Xop_fxt();
	// Calls CopyNew with a new ctx, a new defn, root invks named "orig" / "caller" and an empty src; then asserts that Frame_ttl() equals expdFrameTtl
	public void Test_CopyNew_FrameTtl(int frameNs, String frameTtlStr, String expdFrameTtl) {
		// arrange
		Xop_ctx ctx = Xop_ctx.New__top(fxt.Wiki());
		byte[] frameTtl = Bry_.new_u8(frameTtlStr);
		Xot_defn_tmpl defn = new Xot_defn_tmpl();
		Xot_invk origInvk = Xot_invk_temp.New_root(Bry_.new_u8("orig"));
		Xot_invk callerInvk = Xot_invk_temp.New_root(Bry_.new_u8("caller"));

		// act
		Xot_invk actl = Xot_defn_tmpl_.CopyNew(ctx, defn, origInvk, callerInvk, Bry_.Empty, frameNs, frameTtl);

		// assert
		byte[] expd = Bry_.new_u8(expdFrameTtl);
		Gftest.Eq__bry(expd, actl.Frame_ttl());
	}
}
Loading…
Cancel
Save