diff --git a/100_core/src/gplx/Bry_find_.java b/100_core/src/gplx/Bry_find_.java
index e5a1336a2..5695ea643 100644
--- a/100_core/src/gplx/Bry_find_.java
+++ b/100_core/src/gplx/Bry_find_.java
@@ -226,6 +226,20 @@ public class Bry_find_ {
 			--cur;
 		}
 	}
+	// Scans backward over a run of while_byte that ends just before cur.
+	//   src        : bytes to scan
+	//   cur        : exclusive upper bound; the first byte examined is src[cur - 1]
+	//   end        : lowest index that may be examined (inclusive)
+	//   while_byte : byte to skip over
+	// Returns the index of the first byte of the run, or cur if src[cur - 1] is not while_byte or cur <= end.
+	public static int Find_bwd_while_v2(byte[] src, int cur, int end, byte while_byte) {
+		--cur;
+		while (true) {
+			if (	cur < end
+				||	src[cur] != while_byte) return cur + 1;
+			--cur;
+		}
+	}
 	public static int Find_fwd_while(byte[] src, int cur, int end, byte while_byte) {
 		while (true) {
 			if (	cur == end
diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/title/XomwMediaWikiTitleCodec.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/title/XomwMediaWikiTitleCodec.java
index 918d51ebe..aed7c1418 100644
--- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/title/XomwMediaWikiTitleCodec.java
+++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/title/XomwMediaWikiTitleCodec.java
@@ -458,3 +458,38 @@ public class XomwMediaWikiTitleCodec implements XomwTitleFormatter {
 //		return $rxTc;
 //	}
 }
+class XomwRegexTitlePrefix {
+	// PHP: $prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
+	//   "(.+?)" : lazy, not greedy; takes the fewest chars (but at least 1) that still let the rest match; becomes rv[0]
+	//   "_*"    : underscores (spaces); allows "Talk___:_A" to be (Talk) (A)
+	//   ":"     : separator; because "(.+?)" needs 1+ chars, a colon at index 0 can never be the separator
+	//   "(.*)"  : gobble up rest; becomes rv[1]
+	//   "/S"    : PCRE "study" hint; does not change what matches
+	// rv must have at least 2 slots. On match, sets rv[0] / rv[1] to the two groups and returns true.
+	// On no match, sets rv[0] = src and rv[1] = null and returns false.
+	// NOTE(review): PCRE "." / "$" treat newlines specially; this port does not. Presumably titles never contain newlines; confirm.
+	public static boolean preg_match(byte[][] rv, byte[] src) {
+		int len = src.length;
+
+		// look for first colon at index 1 or later; "(.+?)" must consume at least 1 char before it
+		int colon_pos = len > 1
+			? Bry_find_.Find_fwd(src, Byte_ascii.Colon, 1, len)
+			: Bry_find_.Not_found;
+
+		// if no usable colon, no match; just return bry;
+		if (colon_pos == Bry_find_.Not_found) {
+			rv[0] = src;
+			rv[1] = null;
+			return false;
+		}
+
+		// colon exists; strip any flanking underlines, but never strip ns to empty (lower bound is 1, not 0)
+		int ns_end = Bry_find_.Find_bwd_while_v2(src, colon_pos, 1, Byte_ascii.Underline);
+		int ttl_bgn = Bry_find_.Find_fwd_while(src, colon_pos + 1, len, Byte_ascii.Underline);
+
+		// split ns / title and return true
+		rv[0] = Bry_.Mid(src, 0, ns_end);
+		rv[1] = Bry_.Mid(src, ttl_bgn, len);
+		return true;
+	}
+}
diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/title/XomwMediaWikiTitleCodecTest.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/title/XomwMediaWikiTitleCodecTest.java
new file mode 100644
index 000000000..1bb2a85d2
--- /dev/null
+++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/title/XomwMediaWikiTitleCodecTest.java
@@ -0,0 +1,45 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012-2017 gnosygnu@gmail.com
+
+XOWA is licensed under the terms of the General Public License (GPL) Version 3,
+or alternatively under the terms of the Apache License Version 2.0.
+
+You may use XOWA according to either of these licenses as is most appropriate
+for your project on a case-by-case basis.
+
+The terms of each license can be found in the source code repository:
+
+GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
+Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
+*/
+package gplx.xowa.mediawiki.includes.title; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
+import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import gplx.xowa.mediawiki.includes.parsers.*;
+public class XomwMediaWikiTitleCodecTest {
+	private final XomwMediaWikiTitleCodecFxt fxt = new XomwMediaWikiTitleCodecFxt();
+	@Test public void regexTitlePrefix() {
+		// no match
+		fxt.Test_regexTitlePrefix("a"         , "a" , null);
+		// ns
+		fxt.Test_regexTitlePrefix("a:b"       , "a" , "b");
+		// underscores
+		fxt.Test_regexTitlePrefix("a__:___b"  , "a" , "b");
+		// only the first usable colon splits
+		fxt.Test_regexTitlePrefix("a:b:c"     , "a" , "b:c");
+		// leading colon cannot be the separator; "(.+?)" needs 1+ chars
+		fxt.Test_regexTitlePrefix(":a"        , ":a", null);
+		fxt.Test_regexTitlePrefix(":a:b"      , ":a", "b");
+		// ns is never stripped to empty
+		fxt.Test_regexTitlePrefix("_:a"       , "_" , "a");
+	}
+}
+class XomwMediaWikiTitleCodecFxt {
+	// reused across calls; preg_match overwrites both slots every time
+	private final byte[][] regexTitlePrefixResult = new byte[2][];
+	// runs preg_match on src and compares rv[0] / rv[1] to expd_ns / expd_ttl; the "no match" cases pass null for expd_ttl
+	public void Test_regexTitlePrefix(String src, String expd_ns, String expd_ttl) {
+		XomwRegexTitlePrefix.preg_match(regexTitlePrefixResult, Bry_.new_u8(src));
+		Gftest.Eq__str(expd_ns, String_.new_u8(regexTitlePrefixResult[0]));
+		Gftest.Eq__str(expd_ttl, String_.new_u8(regexTitlePrefixResult[1]));
+	}
+}