1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-09-28 14:30:51 +00:00

Xomw: Add TitlePrefix regex

This commit is contained in:
gnosygnu 2017-02-24 09:38:15 -05:00
parent 35bbd888ae
commit af4e7ab6c4
3 changed files with 73 additions and 0 deletions

View File

@ -226,6 +226,14 @@ public class Bry_find_ {
--cur;
}
}
/**
 * Scans backward over a run of {@code while_byte}, starting at the byte just before {@code cur}.
 *
 * @param src        bytes to scan
 * @param cur        exclusive start; the first byte examined is {@code src[cur - 1]}
 * @param end        lowest index that may be examined (inclusive)
 * @param while_byte byte value to skip over
 * @return index of the first byte of the run that ends at {@code cur}; equals {@code cur}
 *         when {@code src[cur - 1]} is not {@code while_byte} or lies below {@code end}
 */
public static int Find_bwd_while_v2(byte[] src, int cur, int end, byte while_byte) {
  int pos = cur - 1;
  // keep stepping left while still inside the allowed range and still on while_byte
  while (pos >= end && src[pos] == while_byte) {
    pos--;
  }
  // pos now rests on the first byte that is NOT part of the run (or just below end)
  return pos + 1;
}
public static int Find_fwd_while(byte[] src, int cur, int end, byte while_byte) {
while (true) {
if ( cur == end

View File

@ -458,3 +458,32 @@ public class XomwMediaWikiTitleCodec implements XomwTitleFormatter {
// return $rxTc;
// }
}
class XomwRegexTitlePrefix {
  // Hand-written equivalent of the PHP pattern
  //   $prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
  // "(.+?)": lazy (non-greedy) and at least one char; so the split happens at the first colon
  //          that has at least one char before it, and the prefix is never empty
  // "_*"   : underscores flanking the colon; allows "Talk___:_A" to be (Talk) (A)
  // "(.*)" : gobble up rest
  // "/S"   : PCRE "study" modifier; extra pattern analysis for speed only
  /**
   * Splits {@code src} into a prefix and a remainder around the first usable colon.
   *
   * @param rv  two-slot output array; on a match {@code rv[0]} is the prefix and {@code rv[1]}
   *            is the remainder; on no match {@code rv[0]} is {@code src} itself and
   *            {@code rv[1]} is {@code null}
   * @param src bytes to split
   * @return {@code true} when a colon with at least one preceding byte was found
   */
  public static boolean preg_match(byte[][] rv, byte[] src) {
    int len = src.length;
    // "(.+?)" must consume at least one byte, so a colon at index 0 can never be the separator;
    // start the search at 1 (guarding len so the search range is never inverted)
    int colon_pos = len > 1
      ? Bry_find_.Find_fwd(src, Byte_ascii.Colon, 1, len)
      : Bry_find_.Not_found;
    // if no usable colon, no match; just return bry
    if (colon_pos == Bry_find_.Not_found) {
      rv[0] = src;
      rv[1] = null;
      return false;
    }
    // colon exists; strip any flanking underscores.
    // lower bound is 1, not 0: the prefix keeps its first byte even when that byte is "_",
    // matching what "(.+?)_*" captures for input such as "_:A"
    int ns_end = Bry_find_.Find_bwd_while_v2(src, colon_pos, 1, Byte_ascii.Underline);
    int ttl_bgn = Bry_find_.Find_fwd_while(src, colon_pos + 1, len, Byte_ascii.Underline);
    // split ns / title and return true
    rv[0] = Bry_.Mid(src, 0, ns_end);
    rv[1] = Bry_.Mid(src, ttl_bgn, len);
    return true;
  }
}

View File

@ -0,0 +1,36 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.title; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import gplx.xowa.mediawiki.includes.parsers.*;
public class XomwMediaWikiTitleCodecTest {
  private final XomwMediaWikiTitleCodecFxt fxt = new XomwMediaWikiTitleCodecFxt();
  /** Checks the prefix / title split for a plain title, a simple prefix, and underscore padding. */
  @Test public void regexTitlePrefix() {
    // each row: { source, expected prefix, expected title }
    String[][] rows = {
      {"a"       , "a", null}, // no colon: source comes back unchanged, no title
      {"a:b"     , "a", "b"},  // plain prefix
      {"a__:___b", "a", "b"},  // underscores on both sides of the colon are dropped
    };
    for (String[] row : rows) {
      fxt.Test_regexTitlePrefix(row[0], row[1], row[2]);
    }
  }
}
class XomwMediaWikiTitleCodecFxt {
  // output slots filled by XomwRegexTitlePrefix.preg_match; reused across calls
  private byte[][] match_groups = new byte[2][];
  /**
   * Runs XomwRegexTitlePrefix.preg_match on {@code src} and compares both output slots
   * against the expected strings.
   */
  public void Test_regexTitlePrefix(String src, String expd_ns, String expd_ttl) {
    byte[] src_bry = Bry_.new_u8(src);
    XomwRegexTitlePrefix.preg_match(match_groups, src_bry);

    // slot 0: prefix
    String actl_ns = String_.new_u8(match_groups[0]);
    Gftest.Eq__str(expd_ns, actl_ns);

    // slot 1: title
    String actl_ttl = String_.new_u8(match_groups[1]);
    Gftest.Eq__str(expd_ttl, actl_ttl);
  }
}