mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Xomw: Add TitlePrefix regex
This commit is contained in:
parent
35bbd888ae
commit
af4e7ab6c4
@ -226,6 +226,14 @@ public class Bry_find_ {
|
||||
--cur;
|
||||
}
|
||||
}
|
||||
public static int Find_bwd_while_v2(byte[] src, int cur, int end, byte while_byte) {
|
||||
--cur;
|
||||
while (true) {
|
||||
if ( cur < end
|
||||
|| src[cur] != while_byte) return cur + 1;
|
||||
--cur;
|
||||
}
|
||||
}
|
||||
public static int Find_fwd_while(byte[] src, int cur, int end, byte while_byte) {
|
||||
while (true) {
|
||||
if ( cur == end
|
||||
|
@ -458,3 +458,32 @@ public class XomwMediaWikiTitleCodec implements XomwTitleFormatter {
|
||||
// return $rxTc;
|
||||
// }
|
||||
}
|
||||
class XomwRegexTitlePrefix {
|
||||
// $prefixRegexp = "/^(.+?)_*:_*(.*)$/S";
|
||||
// "(.+?)": greedy: same as .*
|
||||
// "_*" : spaces; allows "Talk___:_A" to be (Talk) (A)
|
||||
// "(.*)" : gobble up rest;
|
||||
// "/S" : analyze
|
||||
public static boolean preg_match(byte[][] rv, byte[] src) {
|
||||
int len = src.length;
|
||||
|
||||
// look for colon
|
||||
int colon_pos = Bry_find_.Find_fwd(src, Byte_ascii.Colon, 0, len);
|
||||
|
||||
// if no_colon, no match; just return bry;
|
||||
if (colon_pos == Bry_find_.Not_found) {
|
||||
rv[0] = src;
|
||||
rv[1] = null;
|
||||
return false;
|
||||
}
|
||||
|
||||
// colon exists; strip any flanking underlines
|
||||
int ns_end = Bry_find_.Find_bwd_while_v2(src, colon_pos, 0, Byte_ascii.Underline);
|
||||
int ttl_bgn = Bry_find_.Find_fwd_while(src, colon_pos + 1, len, Byte_ascii.Underline);
|
||||
|
||||
// split ns / title and return true
|
||||
rv[0] = Bry_.Mid(src, 0, ns_end);
|
||||
rv[1] = Bry_.Mid(src, ttl_bgn, len);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,36 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.mediawiki.includes.title; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
|
||||
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import gplx.xowa.mediawiki.includes.parsers.*;
|
||||
public class XomwMediaWikiTitleCodecTest {
|
||||
private final XomwMediaWikiTitleCodecFxt fxt = new XomwMediaWikiTitleCodecFxt();
|
||||
@Test public void regexTitlePrefix() {
|
||||
// no match
|
||||
fxt.Test_regexTitlePrefix("a" , "a", null);
|
||||
// ns
|
||||
fxt.Test_regexTitlePrefix("a:b" , "a", "b");
|
||||
// underscores
|
||||
fxt.Test_regexTitlePrefix("a__:___b" , "a", "b");
|
||||
}
|
||||
}
|
||||
class XomwMediaWikiTitleCodecFxt {
|
||||
private byte[][] regexTitlePrefixResult = new byte[2][];
|
||||
public void Test_regexTitlePrefix(String src, String expd_ns, String expd_ttl) {
|
||||
XomwRegexTitlePrefix.preg_match(regexTitlePrefixResult, Bry_.new_u8(src));
|
||||
Gftest.Eq__str(expd_ns, String_.new_u8(regexTitlePrefixResult[0]));
|
||||
Gftest.Eq__str(expd_ttl, String_.new_u8(regexTitlePrefixResult[1]));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user