1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Xomw: Add basic namespace parsing to title

This commit is contained in:
gnosygnu 2017-02-27 07:54:10 -05:00
parent ba4634919c
commit 2416ccb7e9
4 changed files with 102 additions and 75 deletions

View File

@ -232,18 +232,20 @@ public class XomwNamespace {
/**
 * Looks up the id of a namespace item by name.
 *
 * On first use (xNamespaces == null) a byte-array-keyed hash is built in which each item
 * is stored under the all-lowercase form of its name; later calls reuse that hash.
 *
 * NOTE(review): name is passed to the lookup as-is (it is not lowercased here), while the
 * keys are lowercased -- presumably callers must supply an already-lowercased name; confirm.
 *
 * @param name namespace name to look up
 * @return the matching item's id, or XomwNamespace.NULL_NS_ID when no item is found
 */
public static int getCanonicalIndex(byte[] name) {
// lazily build the lowercase-name -> item lookup the first time through
if (xNamespaces == null) {
xNamespaces = Hash_adp_bry.cs();
int len = namespaces.Len();
XomwNamespacesById namespacesHash = getCanonicalNamespaces();
int len = namespacesHash.Len();
for (int i = 0; i < len; i++) {
XomwNamespaceItem item = (XomwNamespaceItem)namespaces.GetAtOrNull(i);
XomwNamespaceItem item = (XomwNamespaceItem)namespacesHash.GetAtOrNull(i);
xNamespaces.Add(Bry_.Lcase__all(item.name), item); // NOTE: MW does "strtolower($text)"; canonical namespaces are always ascii
}
}
// original MW (PHP) logic, kept for reference:
// if (array_key_exists($name, $xNamespaces)) {
// return $xNamespaces[$name];
// } else {
// return null;
// }
return XomwNamespace.NULL_NS_ID;
// a missing entry comes back as null and maps to the NULL_NS_ID sentinel
XomwNamespaceItem xNs = (XomwNamespaceItem)xNamespaces.Get_by(name);
if (xNs != null) {
return xNs.id;
}
else {
return XomwNamespace.NULL_NS_ID;
}
}
// /**

View File

@ -233,7 +233,7 @@ public class XomwMediaWikiTitleCodec implements XomwTitleFormatter {
* @return array A map with the fields 'interwiki', 'fragment', 'namespace',
* 'user_case_dbkey', and 'dbkey'.
*/
// private final byte[][] tmpPrefixRegex = new byte[2][];
private final byte[][] tmpPrefixRegex = new byte[2][];
public XomwMediaWikiTitleCodecParts splitTitleString(byte[] text, int defaultNamespace) {
byte[] dbkey = XophpString.str_replace(Byte_ascii.Space, Byte_ascii.Underline, text);
@ -276,74 +276,70 @@ public class XomwMediaWikiTitleCodec implements XomwTitleFormatter {
}
// Namespace or interwiki prefix
if (Bry_.Has_at_bgn(dbkey, Bry_.new_a7("File:"))) {
parts.ns = XomwDefines.NS_FILE;
dbkey = Bry_.Mid(dbkey, 5);
}
// do {
// byte[][] m = tmpPrefixRegex;
// if (XomwRegexTitlePrefix.preg_match(m, dbkey)) {
// byte[] p = m[0];
// int ns = this.language.getNsIndex(p);
// if (ns != XophpUtility.NULL_INT) {
// // Ordinary namespace
// dbkey = m[1];
// parts.ns = ns;
// // For Talk:X pages, check if X has a "namespace" prefix
// if (ns == XomwDefines.NS_TALK && XomwRegexTitlePrefix.preg_match(m, dbkey)) {
// if (this.language.getNsIndex(m[0]) != XophpUtility.NULL_INT) {
// // Disallow Talk:File:x type titles...
// throw new XomwMalformedTitleException("title-invalid-talk-namespace", text);
do {
byte[][] m = tmpPrefixRegex;
if (XomwRegexTitlePrefix.preg_match(m, dbkey)) {
byte[] p = m[0];
int ns = this.language.getNsIndex(p);
if (ns != XophpUtility.NULL_INT) {
// Ordinary namespace
dbkey = m[1];
parts.ns = ns;
// For Talk:X pages, check if X has a "namespace" prefix
if (ns == XomwDefines.NS_TALK && XomwRegexTitlePrefix.preg_match(m, dbkey)) {
if (this.language.getNsIndex(m[0]) != XophpUtility.NULL_INT) {
// Disallow Talk:File:x type titles...
throw new XomwMalformedTitleException("title-invalid-talk-namespace", text);
}
// else if (Interwiki::isValidInterwiki($x[1])) {
// // TODO: get rid of global state!
// // Disallow Talk:Interwiki:x type titles...
// throw new XomwMalformedTitleException('title-invalid-talk-namespace', text);
// }
//// else if (Interwiki::isValidInterwiki($x[1])) {
//// // TODO: get rid of global state!
//// // Disallow Talk:Interwiki:x type titles...
//// throw new XomwMalformedTitleException('title-invalid-talk-namespace', text);
//// }
}
}
// else if (Interwiki::isValidInterwiki($p)) {
// // Interwiki link
// dbkey = $m[2];
// parts['interwiki'] = this.language->lc($p);
//
// // Redundant interwiki prefix to the local wiki
// foreach (this.localInterwikis as $localIW) {
// if (0 == strcasecmp(parts['interwiki'], $localIW)) {
// if (dbkey == '') {
// // Empty self-links should point to the Main Page, to ensure
// // compatibility with cross-wiki transclusions and the like.
// $mainPage = Title::newMainPage();
// return [
// 'interwiki' => $mainPage->getInterwiki(),
// 'local_interwiki' => true,
// 'fragment' => $mainPage->getFragment(),
// 'namespace' => $mainPage->getNamespace(),
// 'dbkey' => $mainPage->getDBkey(),
// 'user_case_dbkey' => $mainPage->getUserCaseDBKey()
// ];
// }
// parts['interwiki'] = '';
// // local interwikis should behave like initial-colon links
// parts['local_interwiki'] = true;
//
// // Do another namespace split...
// continue 2;
// }
// }
//
// // If there's an initial colon after the interwiki, that also
// // resets the default namespace
// if (dbkey !== '' && dbkey[0] == ':') {
// parts['namespace'] = NS_MAIN;
// dbkey = substr(dbkey, 1);
// }
// }
//// else if (Interwiki::isValidInterwiki($p)) {
//// // Interwiki link
//// dbkey = $m[2];
//// parts['interwiki'] = this.language->lc($p);
////
//// // Redundant interwiki prefix to the local wiki
//// foreach (this.localInterwikis as $localIW) {
//// if (0 == strcasecmp(parts['interwiki'], $localIW)) {
//// if (dbkey == '') {
//// // Empty self-links should point to the Main Page, to ensure
//// // compatibility with cross-wiki transclusions and the like.
//// $mainPage = Title::newMainPage();
//// return [
//// 'interwiki' => $mainPage->getInterwiki(),
//// 'local_interwiki' => true,
//// 'fragment' => $mainPage->getFragment(),
//// 'namespace' => $mainPage->getNamespace(),
//// 'dbkey' => $mainPage->getDBkey(),
//// 'user_case_dbkey' => $mainPage->getUserCaseDBKey()
//// ];
//// }
//// parts['interwiki'] = '';
//// // local interwikis should behave like initial-colon links
//// parts['local_interwiki'] = true;
////
//// // Do another namespace split...
//// continue 2;
//// }
//// }
////
//// // If there's an initial colon after the interwiki, that also
//// // resets the default namespace
//// if (dbkey !== '' && dbkey[0] == ':') {
//// parts['namespace'] = NS_MAIN;
//// dbkey = substr(dbkey, 1);
//// }
//// }
// // If there's no recognized interwiki or namespace,
// // then let the colon expression be part of the title.
// }
// break;
// } while (true);
// If there's no recognized interwiki or namespace,
// then let the colon expression be part of the title.
}
break;
} while (true);
// $fragment = strstr(dbkey, '#');
// if (false !== $fragment) {

View File

@ -29,4 +29,10 @@ public class XomwMediaWikiTitleCodecParts {
this.dbkey = dbkey;
this.user_case_dbkey = dbkey;
}
/**
 * Serializes this object's ns and dbkey into a single String.
 *
 * Layout: ns (written via Add_int_variable), a separator, dbkey, a trailing separator.
 * NOTE(review): the separator is whatever Add_byte_pipe appends -- presumably '|'; confirm.
 * Only ns and dbkey are included; other fields are not part of the output.
 *
 * @return the serialized form; the temporary buffer is cleared as part of the conversion
 */
public String ToStr() {
	Bry_bfr out = Bry_bfr_.New();
	// ns field followed by separator
	out.Add_int_variable(ns).Add_byte_pipe();
	// dbkey field followed by separator
	out.Add(dbkey).Add_byte_pipe();
	return out.To_str_and_clear();
}
}

View File

@ -14,7 +14,10 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.title; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import gplx.xowa.mediawiki.includes.parsers.*;
import org.junit.*; import gplx.core.tests.*;
import gplx.core.btries.*;
import gplx.xowa.mediawiki.includes.parsers.*;
import gplx.xowa.mediawiki.languages.*; import gplx.xowa.langs.*;
public class XomwMediaWikiTitleCodecTest {
private final XomwMediaWikiTitleCodecFxt fxt = new XomwMediaWikiTitleCodecFxt();
@Test public void regexTitlePrefix() {
@ -25,6 +28,11 @@ public class XomwMediaWikiTitleCodecTest {
// underscores
fxt.Test_regexTitlePrefix("a__:___b" , "a", "b");
}
/** Checks that a title with a namespace prefix is split into its namespace id and remaining key. */
@Test public void splitTitleString() {
	XomwLanguage lang = fxt.Make_lang();
	XomwMediaWikiTitleCodec codec = fxt.Make_codec(lang);
	// ns: "File:" prefix should resolve to NS_FILE and leave "A" as the key
	XomwMediaWikiTitleCodecParts expd = fxt.Make_parts(XomwDefines.NS_FILE, "A");
	fxt.Test_splitTitleString(codec, "File:A", expd);
}
}
class XomwMediaWikiTitleCodecFxt {
private byte[][] regexTitlePrefixResult = new byte[2][];
@ -33,4 +41,19 @@ class XomwMediaWikiTitleCodecFxt {
Gftest.Eq__str(expd_ns, String_.new_u8(regexTitlePrefixResult[0]));
Gftest.Eq__str(expd_ttl, String_.new_u8(regexTitlePrefixResult[1]));
}
/**
 * Builds an expected-result parts object for assertions.
 *
 * @param ns namespace id to store
 * @param dbkey key text; converted with Bry_.new_u8 before being stored
 * @return a new XomwMediaWikiTitleCodecParts holding the converted key and ns
 */
public XomwMediaWikiTitleCodecParts Make_parts(int ns, String dbkey) {
	byte[] dbkeyBry = Bry_.new_u8(dbkey);
	return new XomwMediaWikiTitleCodecParts(dbkeyBry, ns);
}
/**
 * Creates a title codec bound to the given language.
 *
 * @param lang language object handed to the codec constructor
 * @return a new XomwMediaWikiTitleCodec
 */
public XomwMediaWikiTitleCodec Make_codec(XomwLanguage lang) {
	XomwMediaWikiTitleCodec codec = new XomwMediaWikiTitleCodec(lang);
	return codec;
}
/**
 * Creates an XomwLanguage wrapping a language item keyed by Xol_lang_itm_.Key_en,
 * using the language manager of a freshly made test app.
 *
 * @return a new XomwLanguage for use in codec tests
 */
public XomwLanguage Make_lang() {
	Xoae_app testApp = Xoa_app_fxt.Make__app__edit();
	Xol_lang_itm langItm = new Xol_lang_itm(testApp.Lang_mgr(), Xol_lang_itm_.Key_en);
	XomwLanguage rv = new XomwLanguage(langItm);
	return rv;
}
/**
 * Runs codec.splitTitleString on src with XomwDefines.NS_MAIN as the default namespace
 * and asserts that the result's ToStr() output equals that of expd.
 *
 * @param codec codec under test
 * @param src raw title text
 * @param expd expected parts
 */
public void Test_splitTitleString(XomwMediaWikiTitleCodec codec, String src, XomwMediaWikiTitleCodecParts expd) {
	byte[] srcBry = Bry_.new_u8(src);
	XomwMediaWikiTitleCodecParts actl = codec.splitTitleString(srcBry, XomwDefines.NS_MAIN);
	// compare via the string form so a mismatch reports readable values
	String expdStr = expd.ToStr();
	String actlStr = actl.ToStr();
	Gftest.Eq__str(expdStr, actlStr);
}
}