mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Xomw: Add basic namespace parsing to title
This commit is contained in:
parent
ba4634919c
commit
2416ccb7e9
@ -232,19 +232,21 @@ public class XomwNamespace {
|
|||||||
// Returns the id of the canonical namespace whose name matches `name`, or XomwNamespace.NULL_NS_ID when there is no match.
// On first call (xNamespaces == null) the lookup table is built once, keyed by Bry_.Lcase__all(item.name).
// Diff note: the removed side iterated `namespaces` and always returned NULL_NS_ID (the PHP lookup was only a comment);
// the added side iterates getCanonicalNamespaces() and performs the lookup with xNamespaces.Get_by(name).
// NOTE(review): `name` is looked up as given while keys are lower-cased; presumably callers must pass a lower-case name -- confirm.
public static int getCanonicalIndex(byte[] name) {
|
public static int getCanonicalIndex(byte[] name) {
|
||||||
if (xNamespaces == null) {
|
if (xNamespaces == null) {
|
||||||
xNamespaces = Hash_adp_bry.cs();
|
xNamespaces = Hash_adp_bry.cs();
|
||||||
int len = namespaces.Len();
|
XomwNamespacesById namespacesHash = getCanonicalNamespaces();
|
||||||
|
int len = namespacesHash.Len();
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
XomwNamespaceItem item = (XomwNamespaceItem)namespaces.GetAtOrNull(i);
|
XomwNamespaceItem item = (XomwNamespaceItem)namespacesHash.GetAtOrNull(i);
|
||||||
xNamespaces.Add(Bry_.Lcase__all(item.name), item); // NOTE: MW does "strtolower($text)"; canonical namespaces are always ascii
|
xNamespaces.Add(Bry_.Lcase__all(item.name), item); // NOTE: MW does "strtolower($text)"; canonical namespaces are always ascii
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// if (array_key_exists($name, $xNamespaces)) {
|
XomwNamespaceItem xNs = (XomwNamespaceItem)xNamespaces.Get_by(name);
|
||||||
// return $xNamespaces[$name];
|
if (xNs != null) {
|
||||||
// } else {
|
return xNs.id;
|
||||||
// return null;
|
}
|
||||||
// }
|
else {
|
||||||
return XomwNamespace.NULL_NS_ID;
|
return XomwNamespace.NULL_NS_ID;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// /**
|
// /**
|
||||||
// * Returns an array of the namespaces (by integer id) that exist on the
|
// * Returns an array of the namespaces (by integer id) that exist on the
|
||||||
|
@ -233,7 +233,7 @@ public class XomwMediaWikiTitleCodec implements XomwTitleFormatter {
|
|||||||
* @return array A map with the fields 'interwiki', 'fragment', 'namespace',
|
* @return array A map with the fields 'interwiki', 'fragment', 'namespace',
|
||||||
* 'user_case_dbkey', and 'dbkey'.
|
* 'user_case_dbkey', and 'dbkey'.
|
||||||
*/
|
*/
|
||||||
// private final byte[][] tmpPrefixRegex = new byte[2][];
|
private final byte[][] tmpPrefixRegex = new byte[2][];
|
||||||
public XomwMediaWikiTitleCodecParts splitTitleString(byte[] text, int defaultNamespace) {
|
public XomwMediaWikiTitleCodecParts splitTitleString(byte[] text, int defaultNamespace) {
|
||||||
byte[] dbkey = XophpString.str_replace(Byte_ascii.Space, Byte_ascii.Underline, text);
|
byte[] dbkey = XophpString.str_replace(Byte_ascii.Space, Byte_ascii.Underline, text);
|
||||||
|
|
||||||
@ -276,74 +276,70 @@ public class XomwMediaWikiTitleCodec implements XomwTitleFormatter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Namespace or interwiki prefix
|
// Namespace or interwiki prefix
|
||||||
if (Bry_.Has_at_bgn(dbkey, Bry_.new_a7("File:"))) {
|
do {
|
||||||
parts.ns = XomwDefines.NS_FILE;
|
byte[][] m = tmpPrefixRegex;
|
||||||
dbkey = Bry_.Mid(dbkey, 5);
|
if (XomwRegexTitlePrefix.preg_match(m, dbkey)) {
|
||||||
|
byte[] p = m[0];
|
||||||
|
int ns = this.language.getNsIndex(p);
|
||||||
|
if (ns != XophpUtility.NULL_INT) {
|
||||||
|
// Ordinary namespace
|
||||||
|
dbkey = m[1];
|
||||||
|
parts.ns = ns;
|
||||||
|
// For Talk:X pages, check if X has a "namespace" prefix
|
||||||
|
if (ns == XomwDefines.NS_TALK && XomwRegexTitlePrefix.preg_match(m, dbkey)) {
|
||||||
|
if (this.language.getNsIndex(m[0]) != XophpUtility.NULL_INT) {
|
||||||
|
// Disallow Talk:File:x type titles...
|
||||||
|
throw new XomwMalformedTitleException("title-invalid-talk-namespace", text);
|
||||||
}
|
}
|
||||||
// do {
|
// else if (Interwiki::isValidInterwiki($x[1])) {
|
||||||
// byte[][] m = tmpPrefixRegex;
|
// // TODO: get rid of global state!
|
||||||
// if (XomwRegexTitlePrefix.preg_match(m, dbkey)) {
|
// // Disallow Talk:Interwiki:x type titles...
|
||||||
// byte[] p = m[0];
|
// throw new XomwMalformedTitleException('title-invalid-talk-namespace', text);
|
||||||
// int ns = this.language.getNsIndex(p);
|
|
||||||
// if (ns != XophpUtility.NULL_INT) {
|
|
||||||
// // Ordinary namespace
|
|
||||||
// dbkey = m[1];
|
|
||||||
// parts.ns = ns;
|
|
||||||
// // For Talk:X pages, check if X has a "namespace" prefix
|
|
||||||
// if (ns == XomwDefines.NS_TALK && XomwRegexTitlePrefix.preg_match(m, dbkey)) {
|
|
||||||
// if (this.language.getNsIndex(m[0]) != XophpUtility.NULL_INT) {
|
|
||||||
// // Disallow Talk:File:x type titles...
|
|
||||||
// throw new XomwMalformedTitleException("title-invalid-talk-namespace", text);
|
|
||||||
// }
|
// }
|
||||||
//// else if (Interwiki::isValidInterwiki($x[1])) {
|
}
|
||||||
//// // TODO: get rid of global state!
|
}
|
||||||
//// // Disallow Talk:Interwiki:x type titles...
|
// else if (Interwiki::isValidInterwiki($p)) {
|
||||||
//// throw new XomwMalformedTitleException('title-invalid-talk-namespace', text);
|
// // Interwiki link
|
||||||
//// }
|
// dbkey = $m[2];
|
||||||
|
// parts['interwiki'] = this.language->lc($p);
|
||||||
|
//
|
||||||
|
// // Redundant interwiki prefix to the local wiki
|
||||||
|
// foreach (this.localInterwikis as $localIW) {
|
||||||
|
// if (0 == strcasecmp(parts['interwiki'], $localIW)) {
|
||||||
|
// if (dbkey == '') {
|
||||||
|
// // Empty self-links should point to the Main Page, to ensure
|
||||||
|
// // compatibility with cross-wiki transclusions and the like.
|
||||||
|
// $mainPage = Title::newMainPage();
|
||||||
|
// return [
|
||||||
|
// 'interwiki' => $mainPage->getInterwiki(),
|
||||||
|
// 'local_interwiki' => true,
|
||||||
|
// 'fragment' => $mainPage->getFragment(),
|
||||||
|
// 'namespace' => $mainPage->getNamespace(),
|
||||||
|
// 'dbkey' => $mainPage->getDBkey(),
|
||||||
|
// 'user_case_dbkey' => $mainPage->getUserCaseDBKey()
|
||||||
|
// ];
|
||||||
|
// }
|
||||||
|
// parts['interwiki'] = '';
|
||||||
|
// // local interwikis should behave like initial-colon links
|
||||||
|
// parts['local_interwiki'] = true;
|
||||||
|
//
|
||||||
|
// // Do another namespace split...
|
||||||
|
// continue 2;
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
//// else if (Interwiki::isValidInterwiki($p)) {
|
//
|
||||||
//// // Interwiki link
|
// // If there's an initial colon after the interwiki, that also
|
||||||
//// dbkey = $m[2];
|
// // resets the default namespace
|
||||||
//// parts['interwiki'] = this.language->lc($p);
|
// if (dbkey !== '' && dbkey[0] == ':') {
|
||||||
////
|
// parts['namespace'] = NS_MAIN;
|
||||||
//// // Redundant interwiki prefix to the local wiki
|
// dbkey = substr(dbkey, 1);
|
||||||
//// foreach (this.localInterwikis as $localIW) {
|
|
||||||
//// if (0 == strcasecmp(parts['interwiki'], $localIW)) {
|
|
||||||
//// if (dbkey == '') {
|
|
||||||
//// // Empty self-links should point to the Main Page, to ensure
|
|
||||||
//// // compatibility with cross-wiki transclusions and the like.
|
|
||||||
//// $mainPage = Title::newMainPage();
|
|
||||||
//// return [
|
|
||||||
//// 'interwiki' => $mainPage->getInterwiki(),
|
|
||||||
//// 'local_interwiki' => true,
|
|
||||||
//// 'fragment' => $mainPage->getFragment(),
|
|
||||||
//// 'namespace' => $mainPage->getNamespace(),
|
|
||||||
//// 'dbkey' => $mainPage->getDBkey(),
|
|
||||||
//// 'user_case_dbkey' => $mainPage->getUserCaseDBKey()
|
|
||||||
//// ];
|
|
||||||
//// }
|
|
||||||
//// parts['interwiki'] = '';
|
|
||||||
//// // local interwikis should behave like initial-colon links
|
|
||||||
//// parts['local_interwiki'] = true;
|
|
||||||
////
|
|
||||||
//// // Do another namespace split...
|
|
||||||
//// continue 2;
|
|
||||||
//// }
|
|
||||||
//// }
|
|
||||||
////
|
|
||||||
//// // If there's an initial colon after the interwiki, that also
|
|
||||||
//// // resets the default namespace
|
|
||||||
//// if (dbkey !== '' && dbkey[0] == ':') {
|
|
||||||
//// parts['namespace'] = NS_MAIN;
|
|
||||||
//// dbkey = substr(dbkey, 1);
|
|
||||||
//// }
|
|
||||||
//// }
|
|
||||||
// // If there's no recognized interwiki or namespace,
|
|
||||||
// // then let the colon expression be part of the title.
|
|
||||||
// }
|
// }
|
||||||
// break;
|
// }
|
||||||
// } while (true);
|
// If there's no recognized interwiki or namespace,
|
||||||
|
// then let the colon expression be part of the title.
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
} while (true);
|
||||||
|
|
||||||
// $fragment = strstr(dbkey, '#');
|
// $fragment = strstr(dbkey, '#');
|
||||||
// if (false !== $fragment) {
|
// if (false !== $fragment) {
|
||||||
|
@ -29,4 +29,10 @@ public class XomwMediaWikiTitleCodecParts {
|
|||||||
this.dbkey = dbkey;
|
this.dbkey = dbkey;
|
||||||
this.user_case_dbkey = dbkey;
|
this.user_case_dbkey = dbkey;
|
||||||
}
|
}
|
||||||
|
// Serializes this object's ns and dbkey, in that order, into one String through a Bry_bfr.
// Add_byte_pipe() is called after each field (presumably appending a '|' delimiter -- confirm in Bry_bfr).
// No other field is written here, so two parts that differ only in other fields yield the same string.
public String ToStr() {
|
||||||

Bry_bfr bfr = Bry_bfr_.New();
|
||||||

bfr.Add_int_variable(ns).Add_byte_pipe();
|
||||||

bfr.Add(dbkey).Add_byte_pipe();
|
||||||

return bfr.To_str_and_clear();
|
||||||

}
|
||||||
}
|
}
|
||||||
|
@ -14,7 +14,10 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
*/
|
*/
|
||||||
package gplx.xowa.mediawiki.includes.title; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
|
package gplx.xowa.mediawiki.includes.title; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
|
||||||
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import gplx.xowa.mediawiki.includes.parsers.*;
|
import org.junit.*; import gplx.core.tests.*;
|
||||||
|
import gplx.core.btries.*;
|
||||||
|
import gplx.xowa.mediawiki.includes.parsers.*;
|
||||||
|
import gplx.xowa.mediawiki.languages.*; import gplx.xowa.langs.*;
|
||||||
public class XomwMediaWikiTitleCodecTest {
|
public class XomwMediaWikiTitleCodecTest {
|
||||||
private final XomwMediaWikiTitleCodecFxt fxt = new XomwMediaWikiTitleCodecFxt();
|
private final XomwMediaWikiTitleCodecFxt fxt = new XomwMediaWikiTitleCodecFxt();
|
||||||
@Test public void regexTitlePrefix() {
|
@Test public void regexTitlePrefix() {
|
||||||
@ -25,6 +28,11 @@ public class XomwMediaWikiTitleCodecTest {
|
|||||||
// underscores
|
// underscores
|
||||||
fxt.Test_regexTitlePrefix("a__:___b" , "a", "b");
|
fxt.Test_regexTitlePrefix("a__:___b" , "a", "b");
|
||||||
}
|
}
|
||||||
|
// Checks that splitting "File:A" yields namespace XomwDefines.NS_FILE and dbkey "A".
// The codec is built from the fixture's language object (fxt.Make_lang()).
@Test public void splitTitleString() {
|
||||||

XomwMediaWikiTitleCodec codec = fxt.Make_codec(fxt.Make_lang());
|
||||||

// ns
|
||||||

fxt.Test_splitTitleString(codec, "File:A" , fxt.Make_parts(XomwDefines.NS_FILE, "A"));
|
||||||

}
|
||||||
}
|
}
|
||||||
class XomwMediaWikiTitleCodecFxt {
|
class XomwMediaWikiTitleCodecFxt {
|
||||||
private byte[][] regexTitlePrefixResult = new byte[2][];
|
private byte[][] regexTitlePrefixResult = new byte[2][];
|
||||||
@ -33,4 +41,19 @@ class XomwMediaWikiTitleCodecFxt {
|
|||||||
Gftest.Eq__str(expd_ns, String_.new_u8(regexTitlePrefixResult[0]));
|
Gftest.Eq__str(expd_ns, String_.new_u8(regexTitlePrefixResult[0]));
|
||||||
Gftest.Eq__str(expd_ttl, String_.new_u8(regexTitlePrefixResult[1]));
|
Gftest.Eq__str(expd_ttl, String_.new_u8(regexTitlePrefixResult[1]));
|
||||||
}
|
}
|
||||||
|
// Test helper: builds the expected XomwMediaWikiTitleCodecParts from a namespace id and a dbkey String.
// The dbkey is converted to bytes with Bry_.new_u8; note the constructor takes (dbkey, ns), the reverse of this method's parameter order.
public XomwMediaWikiTitleCodecParts Make_parts(int ns, String dbkey) {
|
||||||

return new XomwMediaWikiTitleCodecParts(Bry_.new_u8(dbkey), ns);
|
||||||

}
|
||||||
|
// Test helper: constructs a XomwMediaWikiTitleCodec around the supplied XomwLanguage.
public XomwMediaWikiTitleCodec Make_codec(XomwLanguage lang) {
|
||||||

return new XomwMediaWikiTitleCodec(lang);
|
||||||

}
|
||||||
|
// Test helper: creates an app with Xoa_app_fxt.Make__app__edit(), builds an Xol_lang_itm from the app's
// Lang_mgr() using Xol_lang_itm_.Key_en (presumably the English language key -- confirm), and wraps it in a XomwLanguage.
public XomwLanguage Make_lang() {
|
||||||

Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||||

Xol_lang_itm lang = new Xol_lang_itm(app.Lang_mgr(), Xol_lang_itm_.Key_en);
|
||||||

return new XomwLanguage(lang);
|
||||||

}
|
||||||
|
// Runs codec.splitTitleString on `src` (converted with Bry_.new_u8) using XomwDefines.NS_MAIN as the default namespace,
// then asserts that the ToStr() forms of the expected and actual parts are equal.
// The comparison is string-based, so only what ToStr() emits is checked.
public void Test_splitTitleString(XomwMediaWikiTitleCodec codec, String src, XomwMediaWikiTitleCodecParts expd) {
|
||||||

XomwMediaWikiTitleCodecParts actl = codec.splitTitleString(Bry_.new_u8(src), XomwDefines.NS_MAIN);
|
||||||

Gftest.Eq__str(expd.ToStr(), actl.ToStr());
|
||||||

}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user