1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Embeddable: Create core dbs in proper subdirectory

This commit is contained in:
gnosygnu
2017-10-23 20:50:22 -04:00
parent dc22c15895
commit 1336d44f34
4537 changed files with 0 additions and 311750 deletions

View File

@@ -13,7 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
/**
 * Contract for a language-specific grammar transformer: given a word and the name of a grammatical
 * form, an implementation writes its result to the supplied buffer.
 */
public interface Xol_grammar {
	/**
	 * Evaluates the grammar rule named by {@code type} against {@code word}.
	 *
	 * @param bfr  buffer handed to the implementation for its output
	 * @param lang language item handed to the implementation
	 * @param word word to transform
	 * @param type name of the requested grammatical form
	 * @return flag chosen by the implementation; the language implementations in this package return
	 *         true once they have handled the word, while the "unimplemented" placeholder returns false
	 */
	boolean Grammar_eval(
		Bry_bfr bfr,
		Xol_lang_itm lang,
		byte[] word,
		byte[] type
	);
}

View File

@@ -13,44 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import gplx.core.primitives.*; import gplx.core.btries.*;
/** Static helpers for grammar support: case-type ids, lookup of an id by name, and a per-language factory. */
public class Xol_grammar_ {
	/** Count of the named case ids below (0 through 8). */
	public static final byte Tid__max = 9;
	public static final byte
		Tid_genitive      = 0
	,	Tid_elative       = 1
	,	Tid_partitive     = 2
	,	Tid_illative      = 3
	,	Tid_inessive      = 4
	,	Tid_accusative    = 5
	,	Tid_instrumental  = 6
	,	Tid_prepositional = 7
	,	Tid_dative        = 8
	,	Tid_unknown       = Byte_.Max_value_127
	;
	private static final Btrie_slim_mgr Tid_trie = Tid_trie_make();

	/** Builds the name-to-id trie. */
	private static Btrie_slim_mgr Tid_trie_make() {
		return Btrie_slim_mgr.ci_a7() // NOTE:ci.ascii:MW kwds
			.Add_str_byte("genitive", Tid_genitive)
			.Add_str_byte("elative", Tid_elative)
			.Add_str_byte("partitive", Tid_partitive)
			.Add_str_byte("illative", Tid_illative)
			.Add_str_byte("inessive", Tid_inessive)
			.Add_str_byte("accusative", Tid_accusative)
			.Add_str_byte("instrumental", Tid_instrumental)
			.Add_str_byte("prepositional", Tid_prepositional)
			.Add_str_byte("dative", Tid_dative)
			;
	}

	/**
	 * Resolves a case name to its id.
	 *
	 * @param v case name
	 * @return the matching {@code Tid_*} value, or {@link #Tid_unknown} when {@code v} is empty or has no exact match
	 */
	public static byte Tid_of_type(byte[] v) {
		if (Bry_.Len_eq_0(v)) return Tid_unknown;
		Object match = Xol_grammar_.Tid_trie.Match_exact(v, 0, v.length);
		if (match == null) return Tid_unknown;
		Byte_obj_val tid = (Byte_obj_val)match;
		return tid.Val();
	}

	/**
	 * Chooses the grammar implementation for a language id.
	 *
	 * @param lang_id one of the {@code Xol_lang_stub_.Id_*} values
	 * @return a new fi / ru / he grammar, the shared no-op grammar for pl and cs, or the shared
	 *         unimplemented grammar for every other id
	 */
	public static Xol_grammar new_by_lang_id(int lang_id) {
		if (lang_id == Xol_lang_stub_.Id_fi) return new Xol_grammar_fi();
		if (lang_id == Xol_lang_stub_.Id_ru) return new Xol_grammar_ru();
		if (lang_id == Xol_lang_stub_.Id_he) return new Xol_grammar_he();
		boolean is_noop_lang
			=  lang_id == Xol_lang_stub_.Id_pl
			|| lang_id == Xol_lang_stub_.Id_cs	// PAGE:cs.q; DATE:2016-09-04
			;
		if (is_noop_lang) return Xol_grammar__noop.Instance;
		return Xol_grammar__unimplemented.Instance;
	}
}
/** Placeholder grammar: writes nothing to the buffer and always reports false. */
class Xol_grammar__unimplemented implements Xol_grammar {
	/** Shared instance. */
	public static final Xol_grammar__unimplemented Instance = new Xol_grammar__unimplemented();

	Xol_grammar__unimplemented() {}

	public boolean Grammar_eval(Bry_bfr bfr, Xol_lang_itm lang, byte[] word, byte[] type) {
		return false;
	}
}
/** Pass-through grammar: copies the word to the buffer unchanged and always reports true. */
class Xol_grammar__noop implements Xol_grammar {
	/** Shared instance. */
	public static final Xol_grammar__noop Instance = new Xol_grammar__noop();

	Xol_grammar__noop() {}

	public boolean Grammar_eval(Bry_bfr bfr, Xol_lang_itm lang, byte[] word, byte[] type) {
		bfr.Add(word);
		return true;
	}
}

View File

@@ -13,68 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import gplx.core.primitives.*; import gplx.core.btries.*;
import gplx.xowa.apps.urls.*;
/**
 * Finnish grammar rules; a port of the PHP logic quoted in the inline "PHP:" comments.
 * The word is written to the buffer first and the case ending is appended after it.
 */
public class Xol_grammar_fi implements Xol_grammar {
	/**
	 * Reports whether the last harmony vowel in the word is a back vowel (a, o, u) rather than a front vowel (ä, ö, y).
	 *
	 * @param word     bytes to scan; Grammar_eval passes the lower-cased word because the trie is case-sensitive
	 * @param word_len number of bytes of {@code word} to scan
	 * @return true if a back vowel occurs with no front vowel after it; false otherwise, including when no harmony vowel occurs
	 */
	public boolean Vowel_harmony(byte[] word, int word_len) {
		// $aou = preg_match( '/[aou][^äöy]*$/i', $word );
		boolean aou_found = false;
		for (int i = 0; i < word_len; i++) {
			byte b = word[i];
			Object o = trie_vh.Match_bgn_w_byte(b, word, i, word_len);
			if (o != null) {
				// each harmony vowel overrides the previous one, so the last one decides
				byte vh_type = ((Byte_obj_val)o).Val();
				aou_found = vh_type == Trie_vh_back;
			}
		}
		return aou_found;
	}
	/**
	 * Appends the inflected form of {@code word} to {@code bfr}.
	 *
	 * @param bfr  buffer receiving the result
	 * @param lang language whose case manager is used to lower-case the word
	 * @param word word to inflect; an empty word writes nothing
	 * @param type case name; an unknown name writes the word unchanged
	 * @return always true
	 */
	public boolean Grammar_eval(Bry_bfr bfr, Xol_lang_itm lang, byte[] word, byte[] type) {
		if (Bry_.Len_eq_0(word)) return true;								// empty_string returns ""
		byte tid = Xol_grammar_.Tid_of_type(type);
		if (tid == Xol_grammar_.Tid_unknown) {bfr.Add(word); return true;}	// unknown type returns word
		// PHP: if (isset($wgGrammarForms['fi'][$case][$word])){ return $wgGrammarForms['fi'][$case][$word];
		if (manual_regy == null) {
			manual_regy = new Xol_grammar_manual_regy()
				.Itms_add(Xol_grammar_.Tid_elative, "Wikiuutiset", "Wikiuutisista");
		}
		byte[] manual_repl = manual_regy.Itms_get(tid, word);
		if (manual_repl != null) {
			bfr.Add(manual_repl);
			return true;
		}
		bfr.Add(word);	// NOTE: preemptively add word now; the rest of this function takes "word" and adds other letters to it;
		int word_len = word.length;
		byte[] lower = lang.Case_mgr().Case_build_lower(word, 0, word_len);
		boolean aou = Vowel_harmony(lower, word_len);
		// PHP: if ( preg_match( '/wiki$/i', $word ) ) $aou = false;
		if (aou && Bry_.Has_at_end(lower, Bry_wiki))
			aou = false;
		// PHP: if ( preg_match( '/[bcdfghjklmnpqrstvwxz]$/i', $word ) ) $word .= 'i';
		boolean i_appended = Is_consonant(lower[word_len - 1]);
		if (i_appended)
			bfr.Add_byte(Byte_ascii.Ltr_i);
		switch (tid) {
			case Xol_grammar_.Tid_genitive:		bfr.Add_byte(Byte_ascii.Ltr_n); break;			// case 'genitive': $word .= 'n';
			case Xol_grammar_.Tid_elative:		bfr.Add(aou ? Bry_sta_y : Bry_sta_n); break;	// case 'elative': $word .= ( $aou ? 'sta' : 'stä' );
			case Xol_grammar_.Tid_partitive:	bfr.Add(aou ? Bry_a_y : Bry_a_n); break;		// case 'partitive': $word .= ( $aou ? 'a' : 'ä' );
			case Xol_grammar_.Tid_inessive:		bfr.Add(aou ? Bry_ssa_y : Bry_ssa_n); break;	// case 'inessive': $word .= ( $aou ? 'ssa' : 'ssä' );
			case Xol_grammar_.Tid_illative:																// # Double the last letter and add 'n'
				// The letter to double is the last letter of what has been written so far:
				// the 'i' appended above if there was one (e.g. "Wikibooks" -> "Wikibooksiin"),
				// else the word's final character, copied whole so a multi-byte letter such as "ä" stays valid UTF-8.
				if (i_appended)
					bfr.Add_byte(Byte_ascii.Ltr_i);
				else
					bfr.Add_mid(word, Last_char_bgn(word, word_len), word_len);
				bfr.Add_byte(Byte_ascii.Ltr_n);
				break;
		}
		return true;
	}
	/** Returns true if {@code b} is one of the lower-case ASCII consonants bcdfghjklmnpqrstvwxz. */
	private static boolean Is_consonant(byte b) {
		switch (b) {
			case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g:
			case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_j: case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m:
			case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s:
			case Byte_ascii.Ltr_t: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_z:
				return true;
			default:
				return false;
		}
	}
	/** Returns the index of the first byte of the last UTF-8 character in {@code src[0..src_len)}. */
	private static int Last_char_bgn(byte[] src, int src_len) {
		int pos = src_len - 1;
		while (pos > 0 && (src[pos] & 0xC0) == 0x80)	// 10xxxxxx is a UTF-8 continuation byte; keep stepping back to the lead byte
			pos--;
		return pos;
	}
	private static Xol_grammar_manual_regy manual_regy;	// created on first use by Grammar_eval
	private static final byte[] Bry_sta_y = Bry_.new_a7("sta"), Bry_sta_n = Bry_.new_u8("stä"), Bry_a_y = Bry_.new_a7("a"), Bry_a_n = Bry_.new_u8("ä"), Bry_ssa_y = Bry_.new_a7("ssa"), Bry_ssa_n = Bry_.new_u8("ssä");
	static final byte Trie_vh_back = 0, Trie_vh_front = 1;
	private static final Btrie_slim_mgr trie_vh = Btrie_slim_mgr.cs().Add_str_byte__many(Trie_vh_back, "a", "o", "u").Add_str_byte__many(Trie_vh_front, "ä", "ö", "y");
	private static final byte[] Bry_wiki = Bry_.new_a7("wiki");
}

View File

@@ -13,37 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import gplx.core.btries.*;
/**
 * Hebrew grammar rules. Only the "prefixed" form (also accepted under its Hebrew name) changes the
 * word; every other type writes the word unchanged.
 */
public class Xol_grammar_he implements Xol_grammar {
	/**
	 * Appends {@code word}, adjusted for the "prefixed" form when requested, to {@code bfr}.
	 *
	 * @param bfr  buffer receiving the result
	 * @param lang unused by this implementation
	 * @param word word to transform
	 * @param type form name, looked up in the form-name hash
	 * @return always true
	 */
	public boolean Grammar_eval(Bry_bfr bfr, Xol_lang_itm lang, byte[] word, byte[] type) {
		// if ( isset( $wgGrammarForms['he'][$case][$word] ) ) return $wgGrammarForms['he'][$case][$word]; // TODO_OLD: implement global $wgGrammarForms; WHEN: need to find he.w entries for DefaultSettings.php
		if (hash.Get_as_int_or(type, -1) == Tid__prefixed) {
			// Duplicate the "Waw" if prefixed, but not if it is already double.
			if (	 Bry_.Match(word, 0, 2, Bry__waw__0)								// "ו"
				&&	!Bry_.Match(word, 0, 4, Bry__waw__1)								// "וו"
				)
				word = Bry_.Add(Bry__waw__0, word);
			// Remove the "He" article if prefixed
			if (	Bry_.Match(word, 0, 2, Bry__he__0))									// "ה"
				word = Bry_.Mid(word, 2);
			// Add a hyphen (maqaf) before non-Hebrew letters: i.e. when the first two bytes sort before "א" or after "ת".
			// The lower bound is written as "א" compared against the word (reversed arguments), so More means the word sorts before "א".
			// An equality test here would hyphenate words that start with aleph and would skip words starting with anything lower (e.g. Latin letters).
			if (	Bry_.Compare(Bry__maqaf__0, 0, 2, word, 0, 2) == CompareAble_.More	// word < "א"
				||	Bry_.Compare(word, 0, 2, Bry__maqaf__1, 0, 2) == CompareAble_.More	// word > "ת"
				)
				word = Bry_.Add(Bry__maqaf__2, word);
		}
		bfr.Add(word);
		return true;
	}
	private static final int Tid__prefixed = 1;
	// form names that select the "prefixed" rule
	private static final Hash_adp_bry hash = Hash_adp_bry.ci_u8(gplx.xowa.langs.cases.Xol_case_mgr_.U8())
		.Add_str_int("prefixed"		, Tid__prefixed)
		.Add_str_int("תחילית"		, Tid__prefixed)
		;
	private static final byte[]
		Bry__waw__0 = Bry_.new_u8("ו"), Bry__waw__1 = Bry_.new_u8("וו")
	,	Bry__he__0 = Bry_.new_u8("ה")
	,	Bry__maqaf__0 = Bry_.new_u8("א"), Bry__maqaf__1 = Bry_.new_u8("ת"), Bry__maqaf__2 = Bry_.new_u8("־")
	;
}

View File

@@ -13,20 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
/**
 * Registry of hand-written replacements, keyed first by case id and then by word.
 * One hash per case id is created on first registration.
 */
public class Xol_grammar_manual_regy {
	private final Hash_adp_bry[] ary = new Hash_adp_bry[Xol_grammar_.Tid__max];
	/**
	 * Looks up the replacement registered for a word under a case id.
	 *
	 * @param type_tid case id
	 * @param word     word to look up
	 * @return the registered replacement, or null when nothing is registered for that id / word,
	 *         or when {@code type_tid} has no slot (e.g. {@code Xol_grammar_.Tid_unknown})
	 */
	public byte[] Itms_get(byte type_tid, byte[] word) {
		if (type_tid < 0 || type_tid >= ary.length) return null;	// ids outside the table (such as Tid_unknown = 127) cannot have entries
		Hash_adp_bry hash = ary[type_tid]; if (hash == null) return null;
		return (byte[])hash.Get_by_bry(word);
	}
	/**
	 * Registers a replacement for a word under a case id.
	 *
	 * @param type_tid case id; must be one of the ids below {@code Xol_grammar_.Tid__max}
	 * @param orig     word to replace
	 * @param repl     replacement text
	 * @return this registry, to allow chaining
	 */
	public Xol_grammar_manual_regy Itms_add(byte type_tid, String orig, String repl) {
		Hash_adp_bry hash = ary[type_tid];
		if (hash == null) {
			hash = Hash_adp_bry.ci_a7();	// ASCII:currently only being used for Wikiuutiset; DATE:2014-07-07
			ary[type_tid] = hash;
		}
		hash.Add_str_obj(orig, Bry_.new_a7(repl));
		return this;
	}
}

View File

@@ -13,60 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import gplx.core.btries.*;
/**
 * Russian grammar rules. Only the genitive case rewrites the word, by swapping a known ending for
 * its replacement; every other case writes the word unchanged.
 */
public class Xol_grammar_ru implements Xol_grammar {
	static final byte
		Genitive_null = 0
	,	Genitive_bnkn = 1
	,	Genitive_Bnkn = 26
	,	Genitive_b    = 3
	,	Genitive_nr   = 4
	,	Genitive_ka   = 5
	,	Genitive_tn   = 6
	,	Genitive_abl  = 7
	,	Genitive_hnk  = 8
	;
	private final Btrie_rv trv = new Btrie_rv();
	private static Btrie_bwd_mgr Genitive_trie;	// built on first genitive request

	/** Builds the trie of endings; entries with a null replacement mark endings that are left as they are. */
	private static Btrie_bwd_mgr genitive_trie_() {
		Btrie_bwd_mgr trie = new Btrie_bwd_mgr(false);
		genitive_trie_add(trie, Genitive_bnkn, "вики", null);
		genitive_trie_add(trie, Genitive_Bnkn, "Вики", null);
		genitive_trie_add(trie, Genitive_b   , "ь"   , "я");
		genitive_trie_add(trie, Genitive_nr  , "ия"  , "ии");
		genitive_trie_add(trie, Genitive_ka  , "ка"  , "ки");
		genitive_trie_add(trie, Genitive_tn  , "ти"  , "тей");
		genitive_trie_add(trie, Genitive_abl , "ды"  , "дов");
		genitive_trie_add(trie, Genitive_hnk , "ник" , "ника");
		return trie;
	}

	/** Registers one ending in the trie, keyed by the UTF-8 bytes of {@code find_str}. */
	private static void genitive_trie_add(Btrie_bwd_mgr trie, byte tid, String find_str, String repl_str) {
		byte[] find_bry = Bry_.new_u8(find_str);
		byte[] repl_bry = null;
		if (repl_str != null)
			repl_bry = Bry_.new_u8(repl_str);
		trie.Add(find_bry, new Xol_grammar_ru_genitive_itm(tid, find_bry, repl_bry));
	}

	/**
	 * Appends the inflected form of {@code word} to {@code bfr}.
	 *
	 * @param bfr  buffer receiving the result
	 * @param lang unused by this implementation
	 * @param word word to inflect; an empty word writes nothing
	 * @param type case name
	 * @return always true
	 */
	public boolean Grammar_eval(Bry_bfr bfr, Xol_lang_itm lang, byte[] word, byte[] type) {
		if (Bry_.Len_eq_0(word)) return true;	// empty_string returns ""
		byte tid = Xol_grammar_.Tid_of_type(type);
		// dative, accusative, instrumental and prepositional have no rules; they fall through to the unchanged word
		boolean replaced = tid == Xol_grammar_.Tid_genitive && Genitive_replace(bfr, word);
		if (!replaced)
			bfr.Add(word);
		return true;
	}

	/**
	 * Tries the genitive ending table against {@code word}, starting the match at the word's last byte.
	 *
	 * @return true if a replacement was written to {@code bfr}; false if no entry matched or the entry has no replacement
	 */
	private boolean Genitive_replace(Bry_bfr bfr, byte[] word) {
		if (Genitive_trie == null)
			Genitive_trie = genitive_trie_();
		Object match = Genitive_trie.Match_at(trv, word, word.length - 1, -1);
		if (match == null) return false;
		Xol_grammar_ru_genitive_itm itm = (Xol_grammar_ru_genitive_itm)match;
		if (itm.Repl_is_noop()) return false;
		bfr.Add_mid(word, 0, trv.Pos() + 1);	// the part of the word kept in front of the replacement
		bfr.Add(itm.Repl());
		return true;
	}
}
/** Value holder for one genitive rule: an id, the ending to find, and its replacement (null means "no replacement"). */
class Xol_grammar_ru_genitive_itm {
	private final byte tid;
	private final byte[] find;
	private final byte[] repl;

	public Xol_grammar_ru_genitive_itm(byte tid, byte[] find, byte[] repl) {
		this.tid = tid;
		this.find = find;
		this.repl = repl;
	}

	/** Rule id. */
	public byte Tid() {
		return tid;
	}

	/** Ending to look for; the same array that was passed to the constructor. */
	public byte[] Find() {
		return find;
	}

	/** Replacement ending, or null when the rule has none; the same array that was passed to the constructor. */
	public byte[] Repl() {
		return repl;
	}

	/** True when the rule carries no replacement. */
	public boolean Repl_is_noop() {
		return repl == null;
	}
}