1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-07-12 21:10:02 -04:00
commit 794b5a232f
3099 changed files with 238212 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
/**
 * Contract for a language-specific grammar transformer: given a word and the name of a
 * grammatical case, write the inflected form of the word to a buffer.
 */
public interface Xol_grammar {
/**
 * Evaluates a grammar transformation for one word.
 *
 * @param bfr  buffer that receives the output
 * @param lang language object made available to the implementation
 * @param word raw bytes of the word to transform
 * @param type raw bytes naming the grammatical case (for example "genitive")
 * @return whether the implementation handled the request; among the implementations in this
 *         package, the "unimplemented" placeholder returns false without touching bfr, while
 *         the others return true. NOTE(review): how callers react to false is not visible
 *         here -- confirm against the call site.
 */
boolean Grammar_eval(Bry_bfr bfr, Xol_lang lang, byte[] word, byte[] type);
}

View File

@@ -0,0 +1,55 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import gplx.core.primitives.*; import gplx.core.btries.*;
/**
 * Static helpers for grammar handling: the ids of the known grammatical cases, a lookup from
 * a case name to its id, and a factory that picks the grammar implementation for a language id.
 */
public class Xol_grammar_ {
	/** Number of known case ids (0 through 8); used elsewhere to size per-case arrays. */
	public static final byte Tid__max = 9;
	/** Case ids. Tid_unknown is the sentinel returned when a case name is not recognized. */
	public static final byte
		Tid_genitive      = 0,
		Tid_elative       = 1,
		Tid_partitive     = 2,
		Tid_illative      = 3,
		Tid_inessive      = 4,
		Tid_accusative    = 5,
		Tid_instrumental  = 6,
		Tid_prepositional = 7,
		Tid_dative        = 8,
		Tid_unknown       = Byte_.Max_value_127;
	private static final Btrie_slim_mgr Tid_trie = new_tid_trie();
	/** Builds the case-name lookup; names are registered in the same order as their ids. */
	private static Btrie_slim_mgr new_tid_trie() {
		return Btrie_slim_mgr.ci_ascii_()	// NOTE:ci.ascii:MW kwds
			.Add_str_byte("genitive", Tid_genitive)
			.Add_str_byte("elative", Tid_elative)
			.Add_str_byte("partitive", Tid_partitive)
			.Add_str_byte("illative", Tid_illative)
			.Add_str_byte("inessive", Tid_inessive)
			.Add_str_byte("accusative", Tid_accusative)
			.Add_str_byte("instrumental", Tid_instrumental)
			.Add_str_byte("prepositional", Tid_prepositional)
			.Add_str_byte("dative", Tid_dative);
	}
	/**
	 * Maps a case name to its id.
	 *
	 * @param v bytes of the case name; may be null or empty
	 * @return the matching Tid_* value, or Tid_unknown when v is empty or matches no registered name
	 */
	public static byte Tid_of_type(byte[] v) {
		if (!Bry_.Len_eq_0(v)) {
			Object match = Tid_trie.Match_exact(v, 0, v.length);
			if (match != null)
				return ((Byte_obj_val)match).Val();
		}
		return Tid_unknown;
	}
	/**
	 * Chooses the grammar implementation for a language id.
	 *
	 * @param lang_id one of the Xol_lang_itm_.Id_* values
	 * @return a new Finnish or Russian grammar, the shared no-op grammar for Polish, or the
	 *         shared "unimplemented" grammar for every other id
	 */
	public static Xol_grammar new_by_lang_id(int lang_id) {
		if (lang_id == Xol_lang_itm_.Id_fi) return new Xol_grammar_fi();
		if (lang_id == Xol_lang_itm_.Id_ru) return new Xol_grammar_ru();
		if (lang_id == Xol_lang_itm_.Id_pl) return Xol_grammar__noop._;
		return Xol_grammar__unimplemented._;
	}
}
/** Placeholder grammar for languages without an implementation: never writes and always reports false. */
class Xol_grammar__unimplemented implements Xol_grammar {
	/** Shared instance. */
	public static final Xol_grammar__unimplemented _ = new Xol_grammar__unimplemented();
	Xol_grammar__unimplemented() {}
	/** Leaves bfr untouched and returns false. */
	public boolean Grammar_eval(Bry_bfr bfr, Xol_lang lang, byte[] word, byte[] type) {
		return false;
	}
}
/** Pass-through grammar: copies the word to the buffer unchanged, whatever the requested case. */
class Xol_grammar__noop implements Xol_grammar {
	/** Shared instance. */
	public static final Xol_grammar__noop _ = new Xol_grammar__noop();
	Xol_grammar__noop() {}
	/** Appends word to bfr as-is and returns true. */
	public boolean Grammar_eval(Bry_bfr bfr, Xol_lang lang, byte[] word, byte[] type) {
		bfr.Add(word);
		return true;
	}
}

View File

@@ -0,0 +1,80 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import gplx.core.primitives.*; import gplx.core.btries.*;
/**
 * Finnish grammar: appends case endings to a word, following the PHP rules quoted in the
 * comments below (manual overrides first, then vowel harmony, then suffixing).
 */
public class Xol_grammar_fi implements Xol_grammar {
	/**
	 * Reports whether the last harmony vowel in the word is a back vowel.
	 * Port of PHP: $aou = preg_match( '/[aou][^äöy]*$/i', $word );
	 *
	 * The vowel trie is built with cs_(), so the "/i" part of the regex relies on the caller
	 * passing an already lower-cased word (Grammar_eval does).
	 *
	 * @param word     bytes to scan
	 * @param word_len number of bytes of word to scan
	 * @return true if an "a", "o" or "u" occurs with no "ä", "ö" or "y" after it
	 */
	public boolean Vowel_harmony(byte[] word, int word_len) {
		boolean aou_found = false;
		for (int i = 0; i < word_len; i++) {
			byte b = word[i];
			Object o = trie_vh.Match_bgn_w_byte(b, word, i, word_len);
			if (o != null) {
				// the latest harmony vowel wins: back vowel sets the flag, front vowel clears it
				byte vh_type = ((Byte_obj_val)o).Val();
				aou_found = vh_type == Trie_vh_back;
			}
		}
		return aou_found;
	}
	/**
	 * Writes the inflected form of word for the requested case to bfr.
	 *
	 * An empty word writes nothing; an unrecognized case writes the word unchanged; a word
	 * registered in the manual registry writes its registered replacement. Otherwise the word
	 * is written, followed by "i" if it ends in a consonant, followed by the case ending.
	 *
	 * @param bfr  buffer that receives the output
	 * @param lang supplies the case manager used to lower-case the word
	 * @param word bytes of the word to inflect
	 * @param type bytes naming the case (see Xol_grammar_.Tid_of_type)
	 * @return always true
	 */
	public boolean Grammar_eval(Bry_bfr bfr, Xol_lang lang, byte[] word, byte[] type) {
		if (Bry_.Len_eq_0(word)) return true; // empty_string returns ""
		byte tid = Xol_grammar_.Tid_of_type(type);
		if (tid == Xol_grammar_.Tid_unknown) {bfr.Add(word); return true;} // unknown type returns word
		// PHP: if (isset($wgGrammarForms['fi'][$case][$word])){ return $wgGrammarForms['fi'][$case][$word];
		if (manual_regy == null) {
			manual_regy = new Xol_grammar_manual_regy()
				.Itms_add(Xol_grammar_.Tid_elative, "Wikiuutiset", "Wikiuutisista");
		}
		byte[] manual_repl = manual_regy.Itms_get(tid, word);
		if (manual_repl != null) {
			bfr.Add(manual_repl);
			return true;
		}
		bfr.Add(word); // NOTE: preemptively add word now; the rest of this function takes "word" and adds other letters to it;
		int word_len = word.length;
		byte[] lower = lang.Case_mgr().Case_build_lower(word, 0, word_len);
		boolean aou = Vowel_harmony(lower, word_len);
		// PHP: if ( preg_match( '/wiki$/i', $word ) ) $aou = false;
		if (aou && Bry_.Has_at_end(lower, Xoa_url_parser.Bry_wiki_name))
			aou = false;
		// PHP: if ( preg_match( '/[bcdfghjklmnpqrstvwxz]$/i', $word ) ) $word .= 'i';
		boolean i_added = false; // remembered because the illative case doubles the last letter of the word *after* this step
		switch (lower[word_len - 1]) {
			case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g:
			case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_j: case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m:
			case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s:
			case Byte_ascii.Ltr_t: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_z:
				bfr.Add_byte(Byte_ascii.Ltr_i);
				i_added = true;
				break;
		}
		switch (tid) {
			case Xol_grammar_.Tid_genitive:		bfr.Add_byte(Byte_ascii.Ltr_n); break;				// case 'genitive': $word .= 'n';
			case Xol_grammar_.Tid_elative:		bfr.Add(aou ? Bry_sta_y : Bry_sta_n); break;		// case 'elative': $word .= ( $aou ? 'sta' : 'stä' );
			case Xol_grammar_.Tid_partitive:	bfr.Add(aou ? Bry_a_y : Bry_a_n); break;			// case 'partitive': $word .= ( $aou ? 'a' : 'ä' );
			case Xol_grammar_.Tid_inessive:		bfr.Add(aou ? Bry_ssa_y : Bry_ssa_n); break;		// case 'inessive': $word .= ( $aou ? 'ssa' : 'ssä' );
			case Xol_grammar_.Tid_illative:															// # Double the last letter and add 'n'
				// PHP: $word .= mb_substr( $word, -1 ) . 'n'; -- $word already carries the appended 'i' at this point,
				// so a consonant-final word doubles the 'i', not the consonant (e.g. "Internet" -> "Internetiin")
				if (i_added)
					bfr.Add_byte(Byte_ascii.Ltr_i);
				else
					Add_last_char(bfr, word, word_len);
				bfr.Add_byte(Byte_ascii.Ltr_n);
				break;
		}
		return true;
	}	static Xol_grammar_manual_regy manual_regy;
	/**
	 * Appends the last character of word to bfr, where "character" is a whole UTF-8 sequence:
	 * steps back over continuation bytes (10xxxxxx) so that a multi-byte final letter such as
	 * "ä" is copied intact rather than cut in half.
	 */
	private static void Add_last_char(Bry_bfr bfr, byte[] word, int word_len) {
		int bgn = word_len - 1;
		while (bgn > 0 && (word[bgn] & 0xC0) == 0x80)
			bgn--;
		bfr.Add_mid(word, bgn, word_len);
	}
	private static final byte[] Bry_sta_y = Bry_.new_a7("sta"), Bry_sta_n = Bry_.new_u8("stä"), Bry_a_y = Bry_.new_a7("a"), Bry_a_n = Bry_.new_u8("ä"), Bry_ssa_y = Bry_.new_a7("ssa"), Bry_ssa_n = Bry_.new_u8("ssä");
	static final byte Trie_vh_back = 0, Trie_vh_front = 1;
	private static Btrie_slim_mgr trie_vh = Btrie_slim_mgr.cs_().Add_str_byte__many(Trie_vh_back, "a", "o", "u").Add_str_byte__many(Trie_vh_front, "ä", "ö", "y");
}

View File

@@ -0,0 +1,34 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
/**
 * Registry of hand-written inflections: for each grammatical-case id, a lookup from a word to
 * the exact replacement that should be used instead of the rule-based result.
 */
public class Xol_grammar_manual_regy {
	// one lazily-created hash per case id; slots stay null until a word is registered for that case
	private final Hash_adp_bry[] ary = new Hash_adp_bry[Xol_grammar_.Tid__max];
	/**
	 * Looks up the registered replacement for a word under a case id.
	 *
	 * @param type_tid case id (Xol_grammar_.Tid_*)
	 * @param word     bytes of the word to look up
	 * @return the registered replacement, or null if nothing is registered for this case/word;
	 *         also null when type_tid lies outside the table (for example Tid_unknown), so an
	 *         unrecognized case reads as "no override" instead of raising
	 *         ArrayIndexOutOfBoundsException
	 */
	public byte[] Itms_get(byte type_tid, byte[] word) {
		if (type_tid < 0 || type_tid >= ary.length) return null;
		Hash_adp_bry hash = ary[type_tid];
		if (hash == null) return null;
		return (byte[])hash.Get_by_bry(word);
	}
	/**
	 * Registers a replacement for a word under a case id.
	 *
	 * @param type_tid case id (Xol_grammar_.Tid_*); must be a valid index into the table
	 * @param orig     word to match
	 * @param repl     replacement text; converted with Bry_.new_a7
	 * @return this registry, so calls can be chained
	 */
	public Xol_grammar_manual_regy Itms_add(byte type_tid, String orig, String repl) {
		Hash_adp_bry hash = ary[type_tid];
		if (hash == null) {
			hash = Hash_adp_bry.ci_ascii_();	// ASCII:currently only being used for Wikiuutiset; DATE:2014-07-07
			ary[type_tid] = hash;
		}
		hash.Add_str_obj(orig, Bry_.new_a7(repl));
		return this;
	}
}

View File

@@ -0,0 +1,73 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
import gplx.core.btries.*;
/**
 * Russian grammar: rewrites the ending of a word for the genitive case using a table of
 * suffix replacements; every other case writes the word unchanged.
 */
public class Xol_grammar_ru implements Xol_grammar {
	static final byte Genitive_null = 0, Genitive_bnkn = 1, Genitive_Bnkn = 26, Genitive_b = 3, Genitive_nr = 4, Genitive_ka = 5, Genitive_tn = 6, Genitive_abl = 7, Genitive_hnk = 8;
	// built on first genitive request; see Genitive_eval
	private static Btrie_bwd_mgr Genitive_trie;
	/**
	 * Builds the suffix table. Rows are registered in order; a null replacement marks a suffix
	 * that is matched but left unchanged.
	 */
	private static Btrie_bwd_mgr genitive_trie_() {
		byte[]   tids  = {Genitive_bnkn, Genitive_Bnkn, Genitive_b, Genitive_nr, Genitive_ka, Genitive_tn, Genitive_abl, Genitive_hnk};
		String[] finds = {"вики", "Вики", "ь", "ия", "ка", "ти", "ды", "ник"};
		String[] repls = {null, null, "я", "ии", "ки", "тей", "дов", "ника"};
		Btrie_bwd_mgr trie = new Btrie_bwd_mgr(false);
		for (int i = 0; i < tids.length; i++)
			genitive_trie_add(trie, tids[i], finds[i], repls[i]);
		return trie;
	}
	/** Registers one suffix row: the suffix bytes are the key, the item carries the replacement. */
	private static void genitive_trie_add(Btrie_bwd_mgr trie, byte tid, String find_str, String repl_str) {
		byte[] find_bry = Bry_.new_u8(find_str);
		byte[] repl_bry = null;
		if (repl_str != null)
			repl_bry = Bry_.new_u8(repl_str);
		trie.Add(find_bry, new Xol_grammar_ru_genitive_itm(tid, find_bry, repl_bry));
	}
	/**
	 * Writes the inflected form of word for the requested case to bfr.
	 *
	 * @param bfr  buffer that receives the output
	 * @param lang unused by this implementation
	 * @param word bytes of the word to inflect; an empty word writes nothing
	 * @param type bytes naming the case (see Xol_grammar_.Tid_of_type)
	 * @return always true
	 */
	public boolean Grammar_eval(Bry_bfr bfr, Xol_lang lang, byte[] word, byte[] type) {
		if (Bry_.Len_eq_0(word)) return true;	// empty_string returns ""
		boolean is_genitive = Xol_grammar_.Tid_of_type(type) == Xol_grammar_.Tid_genitive;
		if (is_genitive && Genitive_eval(bfr, word)) return true;
		// dative, accusative, instrumental, prepositional and unknown cases, as well as
		// genitive words without a usable suffix rule, all emit the word as-is
		bfr.Add(word);
		return true;
	}
	/**
	 * Tries the genitive suffix table against the end of word.
	 *
	 * @return true if a rule with a replacement matched and the rewritten word was written to
	 *         bfr; false (nothing written) if no rule matched or the matched rule has no replacement
	 */
	private static boolean Genitive_eval(Bry_bfr bfr, byte[] word) {
		if (Genitive_trie == null) Genitive_trie = genitive_trie_();
		Object match = Genitive_trie.Match_bgn(word, word.length - 1, -1);	// scan from the last byte toward the front
		if (match == null) return false;
		Xol_grammar_ru_genitive_itm itm = (Xol_grammar_ru_genitive_itm)match;
		if (itm.Repl_is_noop()) return false;
		bfr.Add_mid(word, 0, Genitive_trie.Match_pos() + 1);	// the part of word kept in front of the replacement
		bfr.Add(itm.Repl());
		return true;
	}
}
/** One row of the Russian genitive suffix table: a rule id, the suffix to find, and its replacement. */
class Xol_grammar_ru_genitive_itm {
	private final byte tid;
	private final byte[] find;
	private final byte[] repl;
	public Xol_grammar_ru_genitive_itm(byte tid, byte[] find, byte[] repl) {
		this.tid = tid;
		this.find = find;
		this.repl = repl;
	}
	/** Rule id. */
	public byte Tid() {return tid;}
	/** Suffix bytes this rule matches. */
	public byte[] Find() {return find;}
	/** Replacement bytes; null when the rule has no replacement. */
	public byte[] Repl() {return repl;}
	/** True when the rule has no replacement (repl is null). */
	public boolean Repl_is_noop() {return repl == null;}
}