mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.7.2.1
This commit is contained in:
21
400_xowa/src/gplx/xowa/langs/grammars/Xol_grammar.java
Normal file
21
400_xowa/src/gplx/xowa/langs/grammars/Xol_grammar.java
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
public interface Xol_grammar {
|
||||
boolean Grammar_eval(Bry_bfr bfr, Xol_lang lang, byte[] word, byte[] type);
|
||||
}
|
||||
55
400_xowa/src/gplx/xowa/langs/grammars/Xol_grammar_.java
Normal file
55
400_xowa/src/gplx/xowa/langs/grammars/Xol_grammar_.java
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
/**
 * Static helpers for grammar support: grammatical-case ids, lookup of a case id by name,
 * and the factory that picks a {@link Xol_grammar} for a language id.
 */
public class Xol_grammar_ {
	/** Number of known case ids (0 through 8); {@code Xol_grammar_manual_regy} sizes its per-case array with it. */
	public static final byte Tid__max = 9;
	public static final byte
	  Tid_genitive      = 0
	, Tid_elative       = 1
	, Tid_partitive     = 2
	, Tid_illative      = 3
	, Tid_inessive      = 4
	, Tid_accusative    = 5
	, Tid_instrumental  = 6
	, Tid_prepositional = 7
	, Tid_dative        = 8
	, Tid_unknown       = Byte_.Max_value_127
	;
	private static final Btrie_slim_mgr Tid_trie = new_tid_trie();
	/** Builds the case-name lookup table. */
	private static Btrie_slim_mgr new_tid_trie() {
		return Btrie_slim_mgr.ci_ascii_()	// NOTE:ci.ascii:MW kwds
			.Add_str_byte("genitive"		, Tid_genitive)
			.Add_str_byte("elative"			, Tid_elative)
			.Add_str_byte("partitive"		, Tid_partitive)
			.Add_str_byte("illative"		, Tid_illative)
			.Add_str_byte("inessive"		, Tid_inessive)
			.Add_str_byte("accusative"		, Tid_accusative)
			.Add_str_byte("instrumental"	, Tid_instrumental)
			.Add_str_byte("prepositional"	, Tid_prepositional)
			.Add_str_byte("dative"			, Tid_dative)
			;
	}
	/**
	 * Resolves a case name to its id.
	 *
	 * @param v case name as bytes; may be null or empty
	 * @return the matching {@code Tid_*} value, or {@code Tid_unknown} when {@code v} is empty or has no exact match
	 */
	public static byte Tid_of_type(byte[] v) {
		if (Bry_.Len_eq_0(v)) return Tid_unknown;
		Object match = Tid_trie.Match_exact(v, 0, v.length);
		if (match == null) return Tid_unknown;
		return ((Byte_obj_val)match).Val();
	}
	/**
	 * Chooses the grammar implementation for a language.
	 *
	 * @param lang_id one of the {@code Xol_lang_itm_.Id_*} values
	 * @return a new Finnish or Russian grammar, the shared no-op grammar for Polish, or the shared "unimplemented" grammar otherwise
	 */
	public static Xol_grammar new_by_lang_id(int lang_id) {
		if (lang_id == Xol_lang_itm_.Id_fi) return new Xol_grammar_fi();
		if (lang_id == Xol_lang_itm_.Id_ru) return new Xol_grammar_ru();
		if (lang_id == Xol_lang_itm_.Id_pl) return Xol_grammar__noop._;
		return Xol_grammar__unimplemented._;
	}
}
|
||||
class Xol_grammar__unimplemented implements Xol_grammar {
|
||||
public boolean Grammar_eval(Bry_bfr bfr, Xol_lang lang, byte[] word, byte[] type) {return false;}
|
||||
public static final Xol_grammar__unimplemented _ = new Xol_grammar__unimplemented(); Xol_grammar__unimplemented() {}
|
||||
}
|
||||
class Xol_grammar__noop implements Xol_grammar {
|
||||
public boolean Grammar_eval(Bry_bfr bfr, Xol_lang lang, byte[] word, byte[] type) {bfr.Add(word); return true;}
|
||||
public static final Xol_grammar__noop _ = new Xol_grammar__noop(); Xol_grammar__noop() {}
|
||||
}
|
||||
80
400_xowa/src/gplx/xowa/langs/grammars/Xol_grammar_fi.java
Normal file
80
400_xowa/src/gplx/xowa/langs/grammars/Xol_grammar_fi.java
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
public class Xol_grammar_fi implements Xol_grammar {
|
||||
public boolean Vowel_harmony(byte[] word, int word_len) {
|
||||
// $aou = preg_match( '/[aou][^äöy]*$/i', $word );
|
||||
boolean aou_found = false;
|
||||
for (int i = 0; i < word_len; i++) {
|
||||
byte b = word[i];
|
||||
Object o = trie_vh.Match_bgn_w_byte(b, word, i, word_len);
|
||||
if (o != null) {
|
||||
byte vh_type = ((Byte_obj_val)o).Val();
|
||||
if (vh_type == Trie_vh_back)
|
||||
aou_found = true;
|
||||
else
|
||||
aou_found = false;
|
||||
}
|
||||
}
|
||||
return aou_found;
|
||||
}
|
||||
public boolean Grammar_eval(Bry_bfr bfr, Xol_lang lang, byte[] word, byte[] type) {
|
||||
if (Bry_.Len_eq_0(word)) return true; // empty_string returns ""
|
||||
byte tid = Xol_grammar_.Tid_of_type(type);
|
||||
if (tid == Xol_grammar_.Tid_unknown) {bfr.Add(word); return true;} // unknown type returns word
|
||||
// PHP: if (isset($wgGrammarForms['fi'][$case][$word])){ return $wgGrammarForms['fi'][$case][$word];
|
||||
if (manual_regy == null) {
|
||||
manual_regy = new Xol_grammar_manual_regy()
|
||||
.Itms_add(Xol_grammar_.Tid_elative, "Wikiuutiset", "Wikiuutisista");
|
||||
}
|
||||
byte[] manual_repl = manual_regy.Itms_get(tid, word);
|
||||
if (manual_repl != null) {
|
||||
bfr.Add(manual_repl);
|
||||
return true;
|
||||
}
|
||||
bfr.Add(word); // NOTE: preemptively add word now; the rest of this function takes "word" and adds other letters to it;
|
||||
int word_len = word.length;
|
||||
byte[] lower = lang.Case_mgr().Case_build_lower(word, 0, word_len);
|
||||
boolean aou = Vowel_harmony(lower, word_len);
|
||||
// PHP: if ( preg_match( '/wiki$/i', $word ) ) $aou = false;
|
||||
if (aou && Bry_.Has_at_end(lower, Xoa_url_parser.Bry_wiki_name))
|
||||
aou = false;
|
||||
// PHP: if ( preg_match( '/[bcdfghjklmnpqrstvwxz]$/i', $word ) ) $word .= 'i';
|
||||
switch (lower[word_len - 1]) {
|
||||
case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g:
|
||||
case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_j: case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m:
|
||||
case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s:
|
||||
case Byte_ascii.Ltr_t: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_z:
|
||||
bfr.Add_byte(Byte_ascii.Ltr_i);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (tid) {
|
||||
case Xol_grammar_.Tid_genitive: bfr.Add_byte(Byte_ascii.Ltr_n); break; // case 'genitive': $word .= 'n';
|
||||
case Xol_grammar_.Tid_elative: bfr.Add(aou ? Bry_sta_y : Bry_sta_n); break; // case 'elative': $word .= ( $aou ? 'sta' : 'stä' );
|
||||
case Xol_grammar_.Tid_partitive: bfr.Add(aou ? Bry_a_y : Bry_a_n); break; // case 'partitive': $word .= ( $aou ? 'a' : 'ä' );
|
||||
case Xol_grammar_.Tid_inessive: bfr.Add(aou ? Bry_ssa_y : Bry_ssa_n); break; // case 'inessive': $word .= ( $aou ? 'ssa' : 'ssä' );
|
||||
case Xol_grammar_.Tid_illative: bfr.Add_byte(word[word_len - 1]).Add_byte(Byte_ascii.Ltr_n); break;// # Double the last letter and add 'n'
|
||||
}
|
||||
return true;
|
||||
} static Xol_grammar_manual_regy manual_regy;
|
||||
private static final byte[] Bry_sta_y = Bry_.new_a7("sta"), Bry_sta_n = Bry_.new_u8("stä"), Bry_a_y = Bry_.new_a7("a"), Bry_a_n = Bry_.new_u8("ä"), Bry_ssa_y = Bry_.new_a7("ssa"), Bry_ssa_n = Bry_.new_u8("ssä");
|
||||
static final byte Trie_vh_back = 0, Trie_vh_front = 1;
|
||||
private static Btrie_slim_mgr trie_vh = Btrie_slim_mgr.cs_().Add_str_byte__many(Trie_vh_back, "a", "o", "u").Add_str_byte__many(Trie_vh_front, "ä", "ö", "y");
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
public class Xol_grammar_manual_regy {
|
||||
private Hash_adp_bry[] ary = new Hash_adp_bry[Xol_grammar_.Tid__max];
|
||||
public byte[] Itms_get(byte type_tid, byte[] word) {
|
||||
Hash_adp_bry hash = ary[type_tid]; if (hash == null) return null;
|
||||
return (byte[])hash.Get_by_bry(word);
|
||||
}
|
||||
public Xol_grammar_manual_regy Itms_add(byte type_tid, String orig, String repl) {
|
||||
Hash_adp_bry hash = ary[type_tid];
|
||||
if (hash == null) {
|
||||
hash = Hash_adp_bry.ci_ascii_(); // ASCII:currently only being used for Wikiuutiset; DATE:2014-07-07
|
||||
ary[type_tid] = hash;
|
||||
}
|
||||
hash.Add_str_obj(orig, Bry_.new_a7(repl));
|
||||
return this;
|
||||
}
|
||||
}
|
||||
73
400_xowa/src/gplx/xowa/langs/grammars/Xol_grammar_ru.java
Normal file
73
400_xowa/src/gplx/xowa/langs/grammars/Xol_grammar_ru.java
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.langs.grammars; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xol_grammar_ru implements Xol_grammar {
|
||||
static final byte Genitive_null = 0, Genitive_bnkn = 1, Genitive_Bnkn = 26, Genitive_b = 3, Genitive_nr = 4, Genitive_ka = 5, Genitive_tn = 6, Genitive_abl = 7, Genitive_hnk = 8;
|
||||
private static Btrie_bwd_mgr Genitive_trie;
|
||||
private static Btrie_bwd_mgr genitive_trie_() {
|
||||
Btrie_bwd_mgr rv = new Btrie_bwd_mgr(false);
|
||||
genitive_trie_add(rv, Genitive_bnkn, "вики", null);
|
||||
genitive_trie_add(rv, Genitive_Bnkn, "Вики", null);
|
||||
genitive_trie_add(rv, Genitive_b, "ь", "я");
|
||||
genitive_trie_add(rv, Genitive_nr, "ия", "ии");
|
||||
genitive_trie_add(rv, Genitive_ka, "ка", "ки");
|
||||
genitive_trie_add(rv, Genitive_tn, "ти", "тей");
|
||||
genitive_trie_add(rv, Genitive_abl, "ды", "дов");
|
||||
genitive_trie_add(rv, Genitive_hnk , "ник", "ника");
|
||||
return rv;
|
||||
}
|
||||
private static void genitive_trie_add(Btrie_bwd_mgr trie, byte tid, String find_str, String repl_str) {
|
||||
byte[] find_bry = Bry_.new_u8(find_str);
|
||||
byte[] repl_bry = repl_str == null ? null : Bry_.new_u8(repl_str);
|
||||
Xol_grammar_ru_genitive_itm itm = new Xol_grammar_ru_genitive_itm(tid, find_bry, repl_bry);
|
||||
trie.Add(find_bry, itm);
|
||||
}
|
||||
public boolean Grammar_eval(Bry_bfr bfr, Xol_lang lang, byte[] word, byte[] type) {
|
||||
if (Bry_.Len_eq_0(word)) return true; // empty_string returns ""
|
||||
byte tid = Xol_grammar_.Tid_of_type(type);
|
||||
switch (tid) {
|
||||
case Xol_grammar_.Tid_genitive: {
|
||||
if (Genitive_trie == null) Genitive_trie = genitive_trie_();
|
||||
Object o = Genitive_trie.Match_bgn(word, word.length - 1, -1);
|
||||
if (o != null) {
|
||||
Xol_grammar_ru_genitive_itm itm = (Xol_grammar_ru_genitive_itm)o;
|
||||
if (!itm.Repl_is_noop()) {
|
||||
bfr.Add_mid(word, 0, Genitive_trie.Match_pos() + 1);
|
||||
bfr.Add(itm.Repl());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Xol_grammar_.Tid_dative: break;
|
||||
case Xol_grammar_.Tid_accusative: break;
|
||||
case Xol_grammar_.Tid_instrumental: break;
|
||||
case Xol_grammar_.Tid_prepositional:break;
|
||||
}
|
||||
bfr.Add(word);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
/** One genitive suffix rule: an id, the suffix to find, and the bytes that replace it (null for "leave as-is"). */
class Xol_grammar_ru_genitive_itm {
	private final byte tid;
	private final byte[] find;
	private final byte[] repl;
	/**
	 * @param tid  id of the rule
	 * @param find suffix bytes to look for
	 * @param repl replacement bytes, or null when the suffix is not to be replaced
	 */
	public Xol_grammar_ru_genitive_itm(byte tid, byte[] find, byte[] repl) {
		this.tid = tid;
		this.find = find;
		this.repl = repl;
	}
	/** @return id of the rule */
	public byte Tid() {
		return tid;
	}
	/** @return suffix bytes, as passed to the constructor */
	public byte[] Find() {
		return find;
	}
	/** @return replacement bytes, or null when there is none */
	public byte[] Repl() {
		return repl;
	}
	/** @return {@code true} when the rule has no replacement */
	public boolean Repl_is_noop() {
		return repl == null;
	}
}
|
||||
Reference in New Issue
Block a user