1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

'v3.7.2.1'

This commit is contained in:
gnosygnu
2016-07-10 23:35:32 -04:00
parent f5f48bb9b1
commit b333db45f8
366 changed files with 4468 additions and 3460 deletions

View File

@@ -20,60 +20,63 @@ import gplx.core.btries.*;
import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*;
import gplx.xowa.htmls.tocs.*;
public class Xop_under_lxr implements Xop_lxr {
private final Object thread_lock = new Object();
private Btrie_mgr words_trie_ci, words_trie_cs;
/** Returns the lexer type id of this lexer, which is always Xop_lxr_.Tid_under. */
public int Lxr_tid() {
	return Xop_lxr_.Tid_under;
}
/** Wiki-level initialization; a deliberate no-op for this lexer. */
public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
	// nothing to register per wiki
}
/**
 * Registers the "under" keyword hooks of a language in core_trie.
 *
 * Creates a fresh Xop_under_lxr with its own case-sensitive and case-insensitive word tries,
 * adds it to core_trie under Xop_under_hook.Key_std, then walks every keyword id in under_kwds:
 * - a keyword value that starts with a known hook (looked up in Hook_trie) has the hook stripped and the
 *   remainder stored in the cs or ci word trie, chosen by the keyword group's Case_match();
 *   the first keyword that uses the alt hook also registers the lexer under Xop_under_hook.Key_alt.
 * - a keyword value without a hook gets its own Xop_word_lxr added directly to core_trie:
 *   as-is when case-sensitive; otherwise a warning is logged and all-lower / all-upper variants are added.
 * Keyword groups whose Itms() is null are skipped.
 *
 * NOTE(review): this listing appears to come from a rendered diff, so the pre-change statements and the
 * post-change statements (wrapped in synchronized (thread_lock)) follow one another below;
 * confirm against the real file before changing any code here.
 *
 * @param lang      language whose Kwd_mgr() supplies the keywords and whose Case_mgr() supplies casing
 * @param core_trie trie that receives the lexer and word-lexer registrations
 */
public void Init_by_lang(Xol_lang_itm lang, Btrie_fast_mgr core_trie) {
Xol_kwd_mgr kwd_mgr = lang.Kwd_mgr();
int under_kwds_len = under_kwds.length;
Xop_under_lxr lxr = new Xop_under_lxr();
lxr.words_trie_cs = Btrie_slim_mgr.cs();
lxr.words_trie_ci = Btrie_u8_mgr.new_(lang.Case_mgr());
core_trie.Add(Xop_under_hook.Key_std, lxr);
boolean hook_alt_null = true;
for (int i = 0; i < under_kwds_len; i++) {
int kwd_id = under_kwds[i];
Xol_kwd_grp kwd_grp = kwd_mgr.Get_or_new(kwd_id);
Xol_kwd_itm[] kwd_itms = kwd_grp.Itms(); if (kwd_itms == null) continue;
int kwd_itms_len = kwd_itms.length;
boolean kwd_case_match = kwd_grp.Case_match();
Btrie_mgr words_trie = kwd_grp.Case_match() ? lxr.words_trie_cs : lxr.words_trie_ci;
for (int j = 0; j < kwd_itms_len; j++) {
Xol_kwd_itm kwd_itm = kwd_itms[j];
byte[] kwd_bry = kwd_itm.Val();
int kwd_len = kwd_bry.length;
Object hook_obj = Hook_trie.Match_bgn(kwd_bry, 0, kwd_len);
if (hook_obj != null) {
Xop_under_hook hook = (Xop_under_hook)hook_obj;
byte[] word_bry = Bry_.Mid(kwd_bry, hook.Key_len(), kwd_bry.length);
words_trie.Add_obj(word_bry, new Xop_under_word(kwd_id, word_bry));
if (hook_alt_null && hook.Tid() == Xop_under_hook.Tid_alt) {
core_trie.Add(Xop_under_hook.Key_alt, lxr);
hook_alt_null = false;
synchronized (thread_lock) { // TS; DATE:2016-07-06
Xol_kwd_mgr kwd_mgr = lang.Kwd_mgr();
int under_kwds_len = under_kwds.length;
Xop_under_lxr lxr = new Xop_under_lxr();
lxr.words_trie_cs = Btrie_slim_mgr.cs();
lxr.words_trie_ci = Btrie_u8_mgr.new_(lang.Case_mgr());
core_trie.Add(Xop_under_hook.Key_std, lxr);
boolean hook_alt_null = true;
for (int i = 0; i < under_kwds_len; i++) {
int kwd_id = under_kwds[i];
Xol_kwd_grp kwd_grp = kwd_mgr.Get_or_new(kwd_id);
Xol_kwd_itm[] kwd_itms = kwd_grp.Itms(); if (kwd_itms == null) continue;
int kwd_itms_len = kwd_itms.length;
boolean kwd_case_match = kwd_grp.Case_match();
Btrie_mgr words_trie = kwd_grp.Case_match() ? lxr.words_trie_cs : lxr.words_trie_ci;
for (int j = 0; j < kwd_itms_len; j++) {
Xol_kwd_itm kwd_itm = kwd_itms[j];
byte[] kwd_bry = kwd_itm.Val();
int kwd_len = kwd_bry.length;
Object hook_obj = Hook_trie.Match_bgn(kwd_bry, 0, kwd_len);
if (hook_obj != null) {
Xop_under_hook hook = (Xop_under_hook)hook_obj;
byte[] word_bry = Bry_.Mid(kwd_bry, hook.Key_len(), kwd_bry.length);
words_trie.Add_obj(word_bry, new Xop_under_word(kwd_id, word_bry));
if (hook_alt_null && hook.Tid() == Xop_under_hook.Tid_alt) {
core_trie.Add(Xop_under_hook.Key_alt, lxr);
hook_alt_null = false;
}
}
}
else { // kwd doesn't start with __; no known examples, but just in case; EX: "NOTOC"; DATE:2014-02-14
Xop_word_lxr word_lxr = new Xop_word_lxr(kwd_id);
if (kwd_case_match) // cs; add word directly to trie
core_trie.Add(kwd_bry, word_lxr);
else { // NOTE: next part is imprecise; XOWA parser is cs, but kwd is ci; for now, just add all upper and all lower
Gfo_usr_dlg_.Instance.Warn_many("", "", "under keyword does not start with __; id=~{0} key=~{1} word=~{2}", kwd_id, String_.new_u8(kwd_grp.Key()), String_.new_u8(kwd_bry));
core_trie.Add(lang.Case_mgr().Case_build_lower(kwd_bry), word_lxr);
core_trie.Add(lang.Case_mgr().Case_build_upper(kwd_bry), word_lxr);
else { // kwd doesn't start with __; no known examples, but just in case; EX: "NOTOC"; DATE:2014-02-14
Xop_word_lxr word_lxr = new Xop_word_lxr(kwd_id);
if (kwd_case_match) // cs; add word directly to trie
core_trie.Add(kwd_bry, word_lxr);
else { // NOTE: next part is imprecise; XOWA parser is cs, but kwd is ci; for now, just add all upper and all lower
Gfo_usr_dlg_.Instance.Warn_many("", "", "under keyword does not start with __; id=~{0} key=~{1} word=~{2}", kwd_id, String_.new_u8(kwd_grp.Key()), String_.new_u8(kwd_bry));
core_trie.Add(lang.Case_mgr().Case_build_lower(kwd_bry), word_lxr);
core_trie.Add(lang.Case_mgr().Case_build_upper(kwd_bry), word_lxr);
}
}
}
}
}
}
/** Termination hook; a deliberate no-op for this lexer. */
public void Term(Btrie_fast_mgr core_trie) {
	// nothing to release
}
private static final int[] under_kwds = new int[] // REF.MW:MagicWord.php
// Keyword-group ids that Init_by_lang registers as "under" keywords; REF.MW:MagicWord.php
private static final int[] under_kwds = {
	Xol_kwd_grp_.Id_toc,
	Xol_kwd_grp_.Id_notoc,
	Xol_kwd_grp_.Id_forcetoc,
	Xol_kwd_grp_.Id_nogallery,
	Xol_kwd_grp_.Id_noheader,
	Xol_kwd_grp_.Id_noeditsection,
	Xol_kwd_grp_.Id_notitleconvert,
	Xol_kwd_grp_.Id_nocontentconvert,
	Xol_kwd_grp_.Id_newsectionlink,
	Xol_kwd_grp_.Id_nonewsectionlink,
	Xol_kwd_grp_.Id_hiddencat,
	Xol_kwd_grp_.Id_index,
	Xol_kwd_grp_.Id_noindex,
	Xol_kwd_grp_.Id_staticredirect,
	Xol_kwd_grp_.Id_disambig,
};
private static final Btrie_fast_mgr Hook_trie = Btrie_fast_mgr.cs()
// Case-sensitive trie of the two hook prefixes (std, alt); Init_by_lang uses it to find which hook a keyword value starts with
private static final Btrie_fast_mgr Hook_trie =
	Btrie_fast_mgr.cs().Add(Xop_under_hook.Key_std, Xop_under_hook.Itm_std).Add(Xop_under_hook.Key_alt, Xop_under_hook.Itm_alt);
@@ -111,7 +114,7 @@ public class Xop_under_lxr implements Xop_lxr {
default: break; // ignore anything else
}
}
public static final Xop_under_lxr Instance = new Xop_under_lxr(); Xop_under_lxr() {}
/** Shared instance of the under lexer. */
public static final Xop_under_lxr Instance = new Xop_under_lxr();
/** Package-private constructor: instances are created only from within this package. */
Xop_under_lxr() {
}
}
class Xop_word_lxr implements Xop_lxr {
private int kwd_id;
@@ -131,8 +134,8 @@ class Xop_under_hook {
// Raw bytes of this hook's prefix
private byte[] key;
/** Returns the hook's prefix bytes (the internal array itself, not a copy). */
public byte[] Key() {
	return key;
}
// Stored length of the hook prefix
private int key_len;
/** Returns the stored length of the hook prefix; Init_by_lang uses it as the offset where the keyword's word part begins. */
public int Key_len() {
	return key_len;
}
public static final byte Tid_std = 1, Tid_alt = 2;
public static final byte[] Key_std = new byte[] {Byte_ascii.Underline, Byte_ascii.Underline}, Key_alt = Bry_.new_u8("__"); // ja wikis
public static final Xop_under_hook
// Hook prefixes that introduce an "under" keyword:
// - Key_std: two ASCII underscores
// - Key_alt: two full-width low lines (U+FF3F), used by ja wikis. Written as unicode escapes so the literal
//   cannot be folded to ASCII "__", which would make it byte-identical to Key_std and leave the alt hook
//   indistinguishable from the std hook in Hook_trie.
public static final byte[] Key_std = new byte[] {Byte_ascii.Underline, Byte_ascii.Underline}, Key_alt = Bry_.new_u8("\uFF3F\uFF3F"); // ja wikis
// Hook descriptors pairing each hook type id with its prefix bytes
public static final Xop_under_hook Itm_std = new Xop_under_hook(Tid_std, Key_std);
public static final Xop_under_hook Itm_alt = new Xop_under_hook(Tid_alt, Key_alt);

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.miscs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*; import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*;
public class Xop_under_lxr_tst {
private final Xop_fxt fxt = new Xop_fxt();
private final Xop_fxt fxt = new Xop_fxt();
/** Runs before each test: resets the shared parser fixture. */
@Before
public void init() {
	fxt.Reset();
}
/** Runs after each test: calls Init_para_n_() on the shared parser fixture. */
@After
public void term() {
	fxt.Init_para_n_();
}
@Test public void Toc_basic() {
@@ -98,7 +98,7 @@ public class Xop_under_lxr_tst {
fxt.Test_parse_page_all_str("__DISAMBIG__", "");
}
@Test public void Nocontentconvert() { // simple test; test for flag only; DATE:2014-02-06
gplx.xowa.wikis.pages.Xopg_html_data html_data = fxt.Page().Html_data();
gplx.xowa.wikis.pages.htmls.Xopg_html_data html_data = fxt.Page().Html_data();
Tfds.Eq(html_data.Lang_convert_content(), true);
Tfds.Eq(html_data.Lang_convert_title(), true);
fxt.Test_parse_page_all_str("__NOCONTENTCONVERT__ __NOTITLECONVERT__", " ");