mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v1.9.4.1
This commit is contained in:
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.wikis.xwikis.*; import gplx.xowa.parsers.amps.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.wikis.xwikis.*; import gplx.xowa.parsers.amps.*; import gplx.xowa.langs.cases.*;
|
||||
public class Xoa_ttl { // PAGE:en.w:http://en.wikipedia.org/wiki/Help:Link; REF.MW: Ttl.php|secureAndSplit;
|
||||
public Xow_ns Ns() {return ns;} private Xow_ns ns;
|
||||
public boolean ForceLiteralLink() {return forceLiteralLink;} private boolean forceLiteralLink;
|
||||
@@ -90,25 +90,30 @@ public class Xoa_ttl { // PAGE:en.w:http://en.wikipedia.org/wiki/Help:Link; REF.
|
||||
public static Xoa_ttl parse_(Xow_wiki wiki, int ns_id, byte[] ttl) {
|
||||
Xow_ns ns = wiki.Ns_mgr().Ids_get_or_null(ns_id);
|
||||
byte[] raw = Bry_.Add(ns.Name_db_w_colon(), ttl);
|
||||
return new_(wiki, wiki.App().Msg_log(), raw, raw, 0, raw.length);
|
||||
return new_(wiki, wiki.App().Msg_log(), raw, 0, raw.length);
|
||||
}
|
||||
public static Xoa_ttl parse_(Xow_wiki wiki, byte[] raw) {return new_(wiki, wiki.App().Msg_log(), raw, raw, 0, raw.length);}
|
||||
public static Xoa_ttl parse_(Xow_wiki wiki, byte[] raw) {return new_(wiki, wiki.App().Msg_log(), raw, 0, raw.length);}
|
||||
private static final Object thread_lock = new Object();
|
||||
public static Xoa_ttl new_(Xow_wiki wiki, Gfo_msg_log msg_log, byte[] raw, byte[] src, int bgn, int end) {
|
||||
public static Xoa_ttl new_(Xow_wiki wiki, Gfo_msg_log msg_log, byte[] src, int bgn, int end) {
|
||||
Xoa_app app = wiki.App();
|
||||
Bry_bfr_mkr bry_mkr = app.Utl_bry_bfr_mkr();
|
||||
return parse(bry_mkr, app.Parser_amp_mgr(), wiki.Lang().Case_mgr(), wiki.Xwiki_mgr(), wiki.Ns_mgr(), msg_log, src, bgn, end);
|
||||
} private Xoa_ttl() {}
|
||||
public static Xoa_ttl parse(Bry_bfr_mkr bry_mkr, Xop_amp_mgr amp_mgr, Xol_case_mgr case_mgr, Xow_xwiki_mgr xwiki_mgr, Xow_ns_mgr ns_mgr, Gfo_msg_log msg_log, byte[] src, int bgn, int end) {
|
||||
synchronized (thread_lock) {
|
||||
Xoa_ttl rv = new Xoa_ttl();
|
||||
Bry_bfr bfr = wiki.Utl_bry_bfr_mkr().Get_b512();
|
||||
Bry_bfr bfr = bry_mkr.Get_b512();
|
||||
try {
|
||||
boolean pass = rv.Parse(bfr, wiki, msg_log, raw, src, bgn, end);
|
||||
boolean pass = rv.Parse(bfr, bry_mkr, amp_mgr, case_mgr, xwiki_mgr, ns_mgr, msg_log, src, bgn, end);
|
||||
return pass ? rv : null;
|
||||
}
|
||||
finally {bfr.Mkr_rls();}
|
||||
}
|
||||
} private Xoa_ttl() {}
|
||||
private boolean Parse(Bry_bfr bfr, Xow_wiki wiki, Gfo_msg_log msg_log, byte[] raw, byte[] src, int bgn, int end) {
|
||||
}
|
||||
private boolean Parse(Bry_bfr bfr, Bry_bfr_mkr bry_mkr, Xop_amp_mgr amp_mgr, Xol_case_mgr case_mgr, Xow_xwiki_mgr xwiki_mgr, Xow_ns_mgr ns_mgr, Gfo_msg_log msg_log, byte[] src, int bgn, int end) {
|
||||
/* This proc will
|
||||
- identify all parts: Wiki, Namespace, Base/Leaf, Anchor; it will also identify Subject/Talk ns
|
||||
- trim whitespace around part delimiters; EX: "Help : Test" --> "Help:Test"; note that it will trim only if the part is real; EX: "Helpx : Test" is unchanged
|
||||
- trim whitespace around part delimiters; EX: "Help : Test" --> "Help:Test"; note that it will trim only if the ns part is real; EX: "Helpx : Test" is unchanged
|
||||
- replace multiple whitespaces with 1; EX: "Many ws" --> "Many ws"
|
||||
- capitalize the first letter of the page title
|
||||
note: a byte[] is needed b/c proc does collapsing and casing
|
||||
@@ -125,13 +130,11 @@ public class Xoa_ttl { // PAGE:en.w:http://en.wikipedia.org/wiki/Help:Link; REF.
|
||||
Bry_bfr anchor_encoder_bfr = null;
|
||||
bfr.Clear();
|
||||
if (end - bgn == 0) {msg_log.Add_itm_none(Xop_ttl_log.Len_0, src, bgn, bgn); return false;}
|
||||
this.raw = raw;
|
||||
Xow_ns_mgr ns_mgr = wiki.Ns_mgr(); ns = ns_mgr.Ns_main();
|
||||
this.raw = src;
|
||||
ns = ns_mgr.Ns_main();
|
||||
boolean add_ws = false, ltr_bgn_reset = false;
|
||||
int ltr_bgn = -1, txt_bb_len = 0, colon_count = 0; bfr.Clear();
|
||||
Xop_amp_mgr amp_mgr = wiki.App().Parser_amp_mgr();
|
||||
Btrie_slim_mgr amp_trie = amp_mgr.Amp_trie();
|
||||
//Btrie_fast_mgr ttlTrie = wiki.App().TtlTrie();
|
||||
Btrie_slim_mgr amp_trie = amp_mgr.Amp_trie();
|
||||
byte[] b_ary = null;
|
||||
int cur = bgn;
|
||||
int match_pos = -1;
|
||||
@@ -155,7 +158,7 @@ public class Xoa_ttl { // PAGE:en.w:http://en.wikipedia.org/wiki/Help:Link; REF.
|
||||
if (colon_count == 0) {// 1st colon;
|
||||
Object o = ns_mgr.Names_get_or_null(bfr.Bfr(), ltr_bgn, txt_bb_len);
|
||||
if (o == null) { // not ns; try alias
|
||||
wik_itm = wiki.Xwiki_mgr().Get_by_mid(bfr.Bfr(), ltr_bgn, txt_bb_len); // check if wiki; note: wiki is not possible for other colons
|
||||
wik_itm = xwiki_mgr.Get_by_mid(bfr.Bfr(), ltr_bgn, txt_bb_len); // check if wiki; note: wiki is not possible for other colons
|
||||
if (wik_itm != null) {
|
||||
wik_bgn = 0; // wik_bgn can only start at 0
|
||||
part_found = true;
|
||||
@@ -271,24 +274,7 @@ public class Xoa_ttl { // PAGE:en.w:http://en.wikipedia.org/wiki/Help:Link; REF.
|
||||
match_pos = cur + 1;
|
||||
}
|
||||
break;
|
||||
//case Byte_ascii.Bang: case Byte_ascii.Quote: case Byte_ascii.Dollar: case Byte_ascii.Percent: case Byte_ascii.Apos: case Byte_ascii.Paren_bgn:
|
||||
//case Byte_ascii.Paren_end: case Byte_ascii.Asterisk: case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot:
|
||||
//case Byte_ascii.Semic: case Byte_ascii.Eq: case Byte_ascii.Question: case Byte_ascii.At:
|
||||
//case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
//case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
//case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
//case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
//case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
||||
//case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
||||
//case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
||||
//case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
||||
//case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
||||
//case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
||||
//case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
||||
//case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
||||
//case Byte_ascii.Backslash: case Byte_ascii.Pow: case Byte_ascii.Tick: case Byte_ascii.Tilde:
|
||||
//case Byte_ascii.Plus: // NOTE: not in wgLegalTitleChars, but appears in MW titles; EX: [[File:ASLSJSW Aas+.PNG]]
|
||||
// NOTE: DefaultSettings.php defines wgLegalTitleChars as " %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF+"; the characters above are okay; those below are not
|
||||
// NOTE: DefaultSettings.php defines wgLegalTitleChars as " %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF+"; the characters above are okay; those below are not
|
||||
case Byte_ascii.Gt: case Byte_ascii.Pipe:
|
||||
case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end: case Byte_ascii.Curly_bgn: case Byte_ascii.Curly_end:
|
||||
if (anch_bgn != -1) {
|
||||
@@ -322,93 +308,8 @@ public class Xoa_ttl { // PAGE:en.w:http://en.wikipedia.org/wiki/Help:Link; REF.
|
||||
break;
|
||||
}
|
||||
++cur;
|
||||
// Object itm_obj = ttlTrie.Match(b, src, cur, end);
|
||||
// if (itm_obj == null) ++cur;
|
||||
// else {
|
||||
// Byte_obj_val itm = (Byte_obj_val)itm_obj;
|
||||
// switch (itm.Val()) {
|
||||
// case Xoa_ttl_trie.Id_invalid:
|
||||
// msg_log.Add_itm_none(src, bgn, end, Xop_ttl_log.Invalid_char);
|
||||
// return false;
|
||||
// case Xoa_ttl_trie.Id_colon:
|
||||
// if (cur == bgn) { // initial colon; flag; note that " :" is not handled; note that colon_count is not incremented
|
||||
// forceLiteralLink = true;
|
||||
// cur = ttlTrie.Match_pos();
|
||||
// continue; // do not add to bfr
|
||||
// }
|
||||
// else {
|
||||
// if (ltr_bgn == -1) {// no ltrs seen; treat as literal; occurs for ::fr:wikt:test and fr::Help:test
|
||||
// ++colon_count;
|
||||
// break;
|
||||
// }
|
||||
// boolean part_found = false;
|
||||
// if (colon_count == 0) {// 1st colon;
|
||||
// Object o = wiki.Wiki_mgr().GetByMid(bfr.Bfr(), ltr_bgn, txt_bb_len); // check if wiki; note: wiki is not possible for other colons
|
||||
// if (o != null) {
|
||||
// wik_bgn = 0; // wik_bgn can only start at 0
|
||||
// part_found = true;
|
||||
// }
|
||||
// else { // not wiki; check if ns; note: if wiki, don't try to extract ns; EX: "fr:Aide:test"
|
||||
// o = ns_mgr.Trie_match_exact(bfr.Bfr(), ltr_bgn, txt_bb_len);
|
||||
// if (o != null) {
|
||||
// ns = (Xow_ns)o;
|
||||
// ns_bgn = ltr_bgn;
|
||||
// part_found = true;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// if (part_found) {
|
||||
// page_bgn = txt_bb_len + 1; // anticipate page_bgn;
|
||||
// add_ws = false; // if there was an add_ws, ignore; EX: "Category :" should ignore space
|
||||
// ltr_bgn_reset = true; // ltr_bgn_reset
|
||||
// }
|
||||
// colon_count++; // increment colon count
|
||||
// break;
|
||||
// }
|
||||
// case Xoa_ttl_trie.Id_hash: anch_bgn = (txt_bb_len) + 1; break; // flag last anch_bgn
|
||||
// case Xoa_ttl_trie.Id_slash: leaf_bgn = (txt_bb_len) + 1; break; // flag last leaf_bgn
|
||||
// case Xoa_ttl_trie.Id_newLine: // NOTE: for now, treat nl just like space; not sure if it should accept "a\nb" or "\nab"; need to handle trailing \n for "Argentina\n\n" in {{Infobox settlement|pushpin_map=Argentina|pushpin_label_position=|pushpin_map_alt=|pushpin_map_caption=Location of Salta in Argentina}};
|
||||
// case Xoa_ttl_trie.Id_space:
|
||||
// case Xoa_ttl_trie.Id_underline:if (ltr_bgn != -1) add_ws = true; cur = ttlTrie.Match_pos(); continue; // only mark add_ws if ltr_seen; this ignores ws at bgn; also, note "continue"
|
||||
// case Xoa_ttl_trie.Id_amp:
|
||||
// cur = ttlTrie.Match_pos();
|
||||
// if (cur == end) {} // guards against terminating &; EX: [[Bisc &]]; NOTE: needed b/c Match_bgn does not do bounds checking for cur in src; src[src.length] will be called when & is last character;
|
||||
// else {
|
||||
// Object html_ent_obj = wiki.App().Amp_trie().Match_bgn(src, cur, end);
|
||||
// if (html_ent_obj != null) {
|
||||
// Xop_amp_trie_itm amp_itm = (Xop_amp_trie_itm)html_ent_obj;
|
||||
// switch (amp_itm.CharInt()) {
|
||||
// case Byte_ascii.Amp:
|
||||
// case Byte_ascii.Quote:
|
||||
// case Byte_ascii.Lt:
|
||||
// case Byte_ascii.Gt:
|
||||
// b_ary = amp_itm.XmlEntityName();
|
||||
// break;
|
||||
// default:
|
||||
// b_ary = amp_itm.CharIntAsAry();
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// break;
|
||||
// case Xoa_ttl_trie.Id_comment_bgn:
|
||||
// cur = ttlTrie.Match_pos();
|
||||
// int find = Bry_finder.Find_fwd(src, Xop_comm_lxr.End_ary, cur, end);
|
||||
// if (find != -1) {
|
||||
// cur = find + Xop_comm_lxr.End_ary.length;
|
||||
// continue;
|
||||
// }
|
||||
// else {
|
||||
// msg_log.Add_itm_none(src, bgn, end, Xop_ttl_log.Comment_eos);
|
||||
// return false;
|
||||
// }
|
||||
// default: break;
|
||||
// }
|
||||
// cur = ttlTrie.Match_pos();
|
||||
// }
|
||||
if (add_ws) { // add ws and toggle flag
|
||||
bfr.Add_byte(Byte_ascii.Space); ++txt_bb_len;
|
||||
// bfr.Add_byte(Byte_ascii.Underline); ++txt_bb_len;
|
||||
add_ws = false;
|
||||
}
|
||||
if (ltr_bgn == -1) ltr_bgn = txt_bb_len; // if 1st letter not seen, mark 1st letter
|
||||
@@ -420,19 +321,11 @@ public class Xoa_ttl { // PAGE:en.w:http://en.wikipedia.org/wiki/Help:Link; REF.
|
||||
}
|
||||
}
|
||||
if (txt_bb_len == 0) {msg_log.Add_itm_none(Xop_ttl_log.Len_0, src, bgn, end); return false;}
|
||||
// if ( ((txt_bb_len - ns_len) > 255 && ns.Id() != Xow_ns_.Id_special) // DELETE: will cause multi-byte langs like thai to fail on long links; DATE:2013-02-02
|
||||
// || ((txt_bb_len - ns_len) > 512 && ns.Id() == Xow_ns_.Id_special)) {msg_log.Add_itm_none(Xop_ttl_log.Len_max, src, bgn, end); return false;}
|
||||
if (wik_bgn == -1 && page_bgn == txt_bb_len) { // if no wiki, but page_bgn is at end, then ttl is ns only; EX: "Help:"; NOTE: "fr:", "fr:Help" is allowed
|
||||
msg_log.Add_itm_none(Xop_ttl_log.Ttl_is_ns_only, src, bgn, end);
|
||||
return false;
|
||||
}
|
||||
full_txt = bfr.XtoAryAndClear();
|
||||
|
||||
// if (wik_bgn == 0 && txt_bb_len == page_bgn) { // 0-len intrawiki; EX: [[fr:]] on Main Page
|
||||
// Xoa_ttl cur_page_ttl = wiki.Ctx().Cur_page().Ttl();
|
||||
// this.raw = cur_page_ttl == null ? Bry_.Empty : Bry_.Copy(cur_page_ttl.Page_txt()); // set raw to current page ttl; note that this is needed for lnki_caption to show correctly (otherwise lnki_caption will be blank); this is a quasi-hack as it depends on the ttl of the current page, but passing in another argument feels sloppy
|
||||
// bfr.Add(this.raw);
|
||||
// }
|
||||
if ( ns.Case_match() == Xow_ns_case_.Id_1st
|
||||
&& wik_bgn == -1 ) { // do not check case if xwiki; EX: "fr:" would have a wik_bgn of 0 (and a wik_end of 3); "A" (and any non-xwiki ttl) would have a wik_bgn == -1
|
||||
byte char_1st = full_txt[page_bgn];
|
||||
@@ -443,8 +336,8 @@ public class Xoa_ttl { // PAGE:en.w:http://en.wikipedia.org/wiki/Help:Link; REF.
|
||||
if (page_end > full_txt_len) // ttl is too too short for 1st multi-byte char; EX: [[%D0]] is 208 but in utf8, 208 requires at least another char; DATE:2013-11-11
|
||||
return false; // ttl is invalid
|
||||
else { // ttl is long enough for 1st mult-byte char; need to use platform uppercasing; Xol_case_mgr_.Utf_8 is not sufficient
|
||||
Bry_bfr upper_1st = wiki.Utl_bry_bfr_mkr().Get_b512();
|
||||
byte[] page_txt = wiki.Lang().Case_mgr().Case_build_1st_upper(upper_1st, full_txt, page_bgn, full_txt_len); // always build; never reuse; (multi-byte character will expand array)
|
||||
Bry_bfr upper_1st = bry_mkr.Get_b512();
|
||||
byte[] page_txt = case_mgr.Case_build_1st_upper(upper_1st, full_txt, page_bgn, full_txt_len); // always build; never reuse; (multi-byte character will expand array)
|
||||
if (page_bgn == 0) // page only; EX: A
|
||||
full_txt = page_txt;
|
||||
else // ns + page; EX: Help:A
|
||||
@@ -453,7 +346,7 @@ public class Xoa_ttl { // PAGE:en.w:http://en.wikipedia.org/wiki/Help:Link; REF.
|
||||
}
|
||||
}
|
||||
else
|
||||
full_txt = wiki.Lang().Case_mgr().Case_reuse_upper(full_txt, page_bgn, page_end);
|
||||
full_txt = case_mgr.Case_reuse_upper(full_txt, page_bgn, page_end);
|
||||
}
|
||||
Xow_ns tors_ns = ns.Id_talk() ? ns_mgr.Ords_get_at(ns.Ord_subj_id()) : ns_mgr.Ords_get_at(ns.Ord_talk_id());
|
||||
tors_txt = tors_ns.Name_txt_w_colon();
|
||||
|
||||
Reference in New Issue
Block a user