1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-07-12 21:10:02 -04:00
commit 794b5a232f
3099 changed files with 238212 additions and 0 deletions

View File

@@ -0,0 +1,83 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
public class Xop_apos_dat {
public int State() {return state;} public void State_clear() {state = Xop_apos_tkn_.State_nil;} private int state = Xop_apos_tkn_.State_nil;
public int Typ() {return typ;} private int typ;
public int Cmd() {return cmd;} private int cmd;
public int Lit_apos() {return lit_apos;} private int lit_apos;
public int Dual_cmd() {return dual_cmd;} private int dual_cmd;
public void Ident(Xop_ctx ctx, byte[] src, int apos_len, int cur_pos) {
typ = cmd = lit_apos = dual_cmd = 0;
switch (apos_len) {
case Xop_apos_tkn_.Len_ital: case Xop_apos_tkn_.Len_bold: case Xop_apos_tkn_.Len_dual:
Ident_props(apos_len); break;
case Xop_apos_tkn_.Len_apos_bold:
lit_apos = 1;
Ident_props(Xop_apos_tkn_.Len_bold); break;
default:
lit_apos = apos_len - Xop_apos_tkn_.Len_dual;
Ident_props(Xop_apos_tkn_.Len_dual);
if (lit_apos > 1)
ctx.Msg_log().Add_itm_none(Xop_apos_log.Multiple_apos, src, cur_pos - apos_len, cur_pos);
break;
}
}
private void Ident_props(int apos_len) {
typ = apos_len;
switch (apos_len) {
case Xop_apos_tkn_.Len_ital: {
switch (state) {
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_nil; break;
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_b; break;
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_bi_end__b_bgn; state = Xop_apos_tkn_.State_b; break;
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_i_end; state = Xop_apos_tkn_.State_b; dual_cmd = Xop_apos_tkn_.Cmd_bi_bgn; break;
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_i_bgn; state = Xop_apos_tkn_.State_bi; break;
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_i_bgn; state = Xop_apos_tkn_.State_i; break;
default: throw Exc_.new_unhandled(state);
}
break;
}
case Xop_apos_tkn_.Len_bold: {
switch (state) {
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_nil; break;
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_ib_end__i_bgn; state = Xop_apos_tkn_.State_i; break;
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_i; break;
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_b_end; state = Xop_apos_tkn_.State_i; break; // NOTE: dual_cmd = Cmd_ib_bgn is implied
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_b_bgn; state = Xop_apos_tkn_.State_ib; break;
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_b_bgn; state = Xop_apos_tkn_.State_b; break;
default: throw Exc_.new_unhandled(state);
}
break;
}
case Xop_apos_tkn_.Len_dual: {
switch (state) {
case Xop_apos_tkn_.State_b: cmd = Xop_apos_tkn_.Cmd_b_end__i_bgn; state = Xop_apos_tkn_.State_i; break;
case Xop_apos_tkn_.State_i: cmd = Xop_apos_tkn_.Cmd_i_end__b_bgn; state = Xop_apos_tkn_.State_b; break;
case Xop_apos_tkn_.State_bi: cmd = Xop_apos_tkn_.Cmd_ib_end; state = Xop_apos_tkn_.State_nil; break;
case Xop_apos_tkn_.State_ib: cmd = Xop_apos_tkn_.Cmd_bi_end; state = Xop_apos_tkn_.State_nil; break;
case Xop_apos_tkn_.State_dual: cmd = Xop_apos_tkn_.Cmd_bi_end; state = Xop_apos_tkn_.State_nil; break; // NOTE: dual_cmd = Cmd_ib_bgn is implied
case Xop_apos_tkn_.State_nil: cmd = Xop_apos_tkn_.Cmd_ib_bgn; state = Xop_apos_tkn_.State_dual; break;
default: throw Exc_.new_unhandled(state);
}
break;
}
default: throw Exc_.new_unhandled(apos_len);
}
}
}

View File

@@ -0,0 +1,26 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Log message items used by the apostrophe parser; all belong to the "apos" message group. */
public class Xop_apos_log {
  private static final Gfo_msg_grp owner = Gfo_msg_grp_.new_(Xoa_app_.Nde, "apos");

  /** Logged when an unbalanced bold token is reinterpreted as an apostrophe followed by italic. */
  public static final Gfo_msg_itm Bold_converted_to_ital = Gfo_msg_itm_.new_note_(owner, "Bold_converted_to_ital");

  /** Logged when bold / italic is still open at the end of a frame and must be closed automatically. */
  public static final Gfo_msg_itm Dangling_apos = Gfo_msg_itm_.new_note_(owner, "Dangling_apos");

  /** Logged when an apostrophe run leaves more than one literal apostrophe. */
  public static final Gfo_msg_itm Multiple_apos = Gfo_msg_itm_.new_note_(owner, "Multiple_apos");
}

View File

@@ -0,0 +1,26 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import gplx.core.btries.*;
/** Lexer hook for apostrophe markup; delegates token creation to the context's apostrophe worker. */
public class Xop_apos_lxr implements Xop_lxr {
  /** Trie key: two consecutive apostrophes. */
  private static final byte[] Apos_ary = new byte[] {Byte_ascii.Apos, Byte_ascii.Apos};

  /** Shared instance. */
  public static final Xop_apos_lxr _ = new Xop_apos_lxr();

  Xop_apos_lxr() {}

  public byte Lxr_tid() {
    return Xop_lxr_.Tid_apos;
  }

  /** Registers this lexer in the core trie under the two-apostrophe key. */
  public void Init_by_wiki(Xowe_wiki wiki, Btrie_fast_mgr core_trie) {
    core_trie.Add(Apos_ary, this);
  }

  /** Nothing language-specific to register. */
  public void Init_by_lang(Xol_lang lang, Btrie_fast_mgr core_trie) {
  }

  /** Forwards all arguments to the apostrophe worker of ctx and returns its result. */
  public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    return ctx.Apos().Make_tkn(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos);
  }
}

View File

@@ -0,0 +1,29 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Token for one run of apostrophes: its length, type, command and count of literal apostrophes. */
public class Xop_apos_tkn extends Xop_tkn_itm_base {
  private int apos_len;
  private int apos_tid;
  private int apos_cmd;
  private int apos_lit;

  /**
   * @param bgn      start position of the run in the source
   * @param end      end position of the run in the source
   * @param apos_len number of apostrophes in the run
   * @param apos_tid type of the run (one of the Xop_apos_tkn_.Typ_* values)
   * @param apos_cmd command to emit (one of the Xop_apos_tkn_.Cmd_* values)
   * @param apos_lit number of apostrophes to treat as literal text
   */
  public Xop_apos_tkn(int bgn, int end, int apos_len, int apos_tid, int apos_cmd, int apos_lit) {
    this.apos_len = apos_len;
    this.apos_tid = apos_tid;
    this.apos_cmd = apos_cmd;
    this.apos_lit = apos_lit;
    this.Tkn_ini_pos(false, bgn, end);
  }

  @Override public byte Tkn_tid() {
    return Xop_tkn_itm_.Tid_apos;
  }

  public int Apos_len() {
    return apos_len;
  }

  public int Apos_lit() {
    return apos_lit;
  }
  /** Sets the literal-apostrophe count; returns this token for chaining. */
  public Xop_apos_tkn Apos_lit_(int v) {
    this.apos_lit = v;
    return this;
  }

  public int Apos_tid() {
    return apos_tid;
  }
  /** Sets the type; returns this token for chaining. */
  public Xop_apos_tkn Apos_tid_(int v) {
    this.apos_tid = v;
    return this;
  }

  public int Apos_cmd() {
    return apos_cmd;
  }
  /** Sets the command; returns this token for chaining. */
  public Xop_apos_tkn Apos_cmd_(int v) {
    this.apos_cmd = v;
    return this;
  }
}

View File

@@ -0,0 +1,36 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Constants for apostrophe tokens: commands, run lengths, types and parser states. */
public class Xop_apos_tkn_ {
  // Commands; each value is also the index of its name in Cmds.
  public static final int Cmd_nil = 0;
  public static final int Cmd_i_bgn = 1;
  public static final int Cmd_i_end = 2;
  public static final int Cmd_b_bgn = 3;
  public static final int Cmd_b_end = 4;
  public static final int Cmd_bi_bgn = 5;
  public static final int Cmd_ib_bgn = 6;
  public static final int Cmd_ib_end = 7;
  public static final int Cmd_bi_end = 8;
  public static final int Cmd_bi_end__b_bgn = 9;
  public static final int Cmd_ib_end__i_bgn = 10;
  public static final int Cmd_b_end__i_bgn = 11;
  public static final int Cmd_i_end__b_bgn = 12;

  /** Short names for the commands, indexed by command id ("+" marks a begin, "-" an end). */
  public static final byte[][] Cmds = new byte[][]
    { Bry_.new_a7("nil")
    , Bry_.new_a7("i+")
    , Bry_.new_a7("i-")
    , Bry_.new_a7("b+")
    , Bry_.new_a7("b-")
    , Bry_.new_a7("bi+")
    , Bry_.new_a7("ib+")
    , Bry_.new_a7("ib-")
    , Bry_.new_a7("bi-")
    , Bry_.new_a7("bi-b+")
    , Bry_.new_a7("ib-i+")
    , Bry_.new_a7("b-i+")
    , Bry_.new_a7("i-b+")
    };

  /** Returns the short name of the command with the given id; id must be a valid index into Cmds. */
  public static String Cmd_str(int id) {
    byte[] name = Cmds[id];
    return String_.new_u8(name);
  }

  // Number of apostrophes in a run; Len_apos_bold is one literal apostrophe followed by bold.
  public static final int Len_ital = 2;
  public static final int Len_bold = 3;
  public static final int Len_dual = 5;
  public static final int Len_apos_bold = 4;

  // Token types; values coincide with the corresponding run lengths.
  public static final int Typ_ital = 2;
  public static final int Typ_bold = 3;
  public static final int Typ_dual = 5;

  // Parser states describing which markup is currently open.
  public static final int State_nil = 0;
  public static final int State_i = 1;
  public static final int State_b = 2;
  public static final int State_bi = 3;
  public static final int State_ib = 4;
  public static final int State_dual = 5;
}

View File

@@ -0,0 +1,30 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/** Test checker describing the expected command and literal-apostrophe count of an apostrophe token. */
public class Xop_apos_tkn_chkr extends Xop_tkn_chkr_base {
  private int apos_cmd = Xop_apos_tkn_.Cmd_nil;
  private int apos_lit = -1;

  @Override public Class<?> TypeOf() {
    return Xop_apos_tkn.class;
  }

  @Override public byte Tkn_tid() {
    return Xop_tkn_itm_.Tid_apos;
  }

  public int Apos_cmd() {
    return apos_cmd;
  }
  /** Sets the expected command; returns this checker for chaining. */
  public Xop_apos_tkn_chkr Apos_cmd_(int v) {
    this.apos_cmd = v;
    return this;
  }

  public int Apos_lit() {
    return apos_lit;
  }
  /** Sets the expected literal-apostrophe count; returns this checker for chaining. */
  public Xop_apos_tkn_chkr Apos_lit_(int v) {
    this.apos_lit = v;
    return this;
  }

  /**
   * Compares the expected command and literal count against the actual token and adds the
   * results of both comparisons to err.
   * NOTE(review): the first argument of Tst_val is presumably a "skip" flag, so a field left
   * at its default (Cmd_nil / -1) would not be checked; confirm against Tst_mgr.
   */
  @Override public int Chk_hook(Tst_mgr mgr, String path, Object actl_obj, int err) {
    Xop_apos_tkn actl = (Xop_apos_tkn)actl_obj;

    boolean cmd_unset = apos_cmd == Xop_apos_tkn_.Cmd_nil;
    String expd_cmd_str = Xop_apos_tkn_.Cmd_str(apos_cmd);
    String actl_cmd_str = Xop_apos_tkn_.Cmd_str(actl.Apos_cmd());
    err += mgr.Tst_val(cmd_unset, path, "apos_cmd", expd_cmd_str, actl_cmd_str);

    boolean lit_unset = apos_lit == -1;
    err += mgr.Tst_val(lit_unset, path, "apos_lit", apos_lit, actl.Apos_lit());

    return err;
  }
}

View File

@@ -0,0 +1,161 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
/**
 * Context worker for apostrophe markup.
 *
 * Creates an apostrophe token for every run of apostrophes, keeps every token of the current
 * frame on a stack, and when the frame ends ({@link #EndFrame}) rebalances odd bold / italic
 * counts and appends a zero-length closing token for whatever is still open.
 */
public class Xop_apos_wkr implements Xop_ctx_wkr {
  private Xop_apos_dat dat = new Xop_apos_dat();
  private List_adp stack = List_adp_.new_();   // all apos tokens registered since the last Reset
  private int bold_count, ital_count;          // number of bold / italic tokens on the stack; dual tokens are not counted
  private Xop_apos_tkn dual_tkn = null;        // most recent dual token whose cmd may still need to be rewritten

  public Xop_apos_dat Dat() {return dat;}
  public void Ctor_ctx(Xop_ctx ctx) {}
  public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
    Reset();
  }
  /** Ends the last frame of the page, using src_len as the closing position. */
  public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
    this.EndFrame(ctx, root, src, src_len, false);
  }
  public void AutoClose(Xop_ctx ctx, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {}
  public int Stack_len() {return stack.Count();}

  /**
   * Creates a token for the apostrophe run starting at bgn_pos, adds it to root and registers it.
   *
   * @return the position returned by Bry_finder.Find_fwd_while, which is also used as the token's end
   */
  public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
    cur_pos = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Apos);  // presumably skips past the remaining consecutive apostrophes
    int apos_len = cur_pos - bgn_pos;
    dat.Ident(ctx, src, apos_len, cur_pos);
    Xop_apos_tkn apos_tkn = tkn_mkr.Apos(bgn_pos, cur_pos, apos_len, dat.Typ(), dat.Cmd(), dat.Lit_apos());
    ctx.Subs_add(root, apos_tkn);
    ctx.Apos().RegTkn(apos_tkn, cur_pos);
    return cur_pos;
  }

  /** Pushes tkn on the stack, updates the bold / italic counts and rewrites a pending dual token if required. */
  public void RegTkn(Xop_apos_tkn tkn, int cur_pos) { // REF.MW: Parser|doQuotes
    stack.Add(tkn);
    switch (tkn.Apos_tid()) {
      case Xop_apos_tkn_.Len_ital: ital_count++; break;
      case Xop_apos_tkn_.Len_bold: bold_count++; break;
      case Xop_apos_tkn_.Len_dual: //bold_count++; ital_count++; // NOTE: removed b/c of '''''a''b'' was trying to convert ''''' to bold
        dual_tkn = tkn;
        break;
    }
    if (dat.Dual_cmd() != 0) {  // earlier dual tkn assumed to be <i><b>; </i> encountered so change dual to <b><i>
      if (dual_tkn == null) throw Exc_.new_("dual tkn is null"); // should never happen
      dual_tkn.Apos_cmd_(dat.Dual_cmd());
      dual_tkn = null;
    }
  }

  /**
   * Ends the current apostrophe frame at cur_pos.
   *
   * If nothing is open, the worker is simply reset. If both the bold and italic counts are odd,
   * one bold token is first converted to italic and all commands are recalculated. If markup is
   * still open afterwards, a Dangling_apos message is logged and a zero-length closing token is
   * added to root.
   *
   * @param skip_cancel_if_lnki_and_apos when true and the current token of ctx is a lnki, the frame is left open
   */
  public void EndFrame(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int cur_pos, boolean skip_cancel_if_lnki_and_apos) {
    int state = dat.State();
    if (state == Xop_apos_tkn_.State_nil) {Reset(); return;}
    if (bold_count % 2 == 1 && ital_count % 2 == 1) ConvertBoldToItal(ctx, src);
    state = dat.State();
    if (state == Xop_apos_tkn_.State_nil) {Reset(); return;}  // all closed: return
    byte cur_tkn_tid = ctx.Cur_tkn_tid();
    if (  skip_cancel_if_lnki_and_apos                  // NOTE: if \n or tblw
      &&  cur_tkn_tid == Xop_tkn_itm_.Tid_lnki          // and cur scope is lnki
//    &&  prv.Ctx_tkn_tid() != Xop_tkn_itm_.Tid_lnki    // but apos_bgn is not lnki; NOTE: disabled on 2013-11-10
      )
      return;                                           // don't end frame
    int closeCmd = 0, closeTyp = 0;
    switch (state) {
      case Xop_apos_tkn_.State_i:    closeTyp = Xop_apos_tkn_.Typ_ital; closeCmd = Xop_apos_tkn_.Cmd_i_end; break;
      case Xop_apos_tkn_.State_b:    closeTyp = Xop_apos_tkn_.Typ_bold; closeCmd = Xop_apos_tkn_.Cmd_b_end; break;
      case Xop_apos_tkn_.State_dual:
      case Xop_apos_tkn_.State_ib:   closeTyp = Xop_apos_tkn_.Typ_dual; closeCmd = Xop_apos_tkn_.Cmd_bi_end; break;
      case Xop_apos_tkn_.State_bi:   closeTyp = Xop_apos_tkn_.Typ_dual; closeCmd = Xop_apos_tkn_.Cmd_ib_end; break;
    }
    // closeTyp must be known before searching, so that the opener of the markup that is
    // actually dangling is found rather than just the most recent opener of any kind
    Xop_apos_tkn prv = Previous_bgn(stack, closeTyp);
    int dangling_bgn = prv == null ? cur_pos : prv.Src_bgn();  // no opener found: log an empty range instead of failing
    ctx.Msg_log().Add_itm_none(Xop_apos_log.Dangling_apos, src, dangling_bgn, cur_pos);
    ctx.Subs_add(root, ctx.Tkn_mkr().Apos(cur_pos, cur_pos, 0, closeTyp, closeCmd, 0));
    Reset();
  }

  /**
   * Converts one bold token on the stack to "literal apostrophe + italic" and then recalculates
   * the command of every token on the stack from a cleared state.
   *
   * The first bold token of each of three kinds is remembered, and the one converted is chosen in
   * this order: preceded by a space two bytes back (idxNeg2), not preceded by a space in either of
   * the two previous bytes (idxNone), preceded directly by a space (idxNeg1).
   */
  private void ConvertBoldToItal(Xop_ctx ctx, byte[] src) {
    Xop_apos_tkn idxNeg1 = null, idxNeg2 = null, idxNone = null;  // look at previous tkn for spaces; EX: "a '''" -> idxNeg1; " a'''" -> idxNeg2; "ab'''" -> idxNone
    int tknsLen = stack.Count();
    for (int i = 0; i < tknsLen; i++) {
      Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
      if (apos.Apos_tid() != Xop_apos_tkn_.Typ_bold) continue;  // only look for bold
      int tknBgn = apos.Src_bgn();
      boolean idxNeg1Space = tknBgn > 0 && src[tknBgn - 1] == Byte_ascii.Space;
      boolean idxNeg2Space = tknBgn > 1 && src[tknBgn - 2] == Byte_ascii.Space;
      if    (idxNeg1 == null && idxNeg1Space)                     {idxNeg1 = apos;}
      else if (idxNeg2 == null && idxNeg2Space)                   {idxNeg2 = apos;}
      else if (idxNone == null && !idxNeg1Space && !idxNeg2Space) {idxNone = apos;}
    }
    if      (idxNeg2 != null) ConvertBoldToItal(ctx, src, idxNeg2);  // 1st single letter word
    else if (idxNone != null) ConvertBoldToItal(ctx, src, idxNone);  // 1st multi letter word
    else if (idxNeg1 != null) ConvertBoldToItal(ctx, src, idxNeg1);  // everything else
    // now recalc all cmds for stack
    dat.State_clear();
    for (int i = 0; i < tknsLen; i++) {
      Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
      dat.Ident(ctx, src, apos.Apos_tid(), apos.Src_end());  // NOTE: apos.Typ() must map to apos_len
      int newCmd = dat.Cmd();
      if (newCmd == apos.Apos_cmd()) continue;
      apos.Apos_cmd_(newCmd);
    }
  }

  /** Logs the conversion and turns oldTkn into an italic token with one additional literal apostrophe. */
  private void ConvertBoldToItal(Xop_ctx ctx, byte[] src, Xop_apos_tkn oldTkn) {
    ctx.Msg_log().Add_itm_none(Xop_apos_log.Bold_converted_to_ital, src, oldTkn.Src_bgn(), oldTkn.Src_end());
    oldTkn.Apos_tid_(Xop_apos_tkn_.Typ_ital).Apos_cmd_(Xop_apos_tkn_.Cmd_i_bgn).Apos_lit_(oldTkn.Apos_lit() + 1);  // NOTE: Cmd_i_bgn may be overridden later
  }

  /** Clears counts, the pending dual token, the stack and the parser state. */
  private void Reset() {
    bold_count = ital_count = 0;
    dual_tkn = null;
    stack.Clear();
    dat.State_clear();
  }

  /**
   * Searches the stack from newest to oldest for a token whose command opens markup of the given type.
   *
   * @param typ Typ_ital or Typ_bold to restrict the search to openers of that markup; any other
   *            value matches every opening command
   * @return the matching token, or null when the stack holds none
   */
  private static Xop_apos_tkn Previous_bgn(List_adp stack, int typ) {
    int stack_len = stack.Count();
    for (int i = stack_len - 1; i > -1; --i) {
      Xop_apos_tkn apos = (Xop_apos_tkn)stack.Get_at(i);
      int cmd = apos.Apos_cmd();
      switch (typ) {
        case Xop_apos_tkn_.Typ_ital:
          switch (cmd) {
            case Xop_apos_tkn_.Cmd_i_bgn:
            case Xop_apos_tkn_.Cmd_ib_bgn:
            case Xop_apos_tkn_.Cmd_bi_bgn:
            case Xop_apos_tkn_.Cmd_ib_end__i_bgn:
            case Xop_apos_tkn_.Cmd_b_end__i_bgn:
              return apos;
          }
          break;
        case Xop_apos_tkn_.Typ_bold:
          switch (cmd) {
            case Xop_apos_tkn_.Cmd_b_bgn:
            case Xop_apos_tkn_.Cmd_ib_bgn:
            case Xop_apos_tkn_.Cmd_bi_bgn:
            case Xop_apos_tkn_.Cmd_bi_end__b_bgn:
            case Xop_apos_tkn_.Cmd_i_end__b_bgn:
              return apos;
          }
          break;
        default:  // NOTE: this is approximate; will not be exact in most dual situations; EX: <b>a<i>b will return <i>; should return <b> and <i>
          switch (cmd) {
            case Xop_apos_tkn_.Cmd_b_bgn:
            case Xop_apos_tkn_.Cmd_i_bgn:
            case Xop_apos_tkn_.Cmd_ib_bgn:
            case Xop_apos_tkn_.Cmd_bi_bgn:
            case Xop_apos_tkn_.Cmd_bi_end__b_bgn:
            case Xop_apos_tkn_.Cmd_i_end__b_bgn:
            case Xop_apos_tkn_.Cmd_ib_end__i_bgn:
            case Xop_apos_tkn_.Cmd_b_end__i_bgn:
              return apos;
          }
          break;
      }
    }
    return null;
  }
}

View File

@@ -0,0 +1,159 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.apos; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*;
import gplx.xowa.parsers.lists.*;
/** Parser tests for apostrophe markup (italic, bold, dual, literal apostrophes and auto-closing). */
public class Xop_apos_wkr_tst {
  private Xop_fxt fxt = new Xop_fxt();

  /** Balanced italic, bold and dual runs. */
  @Test public void Basic() {
    fxt.Test_parse_page_wiki("''a''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_(2, 3)
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    );
    fxt.Test_parse_page_wiki("'''a'''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
    , fxt.tkn_txt_(3, 4)
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
    );
    fxt.Test_parse_page_wiki("'''''a'''''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn)
    , fxt.tkn_txt_(5, 6)
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_end)
    );
  }

  /** Runs of 4 and 8 apostrophes leave literal apostrophes. */
  @Test public void Advanced() {
    // 1 apos + bold
    fxt.Test_parse_page_wiki("''''a''''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn).Apos_lit_(1)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end).Apos_lit_(1)
    );
    // 3 apos + dual
    fxt.Test_parse_page_wiki("''''''''a''''''''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn).Apos_lit_(3)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_( Xop_apos_tkn_.Cmd_bi_end).Apos_lit_(3)
    );
  }

  /** Italic and bold nested in or following each other. */
  @Test public void Combo() {
    // b{i}
    fxt.Test_parse_page_wiki("''a'''b'''c''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    );
    // i{b}
    fxt.Test_parse_page_wiki("'''a''b''c'''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
    );
    // b_i
    fxt.Test_parse_page_wiki("''a''b'''c'''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
    );
  }

  /** Odd bold and odd italic counts: one bold token is reinterpreted as apostrophe + italic. */
  @Test public void Assume_apos() {
    // pick c0 1, b/c it is idxNeg2
    fxt.Test_parse_page_wiki("a01'''b01 '''c0 1'''d01''"
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_space_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
    , fxt.tkn_txt_()
    , fxt.tkn_space_()
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    ); // idx_neg2
    // pick c01, b/c it is idxNone
    fxt.Test_parse_page_wiki("a01 '''b01 '''c01'''d01''"
    , fxt.tkn_txt_()
    , fxt.tkn_space_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_space_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    ); // idx_none
    // pick a01 , b/c it is idxNeg1
    fxt.Test_parse_page_wiki("a01 '''b01 '''c01 '''d01''"
    , fxt.tkn_txt_()
    , fxt.tkn_space_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_space_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_space_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    ); // idx_neg1
    // strange outlier condition
    fxt.Test_parse_page_wiki("a''''b''"
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Apos_lit_(2)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    ); // 4 apos -> 2 apos + ital
  }

  /** Runs of five apostrophes combined with later bold / italic runs. */
  @Test public void Dual() {
    // +ib -b -i; 5apos defaults to ib
    fxt.Test_parse_page_wiki("'''''a'''b''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    );
    // +bi -i -b; change 5apos to bi
    fxt.Test_parse_page_wiki("'''''a''b'''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
    );
    // 5q toggles ital n, bold y
    fxt.Test_parse_page_wiki("''b'''''c'''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end__b_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
    );
  }

  /** Markup left open at the end of the page is closed automatically. */
  @Test public void Unclosed() {
    fxt.Test_parse_page_wiki("''a"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    );
    fxt.Test_parse_page_wiki("'''a"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
    );
    fxt.Test_parse_page_wiki("'''''a"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_end)
    );
  }

  /** Unbalanced mixes of italic and bold. */
  @Test public void Outliers() {
    // '''b -> ' +i b
    fxt.Test_parse_page_wiki("''a'''b'''c'''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Apos_lit_(1)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
    );
    // '''b -> ' +i b; double check with closing itals
    fxt.Test_parse_page_wiki("''a'''b''c''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Apos_lit_(1)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    );
    // ''c -> -bi + b
    fxt.Test_parse_page_wiki("''a'''b''c"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_end__b_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_b_end)
    );
  }

  /** Italic opened on one line is closed before the new line; the next line starts fresh. */
  @Test public void MultiLines() {
    fxt.Test_parse_page_wiki("a''b\nc''d"
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_(3, 4)
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    , fxt.tkn_nl_char_len1_(4)
    , fxt.tkn_txt_(5, 6)
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    );
  }

  /** Apostrophes inside a link target are kept verbatim. */
  @Test public void Lnki() {
    fxt.Test_parse_page_wiki_str
    ( "[[''a''']]"
    , "<a href=\"/wiki/%27%27a%27%27%27\">''a'''</a>"
    );
  }

  /** Dual followed by two italic runs. */
  @Test public void Dual_exceptions() {
    fxt.Test_parse_page_wiki("'''''a''b''"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_bi_bgn)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end)
    , fxt.tkn_txt_()
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn)
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_ib_end)
    );
  }

  /** Open italic is closed before a list item on the next line. */
  @Test public void Mix_list_autoClose() {
    fxt.Test_parse_page_wiki("''a\n*b"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Src_rng_(0, 2)
    , fxt.tkn_txt_(2, 3)
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Src_rng_(3, 3)
    , fxt.tkn_list_bgn_(3, 5, Xop_list_tkn_.List_itmTyp_ul)
    , fxt.tkn_txt_(5, 6)
    , fxt.tkn_list_end_(6)
    );
  }

  /** Open italic is closed before a horizontal rule on the next line. */
  @Test public void Mix_hr_autoClose() {
    fxt.Test_parse_page_wiki("''a\n----"
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Src_rng_(0, 2)
    , fxt.tkn_txt_(2, 3)
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Src_rng_(3, 3)
    , fxt.tkn_para_blank_(3)
    , fxt.tkn_hr_(3, 8)
    );
  }

  /** Open italic is closed before a header on the next line. */
  @Test public void Mix_hdr_autoClose() {
    fxt.Test_parse_page_wiki("''a\n==b=="
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_bgn).Src_rng_(0, 2)
    , fxt.tkn_txt_(2, 3)
    , fxt.tkn_apos_(Xop_apos_tkn_.Cmd_i_end).Src_rng_(3, 3)
    , fxt.tkn_hdr_(3, 9, 2).Subs_
      ( fxt.tkn_txt_(6, 7)
      )
    );
  }

  @Test public void Apos_broken_by_tblw_th() { // DATE:2013-04-24
    fxt.Test_parse_page_all_str
    ( "A ''[[b!!]]'' c"
    , "A <i><a href=\"/wiki/B!!\">b!!</a></i> c"
    );
  }

  @Test public void Nowiki() { // PAGE:en.w:Wiki; DATE:2013-05-13
    fxt.Test_parse_page_all_str
    ( "<nowiki>''a''</nowiki>"
    , "''a''"
    );
  }

  @Test public void Lnki_multi_line() { // PURPOSE: handle apos within multi-line lnki caption; DATE:2013-11-10
    fxt.Test_parse_page_all_str
    ( String_.Concat_lines_nl_skip_last
      ( "[[A|b '' c"
      , "d '' e ]]"
      )
    , "<a href=\"/wiki/A\">b <i> c d </i> e</a>"  // NOTE: c d should be italicized, not c e (latter occurs when apos is ended on each line)
    );
  }

  @Test public void French() { // PURPOSE: L'''A'' -> L'<i>A</i>; DATE:2014-01-06
    fxt.Test_parse_page_all_str
    ( "L''''A'''"
    , "L'<b>A</b>"
    );
    fxt.Test_parse_page_all_str
    ( "L'''A''"
    , "L'<i>A</i>"
    );
  }
//  @Test public void Mix_lnke() { // FUTURE: requires rewrite of apos
//    fxt.Test_parse_page_wiki("''a[irc://b c''d''e]f''"
//    , fxt.tkn_apos_(0, 2, Xop_apos_tkn_.Cmd_i_bgn)
//    , fxt.tkn_txt_(2, 3)
//    , fxt.tkn_lnke_(3, 20).Subs_add_ary
//      ( fxt.tkn_txt_(12, 13)
//      , fxt.tkn_apos_(13, 15, Xop_apos_tkn_.Cmd_i_bgn)
//      , fxt.tkn_txt_(15, 16)
//      , fxt.tkn_apos_(16, 18, Xop_apos_tkn_.Cmd_i_end)
//      , fxt.tkn_txt_(18, 19)
//      )
//    , fxt.tkn_txt_(20, 21)
//    , fxt.tkn_apos_(21, 23, Xop_apos_tkn_.Cmd_i_bgn)
//    );
//  }
}
/*
*/