mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v1.6.5.1
This commit is contained in:
114
400_xowa/src_460_para/gplx/xowa/Xop_nl_lxr.java
Normal file
114
400_xowa/src_460_para/gplx/xowa/Xop_nl_lxr.java
Normal file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
class Xop_nl_lxr implements Xop_lxr {
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_nl;}
|
||||
public void Init_by_wiki(Xow_wiki wiki, ByteTrieMgr_fast core_trie) {core_trie.Add(Byte_ascii.NewLine, this);}
|
||||
public void Init_by_lang(Xol_lang lang, ByteTrieMgr_fast core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_bos) return ctx.Lxr_make_txt_(cur_pos); // simulated nl at beginning of every parse
|
||||
int trim_category_pos = Scan_fwd_for_ctg(ctx, src, cur_pos, src_len);
|
||||
if (trim_category_pos != Bry_.NotFound) { // [[Category]] found after ws
|
||||
int root_subs_len = root.Subs_len();
|
||||
if (root_subs_len > 0) {
|
||||
Xop_tkn_itm tkn = root.Subs_get(root_subs_len - 1);
|
||||
if (tkn.Tkn_tid() == Xop_tkn_itm_.Tid_eq) {
|
||||
Xop_eq_tkn eq_tkn = (Xop_eq_tkn)tkn;
|
||||
if (eq_tkn.Eq_len() > 1) {
|
||||
Xop_nl_tkn nl_tkn = tkn_mkr.NewLine(bgn_pos, cur_pos, Xop_nl_tkn.Tid_char, 1);
|
||||
ctx.Subs_add(root, nl_tkn);
|
||||
}
|
||||
}
|
||||
}
|
||||
return trim_category_pos;
|
||||
}
|
||||
Xop_tkn_itm last_tkn = ctx.Stack_get_last(); // BLOCK:invalid_ttl_check
|
||||
if ( !ctx.Tid_is_image_map()
|
||||
&& last_tkn != null
|
||||
&& last_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) {
|
||||
Xop_lnki_tkn lnki = (Xop_lnki_tkn)last_tkn;
|
||||
if ( lnki.Pipe_count_is_zero()) { // always invalid
|
||||
ctx.Stack_pop_last();
|
||||
return Xop_lnki_wkr_.Invalidate_lnki(ctx, src, root, lnki, bgn_pos);
|
||||
}
|
||||
}
|
||||
|
||||
ctx.Apos().EndFrame(ctx, root, src, bgn_pos, true); // NOTE: frame should at end at bgn_pos (before \n) not after; else, will create tkn at (5,5), while tkn_mkr.Space creates one at (4,5); DATE:2013-10-31
|
||||
ctx.Tblw().Cell_pipe_seen_(false); // flip off "|" in tblw seq; EX: "| a\n||" needs to flip off "|" else "||" will be seen as style dlm"; NOTE: not covered by test?
|
||||
|
||||
Xop_para_wkr para_wkr = ctx.Para();
|
||||
switch (ctx.Cur_tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_hdr: // last tkn was hdr; close it; EX: \n==a==\nb; "\n" should close 2nd "=="; DATE:2014-02-17
|
||||
int acs_pos = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_hdr);
|
||||
ctx.Stack_pop_til(root, src, acs_pos, true, bgn_pos, cur_pos);
|
||||
para_wkr.Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_h2);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_list: // close list
|
||||
Xop_list_wkr_.Close_list_if_present(ctx, root, src, bgn_pos, cur_pos);
|
||||
para_wkr.Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_li);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_lnke: // close lnke
|
||||
if (ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tmpl_invk) == -1) // only close if no tmpl; MWR: [[SHA-2]]; * {{cite journal|title=Proposed
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_lnke), true, bgn_pos, cur_pos);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_lnki: // NOTE: \n in caption or other multipart lnki; don't call para_wkr.Process
|
||||
Xop_tkn_itm nl_tkn = tkn_mkr.Space(root, bgn_pos, cur_pos); // convert \n to \s. may result in multiple \s, but rely on htmlViewer to suppress; EX: w:Schwarzschild_radius; and the stellar [[Velocity dispersion|velocity\ndispersion]];
|
||||
ctx.Subs_add(root, nl_tkn);
|
||||
return cur_pos;
|
||||
// case Xop_tkn_itm_.Tid_tblw_tc: case Xop_tkn_itm_.Tid_tblw_td: // STUB: tc/td should not have attributes
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: case Xop_tkn_itm_.Tid_tblw_tr: case Xop_tkn_itm_.Tid_tblw_th: // nl should close previous tblw's atrs range; EX {{Infobox planet}} and |-\n<tr>
|
||||
Xop_tblw_wkr.Atrs_close(ctx, src, root);
|
||||
break;
|
||||
}
|
||||
if ( ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki // parse_mode is wiki
|
||||
&& para_wkr.Enabled() // check that para is enabled
|
||||
)
|
||||
para_wkr.Process_nl(ctx, root, src, bgn_pos, cur_pos);
|
||||
else { // parse mode is tmpl, or para is disabled; for latter, adding \n for pretty-print
|
||||
Xop_nl_tkn nl_tkn = tkn_mkr.NewLine(bgn_pos, cur_pos, Xop_nl_tkn.Tid_char, 1);
|
||||
ctx.Subs_add(root, nl_tkn);
|
||||
}
|
||||
return cur_pos;
|
||||
}
|
||||
public static int Scan_fwd_for_ctg(Xop_ctx ctx, byte[] src, int cur_pos, int src_len) {
|
||||
for (int i = cur_pos; i < src_len; i++) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.NewLine: case Byte_ascii.CarriageReturn: // ignore ws
|
||||
break;
|
||||
case Byte_ascii.Brack_bgn: // [
|
||||
if ( Bry_.Eq_itm(src, src_len, i + 1, Byte_ascii.Brack_bgn) // [[
|
||||
&& i + 2 < src_len) {
|
||||
int ttl_bgn = Bry_finder.Find_fwd_while(src, i + 2, src_len, Byte_ascii.Space);
|
||||
ByteTrieMgr_slim ctg_trie = ctx.Wiki().Ns_mgr().Category_trie();
|
||||
Object ctg_ns = ctg_trie.MatchAtCur(src, ttl_bgn, src_len);
|
||||
if (ctg_ns != null // "[[Category" found
|
||||
&& Bry_.Eq_itm(src, src_len, ctg_trie.Match_pos(), Byte_ascii.Colon)) { // check that next char is :
|
||||
return i;// return pos of 1st [
|
||||
}
|
||||
return Bry_.NotFound;
|
||||
}
|
||||
break;
|
||||
default: // non-ws; return not found
|
||||
return Bry_.NotFound;
|
||||
}
|
||||
}
|
||||
return Bry_.NotFound;
|
||||
}
|
||||
public static final Xop_nl_lxr _ = new Xop_nl_lxr(); Xop_nl_lxr() {}
|
||||
}
|
||||
50
400_xowa/src_460_para/gplx/xowa/Xop_nl_tab_lxr.java
Normal file
50
400_xowa/src_460_para/gplx/xowa/Xop_nl_tab_lxr.java
Normal file
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
class Xop_nl_tab_lxr implements Xop_lxr {
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_nl_tab;}
|
||||
public void Init_by_wiki(Xow_wiki wiki, ByteTrieMgr_fast core_trie) {core_trie.Add(Hook_nl_tab, this);} private static final byte[] Hook_nl_tab = new byte[] {Byte_ascii.NewLine, Byte_ascii.Tab};
|
||||
public void Init_by_lang(Xol_lang lang, ByteTrieMgr_fast core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
int non_ws_pos = Bry_finder.Find_fwd_while_space_or_tab(src, cur_pos, src_len);
|
||||
if (non_ws_pos < src_len) { // bounds check
|
||||
ByteTrieMgr_slim tblw_trie = ctx.App().Utl_trie_tblw_ws();
|
||||
Object tblw_obj = tblw_trie.MatchAtCur(src, non_ws_pos, src_len);
|
||||
if (tblw_obj != null) {
|
||||
Xop_tblw_ws_itm tblw_itm = (Xop_tblw_ws_itm)tblw_obj;
|
||||
byte itm_type = tblw_itm.Tblw_type();
|
||||
switch (itm_type) {
|
||||
case Xop_tblw_ws_itm.Type_nl: // ignore nl
|
||||
case Xop_tblw_ws_itm.Type_xnde: // ignore xnde
|
||||
break;
|
||||
default: { // handle tblw
|
||||
int tblw_rv = ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, non_ws_pos + tblw_itm.Hook_len(), false, itm_type, Xop_tblw_wkr.Called_from_pre, -1, -1);
|
||||
if (tblw_rv != -1) // \n\s| is valid tblw tkn and processed; otherwise fall through;
|
||||
return tblw_rv;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (bgn_pos != Xop_parser_.Doc_bgn_bos) // don't add \n if BOS; EX: "<BOS> a" should be " ", not "\n "
|
||||
ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos + 1, Xop_nl_tkn.Tid_char, 1));
|
||||
ctx.Subs_add(root, tkn_mkr.Tab(cur_pos - 1, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
public static final Xop_nl_tab_lxr _ = new Xop_nl_tab_lxr(); Xop_nl_tab_lxr() {}
|
||||
}
|
||||
65
400_xowa/src_460_para/gplx/xowa/Xop_nl_tab_lxr_tst.java
Normal file
65
400_xowa/src_460_para/gplx/xowa/Xop_nl_tab_lxr_tst.java
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_nl_tab_lxr_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void teardown() {fxt.Init_para_n_();}
|
||||
@Test public void Basic() { // PURPOSE: \n\t|- should be recognized as tblw; EX:zh.v:西安; DATE:2014-05-06
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl
|
||||
( "{|"
|
||||
, "\t|-"
|
||||
, "|a"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Ws() { // PURPOSE: \n\t|- should be recognized as tblw; EX:zh.v:西安; DATE:2014-05-06
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl
|
||||
( "{|"
|
||||
, "\t |-" // \t
|
||||
, "|a"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Ignore() {// PURPOSE: \n\t should not be pre; EX:pl.w:Main_Page; DATE:2014-05-06
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, "\t b"
|
||||
, "c"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<p>a"
|
||||
, "\t b"
|
||||
, "c"
|
||||
, "</p>"
|
||||
));
|
||||
}
|
||||
}
|
||||
27
400_xowa/src_460_para/gplx/xowa/Xop_nl_tkn.java
Normal file
27
400_xowa/src_460_para/gplx/xowa/Xop_nl_tkn.java
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
public class Xop_nl_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_nl_tkn(int bgn, int end, byte nl_tid, int nl_len) {
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
this.nl_tid = nl_tid;
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_newLine;}
|
||||
public byte Nl_tid() {return nl_tid;} private byte nl_tid = Xop_nl_tkn.Tid_unknown;
|
||||
public static final byte Tid_unknown = 0, Tid_char = 1, Tid_hdr = 2, Tid_hr = 3, Tid_list = 4, Tid_tblw = 5, Tid_file = 6;
|
||||
}
|
||||
31
400_xowa/src_460_para/gplx/xowa/Xop_para_tkn.java
Normal file
31
400_xowa/src_460_para/gplx/xowa/Xop_para_tkn.java
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
public class Xop_para_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_para_tkn(int pos) {this.Tkn_ini_pos(false, pos, pos);}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_para;}
|
||||
public byte Para_end() {return para_end;} public Xop_para_tkn Para_end_(byte v) {para_end = v; return this;} private byte para_end = Tid_none;
|
||||
public byte Para_bgn() {return para_bgn;} public Xop_para_tkn Para_bgn_(byte v) {para_bgn = v; return this;} private byte para_bgn = Tid_none;
|
||||
public int Space_bgn() {return space_bgn;} public Xop_para_tkn Space_bgn_(int v) {space_bgn = v; return this;} private int space_bgn = 0;
|
||||
public boolean Nl_bgn() {return nl_bgn;} public Xop_para_tkn Nl_bgn_y_() {nl_bgn = true; return this;} private boolean nl_bgn;
|
||||
public static final byte
|
||||
Tid_none = 0 //
|
||||
, Tid_para = 1 // </p>
|
||||
, Tid_pre = 2 // </pre>
|
||||
;
|
||||
}
|
||||
333
400_xowa/src_460_para/gplx/xowa/Xop_para_wkr.java
Normal file
333
400_xowa/src_460_para/gplx/xowa/Xop_para_wkr.java
Normal file
@@ -0,0 +1,333 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
public class Xop_para_wkr implements Xop_ctx_wkr {
|
||||
private boolean para_enabled;
|
||||
private byte cur_mode;
|
||||
private int para_stack;
|
||||
private boolean in_block, block_is_bgn_xnde, block_is_end_xnde, in_blockquote, block_is_bgn_blockquote, block_is_end_blockquote;
|
||||
private int prv_nl_pos; private Xop_para_tkn prv_para; private int prv_ws_bgn;
|
||||
public boolean Enabled() {return enabled;} public Xop_para_wkr Enabled_(boolean v) {enabled = v; return this;} private boolean enabled = true;
|
||||
public Xop_para_wkr Enabled_y_() {enabled = true; return this;} public Xop_para_wkr Enabled_n_() {enabled = false; return this;}
|
||||
public void Ctor_ctx(Xop_ctx ctx) {}
|
||||
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
|
||||
this.Clear();
|
||||
para_enabled = enabled && ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki; // only enable for wikitext (not for template)
|
||||
if (para_enabled)
|
||||
Prv_para_new(ctx, root, -1, 0); // create <para> at bos
|
||||
}
|
||||
private void Clear() {
|
||||
cur_mode = Mode_none;
|
||||
para_stack = Para_stack_none;
|
||||
in_block = block_is_bgn_xnde = block_is_end_xnde = false;
|
||||
in_blockquote = block_is_bgn_blockquote = block_is_end_blockquote = false;
|
||||
prv_nl_pos = -1;
|
||||
prv_para = null;
|
||||
prv_ws_bgn = 0;
|
||||
}
|
||||
public void AutoClose(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {}
|
||||
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
|
||||
if (para_enabled) {
|
||||
Process_nl(ctx, root, src, src_len, src_len);
|
||||
this.Prv_para_end(); // close anything created by Process_nl()
|
||||
}
|
||||
this.Clear();
|
||||
}
|
||||
public void Process_block__bgn_y__end_n(Xop_xnde_tag tag) {Process_block(tag, Bool_.Y, Bool_.N);} // NOTE: disables para for rest of page; Process_block__bgn_n__end_y must be called; DATE:2014-04-18
|
||||
public void Process_block__bgn_n__end_y(Xop_xnde_tag tag) {Process_block(tag, Bool_.N, Bool_.Y);}
|
||||
public void Process_block__xnde(Xop_xnde_tag tag, byte mode) {
|
||||
if (mode == Xop_xnde_tag.Block_bgn) Process_block(tag, Bool_.Y, Bool_.N);
|
||||
else if (mode == Xop_xnde_tag.Block_end) Process_block(tag, Bool_.N, Bool_.Y);
|
||||
}
|
||||
public void Process_block_lnki_div() { // bgn_lhs is pos of [[; end_lhs is pos of ]]
|
||||
if (prv_ws_bgn > 0) // if pre at start of line; ignore it b/c of div; EX: "\n\s[[File:A.png|thumb]]" should not produce thumb; also [[File:A.png|right]]; DATE:2014-02-17
|
||||
prv_ws_bgn = 0;
|
||||
this.Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_div);
|
||||
}
|
||||
private void Process_block(Xop_xnde_tag tag, boolean bgn, boolean end) {
|
||||
if (prv_ws_bgn > 0) {
|
||||
prv_para.Space_bgn_(prv_ws_bgn);
|
||||
prv_ws_bgn = 0;
|
||||
}
|
||||
block_is_bgn_xnde = bgn;
|
||||
block_is_end_xnde = end;
|
||||
switch (tag.Id()) {
|
||||
case Xop_xnde_tag_.Tid_blockquote:
|
||||
if (bgn) block_is_bgn_blockquote = true;
|
||||
if (end) block_is_end_blockquote = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
public void Process_block__bgn__nl_w_symbol(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos, Xop_xnde_tag tag) {// handle \n== and \n* \n{|; note that nl is at rng of bgn_pos to bgn_pos + 1 (not cur_pos)
|
||||
if (!para_enabled) return;
|
||||
Process_nl(ctx, root, src, bgn_pos, bgn_pos + 1);
|
||||
Process_block__bgn_y__end_n(tag);
|
||||
}
|
||||
public void Process_nl(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos) {// REF.MW:Parser.php|doBlockLevels
|
||||
Dd_clear(ctx);
|
||||
if (block_is_bgn_xnde || block_is_end_xnde) {
|
||||
para_stack = Para_stack_none; // MW: $paragraphStack = false;
|
||||
Prv_para_end(); // MW: $output .= $this->closeParagraph()
|
||||
if (block_is_bgn_blockquote && !block_is_end_blockquote) // MW: if ( $preOpenMatch and !$preCloseMatch )
|
||||
in_blockquote = true; // MW: $this->mInPre = true;
|
||||
else
|
||||
in_blockquote = false; // XO: turn off blockquote else following para / nl won't work; w:Snappy_(software); DATE:2014-04-25
|
||||
in_block = !block_is_end_xnde; // MW: $inBlockElem = !$closematch;
|
||||
}
|
||||
else if (!in_block && !in_blockquote) { // MW: elseif ( !$inBlockElem && !$this->mInPre ) {
|
||||
boolean line_is_ws = Line_is_ws(src, bgn_pos);
|
||||
if (prv_ws_bgn > 0 && (cur_mode == Mode_pre || !line_is_ws)) { // MW: if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) ) {
|
||||
if (cur_mode != Mode_pre) { // MW: if ( $this->mLastSection !== 'pre' ) {
|
||||
para_stack = Para_stack_none; // MW: $paragraphStack = false;
|
||||
prv_para.Space_bgn_(prv_ws_bgn - 1); // -1 to ignore 1st "\s" in "\n\s"; note that prv_ws_bgn only includes spaces, so BOS doesn't matter; DATE:2014-04-14
|
||||
Prv_para_end(); Prv_para_bgn(Xop_para_tkn.Tid_pre); // MW: $output .= $this->closeParagraph() . '<pre>';
|
||||
cur_mode = Mode_pre; // MW: $this->mLastSection = 'pre';
|
||||
}
|
||||
else { // already in pre
|
||||
if (line_is_ws) { // line is entirely ws
|
||||
int next_char_pos = prv_nl_pos + 2; // "\n\s".length
|
||||
if ( next_char_pos < src.length // bounds check
|
||||
&& src[next_char_pos] == Byte_ascii.NewLine // is "\n\s\n"; i.e.: "\n" only
|
||||
) {
|
||||
ctx.Subs_add(root, ctx.Tkn_mkr().Bry(bgn_pos, bgn_pos, Byte_ascii.NewLine_bry)); // add a "\n" tkn; note that adding a NewLine tkn doesn't work, b/c Xoh_html_wtr has code to remove consecutive \n; PAGE:en.w:Preferred_numbers DATE:2014-06-24
|
||||
prv_nl_pos = bgn_pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
prv_ws_bgn = 0; // MW: $t = substr( $t, 1 );
|
||||
}
|
||||
else {
|
||||
if (bgn_pos - prv_nl_pos == 1 || line_is_ws) { // line is blank ("b" for blank) MW: if ( trim( $t ) === '' ) {
|
||||
if (para_stack != Para_stack_none) { // "b1"; stack has "<p>" or "</p><p>"; output "<br/>"; MW: if ( $paragraphStack ) {
|
||||
Para_stack_end(cur_pos); Add_br(ctx, root, bgn_pos); // MW: $output .= $paragraphStack . '<br />';
|
||||
para_stack = Para_stack_none; // MW: $paragraphStack = false;
|
||||
cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
|
||||
}
|
||||
else { // stack is empty
|
||||
if (cur_mode != Mode_para) { // "b2"; cur is '' or <pre> MW: if ( $this->mLastSection !== 'p' ) {
|
||||
Prv_para_end(); // MW: $output .= $this->closeParagraph();
|
||||
cur_mode = Mode_none; // MW: $this->mLastSection = '';
|
||||
para_stack = Para_stack_bgn; // put <p> on stack MW: $paragraphStack = '<p>';
|
||||
}
|
||||
else // "b3"; cur is p
|
||||
para_stack = Para_stack_mid; // put </p><p> on stack MW: $paragraphStack = '</p><p>';
|
||||
}
|
||||
}
|
||||
else { // line has text ("t" for text); NOTE: tkn already added before \n, so must change prv_para; EX: "a\n" -> this code is called for "\n" but "a" already processed
|
||||
if (para_stack != Para_stack_none) { // "t1" MW: if ( $paragraphStack ) {
|
||||
Para_stack_end(cur_pos); // MW: $output .= $paragraphStack;
|
||||
para_stack = Para_stack_none; // MW: $paragraphStack = false;
|
||||
cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
|
||||
}
|
||||
else if (cur_mode != Mode_para) { // "t2"; cur is '' or <pre> MW: elseif ( $this->mLastSection !== 'p' ) {
|
||||
Prv_para_end(); Prv_para_bgn(Xop_para_tkn.Tid_para); // MW: $output .= $this->closeParagraph() . '<p>';
|
||||
cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
|
||||
}
|
||||
else {} // "t3"
|
||||
}
|
||||
}
|
||||
}
|
||||
if (in_blockquote && prv_ws_bgn > 0) // handle blockquote separate; EX: <blockquote>\n\sa\n</blockquote>; note that "\s" needs to be added literally; MW doesn't have this logic specifically, since it assumes all characters go into $output, whereas XO, sets aside the "\s" in "\n\s" separately
|
||||
prv_para.Space_bgn_(prv_ws_bgn);
|
||||
prv_ws_bgn = 0; // nl encountered and processed; always prv_ws_bgn set to 0, else ws from one line will carry over to next
|
||||
// in_blockquote = false;
|
||||
block_is_bgn_xnde = block_is_end_xnde = false;
|
||||
// if ( $preCloseMatch && $this->mInPre )
|
||||
// $this->mInPre = false;
|
||||
// prv_ws_bgn = false;
|
||||
Prv_para_new(ctx, root, bgn_pos, cur_pos); // add a prv_para placeholder
|
||||
if (para_stack == Para_stack_none) // "x1" MW: if ( $paragraphStack === false ) {
|
||||
if (prv_para != null) prv_para.Nl_bgn_y_(); // add nl; note that "$t" has already been processed; MW: $output .= $t . "\n";
|
||||
}
|
||||
public int Process_pre(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int txt_pos) {
|
||||
Dd_clear(ctx);
|
||||
Object o = ctx.App().Utl_trie_tblw_ws().MatchAtCur(src, txt_pos, src_len);
|
||||
if (o != null) { // tblw_ws found
|
||||
Xop_tblw_ws_itm ws_itm = (Xop_tblw_ws_itm)o;
|
||||
byte tblw_type = ws_itm.Tblw_type();
|
||||
switch (tblw_type) {
|
||||
case Xop_tblw_ws_itm.Type_nl: // \n\s
|
||||
if (cur_mode == Mode_pre) { // already in pre; just process "\n\s"
|
||||
ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos, Xop_nl_tkn.Tid_char, 1));
|
||||
prv_nl_pos = bgn_pos; // NOTE: must update prv_nl_pos; PAGE:en.w:Preferred_number DATE:2014-06-24
|
||||
return txt_pos;
|
||||
}
|
||||
break;
|
||||
case Xop_tblw_ws_itm.Type_xnde:
|
||||
if (bgn_pos != Xop_parser_.Doc_bgn_bos)
|
||||
ctx.Para().Process_nl(ctx, root, src, bgn_pos, cur_pos);
|
||||
return ctx.Xnde().Make_tkn(ctx, tkn_mkr, root, src, src_len, txt_pos, txt_pos + 1);
|
||||
default: {
|
||||
int tblw_rv = ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, txt_pos + ws_itm.Hook_len(), false, tblw_type, Xop_tblw_wkr.Called_from_pre, -1, -1);
|
||||
if (tblw_rv != -1) // \n\s| is valid tblw tkn and processed; otherwise process pre-code below; EX:w:Wikipedia:WikiProject_History/CategoryExample; DATE:2014-04-14
|
||||
return tblw_rv;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// NOTE: pre lxr emulates MW for "\n\s" by (1) calling Process nl for "\n"; (2) anticipating next line by setting prv_ws_bgn
|
||||
// EX: "\na\n b\n"; note that "\n " is cur
|
||||
if (bgn_pos != Xop_parser_.Doc_bgn_bos) // if bos, then don't close 1st para
|
||||
Process_nl(ctx, root, src, bgn_pos, bgn_pos + 1); // note that tkn is \n\s; so, bgn_pos -> bgn_pos + 1 is \n ...
|
||||
if (cur_mode == Mode_pre) // in pre_mode
|
||||
ctx.Subs_add(root, tkn_mkr.Space(root, cur_pos, txt_pos)); // cur_pos to start after \s; do not capture "\s" in "\n\s"; (not sure why not before \s)
|
||||
prv_ws_bgn = txt_pos - cur_pos + 1;
|
||||
return txt_pos;
|
||||
}
|
||||
public void Process_lnki_category(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int pos, int src_len) { // REF.MW:Parser.php|replaceInternalLinks2|Strip the whitespace Category links produce;
|
||||
if (!para_enabled) return;
|
||||
int subs_len = root.Subs_len();
|
||||
for (int i = subs_len - 2; i > -1; i--) { // -2: -1 b/c subs_len is invalid; -1 to skip current lnki
|
||||
Xop_tkn_itm sub_tkn = root.Subs_get(i);
|
||||
switch (sub_tkn.Tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_para: // nl found; note this means that BOL -> [[Category:]] is all ws;
|
||||
if (prv_ws_bgn > 0) { // line begins with ws a
|
||||
if (sub_tkn.Src_bgn() != 0) // do not ignore BOS para; needed b/c it is often <p>; needed for test;
|
||||
sub_tkn.Ignore_y_(); // ignore nl (pretty-printing only)
|
||||
prv_ws_bgn = 0; // remove ws
|
||||
if (ctx.Stack_has(Xop_tkn_itm_.Tid_list)){ // HACK: if in list, set prv_nl_pos to EOL; only here for one test to pass
|
||||
int nl_at_eol = -1;
|
||||
for (int j = pos; j < src_len; j++) { // check if rest of line is ws
|
||||
byte b = src[j];
|
||||
switch (b) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Tab: break; // ignore space / tab
|
||||
case Byte_ascii.NewLine:
|
||||
nl_at_eol = j;
|
||||
j = src_len;
|
||||
break;
|
||||
default: // something else besides ws; stop
|
||||
j = src_len;
|
||||
break;
|
||||
}
|
||||
if (nl_at_eol != -1)
|
||||
prv_nl_pos = nl_at_eol + 1; // SEE:NOTE_2
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
default: // exit if anything except para / nl in front of [[Category:]]
|
||||
i = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// if (para_found) // BOS exit; just remove prv_ws_bgn
|
||||
prv_ws_bgn = 0;
|
||||
}
|
||||
private void Prv_para_new(Xop_ctx ctx, Xop_root_tkn root, int prv_nl_pos, int para_pos) {
|
||||
this.prv_nl_pos = prv_nl_pos;
|
||||
prv_para = ctx.Tkn_mkr().Para(para_pos);
|
||||
ctx.Subs_add(root, prv_para);
|
||||
}
|
||||
private void Prv_para_end() { // MW: closeParagraph();
|
||||
// following switch is equivalent to:
|
||||
// MW: if ( $this->mLastSection != '' )
|
||||
// MW: $result = '</' . $this->mLastSection . ">\n";
|
||||
switch (cur_mode) {
|
||||
case Mode_none: return;
|
||||
case Mode_pre: prv_para.Para_end_(Xop_para_tkn.Tid_pre); break;
|
||||
case Mode_para: prv_para.Para_end_(Xop_para_tkn.Tid_para); break;
|
||||
}
|
||||
// in_pre = false; // MW: $this->mInPre = false;
|
||||
cur_mode = Mode_none; // MW: $this->mLastSection = '';
|
||||
}
|
||||
private void Prv_para_bgn(byte mode) {
|
||||
if (prv_para != null) prv_para.Para_bgn_(mode);
|
||||
}
|
||||
private void Para_stack_end(int cur_pos) { // MW: $output .= $paragraphStack;
|
||||
switch (para_stack) {
|
||||
case Para_stack_none: break;
|
||||
case Para_stack_bgn: prv_para.Para_end_(Xop_para_tkn.Tid_none).Para_bgn_(Xop_para_tkn.Tid_para); break; // '<p>'
|
||||
case Para_stack_mid: prv_para.Para_end_(Xop_para_tkn.Tid_para).Para_bgn_(Xop_para_tkn.Tid_para); break; // '</p><p>'
|
||||
}
|
||||
}
|
||||
private void Add_br(Xop_ctx ctx, Xop_root_tkn root, int bgn_pos) {
|
||||
ctx.Subs_add(root, ctx.Tkn_mkr().Xnde(bgn_pos, bgn_pos).Tag_(Xop_xnde_tag_.Tag_br));
|
||||
}
|
||||
private boolean Line_is_ws(byte[] src, int pos) {
|
||||
if (prv_nl_pos == -1) return false;
|
||||
boolean ws = true;
|
||||
for (int i = prv_nl_pos + 1; i < pos; i++) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Tab:
|
||||
case Byte_ascii.Space:
|
||||
break;
|
||||
default:
|
||||
ws = false;
|
||||
i = pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ws;
|
||||
}
|
||||
private void Dd_clear(Xop_ctx ctx) {ctx.List().Dd_chk_(false);}
|
||||
private static final int
|
||||
Para_stack_none = 0 // false
|
||||
, Para_stack_bgn = 1 // <p>
|
||||
, Para_stack_mid = 2 // </p><p>
|
||||
;
|
||||
private static final byte
|
||||
Mode_none = 0 // ''
|
||||
, Mode_para = 1 // p
|
||||
, Mode_pre = 2 // pre
|
||||
;
|
||||
}
|
||||
/*
|
||||
NOTE_1:
|
||||
xowa uses \n as the leading character for multi-character hooks; EX: "\n*","\n{|","\n==",etc..
|
||||
For this section of code, xowa treats \n separately from the rest of the hook for the purpose of emulating MW code.
|
||||
EX: a\n==b==
|
||||
MW:
|
||||
- split into two lines: "a", "==b=="
|
||||
- call process_nl on "a"
|
||||
- call process_nl on "==b=="
|
||||
XO:
|
||||
- split into "tkns": "a", "\n==", "b", "=="
|
||||
- add "a"
|
||||
- add "\n=="
|
||||
- since there is a "\n", call process_nl, which will effectively call it for "a"
|
||||
- note that page_end will effectively call process_nl on "==b=="
|
||||
|
||||
NOTE_2: Category needs to "trim" previous line
|
||||
EX:
|
||||
* a
|
||||
* b
|
||||
[[Category:c]]
|
||||
* d
|
||||
|
||||
MW does the following: (REF.MW:Parser.php|replaceInternalLinks2|Strip the whitespace Category links produce;)
|
||||
- removes the \n after b (REF: $s = rtrim( $s . "\n" ); # bug 87)
|
||||
- trims all space " " in front of [[ (NOTE: this makes it a non-pre line)
|
||||
- plucks out the [[Category:c]]
|
||||
- joins everything after ]] (starting with the \n) to the * b (REF: $s .= trim( $prefix . $trail, "\n" ) == '' ? '': $prefix . $trail;)
|
||||
This effectively "blanks" out the entire line "\n [[Category:c]]" -> ""
|
||||
|
||||
XOWA tries to emulate this by doing the following
|
||||
- mark the para_tkn after "b" as blank
|
||||
- disable pre for the line
|
||||
- keep the [[Category:c]], but *simulate* a blank line by moving the prv_nl_pos to after the ]]
|
||||
|
||||
NOTE_3: if (last_section_is_pre)
|
||||
PURPOSE: if Category trims previous nl, but nl was part of pre, deactivate it
|
||||
REASON: occurs b/c MW does separate passes for pre and Category while XO does one pass.
|
||||
EX: "a\n [[Category:c]]"
|
||||
- pre is activated by \n\s
|
||||
- [[Category:c]] indicates that \n\s should be trimmed
|
||||
so, disable_pre, etc.
|
||||
|
||||
*/
|
||||
1074
400_xowa/src_460_para/gplx/xowa/Xop_para_wkr_basic_tst.java
Normal file
1074
400_xowa/src_460_para/gplx/xowa/Xop_para_wkr_basic_tst.java
Normal file
File diff suppressed because it is too large
Load Diff
109
400_xowa/src_460_para/gplx/xowa/Xop_para_wkr_para_tst.java
Normal file
109
400_xowa/src_460_para/gplx/xowa/Xop_para_wkr_para_tst.java
Normal file
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_para_wkr_para_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void teardown() {fxt.Init_para_n_();}
|
||||
@Test public void Pre_then_xnde_pre() { // PURPOSE: if ws_pre is in effect, xnde_pre should end it; EX: b:Knowing Knoppix/Other applications
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( " a"
|
||||
, "b<pre>c"
|
||||
, "d</pre>"
|
||||
, "e"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<pre>a"
|
||||
, "</pre>"
|
||||
, "b<pre>c"
|
||||
, "d</pre>"
|
||||
, ""
|
||||
, "<p>e"
|
||||
, "</p>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void List_ignore_pre_lines() { // PURPOSE: "\s\n" should create new list; was continuing previous list; DATE:2013-07-12
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( ": a"
|
||||
, ":* b"
|
||||
, " "
|
||||
, ": c"
|
||||
, ":* d"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<dl>"
|
||||
, " <dd> a"
|
||||
, ""
|
||||
, " <ul>"
|
||||
, " <li> b"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
, ""
|
||||
, "<dl>"
|
||||
, " <dd> c"
|
||||
, ""
|
||||
, " <ul>"
|
||||
, " <li> d"
|
||||
, " </li>"
|
||||
, " </ul>"
|
||||
, " </dd>"
|
||||
, "</dl>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Multiple_nl_in_tblx() { // PURPOSE: "\n\n\n" was causing multiple breaks; EX:fr.w:Portail:G<>nie m<>canique; DATE:2014-02-17
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( "<table><tr><td>a"
|
||||
, "</td>"
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, ""
|
||||
, "</tr></table>"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Ignore_cr() { // PURPOSE: handle "\r\n"; EX: Special:MovePage; DATE:2014-03-02
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( "a\r"
|
||||
, "\r"
|
||||
, "b\r"
|
||||
)
|
||||
, String_.Concat_lines_nl_skip_last
|
||||
( "<p>a"
|
||||
, "</p>"
|
||||
, ""
|
||||
, "<p>b"
|
||||
, "</p>"
|
||||
, ""
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
247
400_xowa/src_460_para/gplx/xowa/Xop_para_wkr_pre_tst.java
Normal file
247
400_xowa/src_460_para/gplx/xowa/Xop_para_wkr_pre_tst.java
Normal file
@@ -0,0 +1,247 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Xop_para_wkr_pre_tst {
|
||||
@Before public void init() {fxt.Reset(); fxt.Init_para_y_();} private Xop_fxt fxt = new Xop_fxt();
|
||||
@After public void teardown() {fxt.Init_para_n_();}
|
||||
@Test public void Pre_ignore_bos() { // PURPOSE: ignore pre at bgn; DATE:2013-07-09
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( " "
|
||||
, "b"
|
||||
), String_.Concat_lines_nl
|
||||
( "<p>"
|
||||
, "b"
|
||||
, "</p>"
|
||||
));
|
||||
}
|
||||
@Test public void Pre_ignore_bos_tblw() { // PURPOSE: ignore pre at bgn shouldn't break tblw; EX:commons.wikimedia.org; DATE:2013-07-11
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl
|
||||
( " "
|
||||
, "{|"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "|}"
|
||||
), String_.Concat_lines_nl
|
||||
( "<table>"
|
||||
, " <tr>"
|
||||
, " <td>a"
|
||||
, " </td>"
|
||||
, " </tr>"
|
||||
, "</table>"
|
||||
));
|
||||
}
|
||||
@Test public void Ignore_bos_xnde() { // PURPOSE: space at bgn shouldn't create pre; EX:commons.wikimedia.org; " <center>a\n</center>"; DATE:2013-11-28
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( " <center>a" // NOTE: leading " " matches MW; DATE:2014-06-23
|
||||
, "</center>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( " <center>a"
|
||||
, "</center>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
@Test public void Ignore_pre_in_gallery() {// PURPOSE: pre in gallery should be ignored; EX:uk.w:EP2; DATE:2014-03-11
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.Y_byte;
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, ""
|
||||
, " <gallery>"
|
||||
, " File:A.png"
|
||||
, " </gallery>"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<p>a"
|
||||
, "</p>"
|
||||
, " <ul id=\"xowa_gallery_ul_0\" class=\"gallery mw-gallery-traditional\">"
|
||||
, " <li id=\"xowa_gallery_li_1\" class=\"gallerybox\" style=\"width: 155px\">"
|
||||
, " <div style=\"width: 155px\">"
|
||||
, " <div class=\"thumb\" style=\"width: 150px;\">"
|
||||
, " <div style=\"margin:15px auto;\">"
|
||||
, " <a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_1\" alt=\"A.png\" src=\"file:///mem/wiki/repo/trg/thumb/7/0/A.png/120px.png\" width=\"120\" height=\"120\" /></a>"
|
||||
, " </div>"
|
||||
, " </div>"
|
||||
, " <div class=\"gallerytext\">"
|
||||
, " </div>"
|
||||
, " </div>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
,""
|
||||
));
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.N_byte;
|
||||
}
|
||||
@Test public void Pre_xnde_gallery() { // PURPOSE: <gallery> should invalidate pre; EX: en.w:Mary, Queen of Scots
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.Y_byte;
|
||||
fxt.Wiki().Xtn_mgr().Init_by_wiki(fxt.Wiki());
|
||||
String raw = String_.Concat_lines_nl_skip_last
|
||||
( " <gallery>"
|
||||
, "File:A.png|b"
|
||||
, "</gallery>"
|
||||
);
|
||||
fxt.Test_parse_page_wiki_str(raw, String_.Concat_lines_nl_skip_last
|
||||
( " <ul id=\"xowa_gallery_ul_0\" class=\"gallery mw-gallery-traditional\">" // NOTE: leading " " matches MW; DATE:2014-06-23
|
||||
, " <li id=\"xowa_gallery_li_1\" class=\"gallerybox\" style=\"width: 155px\">"
|
||||
, " <div style=\"width: 155px\">"
|
||||
, " <div class=\"thumb\" style=\"width: 150px;\">"
|
||||
, " <div style=\"margin:15px auto;\">"
|
||||
, " <a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_1\" alt=\"\" src=\"file:///mem/wiki/repo/trg/thumb/7/0/A.png/120px.png\" width=\"120\" height=\"120\" /></a>"
|
||||
, " </div>"
|
||||
, " </div>"
|
||||
, " <div class=\"gallerytext\"><p>b"
|
||||
, "</p>"
|
||||
, ""
|
||||
, " </div>"
|
||||
, " </div>"
|
||||
, " </li>"
|
||||
, "</ul>"
|
||||
));
|
||||
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.N_byte;
|
||||
}
|
||||
@Test public void Ignore_pre_in_center() {// PURPOSE: pre in gallery should be ignored; EX:uk.w:EP2; DATE:2014-03-11
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, " <center>b"
|
||||
, " </center>"
|
||||
, "d"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<p>a"
|
||||
, "</p>"
|
||||
, " <center>b"
|
||||
, " </center>"
|
||||
, ""
|
||||
, "<p>d"
|
||||
, "</p>"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Remove_only_1st_space() { // PURPOSE: pre should only remove 1st space]; EX: w:Wikipedia:WikiProject_History/CategoryExample; DATE:2014-04-14
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( " a"
|
||||
, " b"
|
||||
, " c"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<pre> a"
|
||||
, " b"
|
||||
, " c"
|
||||
, "</pre>"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Remove_only_1st_space__bos() { // PURPOSE: similar to above but check that pre at \n\s is indented correctly; DATE:2014-04-14
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( ""
|
||||
, " a"
|
||||
, " b"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( ""
|
||||
, "<pre> a"
|
||||
, " b"
|
||||
, "</pre>"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Ignore_tblw_td() {// PURPOSE: \n\s| should continue pre; EX:w:Wikipedia:WikiProject_History/CategoryExample; DATE:2014-04-14
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
|
||||
( " a"
|
||||
, " |"
|
||||
, " b"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<pre>a"
|
||||
, "|"
|
||||
, "b"
|
||||
, "</pre>"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Tab() { // PURPOSE: tab inside pre was being converted to space; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
|
||||
fxt.Test_html_full_str
|
||||
( " \ta"
|
||||
, String_.Concat_lines_nl
|
||||
( "<pre>\ta"
|
||||
, "</pre>"
|
||||
));
|
||||
}
|
||||
@Test public void Style() { // PURPOSE: " <style>" was not being put in pre; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
|
||||
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl
|
||||
( " <style>"
|
||||
, " </style>"
|
||||
), String_.Concat_lines_nl
|
||||
( "<pre><style>"
|
||||
, "</style>"
|
||||
, "</pre>"
|
||||
));
|
||||
}
|
||||
@Test public void Nl_only() { // PURPOSE: wiki_pre with \n only was being dropped; PAGE:en.w:Preferred_number DATE:2014-06-24
|
||||
fxt.Test_html_full_str(String_.Concat_lines_nl_skip_last
|
||||
( " a"
|
||||
, " " // was being dropped
|
||||
, " b"
|
||||
), String_.Concat_lines_nl
|
||||
( "<pre>a"
|
||||
, "" // make sure it's still there
|
||||
, "b"
|
||||
, "</pre>"
|
||||
));
|
||||
}
|
||||
@Test public void Nl_w_ws() { // PURPOSE: based on Nl_only; make sure that 1 or more spaces does not add extra \n; PAGE:en.w:Preferred_number DATE:2014-06-24
|
||||
fxt.Test_html_full_str(String_.Concat_lines_nl_skip_last
|
||||
( " a"
|
||||
, " " // 2 spaces
|
||||
, " b"
|
||||
), String_.Concat_lines_nl
|
||||
( "<pre>a"
|
||||
, " " // 1 space
|
||||
, "b"
|
||||
, "</pre>"
|
||||
));
|
||||
}
|
||||
@Test public void Nl_many() { // PURPOSE: handle alternating \n\s; PAGE:en.w:Preferred_number DATE:2014-06-24
|
||||
fxt.Test_html_full_str(String_.Concat_lines_nl_skip_last
|
||||
( " a"
|
||||
, " "
|
||||
, " b"
|
||||
, " "
|
||||
, " c"
|
||||
), String_.Concat_lines_nl
|
||||
( "<pre>a"
|
||||
, ""
|
||||
, "b"
|
||||
, ""
|
||||
, "c"
|
||||
, "</pre>"
|
||||
));
|
||||
}
|
||||
@Test public void Source() { // PURPOSE: " <source>" in pre has issues; PAGE:en.w:Comment_(computer_programming) DATE:2014-06-23
|
||||
fxt.Init_para_y_();
|
||||
fxt.Test_html_wiki_str(String_.Concat_lines_nl
|
||||
( " "
|
||||
, " <source>"
|
||||
, " a"
|
||||
, " </source>"
|
||||
, " "
|
||||
), String_.Concat_lines_nl
|
||||
( "<p>" // this is wrong, but will be stripped by tidy
|
||||
, "</p>"
|
||||
, " <pre>"
|
||||
, " a"
|
||||
, "</pre>"
|
||||
, ""
|
||||
, "<p><br/>" // also wrong, but leave for now
|
||||
, "</p>"
|
||||
));
|
||||
}
|
||||
}
|
||||
103
400_xowa/src_460_para/gplx/xowa/Xop_pre_lxr.java
Normal file
103
400_xowa/src_460_para/gplx/xowa/Xop_pre_lxr.java
Normal file
@@ -0,0 +1,103 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
class Xop_pre_lxr implements Xop_lxr {
|
||||
public byte Lxr_tid() {return Xop_lxr_.Tid_pre;}
|
||||
public void Init_by_wiki(Xow_wiki wiki, ByteTrieMgr_fast core_trie) {core_trie.Add(Hook_space, this);} // NOTE: do not treat \n\t as shorthand pre; EX:pl.w:Main_Page; DATE:2014-05-06
|
||||
public void Init_by_lang(Xol_lang lang, ByteTrieMgr_fast core_trie) {}
|
||||
public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
|
||||
if (!ctx.Para().Enabled()) { // para disabled; "\n\s" should just be "\n\s"; NOTE: para disabled in <gallery>
|
||||
if (bgn_pos != Xop_parser_.Doc_bgn_bos) // don't add \n if BOS; EX: "<BOS> a" should be " ", not "\n "
|
||||
ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos + 1, Xop_nl_tkn.Tid_char, 1));
|
||||
ctx.Subs_add(root, tkn_mkr.Space(root, cur_pos - 1, cur_pos));
|
||||
return cur_pos;
|
||||
}
|
||||
int txt_pos = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Space); // NOTE: was Find_fwd_while_tab_or_space, which incorrectly converted tabs to spaces; PAGE:en.w:Cascading_Style_Sheets; DATE:2014-06-23
|
||||
if (txt_pos == src_len) return cur_pos; // "\n\s" at EOS; treat as \n only; EX: "a\n " -> ""; also bounds check
|
||||
byte b = src[txt_pos];
|
||||
if (bgn_pos == Xop_parser_.Doc_bgn_bos) { // BOS; gobble up all \s\t; EX: "BOS\s\s\sa" -> "BOSa"
|
||||
if (b == Byte_ascii.NewLine) { // next char is nl
|
||||
cur_pos = txt_pos; // position at nl; NOTE: do not position after nl, else may break hdr, tblw, list, etc; EX: "\s\n{|" needs to preserve "\n" for tblw
|
||||
ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, cur_pos, Xop_ignore_tkn.Ignore_tid_pre_at_bos));
|
||||
return cur_pos; // ignore pre if blank line at bos; EX: "BOS\s\s\n" -> "BOS\n"
|
||||
}
|
||||
if (b == Byte_ascii.Lt) // next char is <; possible xnde; flag so that xnde can escape; DATE:2013-11-28; moved outside Doc_bgn_bos block above; PAGE:en.w:Comment_(computer_programming); DATE:2014-06-23
|
||||
ctx.Xnde().Pre_at_bos_(true);
|
||||
}
|
||||
switch (ctx.Cur_tkn_tid()) { // close tblw attrs; NOTE: after BOS (since no tblw at BOS) but before "\n !" check
|
||||
case Xop_tkn_itm_.Tid_tblw_tb: case Xop_tkn_itm_.Tid_tblw_tr: case Xop_tkn_itm_.Tid_tblw_th:
|
||||
Xop_tblw_wkr.Atrs_close(ctx, src, root);
|
||||
break;
|
||||
case Xop_tkn_itm_.Tid_list:
|
||||
if (Close_list(ctx, root, src, src_len, bgn_pos, cur_pos, txt_pos)) {
|
||||
// ctx.Para().Process_nl(ctx, root, src, bgn_pos, new_pos, true); // add blank line for truncated "\n\s"; DATE:2013-07-12; DELETE: DATE:2014-02-18; doesn't seem necessary; doesn't break tests; devised for www.mediawiki.org/wiki/MediaWiki which loads fine
|
||||
return txt_pos; // must exit early; do not process pre
|
||||
}
|
||||
break;
|
||||
}
|
||||
switch (b) { // handle "\n !" which can be tbl
|
||||
case Byte_ascii.Bang:
|
||||
switch (ctx.Cur_tkn_tid()) {
|
||||
case Xop_tkn_itm_.Tid_tblw_tb:
|
||||
case Xop_tkn_itm_.Tid_tblw_tc:
|
||||
case Xop_tkn_itm_.Tid_tblw_tr:
|
||||
case Xop_tkn_itm_.Tid_tblw_th:
|
||||
case Xop_tkn_itm_.Tid_tblw_td:
|
||||
case Xop_tkn_itm_.Tid_tblw_te:
|
||||
int new_cur_pos = txt_pos + 1; // +1 to skip Byte_ascii.Bang
|
||||
Xop_tblw_lxr_ws.Make(ctx, tkn_mkr, root, src, src_len, bgn_pos, new_cur_pos, Xop_tblw_wkr.Tblw_type_th, true);
|
||||
return new_cur_pos;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return ctx.Para().Process_pre(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, txt_pos);
|
||||
}
|
||||
private static boolean Close_list(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int txt_pos) {// SEE:NOTE_4; EX.en.w:SHA-2
|
||||
if (Bry_finder.Find_fwd(src, Xop_tkn_.Lnki_bgn, txt_pos, src_len) == txt_pos) { // look for "[["
|
||||
txt_pos += Xop_tkn_.Lnki_bgn.length;
|
||||
if (Bry_finder.Find_fwd(src, ctx.Wiki().Ns_mgr().Ns_category().Name_db_w_colon(), txt_pos, src_len) == txt_pos) // look for "Category:"
|
||||
return false; // "[[Category:" found; "\n\s[[Category:" should not close list; note that [[Category]] is invisible
|
||||
}
|
||||
ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_list), true, bgn_pos, cur_pos); // "* a\n\sb" found; close *a
|
||||
if ( txt_pos < src_len // bounds check
|
||||
&& src[txt_pos] == Byte_ascii.NewLine) { // NOTE: handle "*a\n\s\n" between lists; DATE:2013-07-12
|
||||
Xop_list_wkr_.Close_list_if_present(ctx, root, src, bgn_pos, cur_pos); // NOTE: above line only closes one list; should probably change to close all lists, but for now, close all lists only if "\n\s", not "\n"; DATE:2013-07-12
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
public static final Xop_pre_lxr _ = new Xop_pre_lxr(); Xop_pre_lxr() {}
|
||||
private static final byte[]
|
||||
Hook_space = new byte[] {Byte_ascii.NewLine, Byte_ascii.Space}
|
||||
;
|
||||
}
|
||||
/*
|
||||
NOTE_4: Close_list
|
||||
PURPOSE: \n should ordinarily close list. However, if \n[[Category:A]], then don't close list since [[Category:A]] will trim preceding \n
|
||||
REASON: occurs b/c MW does separate passes for list and Category while XO does one pass.
|
||||
|
||||
EX: closes *a list
|
||||
*a
|
||||
|
||||
*b
|
||||
|
||||
EX: does not close
|
||||
*a
|
||||
[[Category:A]]
|
||||
*b
|
||||
*/
|
||||
27
400_xowa/src_460_para/gplx/xowa/Xop_pre_tkn.java
Normal file
27
400_xowa/src_460_para/gplx/xowa/Xop_pre_tkn.java
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa; import gplx.*;
|
||||
public class Xop_pre_tkn extends Xop_tkn_itm_base {
|
||||
public Xop_pre_tkn(int bgn, int end, byte pre_tid, Xop_tkn_itm pre_bgn_tkn) {
|
||||
this.Tkn_ini_pos(false, bgn, end);
|
||||
this.pre_tid = pre_tid;
|
||||
}
|
||||
@Override public byte Tkn_tid() {return Xop_tkn_itm_.Tid_pre;}
|
||||
public byte Pre_tid() {return pre_tid;} private byte pre_tid = Pre_tid_null;
|
||||
public static final byte Pre_tid_null = 0, Pre_tid_bgn = 1, Pre_tid_end = 2;
|
||||
}
|
||||
Reference in New Issue
Block a user