1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2014-06-30 00:04:32 -04:00
parent 85594d3cdd
commit bae88e739c
2482 changed files with 198730 additions and 0 deletions

View File

@@ -0,0 +1,114 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/**
 * Lexer for the single "\n" hook.
 * Make_tkn handles, in order: the simulated newline at beginning-of-source, skipping of whitespace in front of a
 * category link, invalidation of a lnki that has no pipe yet, closing of the apos frame and of line-scoped tokens,
 * and finally either paragraph processing or a literal newline token.
 */
class Xop_nl_lxr implements Xop_lxr {
	public byte Lxr_tid() {return Xop_lxr_.Tid_nl;}
	/** Registers this lexer in the core trie under the "\n" byte. */
	public void Init_by_wiki(Xow_wiki wiki, ByteTrieMgr_fast core_trie) {core_trie.Add(Byte_ascii.NewLine, this);}
	public void Init_by_lang(Xol_lang lang, ByteTrieMgr_fast core_trie) {}
	/**
	 * Processes one "\n" token spanning bgn_pos to cur_pos.
	 * @return the position at which parsing resumes: cur_pos in the normal case; the position of "[[" when only
	 *         whitespace separates this newline from a category link; or the value returned by
	 *         ctx.Lxr_make_txt_ / Xop_lnki_wkr_.Invalidate_lnki for the early exits
	 */
	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		if (bgn_pos == Xop_parser_.Doc_bgn_bos) return ctx.Lxr_make_txt_(cur_pos); // simulated nl at beginning of every parse
		int trim_category_pos = Scan_fwd_for_ctg(ctx, src, cur_pos, src_len);
		if (trim_category_pos != Bry_.NotFound) { // [[Category]] found after ws
			int root_subs_len = root.Subs_len();
			if (root_subs_len > 0) {
				Xop_tkn_itm tkn = root.Subs_get(root_subs_len - 1);
				if (tkn.Tkn_tid() == Xop_tkn_itm_.Tid_eq) { // last root sub is an "=" run longer than 1; keep an explicit nl after it
					Xop_eq_tkn eq_tkn = (Xop_eq_tkn)tkn;
					if (eq_tkn.Eq_len() > 1) {
						Xop_nl_tkn nl_tkn = tkn_mkr.NewLine(bgn_pos, cur_pos, Xop_nl_tkn.Tid_char, 1);
						ctx.Subs_add(root, nl_tkn);
					}
				}
			}
			return trim_category_pos; // resume at "[["; the ws in between is skipped
		}
		Xop_tkn_itm last_tkn = ctx.Stack_get_last(); // BLOCK:invalid_ttl_check
		if (	!ctx.Tid_is_image_map()
			&&	last_tkn != null
			&&	last_tkn.Tkn_tid() == Xop_tkn_itm_.Tid_lnki) {
			Xop_lnki_tkn lnki = (Xop_lnki_tkn)last_tkn;
			if (	lnki.Pipe_count_is_zero()) { // always invalid
				ctx.Stack_pop_last();
				return Xop_lnki_wkr_.Invalidate_lnki(ctx, src, root, lnki, bgn_pos);
			}
		}
		ctx.Apos().EndFrame(ctx, root, src, bgn_pos, true); // NOTE: frame should end at bgn_pos (before \n) not after; else, will create tkn at (5,5), while tkn_mkr.Space creates one at (4,5); DATE:2013-10-31
		ctx.Tblw().Cell_pipe_seen_(false); // flip off "|" in tblw seq; EX: "| a\n||" needs to flip off "|" else "||" will be seen as style dlm"; NOTE: not covered by test?
		Xop_para_wkr para_wkr = ctx.Para();
		switch (ctx.Cur_tkn_tid()) {
			case Xop_tkn_itm_.Tid_hdr: // last tkn was hdr; close it; EX: \n==a==\nb; "\n" should close 2nd "=="; DATE:2014-02-17
				int acs_pos = ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_hdr);
				ctx.Stack_pop_til(root, src, acs_pos, true, bgn_pos, cur_pos);
				para_wkr.Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_h2);
				break;
			case Xop_tkn_itm_.Tid_list: // close list
				Xop_list_wkr_.Close_list_if_present(ctx, root, src, bgn_pos, cur_pos);
				para_wkr.Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_li);
				break;
			case Xop_tkn_itm_.Tid_lnke: // close lnke
				if (ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_tmpl_invk) == -1) // only close if no tmpl; MWR: [[SHA-2]]; * {{cite journal|title=Proposed
					ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_lnke), true, bgn_pos, cur_pos);
				break;
			case Xop_tkn_itm_.Tid_lnki: // NOTE: \n in caption or other multipart lnki; don't call para_wkr.Process
				Xop_tkn_itm nl_tkn = tkn_mkr.Space(root, bgn_pos, cur_pos); // convert \n to \s. may result in multiple \s, but rely on htmlViewer to suppress; EX: w:Schwarzschild_radius; and the stellar [[Velocity dispersion|velocity\ndispersion]];
				ctx.Subs_add(root, nl_tkn);
				return cur_pos;
			// case Xop_tkn_itm_.Tid_tblw_tc: case Xop_tkn_itm_.Tid_tblw_td: // STUB: tc/td should not have attributes
			case Xop_tkn_itm_.Tid_tblw_tb: case Xop_tkn_itm_.Tid_tblw_tr: case Xop_tkn_itm_.Tid_tblw_th: // nl should close previous tblw's atrs range; EX {{Infobox planet}} and |-\n<tr>
				Xop_tblw_wkr.Atrs_close(ctx, src, root);
				break;
		}
		if (	ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki // parse_mode is wiki
			&&	para_wkr.Enabled() // check that para is enabled
			)
			para_wkr.Process_nl(ctx, root, src, bgn_pos, cur_pos);
		else { // parse mode is tmpl, or para is disabled; for latter, adding \n for pretty-print
			Xop_nl_tkn nl_tkn = tkn_mkr.NewLine(bgn_pos, cur_pos, Xop_nl_tkn.Tid_char, 1);
			ctx.Subs_add(root, nl_tkn);
		}
		return cur_pos;
	}
	/**
	 * Scans forward from cur_pos over whitespace (space, tab, \n, \r) for a "[[" whose title matches the wiki's
	 * category trie and is followed by ":".
	 * @return the position of the first "[" of that link; Bry_.NotFound when anything other than whitespace comes
	 *         first (including a lone "[" or a "[[" that is not a category link) or when the source ends
	 */
	public static int Scan_fwd_for_ctg(Xop_ctx ctx, byte[] src, int cur_pos, int src_len) {
		for (int i = cur_pos; i < src_len; i++) {
			byte b = src[i];
			switch (b) {
				case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.NewLine: case Byte_ascii.CarriageReturn: // ignore ws
					break;
				case Byte_ascii.Brack_bgn: // [
					if (	Bry_.Eq_itm(src, src_len, i + 1, Byte_ascii.Brack_bgn) // [[
						&&	i + 2 < src_len) {
						int ttl_bgn = Bry_finder.Find_fwd_while(src, i + 2, src_len, Byte_ascii.Space);
						ByteTrieMgr_slim ctg_trie = ctx.Wiki().Ns_mgr().Category_trie();
						Object ctg_ns = ctg_trie.MatchAtCur(src, ttl_bgn, src_len);
						if (	ctg_ns != null // "[[Category" found
							&&	Bry_.Eq_itm(src, src_len, ctg_trie.Match_pos(), Byte_ascii.Colon)) { // check that next char is :
							return i;// return pos of 1st [
						}
					}
					// a "[" that does not open a category link is text, not ws; previously a lone "[" fell through and was
					// skipped like ws, so "\n[ [[Category:A]]" lost its "[ "
					return Bry_.NotFound;
				default: // non-ws; return not found
					return Bry_.NotFound;
			}
		}
		return Bry_.NotFound;
	}
	public static final Xop_nl_lxr _ = new Xop_nl_lxr(); Xop_nl_lxr() {}
}

View File

@@ -0,0 +1,50 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/**
 * Lexer for the two-byte "\n\t" hook.
 * If the first byte after the run of spaces / tabs matches a tblw-ws trie item that is neither "nl" nor "xnde",
 * the table worker gets the first chance to build a token; otherwise (or if the table worker declines with -1)
 * the hook is emitted as a newline token plus a tab token.
 */
class Xop_nl_tab_lxr implements Xop_lxr {
	private static final byte[] Hook_nl_tab = new byte[] {Byte_ascii.NewLine, Byte_ascii.Tab};
	public static final Xop_nl_tab_lxr _ = new Xop_nl_tab_lxr();
	Xop_nl_tab_lxr() {}

	public byte Lxr_tid() {
		return Xop_lxr_.Tid_nl_tab;
	}
	public void Init_by_wiki(Xow_wiki wiki, ByteTrieMgr_fast core_trie) {
		core_trie.Add(Hook_nl_tab, this);
	}
	public void Init_by_lang(Xol_lang lang, ByteTrieMgr_fast core_trie) {}
	/**
	 * Builds tokens for "\n\t" found at bgn_pos (cur_pos is the position after the tab).
	 * @return the table worker's result when it accepted the sequence; else cur_pos
	 */
	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		int txt_pos = Bry_finder.Find_fwd_while_space_or_tab(src, cur_pos, src_len);
		if (txt_pos < src_len) { // something follows the ws
			Object match = ctx.App().Utl_trie_tblw_ws().MatchAtCur(src, txt_pos, src_len);
			if (match != null) {
				Xop_tblw_ws_itm ws_itm = (Xop_tblw_ws_itm)match;
				byte tblw_type = ws_itm.Tblw_type();
				boolean skip_tblw = tblw_type == Xop_tblw_ws_itm.Type_nl		// nl is ignored here
								||  tblw_type == Xop_tblw_ws_itm.Type_xnde;		// xnde is ignored here
				if (!skip_tblw) {
					int tblw_end = ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, txt_pos + ws_itm.Hook_len(), false, tblw_type, Xop_tblw_wkr.Called_from_pre, -1, -1);
					if (tblw_end != -1) {	// tblw accepted "\n\t|"; -1 means fall back to plain nl + tab
						return tblw_end;
					}
				}
			}
		}
		boolean at_bos = bgn_pos == Xop_parser_.Doc_bgn_bos;
		if (!at_bos) {	// no "\n" tkn at BOS; EX: "<BOS> a" should be " ", not "\n "
			ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos + 1, Xop_nl_tkn.Tid_char, 1));
		}
		ctx.Subs_add(root, tkn_mkr.Tab(cur_pos - 1, cur_pos));
		return cur_pos;
	}
}

View File

@@ -0,0 +1,65 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Parser tests for lines that begin with a tab ("\n\t"). */
public class Xop_nl_tab_lxr_tst {
	private Xop_fxt fxt = new Xop_fxt();

	@Before public void init() {
		fxt.Reset();
		fxt.Init_para_y_();
	}
	@After public void teardown() {
		fxt.Init_para_n_();
	}
	/** "\n\t|-" is parsed as a table row; EX:zh.v:西安; DATE:2014-05-06 */
	@Test public void Basic() {
		String raw = String_.Concat_lines_nl
		( "{|"
		, "\t|-"
		, "|a"
		, "|}"
		);
		String expd = String_.Concat_lines_nl
		( "<table>"
		, " <tr>"
		, " <td>a"
		, " </td>"
		, " </tr>"
		, "</table>"
		);
		fxt.Test_parse_page_wiki_str(raw, expd);
	}
	/** Same as Basic, but with a space between the tab and "|-"; EX:zh.v:西安; DATE:2014-05-06 */
	@Test public void Ws() {
		String raw = String_.Concat_lines_nl
		( "{|"
		, "\t |-"	// tab followed by space
		, "|a"
		, "|}"
		);
		String expd = String_.Concat_lines_nl
		( "<table>"
		, " <tr>"
		, " <td>a"
		, " </td>"
		, " </tr>"
		, "</table>"
		);
		fxt.Test_parse_page_wiki_str(raw, expd);
	}
	/** "\n\t" in front of plain text stays inside the paragraph (no pre); EX:pl.w:Main_Page; DATE:2014-05-06 */
	@Test public void Ignore() {
		String raw = String_.Concat_lines_nl_skip_last
		( "a"
		, "\t b"
		, "c"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<p>a"
		, "\t b"
		, "c"
		, "</p>"
		);
		fxt.Test_parse_page_wiki_str(raw, expd);
	}
}

View File

@@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/** Token for a newline; carries an nl_tid that records which kind of construct produced it. */
public class Xop_nl_tkn extends Xop_tkn_itm_base {
	public static final byte Tid_unknown = 0;
	public static final byte Tid_char = 1;
	public static final byte Tid_hdr = 2;
	public static final byte Tid_hr = 3;
	public static final byte Tid_list = 4;
	public static final byte Tid_tblw = 5;
	public static final byte Tid_file = 6;

	private byte nl_tid = Xop_nl_tkn.Tid_unknown;

	/**
	 * @param bgn    start position passed to Tkn_ini_pos
	 * @param end    end position passed to Tkn_ini_pos
	 * @param nl_tid one of the Tid_* constants of this class
	 * @param nl_len accepted but not stored by this constructor
	 */
	public Xop_nl_tkn(int bgn, int end, byte nl_tid, int nl_len) {
		this.Tkn_ini_pos(false, bgn, end);
		this.nl_tid = nl_tid;
	}
	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_newLine;
	}
	public byte Nl_tid() {
		return nl_tid;
	}
}

View File

@@ -0,0 +1,31 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/**
 * Zero-width placeholder token (bgn == end == pos) on which the paragraph worker records what must be closed
 * (Para_end) and opened (Para_bgn) at that point, plus leading-space and leading-newline info.
 */
public class Xop_para_tkn extends Xop_tkn_itm_base {
	public static final byte Tid_none = 0;	// nothing
	public static final byte Tid_para = 1;	// </p>
	public static final byte Tid_pre = 2;	// </pre>

	private byte para_end = Tid_none;
	private byte para_bgn = Tid_none;
	private int space_bgn = 0;
	private boolean nl_bgn;

	public Xop_para_tkn(int pos) {
		this.Tkn_ini_pos(false, pos, pos);
	}
	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_para;
	}

	public byte Para_end() {
		return para_end;
	}
	public Xop_para_tkn Para_end_(byte v) {
		this.para_end = v;
		return this;
	}

	public byte Para_bgn() {
		return para_bgn;
	}
	public Xop_para_tkn Para_bgn_(byte v) {
		this.para_bgn = v;
		return this;
	}

	public int Space_bgn() {
		return space_bgn;
	}
	public Xop_para_tkn Space_bgn_(int v) {
		this.space_bgn = v;
		return this;
	}

	public boolean Nl_bgn() {
		return nl_bgn;
	}
	/** Sets the nl_bgn flag to true; there is no setter to clear it. */
	public Xop_para_tkn Nl_bgn_y_() {
		this.nl_bgn = true;
		return this;
	}
}

View File

@@ -0,0 +1,333 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
// Paragraph / pre worker: a per-page state machine that decides, at each newline, which <p> / <pre> open and close
// markers to record on Xop_para_tkn placeholders. The "MW:" comments map each step to the corresponding line of
// MediaWiki's Parser.php doBlockLevels (see REF.MW on Process_nl).
public class Xop_para_wkr implements Xop_ctx_wkr {
// para_enabled: computed once per page in Page_bgn (enabled && parse is page_wiki); distinct from the user-set "enabled" flag below
private boolean para_enabled;
// cur_mode: one of Mode_none / Mode_para / Mode_pre; counterpart of MW's $this->mLastSection
private byte cur_mode;
// para_stack: one of Para_stack_none / Para_stack_bgn / Para_stack_mid; counterpart of MW's $paragraphStack
private int para_stack;
// block_is_bgn_xnde / block_is_end_xnde are set by Process_block and reset at the end of every Process_nl
// NOTE(review): block_is_bgn_blockquote / block_is_end_blockquote are only reset in Clear(), not per line — confirm this is intended
private boolean in_block, block_is_bgn_xnde, block_is_end_xnde, in_blockquote, block_is_bgn_blockquote, block_is_end_blockquote;
// prv_nl_pos: position of the previously processed "\n" (-1 at page start); prv_para: most recently added placeholder; prv_ws_bgn: pending leading-ws count set by Process_pre (0 = none)
private int prv_nl_pos; private Xop_para_tkn prv_para; private int prv_ws_bgn;
public boolean Enabled() {return enabled;} public Xop_para_wkr Enabled_(boolean v) {enabled = v; return this;} private boolean enabled = true;
public Xop_para_wkr Enabled_y_() {enabled = true; return this;} public Xop_para_wkr Enabled_n_() {enabled = false; return this;}
public void Ctor_ctx(Xop_ctx ctx) {}
// Resets all state, recomputes para_enabled and, when enabled, adds the first placeholder at pos 0 with prv_nl_pos = -1
public void Page_bgn(Xop_ctx ctx, Xop_root_tkn root) {
this.Clear();
para_enabled = enabled && ctx.Parse_tid() == Xop_parser_.Parse_tid_page_wiki; // only enable for wikitext (not for template)
if (para_enabled)
Prv_para_new(ctx, root, -1, 0); // create <para> at bos
}
// Restores every state field to its page-start value
private void Clear() {
cur_mode = Mode_none;
para_stack = Para_stack_none;
in_block = block_is_bgn_xnde = block_is_end_xnde = false;
in_blockquote = block_is_bgn_blockquote = block_is_end_blockquote = false;
prv_nl_pos = -1;
prv_para = null;
prv_ws_bgn = 0;
}
public void AutoClose(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {}
// When enabled, treats end-of-source as one last newline (bgn_pos == cur_pos == src_len), closes any open section, then resets state
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {
if (para_enabled) {
Process_nl(ctx, root, src, src_len, src_len);
this.Prv_para_end(); // close anything created by Process_nl()
}
this.Clear();
}
public void Process_block__bgn_y__end_n(Xop_xnde_tag tag) {Process_block(tag, Bool_.Y, Bool_.N);} // NOTE: disables para for rest of page; Process_block__bgn_n__end_y must be called; DATE:2014-04-18
public void Process_block__bgn_n__end_y(Xop_xnde_tag tag) {Process_block(tag, Bool_.N, Bool_.Y);}
// Maps an xnde block mode to Process_block: Block_bgn -> (bgn=Y, end=N); Block_end -> (bgn=N, end=Y); any other mode is a no-op
public void Process_block__xnde(Xop_xnde_tag tag, byte mode) {
if (mode == Xop_xnde_tag.Block_bgn) Process_block(tag, Bool_.Y, Bool_.N);
else if (mode == Xop_xnde_tag.Block_end) Process_block(tag, Bool_.N, Bool_.Y);
}
public void Process_block_lnki_div() { // bgn_lhs is pos of [[; end_lhs is pos of ]]
if (prv_ws_bgn > 0) // if pre at start of line; ignore it b/c of div; EX: "\n\s[[File:A.png|thumb]]" should not produce thumb; also [[File:A.png|right]]; DATE:2014-02-17
prv_ws_bgn = 0;
this.Process_block__bgn_n__end_y(Xop_xnde_tag_.Tag_div);
}
// Records that the current line contains a block element: moves any pending leading ws onto prv_para, overwrites the
// bgn / end xnde flags, and additionally latches the blockquote flags when the tag is blockquote
// NOTE(review): prv_para is dereferenced without a null check when prv_ws_bgn > 0 — presumably always non-null by then; confirm
private void Process_block(Xop_xnde_tag tag, boolean bgn, boolean end) {
if (prv_ws_bgn > 0) {
prv_para.Space_bgn_(prv_ws_bgn);
prv_ws_bgn = 0;
}
block_is_bgn_xnde = bgn;
block_is_end_xnde = end;
switch (tag.Id()) {
case Xop_xnde_tag_.Tid_blockquote:
if (bgn) block_is_bgn_blockquote = true;
if (end) block_is_end_blockquote = true;
break;
}
}
public void Process_block__bgn__nl_w_symbol(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos, Xop_xnde_tag tag) {// handle \n== and \n* \n{|; note that nl is at rng of bgn_pos to bgn_pos + 1 (not cur_pos)
if (!para_enabled) return;
Process_nl(ctx, root, src, bgn_pos, bgn_pos + 1);
Process_block__bgn_y__end_n(tag);
}
// Core state transition, run once per "\n" (bgn_pos = pos of "\n"). It classifies the line that just ended (block
// element / pre line / blank / text), updates prv_para, cur_mode and para_stack accordingly, and always finishes by
// adding a fresh placeholder via Prv_para_new. Statement order matters: prv_para refers to the placeholder for the
// line that just ended until the Prv_para_new call near the end replaces it.
public void Process_nl(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int bgn_pos, int cur_pos) {// REF.MW:Parser.php|doBlockLevels
Dd_clear(ctx);
if (block_is_bgn_xnde || block_is_end_xnde) {
para_stack = Para_stack_none; // MW: $paragraphStack = false;
Prv_para_end(); // MW: $output .= $this->closeParagraph()
if (block_is_bgn_blockquote && !block_is_end_blockquote) // MW: if ( $preOpenMatch and !$preCloseMatch )
in_blockquote = true; // MW: $this->mInPre = true;
else
in_blockquote = false; // XO: turn off blockquote else following para / nl won't work; w:Snappy_(software); DATE:2014-04-25
in_block = !block_is_end_xnde; // MW: $inBlockElem = !$closematch;
}
else if (!in_block && !in_blockquote) { // MW: elseif ( !$inBlockElem && !$this->mInPre ) {
boolean line_is_ws = Line_is_ws(src, bgn_pos);
if (prv_ws_bgn > 0 && (cur_mode == Mode_pre || !line_is_ws)) { // MW: if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) ) {
if (cur_mode != Mode_pre) { // MW: if ( $this->mLastSection !== 'pre' ) {
para_stack = Para_stack_none; // MW: $paragraphStack = false;
prv_para.Space_bgn_(prv_ws_bgn - 1); // -1 to ignore 1st "\s" in "\n\s"; note that prv_ws_bgn only includes spaces, so BOS doesn't matter; DATE:2014-04-14
Prv_para_end(); Prv_para_bgn(Xop_para_tkn.Tid_pre); // MW: $output .= $this->closeParagraph() . '<pre>';
cur_mode = Mode_pre; // MW: $this->mLastSection = 'pre';
}
else { // already in pre
if (line_is_ws) { // line is entirely ws
int next_char_pos = prv_nl_pos + 2; // "\n\s".length
if ( next_char_pos < src.length // bounds check
&& src[next_char_pos] == Byte_ascii.NewLine // is "\n\s\n"; i.e.: "\n" only
) {
ctx.Subs_add(root, ctx.Tkn_mkr().Bry(bgn_pos, bgn_pos, Byte_ascii.NewLine_bry)); // add a "\n" tkn; note that adding a NewLine tkn doesn't work, b/c Xoh_html_wtr has code to remove consecutive \n; PAGE:en.w:Preferred_numbers DATE:2014-06-24
prv_nl_pos = bgn_pos;
}
}
}
prv_ws_bgn = 0; // MW: $t = substr( $t, 1 );
}
else {
if (bgn_pos - prv_nl_pos == 1 || line_is_ws) { // line is blank ("b" for blank) MW: if ( trim( $t ) === '' ) {
if (para_stack != Para_stack_none) { // "b1"; stack has "<p>" or "</p><p>"; output "<br/>"; MW: if ( $paragraphStack ) {
Para_stack_end(cur_pos); Add_br(ctx, root, bgn_pos); // MW: $output .= $paragraphStack . '<br />';
para_stack = Para_stack_none; // MW: $paragraphStack = false;
cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
}
else { // stack is empty
if (cur_mode != Mode_para) { // "b2"; cur is '' or <pre> MW: if ( $this->mLastSection !== 'p' ) {
Prv_para_end(); // MW: $output .= $this->closeParagraph();
cur_mode = Mode_none; // MW: $this->mLastSection = '';
para_stack = Para_stack_bgn; // put <p> on stack MW: $paragraphStack = '<p>';
}
else // "b3"; cur is p
para_stack = Para_stack_mid; // put </p><p> on stack MW: $paragraphStack = '</p><p>';
}
}
else { // line has text ("t" for text); NOTE: tkn already added before \n, so must change prv_para; EX: "a\n" -> this code is called for "\n" but "a" already processed
if (para_stack != Para_stack_none) { // "t1" MW: if ( $paragraphStack ) {
Para_stack_end(cur_pos); // MW: $output .= $paragraphStack;
para_stack = Para_stack_none; // MW: $paragraphStack = false;
cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
}
else if (cur_mode != Mode_para) { // "t2"; cur is '' or <pre> MW: elseif ( $this->mLastSection !== 'p' ) {
Prv_para_end(); Prv_para_bgn(Xop_para_tkn.Tid_para); // MW: $output .= $this->closeParagraph() . '<p>';
cur_mode = Mode_para; // MW: $this->mLastSection = 'p';
}
else {} // "t3"
}
}
}
if (in_blockquote && prv_ws_bgn > 0) // handle blockquote separate; EX: <blockquote>\n\sa\n</blockquote>; note that "\s" needs to be added literally; MW doesn't have this logic specifically, since it assumes all characters go into $output, whereas XO, sets aside the "\s" in "\n\s" separately
prv_para.Space_bgn_(prv_ws_bgn);
prv_ws_bgn = 0; // nl encountered and processed; always prv_ws_bgn set to 0, else ws from one line will carry over to next
// in_blockquote = false;
block_is_bgn_xnde = block_is_end_xnde = false;
// if ( $preCloseMatch && $this->mInPre )
// $this->mInPre = false;
// prv_ws_bgn = false;
Prv_para_new(ctx, root, bgn_pos, cur_pos); // add a prv_para placeholder
if (para_stack == Para_stack_none) // "x1" MW: if ( $paragraphStack === false ) {
if (prv_para != null) prv_para.Nl_bgn_y_(); // add nl; note that "$t" has already been processed; MW: $output .= $t . "\n";
}
// Handles a "\n" followed by leading ws, where txt_pos is the first position after that ws. Tries the tblw-ws trie
// first (nl / xnde / table hooks); otherwise processes the "\n" through Process_nl and records the leading ws in
// prv_ws_bgn so that the NEXT Process_nl can decide whether the line is a pre line.
// Returns the position at which parsing resumes (txt_pos, or the result of the xnde / tblw worker).
public int Process_pre(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int txt_pos) {
Dd_clear(ctx);
Object o = ctx.App().Utl_trie_tblw_ws().MatchAtCur(src, txt_pos, src_len);
if (o != null) { // tblw_ws found
Xop_tblw_ws_itm ws_itm = (Xop_tblw_ws_itm)o;
byte tblw_type = ws_itm.Tblw_type();
switch (tblw_type) {
case Xop_tblw_ws_itm.Type_nl: // \n\s
if (cur_mode == Mode_pre) { // already in pre; just process "\n\s"
ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos, Xop_nl_tkn.Tid_char, 1));
prv_nl_pos = bgn_pos; // NOTE: must update prv_nl_pos; PAGE:en.w:Preferred_number DATE:2014-06-24
return txt_pos;
}
break;
case Xop_tblw_ws_itm.Type_xnde:
if (bgn_pos != Xop_parser_.Doc_bgn_bos)
ctx.Para().Process_nl(ctx, root, src, bgn_pos, cur_pos);
return ctx.Xnde().Make_tkn(ctx, tkn_mkr, root, src, src_len, txt_pos, txt_pos + 1);
default: {
int tblw_rv = ctx.Tblw().Make_tkn_bgn(ctx, tkn_mkr, root, src, src_len, bgn_pos, txt_pos + ws_itm.Hook_len(), false, tblw_type, Xop_tblw_wkr.Called_from_pre, -1, -1);
if (tblw_rv != -1) // \n\s| is valid tblw tkn and processed; otherwise process pre-code below; EX:w:Wikipedia:WikiProject_History/CategoryExample; DATE:2014-04-14
return tblw_rv;
break;
}
}
}
// NOTE: pre lxr emulates MW for "\n\s" by (1) calling Process nl for "\n"; (2) anticipating next line by setting prv_ws_bgn
// EX: "\na\n b\n"; note that "\n " is cur
if (bgn_pos != Xop_parser_.Doc_bgn_bos) // if bos, then don't close 1st para
Process_nl(ctx, root, src, bgn_pos, bgn_pos + 1); // note that tkn is \n\s; so, bgn_pos -> bgn_pos + 1 is \n ...
if (cur_mode == Mode_pre) // in pre_mode
ctx.Subs_add(root, tkn_mkr.Space(root, cur_pos, txt_pos)); // cur_pos to start after \s; do not capture "\s" in "\n\s"; (not sure why not before \s)
prv_ws_bgn = txt_pos - cur_pos + 1;
return txt_pos;
}
// Called for a category lnki: walks root subs backwards from the one before the lnki. If the first sub found is a
// para placeholder and leading ws is pending, marks that placeholder ignored (unless it is the BOS one at Src_bgn 0)
// and clears the pending ws; any other token kind stops the walk. Pending ws is cleared on the fall-through path too.
public void Process_lnki_category(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int pos, int src_len) { // REF.MW:Parser.php|replaceInternalLinks2|Strip the whitespace Category links produce;
if (!para_enabled) return;
int subs_len = root.Subs_len();
for (int i = subs_len - 2; i > -1; i--) { // -2: -1 b/c subs_len is invalid; -1 to skip current lnki
Xop_tkn_itm sub_tkn = root.Subs_get(i);
switch (sub_tkn.Tkn_tid()) {
case Xop_tkn_itm_.Tid_para: // nl found; note this means that BOL -> [[Category:]] is all ws;
if (prv_ws_bgn > 0) { // line begins with ws a
if (sub_tkn.Src_bgn() != 0) // do not ignore BOS para; needed b/c it is often <p>; needed for test;
sub_tkn.Ignore_y_(); // ignore nl (pretty-printing only)
prv_ws_bgn = 0; // remove ws
if (ctx.Stack_has(Xop_tkn_itm_.Tid_list)){ // HACK: if in list, set prv_nl_pos to EOL; only here for one test to pass
int nl_at_eol = -1;
for (int j = pos; j < src_len; j++) { // check if rest of line is ws
byte b = src[j];
switch (b) {
case Byte_ascii.Space: case Byte_ascii.Tab: break; // ignore space / tab
case Byte_ascii.NewLine:
nl_at_eol = j;
j = src_len;
break;
default: // something else besides ws; stop
j = src_len;
break;
}
// this check sits inside the loop; it fires on the iteration that found the "\n", after which j == src_len ends the loop
if (nl_at_eol != -1)
prv_nl_pos = nl_at_eol + 1; // SEE:NOTE_2
}
}
}
return;
default: // exit if anything except para / nl in front of [[Category:]]
i = -1;
break;
}
}
// if (para_found) // BOS exit; just remove prv_ws_bgn
prv_ws_bgn = 0;
}
// Stores prv_nl_pos, creates a new placeholder at para_pos, appends it to root and makes it the current prv_para
private void Prv_para_new(Xop_ctx ctx, Xop_root_tkn root, int prv_nl_pos, int para_pos) {
this.prv_nl_pos = prv_nl_pos;
prv_para = ctx.Tkn_mkr().Para(para_pos);
ctx.Subs_add(root, prv_para);
}
// NOTE(review): unlike Prv_para_bgn, prv_para is not null-checked here for Mode_pre / Mode_para — confirm it cannot be null in those modes
private void Prv_para_end() { // MW: closeParagraph();
// following switch is equivalent to:
// MW: if ( $this->mLastSection != '' )
// MW: $result = '</' . $this->mLastSection . ">\n";
switch (cur_mode) {
case Mode_none: return;
case Mode_pre: prv_para.Para_end_(Xop_para_tkn.Tid_pre); break;
case Mode_para: prv_para.Para_end_(Xop_para_tkn.Tid_para); break;
}
// in_pre = false; // MW: $this->mInPre = false;
cur_mode = Mode_none; // MW: $this->mLastSection = '';
}
// Marks the current placeholder as opening a section of the given Xop_para_tkn.Tid_* kind; no-op when there is no placeholder
private void Prv_para_bgn(byte mode) {
if (prv_para != null) prv_para.Para_bgn_(mode);
}
// Applies the pending para_stack value to the current placeholder; the cur_pos argument is not used by the body
private void Para_stack_end(int cur_pos) { // MW: $output .= $paragraphStack;
switch (para_stack) {
case Para_stack_none: break;
case Para_stack_bgn: prv_para.Para_end_(Xop_para_tkn.Tid_none).Para_bgn_(Xop_para_tkn.Tid_para); break; // '<p>'
case Para_stack_mid: prv_para.Para_end_(Xop_para_tkn.Tid_para).Para_bgn_(Xop_para_tkn.Tid_para); break; // '</p><p>'
}
}
// Appends a zero-width xnde token tagged as br at bgn_pos
private void Add_br(Xop_ctx ctx, Xop_root_tkn root, int bgn_pos) {
ctx.Subs_add(root, ctx.Tkn_mkr().Xnde(bgn_pos, bgn_pos).Tag_(Xop_xnde_tag_.Tag_br));
}
// Returns true when every byte between the previous "\n" (exclusive) and pos (exclusive) is a tab or space; an empty
// range also yields true. Returns false when no previous "\n" has been recorded (prv_nl_pos == -1).
private boolean Line_is_ws(byte[] src, int pos) {
if (prv_nl_pos == -1) return false;
boolean ws = true;
for (int i = prv_nl_pos + 1; i < pos; i++) {
byte b = src[i];
switch (b) {
case Byte_ascii.Tab:
case Byte_ascii.Space:
break;
default:
ws = false;
i = pos; // forces loop exit
break;
}
}
return ws;
}
// Clears the list worker's Dd_chk flag; called at the start of Process_nl and Process_pre
private void Dd_clear(Xop_ctx ctx) {ctx.List().Dd_chk_(false);}
private static final int
Para_stack_none = 0 // false
, Para_stack_bgn = 1 // <p>
, Para_stack_mid = 2 // </p><p>
;
private static final byte
Mode_none = 0 // ''
, Mode_para = 1 // p
, Mode_pre = 2 // pre
;
}
/*
NOTE_1:
xowa uses \n as the leading character for multi-character hooks; EX: "\n*","\n{|","\n==",etc..
For this section of code, xowa treats \n separately from the rest of the hook for the purpose of emulating MW code.
EX: a\n==b==
MW:
- split into two lines: "a", "==b=="
- call process_nl on "a"
- call process_nl on "==b=="
XO:
- split into "tkns": "a", "\n==", "b", "=="
- add "a"
- add "\n=="
- since there is a "\n", call process_nl, which will effectively call it for "a"
- note that page_end will effectively call process_nl on "==b=="
NOTE_2: Category needs to "trim" previous line
EX:
* a
* b
[[Category:c]]
* d
MW does the following: (REF.MW:Parser.php|replaceInternalLinks2|Strip the whitespace Category links produce;)
- removes the \n after b (REF: $s = rtrim( $s . "\n" ); # bug 87)
- trims all space " " in front of [[ (NOTE: this makes it a non-pre line)
- plucks out the [[Category:c]]
- joins everything after ]] (starting with the \n) to the * b (REF: $s .= trim( $prefix . $trail, "\n" ) == '' ? '': $prefix . $trail;)
This effectively "blanks" out the entire line "\n [[Category:c]]" -> ""
XOWA tries to emulate this by doing the following
- mark the para_tkn after "b" (the \n at the end of the "* b" line) as blank
- disable pre for the line
- keep the [[Category:c]], but *simulate* a blank line by moving the prv_nl_pos to after the ]]
NOTE_3: if (last_section_is_pre)
PURPOSE: if Category trims previous nl, but nl was part of pre, deactivate it
REASON: occurs b/c MW does separate passes for pre and Category while XO does one pass.
EX: "a\n [[Category:c]]"
- pre is activated by \n\s
- [[Category:c]] indicates that \n\s should be trimmed
so, disable_pre, etc.
*/

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,109 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Paragraph-worker tests covering interaction of para with pre, lists, html tables and "\r\n" line endings. */
public class Xop_para_wkr_para_tst {
	private Xop_fxt fxt = new Xop_fxt();

	@Before public void init() {
		fxt.Reset();
		fxt.Init_para_y_();
	}
	@After public void teardown() {
		fxt.Init_para_n_();
	}
	/** A ws-pre in effect must be ended by a following xnde pre; EX: b:Knowing Knoppix/Other applications */
	@Test public void Pre_then_xnde_pre() {
		String raw = String_.Concat_lines_nl_skip_last
		( " a"
		, "b<pre>c"
		, "d</pre>"
		, "e"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<pre>a"
		, "</pre>"
		, "b<pre>c"
		, "d</pre>"
		, ""
		, "<p>e"
		, "</p>"
		, ""
		);
		fxt.Test_parse_page_all_str(raw, expd);
	}
	/** A "\s\n" line between lists must start a new list rather than continue the previous one; DATE:2013-07-12 */
	@Test public void List_ignore_pre_lines() {
		String raw = String_.Concat_lines_nl
		( ": a"
		, ":* b"
		, " "
		, ": c"
		, ":* d"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<dl>"
		, " <dd> a"
		, ""
		, " <ul>"
		, " <li> b"
		, " </li>"
		, " </ul>"
		, " </dd>"
		, "</dl>"
		, ""
		, "<dl>"
		, " <dd> c"
		, ""
		, " <ul>"
		, " <li> d"
		, " </li>"
		, " </ul>"
		, " </dd>"
		, "</dl>"
		, ""
		);
		fxt.Test_parse_page_all_str(raw, expd);
	}
	/** Consecutive blank lines inside an html table must not produce multiple breaks; EX:fr.w:Portail:Génie mécanique; DATE:2014-02-17 */
	@Test public void Multiple_nl_in_tblx() {
		String raw = String_.Concat_lines_nl
		( "<table><tr><td>a"
		, "</td>"
		, ""
		, ""
		, ""
		, ""
		, ""
		, "</tr></table>"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<table>"
		, " <tr>"
		, " <td>a"
		, " </td>"
		, " </tr>"
		, "</table>"
		, ""
		);
		fxt.Test_parse_page_all_str(raw, expd);
	}
	/** "\r\n" line endings must behave like "\n"; EX: Special:MovePage; DATE:2014-03-02 */
	@Test public void Ignore_cr() {
		String raw = String_.Concat_lines_nl
		( "a\r"
		, "\r"
		, "b\r"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<p>a"
		, "</p>"
		, ""
		, "<p>b"
		, "</p>"
		, ""
		);
		fxt.Test_parse_page_all_str(raw, expd);
	}
}

View File

@@ -0,0 +1,247 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
public class Xop_para_wkr_pre_tst {
// shared parser fixture; para processing is switched on before each test and off again afterwards
private Xop_fxt fxt = new Xop_fxt();
@Before public void init() {
	fxt.Reset();
	fxt.Init_para_y_();
}
@After public void teardown() {
	fxt.Init_para_n_();
}
// PURPOSE: a ws-only first line must not start a pre; DATE:2013-07-09
@Test public void Pre_ignore_bos() {
	String raw = String_.Concat_lines_nl
	( " "
	, "b"
	);
	String expd = String_.Concat_lines_nl
	( "<p>"
	, "b"
	, "</p>"
	);
	fxt.Test_parse_page_all_str(raw, expd);
}
// PURPOSE: ignoring a ws-only first line must still let the following table parse; EX:commons.wikimedia.org; DATE:2013-07-11
@Test public void Pre_ignore_bos_tblw() {
	String raw = String_.Concat_lines_nl
	( " "
	, "{|"
	, "|-"
	, "|a"
	, "|}"
	);
	String expd = String_.Concat_lines_nl
	( "<table>"
	, " <tr>"
	, " <td>a"
	, " </td>"
	, " </tr>"
	, "</table>"
	);
	fxt.Test_parse_page_all_str(raw, expd);
}
@Test public void Ignore_bos_xnde() { // PURPOSE: space at bgn shouldn't create pre; EX:commons.wikimedia.org; " <center>a\n</center>"; DATE:2013-11-28
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( " <center>a" // NOTE: leading " " matches MW; DATE:2014-06-23
, "</center>"
), String_.Concat_lines_nl_skip_last
( " <center>a"
, "</center>"
, ""
));
}
@Test public void Ignore_pre_in_gallery() {// PURPOSE: pre in gallery should be ignored; EX:uk.w:EP2; DATE:2014-03-11
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.Y_byte;
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "a"
, ""
, " <gallery>"
, " File:A.png"
, " </gallery>"
), String_.Concat_lines_nl_skip_last
( "<p>a"
, "</p>"
, " <ul id=\"xowa_gallery_ul_0\" class=\"gallery mw-gallery-traditional\">"
, " <li id=\"xowa_gallery_li_1\" class=\"gallerybox\" style=\"width: 155px\">"
, " <div style=\"width: 155px\">"
, " <div class=\"thumb\" style=\"width: 150px;\">"
, " <div style=\"margin:15px auto;\">"
, " <a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_1\" alt=\"A.png\" src=\"file:///mem/wiki/repo/trg/thumb/7/0/A.png/120px.png\" width=\"120\" height=\"120\" /></a>"
, " </div>"
, " </div>"
, " <div class=\"gallerytext\">"
, " </div>"
, " </div>"
, " </li>"
, "</ul>"
,""
));
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.N_byte;
}
@Test public void Pre_xnde_gallery() { // PURPOSE: <gallery> should invalidate pre; EX: en.w:Mary, Queen of Scots
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.Y_byte;
fxt.Wiki().Xtn_mgr().Init_by_wiki(fxt.Wiki());
String raw = String_.Concat_lines_nl_skip_last
( " <gallery>"
, "File:A.png|b"
, "</gallery>"
);
fxt.Test_parse_page_wiki_str(raw, String_.Concat_lines_nl_skip_last
( " <ul id=\"xowa_gallery_ul_0\" class=\"gallery mw-gallery-traditional\">" // NOTE: leading " " matches MW; DATE:2014-06-23
, " <li id=\"xowa_gallery_li_1\" class=\"gallerybox\" style=\"width: 155px\">"
, " <div style=\"width: 155px\">"
, " <div class=\"thumb\" style=\"width: 150px;\">"
, " <div style=\"margin:15px auto;\">"
, " <a href=\"/wiki/File:A.png\" class=\"image\" xowa_title=\"A.png\"><img id=\"xowa_file_img_1\" alt=\"\" src=\"file:///mem/wiki/repo/trg/thumb/7/0/A.png/120px.png\" width=\"120\" height=\"120\" /></a>"
, " </div>"
, " </div>"
, " <div class=\"gallerytext\"><p>b"
, "</p>"
, ""
, " </div>"
, " </div>"
, " </li>"
, "</ul>"
));
gplx.xowa.xtns.gallery.Gallery_mgr_base.File_found_mode = Bool_.N_byte;
}
@Test public void Ignore_pre_in_center() {// PURPOSE: pre in gallery should be ignored; EX:uk.w:EP2; DATE:2014-03-11
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "a"
, " <center>b"
, " </center>"
, "d"
), String_.Concat_lines_nl_skip_last
( "<p>a"
, "</p>"
, " <center>b"
, " </center>"
, ""
, "<p>d"
, "</p>"
)
);
}
@Test public void Remove_only_1st_space() { // PURPOSE: pre should only remove 1st space]; EX: w:Wikipedia:WikiProject_History/CategoryExample; DATE:2014-04-14
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( " a"
, " b"
, " c"
), String_.Concat_lines_nl_skip_last
( "<pre> a"
, " b"
, " c"
, "</pre>"
)
);
}
@Test public void Remove_only_1st_space__bos() { // PURPOSE: similar to above but check that pre at \n\s is indented correctly; DATE:2014-04-14
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( ""
, " a"
, " b"
), String_.Concat_lines_nl_skip_last
( ""
, "<pre> a"
, " b"
, "</pre>"
)
);
}
@Test public void Ignore_tblw_td() {// PURPOSE: \n\s| should continue pre; EX:w:Wikipedia:WikiProject_History/CategoryExample; DATE:2014-04-14
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( " a"
, " |"
, " b"
), String_.Concat_lines_nl_skip_last
( "<pre>a"
, "|"
, "b"
, "</pre>"
)
);
}
@Test public void Tab() { // PURPOSE: tab inside pre was being converted to space; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
fxt.Test_html_full_str
( " \ta"
, String_.Concat_lines_nl
( "<pre>\ta"
, "</pre>"
));
}
@Test public void Style() { // PURPOSE: " <style>" was not being put in pre; PAGE:en.w:Cascading_Style_Sheets DATE:2014-06-23
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl
( " <style>"
, " </style>"
), String_.Concat_lines_nl
( "<pre>&lt;style>"
, "&lt;/style>"
, "</pre>"
));
}
@Test public void Nl_only() { // PURPOSE: wiki_pre with \n only was being dropped; PAGE:en.w:Preferred_number DATE:2014-06-24
fxt.Test_html_full_str(String_.Concat_lines_nl_skip_last
( " a"
, " " // was being dropped
, " b"
), String_.Concat_lines_nl
( "<pre>a"
, "" // make sure it's still there
, "b"
, "</pre>"
));
}
@Test public void Nl_w_ws() { // PURPOSE: based on Nl_only; make sure that 1 or more spaces does not add extra \n; PAGE:en.w:Preferred_number DATE:2014-06-24
fxt.Test_html_full_str(String_.Concat_lines_nl_skip_last
( " a"
, " " // 2 spaces
, " b"
), String_.Concat_lines_nl
( "<pre>a"
, " " // 1 space
, "b"
, "</pre>"
));
}
@Test public void Nl_many() { // PURPOSE: handle alternating \n\s; PAGE:en.w:Preferred_number DATE:2014-06-24
fxt.Test_html_full_str(String_.Concat_lines_nl_skip_last
( " a"
, " "
, " b"
, " "
, " c"
), String_.Concat_lines_nl
( "<pre>a"
, ""
, "b"
, ""
, "c"
, "</pre>"
));
}
@Test public void Source() { // PURPOSE: " <source>" in pre has issues; PAGE:en.w:Comment_(computer_programming) DATE:2014-06-23
fxt.Init_para_y_();
fxt.Test_html_wiki_str(String_.Concat_lines_nl
( " "
, " <source>"
, " a"
, " </source>"
, " "
), String_.Concat_lines_nl
( "<p>" // this is wrong, but will be stripped by tidy
, "</p>"
, " <pre>"
, " a"
, "</pre>"
, ""
, "<p><br/>" // also wrong, but leave for now
, "</p>"
));
}
}

View File

@@ -0,0 +1,103 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/**
 * Lexer for the wiki "pre" shorthand: fires on the two-byte hook "\n " (newline followed by a space).
 * Depending on context the hook is emitted as plain newline + space, ignored at the beginning of the
 * source, handed to the table lexer ("\n !"), or passed to the paragraph worker as a pre line.
 */
class Xop_pre_lxr implements Xop_lxr {
	public byte Lxr_tid() {return Xop_lxr_.Tid_pre;}
	/** Registers the "\n " hook in the wiki's core trie. */
	public void Init_by_wiki(Xow_wiki wiki, ByteTrieMgr_fast core_trie) {core_trie.Add(Hook_space, this);}	// NOTE: do not treat \n\t as shorthand pre; EX:pl.w:Main_Page; DATE:2014-05-06
	public void Init_by_lang(Xol_lang lang, ByteTrieMgr_fast core_trie) {}
	/**
	 * Handles one occurrence of the "\n " hook.
	 * @param src      wikitext bytes being parsed
	 * @param src_len  end bound used for all scans of src
	 * @param bgn_pos  position of the hook's "\n", or Xop_parser_.Doc_bgn_bos for the simulated newline at the start of the source
	 * @param cur_pos  position just after the hook's space
	 * @return one of cur_pos, the position of the first non-space byte, or whatever the tblw / para workers
	 *         return; presumably the position where lexing resumes (the caller is not visible here)
	 */
	public int Make_tkn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos) {
		if (!ctx.Para().Enabled()) {	// para disabled; "\n\s" should just be "\n\s"; NOTE: para disabled in <gallery>
			if (bgn_pos != Xop_parser_.Doc_bgn_bos)	// don't add \n if BOS; EX: "<BOS> a" should be " ", not "\n "
				ctx.Subs_add(root, tkn_mkr.NewLine(bgn_pos, bgn_pos + 1, Xop_nl_tkn.Tid_char, 1));
			ctx.Subs_add(root, tkn_mkr.Space(root, cur_pos - 1, cur_pos));
			return cur_pos;
		}
		// skip the run of spaces after the hook; txt_pos is the first byte that is not a space
		int txt_pos = Bry_finder.Find_fwd_while(src, cur_pos, src_len, Byte_ascii.Space);	// NOTE: was Find_fwd_while_tab_or_space, which incorrectly converted tabs to spaces; PAGE:en.w:Cascading_Style_Sheets; DATE:2014-06-23
		if (txt_pos == src_len) return cur_pos;	// "\n\s" at EOS; treat as \n only; EX: "a\n " -> ""; also bounds check
		byte b = src[txt_pos];
		if (bgn_pos == Xop_parser_.Doc_bgn_bos) {	// BOS; gobble up all \s\t; EX: "BOS\s\s\sa" -> "BOSa"
			if (b == Byte_ascii.NewLine) {	// next char is nl
				cur_pos = txt_pos;	// position at nl; NOTE: do not position after nl, else may break hdr, tblw, list, etc; EX: "\s\n{|" needs to preserve "\n" for tblw
				ctx.Subs_add(root, tkn_mkr.Ignore(bgn_pos, cur_pos, Xop_ignore_tkn.Ignore_tid_pre_at_bos));
				return cur_pos;	// ignore pre if blank line at bos; EX: "BOS\s\s\n" -> "BOS\n"
			}
			if (b == Byte_ascii.Lt)	// next char is <; possible xnde; flag so that xnde can escape; DATE:2013-11-28; moved outside Doc_bgn_bos block above; PAGE:en.w:Comment_(computer_programming); DATE:2014-06-23
				ctx.Xnde().Pre_at_bos_(true);
		}
		switch (ctx.Cur_tkn_tid()) {	// close tblw attrs; NOTE: after BOS (since no tblw at BOS) but before "\n !" check
			case Xop_tkn_itm_.Tid_tblw_tb: case Xop_tkn_itm_.Tid_tblw_tr: case Xop_tkn_itm_.Tid_tblw_th:
				Xop_tblw_wkr.Atrs_close(ctx, src, root);
				break;
			case Xop_tkn_itm_.Tid_list:
				if (Close_list(ctx, root, src, src_len, bgn_pos, cur_pos, txt_pos)) {
					// ctx.Para().Process_nl(ctx, root, src, bgn_pos, new_pos, true);	// add blank line for truncated "\n\s"; DATE:2013-07-12; DELETE: DATE:2014-02-18; doesn't seem necessary; doesn't break tests; devised for www.mediawiki.org/wiki/MediaWiki which loads fine
					return txt_pos;	// must exit early; do not process pre
				}
				break;
		}
		if (b == Byte_ascii.Bang) {	// handle "\n !" which can be tbl
			switch (ctx.Cur_tkn_tid()) {	// only when currently inside a tblw token; else fall through to pre
				case Xop_tkn_itm_.Tid_tblw_tb:
				case Xop_tkn_itm_.Tid_tblw_tc:
				case Xop_tkn_itm_.Tid_tblw_tr:
				case Xop_tkn_itm_.Tid_tblw_th:
				case Xop_tkn_itm_.Tid_tblw_td:
				case Xop_tkn_itm_.Tid_tblw_te:
					int new_cur_pos = txt_pos + 1;	// +1 to skip Byte_ascii.Bang
					Xop_tblw_lxr_ws.Make(ctx, tkn_mkr, root, src, src_len, bgn_pos, new_cur_pos, Xop_tblw_wkr.Tblw_type_th, true);
					return new_cur_pos;
			}
		}
		return ctx.Para().Process_pre(ctx, tkn_mkr, root, src, src_len, bgn_pos, cur_pos, txt_pos);
	}
	/**
	 * Called for "\n\s" while the current token is a list; SEE:NOTE_4; EX.en.w:SHA-2
	 * Leaves the list untouched when the text at txt_pos is "[[Category:"; otherwise pops the list stack.
	 * @param txt_pos position of the first non-space byte after the hook
	 * @return true only when the byte at txt_pos is "\n" (blank "\n\s\n" line), in which case the remaining
	 *         lists are also closed and the caller must not process pre; false in every other case
	 */
	private static boolean Close_list(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, int txt_pos) {
		// Find_fwd(..) == txt_pos is used here as a "starts with at txt_pos" check
		if (Bry_finder.Find_fwd(src, Xop_tkn_.Lnki_bgn, txt_pos, src_len) == txt_pos) {	// look for "[["
			// probe with a separate local; txt_pos must keep pointing at the first non-space byte for the "\n" check below
			int ctg_pos = txt_pos + Xop_tkn_.Lnki_bgn.length;
			if (Bry_finder.Find_fwd(src, ctx.Wiki().Ns_mgr().Ns_category().Name_db_w_colon(), ctg_pos, src_len) == ctg_pos)	// look for "Category:"
				return false;	// "[[Category:" found; "\n\s[[Category:" should not close list; note that [[Category]] is invisible
		}
		ctx.Stack_pop_til(root, src, ctx.Stack_idx_typ(Xop_tkn_itm_.Tid_list), true, bgn_pos, cur_pos);	// "* a\n\sb" found; close *a
		if (	txt_pos < src_len	// bounds check
			&&	src[txt_pos] == Byte_ascii.NewLine) {	// NOTE: handle "*a\n\s\n" between lists; DATE:2013-07-12
			Xop_list_wkr_.Close_list_if_present(ctx, root, src, bgn_pos, cur_pos);	// NOTE: above line only closes one list; should probably change to close all lists, but for now, close all lists only if "\n\s", not "\n"; DATE:2013-07-12
			return true;
		}
		return false;
	}
	public static final Xop_pre_lxr _ = new Xop_pre_lxr(); Xop_pre_lxr() {}
	private static final byte[]
		Hook_space = new byte[] {Byte_ascii.NewLine, Byte_ascii.Space}
	;
}
/*
NOTE_4: Close_list
PURPOSE: \n should ordinarily close list. However, if \n[[Category:A]], then don't close list since [[Category:A]] will trim preceding \n
REASON: occurs b/c MW does separate passes for list and Category while XO does one pass.
EX: closes *a list
*a
*b
EX: does not close
*a
[[Category:A]]
*b
*/

View File

@@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/** Token with tid Xop_tkn_itm_.Tid_pre; carries a Pre_tid (null / bgn / end). */
public class Xop_pre_tkn extends Xop_tkn_itm_base {
	public static final byte Pre_tid_null = 0;
	public static final byte Pre_tid_bgn = 1;
	public static final byte Pre_tid_end = 2;
	private byte pre_tid = Pre_tid_null;
	/**
	 * @param bgn         start position, forwarded to Tkn_ini_pos
	 * @param end         end position, forwarded to Tkn_ini_pos
	 * @param pre_tid     one of the Pre_tid_* constants
	 * @param pre_bgn_tkn not read by this constructor; kept so existing callers still compile
	 */
	public Xop_pre_tkn(int bgn, int end, byte pre_tid, Xop_tkn_itm pre_bgn_tkn) {
		Tkn_ini_pos(false, bgn, end);
		this.pre_tid = pre_tid;
	}
	@Override public byte Tkn_tid() {
		return Xop_tkn_itm_.Tid_pre;
	}
	public byte Pre_tid() {
		return pre_tid;
	}
}