mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Xomw: Convert XomwBlockLevelPass
This commit is contained in:
parent
0f92bb55db
commit
4781529d12
@ -0,0 +1,664 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.langs.htmls.*;
|
||||
/**
|
||||
* This is the part of the wikitext parser which handles automatic paragraphs
|
||||
* and conversion of start-of-line prefixes to HTML lists.
|
||||
*/
|
||||
public class XomwBlockLevelPass {
|
||||
private boolean DTopen = false;
|
||||
private boolean inPre = false;
|
||||
private int lastSection = LAST_SECTION_NONE;
|
||||
private boolean linestart;
|
||||
// private $text;
|
||||
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private byte[] find_colon_no_links__before, find_colon_no_links__after;
|
||||
|
||||
// State constants for the definition list colon extraction
|
||||
private static final int
|
||||
COLON_STATE_TEXT = 0
|
||||
, COLON_STATE_TAG = 1
|
||||
, COLON_STATE_TAGSTART = 2
|
||||
, COLON_STATE_CLOSETAG = 3
|
||||
, COLON_STATE_TAGSLASH = 4
|
||||
, COLON_STATE_COMMENT = 5
|
||||
, COLON_STATE_COMMENTDASH = 6
|
||||
, COLON_STATE_COMMENTDASHDASH = 7
|
||||
;
|
||||
|
||||
/**
|
||||
* Make lists from lines starting with ':', '*', '#', etc.
|
||||
*
|
||||
* @param String $text
|
||||
* @param boolean $linestart Whether or not this is at the start of a line.
|
||||
* @return String The lists rendered as HTML
|
||||
*/
|
||||
// public static function doBlockLevels($text, $linestart) {
|
||||
// $pass = new self($text, $linestart);
|
||||
// return $pass->execute();
|
||||
// }
|
||||
public void doBlockLevels(XomwParserCtx pctx, XomwParserBfr pbfr, boolean linestart) {
|
||||
this.linestart = linestart;
|
||||
execute(pctx, pbfr, linestart);
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Private constructor
|
||||
// */
|
||||
// private function __construct($text, $linestart) {
|
||||
// $this->text = $text;
|
||||
// $this->linestart = $linestart;
|
||||
// }
|
||||
|
||||
/**
|
||||
* If a pre or p is open, return the corresponding close tag and update
|
||||
* the state. If no tag is open, return an empty String.
|
||||
* @return String
|
||||
*/
|
||||
private byte[] closeParagraph() {
|
||||
byte[] result = Bry_.Empty;
|
||||
if (this.lastSection != LAST_SECTION_NONE) {
|
||||
result = tmp.Add(lastSection == LAST_SECTION_PARA ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs).Add_byte_nl().To_bry_and_clear(); // $result = '</' . $this->lastSection . ">\n";
|
||||
}
|
||||
this.inPre = false;
|
||||
this.lastSection = LAST_SECTION_NONE;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* getCommon() returns the length of the longest common substring
|
||||
* of both arguments, starting at the beginning of both.
|
||||
*
|
||||
* @param String $st1
|
||||
* @param String $st2
|
||||
*
|
||||
* @return int
|
||||
*/
|
||||
// getCommon() returns the length of the longest common substring
|
||||
// of both arguments, starting at the beginning of both.
|
||||
private int getCommon(byte[] st1, byte[] st2) {
|
||||
int st1Len = st1.length, st2Len = st2.length;
|
||||
int shorter = st1Len < st2Len ? st1Len : st2Len;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < shorter; ++i) {
|
||||
if (st1[i] != st2[i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the list item element identified by the prefix character.
|
||||
*
|
||||
* @param String $char
|
||||
*
|
||||
* @return String
|
||||
*/
|
||||
private byte[] openList(byte c) {
|
||||
byte[] result = this.closeParagraph();
|
||||
|
||||
if (c == Byte_ascii.Star)
|
||||
result = Bry_.Add(result, Bry_.new_a7("<ul><li>"));
|
||||
else if (c == Byte_ascii.Hash)
|
||||
result = Bry_.Add(result, Bry_.new_a7("<ol><li>"));
|
||||
else if (c == Byte_ascii.Colon)
|
||||
result = Bry_.Add(result, Bry_.new_a7("<dl><dd>"));
|
||||
else if (c == Byte_ascii.Semic) {
|
||||
result = Bry_.Add(result, Bry_.new_a7("<dl><dt>"));
|
||||
this.DTopen = true;
|
||||
}
|
||||
else {
|
||||
result = Bry_.new_a7("<!-- ERR 1 -->");
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the current list item and open the next one.
|
||||
* @param String $char
|
||||
*
|
||||
* @return String
|
||||
*/
|
||||
private byte[] nextItem(byte c) {
|
||||
if (c == Byte_ascii.Star || c == Byte_ascii.Hash) {
|
||||
return Bry_.new_a7("</li>\n<li>");
|
||||
}
|
||||
else if (c == Byte_ascii.Colon || c == Byte_ascii.Semic) {
|
||||
byte[] close = Bry_.new_a7("</dd>\n");
|
||||
if (this.DTopen) {
|
||||
close = Bry_.new_a7("</dt>\n");
|
||||
}
|
||||
if (c == Byte_ascii.Semic) {
|
||||
this.DTopen = true;
|
||||
return Bry_.Add(close, Bry_.new_a7("<dt>"));
|
||||
}
|
||||
else {
|
||||
this.DTopen = false;
|
||||
return Bry_.Add(close, Bry_.new_a7("<dd>"));
|
||||
}
|
||||
}
|
||||
return Bry_.new_a7("<!-- ERR 2 -->");
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the current list item identified by the prefix character.
|
||||
* @param String $char
|
||||
*
|
||||
* @return String
|
||||
*/
|
||||
private byte[] closeList(byte c) {
|
||||
byte[] text = null;
|
||||
if (c == Byte_ascii.Star) {
|
||||
text = Bry_.new_a7("</li></ul>");
|
||||
}
|
||||
else if (c == Byte_ascii.Hash) {
|
||||
text = Bry_.new_a7("</li></ol>");
|
||||
}
|
||||
else if (c == Byte_ascii.Colon) {
|
||||
if (this.DTopen) {
|
||||
this.DTopen = false;
|
||||
text = Bry_.new_a7("</dt></dl>");
|
||||
}
|
||||
else {
|
||||
text = Bry_.new_a7("</dd></dl>");
|
||||
}
|
||||
}
|
||||
else {
|
||||
return Bry_.new_a7("<!-- ERR 3 -->");
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute the pass.
|
||||
* @return String
|
||||
*/
|
||||
public void execute(XomwParserCtx pctx, XomwParserBfr pbfr, boolean linestart) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
// XO.STATIC
|
||||
if (block_chars_ary == null) {
|
||||
synchronized (Type_adp_.ClassOf_obj(this)) {
|
||||
block_chars_ary = Block_chars_ary__new();
|
||||
openMatchTrie = Btrie_slim_mgr.ci_a7().Add_many_str
|
||||
( "<table", "<h1", "<h2", "<h3", "<h4", "<h5", "<h6", "<pre", "<tr"
|
||||
, "<p", "<ul", "<ol", "<dl", "<li", "</tr", "</td", "</th");
|
||||
closeMatchTrie = Btrie_slim_mgr.ci_a7().Add_many_str
|
||||
( "</table", "</h1", "</h2", "</h3", "</h4", "</h5", "</h6"
|
||||
, "<td", "<th", "<blockquote", "</blockquote", "<div", "</div", "<hr", "</pre", "</p", "</mw:"
|
||||
, XomwParser.MARKER_PREFIX_STR + "-pre"
|
||||
, "</li", "</ul", "</ol", "</dl", "<center", "</center");
|
||||
blockquoteTrie = Btrie_slim_mgr.ci_a7().Add_many_str("<blockquote", "</blockquote");
|
||||
pre_trie = Btrie_slim_mgr.ci_a7().Add_str_int("<pre", PRE_BGN).Add_str_int("</pre", PRE_END);
|
||||
}
|
||||
}
|
||||
|
||||
// clear state
|
||||
this.inPre = false;
|
||||
this.lastSection = LAST_SECTION_NONE;
|
||||
byte[] prefix2 = null;
|
||||
bfr.Clear();
|
||||
|
||||
// Parsing through the text line by line. The main thing
|
||||
// happening here is handling of block-level elements p, pre,
|
||||
// and making lists from lines starting with * # : etc.
|
||||
byte[] lastPrefix = Bry_.Empty;
|
||||
this.DTopen = false;
|
||||
boolean inBlockElem = false;
|
||||
int prefixLen = 0;
|
||||
byte pendingPTag = PARA_STACK_NONE;
|
||||
boolean inBlockquote = false;
|
||||
|
||||
// PORTED.SPLIT: $textLines = StringUtils::explode("\n", $text);
|
||||
int lineBgn = src_bgn;
|
||||
while (lineBgn < src_end) {
|
||||
int lineEnd = Bry_find_.Find_fwd(src, Byte_ascii.Nl, lineBgn);
|
||||
if (lineEnd == Bry_find_.Not_found)
|
||||
lineEnd = src_end;
|
||||
|
||||
// Fix up linestart
|
||||
if (!this.linestart) {
|
||||
bfr.Add_mid(src, lineBgn, lineEnd);
|
||||
this.linestart = true;
|
||||
continue;
|
||||
}
|
||||
// * = ul
|
||||
// # = ol
|
||||
// ; = dt
|
||||
// : = dd
|
||||
int lastPrefixLen = lastPrefix.length;
|
||||
|
||||
// PORTED.BGN: preCloseMatch = preg_match('/<\\/pre/i', $oLine); preOpenMatch = preg_match('/<pre/i', $oLine);
|
||||
int preCur = lineBgn;
|
||||
boolean preCloseMatch = false;
|
||||
boolean preOpenMatch = false;
|
||||
while (true) {
|
||||
if (preCur >= lineEnd)
|
||||
break;
|
||||
Object o = pre_trie.Match_at(trv, src, preCur, lineEnd);
|
||||
if (o == null)
|
||||
preCur++;
|
||||
else {
|
||||
int pre_tid = Int_.cast(o);
|
||||
if (pre_tid == PRE_BGN)
|
||||
preOpenMatch = true;
|
||||
else if (pre_tid == PRE_END)
|
||||
preCloseMatch = true;
|
||||
preCur = trv.Pos();
|
||||
}
|
||||
}
|
||||
// PORTED.END
|
||||
|
||||
byte[] prefix = null, t = null;
|
||||
// If not in a <pre> element, scan for and figure out what prefixes are there.
|
||||
if (!this.inPre) {
|
||||
// Multiple prefixes may abut each other for nested lists.
|
||||
prefixLen = XophpString.strspn_fwd__ary(src, block_chars_ary, lineBgn, lineEnd, lineEnd); // strspn($oLine, '*#:;');
|
||||
prefix = XophpString.substr(src, lineBgn, prefixLen);
|
||||
|
||||
// eh?
|
||||
// ; and : are both from definition-lists, so they're equivalent
|
||||
// for the purposes of determining whether or not we need to open/close
|
||||
// elements.
|
||||
// substr($inputLine, $prefixLength);
|
||||
prefix2 = Bry_.Replace(prefix, Byte_ascii.Semic, Byte_ascii.Colon);
|
||||
t = Bry_.Mid(src, lineBgn + prefixLen, lineEnd);
|
||||
this.inPre = preOpenMatch;
|
||||
}
|
||||
else {
|
||||
// Don't interpret any other prefixes in preformatted text
|
||||
prefixLen = 0;
|
||||
prefix = prefix2 = Bry_.Empty;
|
||||
t = Bry_.Mid(src, lineBgn, lineEnd);
|
||||
}
|
||||
|
||||
// List generation
|
||||
byte[] term = null, t2 = null;
|
||||
int commonPrefixLen = -1;
|
||||
if (prefixLen > 0 && Bry_.Eq(lastPrefix, prefix2)) {
|
||||
// Same as the last item, so no need to deal with nesting or opening stuff
|
||||
bfr.Add(this.nextItem(XophpString.substr_byte(prefix, -1)));
|
||||
pendingPTag = PARA_STACK_NONE;
|
||||
|
||||
if (prefixLen > 0 && prefix[prefixLen - 1] == Byte_ascii.Semic) {
|
||||
// The one nasty exception: definition lists work like this:
|
||||
// ; title : definition text
|
||||
// So we check for : in the remainder text to split up the
|
||||
// title and definition, without b0rking links.
|
||||
term = t2 = Bry_.Empty;
|
||||
if (this.findColonNoLinks(t, term, t2) != Bry_find_.Not_found) {
|
||||
term = find_colon_no_links__before;
|
||||
t2 = find_colon_no_links__after;
|
||||
t = t2;
|
||||
bfr.Add(term).Add(nextItem(Byte_ascii.Colon));
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (prefixLen > 0 || lastPrefixLen > 0) {
|
||||
// We need to open or close prefixes, or both.
|
||||
|
||||
// Either open or close a level...
|
||||
commonPrefixLen = this.getCommon(prefix, lastPrefix);
|
||||
pendingPTag = PARA_STACK_NONE;
|
||||
|
||||
// Close all the prefixes which aren't shared.
|
||||
while (commonPrefixLen < lastPrefixLen) {
|
||||
bfr.Add(this.closeList(lastPrefix[lastPrefixLen - 1]));
|
||||
--lastPrefixLen;
|
||||
}
|
||||
|
||||
// Continue the current prefix if appropriate.
|
||||
if (prefixLen <= commonPrefixLen && commonPrefixLen > 0) {
|
||||
bfr.Add(this.nextItem(prefix[commonPrefixLen - 1]));
|
||||
}
|
||||
|
||||
// Open prefixes where appropriate.
|
||||
if (Bry_.Len_gt_0(lastPrefix) && prefixLen > commonPrefixLen) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
while (prefixLen > commonPrefixLen) {
|
||||
byte c = XophpString.substr_byte(prefix, commonPrefixLen, 1);
|
||||
bfr.Add(this.openList(c));
|
||||
|
||||
if (c == Byte_ascii.Semic) {
|
||||
// @todo FIXME: This is dupe of code above
|
||||
if (findColonNoLinks(t, term, t2) != Bry_find_.Not_found) {
|
||||
term = find_colon_no_links__before;
|
||||
t2 = find_colon_no_links__after;
|
||||
t = t2;
|
||||
bfr.Add(term).Add(nextItem(Byte_ascii.Colon));
|
||||
}
|
||||
}
|
||||
++commonPrefixLen;
|
||||
}
|
||||
if (prefixLen == 0 && Bry_.Len_gt_0(lastPrefix)) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
lastPrefix = prefix2;
|
||||
}
|
||||
|
||||
// If we have no prefixes, go to paragraph mode.
|
||||
if (0 == prefixLen) {
|
||||
// No prefix (not in list)--go to paragraph mode
|
||||
// @todo consider using a stack for nestable elements like span, table and div
|
||||
int tLen = t.length;
|
||||
|
||||
// XO.MW.PORTED.BGN:
|
||||
boolean openMatch = XophpPreg.match(openMatchTrie, trv, t, 0, tLen) != null;
|
||||
boolean closeMatch = XophpPreg.match(closeMatchTrie, trv, t, 0, tLen) != null;
|
||||
// XO.MW.PORTED.END
|
||||
if (openMatch || closeMatch) {
|
||||
pendingPTag = PARA_STACK_NONE;
|
||||
// @todo bug 5718: paragraph closed
|
||||
bfr.Add(this.closeParagraph());
|
||||
if (preOpenMatch && !preCloseMatch) {
|
||||
this.inPre = true;
|
||||
}
|
||||
int bqOffset = 0;
|
||||
// PORTED:preg_match('/<(\\/?)blockquote[\s>]/i', t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset)
|
||||
while (true) {
|
||||
Object o = XophpPreg.match(blockquoteTrie, trv, t, bqOffset, tLen);
|
||||
if (o == null) { // no more blockquotes found; exit
|
||||
break;
|
||||
}
|
||||
else {
|
||||
byte[] bq_bry = (byte[])o;
|
||||
inBlockquote = bq_bry[1] != Byte_ascii.Slash; // is this a close tag?
|
||||
bqOffset = trv.Pos();
|
||||
}
|
||||
}
|
||||
// PORTED:END
|
||||
inBlockElem = !closeMatch;
|
||||
}
|
||||
else if (!inBlockElem && !this.inPre) {
|
||||
if (XophpString.substr_byte(t, 0) == Byte_ascii.Space
|
||||
&& (this.lastSection == LAST_SECTION_PRE || Bry_.Trim(t) != Bry_.Empty)
|
||||
&& !inBlockquote
|
||||
) {
|
||||
// pre
|
||||
if (this.lastSection != LAST_SECTION_PRE) {
|
||||
pendingPTag = PARA_STACK_NONE;
|
||||
bfr.Add(closeParagraph()).Add(Gfh_tag_.Pre_lhs);
|
||||
this.lastSection = LAST_SECTION_PRE;
|
||||
}
|
||||
t = Bry_.Mid(t, 1);
|
||||
}
|
||||
else {
|
||||
// paragraph
|
||||
if (Bry_.Trim(t) == Bry_.Empty) {
|
||||
if (pendingPTag != PARA_STACK_NONE) {
|
||||
ParaStackAdd(bfr, pendingPTag);
|
||||
bfr.Add_str_a7("<br />");
|
||||
pendingPTag = PARA_STACK_NONE;
|
||||
this.lastSection = LAST_SECTION_PARA;
|
||||
}
|
||||
else {
|
||||
if (this.lastSection != LAST_SECTION_PARA) {
|
||||
bfr.Add(this.closeParagraph());
|
||||
this.lastSection = LAST_SECTION_NONE;
|
||||
pendingPTag = PARA_STACK_BGN;
|
||||
}
|
||||
else {
|
||||
pendingPTag = PARA_STACK_MID;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (pendingPTag != PARA_STACK_NONE) {
|
||||
ParaStackAdd(bfr, pendingPTag);
|
||||
pendingPTag = PARA_STACK_NONE;
|
||||
this.lastSection = LAST_SECTION_PARA;
|
||||
}
|
||||
else if (lastSection != LAST_SECTION_PARA) {
|
||||
bfr.Add(this.closeParagraph()).Add(Gfh_tag_.P_lhs);
|
||||
this.lastSection = LAST_SECTION_PARA;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// somewhere above we forget to get out of pre block (bug 785)
|
||||
if (preCloseMatch && this.inPre) {
|
||||
this.inPre = false;
|
||||
}
|
||||
if (pendingPTag == PARA_STACK_NONE) {
|
||||
bfr.Add(t);
|
||||
if (prefixLen == 0) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
}
|
||||
|
||||
lineBgn = lineEnd + 1;
|
||||
}
|
||||
|
||||
while (prefixLen > 0) {
|
||||
bfr.Add(this.closeList(prefix2[prefixLen - 1]));
|
||||
--prefixLen;
|
||||
if (prefixLen > 0) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
}
|
||||
if (this.lastSection != LAST_SECTION_NONE) {
|
||||
bfr.Add(this.lastSection == LAST_SECTION_PARA ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs);
|
||||
this.lastSection = LAST_SECTION_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Split up a String on ':', ignoring any occurrences inside tags
|
||||
* to prevent illegal overlapping.
|
||||
*
|
||||
* @param String $str The String to split
|
||||
* @param String &$before Set to everything before the ':'
|
||||
* @param String &$after Set to everything after the ':'
|
||||
* @throws MWException
|
||||
* @return String The position of the ':', or false if none found
|
||||
*/
|
||||
private int findColonNoLinks(byte[] str, byte[] before, byte[] after) {
|
||||
int len = str.length;
|
||||
int colonPos = XophpString.strpos(str, Byte_ascii.Colon, 0, len);
|
||||
if (colonPos == Bry_find_.Not_found) {
|
||||
// Nothing to find!
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
|
||||
int ltPos = XophpString.strpos(str, Byte_ascii.Angle_bgn, 0, len);
|
||||
if (ltPos == Bry_find_.Not_found || ltPos > colonPos) {
|
||||
// Easy; no tag nesting to worry about
|
||||
// XOMW: MW passes before / after by reference; XO: changes member and depends on callers to update
|
||||
find_colon_no_links__before = XophpString.substr(str, 0, colonPos);
|
||||
find_colon_no_links__after = XophpString.substr(str, colonPos + 1);
|
||||
return colonPos;
|
||||
}
|
||||
|
||||
// Ugly state machine to walk through avoiding tags.
|
||||
int state = COLON_STATE_TEXT;
|
||||
int level = 0;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte c = str[i];
|
||||
|
||||
switch (state) {
|
||||
case COLON_STATE_TEXT:
|
||||
switch (c) {
|
||||
case Byte_ascii.Angle_bgn:
|
||||
// Could be either a <start> tag or an </end> tag
|
||||
state = COLON_STATE_TAGSTART;
|
||||
break;
|
||||
case Byte_ascii.Colon:
|
||||
if (level == 0) {
|
||||
// We found it!
|
||||
find_colon_no_links__before = XophpString.substr(str, 0, i);
|
||||
find_colon_no_links__after = XophpString.substr(str, i + 1);
|
||||
return i;
|
||||
}
|
||||
// Embedded in a tag; don't break it.
|
||||
break;
|
||||
default:
|
||||
// Skip ahead looking for something interesting
|
||||
colonPos = XophpString.strpos(str, Byte_ascii.Colon, i, len);
|
||||
if (colonPos == Bry_find_.Not_found) {
|
||||
// Nothing else interesting
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
ltPos = XophpString.strpos(str, Byte_ascii.Angle_bgn, i, len);
|
||||
if (level == 0) {
|
||||
if (ltPos == Bry_find_.Not_found || colonPos < ltPos) {
|
||||
// We found it!
|
||||
find_colon_no_links__before = XophpString.substr(str, 0, colonPos);
|
||||
find_colon_no_links__after = XophpString.substr(str, colonPos + 1);
|
||||
return i;
|
||||
}
|
||||
}
|
||||
if (ltPos == Bry_find_.Not_found) {
|
||||
// Nothing else interesting to find; abort!
|
||||
// We're nested, but there's no close tags left. Abort!
|
||||
i = len; // break 2
|
||||
break;
|
||||
}
|
||||
// Skip ahead to next tag start
|
||||
i = ltPos;
|
||||
state = COLON_STATE_TAGSTART;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_TAG:
|
||||
// In a <tag>
|
||||
switch (c) {
|
||||
case Byte_ascii.Angle_end:
|
||||
level++;
|
||||
state = COLON_STATE_TEXT;
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
// Slash may be followed by >?
|
||||
state = COLON_STATE_TAGSLASH;
|
||||
break;
|
||||
default:
|
||||
// ignore
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_TAGSTART:
|
||||
switch (c) {
|
||||
case Byte_ascii.Slash:
|
||||
state = COLON_STATE_CLOSETAG;
|
||||
break;
|
||||
case Byte_ascii.Bang:
|
||||
state = COLON_STATE_COMMENT;
|
||||
break;
|
||||
case Byte_ascii.Angle_end:
|
||||
// Illegal early close? This shouldn't happen D:
|
||||
state = COLON_STATE_TEXT;
|
||||
break;
|
||||
default:
|
||||
state = COLON_STATE_TAG;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_CLOSETAG:
|
||||
// In a </tag>
|
||||
if (c == Byte_ascii.Angle_end) {
|
||||
level--;
|
||||
if (level < 0) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; too many close tags");
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
state = COLON_STATE_TEXT;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_TAGSLASH:
|
||||
if (c == Byte_ascii.Angle_end) {
|
||||
// Yes, a self-closed tag <blah/>
|
||||
state = COLON_STATE_TEXT;
|
||||
}
|
||||
else {
|
||||
// Probably we're jumping the gun, and this is an attribute
|
||||
state = COLON_STATE_TAG;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_COMMENT:
|
||||
if (c == Byte_ascii.Dash) {
|
||||
state = COLON_STATE_COMMENTDASH;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_COMMENTDASH:
|
||||
if (c == Byte_ascii.Dash) {
|
||||
state = COLON_STATE_COMMENTDASHDASH;
|
||||
}
|
||||
else {
|
||||
state = COLON_STATE_COMMENT;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_COMMENTDASHDASH:
|
||||
if (c == Byte_ascii.Angle_bgn) {
|
||||
state = COLON_STATE_TEXT;
|
||||
}
|
||||
else {
|
||||
state = COLON_STATE_COMMENT;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw Err_.new_wo_type("State machine error");
|
||||
}
|
||||
}
|
||||
if (level > 0) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; not enough close tags (level ~{0}, state ~{1})", level, state);
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
|
||||
private static final byte
|
||||
LAST_SECTION_NONE = 0 // ''
|
||||
, LAST_SECTION_PARA = 1 // p
|
||||
, LAST_SECTION_PRE = 2 // pre
|
||||
;
|
||||
private static final byte
|
||||
PARA_STACK_NONE = 0 // false
|
||||
, PARA_STACK_BGN = 1 // <p>
|
||||
, PARA_STACK_MID = 2 // </p><p>
|
||||
;
|
||||
private static final int PRE_BGN = 0, PRE_END = 1;
|
||||
private static Btrie_slim_mgr pre_trie;
|
||||
private static boolean[] block_chars_ary;
|
||||
private static boolean[] Block_chars_ary__new() {
|
||||
boolean[] rv = new boolean[256];
|
||||
rv[Byte_ascii.Star] = true;
|
||||
rv[Byte_ascii.Hash] = true;
|
||||
rv[Byte_ascii.Colon] = true;
|
||||
rv[Byte_ascii.Semic] = true;
|
||||
return rv;
|
||||
}
|
||||
private static Btrie_slim_mgr openMatchTrie, closeMatchTrie, blockquoteTrie;
|
||||
private static void ParaStackAdd(Bry_bfr bfr, int id) {
|
||||
switch (id) {
|
||||
case PARA_STACK_BGN: bfr.Add_str_a7("<p>"); break;
|
||||
case PARA_STACK_MID: bfr.Add_str_a7("</p><p>"); break;
|
||||
default: throw Err_.new_unhandled_default(id);
|
||||
}
|
||||
}
|
||||
}
|
@ -16,8 +16,8 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
import gplx.xowa.mediawiki.includes.linkers.*;
|
||||
public class Xomw_block_level_pass__tst {
|
||||
private final Xomw_block_level_pass__fxt fxt = new Xomw_block_level_pass__fxt();
|
||||
public class XomwBlockLevelPassTest {
|
||||
private final XomwBlockLevelPassFxt fxt = new XomwBlockLevelPassFxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__do_block_levels(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
@ -27,8 +27,8 @@ public class Xomw_block_level_pass__tst {
|
||||
));
|
||||
}
|
||||
}
|
||||
class Xomw_block_level_pass__fxt {
|
||||
private final Xomw_block_level_pass block_level_pass = new Xomw_block_level_pass();
|
||||
class XomwBlockLevelPassFxt {
|
||||
private final XomwBlockLevelPass block_level_pass = new XomwBlockLevelPass();
|
||||
private final XomwParserCtx pctx = new XomwParserCtx();
|
||||
private final XomwParserBfr pbfr = new XomwParserBfr();
|
||||
private boolean apos = true;
|
@ -271,7 +271,7 @@ public class XomwParser implements XomwParserIface {
|
||||
private final Xomw_lnke_wkr lnkeWkr;
|
||||
private final Xomw_magiclinks_wkr magiclinksWkr;
|
||||
private final Xomw_nbsp_wkr nbspWkr = new Xomw_nbsp_wkr();
|
||||
private final Xomw_block_level_pass blockWkr = new Xomw_block_level_pass();
|
||||
private final XomwBlockLevelPass blockWkr = new XomwBlockLevelPass();
|
||||
private final Xomw_doubleunder_data doubleunder_data = new Xomw_doubleunder_data();
|
||||
private static Xomw_regex_space regex_space;
|
||||
private static Xomw_regex_boundary regex_boundary;
|
||||
@ -1692,7 +1692,7 @@ public class XomwParser implements XomwParserIface {
|
||||
dirty = true;
|
||||
byte[] protocol_bry = (byte[])protocol_obj;
|
||||
if (called_by_bry) trg = Bry_bfr_.New();
|
||||
trg.Add_bry_many(XomwStripState.Bry__marker__bgn, Bry__noparse, protocol_bry);
|
||||
trg.Add_bry_many(XomwParser.MARKER_PREFIX, Bry__noparse, protocol_bry);
|
||||
cur += protocol_bry.length;
|
||||
prv = cur;
|
||||
}
|
||||
|
@ -327,11 +327,11 @@ public class XomwStripState {
|
||||
// public function killMarkers($text) {
|
||||
// return preg_replace(this.regex, '', $text);
|
||||
// }
|
||||
public static final String Str__marker_bgn = "\u007f'\"`UNIQ-";
|
||||
public static final byte[]
|
||||
Bry__marker__bgn = Bry_.new_a7(Str__marker_bgn)
|
||||
, Bry__marker__end = Bry_.new_a7("-QINU`\"'\u007f")
|
||||
;
|
||||
// public static final String Str__marker_bgn = "\u007f'\"`UNIQ-";
|
||||
// public static final byte[]
|
||||
// Bry__marker__bgn = Bry_.new_a7(Str__marker_bgn)
|
||||
// , Bry__marker__end = Bry_.new_a7("-QINU`\"'\u007f")
|
||||
// ;
|
||||
public static final byte TYPE_GENERAL = 1, TYPE_NOWIKI = 2, TYPE_BOTH = 3;
|
||||
}
|
||||
class XomwStripItem {
|
||||
|
@ -1,581 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.langs.htmls.*;
|
||||
public class Xomw_block_level_pass {
|
||||
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private boolean in_pre, dt_open;
|
||||
private int last_section;
|
||||
private byte[] find_colon_no_links__before, find_colon_no_links__after;
|
||||
|
||||
public void doBlockLevels(XomwParserCtx pctx, XomwParserBfr pbfr, boolean line_start) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
// XO.STATIC
|
||||
if (block_chars_ary == null) {
|
||||
synchronized (Type_adp_.ClassOf_obj(this)) {
|
||||
block_chars_ary = Block_chars_ary__new();
|
||||
open_match_trie = Btrie_slim_mgr.ci_a7().Add_many_str
|
||||
("<table", "<h1", "<h2", "<h3", "<h4", "<h5", "<h6", "<pre", "<tr", "<p", "<ul", "<ol", "<dl", "<li", "</tr", "</td", "</th");
|
||||
close_match_trie = Btrie_slim_mgr.ci_a7().Add_many_str
|
||||
( "</table", "</h1", "</h2", "</h3", "</h4", "</h5", "</h6", "<td", "<th", "<blockquote", "</blockquote", "<div", "</div", "<hr"
|
||||
, "</pre", "</p", "</mw:", XomwStripState.Str__marker_bgn + "-pre", "</li", "</ul", "</ol", "</dl", "<center", "</center");
|
||||
blockquote_trie = Btrie_slim_mgr.ci_a7().Add_many_str("<blockquote", "</blockquote");
|
||||
pre_trie = Btrie_slim_mgr.ci_a7().Add_str_int("<pre", Pre__bgn).Add_str_int("</pre", Pre__end);
|
||||
}
|
||||
}
|
||||
|
||||
// Parsing through the text line by line. The main thing
|
||||
// happening here is handling of block-level elements p, pre,
|
||||
// and making lists from lines starting with * # : etc.
|
||||
byte[] last_prefix = Bry_.Empty;
|
||||
bfr.Clear();
|
||||
this.dt_open = false;
|
||||
boolean in_block_elem = false;
|
||||
int prefix_len = 0;
|
||||
byte para_stack = Para_stack__none;
|
||||
boolean in_blockquote = false;
|
||||
this.in_pre = false;
|
||||
this.last_section = Last_section__none;
|
||||
byte[] prefix2 = null;
|
||||
|
||||
// PORTED.SPLIT: $textLines = StringUtils::explode("\n", $text);
|
||||
int line_bgn = src_bgn;
|
||||
while (line_bgn < src_end) {
|
||||
int line_end = Bry_find_.Find_fwd(src, Byte_ascii.Nl, line_bgn);
|
||||
if (line_end == Bry_find_.Not_found)
|
||||
line_end = src_end;
|
||||
|
||||
// Fix up line_start
|
||||
if (!line_start) {
|
||||
bfr.Add_mid(src, line_bgn, line_end);
|
||||
line_start = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// * = ul
|
||||
// # = ol
|
||||
// ; = dt
|
||||
// : = dd
|
||||
int last_prefix_len = last_prefix.length;
|
||||
|
||||
// PORTED: pre_close_match = preg_match('/<\\/pre/i', $oLine); pre_open_match = preg_match('/<pre/i', $oLine);
|
||||
int pre_cur = line_bgn;
|
||||
boolean pre_close_match = false;
|
||||
boolean pre_open_match = false;
|
||||
|
||||
while (true) {
|
||||
if (pre_cur >= line_end)
|
||||
break;
|
||||
Object o = pre_trie.Match_at(trv, src, pre_cur, line_end);
|
||||
if (o == null)
|
||||
pre_cur++;
|
||||
else {
|
||||
int pre_tid = Int_.cast(o);
|
||||
if (pre_tid == Pre__bgn)
|
||||
pre_open_match = true;
|
||||
else if (pre_tid == Pre__end)
|
||||
pre_close_match = true;
|
||||
pre_cur = trv.Pos();
|
||||
}
|
||||
}
|
||||
|
||||
byte[] prefix = null, t = null;
|
||||
// If not in a <pre> element, scan for and figure out what prefixes are there.
|
||||
if (!in_pre) {
|
||||
// Multiple prefixes may abut each other for nested lists.
|
||||
prefix_len = XophpString.strspn_fwd__ary(src, block_chars_ary, line_bgn, line_end, line_end); // strspn($oLine, '*#:;');
|
||||
prefix = XophpString.substr(src, line_bgn, prefix_len);
|
||||
|
||||
// eh?
|
||||
// ; and : are both from definition-lists, so they're equivalent
|
||||
// for the purposes of determining whether or not we need to open/close
|
||||
// elements.
|
||||
// substr( $inputLine, $prefixLength );
|
||||
prefix2 = Bry_.Replace(prefix, Byte_ascii.Semic, Byte_ascii.Colon);
|
||||
t = Bry_.Mid(src, line_bgn + prefix_len, line_end);
|
||||
in_pre = pre_open_match;
|
||||
}
|
||||
else {
|
||||
// Don't interpret any other prefixes in preformatted text
|
||||
prefix_len = 0;
|
||||
prefix = prefix2 = Bry_.Empty;
|
||||
t = Bry_.Mid(src, line_bgn, line_end);
|
||||
}
|
||||
|
||||
// List generation
|
||||
byte[] term = null, t2 = null;
|
||||
int common_prefix_len = -1;
|
||||
if (prefix_len > 0 && Bry_.Eq(last_prefix, prefix2)) {
|
||||
// Same as the last item, so no need to deal with nesting or opening stuff
|
||||
bfr.Add(Next_item(XophpString.substr_byte(prefix, -1)));
|
||||
para_stack = Para_stack__none;
|
||||
|
||||
if (prefix_len > 0 && prefix[prefix_len - 1] == Byte_ascii.Semic) {
|
||||
// The one nasty exception: definition lists work like this:
|
||||
// ; title : definition text
|
||||
// So we check for : in the remainder text to split up the
|
||||
// title and definition, without b0rking links.
|
||||
term = t2 = Bry_.Empty;
|
||||
if (Find_colon_no_links(t, term, t2) != Bry_find_.Not_found) {
|
||||
term = find_colon_no_links__before;
|
||||
t2 = find_colon_no_links__after;
|
||||
t = t2;
|
||||
bfr.Add(term).Add(Next_item(Byte_ascii.Colon));
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (prefix_len > 0 || last_prefix_len > 0) {
|
||||
// We need to open or close prefixes, or both.
|
||||
|
||||
// Either open or close a level...
|
||||
common_prefix_len = Get_common(prefix, last_prefix);
|
||||
para_stack = Para_stack__none;
|
||||
|
||||
// Close all the prefixes which aren't shared.
|
||||
while (common_prefix_len < last_prefix_len) {
|
||||
bfr.Add(Close_list(last_prefix[last_prefix_len - 1]));
|
||||
last_prefix_len--;
|
||||
}
|
||||
|
||||
// Continue the current prefix if appropriate.
|
||||
if (prefix_len <= common_prefix_len && common_prefix_len > 0) {
|
||||
bfr.Add(Next_item(prefix[common_prefix_len - 1]));
|
||||
}
|
||||
|
||||
// Open prefixes where appropriate.
|
||||
if (Bry_.Len_gt_0(last_prefix) && prefix_len > common_prefix_len) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
while (prefix_len > common_prefix_len) {
|
||||
byte c = XophpString.substr_byte(prefix, common_prefix_len, 1);
|
||||
bfr.Add(Open_list(c));
|
||||
|
||||
if (c == Byte_ascii.Semic) {
|
||||
// @todo FIXME: This is dupe of code above
|
||||
if (Find_colon_no_links(t, term, t2) != Bry_find_.Not_found) {
|
||||
term = find_colon_no_links__before;
|
||||
t2 = find_colon_no_links__after;
|
||||
t = t2;
|
||||
bfr.Add(term).Add(Next_item(Byte_ascii.Colon));
|
||||
}
|
||||
}
|
||||
++common_prefix_len;
|
||||
}
|
||||
if (prefix_len == 0 && Bry_.Len_gt_0(last_prefix)) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
last_prefix = prefix2;
|
||||
}
|
||||
|
||||
// If we have no prefixes, go to paragraph mode.
|
||||
if (0 == prefix_len) {
|
||||
// No prefix (not in list)--go to paragraph mode
|
||||
// XXX: use a stack for nestable elements like span, table and div
|
||||
int t_len = t.length;
|
||||
boolean open_match = XophpPreg.match(open_match_trie, trv, t, 0, t_len) != null;
|
||||
boolean close_match = XophpPreg.match(close_match_trie, trv, t, 0, t_len) != null;
|
||||
|
||||
if (open_match || close_match) {
|
||||
para_stack = Para_stack__none;
|
||||
// @todo bug 5718: paragraph closed
|
||||
bfr.Add(Close_paragraph());
|
||||
if (pre_open_match && !pre_close_match) {
|
||||
in_pre = true;
|
||||
}
|
||||
int bq_offset = 0;
|
||||
// PORTED:preg_match('/<(\\/?)blockquote[\s>]/i', t, $bqMatch, PREG_OFFSET_CAPTURE, $bq_offset)
|
||||
while (true) {
|
||||
Object o = XophpPreg.match(blockquote_trie, trv, t, bq_offset, t_len);
|
||||
if (o == null) { // no more blockquotes found; exit
|
||||
break;
|
||||
}
|
||||
else {
|
||||
byte[] bq_bry = (byte[])o;
|
||||
in_blockquote = bq_bry[1] != Byte_ascii.Slash; // is this a close tag?
|
||||
bq_offset = trv.Pos();
|
||||
}
|
||||
}
|
||||
in_block_elem = !close_match;
|
||||
}
|
||||
else if (!in_block_elem && !in_pre) {
|
||||
if ( XophpString.substr_byte(t, 0) == Byte_ascii.Space
|
||||
&& (last_section == Last_section__pre || Bry_.Trim(t) != Bry_.Empty)
|
||||
&& !in_blockquote
|
||||
) {
|
||||
// pre
|
||||
if (last_section != Last_section__pre) {
|
||||
para_stack = Para_stack__none;
|
||||
bfr.Add(Close_paragraph()).Add(Gfh_tag_.Pre_lhs);
|
||||
last_section = Last_section__pre;
|
||||
}
|
||||
t = Bry_.Mid(t, 1);
|
||||
}
|
||||
else {
|
||||
// paragraph
|
||||
if (Bry_.Trim(t) == Bry_.Empty) {
|
||||
if (para_stack != Para_stack__none) {
|
||||
Para_stack_bfr(bfr, para_stack);
|
||||
bfr.Add_str_a7("<br />");
|
||||
para_stack = Para_stack__none;
|
||||
last_section = Last_section__para;
|
||||
}
|
||||
else {
|
||||
if (last_section != Last_section__para) {
|
||||
bfr.Add(Close_paragraph());
|
||||
last_section = Last_section__none;
|
||||
para_stack = Para_stack__bgn;
|
||||
}
|
||||
else {
|
||||
para_stack = Para_stack__mid;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (para_stack != Para_stack__none) {
|
||||
Para_stack_bfr(bfr, para_stack);
|
||||
para_stack = Para_stack__none;
|
||||
last_section = Last_section__para;
|
||||
}
|
||||
else if (last_section != Last_section__para) {
|
||||
bfr.Add(Close_paragraph()).Add(Gfh_tag_.P_lhs);
|
||||
this.last_section = Last_section__para;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// somewhere above we forget to get out of pre block (bug 785)
|
||||
if (pre_close_match && in_pre) {
|
||||
in_pre = false;
|
||||
}
|
||||
if (para_stack == Para_stack__none) {
|
||||
bfr.Add(t);
|
||||
if (prefix_len == 0) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
}
|
||||
|
||||
line_bgn = line_end + 1;
|
||||
}
|
||||
|
||||
while (prefix_len > 0) {
|
||||
bfr.Add(Close_list(prefix2[prefix_len - 1]));
|
||||
prefix_len--;
|
||||
if (prefix_len > 0) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
}
|
||||
if (last_section != Last_section__none) {
|
||||
bfr.Add(last_section == Last_section__para ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs);
|
||||
last_section = Last_section__none;
|
||||
}
|
||||
}
|
||||
// If a pre or p is open, return the corresponding close tag and update
|
||||
// the state. If no tag is open, return an empty String.
|
||||
public byte[] Close_paragraph() {
|
||||
byte[] result = Bry_.Empty;
|
||||
if (last_section != Last_section__none) {
|
||||
tmp.Add(last_section == Last_section__para ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs);
|
||||
result = tmp.Add_byte_nl().To_bry_and_clear();
|
||||
}
|
||||
in_pre = false;
|
||||
last_section = Last_section__none;
|
||||
return result;
|
||||
}
|
||||
|
||||
// getCommon() returns the length of the longest common substring
|
||||
// of both arguments, starting at the beginning of both.
|
||||
private int Get_common(byte[] st1, byte[] st2) {
|
||||
int st1_len = st1.length, st2_len = st2.length;
|
||||
int shorter = st1_len < st2_len ? st1_len : st2_len;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < shorter; i++) {
|
||||
if (st1[i] != st2[i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
// Open the list item element identified by the prefix character.
|
||||
private byte[] Open_list(byte c) {
|
||||
byte[] result = Close_paragraph();
|
||||
|
||||
if (c == Byte_ascii.Star)
|
||||
result = tmp.Add(result).Add_str_a7("<ul><li>").To_bry_and_clear();
|
||||
else if (c == Byte_ascii.Hash)
|
||||
result = tmp.Add(result).Add_str_a7("<ol><li>").To_bry_and_clear();
|
||||
else if (c == Byte_ascii.Hash)
|
||||
result = tmp.Add(result).Add_str_a7("<dl><dd>").To_bry_and_clear();
|
||||
else if (c == Byte_ascii.Semic) {
|
||||
result = tmp.Add(result).Add_str_a7("<dl><dt>").To_bry_and_clear();
|
||||
dt_open = true;
|
||||
}
|
||||
else
|
||||
result = tmp.Add_str_a7("<!-- ERR 1 -->").To_bry_and_clear();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Close the current list item and open the next one.
|
||||
private byte[] Next_item(byte c) {
|
||||
if (c == Byte_ascii.Star || c == Byte_ascii.Hash) {
|
||||
return tmp.Add_str_a7("</li>\n<li>").To_bry_and_clear();
|
||||
}
|
||||
else if (c == Byte_ascii.Colon || c == Byte_ascii.Semic) {
|
||||
byte[] close = tmp.Add_str_a7("</dd>\n").To_bry_and_clear();
|
||||
if (dt_open) {
|
||||
close = tmp.Add_str_a7("</dt>\n").To_bry_and_clear();
|
||||
}
|
||||
if (c == Byte_ascii.Semic) {
|
||||
dt_open = true;
|
||||
return tmp.Add(close).Add_str_a7("<dt>").To_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
dt_open = false;
|
||||
return tmp.Add(close).Add_str_a7("<dd>").To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
return tmp.Add_str_a7("<!-- ERR 2 -->").To_bry_and_clear();
|
||||
}
|
||||
|
||||
// Close the current list item identified by the prefix character.
|
||||
private byte[] Close_list(byte c) {
|
||||
byte[] text = null;
|
||||
if (c == Byte_ascii.Star) {
|
||||
text = Bry_.new_a7("</li></ul>");
|
||||
}
|
||||
else if (c == Byte_ascii.Hash) {
|
||||
text = Bry_.new_a7("</li></ol>");
|
||||
}
|
||||
else if (c == Byte_ascii.Colon) {
|
||||
if (dt_open) {
|
||||
dt_open = false;
|
||||
text = Bry_.new_a7("</dt></dl>");
|
||||
}
|
||||
else {
|
||||
text = Bry_.new_a7("</dd></dl>");
|
||||
}
|
||||
}
|
||||
else {
|
||||
return Bry_.new_a7("<!-- ERR 3 -->");
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
// Split up a String on ':', ignoring any occurrences inside tags
|
||||
// to prevent illegal overlapping.
|
||||
private int Find_colon_no_links(byte[] str, byte[] before, byte[] after) {
|
||||
int len = str.length;
|
||||
int colon_pos = XophpString.strpos(str, Byte_ascii.Colon, 0, len);
|
||||
if (colon_pos == Bry_find_.Not_found) {
|
||||
// Nothing to find!
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
|
||||
int lt_pos = XophpString.strpos(str, Byte_ascii.Angle_bgn, 0, len);
|
||||
if (lt_pos == Bry_find_.Not_found || lt_pos > colon_pos) {
|
||||
// Easy; no tag nesting to worry about
|
||||
find_colon_no_links__before = XophpString.substr(str, 0, colon_pos);
|
||||
find_colon_no_links__after = XophpString.substr(str, colon_pos + 1);
|
||||
return colon_pos;
|
||||
}
|
||||
|
||||
// Ugly state machine to walk through avoiding tags.
|
||||
int state = COLON_STATE_TEXT;
|
||||
int level = 0;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte c = str[i];
|
||||
|
||||
switch (state) {
|
||||
case COLON_STATE_TEXT:
|
||||
switch (c) {
|
||||
case Byte_ascii.Angle_bgn:
|
||||
// Could be either a <start> tag or an </end> tag
|
||||
state = COLON_STATE_TAGSTART;
|
||||
break;
|
||||
case Byte_ascii.Colon:
|
||||
if (level == 0) {
|
||||
// We found it!
|
||||
find_colon_no_links__before = XophpString.substr(str, 0, i);
|
||||
find_colon_no_links__after = XophpString.substr(str, i + 1);
|
||||
return i;
|
||||
}
|
||||
// Embedded in a tag; don't break it.
|
||||
break;
|
||||
default:
|
||||
// Skip ahead looking for something interesting
|
||||
colon_pos = XophpString.strpos(str, Byte_ascii.Colon, i, len);
|
||||
if (colon_pos == Bry_find_.Not_found) {
|
||||
// Nothing else interesting
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
lt_pos = XophpString.strpos(str, Byte_ascii.Angle_bgn, i, len);
|
||||
if (level == 0) {
|
||||
if (lt_pos == Bry_find_.Not_found || colon_pos < lt_pos) {
|
||||
// We found it!
|
||||
find_colon_no_links__before = XophpString.substr(str, 0, colon_pos);
|
||||
find_colon_no_links__after = XophpString.substr(str, colon_pos + 1);
|
||||
return i;
|
||||
}
|
||||
}
|
||||
if (lt_pos == Bry_find_.Not_found) {
|
||||
// Nothing else interesting to find; abort!
|
||||
// We're nested, but there's no close tags left. Abort!
|
||||
i = len; // break 2
|
||||
break;
|
||||
}
|
||||
// Skip ahead to next tag start
|
||||
i = lt_pos;
|
||||
state = COLON_STATE_TAGSTART;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_TAG:
|
||||
// In a <tag>
|
||||
switch (c) {
|
||||
case Byte_ascii.Angle_bgn:
|
||||
level++;
|
||||
state = COLON_STATE_TEXT;
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
// Slash may be followed by >?
|
||||
state = COLON_STATE_TAGSLASH;
|
||||
break;
|
||||
default:
|
||||
// ignore
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_TAGSTART:
|
||||
switch (c) {
|
||||
case Byte_ascii.Slash:
|
||||
state = COLON_STATE_CLOSETAG;
|
||||
break;
|
||||
case Byte_ascii.Bang:
|
||||
state = COLON_STATE_COMMENT;
|
||||
break;
|
||||
case Byte_ascii.Angle_bgn:
|
||||
// Illegal early close? This shouldn't happen D:
|
||||
state = COLON_STATE_TEXT;
|
||||
break;
|
||||
default:
|
||||
state = COLON_STATE_TAG;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_CLOSETAG:
|
||||
// In a </tag>
|
||||
if (c == Byte_ascii.Angle_bgn) {
|
||||
level--;
|
||||
if (level < 0) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; too many close tags");
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
state = COLON_STATE_TEXT;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_TAGSLASH:
|
||||
if (c == Byte_ascii.Angle_bgn) {
|
||||
// Yes, a self-closed tag <blah/>
|
||||
state = COLON_STATE_TEXT;
|
||||
}
|
||||
else {
|
||||
// Probably we're jumping the gun, and this is an attribute
|
||||
state = COLON_STATE_TAG;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_COMMENT:
|
||||
if (c == Byte_ascii.Dash) {
|
||||
state = COLON_STATE_COMMENTDASH;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_COMMENTDASH:
|
||||
if (c == Byte_ascii.Dash) {
|
||||
state = COLON_STATE_COMMENTDASHDASH;
|
||||
}
|
||||
else {
|
||||
state = COLON_STATE_COMMENT;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_COMMENTDASHDASH:
|
||||
if (c == Byte_ascii.Angle_bgn) {
|
||||
state = COLON_STATE_TEXT;
|
||||
}
|
||||
else {
|
||||
state = COLON_STATE_COMMENT;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw Err_.new_wo_type("State machine error");
|
||||
}
|
||||
}
|
||||
if (level > 0) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; not enough close tags (level ~{0}, state ~{1})", level, state);
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
private static final int
|
||||
COLON_STATE_TEXT = 0
|
||||
, COLON_STATE_TAG = 1
|
||||
, COLON_STATE_TAGSTART = 2
|
||||
, COLON_STATE_CLOSETAG = 3
|
||||
, COLON_STATE_TAGSLASH = 4
|
||||
, COLON_STATE_COMMENT = 5
|
||||
, COLON_STATE_COMMENTDASH = 6
|
||||
, COLON_STATE_COMMENTDASHDASH = 7
|
||||
;
|
||||
private static final byte
|
||||
Last_section__none = 0 // ''
|
||||
, Last_section__para = 1 // p
|
||||
, Last_section__pre = 2 // pre
|
||||
;
|
||||
private static final byte
|
||||
Para_stack__none = 0 // false
|
||||
, Para_stack__bgn = 1 // <p>
|
||||
, Para_stack__mid = 2 // </p><p>
|
||||
;
|
||||
private static final int Pre__bgn = 0, Pre__end = 1;
|
||||
private static Btrie_slim_mgr pre_trie;
|
||||
private static boolean[] block_chars_ary;
|
||||
private static boolean[] Block_chars_ary__new() {
|
||||
boolean[] rv = new boolean[256];
|
||||
rv[Byte_ascii.Star] = true;
|
||||
rv[Byte_ascii.Hash] = true;
|
||||
rv[Byte_ascii.Colon] = true;
|
||||
rv[Byte_ascii.Semic] = true;
|
||||
return rv;
|
||||
}
|
||||
private static Btrie_slim_mgr open_match_trie, close_match_trie, blockquote_trie;
|
||||
private static void Para_stack_bfr(Bry_bfr bfr, int id) {
|
||||
switch (id) {
|
||||
case Para_stack__bgn: bfr.Add_str_a7("<p>"); break;
|
||||
case Para_stack__mid: bfr.Add_str_a7("</p><p>"); break;
|
||||
default: throw Err_.new_unhandled_default(id);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user