mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
665 lines
20 KiB
Java
665 lines
20 KiB
Java
/*
|
|
XOWA: the XOWA Offline Wiki Application
|
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
|
|
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
|
or alternatively under the terms of the Apache License Version 2.0.
|
|
|
|
You may use XOWA according to either of these licenses as is most appropriate
|
|
for your project on a case-by-case basis.
|
|
|
|
The terms of each license can be found in the source code repository:
|
|
|
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|
*/
|
|
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
|
|
import gplx.core.btries.*;
|
|
import gplx.langs.htmls.*;
|
|
/**
|
|
* This is the part of the wikitext parser which handles automatic paragraphs
|
|
* and conversion of start-of-line prefixes to HTML lists.
|
|
*/
|
|
public class XomwBlockLevelPass {
|
|
private boolean DTopen = false;
|
|
private boolean inPre = false;
|
|
private int lastSection = LAST_SECTION_NONE;
|
|
private boolean linestart;
|
|
// private $text;
|
|
private final Bry_bfr tmp = Bry_bfr_.New();
|
|
private final Btrie_rv trv = new Btrie_rv();
|
|
private byte[] find_colon_no_links__before, find_colon_no_links__after;
|
|
|
|
// State constants for the definition list colon extraction
|
|
private static final int
|
|
COLON_STATE_TEXT = 0
|
|
, COLON_STATE_TAG = 1
|
|
, COLON_STATE_TAGSTART = 2
|
|
, COLON_STATE_CLOSETAG = 3
|
|
, COLON_STATE_TAGSLASH = 4
|
|
, COLON_STATE_COMMENT = 5
|
|
, COLON_STATE_COMMENTDASH = 6
|
|
, COLON_STATE_COMMENTDASHDASH = 7
|
|
;
|
|
|
|
/**
|
|
* Make lists from lines starting with ':', '*', '#', etc.
|
|
*
|
|
* @param String $text
|
|
* @param boolean $linestart Whether or not this is at the start of a line.
|
|
* @return String The lists rendered as HTML
|
|
*/
|
|
// public static function doBlockLevels($text, $linestart) {
|
|
// $pass = new self($text, $linestart);
|
|
// return $pass->execute();
|
|
// }
|
|
public void doBlockLevels(XomwParserCtx pctx, XomwParserBfr pbfr, boolean linestart) {
|
|
this.linestart = linestart;
|
|
execute(pctx, pbfr, linestart);
|
|
}
|
|
|
|
// /**
|
|
// * Private constructor
|
|
// */
|
|
// private function __construct($text, $linestart) {
|
|
// $this->text = $text;
|
|
// $this->linestart = $linestart;
|
|
// }
|
|
|
|
/**
|
|
* If a pre or p is open, return the corresponding close tag and update
|
|
* the state. If no tag is open, return an empty String.
|
|
* @return String
|
|
*/
|
|
private byte[] closeParagraph() {
|
|
byte[] result = Bry_.Empty;
|
|
if (this.lastSection != LAST_SECTION_NONE) {
|
|
result = tmp.Add(lastSection == LAST_SECTION_PARA ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs).Add_byte_nl().To_bry_and_clear(); // $result = '</' . $this->lastSection . ">\n";
|
|
}
|
|
this.inPre = false;
|
|
this.lastSection = LAST_SECTION_NONE;
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* getCommon() returns the length of the longest common substring
|
|
* of both arguments, starting at the beginning of both.
|
|
*
|
|
* @param String $st1
|
|
* @param String $st2
|
|
*
|
|
* @return int
|
|
*/
|
|
// getCommon() returns the length of the longest common substring
|
|
// of both arguments, starting at the beginning of both.
|
|
private int getCommon(byte[] st1, byte[] st2) {
|
|
int st1Len = st1.length, st2Len = st2.length;
|
|
int shorter = st1Len < st2Len ? st1Len : st2Len;
|
|
|
|
int i;
|
|
for (i = 0; i < shorter; ++i) {
|
|
if (st1[i] != st2[i]) {
|
|
break;
|
|
}
|
|
}
|
|
return i;
|
|
}
|
|
|
|
/**
|
|
* Open the list item element identified by the prefix character.
|
|
*
|
|
* @param String $char
|
|
*
|
|
* @return String
|
|
*/
|
|
private byte[] openList(byte c) {
|
|
byte[] result = this.closeParagraph();
|
|
|
|
if (c == Byte_ascii.Star)
|
|
result = Bry_.Add(result, Bry_.new_a7("<ul><li>"));
|
|
else if (c == Byte_ascii.Hash)
|
|
result = Bry_.Add(result, Bry_.new_a7("<ol><li>"));
|
|
else if (c == Byte_ascii.Colon)
|
|
result = Bry_.Add(result, Bry_.new_a7("<dl><dd>"));
|
|
else if (c == Byte_ascii.Semic) {
|
|
result = Bry_.Add(result, Bry_.new_a7("<dl><dt>"));
|
|
this.DTopen = true;
|
|
}
|
|
else {
|
|
result = Bry_.new_a7("<!-- ERR 1 -->");
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Close the current list item and open the next one.
|
|
* @param String $char
|
|
*
|
|
* @return String
|
|
*/
|
|
private byte[] nextItem(byte c) {
|
|
if (c == Byte_ascii.Star || c == Byte_ascii.Hash) {
|
|
return Bry_.new_a7("</li>\n<li>");
|
|
}
|
|
else if (c == Byte_ascii.Colon || c == Byte_ascii.Semic) {
|
|
byte[] close = Bry_.new_a7("</dd>\n");
|
|
if (this.DTopen) {
|
|
close = Bry_.new_a7("</dt>\n");
|
|
}
|
|
if (c == Byte_ascii.Semic) {
|
|
this.DTopen = true;
|
|
return Bry_.Add(close, Bry_.new_a7("<dt>"));
|
|
}
|
|
else {
|
|
this.DTopen = false;
|
|
return Bry_.Add(close, Bry_.new_a7("<dd>"));
|
|
}
|
|
}
|
|
return Bry_.new_a7("<!-- ERR 2 -->");
|
|
}
|
|
|
|
/**
|
|
* Close the current list item identified by the prefix character.
|
|
* @param String $char
|
|
*
|
|
* @return String
|
|
*/
|
|
private byte[] closeList(byte c) {
|
|
byte[] text = null;
|
|
if (c == Byte_ascii.Star) {
|
|
text = Bry_.new_a7("</li></ul>");
|
|
}
|
|
else if (c == Byte_ascii.Hash) {
|
|
text = Bry_.new_a7("</li></ol>");
|
|
}
|
|
else if (c == Byte_ascii.Colon) {
|
|
if (this.DTopen) {
|
|
this.DTopen = false;
|
|
text = Bry_.new_a7("</dt></dl>");
|
|
}
|
|
else {
|
|
text = Bry_.new_a7("</dd></dl>");
|
|
}
|
|
}
|
|
else {
|
|
return Bry_.new_a7("<!-- ERR 3 -->");
|
|
}
|
|
return text;
|
|
}
|
|
|
|
/**
|
|
* Execute the pass.
|
|
* @return String
|
|
*/
|
|
public void execute(XomwParserCtx pctx, XomwParserBfr pbfr, boolean linestart) {
|
|
// XO.PBFR
|
|
Bry_bfr src_bfr = pbfr.Src();
|
|
byte[] src = src_bfr.Bfr();
|
|
int src_bgn = 0;
|
|
int src_end = src_bfr.Len();
|
|
Bry_bfr bfr = pbfr.Trg();
|
|
pbfr.Switch();
|
|
|
|
// XO.STATIC
|
|
if (block_chars_ary == null) {
|
|
synchronized (Type_.Type_by_obj(this)) {
|
|
block_chars_ary = Block_chars_ary__new();
|
|
openMatchTrie = Btrie_slim_mgr.ci_a7().Add_many_str
|
|
( "<table", "<h1", "<h2", "<h3", "<h4", "<h5", "<h6", "<pre", "<tr"
|
|
, "<p", "<ul", "<ol", "<dl", "<li", "</tr", "</td", "</th");
|
|
closeMatchTrie = Btrie_slim_mgr.ci_a7().Add_many_str
|
|
( "</table", "</h1", "</h2", "</h3", "</h4", "</h5", "</h6"
|
|
, "<td", "<th", "<blockquote", "</blockquote", "<div", "</div", "<hr", "</pre", "</p", "</mw:"
|
|
, XomwParser.MARKER_PREFIX_STR + "-pre"
|
|
, "</li", "</ul", "</ol", "</dl", "<center", "</center");
|
|
blockquoteTrie = Btrie_slim_mgr.ci_a7().Add_many_str("<blockquote", "</blockquote");
|
|
pre_trie = Btrie_slim_mgr.ci_a7().Add_str_int("<pre", PRE_BGN).Add_str_int("</pre", PRE_END);
|
|
}
|
|
}
|
|
|
|
// clear state
|
|
this.inPre = false;
|
|
this.lastSection = LAST_SECTION_NONE;
|
|
byte[] prefix2 = null;
|
|
bfr.Clear();
|
|
|
|
// Parsing through the text line by line. The main thing
|
|
// happening here is handling of block-level elements p, pre,
|
|
// and making lists from lines starting with * # : etc.
|
|
byte[] lastPrefix = Bry_.Empty;
|
|
this.DTopen = false;
|
|
boolean inBlockElem = false;
|
|
int prefixLen = 0;
|
|
byte pendingPTag = PARA_STACK_NONE;
|
|
boolean inBlockquote = false;
|
|
|
|
// PORTED.SPLIT: $textLines = StringUtils::explode("\n", $text);
|
|
int lineBgn = src_bgn;
|
|
while (lineBgn < src_end) {
|
|
int lineEnd = Bry_find_.Find_fwd(src, Byte_ascii.Nl, lineBgn);
|
|
if (lineEnd == Bry_find_.Not_found)
|
|
lineEnd = src_end;
|
|
|
|
// Fix up linestart
|
|
if (!this.linestart) {
|
|
bfr.Add_mid(src, lineBgn, lineEnd);
|
|
this.linestart = true;
|
|
continue;
|
|
}
|
|
// * = ul
|
|
// # = ol
|
|
// ; = dt
|
|
// : = dd
|
|
int lastPrefixLen = lastPrefix.length;
|
|
|
|
// PORTED.BGN: preCloseMatch = preg_match('/<\\/pre/i', $oLine); preOpenMatch = preg_match('/<pre/i', $oLine);
|
|
int preCur = lineBgn;
|
|
boolean preCloseMatch = false;
|
|
boolean preOpenMatch = false;
|
|
while (true) {
|
|
if (preCur >= lineEnd)
|
|
break;
|
|
Object o = pre_trie.Match_at(trv, src, preCur, lineEnd);
|
|
if (o == null)
|
|
preCur++;
|
|
else {
|
|
int pre_tid = Int_.Cast(o);
|
|
if (pre_tid == PRE_BGN)
|
|
preOpenMatch = true;
|
|
else if (pre_tid == PRE_END)
|
|
preCloseMatch = true;
|
|
preCur = trv.Pos();
|
|
}
|
|
}
|
|
// PORTED.END
|
|
|
|
byte[] prefix = null, t = null;
|
|
// If not in a <pre> element, scan for and figure out what prefixes are there.
|
|
if (!this.inPre) {
|
|
// Multiple prefixes may abut each other for nested lists.
|
|
prefixLen = XophpString.strspn_fwd__ary(src, block_chars_ary, lineBgn, lineEnd, lineEnd); // strspn($oLine, '*#:;');
|
|
prefix = XophpString.substr(src, lineBgn, prefixLen);
|
|
|
|
// eh?
|
|
// ; and : are both from definition-lists, so they're equivalent
|
|
// for the purposes of determining whether or not we need to open/close
|
|
// elements.
|
|
// substr($inputLine, $prefixLength);
|
|
prefix2 = Bry_.Replace(prefix, Byte_ascii.Semic, Byte_ascii.Colon);
|
|
t = Bry_.Mid(src, lineBgn + prefixLen, lineEnd);
|
|
this.inPre = preOpenMatch;
|
|
}
|
|
else {
|
|
// Don't interpret any other prefixes in preformatted text
|
|
prefixLen = 0;
|
|
prefix = prefix2 = Bry_.Empty;
|
|
t = Bry_.Mid(src, lineBgn, lineEnd);
|
|
}
|
|
|
|
// List generation
|
|
byte[] term = null, t2 = null;
|
|
int commonPrefixLen = -1;
|
|
if (prefixLen > 0 && Bry_.Eq(lastPrefix, prefix2)) {
|
|
// Same as the last item, so no need to deal with nesting or opening stuff
|
|
bfr.Add(this.nextItem(XophpString.substr_byte(prefix, -1)));
|
|
pendingPTag = PARA_STACK_NONE;
|
|
|
|
if (prefixLen > 0 && prefix[prefixLen - 1] == Byte_ascii.Semic) {
|
|
// The one nasty exception: definition lists work like this:
|
|
// ; title : definition text
|
|
// So we check for : in the remainder text to split up the
|
|
// title and definition, without b0rking links.
|
|
term = t2 = Bry_.Empty;
|
|
if (this.findColonNoLinks(t, term, t2) != Bry_find_.Not_found) {
|
|
term = find_colon_no_links__before;
|
|
t2 = find_colon_no_links__after;
|
|
t = t2;
|
|
bfr.Add(term).Add(nextItem(Byte_ascii.Colon));
|
|
}
|
|
}
|
|
}
|
|
else if (prefixLen > 0 || lastPrefixLen > 0) {
|
|
// We need to open or close prefixes, or both.
|
|
|
|
// Either open or close a level...
|
|
commonPrefixLen = this.getCommon(prefix, lastPrefix);
|
|
pendingPTag = PARA_STACK_NONE;
|
|
|
|
// Close all the prefixes which aren't shared.
|
|
while (commonPrefixLen < lastPrefixLen) {
|
|
bfr.Add(this.closeList(lastPrefix[lastPrefixLen - 1]));
|
|
--lastPrefixLen;
|
|
}
|
|
|
|
// Continue the current prefix if appropriate.
|
|
if (prefixLen <= commonPrefixLen && commonPrefixLen > 0) {
|
|
bfr.Add(this.nextItem(prefix[commonPrefixLen - 1]));
|
|
}
|
|
|
|
// Open prefixes where appropriate.
|
|
if (Bry_.Len_gt_0(lastPrefix) && prefixLen > commonPrefixLen) {
|
|
bfr.Add_byte_nl();
|
|
}
|
|
while (prefixLen > commonPrefixLen) {
|
|
byte c = XophpString.substr_byte(prefix, commonPrefixLen, 1);
|
|
bfr.Add(this.openList(c));
|
|
|
|
if (c == Byte_ascii.Semic) {
|
|
// @todo FIXME: This is dupe of code above
|
|
if (findColonNoLinks(t, term, t2) != Bry_find_.Not_found) {
|
|
term = find_colon_no_links__before;
|
|
t2 = find_colon_no_links__after;
|
|
t = t2;
|
|
bfr.Add(term).Add(nextItem(Byte_ascii.Colon));
|
|
}
|
|
}
|
|
++commonPrefixLen;
|
|
}
|
|
if (prefixLen == 0 && Bry_.Len_gt_0(lastPrefix)) {
|
|
bfr.Add_byte_nl();
|
|
}
|
|
lastPrefix = prefix2;
|
|
}
|
|
|
|
// If we have no prefixes, go to paragraph mode.
|
|
if (0 == prefixLen) {
|
|
// No prefix (not in list)--go to paragraph mode
|
|
// @todo consider using a stack for nestable elements like span, table and div
|
|
int tLen = t.length;
|
|
|
|
// XO.MW.PORTED.BGN:
|
|
boolean openMatch = XophpPreg.match(openMatchTrie, trv, t, 0, tLen) != null;
|
|
boolean closeMatch = XophpPreg.match(closeMatchTrie, trv, t, 0, tLen) != null;
|
|
// XO.MW.PORTED.END
|
|
if (openMatch || closeMatch) {
|
|
pendingPTag = PARA_STACK_NONE;
|
|
// @todo bug 5718: paragraph closed
|
|
bfr.Add(this.closeParagraph());
|
|
if (preOpenMatch && !preCloseMatch) {
|
|
this.inPre = true;
|
|
}
|
|
int bqOffset = 0;
|
|
// PORTED:preg_match('/<(\\/?)blockquote[\s>]/i', t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset)
|
|
while (true) {
|
|
Object o = XophpPreg.match(blockquoteTrie, trv, t, bqOffset, tLen);
|
|
if (o == null) { // no more blockquotes found; exit
|
|
break;
|
|
}
|
|
else {
|
|
byte[] bq_bry = (byte[])o;
|
|
inBlockquote = bq_bry[1] != Byte_ascii.Slash; // is this a close tag?
|
|
bqOffset = trv.Pos();
|
|
}
|
|
}
|
|
// PORTED:END
|
|
inBlockElem = !closeMatch;
|
|
}
|
|
else if (!inBlockElem && !this.inPre) {
|
|
if (XophpString.substr_byte(t, 0) == Byte_ascii.Space
|
|
&& (this.lastSection == LAST_SECTION_PRE || Bry_.Trim(t) != Bry_.Empty)
|
|
&& !inBlockquote
|
|
) {
|
|
// pre
|
|
if (this.lastSection != LAST_SECTION_PRE) {
|
|
pendingPTag = PARA_STACK_NONE;
|
|
bfr.Add(closeParagraph()).Add(Gfh_tag_.Pre_lhs);
|
|
this.lastSection = LAST_SECTION_PRE;
|
|
}
|
|
t = Bry_.Mid(t, 1);
|
|
}
|
|
else {
|
|
// paragraph
|
|
if (Bry_.Trim(t) == Bry_.Empty) {
|
|
if (pendingPTag != PARA_STACK_NONE) {
|
|
ParaStackAdd(bfr, pendingPTag);
|
|
bfr.Add_str_a7("<br />");
|
|
pendingPTag = PARA_STACK_NONE;
|
|
this.lastSection = LAST_SECTION_PARA;
|
|
}
|
|
else {
|
|
if (this.lastSection != LAST_SECTION_PARA) {
|
|
bfr.Add(this.closeParagraph());
|
|
this.lastSection = LAST_SECTION_NONE;
|
|
pendingPTag = PARA_STACK_BGN;
|
|
}
|
|
else {
|
|
pendingPTag = PARA_STACK_MID;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (pendingPTag != PARA_STACK_NONE) {
|
|
ParaStackAdd(bfr, pendingPTag);
|
|
pendingPTag = PARA_STACK_NONE;
|
|
this.lastSection = LAST_SECTION_PARA;
|
|
}
|
|
else if (lastSection != LAST_SECTION_PARA) {
|
|
bfr.Add(this.closeParagraph()).Add(Gfh_tag_.P_lhs);
|
|
this.lastSection = LAST_SECTION_PARA;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// somewhere above we forget to get out of pre block (bug 785)
|
|
if (preCloseMatch && this.inPre) {
|
|
this.inPre = false;
|
|
}
|
|
if (pendingPTag == PARA_STACK_NONE) {
|
|
bfr.Add(t);
|
|
if (prefixLen == 0) {
|
|
bfr.Add_byte_nl();
|
|
}
|
|
}
|
|
|
|
lineBgn = lineEnd + 1;
|
|
}
|
|
|
|
while (prefixLen > 0) {
|
|
bfr.Add(this.closeList(prefix2[prefixLen - 1]));
|
|
--prefixLen;
|
|
if (prefixLen > 0) {
|
|
bfr.Add_byte_nl();
|
|
}
|
|
}
|
|
if (this.lastSection != LAST_SECTION_NONE) {
|
|
bfr.Add(this.lastSection == LAST_SECTION_PARA ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs);
|
|
this.lastSection = LAST_SECTION_NONE;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Split up a String on ':', ignoring any occurrences inside tags
|
|
* to prevent illegal overlapping.
|
|
*
|
|
* @param String $str The String to split
|
|
* @param String &$before Set to everything before the ':'
|
|
* @param String &$after Set to everything after the ':'
|
|
* @throws MWException
|
|
* @return String The position of the ':', or false if none found
|
|
*/
|
|
private int findColonNoLinks(byte[] str, byte[] before, byte[] after) {
|
|
int len = str.length;
|
|
int colonPos = XophpString.strpos(str, Byte_ascii.Colon, 0, len);
|
|
if (colonPos == Bry_find_.Not_found) {
|
|
// Nothing to find!
|
|
return Bry_find_.Not_found;
|
|
}
|
|
|
|
int ltPos = XophpString.strpos(str, Byte_ascii.Angle_bgn, 0, len);
|
|
if (ltPos == Bry_find_.Not_found || ltPos > colonPos) {
|
|
// Easy; no tag nesting to worry about
|
|
// XOMW: MW passes before / after by reference; XO: changes member and depends on callers to update
|
|
find_colon_no_links__before = XophpString.substr(str, 0, colonPos);
|
|
find_colon_no_links__after = XophpString.substr(str, colonPos + 1);
|
|
return colonPos;
|
|
}
|
|
|
|
// Ugly state machine to walk through avoiding tags.
|
|
int state = COLON_STATE_TEXT;
|
|
int level = 0;
|
|
for (int i = 0; i < len; i++) {
|
|
byte c = str[i];
|
|
|
|
switch (state) {
|
|
case COLON_STATE_TEXT:
|
|
switch (c) {
|
|
case Byte_ascii.Angle_bgn:
|
|
// Could be either a <start> tag or an </end> tag
|
|
state = COLON_STATE_TAGSTART;
|
|
break;
|
|
case Byte_ascii.Colon:
|
|
if (level == 0) {
|
|
// We found it!
|
|
find_colon_no_links__before = XophpString.substr(str, 0, i);
|
|
find_colon_no_links__after = XophpString.substr(str, i + 1);
|
|
return i;
|
|
}
|
|
// Embedded in a tag; don't break it.
|
|
break;
|
|
default:
|
|
// Skip ahead looking for something interesting
|
|
colonPos = XophpString.strpos(str, Byte_ascii.Colon, i, len);
|
|
if (colonPos == Bry_find_.Not_found) {
|
|
// Nothing else interesting
|
|
return Bry_find_.Not_found;
|
|
}
|
|
ltPos = XophpString.strpos(str, Byte_ascii.Angle_bgn, i, len);
|
|
if (level == 0) {
|
|
if (ltPos == Bry_find_.Not_found || colonPos < ltPos) {
|
|
// We found it!
|
|
find_colon_no_links__before = XophpString.substr(str, 0, colonPos);
|
|
find_colon_no_links__after = XophpString.substr(str, colonPos + 1);
|
|
return i;
|
|
}
|
|
}
|
|
if (ltPos == Bry_find_.Not_found) {
|
|
// Nothing else interesting to find; abort!
|
|
// We're nested, but there's no close tags left. Abort!
|
|
i = len; // break 2
|
|
break;
|
|
}
|
|
// Skip ahead to next tag start
|
|
i = ltPos;
|
|
state = COLON_STATE_TAGSTART;
|
|
break;
|
|
}
|
|
break;
|
|
case COLON_STATE_TAG:
|
|
// In a <tag>
|
|
switch (c) {
|
|
case Byte_ascii.Angle_end:
|
|
level++;
|
|
state = COLON_STATE_TEXT;
|
|
break;
|
|
case Byte_ascii.Slash:
|
|
// Slash may be followed by >?
|
|
state = COLON_STATE_TAGSLASH;
|
|
break;
|
|
default:
|
|
// ignore
|
|
break;
|
|
}
|
|
break;
|
|
case COLON_STATE_TAGSTART:
|
|
switch (c) {
|
|
case Byte_ascii.Slash:
|
|
state = COLON_STATE_CLOSETAG;
|
|
break;
|
|
case Byte_ascii.Bang:
|
|
state = COLON_STATE_COMMENT;
|
|
break;
|
|
case Byte_ascii.Angle_end:
|
|
// Illegal early close? This shouldn't happen D:
|
|
state = COLON_STATE_TEXT;
|
|
break;
|
|
default:
|
|
state = COLON_STATE_TAG;
|
|
break;
|
|
}
|
|
break;
|
|
case COLON_STATE_CLOSETAG:
|
|
// In a </tag>
|
|
if (c == Byte_ascii.Angle_end) {
|
|
level--;
|
|
if (level < 0) {
|
|
Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; too many close tags");
|
|
return Bry_find_.Not_found;
|
|
}
|
|
state = COLON_STATE_TEXT;
|
|
}
|
|
break;
|
|
case COLON_STATE_TAGSLASH:
|
|
if (c == Byte_ascii.Angle_end) {
|
|
// Yes, a self-closed tag <blah/>
|
|
state = COLON_STATE_TEXT;
|
|
}
|
|
else {
|
|
// Probably we're jumping the gun, and this is an attribute
|
|
state = COLON_STATE_TAG;
|
|
}
|
|
break;
|
|
case COLON_STATE_COMMENT:
|
|
if (c == Byte_ascii.Dash) {
|
|
state = COLON_STATE_COMMENTDASH;
|
|
}
|
|
break;
|
|
case COLON_STATE_COMMENTDASH:
|
|
if (c == Byte_ascii.Dash) {
|
|
state = COLON_STATE_COMMENTDASHDASH;
|
|
}
|
|
else {
|
|
state = COLON_STATE_COMMENT;
|
|
}
|
|
break;
|
|
case COLON_STATE_COMMENTDASHDASH:
|
|
if (c == Byte_ascii.Angle_bgn) {
|
|
state = COLON_STATE_TEXT;
|
|
}
|
|
else {
|
|
state = COLON_STATE_COMMENT;
|
|
}
|
|
break;
|
|
default:
|
|
throw Err_.new_wo_type("State machine error");
|
|
}
|
|
}
|
|
if (level > 0) {
|
|
Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; not enough close tags (level ~{0}, state ~{1})", level, state);
|
|
return Bry_find_.Not_found;
|
|
}
|
|
return Bry_find_.Not_found;
|
|
}
|
|
|
|
private static final byte
|
|
LAST_SECTION_NONE = 0 // ''
|
|
, LAST_SECTION_PARA = 1 // p
|
|
, LAST_SECTION_PRE = 2 // pre
|
|
;
|
|
private static final byte
|
|
PARA_STACK_NONE = 0 // false
|
|
, PARA_STACK_BGN = 1 // <p>
|
|
, PARA_STACK_MID = 2 // </p><p>
|
|
;
|
|
private static final int PRE_BGN = 0, PRE_END = 1;
|
|
private static Btrie_slim_mgr pre_trie;
|
|
private static boolean[] block_chars_ary;
|
|
private static boolean[] Block_chars_ary__new() {
|
|
boolean[] rv = new boolean[256];
|
|
rv[Byte_ascii.Star] = true;
|
|
rv[Byte_ascii.Hash] = true;
|
|
rv[Byte_ascii.Colon] = true;
|
|
rv[Byte_ascii.Semic] = true;
|
|
return rv;
|
|
}
|
|
private static Btrie_slim_mgr openMatchTrie, closeMatchTrie, blockquoteTrie;
|
|
private static void ParaStackAdd(Bry_bfr bfr, int id) {
|
|
switch (id) {
|
|
case PARA_STACK_BGN: bfr.Add_str_a7("<p>"); break;
|
|
case PARA_STACK_MID: bfr.Add_str_a7("</p><p>"); break;
|
|
default: throw Err_.new_unhandled_default(id);
|
|
}
|
|
}
|
|
}
|