mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Parser.Tidy: Add vnu as additional tidy engine [#417]
This commit is contained in:
parent
31c7604f03
commit
cc8b9810a7
@ -6,5 +6,6 @@
|
||||
<classpathentry kind="lib" path="lib/commons-compress-1.5.jar"/>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
|
||||
<classpathentry kind="lib" path="lib/xz-1.5.jar"/>
|
||||
<classpathentry kind="lib" path="lib/Saxon-HE-9.9.1-2.jar"/>
|
||||
<classpathentry kind="output" path="bin"/>
|
||||
</classpath>
|
||||
|
@ -10,5 +10,7 @@
|
||||
<classpathentry exported="true" kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/gplx.gflucene"/>
|
||||
<classpathentry exported="true" kind="lib" path="lib/icu4j-57_1.jar"/>
|
||||
<classpathentry kind="lib" path="lib/vnu.jar"/>
|
||||
<classpathentry kind="lib" path="lib/Saxon-HE-9.9.1-2.jar"/>
|
||||
<classpathentry kind="output" path="bin"/>
|
||||
</classpath>
|
||||
|
@ -21,7 +21,7 @@ public class Xocfg_type_mgr {
|
||||
this.Lists__add("list:xowa.app.startup.pages.type", "blank", "xowa", "previous", "custom");
|
||||
this.Lists__add("list:xowa.gui.html_box.page_load_mode", "mem", "url");
|
||||
this.Lists__add("list:xowa.html.portal.missing_class", Keyval_.new_("", "Show as blue link"), Keyval_.new_("new", "Show as red link"), Keyval_.new_("xowa_display_none", "Hide"));
|
||||
this.Lists__add("list:xowa.html.tidy.engine", "tidy", "jtidy");
|
||||
this.Lists__add("list:xowa.html.tidy.engine", "tidy", "jtidy", "vnu");
|
||||
this.Lists__add("list:xowa.bldr.db.zip_mode", Keyval_.new_("raw", "text"), Keyval_.new_("gzip"), Keyval_.new_("bzip2"), Keyval_.new_("xz"));
|
||||
this.Lists__add("list:xowa.addon.category.catpage.missing_class", "normal", "hide", "red_link");
|
||||
this.Lists__add("list:xowa.addon.http_server.file_retrieve_mode", Keyval_.new_("wait"), Keyval_.new_("skip"), Keyval_.new_("async_server", "async server"));
|
||||
|
@ -145,6 +145,7 @@ public class Xog_cmd_itm_ {
|
||||
, Key_html_tidy_toggle = new_dflt_(Xog_ctg_itm_.Tid_html , "xowa.html.tidy.toggle")
|
||||
, Key_html_tidy_engine_tidy_ = new_dflt_(Xog_ctg_itm_.Tid_html , "xowa.html.tidy.engine_tidy_")
|
||||
, Key_html_tidy_engine_jtidy_ = new_dflt_(Xog_ctg_itm_.Tid_html , "xowa.html.tidy.engine_jtidy_")
|
||||
, Key_html_tidy_engine_vnu_ = new_dflt_(Xog_ctg_itm_.Tid_html , "xowa.html.tidy.engine_vnu_")
|
||||
|
||||
, Key_net_enabled = new_dflt_(Xog_ctg_itm_.Tid_net , "xowa.net.enabled")
|
||||
, Key_net_enabled_n_ = new_dflt_(Xog_ctg_itm_.Tid_net , "xowa.net.enabled_n_")
|
||||
|
@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.htmls.core.htmls.tidy; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*;
|
||||
public class Xoh_tidy_wkr_ {
|
||||
public static final byte Tid_null = 0, Tid_tidy = 1, Tid_jtidy = 2;
|
||||
public static final byte Tid_null = 0, Tid_tidy = 1, Tid_jtidy = 2, Tid_vnu = 3;
|
||||
public static final Xoh_tidy_wkr Wkr_null = new Xoh_tidy_wkr_null();
|
||||
}
|
||||
class Xoh_tidy_wkr_null implements Xoh_tidy_wkr {
|
||||
|
@ -0,0 +1,55 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.htmls.core.htmls.tidy; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*;
|
||||
import gplx.xowa.htmls.core.htmls.tidy.vnus.*;
|
||||
import nu.validator.htmlparser.common.XmlViolationPolicy;
|
||||
import nu.validator.htmlparser.sax.HtmlParser;
|
||||
import java.io.InputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import gplx.core.envs.*;
|
||||
class Xoh_tidy_wkr__vnu implements Xoh_tidy_wkr {
|
||||
private byte[] depurate(Bry_bfr tidy_bfr, boolean compat) throws SAXException, IOException {
|
||||
byte[] input = tidy_bfr.To_bry_and_clear();
|
||||
InputStream stream = new ByteArrayInputStream(input);
|
||||
InputSource source = new InputSource(stream);
|
||||
ByteArrayOutputStream sink = new ByteArrayOutputStream();
|
||||
ContentHandler serializer;
|
||||
serializer = new CompatibilitySerializer(sink);
|
||||
HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALLOW);
|
||||
parser.setContentHandler(serializer);
|
||||
source.setEncoding("UTF-8");
|
||||
parser.setProperty("http://xml.org/sax/properties/lexical-handler", serializer);
|
||||
parser.parse(source);
|
||||
return sink.toByteArray();
|
||||
}
|
||||
public byte Tid() {return Xoh_tidy_wkr_.Tid_vnu;}
|
||||
public void Init_by_app(Xoae_app app) {
|
||||
}
|
||||
public void Indent_(boolean v) {
|
||||
}
|
||||
public void Exec_tidy(Bry_bfr bfr, byte[] page_url) {
|
||||
try {
|
||||
bfr.Add(depurate(bfr, true));
|
||||
}
|
||||
catch (SAXException e) { }
|
||||
catch (IOException e) {}
|
||||
}
|
||||
}
|
@ -0,0 +1,17 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.htmls.core.htmls.tidy; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*;
|
||||
import gplx.core.envs.*;
|
@ -20,6 +20,7 @@ public class Xow_tidy_mgr implements Gfo_invk, Xow_tidy_mgr_interface {
|
||||
private Xoae_app app;
|
||||
private final Xoh_tidy_wkr_tidy wkr__tidy = new Xoh_tidy_wkr_tidy(); // NOTE: app-level; not thread-safe; needed b/c of Options and exe/args DATE:2016-07-12
|
||||
private final Xoh_tidy_wkr_jtidy wkr__jtidy = new Xoh_tidy_wkr_jtidy();
|
||||
private final Xoh_tidy_wkr__vnu wkr__vnu = new Xoh_tidy_wkr__vnu();
|
||||
private Xoh_tidy_wkr wkr = Xoh_tidy_wkr_.Wkr_null; // TEST: set default wkr to null
|
||||
private boolean enabled = true;
|
||||
public void Init_by_wiki(Xowe_wiki wiki) {
|
||||
@ -42,6 +43,7 @@ public class Xow_tidy_mgr implements Gfo_invk, Xow_tidy_mgr_interface {
|
||||
String engine_str = m.ReadStr("v");
|
||||
if (String_.Eq(engine_str, "tidy")) wkr = wkr__tidy; // NOTE: app-level; not thread-safe; needed b/c of Options and exe/args DATE:2016-07-12
|
||||
else if (String_.Eq(engine_str, "jtidy")) wkr = wkr__jtidy;
|
||||
else if (String_.Eq(engine_str, "vnu")) wkr = wkr__vnu;
|
||||
else throw Err_.new_unhandled_default(engine_str);
|
||||
wkr.Init_by_app(app);
|
||||
}
|
||||
|
@ -0,0 +1,386 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.htmls.core.htmls.tidy.vnus; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.htmls.tidy.*;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.io.Writer;
|
||||
import java.util.Arrays;
|
||||
import java.util.Stack;
|
||||
import java.util.EmptyStackException;
|
||||
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.Locator;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.ext.LexicalHandler;
|
||||
import org.xml.sax.helpers.AttributesImpl;
|
||||
public class CompatibilitySerializer implements ContentHandler, LexicalHandler {
|
||||
protected class StackEntry {
|
||||
public String uri;
|
||||
public String localName;
|
||||
public String qName;
|
||||
public Attributes attrs;
|
||||
OutputStream savedStream;
|
||||
public boolean needsPWrapping;
|
||||
public boolean isPWrapper;
|
||||
public boolean blank;
|
||||
public boolean hasText;
|
||||
public boolean split;
|
||||
public int blockNestingLevel;
|
||||
public boolean isDisabledPWrapper;
|
||||
|
||||
public StackEntry(String uri_, String localName_, String qName_,
|
||||
Attributes attrs_, OutputStream savedStream_) {
|
||||
uri = uri_;
|
||||
localName = localName_;
|
||||
qName = qName_;
|
||||
attrs = attrs_;
|
||||
savedStream = savedStream_;
|
||||
needsPWrapping = "body".equals(localName_)
|
||||
|| "blockquote".equals(localName_);
|
||||
blank = true;
|
||||
hasText = false;
|
||||
isPWrapper = "mw:p-wrap".equals(localName_);
|
||||
blockNestingLevel = 0;
|
||||
isDisabledPWrapper = false;
|
||||
split = false;
|
||||
}
|
||||
}
|
||||
|
||||
protected Stack<StackEntry> m_stack;
|
||||
protected DepurateSerializer m_serializer;
|
||||
protected Stack<StackEntry> m_pStack;
|
||||
|
||||
// Warning: this list must be in alphabetical order
|
||||
protected static final String[] ONLY_INLINE_ELEMENTS = {"a", "abbr", "acronym",
|
||||
"applet", "b", "basefont", "bdo", "big", "br", "button", "cite",
|
||||
"code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd",
|
||||
"label", "legend", "map", "object", "param", "q", "rb", "rbc", "rp",
|
||||
"rt", "rtc", "ruby", "s", "samp", "select", "small", "span", "strike",
|
||||
"strong", "sub", "sup", "textarea", "tt", "u", "var"};
|
||||
|
||||
// Warning: this list must be in alphabetical order
|
||||
protected static final String[] MARKED_EMPTY_ELEMENTS = {"li", "p", "tr"};
|
||||
|
||||
public CompatibilitySerializer(OutputStream out) {
|
||||
m_stack = new Stack<StackEntry>();
|
||||
m_pStack = new Stack<StackEntry>();
|
||||
m_serializer = new DepurateSerializer(out);
|
||||
}
|
||||
|
||||
private StackEntry peek(Stack<StackEntry> stack) throws SAXException {
|
||||
try {
|
||||
return stack.peek();
|
||||
} catch (EmptyStackException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Pop the top of the stack, restore the parent stream in the serializer
|
||||
* and return the previous stream
|
||||
*/
|
||||
private ByteArrayOutputStream popAndGetContents() throws SAXException {
|
||||
try {
|
||||
StackEntry entry = m_stack.pop();
|
||||
if (entry.isPWrapper) {
|
||||
m_pStack.pop();
|
||||
}
|
||||
ByteArrayOutputStream entryStream =
|
||||
(ByteArrayOutputStream)m_serializer.getOutputStream();
|
||||
m_serializer.setOutputStream(entry.savedStream);
|
||||
return entryStream;
|
||||
} catch (EmptyStackException e) {
|
||||
throw new SAXException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Push a new element to the top of the stack, and set up a new empty
|
||||
* stream in the serializer. Returns the new element.
|
||||
*/
|
||||
private StackEntry push(String uri, String localName, String qName,
|
||||
Attributes attrs) throws SAXException {
|
||||
StackEntry entry = new StackEntry(uri, localName, qName, attrs,
|
||||
m_serializer.getOutputStream());
|
||||
m_stack.push(entry);
|
||||
m_serializer.setOutputStream(new ByteArrayOutputStream());
|
||||
return entry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Equivalent to push() for a proposed p element. Will become a real
|
||||
* p element if the contents is non-blank and contains no block elements.
|
||||
*/
|
||||
private StackEntry pushPWrapper() throws SAXException {
|
||||
StackEntry entry = push("", "mw:p-wrap", "mw:p-wrap", new AttributesImpl());
|
||||
m_pStack.push(entry);
|
||||
return entry;
|
||||
}
|
||||
|
||||
private void writePWrapper(StackEntry entry, ByteArrayOutputStream contents)
|
||||
throws SAXException {
|
||||
if (!entry.isDisabledPWrapper && !entry.blank) {
|
||||
m_serializer.write("<p>");
|
||||
m_serializer.writeStream(contents);
|
||||
m_serializer.write("</p>");
|
||||
} else {
|
||||
m_serializer.writeStream(contents);
|
||||
}
|
||||
}
|
||||
|
||||
public void characters(char[] chars, int start, int length)
|
||||
throws SAXException {
|
||||
StackEntry entry = peek(m_stack);
|
||||
if (entry != null) {
|
||||
if (entry.needsPWrapping) {
|
||||
entry = pushPWrapper();
|
||||
}
|
||||
if (entry.blank || !entry.hasText) {
|
||||
for (int i = start; i < start + length; i++) {
|
||||
char c = chars[i];
|
||||
if (!(c == 9 || c == 10 || c == 12 || c == 13 || c == 32)) {
|
||||
entry.blank = false;
|
||||
entry.hasText = true;
|
||||
if (peek(m_pStack) != null) {
|
||||
peek(m_pStack).blank = false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
m_serializer.characters(chars, start, length);
|
||||
}
|
||||
|
||||
private void splitTagStack(boolean haveContent) throws SAXException {
|
||||
StackEntry currentPWrapper = peek(m_pStack);
|
||||
ByteArrayOutputStream seContent;
|
||||
int n = m_stack.size();
|
||||
int i = n - 1;
|
||||
StackEntry se = m_stack.get(i);
|
||||
while (se != currentPWrapper) {
|
||||
seContent = (ByteArrayOutputStream)m_serializer.getOutputStream();
|
||||
m_serializer.setOutputStream(se.savedStream);
|
||||
|
||||
if (se.hasText) {
|
||||
haveContent = true;
|
||||
}
|
||||
|
||||
// Emit content accumulated so far
|
||||
if (haveContent) {
|
||||
m_serializer.startElement(se.uri, se.localName, se.qName, se.attrs);
|
||||
m_serializer.writeStream(seContent);
|
||||
m_serializer.endElement(se.uri, se.localName, se.qName);
|
||||
|
||||
// All text has been output at this point
|
||||
// Record that it has been split and reset it.
|
||||
se.split = true;
|
||||
se.blank = true;
|
||||
se.hasText = false;
|
||||
}
|
||||
|
||||
// Reset parent's saved stream always.
|
||||
// As we unwind the stack, its saved content
|
||||
// could get output.
|
||||
se.savedStream = new ByteArrayOutputStream();
|
||||
|
||||
i--;
|
||||
se = m_stack.get(i);
|
||||
}
|
||||
|
||||
// Dump <p>.. contents ..</p>
|
||||
// Note se == currentPWrapper
|
||||
if (haveContent || se.hasText) {
|
||||
seContent = (ByteArrayOutputStream)m_serializer.getOutputStream();
|
||||
m_serializer.setOutputStream(se.savedStream);
|
||||
|
||||
// Emit content accumulated so far
|
||||
writePWrapper(se, seContent);
|
||||
|
||||
// All text has been output at this point
|
||||
se.blank = true;
|
||||
}
|
||||
|
||||
// New stream going forward
|
||||
m_serializer.setOutputStream(new ByteArrayOutputStream());
|
||||
}
|
||||
|
||||
private boolean isOnlyInline(String localName) {
|
||||
return Arrays.binarySearch(ONLY_INLINE_ELEMENTS, localName) > -1;
|
||||
}
|
||||
|
||||
private void enterBlock(String tagName) throws SAXException {
|
||||
// Whenever we enter a new block wrapper that is
|
||||
// embedded within a p-wrapper,
|
||||
//
|
||||
// 1. Disable p-wrapping.
|
||||
// 2. Split the tag stack and emit accumulated output
|
||||
// with a p-wrapper.
|
||||
|
||||
StackEntry currentPWrapper = peek(m_pStack);
|
||||
|
||||
if (currentPWrapper.blockNestingLevel == 0) {
|
||||
splitTagStack(false);
|
||||
}
|
||||
|
||||
currentPWrapper.blockNestingLevel++;
|
||||
currentPWrapper.isDisabledPWrapper = true;
|
||||
}
|
||||
|
||||
private void leaveBlock(String tagName) throws SAXException {
|
||||
// Whenever we leave the outermost block wrapper that is
|
||||
// embedded within a p-wrapper,
|
||||
//
|
||||
// 1. Re-enable p-wrapping.
|
||||
// 2. Split the tag stack and emit accumulated output
|
||||
// without a p-wrapper.
|
||||
|
||||
StackEntry currentPWrapper = peek(m_pStack);
|
||||
currentPWrapper.blockNestingLevel--;
|
||||
|
||||
if (currentPWrapper.blockNestingLevel == 0) {
|
||||
splitTagStack(true);
|
||||
}
|
||||
|
||||
currentPWrapper.isDisabledPWrapper = false;
|
||||
}
|
||||
|
||||
public void startElement(String uri, String localName, String qName,
|
||||
Attributes atts) throws SAXException {
|
||||
|
||||
StackEntry oldEntry = peek(m_stack);
|
||||
if (oldEntry != null) {
|
||||
if (oldEntry.isPWrapper) {
|
||||
if (!isOnlyInline(localName)) {
|
||||
// This is non-inline so close the p-wrapper
|
||||
ByteArrayOutputStream contents = popAndGetContents();
|
||||
writePWrapper(oldEntry, contents);
|
||||
oldEntry = peek(m_stack);
|
||||
} else {
|
||||
// We're putting an element inside the p-wrapper, so it is non-blank now
|
||||
oldEntry.blank = false;
|
||||
}
|
||||
} else {
|
||||
oldEntry.blank = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Track block nesting level
|
||||
boolean onlyInline = isOnlyInline(localName);
|
||||
StackEntry currentPWrapper = peek(m_pStack);
|
||||
if (currentPWrapper != null && !onlyInline) {
|
||||
enterBlock(localName);
|
||||
}
|
||||
|
||||
if (oldEntry != null && oldEntry.needsPWrapping && onlyInline) {
|
||||
StackEntry entry = pushPWrapper();
|
||||
// We're putting an element inside the p-wrapper, so it is non-blank
|
||||
entry.blank = false;
|
||||
}
|
||||
push(uri, localName, qName, atts);
|
||||
}
|
||||
|
||||
public void endElement(String uri, String localName, String qName)
|
||||
throws SAXException {
|
||||
StackEntry entry = peek(m_stack);
|
||||
ByteArrayOutputStream contents = popAndGetContents();
|
||||
|
||||
if (entry.isPWrapper) {
|
||||
// Since we made this p-wrapper, the caller really wants to end the parent element.
|
||||
// So first we need to close the p-wrapper
|
||||
writePWrapper(entry, contents);
|
||||
entry = peek(m_stack);
|
||||
contents = popAndGetContents();
|
||||
}
|
||||
|
||||
// Annotate empty tr and li elements so that they can be hidden in CSS,
|
||||
// for compatibility with tidy and existing wikitext
|
||||
if (Arrays.binarySearch(MARKED_EMPTY_ELEMENTS, localName) > -1) {
|
||||
if (entry.attrs.getLength() == 0 && entry.blank) {
|
||||
AttributesImpl newAttrs = new AttributesImpl();
|
||||
newAttrs.addAttribute("", "class", "class", "", "mw-empty-elt");
|
||||
entry.attrs = newAttrs;
|
||||
}
|
||||
}
|
||||
|
||||
if (!entry.split || !entry.blank) {
|
||||
m_serializer.startElement(entry.uri, entry.localName, entry.qName, entry.attrs);
|
||||
m_serializer.writeStream(contents);
|
||||
m_serializer.endElement(uri, localName, qName);
|
||||
}
|
||||
|
||||
// Track block nesting level
|
||||
boolean onlyInline = isOnlyInline(localName);
|
||||
StackEntry currentPWrapper = peek(m_pStack);
|
||||
if (currentPWrapper != null && !onlyInline) {
|
||||
leaveBlock(localName);
|
||||
}
|
||||
}
|
||||
|
||||
public void startDocument() throws SAXException {
|
||||
}
|
||||
|
||||
public void endDocument() throws SAXException {
|
||||
m_serializer.endDocument();
|
||||
}
|
||||
|
||||
public void ignorableWhitespace(char[] ch, int start, int length)
|
||||
throws SAXException {
|
||||
characters(ch, start, length);
|
||||
}
|
||||
public void processingInstruction(String target, String data)
|
||||
throws SAXException {
|
||||
}
|
||||
|
||||
public void setDocumentLocator(Locator locator) {
|
||||
}
|
||||
|
||||
public void comment(char[] ch, int start, int length) throws SAXException {
|
||||
m_serializer.comment(ch, start, length);
|
||||
}
|
||||
|
||||
public void endCDATA() throws SAXException {
|
||||
}
|
||||
public void endDTD() throws SAXException {
|
||||
}
|
||||
|
||||
public void endEntity(String name) throws SAXException {
|
||||
}
|
||||
|
||||
public void startCDATA() throws SAXException {
|
||||
}
|
||||
|
||||
public void startDTD(String name, String publicId, String systemId)
|
||||
throws SAXException {
|
||||
}
|
||||
|
||||
public void startEntity(String name) throws SAXException {
|
||||
}
|
||||
|
||||
public void startPrefixMapping(String prefix, String uri)
|
||||
throws SAXException {
|
||||
}
|
||||
|
||||
public void endPrefixMapping(String prefix) throws SAXException {
|
||||
}
|
||||
|
||||
public void skippedEntity(String name) throws SAXException {
|
||||
}
|
||||
}
|
@ -0,0 +1,328 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.htmls.core.htmls.tidy.vnus; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.htmls.tidy.*;
|
||||
/*
|
||||
* Copyright (c) 2007 Henri Sivonen
|
||||
* Copyright (c) 2008-2011 Mozilla Foundation
|
||||
* Copyright (c) 2016 Wikimedia Foundation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is mostly copied from validator.nu's HtmlSerializer. Changes:
|
||||
*
|
||||
* - Add a slash to void elements. This is allowed by the HTML 5 spec, it is
|
||||
* documented as having no effect. It allows the output to pass XHTML
|
||||
* validation.
|
||||
*
|
||||
* - &nbsp; is replaced with &#160;
|
||||
*
|
||||
* - Added getOutputStream(), setOutputStream(), write() to support
|
||||
* CompatibilitySerializer.
|
||||
*/
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.io.Writer;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.Locator;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.ext.LexicalHandler;
|
||||
/**
 * SAX content/lexical handler that writes the events it receives as HTML text, encoded as
 * UTF-8, onto an output stream that can be swapped while serializing.
 *
 * Text and attribute values are escaped with character references; the no-break space
 * (U+00A0) is written as the numeric reference {@code &#160;}. Void elements are written
 * with a trailing {@code " />"} and get no end tag.
 */
public class DepurateSerializer implements ContentHandler, LexicalHandler {

	private static final String NS_XHTML = "http://www.w3.org/1999/xhtml";
	private static final String NS_SVG = "http://www.w3.org/2000/svg";
	private static final String NS_MATHML = "http://www.w3.org/1998/Math/MathML";
	private static final String NS_XLINK = "http://www.w3.org/1999/xlink";
	private static final String NS_XML = "http://www.w3.org/XML/1998/namespace";

	// NOTE: both tables are probed with Arrays.binarySearch, so they must stay sorted.
	private static final String[] VOID_ELEMENTS = { "area", "base", "basefont",
		"bgsound", "br", "col", "command", "embed", "frame", "hr", "img",
		"input", "keygen", "link", "meta", "param", "source", "track",
		"wbr" };

	private static final String[] NON_ESCAPING = { "iframe", "noembed",
		"noframes", "noscript", "plaintext", "script", "style", "xmp" };

	/** Wraps {@code out} in a UTF-8 writer. */
	private static Writer wrap(OutputStream out) {
		try {
			return new OutputStreamWriter(out, "UTF-8");
		} catch (UnsupportedEncodingException e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Depth of elements whose end tag must not be written: elements in an unsupported
	 * namespace, anything nested inside them, and void elements.
	 */
	private int ignoreLevel = 0;

	/** Greater than zero while inside a NON_ESCAPING element; text is then written raw. */
	private int escapeLevel = 0;

	private OutputStream outputStream;
	private Writer writer;

	public DepurateSerializer(OutputStream out) {
		outputStream = out;
		this.writer = wrap(out);
	}

	/**
	 * Flushes pending characters and returns the stream currently being written to.
	 *
	 * @throws SAXException if the flush fails
	 */
	public OutputStream getOutputStream() throws SAXException {
		try {
			writer.flush();
		} catch (IOException e) {
			throw new SAXException(e);
		}
		return outputStream;
	}

	/**
	 * Redirects all further output to {@code out}.
	 *
	 * The previous writer is not flushed here; call {@link #getOutputStream()} first when
	 * characters may still be pending.
	 */
	public void setOutputStream(OutputStream out) throws RuntimeException {
		outputStream = out;
		writer = wrap(out);
	}

	/** Writes {@code s} verbatim (no escaping) and flushes. */
	public void write(String s) throws SAXException {
		try {
			writer.write(s);
			writer.flush();
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	/** Flushes pending characters, then copies the bytes of {@code s} to the current stream. */
	public void writeStream(ByteArrayOutputStream s) throws SAXException {
		try {
			writer.flush();
			s.writeTo(outputStream);
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	/**
	 * Writes character data. Inside a NON_ESCAPING element the characters are written raw;
	 * elsewhere {@code <}, {@code >}, {@code &} and U+00A0 are replaced by character
	 * references.
	 */
	public void characters(char[] ch, int start, int length)
			throws SAXException {
		try {
			if (escapeLevel > 0) {
				writer.write(ch, start, length);
				return;
			}
			for (int i = start; i < start + length; i++) {
				char c = ch[i];
				switch (c) {
					case '<':
						writer.write("&lt;");
						break;
					case '>':
						writer.write("&gt;");
						break;
					case '&':
						writer.write("&amp;");
						break;
					case '\u00A0':
						writer.write("&#160;");
						break;
					default:
						writer.write(c);
						break;
				}
			}
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	/** Flushes and closes the writer. */
	public void endDocument() throws SAXException {
		try {
			writer.flush();
			writer.close();
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	/** Writes the end tag, unless the element is being ignored (see {@code ignoreLevel}). */
	public void endElement(String uri, String localName, String qName)
			throws SAXException {
		if (escapeLevel > 0) {
			escapeLevel--;
		}
		if (ignoreLevel > 0) {
			ignoreLevel--;
			return;
		}
		try {
			writer.write('<');
			writer.write('/');
			writer.write(localName);
			writer.write('>');
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	public void ignorableWhitespace(char[] ch, int start, int length)
			throws SAXException {
		characters(ch, start, length);
	}

	public void processingInstruction(String target, String data)
			throws SAXException {
	}

	public void setDocumentLocator(Locator locator) {
	}

	public void startDocument() throws SAXException {
		try {
			writer.write("<!DOCTYPE html>\n");
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	/**
	 * Writes a start tag with its attributes.
	 *
	 * Elements outside the XHTML, SVG and MathML namespaces, and anything nested in an
	 * ignored element, are skipped. Attributes in a namespace are dropped, except xlink
	 * attributes on non-XHTML elements and xml attributes (only {@code lang} on XHTML
	 * elements). In attribute values {@code "}, {@code &} and U+00A0 are replaced by
	 * character references.
	 */
	public void startElement(String uri, String localName, String qName,
			Attributes atts) throws SAXException {
		if (escapeLevel > 0) {
			escapeLevel++;
		}
		boolean xhtml = NS_XHTML.equals(uri);
		if (ignoreLevel > 0
			|| !(xhtml || NS_SVG.equals(uri) || NS_MATHML.equals(uri))) {
			ignoreLevel++;
			return;
		}
		try {
			writer.write('<');
			writer.write(localName);
			for (int i = 0; i < atts.getLength(); i++) {
				String attUri = atts.getURI(i);
				String attLocal = atts.getLocalName(i);
				if (attUri.length() == 0) {
					writer.write(' ');
				} else if (!xhtml && NS_XLINK.equals(attUri)) {
					writer.write(" xlink:");
				} else if (NS_XML.equals(attUri)) {
					if (xhtml) {
						if ("lang".equals(attLocal)) {
							writer.write(' ');
						} else {
							continue;
						}
					} else {
						writer.write(" xml:");
					}
				} else {
					continue;
				}
				writer.write(attLocal);
				writer.write('=');
				writer.write('"');
				String val = atts.getValue(i);
				for (int j = 0; j < val.length(); j++) {
					char c = val.charAt(j);
					switch (c) {
						case '"':
							writer.write("&quot;");
							break;
						case '&':
							writer.write("&amp;");
							break;
						case '\u00A0':
							writer.write("&#160;");
							break;
						default:
							writer.write(c);
							break;
					}
				}
				writer.write('"');
			}
			if (Arrays.binarySearch(VOID_ELEMENTS, localName) >= 0) {
				writer.write(" />");
				// the matching endElement() must not emit a close tag
				ignoreLevel++;
				return;
			}
			writer.write('>');
			if ("pre".equals(localName) || "textarea".equals(localName)
				|| "listing".equals(localName)) {
				writer.write('\n');
			}
			if (escapeLevel == 0
				&& Arrays.binarySearch(NON_ESCAPING, localName) >= 0) {
				escapeLevel = 1;
			}
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	/** Writes a comment, unless inside an ignored or NON_ESCAPING element. */
	public void comment(char[] ch, int start, int length) throws SAXException {
		if (ignoreLevel > 0 || escapeLevel > 0) {
			return;
		}
		try {
			writer.write("<!--");
			writer.write(ch, start, length);
			writer.write("-->");
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	public void endCDATA() throws SAXException {
	}

	public void endDTD() throws SAXException {
	}

	public void endEntity(String name) throws SAXException {
	}

	public void startCDATA() throws SAXException {
	}

	public void startDTD(String name, String publicId, String systemId)
			throws SAXException {
	}

	public void startEntity(String name) throws SAXException {
	}

	public void startPrefixMapping(String prefix, String uri)
			throws SAXException {
	}

	public void endPrefixMapping(String prefix) throws SAXException {
	}

	public void skippedEntity(String name) throws SAXException {
	}
}
|
BIN
res/bin/any/java/vnu/Saxon-HE-9.9.1-2.jar
Normal file
BIN
res/bin/any/java/vnu/Saxon-HE-9.9.1-2.jar
Normal file
Binary file not shown.
BIN
res/bin/any/java/vnu/vnu.jar
Normal file
BIN
res/bin/any/java/vnu/vnu.jar
Normal file
Binary file not shown.
@ -6,7 +6,7 @@
|
||||
<javac includeantruntime="false" target="${jdk}" source="${jdk}" sourcepath=""
|
||||
srcdir ="${root_dir}/src/100_core"
|
||||
destdir ="${root_dir}/src/100_core/bin"
|
||||
classpath="${root_dir}/src/lib/junit.jar:${root_dir}/bin/any/java/apache/commons-compress-1.5.jar:${root_dir}/bin/any/java/xz/xz-1.5.jar">
|
||||
classpath="${root_dir}/src/lib/junit.jar:${root_dir}/bin/any/java/apache/commons-compress-1.5.jar:${root_dir}/bin/any/java/xz/xz-1.5.jar:${root_dir}/bin/any/java/vnu/Saxon-HE-9.9.1-2.jar">
|
||||
<include name="**/*.java"/>
|
||||
</javac>
|
||||
</target>
|
||||
@ -56,7 +56,7 @@
|
||||
<javac includeantruntime="false" target="${jdk}" source="${jdk}" sourcepath=""
|
||||
srcdir ="${root_dir}/src/400_xowa"
|
||||
destdir ="${root_dir}/src/400_xowa/bin"
|
||||
classpath="${root_dir}/src/lib/junit.jar:${root_dir}/src/100_core/bin:${root_dir}/src/110_gfml/bin:${root_dir}/src/140_dbs/bin:${root_dir}/src/150_gfui/bin:${root_dir}/src/gplx.gflucene/bin:${root_dir}/bin/any/java/luaj/luaj_xowa.jar:${root_dir}/bin/any/java/jtidy/jtidy_xowa.jar:${root_dir}/bin/any/java/icu4j/icu4j-57_1.jar">
|
||||
classpath="${root_dir}/src/lib/junit.jar:${root_dir}/src/100_core/bin:${root_dir}/src/110_gfml/bin:${root_dir}/src/140_dbs/bin:${root_dir}/src/150_gfui/bin:${root_dir}/src/gplx.gflucene/bin:${root_dir}/bin/any/java/luaj/luaj_xowa.jar:${root_dir}/bin/any/java/jtidy/jtidy_xowa.jar:${root_dir}/bin/any/java/icu4j/icu4j-57_1.jar:${root_dir}/bin/any/java/vnu/vnu.jar:${root_dir}/bin/any/java/vnu/Saxon-HE-9.9.1-2.jar">
|
||||
<compilerarg line="-encoding utf-8"/>
|
||||
<include name="**/*.java"/>
|
||||
</javac>
|
||||
|
@ -4,7 +4,7 @@
|
||||
<jar destfile="${xowa_jar_path}">
|
||||
<manifest>
|
||||
<attribute name="Main-Class" value="gplx.xowa.Xowa_main"/>
|
||||
<attribute name="Class-Path" value=". bin/${plat_name}/swt/swt.jar bin/any/java/apache/commons-compress-1.5.jar bin/any/java/jdbc/sqlite/sqlite-jdbc-3.18.0.jar bin/any/java/luaj/luaj_xowa.jar bin/any/java/jtidy/jtidy_xowa.jar bin/any/java/xz/xz-1.5.jar bin/any/java/icu4j/icu4j-57_1.jar bin/any/java/lucene/5.3.0.drd/lucene-core-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-highlighter-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-memory-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-queryparser-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-analyzers-common-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-queries-5.3.0-mobile-2.jar"/>
|
||||
<attribute name="Class-Path" value=". bin/${plat_name}/swt/swt.jar bin/any/java/apache/commons-compress-1.5.jar bin/any/java/jdbc/sqlite/sqlite-jdbc-3.18.0.jar bin/any/java/luaj/luaj_xowa.jar bin/any/java/jtidy/jtidy_xowa.jar bin/any/java/xz/xz-1.5.jar bin/any/java/icu4j/icu4j-57_1.jar bin/any/java/lucene/5.3.0.drd/lucene-core-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-highlighter-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-memory-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-queryparser-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-analyzers-common-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-queries-5.3.0-mobile-2.jar bin/any/java/vnu/vnu.jar bin/any/java/vnu/Saxon-HE-9.9.1-2.jar"/>
|
||||
</manifest>
|
||||
<fileset dir="${root_dir}/src/100_core/bin"/>
|
||||
<fileset dir="${root_dir}/src/110_gfml/bin"/>
|
||||
|
Loading…
Reference in New Issue
Block a user