Parser.Tidy: Add vnu as additional tidy engine [#417]

pull/620/head
gnosygnu 5 years ago
parent 31c7604f03
commit cc8b9810a7

@ -6,5 +6,6 @@
<classpathentry kind="lib" path="lib/commons-compress-1.5.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lib/xz-1.5.jar"/>
<classpathentry kind="lib" path="lib/Saxon-HE-9.9.1-2.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>

@ -10,5 +10,7 @@
<classpathentry exported="true" kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
<classpathentry combineaccessrules="false" kind="src" path="/gplx.gflucene"/>
<classpathentry exported="true" kind="lib" path="lib/icu4j-57_1.jar"/>
<classpathentry kind="lib" path="lib/vnu.jar"/>
<classpathentry kind="lib" path="lib/Saxon-HE-9.9.1-2.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>

@ -21,7 +21,7 @@ public class Xocfg_type_mgr {
this.Lists__add("list:xowa.app.startup.pages.type", "blank", "xowa", "previous", "custom");
this.Lists__add("list:xowa.gui.html_box.page_load_mode", "mem", "url");
this.Lists__add("list:xowa.html.portal.missing_class", Keyval_.new_("", "Show as blue link"), Keyval_.new_("new", "Show as red link"), Keyval_.new_("xowa_display_none", "Hide"));
this.Lists__add("list:xowa.html.tidy.engine", "tidy", "jtidy");
this.Lists__add("list:xowa.html.tidy.engine", "tidy", "jtidy", "vnu");
this.Lists__add("list:xowa.bldr.db.zip_mode", Keyval_.new_("raw", "text"), Keyval_.new_("gzip"), Keyval_.new_("bzip2"), Keyval_.new_("xz"));
this.Lists__add("list:xowa.addon.category.catpage.missing_class", "normal", "hide", "red_link");
this.Lists__add("list:xowa.addon.http_server.file_retrieve_mode", Keyval_.new_("wait"), Keyval_.new_("skip"), Keyval_.new_("async_server", "async server"));

@ -145,6 +145,7 @@ public class Xog_cmd_itm_ {
, Key_html_tidy_toggle = new_dflt_(Xog_ctg_itm_.Tid_html , "xowa.html.tidy.toggle")
, Key_html_tidy_engine_tidy_ = new_dflt_(Xog_ctg_itm_.Tid_html , "xowa.html.tidy.engine_tidy_")
, Key_html_tidy_engine_jtidy_ = new_dflt_(Xog_ctg_itm_.Tid_html , "xowa.html.tidy.engine_jtidy_")
, Key_html_tidy_engine_vnu_ = new_dflt_(Xog_ctg_itm_.Tid_html , "xowa.html.tidy.engine_vnu_")
, Key_net_enabled = new_dflt_(Xog_ctg_itm_.Tid_net , "xowa.net.enabled")
, Key_net_enabled_n_ = new_dflt_(Xog_ctg_itm_.Tid_net , "xowa.net.enabled_n_")

@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.htmls.core.htmls.tidy; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*;
// Constants shared by the tidy workers: byte ids for each tidy engine plus a shared Xoh_tidy_wkr_null instance.
public class Xoh_tidy_wkr_ {
// NOTE(review): the next two declarations are the before/after lines of this diff hunk (one line replaced);
// the second one, which adds Tid_vnu = 3, is the post-commit state.
public static final byte Tid_null = 0, Tid_tidy = 1, Tid_jtidy = 2;
public static final byte Tid_null = 0, Tid_tidy = 1, Tid_jtidy = 2, Tid_vnu = 3;
// Shared Xoh_tidy_wkr_null instance
public static final Xoh_tidy_wkr Wkr_null = new Xoh_tidy_wkr_null();
}
class Xoh_tidy_wkr_null implements Xoh_tidy_wkr {

@ -0,0 +1,55 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.htmls.core.htmls.tidy; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*;
import gplx.xowa.htmls.core.htmls.tidy.vnus.*;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.sax.HtmlParser;
import java.io.InputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.ContentHandler;
import gplx.core.envs.*;
/**
 * Tidy worker for the "vnu" engine: parses the page HTML with the validator.nu
 * HtmlParser and re-serializes the resulting SAX events through CompatibilitySerializer.
 */
class Xoh_tidy_wkr__vnu implements Xoh_tidy_wkr {
	/**
	 * Parses {@code src} as UTF-8 HTML and returns the re-serialized bytes.
	 *
	 * @param src raw HTML bytes to clean
	 * @return bytes written by the serializer
	 * @throws SAXException if the parser or serializer reports a failure
	 * @throws IOException if reading the input fails
	 */
	private byte[] depurate(byte[] src) throws SAXException, IOException {
		InputStream stream = new ByteArrayInputStream(src);
		InputSource source = new InputSource(stream);
		source.setEncoding("UTF-8");
		ByteArrayOutputStream sink = new ByteArrayOutputStream();
		ContentHandler serializer = new CompatibilitySerializer(sink);
		HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALLOW);
		parser.setContentHandler(serializer);
		// the serializer also implements LexicalHandler so that comments are passed through
		parser.setProperty("http://xml.org/sax/properties/lexical-handler", serializer);
		parser.parse(source);
		return sink.toByteArray();
	}
	public byte Tid() {return Xoh_tidy_wkr_.Tid_vnu;}
	public void Init_by_app(Xoae_app app) {
	}
	public void Indent_(boolean v) {
	}
	/**
	 * Replaces the contents of {@code bfr} with the tidied HTML.
	 * If parsing or serializing fails, the original contents are put back unchanged
	 * instead of leaving the buffer empty.
	 *
	 * @param bfr buffer holding the HTML to tidy; receives the result
	 * @param page_url not used by this worker
	 */
	public void Exec_tidy(Bry_bfr bfr, byte[] page_url) {
		// snapshot the input before parsing: taking the bytes empties the buffer
		byte[] orig = bfr.To_bry_and_clear();
		byte[] tidied;
		try {
			tidied = depurate(orig);
		}
		catch (SAXException e) {
			tidied = orig;	// best-effort: keep the untidied html rather than blanking the page
		}
		catch (IOException e) {
			tidied = orig;	// best-effort: keep the untidied html rather than blanking the page
		}
		bfr.Add(tidied);
	}
}

@ -0,0 +1,17 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.htmls.core.htmls.tidy; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*;
import gplx.core.envs.*;

@ -20,6 +20,7 @@ public class Xow_tidy_mgr implements Gfo_invk, Xow_tidy_mgr_interface {
private Xoae_app app;
private final Xoh_tidy_wkr_tidy wkr__tidy = new Xoh_tidy_wkr_tidy(); // NOTE: app-level; not thread-safe; needed b/c of Options and exe/args DATE:2016-07-12
private final Xoh_tidy_wkr_jtidy wkr__jtidy = new Xoh_tidy_wkr_jtidy();
private final Xoh_tidy_wkr__vnu wkr__vnu = new Xoh_tidy_wkr__vnu();
private Xoh_tidy_wkr wkr = Xoh_tidy_wkr_.Wkr_null; // TEST: set default wkr to null
private boolean enabled = true;
public void Init_by_wiki(Xowe_wiki wiki) {
@ -42,6 +43,7 @@ public class Xow_tidy_mgr implements Gfo_invk, Xow_tidy_mgr_interface {
String engine_str = m.ReadStr("v");
if (String_.Eq(engine_str, "tidy")) wkr = wkr__tidy; // NOTE: app-level; not thread-safe; needed b/c of Options and exe/args DATE:2016-07-12
else if (String_.Eq(engine_str, "jtidy")) wkr = wkr__jtidy;
else if (String_.Eq(engine_str, "vnu")) wkr = wkr__vnu;
else throw Err_.new_unhandled_default(engine_str);
wkr.Init_by_app(app);
}

@ -0,0 +1,386 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.htmls.core.htmls.tidy.vnus; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.htmls.tidy.*;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.Arrays;
import java.util.Stack;
import java.util.EmptyStackException;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
/**
 * SAX ContentHandler / LexicalHandler that sits in front of a DepurateSerializer.
 *
 * Every open element gets its own ByteArrayOutputStream: push() swaps a fresh stream into the
 * serializer and remembers the previous one; when the element ends, its buffered bytes are
 * written into the parent's stream. On top of that buffering the class
 * - wraps text and inline elements found directly under body / blockquote in a synthetic
 *   "mw:p-wrap" entry that is emitted as &lt;p&gt;..&lt;/p&gt; when it turns out to be non-blank, and
 * - adds class="mw-empty-elt" to attribute-less, blank li / p / tr elements.
 */
public class CompatibilitySerializer implements ContentHandler, LexicalHandler {
/** Book-keeping for one open element (or one synthetic p-wrapper) on the tag stack. */
protected class StackEntry {
public String uri;
public String localName;
public String qName;
public Attributes attrs;
// the serializer's output stream at the time this entry was pushed; restored when the entry is popped
OutputStream savedStream;
// true for body and blockquote: text / inline children get a p-wrapper pushed around them
public boolean needsPWrapping;
// true only for the synthetic "mw:p-wrap" entry
public boolean isPWrapper;
// true until non-whitespace text or a child element is seen
public boolean blank;
// true once non-whitespace text has been seen
public boolean hasText;
// set by splitTagStack once this element's accumulated content has already been emitted
public boolean split;
// incremented by enterBlock and decremented by leaveBlock on the current p-wrapper
public int blockNestingLevel;
// when true, writePWrapper emits the contents without surrounding <p> tags
public boolean isDisabledPWrapper;
public StackEntry(String uri_, String localName_, String qName_,
Attributes attrs_, OutputStream savedStream_) {
uri = uri_;
localName = localName_;
qName = qName_;
attrs = attrs_;
savedStream = savedStream_;
needsPWrapping = "body".equals(localName_)
|| "blockquote".equals(localName_);
blank = true;
hasText = false;
isPWrapper = "mw:p-wrap".equals(localName_);
blockNestingLevel = 0;
isDisabledPWrapper = false;
split = false;
}
}
// all currently open elements, including synthetic p-wrappers
protected Stack<StackEntry> m_stack;
protected DepurateSerializer m_serializer;
// the subset of m_stack entries that are p-wrappers; kept in sync by pushPWrapper / popAndGetContents
protected Stack<StackEntry> m_pStack;
// Warning: this list must be in alphabetical order because it is searched with Arrays.binarySearch
protected static final String[] ONLY_INLINE_ELEMENTS = {"a", "abbr", "acronym",
"applet", "b", "basefont", "bdo", "big", "br", "button", "cite",
"code", "dfn", "em", "font", "i", "iframe", "img", "input", "kbd",
"label", "legend", "map", "object", "param", "q", "rb", "rbc", "rp",
"rt", "rtc", "ruby", "s", "samp", "select", "small", "span", "strike",
"strong", "sub", "sup", "textarea", "tt", "u", "var"};
// Warning: this list must be in alphabetical order because it is searched with Arrays.binarySearch
protected static final String[] MARKED_EMPTY_ELEMENTS = {"li", "p", "tr"};
/** Creates a serializer whose final output is written to out. */
public CompatibilitySerializer(OutputStream out) {
m_stack = new Stack<StackEntry>();
m_pStack = new Stack<StackEntry>();
m_serializer = new DepurateSerializer(out);
}
/** Returns the top entry of the given stack, or null when the stack is empty. */
private StackEntry peek(Stack<StackEntry> stack) throws SAXException {
try {
return stack.peek();
} catch (EmptyStackException e) {
return null;
}
}
/**
* Pop the top of the stack, restore the parent stream in the serializer
* and return the previous stream (the popped entry's buffered contents).
* Also pops m_pStack when the popped entry is a p-wrapper.
* Throws SAXException when the stack is empty.
*/
private ByteArrayOutputStream popAndGetContents() throws SAXException {
try {
StackEntry entry = m_stack.pop();
if (entry.isPWrapper) {
m_pStack.pop();
}
ByteArrayOutputStream entryStream =
(ByteArrayOutputStream)m_serializer.getOutputStream();
m_serializer.setOutputStream(entry.savedStream);
return entryStream;
} catch (EmptyStackException e) {
throw new SAXException(e);
}
}
/**
* Push a new element to the top of the stack, and set up a new empty
* stream in the serializer. Returns the new element.
*/
private StackEntry push(String uri, String localName, String qName,
Attributes attrs) throws SAXException {
StackEntry entry = new StackEntry(uri, localName, qName, attrs,
m_serializer.getOutputStream());
m_stack.push(entry);
m_serializer.setOutputStream(new ByteArrayOutputStream());
return entry;
}
/**
* Equivalent to push() for a proposed p element. Will become a real
* p element if the contents is non-blank and contains no block elements.
*/
private StackEntry pushPWrapper() throws SAXException {
StackEntry entry = push("", "mw:p-wrap", "mw:p-wrap", new AttributesImpl());
m_pStack.push(entry);
return entry;
}
/**
* Writes a p-wrapper's buffered contents to the serializer's current stream:
* surrounded by <p>..</p> when the wrapper is enabled and non-blank, otherwise as-is.
*/
private void writePWrapper(StackEntry entry, ByteArrayOutputStream contents)
throws SAXException {
if (!entry.isDisabledPWrapper && !entry.blank) {
m_serializer.write("<p>");
m_serializer.writeStream(contents);
m_serializer.write("</p>");
} else {
m_serializer.writeStream(contents);
}
}
/**
* Handles character data: pushes a p-wrapper when the current element needs one,
* updates the blank / hasText flags, then forwards the characters to the serializer.
*/
public void characters(char[] chars, int start, int length)
throws SAXException {
StackEntry entry = peek(m_stack);
if (entry != null) {
if (entry.needsPWrapping) {
entry = pushPWrapper();
}
// only scan while the flags could still change
if (entry.blank || !entry.hasText) {
for (int i = start; i < start + length; i++) {
char c = chars[i];
// anything other than tab, LF, FF, CR or space counts as real text
if (!(c == 9 || c == 10 || c == 12 || c == 13 || c == 32)) {
entry.blank = false;
entry.hasText = true;
// the enclosing p-wrapper (if any) is no longer blank either
if (peek(m_pStack) != null) {
peek(m_pStack).blank = false;
}
break;
}
}
}
}
m_serializer.characters(chars, start, length);
}
/**
* Walks m_stack from the top down to the current p-wrapper. For each entry above the wrapper
* the serializer is switched back to that entry's saved stream and, when there is content,
* the element is emitted with what it has accumulated so far. The wrapper's own accumulated
* content is then emitted through writePWrapper, and a fresh stream is installed.
* haveContent forces emission even when no entry has recorded text.
*/
private void splitTagStack(boolean haveContent) throws SAXException {
StackEntry currentPWrapper = peek(m_pStack);
ByteArrayOutputStream seContent;
int n = m_stack.size();
int i = n - 1;
StackEntry se = m_stack.get(i);
while (se != currentPWrapper) {
seContent = (ByteArrayOutputStream)m_serializer.getOutputStream();
m_serializer.setOutputStream(se.savedStream);
if (se.hasText) {
haveContent = true;
}
// Emit content accumulated so far
if (haveContent) {
m_serializer.startElement(se.uri, se.localName, se.qName, se.attrs);
m_serializer.writeStream(seContent);
m_serializer.endElement(se.uri, se.localName, se.qName);
// All text has been output at this point
// Record that it has been split and reset it.
se.split = true;
se.blank = true;
se.hasText = false;
}
// Reset parent's saved stream always.
// As we unwind the stack, its saved content
// could get output.
se.savedStream = new ByteArrayOutputStream();
i--;
se = m_stack.get(i);
}
// Dump <p>.. contents ..</p>
// Note se == currentPWrapper
if (haveContent || se.hasText) {
seContent = (ByteArrayOutputStream)m_serializer.getOutputStream();
m_serializer.setOutputStream(se.savedStream);
// Emit content accumulated so far
writePWrapper(se, seContent);
// All text has been output at this point
se.blank = true;
}
// New stream going forward
m_serializer.setOutputStream(new ByteArrayOutputStream());
}
/** Returns true when localName is listed in ONLY_INLINE_ELEMENTS. */
private boolean isOnlyInline(String localName) {
return Arrays.binarySearch(ONLY_INLINE_ELEMENTS, localName) > -1;
}
// tagName is not used by the body
private void enterBlock(String tagName) throws SAXException {
// Whenever we enter a new block wrapper that is
// embedded within a p-wrapper,
//
// 1. Disable p-wrapping.
// 2. Split the tag stack and emit accumulated output
// with a p-wrapper.
StackEntry currentPWrapper = peek(m_pStack);
if (currentPWrapper.blockNestingLevel == 0) {
splitTagStack(false);
}
currentPWrapper.blockNestingLevel++;
currentPWrapper.isDisabledPWrapper = true;
}
// tagName is not used by the body
private void leaveBlock(String tagName) throws SAXException {
// Whenever we leave the outermost block wrapper that is
// embedded within a p-wrapper,
//
// 1. Re-enable p-wrapping.
// 2. Split the tag stack and emit accumulated output
// without a p-wrapper.
StackEntry currentPWrapper = peek(m_pStack);
currentPWrapper.blockNestingLevel--;
if (currentPWrapper.blockNestingLevel == 0) {
splitTagStack(true);
}
currentPWrapper.isDisabledPWrapper = false;
}
/**
* Opens an element: closes a p-wrapper on top of the stack when the new element is not inline,
* tracks block nesting inside any remaining p-wrapper, pushes a new p-wrapper when an inline
* element starts directly under an element that needs wrapping, then pushes the element itself.
*/
public void startElement(String uri, String localName, String qName,
Attributes atts) throws SAXException {
StackEntry oldEntry = peek(m_stack);
if (oldEntry != null) {
if (oldEntry.isPWrapper) {
if (!isOnlyInline(localName)) {
// This is non-inline so close the p-wrapper
ByteArrayOutputStream contents = popAndGetContents();
writePWrapper(oldEntry, contents);
oldEntry = peek(m_stack);
} else {
// We're putting an element inside the p-wrapper, so it is non-blank now
oldEntry.blank = false;
}
} else {
oldEntry.blank = false;
}
}
// Track block nesting level
boolean onlyInline = isOnlyInline(localName);
StackEntry currentPWrapper = peek(m_pStack);
if (currentPWrapper != null && !onlyInline) {
enterBlock(localName);
}
if (oldEntry != null && oldEntry.needsPWrapping && onlyInline) {
StackEntry entry = pushPWrapper();
// We're putting an element inside the p-wrapper, so it is non-blank
entry.blank = false;
}
push(uri, localName, qName, atts);
}
/**
* Closes an element: pops its buffered contents (first closing a p-wrapper that is on top of
* the stack), marks blank attribute-less li / p / tr elements, and writes the element into the
* restored parent stream unless it was already split and has gathered nothing since.
*/
public void endElement(String uri, String localName, String qName)
throws SAXException {
StackEntry entry = peek(m_stack);
ByteArrayOutputStream contents = popAndGetContents();
if (entry.isPWrapper) {
// Since we made this p-wrapper, the caller really wants to end the parent element.
// So first we need to close the p-wrapper
writePWrapper(entry, contents);
entry = peek(m_stack);
contents = popAndGetContents();
}
// Annotate empty li, p and tr elements (MARKED_EMPTY_ELEMENTS) so that they can be hidden in CSS,
// for compatibility with tidy and existing wikitext
if (Arrays.binarySearch(MARKED_EMPTY_ELEMENTS, localName) > -1) {
if (entry.attrs.getLength() == 0 && entry.blank) {
AttributesImpl newAttrs = new AttributesImpl();
newAttrs.addAttribute("", "class", "class", "", "mw-empty-elt");
entry.attrs = newAttrs;
}
}
// skip elements that splitTagStack already emitted and that are still blank
if (!entry.split || !entry.blank) {
m_serializer.startElement(entry.uri, entry.localName, entry.qName, entry.attrs);
m_serializer.writeStream(contents);
m_serializer.endElement(uri, localName, qName);
}
// Track block nesting level
boolean onlyInline = isOnlyInline(localName);
StackEntry currentPWrapper = peek(m_pStack);
if (currentPWrapper != null && !onlyInline) {
leaveBlock(localName);
}
}
// No-op: nothing is written at the start of the document
public void startDocument() throws SAXException {
}
// Delegates to the serializer's endDocument
public void endDocument() throws SAXException {
m_serializer.endDocument();
}
// Ignorable whitespace is treated exactly like character data
public void ignorableWhitespace(char[] ch, int start, int length)
throws SAXException {
characters(ch, start, length);
}
// No-op: processing instructions are dropped
public void processingInstruction(String target, String data)
throws SAXException {
}
public void setDocumentLocator(Locator locator) {
}
// Comments are forwarded straight to the serializer (into the current element's stream)
public void comment(char[] ch, int start, int length) throws SAXException {
m_serializer.comment(ch, start, length);
}
// The remaining ContentHandler / LexicalHandler callbacks are intentionally empty
public void endCDATA() throws SAXException {
}
public void endDTD() throws SAXException {
}
public void endEntity(String name) throws SAXException {
}
public void startCDATA() throws SAXException {
}
public void startDTD(String name, String publicId, String systemId)
throws SAXException {
}
public void startEntity(String name) throws SAXException {
}
public void startPrefixMapping(String prefix, String uri)
throws SAXException {
}
public void endPrefixMapping(String prefix) throws SAXException {
}
public void skippedEntity(String name) throws SAXException {
}
}

@ -0,0 +1,328 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.htmls.core.htmls.tidy.vnus; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.htmls.tidy.*;
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008-2011 Mozilla Foundation
* Copyright (c) 2016 Wikimedia Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*
* This file is mostly copied from validator.nu's HtmlSerializer. Changes:
*
* - Add a slash to void elements. The HTML 5 spec allows this and documents it
* as having no effect. It allows the output to pass XHTML validation.
*
* - &nbsp; is replaced with &#160;
*
* - Added getOutputStream(), setOutputStream(), write() and writeStream() to
* support CompatibilitySerializer.
*/
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.Arrays;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
/**
 * Serializes SAX events as HTML text, encoded as UTF-8, to an output stream that can be
 * swapped while serialization is in progress.
 *
 * Differences from a plain HTML serializer, as implemented below:
 * - void elements are written with a trailing " /&gt;" and their end tag is suppressed;
 * - U+00A0 is written as the numeric reference &amp;#160; in both text and attribute values;
 * - elements outside the XHTML, SVG and MathML namespaces are dropped (tags only);
 * - text inside the NON_ESCAPING elements is written without escaping.
 */
public class DepurateSerializer implements ContentHandler, LexicalHandler {
	// NOTE: both lists are searched with Arrays.binarySearch and must stay in alphabetical order
	private static final String[] VOID_ELEMENTS = { "area", "base", "basefont",
			"bgsound", "br", "col", "command", "embed", "frame", "hr", "img",
			"input", "keygen", "link", "meta", "param", "source", "track",
			"wbr" };
	private static final String[] NON_ESCAPING = { "iframe", "noembed",
			"noframes", "noscript", "plaintext", "script", "style", "xmp" };
	// numeric reference used for U+00A0; "&nbsp;" is not a predefined XML entity
	private static final String NBSP_REF = "&#160;";

	/** Wraps {@code out} in a UTF-8 writer. */
	private static Writer wrap(OutputStream out) {
		try {
			return new OutputStreamWriter(out, "UTF-8");
		} catch (UnsupportedEncodingException e) {
			throw new RuntimeException(e);
		}
	}

	// > 0 while inside an element whose tags are not written (foreign namespace) or after a void element
	private int ignoreLevel = 0;
	// > 0 while inside one of the NON_ESCAPING elements
	private int escapeLevel = 0;
	private OutputStream outputStream;
	private Writer writer;

	/** Creates a serializer that writes to {@code out}. */
	public DepurateSerializer(OutputStream out) {
		outputStream = out;
		this.writer = wrap(out);
	}

	/**
	 * Flushes pending characters and returns the current output stream.
	 *
	 * @throws SAXException if flushing fails
	 */
	public OutputStream getOutputStream() throws SAXException {
		try {
			writer.flush();
		} catch (IOException e) {
			throw new SAXException(e);
		}
		return outputStream;
	}

	/**
	 * Redirects all further output to {@code out}. The previous writer is NOT flushed here;
	 * call getOutputStream() first when pending characters must reach the old stream.
	 */
	public void setOutputStream(OutputStream out) throws RuntimeException {
		outputStream = out;
		writer = wrap(out);
	}

	/**
	 * Writes {@code s} verbatim (no escaping) and flushes.
	 *
	 * @throws SAXException if writing fails
	 */
	public void write(String s) throws SAXException {
		try {
			writer.write(s);
			writer.flush();
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	/**
	 * Copies the bytes buffered in {@code s} to the current output stream.
	 * The writer is flushed first so that the byte order matches the call order.
	 *
	 * @throws SAXException if writing fails
	 */
	public void writeStream(ByteArrayOutputStream s) throws SAXException {
		try {
			writer.flush();
			s.writeTo(outputStream);
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	/**
	 * Writes character data. Inside a NON_ESCAPING element the characters are written as-is;
	 * otherwise &lt;, &gt;, &amp; and U+00A0 are replaced by references.
	 */
	public void characters(char[] ch, int start, int length)
			throws SAXException {
		try {
			if (escapeLevel > 0) {
				writer.write(ch, start, length);
			} else {
				for (int i = start; i < start + length; i++) {
					char c = ch[i];
					switch (c) {
						case '<':
							writer.write("&lt;");
							break;
						case '>':
							writer.write("&gt;");
							break;
						case '&':
							writer.write("&amp;");
							break;
						case '\u00A0':
							writer.write(NBSP_REF);
							break;
						default:
							writer.write(c);
							break;
					}
				}
			}
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	/** Flushes and closes the current writer (and with it the current output stream). */
	public void endDocument() throws SAXException {
		try {
			writer.flush();
			writer.close();
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	/**
	 * Writes the end tag, unless the element is being ignored
	 * (foreign namespace, or a void element that was already closed with " /&gt;").
	 */
	public void endElement(String uri, String localName, String qName)
			throws SAXException {
		if (escapeLevel > 0) {
			escapeLevel--;
		}
		if (ignoreLevel > 0) {
			ignoreLevel--;
		} else {
			try {
				writer.write('<');
				writer.write('/');
				writer.write(localName);
				writer.write('>');
			} catch (IOException e) {
				throw new SAXException(e);
			}
		}
	}

	/** Ignorable whitespace is written exactly like character data. */
	public void ignorableWhitespace(char[] ch, int start, int length)
			throws SAXException {
		characters(ch, start, length);
	}

	public void processingInstruction(String target, String data)
			throws SAXException {
	}

	public void setDocumentLocator(Locator locator) {
	}

	/** Writes the HTML5 doctype line. */
	public void startDocument() throws SAXException {
		try {
			writer.write("<!DOCTYPE html>\n");
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	/**
	 * Writes the start tag and its attributes.
	 * Attribute values have the double quote, &amp; and U+00A0 replaced by references.
	 * Only attributes with no namespace, xlink attributes on non-XHTML elements, and
	 * xml-namespace attributes (on XHTML elements: "lang" only) are written.
	 */
	public void startElement(String uri, String localName, String qName,
			Attributes atts) throws SAXException {
		if (escapeLevel > 0) {
			escapeLevel++;
		}
		boolean xhtml = "http://www.w3.org/1999/xhtml".equals(uri);
		if (ignoreLevel > 0
				|| !(xhtml || "http://www.w3.org/2000/svg".equals(uri) || "http://www.w3.org/1998/Math/MathML".equals(uri))) {
			ignoreLevel++;
			return;
		}
		try {
			writer.write('<');
			writer.write(localName);
			for (int i = 0; i < atts.getLength(); i++) {
				String attUri = atts.getURI(i);
				String attLocal = atts.getLocalName(i);
				if (attUri.length() == 0) {
					writer.write(' ');
				} else if (!xhtml
						&& "http://www.w3.org/1999/xlink".equals(attUri)) {
					writer.write(" xlink:");
				} else if ("http://www.w3.org/XML/1998/namespace".equals(attUri)) {
					if (xhtml) {
						if ("lang".equals(attLocal)) {
							writer.write(' ');
						} else {
							continue;
						}
					} else {
						writer.write(" xml:");
					}
				} else {
					continue;
				}
				writer.write(attLocal);
				writer.write('=');
				writer.write('"');
				String val = atts.getValue(i);
				for (int j = 0; j < val.length(); j++) {
					char c = val.charAt(j);
					switch (c) {
						case '"':
							writer.write("&quot;");
							break;
						case '&':
							writer.write("&amp;");
							break;
						case '\u00A0':
							// same numeric reference as in characters(); "&nbsp;" would not be valid in XHTML
							writer.write(NBSP_REF);
							break;
						default:
							writer.write(c);
							break;
					}
				}
				writer.write('"');
			}
			if (Arrays.binarySearch(VOID_ELEMENTS, localName) > -1) {
				writer.write(" />");
				// the matching endElement call must not write an end tag
				ignoreLevel++;
				return;
			} else {
				writer.write('>');
			}
			if ("pre".equals(localName) || "textarea".equals(localName)
					|| "listing".equals(localName)) {
				writer.write('\n');
			}
			if (escapeLevel == 0
					&& Arrays.binarySearch(NON_ESCAPING, localName) > -1) {
				escapeLevel = 1;
			}
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	/** Writes a comment, unless inside an ignored or NON_ESCAPING element. */
	public void comment(char[] ch, int start, int length) throws SAXException {
		if (ignoreLevel > 0 || escapeLevel > 0) {
			return;
		}
		try {
			writer.write("<!--");
			writer.write(ch, start, length);
			writer.write("-->");
		} catch (IOException e) {
			throw new SAXException(e);
		}
	}

	// The remaining ContentHandler / LexicalHandler callbacks are intentionally empty
	public void endCDATA() throws SAXException {
	}

	public void endDTD() throws SAXException {
	}

	public void endEntity(String name) throws SAXException {
	}

	public void startCDATA() throws SAXException {
	}

	public void startDTD(String name, String publicId, String systemId)
			throws SAXException {
	}

	public void startEntity(String name) throws SAXException {
	}

	public void startPrefixMapping(String prefix, String uri)
			throws SAXException {
	}

	public void endPrefixMapping(String prefix) throws SAXException {
	}

	public void skippedEntity(String name) throws SAXException {
	}
}

Binary file not shown.

@ -6,7 +6,7 @@
<javac includeantruntime="false" target="${jdk}" source="${jdk}" sourcepath=""
srcdir ="${root_dir}/src/100_core"
destdir ="${root_dir}/src/100_core/bin"
classpath="${root_dir}/src/lib/junit.jar:${root_dir}/bin/any/java/apache/commons-compress-1.5.jar:${root_dir}/bin/any/java/xz/xz-1.5.jar">
classpath="${root_dir}/src/lib/junit.jar:${root_dir}/bin/any/java/apache/commons-compress-1.5.jar:${root_dir}/bin/any/java/xz/xz-1.5.jar:${root_dir}/bin/any/java/vnu/Saxon-HE-9.9.1-2.jar">
<include name="**/*.java"/>
</javac>
</target>
@ -56,7 +56,7 @@
<javac includeantruntime="false" target="${jdk}" source="${jdk}" sourcepath=""
srcdir ="${root_dir}/src/400_xowa"
destdir ="${root_dir}/src/400_xowa/bin"
classpath="${root_dir}/src/lib/junit.jar:${root_dir}/src/100_core/bin:${root_dir}/src/110_gfml/bin:${root_dir}/src/140_dbs/bin:${root_dir}/src/150_gfui/bin:${root_dir}/src/gplx.gflucene/bin:${root_dir}/bin/any/java/luaj/luaj_xowa.jar:${root_dir}/bin/any/java/jtidy/jtidy_xowa.jar:${root_dir}/bin/any/java/icu4j/icu4j-57_1.jar">
classpath="${root_dir}/src/lib/junit.jar:${root_dir}/src/100_core/bin:${root_dir}/src/110_gfml/bin:${root_dir}/src/140_dbs/bin:${root_dir}/src/150_gfui/bin:${root_dir}/src/gplx.gflucene/bin:${root_dir}/bin/any/java/luaj/luaj_xowa.jar:${root_dir}/bin/any/java/jtidy/jtidy_xowa.jar:${root_dir}/bin/any/java/icu4j/icu4j-57_1.jar:${root_dir}/bin/any/java/vnu/vnu.jar:${root_dir}/bin/any/java/vnu/Saxon-HE-9.9.1-2.jar">
<compilerarg line="-encoding utf-8"/>
<include name="**/*.java"/>
</javac>

@ -4,7 +4,7 @@
<jar destfile="${xowa_jar_path}">
<manifest>
<attribute name="Main-Class" value="gplx.xowa.Xowa_main"/>
<attribute name="Class-Path" value=". bin/${plat_name}/swt/swt.jar bin/any/java/apache/commons-compress-1.5.jar bin/any/java/jdbc/sqlite/sqlite-jdbc-3.18.0.jar bin/any/java/luaj/luaj_xowa.jar bin/any/java/jtidy/jtidy_xowa.jar bin/any/java/xz/xz-1.5.jar bin/any/java/icu4j/icu4j-57_1.jar bin/any/java/lucene/5.3.0.drd/lucene-core-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-highlighter-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-memory-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-queryparser-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-analyzers-common-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-queries-5.3.0-mobile-2.jar"/>
<attribute name="Class-Path" value=". bin/${plat_name}/swt/swt.jar bin/any/java/apache/commons-compress-1.5.jar bin/any/java/jdbc/sqlite/sqlite-jdbc-3.18.0.jar bin/any/java/luaj/luaj_xowa.jar bin/any/java/jtidy/jtidy_xowa.jar bin/any/java/xz/xz-1.5.jar bin/any/java/icu4j/icu4j-57_1.jar bin/any/java/lucene/5.3.0.drd/lucene-core-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-highlighter-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-memory-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-queryparser-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-analyzers-common-5.3.0-mobile-2.jar bin/any/java/lucene/5.3.0.drd/lucene-queries-5.3.0-mobile-2.jar bin/any/java/vnu/vnu.jar bin/any/java/vnu/Saxon-HE-9.9.1-2.jar"/>
</manifest>
<fileset dir="${root_dir}/src/100_core/bin"/>
<fileset dir="${root_dir}/src/110_gfml/bin"/>

Loading…
Cancel
Save