1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-09-28 14:30:51 +00:00

Xomw: Convert XomwHtml

This commit is contained in:
gnosygnu 2017-02-25 08:37:34 -05:00
parent 1328288cd2
commit 0f92bb55db
7 changed files with 1197 additions and 278 deletions

View File

@ -277,7 +277,7 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
Comm_bgn_len = Comm_bgn.length
, Comm_end_len = Comm_end.length
;
private static final byte[] Rhs_bgn = Bry_.new_a7("</");
public static final byte[] Rhs_bgn = Bry_.new_a7("</");
public static void Bld_lhs_bgn(Bry_bfr bfr, byte[] tag) {
	// Appends the start of an opening tag: Byte_ascii.Lt followed by the tag name. EX: "<tag"
	bfr.Add_byte(Byte_ascii.Lt).Add(tag);
}
public static void Bld_lhs_end_nde(Bry_bfr bfr) {
	// Appends the terminator of a normal (non-inline) opening tag. EX: ">"
	bfr.Add_byte(Byte_ascii.Gt);
}
public static void Bld_lhs_end_inl(Bry_bfr bfr) {
	// Appends the terminator of an inline (self-closing) tag: slash then Gt. EX: "/>"
	bfr.Add_byte(Byte_ascii.Slash).Add_byte(Byte_ascii.Gt);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,21 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import gplx.core.btries.*;
/**
 * Holder for two reusable working objects: a byte buffer and a trie result.
 * NOTE(review): presumably passed to the static XomwHtml methods so that they
 * do not have to allocate these per call -- confirm against XomwHtml.
 */
public class XomwHtmlTemp {
	/** Reusable trie result object. */
	public final Btrie_rv trv = new Btrie_rv();

	/** Reusable byte buffer. */
	public final Bry_bfr bfr = Bry_bfr_.New();
}

View File

@ -13,15 +13,21 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import org.junit.*; import gplx.core.tests.*;
public class Xomw_html_utl__expand_attributes__tst {
private final Xomw_html_utl__expand_attributes__fxt fxt = new Xomw_html_utl__expand_attributes__fxt();
@Test public void Basic() {fxt.Test__expand_attributes(" a=\"b\"", "a", "b");}
import gplx.xowa.mediawiki.includes.htmls.*;
public class XomwHtml_expandAttributesTest {
private final XomwHtml_expandAttributesFxt fxt = new XomwHtml_expandAttributesFxt();
@Test public void Basic() {
fxt.Test__expand_attributes(" a=\"b\"", "a", "b");
}
@Test public void NullVal() {
fxt.Test__expand_attributes("", "a", null);
}
}
class Xomw_html_utl__expand_attributes__fxt {
private final XomwHtml utl = new XomwHtml();
class XomwHtml_expandAttributesFxt {
private final Bry_bfr bfr = Bry_bfr_.New();
private final XomwHtmlTemp temp = new XomwHtmlTemp();
public void Test__expand_attributes(String expd, String... kvs) {
Xomw_atr_mgr atrs = new Xomw_atr_mgr();
int kvs_len = kvs.length;
@ -31,7 +37,7 @@ class Xomw_html_utl__expand_attributes__fxt {
Xomw_atr_itm itm = new Xomw_atr_itm(-1, key, val);
atrs.Add(itm);
}
utl.Expand_attributes(bfr, atrs);
XomwHtml.expandAttributes(bfr, temp, atrs);
Gftest.Eq__str(expd, bfr.To_str_and_clear());
}
}

View File

@ -29,12 +29,12 @@ public class XomwLinker {
// private XomwParserEnv env;
private final Bry_bfr tmp = Bry_bfr_.New(), tmp_2 = Bry_bfr_.New();
private final Linker_rel_splitter splitter = new Linker_rel_splitter();
private final XomwHtml html_utl = new XomwHtml();
private byte[] wg_title = null;
private final Btrie_rv trv = new Btrie_rv();
private final byte[][] split_trail_rv = new byte[2][];
private Btrie_slim_mgr split_trail_trie;
private final Xomw_atr_mgr tmp_attribs = new Xomw_atr_mgr();
private final XomwHtmlTemp htmlTemp = new XomwHtmlTemp();
private static final byte[] Atr__class = Bry_.new_a7("class"), Atr__rel = Bry_.new_a7("rel"), Atr__href = Bry_.new_a7("href"), Rel__nofollow = Bry_.new_a7("nofollow");
public static final byte[]
@ -646,14 +646,14 @@ public class XomwLinker {
zoom_icon = Bry_.Empty;
}
else {
html_utl.rawElement(tmp, Gfh_tag_.Bry__a
XomwHtml.rawElement(tmp, htmlTemp, Gfh_tag_.Bry__a
, tmp_attribs.Clear()
.Add(Gfh_atr_.Bry__href , url)
.Add(Gfh_atr_.Bry__class, Class__internal)
.Add(Gfh_atr_.Bry__title, XomwGlobalFunctions.wfMessage(env, "thumbnail-more").text())
, Bry_.Empty);
byte[] zoom_anch = tmp.To_bry_and_clear();
html_utl.rawElement(tmp, Gfh_tag_.Bry__div, tmp_attribs.Clear().Add(Gfh_atr_.Bry__class, Class__magnify), zoom_anch);
XomwHtml.rawElement(tmp, htmlTemp, Gfh_tag_.Bry__div, tmp_attribs.Clear().Add(Gfh_atr_.Bry__class, Class__magnify), zoom_anch);
zoom_icon = tmp.To_bry_and_clear();
}
}
@ -890,7 +890,7 @@ public class XomwLinker {
// XO.MW.HOOK:LinkerMakeExternalLink
attribs.Set(Atr__href, url);
html_utl.rawElement(bfr, Bry_.new_a7("a"), attribs, text);
XomwHtml.rawElement(bfr, htmlTemp, Bry_.new_a7("a"), attribs, text);
}
// XO.MW: MW puts this function in Parser.php
private byte[] getExternalLinkRel(byte[] url, byte[] title) {

View File

@ -1,264 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.core.btries.*;
/**
 * Port of MediaWiki's Html class: writes raw HTML elements and serialized
 * attribute lists into a Bry_bfr.
 *
 * The fields tmp and trv are working objects handed to the case-conversion
 * and strtr helpers on every call.
 */
public class XomwHtml {
	private final Bry_bfr tmp = Bry_bfr_.New();
	private final Btrie_rv trv = new Btrie_rv();

	/**
	 * Writes a complete element to bfr.
	 * For elements listed in void_elements the opening tag is turned into the
	 * inline form ("/>") and contents is ignored; otherwise contents is
	 * appended as-is, followed by the closing tag.
	 *
	 * @param bfr      buffer that receives the HTML
	 * @param element  tag name
	 * @param attribs  attributes for the opening tag; may be modified (see Open_element__lcased)
	 * @param contents raw inner HTML
	 */
	public void rawElement(Bry_bfr bfr, byte[] element, Xomw_atr_mgr attribs, byte[] contents) {
		// NOTE(review): the return value is discarded, so this only lower-cases element
		// if Bry_.Lcase__all works in place -- confirm.
		Bry_.Lcase__all(element); // XO:lcase element
		Open_element__lcased(bfr, element, attribs);
		if (void_elements.Has(element)) {
			// presumably drops the closing angle bracket just written so that "/>" can replace it
			bfr.Del_by_1().Add(Bry__elem__lhs__inl);
		}
		else {
			bfr.Add(contents);
			Close_element__lcased(bfr, element);
		}
	}

	/**
	 * Writes the opening tag (Angle_bgn, element, attributes, Angle_end) to bfr.
	 * Before writing, an "input" element loses a type attribute whose value is
	 * not in valid_input_types, and a "button" element without a type attribute
	 * gets type="submit".
	 */
	private void Open_element__lcased(Bry_bfr bfr, byte[] element, Xomw_atr_mgr attribs) {
		// This is not required in HTML5, but let's do it anyway, for
		// consistency and better compression.
		// $element = strtolower($element); // XO:handled by callers
		// Remove invalid input types
		if (Bry_.Eq(element, Tag__input)) {
			// PORTED.HEADER:valid_input_types
			byte[] type_atr_val = attribs.Get_val_or_null(Atr__type);
			if (type_atr_val != null && !valid_input_types.Has(type_atr_val)) {
				attribs.Del(Atr__type);
			}
		}
		// According to standard the default type for <button> elements is "submit".
		// Depending on compatibility mode IE might use "button", instead.
		// We enforce the standard "submit".
		if (Bry_.Eq(element, Tag__button) && attribs.Get_val_or_null(Atr__type) == null) {
			attribs.Set(Atr__type, Val__type__submit);
		}
		bfr.Add_byte(Byte_ascii.Angle_bgn).Add(element);
		Expand_attributes(bfr, attribs); // TODO.XO:self::dropDefaults($element, $attribs)
		bfr.Add_byte(Byte_ascii.Angle_end);
	}

	/**
	 * Appends every attribute in atrs to bfr as {@code  key="val"}, each preceded by a space.
	 * Attributes with a null value are skipped. Keys are lower-cased. Keys listed in
	 * bool_attribs are written as {@code  key=""}; all other values are escaped
	 * through atr_val_encodings.
	 *
	 * @param bfr  buffer that receives the serialized attributes
	 * @param atrs attributes to serialize
	 */
	public void Expand_attributes(Bry_bfr bfr, Xomw_atr_mgr atrs) {
		int len = atrs.Len();
		for (int i = 0; i < len; i++) {
			Xomw_atr_itm atr = (Xomw_atr_itm)atrs.Get_at(i);
			byte[] key = atr.Key_bry();
			byte[] val = atr.Val();
			// Support intuitive [ 'checked' => true/false ] form
			if (val == null) { // TESTME
				continue;
			}
			// For boolean attributes, support [ 'foo' ] instead of
			// requiring [ 'foo' => 'meaningless' ].
			if (atr.Key_int() != -1 && bool_attribs.Has(val)) {
				key = val;
			}
			// Not technically required in HTML5 but we'd like consistency
			// and better compression anyway.
			key = Bry_.Xcase__build__all(tmp, Bool_.N, key);
			// PORTED.HEADER:$spaceSeparatedListAttributes
			// Specific features for attributes that allow a list of space-separated values
			if (space_separated_list_attributes.Has(key)) {
				// Apply some normalization and remove duplicates
				// Convert into correct array. Array can contain space-separated
				// values. Implode/explode to get those into the main array as well.
				// if (is_array($value)) {
				// If input wasn't an array, we can skip this step
				// $newValue = [];
				// foreach ($value as $k => $v) {
				// if (is_string($v)) {
				// String values should be normal `array('foo')`
				// Just append them
				// if (!isset($value[$v])) {
				// As a special case don't set 'foo' if a
				// separate 'foo' => true/false exists in the array
				// keys should be authoritative
				// $newValue[] = $v;
				// }
				// }
				// elseif ($v) {
				// If the value is truthy but not a String this is likely
				// an [ 'foo' => true ], falsy values don't add strings
				// $newValue[] = $k;
				// }
				// }
				// $value = implode(' ', $newValue);
				// }
				// $value = explode(' ', $value);
				// Normalize spacing by fixing up cases where people used
				// more than 1 space and/or a trailing/leading space
				// $value = array_diff($value, [ '', ' ' ]);
				// Remove duplicates and create the String
				// $value = implode(' ', array_unique($value));
			}
			// DELETE
			// elseif (is_array($value)) {
			// throw new MWException("HTML attribute $key can not contain a list of values");
			// }
			// XO: MW tests the lower-cased key here -- in_array($key, self::$boolAttribs) --
			// not the value; testing the value would turn title="hidden" into title=""
			if (bool_attribs.Has(key)) {
				bfr.Add_byte_space().Add(key).Add(Bry__atr__val__empty); // $ret .= " $key=\"\"";
			}
			else {
				// PORTED.HEADER:atr_val_encodings
				val = XophpString.strtr(val, atr_val_encodings, tmp, trv);
				bfr.Add_byte_space().Add(key).Add(Bry__atr__val__quote).Add(val).Add_byte_quote();
			}
		}
	}

	/** Appends the closing tag for element to bfr. */
	private void Close_element__lcased(Bry_bfr bfr, byte[] element) {
		bfr.Add(Bry__elem__rhs__bgn).Add(element).Add_byte(Byte_ascii.Angle_end); // EX: "</", element, ">";
	}

	private static final byte[]
	  Bry__elem__lhs__inl = Bry_.new_a7("/>")
	, Bry__elem__rhs__bgn = Bry_.new_a7("</")
	, Bry__atr__val__quote = Bry_.new_a7("=\"")
	, Bry__atr__val__empty = Bry_.new_a7("=\"\"")
	, Tag__input = Bry_.new_a7("input")
	, Tag__button = Bry_.new_a7("button")
	, Atr__type = Bry_.new_a7("type")
	, Val__type__submit = Bry_.new_a7("submit")
	;

	// List of void elements from HTML5, section 8.1.2 as of 2016-09-19
	private static final Hash_adp_bry void_elements = Hash_adp_bry.cs().Add_many_str
	(
		"area",
		"base", // XO: was "super"; PHP-to-Java keyword replacement artifact
		"br",
		"col",
		"embed",
		"hr",
		"img",
		"input",
		"keygen",
		"link",
		"meta",
		"param",
		"source",
		"track",
		"wbr"
	);

	// Boolean attributes, which may have the value omitted entirely. Manually
	// collected from the HTML5 spec as of 2011-08-12.
	private static final Hash_adp_bry bool_attribs = Hash_adp_bry.ci_a7().Add_many_str(
		"async",
		"autofocus",
		"autoplay",
		"checked",
		"controls",
		"default",
		"defer",
		"disabled",
		"formnovalidate",
		"hidden",
		"ismap",
		// "itemscope", //XO:duplicate; added below
		"loop",
		"multiple",
		"muted",
		"novalidate",
		"open",
		"pubdate",
		"readonly", // XO: was "final "; PHP-to-Java keyword replacement artifact
		"required",
		"reversed",
		"scoped",
		"seamless",
		"selected",
		"truespeed",
		"typemustmatch",
		// HTML5 Microdata
		"itemscope"
	);

	private static final Btrie_slim_mgr atr_val_encodings = Btrie_slim_mgr.cs()
		// Apparently we need to entity-encode \n, \r, \t, although the
		// spec doesn't mention that. Since we're doing strtr() anyway,
		// we may as well not call htmlspecialchars().
		// @todo FIXME: Verify that we actually need to
		// escape \n\r\t here, and explain why, exactly.
		// We could call Sanitizer::encodeAttribute() for this, but we
		// don't because we're stubborn and like our marginal savings on
		// byte size from not having to encode unnecessary quotes.
		// The only difference between this transform and the one by
		// Sanitizer::encodeAttribute() is ' is not encoded.
		.Add_str_str("&" , "&amp;")
		.Add_str_str("\"" , "&quot;")
		.Add_str_str(">" , "&gt;")
		// '<' allegedly allowed per spec
		// but breaks some tools if not escaped.
		.Add_str_str("<" , "&lt;")
		.Add_str_str("\n" , "&#10;")
		.Add_str_str("\r" , "&#13;")
		.Add_str_str("\t" , "&#9;");

	// https://www.w3.org/TR/html401/index/attributes.html ("space-separated")
	// https://www.w3.org/TR/html5/index.html#attributes-1 ("space-separated")
	private static final Hash_adp_bry space_separated_list_attributes = Hash_adp_bry.ci_a7().Add_many_str(
		"class", // html4, html5
		"accesskey", // as of html5, multiple space-separated values allowed
		// html4-spec doesn't document rel= as space-separated
		// but has been used like that and is now documented as such
		// in the html5-spec.
		"rel"
	);

	// Values accepted for the type attribute of <input>; any other value is dropped by Open_element__lcased
	private static final Hash_adp_bry valid_input_types = Hash_adp_bry.ci_a7().Add_many_str(
		// Remove invalid input types
		"hidden",
		"text",
		"password",
		"checkbox",
		"radio",
		"file",
		"submit",
		"image",
		"reset",
		"button",
		// HTML input types
		"datetime",
		"datetime-local",
		"date",
		"month",
		"time",
		"week",
		"number",
		"range",
		"email",
		"url",
		"search",
		"tel",
		"color"
	);
}

View File

@ -65,7 +65,7 @@ public class XomwLinkRenderer {
*/
// private boolean runLegacyBeginHook = true;
private final XomwHtml html = new XomwHtml();
private final XomwHtmlTemp htmlTemp = new XomwHtmlTemp();
private final Xomw_atr_mgr attribs = new Xomw_atr_mgr();
private final List_adp tmp_merge_deleted = List_adp_.New();
private final XomwSanitizer sanitizer;
@ -349,7 +349,7 @@ public class XomwLinkRenderer {
// XO.MW.HOOK:LinkEnd
html.rawElement(bfr, Gfh_tag_.Bry__a, attribs, htmlBry);
XomwHtml.rawElement(bfr, htmlTemp, Gfh_tag_.Bry__a, attribs, htmlBry);
}
/**