From 7b3d6e5271f81d31b4b660911668f1fcef70a72d Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Sun, 8 Mar 2020 12:03:48 -0400 Subject: [PATCH] Html: Update HTML Santizer whitelist [#636] --- .../xowa/htmls/core/htmls/Xoh_html_wtr.java | 9 + .../parsers/xndes/Xop_xatr_whitelist_mgr.java | 184 ++++++++++++++++-- .../xowa/parsers/xndes/Xop_xnde_tag_.java | 18 +- 3 files changed, 189 insertions(+), 22 deletions(-) diff --git a/400_xowa/src/gplx/xowa/htmls/core/htmls/Xoh_html_wtr.java b/400_xowa/src/gplx/xowa/htmls/core/htmls/Xoh_html_wtr.java index 36099f7c4..dfcb9a7c5 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/htmls/Xoh_html_wtr.java +++ b/400_xowa/src/gplx/xowa/htmls/core/htmls/Xoh_html_wtr.java @@ -362,6 +362,15 @@ public class Xoh_html_wtr { case Xop_xnde_tag_.Tid__mapframe: case Xop_xnde_tag_.Tid__maplink: case Xop_xnde_tag_.Tid__template_styles: + + // added new tags below; ISSUE#:636 DATE:2020-03-08 + case Xop_xnde_tag_.Tid__audio: + case Xop_xnde_tag_.Tid__video: + case Xop_xnde_tag_.Tid__track: + case Xop_xnde_tag_.Tid__rtc: + case Xop_xnde_tag_.Tid__figure: + case Xop_xnde_tag_.Tid__figure_inline: + case Xop_xnde_tag_.Tid__figcaption: Xox_xnde xtn = xnde.Xnde_xtn(); xtn.Xtn_write(bfr, app, ctx, this, hctx, page, xnde, src); break; diff --git a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xatr_whitelist_mgr.java b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xatr_whitelist_mgr.java index 1f24105d1..521d41133 100644 --- a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xatr_whitelist_mgr.java +++ b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xatr_whitelist_mgr.java @@ -52,21 +52,82 @@ public class Xop_xatr_whitelist_mgr { } return rv; } - public Xop_xatr_whitelist_mgr Ini() { // REF.MW:Sanitizer.php|setupAttributeWhitelist - Ini_grp("common" , null , "id", "class", "lang", "dir", "title", "style", "role"); - Ini_grp("block" , "common" , "align"); - Ini_grp("tablealign" , null , "align", "char", "charoff", "valign"); - Ini_grp("tablecell" , null , "abbr", "axis", "headers", 
"scope", "rowspan", "colspan", "nowrap", "width", "height", "bgcolor"); + // REF.MW:Sanitizer.php|setupAttributeWhitelist + public Xop_xatr_whitelist_mgr Ini() { + // REF.MW: https://github.com/wikimedia/mediawiki/blob/master/includes/parser/Sanitizer.php#L1767 + Ini_grp("common", null, + // HTML + "id", + "class", + "style", + "lang", + "dir", + "title", + + // WAI-ARIA + "aria-describedby", + "aria-flowto", + "aria-hidden", + "aria-label", + "aria-labelledby", + "aria-owns", + "role", + + // RDFa + // These attributes are specified in section 9 of + // https://www.w3.org/TR/2008/REC-rdfa-syntax-20081014 + "about", + "property", + "resource", + "datatype", + "type"+"of", + + // Microdata. These are specified by + // https://html.spec.whatwg.org/multipage/microdata.html#the-microdata-model + "itemid", + "itemprop", + "itemref", + "itemscope", + "itemtype" + ); + Ini_grp("block" , "common" , "align"); + Ini_grp("tablealign" , null , "align", "valign"); + Ini_grp("tablecell" , null, + "abbr", + "axis", + "headers", + "scope", + "rowspan", + "colspan", + "nowrap", // deprecated + "width", // deprecated + "height", // deprecated + "bgcolor" // deprecated + ); + + // Numbers refer to sections in HTML 4.01 standard describing the element. 
+ // See: https://www.w3.org/TR/html4/ + // 7.5.4 Ini_nde(Xop_xnde_tag_.Tid__div , "block"); - Ini_nde(Xop_xnde_tag_.Tid__center , "common"); + Ini_nde(Xop_xnde_tag_.Tid__center , "common"); // deprecated Ini_nde(Xop_xnde_tag_.Tid__span , "block"); + + // 7.5.5 Ini_nde(Xop_xnde_tag_.Tid__h1 , "block"); Ini_nde(Xop_xnde_tag_.Tid__h2 , "block"); Ini_nde(Xop_xnde_tag_.Tid__h3 , "block"); Ini_nde(Xop_xnde_tag_.Tid__h4 , "block"); Ini_nde(Xop_xnde_tag_.Tid__h5 , "block"); Ini_nde(Xop_xnde_tag_.Tid__h6 , "block"); + + // 7.5.6 + // address + + // 8.2.4 + Ini_nde(Xop_xnde_tag_.Tid__bdo , "common"); + + // 9.2.1 Ini_nde(Xop_xnde_tag_.Tid__em , "common"); Ini_nde(Xop_xnde_tag_.Tid__strong , "common"); Ini_nde(Xop_xnde_tag_.Tid__cite , "common"); @@ -76,32 +137,81 @@ public class Xop_xatr_whitelist_mgr { Ini_nde(Xop_xnde_tag_.Tid__kbd , "common"); Ini_nde(Xop_xnde_tag_.Tid__var , "common"); Ini_nde(Xop_xnde_tag_.Tid__abbr , "common"); + // acronym + + // 9.2.2 Ini_nde(Xop_xnde_tag_.Tid__blockquote , "common", "cite"); + Ini_nde(Xop_xnde_tag_.Tid__q , "common", "cite"); + + // 9.2.3 Ini_nde(Xop_xnde_tag_.Tid__sub , "common"); Ini_nde(Xop_xnde_tag_.Tid__sup , "common"); + + // 9.3.1 Ini_nde(Xop_xnde_tag_.Tid__p , "block"); - Ini_nde(Xop_xnde_tag_.Tid__br , "id", "class", "title", "style", "clear"); + + // 9.3.2 + Ini_nde(Xop_xnde_tag_.Tid__br , "common", "clear"); + + // https://www.w3.org/TR/html5/text-level-semantics.html#the-wbr-element + Ini_nde(Xop_xnde_tag_.Tid__wbr , "common"); + + // 9.3.4 Ini_nde(Xop_xnde_tag_.Tid__pre , "common", "width"); + + // 9.4 Ini_nde(Xop_xnde_tag_.Tid__ins , "common", "cite", "datetime"); Ini_nde(Xop_xnde_tag_.Tid__del , "common", "cite", "datetime"); + + // 10.2 Ini_nde(Xop_xnde_tag_.Tid__ul , "common", "type"); - Ini_nde(Xop_xnde_tag_.Tid__ol , "common", "type", "start"); + Ini_nde(Xop_xnde_tag_.Tid__ol , "common", "type", "start", "reversed"); Ini_nde(Xop_xnde_tag_.Tid__li , "common", "type", "value"); + + // 10.3 
Ini_nde(Xop_xnde_tag_.Tid__dl , "common"); Ini_nde(Xop_xnde_tag_.Tid__dd , "common"); Ini_nde(Xop_xnde_tag_.Tid__dt , "common"); + + // 11.2.1 Ini_nde(Xop_xnde_tag_.Tid__table , "common", "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "align", "bgcolor"); - Ini_nde(Xop_xnde_tag_.Tid__caption , "common", "align"); - Ini_nde(Xop_xnde_tag_.Tid__thead , "common", "tablealign"); - Ini_nde(Xop_xnde_tag_.Tid__tfoot , "common", "tablealign"); - Ini_nde(Xop_xnde_tag_.Tid__tbody , "common", "tablealign"); - Ini_nde(Xop_xnde_tag_.Tid__colgroup , "common", "span", "width", "tablealign"); - Ini_nde(Xop_xnde_tag_.Tid__col , "common", "span", "width", "tablealign"); + + // 11.2.2 + Ini_nde(Xop_xnde_tag_.Tid__caption , "block"); + + // 11.2.3 + Ini_nde(Xop_xnde_tag_.Tid__thead , "common"); + Ini_nde(Xop_xnde_tag_.Tid__tfoot , "common"); + Ini_nde(Xop_xnde_tag_.Tid__tbody , "common"); + + // 11.2.4 + Ini_nde(Xop_xnde_tag_.Tid__colgroup , "common", "span"); + Ini_nde(Xop_xnde_tag_.Tid__col , "common", "span"); + + // 11.2.5 Ini_nde(Xop_xnde_tag_.Tid__tr , "common", "bgcolor", "tablealign"); + + // 11.2.6 Ini_nde(Xop_xnde_tag_.Tid__td , "common", "tablecell", "tablealign"); Ini_nde(Xop_xnde_tag_.Tid__th , "common", "tablecell", "tablealign"); - Ini_nde(Xop_xnde_tag_.Tid__a , "common", "href", "rel", "rev"); - Ini_nde(Xop_xnde_tag_.Tid__img , "common", "alt", "src", "width", "height"); + + // 12.2 + // NOTE: <a> is not allowed directly, but the attrib + // whitelist is used from the Parser Object + Ini_nde(Xop_xnde_tag_.Tid__a , "common", "href", "rel", "rev"); // # rel/rev esp. 
for RDFa + + // 13.2 + // Not usually allowed, but may be used for extension-style hooks + // such as <math> when it is rasterized, or if $wgAllowImageTag is + // true + Ini_nde(Xop_xnde_tag_.Tid__img , "common", "alt", "src", "width", "height", "srcset"); + // Attributes for A/V tags added in T163583 / T133673 + Ini_nde(Xop_xnde_tag_.Tid__audio , "common", "controls", "preload", "width", "height"); + Ini_nde(Xop_xnde_tag_.Tid__video , "common", "poster", "controls", "preload", "width", "height"); + Ini_nde(Xop_xnde_tag_.Tid__source , "common", "type", "src"); + Ini_nde(Xop_xnde_tag_.Tid__track , "common", "type", "src", "srclang", "kind", "label"); + + // 15.2.1 Ini_nde(Xop_xnde_tag_.Tid__tt , "common"); Ini_nde(Xop_xnde_tag_.Tid__b , "common"); Ini_nde(Xop_xnde_tag_.Tid__i , "common"); @@ -110,18 +220,52 @@ public class Xop_xatr_whitelist_mgr { Ini_nde(Xop_xnde_tag_.Tid__strike , "common"); Ini_nde(Xop_xnde_tag_.Tid__s , "common"); Ini_nde(Xop_xnde_tag_.Tid__u , "common"); + + // 15.2.2 Ini_nde(Xop_xnde_tag_.Tid__font , "common", "size", "color", "face"); - Ini_nde(Xop_xnde_tag_.Tid__hr , "common", "noshade", "size", "width"); + // basefont + + // 15.3 + Ini_nde(Xop_xnde_tag_.Tid__hr , "common", "width"); + + + // HTML Ruby annotation text module, simple ruby only. 
+ // https://www.w3.org/TR/html5/text-level-semantics.html#the-ruby-element Ini_nde(Xop_xnde_tag_.Tid__ruby , "common"); + // rbc Ini_nde(Xop_xnde_tag_.Tid__rb , "common"); - Ini_nde(Xop_xnde_tag_.Tid__rt , "common"); Ini_nde(Xop_xnde_tag_.Tid__rp , "common"); + Ini_nde(Xop_xnde_tag_.Tid__rt , "common"); // $merge( $common, [ 'rbspan' ] ), + Ini_nde(Xop_xnde_tag_.Tid__rtc , "common"); + + // MathML root element, where used for extensions + // 'title' may not be 100% valid here; it's XHTML + // https://www.w3.org/TR/REC-MathML/ Ini_nde(Xop_xnde_tag_.Tid__math , "class", "style", "id", "title"); - Ini_nde(Xop_xnde_tag_.Tid__time , "class", "datetime"); + + // HTML 5 section 4.5 + Ini_nde(Xop_xnde_tag_.Tid__figure , "common"); + Ini_nde(Xop_xnde_tag_.Tid__figure_inline, "common"); // T118520 + Ini_nde(Xop_xnde_tag_.Tid__figcaption , "common"); + + // HTML 5 section 4.6 Ini_nde(Xop_xnde_tag_.Tid__bdi , "common"); + + // HTML5 elements, defined by: + // https://html.spec.whatwg.org/multipage/semantics.html#the-data-element Ini_nde(Xop_xnde_tag_.Tid__data , "common", "value"); + Ini_nde(Xop_xnde_tag_.Tid__time , "common", "datetime"); Ini_nde(Xop_xnde_tag_.Tid__mark , "common"); - Ini_nde(Xop_xnde_tag_.Tid__q , "common"); + + // meta and link are only permitted by removeHTMLtags when Microdata + // is enabled so we don't bother adding a conditional to hide these + // Also meta and link are only valid in WikiText as Microdata elements + // (ie: validateTag rejects tags missing the attributes needed for Microdata) + // So we don't bother including $common attributes that have no purpose. 
+ Ini_nde(Xop_xnde_tag_.Tid__meta , "itemprop", "content"); + Ini_nde(Xop_xnde_tag_.Tid__link , "itemprop", "href", "title"); + + // NOTE: not in MW, but needed for "data-sort-type"; check if needed later; DATE:2020-03-08 Ini_all_loose("data"); return this; } diff --git a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag_.java b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag_.java index 0857a27d6..10b82b129 100644 --- a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag_.java +++ b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_tag_.java @@ -146,8 +146,15 @@ public class Xop_xnde_tag_ { , Tid__meta = 121 , Tid__link = 122 , Tid__template_styles = 123 +, Tid__audio = 124 +, Tid__video = 125 +, Tid__track = 126 +, Tid__rtc = 127 +, Tid__figure = 128 +, Tid__figure_inline = 129 +, Tid__figcaption = 130 ; - public static final int Tid__len = 124; + public static final int Tid__len = 131; public static final Xop_xnde_tag[] Ary = new Xop_xnde_tag[Tid__len]; private static Xop_xnde_tag New(int id, String name) { Xop_xnde_tag rv = new Xop_xnde_tag(id, name); @@ -279,5 +286,12 @@ public class Xop_xnde_tag_ { , Tag__meta = New(Tid__meta, "meta") , Tag__link = New(Tid__link, "link") , Tag__template_styles = New(Tid__template_styles, "templatestyles").Xtn_().Html_mkr_(Tag_html_mkr_.Basic(false)) - ; +, Tag__audio = New(Tid__audio, "audio") +, Tag__video = New(Tid__video, "video") +, Tag__track = New(Tid__track, "track") +, Tag__rtc = New(Tid__rtc, "rtc") +, Tag__figure = New(Tid__figure, "figure") +, Tag__figure_inline = New(Tid__figure_inline, "figure-inline") +, Tag__figcaption = New(Tid__figcaption, "figcaption") +; }