Skip to content

Commit

Permalink
fix: parse boolean/enumerated HTML attributes as case-insensitive
Browse files Browse the repository at this point in the history
- add an `HTMLUtils` facility to get whether an HTML attribute has a
  case-insensitive value (boolean and enumerated attributes)
- pre-process case-insensitive HTML attributes to lower-case their value
- refactor the pre-processing attribute logic for more clarity
- add a test

Fix #941
  • Loading branch information
rdeltour committed Feb 8, 2019
1 parent 99f882a commit 5b3533a
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 56 deletions.
78 changes: 78 additions & 0 deletions src/main/java/com/adobe/epubcheck/xml/HTMLUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package com.adobe.epubcheck.xml;

import com.google.common.base.Preconditions;
import java.util.Objects;

/**
 * Utilities for HTML-specific logic.
 * <p>
 * This class only exposes static helpers and cannot be instantiated.
 * </p>
 */
public final class HTMLUtils
{

  private HTMLUtils()
  {
    // static utility holder: no instances
  }

  /**
   * Returns whether an attribute is defined as having a case-insensitive value in
   * HTML. This is notably the case of boolean attributes and enumerated
   * attributes.
   * <p>
   * The name is matched exactly (case-sensitively) against the list of known
   * attributes, so <code>"hidden"</code> is recognized but <code>"HIDDEN"</code>
   * is not.
   * </p>
   *
   * @param name
   *          the name of an attribute defined in the HTML specification, must
   *          not be <code>null</code>
   * @return <code>true</code> iff the attribute value is case-insensitive
   * @throws NullPointerException
   *           if <code>name</code> is <code>null</code>
   */
  public static boolean isCaseInsensitiveAttribute(String name)
  {
    switch (Objects.requireNonNull(name, "name"))
    {
    case "align":
    case "allowfullscreen":
    case "allowpaymentrequest":
    case "allowusermedia":
    case "async":
    case "autocapitalize":
    case "autocomplete":
    case "autofocus":
    case "autoplay":
    case "checked":
    case "contenteditable":
    case "controls":
    case "crossorigin":
    case "default":
    case "defer":
    case "dir":
    case "disabled":
    case "draggable":
    case "formnovalidate":
    case "hidden":
    case "http-equiv":
    case "ismap":
    case "itemscope":
    case "kind":
    case "loop":
    case "multiple":
    case "muted":
    case "nomodule":
    case "novalidate":
    case "open":
    case "playsinline":
    case "preload":
    case "readonly":
    case "required":
    case "reversed":
    case "scope":
    case "selected":
    case "shape":
    case "sizes":
    case "spellcheck":
    case "step":
    case "translate":
    case "type":
    case "typemustmatch":
    case "valign":
    case "value":
    case "wrap":
      return true;
    default:
      return false;
    }
  }

}
87 changes: 42 additions & 45 deletions src/main/java/com/adobe/epubcheck/xml/XMLParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,8 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.Vector;
Expand Down Expand Up @@ -466,41 +464,12 @@ public void startDocument()
}
}

public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
public void startElement(String namespaceURI, String localName, String qName,
Attributes parsedAttribs)
throws SAXException
{

AttributesImpl attribs = new AttributesImpl(atts);

if ("application/xhtml+xml".equals(context.mimeType)
&& context.version == EPUBVersion.VERSION_3)
{
try
{
int len = attribs.getLength();
List<String> removals = new ArrayList<String>();
for (int i = 0; i < len; i++)
{
if (attribs.getLocalName(i).startsWith("data-"))
{
removals.add(attribs.getQName(i));
}
else if (isCustomNamespaceAttr(attribs.getURI(i)))
{
removals.add(attribs.getQName(i));
}
}
for (String remove : removals)
{
int rmv = attribs.getIndex(remove);
// System.out.println("removing attribute " + attribs.getQName(rmv));
attribs.removeAttribute(rmv);
}
} catch (Exception e)
{
System.err.println("data-* removal exception: " + e.getMessage());
}
}
Attributes attribs = preprocessAttributes(namespaceURI, localName, qName, parsedAttribs);

int vlen = validatorContentHandlers.size();
for (int i = 0; i < vlen; i++)
Expand Down Expand Up @@ -564,23 +533,51 @@ else if (isCustomNamespaceAttr(attribs.getURI(i)))
knownXHTMLContentDocsNamespaces.add(Namespaces.XLINK);
}

private boolean isCustomNamespaceAttr(String nsuri)
private Attributes preprocessAttributes(String elemNamespace, String elemName, String elemQName,
Attributes originalAttribs)
{

if (nsuri == null || nsuri.trim().length() == 0)
{
return false;
}

for (String ns : knownXHTMLContentDocsNamespaces)
AttributesImpl attributes = new AttributesImpl(originalAttribs);
if ("application/xhtml+xml".equals(context.mimeType)
&& context.version == EPUBVersion.VERSION_3)
{
if (ns.equals(nsuri))
try
{
return false;
for (int i = attributes.getLength() - 1; i >= 0; i--)
{
if (isDataAttribute(attributes, i) || isCustomNamespaceAttribute(attributes, i))
{
attributes.removeAttribute(i);
}
else if (Namespaces.XHTML.equals(elemNamespace)
&& isCaseInsensitiveAttribute(attributes, i))
{
attributes.setValue(i, attributes.getValue(i).toLowerCase(Locale.ENGLISH));
}
}
} catch (Exception e)
{
throw new IllegalStateException("data-* removal exception", e);
}
}
return attributes;
}

return true;
/**
 * Tells whether the local name of the attribute at the given position starts
 * with <code>data-</code>.
 *
 * @param attributes
 *          the attribute list to inspect
 * @param index
 *          the position of the attribute in the list
 * @return <code>true</code> iff the attribute's local name begins with
 *         <code>data-</code>
 */
private static boolean isDataAttribute(Attributes attributes, int index)
{
  String localName = attributes.getLocalName(index);
  return localName.startsWith("data-");
}

/**
 * Tells whether the attribute at the given position is in a namespace that is
 * not part of <code>knownXHTMLContentDocsNamespaces</code>.
 * <p>
 * Attributes whose namespace URI is <code>null</code> or blank are never
 * considered custom. The URI is trimmed before being looked up.
 * </p>
 *
 * @param attributes
 *          the attribute list to inspect
 * @param index
 *          the position of the attribute in the list
 * @return <code>true</code> iff the attribute has a non-blank namespace URI
 *         that is not a known one
 */
private boolean isCustomNamespaceAttribute(Attributes attributes, int index)
{
  String uri = attributes.getURI(index);
  if (uri == null)
  {
    return false;
  }
  String trimmedUri = uri.trim();
  if (trimmedUri.length() == 0)
  {
    return false;
  }
  return !knownXHTMLContentDocsNamespaces.contains(trimmedUri);
}

/**
 * Tells whether the attribute at the given position is a no-namespace
 * attribute that {@link HTMLUtils#isCaseInsensitiveAttribute(String)} reports
 * as having a case-insensitive value.
 *
 * @param attributes
 *          the attribute list to inspect
 * @param index
 *          the position of the attribute in the list
 * @return <code>true</code> iff the attribute has an empty namespace URI and
 *         its local name is a case-insensitive HTML attribute
 */
private static boolean isCaseInsensitiveAttribute(Attributes attributes, int index)
{
  // namespaced attributes are never HTML attributes
  if (!attributes.getURI(index).isEmpty())
  {
    return false;
  }
  return HTMLUtils.isCaseInsensitiveAttribute(attributes.getLocalName(index));
}

public void startPrefixMapping(String arg0, String arg1)
Expand Down
14 changes: 12 additions & 2 deletions src/test/java/com/adobe/epubcheck/ops/OPSCheckerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -891,9 +891,19 @@ public void testValidateXHTML301MDInvalid()
}

@Test
public void testValidateXHTML301CustomAttributes()
public void testAttributesInCustomNS()
{
testValidateDocument("xhtml/valid/custom-ns-attrs.xhtml", "application/xhtml+xml",
// test that attributes in a custom namespace are ignored
testValidateDocument("xhtml/valid/attrs-custom-ns.xhtml", "application/xhtml+xml",
EPUBVersion.VERSION_3);
}

/**
 * Checks that the values of HTML boolean attributes and enumerated attributes
 * are parsed in a case-insensitive manner.
 */
@Test
public void testAttributesCaseInsensitive()
{
  final String fixture = "xhtml/valid/attrs-case-insensitive.xhtml";
  final String mediaType = "application/xhtml+xml";
  testValidateDocument(fixture, mediaType, EPUBVersion.VERSION_3);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<title>Test</title>
<meta charset="UTF-8"/>
</head>
<body>
<h1>Test</h1>
<div hidden="HIDDEN"></div>
</body>
</html>
11 changes: 11 additions & 0 deletions src/test/resources/30/single/xhtml/valid/attrs-custom-ns.xhtml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:foo="https://example.org" lang="en">
<head>
<title>Test</title>
<meta charset="UTF-8" />
</head>
<body>
<h1>Test</h1>
<p foo:bar1="baz" foo:bar2="baz" foo:bar3="baz">custom attribute!</p>
</body>
</html>

This file was deleted.

0 comments on commit 5b3533a

Please sign in to comment.