Skip to content

Commit

Permalink
Merge pull request #4329 from inception-project/bugfix/4328-HTML-file…
Browse files Browse the repository at this point in the history
…s-are-not-rendered-if-they-use-the-HTML-namespace

#4328 - HTML files are not rendered if they use the HTML namespace
  • Loading branch information
reckart authored Nov 30, 2023
2 parents 4b1b180 + 19b9fd8 commit 1d370df
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ version: 1.0
case_sensitive: false
default_attribute_action: DROP
default_element_action: DROP
default_namespace: http://www.w3.org/1999/xhtml
debug: true
policies:
- action: PASS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ void thatOverrideFileIsPickedUp(@TempDir Path aTemp) throws Exception

var sut = new DefaultHtmlDocumentPolicy();

assertThat(sut.getPolicy().getElementPolicies()).hasSize(74);
assertThat(sut.getPolicy().getElementPolicies()).hasSize(148);

write(policyFile.toFile(), "policies: []", UTF_8);
assertThat(policyFile).exists();
Expand All @@ -54,6 +54,6 @@ void thatOverrideFileIsPickedUp(@TempDir Path aTemp) throws Exception

Files.delete(policyFile);
assertThat(policyFile).doesNotExist();
assertThat(sut.getPolicy().getElementPolicies()).hasSize(74);
assertThat(sut.getPolicy().getElementPolicies()).hasSize(148);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public class ExternalPolicyCollection
private boolean debug;
private ElementAction defaultElementAction;
private AttributeAction defaultAttributeAction;
private String defaultNamespace;

public String getName()
{
Expand Down Expand Up @@ -102,4 +103,14 @@ public AttributeAction getDefaultAttributeAction()
{
return defaultAttributeAction;
}

/**
 * Provides the default namespace declared for this externally defined policy collection.
 *
 * @return the default namespace URI, or {@code null} if none has been set.
 */
public String getDefaultNamespace()
{
    return this.defaultNamespace;
}

/**
 * Stores the default namespace declared for this externally defined policy collection.
 *
 * @param aDefaultNamespace
 *            the default namespace URI; may be {@code null} to indicate that none is set.
 */
public void setDefaultNamespace(String aDefaultNamespace)
{
    this.defaultNamespace = aDefaultNamespace;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import static de.tudarmstadt.ukp.inception.support.xml.XmlParserUtils.caseInsensitiveQNameComparator;
import static de.tudarmstadt.ukp.inception.support.xml.sanitizer.ElementAction.PASS;
import static org.apache.commons.lang3.StringUtils.isEmpty;

import java.lang.invoke.MethodHandles;
import java.util.LinkedHashMap;
Expand Down Expand Up @@ -46,6 +47,8 @@ public class PolicyCollectionBuilder
private ElementAction defaultElementAction = ElementAction.DROP;
private AttributeAction defaultAttributeAction = AttributeAction.DROP;

private String defaultNamespace;

public static PolicyCollectionBuilder caseSensitive()
{
return new PolicyCollectionBuilder(LinkedHashMap::new);
Expand All @@ -66,6 +69,12 @@ public PolicyCollectionBuilder(Supplier<? extends Map> aMapSupplier)
globalAttributePolicies = mapSupplier.get();
}

/**
 * Sets the namespace into which policies declared without a namespace are mirrored.
 * <p>
 * While a default namespace is set, element and attribute policies that are registered for
 * names without a namespace URI are additionally registered under this namespace.
 * <p>
 * A {@code null} or empty value disables the mirroring. An empty string must not be stored
 * as-is: a {@code QName} created with an empty namespace again reports an empty namespace
 * URI, so the mirroring step in {@code elementPolicy} and {@code attributePolicy} would keep
 * calling itself until the stack overflows.
 *
 * @param aDefaultNamespace
 *            the namespace URI to mirror into, or {@code null}/empty for none.
 * @return this builder.
 */
public PolicyCollectionBuilder defaultNamespace(String aDefaultNamespace)
{
    // Normalize "no namespace" to null because the mirroring guards only test for null
    if (aDefaultNamespace == null || aDefaultNamespace.isEmpty()) {
        defaultNamespace = null;
    }
    else {
        defaultNamespace = aDefaultNamespace;
    }
    return this;
}

public PolicyCollectionBuilder defaultAttributeAction(AttributeAction aDefaultAttributeAction)
{
defaultAttributeAction = aDefaultAttributeAction;
Expand Down Expand Up @@ -137,6 +146,10 @@ PolicyCollectionBuilder elementPolicy(QName aElement, ElementAction aAction)
elementPolicyBuilders.put(aElement,
new ElementPolicyBuilder(aElement, aAction, mapSupplier));

if (isEmpty(aElement.getNamespaceURI()) && defaultNamespace != null) {
elementPolicy(new QName(defaultNamespace, aElement.getLocalPart()), aAction);
}

return this;
}

Expand Down Expand Up @@ -201,6 +214,12 @@ void attributePolicy(QName aElementName, QName aAttributeName, AttributePolicy a
aElementName, aAttributeName, oldPolicy, aPolicy);
}
}

if (isEmpty(aElementName.getNamespaceURI()) && isEmpty(aAttributeName.getNamespaceURI())
&& defaultNamespace != null) {
attributePolicy(new QName(defaultNamespace, aElementName.getLocalPart()),
new QName(defaultNamespace, aAttributeName.getLocalPart()), aPolicy);
}
}

public void allowAttribute(QName aAttribute, Pattern aPattern)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ public static PolicyCollection loadPolicies(InputStream aIs) throws IOException
policyCollectionBuilder
.defaultAttributeAction(externalCollection.getDefaultAttributeAction());
}
if (externalCollection.getDefaultNamespace() != null) {
policyCollectionBuilder.defaultNamespace(externalCollection.getDefaultNamespace());
}

for (ExternalPolicy policy : externalCollection.getPolicies()) {
var isElementPolicy = policy.getElements() != null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ public class SanitizingContentHandler
private final Logger log = LoggerFactory.getLogger(getClass());

private static final String MASKED = "MASKED-";
private static final String NO_LOCAL_NAME = "NO-LOCAL-NAME";
private static final String XMLNS = "xmlns";
private static final String XMLNS_PREFIX = "xmlns:";

Expand Down Expand Up @@ -119,15 +120,20 @@ private void startElement(QName aElement, Attributes aAtts, Optional<ElementPoli
throws SAXException
{
QName element = aElement;
ElementAction action = aAction;

if ((aAction == ElementAction.DROP || aAction == ElementAction.SKIP)
&& policies.isDebug()) {
if (StringUtils.isBlank(element.getLocalPart())) {
action = ElementAction.SKIP;
element = new QName(element.getNamespaceURI(), NO_LOCAL_NAME, "");
}

if ((action == ElementAction.DROP || action == ElementAction.SKIP) && policies.isDebug()) {
element = maskElement(element);
}

stack.push(new Frame(element, aPolicy, aAction, aLocalNamespaces));
stack.push(new Frame(element, aPolicy, action, aLocalNamespaces));

if (aAction == ElementAction.PASS || policies.isDebug()) {
if (action == ElementAction.PASS || policies.isDebug()) {
super.startElement(element.getNamespaceURI(), element.getLocalPart(), getQName(element),
aAtts);
}
Expand Down Expand Up @@ -244,10 +250,15 @@ private void sanitizeAttribute(AttributesImpl aSanitizedAttributes, QName aEleme
var action = policies.forAttribute(aElement, attribute, type, value)
.orElse(policies.getDefaultAttributeAction());

if ("xmlns".equals(attribute.getPrefix())) {
if (XMLNS.equals(attribute.getPrefix())) {
action = AttributeAction.PASS;
}

if (StringUtils.isBlank(attribute.getLocalPart())) {
action = AttributeAction.DROP;
attribute = new QName(attribute.getNamespaceURI(), NO_LOCAL_NAME, "");
}

switch (action) {
case PASS:
aSanitizedAttributes.addAttribute(uri, localName, qName, type, value);
Expand Down

0 comments on commit 1d370df

Please sign in to comment.