From 19b9fd84eb768b2e5858c9f90cc36ff7dcf1e1c1 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Wed, 29 Nov 2023 22:22:59 +0100 Subject: [PATCH] #4328 - HTML files are not rendered if they use the HTML namespace - Add "defaultNamespace" setting to policy to add a default namespace to all elements defined in the policy (also allowing their non-NS versions) - Fix cases where an element or attribute consists only of a prefix and no local part (we filter those out) --- .../policy/DefaultHtmlDocumentPolicy.yaml | 1 + .../policy/DefaultHtmlDocumentPolicyTest.java | 4 ++-- .../sanitizer/ExternalPolicyCollection.java | 11 ++++++++++ .../sanitizer/PolicyCollectionBuilder.java | 19 +++++++++++++++++ .../sanitizer/PolicyCollectionIOUtils.java | 3 +++ .../sanitizer/SanitizingContentHandler.java | 21 ++++++++++++++----- 6 files changed, 52 insertions(+), 7 deletions(-) diff --git a/inception/inception-external-editor/src/main/java/de/tudarmstadt/ukp/inception/externaleditor/policy/DefaultHtmlDocumentPolicy.yaml b/inception/inception-external-editor/src/main/java/de/tudarmstadt/ukp/inception/externaleditor/policy/DefaultHtmlDocumentPolicy.yaml index 3ee4c225e87..7d432f8c05e 100644 --- a/inception/inception-external-editor/src/main/java/de/tudarmstadt/ukp/inception/externaleditor/policy/DefaultHtmlDocumentPolicy.yaml +++ b/inception/inception-external-editor/src/main/java/de/tudarmstadt/ukp/inception/externaleditor/policy/DefaultHtmlDocumentPolicy.yaml @@ -19,6 +19,7 @@ version: 1.0 case_sensitive: false default_attribute_action: DROP default_element_action: DROP +default_namespace: http://www.w3.org/1999/xhtml debug: true policies: - action: PASS diff --git a/inception/inception-external-editor/src/test/java/de/tudarmstadt/ukp/inception/externaleditor/policy/DefaultHtmlDocumentPolicyTest.java b/inception/inception-external-editor/src/test/java/de/tudarmstadt/ukp/inception/externaleditor/policy/DefaultHtmlDocumentPolicyTest.java index 9b3f885a3aa..0193875ca14 100644 
--- a/inception/inception-external-editor/src/test/java/de/tudarmstadt/ukp/inception/externaleditor/policy/DefaultHtmlDocumentPolicyTest.java +++ b/inception/inception-external-editor/src/test/java/de/tudarmstadt/ukp/inception/externaleditor/policy/DefaultHtmlDocumentPolicyTest.java @@ -41,7 +41,7 @@ void thatOverrideFileIsPickedUp(@TempDir Path aTemp) throws Exception var sut = new DefaultHtmlDocumentPolicy(); - assertThat(sut.getPolicy().getElementPolicies()).hasSize(74); + assertThat(sut.getPolicy().getElementPolicies()).hasSize(148); write(policyFile.toFile(), "policies: []", UTF_8); assertThat(policyFile).exists(); @@ -54,6 +54,6 @@ void thatOverrideFileIsPickedUp(@TempDir Path aTemp) throws Exception Files.delete(policyFile); assertThat(policyFile).doesNotExist(); - assertThat(sut.getPolicy().getElementPolicies()).hasSize(74); + assertThat(sut.getPolicy().getElementPolicies()).hasSize(148); } } diff --git a/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/ExternalPolicyCollection.java b/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/ExternalPolicyCollection.java index d9eae804b02..eb93013d620 100644 --- a/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/ExternalPolicyCollection.java +++ b/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/ExternalPolicyCollection.java @@ -32,6 +32,7 @@ public class ExternalPolicyCollection private boolean debug; private ElementAction defaultElementAction; private AttributeAction defaultAttributeAction; + private String defaultNamespace; public String getName() { @@ -102,4 +103,14 @@ public AttributeAction getDefaultAttributeAction() { return defaultAttributeAction; } + + public String getDefaultNamespace() + { + return defaultNamespace; + } + + public void setDefaultNamespace(String aDefaultNamespace) + { + defaultNamespace = aDefaultNamespace; + } } 
diff --git a/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/PolicyCollectionBuilder.java b/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/PolicyCollectionBuilder.java index 4e103d62466..949e5916498 100644 --- a/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/PolicyCollectionBuilder.java +++ b/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/PolicyCollectionBuilder.java @@ -19,6 +19,7 @@ import static de.tudarmstadt.ukp.inception.support.xml.XmlParserUtils.caseInsensitiveQNameComparator; import static de.tudarmstadt.ukp.inception.support.xml.sanitizer.ElementAction.PASS; +import static org.apache.commons.lang3.StringUtils.isEmpty; import java.lang.invoke.MethodHandles; import java.util.LinkedHashMap; @@ -46,6 +47,8 @@ public class PolicyCollectionBuilder private ElementAction defaultElementAction = ElementAction.DROP; private AttributeAction defaultAttributeAction = AttributeAction.DROP; + private String defaultNamespace; + public static PolicyCollectionBuilder caseSensitive() { return new PolicyCollectionBuilder(LinkedHashMap::new); @@ -66,6 +69,12 @@ public PolicyCollectionBuilder(Supplier aMapSupplier) globalAttributePolicies = mapSupplier.get(); } + public PolicyCollectionBuilder defaultNamespace(String aDefaultNamespace) + { + defaultNamespace = aDefaultNamespace; + return this; + } + public PolicyCollectionBuilder defaultAttributeAction(AttributeAction aDefaultAttributeAction) { defaultAttributeAction = aDefaultAttributeAction; @@ -137,6 +146,10 @@ PolicyCollectionBuilder elementPolicy(QName aElement, ElementAction aAction) elementPolicyBuilders.put(aElement, new ElementPolicyBuilder(aElement, aAction, mapSupplier)); + if (isEmpty(aElement.getNamespaceURI()) && defaultNamespace != null) { + elementPolicy(new QName(defaultNamespace, aElement.getLocalPart()), aAction); + } + return 
this; } @@ -201,6 +214,12 @@ void attributePolicy(QName aElementName, QName aAttributeName, AttributePolicy a aElementName, aAttributeName, oldPolicy, aPolicy); } } + + if (isEmpty(aElementName.getNamespaceURI()) && isEmpty(aAttributeName.getNamespaceURI()) + && defaultNamespace != null) { + attributePolicy(new QName(defaultNamespace, aElementName.getLocalPart()), + new QName(defaultNamespace, aAttributeName.getLocalPart()), aPolicy); + } } public void allowAttribute(QName aAttribute, Pattern aPattern) diff --git a/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/PolicyCollectionIOUtils.java b/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/PolicyCollectionIOUtils.java index e72d36ab974..dc09a7c2c1e 100644 --- a/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/PolicyCollectionIOUtils.java +++ b/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/PolicyCollectionIOUtils.java @@ -53,6 +53,9 @@ public static PolicyCollection loadPolicies(InputStream aIs) throws IOException policyCollectionBuilder .defaultAttributeAction(externalCollection.getDefaultAttributeAction()); } + if (externalCollection.getDefaultNamespace() != null) { + policyCollectionBuilder.defaultNamespace(externalCollection.getDefaultNamespace()); + } for (ExternalPolicy policy : externalCollection.getPolicies()) { var isElementPolicy = policy.getElements() != null; diff --git a/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/SanitizingContentHandler.java b/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/SanitizingContentHandler.java index 9e042094831..20a1b53cfe5 100644 --- a/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/SanitizingContentHandler.java +++ 
b/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/SanitizingContentHandler.java @@ -52,6 +52,7 @@ public class SanitizingContentHandler private final Logger log = LoggerFactory.getLogger(getClass()); private static final String MASKED = "MASKED-"; + private static final String NO_LOCAL_NAME = "NO-LOCAL-NAME"; private static final String XMLNS = "xmlns"; private static final String XMLNS_PREFIX = "xmlns:"; @@ -119,15 +120,20 @@ private void startElement(QName aElement, Attributes aAtts, Optional