From 020263f576cc96c7fc80710ab2226f543e9b19b5 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Wed, 15 Nov 2023 20:38:47 +0100 Subject: [PATCH] #4302 - Files imported as HTML do not show properly in HTML-based editor - The HtmlFormatSupport now returns the default HTML content policy - When logging masked elements, also log the name of the respective policy - Do not pass the body element through the sanitizer - it should always pass --- .../XHtmlXmlDocumentViewControllerImpl.java | 4 ++-- inception/inception-io-html/pom.xml | 8 ++++++++ .../inception/io/html/HtmlFormatSupport.java | 20 ++++++++++++++++++- .../config/HtmlSupportAutoConfiguration.java | 5 +++-- .../sanitizer/SanitizingContentHandler.java | 4 ++-- 5 files changed, 34 insertions(+), 7 deletions(-) diff --git a/inception/inception-external-editor/src/main/java/de/tudarmstadt/ukp/inception/externaleditor/xhtml/XHtmlXmlDocumentViewControllerImpl.java b/inception/inception-external-editor/src/main/java/de/tudarmstadt/ukp/inception/externaleditor/xhtml/XHtmlXmlDocumentViewControllerImpl.java index 47d7da4b70e..32a2cf0e083 100644 --- a/inception/inception-external-editor/src/main/java/de/tudarmstadt/ukp/inception/externaleditor/xhtml/XHtmlXmlDocumentViewControllerImpl.java +++ b/inception/inception-external-editor/src/main/java/de/tudarmstadt/ukp/inception/externaleditor/xhtml/XHtmlXmlDocumentViewControllerImpl.java @@ -163,7 +163,7 @@ public ResponseEntity getDocument(@PathVariable("projectId") long aProje renderHead(doc, rawHandler); - sanitizingHandler.startElement(null, null, BODY, null); + rawHandler.startElement(null, null, BODY, null); if (maybeXmlDocument.isEmpty()) { // Gracefully handle the case that the CAS does not contain any XML structure at all // and show only the document text in this case. 
@@ -172,7 +172,7 @@ public ResponseEntity getDocument(@PathVariable("projectId") long aProje else { renderXmlContent(doc, sanitizingHandler, aEditor, maybeXmlDocument.get()); } - sanitizingHandler.endElement(null, null, BODY); + rawHandler.endElement(null, null, BODY); rawHandler.endElement(null, null, HTML); diff --git a/inception/inception-io-html/pom.xml b/inception/inception-io-html/pom.xml index 0ec22e6541a..e7dafa2b46b 100644 --- a/inception/inception-io-html/pom.xml +++ b/inception/inception-io-html/pom.xml @@ -37,6 +37,14 @@ de.tudarmstadt.ukp.inception.app inception-model + + de.tudarmstadt.ukp.inception.app + inception-external-editor + + + de.tudarmstadt.ukp.inception.app + inception-support + commons-io diff --git a/inception/inception-io-html/src/main/java/de/tudarmstadt/ukp/inception/io/html/HtmlFormatSupport.java b/inception/inception-io-html/src/main/java/de/tudarmstadt/ukp/inception/io/html/HtmlFormatSupport.java index bac2c041552..9598e0bb7a1 100644 --- a/inception/inception-io-html/src/main/java/de/tudarmstadt/ukp/inception/io/html/HtmlFormatSupport.java +++ b/inception/inception-io-html/src/main/java/de/tudarmstadt/ukp/inception/io/html/HtmlFormatSupport.java @@ -19,20 +19,25 @@ import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; +import java.io.IOException; +import java.util.Optional; + import org.apache.uima.collection.CollectionReaderDescription; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.resource.metadata.TypeSystemDescription; import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport; import de.tudarmstadt.ukp.clarin.webanno.model.Project; +import de.tudarmstadt.ukp.inception.externaleditor.policy.DefaultHtmlDocumentPolicy; import de.tudarmstadt.ukp.inception.io.html.config.HtmlSupportAutoConfiguration; import de.tudarmstadt.ukp.inception.io.html.dkprocore.HtmlDocumentReader; +import 
de.tudarmstadt.ukp.inception.support.xml.sanitizer.PolicyCollection; /** * Support for HTML format. *

* This class is exposed as a Spring Component via - * {@link HtmlSupportAutoConfiguration#htmlFormatSupport()}. + * {@link HtmlSupportAutoConfiguration#htmlFormatSupport}. *

*/ public class HtmlFormatSupport @@ -41,6 +46,13 @@ public class HtmlFormatSupport public static final String ID = "htmldoc"; public static final String NAME = "HTML"; + private final DefaultHtmlDocumentPolicy defaultPolicy; + + public HtmlFormatSupport(DefaultHtmlDocumentPolicy aDefaultPolicy) + { + defaultPolicy = aDefaultPolicy; + } + @Override public String getId() { @@ -66,4 +78,10 @@ public CollectionReaderDescription getReaderDescription(Project aProject, { return createReaderDescription(HtmlDocumentReader.class, aTSD); } + + @Override + public Optional getPolicy() throws IOException + { + return Optional.of(defaultPolicy.getPolicy()); + } } diff --git a/inception/inception-io-html/src/main/java/de/tudarmstadt/ukp/inception/io/html/config/HtmlSupportAutoConfiguration.java b/inception/inception-io-html/src/main/java/de/tudarmstadt/ukp/inception/io/html/config/HtmlSupportAutoConfiguration.java index acba639eb25..67ca2380404 100644 --- a/inception/inception-io-html/src/main/java/de/tudarmstadt/ukp/inception/io/html/config/HtmlSupportAutoConfiguration.java +++ b/inception/inception-io-html/src/main/java/de/tudarmstadt/ukp/inception/io/html/config/HtmlSupportAutoConfiguration.java @@ -21,6 +21,7 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import de.tudarmstadt.ukp.inception.externaleditor.policy.DefaultHtmlDocumentPolicy; import de.tudarmstadt.ukp.inception.io.html.HtmlFormatSupport; import de.tudarmstadt.ukp.inception.io.html.LegacyHtmlFormatSupport; @@ -30,9 +31,9 @@ public class HtmlSupportAutoConfiguration @Bean @ConditionalOnProperty(prefix = "format.html", name = "enabled", // havingValue = "true", matchIfMissing = false) - public HtmlFormatSupport htmlFormatSupport() + public HtmlFormatSupport htmlFormatSupport(DefaultHtmlDocumentPolicy aDefaultPolicy) { - return new HtmlFormatSupport(); + return new HtmlFormatSupport(aDefaultPolicy); } @Bean diff --git 
a/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/SanitizingContentHandler.java b/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/SanitizingContentHandler.java index 75d383a468d..17660734b49 100644 --- a/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/SanitizingContentHandler.java +++ b/inception/inception-support/src/main/java/de/tudarmstadt/ukp/inception/support/xml/sanitizer/SanitizingContentHandler.java @@ -163,13 +163,13 @@ public void endElement(String aUri, String aLocalName, String aQName) throws SAX if (stack.isEmpty()) { if (policies.isDebug() && log.isDebugEnabled()) { - log.debug("Masked elements: {}", maskedElements.stream() // + log.debug("[{}] Masked elements: {}", policies.getName(), maskedElements.stream() // .map(QName::toString) // .sorted() // .collect(toList())); for (var element : maskedAttributes.keySet().stream() .sorted(comparing(QName::getLocalPart)).collect(toList())) { - log.debug("Masked attributes on {}: {}", element, + log.debug("[{}] Masked attributes on {}: {}", policies.getName(), element, maskedAttributes.get(element).stream().map(QName::toString) // .sorted() // .collect(toList()));