From c90f4439624e797e54d1b75c87b7e51299fa0001 Mon Sep 17 00:00:00 2001 From: Ronald Brill Date: Tue, 11 Apr 2023 08:34:12 +0200 Subject: [PATCH 1/3] step1 --- pom.xml | 4 ++-- .../html/scan/AntiSamyDOMScanner.java | 4 ++-- .../html/scan/AntiSamySAXScanner.java | 4 +--- .../validator/html/scan/MagicSAXFilter.java | 23 +++++++++---------- 4 files changed, 16 insertions(+), 19 deletions(-) diff --git a/pom.xml b/pom.xml index a9ad8401..8bd71b30 100644 --- a/pom.xml +++ b/pom.xml @@ -72,9 +72,9 @@ - net.sourceforge.htmlunit + org.htmlunit neko-htmlunit - 2.70.0 + 3.1.0-SNAPSHOT org.apache.httpcomponents.client5 diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java index 318f509b..f7dec445 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java @@ -32,8 +32,8 @@ import java.util.concurrent.ConcurrentLinkedQueue; import java.util.regex.Matcher; import java.util.regex.Pattern; -import net.sourceforge.htmlunit.cyberneko.parsers.DOMFragmentParser; -import net.sourceforge.htmlunit.xerces.dom.DocumentImpl; +import org.htmlunit.cyberneko.parsers.DOMFragmentParser; +import org.htmlunit.cyberneko.xerces.dom.DocumentImpl; import org.apache.batik.css.parser.ParseException; import org.owasp.validator.css.CssScanner; import org.owasp.validator.html.CleanResults; diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java index 65cbe469..374f1e16 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java @@ -39,7 +39,7 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.sax.SAXResult; import javax.xml.transform.sax.SAXSource; -import net.sourceforge.htmlunit.cyberneko.parsers.SAXParser; +import org.htmlunit.cyberneko.parsers.SAXParser; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.Policy; import org.owasp.validator.html.ScanException; @@ -267,8 +267,6 @@ private static SAXParser getParser() { parser.setFeature("http://xml.org/sax/features/namespaces", false); parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); parser.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true); - parser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true); - parser.setFeature("http://apache.org/xml/features/scanner/notify-builtin-refs", true); parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); return parser; diff --git a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java index 8966b46f..0c4d87b4 100644 --- a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java +++ b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java @@ -26,16 +26,15 @@ import java.util.*; import java.util.regex.Pattern; -import net.sourceforge.htmlunit.cyberneko.filters.DefaultFilter; -import net.sourceforge.htmlunit.xerces.util.AugmentationsImpl; -import net.sourceforge.htmlunit.xerces.util.XMLAttributesImpl; -import net.sourceforge.htmlunit.xerces.util.XMLStringBuffer; -import net.sourceforge.htmlunit.xerces.xni.Augmentations; -import net.sourceforge.htmlunit.xerces.xni.QName; -import net.sourceforge.htmlunit.xerces.xni.XMLAttributes; -import net.sourceforge.htmlunit.xerces.xni.XMLString; -import net.sourceforge.htmlunit.xerces.xni.XNIException; -import net.sourceforge.htmlunit.xerces.xni.parser.XMLDocumentFilter; +import org.htmlunit.cyberneko.filters.DefaultFilter; +import org.htmlunit.cyberneko.xerces.util.XMLAttributesImpl; +import org.htmlunit.cyberneko.xerces.util.XMLStringBuffer; +import org.htmlunit.cyberneko.xerces.xni.Augmentations; +import org.htmlunit.cyberneko.xerces.xni.QName; +import org.htmlunit.cyberneko.xerces.xni.XMLAttributes; +import org.htmlunit.cyberneko.xerces.xni.XMLString; +import org.htmlunit.cyberneko.xerces.xni.XNIException; +import org.htmlunit.cyberneko.xerces.xni.parser.XMLDocumentFilter; import org.owasp.validator.css.CssScanner; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.InternalPolicy; @@ -191,9 +190,9 @@ public void endElement(QName element, Augmentations augs) throws XNIException { // "text/css"); // start the CSS element - super.startElement(element, cssAttributes, new AugmentationsImpl()); + super.startElement(element, cssAttributes, null); // send the cleaned content - super.characters(new XMLStringBuffer(results.getCleanHTML()), new AugmentationsImpl()); + super.characters(new XMLStringBuffer(results.getCleanHTML()), null); // end the CSS element super.endElement(element, augs); } From 8dbade8e41cd1939109e79de8be95944a1cdeb6e Mon Sep 17 00:00:00 2001 From: Ronald Brill Date: Tue, 11 Apr 2023 13:58:03 +0200 Subject: [PATCH 2/3] improve the code a bit --- .../java/org/owasp/validator/html/scan/MagicSAXFilter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java index 0c4d87b4..3fd9028c 100644 --- a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java +++ b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java @@ -190,9 +190,9 @@ public void endElement(QName element, Augmentations augs) throws XNIException { // "text/css"); // start the CSS element - super.startElement(element, cssAttributes, null); + super.startElement(element, cssAttributes, augs); // send the cleaned content - super.characters(new XMLStringBuffer(results.getCleanHTML()), null); + super.characters(new XMLStringBuffer(results.getCleanHTML()), augs); // end the CSS element super.endElement(element, augs); } From dc7b6469513b957c405f2e1b8395be3d75fb5cf2 Mon Sep 17 00:00:00 2001 From: Ronald Brill Date: Tue, 11 Apr 2023 13:58:28 +0200 Subject: [PATCH 3/3] this feature is no longer supported --- .../org/owasp/validator/html/scan/AntiSamyDOMScanner.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java index f7dec445..5e1d924c 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java @@ -223,13 +223,6 @@ static DOMFragmentParser getDomParser() parser.setFeature("http://cyberneko.org/html/features/scanner/style/strip-cdata-delims", false); parser.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true); - - try { - parser.setFeature("http://cyberneko.org/html/features/enforce-strict-attribute-names", true); - } catch (SAXNotRecognizedException se) { - // this indicates that the patched nekohtml is not on the - // classpath - } return parser; }