From 0f46eafb5a01bc9e5d1cb8f78d0f20176c0cfe74 Mon Sep 17 00:00:00 2001 From: Dave Wichers Date: Tue, 11 Apr 2023 19:37:03 -0400 Subject: [PATCH] Update per suggestions from nekohtml maintainer in his PR Neko3 #322. --- pom.xml | 2 +- .../html/scan/AbstractAntiSamyScanner.java | 33 +++++++-------- .../html/scan/AntiSamyDOMScanner.java | 40 ++++++++++--------- .../html/scan/AntiSamySAXScanner.java | 4 +- .../validator/html/scan/MagicSAXFilter.java | 40 +++++++++---------- 5 files changed, 60 insertions(+), 59 deletions(-) diff --git a/pom.xml b/pom.xml index 087bcf7f..38915b68 100644 --- a/pom.xml +++ b/pom.xml @@ -74,7 +74,7 @@ org.htmlunit neko-htmlunit - 3.0.0 + 3.1.0-SNAPSHOT org.apache.httpcomponents.client5 diff --git a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java index 9de704b1..31eafc23 100644 --- a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java @@ -1,25 +1,26 @@ /* - * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2023, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * - * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: * - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + * Redistributions of source code must retain the above copyright notice, this list of conditions + * and the following disclaimer. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. Neither the name of OWASP nor the names of its + * contributors may be used to endorse or promote products derived from this software without + * specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.html.scan; diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java index a5b9edab..420e8e41 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java @@ -1,26 +1,28 @@ /* - * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2023, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * - * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: * - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + * Redistributions of source code must retain the above copyright notice, this list of conditions + * and the following disclaimer. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. Neither the name of OWASP nor the names of its + * contributors may be used to endorse or promote products derived from this software without + * specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + package org.owasp.validator.html.scan; import java.io.IOException; @@ -224,11 +226,13 @@ static DOMFragmentParser getDomParser() parser.setFeature("http://cyberneko.org/html/features/scanner/style/strip-cdata-delims", false); parser.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true); + // cyberneko author removed this block. Why? try { parser.setFeature("http://cyberneko.org/html/features/enforce-strict-attribute-names", true); } catch (SAXNotRecognizedException se) { - // this indicates that the patched nekohtml is not on the - // classpath + // this indicates that the patched nekohtml is not on the classpath + System.out.println( + "DRW: SAXNotRecognizedException for \"http://cyberneko.org/html/features/enforce-strict-attribute-names"); } return parser; } diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java index 03837865..287cfeb6 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2023, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -267,8 +267,6 @@ private static SAXParser getParser() { parser.setFeature("http://xml.org/sax/features/namespaces", false); parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); parser.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true); - parser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true); - parser.setFeature("http://apache.org/xml/features/scanner/notify-builtin-refs", true); parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); return parser; diff --git a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java index 17103689..bb541de1 100644 --- a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java +++ b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java @@ -1,25 +1,26 @@ /* - * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2023, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * - * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: * - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + * Redistributions of source code must retain the above copyright notice, this list of conditions + * and the following disclaimer. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. Neither the name of OWASP nor the names of its + * contributors may be used to endorse or promote products derived from this software without + * specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.html.scan; @@ -190,12 +191,9 @@ public void endElement(QName element, Augmentations augs) throws XNIException { // "text/css"); // start the CSS element - // super.startElement(element, cssAttributes, new AugmentationsImpl()); - super.startElement(element, cssAttributes, null); + super.startElement(element, cssAttributes, augs); // send the cleaned content - // super.characters(new XMLStringBuffer(results.getCleanHTML()), new - // AugmentationsImpl()); - super.characters(new XMLStringBuffer(results.getCleanHTML()), null); + super.characters(new XMLStringBuffer(results.getCleanHTML()), augs); // end the CSS element super.endElement(element, augs); }