From 7ab57ef4485023acaee3e593c257cfaa40b971a2 Mon Sep 17 00:00:00 2001 From: Dave Wichers Date: Mon, 10 Apr 2023 19:45:25 -0400 Subject: [PATCH 1/4] Change all imports to org.htmlunit and org.htmlunit.cyberneko as needed. Change uses of new AugmentationsImpl() to just null. It now compiles, but fails horribly during testing. --- pom.xml | 8 +++--- .../html/scan/AntiSamyDOMScanner.java | 4 +-- .../html/scan/AntiSamySAXScanner.java | 2 +- .../validator/html/scan/MagicSAXFilter.java | 26 ++++++++++--------- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/pom.xml b/pom.xml index a9ad8401..087bcf7f 100644 --- a/pom.xml +++ b/pom.xml @@ -57,7 +57,7 @@ 1.12.0 2.11.0 2.0.7 - 4.7.3.3 + 4.7.3.4 4.7.3 @@ -72,9 +72,9 @@ - net.sourceforge.htmlunit + org.htmlunit neko-htmlunit - 2.70.0 + 3.0.0 org.apache.httpcomponents.client5 @@ -263,7 +263,7 @@ org.apache.maven.plugins maven-enforcer-plugin - 3.2.1 + 3.3.0 org.codehaus.mojo diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java index 318f509b..a5b9edab 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java @@ -32,9 +32,9 @@ import java.util.concurrent.ConcurrentLinkedQueue; import java.util.regex.Matcher; import java.util.regex.Pattern; -import net.sourceforge.htmlunit.cyberneko.parsers.DOMFragmentParser; -import net.sourceforge.htmlunit.xerces.dom.DocumentImpl; import org.apache.batik.css.parser.ParseException; +import org.htmlunit.cyberneko.parsers.DOMFragmentParser; +import org.htmlunit.cyberneko.xerces.dom.DocumentImpl; import org.owasp.validator.css.CssScanner; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.Policy; diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java index
65cbe469..03837865 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java @@ -39,7 +39,7 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.sax.SAXResult; import javax.xml.transform.sax.SAXSource; -import net.sourceforge.htmlunit.cyberneko.parsers.SAXParser; +import org.htmlunit.cyberneko.parsers.SAXParser; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.Policy; import org.owasp.validator.html.ScanException; diff --git a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java index 8966b46f..17103689 100644 --- a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java +++ b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java @@ -26,16 +26,15 @@ import java.util.*; import java.util.regex.Pattern; -import net.sourceforge.htmlunit.cyberneko.filters.DefaultFilter; -import net.sourceforge.htmlunit.xerces.util.AugmentationsImpl; -import net.sourceforge.htmlunit.xerces.util.XMLAttributesImpl; -import net.sourceforge.htmlunit.xerces.util.XMLStringBuffer; -import net.sourceforge.htmlunit.xerces.xni.Augmentations; -import net.sourceforge.htmlunit.xerces.xni.QName; -import net.sourceforge.htmlunit.xerces.xni.XMLAttributes; -import net.sourceforge.htmlunit.xerces.xni.XMLString; -import net.sourceforge.htmlunit.xerces.xni.XNIException; -import net.sourceforge.htmlunit.xerces.xni.parser.XMLDocumentFilter; +import org.htmlunit.cyberneko.filters.DefaultFilter; +import org.htmlunit.cyberneko.xerces.util.XMLAttributesImpl; +import org.htmlunit.cyberneko.xerces.util.XMLStringBuffer; +import org.htmlunit.cyberneko.xerces.xni.Augmentations; +import org.htmlunit.cyberneko.xerces.xni.QName; +import org.htmlunit.cyberneko.xerces.xni.XMLAttributes; +import org.htmlunit.cyberneko.xerces.xni.XMLString; +import 
org.htmlunit.cyberneko.xerces.xni.XNIException; +import org.htmlunit.cyberneko.xerces.xni.parser.XMLDocumentFilter; import org.owasp.validator.css.CssScanner; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.InternalPolicy; @@ -191,9 +190,12 @@ public void endElement(QName element, Augmentations augs) throws XNIException { // "text/css"); // start the CSS element - super.startElement(element, cssAttributes, new AugmentationsImpl()); + // super.startElement(element, cssAttributes, new AugmentationsImpl()); + super.startElement(element, cssAttributes, null); // send the cleaned content - super.characters(new XMLStringBuffer(results.getCleanHTML()), new AugmentationsImpl()); + // super.characters(new XMLStringBuffer(results.getCleanHTML()), new + // AugmentationsImpl()); + super.characters(new XMLStringBuffer(results.getCleanHTML()), null); // end the CSS element super.endElement(element, augs); } From 0f46eafb5a01bc9e5d1cb8f78d0f20176c0cfe74 Mon Sep 17 00:00:00 2001 From: Dave Wichers Date: Tue, 11 Apr 2023 19:37:03 -0400 Subject: [PATCH 2/4] Update per suggestions from nekohtml maintainer in his PR Neko3 #322. 
--- pom.xml | 2 +- .../html/scan/AbstractAntiSamyScanner.java | 33 +++++++-------- .../html/scan/AntiSamyDOMScanner.java | 40 ++++++++++--------- .../html/scan/AntiSamySAXScanner.java | 4 +- .../validator/html/scan/MagicSAXFilter.java | 40 +++++++++---------- 5 files changed, 60 insertions(+), 59 deletions(-) diff --git a/pom.xml b/pom.xml index 087bcf7f..38915b68 100644 --- a/pom.xml +++ b/pom.xml @@ -74,7 +74,7 @@ org.htmlunit neko-htmlunit - 3.0.0 + 3.1.0-SNAPSHOT org.apache.httpcomponents.client5 diff --git a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java index 9de704b1..31eafc23 100644 --- a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java @@ -1,25 +1,26 @@ /* - * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2023, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * - * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: * - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + * Redistributions of source code must retain the above copyright notice, this list of conditions + * and the following disclaimer. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. Neither the name of OWASP nor the names of its + * contributors may be used to endorse or promote products derived from this software without + * specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ package org.owasp.validator.html.scan; diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java index a5b9edab..420e8e41 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java @@ -1,26 +1,28 @@ /* - * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2023, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * - * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: * - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + * Redistributions of source code must retain the above copyright notice, this list of conditions + * and the following disclaimer. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. Neither the name of OWASP nor the names of its + * contributors may be used to endorse or promote products derived from this software without + * specific prior written permission. 
* - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + package org.owasp.validator.html.scan; import java.io.IOException; @@ -224,11 +226,13 @@ static DOMFragmentParser getDomParser() parser.setFeature("http://cyberneko.org/html/features/scanner/style/strip-cdata-delims", false); parser.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true); + // cyberneko author removed this block. Why? 
try { parser.setFeature("http://cyberneko.org/html/features/enforce-strict-attribute-names", true); } catch (SAXNotRecognizedException se) { - // this indicates that the patched nekohtml is not on the - // classpath + // this indicates that the patched nekohtml is not on the classpath + System.out.println( + "DRW: SAXNotRecognizedException for \"http://cyberneko.org/html/features/enforce-strict-attribute-names"); } return parser; } diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java index 03837865..287cfeb6 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2023, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -267,8 +267,6 @@ private static SAXParser getParser() { parser.setFeature("http://xml.org/sax/features/namespaces", false); parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); parser.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true); - parser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true); - parser.setFeature("http://apache.org/xml/features/scanner/notify-builtin-refs", true); parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); return parser; diff --git a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java index 17103689..bb541de1 100644 --- a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java +++ b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java @@ -1,25 +1,26 @@ /* - * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2023, Arshan Dabirsiaghi, Jason Li * * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: * - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + * Redistributions of source code must retain the above copyright notice, this list of conditions + * and the following disclaimer. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. Neither the name of OWASP nor the names of its + * contributors may be used to endorse or promote products derived from this software without + * specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ package org.owasp.validator.html.scan; @@ -190,12 +191,9 @@ public void endElement(QName element, Augmentations augs) throws XNIException { // "text/css"); // start the CSS element - // super.startElement(element, cssAttributes, new AugmentationsImpl()); - super.startElement(element, cssAttributes, null); + super.startElement(element, cssAttributes, augs); // send the cleaned content - // super.characters(new XMLStringBuffer(results.getCleanHTML()), new - // AugmentationsImpl()); - super.characters(new XMLStringBuffer(results.getCleanHTML()), null); + super.characters(new XMLStringBuffer(results.getCleanHTML()), augs); // end the CSS element super.endElement(element, augs); } From d8a8a20bd4dbb104c438dab8a31d6486f4c963f0 Mon Sep 17 00:00:00 2001 From: Dave Wichers Date: Tue, 11 Apr 2023 20:42:29 -0400 Subject: [PATCH 3/4] Fix test cases to now pass and other minor cleanup in the test class. --- .../html/scan/AntiSamyDOMScanner.java | 8 -- .../validator/html/test/AntiSamyTest.java | 128 ++++++++---------- 2 files changed, 57 insertions(+), 79 deletions(-) diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java index 420e8e41..2ac7264d 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java @@ -226,14 +226,6 @@ static DOMFragmentParser getDomParser() parser.setFeature("http://cyberneko.org/html/features/scanner/style/strip-cdata-delims", false); parser.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true); - // cyberneko author removed this block. Why? 
- try { - parser.setFeature("http://cyberneko.org/html/features/enforce-strict-attribute-names", true); - } catch (SAXNotRecognizedException se) { - // this indicates that the patched nekohtml is not on the classpath - System.out.println( - "DRW: SAXNotRecognizedException for \"http://cyberneko.org/html/features/enforce-strict-attribute-names"); - } return parser; } diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index 4ef4752e..ede9fd65 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1,29 +1,26 @@ /* - * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2023, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: * - * Redistributions of source code must retain the above copyright notice, this list - * of conditions and the following disclaimer. Redistributions in binary form must - * reproduce the above copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of OWASP nor the names of its contributors may be used to endorse - * or promote products derived from this software without specific prior written permission. + * Redistributions of source code must retain the above copyright notice, this list of conditions + * and the following disclaimer. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. Neither the name of OWASP nor the names of its + * contributors may be used to endorse or promote products derived from this software without + * specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ package org.owasp.validator.html.test; @@ -83,8 +80,7 @@ public class AntiSamyTest { private static final String[] BASE64_BAD_XML_STRINGS = new String[] { - // first string is - // "click here" + // first string is "click here" "PGEgLSBocmVmPSJodHRwOi8vd3d3Lm93YXNwLm9yZyI+Y2xpY2sgaGVyZTwvYT4=", // the rest are randomly generated 300 byte sequences which generate // parser errors, turned into Strings @@ -109,8 +105,7 @@ public class AntiSamyTest { public void setUp() throws Exception { /* - * Load the policy. You may have to change the path to find the Policy - * file for your environment. + * Load the policy. You may have to change the path to find the Policy file for your environment. */ // get Policy instance from a URL. @@ -800,8 +795,7 @@ public void cssAttacks() throws ScanException, PolicyException { } /* - * Test a bunch of strings that have tweaked the XML parsing capabilities of - * NekoHTML. + * Test a bunch of strings that have tweaked the XML parsing capabilities of NekoHTML. */ @Test public void IllegalXML() throws PolicyException { @@ -818,27 +812,29 @@ public void IllegalXML() throws PolicyException { } } - // This fails due to a bug in NekoHTML - // try { - // assertTrue ( - // as.scan("",policy, - // AntiSamy.DOM).getCleanHTML().indexOf("href") - // != -1 ); - // } catch (Exception e) { - // e.printStackTrace(); - // fail("Couldn't parse malformed HTML: " + e.getMessage()); - // } - - // This fails due to a bug in NekoHTML - // try { - // assertTrue ( - // as.scan("",policy, - // AntiSamy.DOM).getCleanHTML().indexOf("href") - // != -1 ); - // } catch (Exception e) { - // e.printStackTrace(); - // fail("Couldn't parse malformed HTML: " + e.getMessage()); - // } + // This used to fail due to a bug in NekoHTML, but now works in the new ported version. 
+ try { + assertTrue( + as.scan("", policy, AntiSamy.DOM) + .getCleanHTML() + .indexOf("href") + != -1); + } catch (Exception e) { + e.printStackTrace(); + fail("Couldn't parse malformed HTML: " + e.getMessage()); + } + + // This used to fail due to a bug in NekoHTML, but now works in the new ported version. + try { + assertTrue( + as.scan("", policy, AntiSamy.DOM) + .getCleanHTML() + .indexOf("href") + != -1); + } catch (Exception e) { + e.printStackTrace(); + fail("Couldn't parse malformed HTML: " + e.getMessage()); + } try { assertTrue(as.scan("