From aa27b671bbf317039f8f12e2138285c7c68979f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sat, 12 Mar 2022 21:08:23 -0300 Subject: [PATCH 01/11] Use errors list passed to CssHandler constructor --- .../org/owasp/validator/css/CssHandler.java | 2 +- .../validator/html/test/AntiSamyTest.java | 38 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/owasp/validator/css/CssHandler.java b/src/main/java/org/owasp/validator/css/CssHandler.java index a6081a82..79f454b7 100644 --- a/src/main/java/org/owasp/validator/css/CssHandler.java +++ b/src/main/java/org/owasp/validator/css/CssHandler.java @@ -167,7 +167,7 @@ public CssHandler(Policy policy, List errorMessages, ResourceBundle mess * the tag name associated with this inline style */ public CssHandler(Policy policy, List errorMessages, ResourceBundle messages, String tagName) { - this(policy, null, new ArrayList(), tagName, messages); + this(policy, null, errorMessages, tagName, messages); } /** diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index e15a2bc7..ec570d73 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1662,5 +1662,43 @@ public void testLeadingDashOnPropertyName() throws ScanException, PolicyExceptio assertThat(as.scan(input, revised, AntiSamy.DOM).getCleanHTML(), both(containsString("-webkit-border-radius")).and(containsString("-moz-border-radius"))); assertThat(as.scan(input, revised, AntiSamy.SAX).getCleanHTML(), both(containsString("-webkit-border-radius")).and(containsString("-moz-border-radius"))); } + + @Test + public void testScansWithDifferentPolicyLoading() throws ScanException, PolicyException, URISyntaxException { + final String input = "text"; + // Preload policy, do not specify scan type. + AntiSamy asInstance = new AntiSamy(policy); + assertThat(asInstance.scan(input).getCleanHTML(), is(input)); + // Pass policy, assume DOM scan type. + assertThat(asInstance.scan(input, policy).getCleanHTML(), is(input)); + // Pass policy as File. + File policyFile = new File(getClass().getResource("/antisamy.xml").toURI()); + assertThat(asInstance.scan(input, policyFile).getCleanHTML(), is(input)); + // Pass policy filename. + String path = getClass().getResource("/antisamy.xml").getPath(); + path = System.getProperty("file.separator").equals("\\") && path.startsWith("/") ? path.substring(1) : path; + assertThat(asInstance.scan(input, path).getCleanHTML(), is(input)); + // No preloaded nor passed policy, expected to fail. + try { + as.scan(input, null, AntiSamy.DOM); + fail("Scan with no policy must have thrown an exception."); + } catch (PolicyException e) { + // An error is expected. Pass. + } + } + + @Test + public void testGithubIssue151() throws ScanException, PolicyException { + // Concern is error messages when parsing stylesheets are no longer returned in AntiSamy 1.6.5 + String input = ""; + + CleanResults result = as.scan(input, policy, AntiSamy.DOM); + assertThat(result.getErrorMessages().size(), is(1)); + assertThat(result.getCleanHTML(), both(containsString("img")).and(not(containsString("CURSOR")))); + + result = as.scan(input, policy, AntiSamy.SAX); + assertThat(result.getErrorMessages().size(), is(1)); + assertThat(result.getCleanHTML(), both(containsString("img")).and(not(containsString("CURSOR")))); + } } From cd011691bd73a91214daf22b66bca52ae8a9116e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= <30912689+spassarop@users.noreply.github.com> Date: Sun, 13 Mar 2022 13:59:32 -0300 Subject: [PATCH 02/11] Edit copyright date on AntiSamyTest.java --- src/test/java/org/owasp/validator/html/test/AntiSamyTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index ec570d73..979fe7e9 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2021, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * From bc8779bc92b434a2ca6da5d2f138e50eb397aeb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Thu, 3 Mar 2022 20:33:32 -0300 Subject: [PATCH 03/11] Add require-closing-tags to default schema With tests to verify it is accepted and parsed in the policy. --- src/main/resources/antisamy.xsd | 5 +- .../owasp/validator/html/test/PolicyTest.java | 76 ++++++++++++++++++- 2 files changed, 75 insertions(+), 6 deletions(-) diff --git a/src/main/resources/antisamy.xsd b/src/main/resources/antisamy.xsd index 40d9502b..6d82fd23 100644 --- a/src/main/resources/antisamy.xsd +++ b/src/main/resources/antisamy.xsd @@ -13,7 +13,8 @@ - + + @@ -65,7 +66,7 @@ - + diff --git a/src/test/java/org/owasp/validator/html/test/PolicyTest.java b/src/test/java/org/owasp/validator/html/test/PolicyTest.java index 1d95b485..e91cbfa6 100644 --- a/src/test/java/org/owasp/validator/html/test/PolicyTest.java +++ b/src/test/java/org/owasp/validator/html/test/PolicyTest.java @@ -72,9 +72,9 @@ public class PolicyTest { private static final String FOOTER = ""; // Returns a valid policy file with the specified allowedEmptyTags - private String assembleFile(String allowedEmptyTagsSection) { + private String assembleFile(String finalTagsSection) { return HEADER + DIRECTIVES + COMMON_REGEXPS + COMMON_ATTRIBUTES + GLOBAL_TAG_ATTRIBUTES + DYNAMIC_TAG_ATTRIBUTES + TAG_RULES + CSS_RULES + - allowedEmptyTagsSection + FOOTER; + finalTagsSection + FOOTER; } @Before @@ -126,14 +126,64 @@ public void testGetAllowedEmptyTags_emptySection() throws PolicyException { @Test public void testGetAllowedEmptyTags_NoSection() throws PolicyException { String allowedEmptyTagsSection = ""; - String policyFile = assembleFile(allowedEmptyTagsSection); policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); assertTrue(policy.getAllowedEmptyTags().size() == Constants.defaultAllowedEmptyTags.size()); } - + + @Test + public void testGetRequireClosingTags() throws PolicyException { + String requireClosingTagsSection = "\n" + + " \n" + + " \n" + + " \n" + + " \n" + + "\n"; + String policyFile = assembleFile(requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + TagMatcher actualTags = policy.getRequiresClosingTags(); + + assertTrue(actualTags.matches("td")); + assertTrue(actualTags.matches("span")); + } + + @Test + public void testGetRequireClosingTags_emptyList() throws PolicyException { + String requireClosingTagsSection = "\n" + + " \n" + + " \n" + + "\n"; + String policyFile = assembleFile(requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + assertEquals(0, policy.getRequiresClosingTags().size()); + } + + @Test + public void testGetRequireClosingTags_emptySection() throws PolicyException { + String requireClosingTagsSection = "\n" + "\n"; + String policyFile = assembleFile(requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + assertEquals(0, policy.getRequiresClosingTags().size()); + } + + @Test + public void testGetRequireClosingTags_NoSection() throws PolicyException { + String requireClosingTagsSection = ""; + String policyFile = assembleFile(requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + assertTrue(policy.getRequiresClosingTags().size() == Constants.defaultRequireClosingTags.size()); + } + @Test public void testInvalidPolicies() { // Default is to now enforce schema validation on policy files. @@ -295,6 +345,24 @@ public void testSchemaValidationToggleWithInclude() { } } + @Test + public void testSchemaValidationWithOptionallyDefinedTags() throws PolicyException { + String allowedEmptyTagsSection = "\n" + + " \n" + + " \n" + + " \n" + + "\n"; + String requireClosingTagsSection = "\n" + + " \n" + + " \n" + + " \n" + + "\n"; + String policyFile = assembleFile(allowedEmptyTagsSection + requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + // If it reaches this point, it passed schema validation, which is what we want. + } + @Test public void testGithubIssue66() { // Concern is that LSEP characters are not being considered on .* pattern From 578d1f99b5a8d96cbeedef5970eee97fbeb23d27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sat, 26 Mar 2022 12:15:01 -0300 Subject: [PATCH 04/11] Add missing imports on AntiSamyTest.java --- src/test/java/org/owasp/validator/html/test/AntiSamyTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index 979fe7e9..f74c9f20 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -44,11 +44,13 @@ import org.junit.Before; import org.junit.Test; +import java.io.File; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.io.StringWriter; import java.io.Writer; +import java.net.URISyntaxException; import java.net.URL; import java.util.Arrays; import java.util.Collections; From 0199e7e194dba5e7d7197703f43ebe22401e61ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sun, 27 Mar 2022 11:04:31 -0300 Subject: [PATCH 05/11] Support multiple children handling on style tags --- .../html/scan/AntiSamyDOMScanner.java | 28 +++++++++++++------ .../validator/html/test/AntiSamyTest.java | 14 ++++++++++ .../owasp/validator/html/test/TestPolicy.java | 2 +- 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java index eb7ffaab..a2ed7397 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java @@ -407,10 +407,17 @@ private boolean processStyleTag(Element ele, Node parentNode) { CssScanner styleScanner = new CssScanner(policy, messages, policy.isEmbedStyleSheets()); try { - Node firstChild = ele.getFirstChild(); - if (firstChild != null) { + if (ele.getChildNodes().getLength() > 0) { + String toScan = ""; + + for (int i = 0; i < ele.getChildNodes().getLength(); i++) { + Node childNode = ele.getChildNodes().item(i); + if (!toScan.isEmpty()){ + toScan += "\n"; + } + toScan += childNode.getTextContent(); + } - String toScan = firstChild.getNodeValue(); CleanResults cr = styleScanner.scanStyleSheet(toScan, policy.getMaxInputSize()); errorMessages.addAll(cr.getErrorMessages()); @@ -422,12 +429,17 @@ private boolean processStyleTag(Element ele, Node parentNode) { * break all CSS. To prevent that, we have this check. */ - final String cleanHTML = cr.getCleanHTML(); + String cleanHTML = cr.getCleanHTML(); + cleanHTML = cleanHTML == null || cleanHTML.equals("") ? "/* */" : cleanHTML; - if (cleanHTML == null || cleanHTML.equals("")) { - firstChild.setNodeValue("/* */"); - } else { - firstChild.setNodeValue(cleanHTML); + ele.getFirstChild().setNodeValue(cleanHTML); + /* + * Remove every other node after cleaning CSS, there will + * be only one node in the end, as it always should have. + */ + for (int i = 1; i < ele.getChildNodes().getLength(); i++) { + Node childNode = ele.getChildNodes().item(i); + ele.removeChild(childNode); } } diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index f74c9f20..d1ab20bb 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1702,5 +1702,19 @@ public void testGithubIssue151() throws ScanException, PolicyException { assertThat(result.getErrorMessages().size(), is(1)); assertThat(result.getCleanHTML(), both(containsString("img")).and(not(containsString("CURSOR")))); } + + @Test + public void testSmuggledTagsInStyleContent() throws ScanException, PolicyException { + // HTML tags may be smuggled into a style tag after parsing input to an internal representation. + // If that happens, they should be treated as text content and not as children nodes. + + Policy revised = policy.cloneWithDirective(Policy.USE_XHTML,"true"); + assertThat(as.scan("test", revised, AntiSamy.DOM).getCleanHTML(), not(containsString("javascript"))); + assertThat(as.scan("test", revised, AntiSamy.SAX).getCleanHTML(), not(containsString("javascript"))); + + Policy revised2 = policy.cloneWithDirective(Policy.USE_XHTML,"false"); + assertThat(as.scan("Walert(1)", revised2, AntiSamy.DOM).getCleanHTML(), not(containsString("script"))); + assertThat(as.scan("Walert(1)", revised2, AntiSamy.SAX).getCleanHTML(), not(containsString("script"))); + } } diff --git a/src/test/java/org/owasp/validator/html/test/TestPolicy.java b/src/test/java/org/owasp/validator/html/test/TestPolicy.java index 4b71ebbb..435175bc 100644 --- a/src/test/java/org/owasp/validator/html/test/TestPolicy.java +++ b/src/test/java/org/owasp/validator/html/test/TestPolicy.java @@ -43,7 +43,7 @@ */ public class TestPolicy extends InternalPolicy { - protected TestPolicy(Policy.ParseContext parseContext) throws PolicyException { + protected TestPolicy(Policy.ParseContext parseContext) { super(parseContext); } From 4b4df8bd61a859704d1469e9c996ea3fbbce4cd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Tue, 29 Mar 2022 21:02:54 -0300 Subject: [PATCH 06/11] Add deprecated annotations for XHTML --- src/main/java/org/owasp/validator/html/InternalPolicy.java | 2 ++ src/main/java/org/owasp/validator/html/Policy.java | 2 ++ .../java/org/owasp/validator/html/scan/ASXHTMLSerializer.java | 4 +++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/owasp/validator/html/InternalPolicy.java b/src/main/java/org/owasp/validator/html/InternalPolicy.java index cf5e4051..863367eb 100644 --- a/src/main/java/org/owasp/validator/html/InternalPolicy.java +++ b/src/main/java/org/owasp/validator/html/InternalPolicy.java @@ -132,6 +132,8 @@ public boolean isOmitXmlDeclaration() { return omitXmlDeclaration; } + /** @deprecated XHTML usage will go away in AntiSamy 1.7+ */ + @Deprecated public boolean isUseXhtml() { return useXhtml; } diff --git a/src/main/java/org/owasp/validator/html/Policy.java b/src/main/java/org/owasp/validator/html/Policy.java index 42b90c08..bdcb143d 100644 --- a/src/main/java/org/owasp/validator/html/Policy.java +++ b/src/main/java/org/owasp/validator/html/Policy.java @@ -133,6 +133,8 @@ public class Policy { public static final String OMIT_XML_DECLARATION = "omitXmlDeclaration"; public static final String OMIT_DOCTYPE_DECLARATION = "omitDoctypeDeclaration"; + /** @deprecated XHTML usage will go away in AntiSamy 1.7+ */ + @Deprecated public static final String USE_XHTML = "useXHTML"; public static final String FORMAT_OUTPUT = "formatOutput"; public static final String EMBED_STYLESHEETS = "embedStyleSheets"; diff --git a/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java b/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java index 85cf18f4..52127fea 100644 --- a/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java +++ b/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java @@ -12,8 +12,10 @@ /** * This is an extension of the default XHTMLSerializer class that's had it's endElementIO() * method tweaked to serialize closing tags and self-closing tags the way we require. + * + * @deprecated XHTML usage will go away in AntiSamy 1.7+ */ -@SuppressWarnings("deprecation") +@Deprecated public class ASXHTMLSerializer extends org.apache.xml.serialize.XHTMLSerializer { private boolean encodeAllPossibleEntities; From a0d6b3168819c203515c6af7f597654159bb6e93 Mon Sep 17 00:00:00 2001 From: Dave Wichers Date: Tue, 29 Mar 2022 21:58:15 -0400 Subject: [PATCH 07/11] Upgrade 2 dependencies to their latest versions, which require Java 8, but this also breaks 1 test case for some reason. --- pom.xml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index 34dcf6ed..af2573f0 100644 --- a/pom.xml +++ b/pom.xml @@ -44,8 +44,7 @@ UTF-8 2022-01-31T23:13:00Z true - - 2.6 + 2.11.0 1.7.36 4.6.0.0 4.6.0 @@ -76,8 +75,7 @@ net.sourceforge.htmlunit neko-htmlunit - - 2.24 + 2.60.0 org.apache.httpcomponents @@ -279,7 +277,7 @@ - 1.7 + 1.8 true test Dependencies shouldn't require Java 8+. From 1bae19ffabecd4feb564a560b1d09fc98ed25805 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Wed, 30 Mar 2022 00:26:56 -0300 Subject: [PATCH 08/11] Update tests due to Neko html dependency change --- .../validator/html/test/AntiSamyTest.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index d1ab20bb..e251acb5 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1511,11 +1511,13 @@ public void entityReferenceEncodedInHtmlAttribute() throws ScanException, Policy // Concern is that "&" is not being encoded and "#00058" was not being interpreted as ":" // so the validations based on regexp passed and a browser would load "&:" together. // All this when not using the XHTML serializer. + + // UPDATE: Using a new HTML parser library starts decoding entities like #00058 Policy revised = policy.cloneWithDirective("useXHTML","false"); assertThat(as.scan("

xss

", revised, AntiSamy.DOM).getCleanHTML(), - containsString("javascript&#00058")); + not(containsString("javascript"))); assertThat(as.scan("

xss

", revised, AntiSamy.SAX).getCleanHTML(), - containsString("javascript&#00058")); + not(containsString("javascript"))); } @Test @@ -1716,5 +1718,18 @@ public void testSmuggledTagsInStyleContent() throws ScanException, PolicyExcepti assertThat(as.scan("Walert(1)", revised2, AntiSamy.DOM).getCleanHTML(), not(containsString("script"))); assertThat(as.scan("Walert(1)", revised2, AntiSamy.SAX).getCleanHTML(), not(containsString("script"))); } + + @Test(timeout = 3000) + public void testMalformedPIScan() { + // Certain malformed input including a malformed processing instruction may lead the parser to an internal memory error. + try { + as.scan(" Date: Wed, 30 Mar 2022 09:36:16 -0400 Subject: [PATCH 09/11] Upgrade 2x Apache httpcomponent libraries used from their old 4.x verions to the latest 5.x versions. Required updating imports and rewriting a bit of code in the CssScanner class. --- pom.xml | 44 +++------------ .../org/owasp/validator/css/CssScanner.java | 54 +++++++++++++------ 2 files changed, 46 insertions(+), 52 deletions(-) diff --git a/pom.xml b/pom.xml index af2573f0..c7fddfe9 100644 --- a/pom.xml +++ b/pom.xml @@ -60,48 +60,20 @@ - net.sourceforge.htmlunit neko-htmlunit 2.60.0 - org.apache.httpcomponents - httpclient - 4.5.13 - - - - commons-codec - commons-codec - - - - commons-logging - commons-logging - - - org.apache.httpcomponents - httpcore - - + org.apache.httpcomponents.client5 + httpclient5 + 5.1.3 - org.apache.httpcomponents - httpcore - 4.4.15 + org.apache.httpcomponents.core5 + httpcore5 + 5.1.3 org.apache.xmlgraphics @@ -280,7 +252,7 @@ 1.8 true test - Dependencies shouldn't require Java 8+. + Dependencies shouldn't require Java 9+. 3.3.9 @@ -296,7 +268,7 @@ 1.7 - Antisamy is written to support Java 7+. + Antisamy source code is written to support Java 7+. diff --git a/src/main/java/org/owasp/validator/css/CssScanner.java b/src/main/java/org/owasp/validator/css/CssScanner.java index 57ad18bb..ae8c48a6 100644 --- a/src/main/java/org/owasp/validator/css/CssScanner.java +++ b/src/main/java/org/owasp/validator/css/CssScanner.java @@ -43,12 +43,17 @@ import org.apache.batik.css.parser.ParseException; import org.apache.batik.css.parser.Parser; -import org.apache.http.HttpResponse; -import org.apache.http.client.HttpClient; -import org.apache.http.client.config.RequestConfig; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.HttpClientBuilder; -import org.apache.http.util.EntityUtils; +import org.apache.hc.core5.http.ClassicHttpResponse; +import org.apache.hc.core5.http.HttpEntity; +import org.apache.hc.core5.http.HttpStatus; +import org.apache.hc.client5.http.ClientProtocolException; +import org.apache.hc.client5.http.classic.HttpClient; +import org.apache.hc.client5.http.config.RequestConfig; +import org.apache.hc.client5.http.classic.methods.HttpGet; +import org.apache.hc.client5.http.impl.classic.HttpClientBuilder; +import org.apache.hc.core5.http.io.HttpClientResponseHandler; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.apache.hc.core5.util.Timeout; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.InternalPolicy; import org.owasp.validator.html.Policy; @@ -70,7 +75,7 @@ */ public class CssScanner { - protected static final int DEFAULT_TIMEOUT = 1000; + protected static final Timeout DEFAULT_TIMEOUT = Timeout.ofMilliseconds(1000); private static final String CDATA = "^\\s*\\s*$"; @@ -263,15 +268,14 @@ private void parseImportedStylesheets(LinkedList stylesheets, List // Ensure that we have appropriate timeout values so we don't // get DoSed waiting for returns - int timeout = DEFAULT_TIMEOUT; + Timeout timeout = DEFAULT_TIMEOUT; try { - timeout = Integer.parseInt(policy.getDirective(Policy.CONNECTION_TIMEOUT)); + timeout = Timeout.ofMilliseconds(Long.parseLong(policy.getDirective(Policy.CONNECTION_TIMEOUT))); } catch (NumberFormatException nfe) { } RequestConfig requestConfig = RequestConfig.custom() - .setSocketTimeout(timeout) - .setConnectTimeout(timeout) + .setResponseTimeout(timeout) .setConnectionRequestTimeout(timeout) .build(); @@ -302,13 +306,33 @@ private void parseImportedStylesheets(LinkedList stylesheets, List continue; } - HttpGet stylesheetRequest = new HttpGet(stylesheetUri); + // Pulled directly from: https://github.com/apache/httpcomponents-client/blob/5.1.x/httpclient5/src/test/java/org/apache/hc/client5/http/examples/ClientWithResponseHandler.java + // Create a custom response handler to read in the stylesheet + final HttpClientResponseHandler responseHandler = new HttpClientResponseHandler() { + + @Override + public String handleResponse( + final ClassicHttpResponse response) throws IOException { + final int status = response.getCode(); + if (status >= HttpStatus.SC_SUCCESS && status < HttpStatus.SC_REDIRECTION) { + final HttpEntity entity = response.getEntity(); + try { + return entity != null ? EntityUtils.toString(entity) : null; + } catch (final ParseException | org.apache.hc.core5.http.ParseException ex) { + throw new ClientProtocolException(ex); + } + } else { + throw new ClientProtocolException("Unexpected response status: " + status); + } + } + }; byte[] stylesheet = null; + try { + String responseBody = httpClient.execute(new HttpGet(stylesheetUri), responseHandler); // pull down stylesheet, observing size limit - HttpResponse response = httpClient.execute(stylesheetRequest); - stylesheet = EntityUtils.toByteArray(response.getEntity()); + stylesheet = responseBody.getBytes(); if (stylesheet != null && stylesheet.length > sizeLimit) { errorMessages.add(ErrorMessageUtil.getMessage( messages, @@ -323,8 +347,6 @@ private void parseImportedStylesheets(LinkedList stylesheets, List messages, ErrorMessageUtil.ERROR_CSS_IMPORT_FAILURE, new Object[] { HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString()) })); - } finally { - stylesheetRequest.releaseConnection(); } if (stylesheet != null) { From a6d1dd80f0867002060e33c809c8c9289daa7176 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Thu, 31 Mar 2022 20:31:38 -0300 Subject: [PATCH 10/11] Add setConnectTimeout back in CssScanner --- src/main/java/org/owasp/validator/css/CssScanner.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/owasp/validator/css/CssScanner.java b/src/main/java/org/owasp/validator/css/CssScanner.java index ae8c48a6..279597ba 100644 --- a/src/main/java/org/owasp/validator/css/CssScanner.java +++ b/src/main/java/org/owasp/validator/css/CssScanner.java @@ -275,6 +275,7 @@ private void parseImportedStylesheets(LinkedList stylesheets, List } RequestConfig requestConfig = RequestConfig.custom() + .setConnectTimeout(timeout) .setResponseTimeout(timeout) .setConnectionRequestTimeout(timeout) .build(); From 513c02d6afd85714688fd949bce827b7ca401198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Thu, 31 Mar 2022 21:25:36 -0300 Subject: [PATCH 11/11] Add CssScannerTest class --- .../java/org/owasp/validator/html/Policy.java | 2 + .../owasp/validator/css/CssScannerTest.java | 122 ++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 src/test/java/org/owasp/validator/css/CssScannerTest.java diff --git a/src/main/java/org/owasp/validator/html/Policy.java b/src/main/java/org/owasp/validator/html/Policy.java index bdcb143d..2fcb2e66 100644 --- a/src/main/java/org/owasp/validator/html/Policy.java +++ b/src/main/java/org/owasp/validator/html/Policy.java @@ -146,6 +146,8 @@ public class Policy { public static final String PRESERVE_COMMENTS = "preserveComments"; public static final String ENTITY_ENCODE_INTL_CHARS = "entityEncodeIntlChars"; public static final String ALLOW_DYNAMIC_ATTRIBUTES = "allowDynamicAttributes"; + public static final String MAX_INPUT_SIZE = "maxInputSize"; + public static final String MAX_STYLESHEET_IMPORTS = "maxStyleSheetImports"; public static final String EXTERNAL_GENERAL_ENTITIES = "http://xml.org/sax/features/external-general-entities"; public static final String EXTERNAL_PARAM_ENTITIES = "http://xml.org/sax/features/external-parameter-entities"; diff --git a/src/test/java/org/owasp/validator/css/CssScannerTest.java b/src/test/java/org/owasp/validator/css/CssScannerTest.java new file mode 100644 index 00000000..0473ae43 --- /dev/null +++ b/src/test/java/org/owasp/validator/css/CssScannerTest.java @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li, Sebastián Passaro + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this list + * of conditions and the following disclaimer. Redistributions in binary form must + * reproduce the above copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the name of OWASP nor the names of its contributors may be used to endorse + * or promote products derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.owasp.validator.css; + +import org.junit.Before; +import org.junit.Test; +import org.owasp.validator.html.*; +import org.owasp.validator.html.scan.Constants; +import org.owasp.validator.html.test.TestPolicy; + +import java.net.URL; +import java.util.Locale; +import java.util.MissingResourceException; +import java.util.ResourceBundle; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.not; +import static org.hamcrest.MatcherAssert.assertThat; + +public class CssScannerTest { + private TestPolicy policy = null; + private ResourceBundle messages = null; + + @Before + public void setUp() throws Exception { + // Load the policy. You may have to change the path to find the Policy file for your environment. + URL url = getClass().getResource("/antisamy.xml"); + policy = TestPolicy.getInstance(url); + // Load resource bundle + try { + messages = ResourceBundle.getBundle("AntiSamy", Locale.getDefault()); + } catch (MissingResourceException mre) { + messages = ResourceBundle.getBundle("AntiSamy", new Locale(Constants.DEFAULT_LOCALE_LANG, + Constants.DEFAULT_LOCALE_LOC)); + } + } + + @Test + public void testAvoidImportingStyles() throws ScanException { + final String input = "@import url(https://raw.githubusercontent.com/nahsra/antisamy/main/src/test/resources/s/slashdot.org_files/classic.css);\n" + + ".very-specific-antisamy {font: 15pt \"Arial\"; color: blue;}"; + // If not passing "shouldParseImportedStyles" then it's false by default. + CssScanner scanner = new CssScanner(policy, messages); + String result = scanner.scanStyleSheet(input, 1000).getCleanHTML(); + // If style sheet was imported, .grid_1 class should be there. + assertThat(result, not(containsString(".grid_1"))); + assertThat(result, containsString(".very-specific-antisamy")); + } + + @Test + public void testAvoidCdataWhenUsingXhtml() throws ScanException { + final String input = ""; + + TestPolicy revised = policy.cloneWithDirective(Policy.USE_XHTML,"true"); + CssScanner scanner = new CssScanner(revised, messages); + assertThat(scanner.scanStyleSheet(input, 1000).getCleanHTML(), not(containsString("CDATA"))); + + revised = policy.cloneWithDirective(Policy.USE_XHTML,"false"); + scanner = new CssScanner(revised, messages); + assertThat(scanner.scanStyleSheet(input, 1000).getCleanHTML(), containsString("CDATA")); + } + + @Test + public void testImportLimiting() throws ScanException { + final String input = "@import url(https://raw.githubusercontent.com/nahsra/antisamy/main/src/test/resources/s/slashdot.org_files/classic.css);\n" + + "@import url(https://raw.githubusercontent.com/nahsra/antisamy/main/src/test/resources/s/slashdot.org_files/providers.css);\n" + + ".very-specific-antisamy {font: 15pt \"Arial\"; color: blue;}"; + TestPolicy revised = policy.cloneWithDirective(Policy.EMBED_STYLESHEETS,"true") + .cloneWithDirective(Policy.MAX_INPUT_SIZE,"500") + .cloneWithDirective(Policy.MAX_STYLESHEET_IMPORTS,"2"); + CssScanner scanner = new CssScanner(revised, messages, true); + CleanResults result = scanner.scanStyleSheet(input, 500); + // Both sheets are larger than 500 bytes + assertThat(result.getErrorMessages().size(), is(2)); + assertThat(result.getErrorMessages().get(0), containsString("500")); + + // Limit to only one import + revised = policy.cloneWithDirective(Policy.EMBED_STYLESHEETS,"true") + .cloneWithDirective(Policy.MAX_STYLESHEET_IMPORTS,"1"); + scanner = new CssScanner(revised, messages, true); + result = scanner.scanStyleSheet(input, 500000); + // If only first style sheet was imported, .grid_1 class should be there and .janrain-provider150-sprit classes should not. + assertThat(result.getCleanHTML(), containsString(".grid_1")); + assertThat(result.getCleanHTML(), not(containsString(".janrain-provider150-sprit"))); + + // Force timeout errors + revised = policy.cloneWithDirective(Policy.EMBED_STYLESHEETS,"true") + .cloneWithDirective(Policy.CONNECTION_TIMEOUT,"1"); + scanner = new CssScanner(revised, messages, true); + result = scanner.scanStyleSheet(input, 500000); + assertThat(result.getErrorMessages().size(), is(2)); + // If style sheets were imported, .grid_1 and .janrain-provider150-sprit classes should be there. + assertThat(result.getCleanHTML(), not(containsString(".grid_1"))); + assertThat(result.getCleanHTML(), not(containsString(".janrain-provider150-sprit"))); + } +}