From 1bae19ffabecd4feb564a560b1d09fc98ed25805 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Wed, 30 Mar 2022 00:26:56 -0300 Subject: [PATCH] Update tests due to Neko html dependency change --- .../validator/html/test/AntiSamyTest.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index d1ab20bb..e251acb5 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1511,11 +1511,13 @@ public void entityReferenceEncodedInHtmlAttribute() throws ScanException, Policy // Concern is that "&" is not being encoded and "#00058" was not being interpreted as ":" // so the validations based on regexp passed and a browser would load "&:" together. // All this when not using the XHTML serializer. + + // UPDATE: The new HTML parser library decodes entities like #00058, so the cleaned output is now expected not to contain "javascript" at all. Policy revised = policy.cloneWithDirective("useXHTML","false"); assertThat(as.scan("

xss

", revised, AntiSamy.DOM).getCleanHTML(), - containsString("javascript&#00058")); + not(containsString("javascript"))); assertThat(as.scan("

xss

", revised, AntiSamy.SAX).getCleanHTML(), - containsString("javascript&#00058")); + not(containsString("javascript"))); } @Test @@ -1716,5 +1718,18 @@ public void testSmuggledTagsInStyleContent() throws ScanException, PolicyExcepti assertThat(as.scan("Walert(1)", revised2, AntiSamy.DOM).getCleanHTML(), not(containsString("script"))); assertThat(as.scan("Walert(1)", revised2, AntiSamy.SAX).getCleanHTML(), not(containsString("script"))); } + + @Test(timeout = 3000) + public void testMalformedPIScan() { + // Certain malformed input including a malformed processing instruction may lead the parser to an internal memory error. + try { + as.scan("