From f8d512a9d483b441ed941ede0af1206310b1a967 Mon Sep 17 00:00:00 2001 From: LiuXing-R Date: Wed, 18 Aug 2021 15:29:05 +0800 Subject: [PATCH] fix: change the regex of `lang` to support IANA subtags --- src/main/resources/antisamy-anythinggoes.xml | 2 +- src/main/resources/antisamy-ebay.xml | 2 +- src/main/resources/antisamy-myspace.xml | 2 +- src/main/resources/antisamy-slashdot.xml | 2 +- src/main/resources/antisamy-tinymce.xml | 2 +- src/main/resources/antisamy.xml | 2 +- .../java/org/owasp/validator/html/test/AntiSamyTest.java | 7 +++++++ 7 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/main/resources/antisamy-anythinggoes.xml b/src/main/resources/antisamy-anythinggoes.xml index 7382b5c1..f23706f8 100644 --- a/src/main/resources/antisamy-anythinggoes.xml +++ b/src/main/resources/antisamy-anythinggoes.xml @@ -152,7 +152,7 @@ http://www.w3.org/TR/html401/struct/global.html - + diff --git a/src/main/resources/antisamy-ebay.xml b/src/main/resources/antisamy-ebay.xml index 6beaae6f..839faea0 100644 --- a/src/main/resources/antisamy-ebay.xml +++ b/src/main/resources/antisamy-ebay.xml @@ -150,7 +150,7 @@ http://www.w3.org/TR/html401/struct/global.html - + diff --git a/src/main/resources/antisamy-myspace.xml b/src/main/resources/antisamy-myspace.xml index fbb17ba0..88d4a143 100644 --- a/src/main/resources/antisamy-myspace.xml +++ b/src/main/resources/antisamy-myspace.xml @@ -152,7 +152,7 @@ http://www.w3.org/TR/html401/struct/global.html - + diff --git a/src/main/resources/antisamy-slashdot.xml b/src/main/resources/antisamy-slashdot.xml index b479dd66..8f0da973 100644 --- a/src/main/resources/antisamy-slashdot.xml +++ b/src/main/resources/antisamy-slashdot.xml @@ -59,7 +59,7 @@ Slashdot allowed tags taken from "Reply" page: - + diff --git a/src/main/resources/antisamy-tinymce.xml b/src/main/resources/antisamy-tinymce.xml index fd47c968..de952ff1 100644 --- a/src/main/resources/antisamy-tinymce.xml +++ b/src/main/resources/antisamy-tinymce.xml @@ -58,7 
+58,7 @@ description="The 'lang' attribute tells the browser what language the element's attribute values and content are written in"> - + diff --git a/src/main/resources/antisamy.xml b/src/main/resources/antisamy.xml index e9b403b0..917da876 100644 --- a/src/main/resources/antisamy.xml +++ b/src/main/resources/antisamy.xml @@ -171,7 +171,7 @@ http://www.w3.org/TR/html401/struct/global.html - + diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index 8c5dba13..99ba9903 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1502,5 +1502,12 @@ public void entityReferenceEncodedInHtmlAttribute() throws ScanException, Policy assertThat(as.scan("

xss

", revised, AntiSamy.SAX).getCleanHTML(), containsString("javascript&#00058")); } + + @Test + public void testGithubIssue99() throws ScanException, PolicyException { + // Test that IANA language subtags (such as the GB in en-GB) are not lost + assertThat(as.scan("

This paragraph is defined as British English.

", policy, AntiSamy.DOM).getCleanHTML(), containsString("lang=\"en-GB\"")); + assertThat(as.scan("

This paragraph is defined as British English.

", policy, AntiSamy.SAX).getCleanHTML(), containsString("lang=\"en-GB\"")); + } }