From 73b68bc56bed63974eaf9170174f813862ca1ec0 Mon Sep 17 00:00:00 2001 From: Sebastian Uecker Date: Fri, 9 Oct 2015 15:48:03 +0200 Subject: [PATCH 01/28] +Added HtmlPolicyBuilder methods for excluding elements with specific missing or emtpy attributes +Removed AutoCloseableHtmlStreamRenderer for Java SE 6 compatibility --- .../org/owasp/html/HtmlPolicyBuilder.java | 1377 +++++++++-------- .../org/owasp/html/HtmlStreamRenderer.java | 22 - 2 files changed, 715 insertions(+), 684 deletions(-) diff --git a/src/main/java/org/owasp/html/HtmlPolicyBuilder.java b/src/main/java/org/owasp/html/HtmlPolicyBuilder.java index d7a6ea76..683ca5c2 100644 --- a/src/main/java/org/owasp/html/HtmlPolicyBuilder.java +++ b/src/main/java/org/owasp/html/HtmlPolicyBuilder.java @@ -43,7 +43,6 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; - /** * Conveniences for configuring policies for the {@link HtmlSanitizer}. * @@ -54,39 +53,36 @@ * processing modes; and finally call build(renderer) or * toFactory(). *

+ * *
  * // Define the policy.
- * Function<HtmlStreamEventReceiver, HtmlSanitizer.Policy> policy
- *     = new HtmlPolicyBuilder()
- *         .allowElements("a", "p")
- *         .allowAttributes("href").onElements("a")
- *         .toFactory();
- *
+ * Function<HtmlStreamEventReceiver, HtmlSanitizer.Policy> policy = new HtmlPolicyBuilder()
+ * 		.allowElements("a", "p").allowAttributes("href").onElements("a")
+ * 		.toFactory();
+ * 
  * // Sanitize your output.
  * HtmlSanitizer.sanitize(myHtml, policy.apply(myHtmlStreamRenderer));
  * 
* *

Embedded Content

*

- * Embedded URLs are filtered by - * {@link HtmlPolicyBuilder#allowUrlProtocols protocol}. - * There is a {@link HtmlPolicyBuilder#allowStandardUrlProtocols canned policy} - * so you can easily white-list widely used policies that don't violate the - * current pages origin. See "Customization" below for ways to do further - * filtering. If you allow links it might be worthwhile to + * Embedded URLs are filtered by {@link HtmlPolicyBuilder#allowUrlProtocols + * protocol}. There is a {@link HtmlPolicyBuilder#allowStandardUrlProtocols + * canned policy} so you can easily white-list widely used policies that don't + * violate the current pages origin. See "Customization" below for ways to do + * further filtering. If you allow links it might be worthwhile to * {@link HtmlPolicyBuilder#requireRelNofollowOnLinks() require} * {@code rel=nofollow}. *

*

- * This class simply throws out all embedded JS. - * Use a custom element or attribute policy to allow through - * signed or otherwise known-safe code. - * Check out the Caja project if you need a way to contain third-party JS. + * This class simply throws out all embedded JS. Use a custom element or + * attribute policy to allow through signed or otherwise known-safe code. Check + * out the Caja project if you need a way to contain third-party JS. *

*

- * This class does not attempt to faithfully parse and sanitize CSS. - * It does provide {@link HtmlPolicyBuilder#allowStyling() one} styling option - * that allows through a few CSS properties that allow textual styling, but that + * This class does not attempt to faithfully parse and sanitize CSS. It does + * provide {@link HtmlPolicyBuilder#allowStyling() one} styling option that + * allows through a few CSS properties that allow textual styling, but that * disallow image loading, history stealing, layout breaking, code execution, * etc. *

@@ -94,51 +90,44 @@ *

Customization

*

* You can easily do custom processing on tags and attributes by supplying your - * own {@link ElementPolicy element policy} or - * {@link AttributePolicy attribute policy} when calling - * allow…. - * E.g. to convert headers into {@code

}s, you could use an element policy + * own {@link ElementPolicy element policy} or {@link AttributePolicy attribute + * policy} when calling allow…. E.g. to convert headers into + * {@code
}s, you could use an element policy *

+ * *
- * new HtmlPolicyBuilder()
- *   .allowElement(
- *     new ElementPolicy() {
- *       public String apply(String elementName, List<String> attributes){
- *         attributes.add("class");
- *         attributes.add("header-" + elementName);
- *         return "div";
- *       }
- *     },
- *     "h1", "h2", "h3", "h4", "h5", "h6")
- *   .build(outputChannel)
+ * new HtmlPolicyBuilder().allowElement(new ElementPolicy() {
+ * 	public String apply(String elementName, List<String> attributes) {
+ * 		attributes.add("class");
+ * 		attributes.add("header-" + elementName);
+ * 		return "div";
+ * 	}
+ * }, "h1", "h2", "h3", "h4", "h5", "h6").build(outputChannel)
  * 
* *

Rules of Thumb

*

* Throughout this class, several rules hold: *

    - *
  • Everything is denied by default. There are - * disallow… methods, but those reverse - * allows instead of rolling back overly permissive defaults. - *
  • The order of allows and disallows does not matter. - * Disallows trump allows whether they occur before or after them. - * The only method that needs to be called in a particular place is - * {@link HtmlPolicyBuilder#build}. - * Allows or disallows after {@code build} is called have no - * effect on the already built policy. - *
  • Element and attribute policies are applied in the following order: - * element specific attribute policy, global attribute policy, element - * policy. - * Element policies come last so they can observe all the post-processed - * attributes, and so they can add attributes that are exempt from - * attribute policies. - * Element specific policies go first, so they can normalize content to - * a form that might be acceptable to a more simplistic global policy. + *
  • Everything is denied by default. There are disallow… + * methods, but those reverse allows instead of rolling back overly permissive + * defaults. + *
  • The order of allows and disallows does not matter. Disallows trump allows + * whether they occur before or after them. The only method that needs to be + * called in a particular place is {@link HtmlPolicyBuilder#build}. Allows or + * disallows after {@code build} is called have no effect on the already built + * policy. + *
  • Element and attribute policies are applied in the following order: + * element specific attribute policy, global attribute policy, element policy. + * Element policies come last so they can observe all the post-processed + * attributes, and so they can add attributes that are exempt from attribute + * policies. Element specific policies go first, so they can normalize content + * to a form that might be acceptable to a more simplistic global policy. *
* *

Thread safety and efficiency

*

- * This class is not thread-safe. The resulting policy will not violate its + * This class is not thread-safe. The resulting policy will not violate its * security guarantees as a result of race conditions, but is not thread safe * because it maintains state to track whether text inside disallowed elements * should be suppressed. @@ -153,613 +142,677 @@ @TCB @NotThreadSafe public class HtmlPolicyBuilder { - /** - * The default set of elements that are removed if they have no attributes. - * Since {@code } is in this set, by default, a policy will remove - * {@code } because its URL is not allowed - * and it has no other attributes that would warrant it appearing in the - * output. - */ - public static final ImmutableSet DEFAULT_SKIP_IF_EMPTY - = ImmutableSet.of("a", "font", "img", "input", "span"); - - private final Map elPolicies = Maps.newLinkedHashMap(); - private final Map> attrPolicies - = Maps.newLinkedHashMap(); - private final Map globalAttrPolicies - = Maps.newLinkedHashMap(); - private final Set allowedProtocols = Sets.newLinkedHashSet(); - private final Set skipIfEmpty = Sets.newLinkedHashSet( - DEFAULT_SKIP_IF_EMPTY); - private final Map textContainers = Maps.newLinkedHashMap(); - private boolean requireRelNofollowOnLinks; - - /** - * Allows the named elements. - */ - public HtmlPolicyBuilder allowElements(String... elementNames) { - return allowElements(ElementPolicy.IDENTITY_ELEMENT_POLICY, elementNames); - } - - /** - * Disallows the named elements. Elements are disallowed by default, so - * there is no need to disallow elements, unless you are making an exception - * based on an earlier allow. - */ - public HtmlPolicyBuilder disallowElements(String... elementNames) { - return allowElements(ElementPolicy.REJECT_ALL_ELEMENT_POLICY, elementNames); - } - - /** - * Allow the given elements with the given policy. - * - * @param policy May remove or add attributes, change the element name, or - * deny the element. - */ - public HtmlPolicyBuilder allowElements( - ElementPolicy policy, String... elementNames) { - invalidateCompiledState(); - for (String elementName : elementNames) { - elementName = HtmlLexer.canonicalName(elementName); - ElementPolicy newPolicy = ElementPolicy.Util.join( - elPolicies.get(elementName), policy); - // Don't remove if newPolicy is the always reject policy since we want - // that to infect later allowElement calls for this particular element - // name. rejects should have higher priority than allows. - elPolicies.put(elementName, newPolicy); - if (!textContainers.containsKey(elementName) - && TagBalancingHtmlStreamEventReceiver - .allowsPlainTextualContent(elementName)) { - textContainers.put(elementName, true); - } - } - return this; - } - - /** - * A canned policy that allows a number of common formatting elements. - */ - public HtmlPolicyBuilder allowCommonInlineFormattingElements() { - return allowElements( - "b", "i", "font", "s", "u", "o", "sup", "sub", "ins", "del", "strong", - "strike", "tt", "code", "big", "small", "br", "span", "em"); - } - - /** - * A canned policy that allows a number of common block elements. - */ - public HtmlPolicyBuilder allowCommonBlockElements() { - return allowElements( - "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "li", - "blockquote"); - } - - /** - * Allows text content in the named elements. - * By default, text content is allowed in any - * {@link #allowElements allowed elements} that can contain character data per - * the HTML5 spec, but text content is not allowed by default in elements that - * contain content of other kinds (like JavaScript in {@code " + + "" + + "" + + "" + + "" + + "" + + ""; + PolicyFactory pf = new HtmlPolicyBuilder() + .allowElements("option", "select", "style", "svg") + .allowTextIn("style") + .toFactory(); + assertEquals( + "" + + "" + + "" + + "" + + "" + + "" + + "" + + "" + + "" + + "", + pf.sanitize(input) + ); + } + + @Test + public static final void testSelectIsOdd() { + String input = ""; + PolicyFactory pf = new HtmlPolicyBuilder() + .allowElements("option", "select", "xmp") + .allowTextIn("xmp") + .toFactory(); + assertEquals( + "" + + "", + pf.sanitize(input) + ); + } + @Test public static final void testStyleGlobally() { PolicyFactory policyBuilder = new HtmlPolicyBuilder() @@ -449,7 +496,7 @@ static int fac(int n) { int ifac = 1; for (int i = 1; i <= n; ++i) { int ifacp = ifac * i; - if (ifacp < ifac) { throw new IllegalArgumentException("undeflow"); } + if (ifacp < ifac) { throw new IllegalArgumentException("underflow"); } ifac = ifacp; } return ifac; diff --git a/src/test/java/org/owasp/html/TagBalancingHtmlStreamRendererTest.java b/src/test/java/org/owasp/html/TagBalancingHtmlStreamRendererTest.java index 004e95cd..5195fde9 100644 --- a/src/test/java/org/owasp/html/TagBalancingHtmlStreamRendererTest.java +++ b/src/test/java/org/owasp/html/TagBalancingHtmlStreamRendererTest.java @@ -158,7 +158,9 @@ public final void testTextContent() { + "

Hello,

" + "

Hello" // Text allowed in special style tag. - + "

" + + "" // Whitespace allowed inside
    but non-whitespace text nodes are // moved inside
  • . + "
    • Hello,
    • World!
    ", From 374ea2f4f9c2d8c0a7f42b76146b4953898cfcc0 Mon Sep 17 00:00:00 2001 From: Mike Samuel Date: Mon, 18 Oct 2021 09:35:19 -0400 Subject: [PATCH 19/28] Release candidate 20211018.1 --- README.md | 10 +++++----- aggregate/pom.xml | 4 ++-- change_log.md | 6 ++++++ docs/getting_started.md | 10 +++++----- docs/maven.md | 2 +- empiricism/pom.xml | 4 ++-- html-types/pom.xml | 4 ++-- parent/pom.xml | 2 +- pom.xml | 2 +- 9 files changed, 25 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index df20f4f6..caab4b2e 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ how to get started with or without Maven. ## Prepackaged Policies You can use -[prepackaged policies](https://static.javadoc.io/com.googlecode.owasp-java-html-sanitizer/owasp-java-html-sanitizer/20200713.1/org/owasp/html/Sanitizers.html): +[prepackaged policies](https://static.javadoc.io/com.googlecode.owasp-java-html-sanitizer/owasp-java-html-sanitizer/20211018.1/org/owasp/html/Sanitizers.html): ```Java PolicyFactory policy = Sanitizers.FORMATTING.and(Sanitizers.LINKS); @@ -47,7 +47,7 @@ String safeHTML = policy.sanitize(untrustedHTML); The [tests](https://github.com/OWASP/java-html-sanitizer/blob/main/src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java) show how to configure your own -[policy](https://static.javadoc.io/com.googlecode.owasp-java-html-sanitizer/owasp-java-html-sanitizer/20200713.1/org/owasp/html/HtmlPolicyBuilder.html): +[policy](https://static.javadoc.io/com.googlecode.owasp-java-html-sanitizer/owasp-java-html-sanitizer/20211018.1/org/owasp/html/HtmlPolicyBuilder.html): ```Java PolicyFactory policy = new HtmlPolicyBuilder() @@ -62,7 +62,7 @@ String safeHTML = policy.sanitize(untrustedHTML); ## Custom Policies You can write -[custom policies](https://static.javadoc.io/com.googlecode.owasp-java-html-sanitizer/owasp-java-html-sanitizer/20200713.1/org/owasp/html/ElementPolicy.html) +[custom policies](https://static.javadoc.io/com.googlecode.owasp-java-html-sanitizer/owasp-java-html-sanitizer/20211018.1/org/owasp/html/ElementPolicy.html) to do things like changing `h1`s to `div`s with a certain class: ```Java @@ -85,7 +85,7 @@ need to be explicitly whitelisted using the `allowWithoutAttributes()` method if you want them to be allowed through the filter when these elements do not include any attributes. -[Attribute policies](https://static.javadoc.io/com.googlecode.owasp-java-html-sanitizer/owasp-java-html-sanitizer/20200713.1/org/owasp/html/AttributePolicy.html) allow running custom code too. Adding an attribute policy will not water down any default policy like `style` or URL attribute checks. +[Attribute policies](https://static.javadoc.io/com.googlecode.owasp-java-html-sanitizer/owasp-java-html-sanitizer/20211018.1/org/owasp/html/AttributePolicy.html) allow running custom code too. Adding an attribute policy will not water down any default policy like `style` or URL attribute checks. ```Java new HtmlPolicyBuilder = new HtmlPolicyBuilder() @@ -153,7 +153,7 @@ of the output. ## Telemetry -When a policy rejects an element or attribute it notifies an [HtmlChangeListener](https://static.javadoc.io/com.googlecode.owasp-java-html-sanitizer/owasp-java-html-sanitizer/20200713.1/org/owasp/html/HtmlChangeListener.html). +When a policy rejects an element or attribute it notifies an [HtmlChangeListener](https://static.javadoc.io/com.googlecode.owasp-java-html-sanitizer/owasp-java-html-sanitizer/20211018.1/org/owasp/html/HtmlChangeListener.html). You can use this to keep track of policy violation trends and find out when someone is making an effort to breach your security. diff --git a/aggregate/pom.xml b/aggregate/pom.xml index 5bbfc0ae..f637b0a7 100644 --- a/aggregate/pom.xml +++ b/aggregate/pom.xml @@ -3,12 +3,12 @@ com.googlecode.owasp-java-html-sanitizer aggregate pom - 20200713.2-SNAPSHOT + 20211018.1 ../parent com.googlecode.owasp-java-html-sanitizer parent - 20200713.2-SNAPSHOT + 20211018.1 diff --git a/change_log.md b/change_log.md index 9619abc5..712e97d1 100644 --- a/change_log.md +++ b/change_log.md @@ -1,6 +1,12 @@ # OWASP Java HTML Sanitizer Change Log Most recent at top. + * Release 20211018.1 + * Fix [CVE-2021-42575](https://docs.google.com/document/d/11SoX296sMS0XoQiQbpxc5pNxSdbJKDJkm5BDv0zrX50/edit#) + * Changes rendering of `" + "" - + "" - + "" - + "" - + "" - + ""; + + ""; PolicyFactory pf = new HtmlPolicyBuilder() .allowElements("option", "select", "style", "svg") .allowTextIn("style") @@ -451,36 +447,49 @@ public static final void testStyleTagsInAllTheWrongPlaces() { assertEquals( "" + "" + "" - + "" + + "" + "" - + "" - + "" - + "" - + "" - + "", + + "", pf.sanitize(input) ); } @Test public static final void testSelectIsOdd() { + // Special text modes interact badly with select and option String input = ""; PolicyFactory pf = new HtmlPolicyBuilder() .allowElements("option", "select", "xmp") - .allowTextIn("xmp") + .allowTextIn("xmp", "option") .toFactory(); assertEquals( "" - + "", pf.sanitize(input) ); } + @Test + public static final void testOptionAllowsText() { + String input = ""; + PolicyFactory pf = new HtmlPolicyBuilder() + .allowElements("option", "select", "pre") + .allowTextIn("pre", "option") + .toFactory(); + assertEquals( + "" + + "", + pf.sanitize(input) + ); + } + @Test public static final void testStyleGlobally() { PolicyFactory policyBuilder = new HtmlPolicyBuilder() diff --git a/src/test/java/org/owasp/html/TagBalancingHtmlStreamRendererTest.java b/src/test/java/org/owasp/html/TagBalancingHtmlStreamRendererTest.java index 5195fde9..38a854ef 100644 --- a/src/test/java/org/owasp/html/TagBalancingHtmlStreamRendererTest.java +++ b/src/test/java/org/owasp/html/TagBalancingHtmlStreamRendererTest.java @@ -158,9 +158,9 @@ public final void testTextContent() { + "

    Hello,

    " + "

    Hello" // Text allowed in special style tag. - + "

    " + + "" // Whitespace allowed inside
      but non-whitespace text nodes are // moved inside
    • . + "
      • Hello,
      • World!
      ", From 62a0715666b5101303ee828f26e71a07afaad974 Mon Sep 17 00:00:00 2001 From: Mike Samuel Date: Mon, 18 Oct 2021 15:51:41 -0400 Subject: [PATCH 23/28] Release candidate 20211018.2 --- aggregate/pom.xml | 4 ++-- change_log.md | 4 ++++ empiricism/pom.xml | 4 ++-- html-types/pom.xml | 4 ++-- parent/pom.xml | 2 +- pom.xml | 2 +- 6 files changed, 12 insertions(+), 8 deletions(-) diff --git a/aggregate/pom.xml b/aggregate/pom.xml index c178bb33..535f4a76 100644 --- a/aggregate/pom.xml +++ b/aggregate/pom.xml @@ -3,12 +3,12 @@ com.googlecode.owasp-java-html-sanitizer aggregate pom - 20211018.2-SNAPSHOT + 20211018.2 ../parent com.googlecode.owasp-java-html-sanitizer parent - 20211018.2-SNAPSHOT + 20211018.2 diff --git a/change_log.md b/change_log.md index 712e97d1..549e2046 100644 --- a/change_log.md +++ b/change_log.md @@ -1,6 +1,10 @@ # OWASP Java HTML Sanitizer Change Log Most recent at top. + * Release 20211018.2 + * Tweak how we address CVE-2021-42575 to be more tailored and to + interfere less with `