-
Notifications
You must be signed in to change notification settings - Fork 214
/
HtmlPolicyBuilder.java
1145 lines (1055 loc) · 42.4 KB
/
HtmlPolicyBuilder.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
package org.owasp.html;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;
import org.owasp.html.ElementPolicy.JoinableElementPolicy;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
/**
* Conveniences for configuring policies for the {@link HtmlSanitizer}.
*
* <h3>Usage</h3>
* <p>
* To create a policy, first construct an instance of this class; then call
* <code>allow…</code> methods to turn on tags, attributes, and other
* processing modes; and finally call <code>build(renderer)</code> or
* <code>toFactory()</code>.
* </p>
* <pre class="prettyprint lang-java">
* // Define the policy.
* Function&lt;HtmlStreamEventReceiver, HtmlSanitizer.Policy&gt; policy
* = new HtmlPolicyBuilder()
* .allowElements("a", "p")
* .allowAttributes("href").onElements("a")
* .toFactory();
*
* // Sanitize your output.
* HtmlSanitizer.sanitize(myHtml, policy.apply(myHtmlStreamRenderer));
* </pre>
*
* <h3>Embedded Content</h3>
* <p>
* Embedded URLs are filtered by
* {@link HtmlPolicyBuilder#allowUrlProtocols protocol}.
* There is a {@link HtmlPolicyBuilder#allowStandardUrlProtocols canned policy}
* so you can easily white-list widely used protocols that don't violate the
* current page's origin. See "Customization" below for ways to do further
* filtering. If you allow links it might be worthwhile to
* {@link HtmlPolicyBuilder#requireRelNofollowOnLinks() require}
* {@code rel=nofollow}.
* </p>
* <p>
* This class simply throws out all embedded JS.
* Use a custom element or attribute policy to allow through
* signed or otherwise known-safe code.
* Check out the Caja project if you need a way to contain third-party JS.
* </p>
* <p>
* This class does not attempt to faithfully parse and sanitize CSS.
* It does provide {@link HtmlPolicyBuilder#allowStyling() one} styling option
* that allows through a few CSS properties that allow textual styling, but that
* disallow image loading, history stealing, layout breaking, code execution,
* etc.
* </p>
*
* <h3>Customization</h3>
* <p>
* You can easily do custom processing on tags and attributes by supplying your
* own {@link ElementPolicy element policy} or
* {@link AttributePolicy attribute policy} when calling
* <code>allow…</code>.
* E.g. to convert headers into {@code <div>}s, you could use an element policy
* </p>
* <pre class="prettyprint lang-java">
* new HtmlPolicyBuilder()
* .allowElement(
* new ElementPolicy() {
* public String apply(String elementName, List&lt;String&gt; attributes){
* attributes.add("class");
* attributes.add("header-" + elementName);
* return "div";
* }
* },
* "h1", "h2", "h3", "h4", "h5", "h6")
* .build(outputChannel)
* </pre>
*
* <h3>Rules of Thumb</h3>
* <p>
* Throughout this class, several rules hold:
* <ul>
* <li>Everything is denied by default. There are
* <code>disallow…</code> methods, but those reverse
* allows instead of rolling back overly permissive defaults.
* <li>The order of allows and disallows does not matter.
* Disallows trump allows whether they occur before or after them.
* The only method that needs to be called in a particular place is
* {@link HtmlPolicyBuilder#build}.
* Allows or disallows after {@code build} is called have no
* effect on the already built policy.
* <li>Element and attribute policies are applied in the following order:
* element specific attribute policy, global attribute policy, element
* policy.
* Element policies come last so they can observe all the post-processed
* attributes, and so they can add attributes that are exempt from
* attribute policies.
* Element specific policies go first, so they can normalize content to
* a form that might be acceptable to a more simplistic global policy.
* </ul>
*
* <h3>Thread safety and efficiency</h3>
* <p>
* This class is not thread-safe. The resulting policy will not violate its
* security guarantees as a result of race conditions, but is not thread safe
* because it maintains state to track whether text inside disallowed elements
* should be suppressed.
* <p>
* The resulting policy can be reused, but if you use the
* {@link HtmlPolicyBuilder#toFactory()} method instead of {@link #build}, then
* binding policies to output channels is cheap so there's no need.
* </p>
*
* @author Mike Samuel ([email protected])
*/
@TCB
@NotThreadSafe
public class HtmlPolicyBuilder {
/**
 * The default set of elements that are removed if they have no attributes.
 * Since {@code <img>} is in this set, by default, a policy will remove
 * {@code <img src=javascript:alert(1337)>} because its URL is not allowed
 * and it has no other attributes that would warrant it appearing in the
 * output.
 * <p>
 * Every name in this set is registered as
 * {@code HtmlTagSkipType.SKIP_BY_DEFAULT} in the table that seeds each new
 * builder; {@link #allowWithoutAttributes} and
 * {@link #disallowWithoutAttributes} override that default per element.
 */
public static final ImmutableSet<String> DEFAULT_SKIP_IF_EMPTY
= ImmutableSet.of("a", "font", "img", "input", "span");
/**
 * Associates every element name in {@link #DEFAULT_SKIP_IF_EMPTY} with
 * {@link HtmlTagSkipType#SKIP_BY_DEFAULT}. Each builder instance starts with
 * a mutable copy of this table.
 */
static final ImmutableMap<String, HtmlTagSkipType> DEFAULT_SKIP_TAG_MAP_IF_EMPTY_ATTR;
static {
  ImmutableMap.Builder<String, HtmlTagSkipType> skipTypeByElement =
      ImmutableMap.builder();
  for (String skippableElement : DEFAULT_SKIP_IF_EMPTY) {
    skipTypeByElement.put(skippableElement, HtmlTagSkipType.SKIP_BY_DEFAULT);
  }
  DEFAULT_SKIP_TAG_MAP_IF_EMPTY_ATTR = skipTypeByElement.build();
}
/**
 * These
 * <a href="https://developer.mozilla.org/en-US/docs/Web/HTML/Link_types"
 * >{@code rel}</a> attribute values prevent leaking information to the
 * linked site, and prevent the linked page from redirecting your page to a
 * phishing site when opened from a third-party link from your site.
 *
 * @see <a href="https://mathiasbynens.github.io/rel-noopener/"
 * >About rel=noopener</a>
 */
public static final ImmutableSet<String> DEFAULT_RELS_ON_TARGETTED_LINKS
= ImmutableSet.of("noopener", "noreferrer");
/**
 * The values of {@link #DEFAULT_RELS_ON_TARGETTED_LINKS} joined with single
 * spaces, in the set's iteration order.
 */
static final String DEFAULT_RELS_ON_TARGETTED_LINKS_STR
= Joiner.on(' ').join(DEFAULT_RELS_ON_TARGETTED_LINKS);
/** Joined element policy for each canonical element name passed to allowElements. */
private final Map<String, ElementPolicy> elPolicies = Maps.newLinkedHashMap();
/** Element-specific attribute policies: element name, then attribute name, to joined policy. */
private final Map<String, Map<String, AttributePolicy>> attrPolicies
= Maps.newLinkedHashMap();
/** Joined attribute policy for each attribute name that was allowed globally. */
private final Map<String, AttributePolicy> globalAttrPolicies
= Maps.newLinkedHashMap();
/** Lower-cased URL protocols added by allowUrlProtocols and not since removed. */
private final Set<String> allowedProtocols = Sets.newLinkedHashSet();
/**
 * Per-element decision on whether an attribute-less tag is skipped. Starts as
 * a copy of the defaults; allowWithoutAttributes and disallowWithoutAttributes
 * overwrite entries.
 */
private final Map<String, HtmlTagSkipType> skipIssueTagMap = Maps.newLinkedHashMap(DEFAULT_SKIP_TAG_MAP_IF_EMPTY_ATTR);
/**
 * Text-content decision for each canonical element name. Only entries mapped
 * to {@code true} are handed to the factory by toFactory().
 */
private final Map<String, Boolean> textContainers = Maps.newLinkedHashMap();
/** Composition of all processors passed to withPostprocessor; identity until then. */
private HtmlStreamEventProcessor postprocessor =
HtmlStreamEventProcessor.Processors.IDENTITY;
/** Composition of all processors passed to withPreprocessor; identity until then. */
private HtmlStreamEventProcessor preprocessor =
HtmlStreamEventProcessor.Processors.IDENTITY;
/** Union of the schemas passed to allowStyling; {@code null} until it is first called. */
private CssSchema stylingPolicySchema = null;
/** Policy for URLs inside CSS; rejects everything unless replaced via allowUrlsInStyles. */
private AttributePolicy styleUrlPolicy =
AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY;
/** Extra rel values requested via requireRelsOnLinks; {@code null} until first use. */
private Set<String> extraRelsForLinks;
/** Rel values opted out of via skipRelsOnLinks; {@code null} until first use. */
private Set<String> skipRelsForLinks;
/**
 * Allows the named elements, registering each with
 * {@link ElementPolicy#IDENTITY_ELEMENT_POLICY}.
 *
 * @param elementNames names of the elements to allow.
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder allowElements(String... elementNames) {
  final ElementPolicy passThrough = ElementPolicy.IDENTITY_ELEMENT_POLICY;
  return this.allowElements(passThrough, elementNames);
}
/**
 * Disallows the named elements by registering each with
 * {@link ElementPolicy#REJECT_ALL_ELEMENT_POLICY}.
 * <p>
 * Elements are disallowed by default, so this is only needed to carve out an
 * exception to an earlier allow.
 *
 * @param elementNames names of the elements to disallow.
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder disallowElements(String... elementNames) {
  final ElementPolicy rejectEverything = ElementPolicy.REJECT_ALL_ELEMENT_POLICY;
  return this.allowElements(rejectEverything, elementNames);
}
/**
 * Element metadata tables shared by all builders; consulted when an element
 * is allowed to decide whether it may contain plain text. Declared
 * {@code final} because it is a constant that is assigned exactly once.
 */
private static final HtmlElementTables METADATA = HtmlElementTables.get();
/**
 * Allows the given elements, subject to the given policy.
 * <p>
 * The policy is joined with any policy already registered for the same
 * element. The first time an element is seen without an explicit text
 * decision, text content is enabled for it if the element metadata says it
 * can contain plain text.
 *
 * @param policy May remove or add attributes, change the element name, or
 *     deny the element.
 * @param elementNames names of the elements the policy applies to.
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder allowElements(
    ElementPolicy policy, String... elementNames) {
  invalidateCompiledState();
  for (String rawName : elementNames) {
    String canonicalName = HtmlLexer.canonicalElementName(rawName);
    ElementPolicy previous = elPolicies.get(canonicalName);
    // Store the joined policy even when it is the always-reject policy: the
    // rejection has to stick so that it infects later allowElements calls for
    // this element name. Rejects take priority over allows.
    elPolicies.put(canonicalName, ElementPolicy.Util.join(previous, policy));
    boolean textDecisionMissing = !textContainers.containsKey(canonicalName);
    if (textDecisionMissing
        && METADATA.canContainPlainText(METADATA.indexForName(canonicalName))) {
      textContainers.put(canonicalName, true);
    }
  }
  return this;
}
/**
 * A canned policy that allows a number of common inline formatting elements,
 * each with the identity element policy.
 *
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder allowCommonInlineFormattingElements() {
  final String[] inlineFormattingElements = {
    "b", "i", "font", "s", "u", "o", "sup", "sub", "ins", "del", "strong",
    "strike", "tt", "code", "big", "small", "br", "span", "em",
  };
  return allowElements(inlineFormattingElements);
}
/**
 * A canned policy that allows a number of common block elements, each with
 * the identity element policy.
 *
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder allowCommonBlockElements() {
  final String[] blockElements = {
    "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "li",
    "blockquote",
  };
  return allowElements(blockElements);
}
/**
 * Allows text content in the named elements.
 * <p>
 * By default, text content is allowed in any
 * {@link #allowElements allowed elements} that can contain character data per
 * the HTML5 spec, but text content is not allowed by default in elements that
 * contain content of other kinds (like JavaScript in {@code <script>}
 * elements).
 * <p>
 * To write a policy that whitelists {@code <script>} or {@code <style>}
 * elements, first {@code allowTextIn("script")}.
 *
 * @param elementNames names of the elements that may contain text.
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder allowTextIn(String... elementNames) {
  invalidateCompiledState();
  for (int i = 0; i < elementNames.length; ++i) {
    String canonicalName = HtmlLexer.canonicalElementName(elementNames[i]);
    textContainers.put(canonicalName, Boolean.TRUE);
  }
  return this;
}
/**
 * Disallows text in elements with the given names.
 * <p>
 * This is useful when an element contains text that is not meant to be
 * displayed to the end-user.
 * Typically these elements are styled {@code display:none} in browsers'
 * default stylesheets, or, like {@code <template>}, contain text nodes that
 * are eventually for human consumption, but which are created in a separate
 * document fragment.
 *
 * @param elementNames names of the elements that may not contain text.
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder disallowTextIn(String... elementNames) {
  invalidateCompiledState();
  for (int i = 0; i < elementNames.length; ++i) {
    String canonicalName = HtmlLexer.canonicalElementName(elementNames[i]);
    textContainers.put(canonicalName, Boolean.FALSE);
  }
  return this;
}
/**
 * Assuming the given elements are allowed, lets them appear in the output
 * even when they carry no attributes, by marking each as
 * {@link HtmlTagSkipType#DO_NOT_SKIP}.
 *
 * @param elementNames names of the elements to keep when attribute-less.
 * @return this builder, for chaining.
 * @see #DEFAULT_SKIP_TAG_MAP_IF_EMPTY_ATTR
 * @see #disallowWithoutAttributes
 */
public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) {
  invalidateCompiledState();
  for (int i = 0; i < elementNames.length; ++i) {
    String canonicalName = HtmlLexer.canonicalElementName(elementNames[i]);
    skipIssueTagMap.put(canonicalName, HtmlTagSkipType.DO_NOT_SKIP);
  }
  return this;
}
/**
 * Disallows the given elements from appearing without attributes, by marking
 * each as {@link HtmlTagSkipType#SKIP}.
 *
 * @param elementNames names of the elements to drop when attribute-less.
 * @return this builder, for chaining.
 * @see #DEFAULT_SKIP_TAG_MAP_IF_EMPTY_ATTR
 * @see #allowWithoutAttributes
 */
public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) {
  invalidateCompiledState();
  for (int i = 0; i < elementNames.length; ++i) {
    String canonicalName = HtmlLexer.canonicalElementName(elementNames[i]);
    skipIssueTagMap.put(canonicalName, HtmlTagSkipType.SKIP);
  }
  return this;
}
/**
 * Returns an object that lets you associate policies with the given
 * attributes, and allow them globally or on specific elements.
 * <p>
 * The attribute names are canonicalized before being handed to the returned
 * builder.
 *
 * @param attributeNames names of the attributes to configure.
 * @return a builder scoped to the canonicalized attribute names.
 */
public AttributeBuilder allowAttributes(String... attributeNames) {
  ImmutableList.Builder<String> canonicalNames = ImmutableList.builder();
  for (int i = 0, n = attributeNames.length; i < n; ++i) {
    String canonicalName = HtmlLexer.canonicalAttributeName(attributeNames[i]);
    canonicalNames.add(canonicalName);
  }
  return new AttributeBuilder(canonicalNames.build());
}
/**
 * Reverses an earlier attribute {@link #allowAttributes allow} by matching
 * the named attributes against the reject-all attribute policy.
 * <p>
 * For this to have an effect you must call at least one of
 * {@link AttributeBuilder#globally} and {@link AttributeBuilder#onElements}
 * on the result.
 * <p>
 * Attributes are disallowed by default, so there is no need to call this
 * with a laundry list of attribute/element pairs.
 *
 * @param attributeNames names of the attributes to disallow.
 * @return a builder scoped to the named attributes.
 */
public AttributeBuilder disallowAttributes(String... attributeNames) {
  AttributeBuilder scoped = allowAttributes(attributeNames);
  return scoped.matching(AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY);
}
/**
 * Joins the given policy onto the global policy of every named attribute.
 *
 * @param policy the policy to add.
 * @param attributeNames attribute names as supplied by the caller.
 * @return this builder, for chaining.
 */
private HtmlPolicyBuilder allowAttributesGlobally(
    AttributePolicy policy, List<String> attributeNames) {
  invalidateCompiledState();
  // The identity policy (the default supplied by the policy-less overloads)
  // is reinterpreted later through policy joining. Nothing special happens
  // here because only at build() time is it known whether the policy author
  // wants particular URL protocols or wants styles handled.
  for (String name : attributeNames) {
    AttributePolicy joined =
        AttributePolicy.Util.join(globalAttrPolicies.get(name), policy);
    globalAttrPolicies.put(name, joined);
  }
  return this;
}
/**
 * Joins the given policy onto the element-specific policy of every named
 * attribute, for every named element.
 *
 * @param policy the policy to add.
 * @param attributeNames attribute names as supplied by the caller.
 * @param elementNames element names as supplied by the caller.
 * @return this builder, for chaining.
 */
private HtmlPolicyBuilder allowAttributesOnElements(
    AttributePolicy policy, List<String> attributeNames,
    List<String> elementNames) {
  invalidateCompiledState();
  for (String element : elementNames) {
    Map<String, AttributePolicy> perElement = attrPolicies.get(element);
    boolean firstPolicyForElement = perElement == null;
    if (firstPolicyForElement) {
      // Lazily create the attribute table for this element.
      perElement = Maps.newLinkedHashMap();
      attrPolicies.put(element, perElement);
    }
    for (String attribute : attributeNames) {
      AttributePolicy joined =
          AttributePolicy.Util.join(perElement.get(attribute), policy);
      perElement.put(attribute, joined);
    }
  }
  return this;
}
/**
 * Adds
 * <a href="https://developer.mozilla.org/en-US/docs/Web/HTML/Link_types"
 * >{@code rel=nofollow}</a>
 * to links. Shorthand for {@code requireRelsOnLinks("nofollow")}.
 *
 * @return this builder, for chaining.
 * @see #DEFAULT_RELS_ON_TARGETTED_LINKS
 * @see #skipRelsOnLinks
 */
public HtmlPolicyBuilder requireRelNofollowOnLinks() {
  final String[] nofollowOnly = { "nofollow" };
  return requireRelsOnLinks(nofollowOnly);
}
/**
 * Adds
 * <a href="https://developer.mozilla.org/en-US/docs/Web/HTML/Link_types"
 * >{@code rel="..."}</a> to {@code <a href="...">} tags beyond those in
 * {@link #DEFAULT_RELS_ON_TARGETTED_LINKS}.
 * <p>
 * Any value required here is also removed from the set of values previously
 * passed to {@link #skipRelsOnLinks}.
 *
 * @param linkValues individual rel keywords; each must be free of HTML
 *     space characters.
 * @return this builder, for chaining.
 * @throws IllegalArgumentException if a value contains an HTML space.
 * @see #skipRelsOnLinks
 */
public HtmlPolicyBuilder requireRelsOnLinks(String... linkValues) {
  invalidateCompiledState();
  if (extraRelsForLinks == null) {
    extraRelsForLinks = Sets.newLinkedHashSet();
  }
  for (String rawValue : linkValues) {
    String rel = HtmlLexer.canonicalKeywordAttributeValue(rawValue);
    boolean hasNoSpace = !Strings.containsHtmlSpace(rel);
    Preconditions.checkArgument(
        hasNoSpace,
        "spaces in input. use f(\"foo\", \"bar\") not f(\"foo bar\")");
    extraRelsForLinks.add(rel);
  }
  // A required rel can no longer be skipped.
  if (skipRelsForLinks != null) {
    skipRelsForLinks.removeAll(extraRelsForLinks);
  }
  return this;
}
/**
 * Opts out of some of the {@link #DEFAULT_RELS_ON_TARGETTED_LINKS} from being
 * added to links, and reverses previous calls to
 * {@link #requireRelsOnLinks} with the given link values.
 *
 * @param linkValues individual rel keywords; each must be free of HTML
 *     space characters.
 * @return this builder, for chaining.
 * @throws IllegalArgumentException if a value contains an HTML space.
 * @see #requireRelsOnLinks
 */
public HtmlPolicyBuilder skipRelsOnLinks(String... linkValues) {
  invalidateCompiledState();
  if (skipRelsForLinks == null) {
    skipRelsForLinks = Sets.newLinkedHashSet();
  }
  for (String rawValue : linkValues) {
    String rel = HtmlLexer.canonicalKeywordAttributeValue(rawValue);
    boolean hasNoSpace = !Strings.containsHtmlSpace(rel);
    Preconditions.checkArgument(
        hasNoSpace,
        "spaces in input. use f(\"foo\", \"bar\") not f(\"foo bar\")");
    skipRelsForLinks.add(rel);
  }
  // A skipped rel can no longer be required.
  if (extraRelsForLinks != null) {
    extraRelsForLinks.removeAll(skipRelsForLinks);
  }
  return this;
}
/**
 * Adds to the set of protocols that are allowed in URL attributes.
 * <p>
 * For each URL attribute that is allowed, we further constrain it by
 * only allowing the value through if it specifies no protocol, or if it
 * specifies one in the allowedProtocols white-list.
 * This is done regardless of whether any protocols have been allowed, so
 * allowing the attribute "href" globally with the identity policy but
 * not white-listing any protocols, effectively disallows the "href"
 * attribute globally.
 * <p>
 * Do not allow any <code>*script</code> such as <code>javascript</code>
 * protocols if you might use this policy with untrusted code.
 *
 * @param protocols protocol names; stored in lower case.
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder allowUrlProtocols(String... protocols) {
  invalidateCompiledState();
  // Only the white-list is recorded here. The filter that checks URL
  // attribute values against it is attached when the policy is compiled.
  for (String protocol : protocols) {
    allowedProtocols.add(Strings.toLowerCase(protocol));
  }
  return this;
}
/**
 * Reverses a decision made by {@link #allowUrlProtocols} by removing the
 * lower-cased protocol names from the white-list.
 *
 * @param protocols protocol names to remove.
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder disallowUrlProtocols(String... protocols) {
  invalidateCompiledState();
  for (String protocol : protocols) {
    allowedProtocols.remove(Strings.toLowerCase(protocol));
  }
  return this;
}
/**
 * A canned URL protocol policy that allows <code>http</code>,
 * <code>https</code>, and <code>mailto</code>.
 *
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder allowStandardUrlProtocols() {
  final String[] standardProtocols = { "http", "https", "mailto" };
  return allowUrlProtocols(standardProtocols);
}
/**
 * Convert {@code style="<CSS>"} to sanitized CSS which allows
 * color, font-size, type-face, and other styling using the default schema;
 * but which does not allow content to escape its clipping context.
 *
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder allowStyling() {
  final CssSchema defaultSchema = CssSchema.DEFAULT;
  allowStyling(defaultSchema);
  return this;
}
/**
 * Convert {@code style="<CSS>"} to sanitized CSS which allows
 * color, font-size, type-face, and other styling using the given schema.
 * <p>
 * Repeated calls accumulate: the given schema is unioned with any schema
 * supplied earlier.
 *
 * @param whitelist the CSS schema to allow.
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder allowStyling(CssSchema whitelist) {
  invalidateCompiledState();
  CssSchema existing = this.stylingPolicySchema;
  if (existing == null) {
    this.stylingPolicySchema = whitelist;
  } else {
    this.stylingPolicySchema = CssSchema.union(existing, whitelist);
  }
  // Allow the style attribute now and fix it up at compile time. That lets
  // the final URL policy be attached to the style attribute policy, while a
  // later global disallow of "style" still wins over this allow.
  allowAttributesGlobally(
      AttributePolicy.IDENTITY_ATTRIBUTE_POLICY, ImmutableList.of("style"));
  return this;
}
/**
 * Allow URLs in CSS styles.
 * For example,
 * {@code <span style="background-image: url(http://example.com/image.png)">}.
 * <p>
 * URLs in CSS are typically loaded without user-interaction, the way links
 * are, so a greater degree of scrutiny is warranted.
 * <p>
 * The given policy replaces, rather than joins with, any policy set by an
 * earlier call.
 *
 * @param newStyleUrlPolicy receives URLs from the CSS that pass the allowed
 *     protocol policies, and may return null to veto the URL or the URL
 *     to use. URLs will be reported as content in {@code <img src=...>}.
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder allowUrlsInStyles(
    AttributePolicy newStyleUrlPolicy) {
  invalidateCompiledState();
  styleUrlPolicy = newStyleUrlPolicy;
  return this;
}
/**
 * Inserts a pre-processor into the pipeline between the lexer and the policy.
 * Pre-processors receive HTML events before the policy, so the policy will
 * be applied to anything they add.
 * Pre-processors are not in the TCB since they cannot bypass the policy.
 * <p>
 * The given processor is composed with any pre-processors added earlier.
 *
 * @param pp the pre-processor to add.
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder withPreprocessor(HtmlStreamEventProcessor pp) {
  HtmlStreamEventProcessor alreadyInstalled = this.preprocessor;
  this.preprocessor =
      HtmlStreamEventProcessor.Processors.compose(alreadyInstalled, pp);
  return this;
}
/**
 * Inserts a post-processor into the pipeline between the policy and the
 * output sink.
 * Post-processors can insert events into the stream that are not vetted
 * by the policy, so they are in the TCB.
 * <p>
 * Try doing what you want with a pre-processor instead of a post-processor
 * but if you're thinking of doing search/replace on a sanitized string, then
 * definitely use either a pre or post-processor instead.
 * <p>
 * The given processor is composed with any post-processors added earlier.
 *
 * @param pp the post-processor to add.
 * @return this builder, for chaining.
 */
public HtmlPolicyBuilder withPostprocessor(HtmlStreamEventProcessor pp) {
  HtmlStreamEventProcessor alreadyInstalled = this.postprocessor;
  this.postprocessor =
      HtmlStreamEventProcessor.Processors.compose(alreadyInstalled, pp);
  return this;
}
/**
 * Maps attribute names that need extra handling to producers of those
 * extra guards.
 * <ul>
 *   <li>URL-valued attributes are guarded by the URL protocol policy.
 *   <li>{@code style} is guarded by a {@code StylingPolicy} built from the
 *       configured CSS schema, or by nothing when no schema is configured.
 *   <li>{@code srcset} is guarded by a {@code SrcsetAttributePolicy} that
 *       wraps the URL protocol policy.
 * </ul>
 */
private static final Map<String, AttributeGuardMaker> ATTRIBUTE_GUARDS;
static {
  // For each URL attribute that is allowed, we further constrain it by
  // only allowing the value through if it specifies no protocol, or if it
  // specifies one in the allowedProtocols white-list.
  // This is done regardless of whether any protocols have been allowed, so
  // allowing the attribute "href" globally with the identity policy but
  // not white-listing any protocols, effectively disallows the "href"
  // attribute globally.
  ImmutableMap.Builder<String, AttributeGuardMaker> b =
      ImmutableMap.builder();
  // Shared by every plain URL attribute: the guard is the URL policy itself.
  AttributeGuardMaker urlGuard = new AttributeGuardMaker() {
    @Override
    AttributePolicy makeGuard(AttributeGuardIntermediates intermediates) {
      return intermediates.urlAttributePolicy;
    }
  };
  for (String urlAttributeName : new String[] {
      "action", "archive", "background", "cite", "classid", "codebase", "data",
      "dsync", "formaction", "href", "icon", "longdesc", "manifest", "poster",
      "profile", "src", "usemap",
  }) {
    b.put(urlAttributeName, urlGuard);
  }
  b.put("style", new AttributeGuardMaker() {
    @Override
    AttributePolicy makeGuard(AttributeGuardIntermediates intermediates) {
      // No styling schema was configured, so there is no guard to add.
      if (intermediates.cssSchema == null) {
        return null;
      }
      final AttributePolicy styleUrlPolicyFinal = AttributePolicy.Util.join(
          intermediates.styleUrlPolicy, intermediates.urlAttributePolicy);
      return new StylingPolicy(
          intermediates.cssSchema,
          new Function<String, String>() {
            @Override
            public String apply(String url) {
              // URLs found in CSS are vetted as if they were <img src=...>;
              // a missing URL is replaced by a placeholder value.
              return styleUrlPolicyFinal.apply(
                  "img", "src",
                  url != null ? url : "about:invalid");
            }
          });
    }
  });
  b.put("srcset", new AttributeGuardMaker() {
    @Override
    AttributePolicy makeGuard(AttributeGuardIntermediates intermediates) {
      return new SrcsetAttributePolicy(intermediates.urlAttributePolicy);
    }
  });
  ATTRIBUTE_GUARDS = b.build();
}
/**
 * Produces a policy based on the allow and disallow calls previously made.
 *
 * @param out receives calls to open only tags allowed by
 *     previous calls to this object.
 *     Typically a {@link HtmlStreamRenderer}.
 * @return the policy obtained by applying {@link #toFactory()} to
 *     {@code out}.
 */
public HtmlSanitizer.Policy build(HtmlStreamEventReceiver out) {
  PolicyFactory factory = toFactory();
  return factory.apply(out);
}
/**
 * Produces a policy based on the allow and disallow calls previously made,
 * with a listener attached.
 *
 * @param out receives calls to open only tags allowed by
 *     previous calls to this object.
 *     Typically a {@link HtmlStreamRenderer}.
 * @param listener is notified of dropped tags and attributes so that
 *     intrusion detection systems can be alerted to questionable HTML.
 *     If {@code null} then no notifications are sent.
 * @param context if {@code (listener != null)} then the context value passed
 *     with alerts. This can be used to let the listener know from which
 *     connection or request the questionable HTML was received.
 * @return the policy obtained by applying {@link #toFactory()} to the
 *     arguments.
 */
public <CTX> HtmlSanitizer.Policy build(
    HtmlStreamEventReceiver out,
    @Nullable HtmlChangeListener<? super CTX> listener,
    @Nullable CTX context) {
  PolicyFactory factory = toFactory();
  return factory.apply(out, listener, context);
}
/**
 * Like {@link #build} but can be reused to create many different policies
 * each backed by a different output channel.
 *
 * @return a factory built from the compiled policies, the names of the
 *     elements that may contain text, and the configured pre- and
 *     post-processors.
 */
public PolicyFactory toFactory() {
  // Keep only the elements whose text decision is an explicit "true".
  ImmutableSet.Builder<String> textAllowedIn = ImmutableSet.builder();
  for (Map.Entry<String, Boolean> decision : textContainers.entrySet()) {
    Boolean allowsText = decision.getValue();
    if (allowsText != null && allowsText.booleanValue()) {
      textAllowedIn.add(decision.getKey());
    }
  }
  CompiledState compiled = compilePolicies();
  return new PolicyFactory(
      compiled.compiledPolicies,
      textAllowedIn.build(),
      ImmutableMap.copyOf(compiled.globalAttrPolicies),
      preprocessor,
      postprocessor);
}
/**
 * Cache of the most recently compiled policies, kept to speed up subsequent
 * builds. {@code null} when nothing has been compiled since the last
 * mutation.
 */
private transient CompiledState compiledState;

/** Holder for the two results of compiling the builder's configuration. */
private static final class CompiledState {
  /** Global attribute policies, after guards have been joined in. */
  final Map<String, AttributePolicy> globalAttrPolicies;
  /** Element and attribute policies, keyed by element name. */
  final ImmutableMap<String, ElementAndAttributePolicies> compiledPolicies;

  CompiledState(
      Map<String, AttributePolicy> globalPolicies,
      ImmutableMap<String, ElementAndAttributePolicies> policiesByElement) {
    this.compiledPolicies = policiesByElement;
    this.globalAttrPolicies = globalPolicies;
  }
}
/**
 * Drops the cached compilation result. Mutators call this so that the next
 * build recompiles from the current configuration.
 */
private void invalidateCompiledState() {
  this.compiledState = null;
}
/**
 * Compiles the builder's current configuration into per-element policies,
 * returning the cached result when one is available.
 * <p>
 * The work is done on copies of the builder's maps and proceeds in this
 * order:
 * <ol>
 *   <li>join the rel-handling policy onto the {@code <a>} element policy,
 *       if one is registered;
 *   <li>join attribute guards (URL protocol filtering, styling, srcset) in
 *       front of the global policies, or in front of the element-specific
 *       policies for guarded attributes that have no global policy;
 *   <li>for every element whose policy is not the reject-all policy, combine
 *       element-specific and global attribute policies, dropping any that
 *       are the reject-all policy.
 * </ol>
 *
 * @return the compiled state, which is also stored in {@code compiledState}.
 */
private CompiledState compilePolicies() {
// Reuse the cached result; mutators clear it via invalidateCompiledState().
if (compiledState != null) { return compiledState; }
// Copy maps before normalizing in case builder is reused.
@SuppressWarnings("hiding")
Map<String, ElementPolicy> elPolicies
= Maps.newLinkedHashMap(this.elPolicies);
@SuppressWarnings("hiding")
Map<String, Map<String, AttributePolicy>> attrPolicies
= Maps.newLinkedHashMap(this.attrPolicies);
// The outer copy still shares the inner maps, so copy each of those too.
for (Map.Entry<String, Map<String, AttributePolicy>> e :
attrPolicies.entrySet()) {
e.setValue(Maps.newLinkedHashMap(e.getValue()));
}
@SuppressWarnings("hiding")
Map<String, AttributePolicy> globalAttrPolicies
= Maps.newLinkedHashMap(this.globalAttrPolicies);
@SuppressWarnings("hiding")
Set<String> allowedProtocols = ImmutableSet.copyOf(this.allowedProtocols);
// Implement requireRelsOnLinks & skip...
{
ElementPolicy linkPolicy = elPolicies.get("a");
// Only relevant when a policy has been registered for <a>.
if (linkPolicy != null) {
RelsOnLinksPolicy relsOnLinksPolicy = RelsOnLinksPolicy.create(
this.extraRelsForLinks != null
? this.extraRelsForLinks : ImmutableSet.<String>of(),
this.skipRelsForLinks != null
? this.skipRelsForLinks : ImmutableSet.<String>of());
elPolicies.put(
"a",
ElementPolicy.Util.join(linkPolicy, relsOnLinksPolicy));
}
}
// Add guards on top of any custom policies.
{
final AttributePolicy urlAttributePolicy;
// Use the shared standard instance when the white-list is exactly
// {http, https, mailto}; otherwise filter by the configured protocols.
if (allowedProtocols.size() == 3
&& allowedProtocols.contains("mailto")
&& allowedProtocols.contains("http")
&& allowedProtocols.contains("https")) {
urlAttributePolicy = StandardUrlAttributePolicy.INSTANCE;
} else {
urlAttributePolicy = new FilterUrlByProtocolAttributePolicy(
allowedProtocols);
}
// Guarded attribute names that still need a guard at the element level;
// names handled by a global policy are removed below.
Set<String> toGuard = Sets.newLinkedHashSet(ATTRIBUTE_GUARDS.keySet());
AttributeGuardIntermediates intermediates = new AttributeGuardIntermediates(
urlAttributePolicy, this.styleUrlPolicy, this.stylingPolicySchema);
for (Map.Entry<String, AttributeGuardMaker> e : ATTRIBUTE_GUARDS.entrySet()) {
String attributeName = e.getKey();
if (globalAttrPolicies.containsKey(attributeName)) {
toGuard.remove(attributeName);
AttributePolicy guard = e.getValue().makeGuard(intermediates);
// The guard is passed first to join, ahead of the author's policy.
globalAttrPolicies.put(attributeName, AttributePolicy.Util.join(
guard, globalAttrPolicies.get(attributeName)));
}
}
// Implement guards not implemented on global policies in the per-element
// policy maps.
for (Map.Entry<String, Map<String, AttributePolicy>> e
: attrPolicies.entrySet()) {
Map<String, AttributePolicy> policies = e.getValue();
for (String attributeName : toGuard) {
if (policies.containsKey(attributeName)) {
AttributePolicy guard = ATTRIBUTE_GUARDS.get(attributeName)
.makeGuard(intermediates);
policies.put(attributeName, AttributePolicy.Util.join(
guard, policies.get(attributeName)));
}
}
}
}
ImmutableMap.Builder<String, ElementAndAttributePolicies> policiesBuilder
= ImmutableMap.builder();
for (Map.Entry<String, ElementPolicy> e : elPolicies.entrySet()) {
String elementName = e.getKey();
ElementPolicy elPolicy = e.getValue();
// Elements whose joined policy is reject-all are left out entirely.
if (ElementPolicy.REJECT_ALL_ELEMENT_POLICY.equals(elPolicy)) {
continue;
}
Map<String, AttributePolicy> elAttrPolicies
= attrPolicies.get(elementName);
if (elAttrPolicies == null) {
elAttrPolicies = ImmutableMap.of();
}
ImmutableMap.Builder<String, AttributePolicy> attrs
= ImmutableMap.builder();
// First pass: attributes that only have an element-specific policy.
for (Map.Entry<String, AttributePolicy> ape : elAttrPolicies.entrySet()) {
String attributeName = ape.getKey();
// Handle below so we don't end up putting the same key into the map
// twice. ImmutableMap.Builder hates that.
if (globalAttrPolicies.containsKey(attributeName)) { continue; }
AttributePolicy policy = ape.getValue();
if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
attrs.put(attributeName, policy);
}
}
// Second pass: attributes with a global policy. The element-specific
// policy, if any, is passed first to join, followed by the global one.
for (Map.Entry<String, AttributePolicy> ape
: globalAttrPolicies.entrySet()) {
String attributeName = ape.getKey();
AttributePolicy policy = AttributePolicy.Util.join(
elAttrPolicies.get(attributeName), ape.getValue());
if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
attrs.put(attributeName, policy);
}
}
policiesBuilder.put(
elementName,
new ElementAndAttributePolicies(
elementName,
elPolicy, attrs.build(),
getHtmlTagSkipType(elementName)
)
);
}
// Cache the result so later builds can skip recompilation.
compiledState = new CompiledState(
globalAttrPolicies, policiesBuilder.build());
return compiledState;
}
/**
 * Determines the skip behavior recorded for an element.
 * An entry in {@code skipIssueTagMap} for the element wins; when there is
 * none, the result depends on whether the element name is a key of
 * {@code DEFAULT_SKIP_TAG_MAP_IF_EMPTY_ATTR}.
 *
 * @param elementName the element name to look up.
 * @return the mapped skip type if one is present, otherwise
 *     {@link HtmlTagSkipType#SKIP_BY_DEFAULT} for names found in the default
 *     map and {@link HtmlTagSkipType#DO_NOT_SKIP_BY_DEFAULT} for all others.
 */
private HtmlTagSkipType getHtmlTagSkipType(String elementName) {
  HtmlTagSkipType configured = skipIssueTagMap.get(elementName);
  if (configured != null) {
    // An explicit entry always takes precedence over the defaults.
    return configured;
  }
  return DEFAULT_SKIP_TAG_MAP_IF_EMPTY_ATTR.containsKey(elementName)
      ? HtmlTagSkipType.SKIP_BY_DEFAULT
      : HtmlTagSkipType.DO_NOT_SKIP_BY_DEFAULT;
}
/**
* Builds the relationship between attributes, the values that they may have,
* and the elements on which they may appear.
*
* @author Mike Samuel
*/
public final class AttributeBuilder {
private final List<String> attributeNames;
private AttributePolicy policy = AttributePolicy.IDENTITY_ATTRIBUTE_POLICY;
AttributeBuilder(List<? extends String> attributeNames) {
this.attributeNames = ImmutableList.copyOf(attributeNames);
}
/**
* Filters and/or transforms the attribute values
* allowed by later {@code allow*} calls.
* Multiple calls to {@code matching} are combined so that the policies
* receive the value in order, each seeing the value after any
* transformation by a previous policy.
*/
public AttributeBuilder matching(AttributePolicy attrPolicy) {
this.policy = AttributePolicy.Util.join(this.policy, attrPolicy);
return this;
}
/**
* Restrict the values allowed by later {@code allow*} calls to those
* matching the pattern.
* Multiple calls to {@code matching} are combined to restrict to the
* intersection of possible matched values.
*/
public AttributeBuilder matching(final Pattern pattern) {
return matching(new AttributePolicy() {
public @Nullable String apply(
String elementName, String attributeName, String value) {
return pattern.matcher(value).matches() ? value : null;
}
});
}
/**
* Restrict the values allowed by later {@code allow*} calls to those
* matching the given predicate.
* Multiple calls to {@code matching} are combined to restrict to the
* intersection of possible matched values.
*/
public AttributeBuilder matching(
final Predicate<? super String> filter) {
return matching(new AttributePolicy() {
public @Nullable String apply(
String elementName, String attributeName, String value) {
return filter.apply(value) ? value : null;
}
});
}
/**
* Restrict the values allowed by later {@code allow*} calls to those
* supplied.
* Multiple calls to {@code matching} are combined to restrict to the
* intersection of possible matched values.
*/
public AttributeBuilder matching(
boolean ignoreCase, String... allowedValues) {
return matching(ignoreCase, ImmutableSet.copyOf(allowedValues));
}
/**
* Restrict the values allowed by later {@code allow*} calls to those
* supplied.
* Multiple calls to {@code matching} are combined to restrict to the
* intersection of possible matched values.
*/
public AttributeBuilder matching(
final boolean ignoreCase, Set<? extends String> allowedValues) {
final ImmutableSet<String> allowed = ImmutableSet.copyOf(allowedValues);
return matching(new AttributePolicy() {
public @Nullable String apply(
String elementName, String attributeName, String uncanonValue) {
String value = ignoreCase
? Strings.toLowerCase(uncanonValue)
: uncanonValue;
return allowed.contains(value) ? value : null;
}
});
}
/**
* Allows the given attributes on any elements but filters the
* attributes' values based on previous calls to {@code matching(...)}.
* Global attribute policies are applied after element specific policies.
* Be careful of using this with attributes like <code>type</code> which
* have different meanings on different attributes.
* Also be careful of allowing globally attributes like <code>href</code>
* which can have more far-reaching effects on tags like
* <code><base></code> and <code><link></code> than on
* <code><a></code> because in the former, they have an effect without
* user interaction and can change the behavior of the current page.
*/
@SuppressWarnings("synthetic-access")
public HtmlPolicyBuilder globally() {
return HtmlPolicyBuilder.this.allowAttributesGlobally(
policy, attributeNames);
}
/**
* Allows the named attributes on the given elements but filters the
* attributes' values based on previous calls to {@code matching(...)}.
*/
@SuppressWarnings("synthetic-access")
public HtmlPolicyBuilder onElements(String... elementNames) {
ImmutableList.Builder<String> b = ImmutableList.builder();
for (String elementName : elementNames) {
b.add(HtmlLexer.canonicalElementName(elementName));
}
return HtmlPolicyBuilder.this.allowAttributesOnElements(
policy, attributeNames, b.build());
}
}
private static final class RelsOnLinksPolicy
implements ElementPolicy.JoinableElementPolicy {
final ImmutableSet<String> extra;
final ImmutableSet<String> skip;
final ImmutableSet<String> whenTargetPresent;
static final RelsOnLinksPolicy EMPTY = new RelsOnLinksPolicy(
ImmutableSet.<String>of(), ImmutableSet.<String>of());
static RelsOnLinksPolicy create(