diff --git a/.gitignore b/.gitignore index 7541a45..06fc1c2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ vendor composer.lock .php_cs.cache +.phpunit.result.cache .idea phpunit.xml diff --git a/.travis.yml b/.travis.yml index 295732c..e915fce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,23 @@ language: php + php: - '7.1' - '7.2' - '7.3' -before_install: - - 'composer selfupdate' -install: - - 'composer install --prefer-dist --no-interaction $COMPOSER_FLAGS' + +matrix: + include: + - php: '7.1' + env: SYMFONY_VERSION="symfony/lts:^3" + fast_finish: true + +before_script: + - if [ "$SYMFONY_VERSION" != "" ]; then travis_wait composer require --no-update $SYMFONY_VERSION; fi; + - 'composer update --prefer-dist --no-interaction $COMPOSER_FLAGS' + script: + - 'composer validate --no-check-lock --strict' - 'phpunit --configuration phpunit.xml.dist --coverage-text' + notifications: email: 'snebes+travis@gmail.com' diff --git a/composer.json b/composer.json index 9e49e1a..1e20f68 100644 --- a/composer.json +++ b/composer.json @@ -22,12 +22,12 @@ "require": { "php": ">=7.1", "ext-dom": "*", - "masterminds/html5": "^2.4", + "ext-mbstring": "*", + "masterminds/html5": "^2.0", "psr/log": "^1.0", - "symfony/options-resolver": "^4.2" + "symfony/options-resolver": "^3.0|^4.0" }, "require-dev": { - "phpunit/phpunit": "^7.5", - "symfony/var-dumper": "^4.1" + "phpunit/phpunit": "^7.5" } } diff --git a/docs/1-getting-started.md b/docs/1-getting-started.md index ddd29ca..9ace3a4 100644 --- a/docs/1-getting-started.md +++ b/docs/1-getting-started.md @@ -3,9 +3,12 @@ - [Installation](#installation) - [Basic usage](#basic-usage) - [Extensions](#extensions) -- [Filtering links, images and iframes hosts](#filtering-links-images-and-iframes-hosts) -- [Forcing HTTPS on links, images and iframes hosts](#forcing-https-on-images-and-iframes-source-hosts) - [Configuring allowed attributes](#configuring-allowed-attributes) +- [Configuring blocked 
attributes](#configuring-blocked-attributes) +- [Configuring allowed classes](#configuring-allowed-classes) +- [Configuring blocked classes](#configuring-blocked-classes) +- [Configuring childless tags](#configuring-childless-tags) +- [Converting tags](#converting-tags) ## Installation @@ -14,16 +17,16 @@ html-sanitizer requires PHP 7.1+. You can install the library using the following command: ``` -composer require tgalopin/html-sanitizer +composer require snebes/html-sanitizer ``` ## Basic usage -The main entry point to the sanitizer is the `HtmlSanitizer\Sanitizer` class. It requires +The main entry point to the sanitizer is the `SN\HtmlSanitizer\Sanitizer` class. It requires an array of configuration: ```php -$sanitizer = HtmlSanitizer\Sanitizer::create(['extensions' => ['basic']]); +$sanitizer = SN\HtmlSanitizer\Sanitizer::create(['extensions' => ['html5']]); $safeHtml = $sanitizer->sanitize($untrustedHtml); ``` @@ -36,150 +39,142 @@ enable to allow specific tags in the content (read the next part to learn more a ## Extensions Extensions are a way to quickly add sets of tags to the whitelist of allowed tags. 
-There are 8 core extensions that you can enable by adding them in your configuration: +There is 1 core extension that you can enable by adding it to your configuration: ```php -$sanitizer = HtmlSanitizer\Sanitizer::create([ - 'extensions' => ['basic', 'code', 'image', 'list', 'table', 'iframe', 'details', 'extra'], +$sanitizer = SN\HtmlSanitizer\Sanitizer::create([ + 'extensions' => ['html5'], ]); $safeHtml = $sanitizer->sanitize($untrustedHtml); ``` Here is the list of tags each extension allow: -- **basic** allows the insertion of basic HTML elements: - `a`, `b`, `br`, `blockquote`, `div`, `del`, `em`, `figcaption`, `figure`, `h1`, `h2`, `h3`, `h4`, `h5`, - `h6`, `i`, `p`, `q`, `small`, `span`, `strong`, `sub`, `sup` -- **list** allows the insertion of lists: - `dd`, `dl`, `dt`, `li`, `ol`, `ul` -- **table** allows the insertion of tables: - `table`, `thead`, `tbody`, `tfoot`, `tr`, `td`, `th` -- **image** allows the insertion of images: `img` -- **code** allows the insertion of code blocks: `pre`, `code` -- **iframe** allows the insertion of iframes: `iframe` -- **details** allows the insertion of view/hide blocks: `details`, `summary` -- **extra** allows the insertion of the following tags: `abbr`, `caption`, `hr`, `rp`, `rt`, `ruby` +- **html5** allows the insertion of basic HTML elements: + `a`, `abbr`, `address`, `applet`, `area`, `article`, `aside`, `audio`, `b`, `base`, `bdi`, `bdo`, `blockquote`, + `body`, `br`, `button`, `canvas`, `caption`, `cite`, `code`, `col`, `colgroup`, `content`, `data`, `datalist`, `dd`, + `del`, `details`, `dfn`, `dialog`, `dir`, `div`, `dl`, `dt`, `element`, `em`, `embed`, `fieldset`, `figcaption`, + `figure`, `footer`, `form`, `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hgroup`, `hr`, `html`, `i`, + `iframe`, `img`, `input`, `ins`, `kbd`, `label`, `legend`, `li`, `link`, `main`, `map`, `mark`, `menu`, `menuitem`, + `meta`, `meter`, `nav`, `noembed`, `noscript`, `object`, `ol`, `optgroup`, `option`, `output`, `p`, 
`param`, + `picture`, `pre`, `progress`, `q`, `rb`, `rp`, `rt`, `rtc`, `ruby`, `s`, `samp`, `script`, `section`, `select`, + `shadow`, `slot`, `small`, `source`, `span`, `strong`, `style`, `sub`, `summary`, `sup`, `table`, `tbody`, `td`, + `template`, `textarea`, `tfoot`, `th`, `thead`, `time`, `title`, `tr`, `track`, `tt`, `u`, `ul`, `var`, `video`, `wbr` > Note: sensible attributes are allowed by default for each tag (for instance, the `src` attribute is > allowed by default on images). You can also > [override these allowed attributes manually](#configuring-allowed-attributes) if you need to. -## Filtering links, images and iframes hosts - -> Note: the Sanitizer does not allow relative URLs: they are always filtered out for security reasons. +## Configuring allowed attributes -The sanitizer basic, image and iframe extensions provide a feature to filter hosts, which can be useful -to avoid connecting to external websites that may, for instance, track your website views. +The core extensions define sensible default allowed attributes for each tag, which mean you usually won't need +to change them. However, if you want to customize which attributes are allowed on specific tags, you can use +a tag-specific configuration for them. -To enable this feature, you need to configure the tags: +For instance, to allow only the configured attributes on the `div` and `img` tags, you can use the following +configuration: ```php $sanitizer = HtmlSanitizer\Sanitizer::create([ - 'extensions' => ['basic', 'image', 'iframe'], + 'extensions' => ['html5'], 'tags' => [ - 'a' => [ - /* - * If an array is provided, links targeting other hosts than one in this array - * will be disabled (the `href` attribute will be blank). This can be useful if you want - * to prevent links targeting external websites. Keep null to allow all hosts. - * Any allowed domain also includes its subdomains. 
- * - * Example: - * 'allowed_hosts' => ['trusted1.com', 'google.com'], - */ - 'allowed_hosts' => null, - - /* - * If true, mailto links will be accepted. - */ - 'allow_mailto' => false, + 'div' => [ + 'allowed_attributes' => ['class'], ], - 'img' => [ - /* - * If an array is provided, images relying on other hosts than one in this array - * will be disabled (the `src` attribute will be blank). This can be useful if you want - * to prevent images contacting external websites. Keep null to allow all hosts. - * Any allowed domain also includes its subdomains. - * - * Example: - * 'allowed_hosts' => ['trusted1.com', 'google.com'], - */ - 'allowed_hosts' => null, - - /* - * If true, images data-uri URLs will be accepted. - */ - 'allow_data_uri' => false, - ], - - 'iframe' => [ - /* - * If an array is provided, iframes relying on other hosts than one in this array - * will be disabled (the `src` attribute will be blank). This can be useful if you want - * to prevent iframes contacting external websites. - * Any allowed domain also includes its subdomains. - * - * Example: - * 'allowed_hosts' => ['trusted1.com', 'google.com'], - */ - 'allowed_hosts' => null, + 'allowed_attributes' => ['src', 'alt', 'title', 'class'], ], ], ]); ``` -## Forcing HTTPs on links, images and iframes hosts +## Configuring blocked attributes -The sanitizer basic, image and iframe extensions provide a feature to force HTTPs on targeted hosts. +The core extensions by default allow any attribute to be used on a tag. However, if you want to customize which +attributes are blocked on specific tags, you can use a tag-specific configuration for them. 
-To enable this feature, you need to configure the tags: +For instance, to only disallow the `class` attribute on the `div` tags, you can use the following configuration: ```php $sanitizer = HtmlSanitizer\Sanitizer::create([ - 'extensions' => ['basic', 'image', 'iframe'], + 'extensions' => ['html5'], 'tags' => [ - 'a' => [ - /* - * If true, all links targets using the HTTP protocol will be rewritten to use HTTPS instead. - */ - 'force_https' => false, - ], - - 'img' => [ - /* - * If true, all images URLs using the HTTP protocol will be rewritten to use HTTPS instead. - */ - 'force_https' => false, - ], - - 'iframe' => [ - /* - * If true, all iframes URLs using the HTTP protocol will be rewritten to use HTTPS instead. - */ - 'force_https' => false, + 'div' => [ + 'blocked_attributes' => ['class'], ], ], ]); ``` -## Configuring allowed attributes +## Configuring allowed classes -The core extensions define sensible default allowed attributes for each tag, which mean you usually won't need -to change them. However, if you want to customize which attributes are allowed on specific tags, you can use +The core extensions by default allow any class to be used on a tag, which means you usually won't need +to change them. However, if you want to customize which classes are allowed on specific tags, you can use a tag-specific configuration for them. -For instance, to allow the `class` attribute on the `div` and `img` tags, you can use the following configuration: +For instance, to only allow the `d-flex` class on the `div` tags, you can use the following configuration: ```php $sanitizer = HtmlSanitizer\Sanitizer::create([ - 'extensions' => ['basic'], + 'extensions' => ['html5'], 'tags' => [ 'div' => [ - 'allowed_attributes' => ['class'], + 'allowed_classes' => ['d-flex'], ], - 'img' => [ - 'allowed_attributes' => ['src', 'alt', 'title', 'class'], + ], +]); +``` + +## Configuring blocked classes + +The core extensions by default allow any class to be used on a tag. 
However, if you want to customize which +classes are blocked on specific tags, you can use a tag-specific configuration for them. + +For instance, to only disallow the `float` class on the `div` tags, you can use the following configuration: + +```php +$sanitizer = SN\HtmlSanitizer\Sanitizer::create([ + 'extensions' => ['html5'], + 'tags' => [ + 'div' => [ + 'blocked_classes' => ['float'], + ], + ], +]); +``` + +## Configuring childless tags + +The core extensions by default set the `area`, `base`, `br`, `col`, `img`, `input`, `hr`, `link`, `meta`, +`param`, `track`, `wbr` tags as childless tags, which means you usually won't need to change them. +However, if you want to customize which tags are childless, you can use a tag-specific configuration for them. + +For instance, you can use the following configuration: + +```php +$sanitizer = SN\HtmlSanitizer\Sanitizer::create([ + 'extensions' => ['html5'], + 'tags' => [ + 'hr' => [ + 'childless' => true, + ], + ], +]); +``` + +## Converting tags + +There may be an instance where you want to prevent the use of a deprecated tag that may have been +superseded by another tag. + +For instance, to convert `b` tags to `strong` tags, you can use the following configuration: + +```php +$sanitizer = SN\HtmlSanitizer\Sanitizer::create([ + 'extensions' => ['html5'], + 'tags' => [ + 'strong' => [ + 'convert_elements' => ['b'], ], ], ]); diff --git a/docs/2-creating-an-extension-to-allow-custom-tags.md b/docs/2-creating-an-extension-to-allow-custom-tags.md index 95673f2..98f2d7a 100644 --- a/docs/2-creating-an-extension-to-allow-custom-tags.md +++ b/docs/2-creating-an-extension-to-allow-custom-tags.md @@ -7,89 +7,14 @@ There are two steps in the creation of an extension: creating the node visitor w custom tag and registering this visitor by creating an extension class. 
To better understand how to create an extension suited to your needs, you can also have a look at the -[Image extension](https://github.com/tgalopin/html-sanitizer/tree/master/src/Extension/Image) +[HTML5 extension](https://github.com/snebes/html-sanitizer/tree/master/src/Extension/HTML5Extension.php) which shows the different features available. -## Creating a node and a node visitor +## Registering the extension -A node visitor is a class able to handle DOMNode instances of a certain type. It needs to implement the -`HtmlSanitizer\Visitor\VisitorInterface`. +Once you have created an extension, you need to register it in the sanitizer. -A node visitor is responsible of adding a node to the tree of safe HTML by filtering the DOMNode -it's given. Thus, for an example `my-tag` custom tag, we need to create two classes: a Node and -a NodeVisitor. - -The node could look like this: - -```php -namespace App\Sanitizer; - -use HtmlSanitizer\Node\AbstractTagNode; -use HtmlSanitizer\Node\HasChildrenTrait; - -class MyTagNode extends AbstractTagNode -{ - use HasChildrenTrait; // Or IsChildlessTrait - - public function getTagName(): string - { - return 'my-tag'; - } -} -``` - -A simple visitor for a `my-tag` custom tag could look like this: - -```php -namespace App\Sanitizer; - -use HtmlSanitizer\Model\Cursor; -use HtmlSanitizer\Node\NodeInterface; -use HtmlSanitizer\Visitor\AbstractNodeVisitor; -use HtmlSanitizer\Visitor\HasChildrenNodeVisitorTrait; -use HtmlSanitizer\Visitor\NamedNodeVisitorInterface; - -class MyTagNodeVisitor extends AbstractNodeVisitor implements NamedNodeVisitorInterface -{ - use HasChildrenNodeVisitorTrait; // Or IsChildlessTagVisitorTrait - - protected function getDomNodeName(): string - { - return 'my-tag'; - } - - public function getDefaultAllowedAttributes(): array - { - return [ - 'class', 'width', 'height' - ]; - } - - public function getDefaultConfiguration(): array - { - return [ - 'custom_config' => null, - ]; - } - - protected function 
createNode(\DOMNode $domNode, Cursor $cursor): NodeInterface - { - // You need to pass the current node as your node parent - $node = new MyTagNode($cursor->node); - - // You can use $this->config['custom_config'] to access the user-defined configuration - - return $node; - } -} -``` - -## Registering the node visitor with an extension - -Once you created a node and a node visitor, you need to use an extension to register the visitor in the -sanitizer. - -An extension is a class implementing the `HtmlSanitizer\Extension\ExtensionInterface` interface, which requires +An extension is a class implementing the `SN\HtmlSanitizer\Extension\ExtensionInterface` interface, which requires two methods: - `getName()` which should return the name to use in the configuration (`basic`, `list`, etc.) ; @@ -100,7 +25,8 @@ For our node visitor, this could look like this: ```php namespace App\Sanitizer; -use HtmlSanitizer\Extension\ExtensionInterface; +use SN\HtmlSanitizer\Extension\ExtensionInterface; +use SN\HtmlSanitizer\NodeVisitor\TagNodeVisitor; class MyTagExtension implements ExtensionInterface { @@ -112,10 +38,7 @@ class MyTagExtension implements ExtensionInterface public function createNodeVisitors(array $config = []): array { return [ - 'my-tag' => new MyTagNodeVisitor($config['tags']['my-tag'] ?? []), - - // You can also override previous extensions tags here, for instance: - // 'img' => new MyCustomImgVisitor(), + 'my-tag' => new TagNodeVisitor($config['tags']['my-tag'] ?? 
[]), ]; } } @@ -124,14 +47,13 @@ class MyTagExtension implements ExtensionInterface Then, you can use the builder to create a Sanitizer that include this extension: ```php -$builder = new HtmlSanitizer\SanitizerBuilder(); -$builder->registerExtension(new HtmlSanitizer\Extension\BasicExtension()); -$builder->registerExtension(new HtmlSanitizer\Extension\ListExtension()); +$builder = new SN\HtmlSanitizer\SanitizerBuilder(); +$builder->registerExtension(new SN\HtmlSanitizer\Extension\HTML5Extension()); // Add the other core ones you need $builder->registerExtension(new App\Sanitizer\MyTagExtension()); $sanitizer = $builder->build([ - 'extensions' => ['basic', 'list', 'my-tag'], + 'extensions' => ['html5', 'my-tag'], }); ``` diff --git a/docs/3-configuration-reference.md b/docs/3-configuration-reference.md index 77f2b2e..b3026a3 100644 --- a/docs/3-configuration-reference.md +++ b/docs/3-configuration-reference.md @@ -3,7 +3,7 @@ Here is the configuration default values with annotations describing the specific configuration keys: ```php -$sanitizer = HtmlSanitizer\Sanitizer::create([ +$sanitizer = SN\HtmlSanitizer\Sanitizer::create([ /* * Maximum length in number of characters this sanitizer will accept as inputs. diff --git a/docs/A-security-disclosure-procedure.md b/docs/A-security-disclosure-procedure.md index 4a74f82..9912323 100644 --- a/docs/A-security-disclosure-procedure.md +++ b/docs/A-security-disclosure-procedure.md @@ -1,7 +1,7 @@ # Security issue disclosure procedure If you think that you have found a security issue in html-sanitizer, don't use the bug tracker and -don't publish it publicly. Instead, all security issues must be sent to galopintitouan [at] gmail.com. +don't publish it publicly. Instead, all security issues must be sent to snebes+html.purifier [at] gmail.com. For each report, the core maintainers of html-sanitizer will first try to confirm the vulnerability. When it is confirmed, we will work on a solution following these steps: