diff --git a/pyproject.toml b/pyproject.toml
index 9e871da..8f3f5d4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ authors = [
]
readme = "README.rst"
license = {text = "Apache License, Version 2.0"}
-dependencies = ["bleach>=2.1.0", "docutils>=0.13.1", "Pygments>=2.5.1"]
+dependencies = ["nh3>=0.2.14", "docutils>=0.13.1", "Pygments>=2.5.1"]
classifiers = [
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
diff --git a/readme_renderer/clean.py b/readme_renderer/clean.py
index 29324d3..b081e28 100644
--- a/readme_renderer/clean.py
+++ b/readme_renderer/clean.py
@@ -12,16 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import functools
-from typing import Any, Dict, Iterator, List, Optional
+from typing import Dict, Optional, Set
-import bleach
-import bleach.callbacks
-import bleach.linkifier
-import bleach.sanitizer
+import nh3
-ALLOWED_TAGS = [
+ALLOWED_TAGS = {
# Bleach Defaults
"a", "abbr", "acronym", "b", "blockquote", "code", "em", "i", "li", "ol",
"strong", "ul",
@@ -32,102 +28,62 @@
"span", "sub", "summary", "sup", "table", "tbody", "td", "th", "thead",
"tr", "tt", "kbd", "var", "input", "section", "aside", "nav", "s", "figure",
"figcaption",
-]
+}
ALLOWED_ATTRIBUTES = {
# Bleach Defaults
- "a": ["href", "title"],
- "abbr": ["title"],
- "acronym": ["title"],
+ "a": {"href", "title"},
+ "abbr": {"title"},
+ "acronym": {"title"},
# Custom Additions
- "*": ["id"],
- "hr": ["class"],
- "img": ["src", "width", "height", "alt", "align", "class"],
- "span": ["class"],
- "th": ["align", "class"],
- "td": ["align", "colspan", "rowspan"],
- "div": ["align", "class"],
- "h1": ["align"],
- "h2": ["align"],
- "h3": ["align"],
- "h4": ["align"],
- "h5": ["align"],
- "h6": ["align"],
- "code": ["class"],
- "p": ["align", "class"],
- "pre": ["lang"],
- "ol": ["start"],
- "input": ["type", "checked", "disabled"],
- "aside": ["class"],
- "dd": ["class"],
- "dl": ["class"],
- "dt": ["class"],
- "ul": ["class"],
- "nav": ["class"],
- "figure": ["class"],
+ "*": {"id"},
+ "hr": {"class"},
+ "img": {"src", "width", "height", "alt", "align", "class"},
+ "span": {"class"},
+ "th": {"align", "class"},
+ "td": {"align", "colspan", "rowspan"},
+ "div": {"align", "class"},
+ "h1": {"align"},
+ "h2": {"align"},
+ "h3": {"align"},
+ "h4": {"align"},
+ "h5": {"align"},
+ "h6": {"align"},
+ "code": {"class"},
+ "p": {"align", "class"},
+ "pre": {"lang"},
+ "ol": {"start"},
+ "input": {"type", "checked", "disabled"},
+ "aside": {"class"},
+ "dd": {"class"},
+ "dl": {"class"},
+ "dt": {"class"},
+ "ul": {"class"},
+ "nav": {"class"},
+ "figure": {"class"},
}
-class DisabledCheckboxInputsFilter:
- # The typeshed for bleach (html5lib) filters is incomplete, use `typing.Any`
- # See https://github.com/python/typeshed/blob/505ea726415016e53638c8b584b8fdc9c722cac1/stubs/bleach/bleach/html5lib_shim.pyi#L7-L8 # noqa E501
- def __init__(self, source: Any) -> None:
- self.source = source
-
- def __iter__(self) -> Iterator[Dict[str, Optional[str]]]:
- for token in self.source:
- if token.get("name") == "input":
- # only allow disabled checkbox inputs
- is_checkbox, is_disabled, unsafe_attrs = False, False, False
- for (_, attrname), value in token.get("data", {}).items():
- if attrname == "type" and value == "checkbox":
- is_checkbox = True
- elif attrname == "disabled":
- is_disabled = True
- elif attrname != "checked":
- unsafe_attrs = True
- break
- if is_checkbox and is_disabled and not unsafe_attrs:
- yield token
- else:
- yield token
-
- def __getattr__(self, name: str) -> Any:
- return getattr(self.source, name)
-
-
def clean(
html: str,
- tags: Optional[List[str]] = None,
- attributes: Optional[Dict[str, List[str]]] = None
+ tags: Optional[Set[str]] = None,
+ attributes: Optional[Dict[str, Set[str]]] = None
) -> Optional[str]:
+ """Sanitize ``html`` with nh3 and return the cleaned markup.
+
+ ``tags`` and ``attributes`` are the allow-lists handed to ``nh3.clean``;
+ when omitted they default to ``ALLOWED_TAGS`` and ``ALLOWED_ATTRIBUTES``.
+ Links get ``rel="nofollow"`` and only http, https and mailto URL schemes
+ are permitted. Returns ``None`` if nh3 raises ``ValueError``.
+ """
if tags is None:
tags = ALLOWED_TAGS
if attributes is None:
attributes = ALLOWED_ATTRIBUTES
- # Clean the output using Bleach
- cleaner = bleach.sanitizer.Cleaner(
- tags=tags,
- attributes=attributes,
- filters=[
- # Bleach Linkify makes it easy to modify links, however, we will
- # not be using it to create additional links.
- functools.partial(
- bleach.linkifier.LinkifyFilter,
- callbacks=[
- lambda attrs, new: attrs if not new else None,
- bleach.callbacks.nofollow,
- ],
- skip_tags=["pre"],
- parse_email=False,
- ),
- DisabledCheckboxInputsFilter,
- ],
- )
try:
- cleaned = cleaner.clean(html)
+ # Pass the resolved allow-lists rather than the module defaults so that a
+ # caller-supplied ``tags``/``attributes`` restriction is actually enforced.
+ cleaned = nh3.clean(
+ html,
+ tags=tags,
+ attributes=attributes,
+ link_rel="nofollow",
+ url_schemes={"http", "https", "mailto"},
+ )
+
return cleaned
except ValueError:
return None
diff --git a/readme_renderer/txt.py b/readme_renderer/txt.py
index 5af4805..adcf53e 100644
--- a/readme_renderer/txt.py
+++ b/readme_renderer/txt.py
@@ -21,4 +21,4 @@
def render(raw: str, **kwargs: Any) -> Optional[str]:
rendered = html_escape(raw).replace("\n", "<br>")
- return clean(rendered, tags=["br"])
+ return clean(rendered, tags={"br"})
diff --git a/tests/fixtures/test_CommonMark_006.html b/tests/fixtures/test_CommonMark_006.html
index d56d3e2..b3cd426 100644
--- a/tests/fixtures/test_CommonMark_006.html
+++ b/tests/fixtures/test_CommonMark_006.html
@@ -1 +1 @@
-<iframe src="http://mymalicioussite.com/">Click here</iframe>
+Click here
diff --git a/tests/fixtures/test_CommonMark_007.html b/tests/fixtures/test_CommonMark_007.html
index b1c178b..d9b05e6 100644
--- a/tests/fixtures/test_CommonMark_007.html
+++ b/tests/fixtures/test_CommonMark_007.html
@@ -1,4 +1,2 @@
Something naughty this way comes
-<script> - alert("Hello"); -</script> + diff --git a/tests/fixtures/test_CommonMark_008.html b/tests/fixtures/test_CommonMark_008.html index 4d52a37..eef144d 100644 --- a/tests/fixtures/test_CommonMark_008.html +++ b/tests/fixtures/test_CommonMark_008.html @@ -4,13 +4,13 @@ self.name = name def make_sound(self): - print('Ruff!') + print('Ruff!') -dog = Dog('Fido') +dog = Dog('Fido')and then here is some bash:
-if [ "$1" = "--help" ]; then
- echo "OK"
+if [ "$1" = "--help" ]; then
+ echo "OK"
fi
or click SurveyMonkey
diff --git a/tests/fixtures/test_GFM_019.html b/tests/fixtures/test_GFM_019.html
index 5dd4ff1..8ceabeb 100644
--- a/tests/fixtures/test_GFM_019.html
+++ b/tests/fixtures/test_GFM_019.html
@@ -1,3 +1,3 @@
(Visit https://encrypted.google.com/search?q=Markup+(business))
-Anonymous FTP is available at ftp://foo.bar.baz.
+Anonymous FTP is available at ftp://foo.bar.baz.
diff --git a/tests/fixtures/test_GFM_020.html b/tests/fixtures/test_GFM_020.html
index 3df2b31..5bd8002 100644
--- a/tests/fixtures/test_GFM_020.html
+++ b/tests/fixtures/test_GFM_020.html
@@ -1 +1 @@
-
+
diff --git a/tests/fixtures/test_GFM_021.html b/tests/fixtures/test_GFM_021.html
index dbf5bf4..60b099b 100644
--- a/tests/fixtures/test_GFM_021.html
+++ b/tests/fixtures/test_GFM_021.html
@@ -1 +1 @@
-hello@mail+xyz.example isn't valid, but hello+xyz@mail.example is.
+hello@mail+xyz.example isn't valid, but hello+xyz@mail.example is.
diff --git a/tests/fixtures/test_GFM_022.html b/tests/fixtures/test_GFM_022.html
index d92d5e0..5ef0506 100644
--- a/tests/fixtures/test_GFM_022.html
+++ b/tests/fixtures/test_GFM_022.html
@@ -1,4 +1,4 @@
-
-
+
+
a.b-c_d@a.b-
a.b-c_d@a.b_
diff --git a/tests/fixtures/test_GFM_024.html b/tests/fixtures/test_GFM_024.html
index df9cabe..b5f9d02 100644
--- a/tests/fixtures/test_GFM_024.html
+++ b/tests/fixtures/test_GFM_024.html
@@ -1,17 +1,17 @@
This is code text.
def this_is_python():
- """This is a docstring."""
+ """This is a docstring."""
pass
func ThisIsGo(){
diff --git a/tests/fixtures/test_GFM_malicious_pre.html b/tests/fixtures/test_GFM_malicious_pre.html
index 245e17e..dc7b6e6 100644
--- a/tests/fixtures/test_GFM_malicious_pre.html
+++ b/tests/fixtures/test_GFM_malicious_pre.html
@@ -1,6 +1,6 @@
This is normal text.
def this_is_python():
- """This is a docstring."""
+ """This is a docstring."""
pass
-<script type="text/javascript">alert('I am evil.');</script>
+<script type="text/javascript">alert('I am evil.');</script>
diff --git a/tests/fixtures/test_rst_008.html b/tests/fixtures/test_rst_008.html
index 6a01a9d..6762f3a 100644
--- a/tests/fixtures/test_rst_008.html
+++ b/tests/fixtures/test_rst_008.html
@@ -8,8 +8,8 @@
dog = Dog('Fido')
and then here is some bash:
-if [ "$1" = "--help" ]; then
- echo "OK"
+if [ "$1" = "--help" ]; then
+ echo "OK"
fi
or click SurveyMonkey
An unknown code fence block
diff --git a/tests/fixtures/test_rst_bibtex.html b/tests/fixtures/test_rst_bibtex.html
index 4305801..d9048e7 100644
--- a/tests/fixtures/test_rst_bibtex.html
+++ b/tests/fixtures/test_rst_bibtex.html
@@ -1,2 +1,2 @@
-@article{the_impact_of_pygments_docutils_config_and_html5,
+@article{the_impact_of_pygments_docutils_config_and_html5,
year = {2022},
diff --git a/tests/fixtures/test_rst_docinfo.html b/tests/fixtures/test_rst_docinfo.html
index 4d514b4..a9d24d3 100644
--- a/tests/fixtures/test_rst_docinfo.html
+++ b/tests/fixtures/test_rst_docinfo.html
@@ -6,7 +6,7 @@
mer 02 ago 2017 14:49:24 CEST
Author:
-Lele Gaifax <lele@metapensiero.it>
+Lele Gaifax <lele@metapensiero.it>
License:
GNU General Public License version 3 or later
diff --git a/tests/fixtures/test_rst_linkify.html b/tests/fixtures/test_rst_linkify.html
index a151f25..5956925 100644
--- a/tests/fixtures/test_rst_linkify.html
+++ b/tests/fixtures/test_rst_linkify.html
@@ -42,7 +42,7 @@ Development
IRC:
-irc://irc.freenode.net/tulsawebdevs
+
diff --git a/tests/fixtures/test_txt_001.html b/tests/fixtures/test_txt_001.html
index 9e2d139..f676e7a 100644
--- a/tests/fixtures/test_txt_001.html
+++ b/tests/fixtures/test_txt_001.html
@@ -1 +1 @@
-# Algen
Algen generates opionated ORM classes for sqlalchemy given a simple schema
either as a commandline string or as a yaml file.
It is designed to have minimal dependencies and is trivially extensible.
A command line tool is bundled along to help generate the models.
For DB specific types, only postgres is currently supported.
The tool currently assumes that sqlalchemy's declarative base object
is to be imported like ```from .alchemy_base import Base```
### CLI
```bash
$ algen --help
Usage: algen [OPTIONS]
Options:
-n, --name TEXT Name of model
-c, --columns TEXT Column definition. e.g. col_name:col_type Can be
used multiple times hence named columns. e.g. -c
foo:Int -c bar:Unicode(20)
-d, --destination PATH Destination directory. Default will assume 'Models'
directory inside the current working directory
-y, --yaml PATH Yaml file describing the Model. This supersedes the
column definition provided through --columns option.
--help Show this message and exit.
```
Given a file as follows:
```yaml
Person:
columns:
- name: id
type: BigInteger
primary_key: True
auto_increment: True
- name: name
type: Unicode(255)
- name: is_vip
type: Boolean
- name: created_at
type: DateTime(timezone=True)
Address:
columns:
- name: id
type: BigInteger
primary_key: True
auto_increment: True
- name: line1
type: Unicode()
- name: line2
type: Unicode()
- name: line3
type: Unicode()
- name: postcode
type: Unicode(10)
index: True
```
The cli tool will create two the following two files ```Person.py``` and ```Address.py```.
```python
from __future__ import unicode_literals, absolute_import, print_function
from collections import namedtuple
from sqlalchemy import Column, DateTime, Boolean, Unicode, BigInteger
from .alchemy_base import Base
__author__ = 'danishabdullah'
class Person(Base):
__tablename__ = 'persons'
id = Column(BigInteger, primary_key=True, auto_increment=True)
name = Column(Unicode(255), )
is_vip = Column(Boolean, )
created_at = Column(DateTime(timezone=True), )
def __init__(self, id=None, name=None, is_vip=None, created_at=None):
self.id = id
self.name = name
self.is_vip = is_vip
self.created_at = created_at
def add(self, session):
session.add(self)
def update(self, name=None, is_vip=None, created_at=None):
# This function only updates a value if it is not None.
# Falsy values go through in the normal way.
# To set things to None use the usual syntax:
# Person.column_name = None
if name is not None:
self.name = name
if is_vip is not None:
self.is_vip = is_vip
if created_at is not None:
self.created_at = created_at
def delete(self, session):
session.delete(self)
def to_dict(self):
return {x: y for x, y in self.__dict__.items() if not x.startswith("_sa")}
def get_proxy_cls(self):
# PersonProxy is useful when you want to persist data
# independent of the sqlalchemy session. It's just a namedtuple
# that has very low memory/cpu footprint compared the regular
# orm class instances.
keys = self.to_dict().keys()
name = "PersonProxy"
return namedtuple(name, keys)
def to_proxy(self):
# Proxy-ing is useful when you want to persist data
# independent of the sqlalchemy session. It's just a namedtuple
# that has very low memory/cpu footprint compared the regular
# orm class instances.
cls = self._get_proxy_cls()
return cls(**self.to_dict())
@classmethod
def from_proxy(cls, proxy):
return cls(**proxy._asdict())
def __hash__(self):
return hash(str(self.id))
def __eq__(self, other):
return (self.id == other.id)
def __neq__(self, other):
return not (self.id == other.id)
def __str__(self):
return "<Person: {id}>".format(id=self.id)
def __unicode__(self):
return "<Person: {id}>".format(id=self.id)
def __repr__(self):
return "<Person: {id}>".format(id=self.id)
```
```python
from __future__ import unicode_literals, absolute_import, print_function
from collections import namedtuple
from sqlalchemy import Column, Unicode, BigInteger
from .alchemy_base import Base
__author__ = 'danishabdullah'
class Address(Base):
__tablename__ = 'addresses'
id = Column(BigInteger, primary_key=True, auto_increment=True)
line1 = Column(Unicode(), )
line2 = Column(Unicode(), )
line3 = Column(Unicode(), )
postcode = Column(Unicode(10), index=True)
def __init__(self, id=None, line1=None, line2=None, line3=None, postcode=None):
self.id = id
self.line1 = line1
self.line2 = line2
self.line3 = line3
self.postcode = postcode
def add(self, session):
session.add(self)
def update(self, line1=None, line2=None, line3=None, postcode=None):
# This function only updates a value if it is not None.
# Falsy values go through in the normal way.
# To set things to None use the usual syntax:
# Address.column_name = None
if line1 is not None:
self.line1 = line1
if line2 is not None:
self.line2 = line2
if line3 is not None:
self.line3 = line3
if postcode is not None:
self.postcode = postcode
def delete(self, session):
session.delete(self)
def to_dict(self):
return {x: y for x, y in self.__dict__.items() if not x.startswith("_sa")}
def get_proxy_cls(self):
# AddressProxy is useful when you want to persist data
# independent of the sqlalchemy session. It's just a namedtuple
# that has very low memory/cpu footprint compared the regular
# orm class instances.
keys = self.to_dict().keys()
name = "AddressProxy"
return namedtuple(name, keys)
def to_proxy(self):
# Proxy-ing is useful when you want to persist data
# independent of the sqlalchemy session. It's just a namedtuple
# that has very low memory/cpu footprint compared the regular
# orm class instances.
cls = self._get_proxy_cls()
return cls(**self.to_dict())
@classmethod
def from_proxy(cls, proxy):
return cls(**proxy._asdict())
def __hash__(self):
return hash(str(self.id))
def __eq__(self, other):
return (self.id == other.id)
def __neq__(self, other):
return not (self.id == other.id)
def __str__(self):
return "<Address: {id}>".format(id=self.id)
def __unicode__(self):
return "<Address: {id}>".format(id=self.id)
def __repr__(self):
return "<Address: {id}>".format(id=self.id)
```
\ No newline at end of file
+# Algen
Algen generates opionated ORM classes for sqlalchemy given a simple schema
either as a commandline string or as a yaml file.
It is designed to have minimal dependencies and is trivially extensible.
A command line tool is bundled along to help generate the models.
For DB specific types, only postgres is currently supported.
The tool currently assumes that sqlalchemy's declarative base object
is to be imported like ```from .alchemy_base import Base```
### CLI
```bash
$ algen --help
Usage: algen [OPTIONS]
Options:
-n, --name TEXT Name of model
-c, --columns TEXT Column definition. e.g. col_name:col_type Can be
used multiple times hence named columns. e.g. -c
foo:Int -c bar:Unicode(20)
-d, --destination PATH Destination directory. Default will assume 'Models'
directory inside the current working directory
-y, --yaml PATH Yaml file describing the Model. This supersedes the
column definition provided through --columns option.
--help Show this message and exit.
```
Given a file as follows:
```yaml
Person:
columns:
- name: id
type: BigInteger
primary_key: True
auto_increment: True
- name: name
type: Unicode(255)
- name: is_vip
type: Boolean
- name: created_at
type: DateTime(timezone=True)
Address:
columns:
- name: id
type: BigInteger
primary_key: True
auto_increment: True
- name: line1
type: Unicode()
- name: line2
type: Unicode()
- name: line3
type: Unicode()
- name: postcode
type: Unicode(10)
index: True
```
The cli tool will create two the following two files ```Person.py``` and ```Address.py```.
```python
from __future__ import unicode_literals, absolute_import, print_function
from collections import namedtuple
from sqlalchemy import Column, DateTime, Boolean, Unicode, BigInteger
from .alchemy_base import Base
__author__ = 'danishabdullah'
class Person(Base):
__tablename__ = 'persons'
id = Column(BigInteger, primary_key=True, auto_increment=True)
name = Column(Unicode(255), )
is_vip = Column(Boolean, )
created_at = Column(DateTime(timezone=True), )
def __init__(self, id=None, name=None, is_vip=None, created_at=None):
self.id = id
self.name = name
self.is_vip = is_vip
self.created_at = created_at
def add(self, session):
session.add(self)
def update(self, name=None, is_vip=None, created_at=None):
# This function only updates a value if it is not None.
# Falsy values go through in the normal way.
# To set things to None use the usual syntax:
# Person.column_name = None
if name is not None:
self.name = name
if is_vip is not None:
self.is_vip = is_vip
if created_at is not None:
self.created_at = created_at
def delete(self, session):
session.delete(self)
def to_dict(self):
return {x: y for x, y in self.__dict__.items() if not x.startswith("_sa")}
def get_proxy_cls(self):
# PersonProxy is useful when you want to persist data
# independent of the sqlalchemy session. It's just a namedtuple
# that has very low memory/cpu footprint compared the regular
# orm class instances.
keys = self.to_dict().keys()
name = "PersonProxy"
return namedtuple(name, keys)
def to_proxy(self):
# Proxy-ing is useful when you want to persist data
# independent of the sqlalchemy session. It's just a namedtuple
# that has very low memory/cpu footprint compared the regular
# orm class instances.
cls = self._get_proxy_cls()
return cls(**self.to_dict())
@classmethod
def from_proxy(cls, proxy):
return cls(**proxy._asdict())
def __hash__(self):
return hash(str(self.id))
def __eq__(self, other):
return (self.id == other.id)
def __neq__(self, other):
return not (self.id == other.id)
def __str__(self):
return "<Person: {id}>".format(id=self.id)
def __unicode__(self):
return "<Person: {id}>".format(id=self.id)
def __repr__(self):
return "<Person: {id}>".format(id=self.id)
```
```python
from __future__ import unicode_literals, absolute_import, print_function
from collections import namedtuple
from sqlalchemy import Column, Unicode, BigInteger
from .alchemy_base import Base
__author__ = 'danishabdullah'
class Address(Base):
__tablename__ = 'addresses'
id = Column(BigInteger, primary_key=True, auto_increment=True)
line1 = Column(Unicode(), )
line2 = Column(Unicode(), )
line3 = Column(Unicode(), )
postcode = Column(Unicode(10), index=True)
def __init__(self, id=None, line1=None, line2=None, line3=None, postcode=None):
self.id = id
self.line1 = line1
self.line2 = line2
self.line3 = line3
self.postcode = postcode
def add(self, session):
session.add(self)
def update(self, line1=None, line2=None, line3=None, postcode=None):
# This function only updates a value if it is not None.
# Falsy values go through in the normal way.
# To set things to None use the usual syntax:
# Address.column_name = None
if line1 is not None:
self.line1 = line1
if line2 is not None:
self.line2 = line2
if line3 is not None:
self.line3 = line3
if postcode is not None:
self.postcode = postcode
def delete(self, session):
session.delete(self)
def to_dict(self):
return {x: y for x, y in self.__dict__.items() if not x.startswith("_sa")}
def get_proxy_cls(self):
# AddressProxy is useful when you want to persist data
# independent of the sqlalchemy session. It's just a namedtuple
# that has very low memory/cpu footprint compared the regular
# orm class instances.
keys = self.to_dict().keys()
name = "AddressProxy"
return namedtuple(name, keys)
def to_proxy(self):
# Proxy-ing is useful when you want to persist data
# independent of the sqlalchemy session. It's just a namedtuple
# that has very low memory/cpu footprint compared the regular
# orm class instances.
cls = self._get_proxy_cls()
return cls(**self.to_dict())
@classmethod
def from_proxy(cls, proxy):
return cls(**proxy._asdict())
def __hash__(self):
return hash(str(self.id))
def __eq__(self, other):
return (self.id == other.id)
def __neq__(self, other):
return not (self.id == other.id)
def __str__(self):
return "<Address: {id}>".format(id=self.id)
def __unicode__(self):
return "<Address: {id}>".format(id=self.id)
def __repr__(self):
return "<Address: {id}>".format(id=self.id)
```
\ No newline at end of file
diff --git a/tests/test_clean.py b/tests/test_clean.py
index 4eb8b41..1dc3f38 100644
--- a/tests/test_clean.py
+++ b/tests/test_clean.py
@@ -2,4 +2,6 @@
def test_invalid_link():
- assert clean('foo') == "foo"
+ assert clean(
+ 'foo'
+ ) == 'foo'
diff --git a/tox.ini b/tox.ini
index 5b5a5fd..8ae2e5d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,6 +6,7 @@ isolated_build = True
deps =
pytest
pytest-cov
+ pytest-icdiff
commands =
pytest --strict-markers --cov {posargs}
extras = md