Skip to content

Commit

Permalink
PGP format validation (#59)
Browse files Browse the repository at this point in the history
* adding pgp valid formatting check

* adding improvement on the 'newline' check

* added a fix for the BOM issue

* added readme for new errors

* simplify newline message also in readme

* improved 'no_uri' message and added user agent

* user agent added

* error clarification and some information added to the readme

* bumping version

---------

Co-authored-by: SanderKools <[email protected]>
  • Loading branch information
SanderKools-Ordina and SanderKools authored Aug 3, 2023
1 parent 79bb386 commit d56b65a
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 22 deletions.
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ True
>>> from sectxt import SecurityTXT
>>> s = SecurityTXT("www.example.com")
>>> s.errors
[{'code': 'no_uri', 'message': 'The field value must be an URI', 'line': 2}, {'code': 'no_expire', 'message': 'The Expires field is missing', 'line': None}]
[{'code': 'no_uri', 'message': "Field 'policy' value must be a URI.", 'line': 2}, {'code': 'no_expire', 'message': 'The Expires field is missing', 'line': None}]
>>> s.recommendations
[{'code': 'long_expiry', 'message': 'Expiry date is more than one year in the future', 'line': 3}]
```
Expand Down Expand Up @@ -64,17 +64,19 @@ a dict with three keys:
| "no_canonical_match" | "Web URI where security.txt is located must match with a 'Canonical' field. In case of redirecting either the first or last web URI of the redirect chain must match." |
| "multi_lang" | "'Preferred-Languages' field must not appear more than once." |
| "invalid_lang" | "Value in 'Preferred-Languages' field must match one or more language tags as defined in RFC5646, separated by commas." |
| "no_uri" | "Field value must be a URI (e.g. beginning with 'mailto:')." |
| "no_uri" | "Field '{field}' value must be a URI." |
| "no_https" | "Web URI must begin with 'https://'." |
| "prec_ws" | "There must be no whitespace before the field separator (colon)." |
| "no_space" | "Field separator (colon) must be followed by a space." |
| "empty_key" | "Field name must not be empty." |
| "empty_value" | "Field value must not be empty." |
| "invalid_line" | "Line must contain a field name and value, unless the line is blank or contains a comment." |
| "no_line_separators" | "Every line must end with either a carriage return and line feed characters or just a line feed character" |
| "no_line_separators" | "Every line, including the last one, must end with either a carriage return and line feed characters or just a line feed character" |
| "signed_format_issue" | "Signed security.txt must start with the header '-----BEGIN PGP SIGNED MESSAGE-----'. " |
| "data_after_sig" | "Signed security.txt must not contain data after the signature." |
| "no_csaf_file" | "All CSAF fields must point to a provider-metadata.json file." |
| "pgp_data_error" | "Signed message did not contain a correct ASCII-armored PGP block." |
| "pgp_error" | "Decoding or parsing of the pgp message failed." |


### Possible recommendations
Expand All @@ -94,6 +96,11 @@ a dict with three keys:
| "unknown_field"<sup>[2]</sup> | "security.txt contains an unknown field. Field {unknown_field} is either a custom field which may not be widely supported, or there is a typo in a standardised field name." |


### Security.txt scraping information

The scraper first attempts to find the security.txt of the given domain in the correct location, `/.well-known/security.txt`. It also looks in the old location (`/security.txt`) and over the insecure `http` scheme; a file found in either of those ways results in validation errors. To reduce the chance of the request being rejected by the server, a User-Agent header is added to the request. The user agent that is sent is `Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0`, which mimics a Firefox browser running on Windows 7.
If a security.txt file is found, that file is then parsed. Any errors, recommendations or notifications that are found are returned.

---

[1] The security.txt parser will check for the addition of the digital signature, but it will not verify the validity of the signature.
Expand Down
11 changes: 6 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
requests
python-dateutil
langcodes
pytest
requests-mock
requests==2.31.0
python-dateutil==2.8.2
langcodes==3.3.0
pytest==7.4.0
requests-mock==1.11.0
PGPy==0.6.0
56 changes: 47 additions & 9 deletions sectxt/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#
# SPDX-License-Identifier: EUPL-1.2
#
import codecs

import langcodes
import re
import sys
Expand All @@ -9,6 +11,8 @@
from datetime import datetime, timezone
from typing import Optional, Union, List, DefaultDict
from urllib.parse import urlsplit, urlunsplit
import pgpy
from pgpy.errors import PGPError

if sys.version_info < (3, 8):
from typing_extensions import TypedDict
Expand All @@ -18,7 +22,7 @@
import dateutil.parser
import requests

__version__ = "0.8.3"
__version__ = "0.9.0"

s = requests.Session()

Expand Down Expand Up @@ -96,7 +100,10 @@ def _add_error(
self,
code: str,
message: str,
explicit_line_no=None
) -> None:
if explicit_line_no:
self._line_no = explicit_line_no
err_dict: ErrorDict = {"code": code, "message": message, "line": self._line_no}
self._errors.append(err_dict)

Expand Down Expand Up @@ -144,6 +151,21 @@ def _parse_line(self, line: str) -> LineDict:
"'-----BEGIN PGP SIGNED MESSAGE-----'.",
)
self._signed = True

# Check pgp formatting if signed
try:
pgpy.PGPMessage.from_blob(self._content)
except ValueError:
self._add_error(
"pgp_data_error",
"Signed message did not contain a correct ASCII-armored PGP block."
)
except PGPError as e:
self._add_error(
"pgp_error",
"Decoding or parsing of the pgp message failed."
)

return {"type": "pgp_envelope", "field_name": None, "value": line}

if line == "-----BEGIN PGP SIGNATURE-----" and self._signed:
Expand Down Expand Up @@ -199,7 +221,7 @@ def _parse_field(self, line: str) -> LineDict:
if url_parts.scheme == "":
self._add_error(
"no_uri",
"Field value must be a URI (e.g. beginning with 'mailto:').",
f"Field '{key}' value must be a URI.",
)
elif url_parts.scheme == "http":
self._add_error("no_https", "Web URI must begin with 'https://'.")
Expand Down Expand Up @@ -284,9 +306,10 @@ def validate_contents(self) -> None:
if self.lines[-1]["type"] != "empty":
self._add_error(
"no_line_separators",
"Every line must end with either a carriage "
"return and line feed characters or just a line "
"feed character",
"Every line, including the last one, must end with "
"either a carriage return and line feed characters "
"or just a line feed character",
len(self.lines)
)

if "csaf" in self._values:
Expand Down Expand Up @@ -393,26 +416,41 @@ def __init__(self, url: str, recommend_unknown_fields: bool = True):

def _get_str(self, content: bytes) -> str:
try:
return content.decode()
if content.startswith(codecs.BOM_UTF8):
content = content.replace(codecs.BOM_UTF8, b'')
return content.decode('utf-8')
except UnicodeError:
self._add_error("utf8", "Content must be utf-8 encoded.")
return content.decode(errors="replace")
return content.decode('utf-8', errors="replace")

def _process(self) -> None:
security_txt_found = False
for scheme in ["https", "http"]:
for path in [".well-known/security.txt", "security.txt"]:
url = urlunsplit((scheme, self._netloc, path, None, None))
try:
resp = requests.get(url, timeout=5)
resp = requests.get(
url,
headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) '
'Gecko/20100101 Firefox/12.0'},
timeout=5
)
except requests.exceptions.SSLError:
if not any(d["code"] == "invalid_cert" for d in self._errors):
self._add_error(
"invalid_cert",
"security.txt must be served with a valid TLS certificate.",
)
try:
resp = requests.get(url, timeout=5, verify=False)
resp = requests.get(
url,
headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) '
'Gecko/20100101 Firefox/12.0'},
timeout=5,
verify=False
)
except:
continue
except:
Expand Down
72 changes: 67 additions & 5 deletions test/test_sectxt.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.2
[signature]
wpwEAQEIABAFAmTHcawJEDs4gPMoG10dAACN5wP/UozhFqHcUWRNhg4KwfY4
HHXU8bf222naeYJHgaHadLTJJ8YQIQ9N5fYF7K4BM0jPZc48aaUPaBdhNxw+
KDtQJWPzVREIbbGLRQ5WNYrLR6/7v1LHTI8RvgY22QZD9EAkFQwgdG8paIP4
2APWewNf8e01t1oh4n5bDBtr4IaQoj0=
=DHXw
-----END PGP SIGNATURE-----
"""

Expand Down Expand Up @@ -127,6 +131,37 @@ def test_signed(self):
p = Parser(_signed_example)
self.assertTrue(p.is_valid())

def test_signed_invalid_pgp(self):
# Remove required pgp signature header for pgp data error
content = _signed_example.replace(
"-----BEGIN PGP SIGNATURE-----", ""
)
p1 = Parser(content)
self.assertFalse(p1.is_valid())
self.assertEqual(
len([1 for r in p1._errors if r["code"] == "pgp_data_error"]), 1
)
# Add dash escaping within the pgp signature for pgp data error
content = _signed_example.replace(
"-----BEGIN PGP SIGNATURE-----", "-----BEGIN PGP SIGNATURE-----\n- \n"
)
p2 = Parser(content)
self.assertFalse(p2.is_valid())
self.assertEqual(
len([1 for r in p2._errors if r["code"] == "pgp_data_error"]), 1
)
# create an error in the pgp message by invalidating the base64 encoding of the signature
content = _signed_example.replace(
"wpwEAQEIABAFAmTHcawJEDs4gPMoG10dAACN5wP/UozhFqHcUWRNhg4KwfY4", "wpwEAQEIABAFAmTH"
).replace(
"HHXU8bf222naeYJHgaHadLTJJ8YQIQ9N5fYF7K4BM0jPZc48aaUPaBdhNxw+", "HHXU8bf222naeYJHga"
)
p3 = Parser(content)
self.assertFalse(p3.is_valid())
self.assertEqual(
len([1 for r in p3._errors if r["code"] == "pgp_error"]), 1
)

def test_signed_no_canonical(self):
content = _signed_example.replace(
"Canonical: https://example.com/.well-known/security.txt", ""
Expand Down Expand Up @@ -174,13 +209,27 @@ def test_unknown_fields(self):
def test_no_line_separators(self):
expire_date = (date.today() + timedelta(days=10)).isoformat()
single_line_security_txt = (
f"Contact: mailto:[email protected] Expires: "
"Contact: mailto:[email protected] Expires: "
f"{expire_date}T18:37:07z # All on a single line"
)
p = Parser(single_line_security_txt)
self.assertFalse(p.is_valid())
p_line_separator = Parser(single_line_security_txt)
self.assertFalse(p_line_separator.is_valid())
self.assertEqual(
len([1 for r in p_line_separator._errors if r["code"] == "no_line_separators"]), 1
)
line_length_4_no_carriage_feed = (
"line 1\n"
"line 2\n"
"line 3\n"
"Contact: mailto:[email protected] Expires"
)
p_length_4 = Parser(line_length_4_no_carriage_feed)
self.assertFalse(p_length_4.is_valid())
self.assertEqual(
len([1 for r in p._errors if r["code"] == "no_line_separators"]), 1
len([1 for r in p_length_4._errors if r["code"] == "no_line_separators"]), 1
)
self.assertEqual(
[r["line"] for r in p_length_4._errors if r["code"] == "no_line_separators"], [4]
)

def test_csaf_https_uri(self):
Expand Down Expand Up @@ -239,3 +288,16 @@ def test_invalid_uri_scheme(requests_mock: Mocker):
s = SecurityTXT("example.com")
if not any(d["code"] == "invalid_uri_scheme" for d in s.errors):
pytest.fail("invalid_uri_scheme error code should be given")


def test_byte_order_mark(requests_mock: Mocker):
with Mocker() as m:
byte_content_with_bom = b'\xef\xbb\xbf\xef\xbb\xbfContact: mailto:[email protected]\n' \
b'Expires: 2023-08-11T18:37:07z\n'
m.get(
"https://example.com/.well-known/security.txt",
headers={"content-type": "text/plain"},
content=byte_content_with_bom,
)
s = SecurityTXT("example.com")
assert(s.is_valid())

0 comments on commit d56b65a

Please sign in to comment.