diff --git a/cyclonedx_py/client.py b/cyclonedx_py/client.py index c4a70ce3..ee7113d4 100644 --- a/cyclonedx_py/client.py +++ b/cyclonedx_py/client.py @@ -25,6 +25,7 @@ from datetime import datetime from typing import Any, Optional +from chardet import detect as chardetect # type:ignore[import] from cyclonedx.model import Tool from cyclonedx.model.bom import Bom from cyclonedx.output import BaseOutput, OutputFormat, SchemaVersion, get_instance as get_output_instance @@ -270,11 +271,13 @@ def _get_input_parser(self) -> BaseParser: raise CycloneDxCmdNoInputFileSupplied( 'When using input from Conda JSON, you need to pipe input via STDIN') elif self._arguments.input_from_pip: - self._arguments.input_source = open(os.path.join(current_directory, 'Pipfile.lock'), 'r') + self._arguments.input_source = open(os.path.join(current_directory, 'Pipfile.lock'), + 'rt', encoding="UTF-8") elif self._arguments.input_from_poetry: - self._arguments.input_source = open(os.path.join(current_directory, 'poetry.lock'), 'r') + self._arguments.input_source = open(os.path.join(current_directory, 'poetry.lock'), + 'rt', encoding="UTF-8") elif self._arguments.input_from_requirements: - self._arguments.input_source = open(os.path.join(current_directory, 'requirements.txt'), 'r') + self._arguments.input_source = open(os.path.join(current_directory, 'requirements.txt'), 'rb') else: raise CycloneDxCmdException('Parser type could not be determined.') except FileNotFoundError as error: @@ -285,6 +288,11 @@ def _get_input_parser(self) -> BaseParser: input_data_fh = self._arguments.input_source with input_data_fh: input_data = input_data_fh.read() + if isinstance(input_data, bytes): + input_encoding = chardetect(input_data)['encoding'].replace( + # replace Windows-encoding with code-page + 'Windows-', 'cp') + input_data = input_data.decode(input_encoding) input_data_fh.close() if self._arguments.input_from_conda_explicit: diff --git a/deps.lowest.r b/deps.lowest.r index fa7b3199..acea466d 100644 --- a/deps.lowest.r +++ b/deps.lowest.r @@ -5,6 +5,7 @@ cyclonedx-python-lib == 2.0.0 packageurl-python == 0.9.0 importlib-metadata == 3.4.0 # ; python_version < '3.8' pip-requirements-parser == 32.0.0 +chardet == 5.0.0 setuptools == 47.0.0 types-setuptools == 57.0.0 toml == 0.10.0 diff --git a/poetry.lock b/poetry.lock index 12a9dac3..e6b4e956 100644 --- a/poetry.lock +++ b/poetry.lock @@ -24,6 +24,14 @@ python-versions = "*" pycodestyle = ">=2.8.0" toml = "*" +[[package]] +name = "chardet" +version = "5.0.0" +description = "Universal encoding detector for Python 3" +category = "main" +optional = false +python-versions = ">=3.6" + [[package]] name = "colorama" version = "0.4.4" @@ -459,7 +467,7 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=4.6)", "pytest-black ( [metadata] lock-version = "1.1" python-versions = "^3.6" -content-hash = "f4a134c4de7476924635c11b364c6b9177bce308a6226a827d555af00209b0bf" +content-hash = "fea373d1e3db503671ee52c8931458255e10e7ddf68954cce9ad4dd0a83f2371" [metadata.files] attrs = [ @@ -470,6 +478,10 @@ autopep8 = [ {file = "autopep8-1.6.0-py2.py3-none-any.whl", hash = "sha256:ed77137193bbac52d029a52c59bec1b0629b5a186c495f1eb21b126ac466083f"}, {file = "autopep8-1.6.0.tar.gz", hash = "sha256:44f0932855039d2c15c4510d6df665e4730f2b8582704fa48f9c55bd3e17d979"}, ] +chardet = [ + {file = "chardet-5.0.0-py3-none-any.whl", hash = "sha256:d3e64f022d254183001eccc5db4040520c0f23b1a3f33d6413e099eb7f126557"}, + {file = "chardet-5.0.0.tar.gz", hash = "sha256:0368df2bfd78b5fc20572bb4e9bb7fb53e2c094f60ae9993339e8671d0afb8aa"}, +] colorama = [ {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, diff --git a/pyproject.toml b/pyproject.toml index 2c8b275c..ade31aca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ importlib-metadata = { version = ">= 3.4", python = "< 3.8" } pip-requirements-parser = "^32.0.0" setuptools = ">= 47.0.0" toml = "^0.10.0" +chardet = "^5.0" [tool.poetry.dev-dependencies] autopep8 = "^1.6.0" diff --git a/tests/fixtures/.editorconfig b/tests/fixtures/.editorconfig new file mode 100644 index 00000000..3de20f11 --- /dev/null +++ b/tests/fixtures/.editorconfig @@ -0,0 +1,4 @@ + +[*.cp1252.txt.bin] +charset = unset +end_of_line = crlf diff --git a/tests/fixtures/.gitattributes b/tests/fixtures/.gitattributes new file mode 100644 index 00000000..b1ce527a --- /dev/null +++ b/tests/fixtures/.gitattributes @@ -0,0 +1,3 @@ +*.bin binary +*.txt.bin binary diff=text + diff --git a/tests/fixtures/requirements-regression-issue448.cp1252.txt.bin b/tests/fixtures/requirements-regression-issue448.cp1252.txt.bin new file mode 100644 index 00000000..3ae70eb8 --- /dev/null +++ b/tests/fixtures/requirements-regression-issue448.cp1252.txt.bin @@ -0,0 +1,8 @@ +# This is a regression test for https://github.com/CycloneDX/cyclonedx-python/issues/448 +# This file is in `cp1252` encoding - on purpose +# This file uses CRLF for line endings - on purpose +# some encoding specific characters: €‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ + +packageurl-python>=0.9.4 +requirements_parser>=0.2.0 +setuptools>=50.3.2