Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: input file encoding #596

Merged
merged 8 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions cyclonedx_py/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from datetime import datetime
from typing import Any, Optional

from chardet import detect as chardetect # type:ignore[import]
from cyclonedx.model import Tool
from cyclonedx.model.bom import Bom
from cyclonedx.output import BaseOutput, OutputFormat, SchemaVersion, get_instance as get_output_instance
Expand Down Expand Up @@ -270,11 +271,13 @@ def _get_input_parser(self) -> BaseParser:
raise CycloneDxCmdNoInputFileSupplied(
'When using input from Conda JSON, you need to pipe input via STDIN')
elif self._arguments.input_from_pip:
self._arguments.input_source = open(os.path.join(current_directory, 'Pipfile.lock'), 'r')
self._arguments.input_source = open(os.path.join(current_directory, 'Pipfile.lock'),
'rt', encoding="UTF-8")
elif self._arguments.input_from_poetry:
self._arguments.input_source = open(os.path.join(current_directory, 'poetry.lock'), 'r')
self._arguments.input_source = open(os.path.join(current_directory, 'poetry.lock'),
'rt', encoding="UTF-8")
elif self._arguments.input_from_requirements:
self._arguments.input_source = open(os.path.join(current_directory, 'requirements.txt'), 'r')
self._arguments.input_source = open(os.path.join(current_directory, 'requirements.txt'), 'rb')
else:
raise CycloneDxCmdException('Parser type could not be determined.')
except FileNotFoundError as error:
Expand All @@ -285,6 +288,11 @@ def _get_input_parser(self) -> BaseParser:
input_data_fh = self._arguments.input_source
with input_data_fh:
input_data = input_data_fh.read()
if isinstance(input_data, bytes):
input_encoding = chardetect(input_data)['encoding'].replace(
# replace Windows-encoding with code-page
'Windows-', 'cp')
input_data = input_data.decode(input_encoding)
input_data_fh.close()

if self._arguments.input_from_conda_explicit:
Expand Down
1 change: 1 addition & 0 deletions deps.lowest.r
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ cyclonedx-python-lib == 2.0.0
packageurl-python == 0.9.0
importlib-metadata == 3.4.0 # ; python_version < '3.8'
pip-requirements-parser == 32.0.0
chardet == 5.0.0
setuptools == 47.0.0
types-setuptools == 57.0.0
toml == 0.10.0
Expand Down
14 changes: 13 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ importlib-metadata = { version = ">= 3.4", python = "< 3.8" }
pip-requirements-parser = "^32.0.0"
setuptools = ">= 47.0.0"
toml = "^0.10.0"
chardet = "^5.0"

[tool.poetry.dev-dependencies]
autopep8 = "^1.6.0"
Expand Down
4 changes: 4 additions & 0 deletions tests/fixtures/.editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

[*.cp1252.txt.bin]
charset = unset
end_of_line = crlf
3 changes: 3 additions & 0 deletions tests/fixtures/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*.bin binary
*.txt.bin binary diff=text

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# This is a regression test for https://github.com/CycloneDX/cyclonedx-python/issues/448
# This file is in `cp1252` encoding - on purpose
# This file uses CRLF for line endings - on purpose
# some encoding specific characters: €‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ

packageurl-python>=0.9.4
requirements_parser>=0.2.0
setuptools>=50.3.2