From 6d53058604e7eac97076df45d87e7c9f6747a480 Mon Sep 17 00:00:00 2001 From: Karthik Bekal Pattathana <133984042+karthikbekalp@users.noreply.github.com> Date: Thu, 19 Dec 2024 21:08:04 +0000 Subject: [PATCH] fix!: Ensure all open uses UTF-8 instead of default encoding. Signed-off-by: Karthik Bekal Pattathana <133984042+karthikbekalp@users.noreply.github.com> --- .github/scripts/get_latest_changelog.py | 2 +- .../_background/backend_runner.py | 4 ++- .../_background/frontend_runner.py | 9 +++--- .../adaptor_runtime/_background/loaders.py | 2 +- .../_background/log_buffers.py | 4 +-- src/openjd/adaptor_runtime/_entrypoint.py | 8 +++-- .../adaptor_runtime/adaptors/_validator.py | 4 +-- .../adaptors/configuration/_configuration.py | 6 ++-- .../configuration/_configuration_manager.py | 2 +- .../integ/_utils/test_secure_open.py | 6 ++-- .../integ/background/test_background_mode.py | 2 +- .../configuration/test_configuration.py | 32 +++++++++++++------ .../test_configuration_manager.py | 2 +- .../unit/background/test_backend_runner.py | 4 +-- .../unit/background/test_frontend_runner.py | 2 +- .../unit/background/test_log_buffers.py | 4 +-- .../adaptor_runtime/unit/test_entrypoint.py | 4 +-- test/openjd/test_copyright_header.py | 2 +- 18 files changed, 61 insertions(+), 38 deletions(-) diff --git a/.github/scripts/get_latest_changelog.py b/.github/scripts/get_latest_changelog.py index 5616efb..c6a7d33 100644 --- a/.github/scripts/get_latest_changelog.py +++ b/.github/scripts/get_latest_changelog.py @@ -32,7 +32,7 @@ import re h2 = r"^##\s.*$" -with open("CHANGELOG.md") as f: +with open("CHANGELOG.md", encoding="utf-8") as f: contents = f.read() matches = re.findall(h2, contents, re.MULTILINE) changelog = contents[: contents.find(matches[1]) - 1] if len(matches) > 1 else contents diff --git a/src/openjd/adaptor_runtime/_background/backend_runner.py b/src/openjd/adaptor_runtime/_background/backend_runner.py index 5d4e16e..a1f7329 100644 --- a/src/openjd/adaptor_runtime/_background/backend_runner.py +++ b/src/openjd/adaptor_runtime/_background/backend_runner.py @@ -116,7 +116,9 @@ def run(self, *, on_connection_file_written: List[Callable[[], None]] | None = N raise try: - with secure_open(self._connection_file_path, open_mode="w") as conn_file: + with secure_open( + self._connection_file_path, open_mode="w", encoding="utf-8" + ) as conn_file: json.dump( ConnectionSettings(server_path), conn_file, diff --git a/src/openjd/adaptor_runtime/_background/frontend_runner.py b/src/openjd/adaptor_runtime/_background/frontend_runner.py index c83b4ba..e47d4dc 100644 --- a/src/openjd/adaptor_runtime/_background/frontend_runner.py +++ b/src/openjd/adaptor_runtime/_background/frontend_runner.py @@ -153,7 +153,7 @@ def init( bootstrap_output_path = os.path.join( bootstrap_log_dir, f"adaptor-runtime-background-bootstrap-output-{bootstrap_id}.log" ) - output_log_file = open(bootstrap_output_path, mode="w+") + output_log_file = open(bootstrap_output_path, mode="w+", encoding="utf-8") try: process = subprocess.Popen( args, @@ -194,7 +194,7 @@ def init( if process.stderr: process.stderr.close() - with open(bootstrap_output_path, mode="r") as f: + with open(bootstrap_output_path, mode="r", encoding="utf-8") as f: bootstrap_output = f.readlines() _logger.info("========== BEGIN BOOTSTRAP OUTPUT CONTENTS ==========") for line in bootstrap_output: @@ -203,7 +203,7 @@ def init( _logger.info(f"Checking for bootstrap logs at '{bootstrap_log_path}'") try: - with open(bootstrap_log_path, mode="r") as f: + with open(bootstrap_log_path, mode="r", encoding="utf-8") as f: bootstrap_logs = f.readlines() except Exception as e: _logger.error(f"Failed to get bootstrap logs at '{bootstrap_log_path}': {e}") @@ -442,7 +442,8 @@ def _wait_for_connection_file( def file_is_openable() -> bool: try: - open(filepath, mode="r").close() + with open(filepath, mode="r", encoding="utf-8"): + pass except IOError: # File is not available yet return False diff --git a/src/openjd/adaptor_runtime/_background/loaders.py b/src/openjd/adaptor_runtime/_background/loaders.py index e258a29..1ccd726 100644 --- a/src/openjd/adaptor_runtime/_background/loaders.py +++ b/src/openjd/adaptor_runtime/_background/loaders.py @@ -35,7 +35,7 @@ class ConnectionSettingsFileLoader(ConnectionSettingsLoader): def load(self) -> ConnectionSettings: try: - with open(self.file_path) as conn_file: + with open(self.file_path, encoding="utf-8") as conn_file: loaded_settings = json.load(conn_file) except OSError as e: errmsg = f"Failed to open connection file '{self.file_path}': {e}" diff --git a/src/openjd/adaptor_runtime/_background/log_buffers.py b/src/openjd/adaptor_runtime/_background/log_buffers.py index 1aa3db6..394c1bd 100644 --- a/src/openjd/adaptor_runtime/_background/log_buffers.py +++ b/src/openjd/adaptor_runtime/_background/log_buffers.py @@ -132,7 +132,7 @@ def __init__(self, filepath: str, *, formatter: logging.Formatter | None = None) def buffer(self, record: logging.LogRecord) -> None: with ( self._file_lock, - secure_open(self._filepath, open_mode="a") as f, + secure_open(self._filepath, open_mode="a", encoding="utf-8") as f, ): f.write(self._format(record)) @@ -142,7 +142,7 @@ def chunk(self) -> BufferedOutput: with ( self._chunk_lock, self._file_lock, - open(self._filepath, mode="r") as f, + open(self._filepath, mode="r", encoding="utf-8") as f, ): self._chunk.id = id f.seek(self._chunk.start) diff --git a/src/openjd/adaptor_runtime/_entrypoint.py b/src/openjd/adaptor_runtime/_entrypoint.py index 23ad98d..df1a906 100644 --- a/src/openjd/adaptor_runtime/_entrypoint.py +++ b/src/openjd/adaptor_runtime/_entrypoint.py @@ -179,6 +179,10 @@ def _init_loggers(self, *, bootstrap_log_path: str | None = None) -> _LogConfig: formatter = ConditionalFormatter( "%(levelname)s: %(message)s", ignore_patterns=[_OPENJD_LOG_REGEX] ) + + # Ensure stdout uses UTF-8 encoding to avoid issues with + # encoding Non-ASCII characters. + sys.stdout.reconfigure(encoding="utf-8") # type: ignore stream_handler = logging.StreamHandler(sys.stdout) stream_handler.setFormatter(formatter) @@ -590,14 +594,14 @@ def _load_data(data: str) -> dict: def _load_yaml_json(data: str) -> Any: """ - Loads a YAML/JSON file/string. + Loads a YAML/JSON file/string using the UTF-8 encoding. Note that yaml.safe_load() is capable of loading JSON documents. """ loaded_yaml = None if data.startswith("file://"): filepath = data[len("file://") :] - with open(filepath) as yaml_file: + with open(filepath, encoding="utf-8") as yaml_file: loaded_yaml = yaml.safe_load(yaml_file) else: loaded_yaml = yaml.safe_load(data) diff --git a/src/openjd/adaptor_runtime/adaptors/_validator.py b/src/openjd/adaptor_runtime/adaptors/_validator.py index cccd746..7f292e4 100644 --- a/src/openjd/adaptor_runtime/adaptors/_validator.py +++ b/src/openjd/adaptor_runtime/adaptors/_validator.py @@ -74,7 +74,7 @@ def from_schema_file(schema_path: str) -> AdaptorDataValidator: schema_path (str): The path to the JSON schema file to use. """ try: - with open(schema_path) as schema_file: + with open(schema_path, encoding="utf-8") as schema_file: schema = json.load(schema_file) except json.JSONDecodeError as e: _logger.error(f"Failed to decode JSON schema file: {e}") @@ -148,7 +148,7 @@ def _load_yaml_json(data: str) -> Any: loaded_yaml = None if data.startswith("file://"): filepath = data[len("file://") :] - with open(filepath) as yaml_file: + with open(filepath, encoding="utf-8") as yaml_file: loaded_yaml = yaml.safe_load(yaml_file) else: loaded_yaml = yaml.safe_load(data) diff --git a/src/openjd/adaptor_runtime/adaptors/configuration/_configuration.py b/src/openjd/adaptor_runtime/adaptors/configuration/_configuration.py index 320ffbb..92b015f 100644 --- a/src/openjd/adaptor_runtime/adaptors/configuration/_configuration.py +++ b/src/openjd/adaptor_runtime/adaptors/configuration/_configuration.py @@ -82,7 +82,8 @@ def from_file( """ try: - config = json.load(open(config_path)) + with open(config_path, encoding="utf-8") as config_file: + config = json.load(config_file) except OSError as e: _logger.error(f"Failed to open configuration at {config_path}: {e}") raise @@ -102,7 +103,8 @@ def from_file( schema_paths = schema_path if isinstance(schema_path, list) else [schema_path] for path in schema_paths: try: - schema = json.load(open(path)) + with open(path, encoding="utf-8") as schema_file: + schema = json.load(schema_file) except OSError as e: _logger.error(f"Failed to open configuration schema at {path}: {e}") raise diff --git a/src/openjd/adaptor_runtime/adaptors/configuration/_configuration_manager.py b/src/openjd/adaptor_runtime/adaptors/configuration/_configuration_manager.py index b5f8e0e..95b0205 100644 --- a/src/openjd/adaptor_runtime/adaptors/configuration/_configuration_manager.py +++ b/src/openjd/adaptor_runtime/adaptors/configuration/_configuration_manager.py @@ -95,7 +95,7 @@ def _ensure_config_file(filepath: str, *, create: bool = False) -> bool: _logger.info(f"Creating empty configuration at {filepath}") try: os.makedirs(os.path.dirname(filepath), mode=stat.S_IRWXU, exist_ok=True) - with secure_open(filepath, open_mode="w") as f: + with secure_open(filepath, open_mode="w", encoding="utf-8") as f: json.dump({}, f) except OSError as e: _logger.warning(f"Could not write empty configuration to {filepath}: {e}") diff --git a/test/openjd/adaptor_runtime/integ/_utils/test_secure_open.py b/test/openjd/adaptor_runtime/integ/_utils/test_secure_open.py index 2508ad4..8f28840 100644 --- a/test/openjd/adaptor_runtime/integ/_utils/test_secure_open.py +++ b/test/openjd/adaptor_runtime/integ/_utils/test_secure_open.py @@ -24,7 +24,7 @@ def create_file(): f"secure_open_test_{''.join(random.choice(string.ascii_letters) for _ in range(10))}.txt" ) test_file_path = os.path.join(tempfile.gettempdir(), test_file_name) - with secure_open(test_file_path, open_mode="w") as test_file: + with secure_open(test_file_path, open_mode="w", encoding="utf-8") as test_file: test_file.write(file_content) yield test_file_path, file_content os.remove(test_file_path) @@ -36,7 +36,7 @@ def test_secure_open_write_and_read(self, create_file): Test if the file owner can write and read the file """ test_file_path, file_content = create_file - with secure_open(test_file_path, open_mode="r") as test_file: + with secure_open(test_file_path, open_mode="r", encoding="utf-8") as test_file: result = test_file.read() assert result == file_content @@ -61,7 +61,7 @@ def test_secure_open_file_windows_permission(self, create_file, win_test_user): try: with pytest.raises(PermissionError): - with open(test_file_path, "r") as f: + with open(test_file_path, "r", encoding="utf-8") as f: f.read() finally: # Revert the impersonation diff --git a/test/openjd/adaptor_runtime/integ/background/test_background_mode.py b/test/openjd/adaptor_runtime/integ/background/test_background_mode.py index 974e736..5357f48 100644 --- a/test/openjd/adaptor_runtime/integ/background/test_background_mode.py +++ b/test/openjd/adaptor_runtime/integ/background/test_background_mode.py @@ -46,7 +46,7 @@ def mock_runtime_logger_level(self, tmpdir: pathlib.Path): # Set up a config file for the backend process config = {"log_level": "DEBUG"} config_path = os.path.join(tmpdir, "configuration.json") - with open(config_path, mode="w") as f: + with open(config_path, mode="w", encoding="utf-8") as f: json.dump(config, f) # Override the default config path to the one we just created diff --git a/test/openjd/adaptor_runtime/unit/adaptors/configuration/test_configuration.py b/test/openjd/adaptor_runtime/unit/adaptors/configuration/test_configuration.py index 3deeabd..6ea0113 100644 --- a/test/openjd/adaptor_runtime/unit/adaptors/configuration/test_configuration.py +++ b/test/openjd/adaptor_runtime/unit/adaptors/configuration/test_configuration.py @@ -55,7 +55,9 @@ def test_loads_schema( result = Configuration.from_file(config_path, schema_path) # THEN - mock_open.assert_has_calls([call(config_path), call(schema_path)]) + mock_open.assert_has_calls( + [call(config_path, encoding="utf-8"), call(schema_path, encoding="utf-8")] + ) assert mock_load.call_count == 2 mock_validate.assert_called_once_with(config, schema) assert result._config is config @@ -77,7 +79,7 @@ def test_skips_validation_when_no_schema( result = Configuration.from_file(config_path) # THEN - mock_open.assert_called_once_with(config_path) + mock_open.assert_called_once_with(config_path, encoding="utf-8") mock_load.assert_called_once() assert ( f"JSON Schema file path not provided. Configuration file {config_path} will not be " @@ -107,7 +109,13 @@ def test_validates_against_multiple_schemas( result = Configuration.from_file(config_path, [schema_path, schema2_path]) # THEN - mock_open.assert_has_calls([call(config_path), call(schema_path), call(schema2_path)]) + mock_open.assert_has_calls( + [ + call(config_path, encoding="utf-8"), + call(schema_path, encoding="utf-8"), + call(schema2_path, encoding="utf-8"), + ] + ) assert mock_load.call_count == 3 mock_validate.assert_has_calls([call(config, schema), call(config, schema2)]) assert result._config is config @@ -128,7 +136,7 @@ def test_raises_when_nonvalid_schema_path_value( # THEN mock_load.assert_called_once() - mock_open.assert_called_once_with(config_path) + mock_open.assert_called_once_with(config_path, encoding="utf-8") assert raised_err.match(f"Schema path cannot be an empty {type(schema_path)}") @patch.object(configuration.json, "load") @@ -148,7 +156,9 @@ def test_raises_when_schema_open_fails( # THEN mock_load.assert_called_once() - mock_open.assert_has_calls([call(config_path), call(schema_path)]) + mock_open.assert_has_calls( + [call(config_path, encoding="utf-8"), call(schema_path, encoding="utf-8")] + ) assert raised_err.value is err assert f"Failed to open configuration schema at {schema_path}: " in caplog.text @@ -169,7 +179,9 @@ def test_raises_when_schema_json_decode_fails( # THEN assert mock_load.call_count == 2 - mock_open.assert_has_calls([call(config_path), call(schema_path)]) + mock_open.assert_has_calls( + [call(config_path, encoding="utf-8"), call(schema_path, encoding="utf-8")] + ) assert raised_err.value is err assert f"Failed to decode configuration schema at {schema_path}: " in caplog.text @@ -187,7 +199,7 @@ def test_raises_when_config_open_fails( Configuration.from_file(config_path, "") # THEN - mock_open.assert_called_once_with(config_path) + mock_open.assert_called_once_with(config_path, encoding="utf-8") assert raised_err.value is err assert f"Failed to open configuration at {config_path}: " in caplog.text @@ -206,7 +218,7 @@ def test_raises_when_config_json_decode_fails( Configuration.from_file(config_path, "") # THEN - mock_open.assert_called_once_with(config_path) + mock_open.assert_called_once_with(config_path, encoding="utf-8") mock_load.assert_called_once() assert raised_err.value is err assert f"Failed to decode configuration at {config_path}: " in caplog.text @@ -234,7 +246,9 @@ def test_raises_when_config_fails_jsonschema_validation( Configuration.from_file(config_path, schema_path) # THEN - mock_open.assert_has_calls([call(config_path), call(schema_path)]) + mock_open.assert_has_calls( + [call(config_path, encoding="utf-8"), call(schema_path, encoding="utf-8")] + ) assert mock_load.call_count == 2 mock_validate.assert_called_once_with(config, schema) assert raised_err.value is mock_validate.side_effect diff --git a/test/openjd/adaptor_runtime/unit/adaptors/configuration/test_configuration_manager.py b/test/openjd/adaptor_runtime/unit/adaptors/configuration/test_configuration_manager.py index f48c436..8146c40 100644 --- a/test/openjd/adaptor_runtime/unit/adaptors/configuration/test_configuration_manager.py +++ b/test/openjd/adaptor_runtime/unit/adaptors/configuration/test_configuration_manager.py @@ -119,7 +119,7 @@ def test_create( # THEN assert result == created mock_exists.assert_called_once_with(path) - open_mock.assert_called_once_with(path, open_mode="w") + open_mock.assert_called_once_with(path, open_mode="w", encoding="utf-8") assert f'Configuration file at "{path}" does not exist.' in caplog.text assert f"Creating empty configuration at {path}" in caplog.text if created: diff --git a/test/openjd/adaptor_runtime/unit/background/test_backend_runner.py b/test/openjd/adaptor_runtime/unit/background/test_backend_runner.py index 1ce62a6..9b52eed 100644 --- a/test/openjd/adaptor_runtime/unit/background/test_backend_runner.py +++ b/test/openjd/adaptor_runtime/unit/background/test_backend_runner.py @@ -96,7 +96,7 @@ def test_run( ) mock_thread.assert_called_once() mock_thread.return_value.start.assert_called_once() - open_mock.assert_called_once_with(conn_file, open_mode="w") + open_mock.assert_called_once_with(conn_file, open_mode="w", encoding="utf-8") mock_json_dump.assert_called_once_with( ConnectionSettings(socket_path), open_mock.return_value, @@ -170,7 +170,7 @@ def test_run_raises_when_writing_connection_file_fails( ] mock_thread.assert_called_once() mock_thread.return_value.start.assert_called_once() - open_mock.assert_called_once_with(conn_file, open_mode="w") + open_mock.assert_called_once_with(conn_file, open_mode="w", encoding="utf-8") mock_thread.return_value.join.assert_called_once() if OSName.is_posix(): mock_os_remove.assert_has_calls([call(conn_file), call(socket_path)]) diff --git a/test/openjd/adaptor_runtime/unit/background/test_frontend_runner.py b/test/openjd/adaptor_runtime/unit/background/test_frontend_runner.py index 9545ea5..0bb19e8 100644 --- a/test/openjd/adaptor_runtime/unit/background/test_frontend_runner.py +++ b/test/openjd/adaptor_runtime/unit/background/test_frontend_runner.py @@ -963,7 +963,7 @@ def test_waits_for_file( # THEN mock_exists.assert_has_calls([call(filepath)] * 2) mock_sleep.assert_has_calls([call(interval)] * 3) - open_mock.assert_has_calls([call(filepath, mode="r")] * 2) + open_mock.assert_has_calls([call(filepath, mode="r", encoding="utf-8")] * 2) @patch.object(frontend_runner.time, "sleep") @patch.object(frontend_runner.os.path, "exists") diff --git a/test/openjd/adaptor_runtime/unit/background/test_log_buffers.py b/test/openjd/adaptor_runtime/unit/background/test_log_buffers.py index 7ccb7c3..56da529 100644 --- a/test/openjd/adaptor_runtime/unit/background/test_log_buffers.py +++ b/test/openjd/adaptor_runtime/unit/background/test_log_buffers.py @@ -120,7 +120,7 @@ def test_buffer(self) -> None: buffer.buffer(mock_record) # THEN - open_mock.assert_called_once_with(filepath, open_mode="a") + open_mock.assert_called_once_with(filepath, open_mode="a", encoding="utf-8") handle = open_mock.return_value handle.write.assert_called_once_with(mock_record.msg) @@ -140,7 +140,7 @@ def test_chunk(self, mocked_chunk_id: Tuple[str, MagicMock]) -> None: # THEN mock_create_id.assert_called_once() - open_mock.assert_called_once_with(filepath, mode="r") + open_mock.assert_called_once_with(filepath, mode="r", encoding="utf-8") handle = open_mock.return_value handle.seek.assert_called_once_with(buffer._chunk.start) handle.read.assert_called_once() diff --git a/test/openjd/adaptor_runtime/unit/test_entrypoint.py b/test/openjd/adaptor_runtime/unit/test_entrypoint.py index 46adc5d..e859ac1 100644 --- a/test/openjd/adaptor_runtime/unit/test_entrypoint.py +++ b/test/openjd/adaptor_runtime/unit/test_entrypoint.py @@ -865,7 +865,7 @@ def test_accepts_file(self, input: str, expected: dict): # THEN assert output == expected - open_mock.assert_called_once_with(filepath) + open_mock.assert_called_once_with(filepath, encoding="utf-8") @patch.object(runtime_entrypoint, "open") def test_raises_on_os_error(self, mock_open: MagicMock, caplog: pytest.LogCaptureFixture): @@ -880,7 +880,7 @@ def test_raises_on_os_error(self, mock_open: MagicMock, caplog: pytest.LogCaptur # THEN assert raised_err.value is mock_open.side_effect - mock_open.assert_called_once_with(filepath) + mock_open.assert_called_once_with(filepath, encoding="utf-8") assert "Failed to open data file: " in caplog.text def test_raises_when_parsing_fails(self, caplog: pytest.LogCaptureFixture): diff --git a/test/openjd/test_copyright_header.py b/test/openjd/test_copyright_header.py index 23b22db..8689a77 100644 --- a/test/openjd/test_copyright_header.py +++ b/test/openjd/test_copyright_header.py @@ -33,7 +33,7 @@ def _check_file(filename: Path) -> None: def _is_version_file(filename: Path) -> bool: if filename.name != "_version.py": return False - with open(filename) as infile: + with open(filename, encoding="utf-8") as infile: lines_read = 0 for line in infile: if _generated_by_scm.search(line):