From 2683396ba4bf211a8ada1fd091c471e3c6b5c12a Mon Sep 17 00:00:00 2001 From: Johannes Wesch <83402561+JohannesWesch@users.noreply.github.com> Date: Mon, 22 Jan 2024 15:32:26 +0100 Subject: [PATCH] added utf-8 encoding to dataset_repository (#402) --- .../core/evaluation/dataset_repository.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/intelligence_layer/core/evaluation/dataset_repository.py b/src/intelligence_layer/core/evaluation/dataset_repository.py index 3fe5383dc..c9d7b3a23 100644 --- a/src/intelligence_layer/core/evaluation/dataset_repository.py +++ b/src/intelligence_layer/core/evaluation/dataset_repository.py @@ -34,7 +34,7 @@ def example( if not self._fs.exists(example_path): return None - with self._fs.open(example_path, "r") as examples_file: + with self._fs.open(example_path, "r", encoding="utf-8") as examples_file: # Mypy does not accept dynamic types for example in examples_file: validated_example = Example[input_type, expected_output_type].model_validate_json(json_data=example) # type: ignore @@ -48,7 +48,7 @@ def create_dataset(self, examples: Iterable[Example[Input, ExpectedOutput]]) -> if self._fs.exists(dataset_path): raise ValueError(f"Dataset name {dataset_id} already taken") - with self._fs.open(dataset_path, "w") as examples_file: + with self._fs.open(dataset_path, "w", encoding="utf-8") as examples_file: for example in examples: serialized_result = JsonSerializer(root=example) text = serialized_result.model_dump_json() + "\n" @@ -65,7 +65,7 @@ def examples_by_id( if not self._fs.exists(example_path): return None - with self._fs.open(example_path, "r") as examples_file: + with self._fs.open(example_path, "r", encoding="utf-8") as examples_file: # Mypy does not accept dynamic types examples = [Example[input_type, expected_output_type].model_validate_json(json_data=example) for example in examples_file] # type: ignore