From b62fb9d58e2e127e6f33af8858d4941597ee024e Mon Sep 17 00:00:00 2001 From: Philippe Moussalli Date: Tue, 25 Jul 2023 18:13:00 +0200 Subject: [PATCH] Bugfix/partitioning (#312) Bugfix to properly parse partitioning arguments --- src/fondant/data_io.py | 4 ++-- src/fondant/schema.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fondant/data_io.py b/src/fondant/data_io.py index 675422301..18f872969 100644 --- a/src/fondant/data_io.py +++ b/src/fondant/data_io.py @@ -174,8 +174,8 @@ def partition_written_dataframe(self, dataframe: dd.DataFrame) -> dd.DataFrame: elif self.output_partition_size is None: dataframe = dataframe.repartition(partition_size="250MB") logger.info( - f"Repartitioning the written data such that the size per partition is approx." - f" {self.output_partition_size}. (Automatic repartitioning)", + "Repartitioning the written data such that the size per partition is approx." + " 250MB. (Automatic repartitioning)", ) else: msg = ( diff --git a/src/fondant/schema.py b/src/fondant/schema.py index eb295b50f..46a73b1ef 100644 --- a/src/fondant/schema.py +++ b/src/fondant/schema.py @@ -159,7 +159,7 @@ class Field(t.NamedTuple): def validate_partition_number(arg_value): if arg_value in ["disable", None, "None"]: - return arg_value + return arg_value if arg_value != "None" else None try: return int(arg_value) except ValueError: @@ -169,7 +169,7 @@ def validate_partition_number(arg_value): def validate_partition_size(arg_value): if arg_value in ["disable", None, "None"]: - return arg_value + return arg_value if arg_value != "None" else None file_size_pattern = r"^\d+(?:\.\d+)?(?:KB|MB|GB|TB)$" if not bool(re.match(file_size_pattern, arg_value, re.I)):