Skip to content

Commit

Permalink
Bugfix/partitioning (#312)
Browse files (browse the repository at this point in the history)
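Bugfix to properly parse partitioning arguments: the string "None" is now converted to None by the partition validators, and the automatic-repartitioning log message now reports the actual 250MB target instead of the unset output_partition_size.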
  • Loading branch information
PhilippeMoussalli authored Jul 25, 2023
1 parent 8c023ca commit b62fb9d
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions src/fondant/data_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ def partition_written_dataframe(self, dataframe: dd.DataFrame) -> dd.DataFrame:
elif self.output_partition_size is None:
dataframe = dataframe.repartition(partition_size="250MB")
logger.info(
f"Repartitioning the written data such that the size per partition is approx."
f" {self.output_partition_size}. (Automatic repartitioning)",
"Repartitioning the written data such that the size per partition is approx."
" 250MB. (Automatic repartitioning)",
)
else:
msg = (
Expand Down
4 changes: 2 additions & 2 deletions src/fondant/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ class Field(t.NamedTuple):

def validate_partition_number(arg_value):
if arg_value in ["disable", None, "None"]:
return arg_value
return arg_value if arg_value != "None" else None
try:
return int(arg_value)
except ValueError:
Expand All @@ -169,7 +169,7 @@ def validate_partition_number(arg_value):

def validate_partition_size(arg_value):
if arg_value in ["disable", None, "None"]:
return arg_value
return arg_value if arg_value != "None" else None

file_size_pattern = r"^\d+(?:\.\d+)?(?:KB|MB|GB|TB)$"
if not bool(re.match(file_size_pattern, arg_value, re.I)):
Expand Down

0 comments on commit b62fb9d

Please sign in to comment.