Skip to content

Commit

Permalink
Create dirs before writing data
Browse files Browse the repository at this point in the history
  • Loading branch information
RobbeSneyders committed Mar 6, 2024
1 parent 8de1dfb commit 3f2a1a7
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/fondant/component/data_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import dask.dataframe as dd
import dask.distributed
import fsspec
import pyarrow as pa
from dask.diagnostics import ProgressBar
from dask.distributed import as_completed
Expand Down Expand Up @@ -205,6 +206,12 @@ def _write_dataframe(self, dataframe: dd.DataFrame) -> dd.core.Scalar:
f"{self.manifest.run_id}/{self.operation_spec.component_name}"
)

# Create directory the dataframe will be written to, since this is not handled by Pandas
# `to_parquet` method.
protocol = fsspec.utils.get_protocol(location)
fs = fsspec.get_filesystem_class(protocol)
fs().makedirs(location)

schema = {
field.name: field.type.value
for field in self.operation_spec.produces_to_dataset.values()
Expand Down

0 comments on commit 3f2a1a7

Please sign in to comment.