diff --git a/src/fondant/component/data_io.py b/src/fondant/component/data_io.py index 48b4082c..703dd6a9 100644 --- a/src/fondant/component/data_io.py +++ b/src/fondant/component/data_io.py @@ -214,9 +214,19 @@ def _write_dataframe(self, dataframe: dd.DataFrame) -> None: # The id needs to be added explicitly since we will convert this to a PyArrow schema # later and use it in the `pandas.to_parquet` method. + try: + index_type = pa.from_numpy_dtype(dataframe.index.dtype) + except pa.lib.ArrowNotImplementedError: + # The dtype of the index is `np.object_`. Fall back on string instead. + logging.warning( + "Failed to infer dtype of index column, falling back to `string`. " + "Specify the dtype explicitly to prevent this.", + ) + index_type = pa.string() + schema.update( { - "id": pa.from_numpy_dtype(dataframe.index.dtype), + "id": index_type, }, )