diff --git a/components/evaluate_ragas/Dockerfile b/components/evaluate_ragas/Dockerfile deleted file mode 100644 index a5c3fa17a..000000000 --- a/components/evaluate_ragas/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -FROM --platform=linux/amd64 python:3.8-slim as base - -# System dependencies -RUN apt-get update && \ - apt-get upgrade -y && \ - apt-get install git -y - -# Install requirements -COPY requirements.txt / -RUN pip3 install --no-cache-dir -r requirements.txt - -# Install Fondant -# This is split from other requirements to leverage caching -ARG FONDANT_VERSION=main -RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} - -# Set the working directory to the component folder -WORKDIR /component -COPY src/ src/ - -FROM base as test -COPY tests/ tests/ -RUN pip3 install --no-cache-dir -r tests/requirements.txt -ARG OPENAI_KEY -ENV OPENAI_KEY=${OPENAI_KEY} -RUN python -m pytest tests - -FROM base -WORKDIR /component/src -ENTRYPOINT ["fondant", "execute", "main"] diff --git a/components/evaluate_ragas/README.md b/components/evaluate_ragas/README.md deleted file mode 100644 index fbaf13a3e..000000000 --- a/components/evaluate_ragas/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# Evaluate ragas - - -## Description -Component that evaluates the retriever using RAGAS - - -## Inputs / outputs - - -### Consumes -**This component consumes:** - -- question: string -- retrieved_chunks: list - - - - - -### Produces - -**This component can produce additional fields** -- : -This defines a mapping to update the fields produced by the operation as defined in the component spec. -The keys are the names of the fields to be produced by the component, while the values are -the type of the field that should be used to write the output dataset. - - - -## Arguments - -The component takes the following arguments to alter its behavior: - -| argument | type | description | default | -| -------- | ---- | ----------- | ------- | -| llm_module_name | str | Module from which the LLM is imported. Defaults to langchain.llms | langchain.chat_models | -| llm_class_name | str | Name of the selected llm | ChatOpenAI | -| llm_kwargs | dict | Arguments of the selected llm | {'model_name': 'gpt-3.5-turbo'} | - - -## Usage - -You can add this component to your pipeline using the following code: - -```python -from fondant.pipeline import Pipeline - - -pipeline = Pipeline(...) - -dataset = pipeline.read(...) - -dataset = dataset.apply( - "evaluate_ragas", - arguments={ - # Add arguments - # "llm_module_name": "langchain.chat_models", - # "llm_class_name": "ChatOpenAI", - # "llm_kwargs": {'model_name': 'gpt-3.5-turbo'}, - }, - produces={ - : , - ..., # Add fields - }, -) -``` - - -## Testing - -You can run the tests using docker with BuildKit. From this directory, run: -``` -docker build . 
--target test -``` diff --git a/components/evaluate_ragas/fondant_component.yaml b/components/evaluate_ragas/fondant_component.yaml deleted file mode 100644 index bca9d404b..000000000 --- a/components/evaluate_ragas/fondant_component.yaml +++ /dev/null @@ -1,33 +0,0 @@ -name: Evaluate ragas -description: Component that evaluates the retriever using RAGAS -image: fndnt/evaluate_ragas:dev -tags: - - Text processing - -consumes: - question: - type: string - retrieved_chunks: - type: array - items: - type: string - -produces: - additionalProperties: true - # Overwrite with metrics to be computed by ragas - # (https://docs.ragas.io/en/latest/concepts/metrics/index.html) - - -args: - llm_module_name: - description: Module from which the LLM is imported. Defaults to langchain.llms - type: str - default: "langchain.chat_models" - llm_class_name: - description: Name of the selected llm - type: str - default: "ChatOpenAI" - llm_kwargs: - description: Arguments of the selected llm - type: dict - default: {"model_name":"gpt-3.5-turbo"} diff --git a/components/evaluate_ragas/requirements.txt b/components/evaluate_ragas/requirements.txt deleted file mode 100644 index 64c1d6905..000000000 --- a/components/evaluate_ragas/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -ragas==0.0.21 \ No newline at end of file diff --git a/components/evaluate_ragas/src/main.py b/components/evaluate_ragas/src/main.py deleted file mode 100644 index b37e873b2..000000000 --- a/components/evaluate_ragas/src/main.py +++ /dev/null @@ -1,83 +0,0 @@ -import typing as t - -import pandas as pd -from datasets import Dataset -from fondant.component import PandasTransformComponent -from ragas import evaluate -from ragas.llms import LangchainLLM - - -class RetrieverEval(PandasTransformComponent): - def __init__( - self, - *, - llm_module_name: str, - llm_class_name: str, - llm_kwargs: dict, - produces: t.Dict[str, t.Any], - ) -> None: - """ - Args: - llm_module_name: Module from which the LLM is imported. Defaults to - langchain.chat_models - llm_class_name: Name of the selected llm. Defaults to ChatOpenAI - llm_kwargs: Arguments of the selected llm - produces: RAGAS metrics to compute. 
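
A note on how these constructor arguments are wired together: the component resolves the LLM class from `llm_module_name`/`llm_class_name` and looks each key of `produces` up in `ragas.metrics` at runtime. A minimal sketch of that dynamic-lookup pattern, using only the defaults from the component spec (actually running it assumes langchain, ragas 0.0.21 and an OpenAI API key are available):

```python
# Sketch of the dynamic lookup used by this component (see import_from below).
def import_from(module_name: str, element_name: str):
    module = __import__(module_name, fromlist=[element_name])
    return getattr(module, element_name)

# Defaults from the component spec.
llm_cls = import_from("langchain.chat_models", "ChatOpenAI")
llm = llm_cls(model_name="gpt-3.5-turbo")

# Metrics are resolved from the keys of `produces`, e.g. "context_precision".
context_precision = import_from("ragas.metrics", "context_precision")
```
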
- """ - self.llm = self.extract_llm( - llm_module_name=llm_module_name, - llm_class_name=llm_class_name, - llm_kwargs=llm_kwargs, - ) - self.gpt_wrapper = LangchainLLM(llm=self.llm) - self.metric_functions = self.extract_metric_functions( - metrics=list(produces.keys()), - ) - self.set_llm(self.metric_functions) - - # import the metric functions selected - @staticmethod - def import_from(module_name: str, element_name: str): - module = __import__(module_name, fromlist=[element_name]) - return getattr(module, element_name) - - def extract_llm(self, llm_module_name: str, llm_class_name: str, llm_kwargs: dict): - module = self.import_from( - module_name=llm_module_name, - element_name=llm_class_name, - ) - return module(**llm_kwargs) - - def extract_metric_functions(self, metrics: list): - functions = [] - for metric in metrics: - functions.append(self.import_from("ragas.metrics", metric)) - return functions - - def set_llm(self, metric_functions: list): - for metric_function in metric_functions: - metric_function.llm = self.gpt_wrapper - - # evaluate the retriever - @staticmethod - def create_hf_ds(dataframe: pd.DataFrame): - dataframe = dataframe.rename( - columns={"retrieved_chunks": "contexts"}, - ) - return Dataset.from_pandas(dataframe) - - def ragas_eval(self, dataset): - return evaluate(dataset=dataset, metrics=self.metric_functions) - - def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame: - hf_dataset = self.create_hf_ds( - dataframe=dataframe[["question", "retrieved_chunks"]], - ) - if "id" in hf_dataset.column_names: - hf_dataset = hf_dataset.remove_columns("id") - - result = self.ragas_eval(dataset=hf_dataset) - results_df = result.to_pandas() - results_df = results_df.set_index(dataframe.index) - - return results_df diff --git a/components/evaluate_ragas/tests/component_test.py b/components/evaluate_ragas/tests/component_test.py deleted file mode 100644 index 3a2f79be4..000000000 --- a/components/evaluate_ragas/tests/component_test.py +++ /dev/null @@ -1,117 +0,0 @@ -import os - -import pandas as pd -import pyarrow as pa -from main import RetrieverEval - - -def test_transform(): - input_dataframe = pd.DataFrame( - { - "text": [ - "Lorem ipsum dolor sit amet, consectetur adipiscing elit?", - "Sed massa massa, interdum a porttitor sit amet, semper eget nunc?", - ], - "retrieved_chunks": [ - [ - "Lorem ipsum dolor sit amet, consectetur adipiscing elit. \ - Quisque ut efficitur neque. Aenean mollis eleifend est, \ - eu laoreet magna egestas quis. Cras id sagittis erat. \ - Aliquam vel blandit arcu. Morbi ac nulla ullamcorper, \ - rutrum neque nec, pellentesque diam. Nulla nec tempor \ - enim. Suspendisse a volutpat leo, quis varius dolor.", - "Curabitur placerat ultrices mauris et lobortis. Maecenas \ - laoreet tristique sagittis. Integer facilisis eleifend \ - dolor, quis fringilla orci eleifend ac. Vestibulum nunc \ - odio, tincidunt ut augue et, ornare vehicula sapien. Orci \ - varius natoque penatibus et magnis dis parturient montes, \ - nascetur ridiculus mus. Sed auctor felis lacus, rutrum \ - tempus ligula viverra ac. Curabitur pharetra mauris et \ - ornare pulvinar. Suspendisse a ultricies nisl. Mauris \ - sit amet odio condimentum, venenatis orci vitae, \ - tincidunt purus. Ut ullamcorper convallis ligula ac \ - posuere. In efficitur enim ac lacus dignissim congue. \ - Nam turpis augue, aliquam et velit sit amet, varius \ - euismod ante. 
Duis volutpat nisl sit amet auctor tempus.\ - Vivamus in eros ex.", - ], - [ - "am leo massa, ultricies eu viverra ac, commodo non sapien. \ - Mauris et mauris sollicitudin, ultricies ex ac, luctus \ - nulla.", - "Cras tincidunt facilisis mi, ac eleifend justo lobortis ut. \ - In lobortis cursus ante et faucibus. Vestibulum auctor \ - felis at odio varius, ac vulputate leo dictum. \ - Phasellus in augue ante. Aliquam aliquam mauris \ - sed tellus egestas fermentum.", - ], - ], - }, - ) - - component = RetrieverEval( - module="langchain.llms", - llm_name="OpenAI", - llm_kwargs={"openai_api_key": os.environ["OPENAI_KEY"]}, - produces={ - "context_precision": pa.float32(), - "context_relevancy": pa.float32(), - }, - ) - - output_dataframe = component.transform(input_dataframe) - - expected_output_dataframe = pd.DataFrame( - { - "question": [ - "Lorem ipsum dolor sit amet, consectetur adipiscing elit?", - "Sed massa massa, interdum a porttitor sit amet, semper eget nunc?", - ], - "contexts": [ - [ - "Lorem ipsum dolor sit amet, consectetur adipiscing elit. \ - Quisque ut efficitur neque. Aenean mollis eleifend est, \ - eu laoreet magna egestas quis. Cras id sagittis erat. \ - Aliquam vel blandit arcu. Morbi ac nulla ullamcorper, \ - rutrum neque nec, pellentesque diam. Nulla nec tempor \ - enim. Suspendisse a volutpat leo, quis varius dolor.", - "Curabitur placerat ultrices mauris et lobortis. Maecenas \ - laoreet tristique sagittis. Integer facilisis eleifend \ - dolor, quis fringilla orci eleifend ac. Vestibulum nunc \ - odio, tincidunt ut augue et, ornare vehicula sapien. Orci \ - varius natoque penatibus et magnis dis parturient montes, \ - nascetur ridiculus mus. Sed auctor felis lacus, rutrum \ - tempus ligula viverra ac. Curabitur pharetra mauris et \ - ornare pulvinar. Suspendisse a ultricies nisl. Mauris \ - sit amet odio condimentum, venenatis orci vitae, \ - tincidunt purus. Ut ullamcorper convallis ligula ac \ - posuere. In efficitur enim ac lacus dignissim congue. \ - Nam turpis augue, aliquam et velit sit amet, varius \ - euismod ante. Duis volutpat nisl sit amet auctor tempus.\ - Vivamus in eros ex.", - ], - [ - "am leo massa, ultricies eu viverra ac, commodo non sapien. \ - Mauris et mauris sollicitudin, ultricies ex ac, luctus \ - nulla.", - "Cras tincidunt facilisis mi, ac eleifend justo lobortis ut. \ - In lobortis cursus ante et faucibus. Vestibulum auctor \ - felis at odio varius, ac vulputate leo dictum. \ - Phasellus in augue ante. 
Aliquam aliquam mauris \ - sed tellus egestas fermentum.", - ], - ], - "context_precision": 0.15, - "context_relevancy": 0.35, - }, - ) - - # Check if columns are the same - columns_equal = expected_output_dataframe.columns.equals(output_dataframe.columns) - - # Check if data types within each column match - dtypes_match = expected_output_dataframe.dtypes.equals(output_dataframe.dtypes) - - # Check if both conditions are met - assert columns_equal - assert dtypes_match diff --git a/components/evaluate_ragas/tests/pytest.ini b/components/evaluate_ragas/tests/pytest.ini deleted file mode 100644 index bf6a8a517..000000000 --- a/components/evaluate_ragas/tests/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -pythonpath = ../src \ No newline at end of file diff --git a/components/evaluate_ragas/tests/requirements.txt b/components/evaluate_ragas/tests/requirements.txt deleted file mode 100644 index de1887bec..000000000 --- a/components/evaluate_ragas/tests/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pytest==7.4.2 \ No newline at end of file diff --git a/components/load_with_llamahub/Dockerfile b/components/load_with_llamahub/Dockerfile deleted file mode 100644 index 5de6e945f..000000000 --- a/components/load_with_llamahub/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -FROM --platform=linux/amd64 python:3.8-slim as base - -# System dependencies -RUN apt-get update && \ - apt-get upgrade -y && \ - apt-get install git -y - -# Install requirements -COPY requirements.txt / -RUN pip3 install --no-cache-dir -r requirements.txt - -# Install Fondant -# This is split from other requirements to leverage caching -ARG FONDANT_VERSION=main -RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} - -# Set the working directory to the component folder -WORKDIR /component -COPY src/ src/ - -FROM base as test -COPY tests/ tests/ -RUN pip3 install --no-cache-dir -r tests/requirements.txt -RUN python -m pytest tests - -FROM base -WORKDIR /component/src -ENTRYPOINT ["fondant", "execute", "main"] - diff --git a/components/load_with_llamahub/README.md b/components/load_with_llamahub/README.md deleted file mode 100644 index 5c3aa1320..000000000 --- a/components/load_with_llamahub/README.md +++ /dev/null @@ -1,78 +0,0 @@ -# Load with LlamaHub - - -## Description -Load data using a LlamaHub loader. For available loaders, check the -[LlamaHub](https://llamahub.ai/). - - - -## Inputs / outputs - - -### Consumes - - -**This component does not consume data.** - - - -### Produces - -**This component can produce additional fields** -- : -This defines a mapping to update the fields produced by the operation as defined in the component spec. -The keys are the names of the fields to be produced by the component, while the values are -the type of the field that should be used to write the output dataset. - - - -## Arguments - -The component takes the following arguments to alter its behavior: - -| argument | type | description | default | -| -------- | ---- | ----------- | ------- | -| loader_class | str | The name of the LlamaIndex loader class to use. Make sure to provide the name and not the id. The name is passed to `llama_index.download_loader` to download the specified loader. | / | -| loader_kwargs | str | Keyword arguments to pass when instantiating the loader class. Check the documentation of the loader to check which arguments it accepts. | / | -| load_kwargs | str | Keyword arguments to pass to the `.load()` method of the loader. 
Check the documentation ofthe loader to check which arguments it accepts. | / | -| additional_requirements | list | Some loaders require additional dependencies to be installed. You can specify those here. Use a format accepted by `pip install`. Eg. "pypdf" or "pypdf==3.17.1". Unfortunately additional requirements for LlamaIndex loaders are not documented well, but if a dependencyis missing, a clear error message will be thrown. | / | -| n_rows_to_load | int | Optional argument that defines the number of rows to load. Useful for testing pipeline runs on a small scale | / | -| index_column | str | Column to set index to in the load component, if not specified a default globally unique index will be set | / | - - -## Usage - -You can add this component to your pipeline using the following code: - -```python -from fondant.pipeline import Pipeline - - -pipeline = Pipeline(...) - -dataset = pipeline.read( - "load_with_llamahub", - arguments={ - # Add arguments - # "loader_class": , - # "loader_kwargs": , - # "load_kwargs": , - # "additional_requirements": [], - # "n_rows_to_load": 0, - # "index_column": , - }, - produces={ - : , - ..., # Add fields - }, -) -``` - - -## Testing - -You can run the tests using docker with BuildKit. From this directory, run: -``` -docker build . --target test -``` diff --git a/components/load_with_llamahub/fondant_component.yaml b/components/load_with_llamahub/fondant_component.yaml deleted file mode 100644 index ca16ff794..000000000 --- a/components/load_with_llamahub/fondant_component.yaml +++ /dev/null @@ -1,47 +0,0 @@ -name: Load with LlamaHub -description: | - Load data using a LlamaHub loader. For available loaders, check the - [LlamaHub](https://llamahub.ai/). -image: fndnt/load_with_llamahub:dev -tags: - - Data loading - -produces: - additionalProperties: true - -args: - loader_class: - description: | - The name of the LlamaIndex loader class to use. Make sure to provide the name and not the - id. The name is passed to `llama_index.download_loader` to download the specified loader. - type: str - loader_kwargs: - description: | - Keyword arguments to pass when instantiating the loader class. Check the documentation of - the loader to check which arguments it accepts. - type: str - load_kwargs: - description: | - Keyword arguments to pass to the `.load()` method of the loader. Check the documentation of - the loader to check which arguments it accepts. - type: str - additional_requirements: - description: | - Some loaders require additional dependencies to be installed. You can specify those here. - Use a format accepted by `pip install`. Eg. "pypdf" or "pypdf==3.17.1". Unfortunately - additional requirements for LlamaIndex loaders are not documented well, but if a dependency - is missing, a clear error message will be thrown. - type: list - default: [] - n_rows_to_load: - description: | - Optional argument that defines the number of rows to load. 
Useful for testing pipeline runs - on a small scale - type: int - default: None - index_column: - description: | - Column to set index to in the load component, if not specified a default globally unique - index will be set - type: str - default: None diff --git a/components/load_with_llamahub/requirements.txt b/components/load_with_llamahub/requirements.txt deleted file mode 100644 index 3a7971f8f..000000000 --- a/components/load_with_llamahub/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -llama-index==0.9.9 diff --git a/components/load_with_llamahub/src/main.py b/components/load_with_llamahub/src/main.py deleted file mode 100644 index 8be99f096..000000000 --- a/components/load_with_llamahub/src/main.py +++ /dev/null @@ -1,110 +0,0 @@ -import logging -import subprocess -import sys -import typing as t -from collections import defaultdict - -import dask.dataframe as dd -import pandas as pd -from fondant.component import DaskLoadComponent -from fondant.core.component_spec import OperationSpec -from llama_index import download_loader - -logger = logging.getLogger(__name__) - - -class LlamaHubReader(DaskLoadComponent): - def __init__( - self, - spec: OperationSpec, - *, - loader_class: str, - loader_kwargs: dict, - load_kwargs: dict, - additional_requirements: t.List[str], - n_rows_to_load: t.Optional[int] = None, - index_column: t.Optional[str] = None, - ) -> None: - """ - Args: - spec: the component spec - loader_class: The name of the LlamaIndex loader class to use - loader_kwargs: Keyword arguments to pass when instantiating the loader class - load_kwargs: Keyword arguments to pass to the `.load()` method of the loader - additional_requirements: Additional Python requirements to install - n_rows_to_load: optional argument that defines the number of rows to load. - Useful for testing pipeline runs on a small scale. - index_column: Column to set index to in the load component, if not specified a default - globally unique index will be set. 
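
Outside of Fondant, these arguments map onto plain llama-index calls. A rough sketch of what the component does with `loader_class`, `loader_kwargs` and `load_kwargs`, using the ArxivReader from the test below as an example (the query values are illustrative; running it assumes llama-index 0.9.x, the `pypdf` extra and network access):

```python
# Rough equivalent of what LlamaHubReader does with its loader arguments.
from llama_index import download_loader

loader_cls = download_loader("ArxivReader")   # loader_class
loader = loader_cls()                          # instantiated with loader_kwargs
documents = loader.load_data(                  # called with load_kwargs
    search_query="jeff dean",
    max_results=5,
)

for doc in documents[:2]:
    print(doc.metadata.get("URL"), doc.text[:80])
```
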
- """ - self.n_rows_to_load = n_rows_to_load - self.index_column = index_column - self.spec = spec - - self.install_additional_requirements(additional_requirements) - - loader_cls = download_loader(loader_class) - self.loader = loader_cls(**loader_kwargs) - self.load_kwargs = load_kwargs - - @staticmethod - def install_additional_requirements(additional_requirements: t.List[str]): - for requirement in additional_requirements: - subprocess.check_call( # nosec - [sys.executable, "-m", "pip", "install", requirement], - ) - - def set_df_index(self, dask_df: dd.DataFrame) -> dd.DataFrame: - if self.index_column is None: - logger.info( - "Index column not specified, setting a globally unique index", - ) - - def _set_unique_index(dataframe: pd.DataFrame, partition_info=None): - """Function that sets a unique index based on the partition and row number.""" - dataframe["id"] = 1 - dataframe["id"] = ( - str(partition_info["number"]) - + "_" - + (dataframe.id.cumsum()).astype(str) - ) - dataframe.index = dataframe.pop("id") - return dataframe - - def _get_meta_df() -> pd.DataFrame: - meta_dict = {"id": pd.Series(dtype="object")} - for field_name, field in self.spec.inner_produces.items(): - meta_dict[field_name] = pd.Series( - dtype=pd.ArrowDtype(field.type.value), - ) - return pd.DataFrame(meta_dict).set_index("id") - - meta = _get_meta_df() - dask_df = dask_df.map_partitions(_set_unique_index, meta=meta) - else: - logger.info(f"Setting `{self.index_column}` as index") - dask_df = dask_df.set_index(self.index_column, drop=True) - - return dask_df - - def load(self) -> dd.DataFrame: - try: - documents = self.loader.lazy_load_data(**self.load_kwargs) - except NotImplementedError: - documents = self.loader.load_data(**self.load_kwargs) - - doc_dict = defaultdict(list) - for d, document in enumerate(documents): - for column in self.spec.inner_produces: - if column == "text": - doc_dict["text"].append(document.text) - else: - doc_dict[column].append(document.metadata.get(column)) - - if d == self.n_rows_to_load: - break - - dask_df = dd.from_dict(doc_dict, npartitions=1) - - dask_df = self.set_df_index(dask_df) - return dask_df diff --git a/components/load_with_llamahub/tests/component_test.py b/components/load_with_llamahub/tests/component_test.py deleted file mode 100644 index 217b42281..000000000 --- a/components/load_with_llamahub/tests/component_test.py +++ /dev/null @@ -1,35 +0,0 @@ -from pathlib import Path - -import yaml -from fondant.core.component_spec import ComponentSpec - -from src.main import LlamaHubReader - - -def test_arxiv_reader(): - """Test the component with the ArxivReader. - - This test requires a stable internet connection, both to download the loader, and to download - the papers from Arxiv. 
- """ - with open(Path(__file__).with_name("fondant_component.yaml")) as f: - spec = yaml.safe_load(f) - spec = ComponentSpec(spec) - - component = LlamaHubReader( - spec=spec, - loader_class="ArxivReader", - loader_kwargs={}, - load_kwargs={ - "search_query": "jeff dean", - "max_results": 5, - }, - additional_requirements=["pypdf"], - n_rows_to_load=None, - index_column=None, - ) - - output_dataframe = component.load().compute() - - assert len(output_dataframe) > 0 - assert output_dataframe.columns.tolist() == ["text", "URL", "Title of this paper"] diff --git a/components/load_with_llamahub/tests/fondant_component.yaml b/components/load_with_llamahub/tests/fondant_component.yaml deleted file mode 100644 index b0f34786f..000000000 --- a/components/load_with_llamahub/tests/fondant_component.yaml +++ /dev/null @@ -1,50 +0,0 @@ -name: Load with LlamaHub -description: | - Load data using a LlamaHub loader. For available loaders, check the - [LlamaHub](https://llamahub.ai/). -image: ghcr.io/ml6team/load_with_llamahub:dev - -produces: - text: - type: string - URL: - type: string - Title of this paper: - type: string - -args: - loader_class: - description: | - The name of the LlamaIndex loader class to use. Make sure to provide the name and not the - id. The name is passed to `llama_index.download_loader` to download the specified loader. - type: str - loader_kwargs: - description: | - Keyword arguments to pass when instantiating the loader class. Check the documentation of - the loader to check which arguments it accepts. - type: str - load_kwargs: - description: | - Keyword arguments to pass to the `.load()` method of the loader. Check the documentation of - the loader to check which arguments it accepts. - type: str - additional_requirements: - description: | - Some loaders require additional dependencies to be installed. You can specify those here. - Use a format accepted by `pip install`. Eg. "pypdf" or "pypdf==3.17.1". Unfortunately - additional requirements for LlamaIndex loaders are not documented well, but if a dependency - is missing, a clear error message will be thrown. - type: list - default: [] - n_rows_to_load: - description: | - Optional argument that defines the number of rows to load. 
Useful for testing pipeline runs - on a small scale - type: int - default: None - index_column: - description: | - Column to set index to in the load component, if not specified a default globally unique - index will be set - type: str - default: None diff --git a/components/load_with_llamahub/tests/pytest.ini b/components/load_with_llamahub/tests/pytest.ini deleted file mode 100644 index bf6a8a517..000000000 --- a/components/load_with_llamahub/tests/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -pythonpath = ../src \ No newline at end of file diff --git a/components/load_with_llamahub/tests/requirements.txt b/components/load_with_llamahub/tests/requirements.txt deleted file mode 100644 index 2a929edcc..000000000 --- a/components/load_with_llamahub/tests/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pytest==7.4.2 diff --git a/components/normalize_text/Dockerfile b/components/normalize_text/Dockerfile deleted file mode 100644 index c1e64c082..000000000 --- a/components/normalize_text/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -FROM --platform=linux/amd64 python:3.8-slim as base - -# System dependencies -RUN apt-get update && \ - apt-get upgrade -y && \ - apt-get install git -y - -# Install requirements -COPY requirements.txt / -RUN pip3 install --no-cache-dir -r requirements.txt - -# Install Fondant -# This is split from other requirements to leverage caching -ARG FONDANT_VERSION=main -RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} - -# Set the working directory to the component folder -WORKDIR /component -COPY src/ src/ - -FROM base as test -COPY tests/ tests/ -RUN pip3 install --no-cache-dir -r tests/requirements.txt -RUN python -m pytest tests - -FROM base -WORKDIR /component/src -ENTRYPOINT ["fondant", "execute", "main"] diff --git a/components/normalize_text/README.md b/components/normalize_text/README.md deleted file mode 100644 index 9de782516..000000000 --- a/components/normalize_text/README.md +++ /dev/null @@ -1,84 +0,0 @@ -# Normalize text - - -## Description -This component implements several text normalization techniques to clean and preprocess textual -data: - -- Apply lowercasing: Converts all text to lowercase -- Remove unnecessary whitespaces: Eliminates extra spaces between words, e.g. tabs -- Apply NFC normalization: Converts characters to their canonical representation -- Remove common seen patterns in webpages following the implementation of - [Penedo et al.](https://arxiv.org/pdf/2306.01116.pdf) -- Remove punctuation: Strips punctuation marks from the text - -These text normalization techniques are valuable for preparing text data before using it for -the training of large language models. - - - -## Inputs / outputs - - -### Consumes -**This component consumes:** - -- text: string - - - - - -### Produces -**This component produces:** - -- text: string - - - - -## Arguments - -The component takes the following arguments to alter its behavior: - -| argument | type | description | default | -| -------- | ---- | ----------- | ------- | -| remove_additional_whitespaces | bool | If true remove all additional whitespace, tabs. | / | -| apply_nfc | bool | If true apply nfc normalization | / | -| normalize_lines | bool | If true analyze documents line-by-line and apply various rules to discard or edit lines. Used to removed common patterns in webpages, e.g. 
counter | / | -| do_lowercase | bool | If true apply lowercasing | / | -| remove_punctuation | str | If true punctuation will be removed | / | - - -## Usage - -You can add this component to your pipeline using the following code: - -```python -from fondant.pipeline import Pipeline - - -pipeline = Pipeline(...) - -dataset = pipeline.read(...) - -dataset = dataset.apply( - "normalize_text", - arguments={ - # Add arguments - # "remove_additional_whitespaces": False, - # "apply_nfc": False, - # "normalize_lines": False, - # "do_lowercase": False, - # "remove_punctuation": , - }, -) -``` - - -## Testing - -You can run the tests using docker with BuildKit. From this directory, run: -``` -docker build . --target test -``` diff --git a/components/normalize_text/fondant_component.yaml b/components/normalize_text/fondant_component.yaml deleted file mode 100644 index 35b6c79de..000000000 --- a/components/normalize_text/fondant_component.yaml +++ /dev/null @@ -1,42 +0,0 @@ -name: Normalize text -image: fndnt/normalize_text:latest -description: | - This component implements several text normalization techniques to clean and preprocess textual - data: - - - Apply lowercasing: Converts all text to lowercase - - Remove unnecessary whitespaces: Eliminates extra spaces between words, e.g. tabs - - Apply NFC normalization: Converts characters to their canonical representation - - Remove common seen patterns in webpages following the implementation of - [Penedo et al.](https://arxiv.org/pdf/2306.01116.pdf) - - Remove punctuation: Strips punctuation marks from the text - - These text normalization techniques are valuable for preparing text data before using it for - the training of large language models. -tags: - - Text processing - -consumes: - text: - type: string - -produces: - text: - type: string - -args: - remove_additional_whitespaces: - description: If true remove all additional whitespace, tabs. - type: bool - apply_nfc: - description: If true apply nfc normalization - type: bool - normalize_lines: - description: If true analyze documents line-by-line and apply various rules to discard or edit lines. Used to removed common patterns in webpages, e.g. counter - type: bool - do_lowercase: - description: If true apply lowercasing - type: bool - remove_punctuation: - description: If true punctuation will be removed - type: str \ No newline at end of file diff --git a/components/normalize_text/requirements.txt b/components/normalize_text/requirements.txt deleted file mode 100644 index a4299def8..000000000 --- a/components/normalize_text/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -ftfy==6.1.1 \ No newline at end of file diff --git a/components/normalize_text/src/main.py b/components/normalize_text/src/main.py deleted file mode 100644 index cda2939cf..000000000 --- a/components/normalize_text/src/main.py +++ /dev/null @@ -1,119 +0,0 @@ -"""A component that normalizes text.""" -import logging -import re -import string -from typing import List - -import ftfy -import pandas as pd -from fondant.component import PandasTransformComponent -from utils import is_counter, is_one_word, mainly_uppercase, only_numerical - -logger = logging.getLogger(__name__) - - -def _remove_punctuation(text): - """Remove punctuation in given text.""" - return text.translate(str.maketrans("", "", string.punctuation)) - - -def _remove_additional_whitespaces(text): - """ - Text cleaning method from slimpajama approach. 
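
Taken together, the transform below applies line filtering, lowercasing, NFC fixing, punctuation stripping and whitespace collapsing, in that order. A simplified standalone sketch of the same steps (only the counter rule is shown for line filtering; the sample string is made up):

```python
# Standalone sketch of the normalization steps applied by transform(), in order.
import re
import string

import ftfy

text = "C\u0327a va,\tWORLD!!\n3 likes\nthis is fine"

# normalize_lines (simplified): drop counter-like lines such as "3 likes"
lines = [l for l in text.split("\n") if not re.match(r"^\d+\s+\S+$", l.strip())]
text = " ".join(lines)

text = text.lower()                                                 # lowercasing
text = ftfy.fix_text(text, normalization="NFC")                     # NFC normalization
text = text.translate(str.maketrans("", "", string.punctuation))    # strip punctuation
text = re.sub(r"\s+", " ", text.strip())                            # collapse whitespace

print(text)  # -> "ça va world this is fine"
```
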
- https://github.com/Cerebras/modelzoo/blob/main/modelzoo/transformers/data_processing/slimpajama/preprocessing/filter.py - Apply remove punctuation, and remove consecutive spaces, newlines, tabs in the middle - and in the beginning / end. - """ - return re.sub(r"\s+", " ", text.strip()) - - -def normalize_lines(text): - def any_condition_met(line, discard_condition_functions): - return any(condition(line) for condition in discard_condition_functions) - - discard_conditions = [mainly_uppercase, only_numerical, is_counter, is_one_word] - return " ".join( - [ - line - for line in text.split("\n") - if not any_condition_met(line, discard_conditions) - ], - ) - - -class NormalizeTextComponent(PandasTransformComponent): - """Component that normalizes text.""" - - def __init__( - self, - *, - remove_additional_whitespaces: bool, - apply_nfc: bool, - normalize_lines: bool, - do_lowercase: bool, - remove_punctuation: bool, - ): - self.remove_additional_whitespaces = remove_additional_whitespaces - self.apply_nfc = apply_nfc - self.normalize_lines = normalize_lines - self.do_lowercase = do_lowercase - self.remove_punctuation = remove_punctuation - - @staticmethod - def _do_nfc_normalization(text: str): - """Apply nfc normalization to the text of the dataframe.""" - return ftfy.fix_text(text, normalization="NFC") - - @staticmethod - def _remove_patterns(regex_patterns: List[str], text: str): - """Remove each regex pattern in the provided string.""" - for pattern in regex_patterns: - text = re.sub(pattern, "", text) - return text - - def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame: - """ - Apply normalization transformations. The component is capable of: - - NFC normalization - - Lowercasing - - Removing of unnecessary whitespaces (e.g. tabs), punctuation - - Apply line-wise transformations that exclude lines matching specified patterns. - Patterns include lines that are mainly composed of uppercase characters, lines that consist - only of numerical characters, lines that are counters (e.g., "3 likes"), and lines - that contain only one word. - - Args: - dataframe: Pandas dataframe. - - Returns: - Pandas dataframe - """ - if self.normalize_lines: - dataframe["text"] = dataframe["text"].apply( - normalize_lines, - ) - - if self.do_lowercase: - dataframe["text"] = dataframe["text"].apply( - lambda x: x.lower(), - ) - - if self.apply_nfc: - dataframe["text"] = dataframe["text"].apply( - self._do_nfc_normalization, - ) - - if self.remove_punctuation: - dataframe["text"] = dataframe["text"].apply( - _remove_punctuation, - ) - - if self.remove_additional_whitespaces: - dataframe["text"] = dataframe["text"].apply( - _remove_additional_whitespaces, - ) - - # remove all empty rows - dataframe = dataframe[dataframe["text"].astype(bool)] - - return dataframe diff --git a/components/normalize_text/src/utils.py b/components/normalize_text/src/utils.py deleted file mode 100644 index b487bc61e..000000000 --- a/components/normalize_text/src/utils.py +++ /dev/null @@ -1,65 +0,0 @@ -import re - - -def mainly_uppercase(line: str, threshold: float = 0.7) -> bool: - """ - Checks if a line is mainly composed of uppercase characters. - - Args: - line: The input line to check. - threshold (float): The threshold (between 0 and 1) to determine what is considered - "mainly uppercase." - - Returns: - bool: True if the line is mainly uppercase, False otherwise. 
- """ - uppercase_count = sum(1 for char in line if char.isupper()) - total_chars = len(line) - if total_chars == 0: - return False - - uppercase_ratio = uppercase_count / total_chars - return uppercase_ratio >= threshold - - -def only_numerical(line: str) -> bool: - """ - Checks if a line is composed only of numerical characters. - - Args: - line: The input line to check. - - Returns: - bool: True if the line is only composed of numerical characters, False otherwise. - """ - return line.isdigit() - - -def is_counter(line: str) -> bool: - """ - Checks if a line represents a counter (e.g., "3 likes"). - - Args: - line: The input line to check. - - Returns: - bool: True if the line represents a counter, False otherwise. - """ - # Use regular expression to check for the pattern: - line = line.strip() - pattern = r"^\d+\s+\S+$" - return re.match(pattern, line) is not None - - -def is_one_word(line: str) -> bool: - """ - Checks if a line contains only one word. - - Args: - line: The input line to check. - - Returns: - bool: True if the line contains only one word, False otherwise. - """ - words = line.split() - return len(words) == 1 diff --git a/components/normalize_text/tests/component_test.py b/components/normalize_text/tests/component_test.py deleted file mode 100644 index d7f52967e..000000000 --- a/components/normalize_text/tests/component_test.py +++ /dev/null @@ -1,41 +0,0 @@ -import pandas as pd - -from src.main import NormalizeTextComponent - - -def test_transform_custom_componen_test(): - """Test components transform method.""" - user_arguments = { - "remove_additional_whitespaces": True, - "apply_nfc": True, - "normalize_lines": True, - "do_lowercase": True, - "remove_punctuation": True, - } - component = NormalizeTextComponent(**user_arguments) - - input_dataframe = pd.DataFrame( - [ - "\u0043\u0327 something", - "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", - "Nulla facilisi. 
Sed eu nulla sit amet enim scelerisque dapibus.", - ], - columns=["text"], - ) - - expected_output = pd.DataFrame( - [ - "\u00e7 something", - "lorem ipsum dolor sit amet consectetur adipiscing elit", - "nulla facilisi sed eu nulla sit amet enim scelerisque dapibus", - ], - columns=["text"], - ) - - output_dataframe = component.transform(input_dataframe) - - pd.testing.assert_frame_equal( - left=expected_output, - right=output_dataframe, - check_dtype=False, - ) diff --git a/components/normalize_text/tests/pytest.ini b/components/normalize_text/tests/pytest.ini deleted file mode 100644 index bf6a8a517..000000000 --- a/components/normalize_text/tests/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -pythonpath = ../src \ No newline at end of file diff --git a/components/normalize_text/tests/requirements.txt b/components/normalize_text/tests/requirements.txt deleted file mode 100644 index 6950eb5a7..000000000 --- a/components/normalize_text/tests/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pytest==7.4.0 \ No newline at end of file diff --git a/components/normalize_text/tests/utils_test.py b/components/normalize_text/tests/utils_test.py deleted file mode 100644 index 8a0892bb2..000000000 --- a/components/normalize_text/tests/utils_test.py +++ /dev/null @@ -1,46 +0,0 @@ -from src.utils import ( - is_counter, - is_one_word, - mainly_uppercase, - only_numerical, -) - - -def test_mainly_uppercase(): - line = "HELLO WORLD not upper SOMETHING ELSE IN UPPERCASE" - assert mainly_uppercase(line, threshold=0.5) - - -def test_mainly_uppercase_under_threshold(): - line = "HELLO WORLD not upper SOMETHING ELSE IN UPPERCASE" - assert not mainly_uppercase(line, threshold=0.9) - - -def test_only_numerical(): - line = "42" - assert only_numerical(line) - - -def test_only_numerical_on_words(): - line = "42 lorem ipsum" - assert not only_numerical(line) - - -def test_is_counter(): - line = "13 Likes" - assert is_counter(line) - - -def test_is_not_counter(): - line = "Hello world! 42 people are part of .." 
- assert not is_counter(line) - - -def test_is_one_word(): - line = "word" - assert is_one_word(line) - - -def test_is_not_one_word(): - line = "two words" - assert not is_one_word(line) diff --git a/components/retrieve_from_weaviate/Dockerfile b/components/retrieve_from_weaviate/Dockerfile deleted file mode 100644 index 5de6e945f..000000000 --- a/components/retrieve_from_weaviate/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -FROM --platform=linux/amd64 python:3.8-slim as base - -# System dependencies -RUN apt-get update && \ - apt-get upgrade -y && \ - apt-get install git -y - -# Install requirements -COPY requirements.txt / -RUN pip3 install --no-cache-dir -r requirements.txt - -# Install Fondant -# This is split from other requirements to leverage caching -ARG FONDANT_VERSION=main -RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION} - -# Set the working directory to the component folder -WORKDIR /component -COPY src/ src/ - -FROM base as test -COPY tests/ tests/ -RUN pip3 install --no-cache-dir -r tests/requirements.txt -RUN python -m pytest tests - -FROM base -WORKDIR /component/src -ENTRYPOINT ["fondant", "execute", "main"] - diff --git a/components/retrieve_from_weaviate/README.md b/components/retrieve_from_weaviate/README.md deleted file mode 100644 index e5c3337ce..000000000 --- a/components/retrieve_from_weaviate/README.md +++ /dev/null @@ -1,206 +0,0 @@ -# retrieve_from_weaviate - - -## Description -Component that retrieves chunks from a Weaviate vector database. -The component can retrieve chunks based on a text search or based on a vector search. -Reranking is only supported for text search. -More info here [Cohere Ranking](https://github.com/weaviate/recipes/blob/main/ranking/cohere-ranking/cohere-ranking.ipynb) -[Weaviate Search Rerank](https://weaviate.io/developers/weaviate/search/rerank) - -### Running with text as input - -```python -import pyarrow as pa -from fondant.pipeline import Pipeline - -pipeline = Pipeline(name="my_pipeline", base_path="path/to/pipeline") - -dataset = pipeline.read( - "load_from_csv", - arguments={ - "dataset_uri": "path/to/dataset.csv", - }, - produces={ - "text": pa.string(), - } -) - -dataset = dataset.apply( - "index_weaviate", - arguments={ - "weaviate_url": "http://localhost:8080", - "class_name": "my_class", - "vectorizer": "text2vec-openai", - "additional_headers": { - "X-OpenAI-Api-Key": "YOUR-OPENAI-API-KEY" - } - }, - consumes={ - "text": "text" - } -) - -dataset = dataset.apply( - "retrieve_from_weaviate", - arguments={ - "weaviate_url": "http://localhost:8080", - "class_name": "my_class", - "top_k": 3, - "additional_headers": { - "X-OpenAI-Api-Key": "YOUR-OPENAI-API-KEY" - } - }, - consumes={ - "text": "text" - } -) -``` - -```python -import pyarrow as pa -from fondant.pipeline import Pipeline - -pipeline = Pipeline(name="my_pipeline", base_path="path/to/pipeline") - -dataset = pipeline.read( - "load_from_csv", - arguments={ - "dataset_uri": "path/to/dataset.csv", - }, - produces={ - "text": pa.string(), - } -) - -dataset = dataset.apply( - "embed_text", - arguments={...}, - consumes={ - "text": "text", - }, -) - -dataset = dataset.apply( - "index_weaviate", - arguments={ - "weaviate_url": "http://localhost:8080", - "class_name": "my_class", - }, - consumes={ - "embedding": "embedding" - } -) - -dataset = pipeline.read( - "load_from_csv", - arguments={ - "dataset_uri": "path/to/prompt_dataset.csv", - }, - produces={ - "prompts": pa.string(), - } -) - -dataset = dataset.apply( - 
"embed_text", - arguments={...}, - consumes={ - "prompts": "text", - }, -) - -dataset = dataset.apply( - "retrieve_from_weaviate", - arguments={ - "weaviate_url": "http://localhost:8080", - "class_name": "my_class", - "top_k": 3, - consumes={ - "text": "text" - } -) -``` - - - -## Inputs / outputs - - -### Consumes - -**This component can consume additional fields** -- : -This defines a mapping to update the fields consumed by the operation as defined in the component spec. -The keys are the names of the fields to be received by the component, while the values are -the name of the field to map from the input dataset - -See the usage example below on how to define a field name for additional fields. - - - - - -### Produces -**This component produces:** - -- retrieved_chunks: list - - - - -## Arguments - -The component takes the following arguments to alter its behavior: - -| argument | type | description | default | -| -------- | ---- | ----------- | ------- | -| weaviate_url | str | The URL of the weaviate instance. | http://localhost:8080 | -| class_name | str | The name of the weaviate class that will be queried | / | -| top_k | int | Number of chunks to retrieve | / | -| additional_config | dict | Additional configuration to pass to the weaviate client. | / | -| additional_headers | dict | Additional headers to pass to the weaviate client. | / | -| hybrid_query | str | The hybrid query to be used for retrieval. Optional parameter. | / | -| hybrid_alpha | float | Argument to change how much each search affects the results. An alpha of 1 is a pure vector search. An alpha of 0 is a pure keyword search. | / | -| rerank | bool | Whether to rerank the results based on the hybrid query. Defaults to False.Check this notebook for more information on reranking:https://github.com/weaviate/recipes/blob/main/ranking/cohere-ranking/cohere-ranking.ipynbhttps://weaviate.io/developers/weaviate/search/rerank. | / | - - -## Usage - -You can add this component to your pipeline using the following code: - -```python -from fondant.pipeline import Pipeline - - -pipeline = Pipeline(...) - -dataset = pipeline.read(...) - -dataset = dataset.apply( - "retrieve_from_weaviate", - arguments={ - # Add arguments - # "weaviate_url": "http://localhost:8080", - # "class_name": , - # "top_k": 0, - # "additional_config": {}, - # "additional_headers": {}, - # "hybrid_query": , - # "hybrid_alpha": 0.0, - # "rerank": False, - }, - consumes={ - : , - ..., # Add fields - }, -) -``` - - -## Testing - -You can run the tests using docker with BuildKit. From this directory, run: -``` -docker build . --target test -``` diff --git a/components/retrieve_from_weaviate/fondant_component.yaml b/components/retrieve_from_weaviate/fondant_component.yaml deleted file mode 100644 index 7b082b37b..000000000 --- a/components/retrieve_from_weaviate/fondant_component.yaml +++ /dev/null @@ -1,175 +0,0 @@ -name: retrieve_from_weaviate -description: | - Component that retrieves chunks from a Weaviate vector database. - The component can retrieve chunks based on a text search or based on a vector search. - Reranking is only supported for text search. 
- More info here [Cohere Ranking](https://github.com/weaviate/recipes/blob/main/ranking/cohere-ranking/cohere-ranking.ipynb) - [Weaviate Search Rerank](https://weaviate.io/developers/weaviate/search/rerank) - - ### Running with text as input - - ```python - import pyarrow as pa - from fondant.pipeline import Pipeline - - pipeline = Pipeline(name="my_pipeline", base_path="path/to/pipeline") - - dataset = pipeline.read( - "load_from_csv", - arguments={ - "dataset_uri": "path/to/dataset.csv", - }, - produces={ - "text": pa.string(), - } - ) - - dataset = dataset.apply( - "index_weaviate", - arguments={ - "weaviate_url": "http://localhost:8080", - "class_name": "my_class", - "vectorizer": "text2vec-openai", - "additional_headers": { - "X-OpenAI-Api-Key": "YOUR-OPENAI-API-KEY" - } - }, - consumes={ - "text": "text" - } - ) - - dataset = dataset.apply( - "retrieve_from_weaviate", - arguments={ - "weaviate_url": "http://localhost:8080", - "class_name": "my_class", - "top_k": 3, - "additional_headers": { - "X-OpenAI-Api-Key": "YOUR-OPENAI-API-KEY" - } - }, - consumes={ - "text": "text" - } - ) - ``` - - ```python - import pyarrow as pa - from fondant.pipeline import Pipeline - - pipeline = Pipeline(name="my_pipeline", base_path="path/to/pipeline") - - dataset = pipeline.read( - "load_from_csv", - arguments={ - "dataset_uri": "path/to/dataset.csv", - }, - produces={ - "text": pa.string(), - } - ) - - dataset = dataset.apply( - "embed_text", - arguments={...}, - consumes={ - "text": "text", - }, - ) - - dataset = dataset.apply( - "index_weaviate", - arguments={ - "weaviate_url": "http://localhost:8080", - "class_name": "my_class", - }, - consumes={ - "embedding": "embedding" - } - ) - - dataset = pipeline.read( - "load_from_csv", - arguments={ - "dataset_uri": "path/to/prompt_dataset.csv", - }, - produces={ - "prompts": pa.string(), - } - ) - - dataset = dataset.apply( - "embed_text", - arguments={...}, - consumes={ - "prompts": "text", - }, - ) - - dataset = dataset.apply( - "retrieve_from_weaviate", - arguments={ - "weaviate_url": "http://localhost:8080", - "class_name": "my_class", - "top_k": 3, - consumes={ - "text": "text" - } - ) - ``` - -image: fndnt/retrieve_from_weaviate:dev -tags: - - Data retrieval - -consumes: - additionalProperties: true - -produces: - retrieved_chunks: - type: array - items: - type: string - -args: - weaviate_url: - description: The URL of the weaviate instance. - type: str - default: http://localhost:8080 - class_name: - description: - The name of the weaviate class that will be queried - type: str - top_k: - description: Number of chunks to retrieve - type: int - additional_config: - description: | - Additional configuration to pass to the weaviate client. - type: dict - default: {} - additional_headers: - description: | - Additional headers to pass to the weaviate client. - type: dict - default: {} - hybrid_query: - description: | - The hybrid query to be used for retrieval. Optional parameter. - type: str - default: None - hybrid_alpha: - description: | - Argument to change how much each search affects the results. An alpha of 1 is a pure vector search. An alpha of 0 is a pure keyword search. - type: float - default: None - rerank: - description: | - Whether to rerank the results based on the hybrid query. Defaults to False. - Check this notebook for more information on reranking: - https://github.com/weaviate/recipes/blob/main/ranking/cohere-ranking/cohere-ranking.ipynb - https://weaviate.io/developers/weaviate/search/rerank. 
- type: bool - default: False \ No newline at end of file diff --git a/components/retrieve_from_weaviate/requirements.txt b/components/retrieve_from_weaviate/requirements.txt deleted file mode 100644 index 12e81349f..000000000 --- a/components/retrieve_from_weaviate/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -weaviate-client==3.24.1 \ No newline at end of file diff --git a/components/retrieve_from_weaviate/src/main.py b/components/retrieve_from_weaviate/src/main.py deleted file mode 100644 index f69f2e684..000000000 --- a/components/retrieve_from_weaviate/src/main.py +++ /dev/null @@ -1,133 +0,0 @@ -import typing as t - -import dask.dataframe as dd -import pandas as pd -import weaviate -from fondant.component import PandasTransformComponent - - -class RetrieveFromWeaviateComponent(PandasTransformComponent): - def __init__( - self, - *, - weaviate_url: str, - class_name: str, - top_k: int, - additional_config: t.Optional[dict], - additional_headers: t.Optional[dict], - hybrid_query: t.Optional[str], - hybrid_alpha: t.Optional[float], - rerank: bool, - ) -> None: - """ - Args: - weaviate_url: An argument passed to the component. - class_name: Name of class to query - top_k: Amount of context to return. - additional_config: Additional configuration passed to the weaviate client. - additional_headers: Additional headers passed to the weaviate client. - hybrid_query: The hybrid query to be used for retrieval. Optional parameter. - hybrid_alpha: Argument to change how much each search affects the results. An alpha - of 1 is a pure vector search. An alpha of 0 is a pure keyword search. - rerank: Whether to rerank the results based on the hybrid query. Defaults to False. - Check this notebook for more information on reranking: - https://github.com/weaviate/recipes/blob/main/ranking/cohere-ranking/cohere-ranking.ipynb - https://weaviate.io/developers/weaviate/search/rerank. - """ - # Initialize your component here based on the arguments - self.client = weaviate.Client( - url=weaviate_url, - additional_config=additional_config if additional_config else None, - additional_headers=additional_headers if additional_headers else None, - ) - self.class_name = class_name - self.k = top_k - self.hybrid_query, self.hybrid_alpha = self.validate_hybrid_query( - hybrid_query, - hybrid_alpha, - ) - self.rerank = rerank - - @staticmethod - def validate_hybrid_query( - hybrid_query: t.Optional[str], - hybrid_alpha: t.Optional[float], - ): - if hybrid_query is not None and hybrid_alpha is None: - msg = ( - "If hybrid_query is specified, hybrid_alpha must be specified as well." - ) - raise ValueError( - msg, - ) - - return hybrid_query, hybrid_alpha - - def validate_reranker(self, dataframe: dd.DataFrame) -> None: - if self.rerank and "prompt" not in dataframe.columns: - msg = ( - "If rerank is specified, dataframe must contain a 'text' column. Reranking is" - " only supported for text data and not for embeddings." 
- ) - raise ValueError( - msg, - ) - - def teardown(self) -> None: - del self.client - - def retrieve_chunks_from_embeddings(self, vector_query: list): - """Get results from weaviate database.""" - query = ( - self.client.query.get(self.class_name, ["passage"]) - .with_near_vector({"vector": vector_query}) - .with_limit(self.k) - .with_additional(["distance"]) - ) - if self.hybrid_query is not None: - query = query.with_hybrid(query=self.hybrid_query, alpha=self.hybrid_alpha) - - result = query.do() - - result_dict = result["data"]["Get"][self.class_name] - return [retrieved_chunk["passage"] for retrieved_chunk in result_dict] - - def retrieve_chunks_from_prompts(self, prompt: str): - """Get results from weaviate database.""" - query = ( - self.client.query.get(self.class_name, ["passage"]) - .with_near_text({"concepts": [prompt]}) - .with_limit(self.k) - ) - if self.hybrid_query is not None: - query = query.with_hybrid(query=self.hybrid_query, alpha=self.hybrid_alpha) - - if self.rerank: - query = query.with_additional( - 'rerank(property: "passage" query: "prompt") { score }', - ) - - result = query.do() - - result_dict = result["data"]["Get"][self.class_name] - return [retrieved_chunk["passage"] for retrieved_chunk in result_dict] - - def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame: - self.validate_reranker(dataframe) - - if "embedding" in dataframe.columns: - dataframe["retrieved_chunks"] = dataframe["embedding"].apply( - self.retrieve_chunks_from_embeddings, - ) - - elif "prompt" in dataframe.columns: - dataframe["retrieved_chunks"] = dataframe["prompt"].apply( - self.retrieve_chunks_from_prompts, - ) - else: - msg = "Dataframe must contain either an 'embedding' column or a 'prompt' column." - raise ValueError( - msg, - ) - - return dataframe diff --git a/components/retrieve_from_weaviate/tests/pytest.ini b/components/retrieve_from_weaviate/tests/pytest.ini deleted file mode 100644 index bf6a8a517..000000000 --- a/components/retrieve_from_weaviate/tests/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -pythonpath = ../src \ No newline at end of file diff --git a/components/retrieve_from_weaviate/tests/requirements.txt b/components/retrieve_from_weaviate/tests/requirements.txt deleted file mode 100644 index 2a929edcc..000000000 --- a/components/retrieve_from_weaviate/tests/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pytest==7.4.2 diff --git a/components/retrieve_from_weaviate/tests/test_component.py b/components/retrieve_from_weaviate/tests/test_component.py deleted file mode 100644 index 7b30898f1..000000000 --- a/components/retrieve_from_weaviate/tests/test_component.py +++ /dev/null @@ -1,80 +0,0 @@ -import tempfile - -import numpy as np -import pandas as pd -import weaviate -from weaviate.embedded import EmbeddedOptions - -from src.main import RetrieveFromWeaviateComponent - - -def set_up_instance(client): - """Set up an embedded instance using the provided client.""" - data = [ - { - "data_object": { - "passage": "foo", - }, - "vector": np.array([1.0, 2.0]), - }, - { - "data_object": { - "passage": "bar", - }, - "vector": np.array([2.0, 3.0]), - }, - ] - - for entry in data: - client.data_object.create( - entry["data_object"], - class_name="Test", - vector=entry["vector"], - ) - - return "http://localhost:6666" - - -def test_component(): - input_dataframe = pd.DataFrame.from_dict( - { - "id": ["1", "2"], - "embedding": [np.array([1.0, 2.0]), np.array([2.0, 3.0])], - }, - ) - input_dataframe = input_dataframe.set_index("id") - - expected_output_dataframe = 
pd.DataFrame.from_dict( - { - "id": ["1", "2"], - "retrieved_chunks": [["foo", "bar"], ["bar", "foo"]], - }, - ) - expected_output_dataframe = expected_output_dataframe.set_index("id") - - with tempfile.TemporaryDirectory() as tmpdir: - client = weaviate.Client( - embedded_options=EmbeddedOptions( - persistence_data_path=tmpdir, - ), - ) - url = set_up_instance(client) - - component = RetrieveFromWeaviateComponent( - weaviate_url=url, - class_name="Test", - top_k=2, - additional_config={}, - additional_headers={}, - hybrid_query=None, - hybrid_alpha=None, - rerank=False, - ) - - output_dataframe = component.transform(input_dataframe) - - pd.testing.assert_frame_equal( - left=expected_output_dataframe, - right=output_dataframe["retrieved_chunks"].to_frame(), - check_dtype=False, - )
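
For reference, the retrieval exercised by this test reduces to a single near-vector query against Weaviate. A minimal sketch with the v3 client pinned in `requirements.txt` (URL, class name and vector are placeholders; the class is assumed to expose a `passage` property, as above):

```python
# Minimal near-vector query with weaviate-client 3.x, mirroring
# retrieve_chunks_from_embeddings in src/main.py.
import weaviate

client = weaviate.Client(url="http://localhost:8080")

result = (
    client.query.get("MyClass", ["passage"])
    .with_near_vector({"vector": [0.1, 0.2, 0.3]})
    .with_limit(3)
    .with_additional(["distance"])
    .do()
)

chunks = [obj["passage"] for obj in result["data"]["Get"]["MyClass"]]
print(chunks)
```

Hybrid search and reranking are layered onto this same query builder via `.with_hybrid(...)` and a `rerank(...)` additional property, as the component code above does.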