From f196a5dc55a0a52c66b97dc10532bfd14d9b8142 Mon Sep 17 00:00:00 2001 From: Matthias Richter Date: Fri, 5 Apr 2024 10:54:23 +0200 Subject: [PATCH] Fixing docs --- docs/dataset.md | 25 +++++++++++++++++++++++-- mkdocs.yml | 12 ++++++------ src/fondant/dataset/dataset.py | 6 ++++++ 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/docs/dataset.md b/docs/dataset.md index f56e0b37..e0fcab90 100644 --- a/docs/dataset.md +++ b/docs/dataset.md @@ -4,6 +4,26 @@ Fondant helps you build datasets by providing a set of operations to load, trans and write data. With Fondant, you can use both reusable components and custom components, and chain them to create datasets. +## Load a Fondant dataset + +You can initialise a dataset from a previous run by using the `read` method. + + + +```python +from fondant.dataset import Dataset + +dataset = Dataset.read("path/to/manifest.json") + +``` + +??? "View a detailed reference of the `Dataset.read()` method" + + ::: fondant.dataset.dataset.Dataset.read + handler: python + options: + show_source: false + ## Build a dataset Start by creating a `dataset.py` file and adding the following code. @@ -28,7 +48,7 @@ This code initializes a `Dataset` instance with a load component. The load compo ??? "View a detailed reference of the `Dataset.create()` method" - ::: fondant.dataset.Dataset.read + ::: fondant.dataset.dataset.Dataset.create handler: python options: show_source: false @@ -99,7 +119,7 @@ your dataset. When using other runners you will need to make sure that your new environment has access to: - - The base path of your pipeline (as mentioned above) + - The working directory of your workflow (as mentioned above) - The images used in your pipeline (make sure you have access to the registries where the images are stored) @@ -172,6 +192,7 @@ your dataset. containing your dataset, or a factory function that will create your dataset. 
The working directory can be: + - **A remote cloud location (S3, GCS, Azure Blob storage):** For the local runner, make sure that your local credentials or service account have read/write access to the designated working directory and that you provide them to the dataset. diff --git a/mkdocs.yml b/mkdocs.yml index 7f1b8880..db7888dc 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -41,10 +41,10 @@ nav: - Using the documentation: documentation_guide.md - Getting Started: - Installation: guides/installation.md - - Running your first pipeline: guides/first_pipeline.md - - Building your own pipeline: guides/build_a_simple_pipeline.md + - Running your first pipeline: guides/first_dataset.md + - Building your own pipeline: guides/build_a_simple_dataset.md - Implementing custom components: guides/implement_custom_components.md - - Pipeline: pipeline.md + - Dataset: dataset.md - Components: - Components: components/components.md - Lightweight components: components/lightweight_components.md @@ -92,9 +92,9 @@ markdown_extensions: - pymdownx.superfences - pymdownx.details - attr_list - - pymdownx.emoji: - emoji_index: !!python/name:materialx.emoji.twemoji - emoji_generator: !!python/name:materialx.emoji.to_svg + #- pymdownx.emoji: + # emoji_index: !!python/name:materialx.emoji.twemoji + # emoji_generator: !!python/name:materialx.emoji.to_svg - admonition - def_list - toc: diff --git a/src/fondant/dataset/dataset.py b/src/fondant/dataset/dataset.py index 5aee821d..3276e63e 100644 --- a/src/fondant/dataset/dataset.py +++ b/src/fondant/dataset/dataset.py @@ -500,6 +500,12 @@ def register_operation( @staticmethod def read(manifest_path: str): + """ + Read a dataset from a manifest file. + + Args: + manifest_path: The path to the manifest file. + """ manifest = Manifest.from_file(manifest_path) return Dataset(manifest=manifest)