From c3cd4ce537a2084609cd091cfcf4f6144b5e129f Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Fri, 18 Oct 2024 23:06:58 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=92=9A=20Fix=20&=20polish=20analysis-flow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../register_example_file.py | 1 - docs/analysis-flow.ipynb | 62 ++++++++----------- 2 files changed, 25 insertions(+), 38 deletions(-) diff --git a/docs/analysis-flow-scripts/register_example_file.py b/docs/analysis-flow-scripts/register_example_file.py index e0f6da9..9594882 100644 --- a/docs/analysis-flow-scripts/register_example_file.py +++ b/docs/analysis-flow-scripts/register_example_file.py @@ -1,7 +1,6 @@ import lamindb as ln import bionty as bt - ln.track("K4wsS5DTYdFp0000") # an example dataset that has a few cell type, tissue and disease annotations diff --git a/docs/analysis-flow.ipynb b/docs/analysis-flow.ipynb index 306e9d8..e14d861 100644 --- a/docs/analysis-flow.ipynb +++ b/docs/analysis-flow.ipynb @@ -19,9 +19,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Here, we'll track typical data transformations like subsetting that occur during analysis.\n", - "\n", - "If exploring more generally, read this first: {doc}`/project-flow`." + "Here, we'll track typical data transformations like subsetting that occur during analysis." 
] }, { @@ -35,7 +33,7 @@ "outputs": [], "source": [ "# !pip install 'lamindb[jupyter,bionty]'\n", - "!lamin init --storage ./analysis-usecase --schema bionty" + "!lamin init --storage ./analysis-flow --schema bionty" ] }, { @@ -45,8 +43,7 @@ "outputs": [], "source": [ "import lamindb as ln\n", - "import bionty as bt\n", - "from lamin_utils import logger" + "import bionty as bt" ] }, { @@ -54,14 +51,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Register an initial dataset" + "## Save an initial dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Here we register an initial artifact with a pipeline script [register_example_file.py](https://github.com/laminlabs/lamin-usecases/blob/main/docs/analysis-flow-scripts/register_example_file.py)." + "```{eval-rst}\n", + ".. literalinclude:: analysis-flow-scripts/register_example_file.py\n", + " :language: python\n", + " :caption: register_example_file.py\n", + "```" ] }, { @@ -81,7 +82,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Pull the registered dataset, apply a transformation, and register the result" + "## Open a dataset, subset it, and register the result" ] }, { @@ -159,8 +160,8 @@ }, "outputs": [], "source": [ - "cell_types = artifact.cell_types.all().lookup(return_field=\"name\")\n", - "diseases = artifact.diseases.all().lookup(return_field=\"name\")" + "cell_types = artifact.cell_types.all().distinct().lookup(return_field=\"name\")\n", + "diseases = artifact.diseases.all().distinct().lookup(return_field=\"name\")" ] }, { @@ -241,7 +242,6 @@ " },\n", " organism=\"human\",\n", ")\n", - "\n", "curate.validate()" ] }, @@ -255,19 +255,7 @@ }, "outputs": [], "source": [ - "artifact = curate.save_artifact(description=\"anndata with obs subset\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide-output" - ] - }, - "outputs": [], - "source": [ + "artifact = curate.save_artifact(description=\"anndata with obs 
subset\")\n", "artifact.describe()" ] }, @@ -276,7 +264,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Examine data flow" + "## Examine data lineage" ] }, { @@ -342,20 +330,20 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"--> What is the history of this artifact?\\n\")\n", + "print(\"--> What is the lineage of this artifact?\\n\")\n", "artifact.view_lineage()\n", "\n", "print(\"\\n\\n--> Which features and labels are associated with it?\\n\")\n", - "logger.print(artifact.features)\n", - "logger.print(artifact.labels)\n", + "print(artifact.features)\n", + "print(artifact.labels)\n", "\n", - "print(\"\\n\\n--> Which notebook analyzed and registered this artifact\\n\")\n", - "logger.print(artifact.transform)\n", + "print(\"\\n\\n--> Which notebook analyzed and saved this artifact?\\n\")\n", + "print(artifact.transform)\n", "\n", - "print(\"\\n\\n--> By whom\\n\")\n", - "logger.print(artifact.created_by)\n", + "print(\"\\n\\n--> Who saved this artifact?\\n\")\n", + "print(artifact.created_by)\n", "\n", - "print(\"\\n\\n--> And which artifact is its parent\\n\")\n", + "print(\"\\n\\n--> Which artifacts were inputs?\\n\")\n", "display(artifact.run.input_artifacts.df())" ] }, @@ -369,8 +357,8 @@ }, "outputs": [], "source": [ - "!rm -r ./analysis-usecase\n", - "!lamin delete --force analysis-usecase" + "!rm -r ./analysis-flow\n", + "!lamin delete --force analysis-flow" ] } ], @@ -393,7 +381,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.10.13" }, "nbproject": { "id": "eNef4Arw8nNM",