From 15d6827a677bef1f6fa241f4495914dd9c8b2aaa Mon Sep 17 00:00:00 2001 From: Alexander Guschin <1aguschin@gmail.com> Date: Fri, 5 May 2023 14:50:41 +0300 Subject: [PATCH 1/6] initial updates --- content/docs/dvclive/index.md | 9 ++++++ content/docs/dvclive/live/log_artifact.md | 2 +- .../start/experiments/experiment-tracking.md | 32 +++++++++++++------ .../user-guide/model-registry/add-a-model.md | 20 ++++++------ .../what-is-a-model-registry.md | 2 +- .../user-guide/experiment-management/index.md | 12 ++++++- .../project-structure/dvcyaml-files.md | 5 ++- 7 files changed, 55 insertions(+), 27 deletions(-) diff --git a/content/docs/dvclive/index.md b/content/docs/dvclive/index.md index 9386a399a4..a805f4de26 100644 --- a/content/docs/dvclive/index.md +++ b/content/docs/dvclive/index.md @@ -42,6 +42,15 @@ live.log_artifact("model.pt") See `Live.log_artifact()`. + + + +```python +live.log_artifact("model.pt", type="model") +``` + +See `Live.log_artifact()`. + diff --git a/content/docs/dvclive/live/log_artifact.md b/content/docs/dvclive/live/log_artifact.md index c6b6c987ce..6acc5ef2d7 100644 --- a/content/docs/dvclive/live/log_artifact.md +++ b/content/docs/dvclive/live/log_artifact.md @@ -43,7 +43,7 @@ If `Live` was initialized with `dvcyaml=True` (which is the default), it will add an [artifact](/doc/user-guide/project-structure/dvcyaml-files#artifacts) and all the metadata passed as arguments to the corresponding `dvc.yaml`. Passing `type="model"` will mark it as a `model` for DVC and will make it appear in -[Studio Model Registry](/doc/studio) (coming soon). +[Studio Model Registry](/doc/studio). If `name` is not provided, the path stem (last part of the path without the file extension) will be used as the artifact name. 
diff --git a/content/docs/start/experiments/experiment-tracking.md b/content/docs/start/experiments/experiment-tracking.md index 90943aa4fb..0a78ff5600 100644 --- a/content/docs/start/experiments/experiment-tracking.md +++ b/content/docs/start/experiments/experiment-tracking.md @@ -38,9 +38,13 @@ There are some examples below from dvclive.lightning import DVCLiveLogger ... - -trainer = Trainer(logger=DVCLiveLogger(save_dvc_exp=True)) -trainer.fit(model) +with Live(save_dvc_exp=True) as live: + trainer = Trainer( + logger=DVCLiveLogger(save_dvc_exp=True), + default_root_dir="mymodel" + ) + trainer.fit(model) + live.log_artifact("mymodel", type="model") ``` @@ -51,9 +55,11 @@ trainer.fit(model) from dvclive.huggingface import DVCLiveCallback ... - -trainer.add_callback(DVCLiveCallback(save_dvc_exp=True)) -trainer.train() +with Live(save_dvc_exp=True) as live: + trainer.add_callback(DVCLiveCallback(save_dvc_exp=True)) + trainer.train() + trainer.save_model("mymodel") + live.log_artifact("mymodel", type="model") ``` @@ -64,10 +70,14 @@ trainer.train() from dvclive.keras import DVCLiveCallback ... 
- -model.fit( - train_dataset, validation_data=validation_dataset, - callbacks=[DVCLiveCallback(save_dvc_exp=True)]) +with Live(save_dvc_exp=True) as live: + model.fit( + train_dataset, + validation_data=validation_dataset, + callbacks=[DVCLiveCallback(save_dvc_exp=True)] + ) + model.save("mymodel") + live.log_artifact("mymodel", type="model") ``` @@ -86,6 +96,8 @@ with Live(save_dvc_exp=True) as live: for metric_name, value in metrics.items(): live.log_metric(metric_name, value) live.next_step() + + live.log_artifact("model.pkl", type="model") ``` diff --git a/content/docs/studio/user-guide/model-registry/add-a-model.md b/content/docs/studio/user-guide/model-registry/add-a-model.md index 0685c9ad13..955aba0a80 100644 --- a/content/docs/studio/user-guide/model-registry/add-a-model.md +++ b/content/docs/studio/user-guide/model-registry/add-a-model.md @@ -2,9 +2,9 @@ You can add models from any ML project to the model registry. To add a model to your model registry, Iterative Studio creates an annotation for it in an -`artifacts.yaml` file in your Git repository. If you are using the [GTO] command -line tool, you can also add models [from the CLI][gto annotate]. To add models -using Iterative Studio, watch this tutorial video or read on below: +`dvc.yaml` file in your Git repository. If you are using the [GTO] command line +tool, you can also add models [from the CLI][gto annotate]. To add models using +Iterative Studio, watch this tutorial video or read on below: https://www.youtube.com/watch?v=szzv4ZXmYAs @@ -28,10 +28,8 @@ https://www.youtube.com/watch?v=szzv4ZXmYAs project path of the corresponding `.dvc` file. - If the model file is in remote storage and is not DVC-tracked, enter the absolute path of the model file. - - If you use [MLEM] to save your model, use the path to the binary file that - MLEM generates. 
After you have run - [`mlem init`](https://mlem.ai/doc/command-reference/init), Iterative Studio - will be able to parse the `.mlem` file to extract model metadata. + - If you use [MLEM] to save your model, use the path to the binary file or + folder that MLEM generates. If the path you entered is a cloud path, Iterative Studio will ask you for the repository path where the dvc reference to the model should be saved. @@ -54,10 +52,10 @@ https://www.youtube.com/watch?v=szzv4ZXmYAs 8. At this point, the new model appears in the models dashboard. 9. In your Git repository, you will find that an entry for the new model has - been created in the `artifacts.yaml` file in the repository's root. If you - had committed to a new branch, a new pull request (or merge request in the - case of GitLab) will also have been created to merge the new branch into the - base branch. + been created in the `dvc.yaml` file in the repository's root. If you had + committed to a new branch, a new pull request (or merge request in the case + of GitLab) will also have been created to merge the new branch into the base + branch. 10. If you had added a model from a cloud storage, the following will also happen before the commit is created: diff --git a/content/docs/studio/user-guide/model-registry/what-is-a-model-registry.md b/content/docs/studio/user-guide/model-registry/what-is-a-model-registry.md index ccd0f7050c..8ba18cdf90 100644 --- a/content/docs/studio/user-guide/model-registry/what-is-a-model-registry.md +++ b/content/docs/studio/user-guide/model-registry/what-is-a-model-registry.md @@ -54,7 +54,7 @@ Note that while you can get the basic Model Registry functionality within Iterative Studio, there are more things you can do using the [MLEM] and [GTO] command line interface (CLI). For example, to save and deploy models, you will need to use MLEM, although future iterations of the Model Registry may -incorporate these tasks also. 
Similarly, you can use GTO in your CI/CD actions +incorporate these tasks also. Similarly, you can use [GTO] in your CI/CD actions to interpret Git tags for deploying the models to the desired environment. [semantic versioning]: https://semver.org/ diff --git a/content/docs/user-guide/experiment-management/index.md b/content/docs/user-guide/experiment-management/index.md index 7e1edb0fa3..315921deb5 100644 --- a/content/docs/user-guide/experiment-management/index.md +++ b/content/docs/user-guide/experiment-management/index.md @@ -46,7 +46,14 @@ To save an experiment, you can follow one of these roads: Experiments are saved locally by default but you can [share] them so that anyone can reproduce your work. -## Metrics, plots, and parameters +## Datasets and models + +DVC can track datasets or models as part of your repo. One way to let DVC know +the specific artifact is a model or a dataset is to use [DVCLive]. You can also +manually add them to `dvc.yaml`. For models, you'll see them appear in [Studio +Model Registry]. 
+ +## Metrics, plots, parameters DVC can track and compare parameters, metrics, and plots data saved in standard structured files like YAML, JSON, and @@ -74,9 +81,12 @@ https://www.youtube.com/watch?v=LHi3SWGD9nc [pipeline]: /doc/user-guide/pipelines [run]: /doc/user-guide/experiment-management/running-experiments [share]: /doc/user-guide/experiment-management/sharing-experiments +[artifacts]: /doc/user-guide/project-structure/dvcyaml-files#artifacts [parameters]: /doc/user-guide/project-structure/dvcyaml-files#params [metrics]: /doc/user-guide/project-structure/dvcyaml-files#metrics [plots]: /doc/user-guide/project-structure/dvcyaml-files#plots [visualize plots]: /doc/user-guide/experiment-management/visualizing-plots [from the vs code ide]: /doc/vs-code-extension [iterative studio]: /doc/studio +[studio model registry]: + /doc/studio/user-guide/model-registry/what-is-a-model-registry diff --git a/content/docs/user-guide/project-structure/dvcyaml-files.md b/content/docs/user-guide/project-structure/dvcyaml-files.md index d312026629..6c99d4c6e6 100644 --- a/content/docs/user-guide/project-structure/dvcyaml-files.md +++ b/content/docs/user-guide/project-structure/dvcyaml-files.md @@ -20,8 +20,7 @@ Although you can specify artifacts of any `type`, we are in the process of building a DVC-based [model registry](/doc/use-cases/model-registry) that will pick up any artifacts with type `model`. Additionally, they will be picked up and supported by -[Studio Model Registry](/doc/studio/user-guide/model-registry/what-is-a-model-registry) -(coming soon). +[Studio Model Registry](/doc/studio/user-guide/model-registry/what-is-a-model-registry). ```yaml artifacts: @@ -37,7 +36,7 @@ artifacts: ``` Artifact IDs must consist of letters and numbers, and use '-' as separator (but -not at the start or end). The first character must be a letter. +not at the start or end). 
## Metrics From 55ea764691683f03af2e8f73ab80597ec2345122 Mon Sep 17 00:00:00 2001 From: Alexander Guschin <1aguschin@gmail.com> Date: Fri, 12 May 2023 11:29:38 +0300 Subject: [PATCH 2/6] fix some things --- content/docs/dvclive/index.md | 9 --------- 1 file changed, 9 deletions(-) diff --git a/content/docs/dvclive/index.md b/content/docs/dvclive/index.md index a805f4de26..20b48676ed 100644 --- a/content/docs/dvclive/index.md +++ b/content/docs/dvclive/index.md @@ -36,15 +36,6 @@ Including `save_dvc_exp=True` will automatically -```python -live.log_artifact("model.pt") -``` - -See `Live.log_artifact()`. - - - - ```python live.log_artifact("model.pt", type="model") ``` From bc88bb9e03760acbc0635eaa487aaba3ce2d6452 Mon Sep 17 00:00:00 2001 From: Alexander Guschin <1aguschin@gmail.com> Date: Fri, 12 May 2023 11:30:55 +0300 Subject: [PATCH 3/6] Apply suggestions from code review Co-authored-by: Dave Berenbaum --- .../docs/studio/user-guide/model-registry/add-a-model.md | 2 +- content/docs/user-guide/experiment-management/index.md | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/content/docs/studio/user-guide/model-registry/add-a-model.md b/content/docs/studio/user-guide/model-registry/add-a-model.md index 955aba0a80..0d29a05d91 100644 --- a/content/docs/studio/user-guide/model-registry/add-a-model.md +++ b/content/docs/studio/user-guide/model-registry/add-a-model.md @@ -1,7 +1,7 @@ # Add a model You can add models from any ML project to the model registry. To add a model to -your model registry, Iterative Studio creates an annotation for it in an +your model registry, Iterative Studio creates an annotation for it in a `dvc.yaml` file in your Git repository. If you are using the [GTO] command line tool, you can also add models [from the CLI][gto annotate]. 
To add models using Iterative Studio, watch this tutorial video or read on below: diff --git a/content/docs/user-guide/experiment-management/index.md b/content/docs/user-guide/experiment-management/index.md index 315921deb5..e338ae5d8d 100644 --- a/content/docs/user-guide/experiment-management/index.md +++ b/content/docs/user-guide/experiment-management/index.md @@ -48,10 +48,10 @@ can reproduce your work. ## Datasets and models -DVC can track datasets or models as part of your repo. One way to let DVC know -the specific artifact is a model or a dataset is to use [DVCLive]. You can also -manually add them to `dvc.yaml`. For models, you'll see them appear in [Studio -Model Registry]. +DVC can track models or datasets as part of your repo, and you can manage those +models with [Studio Model Registry]. One way to log models or other artifacts is with [DVCLive]. +You can also track them with `dvc add` and declare metadata for the [Studio Model Registry] +in [`dvc.yaml`][artifacts]. ## Metrics, plots, parameters From ab725df353cda5bf89488d04fb3dc8bc2a5d6f4c Mon Sep 17 00:00:00 2001 From: Alexander Guschin <1aguschin@gmail.com> Date: Fri, 12 May 2023 14:28:31 +0300 Subject: [PATCH 4/6] address feedback in pr --- content/docs/dvclive/index.md | 2 +- .../start/experiments/experiment-tracking.md | 26 ++++++++++++++----- .../user-guide/experiment-management/index.md | 14 +++++----- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/content/docs/dvclive/index.md b/content/docs/dvclive/index.md index 20b48676ed..8af346d960 100644 --- a/content/docs/dvclive/index.md +++ b/content/docs/dvclive/index.md @@ -37,7 +37,7 @@ Including `save_dvc_exp=True` will automatically ```python -live.log_artifact("model.pt", type="model") +live.log_artifact("model.pt", type="model", name="gpt") ``` See `Live.log_artifact()`. 
diff --git a/content/docs/start/experiments/experiment-tracking.md b/content/docs/start/experiments/experiment-tracking.md index 0a78ff5600..6642063460 100644 --- a/content/docs/start/experiments/experiment-tracking.md +++ b/content/docs/start/experiments/experiment-tracking.md @@ -39,12 +39,20 @@ from dvclive.lightning import DVCLiveLogger ... with Live(save_dvc_exp=True) as live: + checkpoint = ModelCheckpoint(dirpath="mymodel") trainer = Trainer( - logger=DVCLiveLogger(save_dvc_exp=True), - default_root_dir="mymodel" + logger=DVCLiveLogger( + save_dvc_exp=True, + experiment=live + ), + callbacks=checkpoint ) trainer.fit(model) - live.log_artifact("mymodel", type="model") + live.log_artifact( + checkpoint.best_model_path, + type="model", + name="lightning-model" + ) ``` @@ -56,7 +64,9 @@ from dvclive.huggingface import DVCLiveCallback ... with Live(save_dvc_exp=True) as live: - trainer.add_callback(DVCLiveCallback(save_dvc_exp=True)) + trainer.add_callback( + DVCLiveCallback(save_dvc_exp=True, live=live) + ) trainer.train() trainer.save_model("mymodel") live.log_artifact("mymodel", type="model") @@ -74,7 +84,9 @@ with Live(save_dvc_exp=True) as live: model.fit( train_dataset, validation_data=validation_dataset, - callbacks=[DVCLiveCallback(save_dvc_exp=True)] + callbacks=[ + DVCLiveCallback(save_dvc_exp=True, live=live) + ] ) model.save("mymodel") live.log_artifact("mymodel", type="model") @@ -111,7 +123,9 @@ containing the results and the changes needed to reproduce it. Framework and any [data tracked by DVC](/doc/start/data-management/data-versioning) but you can also [log additional info](/doc/dvclive#log-data) to be included in the -experiment. +experiment. `live.log_artifact("mymodel", type="model")` will +[track your model with DVC](/doc/dvclive#log-data) and enable managing it with +[Studio Model Registry](/doc/studio/user-guide/model-registry/what-is-a-model-registry). 
diff --git a/content/docs/user-guide/experiment-management/index.md b/content/docs/user-guide/experiment-management/index.md index e338ae5d8d..1d2096e4b9 100644 --- a/content/docs/user-guide/experiment-management/index.md +++ b/content/docs/user-guide/experiment-management/index.md @@ -46,13 +46,6 @@ To save an experiment, you can follow one of these roads: Experiments are saved locally by default but you can [share] them so that anyone can reproduce your work. -## Datasets and models - -DVC can track models or datasets as part of your repo, and you can manage those -models with [Studio Model Registry]. One way to log models or other artifacts is with [DVCLive]. -You can also track them with `dvc add` and declare metadata for the [Studio Model Registry] -in [`dvc.yaml`][artifacts]. - ## Metrics, plots, parameters DVC can track and compare parameters, metrics, and @@ -63,6 +56,13 @@ parameters, metrics, and plots (and to automatically configure them) is with metafiles to specify which files are [parameters], [metrics], or [plots] (and to specify how to [visualize plots]). +## Models and datasets + +DVC can track models or datasets as part of your repo, and you can manage those +models with [Studio Model Registry]. One way to log models or other artifacts is +with [DVCLive]. You can also track them with `dvc add` and declare metadata for +the [Studio Model Registry] in [`dvc.yaml`][artifacts]. 
+ ## Work with DVC Experiments from a GUI DVC Experiments can be used directly [from the VS Code IDE] or online with From b81a341ea38cc8e12af232fa3ec5f843bf48f69b Mon Sep 17 00:00:00 2001 From: Alexander Guschin <1aguschin@gmail.com> Date: Mon, 15 May 2023 13:02:15 +0300 Subject: [PATCH 5/6] fixes from review --- content/docs/dvclive/live/log_artifact.md | 9 ++++++ .../start/experiments/experiment-tracking.md | 3 +- content/docs/studio/get-started.md | 2 +- .../user-guide/model-registry/add-a-model.md | 30 ++++++++----------- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/content/docs/dvclive/live/log_artifact.md b/content/docs/dvclive/live/log_artifact.md index 21691f2f5a..1ed045b6f1 100644 --- a/content/docs/dvclive/live/log_artifact.md +++ b/content/docs/dvclive/live/log_artifact.md @@ -49,10 +49,19 @@ all the metadata passed as arguments to the corresponding `dvc.yaml`. Passing - `path` - an existing directory or file. +- `type` - an optional type of the artifact. Common types are `model` or + `dataset`. + - `name` - an optional custom name of an artifact. If not provided the path stem (last part of the path without the file extension) will be used as the artifact name. +- `desc` - an optional description of an artifact. + +- `labels` - optional labels describing the artifact. + +- `meta` - optional metainformation in `key: value` format. + - `copy` - copy a directory or file at `path` into the `dvclive/artifacts` location ([default](/doc/dvclive/how-it-works#directory-structure)) before tracking it. 
The new path is used instead of the original one to track the diff --git a/content/docs/start/experiments/experiment-tracking.md b/content/docs/start/experiments/experiment-tracking.md index 6642063460..7ae0830df3 100644 --- a/content/docs/start/experiments/experiment-tracking.md +++ b/content/docs/start/experiments/experiment-tracking.md @@ -124,7 +124,8 @@ Framework and any [data tracked by DVC](/doc/start/data-management/data-versioning) but you can also [log additional info](/doc/dvclive#log-data) to be included in the experiment. `live.log_artifact("mymodel", type="model")` will -[track your model with DVC](/doc/dvclive#log-data) and enable managing it with +[track your model with DVC](/doc/dvclive/live/log-artifact) and enable managing +it with [Studio Model Registry](/doc/studio/user-guide/model-registry/what-is-a-model-registry). diff --git a/content/docs/studio/get-started.md b/content/docs/studio/get-started.md index 0dc5a508e1..c4a8c4ad9e 100644 --- a/content/docs/studio/get-started.md +++ b/content/docs/studio/get-started.md @@ -74,7 +74,7 @@ details]). ## Manage models 1. Click on the `Models` tab to open the central [Models dashboard]. Iterative - Studio uses your project's `artifacts.yaml` file to identify ML models and + Studio uses your project's `dvc.yaml` files to identify ML models and specially formatted Git tags to identify model versions and stage assignments. diff --git a/content/docs/studio/user-guide/model-registry/add-a-model.md b/content/docs/studio/user-guide/model-registry/add-a-model.md index 0d29a05d91..115e42fcd2 100644 --- a/content/docs/studio/user-guide/model-registry/add-a-model.md +++ b/content/docs/studio/user-guide/model-registry/add-a-model.md @@ -22,14 +22,10 @@ https://www.youtube.com/watch?v=szzv4ZXmYAs 3. Enter the path of the model file as follows: - - If the model file is in the Git repository, enter the relative path of the - model (from the repository root). 
- - If the model file is in remote storage but is DVC-tracked, enter the - project path of the corresponding `.dvc` file. - - If the model file is in remote storage and is not DVC-tracked, enter the - absolute path of the model file. - - If you use [MLEM] to save your model, use the path to the binary file or - folder that MLEM generates. + - If the model file is in the Git repository (including if it is saved with + DVC and/or [MLEM]), enter the relative path of the model (from the + repository root). + - Otherwise, enter the URL to the model file in the cloud. If the path you entered is a cloud path, Iterative Studio will ask you for the repository path where the dvc reference to the model should be saved. @@ -59,15 +55,15 @@ https://www.youtube.com/watch?v=szzv4ZXmYAs 10. If you had added a model from a cloud storage, the following will also happen before the commit is created: -- If the repository does not contain DVC, Iterative Studio will run `dvc init`. - It is needed to version the model in the git repository. - [Learn more](/doc/command-reference/init). -- If the specified directory does not exist yet, it will be created. -- Iterative Studio will import the model to the repository by executing - `dvc import-url / --no-exec`. -- Iterative Studio annotate the model by executing - `gto annotate --path / --type model`. - [Learn more][gto annotate]. + - If the repository does not contain DVC, Iterative Studio will run + `dvc init`. It is needed to version the model in the git repository. + [Learn more](/doc/command-reference/init). + - If the specified directory does not exist yet, it will be created. + - Iterative Studio will import the model to the repository by executing + `dvc import-url / --no-exec`. + - Iterative Studio annotates the model by executing + `gto annotate --path / --type model`. + [Learn more][gto annotate]. 
[connected repository]: /doc/studio/user-guide/projects-and-experiments/create-a-project From 5fb150197775c60104c0b4bf517315a1428b0775 Mon Sep 17 00:00:00 2001 From: Alexander Guschin <1aguschin@gmail.com> Date: Mon, 15 May 2023 15:50:34 +0300 Subject: [PATCH 6/6] Update content/docs/start/experiments/experiment-tracking.md --- content/docs/start/experiments/experiment-tracking.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/docs/start/experiments/experiment-tracking.md b/content/docs/start/experiments/experiment-tracking.md index 7ae0830df3..10925fd059 100644 --- a/content/docs/start/experiments/experiment-tracking.md +++ b/content/docs/start/experiments/experiment-tracking.md @@ -124,7 +124,7 @@ Framework and any [data tracked by DVC](/doc/start/data-management/data-versioning) but you can also [log additional info](/doc/dvclive#log-data) to be included in the experiment. `live.log_artifact("mymodel", type="model")` will -[track your model with DVC](/doc/dvclive/live/log-artifact) and enable managing +[track your model with DVC](/doc/dvclive/live/log_artifact) and enable managing it with [Studio Model Registry](/doc/studio/user-guide/model-registry/what-is-a-model-registry).