From c975a864cd01bd702a80453c82fbc85322145de2 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Mon, 1 Mar 2021 16:49:04 -0600 Subject: [PATCH 01/19] ref: dump exp show Examples from https://github.com/iterative/dvc.org/pull/1926/files?file-filters%5B%5D=.json#diff-eb5e116f8c720a515197a450f4ec1c06287a8c9abd2cf241e88dda549a853e23 --- content/docs/command-reference/exp/show.md | 83 ++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/content/docs/command-reference/exp/show.md b/content/docs/command-reference/exp/show.md index 1a49b974fa..2a45af8ea8 100644 --- a/content/docs/command-reference/exp/show.md +++ b/content/docs/command-reference/exp/show.md @@ -109,3 +109,86 @@ metric or param. problems arise, otherwise 1. - `-v`, `--verbose` - displays detailed tracing information. + +## Example: Tabular data + +> This example is based on our +> [Get Started](/doc/tutorials/get-started/experiments), where you can find the +> actual source code. The basic use case shows the values in the current +> workspace: + +```dvc +$ dvc exp show +┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓ +┃ Experiment ┃ Created ┃ auc ┃ featurize.max_fea… ┃ featurize.ngrams ┃ prepare.seed ┃ prepare.split ┃ train.n_estimators ┃ train.seed ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩ +│ workspace │ - │ 0.61314 │ 1500 │ 2 │ 20170428 │ 0.2 │ 50 │ 20170428 │ +│ 11-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ 20170428 │ 0.2 │ 50 │ 20170428 │ +│ ├── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ 20170428 │ 0.2 │ 50 │ 20170428 │ +│ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ 20170428 │ 0.2 │ 50 │ 20170428 │ +│ └── exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ 20170428 │ 0.2 │ 50 │ 20170428 │ 
+└───────────────────────┴──────────────┴─────────┴────────────────────┴──────────────────┴──────────────┴───────────────┴────────────────────┴────────────┘ +``` + +```dvc +$ dvc exp show --include-params=featurize +┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ +┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +│ workspace │ - │ 0.61314 │ 1500 │ 2 │ +│ 11-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ +│ ├── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ +│ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ +│ └── exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ +└───────────────────────┴──────────────┴─────────┴────────────────────────┴──────────────────┘ +``` + +To sort experiments by the `auc` metric in ascending order: + +```dvc +$ dvc exp show --include-params=featurize --sort-by=auc --sort-order=asc +┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ +┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +│ workspace │ - │ 0.61314 │ 1500 │ 2 │ +│ 11-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ +│ ├── exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ +│ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ +│ └── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ +└───────────────────────┴──────────────┴─────────┴────────────────────────┴──────────────────┘ +``` + +To see all experiments in the workspace and down the Git history: + +```dvc +$ dvc exp show --all-commits --include-params=featurize --sort-by=auc --sort-order=asc +┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ +┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ 
+┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +│ workspace │ - │ 0.61314 │ 1500 │ 2 │ +│ 11-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ +│ ├── exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ +│ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ +│ └── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ +│ 10-bigrams-model │ Jun 20, 2020 │ 0.54175 │ 1500 │ 2 │ +│ └── exp-069d9 │ Sep 24, 2020 │ 0.51076 │ 2500 │ 2 │ +│ 9-evaluation │ Jun 20, 2020 │ 0.54175 │ 500 │ 1 │ +│ 8-ml-pipeline │ Jun 20, 2020 │ - │ 500 │ 1 │ +│ 6-prep-stage │ Jun 20, 2020 │ - │ 500 │ 1 │ +│ 5-source-code │ Jun 20, 2020 │ - │ 500 │ 1 │ +│ 4-import-data │ Jun 20, 2020 │ - │ 1500 │ 2 │ +│ 2-track-data │ Jun 20, 2020 │ - │ 1500 │ 2 │ +│ 3-config-remote │ Jun 20, 2020 │ - │ 1500 │ 2 │ +│ 1-dvc-init │ Jun 20, 2020 │ - │ 1500 │ 2 │ +│ 0-git-init │ Jun 20, 2020 │ - │ 1500 │ 2 │ +└───────────────────────┴──────────────┴─────────┴────────────────────────┴──────────────────┘ +``` + +Note that in the final example, the top level Git commits remain in their +original order. The experiment sorting only applies to experiments grouped +according to each top level Git commit. + +The +[Compare Experiments](/doc/tutorials/get-started/experiments#compare-experiments) +chapter of our _Get Started_ covers the `-a` option to collect and print a +metrics file value across all Git branches. 
From 178b110da84733a7f240446c039b0ea7c6294283 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Mon, 1 Mar 2021 16:54:12 -0600 Subject: [PATCH 02/19] ref: dump exp apply Examples from https://github.com/iterative/dvc.org/pull/1926/files#diff-62736ef3998f181172717e6f0ba3bce23bd6844708089a1d629dc9eacdd349f9 --- content/docs/command-reference/exp/apply.md | 98 +++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/content/docs/command-reference/exp/apply.md b/content/docs/command-reference/exp/apply.md index 9167047bd0..b7d11f7787 100644 --- a/content/docs/command-reference/exp/apply.md +++ b/content/docs/command-reference/exp/apply.md @@ -38,3 +38,101 @@ the current Git commit. - `-v`, `--verbose` - displays detailed tracing information from executing the `dvc pull` command. + +## Example: Apply and promote an experiment + +> This example is based on our +> [Get Started](/doc/tutorials/get-started/experiments), where you can find the +> actual source code. Let's say we have run 3 experiments in our project +> workspace: + +```dvc +$ dvc exp show --include-params=featurize +┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ +┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +│ workspace │ - │ 0.61314 │ 1500 │ 2 │ +│ 11-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ +│ ├── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ +│ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ +│ └── exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ +└───────────────────────┴──────────────┴─────────┴────────────────────────┴──────────────────┘ +``` + +We now wish to commit the results of experiment `1dad0d2` into our project. + +```dvc +$ dvc exp apply exp-1dad0 +Changes for experiment 'exp-1dad0' have been applied to your current workspace. 
+``` + +We can inspect these changes with Git: + +```dvc +$ git status +On branch master +Your branch is up to date with 'origin/master'. +Changes not staged for commit: + modified: dvc.lock + modified: params.yaml + modified: prc.json + modified: scores.json +$ git diff params.yaml scores.json +diff --git a/params.yaml b/params.yaml +index 4c4d898..faf781a 100644 +--- a/params.yaml ++++ b/params.yaml +@@ -3,7 +3,7 @@ prepare: + seed: 20170428 + featurize: +- max_features: 1500 ++ max_features: 2000 + ngrams: 2 + train: +diff --git a/scores.json b/scores.json +index c995f24..c640c4e 100644 +--- a/scores.json ++++ b/scores.json +@@ -1 +1 @@ +-{"auc": 0.6131382960762474} +\ No newline at end of file ++{"auc": 0.5775633054725381} +\ No newline at end of file +``` + +and with DVC: + +``` +$ dvc status +Data and pipelines are up to date. +$ dvc diff +Modified: + data/features/ + data/features/test.pkl + data/features/train.pkl + model.pkl + prc.json + scores.json +files summary: 0 added, 0 deleted, 5 modified, 0 not in cache +``` + +To promote this experiment we simply `git add` and `git commit` the changes: + +```dvc +$ git add . 
+$ git commit -m "promote experiment exp-1dad0" +[master 0412386] promote experiment exp-1dad0 +``` + +Finally, we can now see that the promoted experiment is the new tip of our +master branch: + +```dvc +$ dvc exp show --include-params=featurize +┏━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ +┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ +┡━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +│ workspace │ - │ 0.57756 │ 2000 │ 2 │ +│ master │ 04:31 PM │ 0.57756 │ 2000 │ 2 │ +└────────────┴──────────┴─────────┴────────────────────────┴──────────────────┘ +``` From 77b96f12c03d27fdef9f8ce42050a95a1ffd9c40 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Mon, 1 Mar 2021 17:08:46 -0600 Subject: [PATCH 03/19] ref: copy edit dumped exp show Examples --- content/docs/command-reference/exp/show.md | 29 +++++++++++++--------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/content/docs/command-reference/exp/show.md b/content/docs/command-reference/exp/show.md index 2a45af8ea8..b7b9c53347 100644 --- a/content/docs/command-reference/exp/show.md +++ b/content/docs/command-reference/exp/show.md @@ -110,12 +110,16 @@ metric or param. - `-v`, `--verbose` - displays detailed tracing information. -## Example: Tabular data +## Examples > This example is based on our > [Get Started](/doc/tutorials/get-started/experiments), where you can find the -> actual source code. The basic use case shows the values in the current -> workspace: +> actual source code. + +Let's say we have run 3 experiments in our project. 
The basic usage shows the +workspace (Git working tree) and experiments derived from `HEAD` +(`11-bigrams-experiment` branch in this case), and all of their metrics and +params (scroll right to see all): ```dvc $ dvc exp show @@ -130,6 +134,10 @@ $ dvc exp show └───────────────────────┴──────────────┴─────────┴────────────────────┴──────────────────┴──────────────┴───────────────┴────────────────────┴────────────┘ ``` +> You can exit this screen with `Q`, typically. + +Let's limit the param columns to only include the `featurize` group: + ```dvc $ dvc exp show --include-params=featurize ┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ @@ -143,7 +151,7 @@ $ dvc exp show --include-params=featurize └───────────────────────┴──────────────┴─────────┴────────────────────────┴──────────────────┘ ``` -To sort experiments by the `auc` metric in ascending order: +Sort experiments by the `auc` metric, in ascending order: ```dvc $ dvc exp show --include-params=featurize --sort-by=auc --sort-order=asc @@ -158,7 +166,7 @@ $ dvc exp show --include-params=featurize --sort-by=auc --sort-order=asc └───────────────────────┴──────────────┴─────────┴────────────────────────┴──────────────────┘ ``` -To see all experiments in the workspace and down the Git history: +To see all experiments throughout the Git history: ```dvc $ dvc exp show --all-commits --include-params=featurize --sort-by=auc --sort-order=asc @@ -184,11 +192,8 @@ $ dvc exp show --all-commits --include-params=featurize --sort-by=auc --sort-ord └───────────────────────┴──────────────┴─────────┴────────────────────────┴──────────────────┘ ``` -Note that in the final example, the top level Git commits remain in their -original order. The experiment sorting only applies to experiments grouped -according to each top level Git commit. +Note that in the final example, Git commits remain in chronological order. The +sorting only applies to experiment groups (sharing a parent commit). 
-The -[Compare Experiments](/doc/tutorials/get-started/experiments#compare-experiments) -chapter of our _Get Started_ covers the `-a` option to collect and print a -metrics file value across all Git branches. +📖 See [Metrics, Parameters, and Plots](/doc/start/metrics-parameters-plots) for +an introduction to parameters, metrics, plots. From a0ccd6384d6a615b4f8e5791444cce2374387742 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Mon, 1 Mar 2021 17:21:15 -0600 Subject: [PATCH 04/19] ref: copy edit dumped exp apply Example --- content/docs/command-reference/exp/apply.md | 59 ++++++++------------- 1 file changed, 23 insertions(+), 36 deletions(-) diff --git a/content/docs/command-reference/exp/apply.md b/content/docs/command-reference/exp/apply.md index b7d11f7787..c3b76e3a86 100644 --- a/content/docs/command-reference/exp/apply.md +++ b/content/docs/command-reference/exp/apply.md @@ -39,12 +39,13 @@ the current Git commit. - `-v`, `--verbose` - displays detailed tracing information from executing the `dvc pull` command. -## Example: Apply and promote an experiment +## Example: Make an experiment persistent > This example is based on our > [Get Started](/doc/tutorials/get-started/experiments), where you can find the -> actual source code. Let's say we have run 3 experiments in our project -> workspace: +> actual source code. + +Let's say we have run 3 experiments in our project: ```dvc $ dvc exp show --include-params=featurize @@ -59,50 +60,37 @@ $ dvc exp show --include-params=featurize └───────────────────────┴──────────────┴─────────┴────────────────────────┴──────────────────┘ ``` -We now wish to commit the results of experiment `1dad0d2` into our project. +Since `exp-e6c97` has the best `auc`, we may want to commit it into our project +(this is what we call to "make it persistent"): ```dvc -$ dvc exp apply exp-1dad0 -Changes for experiment 'exp-1dad0' have been applied to your current workspace. 
+$ dvc exp apply exp-e6c97 +Changes for experiment 'exp-e6c97' have been applied... ``` -We can inspect these changes with Git: +We can inspect what changed in the workspace with Git, ```dvc $ git status On branch master -Your branch is up to date with 'origin/master'. Changes not staged for commit: modified: dvc.lock modified: params.yaml - modified: prc.json modified: scores.json -$ git diff params.yaml scores.json -diff --git a/params.yaml b/params.yaml -index 4c4d898..faf781a 100644 ---- a/params.yaml -+++ b/params.yaml +$ git diff params.yaml +``` + +```git @@ -3,7 +3,7 @@ prepare: - seed: 20170428 featurize: -- max_features: 1500 -+ max_features: 2000 +- max_features: 2000 ++ max_features: 1500 ngrams: 2 - train: -diff --git a/scores.json b/scores.json -index c995f24..c640c4e 100644 ---- a/scores.json -+++ b/scores.json -@@ -1 +1 @@ --{"auc": 0.6131382960762474} -\ No newline at end of file -+{"auc": 0.5775633054725381} -\ No newline at end of file ``` and with DVC: -``` +```dvc $ dvc status Data and pipelines are up to date. $ dvc diff @@ -111,21 +99,17 @@ Modified: data/features/test.pkl data/features/train.pkl model.pkl - prc.json - scores.json -files summary: 0 added, 0 deleted, 5 modified, 0 not in cache +files summary: 0 added, 0 deleted, 3 modified, 0 not in cache ``` -To promote this experiment we simply `git add` and `git commit` the changes: +To finish making this experiment persistent, we commit the changes to the repo: ```dvc $ git add . 
-$ git commit -m "promote experiment exp-1dad0" -[master 0412386] promote experiment exp-1dad0 +$ git commit -m "persist exp-e6c97" ``` -Finally, we can now see that the promoted experiment is the new tip of our -master branch: +We can now see that the experiment is the new tip of our master branch: ```dvc $ dvc exp show --include-params=featurize @@ -136,3 +120,6 @@ $ dvc exp show --include-params=featurize │ master │ 04:31 PM │ 0.57756 │ 2000 │ 2 │ └────────────┴──────────┴─────────┴────────────────────────┴──────────────────┘ ``` + +Note that all the other experiments are based on a previous commit, so +`dvc exp show` won't display them by default (but they're still saved). From 7f41d8373ea68116bad4545c083bcc4c6b510881 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Mon, 1 Mar 2021 17:27:44 -0600 Subject: [PATCH 05/19] ref: dump and copy edit exp branch Example from https://github.com/iterative/dvc.org/pull/1926/files#diff-cc5af77c4ee8049c6726817aff466e2908f7a54b4435524d5a0fe2059bd48f9d --- content/docs/command-reference/exp/branch.md | 41 ++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/content/docs/command-reference/exp/branch.md b/content/docs/command-reference/exp/branch.md index 8af16b5e64..c78cd4f2c7 100644 --- a/content/docs/command-reference/exp/branch.md +++ b/content/docs/command-reference/exp/branch.md @@ -48,3 +48,44 @@ To switch into the new branch, use `git checkout branch` and `dvc checkout`. - `-v`, `--verbose` - displays detailed tracing information from executing the `dvc pull` command. + +## Example: Make a persistent branch from an experiment + +> This example is based on our +> [Get Started](/doc/tutorials/get-started/experiments), where you can find the +> actual source code. 
+ +Let's say we have run 3 experiments in our project: + +```dvc +$ dvc exp show --include-params=featurize +┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ +┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +│ workspace │ - │ 0.61314 │ 1500 │ 2 │ +│ 11-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ +│ ├── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ +│ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ +│ └── exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ +└───────────────────────┴──────────────┴─────────┴────────────────────────┴──────────────────┘ +``` + +We may want to branch off `exp-1dad0` for a separate experimentation process +(based on 2000 `max_features`). + +```dvc +$ dvc exp branch exp-1dad0 maxf-2000 +Git branch 'maxf-2000' has been created from experiment 'exp-1dad0'. +To switch to the new branch run: + git checkout maxf-2000 +``` + +We can inspect the result with Git: + +```dvc +$ git branch +* master + maxf-2000 +``` + +`maxf-2000` can now be merged, rebased, pushed, etc. like any other Git branch. 
From baa601cda6cfae5694ee26e63a20bc322b5f3c83 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Mon, 1 Mar 2021 17:38:58 -0600 Subject: [PATCH 06/19] ref: update exp Examples sample repo tags --- content/docs/command-reference/exp/apply.md | 2 +- content/docs/command-reference/exp/branch.md | 2 +- content/docs/command-reference/exp/show.md | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/content/docs/command-reference/exp/apply.md b/content/docs/command-reference/exp/apply.md index c3b76e3a86..852b6bde39 100644 --- a/content/docs/command-reference/exp/apply.md +++ b/content/docs/command-reference/exp/apply.md @@ -53,7 +53,7 @@ $ dvc exp show --include-params=featurize ┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ │ workspace │ - │ 0.61314 │ 1500 │ 2 │ -│ 11-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ +│ 10-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ │ ├── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ │ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ │ └── exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ diff --git a/content/docs/command-reference/exp/branch.md b/content/docs/command-reference/exp/branch.md index c78cd4f2c7..baea1cf31e 100644 --- a/content/docs/command-reference/exp/branch.md +++ b/content/docs/command-reference/exp/branch.md @@ -63,7 +63,7 @@ $ dvc exp show --include-params=featurize ┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ │ workspace │ - │ 0.61314 │ 1500 │ 2 │ -│ 11-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ +│ 10-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ │ ├── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ │ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ │ └── exp-1df77 │ Oct 09, 2020 
│ 0.51676 │ 500 │ 2 │ diff --git a/content/docs/command-reference/exp/show.md b/content/docs/command-reference/exp/show.md index b7b9c53347..ffad01934f 100644 --- a/content/docs/command-reference/exp/show.md +++ b/content/docs/command-reference/exp/show.md @@ -118,7 +118,7 @@ metric or param. Let's say we have run 3 experiments in our project. The basic usage shows the workspace (Git working tree) and experiments derived from `HEAD` -(`11-bigrams-experiment` branch in this case), and all of their metrics and +(`10-bigrams-experiment` branch in this case), and all of their metrics and params (scroll right to see all): ```dvc @@ -127,7 +127,7 @@ $ dvc exp show ┃ Experiment ┃ Created ┃ auc ┃ featurize.max_fea… ┃ featurize.ngrams ┃ prepare.seed ┃ prepare.split ┃ train.n_estimators ┃ train.seed ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩ │ workspace │ - │ 0.61314 │ 1500 │ 2 │ 20170428 │ 0.2 │ 50 │ 20170428 │ -│ 11-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ 20170428 │ 0.2 │ 50 │ 20170428 │ +│ 10-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ 20170428 │ 0.2 │ 50 │ 20170428 │ │ ├── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ 20170428 │ 0.2 │ 50 │ 20170428 │ │ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ 20170428 │ 0.2 │ 50 │ 20170428 │ │ └── exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ 20170428 │ 0.2 │ 50 │ 20170428 │ @@ -144,7 +144,7 @@ $ dvc exp show --include-params=featurize ┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ │ workspace │ - │ 0.61314 │ 1500 │ 2 │ -│ 11-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ +│ 10-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ │ ├── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ │ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ │ └── 
exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ @@ -159,7 +159,7 @@ $ dvc exp show --include-params=featurize --sort-by=auc --sort-order=asc ┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ │ workspace │ - │ 0.61314 │ 1500 │ 2 │ -│ 11-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ +│ 10-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ │ ├── exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ │ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ │ └── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ @@ -174,7 +174,7 @@ $ dvc exp show --all-commits --include-params=featurize --sort-by=auc --sort-ord ┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ │ workspace │ - │ 0.61314 │ 1500 │ 2 │ -│ 11-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ +│ 10-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ │ ├── exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ │ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ │ └── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ From 88686f3997449352e483afec618ba8d0efb52cc5 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Mon, 1 Mar 2021 17:39:47 -0600 Subject: [PATCH 07/19] ref: dump and copy edit exp diff Example from https://github.com/iterative/dvc.org/pull/1926/files#diff-03846d2c3cbcbfacc654745454283a35e2adc2e5c3c4195ea2601ef19733363a --- content/docs/command-reference/exp/diff.md | 56 ++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/content/docs/command-reference/exp/diff.md b/content/docs/command-reference/exp/diff.md index 69226b98eb..8d4610d5d7 100644 --- a/content/docs/command-reference/exp/diff.md +++ b/content/docs/command-reference/exp/diff.md @@ -75,3 +75,59 @@ all the current experiments (without comparisons). 
problems arise, otherwise 1. - `-v`, `--verbose` - displays detailed tracing information. + +## Examples + +> This example is based on our +> [Get Started](/doc/tutorials/get-started/experiments), where you can find the +> actual source code. + +Let's say we have run 3 experiments in our project: + +```dvc +$ dvc exp show --include-params=featurize +┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ +┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +│ workspace │ - │ 0.61314 │ 1500 │ 2 │ +│ 10-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ +│ ├── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ +│ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ +│ └── exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ +└───────────────────────┴──────────────┴─────────┴────────────────────────┴──────────────────┘ +``` + +Since we haven't made any changes to the workspace, we can compare `exp-1dad0` +to its baseline (`10-bigrams-experiment`, current `HEAD`) like this: + +```dvc +$ dvc exp diff exp-1dad0 +Path Metric Value Change +scores.json auc 0.61314 0.035575 +Path Param Value Change +params.yaml featurize.max_features 1500 -500 +``` + +To compare two specific experiments (values are shown for the second one by +default): + +```dvc +$ dvc exp diff exp-1dad0 exp-1df77 +Path Metric Value Change +scores.json auc 0.51676 -0.060799 +Path Param Value Change +params.yaml featurize.max_features 500 -1500 +``` + +To compare an experiment to the +[`7-ml-pipeline`](https://github.com/iterative/example-get-started/releases/tag/7-ml-pipeline) +tag (or any other [revision](https://git-scm.com/docs/revisions)): + +```dvc +$ dvc exp diff exp-1dad0 7-ml-pipeline +Path Metric Value Change +scores.json auc None diff not supported +Path Param Value Change +params.yaml featurize.max_features 500 -1500 +params.yaml 
featurize.ngrams 1 -1 +``` From fb37dcd1bf5302dcb8115470e7f78494f9733aa6 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Mon, 1 Mar 2021 17:49:10 -0600 Subject: [PATCH 08/19] ref: dump and copy edit exp gc Examples from https://github.com/iterative/dvc.org/pull/1926/files#diff-51e29bde8e57e41f526b72042b3e13ecae3d0563693e9a2d2d24bb9cd4d5b485 --- content/docs/command-reference/exp/gc.md | 70 ++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/content/docs/command-reference/exp/gc.md b/content/docs/command-reference/exp/gc.md index 8816f9f457..01d74213d8 100644 --- a/content/docs/command-reference/exp/gc.md +++ b/content/docs/command-reference/exp/gc.md @@ -58,3 +58,73 @@ separately to delete it. - `-v`, `--verbose` - displays detailed tracing information from executing the `dvc pull` command. + +## Examples + +> This example is based on our +> [Get Started](/doc/tutorials/get-started/experiments), where you can find the +> actual source code. + +Let's say we have the following project, and have just +[applied](/docs/command-reference/exp/apply) and committed `exp-1dad0` (current +`HEAD` of `master`): + +```dvc +$ dvc exp show --all-commits --include-params=featurize +┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ +┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +│ workspace │ - │ 0.57756 │ 2000 │ 2 │ +│ master │ 05:39 PM │ 0.57756 │ 2000 │ 2 │ +│ 10-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ +│ ├── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ +│ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ +│ └── exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ +│ 9-bigrams-model │ Jun 20, 2020 │ 0.54175 │ 1500 │ 2 │ +│ └── exp-069d9 │ Sep 24, 2020 │ 0.51076 │ 2500 │ 2 │ +│ 8-evaluation │ Jun 20, 2020 │ 0.54175 │ 500 │ 1 │ +│ 7-ml-pipeline │ Jun 20, 
2020 │ - │ 500 │ 1 │ + ... +│ 0-git-init │ Jun 20, 2020 │ - │ 1500 │ 2 │ +└───────────────────────┴──────────────┴─────────┴────────────────────────┴──────────────────┘ +``` + +If we consider all the other experiments unnecessary, we can delete them like +this: + +```dvc +$ dvc exp gc -w +WARNING: This will remove all experiments except ... +Are you sure you want to proceed? [y/n] y +Removed 4 experiments. To remove unused cache files use 'dvc gc'. +``` + +We can confirm that all the previous experiments are gone: + +```dvc +$ dvc exp show --all-commits --include-params=featurize +┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ +┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +│ workspace │ - │ 0.57756 │ 2000 │ 2 │ +│ master │ 05:39 PM │ 0.57756 │ 2000 │ 2 │ +│ 10-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ +│ 9-bigrams-model │ Jun 20, 2020 │ 0.54175 │ 1500 │ 2 │ + ... +│ 0-git-init │ Jun 20, 2020 │ - │ 1500 │ 2 │ +└───────────────────────┴──────────────┴─────────┴────────────────────────┴──────────────────┘ +``` + +To remove any <abbr>cached</abbr> data associated with the deleted experiments and +which are no longer needed in the project, we can use regular `dvc gc` (with the +appropriate options): + +```dvc +$ dvc gc --all-commits +WARNING: This will remove all cache except ... +Are you sure you want to proceed? [y/n] y +... +``` + +> Note the use of `--all-commits` to ensure that we do not garbage collect files +> or directories referenced in remaining commits in the repo. 
From 70e1cbee93656c23e5a704138a1135e14aee41a3 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Mon, 1 Mar 2021 18:03:13 -0600 Subject: [PATCH 09/19] ref: dump and edit exp list Examples from https://github.com/iterative/dvc.org/pull/1926/files#diff-505710c3e0395830b4f81df1773b817654a720da5778755cd6d6f8db4544a031 --- content/docs/command-reference/exp/list.md | 51 +++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/content/docs/command-reference/exp/list.md b/content/docs/command-reference/exp/list.md index e0bbf086b2..28ec59c2b0 100644 --- a/content/docs/command-reference/exp/list.md +++ b/content/docs/command-reference/exp/list.md @@ -20,7 +20,8 @@ limited to experiment names and with very simple formatting. See also `dvc exp run`. If a working `git_remote` name (e.g. `origin`) or valid Git repo's URL is -provided, lists experiments in that <abbr>repository</abbr> instead (if any). +provided, lists experiments in that <abbr>repository</abbr> instead (if any, +based on the `dvc remote default`). > Note that this utility doesn't require an existing <abbr>DVC project</abbr> to > run from when a `git_remote` URL is given. @@ -45,3 +46,51 @@ options below). - `-v`, `--verbose` - displays detailed tracing information from executing the `dvc pull` command. + +## Examples + +> This example is based on our +> [Get Started](/doc/tutorials/get-started/experiments), where you can find the +> actual source code. + +Let's say we have run 3 experiments in our project. You can quickly list the +available experiments with this command: + +```dvc +$ dvc exp list --all +10-bigrams-experiment: + exp-e6c97 + exp-1dad0 + exp-1df77 +``` + +> Contrast this with the full table +> [displayed by `dvc exp show`](/doc/command-reference/exp/show#examples). 
+ +You can also list experiments in any DVC repo with `dvc exp list`: + +```dvc +$ dvc exp list --all git@github.com:iterative/example-get-started.git +10-bigrams-experiment: + exp-e6c97 + exp-86dd6 +``` + +We can see that two experiments are available in +[the DVC repo](https://github.com/iterative/example-get-started). + +If we're currently in a local clone of the repo, we can also use its +[Git remote](https://git-scm.com/book/en/v2/Git-Basics-Working-with-Remotes) +name instead: + +```dvc +$ git remote -v +origin git@github.com:iterative/example-get-started.git +$ dvc exp list --all origin +10-bigrams-experiment: + exp-e6c97 + exp-86dd6 +``` + +And in this context, `dvc exp pull` can download the experiments if needed, as +`dvc exp push` can upload any local ones we wish to share. From a204f4100625fe35d6dbde186d5e4185ad6a51a9 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Mon, 1 Mar 2021 18:35:26 -0600 Subject: [PATCH 10/19] ref: dump and copy edit exp push/pull Examples from https://github.com/iterative/dvc.org/pull/1926/files --- content/docs/command-reference/exp/pull.md | 25 +++++++++++++++++ content/docs/command-reference/exp/push.md | 31 ++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/content/docs/command-reference/exp/pull.md b/content/docs/command-reference/exp/pull.md index af00cfc2d1..2539ac2707 100644 --- a/content/docs/command-reference/exp/pull.md +++ b/content/docs/command-reference/exp/pull.md @@ -70,3 +70,28 @@ given with `--remote`. - `-v`, `--verbose` - displays detailed tracing information from executing the `dvc pull` command. + +## Examples + +> This example is based on our +> [Get Started](/doc/tutorials/get-started/experiments), where you can find the +> actual source code. + +Let's say we have cloned a DVC repository, and would like to fetch an experiment +that someone else shared (see also `dvc exp list`). 
+ +```dvc +$ dvc exp list --all origin +master: + exp-e6c97 +$ dvc exp pull origin exp-e6c97 +Pulled experiment 'exp-e6c97' from Git remote 'origin'. +``` + +We can now see that the experiment exists in the local repo: + +```dvc +$ dvc exp list --all +master: + exp-e6c97 +``` diff --git a/content/docs/command-reference/exp/push.md b/content/docs/command-reference/exp/push.md index 16fc4e27f3..bce2090c3b 100644 --- a/content/docs/command-reference/exp/push.md +++ b/content/docs/command-reference/exp/push.md @@ -66,3 +66,34 @@ given with `--remote`. - `-v`, `--verbose` - displays detailed tracing information from executing the `dvc pull` command. + +## Examples + +> This example is based on our +> [Get Started](/doc/tutorials/get-started/experiments), where you can find the +> actual source code. + +Let's say we have run 3 experiments in our project: + +```dvc +$ dvc exp list --all +11-bigrams-experiment: + exp-e6c97 + exp-1dad0 + exp-1df77 +``` + +We would now like to share one of them with others via the Git remote: + +```dvc +$ dvc exp push origin exp-e6c97 +Pushed experiment 'exp-e6c97' to Git remote 'origin'. +``` + +We can now see that the experiment exists in the remote repo: + +```dvc +$ dvc exp list --all origin +master: + exp-e6c97 +``` From 06044836b55ee0db5a4890ccc453b4f2d07bc045 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Wed, 10 Mar 2021 19:22:16 -0700 Subject: [PATCH 11/19] ref: first exp run Example --- content/docs/command-reference/exp/run.md | 38 +++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/content/docs/command-reference/exp/run.md b/content/docs/command-reference/exp/run.md index 8dcf1292a9..b537f44b23 100644 --- a/content/docs/command-reference/exp/run.md +++ b/content/docs/command-reference/exp/run.md @@ -170,3 +170,41 @@ CPU cores). regardless of this flag. - `-v`, `--verbose` - displays detailed tracing information. 
+ +## Examples + +> These examples are based on our +> [Get Started](/doc/tutorials/get-started/experiments), where you can find the +> actual source code. + +Let's clone our example ML project, download the data it <abbr>depends</abbr> +on, and check the latest metrics: + +```dvc +$ git clone git@github.com:iterative/example-get-started.git +$ cd example-get-started +$ dvc pull +$ dvc metrics show +Path avg_prec roc_auc +scores.json 0.60405 0.9608 +``` + +For this experiment, we want to see the results for a smaller dataset input, so +let's limit the data to 20 MB (originally 37) and reproduce the pipeline with +`dvc exp run`: + +```dvc +$ truncate --size=20M data/data.xml +$ dvc exp run +... +Reproduced experiment(s): exp-44136 +Experiment results have been applied to your workspace. + +$ dvc metrics diff +Path Metric Old New Change +scores.json avg_prec 0.60405 0.56103 -0.04302 +scores.json roc_auc 0.9608 0.94003 -0.02077 +``` + +The results in the `exp-44136` experiment are predictably worst, as the +`dvc metrics` commands show. From e62e1e0325dbc8c7ff5ff374b061517b8e9403fe Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Wed, 10 Mar 2021 20:18:36 -0700 Subject: [PATCH 12/19] ref: exp run --set-param example --- content/docs/command-reference/exp/run.md | 60 +++++++++++++++++++++-- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/content/docs/command-reference/exp/run.md b/content/docs/command-reference/exp/run.md index b537f44b23..31377b27de 100644 --- a/content/docs/command-reference/exp/run.md +++ b/content/docs/command-reference/exp/run.md @@ -177,21 +177,40 @@ CPU cores). > [Get Started](/doc/tutorials/get-started/experiments), where you can find the > actual source code. 
-Let's clone our example ML project, download the data it <abbr>depends</abbr> -on, and check the latest metrics: +<details> + +### Expand to prepare the example ML project + +Clone the DVC repo and download the data it <abbr>depends</abbr> on: ```dvc $ git clone git@github.com:iterative/example-get-started.git $ cd example-get-started $ dvc pull +``` + +Let's also install the Python requirements: + +> We **strongly** recommend creating a +> [virtual environment](https://python.readthedocs.io/en/stable/library/venv.html) +> first. + +```dvc +$ pip install -r src/requirements.txt +``` + +</details> + +Let's check the latest metrics of the project: + +```dvc $ dvc metrics show Path avg_prec roc_auc scores.json 0.60405 0.9608 ``` For this experiment, we want to see the results for a smaller dataset input, so -let's limit the data to 20 MB (originally 37) and reproduce the pipeline with -`dvc exp run`: +let's limit the data to 20 MB and reproduce the pipeline with `dvc exp run`: ```dvc $ truncate --size=20M data/data.xml @@ -206,5 +225,36 @@ scores.json avg_prec 0.60405 0.56103 -0.04302 scores.json roc_auc 0.9608 0.94003 -0.02077 ``` -The results in the `exp-44136` experiment are predictably worst, as the +The results in the `exp-44136` experiment seem to be worst (expected), as the `dvc metrics` commands show. + +## Example: Modify parameters on-the-fly + +You could modify a params file just like any other <abbr>dependency</abbr> and +run an experiment on that basis. Since this is a common need, `dvc exp run` +comes with the `--set-param` (`-S`) option built-in. This saves you the need to +manually edit the params file: + +```dvc +$ dvc exp run -S prepare.split=0.25 -S featurize.max_features=2000 +... +Reproduced experiment(s): exp-18bf6 +Experiment results have been applied to your workspace. 
+``` + +To see the results, we can use `dvc exp diff`, which compares both params and +metrics to the previous project version: + +```dvc +$ dvc exp diff +Path Metric Value Change +scores.json avg_prec 0.58187 -0.022184 +scores.json roc_auc 0.93634 -0.024464 + +Path Param Value Change +params.yaml featurize.max_features 2000 -1000 +params.yaml prepare.split 0.25 0.05 +``` + +> Notice that experiments run as a series don't build upon each other. They are +> all based on `HEAD`. From 22ec5d6399fd17e9204cd28889a6f3988b912648 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Wed, 10 Mar 2021 21:43:25 -0700 Subject: [PATCH 13/19] ref: basic exp remove example --- content/docs/command-reference/exp/remove.md | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/content/docs/command-reference/exp/remove.md b/content/docs/command-reference/exp/remove.md index d196f9c21c..6363d9d662 100644 --- a/content/docs/command-reference/exp/remove.md +++ b/content/docs/command-reference/exp/remove.md @@ -32,3 +32,25 @@ With `--queue`, the queue of experiments is cleared. - `-v`, `--verbose` - displays detailed tracing information from executing the `dvc pull` command.
+ +## Examples + +Let's say we have run 3 experiments in our project with `dvc exp run`: + +```dvc +$ dvc exp list +master: + exp-e6c97 + exp-1dad0 + exp-1df77 +``` + +To remove any of them, just give their names to `dvc exp remove`: + +```dvc +$ dvc exp remove exp-1dad0 exp-1df77 + +$ dvc exp list +master: + exp-e6c97 +``` From 759b12fed5590f4bf4108fa017343c2be57bafca Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Sat, 13 Mar 2021 21:19:50 -0700 Subject: [PATCH 14/19] ref: tooltip for run --- content/docs/user-guide/basic-concepts/dependency.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/docs/user-guide/basic-concepts/dependency.md b/content/docs/user-guide/basic-concepts/dependency.md index 2af6b23b8e..256e77f183 100644 --- a/content/docs/user-guide/basic-concepts/dependency.md +++ b/content/docs/user-guide/basic-concepts/dependency.md @@ -1,6 +1,6 @@ --- name: Dependency -match: [dependency, dependencies] +match: [dependency, dependencies, depends] tooltip: >- A file or directory (possibly tracked by DVC) recorded in the `deps` section of a stage (in `dvc.yaml`) or `.dvc` file file. See `dvc run`.
Stages are From ffde7878586d19eecfe7975e6bbe421ca274b00d Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorge@orpinel.com> Date: Tue, 16 Mar 2021 14:23:06 -0600 Subject: [PATCH 15/19] ref: fix exp apply Example per https://github.com/iterative/dvc.org/pull/2259#pullrequestreview-612341710 --- content/docs/command-reference/exp/apply.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/content/docs/command-reference/exp/apply.md b/content/docs/command-reference/exp/apply.md index 852b6bde39..db7ef7f482 100644 --- a/content/docs/command-reference/exp/apply.md +++ b/content/docs/command-reference/exp/apply.md @@ -54,8 +54,8 @@ $ dvc exp show --include-params=featurize ┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ │ workspace │ - │ 0.61314 │ 1500 │ 2 │ │ 10-bigrams-experiment │ Jun 20, 2020 │ 0.61314 │ 1500 │ 2 │ -│ ├── exp-e6c97 │ Oct 21, 2020 │ 0.61314 │ 1500 │ 2 │ -│ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 2000 │ 2 │ +│ ├── exp-e6c97 │ Oct 21, 2020 │ 0.69830 │ 2000 │ 2 │ +│ ├── exp-1dad0 │ Oct 09, 2020 │ 0.57756 │ 1200 │ 2 │ │ └── exp-1df77 │ Oct 09, 2020 │ 0.51676 │ 500 │ 2 │ └───────────────────────┴──────────────┴─────────┴────────────────────────┴──────────────────┘ ``` @@ -83,8 +83,8 @@ $ git diff params.yaml ```git @@ -3,7 +3,7 @@ prepare: featurize: -- max_features: 2000 -+ max_features: 1500 +- max_features: 1500 ++ max_features: 2000 ngrams: 2 ``` @@ -116,8 +116,8 @@ $ dvc exp show --include-params=featurize ┏━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ ┃ Experiment ┃ Created ┃ auc ┃ featurize.max_features ┃ featurize.ngrams ┃ ┡━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ -│ workspace │ - │ 0.57756 │ 2000 │ 2 │ -│ master │ 04:31 PM │ 0.57756 │ 2000 │ 2 │ +│ workspace │ - │ 0.69830 │ 2000 │ 2 │ +│ master │ 04:31 PM │ 0.69830 │ 2000 │ 2 │ 
└────────────┴──────────┴─────────┴────────────────────────┴──────────────────┘ ``` From c6af90f93bec70a9d4a063a95a344d0236999e87 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorgeorpinel@users.noreply.github.com> Date: Tue, 16 Mar 2021 19:41:24 -0600 Subject: [PATCH 16/19] Update content/docs/command-reference/exp/branch.md Co-authored-by: Dave Berenbaum <dave@iterative.ai> --- content/docs/command-reference/exp/branch.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/docs/command-reference/exp/branch.md b/content/docs/command-reference/exp/branch.md index baea1cf31e..5826c113d4 100644 --- a/content/docs/command-reference/exp/branch.md +++ b/content/docs/command-reference/exp/branch.md @@ -88,4 +88,4 @@ $ git branch maxf-2000 ``` -`maxf-2000` can now be merged, rebased, pushed, etc. like any other Git branch. +`maxf-2000` can now be checked out, merged, rebased, pushed, etc. like any other Git branch. From c2713c42294fc0d43b24a329bf3605e211c838d9 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorgeorpinel@users.noreply.github.com> Date: Tue, 16 Mar 2021 19:42:07 -0600 Subject: [PATCH 17/19] Update content/docs/command-reference/exp/list.md Co-authored-by: Dave Berenbaum <dave@iterative.ai> --- content/docs/command-reference/exp/list.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/docs/command-reference/exp/list.md b/content/docs/command-reference/exp/list.md index 28ec59c2b0..e7889404e0 100644 --- a/content/docs/command-reference/exp/list.md +++ b/content/docs/command-reference/exp/list.md @@ -67,7 +67,7 @@ $ dvc exp list --all > Contrast this with the full table > [displayed by `dvc exp show`](/doc/command-reference/exp/show#examples). 
-You can also list experiments in a any DVC repo with `dvc exp list`: +You can also list experiments in any DVC repo with `dvc exp list`: ```dvc $ dvc exp list --all git@github.com:iterative/example-get-started.git From d2a389f3bd146408c3cd0890cf5bf1cce9d84a77 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel <jorgeorpinel@users.noreply.github.com> Date: Tue, 16 Mar 2021 19:49:16 -0600 Subject: [PATCH 18/19] Update content/docs/command-reference/exp/run.md Co-authored-by: Dave Berenbaum <dave@iterative.ai> --- content/docs/command-reference/exp/run.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/docs/command-reference/exp/run.md b/content/docs/command-reference/exp/run.md index 31377b27de..b94d52c599 100644 --- a/content/docs/command-reference/exp/run.md +++ b/content/docs/command-reference/exp/run.md @@ -225,8 +225,8 @@ scores.json avg_prec 0.60405 0.56103 -0.04302 scores.json roc_auc 0.9608 0.94003 -0.02077 ``` -The results in the `exp-44136` experiment seem to be worst (expected), as the -`dvc metrics` commands show. +The `dvc metrics diff` command shows the difference in performance +for the experiment we just ran (`exp-44136`). ## Example: Modify parameters on-the-fly From 7b442d57b85bcf42fb9b7192a47230f5af072f8b Mon Sep 17 00:00:00 2001 From: "Restyled.io" <commits@restyled.io> Date: Wed, 17 Mar 2021 01:49:27 +0000 Subject: [PATCH 19/19] Restyled by prettier --- content/docs/command-reference/exp/branch.md | 3 ++- content/docs/command-reference/exp/run.md | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/content/docs/command-reference/exp/branch.md b/content/docs/command-reference/exp/branch.md index 5826c113d4..a2649181aa 100644 --- a/content/docs/command-reference/exp/branch.md +++ b/content/docs/command-reference/exp/branch.md @@ -88,4 +88,5 @@ $ git branch maxf-2000 ``` -`maxf-2000` can now be checked out, merged, rebased, pushed, etc. like any other Git branch. 
+`maxf-2000` can now be checked out, merged, rebased, pushed, etc. like any other +Git branch. diff --git a/content/docs/command-reference/exp/run.md b/content/docs/command-reference/exp/run.md index b94d52c599..28bc2cba13 100644 --- a/content/docs/command-reference/exp/run.md +++ b/content/docs/command-reference/exp/run.md @@ -225,8 +225,8 @@ scores.json avg_prec 0.60405 0.56103 -0.04302 scores.json roc_auc 0.9608 0.94003 -0.02077 ``` -The `dvc metrics diff` command shows the difference in performance -for the experiment we just ran (`exp-44136`). +The `dvc metrics diff` command shows the difference in performance for the +experiment we just ran (`exp-44136`). ## Example: Modify parameters on-the-fly