diff --git a/.circleci/config.yml b/.circleci/config.yml index fcb289117e..728f7d043f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -8,7 +8,7 @@ defaults: &defaults working_directory: ~/repo docker: # Specify the version you desire here. - - image: circleci/node:12 + - image: circleci/node:16 # Specify service dependencies here if necessary. # CircleCI maintains a library of pre-built images, diff --git a/.github/workflows/download-link-check-schedule.yml b/.github/workflows/download-link-check-schedule.yml index bdf4937e0a..c55772887f 100644 --- a/.github/workflows/download-link-check-schedule.yml +++ b/.github/workflows/download-link-check-schedule.yml @@ -23,4 +23,4 @@ jobs: with: title: DVC Download Link Checker Report content-filepath: ./lychee/out.md - labels: website, automated issue + labels: website, link-checker diff --git a/.github/workflows/update.yaml b/.github/workflows/update.yaml index c2432e394d..d1d6910120 100644 --- a/.github/workflows/update.yaml +++ b/.github/workflows/update.yaml @@ -6,20 +6,20 @@ jobs: update: runs-on: ubuntu-18.04 steps: - - uses: actions/checkout@v2 - - name: Update - id: update - shell: bash - run: | - url=https://api.github.com/repos/iterative/dvc/releases/latest - version=$(curl --silent $url | jq -r .tag_name) - path=src/components/DownloadButton/index.tsx - sed -i "s/^const VERSION = .*$/const VERSION = \`$version\`/g" $path - echo "::set-output name=changes::$(git diff)" - echo "::set-output name=version::$version" - - name: Create PR - if: ${{ steps.update.outputs.changes != '' }} - uses: peter-evans/create-pull-request@v3 - with: - commit-message: dvc ${{ steps.update.outputs.version }} - title: dvc ${{ steps.update.outputs.version }} + - uses: actions/checkout@v2 + - name: Update + id: update + shell: bash + run: | + url=https://api.github.com/repos/iterative/dvc/releases/latest + version=$(curl --silent $url | jq -r .tag_name) + path=src/components/DownloadButton/index.tsx + sed -i "s/^const 
VERSION = .*$/const VERSION = \`$version\`/g" $path + echo "::set-output name=changes::$(git diff)" + echo "::set-output name=version::$version" + - name: Create PR + if: ${{ steps.update.outputs.changes != '' }} + uses: peter-evans/create-pull-request@v3 + with: + commit-message: dvc ${{ steps.update.outputs.version }} + title: dvc ${{ steps.update.outputs.version }} diff --git a/LICENSE b/LICENSE index 8dada3edaf..fe681074fc 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright {yyyy} {name of copyright owner} + Copyright 2018-2021 Iterative, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/config/prismjs/dvc-commands.js b/config/prismjs/dvc-commands.js index c3c9a76cc7..7a69a9ba5b 100644 --- a/config/prismjs/dvc-commands.js +++ b/config/prismjs/dvc-commands.js @@ -45,8 +45,16 @@ module.exports = [ 'gc', 'freeze', 'fetch', + 'exp show', + 'exp run', + 'exp remove', + 'exp push', + 'exp pull', + 'exp gc', + 'exp diff', + 'exp branch', + 'exp apply', 'exp', - 'experiments', 'doctor', 'diff', 'destroy', diff --git a/content/blog/2021-08-24-transfer-learning-experiments.md b/content/blog/2021-08-24-transfer-learning-experiments.md index 53493a2b9a..0ec3e4bdb0 100644 --- a/content/blog/2021-08-24-transfer-learning-experiments.md +++ b/content/blog/2021-08-24-transfer-learning-experiments.md @@ -14,6 +14,7 @@ tags: - Experiments - Reproducibility - DVC + - Pre-trained Models --- ## Intro @@ -26,6 +27,8 @@ or even people. This is called [transfer learning](https://towardsdatascience.com/a-comprehensive-hands-on-guide-to-transfer-learning-with-real-world-applications-in-deep-learning-212bf3b2f27a) and it can save a lot of time on developing a model from scratch. 
+https://youtu.be/S3Hm_BPLie0 + For us to take advantage of transfer learning, we can use fine-tuning to adopt the model to our new problem. In many cases, we start by replacing the last layer of the model. With the AlexNet example, this might mean the last layer was diff --git a/content/blog/2021-10-05-adding-data-to-build-a-more-generic-model.md b/content/blog/2021-10-05-adding-data-to-build-a-more-generic-model.md new file mode 100644 index 0000000000..c0025fa2d1 --- /dev/null +++ b/content/blog/2021-10-05-adding-data-to-build-a-more-generic-model.md @@ -0,0 +1,262 @@ +--- +title: Adding Data to Build a More Generic Model +date: 2021-10-05 +description: > + You can easily make changes to your dataset using DVC to handle data + versioning. This will let you extend your models to handle more generic data. +descriptionLong: > + When you have an existing model trained for one problem, you might want to + extend it to handle other problems. When you have data versioning, it's easier + to see which data additions make your model better or worse and then you can + see where to make improvements. +picture: 2021-10-05/cats-and-dogs.png +pictureComment: Adding more data to your dataset for a more generic model +author: milecia_mcgregor +commentsUrl: https://discuss.dvc.org/t/extending-models-with-more-data/881 +tags: + - MLOps + - DVC + - Git + - Experiments + - Data Versioning +--- + +## Intro + +You might be in the middle of training a model and then the business problem +shifts. Now you have this model that has been going through the training process +with a specific dataset and you need to make the model more generic. + +There's likely something that your model learned that can be useful on this new +dataset, so you might not have to restart the entire training process. We'll do +an example of updating a pre-trained model to use a broader dataset with DVC. 
By +the end of this, you should see how you can handle this quickly and start +running new experiments to get a more generic model. + +## The original pre-trained model + +For this post, we'll be making a more generic image classifier by taking the +original dataset with bees and ants and adding cats and dogs to it. You can +clone [this GitHub repo](https://github.com/iterative/pretrained-model-demo) to +get the current bees and ants model and check out +[this post](https://dvc.org/blog/transfer-learning-experiments) on how we +experimented with both AlexNet and SqueezeNet to build this model. + +So we're starting from our current bees and ants model and extending it to +classify dogs and cats as well. We'll start by adding some cats and dogs data to +our validation data and do some experiments with the current model to see how it +performs on generic data. + +Then we'll add the cats and dogs data to the training data and watch how the +model improves as we run experiments. + +## Updating the dataset with DVC + +To add the new cats and dogs dataset to the project, we'll use this DVC command. + +```dvc +$ dvc get https://github.com/iterative/dataset-registry blog/cats-dogs +``` + +This downloads a sample dataset with images of cats and dogs. You can use this +command to download files or directories that are tracked by DVC or Git. This +command can be used from anywhere in the file system, as long as DVC is +installed. + +This will make a new directory called `./cats-dogs/data/` that was downloaded +from the DVC remote and it has images for cats and dogs. Now we can slowly add +in the new data to the existing data. + +We'll start by moving the `val` data for `cats` and `dogs` from the +`/cats-dogs/data/` directory to the corresponding directory in +`data/hymenoptera_data`. 
+ +_Just a quick note, cats and dogs don't really belong in the `hymenoptera` +directory since that's specific to ants and bees, but it's the easiest and +fastest way to add the data for this tutorial._ + +With this new data in place, we can start training our model. + +## Running new experiments with generic data + +With the updated data, let's run an experiment on the model and see how good the +results are. To run a new experiment, open your terminal and make sure you have +a virtual environment enabled. Then run this command: + +```dvc +$ dvc exp run +``` + +Once the training epochs are finished, run the following command. + +```dvc +$ dvc exp show --no-timestamp \ +--include-metrics step,acc,val_acc,loss,val_loss \ +--include-params lr,momentum +``` + +The `--no-timestamp` option hides the timestamps from the table. The +`--include-metrics` option lets us choose which metrics we want to show in the +table. The `--include-params` option does the same for hyperparameters. This +gives us a table that's easier to read quickly.
+ +```dvctable +┏━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┓ +┃ neutral:**Experiment** ┃ metric:**step** ┃ metric:**acc** ┃ metric:**val_acc** ┃ metric:**loss** ┃ metric:**val_loss** ┃ param:**lr** ┃ param:**momentum** ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━┩ +│ **workspace** │ **3** │ **0.86885** │ **0.46** │ **0.31573** │ **3.7067** │ **0.001** │ **0.09** │ +│ **data-change** │ **-** │ **-** │ **-** │ **-** │ **-** │ **0.001** │ **0.09** │ +│ │ ╓ 3b3a2a2 [exp-23593] │ 3 │ 0.86885 │ 0.46 │ 0.31573 │ 3.7067 │ 0.001 │ 0.09 │ +│ │ ╟ 93d015d │ 2 │ 0.83197 │ 0.41333 │ 0.36851 │ 3.4259 │ 0.001 │ 0.09 │ +│ │ ╟ d474c42 │ 1 │ 0.79918 │ 0.43333 │ 0.46612 │ 3.286 │ 0.001 │ 0.09 │ +│ ├─╨ 1582b4b │ 0 │ 0.52869 │ 0.39 │ 0.94102 │ 2.5967 │ 0.001 │ 0.09 │ +└─────────────────────────┴──────┴─────────┴─────────┴─────────┴──────────┴───────┴──────────┘ +``` + +You'll notice that the validation accuracy is really low. That's because the +training metrics are based on bees and ants while the validation metrics are +based on bees, ants, cats, and dogs. If we looked at the validation metrics by +class, they'd likely be better for bees and ants than cats and dogs. + +That means we should probably add more data to the training dataset. + +## Adding the cats data to the training dataset + +Let's add the `train` data for `cats` to the corresponding directory in +`data/hymenoptera_data` and go through another experiment run with a different +learning rate. With this new data, we can run another experiment. One important +thing to note here is that we're using checkpoints in our experiments. That's +how we get the metrics for each training epoch. + +If we want to run a fresh experiment that doesn't resume training from the last +epoch, we need to reset our experiment. That's what we're going to do with this +command. 
+ +```dvc +$ dvc exp run --reset +``` + +This will reset all of the existing checkpoints and excute the training script. +Once it's finished, let's take a look at the metrics table with this command. +It's the same as the one we ran last time. + +```dvc +$ dvc exp show --no-timestamp \ + --include-metrics step,acc,val_acc,loss,val_loss \ + --include-params lr,momentum +``` + +Now you'll have a table that shows both experiments and you can see how much +better the new one did with the `cats` data added. + +```dvctable +┏━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┓ +┃ neutral:**Experiment** ┃ metric:**step** ┃ metric:**acc** ┃ metric:**val_acc** ┃ metric:**loss** ┃ metric:**val_loss** ┃ param:**lr** ┃ param:**momentum** ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━┩ +│ **workspace** │ **3** │ **0.91389** │ **0.87** │ **0.20506** │ **0.66306** │ **0.001** │ **0.09** │ +│ **data-change** │ **-** │ **-** │ **-** │ **-** │ **-** │ **0.001** │ **0.09** │ +│ │ ╓ 9405575 [exp-54e8a] │ 3 │ 0.91389 │ 0.87 │ 0.20506 │ 0.66306 │ 0.001 │ 0.09 │ +│ │ ╟ 856d80f │ 2 │ 0.90215 │ 0.87333 │ 0.27204 │ 0.61631 │ 0.001 │ 0.09 │ +│ │ ╟ 23dc98f │ 1 │ 0.87671 │ 0.86 │ 0.35964 │ 0.61713 │ 0.001 │ 0.09 │ +│ ├─╨ 99a3c34 │ 0 │ 0.71429 │ 0.82 │ 0.67674 │ 0.62798 │ 0.001 │ 0.09 │ +│ │ ╓ 3b3a2a2 [exp-23593] │ 3 │ 0.86885 │ 0.46 │ 0.31573 │ 3.7067 │ 0.001 │ 0.09 │ +│ │ ╟ 93d015d │ 2 │ 0.83197 │ 0.41333 │ 0.36851 │ 3.4259 │ 0.001 │ 0.09 │ +│ │ ╟ d474c42 │ 1 │ 0.79918 │ 0.43333 │ 0.46612 │ 3.286 │ 0.001 │ 0.09 │ +│ ├─╨ 1582b4b │ 0 │ 0.52869 │ 0.39 │ 0.94102 │ 2.5967 │ 0.001 │ 0.09 │ +└─────────────────────────┴──────┴─────────┴─────────┴─────────┴──────────┴───────┴──────────┘ +``` + +There's another way you can look at the difference between the model before we +added the `cats` data and after. If you run this in your terminal, you'll get a +plot comparing the two experiments. 
+ +```dvc +$ dvc plots diff exp-23593 exp-54e8a +``` + +The `exp-23593` and `exp-54e8a` values are the ids for the experiments you want +to compare. You'll see a new file gets generated in the `dvc_plots` directory in +your project. That's where you'll find the `index.html` file you should open in +your browser. You'll see something similar to this. + +![plots comparing the accuracy, validation accuracy, loss, and validation loss for all epochs of each experiment](2021-10-05/with-cats-data.png) + +There's a huge difference in the accuracy of our model after we've added this +additional data. Let's see if we can make it even better by adding the `dogs` +data. + +## Adding the dogs data to the training dataset + +We'll add the `train` data for `dogs` to the corresponding directory in +`data/hymenoptera_data` just like we did for the `cats` data. Now we can run a +new experiment with all of the new data included. We'll still need to reset the +experiment like before, so run the following command. + +```dvc +$ dvc exp run --reset +``` + +Once the training epochs are finished, we can take one more look at that metrics +table. + +```dvc +$ dvc exp show --no-timestamp \ +--include-metrics step,acc,val_acc,loss,val_loss \ +--include-params lr,momentum +``` + +Now we'll have all three experiments to compare. 
+ +```dvctable +┏━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┓ +┃ neutral:**Experiment** ┃ metric:**step** ┃ metric:**acc** ┃ metric:**val_acc** ┃ metric:**loss** ┃ metric:**val_loss** ┃ param:**lr** ┃ param:**momentum** ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━┩ +│ **workspace** │ **3** │ **0.8795** │ **0.90667** │ **0.29302** │ **0.25752** │ **0.001** │ **0.09** │ +│ **data-change** │ **-** │ **-** │ **-** │ **-** │ **-** │ **0.001** │ **0.09** │ +│ │ ╓ c20220f [exp-82f70] │ 3 │ 0.8795 │ 0.90667 │ 0.29302 │ 0.25752 │ 0.001 │ 0.09 │ +│ │ ╟ fcb5a0b │ 2 │ 0.85915 │ 0.92333 │ 0.38274 │ 0.25257 │ 0.001 │ 0.09 │ +│ │ ╟ 3768821 │ 1 │ 0.80751 │ 0.84667 │ 0.47681 │ 0.40228 │ 0.001 │ 0.09 │ +│ ├─╨ 7e1b8fb │ 0 │ 0.64632 │ 0.84 │ 0.87301 │ 0.46744 │ 0.001 │ 0.09 │ +│ │ ╓ 9405575 [exp-54e8a] │ 3 │ 0.91389 │ 0.87 │ 0.20506 │ 0.66306 │ 0.001 │ 0.09 │ +│ │ ╟ 856d80f │ 2 │ 0.90215 │ 0.87333 │ 0.27204 │ 0.61631 │ 0.001 │ 0.09 │ +│ │ ╟ 23dc98f │ 1 │ 0.87671 │ 0.86 │ 0.35964 │ 0.61713 │ 0.001 │ 0.09 │ +│ ├─╨ 99a3c34 │ 0 │ 0.71429 │ 0.82 │ 0.67674 │ 0.62798 │ 0.001 │ 0.09 │ +│ │ ╓ 3b3a2a2 [exp-23593] │ 3 │ 0.86885 │ 0.46 │ 0.31573 │ 3.7067 │ 0.001 │ 0.09 │ +│ │ ╟ 93d015d │ 2 │ 0.83197 │ 0.41333 │ 0.36851 │ 3.4259 │ 0.001 │ 0.09 │ +│ │ ╟ d474c42 │ 1 │ 0.79918 │ 0.43333 │ 0.46612 │ 3.286 │ 0.001 │ 0.09 │ +│ ├─╨ 1582b4b │ 0 │ 0.52869 │ 0.39 │ 0.94102 │ 2.5967 │ 0.001 │ 0.09 │ +└─────────────────────────┴──────┴─────────┴─────────┴─────────┴──────────┴───────┴──────────┘ +``` + +These results make sense for the experiments we've run. We're paying attention +to the validation accuracy here because this gives us a fair comparison of +what's happening as we add more data. + +The first experiment's training metrics are for bees and ants. The second +experiment's training metrics are for bees, ants, and cats. And the third +experiment's training metrics are for all four classes. 
So we can't really +compare these metrics. + +We can look at a comparison between the experiments with the `cats` data and +both the `cats` and `dogs` data. + +```dvc +$ dvc plots diff exp-23593 exp-54e8a exp-82f70 +``` + +![plot of differences between model with just cats data and model with both cats and dogs data](2021-10-05/with-cats-and-dogs-data.png) + +The results you see line up with what is expected for the validation metrics +based on how we added the data to the training set. Now you can keep running +experiments until you get your model tuned like you need it! + +## Conclusion + +When you want to change datasets quickly and start tracking how they affect your +model, using a DVC remote makes it easy to do so on different computers. You'll +be able to quickly upload and download GBs of data and see how changes affect +individual experiments. + +If you need help with anything DVC or CML, make sure to +[join our Discord community](https://discord.com/invite/dvwXA2N)! We're always +answering questions and having good conversations with everybody that shows up. diff --git a/content/blog/2021-10-15-october-21-heartbeat.md b/content/blog/2021-10-15-october-21-heartbeat.md new file mode 100644 index 0000000000..ef4f78b645 --- /dev/null +++ b/content/blog/2021-10-15-october-21-heartbeat.md @@ -0,0 +1,367 @@ +--- +title: October '21 Heartbeat +date: 2021-10-15 +description: > + Monthly updates are here! The word of the month is workflows! Check out + how Community members improve their workflows with DVC and CML. Find out news + from the team, new learning opportunities, and more! +descriptionLong: | + This month you will find: + + 🗺 MLOps workflows, + + 🤔 Lots of ways to learn, + + 🎥 Meetup and Conference videos, + + 📖 Docs updates, + + 🚀 Info on our growing team, and more!
+picture: 2021-10-15/october21cover.png +author: jeny_defigueiredo +commentsUrl: https://discuss.dvc.org/t/october-21-heartbeat/916 +tags: + - Heartbeat + - DVC + - CML + - MLOps Community + - PyTorch Lightning + - DAGsHub +--- + +# From the Community + +This month we have been flooded with content from our Community. We are grateful +and inspired to keep serving you! + +![Thank you!](https://media.giphy.com/media/xUA7aN1MTCZx97V1Ic/giphy.gif) + +## Ricardo Manhães Savii: Trying to turn Machine Learning into value + +If we can't turn machine learning into value, what good are we? +[**Ricardo Manhães Savii**](https://www.linkedin.com/in/ricardoms/) +[wrote a piece in Medium](https://medium.com/@ricardosavii/trying-to-turn-machine-learning-into-value-de9f28cde056) +where he tackles how to technically and visually define the steps to deliver an +Intelligent System with the same level of best practice maturity that software +development has today. He combines and synthesizes the ideas of some of the best +known thinkers in the space to build a thorough architecture of machine learning +best practices. You won't want to miss this post and wrap your head around these +diagrams! 
+ +![CI/CD for Machine Learning](/uploads/images/2021-10-15/manhaes.png) _Ricardo +Manhães Savii's Addendum to [François +Chollet's](https://medium.com/@francois.chollet) figure on result of machine +learning +([Source link](https://medium.com/@ricardosavii/trying-to-turn-machine-learning-into-value-de9f28cde056))_ + +## RappiBank: How to build an efficient machine learning project workflow + +Continuing the theme of ML workflow complexity, +[**Daniel Baena**](https://www.linkedin.com/in/data-box-science/) wrote a +[great overview and tutorial piece](https://medium.com/rappibank/how-to-build-an-efficient-machine-learning-project-workflow-using-data-version-control-dvc-aaeaa9cfb79b) +outlining the challenges that his team at +[RappiBank](https://bank.rappi.com.br/) encountered and found ways to solve with +DVC including: + +- confusing experiment files with different names +- disjointed messaging about training and models and dataset changes +- holding in your head or own notes progress that is not visible to the rest of + the team +- heavy run and re-run times without a modularized system + +Daniel shows how all of these things can be solved using DVC.🏆 + + + +## DAGsHub: Production Oriented Work + +Next up, [**Nir Barazida**](https://twitter.com/barazida) from +[DAGsHub](https://dagshub.com/) +[created a video](https://dagshub.com/docs/workshops/production_oriented_work/?utm_content=bufferef4d6&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer) +on Production-oriented work using a monorepo strategy and focusing on moving +from research to production-ready code using Git and DVC. If you are a data +scientist trying to wrap your head around going from your notebook to +production, this may help!
+ + + +## ML Data Versioning with DVC: How to Manage Machine Learning Data + +[**Piotr Storożenko**](https://www.linkedin.com/in/piotr-storo%C5%BCenko-438087128/) +of [Appsilon](https://appsilon.com/) wrote +[a great tutorial](https://appsilon.com/ml-data-versioning-with-dvc/) taking +into account the many challenges data scientists and ML engineers encounter in +their data versioning efforts and how DVC solves them. Do these scenarios from +his article look familiar? + +> Was it in `model_3final.pth` or `model_last.pth` that I used a bigger learning +> rate? +> +> When did I start using data preprocessing, during `model_2a.pth` or +> `model_2aa.pth`? +> +> Is `model_7.pth` trained on the new dataset or on the old one? +> +> Oh, gosh, which set of parameters and data have I used to train `model_2.pth`? +> It was pretty good in the end… + +# Learning Opportunities + +## Raviraja Ganta's 10-week course on Basic MLOps + +Twitter and LinkedIn were ablaze in the last month when +[**Raviraja Ganta**](https://www.linkedin.com/in/ravirajag/) announced his +[10-Week Course](https://www.ravirajag.dev/blog/mlops-summary) on MLOps basics. +This course is chock full of resources and practical tutorials to build your +MLOps platform and knowledge. [Week 3](https://www.ravirajag.dev/blog/mlops-dvc) +of the course is about DVC and its ability to solve your versioning and +reproducibility challenges. Be sure to check out +[the course repo](https://github.com/graviraja/MLOps-Basics) as well! + +[**MLOps Community**](https://mlops.community/) is hosting him to speak about +his course on October 20th.
+[Sign up to attend here!](https://airtable.com/shrh5eGdEbcBsdEdq) + +![Raviraja Ganta's 10-Week MLOps Course](/uploads/images/2021-10-15/ganta.png) +_Raviraja Ganta's 10-Week Course on MLOps Basics +([Source link](https://www.ravirajag.dev/blog/mlops-summary))_ + +## Josh Wills video on COVID simulations with DVC + +This week, +[this Tweet comment](https://twitter.com/josh_wills/status/1441456258746249216) +led me to +[this work](https://mlconf.com/sessions/the-covid-scenario-pipeline-high-stakes-data-science/) +by [**Josh Wills.**](https://twitter.com/josh_wills) Josh was tapped by +[**DJ Patil**](https://twitter.com/dpatil) to participate in some COVID +simulation research early on in the pandemic in which he used DVC. In his +presentation about the project, he tells of the tools he used and challenges of +the use case. Nice DVC shout out at 19:56! Ah, the fruits of a Twitter 🐇🕳! + +https://www.youtube.com/watch?v=tu7N8M-jwPU&t=10s + +## September Office Hours Video: Transfer Learning with Milecia McGregor + +If you missed last month's Office Hours +[Meetup](https://www.meetup.com/DVC-Community-Virtual-Meetups/), you can now +catch the video! [**Milecia's**](https://twitter.com/FlippedCoding) presentation +was based on [her blog post](https://dvc.org/blog/transfer-learning-experiments) +on the same topic: Using Experiments for Transfer Learning. If you're curious +about transfer learning in general, AlexNet and SqueezeNet in particular, or +using DVC experiments and checkpoints to track all that you do, this video's for +you!
+ +https://www.youtube.com/watch?v=RmJbyQ36zVk + +## Quoc-Tien Au: Continuously Learning on the Job as a Data Scientist + +[This Towards Data Science](https://towardsdatascience.com/the-what-where-and-how-about-continuously-learning-on-the-job-as-a-data-scientist-b0a31ea4ac48) +article by [**Quoc-Tien Au**](https://www.linkedin.com/in/quoctienau/) entitled +"The What, Where, and How about continuously learning on the job as a data +scientist," speaks to some higher points on the need to have a mindset for +continuous learning in the Data Science field. It's packed with great thought +processes and resources on what to learn, where to learn, and how to keep +learning while still getting your work done. Who struggles with this? 😅 + +https://media.giphy.com/media/icJCVO3GPDbCvvfgpf/giphy.gif + +# DVC News + +## Amsterdam Off-site + +Most of our team members from Europe got together in Amsterdam recently for a +couple days of brainstorming and team bonding. They went on a Treasure Hunt, ate +Ramen (a favorite among our team) and had great discussions on how to make our +tools and our team even better! Pictured below from front of the room left, +going clockwise (to the back of the room and back up) are David Ortega, Helio +Machado, David de la Iglesia Castro, Laurens Duijvesteijn, Ruslan Kupriev +(hidden), Dmitry Petrov, Jelle Bouwman, Batuhan Taskaya, Svetlana Sachkovskaya, +and Paweł Redzyński. + +Be sure to check out this section next month as our Americas team members will +meet in San Francisco! + +![Europe Iterative Team Members meet in Amsterdam](/uploads/images/2021-10-15/amsterdam.jpg) +_Iterative Team Members meet in Amsterdam +([Source: David Ortega](https://www.linkedin.com/in/gortegadavid/))_ + +## New Team Members + +[**Jordan Weber**](https://www.linkedin.com/in/jordanwweber/) joins us from Los +Angeles, California as our new Chief of Staff. She has previously held similar +roles at venture capital and FinTech firms.
In Jordan's free time she enjoys +cooking, tennis, dance, and hiking! 🎾 + +[**Ken Thom**](https://www.linkedin.com/in/kenthom/) joins us from Palo Alto, +California as our new Director of Operations. His past work includes business +operations, product management, software and hardware development. In his spare +time he likes to spend time with his family, swim, ski, and hike! 🥾 + +[**Jon Burdo**](https://www.linkedin.com/in/jon-burdo-59730a83/) joins us from +Boston, Massachusetts as a Senior Software Engineer. He's been working for the +past few years as a machine learning engineer with a focus on NLP. In his last +role he used DVC and loved it, which is how he eventually ended up here! 🎉 In +his spare time, Jon likes learning about open source software, tinkering with +Linux, and inline skating. + +[**Stephanie Roy**](https://www.linkedin.com/in/stephroy1/) joins the team as a +Senior Software Engineer from Quebec, Canada. Our first Canadian team member! +She has previously worked at LogMeIn on one of their mobile apps. In her spare +time she likes taking care of her plants in her indoor grow house, playing +roller derby, and discovering new things to watch, listen to and eat! 😋 + +Welcome to all our new team members! We are so glad you are here! 🙌🏼 + +## Open Positions + +And wouldn't you know it? We're still hiring! +[Use this link](https://iterative.notion.site/Iterative-ai-is-Hiring-852cb978129645e1906e2c9a878a4d22) +to find details of all the positions including: + +- Senior Software Engineer (ML, Labeling, Python) +- Senior Software Engineer (ML, Labeling, Python) +- Senior Software Engineer (ML, DevTools, Python) +- Field Data Scientist / Sales Engineer +- Developer Advocate (ML) +- Director / VP of Engineering (ML, DevTools) +- Director / VP of Product (ML, Data Infra, SaaS) +- Head of Talent +- Head of DevRel + +Please pass this info on to anyone you know that may fit the bill. We look +forward to new team members!
🎉 + +https://media.giphy.com/media/120jXUxrHF5QJ2/giphy.gif + +## Docs Updates + +Here are a few important docs updates you may want to take a look at this month! + +### 📖 PyTorch Lightning + +We all have +[**Ilia Sirotkin**](https://www.linkedin.com/search/results/all/?keywords=ilia%20sirotkin&origin=RICH_QUERY_SUGGESTION&position=0&searchId=e7bb3154-797a-44a5-a209-90ffece95246&sid=GeC) +to thank for his contribution to our docs. He created the +[PyTorch Lightning integration docs](https://dvc.org/doc/dvclive/ml-frameworks/pytorch-lightning#pytorch-lightning) +for all to use! + +### 📖 CML with DVC guide: + +[Our updated CML with DVC Guide](https://cml.dev/doc/cml-with-dvc) provides +updated code and streamlined information on Cloud Storage Provider credentials +and GitHub Actions set up. + +```yaml +name: CML & DVC +on: [push] +jobs: + run: + runs-on: ubuntu-latest + container: docker://ghcr.io/iterative/cml:0-dvc2-base1 + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Train model + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + run: | + pip install -r requirements.txt # Install dependencies + dvc pull data --run-cache # Pull data & run-cache from S3 + dvc repro # Reproduce pipeline + - name: Create CML report + env: + REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + echo "## Metrics" >> report.md + dvc metrics diff master --show-md >> report.md + + # Publish confusion matrix diff + echo "## Plots" >> report.md + echo "### Class confusions" >> report.md + dvc plots diff \ + --target classes.csv \ + --template confusion \ + -x actual \ + -y predicted \ + --show-vega master > vega.json + vl2png vega.json -s 1.5 | cml publish --md >> report.md + + # Publish regularization function diff + echo "### Effects of regularization" >> report.md + dvc plots diff \ + --target estimators.csv \ + -x Regularization \ + --show-vega master > vega.json + vl2png vega.json -s 1.5 | 
cml publish --md >> report.md + + cml send-comment report.md +``` + +### 📖 Shtab + +Team member [**Casper da Costa-Luis**](https://www.cdcl.ml/) has +[created a docs website](https://docs.iterative.ai/shtab/) for his Python +tab-completion script generator project [shtab](https://github.com/iterative/shtab). +For more info, check out +[the original blog post](https://dvc.org/blog/shtab-completion-release) about it +as well. + +## Next Meetups + +For the second class of +[DVC Learn,](https://www.meetup.com/DVC-Community-Virtual-Meetups/events/280814336/) +join us to learn about getting started running experiments! This lesson will +include information on how to use our +[checkpoints](https://dvc.org/doc/user-guide/experiment-management/checkpoints) +feature as well. We look forward to seeing you there! + + + +Be sure to join us at the +[November Office Hours Meetup,](https://www.meetup.com/DVC-Community-Virtual-Meetups/events/281355245/) +where [**Maykon Schots**](https://www.linkedin.com/in/maykon-schots/) will talk +about how he used DVC and CML to create an internal Kaggle competition for his +team to arrive at their best models in their work for the largest bank in +Brazil. + + + +## Tweet Love ❤️ + +This month, it was exceedingly hard to pick just one Tweet. I'm leaving you with +one that ballooned our followers over the last month. But there have been many! +I encourage you to visit our newly created +[_Wall of Love ❤️_](https://testimonial.to/iterative-open-source-community-shout-outs/all) +to see all the beautiful Iterative tool love. 🛠❤️🤗 + +https://twitter.com/DynamicWebPaige/status/1435256826375720964?s=20 + +--- + +_Do you have any use case questions or need support?
Join us in +[Discord](https://discord.com/invite/dvwXA2N)!_ + +_Head to the [DVC Forum](https://discuss.dvc.org/) to discuss your ideas and +best practices._ diff --git a/content/docs/api-reference/make_checkpoint.md b/content/docs/api-reference/make_checkpoint.md index 97ee0a46cf..0d9132d7e4 100644 --- a/content/docs/api-reference/make_checkpoint.md +++ b/content/docs/api-reference/make_checkpoint.md @@ -59,7 +59,9 @@ stages: The code in `iterate.py` will execute continuously increment an integer number saved in `int.txt` (starting at 0). At 0 and every 100 loops, it makes a -checkpoint for `dvc experiments`: +checkpoint for [DVC experiments]: + +[dvc experiments]: /doc/user-guide/experiment-management#experiments ```py import os @@ -141,4 +143,5 @@ $ dvc exp show If we use `dvc exp run` again, the process will start from 200 (since that's what the workspace reflects). -See `dvc experiments` for more details on managing experiments. +See [Experiment Management](/doc/user-guide/experiment-management) for more +details on managing experiments. diff --git a/content/docs/api-reference/open.md b/content/docs/api-reference/open.md index d014229013..700ea6a2b3 100644 --- a/content/docs/api-reference/open.md +++ b/content/docs/api-reference/open.md @@ -40,10 +40,10 @@ file can be tracked by DVC (as an output) or by Git. (using the `with` keyword, as shown in the examples). This function makes a direct connection to the -[remote storage](/doc/command-reference/remote/add#supported-storage-types) -(except for Google Drive), so the file contents can be streamed. Your code can -process the data [buffer](https://docs.python.org/3/c-api/buffer.html) as it's -streamed, which optimizes memory usage. +[remote storage](/doc/command-reference/remote/add#supported-storage-types), so +the file contents can be streamed. Your code can process the data +[buffer](https://docs.python.org/3/c-api/buffer.html) as it's streamed, which +optimizes memory usage. 
> Use `dvc.api.read()` to load the complete file contents in a single function > call – no _context manager_ involved. Neither function utilizes disc space. diff --git a/content/docs/command-reference/config.md b/content/docs/command-reference/config.md index d1aa97f8f3..1f3e1b79dd 100644 --- a/content/docs/command-reference/config.md +++ b/content/docs/command-reference/config.md @@ -223,21 +223,33 @@ connection settings, and configuring a remote is the way that can be done. ### state -See -[Internal directories and files](/doc/user-guide/project-structure/internal-files) -to learn more about the state file (database) that is used for optimization. - -- `state.row_limit` - maximum number of entries in the state database, which - affects the physical size of the state file itself, as well as the performance - of certain DVC operations. The default is 10,000,000 rows. The bigger the - limit, the longer the file hash history that DVC can keep, in order to avoid - sequential hash recalculations. - -- `state.row_cleanup_quota` - percentage of the state database that is going to - be deleted when it hits the `state.row_limit`. Default quota is set to 50%. - When an entry in the database is used (e.g. during the `dvc status`), DVC - updates the timestamp on that entry. This way, when the database needs a - cleanup, DVC can sort entries chronologically, and remove the oldest ones. +> 📖 See +> [Internal directories and files](/doc/user-guide/project-structure/internal-files) +> to learn more about the state databases. + +- `state.row_limit` - maximum number of entries in state databases. This affects + the physical size of the state files, as well as the performance of certain + DVC operations. The default is 10,000,000 rows. The bigger the limit, the + longer the file hash history that DVC can keep, for example. + +- `state.row_cleanup_quota` - percentage of the state database to be deleted + when it reaches the `state.row_limit`. The default quota is 50%. 
DVC removes + the oldest entries (created when `dvc status` is used, for example). + +- `state.dir` - specify a custom location for the state databases (directories), + by default `.dvc/tmp/index` and `.dvc/tmp/md5`. This may be necessary when + using DVC on NFS or other mounted volumes where SQLite encounters file + permission errors. + +### index + +> 📖 See +> [Internal directories and files](/doc/user-guide/project-structure/internal-files) +> to learn more about remote index files. + +- `index.dir` - specify a custom location for the directory where remote index + files will be stored, by default in `.dvc/tmp/index`. This may be necessary + when using DVC on NFS or other mounted volumes. ### plots diff --git a/content/docs/command-reference/exp/diff.md b/content/docs/command-reference/exp/diff.md index 45f3f7b4ba..d20d0f8f8f 100644 --- a/content/docs/command-reference/exp/diff.md +++ b/content/docs/command-reference/exp/diff.md @@ -1,7 +1,7 @@ # exp diff Show changes in [metrics](/doc/command-reference/metrics) and -[parameters](/doc/command-reference/params) between `dvc experiments`. +[parameters](/doc/command-reference/params) between experiments. ## Synopsis @@ -19,9 +19,9 @@ positional arguments: # Description Provides a quick way to compare `dvc params` and `dvc metrics` between two -`dvc experiments` by printing a table of differences. By default, it includes -the params/metrics file "Path", "Param" or "Metric" name, the new "Value", and -the difference ("Change") for numeric values. Example: +experiments by printing a table of differences. By default, it includes the +params/metrics file "Path", "Param" or "Metric" name, the new "Value", and the +difference ("Change") for numeric values. 
Example: ```dvc $ dvc exp diff diff --git a/content/docs/command-reference/exp/gc.md b/content/docs/command-reference/exp/gc.md index 00f9115d3a..ad0ce5da11 100644 --- a/content/docs/command-reference/exp/gc.md +++ b/content/docs/command-reference/exp/gc.md @@ -1,6 +1,6 @@ # exp gc -Remove unnecessary `dvc experiments` from the project. +Remove unnecessary experiments from the project. ## Synopsis @@ -29,8 +29,8 @@ separately to delete it. ## Options - `-w`, `--workspace` - keep _only_ experiments derived from the last commit - (`HEAD`, default base for `dvc experiments`). This option is enabled - automatically with the other scope options (below). + (`HEAD`, default base for experiments). This option is enabled automatically + with the other scope options (below). - `-a`, `--all-branches` - keep experiments derived from all Git branches, as well as from the last commit (implies `-w`). Note that this can be combined diff --git a/content/docs/command-reference/exp/index.md b/content/docs/command-reference/exp/index.md index 186ad2bf16..608f7869f0 100644 --- a/content/docs/command-reference/exp/index.md +++ b/content/docs/command-reference/exp/index.md @@ -1,4 +1,4 @@ -# experiments +# exp _New in DVC 2.0_ @@ -19,7 +19,7 @@ A set of commands to generate and manage experiments: ## Synopsis ```usage -usage: dvc experiments [-h] [-q | -v] +usage: dvc exp [-h] [-q | -v] {show,apply,diff,run,gc,branch,list,push,pull,remove} ... diff --git a/content/docs/command-reference/exp/list.md b/content/docs/command-reference/exp/list.md index 5044cef671..971982306a 100644 --- a/content/docs/command-reference/exp/list.md +++ b/content/docs/command-reference/exp/list.md @@ -1,6 +1,6 @@ # exp list -List `dvc experiments` in a DVC repository (remote or local). +List experiments in a DVC repository (remote or local). 
## Synopsis diff --git a/content/docs/command-reference/exp/pull.md b/content/docs/command-reference/exp/pull.md index e53dc2a08d..eeeaad9f8a 100644 --- a/content/docs/command-reference/exp/pull.md +++ b/content/docs/command-reference/exp/pull.md @@ -20,9 +20,9 @@ positional arguments: The `dvc exp push` and `dvc exp pull` commands are the means for sharing experiments across repository copies via Git (and DVC) remotes. -> Plain `git push` and `git fetch` don't work with `dvc experiments` because -> these are saved under custom Git references. See **How does DVC track -> experiments?** in `dvc exp run` to learn more about DVC experiment storage. +> Plain `git push` and `git fetch` don't work with experiments because these are +> saved under custom Git references. See **How does DVC track experiments?** in +> `dvc exp run` to learn more about DVC experiment storage. A working `git_remote` name (e.g. `origin`) or Git URL is required, as well as an `experiment` name or hash (see `dvc exp run`) to pull. diff --git a/content/docs/command-reference/exp/push.md b/content/docs/command-reference/exp/push.md index bf34b6eebb..1c1d1195aa 100644 --- a/content/docs/command-reference/exp/push.md +++ b/content/docs/command-reference/exp/push.md @@ -20,9 +20,9 @@ positional arguments: The `dvc exp push` and `dvc exp pull` commands are the means for sharing experiments across repository copies via Git (and DVC) remotes. -> Plain `git push` and `git fetch` don't work with `dvc experiments` because -> these are saved under custom Git references. See **How does DVC track -> experiments?** in `dvc exp run` to learn more about DVC experiment storage. +> Plain `git push` and `git fetch` don't work with experiments because these are +> saved under custom Git references. See **How does DVC track experiments?** in +> `dvc exp run` to learn more about DVC experiment storage. A working `git_remote` name (e.g. 
`origin`) or Git URL is required, as well as an `experiment` name or hash (see `dvc exp run`) to push. diff --git a/content/docs/command-reference/exp/remove.md b/content/docs/command-reference/exp/remove.md index d4d0b39f14..4bb893e4e7 100644 --- a/content/docs/command-reference/exp/remove.md +++ b/content/docs/command-reference/exp/remove.md @@ -1,11 +1,11 @@ # exp remove -Delete specific `dvc experiments` from the project. +Delete specific experiments from the project. ## Synopsis ```usage -usage: dvc exp remove [-h] [-q | -v] [--queue | -A] +usage: dvc exp remove [-h] [-q | -v] [--queue | -A | -g ] [ [ ...]] positional arguments: @@ -28,6 +28,9 @@ With `--queue`, the list of experiments awaiting execution is cleared instead. - `-A`, `--all` - remove all experiments (includes `--queue`). +- `-g`, `--git-remote` - Name or URL of the Git remote to remove the experiment + from + - `-h`, `--help` - shows the help message and exit. - `-q`, `--quiet` - do not write anything to standard output. Exit with 0 if no @@ -46,7 +49,6 @@ master: exp-e6c97 exp-1dad0 exp-1df77 - exp-23d5a ``` To remove any of them, give their names to `dvc exp remove`. 
Or use the `--all` @@ -58,7 +60,6 @@ $ dvc exp remove exp-1dad0 exp-1df77 $ dvc exp list master: exp-e6c97 - exp-23d5a $ dvc exp remove -A @@ -94,3 +95,22 @@ $ dvc exp show --include-params=train.min_split --no-pager │ └── 5751540 [split32] │ 04:57 PM │ Queued │ - │ - │ 32 │ └───────────────────────┴──────────────┴────────┴──────────┴─────────┴─────────────────┘ ``` + +We can also remove experiments from a remote Git repository: + +```dvc +$ dvc exp push myremote exp-e6c97 +$ dvc exp push myremote exp-9fcef +$ dvc exp push myremote exp-1dad0 + +$ dvc exp list myremote +master: + exp-1dad0 + exp-9fcef + exp-e6c97 + +$ dvc exp remove -g myremote exp-9fcef exp-e6c97 +$ dvc exp list myremote +master: + exp-1dad0 +``` diff --git a/content/docs/command-reference/exp/run.md b/content/docs/command-reference/exp/run.md index 295e647928..d09450924e 100644 --- a/content/docs/command-reference/exp/run.md +++ b/content/docs/command-reference/exp/run.md @@ -18,14 +18,14 @@ positional arguments: ## Description -Provides a way to execute and track `dvc experiments` in your +Provides a way to execute and track experiments in your project without polluting it with unnecessary commits, branches, directories, etc. -> `dvc exp run` is equivalent to `dvc repro` for experiments. It -> has the same behavior when it comes to `targets` and stage execution (restores -> the dependency graph, etc.). See the command [options](#options) for more on -> the differences. +> `dvc exp run` is equivalent to `dvc repro` for experiments. It has the same +> behavior when it comes to `targets` and stage execution (restores the +> dependency graph, etc.). See the command [options](#options) for more on the +> differences. Before running an experiment, you'll probably want to make modifications such as data and code updates, or hyperparameter tuning. For the latter, @@ -154,10 +154,10 @@ CPU cores). > `dvc repro`, with the exception that `--no-commit` has no effect here. 
- `-S [:]=`, - `--set-param [:]=` - set the specified - `dvc params` for this experiment. `filename` can be any valid params file - (`params.yaml` by default). This will override the param values coming from - the params file. + `--set-param [:]=` - set the value of + existing `dvc params` for this experiment. `filename` can be any valid params + file (`params.yaml` by default). This will override the param values coming + from the params file. - `-n `, `--name ` - specify a name for this experiment. A default name will generated by default, such as `exp-f80g4` (based on the experiment's @@ -258,8 +258,8 @@ experiment we just ran (`exp-44136`). You could modify a params file just like any other dependency and run an experiment on that basis. Since this is a common need, `dvc exp run` -comes with the `--set-param` (`-S`) option built-in. This saves you the need to -manually edit the params file: +comes with the `--set-param` (`-S`) option built-in to update existing +parameters. This saves you the need to manually edit the params file. ```dvc $ dvc exp run -S prepare.split=0.25 -S featurize.max_features=2000 diff --git a/content/docs/command-reference/exp/show.md b/content/docs/command-reference/exp/show.md index 4cc8fb9663..02fdf56537 100644 --- a/content/docs/command-reference/exp/show.md +++ b/content/docs/command-reference/exp/show.md @@ -1,6 +1,6 @@ # exp show -Print a customizable table of `dvc experiments`, their metrics and parameters. +Print a customizable table of experiments, their metrics and parameters. > Press `q` to exit. @@ -14,8 +14,7 @@ usage: dvc exp show [-h] [-q | -v] [-a] [-T] [-A] [-n ] [--exclude-params ] [--param-deps] [--sort-by ] [--sort-order {asc,desc}] [--no-timestamp] [--sha] - [--json] [--csv] [--precision ] -``` + [--json] [--csv] [--md] [--precision ] ## Description @@ -104,8 +103,8 @@ metric or param. sorting. This only affects the ordering of experiments derived from the same parent commit. 
Parent commits are always sorted chronologically. -- `--sort-order {asc,desc}` - sort order to use with `--sort-by` (defaults to - descending). +- `--sort-order {asc,desc}` - sort order to use with `--sort-by`. Defaults to + ascending (`asc`). - `--no-timestamp` - do not show experiment timestamps. @@ -118,6 +117,8 @@ metric or param. - `--csv` - prints the command's output in CSV format instead of a human-readable table. +- `--md` - prints the command's output in Markdown table format. + - `--precision ` - [round](https://docs.python.org/3/library/functions.html#round) decimal values to `n` digits of precision (5 by default). Applies to metrics only. diff --git a/content/docs/command-reference/gc.md b/content/docs/command-reference/gc.md index 57f1076f73..bef695fe3b 100644 --- a/content/docs/command-reference/gc.md +++ b/content/docs/command-reference/gc.md @@ -73,9 +73,11 @@ The default remote is cleaned (see `dvc config core.remote`) unless the that is never referenced from the workspace or from any Git commit can still be stored in the project's cache). - > \* Except `dvc experiments` + > \* Not including [DVC experiments] -- `--all-experiments` keep cached objects referenced in all `dvc experiments`, +[dvc experiments]: /doc/user-guide/experiment-management#experiments + +- `--all-experiments` keep cached objects referenced in all [DVC experiments], as well as in the workspace (implying `-w`). This preserves the project's [experimental](/doc/user-guide/experiment-management) data (including checkpoints). diff --git a/content/docs/command-reference/list.md b/content/docs/command-reference/list.md index eca0b13868..44eb94de54 100644 --- a/content/docs/command-reference/list.md +++ b/content/docs/command-reference/list.md @@ -3,6 +3,8 @@ List project contents, including files, models, and directories tracked by DVC and by Git. +> Aliased to `dvc ls`. + > Useful to find data to `dvc get`, `dvc import`, or for `dvc.api` functions. 
## Synopsis diff --git a/content/docs/command-reference/plots/index.md b/content/docs/command-reference/plots/index.md index 63355f39b4..6207d97e8f 100644 --- a/content/docs/command-reference/plots/index.md +++ b/content/docs/command-reference/plots/index.md @@ -1,7 +1,7 @@ # plots -A set of commands to visualize and compare _plot metrics_ in structured files -(JSON, YAML, CSV, or TSV): [show](/doc/command-reference/plots/show), +A set of commands to visualize and compare _plot metrics_: +[show](/doc/command-reference/plots/show), [diff](/doc/command-reference/plots/diff), and [modify](/doc/command-reference/plots/modify). @@ -14,7 +14,7 @@ positional arguments: COMMAND show Generate plot from a metrics file. diff Plot differences in metrics between commits. - modify Modify plot properties associated with a target file. + modify Modify display properties of data-series plots (has no effect on image-type plots). ``` ## Types of metrics @@ -37,20 +37,28 @@ This type of metrics files are created by users, or generated by user data processing code, and can be defined in `dvc.yaml` (`plots` field) for tracking (optional). -DVC generates plots as HTML files that can be open with a web browser. These -HTML files use [Vega-Lite](https://vega.github.io/vega-lite/). Vega-Lite is a -declarative grammar for defining plots using JSON. The plots can also be saved -as SVG or PNG image filed from the browser. +DVC can work with two types of plots files: -In contrast to `dvc metrics`, these metrics should be stored as data series. -Unlike its `dvc metrics` counterpart, `dvc plots diff` cannot calculate numeric -differences between the metrics in different experiments. +1. Data series files, which can be JSON, YAML, CSV or TSV. +2. Image files in JPEG, GIF, or PNG format. -### Supported file formats +DVC generates plots as static HTML webpages that can be opened with a web browser. +They can also be saved as SVG or PNG image files from the browser. 
-Plot metrics can be organized as data series in JSON, YAML 1.2, CSV, or TSV -files. DVC expects to see an array (or multiple arrays) of objects (usually -_float numbers_) in the file. +Data-series plots utilize [Vega-Lite](https://vega.github.io/vega-lite/) for +rendering (declarative JSON grammar for defining graphics). Image-type plots are +rendered using `<img>` tags directly. + +## Supported file formats + +Image-type plots are included in HTML as-is, without additional processing. + +> We recommend tracking these source image files with DVC instead of Git, to +> prevent the repository from bloating. + +Structured plots can be read from JSON, YAML 1.2, CSV, or TSV files. DVC expects +to see an array (or multiple arrays) of objects (usually _float numbers_) in the +file. In tabular file formats such as CSV and TSV, each column is an array. `dvc plots` subcommands can produce plots for a specified column or a set of @@ -88,13 +96,13 @@ names in the `train` array below: } ``` -## Plot templates +## Plot templates (data series only) -Users have the ability to change the way plots are displayed by modifying the -[Vega-Lite specification](https://vega.github.io/vega-lite/), thus generating -plots in the style that best fits the their needs. This keeps DVC -projects programming language agnostic, as it's independent from user -display configuration and visualization code. +Users have the ability to change the way data-series plots are displayed by +modifying the [Vega-Lite specification](https://vega.github.io/vega-lite/), thus +generating plots in the style that best fits their needs. This keeps +DVC projects programming language agnostic, as it's independent +from user display configuration and visualization code. Built-in _plot templates_ are stored in the `.dvc/plots/` directory. The default one is called `default.json`. 
It can be changed with the `--template` (`-t`) diff --git a/content/docs/command-reference/plots/modify.md b/content/docs/command-reference/plots/modify.md index ee8fa937b5..2792ba0e92 100644 --- a/content/docs/command-reference/plots/modify.md +++ b/content/docs/command-reference/plots/modify.md @@ -2,6 +2,10 @@ Modify display properties of [plot metrics](/doc/command-reference/plots) files. +> ⚠️ Note that this command can modify only data-series plots. It has no effect +> on image-type plots. See +> [Types of metrics](/doc/command-reference/plots#types-of-metrics). + ## Synopsis ```usage diff --git a/content/docs/command-reference/remote/modify.md b/content/docs/command-reference/remote/modify.md index 3af18b853a..60aad419d8 100644 --- a/content/docs/command-reference/remote/modify.md +++ b/content/docs/command-reference/remote/modify.md @@ -119,6 +119,38 @@ options: $ dvc remote modify myremote region us-east-2 ``` +- `read_timeout` - set the time in seconds till a timeout exception is thrown + when attempting to read from a connection (60 by default). Let's set it to 5 + minutes for example: + + ```dvc + $ dvc remote modify myremote read_timeout 300 + ``` + +- `connect_timeout` - set the time in seconds till a timeout exception is thrown + when attempting to make a connection (60 by default). Let's set it to 5 + minutes for example: + + ```dvc + $ dvc remote modify myremote connect_timeout 300 + ``` + +- `read_timeout` - set the time in seconds till a timeout exception is thrown + when attempting to read from a connection (60 by default). Let's set it to 5 + minutes for example: + + ```dvc + $ dvc remote modify myremote read_timeout 300 + ``` + +- `connect_timeout` - set the time in seconds till a timeout exception is thrown + when attempting to make a connection (60 by default). 
Let's set it to 5 + minutes for example: + + ```dvc + $ dvc remote modify myremote connect_timeout 300 + ``` + By default, DVC authenticates using your AWS CLI [configuration](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html) (if set). This uses the default AWS credentials file. Use the following @@ -909,7 +941,7 @@ by HDFS. Read more about by expanding the WebHDFS section in - `basic` - [basic authentication scheme](https://tools.ietf.org/html/rfc7617). `user` and `password` (or `ask_password`) parameters should also be configured. - - `digest` - + - `digest` (**removed** in 2.7.1) - [digest Access Authentication Scheme](https://tools.ietf.org/html/rfc7616). `user` and `password` (or `ask_password`) parameters should also be configured. @@ -940,8 +972,7 @@ by HDFS. Read more about by expanding the WebHDFS section in custom_auth_header 'My-Header' ``` -- `user` - user name to use when the `auth` parameter is set to `basic` or - `digest`. +- `user` - user name to use when the `auth` parameter is set to `basic`. ```dvc $ dvc remote modify --local myremote user myuser diff --git a/content/docs/dvclive/api-reference/get_step.md b/content/docs/dvclive/api-reference/get_step.md deleted file mode 100644 index f5529806a9..0000000000 --- a/content/docs/dvclive/api-reference/get_step.md +++ /dev/null @@ -1,24 +0,0 @@ -# dvclive.get_step() - -Returns the current `step` value. - -```py -def get_step() -> int: -``` - -#### Usage: - -```py -import dvclive - -while dvclive.get_step() < 3: - dvclive.log("metric", 0.9) - dvclive.next_step() -``` - -## Description - -DVCLive uses the `step` to track the progress of each metric logged with -`dvclive.log()`. - -Each call to `dvclive.next_step()` increases the `step` count. 
diff --git a/content/docs/dvclive/api-reference/index.md b/content/docs/dvclive/api-reference/index.md index 74bd6d7244..fb6427da4f 100644 --- a/content/docs/dvclive/api-reference/index.md +++ b/content/docs/dvclive/api-reference/index.md @@ -1,11 +1,11 @@ # API Reference -This reference provides the details about the functions in the `dvclive` Python -API module, which can be imported regularly, for example: +This reference provides the details about the `dvclive` Python API module, which +can be imported regularly, for example: ```py -import dvclive +from dvclive import Live ``` -Please choose a function from the navigation sidebar to the left, or click the -Next button below to jump into the first one ↘ +Will import the main class of the API: +[`Live()`](/doc/dvclive/api-reference/live). diff --git a/content/docs/dvclive/api-reference/init.md b/content/docs/dvclive/api-reference/init.md deleted file mode 100644 index 5eb00f56bb..0000000000 --- a/content/docs/dvclive/api-reference/init.md +++ /dev/null @@ -1,39 +0,0 @@ -# dvclive.init() - -Initializes a DVCLive logger. - -```py -def init( - path: str = None, - resume: bool = False, - summary: bool = True) -``` - -#### Usage: - -```py -import dvclive - -dvclive.init() -``` - -## Description - -It's usage is optional and focused on configuring the behavior of subsequent -calls to `dvclive.log()` and `dvclive.next_step()`. - -⚠️ If `path` already exists when this functions is called, a cleanup will remove -all existing DVCLive related files. - -## Parameters - -- `path` (`dvclive` by default) - Configure where to save _metrics logs_ and - _metrics summary_ (generated by `dvclive.log()` and `dvclive.next_step()`, - respectively). - -- `resume` - (`False` by default) - if `True`, DVCLive will try to read the - previous `step` from the `path` directory and start from that point. 
- -- `summary` (`True` by default) - if `True`, upon each `dvclive.next_step()` - call, DVCLive will generate a _metrics summary_ (usable by `dvc metrics`). The - _summary_ will be located at `{path}.json`. diff --git a/content/docs/dvclive/api-reference/live/get_step.md b/content/docs/dvclive/api-reference/live/get_step.md new file mode 100644 index 0000000000..ab3c5b2f62 --- /dev/null +++ b/content/docs/dvclive/api-reference/live/get_step.md @@ -0,0 +1,26 @@ +# Live.get_step() + +Returns the current `step` value. + +```py +def get_step() -> int: +``` + +#### Usage: + +```py +from dvclive import Live + +live = Live() + +while live.get_step() < 3: + live.log("metric", 0.9) + live.next_step() +``` + +## Description + +DVCLive uses `step` to track the progress of each metric logged with +`Live.log()`. + +The `step` value can be updated with `Live.next_step()` or `Live.set_step()`. diff --git a/content/docs/dvclive/api-reference/live/index.md b/content/docs/dvclive/api-reference/live/index.md new file mode 100644 index 0000000000..1a29a98c7e --- /dev/null +++ b/content/docs/dvclive/api-reference/live/index.md @@ -0,0 +1,68 @@ +# Live() + +Initializes a DVCLive logger. + +```py +class Live: + + def __init__( + self, + path: Optional[str] = None, + resume: bool = False, + summary: bool = True, + ): +``` + +#### Usage: + +```py +from dvclive import Live + +live = Live() +``` + +## Description + +Its parameters are focused on configuring the behavior of subsequent calls to +[`Live()` methods](#methods). + +⚠️ If `path` already exists, `Live()` will remove all existing DVCLive related +files. + +## Attributes + +- `dir` - Location of the + [_metrics logs_](/doc/dvclive/get-started#metrics-logs) directory. +- `summary_path` - Location of the + [_metrics summary_](/doc/dvclive/get-started#metrics-summary). +- `html_path` - Location of the + [_html report_](/doc/dvclive/dvclive-with-dvc#html-report). 
+ +## Parameters + +- `path` - Configure where to save _metrics logs_ and _metrics summary_ + (generated by `Live.log()`). _Default_: `None`. + + If `None` and DVC is enabled (see + [DVCLive with DVC](/doc/dvclive/dvclive-with-dvc)), the `path` set by DVC + will be used. If `None` and DVC is **not** enabled, `"dvclive"` will be used. + +- `resume` - If `True`, DVCLive will try to read the previous `step` from the + `path` directory and start from that point. _Default_: `False`. + +- `summary` - If `True`, upon each `Live.log()` call, DVCLive will generate a + _metrics summary_ (usable by `dvc metrics`). The _summary_ will be located at + `{path}.json`. _Default_: `True`. + +## Exceptions + +- `dvclive.error.ConfigMismatchError` - thrown if the provided `path` does not + match the one set in DVC (see + [DVCLive with DVC](/doc/dvclive/dvclive-with-dvc)). + +## Methods + +- `Live.log()` +- `Live.get_step()` +- `Live.next_step()` +- `Live.set_step()` diff --git a/content/docs/dvclive/api-reference/live/log.md b/content/docs/dvclive/api-reference/live/log.md new file mode 100644 index 0000000000..9e94e66e10 --- /dev/null +++ b/content/docs/dvclive/api-reference/live/log.md @@ -0,0 +1,59 @@ +# Live.log() + +Generates [_metrics logs_](/doc/dvclive/get-started#metrics-logs) (usable by +`dvc plots`) by saving the given `name`: `val` pair to a `.tsv` file. + +```py + def log(name: str, val: float, step: int = None): +``` + +#### Usage: + +```py +from dvclive import Live + +live = Live() + +live.log("loss", 0.9) +``` + +## Description + +The first call to `live.log(name, val)` will create a new file in +`{path}/{name}.tsv` including the header and first row. + +For example `live.log("loss", 0.9)` will create `{path}/loss.tsv`: + +``` +timestamp step loss +1623671484747 0 0.9 +``` + +Each subsequent call to `live.log(name, val)` will add a new row to +`{path}/{name}.tsv`. + +The created file `{path}/{name}.tsv` is usable by `dvc plots`. 
+ +💡 If `name` contains slashes (e.g. `train/loss`), the required subfolders will +be created and the file will be saved inside the last subfolder (e.g. +`{path}/train/loss.tsv`). + +If `summary` is `True`, `Live.log()` will update the +[_metrics summary_](/doc/dvclive/get-started#metrics-summary) with the latest +value logged. + +The updated summary `{path}.json` is usable by `dvc metrics`. + +## Parameters + +- `name` - The _metrics logs_ will be saved in `{path}/{name}.tsv`. + +- `val` - The value to be added in the `name` column of a new row. + +## Exceptions + +- `dvclive.error.InvalidMetricTypeError` - thrown if the provided `val` does not + have a supported type. + +- `dvclive.error.DataAlreadyLoggedError` - thrown if the provided `name` has + already been logged within the same `step`. diff --git a/content/docs/dvclive/api-reference/live/next_step.md b/content/docs/dvclive/api-reference/live/next_step.md new file mode 100644 index 0000000000..35f4ff9337 --- /dev/null +++ b/content/docs/dvclive/api-reference/live/next_step.md @@ -0,0 +1,42 @@ +# Live.next_step() + +Signals that the current step has ended and increases the `step` value by 1 (one). + +```py +def next_step() +``` + +#### Usage: + +```py +from dvclive import Live + +live = Live() + +for step in range(3): + live.log("metric", 0.9) + live.next_step() +``` + +## Description + +DVCLive uses the `step` value to track the progress of each metric logged with +`Live.log()`. You can use `Live.next_step()` to increase the `step` by 1 (one). + +Each metric logged in between `Live.next_step()` (or `Live.set_step()`) calls +will be associated with the updated `step` value. + +### DVC integration + +When `dvclive` is used alongside `DVC`, each `Live.next_step()` call will have +additional effects. + +By default, on each `Live.next_step()` call, `DVC` will prepare an +[HTML report](/doc/dvclive/dvclive-with-dvc#html-report) with all the _metrics +logs_ logged in `path`. 
+ +In addition, when +[checkpoints](/doc/user-guide/experiment-management/checkpoints) are enabled in +the pipeline, `DVC` will +[create a new checkpoint](/doc/dvclive/dvclive-with-dvc#checkpoints) on each +`Live.next_step()` call. diff --git a/content/docs/dvclive/api-reference/live/set_step.md b/content/docs/dvclive/api-reference/live/set_step.md new file mode 100644 index 0000000000..5fbf439703 --- /dev/null +++ b/content/docs/dvclive/api-reference/live/set_step.md @@ -0,0 +1,57 @@ +# Live.set_step() + +Signals that the current step has ended and sets `step` to the given value. + +```py +def set_step(step: int): +``` + +#### Usage: + +```py +from dvclive import Live + +live = Live() + +for step in [0, 10, 20]: + live.set_step(step) + live.log("metric_1", 0.9) + live.log("metric_2", 0.7) +``` + +## Description + +DVCLive uses the `step` value to track the progress of each metric logged with +`Live.log()`. You can use `Live.set_step()` to set `step` to any value. + +Each metric logged in between `Live.set_step()` (or `Live.next_step()`) calls +will be associated with the provided `step` value. + +### DVC integration + +When `dvclive` is used alongside `DVC`, each `Live.set_step()` call will have +additional effects. + +By default, on each `Live.set_step()` call, `DVC` will prepare an +[HTML report](/doc/dvclive/dvclive-with-dvc#html-report) with all the _metrics +logs_ logged in `path`. + +In addition, when +[checkpoints](/doc/user-guide/experiment-management/checkpoints) are enabled in +the pipeline, `DVC` will +[create a new checkpoint](/doc/dvclive/dvclive-with-dvc#checkpoints) on each +`Live.set_step()` call. 
+ +## Example + +Given the [Usage](#usage) code snippet above, the +[metrics logs](/doc/dvclive/get-started#metrics-logs) generated for `metric_1` +would be: + +```dvc +$ cat dvclive/metric_1.tsv +timestamp step metric_1 +1614129197192 0 0.9 +1614129198031 10 0.9 +1614129198848 20 0.9 +``` diff --git a/content/docs/dvclive/api-reference/log.md b/content/docs/dvclive/api-reference/log.md deleted file mode 100644 index 61fead2e26..0000000000 --- a/content/docs/dvclive/api-reference/log.md +++ /dev/null @@ -1,55 +0,0 @@ -# dvclive.log() - -Generates _metrics logs_ (usable by `dvc plots`) by saving the given `name`: -`val` pair to a `.tsv` file. - -```py - def log(name: str, val: float, step: int = None): -``` - -#### Usage: - -```py -import dvclive - -dvclive.log("loss", 0.9) -``` - -## Description - -The first call to `dvclive.log(name, val)` will create a new file in -`{path}/{name}.tsv` including the header and first row. - -For example `dvclive.log("loss", 0.9)` will create `{path}/loss.tsv`: - -``` -timestamp step loss -1623671484747 0 0.9 -``` - -Each subsequent call to `dvclive.log(name, val)` will add a new row to -`{path}/{name}.tsv`. - -The created file `{path}/{name}.tsv` is usable by `dvc plots`. - -💡 If `name` contains slashes (i.e. `train/loss`), the required subfolders will -be created and the file will be saved inside the last subfolder (i.e. -`{path}/train/loss.tsv`). - -💡 If you call `dvclive.log()` without calling `dvclive.init()` first, `dvclive` -will automatically initialize itself using either default values or environment -variables (when used alongside `DVC`). - -## Parameters - -- `name` - The _metrics logs_ will be saved in `{path}/{name}.tsv`. - -- `val` - The value to be added in the `name` column of a new row. - -- `step` (`None` by default) - The value to be added in the `step` column of a - new row. If `None`, the value of `dvclive.get_step()` will be used. 
- -## Exceptions - -- `dvclive.error.InvalidMetricTypeError` - thrown if the provided `val` does not - have a supported type diff --git a/content/docs/dvclive/api-reference/next_step.md b/content/docs/dvclive/api-reference/next_step.md deleted file mode 100644 index 19f5f848e2..0000000000 --- a/content/docs/dvclive/api-reference/next_step.md +++ /dev/null @@ -1,58 +0,0 @@ -# dvclive.next_step() - -Signals that the current step has ended. Check the -[init parameters](/doc/dvclive/api-reference/init#parameters) for configuring -the behavior. - -```py -def next_step() -``` - -#### Usage: - -```py -import dvclive - -for step in range(3): - dvclive.log("metric", 0.9) - dvclive.next_step() -``` - -## Description - -Each call to `dvclive.next_step()` will behave depending on the parameters -selected in `dvclive.init()` and whether `DVC` is available or not. - -If `summary` is True, on each `dvclive.next_step()` call, DVCLive will generate -a summary of the values previously logged with `dvclive.log()`, and increase the -[`step`](/doc/dvclive/api-reference/get_step) count. - -The _metrics summary_ will be saved to `{path}.json`. Here's an example: - -```json -{ - "step": 2, - "metric": 0.9 -} -``` - -> 💡 These JSON files can be visualized with `dvc metrics`. - -### DVC Integration - -When `dvclive` is used alongside `DVC`, each `dvclive.next_step()` call will -have additional features. - -By default, on each `dvclive.next_step()` call, `DVC` will prepare an -[HTML report](/doc/dvclive/user-guide/dvclive-with-dvc#html-report) with all the -_metrics logs_ logged in `path`. - -When [checkpoints](/doc/user-guide/experiment-management/checkpoints) are -enabled in the pipeline, `DVC` will -[create a new checkpoint](/doc/dvclive/user-guide/dvclive-with-dvc#checkpoints) -on each `dvclive.next_step()` call. - -## Exceptions - -- `dvclive.error.InitializationError` - If `dvclive` has not been properly - initialized (i.e. by calling `dvclive.init()` or `dvclive.log()`). 
diff --git a/content/docs/dvclive/dvclive-with-dvc.md b/content/docs/dvclive/dvclive-with-dvc.md index 29fbc3f5c6..5d71d39836 100644 --- a/content/docs/dvclive/dvclive-with-dvc.md +++ b/content/docs/dvclive/dvclive-with-dvc.md @@ -19,16 +19,18 @@ We will refer to a training script (`train.py`) already using `dvclive`: ```python # train.py -import dvclive +from dvclive import Live + +live = Live() for epoch in range(NUM_EPOCHS): train_model(...) metrics = evaluate_model(...) for metric_name, value in metrics.items(): - dvclive.log(metric_name, value) + live.log(metric_name, value) - dvclive.next_step() + live.next_step() ``` Let's use `dvc stage add` to create a stage to wrap this code (don't forget to @@ -39,8 +41,8 @@ $ dvc stage add -n train --live training_metrics -d train.py python train.py ``` -`dvc.yaml` will contain a new `train` stage with the [`DVCLive configuration`] -(in the `live` field): +`dvc.yaml` will contain a new `train` stage with the DVCLive configuration (in +the `live` field): ```yaml stages: @@ -60,18 +62,14 @@ for DVCLive to write logs in, and DVC will now command options for the DVC integration: - `--live-no-cache ` - specify a DVCLive log directory `path` but don't - tracked it with DVC. Useful if you prefer to track it with Git. + track it with DVC. Useful if you prefer to track it with Git. - `--live-no-summary` - deactivates [summary](/doc/dvclive/get-started#metrics-summary) generation. - `--live-no-html` - deactivates [HTML report](#html-report) generation. -> Note that these are convenience CLI options. You can still use -> `dvclive.init()` manually, which will override any options sent to -> `dvc stage add`. Just be careful to match the `--live` value (CLI) and `path` -> argument (code). Also, note that summary files are never tracked by DVC -> automatically. 
+> Note that summary files are never tracked by DVC -Run the training with `dvc repro`: +Run the training with `dvc repro` or `dvc exp run`: ```dvc $ dvc repro train @@ -122,8 +120,4 @@ This will save the metrics, plots, models, etc. associated to each [`step`](/doc/dvclive/api-reference/get_step). You can learn more about how to use them in the -[Checkpoints User Guide](/docs/user-guide/experiment-management/checkpoints) and -in this example -[repository](https://github.com/iterative/dvc-checkpoints-mnist). - -[`dvclive configuration`]: /doc/dvclive/api-reference/init#parameters +[Checkpoints User Guide](/docs/user-guide/experiment-management/checkpoints). diff --git a/content/docs/dvclive/get-started.md b/content/docs/dvclive/get-started.md index c4de08d0d6..7452286dcb 100644 --- a/content/docs/dvclive/get-started.md +++ b/content/docs/dvclive/get-started.md @@ -1,7 +1,6 @@ # Get Started -DVCLive is a simple Python library whose interface consists of three main -methods. +DVCLive is a simple Python library whose interface consists of three main steps. ## Steps @@ -10,28 +9,28 @@ To get it up and running you just need to follow these steps: ### 1. Initialize DVCLive ```python -import dvclive +from dvclive import Live -dvclive.init() +live = Live() ``` -See `dvclive.init()` for details. +See [`Live()`](/doc/dvclive/api-reference/live) for details. ### 2. Log metrics ```python -dvclive.log(metric_name, value) +live.log(metric_name, value) ``` -See `dvclive.log()` for details. +See `Live.log()` for details. ### 3. Increase the step number ```python -dvclive.next_step() +live.next_step() ``` -See `dvclive.next_step()` for details. +See `Live.next_step()` for details. ## Putting all together @@ -40,18 +39,18 @@ Using the above steps, you can easily include DVCLive in your training code: ```python # train.py -import dvclive +from dvclive import Live -dvclive.init() +live = Live() for epoch in range(NUM_EPOCHS): train_model(...) metrics = evaluate_model(...) 
for metric_name, value in metrics.items(): - dvclive.log(metric_name, value) + live.log(metric_name, value) - dvclive.next_step() + live.next_step() ``` ## Outputs @@ -80,7 +79,7 @@ timestamp step {metric_name} ### Metrics Summary -In addition, when [`summary`](/doc/dvclive/api-reference/init#parameters) is +In addition, when [`summary`](/doc/dvclive/api-reference/live/#parameters) is enabled (True by default), DVCLive generates a metrics _summary_ with the latest metrics: diff --git a/content/docs/dvclive/ml-frameworks/catalyst.md b/content/docs/dvclive/ml-frameworks/catalyst.md index 7382bb65ab..b5cd95a0d7 100644 --- a/content/docs/dvclive/ml-frameworks/catalyst.md +++ b/content/docs/dvclive/ml-frameworks/catalyst.md @@ -16,7 +16,8 @@ To start using DVCLive you just need to add a few lines to your training code in You just need to add the [`DvcLiveCallback`](https://github.com/iterative/dvclive/blob/master/dvclive/catalyst.py) -to the callbacks list passed to your `runner`: +to the callbacks list passed to your +[`Runner`](https://catalyst-team.github.io/catalyst/core/runner.html): ```git +from dvclive.catalyst import DvcLiveCallback @@ -42,10 +43,15 @@ This will generate the metrics logs and summaries as described in the ## Parameters -- `model_file` - The name of the file where the model will be saved at the end - of each `step`. +- `model_file` - (`None` by default) - The name of the file where the model will + be saved at the end of each `step`. -Example: +- `**kwargs` - Any additional arguments will be passed to + [`Live`](/docs/dvclive/api-reference/live). + +## Examples + +- Using `model_file`. ```python from dvclive.catalyst import DvcLiveCallback @@ -58,3 +64,18 @@ runner.train( num_epochs=2, callbacks=[DvcLiveCallback("model.pth")]) ``` + +- Using `**kwargs` to customize [`Live`](/docs/dvclive/api-reference/live). 
+ +```python +from dvclive.catalyst import DvcLiveCallback + +runner.train( + model=model, + criterion=criterion, + optimizer=optimizer, + loaders=loaders, + num_epochs=2, + callbacks=[ + DvcLiveCallback(path="custom_path", summary=False)]) +``` diff --git a/content/docs/dvclive/ml-frameworks/fastai.md b/content/docs/dvclive/ml-frameworks/fastai.md index f8dcc5687b..c8c9c3f748 100644 --- a/content/docs/dvclive/ml-frameworks/fastai.md +++ b/content/docs/dvclive/ml-frameworks/fastai.md @@ -18,7 +18,8 @@ To start using DVCLive you just need to add a few lines to your training code in You just need to add the [`DvcLiveCallback`](https://github.com/iterative/dvclive/blob/master/dvclive/fastai.py) -to the callbacks list passed to your `learner`: +to the callbacks list passed to your +[`Learner`](https://docs.fast.ai/learner.html#Learner): ```git +from dvclive.fastai import DvcLiveCallback @@ -41,10 +42,15 @@ This will generate the metrics logs and summaries as described in the ## Parameters -- `model_file` - The name of the file where the model will be saved at the end - of each `step`. +- `model_file` - (`None` by default) - The name of the file where the model will + be saved at the end of each `step`. -Example: +- `**kwargs` - Any additional arguments will be passed to + [`Live`](/docs/dvclive/api-reference/live). + +## Examples + +- Using `model_file`. ```python from dvclive.fastai import DvcLiveCallback @@ -54,3 +60,14 @@ learn.fit_one_cycle( n_epoch=2, cbs=[DvcLiveCallback(model_file='model.pth')]) ``` + +- Using `**kwargs` to customize [`Live`](/docs/dvclive/api-reference/live). 
+ +```python +from dvclive.fastai import DvcLiveCallback + +learn = tabular_learner(data_loader, metrics=accuracy) +learn.fit_one_cycle( + n_epoch=2, + cbs=[DvcLiveCallback(path='custom_path', summary=False)]) +``` diff --git a/content/docs/dvclive/ml-frameworks/huggingface.md b/content/docs/dvclive/ml-frameworks/huggingface.md index 01594bfd88..7bde8fd106 100644 --- a/content/docs/dvclive/ml-frameworks/huggingface.md +++ b/content/docs/dvclive/ml-frameworks/huggingface.md @@ -15,7 +15,8 @@ To start using DVCLive you just need to add a few lines to your training code in You just need to add the [`DvcLiveCallback`](https://github.com/iterative/dvclive/blob/master/dvclive/huggingface.py) -to the callbacks list passed to your `trainer`: +to the callbacks list passed to your +[`Trainer`](https://huggingface.co/transformers/main_classes/trainer.html): ```git +from dvclive.huggingface import DvcLiveCallback @@ -43,10 +44,33 @@ This will generate the metrics logs and summaries as described in the ## Parameters -- `model_file` - The name of the folder where the model will be saved at the end - of each `step`. +- `model_file` - (`None` by default) - The name of the file where the model will + be saved at the end of each `step`. -Example: +- `**kwargs` - Any additional arguments will be passed to + [`Live`](/docs/dvclive/api-reference/live). + +## Examples + +- Using `model_file`. + +```python +from dvclive.huggingface import DvcLiveCallback + +trainer = Trainer( + model, + args, + train_dataset=train_data, + eval_dataset=eval_data, + tokenizer=tokenizer, + compute_metrics=compute_metrics, +) +trainer.add_callback( + DvcLiveCallback(model_file='my_model_path')) +trainer.train() +``` + +- Using `**kwargs` to customize [`Live`](/docs/dvclive/api-reference/live). 
```python from dvclive.huggingface import DvcLiveCallback @@ -59,6 +83,7 @@ trainer = Trainer( tokenizer=tokenizer, compute_metrics=compute_metrics, ) -trainer.add_callback(DvcLiveCallback(model_file='my_model_path')) +trainer.add_callback( + DvcLiveCallback(path='custom_path', summary=False)) trainer.train() ``` diff --git a/content/docs/dvclive/ml-frameworks/index.md b/content/docs/dvclive/ml-frameworks/index.md index fc99c7bde9..59a137420d 100644 --- a/content/docs/dvclive/ml-frameworks/index.md +++ b/content/docs/dvclive/ml-frameworks/index.md @@ -12,6 +12,7 @@ We currently support the following _ML Frameworks_: - [LightGBM](/docs/dvclive/ml-frameworks/lightgbm) - [MMCV](/docs/dvclive/ml-frameworks/mmcv) - [PyTorch](/docs/dvclive/ml-frameworks/pytorch) +- [PyTorch Lightning](/docs/dvclive/ml-frameworks/pytorch-lightning) - [TensorFlow](/docs/dvclive/ml-frameworks/tensorflow) - [XGBoost](/docs/dvclive/ml-frameworks/xgboost) diff --git a/content/docs/dvclive/ml-frameworks/keras.md b/content/docs/dvclive/ml-frameworks/keras.md index f163bf6ff9..c3f722c4de 100644 --- a/content/docs/dvclive/ml-frameworks/keras.md +++ b/content/docs/dvclive/ml-frameworks/keras.md @@ -16,7 +16,8 @@ To start using DVCLive you just need to add a few lines to your training code in You just need to add the [`DvcLiveCallback`](https://github.com/iterative/dvclive/blob/master/dvclive/keras.py) -to the callbacks list passed to your `model`: +to the callbacks list passed to your +[`Model`](https://keras.io/api/models/model/): ```git +from dvclive.keras import DvcLiveCallback @@ -40,14 +41,19 @@ This will generate the metrics logs and summaries as described in the ## Parameters -- `model_file` - The name of the file where the model will be saved at the end - of each `step`. +- `model_file` - (`None` by default) - The name of the file where the model will + be saved at the end of each `step`. 
- `save_weights_only` (`False` by default) - if True, then only the model's weights will be saved (`model.save_weights(model_file)`), else the full model is saved (`model.save(model_file)`) -Example: +- `**kwargs` - Any additional arguments will be passed to + [`Live`](/docs/dvclive/api-reference/live). + +## Examples + +- Using `model_file` and `save_weights_only`. ```python from dvclive.keras import DvcLiveCallback @@ -61,9 +67,16 @@ model.fit( save_weights_only=True)]) ``` -## Example repository +- Using `**kwargs` to customize [`Live`](/docs/dvclive/api-reference/live). -You can find a fully working example using the DVCLive and Keras in the -following link: +```python +from dvclive.keras import DvcLiveCallback -https://github.com/iterative/example-ml-frameworks/tree/keras +model.fit( + train_dataset, + epochs=num_epochs, + validation_data=validation_dataset, + callbacks=[DvcLiveCallback( + path="custom_path", + summary=False)]) +``` diff --git a/content/docs/dvclive/ml-frameworks/lightgbm.md b/content/docs/dvclive/ml-frameworks/lightgbm.md index 0b2b372b1b..3ae05c1294 100644 --- a/content/docs/dvclive/ml-frameworks/lightgbm.md +++ b/content/docs/dvclive/ml-frameworks/lightgbm.md @@ -40,10 +40,15 @@ This will generate the metrics logs and summaries as described in the ## Parameters -- `model_file` - The name of the file where the model will be saved at the end - of each `step`. +- `model_file` - (`None` by default) - The name of the file where the model will + be saved at the end of each `step`. -Example: +- `**kwargs` - Any additional arguments will be passed to + [`Live`](/docs/dvclive/api-reference/live). + +## Examples + +- Using `model_file`. ```python lightgbm.train( @@ -53,3 +58,16 @@ lightgbm.train( num_round=5, callbacks=[DvcLiveCallback(model_file="lgbm_model.txt")]) ``` + +- Using `**kwargs` to customize [`Live`](/docs/dvclive/api-reference/live). 
+ +```python +lightgbm.train( + param, + train_data, + valid_sets=[validation_data], + num_round=5, + callbacks=[DvcLiveCallback( + path="custom_path", + summary=False)]) +``` diff --git a/content/docs/dvclive/ml-frameworks/mmcv.md b/content/docs/dvclive/ml-frameworks/mmcv.md index 88e1d481b0..94ae63e60d 100644 --- a/content/docs/dvclive/ml-frameworks/mmcv.md +++ b/content/docs/dvclive/ml-frameworks/mmcv.md @@ -35,10 +35,17 @@ to generate metrics _logs_ and _summaries_ during training. ## Parameters -- `model_file` - The name of the file where the model will be saved at the end - of each `step`. +## Parameters + +- `model_file` - (`None` by default) - The name of the file where the model will + be saved at the end of each `step`. + +- `**kwargs` - Any additional arguments will be passed to + [`Live`](/docs/dvclive/api-reference/live). + +## Examples -Example: +- Using `model_file`. ```python log_config = dict( @@ -49,3 +56,18 @@ log_config = dict( ] ) ``` + +- Using `**kwargs` to customize [`Live`](/docs/dvclive/api-reference/live). + +```python +log_config = dict( + interval=100, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='DvcliveLoggerHook', + path="custom_path", + summary=False) + ] +) +``` diff --git a/content/docs/dvclive/ml-frameworks/pytorch-lightning.md b/content/docs/dvclive/ml-frameworks/pytorch-lightning.md new file mode 100644 index 0000000000..064f3c07ad --- /dev/null +++ b/content/docs/dvclive/ml-frameworks/pytorch-lightning.md @@ -0,0 +1,74 @@ +# PyTorch Lightning + +DVCLive allows you to easily add experiment tracking capabilities to your +PyTorch Lightning projects. + +## About PyTorch Lightning + +[PyTorch Lightning](https://www.pytorchlightning.ai/) is an open-source +framework for training PyTorch networks. + +## Usage + +To start using DVCLive you just need to add a few lines to your training code in +**any** PyTorch Lightning project. 
+ +You just need to pass the +[`DvcLiveLogger`](https://github.com/iterative/dvclive/blob/master/dvclive/lightning.py) +to your +[`Trainer`](https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html): + +```git ++from dvclive.lightning import DvcLiveLogger + +. . . + dvclive_logger = DvcLiveLogger() + + trainer = Trainer( ++ logger=dvclive_logger, + ) + trainer.fit(model) +``` + +This will generate the metrics logs and summaries as described in the +[Get Started](/docs/dvclive/get-started#outputs). + +> 💡 Without requiring additional modifications to your training code, you can +> use DVCLive alongside DVC. See +> [DVCLive with DVC](/doc/dvclive/dvclive-with-dvc) for more info. + +## Parameters + +- `run_name` - (`None` by default) - Name of the run, used in PyTorch Lightning + to get the version. + +- `prefix` - (`None` by default) - String that is added to each metric name. + +- `experiment` - (`None` by default) - + [`Live`](/docs/dvclive/api-reference/live) object to be used instead of + initializing a new one. + +- `**kwargs` - Any additional arguments will be passed to + [`Live`](/docs/dvclive/api-reference/live). + +## Examples + +- Using `**kwargs` to customize [`Live`](/docs/dvclive/api-reference/live). + +```python +from dvclive.lightning import DvcLiveLogger + +dvclive_logger = DvcLiveLogger( + path='my_logs_path', + summary=False +) +trainer = Trainer( + logger=dvclive_logger, +) +trainer.fit(model) +``` + +> 📖 By default, PyTorch Lightning creates a directory to store checkpoints +> using the logger's name (`DvcLiveLogger`). 
You can change the checkpoint path +> or disable checkpointing at all as described in the +> [PyTorch Lightning documentation](https://pytorch-lightning.readthedocs.io/en/latest/common/weights_loading.html#automatic-saving) diff --git a/content/docs/dvclive/ml-frameworks/pytorch.md b/content/docs/dvclive/ml-frameworks/pytorch.md index d98fe51e5f..27dec530ed 100644 --- a/content/docs/dvclive/ml-frameworks/pytorch.md +++ b/content/docs/dvclive/ml-frameworks/pytorch.md @@ -16,24 +16,28 @@ features: To start using DVCLive you just need to add few modifications to your training code in **any** PyTorch project. -You need to add `dvclive.log()` calls to each place where you would like to log -metrics and one single `dvclive.next_step()` call to indicate that the epoch has +You need to add `Live.log()` calls to each place where you would like to log +metrics and one single `Live.next_step()` call to indicate that the epoch has ended. To ilustrate with some code, extracted from the [official PyTorch ImageNet example](https://github.com/pytorch/examples/blob/master/imagenet/main.py): ```git ++ from dvclive import Live + ++ live = Live() + for epoch in range(args.start_epoch, args.epochs): lr = adjust_learning_rate(optimizer, epoch, args) -+ dvclive.log("learning_rate", lr) ++ live.log("learning_rate", lr) train_acc1 = train( train_loader, model, criterion, optimizer, epoch, args) -+ dvclive.log("train/accuracy", train_acc1) ++ live.log("train/accuracy", train_acc1) val_acc1 = validate(val_loader, model, criterion, args) -+ dvclive.log("validation/accuracy", val_acc1) ++ live.log("validation/accuracy", val_acc1) is_best = val_acc1 > best_acc1 best_acc1 = max(val_acc1, best_acc1) @@ -46,7 +50,7 @@ for epoch in range(args.start_epoch, args.epochs): 'optimizer' : optimizer.state_dict(), }, is_best) -+ dvclive.next_step() ++ live.next_step() ``` This will generate the metrics logs and summaries as described in the diff --git a/content/docs/dvclive/ml-frameworks/tensorflow.md 
b/content/docs/dvclive/ml-frameworks/tensorflow.md index d1e0b695f1..58c6031050 100644 --- a/content/docs/dvclive/ml-frameworks/tensorflow.md +++ b/content/docs/dvclive/ml-frameworks/tensorflow.md @@ -19,14 +19,18 @@ To start using DVCLive you just need to add a few lines to your training code in > 💡 If you prefer the Keras API, check the > [DVCLive - Keras](/docs/dvclive/user-guide/ml-frameworks/keras) page. -You need to add `dvclive.log()` calls to each place where you would like to log -metrics and one single `dvclive.next_step()` call to indicate that the epoch has +You need to add `Live.log()` calls to each place where you would like to log +metrics and one single `Live.next_step()` call to indicate that the epoch has ended. To ilustrate with some code, extracted from the [official TensorFlow guide](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch): ```git ++ from dvclive import Live + ++ live = Live() + for epoch in range(epochs): start_time = time.time() for step, (x_batch_train, y_batch_train) in enumerate(train_dataset): @@ -37,16 +41,16 @@ for epoch in range(epochs): optimizer.apply_gradients(zip(grads, model.trainable_weights)) train_acc_metric.update_state(y_batch_train, logits) -+ dvclive.log("train/accuracy", float(train_acc_metric.result()) ++ live.log("train/accuracy", float(train_acc_metric.result()) train_acc_metric.reset_states() for x_batch_val, y_batch_val in val_dataset: val_logits = model(x_batch_val, training=False) val_acc_metric.update_state(y_batch_val, val_logits) -+ dvclive.log("val/accuracy", float(val_acc_metric.result()) ++ live.log("val/accuracy", float(val_acc_metric.result()) val_acc_metric.reset_states() -+ dvclive.next_step() ++ live.next_step() ``` This will generate the metrics logs and summaries as described in the diff --git a/content/docs/dvclive/ml-frameworks/xgboost.md b/content/docs/dvclive/ml-frameworks/xgboost.md index d8dbaf01c3..c2c205961e 100644 --- 
a/content/docs/dvclive/ml-frameworks/xgboost.md +++ b/content/docs/dvclive/ml-frameworks/xgboost.md @@ -41,16 +41,25 @@ This will generate the metrics logs and summaries as described in the ## Parameters -- `model_file` - The name of the file where the model will be saved at the end - of each `step`. +- `model_file` - (`None` by default) - The name of the file where the model will + be saved at the end of each `step`. -Example: +- `**kwargs` - Any additional arguments will be passed to + [`Live`](/docs/dvclive/api-reference/live). + +## Examples + +- Using `**kwargs` to customize [`Live`](/docs/dvclive/api-reference/live). ```python xgboost.train( param, dtrain, num_round=5, - callbacks=[DvcLiveCallback("eval_data", model_file="model.json")], + callbacks=[ + DvcLiveCallback( + "eval_data", + path="custom_path", + summary=False)], evals=[(dval, "eval_data")]) ``` diff --git a/content/docs/sidebar.json b/content/docs/sidebar.json index 1a79209f4f..f63d316db3 100644 --- a/content/docs/sidebar.json +++ b/content/docs/sidebar.json @@ -61,6 +61,7 @@ }, { "slug": "experiments", + "label": "Experiments", "tutorials": { "katacoda": "https://katacoda.com/dvc/courses/get-started/experiments" } @@ -245,7 +246,7 @@ "slug": "doctor" }, { - "label": "experiments", + "label": "exp", "slug": "exp", "source": "exp/index.md", "children": [ @@ -629,6 +630,10 @@ "slug": "pytorch", "label": "PyTorch" }, + { + "slug": "pytorch-lightning", + "label": "PyTorch Lightning" + }, { "slug": "tensorflow", "label": "TensorFlow" @@ -645,20 +650,27 @@ "source": "api-reference/index.md", "children": [ { - "slug": "get_step", - "label": "get_step()" - }, - { - "slug": "init", - "label": "init()" - }, - { - "slug": "log", - "label": "log()" - }, - { - "slug": "next_step", - "label": "next_step()" + "slug": "live", + "label": "Live()", + "source": "api-reference/live/index.md", + "children": [ + { + "slug": "log", + "label": "log()" + }, + { + "slug": "get_step", + "label": "get_step()" + }, + { + 
"slug": "next_step", + "label": "next_step()" + }, + { + "slug": "set_step", + "label": "set_step()" + } + ] } ] } diff --git a/content/docs/start/experiments.md b/content/docs/start/experiments.md index 09bd680eee..e84dadacda 100644 --- a/content/docs/start/experiments.md +++ b/content/docs/start/experiments.md @@ -2,293 +2,240 @@ title: 'Get Started: Experiments' --- -# Get Started: Experiments +# Get Started with Experiments -_New in DVC 2.0_ +In machine learning projects, the number of experiments grows +rapidly. DVC can track these experiments, list and compare their most relevant +parameters and metrics, navigate among them, and commit only the ones that we +need to Git. -Experiments proliferate quickly in ML projects where there are many -parameters to tune or other permutations of the code. We can organize such -projects and keep only what we ultimately need with `dvc experiments`. DVC can -track experiments for you so there's no need to commit each one to Git. This way -your repo doesn't become polluted with all of them. You can discard experiments -once they're no longer needed. - -> 📖 See [Experiment Management](/doc/user-guide/experiment-management) for more -> information on DVC's approach. +> ⚠️This video is out-of-date and will be updated soon! Where there are +> discrepancies between docs and video, please follow the docs. https://youtu.be/FHQq_zZz5ms -## Running experiments +In this section, we explore the basic features of DVC experiment management with +the [`example-dvc-experiments`][ede] project. -Previously, we learned how to tune [ML pipelines](/doc/start/data-pipelines) and -[compare the changes](/doc/start/metrics-parameters-plots). Let's further -increase the number of features in the `featurize` stage to see how it compares. +[ede]: https://github.com/iterative/example-dvc-experiments -`dvc exp run` makes it easy to change hyperparameters and run a new -experiment: +
-```dvc -$ dvc exp run --set-param featurize.max_features=3000 -``` +### ⚙️ Installing the example project -
+These commands are run in the [`example-dvc-experiments`][ede] project. You can +run the commands in this document after cloning the repository, installing the +requirements, and pulling the data. -### 💡 Expand to see what happens under the hood. +#### Clone the project and create virtual environment -`dvc exp run` is similar to `dvc repro` but with some added conveniences for -running experiments. The `--set-param` (or `-S`) flag sets the values for -parameters as a shortcut for editing `params.yaml`. +Please clone the project and create a virtual environment. -Check that the `featurize.max_features` value has been updated in `params.yaml`: +> We strongly recommend creating a virtual environment to keep the libraries we +> use isolated from the rest of your system. This prevents version conflicts. -```git - featurize: -- max_features: 1500 -+ max_features: 3000 +```dvc +$ git clone https://github.com/iterative/example-dvc-experiments -b get-started +$ cd example-dvc-experiments +$ virtualenv .venv +$ . .venv/bin/activate +$ python -m pip install -r requirements.txt ``` -Any edits to dependencies (parameters or source code) will be -reflected in the experiment run. +#### Get the data set -
- -`dvc exp diff` compares experiments: +The repository we cloned doesn't contain the dataset. Instead of storing the +data in the Git repository, we use DVC to retrieve it from a shared data store. In +this case, we use `dvc pull` to update the missing data files. ```dvc -$ dvc exp diff -Path Metric Value Change -scores.json avg_prec 0.56191 0.009322 -scores.json roc_auc 0.93345 0.018087 - -Path Param Value Change -params.yaml featurize.max_features 3000 1500 +$ dvc pull ``` -## Queueing experiments +The repository already contains the necessary configuration to run the +experiments. -So far, we have been tuning the `featurize` stage, but there are also parameters -for the `train` stage (which trains a -[random forest classifier](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html)). - -These are the `train` parameters from `params.yaml`: - -```yaml -train: - seed: 20170428 - n_est: 50 - min_split: 2 -``` +
-Let's set up experiments with different hyperparameters. We can use the -`--queue` flag to define all the combinations we want to try without executing -anything (yet): +Running the experiment with the default project settings requires only the +command: ```dvc -$ dvc exp run --queue -S train.min_split=8 -Queued experiment 'd3f6d1e' for future execution. -$ dvc exp run --queue -S train.min_split=64 -Queued experiment 'f1810e0' for future execution. -$ dvc exp run --queue -S train.min_split=2 -S train.n_est=100 -Queued experiment '7323ea2' for future execution. -$ dvc exp run --queue -S train.min_split=8 -S train.n_est=100 -Queued experiment 'c605382' for future execution. -$ dvc exp run --queue -S train.min_split=64 -S train.n_est=100 -Queued experiment '0cdee86' for future execution. +$ dvc exp run +... +Reproduced experiment(s): exp-b28f0 +Experiment results have been applied to your workspace. +... ``` -Next, run all (`--run-all`) queued experiments in parallel (using `--jobs`): +It runs the specified command (`python train.py`) in `dvc.yaml`. That command +writes the metrics values to `metrics.json`. -```dvc -$ dvc exp run --run-all --jobs 2 -``` +This experiment is then associated with the values found in the parameters file +(`params.yaml`), and other dependencies (`data/images/`) with these produced +metrics. -## Comparing many experiments +The purpose of the `dvc exp` family of commands is to let you run, capture, and +compare the machine learning experiments at once as you iterate on your project. +The artifacts like models and metrics produced by each experiment are tracked by +DVC, and the associated parameters and metrics can be committed to Git as text +files. -To compare all of these experiments, we need more than `diff`. 
`dvc exp show` -compares any number of experiments in one table: +You can review the experiment results with `dvc exp show` and see these metrics +and results in a nicely formatted table: ```dvc -$ dvc exp show --no-timestamp \ - --include-params train.n_est,train.min_split +$ dvc exp show ``` ```dvctable -┏━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ -┃ neutral:**Experiment** ┃ metric:**avg_prec** ┃ metric:**roc_auc** ┃ param:**train.n_est**┃ param:**train.min_split** ┃ -┡━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ -│ **workspace** │ **0.56191** │ **0.93345** │ **50** │ **2** │ -│ **master** │ **0.55259** │ **0.91536** │ **50** │ **2** │ -│ ├── exp-bfe64 │ 0.57833 │ 0.95555 │ 50 │ 8 │ -│ ├── exp-b8082 │ 0.59806 │ 0.95287 │ 50 │ 64 │ -│ ├── exp-c7250 │ 0.58876 │ 0.94524 │ 100 │ 2 │ -│ ├── exp-b9cd4 │ 0.57953 │ 0.95732 │ 100 │ 8 │ -│ ├── exp-98a96 │ 0.60405 │ 0.9608 │ 100 │ 64 │ -│ └── exp-ad5b1 │ 0.56191 │ 0.93345 │ 50 │ 2 │ -└───────────────┴──────────┴─────────┴────────────┴─────────────────┘ +┏━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ +┃ white:**Experiment** ┃ white:**Created** ┃ yellow:**loss** ┃ yellow:**acc** ┃ blue:**train.epochs** ┃ blue:**model.conv_units** ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +│ workspace │ - │ 0.23282 │ 0.9152 │ 10 │ 16 │ +│ 7317bc6 │ Jul 18, 2021 │ - │ - │ 10 │ 16 │ +│ └── 1a1d858 [exp-6dccf] │ 03:21 PM │ 0.23282 │ 0.9152 │ 10 │ 16 │ +└─────────────────────────┴──────────────┴─────────┴────────┴──────────────┴──────────────────┘ ``` -Each experiment is given an arbitrary name by default (although we can specify -one with `dvc exp run -n`.) We can see that `exp-98a96` performed best among -both of our metrics, with 100 estimators and a minimum of 64 samples to split a -node. - -> See `dvc exp show --help` for more info on its options. 
- -## Persisting experiments - -Now that we know the best parameters, let's keep that experiment and ignore the -rest. - -`dvc exp apply` rolls back the workspace to the specified -experiment: - -```dvc -$ dvc exp apply exp-98a96 -Changes for experiment 'exp-98a96' have been applied to your workspace. -``` +The `workspace` row in the table shows the results of the most recent experiment +that's available in the workspace. The table also shows each +experiment in a separate row, along with the Git commit IDs they are attached +to. We can see that the experiment we ran has the name `exp-6dccf` and was run +from the commit ID `7317bc6`.
-### 💡 Expand to see what happens under the hood. +### ℹ️ If you used `dvc repro` before -`dvc exp apply` is similar to `dvc checkout`, but works with experiments -instead. DVC tracks everything in the pipeline for each experiment (parameters, -metrics, dependencies, and outputs), retrieving things later as needed. +Earlier versions of DVC used `dvc repro` to run the pipeline. If you already +have a DVC project, you may already be using `dvc repro`. -Check that `scores.json` reflects the metrics in the table above: +We use `dvc repro` to run the pipeline as found in the workspace. +All the parameters and dependencies are retrieved from the current workspace. It +doesn't use any specialized mechanism to track experiments. -```json -{ "avg_prec": 0.6040544652105823, "roc_auc": 0.9608017142900953 } -``` +When you have a large number of experiments that you don't want to commit all to +Git, it's better to use `dvc exp run`. It lets you change the parameters +quickly, can track the history of artifacts and has facilities to compare these +experiments easily.
-Once an experiment has been applied to the workspace, it is no different from -reproducing the result without `dvc exp run`. Let's make it persistent in our -regular pipeline by committing it in our Git branch: - -```dvc -$ git add dvc.lock params.yaml prc.json roc.json scores.json -$ git commit -a -m "Preserve best random forest experiment" -``` - -## Sharing experiments +Now let's do some more experimentation. -After committing the best experiments to our Git branch, we can -[store and share](/doc/start/data-and-model-versioning#storing-and-sharing) them -remotely like any other iteration of the pipeline. +DVC allows you to update the parameters defined in the pipeline without modifying +the files manually. We use this feature to set the convolutional units in +`train.py`. ```dvc -dvc push -git push +$ dvc exp run --set-param model.conv_units=24 +... +Reproduced experiment(s): exp-7b56f +Experiment results have been applied to your workspace. +... ```
-### 💡 Important information on storing experiments remotely. - -The commands in this section require both a `dvc remote default` and a -[Git remote](https://git-scm.com/book/en/v2/Git-Basics-Working-with-Remotes). A -DVC remote stores the experiment data, and a Git remote stores the code, -parameters, and other metadata associated with the experiment. DVC supports -various types of remote storage (local file system, SSH, Amazon S3, Google Cloud -Storage, HTTP, HDFS, etc.). The Git remote is often a central Git server -(GitHub, GitLab, BitBucket, etc.). +### ⚙️ Run multiple experiments in parallel -
- -Experiments that have not been made persistent will not be stored or shared -remotely through `dvc push` or `git push`. +Instead of running the experiments one-by-one, we can define them to run in a +batch. This is especially handy when you have long running experiments. -`dvc exp push` enables storing and sharing any experiment remotely. +We add experiments to the queue using the `--queue` option of `dvc exp run`. We +also use `-S` (`--set-param`) to set a value for the parameter. ```dvc -$ dvc exp push gitremote exp-bfe64 -Pushed experiment 'exp-bfe64' to Git remote 'gitremote'. +$ dvc exp run --queue -S model.conv_units=32 +Queued experiment '3cac8c6' for future execution. +$ dvc exp run --queue -S model.conv_units=64 +Queued experiment '23660b6' for future execution. +$ dvc exp run --queue -S model.conv_units=128 +Queued experiment '6591a57' for future execution. +$ dvc exp run --queue -S model.conv_units=256 +Queued experiment '9109ea9' for future execution. ``` -`dvc exp list` shows all experiments that have been saved. +Next, run all (`--run-all`) queued experiments in parallel. You can specify the +number of parallel processes using `--jobs`: ```dvc -$ dvc exp list gitremote --all -72ed9cd: - exp-bfe64 +$ dvc exp run --run-all --jobs 2 ``` -`dvc exp pull` retrieves the experiment from a Git remote. + -```dvc -$ dvc exp pull gitremote exp-bfe64 -Pulled experiment 'exp-bfe64' from Git remote 'gitremote'. -``` +## Comparing and persisting experiments -> All these commands take a Git remote as an argument. A `dvc remote default` is -> also required to share the experiment data. +The experiments are run several times with different parameters. We use +`dvc exp show` to compare all of these experiments. 
-## Cleaning up +```dvc +$ dvc exp show +``` -Let's take another look at the experiments table: +```dvctable +┏━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ +┃ white:**Experiment** ┃ white:**Created** ┃ yellow:**loss** ┃ yellow:**acc** ┃ blue:**train.epochs** ┃ blue:**model.conv_units** ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +│ workspace │ - │ 0.23508 │ 0.9151 │ 10 │ 24 │ +│ 7317bc6 │ Jul 18, 2021 │ - │ - │ 10 │ 16 │ +│ ├── e2647ef [exp-ee8a4] │ 05:14 PM │ 0.23146 │ 0.9145 │ 10 │ 64 │ +│ ├── 15c9451 [exp-a9be6] │ 05:14 PM │ 0.25231 │ 0.9102 │ 10 │ 32 │ +│ ├── 9c32227 [exp-17dd9] │ 04:46 PM │ 0.23687 │ 0.9167 │ 10 │ 256 │ +│ ├── 8a9cb15 [exp-29d93] │ 04:46 PM │ 0.24459 │ 0.9134 │ 10 │ 128 │ +│ ├── dfc536f [exp-a1bd9] │ 03:35 PM │ 0.23508 │ 0.9151 │ 10 │ 24 │ +│ └── 1a1d858 [exp-6dccf] │ 03:21 PM │ 0.23282 │ 0.9152 │ 10 │ 16 │ +└─────────────────────────┴──────────────┴─────────┴────────┴──────────────┴──────────────────┘ +``` + +By default, it shows all the parameters and the metrics with the timestamp. If +you have a large number of parameters, metrics or experiments, this may lead to +a cluttered view. You can limit the table to specific metrics, or parameters, or +hide the timestamp column with `--include-metrics`, `--include-params`, or +`--no-timestamp` options of the command, respectively. 
```dvc $ dvc exp show --no-timestamp \ - --include-params train.n_est,train.min_split + --include-params model.conv_units --include-metrics acc ``` ```dvctable -┏━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ -┃ neutral:**Experiment** ┃ metric:**avg_prec** ┃ metric:**roc_auc** ┃ param:**train.n_est**┃ param:**train.min_split** ┃ -┡━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ -│ **workspace** │ **0.60405** │ **0.9608** │ **100** │ **64** │ -│ **master** │ **0.60405** │ **0.9608** │ **100** │ **64** │ -└────────────┴──────────┴─────────┴────────────┴─────────────────┘ -``` - -Where did all the experiments go? By default, `dvc exp show` only shows -experiments since the last commit, but don't worry. The experiments remain -cached and can be shown or applied. For example, use `-n` to show -experiments from the previous _n_ commits: +┏━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ +┃ white:**Experiment** ┃ yellow:**acc** ┃ blue:**model.conv_units** ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +│ workspace │ 0.9151 │ 24 │ +│ 7317bc6 │ - │ 16 │ +│ ├── e2647ef [exp-ee8a4] │ 0.9145 │ 64 │ +│ ├── 15c9451 [exp-a9be6] │ 0.9102 │ 32 │ +│ ├── 9c32227 [exp-17dd9] │ 0.9167 │ 256 │ +│ ├── 8a9cb15 [exp-29d93] │ 0.9134 │ 128 │ +│ ├── dfc536f [exp-a1bd9] │ 0.9151 │ 24 │ +│ └── 1a1d858 [exp-6dccf] │ 0.9152 │ 16 │ +└─────────────────────────┴────────┴──────────────────┘ +``` + +After selecting an experiment from the table, you can create a Git branch that +contains the experiment with all its related files. ```dvc -$ dvc exp show -n 2 --no-timestamp \ - --include-params train.n_est,train.min_split -``` +$ dvc exp branch exp-17dd9 "cnn-256" +Git branch 'cnn-256' has been created from experiment 'exp-17dd9'.
+To switch to the new branch run: -```dvctable -┏━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ -┃ neutral:**Experiment** ┃ metric:**avg_prec** ┃ metric:**roc_auc** ┃ param:**train.n_est**┃ param:**train.min_split** ┃ -┡━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ -│ **workspace** │ **0.60405** │ **0.9608** │ **100** │ **64** │ -│ **master** │ **0.60405** │ **0.9608** │ **100** │ **64** │ -│ **64d74b2** │ **0.55259** │ **0.91536** │ **50** │ **2** │ -│ ├── exp-bfe64 │ 0.57833 │ 0.95555 │ 50 │ 8 │ -│ ├── exp-b8082 │ 0.59806 │ 0.95287 │ 50 │ 64 │ -│ ├── exp-c7250 │ 0.58876 │ 0.94524 │ 100 │ 2 │ -│ ├── exp-98a96 │ 0.60405 │ 0.9608 │ 100 │ 64 │ -│ ├── exp-b9cd4 │ 0.57953 │ 0.95732 │ 100 │ 8 │ -│ └── exp-ad5b1 │ 0.56191 │ 0.93345 │ 50 │ 2 │ -└───────────────┴──────────┴─────────┴────────────┴─────────────────┘ + git checkout cnn-256 ``` -Eventually, old experiments may clutter the experiments table. - -`dvc exp gc` removes all references to old experiments: +You can then checkout and continue working from this branch, or merge the branch +into your `main` branch with the usual Git commands. -```dvc -$ dvc exp gc --workspace -$ dvc exp show -n 2 --no-timestamp \ - --include-params train.n_est,train.min_split -``` +## Go Further -```dvctable -┏━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ -┃ neutral:**Experiment** ┃ metric:**avg_prec** ┃ metric:**roc_auc** ┃ param:**train.n_est**┃ param:**train.min_split** ┃ -┡━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ -│ **workspace** │ **0.60405** │ **0.9608** │ **100** │ **64** │ -│ **master** │ **0.60405** │ **0.9608** │ **100** │ **64** │ -│ **64d74b2** │ **0.55259** │ **0.91536** │ **50** │ **2** │ -└────────────┴──────────┴─────────┴────────────┴─────────────────┘ -``` +There are many other features of `dvc exp`, like cleaning up the unused +experiments, sharing them without committing into Git or getting differences +between two experiments. 
-> `dvc exp gc` only removes references to the experiments; not the cached -> objects associated with them. To clean up the cache, use -> `dvc gc`. +Please see the section on +[Experiment Management](/doc/user-guide/experiment-management) in the User's +Guide or `dvc exp` and subcommands in the Command Reference. diff --git a/content/docs/studio/troubleshooting.md b/content/docs/studio/troubleshooting.md index 2ebb3698e5..a79b208c41 100644 --- a/content/docs/studio/troubleshooting.md +++ b/content/docs/studio/troubleshooting.md @@ -3,6 +3,17 @@ Here we provide help for some of the problems that you may encounter when using DVC Studio. +- [Error: No data found to visualize](#error-no-data-found-to-visualize) +- [Error: No DVC repo was found at the root](#error-no-dvc-repo-was-found-at-the-root) +- [Error: Non-DVC sub-directory of a monorepo](#error-non-dvc-sub-directory-of-a-monorepo) +- [Error: No commits were found for the sub-directory](#error-no-commits-were-found-for-the-sub-directory) +- [View got created, but does not contain any data](#view-got-created-but-does-not-contain-any-data) +- [View does not contain the columns that I want](#view-does-not-contain-the-columns-that-i-want) +- [View contains columns that I did not import](#view-contains-columns-that-i-did-not-import) +- [Error: Failed to push experiment to repository](#error-failed-to-push-experiment-to-repository) + +## Support + If you need further help, please send us a message using `Help` on the [project website](https://studio.iterative.ai). You can also create a support ticket on [GitHub](https://github.com/iterative/studio-support) or join the @@ -88,3 +99,71 @@ metrics or hyperparameters that you want to visualize. Refer to the [DVC documentation](https://dvc.org/doc) for help on making commits to a DVC repository. Instructions on how to specify custom files can be found [here](/doc/studio/user-guide/views/view-settings#configuring-view-settings). 
+ +## View does not contain the columns that I want + +There are two possible reasons for this: + +1. **The required columns were not imported:** DVC Studio will import up to 200 + columns (metrics, hyperparameters and files) from your Git repository. If + your repository has more than 200 columns, you should + [select the columns that are mandatory to import](/doc/studio/user-guide/views/view-settings#mandatory-columns). + DVC Studio will also import the unselected columns, but only up to a maximum + of 200 columns. + + **What if there are more than 200 mandatory columns?** Currently DVC Studio + cannot import over 200 columns. If you have a large repository (with more + than 200 mandatory columns), one solution is to split the + metrics/hyperparameters/files that you want to display over + multiple subdirectories in your git repository. For each subdirectory, you + can create a new view that is limited to that subdirectory. + + To create views for subdirectories, + [specify the project directory in view settings](/doc/studio/user-guide/views/view-settings#project-directory). + + If this solution does not work for your use case, please create a support + ticket in the + [DVC Studio support GitHub repository](https://github.com/iterative/studio-support). + +2. **The required columns are hidden:** In the view, you can hide the columns + that you do not want to display. If any column that you want is not visible, + make sure you have not hidden it. The following video shows how you can + show/hide columns. Once you show/hide columns, remember to save the changes. + + ###### Show/hide columns + + ![Showing and hiding columns](https://static.iterative.ai/img/studio/show_hide_columns.gif) + +## View contains columns that I did not import + +This is not an error. Columns that you select as mandatory in view settings are +guaranteed to be imported.
However, columns that are not selected can still be +imported and included in the view - if you have selected fewer than 200 columns, +DVC Studio will also import some of the unselected +columns, up to a total of 200 columns. + +If you would like to explicitly hide columns, you can simply hide them in the +view. Once you show/hide columns, you can save the changes. Check out the +[above video](#showhide-columns) to see how you can show/hide columns. Once you +show/hide columns, remember to save the changes. + +## Error: Failed to push experiment to repository + +This is a non-specific error with a range of possible causes. To resolve it, +please check: + +- Your account is able to push to the repository. +- The repository is **not** marked as archived / read only. +- In case of GitHub/GitLab/BitBucket enterprise organizations: there is no IP + whitelisting policy in place which limits access to the organization's + resources. +- Whether [GitHub][gh-status], [GitLab][gl-status], or [BitBucket][bb-status] + are experiencing service disruptions. In case of an on-prem deployment, please + check with your administrator. + +[gh-status]: https://www.githubstatus.com/ +[gl-status]: https://status.gitlab.com/ +[bb-status]: https://bitbucket.status.atlassian.com/ + +If you get this error and none of the above applies, please +[get in touch with us](#support). diff --git a/content/docs/studio/user-guide/explore-experiments.md b/content/docs/studio/user-guide/explore-experiments.md index a46f9608bc..cef4351f90 100644 --- a/content/docs/studio/user-guide/explore-experiments.md +++ b/content/docs/studio/user-guide/explore-experiments.md @@ -47,8 +47,18 @@ what columns and values to display.
+ ![Showing and hiding columns](https://static.iterative.ai/img/studio/show_hide_columns.gif) + Additionally, you can click and drag the columns in the table to rearrange them as per your preferences. + + If your view is missing some required columns or includes columns that you do + not want, refer to the following troubleshooting sections to understand why + this may have happened. + + - [View does not contain the columns that I want](/doc/studio/troubleshooting#view-does-not-contain-the-columns-that-i-want) + - [View contains columns that I did not import](/doc/studio/troubleshooting#view-contains-columns-that-i-did-not-import) + - **Selected only:** Use this toggle switch to show/hide experiments that you have not selected. - **Delta mode:** Toggle between absolute values and difference from the first diff --git a/content/docs/studio/user-guide/teams.md b/content/docs/studio/user-guide/teams.md index 62ac8318ef..4ca06264c9 100644 --- a/content/docs/studio/user-guide/teams.md +++ b/content/docs/studio/user-guide/teams.md @@ -82,7 +82,7 @@ that of any other collaborator who has been assigned the `Admin` role. | Specify project directory | No | No | Yes | Yes | | Use existing cloud / data remote credentials | No | No | Yes | Yes | | Configure cloud / data remote credentials | No | No | No | Yes | -| Manage tracking scope | No | No | Yes | Yes | +| Manage mandatory columns (tracking scope) | No | No | Yes | Yes | | Manage custom files | No | No | Yes | Yes | ### Privileges to manage the team diff --git a/content/docs/studio/user-guide/views/view-settings.md b/content/docs/studio/user-guide/views/view-settings.md index 29edb46dc4..485bde8379 100644 --- a/content/docs/studio/user-guide/views/view-settings.md +++ b/content/docs/studio/user-guide/views/view-settings.md @@ -21,7 +21,7 @@ DVC Studio to be able to access the data required for visualization. 
Additionally, you can also configure view settings to [change the name](#view-name) of your view and to -[define the tracking scope](#tracking-scope) for your view. +[select mandatory columns](#mandatory-columns) to import in your view. ### Non-DVC repositories @@ -104,15 +104,31 @@ they are not saved into Git. It does not access any other data in your remote storage. And you do not need to provide the credentials if any DVC data remote in not used in your Git repository. -### Tracking scope +### Mandatory columns -DVC Studio can track upto 200 metrics, parameters, and files. If you have more -than 200 values in your Git repository, you can specify which ones to track and -which ones to leave out. To ensure that a value is included, make sure that it -is selected in the tracking scope. Any value that is not selected may not -display in your view. +##### (Tracking scope) -![](https://static.iterative.ai/img/studio/view_settings_tracking_scope.png) +If your repository exceeds 200 columns, DVC Studio will import a subset. The +columns that are not imported will not be available to display in your view. In +the settings for "Mandatory columns", you can select which columns are mandatory +to import. DVC Studio will also import unselected columns up to a maximum +of 200. + +![](https://static.iterative.ai/img/studio/view_settings_mandatory_columns.png) + +Note that some non-mandatory columns will also be imported if there are fewer +than 200 mandatory columns. If you would like to hide specific columns from your +view, you can do so in the view itself. For this, refer to +[Display preferences -> Columns](/doc/studio/user-guide/explore-experiments#display-preferences). + +If your view is missing some required columns or includes columns that you do +not want, refer to the following troubleshooting sections to understand why this +may have happened.
+ +- [View does not contain the columns that I want](/doc/studio/troubleshooting#view-does-not-contain-the-columns-that-i-want) +- [View contains columns that I did not import](/doc/studio/troubleshooting#view-contains-columns-that-i-did-not-import) + +Note: The **Mandatory columns** section was earlier called **Tracking scope**. ### Custom metrics and parameters diff --git a/content/docs/user-guide/basic-concepts/experiment.md b/content/docs/user-guide/basic-concepts/experiment.md index 5cd8c44b0e..5f1228cc49 100644 --- a/content/docs/user-guide/basic-concepts/experiment.md +++ b/content/docs/user-guide/basic-concepts/experiment.md @@ -7,5 +7,5 @@ tooltip: >- experiments](/doc/start/experiments), having [built-in mechanisms](/doc/user-guide/experiment-management) like the [run-cache](/doc/user-guide/project-structure/internal-files#run-cache) and - the `dvc experiments` commands (available on DVC 2.0 and above). + the `dvc exp` commands (available on DVC 2.0 and above). --- diff --git a/content/docs/user-guide/contributing/blog.md b/content/docs/user-guide/contributing/blog.md index d3b3ad28e0..7bdb5c4c35 100644 --- a/content/docs/user-guide/contributing/blog.md +++ b/content/docs/user-guide/contributing/blog.md @@ -41,7 +41,6 @@ tags: - Version Control - AI --- - ``` - `title` (**required**) - title of the post. diff --git a/content/docs/user-guide/contributing/docs.md b/content/docs/user-guide/contributing/docs.md index 08d753f323..fa16a0271a 100644 --- a/content/docs/user-guide/contributing/docs.md +++ b/content/docs/user-guide/contributing/docs.md @@ -6,8 +6,8 @@ the documentation content, or (rare) changes to the JS engine we use to run the website. In case of a minor change, you can use the **Edit on GitHub** button to open the -source code page. Use thethe Edit button (pencil icon) to edit the file -in-place, and then **Commit changes** from the bottom of the page. +source code page. 
Use the Edit button (pencil icon) to edit the file in-place, +and then **Commit changes** from the bottom of the page. > Please see our > [Writing a Blog Post guide](https://dvc.org/doc/user-guide/contributing/blog) @@ -56,8 +56,17 @@ changes before submitting them, and it's quite necessary when making changes to the website engine itself. Source code and content files need to be properly formatted and linted as well, which is also ensured by the full setup below. -Make sure you have a recent LTS version of [Node.js](https://nodejs.org/en/) -(`>=12.0.0`, `<=15.x`), and install [Yarn](https://yarnpkg.com/): +Make sure you have [Python](https://www.python.org/downloads/) 3.6+, a recent +LTS version of [Node.js](https://nodejs.org/en/) (`>=12.0.0`, `<=16.x`), and +install [Yarn](https://yarnpkg.com/): + +> In Windows, you may need to install [Visual Studio Build Tools], and the +> [Windows SDK] first. + +[windows sdk]: + https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk/ +[visual studio build tools]: + https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019 ```dvc $ npm install -g yarn @@ -134,8 +143,8 @@ Some available variables: ## Doc style guidelines (JavaScript and Markdown) -Some the rules below are be applied automatically by a pre-commit Git hook that -is installed when `yarn` runs (see [dev env](#development-environment)). +Some of the following rules are applied automatically by a pre-commit Git hook +that is installed when `yarn` runs (see [dev env](#development-environment)). - No trailing white spaces are allowed. @@ -158,9 +167,11 @@ is installed when `yarn` runs (see [dev env](#development-environment)). create a link to that API method automatically. (No need to use `[]()` explicitly to create them.) -- Markdown: Neither bullet lists nor each item's should be too long (3 sentence +- Markdown: Bullet lists shouldn't be too long (5-7 items max., ideally).
+ +- Markdown: The text in each bullet item also shouldn't be too long (3 sentence paragraphs max.) Full sentence bullets should begin with a capital letter and - end in period `.` otherwise they can be all lower case and have no ending + end in period `.`. Otherwise, they can be all lower case and have no ending punctuation. Bullets can be separated by an empty line if they contain several paragraphs, but this is discouraged: try to keep items short. @@ -187,9 +198,9 @@ We try to use a casual and fun tone in our docs. We also avoid authoritative language such as "As you can see, clearly this is what happened, of course" etc. which while good-intentioned, may scare readers off. -We prefer human-friendly language than exact jargon, as long as it's correct, -even if using general terminology. Example: avoid Git jargon such as _revision_ -or _reference_, preferring the more basic concept _commit_. +We prefer general, human-friendly language rather than exact jargon as long as +it's correct. Example: avoid Git jargon such as _revision_ or _reference_, +preferring the more basic terms _commit_ or _version_. The [command reference](/doc/command-reference) contains some of our most technical documents where specialized language is used the most, but even there, diff --git a/content/docs/user-guide/experiment-management/checkpoints.md b/content/docs/user-guide/experiment-management/checkpoints.md index fda7eb6609..2fce91fd07 100644 --- a/content/docs/user-guide/experiment-management/checkpoints.md +++ b/content/docs/user-guide/experiment-management/checkpoints.md @@ -3,7 +3,7 @@ ML checkpoints are an important part of deep learning because ML engineers like to save the model files at certain points during a training process. -With DVC experiments and checkpoints, you can: +With checkpoint experiments, you can: - Implement the best practice in deep learning to save your model weights as checkpoints. @@ -27,7 +27,7 @@ https://youtu.be/PcDo-hCvYpw
-## ⚙️ Setting up the project +### ⚙️ Setting up the project You can follow along with the steps here or you can clone the repo directly from GitHub and play with it. To clone the repo, run the following commands. @@ -153,10 +153,10 @@ tracking the metrics along with each checkpoint, so we'll need to add a few lines of code. In the `train.py` file, import the [`dvclive`](/doc/dvclive) package with the -other imports: +other imports: ```python -import dvclive +from dvclive import Live ``` > It's also possible to use DVC's Python API to register checkpoints, or to use @@ -166,6 +166,8 @@ Then update the following lines of code in the `main` method inside of the training epoch loop. ```git ++ dvclive = Live() + # Iterate over training epochs. for i in range(1, EPOCHS+1): # Train in batches. @@ -215,19 +217,19 @@ Generating lock file 'dvc.lock' Updating lock file 'dvc.lock' Checkpoint experiment iteration 'd99d81c'. -file:///Users/milecia/Repos/checkpoints-tutorial/dvclive.html +file:///Users/milecia/Repos/checkpoints-tutorial/dvclive_dvc_plots/index.html Epoch 2: loss=1.25374174118042 Epoch 2: acc=0.7738 Updating lock file 'dvc.lock' Checkpoint experiment iteration '963b396'. -file:///Users/milecia/Repos/checkpoints-tutorial/dvclive.html +file:///Users/milecia/Repos/checkpoints-tutorial/dvclive_dvc_plots/index.html Epoch 3: loss=0.7242147922515869 Epoch 3: acc=0.8284 Updating lock file 'dvc.lock' Checkpoint experiment iteration 'd630b92'.
-file:///Users/milecia/Repos/checkpoints-tutorial/dvclive.html +file:///Users/milecia/Repos/checkpoints-tutorial/dvclive_dvc_plots/index.html Epoch 4: loss=0.5083536505699158 Epoch 4: acc=0.8538 Updating lock file 'dvc.lock' diff --git a/content/docs/user-guide/experiment-management/running-experiments.md b/content/docs/user-guide/experiment-management/running-experiments.md index cb602e44b0..bfa7dff3e8 100644 --- a/content/docs/user-guide/experiment-management/running-experiments.md +++ b/content/docs/user-guide/experiment-management/running-experiments.md @@ -146,12 +146,17 @@ $ dvc exp run -S myparams.toml:learning_rate = 0.0001 ### Updating experiment parameters on-the-fly -DVC allows to update the parameters from command line when running -`dvc experiments`. The `--set-param` (`-S`) option takes a parameter name and -its value, and updates the params file before the run. +DVC allows you to update parameters from the command line when running experiments. The +`--set-param` (`-S`) option takes an existing parameter name and its value, and +updates the params file before the run. ```dvc +$ cat params.yaml +model: + learning_rate: 0.001 + $ dvc exp run --set-param model.learning_rate=0.0002 +... ``` > Note that parameters are attached to experiments so you can view them together @@ -164,10 +169,6 @@ times: $ dvc exp run -S learning_rate=0.001 -S units=128 ``` -> ⚠️ Note that DVC doesn't check whether parameters given to `--set-param` are -> already in the parameters file. If there is a typo, a new or different param -> will be added/changed.
- ## The experiments queue The `--queue` option of `dvc exp run` tells DVC to append an experiment for diff --git a/content/docs/user-guide/how-to/update-tracked-data.md b/content/docs/user-guide/how-to/update-tracked-data.md index 6bb1708fc7..e5407be74f 100644 --- a/content/docs/user-guide/how-to/update-tracked-data.md +++ b/content/docs/user-guide/how-to/update-tracked-data.md @@ -49,6 +49,10 @@ Add the new version of the file back with DVC: $ dvc add train.tsv $ git add train.tsv.dvc $ git commit -m "modify train data" + +# If you have remote storage and/or an upstream repo: +$ dvc push +$ git push ``` ## Replacing files @@ -75,4 +79,8 @@ And start tracking it again: $ dvc add train.tsv $ git add train.tsv.dvc .gitignore $ git commit -m "new train data" + +# If you have remote storage and/or an upstream repo: +$ dvc push +$ git push ``` diff --git a/content/docs/user-guide/project-structure/internal-files.md b/content/docs/user-guide/project-structure/internal-files.md index bf4626c6e2..456df8fd1b 100644 --- a/content/docs/user-guide/project-structure/internal-files.md +++ b/content/docs/user-guide/project-structure/internal-files.md @@ -37,16 +37,23 @@ operation. - `.dvc/tmp`: Directory for miscellaneous temporary files - `.dvc/tmp/index`: Directory for remote index files that are used for - optimizing `dvc push`, `dvc pull`, `dvc fetch` and `dvc status -c` operations + optimizing `dvc push`, `dvc pull`, `dvc fetch` and `dvc status -c` operations. + + > This location may be overridden with `dvc config index.dir`. - `.dvc/tmp/md5s`: This directory is used for optimization. It contains a SQLite - database that stores hash values for files tracked in a DVC project. It also - saves the corresponding timestamps and inodes, to avoid unnecessary file hash - computations. + state database that stores hash values for files tracked in a DVC project. It + also saves the corresponding timestamps and inodes to avoid unnecessary file + hash computations. 
[note 2](#sqlite-default) + + > This parent location may be overridden with `dvc config state.dir`. + +- `.dvc/tmp/links`: This directory is used to cleanup your workspace when + calling `dvc checkout`. It contains a SQLite state database that stores a list + of file links created by DVC (from cache to workspace). + [note 2](#sqlite-default) -- `.dvc/tmp/links`: This directory contains a SQLite database that stores a list - of file links created by DVC (from cache to workspace). It's used - to cleanup your workspace when calling `dvc checkout`. + > This parent location may be overridden with `dvc config state.dir`. - `.dvc/tmp/updater`: This file is used store the latest available version of DVC. It's used to remind the user to upgrade when the installed version is diff --git a/content/docs/user-guide/project-structure/pipelines-files.md b/content/docs/user-guide/project-structure/pipelines-files.md index 474b8df552..cef84359c0 100644 --- a/content/docs/user-guide/project-structure/pipelines-files.md +++ b/content/docs/user-guide/project-structure/pipelines-files.md @@ -361,7 +361,7 @@ These are the fields that are accepted in each stage: | `always_changed` | Whether or not this stage is considered as changed by commands such as `dvc status` and `dvc repro`. `false` by default | | `meta` | (Optional) arbitrary metadata can be added manually with this field. Any YAML content is supported. `meta` contents are ignored by DVC, but they can be meaningful for user processes that read or write `.dvc` files directly. | | `desc` | (Optional) user description for this stage. This doesn't affect any DVC operations. | -| `live` | (Optional) [Dvclive](/doc/dvclive/user-guide/dvclive-with-dvc) configuration field | +| `live` | (Optional) [DVCLive](/doc/dvclive/dvclive-with-dvc) configuration field | `dvc.yaml` files also support `# comments`. 
diff --git a/gatsby-config.js b/gatsby-config.js index bd876aa906..957289faed 100644 --- a/gatsby-config.js +++ b/gatsby-config.js @@ -57,6 +57,7 @@ const plugins = [ path: path.join(__dirname, 'static') } }, + 'gatsby-plugin-image', 'community-page', { resolve: 'gatsby-transformer-remark', @@ -122,7 +123,14 @@ const plugins = [ } }, 'gatsby-transformer-sharp', - 'gatsby-plugin-sharp', + { + resolve: 'gatsby-plugin-sharp', + options: { + defaults: { + placeholder: 'blurred' + } + } + }, { resolve: 'gatsby-plugin-catch-links', options: { @@ -224,28 +232,6 @@ const plugins = [ } ] -if (process.env.GITHUB_TOKEN) { - plugins.push({ - resolve: `gatsby-source-github-api`, - options: { - // token: required by the GitHub API - token: process.env.GITHUB_TOKEN, - - // GraphQLquery: defaults to a search query - graphQLQuery: ` - { - repository(owner: "iterative", name: "dvc") { - stargazers { - totalCount - } - } - } - `, - variables: {} - } - }) -} - if (process.env.CONTEXT === 'production') { plugins.push({ resolve: 'gatsby-plugin-google-analytics', @@ -258,11 +244,7 @@ if (process.env.CONTEXT === 'production') { if (process.env.ANALYZE) { plugins.push({ - resolve: 'gatsby-plugin-webpack-bundle-analyzer', - options: { - analyzerPort: 4000, - production: process.env.NODE_ENV === 'production' - } + resolve: 'gatsby-plugin-webpack-bundle-analyser-v2' }) } diff --git a/package.json b/package.json index 39cb6baa06..0fae34ee0d 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,7 @@ "author": "LIMIT_BLOG_PAGES=1 SKIP_DOCS=true gatsby develop", "build": "gatsby build", "start": "node ./src/server/index.js", - "heroku-postbuild": "./scripts/deploy-with-s3.js", + "heroku-postbuild": "./scripts/heroku-deploy.sh", "test": "jest", "format-staged": "pretty-quick --staged --no-restage --bail", "format-check": "prettier --check '**/*.{js,jsx,md,tsx,ts,json}'", @@ -33,50 +33,51 @@ }, "homepage": "https://github.com/iterative/dvc.org#readme", "engines": { - "node": "<=15.x" + "node": 
"<=16.x" }, "dependencies": { "@hapi/wreck": "^17.0.0", - "@octokit/graphql": "^4.3.1", - "@reach/portal": "^0.10.0", - "@reach/router": "^1.3.3", - "@reach/tooltip": "^0.10.0", + "@octokit/graphql": "^4.8.0", + "@reach/portal": "^0.16.2", + "@reach/router": "^1.3.4", + "@reach/tooltip": "^0.16.2", "classnames": "^2.2.6", - "color": "^3.1.2", + "color": "^4.0.1", "compression": "^1.7.4", - "date-fns": "^2.11.1", + "date-fns": "^2.25.0", "docsearch.js": "^2.6.3", "ease-component": "^1.0.0", "express": "^4.17.1", - "fs-extra": "^9.0.0", - "gatsby": "^2.20.13", - "gatsby-image": "^2.3.1", - "gatsby-link": "^2.3.2", + "fs-extra": "^10.0.0", + "gatsby": "^3.14.3", + "gatsby-link": "^3.14.0", + "gatsby-plugin-image": "^1.14.1", "gatsby-plugin-parent-resolvers": "^1.0.1", - "gatsby-source-github-api": "^0.2.1", "github-markdown-css": "^4.0.0", - "iso-url": "^0.4.7", - "isomorphic-fetch": "^2.2.1", + "graphql": "^15.6.1", + "iso-url": "^1.1.5", + "isomorphic-fetch": "^3.0.0", "lodash": "^4.17.21", "moment": "^2.25.3", - "nanoid": "^3.0.2", + "nanoid": "^3.1.30", "node-cache": "^5.1.0", "perfect-scrollbar": "^1.5.0", - "pretty-quick": "^2.0.1", - "prismjs": "^1.24.0", + "postcss": "^8.3.9", + "pretty-quick": "^3.1.1", + "prismjs": "^1.25.0", "promise-polyfill": "^8.1.3", "prop-types": "^15.7.2", "raf-polyfill": "^1.0.0", - "react": "^16.13.1", + "react": "^17.0.2", "react-collapse": "^5.0.1", "react-collapsible": "^2.7.0", - "react-dom": "^16.13.1", - "react-ga": "^2.7.0", - "react-helmet": "^5.2.1", + "react-dom": "^17.0.2", + "react-ga": "^3.3.0", + "react-helmet": "^6.1.0", "react-popover": "^0.5.10", - "react-slick": "^0.25.2", + "react-slick": "^0.28.1", "react-use": "^14.0.0", - "rehype-react": "^5.0.1", + "rehype-react": "^6.2.1", "remark-preset-lint-recommended": "^5.0.0", "repo-link-check": "^0.7.1", "reset-css": "^5.0.1", @@ -86,10 +87,10 @@ "slick-carousel": "^1.8.1", "title-case": "^3.0.2", "unist-util-visit": "2.0.2", - "upath": "^1.2.0" + "upath": "^2.0.1" }, 
"devDependencies": { - "@babel/core": "^7.9.0", + "@babel/core": "^7.15.8", "@svgr/webpack": "^5.3.1", "@types/classnames": "^2.2.10", "@types/isomorphic-fetch": "^0.0.35", @@ -99,66 +100,65 @@ "@types/react-dom": "^16.9.6", "@types/react-helmet": "^5.0.15", "@types/react-popover": "^0.5.3", - "@types/react-slick": "^0.23.4", + "@types/react-slick": "^0.23.6", "@types/rehype-react": "^4.0.0", "@typescript-eslint/eslint-plugin": "^2.27.0", "@typescript-eslint/parser": "^2.27.0", - "autoprefixer": "^9.7.6", + "autoprefixer": "^10.3.7", "babel-eslint": "^10.1.0", "babel-jest": "^26.0.1", - "babel-plugin-transform-define": "^2.0.0", + "babel-plugin-transform-define": "^2.0.1", "babel-plugin-transform-object-assign": "^6.22.0", "eslint": "^6.8.0", "eslint-config-prettier": "^6.10.1", - "eslint-plugin-json": "^2.1.1", + "eslint-plugin-json": "^3.1.0", "eslint-plugin-jsx-a11y": "^6.2.3", "eslint-plugin-prettier": "^3.1.2", - "eslint-plugin-react": "^7.19.0", - "gatsby-plugin-catch-links": "^2.2.1", - "gatsby-plugin-feed": "^2.4.1", - "gatsby-plugin-google-analytics": "^2.2.2", - "gatsby-plugin-manifest": "2.2.23", - "gatsby-plugin-postcss": "^2.2.1", - "gatsby-plugin-react-helmet": "^3.2.1", + "eslint-plugin-react": "^7.26.1", + "gatsby-plugin-catch-links": "^3.14.0", + "gatsby-plugin-feed": "^3.14.0", + "gatsby-plugin-google-analytics": "^3.14.0", + "gatsby-plugin-manifest": "^3.14.0", + "gatsby-plugin-postcss": "^4.14.0", + "gatsby-plugin-react-helmet": "^4.14.0", "gatsby-plugin-sentry": "^1.0.1", - "gatsby-plugin-sharp": "2.2.32", - "gatsby-plugin-sitemap": "^2.3.1", - "gatsby-plugin-svgr": "^2.0.2", - "gatsby-plugin-twitter": "^2.2.2", - "gatsby-plugin-typescript": "^2.3.1", - "gatsby-plugin-webpack-bundle-analyzer": "^1.0.5", - "gatsby-remark-autolink-headers": "^2.2.1", - "gatsby-remark-copy-linked-files": "^2.2.1", + "gatsby-plugin-sharp": "^3.14.1", + "gatsby-plugin-sitemap": "^4.10.0", + "gatsby-plugin-svgr": "^3.0.0-beta.0", + "gatsby-plugin-twitter": "^3.14.0", 
+ "gatsby-plugin-webpack-bundle-analyser-v2": "^1.1.25", + "gatsby-remark-autolink-headers": "^4.11.0", + "gatsby-remark-copy-linked-files": "^4.11.0", "gatsby-remark-embed-gist": "^1.1.9", - "gatsby-remark-embedder": "^2.0.0", + "gatsby-remark-embedder": "^5.0.0", "gatsby-remark-external-links": "^0.0.4", - "gatsby-remark-images": "^3.2.2", - "gatsby-remark-prismjs": "^3.4.1", + "gatsby-remark-images": "^5.11.0", + "gatsby-remark-prismjs": "^5.11.0", "gatsby-remark-relative-images": "0.2.3", - "gatsby-remark-responsive-iframe": "^2.3.1", - "gatsby-remark-smartypants": "^2.2.1", - "gatsby-source-filesystem": "^2.2.2", - "gatsby-transformer-remark": "^2.7.1", - "gatsby-transformer-sharp": "2.2.23", + "gatsby-remark-responsive-iframe": "^4.11.0", + "gatsby-remark-smartypants": "^4.11.0", + "gatsby-source-filesystem": "^3.14.0", + "gatsby-transformer-remark": "^4.11.0", + "gatsby-transformer-sharp": "^3.14.0", "hast-util-select": "^4.0.0", "husky": "^4.2.3", - "jest": "^26.0.1", - "lint-staged": "^10.1.2", + "jest": "^27.3.0", + "lint-staged": "^11.2.3", "postcss-color-mod-function": "^3.0.3", - "postcss-custom-media": "^7.0.8", - "postcss-custom-properties": "^9.1.1", - "postcss-mixins": "^6.2.3", - "postcss-nested": "^4.2.1", - "prettier": "^2.2.1", - "rehype-parse": "^6.0.2", - "rehype-stringify": "^7.0.0", - "remark": "^12.0.0", - "remark-html": "^11.0.1", + "postcss-custom-media": "^8.0.0", + "postcss-custom-properties": "^12.0.0", + "postcss-mixins": "^8.1.0", + "postcss-nested": "^5.0.6", + "prettier": "^2.4.1", + "rehype-parse": "^7.0.1", + "rehype-stringify": "^8.0.0", + "remark": "^13.0.0", + "remark-html": "^13.0.1", "remark-parse": "^8.0.2", "stylelint": "^13.3.0", - "stylelint-config-standard": "^20.0.0", + "stylelint-config-standard": "^22.0.0", "typescript": "^3.8.3", - "unist-util-remove-position": "^2.0.1" + "unist-util-remove-position": "^3.0.0" }, "husky": { "hooks": { diff --git a/plugins/gatsby-remark-dvc-linker/index.test.js 
b/plugins/gatsby-remark-dvc-linker/index.test.js index fa52c1cd07..4cc73689cb 100644 --- a/plugins/gatsby-remark-dvc-linker/index.test.js +++ b/plugins/gatsby-remark-dvc-linker/index.test.js @@ -26,8 +26,8 @@ describe('gatsby-remark-dvc-linker', () => { } live = { - inlineCode: '`dvclive.init()`', - url: '[`dvclive.init()`](/doc/dvclive/api-reference/init)' + inlineCode: '`Live.log()`', + url: '[`Live.log()`](/doc/dvclive/api-reference/live/log)' } it('composes apiLinker and commandLinker', () => { diff --git a/plugins/gatsby-remark-dvc-linker/liveLinker.js b/plugins/gatsby-remark-dvc-linker/liveLinker.js index 5596d5eb51..2b5c4fe15e 100644 --- a/plugins/gatsby-remark-dvc-linker/liveLinker.js +++ b/plugins/gatsby-remark-dvc-linker/liveLinker.js @@ -3,9 +3,9 @@ const { createLinkNode } = require('./helpers') const { getItemByPath } = require('../../src/utils/shared/sidebar') -const LIVE_API_REGEXP = /dvclive.([a-z-._]*\(\)$)?/ +const LIVE_API_REGEXP = /Live.([a-z-._]*\(\)$)?/ const METHOD_REGEXP = /^[a-z-._]*\(\)$/ -const API_ROOT = '/doc/dvclive/api-reference/' +const API_ROOT = '/doc/dvclive/api-reference/live/' module.exports = astNode => { const node = astNode[0] diff --git a/scripts/deploy-with-s3.js b/scripts/deploy-with-s3.js index 3dc199f17d..e91b1b1fe7 100755 --- a/scripts/deploy-with-s3.js +++ b/scripts/deploy-with-s3.js @@ -3,8 +3,9 @@ require('dotenv').config() const path = require('path') const PRODUCTION_PREFIX = 'dvc-org-prod' +const { mkdirSync } = require('fs') -const { DEPLOY_OPTIONS } = process.env +const { DEPLOY_OPTIONS, USE_PRODUCTION_CACHE } = process.env const clearCloudflareCache = require('./clear-cloudflare-cache') // Generate deploy options from a comma separated string in the DEPLOY_OPTIONS @@ -20,6 +21,7 @@ const deployOptions = DEPLOY_OPTIONS : { download: true, build: true, + retry: true, upload: true, clean: true, clearCloudflareCache: true @@ -69,9 +71,8 @@ const { cleanEntry } = require('./s3-utils') const { move } = 
require('fs-extra') -const { downloadAllFromS3, uploadAllToS3, cleanAllLocal } = withEntries( - cacheDirs -) +const { downloadAllFromS3, uploadAllToS3, cleanAllLocal } = + withEntries(cacheDirs) function run(command) { execSync(command, { @@ -86,7 +87,10 @@ async function main() { // This greatly speeds up PR initial build time. if (deployOptions.download) { - if (emptyPrefix) { + if (USE_PRODUCTION_CACHE) { + console.warn('USE_PRODUCTION_CACHE is set, downloading from production') + await downloadAllFromS3(PRODUCTION_PREFIX) + } else if (emptyPrefix) { console.warn( `The current prefix "${s3Prefix}" is empty! Attempting to fall back on production cache.` ) @@ -94,24 +98,31 @@ async function main() { } else { await downloadAllFromS3(s3Prefix) } + mkdirSync('.cache/json', { recursive: true }) } if (deployOptions.build) { try { run('yarn build') } catch (buildError) { - // Sometimes gatsby build fails because of bad cache. - // Clear it and try again. - - console.error('------------------------\n\n') - console.error('The first Gatsby build attempt failed!\n') - console.error(buildError) - console.error('\nRetrying with a cleared cache:\n') - - // Clear only .cache so we re-use images - await cleanEntry(cacheDirs[1]) - - run('yarn build') + if (deployOptions.retry) { + // Sometimes gatsby build fails because of bad cache. + // Clear it and try again. 
+ + console.error('------------------------\n\n') + console.error('The first Gatsby build attempt failed!\n') + console.error(buildError) + console.error('\nRetrying with a cleared cache:\n') + + // Clear only .cache so we re-use images + await cleanEntry(cacheDirs[1]) + + run('yarn build') + } else { + throw new Error( + 'The first Gatsby build attempt failed, and DEPLOY_OPTIONS does not include "retry"' + ) + } } } diff --git a/scripts/heroku-deploy.sh b/scripts/heroku-deploy.sh new file mode 100755 index 0000000000..3ecfbe75a8 --- /dev/null +++ b/scripts/heroku-deploy.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +NEWPWD="/tmp/gatsby-build" +mv $OLDPWD $NEWPWD +ln -s $NEWPWD $OLDPWD +cd $NEWPWD +./scripts/deploy-with-s3.js diff --git a/src/components/Blog/Feed/Item/index.tsx b/src/components/Blog/Feed/Item/index.tsx index 64b74e3108..e600e03b01 100644 --- a/src/components/Blog/Feed/Item/index.tsx +++ b/src/components/Blog/Feed/Item/index.tsx @@ -2,13 +2,13 @@ import React, { useEffect, useRef } from 'react' import { useRafState, useWindowSize } from 'react-use' import { graphql } from 'gatsby' import Link from '../../../Link' -import Image, { FixedObject, FluidObject } from 'gatsby-image' +import { GatsbyImage, IGatsbyImageData } from 'gatsby-plugin-image' import cn from 'classnames' import { ISocialIcon } from '../../../SocialIcon' import FeedMeta from '../../FeedMeta' -import styles from './styles.module.css' +import * as styles from './styles.module.css' import { ReactComponent as Placeholder } from './placeholder.svg' @@ -21,13 +21,12 @@ export interface IBlogPostData { description: string descriptionLong: string picture?: { - big: FluidObject - small: FluidObject + big: IGatsbyImageData } author: { name: string avatar: { - fixed: FixedObject + gatsbyImageData: IGatsbyImageData } links: Array } @@ -55,7 +54,7 @@ const Item: React.FC = ({ } }, [width]) - const image = picture ? (big ? picture.big : picture.small) : undefined + const image = picture?.big return (
= ({ )} > - {picture ? ( - + {image ? ( + ) : ( )} @@ -104,22 +103,11 @@ export const query = graphql` description descriptionLong picture { - big: fluid( - maxWidth: 650 - maxHeight: 450 - cropFocus: CENTER - quality: 90 - ) { - ...GatsbyImageSharpFluid_withWebp - } - small: fluid( - maxWidth: 300 - maxHeight: 250 - cropFocus: CENTER - quality: 90 - ) { - ...GatsbyImageSharpFluid_withWebp - } + big: gatsbyImageData( + width: 650 + height: 450 + transformOptions: { cropFocus: CENTER } + ) } author { name @@ -128,9 +116,12 @@ export const query = graphql` site } avatar { - fixed(width: 40, height: 40, quality: 50, cropFocus: CENTER) { - ...GatsbyImageSharpFixed_withWebp - } + gatsbyImageData( + width: 40 + height: 40 + transformOptions: { cropFocus: CENTER } + layout: FIXED + ) } } } diff --git a/src/components/Blog/Feed/index.tsx b/src/components/Blog/Feed/index.tsx index 15ac2cc347..bedfa7fa05 100644 --- a/src/components/Blog/Feed/index.tsx +++ b/src/components/Blog/Feed/index.tsx @@ -7,7 +7,7 @@ import cn from 'classnames' import Paginator, { IPaginatorPageInfo } from '../../Paginator' import Item, { IBlogPostData } from './Item' -import styles from './styles.module.css' +import * as styles from './styles.module.css' export interface IBlogFeedPostList { nodes: Array diff --git a/src/components/Blog/FeedMeta/index.tsx b/src/components/Blog/FeedMeta/index.tsx index 13b369d81d..45ad6bc80b 100644 --- a/src/components/Blog/FeedMeta/index.tsx +++ b/src/components/Blog/FeedMeta/index.tsx @@ -1,15 +1,15 @@ -import Image, { FixedObject } from 'gatsby-image' +import { GatsbyImage, IGatsbyImageData } from 'gatsby-plugin-image' import React from 'react' import Link from '../../Link' import { pluralizeComments } from '../../../utils/front/i18n' -import styles from './styles.module.css' +import * as styles from './styles.module.css' import SocialIcon, { ISocialIcon } from '../../SocialIcon' interface IBlogFeedMetaProps { avatar: { - fixed: FixedObject + gatsbyImageData: 
IGatsbyImageData } commentsUrl?: string commentsCount?: number @@ -30,7 +30,11 @@ const FeedMeta: React.FC = ({ }) => { return (
- +
  • {name}
  • {links && ( diff --git a/src/components/Blog/FeedMeta/styles.module.css b/src/components/Blog/FeedMeta/styles.module.css index 306b6e5dfc..db8efcad97 100644 --- a/src/components/Blog/FeedMeta/styles.module.css +++ b/src/components/Blog/FeedMeta/styles.module.css @@ -37,9 +37,10 @@ margin-right: 14px; &::before { - content: '• '; + content: '•'; position: absolute; right: 100%; + width: 0.55em; } } diff --git a/src/components/Blog/Layout/index.tsx b/src/components/Blog/Layout/index.tsx index 3f8ad589d3..5bd1c8afc7 100644 --- a/src/components/Blog/Layout/index.tsx +++ b/src/components/Blog/Layout/index.tsx @@ -2,7 +2,7 @@ import React from 'react' import SEO from '../../SEO' import MainLayout, { LayoutComponent } from '../../MainLayout' -import styles from './styles.module.css' +import * as styles from './styles.module.css' const keywords = 'git, data, version control, machine learning models management, datasets' diff --git a/src/components/Blog/Post/HeroPic/index.tsx b/src/components/Blog/Post/HeroPic/index.tsx index 4102feed28..3acb5ff4cd 100644 --- a/src/components/Blog/Post/HeroPic/index.tsx +++ b/src/components/Blog/Post/HeroPic/index.tsx @@ -1,40 +1,20 @@ -import Image from 'gatsby-image' +import { GatsbyImage, getImage } from 'gatsby-plugin-image' +import { IGatsbyImageDataParent } from 'gatsby-plugin-image/dist/src/components/hooks' import React from 'react' -import { BLOG } from '../../../../consts' -import { - IBlogPostHeroPic, - IGatsbyImageProps -} from '../../../../templates/blog-post' +import { IBlogPostHeroPic } from '../../../../templates/blog-post' -import styles from './styles.module.css' - -const NonStretchedImage: React.FC = props => { - let normalizedProps = props - if (props.fluid && props.fluid.presentationWidth) { - const presetantionWidth = props.fluid?.presentationWidth - const width = - presetantionWidth < BLOG.imageMaxWidthHero - ? 
presetantionWidth / 2 - : presetantionWidth - normalizedProps = { - ...props, - style: { - ...(props.style || {}), - maxWidth: width, - margin: '0 auto' - } - } - } - return -} +import * as styles from './styles.module.css' const HeroPic: React.FC = ({ pictureComment, picture }) => { + const image = getImage(picture as IGatsbyImageDataParent) return (
    -
    - -
    + {image && ( +
    + +
    + )} {pictureComment && (
    { if (!triggerRect || !tooltipRect) { diff --git a/src/components/Blog/Post/index.tsx b/src/components/Blog/Post/index.tsx index c19d7236f7..9e6021b768 100644 --- a/src/components/Blog/Post/index.tsx +++ b/src/components/Blog/Post/index.tsx @@ -18,7 +18,7 @@ import Share from './Share' import PageContent from '../../PageContent' import SubscribeSection from '../../SubscribeSection' -import styles from './styles.module.css' +import * as styles from './styles.module.css' const Post: React.FC = ({ html, @@ -84,9 +84,7 @@ const Post: React.FC = ({
    - {picture && ( - - )} +
    diff --git a/src/components/Blog/Post/styles.module.css b/src/components/Blog/Post/styles.module.css index 13636cf525..0a25404938 100644 --- a/src/components/Blog/Post/styles.module.css +++ b/src/components/Blog/Post/styles.module.css @@ -82,6 +82,19 @@ border-radius: 3px; background-color: rgba(27, 31, 35, 0.05); } + + ul { + padding-left: 2em; + margin-bottom: 16px; + } + + li { + margin: 16px 0; + + + li { + margin-top: 0.25em; + } + } } .share { diff --git a/src/components/Community/Block/index.tsx b/src/components/Community/Block/index.tsx index bf258ef63e..2e5d965ad9 100644 --- a/src/components/Community/Block/index.tsx +++ b/src/components/Community/Block/index.tsx @@ -1,6 +1,6 @@ import React from 'react' -import styles from './styles.module.css' +import * as styles from './styles.module.css' interface ICommunityBlockProps { children: React.ReactNode diff --git a/src/components/Community/Contribute/index.tsx b/src/components/Community/Contribute/index.tsx index aec779ad0c..3eba9b4109 100644 --- a/src/components/Community/Contribute/index.tsx +++ b/src/components/Community/Contribute/index.tsx @@ -8,7 +8,7 @@ import CommunitySection from '../Section' import { logEvent } from '../../../utils/front/ga' import { useCommunityData } from '../../../utils/front/community' -import sharedStyles from '../styles.module.css' +import * as sharedStyles from '../styles.module.css' const logPR = (): void => logEvent('community', 'contribute-pr') const logBlogpost = (): void => logEvent('community', 'contribute-blogpost') diff --git a/src/components/Community/Events/index.tsx b/src/components/Community/Events/index.tsx index 2863f44345..714510015f 100644 --- a/src/components/Community/Events/index.tsx +++ b/src/components/Community/Events/index.tsx @@ -10,8 +10,8 @@ import Section from '../Section' import { logEvent } from '../../../utils/front/ga' import { useCommunityData } from '../../../utils/front/community' -import sharedStyles from '../styles.module.css' -import 
styles from './styles.module.css' +import * as sharedStyles from '../styles.module.css' +import * as styles from './styles.module.css' export interface IEvent { theme: ICommunitySectionTheme @@ -111,7 +111,7 @@ const Events: React.FC<{ theme: ICommunitySectionTheme }> = ({ theme }) => {
    )) ) : ( -
    +
    Subscribe to be up to date!{' '} 👇 diff --git a/src/components/Community/Events/styles.module.css b/src/components/Community/Events/styles.module.css index 774f22a2ae..052796f99c 100644 --- a/src/components/Community/Events/styles.module.css +++ b/src/components/Community/Events/styles.module.css @@ -13,7 +13,6 @@ } .eventsPlaceholder { - composes: gray from '../styles.module.css'; text-align: center; margin: auto; font-size: 1rem; diff --git a/src/components/Community/Hero/index.tsx b/src/components/Community/Hero/index.tsx index badbd3c620..417f248900 100644 --- a/src/components/Community/Hero/index.tsx +++ b/src/components/Community/Hero/index.tsx @@ -6,7 +6,7 @@ import Link from '../../Link' import { useCommunityData } from '../../../utils/front/community' import { logEvent } from '../../../utils/front/ga' -import styles from './styles.module.css' +import * as styles from './styles.module.css' const logHero = (): void => logEvent('community', 'hero') diff --git a/src/components/Community/Learn/index.tsx b/src/components/Community/Learn/index.tsx index 770417c89f..c4be7ef52f 100644 --- a/src/components/Community/Learn/index.tsx +++ b/src/components/Community/Learn/index.tsx @@ -15,8 +15,8 @@ import { useCommunityData } from '../../../utils/front/community' import getPosts from '../../../queries/posts' import { pluralizeComments } from '../../../utils/front/i18n' -import sharedStyles from '../styles.module.css' -import styles from './styles.module.css' +import * as sharedStyles from '../styles.module.css' +import * as styles from './styles.module.css' const docsPage = getFirstPage() @@ -27,7 +27,7 @@ const logDocumentationAll = (): void => interface ICommunityBlogPost { color: string commentsUrl?: string - pictureUrl: string | null + pictureUrl?: string date: string title: string url: string @@ -45,9 +45,10 @@ const BlogPost: React.FC = ({ return null } - const logPost = useCallback(() => logEvent('community', 'blog', title), [ - title - ]) + const logPost = 
useCallback( + () => logEvent('community', 'blog', title), + [title] + ) const { error, ready, result } = useCommentsCount(commentsUrl) diff --git a/src/components/Community/Meet/index.tsx b/src/components/Community/Meet/index.tsx index 24da2de555..fcdce794df 100644 --- a/src/components/Community/Meet/index.tsx +++ b/src/components/Community/Meet/index.tsx @@ -16,8 +16,8 @@ import { } from '../../../utils/front/api' import { useCommunityData } from '../../../utils/front/community' -import sharedStyles from '../styles.module.css' -import styles from './styles.module.css' +import * as sharedStyles from '../styles.module.css' +import * as styles from './styles.module.css' const logIssueAll = (): void => logEvent('community', 'issue', 'all') const logTopicAll = (): void => logEvent('community', 'topic', 'all') @@ -30,9 +30,10 @@ const Topic: React.FC<{ color: string } & IDiscussTopic> = ({ comments, color }) => { - const logTopic = useCallback(() => logEvent('community', 'forum', title), [ - title - ]) + const logTopic = useCallback( + () => logEvent('community', 'forum', title), + [title] + ) return (
    @@ -67,9 +68,10 @@ const Issue: React.FC<{ color: string } & IGithubIssue> = ({ comments, color }) => { - const logIssue = useCallback(() => logEvent('community', 'issue', title), [ - title - ]) + const logIssue = useCallback( + () => logEvent('community', 'issue', title), + [title] + ) return (
    @@ -82,7 +84,9 @@ const Issue: React.FC<{ color: string } & IGithubIssue> = ({ > {title} -
    +
    void + docsearch?: (opts: Record) => void } } diff --git a/src/components/Documentation/Layout/SidebarMenu/index.tsx b/src/components/Documentation/Layout/SidebarMenu/index.tsx index d48eac9390..f65ebf595a 100644 --- a/src/components/Documentation/Layout/SidebarMenu/index.tsx +++ b/src/components/Documentation/Layout/SidebarMenu/index.tsx @@ -20,7 +20,7 @@ import { } from '../../../../utils/shared/sidebar' import 'perfect-scrollbar/css/perfect-scrollbar.css' -import styles from './styles.module.css' +import * as styles from './styles.module.css' // A map for optional special icons that can be used in menu items // Use the key string here as the "icon" field in sidebar.json @@ -91,10 +91,12 @@ const SidebarMenuItem: React.FC = ({ isExpanded && styles.active, isRootParent && 'docSearch-lvl0', 'link-with-focus', - style ? styles[style] : styles.sidebarDefault, - isLeafItem && styles.leafItem, + // style ? styles[style] : styles.sidebarDefault, + style && styles[style], + // isLeafItem && styles.leafItem, // Limit the default bullet to items with no special icon - icon ? undefined : styles.withDefaultBullet + // icon ? 
undefined : styles.withDefaultBullet + icon && undefined ) const bulletIconClassName = cn( diff --git a/src/components/Documentation/Layout/index.tsx b/src/components/Documentation/Layout/index.tsx index 680ac9b28c..6e742a8910 100644 --- a/src/components/Documentation/Layout/index.tsx +++ b/src/components/Documentation/Layout/index.tsx @@ -9,7 +9,7 @@ import SidebarMenu from './SidebarMenu' import { matchMedia } from '../../../utils/front/breakpoints' import { focusElementWithHotkey } from '../../../utils/front/focusElementWithHotkey' -import styles from './styles.module.css' +import * as styles from './styles.module.css' import { useWindowSize } from 'react-use' const Layout: LayoutComponent = ({ children, ...restProps }) => { diff --git a/src/components/Documentation/Markdown/Main/index.tsx b/src/components/Documentation/Markdown/Main/index.tsx index 91a815f455..41dc0b0dfb 100644 --- a/src/components/Documentation/Markdown/Main/index.tsx +++ b/src/components/Documentation/Markdown/Main/index.tsx @@ -7,8 +7,8 @@ import Tutorials from '../../TutorialsLinks' import { getPathWithSource } from '../../../../utils/shared/sidebar' import 'github-markdown-css/github-markdown.css' -import sharedStyles from '../../styles.module.css' -import styles from './styles.module.css' +import * as sharedStyles from '../../styles.module.css' +import * as styles from './styles.module.css' const isInsideCodeBlock = (node: Element): boolean => { while (node?.parentNode) { diff --git a/src/components/Documentation/Markdown/ToggleProvider/index.tsx b/src/components/Documentation/Markdown/ToggleProvider/index.tsx new file mode 100644 index 0000000000..7d4df78c6c --- /dev/null +++ b/src/components/Documentation/Markdown/ToggleProvider/index.tsx @@ -0,0 +1,48 @@ +import React, { createContext, useState } from 'react' + +interface ITogglesData { + [key: string]: { texts: string[]; checkedInd: number } +} + +interface ITogglesContext { + addNewToggle?: (id: string, texts: string[]) => void + 
updateToggleInd?: (id: string, newInd: number) => void + togglesData?: ITogglesData +} + +export const TogglesContext = createContext({}) + +export const TogglesProvider: React.FC = ({ children }) => { + const [togglesData, setTogglesData] = useState({}) + + const addNewToggle = (id: string, texts: string[]): void => { + const togglesDataCopy: ITogglesData = { ...togglesData } + togglesDataCopy[id] = { texts, checkedInd: 0 } + setTogglesData(togglesDataCopy) + } + + const updateToggleInd = (id: string, newInd: number): void => { + const togglesDataCopy: ITogglesData = { ...togglesData } + const selectedTabText = togglesDataCopy[id].texts[newInd] + togglesDataCopy[id] = { ...togglesDataCopy[id], checkedInd: newInd } + + for (const [toggleId, { texts }] of Object.entries(togglesDataCopy)) { + if (texts.includes(selectedTabText)) { + togglesDataCopy[toggleId] = { + ...togglesDataCopy[toggleId], + checkedInd: togglesDataCopy[id].texts.indexOf(selectedTabText) + } + } + } + + setTogglesData(togglesDataCopy) + } + + return ( + + {children} + + ) +} diff --git a/src/components/Documentation/Markdown/Tooltip/DesktopView/index.tsx b/src/components/Documentation/Markdown/Tooltip/DesktopView/index.tsx index 4cc3fbc8fa..3a69b1bad1 100644 --- a/src/components/Documentation/Markdown/Tooltip/DesktopView/index.tsx +++ b/src/components/Documentation/Markdown/Tooltip/DesktopView/index.tsx @@ -4,7 +4,7 @@ import Portal from '@reach/portal' import throttle from 'lodash/throttle' import { getHeaderHeight } from '../../../../../utils/front/scroll' -import styles from './styles.module.css' +import * as styles from './styles.module.css' interface IDesktopViewProps { description: string diff --git a/src/components/Documentation/Markdown/Tooltip/MobileView/index.tsx b/src/components/Documentation/Markdown/Tooltip/MobileView/index.tsx index d54ddde90a..7c58dc58d4 100644 --- a/src/components/Documentation/Markdown/Tooltip/MobileView/index.tsx +++ 
b/src/components/Documentation/Markdown/Tooltip/MobileView/index.tsx @@ -4,7 +4,7 @@ import Portal from '@reach/portal' import { isTriggeredFromKB } from '../../../../../utils/front/keyboard' -import styles from './styles.module.css' +import * as styles from './styles.module.css' interface IMobileViewProps { description: string diff --git a/src/components/Documentation/Markdown/index.tsx b/src/components/Documentation/Markdown/index.tsx index fc76f76243..4240a1c6ee 100644 --- a/src/components/Documentation/Markdown/index.tsx +++ b/src/components/Documentation/Markdown/index.tsx @@ -1,4 +1,12 @@ -import React, { ReactNode, ReactElement } from 'react' +import React, { + useEffect, + useState, + ReactNode, + ReactElement, + useContext +} from 'react' +import { nanoid } from 'nanoid' +import { Node } from 'unist' import rehypeReact from 'rehype-react' import Collapsible from 'react-collapsible' @@ -6,12 +14,13 @@ import Main from './Main' import Link from '../../Link' import Tooltip from './Tooltip' -import styles from './styles.module.css' +import * as styles from './styles.module.css' +import { TogglesContext, TogglesProvider } from './ToggleProvider' -const Details: React.FC<{ - children: Array<{ props: { children: ReactNode } } | string> -}> = ({ children }) => { - const filteredChildren: ReactNode[] = children.filter(child => child !== '\n') +const Details: React.FC> = ({ children }) => { + const filteredChildren: ReactNode[] = ( + children as Array<{ props: { children: ReactNode } } | string> + ).filter(child => child !== '\n') const firstChild = filteredChildren[0] as JSX.Element if (!/^h.$/.test(firstChild.type)) { @@ -34,7 +43,7 @@ const Details: React.FC<{ */ return ( {filteredChildren.slice(1)} @@ -42,8 +51,8 @@ const Details: React.FC<{ ) } -const Abbr: React.FC<{ children: [string] }> = ({ children }) => { - return +const Abbr: React.FC> = ({ children }) => { + return } const Cards: React.FC = ({ children }) => { @@ -95,21 +104,102 @@ const Card: 
React.FC<{ ) } -const renderAst = new rehypeReact({ - // eslint-disable-next-line @typescript-eslint/no-explicit-any - createElement: React.createElement as any, +const ToggleTab: React.FC<{ + id: string + title: string + ind: number + onChange: () => void + checked: boolean +}> = ({ children, id, checked, ind, onChange, title }) => { + const inputId = `tab-${id}-${ind}` + + return ( + <> + + + {children} + + ) +} + +const Toggle: React.FC<{ + children: Array<{ props: { title: string } } | string> +}> = ({ children }) => { + const [toggleId, setToggleId] = useState('') + const { + addNewToggle = (): null => null, + updateToggleInd = (): null => null, + togglesData = {} + } = useContext(TogglesContext) + const tabs: Array<{ props: { title: string } } | string> = children.filter( + child => child !== '\n' + ) + const tabsTitles = tabs.map(tab => + typeof tab === 'object' ? tab.props.title : '' + ) + + useEffect(() => { + if (toggleId === '') { + const newId = nanoid() + addNewToggle(newId, tabsTitles) + setToggleId(newId) + } + + if (toggleId && !togglesData[toggleId]) { + addNewToggle(toggleId, tabsTitles) + } + }, [togglesData]) + + return ( +
    + {tabs.map((tab, i) => ( + updateToggleInd(toggleId, i)} + > + {tab} + + ))} +
    + ) +} + +const Tab: React.FC = ({ children }) => ( +
    {children}
    +) + +// Rehype's typedefs don't allow for custom components, even though they work +// eslint-disable-next-line @typescript-eslint/no-explicit-any +const renderAst = new (rehypeReact as any)({ + createElement: React.createElement, Fragment: React.Fragment, components: { - details: Details, - abbr: Abbr, a: Link, + abbr: Abbr, card: Card, - cards: Cards + cards: Cards, + details: Details, + toggle: Toggle, + tab: Tab } }).Compiler interface IMarkdownProps { - htmlAst: object + htmlAst: Node githubLink: string tutorials: { [type: string]: string } prev?: string @@ -125,7 +215,7 @@ const Markdown: React.FC = ({ }) => { return (
    - {renderAst(htmlAst)} + {renderAst(htmlAst)}
    ) } diff --git a/src/components/Documentation/Markdown/styles.module.css b/src/components/Documentation/Markdown/styles.module.css index 08f999dfb3..7c8f0f591c 100644 --- a/src/components/Documentation/Markdown/styles.module.css +++ b/src/components/Documentation/Markdown/styles.module.css @@ -118,3 +118,53 @@ a.card { background-color: var(--color-light-blue); } } + +.toggle { + display: flex; + flex-wrap: wrap; + + input { + height: 0; + opacity: 0; + position: absolute; + width: 0; + overflow: hidden; + } + + input:checked + label { + color: var(--color-azure); + border-color: var(--color-azure); + } + + input:checked + label + .tab { + height: initial; + opacity: initial; + position: static; + width: 100%; + overflow: visible; + } + + .tabHeading { + padding: 12px 16px 10px; + background-color: transparent; + border: none; + border-bottom: 2px solid transparent; + font-weight: bold; + font-size: 16px; + font-family: var(--font-brandon); + order: -1; + + &:hover { + cursor: pointer; + } + } +} + +.tab { + margin: 10px 0 0; + height: 0; + opacity: 0; + position: absolute; + overflow: hidden; + width: 0; +} diff --git a/src/components/Documentation/RightPanel/index.tsx b/src/components/Documentation/RightPanel/index.tsx index 3abb69ab39..ff7569d184 100644 --- a/src/components/Documentation/RightPanel/index.tsx +++ b/src/components/Documentation/RightPanel/index.tsx @@ -9,8 +9,8 @@ import Tutorials from '../TutorialsLinks' import { getScrollPosition, getHeaderHeight } from '../../../utils/front/scroll' import { allImagesLoadedInContainer } from '../../../utils/front/images' -import sharedStyles from '../styles.module.css' -import styles from './styles.module.css' +import * as sharedStyles from '../styles.module.css' +import * as styles from './styles.module.css' interface IRightPanelProps { headings: Array diff --git a/src/components/Documentation/TutorialsLinks/index.tsx b/src/components/Documentation/TutorialsLinks/index.tsx index 98e59c63aa..2539fec72b 
100644 --- a/src/components/Documentation/TutorialsLinks/index.tsx +++ b/src/components/Documentation/TutorialsLinks/index.tsx @@ -5,8 +5,8 @@ import startCase from 'lodash/startCase' import Link from '../../Link' -import sharedStyles from '../styles.module.css' -import styles from './styles.module.css' +import * as sharedStyles from '../styles.module.css' +import * as styles from './styles.module.css' interface ITutorialsLinksProps { compact?: boolean diff --git a/src/components/Documentation/WithJSX/AutoLinkElement/index.tsx b/src/components/Documentation/WithJSX/AutoLinkElement/index.tsx index f23c946855..d6f3b2fc2b 100644 --- a/src/components/Documentation/WithJSX/AutoLinkElement/index.tsx +++ b/src/components/Documentation/WithJSX/AutoLinkElement/index.tsx @@ -1,8 +1,8 @@ import React from 'react' -import styles from './styles.module.css' +import * as styles from './styles.module.css' interface IElementProps { - attributes?: object + attributes?: Record el?: 'h1' | 'h2' | 'h3' | 'h4' | 'h5' | 'h6' | 'span' } @@ -28,7 +28,7 @@ const Element: React.FC = ({ children, el, attributes }) => { interface IAutoLinkHeaderProps { id: string el?: 'h1' | 'h2' | 'h3' | 'h4' | 'h5' | 'h6' | 'span' - anchorStyle?: object + anchorStyle?: Record } const AutoLinkElement: React.FC = ({ diff --git a/src/components/Documentation/index.tsx b/src/components/Documentation/index.tsx index 88c2cf480f..a2d9c3e175 100644 --- a/src/components/Documentation/index.tsx +++ b/src/components/Documentation/index.tsx @@ -1,4 +1,5 @@ import React from 'react' +import { Node } from 'unist' import Markdown from './Markdown' import RightPanel from './RightPanel' @@ -16,7 +17,7 @@ export const getGithubLink = (source: string): string => interface IDocumentationProps { path: string headings: Array - htmlAst: object + htmlAst: Node } const Documentation: React.FC = ({ diff --git a/src/components/DownloadButton/index.tsx b/src/components/DownloadButton/index.tsx index 9916965e11..d2472d71f7 100644 --- 
a/src/components/DownloadButton/index.tsx +++ b/src/components/DownloadButton/index.tsx @@ -7,9 +7,9 @@ import Link from '../Link' import isClient from '../../utils/front/isClient' import { logEvent } from '../../utils/front/ga' -import styles from './styles.module.css' +import * as styles from './styles.module.css' -const VERSION = `2.7.4` +const VERSION = `2.8.1` enum OS { UNKNOWN = '...', @@ -78,44 +78,44 @@ const getUserOS = (): OS => { return OSName } -const DownloadButtonDropdownItems: React.FC = ({ - onClick, - userOS -}) => { - return ( -
    - {dropdownItems.map((os, index) => { - if (os === null) { +const DownloadButtonDropdownItems: React.FC = + ({ onClick, userOS }) => { + return ( +
    + {dropdownItems.map((os, index) => { + if (os === null) { + return ( +
    + ) + } + + const item = itemsByOs[os] + return ( -
    + onClick(os)} + > + {item.title} + ) - } - - const item = itemsByOs[os] - - return ( - onClick(os)} - > - {item.title} - - ) - })} -
    - ) -} + })} +
    + ) + } const DownloadButton: React.FC = ({ openTop }) => { const userOS = useRef(getUserOS()) diff --git a/src/components/Features/index.tsx b/src/components/Features/index.tsx index 51758a796d..b597839986 100644 --- a/src/components/Features/index.tsx +++ b/src/components/Features/index.tsx @@ -6,7 +6,7 @@ import HeroSection from '../HeroSection' import Link from '../Link' import PromoSection from '../PromoSection' -import styles from './styles.module.css' +import * as styles from './styles.module.css' const FeaturesPage: React.FC = () => ( <> diff --git a/src/components/HamburgerIcon/index.tsx b/src/components/HamburgerIcon/index.tsx index 005e61dc31..caedcea2ca 100644 --- a/src/components/HamburgerIcon/index.tsx +++ b/src/components/HamburgerIcon/index.tsx @@ -1,7 +1,7 @@ import cn from 'classnames' import React from 'react' -import styles from './styles.module.css' +import * as styles from './styles.module.css' interface IHamburgerProps { opened?: boolean diff --git a/src/components/HamburgerMenu/index.tsx b/src/components/HamburgerMenu/index.tsx index d1e60b4abd..3b1ec70332 100644 --- a/src/components/HamburgerMenu/index.tsx +++ b/src/components/HamburgerMenu/index.tsx @@ -10,7 +10,7 @@ import { ReactComponent as LogoSVG } from '../../../static/img/logo-white.svg' import { ReactComponent as TwitterIcon } from '../SocialIcon/twitter.svg' import { ReactComponent as GithubIcon } from '../SocialIcon/github.svg' -import styles from './styles.module.css' +import * as styles from './styles.module.css' const docsPage = getFirstPage() @@ -245,7 +245,7 @@ export const HamburgerMenu: React.FC< > Studio logo Studio @@ -255,7 +255,7 @@ export const HamburgerMenu: React.FC< DVC logo DVC @@ -265,7 +265,7 @@ export const HamburgerMenu: React.FC< CML logo CML diff --git a/src/components/HeroSection/index.tsx b/src/components/HeroSection/index.tsx index 29aa072ab3..0fa5bf1b0d 100644 --- a/src/components/HeroSection/index.tsx +++ b/src/components/HeroSection/index.tsx @@ 
-3,7 +3,7 @@ import cn from 'classnames' import LayoutWidthContainer from '../LayoutWidthContainer' -import styles from './styles.module.css' +import * as styles from './styles.module.css' interface IHeroSectionProps { className?: string diff --git a/src/components/Home/Diagram/index.tsx b/src/components/Home/Diagram/index.tsx index 6f631cd8cf..bf180dfb59 100644 --- a/src/components/Home/Diagram/index.tsx +++ b/src/components/Home/Diagram/index.tsx @@ -8,7 +8,7 @@ import Link from '../../Link' import 'slick-carousel/slick/slick.css' import 'slick-carousel/slick/slick-theme.css' -import styles from './styles.module.css' +import * as styles from './styles.module.css' const LearnMore: React.FC<{ href: string }> = ({ href }) => (
    diff --git a/src/components/Home/LandingHero/GithubLine/index.tsx b/src/components/Home/LandingHero/GithubLine/index.tsx index a6d4d48e7c..947596de5a 100644 --- a/src/components/Home/LandingHero/GithubLine/index.tsx +++ b/src/components/Home/LandingHero/GithubLine/index.tsx @@ -2,7 +2,7 @@ import React from 'react' import Link from '../../../Link' import useStars from '../../../../gatsby/hooks/stars' -import styles from './styles.module.css' +import * as styles from './styles.module.css' const GithubLine: React.FC = () => { const stars = useStars() @@ -15,7 +15,9 @@ const GithubLine: React.FC = () => { GitHub {stars && ( - + {stars} diff --git a/src/components/Home/LandingHero/index.tsx b/src/components/Home/LandingHero/index.tsx index 29dcf67b27..6372db4582 100644 --- a/src/components/Home/LandingHero/index.tsx +++ b/src/components/Home/LandingHero/index.tsx @@ -9,7 +9,7 @@ import GithubLine from './GithubLine' import { scrollIntoLayout, ease } from '../../../utils/front/scroll' import { logEvent } from '../../../utils/front/ga' -import styles from './styles.module.css' +import * as styles from './styles.module.css' interface ILandingHeroProps { scrollToRef: React.RefObject diff --git a/src/components/Home/LearnMore/index.tsx b/src/components/Home/LearnMore/index.tsx index 2760190d81..e02e70d07b 100644 --- a/src/components/Home/LearnMore/index.tsx +++ b/src/components/Home/LearnMore/index.tsx @@ -3,7 +3,7 @@ import React, { useCallback } from 'react' import { logEvent } from '../../../utils/front/ga' import { scrollIntoLayout, ease } from '../../../utils/front/scroll' -import styles from './styles.module.css' +import * as styles from './styles.module.css' interface ILearnMoreProps { scrollToRef: React.RefObject diff --git a/src/components/Home/UseCases/CollapsibleText/index.tsx b/src/components/Home/UseCases/CollapsibleText/index.tsx index 0e11146e9d..b91c2eaa01 100644 --- a/src/components/Home/UseCases/CollapsibleText/index.tsx +++ 
b/src/components/Home/UseCases/CollapsibleText/index.tsx @@ -3,7 +3,7 @@ import { Collapse } from 'react-collapse' import { isTriggeredFromKB } from '../../../../utils/front/keyboard' -import styles from './styles.module.css' +import * as styles from './styles.module.css' interface ICollapsibleTextProps { children: React.ReactNode diff --git a/src/components/Home/UseCases/Video/index.tsx b/src/components/Home/UseCases/Video/index.tsx index b9d85b2e09..250709e995 100644 --- a/src/components/Home/UseCases/Video/index.tsx +++ b/src/components/Home/UseCases/Video/index.tsx @@ -3,7 +3,7 @@ import React, { useState, useCallback } from 'react' import TwoRowsButton from '../../../TwoRowsButton' import { logEvent } from '../../../../utils/front/ga' -import styles from './styles.module.css' +import * as styles from './styles.module.css' const Video: React.FC<{ id: string }> = ({ id }) => { const [isWatching, setWatching] = useState(false) diff --git a/src/components/Home/UseCases/index.tsx b/src/components/Home/UseCases/index.tsx index 2d6f9ea4d7..37fb4968c7 100644 --- a/src/components/Home/UseCases/index.tsx +++ b/src/components/Home/UseCases/index.tsx @@ -5,7 +5,7 @@ import CollapsibleText from './CollapsibleText' import LayoutWidthContainer from '../../LayoutWidthContainer' import ShowOnly from '../../ShowOnly' -import styles from './styles.module.css' +import * as styles from './styles.module.css' const Heading1: React.FC = () => (
    @@ -86,15 +86,15 @@ const UseCases: React.ForwardRefRenderFunction = (_, ref) => {
    -
    +
    -
    +
    -
    +
    @@ -102,18 +102,20 @@ const UseCases: React.ForwardRefRenderFunction = (_, ref) => { -
    -
    +
    +
    }>
    -
    +
    }>
    -
    +
    }> diff --git a/src/components/Home/UseCases/styles.module.css b/src/components/Home/UseCases/styles.module.css index 46f6baff0b..6600ecc106 100644 --- a/src/components/Home/UseCases/styles.module.css +++ b/src/components/Home/UseCases/styles.module.css @@ -53,7 +53,7 @@ margin-top: 15px; } -.case { +.useCase { margin-bottom: 18px; } diff --git a/src/components/Home/index.tsx b/src/components/Home/index.tsx index a44941a874..b00b4c42df 100644 --- a/src/components/Home/index.tsx +++ b/src/components/Home/index.tsx @@ -10,7 +10,7 @@ import Diagram from './Diagram' import UseCases from './UseCases' import { logEvent } from '../../utils/front/ga' -import styles from './styles.module.css' +import * as styles from './styles.module.css' const Home: React.FC = () => { const diagramSectionRef = useRef(null) diff --git a/src/components/LayoutFooter/index.tsx b/src/components/LayoutFooter/index.tsx index 3f352cc01e..e78cfd09a9 100644 --- a/src/components/LayoutFooter/index.tsx +++ b/src/components/LayoutFooter/index.tsx @@ -7,15 +7,15 @@ import SocialIcon, { ISocialIcon } from '../SocialIcon' import ShowOnly from '../ShowOnly' import { getFirstPage } from '../../utils/shared/sidebar' -import { ReactComponent as LogoSVG } from '../../../static/img/logo.svg' +import { ReactComponent as LogoSVG } from '../../../static/img/dvc_icon-color--square_vector.svg' import { ReactComponent as GithubSVG } from '../SocialIcon/github.svg' import { ReactComponent as TwitterSVG } from '../SocialIcon/twitter.svg' import { ReactComponent as DiscordSVG } from '../SocialIcon/discord.svg' -import { ReactComponent as CmlSVG } from '../../../static/img/cml-icon.svg' -import { ReactComponent as StudioSVG } from '../../../static/img/studio-icon.svg' -import { ReactComponent as IterativeSVG } from '../../../static/img/iterative-icon.svg' +import { ReactComponent as CmlSVG } from '../../../static/img/cml_icon-color--square_vector.svg' +import { ReactComponent as StudioSVG } from 
'../../../static/img/studio_icon-color--square_vector.svg' +import { ReactComponent as IterativeSVG } from '../../../static/img/iterative_icon-color--square_vector.svg' -import styles from './styles.module.css' +import * as styles from './styles.module.css' const docsPage = getFirstPage() @@ -91,6 +91,10 @@ const footerListsData: Array = [ href: 'https://iterative.ai/about#career', text: 'Career', target: '_blank' + }, + { + href: 'https://iterative.ai/brand', + text: 'Media Kit' } ] }, @@ -153,7 +157,10 @@ const FooterLists: React.FC = () => (

    {header}

      {links.map(({ text, target, href, icon }, i) => ( -
    • +
    • {icon} {text} @@ -183,7 +190,9 @@ const FooterSocialIcons: React.FC = () => ( const LayoutFooter: React.FC = () => (