From f1424133e16e59def079f7dca48b5d8146e8919a Mon Sep 17 00:00:00 2001 From: Emre Sahin Date: Wed, 10 Mar 2021 13:39:50 +0300 Subject: [PATCH] merged step1 and intro and other fixes in #29 --- ...ation.md => 01-manual-data-preparation.md} | 0 get-started/stages/01-whats-a-stage.md | 18 ------------ ...adding-a-stage.md => 02-adding-a-stage.md} | 0 ...nning-a-stage.md => 03-running-a-stage.md} | 0 ...-stages.md => 04-how-dvc-tracks-stages.md} | 0 ...ed.md => 05-how-directories-are-cached.md} | 0 ...stage.md => 06-add-featurization-stage.md} | 0 ...pipeline.md => 07-reproduce-a-pipeline.md} | 0 ...peline.md => 08-visualize-the-pipeline.md} | 0 .../stages/{10-ending.md => 09-ending.md} | 0 get-started/stages/index.json | 22 ++++++--------- get-started/stages/intro.md | 28 +++++++++++++------ 12 files changed, 28 insertions(+), 40 deletions(-) rename get-started/stages/{02-manual-data-preparation.md => 01-manual-data-preparation.md} (100%) delete mode 100644 get-started/stages/01-whats-a-stage.md rename get-started/stages/{03-adding-a-stage.md => 02-adding-a-stage.md} (100%) rename get-started/stages/{04-running-a-stage.md => 03-running-a-stage.md} (100%) rename get-started/stages/{05-how-dvc-tracks-stages.md => 04-how-dvc-tracks-stages.md} (100%) rename get-started/stages/{06-how-directories-are-cached.md => 05-how-directories-are-cached.md} (100%) rename get-started/stages/{07-add-featurization-stage.md => 06-add-featurization-stage.md} (100%) rename get-started/stages/{08-reproduce-a-pipeline.md => 07-reproduce-a-pipeline.md} (100%) rename get-started/stages/{09-visualize-the-pipeline.md => 08-visualize-the-pipeline.md} (100%) rename get-started/stages/{10-ending.md => 09-ending.md} (100%) diff --git a/get-started/stages/02-manual-data-preparation.md b/get-started/stages/01-manual-data-preparation.md similarity index 100% rename from get-started/stages/02-manual-data-preparation.md rename to get-started/stages/01-manual-data-preparation.md diff --git 
a/get-started/stages/01-whats-a-stage.md b/get-started/stages/01-whats-a-stage.md deleted file mode 100644 index 0ad2a39..0000000 --- a/get-started/stages/01-whats-a-stage.md +++ /dev/null @@ -1,18 +0,0 @@ -# What's a stage? - -[Stages][bcstage] are the basic building blocks of pipelines in DVC. They define -and execute an action, like data import or feature extraction, and usually -produce some output. In this scenario, we create stages and pipelines for a -machine learning project. - -[bcstage]: https://dvc.org/doc/user-guide/basic-concepts/stage - -We have a machine learning project already provided in `~/project`. We covered -these steps in previous scenarios. DVC is installed. Data is downloaded from -`https://github.com/iterative/dataset-registry` and made smaller. A _local -remote_ is created in `/tmp/data-storage` named `mystorage`, and the data in the -DVC repository is pushed. Code and python requirements are prepared, and all -changes are committed to Git. - -You can use the editor to browse the project. 
- diff --git a/get-started/stages/03-adding-a-stage.md b/get-started/stages/02-adding-a-stage.md similarity index 100% rename from get-started/stages/03-adding-a-stage.md rename to get-started/stages/02-adding-a-stage.md diff --git a/get-started/stages/04-running-a-stage.md b/get-started/stages/03-running-a-stage.md similarity index 100% rename from get-started/stages/04-running-a-stage.md rename to get-started/stages/03-running-a-stage.md diff --git a/get-started/stages/05-how-dvc-tracks-stages.md b/get-started/stages/04-how-dvc-tracks-stages.md similarity index 100% rename from get-started/stages/05-how-dvc-tracks-stages.md rename to get-started/stages/04-how-dvc-tracks-stages.md diff --git a/get-started/stages/06-how-directories-are-cached.md b/get-started/stages/05-how-directories-are-cached.md similarity index 100% rename from get-started/stages/06-how-directories-are-cached.md rename to get-started/stages/05-how-directories-are-cached.md diff --git a/get-started/stages/07-add-featurization-stage.md b/get-started/stages/06-add-featurization-stage.md similarity index 100% rename from get-started/stages/07-add-featurization-stage.md rename to get-started/stages/06-add-featurization-stage.md diff --git a/get-started/stages/08-reproduce-a-pipeline.md b/get-started/stages/07-reproduce-a-pipeline.md similarity index 100% rename from get-started/stages/08-reproduce-a-pipeline.md rename to get-started/stages/07-reproduce-a-pipeline.md diff --git a/get-started/stages/09-visualize-the-pipeline.md b/get-started/stages/08-visualize-the-pipeline.md similarity index 100% rename from get-started/stages/09-visualize-the-pipeline.md rename to get-started/stages/08-visualize-the-pipeline.md diff --git a/get-started/stages/10-ending.md b/get-started/stages/09-ending.md similarity index 100% rename from get-started/stages/10-ending.md rename to get-started/stages/09-ending.md diff --git a/get-started/stages/index.json b/get-started/stages/index.json index 82b4b6c..902569a 100644 
--- a/get-started/stages/index.json +++ b/get-started/stages/index.json @@ -7,43 +7,39 @@ "steps": [ { "title": "Step 1", - "text": "01-whats-a-stage.md" + "text": "01-manual-data-preparation.md" }, { "title": "Step 2", - "text": "02-manual-data-preparation.md" + "text": "02-adding-a-stage.md" }, { "title": "Step 3", - "text": "03-adding-a-stage.md" + "text": "03-running-a-stage.md" }, { "title": "Step 4", - "text": "04-running-a-stage.md" + "text": "04-how-dvc-tracks-stages.md" }, { "title": "Step 5", - "text": "05-how-dvc-tracks-stages.md" + "text": "05-how-directories-are-cached.md" }, { "title": "Step 6", - "text": "06-how-directories-are-cached.md" + "text": "06-add-featurization-stage.md" }, { "title": "Step 7", - "text": "07-add-featurization-stage.md" + "text": "07-reproduce-a-pipeline.md" }, { "title": "Step 8", - "text": "08-reproduce-a-pipeline.md" - }, - { - "title": "Step 9", - "text": "09-visualize-the-pipeline.md" + "text": "08-visualize-the-pipeline.md" }, { "title": "Congratulations!", - "text": "10-ending.md" + "text": "09-ending.md" } ], "intro": { diff --git a/get-started/stages/intro.md b/get-started/stages/intro.md index b33fe5b..1d82f44 100644 --- a/get-started/stages/intro.md +++ b/get-started/stages/intro.md @@ -1,17 +1,27 @@ -The commands that we have seen so far (`add`, `push`, `pull`, etc.) provide a -useful framework to track, save, and share models and large data files. In some -cases and projects, this could be all you need. - -Usually, in ML projects, you need to process data and generate outputs in a +In ML projects, we usually need to process data and generate outputs in a reproducible way. This requires establishing a connection between the data -processed, the program that processes them, its parameters and the outputs. - -In a typical machine learning project we have the following stages: +processed, the program that processes them, its parameters, and the outputs. 
![](/dvc/courses/get-started/stages/assets/example-flow.png) This process is reflected in DVC with a [data pipeline][bcpipeline]. In this -scenario we begin to build pipelines using stage definitions and connect them +scenario, we begin to build pipelines using stage definitions and connect them together. [bcpipeline]: https://dvc.org/doc/user-guide/basic-concepts/pipeline + +[Stages][bcstage] are the basic building blocks of pipelines in DVC. They define +and execute an action, like data import or feature extraction, and usually +produce some output. + +[bcstage]: https://dvc.org/doc/user-guide/basic-concepts/stage + +We have a machine learning project already provided in `~/project`. We provided +source files in `~/project/src/`, downloaded data to `data/data.xml`, and made +it smaller. You can review these steps in more detail in the [Data and Model +Versioning][v] and [Accessing Data and Models][a] scenarios. + +[v]: https://katacoda.com/dvc/courses/get-started/versioning +[a]: https://katacoda.com/dvc/courses/get-started/accessing + +You can use the editor to browse the project.