From d56f2d2e5aa8cddd65c0272f2bbee63c5a7537af Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 8 Jan 2020 14:31:34 -0600 Subject: [PATCH 001/100] api: create index and structure for #463 --- public/static/docs/api-reference/get_url.md | 13 +++++++++++++ public/static/docs/api-reference/index.md | 18 ++++++++++++++++++ public/static/docs/api-reference/open.md | 17 +++++++++++++++++ public/static/docs/api-reference/read.md | 17 +++++++++++++++++ public/static/docs/sidebar.json | 19 +++++++++++++++++++ 5 files changed, 84 insertions(+) create mode 100644 public/static/docs/api-reference/get_url.md create mode 100644 public/static/docs/api-reference/index.md create mode 100644 public/static/docs/api-reference/open.md create mode 100644 public/static/docs/api-reference/read.md diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md new file mode 100644 index 0000000000..9b6797c07c --- /dev/null +++ b/public/static/docs/api-reference/get_url.md @@ -0,0 +1,13 @@ +# dvc.api.get_url() + +get_url(path, repo=None, rev=None, remote=None) - returns an url of an artifact. + +## Arguments + +path - a path to an artifact, relative to repo root + +repo - a path or git url of a repo + +rev - revision, i.e. a branch, a tag, a sha. This only works with an url in repo + +remote - a name of a remote to fetch artifact from/give url to diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md new file mode 100644 index 0000000000..deb39d88cb --- /dev/null +++ b/public/static/docs/api-reference/index.md @@ -0,0 +1,18 @@ +# Python API + +When you [install](/doc/install) DVC in an environment, the `dvc` package +becomes available for usage in Python source code. While most of its code is the +internal implementation of the `dvc` command-line tool, we wrote the `dvc.api` +module to expose special user functions that we hope you may find useful! 
+ +> Please don't hesitate in sending a feature request +> [on GitHub](https://github.com/iterative/dvc.org/issues) with ideas of other +> functions we could add to the Python API. + +This reference provides the details about our API functions, their purpose, +usage, and examples. Please note that they also have internal documentation +which you can see in the module's +[source code](https://github.com/iterative/dvc/blob/master/dvc/api.py). + +Please choose from the navigation sidebar to the left, or click the `Next` +button below ↘ diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md new file mode 100644 index 0000000000..323b7359bf --- /dev/null +++ b/public/static/docs/api-reference/open.md @@ -0,0 +1,17 @@ +# dvc.api.open() + +open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - opens an +artifact as a file, may only be used as context manager: + +## Arguments + +path - a path to an artifact, relative to repo root + +repo - a path or git url of a repo + +rev - revision, i.e. a branch, a tag, a sha. This only works with an url in repo + +remote - a name of a remote to fetch artifact from/give url to mode - a mode +with which we open a file, the only sensible options are r/rt and rb + +encoding - an encoding used to decode contents to a string diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md new file mode 100644 index 0000000000..9c5d9a76de --- /dev/null +++ b/public/static/docs/api-reference/read.md @@ -0,0 +1,17 @@ +# dvc.api.read() + +read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - returns +the contents of an artifact as a bytes object or a string. + +## Arguments + +path - a path to an artifact, relative to repo root + +repo - a path or git url of a repo + +rev - revision, i.e. a branch, a tag, a sha. 
This only works with an url in repo + +remote - a name of a remote to fetch artifact from/give url to mode - a mode +with which we open a file, the only sensible options are r/rt and rb + +encoding - an encoding used to decode contents to a string diff --git a/public/static/docs/sidebar.json b/public/static/docs/sidebar.json index c253134e83..777d93e4fb 100644 --- a/public/static/docs/sidebar.json +++ b/public/static/docs/sidebar.json @@ -354,6 +354,25 @@ } ] }, + { + "slug": "api-reference", + "label": "Python API Reference", + "source": "api-reference/index.md", + "children": [ + { + "slug": "read", + "label": "read()" + }, + { + "slug": "get_url", + "label": "get_url()" + }, + { + "slug": "open", + "label": "open()" + } + ] + }, { "slug": "understanding-dvc", "label": "Understanding DVC", From d53833f410377f9e920ab0eb40f716f8eedabc2a Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 8 Jan 2020 15:27:48 -0600 Subject: [PATCH 002/100] api ref: add summon page, improve format of all pages --- public/static/docs/api-reference/get_url.md | 19 +++++++++++------ public/static/docs/api-reference/open.md | 23 +++++++++++++-------- public/static/docs/api-reference/read.md | 23 +++++++++++++-------- public/static/docs/api-reference/summon.md | 20 ++++++++++++++++++ public/static/docs/sidebar.json | 4 ++++ 5 files changed, 65 insertions(+), 24 deletions(-) create mode 100644 public/static/docs/api-reference/summon.md diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 9b6797c07c..d1ec3281fe 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,13 +1,20 @@ # dvc.api.get_url() -get_url(path, repo=None, rev=None, remote=None) - returns an url of an artifact. +Returns an url of an artifact. 
-## Arguments +## Signature -path - a path to an artifact, relative to repo root +```py +get_url(path, repo=None, rev=None, remote=None) +``` -repo - a path or git url of a repo +## Parameters -rev - revision, i.e. a branch, a tag, a sha. This only works with an url in repo +- `path` - a path to an artifact, relative to repo root -remote - a name of a remote to fetch artifact from/give url to +- `repo` - a path or git url of a repo + +- `rev` - revision, i.e. a branch, a tag, a SHA. This only works with an url in + repo + +- `remote` - a name of a remote to fetch artifact from/give url to diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 323b7359bf..6e10795961 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -1,17 +1,22 @@ # dvc.api.open() -open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - opens an -artifact as a file, may only be used as context manager: +Opens an artifact as a file, may only be used as context manager -## Arguments +## Signature -path - a path to an artifact, relative to repo root +```py +open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) +``` -repo - a path or git url of a repo +## Parameters -rev - revision, i.e. a branch, a tag, a sha. This only works with an url in repo +- `path` - a path to an artifact, relative to repo root -remote - a name of a remote to fetch artifact from/give url to mode - a mode -with which we open a file, the only sensible options are r/rt and rb +- `repo` - a path or git url of a repo -encoding - an encoding used to decode contents to a string +- `rev` - revision, i.e. a branch, a tag, a SHA. 
This only works with an url in + repo + +- `remote` - a name of a remote to fetch artifact from/give url to + +- `encoding` - an encoding used to decode contents to a string diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 9c5d9a76de..e5dab42d1b 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -1,17 +1,22 @@ # dvc.api.read() -read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - returns -the contents of an artifact as a bytes object or a string. +Returns the contents of an artifact as a bytes object or a string. -## Arguments +## Signature -path - a path to an artifact, relative to repo root +```py +read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) +``` -repo - a path or git url of a repo +## Parameters -rev - revision, i.e. a branch, a tag, a sha. This only works with an url in repo +- `path` - a path to an artifact, relative to repo root -remote - a name of a remote to fetch artifact from/give url to mode - a mode -with which we open a file, the only sensible options are r/rt and rb +- `repo` - a path or git url of a repo -encoding - an encoding used to decode contents to a string +- `rev` - revision, i.e. a branch, a tag, a SHA. This only works with an url in + repo + +- `remote` - a name of a remote to fetch artifact from/give url to + +- `encoding` - an encoding used to decode contents to a string diff --git a/public/static/docs/api-reference/summon.md b/public/static/docs/api-reference/summon.md new file mode 100644 index 0000000000..af438c51f7 --- /dev/null +++ b/public/static/docs/api-reference/summon.md @@ -0,0 +1,20 @@ +# dvc.api.summon() + +Instantiate an object described in the summon file. 
+ +## Signature + +```py +def summon(name, repo=None, rev=None, summon_file="dvcsummon.yaml", args=None) +``` + +## Parameters + +- `name` - object to summon + +- `repo` - a path or git url of a repo + +- `rev` - revision, i.e. a branch, a tag, a SHA. This only works with an url in + repo + +- `summon_file` - DVC summon configuration file diff --git a/public/static/docs/sidebar.json b/public/static/docs/sidebar.json index 777d93e4fb..dbd6d6bdd1 100644 --- a/public/static/docs/sidebar.json +++ b/public/static/docs/sidebar.json @@ -370,6 +370,10 @@ { "slug": "open", "label": "open()" + }, + { + "slug": "summon", + "label": "summon()" } ] }, From aba4954521a41bc05369db6aba0277d52d201ef4 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 8 Jan 2020 15:33:55 -0600 Subject: [PATCH 003/100] api ref: complete pages per initial desc. per https://github.com/iterative/dvc.org/issues/463#issuecomment-514139792 --- public/static/docs/api-reference/open.md | 16 ++++++++++++++-- public/static/docs/api-reference/read.md | 5 ++++- public/static/docs/api-reference/summon.md | 10 +++++++++- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 6e10795961..de4d379c5b 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -1,6 +1,6 @@ # dvc.api.open() -Opens an artifact as a file, may only be used as context manager +Opens an artifact as a file. May only be used as context manager. ## Signature @@ -19,4 +19,16 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - `remote` - a name of a remote to fetch artifact from/give url to -- `encoding` - an encoding used to decode contents to a string +- `mode` - Mirrors their namesake builtin `open()` has. + +- `encoding` - an encoding used to decode contents to a string. Mirrors their + namesake builtin `open()` has. 
+ +## Example: `open` as a context manager + +```py +with dvc.api.open("path/to/data.csv", remote="my-s3", encoding="utf-8") as f: + for line in f: + process(line) + ... +``` diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index e5dab42d1b..19b5c1097d 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -19,4 +19,7 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - `remote` - a name of a remote to fetch artifact from/give url to -- `encoding` - an encoding used to decode contents to a string +- `mode` - Mirrors their namesake builtin `open()` has. + +- `encoding` - an encoding used to decode contents to a string. Mirrors their + namesake builtin `open()` has. diff --git a/public/static/docs/api-reference/summon.md b/public/static/docs/api-reference/summon.md index af438c51f7..ebf28f9339 100644 --- a/public/static/docs/api-reference/summon.md +++ b/public/static/docs/api-reference/summon.md @@ -5,7 +5,13 @@ Instantiate an object described in the summon file. 
## Signature ```py -def summon(name, repo=None, rev=None, summon_file="dvcsummon.yaml", args=None) +def summon( + name, + repo=None, + rev=None, + summon_file="dvcsummon.yaml", + args=None +) ``` ## Parameters @@ -18,3 +24,5 @@ def summon(name, repo=None, rev=None, summon_file="dvcsummon.yaml", args=None) repo - `summon_file` - DVC summon configuration file + +- `args` - other arguments From 0f5a3bb5dca690fe82c4f4ae3fdc775aa8ed0465 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 8 Jan 2020 22:46:49 -0600 Subject: [PATCH 004/100] api: better formatting, added links, and examples per https://github.com/iterative/dvc.org/issues/463#issuecomment-515917973 --- public/static/docs/api-reference/get_url.md | 23 ++++++++--- public/static/docs/api-reference/index.md | 13 ++++-- public/static/docs/api-reference/open.md | 44 ++++++++++++++++----- public/static/docs/api-reference/read.md | 35 +++++++++++----- public/static/docs/api-reference/summon.md | 13 +++--- 5 files changed, 95 insertions(+), 33 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index d1ec3281fe..4ba6ccc837 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,6 +1,6 @@ # dvc.api.get_url() -Returns an url of an artifact. +Returns the URL of a data artifact. ## Signature @@ -10,11 +10,22 @@ get_url(path, repo=None, rev=None, remote=None) ## Parameters -- `path` - a path to an artifact, relative to repo root +- `path` - path to the target artifact relative to the repository's root -- `repo` - a path or git url of a repo +- `repo` - path or Git URL of a DVC repository -- `rev` - revision, i.e. a branch, a tag, a SHA. This only works with an url in - repo +- `rev` - + [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) + (such as a branch name, a tag, or a commit hash). This only works with `repo` + URLs. 
-- `remote` - a name of a remote to fetch artifact from/give url to +- `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) + to fetch the target artifact from + +## Example + +```py +import dvc.api + +resource_url = dvc.api.get_url("data/prepared.tsv", repo="https://github.com/my-org/my-repo.git", remote="my-s3") +``` diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md index deb39d88cb..61862c1168 100644 --- a/public/static/docs/api-reference/index.md +++ b/public/static/docs/api-reference/index.md @@ -1,14 +1,21 @@ # Python API When you [install](/doc/install) DVC in an environment, the `dvc` package -becomes available for usage in Python source code. While most of its code is the -internal implementation of the `dvc` command-line tool, we wrote the `dvc.api` -module to expose special user functions that we hope you may find useful! +becomes available to the corresponding `python` interpreter. While most of the +package implements our [command-line tool](/doc/command-reference), we wrote the +`dvc.api` module to expose special functions you can use in your Python source +code. > Please don't hesitate in sending a feature request > [on GitHub](https://github.com/iterative/dvc.org/issues) with ideas of other > functions we could add to the Python API. +Import the API with: + +```py +import dvc.api +``` + This reference provides the details about our API functions, their purpose, usage, and examples. Please note that they also have internal documentation which you can see in the module's diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index de4d379c5b..b517ed82e0 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -1,6 +1,9 @@ # dvc.api.open() -Opens an artifact as a file. May only be used as context manager. 
+Opens a data artifact as a +[file object](https://docs.python.org/3.7/glossary.html#term-file-object). May +only be used as +[context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library). ## Signature @@ -10,25 +13,46 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) ## Parameters -- `path` - a path to an artifact, relative to repo root +- `path` - path to the target artifact relative to the repository's root -- `repo` - a path or git url of a repo +- `repo` - path or Git URL of a DVC repository -- `rev` - revision, i.e. a branch, a tag, a SHA. This only works with an url in - repo +- `rev` - + [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) + (such as a branch name, a tag, or a commit hash). This only works with `repo` + URLs. -- `remote` - a name of a remote to fetch artifact from/give url to +- `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) + to fetch the target artifact from -- `mode` - Mirrors their namesake builtin `open()` has. +- `mode` - (optional) mirrors the namesake parameter in builtin + [`open()`](https://docs.python.org/3.7/library/functions.html#open). Defaults + to "r" (read). -- `encoding` - an encoding used to decode contents to a string. Mirrors their - namesake builtin `open()` has. +- `encoding` - (optional) used to decode contents to a string. Mirrors the + namesake parameter in builtin `open()`. ## Example: `open` as a context manager +> See +> [PEP 343 -- The "with" Statement](https://www.python.org/dev/peps/pep-0343/) + +From a DVC remote: + ```py with dvc.api.open("path/to/data.csv", remote="my-s3", encoding="utf-8") as f: for line in f: process(line) - ... +``` + +From a DVC repository: + +```py +import csv +import dvc.api + +with dvc.api.open("dataset.csv", repo="https://github.com/my-org/my-repo.git") as f: + reader = csv.reader(f) + for row in reader: + # ... 
``` diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 19b5c1097d..61532f0e7e 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -1,6 +1,7 @@ # dvc.api.read() -Returns the contents of an artifact as a bytes object or a string. +Returns the contents of a data artifact as a bytes object or as a +string. ## Signature @@ -10,16 +11,32 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) ## Parameters -- `path` - a path to an artifact, relative to repo root +- `path` - path to the target artifact relative to the repository's root -- `repo` - a path or git url of a repo +- `repo` - path or Git URL of a DVC repository -- `rev` - revision, i.e. a branch, a tag, a SHA. This only works with an url in - repo +- `rev` - + [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) + (such as a branch name, a tag, or a commit hash). This only works with `repo` + URLs. -- `remote` - a name of a remote to fetch artifact from/give url to +- `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) + to fetch the target artifact from -- `mode` - Mirrors their namesake builtin `open()` has. +- `mode` - (optional) mirrors the namesake parameter in builtin + [`open()`](https://docs.python.org/3.7/library/functions.html#open). Defaults + to "r" (read). -- `encoding` - an encoding used to decode contents to a string. Mirrors their - namesake builtin `open()` has. +- `encoding` - (optional) used to decode contents to a string. Mirrors the + namesake parameter in builtin `open()`. 
+ +## Example: loading from content + +```py +import pickle +import dvc.api + +model = pickle.loads( + dvc.api.read("model.pkl", repo="https://github.com/my-org/my-repo.git") +) +``` diff --git a/public/static/docs/api-reference/summon.md b/public/static/docs/api-reference/summon.md index ebf28f9339..94fdbc2b5a 100644 --- a/public/static/docs/api-reference/summon.md +++ b/public/static/docs/api-reference/summon.md @@ -18,11 +18,14 @@ def summon( - `name` - object to summon -- `repo` - a path or git url of a repo +- `repo` - path or Git URL of a DVC repository -- `rev` - revision, i.e. a branch, a tag, a SHA. This only works with an url in - repo +- `rev` - + [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) + (such as a branch name, a tag, or a commit hash). This only works with `repo` + URLs. -- `summon_file` - DVC summon configuration file +- `summon_file` - summon file describing the object in question. Defaults to + `dvcsummon.yaml`. -- `args` - other arguments +- `args` - arguments to pass onto the object, if any From 701aeb7a26941a36511dc93d7fefc451ff13be38 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 8 Jan 2020 23:28:05 -0600 Subject: [PATCH 005/100] api ref: reorder pages, refine descriptions and options, add examples --- public/static/docs/api-reference/get_url.md | 5 +++-- public/static/docs/api-reference/open.md | 20 ++++++++++---------- public/static/docs/api-reference/read.md | 17 ++++++++++++----- public/static/docs/api-reference/summon.md | 6 +++--- public/static/docs/sidebar.json | 8 ++++---- 5 files changed, 32 insertions(+), 24 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 4ba6ccc837..aa4a5a3110 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,6 +1,7 @@ # dvc.api.get_url() -Returns the URL of a data artifact. 
+Returns the full URL to the data artifact specified by its `path` +in a `repo`. ## Signature @@ -14,7 +15,7 @@ get_url(path, repo=None, rev=None, remote=None) - `repo` - path or Git URL of a DVC repository -- `rev` - +- `rev` - (optional) [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). This only works with `repo` URLs. diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index b517ed82e0..63f9390c2e 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -1,6 +1,6 @@ # dvc.api.open() -Opens a data artifact as a +Opens a file artifact as a [file object](https://docs.python.org/3.7/glossary.html#term-file-object). May only be used as [context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library). @@ -13,11 +13,13 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) ## Parameters -- `path` - path to the target artifact relative to the repository's root +- `path` - + [path](https://docs.python.org/3.7/glossary.html#term-path-like-object) to the + target artifact relative to the repository's root - `repo` - path or Git URL of a DVC repository -- `rev` - +- `rev` - (optional) [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). This only works with `repo` URLs. @@ -27,31 +29,29 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3.7/library/functions.html#open). Defaults - to "r" (read). + to `"r"` (read). - `encoding` - (optional) used to decode contents to a string. Mirrors the namesake parameter in builtin `open()`. 
-## Example: `open` as a context manager +## Example: open from a DVC remote > See > [PEP 343 -- The "with" Statement](https://www.python.org/dev/peps/pep-0343/) -From a DVC remote: - ```py -with dvc.api.open("path/to/data.csv", remote="my-s3", encoding="utf-8") as f: +with dvc.api.open("data/raw.csv", remote="my-s3", encoding="utf-8") as f: for line in f: process(line) ``` -From a DVC repository: +## Example: open from a DVC repository ```py import csv import dvc.api -with dvc.api.open("dataset.csv", repo="https://github.com/my-org/my-repo.git") as f: +with dvc.api.open("dataset/", repo="https://github.com/my-org/my-repo.git") as f: reader = csv.reader(f) for row in reader: # ... diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 61532f0e7e..0ca556ada3 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -1,8 +1,13 @@ # dvc.api.read() -Returns the contents of a data artifact as a bytes object or as a +Returns the contents of a file artifact as a bytes object or as a string. +> Wrapper for [`dvc.api.open()`](/doc/api-reference/open) that returns the +> results of the file object's +> [`read()`](https://docs.python.org/3.7/tutorial/inputoutput.html#methods-of-file-objects) +> method. + ## Signature ```py @@ -11,11 +16,13 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) ## Parameters -- `path` - path to the target artifact relative to the repository's root +- `path` - + [path](https://docs.python.org/3.7/glossary.html#term-path-like-object) to the + target artifact relative to the repository's root - `repo` - path or Git URL of a DVC repository -- `rev` - +- `rev` - (optional) [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). This only works with `repo` URLs. 
@@ -25,12 +32,12 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3.7/library/functions.html#open). Defaults - to "r" (read). + to `"r"` (read). - `encoding` - (optional) used to decode contents to a string. Mirrors the namesake parameter in builtin `open()`. -## Example: loading from content +## Example ```py import pickle diff --git a/public/static/docs/api-reference/summon.md b/public/static/docs/api-reference/summon.md index 94fdbc2b5a..a04a1291f6 100644 --- a/public/static/docs/api-reference/summon.md +++ b/public/static/docs/api-reference/summon.md @@ -1,6 +1,6 @@ # dvc.api.summon() -Instantiate an object described in the summon file. +Instantiate an object, described in a _summon file_. ## Signature @@ -20,12 +20,12 @@ def summon( - `repo` - path or Git URL of a DVC repository -- `rev` - +- `rev` - (optional) [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). This only works with `repo` URLs. -- `summon_file` - summon file describing the object in question. Defaults to +- `summon_file` - YAML file describing the object in question. Defaults to `dvcsummon.yaml`. 
- `args` - arguments to pass onto the object, if any diff --git a/public/static/docs/sidebar.json b/public/static/docs/sidebar.json index dbd6d6bdd1..8999861e7c 100644 --- a/public/static/docs/sidebar.json +++ b/public/static/docs/sidebar.json @@ -359,10 +359,6 @@ "label": "Python API Reference", "source": "api-reference/index.md", "children": [ - { - "slug": "read", - "label": "read()" - }, { "slug": "get_url", "label": "get_url()" @@ -371,6 +367,10 @@ "slug": "open", "label": "open()" }, + { + "slug": "read", + "label": "read()" + }, { "slug": "summon", "label": "summon()" From 6a18d612dac1cbdb97769e4b09bc712752c19e2d Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 8 Jan 2020 23:33:35 -0600 Subject: [PATCH 006/100] api: note that env mgr install is required per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-340279299 --- public/static/docs/api-reference/index.md | 25 +++++++++++++---------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md index 61862c1168..30aaaced6b 100644 --- a/public/static/docs/api-reference/index.md +++ b/public/static/docs/api-reference/index.md @@ -1,25 +1,28 @@ # Python API -When you [install](/doc/install) DVC in an environment, the `dvc` package -becomes available to the corresponding `python` interpreter. While most of the -package implements our [command-line tool](/doc/command-reference), we wrote the -`dvc.api` module to expose special functions you can use in your Python source -code. +When you [install](/doc/install) DVC with an environment manager like `pip` or +`conda`, the `dvc` package becomes available to the corresponding `python` +interpreter. While most of the package implements our +[command-line tool](/doc/command-reference), we wrote the `dvc.api` module to +expose special functions you can use in your Python source code. 
-> Please don't hesitate in sending a feature request -> [on GitHub](https://github.com/iterative/dvc.org/issues) with ideas of other -> functions we could add to the Python API. +> We **strongly** recommend having `dvc` in requirements or setup file for your +> Python project, and installing it via and env manager such as `pip`. -Import the API with: +To import the API, use: ```py import dvc.api ``` This reference provides the details about our API functions, their purpose, -usage, and examples. Please note that they also have internal documentation -which you can see in the module's +usage, and examples. Please note that they also have inline documentation, which +you can see in the module's [source code](https://github.com/iterative/dvc/blob/master/dvc/api.py). +> Please don't hesitate in sending a feature request +> [on GitHub](https://github.com/iterative/dvc.org/issues) with ideas of other +> functions we could add to the Python API. + Please choose from the navigation sidebar to the left, or click the `Next` button below ↘ From a3023be2d70a61f814d67be9d58bbaf73dafad3d Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 8 Jan 2020 23:44:07 -0600 Subject: [PATCH 007/100] api ref: typo in index --- public/static/docs/api-reference/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md index 30aaaced6b..0e081559b5 100644 --- a/public/static/docs/api-reference/index.md +++ b/public/static/docs/api-reference/index.md @@ -6,8 +6,8 @@ interpreter. While most of the package implements our [command-line tool](/doc/command-reference), we wrote the `dvc.api` module to expose special functions you can use in your Python source code. -> We **strongly** recommend having `dvc` in requirements or setup file for your -> Python project, and installing it via and env manager such as `pip`. 
+> We **strongly** recommend having `dvc` in a requirements or setup file for +> your Python project, and installing it via an env manager such as `pip`. To import the API, use: From 09c76c5a8e252dea924f0ab2f8c570f04f40db25 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 9 Jan 2020 13:44:45 -0600 Subject: [PATCH 008/100] cmd ref: improve `path` description in get and import --- public/static/docs/command-reference/get.md | 6 +++--- public/static/docs/command-reference/import.md | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/public/static/docs/command-reference/get.md b/public/static/docs/command-reference/get.md index f8b4c693d7..77291d94d7 100644 --- a/public/static/docs/command-reference/get.md +++ b/public/static/docs/command-reference/get.md @@ -35,9 +35,9 @@ doesn't have a default remote set up, instead of downloading, DVC will try to copy the target data from the external source project or its cache). -The `path` argument of this command is used to specify the location, within the -source repository at `url`, of the target(s) to be downloaded. It can point to -any file or directory in the source project, including outputs +The `path` argument of this command is used to specify the location of the +target(s) to be downloaded within the source repository at `url`. It can point +to any file or directory in the source project, including outputs tracked by DVC as well as files tracked by Git. Note that for the former, data should be specified in one of the [DVC-files](/doc/user-guide/dvc-file-format) of the source repository. 
(In this case, a default diff --git a/public/static/docs/command-reference/import.md b/public/static/docs/command-reference/import.md index b63fc18b9d..04a49e2498 100644 --- a/public/static/docs/command-reference/import.md +++ b/public/static/docs/command-reference/import.md @@ -35,9 +35,9 @@ doesn't have a default remote set up, instead of downloading, DVC will try to copy the target data from the external source project or its cache). -The `path` argument of this command is used to specify the location, within the -source repository at `url`, of the target(s) to be downloaded. It can point to -any file or directory in the source project, including outputs +The `path` argument of this command is used to specify the location of the +target(s) to be downloaded within the source repository at `url`. It can point +to any file or directory in the source project, including outputs tracked by DVC as well as files tracked by Git. Note that for the former, data should be specified in one of the [DVC-files](/doc/user-guide/dvc-file-format) of the source repository. (In this case, a default From 8c14cbdacb7ad5af5bc3d7521ae6c1c8ff029702 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 9 Jan 2020 13:51:20 -0600 Subject: [PATCH 009/100] api: improve path/name and repo param descriptions --- public/static/docs/api-reference/get_url.md | 10 +++++++--- public/static/docs/api-reference/open.md | 10 ++++++---- public/static/docs/api-reference/read.md | 10 ++++++---- public/static/docs/api-reference/summon.md | 8 ++++++-- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index aa4a5a3110..29837ee286 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,7 +1,7 @@ # dvc.api.get_url() Returns the full URL to the data artifact specified by its `path` -in a `repo`. +in a `repo` (DVC project). 
## Signature @@ -11,9 +11,13 @@ get_url(path, repo=None, rev=None, remote=None) ## Parameters -- `path` - path to the target artifact relative to the repository's root +- **`path`** - used to specify the location of the target artifact within the + source project in `repo`, relative to the project's root. -- `repo` - path or Git URL of a DVC repository +- `repo` - specifies the location of the source DVC project. Both HTTP and SSH + protocols are supported for online Git repositories (e.g. + `[user@]server:project.git`). `repo` can also be a local file system path to + an "offline" project. - `rev` - (optional) [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 63f9390c2e..1ff6605c71 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -13,11 +13,13 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) ## Parameters -- `path` - - [path](https://docs.python.org/3.7/glossary.html#term-path-like-object) to the - target artifact relative to the repository's root +- **`path`** - used to specify the location of the target artifact within the + source project in `repo`, relative to the project's root. -- `repo` - path or Git URL of a DVC repository +- `repo` - specifies the location of the source DVC project. Both HTTP and SSH + protocols are supported for online Git repositories (e.g. + `[user@]server:project.git`). `repo` can also be a local file system path to + an "offline" project. 
- `rev` - (optional) [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 0ca556ada3..fa49398cd6 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -16,11 +16,13 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) ## Parameters -- `path` - - [path](https://docs.python.org/3.7/glossary.html#term-path-like-object) to the - target artifact relative to the repository's root +- **`path`** - used to specify the location of the target artifact within the + source project in `repo`, relative to the project's root. -- `repo` - path or Git URL of a DVC repository +- `repo` - specifies the location of the source DVC project. Both HTTP and SSH + protocols are supported for online Git repositories (e.g. + `[user@]server:project.git`). `repo` can also be a local file system path to + an "offline" project. - `rev` - (optional) [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) diff --git a/public/static/docs/api-reference/summon.md b/public/static/docs/api-reference/summon.md index a04a1291f6..cbd499933a 100644 --- a/public/static/docs/api-reference/summon.md +++ b/public/static/docs/api-reference/summon.md @@ -16,9 +16,13 @@ def summon( ## Parameters -- `name` - object to summon +- **`name`** - object to summon within the source project in `repo`, as defined + in the `summon_file`. -- `repo` - path or Git URL of a DVC repository +- `repo` - specifies the location of the source DVC project. Both HTTP and SSH + protocols are supported for online Git repositories (e.g. + `[user@]server:project.git`). `repo` can also be a local file system path to + an "offline" project. 
- `rev` - (optional) [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) From 97c39725465dbc007bd3eaf13ace6459eb7b031c Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 9 Jan 2020 16:07:56 -0600 Subject: [PATCH 010/100] api: improve `repo` param desc --- public/static/docs/api-reference/get_url.md | 4 ++-- public/static/docs/api-reference/open.md | 4 ++-- public/static/docs/api-reference/read.md | 4 ++-- public/static/docs/api-reference/summon.md | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 29837ee286..4de51d53a4 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -21,8 +21,8 @@ get_url(path, repo=None, rev=None, remote=None) - `rev` - (optional) [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) - (such as a branch name, a tag, or a commit hash). This only works with `repo` - URLs. + (such as a branch name, a tag, or a commit hash). `rev` only has an effect + when a URL is supplied as parameter to `repo`. - `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) to fetch the target artifact from diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 1ff6605c71..e385739e97 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -23,8 +23,8 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - `rev` - (optional) [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) - (such as a branch name, a tag, or a commit hash). This only works with `repo` - URLs. + (such as a branch name, a tag, or a commit hash). `rev` only has an effect + when a URL is supplied as parameter to `repo`. 
- `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) to fetch the target artifact from diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index fa49398cd6..344467032d 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -26,8 +26,8 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - `rev` - (optional) [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) - (such as a branch name, a tag, or a commit hash). This only works with `repo` - URLs. + (such as a branch name, a tag, or a commit hash). `rev` only has an effect + when a URL is supplied as parameter to `repo`. - `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) to fetch the target artifact from diff --git a/public/static/docs/api-reference/summon.md b/public/static/docs/api-reference/summon.md index cbd499933a..608e31a4b6 100644 --- a/public/static/docs/api-reference/summon.md +++ b/public/static/docs/api-reference/summon.md @@ -26,8 +26,8 @@ def summon( - `rev` - (optional) [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) - (such as a branch name, a tag, or a commit hash). This only works with `repo` - URLs. + (such as a branch name, a tag, or a commit hash). `rev` only has an effect + when a URL is supplied as parameter to `repo`. - `summon_file` - YAML file describing the object in question. Defaults to `dvcsummon.yaml`. 
From 6894e77c1a2130e17c9f9bc00b6360093ad32e9c Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 9 Jan 2020 16:15:13 -0600 Subject: [PATCH 011/100] api: add assumed defaults to all params in all functions --- public/static/docs/api-reference/get_url.md | 11 +++++++---- public/static/docs/api-reference/open.md | 10 ++++++---- public/static/docs/api-reference/read.md | 10 ++++++---- public/static/docs/api-reference/summon.md | 5 +++-- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 4de51d53a4..c13d327552 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -17,15 +17,18 @@ get_url(path, repo=None, rev=None, remote=None) - `repo` - specifies the location of the source DVC project. Both HTTP and SSH protocols are supported for online Git repositories (e.g. `[user@]server:project.git`). `repo` can also be a local file system path to - an "offline" project. + an "offline" project. If not supplied, this defaults to the current working + directory. - `rev` - (optional) - [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) + [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). `rev` only has an effect - when a URL is supplied as parameter to `repo`. + when a URL is supplied as parameter to `repo`. If not supplied, it uses the + default Git revision, `HEAD`. - `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) - to fetch the target artifact from + to fetch the target artifact from. If not supplied, the default project's + remote is employed. 
## Example diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index e385739e97..9259cb5b22 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -22,19 +22,21 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) an "offline" project. - `rev` - (optional) - [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) + [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). `rev` only has an effect - when a URL is supplied as parameter to `repo`. + when a URL is supplied as parameter to `repo`. If not supplied, it uses the + default Git revision, `HEAD`. - `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) - to fetch the target artifact from + to fetch the target artifact from. If not supplied, the default project's + remote is employed. - `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3.7/library/functions.html#open). Defaults to `"r"` (read). - `encoding` - (optional) used to decode contents to a string. Mirrors the - namesake parameter in builtin `open()`. + namesake parameter in builtin `open()`. Defaults to `"utf-8"`. ## Example: open from a DVC remote diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 344467032d..532c19289a 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -25,19 +25,21 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) an "offline" project. - `rev` - (optional) - [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) + [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). `rev` only has an effect - when a URL is supplied as parameter to `repo`. 
+ when a URL is supplied as parameter to `repo`. If not supplied, it uses the + default Git revision, `HEAD`. - `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) - to fetch the target artifact from + to fetch the target artifact from. If not supplied, the default project's + remote is employed. - `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3.7/library/functions.html#open). Defaults to `"r"` (read). - `encoding` - (optional) used to decode contents to a string. Mirrors the - namesake parameter in builtin `open()`. + namesake parameter in builtin `open()`. Defaults to `"utf-8"`. ## Example diff --git a/public/static/docs/api-reference/summon.md b/public/static/docs/api-reference/summon.md index 608e31a4b6..3189442aac 100644 --- a/public/static/docs/api-reference/summon.md +++ b/public/static/docs/api-reference/summon.md @@ -25,9 +25,10 @@ def summon( an "offline" project. - `rev` - (optional) - [Git revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) + [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). `rev` only has an effect - when a URL is supplied as parameter to `repo`. + when a URL is supplied as parameter to `repo`. If not supplied, it uses the + default Git revision, `HEAD`. - `summon_file` - YAML file describing the object in question. Defaults to `dvcsummon.yaml`. 
From 711838d2697142e07e25456d9444a1cac0198ed2 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 9 Jan 2020 16:17:37 -0600 Subject: [PATCH 012/100] api: oops, forgot about `repo` param's default :B --- public/static/docs/api-reference/open.md | 3 ++- public/static/docs/api-reference/read.md | 3 ++- public/static/docs/api-reference/summon.md | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 9259cb5b22..fedbde8885 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -19,7 +19,8 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - `repo` - specifies the location of the source DVC project. Both HTTP and SSH protocols are supported for online Git repositories (e.g. `[user@]server:project.git`). `repo` can also be a local file system path to - an "offline" project. + an "offline" project. If not supplied, this defaults to the current working + directory. - `rev` - (optional) [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 532c19289a..5eacc63d24 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -22,7 +22,8 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - `repo` - specifies the location of the source DVC project. Both HTTP and SSH protocols are supported for online Git repositories (e.g. `[user@]server:project.git`). `repo` can also be a local file system path to - an "offline" project. + an "offline" project. If not supplied, this defaults to the current working + directory. 
- `rev` - (optional) [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) diff --git a/public/static/docs/api-reference/summon.md b/public/static/docs/api-reference/summon.md index 3189442aac..4f983e8133 100644 --- a/public/static/docs/api-reference/summon.md +++ b/public/static/docs/api-reference/summon.md @@ -22,7 +22,8 @@ def summon( - `repo` - specifies the location of the source DVC project. Both HTTP and SSH protocols are supported for online Git repositories (e.g. `[user@]server:project.git`). `repo` can also be a local file system path to - an "offline" project. + an "offline" project. If not supplied, this defaults to the current working + directory. - `rev` - (optional) [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) From 68a3dd082dbc62e5229fad9c76a58395d06c0fe4 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 9 Jan 2020 16:54:42 -0600 Subject: [PATCH 013/100] api: comlpete get_url desc and real life example per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-340297527 --- public/static/docs/api-reference/get_url.md | 14 ++++++++++---- public/static/docs/api-reference/open.md | 2 +- public/static/docs/api-reference/read.md | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index c13d327552..fc9fe43fe1 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,7 +1,11 @@ # dvc.api.get_url() -Returns the full URL to the data artifact specified by its `path` -in a `repo` (DVC project). +Returns the full URL to the physical location (in a +[DVC remote](/doc/command-reference/remote)) of a data artifact +specified by its `path` in a `repo` (DVC project). 
+ +> For possible URL formats, refer to the +> [supported remote types](https://dvc.org/doc/command-reference/remote/add#supported-storage-types) ## Signature @@ -28,12 +32,14 @@ get_url(path, repo=None, rev=None, remote=None) - `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) to fetch the target artifact from. If not supplied, the default project's - remote is employed. + remote is used. ## Example ```py import dvc.api -resource_url = dvc.api.get_url("data/prepared.tsv", repo="https://github.com/my-org/my-repo.git", remote="my-s3") +resource_url = dvc.api.get_url("data/data.xml", repo="https://github.com/iterative/example-get-started") + +# resource_url = https://remote.dvc.org/get-started/a3/04afb96060aad90176268345e10355 ``` diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index fedbde8885..2985584afe 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -30,7 +30,7 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) to fetch the target artifact from. If not supplied, the default project's - remote is employed. + remote is used. - `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3.7/library/functions.html#open). Defaults diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 5eacc63d24..e4c0bca0ea 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -33,7 +33,7 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) to fetch the target artifact from. If not supplied, the default project's - remote is employed. + remote is used. 
- `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3.7/library/functions.html#open). Defaults From 95fa07041e8f1ea5212214186db77ba54f21420f Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 10 Jan 2020 16:23:00 -0600 Subject: [PATCH 014/100] use-cases: link from data-registries to api ref --- public/static/docs/use-cases/data-registries.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/static/docs/use-cases/data-registries.md b/public/static/docs/use-cases/data-registries.md index a52340f3a8..3ba89f26cc 100644 --- a/public/static/docs/use-cases/data-registries.md +++ b/public/static/docs/use-cases/data-registries.md @@ -89,8 +89,8 @@ $ dvc push ## Using registries The main methods to consume data artifacts from a **data registry** -are the `dvc import` and `dvc get` commands, as well as the `dvc.api` Python -API. +are the `dvc import` and `dvc get` commands, as well as the +[`dvc.api`](/doc/api-reference) Python API. ### Simple download (get) From 09db2909eadaf3868feffba2fb63b9b235aeba39 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 10 Jan 2020 16:29:09 -0600 Subject: [PATCH 015/100] doc: add some basic links between API and cmd refs --- public/static/docs/api-reference/open.md | 2 ++ public/static/docs/api-reference/read.md | 2 +- public/static/docs/command-reference/get.md | 4 ++++ public/static/docs/command-reference/import.md | 4 ++++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 2985584afe..b858e511fe 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -5,6 +5,8 @@ Opens a file artifact as a only be used as [context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library). +> This has similar uses as the `dvc get` and `dvc import` CLI commands. 
+ ## Signature ```py diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index e4c0bca0ea..cc5908a2bd 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -4,7 +4,7 @@ Returns the contents of a file artifact as a bytes object or as a string. > Wrapper for [`dvc.api.open()`](/doc/api-reference/open) that returns the -> results of the file object's +> complete file contents directly, by using the file object's > [`read()`](https://docs.python.org/3.7/tutorial/inputoutput.html#methods-of-file-objects) > method. diff --git a/public/static/docs/command-reference/get.md b/public/static/docs/command-reference/get.md index 77291d94d7..8159159e24 100644 --- a/public/static/docs/command-reference/get.md +++ b/public/static/docs/command-reference/get.md @@ -6,6 +6,10 @@ Download a file or directory from any DVC project or Git repository > Unlike `dvc import`, this command does not track the downloaded files (does > not create a DVC-file). + + +> See also our `dvc.api.open` Python API function. + ## Synopsis ```usage diff --git a/public/static/docs/command-reference/import.md b/public/static/docs/command-reference/import.md index 04a49e2498..ab2de6baf4 100644 --- a/public/static/docs/command-reference/import.md +++ b/public/static/docs/command-reference/import.md @@ -9,6 +9,10 @@ import. > See also `dvc get`, that corresponds to the first step this command performs > (just download the data). + + +> See also our `dvc.api.open` Python API function. + ## Synopsis ```usage From bb05e9c3c4088797a5f2404e3cf6c93e708948c9 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 10 Jan 2020 20:24:07 -0600 Subject: [PATCH 016/100] api: better desc. 
and example explanation in get_url --- public/static/docs/api-reference/get_url.md | 14 ++++++++--- public/static/docs/api-reference/open.md | 27 +++++++++------------ 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index fc9fe43fe1..24a02135dd 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -4,7 +4,11 @@ Returns the full URL to the physical location (in a [DVC remote](/doc/command-reference/remote)) of a data artifact specified by its `path` in a `repo` (DVC project). -> For possible URL formats, refer to the +Having the resource's URL, it would be possible to download it directly with an +appropriate tool such as `wget` for HTTP locations, `aws s3 cp` for Amazon S3, +etc. + +> For possible location protocols, refer to the > [supported remote types](https://dvc.org/doc/command-reference/remote/add#supported-storage-types) ## Signature @@ -40,6 +44,10 @@ get_url(path, repo=None, rev=None, remote=None) import dvc.api resource_url = dvc.api.get_url("data/data.xml", repo="https://github.com/iterative/example-get-started") - -# resource_url = https://remote.dvc.org/get-started/a3/04afb96060aad90176268345e10355 ``` + +The value of `resource_url` in this case would be something like +`https://remote.dvc.org/get-started/a3/04afb96060aad90176268345e10355`. This URL +represents the physical location fo the data, built by interpreting the +corresponding [DVC-file](/doc/user-guide/dvc-file-format), where the file's +checksum is stored, and the project's remote configuration. 
diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index b858e511fe..b1b7475456 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -41,25 +41,20 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - `encoding` - (optional) used to decode contents to a string. Mirrors the namesake parameter in builtin `open()`. Defaults to `"utf-8"`. -## Example: open from a DVC remote - -> See -> [PEP 343 -- The "with" Statement](https://www.python.org/dev/peps/pep-0343/) - -```py -with dvc.api.open("data/raw.csv", remote="my-s3", encoding="utf-8") as f: - for line in f: - process(line) -``` - -## Example: open from a DVC repository +## Example: read CSV file from an external DVC repository ```py import csv import dvc.api -with dvc.api.open("dataset/", repo="https://github.com/my-org/my-repo.git") as f: - reader = csv.reader(f) - for row in reader: - # ... +with dvc.api.open( + "data/rows.csv", + repo="https://github.com/example/dvc-repository" +) as fd: + reader = csv.reader(fd) + for row in reader: + # ... Process columns ``` + +> See also `dvc.api.read` for a more direct way to read the complete contents of +> a file artifact. From 58e4bb5b6d9cce8579f7ba860880bf648c3ee613 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sat, 11 Jan 2020 17:02:08 -0600 Subject: [PATCH 017/100] api: improve index desc (again?) --- public/static/docs/api-reference/index.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md index 0e081559b5..ea525e96aa 100644 --- a/public/static/docs/api-reference/index.md +++ b/public/static/docs/api-reference/index.md @@ -9,20 +9,20 @@ expose special functions you can use in your Python source code. 
> We **strongly** recommend having `dvc` in a requirements or setup file for > your Python project, and installing it via and env manager such as `pip`. -To import the API, use: +To use the API, import the module first with: ```py import dvc.api ``` -This reference provides the details about our API functions, their purpose, -usage, and examples. Please note that they also have inline documentation, which -you can see in the module's +This reference provides the details about the API functions (inside `dvc.api`): +their purpose, usage, and examples. Please note that they also have inline +documentation, which you get from the module's [source code](https://github.com/iterative/dvc/blob/master/dvc/api.py). > Please don't hesitate in sending a feature request > [on GitHub](https://github.com/iterative/dvc.org/issues) with ideas of other > functions we could add to the Python API. -Please choose from the navigation sidebar to the left, or click the `Next` -button below ↘ +Please choose a function from the navigation sidebar to the left, or click the +`Next` button below to jump into the first one ↘ From 08fe2266c20c690133b69a59b09fb389b794cd90 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sat, 11 Jan 2020 18:55:35 -0600 Subject: [PATCH 018/100] api: update get_url example to use iterative/dataset-registry repo --- public/static/docs/api-reference/get_url.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 24a02135dd..583eac3ee4 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -43,11 +43,11 @@ get_url(path, repo=None, rev=None, remote=None) ```py import dvc.api -resource_url = dvc.api.get_url("data/data.xml", repo="https://github.com/iterative/example-get-started") +resource_url = dvc.api.get_url("get-started/data.xml", repo="https://github.com/iterative/dataset-registry") ``` The value
of `resource_url` in this case would be something like -`https://remote.dvc.org/get-started/a3/04afb96060aad90176268345e10355`. This URL -represents the physical location fo the data, built by interpreting the +`https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355`. +This URL represents the physical location of the data, built by interpreting the corresponding [DVC-file](/doc/user-guide/dvc-file-format), where the file's checksum is stored, and the project's remote configuration. From c207152b07dfb09b772a074ed9e717438456d360 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sat, 11 Jan 2020 22:37:10 -0600 Subject: [PATCH 019/100] api: complete base examples for get_url, open, and read (they may need confirmation) --- public/static/docs/api-reference/get_url.md | 12 +-- public/static/docs/api-reference/open.md | 81 +++++++++++++++++++-- public/static/docs/api-reference/read.md | 10 ++- public/static/docs/api-reference/summon.md | 2 +- 4 files changed, 87 insertions(+), 18 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 583eac3ee4..907787fbf1 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -23,7 +23,7 @@ get_url(path, repo=None, rev=None, remote=None) source project in `repo`, relative to the project's root. - `repo` - specifies the location of the source DVC project. Both HTTP and SSH - protocols are supported for online Git repositories (e.g. + protocols are supported for online Git repository URLs (e.g. `[user@]server:project.git`). `repo` can also be a local file system path to an "offline" project. If not supplied, this defaults to the current working directory. @@ -35,15 +35,17 @@ get_url(path, repo=None, rev=None, remote=None) default Git revision, `HEAD`. - `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) - to fetch the target artifact from.
If not supplied, the default project's - remote is used. + to fetch the target artifact from. If not supplied, the default depends on the + value of `repo`. The local cache is used when `repo` is the current working + directory (default value of `repo`). When `repo` is an external repository + URL, the default project remote is used. -## Example +## Examples ```py import dvc.api -resource_url = dvc.api.get_url("get-started/data.xml", repo="https://github.com/iterative/dataset-registry") +resource_url = dvc.api.get_url('get-started/data.xml', repo='https://github.com/iterative/dataset-registry') ``` The value of `resource_url` in this case would be something like diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index b1b7475456..a0690a29ec 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -7,6 +7,12 @@ only be used as > This has similar uses as the `dvc get` and `dvc import` CLI commands. +💡 Note that `dvc.api.open` is able to +[stream](https://docs.python.org/3.7/library/io.html) the file directly from +**some** [remote](/doc/command-reference/remote) types. Otherwise, the file is +downloaded regularly into a temporary local path before the file object is made +available. + ## Signature ```py @@ -19,7 +25,7 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) source project in `repo`, relative to the project's root. - `repo` - specifies the location of the source DVC project. Both HTTP and SSH - protocols are supported for online Git repositories (e.g. + protocols are supported for online Git repository URLs (e.g. `[user@]server:project.git`). `repo` can also be a local file system path to an "offline" project. If not supplied, this defaults to the current working directory. @@ -31,8 +37,10 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) default Git revision, `HEAD`.
- `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) - to fetch the target artifact from. If not supplied, the default project's - remote is used. + to fetch the target artifact from. If not supplied, the default depends on the + value of `repo`. The local cache is used when `repo` is the current working + directory (default value of `repo`). when `repo` is an external repository + URL, the default project remote is used. - `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3.7/library/functions.html#open). Defaults @@ -41,20 +49,77 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - `encoding` - (optional) used to decode contents to a string. Mirrors the namesake parameter in builtin `open()`. Defaults to `"utf-8"`. -## Example: read CSV file from an external DVC repository +## Example: process XML file from an external DVC repository + +```py +from xml.dom.minidom import parse + +import dvc.api + +with dvc.api.open( + "get-started/data.xml", + "https://github.com/iterative/dataset-registry" +) as fd: + xmldom = parse(fd) + # ... Process elements +``` + +> See also `dvc.api.read` for a more direct way to read the complete contents of +> a file artifact. + +## Example: use a file from the local cache + +In this case we don't supply a `repo` value, which means the current working +directory will be tried instead, so make sure that the code is run from within a +DVC repository: + +```py +import dvc.api + +with dvc.api.open('data/nlp/words.txt') as fd: + print(fd.name) +``` + +DVC will look for `data/nlp/words.txt` in the local cache of the +project. (If it's not found there, the default +[remote](/doc/command-reference/remote) will be tried.) + +The output of the script above should be something like +`.dvc/cache/3a/01762e96060aa04a68345fbd910355` – the physical data location. 
+ +## Example: process CSV file from a private repository + +For this we'll have to use the SSH URL to the Git repo (assuming the local +[SSH credentials](https://help.github.com/en/github/authenticating-to-github/connecting-to-github-with-ssh) +are configured locally): ```py import csv import dvc.api with dvc.api.open( - "data/rows.csv", - repo="https://github.com/example/dvc-repository" + "sea_ice.csv", + repo="git@github.com:iterative/df_sea_ice_no_header.git" ) as fd: reader = csv.reader(fd) for row in reader: # ... Process columns ``` -> See also `dvc.api.read` for a more direct way to read the complete contents of -> a file artifact. +## Example: stream file from a specific remote + +Sometimes we may want to chose the [remote](/doc/command-reference/remote) data +source, for example to ensure that file streaming is enabled. This can be done +by providing a `remote` argument: + +```py +import dvc.api + +with dvc.api.open( + 'model.pkl', + repo='https://github.com/example/dvc-repository' + remote='my-s3-bucket' +) as fd: + for line in fd: + # ... Process lines +``` diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index cc5908a2bd..5402ea4a62 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -20,7 +20,7 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) source project in `repo`, relative to the project's root. - `repo` - specifies the location of the source DVC project. Both HTTP and SSH - protocols are supported for online Git repositories (e.g. + protocols are supported for online Git repository URLs (e.g. `[user@]server:project.git`). `repo` can also be a local file system path to an "offline" project. If not supplied, this defaults to the current working directory. @@ -32,8 +32,10 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) default Git revision, `HEAD`. 
- `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) - to fetch the target artifact from. If not supplied, the default project's - remote is used. + to fetch the target artifact from. If not supplied, the default depends on the + value of `repo`. The local cache is used when `repo` is the current working + directory (default value of `repo`). when `repo` is an external repository + URL, the default project remote is used. - `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3.7/library/functions.html#open). Defaults @@ -42,7 +44,7 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) - `encoding` - (optional) used to decode contents to a string. Mirrors the namesake parameter in builtin `open()`. Defaults to `"utf-8"`. -## Example +## Examples ```py import pickle diff --git a/public/static/docs/api-reference/summon.md b/public/static/docs/api-reference/summon.md index 4f983e8133..5169a03c53 100644 --- a/public/static/docs/api-reference/summon.md +++ b/public/static/docs/api-reference/summon.md @@ -20,7 +20,7 @@ def summon( in the `summon_file`. - `repo` - specifies the location of the source DVC project. Both HTTP and SSH - protocols are supported for online Git repositories (e.g. + protocols are supported for online Git repository URLs (e.g. `[user@]server:project.git`). `repo` can also be a local file system path to an "offline" project. If not supplied, this defaults to the current working directory. 
From 4a50af911448f1c856750a8b06dc78f792044472 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 14 Jan 2020 12:26:47 -0600 Subject: [PATCH 020/100] install: add notes about installing as a Python lib per https://github.com/iterative/dvc.org/issues/919#issue-549227438 --- public/static/docs/api-reference/index.md | 5 +++-- public/static/docs/install/index.md | 11 +++++++++++ public/static/docs/install/linux.md | 3 +++ public/static/docs/install/macos.md | 3 +++ public/static/docs/install/windows.md | 5 +++++ 5 files changed, 25 insertions(+), 2 deletions(-) diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md index ea525e96aa..fca5695627 100644 --- a/public/static/docs/api-reference/index.md +++ b/public/static/docs/api-reference/index.md @@ -6,8 +6,9 @@ interpreter. While most of the package implements our [command-line tool](/doc/command-reference), we wrote the `dvc.api` module to expose special functions you can use in your Python source code. -> We **strongly** recommend having `dvc` in a requirements or setup file for -> your Python project, and installing it via and env manager such as `pip`. +> For API use, we **strongly** recommend having `dvc` in a requirements or setup +> file for your Python project, and installing it via and env manager such as +> `pip`. To use the API, import the module first with: diff --git a/public/static/docs/install/index.md b/public/static/docs/install/index.md index c3ed40abb6..c30a08f6cf 100644 --- a/public/static/docs/install/index.md +++ b/public/static/docs/install/index.md @@ -7,6 +7,17 @@ Please double check that you don't already have DVC (for example running - [Install on Windows](/doc/install/windows) - [Install on Linux](/doc/install/linux) +## Install as a Python library + +When you install DVC with an environment manager like `pip` or `conda`, the +`dvc` package becomes available to the corresponding `python` interpreter. 
This +is particularly useful in order to access the [Python API](/doc/api-reference) +(`dvc.api` module). + +> For API use, we **strongly** recommend having `dvc` in a requirements or setup +> file for your Python project, and installing it via and env manager such as +> `pip`. + ## Advanced options - Shell completion is automatically enabled by certain installation methods. If diff --git a/public/static/docs/install/linux.md b/public/static/docs/install/linux.md index 7b1dc51983..6525ebaaad 100644 --- a/public/static/docs/install/linux.md +++ b/public/static/docs/install/linux.md @@ -1,5 +1,8 @@ # Installation on Linux +> For [API](/doc/api-reference) use, please +> [install with pip](#install-with-pip) or [with conda](#install-with-conda). + ## Install with pip > We **strongly** recommend creating a diff --git a/public/static/docs/install/macos.md b/public/static/docs/install/macos.md index 774583539f..044c663a38 100644 --- a/public/static/docs/install/macos.md +++ b/public/static/docs/install/macos.md @@ -1,5 +1,8 @@ # Installation on MacOS +> For [API](/doc/api-reference) use, please +> [install with pip](#install-with-pip) or [with conda](#install-with-conda). + ## Install with brew Recommended. Requires [Homebrew](https://brew.sh/). diff --git a/public/static/docs/install/windows.md b/public/static/docs/install/windows.md index 34b605c027..bdecf6be5e 100644 --- a/public/static/docs/install/windows.md +++ b/public/static/docs/install/windows.md @@ -4,6 +4,11 @@ > [Running DVC on Windows](/doc/user-guide/running-dvc-on-windows) for important > tips to improve your experience using DVC on Windows. + + +> For [API](/doc/api-reference) use, please +> [install with pip](#install-with-pip) or [with conda](#install-with-conda). 
+ ## Windows installer The easiest way is to use the self-contained, executable installer (binary), From 89734fd326ad91fba1fc11673555c0b06cda7d85 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 15 Jan 2020 14:05:20 -0600 Subject: [PATCH 021/100] api ref: add note about file existence in get_url, and related updates per https://github.com/iterative/dvc/issues/2994#issuecomment-574815560 --- public/static/docs/api-reference/get_url.md | 28 +++++++++++++++------ public/static/docs/api-reference/open.md | 8 +++--- public/static/docs/api-reference/read.md | 6 ++--- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 907787fbf1..569ab16c3e 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -4,9 +4,16 @@ Returns the full URL to the physical location (in a [DVC remote](/doc/command-reference/remote)) of a data artifact specified by its `path` in a `repo` (DVC project). -Having the resource's URL, it would be possible to download it directly with an -appropriate tool such as `wget` for HTTP locations, `aws s3 cp` for Amazon S3, -etc. +⚠️ Note that the returned URL is formed by analyzing the corresponding +[DVC-file](/doc/user-guide/dvc-file-format) (see [Examples](#examples) below). +**There is no guarantee that the file actually exists in that location**. Please +keep this in mind when using the URL string in your code. + +💡 Having the resource's URL, it should be possible to download it directly with +an appropriate tool such as +[`urlretrieve`](https://docs.python.org/3/library/urllib.request.html#urllib.request.urlretrieve) +or `boto3` +[download_fileobj](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.download_fileobj). 
> For possible location protocols, refer to the > [supported remote types](https://dvc.org/doc/command-reference/remote/add#supported-storage-types) @@ -45,11 +52,18 @@ get_url(path, repo=None, rev=None, remote=None) ```py import dvc.api -resource_url = dvc.api.get_url('get-started/data.xml', repo='https://github.com/iterative/dataset-registry') +resource_url = dvc.api.get_url( + 'get-started/data.xml', + repo='https://github.com/iterative/dataset-registry' +) ``` -The value of `resource_url` in this case would be something like -`https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355`. +The value of `resource_url` in this case would be something like: + +`https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355` + This URL represents the physical location fo the data, built by interpreting the corresponding [DVC-file](/doc/user-guide/dvc-file-format), where the file's -checksum is stored, and the project's remote configuration. +checksum `a304afb96060aad90176268345e10355` is stored, and the project's remote +configuration where the base URL `https://remote.dvc.org/dataset-registry/` is +saved. diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index a0690a29ec..bb2b8b96d3 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -1,14 +1,14 @@ # dvc.api.open() Opens a file artifact as a -[file object](https://docs.python.org/3.7/glossary.html#term-file-object). May +[file object](https://docs.python.org/3/glossary.html#term-file-object). May only be used as [context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library). > This has similar uses as the `dvc get` and `dvc import` CLI commands. 
💡 Note that `dvc.api.open` is able to -[stream](https://docs.python.org/3.7/library/io.html) the file directly from +[stream](https://docs.python.org/3/library/io.html) the file directly from **some** [remote](/doc/command-reference/remote) types. Otherwise, the file is downloaded regularly into a temporary local path before the file object is made available. @@ -43,8 +43,8 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) URL, the default project remote is used. - `mode` - (optional) mirrors the namesake parameter in builtin - [`open()`](https://docs.python.org/3.7/library/functions.html#open). Defaults - to `"r"` (read). + [`open()`](https://docs.python.org/3/library/functions.html#open). Defaults to + `"r"` (read). - `encoding` - (optional) used to decode contents to a string. Mirrors the namesake parameter in builtin `open()`. Defaults to `"utf-8"`. diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 5402ea4a62..bfb93bc7fe 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -5,7 +5,7 @@ string. > Wrapper for [`dvc.api.open()`](/doc/api-reference/open) that returns the > complete file contents directly, by using the file object's -> [`read()`](https://docs.python.org/3.7/tutorial/inputoutput.html#methods-of-file-objects) +> [`read()`](https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects) > method. ## Signature @@ -38,8 +38,8 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) URL, the default project remote is used. - `mode` - (optional) mirrors the namesake parameter in builtin - [`open()`](https://docs.python.org/3.7/library/functions.html#open). Defaults - to `"r"` (read). + [`open()`](https://docs.python.org/3/library/functions.html#open). Defaults to + `"r"` (read). - `encoding` - (optional) used to decode contents to a string. Mirrors the namesake parameter in builtin `open()`. 
Defaults to `"utf-8"`. From c0942337ef827eab2cc0f914d39afaeb47b900a0 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 16 Jan 2020 14:26:41 -0600 Subject: [PATCH 022/100] api: copy edits per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-343840110 and https://github.com/iterative/dvc.org/pull/908#pullrequestreview-343840519 and https://github.com/iterative/dvc.org/pull/908#pullrequestreview-343847296 --- public/static/docs/api-reference/get_url.md | 2 +- public/static/docs/api-reference/index.md | 10 +++++----- public/static/docs/install/index.md | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 569ab16c3e..9a9fda25d8 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -62,7 +62,7 @@ The value of `resource_url` in this case would be something like: `https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355` -This URL represents the physical location fo the data, built by interpreting the +This URL represents the physical location of the data, built by interpreting the corresponding [DVC-file](/doc/user-guide/dvc-file-format), where the file's checksum `a304afb96060aad90176268345e10355` is stored, and the project's remote configuration where the base URL `https://remote.dvc.org/dataset-registry/` is diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md index fca5695627..5134704277 100644 --- a/public/static/docs/api-reference/index.md +++ b/public/static/docs/api-reference/index.md @@ -7,10 +7,10 @@ interpreter. While most of the package implements our expose special functions you can use in your Python source code. > For API use, we **strongly** recommend having `dvc` in a requirements or setup -> file for your Python project, and installing it via and env manager such as -> `pip`. 
- -To use the API, import the module first with: +> file for your Python project, and installing it with a package manager such as +> `pip` (and in a +> [virtual environment](https://packaging.python.org/tutorials/installing-packages/#creating-virtual-environments)). +> To use the API, import the module first with: ```py import dvc.api @@ -26,4 +26,4 @@ documentation, which you get from the module's > functions we could add to the Python API. Please choose a function from the navigation sidebar to the left, or click the -`Next` button below to jump into the fist one ↘ +`Next` button below to jump into the first one ↘ diff --git a/public/static/docs/install/index.md b/public/static/docs/install/index.md index c30a08f6cf..86fcd5d1e5 100644 --- a/public/static/docs/install/index.md +++ b/public/static/docs/install/index.md @@ -15,8 +15,9 @@ is particularly useful in order to access the [Python API](/doc/api-reference) (`dvc.api` module). > For API use, we **strongly** recommend having `dvc` in a requirements or setup -> file for your Python project, and installing it via and env manager such as -> `pip`. +> file for your Python project, and installing it with a package manager such as +> `pip` (and in a +> [virtual environment](https://packaging.python.org/tutorials/installing-packages/#creating-virtual-environments)). 
## Advanced options From 8b133caf868f9af99ad4505b9fbacd5ffac6e5c8 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 16 Jan 2020 15:06:12 -0600 Subject: [PATCH 023/100] api: add notes about possible errors in function arguments --- public/static/docs/api-reference/get_url.md | 5 +++++ public/static/docs/api-reference/open.md | 5 +++++ public/static/docs/api-reference/read.md | 5 +++++ public/static/docs/api-reference/summon.md | 2 ++ 4 files changed, 17 insertions(+) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 9a9fda25d8..7232d6c76c 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -35,6 +35,8 @@ get_url(path, repo=None, rev=None, remote=None) an "offline" project. If not supplied, this defaults to the current working directory. + > A `NotDvcRepoError` is thrown if `repo` is not a valid DVC project. + - `rev` - (optional) [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). `rev` only has an effect @@ -47,6 +49,9 @@ get_url(path, repo=None, rev=None, remote=None) directory (default value of `repo`). when `repo` is an external repository URL, the default project remote is used. + > A `NoRemoteError` is thrown if no `remote` is specified and the project has + > no default remote. + ## Examples ```py diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index bb2b8b96d3..a17b9daacc 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -30,6 +30,8 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) an "offline" project. If not supplied, this defaults to the current working directory. + > A `NotDvcRepoError` is thrown if `repo` is not a valid DVC project. 
+ - `rev` - (optional) [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). `rev` only has an effect @@ -42,6 +44,9 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) directory (default value of `repo`). when `repo` is an external repository URL, the default project remote is used. + > A `NoRemoteError` is thrown if no `remote` is specified and the project has + > no default remote. + - `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3/library/functions.html#open). Defaults to `"r"` (read). diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index bfb93bc7fe..9560e4a873 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -25,6 +25,8 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) an "offline" project. If not supplied, this defaults to the current working directory. + > A `NotDvcRepoError` is thrown if `repo` is not a valid DVC project. + - `rev` - (optional) [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). `rev` only has an effect @@ -37,6 +39,9 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) directory (default value of `repo`). when `repo` is an external repository URL, the default project remote is used. + > A `NoRemoteError` is thrown if no `remote` is specified and the project has + > no default remote. + - `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3/library/functions.html#open). Defaults to `"r"` (read). 
diff --git a/public/static/docs/api-reference/summon.md b/public/static/docs/api-reference/summon.md index 5169a03c53..475513abf8 100644 --- a/public/static/docs/api-reference/summon.md +++ b/public/static/docs/api-reference/summon.md @@ -25,6 +25,8 @@ def summon( an "offline" project. If not supplied, this defaults to the current working directory. + > A `NotDvcRepoError` is thrown if `repo` is not a valid DVC project. + - `rev` - (optional) [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). `rev` only has an effect From eec08488a267077552d124659e644cd397742564 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 16 Jan 2020 15:33:01 -0600 Subject: [PATCH 024/100] api: add return types to first 3 functions --- public/static/docs/api-reference/get_url.md | 8 +++++--- public/static/docs/api-reference/open.md | 2 +- public/static/docs/api-reference/read.md | 6 ++++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 7232d6c76c..c0a83e9078 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,8 +1,10 @@ # dvc.api.get_url() -Returns the full URL to the physical location (in a -[DVC remote](/doc/command-reference/remote)) of a data artifact -specified by its `path` in a `repo` (DVC project). +Returns the full URL +[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str) +to the physical location (in a [DVC remote](/doc/command-reference/remote)) of a +data artifact specified by its `path` in a `repo` (DVC +project). ⚠️ Note that the returned URL is formed by analyzing the corresponding [DVC-file](/doc/user-guide/dvc-file-format) (see [Examples](#examples) below). 
diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index a17b9daacc..049925e137 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -76,7 +76,7 @@ with dvc.api.open( In this case we don't supply a `repo` value, which means the current working directory will be tried instead, so make sure that the code is run from within a -DVC repository: +DVC project: ```py import dvc.api diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 9560e4a873..2a61525c43 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -1,7 +1,9 @@ # dvc.api.read() -Returns the contents of a file artifact as a bytes object or as a -string. +Returns the contents of a file artifact as a +[bytes object](https://docs.python.org/3/glossary.html#term-bytes-like-object) +or as a +[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). > Wrapper for [`dvc.api.open()`](/doc/api-reference/open) that returns the > complete file contents directly, by using the file object's From 9b58c3f8fa8dd982ad5bbf4bda9e122634bf7eaf Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 16 Jan 2020 15:42:30 -0600 Subject: [PATCH 025/100] api: add list of remotes you can stream from for open fn per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-341457473 --- public/static/docs/api-reference/open.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 049925e137..89c9a772c8 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -9,9 +9,10 @@ only be used as 💡 Note that `dvc.api.open` is able to [stream](https://docs.python.org/3/library/io.html) the file directly from -**some** [remote](/doc/command-reference/remote) types.
Otherwise, the file is -downloaded regularly into a temporary local path before the file object is made -available. +**most** +[remote types](/doc/command-reference/remote/add#supported-storage-types) +(local, S3, Azure, GCP, OSS, SSH, HDFS). Otherwise, the file is downloaded +regularly into a temporary local path before the file object is made available. ## Signature From 3e0b909c7c17eec72ecb0e8852fac7e50f20ccb1 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sun, 19 Jan 2020 17:54:15 -0600 Subject: [PATCH 026/100] api ref: remove `summon()` page per https://github.com/iterative/dvc.org/pull/908#issuecomment-576045626 --- public/static/docs/api-reference/summon.md | 39 ---------------------- public/static/docs/sidebar.json | 4 --- 2 files changed, 43 deletions(-) delete mode 100644 public/static/docs/api-reference/summon.md diff --git a/public/static/docs/api-reference/summon.md b/public/static/docs/api-reference/summon.md deleted file mode 100644 index 475513abf8..0000000000 --- a/public/static/docs/api-reference/summon.md +++ /dev/null @@ -1,39 +0,0 @@ -# dvc.api.summon() - -Instantiate an object, described in a _summon file_. - -## Signature - -```py -def summon( - name, - repo=None, - rev=None, - summon_file="dvcsummon.yaml", - args=None -) -``` - -## Parameters - -- **`name`** - object to summon within the source project in `repo`, as defined - in the `summon_file`. - -- `repo` - specifies the location of the source DVC project. Both HTTP and SSH - protocols are supported for online Git repository URLs (e.g. - `[user@]server:project.git`). `repo` can also be a local file system path to - an "offline" project. If not supplied, this defaults to the current working - directory. - - > A `NotDvcRepoError` is thrown if `repo` is not a valid DVC project. - -- `rev` - (optional) - [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) - (such as a branch name, a tag, or a commit hash). 
`rev` only has an effect - when a URL is supplied as parameter to `repo`. If not supplied, it uses the - default Git revision, `HEAD`. - -- `summon_file` - YAML file describing the object in question. Defaults to - `dvcsummon.yaml`. - -- `args` - arguments to pass onto the object, if any diff --git a/public/static/docs/sidebar.json b/public/static/docs/sidebar.json index e2890ece1f..b556df8f26 100644 --- a/public/static/docs/sidebar.json +++ b/public/static/docs/sidebar.json @@ -371,10 +371,6 @@ { "slug": "read", "label": "read()" - }, - { - "slug": "summon", - "label": "summon()" } ] }, From 9cf939ff44687eee4152b1eee49ba5c8790ff3d4 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 21 Jan 2020 00:17:52 -0600 Subject: [PATCH 027/100] api ref: add full module path to exceptions mentioned so far per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-345076972 --- public/static/docs/api-reference/get_url.md | 7 ++++--- public/static/docs/api-reference/open.md | 7 ++++--- public/static/docs/api-reference/read.md | 7 ++++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index c0a83e9078..0ad7308a14 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -37,7 +37,8 @@ get_url(path, repo=None, rev=None, remote=None) an "offline" project. If not supplied, this defaults to the current working directory. - > A `NotDvcRepoError` is thrown if `repo` is not a valid DVC project. + > A `dvc.exceptions.NotDvcRepoError` is thrown if `repo` is not a valid DVC + > project. - `rev` - (optional) [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) @@ -51,8 +52,8 @@ get_url(path, repo=None, rev=None, remote=None) directory (default value of `repo`). when `repo` is an external repository URL, the default project remote is used.
- > A `NoRemoteError` is thrown if no `remote` is specified and the project has - > no default remote. + > A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and + > the project has no default remote. ## Examples diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 89c9a772c8..36aab8e01d 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -31,7 +31,8 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) an "offline" project. If not supplied, this defaults to the current working directory. - > A `NotDvcRepoError` is thrown if `repo` is not a valid DVC project. + > A `dvc.exceptions.NotDvcRepoError` is thrown if `repo` is not a valid DVC + > project. - `rev` - (optional) [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) @@ -45,8 +46,8 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) directory (default value of `repo`). when `repo` is an external repository URL, the default project remote is used. - > A `NoRemoteError` is thrown if no `remote` is specified and the project has - > no default remote. + > A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and + > the project has no default remote. - `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3/library/functions.html#open). Defaults to diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 2a61525c43..9b863bbdc5 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -27,7 +27,8 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) an "offline" project. If not supplied, this defaults to the current working directory. - > A `NotDvcRepoError` is thrown if `repo` is not a valid DVC project. 
+ > A `dvc.exceptions.NotDvcRepoError` is thrown if `repo` is not a valid DVC + > project. - `rev` - (optional) [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) @@ -41,8 +42,8 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) directory (default value of `repo`). when `repo` is an external repository URL, the default project remote is used. - > A `NoRemoteError` is thrown if no `remote` is specified and the project has - > no default remote. + > A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and + > the project has no default remote. - `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3/library/functions.html#open). Defaults to From 59bb2f29381699ea80b16b7e04fb61a1fe0f640a Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 21 Jan 2020 01:05:32 -0600 Subject: [PATCH 028/100] api: fix term "environment manager" per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-345080678 --- public/static/docs/api-reference/index.md | 13 +++++++------ public/static/docs/install/index.md | 14 +++++++------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md index 5134704277..1fd5104061 100644 --- a/public/static/docs/api-reference/index.md +++ b/public/static/docs/api-reference/index.md @@ -1,16 +1,17 @@ # Python API -When you [install](/doc/install) DVC with an environment manager like `pip` or +When you [install](/doc/install) DVC with an package manager like `pip` or `conda`, the `dvc` package becomes available to the corresponding `python` interpreter. While most of the package implements our [command-line tool](/doc/command-reference), we wrote the `dvc.api` module to expose special functions you can use in your Python source code. 
-> For API use, we **strongly** recommend having `dvc` in a requirements or setup -> file for your Python project, and installing it with a package manager such as -> `pip` (and in a -> [virtual environment](https://packaging.python.org/tutorials/installing-packages/#creating-virtual-environments)). -> To use the API, import the module first with: +> For API use, we **strongly** recommend having `dvc` in a requirements file for +> your Python project, and installing it with a package manager (in a +> [virtual environment](https://packaging.python.org/tutorials/installing-packages/#creating-virtual-environments) +> preferably). + +To use the API, import the module first with: ```py import dvc.api diff --git a/public/static/docs/install/index.md b/public/static/docs/install/index.md index 86fcd5d1e5..7bd98ebf1b 100644 --- a/public/static/docs/install/index.md +++ b/public/static/docs/install/index.md @@ -9,15 +9,15 @@ Please double check that you don't already have DVC (for example running ## Install as a Python library -When you install DVC with an environment manager like `pip` or `conda`, the -`dvc` package becomes available to the corresponding `python` interpreter. This -is particularly useful in order to access the [Python API](/doc/api-reference) +When you install DVC with an package manager like `pip` or `conda`, the `dvc` +package becomes available to the corresponding `python` interpreter. This is +particularly useful in order to access the [Python API](/doc/api-reference) (`dvc.api` module). -> For API use, we **strongly** recommend having `dvc` in a requirements or setup -> file for your Python project, and installing it with a package manager such as -> `pip` (and in a -> [virtual environment](https://packaging.python.org/tutorials/installing-packages/#creating-virtual-environments)). 
+> For API use, we **strongly** recommend having `dvc` in a requirements file for +> your Python project, and installing it with a package manager (in a +> [virtual environment](https://packaging.python.org/tutorials/installing-packages/#creating-virtual-environments) +> preferably). ## Advanced options From e340321cf5c712da1b99606612f1778013275225 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 21 Jan 2020 01:21:08 -0600 Subject: [PATCH 029/100] api ref: separate short and long desc, similar to cmd ref per https://github.com/iterative/dvc.org/pull/908#issuecomment-576476297 and improve on the open, read descriptions. --- public/static/docs/api-reference/get_url.md | 22 +++++++++------- public/static/docs/api-reference/open.md | 28 +++++++++++++-------- public/static/docs/api-reference/read.md | 17 ++++++++----- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 0ad7308a14..09e7548653 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,10 +1,20 @@ # dvc.api.get_url() +Return the URL to the storage location of the target data (`repo`/`path`). + +## Signature + +```py +get_url(path, repo=None, rev=None, remote=None) +``` + +## Description + Returns the full URL [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str) -to the physical location (in a [DVC remote](/doc/command-reference/remote)) of a -data artifact specified by its `path` in a `repo` (DVC -project). +to the physical location (in a [DVC remote](/doc/command-reference/remote)) +where a target data artifact specified by its `path` in a `repo` +(DVC project) is stored. ⚠️ Note that the returned URL is formed by analyzing the corresponding [DVC-file](/doc/user-guide/dvc-file-format) (see [Examples](#examples) below). 
@@ -20,12 +30,6 @@ or `boto3` > For possible location protocols, refer to the > [supported remote types](https://dvc.org/doc/command-reference/remote/add#supported-storage-types) -## Signature - -```py -get_url(path, repo=None, rev=None, remote=None) -``` - ## Parameters - **`path`** - used to specify the location of the target artifact within the diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 36aab8e01d..623e974e6f 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -1,11 +1,23 @@ # dvc.api.open() -Opens a file artifact as a -[file object](https://docs.python.org/3/glossary.html#term-file-object). May -only be used as -[context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library). +_Context manager_ to open a DVC-tracked file artifact as a +[file object](https://docs.python.org/3/glossary.html#term-file-object). -> This has similar uses as the `dvc get` and `dvc import` CLI commands. +## Signature + +```py +open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) +``` + +## Description + +This function is analogous to the +[`open()`](https://docs.python.org/3/library/functions.html#open) Python +builtin, but for files tracked in DVC projects. However, it may +only be used as a +[context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) +(using the `with` keyword as shown in the examples below). There is no +`close()`. 💡 Note that `dvc.api.open` is able to [stream](https://docs.python.org/3/library/io.html) the file directly from @@ -14,11 +26,7 @@ only be used as (local, S3, Azure, GCP, OSS, SSH, HDFS). Otherwise, the file is downloaded regularly into a temporary local path before the file object is made available. 
-## Signature - -```py -open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) -``` +> This has similar uses as the `dvc get` and `dvc import` CLI commands. ## Parameters diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 9b863bbdc5..be8752a174 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -1,21 +1,26 @@ # dvc.api.read() -Returns the contents of a file artifact as a +Returns the contents of a DVC-tracked file artifact as a [bytes object](https://docs.python.org/3/glossary.html#term-bytes-like-object) or as a [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). -> Wrapper for [`dvc.api.open()`](/doc/api-reference/open) that returns the -> complete file contents directly, by using the file object's -> [`read()`](https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects) -> method. - ## Signature ```py read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) ``` +## Description + +This function wraps [`dvc.api.open()`](/doc/api-reference/open) for a simple and +direct way to return the complete file contents of files tracked in DVC +projects – no _context manager_ (`with` keyword) required. + +> Internally, it uses the _file object_'s +> [`read()`](https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects) +> method. 
+ ## Parameters - **`path`** - used to specify the location of the target artifact within the From cc4a7a8555df71a5959f3b37c0485d258e5d999b Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 21 Jan 2020 12:44:26 -0600 Subject: [PATCH 030/100] api ref: small language refinements --- public/static/docs/api-reference/get_url.md | 9 ++++----- public/static/docs/api-reference/index.md | 4 ++-- public/static/docs/api-reference/open.md | 9 ++++----- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 09e7548653..d798cff2c1 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -10,11 +10,10 @@ get_url(path, repo=None, rev=None, remote=None) ## Description -Returns the full URL -[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str) -to the physical location (in a [DVC remote](/doc/command-reference/remote)) -where a target data artifact specified by its `path` in a `repo` -(DVC project) is stored. +Returns the full URL string (`str` type) to the physical location (in a +[DVC remote](/doc/command-reference/remote)) where a target data +artifact specified by its `path` in a `repo` (DVC project) +is stored. ⚠️ Note that the returned URL is formed by analyzing the corresponding [DVC-file](/doc/user-guide/dvc-file-format) (see [Examples](#examples) below). diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md index 1fd5104061..3e36c42e35 100644 --- a/public/static/docs/api-reference/index.md +++ b/public/static/docs/api-reference/index.md @@ -1,8 +1,8 @@ # Python API When you [install](/doc/install) DVC with an package manager like `pip` or -`conda`, the `dvc` package becomes available to the corresponding `python` -interpreter. 
While most of the package implements our +`conda`, the `dvc` package becomes available to the corresponding Python +environment. While most of the package implements our [command-line tool](/doc/command-reference), we wrote the `dvc.api` module to expose special functions you can use in your Python source code. diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 623e974e6f..460fdf8c98 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -1,6 +1,7 @@ # dvc.api.open() -_Context manager_ to open a DVC-tracked file artifact as a +[Context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) +to open a DVC-tracked file artifact as a [file object](https://docs.python.org/3/glossary.html#term-file-object). ## Signature @@ -14,10 +15,8 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) This function is analogous to the [`open()`](https://docs.python.org/3/library/functions.html#open) Python builtin, but for files tracked in DVC projects. However, it may -only be used as a -[context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) -(using the `with` keyword as shown in the examples below). There is no -`close()`. +only be used as a _context manager_ (using the `with` keyword as shown in the +examples below). There is no `close()`. 
💡 Note that `dvc.api.open` is able to [stream](https://docs.python.org/3/library/io.html) the file directly from From 24f2d67a74bcd50751f69c8133db44e47810b97c Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 22 Jan 2020 18:49:26 -0600 Subject: [PATCH 031/100] api ref: add note about `.dir` cache files in get_url ref: iterative/dvc/issues/3182 --- public/static/docs/api-reference/get_url.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index d798cff2c1..72fa9c39b2 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -26,6 +26,11 @@ an appropriate tool such as or `boto3` [download_fileobj](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.download_fileobj). +Note that for directories, DVC stores a special text file with `.dir` that +contains the mapping of files in the directory (as a JSON array), along with +their checksums. (Refer to +[Structure of cache directory](/doc/user-guide/dvc-files-and-directories#structure-of-cache-directory).) + > For possible location protocols, refer to the > [supported remote types](https://dvc.org/doc/command-reference/remote/add#supported-storage-types) From 026eaa28319d8e24d144fdcc0354221add993387 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 7 Feb 2020 17:18:48 -0600 Subject: [PATCH 032/100] api: correct exception path in get_url per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-345730018 --- public/static/docs/api-reference/get_url.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 72fa9c39b2..fccf6865bd 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -45,7 +45,7 @@ their checksums. (Refer to an "offline" project. 
If not supplied, this defaults to the current working directory. - > A `dvc.exceptions.NotDvcRepoError` is thrown if `repo` is not a valid DVC + > A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC > project. - `rev` - (optional) From 3815a09b7f977e8158c98f4bdac1272e646fc170 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 7 Feb 2020 17:33:59 -0600 Subject: [PATCH 033/100] api: standard indentation and arg usage in all examples, and and clarification on default repo arg, add encoding arg per https://github.com/iterative/dvc.org/pull/908#discussion_r369586665 and https://github.com/iterative/dvc.org/pull/908#discussion_r369587472 and https://github.com/iterative/dvc.org/pull/908#discussion_r369589575 and https://github.com/iterative/dvc.org/pull/908#discussion_r369596667 --- public/static/docs/api-reference/get_url.md | 5 +-- public/static/docs/api-reference/open.md | 49 +++++++++++---------- public/static/docs/api-reference/read.md | 4 +- 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index fccf6865bd..53a120e6d7 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -69,9 +69,8 @@ their checksums. 
(Refer to import dvc.api resource_url = dvc.api.get_url( - 'get-started/data.xml', - repo='https://github.com/iterative/dataset-registry' -) + 'get-started/data.xml', + repo='https://github.com/iterative/dataset-registry') ``` The value of `resource_url` in this case would be something like: diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 460fdf8c98..5d583a6749 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -71,11 +71,11 @@ from xml.dom.minidom import parse import dvc.api with dvc.api.open( - "get-started/data.xml", - "https://github.com/iterative/dataset-registry" -) as fd: - xmldom = parse(fd) - # ... Process elements + "get-started/data.xml", + repo="https://github.com/iterative/dataset-registry" + ) as fd: + xmldom = parse(fd) + # ... Process elements ``` > See also `dvc.api.read` for a more direct way to read the complete contents of @@ -83,23 +83,27 @@ with dvc.api.open( ## Example: use a file from the local cache -In this case we don't supply a `repo` value, which means the current working -directory will be tried instead, so make sure that the code is run from within a -DVC project: +In this case we don't supply a `repo` value. DVC will walk up the current +working directory tree to find the DVC project: ```py import dvc.api with dvc.api.open('data/nlp/words.txt') as fd: - print(fd.name) + for word in fd: + # ... Process words ``` DVC will look for `data/nlp/words.txt` in the local cache of the project. (If it's not found there, the default [remote](/doc/command-reference/remote) will be tried.) -The output of the script above should be something like -`.dvc/cache/3a/01762e96060aa04a68345fbd910355` – the physical data location. +To specify the file encoding of a text file: + +```py +with dvc.api.open('data/nlp/words.txt', encoding="utf-8") as fd: + # ... 
+``` ## Example: process CSV file from a private repository @@ -112,12 +116,12 @@ import csv import dvc.api with dvc.api.open( - "sea_ice.csv", - repo="git@github.com:iterative/df_sea_ice_no_header.git" -) as fd: - reader = csv.reader(fd) - for row in reader: - # ... Process columns + "sea_ice.csv", + repo="git@github.com:iterative/df_sea_ice_no_header.git" + ) as fd: + reader = csv.reader(fd) + for row in reader: + # ... Process columns ``` ## Example: stream file from a specific remote @@ -130,10 +134,9 @@ by providing a `remote` argument: import dvc.api with dvc.api.open( - 'model.pkl', - repo='https://github.com/example/dvc-repository' - remote='my-s3-bucket' -) as fd: - for line in fd: - # ... Process lines + 'model.pkl', repo='https://github.com/example/dvc-repository' + remote='my-s3-bucket' + ) as fd: + for line in fd: + # ... Process lines ``` diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index be8752a174..b41af386ae 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -64,6 +64,6 @@ import pickle import dvc.api model = pickle.loads( - dvc.api.read("model.pkl", repo="https://github.com/my-org/my-repo.git") -) + dvc.api.read( + "model.pkl", repo="https://github.com/my-org/my-repo.git")) ``` From 382b19540a452eb0fb5115e565bb581eb1cb90e7 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 7 Feb 2020 18:27:10 -0600 Subject: [PATCH 034/100] api: improve last open() example per https://github.com/iterative/dvc.org/pull/908#discussion_r369594771 --- public/static/docs/api-reference/open.md | 37 ++++++++++++++++++------ 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 5d583a6749..06b63fc642 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -63,7 +63,7 @@ regularly into a temporary local path before the 
file object is made available. - `encoding` - (optional) used to decode contents to a string. Mirrors the namesake parameter in builtin `open()`. Defaults to `"utf-8"`. -## Example: process XML file from an external DVC repository +## Example: Process XML file from an external DVC repository ```py from xml.dom.minidom import parse @@ -81,7 +81,7 @@ with dvc.api.open( > See also `dvc.api.read` for a more direct way to read the complete contents of > a file artifact. -## Example: use a file from the local cache +## Example: Use a file from the local cache In this case we don't supply a `repo` value. DVC will walk up the current working directory tree to find the DVC project: @@ -105,7 +105,7 @@ with dvc.api.open('data/nlp/words.txt', encoding="utf-8") as fd: # ... ``` -## Example: process CSV file from a private repository +## Example: Process CSV file from a private repository For this we'll have to use the SSH URL to the Git repo (assuming the local [SSH credentials](https://help.github.com/en/github/authenticating-to-github/connecting-to-github-with-ssh) @@ -124,19 +124,38 @@ with dvc.api.open( # ... Process columns ``` -## Example: stream file from a specific remote +> Note that we're using an SSH Git URL for the `repo` argument above. + +## Example: Stream file from a specific remote Sometimes we may want to chose the [remote](/doc/command-reference/remote) data -source, for example to ensure that file streaming is enabled. This can be done -by providing a `remote` argument: +source, for example to ensure that file streaming is enabled (as only certain +remote storage types support streaming). This can be done by providing a +`remote` argument: ```py +import re + import dvc.api -with dvc.api.open( - 'model.pkl', repo='https://github.com/example/dvc-repository' +with dvc.api.open( + 'activity.log', + repo='https://example.com/dvc/repo', remote='my-s3-bucket' ) as fd: for line in fd: - # ... Process lines + match = re.search(r"user=(\w+)", line) + # ...
+``` + +## Example: Unserialize and employ a binary model + +```py +import pickle + +import dvc.api + +with dvc.api.open('model.pkl', repo='...') as fd: + pickle.load(fd) + # ... Use model ``` From 013f5df8e2a993c44956feeb9807dc4287be91b3 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 7 Feb 2020 18:35:39 -0600 Subject: [PATCH 035/100] api: std use of single vs double quotes and add mode='rb' in read() example per https://github.com/iterative/dvc.org/pull/908#discussion_r369597979 --- public/static/docs/api-reference/open.md | 15 +++++++++------ public/static/docs/api-reference/read.md | 6 +++++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 06b63fc642..11b6caf427 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -71,8 +71,8 @@ from xml.dom.minidom import parse import dvc.api with dvc.api.open( - "get-started/data.xml", - repo="https://github.com/iterative/dataset-registry" + 'get-started/data.xml', + repo='https://github.com/iterative/dataset-registry' ) as fd: xmldom = parse(fd) # ... Process elements @@ -101,7 +101,7 @@ DVC will look for `data/nlp/words.txt` in the local cache of the To specify the file encoding of a text file: ```py -with dvc.api.open('data/nlp/words.txt', encoding="utf-8") as fd: +with dvc.api.open('data/nlp/words.txt', encoding='utf-8') as fd: # ... ``` @@ -116,8 +116,8 @@ import csv import dvc.api with dvc.api.open( - "sea_ice.csv", - repo="git@github.com:iterative/df_sea_ice_no_header.git" + 'sea_ice.csv', + repo='git@github.com:iterative/df_sea_ice_no_header.git' ) as fd: reader = csv.reader(fd) for row in reader: @@ -144,7 +144,7 @@ with open( remote='my-s3-bucket' ) as fd: for line in fd: - match = re.search(r"user=(\w+)", line) + match = re.search(r'user=(\w+)', line) # ... ``` @@ -159,3 +159,6 @@ with dvc.api.open('model.pkl', repo='...') as fd: pickle.load(fd) # ... 
Use model ``` + +> For a faster shorthand way to perform a similar example, please see the +> [read() example](/doc/api-reference/read#examples). diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index b41af386ae..39a497791b 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -65,5 +65,9 @@ import dvc.api model = pickle.loads( dvc.api.read( - "model.pkl", repo="https://github.com/my-org/my-repo.git")) + 'model.pkl', + repo='https://github.com/my-org/my-repo.git', + mode='rb')) ``` + +> We're using `'rb'` mode here for compatibility with `pickle.loads()`. From e9340cec144f49e786a98cc66e7d0110189f18c6 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 7 Feb 2020 21:54:59 -0600 Subject: [PATCH 036/100] api: update index and install section per Ivan's feedback in #908 --- public/static/docs/api-reference/index.md | 32 +++++++---------------- public/static/docs/install/index.md | 13 +++------ 2 files changed, 13 insertions(+), 32 deletions(-) diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md index 3e36c42e35..56c22d85ea 100644 --- a/public/static/docs/api-reference/index.md +++ b/public/static/docs/api-reference/index.md @@ -1,30 +1,16 @@ # Python API -When you [install](/doc/install) DVC with an package manager like `pip` or -`conda`, the `dvc` package becomes available to the corresponding Python -environment. While most of the package implements our -[command-line tool](/doc/command-reference), we wrote the `dvc.api` module to -expose special functions you can use in your Python source code. - -> For API use, we **strongly** recommend having `dvc` in a requirements file for -> your Python project, and installing it with a package manager (in a -> [virtual environment](https://packaging.python.org/tutorials/installing-packages/#creating-virtual-environments) -> preferably).
- -To use the API, import the module first with: +DVC can be used as a Python library, simply [install](/doc/install) it with a +package manager like `pip` or `conda`, and as a Python +[project requirement](https://pip.pypa.io/en/latest/user_guide/#requirements-files) +if needed. This reference provides the details about the functions in the API +module `dvc.api`, which can be imported any regular way, for example: ```py import dvc.api ``` -This reference provides the details about the API functions (inside `dvc.api`): -their purpose, usage, and examples. Please note that they also have inline -documentation, which you get from the module's -[source code](https://github.com/iterative/dvc/blob/master/dvc/api.py). - -> Please don't hesitate in sending a feature request -> [on GitHub](https://github.com/iterative/dvc.org/issues) with ideas of other -> functions we could add to the Python API. - -Please choose a function from the navigation sidebar to the left, or click the -`Next` button below to jump into the first one ↘ +The purpose of our API is to provide programmatic access to the data or models +[stored and versioned using DVC](/doc/use-cases/versioning-data-and-model-files) +from Python apps. Please choose a function from the navigation sidebar to the +left, or click the `Next` button below to jump into the first one ↘ diff --git a/public/static/docs/install/index.md b/public/static/docs/install/index.md index 7bd98ebf1b..c375c0ca61 100644 --- a/public/static/docs/install/index.md +++ b/public/static/docs/install/index.md @@ -9,15 +9,10 @@ Please double check that you don't already have DVC (for example running ## Install as a Python library -When you install DVC with an package manager like `pip` or `conda`, the `dvc` -package becomes available to the corresponding `python` interpreter. This is -particularly useful in order to access the [Python API](/doc/api-reference) -(`dvc.api` module).
- -> For API use, we **strongly** recommend having `dvc` in a requirements file for -> your Python project, and installing it with a package manager (in a -> [virtual environment](https://packaging.python.org/tutorials/installing-packages/#creating-virtual-environments) -> preferably). +DVC can be used as a Python library, simply install it with a package manager +like `pip` or `conda`, and as a Python +[project requirement](https://pip.pypa.io/en/latest/user_guide/#requirements-files) +if needed. The [Python API](/doc/api-reference) module is `dvc.api`. ## Advanced options From 0a725e8ec35d34188e4030cf3161aebdbc8e4d51 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sat, 8 Feb 2020 02:32:10 -0600 Subject: [PATCH 037/100] cmd ref: refactor and simplify notes to emphasize those linking to api ref per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348451664 and https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348451721 --- public/static/docs/api-reference/open.md | 2 +- public/static/docs/command-reference/get-url.md | 15 ++++++--------- public/static/docs/command-reference/get.md | 12 ++++-------- .../static/docs/command-reference/import-url.md | 10 +++++----- public/static/docs/command-reference/import.md | 10 ++++------ 5 files changed, 20 insertions(+), 29 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 11b6caf427..6c0fe5bc57 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -18,7 +18,7 @@ builtin, but for files tracked in DVC projects. However, it may only be used as a _context manager_ (using the `with` keyword as shown in the examples below). There is no `close()`. 
-💡 Note that `dvc.api.open` is able to +💡 Note that `dvc.api.open()` is able to [stream](https://docs.python.org/3/library/io.html) the file directly from **most** [remote types](/doc/command-reference/remote/add#supported-storage-types) diff --git a/public/static/docs/command-reference/get-url.md b/public/static/docs/command-reference/get-url.md index 1a45e8c992..c2d1af96b8 100644 --- a/public/static/docs/command-reference/get-url.md +++ b/public/static/docs/command-reference/get-url.md @@ -3,8 +3,8 @@ Download a file or directory from a supported URL (for example `s3://`, `ssh://`, and other protocols) into the local file system. -> Unlike `dvc import-url`, this command does not track the downloaded data files -> (does not create a DVC-file). +> See `dvc get` to download data/model files or directories from other DVC +> repositories (e.g. GitHub URLs). ## Synopsis @@ -22,15 +22,15 @@ In some cases it's convenient to get a data artifact from a remote location into the local file system. The `dvc get-url` command helps the user do just that. +> Note that unlike `dvc import-url`, this command does not track the downloaded +> data files (does not create a DVC-file). For that reason, this command doesn't +> require an existing DVC project to run in. + The `url` argument should provide the location of the data to be downloaded, while `out` can be used to specify the directory and/or file name desired for the downloaded data. If an existing directory is specified, then the output will be placed inside of it. -Note that this command doesn't require an existing DVC project to -run in. It's a single-purpose command that can be used out of the box after -installing DVC. 
- DVC supports several types of (local or) remote locations (protocols): | Type | Description | `url` format | @@ -61,9 +61,6 @@ HTTP(S) it's possible to instead use: $ wget https://example.com/path/to/data.csv ``` -> See `dvc get` to download data/model files or directories from other DVC -> repositories (e.g. GitHub URLs). - ## Options - `-h`, `--help` - prints the usage/help message, and exit. diff --git a/public/static/docs/command-reference/get.md b/public/static/docs/command-reference/get.md index 5ba9c4ee96..9d74e73262 100644 --- a/public/static/docs/command-reference/get.md +++ b/public/static/docs/command-reference/get.md @@ -3,12 +3,7 @@ Download a file or directory tracked by DVC or by Git into the current working directory. -> Unlike `dvc import`, this command does not track the downloaded files (does -> not create a DVC-file). - - - -> See also our `dvc.api.open` Python API function. +> See also our `dvc.api.open()` Python API function. ## Synopsis @@ -28,8 +23,9 @@ repository (e.g. source code, small image/other files). `dvc get` copies the target file or directory (`url`/`path`) to the current working directory. (Analogous to `wget`, but for repos.) -Note that this command doesn't require an existing DVC project to run in. It's a -single-purpose command that can be used out of the box after installing DVC. +> Note that unlike `dvc import`, this command does not track the downloaded +> files (does not create a DVC-file). For that reason, this command doesn't +> require an existing DVC project to run in. The `url` argument specifies the address of the DVC or Git repository containing the data source. 
Both HTTP and SSH protocols are supported for online repos diff --git a/public/static/docs/command-reference/import-url.md b/public/static/docs/command-reference/import-url.md index e5e7ca8207..3aa52b6238 100644 --- a/public/static/docs/command-reference/import-url.md +++ b/public/static/docs/command-reference/import-url.md @@ -4,8 +4,8 @@ Download a file or directory from a supported URL (for example `s3://`, `ssh://`, and other protocols) into the workspace, and track changes in the remote data source. Creates a DVC-file. -> See also `dvc get-url`, that corresponds to the first half of what this -> command does (downloading the data artifact). +> See `dvc import` to download and tack data/model files or directories from +> other DVC repositories (e.g. GitHub URLs). ## Synopsis @@ -28,6 +28,9 @@ external data source changes. Example scenarios: - A batch process running regularly updates a data file to import. - A shared dataset on a remote storage that is managed and updated outside DVC. +> Note that `dvc get-url` corresponds to the first step this command performs +> (just download the file or directory). + The `dvc import-url` command helps the user create such an external data dependency. The `url` argument specifies the external location of the data to be imported, while `out` can be used to specify the directory and/or file name @@ -103,9 +106,6 @@ Note that import stages are considered always locked, meaning that if you run `dvc repro`, they won't be updated. Use `dvc update` on them to bring the import up to date from the external data source. -> See `dvc import` to download and tack data/model files or directories from -> other DVC repositories (e.g. GitHub URLs). - ## Options - `-f`, `--file` - specify name of the DVC-file it generates. 
By default the diff --git a/public/static/docs/command-reference/import.md b/public/static/docs/command-reference/import.md index 3a542679ec..ea965b88cb 100644 --- a/public/static/docs/command-reference/import.md +++ b/public/static/docs/command-reference/import.md @@ -6,12 +6,7 @@ Download a file or directory tracked by DVC or by Git into the source, which can later be used to [update](/doc/command-reference/update) the import. -> See also `dvc get`, that corresponds to the first step this command performs -> (just download the data). - - - -> See also our `dvc.api.open` Python API function. +> See also our `dvc.api.open()` Python API function. ## Synopsis @@ -32,6 +27,9 @@ the target file or directory (`url`/`path`) in a way so that it's tracked with DVC, becoming a local data artifact. This also permits updating the import later, if it has changed in its data source. (See `dvc update`.) +> Note that `dvc get` corresponds to the first step this command performs (just +> download the data). + The `url` argument specifies the address of the DVC or Git repository containing the data source. Both HTTP and SSH protocols are supported for online repos (e.g. `[user@]server:project.git`). 
`url` can also be a local file system path From d696063c186c6355fe8404229fca52007a8ac4b5 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sat, 8 Feb 2020 16:12:32 -0600 Subject: [PATCH 038/100] api ref: rewrite get_url intro per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348442281 --- public/static/docs/api-reference/get_url.md | 20 ++++++++++--------- public/static/docs/command-reference/get.md | 4 ++-- .../static/docs/command-reference/import.md | 7 ++++--- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 53a120e6d7..48ed97e0c2 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,6 +1,7 @@ # dvc.api.get_url() -Return the URL to the storage location of the target data (`repo`/`path`). +Return the URL to the storage location of a data artifact tracked +by DVC. ## Signature @@ -11,14 +12,15 @@ get_url(path, repo=None, rev=None, remote=None) ## Description Returns the full URL string (`str` type) to the physical location (in a -[DVC remote](/doc/command-reference/remote)) where a target data -artifact specified by its `path` in a `repo` (DVC project) -is stored. - -⚠️ Note that the returned URL is formed by analyzing the corresponding -[DVC-file](/doc/user-guide/dvc-file-format) (see [Examples](#examples) below). -**There is no guarantee that the file actually exists in that location**. Please -keep this in mind when using the URL string in your code. +[DVC remote](/doc/command-reference/remote)) where a target data artifact +specified by its `path` in a `repo` (DVC project) is stored. + +⚠️ Note that the returned URL is formed by evaluating the corresponding +[DVC-file](/doc/user-guide/dvc-file-format) (see [Examples](#examples) below) as +well as the project's +[default remote](https://dvc.org/doc/command-reference/remote/default). 
**There +is no guarantee that the file actually exists in that location**. Please keep +this in mind when using the URL string in your code. 💡 Having the resource's URL, it should be possible to download it directly with an appropriate tool such as diff --git a/public/static/docs/command-reference/get.md b/public/static/docs/command-reference/get.md index 9d74e73262..a06990494e 100644 --- a/public/static/docs/command-reference/get.md +++ b/public/static/docs/command-reference/get.md @@ -20,8 +20,8 @@ positional arguments: Provides an easy way to download files or directories tracked in any DVC repository (e.g. datasets, intermediate results, ML models), or Git repository (e.g. source code, small image/other files). `dvc get` copies the -target file or directory (`url`/`path`) to the current working directory. -(Analogous to `wget`, but for repos.) +target file or directory (found at `path` in `url`) to the current working +directory. (Analogous to `wget`, but for repos.) > Note that unlike `dvc import`, this command does not track the downloaded > files (does not create a DVC-file). For that reason, this command doesn't diff --git a/public/static/docs/command-reference/import.md b/public/static/docs/command-reference/import.md index ea965b88cb..9c48372f84 100644 --- a/public/static/docs/command-reference/import.md +++ b/public/static/docs/command-reference/import.md @@ -23,9 +23,10 @@ positional arguments: Provides an easy way to reuse files or directories tracked in any DVC repository (e.g. datasets, intermediate results, ML models) or Git repository (e.g. source code, small image/other files). `dvc import` downloads -the target file or directory (`url`/`path`) in a way so that it's tracked with -DVC, becoming a local data artifact. This also permits updating the -import later, if it has changed in its data source. (See `dvc update`.) +the target file or directory (found at `path` in `url`) in a way so that it's +tracked with DVC, becoming a local data artifact. 
This also permits +updating the import later, if it has changed in its data source. (See +`dvc update`.) > Note that `dvc get` corresponds to the first step this command performs (just > download the data). From f13b3111693c6f170dc49b680f8b74c04943343a Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sat, 8 Feb 2020 16:40:18 -0600 Subject: [PATCH 039/100] api ref: simplify note about get_url not checking for file/dir existence per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348442808 --- public/static/docs/api-reference/get_url.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 48ed97e0c2..bd0559d931 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,7 +1,8 @@ # dvc.api.get_url() -Return the URL to the storage location of a data artifact tracked -by DVC. +Return the URL +([string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str) +type) to the storage location of a data artifact tracked by DVC. ## Signature @@ -11,16 +12,16 @@ get_url(path, repo=None, rev=None, remote=None) ## Description -Returns the full URL string (`str` type) to the physical location (in a +Returns the full URL to the physical location (in a [DVC remote](/doc/command-reference/remote)) where a target data artifact -specified by its `path` in a `repo` (DVC project) is stored. - -⚠️ Note that the returned URL is formed by evaluating the corresponding +specified by its `path` in a `repo` (DVC project) is stored. The +URL is formed by evaluating the corresponding [DVC-file](/doc/user-guide/dvc-file-format) (see [Examples](#examples) below) as well as the project's -[default remote](https://dvc.org/doc/command-reference/remote/default). **There -is no guarantee that the file actually exists in that location**.
Please keep -this in mind when using the URL string in your code. +[default remote](https://dvc.org/doc/command-reference/remote/default). + +⚠️ This function does not check for the actual existence of the file or +directory in the remote storage. 💡 Having the resource's URL, it should be possible to download it directly with an appropriate tool such as From 73adff59601595257eadcfbad6217c888dfa846f Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sat, 8 Feb 2020 21:05:09 -0600 Subject: [PATCH 040/100] api ref: update note about directory JSON .dir files in get_url per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348443386 --- public/static/docs/api-reference/get_url.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index bd0559d931..4a2e8da187 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -29,10 +29,11 @@ an appropriate tool such as or `boto3` [download_fileobj](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.download_fileobj). -Note that for directories, DVC stores a special text file with `.dir` that -contains the mapping of files in the directory (as a JSON array), along with -their checksums. (Refer to -[Structure of cache directory](/doc/user-guide/dvc-files-and-directories#structure-of-cache-directory).) +Note that if the target is a directory, the URL will end in `.dir`, as DVC +stores a special JSON file with `.dir` extension that contains the mapping of +files in the directory (as a JSON array), along with their checksums. Refer to +[Structure of cache directory](/doc/user-guide/dvc-files-and-directories#structure-of-cache-directory) +and `dvc add` to learn more about how DVC handles data directories. 
> For possible location protocols, refer to the > [supported remote types](https://dvc.org/doc/command-reference/remote/add#supported-storage-types) From 6ebe37173ce14b544f43218da513b312bcd26ddd Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sat, 8 Feb 2020 21:35:06 -0600 Subject: [PATCH 041/100] api ref: std. param lang style per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348443679 and https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348443840 and https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348443936 --- public/static/docs/api-reference/get_url.md | 25 ++++++++--------- public/static/docs/api-reference/open.md | 25 ++++++++--------- public/static/docs/api-reference/read.md | 31 ++++++++++----------- 3 files changed, 39 insertions(+), 42 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 4a2e8da187..dfe472bf77 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -40,32 +40,31 @@ and `dvc add` to learn more about how DVC handles data directories. ## Parameters -- **`path`** - used to specify the location of the target artifact within the - source project in `repo`, relative to the project's root. +- **`path`** - specifies the location of the target artifact within the source + project in `repo`, relative to the project's root. - `repo` - specifies the location of the source DVC project. Both HTTP and SSH protocols are supported for online Git repository URLs (e.g. `[user@]server:project.git`). `repo` can also be a local file system path to - an "offline" project. If not supplied, this defaults to the current working + an "offline" project. If not supplied, defaults to the current working directory. - > A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC - > project. + A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. 
-- `rev` - (optional) +- `rev` - [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). `rev` only has an effect when a URL is supplied as parameter to `repo`. If not supplied, it uses the default Git revision, `HEAD`. -- `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) - to fetch the target artifact from. If not supplied, the default depends on the - value of `repo`. The local cache is used when `repo` is the current working - directory (default value of `repo`). when `repo` is an external repository - URL, the default project remote is used. +- `remote` - name of the [DVC remote](/doc/command-reference/remote) to fetch + the target artifact from. If not supplied, the default depends on the value of + `repo`. The local cache is used when `repo` is the current working directory + (default value of `repo`). when `repo` is an external repository URL, the + default project remote is used. - > A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and - > the project has no default remote. + A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the + project has no default remote. ## Examples diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 6c0fe5bc57..37c54ec7b2 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -29,32 +29,31 @@ regularly into a temporary local path before the file object is made available. ## Parameters -- **`path`** - used to specify the location of the target artifact within the - source project in `repo`, relative to the project's root. +- **`path`** - specifies the location of the target artifact within the source + project in `repo`, relative to the project's root. - `repo` - specifies the location of the source DVC project. Both HTTP and SSH protocols are supported for online Git repository URLs (e.g. 
`[user@]server:project.git`). `repo` can also be a local file system path to - an "offline" project. If not supplied, this defaults to the current working + an "offline" project. If not supplied, defaults to the current working directory. - > A `dvc.exceptions.NotDvcRepoError` is thrown if `repo` is not a valid DVC - > project. + A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. -- `rev` - (optional) +- `rev` - [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). `rev` only has an effect when a URL is supplied as parameter to `repo`. If not supplied, it uses the default Git revision, `HEAD`. -- `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) - to fetch the target artifact from. If not supplied, the default depends on the - value of `repo`. The local cache is used when `repo` is the current working - directory (default value of `repo`). when `repo` is an external repository - URL, the default project remote is used. +- `remote` - name of the [DVC remote](/doc/command-reference/remote) to fetch + the target artifact from. If not supplied, the default depends on the value of + `repo`. The local cache is used when `repo` is the current working directory + (default value of `repo`). when `repo` is an external repository URL, the + default project remote is used. - > A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and - > the project has no default remote. + A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the + project has no default remote. - `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3/library/functions.html#open). 
Defaults to diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 39a497791b..bc429922a1 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -23,39 +23,38 @@ projects – no _context manager_ (`with` keyword) required. ## Parameters -- **`path`** - used to specify the location of the target artifact within the - source project in `repo`, relative to the project's root. +- **`path`** - specifies the location of the target artifact within the source + project in `repo`, relative to the project's root. - `repo` - specifies the location of the source DVC project. Both HTTP and SSH protocols are supported for online Git repository URLs (e.g. `[user@]server:project.git`). `repo` can also be a local file system path to - an "offline" project. If not supplied, this defaults to the current working + an "offline" project. If not supplied, defaults to the current working directory. - > A `dvc.exceptions.NotDvcRepoError` is thrown if `repo` is not a valid DVC - > project. + A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. -- `rev` - (optional) +- `rev` - [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) (such as a branch name, a tag, or a commit hash). `rev` only has an effect when a URL is supplied as parameter to `repo`. If not supplied, it uses the default Git revision, `HEAD`. -- `remote` - (optional) name of the [DVC remote](/doc/command-reference/remote) - to fetch the target artifact from. If not supplied, the default depends on the - value of `repo`. The local cache is used when `repo` is the current working - directory (default value of `repo`). when `repo` is an external repository - URL, the default project remote is used. +- `remote` - name of the [DVC remote](/doc/command-reference/remote) to fetch + the target artifact from. If not supplied, the default depends on the value of + `repo`. 
The local cache is used when `repo` is the current working directory + (default value of `repo`). when `repo` is an external repository URL, the + default project remote is used. - > A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and - > the project has no default remote. + A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the + project has no default remote. -- `mode` - (optional) mirrors the namesake parameter in builtin +- `mode` - mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3/library/functions.html#open). Defaults to `"r"` (read). -- `encoding` - (optional) used to decode contents to a string. Mirrors the - namesake parameter in builtin `open()`. Defaults to `"utf-8"`. +- `encoding` - used to decode contents to a string. Mirrors the namesake + parameter in builtin `open()`. Defaults to `"utf-8"`. ## Examples From a921b40a7385d1a4629bf1f71bab57d6ab5b27d5 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sat, 8 Feb 2020 22:21:21 -0600 Subject: [PATCH 042/100] api ref: simplify and improve basic param descs per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348444448 and https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348447249 --- public/static/docs/api-reference/get_url.md | 12 +++++------- public/static/docs/api-reference/open.md | 12 +++++------- public/static/docs/api-reference/read.md | 12 +++++------- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index dfe472bf77..47a47c91a8 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -53,15 +53,13 @@ and `dvc add` to learn more about how DVC handles data directories. - `rev` - [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) - (such as a branch name, a tag, or a commit hash). 
`rev` only has an effect - when a URL is supplied as parameter to `repo`. If not supplied, it uses the - default Git revision, `HEAD`. + (such as a branch name, a tag, or a commit hash). It only has an effect when + `repo` is a Git repository. If not supplied, it uses the default Git revision, + `HEAD`. - `remote` - name of the [DVC remote](/doc/command-reference/remote) to fetch - the target artifact from. If not supplied, the default depends on the value of - `repo`. The local cache is used when `repo` is the current working directory - (default value of `repo`). when `repo` is an external repository URL, the - default project remote is used. + the target artifact from. If not supplied, the default remote of `repo` is + used (or the cache directory for local projects). A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the project has no default remote. diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 37c54ec7b2..22aa9c09be 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -42,15 +42,13 @@ regularly into a temporary local path before the file object is made available. - `rev` - [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) - (such as a branch name, a tag, or a commit hash). `rev` only has an effect - when a URL is supplied as parameter to `repo`. If not supplied, it uses the - default Git revision, `HEAD`. + (such as a branch name, a tag, or a commit hash). It only has an effect when + `repo` is a Git repository. If not supplied, it uses the default Git revision, + `HEAD`. - `remote` - name of the [DVC remote](/doc/command-reference/remote) to fetch - the target artifact from. If not supplied, the default depends on the value of - `repo`. The local cache is used when `repo` is the current working directory - (default value of `repo`).
when `repo` is an external repository URL, the - default project remote is used. + the target artifact from. If not supplied, the default remote of `repo` is + used (or the cache directory for local projects). A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the project has no default remote. diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index bc429922a1..7208d8302e 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -36,15 +36,13 @@ projects – no _context manager_ (`with` keyword) required. - `rev` - [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) - (such as a branch name, a tag, or a commit hash). `rev` only has an effect - when a URL is supplied as parameter to `repo`. If not supplied, it uses the - default Git revision, `HEAD`. + (such as a branch name, a tag, or a commit hash). It only has an effect when + `repo` is a Git repository. If not supplied, it uses the default Git revision, + `HEAD`. - `remote` - name of the [DVC remote](/doc/command-reference/remote) to fetch - the target artifact from. If not supplied, the default depends on the value of - `repo`. The local cache is used when `repo` is the current working directory - (default value of `repo`). when `repo` is an external repository URL, the - default project remote is used. + the target artifact from. If not supplied, the default remote of `repo` is + used (or the cache directory for local projects). A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the project has no default remote.
From a6f9eecc667747b1b53435e2e617d97fe9cb8557 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sat, 8 Feb 2020 22:46:09 -0600 Subject: [PATCH 043/100] api ref: improvements to repo param and get_url example per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348450193 and https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348450263 --- public/static/docs/api-reference/get_url.md | 11 +++++------ public/static/docs/api-reference/open.md | 3 +-- public/static/docs/api-reference/read.md | 3 +-- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 47a47c91a8..0a0419965b 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -46,8 +46,7 @@ and `dvc add` to learn more about how DVC handles data directories. - `repo` - specifies the location of the source DVC project. Both HTTP and SSH protocols are supported for online Git repository URLs (e.g. `[user@]server:project.git`). `repo` can also be a local file system path to - an "offline" project. If not supplied, defaults to the current working - directory. + an "offline" project. If not supplied, defaults to the current DVC project. A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. @@ -78,8 +77,8 @@ The value of `resource_url` in this case would be something like: `https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355` -This URL represents the physical location of the data, built by interpreting the +This URL represents the physical location of the data, built by evaluating the corresponding [DVC-file](/doc/user-guide/dvc-file-format), where the file's -checksum `a304afb96060aad90176268345e10355` is stored, and the project's remote -configuration where the base URL `https://remote.dvc.org/dataset-registry/` is -saved. 
+checksum (`a304afb96060aad90176268345e10355`) is stored, and the +[project-configuration](https://github.com/iterative/dataset-registry/blob/master/.dvc/config) +where the remote URL (`https://remote.dvc.org/dataset-registry`) is saved. diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 22aa9c09be..55f279aee6 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -35,8 +35,7 @@ regularly into a temporary local path before the file object is made available. - `repo` - specifies the location of the source DVC project. Both HTTP and SSH protocols are supported for online Git repository URLs (e.g. `[user@]server:project.git`). `repo` can also be a local file system path to - an "offline" project. If not supplied, defaults to the current working - directory. + an "offline" project. If not supplied, defaults to the current DVC project. A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 7208d8302e..1cf9ae2fab 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -29,8 +29,7 @@ projects – no _context manager_ (`with` keyword) required. - `repo` - specifies the location of the source DVC project. Both HTTP and SSH protocols are supported for online Git repository URLs (e.g. `[user@]server:project.git`). `repo` can also be a local file system path to - an "offline" project. If not supplied, defaults to the current working - directory. + an "offline" project. If not supplied, defaults to the current DVC project. A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. 
From c1eb598462db4623e643faa7d8fe0cf1d80e54d4 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 12 Feb 2020 01:09:20 -0600 Subject: [PATCH 044/100] api ref: further explain URL construction in get_url example per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348447483 --- public/static/docs/api-reference/get_url.md | 28 +++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 0a0419965b..ddefc4ac27 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -31,7 +31,7 @@ or `boto3` Note that if the target is a directory, the URL will end in `.dir`, as DVC stores a special JSON file with `.dir` extension that contains the mapping of -files in the directory (as a JSON array), along with their checksums. Refer to +files in the directory (as a JSON array), along with their hash values. Refer to [Structure of cache directory](/doc/user-guide/dvc-files-and-directories#structure-of-cache-directory) and `dvc add` to learn more about how DVC handles data directories. @@ -73,12 +73,26 @@ resource_url = dvc.api.get_url( repo='https://github.com/iterative/dataset-registry') ``` -The value of `resource_url` in this case would be something like: +The value of `resource_url` in this case would result in: `https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355` -This URL represents the physical location of the data, built by evaluating the -corresponding [DVC-file](/doc/user-guide/dvc-file-format), where the file's -checksum (`a304afb96060aad90176268345e10355`) is stored, and the -[project-configuration](https://github.com/iterative/dataset-registry/blob/master/.dvc/config) -where the remote URL (`https://remote.dvc.org/dataset-registry`) is saved. 
+This URL represents the physical location of the data, and is built by +evaluating the corresponding DVC-file +([`get-started/data.xml.dvc`](https://github.com/iterative/dataset-registry/blob/master/get-started/data.xml.dvc)) +where the `md5` file hash is stored, + +```yaml +outs: + - md5: a304afb96060aad90176268345e10355 + path: get-started/data.xml +``` + +and the project configuration +([`.dvc/config`](https://github.com/iterative/dataset-registry/blob/master/.dvc/config)) +where the remote URL is saved: + +```dvc +['remote "storage"'] +url = https://remote.dvc.org/dataset-registry +``` From 4f42b8838183a35bc507e75d79694e7e7e1616a9 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 12 Feb 2020 01:23:17 -0600 Subject: [PATCH 045/100] api ref: simplify api index per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356420096 --- public/static/docs/api-reference/index.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md index 56c22d85ea..372ddf94f6 100644 --- a/public/static/docs/api-reference/index.md +++ b/public/static/docs/api-reference/index.md @@ -1,16 +1,14 @@ # Python API -DVC can be used as a Python library, simply [install](/doc/install) it with a -package manager like `pip` or `conda`, and as a Python -[project requirement](https://pip.pypa.io/en/latest/user_guide/#requirements-files) -if needed. This reference provides the details about the functions in the API +DVC can be used as a Python library, simply [install](/doc/install) with `pip` +or `conda`. 
This reference provides the details about the functions in the API module `dvc.api`, which can be imported any regular way, for example: ```py import dvc.api ``` -The purpose of our API is to provide programatic access to the data or models +The purpose of this API is to provide programatic access to the data or models [stored and versioned using DVC](/doc/use-cases/versioning-data-and-model-files) from Python apps. Please choose a function from the navigation sidebar to the left, or click the `Next` button below to jump into the first one ↘ From 8208fd908e6f4be2b18194d546bac4b0a358ab19 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Mon, 17 Feb 2020 03:30:02 -0600 Subject: [PATCH 046/100] api: add link to dvcx repo --- public/static/docs/api-reference/index.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md index 372ddf94f6..73c272cd1c 100644 --- a/public/static/docs/api-reference/index.md +++ b/public/static/docs/api-reference/index.md @@ -12,3 +12,6 @@ The purpose of this API is to provide programatic access to the data or models [stored and versioned using DVC](/doc/use-cases/versioning-data-and-model-files) from Python apps. Please choose a function from the navigation sidebar to the left, or click the `Next` button below to jump into the first one ↘ + +> Please see also [dvcx](https://github.com/iterative/dvcx) for additional API +> extensions. 
From 1d5d3a9a0a703d839d18509192ca9d34b97642bb Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Mon, 17 Feb 2020 03:52:53 -0600 Subject: [PATCH 047/100] api: open() and read() support Git-tracked files per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356420324 --- public/static/docs/api-reference/index.md | 8 +++++--- public/static/docs/api-reference/open.md | 8 ++++---- public/static/docs/api-reference/read.md | 5 +++-- public/static/docs/glossary.js | 1 + 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md index 73c272cd1c..f6015a2ece 100644 --- a/public/static/docs/api-reference/index.md +++ b/public/static/docs/api-reference/index.md @@ -9,9 +9,11 @@ import dvc.api ``` The purpose of this API is to provide programatic access to the data or models -[stored and versioned using DVC](/doc/use-cases/versioning-data-and-model-files) -from Python apps. Please choose a function from the navigation sidebar to the -left, or click the `Next` button below to jump into the first one ↘ +[stored and versioned](/doc/use-cases/versioning-data-and-model-files) in +DVC repositories from Python apps. > Please see also [dvcx](https://github.com/iterative/dvcx) for additional API > extensions. + +Please choose a function from the navigation sidebar to the left, or click the +`Next` button below to jump into the first one ↘ diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 55f279aee6..90c8e2c3ec 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -1,7 +1,7 @@ # dvc.api.open() [Context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) -to open a DVC-tracked file artifact as a +to open a tracked file as a [file object](https://docs.python.org/3/glossary.html#term-file-object). 
## Signature @@ -14,9 +14,9 @@ open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) This function is analogous to the [`open()`](https://docs.python.org/3/library/functions.html#open) Python -builtin, but for files tracked in DVC projects. However, it may -only be used as a _context manager_ (using the `with` keyword as shown in the -examples below). There is no `close()`. +builtin, but for files tracked in DVC projects (by DVC or Git). +However, it may only be used as a _context manager_ (using the `with` keyword as +shown in the **Examples** below). There is no `close()`. 💡 Note that `dvc.api.open()` is able to [stream](https://docs.python.org/3/library/io.html) the file directly from diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 1cf9ae2fab..351d9b4381 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -1,6 +1,6 @@ # dvc.api.read() -Returns the contents of a DVC-tracked file artifact as a +Returns the contents of a tracked file as a [bytes object](https://docs.python.org/3/glossary.html#term-bytes-like-object) or as a [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). @@ -15,7 +15,8 @@ read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) This function wraps [`dvc.api.open()`](/doc/api-reference/open) for a simple and direct way to return the complete file contents of files tracked in DVC -projects – no _context manager_ (`with` keyword) required. +projects (by DVC or Git) – no _context manager_ (`with` keyword) +required. > Internally, it uses the _file object_'s > [`read()`](https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects) diff --git a/public/static/docs/glossary.js b/public/static/docs/glossary.js index bb6d10bc59..fe982958ac 100644 --- a/public/static/docs/glossary.js +++ b/public/static/docs/glossary.js @@ -16,6 +16,7 @@ code, ML models, etc. 
It will conatain your DVC project. name: 'DVC Project', match: [ 'DVC project', + 'DVC projects', 'project', 'projects', 'DVC repository', From a11465e5c9f2f37c965480159a446dda3d4be0c3 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Mon, 17 Feb 2020 03:57:54 -0600 Subject: [PATCH 048/100] links: fix link-check for api docs --- public/static/docs/api-reference/open.md | 2 +- public/static/docs/api-reference/read.md | 2 +- scripts/exclude-links.txt | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 90c8e2c3ec..357bd2d600 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -136,7 +136,7 @@ import dvc.api with open( 'activity.log', - repo='https://example.com/dvc/repo', + repo='location/of/dvc/project', remote='my-s3-bucket' ) as fd: for line in fd: diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 351d9b4381..cabe3be5fb 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -63,7 +63,7 @@ import dvc.api model = pickle.loads( dvc.api.read( 'model.pkl', - repo='https://github.com/my-org/my-repo.git' + repo='https://github.com/example/project.git' mode='rb') ``` diff --git a/scripts/exclude-links.txt b/scripts/exclude-links.txt index 0019e49244..b332e30bfb 100644 --- a/scripts/exclude-links.txt +++ b/scripts/exclude-links.txt @@ -31,6 +31,7 @@ https://man.dvc.org/foo https://marketplace.visualstudio.com/items?itemName=stkb.rewrap https://myendpoint.com https://object-storage.example.com +https://remote.dvc.org/dataset-registry https://remote.dvc.org/foo/bar https://remote.dvc.org/get-started https://s3-us-east-2.amazonaws.com/dvc-public/code/foo/bar From f86afdea4a1cc20f4bc73fe9ca06aea98d989305 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Mon, 17 Feb 2020 16:01:04 -0600 Subject: [PATCH 049/100] api: typo 
--- public/static/docs/api-reference/get_url.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index ddefc4ac27..9a1b7bc035 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,6 +1,6 @@ # dvc.api.get_url() -Return the URL +Returns the URL ([string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str) type)) to the storage location of a data artifact tracked by DVC. From 979d70c9752bfe8b1ec8aeb8d3616b2c7e5ca3a5 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Mon, 17 Feb 2020 20:53:14 -0600 Subject: [PATCH 050/100] api: Signature -> definition section in all fns per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-348442563 and https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356421226 --- public/static/docs/api-reference/get_url.md | 16 ++++++++++++---- public/static/docs/api-reference/open.md | 17 ++++++++++++++--- public/static/docs/api-reference/read.md | 21 ++++++++++++++++----- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 9a1b7bc035..9b1f1dd57a 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,15 +1,23 @@ # dvc.api.get_url() -Returns the URL -([string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str) -type)) to the storage location of a data artifact tracked by DVC. +Returns the URL to the storage location of a data artifact tracked +by DVC. -## Signature +## Definition + +### Signature ```py get_url(path, repo=None, rev=None, remote=None) ``` +### Types + +All **parameter** types as well as the **return** type are +[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). 
+ +Raises `UrlNotDvcRepoError` if `repo` is not a DVC repository. + ## Description Returns the full URL to the physical location (in a diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 357bd2d600..e101e556ee 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -1,15 +1,26 @@ # dvc.api.open() [Context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) -to open a tracked file as a -[file object](https://docs.python.org/3/glossary.html#term-file-object). +to open a tracked file. -## Signature +## Definition + +### Signature ```py open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) ``` +### Types + +All **parameter** types are +[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). + +The **return** type is a +[file object](https://docs.python.org/3/glossary.html#term-file-object) + +No exceptions are thrown by this function directly. + ## Description This function is analogous to the diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index cabe3be5fb..f7f97ea818 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -1,16 +1,27 @@ # dvc.api.read() -Returns the contents of a tracked file as a -[bytes object](https://docs.python.org/3/glossary.html#term-bytes-like-object) -or as a -[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). +Returns the contents of a tracked file. + +## Definition -## Signature +### Signature ```py read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) ``` +### Types + +All **parameter** types are +[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). 
+ +The **return** type can be a +[bytes object](https://docs.python.org/3/glossary.html#term-bytes-like-object) +or a +[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). + +No exceptions are thrown by this function directly. + ## Description This function wraps [`dvc.api.open()`](/doc/api-reference/open) for a simple and From 099dc4e717f6db16471a7a6e412e4888e5099697 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Mon, 17 Feb 2020 21:31:06 -0600 Subject: [PATCH 051/100] api: copy edits and term artifact -> file or dir in get_url per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356422335 and https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356422855 and https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356423029 --- public/static/docs/api-reference/get_url.md | 24 ++++++++++----------- public/static/docs/api-reference/read.md | 5 ++--- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 9b1f1dd57a..205cc47c5f 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,7 +1,7 @@ # dvc.api.get_url() -Returns the URL to the storage location of a data artifact tracked -by DVC. +Returns the URL to the storage location of a data file or directory tracked by +DVC. ## Definition @@ -21,13 +21,16 @@ Raises `UrlNotDvcRepoError` if `repo` is not a DVC repository. ## Description Returns the full URL to the physical location (in a -[DVC remote](/doc/command-reference/remote)) where a target data artifact -specified by its `path` in a `repo` (DVC project) is stored. 
The -URL is formed by evaluating the corresponding -[DVC-file](/doc/user-guide/dvc-file-format) (see [Examples](#examples) below) as -well as the project's +[DVC remote](/doc/command-reference/remote)) where a target data file or +directory (artifact), specified by its `path` in a `repo` +(DVC project), is stored. The URL is formed by reading the +corresponding [DVC-file](/doc/user-guide/dvc-file-format) (see +[Examples](#examples) below) as well as the project's [default remote](https://dvc.org/doc/command-reference/remote/default). +The URL scheme returned depends on the type of `remote`. Here's a full list of +[supported remote types](https://dvc.org/doc/command-reference/remote/add#supported-storage-types). + ⚠️ This function does not check for the actual existence of the file or directory in the remote storage. @@ -43,9 +46,6 @@ files in the directory (as a JSON array), along with their hash values. Refer to [Structure of cache directory](/doc/user-guide/dvc-files-and-directories#structure-of-cache-directory) and `dvc add` to learn more about how DVC handles data directories.
-> For possible location protocols, refer to the -> [supported remote types](https://dvc.org/doc/command-reference/remote/add#supported-storage-types) - ## Parameters - **`path`** - specifies the location of the target artifact within the source @@ -85,8 +85,8 @@ The value of `resource_url` in this case would result in: `https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355` -This URL represents the physical location of the data, and is built by -evaluating the corresponding DVC-file +This URL represents the physical location of the data, and is built by reading +the corresponding DVC-file ([`get-started/data.xml.dvc`](https://github.com/iterative/dataset-registry/blob/master/get-started/data.xml.dvc)) where the `md5` file hash is stored, diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index f7f97ea818..eb956ea1f7 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -25,9 +25,8 @@ No exceptions are thrown by this function directly. ## Description This function wraps [`dvc.api.open()`](/doc/api-reference/open) for a simple and -direct way to return the complete file contents of files tracked in DVC -projects (by DVC or Git) – no _context manager_ (`with` keyword) -required. +direct way to return the complete contents of files tracked in DVC +projects (by DVC or Git) – no _context manager_ (`with` keyword) needed. 
> Internally, it uses the _file object_'s > [`read()`](https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects) From 9e218257d6ee77ed41967d24d998997c4fb4bb89 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Mon, 17 Feb 2020 22:09:38 -0600 Subject: [PATCH 052/100] api: term artifact -> data since open() and read() don't support dirs per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356423029 --- public/static/docs/api-reference/get_url.md | 30 ++++++++++----------- public/static/docs/api-reference/open.md | 18 ++++++------- public/static/docs/api-reference/read.md | 16 +++++------ 3 files changed, 29 insertions(+), 35 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 205cc47c5f..d69a7d29a5 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -21,18 +21,18 @@ Raises `UrlNotDvcRepoError` if `repo` is not a DVC repository. ## Description Returns the full URL to the physical location (in a -[DVC remote](/doc/command-reference/remote)) where a target data file or -directory (artifact), specified by its `path` in a `repo` -(DVC project), is stored. The URL is formed by reading the -corresponding [DVC-file](/doc/user-guide/dvc-file-format) (see -[Examples](#examples) below) as well as the project's +[DVC remote](/doc/command-reference/remote)) where a target file or directory +(artifact), specified by its `path` in a `repo` (DVC +project), is stored. The URL is formed by reading the corresponding +[DVC-file](/doc/user-guide/dvc-file-format) (see [Examples](#examples) below) as +well as the project's [default remote](https://dvc.org/doc/command-reference/remote/default). The URL scheme returned depends on the type of `remote`. Here's a full list of [supported remote types](https://dvc.org/doc/command-reference/remote/add#supported-storage-types).
-⚠️ This function does not check for the actual existence of the file or -directory in the remote storage. +⚠️ This function does not check for the actual existence of the target data in +the remote storage. 💡 Having the resource's URL, it should be possible to download it directly with an appropriate tool such as @@ -48,7 +48,7 @@ and `dvc add` to learn more about how DVC handles data directories. ## Parameters -- **`path`** - specifies the location of the target artifact within the source +- **`path`** - specifies the location of the target data within the source project in `repo`, relative to the project's root. - `repo` - specifies the location of the source DVC project. Both HTTP and SSH @@ -58,15 +58,13 @@ and `dvc add` to learn more about how DVC handles data directories. A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. -- `rev` - - [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) - (such as a branch name, a tag, or a commit hash). It only has an effect when - `repo` is a Git repository. If not supplied, it uses the default Git revision, - `HEAD`. +- `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as + a branch or tag name, or a commit hash). It only has an effect when `repo` is + a Git repository. If not supplied, it uses the default Git revision, `HEAD`. -- `remote` - name of the [DVC remote](/doc/command-reference/remote) to fetch - the target artifact from. If not supplied, the default remote or `repo` is - used (or the cache directory for local projects). +- `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for + the target data. If not supplied, the default remote or `repo` is used (or the + cache directory for local projects). A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the project has no default remote. 
diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index e101e556ee..80157e4ae0 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -40,7 +40,7 @@ regularly into a temporary local path before the file object is made available. ## Parameters -- **`path`** - specifies the location of the target artifact within the source +- **`path`** - specifies the location of the target data within the source project in `repo`, relative to the project's root. - `repo` - specifies the location of the source DVC project. Both HTTP and SSH @@ -50,15 +50,13 @@ regularly into a temporary local path before the file object is made available. A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. -- `rev` - - [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) - (such as a branch name, a tag, or a commit hash). It only has an effect when - `repo` is a Git repository. If not supplied, it uses the default Git revision, - `HEAD`. +- `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as + a branch or tag name, or a commit hash). It only has an effect when `repo` is + a Git repository. If not supplied, it uses the default Git revision, `HEAD`. -- `remote` - name of the [DVC remote](/doc/command-reference/remote) to fetch - the target artifact from. If not supplied, the default remote or `repo` is - used (or the cache directory for local projects). +- `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for + the target data. If not supplied, the default remote or `repo` is used (or the + cache directory for local projects). A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the project has no default remote. @@ -86,7 +84,7 @@ with dvc.api.open( ``` > See also `dvc.api.read` for a more direct way to read the complete contents of -> a file artifact. +> a tracked file. 
## Example: Use a file from the local cache diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index eb956ea1f7..674189501d 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -34,7 +34,7 @@ projects (by DVC or Git) – no _context manager_ (`with` keyword) needed ## Parameters -- **`path`** - specifies the location of the target artifact within the source +- **`path`** - specifies the location of the target data within the source project in `repo`, relative to the project's root. - `repo` - specifies the location of the source DVC project. Both HTTP and SSH @@ -44,15 +44,13 @@ projects (by DVC or Git) – no _context manager_ (`with` keyword) needed A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. -- `rev` - - [Git-revision](https://git-scm.com/book/en/v2/Git-Internals-Git-References) - (such as a branch name, a tag, or a commit hash). It only has an effect when - `repo` is a Git repository. If not supplied, it uses the default Git revision, - `HEAD`. +- `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as + a branch or tag name, or a commit hash). It only has an effect when `repo` is + a Git repository. If not supplied, it uses the default Git revision, `HEAD`. -- `remote` - name of the [DVC remote](/doc/command-reference/remote) to fetch - the target artifact from. If not supplied, the default remote or `repo` is - used (or the cache directory for local projects). +- `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for + the target data. If not supplied, the default remote or `repo` is used (or the + cache directory for local projects). A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the project has no default remote. 
From 4eb85b37b275224fa805295500f75cdd5ec80069 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 18 Feb 2020 09:18:30 -0600 Subject: [PATCH 053/100] api: typo --- public/static/docs/api-reference/open.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 80157e4ae0..1ac5e069ed 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -17,7 +17,7 @@ All **parameter** types are [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). The **return** type is a -[file object](https://docs.python.org/3/glossary.html#term-file-object) +[file object](https://docs.python.org/3/glossary.html#term-file-object). No exceptions are thrown by this function directly. From b6cd85e8dd3ca50d1441f5d9f10e33877389557b Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 18 Feb 2020 17:40:13 -0600 Subject: [PATCH 054/100] api: improvements to fn params per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356423326 through https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356424872 --- public/static/docs/api-reference/get_url.md | 14 +++++++------- public/static/docs/api-reference/open.md | 14 +++++++------- public/static/docs/api-reference/read.md | 14 +++++++------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index d69a7d29a5..00022d35df 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -51,19 +51,19 @@ and `dvc add` to learn more about how DVC handles data directories. - **`path`** - specifies the location of the target data within the source project in `repo`, relative to the project's root. -- `repo` - specifies the location of the source DVC project. 
Both HTTP and SSH - protocols are supported for online Git repository URLs (e.g. - `[user@]server:project.git`). `repo` can also be a local file system path to - an "offline" project. If not supplied, defaults to the current DVC project. +- `repo` - specifies the location of the source DVC project. If not supplied, + defaults to the current DVC project. It can be a URL or a file system path. + Both HTTP and SSH protocols are supported for online Git repos (e.g. + `[user@]server:project.git`). A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as - a branch or tag name, or a commit hash). It only has an effect when `repo` is - a Git repository. If not supplied, it uses the default Git revision, `HEAD`. + a branch or tag name, or a commit hash). If not supplied, it uses the default + Git revision, `HEAD`. If `repo` is not a Git repo, this option is ignored. - `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for - the target data. If not supplied, the default remote or `repo` is used (or the + the target data. If not supplied, the default remote of `repo` is used (or the cache directory for local projects). A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 1ac5e069ed..1418ab8020 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -43,19 +43,19 @@ regularly into a temporary local path before the file object is made available. - **`path`** - specifies the location of the target data within the source project in `repo`, relative to the project's root. -- `repo` - specifies the location of the source DVC project.
`repo` can also be a local file system path to - an "offline" project. If not supplied, defaults to the current DVC project. +- `repo` - specifies the location of the source DVC project. If not supplied, + defaults to the current DVC project. It can be a URL or a file system path. + Both HTTP and SSH protocols are supported for online Git repos (e.g. + `[user@]server:project.git`). A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as - a branch or tag name, or a commit hash). It only has an effect when `repo` is - a Git repository. If not supplied, it uses the default Git revision, `HEAD`. + a branch or tag name, or a commit hash). If not supplied, it uses the default + Git revision, `HEAD`. If `repo` is not a Git repo, this option is ignored. - `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for - the target data. If not supplied, the default remote or `repo` is used (or the + the target data. If not supplied, the default remote of `repo` is used (or the cache directory for local projects). A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 674189501d..8a57f08a19 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -37,19 +37,19 @@ projects (by DVC or Git) – no _context manager_ (`with` keyword) needed - **`path`** - specifies the location of the target data within the source project in `repo`, relative to the project's root. -- `repo` - specifies the location of the source DVC project. Both HTTP and SSH - protocols are supported for online Git repository URLs (e.g. - `[user@]server:project.git`). `repo` can also be a local file system path to - an "offline" project. If not supplied, defaults to the current DVC project.
+- `repo` - specifies the location of the source DVC project. If not supplied, + defaults to the current DVC project. It can be a URL or a file system path. + Both HTTP and SSH protocols are supported for online Git repos (e.g. + `[user@]server:project.git`). A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as - a branch or tag name, or a commit hash). It only has an effect when `repo` is - a Git repository. If not supplied, it uses the default Git revision, `HEAD`. + a branch or tag name, or a commit hash). If not supplied, it uses the default + Git revision, `HEAD`. If `repo` is not a Git repo, this option is ignored. - `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for - the target data. If not supplied, the default remote or `repo` is used (or the + the target data. If not supplied, the default remote of `repo` is used (or the cache directory for local projects). A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the From 11fb55e8928ccd4a18dde9f3094371bb59455d03 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 18 Feb 2020 17:48:49 -0600 Subject: [PATCH 055/100] api: updates to repo param per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356424954 and https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356425126 --- public/static/docs/api-reference/get_url.md | 9 ++++----- public/static/docs/api-reference/open.md | 11 +++++------ public/static/docs/api-reference/read.md | 11 +++++------ 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 00022d35df..1d7010ab54 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -56,18 +56,17 @@ and `dvc add` to learn more about how DVC handles data directories.
Both HTTP and SSH protocols are supported for online Git repos (e.g. `[user@]server:project.git`). - A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. + A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as a branch or tag name, or a commit hash). If not supplied, it uses the default Git revision, `HEAD`. If `repo` is not a Git repo, this option is ignored. - `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for - the target data. If not supplied, the default remote of `repo` is used (or the - cache directory for local projects). + the target data. If not supplied, the cache directory is tried first for local + projects; the default remote of `repo` is tried otherwise. - A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the - project has no default remote. + A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. ## Examples diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 1418ab8020..da8cb29415 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -19,7 +19,7 @@ All **parameter** types are The **return** type is a [file object](https://docs.python.org/3/glossary.html#term-file-object). -No exceptions are thrown by this function directly. +No exceptions are raised by this function directly. ## Description @@ -48,18 +48,17 @@ regularly into a temporary local path before the file object is made available. Both HTTP and SSH protocols are supported for online Git repos (e.g. `[user@]server:project.git`). - A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. + A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project.
- `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as a branch or tag name, or a commit hash). If not supplied, it uses the default Git revision, `HEAD`. If `repo` is not a Git repo, this option is ignored. - `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for - the target data. If not supplied, the default remote of `repo` is used (or the - cache directory for local projects). + the target data. If not supplied, the cache directory is tried first for local + projects; the default remote of `repo` is tried otherwise. - A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the - project has no default remote. + A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. - `mode` - (optional) mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3/library/functions.html#open). Defaults to diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 8a57f08a19..05b68c30fa 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -20,7 +20,7 @@ The **return** type can be a or a [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). -No exceptions are thrown by this function directly. +No exceptions are raised by this function directly. ## Description @@ -42,18 +42,17 @@ projects (by DVC or Git) – no _context manager_ (`with` keyword) needed Both HTTP and SSH protocols are supported for online Git repos (e.g. `[user@]server:project.git`). - A `dvc.api.UrlNotDvcRepoError` is thrown if `repo` is not a valid DVC project. + A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as a branch or tag name, or a commit hash). If not supplied, it uses the default Git revision, `HEAD`. If `repo` is not a Git repo, this option is ignored.
- `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for - the target data. If not supplied, the default remote of `repo` is used (or the - cache directory for local projects). + the target data. If not supplied, the cache directory is tried first for local + projects; the default remote of `repo` is tried otherwise. - A `dvc.exceptions.NoRemoteError` is thrown if no `remote` is specified and the - project has no default remote. + A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. - `mode` - mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3/library/functions.html#open). Defaults to From be2316fe5dff453b771e0ec89f2f0a7d32873511 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 18 Feb 2020 19:38:55 -0600 Subject: [PATCH 056/100] install: reword link to api ref per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356425770 --- public/static/docs/install/linux.md | 2 +- public/static/docs/install/macos.md | 2 +- public/static/docs/install/windows.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/public/static/docs/install/linux.md b/public/static/docs/install/linux.md index 264daa5dc6..5b5ab6c22d 100644 --- a/public/static/docs/install/linux.md +++ b/public/static/docs/install/linux.md @@ -1,6 +1,6 @@ # Installation on Linux -> For [API](/doc/api-reference) use, please +> To use DVC [as a Python library](/doc/api-reference), please > [install with pip](#install-with-pip) or [with conda](#install-with-conda). ## Install with pip diff --git a/public/static/docs/install/macos.md b/public/static/docs/install/macos.md index f1c51b2307..3a231e4647 100644 --- a/public/static/docs/install/macos.md +++ b/public/static/docs/install/macos.md @@ -1,6 +1,6 @@ # Installation on MacOS -> For [API](/doc/api-reference) use, please +> To use DVC [as a Python library](/doc/api-reference), please > [install with pip](#install-with-pip) or [with conda](#install-with-conda).
## Install with brew diff --git a/public/static/docs/install/windows.md b/public/static/docs/install/windows.md index 6829d5bdf0..cc2e78e4fe 100644 --- a/public/static/docs/install/windows.md +++ b/public/static/docs/install/windows.md @@ -6,7 +6,7 @@ -> For [API](/doc/api-reference) use, please +> To use DVC [as a Python library](/doc/api-reference), please > [install with pip](#install-with-pip) or [with conda](#install-with-conda). ## Windows installer From c94610d54ce12cd62d3a0bfd085947d1470625fd Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 18 Feb 2020 19:43:22 -0600 Subject: [PATCH 057/100] term: GitHub URLs -> hosted on GitHub per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356426254 --- public/static/docs/command-reference/get-url.md | 2 +- public/static/docs/command-reference/import-url.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/public/static/docs/command-reference/get-url.md b/public/static/docs/command-reference/get-url.md index c2d1af96b8..4d7fccf6a6 100644 --- a/public/static/docs/command-reference/get-url.md +++ b/public/static/docs/command-reference/get-url.md @@ -4,7 +4,7 @@ Download a file or directory from a supported URL (for example `s3://`, `ssh://`, and other protocols) into the local file system. > See `dvc get` to download data/model files or directories from other DVC -> repositories (e.g. GitHub URLs). +> repositories (e.g. hosted on GitHub). ## Synopsis diff --git a/public/static/docs/command-reference/import-url.md b/public/static/docs/command-reference/import-url.md index 4812e2c03e..8c7027bdc8 100644 --- a/public/static/docs/command-reference/import-url.md +++ b/public/static/docs/command-reference/import-url.md @@ -5,7 +5,7 @@ Download a file or directory from a supported URL (for example `s3://`, changes in the remote data source. Creates a DVC-file. > See `dvc import` to download and tack data/model files or directories from -> other DVC repositories (e.g. GitHub URLs). 
+> other DVC repositories (e.g. hosted on GitHub). ## Synopsis From 9c52cd8875dee5a769e61d1ec29bbd1f0a504f26 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 18 Feb 2020 19:50:57 -0600 Subject: [PATCH 058/100] api: add link to read() from open() desc. per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-356426389 --- public/static/docs/api-reference/open.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index da8cb29415..6e274c5efd 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -29,6 +29,9 @@ builtin, but for files tracked in DVC projects (by DVC or Git). However, it may only be used as a _context manager_ (using the `with` keyword as shown in the **Examples** below). There is no `close()`. +> See also `dvc.api.read()` for a shorthand way to read the complete contents of +> a tracked file. + 💡 Note that `dvc.api.open()` is able to [stream](https://docs.python.org/3/library/io.html) the file directly from **most** @@ -82,8 +85,8 @@ with dvc.api.open( # ... Process elements ``` -> See also `dvc.api.read` for a more direct way to read the complete contents of -> a tracked file. +> See `dvc.api.read()` for a shorthand way to read the contents of a tracked +> file. ## Example: Use a file from the local cache @@ -164,5 +167,5 @@ with dvc.api.open('model.pkl', repo='...') as fd: # ... Use model ``` -> For a faster shorthand way to perform a similar example, please see the +> For a faster way to perform a similar example, please see the > [read() example](/doc/api-reference/read#examples). 
From 942d81dd6b0e6acb19d1e781c78f04c29a896c8b Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 25 Feb 2020 12:48:07 -0600 Subject: [PATCH 059/100] api: remove word "directly" fom exception lists per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-362655336 --- public/static/docs/api-reference/open.md | 2 +- public/static/docs/api-reference/read.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 6e274c5efd..a862a47e23 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -19,7 +19,7 @@ All **parameter** types are The **return** type is a [file object](https://docs.python.org/3/glossary.html#term-file-object). -No exceptions are raised by this function directly. +No exceptions are raised by this function. ## Description diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 05b68c30fa..ac55f57371 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -20,7 +20,7 @@ The **return** type can be a or a [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). -No exceptions are raised by this function directly. +No exceptions are raised by this function. 
## Description From e76bd5fd1f73c4f3f0ce48d54f0a5d706c562d17 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 25 Feb 2020 13:07:28 -0600 Subject: [PATCH 060/100] api: add basic usage sections per https://github.com/iterative/dvc.org/pull/908#issuecomment-588210816 --- public/static/docs/api-reference/get_url.md | 13 ++++++++++++- public/static/docs/api-reference/open.md | 12 +++++++++++- public/static/docs/api-reference/read.md | 11 ++++++++++- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 1d7010ab54..7954568363 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -3,7 +3,18 @@ Returns the URL to the storage location of a data file or directory tracked by DVC. -## Definition +## Usage + +```py +import dvc.api + +resource_url = dvc.api.get_url( + 'get-started/data.xml', + repo='https://github.com/iterative/dataset-registry') + +# resource_url = +# https://remote.dvc.org/dataset-registry/a3/04af... +``` ### Signature diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index a862a47e23..c4463ade29 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -3,7 +3,17 @@ [Context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) to open a tracked file. -## Definition +## Usage + +```py +import dvc.api + +with dvc.api.open( + 'get-started/data.xml', + repo='https://github.com/iterative/dataset-registry' + ) as fd: + # ... Process data +``` ### Signature diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index ac55f57371..46dfe9c134 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -2,7 +2,16 @@ Returns the contents of a tracked file. 
-## Definition +## Usage + +```py +import dvc.api + +modelpkl = dvc.api.read( + 'model.pkl', + repo='https://github.com/example/project.git', + mode='rb') +``` ### Signature From 2b8b4a777577bead517e8ea567e5aa69f17082b4 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 25 Feb 2020 13:12:12 -0600 Subject: [PATCH 061/100] api: improve model open() example per https://github.com/iterative/dvc.org/pull/908#discussion_r382604840 Co-Authored-By: Alexander Schepanovski --- public/static/docs/api-reference/open.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index c4463ade29..a6719566a4 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -173,7 +173,7 @@ import pickle import dvc.api with dvc.api.open('model.pkl', repo='...') as fd: - pickle.load(fd) + model = pickle.load(fd) # ... Use model ``` From aa88ceac031b80dc2835860b69434d64810c04b0 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 25 Feb 2020 13:13:54 -0600 Subject: [PATCH 062/100] api: fix typos and remove lines between import stmts per https://github.com/iterative/dvc.org/pull/908#discussion_r382604346 and https://github.com/iterative/dvc.org/pull/908#discussion_r382605552 --- public/static/docs/api-reference/open.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index c4463ade29..ded3e2304a 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -145,14 +145,12 @@ with dvc.api.open( ## Example: Stream file from a specific remote -Sometimes we may want to chose the [remote](/doc/command-reference/remote) data +Sometimes we may want to choose the [remote](/doc/command-reference/remote) data source, for example to ensure that file streaming is enabled (as only certain remote storage types support 
streaming). This can be done by providing a `remote` argument: ```py -import pandas as pd - import dvc.api with open( @@ -169,7 +167,6 @@ with open( ```py import pickle - import dvc.api with dvc.api.open('model.pkl', repo='...') as fd: From f66826cd9bb47ff0ddd770df91ec3abe54753e22 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 25 Feb 2020 13:22:09 -0600 Subject: [PATCH 063/100] api: fix closing parentheses in example per https://github.com/iterative/dvc.org/pull/908#discussion_r382606552 --- public/static/docs/api-reference/get_url.md | 3 ++- public/static/docs/api-reference/read.md | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 7954568363..37609d4efb 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -86,7 +86,8 @@ import dvc.api resource_url = dvc.api.get_url( 'get-started/data.xml', - repo='https://github.com/iterative/dataset-registry') + repo='https://github.com/iterative/dataset-registry' + ) ``` The value of `resource_url` in this case would result in: diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 46dfe9c134..1fe8798a98 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -80,7 +80,9 @@ model = pickle.loads( dvc.api.read( 'model.pkl', repo='https://github.com/example/project.git' - mode='rb') + mode='rb' + ) + ) ``` > We're using `'rb'` mode here for compatibility with `pickle.loads()`. 
From 8b3929cef8b9f63f6a75c155d88b8377f542d9f5 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 25 Feb 2020 16:27:21 -0600 Subject: [PATCH 064/100] api: remove link to dvcx per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-363050160 --- public/static/docs/api-reference/index.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/public/static/docs/api-reference/index.md b/public/static/docs/api-reference/index.md index f6015a2ece..fbeaf79a26 100644 --- a/public/static/docs/api-reference/index.md +++ b/public/static/docs/api-reference/index.md @@ -12,8 +12,5 @@ The purpose of this API is to provide programatic access to the data or models [stored and versioned](/doc/use-cases/versioning-data-and-model-files) in DVC repositories from Python apps. -> Please see also [dvcx](https://github.com/iterative/dvcx) for additional API -> extensions. - Please choose a function from the navigation sidebar to the left, or click the `Next` button below to jump into the first one ↘ From 3ec79a06db063d5918c5e43a9c9f957a97a3f1c8 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 25 Feb 2020 17:31:57 -0600 Subject: [PATCH 065/100] api: updates to open() per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-363050160 through https://github.com/iterative/dvc.org/pull/908#pullrequestreview-363050795 --- public/static/docs/api-reference/open.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index a6008a779c..b82e1328b9 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -37,19 +37,19 @@ This function is analogous to the [`open()`](https://docs.python.org/3/library/functions.html#open) Python builtin, but for files tracked in DVC projects (by DVC or Git). However, it may only be used as a _context manager_ (using the `with` keyword as -shown in the **Examples** below). 
There is no `close()`. +shown in the [Examples](#examples) below). There is no `dvc.api.close()`. > See also `dvc.api.read()` for a shorthand way to read the complete contents of > a tracked file. -💡 Note that `dvc.api.open()` is able to -[stream](https://docs.python.org/3/library/io.html) the file directly from -**most** -[remote types](/doc/command-reference/remote/add#supported-storage-types) -(local, S3, Azure, GCP, OSS, SSH, HDFS). Otherwise, the file is downloaded -regularly into a temporary local path before the file object is made available. +`dvc.api.open()` avoids downloading files from **most** +[remote types](/doc/command-reference/remote/add#supported-storage-types). It +returns an open connection to the storage, though which file can be used +directly. Only Google Drive storage does not support this, requiring this +function to completely download the file in `path` (into a temporary directory) +before the file object is made available. -> This has similar uses as the `dvc get` and `dvc import` CLI commands. +> This has similar uses as the `dvc get` CLI command. ## Parameters From fcdf0c4f5e3a4037c896aa08d2590b1dd940fdab Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 26 Feb 2020 21:18:18 -0600 Subject: [PATCH 066/100] api: improve open() examples per https://github.com/iterative/dvc.org/pull/908#discussion_r382941600 --- public/static/docs/api-reference/open.md | 69 +++++++++++++----------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index b82e1328b9..c9031ecaf7 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -80,7 +80,13 @@ before the file object is made available. - `encoding` - (optional) used to decode contents to a string. Mirrors the namesake parameter in builtin `open()`. Defaults to `"utf-8"`. 
-## Example: Process XML file from an external DVC repository +## Example: Use artifacts from online DVC repositories + +Any data artifact can be employed directly in your Python app by +using this API. + +For example, an XML file from a public DVC repo online can be processed directly +in your Python app with: ```py from xml.dom.minidom import parse @@ -92,15 +98,32 @@ with dvc.api.open( repo='https://github.com/iterative/dataset-registry' ) as fd: xmldom = parse(fd) - # ... Process elements + # ... Process DOM ``` > See `dvc.api.read()` for a shorthand way to read the contents of a tracked > file. +Now let's imagine you want to deserialize and use a binary model from a private +repo online. For a case like this, we can use an SSH URL instead (assuming the +[credentials are configured](https://help.github.com/en/github/authenticating-to-github/connecting-to-github-with-ssh) +locally): + +```py +import pickle +import dvc.api + +with dvc.api.open( + 'model.pkl', + repo='git@server.com:path/to/repo.git' + ) as fd: + model = pickle.load(fd) + # ... Use instantiated model +``` + ## Example: Use a file from the local cache -In this case we don't supply a `repo` value. DVC will walk up the current +In this case we don't supply a `repo` argument. DVC will walk up the current working directory tree to find the DVC project: ```py @@ -111,7 +134,7 @@ with dvc.api.open('data/nlp/words.txt') as fd: # ... Process words ``` -DVC will look for `data/nlp/words.txt` in the local cache of the +DVC will look for the file contents of `data/nlp/words.txt` in the local project. (If it's not found there, the default [remote](/doc/command-reference/remote) will be tried.) @@ -122,33 +145,31 @@ with dvc.api.open('data/nlp/words.txt', encoding='utf-8') as fd: # ... 
``` -## Example: Process CSV file from a private repository +## Example: Use other versions of data or results -For this we'll have to use the SSH URL to the Git repo (assuming the local -[SSH credentials](https://help.github.com/en/github/authenticating-to-github/connecting-to-github-with-ssh) -are configured locally): +The `rev` argument lets you specify any Git commit to look for an artifact. This +way any previous version, or alternative experiment can be accessed +programmatically. For example, let's say your DVC repo has tagged releases of a +CSV dataset: ```py import csv import dvc.api with dvc.api.open( - 'sea_ice.csv', - repo='git@github.com:iterative/df_sea_ice_no_header.git' + 'clean.csv', + rev='v1.1.0' ) as fd: reader = csv.reader(fd) - for row in reader: - # ... Process columns + # ... Read clean data from version 1.1.0 ``` -> Note that we're using an SSH Git URL for the `repo` argument above. - ## Example: Stream file from a specific remote Sometimes we may want to choose the [remote](/doc/command-reference/remote) data -source, for example to ensure that file streaming is enabled (as only certain -remote storage types support streaming). This can be done by providing a -`remote` argument: +source, for example to ensure that file streaming is enabled (as not all remote +storage types support streaming). This can be done by providing a `remote` +argument: ```py import dvc.api @@ -162,17 +183,3 @@ with open( match = re.search(r'user=(\w+)', line) # ... ``` - -## Example: Unserialize and employ a binary model - -```py -import pickle -import dvc.api - -with dvc.api.open('model.pkl', repo='...') as fd: - model = pickle.load(fd) - # ... Use model -``` - -> For a faster way to perform a similar example, please see the -> [read() example](/doc/api-reference/read#examples). 
From e5b52ae7280f7bc4153d07b27b065961bd7ccce7 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 26 Feb 2020 21:36:12 -0600 Subject: [PATCH 067/100] api: improve list of 3rd party lib examples in get_url per #908 --- public/static/docs/api-reference/get_url.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 37609d4efb..81c2f482f0 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -46,10 +46,10 @@ The URL schema returned depends on the type of `remote`. Here's a full list of the remote storage. 💡 Having the resource's URL, it should be possible to download it directly with -an appropriate tool such as -[`urlretrieve`](https://docs.python.org/3/library/urllib.request.html#urllib.request.urlretrieve) -or `boto3` -[download_fileobj](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.download_fileobj). +an appropriate library, such as +[`boto3`](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.download_fileobj) +or +[`paramiko`](https://docs.paramiko.org/en/stable/api/sftp.html#paramiko.sftp_client.SFTPClient.get). 
Note that if the target is a directory, the URL will end in `.dir`, as DVC stores a special JSON file with `.dir` extension that contains the mapping of From 7f2981f34d44c3374a5e4de96f0967c74dfa0421 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 27 Feb 2020 02:06:29 -0600 Subject: [PATCH 068/100] api ref: compact intro/signature before loner descs per https://github.com/iterative/dvc.org/pull/908#discussion_r384961336 --- public/static/docs/api-reference/get_url.md | 14 ++------------ public/static/docs/api-reference/open.md | 14 +------------- public/static/docs/api-reference/read.md | 14 +------------- 3 files changed, 4 insertions(+), 38 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 81c2f482f0..6255af32d0 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,10 +1,8 @@ -# dvc.api.get_url() +### `dvc.api.get_url(path, repo=None, rev=None, remote=None)` Returns the URL to the storage location of a data file or directory tracked by DVC. -## Usage - ```py import dvc.api @@ -16,15 +14,7 @@ resource_url = dvc.api.get_url( # https://remote.dvc.org/dataset-registry/a3/04af... ``` -### Signature - -```py -get_url(path, repo=None, rev=None, remote=None) -``` - -### Types - -All **parameter** types as well as the **return** type are +All **parameter** types, as well as the **return** type are [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). Raises `UrlNotDvcRepoError` if `repo` is not a DVC repository. 
diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index c9031ecaf7..61781d6cc7 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -1,10 +1,8 @@ -# dvc.api.open() +### `dvc.api.open(path, repo=None, rev=None, remote=None, mode="r", encoding=None)` [Context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) to open a tracked file. -## Usage - ```py import dvc.api @@ -15,22 +13,12 @@ with dvc.api.open( # ... Process data ``` -### Signature - -```py -open(path, repo=None, rev=None, remote=None, mode="r", encoding=None) -``` - -### Types - All **parameter** types are [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). The **return** type is a [file object](https://docs.python.org/3/glossary.html#term-file-object). -No exceptions are raised by this function. - ## Description This function is analogous to the diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 1fe8798a98..1d93e35fa9 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -1,9 +1,7 @@ -# dvc.api.read() +### `dvc.api.read(path, repo=None, rev=None, remote=None, mode="r", encoding=None)` Returns the contents of a tracked file. -## Usage - ```py import dvc.api @@ -13,14 +11,6 @@ modelpkl = dvc.api.read( mode='rb') ``` -### Signature - -```py -read(path, repo=None, rev=None, remote=None, mode="r", encoding=None) -``` - -### Types - All **parameter** types are [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). @@ -29,8 +19,6 @@ The **return** type can be a or a [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). -No exceptions are raised by this function. 
- ## Description This function wraps [`dvc.api.open()`](/doc/api-reference/open) for a simple and From 03d4e722958ad317972cfafcefe0d84e74c54966 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 27 Feb 2020 02:33:34 -0600 Subject: [PATCH 069/100] api ref: improve dvc.api.open() desc similar to open() builtin per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-363050533 --- public/static/docs/api-reference/get_url.md | 6 ++---- public/static/docs/api-reference/open.md | 21 ++++++++++++--------- public/static/docs/api-reference/read.md | 12 ++++++------ 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 6255af32d0..f420bfd484 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -14,14 +14,12 @@ resource_url = dvc.api.get_url( # https://remote.dvc.org/dataset-registry/a3/04af... ``` -All **parameter** types, as well as the **return** type are +All **parameter** types are [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). -Raises `UrlNotDvcRepoError` if `repo` is not a DVC repository. - ## Description -Returns the full URL to the physical location (in a +Returns the full URL string of the physical location (in a [DVC remote](/doc/command-reference/remote)) where a target file or directory (artifact), specified by its `path` in a `repo` (DVC project), is stored. The URL is formed by reading the corresponding diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 61781d6cc7..ee428634e5 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -16,19 +16,22 @@ with dvc.api.open( All **parameter** types are [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). 
-The **return** type is a -[file object](https://docs.python.org/3/glossary.html#term-file-object). - ## Description -This function is analogous to the -[`open()`](https://docs.python.org/3/library/functions.html#open) Python -builtin, but for files tracked in DVC projects (by DVC or Git). -However, it may only be used as a _context manager_ (using the `with` keyword as -shown in the [Examples](#examples) below). There is no `dvc.api.close()`. +Open file tracked in a DVC projects (by DVC or Git), and return a +corresponding +[file object](https://docs.python.org/3/glossary.html#term-file-object). If the +file cannot be found, an `PathMissingError` is raised. + +> This function is analogous to the +> [`open()`](https://docs.python.org/3/library/functions.html#open) Python +> builtin. + +It may only be used as a _context manager_ (using the `with` keyword as shown in +the [Examples](#examples) below). There is no `dvc.api.close()`. > See also `dvc.api.read()` for a shorthand way to read the complete contents of -> a tracked file. +> a tracked file – no _context manager_ syntax needed. `dvc.api.open()` avoids downloading files from **most** [remote types](/doc/command-reference/remote/add#supported-storage-types). It diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 1d93e35fa9..f2af33fa04 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -14,16 +14,16 @@ modelpkl = dvc.api.read( All **parameter** types are [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). -The **return** type can be a -[bytes object](https://docs.python.org/3/glossary.html#term-bytes-like-object) -or a -[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). 
- ## Description This function wraps [`dvc.api.open()`](/doc/api-reference/open) for a simple and direct way to return the complete contents of files tracked in DVC -projects (by DVC or Git) – no _context manager_ (`with` keyword) needed. +projects (by DVC or Git). + +The returned contents can be a +[bytes object](https://docs.python.org/3/glossary.html#term-bytes-like-object) +or a +[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). > Internally, it uses the _file object_'s > [`read()`](https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects) From 6ccfc800afb03e7e047005c542887e530876d90d Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 27 Feb 2020 02:56:40 -0600 Subject: [PATCH 070/100] api ref: updates to get_url per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-365354755 through https://github.com/iterative/dvc.org/pull/908#pullrequestreview-365357254 --- public/static/docs/api-reference/get_url.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index f420bfd484..b884bf983c 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -19,16 +19,16 @@ All **parameter** types are ## Description -Returns the full URL string of the physical location (in a +Returns the URL string of the physical location (in a [DVC remote](/doc/command-reference/remote)) where a target file or directory (artifact), specified by its `path` in a `repo` (DVC project), is stored. The URL is formed by reading the corresponding [DVC-file](/doc/user-guide/dvc-file-format) (see [Examples](#examples) below) as well as the project's -[default remote](https://dvc.org/doc/command-reference/remote/default). +[remote configuration](/doc/command-reference/config#remote). The URL schema returned depends on the type of `remote`. 
Here's a full list of -[supported remote types](https://dvc.org/doc/command-reference/remote/add#supported-storage-types). +[supported remote types](/doc/command-reference/remote/add#supported-storage-types). ⚠️ This function does not check for the actual existence of the target data in the remote storage. @@ -76,14 +76,15 @@ resource_url = dvc.api.get_url( 'get-started/data.xml', repo='https://github.com/iterative/dataset-registry' ) +print(resource_url) ``` -The value of `resource_url` in this case would result in: +Outputs: `https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355` -This URL represents the physical location of the data, and is built by reading -the corresponding DVC-file +This URL represents the location where the data is stored, and is built by +reading the corresponding DVC-file ([`get-started/data.xml.dvc`](https://github.com/iterative/dataset-registry/blob/master/get-started/data.xml.dvc)) where the `md5` file hash is stored, From 418651d139cbddaa78c7199e1df691b79f7567ba Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 27 Feb 2020 03:13:53 -0600 Subject: [PATCH 071/100] api ref: impros to open() per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-365358605 through https://github.com/iterative/dvc.org/pull/908#pullrequestreview-365359182 --- public/static/docs/api-reference/open.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index ee428634e5..2091e34243 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -1,7 +1,6 @@ ### `dvc.api.open(path, repo=None, rev=None, remote=None, mode="r", encoding=None)` -[Context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) -to open a tracked file. +Opens a tracked file. 
```py import dvc.api @@ -10,7 +9,7 @@ with dvc.api.open( 'get-started/data.xml', repo='https://github.com/iterative/dataset-registry' ) as fd: - # ... Process data + # ... fd is a file descriptor that can be processed normally. ``` All **parameter** types are @@ -18,8 +17,8 @@ All **parameter** types are ## Description -Open file tracked in a DVC projects (by DVC or Git), and return a -corresponding +Open file or model tracked in a DVC projects (by DVC or Git), and +return a corresponding [file object](https://docs.python.org/3/glossary.html#term-file-object). If the file cannot be found, an `PathMissingError` is raised. @@ -27,8 +26,9 @@ file cannot be found, an `PathMissingError` is raised. > [`open()`](https://docs.python.org/3/library/functions.html#open) Python > builtin. -It may only be used as a _context manager_ (using the `with` keyword as shown in -the [Examples](#examples) below). There is no `dvc.api.close()`. +It may only be used as a +[context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) +(using the `with` keyword, as shown in the [Examples](#examples) below). > See also `dvc.api.read()` for a shorthand way to read the complete contents of > a tracked file – no _context manager_ syntax needed. 
From 7161e8cbc5540a5866b85d3c0f397c09559b8536 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 27 Feb 2020 11:55:34 -0600 Subject: [PATCH 072/100] api ref: more impros to open() per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-365360090 + --- public/static/docs/api-reference/open.md | 20 ++++++++------------ public/static/docs/api-reference/read.md | 4 +--- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 2091e34243..a2dbc32ad5 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -17,8 +17,8 @@ All **parameter** types are ## Description -Open file or model tracked in a DVC projects (by DVC or Git), and -return a corresponding +Open file or model (`path`) tracked in a DVC project (by DVC or +Git), and return a corresponding [file object](https://docs.python.org/3/glossary.html#term-file-object). If the file cannot be found, an `PathMissingError` is raised. @@ -30,17 +30,13 @@ It may only be used as a [context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) (using the `with` keyword, as shown in the [Examples](#examples) below). -> See also `dvc.api.read()` for a shorthand way to read the complete contents of -> a tracked file – no _context manager_ syntax needed. +> Use `dvc.api.read()` to get the file's contents directly – no _context +> manager_ involved. -`dvc.api.open()` avoids downloading files from **most** -[remote types](/doc/command-reference/remote/add#supported-storage-types). It -returns an open connection to the storage, though which file can be used -directly. Only Google Drive storage does not support this, requiring this -function to completely download the file in `path` (into a temporary directory) -before the file object is made available. - -> This has similar uses as the `dvc get` CLI command. 
+`dvc.api.open()` reads (streams) the file through a direct connection to the +storage whenever possible, so it does not require any space on the disk to save +the file before making it accessible. The only exception is when using a Google +Drive [remote type](/doc/command-reference/remote/add#supported-storage-types). ## Parameters diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index f2af33fa04..b47b4ad854 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -25,9 +25,7 @@ The returned contents can be a or a [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). -> Internally, it uses the _file object_'s -> [`read()`](https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects) -> method. +> This is similar to the `dvc get` command in our CLI. ## Parameters From a2298039c05a74aef75f4dcc2d56ea6286355e5e Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 27 Feb 2020 13:20:39 -0600 Subject: [PATCH 073/100] api ref: remove term "source" from params per https://github.com/iterative/dvc.org/pull/908#discussion_r384866978 --- public/static/docs/api-reference/get_url.md | 10 +++++----- public/static/docs/api-reference/open.md | 10 +++++----- public/static/docs/api-reference/read.md | 10 +++++----- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index b884bf983c..89cb0b2b23 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -47,12 +47,12 @@ and `dvc add` to learn more about how DVC handles data directories. ## Parameters -- **`path`** - specifies the location of the target data within the source - project in `repo`, relative to the project's root. 
+- **`path`** - specifies the location of the target data within the project in + `repo`, relative to the project's root. -- `repo` - specifies the location of the source DVC project. If not supplied, - defaults to the current DVC project. It can be a URL or a file system path. - Both HTTP and SSH protocols are supported for online Git repos (e.g. +- `repo` - specifies the location of the DVC project. If not supplied, defaults + to the current DVC project. It can be a URL or a file system path. Both HTTP + and SSH protocols are supported for online Git repos (e.g. `[user@]server:project.git`). A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index a2dbc32ad5..ae0d29a325 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -40,12 +40,12 @@ Drive [remote type](/doc/command-reference/remote/add#supported-storage-types). ## Parameters -- **`path`** - specifies the location of the target data within the source - project in `repo`, relative to the project's root. +- **`path`** - specifies the location of the target data within the project in + `repo`, relative to the project's root. -- `repo` - specifies the location of the source DVC project. If not supplied, - defaults to the current DVC project. It can be a URL or a file system path. - Both HTTP and SSH protocols are supported for online Git repos (e.g. +- `repo` - specifies the location of the DVC project. If not supplied, defaults + to the current DVC project. It can be a URL or a file system path. Both HTTP + and SSH protocols are supported for online Git repos (e.g. `[user@]server:project.git`). A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. 
diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index b47b4ad854..175acd1954 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -29,12 +29,12 @@ or a ## Parameters -- **`path`** - specifies the location of the target data within the source - project in `repo`, relative to the project's root. +- **`path`** - specifies the location of the target data within the project in + `repo`, relative to the project's root. -- `repo` - specifies the location of the source DVC project. If not supplied, - defaults to the current DVC project. It can be a URL or a file system path. - Both HTTP and SSH protocols are supported for online Git repos (e.g. +- `repo` - specifies the location of the DVC project. If not supplied, defaults + to the current DVC project. It can be a URL or a file system path. Both HTTP + and SSH protocols are supported for online Git repos (e.g. `[user@]server:project.git`). A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. From c43e704f746249340f1cbeff18a118c71a17bd92 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 27 Feb 2020 15:58:52 -0600 Subject: [PATCH 074/100] api ref: better wording in path option per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-365361657 --- public/static/docs/api-reference/get_url.md | 4 ++-- public/static/docs/api-reference/open.md | 4 ++-- public/static/docs/api-reference/read.md | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 89cb0b2b23..b60cb76836 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -47,8 +47,8 @@ and `dvc add` to learn more about how DVC handles data directories. 
## Parameters -- **`path`** - specifies the location of the target data within the project in - `repo`, relative to the project's root. +- **`path`** - location and file name of the file or directory in `repo`, + relative to the project's root. - `repo` - specifies the location of the DVC project. If not supplied, defaults to the current DVC project. It can be a URL or a file system path. Both HTTP diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index ae0d29a325..0c744c3780 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -40,8 +40,8 @@ Drive [remote type](/doc/command-reference/remote/add#supported-storage-types). ## Parameters -- **`path`** - specifies the location of the target data within the project in - `repo`, relative to the project's root. +- **`path`** - location and file name of the file in `repo`, relative to the + project's root. - `repo` - specifies the location of the DVC project. If not supplied, defaults to the current DVC project. It can be a URL or a file system path. Both HTTP diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 175acd1954..81c133aeac 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -29,8 +29,8 @@ or a ## Parameters -- **`path`** - specifies the location of the target data within the project in - `repo`, relative to the project's root. +- **`path`** - location and file name of the file in `repo`, relative to the + project's root. - `repo` - specifies the location of the DVC project. If not supplied, defaults to the current DVC project. It can be a URL or a file system path. 
Both HTTP From 5acaffdb80c221d34f967b1c6818f8be830dad1f Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 27 Feb 2020 16:09:44 -0600 Subject: [PATCH 075/100] api ref: explain mode and encoding options (open/read()) per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-365362335 --- public/static/docs/api-reference/open.md | 11 ++++++----- public/static/docs/api-reference/read.md | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 0c744c3780..af379275c6 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -60,12 +60,13 @@ Drive [remote type](/doc/command-reference/remote/add#supported-storage-types). A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. -- `mode` - (optional) mirrors the namesake parameter in builtin - [`open()`](https://docs.python.org/3/library/functions.html#open). Defaults to - `"r"` (read). +- `mode` - specifies the mode in which the file is opened. Defaults to `"r"` + (read). Mirrors the namesake parameter in builtin + [`open()`](https://docs.python.org/3/library/functions.html#open). -- `encoding` - (optional) used to decode contents to a string. Mirrors the - namesake parameter in builtin `open()`. Defaults to `"utf-8"`. +- `encoding` - used to decode the file contents to a string. This should only be + used in text mode. Defaults to `"utf-8"`. Mirrors the namesake parameter in + builtin `open()`. ## Example: Use artifacts from online DVC repositories diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 81c133aeac..91013ae09d 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -49,12 +49,13 @@ or a A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. 
-- `mode` - mirrors the namesake parameter in builtin - [`open()`](https://docs.python.org/3/library/functions.html#open). Defaults to - `"r"` (read). +- `mode` - specifies the mode in which the file is opened. Defaults to `"r"` + (read). Mirrors the namesake parameter in builtin + [`open()`](https://docs.python.org/3/library/functions.html#open). -- `encoding` - used to decode contents to a string. Mirrors the namesake - parameter in builtin `open()`. Defaults to `"utf-8"`. +- `encoding` - used to decode the file contents to a string. This should only be + used in text mode. Defaults to `"utf-8"`. Mirrors the namesake parameter in + builtin `open()`. ## Examples From 88403384c310bfe551e6bd3ff8b4c39b22dc9fbd Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 27 Feb 2020 16:14:19 -0600 Subject: [PATCH 076/100] api ref: typo project->cache --- public/static/docs/api-reference/open.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index af379275c6..b2de857100 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -123,7 +123,7 @@ with dvc.api.open('data/nlp/words.txt') as fd: ``` DVC will look for the file contents of `data/nlp/words.txt` in the local -project. (If it's not found there, the default +cache. (If it's not found there, the default [remote](/doc/command-reference/remote) will be tried.) 
To specify the file encoding of a text file: From 2e9dc3de435abdc077263b63e0859b88d2775812 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 27 Feb 2020 16:54:30 -0600 Subject: [PATCH 077/100] api ref: move default param behavior to fn descriptions rel: https://github.com/iterative/dvc.org/pull/908#pullrequestreview-365363166 --- public/static/docs/api-reference/get_url.md | 29 ++++++----- public/static/docs/api-reference/open.md | 54 ++++++++++++--------- public/static/docs/api-reference/read.md | 33 ++++++++----- 3 files changed, 72 insertions(+), 44 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index b60cb76836..e7ff11550d 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -27,12 +27,21 @@ project), is stored. The URL is formed by reading the corresponding well as the project's [remote configuration](/doc/command-reference/config#remote). -The URL schema returned depends on the type of `remote`. Here's a full list of -[supported remote types](/doc/command-reference/remote/add#supported-storage-types). +The current project is used by default (the current working directory tree is +walked up to find it), unless a `repo` argument is supplied. + +Unless a `remote` argument is given, the +[default remote](/doc/command-reference/remote/default) of `repo` is used. The +URL schema returned depends on the +[type](/doc/command-reference/remote/add#supported-storage-types) of the remote +used. ⚠️ This function does not check for the actual existence of the target data in the remote storage. +For Git-tracked DVC repositories, unless a `rev` argument is +provided, the repo's `HEAD` version is used. 
+ 💡 Having the resource's URL, it should be possible to download it directly with an appropriate library, such as [`boto3`](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.download_fileobj) @@ -50,20 +59,18 @@ and `dvc add` to learn more about how DVC handles data directories. - **`path`** - location and file name of the file or directory in `repo`, relative to the project's root. -- `repo` - specifies the location of the DVC project. If not supplied, defaults - to the current DVC project. It can be a URL or a file system path. Both HTTP - and SSH protocols are supported for online Git repos (e.g. - `[user@]server:project.git`). +- `repo` - specifies the location of the DVC project. It can be a URL or a file + system path. Both HTTP and SSH protocols are supported for online Git repos + (e.g. `[user@]server:project.git`). A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as - a branch or tag name, or a commit hash). If not supplied, it uses the default - Git revision, `HEAD`. If `repo` is a Git repo, this option is ignored. + a branch or tag name, or a commit hash). If `repo` is not a Git repo, this + option is ignored. -- `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for - the target data. If not supplied, the cache directory is tried first for local - projects; The default remote of `repo` is tried otherwise. +- `remote` - name of the [DVC remote](/doc/command-reference/remote) to use to + form the returned URL string. A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. 
diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index b2de857100..3a9d1fc155 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -19,44 +19,52 @@ All **parameter** types are Open file or model (`path`) tracked in a DVC projects (by DVC or Git), and return a corresponding -[file object](https://docs.python.org/3/glossary.html#term-file-object). If the -file cannot be found, an `PathMissingError` is raised. +[file object](https://docs.python.org/3/glossary.html#term-file-object). + +The current project is used by default (the current working directory tree is +walked up to find it), unless a `repo` argument is supplied. + +Unless a `remote` argument is given, it first tries to find the file in the +cache for local projects. If not found there or for online +projects, the [default remote](/doc/command-reference/remote/default) of `repo` +is tried. If the file cannot be found, a `PathMissingError` is raised. + +For Git-tracked DVC repositories, unless a `rev` argument is +provided, the repo's `HEAD` version is used. > This function is analogous to the > [`open()`](https://docs.python.org/3/library/functions.html#open) Python -> builtin. +> built-in. -It may only be used as a +`dvc.api.open()` may only be used as a [context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) (using the `with` keyword, as shown in the [Examples](#examples) below). > Use `dvc.api.read()` to get the file's contents directly – no _context > manager_ involved. -`dvc.api.open()` reads (streams) the file trough a direct connection to the -storage whenever possible, so it does not require any space on the disc to save -the file before making it accessible. The only exception is when using a Google -Drive [remote type](/doc/command-reference/remote/add#supported-storage-types). 
+This function reads (streams) the file through a direct connection to the storage +whenever possible, so it does not require any space on the disk to save the file +before making it accessible. The only exception is when using a Google Drive +[remote type](/doc/command-reference/remote/add#supported-storage-types). ## Parameters - **`path`** - location and file name of the file in `repo`, relative to the project's root. -- `repo` - specifies the location of the DVC project. If not supplied, defaults - to the current DVC project. It can be a URL or a file system path. Both HTTP - and SSH protocols are supported for online Git repos (e.g. - `[user@]server:project.git`). +- `repo` - specifies the location of the DVC project. It can be a URL or a file + system path. Both HTTP and SSH protocols are supported for online Git repos + (e.g. `[user@]server:project.git`). A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as - a branch or tag name, or a commit hash). If not supplied, it uses the default - Git revision, `HEAD`. If `repo` is a Git repo, this option is ignored. + a branch or tag name, or a commit hash). If `repo` is not a Git repo, this + option is ignored. - `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for - the target data. If not supplied, the cache directory is tried first for local - projects; The default remote of `repo` is tried otherwise. + the target data. A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. @@ -64,9 +72,11 @@ Drive [remote type](/doc/command-reference/remote/add#supported-storage-types). (read). Mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3/library/functions.html#open). -- `encoding` - used to decode the file contents to a string. This should only be - used in text mode. Defaults to `"utf-8"`. Mirrors the namesake parameter in - builtin `open()`. 
+- `encoding` - + [codec](https://docs.python.org/3/library/codecs.html#standard-encodings) used + to decode the file contents to a string. This should only be used in text + mode. Defaults to `"utf-8"`. Mirrors the namesake parameter in builtin + `open()`. ## Example: Use artifacts from online DVC repositories @@ -111,8 +121,8 @@ with dvc.api.open( ## Example: Use a file from the local cache -In this case we don't supply a `repo` argument. DVC will walk up the current -working directory tree to find the DVC project: +In this case we don't supply a `repo` argument. DVC will attempt to find a +current DVC project to use. ```py import dvc.api @@ -129,7 +139,7 @@ DVC will look for the file contents of `data/nlp/words.txt` in the local To specify the file encoding of a text file: ```py -with dvc.api.open('data/nlp/words.txt', encoding='utf-8') as fd: +with dvc.api.open('data/nlp/words_ru.txt', encoding='koi8_r') as fd: # ... ``` diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 91013ae09d..623eef2372 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -20,6 +20,17 @@ This function wraps [`dvc.api.open()`](/doc/api-reference/open) for a simple and direct way to return the complete contents of files tracked in DVC projects (by DVC or Git). +The current project is used by default (the current working directory tree is +walked up to find it), unless a `repo` argument is supplied. + +Unless a `remote` argument is given, it first tries to find the file in the +cache for local projects. If not found there or for online +projects, the [default remote](/doc/command-reference/remote/default) of `repo` +is tried. If the file cannot be found, a `PathMissingError` is raised. + +For Git-tracked DVC repositories, unless a `rev` argument is +provided, the repo's `HEAD` version is used. 
+ The returned contents can be a [bytes object](https://docs.python.org/3/glossary.html#term-bytes-like-object) or a @@ -32,20 +43,18 @@ or a - **`path`** - location and file name of the file in `repo`, relative to the project's root. -- `repo` - specifies the location of the DVC project. If not supplied, defaults - to the current DVC project. It can be a URL or a file system path. Both HTTP - and SSH protocols are supported for online Git repos (e.g. - `[user@]server:project.git`). +- `repo` - specifies the location of the DVC project. It can be a URL or a file + system path. Both HTTP and SSH protocols are supported for online Git repos + (e.g. `[user@]server:project.git`). A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as - a branch or tag name, or a commit hash). If not supplied, it uses the default - Git revision, `HEAD`. If `repo` is a Git repo, this option is ignored. + a branch or tag name, or a commit hash). If `repo` is not a Git repo, this + option is ignored. - `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for - the target data. If not supplied, the cache directory is tried first for local - projects; The default remote of `repo` is tried otherwise. + the target data. A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. @@ -53,9 +62,11 @@ or a (read). Mirrors the namesake parameter in builtin [`open()`](https://docs.python.org/3/library/functions.html#open). -- `encoding` - used to decode the file contents to a string. This should only be - used in text mode. Defaults to `"utf-8"`. Mirrors the namesake parameter in - builtin `open()`. +- `encoding` - + [codec](https://docs.python.org/3/library/codecs.html#standard-encodings) used + to decode the file contents to a string. This should only be used in text + mode. Defaults to `"utf-8"`. Mirrors the namesake parameter in builtin + `open()`. 
## Examples From 35674e366032126e0fe390b91cea9554df040aaf Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 27 Feb 2020 17:11:19 -0600 Subject: [PATCH 078/100] api ref: add read() snippet in open() example per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-365363952 --- public/static/docs/api-reference/open.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 3a9d1fc155..ed0f640aba 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -88,7 +88,6 @@ in your Python app with: ```py from xml.dom.minidom import parse - import dvc.api with dvc.api.open( @@ -99,8 +98,14 @@ with dvc.api.open( # ... Process DOM ``` -> See `dvc.api.read()` for a shorthand way to read the contents of a tracked -> file. +> Notice that you could read the contents of a tracked file faster with +> `dvc.api.read()`: +> +> ```py +> xmldata = dvc.api.read('get-started/data.xml', +> repo='https://github.com/iterative/dataset-registry') +> xmldom = parse(xmldata) +> ``` Now let's imagine you want to unserialize and use a binary model from a private repo online. 
For a case like this, we can use an SSH URL instead (assuming the From 42e8563339b99eae51b5bf40b7ff9c4da3755be7 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 27 Feb 2020 18:08:38 -0600 Subject: [PATCH 079/100] api ref: add read() example explanation and fix link check per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-365364612 and https://github.com/iterative/dvc.org/pull/908#issuecomment-591734590 --- public/static/docs/api-reference/read.md | 2 ++ scripts/exclude-links.txt | 2 ++ 2 files changed, 4 insertions(+) diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 623eef2372..f10a366ad0 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -70,6 +70,8 @@ or a ## Examples +If you want to unserialize and use a binary model from an online repo: + ```py import pickle import dvc.api diff --git a/scripts/exclude-links.txt b/scripts/exclude-links.txt index 65b55280ef..50810643e2 100644 --- a/scripts/exclude-links.txt +++ b/scripts/exclude-links.txt @@ -33,6 +33,8 @@ https://marketplace.visualstudio.com/items?itemName=stkb.rewrap https://myendpoint.com https://object-storage.example.com https://remote.dvc.org/dataset-registry +https://remote.dvc.org/dataset-registry/a3/04af... 
+https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355 https://remote.dvc.org/foo/bar https://remote.dvc.org/get-started https://s3-us-east-2.amazonaws.com/dvc-public/code/foo/bar From ed806161334d33cc8a33e6fa7c18bef784368b5a Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 28 Feb 2020 13:15:38 -0600 Subject: [PATCH 080/100] api ref: name examples per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-365364612 --- public/static/docs/api-reference/get_url.md | 7 +++---- public/static/docs/api-reference/open.md | 2 +- public/static/docs/api-reference/read.md | 8 ++++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index e7ff11550d..74ec750a7a 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -23,9 +23,8 @@ Returns the URL string of the physical location (in a [DVC remote](/doc/command-reference/remote)) where a target file or directory (artifact), specified by its `path` in a `repo` (DVC project), is stored. The URL is formed by reading the corresponding -[DVC-file](/doc/user-guide/dvc-file-format) (see [Examples](#examples) below) as -well as the project's -[remote configuration](/doc/command-reference/config#remote). +[DVC-file](/doc/user-guide/dvc-file-format) (see the **Examples** below) as well +as the project's [remote configuration](/doc/command-reference/config#remote). The current project is used by default (the current working directory tree is walked up to find it), unless a `repo` argument is supplied. @@ -74,7 +73,7 @@ and `dvc add` to learn more about how DVC handles data directories. A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. 
-## Examples +## Example: Get a remote storage URL to a data file ```py import dvc.api diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index ed0f640aba..0dcfb1953a 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -38,7 +38,7 @@ provided, the repo's `HEAD` version is used. `dvc.api.open()` may only be used as a [context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) -(using the `with` keyword, as shown in the [Examples](#examples) below). +(using the `with` keyword, as shown in the **Examples** below). > Use `dvc.api.read()` to get the file's contents directly – no _context > manager_ involved. diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index f10a366ad0..313daf0a8a 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -68,9 +68,13 @@ or a mode. Defaults to `"utf-8"`. Mirrors the namesake parameter in builtin `open()`. -## Examples +## Example: Use artifacts from online DVC repositories -If you want to unserialize and use a binary model from an online repo: +Any data artifact can be employed directly in your Python app by +using this API. 
+ +For example, let's say that you want to unserialize and use a binary model from +an online repo: ```py import pickle From c47b366ecc792b1df7a35cb46f07028c30370e3a Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 28 Feb 2020 18:30:38 -0600 Subject: [PATCH 081/100] api ref: move the default arguments/behavior back to params per chat with Ivan and https://github.com/iterative/dvc.org/pull/908#discussion_r385455830 --- public/static/docs/api-reference/get_url.md | 22 +++++++++---------- public/static/docs/api-reference/open.md | 24 ++++++++++----------- public/static/docs/api-reference/read.md | 24 ++++++++++----------- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 74ec750a7a..2c26199335 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -26,21 +26,13 @@ project), is stored. The URL is formed by reading the corresponding [DVC-file](/doc/user-guide/dvc-file-format) (see the **Examples** below) as well as the project's [remote configuration](/doc/command-reference/config#remote). -The current project is used by default (the current working directory tree is -walked up to find it), unless a `repo` argument is supplied. - -Unless a `remote` argument is given, the -[default remote](/doc/command-reference/remote/default) of `repo` is used. The -URL schema returned depends on the -[type](/doc/command-reference/remote/add#supported-storage-types) of the remote -used. +The URL schema returned depends on the +[type](/doc/command-reference/remote/add#supported-storage-types) of the +`remote` used (see the [Parameters](#parameters) section). ⚠️ This function does not check for the actual existence of the target data in the remote storage. -For Git-tracked DVC repositories, unless a `rev` argument is -provided, the repo's `HEAD` version is used. 
- 💡 Having the resource's URL, it should be possible to download it directly with an appropriate library, such as [`boto3`](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.download_fileobj) @@ -62,15 +54,23 @@ and `dvc add` to learn more about how DVC handles data directories. system path. Both HTTP and SSH protocols are supported for online Git repos (e.g. `[user@]server:project.git`). + The current project is used by default (the current working directory tree is + walked up to find it) if a `repo` argument is not given. + A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as a branch or tag name, or a commit hash). If `repo` is not a Git repo, this option is ignored. + `HEAD` is used by default if a `rev` argument is not given. + - `remote` - name of the [DVC remote](/doc/command-reference/remote) to use to form the returned URL string. + The [default remote](/doc/command-reference/remote/default) of `repo` is used + if a `remote` argument is not given. + A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. ## Example: Get a remote storage URL to a data file diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 0dcfb1953a..8d2256894a 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -19,18 +19,8 @@ All **parameter** types are Open file or model (`path`) tracked in a DVC projects (by DVC or Git), and return a corresponding -[file object](https://docs.python.org/3/glossary.html#term-file-object). - -The current project is used by default (the current working directory tree is -walked up to find it), unless a `repo` argument is supplied. - -Unless a `remote` argument is given, it first tries to find the file in the -cache for local projects. 
If not found there or for online -projects, the [default remote](/doc/command-reference/remote/default) of `repo` -is tried. If the file cannot be found, a `PathMissingError` is raised. - -For Git-tracked DVC repositories, unless a `rev` argument is -provided, the repo's `HEAD` version is used. +[file object](https://docs.python.org/3/glossary.html#term-file-object). If the +file cannot be found, a `PathMissingError` is raised. > This function is analogous to the > [`open()`](https://docs.python.org/3/library/functions.html#open) Python @@ -57,15 +47,25 @@ before making it accessible. The only exception is when using a Google Drive system path. Both HTTP and SSH protocols are supported for online Git repos (e.g. `[user@]server:project.git`). + The current project is used by default (the current working directory tree is + walked up to find it) if a `repo` argument is not given. + A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as a branch or tag name, or a commit hash). If `repo` is not a Git repo, this option is ignored. + `HEAD` is used by default if a `rev` argument is not given. + - `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for the target data. + For online projects, the + [default remote](/doc/command-reference/remote/default) of `repo` is tried if + a `remote` argument is not given. For local projects, the default is to try + the cache before the default remote. + A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. - `mode` - specifies the mode in which the file is opened. 
Defaults to `"r"` diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 313daf0a8a..1323b7f9a9 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -18,18 +18,8 @@ All **parameter** types are This function wraps [`dvc.api.open()`](/doc/api-reference/open) for a simple and direct way to return the complete contents of files tracked in DVC -projects (by DVC or Git). - -The current project is used by default (the current working directory tree is -walked up to find it), unless a `repo` argument is supplied. - -Unless a `remote` argument is given, it first tries to find the file in the -cache for local projects. If not found there or for online -projects, the [default remote](/doc/command-reference/remote/default) of `repo` -is tried. If the file cannot be found, a `PathMissingError` is raised. - -For Git-tracked DVC repositories, unless a `rev` argument is -provided, the repo's `HEAD` version is used. +projects (by DVC or Git). If the file cannot be found, a +`PathMissingError` is raised. The returned contents can be a [bytes object](https://docs.python.org/3/glossary.html#term-bytes-like-object) @@ -47,15 +37,25 @@ or a system path. Both HTTP and SSH protocols are supported for online Git repos (e.g. `[user@]server:project.git`). + The current project is used by default (the current working directory tree is + walked up to find it) if a `repo` argument is not given. + A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as a branch or tag name, or a commit hash). If `repo` is not a Git repo, this option is ignored. + `HEAD` is used by default if a `rev` argument is not given. + - `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for the target data. 
+ For online projects, the + [default remote](/doc/command-reference/remote/default) of `repo` is tried if + a `remote` argument is not given. For local projects, the default is to try + the cache before the default remote. + A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. - `mode` - specifies the mode in which the file is opened. Defaults to `"r"` From a1a6b341db77676074ca0cd863a4019dfdbefb48 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 28 Feb 2020 19:01:20 -0600 Subject: [PATCH 082/100] api ref: use simple language in example titles per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-365362799 --- public/static/docs/api-reference/get_url.md | 2 +- public/static/docs/api-reference/open.md | 7 ++++--- public/static/docs/api-reference/read.md | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 2c26199335..8fd3e88dab 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -73,7 +73,7 @@ and `dvc add` to learn more about how DVC handles data directories. A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. -## Example: Get a remote storage URL to a data file +## Example: Get the remote storage URL to a data file ```py import dvc.api diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 8d2256894a..5867e8fa71 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -124,7 +124,7 @@ with dvc.api.open( # ... Use instanciated model ``` -## Example: Use a file from the local cache +## Example: Open a tracked file in the local file system In this case we don't supply a `repo` argument. DVC will attempt to find a current DVC project to use. 
@@ -138,8 +138,9 @@ with dvc.api.open('data/nlp/words.txt') as fd: ``` DVC will look for the file contents of `data/nlp/words.txt` in the local -cache. (If it's not found there, the default -[remote](/doc/command-reference/remote) will be tried.) +cache first, so no download will happen if it's found. (Otherwise, +the default [remote](/doc/command-reference/remote) will be tried. See the +[Parameters](#parameters) section) To specify the file encoding of a text file: diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 1323b7f9a9..3ca95e4d77 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -68,7 +68,7 @@ or a mode. Defaults to `"utf-8"`. Mirrors the namesake parameter in builtin `open()`. -## Example: Use artifacts from online DVC repositories +## Example: Load data tracked in a DVC repository online Any data artifact can be employed directly in your Python app by using this API. From a25125901bb1dc9978e82f9756190397fc737a20 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 28 Feb 2020 19:31:16 -0600 Subject: [PATCH 083/100] api ref: merge local open() example into --rev example per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-366103871 --- public/static/docs/api-reference/open.md | 44 ++++++++++-------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 5867e8fa71..daadda62ce 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -78,7 +78,7 @@ before making it accessible. The only exception is when using a Google Drive mode. Defaults to `"utf-8"`. Mirrors the namesake parameter in builtin `open()`. 
-## Example: Use artifacts from online DVC repositories +## Example: Use data tracked in a DVC repository online Any data artifact can be employed directly in your Python app by using this API. @@ -124,31 +124,6 @@ with dvc.api.open( # ... Use instanciated model ``` -## Example: Open a tracked file in the local file system - -In this case we don't supply a `repo` argument. DVC will attempt to find a -current DVC project to use. - -```py -import dvc.api - -with dvc.api.open('data/nlp/words.txt') as fd: - for word in fd: - # ... Process words -``` - -DVC will look for the file contents of `data/nlp/words.txt` in the local -cache first, so no download will happen if it's found. (Otherwise, -the default [remote](/doc/command-reference/remote) will be tried. See the -[Parameters](#parameters) section) - -To specify the file encoding of a text file: - -```py -with dvc.api.open('data/nlp/words_ru.txt', encoding='koi8_r') as fd: - # ... -``` - ## Example: Use other versions of data or results The `rev` argument lets you specify any Git commit to look for an artifact. This @@ -168,6 +143,23 @@ with dvc.api.open( # ... Read clean data from version 1.1.0 ``` +Also, notice that in this case we didn't supply a `repo` argument in this +example. DVC will attempt to find a DVC project to use in the +current working directory tree, and look for the file contents of `clean.csv` in +its local cache; no download will happen if found. See the +[Parameters](#parameters) section for more info. + +Note: to specify the file encoding of a text file, use: + +```py +import dvc.api + +with dvc.api.open( + 'data/nlp/words_ru.txt', + encoding='koi8_r') as fd: + # ... 
+``` + ## Example: Stream file from a specific remote Sometimes we may want to choose the [remote](/doc/command-reference/remote) data From af551be039734e18e6bc31882f7f16b744c45e6a Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 28 Feb 2020 19:36:52 -0600 Subject: [PATCH 084/100] api ref: change motivation of `remote` arg example in open() per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-366104684 --- public/static/docs/api-reference/open.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index daadda62ce..a8df02e3bd 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -160,12 +160,11 @@ with dvc.api.open( # ... ``` -## Example: Stream file from a specific remote +## Example: Choose a specific remote as the data source Sometimes we may want to choose the [remote](/doc/command-reference/remote) data -source, for example to ensure that file streaming is enabled (as not all remote -storage types support streaming). This can be done by providing a `remote` -argument: +source, for example if the `repo` has no default remote set.
This can be done by +providing a `remote` argument: ```py import dvc.api From 7277e11efc26cff2cd59d96185df7b6946e20677 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 28 Feb 2020 19:39:42 -0600 Subject: [PATCH 085/100] api ref: unserialize -> deserialize per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-366104762 --- public/static/docs/api-reference/open.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index a8df02e3bd..e2fd611dda 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -107,7 +107,7 @@ with dvc.api.open( > xmldom = parse(xmldata) > ``` -Now let's imagine you want to unserialize and use a binary model from a private +Now let's imagine you want to deserialize and use a binary model from a private repo online. For a case like this, we can use a SSH URL instead (assuming the [credentials are configured](https://help.github.com/en/github/authenticating-to-github/connecting-to-github-with-ssh) locally): From 4f04a628641fb3aa756a4e178028150bd56357d0 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 28 Feb 2020 19:46:32 -0600 Subject: [PATCH 086/100] api ref: some last refinements (on this feedback round) --- public/static/docs/api-reference/get_url.md | 12 ++++++------ public/static/docs/api-reference/open.md | 4 ++-- public/static/docs/api-reference/read.md | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 8fd3e88dab..587a9c8871 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -19,10 +19,10 @@ All **parameter** types are ## Description -Returns the URL string of the physical location (in a -[DVC remote](/doc/command-reference/remote)) where a target file or directory -(artifact), specified by its
`path` in a `repo` (DVC -project), is stored. The URL is formed by reading the corresponding +Returns the URL string of the storage location (in a +[DVC remote](/doc/command-reference/remote)) where a target file or directory, +specified by its `path` in a `repo` (DVC project), is stored. The +URL is formed by reading the corresponding [DVC-file](/doc/user-guide/dvc-file-format) (see the **Examples** below) as well as the project's [remote configuration](/doc/command-reference/config#remote). @@ -54,8 +54,8 @@ and `dvc add` to learn more about how DVC handles data directories. system path. Both HTTP and SSH protocols are supported for online Git repos (e.g. `[user@]server:project.git`). - The current project is used by default (the current working directory tree is - walked up to find it) if a `repo` argument is not given. + The current project is used by default if a `repo` argument is not given (the + current working directory tree is walked up to find it). A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index e2fd611dda..9bbe2401ac 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -47,8 +47,8 @@ before making it accessible. The only exception is when using a Google Drive system path. Both HTTP and SSH protocols are supported for online Git repos (e.g. `[user@]server:project.git`). - The current project is used by default (the current working directory tree is - walked up to find it) if a `repo` argument is not given. + The current project is used by default if a `repo` argument is not given (the + current working directory tree is walked up to find it). A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. 
diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 3ca95e4d77..bf3160d776 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -37,8 +37,8 @@ or a system path. Both HTTP and SSH protocols are supported for online Git repos (e.g. `[user@]server:project.git`). - The current project is used by default (the current working directory tree is - walked up to find it) if a `repo` argument is not given. + The current project is used by default if a `repo` argument is not given (the + current working directory tree is walked up to find it). A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. From 925f520ea3e8559a65e138d778b8eaf7be365ba6 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Fri, 28 Feb 2020 20:19:38 -0600 Subject: [PATCH 087/100] api ref: rewrite intro blocks for simplicity, use type hints --- public/static/docs/api-reference/get_url.md | 12 ++++++++---- public/static/docs/api-reference/open.md | 12 ++++++++---- public/static/docs/api-reference/read.md | 12 ++++++++---- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 587a9c8871..8a4fbdd600 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,8 +1,15 @@ -### `dvc.api.get_url(path, repo=None, rev=None, remote=None)` +# get_url() Returns the URL to the storage location of a data file or directory tracked by DVC. +```py +dvc.api.get_url( path: str, repo: str = None, + rev: str = None, remote: str = None ) -> str +``` + +**Usage:** + ```py import dvc.api @@ -14,9 +21,6 @@ resource_url = dvc.api.get_url( # https://remote.dvc.org/dataset-registry/a3/04af... ``` -All **parameter** types are -[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). 
- ## Description Returns the URL string of the storage location (in a diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 9bbe2401ac..1cc893aad2 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -1,7 +1,14 @@ -### `dvc.api.open(path, repo=None, rev=None, remote=None, mode="r", encoding=None)` +# open() Opens a tracked file. +```py +dvc.api.open(path: str, repo: str = None, rev: str = None, + remote: str = None, mode: str = "r", encoding: str = None) +``` + +**Usage:** + ```py import dvc.api @@ -12,9 +19,6 @@ with dvc.api.open( # ... fd is a file descriptor that can be processed normally. ``` -All **parameter** types are -[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). - ## Description Open file or model (`path`) tracked in a DVC projects (by DVC or diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index bf3160d776..1e144a6575 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -1,7 +1,14 @@ -### `dvc.api.read(path, repo=None, rev=None, remote=None, mode="r", encoding=None)` +# read() Returns the contents of a tracked file. +```py +dvc.api.read(path: str, repo: str = None, rev: str = None, + remote: str = None, mode: str = "r", encoding: str = None) +``` + +**Usage:** + ```py import dvc.api @@ -11,9 +18,6 @@ modelpkl = dvc.api.read( mode='rb') ``` -All **parameter** types are -[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str).
- ## Description This function wraps [`dvc.api.open()`](/doc/api-reference/open) for a simple and From 7105df6e6ef1f328d6f28b311fc679dc3e37a5f4 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sun, 1 Mar 2020 17:56:03 -0600 Subject: [PATCH 088/100] api ref: improve print output of code samples --- public/static/docs/api-reference/get_url.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 8a4fbdd600..465991dd81 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -16,9 +16,6 @@ import dvc.api resource_url = dvc.api.get_url( 'get-started/data.xml', repo='https://github.com/iterative/dataset-registry') - -# resource_url = -# https://remote.dvc.org/dataset-registry/a3/04af... ``` ## Description @@ -89,9 +86,12 @@ resource_url = dvc.api.get_url( print(resource_url) ``` -Outputs: +--- -`https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355` +```dvc +$ python script.py +https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355 +``` This URL represents the location where the data is stored, and is built by reading the corresponding DVC-file From a02b17817867db3799c0f7ec4f5f7601eb543312 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sun, 1 Mar 2020 19:11:22 -0600 Subject: [PATCH 089/100] api ref: few text edits to match to core repo docstrings --- public/static/docs/api-reference/get_url.md | 18 +++++++++--------- public/static/docs/api-reference/open.md | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 465991dd81..9659b5fce0 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,7 +1,7 @@ # get_url() -Returns the URL to the storage location of a data file or directory tracked by 
-DVC. +Returns the URL to the storage location of a data file or directory tracked in a +DVC project. ```py dvc.api.get_url( path: str, repo: str = None, @@ -22,17 +22,17 @@ resource_url = dvc.api.get_url( Returns the URL string of the storage location (in a [DVC remote](/doc/command-reference/remote)) where a target file or directory, -specified by its `path` in a `repo` (DVC project), is stored. The -URL is formed by reading the corresponding -[DVC-file](/doc/user-guide/dvc-file-format) (see the **Examples** below) as well -as the project's [remote configuration](/doc/command-reference/config#remote). +specified by its `path` in a `repo` (DVC project), is stored. -The URL schema returned depends on the +The URL is formed by reading the the project's +[remote configuration](/doc/command-reference/config#remote) and the +[DVC-file](/doc/user-guide/dvc-file-format) where the given `path` is an output +(see the **Examples** below). The URL schema returned depends on the [type](/doc/command-reference/remote/add#supported-storage-types) of the `remote` used (see the [Parameters](#parameters) section). -⚠️ This function does not check for the actual existence of the target data in -the remote storage. +⚠️ This function does not check for the actual existence of the file or +directory in the remote storage. 💡 Having the resource's URL, it should be possible to download it directly with an appropriate library, such as diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 1cc893aad2..dbc5451f48 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -21,7 +21,7 @@ with dvc.api.open( ## Description -Open file or model (`path`) tracked in a DVC projects (by DVC or +Open file or model (`path`) tracked in a DVC project (by DVC or Git), and return a corresponding [file object](https://docs.python.org/3/glossary.html#term-file-object). 
If the file cannot be found, a `PathMissingError` is raised. From 466694c642e409598bc6be7ea6e60648dacd5205 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sun, 1 Mar 2020 19:35:50 -0600 Subject: [PATCH 090/100] =?UTF-8?q?api=20ref:=20correct=20docs=20about=20U?= =?UTF-8?q?rlNotDvcRepoError=20=E2=80=93=C2=A0it=20only=20exists=20in=20ge?= =?UTF-8?q?t=5Furl?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- public/static/docs/api-reference/get_url.md | 5 ++--- public/static/docs/api-reference/open.md | 2 -- public/static/docs/api-reference/read.md | 2 -- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 9659b5fce0..434e35d2f2 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -22,7 +22,8 @@ resource_url = dvc.api.get_url( Returns the URL string of the storage location (in a [DVC remote](/doc/command-reference/remote)) where a target file or directory, -specified by its `path` in a `repo` (DVC project), is stored. +specified by its `path` in a `repo` (DVC project), is stored. If +`repo` is not a DVC project, a `dvc.api.UrlNotDvcRepoError` is raised. The URL is formed by reading the the project's [remote configuration](/doc/command-reference/config#remote) and the @@ -58,8 +59,6 @@ and `dvc add` to learn more about how DVC handles data directories. The current project is used by default if a `repo` argument is not given (the current working directory tree is walked up to find it). - A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. - - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as a branch or tag name, or a commit hash). If `repo` is not a Git repo, this option is ignored. 
diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index dbc5451f48..203ca3278f 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -54,8 +54,6 @@ before making it accessible. The only exception is when using a Google Drive The current project is used by default if a `repo` argument is not given (the current working directory tree is walked up to find it). - A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. - - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as a branch or tag name, or a commit hash). If `repo` is not a Git repo, this option is ignored. diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 1e144a6575..db18cc99a1 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -44,8 +44,6 @@ or a The current project is used by default if a `repo` argument is not given (the current working directory tree is walked up to find it). - A `dvc.api.UrlNotDvcRepoError` is raised if `repo` is not a valid DVC project. - - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as a branch or tag name, or a commit hash). If `repo` is not a Git repo, this option is ignored. 
From 951742d83fa1449fb5c8df7b008f95ef1173f8e7 Mon Sep 17 00:00:00 2001 From: Ivan Shcheklein Date: Mon, 2 Mar 2020 17:12:18 -0800 Subject: [PATCH 091/100] suggest some minor things to API --- public/static/docs/api-reference/get_url.md | 62 +++++++++++---------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 434e35d2f2..d21e952459 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -4,34 +4,43 @@ Returns the URL to the storage location of a data file or directory tracked in a DVC project. ```py -dvc.api.get_url( path: str, repo: str = None, - rev: str = None, remote: str = None ) -> str +def get_url(path: str, + repo: str = None, + rev: str = None, + remote: str = None) -> str ``` -**Usage:** +#### Usage: ```py import dvc.api -resource_url = dvc.api.get_url( +dvc.api.get_url( 'get-started/data.xml', repo='https://github.com/iterative/dataset-registry') + +# https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355 ``` ## Description Returns the URL string of the storage location (in a [DVC remote](/doc/command-reference/remote)) where a target file or directory, -specified by its `path` in a `repo` (DVC project), is stored. If -`repo` is not a DVC project, a `dvc.api.UrlNotDvcRepoError` is raised. +specified by its `path` in a `repo` (DVC project), is stored. The URL is formed by reading the the project's [remote configuration](/doc/command-reference/config#remote) and the -[DVC-file](/doc/user-guide/dvc-file-format) where the given `path` is an output -(see the **Examples** below). The URL schema returned depends on the +[DVC-file](/doc/user-guide/dvc-file-format) where the given `path` is an +output The URL schema returned depends on the [type](/doc/command-reference/remote/add#supported-storage-types) of the `remote` used (see the [Parameters](#parameters) section). 
+If the target is a directory, the returned URL will point to a file that +contains the mapping of files in the directory (as a JSON array), along with +their hash values. Refer to +[Structure of cache directory](/doc/user-guide/dvc-files-and-directories#structure-of-cache-directory) +and `dvc add` to learn more about how DVC handles data directories. + ⚠️ This function does not check for the actual existence of the file or directory in the remote storage. @@ -41,39 +50,31 @@ an appropriate library, such as or [`paramiko`](https://docs.paramiko.org/en/stable/api/sftp.html#paramiko.sftp_client.SFTPClient.get). -Note that if the target is a directory, the URL will end in `.dir`, as DVC -stores a special JSON file with `.dir` extension that contains the mapping of -files in the directory (as a JSON array), along with their hash values. Refer to -[Structure of cache directory](/doc/user-guide/dvc-files-and-directories#structure-of-cache-directory) -and `dvc add` to learn more about how DVC handles data directories. - ## Parameters - **`path`** - location and file name of the file or directory in `repo`, relative to the project's root. - `repo` - specifies the location of the DVC project. It can be a URL or a file - system path. Both HTTP and SSH protocols are supported for online Git repos - (e.g. `[user@]server:project.git`). - - The current project is used by default if a `repo` argument is not given (the - current working directory tree is walked up to find it). + system path. Both HTTP and SSH protocols are supported for online Git + repositories (e.g. `[user@]server:project.git`). _Default_: The current + project is used (the current working directory tree is walked up to find it). - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as a branch or tag name, or a commit hash). If `repo` is not a Git repo, this - option is ignored. - - `HEAD` is used by default if a `rev` argument is not given. + option is ignored. _Default_: `HEAD`. 
- `remote` - name of the [DVC remote](/doc/command-reference/remote) to use to - form the returned URL string. + form the returned URL string. _Default_: The + [default remote](/doc/command-reference/remote/default) of `repo` is used. - The [default remote](/doc/command-reference/remote/default) of `repo` is used - if a `remote` argument is not given. +## Exceptions - A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. +- `dvc.api.UrlNotDvcRepoError` - `repo` is not a DVC project. -## Example: Get the remote storage URL to a data file +- `dvc.exceptions.NoRemoteError` - no `remote` is found. + +## Example: Getting URL to a DVC-tracked file ```py import dvc.api @@ -82,10 +83,13 @@ resource_url = dvc.api.get_url( 'get-started/data.xml', repo='https://github.com/iterative/dataset-registry' ) + print(resource_url) ``` ---- +It prints +`https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355` if +we run it: ```dvc $ python script.py @@ -107,7 +111,7 @@ and the project configuration ([`.dvc/config`](https://github.com/iterative/dataset-registry/blob/master/.dvc/config)) where the remote URL is saved: -```dvc +```ini ['remote "storage"'] url = https://remote.dvc.org/dataset-registry ``` From 410622f360d93d36eedfc94206ca599599d0c7dc Mon Sep 17 00:00:00 2001 From: Ivan Shcheklein Date: Mon, 2 Mar 2020 17:57:57 -0800 Subject: [PATCH 092/100] Update public/static/docs/api-reference/get_url.md Co-Authored-By: Jorge Orpinel --- public/static/docs/api-reference/get_url.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index d21e952459..5af0a65096 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -74,7 +74,7 @@ or - `dvc.exceptions.NoRemoteError` - no `remote` is found. 
-## Example: Getting URL to a DVC-tracked file +## Example: Getting the URL to a DVC-tracked file ```py import dvc.api From 8089423de119bedfdd110ec17ce343b0b939ad29 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Tue, 3 Mar 2020 20:00:38 -0600 Subject: [PATCH 093/100] api ref: address remaining feedback from PR #1032 --- public/static/docs/api-reference/get_url.md | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 5af0a65096..240052492e 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -15,11 +15,11 @@ def get_url(path: str, ```py import dvc.api -dvc.api.get_url( +resource_url = dvc.api.get_url( 'get-started/data.xml', repo='https://github.com/iterative/dataset-registry') -# https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355 +# resource_url is now "https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355" ``` ## Description @@ -35,9 +35,7 @@ The URL is formed by reading the the project's [type](/doc/command-reference/remote/add#supported-storage-types) of the `remote` used (see the [Parameters](#parameters) section). -If the target is a directory, the returned URL will point to a file that -contains the mapping of files in the directory (as a JSON array), along with -their hash values. Refer to +If the target is a directory, the returned URL will end in `.dir`. Refer to [Structure of cache directory](/doc/user-guide/dvc-files-and-directories#structure-of-cache-directory) and `dvc add` to learn more about how DVC handles data directories. 
@@ -87,14 +85,9 @@ resource_url = dvc.api.get_url( print(resource_url) ``` -It prints -`https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355` if -we run it: +The script above prints -```dvc -$ python script.py -https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355 -``` +`https://remote.dvc.org/dataset-registry/a3/04afb96060aad90176268345e10355` This URL represents the location where the data is stored, and is built by reading the corresponding DVC-file From e44028464657bae22bbf49ad90a119207bf7cab8 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 4 Mar 2020 17:40:43 -0600 Subject: [PATCH 094/100] typo --- public/static/docs/api-reference/get_url.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 240052492e..81fb4c5397 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -28,7 +28,7 @@ Returns the URL string of the storage location (in a [DVC remote](/doc/command-reference/remote)) where a target file or directory, specified by its `path` in a `repo` (DVC project), is stored. 
-The URL is formed by reading the the project's +The URL is formed by reading the project's [remote configuration](/doc/command-reference/config#remote) and the [DVC-file](/doc/user-guide/dvc-file-format) where the given `path` is an output The URL schema returned depends on the From 4ee933538471f1580d59353cbad4738c916f6d04 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 4 Mar 2020 17:58:47 -0600 Subject: [PATCH 095/100] api ref: apply get_url improvements to open and read fns see #1032 --- public/static/docs/api-reference/get_url.md | 8 ++-- public/static/docs/api-reference/open.md | 45 ++++++++++++--------- public/static/docs/api-reference/read.md | 42 ++++++++++--------- 3 files changed, 53 insertions(+), 42 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index 81fb4c5397..ae38d8f4f1 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -31,7 +31,7 @@ specified by its `path` in a `repo` (DVC project), is stored. The URL is formed by reading the project's [remote configuration](/doc/command-reference/config#remote) and the [DVC-file](/doc/user-guide/dvc-file-format) where the given `path` is an -output The URL schema returned depends on the +output. The URL schema returned depends on the [type](/doc/command-reference/remote/add#supported-storage-types) of the `remote` used (see the [Parameters](#parameters) section). @@ -54,9 +54,9 @@ or relative to the project's root. - `repo` - specifies the location of the DVC project. It can be a URL or a file - system path. Both HTTP and SSH protocols are supported for online Git - repositories (e.g. `[user@]server:project.git`). _Default_: The current - project is used (the current working directory tree is walked up to find it). + system path. Both HTTP and SSH protocols are supported for online Git repos + (e.g. `[user@]server:project.git`). 
_Default_: The current project is used + (the current working directory tree is walked up to find it). - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as a branch or tag name, or a commit hash). If `repo` is not a Git repo, this diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 203ca3278f..77d5aeb5d4 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -3,11 +3,15 @@ Opens a tracked file. ```py -dvc.api.open(path: str, repo: str = None, rev: str = None, - remote: str = None, mode: str = "r", encoding: str = None) +dvc.api.open(path: str, + repo: str = None, + rev: str = None, + remote: str = None, + mode: str = "r", + encoding: str = None) ``` -**Usage:** +#### Usage: ```py import dvc.api @@ -23,8 +27,7 @@ with dvc.api.open( Open file or model (`path`) tracked in a DVC project (by DVC or Git), and return a corresponding -[file object](https://docs.python.org/3/glossary.html#term-file-object). If the -file cannot be found, a `PathMissingError` is raised. +[file object](https://docs.python.org/3/glossary.html#term-file-object). > This function is analogous to the > [`open()`](https://docs.python.org/3/library/functions.html#open) Python @@ -49,26 +52,18 @@ before making it accessible. The only exception is when using a Google Drive - `repo` - specifies the location of the DVC project. It can be a URL or a file system path. Both HTTP and SSH protocols are supported for online Git repos - (e.g. `[user@]server:project.git`). - - The current project is used by default if a `repo` argument is not given (the - current working directory tree is walked up to find it). + (e.g. `[user@]server:project.git`). _Default_: The current project is used + (the current working directory tree is walked up to find it). - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as a branch or tag name, or a commit hash). 
If `repo` is not a Git repo, this - option is ignored. - - `HEAD` is used by default if a `rev` argument is not given. + option is ignored. _Default_: `HEAD`. - `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for - the target data. - - For online projects, the - [default remote](/doc/command-reference/remote/default) of `repo` is tried if - a `remote` argument is not given. For local projects, the default is to try - the cache before the default remote. - - A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. + the target data. _Default_: The + [default remote](/doc/command-reference/remote/default) of `repo` is used if a + `remote` argument is not given. For local projects, the cache is + tried before the default remote. - `mode` - specifies the mode in which the file is opened. Defaults to `"r"` (read). Mirrors the namesake parameter in builtin @@ -80,6 +75,16 @@ before making it accessible. The only exception is when using a Google Drive mode. Defaults to `"utf-8"`. Mirrors the namesake parameter in builtin `open()`. +## Exceptions + +- `dvc.exceptions.FileMissingError` - file in `path` is missing from `repo`. + +- `dvc.exceptions.PathMissingError` - `path` cannot be found in `repo`. + +- `dvc.api.UrlNotDvcRepoError` - `repo` is not a DVC project. + +- `dvc.exceptions.NoRemoteError` - no `remote` is found. + ## Example: Use data tracked in a DVC repository online Any data artifact can be employed directly in your Python app by diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index db18cc99a1..f73a48c079 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -3,11 +3,15 @@ Returns the contents of a tracked file. 
```py -dvc.api.open(path: str, repo: str = None, rev: str = None, - remote: str = None, mode: str = "r", encoding: str = None) +dvc.api.open(path: str, + repo: str = None, + rev: str = None, + remote: str = None, + mode: str = "r", + encoding: str = None) ``` -**Usage:** +#### Usage: ```py import dvc.api @@ -39,26 +43,18 @@ or a - `repo` - specifies the location of the DVC project. It can be a URL or a file system path. Both HTTP and SSH protocols are supported for online Git repos - (e.g. `[user@]server:project.git`). - - The current project is used by default if a `repo` argument is not given (the - current working directory tree is walked up to find it). + (e.g. `[user@]server:project.git`). _Default_: The current project is used + (the current working directory tree is walked up to find it). - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as a branch or tag name, or a commit hash). If `repo` is not a Git repo, this - option is ignored. - - `HEAD` is used by default if a `rev` argument is not given. + option is ignored. _Default_: `HEAD`. - `remote` - name of the [DVC remote](/doc/command-reference/remote) to look for - the target data. - - For online projects, the - [default remote](/doc/command-reference/remote/default) of `repo` is tried if - a `remote` argument is not given. For local projects, the default is to try - the cache before the default remote. - - A `dvc.exceptions.NoRemoteError` is raised if no `remote` is found. + the target data. _Default_: The + [default remote](/doc/command-reference/remote/default) of `repo` is used if a + `remote` argument is not given. For local projects, the cache is + tried before the default remote. - `mode` - specifies the mode in which the file is opened. Defaults to `"r"` (read). Mirrors the namesake parameter in builtin @@ -70,6 +66,16 @@ or a mode. Defaults to `"utf-8"`. Mirrors the namesake parameter in builtin `open()`. 
+## Exceptions + +- `dvc.exceptions.FileMissingError` - file in `path` is missing from `repo`. + +- `dvc.exceptions.PathMissingError` - `path` cannot be found in `repo`. + +- `dvc.api.UrlNotDvcRepoError` - `repo` is not a DVC project. + +- `dvc.exceptions.NoRemoteError` - no `remote` is found. + ## Example: Load data tracked in a DVC repository online Any data artifact can be employed directly in your Python app by From 6cd45b0a5fc968557f5b6fab7f77b607a3237723 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Wed, 4 Mar 2020 18:24:33 -0600 Subject: [PATCH 096/100] api ref: add info. about types returned/generated to open and read --- public/static/docs/api-reference/open.md | 9 +++++---- public/static/docs/api-reference/read.md | 9 ++++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 77d5aeb5d4..02e86540c8 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -26,12 +26,13 @@ with dvc.api.open( ## Description Open file or model (`path`) tracked in a DVC project (by DVC or -Git), and return a corresponding +Git), and generate a corresponding [file object](https://docs.python.org/3/glossary.html#term-file-object). -> This function is analogous to the -> [`open()`](https://docs.python.org/3/library/functions.html#open) Python -> built-in. +> The exact type of file object generated depends on the `mode` used. For more +> details, please refer to Python's +> [`open()`](https://docs.python.org/3/library/functions.html#open) built-in, +> which is used under the hood. 
`dvc.api.open()` may only be used as a [context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index f73a48c079..bdbc2c5688 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -30,9 +30,12 @@ projects (by DVC or Git). If the file cannot be found, a `PathMissingError` is raised. The returned contents can be a -[bytes object](https://docs.python.org/3/glossary.html#term-bytes-like-object) -or a -[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str). +[string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str) +or a [bytearray](https://docs.python.org/3/library/stdtypes.html#bytearray). + +> The type returned depends on the `mode` used. For more details, please refer +> to Python's [`open()`](https://docs.python.org/3/library/functions.html#open) +> built-in, which is used under the hood. > This is similar to the `dvc get` command in our CLI. From ce5b42a03485ca9c8eea93f424feebde80ac4283 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Thu, 5 Mar 2020 15:28:44 -0600 Subject: [PATCH 097/100] api ref: minor changes to open --- public/static/docs/api-reference/open.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 02e86540c8..b495364af5 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -29,14 +29,14 @@ Open file or model (`path`) tracked in a DVC project (by DVC or Git), and generate a corresponding [file object](https://docs.python.org/3/glossary.html#term-file-object). -> The exact type of file object generated depends on the `mode` used. For more -> details, please refer to Python's +> The exact type of file object depends on the `mode` used. 
For more details, +> please refer to Python's > [`open()`](https://docs.python.org/3/library/functions.html#open) built-in, > which is used under the hood. `dvc.api.open()` may only be used as a [context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) -(using the `with` keyword, as shown in the **Examples** below). +(using the `with` keyword, as shown in the examples). > Use `dvc.api.read()` to get the file's contents directly – no _context > manager_ involved. From 6bd274045174a715531305a9a2dccb37e1c9bcf5 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sat, 7 Mar 2020 19:10:32 -0600 Subject: [PATCH 098/100] api ref: address another round of feedback on open fn per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-369252029 and several other comments --- public/static/docs/api-reference/get_url.md | 2 +- public/static/docs/api-reference/open.md | 56 ++++++++++----------- public/static/docs/api-reference/read.md | 21 ++++---- 3 files changed, 39 insertions(+), 40 deletions(-) diff --git a/public/static/docs/api-reference/get_url.md b/public/static/docs/api-reference/get_url.md index ae38d8f4f1..9a83e33a09 100644 --- a/public/static/docs/api-reference/get_url.md +++ b/public/static/docs/api-reference/get_url.md @@ -1,4 +1,4 @@ -# get_url() +# dvc.api.get_url() Returns the URL to the storage location of a data file or directory tracked in a DVC project. diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index b495364af5..3d9bbcb730 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -1,14 +1,14 @@ -# open() +# dvc.api.open() Opens a tracked file. 
```py -dvc.api.open(path: str, - repo: str = None, - rev: str = None, - remote: str = None, - mode: str = "r", - encoding: str = None) +def open(path: str, + repo: str = None, + rev: str = None, + remote: str = None, + mode: str = "r", + encoding: str = None) ``` #### Usage: @@ -25,9 +25,10 @@ with dvc.api.open( ## Description -Open file or model (`path`) tracked in a DVC project (by DVC or -Git), and generate a corresponding -[file object](https://docs.python.org/3/glossary.html#term-file-object). +Open a data or model file tracked in a DVC project and generate a +corresponding +[file object](https://docs.python.org/3/glossary.html#term-file-object). The +file can be tracked by DVC or by Git. > The exact type of file object depends on the `mode` used. For more details, > please refer to Python's @@ -38,12 +39,13 @@ Git), and generate a corresponding [context manager](https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library) (using the `with` keyword, as shown in the examples). -> Use `dvc.api.read()` to get the file's contents directly – no _context -> manager_ involved. +> Use `dvc.api.read()` to get the complete file contents in a single function +> call – no _context manager_ involved. -This function reads (streams) the file trough a direct connection to the storage -whenever possible, so it does not require any space on the disc to save the file -before making it accessible. The only exception is when using a Google Drive +This function makes a direct connection to the storage most of the times, so the +file contents can be streamed as they are read (which requires an active network +connection). This means it does not require space on the disc to save the file +before making it accessible. The only exception is when using Google Drive as [remote type](/doc/command-reference/remote/add#supported-storage-types). ## Parameters @@ -86,13 +88,11 @@ before making it accessible. 
The only exception is when using a Google Drive - `dvc.exceptions.NoRemoteError` - no `remote` is found. -## Example: Use data tracked in a DVC repository online +## Example: Use data or models from DVC repositories online Any data artifact can be employed directly in your Python app by -using this API. - -For example, an XML file from a public DVC repo online can be processed directly -in your Python app with: +using this API. For example, an XML file tracked in a public DVC repo on Github +can be processed directly in your Python app with: ```py from xml.dom.minidom import parse @@ -106,8 +106,8 @@ with dvc.api.open( # ... Process DOM ``` -> Notice that you could read the contents of a tracked file faster with -> `dvc.api.read()`: +> Notice that if you just need to load the complete file contents to memory, you +> can use `dvc.api.read()` instead: > > ```py > xmldata = dvc.api.read('get-started/data.xml', @@ -116,7 +116,7 @@ with dvc.api.open( > ``` Now let's imagine you want to deserialize and use a binary model from a private -repo online. For a case like this, we can use a SSH URL instead (assuming the +repo. For a case like this, we can use an SSH URL instead (assuming the [credentials are configured](https://help.github.com/en/github/authenticating-to-github/connecting-to-github-with-ssh) locally): @@ -132,7 +132,7 @@ with dvc.api.open( # ... Use instanciated model ``` -## Example: Use other versions of data or results +## Example: Use different versions of data The `rev` argument lets you specify any Git commit to look for an artifact. This way any previous version, or alternative experiment can be accessed @@ -151,10 +151,10 @@ with dvc.api.open( # ... Read clean data from version 1.1.0 ``` -Also, notice that in this case we didn't supply a `repo` argument in this -example. 
DVC will attempt to find a DVC project to use in the -current working directory tree, and look for the file contents of `clean.csv` in -its local cache; no download will happen if found. See the +Also, notice that we didn't supply a `repo` argument in this example. DVC will +attempt to find a DVC project to use in the current working +directory tree, and look for the file contents of `clean.csv` in its local +cache; no download will happen if found. See the [Parameters](#parameters) section for more info. Note: to specify the file encoding of a text file, use: diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index bdbc2c5688..8986274ffb 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -1,14 +1,14 @@ -# read() +# dvc.api.read() Returns the contents of a tracked file. ```py -dvc.api.open(path: str, - repo: str = None, - rev: str = None, - remote: str = None, - mode: str = "r", - encoding: str = None) +def read(path: str, + repo: str = None, + rev: str = None, + remote: str = None, + mode: str = "r", + encoding: str = None) ``` #### Usage: @@ -24,10 +24,9 @@ modelpkl = dvc.api.read( ## Description -This function wraps [`dvc.api.open()`](/doc/api-reference/open) for a simple and -direct way to return the complete contents of files tracked in DVC -projects (by DVC or Git). If the file cannot be found, a -`PathMissingError` is raised. +This function wraps [`dvc.api.open()`](/doc/api-reference/open), for a simple +way to return the complete contents of a file tracked in a DVC +project. The file can be tracked by DVC or by Git. 
The returned contents can be a [string](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str) From 90cb882fc827034667121b2ef14df644e67e9038 Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sat, 7 Mar 2020 19:15:38 -0600 Subject: [PATCH 099/100] api ref: small change to reaf fn example title per https://github.com/iterative/dvc.org/pull/908#pullrequestreview-369256172 --- public/static/docs/api-reference/read.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index 8986274ffb..d788f9feb1 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -78,7 +78,7 @@ or a [bytearray](https://docs.python.org/3/library/stdtypes.html#bytearray). - `dvc.exceptions.NoRemoteError` - no `remote` is found. -## Example: Load data tracked in a DVC repository online +## Example: Load data from a DVC repository online Any data artifact can be employed directly in your Python app by using this API. From 7733feaa41eecca226e33a50467a7665f601206b Mon Sep 17 00:00:00 2001 From: Jorge Orpinel Date: Sat, 7 Mar 2020 19:43:33 -0600 Subject: [PATCH 100/100] api ref: a few last improvements it seems --- public/static/docs/api-reference/open.md | 11 ++++++----- public/static/docs/api-reference/read.md | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/public/static/docs/api-reference/open.md b/public/static/docs/api-reference/open.md index 3d9bbcb730..99602624c7 100644 --- a/public/static/docs/api-reference/open.md +++ b/public/static/docs/api-reference/open.md @@ -42,10 +42,11 @@ file can be tracked by DVC or by Git. > Use `dvc.api.read()` to get the complete file contents in a single function > call – no _context manager_ involved. -This function makes a direct connection to the storage most of the times, so the -file contents can be streamed as they are read (which requires an active network -connection). 
This means it does not require space on the disc to save the file -before making it accessible. The only exception is when using Google Drive as +This function makes a direct connection to the +[remote storage](/doc/command-reference/remote/add#supported-storage-types) +(except for Google Drive), so the file contents can be streamed as they are +read. This means it does not require space on the disk to save the file before +making it accessible. The only exception is when using Google Drive as [remote type](/doc/command-reference/remote/add#supported-storage-types). ## Parameters @@ -88,7 +89,7 @@ before making it accessible. The only exception is when using Google Drive as - `dvc.exceptions.NoRemoteError` - no `remote` is found. -## Example: Use data or models from DVC repositories online +## Example: Use data or models from DVC repositories Any data artifact can be employed directly in your Python app by using this API. For example, an XML file tracked in a public DVC repo on Github diff --git a/public/static/docs/api-reference/read.md b/public/static/docs/api-reference/read.md index d788f9feb1..4fc6def66f 100644 --- a/public/static/docs/api-reference/read.md +++ b/public/static/docs/api-reference/read.md @@ -78,7 +78,7 @@ or a [bytearray](https://docs.python.org/3/library/stdtypes.html#bytearray). - `dvc.exceptions.NoRemoteError` - no `remote` is found. -## Example: Load data from a DVC repository online +## Example: Load data from a DVC repository Any data artifact can be employed directly in your Python app by using this API.