-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add top-level `artifacts:` section (#9220)
* add artifacts section
* adding and removing artifacts
* delete code for add/remove; add tests
* fix pre-commit
* remove extra piece of writing to dvc.yaml
* remove extra piece
* simplify kwargs
* allow same names for artifacts in different dvc.yaml files
* set ID as path; don't return dvc.yaml(s) with no artifacts
* fix PR feedback
* issue warning at incorrect artifact name
* issue warning at incorrect artifact name
* reference right schema
* add r to regexp
* add tests for regexp
* set artifacts for SingleStageFile
* fix windows test
* make path required; simplify regexp and remove slash from it
* allow to use gto's artifacts.yaml
* remove extra piece of code for migration from gto
- Loading branch information
Showing 8 changed files with 229 additions and 1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import logging | ||
import os | ||
import re | ||
from typing import TYPE_CHECKING, Dict | ||
|
||
from dvc.annotations import Artifact | ||
from dvc.dvcfile import FileMixin | ||
from dvc.utils import relpath | ||
|
||
if TYPE_CHECKING: | ||
from dvc.repo import Repo | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
# Artifact names (IDs): lowercase ASCII letters, digits and '-' separators.
# The first character must be a letter and the last must not be '-'.
NAME_RE = re.compile(r"^[a-z]([a-z0-9-]*[a-z0-9])?$")


def name_is_compatible(name: str) -> bool:
    """Return True if *name* is a valid artifact name (ID).

    A valid name consists of lowercase ASCII letters, digits and ``-``,
    starts with a letter and does not end with ``-``. The empty string is
    not valid.
    """
    # fullmatch() rather than search(): with search() the pattern's trailing
    # `$` also matches just before a final newline, so a name such as
    # "model\n" would wrongly be accepted.
    return NAME_RE.fullmatch(name) is not None
|
||
|
||
def check_name_format(name: str) -> None:
    """Log a warning if *name* is not a valid artifact name (ID).

    The check is advisory only: nothing is raised and nothing is returned,
    so callers keep processing the artifact regardless of the outcome.
    """
    if not name_is_compatible(name):
        logger.warning(
            "Can't use '%s' as artifact name (ID)."
            " You can use letters and numbers, and use '-' as separator"
            " (but not at the start or end). The first character must be a letter.",
            name,
        )
|
||
|
||
class ArtifactsFile(FileMixin):
    """File wrapper for a standalone artifacts file.

    Only loading is supported: ``dump`` and ``merge`` are deliberately left
    unimplemented and raise ``NotImplementedError``.
    """

    # Bound as a class attribute; presumably consumed by FileMixin when it
    # validates the loaded contents -- confirm against FileMixin.
    from dvc.schema import SINGLE_ARTIFACT_SCHEMA as SCHEMA

    def dump(self, stage, **kwargs):
        """Not supported for artifacts files; always raises."""
        raise NotImplementedError

    def merge(self, ancestor, other, allowed=None):
        """Not supported for artifacts files; always raises."""
        raise NotImplementedError
|
||
|
||
class Artifacts:
    """Reader for the artifacts recorded in a repo's index."""

    def __init__(self, repo: "Repo") -> None:
        self.repo = repo

    def read(self) -> Dict[str, Dict[str, Artifact]]:
        """Collect all artifacts known to the repo index.

        Returns:
            A mapping from each dvc.yaml path (relative to the repo root) to
            a mapping of artifact name -> ``Artifact``. Files that declare no
            artifacts are omitted.

        An index entry whose value is a string is treated as the path of a
        separate artifacts file, resolved relative to the directory of the
        dvc.yaml that references it, and loaded through ``ArtifactsFile``.
        Names that fail ``check_name_format`` only trigger a warning; the
        artifact is still returned.
        """
        artifacts: Dict[str, Dict[str, Artifact]] = {}
        for (
            dvcfile,
            dvcfile_artifacts,
        ) in self.repo.index._artifacts.items():  # pylint: disable=protected-access
            # read the artifacts.yaml file if needed
            if isinstance(dvcfile_artifacts, str):
                dvcfile_artifacts = ArtifactsFile(
                    self.repo,
                    os.path.join(os.path.dirname(dvcfile), dvcfile_artifacts),
                    verify=False,
                ).load()
            # Skip files without artifacts so they do not appear as empty
            # entries in the result.
            if not dvcfile_artifacts:
                continue
            dvcyaml = relpath(dvcfile, self.repo.root_dir)
            artifacts[dvcyaml] = {}
            for name, value in dvcfile_artifacts.items():
                check_name_format(name)
                artifacts[dvcyaml][name] = Artifact(**value)
        return artifacts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
import os | ||
|
||
import pytest | ||
|
||
from dvc.annotations import Artifact | ||
from dvc.repo.artifacts import name_is_compatible | ||
from dvc.utils.strictyaml import YAMLSyntaxError, YAMLValidationError | ||
|
||
# Valid dvc.yaml contents shared by the tests: three artifacts, one of which
# ("world") carries every optional field.
dvcyaml = {
    "artifacts": {
        "myart": {"type": "model", "path": "myart.pkl"},
        "hello": {"type": "file", "path": "hello.txt"},
        "world": {
            "type": "object",
            "path": "world.txt",
            "desc": "The world is not enough",
            "labels": ["but", "this", "is"],
            "meta": {"such": "a", "perfect": "place to start"},
        },
    }
}
|
||
|
||
def test_reading_artifacts_subdir(tmp_dir, dvc):
    """Artifacts are read from every dvc.yaml, keyed by its relative path."""
    (tmp_dir / "dvc.yaml").dump(dvcyaml)

    subdir = tmp_dir / "subdir"
    subdir.mkdir()

    # The same artifact names are reused on purpose: the expected result
    # below keeps them apart by dvc.yaml path.
    (subdir / "dvc.yaml").dump(dvcyaml)

    artifacts = {
        name: Artifact(**values) for name, values in dvcyaml["artifacts"].items()
    }
    assert tmp_dir.dvc.artifacts.read() == {
        "dvc.yaml": artifacts,
        f"subdir{os.path.sep}dvc.yaml": artifacts,
    }
|
||
|
||
# Invalid dvc.yaml contents: artifact "lol" carries the field "kek", which the
# test below expects to be rejected.
bad_dvcyaml_extra_field = {
    "artifacts": {
        "lol": {"kek": "cheburek", "path": "lol"},
        "hello": {"type": "file", "path": "hello.txt"},
    }
}
|
||
|
||
# Invalid dvc.yaml contents: artifact "lol" has no "path" field.
bad_dvcyaml_missing_path = {
    "artifacts": {
        "lol": {},
    }
}
|
||
|
||
@pytest.mark.parametrize(
    "bad_dvcyaml", [bad_dvcyaml_extra_field, bad_dvcyaml_missing_path]
)
def test_broken_dvcyaml_extra_field(tmp_dir, dvc, bad_dvcyaml):
    """Reading artifacts from a schema-violating dvc.yaml fails validation.

    Parametrized over both an unknown extra field and a missing ``path``.
    """
    (tmp_dir / "dvc.yaml").dump(bad_dvcyaml)

    with pytest.raises(YAMLValidationError):
        tmp_dir.dvc.artifacts.read()
|
||
|
||
# Raw YAML text (not a dict) because a Python dict cannot hold the duplicate
# "lol" key that the test needs under "artifacts".
bad_dvcyaml_id_duplication = """
artifacts:
  lol:
    type: kek
  lol: {}
"""
|
||
|
||
def test_broken_dvcyaml_id_duplication(tmp_dir, dvc):
    """A dvc.yaml that declares the same artifact ID twice is a syntax error."""
    # Written as raw text: the duplicate key cannot be expressed as a dict.
    # The encoding is explicit so the test does not depend on the platform
    # default.
    with open(tmp_dir / "dvc.yaml", "w", encoding="utf-8") as f:
        f.write(bad_dvcyaml_id_duplication)

    with pytest.raises(YAMLSyntaxError):
        tmp_dir.dvc.artifacts.read()
|
||
|
||
# dvc.yaml contents whose "artifacts" value is a file name instead of an
# inline mapping.
dvcyaml_redirecting = {"artifacts": "artifacts.yaml"}
|
||
|
||
def test_read_artifacts_yaml(tmp_dir, dvc):
    """Artifacts in a separate artifacts.yaml are reported under dvc.yaml."""
    (tmp_dir / "dvc.yaml").dump(dvcyaml_redirecting)
    (tmp_dir / "artifacts.yaml").dump(dvcyaml["artifacts"])

    artifacts = {
        name: Artifact(**values) for name, values in dvcyaml["artifacts"].items()
    }
    # The result is keyed by the referencing dvc.yaml, not by artifacts.yaml.
    assert tmp_dir.dvc.artifacts.read() == {
        "dvc.yaml": artifacts,
    }
|
||
|
||
@pytest.mark.parametrize(
    "name",
    [
        "m",
        "nn",
        "m1",
        "model-prod",
        "model-prod-v1",
    ],
)
def test_check_name_is_valid(name):
    """Names made of lowercase letters, digits and inner '-' are accepted."""
    assert name_is_compatible(name)
|
||
|
||
@pytest.mark.parametrize(
    "name",
    [
        "",
        "1",
        "m/",
        "/m",
        "1nn",
        "###",
        "@@@",
        "a model",
        "a_model",
        "-model",
        "model-",
        "model@1",
        "model#1",
        "@namespace/model",
        "namespace/model",
    ],
)
def test_check_name_is_invalid(name):
    """Empty names and names with other characters or edge '-' are rejected."""
    assert not name_is_compatible(name)