From 609d9dd638165e70911b629afbbf271c8dd8c590 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Saugat=20Pachhai=20=28=E0=A4=B8=E0=A5=8C=E0=A4=97=E0=A4=BE?=
 =?UTF-8?q?=E0=A4=A4=29?=
Date: Mon, 5 Jun 2023 15:16:43 +0545
Subject: [PATCH 1/2] add pydantic schema for dvc.yaml

---
Reviewer notes (text between "---" and the diff is not part of the commit):
  * Adds dvc/pydantic_schema.py: pydantic models (Plot, OutputConfig,
    MetricConfig, PlotConfig, Stage, ForeachDo, Artifact, Project) and
    Project.load_from(), which reads a YAML file through
    dvc.utils.serialize.load_yaml and validates it with parse_obj.
  * Adds pydantic to the "tests" dependency list and whitelists it as a
    pylint extension package.
  * Line breaks and indentation of this series were restored by hand; every
    hunk's line counts match its "@@" header.
  * Fixed the malformed suppression "# noqa; A003" on Project.vars to
    "# noqa: A003". The blob ids on the "index" lines predate this
    one-character fix.

 dvc/pydantic_schema.py | 109 +++++++++++++++++++++++++++++++++++++++++
 pyproject.toml         |   4 ++
 2 files changed, 113 insertions(+)
 create mode 100644 dvc/pydantic_schema.py

diff --git a/dvc/pydantic_schema.py b/dvc/pydantic_schema.py
new file mode 100644
index 0000000000..8c47a8ddcd
--- /dev/null
+++ b/dvc/pydantic_schema.py
@@ -0,0 +1,109 @@
+import warnings
+from typing import TYPE_CHECKING, Dict, List, NewType, Optional, TypeVar, Union
+
+from pydantic import BaseModel, Field
+
+if TYPE_CHECKING:
+    from dvc.fs import FileSystem
+    from dvc.types import StrOrBytesPath
+
+
+_T = TypeVar("_T")
+OneOrMore = Union[_T, List[_T]]
+ItemWithConfig = Union[str, Dict[str, _T]]
+PathLike = NewType("PathLike", str)
+PathOrId = NewType("PathOrId", str)
+TemplateNameOrPath = NewType("TemplateNameOrPath", str)
+ParamPath = NewType("ParamPath", str)
+PlotColumn = NewType("PlotColumn", str)
+
+
+class Plot(BaseModel):
+    x: Union[PlotColumn, Dict[PathLike, PlotColumn], None] = None
+    y: Union[
+        OneOrMore[PlotColumn],
+        Dict[PathLike, OneOrMore[PlotColumn]],
+        None,
+    ] = None
+    x_label: Optional[str] = None
+    y_label: Optional[str] = None
+    title: Optional[str] = None
+    template: Optional[TemplateNameOrPath] = None
+
+
+class OutputConfig(BaseModel):
+    desc: Optional[str] = None
+    type: Optional[str] = None  # noqa: A003
+    labels: Optional[str] = None
+    meta: object = None
+    cache: bool = True
+    persist: bool = False
+    remote: Optional[str] = None
+    push: bool = True
+
+
+class MetricConfig(OutputConfig):
+    pass
+
+
+class PlotConfig(OutputConfig):
+    template: Optional[TemplateNameOrPath] = None
+    x: Optional[PlotColumn] = None
+    y: Optional[PlotColumn] = None
+    x_label: Optional[str] = None
+    y_label: Optional[str] = None
+    title: Optional[str] = None
+    header: bool = False
+
+
+class Stage(BaseModel):
+    cmd: OneOrMore[str]
+    wdir: Optional[PathLike] = None
+    deps: List[PathLike] = Field(default_factory=list)
+    params: List[Union[ParamPath, Dict[PathLike, List[ParamPath]]]] = Field(
+        default_factory=list
+    )
+    frozen: bool = False
+    meta: object = None
+    desc: Optional[str] = None
+    always_changed: bool = False
+    outs: List[ItemWithConfig[OutputConfig]] = Field(default_factory=list)
+    metrics: List[ItemWithConfig[MetricConfig]] = Field(default_factory=list)
+    plots: List[Union[PathLike, Dict[PathLike, OneOrMore[PlotConfig]]]] = Field(
+        default_factory=list
+    )
+
+
+class ForeachDo(BaseModel):
+    foreach: Union[str, Dict, List]
+    do: Stage
+
+
+class Artifact(BaseModel):
+    path: PathLike
+    desc: Optional[str] = None
+    type: Optional[str] = None  # noqa: A003
+    labels: List[str] = Field(default_factory=list)
+    meta: object = None
+
+
+class Project(BaseModel):
+    plots: List[Union[PathLike, Dict[PathOrId, Optional[Plot]]]] = Field(
+        default_factory=list
+    )
+    stages: Dict[str, Union[Stage, ForeachDo]] = Field(default_factory=dict)
+    vars: List[Union[PathLike, Dict[str, object]]] = Field(  # noqa: A003
+        default_factory=list
+    )
+    params: List[PathLike] = Field(default_factory=list)
+    metrics: List[PathLike] = Field(default_factory=list)
+    artifacts: Dict[str, Artifact] = Field(default_factory=dict)
+
+    @classmethod
+    def load_from(cls, path: "StrOrBytesPath", fs: Optional["FileSystem"] = None):
+        from dvc.utils.serialize import load_yaml
+
+        d = load_yaml(path, fs=fs)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", category=DeprecationWarning)
+            return cls.parse_obj(d)
diff --git a/pyproject.toml b/pyproject.toml
index 77e663b567..fded33773d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -99,6 +99,7 @@ tests = [
   "dvc-ssh",
   "filelock",
   "flaky",
+  "pydantic>=1.10,<3",
   "pytest<8,>=7",
   "pytest-cov>=4.1.0",
   "pytest-docker>=1,<2",
@@ -242,6 +243,9 @@ module = [
 [tool.codespell]
 ignore-words-list = "ba,datas,fo,uptodate,cachable,falsy"
 
+[tool.pylint.master]
+extension-pkg-whitelist = ["pydantic"]
+
 [tool.pylint.message_control]
 disable = [
   "cyclic-import", "design", "duplicate-code", "fixme", "format", "import-outside-toplevel", "invalid-name",

From 11348cb3d82f472527ec2c41abf2a90c9a8407d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Saugat=20Pachhai=20=28=E0=A4=B8=E0=A5=8C=E0=A4=97=E0=A4=BE?=
 =?UTF-8?q?=E0=A4=A4=29?=
Date: Wed, 14 Jun 2023 22:06:01 +0545
Subject: [PATCH 2/2] support tracking params completely

---
Reviewer notes (text between "---" and the diff is not part of the commit):
  * Introduces a local BaseModel with ConfigDict(extra="forbid"), so every
    model in the module rejects unknown keys.
  * Replaces ItemWithConfig with the two-parameter Config alias, makes
    OutputConfig.labels a list of strings, and lets a params file entry map
    to None as well as to a list of ParamPath.
  * Project.stages now goes through foreach_or_stage_validator, which picks
    ForeachDo when the raw value is a dict containing "foreach" and Stage
    otherwise.
  * The Project.vars context line carries the same "# noqa: A003" fix as
    patch 1 so the series still applies in order; the "index" blob ids
    predate that fix.
  * NOTE(review): Project.load_from still calls parse_obj under a
    DeprecationWarning filter while the rest of this patch uses
    model_validate; consider migrating it in a follow-up.
  * NOTE(review): the test dependency is pinned to the pre-release
    pydantic==2.0b2; confirm this is intended before merging.

 dvc/pydantic_schema.py | 37 ++++++++++++++++++++++++++++++-------
 pyproject.toml         |  2 +-
 2 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/dvc/pydantic_schema.py b/dvc/pydantic_schema.py
index 8c47a8ddcd..9555fd9c1f 100644
--- a/dvc/pydantic_schema.py
+++ b/dvc/pydantic_schema.py
@@ -1,7 +1,10 @@
 import warnings
 from typing import TYPE_CHECKING, Dict, List, NewType, Optional, TypeVar, Union
 
-from pydantic import BaseModel, Field
+import pydantic
+from pydantic import ConfigDict, Field
+from pydantic.functional_validators import BeforeValidator
+from typing_extensions import Annotated
 
 if TYPE_CHECKING:
     from dvc.fs import FileSystem
@@ -9,8 +12,13 @@
 
 
 _T = TypeVar("_T")
+
+_Key = TypeVar("_Key")
+_Config = TypeVar("_Config")
+
 OneOrMore = Union[_T, List[_T]]
-ItemWithConfig = Union[str, Dict[str, _T]]
+Config = Union[_Key, Dict[_Key, _Config]]
+
 PathLike = NewType("PathLike", str)
 PathOrId = NewType("PathOrId", str)
 TemplateNameOrPath = NewType("TemplateNameOrPath", str)
@@ -18,6 +26,10 @@
 PlotColumn = NewType("PlotColumn", str)
 
 
+class BaseModel(pydantic.BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+
 class Plot(BaseModel):
     x: Union[PlotColumn, Dict[PathLike, PlotColumn], None] = None
     y: Union[
@@ -34,7 +46,7 @@ class Plot(BaseModel):
 class OutputConfig(BaseModel):
     desc: Optional[str] = None
     type: Optional[str] = None  # noqa: A003
-    labels: Optional[str] = None
+    labels: List[str] = Field(default_factory=list)
     meta: object = None
     cache: bool = True
     persist: bool = False
@@ -60,15 +72,15 @@ class Stage(BaseModel):
     cmd: OneOrMore[str]
     wdir: Optional[PathLike] = None
     deps: List[PathLike] = Field(default_factory=list)
-    params: List[Union[ParamPath, Dict[PathLike, List[ParamPath]]]] = Field(
+    params: List[Union[ParamPath, Dict[PathLike, Optional[List[ParamPath]]]]] = Field(
         default_factory=list
     )
     frozen: bool = False
     meta: object = None
     desc: Optional[str] = None
     always_changed: bool = False
-    outs: List[ItemWithConfig[OutputConfig]] = Field(default_factory=list)
-    metrics: List[ItemWithConfig[MetricConfig]] = Field(default_factory=list)
+    outs: List[Config[PathLike, OutputConfig]] = Field(default_factory=list)
+    metrics: List[Config[PathLike, MetricConfig]] = Field(default_factory=list)
     plots: List[Union[PathLike, Dict[PathLike, OneOrMore[PlotConfig]]]] = Field(
         default_factory=list
     )
@@ -79,6 +91,17 @@ class ForeachDo(BaseModel):
     do: Stage
 
 
+def foreach_or_stage_validator(v):
+    if isinstance(v, dict) and "foreach" in v:
+        return ForeachDo.model_validate(v)
+    return Stage.model_validate(v)
+
+
+ForeachDoOrStage = Annotated[
+    Union[ForeachDo, Stage], BeforeValidator(foreach_or_stage_validator)
+]
+
+
 class Artifact(BaseModel):
     path: PathLike
     desc: Optional[str] = None
@@ -91,7 +114,7 @@ class Project(BaseModel):
     plots: List[Union[PathLike, Dict[PathOrId, Optional[Plot]]]] = Field(
         default_factory=list
     )
-    stages: Dict[str, Union[Stage, ForeachDo]] = Field(default_factory=dict)
+    stages: Dict[str, ForeachDoOrStage] = Field(default_factory=dict)
     vars: List[Union[PathLike, Dict[str, object]]] = Field(  # noqa: A003
         default_factory=list
     )
diff --git a/pyproject.toml b/pyproject.toml
index fded33773d..08885d2539 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -99,7 +99,7 @@ tests = [
   "dvc-ssh",
   "filelock",
   "flaky",
-  "pydantic>=1.10,<3",
+  "pydantic==2.0b2",
   "pytest<8,>=7",
   "pytest-cov>=4.1.0",
   "pytest-docker>=1,<2",