diff --git a/dvc/serialize.py b/dvc/serialize.py index 09172f7765..2ab9656470 100644 --- a/dvc/serialize.py +++ b/dvc/serialize.py @@ -23,7 +23,7 @@ def _get_outs(stage: "PipelineStage"): outs_bucket = {} - for o in stage.outs: + for o in sort_by_path(stage.outs): bucket_key = ["metrics"] if o.metric else ["outs"] if not o.metric and o.persist: @@ -32,7 +32,7 @@ def _get_outs(stage: "PipelineStage"): bucket_key += ["no_cache"] key = "_".join(bucket_key) outs_bucket[key] = outs_bucket.get(key, []) + [o.def_path] - return [(key, outs_bucket[key]) for key in outs_bucket.keys()] + return [(key, outs_bucket[key]) for key in sorted(outs_bucket.keys())] def get_params_deps(stage: "PipelineStage"): @@ -79,7 +79,7 @@ def to_pipeline_file(stage: "PipelineStage"): res = [ (stage.PARAM_CMD, stage.cmd), (stage.PARAM_WDIR, stage.resolve_wdir()), - (stage.PARAM_DEPS, [d.def_path for d in deps]), + (stage.PARAM_DEPS, sorted([d.def_path for d in deps])), (stage.PARAM_PARAMS, serialized_params), *_get_outs(stage), (stage.PARAM_LOCKED, stage.locked), diff --git a/dvc/stage/__init__.py b/dvc/stage/__init__.py index e6afdeaa74..d9b436d93e 100644 --- a/dvc/stage/__init__.py +++ b/dvc/stage/__init__.py @@ -5,7 +5,7 @@ import subprocess import threading -from itertools import chain +from itertools import chain, product from funcy import project @@ -387,6 +387,15 @@ def is_cached(self): out.pop(LocalRemote.PARAM_CHECKSUM, None) out.pop(S3Remote.PARAM_CHECKSUM, None) + # outs and deps are lists of dicts. To check equality, we need to make + # them independent of the order, so, we convert them to dicts. + combination = product( + [old_d, new_d], [self.PARAM_DEPS, self.PARAM_OUTS] + ) + for coll, key in combination: + if coll.get(key): + coll[key] = {item["path"]: item for item in coll[key]} + if old_d != new_d: return False