From 2dda540db66815ec07317fbb8e6b846a860e3003 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 4 May 2020 08:40:42 +0000 Subject: [PATCH] Make comparisons in is_cached independent of order (#3731) In is_cached, we used to compare two dicts: one for the current stage in memory, created by `run`, and one that's already written to the file. As `outs` and `deps` are lists (which are generated by `dumpd`), the comparison was dependent on the order of outs and deps. So the `run` before and the one now can list them in a different order, which would fail the comparison. --- dvc/serialize.py | 6 +++--- dvc/stage/__init__.py | 11 ++++++++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/dvc/serialize.py b/dvc/serialize.py index 09172f7765..2ab9656470 100644 --- a/dvc/serialize.py +++ b/dvc/serialize.py @@ -23,7 +23,7 @@ def _get_outs(stage: "PipelineStage"): outs_bucket = {} - for o in stage.outs: + for o in sort_by_path(stage.outs): bucket_key = ["metrics"] if o.metric else ["outs"] if not o.metric and o.persist: @@ -32,7 +32,7 @@ def _get_outs(stage: "PipelineStage"): bucket_key += ["no_cache"] key = "_".join(bucket_key) outs_bucket[key] = outs_bucket.get(key, []) + [o.def_path] - return [(key, outs_bucket[key]) for key in outs_bucket.keys()] + return [(key, outs_bucket[key]) for key in sorted(outs_bucket.keys())] def get_params_deps(stage: "PipelineStage"): @@ -79,7 +79,7 @@ def to_pipeline_file(stage: "PipelineStage"): res = [ (stage.PARAM_CMD, stage.cmd), (stage.PARAM_WDIR, stage.resolve_wdir()), - (stage.PARAM_DEPS, [d.def_path for d in deps]), + (stage.PARAM_DEPS, sorted([d.def_path for d in deps])), (stage.PARAM_PARAMS, serialized_params), *_get_outs(stage), (stage.PARAM_LOCKED, stage.locked), diff --git a/dvc/stage/__init__.py b/dvc/stage/__init__.py index e6afdeaa74..d9b436d93e 100644 --- a/dvc/stage/__init__.py +++ b/dvc/stage/__init__.py @@ -5,7 +5,7 @@ import subprocess import threading -from itertools import chain +from itertools import chain, product from 
funcy import project @@ -387,6 +387,15 @@ def is_cached(self): out.pop(LocalRemote.PARAM_CHECKSUM, None) out.pop(S3Remote.PARAM_CHECKSUM, None) + # outs and deps are lists of dicts. To check equality, we need to make + # them independent of the order, so, we convert them to dicts. + combination = product( + [old_d, new_d], [self.PARAM_DEPS, self.PARAM_OUTS] + ) + for coll, key in combination: + if coll.get(key): + coll[key] = {item["path"]: item for item in coll[key]} + if old_d != new_d: return False