Skip to content

Commit

Permalink
Make comparisons in is_cached independent of order (#3731)
Browse files Browse the repository at this point in the history
On is_cached, we used to compare two dicts: one of current stage in memory
created by `run` and one that's already written to the file.
As `outs` and `dicts` are lists (which is generate by `dumpd`), the
comparison was dependent on the order of outs and dicts.

So, the `run` before and now, can be in different order and would fail
the comparisons.
  • Loading branch information
skshetry authored May 4, 2020
1 parent 76e6ba2 commit 2dda540
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 4 deletions.
6 changes: 3 additions & 3 deletions dvc/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

def _get_outs(stage: "PipelineStage"):
outs_bucket = {}
for o in stage.outs:
for o in sort_by_path(stage.outs):
bucket_key = ["metrics"] if o.metric else ["outs"]

if not o.metric and o.persist:
Expand All @@ -32,7 +32,7 @@ def _get_outs(stage: "PipelineStage"):
bucket_key += ["no_cache"]
key = "_".join(bucket_key)
outs_bucket[key] = outs_bucket.get(key, []) + [o.def_path]
return [(key, outs_bucket[key]) for key in outs_bucket.keys()]
return [(key, outs_bucket[key]) for key in sorted(outs_bucket.keys())]


def get_params_deps(stage: "PipelineStage"):
Expand Down Expand Up @@ -79,7 +79,7 @@ def to_pipeline_file(stage: "PipelineStage"):
res = [
(stage.PARAM_CMD, stage.cmd),
(stage.PARAM_WDIR, stage.resolve_wdir()),
(stage.PARAM_DEPS, [d.def_path for d in deps]),
(stage.PARAM_DEPS, sorted([d.def_path for d in deps])),
(stage.PARAM_PARAMS, serialized_params),
*_get_outs(stage),
(stage.PARAM_LOCKED, stage.locked),
Expand Down
11 changes: 10 additions & 1 deletion dvc/stage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import subprocess
import threading

from itertools import chain
from itertools import chain, product

from funcy import project

Expand Down Expand Up @@ -387,6 +387,15 @@ def is_cached(self):
out.pop(LocalRemote.PARAM_CHECKSUM, None)
out.pop(S3Remote.PARAM_CHECKSUM, None)

# outs and deps are lists of dicts. To check equality, we need to make
# them independent of the order, so, we convert them to dicts.
combination = product(
[old_d, new_d], [self.PARAM_DEPS, self.PARAM_OUTS]
)
for coll, key in combination:
if coll.get(key):
coll[key] = {item["path"]: item for item in coll[key]}

if old_d != new_d:
return False

Expand Down

0 comments on commit 2dda540

Please sign in to comment.