Skip to content

Commit

Permalink
repro: fix bug in _get_active_graph (#3301)
Browse files Browse the repository at this point in the history
We are currently removing all of the deps stages from the DAG even if
those deps are referenced by some parallel branches of execution.
The new approach is to remove edges and then gradually remove nodes if
there are no more `in` edges left.

Discord context:
https://discordapp.com/channels/485586884165107732/485596304961962003/676464313942409265
  • Loading branch information
efiop authored Feb 10, 2020
1 parent 0c88979 commit 2361bce
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 5 deletions.
17 changes: 12 additions & 5 deletions dvc/repo/reproduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,18 @@ def _get_active_graph(G):
for stage in G:
if not stage.locked:
continue
for st in nx.dfs_postorder_nodes(G, stage):
if st == stage:
continue
if st in active:
active.remove_node(st)
active.remove_edges_from(G.out_edges(stage))
for edge in G.out_edges(stage):
_, to_stage = edge
for node in nx.dfs_preorder_nodes(G, to_stage):
# NOTE: `in_degree` will return InDegreeView({}) if stage
# no longer exists in the `active` DAG.
if not active.in_degree(node):
# NOTE: if some edge no longer exists `remove_edges_from`
# will ignore it without error.
active.remove_edges_from(G.out_edges(node))
active.remove_node(node)

return active


Expand Down
File renamed without changes.
25 changes: 25 additions & 0 deletions tests/unit/repo/test_reproduce.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from dvc.repo.reproduce import _get_active_graph


def test_get_active_graph(tmp_dir, dvc):
pre_foo_stage, = tmp_dir.dvc_gen({"pre-foo": "pre-foo"})
foo_stage = dvc.run(deps=["pre-foo"], outs=["foo"], cmd="echo foo > foo")
bar_stage = dvc.run(deps=["foo"], outs=["bar"], cmd="echo bar > bar")
baz_stage = dvc.run(deps=["foo"], outs=["baz"], cmd="echo baz > baz")

dvc.lock_stage("bar.dvc")

graph = dvc.graph
active_graph = _get_active_graph(graph)
assert active_graph.nodes == graph.nodes
assert set(active_graph.edges) == {
(foo_stage, pre_foo_stage),
(baz_stage, foo_stage),
}

dvc.lock_stage("baz.dvc")

graph = dvc.graph
active_graph = _get_active_graph(graph)
assert set(active_graph.nodes) == {bar_stage, baz_stage}
assert not active_graph.edges

0 comments on commit 2361bce

Please sign in to comment.