def get_data(self):
    """Return the Sankey links as a list of record dicts.

    The frame from ``self.get_df()`` has its columns renamed positionally
    to ``source``, ``target`` and ``value`` (so it must have exactly three
    columns), and is then converted with ``to_dict(orient='records')``.

    Before returning, the links are checked for directed cycles, treating
    each row as an edge ``source -> target``.

    Returns:
        A list of ``{'source': ..., 'target': ..., 'value': ...}`` dicts,
        one per row.

    Raises:
        Exception: if the links contain a directed cycle (a self-link
            counts). The message names one link that closes the loop.
    """
    df = self.get_df()
    df.columns = ['source', 'target', 'value']
    recs = df.to_dict(orient='records')

    # Adjacency map: source -> set of its direct targets.
    hierarchy = {}
    for row in recs:
        hierarchy.setdefault(row['source'], set()).add(row['target'])

    def find_cycle(graph):
        """Return a ``(source, target)`` edge that closes a cycle, or None.

        Iterative depth-first search. ``on_path`` holds the vertices of the
        branch currently being explored; meeting one of them again means
        the edge just followed closes a loop, and that exact edge is
        returned. ``finished`` holds vertices whose whole subtree is known
        to be cycle-free, so shared sub-graphs are walked only once.
        """
        finished = set()
        for root in graph:
            if root in finished:
                continue
            on_path = {root}
            # Each stack frame keeps a live iterator so that exploration of
            # a vertex resumes where it stopped once a child is done.
            stack = [(root, iter(graph.get(root, ())))]
            while stack:
                vertex, neighbours = stack[-1]
                for neighbour in neighbours:
                    if neighbour in on_path:
                        return (vertex, neighbour)
                    if neighbour not in finished:
                        on_path.add(neighbour)
                        stack.append(
                            (neighbour, iter(graph.get(neighbour, ()))))
                        break
                else:
                    # All neighbours explored without finding a loop.
                    stack.pop()
                    on_path.discard(vertex)
                    finished.add(vertex)
        return None

    cycle = find_cycle(hierarchy)
    if cycle:
        raise Exception(
            "There's a loop in your Sankey, please provide a tree. "
            "Here's a faulty link: {}".format(cycle))
    return recs