From a3ecdf909e48efdbb636dbd65738b7b2104e4298 Mon Sep 17 00:00:00 2001 From: Emilio Jesus Gallego Arias Date: Mon, 9 Dec 2019 17:21:51 +0100 Subject: [PATCH] [performance] Avoid linear walk of graph children. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calling Dune in scenarios where targets have a large number of deps, Dune will take a long time to start. A common case is when depending on `(package coq)`, which brings into the DAG a few thousand files. `perf` data show this is due to the linear walk in `Dag.is_child`; indeed, doing a naive replacement of the list for a more efficient access structure solves the problem: ``` with this PR: real 0m1,684s user 0m1,552s sys 0m0,128s with master: real 0m11,450s user 0m10,587s sys 0m0,264s ``` This PR is an attempt to fix this by using a more efficient representation of deps [that allows checking of deps in log n time, so the complexity of `is_child` goes from O(n²) to O(n log(n)). Signed-off-by: Emilio Jesus Gallego Arias --- CHANGES.md | 3 +++ src/dag/dag.ml | 13 ++++++------ src/dag/dag_intf.ml | 4 +++- src/memo/memo.ml | 1 + test/expect-tests/dag/dag_tests.ml | 14 ++++++------- test/expect-tests/memo/memoize_tests.ml | 8 +++---- .../src/incremental_cycles.ml | 6 +++--- .../src/incremental_cycles_intf.ml | 21 ++++++++++++++++++- 8 files changed, 48 insertions(+), 22 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index b0f95db38b7c..d550ed1e0148 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -32,6 +32,9 @@ - Do not install vendored packages (#3704, @diml) +- Avoid linear walk to detect children, this should greatly improve performance + when a target has a large number of dependencies (#2959, @ejgallego, review by: ) + 2.1.3 (16/01/2020) ------------------ diff --git a/src/dag/dag.ml b/src/dag/dag.ml index 83947d17f364..e360b19c9fb8 100644 --- a/src/dag/dag.ml +++ b/src/dag/dag.ml @@ -12,12 +12,14 @@ module Make (Value : Value) : S with type value := Value.t = struct type graph = t + module NodeMap = Map.Make(Int) + type node_info = { id : int ; (* only used for printing *) mutable mark : mark ; mutable level : int - ; mutable deps : node list + ; mutable deps : node NodeMap.t ; mutable rev_deps : node list ; mutable parent : node option } @@ -60,7 +62,7 @@ module Make (Value : Value) : S with type value := Value.t = struct let set_parent _ v p = v.info.parent <- Some p let raw_add_edge _ v w = - v.info.deps <- w :: v.info.deps; + v.info.deps <- NodeMap.add_exn v.info.deps w.info.id w; if v.info.level = w.info.level then w.info.rev_deps <- v :: w.info.rev_deps @@ -77,7 +79,7 @@ module Make (Value : Value) : S with type value := Value.t = struct let create_node_info g = let id = g.fresh_id in g.fresh_id <- g.fresh_id + 1; - { id; mark = -1; level = 1; deps = []; rev_deps = []; parent = None } + { id; mark = -1; level = 1; deps = NodeMap.empty; rev_deps = []; parent = None } let add g v w = match IC.add_edge_or_detect_cycle g v w with @@ -100,10 +102,9 @@ module Make (Value : Value) : S with type value := Value.t = struct pp_value n.data ( pp_depth (depth + 1) pp_value |> Fmt.list ~pp_sep:(fun fmt () -> Format.fprintf fmt ";@, ") ) - n.info.deps + (NodeMap.values n.info.deps) let pp_node pp_value fmt n = pp_depth 0 pp_value fmt n - let is_child v w = - v.info.deps |> List.exists ~f:(fun c -> c.info.id = w.info.id) + let is_child v w = NodeMap.mem v.info.deps w.info.id end diff --git a/src/dag/dag_intf.ml b/src/dag/dag_intf.ml index fceea8484024..f22f0bde883c 100644 --- a/src/dag/dag_intf.ml +++ b/src/dag/dag_intf.ml @@ -16,6 +16,8 @@ module type S = sig (** Type of values attached to nodes. *) type value + module NodeMap : Map.S with type key := int + type node = { data : value ; info : node_info @@ -36,7 +38,7 @@ module type S = sig (** [children v] returns all nodes [w] for which an arc going from [v] to [w] exists. *) - val children : node -> node list + val children : node -> node NodeMap.t (** Pretty print a node. *) val pp_node : value Fmt.t -> node Fmt.t diff --git a/src/memo/memo.ml b/src/memo/memo.ml index ab6c2e0bbedd..51bc38b7443a 100644 --- a/src/memo/memo.ml +++ b/src/memo/memo.ml @@ -463,6 +463,7 @@ let add_rev_dep (type i o f) ~called_from_peek (dep_node : (i, o, f) Dep_node.t) order in which they were added. See the comment for [deps : Last_dep.t list]. *) let get_deps_from_graph_exn (dep_node : _ Dep_node.t) = Dag.children dep_node.dag_node + |> Dag.NodeMap.values |> List.map ~f:(fun { Dag.data = Dep_node.T node; _ } -> match node.state with | Init -> assert false diff --git a/test/expect-tests/dag/dag_tests.ml b/test/expect-tests/dag/dag_tests.ml index 2325553e7fdd..64e1149944c2 100644 --- a/test/expect-tests/dag/dag_tests.ml +++ b/test/expect-tests/dag/dag_tests.ml @@ -53,13 +53,13 @@ let%expect_test _ = List.map ~f:Pp.text cycle |> Pp.concat ~sep:Pp.space |> print; [%expect {| -(1: k=1) (root) [(3: k=1) (child 1 2) [(4: k=1) (child 2 1) [(5: k=2) (child 3 1) [ - ]]]; - (2: k=1) (child 1 1) []] -(1: k=1) (root) [(3: k=1) (child 1 2) [(4: k=1) (child 2 1) [(5: k=2) (child 3 1) [ - (6: k=2) (child 4 1) [ - ]]]]; - (2: k=1) (child 1 1) []] +(1: k=1) (root) [(2: k=1) (child 1 1) []; + (3: k=1) (child 1 2) [(4: k=1) (child 2 1) [(5: k=2) (child 3 1) [ + ]]]] +(1: k=1) (root) [(2: k=1) (child 1 1) []; + (3: k=1) (child 1 2) [(4: k=1) (child 2 1) [(5: k=2) (child 3 1) [ + (6: k=2) (child 4 1) [ + ]]]]] child 4 1 child 3 1 child 2 1 child 1 2 root child 4 1 |}] diff --git a/test/expect-tests/memo/memoize_tests.ml b/test/expect-tests/memo/memoize_tests.ml index 6479b6e027f4..f6dfeab9d725 100644 --- a/test/expect-tests/memo/memoize_tests.ml +++ b/test/expect-tests/memo/memoize_tests.ml @@ -78,7 +78,7 @@ let%expect_test _ = |> option (list (pair string (fun x -> x))) |> print_dyn; [%expect {| -Some [ ("another", "aa"); ("some", "a") ] +Some [ ("some", "a"); ("another", "aa") ] |}] let%expect_test _ = @@ -307,7 +307,7 @@ let%expect_test _ = [%expect {| (Some [ ("lazy_memo", "foo") ], -Some [ ("id", "lazy: foo"); ("lazy_memo", "foo") ]) +Some [ ("lazy_memo", "foo"); ("id", "lazy: foo") ]) |}] module Memo_lazy = Test_lazy (Memo.Lazy) @@ -322,8 +322,8 @@ let%expect_test _ = Memo_lazy.deps () |> print_dyn; [%expect {| -(Some [ ("lazy-0", ()); ("lazy_memo", "foo") ], -Some [ ("lazy-0", ()); ("lazy_memo", "foo") ]) +(Some [ ("lazy_memo", "foo"); ("lazy-0", ()) ], +Some [ ("lazy_memo", "foo"); ("lazy-0", ()) ]) |}] (* Tests for depending on the current run *) diff --git a/vendor/incremental-cycles/src/incremental_cycles.ml b/vendor/incremental-cycles/src/incremental_cycles.ml index ec1b9230651b..7b490fa3275e 100644 --- a/vendor/incremental-cycles/src/incremental_cycles.ml +++ b/vendor/incremental-cycles/src/incremental_cycles.ml @@ -153,8 +153,8 @@ struct | [] -> ForwardCompleted | x :: stack -> ( let res = - interruptible_fold - (fun y stack -> + NodeMap.interruptible_fold + ~f:(fun y stack -> if is_marked g y visited then (* We found a path to a marked vertex *) Break y @@ -172,7 +172,7 @@ struct ) else (* y_level > new_level *) Continue stack) - (get_outgoing g x) stack + (get_outgoing g x) ~init:stack in match res with | Break y -> ForwardCyclic (x, y) diff --git a/vendor/incremental-cycles/src/incremental_cycles_intf.ml b/vendor/incremental-cycles/src/incremental_cycles_intf.ml index 7f878933e59d..ac0ab409d7ff 100644 --- a/vendor/incremental-cycles/src/incremental_cycles_intf.ml +++ b/vendor/incremental-cycles/src/incremental_cycles_intf.ml @@ -1,8 +1,27 @@ +module type NodeMap_intf = sig + + type key + type 'a t + + type ('a, 'b) interruptible_fold_step = + | Continue of 'a + | Break of 'b + + val interruptible_fold + : 'a t + -> init:'b + -> f:('a -> 'b -> ('b, 'c) interruptible_fold_step) + -> ('b, 'c) interruptible_fold_step + +end + module type Raw_graph = sig type mark type graph type vertex + module NodeMap : NodeMap_intf with type key := int + val new_mark : graph -> mark val vertex_eq : vertex -> vertex -> bool @@ -20,7 +39,7 @@ module type Raw_graph = sig val get_parent : graph -> vertex -> vertex val set_parent : graph -> vertex -> vertex -> unit - val get_outgoing : graph -> vertex -> vertex list + val get_outgoing : graph -> vertex -> vertex NodeMap.t val raw_add_edge : graph -> vertex -> vertex -> unit val raw_add_vertex : graph -> vertex -> unit