Skip to content

Commit

Permalink
[performance] Avoid linear walk of graph children.
Browse files Browse the repository at this point in the history
When calling Dune in scenarios where targets have a large number of
deps, Dune will take a long time to start. A common case is when
depending on `(package coq)`, which brings into the DAG a few thousand
files.

`perf` data show this is due to the linear walk in `Dag.is_child`;
indeed, doing a naive replacement of the list for a more efficient
access structure solves the problem:

```
with this PR:

real	0m1,684s
user	0m1,552s
sys	0m0,128s

with master:

real	0m11,450s
user	0m10,587s
sys	0m0,264s
```

This PR is an attempt to fix this by using a more efficient
representation of deps [that allows checking of deps in log n time, so
the complexity of `is_child` goes from O(n²) to O(n log(n)).

Signed-off-by: Emilio Jesus Gallego Arias <[email protected]>
  • Loading branch information
ejgallego committed Feb 4, 2020
1 parent f43d3d2 commit 359ef70
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 24 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@

- Do not install vendored packages (#3704, @diml)

- Avoid linear walk to detect children, this should greatly improve performance
when a target has a large number of dependencies (#2959, @ejgallego, review by: )

2.1.3 (16/01/2020)
------------------

Expand Down
25 changes: 13 additions & 12 deletions src/dag/dag.ml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ module Make (Value : Value) : S with type value := Value.t = struct

type graph = t

module NodeMap = Map.Make(Int)

type node_info =
{ id : int
; (* only used for printing *)
mutable mark : mark
; mutable level : int
; mutable deps : node list
; mutable rev_deps : node list
; mutable deps : node NodeMap.t
; mutable rev_deps : node NodeMap.t
; mutable parent : node option
}

Expand All @@ -30,8 +32,8 @@ module Make (Value : Value) : S with type value := Value.t = struct
type vertex = node

module VertexSet = struct
type t = vertex list
let interruptible_fold = List.interruptible_fold
type t = vertex NodeMap.t
let interruptible_fold = NodeMap.interruptible_fold
end

let new_mark g =
Expand All @@ -51,9 +53,9 @@ module Make (Value : Value) : S with type value := Value.t = struct

let get_incoming _ v = v.info.rev_deps

let clear_incoming _ v = v.info.rev_deps <- []
let clear_incoming _ v = v.info.rev_deps <- NodeMap.empty

let add_incoming _ v w = v.info.rev_deps <- w :: v.info.rev_deps
let add_incoming _ v w = v.info.rev_deps <- NodeMap.add_exn v.info.rev_deps w.info.id w

let get_outgoing _ v = v.info.deps

Expand All @@ -65,9 +67,9 @@ module Make (Value : Value) : S with type value := Value.t = struct
let set_parent _ v p = v.info.parent <- Some p

let raw_add_edge _ v w =
v.info.deps <- w :: v.info.deps;
v.info.deps <- NodeMap.add_exn v.info.deps w.info.id w;
if v.info.level = w.info.level then
w.info.rev_deps <- v :: w.info.rev_deps
w.info.rev_deps <- NodeMap.add_exn w.info.rev_deps v.info.id v

let raw_add_vertex _ _ = ()
end
Expand All @@ -82,7 +84,7 @@ module Make (Value : Value) : S with type value := Value.t = struct
let create_node_info g =
let id = g.fresh_id in
g.fresh_id <- g.fresh_id + 1;
{ id; mark = -1; level = 1; deps = []; rev_deps = []; parent = None }
{ id; mark = -1; level = 1; deps = NodeMap.empty; rev_deps = NodeMap.empty; parent = None }

let add g v w =
match IC.add_edge_or_detect_cycle g v w with
Expand All @@ -105,10 +107,9 @@ module Make (Value : Value) : S with type value := Value.t = struct
pp_value n.data
( pp_depth (depth + 1) pp_value
|> Fmt.list ~pp_sep:(fun fmt () -> Format.fprintf fmt ";@, ") )
n.info.deps
(NodeMap.values n.info.deps)

let pp_node pp_value fmt n = pp_depth 0 pp_value fmt n

let is_child v w =
v.info.deps |> List.exists ~f:(fun c -> c.info.id = w.info.id)
let is_child v w = NodeMap.mem v.info.deps w.info.id
end
4 changes: 3 additions & 1 deletion src/dag/dag_intf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ module type S = sig
(** Type of values attached to nodes. *)
type value

module NodeMap : Map.S with type key := int

type node =
{ data : value
; info : node_info
Expand All @@ -36,7 +38,7 @@ module type S = sig

(** [children v] returns all nodes [w] for which an arc going from [v] to [w]
exists. *)
val children : node -> node list
val children : node -> node NodeMap.t

(** Pretty print a node. *)
val pp_node : value Fmt.t -> node Fmt.t
Expand Down
1 change: 1 addition & 0 deletions src/memo/memo.ml
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ let add_rev_dep (type i o f) ~called_from_peek (dep_node : (i, o, f) Dep_node.t)
order in which they were added. See the comment for [deps : Last_dep.t list]. *)
let get_deps_from_graph_exn (dep_node : _ Dep_node.t) =
Dag.children dep_node.dag_node
|> Dag.NodeMap.values
|> List.map ~f:(fun { Dag.data = Dep_node.T node; _ } ->
match node.state with
| Init -> assert false
Expand Down
14 changes: 7 additions & 7 deletions test/expect-tests/dag/dag_tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,13 @@ let%expect_test _ =
List.map ~f:Pp.text cycle |> Pp.concat ~sep:Pp.space |> print;
[%expect
{|
(1: k=1) (root) [(3: k=1) (child 1 2) [(4: k=1) (child 2 1) [(5: k=2) (child 3 1) [
]]];
(2: k=1) (child 1 1) []]
(1: k=1) (root) [(3: k=1) (child 1 2) [(4: k=1) (child 2 1) [(5: k=2) (child 3 1) [
(6: k=2) (child 4 1) [
]]]];
(2: k=1) (child 1 1) []]
(1: k=1) (root) [(2: k=1) (child 1 1) [];
(3: k=1) (child 1 2) [(4: k=1) (child 2 1) [(5: k=2) (child 3 1) [
]]]]
(1: k=1) (root) [(2: k=1) (child 1 1) [];
(3: k=1) (child 1 2) [(4: k=1) (child 2 1) [(5: k=2) (child 3 1) [
(6: k=2) (child 4 1) [
]]]]]
child 4 1 child 3 1 child 2 1 child 1 2 root child 4
1
|}]
Expand Down
8 changes: 4 additions & 4 deletions test/expect-tests/memo/memoize_tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ let%expect_test _ =
|> option (list (pair string (fun x -> x)))
|> print_dyn;
[%expect {|
Some [ ("another", "aa"); ("some", "a") ]
Some [ ("some", "a"); ("another", "aa") ]
|}]

let%expect_test _ =
Expand Down Expand Up @@ -307,7 +307,7 @@ let%expect_test _ =
[%expect
{|
(Some [ ("lazy_memo", "foo") ],
Some [ ("id", "lazy: foo"); ("lazy_memo", "foo") ])
Some [ ("lazy_memo", "foo"); ("id", "lazy: foo") ])
|}]

module Memo_lazy = Test_lazy (Memo.Lazy)
Expand All @@ -322,8 +322,8 @@ let%expect_test _ =
Memo_lazy.deps () |> print_dyn;
[%expect
{|
(Some [ ("lazy-0", ()); ("lazy_memo", "foo") ],
Some [ ("lazy-0", ()); ("lazy_memo", "foo") ])
(Some [ ("lazy_memo", "foo"); ("lazy-0", ()) ],
Some [ ("lazy_memo", "foo"); ("lazy-0", ()) ])
|}]

(* Tests for depending on the current run *)
Expand Down

0 comments on commit 359ef70

Please sign in to comment.