Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[performance] Avoid linear walk of graph children. #2959

Merged
merged 8 commits into from
Feb 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
2.3.0 (unreleased)
------------------

- Avoid linear walk to detect children, this should greatly improve
performance when a target has a large number of dependencies (#2959,
@ejgallego, @aalekseyev, @Armael)

2.2.0 (06/02/2020)
------------------

Expand Down
39 changes: 33 additions & 6 deletions src/dag/dag.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ open! Stdune
include Dag_intf

module Make (Value : Value) : S with type value := Value.t = struct
(* Raw_graph here should have the same complexity than the assumed interface
on the incremental_cycles proofs, in particular [get_outgoing] should run
in constant time. *)
module Raw_graph = struct
type mark = int

Expand All @@ -12,12 +15,16 @@ module Make (Value : Value) : S with type value := Value.t = struct

type graph = t

module Node_map = Map.Make (Int)

type node_info =
{ id : int
; (* only used for printing *)
mutable mark : mark
; mutable level : int
; mutable deps : node list
; (* see #2959, we need to implement is_child efficiently *)
mutable deps_set : unit Node_map.t
; mutable rev_deps : node list
; mutable parent : node option
}
Expand Down Expand Up @@ -60,7 +67,8 @@ module Make (Value : Value) : S with type value := Value.t = struct
let set_parent _ v p = v.info.parent <- Some p

let raw_add_edge _ v w =
v.info.deps <- w :: v.info.deps
v.info.deps <- w :: v.info.deps;
v.info.deps_set <- Node_map.add_exn v.info.deps_set w.info.id ()

let raw_add_vertex _ _ = ()
end
Expand All @@ -75,9 +83,19 @@ module Make (Value : Value) : S with type value := Value.t = struct
let create_node_info g =
let id = g.fresh_id in
g.fresh_id <- g.fresh_id + 1;
{ id; mark = -1; level = 1; deps = []; rev_deps = []; parent = None }

let add g v w =
{ id
; mark = -1
; level = 1
; deps = []
; deps_set = Node_map.empty
; rev_deps = []
; parent = None
}

(* [add_assuming_missing dag v w] creates an arc going from [v] to [w]. @raise
Cycle if creating the arc would create a cycle. This assumes that the arc
does not already exist. *)
let add_assuming_missing g v w =
match IC.add_edge_or_detect_cycle g v w with
| IC.EdgeAdded -> ()
| IC.EdgeCreatesCycle compute_cycle ->
Expand All @@ -102,6 +120,15 @@ module Make (Value : Value) : S with type value := Value.t = struct

let pp_node pp_value fmt n = pp_depth 0 pp_value fmt n

let is_child v w =
v.info.deps |> List.exists ~f:(fun c -> c.info.id = w.info.id)
let is_child v w = Node_map.mem v.info.deps_set w.info.id

let add_idempotent g v w =
(* if the edge doesn't already exist, we add it to the graph; note that the
complexity guarantees for `Dag.add` don't hold if the edge is already in
the graph, hence the check , see #2959 for more details and the README of
the vendored library *)
if is_child v w then
()
else
add_assuming_missing g v w
end
3 changes: 3 additions & 0 deletions src/dag/dag.mli
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ open! Stdune
ACM Trans. Algorithms 12, 2, Article 14 (December 2015), 22 pages. DOI:
https://doi.org/10.1145/2756553 *)

(** Note that this file uses [vendor/incremental-cycles] and has to meet some
invariants, for more information see incremental-cycles' README *)

module type Value = Dag_intf.Value

module type S = Dag_intf.S
Expand Down
6 changes: 3 additions & 3 deletions src/dag/dag_intf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ module type S = sig
(** [create_node_info dag v] creates new node info that belongs to [dag]. *)
val create_node_info : t -> node_info

(** [add dag v w] creates an arc going from [v] to [w]. @raise Cycle if
creating the arc would create a cycle. *)
val add : t -> node -> node -> unit
(** [add_idempotent dag v w] creates an arc going from [v] to [w] unless it
already exists. @raise Cycle if creating the arc would create a cycle. *)
val add_idempotent : t -> node -> node -> unit

(** [children v] returns all nodes [w] for which an arc going from [v] to [w]
exists. *)
Expand Down
7 changes: 2 additions & 5 deletions src/memo/memo.ml
Original file line number Diff line number Diff line change
Expand Up @@ -447,10 +447,7 @@ let add_rev_dep (type i o f) ~called_from_peek (dep_node : (i, o, f) Dep_node.t)
in
let dag_node = dep_node.dag_node in
let rev_dep = rev_dep.dag_node in
try
(* if the caller doesn't already contain this as a dependent *)
if Dag.is_child rev_dep dag_node |> not then
Dag.add global_dep_dag rev_dep dag_node
try Dag.add_idempotent global_dep_dag rev_dep dag_node
with Dag.Cycle cycle ->
raise
(Cycle_error.E
Expand All @@ -459,7 +456,7 @@ let add_rev_dep (type i o f) ~called_from_peek (dep_node : (i, o, f) Dep_node.t)
}) )

(* CR-soon amokhov: The order of dependencies in the resulting list seems to be
wrong: [Dag.children] returns children in the reverse order compared to the
wrong: [Dag.children] returns children in the reverse order instead of the
order in which they were added. See the comment for [deps : Last_dep.t list]. *)
let get_deps_from_graph_exn (dep_node : _ Dep_node.t) =
Dag.children dep_node.dag_node
Expand Down
14 changes: 7 additions & 7 deletions test/expect-tests/dag/dag_tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ let node21 = Dag.node dag { name = "child 2 1" }
let node31 = Dag.node dag { name = "child 3 1" }

let () =
Dag.add dag node node11;
Dag.add dag node node12;
Dag.add dag node12 node21;
Dag.add dag node21 node31
Dag.add_idempotent dag node node11;
Dag.add_idempotent dag node node12;
Dag.add_idempotent dag node12 node21;
Dag.add_idempotent dag node21 node31

let pp_mynode fmt n = Format.fprintf fmt "%s" n.name

Expand All @@ -42,11 +42,11 @@ let dag_pp_mynode = Dag.pp_node pp_mynode
let%expect_test _ =
Format.printf "%a@." dag_pp_mynode node;
let node41 = Dag.node dag { name = "child 4 1" } in
Dag.add dag node31 node41;
Dag.add_idempotent dag node31 node41;
Format.printf "%a@." dag_pp_mynode node;
let name node = node.data.name in
try
Dag.add dag node41 node;
Dag.add_idempotent dag node41 node;
print_endline "no cycle"
with Dag.Cycle cycle ->
let cycle = List.map cycle ~f:name in
Expand Down Expand Up @@ -80,7 +80,7 @@ let cycle_test variant =
let edges = ref [] in
let add d n1 n2 =
edges := (n1.data, n2.data) :: !edges;
add d n1 n2
add_idempotent d n1 n2
in
let d = Dag.create () in
let _n1 = node d 1 in
Expand Down
49 changes: 49 additions & 0 deletions vendor/incremental-cycles/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# README for incremental_cycles library

This library is vendored from
https://gitlab.inria.fr/agueneau/incremental-cycles

## Details on the vendoring process

The vendoring process is a bit involved due to the way the library is
specified upstream. In particular, it assumes a graph interface
`Raw_graph` that we have to copy by hand in Dune [see
`src/dag/dag.ml`], and in particular we have to be careful about not
altering the complexity guarantees.


## Complexity guarantees

The complexity and correctness of the implementation of
`incremental_cycles` has been mechanically-verified using the Coq
theorem prover. Note however, that for the main theorem to hold there
are a few requirements that cannot be captured by ML-level interfaces;
more concretely:

- the current specification for the algorithm requires the
`get_outgoing` function provided by the client to return a list of
all successors, and do so in constant time. This is quite demanding,
as basically requires the client to already have the list at hand.

- the main theorem for `Dag.add` does require that the vertex is not
already in the graph; otherwise the theorem doesn't apply. Thus,
clients must ensure that no duplicate edge is added to the graph.

## Dune-specific modifications

Dune uses incremental_cycles in a way that the no-duplicate-egdes
requirement is not satisfied by construction; thus, before a call to
`Dag.add` edge membership on the graph must be checked.

This is a common operation and thus should be done efficiently, thus
Dune performs the following modifications to `dag.ml`:

- we add a set of children nodes in addition to the current list
- we modify `raw_add_edge` so it updates this set, and `is_child` so
it uses the efficient membership set

The rationale for adding a duplicate children field is to actually
preserve the order the edges were added, this could be important in
other parts of the algo, see comment on `is_child` use at `memo.ml`.

For more details see discussion at https://github.com/ocaml/dune/pull/2959