Start caching errors in Memo #4522

Merged · 6 commits · Apr 27, 2021
8 changes: 8 additions & 0 deletions CHANGES.md
@@ -123,6 +123,14 @@ Unreleased

- Fix a crash when clearing temporary directories (#4489, #4529, Andrey Mokhov)

- Dune now memoizes all errors when running in file-watching mode. This
  speeds up incremental rebuilds but may be inconvenient in rare cases, e.g. if
  a build action fails due to a spurious error, such as running out of memory.
  Right now, the only way to force such actions to be rebuilt is to restart
  Dune, which clears all memoized errors. In future, we would like to provide a
  way to rerun all actions that failed due to errors without restarting the
  build, e.g. via a Dune RPC call. (#4522, Andrey Mokhov)

2.9.0 (unreleased)
------------------

85 changes: 54 additions & 31 deletions src/memo/memo.ml
@@ -407,7 +407,9 @@ module Cache_lookup = struct
end

module Result = struct
type 'a t = ('a, 'a Failure.t) result
type 'a t =
| Ok of 'a
| Failure of 'a Failure.t
end
end
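
The hunk above replaces the polymorphic [('a, 'a Failure.t) result] with a
dedicated variant. A minimal, self-contained sketch (not Dune's actual module;
the real [Failure.t] also has a [Cancelled] case) of why the named constructor
helps: a cache-lookup failure can no longer be confused with a memoized value
that happens to be an error.

(* Illustrative stand-ins, simplified relative to Dune's [Cache_lookup]. *)
module Failure = struct
  type 'a t =
    | Not_found
    | Out_of_date of 'a
end

module Result = struct
  type 'a t =
    | Ok of 'a
    | Failure of 'a Failure.t
end

let describe : int Result.t -> string = function
  | Ok n -> Printf.sprintf "restored %d from the cache" n
  | Failure Failure.Not_found -> "nothing cached for this input yet"
  | Failure (Failure.Out_of_date n) -> Printf.sprintf "cached %d is stale" n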

@@ -473,7 +475,8 @@ module Sample_attempt = struct
}

let restore = function
| Finished cached_value -> Fiber.return (Ok cached_value)
| Finished cached_value ->
Fiber.return (Cache_lookup.Result.Ok cached_value)
| Running { result; _ } -> Once.force result.restore_from_cache

let compute = function
@@ -718,15 +721,19 @@ module Cached_value = struct
t.deps <- capture_dep_values ~deps_rev;
t

let value_changed (type o) (node : (_, o) Dep_node.t) prev_output curr_output
=
match (prev_output, curr_output) with
| (Value.Error _ | Cancelled _), _ -> true
| _, (Value.Error _ | Cancelled _) -> true
| Ok prev_output, Ok curr_output -> (
let value_changed (node : _ Dep_node.t) prev_value cur_value =
match ((prev_value : _ Value.t), (cur_value : _ Value.t)) with
| Cancelled _, _
| _, Cancelled _
| Error _, Ok _
| Ok _, Error _ ->
true
| Ok prev_value, Ok cur_value -> (
match node.without_state.spec.allow_cutoff with
| Yes equal -> not (equal prev_output curr_output)
| Yes equal -> not (equal prev_value cur_value)
| No -> true)
| Error prev_error, Error cur_error ->
not (Exn_set.equal prev_error cur_error)
end

(* Add a dependency on the [dep_node] from the caller, if there is one. Returns
@@ -832,7 +839,7 @@ let dep_node (t : (_, _) t) input =

- [Unchanged]: all the dependencies of the current node are up to date and we
can therefore skip recomputing the node and can reuse the value computed in
the previuos run.
the previous run.

- [Changed]: one of the dependencies has changed since the previous run and
the current node should therefore be recomputed.
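
A hedged sketch of this three-way outcome; the names mirror the
[Changed_or_not] values used below, but the types are illustrative stand-ins
(in particular, [dependency_cycle] is just a string here).

type changed_or_not =
  | Unchanged
  | Changed
  | Cancelled of { dependency_cycle : string }

(* Walk the recorded dependencies in order; stop at the first one that changed
   or that hit a dependency cycle while being restored from the cache. *)
let rec deps_changed ~check_dep = function
  | [] -> Unchanged
  | dep :: deps -> (
    match check_dep dep with
    | `Unchanged -> deps_changed ~check_dep deps
    | `Changed -> Changed
    | `Cycle cycle -> Cancelled { dependency_cycle = cycle })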
@@ -866,20 +873,22 @@ end = struct
-> 'o Cached_value.t Cache_lookup.Result.t Fiber.t =
fun last_cached_value ->
match last_cached_value with
| None -> Fiber.return (Error Cache_lookup.Failure.Not_found)
| None -> Fiber.return (Cache_lookup.Result.Failure Not_found)
| Some cached_value -> (
match cached_value.value with
| Cancelled _dependency_cycle ->
(* Dependencies of cancelled computations are not accurate, so we can't
use [deps_changed] in this case. *)
Fiber.return (Error Cache_lookup.Failure.Not_found)
| Error _ ->
(* We always recompute errors, so there is no point in checking if any
of their dependencies changed. In principle, we could introduce
"persistent errors" that are recomputed only when their dependencies
have changed. *)
Fiber.return (Error Cache_lookup.Failure.Not_found)
| Ok _ -> (
Fiber.return (Cache_lookup.Result.Failure Not_found)
| Ok _
| Error _ -> (
(* We cache errors just like normal values. We assume that all [Memo]
computations are deterministic, which means if we rerun a computation
that previously led to raising a set of errors on the same inputs, we
expect to get the same set of errors back and we might as well skip
the unnecessary work. The downside is that if a computation is
non-deterministic, there is no way to force rerunning it, apart from
changing some of its dependencies. *)
let+ deps_changed =
let rec go deps =
match deps with
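
The comment above leans on the assumption that Memo computations are
deterministic. A toy illustration of the trade-off, using a plain hash table
instead of Dune's Memo and Fiber machinery: once an error is cached, a flaky
computation keeps replaying that error until the cache is cleared, which is
the analogue of restarting Dune.

let memoize (type a b) (f : a -> b) : (a -> b) * (unit -> unit) =
  let cache : (a, (b, exn) result) Hashtbl.t = Hashtbl.create 16 in
  let memoized x =
    match Hashtbl.find_opt cache x with
    | Some (Ok y) -> y
    | Some (Error e) -> raise e (* cached errors are replayed, not retried *)
    | None ->
      let result =
        match f x with
        | y -> Ok y
        | exception e -> Error e
      in
      Hashtbl.add cache x result;
      (match result with
      | Ok y -> y
      | Error e -> raise e)
  in
  let clear () = Hashtbl.reset cache in
  (memoized, clear)

Calling [clear ()] is the only way to get a fresh attempt for an input whose
first run failed, which mirrors the limitation described in the CHANGES.md
entry above.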
@@ -891,13 +900,21 @@ end = struct
is up to date. If not, we must recompute [last_cached_value]. *)
let* restore_result = consider_and_restore_from_cache dep in
match restore_result with
| Ok cached_value -> (
match Value_id.equal cached_value.id v_id with
| Ok cached_value_of_dep -> (
(* Here we know that [dep] can be restored from the cache, so
how can [v_id] be different from [cached_value_of_dep.id]?
Good question! This can happen if [cached_value]'s node was
skipped in the previous run (because it was unreachable),
while [dep] wasn't skipped and its value changed. In the
current run, [cached_value] is therefore stale. We learn
this when we see that the [cached_value_of_dep] is not as
recorded when computing [cached_value]. *)
match Value_id.equal cached_value_of_dep.id v_id with
| true -> go deps
| false -> Fiber.return Changed_or_not.Changed)
| Error (Cancelled { dependency_cycle }) ->
| Failure (Cancelled { dependency_cycle }) ->
Fiber.return (Changed_or_not.Cancelled { dependency_cycle })
| Error (Not_found | Out_of_date _) ->
| Failure (Not_found | Out_of_date _) ->
Fiber.return Changed_or_not.Changed)
| Yes _equal -> (
(* If [dep] has a cutoff predicate, it is not sufficient to
@@ -923,10 +940,10 @@ end = struct
match deps_changed with
| Unchanged ->
cached_value.last_validated_at <- Run.current ();
Ok cached_value
| Changed -> Error (Cache_lookup.Failure.Out_of_date cached_value)
Cache_lookup.Result.Ok cached_value
| Changed -> Failure (Out_of_date cached_value)
| Cancelled { dependency_cycle } ->
Error (Cancelled { dependency_cycle })))
Failure (Cancelled { dependency_cycle })))

and compute :
'i 'o. ('i, 'o) Dep_node.t
@@ -947,12 +964,17 @@ end = struct
let value =
match res with
| Ok res -> Value.Ok res
| Error exns -> Error (Exn_set.of_list exns)
| Error exns ->
(* CR-someday amokhov: Here we remove error duplicates that can appear
due to diamond dependencies. We could add [Fiber.collect_error_set]
that returns a set of errors instead of a list, to get rid of the
duplicates as early as possible. *)
Error (Exn_set.of_list exns)
in
(value, Deps_so_far.get_compute_deps_rev deps_so_far)
in
match cache_lookup_failure with
| Cache_lookup.Failure.Cancelled { dependency_cycle } ->
| Cancelled { dependency_cycle } ->
Fiber.return (Cached_value.create_cancelled ~dependency_cycle)
| Not_found ->
let+ value, deps_rev = compute_value_and_deps_rev () in
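
The CR comment above concerns errors that reach a node twice through a diamond
of dependencies. A small stand-in for that deduplication step; the [Exn_set]
below is built from the standard library and compares raw [exn] values with
polymorphic comparison, unlike Dune's set, whose elements carry backtraces (as
the test output further down shows).

module Exn_set = Set.Make (struct
  type t = exn

  let compare = Stdlib.compare
end)

let dedup_errors (exns : exn list) : Exn_set.t = Exn_set.of_list exns

let () =
  let e = Failure "boom" in
  (* The same failure arrives via both arms of a diamond. *)
  assert (Exn_set.cardinal (dedup_errors [ e; e ]) = 1)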
@@ -985,13 +1007,13 @@ end = struct
in
(match restore_result with
| Ok _ -> dep_node.state <- Not_considering
| Error _ -> ());
| Failure _ -> ());
restore_result))
in
let compute =
Once.and_then restore_from_cache ~f_must_not_raise:(function
| Ok cached_value -> Fiber.return cached_value
| Error cache_lookup_failure ->
| Failure cache_lookup_failure ->
Call_stack.push_frame frame (fun () ->
dep_node.last_cached_value <- None;
let+ cached_value =
Expand Down Expand Up @@ -1045,7 +1067,8 @@ end = struct
consider dep_node >>= function
| Ok sample_attempt -> Sample_attempt.restore sample_attempt
| Error dependency_cycle ->
Fiber.return (Error (Cache_lookup.Failure.Cancelled { dependency_cycle }))
Fiber.return
(Cache_lookup.Result.Failure (Cancelled { dependency_cycle }))

let exec_dep_node dep_node =
Fiber.of_thunk (fun () ->
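
Stripped of the [Once] and [Fiber] machinery, the control flow in the last
hunk is: try to restore the node from the cache, and only fall back to
computing when the lookup reports a failure. A hedged approximation with plain
thunks and an illustrative result type.

type ('a, 'failure) lookup_result =
  | Ok of 'a
  | Failure of 'failure

let restore_or_compute ~(restore : unit -> ('a, 'f) lookup_result)
    ~(compute : 'f -> 'a) : 'a =
  match restore () with
  | Ok cached_value -> cached_value
  | Failure cache_lookup_failure -> compute cache_lookup_failure

let (_ : int) =
  restore_or_compute
    ~restore:(fun () -> Failure `Not_found)
    ~compute:(fun _ -> 42)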
7 changes: 7 additions & 0 deletions src/memo/memo.mli
@@ -1,5 +1,12 @@
open! Stdune

(* CR-someday amokhov: The current implementation memoizes all errors, which may
be inconvenient in rare cases, e.g. if a build action fails due to a spurious
error, such as running out of memory. Right now, the only way to force such
actions to be rebuilt is to restart Dune, which clears all memoized errors.
In future, we would like to provide a way to rerun all actions that failed
due to errors without restarting the build, e.g. via a Dune RPC call. *)

type 'a build

module type Build = sig
118 changes: 107 additions & 11 deletions test/expect-tests/memo/memoize_tests.ml
@@ -972,34 +972,34 @@ let%expect_test "dynamic cycles with non-uniform cutoff structure" =
evaluate_and_print summit_no_cutoff 0;
[%expect
{|
Started evaluating the summit with input 0
Started evaluating incrementing_chain_4_yes_cutoff
Started evaluating incrementing_chain_3_no_cutoff
Started evaluating base
Evaluated base: 3
Started evaluating incrementing_chain_2_yes_cutoff
Started evaluating incrementing_chain_1_no_cutoff
Started evaluating cycle_creator_no_cutoff
Started evaluating base
Evaluated base: 3
Evaluated cycle_creator_no_cutoff: 3
Evaluated incrementing_chain_1_no_cutoff: 4
Evaluated incrementing_chain_2_yes_cutoff: 5
Started evaluating incrementing_chain_4_yes_cutoff
Started evaluating incrementing_chain_3_no_cutoff
Evaluated incrementing_chain_3_no_cutoff: 6
Evaluated incrementing_chain_4_yes_cutoff: 7
Started evaluating the summit with input 0
Evaluated the summit with input 0: 7
f 0 = Ok 7 |}];
evaluate_and_print summit_yes_cutoff 0;
[%expect
{|
Started evaluating the summit with input 0
Started evaluating incrementing_chain_4_no_cutoff
Started evaluating incrementing_chain_3_yes_cutoff
Started evaluating incrementing_chain_2_no_cutoff
Started evaluating incrementing_chain_1_yes_cutoff
Started evaluating cycle_creator_yes_cutoff
Evaluated cycle_creator_yes_cutoff: 3
Started evaluating incrementing_chain_1_yes_cutoff
Evaluated incrementing_chain_1_yes_cutoff: 4
Started evaluating incrementing_chain_3_yes_cutoff
Started evaluating incrementing_chain_2_no_cutoff
Evaluated incrementing_chain_2_no_cutoff: 5
Evaluated incrementing_chain_3_yes_cutoff: 6
Started evaluating the summit with input 0
Started evaluating incrementing_chain_4_no_cutoff
Evaluated incrementing_chain_4_no_cutoff: 7
Evaluated the summit with input 0: 7
f 0 = Ok 7 |}];
@@ -1403,7 +1403,6 @@ let%expect_test "error handling and duplicate exceptions" =
in
Fdecl.set f_impl (fun x ->
printf "Calling f %d\n" x;

match x with
| 0 -> Memo.exec forward_fail x
| 1 -> Memo.exec forward_fail2 x
@@ -1420,3 +1419,100 @@
Calling f 0
Error [ "(Failure 42)" ]
|}]

let%expect_test "errors are cached" =
Printexc.record_backtrace false;
let f =
Memo.create_hidden "area of a square"
~input:(module Int)
(fun x ->
printf "Started evaluating %d\n" x;
if x < 0 then failwith (sprintf "Negative input %d" x);
let res = x * x in
printf "Evaluated %d: %d\n" x res;
Memo.Build.return res)
in
evaluate_and_print f 5;
evaluate_and_print f (-5);
[%expect
{|
Started evaluating 5
Evaluated 5: 25
f 5 = Ok 25
Started evaluating -5
f -5 = Error [ { exn = "(Failure \"Negative input -5\")"; backtrace = "" } ]
|}];
evaluate_and_print f 5;
evaluate_and_print f (-5);
(* Note that we do not see any "Started evaluating" messages because both [Ok]
and [Error] results have been cached. *)
[%expect
{|
f 5 = Ok 25
f -5 = Error [ { exn = "(Failure \"Negative input -5\")"; backtrace = "" } ]
|}]

let%expect_test "errors work with early cutoff" =
let divide =
let exception Input_too_large of Memo.Run.t in
Memo.create "divide 100 by input"
~input:(module Int)
~visibility:Hidden
~output:(Allow_cutoff (module Int))
~doc:""
(fun x ->
let+ run = Memo.current_run () in
printf "[divide] Started evaluating %d\n" x;
if x > 100 then
(* This exception will be different in each run. *)
raise (Input_too_large run);
let res = 100 / x in
printf "[divide] Evaluated %d: %d\n" x res;
res)
in
let f =
Memo.create_hidden "Negate"
~input:(module Int)
(fun x ->
printf "[negate] Started evaluating %d\n" x;
let+ res = Memo.exec divide x >>| Stdlib.Int.neg in
printf "[negate] Evaluated %d: %d\n" x res;
res)
in
evaluate_and_print f 0;
evaluate_and_print f 20;
evaluate_and_print f 200;
[%expect
{|
[negate] Started evaluating 0
[divide] Started evaluating 0
f 0 = Error [ { exn = "Division_by_zero"; backtrace = "" } ]
[negate] Started evaluating 20
[divide] Started evaluating 20
[divide] Evaluated 20: 5
[negate] Evaluated 20: -5
f 20 = Ok -5
[negate] Started evaluating 200
[divide] Started evaluating 200
f 200 = Error [ { exn = "Input_too_large(_)"; backtrace = "" } ]
|}];
Memo.restart_current_run ();
evaluate_and_print f 0;
evaluate_and_print f 20;
evaluate_and_print f 200;
(* Here we reevaluate all calls to [divide] because they depend on the current
run. Due to the early cutoff, we skip recomputing the outer [negate] for
the inputs 0 (error) and 20 (success), because the results remain the same.
However, we do attempt to re-evaluate [negate] for the input 200 because
the result of [divide] does change: we get a fresh exception. *)
[%expect
{|
[divide] Started evaluating 0
f 0 = Error [ { exn = "Division_by_zero"; backtrace = "" } ]
[divide] Started evaluating 20
[divide] Evaluated 20: 5
f 20 = Ok -5
[divide] Started evaluating 200
[negate] Started evaluating 200
f 200 = Error [ { exn = "Input_too_large(_)"; backtrace = "" } ]
|}]