diff --git a/bin/build_cmd.ml b/bin/build_cmd.ml index f1cbfe6338aa..35036fce1e65 100644 --- a/bin/build_cmd.ml +++ b/bin/build_cmd.ml @@ -45,8 +45,7 @@ let run_build_command ~(common : Common.t) ~config ~targets = run_build_command_poll else run_build_command_once) - ~setup ~common ~config ~targets; - Build_system.cache_teardown () + ~setup ~common ~config ~targets let runtest = let doc = "Run tests." in diff --git a/bin/caching.ml b/bin/caching.ml index aed9c81bb0c7..fd314adbfa79 100644 --- a/bin/caching.ml +++ b/bin/caching.ml @@ -60,8 +60,8 @@ let trim ~trimmed_size ~size = let open Result.O in match let* cache = - (* CR-someday amokhov: The [Hadrlink] duplication mode is chosen - artitrarily here, instead of respecting the corresponding configuration + (* CR-someday amokhov: The [Hardlink] duplication mode is chosen + arbitrarily here, instead of respecting the corresponding configuration setting, because the mode doesn't matter for the trimmer. It would be better to refactor the code to avoid such arbitrary choices. *) Cache.Local.make ~duplication_mode:Cache.Duplication_mode.Hardlink @@ -123,7 +123,7 @@ let term = and+ root = Arg.( value - & opt path_conv (Cache.Local.default_root ()) + & opt path_conv (Dune_cache_storage.Layout.default_root_path ()) & info ~docv:"PATH" [ "root" ] ~doc:"Root of the dune cache") and+ trimmed_size = Arg.( @@ -142,7 +142,13 @@ let term = | Some Start -> let config = { Cache_daemon.exit_no_client - ; duplication_mode = config.cache_duplication + ; duplication_mode = + (match + (config.cache_duplication : Dune_cache_storage.Mode.t option) + with + | None -> None + | Some Hardlink -> Some Hardlink + | Some Copy -> Some Copy) } in `Ok (start ~config ~foreground ~port_path ~root ~display) diff --git a/bin/dune b/bin/dune index f56cdcfb0dc8..3cdc7752cd7d 100644 --- a/bin/dune +++ b/bin/dune @@ -8,6 +8,8 @@ unix cache_daemon cache + dune_cache + dune_cache_storage dune_rules dune_engine dune_util diff --git a/bin/import.ml b/bin/import.ml index 29186bb7b138..1fddb49d77b1 100644 --- a/bin/import.ml +++ b/bin/import.ml @@ -32,57 +32,12 @@ include Common.Let_syntax let in_group (t, info) = (Term.Group.Term t, info) -let make_cache (config : Dune_config.t) = - let make_cache () = - let command_handler (Cache.Dedup file) = - match Build_system.get_cache () with - | None -> Code_error.raise "deduplication message and no caching" [] - | Some caching -> - Scheduler.send_sync_task (fun () -> - let (module Caching : Cache.Caching) = caching.cache in - match Cached_digest.peek_file (Path.build file.path) with - | None -> () - | Some d when not (Digest.equal d file.digest) -> () - | _ -> Caching.Cache.deduplicate Caching.cache file) - in - match config.cache_transport with - | Dune_config.Caching.Transport.Direct -> - Log.info [ Pp.text "enable binary cache in direct access mode" ]; - let cache = - Result.ok_exn - (Result.map_error - ~f:(fun s -> User_error.E (User_error.make [ Pp.text s ])) - (Cache.Local.make ?duplication_mode:config.cache_duplication - ~command_handler ())) - in - Cache.make_caching (module Cache.Local) cache - | Daemon -> - Log.info [ Pp.text "enable binary cache in daemon mode" ]; - let cache = - Result.ok_exn - (Cache.Client.make ?duplication_mode:config.cache_duplication - ~command_handler ()) - in - Cache.make_caching (module Cache.Client) cache - in - Fiber.return - (match config.cache_mode with - | Dune_config.Caching.Mode.Enabled -> - Some - { Build_system.cache = make_cache () - ; check_probability = config.cache_check_probability - } - | 
Dune_config.Caching.Mode.Disabled -> - Log.info [ Pp.text "disable binary cache" ]; - None) - module Main = struct include Dune_rules.Main - let setup common config = + let setup common (config : Dune_config.t) = let open Fiber.O in - let* caching = make_cache config - and* conf = Memo.Build.run (Dune_rules.Dune_load.load ()) + let* conf = Memo.Build.run (Dune_rules.Dune_load.load ()) and* contexts = Memo.Build.run (Context.DB.all ()) in let stats = Common.stats common in List.iter contexts ~f:(fun (ctx : Context.t) -> @@ -91,8 +46,25 @@ module Main = struct [ Pp.box ~indent:1 (Pp.text "Dune context:" ++ Pp.cut ++ Dyn.pp (Context.to_dyn ctx)) ]); + (* CR-soon amokhov: Right now, types [Dune_config.Caching.Duplication.t] and + [Dune_cache_storage.Mode.t] are the same. They will be unified after + removing the cache daemon and adapting the configuration format. *) + let cache_config = + match config.cache_mode with + | Disabled -> Dune_cache.Config.Disabled + | Enabled -> + Enabled + { storage_mode = + (match config.cache_duplication with + | None + | Some Hardlink -> + Dune_cache_storage.Mode.Hardlink + | Some Copy -> Copy) + ; check_probability = config.cache_check_probability + } + in init_build_system ~stats ~sandboxing_preference:config.sandboxing_preference - ~caching ~conf ~contexts + ~cache_config ~conf ~contexts end module Scheduler = struct diff --git a/boot/libs.ml b/boot/libs.ml index 27f03950bc2f..4ef48a003a8b 100644 --- a/boot/libs.ml +++ b/boot/libs.ml @@ -15,6 +15,8 @@ let local_libraries = ; ("src/memo", Some "Memo", false, None) ; ("src/dune_util", Some "Dune_util", false, None) ; ("src/xdg", Some "Xdg", false, None) + ; ("src/dune_cache_storage", Some "Dune_cache_storage", false, None) + ; ("src/dune_cache", Some "Dune_cache", false, None) ; ("src/cache", Some "Cache", false, None) ; ("src/cache_daemon", Some "Cache_daemon", false, None) ; ("vendor/re/src", Some "Dune_re", false, None) diff --git a/otherlibs/stdune-unstable/digest.ml b/otherlibs/stdune-unstable/digest.ml index 51b92eb2a2e7..9bd3d0406cde 100644 --- a/otherlibs/stdune-unstable/digest.ml +++ b/otherlibs/stdune-unstable/digest.ml @@ -35,21 +35,24 @@ let to_string_raw s = s or to different memory locations. *) let generic a = string (Marshal.to_string a [ No_sharing ]) -let file_with_stats p (stats : Unix.stats) = +let file_with_executable_bit ~executable path = + (* We follow the digest scheme used by Jenga. *) + let string_and_bool ~digest_hex ~bool = + D.string + (digest_hex + ^ + if bool then + "\001" + else + "\000") + in + let content_digest = file path in + string_and_bool ~digest_hex:content_digest ~bool:executable + +let file_with_stats path (stats : Unix.stats) = match stats.st_kind with | S_DIR -> generic (stats.st_size, stats.st_perm, stats.st_mtime, stats.st_ctime) | _ -> - (* We follow the digest scheme used by Jenga. *) - let string_and_bool ~digest_hex ~bool = - D.string - (digest_hex - ^ - if bool then - "\001" - else - "\000") - in - let content_digest = file p in let executable = stats.st_perm land 0o100 <> 0 in - string_and_bool ~digest_hex:content_digest ~bool:executable + file_with_executable_bit ~executable path diff --git a/otherlibs/stdune-unstable/digest.mli b/otherlibs/stdune-unstable/digest.mli index 12c3afcdc7a4..706ac49f6c87 100644 --- a/otherlibs/stdune-unstable/digest.mli +++ b/otherlibs/stdune-unstable/digest.mli @@ -26,5 +26,9 @@ val to_string_raw : t -> string val generic : 'a -> t -(** Digest a file and its stats. 
Does something sensible for directories *) +(** Digest a file and its stats. Does something sensible for directories. *) val file_with_stats : Path.t -> Unix.stats -> t + +(** Digest a file taking its executable bit into account. Should not be called + on a directory. *) +val file_with_executable_bit : executable:bool -> Path.t -> t diff --git a/otherlibs/stdune-unstable/path.ml b/otherlibs/stdune-unstable/path.ml index fae316c3f924..abded925452d 100644 --- a/otherlibs/stdune-unstable/path.ml +++ b/otherlibs/stdune-unstable/path.ml @@ -715,6 +715,10 @@ module Build = struct let chmod t ~mode = Unix.chmod (to_string t) mode + let lstat t = Unix.lstat (to_string t) + + let unlink_no_err t = Fpath.unlink_no_err (to_string t) + module Kind = Kind end diff --git a/otherlibs/stdune-unstable/path.mli b/otherlibs/stdune-unstable/path.mli index de6dd7a922f7..37bf40361f60 100644 --- a/otherlibs/stdune-unstable/path.mli +++ b/otherlibs/stdune-unstable/path.mli @@ -186,6 +186,10 @@ module Build : sig (** Set permissions for a given path. You can use the [Permissions] module if you need to modify existing permissions in a non-trivial way. *) val chmod : t -> mode:int -> unit + + val lstat : t -> Unix.stats + + val unlink_no_err : t -> unit end type t = private @@ -379,10 +383,9 @@ val set_of_build_paths_list : Build.t list -> Set.t val string_of_file_kind : Unix.file_kind -> string -(** Rename a file. rename oldpath newpath renames the file called oldpath, - giving it newpath as its new name, moving it between directories if needed. - If newpath already exists, its contents will be replaced with those of - oldpath. *) +(** Rename a file. [rename oldpath newpath] renames the file called [oldpath] to + [newpath], moving it between directories if needed. If [newpath] already + exists, its contents will be replaced with those of [oldpath]. *) val rename : t -> t -> unit (** Set permissions for a given path. You can use the [Permissions] module if diff --git a/otherlibs/stdune-unstable/temp.mli b/otherlibs/stdune-unstable/temp.mli index febdd80b7c73..f98f60bb5bbc 100644 --- a/otherlibs/stdune-unstable/temp.mli +++ b/otherlibs/stdune-unstable/temp.mli @@ -20,7 +20,7 @@ val destroy : what -> Path.t -> unit itself. *) val clear_dir : Path.t -> unit -(** [temp_path ~dir ~prefix ~suffix] generate a temporary path in [dir]. The +(** [temp_path ~dir ~prefix ~suffix] generates a temporary path in [dir]. The base name of the temporary file is formed by concatenating [prefix], then a suitably chosen integer number, then [suffix]. *) val temp_path : dir:Path.t -> prefix:string -> suffix:string -> Path.t diff --git a/src/cache/dune b/src/cache/dune index fe630a830310..f6313e709038 100644 --- a/src/cache/dune +++ b/src/cache/dune @@ -1,4 +1,4 @@ (library (name cache) (synopsis "[Internal] Dune binary artifact cache protocol") - (libraries stdune threads.posix xdg dune_lang dune_util csexp)) + (libraries stdune threads.posix xdg dune_lang dune_util csexp dune_cache)) diff --git a/src/cache/local.ml b/src/cache/local.ml index 15e8866a1e7b..ec6ca510c9ac 100644 --- a/src/cache/local.ml +++ b/src/cache/local.ml @@ -33,7 +33,7 @@ let default_root () = let file_store_root cache = Path.L.relative cache.root [ "files"; "v4" ] -let metadata_store_root cache = Path.L.relative cache.root [ "meta"; "v4" ] +let metadata_store_root cache = Path.L.relative cache.root [ "meta"; "v5" ] (* A file storage scheme. 
*) module type FSScheme = sig @@ -62,7 +62,7 @@ module FirstTwoCharsSubdir : FSScheme = struct let first_two_chars = String.sub digest ~pos:0 ~len:2 in Path.L.relative root [ first_two_chars; digest ] - (* List all entries in a given cache root. Returns the empty list of the root + (* List all entries in a given cache root. Returns the empty list if the root doesn't exist. *) let list ~root = let open Result.O in @@ -87,7 +87,7 @@ let metadata_path cache key = let file_path cache key = FSSchemeImpl.path ~root:(file_store_root cache) key -(* Support for an older version of the cache. *) +(* Support for older versions of the cache. *) module V3 = struct let file_store_root cache = Path.L.relative cache.root [ "files"; "v3" ] @@ -96,6 +96,10 @@ module V3 = struct let file_path cache key = FSSchemeImpl.path ~root:(file_store_root cache) key end +module V4 = struct + let metadata_store_root cache = Path.L.relative cache.root [ "meta"; "v4" ] +end + module Metadata_file = struct type t = { metadata : Sexp.t list @@ -425,13 +429,14 @@ let trim_bad_metadata_files ~metadata_files ~trimmed_so_far ~file_path cache = | false -> trimmed_so_far) let garbage_collect_impl ~trimmed_so_far cache = - let metadata_files = FSSchemeImpl.list ~root:(V3.metadata_store_root cache) in - let trimmed_so_far = - trim_bad_metadata_files ~metadata_files ~trimmed_so_far - ~file_path:V3.file_path cache - in - let metadata_files = FSSchemeImpl.list ~root:(metadata_store_root cache) in - trim_bad_metadata_files ~metadata_files ~trimmed_so_far ~file_path cache + List.fold_left ~init:trimmed_so_far + ~f:(fun trimmed_so_far (root, file_path) -> + let metadata_files = FSSchemeImpl.list ~root in + trim_bad_metadata_files ~metadata_files ~trimmed_so_far ~file_path cache) + [ (V3.metadata_store_root cache, V3.file_path) + ; (V4.metadata_store_root cache, file_path) + ; (metadata_store_root cache, file_path) + ] let garbage_collect = garbage_collect_impl ~trimmed_so_far:Trimming_result.empty diff --git a/src/dune_cache/config.ml b/src/dune_cache/config.ml new file mode 100644 index 000000000000..aca59ad18078 --- /dev/null +++ b/src/dune_cache/config.ml @@ -0,0 +1,9 @@ +(* CR-someday amokhov: We should probably switch from float [check_probability] + to integer [check_frequency], as in Jenga, to avoid generating random floats. 
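+   With an integer [check_frequency], a simple counter could trigger a check on
+   every [check_frequency]-th execution of a rule, with no randomness involved.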
*)
+
+type t =
+  | Disabled
+  | Enabled of
+      { storage_mode : Dune_cache_storage.Mode.t
+      ; check_probability : float
+      }
diff --git a/src/dune_cache/config.mli b/src/dune_cache/config.mli
new file mode 100644
index 000000000000..a1b589e185b7
--- /dev/null
+++ b/src/dune_cache/config.mli
@@ -0,0 +1,6 @@
+type t =
+  | Disabled
+  | Enabled of
+      { storage_mode : Dune_cache_storage.Mode.t
+      ; check_probability : float
+      }
diff --git a/src/dune_cache/dune b/src/dune_cache/dune
new file mode 100644
index 000000000000..30d5f574111c
--- /dev/null
+++ b/src/dune_cache/dune
@@ -0,0 +1,4 @@
+(library
+ (name dune_cache)
+ (synopsis "[Internal] Dune's local and cloud build cache")
+ (libraries csexp dune_cache_storage fiber stdune))
diff --git a/src/dune_cache/local.ml b/src/dune_cache/local.ml
new file mode 100644
index 000000000000..cde0266b7440
--- /dev/null
+++ b/src/dune_cache/local.ml
@@ -0,0 +1,252 @@
+open Stdune
+open Dune_cache_storage.Layout
+open Fiber.O
+module Store_result = Dune_cache_storage.Store_result
+module Restore_result = Dune_cache_storage.Restore_result
+
+module Store_artifacts_result = struct
+  type t =
+    | Stored of (Path.Build.t * Digest.t) list
+    | Already_present of (Path.Build.t * Digest.t) list
+    | Error of exn
+    | Will_not_store_due_to_non_determinism of Sexp.t
+
+  let of_store_result ~artifacts t =
+    match (t : Store_result.t) with
+    | Stored -> Stored artifacts
+    | Already_present -> Already_present artifacts
+    | Error exn -> Error exn
+    | Will_not_store_due_to_non_determinism details ->
+      Will_not_store_due_to_non_determinism details
+
+  let bind t ~f =
+    match t with
+    | Stored data -> f data
+    | Already_present data -> f data
+    | (Error _ | Will_not_store_due_to_non_determinism _) as res -> res
+end
+
+module Check_artifacts_result = struct
+  type t =
+    | Missing
+    | Already_present of (Path.Build.t * Digest.t) list
+    | Error of exn
+    | Non_determinism_detected of Sexp.t
+end
+
+module Target = struct
+  type t =
+    { path : Path.Build.t
+    ; executable : bool
+    }
+
+  let create path =
+    match Path.Build.lstat path with
+    | { Unix.st_kind = Unix.S_REG; st_perm; _ } ->
+      Path.Build.chmod path
+        ~mode:(Path.Permissions.remove ~mode:Path.Permissions.write st_perm);
+      let executable = st_perm land 0o100 <> 0 in
+      Some { path; executable }
+    | (exception _)
+    | _ ->
+      None
+end
+
+(* This function is like [Unix.link] but handles the "Too many links" error by
+   creating a copy of the [src] in a temporary directory, then atomically
+   replacing the [src] with the copy, and finally creating the requested [dst]
+   by calling [Unix.link] again.
+
+   We hit the "Too many links" error because we store a lot of empty files in
+   the cache, which all get deduplicated into the same cache entry. This
+   function essentially deletes the "overlinked" entry from the cache, creating
+   a fresh copy whose link count starts again from 1. This leads to some
+   duplication but it's negligible: we might store the empty file several times
+   across all workspaces instead of just storing it once. 
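+   (How many links are "too many" depends on the file system: ext4, for
+   example, allows at most 65000 hard links per inode, after which [Unix.link]
+   fails with [EMLINK].)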
*) +let link_even_if_there_are_too_many_links_already ~src ~dst = + try Path.link src dst with + | Unix.Unix_error (Unix.EMLINK, _, _) -> + Temp.with_temp_path ~dir:temp_path ~prefix:"dune" ~suffix:"copy" + ~f:(function + | Error e -> raise e + | Ok temp_path -> + Io.copy_file ~src ~dst:temp_path (); + Path.rename temp_path dst; + Path.link src dst) + +let store_output = Dune_cache_storage.Value.store + +let restore_output = Dune_cache_storage.Value.restore + +module Artifacts = struct + include Dune_cache_storage.Artifacts + + let store_metadata ~mode ~metadata ~rule_digest + (artifacts : (Path.Build.t * Digest.t) list) = + let entries = + List.map artifacts ~f:(fun (target, file_digest) -> + let entry : Metadata_entry.t = + { file_name = Path.Build.basename target; file_digest } + in + entry) + in + Metadata_file.store ~mode { metadata; entries } ~rule_digest + + (* Step I of [store_skipping_metadata]. + + If any of the targets couldn't be stored in the temporary directory, then + the result is [Error] with the corresponding exception. Otherwise, the + result is [Ok ()]. *) + let store_targets_to ~temp_dir ~targets ~mode : unit Or_exn.t = + Result.List.fold_left targets ~init:() ~f:(fun () { Target.path; _ } -> + let path_in_build_dir = Path.build path in + let path_in_temp_dir = + Path.relative temp_dir (Path.basename path_in_build_dir) + in + Result.try_with (fun () -> + Dune_cache_storage.Util.link_or_copy ~mode ~src:path_in_build_dir + ~dst:path_in_temp_dir)) + + (* Step II of [store_skipping_metadata]. + + Computing digests can be slow, so we do that in parallel. *) + let compute_digests_in ~temp_dir ~targets ~compute_digest : + (Path.Build.t * Digest.t) list Or_exn.t Fiber.t = + let open Fiber.O in + Fiber.parallel_map targets ~f:(fun { Target.path; executable } -> + let file = Path.relative temp_dir (Path.Build.basename path) in + compute_digest ~executable file + >>| Or_exn.map ~f:(fun digest -> (path, digest))) + >>| Result.List.all + + (* Step III of [store_skipping_metadata]. *) + let store_to_cache_from ~temp_dir ~mode + (artifacts : (Path.Build.t * Digest.t) list) = + List.fold_left artifacts ~init:Store_result.empty + ~f:(fun results (target, digest) -> + let file_name = Path.Build.basename target in + let path_in_temp_dir = Path.relative temp_dir file_name in + let path_in_cache = file_path ~file_digest:digest in + let store_using_hardlinks () = + match + Dune_cache_storage.Util.Optimistically.link ~src:path_in_temp_dir + ~dst:path_in_cache + with + | exception Unix.Unix_error (Unix.EEXIST, _, _) -> ( + (* We end up here if the cache already contains an entry for this + artifact. We deduplicate by keeping only one copy, in the cache. *) + let path_in_build_dir = Path.build target in + match + Path.unlink_no_err path_in_temp_dir; + (* At first, we deduplicate the temporary file. Doing this + intermediate step allows us to keep the original target in case + the below link step fails. This might happen if the trimmer has + just deleted [path_in_cache]. In this rare case, this function + fails with an [Error], and so we might end up with some + duplicates in the workspace. *) + link_even_if_there_are_too_many_links_already ~src:path_in_cache + ~dst:path_in_temp_dir; + (* Now we can simply rename the temporary file into the target, + knowing that the original target remains in place if the + renaming fails. + + One curious case to think about is if the file in the cache + happens to have the same inode as the file in the workspace. 
In + that case this deduplication should be a no-op, but the + [rename] operation has a quirk where [path_in_temp_dir] can + remain on disk. This is not a problem because we clean the + temporary directory later. *) + Path.rename path_in_temp_dir path_in_build_dir + with + | exception e -> Store_result.Error e + | () -> Already_present) + | exception e -> Error e + | () -> Stored + in + let store_using_test_and_rename () = + (* CR-someday amokhov: There is a race here. If [path_in_cache] is + created after [Path.exists] but before [Path.rename], it will be + silently overwritten. Find a good way to avoid this race. *) + match Path.exists path_in_cache with + | true -> Store_result.Already_present + | false -> ( + match + Dune_cache_storage.Util.Optimistically.rename + ~src:path_in_temp_dir ~dst:path_in_cache + with + | exception e -> Error e + | () -> Stored) + in + let result = + match (mode : Dune_cache_storage.Mode.t) with + | Hardlink -> store_using_hardlinks () + | Copy -> store_using_test_and_rename () + in + Store_result.combine results result) + + let store_skipping_metadata ~mode ~targets ~compute_digest : + Store_artifacts_result.t Fiber.t = + Dune_cache_storage.with_temp_dir ~suffix:"artifacts" (function + | Error exn -> Fiber.return (Store_artifacts_result.Error exn) + | Ok temp_dir -> ( + match store_targets_to ~temp_dir ~targets ~mode with + | Error exn -> Fiber.return (Store_artifacts_result.Error exn) + | Ok () -> ( + compute_digests_in ~temp_dir ~targets ~compute_digest >>| function + | Error exn -> Store_artifacts_result.Error exn + | Ok artifacts -> + let result = store_to_cache_from ~temp_dir ~mode artifacts in + Store_artifacts_result.of_store_result ~artifacts result))) + + let store ~mode ~rule_digest ~compute_digest targets : + Store_artifacts_result.t Fiber.t = + let+ result = store_skipping_metadata ~mode ~targets ~compute_digest in + Store_artifacts_result.bind result ~f:(fun artifacts -> + let result = store_metadata ~mode ~rule_digest ~metadata:[] artifacts in + Store_artifacts_result.of_store_result ~artifacts result) + + let restore = + let exception Metadata_entry_missing in + fun ~mode ~rule_digest ~target_dir -> + Restore_result.bind (list ~rule_digest) + ~f:(fun (entries : Metadata_entry.t list) -> + match + List.map entries + ~f:(fun { Metadata_entry.file_name; file_digest } -> + let path_in_build_dir = + Path.Build.relative target_dir file_name + in + Path.Build.unlink_no_err path_in_build_dir; + let path_in_cache = file_path ~file_digest in + let restore () = + let path_in_build_dir = Path.build path_in_build_dir in + match (mode : Dune_cache_storage.Mode.t) with + | Hardlink -> ( + try + link_even_if_there_are_too_many_links_already + ~src:path_in_cache ~dst:path_in_build_dir + with + | Unix.Unix_error (Unix.ENOENT, _, _) -> + raise Metadata_entry_missing) + | Copy -> ( + try + Io.copy_file ~src:path_in_cache ~dst:path_in_build_dir () + with + | Sys_error _ -> raise Metadata_entry_missing) + in + restore (); + (path_in_build_dir, file_digest)) + with + | artifacts -> Restored artifacts + | exception Metadata_entry_missing -> + (* We reach this point when one of the entries mentioned in the + metadata is missing. The trimmer will eventually delete such + "broken" metadata, so it is reasonable to consider that this + [rule_digest] is not found in the cache. 
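+             Note that artifacts restored before hitting the missing entry are
+             left in the build directory; they are harmless and will be
+             overwritten when the rule is re-executed.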
*)
+          Not_found_in_cache
+        | exception e -> Error e)
+end
+
+let store_artifacts = Artifacts.store
+
+let restore_artifacts = Artifacts.restore
diff --git a/src/dune_cache/local.mli b/src/dune_cache/local.mli
new file mode 100644
index 000000000000..83577d7b17d4
--- /dev/null
+++ b/src/dune_cache/local.mli
@@ -0,0 +1,75 @@
+(** This module implements a local cache of build results that are shared across
+    multiple Dune instances running on the same machine. See [doc/dev/cache.md]
+    for design and implementation notes.
+
+    This is meant to be used by several Dune instances running concurrently, and
+    with concurrent evictions from the cache.
+
+    The files in the cache are assumed to be immutable, which is mostly enforced
+    by removing write permissions to any file in the cache, but of course anyone
+    could add back write permissions and corrupt the cache. *)
+
+(* In case we do run into the problem of corrupted cache: we could actually
+   store the mtime in the metadata and complain if it's not what we expected. *)
+
+open Stdune
+module Store_result := Dune_cache_storage.Store_result
+module Restore_result := Dune_cache_storage.Restore_result
+
+module Store_artifacts_result : sig
+  (* Outcomes are ordered in the order of severity. *)
+  type t =
+    | Stored of (Path.Build.t * Digest.t) list
+    | Already_present of (Path.Build.t * Digest.t) list
+    | Error of exn
+      (** [Error _] can happen due to genuine problems (cannot parse internal
+          cache files) or harmless ones (race with a concurrent change to the
+          cache). *)
+    | Will_not_store_due_to_non_determinism of Sexp.t
+end
+
+module Check_artifacts_result : sig
+  type t =
+    | Missing
+    | Already_present of (Path.Build.t * Digest.t) list
+    | Error of exn
+      (** [Error _] can happen due to genuine problems (cannot parse internal
+          cache files) or harmless ones (race with a concurrent change to the
+          cache). *)
+    | Non_determinism_detected of Sexp.t
+end
+
+module Target : sig
+  type t
+
+  (** Prepare the target for storing into the shared cache.
+
+      If the given file is not regular (e.g. a symbolic link), return [None]
+      because such targets are not supported by the shared cache. Otherwise,
+      remove the "write" permissions and record some additional information
+      about the file, such as whether it is executable or not. *)
+  val create : Path.Build.t -> t option
+end
+
+val store_output :
+     mode:Dune_cache_storage.Mode.t
+  -> action_digest:Digest.t
+  -> string
+  -> Store_result.t
+
+val restore_output : action_digest:Digest.t -> string Restore_result.t
+
+(** The [compute_digest] function is passed explicitly because the caller might
+    want to memoize and/or throttle file digest computations. 
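+    For example, a simple unthrottled implementation, matching what the build
+    system passes, is
+    [fun ~executable path ->
+      Fiber.return
+        (Result.try_with (fun () ->
+             Digest.file_with_executable_bit ~executable path))].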
*) +val store_artifacts : + mode:Dune_cache_storage.Mode.t + -> rule_digest:Digest.t + -> compute_digest:(executable:bool -> Path.t -> Digest.t Or_exn.t Fiber.t) + -> Target.t list + -> Store_artifacts_result.t Fiber.t + +val restore_artifacts : + mode:Dune_cache_storage.Mode.t + -> rule_digest:Digest.t + -> target_dir:Path.Build.t + -> (Path.Build.t * Digest.t) list Restore_result.t diff --git a/src/dune_cache_storage/dune b/src/dune_cache_storage/dune new file mode 100644 index 000000000000..baf0314164ed --- /dev/null +++ b/src/dune_cache_storage/dune @@ -0,0 +1,4 @@ +(library + (name dune_cache_storage) + (synopsis "[Internal] Dune cache storage, used for local and cloud caches") + (libraries csexp fiber stdune xdg)) diff --git a/src/dune_cache_storage/dune_cache_storage.ml b/src/dune_cache_storage/dune_cache_storage.ml new file mode 100644 index 000000000000..3f15ef88acf6 --- /dev/null +++ b/src/dune_cache_storage/dune_cache_storage.ml @@ -0,0 +1,304 @@ +open Stdune +module Layout = Layout +module Mode = Mode +module Util = Util + +(* See [doc/dev/cache.md] for design and implementation notes. *) + +module Store_result = struct + type t = + | Stored + | Already_present + | Error of exn + | Will_not_store_due_to_non_determinism of Sexp.t + + let combine x y = + match (x, y) with + | Will_not_store_due_to_non_determinism details, _ -> + Will_not_store_due_to_non_determinism details + | _, Will_not_store_due_to_non_determinism details -> + Will_not_store_due_to_non_determinism details + | Error e, _ -> Error e + | _, Error e -> Error e + | Stored, _ -> Stored + | _, Stored -> Stored + | Already_present, Already_present -> Already_present + + let empty = Already_present + + let of_write_result (t : Util.Write_result.t) = + match t with + | Ok -> Stored + | Already_present -> Already_present + | Error exn -> Error exn +end + +module Restore_result = struct + type 'data t = + | Restored of 'data + | Not_found_in_cache + | Error of exn + + let bind t ~f = + match t with + | Restored data -> f data + | (Not_found_in_cache | Error _) as res -> res + + let map t ~f = + match t with + | Restored data -> Restored (f data) + | (Not_found_in_cache | Error _) as res -> res +end + +let restore_file_content path : string Restore_result.t = + match Io.read_file ~binary:false path with + | contents -> Restored contents + | exception Sys_error (_some_error_message : string) -> + (* CR-someday amokhov: [Io.read_file] doesn't raise "typed" exceptions like + [Unix_error], so we guess here that the exception means "file not found". + Can we make the API of [Io] more precise? *) + Not_found_in_cache + | exception e -> + (* This code path might be unreachable until the above is resolved. *) + Error e + +module Matches_existing_query = struct + type t = + | Match + | Mismatch of Sexp.t +end + +(* Store [metadata] corresponding to a given [rule_or_action_digest] to the + cache using the supplied [to_sexp] serialiser. If the cache already contains + an entry for the hash, we use [matches_existing_entry] to check that the + given [content] matches the previously stored one. If this is not the case, + we return [Will_not_store_due_to_non_determinism]. 
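+   In other words, the first entry stored for a given digest wins: a
+   conflicting entry stored later is rejected instead of overwriting it.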
*) +let store_metadata ~mode ~rule_or_action_digest ~metadata ~to_sexp + ~matches_existing_entry : Store_result.t = + let content = Csexp.to_string (to_sexp metadata) in + let path_in_cache = Layout.metadata_path ~rule_or_action_digest in + match Util.write_atomically ~mode ~content path_in_cache with + | Ok -> Stored + | Error e -> Error e + | Already_present -> ( + match restore_file_content path_in_cache with + | Not_found_in_cache -> + (* This can actually happen, but we think it's an unlikely case. The + [Already_present] branch should already be rarely visited (only if + multiple build systems attempt to store the same entry), but also a + trimmer must be running in parallel to delete this file. *) + Error (Failure "Race in store_metadata") + | Error e -> Error e + | Restored existing_content -> ( + match + (matches_existing_entry metadata ~existing_content + : Matches_existing_query.t) + with + | Mismatch details -> Will_not_store_due_to_non_determinism details + | Match -> + (* At this point we could in principle overwrite the existing metadata + file with the new [content] because it seems fresher. We choose not + to do that because in practice we end up here only due to racing. The + racing processes are not totally ordered, so neither content is + really fresher than the other. *) + Already_present)) + +let restore_metadata_file file ~of_sexp : _ Restore_result.t = + Restore_result.bind (restore_file_content file) ~f:(fun content -> + match Csexp.parse_string content with + | Ok sexp -> ( + match of_sexp sexp with + | Ok content -> Restored content + | Error e -> Error e) + | Error (_offset, msg) -> Error (Failure msg)) + +(* Read a metadata file corresponding to a given [rule_or_action_digest] from + the cache and parse it using the supplied [of_sexp] parser. 
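+   Returns [Not_found_in_cache] if the metadata file is missing, and [Error] if
+   the file exists but cannot be parsed.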
*)
+let restore_metadata ~rule_or_action_digest ~of_sexp : _ Restore_result.t =
+  restore_metadata_file (Layout.metadata_path ~rule_or_action_digest) ~of_sexp
+
+module Value = struct
+  module Metadata_file = struct
+    type t =
+      { metadata : Sexp.t list
+      ; value_digest : Digest.t
+      }
+
+    let to_sexp { metadata; value_digest } =
+      Sexp.List
+        [ List (Atom "metadata" :: metadata)
+        ; List [ Atom "value"; Sexp.Atom (Digest.to_string value_digest) ]
+        ]
+
+    let of_sexp = function
+      | Sexp.List
+          [ List (Atom "metadata" :: metadata)
+          ; List [ Atom "value"; Sexp.Atom value_hash ]
+          ] -> (
+        match Digest.from_hex value_hash with
+        | Some value_digest -> Ok { metadata; value_digest }
+        | None ->
+          Error (Failure "Cannot parse cache metadata: malformed value digest"))
+      | _ -> Error (Failure "Cannot parse cache metadata")
+
+    let restore ~action_digest =
+      restore_metadata ~rule_or_action_digest:action_digest ~of_sexp
+
+    let matches_existing_entry t ~existing_content : Matches_existing_query.t =
+      match Csexp.parse_string existing_content with
+      | Error _ -> Mismatch (Atom "Malformed value in cache")
+      | Ok sexp -> (
+        match of_sexp sexp with
+        | Error _ -> Mismatch (Atom "Malformed value in cache")
+        | Ok existing -> (
+          match Digest.equal t.value_digest existing.value_digest with
+          | true -> Match
+          | false ->
+            Mismatch
+              (Sexp.record
+                 [ ("in_cache", Atom (Digest.to_string existing.value_digest))
+                 ; ("computed", Atom (Digest.to_string t.value_digest))
+                 ])))
+  end
+
+  let store ~mode ~action_digest value : Store_result.t =
+    let value_digest = Digest.string value in
+    let metadata : Metadata_file.t = { metadata = []; value_digest } in
+    match
+      store_metadata ~mode ~rule_or_action_digest:action_digest ~metadata
+        ~to_sexp:Metadata_file.to_sexp
+        ~matches_existing_entry:Metadata_file.matches_existing_entry
+    with
+    | Will_not_store_due_to_non_determinism details ->
+      Will_not_store_due_to_non_determinism details
+    | Error e -> Error e
+    | (Already_present | Stored) as metadata_result ->
+      (* We assume that there are no hash collisions and hence omit the check
+         for non-determinism when writing values. *)
+      let value_result =
+        Util.write_atomically ~mode ~content:value
+          (Layout.value_path ~value_digest)
+        |> Store_result.of_write_result
+      in
+      Store_result.combine metadata_result value_result
+
+  let restore ~action_digest =
+    Restore_result.bind (Metadata_file.restore ~action_digest)
+      ~f:(fun ({ value_digest; _ } : Metadata_file.t) ->
+        restore_file_content (Layout.value_path ~value_digest))
+end
+
+module Artifacts = struct
+  module Metadata_entry = struct
+    type t =
+      { file_name : string
+      ; file_digest : Digest.t
+      }
+
+    let equal x y =
+      Digest.equal x.file_digest y.file_digest
+      && String.equal x.file_name y.file_name
+
+    let to_sexp { file_name; file_digest } =
+      Sexp.List [ Atom file_name; Atom (Digest.to_string file_digest) ]
+
+    let of_sexp = function
+      | Sexp.List [ Atom file_name; Atom file_digest ] -> (
+        match Digest.from_hex file_digest with
+        | Some file_digest -> Ok { file_name; file_digest }
+        | None ->
+          Error
+            (Failure
+               (sprintf "Cannot parse file digest %s in cache metadata entry"
+                  file_digest)))
+      | _ -> Error (Failure "Cannot parse cache metadata entry")
+  end
+
+  module Metadata_file = struct
+    type t =
+      { metadata : Sexp.t list
+      ; (* The entries are listed in the same order that they were provided when
+           storing artifacts in the cache. We keep the order to avoid confusion
+           even though sorting the entries is tempting. 
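+           Note that [matches_existing_entry] compares entry lists
+           positionally, so the order must at least be stable across stores.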
*) + entries : Metadata_entry.t list + } + + let to_sexp { metadata; entries } = + Sexp.List + [ List (Atom "metadata" :: metadata) + ; List (Atom "files" :: List.map entries ~f:Metadata_entry.to_sexp) + ] + + let of_sexp = function + | Sexp.List + [ List (Atom "metadata" :: metadata); List (Atom "files" :: entries) ] + -> ( + let entries = List.map entries ~f:Metadata_entry.of_sexp in + match Result.List.all entries with + | Ok entries -> Ok { metadata; entries } + | Error e -> Error e) + | _ -> Error (Failure "Cannot parse cache metadata") + + let matches_existing_entry t ~existing_content : Matches_existing_query.t = + match Csexp.parse_string existing_content with + | Error _ -> Mismatch (Atom "Malformed value in cache") + | Ok sexp -> ( + match of_sexp sexp with + | Error _ -> Mismatch (Atom "Malformed value in cache") + | Ok existing -> ( + match List.equal Metadata_entry.equal t.entries existing.entries with + | true -> Match + | false -> + Mismatch + (Sexp.record + [ ( "in_cache" + , Sexp.List + (List.map ~f:Metadata_entry.to_sexp existing.entries) ) + ; ( "computed" + , Sexp.List (List.map ~f:Metadata_entry.to_sexp t.entries) ) + ]))) + + let store t ~mode ~rule_digest = + store_metadata ~mode ~rule_or_action_digest:rule_digest ~metadata:t + ~to_sexp ~matches_existing_entry + + let restore ~rule_digest = + restore_metadata ~rule_or_action_digest:rule_digest ~of_sexp + end + + let list ~rule_digest = + Restore_result.map (Metadata_file.restore ~rule_digest) + ~f:(fun ({ entries; _ } : Metadata_file.t) -> entries) +end + +module Metadata = struct + type t = + | Artifacts of Artifacts.Metadata_file.t + | Value of Value.Metadata_file.t + + let of_sexp sexp : (t, exn) result = + match Artifacts.Metadata_file.of_sexp sexp with + | Ok res -> Ok (Artifacts res) + | Error _exn -> + (* CR-someday amokhov: Here we are discarding the [_exn] but it may be + better to combine the two exceptions when both parsers fail. *) + Result.map (Value.Metadata_file.of_sexp sexp) ~f:(fun res -> Value res) + + let restore ~metadata_path ~rule_or_action_digest = + restore_metadata_file (metadata_path ~rule_or_action_digest) ~of_sexp + + let restore = restore ~metadata_path:Layout.metadata_path +end + +module Temp = Temp.Monad (struct + type nonrec 'a t = 'a Fiber.t + + let protect ~f ~finally = + Fiber.finalize f ~finally:(fun () -> finally () |> Fiber.return) +end) + +let with_temp_path ?(prefix = "dune") ~suffix f = + Temp.with_temp_path ~dir:Layout.temp_path ~prefix ~suffix ~f + +let with_temp_dir ?(prefix = "dune") ~suffix f = + Temp.with_temp_dir ~parent_dir:Layout.temp_path ~prefix ~suffix ~f diff --git a/src/dune_cache_storage/dune_cache_storage.mli b/src/dune_cache_storage/dune_cache_storage.mli new file mode 100644 index 000000000000..c5b28bf7c437 --- /dev/null +++ b/src/dune_cache_storage/dune_cache_storage.mli @@ -0,0 +1,105 @@ +(** Basic functionality for manipulating the Dune cache storage, used by the + local and cloud caches. *) + +open Stdune +module Layout = Layout +module Mode = Mode +module Util = Util + +module Store_result : sig + (** Outcomes are ordered in the order of severity. *) + type t = + | Stored + | Already_present + | Error of exn + (** [Error _] can happen due to genuine problems (cannot parse internal + cache files) or harmless ones (race with a concurrent change to the + cache). 
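+          Callers typically treat both as a cache miss: the build system, for
+          example, logs the error and falls back to executing the action.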
*)
+    | Will_not_store_due_to_non_determinism of Sexp.t
+
+  (** We consider [Will_not_store_due_to_non_determinism] as an error of higher
+      severity compared to [Error], so we make sure to propagate it all the way
+      up. *)
+  val combine : t -> t -> t
+
+  (** This is a neutral result with respect to the above function [combine], so
+      it can be used as a starting value when accumulating multiple results. *)
+  val empty : t
+end
+
+module Restore_result : sig
+  (** Note: [Error _] can be returned due to genuine problems (e.g. if we cannot
+      parse an internal cache file) or harmless ones (e.g. if another process
+      deletes a cache file between the existence check and the load). *)
+  type 'data t =
+    | Restored of 'data
+    | Not_found_in_cache
+    | Error of exn
+
+  val bind : 'a t -> f:('a -> 'b t) -> 'b t
+end
+
+(** A [Value] entry corresponds to the standard output of an action. *)
+module Value : sig
+  module Metadata_file : sig
+    type t =
+      { metadata : Sexp.t list
+      ; value_digest : Digest.t
+      }
+
+    val restore : action_digest:Digest.t -> t Restore_result.t
+  end
+
+  val store : mode:Mode.t -> action_digest:Digest.t -> string -> Store_result.t
+
+  val restore : action_digest:Digest.t -> string Restore_result.t
+end
+
+(** An [Artifacts] entry corresponds to the targets produced by an action. *)
+module Artifacts : sig
+  module Metadata_entry : sig
+    type t =
+      { file_name : string
+      ; file_digest : Digest.t
+      }
+  end
+
+  module Metadata_file : sig
+    type t =
+      { metadata : Sexp.t list
+      ; entries : Metadata_entry.t list
+      }
+
+    val store : t -> mode:Mode.t -> rule_digest:Digest.t -> Store_result.t
+
+    val restore : rule_digest:Digest.t -> t Restore_result.t
+  end
+
+  val list : rule_digest:Digest.t -> Metadata_entry.t list Restore_result.t
+end
+
+module Metadata : sig
+  type t =
+    | Artifacts of Artifacts.Metadata_file.t
+    | Value of Value.Metadata_file.t
+
+  val restore : rule_or_action_digest:Digest.t -> t Restore_result.t
+end
+
+(** [with_temp_path ?prefix ~suffix f] creates a file in [Layout.temp_path],
+    then passes it to the callback [f], and makes sure the file is deleted when
+    [f] completes or raises. The base name of the temporary file is formed by
+    concatenating the [prefix] (which is set to "dune" by default), then a
+    suitably chosen integer number, then [suffix]. *)
+val with_temp_path :
+     ?prefix:string
+  -> suffix:string
+  -> (Path.t Or_exn.t -> 'a Fiber.t)
+  -> 'a Fiber.t
+
+(** Like [with_temp_path] but creates a directory in [Layout.temp_path]. *)
+val with_temp_dir :
+     ?prefix:string
+  -> suffix:string
+  -> (Path.t Or_exn.t -> 'a Fiber.t)
+  -> 'a Fiber.t
diff --git a/src/dune_cache_storage/layout.ml b/src/dune_cache_storage/layout.ml
new file mode 100644
index 000000000000..6f198ea89e6a
--- /dev/null
+++ b/src/dune_cache_storage/layout.ml
@@ -0,0 +1,43 @@
+open Stdune
+
+let default_root_path () =
+  Path.L.relative
+    (Path.of_filename_relative_to_initial_cwd Xdg.cache_dir)
+    [ "dune"; "db" ]
+
+let root_path =
+  let var = "DUNE_CACHE_ROOT" in
+  match Sys.getenv_opt var with
+  | None -> default_root_path ()
+  | Some path ->
+    if Filename.is_relative path then
+      failwith (sprintf "%s should be an absolute path, but is %s" var path);
+    Path.of_filename_relative_to_initial_cwd path
+
+let ( / ) = Path.relative
+
+(* We version metadata and actual cache content separately. 
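+   This makes it possible to bump the metadata version, as in the [v4] to [v5]
+   move in this change, without invalidating the content-addressed [files] and
+   [values] directories.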
*)
+let metadata_storage_path = root_path / "meta" / "v5"
+
+let file_storage_path = root_path / "files" / "v4"
+
+let value_storage_path = root_path / "values" / "v3"
+
+let cache_path ~dir ~hash =
+  let two_first_chars = sprintf "%c%c" hash.[0] hash.[1] in
+  dir / two_first_chars / hash
+
+let metadata_path ~rule_or_action_digest =
+  cache_path ~dir:metadata_storage_path
+    ~hash:(Digest.to_string rule_or_action_digest)
+
+let value_path ~value_digest =
+  cache_path ~dir:value_storage_path ~hash:(Digest.to_string value_digest)
+
+let file_path ~file_digest =
+  cache_path ~dir:file_storage_path ~hash:(Digest.to_string file_digest)
+
+let temp_path = root_path / "temp"
+
+let root_path_subdirectories =
+  [ metadata_storage_path; file_storage_path; value_storage_path; temp_path ]
diff --git a/src/dune_cache_storage/layout.mli b/src/dune_cache_storage/layout.mli
new file mode 100644
index 000000000000..6e272ff81a4f
--- /dev/null
+++ b/src/dune_cache_storage/layout.mli
@@ -0,0 +1,57 @@
+(** The layout of the Dune cache storage, used by local and cloud build caches. *)
+
+(* CR-someday amokhov: Jenga used "value" entries to store the standard output
+   of anonymous actions, but Dune currently stores everything in "file" entries.
+   We decided to keep support for values for now but will re-evaluate this
+   decision in 6 months. *)
+
+open Stdune
+
+(** The default path to the root directory of the cache. *)
+val default_root_path : unit -> Path.t
+
+(** The path to the root directory of the cache. *)
+val root_path : Path.t
+
+(** This directory stores metadata files, one for each historically executed
+    build rule or output-producing action. (While this is a convenient mental
+    model, in reality we need to occasionally remove some outdated metadata
+    files to free disk space.)
+
+    A metadata file corresponding to a build rule is named by the rule hash and
+    stores file names and content hashes of all artifacts produced by the rule.
+
+    A metadata file corresponding to an output-producing action is named by the
+    action hash and stores the content hash of the resulting output. *)
+val metadata_storage_path : Path.t
+
+(** Path to the metadata file corresponding to a build action or rule with the
+    given [rule_or_action_digest]. *)
+val metadata_path : rule_or_action_digest:Digest.t -> Path.t
+
+(** This is a storage for artifacts, where files named by content hashes store
+    the matching contents. We will create hard links to these files from build
+    directories and rely on the hard link count, as well as on the last access
+    time as useful metrics during cache trimming. *)
+val file_storage_path : Path.t
+
+(** Path to the artifact corresponding to a given [file_digest]. *)
+val file_path : file_digest:Digest.t -> Path.t
+
+(** This is a storage for outputs and, more generally, other values that the
+    build system might choose to store in the cache in future. As in
+    [file_storage_path], we store the values in files named by their content
+    hashes. However, these files will always have a hard link count of 1
+    because they do not appear anywhere in build directories. By storing them in
+    a separate directory, we simplify the job of the cache trimmer. *)
val value_storage_path : Path.t
+
+(** Path to the value corresponding to a given [value_digest]. *)
+val value_path : value_digest:Digest.t -> Path.t
+
+(** This directory contains temporary files used for atomic file operations
+    needed when storing new artifacts in the cache. See [write_atomically]. 
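+    Keeping it under [root_path] ensures that temporary files live on the same
+    partition as the rest of the cache, so they can be moved into place with an
+    atomic [rename] rather than a copy.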
*) +val temp_path : Path.t + +(** All cache directories. *) +val root_path_subdirectories : Path.t list diff --git a/src/dune_cache_storage/mode.ml b/src/dune_cache_storage/mode.ml new file mode 100644 index 000000000000..d6c25541ad62 --- /dev/null +++ b/src/dune_cache_storage/mode.ml @@ -0,0 +1,20 @@ +open Stdune + +type t = + | Hardlink + | Copy + +let all = [ ("hardlink", Hardlink); ("copy", Copy) ] + +let of_string s = + match List.assoc all s with + | Some mode -> Result.Ok mode + | None -> Result.Error (Format.sprintf "invalid cache storage mode: %s" s) + +let to_string = function + | Hardlink -> "hardlink" + | Copy -> "copy" + +let to_dyn = function + | Copy -> Dyn.Variant ("Copy", []) + | Hardlink -> Dyn.Variant ("Hardlink", []) diff --git a/src/dune_cache_storage/mode.mli b/src/dune_cache_storage/mode.mli new file mode 100644 index 000000000000..a1998b957afb --- /dev/null +++ b/src/dune_cache_storage/mode.mli @@ -0,0 +1,33 @@ +open Stdune + +(** This library can take advantage of hard links to implement some functions + more efficiently and reliably. If your file system supports hard links, we + recommend that you use the [Hardlink] mode. + + Here is a summary of differences between the two modes: + + - In the [Copy] mode, cache entries are stored and restored by copying, + which is both slower and takes more disk space. + + - In the [Hardlink] mode, adding a new entry to the cache is atomic, i.e. an + existing entry is never overwritten. In the [Copy] mode, we currently do + not guarantee atomicity: there is a small chance that an existing cache + entry is silently overwritten, which might interfere with concurrent + reading of that entry. + + - In the [Hardlink] mode, a cache entry can be corrupted by modifying the + hard link that points to it from the build directory. *) +type t = + | Hardlink + | Copy + +val all : (string * t) list + +val to_string : t -> string + +val of_string : string -> (t, string) result + +val to_dyn : t -> Dyn.t + +(* CR-someday amokhov: Add a function to choose the mode by detecting whether + hard links can be created. *) diff --git a/src/dune_cache_storage/util.ml b/src/dune_cache_storage/util.ml new file mode 100644 index 000000000000..d6ebff01b39b --- /dev/null +++ b/src/dune_cache_storage/util.ml @@ -0,0 +1,55 @@ +open Stdune + +module Optimistically = struct + let rename ~src ~dst = + try Path.rename src dst with + | Sys_error _ -> + Path.mkdir_p (Path.parent_exn dst); + Path.rename src dst + + let link ~src ~dst = + try Path.link src dst with + | Unix.Unix_error (Unix.ENOENT, _, _) -> + Path.mkdir_p (Path.parent_exn dst); + Path.link src dst +end + +let link_or_copy ~mode ~src ~dst = + match (mode : Mode.t) with + | Hardlink -> Path.link src dst + | Copy -> Io.copy_file ~src ~dst () + +module Write_result = struct + type t = + | Ok + | Already_present + | Error of exn +end + +let add_atomically ~mode ~src ~dst : Write_result.t = + match (mode : Mode.t) with + | Hardlink -> ( + match Optimistically.link ~src ~dst with + | () -> Ok + | exception Unix.Unix_error (Unix.EEXIST, _, _) -> Already_present + | exception e -> Error e) + | Copy -> ( + (* CR-someday amokhov: There is a race here. If the destination [dst] is + created after [Path.exists] but before [Path.rename], [dst] will be + silently overwritten. Find a good way to avoid this race. 
*) + match Path.exists dst with + | true -> Already_present + | false -> ( + match Optimistically.rename ~src ~dst with + | () -> Ok + | exception e -> Error e)) + +(* CR-someday amokhov: Switch to [renameat2] to go from two operations to one. *) +let write_atomically ~mode ~content dst : Write_result.t = + Temp.with_temp_path ~dir:Layout.temp_path ~prefix:"dune" ~suffix:"write" + ~f:(function + | Error e -> Write_result.Error e + | Ok temp_path -> ( + match Io.write_file ~binary:false temp_path content with + | exception e -> Error e + | () -> add_atomically ~mode ~src:temp_path ~dst)) diff --git a/src/dune_cache_storage/util.mli b/src/dune_cache_storage/util.mli new file mode 100644 index 000000000000..1f9d1faf7d21 --- /dev/null +++ b/src/dune_cache_storage/util.mli @@ -0,0 +1,49 @@ +open Stdune + +module Write_result : sig + type t = + | Ok + | Already_present + | Error of exn +end + +(** Write a given [content] to a temporary file in [Layout.temp_path], and then + atomically move it to a specified destination. + + If the destination already exists, return [Already_present]. + + When the [mode] is set to [Copy], there is a small chance that atomicity is + violated, in which case the destination is silently overwritten and the + function returns [Ok] instead of [Already_present]. *) +val write_atomically : mode:Mode.t -> content:string -> Path.t -> Write_result.t + +(** A primitive for atomically adding entries to the cache. The behaviour + differs depending on the [mode]: + + - [Hardlink]: If [dst] already exists, return [Already_present]. Otherwise, + create a hard link [dst] pointing to [src]. + + - [Copy]: If [dst] already exists, return [Already_present]. Otherwise, + rename [src] to [dst]. If [dst] is created after the file existence check + but before renaming, [dst] will be silently overwritten. *) +val add_atomically : mode:Mode.t -> src:Path.t -> dst:Path.t -> Write_result.t + +(** Create a hard link or copy depending on the [mode]. *) +val link_or_copy : mode:Mode.t -> src:Path.t -> dst:Path.t -> unit + +(** The functions in this module are bare wrappers that assume that the "target + directory" (whatever that means for a given function) already exists. If the + wrapped function fails, then the "target directory" is created, and the + wrapped function called again. + + The objective is to call [Path.mkdir_p] only when needed, as it entails an + additional system call. When this module was first introduced, [mkdir_p] was + much more expensive (one system call per path component), so the benefit is + much smaller now. *) +module Optimistically : sig + (** Wrapper around [Path.rename]. *) + val rename : src:Path.t -> dst:Path.t -> unit + + (** Wrapper around [Path.link]. 
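+      Note that [Path.link] fails with [EEXIST] if [dst] already exists;
+      [add_atomically] relies on this to detect entries that are already
+      present in the cache.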
*) + val link : src:Path.t -> dst:Path.t -> unit +end diff --git a/src/dune_config/dune b/src/dune_config/dune index 52172ffe98f4..1b3286f72595 100644 --- a/src/dune_config/dune +++ b/src/dune_config/dune @@ -4,7 +4,8 @@ stdune xdg dune_lang - cache + dune_cache + dune_cache_storage dune_engine dune_rpc_private stats diff --git a/src/dune_config/dune_config.ml b/src/dune_config/dune_config.ml index a43809178276..2c4ca8a32e3d 100644 --- a/src/dune_config/dune_config.ml +++ b/src/dune_config/dune_config.ml @@ -117,18 +117,18 @@ module Caching = struct end module Duplication = struct - type t = Cache.Duplication_mode.t option + type t = Dune_cache_storage.Mode.t option let all = ("auto", None) :: List.map ~f:(fun (name, mode) -> (name, Some mode)) - Cache.Duplication_mode.all + Dune_cache_storage.Mode.all let decode = enum all - let to_dyn = Cache.Duplication_mode.to_dyn + let to_dyn = Dune_cache_storage.Mode.to_dyn end end diff --git a/src/dune_config/dune_config.mli b/src/dune_config/dune_config.mli index 26cd22fb54c9..1baa32402a1b 100644 --- a/src/dune_config/dune_config.mli +++ b/src/dune_config/dune_config.mli @@ -40,7 +40,7 @@ module Caching : sig end module Duplication : sig - type t = Cache.Duplication_mode.t option + type t = Dune_cache_storage.Mode.t option val all : (string * t) list diff --git a/src/dune_engine/action_to_sh.mli b/src/dune_engine/action_to_sh.mli index e4d7d7600d6b..1ad4f219127c 100644 --- a/src/dune_engine/action_to_sh.mli +++ b/src/dune_engine/action_to_sh.mli @@ -1,4 +1,4 @@ (** Convert an action to a shell command suitable for [/bin/sh] *) open Stdune -val pp : Action.For_shell.t -> unit Pp.t +val pp : Action.For_shell.t -> _ Pp.t diff --git a/src/dune_engine/build_system.ml b/src/dune_engine/build_system.ml index cbe2b8a0f067..f3cb0f4478f0 100644 --- a/src/dune_engine/build_system.ml +++ b/src/dune_engine/build_system.ml @@ -359,11 +359,6 @@ module Context_or_install = struct | Context s -> Context_name.to_dyn s end -type caching = - { cache : (module Cache.Caching) - ; check_probability : float - } - module Error = struct type t = Exn_with_backtrace.t @@ -384,12 +379,10 @@ type t = -> extra_sub_directories_to_keep Memo.Build.t) option) Fdecl.t - ; mutable caching : caching option ; sandboxing_preference : Sandbox_mode.t list ; mutable rule_done : int ; mutable rule_total : int ; mutable errors : Exn_with_backtrace.t list - ; vcs : Vcs.t list Fdecl.t ; promote_source : ?chmod:(int -> int) -> src:Path.Build.t @@ -399,6 +392,7 @@ type t = ; locks : (Path.t, Fiber.Mutex.t) Table.t ; build_mutex : Fiber.Mutex.t ; stats : Stats.t option + ; cache_config : Dune_cache.Config.t } let t = ref None @@ -431,10 +425,6 @@ let set_rule_generators ~init ~gen_rules = Fdecl.set t.init_rules init_rules; Fdecl.set t.gen_rules gen_rules -let get_cache () = - let t = t () in - t.caching - let get_dir_triage t ~dir = match Dpath.analyse_dir dir with | Source dir -> @@ -524,7 +514,7 @@ let report_rule_conflict fn (rule' : Rule.t) (rule : Rule.t) = | _ -> []) (* This contains the targets of the actions that are being executed. On exit, we - need to delete them as they might contain garbage *) + need to delete them as they might contain garbage. 
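+   Deleting them ensures that a subsequent build does not mistake a partially
+   written file for a valid target.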
*) let pending_targets = ref Path.Build.Set.empty let () = @@ -533,7 +523,7 @@ let () = pending_targets := Path.Build.Set.empty; Path.Build.Set.iter fns ~f:(fun p -> Path.unlink_no_err (Path.build p))) -let compute_targets_digests targets = +let compute_targets_and_digests targets = match List.map (Path.Build.Set.to_list targets) ~f:(fun target -> (target, Cached_digest.build_file target)) @@ -541,7 +531,7 @@ let compute_targets_digests targets = | l -> Some l | exception (Unix.Unix_error _ | Sys_error _) -> None -let compute_targets_digests_or_raise_error exec_params ~loc targets = +let compute_targets_and_digests_or_raise_error exec_params ~loc targets = let remove_write_permissions = (* Remove write permissions on targets. A first theoretical reason is that the build process should be a computational graph and targets should not @@ -1355,6 +1345,135 @@ end = struct in Stats.emit stats event) + let try_to_restore_from_shared_cache ~mode ~rule_digest ~target_dir = + let hex = Digest.to_string rule_digest in + match Dune_cache.Local.restore_artifacts ~mode ~rule_digest ~target_dir with + | Restored res -> + Log.info [ Pp.textf "cache restore success [%s]" hex ]; + Some res + | Not_found_in_cache -> + Log.info [ Pp.textf "cache restore failure [%s]: not found in cache" hex ]; + None + | Error exn -> + Log.info + [ Pp.textf "cache restore error [%s]: %s" hex (Printexc.to_string exn) ]; + None + + let execute_action_for_rule t ~rule_digest ~action ~deps ~loc ~context ~locks + ~execution_parameters ~sandbox_mode ~env ~dir ~targets = + let open Fiber.O in + pending_targets := Path.Build.Set.union targets !pending_targets; + let sandbox = + Option.map sandbox_mode ~f:(fun mode -> + let sandbox_suffix = rule_digest |> Digest.to_string in + (Path.Build.relative sandbox_dir sandbox_suffix, mode)) + in + let* sandboxed, action = + match sandbox with + | None -> Fiber.return (None, action) + | Some (sandbox_dir, sandbox_mode) -> + Path.rm_rf (Path.build sandbox_dir); + let sandboxed path : Path.Build.t = + Path.Build.append_local sandbox_dir (Path.Build.local path) + in + let* () = + Fiber.parallel_iter_set + (module Path.Set) + (Dep.Facts.dirs deps) + ~f:(fun path -> + Memo.Build.run + (match Path.as_in_build_dir path with + | None -> Fs.assert_exists ~loc path + | Some path -> Fs.mkdir_p (sandboxed path))) + in + let+ () = Memo.Build.run (Fs.mkdir_p (sandboxed dir)) in + let deps = + if + Execution_parameters.should_expand_aliases_when_sandboxing + execution_parameters + then + Dep.Facts.paths deps + else + Dep.Facts.paths_without_expanding_aliases deps + in + ( Some sandboxed + , Action.sandbox action ~sandboxed ~mode:sandbox_mode ~deps ) + and* () = + let chdirs = Action.chdirs action in + Fiber.parallel_iter_set + (module Path.Set) + chdirs + ~f:(fun p -> Memo.Build.run (Fs.mkdir_p_or_check_exists ~loc p)) + in + let build_deps deps = Memo.Build.run (build_deps deps) in + let+ exec_result = + with_locks t locks ~f:(fun () -> + let copy_files_from_sandbox sandboxed = + Path.Build.Set.iter targets ~f:(fun target -> + rename_optional_file ~src:(sandboxed target) ~dst:target) + in + let+ exec_result = + Action_exec.exec ~context ~env ~targets ~rule_loc:loc ~build_deps + ~execution_parameters action + in + Option.iter sandboxed ~f:copy_files_from_sandbox; + exec_result) + in + Option.iter sandbox ~f:(fun (p, _mode) -> Path.rm_rf (Path.build p)); + (* All went well, these targets are no longer pending *) + pending_targets := Path.Build.Set.diff !pending_targets targets; + exec_result + + let 
try_to_store_to_shared_cache ~mode ~rule_digest ~action ~targets = + let open Fiber.O in + let hex = Digest.to_string rule_digest in + let pp_error msg = + let action = Action.for_shell action |> Action_to_sh.pp in + Pp.concat + [ Pp.textf "cache store error [%s]: %s after executing" hex msg + ; Pp.space + ; Pp.char '(' + ; action + ; Pp.char ')' + ] + in + let update_cached_digests ~targets_and_digests = + List.iter targets_and_digests ~f:(fun (target, digest) -> + Cached_digest.set (Path.build target) digest) + in + match + Path.Build.Set.to_list_map targets ~f:Dune_cache.Local.Target.create + |> Option.List.all + with + | None -> Fiber.return None + | Some targets -> ( + let compute_digest ~executable path = + Result.try_with (fun () -> + Digest.file_with_executable_bit ~executable path) + |> Fiber.return + in + Dune_cache.Local.store_artifacts ~mode ~rule_digest ~compute_digest + targets + >>| function + | Stored targets_and_digests -> + (* CR-someday amokhov: Here and in the case below we can inform the + cloud daemon that a new cache entry can be uploaded to the cloud. *) + Log.info [ Pp.textf "cache store success [%s]" hex ]; + update_cached_digests ~targets_and_digests; + Some targets_and_digests + | Already_present targets_and_digests -> + Log.info [ Pp.textf "cache store skipped [%s]: already present" hex ]; + update_cached_digests ~targets_and_digests; + Some targets_and_digests + | Error exn -> + Log.info [ pp_error (Printexc.to_string exn) ]; + None + | Will_not_store_due_to_non_determinism sexp -> + (* CR-someday amokhov: We should systematically log all warnings. *) + Log.info [ pp_error (Sexp.to_string sexp) ]; + User_warning.emit [ pp_error (Sexp.to_string sexp) ]; + None) + type rule_kind = | Normal_rule | Anonymous_action @@ -1432,65 +1551,56 @@ end = struct force_rerun || Dep.Map.has_universe deps in let rule_digest = compute_rule_digest rule ~deps ~action ~sandbox_mode in - let () = - (* FIXME: Rule hinting provide no relevant speed increase for now. - Disable the overhead until we make a decision. *) - if false then - if Action.is_useful_to_distribute action = Maybe then - let f { cache = (module Caching); _ } = - match Caching.Cache.hint Caching.cache [ rule_digest ] with - | Result.Ok _ -> () - | Result.Error e -> - User_warning.emit [ Pp.textf "unable to hint the cache: %s" e ] - in - Option.iter ~f t.caching + let can_go_in_shared_cache = + not + (always_rerun || is_action_dynamic + || Action.is_useful_to_memoize action = Clearly_not) in - let do_not_memoize = - always_rerun || is_action_dynamic - || Action.is_useful_to_memoize action = Clearly_not + (* We don't need to digest target names here, as these are already part of + the rule digest. *) + let digest_of_targets_and_digests l = + Digest.generic (List.map l ~f:snd) in - (* We don't need to digest names here, as these are already part of the - rule digest. *) - let digest_of_targets_digests l = Digest.generic (List.map l ~f:snd) in - (* Here we determine if we need to rerun the action based on information - stored in Trace_db. If it does, [targets_digests] is [None], otherwise - it is [Some l] where [l] is the list of targets with their digests. *) - let* (targets_digests : (Path.Build.t * Digest.t) list option) = + (* Here we determine if we need to execute the action based on information + stored in [Trace_db]. If we need to, then [targets_and_digests] will be + [None], otherwise it will be [Some l] where [l] is the list of targets + and their digests. 
*) + let* (targets_and_digests : (Path.Build.t * Digest.t) list option) = if always_rerun then Fiber.return None else (* [prev_trace] will be [None] if rule is run for the first time. *) let prev_trace = Trace_db.get (Path.build head_target) in - let prev_trace_and_targets_digests = + let prev_trace_with_targets_and_digests = match prev_trace with | None -> None | Some prev_trace -> ( if prev_trace.rule_digest <> rule_digest then None else - (* [targets_digest] will be [None] if not all targets were - build. *) - match compute_targets_digests targets with + (* [targets_and_digests] will be [None] if not all targets were + built. *) + match compute_targets_and_digests targets with | None -> None - | Some targets_digests -> + | Some targets_and_digests -> if Digest.equal prev_trace.targets_digest - (digest_of_targets_digests targets_digests) + (digest_of_targets_and_digests targets_and_digests) then - Some (prev_trace, targets_digests) + Some (prev_trace, targets_and_digests) else None) in - match prev_trace_and_targets_digests with + match prev_trace_with_targets_and_digests with | None -> Fiber.return None - | Some (prev_trace, targets_digests) -> + | Some (prev_trace, targets_and_digests) -> (* CR-someday aalekseyev: If there's a change at one of the last stages, we still re-run all the previous stages, which is a bit of a waste. We could remember what stage needs re-running and only re-run that (and later stages). *) let rec loop stages = match stages with - | [] -> Fiber.return (Some targets_digests) + | [] -> Fiber.return (Some targets_and_digests) | (deps, old_digest) :: rest -> let deps = Action_exec.Dynamic_dep.Set.to_dep_set deps in let* deps = build_deps deps in @@ -1502,208 +1612,85 @@ end = struct in loop prev_trace.dynamic_deps_stages in - let sandbox = - Option.map sandbox_mode ~f:(fun mode -> - let sandbox_suffix = rule_digest |> Digest.to_string in - (Path.Build.relative sandbox_dir sandbox_suffix, mode)) - in - let* targets_digests = - match targets_digests with + let* targets_and_digests = + match targets_and_digests with | Some x -> Fiber.return x | None -> ( - let from_cache = - match (do_not_memoize, t.caching) with - | true, _ - | _, None -> + (* Step I. Remove stale targets both from the digest table and from + the build directory. *) + Path.Build.Set.iter targets ~f:(fun target -> + Cached_digest.remove (Path.build target); + Path.Build.unlink_no_err target); + + (* Step II. Try to restore artifacts from the shared cache if the + following conditions are met. + + 1. The rule can be cached, i.e. [can_go_in_shared_cache] is [true]. + + 2. The shared cache is [Enabled]. + + 3. The rule is not selected for a reproducibility check. *) + let targets_and_digests_from_cache = + match (can_go_in_shared_cache, t.cache_config) with + | false, _ + | _, Disabled -> None - | false, Some { cache = (module Caching) as cache; _ } -> ( - match Caching.Cache.search Caching.cache rule_digest with - | Ok (_, files) -> - Log.info - [ Pp.textf "cache hit for %s" (Digest.to_string rule_digest) ]; - Some (files, cache) - | Error msg -> - Log.info - [ Pp.textf "cache miss for %s: %s" - (Digest.to_string rule_digest) - msg - ]; - None) - and cache_checking = - match t.caching with - | Some { check_probability; _ } -> - Random.float 1. 
< check_probability - | _ -> false - in - let remove_targets () = - Path.Build.Set.iter targets ~f:(fun target -> - Cached_digest.remove (Path.build target); - Path.unlink_no_err (Path.build target)) - in - let pulled_from_cache = - match from_cache with - | Some (files, (module Caching)) when not cache_checking -> ( - let () = remove_targets () in - let retrieve (file : Cache.File.t) = - let retrieved = Caching.Cache.retrieve Caching.cache file in - Cached_digest.set retrieved file.digest; - (file.path, file.digest) - in - match List.map files ~f:retrieve with - | exception Unix.(Unix_error (ENOENT, _, f)) -> - Log.info - [ Pp.textf "missing data file for cached rule %s: %s" - (Digest.to_string rule_digest) - f - ]; - None - | exception Sys_error m -> - Log.info [ Pp.textf "error retrieving data file: %s" m ]; + | true, Enabled { storage_mode = mode; check_probability } -> ( + match Random.float 1. < check_probability with + | true -> + (* CR-someday amokhov: Here we re-execute the rule, as in Jenga. + To make [check_probability] more meaningful, we could first + make sure that the shared cache actually does contain an + entry for [rule_digest]. *) None - | targets_digests -> - Trace_db.set (Path.build head_target) - (* We do not cache dynamic actions so [dynamic_deps_stages] is - always an empty list here. *) - { rule_digest - ; targets_digest = digest_of_targets_digests targets_digests - ; dynamic_deps_stages = [] - }; - Some targets_digests) - | _ -> None + | false -> + (* CR-someday amokhov: If the cloud cache is enabled, then + before attempting to restore artifacts from the shared cache, + we should send a download request for [rule_digest] to the + cloud. *) + try_to_restore_from_shared_cache ~mode ~rule_digest + ~target_dir:rule.dir) in - match pulled_from_cache with - | Some x -> Fiber.return x + match targets_and_digests_from_cache with + | Some targets_and_digests -> Fiber.return targets_and_digests | None -> - let () = remove_targets () in - pending_targets := Path.Build.Set.union targets !pending_targets; - let* sandboxed, action = - match sandbox with - | None -> Fiber.return (None, action) - | Some (sandbox_dir, sandbox_mode) -> - Path.rm_rf (Path.build sandbox_dir); - let sandboxed path : Path.Build.t = - Path.Build.append_local sandbox_dir (Path.Build.local path) - in - let* () = - Fiber.parallel_iter_set - (module Path.Set) - (Dep.Facts.dirs deps) - ~f:(fun path -> - Memo.Build.run - (match Path.as_in_build_dir path with - | None -> Fs.assert_exists ~loc path - | Some path -> Fs.mkdir_p (sandboxed path))) - in - let+ () = Memo.Build.run (Fs.mkdir_p (sandboxed dir)) in - let deps = - if - Execution_parameters.should_expand_aliases_when_sandboxing - execution_parameters - then - Dep.Facts.paths deps - else - Dep.Facts.paths_without_expanding_aliases deps - in - ( Some sandboxed - , Action.sandbox action ~sandboxed ~mode:sandbox_mode ~deps ) - and* () = - let chdirs = Action.chdirs action in - Fiber.parallel_iter_set - (module Path.Set) - chdirs - ~f:(fun p -> Memo.Build.run (Fs.mkdir_p_or_check_exists ~loc p)) - in + (* Step III. Execute the build action. 
*)
            let* exec_result =
-             with_locks t locks ~f:(fun () ->
-                 let copy_files_from_sandbox sandboxed =
-                   Path.Build.Set.iter targets ~f:(fun target ->
-                       rename_optional_file ~src:(sandboxed target) ~dst:target)
-                 in
-                 let+ exec_result =
-                   Action_exec.exec ~context ~env ~targets ~rule_loc:loc
-                     ~build_deps ~execution_parameters action
-                 in
-                 Option.iter sandboxed ~f:copy_files_from_sandbox;
-                 exec_result)
-           in
-           Option.iter sandbox ~f:(fun (p, _mode) -> Path.rm_rf (Path.build p));
-           (* All went well, these targets are no longer pending *)
-           pending_targets := Path.Build.Set.diff !pending_targets targets;
-           let targets_digests =
-             compute_targets_digests_or_raise_error execution_parameters ~loc
-               targets
-           in
-           let targets_digest = digest_of_targets_digests targets_digests in
-           let () =
-             (* Check cache. We don't check for missing file in the cache,
-                since the file list is part of the rule hash this really never
-                should happen. *)
-             match from_cache with
-             | Some (cached, _) when cache_checking ->
-               (* This being [false] is unexpected and means we have a hash
-                  collision *)
-               let data_are_ok =
-                 match
-                   List.for_all2 targets_digests cached
-                     ~f:(fun (target, _) (c : Cache.File.t) ->
-                       Path.Build.equal target c.path)
-                 with
-                 | Ok b -> b
-                 | Error `Length_mismatch -> false
-               in
-               if not data_are_ok then
-                 let open Pp.O in
-                 let pp x l ~f =
-                   Pp.box ~indent:2
-                     (Pp.verbatim x
-                     ++ Dyn.pp
-                          (Dyn.Encoder.list Path.Build.to_dyn (List.map l ~f))
-                     )
-                 in
-                 User_warning.emit
-                   [ Pp.text "unexpected list of targets in the cache"
-                   ; pp "expected: " targets_digests ~f:fst
-                   ; pp "got: " cached ~f:(fun (c : Cache.File.t) ->
-                         c.path)
-                   ]
-               else
-                 List.iter2 targets_digests cached
-                   ~f:(fun (_, digest) (c : Cache.File.t) ->
-                     if not (Digest.equal digest c.digest) then
-                       User_warning.emit
-                         [ Pp.textf "cache mismatch on %s: hash differ with %s"
-                             (Path.Build.to_string_maybe_quoted c.path)
-                             (Path.Build.to_string_maybe_quoted c.path)
-                         ])
-             | _ -> ()
+             execute_action_for_rule t ~rule_digest ~action ~deps ~loc ~context
+               ~locks ~execution_parameters ~sandbox_mode ~env ~dir ~targets
            in
-           let () =
-             (* Promote *)
-             match t.caching with
-             | Some { cache = (module Caching : Cache.Caching); _ }
-               when not do_not_memoize ->
-               let report msg =
-                 let targets =
-                   Path.Build.Set.to_list_map rule.action.targets
-                     ~f:Path.Build.to_string
-                   |> String.concat ~sep:", "
-                 in
-                 Log.info
-                   [ Pp.textf "promotion failed for %s: %s" targets msg ]
+           let* targets_and_digests =
+             (* Step IV. Store results to the shared cache and, if that step
+                fails, post-process targets by removing write permissions and
+                computing their digests. 
*)
+             match t.cache_config with
+             | Enabled { storage_mode = mode; check_probability = _ }
+               when can_go_in_shared_cache -> (
+               let+ targets_and_digests =
+                 try_to_store_to_shared_cache ~mode ~rule_digest ~targets
+                   ~action
               in
-              Caching.Cache.promote Caching.cache targets_digests rule_digest
-                [] ~repository:None ~duplication:None
-              |> Result.map_error ~f:report |> ignore
-            | _ -> ()
+               match targets_and_digests with
+               | None ->
+                 compute_targets_and_digests_or_raise_error
+                   execution_parameters ~loc targets
+               | Some targets_and_digests -> targets_and_digests)
+             | _ ->
+               Fiber.return
+                 (compute_targets_and_digests_or_raise_error
+                    execution_parameters ~loc targets)
           in
           let dynamic_deps_stages =
             List.map exec_result.dynamic_deps_stages
               ~f:(fun (deps, fact_map) ->
                 (deps, Dep.Facts.digest fact_map ~sandbox_mode ~env))
           in
+          let targets_digest =
+            digest_of_targets_and_digests targets_and_digests
+          in
           Trace_db.set (Path.build head_target)
             { rule_digest; dynamic_deps_stages; targets_digest };
-          Fiber.return targets_digests)
+          Fiber.return targets_and_digests)
     in
     let+ () =
       match (mode, !Clflags.promote) with
@@ -1784,14 +1771,13 @@ end = struct
               ))
     in
     if rule_kind = Normal_rule then t.rule_done <- t.rule_done + 1;
-    targets_digests)
+    targets_and_digests)
  (* jeremidimino: we need to include the dependencies discovered while
     running the action here. Otherwise, package dependencies are broken in
     the presence of dynamic actions *)
-  >>=
-  fun targets_digests ->
-  Memo.Build.return
-    { deps; targets = Path.Build.Map.of_list_exn targets_digests }
+  >>|
+  fun targets_and_digests ->
+  { deps; targets = Path.Build.Map.of_list_exn targets_and_digests }
 
 module Action_desc = struct
   type t =
@@ -2311,38 +2297,16 @@ let load_dir_and_produce_its_rules ~dir =
 
 let load_dir ~dir = load_dir_and_produce_its_rules ~dir
 
-let init ~stats ~contexts ~promote_source ?caching ~sandboxing_preference () =
+let init ~stats ~contexts ~promote_source ~sandboxing_preference ~cache_config
+    () =
   let contexts =
     Context_name.Map.of_list_map_exn contexts ~f:(fun c ->
         (c.Build_context.name, c))
   in
-  let caching =
-    let open Option.O in
-    let* ({ cache = (module Caching : Cache.Caching); _ } as v) = caching in
-    let open Result.O in
-    let res =
-      let+ cache = Caching.Cache.set_build_dir Caching.cache Path.build_dir in
-      (module struct
-        module Cache = Caching.Cache
-
-        let cache = cache
-      end : Cache.Caching)
-    in
-    match res with
-    | Result.Ok cache -> Some { v with cache }
-    | Result.Error e ->
-      User_warning.emit
-        [ Pp.text "Unable to set cache build directory"
-        ; Pp.textf "Reason: %s" e
-        ];
-      None
-  in
   let t =
     { contexts
     ; gen_rules = Fdecl.create Dyn.Encoder.opaque
     ; init_rules = Fdecl.create Dyn.Encoder.opaque
-    ; vcs = Fdecl.create Dyn.Encoder.opaque
-    ; caching
    ; sandboxing_preference = sandboxing_preference @ Sandbox_mode.all
    ; rule_done = 0
    ; rule_total = 0
@@ -2353,6 +2317,7 @@ let init ~stats ~contexts ~promote_source ?caching ~sandboxing_preference () =
    ; promote_source
    ; build_mutex = Fiber.Mutex.create ()
    ; stats
+    ; cache_config
    }
  in
  let open Fiber.O in
@@ -2365,16 +2330,6 @@ let init ~stats ~contexts ~promote_source ?caching ~sandboxing_preference () =
  set t;
  Fiber.return ()
 
-let cache_teardown () =
-  match get_cache () with
-  | Some { cache = (module Caching : Cache.Caching); _ } ->
-    (* Synchronously wait for the end of the connection with the cache daemon,
-       ensuring all dedup messages have been queued. *)
-    Caching.Cache.teardown Caching.cache;
-    (* Hande all remaining dedup messages. 
*) - Scheduler.wait_for_dune_cache () - | None -> () - let targets_of = targets_of let all_targets () = all_targets (t ()) diff --git a/src/dune_engine/build_system.mli b/src/dune_engine/build_system.mli index 3aaf34f5544a..eb4eeace5479 100644 --- a/src/dune_engine/build_system.mli +++ b/src/dune_engine/build_system.mli @@ -7,11 +7,6 @@ open! Import (** {2 Creation} *) -type caching = - { cache : (module Cache.Caching) - ; check_probability : float - } - module Error : sig (** Errors when building a target *) type t @@ -29,8 +24,8 @@ val init : -> dst:Path.Source.t -> Build_context.t option -> unit Fiber.t) - -> ?caching:caching -> sandboxing_preference:Sandbox_mode.t list + -> cache_config:Dune_cache.Config.t -> unit -> unit Fiber.t @@ -189,10 +184,6 @@ module For_command_line : sig val eval_build_request : 'a Action_builder.t -> ('a * Dep.Set.t) Memo.Build.t end -val get_cache : unit -> caching option - -val cache_teardown : unit -> unit - (** {2 Running a build} *) val run : (unit -> 'a Memo.Build.t) -> 'a Fiber.t diff --git a/src/dune_engine/dune b/src/dune_engine/dune index 79a8dc209249..5bb1726b333d 100644 --- a/src/dune_engine/dune +++ b/src/dune_engine/dune @@ -16,6 +16,8 @@ dune_lang cache_daemon cache + dune_cache + dune_cache_storage dune_glob ocaml_config chrome_trace diff --git a/src/dune_engine/execution_parameters.mli b/src/dune_engine/execution_parameters.mli index 5a461f09db2a..b22eaf9b157f 100644 --- a/src/dune_engine/execution_parameters.mli +++ b/src/dune_engine/execution_parameters.mli @@ -2,7 +2,7 @@ (** Such as: - - should target be set read-only? + - should targets be set read-only? - should aliases be expanded when sandboxing rules? diff --git a/src/dune_rules/dune b/src/dune_rules/dune index b622c244a110..dce2184dff6b 100644 --- a/src/dune_rules/dune +++ b/src/dune_rules/dune @@ -18,6 +18,7 @@ dune_util dune_meta_parser dune_section + dune_cache build_path_prefix_map dune_engine dune_config) diff --git a/src/dune_rules/main.ml b/src/dune_rules/main.ml index 18862396209d..8b8ee865c9a0 100644 --- a/src/dune_rules/main.ml +++ b/src/dune_rules/main.ml @@ -10,7 +10,8 @@ type build_system = ; scontexts : Super_context.t Context_name.Map.t } -let init_build_system ~stats ~sandboxing_preference ~caching ~conf ~contexts = +let init_build_system ~stats ~sandboxing_preference ~cache_config ~conf + ~contexts = let open Fiber.O in Build_system.reset (); let promote_source ?chmod ~src ~dst ctx = @@ -22,7 +23,7 @@ let init_build_system ~stats ~sandboxing_preference ~caching ~conf ~contexts = let* () = Build_system.init ~stats ~sandboxing_preference ~promote_source ~contexts:(List.map ~f:Context.build_context contexts) - ?caching () + ~cache_config () in List.iter contexts ~f:Context.init_configurator; let+ scontexts = Gen_rules.init () in diff --git a/src/dune_rules/main.mli b/src/dune_rules/main.mli index a0d83d191c19..e8b8adf33c11 100644 --- a/src/dune_rules/main.mli +++ b/src/dune_rules/main.mli @@ -12,7 +12,7 @@ type build_system = val init_build_system : stats:Stats.t option -> sandboxing_preference:Sandbox_mode.t list - -> caching:Build_system.caching option + -> cache_config:Dune_cache.Config.t -> conf:Dune_load.conf -> contexts:Context.t list -> build_system Fiber.t diff --git a/test/blackbox-tests/test-cases/dune-cache/promote-copy.t/run.t b/test/blackbox-tests/test-cases/dune-cache/promote-copy.t/run.t index 954c03d3b22d..b685d95400e9 100644 --- a/test/blackbox-tests/test-cases/dune-cache/promote-copy.t/run.t +++ 
b/test/blackbox-tests/test-cases/dune-cache/promote-copy.t/run.t
@@ -1,7 +1,6 @@
-Test that with the cache in copy mode:
-- the cache is still used, i.e. dune skip executions of rules that are
-. present in the cache
-- files are copied rather than hardlinked
+Test the shared cache in copy mode:
+- Dune skips executions of rules whose artifacts are present in the cache
+- Dune copies artifacts when restoring them instead of creating hard links
 
   $ cat > config <<EOF
   > (lang dune 2.1)
@@ -16,11 +15,9 @@ Test that with the cache in copy mode:
   > (rule
   > (deps source)
   > (targets target)
-  > (action (bash "touch beacon ; cat source source > target")))
-  > EOF
-  $ cat > source <<EOF
-  > \_o< COIN
+  > (action (bash "touch rule-was-run; cat source source > target")))
   > EOF
+  $ echo hello > source
 
 Initial build
 
@@ -29,11 +26,10 @@ Initial build
   1
   $ dune_cmd stat hardlinks _build/default/target
   1
-  $ ls _build/default/beacon
-  _build/default/beacon
+  $ ls _build/default/rule-was-run
+  _build/default/rule-was-run
 
-Clean + rebuild (we expect dune to reuse things from the cache without
-hardlinking them)
+Clean + rebuild: Dune should restore artifacts from the cache by copying
 
   $ rm -rf _build/default
   $ env XDG_RUNTIME_DIR=$PWD/.xdg-runtime XDG_CACHE_HOME=$PWD/.xdg-cache dune build --config-file=config target
@@ -41,19 +37,23 @@ hardlinking them)
   1
   $ dune_cmd stat hardlinks _build/default/target
   1
-  $ test -e _build/default/beacon
+
+The rule wasn't run:
+
+  $ test -e _build/default/rule-was-run
   [1]
+
+The files have been restored correctly:
+
   $ cat _build/default/source
-  \_o< COIN
+  hello
   $ cat _build/default/target
-  \_o< COIN
-  \_o< COIN
-
+  hello
+  hello
 ------------------
 
-Check that thins are still rebuild during incremental compilation with
-the cache in copy mode
+Check that rules are rebuilt correctly when using the shared cache in copy mode
 
   $ cat > dune-project <<EOF
   > (lang dune 2.1)
diff --git a/test/blackbox-tests/test-cases/dune-cache/promote-direct-copy.t/run.t b/test/blackbox-tests/test-cases/dune-cache/promote-direct-copy.t/run.t
index 15e2a66fc92c..5eef42244d20 100644
--- a/test/blackbox-tests/test-cases/dune-cache/promote-direct-copy.t/run.t
+++ b/test/blackbox-tests/test-cases/dune-cache/promote-direct-copy.t/run.t
@@ -16,9 +16,7 @@ rather than the daemon
  > (targets target)
  > (action (bash "touch beacon ; cat source source > target")))
  > EOF
- $ cat > source <<EOF
- > \_o< COIN
- > EOF
+ $ echo hello > source
  $ env XDG_RUNTIME_DIR=$PWD/.xdg-runtime XDG_CACHE_HOME=$PWD/.xdg-cache dune build --config-file=config target
  $ dune_cmd stat hardlinks _build/default/source
  1
@@ -35,10 +33,10 @@ rather than the daemon
  $ test -e _build/default/beacon
  [1]
  $ cat _build/default/source
- \_o< COIN
+ hello
  $ cat _build/default/target
- \_o< COIN
- \_o< COIN
+ hello
+ hello
 
  $ cat > dune-project <<EOF
  > (lang dune 2.1)
diff --git a/test/blackbox-tests/test-cases/dune-cache/trim.t/run.t b/test/blackbox-tests/test-cases/dune-cache/trim.t/run.t
index 267cac068eaa..f22a9ad2ec2d 100644
--- a/test/blackbox-tests/test-cases/dune-cache/trim.t/run.t
+++ b/test/blackbox-tests/test-cases/dune-cache/trim.t/run.t
@@ -47,12 +47,12 @@ the current digests for both files match those computed by Jenga.
 
 ./5e/5e5bb3a0ec0e689e19a59c3ee3d7fca8:content
 ./62/6274851067c88e9990e912be27cce386:content
 
-Move all current v4 entries to v3 to test trimming of old versions of cache.
+Move all current entries to v3 to test trimming of old versions of cache. 
$ mkdir "$PWD/.xdg-cache/dune/db/files/v3" $ mkdir "$PWD/.xdg-cache/dune/db/meta/v3" $ mv "$PWD/.xdg-cache/dune/db/files/v4"/* "$PWD/.xdg-cache/dune/db/files/v3" - $ mv "$PWD/.xdg-cache/dune/db/meta/v4"/* "$PWD/.xdg-cache/dune/db/meta/v3" + $ mv "$PWD/.xdg-cache/dune/db/meta/v5"/* "$PWD/.xdg-cache/dune/db/meta/v3" Build some more targets. @@ -64,20 +64,22 @@ end up in a situation where the same hash means something different before and after the change, which is bad. To reduce the risk, we inject a version number into rule digests. -If you see the below test breaking, then you probably accidentally -changed the way the digest is computed and you should increase this -version number. This number is stored in the [rule_digest_version] -variable in [build_system.ml]. +If you see the test below breaking, this means you changed the metadata format +or the way that digests are computed and you should increment the corresponding +version number. This number is stored in the [rule_digest_version] variable in +[build_system.ml]. You may also need to change the versioning in [layout.ml] in +the [dune_cache_storage] library and make sure that the cache trimmer treats new +and old cache entries uniformly. - $ (cd "$PWD/.xdg-cache/dune/db/meta/v4"; grep -rws . -e 'metadata' | sort) - ./06/061fb516fd28c9a632c573f380b8a120:((8:metadata)(5:files(16:default/target_a32:5637dd9730e430c7477f52d46de3909c))) - ./50/50148ac6fcde0b35e357cbd120131dbc:((8:metadata)(5:files(16:default/target_b32:8a53bfae3829b48866079fa7f2d97781))) + $ (cd "$PWD/.xdg-cache/dune/db/meta/v5"; grep -rws . -e 'metadata' | sort) + ./06/061fb516fd28c9a632c573f380b8a120:((8:metadata)(5:files(8:target_a32:5637dd9730e430c7477f52d46de3909c))) + ./50/50148ac6fcde0b35e357cbd120131dbc:((8:metadata)(5:files(8:target_b32:8a53bfae3829b48866079fa7f2d97781))) - $ dune_cmd stat size "$PWD/.xdg-cache/dune/db/meta/v4/06/061fb516fd28c9a632c573f380b8a120" - 79 + $ dune_cmd stat size "$PWD/.xdg-cache/dune/db/meta/v5/06/061fb516fd28c9a632c573f380b8a120" + 70 -Trimming the cache at this point should not remove anything, as all -files are still hard-linked in the build directory. +Trimming the cache at this point should not remove anything because all file +entries are still hard-linked from the build directory. $ dune cache trim --trimmed-size 1B Freed 0 bytes @@ -86,11 +88,12 @@ files are still hard-linked in the build directory. $ dune_cmd stat hardlinks _build/default/target_b 2 -If we unlink one file in the build tree, it can be reclaimed when trimming. +If we unlink a file in the build tree, then the corresponding file entry will be +trimmed. $ rm -f _build/default/target_a _build/default/beacon_a _build/default/beacon_b $ dune cache trim --trimmed-size 1B - Freed 88 bytes + Freed 79 bytes $ dune build target_a target_b $ dune_cmd stat hardlinks _build/default/target_a 2 @@ -113,7 +116,7 @@ target_a: $ dune_cmd wait-for-fs-clock-to-advance $ rm -f _build/default/beacon_a _build/default/target_a $ dune cache trim --trimmed-size 1B - Freed 88 bytes + Freed 79 bytes $ dune build target_a target_b $ dune_cmd stat hardlinks _build/default/target_a 2 @@ -131,7 +134,7 @@ When a cache entry becomes unused, its ctime is modified and will determine the $ dune_cmd wait-for-fs-clock-to-advance $ rm -f _build/default/beacon_b _build/default/target_b $ dune cache trim --trimmed-size 1B - Freed 88 bytes + Freed 79 bytes $ dune build target_a target_b $ dune_cmd stat hardlinks _build/default/target_a 2 @@ -163,4 +166,4 @@ they are part of the same rule. 
$ dune build multi_a multi_b $ rm -f _build/default/multi_a _build/default/multi_b $ dune cache trim --trimmed-size 1B - Freed 141 bytes + Freed 123 bytes
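
The byte counts in the trim test above are consistent with the new metadata format: target names are now stored relative to the build context, so the canonical S-expression atom 16:default/target_a becomes 8:target_a, a saving of 9 bytes per target (the 8 bytes of "default/" plus one byte of the length prefix). That accounts for the metadata file shrinking from 79 to 70 bytes, each single-target trim freeing 79 instead of 88 bytes, and the final two-target trim freeing 123 instead of 141 bytes (twice the 9-byte saving).

The rule-execution flow that this patch installs reads as four steps: remove stale targets (Step I), try to restore from the shared cache (Step II), execute the action (Step III), and try to store the results, falling back to locally computed digests (Step IV). Below is a minimal OCaml sketch of that decision logic, not part of the patch: every type and helper is a simplified stand-in invented for illustration, only the constructor and field names ([Disabled], [Enabled], [storage_mode], [check_probability], [Restored], [Stored], [Already_present]) mirror the ones in the diff, and a plain list stands in for [targets_and_digests].

(* Illustrative sketch only; simplified stand-ins for the real Dune types. *)
type mode =
  | Hardlink
  | Copy

type cache_config =
  | Disabled
  | Enabled of
      { storage_mode : mode
      ; check_probability : float
      }

(* Stand-ins for the results of restoring from and storing to the shared
   cache; a plain list stands in for the targets paired with digests. *)
type restore_result =
  | Restored of string list
  | Not_found_in_cache
  | Restore_error of exn

type store_result =
  | Stored of string list
  | Already_present of string list
  | Store_error of exn

let execute_rule ~cache_config ~can_go_in_shared_cache
    ~(restore : mode:mode -> restore_result)
    ~(execute_and_digest : unit -> string list)
    ~(store : mode:mode -> store_result) =
  (* Step II: consult the shared cache, unless the rule is uncacheable, the
     cache is disabled, or the rule is selected for a reproducibility check. *)
  let restored =
    match (can_go_in_shared_cache, cache_config) with
    | false, _ | _, Disabled -> None
    | true, Enabled { storage_mode; check_probability } ->
      if Random.float 1. < check_probability then None
      else (
        match restore ~mode:storage_mode with
        | Restored targets_and_digests -> Some targets_and_digests
        | Not_found_in_cache | Restore_error _ -> None)
  in
  match restored with
  | Some targets_and_digests -> targets_and_digests
  | None -> (
    (* Step III: run the action. Step IV: try to store the results; if the
       cache refuses them, fall back to the locally computed digests. *)
    let targets_and_digests = execute_and_digest () in
    match (can_go_in_shared_cache, cache_config) with
    | true, Enabled { storage_mode; _ } -> (
      match store ~mode:storage_mode with
      | Stored stored | Already_present stored -> stored
      | Store_error _ -> targets_and_digests)
    | _ -> targets_and_digests)

Note that when the reproducibility check fires, the sketch, like the patch, simply re-executes the rule rather than first checking that the shared cache holds an entry for the rule digest; the CR-someday comment in the diff points out that doing the latter would make [check_probability] more meaningful.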