From d9377e8a74fdb25925aae7b338497618a62fbec1 Mon Sep 17 00:00:00 2001 From: Andrey Mokhov Date: Fri, 9 Apr 2021 11:38:39 +0100 Subject: [PATCH] Remove the old cache library and cache daemon (#4465) This PR continues the work started in #4443. It does the following things: * Remove the [cache] and [cache_daemon] libraries. * Clean up and move and the cache trimming logic to [dune_cache]. * Add some tests to make sure the cache trimmer can cope with all versions of the cache. Signed-off-by: Andrey Mokhov --- bin/cache.ml | 71 +++ bin/{caching.mli => cache.mli} | 0 bin/caching.ml | 162 ------ bin/dune | 2 - bin/main.ml | 2 +- boot/libs.ml | 4 +- src/cache/cache.ml | 30 -- src/cache/cache.mli | 21 - src/cache/cache_intf.ml | 104 ---- src/cache/client.ml | 133 ----- src/cache/client.mli | 11 - src/cache/dune | 4 - src/cache/key.ml | 10 - src/cache/key.mli | 8 - src/cache/local.ml | 495 ------------------ src/cache/local.mli | 92 ---- src/cache/messages.ml | 287 ---------- src/cache/messages.mli | 41 -- src/cache/messages_intf.ml | 52 -- src/cache_daemon/cache_daemon.ml | 341 ------------ src/cache_daemon/cache_daemon.mli | 28 - src/cache_daemon/dune | 3 - src/cache_daemon/utils.ml | 23 - src/dune_cache/local.ml | 8 - src/dune_cache/local.mli | 25 +- src/dune_cache/trimmer.ml | 108 ++++ src/dune_cache/trimmer.mli | 27 + src/dune_cache_storage/dune_cache_storage.ml | 6 + src/dune_cache_storage/dune_cache_storage.mli | 24 + src/dune_cache_storage/layout.ml | 78 ++- src/dune_cache_storage/layout.mli | 30 +- src/dune_cache_storage/version.ml | 52 ++ src/dune_cache_storage/version.mli | 47 ++ src/dune_engine/cached_digest.ml | 22 +- src/dune_engine/dune | 2 - .../test-cases/dune-cache/trim.t/run.t | 55 +- test/expect-tests/dune | 1 - 37 files changed, 483 insertions(+), 1926 deletions(-) create mode 100644 bin/cache.ml rename bin/{caching.mli => cache.mli} (100%) delete mode 100644 bin/caching.ml delete mode 100644 src/cache/cache.ml delete mode 100644 src/cache/cache.mli delete mode 100644 src/cache/cache_intf.ml delete mode 100644 src/cache/client.ml delete mode 100644 src/cache/client.mli delete mode 100644 src/cache/dune delete mode 100644 src/cache/key.ml delete mode 100644 src/cache/key.mli delete mode 100644 src/cache/local.ml delete mode 100644 src/cache/local.mli delete mode 100644 src/cache/messages.ml delete mode 100644 src/cache/messages.mli delete mode 100644 src/cache/messages_intf.ml delete mode 100644 src/cache_daemon/cache_daemon.ml delete mode 100644 src/cache_daemon/cache_daemon.mli delete mode 100644 src/cache_daemon/dune delete mode 100644 src/cache_daemon/utils.ml create mode 100644 src/dune_cache/trimmer.ml create mode 100644 src/dune_cache/trimmer.mli create mode 100644 src/dune_cache_storage/version.ml create mode 100644 src/dune_cache_storage/version.mli diff --git a/bin/cache.ml b/bin/cache.ml new file mode 100644 index 00000000000..d81e4e1dc62 --- /dev/null +++ b/bin/cache.ml @@ -0,0 +1,71 @@ +open Stdune +open Import + +let name = "cache" + +(* CR-someday amokhov: Implement other commands supported by Jenga. *) + +let man = + [ `S "DESCRIPTION" + ; `P + {|Dune can share build artifacts between workspaces. Currently, the only + action supported by this command is `trim`, but we plan to provide more + functionality soon. |} + ; `S "ACTIONS" + ; `P {|$(b,trim) trim the shared cache to free space.|} + ; `Blocks Common.help_secs + ] + +let doc = "Manage the shared cache of build artifacts" + +let info = Term.info name ~doc ~man + +let trim ~trimmed_size ~size = + Log.init_disabled (); + let open Result.O in + match + let+ goal = + match (trimmed_size, size) with + | Some trimmed_size, None -> Result.Ok trimmed_size + | None, Some size -> + Result.Ok (Int64.sub (Dune_cache.Trimmer.overhead_size ()) size) + | _ -> Result.Error "specify either --size or --trimmed-size" + in + Dune_cache.Trimmer.trim ~goal + with + | Error s -> User_error.raise [ Pp.text s ] + | Ok { trimmed_bytes } -> + User_message.print + (User_message.make [ Pp.textf "Freed %Li bytes" trimmed_bytes ]) + +type mode = Trim + +let modes = [ ("trim", Trim) ] + +let term = + Term.ret + @@ let+ mode = + Arg.( + value + & pos 0 (some (enum modes)) None + & info [] ~docv:"ACTION" + ~doc: + (Printf.sprintf "The cache action to perform (%s)" + (Arg.doc_alts_enum modes))) + and+ trimmed_size = + Arg.( + value + & opt (some bytes) None + & info ~docv:"BYTES" [ "trimmed-size" ] + ~doc:"size to trim from the cache") + and+ size = + Arg.( + value + & opt (some bytes) None + & info ~docv:"BYTES" [ "size" ] ~doc:"size to trim the cache to") + in + match mode with + | Some Trim -> `Ok (trim ~trimmed_size ~size) + | None -> `Help (`Pager, Some name) + +let command = (term, info) diff --git a/bin/caching.mli b/bin/cache.mli similarity index 100% rename from bin/caching.mli rename to bin/cache.mli diff --git a/bin/caching.ml b/bin/caching.ml deleted file mode 100644 index 160f18f20fd..00000000000 --- a/bin/caching.ml +++ /dev/null @@ -1,162 +0,0 @@ -open Stdune -open Import - -let name = "cache" - -let man = - [ `S "DESCRIPTION" - ; `P - {|Dune is able to share build artifacts between workspaces. - $(b,dune cache-daemon) is a daemon that runs in the background - and manages this shared cache. For instance, it makes sure that it - does not grow too big and try to maximise sharing between the various - workspaces that are using the shared cache.|} - ; `P - {|The daemon is automatically started by Dune when the shared cache is - enabled. You do not need to run this command manually.|} - ; `S "ACTIONS" - ; `P {|$(b,start) starts the daemon if not already running.|} - ; `P {|$(b,stop) stops the daemon.|} - ; `P {|$(b,trim) removes oldest files from the cache to free space.|} - ; `Blocks Common.help_secs - ] - -let doc = "Manage the shared artifacts cache" - -let info = Term.info name ~doc ~man - -let start ~config ~foreground ~port_path ~root ~display = - let show_endpoint ep = - if display <> Some Scheduler.Config.Display.Quiet then - Printf.printf "%s\n%!" ep - in - let f started = - let started daemon_info = - if foreground then show_endpoint daemon_info; - started ~daemon_info - in - Log.verbose := foreground; - Cache_daemon.daemon ~root ~config started - in - match Daemonize.daemonize ~workdir:root ~foreground port_path f with - | Result.Ok Finished -> () - | Result.Ok (Daemonize.Started { daemon_info = endpoint; _ }) -> - show_endpoint endpoint - | Result.Ok (Daemonize.Already_running { daemon_info = endpoint; _ }) - when not foreground -> - show_endpoint endpoint - | Result.Ok (Daemonize.Already_running { daemon_info = endpoint; pid }) -> - User_error.raise - [ Pp.textf "already running on %s (PID %i)" endpoint (Pid.to_int pid) ] - | Result.Error reason -> User_error.raise [ Pp.text reason ] - -let stop ~port_path = - match Daemonize.stop port_path with - | Error s -> User_error.raise [ Pp.text s ] - | Ok () -> () - -let trim ~trimmed_size ~size = - Log.init_disabled (); - let open Result.O in - match - let* cache = - (* CR-someday amokhov: The [Hardlink] duplication mode is chosen - arbitrarily here, instead of respecting the corresponding configuration - setting, because the mode doesn't matter for the trimmer. It would be - better to refactor the code to avoid such arbitrary choices. *) - Cache.Local.make ~duplication_mode:Cache.Duplication_mode.Hardlink - ~command_handler:ignore () - in - let+ goal = - match (trimmed_size, size) with - | Some trimmed_size, None -> Result.Ok trimmed_size - | None, Some size -> - Result.Ok (Int64.sub (Cache.Local.overhead_size cache) size) - | _ -> Result.Error "specify either --size or --trimmed-size" - in - Cache.Local.trim cache ~goal - with - | Error s -> User_error.raise [ Pp.text s ] - | Ok { trimmed_bytes } -> - User_message.print - (User_message.make [ Pp.textf "Freed %Li bytes" trimmed_bytes ]) - -type mode = - | Start - | Stop - | Trim - -let modes = [ ("start", Start); ("stop", Stop); ("trim", Trim) ] - -let path_conv = - ( (fun s -> `Ok (Path.of_string s)) - , fun fmt p -> Format.pp_print_string fmt (Path.to_string_maybe_quoted p) ) - -let term = - Term.ret - @@ let+ config = Common.config_from_config_file - and+ mode = - Arg.( - value - & pos 0 (some (enum modes)) None - & info [] ~docv:"ACTION" - ~doc: - (Printf.sprintf "The cache-daemon action to perform (%s)" - (Arg.doc_alts_enum modes))) - and+ foreground = - Arg.( - value & flag - & info [ "foreground"; "f" ] - ~doc:"Whether to start in the foreground or as a daemon") - and+ exit_no_client = - let doc = "Whether to exit once all clients have disconnected" in - Arg.( - value & flag - & info [ "exit-no-client" ] ~doc - ~env:(Arg.env_var "DUNE_CACHE_EXIT_NO_CLIENT" ~doc)) - and+ port_path = - Arg.( - value - & opt path_conv (Cache_daemon.default_port_file ()) - & info ~docv:"PATH" [ "port-file" ] - ~doc:"The file to read/write the daemon port from/to.") - and+ root = - Arg.( - value - & opt path_conv (Dune_cache_storage.Layout.default_root_path ()) - & info ~docv:"PATH" [ "root" ] ~doc:"Root of the dune cache") - and+ trimmed_size = - Arg.( - value - & opt (some bytes) None - & info ~docv:"BYTES" [ "trimmed-size" ] - ~doc:"size to trim from the cache") - and+ size = - Arg.( - value - & opt (some bytes) None - & info ~docv:"BYTES" [ "size" ] ~doc:"size to trim the cache to") - and+ display = Common.display_term in - let config = Dune_config.(superpose default) config in - match mode with - | Some Start -> - (* CR-soon amokhov: Right now, types [Dune_config.Caching.Duplication.t] - and [Dune_cache_storage.Mode.t] are the same. They will be unified - after removing the cache daemon and adapting the configuration format. *) - let config = - { Cache_daemon.exit_no_client - ; duplication_mode = - (match - (config.cache_duplication : Dune_cache_storage.Mode.t option) - with - | None -> None - | Some Hardlink -> Some Hardlink - | Some Copy -> Some Copy) - } - in - `Ok (start ~config ~foreground ~port_path ~root ~display) - | Some Stop -> `Ok (stop ~port_path) - | Some Trim -> `Ok (trim ~trimmed_size ~size) - | None -> `Help (`Pager, Some name) - -let command = (term, info) diff --git a/bin/dune b/bin/dune index 3cdc7752cd7..57afc54d955 100644 --- a/bin/dune +++ b/bin/dune @@ -6,8 +6,6 @@ fiber stdune unix - cache_daemon - cache dune_cache dune_cache_storage dune_rules diff --git a/bin/main.ml b/bin/main.ml index 2ae11b50644..95e1a1ced68 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -22,7 +22,7 @@ let all : _ Term.Group.t list = ; Format_dune_file.command ; Compute.command ; Upgrade.command - ; Caching.command + ; Cache.command ; Describe.command ; Top.command ; Ocaml_merlin.command diff --git a/boot/libs.ml b/boot/libs.ml index 4ef48a003a8..7dd70332aa2 100644 --- a/boot/libs.ml +++ b/boot/libs.ml @@ -13,12 +13,9 @@ let local_libraries = ; ("src/dag", Some "Dag", false, None) ; ("src/fiber", Some "Fiber", false, None) ; ("src/memo", Some "Memo", false, None) - ; ("src/dune_util", Some "Dune_util", false, None) ; ("src/xdg", Some "Xdg", false, None) ; ("src/dune_cache_storage", Some "Dune_cache_storage", false, None) ; ("src/dune_cache", Some "Dune_cache", false, None) - ; ("src/cache", Some "Cache", false, None) - ; ("src/cache_daemon", Some "Cache_daemon", false, None) ; ("vendor/re/src", Some "Dune_re", false, None) ; ("vendor/opam-file-format/src", None, false, None) ; ("otherlibs/dune-glob/src", Some "Dune_glob", false, None) @@ -27,6 +24,7 @@ let local_libraries = ; ("src/chrome_trace", Some "Chrome_trace", false, None) ; ("vendor/spawn/src", Some "Spawn", false, None) ; ("src/stats", Some "Stats", false, None) + ; ("src/dune_util", Some "Dune_util", false, None) ; ("src/meta_parser", Some "Dune_meta_parser", false, None) ; ("src/section", Some "Dune_section", false, None) ; ("vendor/build_path_prefix_map/src", Some "Build_path_prefix_map", false, diff --git a/src/cache/cache.ml b/src/cache/cache.ml deleted file mode 100644 index 5bc2721150e..00000000000 --- a/src/cache/cache.ml +++ /dev/null @@ -1,30 +0,0 @@ -module Log = Dune_util.Log -open Stdune -module Key = Key -include Cache_intf - -let promotion_to_string = function - | Already_promoted { path; digest } -> - Printf.sprintf "%s already promoted with digest %s" - (Path.Local.to_string (Path.Build.local path)) - (Digest.to_string digest) - | Promoted { path; digest } -> - Printf.sprintf "%s promoted with digest %s" - (Path.Local.to_string (Path.Build.local path)) - (Digest.to_string digest) - -let make_caching (type t) (module Caching : Cache with type t = t) (cache : t) : - (module Caching) = - (module struct - module Cache = Caching - - let cache = cache - end) - -let cachable = function - | Unix.S_REG -> true - | _ -> false - -module Client = Client -module Local = Local -module Messages = Messages diff --git a/src/cache/cache.mli b/src/cache/cache.mli deleted file mode 100644 index b1450c91a0a..00000000000 --- a/src/cache/cache.mli +++ /dev/null @@ -1,21 +0,0 @@ -open Stdune - -include module type of Cache_intf - -module Key : sig - type t = Digest.t - - val of_string : string -> (t, string) Result.t - - val to_string : t -> string -end - -val promotion_to_string : promotion -> string - -val make_caching : (module Cache with type t = 'a) -> 'a -> (module Caching) - -val cachable : Unix.file_kind -> bool - -module Client = Client -module Local = Local -module Messages = Messages diff --git a/src/cache/cache_intf.ml b/src/cache/cache_intf.ml deleted file mode 100644 index 0a3fe563061..00000000000 --- a/src/cache/cache_intf.ml +++ /dev/null @@ -1,104 +0,0 @@ -open Stdune - -type metadata = Sexp.t list - -(** A file stored in Dune cache is fully determined by the build [path] and its - content [digest]. There may be multiple [File]s with the same [digest] due - to sharing between multiple workspaces. In fact, the more such pairs there - are, the more effective the cache is. *) -module File = struct - type t = - { path : Path.Build.t - ; digest : Digest.t - } -end - -type promotion = - | Already_promoted of File.t - | Promoted of File.t - -type repository = - { directory : string - ; remote : string - ; commit : string - } - -type command = Dedup of File.t - -module Duplication_mode = struct - type t = - | Copy - | Hardlink - - let all = [ ("copy", Copy); ("hardlink", Hardlink) ] - - let of_string repr = - match List.assoc all repr with - | Some mode -> Result.Ok mode - | None -> Result.Error (Format.sprintf "invalid duplication mode: %s" repr) - - let to_string = function - | Copy -> "copy" - | Hardlink -> "hardlink" - - let to_dyn = function - | Copy -> Dyn.Variant ("Copy", []) - | Hardlink -> Dyn.Variant ("Hardlink", []) -end - -module type Cache = sig - type t - - (** Set the absolute path to the build directory for interpreting relative - paths when promoting files. *) - val set_build_dir : t -> Path.t -> (t, string) Result.t - - (** Set all the version controlled repositories in the workspace to be - referred to when promoting files. *) - val with_repositories : t -> repository list -> (t, string) Result.t - - (** Promote files produced by a build rule into the cache. *) - val promote : - t - -> (Path.Build.t * Digest.t) list - -> Key.t - -> metadata - -> repository:int option - -> duplication:Duplication_mode.t option - -> (unit, string) Result.t - - (** Find a build rule in the cache by its key. *) - val search : t -> Key.t -> (metadata * File.t list, string) Result.t - - (** Materialise a cached file in the build directory (using [Copy] or - [Hardlink] as per the duplication mode) and return the path to it. *) - val retrieve : t -> File.t -> Path.t - - (** Deduplicate a file, i.e. replace the file [in_the_build_directory] with a - hardlink to the one [in_the_cache] if the deduplication mode is set to - [Hardlink] (or do nothing if the mode is [Copy]). *) - val deduplicate : t -> File.t -> unit - - (** Remove the local cache and disconnect with a distributed cache client if - any. *) - val teardown : t -> unit - - (* Hint that the given rule will be looked up soon *) - val hint : t -> Key.t list -> (unit, string) Result.t -end - -module type Caching = sig - module Cache : Cache - - val cache : Cache.t -end - -type caching = (module Caching) - -let command_to_dyn = function - | Dedup { path; digest } -> - let open Dyn.Encoder in - pair string record - @@ ( "dedup" - , [ ("path", Path.Build.to_dyn path); ("digest", Digest.to_dyn digest) ] - ) diff --git a/src/cache/client.ml b/src/cache/client.ml deleted file mode 100644 index 45bc0219d54..00000000000 --- a/src/cache/client.ml +++ /dev/null @@ -1,133 +0,0 @@ -open Stdune -open Dune_util -open Result.O -open Cache_intf - -type t = - { socket : out_channel - ; fd : Unix.file_descr - ; input : in_channel - ; cache : Local.t - ; thread : Thread.t - ; finally : (unit -> unit) option - ; version : Messages.version - } - -let versions_supported_by_dune : Messages.version list = - [ { major = 1; minor = 2 } ] - -let err msg = User_error.E (User_error.make [ Pp.text msg ]) - -let errf msg = User_error.E (User_error.make msg) - -let read version input = - let* sexp = Csexp.input input in - let+ (Dedup v) = Messages.incoming_message_of_sexp version sexp in - Dedup v - -let make ?finally ?duplication_mode ~command_handler () = - (* This is a bit ugly as it is global, but flushing a closed socket will nuke - the program if we don't. *) - let () = Sys.set_signal Sys.sigpipe Sys.Signal_ignore in - let* cache = - Result.map_error ~f:err - (Local.make ?duplication_mode ~command_handler:ignore ()) - in - let* port = - let cmd = - Format.sprintf "%s cache start --display progress --exit-no-client" - Sys.executable_name - and f stdout = - match Io.input_lines stdout with - | [] -> Result.Error (err "empty output starting cache") - | [ line ] -> Result.Ok line - | _ -> Result.Error (err "unrecognized output starting cache") - and finally stdout = ignore (Unix.close_process_in stdout) (* FIXME *) in - Exn.protectx (Unix.open_process_in cmd) ~finally ~f - in - let* addr, port = - match String.split_on_char ~sep:':' port with - | [ addr; port ] -> ( - match Int.of_string port with - | Some i -> ( - try Result.Ok (Unix.inet_addr_of_string addr, i) with - | Failure _ -> - Result.Error (errf [ Pp.textf "invalid address: %s" addr ])) - | None -> Result.Error (errf [ Pp.textf "invalid port: %s" port ])) - | _ -> Result.Error (errf [ Pp.textf "invalid endpoint: %s" port ]) - in - let fd = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in - let* _ = - Result.try_with (fun () -> Unix.connect fd (Unix.ADDR_INET (addr, port))) - in - let socket = Unix.out_channel_of_descr fd in - let input = Unix.in_channel_of_descr fd in - let+ version = - Result.map_error ~f:err - (Messages.negotiate_version ~versions_supported_by_dune fd input socket) - in - Log.info - [ Pp.textf "negotiated version: %s" (Messages.string_of_version version) ]; - let rec thread input = - match - let+ command = read version input in - Log.info - [ (let open Pp.O in - Pp.text "dune-cache command: " ++ Dyn.pp (command_to_dyn command)) - ]; - command_handler command - with - | Result.Error e -> - Log.info [ Pp.textf "dune-cache read error: %s" e ]; - Option.iter ~f:(fun f -> f ()) finally - | Result.Ok () -> (thread [@tailcall]) input - in - let thread = Thread.create thread input in - { socket; fd; input; cache; thread; finally; version } - -let send client message = - try Result.Ok (Messages.send client.version client.socket message) with - | Sys_error (* "Broken_pipe" *) _ -> - Result.Error "lost connection to cache daemon" - -let with_repositories client repositories = - let+ () = send client (SetRepos repositories) in - client - -let promote (client : t) files key metadata ~repository ~duplication = - let duplication = - Some - (Option.value ~default:(Local.duplication_mode client.cache) duplication) - in - send client - (Messages.Promote { key; files; metadata; repository; duplication }) - -let set_build_dir client path = - let+ () = send client (Messages.SetBuildRoot path) in - client - -let search client key = Local.search client.cache key - -let retrieve client file = Local.retrieve client.cache file - -let deduplicate client file = Local.deduplicate client.cache file - -let teardown client = - (try Unix.shutdown client.fd Unix.SHUTDOWN_SEND with - | Unix.Unix_error (Unix.ENOTCONN, _, _) -> ()); - Thread.join client.thread; - Local.teardown client.cache - -let hint client keys = - if Messages.hint_supported client.version then - send client (Messages.Hint keys) - else - User_warning.emit - ~hints: - [ Pp.textf "update sietch to version %s at least" - (Messages.string_of_version Messages.hint_min_version) - ] - [ Pp.textf "not hinting the cache as sietch version is too old: %s" - (Messages.string_of_version client.version) - ] - |> Result.ok diff --git a/src/cache/client.mli b/src/cache/client.mli deleted file mode 100644 index c439c4c9443..00000000000 --- a/src/cache/client.mli +++ /dev/null @@ -1,11 +0,0 @@ -open Cache_intf -open Stdune - -include Cache - -val make : - ?finally:(unit -> unit) - -> ?duplication_mode:Duplication_mode.t - -> command_handler:(command -> unit) - -> unit - -> (t, exn) Result.t diff --git a/src/cache/dune b/src/cache/dune deleted file mode 100644 index f6313e70903..00000000000 --- a/src/cache/dune +++ /dev/null @@ -1,4 +0,0 @@ -(library - (name cache) - (synopsis "[Internal] Dune binary artifact cache protocol") - (libraries stdune threads.posix xdg dune_lang dune_util csexp dune_cache)) diff --git a/src/cache/key.ml b/src/cache/key.ml deleted file mode 100644 index 476ac8a07ed..00000000000 --- a/src/cache/key.ml +++ /dev/null @@ -1,10 +0,0 @@ -open Stdune - -type t = Digest.t - -let to_string = Digest.to_string - -let of_string s = - match Digest.from_hex s with - | Some d -> Result.Ok d - | None -> Result.Error (Printf.sprintf "invalid key: %s" s) diff --git a/src/cache/key.mli b/src/cache/key.mli deleted file mode 100644 index ed7eec7082c..00000000000 --- a/src/cache/key.mli +++ /dev/null @@ -1,8 +0,0 @@ -open Stdune - -(** Cache keys are currently MD5 digests of the corresponding build rules. *) -type t = Digest.t - -val of_string : string -> (t, string) Result.t - -val to_string : t -> string diff --git a/src/cache/local.ml b/src/cache/local.ml deleted file mode 100644 index ec6ca510c9a..00000000000 --- a/src/cache/local.ml +++ /dev/null @@ -1,495 +0,0 @@ -open Stdune -open Result.O -open Cache_intf - -type t = - { root : Path.t - ; build_root : Path.t option - ; info : User_message.Style.t Pp.t list -> unit - ; warn : User_message.Style.t Pp.t list -> unit - ; repositories : repository list - ; command_handler : command -> unit - ; duplication_mode : Duplication_mode.t - ; temp_dir : Path.t - } - -module Trimming_result = struct - type t = { trimmed_bytes : int64 } - - let empty = { trimmed_bytes = 0L } - - (* CR-someday amokhov: Right now Dune doesn't support large (>1Gb) files on - 32-bit platforms due to the pervasive use of [int] for representing - individual file sizes. It's not fundamentally difficult to switch to - [int64], so we should do it if it becomes a real issue. *) - let add t ~(bytes : int) = - { trimmed_bytes = Int64.add t.trimmed_bytes (Int64.of_int bytes) } -end - -let default_root () = - Path.L.relative - (Path.of_filename_relative_to_initial_cwd Xdg.cache_dir) - [ "dune"; "db" ] - -let file_store_root cache = Path.L.relative cache.root [ "files"; "v4" ] - -let metadata_store_root cache = Path.L.relative cache.root [ "meta"; "v5" ] - -(* A file storage scheme. *) -module type FSScheme = sig - (* Given a cache root and a file digest, determine the location of the file in - the cache. *) - val path : root:Path.t -> Digest.t -> Path.t - - (* Given a cache root, list all files stored in the cache. *) - val list : root:Path.t -> Path.t list -end - -(* A file storage scheme where a file with a digest [d] is stored in a - subdirectory whose name is made of the first two characters of [d], that is: - - [//] - - CR-someday amokhov: We currently do not provide support for collisions where - two or more files with different content have the same content digest [d]. If - this ever becomes a real (i.e. not hypothetical) problem, a good way forward - would be to switch from MD5 to SHA1 or higher, making the chance of this - happening even lower, and at the same time making it more difficult to create - deliberate collisions. *) -module FirstTwoCharsSubdir : FSScheme = struct - let path ~root digest = - let digest = Digest.to_string digest in - let first_two_chars = String.sub digest ~pos:0 ~len:2 in - Path.L.relative root [ first_two_chars; digest ] - - (* List all entries in a given cache root. Returns the empty list if the root - doesn't exist. *) - let list ~root = - let open Result.O in - let f dir = - let root = Path.L.relative root [ dir ] in - if String.for_all ~f:Char.is_lowercase_hex dir then - let+ paths = Path.readdir_unsorted root in - List.map ~f:(Path.relative root) paths - else - Ok [] - in - match Path.readdir_unsorted root >>= Result.List.concat_map ~f with - | Ok res -> res - | Error ENOENT -> [] - | Error e -> User_error.raise [ Pp.text (Unix.error_message e) ] -end - -module FSSchemeImpl = FirstTwoCharsSubdir - -let metadata_path cache key = - FSSchemeImpl.path ~root:(metadata_store_root cache) key - -let file_path cache key = FSSchemeImpl.path ~root:(file_store_root cache) key - -(* Support for older versions of the cache. *) -module V3 = struct - let file_store_root cache = Path.L.relative cache.root [ "files"; "v3" ] - - let metadata_store_root cache = Path.L.relative cache.root [ "meta"; "v3" ] - - let file_path cache key = FSSchemeImpl.path ~root:(file_store_root cache) key -end - -module V4 = struct - let metadata_store_root cache = Path.L.relative cache.root [ "meta"; "v4" ] -end - -module Metadata_file = struct - type t = - { metadata : Sexp.t list - ; files : File.t list - } - - let to_sexp { metadata; files } = - let open Sexp in - let f ({ path; digest } : File.t) = - Sexp.List - [ Sexp.Atom (Path.Local.to_string (Path.Build.local path)) - ; Sexp.Atom (Digest.to_string digest) - ] - in - List - [ List (Atom "metadata" :: metadata) - ; List (Atom "files" :: List.map ~f files) - ] - - let of_sexp = function - | Sexp.List - [ List (Atom "metadata" :: metadata); List (Atom "files" :: produced) ] - -> - let+ files = - Result.List.map produced ~f:(function - | List [ Atom path; Atom digest ] -> - let path = Path.Build.of_string path in - let+ digest = - match Digest.from_hex digest with - | Some digest -> Ok digest - | None -> Error "Invalid digest in cache metadata" - in - { File.path; digest } - | _ -> Error "Invalid list of produced files in cache metadata") - in - { metadata; files } - | _ -> Error "Invalid cache metadata" - - let of_string s = - match Csexp.parse_string s with - | Ok sexp -> of_sexp sexp - | Error (_, msg) -> Error msg - - let to_string f = to_sexp f |> Csexp.to_string - - let parse path = Io.with_file_in path ~f:Csexp.input >>= of_sexp -end - -let tmp_path cache name = - let res = Path.relative cache.temp_dir name in - Path.mkdir_p res; - res - -let make_path cache path = - match cache.build_root with - | Some p -> Result.ok (Path.append_local p path) - | None -> - Result.Error - (sprintf "relative path %s while no build root was set" - (Path.Local.to_string_maybe_quoted path)) - -let with_repositories cache repositories = Result.Ok { cache with repositories } - -let duplicate ?(duplication = None) cache ~src ~dst = - match Option.value ~default:cache.duplication_mode duplication with - | Copy -> Io.copy_file ~src ~dst () - | Hardlink -> Path.link src dst - -let retrieve cache (file : File.t) = - let src = file_path cache file.digest in - let dst = Path.build file.path in - cache.info - [ Pp.textf "retrieve %s from cache" (Path.to_string_maybe_quoted dst) ]; - duplicate cache ~src ~dst; - dst - -let deduplicate cache (file : File.t) = - match cache.duplication_mode with - | Copy -> () - | Hardlink -> ( - let path = Path.build file.path in - let path_in_cache = file_path cache file.digest in - let tmpname = Path.build (Path.Build.of_string ".dedup") in - cache.info - [ Pp.textf "deduplicate %s from %s" (Path.to_string path) - (Path.to_string path_in_cache) - ]; - try - Path.unlink_no_err tmpname; - Path.link path_in_cache tmpname; - Path.rename tmpname path - with - | Unix.Unix_error (e, syscall, _) -> - Path.unlink_no_err tmpname; - cache.warn - [ Pp.textf "error handling dune-cache command: %s: %s" syscall - (Unix.error_message e) - ]) - -let apply ~f o v = - match o with - | Some o -> f v o - | None -> v - -let promote_sync cache paths key metadata ~repository ~duplication = - let open Result.O in - let* repo = - match repository with - | None -> Result.Ok None - | Some idx -> ( - match List.nth cache.repositories idx with - | None -> Result.Error (Printf.sprintf "repository out of range: %i" idx) - | repo -> Result.Ok repo) - in - let metadata = - apply - ~f:(fun metadata repository -> - metadata - @ [ Sexp.List [ Sexp.Atom "repo"; Sexp.Atom repository.remote ] - ; Sexp.List [ Sexp.Atom "commit_id"; Sexp.Atom repository.commit ] - ]) - repo metadata - in - let promote (path, expected_digest) = - let* abs_path = make_path cache (Path.Build.local path) in - cache.info [ Pp.textf "promote %s" (Path.to_string abs_path) ]; - let stat = Path.lstat abs_path in - let* stat = - if stat.st_kind = S_REG then - Result.Ok stat - else - Result.Error - (Format.sprintf "invalid file type: %s" - (Path.string_of_file_kind stat.st_kind)) - in - (* Create a duplicate (either a [Copy] or a [Hardlink] depending on the - [duplication] setting) of the promoted file in a temporary directory to - correctly handle the situation when the file is modified or deleted - during the promotion process. *) - Temp.with_temp_path ~dir:cache.temp_dir ~prefix:"temp" ~suffix:"data" - ~f:(function - | Error exn -> - let message = - sprintf "Failed to create a temp file: %s" - (Exn.to_dyn exn |> Dyn.to_string) - in - cache.info [ Pp.text message ]; - Result.Error message - | Ok tmp -> ( - duplicate ~duplication cache ~src:abs_path ~dst:tmp; - let effective_digest = Digest.file_with_stats tmp (Path.stat tmp) in - if Digest.compare effective_digest expected_digest != Ordering.Eq then ( - let message = - Printf.sprintf "digest mismatch: %s != %s" - (Digest.to_string effective_digest) - (Digest.to_string expected_digest) - in - cache.info [ Pp.text message ]; - Result.Error message - ) else - let in_the_cache = file_path cache effective_digest in - (* CR-someday: we assume that if the file with [effective_digest] - exists in the file storage, then its content matches the digest, - i.e. the user never modifies it. In principle, we could add a - consistency check but this would have a non-negligible performance - cost. A good compromise seems to be to add a "paranoid" mode to - Dune cache where we always check file contents for consistency with - the expected digest, so one could enable it when needed. In the - paranoid mode, we could furthermore check for a digest collision - via [Io.compare_files in_the_cache tmp]. *) - match Path.exists in_the_cache with - | true -> - (* Update the timestamp of the existing cache entry, moving it to - the back of the trimming queue. *) - Path.touch in_the_cache; - Result.Ok (Already_promoted { path; digest = effective_digest }) - | false -> - Path.mkdir_p (Path.parent_exn in_the_cache); - (* Move the temporary file to the cache. *) - Path.rename tmp in_the_cache; - (* Remove write permissions, making the cache entry immutable. We - assume that users do not modify the files in the cache. *) - Path.chmod in_the_cache - ~mode: - (Path.Permissions.remove ~mode:Path.Permissions.write - stat.st_perm); - Result.Ok (Promoted { path; digest = effective_digest }))) - in - let+ promoted = Result.List.map ~f:promote paths in - let metadata_path = metadata_path cache key - and metadata_tmp_path = Path.relative cache.temp_dir "metadata" - and files = - List.map promoted ~f:(function - | Already_promoted f - | Promoted f - -> f) - in - let metadata_file : Metadata_file.t = { metadata; files } in - let metadata = Csexp.to_string (Metadata_file.to_sexp metadata_file) in - Io.write_file metadata_tmp_path metadata; - let () = - match Io.read_file metadata_path with - | contents -> - if contents <> metadata then - User_warning.emit - [ Pp.textf "non reproductible collision on rule %s" - (Digest.to_string key) - ] - | exception Sys_error _ -> Path.mkdir_p (Path.parent_exn metadata_path) - in - Path.rename metadata_tmp_path metadata_path; - (* The files that have already been present in the cache can be deduplicated, - i.e. replaced with hardlinks to their cached copies. *) - (match cache.duplication_mode with - | Copy -> () - | Hardlink -> - List.iter promoted ~f:(function - | Already_promoted file -> cache.command_handler (Dedup file) - | _ -> ())); - (metadata_file, promoted) - -let promote cache paths key metadata ~repository ~duplication = - Result.map ~f:ignore - (promote_sync cache paths key metadata ~repository ~duplication) - -let search cache key = - let path = metadata_path cache key in - let* sexp = - try Io.with_file_in path ~f:Csexp.input with - | Sys_error _ -> Error "no cached file" - in - let+ metadata = Metadata_file.of_sexp sexp in - (* Touch cache files so they are removed last by LRU trimming. *) - let () = - let f (file : File.t) = - (* There is no point in trying to trim out files that are missing : dune - will have to check when hardlinking anyway since they could disappear - in the meantime. *) - try Path.touch ~create:false (file_path cache file.digest) with - | Unix.(Unix_error (ENOENT, _, _)) -> () - in - List.iter ~f metadata.files - in - (metadata.metadata, metadata.files) - -let set_build_dir cache p = Result.Ok { cache with build_root = Some p } - -let teardown cache = Temp.destroy Dir cache.temp_dir - -let hint _ _ = Result.Ok () - -let detect_duplication_mode root = - let () = Path.mkdir_p root in - let beacon = Path.relative root "beacon" - and target = Path.relative Path.build_dir ".cache-beacon" in - let () = Path.touch beacon in - let rec test () = - match Path.link beacon target with - | exception Unix.Unix_error (Unix.EEXIST, _, _) -> - Path.unlink_no_err target; - test () - | exception Unix.Unix_error _ -> Duplication_mode.Copy - | () -> Duplication_mode.Hardlink - in - test () - -let make ?(root = default_root ()) - ?(duplication_mode = detect_duplication_mode root) - ?(log = Dune_util.Log.info) ?(warn = fun pp -> User_warning.emit pp) - ~command_handler () = - let res = - { root - ; build_root = None - ; info = log - ; warn - ; repositories = [] - ; command_handler - ; duplication_mode - ; temp_dir = - (* CR-someday amokhov: Introduce [val getpid : unit -> t] in [pid.ml] so - that we don't use the untyped version of pid anywhere. *) - Temp.temp_in_dir ~perms:0o700 Temp.Dir ~dir:root ~prefix:"promoting." - ~suffix:("." ^ string_of_int (Unix.getpid ())) - } - in - match - Path.mkdir_p @@ file_store_root res; - Path.mkdir_p @@ metadata_store_root res - with - | () -> Ok res - | exception exn -> - Error - ("Unable to set up the cache root directory: " ^ Printexc.to_string exn) - -let duplication_mode cache = cache.duplication_mode - -let trim_bad_metadata_files ~metadata_files ~trimmed_so_far ~file_path cache = - List.fold_left metadata_files ~init:trimmed_so_far - ~f:(fun trimmed_so_far path -> - let should_be_removed = - match Metadata_file.parse path with - | Result.Error msg -> - cache.warn - [ Pp.textf "remove invalid metadata file %s: %s" - (Path.to_string_maybe_quoted path) - msg - ]; - true - | Result.Ok { Metadata_file.files; _ } -> - let is_broken = - List.exists files ~f:(fun { File.digest; _ } -> - let reference = file_path cache digest in - not (Path.exists reference)) - in - if is_broken then - cache.info - [ Pp.textf "remove metadata file %s as it refers to missing files" - (Path.to_string_maybe_quoted path) - ]; - is_broken - in - match should_be_removed with - | true -> - let bytes = (Path.stat path).st_size in - Path.unlink_no_err path; - Trimming_result.add trimmed_so_far ~bytes - | false -> trimmed_so_far) - -let garbage_collect_impl ~trimmed_so_far cache = - List.fold_left ~init:trimmed_so_far - ~f:(fun trimmed_so_far (root, file_path) -> - let metadata_files = FSSchemeImpl.list ~root in - trim_bad_metadata_files ~metadata_files ~trimmed_so_far ~file_path cache) - [ (V3.metadata_store_root cache, V3.file_path) - ; (V4.metadata_store_root cache, file_path) - ; (metadata_store_root cache, file_path) - ] - -let garbage_collect = garbage_collect_impl ~trimmed_so_far:Trimming_result.empty - -(* We call a cached file "unused" if there are currently no hard links to it - from build directories. Note that [st_nlink] can return 0 if the file has - been removed since we scanned the tree -- in this case we do not want to - claim that its removal is the result of cache trimming and we, therefore, - skip it while trimming. *) -let file_exists_and_is_unused ~stats = stats.Unix.st_nlink = 1 - -let files_in_cache_for_all_supported_versions cache = - let files = FSSchemeImpl.list ~root:(file_store_root cache) in - let files_v3 = FSSchemeImpl.list ~root:(V3.file_store_root cache) in - files @ files_v3 - -let trim cache ~goal = - let files = files_in_cache_for_all_supported_versions cache in - let f path = - let stats = Path.stat path in - if file_exists_and_is_unused ~stats then - Some (path, stats.st_size, stats.st_ctime) - else - None - and compare (_, _, t1) (_, _, t2) = Ordering.of_int (Stdlib.compare t1 t2) in - let files = List.sort ~compare (List.filter_map ~f files) - and delete (trimmed_so_far : Trimming_result.t) (path, bytes, _) = - if trimmed_so_far.trimmed_bytes >= goal then - trimmed_so_far - else ( - Path.unlink path; - (* CR-someday amokhov: We should really be using block_size * #blocks - because that's how much we save actually. *) - Trimming_result.add trimmed_so_far ~bytes - ) - in - let trimmed_so_far = - List.fold_left ~init:Trimming_result.empty ~f:delete files - in - garbage_collect_impl cache ~trimmed_so_far - -let overhead_size cache = - let files = files_in_cache_for_all_supported_versions cache in - let stats = - let f p = - try - let stats = Path.stat p in - if file_exists_and_is_unused ~stats then - Int64.of_int stats.st_size - else - 0L - with - | Unix.Unix_error (Unix.ENOENT, _, _) -> 0L - in - List.map ~f files - in - List.fold_left ~f:Int64.add ~init:0L stats diff --git a/src/cache/local.mli b/src/cache/local.mli deleted file mode 100644 index ce6272eab23..00000000000 --- a/src/cache/local.mli +++ /dev/null @@ -1,92 +0,0 @@ -open Stdune -open Cache_intf - -include Cache - -(** The default root directory of the local cache. *) -val default_root : unit -> Path.t - -(** A metadata file contains a list of [files] produced by a cached build rule, - along with some [metadata] that can be empty. - - One example of what can be included in the [metadata] field is a git commit - at which the [files] were built, which makes it possible to exchange cache - entries relevant to a specific commit between local and distributed caches. *) -module Metadata_file : sig - type t = - { metadata : Sexp.t list - ; files : File.t list - } - - val to_sexp : t -> Sexp.t - - val of_sexp : Sexp.t -> (t, string) result - - val to_string : t -> string - - val of_string : string -> (t, string) result - - val parse : Path.t -> (t, string) result -end - -(** Like [promote] but also returns the resulting metadata and promotions. *) -val promote_sync : - t - -> (Path.Build.t * Digest.t) list - -> Key.t - -> metadata - -> repository:int option - -> duplication:Duplication_mode.t option - -> (Metadata_file.t * promotion list, string) Result.t - -(** Create a local cache. The only required argument is a handler for commands - from the cache, such as [Dedup] that tell Dune that some files can be - replaced with hardlinks to their cached versions. The [root] argument - defaults to the [default_root]. If [duplication_mode] is omitted, we attempt - to detect whether hardlinks are supported and use [Hardlink] if they are, - falling back to [Copy] otherwise. *) -val make : - ?root:Path.t - -> ?duplication_mode:Duplication_mode.t - -> ?log:(User_message.Style.t Pp.t list -> unit) - -> ?warn:(User_message.Style.t Pp.t list -> unit) - -> command_handler:(command -> unit) - -> unit - -> (t, string) Result.t - -(** The deduplication mode that was set or detected automatically (if omitted) - during the local cache creation with the function [make]. *) -val duplication_mode : t -> Duplication_mode.t - -(** The overhead size of the cache, that is, the total size of files in the - cache that are not linked from any build directory. *) -val overhead_size : t -> int64 - -module Trimming_result : sig - type t = { trimmed_bytes : int64 } -end - -(** Trim the cache by removing a set of unused files from it so that the total - freed space is greater or equal to the specificed [goal], in bytes. We call - a cached file "unused" if there are currently no hard links to it from build - directories. - - Unused files are removed in the order of last access, i.e. we first remove - the least recently accessed one. - - We also remove all metadata files whose file references got broken during - the trimming. *) -val trim : t -> goal:int64 -> Trimming_result.t - -(** Purge cache metadata files that can't be read or contain references to - non-existing files. *) -val garbage_collect : t -> Trimming_result.t - -(** Path to a metadata file *) -val metadata_path : t -> Key.t -> Path.t - -(** Path to a data file *) -val file_path : t -> Key.t -> Path.t - -(** Path to a safe, instance specific temporary directory *) -val tmp_path : t -> string -> Path.t diff --git a/src/cache/messages.ml b/src/cache/messages.ml deleted file mode 100644 index 3cef57163fd..00000000000 --- a/src/cache/messages.ml +++ /dev/null @@ -1,287 +0,0 @@ -open Stdune -open Result.O -open Cache_intf -include Messages_intf - -let invalid_args args = - Result.Error - (Printf.sprintf "invalid arguments:%s" - (String.concat ~sep:" " (List.map ~f:Sexp.to_string args))) - -let version_at_least ~min v = - v.major > min.major || (v.major = min.major && v.minor >= min.minor) - -let string_of_version { major; minor } = sprintf "%i.%i" major minor - -let dyn_of_version { major; minor } = - Dyn.Encoder.record - [ ("major", Dyn.Encoder.int major); ("minor", Dyn.Encoder.int minor) ] - -let hint_min_version = { major = 1; minor = 2 } - -let hint_supported version = version_at_least ~min:hint_min_version version - -let sexp_of_message : type a. version -> a message -> Sexp.t = - fun version -> - let cmd name args = Sexp.List (Sexp.Atom name :: args) in - function - | Hint keys -> - if not (hint_supported version) then - Code_error.raise "tried sending a not yet supported hint message" - [ ("current version", dyn_of_version version) - ; ("minimum version", dyn_of_version hint_min_version) - ]; - let f k = Sexp.Atom (Digest.to_string k) in - cmd "hint" @@ List.map ~f keys - | Lang versions -> - cmd "lang" - (Sexp.Atom "dune-cache-protocol" - :: - (List.map ~f:(fun { major; minor } -> - Sexp.List - [ Sexp.Atom (string_of_int major) - ; Sexp.Atom (string_of_int minor) - ])) - versions) - | Promote promotion -> - let key = Key.to_string promotion.key - and f (path, digest) = - Sexp.List - [ Sexp.Atom (Path.Local.to_string (Path.Build.local path)) - ; Sexp.Atom (Digest.to_string digest) - ] - in - let rest = [] in - let rest = - match promotion.duplication with - | Some mode - when version = { major = 1; minor = 0 } && mode = Duplication_mode.Copy - -> - User_error.raise - [ Pp.textf "cache daemon v1.0 does not support copy duplication mode" - ] - | Some mode -> - Sexp.List - [ Sexp.Atom "duplication" - ; Sexp.Atom (Duplication_mode.to_string mode) - ] - :: rest - | None -> rest - in - let rest = - match promotion.repository with - | Some idx -> - Sexp.List [ Sexp.Atom "repo"; Sexp.Atom (string_of_int idx) ] :: rest - | None -> rest - in - cmd "promote" - (Sexp.List [ Sexp.Atom "key"; Sexp.Atom key ] - :: - Sexp.List (Sexp.Atom "files" :: List.map ~f promotion.files) - :: - Sexp.List [ Sexp.Atom "metadata"; Sexp.List promotion.metadata ] :: rest) - | SetBuildRoot root -> - cmd "set-build-root" [ Sexp.Atom (Path.to_absolute_filename root) ] - | SetCommonMetadata metadata -> cmd "set-common-metadata" metadata - | SetRepos repositories -> - let f { directory; remote; commit } = - Sexp.List - [ Sexp.List [ Sexp.Atom "dir"; Sexp.Atom directory ] - ; Sexp.List [ Sexp.Atom "remote"; Sexp.Atom remote ] - ; Sexp.List [ Sexp.Atom "commit_id"; Sexp.Atom commit ] - ] - in - cmd "set-repos" (List.map ~f repositories) - | Dedup file -> - cmd "dedup" - [ Sexp.List - [ Sexp.Atom (Path.Local.to_string (Path.Build.local file.path)) - ; Sexp.Atom (Digest.to_string file.digest) - ] - ] - -let int_of_string ?where s = - match Int.of_string s with - | Some s -> Ok s - | None -> - Result.Error - (Printf.sprintf "invalid integer%s: %s" - (match where with - | Some l -> " in " ^ l - | None -> "") - s) - -let lang_of_sexp = function - | Sexp.Atom "dune-cache-protocol" :: versions -> - let decode_version = function - | Sexp.List [ Sexp.Atom major; Sexp.Atom minor ] -> - let+ major = int_of_string ~where:"lang command version" major - and+ minor = int_of_string ~where:"lang command version" minor in - { major; minor } - | v -> - Result.Error - (Printf.sprintf "invalid version in lang command: %s" - (Sexp.to_string v)) - in - Result.List.map ~f:decode_version versions - | args -> invalid_args args - -let initial_message_of_sexp = function - | Sexp.List (Sexp.Atom "lang" :: args) -> - let+ versions = lang_of_sexp args in - Lang versions - | exp -> - Result.Error - (Printf.sprintf "invalid initial message: %s" (Sexp.to_string exp)) - -let incoming_message_of_sexp version sexp = - let open Result.O in - let* path, digest = - match sexp with - | Sexp.List - [ Sexp.Atom "dedup"; Sexp.List [ Sexp.Atom path; Sexp.Atom digest ] ] - when version = { major = 1; minor = 2 } -> - Ok (path, digest) - | Sexp.List - [ Sexp.Atom "dedup" - ; Sexp.List - (* Message protocol versions before v1.2 included an additional - field [_path_in_cache] which is no longer used. *) - [ Sexp.Atom path; Sexp.Atom _path_in_cache; Sexp.Atom digest ] - ] -> - Ok (path, digest) - | exp -> - Result.Error (Printf.sprintf "invalid command: %s" (Sexp.to_string exp)) - in - match Digest.from_hex digest with - | Some digest -> - Result.Ok (Dedup { path = Path.Build.of_string path; digest }) - | None -> Result.Error (Printf.sprintf "invalid digest: %s" digest) - -let outgoing_message_of_sexp version = - let repos_of_sexp args = - let convert = function - | Sexp.List - [ Sexp.List [ Sexp.Atom "dir"; Sexp.Atom directory ] - ; Sexp.List [ Sexp.Atom "remote"; Sexp.Atom remote ] - ; Sexp.List [ Sexp.Atom "commit_id"; Sexp.Atom commit ] - ] -> - Result.ok { directory; remote; commit } - | invalid -> - Result.Error - (Printf.sprintf "invalid repo: %s" (Sexp.to_string invalid)) - in - Result.List.map ~f:convert args - and promote_of_sexp = function - | Sexp.List [ Sexp.Atom "key"; Sexp.Atom key ] - :: Sexp.List (Sexp.Atom "files" :: files) - :: Sexp.List [ Sexp.Atom "metadata"; Sexp.List metadata ] :: rest as - cmd -> - let file = function - | Sexp.List [ Sexp.Atom path; Sexp.Atom hash ] -> - let+ d = Key.of_string hash in - (Path.Build.of_local (Path.Local.of_string path), d) - | sexp -> - Result.Error - (Printf.sprintf "invalid file in promotion message: %s" - (Sexp.to_string sexp)) - in - let* repository, rest = - match rest with - | Sexp.List [ Sexp.Atom "repo"; Sexp.Atom repo ] :: rest -> - Result.map - ~f:(fun repo -> (Some repo, rest)) - (int_of_string ~where:"repository index" repo) - | _ -> Result.Ok (None, rest) - in - let+ duplication = - match rest with - | [ Sexp.List [ Sexp.Atom "duplication"; Sexp.Atom mode ] ] -> - Result.map ~f:Option.some (Duplication_mode.of_string mode) - | [] -> Result.Ok None - | _ -> - Result.Error - (Printf.sprintf "invalid promotion message: %s" - (Sexp.to_string (Sexp.List cmd))) - and+ files = Result.List.map ~f:file files - and+ key = Key.of_string key in - { repository; files; key; metadata; duplication } - | args -> invalid_args args - and path_of_sexp = function - | [ Sexp.Atom dir ] -> Result.ok (Path.of_string dir) - | args -> invalid_args args - and hint_of_sexp keys = - let f = function - | Sexp.Atom k -> ( - match Digest.from_hex k with - | Some k -> Result.Ok k - | None -> - Result.Error (Format.asprintf "invalid key in hint message: %s" k)) - | k -> - Result.Error - (Format.asprintf "invalid expression in hint message: %a" Pp.to_fmt - (Sexp.pp k)) - in - Result.List.map ~f keys - in - function - | Sexp.List (Sexp.Atom cmd :: args) -> - Result.map_error - ~f:(fun s -> cmd ^ ": " ^ s) - (match cmd with - | "hint" when hint_supported version -> - let+ keys = hint_of_sexp args in - Hint keys - | "promote" -> - let+ promotions = promote_of_sexp args in - Promote promotions - | "set-build-root" -> - let+ path = path_of_sexp args in - SetBuildRoot path - | "set-common-metadata" -> Result.Ok (SetCommonMetadata args) - | "set-repos" -> - let+ repos = repos_of_sexp args in - SetRepos repos - | _ -> Result.Error (Printf.sprintf "unknown command: %s" cmd)) - | cmd -> - Result.Error - (Printf.sprintf "invalid command format: %s" (Sexp.to_string cmd)) - -let send_sexp output sexp = - output_string output (Csexp.to_string sexp); - flush output - -let send version output message = - send_sexp output (sexp_of_message version message) - -let find_newest_common_version versions_a versions_b = - let find a b = - let f { major; minor } = (major, minor) in - let a = Int.Map.of_list_map_exn ~f a - and b = Int.Map.of_list_map_exn ~f b in - let common = - Int.Map.merge - ~f:(fun _major minor_in_a minor_in_b -> - match (minor_in_a, minor_in_b) with - | Some a, Some b -> Some (min a b) - | _ -> None) - a b - in - Option.map - ~f:(fun (major, minor) -> { major; minor }) - (Int.Map.max_binding common) - in - match find versions_a versions_b with - | None -> Result.Error "no compatible versions" - | Some version -> Result.ok version - -let negotiate_version ~versions_supported_by_dune fd input output = - send { major = 1; minor = 0 } output (Lang versions_supported_by_dune); - let f msg = - Unix.close fd; - msg - in - Result.map_error ~f - (let* sexp = Csexp.input input in - let* (Lang versions) = initial_message_of_sexp sexp in - find_newest_common_version versions_supported_by_dune versions) diff --git a/src/cache/messages.mli b/src/cache/messages.mli deleted file mode 100644 index 3734c24c828..00000000000 --- a/src/cache/messages.mli +++ /dev/null @@ -1,41 +0,0 @@ -open Stdune - -include module type of Messages_intf - -(** Decode an [incoming] message. *) -val incoming_message_of_sexp : - version -> Sexp.t -> (incoming message, string) Result.t - -(** Decode an [initial] message. *) -val initial_message_of_sexp : Sexp.t -> (initial message, string) Result.t - -(** Decode an [outgoing] message. *) -val outgoing_message_of_sexp : - version -> Sexp.t -> (outgoing message, string) Result.t - -(** Encode a message. *) -val sexp_of_message : version -> 'a message -> Sexp.t - -(** Send a message. *) -val send : version -> out_channel -> 'a message -> unit - -(** Find the newest [version] of the communication protocol supported both by - Dune and the cache daemon. To do that, we send [versions_supported_by_dune] - to the cache daemon via the [out_channel], receive the supported versions of - the cache daemon via the [in_channel], and pick the newest one that matches - both lists. *) -val negotiate_version : - versions_supported_by_dune:version list - -> Unix.file_descr - -> in_channel - -> out_channel - -> (version, string) result - -val string_of_version : version -> string - -val hint_supported : version -> bool - -val hint_min_version : version - -val find_newest_common_version : - version list -> version list -> (version, string) result diff --git a/src/cache/messages_intf.ml b/src/cache/messages_intf.ml deleted file mode 100644 index 8683e95f765..00000000000 --- a/src/cache/messages_intf.ml +++ /dev/null @@ -1,52 +0,0 @@ -open Cache_intf -open Stdune - -(** A version of the communication protocol between Dune and the cache daemon. *) -type version = - { major : int - ; minor : int - } - -(** When Dune successfully executes a build rule, it sends a "promotion" message - to the cache daemon, listing the produced [files] along with some [metadata] - and a few other fields relevant for caching. *) -type promotion = - { key : Key.t - ; files : (Path.Build.t * Digest.t) list - ; metadata : Sexp.t list - ; repository : int option - ; duplication : Duplication_mode.t option - } - -(** There is one initial message [Lang], which is sent by Dune and the cache - daemon to each other during the initial negotiation of the version of the - communication protocol. *) -type initial = Initial - -(** Outgoing messages are sent by Dune to the cache daemon. *) -type outgoing = Outgoing - -(** Incoming messages are sent by the cache daemon to Dune. *) -type incoming = Incoming - -(** Messages of the communication protocol between Dune and the cache daemon. *) -type _ message = - | Lang : version list -> initial message - (** Inform the other party about the supported versions of the - communication protocol. *) - | SetBuildRoot : Path.t -> outgoing message - (** Set the absolute path to the build root, to be used when interpreting - relative paths in subsequent messages. *) - | SetCommonMetadata : Sexp.t list -> outgoing message - (** Set the common metadata that should be added to the subsequent - [Promote] messages. *) - | SetRepos : repository list -> outgoing message - (** Set the paths to all the version controlled repositories in the - workspace along with the associated commit identifiers. *) - | Promote : promotion -> outgoing message - (** Promote files produced by a build rule into the cache. *) - | Hint : Digest.t list -> outgoing message - (** The cache daemon a rule is going to be built *) - | Dedup : File.t -> incoming message - (** Inform Dune that a file that was previously promoted can now be - replaced by a hardlink to the corresponding file stored in cache. *) diff --git a/src/cache_daemon/cache_daemon.ml b/src/cache_daemon/cache_daemon.ml deleted file mode 100644 index 81a6a5726ec..00000000000 --- a/src/cache_daemon/cache_daemon.ml +++ /dev/null @@ -1,341 +0,0 @@ -module Evt = Event -module Utils = Utils -open Stdune -module Log = Dune_util.Log -open Utils -open Cache.Messages -open Result.O - -type client = - { fd : Unix.file_descr - ; peer : Unix.sockaddr - ; input : in_channel - ; output : out_channel - ; common_metadata : Sexp.t list - ; cache : Cache.Local.t - ; version : version - } - -let default_port_file () = - let runtime_dir = - match Xdg.runtime_dir with - | Some p -> Path.relative (Path.of_string p) "dune-cache-daemon" - | None -> - (* The runtime directory is 0700 owned by the user for security reasons. - Defaulting to a directory in the dune cache root makes sense in that - regard, since if someone has access to this directory, it has access to - the cache content, and having access to the socket does not make a - difference. *) - Path.relative (Cache.Local.default_root ()) "runtime" - in - Path.L.relative runtime_dir [ "dune-cache-daemon"; "port" ] - -let max_port_size = 1024 - -let check_port_file ?(close = true) p = - let p = Path.to_string p in - match Result.try_with (fun () -> Unix.openfile p [ Unix.O_RDONLY ] 0o600) with - | Result.Ok fd -> - let f () = - Daemonize.retry (fun () -> - match Fcntl.lock_get fd Fcntl.Write with - | None -> Some None - | Some (Fcntl.Read, pid) -> Some (Some pid) - | Some (Fcntl.Write, _) -> None) - |> Result.map_error ~f:(fun m -> Failure m) - >>| Option.map ~f:(fun pid -> - let buf = Bytes.make max_port_size ' ' in - let read = Unix.read fd buf 0 max_port_size in - (Bytes.sub_string buf ~pos:0 ~len:read, pid, fd)) - and finally () = if close then Unix.close fd in - Exn.protect ~f ~finally - | Result.Error (Unix.Unix_error (Unix.ENOENT, _, _)) -> Result.Ok None - | Result.Error e -> Result.Error e - -let send_sexp output sexp = - Csexp.to_channel output sexp; - flush output - -let send version output message = - send_sexp output (sexp_of_message version message) - -module ClientsKey = struct - type t = Unix.file_descr - - let compare a b = Ordering.of_int (Stdlib.compare a b) - - let to_dyn _ = Dyn.Opaque -end - -module Clients = Map.Make (ClientsKey) - -type config = - { exit_no_client : bool - ; duplication_mode : Cache.Duplication_mode.t option - } - -type event = - | Stop - | New_client of Unix.file_descr * Unix.sockaddr - | Client_left of Unix.file_descr - -type t = - { root : Path.t option - ; mutable socket : Unix.file_descr option - ; mutable clients : (client * Thread.t) Clients.t - ; mutable endpoint : string option - ; mutable accept_thread : Thread.t option - ; mutable trim_thread : Thread.t option - ; config : config - ; events : event Evt.channel - ; cache : Cache.Local.t - } - -exception Error of string - -let make ?root ~config () : t = - match - Cache.Local.make ?root ~duplication_mode:Cache.Duplication_mode.Hardlink - ~command_handler:ignore () - with - | Result.Error msg -> User_error.raise [ Pp.text msg ] - | Result.Ok cache -> - { root - ; socket = None - ; clients = Clients.empty - ; endpoint = None - ; accept_thread = None - ; trim_thread = None - ; config - ; events = Evt.new_channel () - ; cache - } - -let getsockname = function - | Unix.ADDR_UNIX _ -> - User_error.raise - [ Pp.textf "got a Unix socket connection on our TCP socket ?" ] - | Unix.ADDR_INET (addr, port) -> (addr, port) - -let peer_name s = - let addr, port = getsockname s in - Printf.sprintf "%s:%d" (Unix.string_of_inet_addr addr) port - -let stop daemon = Evt.sync (Evt.send daemon.events Stop) - -let versions_supported_by_dune : version list = [ { major = 1; minor = 2 } ] - -let endpoint m = m.endpoint - -let client_handle version output = function - | Cache.Dedup f -> send version output (Dedup f) - -let client_thread (events, (client : client)) = - try - let handle_cmd (client : client) sexp = - let* msg = outgoing_message_of_sexp client.version sexp in - match msg with - | Hint _ -> Result.Ok client - | Promote { duplication; repository; files; key; metadata } -> - let+ () = - Cache.Local.promote client.cache files key - (metadata @ client.common_metadata) - ~repository ~duplication - in - client - | SetBuildRoot root -> - let+ cache = Cache.Local.set_build_dir client.cache root in - { client with cache } - | SetCommonMetadata metadata -> - Result.ok { client with common_metadata = metadata } - | SetRepos repositories -> - let+ cache = Cache.Local.with_repositories client.cache repositories in - { client with cache } - in - let input = client.input in - let f () = - Log.info [ Pp.textf "accept client: %s" (peer_name client.peer) ]; - let rec handle client = - match Csexp.input_opt input with - | Error msg -> - Log.info - [ Pp.textf "%s: parse error: %s" (peer_name client.peer) msg ] - | Ok None -> Log.info [ Pp.textf "%s: ended" (peer_name client.peer) ] - | Ok (Some cmd) -> ( - Log.info - [ Pp.textf "%s: received command: %s" (peer_name client.peer) - (Sexp.to_string cmd) - ]; - match handle_cmd client cmd with - | Result.Error e -> - Log.info - [ Pp.textf "%s: command error: %s" (peer_name client.peer) e ]; - handle client - | Result.Ok client -> handle client) - in - handle client - and finally () = - (try Unix.shutdown client.fd Unix.SHUTDOWN_ALL with - | Unix.Unix_error (Unix.ENOTCONN, _, _) -> ()); - Unix.close client.fd; - Evt.sync (Evt.send events (Client_left client.fd)) - in - try Exn.protect ~f ~finally with - | Unix.Unix_error (Unix.EBADF, _, _) -> - Log.info [ Pp.textf "%s: ended" (peer_name client.peer) ] - | Sys_error msg -> - Log.info [ Pp.textf "%s: ended: %s" (peer_name client.peer) msg ] - with - | Code_error.E e as exn -> - Log.info - [ (let open Pp.O in - Pp.textf "%s: fatal error: " (peer_name client.peer) - ++ Dyn.pp (Code_error.to_dyn e)) - ]; - raise exn - -let run ?(port_f = ignore) ?(port = 0) daemon = - let trim_thread ~max_overhead_size period cache = - let rec trim () = - Unix.sleep period; - let () = - match - let overhead_size = Cache.Local.overhead_size cache in - if overhead_size > max_overhead_size then ( - let goal = Int64.sub overhead_size max_overhead_size in - Log.info [ Pp.textf "trimming %Li bytes" goal ]; - Some (Cache.Local.trim cache ~goal) - ) else - None - with - | Some { trimmed_bytes } -> - Log.info [ Pp.textf "trimming freed %Li bytes" trimmed_bytes ] - | None -> Log.info [ Pp.text "skip trimming" ] - in - trim () - in - trim () - in - let rec accept_thread sock = - let rec accept () = - try Unix.accept sock with - | Unix.Unix_error (Unix.EINTR, _, _) -> (accept [@tailcall]) () - in - let fd, peer = accept () in - (try Evt.sync (Evt.send daemon.events (New_client (fd, peer))) with - | Unix.Unix_error (Unix.EBADF, _, _) -> ()); - (accept_thread [@tailcall]) sock - in - let f () = - let+ trim_period = - Option.value - ~default:(Result.Ok (10 * 60)) - (Option.map ~f:int_of_string - (Env.get Env.initial "DUNE_CACHE_TRIM_PERIOD")) - and+ max_overhead_size = - Option.value ~default:(Result.Ok 10_000_000_000L) - (Option.map ~f:int64_of_string - (* CR-someday amokhov: the term "size" is ambiguous, it would be - better to switch to a more precise one, e.g. "max overhead size". *) - (Env.get Env.initial "DUNE_CACHE_TRIM_SIZE")) - in - let sock = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in - daemon.socket <- Some sock; - Unix.bind sock (Unix.ADDR_INET (Unix.inet_addr_of_string "127.0.0.1", port)); - let addr, port = getsockname (Unix.getsockname sock) in - let endpoint = - Printf.sprintf "%s:%i" (Unix.string_of_inet_addr addr) port - in - daemon.endpoint <- Some endpoint; - port_f endpoint; - Unix.listen sock 1024; - daemon.accept_thread <- Some (Thread.create accept_thread sock); - daemon.trim_thread <- - Some - (Thread.create - (trim_thread ~max_overhead_size trim_period) - daemon.cache); - let rec handle () = - let stop () = - match daemon.socket with - | Some fd -> - daemon.socket <- None; - let clean f = ignore (Clients.iter ~f daemon.clients) in - clean (fun (client, _) -> Unix.shutdown client.fd Unix.SHUTDOWN_ALL); - clean (fun (_, tid) -> Thread.join tid); - clean (fun (client, _) -> Unix.close client.fd); - Unix.close fd - | _ -> Log.info [ Pp.text "stop" ] - in - (match Evt.sync (Evt.receive daemon.events) with - | Stop -> stop () - | New_client (fd, peer) -> ( - let output = Unix.out_channel_of_descr fd - and input = Unix.in_channel_of_descr fd in - match - let* version = - negotiate_version ~versions_supported_by_dune fd input output - in - let client = - { fd - ; peer - ; input - ; output - ; version - ; common_metadata = [] - ; cache = - (match - Cache.Local.make ?root:daemon.root - ~duplication_mode:Cache.Duplication_mode.Hardlink - ~command_handler:(client_handle version output) - () - with - | Result.Ok m -> m - | Result.Error e -> User_error.raise [ Pp.textf "%s" e ]) - } - in - let tid = Thread.create client_thread (daemon.events, client) in - let+ clients = - Result.map_error - ~f:(fun _ -> "duplicate socket") - (Clients.add daemon.clients client.fd (client, tid)) - in - daemon.clients <- clients - with - | Result.Ok () -> () - | Result.Error msg -> Log.info [ Pp.textf "reject client: %s" msg ]) - | Client_left fd -> - daemon.clients <- Clients.remove daemon.clients fd; - if daemon.config.exit_no_client && Clients.is_empty daemon.clients then - stop ()); - if Option.is_some daemon.socket then (handle [@tailcall]) () - in - handle () - in - match f () with - | Result.Ok () -> () - | Result.Error msg -> User_error.raise [ Pp.text msg ] - | exception Unix.Unix_error (errno, f, _) -> - User_error.raise - [ Pp.textf "unable to %s: %s\n" f (Unix.error_message errno) ] - -let daemon ~root ~config started = - Path.mkdir_p root; - let log_file = Path.relative root "log" in - Log.init ~file:(This log_file) (); - let daemon = make ~root ~config () in - (* Event blocks signals when waiting. Use a separate thread to catch signals. *) - let signal_handler s = - Log.info [ Pp.textf "caught signal %i, exiting" s ]; - ignore (Thread.create stop daemon) - and signals = [ Sys.sigint; Sys.sigterm ] in - let rec signals_handler () = - signal_handler (Thread.wait_signal signals); - signals_handler () - in - ignore (Thread.sigmask Unix.SIG_BLOCK signals); - ignore (Thread.create signals_handler ()); - try run ~port_f:started daemon with - | Error s -> - Printf.fprintf stderr "%s: fatal error: %s\n%!" Sys.argv.(0) s; - exit 1 diff --git a/src/cache_daemon/cache_daemon.mli b/src/cache_daemon/cache_daemon.mli deleted file mode 100644 index fd8a79abb36..00000000000 --- a/src/cache_daemon/cache_daemon.mli +++ /dev/null @@ -1,28 +0,0 @@ -open Stdune -module Utils = Utils - -type t - -exception Error of string - -type config = - { exit_no_client : bool - ; duplication_mode : Cache.Duplication_mode.t option - } - -val make : ?root:Path.t -> config:config -> unit -> t - -val default_port_file : unit -> Path.t - -val check_port_file : - ?close:bool - -> Path.t - -> ((string * int * Unix.file_descr) option, exn) Result.t - -val run : ?port_f:(string -> unit) -> ?port:int -> t -> unit - -val stop : t -> unit - -val endpoint : t -> string option - -val daemon : root:Path.t -> config:config -> (string -> unit) -> unit diff --git a/src/cache_daemon/dune b/src/cache_daemon/dune deleted file mode 100644 index 8abcf0ac976..00000000000 --- a/src/cache_daemon/dune +++ /dev/null @@ -1,3 +0,0 @@ -(library - (name cache_daemon) - (libraries dune_util xdg threads.posix cache stdune csexp)) diff --git a/src/cache_daemon/utils.ml b/src/cache_daemon/utils.ml deleted file mode 100644 index 4809f000bed..00000000000 --- a/src/cache_daemon/utils.ml +++ /dev/null @@ -1,23 +0,0 @@ -open Stdune - -let int_of_string ?where s = - match Int.of_string s with - | Some s -> Ok s - | None -> - Result.Error - (Printf.sprintf "invalid integer%s: %s" - (match where with - | Some l -> " in " ^ l - | None -> "") - s) - -let int64_of_string ?where s = - match Int64.of_string s with - | res -> Ok res - | exception _exn -> - Result.Error - (Printf.sprintf "invalid 64-bit integer%s: %s" - (match where with - | Some l -> " in " ^ l - | None -> "") - s) diff --git a/src/dune_cache/local.ml b/src/dune_cache/local.ml index 0595901f6fe..b11770df344 100644 --- a/src/dune_cache/local.ml +++ b/src/dune_cache/local.ml @@ -26,14 +26,6 @@ module Store_artifacts_result = struct | (Error _ | Will_not_store_due_to_non_determinism _) as res -> res end -module Check_artifacts_result = struct - type t = - | Missing - | Already_present of (Path.Build.t * Digest.t) list - | Error of exn - | Non_determinism_detected of Sexp.t -end - module Target = struct type t = { path : Path.Build.t diff --git a/src/dune_cache/local.mli b/src/dune_cache/local.mli index dd2dd7ac778..0f7544da3a1 100644 --- a/src/dune_cache/local.mli +++ b/src/dune_cache/local.mli @@ -7,7 +7,10 @@ The files in the cache are assumed to be immutable, which is mostly enforced by removing write permissions to any file in the cache, but of course anyone - could add back write permissions and corrupt the cache. *) + could add back write permissions and corrupt the cache. + + See also the [dune_cache_storage] library that provides more functionality + for manipulating the store, such as accessing individual metadata entries. *) (* In case we do run into the problem of corrupted cache: we could actually store the mtime in the metadata and complain if it's not what we expected. *) @@ -27,17 +30,6 @@ module Store_artifacts_result : sig | Will_not_store_due_to_non_determinism of Sexp.t end -module Check_artifacts_result : sig - type t = - | Missing - | Already_present of (Path.Build.t * Digest.t) list - | Error of exn - (** [Error _] can happen due to genuine problems (cannot parse internal - cache files) or harmless ones (race with a concurrent change to the - cache). *) - | Non_determinism_detected of Sexp.t -end - module Target : sig type t @@ -50,7 +42,11 @@ module Target : sig val create : Path.Build.t -> t option end -(** The [compute_digest] function is passed explicitly because the caller might +(** Store targets produced by a rule with a given digest. If successful, this + operation will create one metadata entry plus one file entry per target in + the cache. + + The [compute_digest] function is passed explicitly because the caller might want to memoize and/or throttle file digest computations. *) val store_artifacts : mode:Dune_cache_storage.Mode.t @@ -59,6 +55,9 @@ val store_artifacts : -> Target.t list -> Store_artifacts_result.t Fiber.t +(** Restore targets produced by a rule with a given digest. If successful, this + operation will restore the targets on disk, in the [target_dir] directory, + and will also return their paths and digests. *) val restore_artifacts : mode:Dune_cache_storage.Mode.t -> rule_digest:Digest.t diff --git a/src/dune_cache/trimmer.ml b/src/dune_cache/trimmer.ml new file mode 100644 index 00000000000..892f29965ee --- /dev/null +++ b/src/dune_cache/trimmer.ml @@ -0,0 +1,108 @@ +open Stdune +open Dune_cache_storage + +module Trimming_result = struct + type t = { trimmed_bytes : int64 } + + let empty = { trimmed_bytes = 0L } + + (* CR-someday amokhov: Right now Dune doesn't support large (>1Gb) files on + 32-bit platforms due to the pervasive use of [int] for representing + individual file sizes. It's not fundamentally difficult to switch to + [int64], so we should do it if it becomes a real issue. *) + let add t ~(bytes : int) = + { trimmed_bytes = Int64.add t.trimmed_bytes (Int64.of_int bytes) } +end + +let trim_broken_metadata_entries ~trimmed_so_far = + List.fold_left Version.Metadata.all ~init:trimmed_so_far + ~f:(fun trimmed_so_far version -> + let metadata_entries = Layout.Versioned.list_metadata_entries version in + let file_path = + Layout.Versioned.file_path (Version.Metadata.file_version version) + in + List.fold_left metadata_entries ~init:trimmed_so_far + ~f:(fun trimmed_so_far (path, rule_or_action_digest) -> + let should_be_removed = + match Metadata.Versioned.restore version ~rule_or_action_digest with + | Not_found_in_cache -> + (* A concurrent process must have removed this metadata file. No + need to try removing such "phantom" metadata files again. *) + false + | Error _exn -> + (* If a metadata file can't be restored, let's trim it. *) + true + | Restored metadata -> ( + match metadata with + | Metadata.Value _ -> + (* We do not expect to see any value entries in the cache. Let's + keep them untrimmed for now. *) + false + | Metadata.Artifacts { entries; _ } -> + List.exists entries + ~f:(fun { Artifacts.Metadata_entry.file_digest; _ } -> + let reference = file_path ~file_digest in + not (Path.exists reference))) + in + match should_be_removed with + | true -> + let bytes = (Path.stat path).st_size in + Path.unlink_no_err path; + Trimming_result.add trimmed_so_far ~bytes + | false -> trimmed_so_far)) + +let garbage_collect () = + trim_broken_metadata_entries ~trimmed_so_far:Trimming_result.empty + +let files_in_cache_for_all_supported_versions () = + List.concat_map Version.File.all ~f:(fun file_version -> + Layout.Versioned.list_file_entries file_version) + +(* We call a cached file "unused" if there are currently no hard links to it + from build directories. Note that [st_nlink] can return 0 if the file has + been removed since we scanned the tree -- in this case we do not want to + claim that its removal is the result of cache trimming and we, therefore, + skip it while trimming. *) +let file_exists_and_is_unused ~stats = stats.Unix.st_nlink = 1 + +let trim ~goal = + let files = files_in_cache_for_all_supported_versions () |> List.map ~f:fst in + let f path = + let stats = Path.stat path in + if file_exists_and_is_unused ~stats then + Some (path, stats.st_size, stats.st_ctime) + else + None + and compare (_, _, t1) (_, _, t2) = Poly.compare t1 t2 in + let files = List.sort ~compare (List.filter_map ~f files) + and delete (trimmed_so_far : Trimming_result.t) (path, bytes, _) = + if trimmed_so_far.trimmed_bytes >= goal then + trimmed_so_far + else ( + Path.unlink path; + (* CR-someday amokhov: We should really be using block_size * #blocks + because that's how much we save actually. *) + Trimming_result.add trimmed_so_far ~bytes + ) + in + let trimmed_so_far = + List.fold_left ~init:Trimming_result.empty ~f:delete files + in + trim_broken_metadata_entries ~trimmed_so_far + +let overhead_size () = + let files = files_in_cache_for_all_supported_versions () |> List.map ~f:fst in + let stats = + let f p = + try + let stats = Path.stat p in + if file_exists_and_is_unused ~stats then + Int64.of_int stats.st_size + else + 0L + with + | Unix.Unix_error (Unix.ENOENT, _, _) -> 0L + in + List.map ~f files + in + List.fold_left ~f:Int64.add ~init:0L stats diff --git a/src/dune_cache/trimmer.mli b/src/dune_cache/trimmer.mli new file mode 100644 index 00000000000..2ef16c23859 --- /dev/null +++ b/src/dune_cache/trimmer.mli @@ -0,0 +1,27 @@ +(** Basic functionality for trimming Dune cache. *) + +(* CR-someday amokhov: Reuse the testsuite and cache telemetry functionality + from Jenga's trimmer. *) + +module Trimming_result : sig + type t = { trimmed_bytes : int64 } +end + +(** Trim the cache by removing a set of unused files so that the total freed + space is greater or equal to the specificed [goal], in bytes. A cached file + is "unused" if there are no hard links to it from build directories. + + Unused files are removed in the order of last access, i.e. we first remove + the least recently accessed one. + + We also remove all metadata files whose file references got broken during + the trimming. *) +val trim : goal:int64 -> Trimming_result.t + +(** Purge cache metadata files that can't be read or contain references to + non-existing files. *) +val garbage_collect : unit -> Trimming_result.t + +(** Compute the "overhead size" of the cache, that is, the total size of files + in the cache that are not hardlinked from any build directory. *) +val overhead_size : unit -> int64 diff --git a/src/dune_cache_storage/dune_cache_storage.ml b/src/dune_cache_storage/dune_cache_storage.ml index 3f15ef88acf..44bf596d4c0 100644 --- a/src/dune_cache_storage/dune_cache_storage.ml +++ b/src/dune_cache_storage/dune_cache_storage.ml @@ -2,6 +2,7 @@ open Stdune module Layout = Layout module Mode = Mode module Util = Util +module Version = Version (* See [doc/dev/cache.md] for design and implementation notes. *) @@ -287,6 +288,11 @@ module Metadata = struct let restore ~metadata_path ~rule_or_action_digest = restore_metadata_file (metadata_path ~rule_or_action_digest) ~of_sexp + module Versioned = struct + let restore version = + restore ~metadata_path:(Layout.Versioned.metadata_path version) + end + let restore = restore ~metadata_path:Layout.metadata_path end diff --git a/src/dune_cache_storage/dune_cache_storage.mli b/src/dune_cache_storage/dune_cache_storage.mli index c5b28bf7c43..124fb791e4e 100644 --- a/src/dune_cache_storage/dune_cache_storage.mli +++ b/src/dune_cache_storage/dune_cache_storage.mli @@ -5,6 +5,7 @@ open Stdune module Layout = Layout module Mode = Mode module Util = Util +module Version = Version module Store_result : sig (** Outcomes are ordered in the order of severity. *) @@ -47,11 +48,18 @@ module Value : sig ; value_digest : Digest.t } + (** Restore value metadata produced by an action with a given digest. The + metadata is restored only in memory, i.e. no new files will be created. *) val restore : action_digest:Digest.t -> t Restore_result.t end + (** Store a [string] value produced by an action with a given digest. If + successful, this operation will create one metadata entry and one value + entry in the cache. *) val store : mode:Mode.t -> action_digest:Digest.t -> string -> Store_result.t + (** Restore a [string] value produced by an action with a given digest. The + value is restored only in memory, i.e. no new files will be created. *) val restore : action_digest:Digest.t -> string Restore_result.t end @@ -70,20 +78,36 @@ module Artifacts : sig ; entries : Metadata_entry.t list } + (** Store artifacts metadata produced by a rule with a given digest. If + successful, this operation will create one metadata entry in the cache. *) val store : t -> mode:Mode.t -> rule_digest:Digest.t -> Store_result.t + (** Restore artifacts metadata produced by a rule with a given digest. The + metadata is restored only in memory, i.e. no new files will be created. *) val restore : rule_digest:Digest.t -> t Restore_result.t end + (** List entries of a metadata file produced by a rule with a given digest. + The list of entries is restored only in memory, i.e. no new files will be + created. *) val list : rule_digest:Digest.t -> Metadata_entry.t list Restore_result.t end +(** Some generic operations on metadata files. *) module Metadata : sig type t = | Artifacts of Artifacts.Metadata_file.t | Value of Value.Metadata_file.t + (** Restore metadata produced by a rule or action with a given digest. The + metadata is restored only in memory, i.e. no new files will be created. *) val restore : rule_or_action_digest:Digest.t -> t Restore_result.t + + module Versioned : sig + (** Same as the unversioned function but supports old metadata versions. *) + val restore : + Version.Metadata.t -> rule_or_action_digest:Digest.t -> t Restore_result.t + end end (** [with_temp_path ?prefix ~suffix f] creates a file in [Layout.temp_path], diff --git a/src/dune_cache_storage/layout.ml b/src/dune_cache_storage/layout.ml index 037eaaa0fdd..bd140b57f9d 100644 --- a/src/dune_cache_storage/layout.ml +++ b/src/dune_cache_storage/layout.ml @@ -16,28 +16,74 @@ let root_path = let ( / ) = Path.relative -(* We version metadata and actual cache content separately. *) -let metadata_storage_path = root_path / "meta" / "v5" - -let file_storage_path = root_path / "files" / "v4" - -let value_storage_path = root_path / "values" / "v3" +let temp_path = root_path / "temp" let cache_path ~dir ~hex = let two_first_chars = sprintf "%c%c" hex.[0] hex.[1] in dir / two_first_chars / hex -let metadata_path ~rule_or_action_digest = - cache_path ~dir:metadata_storage_path - ~hex:(Digest.to_string rule_or_action_digest) +(* List all entries in a given storage directory. *) +let list_entries ~storage = + let open Result.O in + let entries dir = + match + String.length dir = 2 && String.for_all ~f:Char.is_lowercase_hex dir + with + | false -> + (* Ignore directories whose name isn't a two-character hex value. *) + Ok [] + | true -> + let dir = storage / dir in + Path.readdir_unsorted dir + >>| List.filter_map ~f:(fun entry_name -> + match Digest.from_hex entry_name with + | None -> + (* Ignore entries whose names are not hex values. *) + None + | Some digest -> Some (dir / entry_name, digest)) + in + match Path.readdir_unsorted storage >>= Result.List.concat_map ~f:entries with + | Ok res -> res + | Error ENOENT -> [] + | Error e -> User_error.raise [ Pp.text (Unix.error_message e) ] -let value_path ~value_digest = - cache_path ~dir:value_storage_path ~hex:(Digest.to_string value_digest) +module Versioned = struct + let metadata_storage_path t = + root_path / "meta" / Version.Metadata.to_string t -let file_path ~file_digest = - cache_path ~dir:file_storage_path ~hex:(Digest.to_string file_digest) + let file_storage_path t = root_path / "files" / Version.File.to_string t -let temp_path = root_path / "temp" + let value_storage_path t = root_path / "values" / Version.Value.to_string t + + let metadata_path t = + let dir = metadata_storage_path t in + fun ~rule_or_action_digest -> + cache_path ~dir ~hex:(Digest.to_string rule_or_action_digest) + + let file_path t = + let dir = file_storage_path t in + fun ~file_digest -> cache_path ~dir ~hex:(Digest.to_string file_digest) + + let value_path t = + let dir = value_storage_path t in + fun ~value_digest -> cache_path ~dir ~hex:(Digest.to_string value_digest) + + let list_metadata_entries t = list_entries ~storage:(metadata_storage_path t) + + let list_file_entries t = list_entries ~storage:(file_storage_path t) + + let list_value_entries t = list_entries ~storage:(value_storage_path t) +end + +let metadata_storage_path = + Versioned.metadata_storage_path Version.Metadata.current + +let metadata_path = Versioned.metadata_path Version.Metadata.current + +let file_storage_path = Versioned.file_storage_path Version.File.current + +let file_path = Versioned.file_path Version.File.current + +let value_storage_path = Versioned.value_storage_path Version.Value.current -let root_path_subdirectories = - [ metadata_storage_path; file_storage_path; value_storage_path; temp_path ] +let value_path = Versioned.value_path Version.Value.current diff --git a/src/dune_cache_storage/layout.mli b/src/dune_cache_storage/layout.mli index cc9ede68cd6..ba86c40240c 100644 --- a/src/dune_cache_storage/layout.mli +++ b/src/dune_cache_storage/layout.mli @@ -53,5 +53,31 @@ val value_path : value_digest:Digest.t -> Path.t needed when storing new artifacts in the cache. See [write_atomically]. *) val temp_path : Path.t -(** All cache directories. *) -val root_path_subdirectories : Path.t list +(** Support for all versions of the layout, used by the cache trimmer. The + functions provided by the top module are obtained by a partial application + of the corresponding function defined here to a suitable current version. *) +module Versioned : sig + val metadata_storage_path : Version.Metadata.t -> Path.t + + val metadata_path : + Version.Metadata.t -> rule_or_action_digest:Digest.t -> Path.t + + val file_storage_path : Version.File.t -> Path.t + + val file_path : Version.File.t -> file_digest:Digest.t -> Path.t + + val value_storage_path : Version.Value.t -> Path.t + + val value_path : Version.Value.t -> value_digest:Digest.t -> Path.t + + (** List all metadata entries currently stored in the cache. Note that there + is no guarantee that the result is up-to-date, since files can be added or + removed concurrently by other processes. *) + val list_metadata_entries : Version.Metadata.t -> (Path.t * Digest.t) list + + (** List [list_metadata_entries] but for file entries. *) + val list_file_entries : Version.File.t -> (Path.t * Digest.t) list + + (** List [list_metadata_entries] but for value entries. *) + val list_value_entries : Version.Value.t -> (Path.t * Digest.t) list +end diff --git a/src/dune_cache_storage/version.ml b/src/dune_cache_storage/version.ml new file mode 100644 index 00000000000..8c56940e027 --- /dev/null +++ b/src/dune_cache_storage/version.ml @@ -0,0 +1,52 @@ +module File = struct + type t = + | V3 + | V4 + + let current = V4 + + let all = [ V3; V4 ] + + let to_string = function + | V3 -> "v3" + | V4 -> "v4" +end + +module Value = struct + type t = V3 + + let current = V3 + + let all = [ V3 ] + + let to_string = function + | V3 -> "v3" +end + +module Metadata = struct + type t = + | V3 + | V4 + | V5 + + let current = V5 + + let all = [ V3; V4; V5 ] + + let to_string = function + | V3 -> "v3" + | V4 -> "v4" + | V5 -> "v5" + + let file_version = function + | V3 -> File.V3 + | V4 + | V5 -> + File.V4 + + let value_version = function + | V3 + | V4 + | V5 -> + Value.V3 +end diff --git a/src/dune_cache_storage/version.mli b/src/dune_cache_storage/version.mli new file mode 100644 index 00000000000..d14d2a0648d --- /dev/null +++ b/src/dune_cache_storage/version.mli @@ -0,0 +1,47 @@ +(** We occasionally need to change the format and layout of the cache and some + systems, e.g. the cache trimmer, provide support for all previous versions. + + Cache metadata and entries are versioned separately and this module keeps + track of all historical versions. *) + +module File : sig + type t = + | V3 + | V4 + + val current : t + + val all : t list + + val to_string : t -> string +end + +module Value : sig + type t = V3 + + val current : t + + val all : t list + + val to_string : t -> string +end + +module Metadata : sig + type t = + | V3 + | V4 + | V5 + + val current : t + + val all : t list + + val to_string : t -> string + + (** Metadata entries contain references to file entries. This function links + the two versions. We guarantee that [file_version current = File.current]. *) + val file_version : t -> File.t + + (** Like [file_version] but for value entries. *) + val value_version : t -> Value.t +end diff --git a/src/dune_engine/cached_digest.ml b/src/dune_engine/cached_digest.ml index 8e16779ec4b..79df01b4931 100644 --- a/src/dune_engine/cached_digest.ml +++ b/src/dune_engine/cached_digest.ml @@ -172,18 +172,16 @@ let refresh_and_chmod fn = - if it is in the build directory, then we expect that the rule producing this file will have taken core of chmodding it *) Path.stat fn - | _ -> ( - match Cache.cachable stats.st_kind with - | true -> - (* We remove write permissions to uniformize behavior regardless of - whether the cache is activated. No need to be zealous in case the - file is not cached anyway. See issue #3311. *) - let perm = - Path.Permissions.remove ~mode:Path.Permissions.write stats.st_perm - in - Path.chmod ~mode:perm fn; - { stats with st_perm = perm } - | false -> stats) + | Unix.S_REG -> + (* We remove write permissions to uniformize behavior regardless of + whether the cache is activated. No need to be zealous in case the file + is not cached anyway. See issue #3311. *) + let perm = + Path.Permissions.remove ~mode:Path.Permissions.write stats.st_perm + in + Path.chmod ~mode:perm fn; + { stats with st_perm = perm } + | _ -> stats in refresh_ stats fn diff --git a/src/dune_engine/dune b/src/dune_engine/dune index 5bb1726b333..768112b1449 100644 --- a/src/dune_engine/dune +++ b/src/dune_engine/dune @@ -14,8 +14,6 @@ threads.posix opam_file_format dune_lang - cache_daemon - cache dune_cache dune_cache_storage dune_glob diff --git a/test/blackbox-tests/test-cases/dune-cache/trim.t/run.t b/test/blackbox-tests/test-cases/dune-cache/trim.t/run.t index 6e80d01c5a9..8b58951322c 100644 --- a/test/blackbox-tests/test-cases/dune-cache/trim.t/run.t +++ b/test/blackbox-tests/test-cases/dune-cache/trim.t/run.t @@ -47,11 +47,13 @@ the current digests for both files match those computed by Jenga. ./5e/5e5bb3a0ec0e689e19a59c3ee3d7fca8:content ./62/6274851067c88e9990e912be27cce386:content -Move all current entries to v3 to test trimming of old versions of cache. +Move all current entries to v3 and v4 to test trimming of old versions of cache. $ mkdir "$PWD/.xdg-cache/dune/db/files/v3" $ mkdir "$PWD/.xdg-cache/dune/db/meta/v3" + $ mkdir "$PWD/.xdg-cache/dune/db/meta/v4" $ mv "$PWD/.xdg-cache/dune/db/files/v4"/* "$PWD/.xdg-cache/dune/db/files/v3" + $ cp -r "$PWD/.xdg-cache/dune/db/meta/v5"/* "$PWD/.xdg-cache/dune/db/meta/v4" $ mv "$PWD/.xdg-cache/dune/db/meta/v5"/* "$PWD/.xdg-cache/dune/db/meta/v3" Build some more targets. @@ -83,15 +85,21 @@ entries uniformly. $ dune_cmd stat size "$PWD/.xdg-cache/dune/db/meta/v5/71/71a631749bd743e4c107ba109224c12f" 70 -Trimming the cache at this point should not remove anything because all file -entries are still hard-linked from the build directory. +Trimming the cache at this point should not remove any file entries because all +of them are still hard-linked from the build directory. However, we should trim +all metadata entries in [meta/v4] since they are broken: remember, we moved all +[files/v4] to [files/v3]. + $ find "$PWD/.xdg-cache/dune/db/meta/v4" -mindepth 2 -maxdepth 2 -type f | dune_cmd count-lines + 4 $ dune cache trim --trimmed-size 1B - Freed 0 bytes + Freed 287 bytes $ dune_cmd stat hardlinks _build/default/target_a 2 $ dune_cmd stat hardlinks _build/default/target_b 2 + $ find "$PWD/.xdg-cache/dune/db/meta/v4" -mindepth 2 -maxdepth 2 -type f | dune_cmd count-lines + 0 If we unlink a file in the build tree, then the corresponding file entry will be trimmed. @@ -107,10 +115,29 @@ trimmed. $ test -e _build/default/beacon_a $ ! test -e _build/default/beacon_b - $ reset +Now let's remove the remaining targets, left from the very first build and rerun +the trimmer. That will delete unused [files/v3] and the corresponding metadata +entries in [meta/v3]. + + $ rm -rf _build + $ find "$PWD/.xdg-cache/dune/db/files/v3" -mindepth 2 -maxdepth 2 -type f | dune_cmd count-lines + 4 + $ find "$PWD/.xdg-cache/dune/db/meta/v3" -mindepth 2 -maxdepth 2 -type f | dune_cmd count-lines + 4 + +We hide the output for reproducibility: some files are executable and their +sizes might vary on different platforms + + $ dune cache trim --size 0B > /dev/null + + $ find "$PWD/.xdg-cache/dune/db/files/v3" -mindepth 2 -maxdepth 2 -type f | dune_cmd count-lines + 0 + $ find "$PWD/.xdg-cache/dune/db/meta/v3" -mindepth 2 -maxdepth 2 -type f | dune_cmd count-lines + 0 The cache deletes oldest files first. + $ reset $ dune build target_b $ dune_cmd wait-for-fs-clock-to-advance $ dune build target_a @@ -132,7 +159,8 @@ target_a: $ reset -When a cache entry becomes unused, its ctime is modified and will determine the order of trimming. +When a cache entry becomes unused, its ctime is modified and will determine the +order of trimming. $ dune build target_a target_b $ rm -f _build/default/beacon_a _build/default/target_a @@ -150,21 +178,6 @@ When a cache entry becomes unused, its ctime is modified and will determine the $ reset -Check background trimming. - - $ env -u DUNE_CACHE_EXIT_NO_CLIENT \ - > DUNE_CACHE_TRIM_SIZE=1 \ - > DUNE_CACHE_TRIM_PERIOD=1 \ - > dune cache start > /dev/null 2>&1 - $ dune build target_a - $ rm -f _build/default/target_a _build/default/beacon_a - $ sleep 2 - $ dune build target_a - $ test -e _build/default/beacon_a - $ dune cache stop - - $ reset - Check garbage collection: both multi_a and multi_b must be removed as they are part of the same rule. diff --git a/test/expect-tests/dune b/test/expect-tests/dune index 1b58c3af0ba..03b7944acd0 100644 --- a/test/expect-tests/dune +++ b/test/expect-tests/dune @@ -14,7 +14,6 @@ fiber dune_lang dune_config - cache memo ;; This is because of the (implicit_transitive_deps false) ;; in dune-project