From a0e9a3afd219a6d6fb6cf14e0701490c810cd63f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20Dimino?= Date: Thu, 21 Oct 2021 16:47:45 +0100 Subject: [PATCH] Copy/link deps outside of Sandbox.action (#5035) So that we can insert a step between the copying/linking of deps and the execution of the action. And refactor the sandboxing code. Signed-off-by: Jeremie Dimino --- otherlibs/stdune-unstable/io.ml | 45 +++++++++++ otherlibs/stdune-unstable/io.mli | 6 ++ otherlibs/stdune-unstable/path.mli | 3 + src/dune_engine/action.ml | 65 +++------------- src/dune_engine/action.mli | 11 +-- src/dune_engine/action_exec.ml | 46 +---------- src/dune_engine/build_system.ml | 95 +++++------------------ src/dune_engine/sandbox.ml | 119 +++++++++++++++++++++++++++++ src/dune_engine/sandbox.mli | 26 +++++++ 9 files changed, 230 insertions(+), 186 deletions(-) create mode 100644 src/dune_engine/sandbox.ml create mode 100644 src/dune_engine/sandbox.mli diff --git a/otherlibs/stdune-unstable/io.ml b/otherlibs/stdune-unstable/io.ml index fec1ba27994..9c0903c12fe 100644 --- a/otherlibs/stdune-unstable/io.ml +++ b/otherlibs/stdune-unstable/io.ml @@ -300,3 +300,48 @@ module String_path = Make (struct let to_string x = x end) + +let portable_symlink ~src ~dst = + if Stdlib.Sys.win32 then + copy_file ~src ~dst () + else + let src = + match Path.parent dst with + | None -> Path.to_string src + | Some from -> Path.reach ~from src + in + let dst = Path.to_string dst in + match Unix.readlink dst with + | target -> + if target <> src then ( + (* @@DRA Win32 remove read-only attribute needed when symlinking + enabled *) + Unix.unlink dst; + Unix.symlink src dst + ) + | exception _ -> Unix.symlink src dst + +let portable_hardlink ~src ~dst = + (* CR-someday amokhov: Instead of always falling back to copying, we could + detect if hardlinking works on Windows and if yes, use it. We do this in + the Dune cache implementation, so we can share some code. *) + match Stdlib.Sys.win32 with + | true -> copy_file ~src ~dst () + | false -> ( + let rec follow_symlinks name = + match Unix.readlink name with + | link_name -> + let name = Filename.concat (Filename.dirname name) link_name in + follow_symlinks name + | exception Unix.Unix_error (Unix.EINVAL, _, _) -> name + in + let src = follow_symlinks (Path.to_string src) in + let dst = Path.to_string dst in + try Unix.link src dst with + | Unix.Unix_error (Unix.EEXIST, _, _) -> + (* CR-someday amokhov: Investigate why we need to occasionally clear the + destination (we also do this in the symlink case above). Perhaps, the + list of dependencies may have duplicates? If yes, it may be better to + filter out the duplicates first. *) + Unix.unlink dst; + Unix.link src dst) diff --git a/otherlibs/stdune-unstable/io.mli b/otherlibs/stdune-unstable/io.mli index c8ce8271966..8bcb5a11334 100644 --- a/otherlibs/stdune-unstable/io.mli +++ b/otherlibs/stdune-unstable/io.mli @@ -17,3 +17,9 @@ val read_all : in_channel -> string include Io_intf.S with type path = Path.t module String_path : Io_intf.S with type path = string + +(** Symlink with fallback to copy on systems that don't support it. *) +val portable_symlink : src:Path.t -> dst:Path.t -> unit + +(** Hardlink with fallback to copy on systems that don't support it. *) +val portable_hardlink : src:Path.t -> dst:Path.t -> unit diff --git a/otherlibs/stdune-unstable/path.mli b/otherlibs/stdune-unstable/path.mli index 11f0dfa39f0..2346a557502 100644 --- a/otherlibs/stdune-unstable/path.mli +++ b/otherlibs/stdune-unstable/path.mli @@ -137,6 +137,9 @@ module Build : sig val append_local : t -> Local.t -> t + (** [append x y] is [append_local x (local y] *) + val append : t -> t -> t + module L : sig val relative : ?error_loc:Loc0.t -> t -> string list -> t end diff --git a/src/dune_engine/action.ml b/src/dune_engine/action.ml index 74093912396..c1f728e4ea9 100644 --- a/src/dune_engine/action.ml +++ b/src/dune_engine/action.ml @@ -78,10 +78,6 @@ end include Action_ast.Make (Prog) (Dpath) (Dpath.Build) (String_with_sexp) (Ast) -type path = Path.t - -type target = Path.Build.t - type string = String.t module For_shell = struct @@ -205,60 +201,17 @@ let rec is_dynamic = function | Format_dune_file _ -> false -let prepare_managed_paths ~link ~sandboxed deps = - let steps = - Path.Map.foldi deps ~init:[] ~f:(fun path _ acc -> - match Path.as_in_build_dir path with - | None -> - (* This can actually raise if we try to sandbox the "copy from source - dir" rules. There is no reason to do that though. *) - if Path.is_in_source_tree path then - Code_error.raise - "Action depends on source tree. All actions should depend on the \ - copies in build directory instead" - [ ("path", Path.to_dyn path) ]; - acc - | Some p -> link path (sandboxed p) :: acc) - in - Progn steps - -let link_function ~(mode : Sandbox_mode.some) : path -> target -> t = - let win32_error mode = - let mode = Sandbox_mode.to_string (Some mode) in - Code_error.raise - (sprintf - "Don't have %ss on win32, but [%s] sandboxing mode was selected. To \ - use emulation via copy, the [copy] sandboxing mode should be \ - selected." - mode mode) - [] - in - match mode with - | Symlink -> ( - match Sys.win32 with - | true -> win32_error mode - | false -> fun a b -> Symlink (a, b)) - | Copy -> fun a b -> Copy (a, b) - | Hardlink -> ( - match Sys.win32 with - | true -> win32_error mode - | false -> fun a b -> Hardlink (a, b)) - -let maybe_sandbox_path f p = +let maybe_sandbox_path sandbox p = match Path.as_in_build_dir p with | None -> p - | Some p -> Path.build (f p) - -let sandbox t ~sandboxed ~mode ~deps : t = - let link = link_function ~mode in - Progn - [ prepare_managed_paths ~sandboxed ~link deps - ; map t ~dir:Path.root - ~f_string:(fun ~dir:_ x -> x) - ~f_path:(fun ~dir:_ p -> maybe_sandbox_path sandboxed p) - ~f_target:(fun ~dir:_ -> sandboxed) - ~f_program:(fun ~dir:_ -> Result.map ~f:(maybe_sandbox_path sandboxed)) - ] + | Some p -> Path.build (Sandbox.map_path sandbox p) + +let sandbox t sandbox : t = + map t ~dir:Path.root + ~f_string:(fun ~dir:_ x -> x) + ~f_path:(fun ~dir:_ p -> maybe_sandbox_path sandbox p) + ~f_target:(fun ~dir:_ p -> Sandbox.map_path sandbox p) + ~f_program:(fun ~dir:_ p -> Result.map p ~f:(maybe_sandbox_path sandbox)) type is_useful = | Clearly_not diff --git a/src/dune_engine/action.mli b/src/dune_engine/action.mli index 9a6e1f299d0..f8a9bb96188 100644 --- a/src/dune_engine/action.mli +++ b/src/dune_engine/action.mli @@ -88,15 +88,8 @@ val empty : t (** Checks, if action contains a [Dynamic_run]. *) val is_dynamic : t -> bool -(** Return a sandboxed version of an action. It takes care of preparing deps in - the sandbox, but it does not copy the targets back out. It's the - responsibility of the caller to do that. *) -val sandbox : - t - -> sandboxed:(Path.Build.t -> Path.Build.t) - -> mode:Sandbox_mode.some - -> deps:_ Path.Map.t - -> t +(** Re-root all the paths in the action to their sandbox version *) +val sandbox : t -> Sandbox.t -> t type is_useful = | Clearly_not diff --git a/src/dune_engine/action_exec.ml b/src/dune_engine/action_exec.ml index b77e8b22a85..278c6b230c2 100644 --- a/src/dune_engine/action_exec.ml +++ b/src/dune_engine/action_exec.ml @@ -226,52 +226,10 @@ let rec exec t ~ectx ~eenv = Io.copy_file ~src ~dst (); Fiber.return Done | Symlink (src, dst) -> - (if Sys.win32 then - let dst = Path.build dst in - Io.copy_file ~src ~dst () - else - let src = - match Path.Build.parent dst with - | None -> Path.to_string src - | Some from -> - let from = Path.build from in - Path.reach ~from src - in - let dst = Path.Build.to_string dst in - match Unix.readlink dst with - | target -> - if target <> src then ( - (* @@DRA Win32 remove read-only attribute needed when symlinking - enabled *) - Unix.unlink dst; - Unix.symlink src dst - ) - | exception _ -> Unix.symlink src dst); + Io.portable_symlink ~src ~dst:(Path.build dst); Fiber.return Done | Hardlink (src, dst) -> - (* CR-someday amokhov: Instead of always falling back to copying, we could - detect if hardlinking works on Windows and if yes, use it. We do this in - the Dune cache implementation, so we can share some code. *) - (match Sys.win32 with - | true -> Io.copy_file ~src ~dst:(Path.build dst) () - | false -> ( - let rec follow_symlinks name = - match Unix.readlink name with - | link_name -> - let name = Filename.concat (Filename.dirname name) link_name in - follow_symlinks name - | exception Unix.Unix_error (Unix.EINVAL, _, _) -> name - in - let src = follow_symlinks (Path.to_string src) in - let dst = Path.Build.to_string dst in - try Unix.link src dst with - | Unix.Unix_error (Unix.EEXIST, _, _) -> - (* CR-someday amokhov: Investigate why we need to occasionally clear the - destination (we also do this in the symlink case above). Perhaps, the - list of dependencies may have duplicates? If yes, it may be better to - filter out the duplicates first. *) - Unix.unlink dst; - Unix.link src dst)); + Io.portable_hardlink ~src ~dst:(Path.build dst); Fiber.return Done | Copy_and_add_line_directive (src, dst) -> Io.with_file_in src ~f:(fun ic -> diff --git a/src/dune_engine/build_system.ml b/src/dune_engine/build_system.ml index b24455e8b32..b3faab0683b 100644 --- a/src/dune_engine/build_system.ml +++ b/src/dune_engine/build_system.ml @@ -607,26 +607,6 @@ let compute_target_digests_or_raise_error exec_params ~loc targets = (pp_path (Path.build target) :: error)) ]) -let sandbox_dir = Path.Build.relative Path.Build.root ".sandbox" - -let init_sandbox = - let init = - lazy - (let dir = Path.build sandbox_dir in - Path.mkdir_p (Path.relative dir ".hg"); - (* We create an empty [.git] file to prevent git from escaping the - sandbox. It will choke on this empty .git and report an error about - its format being invalid. *) - Io.write_file (Path.relative dir ".git") ""; - (* We create a [.hg/requires] file to prevent hg from escaping the - sandbox. It will complain that "Escaping the Dune sandbox" is an - unkown feature. *) - Io.write_file - (Path.relative dir ".hg/requires") - "Escaping the Dune sandbox") - in - fun () -> Lazy.force init - let rec with_locks t mutexes ~f = match mutexes with | [] -> f () @@ -1371,18 +1351,6 @@ end = struct let start_rule t _rule = t.rule_total <- t.rule_total + 1 - (* Same as [rename] except that if the source doesn't exist we delete the - destination *) - let rename_optional_file ~src ~dst = - let src = Path.Build.to_string src in - let dst = Path.Build.to_string dst in - match Unix.rename src dst with - | () -> () - | exception Unix.Unix_error ((ENOENT | ENOTDIR), _, _) -> ( - match Unix.unlink dst with - | exception Unix.Unix_error (ENOENT, _, _) -> () - | () -> ()) - (* The current version of the rule digest scheme. We should increment it when making any changes to the scheme, to avoid collisions. *) let rule_digest_version = 7 @@ -1463,45 +1431,18 @@ end = struct action in pending_targets := Path.Build.Set.union targets !pending_targets; + let chdirs = Action.chdirs action in let sandbox = Option.map sandbox_mode ~f:(fun mode -> - let sandbox_suffix = rule_digest |> Digest.to_string in - (Path.Build.relative sandbox_dir sandbox_suffix, mode)) + Sandbox.create ~mode ~deps ~rule_dir:dir ~chdirs ~rule_digest + ~expand_aliases: + (Execution_parameters.expand_aliases_in_sandbox + execution_parameters)) in - let chdirs = Action.chdirs action in - let sandboxed, action = + let action = match sandbox with - | None -> (None, action) - | Some (sandbox_dir, sandbox_mode) -> - init_sandbox (); - Path.rm_rf (Path.build sandbox_dir); - let sandboxed path : Path.Build.t = - Path.Build.append_local sandbox_dir (Path.Build.local path) - in - Path.Set.iter - (Path.Set.union (Dep.Facts.dirs deps) chdirs) - ~f:(fun path -> - match Path.as_in_build_dir path with - | None -> - (* This [path] is not in the build directory, so we do not need to - create it. If it comes from [deps], it must exist already. If - it comes from [chdirs], we'll ensure that it exists in the call - to [Fs.mkdir_p_or_assert_existence] below. *) - () - | Some path -> - (* There is no point in using the memoized version [Fs.mkdir_p] - since these directories are not shared between actions. *) - Path.mkdir_p (Path.build (sandboxed path))); - Path.mkdir_p (Path.build (sandboxed dir)); - let deps = - if Execution_parameters.expand_aliases_in_sandbox execution_parameters - then - Dep.Facts.paths deps - else - Dep.Facts.paths_without_expanding_aliases deps - in - ( Some sandboxed - , Action.sandbox action ~sandboxed ~mode:sandbox_mode ~deps ) + | None -> action + | Some sandbox -> Action.sandbox action sandbox in let* () = Fiber.parallel_iter_set @@ -1511,26 +1452,26 @@ end = struct in let build_deps deps = Memo.Build.run (build_deps deps) in let root = - (match context with + match context with | None -> Path.Build.root - | Some context -> context.build_dir) - |> Option.value sandboxed ~default:Fun.id - |> Path.build + | Some context -> context.build_dir + in + let root = + Path.build + (match sandbox with + | None -> root + | Some sandbox -> Sandbox.map_path sandbox root) in let+ exec_result = with_locks t locks ~f:(fun () -> - let copy_files_from_sandbox sandboxed = - Path.Build.Set.iter targets ~f:(fun target -> - rename_optional_file ~src:(sandboxed target) ~dst:target) - in let+ exec_result = Action_exec.exec ~root ~context ~env ~targets ~rule_loc:loc ~build_deps ~execution_parameters action in - Option.iter sandboxed ~f:copy_files_from_sandbox; + Option.iter sandbox ~f:(Sandbox.move_targets_to_build_dir ~targets); exec_result) in - Option.iter sandbox ~f:(fun (p, _mode) -> Path.rm_rf (Path.build p)); + Option.iter sandbox ~f:Sandbox.destroy; (* All went well, these targets are no longer pending *) pending_targets := Path.Build.Set.diff !pending_targets targets; exec_result diff --git a/src/dune_engine/sandbox.ml b/src/dune_engine/sandbox.ml new file mode 100644 index 00000000000..c05a4a6ee85 --- /dev/null +++ b/src/dune_engine/sandbox.ml @@ -0,0 +1,119 @@ +open! Stdune +open Import + +let sandbox_dir = Path.Build.relative Path.Build.root ".sandbox" + +let init = + let init = + lazy + (let dir = Path.build sandbox_dir in + Path.mkdir_p (Path.relative dir ".hg"); + (* We create an empty [.git] file to prevent git from escaping the + sandbox. It will choke on this empty .git and report an error about + its format being invalid. *) + Io.write_file (Path.relative dir ".git") ""; + (* We create a [.hg/requires] file to prevent hg from escaping the + sandbox. It will complain that "Escaping the Dune sandbox" is an + unkown feature. *) + Io.write_file + (Path.relative dir ".hg/requires") + "Escaping the Dune sandbox") + in + fun () -> Lazy.force init + +type t = { dir : Path.Build.t } [@@unboxed] + +let dir t = t.dir + +let map_path t p = Path.Build.append t.dir p + +let create_dirs t ~deps ~chdirs ~rule_dir = + Path.Set.iter + (Path.Set.add + (Path.Set.union (Dep.Facts.dirs deps) chdirs) + (Path.build rule_dir)) + ~f:(fun path -> + match Path.as_in_build_dir path with + | None -> + (* This [path] is not in the build directory, so we do not need to + create it. If it comes from [deps], it must exist already. If it + comes from [chdirs], we'll ensure that it exists in the call to + [Fs.mkdir_p_or_assert_existence] below. *) + () + | Some path -> + (* There is no point in using the memoized version [Fs.mkdir_p] since + these directories are not shared between actions. *) + Path.mkdir_p (Path.build (map_path t path))) + +let link_function ~(mode : Sandbox_mode.some) = + let win32_error mode = + let mode = Sandbox_mode.to_string (Some mode) in + Code_error.raise + (sprintf + "Don't have %ss on win32, but [%s] sandboxing mode was selected. To \ + use emulation via copy, the [copy] sandboxing mode should be \ + selected." + mode mode) + [] + in + Staged.stage + (match mode with + | Symlink -> ( + match Sys.win32 with + | true -> win32_error mode + | false -> fun src dst -> Io.portable_symlink ~src ~dst) + | Copy -> fun src dst -> Io.copy_file ~src ~dst () + | Hardlink -> ( + match Sys.win32 with + | true -> win32_error mode + | false -> fun src dst -> Io.portable_hardlink ~src ~dst)) + +let link_deps t ~mode ~deps = + let link = Staged.unstage (link_function ~mode) in + Path.Map.iteri deps ~f:(fun path _ -> + match Path.as_in_build_dir path with + | None -> + (* This can actually raise if we try to sandbox the "copy from source + dir" rules. There is no reason to do that though. *) + if Path.is_in_source_tree path then + Code_error.raise + "Action depends on source tree. All actions should depend on the \ + copies in build directory instead" + [ ("path", Path.to_dyn path) ] + | Some p -> link path (Path.build (map_path t p))) + +let create ~mode ~deps ~rule_dir ~chdirs ~rule_digest ~expand_aliases = + init (); + let sandbox_suffix = rule_digest |> Digest.to_string in + let sandbox_dir = Path.Build.relative sandbox_dir sandbox_suffix in + let t = { dir = sandbox_dir } in + Path.rm_rf (Path.build sandbox_dir); + create_dirs t ~deps ~chdirs ~rule_dir; + let deps = + if expand_aliases then + Dep.Facts.paths deps + else + Dep.Facts.paths_without_expanding_aliases deps + in + (* CR-someday amokhov: Note that this doesn't link dynamic dependencies, so + targets produced dynamically will be unavailable. *) + link_deps t ~mode ~deps; + t + +(* Same as [rename] except that if the source doesn't exist we delete the + destination *) +let rename_optional_file ~src ~dst = + let src = Path.Build.to_string src in + let dst = Path.Build.to_string dst in + match Unix.rename src dst with + | () -> () + | exception Unix.Unix_error ((ENOENT | ENOTDIR), _, _) -> ( + match Unix.unlink dst with + | exception Unix.Unix_error (ENOENT, _, _) -> () + | () -> ()) + +let move_targets_to_build_dir t ~targets = + Path.Build.Set.iter targets ~f:(fun target -> + rename_optional_file ~src:(map_path t target) ~dst:target) + +let destroy t = Path.rm_rf (Path.build t.dir) diff --git a/src/dune_engine/sandbox.mli b/src/dune_engine/sandbox.mli new file mode 100644 index 00000000000..79c38147dd4 --- /dev/null +++ b/src/dune_engine/sandbox.mli @@ -0,0 +1,26 @@ +(** Creation and management of sandboxes *) + +open Stdune + +type t + +val dir : t -> Path.Build.t + +(** [map_path t p] returns the path corresponding to [p] inside the sandbox. *) +val map_path : t -> Path.Build.t -> Path.Build.t + +(** Create a new sandbox and copy or link dependencies inside it. *) +val create : + mode:Sandbox_mode.some + -> deps:Dep.Facts.t + -> rule_dir:Path.Build.t + -> chdirs:Path.Set.t + -> rule_digest:Digest.t + -> expand_aliases:bool + -> t + +(** Move the targets created by the action from the sandbox to the build + directory. *) +val move_targets_to_build_dir : t -> targets:Path.Build.Set.t -> unit + +val destroy : t -> unit