From b7f87f1abb19aed086207d1e05a84a7e191cfe5b Mon Sep 17 00:00:00 2001 From: Christiano Haesbaert Date: Wed, 5 Oct 2022 15:30:00 +0200 Subject: [PATCH] Expose `File.stat` and improve `Path.load` Some notes: - Use Unix.Largestat instead of Unix.stat - Add a portable stat for the user - Moved Path.Unix_perm into File.Unix_perm to avoid cyclical ref A previous attempt of this diff tried to make proper use of Luv.File.Stat but there were multiple issues: `mode` was not properly exported so I had to add a discovery.ml; it only works for a subset of file types and we had to manually convert timestamps to float; all doable but an insuferable amount of code to end up with less functionality than just calling into Unix.Largestat.stat. There are still some kinks: - Luv will always follow a link, so we can't really stat on a symbolic link, this is because we rely on realpath(3) which resolves the link. - We can't open named pipes in with luv atm, so we also can't stat it (#357). - I did test all filemodes, but didn't add all to the test since they are impractical (opening /dev/tty for Character_special...) -- Path.load was doubling the buffer and trying to fit enough until it saw an EOF. This would cause many unecessary large allocations and my poor 16GB ram laptop couldn't even load a 5GB file without getting OOMed. This diff makes load create an initial buffer of the size of the file and load exactly that amount of bytes in it. --- lib_eio/file.ml | 45 ++++++++++++++++++++++++++++++++ lib_eio/fs.ml | 16 +++++------- lib_eio/path.ml | 18 ++++++++++--- lib_eio/path.mli | 2 +- lib_eio_linux/eio_linux.ml | 34 +++++++++++++++++++++--- lib_eio_linux/eio_linux.mli | 4 +-- lib_eio_linux/tests/eurcp_lib.ml | 2 +- lib_eio_luv/eio_luv.ml | 33 +++++++++++++++++++++++ lib_eio_luv/eio_luv.mli | 3 +++ tests/fs.md | 18 +++++++++++++ 10 files changed, 155 insertions(+), 20 deletions(-) diff --git a/lib_eio/file.ml b/lib_eio/file.ml index b1fbf0647..ec4752ba8 100644 --- a/lib_eio/file.ml +++ b/lib_eio/file.ml @@ -1,8 +1,47 @@ +(** Tranditional Unix permissions. *) +module Unix_perm = struct + type t = int + (** This is the same as {!Unix.file_perm}, but avoids a dependency on [Unix]. *) +end + +(** Portable file stats. *) +module Stat = struct + + (** Kind of file from st_mode. **) + type kind = [ + | `Unknown + | `Fifo + | `Character_special + | `Directory + | `Block_device + | `Regular_file + | `Symbolic_link + | `Socket + ] + + (** Like stat(2). *) + type t = { + dev : Int64.t; + ino : Int64.t; + kind : kind; + perm : Unix_perm.t; + nlink : Int64.t; + uid : Int64.t; + gid : Int64.t; + rdev : Int64.t; + size : Optint.Int63.t; + atime : float; + mtime : float; + ctime : float; + } +end + (** A file opened for reading. *) class virtual ro = object (_ : ) method probe _ = None method read_methods = [] method virtual pread : file_offset:Optint.Int63.t -> Cstruct.t list -> int + method virtual stat : Stat.t end (** A file opened for reading and writing. *) @@ -11,6 +50,12 @@ class virtual rw = object (_ : ) method virtual pwrite : file_offset:Optint.Int63.t -> Cstruct.t list -> int end +(** [stat t] returns the {!Stat.t} record associated with [t]. *) +let stat (t : #ro) = t#stat + +(** [size t] returns the size of [t]. *) +let size t = (stat t).size + (** [pread t ~file_offset bufs] performs a single read of [t] at [file_offset] into [bufs]. It returns the number of bytes read, which may be less than the space in [bufs], diff --git a/lib_eio/fs.ml b/lib_eio/fs.ml index 867ee74a4..8b5b63eef 100644 --- a/lib_eio/fs.ml +++ b/lib_eio/fs.ml @@ -1,23 +1,21 @@ (** Defines types used by file-systems. *) (** Tranditional Unix permissions. *) -module Unix_perm = struct - type t = int - (** This is the same as {!Unix.file_perm}, but avoids a dependency on [Unix]. *) -end +module Unix_perm = File.Unix_perm [@@deprecated "Moved to File.Unix_perm"] type path = string exception Already_exists of path * exn exception Not_found of path * exn exception Permission_denied of path * exn +exception File_too_large of path * exn (** When to create a new file. *) type create = [ - | `Never (** fail if the named file doesn't exist *) - | `If_missing of Unix_perm.t (** create if file doesn't already exist *) - | `Or_truncate of Unix_perm.t (** any existing file is truncated to zero length *) - | `Exclusive of Unix_perm.t (** always create; fail if the file already exists *) + | `Never (** fail if the named file doesn't exist *) + | `If_missing of File.Unix_perm.t (** create if file doesn't already exist *) + | `Or_truncate of File.Unix_perm.t (** any existing file is truncated to zero length *) + | `Exclusive of File.Unix_perm.t (** always create; fail if the file already exists *) ] (** If a new file is created, the given permissions are used for it. *) @@ -30,7 +28,7 @@ class virtual dir = object (_ : #Generic.t) append:bool -> create:create -> path -> - method virtual mkdir : perm:Unix_perm.t -> path -> unit + method virtual mkdir : perm:File.Unix_perm.t -> path -> unit method virtual open_dir : sw:Switch.t -> path -> dir_with_close method virtual read_dir : path -> string list method virtual unlink : path -> unit diff --git a/lib_eio/path.ml b/lib_eio/path.ml index c278210e3..18d1fb197 100644 --- a/lib_eio/path.ml +++ b/lib_eio/path.ml @@ -30,10 +30,20 @@ let with_lines path fn = let buf = Buf_read.of_flow flow ~max_size:max_int in fn (Buf_read.lines buf) -let load path = - with_open_in path @@ fun flow -> - let buf = Buf_read.of_flow flow ~max_size:max_int in - Buf_read.take_all buf +let load (t, path) = + with_open_in (t, path) @@ fun flow -> + let size = File.size flow in + if Optint.Int63.(compare size (of_int Sys.max_string_length)) = 1 then + raise (Fs.File_too_large + (path, Invalid_argument "can't represent a string that long")); + let buf = Cstruct.create (Optint.Int63.to_int size) in + let rec loop buf got = + match Flow.single_read flow buf with + | n -> loop (Cstruct.shift buf n) (n + got) + | exception End_of_file -> got + in + let got = loop buf 0 in + Cstruct.to_string ~len:got buf let save ?append ~create path data = with_open_out ?append ~create path @@ fun flow -> diff --git a/lib_eio/path.mli b/lib_eio/path.mli index 6cce230da..72808e730 100644 --- a/lib_eio/path.mli +++ b/lib_eio/path.mli @@ -88,7 +88,7 @@ val with_open_out : (** {1 Directories} *) -val mkdir : perm:Unix_perm.t -> _ t -> unit +val mkdir : perm:File.Unix_perm.t -> _ t -> unit (** [mkdir ~perm t] creates a new directory [t] with permissions [perm]. *) val open_dir : sw:Switch.t -> _ t -> t diff --git a/lib_eio_linux/eio_linux.ml b/lib_eio_linux/eio_linux.ml index 63ef5c888..6ae7ecc28 100644 --- a/lib_eio_linux/eio_linux.ml +++ b/lib_eio_linux/eio_linux.ml @@ -110,6 +110,34 @@ module FD = struct match t.fd with | `Open fd -> Fmt.pf f "%d" (Obj.magic fd : int) | `Closed -> Fmt.string f "(closed)" + + let fstat t = + (* todo: use uring *) + let ust = Unix.LargeFile.fstat (get_exn "fstat" t) in + let st_kind : Eio.File.Stat.kind = + match ust.st_kind with + | Unix.S_REG -> `Regular_file + | Unix.S_DIR -> `Directory + | Unix.S_CHR -> `Character_special + | Unix.S_BLK -> `Block_device + | Unix.S_LNK -> `Symbolic_link + | Unix.S_FIFO -> `Fifo + | Unix.S_SOCK -> `Socket + in + Eio.File.Stat.{ + dev = ust.st_dev |> Int64.of_int; + ino = ust.st_ino |> Int64.of_int; + kind = st_kind; + perm = ust.st_perm; + nlink = ust.st_nlink |> Int64.of_int; + uid = ust.st_uid |> Int64.of_int; + gid = ust.st_gid |> Int64.of_int; + rdev = ust.st_rdev |> Int64.of_int; + size = ust.st_size |> Optint.Int63.of_int64; + atime = ust.st_atime; + mtime = ust.st_mtime; + ctime = ust.st_ctime; + } end type _ Eio.Generic.ty += FD : FD.t Eio.Generic.ty @@ -816,9 +844,7 @@ module Low_level = struct | Cwd -> openat2 ~sw ?seekable ~access ~flags ~perm ~resolve:Uring.Resolve.beneath path | Fs -> openat2 ~sw ?seekable ~access ~flags ~perm ~resolve:Uring.Resolve.empty path - let fstat fd = - (* todo: use uring *) - Unix.fstat (FD.get_exn "fstat" fd) + let fstat fd = FD.fstat fd external eio_mkdirat : Unix.file_descr -> string -> Unix.file_perm -> unit = "caml_eio_mkdirat" @@ -1033,6 +1059,8 @@ let flow fd = method fd = fd method close = FD.close fd + method stat = FD.fstat fd + method probe : type a. a Eio.Generic.ty -> a option = function | FD -> Some fd | Eio_unix.Private.Unix_file_descr op -> Some (FD.to_unix op fd) diff --git a/lib_eio_linux/eio_linux.mli b/lib_eio_linux/eio_linux.mli index c8e2bd47d..f101a8187 100644 --- a/lib_eio_linux/eio_linux.mli +++ b/lib_eio_linux/eio_linux.mli @@ -203,8 +203,8 @@ module Low_level : sig val await_writable : FD.t -> unit (** [await_writable fd] blocks until [fd] is writable (or has an error). *) - val fstat : FD.t -> Unix.stats - (** Like {!Unix.fstat}. *) + val fstat : FD.t -> Eio.File.Stat.t + (** Like {!Unix.LargeFile.fstat}. *) val read_dir : FD.t -> string list (** [read_dir dir] reads all directory entries from [dir]. diff --git a/lib_eio_linux/tests/eurcp_lib.ml b/lib_eio_linux/tests/eurcp_lib.ml index 7931c1497..cf6218e61 100644 --- a/lib_eio_linux/tests/eurcp_lib.ml +++ b/lib_eio_linux/tests/eurcp_lib.ml @@ -31,7 +31,7 @@ let run_cp block_size queue_depth infile outfile () = let open Unix in let infd = U.openfile ~sw infile [O_RDONLY] 0 in let outfd = U.openfile ~sw outfile [O_WRONLY; O_CREAT; O_TRUNC] 0o644 in - let insize = U.fstat infd |> fun {st_size; _} -> Int63.of_int st_size in + let insize = (U.fstat infd).size in Logs.debug (fun l -> l "eurcp: %s -> %s size %a queue %d bs %d" infile outfile diff --git a/lib_eio_luv/eio_luv.ml b/lib_eio_luv/eio_luv.ml index bb4834619..79836d835 100644 --- a/lib_eio_luv/eio_luv.ml +++ b/lib_eio_luv/eio_luv.ml @@ -249,6 +249,33 @@ let get_loop () = enter_unchecked @@ fun t k -> Suspended.continue k t.loop +let unix_fstat fd = + let ust = Unix.LargeFile.fstat fd in + let st_kind : Eio.File.Stat.kind = + match ust.st_kind with + | Unix.S_REG -> `Regular_file + | Unix.S_DIR -> `Directory + | Unix.S_CHR -> `Character_special + | Unix.S_BLK -> `Block_device + | Unix.S_LNK -> `Symbolic_link + | Unix.S_FIFO -> `Fifo + | Unix.S_SOCK -> `Socket + in + Eio.File.Stat.{ + dev = ust.st_dev |> Int64.of_int; + ino = ust.st_ino |> Int64.of_int; + kind = st_kind; + perm = ust.st_perm; + nlink = ust.st_nlink |> Int64.of_int; + uid = ust.st_uid |> Int64.of_int; + gid = ust.st_gid |> Int64.of_int; + rdev = ust.st_rdev |> Int64.of_int; + size = ust.st_size |> Optint.Int63.of_int64; + atime = ust.st_atime; + mtime = ust.st_mtime; + ctime = ust.st_ctime; + } + module Low_level = struct type 'a or_error = ('a, Luv.Error.t) result @@ -377,6 +404,10 @@ module Low_level = struct t.release_hook <- Switch.on_release_cancellable sw (fun () -> ensure_closed t); t + let fstat fd = + let request = Luv.File.Request.make () in + await_with_cancel ~request (fun loop -> Luv.File.fstat ~loop ~request (to_luv fd)) + let open_ ~sw ?mode path flags = let request = Luv.File.Request.make () in await_with_cancel ~request (fun loop -> Luv.File.open_ ~loop ?mode ~request path flags) @@ -598,6 +629,8 @@ let flow fd = object (_ : ) method close = Low_level.File.close fd method unix_fd op = File.to_unix op fd + method stat = unix_fstat (File.to_unix `Peek fd) + method probe : type a. a Eio.Generic.ty -> a option = function | FD -> Some fd | Eio_unix.Private.Unix_file_descr op -> Some (File.to_unix op fd) diff --git a/lib_eio_luv/eio_luv.mli b/lib_eio_luv/eio_luv.mli index 3f19416e9..d62ee927f 100644 --- a/lib_eio_luv/eio_luv.mli +++ b/lib_eio_luv/eio_luv.mli @@ -55,6 +55,9 @@ module Low_level : sig This allows unsafe access to the FD. @raise Invalid_arg if [t] is closed. *) + val fstat : t -> Luv.File.Stat.t or_error + (** [fstat fd] returns the stat of [fd]. *) + val open_ : sw:Switch.t -> ?mode:Luv.File.Mode.t list -> diff --git a/tests/fs.md b/tests/fs.md index 2b5f165f3..9eafb0dca 100644 --- a/tests/fs.md +++ b/tests/fs.md @@ -64,6 +64,10 @@ let try_rmdir path = let chdir path = traceln "chdir %S" path; Unix.chdir path + +let assert_kind path kind = + Path.with_open_in path @@ fun file -> + assert ((Eio.File.stat file).kind = kind) ``` # Basic test cases @@ -495,3 +499,17 @@ Unconfined: +rename to -> ok - : unit = () ``` + +# Stat +```ocaml +# run @@ fun env -> + let cwd = Eio.Stdenv.cwd env in + Switch.run @@ fun sw -> + try_mkdir (cwd / "stat_subdir"); + assert_kind (cwd / "stat_subdir") `Directory; + try_write_file (cwd / "stat_reg") "kingbula" ~create:(`Exclusive 0o600); + assert_kind (cwd / "stat_reg") `Regular_file;; ++mkdir -> ok ++write -> ok +- : unit = () +```