From 1a6446b5eb5e73ee210e06c8f3c083fb24eaa1fb Mon Sep 17 00:00:00 2001 From: Alex Ameen Date: Sun, 22 Jan 2023 13:45:02 -0600 Subject: [PATCH 1/3] primop: add readFileType, optimize readDir Allows checking directory entry type of a single file/directory. This was added to optimize the use of `builtins.readDir` on some filesystems and operating systems which cannot detect this information using POSIX's `readdir`. Previously `builtins.readDir` would eagerly use system calls to lookup these filetypes using other interfaces; this change makes these operations lazy in the attribute values for each file with application of `builtins.readFileType`. (cherry picked from commit 153ee460c59c67910476f7b1eadcabcd47206f2f) --- doc/manual/src/release-notes/rl-next.md | 8 ++++ src/libexpr/primops.cc | 64 ++++++++++++++++++++++--- tests/lang/eval-okay-readDir.exp | 2 +- tests/lang/eval-okay-readFileType.exp | 1 + tests/lang/eval-okay-readFileType.nix | 6 +++ tests/lang/readDir/ldir | 1 + tests/lang/readDir/linked | 1 + 7 files changed, 75 insertions(+), 8 deletions(-) create mode 100644 tests/lang/eval-okay-readFileType.exp create mode 100644 tests/lang/eval-okay-readFileType.nix create mode 120000 tests/lang/readDir/ldir create mode 120000 tests/lang/readDir/linked diff --git a/doc/manual/src/release-notes/rl-next.md b/doc/manual/src/release-notes/rl-next.md index 78ae99f4bb7..56507d73172 100644 --- a/doc/manual/src/release-notes/rl-next.md +++ b/doc/manual/src/release-notes/rl-next.md @@ -1,2 +1,10 @@ # Release X.Y (202?-??-??) +* A new function `builtins.readFileType` is available. It is similar to + `builtins.readDir` but acts on a single file or directory. + +* The `builtins.readDir` function has been optimized when encountering unknown + file types from POSIX's `readdir`. In such cases the type of each file is/was + discovered by making multiple syscalls. This change makes these operations + lazy such that these lookups will only be performed if the attribute is used. + This optimization effects a minority of filesystems and operating systems. diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 080892cbd52..24f83b93260 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -1646,23 +1646,73 @@ static RegisterPrimOp primop_hashFile({ .fun = prim_hashFile, }); + +/* Stringize a directory entry enum. Used by `readFileType' and `readDir'. */ +static const char * dirEntTypeToString(unsigned char dtType) +{ + /* Enum DT_(DIR|LNK|REG|UNKNOWN) */ + switch(dtType) { + case DT_REG: return "regular"; break; + case DT_DIR: return "directory"; break; + case DT_LNK: return "symlink"; break; + default: return "unknown"; break; + } + return "unknown"; /* Unreachable */ +} + + +static void prim_readFileType(EvalState & state, const PosIdx pos, Value * * args, Value & v) +{ + auto path = realisePath(state, pos, *args[0]); + /* Retrieve the directory entry type and stringize it. */ + v.mkString(dirEntTypeToString(getFileType(path))); +} + +static RegisterPrimOp primop_readFileType({ + .name = "__readFileType", + .args = {"p"}, + .doc = R"( + Determine the directory entry type of a filesystem node, being + one of "directory", "regular", "symlink", or "unknown". + )", + .fun = prim_readFileType, +}); + /* Read a directory (without . or ..) */ static void prim_readDir(EvalState & state, const PosIdx pos, Value * * args, Value & v) { auto path = realisePath(state, pos, *args[0]); + // Retrieve directory entries for all nodes in a directory. + // This is similar to `getFileType` but is optimized to reduce system calls + // on many systems. DirEntries entries = readDirectory(path); auto attrs = state.buildBindings(entries.size()); + // If we hit unknown directory entry types we may need to fallback to + // using `getFileType` on some systems. + // In order to reduce system calls we make each lookup lazy by using + // `builtins.readFileType` application. + Value * readFileType = nullptr; + for (auto & ent : entries) { - if (ent.type == DT_UNKNOWN) - ent.type = getFileType(path + "/" + ent.name); - attrs.alloc(ent.name).mkString( - ent.type == DT_REG ? "regular" : - ent.type == DT_DIR ? "directory" : - ent.type == DT_LNK ? "symlink" : - "unknown"); + auto & attr = attrs.alloc(ent.name); + if (ent.type == DT_UNKNOWN) { + // Some filesystems or operating systems may not be able to return + // detailed node info quickly in this case we produce a thunk to + // query the file type lazily. + auto epath = state.allocValue(); + Path path2 = path + "/" + ent.name; + epath->mkString(path2); + if (!readFileType) + readFileType = &state.getBuiltin("readFileType"); + attr.mkApp(readFileType, epath); + } else { + // This branch of the conditional is much more likely. + // Here we just stringize the directory entry type. + attr.mkString(dirEntTypeToString(ent.type)); + } } v.mkAttrs(attrs); diff --git a/tests/lang/eval-okay-readDir.exp b/tests/lang/eval-okay-readDir.exp index bf8d2c14ea4..6413f6d4f9e 100644 --- a/tests/lang/eval-okay-readDir.exp +++ b/tests/lang/eval-okay-readDir.exp @@ -1 +1 @@ -{ bar = "regular"; foo = "directory"; } +{ bar = "regular"; foo = "directory"; ldir = "symlink"; linked = "symlink"; } diff --git a/tests/lang/eval-okay-readFileType.exp b/tests/lang/eval-okay-readFileType.exp new file mode 100644 index 00000000000..6413f6d4f9e --- /dev/null +++ b/tests/lang/eval-okay-readFileType.exp @@ -0,0 +1 @@ +{ bar = "regular"; foo = "directory"; ldir = "symlink"; linked = "symlink"; } diff --git a/tests/lang/eval-okay-readFileType.nix b/tests/lang/eval-okay-readFileType.nix new file mode 100644 index 00000000000..174fb6c3a02 --- /dev/null +++ b/tests/lang/eval-okay-readFileType.nix @@ -0,0 +1,6 @@ +{ + bar = builtins.readFileType ./readDir/bar; + foo = builtins.readFileType ./readDir/foo; + linked = builtins.readFileType ./readDir/linked; + ldir = builtins.readFileType ./readDir/ldir; +} diff --git a/tests/lang/readDir/ldir b/tests/lang/readDir/ldir new file mode 120000 index 00000000000..19102815663 --- /dev/null +++ b/tests/lang/readDir/ldir @@ -0,0 +1 @@ +foo \ No newline at end of file diff --git a/tests/lang/readDir/linked b/tests/lang/readDir/linked new file mode 120000 index 00000000000..c503f86a0cf --- /dev/null +++ b/tests/lang/readDir/linked @@ -0,0 +1 @@ +foo/git-hates-directories \ No newline at end of file From 1dfcf416455bc4e080d2ed55f494e7bcf8ed26f5 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Mon, 23 Jan 2023 11:28:31 +0100 Subject: [PATCH 2/3] rl-next.md: Minor improvement (cherry picked from commit 37c533ed2730b26207434ffbc972cc4555820037) --- doc/manual/src/release-notes/rl-next.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/manual/src/release-notes/rl-next.md b/doc/manual/src/release-notes/rl-next.md index 56507d73172..8a79703abd8 100644 --- a/doc/manual/src/release-notes/rl-next.md +++ b/doc/manual/src/release-notes/rl-next.md @@ -3,8 +3,8 @@ * A new function `builtins.readFileType` is available. It is similar to `builtins.readDir` but acts on a single file or directory. -* The `builtins.readDir` function has been optimized when encountering unknown +* The `builtins.readDir` function has been optimized when encountering not-yet-known file types from POSIX's `readdir`. In such cases the type of each file is/was discovered by making multiple syscalls. This change makes these operations lazy such that these lookups will only be performed if the attribute is used. - This optimization effects a minority of filesystems and operating systems. + This optimization affects a minority of filesystems and operating systems. From 995b658e727a6b27122befb384f76219b6002a83 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Wed, 24 May 2023 18:47:09 +0200 Subject: [PATCH 3/3] Fixup release notes (#8393) * Fixup release notes --- doc/manual/src/release-notes/rl-2.13.md | 9 +++++++++ doc/manual/src/release-notes/rl-next.md | 10 ---------- 2 files changed, 9 insertions(+), 10 deletions(-) delete mode 100644 doc/manual/src/release-notes/rl-next.md diff --git a/doc/manual/src/release-notes/rl-2.13.md b/doc/manual/src/release-notes/rl-2.13.md index 0759f890413..49dda01c68c 100644 --- a/doc/manual/src/release-notes/rl-2.13.md +++ b/doc/manual/src/release-notes/rl-2.13.md @@ -38,3 +38,12 @@ * You can now disable the global flake registry by setting the `flake-registry` configuration option to an empty string. The same can be achieved at runtime with `--flake-registry ""`. + +* Since 2.13.5, a new function `builtins.readFileType` is available. It is similar to + `builtins.readDir` but acts on a single file or directory. + +* Since 2.13.5, the `builtins.readDir` function has been optimized when encountering not-yet-known + file types from POSIX's `readdir`. In such cases the type of each file was + discovered by making multiple syscalls. This change makes these operations + lazy such that these lookups will only be performed if the attribute is used. + This optimization affects a minority of filesystems and operating systems. diff --git a/doc/manual/src/release-notes/rl-next.md b/doc/manual/src/release-notes/rl-next.md deleted file mode 100644 index 8a79703abd8..00000000000 --- a/doc/manual/src/release-notes/rl-next.md +++ /dev/null @@ -1,10 +0,0 @@ -# Release X.Y (202?-??-??) - -* A new function `builtins.readFileType` is available. It is similar to - `builtins.readDir` but acts on a single file or directory. - -* The `builtins.readDir` function has been optimized when encountering not-yet-known - file types from POSIX's `readdir`. In such cases the type of each file is/was - discovered by making multiple syscalls. This change makes these operations - lazy such that these lookups will only be performed if the attribute is used. - This optimization affects a minority of filesystems and operating systems.