From 4f35f003e6e5b800be75e3985054e5fce2dea50a Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Tue, 26 Sep 2023 00:26:00 +0200 Subject: [PATCH] lib.fileset: Internal representation v3, empty value without a base `unions []` now works! Notably the new empty value without a base is not exposed in the interface. I don't know of any use case for it. --- lib/fileset/README.md | 29 ++++++++++++++++- lib/fileset/default.nix | 7 ++-- lib/fileset/internal.nix | 69 ++++++++++++++++++++++++++++++++-------- lib/fileset/tests.sh | 40 +++++++++++++++++++---- 4 files changed, 119 insertions(+), 26 deletions(-) diff --git a/lib/fileset/README.md b/lib/fileset/README.md index 6e57f1f8f2b4a..2ef7b81e88503 100644 --- a/lib/fileset/README.md +++ b/lib/fileset/README.md @@ -41,9 +41,16 @@ An attribute set with these values: - `_type` (constant string `"fileset"`): Tag to indicate this value is a file set. -- `_internalVersion` (constant `2`, the current version): +- `_internalVersion` (constant `3`, the current version): Version of the representation. +- `_internalIsEmptyWithoutBase` (bool): + Whether this file set is the empty file set without a base path. + If `true`, `_internalBase*` and `_internalTree` are not set. + This is the only way to represent an empty file set without needing a base path. + + Such a value can be used as the identity element for `union` and the return value of `unions []` and co. + - `_internalBase` (path): Any files outside of this path cannot influence the set of files. This is always a directory. @@ -111,6 +118,26 @@ Arguments: - (+) This can be removed later, if we discover it's too restrictive - (-) It leads to errors when a sensible result could sometimes be returned, such as in the above example. +### Empty file set without a base + +There is a special representation for an empty file set without a base path. +This is used for return values that should be empty but when there's no base path that would makes sense. + +Arguments: +- Alternative: This could also be represented using `_internalBase = /.` and `_internalTree = null`. + - (+) Removes the need for a special representation. + - (-) Due to [influence tracking](#influence-tracking), + `union empty ./.` would have `/.` as the base path, + which would then prevent `toSource { root = ./.; fileset = union empty ./.; }` from working, + which is not as one would expect. + - (-) With the assumption that there can be multiple filesystem roots (as established with the [path library](../path/README.md)), + this would have to cause an error with `union empty pathWithAnotherFilesystemRoot`, + which is not as one would expect. +- Alternative: Do not have such a value and error when it would be needed as a return value + - (+) Removes the need for a special representation. + - (-) Leaves us with no identity element for `union` and no reasonable return value for `unions []`. + From a set theory perspective, which has a well-known notion of empty sets, this is unintuitive. + ### Empty directories File sets can only represent a _set_ of local files, directories on their own are not representable. diff --git a/lib/fileset/default.nix b/lib/fileset/default.nix index 88c8dcd1a70b8..ab26112c94709 100644 --- a/lib/fileset/default.nix +++ b/lib/fileset/default.nix @@ -156,7 +156,7 @@ If a directory does not recursively contain any file, it is omitted from the sto lib.fileset.toSource: `root` is of type ${typeOf root}, but it should be a path instead.'' # Currently all Nix paths have the same filesystem root, but this could change in the future. # See also ../path/README.md - else if rootFilesystemRoot != filesetFilesystemRoot then + else if ! fileset._internalIsEmptyWithoutBase && rootFilesystemRoot != filesetFilesystemRoot then throw '' lib.fileset.toSource: Filesystem roots are not the same for `fileset` and `root` ("${toString root}"): `root`: root "${toString rootFilesystemRoot}" @@ -170,7 +170,7 @@ If a directory does not recursively contain any file, it is omitted from the sto lib.fileset.toSource: `root` (${toString root}) is a file, but it should be a directory instead. Potential solutions: - If you want to import the file into the store _without_ a containing directory, use string interpolation or `builtins.path` instead of this function. - If you want to import the file into the store _with_ a containing directory, set `root` to the containing directory, such as ${toString (dirOf root)}, and set `fileset` to the file path.'' - else if ! hasPrefix root fileset._internalBase then + else if ! fileset._internalIsEmptyWithoutBase && ! hasPrefix root fileset._internalBase then throw '' lib.fileset.toSource: `fileset` could contain files in ${toString fileset._internalBase}, which is not under the `root` (${toString root}). Potential solutions: - Set `root` to ${toString fileset._internalBase} or any directory higher up. This changes the layout of the resulting store path. @@ -264,9 +264,6 @@ If a directory does not recursively contain any file, it is omitted from the sto filesets: if ! isList filesets then throw "lib.fileset.unions: Expected argument to be a list, but got a ${typeOf filesets}." - else if filesets == [ ] then - # TODO: This could be supported, but requires an extra internal representation for the empty file set, which would be special for not having a base path. - throw "lib.fileset.unions: Expected argument to be a list with at least one element, but it contains no elements." else pipe filesets [ # Annotate the elements with context, used by _coerceMany for better errors diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix index 2c329edb390d6..b3dbc5b33dc3f 100644 --- a/lib/fileset/internal.nix +++ b/lib/fileset/internal.nix @@ -28,6 +28,7 @@ let drop elemAt filter + findFirst findFirstIndex foldl' head @@ -64,7 +65,7 @@ rec { # - Increment this version # - Add an additional migration function below # - Update the description of the internal representation in ./README.md - _currentVersion = 2; + _currentVersion = 3; # Migrations between versions. The 0th element converts from v0 to v1, and so on migrations = [ @@ -89,8 +90,34 @@ rec { _internalVersion = 2; } ) + + # Convert v2 into v3: filesetTree's now have a representation for an empty file set without a base path + ( + filesetV2: + filesetV2 // { + # All v1 file sets are not the new empty file set + _internalIsEmptyWithoutBase = false; + _internalVersion = 3; + } + ) ]; + _noEvalMessage = '' + lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.''; + + # The empty file set without a base path + _emptyWithoutBase = { + _type = "fileset"; + + _internalVersion = _currentVersion; + + # The one and only! + _internalIsEmptyWithoutBase = true; + + # Double __ to make it be evaluated and ordered first + __noEval = throw _noEvalMessage; + }; + # Create a fileset, see ./README.md#fileset # Type: path -> filesetTree -> fileset _create = base: tree: @@ -103,14 +130,15 @@ rec { _type = "fileset"; _internalVersion = _currentVersion; + + _internalIsEmptyWithoutBase = false; _internalBase = base; _internalBaseRoot = parts.root; _internalBaseComponents = components parts.subpath; _internalTree = tree; # Double __ to make it be evaluated and ordered first - __noEval = throw '' - lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.''; + __noEval = throw _noEvalMessage; }; # Coerce a value to a fileset, erroring when the value cannot be coerced. @@ -155,14 +183,20 @@ rec { _coerce "${functionContext}: ${context}" value ) list; - firstBaseRoot = (head filesets)._internalBaseRoot; + # Find the first value with a base, there may be none! + firstWithBase = findFirst (fileset: ! fileset._internalIsEmptyWithoutBase) null filesets; + # This value is only accessed if first != null + firstBaseRoot = firstWithBase._internalBaseRoot; # Finds the first element with a filesystem root different than the first element, if any differentIndex = findFirstIndex (fileset: - firstBaseRoot != fileset._internalBaseRoot + # The empty value without a base doesn't have a base path + ! fileset._internalIsEmptyWithoutBase + && firstBaseRoot != fileset._internalBaseRoot ) null filesets; in - if differentIndex != null then + # Only evaluates `differentIndex` if there are any elements with a base + if firstWithBase != null && differentIndex != null then throw '' ${functionContext}: Filesystem roots are not the same: ${(head list).context}: root "${toString firstBaseRoot}" @@ -311,7 +345,7 @@ rec { # Special case because the code below assumes that the _internalBase is always included in the result # which shouldn't be done when we have no files at all in the base # This also forces the tree before returning the filter, leads to earlier error messages - if tree == null then + if fileset._internalIsEmptyWithoutBase || tree == null then empty else nonEmpty; @@ -321,7 +355,12 @@ rec { # Type: [ Fileset ] -> Fileset _unionMany = filesets: let - first = head filesets; + # All filesets that have a base, aka not the ones that are the empty value without a base + filesetsWithBase = filter (fileset: ! fileset._internalIsEmptyWithoutBase) filesets; + + # The first fileset that has a base. + # This value is only accessed if there are at all. + firstWithBase = head filesetsWithBase; # To be able to union filesetTree's together, they need to have the same base path. # Base paths can be unioned by taking their common prefix, @@ -332,14 +371,14 @@ rec { # so this cannot cause a stack overflow due to a build-up of unevaluated thunks. commonBaseComponents = foldl' (components: el: commonPrefix components el._internalBaseComponents) - first._internalBaseComponents + firstWithBase._internalBaseComponents # We could also not do the `tail` here to avoid a list allocation, # but then we'd have to pay for a potentially expensive # but unnecessary `commonPrefix` call - (tail filesets); + (tail filesetsWithBase); # The common base path assembled from a filesystem root and the common components - commonBase = append first._internalBaseRoot (join commonBaseComponents); + commonBase = append firstWithBase._internalBaseRoot (join commonBaseComponents); # A list of filesetTree's that all have the same base path # This is achieved by nesting the trees into the components they have over the common base path @@ -351,14 +390,18 @@ rec { setAttrByPath (drop (length commonBaseComponents) fileset._internalBaseComponents) fileset._internalTree - ) filesets; + ) filesetsWithBase; # Folds all trees together into a single one using _unionTree # We do not use a fold here because it would cause a thunk build-up # which could cause a stack overflow for a large number of trees resultTree = _unionTrees trees; in - _create commonBase resultTree; + # If there's no values with a base, we have no files + if filesetsWithBase == [ ] then + _emptyWithoutBase + else + _create commonBase resultTree; # The union of multiple filesetTree's with the same base path. # Later elements are only evaluated if necessary. diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh index 0ea96859e7a34..d80c3942195f3 100755 --- a/lib/fileset/tests.sh +++ b/lib/fileset/tests.sh @@ -282,24 +282,27 @@ expectFailure 'toSource { root = ./.; fileset = ./a; }' 'lib.fileset.toSource: ` # File sets cannot be evaluated directly expectFailure 'union ./. ./.' 'lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.' +expectFailure '_emptyWithoutBase' 'lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.' # Past versions of the internal representation are supported expectEqual '_coerce ": value" { _type = "fileset"; _internalVersion = 0; _internalBase = ./.; }' \ - '{ _internalBase = ./.; _internalBaseComponents = path.subpath.components (path.splitRoot ./.).subpath; _internalBaseRoot = /.; _internalVersion = 2; _type = "fileset"; }' + '{ _internalBase = ./.; _internalBaseComponents = path.subpath.components (path.splitRoot ./.).subpath; _internalBaseRoot = /.; _internalIsEmptyWithoutBase = false; _internalVersion = 3; _type = "fileset"; }' expectEqual '_coerce ": value" { _type = "fileset"; _internalVersion = 1; }' \ - '{ _type = "fileset"; _internalVersion = 2; }' + '{ _type = "fileset"; _internalIsEmptyWithoutBase = false; _internalVersion = 3; }' +expectEqual '_coerce ": value" { _type = "fileset"; _internalVersion = 2; }' \ + '{ _type = "fileset"; _internalIsEmptyWithoutBase = false; _internalVersion = 3; }' # Future versions of the internal representation are unsupported -expectFailure '_coerce ": value" { _type = "fileset"; _internalVersion = 3; }' ': value is a file set created from a future version of the file set library with a different internal representation: -\s*- Internal version of the file set: 3 -\s*- Internal version of the library: 2 +expectFailure '_coerce ": value" { _type = "fileset"; _internalVersion = 4; }' ': value is a file set created from a future version of the file set library with a different internal representation: +\s*- Internal version of the file set: 4 +\s*- Internal version of the library: 3 \s*Make sure to update your Nixpkgs to have a newer version of `lib.fileset`.' # _create followed by _coerce should give the inputs back without any validation expectEqual '{ inherit (_coerce "" (_create ./. "directory")) _internalVersion _internalBase _internalTree; -}' '{ _internalBase = ./.; _internalTree = "directory"; _internalVersion = 2; }' +}' '{ _internalBase = ./.; _internalTree = "directory"; _internalVersion = 3; }' #### Resulting store path #### @@ -311,6 +314,12 @@ tree=( ) checkFileset './.' +# The empty value without a base should also result in an empty result +tree=( + [a]=0 +) +checkFileset '_emptyWithoutBase' + # Directories recursively containing no files are not included tree=( [e/]=0 @@ -408,13 +417,30 @@ expectFailure 'toSource { root = ./.; fileset = unions [ ./. ./b ]; }' 'lib.file # unions needs a list with at least 1 element expectFailure 'toSource { root = ./.; fileset = unions null; }' 'lib.fileset.unions: Expected argument to be a list, but got a null.' -expectFailure 'toSource { root = ./.; fileset = unions [ ]; }' 'lib.fileset.unions: Expected argument to be a list with at least one element, but it contains no elements.' # The tree of later arguments should not be evaluated if a former argument already includes all files tree=() checkFileset 'union ./. (_create ./. (abort "This should not be used!"))' checkFileset 'unions [ ./. (_create ./. (abort "This should not be used!")) ]' +# unions doesn't include any files for an empty list or only empty values without a base +tree=( + [x]=0 + [y/z]=0 +) +checkFileset 'unions [ ]' +checkFileset 'unions [ _emptyWithoutBase ]' +checkFileset 'unions [ _emptyWithoutBase _emptyWithoutBase ]' +checkFileset 'union _emptyWithoutBase _emptyWithoutBase' + +# The empty value without a base is the left and right identity of union +tree=( + [x]=1 + [y/z]=0 +) +checkFileset 'union ./x _emptyWithoutBase' +checkFileset 'union _emptyWithoutBase ./x' + # union doesn't include files that weren't specified tree=( [x]=1