diff --git a/lib/fileset/default.nix b/lib/fileset/default.nix index 0342be3e0371e..4a97633b4a89d 100644 --- a/lib/fileset/default.nix +++ b/lib/fileset/default.nix @@ -9,6 +9,7 @@ let _fileFilter _printFileset _intersection + _difference ; inherit (builtins) @@ -368,6 +369,58 @@ If a directory does not recursively contain any file, it is omitted from the sto (elemAt filesets 0) (elemAt filesets 1); + /* + The file set containing all files from the first file set that are not in the second file set. + See also [Difference (set theory)](https://en.wikipedia.org/wiki/Complement_(set_theory)#Relative_complement). + + The given file sets are evaluated as lazily as possible, + with the first argument being evaluated first if needed. + + Type: + union :: FileSet -> FileSet -> FileSet + + Example: + # Create a file set containing all files from the current directory, + # except ones under ./tests + difference ./. ./tests + + let + # A set of Nix-related files + nixFiles = unions [ ./default.nix ./nix ./tests/default.nix ]; + in + # Create a file set containing all files under ./tests, except ones in `nixFiles`, + # meaning only without ./tests/default.nix + difference ./tests nixFiles + */ + difference = + # The positive file set. + # The result can only contain files that are also in this file set. + # + # This argument can also be a path, + # which gets [implicitly coerced to a file set](#sec-fileset-path-coercion). + positive: + # The negative file set. + # The result will never contain files that are also in this file set. + # + # This argument can also be a path, + # which gets [implicitly coerced to a file set](#sec-fileset-path-coercion). + negative: + let + filesets = _coerceMany "lib.fileset.difference" [ + { + context = "first argument (positive set)"; + value = positive; + } + { + context = "second argument (negative set)"; + value = negative; + } + ]; + in + _difference + (elemAt filesets 0) + (elemAt filesets 1); + /* Incrementally evaluate and trace a file set in a pretty way. This function is only intended for debugging purposes. diff --git a/lib/fileset/internal.nix b/lib/fileset/internal.nix index 2d52a8cb410b1..8543bf616c2e3 100644 --- a/lib/fileset/internal.nix +++ b/lib/fileset/internal.nix @@ -639,6 +639,86 @@ rec { # In all other cases it's the rhs rhs; + # Compute the set difference between two file sets. + # The filesets must already be coerced and validated to be in the same filesystem root. + # Type: Fileset -> Fileset -> Fileset + _difference = positive: negative: + let + # The common base components prefix, e.g. + # (/foo/bar, /foo/bar/baz) -> /foo/bar + # (/foo/bar, /foo/baz) -> /foo + commonBaseComponentsLength = + # TODO: Have a `lib.lists.commonPrefixLength` function such that we don't need the list allocation from commonPrefix here + length ( + commonPrefix + positive._internalBaseComponents + negative._internalBaseComponents + ); + + # We need filesetTree's with the same base to be able to compute the difference between them + # This here is the filesetTree from the negative file set, but for a base path that matches the positive file set. + # Examples: + # For `difference /foo /foo/bar`, `negativeTreeWithPositiveBase = { bar = "directory"; }` + # because under the base path of `/foo`, only `bar` from the negative file set is included + # For `difference /foo/bar /foo`, `negativeTreeWithPositiveBase = "directory"` + # because under the base path of `/foo/bar`, everything from the negative file set is included + # For `difference /foo /bar`, `negativeTreeWithPositiveBase = null` + # because under the base path of `/foo`, nothing from the negative file set is included + negativeTreeWithPositiveBase = + if commonBaseComponentsLength == length positive._internalBaseComponents then + # The common prefix is the same as the positive base path, so the second path is equal or longer. + # We need to _shorten_ the negative filesetTree to the same base path as the positive one + # E.g. for `difference /foo /foo/bar` the common prefix is /foo, equal to the positive file set's base + # So we need to shorten the base of the tree for the negative argument from /foo/bar to just /foo + _shortenTreeBase positive._internalBaseComponents negative + else if commonBaseComponentsLength == length negative._internalBaseComponents then + # The common prefix is the same as the negative base path, so the first path is longer. + # We need to lengthen the negative filesetTree to the same base path as the positive one. + # E.g. for `difference /foo/bar /foo` the common prefix is /foo, equal to the negative file set's base + # So we need to lengthen the base of the tree for the negative argument from /foo to /foo/bar + _lengthenTreeBase positive._internalBaseComponents negative + else + # The common prefix is neither the first nor the second path. + # This means there's no overlap between the two file sets, + # and nothing from the negative argument should get removed from the positive one + # E.g for `difference /foo /bar`, we remove nothing to get the same as `/foo` + null; + + resultingTree = + _differenceTree + positive._internalBase + positive._internalTree + negativeTreeWithPositiveBase; + in + # If the first file set is empty, we can never have any files in the result + if positive._internalIsEmptyWithoutBase then + _emptyWithoutBase + # If the second file set is empty, nothing gets removed, so the result is just the first file set + else if negative._internalIsEmptyWithoutBase then + positive + else + # We use the positive file set base for the result, + # because only files from the positive side may be included, + # which is what base path is for + _create positive._internalBase resultingTree; + + # Computes the set difference of two filesetTree's + # Type: Path -> filesetTree -> filesetTree + _differenceTree = path: lhs: rhs: + # If the lhs doesn't have any files, or the right hand side includes all files + if lhs == null || isString rhs then + # The result will always be empty + null + # If the right hand side has no files + else if rhs == null then + # The result is always the left hand side, because nothing gets removed + lhs + else + # Otherwise we always have two attribute sets to recurse into + mapAttrs (name: lhsValue: + _differenceTree (path + "/${name}") lhsValue (rhs.${name} or null) + ) (_directoryEntries path lhs); + _fileFilter = predicate: fileset: let recurse = path: tree: diff --git a/lib/fileset/tests.sh b/lib/fileset/tests.sh index d8d8dd4131894..d6ebdb098b5bd 100755 --- a/lib/fileset/tests.sh +++ b/lib/fileset/tests.sh @@ -677,6 +677,104 @@ tree=( ) checkFileset 'intersection (unions [ ./a/b ./c/d ./c/e ]) (unions [ ./a ./c/d/f ./c/e ])' +## Difference + +# Subtracting something from itself results in nothing +tree=( + [a]=0 +) +checkFileset 'difference ./. ./.' + +# The tree of the second argument should not be evaluated if not needed +checkFileset 'difference _emptyWithoutBase (_create ./. (abort "This should not be used!"))' +checkFileset 'difference (_create ./. null) (_create ./. (abort "This should not be used!"))' + +# Subtracting nothing gives the same thing back +tree=( + [a]=1 +) +checkFileset 'difference ./. _emptyWithoutBase' +checkFileset 'difference ./. (_create ./. null)' + +# Subtracting doesn't influence the base path +mkdir a b +touch {a,b}/x +expectEqual 'toSource { root = ./a; fileset = difference ./a ./b; }' 'toSource { root = ./a; fileset = ./a; }' +rm -rf -- * + +# Also not the other way around +mkdir a +expectFailure 'toSource { root = ./a; fileset = difference ./. ./a; }' 'lib.fileset.toSource: `fileset` could contain files in '"$work"', which is not under the `root` \('"$work"'/a\). Potential solutions: +\s*- Set `root` to '"$work"' or any directory higher up. This changes the layout of the resulting store path. +\s*- Set `fileset` to a file set that cannot contain files outside the `root` \('"$work"'/a\). This could change the files included in the result.' +rm -rf -- * + +# Difference actually works +# We test all combinations of ./., ./a, ./a/x and ./b +tree=( + [a/x]=0 + [a/y]=0 + [b]=0 + [c]=0 +) +checkFileset 'difference ./. ./.' +checkFileset 'difference ./a ./.' +checkFileset 'difference ./a/x ./.' +checkFileset 'difference ./b ./.' +checkFileset 'difference ./a ./a' +checkFileset 'difference ./a/x ./a' +checkFileset 'difference ./a/x ./a/x' +checkFileset 'difference ./b ./b' +tree=( + [a/x]=0 + [a/y]=0 + [b]=1 + [c]=1 +) +checkFileset 'difference ./. ./a' +tree=( + [a/x]=1 + [a/y]=1 + [b]=0 + [c]=0 +) +checkFileset 'difference ./a ./b' +tree=( + [a/x]=1 + [a/y]=0 + [b]=0 + [c]=0 +) +checkFileset 'difference ./a/x ./b' +tree=( + [a/x]=0 + [a/y]=1 + [b]=0 + [c]=0 +) +checkFileset 'difference ./a ./a/x' +tree=( + [a/x]=0 + [a/y]=0 + [b]=1 + [c]=0 +) +checkFileset 'difference ./b ./a' +checkFileset 'difference ./b ./a/x' +tree=( + [a/x]=0 + [a/y]=1 + [b]=1 + [c]=1 +) +checkFileset 'difference ./. ./a/x' +tree=( + [a/x]=1 + [a/y]=1 + [b]=0 + [c]=1 +) +checkFileset 'difference ./. ./b' ## File filter