From 8372ae27f973d235565c0cf443c657359a360a47 Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Sat, 25 Jul 2020 19:53:15 +0200 Subject: [PATCH 1/3] lib/strings: Add levenshtein distance functions Adds some functions related to string similarity: - lib.strings.commonPrefixLength - lib.strings.commonSuffixLength - lib.strings.levenshtein - lib.strings.levenshteinAtMost --- lib/strings.nix | 127 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/lib/strings.nix b/lib/strings.nix index b2fd495e4c841..d34263c994948 100644 --- a/lib/strings.nix +++ b/lib/strings.nix @@ -774,4 +774,131 @@ rec { (x: if stringLength x == 0 then "unknown" else x) ]; + /* Computes the Levenshtein distance between two strings. + Complexity O(n*m) where n and m are the lengths of the strings. + Algorithm adjusted from https://stackoverflow.com/a/9750974/6605742 + + Type: levenshtein :: string -> string -> int + + Example: + levenshtein "foo" "foo" + => 0 + levenshtein "book" "hook" + => 1 + levenshtein "hello" "Heyo" + => 3 + */ + levenshtein = a: b: let + # Two dimensional array with dimensions (stringLength a + 1, stringLength b + 1) + arr = lib.genList (i: + lib.genList (j: + dist i j + ) (stringLength b + 1) + ) (stringLength a + 1); + d = x: y: lib.elemAt (lib.elemAt arr x) y; + dist = i: j: + let c = if substring (i - 1) 1 a == substring (j - 1) 1 b + then 0 else 1; + in + if j == 0 then i + else if i == 0 then j + else lib.min + ( lib.min (d (i - 1) j + 1) (d i (j - 1) + 1)) + ( d (i - 1) (j - 1) + c ); + in d (stringLength a) (stringLength b); + + /* Returns the length of the prefix common to both strings. + */ + commonPrefixLength = a: b: + let + m = lib.min (stringLength a) (stringLength b); + go = i: if i >= m then m else if substring i 1 a == substring i 1 b then go (i + 1) else i; + in go 0; + + /* Returns the length of the suffix common to both strings. + */ + commonSuffixLength = a: b: + let + m = lib.min (stringLength a) (stringLength b); + go = i: if i >= m then m else if substring (stringLength a - i - 1) 1 a == substring (stringLength b - i - 1) 1 b then go (i + 1) else i; + in go 0; + + /* Returns whether the levenshtein distance between two strings is at most some value + Complexity is O(min(n,m)) for k <= 2 and O(n*m) otherwise + + Type: levenshteinAtMost :: int -> string -> string -> bool + + Example: + levenshteinAtMost 0 "foo" "foo" + => true + levenshteinAtMost 1 "foo" "boa" + => false + levenshteinAtMost 2 "foo" "boa" + => true + levenshteinAtMost 2 "This is a sentence" "this is a sentense." + => false + levenshteinAtMost 3 "This is a sentence" "this is a sentense." + => true + + */ + levenshteinAtMost = let + infixDifferAtMost1 = x: y: stringLength x <= 1 && stringLength y <= 1; + + # This function takes two strings stripped by their common pre and suffix, + # and returns whether they differ by at most two by Levenshtein distance. + # Because of this stripping, if they do indeed differ by at most two edits, + # we know that those edits were (if at all) done at the start or the end, + # while the middle has to have stayed the same. This fact is used in the + # implementation. + infixDifferAtMost2 = x: y: + let + xlen = stringLength x; + ylen = stringLength y; + # This function is only called with |x| >= |y| and |x| - |y| <= 2, so + # diff is one of 0, 1 or 2 + diff = xlen - ylen; + + # Infix of x and y, stripped by the left and right most character + xinfix = substring 1 (xlen - 2) x; + yinfix = substring 1 (ylen - 2) y; + + # x and y but a character deleted at the left or right + xdelr = substring 0 (xlen - 1) x; + xdell = substring 1 (xlen - 1) x; + ydelr = substring 0 (ylen - 1) y; + ydell = substring 1 (ylen - 1) y; + in + # A length difference of 2 can only be gotten with 2 delete edits, + # which have to have happened at the start and end of x + # Example: "abcdef" -> "bcde" + if diff == 2 then xinfix == y + # A length difference of 1 can only be gotten with a deletion on the + # right and a replacement on the left or vice versa. + # Example: "abcdef" -> "bcdez" or "zbcde" + else if diff == 1 then xinfix == ydelr || xinfix == ydell + # No length difference can either happen through replacements on both + # sides, or a deletion on the left and an insertion on the right or + # vice versa + # Example: "abcdef" -> "zbcdez" or "bcdefz" or "zabcde" + else xinfix == yinfix || xdelr == ydell || xdell == ydelr; + + in k: if k <= 0 then a: b: a == b else + let f = a: b: + let + alen = stringLength a; + blen = stringLength b; + prelen = commonPrefixLength a b; + suflen = commonSuffixLength a b; + presuflen = prelen + suflen; + ainfix = substring prelen (alen - presuflen) a; + binfix = substring prelen (blen - presuflen) b; + in + # Make a be the bigger string + if alen < blen then f b a + # If a has over k more characters than b, even with k deletes on a, b can't be reached + else if alen - blen > k then false + else if k == 1 then infixDifferAtMost1 ainfix binfix + else if k == 2 then infixDifferAtMost2 ainfix binfix + else levenshtein ainfix binfix <= k; + in f; } From 2673a136896816ebc5f4ae5ea004ab0265068329 Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Sat, 25 Jul 2020 19:54:19 +0200 Subject: [PATCH 2/3] lib/tests: Add tests for levenshtein functions --- lib/tests/misc.nix | 152 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/lib/tests/misc.nix b/lib/tests/misc.nix index 2711190313956..1eb2d953ebbe9 100644 --- a/lib/tests/misc.nix +++ b/lib/tests/misc.nix @@ -913,4 +913,156 @@ runTests { }; }; + ## Levenshtein distance functions and co. + testCommonPrefixLengthEmpty = { + expr = strings.commonPrefixLength "" "hello"; + expected = 0; + }; + + testCommonPrefixLengthSame = { + expr = strings.commonPrefixLength "hello" "hello"; + expected = 5; + }; + + testCommonPrefixLengthDiffering = { + expr = strings.commonPrefixLength "hello" "hey"; + expected = 2; + }; + + testCommonSuffixLengthEmpty = { + expr = strings.commonSuffixLength "" "hello"; + expected = 0; + }; + + testCommonSuffixLengthSame = { + expr = strings.commonSuffixLength "hello" "hello"; + expected = 5; + }; + + testCommonSuffixLengthDiffering = { + expr = strings.commonSuffixLength "test" "rest"; + expected = 3; + }; + + testLevenshteinEmpty = { + expr = strings.levenshtein "" ""; + expected = 0; + }; + + testLevenshteinOnlyAdd = { + expr = strings.levenshtein "" "hello there"; + expected = 11; + }; + + testLevenshteinOnlyRemove = { + expr = strings.levenshtein "hello there" ""; + expected = 11; + }; + + testLevenshteinOnlyTransform = { + expr = strings.levenshtein "abcdef" "ghijkl"; + expected = 6; + }; + + testLevenshteinMixed = { + expr = strings.levenshtein "kitchen" "sitting"; + expected = 5; + }; + + testLevenshteinAtMostZeroFalse = { + expr = strings.levenshteinAtMost 0 "foo" "boo"; + expected = false; + }; + + testLevenshteinAtMostZeroTrue = { + expr = strings.levenshteinAtMost 0 "foo" "foo"; + expected = true; + }; + + testLevenshteinAtMostOneFalse = { + expr = strings.levenshteinAtMost 1 "car" "ct"; + expected = false; + }; + + testLevenshteinAtMostOneTrue = { + expr = strings.levenshteinAtMost 1 "car" "cr"; + expected = true; + }; + + # We test levenshteinAtMost 2 particularly well because it uses a complicated + # implementation + testLevenshteinAtMostTwoIsEmpty = { + expr = strings.levenshteinAtMost 2 "" ""; + expected = true; + }; + + testLevenshteinAtMostTwoIsZero = { + expr = strings.levenshteinAtMost 2 "abcdef" "abcdef"; + expected = true; + }; + + testLevenshteinAtMostTwoIsOne = { + expr = strings.levenshteinAtMost 2 "abcdef" "abddef"; + expected = true; + }; + + testLevenshteinAtMostTwoDiff0False = { + expr = strings.levenshteinAtMost 2 "abcdef" "aczyef"; + expected = false; + }; + + testLevenshteinAtMostTwoDiff0Outer = { + expr = strings.levenshteinAtMost 2 "abcdef" "zbcdez"; + expected = true; + }; + + testLevenshteinAtMostTwoDiff0DelLeft = { + expr = strings.levenshteinAtMost 2 "abcdef" "bcdefz"; + expected = true; + }; + + testLevenshteinAtMostTwoDiff0DelRight = { + expr = strings.levenshteinAtMost 2 "abcdef" "zabcde"; + expected = true; + }; + + testLevenshteinAtMostTwoDiff1False = { + expr = strings.levenshteinAtMost 2 "abcdef" "bddez"; + expected = false; + }; + + testLevenshteinAtMostTwoDiff1DelLeft = { + expr = strings.levenshteinAtMost 2 "abcdef" "bcdez"; + expected = true; + }; + + testLevenshteinAtMostTwoDiff1DelRight = { + expr = strings.levenshteinAtMost 2 "abcdef" "zbcde"; + expected = true; + }; + + testLevenshteinAtMostTwoDiff2False = { + expr = strings.levenshteinAtMost 2 "hello" "hxo"; + expected = false; + }; + + testLevenshteinAtMostTwoDiff2True = { + expr = strings.levenshteinAtMost 2 "hello" "heo"; + expected = true; + }; + + testLevenshteinAtMostTwoDiff3 = { + expr = strings.levenshteinAtMost 2 "hello" "ho"; + expected = false; + }; + + testLevenshteinAtMostThreeFalse = { + expr = strings.levenshteinAtMost 3 "hello" "Holla!"; + expected = false; + }; + + testLevenshteinAtMostThreeTrue = { + expr = strings.levenshteinAtMost 3 "hello" "Holla"; + expected = true; + }; } From 71120e0e9df2c677e8466014254e4fa32175bb20 Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Sat, 25 Jul 2020 19:54:37 +0200 Subject: [PATCH 3/3] lib/customization: Improve callPackage error message for missing args This uses the levenshtein distance to look through all possible arguments to find ones that are close to what was requested: error: Function in /home/infinisil/src/nixpkgs/pkgs/tools/text/ripgrep/default.nix called without required argument "fetchFromGithub", did you mean "fetchFromGitHub" or "fetchFromGitLab"? With https://github.com/NixOS/nix/pull/3468 (in current nixUnstable) the error message becomes even better, adding line location info --- lib/customisation.nix | 51 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/lib/customisation.nix b/lib/customisation.nix index 234a528527d3c..cc9a9b1c55d0a 100644 --- a/lib/customisation.nix +++ b/lib/customisation.nix @@ -117,8 +117,55 @@ rec { callPackageWith = autoArgs: fn: args: let f = if lib.isFunction fn then fn else import fn; - auto = builtins.intersectAttrs (lib.functionArgs f) autoArgs; - in makeOverridable f (auto // args); + fargs = lib.functionArgs f; + + # All arguments that will be passed to the function + # This includes automatic ones and ones passed explicitly + allArgs = builtins.intersectAttrs fargs autoArgs // args; + + # A list of argument names that the function requires, but + # wouldn't be passed to it + missingArgs = lib.attrNames + # Filter out arguments that have a default value + (lib.filterAttrs (name: value: ! value) + # Filter out arguments that would be passed + (removeAttrs fargs (lib.attrNames allArgs))); + + # Get a list of suggested argument names for a given missing one + getSuggestions = arg: lib.pipe (autoArgs // args) [ + lib.attrNames + # Only use ones that are at most 2 edits away. While mork would work, + # levenshteinAtMost is only fast for 2 or less. + (lib.filter (lib.strings.levenshteinAtMost 2 arg)) + # Put strings with shorter distance first + (lib.sort (x: y: lib.strings.levenshtein x arg < lib.strings.levenshtein y arg)) + # Only take the first couple results + (lib.take 3) + # Quote all entries + (map (x: "\"" + x + "\"")) + ]; + + prettySuggestions = suggestions: + if suggestions == [] then "" + else if lib.length suggestions == 1 then ", did you mean ${lib.elemAt suggestions 0}?" + else ", did you mean ${lib.concatStringsSep ", " (lib.init suggestions)} or ${lib.last suggestions}?"; + + errorForArg = arg: + let + loc = builtins.unsafeGetAttrPos arg fargs; + # loc' can be removed once lib/minver.nix is >2.3.4, since that includes + # https://github.com/NixOS/nix/pull/3468 which makes loc be non-null + loc' = if loc != null then loc.file + ":" + toString loc.line + else if ! lib.isFunction fn then + toString fn + lib.optionalString (lib.sources.pathIsDirectory fn) "/default.nix" + else ""; + in "Function called without required argument \"${arg}\" at " + + "${loc'}${prettySuggestions (getSuggestions arg)}"; + + # Only show the error for the first missing argument + error = errorForArg (lib.head missingArgs); + + in if missingArgs == [] then makeOverridable f allArgs else throw error; /* Like callPackage, but for a function that returns an attribute