From b60330742fbf2dbb34eebc530b485c7985ebfd47 Mon Sep 17 00:00:00 2001 From: Mel Zuser Date: Tue, 4 Apr 2023 23:35:12 -0400 Subject: [PATCH] Use shorter hash for script-builds directories Using a Base64 hash and truncating it to 26 characters, saves 38 chars, which helps avoid long paths issues on Windows, while still providing 130 bits of hash in order to avoid collisions. Bug #8841 --- cabal-install/cabal-install.cabal | 1 + cabal-install/src/Distribution/Client/HashValue.hs | 5 +++++ cabal-install/src/Distribution/Client/ScriptUtils.hs | 9 +++++++-- cabal-testsuite/cabal-testsuite.cabal | 2 +- cabal-testsuite/src/Test/Cabal/Prelude.hs | 5 +++-- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/cabal-install/cabal-install.cabal b/cabal-install/cabal-install.cabal index 99fa0398ae6..64bcf2f0096 100644 --- a/cabal-install/cabal-install.cabal +++ b/cabal-install/cabal-install.cabal @@ -205,6 +205,7 @@ library async >= 2.0 && < 2.3, array >= 0.4 && < 0.6, base16-bytestring >= 0.1.1 && < 1.1.0.0, + base64-bytestring >= 1.0 && < 1.3, binary >= 0.7.3 && < 0.9, bytestring >= 0.10.6.0 && < 0.12, containers >= 0.5.6.2 && < 0.7, diff --git a/cabal-install/src/Distribution/Client/HashValue.hs b/cabal-install/src/Distribution/Client/HashValue.hs index 67117b231cc..86281a309ff 100644 --- a/cabal-install/src/Distribution/Client/HashValue.hs +++ b/cabal-install/src/Distribution/Client/HashValue.hs @@ -6,6 +6,7 @@ module Distribution.Client.HashValue ( hashValue, truncateHash, showHashValue, + showHashValueBase64, readFileHashValue, hashFromTUF, ) where @@ -17,6 +18,7 @@ import qualified Hackage.Security.Client as Sec import qualified Crypto.Hash.SHA256 as SHA256 import qualified Data.ByteString.Base16 as Base16 +import qualified Data.ByteString.Base64 as Base64 import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as LBS @@ -55,6 +57,9 @@ hashValue = HashValue . SHA256.hashlazy showHashValue :: HashValue -> String showHashValue (HashValue digest) = BS.unpack (Base16.encode digest) +showHashValueBase64 :: HashValue -> String +showHashValueBase64 (HashValue digest) = BS.unpack (Base64.encode digest) + -- | Hash the content of a file. Uses SHA256. -- readFileHashValue :: FilePath -> IO HashValue diff --git a/cabal-install/src/Distribution/Client/ScriptUtils.hs b/cabal-install/src/Distribution/Client/ScriptUtils.hs index db377c8f10a..e4be34f1612 100644 --- a/cabal-install/src/Distribution/Client/ScriptUtils.hs +++ b/cabal-install/src/Distribution/Client/ScriptUtils.hs @@ -26,7 +26,7 @@ import Distribution.Client.Config import Distribution.Client.DistDirLayout ( DistDirLayout(..) ) import Distribution.Client.HashValue - ( hashValue, showHashValue ) + ( hashValue, showHashValueBase64 ) import Distribution.Client.HttpUtils ( HttpTransport, configureTransport ) import Distribution.Client.NixStyleOptions @@ -125,7 +125,12 @@ import qualified Text.Parsec as P -- Two hashes will be the same as long as the absolute paths -- are the same. getScriptHash :: FilePath -> IO String -getScriptHash script = showHashValue . hashValue . fromString <$> canonicalizePath script +getScriptHash script + -- Base64 is shorter than Base16, which helps avoid long path issues on windows + -- but it can contain /'s which aren't valid in file paths so replace them with + -- %'s. 26 chars / 130 bits is enough to practically avoid collisions. + = map (\c -> if c == '/' then '%' else c) . take 26 + . showHashValueBase64 . hashValue . fromString <$> canonicalizePath script -- | Get the directory for caching a script build. -- diff --git a/cabal-testsuite/cabal-testsuite.cabal b/cabal-testsuite/cabal-testsuite.cabal index 204859e50d5..01278468e62 100644 --- a/cabal-testsuite/cabal-testsuite.cabal +++ b/cabal-testsuite/cabal-testsuite.cabal @@ -60,7 +60,7 @@ library , aeson ^>= 1.4.2.0 || ^>=1.5.0.0 || ^>= 2.0.0.0 || ^>= 2.1.0.0 , async ^>= 2.2.1 , attoparsec ^>= 0.13.2.2 || ^>=0.14.1 - , base16-bytestring ^>= 0.1.1.6 || ^>= 1.0.0.0 + , base64-bytestring ^>= 1.0.0.0 || ^>= 1.1.0.0 || ^>= 1.2.0.0 , bytestring ^>= 0.10.0.2 || ^>= 0.11.0.0 , containers ^>= 0.5.0.0 || ^>= 0.6.0.1 , cryptohash-sha256 ^>= 0.11.101.0 diff --git a/cabal-testsuite/src/Test/Cabal/Prelude.hs b/cabal-testsuite/src/Test/Cabal/Prelude.hs index 10a3ac287ef..081dd935eeb 100644 --- a/cabal-testsuite/src/Test/Cabal/Prelude.hs +++ b/cabal-testsuite/src/Test/Cabal/Prelude.hs @@ -52,7 +52,7 @@ import Control.Monad (unless, when, void, forM_, liftM2, liftM4) import Control.Monad.Trans.Reader (withReaderT, runReaderT) import Control.Monad.IO.Class (MonadIO (..)) import qualified Crypto.Hash.SHA256 as SHA256 -import qualified Data.ByteString.Base16 as Base16 +import qualified Data.ByteString.Base64 as Base64 import qualified Data.ByteString.Char8 as C import Data.List (isInfixOf, stripPrefix, isPrefixOf, intercalate) import Data.List.NonEmpty (NonEmpty (..)) @@ -837,7 +837,8 @@ getScriptCacheDirectory :: FilePath -> TestM FilePath getScriptCacheDirectory script = do cabalDir <- testCabalDir `fmap` getTestEnv hashinput <- liftIO $ canonicalizePath script - let hash = C.unpack . Base16.encode . SHA256.hash . C.pack $ hashinput + let hash = map (\c -> if c == '/' then '%' else c) . take 26 + . C.unpack . Base64.encode . SHA256.hash . C.pack $ hashinput return $ cabalDir "script-builds" hash ------------------------------------------------------------------------