Skip to content

Commit

Permalink
Use shorter hash for script-builds directories
Browse files Browse the repository at this point in the history
Using a Base64 hash and truncating it to 26 characters, saves 38 chars,
which helps avoid long paths issues on Windows, while still providing
130 bits of hash in order to avoid collisions.

Bug haskell#8841
  • Loading branch information
bacchanalia authored and Mikolaj committed May 24, 2023
1 parent 5fd439f commit b603307
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 5 deletions.
1 change: 1 addition & 0 deletions cabal-install/cabal-install.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ library
async >= 2.0 && < 2.3,
array >= 0.4 && < 0.6,
base16-bytestring >= 0.1.1 && < 1.1.0.0,
base64-bytestring >= 1.0 && < 1.3,
binary >= 0.7.3 && < 0.9,
bytestring >= 0.10.6.0 && < 0.12,
containers >= 0.5.6.2 && < 0.7,
Expand Down
5 changes: 5 additions & 0 deletions cabal-install/src/Distribution/Client/HashValue.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ module Distribution.Client.HashValue (
hashValue,
truncateHash,
showHashValue,
showHashValueBase64,
readFileHashValue,
hashFromTUF,
) where
Expand All @@ -17,6 +18,7 @@ import qualified Hackage.Security.Client as Sec

import qualified Crypto.Hash.SHA256 as SHA256
import qualified Data.ByteString.Base16 as Base16
import qualified Data.ByteString.Base64 as Base64
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as LBS

Expand Down Expand Up @@ -55,6 +57,9 @@ hashValue = HashValue . SHA256.hashlazy
showHashValue :: HashValue -> String
showHashValue (HashValue digest) = BS.unpack (Base16.encode digest)

showHashValueBase64 :: HashValue -> String
showHashValueBase64 (HashValue digest) = BS.unpack (Base64.encode digest)

-- | Hash the content of a file. Uses SHA256.
--
readFileHashValue :: FilePath -> IO HashValue
Expand Down
9 changes: 7 additions & 2 deletions cabal-install/src/Distribution/Client/ScriptUtils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import Distribution.Client.Config
import Distribution.Client.DistDirLayout
( DistDirLayout(..) )
import Distribution.Client.HashValue
( hashValue, showHashValue )
( hashValue, showHashValueBase64 )
import Distribution.Client.HttpUtils
( HttpTransport, configureTransport )
import Distribution.Client.NixStyleOptions
Expand Down Expand Up @@ -125,7 +125,12 @@ import qualified Text.Parsec as P
-- Two hashes will be the same as long as the absolute paths
-- are the same.
getScriptHash :: FilePath -> IO String
getScriptHash script = showHashValue . hashValue . fromString <$> canonicalizePath script
getScriptHash script
-- Base64 is shorter than Base16, which helps avoid long path issues on windows
-- but it can contain /'s which aren't valid in file paths so replace them with
-- %'s. 26 chars / 130 bits is enough to practically avoid collisions.
= map (\c -> if c == '/' then '%' else c) . take 26
. showHashValueBase64 . hashValue . fromString <$> canonicalizePath script

-- | Get the directory for caching a script build.
--
Expand Down
2 changes: 1 addition & 1 deletion cabal-testsuite/cabal-testsuite.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ library
, aeson ^>= 1.4.2.0 || ^>=1.5.0.0 || ^>= 2.0.0.0 || ^>= 2.1.0.0
, async ^>= 2.2.1
, attoparsec ^>= 0.13.2.2 || ^>=0.14.1
, base16-bytestring ^>= 0.1.1.6 || ^>= 1.0.0.0
, base64-bytestring ^>= 1.0.0.0 || ^>= 1.1.0.0 || ^>= 1.2.0.0
, bytestring ^>= 0.10.0.2 || ^>= 0.11.0.0
, containers ^>= 0.5.0.0 || ^>= 0.6.0.1
, cryptohash-sha256 ^>= 0.11.101.0
Expand Down
5 changes: 3 additions & 2 deletions cabal-testsuite/src/Test/Cabal/Prelude.hs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ import Control.Monad (unless, when, void, forM_, liftM2, liftM4)
import Control.Monad.Trans.Reader (withReaderT, runReaderT)
import Control.Monad.IO.Class (MonadIO (..))
import qualified Crypto.Hash.SHA256 as SHA256
import qualified Data.ByteString.Base16 as Base16
import qualified Data.ByteString.Base64 as Base64
import qualified Data.ByteString.Char8 as C
import Data.List (isInfixOf, stripPrefix, isPrefixOf, intercalate)
import Data.List.NonEmpty (NonEmpty (..))
Expand Down Expand Up @@ -837,7 +837,8 @@ getScriptCacheDirectory :: FilePath -> TestM FilePath
getScriptCacheDirectory script = do
cabalDir <- testCabalDir `fmap` getTestEnv
hashinput <- liftIO $ canonicalizePath script
let hash = C.unpack . Base16.encode . SHA256.hash . C.pack $ hashinput
let hash = map (\c -> if c == '/' then '%' else c) . take 26
. C.unpack . Base64.encode . SHA256.hash . C.pack $ hashinput
return $ cabalDir </> "script-builds" </> hash

------------------------------------------------------------------------
Expand Down

0 comments on commit b603307

Please sign in to comment.