Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve preprocessing performance #10534

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions Cabal/src/Distribution/Simple/Build.hs
Original file line number Diff line number Diff line change
Expand Up @@ -614,8 +614,11 @@ generateCode
-> Verbosity
-> IO (SymbolicPath Pkg (Dir Source), [ModuleName.ModuleName])
generateCode codeGens nm pdesc bi lbi clbi verbosity = do
debug verbosity $ "generateCode: " <> prettyShow (package pdesc)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a lot of extra debug statements here to help me identify where we were losing time. I can remove those if desired.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please can you remove these, I think you can see in the cost centre profile there is a hot-spot in findCwdWithExtension, which is also what you identified.

when (not . null $ codeGens) $ createDirectoryIfMissingVerbose verbosity True $ i tgtDir
(\x -> (tgtDir, x)) . concat <$> mapM go codeGens
ret <- (\x -> (tgtDir, x)) . concat <$> mapM go codeGens
debug verbosity "generateCode complete"
pure ret
where
allLibs = (maybe id (:) $ library pdesc) (subLibraries pdesc)
dependencyLibs = filter (const True) allLibs -- intersect with componentPackageDeps of clbi
Expand All @@ -625,7 +628,8 @@ generateCode codeGens nm pdesc bi lbi clbi verbosity = do
i = interpretSymbolicPath mbWorkDir -- See Note [Symbolic paths] in Distribution.Utils.Path
tgtDir = buildDir lbi </> makeRelativePathEx (nm' </> nm' ++ "-gen")
go :: String -> IO [ModuleName.ModuleName]
go codeGenProg =
go codeGenProg = do
debug verbosity $ "Performing codegen: " <> codeGenProg
fmap fromString . lines
<$> getDbProgramOutputCwd
verbosity
Expand Down
141 changes: 80 additions & 61 deletions Cabal/src/Distribution/Simple/PreProcess.hs
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@ import Distribution.Compat.Prelude
import Distribution.Compat.Stack
import Prelude ()

import Control.Concurrent.Async
import Distribution.Backpack.DescribeUnitId
import qualified Distribution.InstalledPackageInfo as Installed
import Distribution.ModuleName (ModuleName)
import Distribution.Package
import Distribution.PackageDescription as PD
import Distribution.Pretty
import Distribution.Simple.BuildPaths
import Distribution.Simple.CCompiler
import Distribution.Simple.Compiler
Expand Down Expand Up @@ -159,14 +161,16 @@ preprocessComponent pd comp lbi clbi isSrcDist verbosity handlers =
(Nothing :: Maybe [(ModuleName, Module)])
case comp of
(CLib lib@Library{libBuildInfo = bi}) -> do
debug verbosity $ "Preprocessing library: " <> show (libName lib)
let dirs =
hsSourceDirs bi
++ [autogenComponentModulesDir lbi clbi, autogenPackageModulesDir lbi]
let hndlrs = localHandlers bi
mods <- orderingFromHandlers verbosity dirs hndlrs (allLibModules lib clbi)
for_ (map moduleNameSymbolicPath mods) $
for_ $
pre dirs (componentBuildDir lbi clbi) hndlrs
(CFLib flib@ForeignLib{foreignLibBuildInfo = bi}) -> do
debug verbosity $ "Preprocessing foreign library: " <> prettyShow (foreignLibName flib)
let flibDir = flibBuildDir lbi flib
dirs =
hsSourceDirs bi
Expand All @@ -186,6 +190,7 @@ preprocessComponent pd comp lbi clbi isSrcDist verbosity handlers =
]
let hndlrs = localHandlers bi
mods <- orderingFromHandlers verbosity dirs hndlrs (otherModules bi)
debug verbosity $ "Module count: " <> show (length mods)
for_ (map moduleNameSymbolicPath mods) $
pre dirs exeDir hndlrs
pre (hsSourceDirs bi) exeDir (localHandlers bi) $
Expand All @@ -208,8 +213,11 @@ preprocessComponent pd comp lbi clbi isSrcDist verbosity handlers =
BenchmarkUnsupported tt ->
dieWithException verbosity $ NoSupportForPreProcessingBenchmark tt
where
orderingFromHandlers v d hndlrs mods =
foldM (\acc (_, pp) -> ppOrdering pp v d acc) mods hndlrs
orderingFromHandlers v d hndlrs mods = do
debug v $ " orderingFromHandlers begin"
a <- foldM (\acc (_, pp) -> ppOrdering pp v d acc) mods hndlrs
debug v $ " orderingFromHandlers end"
pure a
builtinCSuffixes = map Suffix cSourceExtensions
builtinSuffixes = builtinHaskellSuffixes ++ builtinCSuffixes
localHandlers bi = [(ext, h bi lbi clbi) | (ext, h) <- handlers]
Expand Down Expand Up @@ -292,10 +300,11 @@ preprocessFile
-- ^ fail on missing file
-> IO ()
preprocessFile mbWorkDir searchLoc buildLoc forSDist baseFile verbosity builtinSuffixes handlers failOnMissing = do
debug verbosity $ "preprocessFile: " <> prettyShow baseFile
bsrcFiles <- findFileCwdWithExtension mbWorkDir builtinSuffixes (searchLoc ++ [buildAsSrcLoc]) baseFile
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First, we try and find a regular Haskell file.

We also add buildAsSrcLoc] to the end of the list, since the locations are tried in-order. Most likely, we're going to be in the first directory we're searching in, so this should eliminate a lot of file lookups.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this is a change in behaviour? Before, if you had a .hs and a .y file in the directory, would it prefer to regenerate the .hs from the .y file?

It seems that it would then prefer the generated .y file rather than the .hs file in the directory, because it searched the build directory first?

Could we have some tests to check this behaviour?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good question - what is the expected behavior of having multiple files with different extensions in directory? Rather, what should happen if we have a directory project like this:

src/Foo.hs
src/Foo.y

The old code would ignore src/Foo.hs in favor of src/Foo.y. The new code ignores src/Foo.y in favor of src/Foo.hs.

This is definitely a change in behavior, but it would require a project to be poorly structured - somehow having Foo.hs and Foo.y together, with the expectation that Foo.hs is never used.

-- look for files in the various source dirs with this module name
-- and a file extension of a known preprocessor
psrcFiles <- findFileCwdWithExtension' mbWorkDir (map fst handlers) searchLoc baseFile
case psrcFiles of
case bsrcFiles of
-- no preprocessor file exists, look for an ordinary source file
-- just to make sure one actually exists at all for this module.

Expand All @@ -307,48 +316,56 @@ preprocessFile mbWorkDir searchLoc buildLoc forSDist baseFile verbosity builtinS
-- files generate source modules directly into the build dir without
-- the rest of the build system being aware of it (somewhat dodgy)
Nothing -> do
bsrcFiles <- findFileCwdWithExtension mbWorkDir builtinSuffixes (buildAsSrcLoc : searchLoc) baseFile
case (bsrcFiles, failOnMissing) of
(Nothing, True) ->
dieWithException verbosity $
CantFindSourceForPreProcessFile $
"can't find source for "
++ getSymbolicPath baseFile
++ " in "
++ intercalate ", " (map getSymbolicPath searchLoc)
_ -> return ()
-- found a pre-processable file in one of the source dirs
Just (psrcLoc, psrcRelFile) -> do
let (srcStem, ext) = splitExtension $ getSymbolicPath psrcRelFile
psrcFile = psrcLoc </> psrcRelFile
pp =
fromMaybe
(error "Distribution.Simple.PreProcess: Just expected")
(lookup (Suffix $ safeTail ext) handlers)
-- Preprocessing files for 'sdist' is different from preprocessing
-- for 'build'. When preprocessing for sdist we preprocess to
-- avoid that the user has to have the preprocessors available.
-- ATM, we don't have a way to specify which files are to be
-- preprocessed and which not, so for sdist we only process
-- platform independent files and put them into the 'buildLoc'
-- (which we assume is set to the temp. directory that will become
-- the tarball).
-- TODO: eliminate sdist variant, just supply different handlers
when (not forSDist || forSDist && platformIndependent pp) $ do
-- look for existing pre-processed source file in the dest dir to
-- see if we really have to re-run the preprocessor.
ppsrcFiles <- findFileCwdWithExtension mbWorkDir builtinSuffixes [buildAsSrcLoc] baseFile
recomp <- case ppsrcFiles of
Nothing -> return True
Just ppsrcFile ->
i psrcFile `moreRecentFile` i ppsrcFile
when recomp $ do
let destDir = i buildLoc </> takeDirectory srcStem
createDirectoryIfMissingVerbose verbosity True destDir
runPreProcessorWithHsBootHack
pp
(getSymbolicPath $ psrcLoc, getSymbolicPath $ psrcRelFile)
(getSymbolicPath $ buildLoc, srcStem <.> "hs")
psrcFiles <- findFileCwdWithExtension' mbWorkDir (map fst handlers) searchLoc baseFile
case psrcFiles of
Nothing ->
when failOnMissing $ do
dieWithException verbosity $
CantFindSourceForPreProcessFile $
"can't find source for "
++ getSymbolicPath baseFile
++ " in "
++ intercalate ", " (map getSymbolicPath searchLoc)

Just (psrcLoc, psrcRelFile) -> do
debug verbosity $ " Found pre-processable file: " <> prettyShow psrcLoc
let (srcStem, ext) = splitExtension $ getSymbolicPath psrcRelFile
psrcFile = psrcLoc </> psrcRelFile
pp =
fromMaybe
(error "Distribution.Simple.PreProcess: Just expected")
(lookup (Suffix $ safeTail ext) handlers)
-- Preprocessing files for 'sdist' is different from preprocessing
-- for 'build'. When preprocessing for sdist we preprocess to
-- avoid that the user has to have the preprocessors available.
-- ATM, we don't have a way to specify which files are to be
-- preprocessed and which not, so for sdist we only process
-- platform independent files and put them into the 'buildLoc'
-- (which we assume is set to the temp. directory that will become
-- the tarball).
-- TODO: eliminate sdist variant, just supply different handlers
when (not forSDist || forSDist && platformIndependent pp) $ do
debug verbosity " Searching for existing pre-processed source file"
-- look for existing pre-processed source file in the dest dir to
-- see if we really have to re-run the preprocessor.
ppsrcFiles <- findFileCwdWithExtension mbWorkDir builtinSuffixes [buildAsSrcLoc] baseFile
recomp <- case ppsrcFiles of
Nothing -> return True
Just ppsrcFile ->
i psrcFile `moreRecentFile` i ppsrcFile
when recomp $ do
debug verbosity " Preprocessing file. . ."
let destDir = i buildLoc </> takeDirectory srcStem
createDirectoryIfMissingVerbose verbosity True destDir
runPreProcessorWithHsBootHack
pp
(getSymbolicPath $ psrcLoc, getSymbolicPath $ psrcRelFile)
(getSymbolicPath $ buildLoc, srcStem <.> "hs")
debug verbosity $ "Preprocessing file complete: " <> prettyShow baseFile

-- found a non-processable file in one of the source dirs
Just _ -> do
pure ()
where
i = interpretSymbolicPath mbWorkDir -- See Note [Symbolic paths] in Distribution.Utils.Path
buildAsSrcLoc :: SymbolicPath Pkg (Dir Source)
Expand Down Expand Up @@ -897,20 +914,22 @@ preprocessExtras
-> Component
-> LocalBuildInfo
-> IO [SymbolicPath Pkg File]
preprocessExtras verbosity comp lbi = case comp of
CLib _ -> pp $ buildDir lbi
(CExe exe@Executable{}) -> pp $ exeBuildDir lbi exe
(CFLib flib@ForeignLib{}) -> pp $ flibBuildDir lbi flib
CTest test ->
case testInterface test of
TestSuiteUnsupported tt ->
dieWithException verbosity $ NoSupportPreProcessingTestExtras tt
_ -> pp $ testBuildDir lbi test
CBench bm ->
case benchmarkInterface bm of
BenchmarkUnsupported tt ->
dieWithException verbosity $ NoSupportPreProcessingBenchmarkExtras tt
_ -> pp $ benchmarkBuildDir lbi bm
preprocessExtras verbosity comp lbi = do
debug verbosity $ "in preprocessExtras"
case comp of
CLib _ -> pp $ buildDir lbi
(CExe exe@Executable{}) -> pp $ exeBuildDir lbi exe
(CFLib flib@ForeignLib{}) -> pp $ flibBuildDir lbi flib
CTest test ->
case testInterface test of
TestSuiteUnsupported tt ->
dieWithException verbosity $ NoSupportPreProcessingTestExtras tt
_ -> pp $ testBuildDir lbi test
CBench bm ->
case benchmarkInterface bm of
BenchmarkUnsupported tt ->
dieWithException verbosity $ NoSupportPreProcessingBenchmarkExtras tt
_ -> pp $ benchmarkBuildDir lbi bm
where
pp :: SymbolicPath Pkg (Dir Build) -> IO [SymbolicPath Pkg File]
pp builddir = do
Expand Down
Loading