From 9514a75fd5eced9de4a3a613fedba00dc836c6c0 Mon Sep 17 00:00:00 2001 From: Mac Malone Date: Fri, 1 Nov 2024 18:26:54 -0400 Subject: [PATCH] feat: add `text` option for buildFile* utilities + related touchups --- src/lake/Lake/Build/Common.lean | 65 ++++++++++++------- src/lake/Lake/Build/Job.lean | 14 ++-- src/lake/Lake/Build/Trace.lean | 110 ++++++++++++++++++++------------ 3 files changed, 119 insertions(+), 70 deletions(-) diff --git a/src/lake/Lake/Build/Common.lean b/src/lake/Lake/Build/Common.lean index e5d723cf07a6..4917f5ad181a 100644 --- a/src/lake/Lake/Build/Common.lean +++ b/src/lake/Lake/Build/Common.lean @@ -161,15 +161,17 @@ def clearFileHash (file : FilePath) : IO Unit := do /-- Fetches the hash of a file that may already be cached in a `.hash` file. -If the `.hash` file does not exist or hash files are not trusted -(e.g., with `--rehash`), creates it with a newly computed hash. +If hash files are not trusted (e.g., with `--rehash`) or the `.hash` file does +not exist, it will be created with a newly computed hash. + +If `text := true`, `file` is hashed as a text file rather than a binary file. -/ -def fetchFileHash (file : FilePath) : JobM Hash := do +def fetchFileHash (file : FilePath) (text := false) : JobM Hash := do let hashFile := FilePath.mk <| file.toString ++ ".hash" if (← getTrustHash) then if let some hash ← Hash.load? hashFile then return hash - let hash ← computeHash file + let hash ← computeFileHash file text createParentDirs hashFile IO.FS.writeFile hashFile hash.toString return hash @@ -177,9 +179,11 @@ def fetchFileHash (file : FilePath) : JobM Hash := do /-- Fetches the trace of a file that may have already have its hash cached in a `.hash` file. If no such `.hash` file exists, recomputes and creates it. + +If `text := true`, `file` is hashed as text file rather than a binary file. -/ -def fetchFileTrace (file : FilePath) : JobM BuildTrace := do - return .mk (← fetchFileHash file) (← getMTime file) +def fetchFileTrace (file : FilePath) (text := false) : JobM BuildTrace := do + return .mk (← fetchFileHash file text) (← getMTime file) /-- Builds `file` using `build` unless it already exists and `depTrace` matches @@ -191,66 +195,83 @@ a `.log.json` file and replayed from there if the build is skipped. For example, given `file := "foo.c"`, compares `depTrace` with that in `foo.c.trace` with the hash cached in `foo.c.hash` and the log cached in `foo.c.trace`. + +If `text := true`, `file` is hashed as a text file rather than a binary file. -/ def buildFileUnlessUpToDate - (file : FilePath) (depTrace : BuildTrace) (build : JobM PUnit) + (file : FilePath) (depTrace : BuildTrace) (build : JobM PUnit) (text := false) : JobM BuildTrace := do let traceFile := FilePath.mk <| file.toString ++ ".trace" buildUnlessUpToDate file depTrace traceFile do build clearFileHash file - fetchFileTrace file + fetchFileTrace file text /-- Build `file` using `build` after `dep` completes if the dependency's trace (and/or `extraDepTrace`) has changed. + +If `text := true`, `file` is handled as a text file rather than a binary file. -/ @[inline] def buildFileAfterDep (file : FilePath) (dep : BuildJob α) (build : α → JobM PUnit) - (extraDepTrace : JobM _ := pure BuildTrace.nil) + (extraDepTrace : JobM _ := pure BuildTrace.nil) (text := false) : SpawnM (BuildJob FilePath) := dep.bindSync fun depInfo depTrace => do let depTrace := depTrace.mix (← extraDepTrace) - let trace ← buildFileUnlessUpToDate file depTrace <| build depInfo + let trace ← buildFileUnlessUpToDate file depTrace (build depInfo) text return (file, trace) -/-- Build `file` using `build` after `deps` have built if any of their traces change. -/ +/-- +Build `file` using `build` after `deps` have built if any of their traces change. + +If `text := true`, `file` is handled as a text file rather than a binary file. +-/ @[inline] def buildFileAfterDepList (file : FilePath) (deps : List (BuildJob α)) (build : List α → JobM PUnit) - (extraDepTrace : JobM _ := pure BuildTrace.nil) + (extraDepTrace : JobM _ := pure BuildTrace.nil) (text := false) : SpawnM (BuildJob FilePath) := do - buildFileAfterDep file (← BuildJob.collectList deps) build extraDepTrace + buildFileAfterDep file (.collectList deps) build extraDepTrace text + +/-- +Build `file` using `build` after `deps` have built if any of their traces change. -/-- Build `file` using `build` after `deps` have built if any of their traces change. -/ +If `text := true`, `file` is handled as a text file rather than a binary file. +-/ @[inline] def buildFileAfterDepArray (file : FilePath) (deps : Array (BuildJob α)) (build : Array α → JobM PUnit) - (extraDepTrace : JobM _ := pure BuildTrace.nil) + (extraDepTrace : JobM _ := pure BuildTrace.nil) (text := false) : SpawnM (BuildJob FilePath) := do - buildFileAfterDep file (← BuildJob.collectArray deps) build extraDepTrace + buildFileAfterDep file (.collectArray deps) build extraDepTrace text /-! ## Common Builds -/ /-- A build job for binary file that is expected to already exist (e.g., a data blob). -Any byte difference in the file will trigger a rebuild of dependents. + +Any byte difference in a binary file will trigger a rebuild of its dependents. -/ def inputBinFile (path : FilePath) : SpawnM (BuildJob FilePath) := Job.async <| (path, ·) <$> computeTrace path /-- A build job for text file that is expected to already exist (e.g., a source file). -Normalizes line endings (converts CRLF to LF) to produce platform-independent traces. + +Text file traces have normalized line endings to avoid unnecessary rebuilds across platforms. -/ def inputTextFile (path : FilePath) : SpawnM (BuildJob FilePath) := Job.async <| (path, ·) <$> computeTrace (TextFilePath.mk path) /-- -A build job for file that is expected to already exist. +A build job for file that is expected to already exist (e.g., a data blob or source file). -**Deprecated:** Use either `inputTextFile` or `inputBinFile`. -`inputTextFile` normalizes line endings to produce platform-independent traces. +If `text := true`, the file is handled as a text file rather than a binary file. +Any byte difference in a binary file will trigger a rebuild of its dependents. +In contrast, text file traces have normalized line endings to avoid unnecessary +rebuilds across platforms. -/ -@[deprecated (since := "2024-06-08")] abbrev inputFile := @inputBinFile +@[inline] def inputFile (path : FilePath) (text : Bool) : SpawnM (BuildJob FilePath) := + if text then inputTextFile path else inputBinFile path /-- Build an object file from a source file job using `compiler`. The invocation is: diff --git a/src/lake/Lake/Build/Job.lean b/src/lake/Lake/Build/Job.lean index 6c909644b8f2..4bc441c18e05 100644 --- a/src/lake/Lake/Build/Job.lean +++ b/src/lake/Lake/Build/Job.lean @@ -298,11 +298,11 @@ instance : Functor BuildJob where @[inline] protected def wait? (self : BuildJob α) : BaseIO (Option α) := (·.map (·.1)) <$> self.toJob.wait? -def add (t1 : BuildJob α) (t2 : BuildJob β) : BuildJob α := - mk <| t1.toJob.zipWith (fun a _ => a) t2.toJob +def add (self : BuildJob α) (other : BuildJob β) : BuildJob α := + mk <| self.toJob.zipWith (fun a _ => a) other.toJob -def mix (t1 : BuildJob α) (t2 : BuildJob β) : BuildJob Unit := - mk <| t1.toJob.zipWith (fun (_,t) (_,t') => ((), mixTrace t t')) t2.toJob +def mix (self : BuildJob α) (other : BuildJob β) : BuildJob Unit := + mk <| self.toJob.zipWith (fun (_,t) (_,t') => ((), mixTrace t t')) other.toJob def mixList (jobs : List (BuildJob α)) : Id (BuildJob Unit) := ofJob $ jobs.foldr (·.toJob.zipWith (fun (_,t') t => mixTrace t t') ·) (pure nilTrace) @@ -311,12 +311,12 @@ def mixArray (jobs : Array (BuildJob α)) : Id (BuildJob Unit) := ofJob $ jobs.foldl (·.zipWith (fun t (_,t') => mixTrace t t') ·.toJob) (pure nilTrace) def zipWith - (f : α → β → γ) (t1 : BuildJob α) (t2 : BuildJob β) + (f : α → β → γ) (self : BuildJob α) (other : BuildJob β) : BuildJob γ := - mk <| t1.toJob.zipWith (fun (a,t) (b,t') => (f a b, mixTrace t t')) t2.toJob + mk <| self.toJob.zipWith (fun (a,t) (b,t') => (f a b, mixTrace t t')) other.toJob def collectList (jobs : List (BuildJob α)) : Id (BuildJob (List α)) := return jobs.foldr (zipWith List.cons) (pure []) def collectArray (jobs : Array (BuildJob α)) : Id (BuildJob (Array α)) := - return jobs.foldl (zipWith Array.push) (pure #[]) + return jobs.foldl (zipWith Array.push) (pure (Array.mkEmpty jobs.size)) diff --git a/src/lake/Lake/Build/Trace.lean b/src/lake/Lake/Build/Trace.lean index a8323e961620..204886677d42 100644 --- a/src/lake/Lake/Build/Trace.lean +++ b/src/lake/Lake/Build/Trace.lean @@ -6,12 +6,20 @@ Authors: Mac Malone import Lake.Util.IO import Lean.Data.Json +/-! # Lake Traces + +This module defines Lake traces and associated utilities. +Traces are used to determine whether a Lake build artifact is *dirty* +(needs to be rebuilt) or is already *up-to-date*. +The primary type is `Lake.BuildTrace`. +-/ + open System Lean namespace Lake -------------------------------------------------------------------------------- -/-! # Utilities -/ +/-! ## Utilities -/ -------------------------------------------------------------------------------- class CheckExists.{u} (i : Type u) where @@ -24,60 +32,63 @@ instance : CheckExists FilePath where checkExists := FilePath.pathExists -------------------------------------------------------------------------------- -/-! # Trace Abstraction -/ +/-! ## Trace Abstraction -/ -------------------------------------------------------------------------------- -class ComputeTrace.{u,v,w} (i : Type u) (m : outParam $ Type v → Type w) (t : Type v) where - /-- Compute the trace of some target info using information from the monadic context. -/ - computeTrace : i → m t +class ComputeTrace (α : Type u) (m : outParam $ Type v → Type w) (τ : Type v) where + /-- Compute the trace of an object in its preferred monad. -/ + computeTrace : α → m τ -@[inline] def computeTrace [ComputeTrace i m t] [MonadLiftT m n] (info : i) : n t := - liftM <| ComputeTrace.computeTrace info +/-- Compute the trace of an object in a supporting monad. -/ +@[inline] def computeTrace [ComputeTrace α m τ] [MonadLiftT m n] (a : α) : n τ := + liftM <| ComputeTrace.computeTrace a -class NilTrace.{u} (t : Type u) where +class NilTrace.{u} (α : Type u) where /-- The nil trace. Should not unduly clash with a proper trace. -/ - nilTrace : t + nilTrace : α export NilTrace (nilTrace) -instance inhabitedOfNilTrace [NilTrace t] : Inhabited t := ⟨nilTrace⟩ +instance inhabitedOfNilTrace [NilTrace α] : Inhabited α := ⟨nilTrace⟩ -class MixTrace.{u} (t : Type u) where +class MixTrace.{u} (α : Type u) where /-- Combine two traces. The result should be dirty if either of the inputs is dirty. -/ - mixTrace : t → t → t + mixTrace : α → α → α export MixTrace (mixTrace) section -variable [MixTrace t] [NilTrace t] +variable [MixTrace τ] [NilTrace τ] -def mixTraceList (traces : List t) : t := +/- Combine a `List` of traces (left-to-right). -/ +def mixTraceList (traces : List τ) : τ := traces.foldl mixTrace nilTrace -def mixTraceArray (traces : Array t) : t := +/- Combine an `Array` of traces (left-to-right). -/ +def mixTraceArray (traces : Array τ) : τ := traces.foldl mixTrace nilTrace -variable [ComputeTrace i m t] +variable [ComputeTrace α m τ] -@[specialize] def computeListTrace [MonadLiftT m n] [Monad n] (artifacts : List i) : n t := - artifacts.foldlM (fun ts t => return mixTrace ts (← computeTrace t)) nilTrace +/- Compute the trace of each element of a `List` and combine them (left-to-right). -/ +@[inline] def computeListTrace [MonadLiftT m n] [Monad n] (as : List α) : n τ := + as.foldlM (fun ts t => return mixTrace ts (← computeTrace t)) nilTrace -instance [Monad m] : ComputeTrace (List i) m t := ⟨computeListTrace⟩ +instance [Monad m] : ComputeTrace (List α) m τ := ⟨computeListTrace⟩ -@[specialize] def computeArrayTrace [MonadLiftT m n] [Monad n] (artifacts : Array i) : n t := - artifacts.foldlM (fun ts t => return mixTrace ts (← computeTrace t)) nilTrace +/- Compute the trace of each element of an `Array` and combine them (left-to-right). -/ +@[inline] def computeArrayTrace [MonadLiftT m n] [Monad n] (as : Array α) : n τ := + as.foldlM (fun ts t => return mixTrace ts (← computeTrace t)) nilTrace -instance [Monad m] : ComputeTrace (Array i) m t := ⟨computeArrayTrace⟩ +instance [Monad m] : ComputeTrace (Array α) m τ := ⟨computeArrayTrace⟩ end -------------------------------------------------------------------------------- -/-! # Hash Trace -/ +/-! ## Hash Trace -/ -------------------------------------------------------------------------------- -/-- -A content hash. -TODO: Use a secure hash rather than the builtin Lean hash function. --/ +/-- A content hash. -/ +-- TODO: Use a secure hash rather than the builtin Lean hash function. structure Hash where val : UInt64 deriving BEq, DecidableEq, Repr @@ -127,51 +138,66 @@ instance : FromJson Hash := ⟨Hash.fromJson?⟩ end Hash class ComputeHash (α : Type u) (m : outParam $ Type → Type v) where + /-- Compute the hash of an object in its preferred monad. -/ computeHash : α → m Hash instance [ComputeHash α m] : ComputeTrace α m Hash := ⟨ComputeHash.computeHash⟩ +/-- Compute the hash of object `a` in a pure context. -/ @[inline] def pureHash [ComputeHash α Id] (a : α) : Hash := ComputeHash.computeHash a +/-- Compute the hash an object in an supporting monad. -/ @[inline] def computeHash [ComputeHash α m] [MonadLiftT m n] (a : α) : n Hash := liftM <| ComputeHash.computeHash a instance : ComputeHash String Id := ⟨Hash.ofString⟩ -def computeFileHash (file : FilePath) : IO Hash := +/-- +Compute the hash of a binary file. +Binary files are equivalent only if they are byte identical. +-/ +def computeBinFileHash (file : FilePath) : IO Hash := Hash.ofByteArray <$> IO.FS.readBinFile file -instance : ComputeHash FilePath IO := ⟨computeFileHash⟩ +instance : ComputeHash FilePath IO := ⟨computeBinFileHash⟩ +/-- +Compute the hash of a text file. +Normalizes `\r\n` sequences to `\n` for cross-platform compatibility. +-/ def computeTextFileHash (file : FilePath) : IO Hash := do let text ← IO.FS.readFile file let text := text.crlfToLf return Hash.ofString text /-- - A wrapper around `FilePath` that adjusts its `ComputeHash` implementation - to normalize `\r\n` sequences to `\n` for cross-platform compatibility. -/ +A wrapper around `FilePath` that adjusts its `ComputeHash` implementation +to normalize `\r\n` sequences to `\n` for cross-platform compatibility. +-/ structure TextFilePath where path : FilePath instance : Coe TextFilePath FilePath := ⟨(·.path)⟩ +instance : ComputeHash TextFilePath IO := ⟨computeTextFileHash⟩ -instance : ComputeHash TextFilePath IO where - computeHash file := computeTextFileHash file +/-- Compute the hash of a file. If `text := true`, normalize line endings. -/ +@[inline] def computeFileHash (file : FilePath) (text := false) : IO Hash := + if text then computeTextFileHash file else computeBinFileHash file -@[specialize] def computeArrayHash [ComputeHash α m] [Monad m] (xs : Array α) : m Hash := - xs.foldlM (fun h a => return h.mix (← computeHash a)) .nil +/-- Compute the hash of each element of an array and combine them (left-to-right). -/ +@[inline] def computeArrayHash [ComputeHash α m] [Monad m] (as : Array α) : m Hash := + computeArrayTrace as instance [ComputeHash α m] [Monad m] : ComputeHash (Array α) m := ⟨computeArrayHash⟩ -------------------------------------------------------------------------------- -/-! # Modification Time (MTime) Trace -/ +/-! ## Modification Time (MTime) Trace -/ -------------------------------------------------------------------------------- open IO.FS (SystemTime) -/-- A modification time. -/ +/-- A modification time (e.g., of a file). -/ def MTime := SystemTime namespace MTime @@ -192,12 +218,14 @@ instance : MixTrace MTime := ⟨max⟩ end MTime -class GetMTime (α) where +class GetMTime (α : Type u) where + /-- Return the modification time of an object. -/ getMTime : α → IO MTime export GetMTime (getMTime) instance [GetMTime α] : ComputeTrace α IO MTime := ⟨getMTime⟩ +/-- Return the modification time of a file recorded by the OS. -/ @[inline] def getFileMTime (file : FilePath) : IO MTime := return (← file.metadata).modified @@ -216,7 +244,7 @@ That is, check if the info is newer than `self`. | .error _ => return false -------------------------------------------------------------------------------- -/-! # Lake Build Trace (Hash + MTIme) -/ +/-! ## Lake Build Trace -/ -------------------------------------------------------------------------------- /-- Trace used for common Lake targets. Combines `Hash` and `MTime`. -/ @@ -242,10 +270,10 @@ def nil : BuildTrace := instance : NilTrace BuildTrace := ⟨nil⟩ -@[specialize] def compute [ComputeHash i m] [MonadLiftT m IO] [GetMTime i] (info : i) : IO BuildTrace := +@[specialize] def compute [ComputeHash α m] [MonadLiftT m IO] [GetMTime α] (info : α) : IO BuildTrace := return mk (← computeHash info) (← getMTime info) -instance [ComputeHash i m] [MonadLiftT m IO] [GetMTime i] : ComputeTrace i IO BuildTrace := ⟨compute⟩ +instance [ComputeHash α m] [MonadLiftT m IO] [GetMTime α] : ComputeTrace α IO BuildTrace := ⟨compute⟩ def mix (t1 t2 : BuildTrace) : BuildTrace := mk (Hash.mix t1.hash t2.hash) (max t1.mtime t2.mtime)