From 8f3244827c8e1e28c265481515a6097acde2f9e6 Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Fri, 5 Jan 2024 10:58:33 -0800 Subject: [PATCH] chore: Provide consistent Std.Data modules. (#498) This provides a script to help ensure there is a module for each directory entry in Std/Data and adds a test to verify and autofix. Co-authored-by: Mario Carneiro --- .github/workflows/build.yml | 9 +-- README.md | 7 +- Std.lean | 61 +++----------- Std/Data/Array.lean | 6 ++ Std/Data/BitVec.lean | 3 + Std/Data/Fin.lean | 4 + Std/Data/HashMap.lean | 1 + Std/Data/Int.lean | 3 + Std/Data/List.lean | 7 ++ Std/Data/MLList.lean | 3 + Std/Data/Nat.lean | 5 ++ Std/Data/Option.lean | 3 + Std/Data/Prod.lean | 1 + Std/Data/RBMap.lean | 2 + Std/Data/Range.lean | 1 + scripts/check_imports.lean | 157 ++++++++++++++++++++++++++++++++++++ scripts/updateStd.sh | 4 +- 17 files changed, 211 insertions(+), 66 deletions(-) create mode 100644 Std/Data/Array.lean create mode 100644 Std/Data/Fin.lean create mode 100644 Std/Data/Int.lean create mode 100644 Std/Data/List.lean create mode 100644 Std/Data/MLList.lean create mode 100644 Std/Data/Nat.lean create mode 100644 Std/Data/Option.lean create mode 100644 Std/Data/Prod.lean create mode 100644 Std/Data/Range.lean create mode 100644 scripts/check_imports.lean diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4f9e0e75d1..2f27f6d6a0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,10 +23,6 @@ jobs: - uses: actions/checkout@v2 - - name: update Std.lean - run: | - find Std -name "*.lean" | LC_ALL=C sort | sed 's/\.lean//;s,/,.,g;s/^/import /' > Std.lean - - name: build std id: build run: lake build -Kwerror @@ -39,9 +35,8 @@ jobs: if: steps.build.outcome == 'success' run: make lint - - name: check that all files are imported - if: always() - run: git diff --exit-code + - name: Check that all files are imported + run: lake env lean scripts/check_imports.lean - name: Check for forbidden character ↦ if: always() diff --git a/README.md b/README.md index ff399e6ddc..d538c6a4ac 100644 --- a/README.md +++ b/README.md @@ -14,11 +14,8 @@ Work in progress standard library for Lean 4. This is a collection of data struc ``` If this also fails, follow the instructions under `Regular install` [here](https://leanprover-community.github.io/get_started.html). * To build `std4` run `lake build`. To build and run all tests, run `make`. -* If you added a new file, run the following command to update `Std.lean`: - ``` - find Std -name "*.lean" | env LC_ALL=C sort | sed 's/\.lean//;s,/,.,g;s/^/import /' > Std.lean - ``` - (or use `scripts/updateStd.sh` which contains this command). +* If you added a new file, run the command `scripts/updateStd.sh` to update the + imports. # Documentation diff --git a/Std.lean b/Std.lean index 0ff42b013e..84d9780369 100644 --- a/Std.lean +++ b/Std.lean @@ -15,73 +15,30 @@ import Std.Control.ForInStep.Basic import Std.Control.ForInStep.Lemmas import Std.Control.Lemmas import Std.Control.Nondet.Basic -import Std.Data.Array.Basic -import Std.Data.Array.Init.Basic -import Std.Data.Array.Init.Lemmas -import Std.Data.Array.Lemmas -import Std.Data.Array.Match -import Std.Data.Array.Merge +import Std.Data.Array import Std.Data.AssocList import Std.Data.BinomialHeap -import Std.Data.BinomialHeap.Basic -import Std.Data.BinomialHeap.Lemmas import Std.Data.BitVec -import Std.Data.BitVec.Basic -import Std.Data.BitVec.Bitblast -import Std.Data.BitVec.Folds -import Std.Data.BitVec.Lemmas import Std.Data.Bool import Std.Data.ByteArray import Std.Data.Char import Std.Data.DList -import Std.Data.Fin.Basic -import Std.Data.Fin.Init.Lemmas -import Std.Data.Fin.Iterate -import Std.Data.Fin.Lemmas +import Std.Data.Fin import Std.Data.HashMap -import Std.Data.HashMap.Basic -import Std.Data.HashMap.Lemmas -import Std.Data.HashMap.WF -import Std.Data.Int.Basic -import Std.Data.Int.DivMod -import Std.Data.Int.Lemmas +import Std.Data.Int import Std.Data.Json -import Std.Data.List.Basic -import Std.Data.List.Count -import Std.Data.List.Init.Attach -import Std.Data.List.Init.Lemmas -import Std.Data.List.Lemmas -import Std.Data.List.Pairwise -import Std.Data.List.Perm -import Std.Data.MLList.Basic -import Std.Data.MLList.Heartbeats -import Std.Data.MLList.IO -import Std.Data.Nat.Basic -import Std.Data.Nat.Bitwise -import Std.Data.Nat.Gcd -import Std.Data.Nat.Init.Lemmas -import Std.Data.Nat.Lemmas -import Std.Data.Option.Basic -import Std.Data.Option.Init.Lemmas -import Std.Data.Option.Lemmas +import Std.Data.List +import Std.Data.MLList +import Std.Data.Nat +import Std.Data.Option import Std.Data.Ord import Std.Data.PairingHeap -import Std.Data.Prod.Lex +import Std.Data.Prod import Std.Data.RBMap -import Std.Data.RBMap.Alter -import Std.Data.RBMap.Basic -import Std.Data.RBMap.Lemmas -import Std.Data.RBMap.WF -import Std.Data.Range.Lemmas +import Std.Data.Range import Std.Data.Rat -import Std.Data.Rat.Basic -import Std.Data.Rat.Lemmas import Std.Data.String -import Std.Data.String.Basic -import Std.Data.String.Lemmas import Std.Data.Sum -import Std.Data.Sum.Basic -import Std.Data.Sum.Lemmas import Std.Data.UInt import Std.Lean.AttributeExtra import Std.Lean.Command diff --git a/Std/Data/Array.lean b/Std/Data/Array.lean new file mode 100644 index 0000000000..f32f416e07 --- /dev/null +++ b/Std/Data/Array.lean @@ -0,0 +1,6 @@ +import Std.Data.Array.Basic +import Std.Data.Array.Init.Basic +import Std.Data.Array.Init.Lemmas +import Std.Data.Array.Lemmas +import Std.Data.Array.Match +import Std.Data.Array.Merge diff --git a/Std/Data/BitVec.lean b/Std/Data/BitVec.lean index 8c97911b68..037cb6b6c9 100644 --- a/Std/Data/BitVec.lean +++ b/Std/Data/BitVec.lean @@ -1 +1,4 @@ import Std.Data.BitVec.Basic +import Std.Data.BitVec.Bitblast +import Std.Data.BitVec.Folds +import Std.Data.BitVec.Lemmas diff --git a/Std/Data/Fin.lean b/Std/Data/Fin.lean new file mode 100644 index 0000000000..bb8df44e80 --- /dev/null +++ b/Std/Data/Fin.lean @@ -0,0 +1,4 @@ +import Std.Data.Fin.Basic +import Std.Data.Fin.Init.Lemmas +import Std.Data.Fin.Iterate +import Std.Data.Fin.Lemmas diff --git a/Std/Data/HashMap.lean b/Std/Data/HashMap.lean index fe5724acdd..fbf74fca0d 100644 --- a/Std/Data/HashMap.lean +++ b/Std/Data/HashMap.lean @@ -1,2 +1,3 @@ import Std.Data.HashMap.Basic +import Std.Data.HashMap.Lemmas import Std.Data.HashMap.WF diff --git a/Std/Data/Int.lean b/Std/Data/Int.lean new file mode 100644 index 0000000000..3f2b36ad95 --- /dev/null +++ b/Std/Data/Int.lean @@ -0,0 +1,3 @@ +import Std.Data.Int.Basic +import Std.Data.Int.DivMod +import Std.Data.Int.Lemmas diff --git a/Std/Data/List.lean b/Std/Data/List.lean new file mode 100644 index 0000000000..137c762db9 --- /dev/null +++ b/Std/Data/List.lean @@ -0,0 +1,7 @@ +import Std.Data.List.Basic +import Std.Data.List.Count +import Std.Data.List.Init.Attach +import Std.Data.List.Init.Lemmas +import Std.Data.List.Lemmas +import Std.Data.List.Pairwise +import Std.Data.List.Perm diff --git a/Std/Data/MLList.lean b/Std/Data/MLList.lean new file mode 100644 index 0000000000..34eac57a66 --- /dev/null +++ b/Std/Data/MLList.lean @@ -0,0 +1,3 @@ +import Std.Data.MLList.Basic +import Std.Data.MLList.Heartbeats +import Std.Data.MLList.IO diff --git a/Std/Data/Nat.lean b/Std/Data/Nat.lean new file mode 100644 index 0000000000..4983ff4773 --- /dev/null +++ b/Std/Data/Nat.lean @@ -0,0 +1,5 @@ +import Std.Data.Nat.Basic +import Std.Data.Nat.Bitwise +import Std.Data.Nat.Gcd +import Std.Data.Nat.Init.Lemmas +import Std.Data.Nat.Lemmas diff --git a/Std/Data/Option.lean b/Std/Data/Option.lean new file mode 100644 index 0000000000..a7f6b1dacf --- /dev/null +++ b/Std/Data/Option.lean @@ -0,0 +1,3 @@ +import Std.Data.Option.Basic +import Std.Data.Option.Init.Lemmas +import Std.Data.Option.Lemmas diff --git a/Std/Data/Prod.lean b/Std/Data/Prod.lean new file mode 100644 index 0000000000..78ce8cfb58 --- /dev/null +++ b/Std/Data/Prod.lean @@ -0,0 +1 @@ +import Std.Data.Prod.Lex diff --git a/Std/Data/RBMap.lean b/Std/Data/RBMap.lean index a074f5fa7b..396a2cb6db 100644 --- a/Std/Data/RBMap.lean +++ b/Std/Data/RBMap.lean @@ -1,2 +1,4 @@ +import Std.Data.RBMap.Alter import Std.Data.RBMap.Basic +import Std.Data.RBMap.Lemmas import Std.Data.RBMap.WF diff --git a/Std/Data/Range.lean b/Std/Data/Range.lean new file mode 100644 index 0000000000..1131c1d673 --- /dev/null +++ b/Std/Data/Range.lean @@ -0,0 +1 @@ +import Std.Data.Range.Lemmas diff --git a/scripts/check_imports.lean b/scripts/check_imports.lean new file mode 100644 index 0000000000..2023954e45 --- /dev/null +++ b/scripts/check_imports.lean @@ -0,0 +1,157 @@ +/- +Copyright (c) 2024 Joe Hendrix. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Joe Hendrix +-/ +import Std + +/-! +This test checks that all directories in `Std/Data/` have corresponding +`Std.Data.` modules imported by `Std` that import all of the submodules +under that directory. + +It will also check that `Std` imports all the expected modules. + +It has a flag (`autofix` below) to automatically fix the errors found. This +command may need to be rerun to fix all errors; it tries to avoid overwriting +existing files. +-/ + +open Lean System + +/-- Monad to log errors to stderr while record error count. -/ +abbrev LogIO := StateRefT (Bool × Bool) IO + +def runLogIO (act : LogIO Unit) : MetaM Unit := do + let ((), (warnings, _)) ← act.run (false, false) + if warnings then + throwError "Fatal error" + +def warn (fixable : Bool) (msg : String) : LogIO Unit := do + modify (fun (_, u) => (true, u || not fixable)) + liftM (IO.eprintln msg) + +-- | Predicate indicates if warnings are present and if they fixable. +def getWarningInfo : LogIO (Bool × Bool) := get + +def createModuleHashmap (env : Environment) : HashMap Name ModuleData := Id.run do + let mut nameMap := {} + for i in [0:env.header.moduleNames.size] do + let nm := env.header.moduleNames[i]! + let md := env.header.moduleData[i]! + nameMap := nameMap.insert nm md + pure nameMap + +/-- Get the imports we expect in a directory of `Std.Data`. -/ +partial def addModulesIn (recurse : Bool) (prev : Array Name) (root : Name := .anonymous) + (path : FilePath) : IO (Array Name) := do + let mut r := prev + for entry in ← path.readDir do + if ← entry.path.isDir then + if recurse then + r ← addModulesIn recurse r (root.mkStr entry.fileName) entry.path + else + let .some mod := FilePath.fileStem entry.fileName + | continue + r := r.push (root.mkStr mod) + pure r + +def modulePath (name : Name) : FilePath := + let path := name.toString.replace "." FilePath.pathSeparator.toString + s!"{path}.lean" + +def writeImportModule (path : FilePath) (imports : Array Name) : IO Unit := do + let imports := imports.qsort (·.toString < ·.toString) + let lines := imports.map (s!"import {·}\n") + let contents := String.join lines.toList + IO.println s!"Generating {path}" + IO.FS.writeFile path contents + +/-- Check for imports and return true if warnings issued. -/ +def checkMissingImports (modName : Name) (modData : ModuleData) (reqImports : Array Name) : + LogIO Bool := do + let names : HashSet Name := HashSet.ofArray (modData.imports.map (·.module)) + let mut warned := false + for req in reqImports do + if !names.contains req then + warn true s!"Missing import {req} in {modName}" + warned := true + pure warned + +/-- Check directory entry in `Std/Data/` -/ +def checkStdDataDir + (modMap : HashMap Name ModuleData) + (entry : IO.FS.DirEntry) (autofix : Bool := false) : LogIO Unit := do + let moduleName := `Std.Data ++ entry.fileName + let requiredImports ← addModulesIn (recurse := true) #[] (root := moduleName) entry.path + let .some module := modMap.find? moduleName + | warn true s!"Could not find {moduleName}; Not imported into Std." + let path := modulePath moduleName + -- We refuse to generate imported modules whose path doesn't exist. + -- The import failure will be fixed later and the file rerun + if autofix then + if ← path.pathExists then + warn false s!"Skipping writing of {moduleName}: rerun after {moduleName} imported." + else + writeImportModule path requiredImports + return + let hasDecls : Bool := module.constants.size > 0 + if hasDecls then + warn false + s!"Expected {moduleName} to not contain additional declarations.\n\ + Declarations should be moved out.\n\ + This error cannot be automatically fixed." + let warned ← checkMissingImports moduleName module requiredImports + if autofix && warned && !hasDecls then + writeImportModule (modulePath moduleName) requiredImports + +/-- Compute imports expected by `Std.lean` -/ +def expectedStdImports : IO (Array Name) := do + let mut needed := #[] + for top in ← FilePath.readDir "Std" do + if top.fileName == "Data" then + needed ← addModulesIn (recurse := false) needed `Std.Data top.path + else + let nm := `Std + let rootname := FilePath.withExtension top.fileName "" + let root := nm.mkStr rootname.toString + if ← top.path.isDir then + needed ← addModulesIn (recurse := true) needed (root := root) top.path + else + needed := needed.push root + pure needed + +def checkStdDataImports : MetaM Unit := do + -- N.B. This can be used to automatically fix Std.lean as well as + -- other import files. + -- It uses an environment variable to do that. + -- The easiest way to use this is run `./scripts/updateStd.sh.` + let autofix := (← IO.getEnv "__LEAN_STD_AUTOFIX_IMPORTS").isSome + let env ← getEnv + let modMap := createModuleHashmap env + runLogIO do + for entry in ← FilePath.readDir ("Std" / "Data") do + if ← entry.path.isDir then + checkStdDataDir (autofix := autofix) modMap entry + let stdImports ← expectedStdImports + let .some stdMod := modMap.find? `Std + | warn false "Missing Std module!; Run `lake build`." + let warned ← checkMissingImports `Std stdMod stdImports + if autofix && warned then + writeImportModule "Std.lean" stdImports + match ← getWarningInfo with + | (false, _) => + pure () + | (_, true) => + IO.eprintln s!"Found errors that cannot be automatically fixed.\n\ + Address unfixable issues and rerun lake build && ./scripts/updateStd.sh." + | _ => + if autofix then + IO.eprintln s!"Found missing imports and attempted fixes.\n\ + Run lake build && ./scripts/updateStd.sh to verify.\n\ + Multiple runs may be needed." + else + IO.eprintln s!"Found missing imports.\n\ + Run lake build && ./scripts/updateStd.sh to attempt automatic fixes." + +run_meta checkStdDataImports diff --git a/scripts/updateStd.sh b/scripts/updateStd.sh index c2346c4a9a..a50b54437f 100755 --- a/scripts/updateStd.sh +++ b/scripts/updateStd.sh @@ -1,5 +1,5 @@ #!/bin/sh +set -e # This command updates the `Std.lean` file to include a list of all files # in the `Std` directory. -find Std -name "*.lean" | env LC_ALL=C sort \ - | sed 's/\.lean//;s,/,.,g;s/^/import /' > Std.lean +__LEAN_STD_AUTOFIX_IMPORTS=true lake env lean scripts/check_imports.lean