Skip to content

Commit

Permalink
Merge pull request #371 from sjrd/maybe-lazy-classpaths
Browse files Browse the repository at this point in the history
Redesign Classpaths so that they could lazily read their files.
  • Loading branch information
bishabosha authored Oct 31, 2023
2 parents 4d4e7b1 + 97c1bf0 commit f7f540b
Show file tree
Hide file tree
Showing 10 changed files with 278 additions and 176 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ object ClasspathLoaders:
* to create a [[Contexts.Context]]. The latter gives semantic access to all
* the definitions on the classpath.
*
* @note the resulting [[Classpaths.Classpath.Entry Classpath.Entry]] entries of
* @note the resulting [[Classpaths.ClasspathEntry ClasspathEntry]] entries of
* the returned [[Classpaths.Classpath]] correspond to the elements of `classpath`.
*/
def read(classpath: List[String])(using ExecutionContext): Future[Classpath] =
Expand All @@ -55,28 +55,28 @@ object ClasspathLoaders:
}
}

def makeEntry(allFiles: Seq[FileContent]): Classpath.Entry =
def makeEntry(entryDebugPath: String, allFiles: Seq[FileContent]): ClasspathEntry =
val packageDatas = allFiles
.groupMap[String, ClassData | TastyData](_.packagePath) { fileContent =>
.groupMap[String, InMemory.ClassData](_.packagePath) { fileContent =>
val isClassFile = fileContent.name.endsWith(".class")
val binaryName =
if isClassFile then fileContent.name.stripSuffix(".class")
else fileContent.name.stripSuffix(".tasty")
if isClassFile then ClassData(binaryName, fileContent.debugPath, fileContent.content)
else TastyData(binaryName, fileContent.debugPath, fileContent.content)
if isClassFile then InMemory.ClassData(fileContent.debugPath, binaryName, None, Some(fileContent.content))
else InMemory.ClassData(fileContent.debugPath, binaryName, Some(fileContent.content), None)
}
.map { (packagePath, classAndTastys) =>
.map { (packagePath, allClassDatas) =>
val packageDebugPath = entryDebugPath + ":" + packagePath
val packageName = packagePath.replace('/', '.').nn
val (classes, tastys) = classAndTastys.partitionMap {
case classData: ClassData => Left(classData)
case tastyData: TastyData => Right(tastyData)
}
PackageData(packageName, IArray.from(classes.sortBy(_.binaryName)), IArray.from(tastys.sortBy(_.binaryName)))
val mergedClassDatas =
allClassDatas.groupMapReduce(_.binaryName)(identity)(_.combineWith(_)).valuesIterator.toList
InMemory.PackageData(packageDebugPath, packageName, mergedClassDatas)
}
Classpath.Entry(IArray.from(packageDatas))
.toList
InMemory.ClasspathEntry(entryDebugPath, packageDatas)
end makeEntry

for allEntries <- allEntriesFuture yield Classpath(IArray.from(allEntries).map(makeEntry))
for allEntries <- allEntriesFuture yield classpath.lazyZip(allEntries).map(makeEntry(_, _))
end read

private def fromDirectory(dir: String, relPath: String)(implicit ec: ExecutionContext): Future[Seq[FileContent]] =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@ import tastyquery.Classpaths.*
*/
object ClasspathLoaders {

private given Ordering[ClassData] = Ordering.by(_.binaryName)
private given Ordering[TastyData] = Ordering.by(_.binaryName)
private given Ordering[PackageData] = Ordering.by(_.dotSeparatedName)

private enum FileKind(val ext: String):
case Class extends FileKind("class")
case Tasty extends FileKind("tasty")
Expand All @@ -44,7 +40,7 @@ object ClasspathLoaders {
* to create a [[Contexts.Context]]. The latter gives semantic access to all
* the definitions on the classpath.
*
* @note the resulting [[Classpaths.Classpath.Entry Classpath.Entry]] entries of
* @note the resulting [[Classpaths.ClasspathEntry ClasspathEntry]] entries of
* the returned [[Classpaths.Classpath]] correspond to the elements of `classpath`.
*/
def read(classpath: List[Path]): Classpath =
Expand All @@ -70,36 +66,40 @@ object ClasspathLoaders {
classFile.replace('/', '.').nn.replace('\\', '.').nn
end binaryName

def compressPackageData(data: List[(String, ClassData | TastyData)]): IArray[PackageData] =
val groupedPackages = IArray.from(data).groupMap((pkg, _) => pkg)((_, data) => data)
val pkgs = groupedPackages.map { (pkg, classAndTastys) =>
val (classes, tastys) = classAndTastys.partitionMap {
case classData: ClassData => Left(classData)
case tastyData: TastyData => Right(tastyData)
}
PackageData(pkg, classes.sorted, tastys.sorted)
}
IArray.from(pkgs).sorted
/** Groups the class datas of one classpath entry into per-package data.
 *
 *  Items of `data` that share a package name end up in the same
 *  `InMemory.PackageData`. Within a package, items that share a binary name
 *  are merged with `combineWith`, so that each binary name appears once.
 *
 *  @param entryDebugString
 *    debug string of the enclosing entry; the debug string of each package is
 *    this string followed by `:` and the package name
 *  @param data
 *    pairs of (package name, class data) found in the entry
 */
def compressPackageData(
  entryDebugString: String,
  data: List[(String, InMemory.ClassData)]
): List[InMemory.PackageData] =
  val classDatasByPackage = data.groupMap(_._1)(_._2)
  classDatasByPackage.iterator.map { (packageName, classDatas) =>
    val mergedByBinaryName =
      classDatas.groupMapReduce(_.binaryName)(identity)((left, right) => left.combineWith(right))
    InMemory.PackageData(
      entryDebugString + ":" + packageName,
      packageName,
      mergedByBinaryName.valuesIterator.toList
    )
  }.toList
end compressPackageData

def toEntry(entry: ClasspathEntry): Classpath.Entry =
def toEntry(entryDebugString: String, entry: ClasspathEntryKind): InMemory.ClasspathEntry =
val map = entry.walkFiles(kinds.toSeq*) { (kind, fileWithExt, path, bytes) =>
val (s"$file.${kind.`ext`}") = fileWithExt: @unchecked
val bin = binaryName(file)
val (packageName, simpleName) = classAndPackage(bin)
kind match {
case FileKind.Class =>
packageName -> ClassData(simpleName, path, bytes)
packageName -> InMemory.ClassData(path, simpleName, None, Some(bytes))
case FileKind.Tasty =>
packageName -> TastyData(simpleName, path, bytes)
packageName -> InMemory.ClassData(path, simpleName, Some(bytes), None)
}
}
val packageDatas =
compressPackageData(map.get(FileKind.Class).getOrElse(Nil) ++ map.get(FileKind.Tasty).getOrElse(Nil))
Classpath.Entry(packageDatas)
val packageDatas: List[InMemory.PackageData] =
compressPackageData(
entryDebugString,
map.get(FileKind.Class).getOrElse(Nil) ++ map.get(FileKind.Tasty).getOrElse(Nil)
)
InMemory.ClasspathEntry(entryDebugString, packageDatas)
end toEntry

Classpath(classpathToEntries(classpath).map(toEntry))
classpathToEntries(classpath).map(toEntry)
end read

private def loadBytes(fileStream: InputStream): IArray[Byte] = {
Expand All @@ -113,16 +113,18 @@ object ClasspathLoaders {
IArray.from(bytes.toByteArray().nn)
}

private def classpathToEntries(classpath: List[Path]): IArray[ClasspathEntry] =
for e <- IArray.from(classpath)
yield
if Files.exists(e) then
if Files.isDirectory(e) then ClasspathEntry.Directory(e)
else if e.getFileName().toString().endsWith(".jar") then ClasspathEntry.Jar(e)
else throw IllegalArgumentException("Illegal classpath entry: " + e)
else ClasspathEntry.Empty

private enum ClasspathEntry {
/** Classifies every element of `classpath` and pairs it with its string form.
 *
 *  A path that does not exist maps to `ClasspathEntryKind.Empty`, an existing
 *  directory to `ClasspathEntryKind.Directory`, and an existing file whose
 *  name ends with `.jar` to `ClasspathEntryKind.Jar`.
 *
 *  @throws java.lang.IllegalArgumentException
 *    if a path exists but is neither a directory nor a `.jar` file
 */
private def classpathToEntries(classpath: List[Path]): List[(String, ClasspathEntryKind)] =
  def kindOf(path: Path): ClasspathEntryKind =
    if !Files.exists(path) then ClasspathEntryKind.Empty
    else if Files.isDirectory(path) then ClasspathEntryKind.Directory(path)
    else if path.getFileName().toString().endsWith(".jar") then ClasspathEntryKind.Jar(path)
    else throw IllegalArgumentException("Illegal classpath entry: " + path)

  classpath.map { path =>
    // Classify first so that an illegal entry fails before anything else is computed.
    val kind = kindOf(path)
    path.toString() -> kind
  }
end classpathToEntries

private enum ClasspathEntryKind {
case Jar(path: Path)
case Directory(path: Path)
case Empty
Expand Down Expand Up @@ -191,7 +193,7 @@ object ClasspathLoaders {
}
}.toMap

case ClasspathEntry.Empty => Map.empty
case Empty => Map.empty
}
}

Expand Down
207 changes: 136 additions & 71 deletions tasty-query/shared/src/main/scala/tastyquery/Classpaths.scala
Original file line number Diff line number Diff line change
@@ -1,88 +1,153 @@
package tastyquery

/** In-memory representation of the contents of classpaths. */
/** Representation of the contents of classpaths. */
object Classpaths:
/** Contains class data and tasty data for a given package. */
final class PackageData(val dotSeparatedName: String, val classes: IArray[ClassData], val tastys: IArray[TastyData]):
override def toString(): String = s"PackageData($dotSeparatedName)"

/** In-memory representation of a `.class` file.
/** The representation of an entire classpath.
*
* `binaryName` is the file name without the `.class` extension.
* Classpaths are made of a sequence of entries (where order is relevant).
* Each entry contains a set of packages, and each package contains a set of
* class information files.
*/
final class ClassData(val binaryName: String, val debugPath: String, val bytes: IArray[Byte]):
override def toString(): String = s"ClassData($binaryName, $debugPath)"
type Classpath = List[ClasspathEntry]

/** In-memory representation of a `.tasty` file.
/** One entry of the classpath.
*
* A `ClasspathEntry` must have a meaningful `equals` and `hashCode`, which
* must reflect the identity of the entry (not necessarily the reference
* identity). Its equality is notably used by
* [[Contexts.Context.findSymbolsByClasspathEntry]].
*
* Users of a `ClasspathEntry` and its components may consider them to be
* idempotent.
*
* `binaryName` is the file name without the `.class` extension.
* All the methods of `ClasspathEntry` and its components may throw
* `java.io.IOException`s to indicate I/O errors.
*
* Implementations of this trait are encouraged to define a `toString()`
* method that helps identify the entry for debugging purposes.
*/
final class TastyData(val binaryName: String, val debugPath: String, val bytes: IArray[Byte]):
override def toString(): String = s"TastyData($binaryName, $debugPath)"
trait ClasspathEntry:
/** Lists all the packages available in this entry, including nested packages.
*
* This method must not return two items with the same [[PackageData.dotSeparatedName]].
*
* Subsequent calls to `listAllPackages` may return the same instances of
* [[PackageData]], but need not do so.
*/
def listAllPackages(): List[PackageData]
end ClasspathEntry

/** In-memory representation of an entire classpath.
/** Information about one package within a [[ClasspathEntry]].
*
* A [[Classpath]] can be given to [[Contexts.Context.initialize]] to create a
* [[Contexts.Context]]. The latter gives semantic access to all the
* definitions on the classpath.
* Implementations of this trait are encouraged to define a `toString()`
* method that helps identify the package and its enclosing classpath
* entry for debugging purposes.
*/
final class Classpath(val entries: IArray[Classpath.Entry]):

/** Returns the concatenation of this classpath with `other`.
* This is useful for structural sharing of [[Classpath.Entry Classpath Entries]]. e.g. in the following example
* the standard library is loaded once and shared between two classpaths:
* ```scala
* val stdLibCp = ClasspathLoaders.read(standardLibraryPaths)
* val libV101Cp = ClasspathLoaders.read(List(Paths.get("path/to/lib-1.0.1.jar"))) ++ stdLibCp
* val libV102Cp = ClasspathLoaders.read(List(Paths.get("path/to/lib-1.0.2.jar"))) ++ stdLibCp
* ```
trait PackageData:
/** The fully-qualified name of the package represented by this `PackageData`. */
val dotSeparatedName: String

/** Lists all the files containing class information in this package (but not nested packages).
*
* Class information is found in `.class` files and `.tasty` files. For
* any binary name `X`, if there is both an `X.class` and an `X.tasty`,
* they must be returned as part of the same [[ClassData]].
*
* This method must not return two items with the same [[ClassData.binaryName]].
*
* Subsequent calls to `listAllClassDatas` and [[getClassDataByBinaryName]]
* may return the same instances of [[ClassData]], but need not do so.
*/
def ++(other: Classpath): Classpath = Classpath(entries ++ other.entries)

/** Filter a classpath so it only contains roots that match the given binary names. */
def withFilter(binaryNames: List[String]): Classpath =

def packageAndClass(binaryName: String): (String, String) =
val lastSep = binaryName.lastIndexOf('.')
if lastSep == -1 then ("", binaryName)
else
import scala.language.unsafeNulls
val packageName = binaryName.substring(0, lastSep)
val className = binaryName.substring(lastSep + 1)
(packageName, className)

def filterEntry(entry: Classpath.Entry, lookup: Map[String, List[String]]) =
val packages = entry.packages.collect {
case pkg if lookup.contains(pkg.dotSeparatedName) =>
val tastys = pkg.tastys.filter(t => lookup(pkg.dotSeparatedName).contains(t.binaryName))
val classes = pkg.classes.filter(c => lookup(pkg.dotSeparatedName).contains(c.binaryName))
PackageData(pkg.dotSeparatedName, classes, tastys)
}
Classpath.Entry(packages)

val formatted = binaryNames.map(packageAndClass)
val lookup = formatted.groupMap((pkg, _) => pkg)((_, cls) => cls)
val filtered = entries.map(filterEntry(_, lookup))
Classpath(filtered)
end withFilter
end Classpath

/** Factory object for [[Classpath]] instances. */
object Classpath {

/** An entry (directory or jar file) of a [[Classpath]].
def listAllClassDatas(): List[ClassData]

/** Get the [[ClassData]] associated with the given `binaryName` in this package, if it exists.
*
* You can lookup all symbols originating from a particular [[Classpath.Entry]]
* with [[Contexts.Context.findSymbolsByClasspathEntry ctx.findSymbolsByClasspathEntry]].
* Returns `None` if neither `binaryName.class` nor `binaryName.tasty` exists.
*
* For example:
* Subsequent calls to `getClassDataByBinaryName` and [[listAllClassDatas]]
* may return the same instance of [[ClassData]], but need not do so.
*/
def getClassDataByBinaryName(binaryName: String): Option[ClassData]
end PackageData

/** Information about one class within a [[PackageData]].
*
* When both a `.class` file and a `.tasty` file exist for a given binary
* name, they are represented by the same instance of `ClassData`.
*
* Implementations of this trait are encouraged to define a `toString()`
* method that helps identify the class and its enclosing package and
* classpath entry for debugging purposes.
*/
trait ClassData:
/** The binary name of the class information represented by this `ClassData`.
*
* ```scala
* val classpath = ClasspathLoaders.read(myLibraryPath :: stdLibPaths)
* given Context = Contexts.init(classpath)
* val myLibSyms = ctx.findSymbolsByClasspathEntry(classpath.entries.head)
* ```
* It is the name of the file(s) without the `.class` or `.tasty` extension.
*/
final class Entry(val packages: IArray[PackageData])
}
val binaryName: String

/** Tests whether this class information has an associated `.tasty` file. */
def hasTastyFile: Boolean

/** Reads the contents of the `.tasty` file associated with this class information. */
def readTastyFileBytes(): IArray[Byte]

/** Tests whether this class information has an associated `.class` file. */
def hasClassFile: Boolean

/** Reads the contents of the `.class` file associated with this class information. */
def readClassFileBytes(): IArray[Byte]
end ClassData

/** In-memory representation of classpath entries.
 *
 *  The classes in this object implement the generic classpath traits with
 *  content that is entirely held in memory: packages and classes are plain
 *  lists, and file contents are already-loaded byte arrays.
 */
object InMemory:
  import Classpaths as generic

  /** A classpath entry backed by an in-memory list of packages.
   *
   *  This class does not override `equals` or `hashCode`.
   *
   *  @param debugString
   *    the string returned by `toString()`
   *  @param packages
   *    the packages of this entry, returned as is by `listAllPackages()`
   */
  final class ClasspathEntry(debugString: String, val packages: List[PackageData]) extends generic.ClasspathEntry:
    override def toString(): String = debugString

    def listAllPackages(): List[generic.PackageData] = packages
  end ClasspathEntry

  /** A package backed by an in-memory list of class datas.
   *
   *  @param debugString
   *    the string returned by `toString()`
   *  @param dotSeparatedName
   *    the fully-qualified name of the package
   *  @param classes
   *    the class datas of this package, returned as is by `listAllClassDatas()`
   */
  final class PackageData(debugString: String, val dotSeparatedName: String, val classes: List[ClassData])
      extends generic.PackageData:
    // Index built on first lookup. If `classes` holds several items with the
    // same binary name, the last one wins in this index.
    private lazy val byBinaryName: Map[String, ClassData] =
      classes.map(c => c.binaryName -> c).toMap

    override def toString(): String = debugString

    def listAllClassDatas(): List[generic.ClassData] = classes

    def getClassDataByBinaryName(binaryName: String): Option[generic.ClassData] = byBinaryName.get(binaryName)
  end PackageData

  /** Class information whose `.tasty` and/or `.class` contents are held in memory.
   *
   *  @param debugString
   *    the string returned by `toString()`
   *  @param binaryName
   *    the binary name of the class information
   *  @param tastyFileBytes
   *    the contents of the `.tasty` file, or `None` if there is none
   *  @param classFileBytes
   *    the contents of the `.class` file, or `None` if there is none
   */
  final class ClassData(
    debugString: String,
    val binaryName: String,
    val tastyFileBytes: Option[IArray[Byte]],
    val classFileBytes: Option[IArray[Byte]]
  ) extends generic.ClassData:
    override def toString(): String = debugString

    def hasTastyFile: Boolean = tastyFileBytes.isDefined

    /** Returns the bytes of the `.tasty` file.
     *
     *  @throws java.util.NoSuchElementException
     *    if this class data has no `.tasty` file; check `hasTastyFile` first
     */
    def readTastyFileBytes(): IArray[Byte] = tastyFileBytes.getOrElse(missingFile("tasty"))

    def hasClassFile: Boolean = classFileBytes.isDefined

    /** Returns the bytes of the `.class` file.
     *
     *  @throws java.util.NoSuchElementException
     *    if this class data has no `.class` file; check `hasClassFile` first
     */
    def readClassFileBytes(): IArray[Byte] = classFileBytes.getOrElse(missingFile("class"))

    /** Merges this class data with another one for the same binary name.
     *
     *  For each kind of file, the bytes of `this` are kept when present, and
     *  those of `that` are used otherwise. The result keeps the debug string
     *  of `this`.
     *
     *  @throws java.lang.IllegalArgumentException
     *    if the two class datas have different binary names
     */
    def combineWith(that: ClassData): ClassData =
      require(
        this.binaryName == that.binaryName,
        s"cannot combine two ClassData for different binary names ${this.binaryName} and ${that.binaryName}"
      )
      ClassData(
        debugString,
        binaryName,
        this.tastyFileBytes.orElse(that.tastyFileBytes),
        this.classFileBytes.orElse(that.classFileBytes)
      )
    end combineWith

    /** Fails with the same exception type as `Option.get`, but with a message
     *  that identifies the class data and the kind of file that is missing.
     */
    private def missingFile(extension: String): Nothing =
      throw new NoSuchElementException(s"$debugString has no .$extension file for binary name $binaryName")
  end ClassData
end InMemory
end Classpaths
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ object Contexts {
sourceFiles.getOrElseUpdate(path, new SourceFile(path))

/** For a given classpath entry, return a lazy view over all the roots covered by the entry. */
def findSymbolsByClasspathEntry(entry: Classpath.Entry): Iterable[TermOrTypeSymbol] =
def findSymbolsByClasspathEntry(entry: ClasspathEntry): Iterable[TermOrTypeSymbol] =
classloader.lookupByEntry(entry).getOrElse {
throw new UnknownClasspathEntry(entry)
}
Expand Down
Loading

0 comments on commit f7f540b

Please sign in to comment.