Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-7205] Support .ivy2/local and .m2/repositories/ in --packages #5755

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 26 additions & 8 deletions core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -722,13 +722,31 @@ private[deploy] object SparkSubmitUtils {
/**
* Creates the resolver chain used to search the given remote repositories
* @param remoteRepos Comma-delimited string of remote repositories
* @param ivySettings The Ivy settings for this session
* @return A ChainResolver used by Ivy to search for and resolve dependencies.
*/
def createRepoResolvers(remoteRepos: Option[String]): ChainResolver = {
def createRepoResolvers(remoteRepos: Option[String], ivySettings: IvySettings): ChainResolver = {
// We need a chain resolver if we want to check multiple repositories
val cr = new ChainResolver
cr.setName("list")

val localM2 = new IBiblioResolver
localM2.setM2compatible(true)
val m2Path = ".m2" + File.separator + "repository" + File.separator
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure that you can point ivy to a maven repository like this? Ivy and maven use different format for laying out directory structures (does setUsepoms just make this work?). I'd make sure you explicitly test this locally with the .m2 folder.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah I guess that comes with setM2compatible.

localM2.setRoot(new File(System.getProperty("user.home"), m2Path).toURI.toString)
localM2.setUsepoms(true)
localM2.setName("local-m2-cache")
cr.add(localM2)

val localIvy = new IBiblioResolver
localIvy.setRoot(new File(ivySettings.getDefaultIvyUserDir,
"local" + File.separator).toURI.toString)
val ivyPattern = Seq("[organisation]", "[module]", "[revision]", "[type]s",
"[artifact](-[classifier]).[ext]").mkString(File.separator)
localIvy.setPattern(ivyPattern)
localIvy.setName("local-ivy-cache")
cr.add(localIvy)

// the biblio resolver resolves POM declared dependencies
val br: IBiblioResolver = new IBiblioResolver
br.setM2compatible(true)
Expand Down Expand Up @@ -761,8 +779,7 @@ private[deploy] object SparkSubmitUtils {

/**
* Output a comma-delimited list of paths for the downloaded jars to be added to the classpath
* (will append to jars in SparkSubmit). The name of the jar is given
* after a '!' by Ivy. It also sometimes contains '(bundle)' after '.jar'. Remove that as well.
* (will append to jars in SparkSubmit).
* @param artifacts Sequence of dependencies that were resolved and retrieved
* @param cacheDirectory directory where jars are cached
* @return a comma-delimited list of paths for the dependencies
Expand All @@ -771,10 +788,9 @@ private[deploy] object SparkSubmitUtils {
artifacts: Array[AnyRef],
cacheDirectory: File): String = {
artifacts.map { artifactInfo =>
val artifactString = artifactInfo.toString
val jarName = artifactString.drop(artifactString.lastIndexOf("!") + 1)
val artifact = artifactInfo.asInstanceOf[Artifact].getModuleRevisionId
cacheDirectory.getAbsolutePath + File.separator +
jarName.substring(0, jarName.lastIndexOf(".jar") + 4)
s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}.jar"
}.mkString(",")
}

Expand Down Expand Up @@ -856,6 +872,7 @@ private[deploy] object SparkSubmitUtils {
if (alternateIvyCache.trim.isEmpty) {
new File(ivySettings.getDefaultIvyUserDir, "jars")
} else {
ivySettings.setDefaultIvyUserDir(new File(alternateIvyCache))
ivySettings.setDefaultCache(new File(alternateIvyCache, "cache"))
new File(alternateIvyCache, "jars")
}
Expand All @@ -865,7 +882,7 @@ private[deploy] object SparkSubmitUtils {
// create a pattern matcher
ivySettings.addMatcher(new GlobPatternMatcher)
// create the dependency resolvers
val repoResolver = createRepoResolvers(remoteRepos)
val repoResolver = createRepoResolvers(remoteRepos, ivySettings)
ivySettings.addResolver(repoResolver)
ivySettings.setDefaultResolver(repoResolver.getName)

Expand Down Expand Up @@ -899,7 +916,8 @@ private[deploy] object SparkSubmitUtils {
}
// retrieve all resolved dependencies
ivy.retrieve(rr.getModuleDescriptor.getModuleRevisionId,
packagesDirectory.getAbsolutePath + File.separator + "[artifact](-[classifier]).[ext]",
packagesDirectory.getAbsolutePath + File.separator +
"[organization]_[artifact]-[revision].[ext]",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reason we got rid of (-[classifier])?

I saw some discussions about this. For example,

If this pattern for instance doesn't have the [type] or [classifier] token, Ivy will download the source/javadoc artifacts to the same file as the regular jar.

retrieveOptions.setConfs(Array(ivyConfName)))
System.setOut(sysOut)
resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package org.apache.spark.deploy

import java.io.{PrintStream, OutputStream, File}

import org.apache.ivy.core.settings.IvySettings

import scala.collection.mutable.ArrayBuffer

import org.scalatest.{BeforeAndAfterAll, FunSuite}
Expand Down Expand Up @@ -56,24 +58,23 @@ class SparkSubmitUtilsSuite extends FunSuite with BeforeAndAfterAll {
}

test("create repo resolvers") {
val resolver1 = SparkSubmitUtils.createRepoResolvers(None)
val settings = new IvySettings
val res1 = SparkSubmitUtils.createRepoResolvers(None, settings)
// should have central and spark-packages by default
assert(resolver1.getResolvers.size() === 2)
assert(resolver1.getResolvers.get(0).asInstanceOf[IBiblioResolver].getName === "central")
assert(resolver1.getResolvers.get(1).asInstanceOf[IBiblioResolver].getName === "spark-packages")
assert(res1.getResolvers.size() === 4)
assert(res1.getResolvers.get(0).asInstanceOf[IBiblioResolver].getName === "local-m2-cache")
assert(res1.getResolvers.get(1).asInstanceOf[IBiblioResolver].getName === "local-ivy-cache")
assert(res1.getResolvers.get(2).asInstanceOf[IBiblioResolver].getName === "central")
assert(res1.getResolvers.get(3).asInstanceOf[IBiblioResolver].getName === "spark-packages")

val repos = "a/1,b/2,c/3"
val resolver2 = SparkSubmitUtils.createRepoResolvers(Option(repos))
assert(resolver2.getResolvers.size() === 5)
val resolver2 = SparkSubmitUtils.createRepoResolvers(Option(repos), settings)
assert(resolver2.getResolvers.size() === 7)
val expected = repos.split(",").map(r => s"$r/")
resolver2.getResolvers.toArray.zipWithIndex.foreach { case (resolver: IBiblioResolver, i) =>
if (i == 0) {
assert(resolver.getName === "central")
} else if (i == 1) {
assert(resolver.getName === "spark-packages")
} else {
assert(resolver.getName === s"repo-${i - 1}")
assert(resolver.getRoot === expected(i - 2))
if (i > 3) {
assert(resolver.getName === s"repo-${i - 3}")
assert(resolver.getRoot === expected(i - 4))
}
}
}
Expand Down