From fbc52bcb46e46fcbe8c6a087ff457f8b2c913c4c Mon Sep 17 00:00:00 2001 From: James Baiera Date: Fri, 13 Mar 2020 11:58:33 -0400 Subject: [PATCH] Replace Apache download tasks with custom Ivy repository. (#1438) Hadoop ecosystem tarballs are hosted only on Apache's download mirrors, not in Maven repositories. Previously we downloaded these artifacts from a round-robin of mirror endpoints, using securely obtained hashes to validate the downloaded files even when they came from non-HTTPS sources. This PR removes those download tasks, replacing them with a custom Ivy repository backed by an HTTPS-enabled Apache mirror. This ensures the artifacts are securely downloaded and cached by Gradle instead of relying on task outputs as the old mirror download tasks did. See the resolution sketch appended after the diff for an end-to-end illustration. --- .../hadoop/HadoopClusterFormationTasks.groovy | 54 ++--- .../fixture/hadoop/HadoopFixturePlugin.groovy | 87 ++++++++ .../fixture/hadoop/ServiceDescriptor.groovy | 11 +- .../services/HadoopServiceDescriptor.groovy | 25 +-- .../services/HiveServiceDescriptor.groovy | 25 +-- .../services/PigServiceDescriptor.groovy | 24 +- .../SparkYarnServiceDescriptor.groovy | 28 +-- .../gradle/tasks/ApacheMirrorDownload.groovy | 182 ---------------- .../gradle/tasks/VerifyChecksums.groovy | 79 ------- qa/kerberos/build.gradle | 205 +++++++++--------- 10 files changed, 215 insertions(+), 505 deletions(-) create mode 100644 buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/HadoopFixturePlugin.groovy delete mode 100644 buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/tasks/ApacheMirrorDownload.groovy delete mode 100644 buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/tasks/VerifyChecksums.groovy diff --git a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/HadoopClusterFormationTasks.groovy b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/HadoopClusterFormationTasks.groovy index a0f024a02..a291696f3 100644 --- a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/HadoopClusterFormationTasks.groovy +++ b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/HadoopClusterFormationTasks.groovy @@ -28,13 +28,12 @@ import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.HadoopClusterConfigur import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.InstanceConfiguration import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.RoleConfiguration import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.ServiceConfiguration -import org.elasticsearch.hadoop.gradle.tasks.ApacheMirrorDownload -import org.elasticsearch.hadoop.gradle.tasks.VerifyChecksums import org.gradle.api.AntBuilder import org.gradle.api.DefaultTask import org.gradle.api.GradleException import org.gradle.api.Project import org.gradle.api.Task +import org.gradle.api.artifacts.Configuration import org.gradle.api.logging.Logger import org.gradle.api.tasks.Copy import org.gradle.api.tasks.Delete @@ -59,14 +58,6 @@ class HadoopClusterFormationTasks { Task stopTask } - /** - * Pairing of download and verification tasks for a distribution - */ - static class DistributionTasks { - ApacheMirrorDownload download - VerifyChecksums verify - } - /** * Adds dependent tasks to the given task to start and stop a cluster with the given configuration. *

@@ -104,7 +95,7 @@ class HadoopClusterFormationTasks { for (ServiceConfiguration serviceConfiguration : clusterConfiguration.getServices()) { // Get the download task for this service's package and add it to the service's dependency tasks - DistributionTasks distributionTasks = getOrConfigureDistributionDownload(project, serviceConfiguration) + Configuration distributionConfiguration = getOrConfigureDistributionDownload(project, serviceConfiguration) // Keep track of the start tasks in this service List serviceTaskPairs = [] @@ -140,7 +131,7 @@ class HadoopClusterFormationTasks { TaskPair instanceTasks try { instanceTasks = configureNode(project, prefix, instanceDependencies, instanceInfo, - distributionTasks) + distributionConfiguration) } catch (Exception e) { throw new GradleException( "Exception occurred while initializing instance [${instanceInfo.toString()}]", e) @@ -207,36 +198,21 @@ class HadoopClusterFormationTasks { * either an already created one from the root project, or a newly created download task. These also contain the * verify task to ensure the download has been securely captured. */ - static DistributionTasks getOrConfigureDistributionDownload(Project project, ServiceConfiguration serviceConfiguration) { + static Configuration getOrConfigureDistributionDownload(Project project, ServiceConfiguration serviceConfiguration) { Version serviceVersion = serviceConfiguration.getVersion() - String downloadTaskName = "download${serviceConfiguration.serviceDescriptor.packageName().capitalize()}#${serviceVersion}" - String verifyTaskName = "verify${serviceConfiguration.serviceDescriptor.packageName().capitalize()}#${serviceVersion}" - - ApacheMirrorDownload downloadTask = project.rootProject.tasks.findByName(downloadTaskName) as ApacheMirrorDownload - if (downloadTask == null) { - downloadTask = project.rootProject.tasks.create(name: downloadTaskName, type: ApacheMirrorDownload) as ApacheMirrorDownload - serviceConfiguration.getServiceDescriptor().configureDownload(downloadTask, serviceConfiguration) - downloadTask.group = 'downloads' - downloadTask.onlyIf { !downloadTask.outputFile().exists() } - } - - VerifyChecksums verifyTask = project.rootProject.tasks.findByName(verifyTaskName) as VerifyChecksums - if (verifyTask == null) { - verifyTask = project.rootProject.tasks.create(name: verifyTaskName, type: VerifyChecksums) as VerifyChecksums - verifyTask.group = 'downloads' - verifyTask.dependsOn downloadTask - verifyTask.inputFile downloadTask.outputFile() - for (Map.Entry hash : serviceConfiguration.serviceDescriptor.packageHashVerification(serviceVersion)) { - verifyTask.checksum hash.key, hash.value - } + String configurationName = "download${serviceConfiguration.serviceDescriptor.packageName().capitalize()}#${serviceVersion}" + Configuration configuration = project.configurations.findByName(configurationName) + if (configuration == null) { + configuration = project.configurations.create(configurationName) + project.dependencies.add(configurationName, serviceConfiguration.getServiceDescriptor().getDependencyCoordinates(serviceConfiguration)) } - return new DistributionTasks(download: downloadTask, verify: verifyTask) + return configuration } static TaskPair configureNode(Project project, String prefix, Object dependsOn, InstanceInfo node, - DistributionTasks distribution) { + Configuration distributionConfiguration) { Task setup = project.tasks.create(name: taskName(prefix, node, 'clean'), type: Delete, dependsOn: dependsOn) { delete node.homeDir delete node.cwd @@ -257,7 +233,7 
@@ class HadoopClusterFormationTasks { } // Always extract the package contents, and configure the files - setup = configureExtractTask(taskName(prefix, node, 'extract'), project, setup, node, distribution) + setup = configureExtractTask(taskName(prefix, node, 'extract'), project, setup, node, distributionConfiguration) setup = configureWriteConfigTask(taskName(prefix, node, 'configure'), project, setup, node) setup = configureExtraConfigFilesTask(taskName(prefix, node, 'extraConfig'), project, setup, node) @@ -329,13 +305,13 @@ class HadoopClusterFormationTasks { return setup } - static Task configureExtractTask(String name, Project project, Task setup, InstanceInfo node, DistributionTasks distribution) { - List extractDependsOn = [distribution.verify, setup] + static Task configureExtractTask(String name, Project project, Task setup, InstanceInfo node, Configuration distributionConfiguration) { + List extractDependsOn = [distributionConfiguration, setup] return project.tasks.create(name: name, type: Copy, dependsOn: extractDependsOn) { group = 'hadoopFixture' // TODO: Switch logic if a service is ever not a tar distribution from { - project.tarTree(project.resources.gzip(distribution.download.outputFile())) + project.tarTree(project.resources.gzip(distributionConfiguration.files.first())) } into node.baseDir } diff --git a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/HadoopFixturePlugin.groovy b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/HadoopFixturePlugin.groovy new file mode 100644 index 000000000..060fe2cf0 --- /dev/null +++ b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/HadoopFixturePlugin.groovy @@ -0,0 +1,87 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.hadoop.gradle.fixture.hadoop + +import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.HadoopClusterConfiguration +import org.gradle.api.Action +import org.gradle.api.NamedDomainObjectContainer +import org.gradle.api.NamedDomainObjectFactory +import org.gradle.api.Plugin +import org.gradle.api.Project +import org.gradle.api.artifacts.dsl.RepositoryHandler +import org.gradle.api.artifacts.repositories.IvyArtifactRepository +import org.gradle.api.artifacts.repositories.IvyPatternRepositoryLayout +import org.gradle.api.publish.ivy.IvyArtifact + +class HadoopFixturePlugin implements Plugin<Project> { + + private static final String APACHE_MIRROR = "https://apache.osuosl.org/" + + static class HadoopFixturePluginExtension { + private NamedDomainObjectContainer<HadoopClusterConfiguration> clusters + + HadoopFixturePluginExtension(final Project project) { + this.clusters = project.container(HadoopClusterConfiguration.class, new NamedDomainObjectFactory<HadoopClusterConfiguration>() { + @Override + HadoopClusterConfiguration create(String name) { + return new HadoopClusterConfiguration(project, name) + } + }) + } + + HadoopClusterConfiguration cluster(String name, Closure config) { + clusters.maybeCreate(name) + return clusters.getByName(name, config) + } + + NamedDomainObjectContainer<HadoopClusterConfiguration> getClusters() { + return clusters + } + } + + @Override + void apply(Project project) { + HadoopFixturePluginExtension extension = project.getExtensions().create("hadoop", HadoopFixturePluginExtension.class, project) + configureApacheMirrorRepository(project) + project.afterEvaluate { + extension.getClusters().forEach { config -> + // Finish cluster setup + HadoopClusterFormationTasks.setup(project, config) + } + } + } + + private static configureApacheMirrorRepository(Project project) { + RepositoryHandler repositoryHandler = project.getRepositories() + repositoryHandler.add(repositoryHandler.ivy({IvyArtifactRepository ivyArtifactRepository -> + ivyArtifactRepository.setUrl(APACHE_MIRROR) + ivyArtifactRepository.patternLayout({IvyPatternRepositoryLayout ivyPatternRepositoryLayout -> + // We use this pattern, breaking with the usual tradition of a strictly numerical version, + // because Hive does not provide a reasonable artifact name that would make a more robust + // pattern possible (it has a very unorthodox layout) + ivyPatternRepositoryLayout.artifact("[organization]/[module]/[revision].[ext]") + ivyPatternRepositoryLayout.setM2compatible(true) + }) + ivyArtifactRepository.metadataSources({IvyArtifactRepository.MetadataSources metadataSources -> + metadataSources.artifact() + }) + })) + } +} diff --git a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/ServiceDescriptor.groovy b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/ServiceDescriptor.groovy index 4f01c94d3..8b8d78829 100644 --- a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/ServiceDescriptor.groovy +++ b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/ServiceDescriptor.groovy @@ -22,7 +22,6 @@ package org.elasticsearch.hadoop.gradle.fixture.hadoop import org.elasticsearch.gradle.Version import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.InstanceConfiguration import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.ServiceConfiguration -import org.elasticsearch.hadoop.gradle.tasks.ApacheMirrorDownload import static org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.SettingsContainer.FileSettings @@ -60,9 +59,10 @@ interface ServiceDescriptor { Version defaultVersion()
/** - * Callback to configure a download task to perform the package download. + * The coordinates for this dependency that will be used with a custom Ivy Repository to download the artifact from + * an Apache mirror. */ - void configureDownload(ApacheMirrorDownload task, ServiceConfiguration configuration) + String getDependencyCoordinates(ServiceConfiguration configuration) /** * The official apache package name for the artifact. @@ -74,11 +74,6 @@ interface ServiceDescriptor { */ String artifactName(ServiceConfiguration configuration) - /** - * Return a mapping of hash algorithm id to hash value for an artifact of the given version. - */ - Map<String, String> packageHashVerification(Version version) - /** * The name of the directory under the base dir that contains the package contents. */ diff --git a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/HadoopServiceDescriptor.groovy b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/HadoopServiceDescriptor.groovy index 0a15d81fe..7ae833fe9 100644 --- a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/HadoopServiceDescriptor.groovy +++ b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/HadoopServiceDescriptor.groovy @@ -27,19 +27,11 @@ import org.elasticsearch.hadoop.gradle.fixture.hadoop.ServiceDescriptor import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.InstanceConfiguration import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.ServiceConfiguration import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.SettingsContainer -import org.elasticsearch.hadoop.gradle.tasks.ApacheMirrorDownload -import org.gradle.api.GradleException import static org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.SettingsContainer.FileSettings class HadoopServiceDescriptor implements ServiceDescriptor { - static final Map<Version, Map<String, String>> VERSION_MAP = [:] - static { - VERSION_MAP.put(new Version(2, 7, 7), - ['SHA-512': '17c8917211dd4c25f78bf60130a390f9e273b0149737094e45f4ae5c917b1174b97eb90818c5df068e607835120126281bcc07514f38bd7fd3cb8e9d3db1bdde']) - } - static final RoleDescriptor NAMENODE = RoleDescriptor.requiredProcess('namenode') static final RoleDescriptor DATANODE = RoleDescriptor.requiredProcess('datanode', [NAMENODE]) static final RoleDescriptor RESOURCEMANAGER = RoleDescriptor.requiredProcess('resourcemanager') @@ -73,12 +65,8 @@ class HadoopServiceDescriptor implements ServiceDescriptor { } @Override - void configureDownload(ApacheMirrorDownload task, ServiceConfiguration configuration) { - Version version = configuration.getVersion() - task.packagePath = 'hadoop/common' - task.packageName = 'hadoop' - task.artifactFileName = "hadoop-${version}.tar.gz" - task.version = "${version}" + String getDependencyCoordinates(ServiceConfiguration configuration) { + return "hadoop.common:hadoop-${configuration.getVersion()}:${artifactName(configuration)}@tar.gz" } @Override @@ -92,15 +80,6 @@ class HadoopServiceDescriptor implements ServiceDescriptor { return "hadoop-${version}" } - @Override - Map<String, String> packageHashVerification(Version version) { - Map<String, String> hashVerifications = VERSION_MAP.get(version) - if (hashVerifications == null) { - throw new GradleException("Unsupported version [$version] - No download hash configured") - } - return hashVerifications - } - @Override String homeDirName(InstanceConfiguration configuration) { return artifactName(configuration.getServiceConf()) diff --git
a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/HiveServiceDescriptor.groovy b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/HiveServiceDescriptor.groovy index bef8457bf..86e5a44ef 100644 --- a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/HiveServiceDescriptor.groovy +++ b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/HiveServiceDescriptor.groovy @@ -26,19 +26,11 @@ import org.elasticsearch.hadoop.gradle.fixture.hadoop.RoleDescriptor import org.elasticsearch.hadoop.gradle.fixture.hadoop.ServiceDescriptor import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.InstanceConfiguration import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.ServiceConfiguration -import org.elasticsearch.hadoop.gradle.tasks.ApacheMirrorDownload -import org.gradle.api.GradleException import static org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.SettingsContainer.FileSettings class HiveServiceDescriptor implements ServiceDescriptor { - static final Map<Version, Map<String, String>> VERSION_MAP = [:] - static { - VERSION_MAP.put(new Version(1, 2, 2), - ['SHA-256' : '763b246a1a1ceeb815493d1e5e1d71836b0c5b9be1c4cd9c8d685565113771d1']) - } - static RoleDescriptor HIVESERVER = RoleDescriptor.requiredProcess('hiveserver') @Override @@ -67,12 +59,8 @@ class HiveServiceDescriptor implements ServiceDescriptor { } @Override - void configureDownload(ApacheMirrorDownload task, ServiceConfiguration configuration) { - Version version = configuration.getVersion() - task.packagePath = 'hive' - task.packageName = 'hive' - task.artifactFileName = "apache-hive-${version}-bin.tar.gz" - task.version = "${version}" + String getDependencyCoordinates(ServiceConfiguration configuration) { + return "hive:hive-${configuration.getVersion()}:${artifactName(configuration)}@tar.gz" } @Override @@ -86,15 +74,6 @@ class HiveServiceDescriptor implements ServiceDescriptor { return "apache-hive-${version}-bin" } - @Override - Map<String, String> packageHashVerification(Version version) { - Map<String, String> hashVerifications = VERSION_MAP.get(version) - if (hashVerifications == null) { - throw new GradleException("Unsupported version [$version] - No download hash configured") - } - return hashVerifications - } - @Override String homeDirName(InstanceConfiguration configuration) { return artifactName(configuration.getServiceConf()) diff --git a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/PigServiceDescriptor.groovy b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/PigServiceDescriptor.groovy index fa8df3c1a..0daea188d 100644 --- a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/PigServiceDescriptor.groovy +++ b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/PigServiceDescriptor.groovy @@ -26,19 +26,11 @@ import org.elasticsearch.hadoop.gradle.fixture.hadoop.ServiceDescriptor import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.HadoopClusterConfiguration import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.InstanceConfiguration import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.ServiceConfiguration -import org.elasticsearch.hadoop.gradle.tasks.ApacheMirrorDownload -import org.gradle.api.GradleException import static org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.SettingsContainer.FileSettings class PigServiceDescriptor implements ServiceDescriptor { - static final Map<Version, Map<String, String>> VERSION_MAP = [:] - static { -
VERSION_MAP.put(new Version(0, 17, 0), - ['MD5': 'da76998409fe88717b970b45678e00d4']) - } - static RoleDescriptor GATEWAY = RoleDescriptor.requiredGateway('pig', []) @Override @@ -67,11 +59,8 @@ class PigServiceDescriptor implements ServiceDescriptor { } @Override - void configureDownload(ApacheMirrorDownload task, ServiceConfiguration configuration) { - task.setPackagePath('pig') - task.setPackageName('pig') - task.setVersion(configuration.getVersion().toString()) - task.setArtifactFileName("${artifactName(configuration)}.tar.gz") + String getDependencyCoordinates(ServiceConfiguration configuration) { + return "pig:pig-${configuration.getVersion()}:${artifactName(configuration)}@tar.gz" } @Override @@ -84,15 +73,6 @@ class PigServiceDescriptor implements ServiceDescriptor { return "pig-${configuration.getVersion()}" } - @Override - Map<String, String> packageHashVerification(Version version) { - Map<String, String> hashVerifications = VERSION_MAP.get(version) - if (hashVerifications == null) { - throw new GradleException("Unsupported version [$version] - No download hash configured") - } - return hashVerifications - } - @Override String homeDirName(InstanceConfiguration configuration) { return artifactName(configuration.getServiceConf()) diff --git a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/SparkYarnServiceDescriptor.groovy b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/SparkYarnServiceDescriptor.groovy index 170aa91fd..0c0c6146d 100644 --- a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/SparkYarnServiceDescriptor.groovy +++ b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/SparkYarnServiceDescriptor.groovy @@ -26,20 +26,11 @@ import org.elasticsearch.hadoop.gradle.fixture.hadoop.ServiceDescriptor import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.HadoopClusterConfiguration import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.InstanceConfiguration import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.ServiceConfiguration -import org.elasticsearch.hadoop.gradle.tasks.ApacheMirrorDownload -import org.gradle.api.GradleException import static org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.SettingsContainer.FileSettings class SparkYarnServiceDescriptor implements ServiceDescriptor { - static final Version VERSION = new Version(2, 3, 4) - static final Map<Version, Map<String, String>> VERSION_MAP = [:] - static { - VERSION_MAP.put(VERSION, - ['SHA-512': '9FBEFCE2739990FFEDE6968A9C2F3FE399430556163BFDABDF5737A8F9E52CD535489F5CA7D641039A87700F50BFD91A706CA47979EE51A3A18787A92E2D6D53']) - } - static RoleDescriptor GATEWAY = RoleDescriptor.requiredGateway('spark', []) @Override @@ -64,16 +55,12 @@ class SparkYarnServiceDescriptor implements ServiceDescriptor { @Override Version defaultVersion() { - return VERSION + return new Version(2, 3, 4) } @Override - void configureDownload(ApacheMirrorDownload task, ServiceConfiguration configuration) { - Version version = configuration.getVersion() - task.packagePath = 'spark' - task.packageName = 'spark' - task.version = "$version" - task.artifactFileName = "${artifactName(configuration)}.tgz" + String getDependencyCoordinates(ServiceConfiguration configuration) { + return "spark:spark-${configuration.getVersion()}:${artifactName(configuration)}@tgz" } @Override @@ -89,15 +76,6 @@ class SparkYarnServiceDescriptor implements ServiceDescriptor { return "spark-$version-bin-hadoop${hadoopVersion.major}.${hadoopVersion.minor}" } - @Override - Map<String, String>
packageHashVerification(Version version) { - Map<String, String> hashVerifications = VERSION_MAP.get(version) - if (hashVerifications == null) { - throw new GradleException("Unsupported version [$version] - No download hash configured") - } - return hashVerifications - } - @Override String homeDirName(InstanceConfiguration configuration) { return artifactName(configuration.getServiceConf()) diff --git a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/tasks/ApacheMirrorDownload.groovy b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/tasks/ApacheMirrorDownload.groovy deleted file mode 100644 index 8f0e5d8fd..000000000 --- a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/tasks/ApacheMirrorDownload.groovy +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.hadoop.gradle.tasks - -import groovy.json.JsonSlurper -import org.gradle.api.DefaultTask -import org.gradle.api.tasks.OutputDirectory -import org.gradle.api.tasks.TaskAction - -/** - * Downloads an artifact from the closest selected Apache Download Mirror. - * - * Mirror selection is done by requesting the closest mirror from the apache site. The download - * is performed on the preferred mirror, with falling back on to the other mirrors in the mirror - * list. - * - * Note: Apache's download mirrors are almost always http only. Since we cannot rely on downloading - * over https, you should always use the {@linkplain org.elasticsearch.hadoop.gradle.tasks.VerifyChecksums} - * task to make sure the package you are downloading is safe/intact. - */ -class ApacheMirrorDownload extends DefaultTask { - - private static final String MIRROR_DISCOVERY = "https://www.apache.org/dyn/closer.cgi" - private static final String JSON_SUFFIX = "?as_json=1" - - static class MirrorInfo { - String preferred - List<String> mirrors - List<String> backups - - MirrorInfo(String mirrorInfo) { - Object parsed = new JsonSlurper().parseText(mirrorInfo) - preferred = parsed.preferred - mirrors = parsed.http - backups = parsed.backup - Collections.shuffle(mirrors) - Collections.shuffle(backups) - } - } - - private String packagePath - private String packageName - private String version - private String artifactFileName - private File downloadDir = project.rootProject.buildDir.toPath().resolve("downloadcache").toFile() - - /** - * @return The path on the apache download server to the directory that holds the versioned artifact directories - */ - String getPackagePath() { - return packagePath - } - - /** - * Sets the path to the directory on the Apache download server that holds the versioned artifact directories. - * Also used to look up a valid mirror server for downloading the artifact for this task.
- * @param packagePath The path on the apache download server to the directory that holds the versioned artifact - * directories - */ - void setPackagePath(String packagePath) { - this.packagePath = packagePath - } - - /** - * @return The root name of the package to download - */ - String getPackageName() { - return packageName - } - - /** - * Set the root name of the package to download. The version is appended to this to locate the versioned directory - * to pick out the actual download artifact, as well as to cache the artifact on the disk once it is downloaded. - * @param packageName The root name of the package to download - */ - void setPackageName(String packageName) { - this.packageName = packageName - } - - /** - * @return The version of the package to download - */ - String getVersion() { - return version - } - - /** - * @param version The version of the package to download - */ - void setVersion(String version) { - this.version = version - } - - /** - * @return The final name of the artifact to download, including version and file extension - */ - String getArtifactFileName() { - return artifactFileName - } - - /** - * @param artifactFileName The final name of the artifact to download, including the version and file extension - */ - void setArtifactFileName(String artifactFileName) { - this.artifactFileName = artifactFileName - } - - /** - * @return The local directory that this task will download its artifact into - */ - @OutputDirectory - File getArtifactDirectory() { - return downloadDir.toPath().resolve("${packageName}-${version}").toFile() - } - - /** - * @return The downloaded artifact - */ - File outputFile() { - return getArtifactDirectory().toPath().resolve(artifactFileName).toFile() - } - - @TaskAction - def doMirroredDownload() { - getArtifactDirectory().mkdirs() - String packageDirectory = packageName + '-' + version - - MirrorInfo mirrorInfo = getMirrors(packagePath) - def mirrors = new LinkedList(mirrorInfo.mirrors) - String mirror = mirrorInfo.preferred - // Sanitize mirror link - if (mirror.endsWith('/')) { - mirror = mirror.substring(0, mirror.length() - 1) - } - while (true) { - // Ex: [http://blah.blah/dist]/[hive]/[hive-1.2.2]/[apache-hive-1.2.2-bin.tar.gz] - // Ex: [http://blah.blah/dist]/[hadoop/common]/[hadoop-2.7.7]/[hadoop-2.7.7.tar.gz] - String url = "${mirror}/${packagePath}/${packageDirectory}/${artifactFileName}" - try { - logger.info("Downloading [$url]...") - project.getAnt().get( - src: url, - dest: outputFile(), - maxtime: (5 * 60).toString() // 5 minutes for download timeout - ) - break - } catch (Exception e) { - if (mirrors.isEmpty()) { - throw e - } - logger.warn("Could not download [$url]. Trying next mirror.") - mirror = mirrors.poll() - } - } - } - - private static MirrorInfo getMirrors(String path) { - String mirrorDiscovery = MIRROR_DISCOVERY - if (path != null) { - mirrorDiscovery = mirrorDiscovery + "/" + path - } - mirrorDiscovery = mirrorDiscovery + JSON_SUFFIX - return new MirrorInfo(mirrorDiscovery.toURL().getText()) - } -} diff --git a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/tasks/VerifyChecksums.groovy b/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/tasks/VerifyChecksums.groovy deleted file mode 100644 index 10378cd9f..000000000 --- a/buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/tasks/VerifyChecksums.groovy +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.hadoop.gradle.tasks - -import org.gradle.api.DefaultTask -import org.gradle.api.GradleException -import org.gradle.api.tasks.Input -import org.gradle.api.tasks.InputFile -import org.gradle.api.tasks.TaskAction - -class VerifyChecksums extends DefaultTask { - - private File inputFile - private Map<String, String> checksums = new HashMap<>() - - @InputFile - public File getInputFile() { - return this.inputFile - } - - public void inputFile(File file) { - this.inputFile = file - } - - @Input - public Map<String, String> getChecksums() { - return this.checksums - } - - public void checksum(String algorithm, String result) { - this.checksums.put(algorithm, result) - } - - public void checksums(Map<String, String> additions) { - this.checksums.putAll(additions) - } - - @TaskAction - public void verifyChecksums() { - if (inputFile == null) { - throw new GradleException("Input file required on verify checksums task") - } - AntBuilder antBuilder = project.getAnt() - checksums.collect { String algorithmName, String expected -> - String verifyPropertyName = "${getName()}.${algorithmName}.result" - antBuilder.checksum( - file: inputFile.absolutePath, - algorithm: algorithmName, - property: "${verifyPropertyName}", - ) - String expectedHash = expected.toUpperCase() - String actualHash = antBuilder.properties[verifyPropertyName].toString().toUpperCase() - boolean success = actualHash.equals(expectedHash) - logger.info("Validation of [${algorithmName}] checksum was [${success ?
"successful" : "failure"}]") - if (!success) { - throw new GradleException("Failed to verify [${inputFile}] against [${algorithmName}] checksum.\n" + - "Expected [${expectedHash}]\n" + - " but got [${actualHash}].") - } - } - } -} diff --git a/qa/kerberos/build.gradle b/qa/kerberos/build.gradle index 55e2bba32..d6f717b5d 100644 --- a/qa/kerberos/build.gradle +++ b/qa/kerberos/build.gradle @@ -20,9 +20,9 @@ import org.elasticsearch.gradle.test.AntFixture import org.elasticsearch.gradle.testclusters.DefaultTestClustersTask +import org.elasticsearch.hadoop.gradle.fixture.hadoop.HadoopFixturePlugin import org.elasticsearch.hadoop.gradle.fixture.hadoop.ServiceDescriptor import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.HadoopClusterConfiguration -import org.elasticsearch.hadoop.gradle.fixture.hadoop.HadoopClusterFormationTasks import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.InstanceConfiguration import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.RoleConfiguration import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.ServiceConfiguration @@ -34,6 +34,7 @@ import org.elasticsearch.hadoop.gradle.fixture.hadoop.tasks.SparkApp apply plugin: 'es.hadoop.build' apply plugin: 'scala' +apply plugin: HadoopFixturePlugin configurations { kdcFixture @@ -268,110 +269,109 @@ if (disableTests) { // ============================================================================= // Project instance available implicitly - String prefix = "hadoopFixture" - HadoopClusterConfiguration config = new HadoopClusterConfiguration(project, prefix) - - // Hadoop cluster depends on KDC Fixture being up and running - config.addDependency(kdcFixture) - config.useElasticsearchCluster(testClusters.integTest) - - config.service('hadoop') { ServiceConfiguration s -> - s.addSystemProperty("java.security.krb5.conf", krb5Conf.toString()) - // Core Site Config - s.settingsFile('core-site.xml') { SettingsContainer.FileSettings f -> - // Enable Security - f.addSetting("hadoop.security.authentication", "kerberos") - f.addSetting("hadoop.security.authorization", "true") - f.addSetting("hadoop.rpc.protection", "authentication") - f.addSetting("hadoop.ssl.require.client.cert", "false") - f.addSetting("hadoop.ssl.hostname.verifier", "DEFAULT") - f.addSetting("hadoop.ssl.keystores.factory.class", "org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory") - f.addSetting("hadoop.ssl.server.conf", "ssl-server.xml") - f.addSetting("hadoop.ssl.client.conf", "ssl-client.xml") - f.addSetting("hadoop.proxyuser.hive.hosts", "*") - f.addSetting("hadoop.proxyuser.hive.groups", "*") - - // Add ES Security settings here because without them Spark will not obtain tokens - f.addSetting('es.security.authentication', 'kerberos') - f.addSetting('es.net.spnego.auth.elasticsearch.principal', "${esPrincipal}${realm}") - } - // SSL Server Config - s.settingsFile('ssl-server.xml') { SettingsContainer.FileSettings f -> - f.addSetting("ssl.server.keystore.type", "jks") - f.addSetting("ssl.server.keystore.location", "${resourceDir.getAbsolutePath()}/ssl/server.jks") - f.addSetting("ssl.server.keystore.password", "bigdata") - f.addSetting("ssl.server.keystore.keypassword", "bigdata") - } - // HDFS Site Config - s.settingsFile('hdfs-site.xml') { SettingsContainer.FileSettings f -> - f.addSetting("dfs.http.policy", "HTTPS_ONLY") - f.addSetting("dfs.web.authentication.kerberos.principal", "HTTP/hadoop.build.elastic.co$realm") - f.addSetting("dfs.web.authentication.kerberos.keytab", "$hadoopKeytab") - 
f.addSetting("dfs.block.access.token.enable", "true") - f.addSetting("dfs.namenode.kerberos.principal", "$namenodePrincipal$realm") - f.addSetting("dfs.namenode.keytab.file", "$hadoopKeytab") - f.addSetting("dfs.namenode.kerberos.internal.spnego.principal", "HTTP/hadoop.build.elastic.co") - f.addSetting("dfs.datanode.data.dir.perm", "700") - f.addSetting("dfs.datanode.kerberos.principal", "$datanodePrincipal$realm") - f.addSetting("dfs.datanode.keytab.file", "$hadoopKeytab") - f.addSetting("dfs.encrypt.data.transfer", "false") - f.addSetting("dfs.data.transfer.protection", "authentication") - } - // Yarn Site Config - s.settingsFile('yarn-site.xml') { SettingsContainer.FileSettings f -> - f.addSetting("yarn.resourcemanager.principal", "$resourceManagerPrincipal$realm") - f.addSetting("yarn.resourcemanager.keytab", "$hadoopKeytab") - f.addSetting("yarn.nodemanager.principal", "$nodeManagerPrincipal$realm") - f.addSetting("yarn.nodemanager.keytab", "$hadoopKeytab") + HadoopClusterConfiguration config = project.hadoop.cluster("hadoopFixture") { HadoopClusterConfiguration config -> + // Hadoop cluster depends on KDC Fixture being up and running + config.addDependency(kdcFixture) + config.useElasticsearchCluster(testClusters.integTest) + + config.service('hadoop') { ServiceConfiguration s -> + s.addSystemProperty("java.security.krb5.conf", krb5Conf.toString()) + // Core Site Config + s.settingsFile('core-site.xml') { SettingsContainer.FileSettings f -> + // Enable Security + f.addSetting("hadoop.security.authentication", "kerberos") + f.addSetting("hadoop.security.authorization", "true") + f.addSetting("hadoop.rpc.protection", "authentication") + f.addSetting("hadoop.ssl.require.client.cert", "false") + f.addSetting("hadoop.ssl.hostname.verifier", "DEFAULT") + f.addSetting("hadoop.ssl.keystores.factory.class", "org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory") + f.addSetting("hadoop.ssl.server.conf", "ssl-server.xml") + f.addSetting("hadoop.ssl.client.conf", "ssl-client.xml") + f.addSetting("hadoop.proxyuser.hive.hosts", "*") + f.addSetting("hadoop.proxyuser.hive.groups", "*") + + // Add ES Security settings here because without them Spark will not obtain tokens + f.addSetting('es.security.authentication', 'kerberos') + f.addSetting('es.net.spnego.auth.elasticsearch.principal', "${esPrincipal}${realm}") + } + // SSL Server Config + s.settingsFile('ssl-server.xml') { SettingsContainer.FileSettings f -> + f.addSetting("ssl.server.keystore.type", "jks") + f.addSetting("ssl.server.keystore.location", "${resourceDir.getAbsolutePath()}/ssl/server.jks") + f.addSetting("ssl.server.keystore.password", "bigdata") + f.addSetting("ssl.server.keystore.keypassword", "bigdata") + } + // HDFS Site Config + s.settingsFile('hdfs-site.xml') { SettingsContainer.FileSettings f -> + f.addSetting("dfs.http.policy", "HTTPS_ONLY") + f.addSetting("dfs.web.authentication.kerberos.principal", "HTTP/hadoop.build.elastic.co$realm") + f.addSetting("dfs.web.authentication.kerberos.keytab", "$hadoopKeytab") + f.addSetting("dfs.block.access.token.enable", "true") + f.addSetting("dfs.namenode.kerberos.principal", "$namenodePrincipal$realm") + f.addSetting("dfs.namenode.keytab.file", "$hadoopKeytab") + f.addSetting("dfs.namenode.kerberos.internal.spnego.principal", "HTTP/hadoop.build.elastic.co") + f.addSetting("dfs.datanode.data.dir.perm", "700") + f.addSetting("dfs.datanode.kerberos.principal", "$datanodePrincipal$realm") + f.addSetting("dfs.datanode.keytab.file", "$hadoopKeytab") + 
f.addSetting("dfs.encrypt.data.transfer", "false") + f.addSetting("dfs.data.transfer.protection", "authentication") + } + // Yarn Site Config + s.settingsFile('yarn-site.xml') { SettingsContainer.FileSettings f -> + f.addSetting("yarn.resourcemanager.principal", "$resourceManagerPrincipal$realm") + f.addSetting("yarn.resourcemanager.keytab", "$hadoopKeytab") + f.addSetting("yarn.nodemanager.principal", "$nodeManagerPrincipal$realm") + f.addSetting("yarn.nodemanager.keytab", "$hadoopKeytab") + } + // Mapred Site Config + s.settingsFile('mapred-site.xml') { SettingsContainer.FileSettings f -> + f.addSetting("mapreduce.framework.name", "yarn") + f.addSetting("mapreduce.shuffle.ssl.enabled", "false") + f.addSetting("mapreduce.jobhistory.principal", "$historyServerPrincipal$realm") + f.addSetting("mapreduce.jobhistory.keytab", "$hadoopKeytab") + f.addSetting("yarn.resourcemanager.principal", "$resourceManagerPrincipal$realm") + } + + // Add the ES-Hadoop jar to the resource manager classpath so that it can load the token renewer implementation + // for ES tokens. Otherwise, tokens may not be cancelled at the end of the job. + s.role('resourcemanager') { RoleConfiguration r -> + r.addEnvironmentVariable('YARN_USER_CLASSPATH', testingJar.archivePath.toString()) + r.settingsFile('yarn-site.xml') { SettingsContainer.FileSettings f -> + // Add settings specifically for ES Node to allow for cancelling the tokens + f.addSetting('es.nodes', esAddress) + } + } } - // Mapred Site Config - s.settingsFile('mapred-site.xml') { SettingsContainer.FileSettings f -> - f.addSetting("mapreduce.framework.name", "yarn") - f.addSetting("mapreduce.shuffle.ssl.enabled", "false") - f.addSetting("mapreduce.jobhistory.principal", "$historyServerPrincipal$realm") - f.addSetting("mapreduce.jobhistory.keytab", "$hadoopKeytab") - f.addSetting("yarn.resourcemanager.principal", "$resourceManagerPrincipal$realm") + config.service('spark') + config.service('hive') { ServiceConfiguration s -> + s.addSystemProperty("java.security.krb5.conf", krb5Conf.toString()) + s.addSetting('hive.server2.authentication', 'kerberos') + s.addSetting('hive.server2.authentication.kerberos.principal', "$hivePrincipalName$realm") + s.addSetting('hive.server2.authentication.kerberos.keytab', "$hiveKeytab") + // s.addSetting('hive.server2.logging.operation.level', "VERBOSE") + s.addSetting('yarn.app.mapreduce.am.command-opts', "-Djava.security.krb5.conf=${krb5Conf.toString()}") + s.addSetting('mapreduce.map.java.opts', "-Djava.security.krb5.conf=${krb5Conf.toString()}") + s.addSetting('mapreduce.reduce.java.opts', "-Djava.security.krb5.conf=${krb5Conf.toString()}") + s.addSetting('es.nodes', esAddress) } - - // Add the ES-Hadoop jar to the resource manager classpath so that it can load the token renewer implementation - // for ES tokens. Otherwise, tokens may not be cancelled at the end of the job. 
- s.role('resourcemanager') { RoleConfiguration r -> - r.addEnvironmentVariable('YARN_USER_CLASSPATH', mrJar.archivePath.toString()) - r.settingsFile('yarn-site.xml') { SettingsContainer.FileSettings f -> - // Add settings specifically for ES Node to allow for cancelling the tokens - f.addSetting('es.nodes', esAddress) - } + config.service('pig') { ServiceConfiguration s -> + s.addSetting('java.security.krb5.conf', krb5Conf.toString()) + s.addSetting('hadoop.security.krb5.principal', "$clientPrincipal$realm") + s.addSetting('hadoop.security.krb5.keytab', clientKeytab.toString()) + s.addSetting('yarn.app.mapreduce.am.command-opts', "-Djava.security.krb5.conf=${krb5Conf.toString()}") + s.addSetting('mapreduce.map.java.opts', "-Djava.security.krb5.conf=${krb5Conf.toString()}") + s.addSetting('mapreduce.reduce.java.opts', "-Djava.security.krb5.conf=${krb5Conf.toString()}") + s.addSetting('es.nodes', esAddress) } + config.addDependency(jar) + config.addDependency(kerberosItestJar) + config.addDependency(mrJar) + config.addDependency(mrItestJar) + config.addDependency(hiveItestJar) + config.addDependency(pigItestJar) + config.addDependency(sparkItestJar) + config.addDependency(stormItestJar) } - config.service('spark') - config.service('hive') { ServiceConfiguration s -> - s.addSystemProperty("java.security.krb5.conf", krb5Conf.toString()) - s.addSetting('hive.server2.authentication', 'kerberos') - s.addSetting('hive.server2.authentication.kerberos.principal', "$hivePrincipalName$realm") - s.addSetting('hive.server2.authentication.kerberos.keytab', "$hiveKeytab") - // s.addSetting('hive.server2.logging.operation.level', "VERBOSE") - s.addSetting('yarn.app.mapreduce.am.command-opts', "-Djava.security.krb5.conf=${krb5Conf.toString()}") - s.addSetting('mapreduce.map.java.opts', "-Djava.security.krb5.conf=${krb5Conf.toString()}") - s.addSetting('mapreduce.reduce.java.opts', "-Djava.security.krb5.conf=${krb5Conf.toString()}") - s.addSetting('es.nodes', esAddress) - } - config.service('pig') { ServiceConfiguration s -> - s.addSetting('java.security.krb5.conf', krb5Conf.toString()) - s.addSetting('hadoop.security.krb5.principal', "$clientPrincipal$realm") - s.addSetting('hadoop.security.krb5.keytab', clientKeytab.toString()) - s.addSetting('yarn.app.mapreduce.am.command-opts', "-Djava.security.krb5.conf=${krb5Conf.toString()}") - s.addSetting('mapreduce.map.java.opts', "-Djava.security.krb5.conf=${krb5Conf.toString()}") - s.addSetting('mapreduce.reduce.java.opts', "-Djava.security.krb5.conf=${krb5Conf.toString()}") - s.addSetting('es.nodes', esAddress) - } - config.addDependency(jar) - config.addDependency(kerberosItestJar) - config.addDependency(mrJar) - config.addDependency(mrItestJar) - config.addDependency(hiveItestJar) - config.addDependency(pigItestJar) - config.addDependency(sparkItestJar) - config.addDependency(stormItestJar) // We need to create a tmp directory in hadoop before history server does, because history server will set permissions // wrong. @@ -675,7 +675,4 @@ if (disableTests) { // disk integrationTest.dependsOn(copyOutputTask) } - - // Finish cluster setup - HadoopClusterFormationTasks.setup(project, config) }
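
For illustration, here is a minimal, self-contained build script that exercises the same resolution scheme the plugin configures: an Ivy repository pointed at the HTTPS Apache mirror with the [organization]/[module]/[revision].[ext] artifact pattern, plus a configuration holding the same style of coordinates that the service descriptors return. The mirror URL, pattern, and coordinates come from the patch itself; the configuration name hiveTarball and task name resolveHiveTarball are hypothetical.

// Sketch only, not part of the patch. Coordinate-to-URL mapping under the
// pattern with m2compatible=true, using the descriptors' coordinates:
//   hadoop.common:hadoop-2.7.7:hadoop-2.7.7@tar.gz  -> hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz
//   hive:hive-1.2.2:apache-hive-1.2.2-bin@tar.gz    -> hive/hive-1.2.2/apache-hive-1.2.2-bin.tar.gz
//   pig:pig-0.17.0:pig-0.17.0@tar.gz                -> pig/pig-0.17.0/pig-0.17.0.tar.gz
//   spark:spark-2.3.4:spark-2.3.4-bin-hadoop2.7@tgz -> spark/spark-2.3.4/spark-2.3.4-bin-hadoop2.7.tgz
repositories {
    ivy {
        url 'https://apache.osuosl.org/'
        patternLayout {
            // m2compatible turns the dotted organization into a directory path
            // (hadoop.common -> hadoop/common), matching the Apache dist tree.
            artifact '[organization]/[module]/[revision].[ext]'
            m2compatible = true
        }
        // The mirror serves bare tarballs with no ivy.xml or pom, so the
        // artifact itself is the only metadata source.
        metadataSources {
            artifact()
        }
    }
}

configurations {
    hiveTarball // hypothetical, mirrors the per-service configurations the fixture creates
}

dependencies {
    hiveTarball 'hive:hive-1.2.2:apache-hive-1.2.2-bin@tar.gz'
}

// Resolving the configuration downloads the file over HTTPS and caches it in
// the Gradle module cache, like any other dependency.
tasks.register('resolveHiveTarball') {
    doLast {
        println configurations.hiveTarball.singleFile
    }
}

Running gradle resolveHiveTarball should print the cached tarball's path; the fixture then hands that file to project.tarTree in configureExtractTask, exactly as the diff above shows.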