Upgrading HDFS Repository Plugin to use HDFS 2.8.1 Client #25497

Merged · 6 commits · Jun 30, 2017
Changes from 2 commits
48 changes: 37 additions & 11 deletions plugins/repository-hdfs/build.gradle
@@ -32,7 +32,7 @@ esplugin {
apply plugin: 'elasticsearch.vagrantsupport'

versions << [
'hadoop2': '2.7.1'
'hadoop2': '2.8.1'
Member:
Did no dependency versions change? Just double checking; remember we don't use transitive dependencies, so upgrading means inspecting the new version's deps compared to what we currently pull in.

Member Author:
Htrace is the only new dependency with a new version

]

configurations {
@@ -45,7 +45,8 @@ dependencies {
compile "org.apache.hadoop:hadoop-annotations:${versions.hadoop2}"
compile "org.apache.hadoop:hadoop-auth:${versions.hadoop2}"
compile "org.apache.hadoop:hadoop-hdfs:${versions.hadoop2}"
compile 'org.apache.htrace:htrace-core:3.1.0-incubating'
compile "org.apache.hadoop:hadoop-hdfs-client:${versions.hadoop2}"
compile 'org.apache.htrace:htrace-core4:4.0.1-incubating'
compile 'com.google.guava:guava:11.0.2'
compile 'com.google.protobuf:protobuf-java:2.5.0'
compile 'commons-logging:commons-logging:1.1.3'
@@ -317,8 +318,7 @@ thirdPartyAudit.excludes = [
'org.apache.commons.digester.substitution.MultiVariableExpander',
'org.apache.commons.digester.substitution.VariableSubstitutor',
'org.apache.commons.digester.xmlrules.DigesterLoader',
'org.apache.commons.httpclient.util.URIUtil',
'org.apache.commons.jxpath.JXPathContext',
'org.apache.commons.jxpath.ri.JXPathContextReferenceImpl',
'org.apache.commons.jxpath.ri.QName',
'org.apache.commons.jxpath.ri.compiler.NodeNameTest',
@@ -402,14 +402,12 @@ thirdPartyAudit.excludes = [
'org.codehaus.jackson.JsonFactory',
'org.codehaus.jackson.JsonGenerator',
'org.codehaus.jackson.JsonGenerator$Feature',
'org.codehaus.jackson.JsonNode',
'org.codehaus.jackson.map.MappingJsonFactory',
'org.codehaus.jackson.map.ObjectMapper',
'org.codehaus.jackson.map.ObjectReader',
'org.codehaus.jackson.map.ObjectWriter',
'org.codehaus.jackson.node.ContainerNode',
'org.codehaus.jackson.type.TypeReference',
'org.codehaus.jackson.util.MinimalPrettyPrinter',
'org.fusesource.leveldbjni.JniDBFactory',
'org.iq80.leveldb.DB',
'org.iq80.leveldb.Options',
@@ -437,8 +435,7 @@ thirdPartyAudit.excludes = [
'org.mortbay.jetty.servlet.ServletHolder',
'org.mortbay.jetty.servlet.SessionHandler',
'org.mortbay.jetty.webapp.WebAppContext',
'org.mortbay.log.Log',
'org.mortbay.thread.QueuedThreadPool',
'org.mortbay.util.MultiException',
'org.mortbay.util.ajax.JSON$Convertible',
'org.mortbay.util.ajax.JSON$Output',
@@ -473,7 +470,36 @@ thirdPartyAudit.excludes = [
'org.apache.log4j.AppenderSkeleton',
'org.apache.log4j.AsyncAppender',
'org.apache.log4j.helpers.ISO8601DateFormat',
'org.apache.log4j.spi.ThrowableInformation'
'org.apache.log4j.spi.ThrowableInformation',

// Not pulled in for the plugin, but third party checker complains that they're missing
// Their absence is not missed.

// Hadoop Common
'com.jcraft.jsch.ChannelSftp$LsEntry',
'com.jcraft.jsch.ChannelSftp',
'com.jcraft.jsch.SftpATTRS',
'org.mortbay.jetty.security.SslSelectChannelConnector',

// Hadoop Auth
'com.nimbusds.jose.JWSObject$State',
'com.nimbusds.jose.crypto.RSASSAVerifier',
'com.nimbusds.jwt.ReadOnlyJWTClaimsSet',
'com.nimbusds.jwt.SignedJWT',
'org.apache.directory.shared.kerberos.components.EncryptionKey',

// HDFS Client
'com.squareup.okhttp.Call',
'com.squareup.okhttp.MediaType',
'com.squareup.okhttp.OkHttpClient',
'com.squareup.okhttp.Request$Builder',
'com.squareup.okhttp.RequestBody',
'com.squareup.okhttp.Response',
'com.squareup.okhttp.ResponseBody',

// HDFS
'io.netty.channel.ChannelOption',
'io.netty.util.ReferenceCountUtil'
Member:
Can you sort these, for example, there are already io.netty excludes above?

]

// Gradle 2.13 bundles org.slf4j.impl.StaticLoggerBinder in its core.jar which leaks into the forbidden APIs ant task

This file was deleted.

@@ -0,0 +1 @@
335a867cf42bf789919bfc3229ff26747124e8f1

This file was deleted.

@@ -0,0 +1 @@
688ccccc0e0739d8737a93b0039a4a661e52084b

This file was deleted.

@@ -0,0 +1 @@
4812f251f8100fd4722c3cec5d7353f71f69cda9

This file was deleted.

@@ -0,0 +1 @@
a4df18b79e4d0349ce4b58a52d314e7ae1d6be99

This file was deleted.

@@ -0,0 +1 @@
a378f4bc8e6cd779d779c9f512e0e31edd771633
@@ -0,0 +1 @@
6b0100e4f58ecf7ce75817fce1ffdfbec947337a

This file was deleted.

@@ -0,0 +1 @@
f4ef727cb4675788ac66f48e217020acc1690960
@@ -203,8 +203,8 @@ of dependencies that are NOT Apache Licensed.
See the License for the specific language governing permissions and
limitations under the License.

The HTrace Owl logo is from http://www.clker.com/clipart-13653.html. It is
public domain.
Units, a string formatting go library, is Copyright (c) 2014 Alec Thomas
and MIT licensed: https://github.com/alecthomas/units/blob/master/COPYING

D3, a javascript library for manipulating data, used by htrace-hbase
is Copyright 2010-2014, Michael Bostock and BSD licensed:
@@ -239,4 +239,7 @@ https://github.com/moment/moment/blob/develop/LICENSE
CMP is an implementation of the MessagePack serialization format in
C. It is licensed under the MIT license:
https://github.com/camgunz/cmp/blob/master/LICENSE
See ./htrace-c/src/util/cmp.c and ./htrace-c/src/util/cmp.h.

go-codec is an implementation of several serialization and deserialization
codecs in Go. It is licensed under the MIT license:
https://github.com/ugorji/go/blob/master/LICENSE
@@ -11,3 +11,6 @@ that are NOT Apache licensed (with pointers to their licensing)
Apache HTrace includes an Apache Thrift connector to Zipkin. Zipkin
is a distributed tracing system that is Apache 2.0 Licensed.
Copyright 2012 Twitter, Inc.

Our Owl logo we took from http://www.clker.com/clipart-13653.html.
It is public domain/free.
@@ -62,8 +62,6 @@ private static Void evilHadoopInit() {
Class.forName("org.apache.hadoop.util.StringUtils");
Class.forName("org.apache.hadoop.util.ShutdownHookManager");
Class.forName("org.apache.hadoop.conf.Configuration");
Class.forName("org.apache.hadoop.hdfs.protocol.HdfsConstants");
Class.forName("org.apache.hadoop.hdfs.protocol.datatransfer.PipelineAck");
} catch (ClassNotFoundException | IOException e) {
throw new RuntimeException(e);
} finally {
@@ -19,15 +19,6 @@

package org.elasticsearch.repositories.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.AbstractFileSystem;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.blobstore.BlobStore;
import org.elasticsearch.repositories.ESBlobStoreContainerTestCase;

import javax.security.auth.Subject;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
@@ -38,7 +29,18 @@
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
import java.util.Collections;
import javax.security.auth.Subject;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.AbstractFileSystem;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.blobstore.BlobStore;
import org.elasticsearch.repositories.ESBlobStoreContainerTestCase;

@ThreadLeakFilters(filters = {HdfsClientThreadLeakFilter.class})
public class HdfsBlobStoreContainerTests extends ESBlobStoreContainerTestCase {

@Override
@@ -0,0 +1,46 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.repositories.hdfs;

import com.carrotsearch.randomizedtesting.ThreadFilter;

/**
* In Hadoop 2.8.0, there is a thread that is started by the filesystem to clean up old execution stats.
* This thread ignores all interrupts, catching InterruptedException, logging it, and continuing on
* with it's work. The thread is a daemon, so it thankfully does not stop the JVM from closing, and it
Member:
Nit: it's -> its

* is started only once in a class's static initialization. This currently breaks our testing as this
* thread leaks out of the client and is picked up by the test framework. This thread filter is meant
* to ignore the offending thread until a version of Hadoop is released that addresses the incorrect
* interrupt handling.
*
* @see <a href="https://issues.apache.org/jira/browse/HADOOP-12829">https://issues.apache.org/jira/browse/HADOOP-12829</a>
* @see "org.apache.hadoop.fs.FileSystem.Statistics.StatisticsDataReferenceCleaner"
* @see "org.apache.hadoop.fs.FileSystem.Statistics"
*/
public class HdfsClientThreadLeakFilter implements ThreadFilter {
Member:
Can it be package private and final?


private static final String OFFENDING_THREAD_NAME =
"org.apache.hadoop.fs.FileSystem$Statistics$StatisticsDataReferenceCleaner";

@Override
public boolean reject(Thread t) {
return t.getName().equalsIgnoreCase(OFFENDING_THREAD_NAME);
Member:
Why is the ignore case needed?

Member Author:
Old habit of mine. I'll fix.

}
}
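
For reference, a minimal sketch of what this filter could look like after the two review suggestions above are applied (a package-private final class and a case-sensitive name match); the version that was actually merged may differ:

package org.elasticsearch.repositories.hdfs;

import com.carrotsearch.randomizedtesting.ThreadFilter;

// Sketch only: the same filter with the reviewers' suggestions applied.
final class HdfsClientThreadLeakFilter implements ThreadFilter {

    private static final String OFFENDING_THREAD_NAME =
        "org.apache.hadoop.fs.FileSystem$Statistics$StatisticsDataReferenceCleaner";

    @Override
    public boolean reject(Thread t) {
        // An exact, case-sensitive match is enough: the thread name is fixed
        // by the Hadoop class that starts the cleaner thread.
        return t.getName().equals(OFFENDING_THREAD_NAME);
    }
}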
@@ -23,6 +23,7 @@

import java.util.Collection;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryResponse;
import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse;
import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse;
@@ -34,6 +35,7 @@
import org.elasticsearch.snapshots.SnapshotState;
import org.elasticsearch.test.ESSingleNodeTestCase;

@ThreadLeakFilters(filters = {HdfsClientThreadLeakFilter.class})
public class HdfsTests extends ESSingleNodeTestCase {

@Override
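
As a usage note, any other test case that starts the Hadoop FileSystem could register the same filter in the same way. A hypothetical example (the test class name below is invented; the annotation mirrors the ones added in this change):

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.elasticsearch.test.ESSingleNodeTestCase;

// Hypothetical example class: opts into the leak filter exactly as HdfsTests does above.
@ThreadLeakFilters(filters = {HdfsClientThreadLeakFilter.class})
public class HdfsSnapshotSmokeTests extends ESSingleNodeTestCase {
    // test methods that exercise the repository-hdfs plugin would go here
}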
2 changes: 1 addition & 1 deletion test/fixtures/hdfs-fixture/build.gradle
@@ -20,7 +20,7 @@
apply plugin: 'elasticsearch.build'

versions << [
'hadoop2': '2.7.1'
'hadoop2': '2.8.1'
]

// we create MiniHdfsCluster with the hadoop artifact
6 changes: 5 additions & 1 deletion test/fixtures/hdfs-fixture/src/main/java/hdfs/MiniHDFS.java
@@ -26,6 +26,7 @@
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
@@ -49,7 +50,8 @@ public class MiniHDFS {

public static void main(String[] args) throws Exception {
if (args.length != 1 && args.length != 3) {
throw new IllegalArgumentException("MiniHDFS <baseDirectory> [<kerberosPrincipal> <kerberosKeytab>]");
throw new IllegalArgumentException("Expected: MiniHDFS <baseDirectory> [<kerberosPrincipal> <kerberosKeytab>], " +
"got: " + Arrays.toString(args));
}
boolean secure = args.length == 3;

@@ -83,6 +85,8 @@ public static void main(String[] args) throws Exception {
cfg.set(DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY, keytabFile);
cfg.set(DFSConfigKeys.DFS_DATANODE_KEYTAB_FILE_KEY, keytabFile);
cfg.set(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, "true");
// dfs.block.access.token.enable
Member:
nit: this comment line doesn't seem to add anything meaningful

cfg.set(DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY, "true");
cfg.set(DFSConfigKeys.IGNORE_SECURE_PORTS_FOR_TESTING_KEY, "true");
}
