From 4802fec11226956bcc8a4f994c497672bf2ce911 Mon Sep 17 00:00:00 2001 From: Linary Date: Wed, 4 Nov 2020 17:33:38 +0800 Subject: [PATCH] Adapt snappy-raw compression read (#119) * adapt afs * comment afs dependency Change-Id: Ie38ba73573d12edcfd7eaac22a3b041c781dc7f6 --- assembly/static/bin/hugegraph-loader.sh | 2 ++ pom.xml | 18 +++++++++++++++++- .../loader/reader/file/FileLineFetcher.java | 13 ++++++++++++- .../loader/test/functional/FileLoadTest.java | 5 +---- 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/assembly/static/bin/hugegraph-loader.sh b/assembly/static/bin/hugegraph-loader.sh index 1100c8719..0cd9dc20f 100755 --- a/assembly/static/bin/hugegraph-loader.sh +++ b/assembly/static/bin/hugegraph-loader.sh @@ -14,6 +14,7 @@ BIN=`abs_path` TOP="$(cd ${BIN}/../ && pwd)" CONF="$TOP/conf" LIB="$TOP/lib" +NATIVE="$TOP/native" LOG="$TOP/logs" # Use the unofficial bash strict mode to avoid subtle bugs impossible. @@ -61,4 +62,5 @@ export JVM_OPTS="$JVM_OPTS -Xmx10g -cp $LOADER_CLASSPATH" #JVM_OPTS="$JVM_OPTS -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=1414" exec "$JAVA" -Dname="HugeGraphLoader" -Dlog4j.configurationFile="$CONF/log4j2.xml" \ +-Djava.library.path=${NATIVE} \ ${JVM_OPTS} com.baidu.hugegraph.loader.HugeGraphLoader ${VARS} diff --git a/pom.xml b/pom.xml index f809eed01..ed471ed70 100644 --- a/pom.xml +++ b/pom.xml @@ -154,6 +154,13 @@ hadoop-hdfs-client 2.8.5 + org.apache.hadoop hadoop-mapred @@ -172,6 +179,11 @@ commons-compress 1.19 + + org.apache.httpcomponents + httpclient + 4.5.10 + org.tukaani xz @@ -210,6 +222,10 @@ org.slf4j slf4j-api + + org.slf4j + slf4j-log4j12 + org.apache.logging.log4j log4j-slf4j-impl @@ -275,8 +291,8 @@ parquet-column - libfb303 org.apache.thrift + libfb303 diff --git a/src/main/java/com/baidu/hugegraph/loader/reader/file/FileLineFetcher.java b/src/main/java/com/baidu/hugegraph/loader/reader/file/FileLineFetcher.java index 7ce1dabf2..1be72eec3 100644 --- a/src/main/java/com/baidu/hugegraph/loader/reader/file/FileLineFetcher.java +++ b/src/main/java/com/baidu/hugegraph/loader/reader/file/FileLineFetcher.java @@ -33,6 +33,11 @@ import org.apache.commons.compress.compressors.CompressorInputStream; import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.SnappyCodec; +import org.apache.hadoop.util.ReflectionUtils; import org.slf4j.Logger; import com.baidu.hugegraph.loader.exception.LoadException; @@ -252,11 +257,17 @@ private static Reader createCompressReader(InputStream stream, switch (compression) { case NONE: return new InputStreamReader(stream, charset); + case SNAPPY_RAW: + Configuration config = new Configuration(); + CompressionCodec codec = ReflectionUtils.newInstance( + SnappyCodec.class, config); + CompressionInputStream sis = codec.createInputStream( + stream, codec.createDecompressor()); + return new InputStreamReader(sis, charset); case GZIP: case BZ2: case XZ: case LZMA: - case SNAPPY_RAW: case SNAPPY_FRAMED: case Z: case DEFLATE: diff --git a/src/test/java/com/baidu/hugegraph/loader/test/functional/FileLoadTest.java b/src/test/java/com/baidu/hugegraph/loader/test/functional/FileLoadTest.java index 1a54aa8cb..4647993d5 100644 --- a/src/test/java/com/baidu/hugegraph/loader/test/functional/FileLoadTest.java +++ b/src/test/java/com/baidu/hugegraph/loader/test/functional/FileLoadTest.java @@ -1683,10 +1683,7 @@ public void testLZMACompressFile() { Assert.assertEquals(1, vertices.size()); } - /** - * Didn't find a way to generate the compression file using code - */ - //@Test +// @Test public void testSnappyRawCompressFile() { ioUtil.write("vertex_person.snappy", Compression.SNAPPY_RAW, "name,age,city",