diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml
index 9eadf896888a5..4d5df9c45e7ac 100644
--- a/java/dataset/pom.xml
+++ b/java/dataset/pom.xml
@@ -109,6 +109,38 @@
       <artifactId>jackson-databind</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.arrow.orc</groupId>
+      <artifactId>arrow-orc</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.orc</groupId>
+      <artifactId>orc-core</artifactId>
+      <version>1.7.6</version>
+      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-log4j12</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-logging</groupId>
+          <artifactId>commons-logging</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-storage-api</artifactId>
+      <version>2.8.1</version>
+      <scope>test</scope>
+    </dependency>
diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc
index d088163903457..ef9178b1b5d4f 100644
--- a/java/dataset/src/main/cpp/jni_wrapper.cc
+++ b/java/dataset/src/main/cpp/jni_wrapper.cc
@@ -91,6 +91,8 @@ arrow::Result<std::shared_ptr<arrow::dataset::FileFormat>> GetFileFormat(
       return std::make_shared<arrow::dataset::ParquetFileFormat>();
     case 1:
       return std::make_shared<arrow::dataset::IpcFileFormat>();
+    case 2:
+      return std::make_shared<arrow::dataset::OrcFileFormat>();
default:
std::string error_message =
"illegal file format id: " + std::to_string(file_format_id);
diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java b/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java
index 343e458ce23a9..b428b254b10ca 100644
--- a/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java
+++ b/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java
@@ -23,6 +23,7 @@
public enum FileFormat {
PARQUET(0),
ARROW_IPC(1),
+ ORC(2),
NONE(-1);
private final int id;
diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/OrcWriteSupport.java b/java/dataset/src/test/java/org/apache/arrow/dataset/OrcWriteSupport.java
new file mode 100644
index 0000000000000..c49612995ee1e
--- /dev/null
+++ b/java/dataset/src/test/java/org/apache/arrow/dataset/OrcWriteSupport.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.orc.OrcFile;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+
+public class OrcWriteSupport {
+ public static void writeTempFile(TypeDescription orcSchema, Path path, Integer[] values) throws IOException {
+ Writer writer = OrcFile.createWriter(path, OrcFile.writerOptions(new Configuration()).setSchema(orcSchema));
+ VectorizedRowBatch batch = orcSchema.createRowBatch();
+ LongColumnVector longColumnVector = (LongColumnVector) batch.cols[0];
+ for (int idx = 0; idx < values.length; idx++) {
+ longColumnVector.vector[idx] = values[idx];
+ }
+ batch.size = values.length;
+ writer.addRowBatch(batch);
+ writer.close();
+ }
+}
diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java b/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java
index 2fd8a19bac1f1..b8d51a3edb169 100644
--- a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java
+++ b/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java
@@ -37,6 +37,7 @@
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
+import org.apache.arrow.dataset.OrcWriteSupport;
import org.apache.arrow.dataset.ParquetWriteSupport;
import org.apache.arrow.dataset.jni.NativeDataset;
import org.apache.arrow.dataset.jni.NativeInstanceReleasedException;
@@ -59,6 +60,8 @@
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
+import org.apache.hadoop.fs.Path;
+import org.apache.orc.TypeDescription;
import org.junit.Assert;
import org.junit.ClassRule;
import org.junit.Test;
@@ -357,6 +360,34 @@ public void testBaseArrowIpcRead() throws Exception {
AutoCloseables.close(factory);
}
+ @Test
+ public void testBaseOrcRead() throws Exception {
+ String dataName = "test-orc";
+ String basePath = TMP.getRoot().getAbsolutePath();
+
+    TypeDescription orcSchema = TypeDescription.fromString("struct<ints:int>");
+ Path path = new Path(basePath, dataName);
+ OrcWriteSupport.writeTempFile(orcSchema, path, new Integer[]{Integer.MIN_VALUE, Integer.MAX_VALUE});
+
+ String orcDatasetUri = new File(basePath, dataName).toURI().toString();
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
+ FileFormat.ORC, orcDatasetUri);
+ ScanOptions options = new ScanOptions(100);
+ Schema schema = inferResultSchemaFromFactory(factory, options);
+    List<ArrowRecordBatch> datum = collectResultFromFactory(factory, options);
+
+ assertSingleTaskProduced(factory, options);
+ assertEquals(1, datum.size());
+ assertEquals(1, schema.getFields().size());
+ assertEquals("ints", schema.getFields().get(0).getName());
+
+ String expectedJsonUnordered = "[[2147483647], [-2147483648]]";
+ checkParquetReadResult(schema, expectedJsonUnordered, datum);
+
+ AutoCloseables.close(datum);
+ AutoCloseables.close(factory);
+ }
+
   private void checkParquetReadResult(Schema schema, String expectedJson, List<ArrowRecordBatch> actual)
throws IOException {
final ObjectMapper json = new ObjectMapper();