Skip to content

Commit

Permalink
ARROW-17883: [Java] implement immutable table (apache#14316)
Browse files Browse the repository at this point in the history
Table is a new immutable tabular data structure based on FieldVectors.

This PR is described in detail in the included README.md file. The original design discussion can be found [here](https://docs.google.com/document/d/1J77irZFWNnSID7vK71z26Nw_Pi99I9Hb9iryno8B03c/edit#heading=h.a1lebwljypq5), if you're interested.

Note to reviewers:
- This is a fairly large change set. Most of the code is in "getters" in the Row class. These methods are fairly well covered by tests, but it would be good to have someone look especially at the complex vector types. 
- The only change to existing classes is the addition of three new export methods to the Data class. These reuse the existing logic for exporting VectorSchemaRoots.

Lead-authored-by: Larry White <[email protected]>
Co-authored-by: Larry White <[email protected]>
Signed-off-by: David Li <[email protected]>
  • Loading branch information
2 people authored and fatemehp committed Oct 17, 2022
1 parent f757157 commit 387980e
Show file tree
Hide file tree
Showing 13 changed files with 4,702 additions and 8 deletions.
59 changes: 57 additions & 2 deletions java/c/src/main/java/org/apache/arrow/c/Data.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.arrow.vector.dictionary.DictionaryProvider;
import org.apache.arrow.vector.ipc.ArrowReader;
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
import org.apache.arrow.vector.table.Table;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID;
import org.apache.arrow.vector.types.pojo.Field;
Expand Down Expand Up @@ -114,22 +115,76 @@ public static void exportVector(BufferAllocator allocator, FieldVector vector, D
exporter.export(out, vector, provider);
}

/**
 * Exports the current contents of a Java Table through the C data interface,
 * using the dictionary provider held by the table itself.
 * <p>
 * The table is exported as if it were a struct array. This overload looks up
 * the provider via {@code table.getDictionaryProvider()} and delegates to the
 * five-argument overload without a schema output struct.
 *
 * @param allocator Buffer allocator for allocating C data interface fields
 * @param table Table to export
 * @param out C struct that receives the exported table data
 */
public static void exportTable(BufferAllocator allocator, Table table, ArrowArray out) {
  // Resolve the table's own dictionaries first, then hand off to the full overload.
  final DictionaryProvider tableDictionaries = table.getDictionaryProvider();
  exportTable(allocator, table, tableDictionaries, out, null);
}

/**
 * Exports the current contents of a Java Table through the C data interface,
 * using a caller-supplied dictionary provider.
 * <p>
 * The table is exported as if it were a struct array. This overload delegates
 * to the five-argument overload without a schema output struct.
 *
 * @param allocator Buffer allocator for allocating C data interface fields
 * @param table Table to export
 * @param provider Dictionary provider for dictionary encoded vectors
 *        (optional)
 * @param out C struct that receives the exported table data
 */
public static void exportTable(BufferAllocator allocator, Table table,
    DictionaryProvider provider, ArrowArray out) {
  // This overload never exports a schema; spell that out with a typed placeholder.
  final ArrowSchema noSchemaRequested = null;
  exportTable(allocator, table, provider, out, noSchemaRequested);
}

/**
 * Exports the current contents of a Java Table through the C data interface,
 * optionally exporting its schema as well.
 * <p>
 * The table is first converted with {@code table.toVectorSchemaRoot()} and the
 * result is handed to {@code exportVectorSchemaRoot}, so the table is exported
 * as if it were a struct array.
 *
 * @param allocator Buffer allocator for allocating C data interface fields
 * @param table Table to export
 * @param provider Dictionary provider for dictionary encoded vectors
 *        (optional)
 * @param out C struct that receives the exported table data
 * @param outSchema C struct that receives the exported schema (optional)
 */
public static void exportTable(BufferAllocator allocator, Table table,
    DictionaryProvider provider, ArrowArray out, ArrowSchema outSchema) {
  // NOTE(review): the root produced here is not closed in this method -- confirm who owns it.
  final VectorSchemaRoot tableAsRoot = table.toVectorSchemaRoot();
  exportVectorSchemaRoot(allocator, tableAsRoot, provider, out, outSchema);
}

/**
* Export the current contents of a Java VectorSchemaRoot using the C data
* interface format.
* <p>
* The vector schema root is exported as if it were a struct array. The
* resulting ArrowArray struct keeps the record batch data and buffers alive
* until its release callback is called by the consumer.
*
*
* @param allocator Buffer allocator for allocating C data interface fields
* @param vsr Vector schema root to export
* @param provider Dictionary provider for dictionary encoded vectors
* (optional)
* @param out C struct where to export the record batch
*/
public static void exportVectorSchemaRoot(BufferAllocator allocator, VectorSchemaRoot vsr,
DictionaryProvider provider, ArrowArray out) {
DictionaryProvider provider, ArrowArray out) {
exportVectorSchemaRoot(allocator, vsr, provider, out, null);
}

Expand Down
34 changes: 34 additions & 0 deletions java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
import org.apache.arrow.vector.holders.IntervalDayHolder;
import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder;
import org.apache.arrow.vector.holders.NullableUInt4Holder;
import org.apache.arrow.vector.table.Table;
import org.apache.arrow.vector.types.TimeUnit;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.ArrowType;
Expand Down Expand Up @@ -657,6 +658,39 @@ public void testVectorSchemaRoot() {
imported.close();
}

/**
 * Tests exporting a Table and importing it back as a VectorSchemaRoot. Importing back to a Table is
 * not supported at present.
 */
@Test
public void testTable() {
  VectorSchemaRoot imported;

  // Consumer allocates empty structures
  try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator);
      ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) {
    try (
        VectorSchemaRoot vsr = createTestVSR();
        Table table = new Table(vsr);
    ) {
      // Producer creates structures from existing memory pointers
      try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress());
          ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) {
        // Producer exports the table data AND its schema into the C Data Interface structures.
        // The consumer imports with consumerArrowSchema below, so the schema struct must be
        // populated here; the three-argument overload passes a null schema and leaves it empty.
        Data.exportTable(allocator, table, table.getDictionaryProvider(), arrowArray, arrowSchema);
      }
    }
    // Consumer imports vector
    imported = Data.importVectorSchemaRoot(allocator, consumerArrowArray, consumerArrowSchema, null);
  }

  // Ensure that imported VectorSchemaRoot is valid even after C Data Interface
  // structures are closed
  try (VectorSchemaRoot original = createTestVSR()) {
    assertTrue(imported.equals(original));
  } finally {
    // Release the imported root even when the assertion above fails.
    imported.close();
  }
}

@Test
public void testVectorSchemaRootWithDuplicatedFieldNames() {
VectorSchemaRoot imported;
Expand Down
10 changes: 5 additions & 5 deletions java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,9 @@
<version>8.19</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
<version>1.7.5</version>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
<version>1.7.5</version>
</dependency>
</dependencies>
<executions>
Expand Down Expand Up @@ -749,7 +749,7 @@
<!-- Use the version of arrow-vector that shades flatbuffers and packages format -->
<id>shade-flatbuffers</id>
<properties>
<arrow.vector.classifier>shade-format-flatbuffers</arrow.vector.classifier>
<arrow.vector.classifier>shade-format-flatbuffers</arrow.vector.classifier>
</properties>
</profile>

Expand All @@ -763,7 +763,7 @@
<activation>
<jdk>1.8</jdk>
<property>
<name>!m2e.version</name>
<name>!m2e.version</name>
</property>
</activation>
<build>
Expand Down
Loading

0 comments on commit 387980e

Please sign in to comment.