diff --git a/docs/source/java/cdata.rst b/docs/source/java/cdata.rst index 7e5c2df1c5e47..44e4f230ba65d 100644 --- a/docs/source/java/cdata.rst +++ b/docs/source/java/cdata.rst @@ -33,56 +33,57 @@ Python communication using the C Data Interface. Java to C++ ----------- -Example: Share an Int64 array from C++ to Java: - -**C++ Side** - See :doc:`../developers/cpp/building` to build the Arrow C++ libraries: .. code-block:: shell - $ git clone https://github.com/apache/arrow.git - $ cd arrow/cpp - $ mkdir build # from inside the `cpp` subdirectory - $ cd build - $ cmake .. --preset ninja-debug-minimal - $ cmake --build . - $ tree debug/ - debug/ - ├── libarrow.800.0.0.dylib - ├── libarrow.800.dylib -> libarrow.800.0.0.dylib - └── libarrow.dylib -> libarrow.800.dylib + $ git clone https://github.com/apache/arrow.git + $ cd arrow/cpp + $ mkdir build # from inside the `cpp` subdirectory + $ cd build + $ cmake .. --preset ninja-debug-minimal + $ cmake --build . + $ tree debug/ + debug/ + ├── libarrow.800.0.0.dylib + ├── libarrow.800.dylib -> libarrow.800.0.0.dylib + └── libarrow.dylib -> libarrow.800.dylib + +Share an Int64 array from C++ to Java +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**C++ Side** Implement a function in CDataCppBridge.h that exports an array via the C Data Interface: .. 
code-block:: cpp - #include - #include - #include - - void FillInt64Array(const uintptr_t c_schema_ptr, const uintptr_t c_array_ptr) { - arrow::Int64Builder builder; - builder.Append(1); - builder.Append(2); - builder.Append(3); - builder.AppendNull(); - builder.Append(5); - builder.Append(6); - builder.Append(7); - builder.Append(8); - builder.Append(9); - builder.Append(10); - std::shared_ptr array = *builder.Finish(); - - struct ArrowSchema* c_schema = reinterpret_cast(c_schema_ptr); - auto c_schema_status = arrow::ExportType(*array->type(), c_schema); - if (!c_schema_status.ok()) c_schema_status.Abort(); - - struct ArrowArray* c_array = reinterpret_cast(c_array_ptr); - auto c_array_status = arrow::ExportArray(*array, c_array); - if (!c_array_status.ok()) c_array_status.Abort(); - } + #include + #include + #include + + void FillInt64Array(const uintptr_t c_schema_ptr, const uintptr_t c_array_ptr) { + arrow::Int64Builder builder; + builder.Append(1); + builder.Append(2); + builder.Append(3); + builder.AppendNull(); + builder.Append(5); + builder.Append(6); + builder.Append(7); + builder.Append(8); + builder.Append(9); + builder.Append(10); + std::shared_ptr array = *builder.Finish(); + + struct ArrowSchema* c_schema = reinterpret_cast(c_schema_ptr); + auto c_schema_status = arrow::ExportType(*array->type(), c_schema); + if (!c_schema_status.ok()) c_schema_status.Abort(); + + struct ArrowArray* c_array = reinterpret_cast(c_array_ptr); + auto c_array_status = arrow::ExportArray(*array, c_array); + if (!c_array_status.ok()) c_array_status.Abort(); + } **Java Side** @@ -91,98 +92,98 @@ without writing JNI bindings ourselves. .. 
code-block:: xml - - - 4.0.0 - - org.example - java-cdata-example - 1.0-SNAPSHOT - - - 8 - 8 - 8.0.0 - - - - org.bytedeco - javacpp - 1.5.7 - - - org.apache.arrow - arrow-c-data - ${arrow.version} - - - org.apache.arrow - arrow-vector - ${arrow.version} - - - org.apache.arrow - arrow-memory-core - ${arrow.version} - - - org.apache.arrow - arrow-memory-netty - ${arrow.version} - - - org.apache.arrow - arrow-format - ${arrow.version} - - - + + + 4.0.0 + + org.example + java-cdata-example + 1.0-SNAPSHOT + + + 8 + 8 + 9.0.0 + + + + org.bytedeco + javacpp + 1.5.7 + + + org.apache.arrow + arrow-c-data + ${arrow.version} + + + org.apache.arrow + arrow-vector + ${arrow.version} + + + org.apache.arrow + arrow-memory-core + ${arrow.version} + + + org.apache.arrow + arrow-memory-netty + ${arrow.version} + + + org.apache.arrow + arrow-format + ${arrow.version} + + + .. code-block:: java - import org.bytedeco.javacpp.annotation.Platform; - import org.bytedeco.javacpp.annotation.Properties; - import org.bytedeco.javacpp.tools.InfoMap; - import org.bytedeco.javacpp.tools.InfoMapper; - - @Properties( - target = "CDataJavaToCppExample", - value = @Platform( - include = { - "CDataCppBridge.h" - }, - compiler = {"cpp11"}, - linkpath = {"/arrow/cpp/build/debug/"}, - link = {"arrow"} - ) - ) - public class CDataJavaConfig implements InfoMapper { - - @Override - public void map(InfoMap infoMap) { - } - } + import org.bytedeco.javacpp.annotation.Platform; + import org.bytedeco.javacpp.annotation.Properties; + import org.bytedeco.javacpp.tools.InfoMap; + import org.bytedeco.javacpp.tools.InfoMapper; + + @Properties( + target = "CDataJavaToCppExample", + value = @Platform( + include = { + "CDataCppBridge.h" + }, + compiler = {"cpp11"}, + linkpath = {"/arrow/cpp/build/debug/"}, + link = {"arrow"} + ) + ) + public class CDataJavaConfig implements InfoMapper { + + @Override + public void map(InfoMap infoMap) { + } + } .. 
code-block:: shell - # Compile our Java code - $ javac -cp javacpp-1.5.7.jar CDataJavaConfig.java + # Compile our Java code + $ javac -cp javacpp-1.5.7.jar CDataJavaConfig.java - # Generate CDataInterfaceLibrary - $ java -jar javacpp-1.5.7.jar CDataJavaConfig.java + # Generate CDataJavaToCppExample.java + $ java -jar javacpp-1.5.7.jar CDataJavaConfig.java - # Generate libjniCDataInterfaceLibrary.dylib - $ java -jar javacpp-1.5.7.jar CDataJavaToCppExample.java + # Generate libjniCDataJavaToCppExample.dylib + $ java -jar javacpp-1.5.7.jar CDataJavaToCppExample.java - # Validate libjniCDataInterfaceLibrary.dylib created - $ otool -L macosx-x86_64/libjniCDataJavaToCppExample.dylib - macosx-x86_64/libjniCDataJavaToCppExample.dylib: - libjniCDataJavaToCppExample.dylib (compatibility version 0.0.0, current version 0.0.0) - @rpath/libarrow.800.dylib (compatibility version 800.0.0, current version 800.0.0) - /usr/lib/libc++.1.dylib (compatibility version 1.0.0, current version 1200.3.0) - /usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 1311.0.0) + # Validate that libjniCDataJavaToCppExample.dylib was created + $ otool -L macosx-x86_64/libjniCDataJavaToCppExample.dylib + macosx-x86_64/libjniCDataJavaToCppExample.dylib: + libjniCDataJavaToCppExample.dylib (compatibility version 0.0.0, current version 0.0.0) + @rpath/libarrow.800.dylib (compatibility version 800.0.0, current version 800.0.0) + /usr/lib/libc++.1.dylib (compatibility version 1.0.0, current version 1200.3.0) + /usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 1311.0.0) **Java Test** @@ -190,34 +191,280 @@ Let's create a Java class to test our bridge: ..
code-block:: java - import org.apache.arrow.c.ArrowArray; - import org.apache.arrow.c.ArrowSchema; - import org.apache.arrow.c.Data; - import org.apache.arrow.memory.BufferAllocator; - import org.apache.arrow.memory.RootAllocator; - import org.apache.arrow.vector.BigIntVector; - - public class TestCDataInterface { - public static void main(String[] args) { - try( - BufferAllocator allocator = new RootAllocator(); - ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator); - ArrowArray arrowArray = ArrowArray.allocateNew(allocator) - ){ - CDataJavaToCppExample.FillInt64Array( - arrowSchema.memoryAddress(), arrowArray.memoryAddress()); - try( - BigIntVector bigIntVector = (BigIntVector) Data.importVector( - allocator, arrowArray, arrowSchema, null) - ){ - System.out.println("C++-allocated array: " + bigIntVector); - } - } - } - } + import org.apache.arrow.c.ArrowArray; + import org.apache.arrow.c.ArrowSchema; + import org.apache.arrow.c.Data; + import org.apache.arrow.memory.BufferAllocator; + import org.apache.arrow.memory.RootAllocator; + import org.apache.arrow.vector.BigIntVector; + + public class TestCDataInterface { + public static void main(String[] args) { + try( + BufferAllocator allocator = new RootAllocator(); + ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator); + ArrowArray arrowArray = ArrowArray.allocateNew(allocator) + ){ + CDataJavaToCppExample.FillInt64Array( + arrowSchema.memoryAddress(), arrowArray.memoryAddress()); + try( + BigIntVector bigIntVector = (BigIntVector) Data.importVector( + allocator, arrowArray, arrowSchema, null) + ){ + System.out.println("C++-allocated array: " + bigIntVector); + } + } + } + } + +.. code-block:: shell + + C++-allocated array: [1, 2, 3, null, 5, 6, 7, 8, 9, 10] + +Share an Int32 array from Java to C++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Java Side** + +For this example, we will build a JAR with all dependencies bundled. + +.. 
code-block:: xml + + + + 4.0.0 + org.example + cpptojava + 1.0-SNAPSHOT + + 8 + 8 + 9.0.0 + + + + org.apache.arrow + arrow-c-data + ${arrow.version} + + + org.apache.arrow + arrow-memory-netty + ${arrow.version} + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + package + + single + + + + jar-with-dependencies + + + + + + + + + +.. code-block:: java + + import org.apache.arrow.c.ArrowArray; + import org.apache.arrow.c.ArrowSchema; + import org.apache.arrow.c.Data; + import org.apache.arrow.memory.BufferAllocator; + import org.apache.arrow.memory.RootAllocator; + import org.apache.arrow.vector.FieldVector; + import org.apache.arrow.vector.IntVector; + import org.apache.arrow.vector.VectorSchemaRoot; + + import java.util.Arrays; + + public class ToBeCalledByCpp { + final static BufferAllocator allocator = new RootAllocator(); + + /** + * Create a {@link FieldVector} and export it via the C Data Interface + * @param schemaAddress Schema memory address to wrap + * @param arrayAddress Array memory address to wrap + */ + public static void fillVector(long schemaAddress, long arrayAddress){ + try (ArrowArray arrow_array = ArrowArray.wrap(arrayAddress); + ArrowSchema arrow_schema = ArrowSchema.wrap(schemaAddress) ) { + Data.exportVector(allocator, populateFieldVectorToExport(), null, arrow_array, arrow_schema); + } + } + + /** + * Create a {@link VectorSchemaRoot} and export it via the C Data Interface + * @param schemaAddress Schema memory address to wrap + * @param arrayAddress Array memory address to wrap + */ + public static void fillVectorSchemaRoot(long schemaAddress, long arrayAddress){ + try (ArrowArray arrow_array = ArrowArray.wrap(arrayAddress); + ArrowSchema arrow_schema = ArrowSchema.wrap(schemaAddress) ) { + Data.exportVectorSchemaRoot(allocator, populateVectorSchemaRootToExport(), null, arrow_array, arrow_schema); + } + } + + private static FieldVector populateFieldVectorToExport(){ + IntVector intVector = new IntVector("int-to-export", 
allocator); + intVector.allocateNew(3); + intVector.setSafe(0, 1); + intVector.setSafe(1, 2); + intVector.setSafe(2, 3); + intVector.setValueCount(3); + System.out.println("[Java] FieldVector: \n" + intVector); + return intVector; + } + + private static VectorSchemaRoot populateVectorSchemaRootToExport(){ + IntVector intVector = new IntVector("age-to-export", allocator); + intVector.setSafe(0, 10); + intVector.setSafe(1, 20); + intVector.setSafe(2, 30); + VectorSchemaRoot root = new VectorSchemaRoot(Arrays.asList(intVector)); + root.setRowCount(3); + System.out.println("[Java] VectorSchemaRoot: \n" + root.contentToTSVString()); + return root; + } + } + +Build the JAR and copy it to the C++ project. .. code-block:: shell - C++-allocated array: [1, 2, 3, null, 5, 6, 7, 8, 9, 10] + $ mvn clean install + $ cp target/cpptojava-1.0-SNAPSHOT-jar-with-dependencies.jar /path/to/cpp-project/cpptojava.jar + +**C++ Side** + +This application uses JNI to call Java code, but transfers data (zero-copy) via the C Data Interface instead. + +..
code-block:: cpp + + #include + #include + + #include + #include + + JNIEnv *CreateVM(JavaVM **jvm) { + JNIEnv *env; + JavaVMInitArgs vm_args; + JavaVMOption options[2]; + options[0].optionString = "-Djava.class.path=cpptojava.jar"; + options[1].optionString = "-DXcheck:jni:pedantic"; + vm_args.version = JNI_VERSION_1_8; + vm_args.nOptions = 2; + vm_args.options = options; + int status = JNI_CreateJavaVM(jvm, (void **) &env, &vm_args); + if (status < 0) { + std::cerr << "\n<<<<< Unable to Launch JVM >>>>>\n" << std::endl; + return nullptr; + } + return env; + } + + int main() { + JNIEnv *env; + JavaVM *jvm; + env = CreateVM(&jvm); + if (env == nullptr) return EXIT_FAILURE; + jclass javaClassToBeCalledByCpp = env->FindClass("ToBeCalledByCpp"); + if (javaClassToBeCalledByCpp != nullptr) { + jmethodID fillVector = env->GetStaticMethodID(javaClassToBeCalledByCpp, + "fillVector", + "(JJ)V"); + if (fillVector != nullptr) { + struct ArrowSchema arrowSchema; + struct ArrowArray arrowArray; + std::cout << "\n<<<<< C++ to Java for Arrays >>>>>\n" << std::endl; + env->CallStaticVoidMethod(javaClassToBeCalledByCpp, fillVector, + static_cast(reinterpret_cast(&arrowSchema)), + static_cast(reinterpret_cast(&arrowArray))); + auto resultImportArray = arrow::ImportArray(&arrowArray, &arrowSchema); + std::shared_ptr array = resultImportArray.ValueOrDie(); + std::cout << "[C++] Array: " << array->ToString() << std::endl; + } else { + std::cerr << "Could not find fillVector method\n" << std::endl; + return EXIT_FAILURE; + } + jmethodID fillVectorSchemaRoot = env->GetStaticMethodID(javaClassToBeCalledByCpp, + "fillVectorSchemaRoot", + "(JJ)V"); + if (fillVectorSchemaRoot != nullptr) { + struct ArrowSchema arrowSchema; + struct ArrowArray arrowArray; + std::cout << "\n<<<<< C++ to Java for RecordBatch >>>>>\n" << std::endl; + env->CallStaticVoidMethod(javaClassToBeCalledByCpp, fillVectorSchemaRoot, + static_cast(reinterpret_cast(&arrowSchema)), + 
static_cast(reinterpret_cast(&arrowArray))); + auto resultImportVectorSchemaRoot = arrow::ImportRecordBatch(&arrowArray, &arrowSchema); + std::shared_ptr recordBatch = resultImportVectorSchemaRoot.ValueOrDie(); + std::cout << "[C++] RecordBatch: " << recordBatch->ToString() << std::endl; + } else { + std::cerr << "Could not find fillVectorSchemaRoot method\n" << std::endl; + return EXIT_FAILURE; + } + } else { + std::cout << "Could not find ToBeCalledByCpp class\n" << std::endl; + return EXIT_FAILURE; + } + jvm->DestroyJavaVM(); + return EXIT_SUCCESS; + } + +CMakeLists.txt definition file: + +.. code-block:: cmake + + cmake_minimum_required(VERSION 3.19) + project(cdatacpptojava) + find_package(JNI REQUIRED) + find_package(Arrow REQUIRED) + message(STATUS "Arrow version: ${ARROW_VERSION}") + include_directories(${JNI_INCLUDE_DIRS}) + set(CMAKE_CXX_STANDARD 11) + add_executable(${PROJECT_NAME} main.cpp) + target_link_libraries(cdatacpptojava PRIVATE arrow_shared) + target_link_libraries(cdatacpptojava PRIVATE ${JNI_LIBRARIES}) + +**Result** + +.. code-block:: text + + <<<<< C++ to Java for Arrays >>>>> + [Java] FieldVector: + [1, 2, 3] + [C++] Array: [ + 1, + 2, + 3 + ] + + <<<<< C++ to Java for RecordBatch >>>>> + [Java] VectorSchemaRoot: + age-to-export + 10 + 20 + 30 + + [C++] RecordBatch: age-to-export: [ + 10, + 20, + 30 + ] .. _`JavaCPP`: https://github.com/bytedeco/javacpp