From 4b1e17824b10fd158335f4b361e4b4bc58830a37 Mon Sep 17 00:00:00 2001 From: David Li Date: Tue, 26 Nov 2024 19:23:25 -0500 Subject: [PATCH] GH-6: Add Linux test CI Fixes #6. --- .env | 52 + .github/workflows/java.yml | 73 + .gitignore | 1 + .gitmodules | 3 + arrow-format/File.fbs | 52 + arrow-format/Flight.proto | 645 ++++++ arrow-format/FlightSql.proto | 1925 +++++++++++++++++ arrow-format/Message.fbs | 157 ++ arrow-format/README.rst | 25 + arrow-format/Schema.fbs | 571 +++++ arrow-format/SparseTensor.fbs | 228 ++ arrow-format/Tensor.fbs | 54 + arrow-format/substrait/extension_types.yaml | 170 ++ ci/scripts/java_build.sh | 78 + ci/scripts/java_test.sh | 55 + dataset/pom.xml | 2 +- docker-compose.yml | 47 + flight/flight-core/pom.xml | 4 +- flight/flight-sql-jdbc-core/pom.xml | 2 +- testing | 1 + .../apache/arrow/tools/TestIntegration.java | 20 +- .../resources/integration_json_simple.json | 98 + .../resources/integration_json_struct.json | 201 ++ .../apache/arrow/vector/ipc/TestJSONFile.java | 8 +- .../resources/integration_json_struct.json | 201 ++ 25 files changed, 4653 insertions(+), 20 deletions(-) create mode 100644 .env create mode 100644 .github/workflows/java.yml create mode 100644 .gitmodules create mode 100644 arrow-format/File.fbs create mode 100644 arrow-format/Flight.proto create mode 100644 arrow-format/FlightSql.proto create mode 100644 arrow-format/Message.fbs create mode 100644 arrow-format/README.rst create mode 100644 arrow-format/Schema.fbs create mode 100644 arrow-format/SparseTensor.fbs create mode 100644 arrow-format/Tensor.fbs create mode 100644 arrow-format/substrait/extension_types.yaml create mode 100755 ci/scripts/java_build.sh create mode 100755 ci/scripts/java_test.sh create mode 100644 docker-compose.yml create mode 160000 testing create mode 100644 tools/src/test/resources/integration_json_simple.json create mode 100644 tools/src/test/resources/integration_json_struct.json create mode 100644 vector/src/test/resources/integration_json_struct.json diff --git a/.env b/.env new file mode 100644 index 000000000..959b1bfc2 --- /dev/null +++ b/.env @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# All of the following environment variables are required to set default values +# for the parameters in docker-compose.yml. 
+
+# An empty prefix means that the docker-compose configuration will use named
+# volumes, which potentially improves performance on Docker for macOS and
+# Docker for Windows, and also prevents contamination of the source directory.
+# A non-empty prefix means that directories from the host are bind-mounted
+# into the container. It should be set to ".docker/" on GitHub Actions to keep
+# the cache plugin functional.
+DOCKER_VOLUME_PREFIX=
+
+# Turn on the inline build cache. This is a Docker Buildx feature documented
+# at https://github.com/docker/buildx#--cache-tonametypetypekeyvalue
+BUILDKIT_INLINE_CACHE=1
+COMPOSE_DOCKER_CLI_BUILD=1
+DOCKER_BUILDKIT=1
+
+# Different architecture notations
+ARCH=amd64
+ARCH_ALIAS=x86_64
+ARCH_SHORT=amd64
+
+# Default repository to pull and push images from
+REPO=apache/arrow-dev
+
+# The setup attempts to generate coredumps by default; to disable coredump
+# generation, set this to 0.
+ULIMIT_CORE=-1
+
+# Default versions for various dependencies
+JDK=11
+MAVEN=3.9.6
diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml
new file mode 100644
index 000000000..8641059ff
--- /dev/null
+++ b/.github/workflows/java.yml
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +name: Test + +on: + push: + branches: + - '**' + - '!dependabot/**' + tags: + - '**' + pull_request: + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +permissions: + contents: read + +env: + DOCKER_VOLUME_PREFIX: ".docker/" + +jobs: + ubuntu: + name: AMD64 Ubuntu 22.04 JDK ${{ matrix.jdk }} Maven ${{ matrix.maven }} + runs-on: ubuntu-latest + if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + jdk: [11, 17, 21, 22] + maven: [3.9.6] + image: [java] + env: + JDK: ${{ matrix.jdk }} + MAVEN: ${{ matrix.maven }} + steps: + - name: Checkout Arrow + uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 + with: + fetch-depth: 0 + submodules: recursive + - name: Cache Docker Volumes + uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + with: + path: .docker + key: maven-${{ hashFiles('java/**') }} + restore-keys: maven- + - name: Execute Docker Build + env: + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + run: | + docker compose run \ + -e CI=true \ + -e "DEVELOCITY_ACCESS_KEY=$DEVELOCITY_ACCESS_KEY" \ + ${{ matrix.image }} diff --git a/.gitignore b/.gitignore index 63c90af7b..80722af45 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ arrow-git.properties cmake_install.cmake install_manifest.txt target/ +/.mvn/.develocity/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..71139e3e3 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "testing"] + path = testing + url = git@github.com:apache/arrow-testing diff --git a/arrow-format/File.fbs b/arrow-format/File.fbs new file mode 100644 index 000000000..906d494f2 --- /dev/null +++ b/arrow-format/File.fbs @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +include "Schema.fbs"; + +namespace org.apache.arrow.flatbuf; + +/// ---------------------------------------------------------------------- +/// Arrow File metadata +/// + +table Footer { + version: org.apache.arrow.flatbuf.MetadataVersion; + + schema: org.apache.arrow.flatbuf.Schema; + + dictionaries: [ Block ]; + + recordBatches: [ Block ]; + + /// User-defined metadata + custom_metadata: [ KeyValue ]; +} + +struct Block { + + /// Index to the start of the RecordBlock (note this is past the Message header) + offset: long; + + /// Length of the metadata + metaDataLength: int; + + /// Length of the data (this is aligned so there can be a gap between this and + /// the metadata). 
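+ /// As a non-normative illustration, readers usually consume these blocks
+ /// through the IPC layer rather than by hand; a Java sketch (assuming this
+ /// repository's org.apache.arrow.vector.ipc.ArrowFileReader):
+ ///
+ ///   try (ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ ///     reader.getRecordBlocks();  // one entry per Footer.recordBatches Block
+ ///   }
+ ///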
+ bodyLength: long; +} + +root_type Footer; diff --git a/arrow-format/Flight.proto b/arrow-format/Flight.proto new file mode 100644 index 000000000..f2b0f889c --- /dev/null +++ b/arrow-format/Flight.proto @@ -0,0 +1,645 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto3"; +import "google/protobuf/timestamp.proto"; + +option java_package = "org.apache.arrow.flight.impl"; +option go_package = "github.com/apache/arrow-go/arrow/flight/gen/flight"; +option csharp_namespace = "Apache.Arrow.Flight.Protocol"; + +package arrow.flight.protocol; + +/* + * A flight service is an endpoint for retrieving or storing Arrow data. A + * flight service can expose one or more predefined endpoints that can be + * accessed using the Arrow Flight Protocol. Additionally, a flight service + * can expose a set of actions that are available. + */ +service FlightService { + + /* + * Handshake between client and server. Depending on the server, the + * handshake may be required to determine the token that should be used for + * future operations. Both request and response are streams to allow multiple + * round-trips depending on auth mechanism. + */ + rpc Handshake(stream HandshakeRequest) returns (stream HandshakeResponse) {} + + /* + * Get a list of available streams given a particular criteria. Most flight + * services will expose one or more streams that are readily available for + * retrieval. This api allows listing the streams available for + * consumption. A user can also provide a criteria. The criteria can limit + * the subset of streams that can be listed via this interface. Each flight + * service allows its own definition of how to consume criteria. + */ + rpc ListFlights(Criteria) returns (stream FlightInfo) {} + + /* + * For a given FlightDescriptor, get information about how the flight can be + * consumed. This is a useful interface if the consumer of the interface + * already can identify the specific flight to consume. This interface can + * also allow a consumer to generate a flight stream through a specified + * descriptor. For example, a flight descriptor might be something that + * includes a SQL statement or a Pickled Python operation that will be + * executed. In those cases, the descriptor will not be previously available + * within the list of available streams provided by ListFlights but will be + * available for consumption for the duration defined by the specific flight + * service. + */ + rpc GetFlightInfo(FlightDescriptor) returns (FlightInfo) {} + + /* + * For a given FlightDescriptor, start a query and get information + * to poll its execution status. This is a useful interface if the + * query may be a long-running query. The first PollFlightInfo call + * should return as quickly as possible. (GetFlightInfo doesn't + * return until the query is complete.) + * + * A client can consume any available results before + * the query is completed. See PollInfo.info for details. + * + * A client can poll the updated query status by calling + * PollFlightInfo() with PollInfo.flight_descriptor. A server + * should not respond until the result would be different from last + * time. That way, the client can "long poll" for updates + * without constantly making requests. Clients can set a short timeout + * to avoid blocking calls if desired. + * + * A client can't use PollInfo.flight_descriptor after + * PollInfo.expiration_time passes. 
A server might not accept the + * retry descriptor anymore and the query may be cancelled. + * + * A client may use the CancelFlightInfo action with + * PollInfo.info to cancel the running query. + */ + rpc PollFlightInfo(FlightDescriptor) returns (PollInfo) {} + + /* + * For a given FlightDescriptor, get the Schema as described in Schema.fbs::Schema + * This is used when a consumer needs the Schema of flight stream. Similar to + * GetFlightInfo this interface may generate a new flight that was not previously + * available in ListFlights. + */ + rpc GetSchema(FlightDescriptor) returns (SchemaResult) {} + + /* + * Retrieve a single stream associated with a particular descriptor + * associated with the referenced ticket. A Flight can be composed of one or + * more streams where each stream can be retrieved using a separate opaque + * ticket that the flight service uses for managing a collection of streams. + */ + rpc DoGet(Ticket) returns (stream FlightData) {} + + /* + * Push a stream to the flight service associated with a particular + * flight stream. This allows a client of a flight service to upload a stream + * of data. Depending on the particular flight service, a client consumer + * could be allowed to upload a single stream per descriptor or an unlimited + * number. In the latter, the service might implement a 'seal' action that + * can be applied to a descriptor once all streams are uploaded. + */ + rpc DoPut(stream FlightData) returns (stream PutResult) {} + + /* + * Open a bidirectional data channel for a given descriptor. This + * allows clients to send and receive arbitrary Arrow data and + * application-specific metadata in a single logical stream. In + * contrast to DoGet/DoPut, this is more suited for clients + * offloading computation (rather than storage) to a Flight service. + */ + rpc DoExchange(stream FlightData) returns (stream FlightData) {} + + /* + * Flight services can support an arbitrary number of simple actions in + * addition to the possible ListFlights, GetFlightInfo, DoGet, DoPut + * operations that are potentially available. DoAction allows a flight client + * to do a specific action against a flight service. An action includes + * opaque request and response objects that are specific to the type action + * being undertaken. + */ + rpc DoAction(Action) returns (stream Result) {} + + /* + * A flight service exposes all of the available action types that it has + * along with descriptions. This allows different flight consumers to + * understand the capabilities of the flight service. + */ + rpc ListActions(Empty) returns (stream ActionType) {} +} + +/* + * The request that a client provides to a server on handshake. + */ +message HandshakeRequest { + + /* + * A defined protocol version + */ + uint64 protocol_version = 1; + + /* + * Arbitrary auth/handshake info. + */ + bytes payload = 2; +} + +message HandshakeResponse { + + /* + * A defined protocol version + */ + uint64 protocol_version = 1; + + /* + * Arbitrary auth/handshake info. + */ + bytes payload = 2; +} + +/* + * A message for doing simple auth. + */ +message BasicAuth { + string username = 2; + string password = 3; +} + +message Empty {} + +/* + * Describes an available action, including both the name used for execution + * along with a short description of the purpose of the action. + */ +message ActionType { + string type = 1; + string description = 2; +} + +/* + * A service specific expression that can be used to return a limited set + * of available Arrow Flight streams. 
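+ *
+ * As a non-normative Java sketch (assuming a connected
+ * org.apache.arrow.flight.FlightClient named `client`; the expression
+ * bytes below are an invented example, since only the service defines
+ * how they are interpreted):
+ *
+ *   Criteria criteria =
+ *       new Criteria("category=sales".getBytes(StandardCharsets.UTF_8));
+ *   for (FlightInfo info : client.listFlights(criteria)) {
+ *     System.out.println(info.getDescriptor());
+ *   }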
+ */ +message Criteria { + bytes expression = 1; +} + +/* + * An opaque action specific for the service. + */ +message Action { + string type = 1; + bytes body = 2; +} + +/* + * An opaque result returned after executing an action. + */ +message Result { + bytes body = 1; +} + +/* + * Wrap the result of a getSchema call + */ +message SchemaResult { + // The schema of the dataset in its IPC form: + // 4 bytes - an optional IPC_CONTINUATION_TOKEN prefix + // 4 bytes - the byte length of the payload + // a flatbuffer Message whose header is the Schema + bytes schema = 1; +} + +/* + * The name or tag for a Flight. May be used as a way to retrieve or generate + * a flight or be used to expose a set of previously defined flights. + */ +message FlightDescriptor { + + /* + * Describes what type of descriptor is defined. + */ + enum DescriptorType { + + // Protobuf pattern, not used. + UNKNOWN = 0; + + /* + * A named path that identifies a dataset. A path is composed of a string + * or list of strings describing a particular dataset. This is conceptually + * similar to a path inside a filesystem. + */ + PATH = 1; + + /* + * An opaque command to generate a dataset. + */ + CMD = 2; + } + + DescriptorType type = 1; + + /* + * Opaque value used to express a command. Should only be defined when + * type = CMD. + */ + bytes cmd = 2; + + /* + * List of strings identifying a particular dataset. Should only be defined + * when type = PATH. + */ + repeated string path = 3; +} + +/* + * The access coordinates for retrieval of a dataset. With a FlightInfo, a + * consumer is able to determine how to retrieve a dataset. + */ +message FlightInfo { + // The schema of the dataset in its IPC form: + // 4 bytes - an optional IPC_CONTINUATION_TOKEN prefix + // 4 bytes - the byte length of the payload + // a flatbuffer Message whose header is the Schema + bytes schema = 1; + + /* + * The descriptor associated with this info. + */ + FlightDescriptor flight_descriptor = 2; + + /* + * A list of endpoints associated with the flight. To consume the + * whole flight, all endpoints (and hence all Tickets) must be + * consumed. Endpoints can be consumed in any order. + * + * In other words, an application can use multiple endpoints to + * represent partitioned data. + * + * If the returned data has an ordering, an application can use + * "FlightInfo.ordered = true" or should return the all data in a + * single endpoint. Otherwise, there is no ordering defined on + * endpoints or the data within. + * + * A client can read ordered data by reading data from returned + * endpoints, in order, from front to back. + * + * Note that a client may ignore "FlightInfo.ordered = true". If an + * ordering is important for an application, an application must + * choose one of them: + * + * * An application requires that all clients must read data in + * returned endpoints order. + * * An application must return the all data in a single endpoint. + */ + repeated FlightEndpoint endpoint = 3; + + // Set these to -1 if unknown. + int64 total_records = 4; + int64 total_bytes = 5; + + /* + * FlightEndpoints are in the same order as the data. + */ + bool ordered = 6; + + /* + * Application-defined metadata. + * + * There is no inherent or required relationship between this + * and the app_metadata fields in the FlightEndpoints or resulting + * FlightData messages. Since this metadata is application-defined, + * a given application could define there to be a relationship, + * but there is none required by the spec. 
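+ *
+ * For example (an application-invented convention, not part of the
+ * spec), an application could agree to carry a UTF-8 query tag here and
+ * read it with the protoc-generated accessor:
+ *
+ *   String tag = flightInfoProto.getAppMetadata().toStringUtf8();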
+ */ + bytes app_metadata = 7; +} + +/* + * The information to process a long-running query. + */ +message PollInfo { + /* + * The currently available results. + * + * If "flight_descriptor" is not specified, the query is complete + * and "info" specifies all results. Otherwise, "info" contains + * partial query results. + * + * Note that each PollInfo response contains a complete + * FlightInfo (not just the delta between the previous and current + * FlightInfo). + * + * Subsequent PollInfo responses may only append new endpoints to + * info. + * + * Clients can begin fetching results via DoGet(Ticket) with the + * ticket in the info before the query is + * completed. FlightInfo.ordered is also valid. + */ + FlightInfo info = 1; + + /* + * The descriptor the client should use on the next try. + * If unset, the query is complete. + */ + FlightDescriptor flight_descriptor = 2; + + /* + * Query progress. If known, must be in [0.0, 1.0] but need not be + * monotonic or nondecreasing. If unknown, do not set. + */ + optional double progress = 3; + + /* + * Expiration time for this request. After this passes, the server + * might not accept the retry descriptor anymore (and the query may + * be cancelled). This may be updated on a call to PollFlightInfo. + */ + google.protobuf.Timestamp expiration_time = 4; +} + +/* + * The request of the CancelFlightInfo action. + * + * The request should be stored in Action.body. + */ +message CancelFlightInfoRequest { + FlightInfo info = 1; +} + +/* + * The result of a cancel operation. + * + * This is used by CancelFlightInfoResult.status. + */ +enum CancelStatus { + // The cancellation status is unknown. Servers should avoid using + // this value (send a NOT_FOUND error if the requested query is + // not known). Clients can retry the request. + CANCEL_STATUS_UNSPECIFIED = 0; + // The cancellation request is complete. Subsequent requests with + // the same payload may return CANCELLED or a NOT_FOUND error. + CANCEL_STATUS_CANCELLED = 1; + // The cancellation request is in progress. The client may retry + // the cancellation request. + CANCEL_STATUS_CANCELLING = 2; + // The query is not cancellable. The client should not retry the + // cancellation request. + CANCEL_STATUS_NOT_CANCELLABLE = 3; +} + +/* + * The result of the CancelFlightInfo action. + * + * The result should be stored in Result.body. + */ +message CancelFlightInfoResult { + CancelStatus status = 1; +} + +/* + * An opaque identifier that the service can use to retrieve a particular + * portion of a stream. + * + * Tickets are meant to be single use. It is an error/application-defined + * behavior to reuse a ticket. + */ +message Ticket { + bytes ticket = 1; +} + +/* + * A location where a Flight service will accept retrieval of a particular + * stream given a ticket. + */ +message Location { + string uri = 1; +} + +/* + * A particular stream or split associated with a flight. + */ +message FlightEndpoint { + + /* + * Token used to retrieve this stream. + */ + Ticket ticket = 1; + + /* + * A list of URIs where this ticket can be redeemed via DoGet(). + * + * If the list is empty, the expectation is that the ticket can only + * be redeemed on the current service where the ticket was + * generated. + * + * If the list is not empty, the expectation is that the ticket can be + * redeemed at any of the locations, and that the data returned will be + * equivalent. In this case, the ticket may only be redeemed at one of the + * given locations, and not (necessarily) on the current service. 
If one + * of the given locations is "arrow-flight-reuse-connection://?", the + * client may redeem the ticket on the service where the ticket was + * generated (i.e., the same as above), in addition to the other + * locations. (This URI was chosen to maximize compatibility, as 'scheme:' + * or 'scheme://' are not accepted by Java's java.net.URI.) + * + * In other words, an application can use multiple locations to + * represent redundant and/or load balanced services. + */ + repeated Location location = 2; + + /* + * Expiration time of this stream. If present, clients may assume + * they can retry DoGet requests. Otherwise, it is + * application-defined whether DoGet requests may be retried. + */ + google.protobuf.Timestamp expiration_time = 3; + + /* + * Application-defined metadata. + * + * There is no inherent or required relationship between this + * and the app_metadata fields in the FlightInfo or resulting + * FlightData messages. Since this metadata is application-defined, + * a given application could define there to be a relationship, + * but there is none required by the spec. + */ + bytes app_metadata = 4; +} + +/* + * The request of the RenewFlightEndpoint action. + * + * The request should be stored in Action.body. + */ +message RenewFlightEndpointRequest { + FlightEndpoint endpoint = 1; +} + +/* + * A batch of Arrow data as part of a stream of batches. + */ +message FlightData { + + /* + * The descriptor of the data. This is only relevant when a client is + * starting a new DoPut stream. + */ + FlightDescriptor flight_descriptor = 1; + + /* + * Header for message data as described in Message.fbs::Message. + */ + bytes data_header = 2; + + /* + * Application-defined metadata. + */ + bytes app_metadata = 3; + + /* + * The actual batch of Arrow data. Preferably handled with minimal-copies + * coming last in the definition to help with sidecar patterns (it is + * expected that some implementations will fetch this field off the wire + * with specialized code to avoid extra memory copies). + */ + bytes data_body = 1000; +} + +/** + * The response message associated with the submission of a DoPut. + */ +message PutResult { + bytes app_metadata = 1; +} + +/* + * EXPERIMENTAL: Union of possible value types for a Session Option to be set to. + * + * By convention, an attempt to set a valueless SessionOptionValue should + * attempt to unset or clear the named option value on the server. + */ +message SessionOptionValue { + message StringListValue { + repeated string values = 1; + } + + oneof option_value { + string string_value = 1; + bool bool_value = 2; + sfixed64 int64_value = 3; + double double_value = 4; + StringListValue string_list_value = 5; + } +} + +/* + * EXPERIMENTAL: A request to set session options for an existing or new (implicit) + * server session. + * + * Sessions are persisted and referenced via a transport-level state management, typically + * RFC 6265 HTTP cookies when using an HTTP transport. The suggested cookie name or state + * context key is 'arrow_flight_session_id', although implementations may freely choose their + * own name. + * + * Session creation (if one does not already exist) is implied by this RPC request, however + * server implementations may choose to initiate a session that also contains client-provided + * session options at any other time, e.g. on authentication, or when any other call is made + * and the server wishes to use a session to persist any state (or lack thereof). 
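+ *
+ * A minimal request built with the protoc-generated classes might look
+ * like this (a sketch; the option name "catalog" and value "main" are
+ * arbitrary illustrations, not names reserved by the protocol):
+ *
+ *   SetSessionOptionsRequest request = SetSessionOptionsRequest.newBuilder()
+ *       .putSessionOptions("catalog", SessionOptionValue.newBuilder()
+ *           .setStringValue("main")
+ *           .build())
+ *       .build();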
+ */
+message SetSessionOptionsRequest {
+ map<string, SessionOptionValue> session_options = 1;
+}
+
+/*
+ * EXPERIMENTAL: The results (individually) of setting a set of session options.
+ *
+ * Option names should only be present in the response if they were not successfully
+ * set on the server; that is, a response without an Error for a name provided in the
+ * SetSessionOptionsRequest implies that the named option value was set successfully.
+ */
+message SetSessionOptionsResult {
+ enum ErrorValue {
+ // Protobuf deserialization fallback value: The status is unknown or unrecognized.
+ // Servers should avoid using this value. The request may be retried by the client.
+ UNSPECIFIED = 0;
+ // The given session option name is invalid.
+ INVALID_NAME = 1;
+ // The session option value or type is invalid.
+ INVALID_VALUE = 2;
+ // The session option cannot be set.
+ ERROR = 3;
+ }
+
+ message Error {
+ ErrorValue value = 1;
+ }
+
+ map<string, Error> errors = 1;
+}
+
+/*
+ * EXPERIMENTAL: A request to access the session options for the current server session.
+ *
+ * The existing session is referenced via a cookie header or similar (see
+ * SetSessionOptionsRequest above); it is an error to make this request with a missing,
+ * invalid, or expired session cookie header or other implementation-defined session
+ * reference token.
+ */
+message GetSessionOptionsRequest {
+}
+
+/*
+ * EXPERIMENTAL: The result containing the current server session options.
+ */
+message GetSessionOptionsResult {
+ map<string, SessionOptionValue> session_options = 1;
+}
+
+/*
+ * Request message for the "Close Session" action.
+ *
+ * The existing session is referenced via a cookie header.
+ */
+message CloseSessionRequest {
+}
+
+/*
+ * The result of closing a session.
+ */
+message CloseSessionResult {
+ enum Status {
+ // Protobuf deserialization fallback value: The session close status is unknown or
+ // not recognized. Servers should avoid using this value (send a NOT_FOUND error if
+ // the requested session is not known or expired). Clients can retry the request.
+ UNSPECIFIED = 0;
+ // The session close request is complete. Subsequent requests with
+ // the same session produce a NOT_FOUND error.
+ CLOSED = 1;
+ // The session close request is in progress. The client may retry
+ // the close request.
+ CLOSING = 2;
+ // The session is not closeable. The client should not retry the
+ // close request.
+ NOT_CLOSEABLE = 3;
+ }
+
+ Status status = 1;
+}
diff --git a/arrow-format/FlightSql.proto b/arrow-format/FlightSql.proto
new file mode 100644
index 000000000..ef1ae7513
--- /dev/null
+++ b/arrow-format/FlightSql.proto
@@ -0,0 +1,1925 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+import "google/protobuf/descriptor.proto";
+
+option java_package = "org.apache.arrow.flight.sql.impl";
+option go_package = "github.com/apache/arrow-go/arrow/flight/gen/flight";
+package arrow.flight.protocol.sql;
+
+/*
+ * Represents a metadata request. Used in the command member of FlightDescriptor
+ * for the following RPC calls:
+ * - GetSchema: return the Arrow schema of the query.
+ * - GetFlightInfo: execute the metadata request.
+ *
+ * The returned Arrow schema will be:
+ * <
+ *   info_name: uint32 not null,
+ *   value: dense_union<
+ *     string_value: utf8,
+ *     bool_value: bool,
+ *     bigint_value: int64,
+ *     int32_bitmask: int32,
+ *     string_list: list<string_data: utf8>
+ *     int32_to_int32_list_map: map<int32, list<int32>>
+ *   >
+ * >
+ * where there is one row per requested piece of metadata information.
+ */
+message CommandGetSqlInfo {
+
+ /*
+ * Values are modelled after ODBC's SQLGetInfo() function. This information is intended to provide
+ * Flight SQL clients with basic information about the supported SQL syntax and SQL functions.
+ * More information types can be added in future releases,
+ * e.g. more SQL syntax support types, scalar function support, type conversion support, etc.
+ *
+ * Note that the set of metadata may expand.
+ *
+ * Initially, Flight SQL will support the following information types:
+ * - Server Information - Range [0-500)
+ * - Syntax Information - Range [500-1000)
+ * Range [0-10,000) is reserved for defaults (see SqlInfo enum for default options).
+ * Custom options should start at 10,000.
+ *
+ * If omitted, then all metadata will be retrieved.
+ * Flight SQL Servers may choose to include additional metadata above and beyond the specified set, however they must
+ * at least return the specified set. IDs ranging from 0 to 10,000 (exclusive) are reserved for future use.
+ * If additional metadata is included, the metadata IDs should start from 10,000.
+ */
+ repeated uint32 info = 1;
+}
+
+// Options for CommandGetSqlInfo.
+enum SqlInfo {
+
+ // Server Information [0-500): Provides basic information about the Flight SQL Server.
+
+ // Retrieves a UTF-8 string with the name of the Flight SQL Server.
+ FLIGHT_SQL_SERVER_NAME = 0;
+
+ // Retrieves a UTF-8 string with the native version of the Flight SQL Server.
+ FLIGHT_SQL_SERVER_VERSION = 1;
+
+ // Retrieves a UTF-8 string with the Arrow format version of the Flight SQL Server.
+ FLIGHT_SQL_SERVER_ARROW_VERSION = 2;
+
+ /*
+ * Retrieves a boolean value indicating whether the Flight SQL Server is read only.
+ *
+ * Returns:
+ * - false: if read-write
+ * - true: if read only
+ */
+ FLIGHT_SQL_SERVER_READ_ONLY = 3;
+
+ /*
+ * Retrieves a boolean value indicating whether the Flight SQL Server supports executing
+ * SQL queries.
+ *
+ * Note that the absence of this info (as opposed to a false value) does not necessarily
+ * mean that SQL is not supported, as this property was not originally defined.
+ */
+ FLIGHT_SQL_SERVER_SQL = 4;
+
+ /*
+ * Retrieves a boolean value indicating whether the Flight SQL Server supports executing
+ * Substrait plans.
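+ *
+ * For instance, a Java client might probe this flag before submitting a
+ * Substrait plan (a sketch, assuming this repository's
+ * org.apache.arrow.flight.sql.FlightSqlClient and the protoc-generated
+ * SqlInfo enum):
+ *
+ *   FlightInfo info = sqlClient.getSqlInfo(SqlInfo.FLIGHT_SQL_SERVER_SUBSTRAIT);
+ *   // then fetch the endpoint(s) and decode the info_name/value rows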
+ */ + FLIGHT_SQL_SERVER_SUBSTRAIT = 5; + + /* + * Retrieves a string value indicating the minimum supported Substrait version, or null + * if Substrait is not supported. + */ + FLIGHT_SQL_SERVER_SUBSTRAIT_MIN_VERSION = 6; + + /* + * Retrieves a string value indicating the maximum supported Substrait version, or null + * if Substrait is not supported. + */ + FLIGHT_SQL_SERVER_SUBSTRAIT_MAX_VERSION = 7; + + /* + * Retrieves an int32 indicating whether the Flight SQL Server supports the + * BeginTransaction/EndTransaction/BeginSavepoint/EndSavepoint actions. + * + * Even if this is not supported, the database may still support explicit "BEGIN + * TRANSACTION"/"COMMIT" SQL statements (see SQL_TRANSACTIONS_SUPPORTED); this property + * is only about whether the server implements the Flight SQL API endpoints. + * + * The possible values are listed in `SqlSupportedTransaction`. + */ + FLIGHT_SQL_SERVER_TRANSACTION = 8; + + /* + * Retrieves a boolean value indicating whether the Flight SQL Server supports explicit + * query cancellation (the CancelQuery action). + */ + FLIGHT_SQL_SERVER_CANCEL = 9; + + /* + * Retrieves a boolean value indicating whether the Flight SQL Server supports executing + * bulk ingestion. + */ + FLIGHT_SQL_SERVER_BULK_INGESTION = 10; + + /* + * Retrieves a boolean value indicating whether transactions are supported for bulk ingestion. If not, invoking + * the method commit in the context of a bulk ingestion is a noop, and the isolation level is + * `arrow.flight.protocol.sql.SqlTransactionIsolationLevel.TRANSACTION_NONE`. + * + * Returns: + * - false: if bulk ingestion transactions are unsupported; + * - true: if bulk ingestion transactions are supported. + */ + FLIGHT_SQL_SERVER_INGEST_TRANSACTIONS_SUPPORTED = 11; + + /* + * Retrieves an int32 indicating the timeout (in milliseconds) for prepared statement handles. + * + * If 0, there is no timeout. Servers should reset the timeout when the handle is used in a command. + */ + FLIGHT_SQL_SERVER_STATEMENT_TIMEOUT = 100; + + /* + * Retrieves an int32 indicating the timeout (in milliseconds) for transactions, since transactions are not tied to a connection. + * + * If 0, there is no timeout. Servers should reset the timeout when the handle is used in a command. + */ + FLIGHT_SQL_SERVER_TRANSACTION_TIMEOUT = 101; + + // SQL Syntax Information [500-1000): provides information about SQL syntax supported by the Flight SQL Server. + + /* + * Retrieves a boolean value indicating whether the Flight SQL Server supports CREATE and DROP of catalogs. + * + * Returns: + * - false: if it doesn't support CREATE and DROP of catalogs. + * - true: if it supports CREATE and DROP of catalogs. + */ + SQL_DDL_CATALOG = 500; + + /* + * Retrieves a boolean value indicating whether the Flight SQL Server supports CREATE and DROP of schemas. + * + * Returns: + * - false: if it doesn't support CREATE and DROP of schemas. + * - true: if it supports CREATE and DROP of schemas. + */ + SQL_DDL_SCHEMA = 501; + + /* + * Indicates whether the Flight SQL Server supports CREATE and DROP of tables. + * + * Returns: + * - false: if it doesn't support CREATE and DROP of tables. + * - true: if it supports CREATE and DROP of tables. + */ + SQL_DDL_TABLE = 502; + + /* + * Retrieves a int32 ordinal representing the case sensitivity of catalog, table, schema and table names. + * + * The possible values are listed in `arrow.flight.protocol.sql.SqlSupportedCaseSensitivity`. 
+ */ + SQL_IDENTIFIER_CASE = 503; + + // Retrieves a UTF-8 string with the supported character(s) used to surround a delimited identifier. + SQL_IDENTIFIER_QUOTE_CHAR = 504; + + /* + * Retrieves a int32 describing the case sensitivity of quoted identifiers. + * + * The possible values are listed in `arrow.flight.protocol.sql.SqlSupportedCaseSensitivity`. + */ + SQL_QUOTED_IDENTIFIER_CASE = 505; + + /* + * Retrieves a boolean value indicating whether all tables are selectable. + * + * Returns: + * - false: if not all tables are selectable or if none are; + * - true: if all tables are selectable. + */ + SQL_ALL_TABLES_ARE_SELECTABLE = 506; + + /* + * Retrieves the null ordering. + * + * Returns a int32 ordinal for the null ordering being used, as described in + * `arrow.flight.protocol.sql.SqlNullOrdering`. + */ + SQL_NULL_ORDERING = 507; + + // Retrieves a UTF-8 string list with values of the supported keywords. + SQL_KEYWORDS = 508; + + // Retrieves a UTF-8 string list with values of the supported numeric functions. + SQL_NUMERIC_FUNCTIONS = 509; + + // Retrieves a UTF-8 string list with values of the supported string functions. + SQL_STRING_FUNCTIONS = 510; + + // Retrieves a UTF-8 string list with values of the supported system functions. + SQL_SYSTEM_FUNCTIONS = 511; + + // Retrieves a UTF-8 string list with values of the supported datetime functions. + SQL_DATETIME_FUNCTIONS = 512; + + /* + * Retrieves the UTF-8 string that can be used to escape wildcard characters. + * This is the string that can be used to escape '_' or '%' in the catalog search parameters that are a pattern + * (and therefore use one of the wildcard characters). + * The '_' character represents any single character; the '%' character represents any sequence of zero or more + * characters. + */ + SQL_SEARCH_STRING_ESCAPE = 513; + + /* + * Retrieves a UTF-8 string with all the "extra" characters that can be used in unquoted identifier names + * (those beyond a-z, A-Z, 0-9 and _). + */ + SQL_EXTRA_NAME_CHARACTERS = 514; + + /* + * Retrieves a boolean value indicating whether column aliasing is supported. + * If so, the SQL AS clause can be used to provide names for computed columns or to provide alias names for columns + * as required. + * + * Returns: + * - false: if column aliasing is unsupported; + * - true: if column aliasing is supported. + */ + SQL_SUPPORTS_COLUMN_ALIASING = 515; + + /* + * Retrieves a boolean value indicating whether concatenations between null and non-null values being + * null are supported. + * + * - Returns: + * - false: if concatenations between null and non-null values being null are unsupported; + * - true: if concatenations between null and non-null values being null are supported. + */ + SQL_NULL_PLUS_NULL_IS_NULL = 516; + + /* + * Retrieves a map where the key is the type to convert from and the value is a list with the types to convert to, + * indicating the supported conversions. Each key and each item on the list value is a value to a predefined type on + * SqlSupportsConvert enum. + * The returned map will be: map> + */ + SQL_SUPPORTS_CONVERT = 517; + + /* + * Retrieves a boolean value indicating whether, when table correlation names are supported, + * they are restricted to being different from the names of the tables. + * + * Returns: + * - false: if table correlation names are unsupported; + * - true: if table correlation names are supported. 
+ */ + SQL_SUPPORTS_TABLE_CORRELATION_NAMES = 518; + + /* + * Retrieves a boolean value indicating whether, when table correlation names are supported, + * they are restricted to being different from the names of the tables. + * + * Returns: + * - false: if different table correlation names are unsupported; + * - true: if different table correlation names are supported + */ + SQL_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES = 519; + + /* + * Retrieves a boolean value indicating whether expressions in ORDER BY lists are supported. + * + * Returns: + * - false: if expressions in ORDER BY are unsupported; + * - true: if expressions in ORDER BY are supported; + */ + SQL_SUPPORTS_EXPRESSIONS_IN_ORDER_BY = 520; + + /* + * Retrieves a boolean value indicating whether using a column that is not in the SELECT statement in a GROUP BY + * clause is supported. + * + * Returns: + * - false: if using a column that is not in the SELECT statement in a GROUP BY clause is unsupported; + * - true: if using a column that is not in the SELECT statement in a GROUP BY clause is supported. + */ + SQL_SUPPORTS_ORDER_BY_UNRELATED = 521; + + /* + * Retrieves the supported GROUP BY commands; + * + * Returns an int32 bitmask value representing the supported commands. + * The returned bitmask should be parsed in order to retrieve the supported commands. + * + * For instance: + * - return 0 (\b0) => [] (GROUP BY is unsupported); + * - return 1 (\b1) => [SQL_GROUP_BY_UNRELATED]; + * - return 2 (\b10) => [SQL_GROUP_BY_BEYOND_SELECT]; + * - return 3 (\b11) => [SQL_GROUP_BY_UNRELATED, SQL_GROUP_BY_BEYOND_SELECT]. + * Valid GROUP BY types are described under `arrow.flight.protocol.sql.SqlSupportedGroupBy`. + */ + SQL_SUPPORTED_GROUP_BY = 522; + + /* + * Retrieves a boolean value indicating whether specifying a LIKE escape clause is supported. + * + * Returns: + * - false: if specifying a LIKE escape clause is unsupported; + * - true: if specifying a LIKE escape clause is supported. + */ + SQL_SUPPORTS_LIKE_ESCAPE_CLAUSE = 523; + + /* + * Retrieves a boolean value indicating whether columns may be defined as non-nullable. + * + * Returns: + * - false: if columns cannot be defined as non-nullable; + * - true: if columns may be defined as non-nullable. + */ + SQL_SUPPORTS_NON_NULLABLE_COLUMNS = 524; + + /* + * Retrieves the supported SQL grammar level as per the ODBC specification. + * + * Returns an int32 bitmask value representing the supported SQL grammar level. + * The returned bitmask should be parsed in order to retrieve the supported grammar levels. + * + * For instance: + * - return 0 (\b0) => [] (SQL grammar is unsupported); + * - return 1 (\b1) => [SQL_MINIMUM_GRAMMAR]; + * - return 2 (\b10) => [SQL_CORE_GRAMMAR]; + * - return 3 (\b11) => [SQL_MINIMUM_GRAMMAR, SQL_CORE_GRAMMAR]; + * - return 4 (\b100) => [SQL_EXTENDED_GRAMMAR]; + * - return 5 (\b101) => [SQL_MINIMUM_GRAMMAR, SQL_EXTENDED_GRAMMAR]; + * - return 6 (\b110) => [SQL_CORE_GRAMMAR, SQL_EXTENDED_GRAMMAR]; + * - return 7 (\b111) => [SQL_MINIMUM_GRAMMAR, SQL_CORE_GRAMMAR, SQL_EXTENDED_GRAMMAR]. + * Valid SQL grammar levels are described under `arrow.flight.protocol.sql.SupportedSqlGrammar`. + */ + SQL_SUPPORTED_GRAMMAR = 525; + + /* + * Retrieves the supported ANSI92 SQL grammar level. + * + * Returns an int32 bitmask value representing the supported ANSI92 SQL grammar level. + * The returned bitmask should be parsed in order to retrieve the supported commands. 
+ *
+ * For instance:
+ * - return 0 (\b0) => [] (ANSI92 SQL grammar is unsupported);
+ * - return 1 (\b1) => [ANSI92_ENTRY_SQL];
+ * - return 2 (\b10) => [ANSI92_INTERMEDIATE_SQL];
+ * - return 3 (\b11) => [ANSI92_ENTRY_SQL, ANSI92_INTERMEDIATE_SQL];
+ * - return 4 (\b100) => [ANSI92_FULL_SQL];
+ * - return 5 (\b101) => [ANSI92_ENTRY_SQL, ANSI92_FULL_SQL];
+ * - return 6 (\b110) => [ANSI92_INTERMEDIATE_SQL, ANSI92_FULL_SQL];
+ * - return 7 (\b111) => [ANSI92_ENTRY_SQL, ANSI92_INTERMEDIATE_SQL, ANSI92_FULL_SQL].
+ * Valid ANSI92 SQL grammar levels are described under `arrow.flight.protocol.sql.SupportedAnsi92SqlGrammarLevel`.
+ */
+ SQL_ANSI92_SUPPORTED_LEVEL = 526;
+
+ /*
+ * Retrieves a boolean value indicating whether the SQL Integrity Enhancement Facility is supported.
+ *
+ * Returns:
+ * - false: if the SQL Integrity Enhancement Facility is not supported;
+ * - true: if the SQL Integrity Enhancement Facility is supported.
+ */
+ SQL_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY = 527;
+
+ /*
+ * Retrieves the support level for SQL OUTER JOINs.
+ *
+ * Returns an int32 ordinal for the SQL OUTER JOIN support level, as described in
+ * `arrow.flight.protocol.sql.SqlOuterJoinsSupportLevel`.
+ */
+ SQL_OUTER_JOINS_SUPPORT_LEVEL = 528;
+
+ // Retrieves a UTF-8 string with the preferred term for "schema".
+ SQL_SCHEMA_TERM = 529;
+
+ // Retrieves a UTF-8 string with the preferred term for "procedure".
+ SQL_PROCEDURE_TERM = 530;
+
+ /*
+ * Retrieves a UTF-8 string with the preferred term for "catalog".
+ * If an empty string is returned, it is assumed that the server does NOT support catalogs.
+ */
+ SQL_CATALOG_TERM = 531;
+
+ /*
+ * Retrieves a boolean value indicating whether a catalog appears at the start of a fully qualified table name.
+ *
+ * - false: if a catalog does not appear at the start of a fully qualified table name;
+ * - true: if a catalog appears at the start of a fully qualified table name.
+ */
+ SQL_CATALOG_AT_START = 532;
+
+ /*
+ * Retrieves the supported actions for a SQL schema.
+ *
+ * Returns an int32 bitmask value representing the supported actions for a SQL schema.
+ * The returned bitmask should be parsed in order to retrieve the supported actions for a SQL schema.
+ *
+ * For instance:
+ * - return 0 (\b0) => [] (no supported actions for SQL schema);
+ * - return 1 (\b1) => [SQL_ELEMENT_IN_PROCEDURE_CALLS];
+ * - return 2 (\b10) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS];
+ * - return 3 (\b11) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS];
+ * - return 4 (\b100) => [SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS];
+ * - return 5 (\b101) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS];
+ * - return 6 (\b110) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS];
+ * - return 7 (\b111) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS].
+ * Valid actions for a SQL schema are described under `arrow.flight.protocol.sql.SqlSupportedElementActions`.
+ */
+ SQL_SCHEMAS_SUPPORTED_ACTIONS = 533;
+
+ /*
+ * Retrieves the supported actions for a SQL catalog.
+ *
+ * Returns an int32 bitmask value representing the supported actions for a SQL catalog.
+ * The returned bitmask should be parsed in order to retrieve the supported actions for a SQL catalog.
+ * + * For instance: + * - return 0 (\b0) => [] (no supported actions for SQL catalog); + * - return 1 (\b1) => [SQL_ELEMENT_IN_PROCEDURE_CALLS]; + * - return 2 (\b10) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS]; + * - return 3 (\b11) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS]; + * - return 4 (\b100) => [SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + * - return 5 (\b101) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + * - return 6 (\b110) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + * - return 7 (\b111) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]. + * Valid actions for a SQL catalog are described under `arrow.flight.protocol.sql.SqlSupportedElementActions`. + */ + SQL_CATALOGS_SUPPORTED_ACTIONS = 534; + + /* + * Retrieves the supported SQL positioned commands. + * + * Returns an int32 bitmask value representing the supported SQL positioned commands. + * The returned bitmask should be parsed in order to retrieve the supported SQL positioned commands. + * + * For instance: + * - return 0 (\b0) => [] (no supported SQL positioned commands); + * - return 1 (\b1) => [SQL_POSITIONED_DELETE]; + * - return 2 (\b10) => [SQL_POSITIONED_UPDATE]; + * - return 3 (\b11) => [SQL_POSITIONED_DELETE, SQL_POSITIONED_UPDATE]. + * Valid SQL positioned commands are described under `arrow.flight.protocol.sql.SqlSupportedPositionedCommands`. + */ + SQL_SUPPORTED_POSITIONED_COMMANDS = 535; + + /* + * Retrieves a boolean value indicating whether SELECT FOR UPDATE statements are supported. + * + * Returns: + * - false: if SELECT FOR UPDATE statements are unsupported; + * - true: if SELECT FOR UPDATE statements are supported. + */ + SQL_SELECT_FOR_UPDATE_SUPPORTED = 536; + + /* + * Retrieves a boolean value indicating whether stored procedure calls that use the stored procedure escape syntax + * are supported. + * + * Returns: + * - false: if stored procedure calls that use the stored procedure escape syntax are unsupported; + * - true: if stored procedure calls that use the stored procedure escape syntax are supported. + */ + SQL_STORED_PROCEDURES_SUPPORTED = 537; + + /* + * Retrieves the supported SQL subqueries. + * + * Returns an int32 bitmask value representing the supported SQL subqueries. + * The returned bitmask should be parsed in order to retrieve the supported SQL subqueries. 
+ * + * For instance: + * - return 0 (\b0) => [] (no supported SQL subqueries); + * - return 1 (\b1) => [SQL_SUBQUERIES_IN_COMPARISONS]; + * - return 2 (\b10) => [SQL_SUBQUERIES_IN_EXISTS]; + * - return 3 (\b11) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS]; + * - return 4 (\b100) => [SQL_SUBQUERIES_IN_INS]; + * - return 5 (\b101) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_INS]; + * - return 6 (\b110) => [SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_EXISTS]; + * - return 7 (\b111) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS]; + * - return 8 (\b1000) => [SQL_SUBQUERIES_IN_QUANTIFIEDS]; + * - return 9 (\b1001) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + * - return 10 (\b1010) => [SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + * - return 11 (\b1011) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + * - return 12 (\b1100) => [SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + * - return 13 (\b1101) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + * - return 14 (\b1110) => [SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + * - return 15 (\b1111) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + * - ... + * Valid SQL subqueries are described under `arrow.flight.protocol.sql.SqlSupportedSubqueries`. + */ + SQL_SUPPORTED_SUBQUERIES = 538; + + /* + * Retrieves a boolean value indicating whether correlated subqueries are supported. + * + * Returns: + * - false: if correlated subqueries are unsupported; + * - true: if correlated subqueries are supported. + */ + SQL_CORRELATED_SUBQUERIES_SUPPORTED = 539; + + /* + * Retrieves the supported SQL UNIONs. + * + * Returns an int32 bitmask value representing the supported SQL UNIONs. + * The returned bitmask should be parsed in order to retrieve the supported SQL UNIONs. + * + * For instance: + * - return 0 (\b0) => [] (no supported SQL positioned commands); + * - return 1 (\b1) => [SQL_UNION]; + * - return 2 (\b10) => [SQL_UNION_ALL]; + * - return 3 (\b11) => [SQL_UNION, SQL_UNION_ALL]. + * Valid SQL positioned commands are described under `arrow.flight.protocol.sql.SqlSupportedUnions`. + */ + SQL_SUPPORTED_UNIONS = 540; + + // Retrieves a int64 value representing the maximum number of hex characters allowed in an inline binary literal. + SQL_MAX_BINARY_LITERAL_LENGTH = 541; + + // Retrieves a int64 value representing the maximum number of characters allowed for a character literal. + SQL_MAX_CHAR_LITERAL_LENGTH = 542; + + // Retrieves a int64 value representing the maximum number of characters allowed for a column name. + SQL_MAX_COLUMN_NAME_LENGTH = 543; + + // Retrieves a int64 value representing the maximum number of columns allowed in a GROUP BY clause. + SQL_MAX_COLUMNS_IN_GROUP_BY = 544; + + // Retrieves a int64 value representing the maximum number of columns allowed in an index. + SQL_MAX_COLUMNS_IN_INDEX = 545; + + // Retrieves a int64 value representing the maximum number of columns allowed in an ORDER BY clause. + SQL_MAX_COLUMNS_IN_ORDER_BY = 546; + + // Retrieves a int64 value representing the maximum number of columns allowed in a SELECT list. + SQL_MAX_COLUMNS_IN_SELECT = 547; + + // Retrieves a int64 value representing the maximum number of columns allowed in a table. 
+ SQL_MAX_COLUMNS_IN_TABLE = 548; + + // Retrieves a int64 value representing the maximum number of concurrent connections possible. + SQL_MAX_CONNECTIONS = 549; + + // Retrieves a int64 value the maximum number of characters allowed in a cursor name. + SQL_MAX_CURSOR_NAME_LENGTH = 550; + + /* + * Retrieves a int64 value representing the maximum number of bytes allowed for an index, + * including all of the parts of the index. + */ + SQL_MAX_INDEX_LENGTH = 551; + + // Retrieves a int64 value representing the maximum number of characters allowed in a schema name. + SQL_DB_SCHEMA_NAME_LENGTH = 552; + + // Retrieves a int64 value representing the maximum number of characters allowed in a procedure name. + SQL_MAX_PROCEDURE_NAME_LENGTH = 553; + + // Retrieves a int64 value representing the maximum number of characters allowed in a catalog name. + SQL_MAX_CATALOG_NAME_LENGTH = 554; + + // Retrieves a int64 value representing the maximum number of bytes allowed in a single row. + SQL_MAX_ROW_SIZE = 555; + + /* + * Retrieves a boolean indicating whether the return value for the JDBC method getMaxRowSize includes the SQL + * data types LONGVARCHAR and LONGVARBINARY. + * + * Returns: + * - false: if return value for the JDBC method getMaxRowSize does + * not include the SQL data types LONGVARCHAR and LONGVARBINARY; + * - true: if return value for the JDBC method getMaxRowSize includes + * the SQL data types LONGVARCHAR and LONGVARBINARY. + */ + SQL_MAX_ROW_SIZE_INCLUDES_BLOBS = 556; + + /* + * Retrieves a int64 value representing the maximum number of characters allowed for an SQL statement; + * a result of 0 (zero) means that there is no limit or the limit is not known. + */ + SQL_MAX_STATEMENT_LENGTH = 557; + + // Retrieves a int64 value representing the maximum number of active statements that can be open at the same time. + SQL_MAX_STATEMENTS = 558; + + // Retrieves a int64 value representing the maximum number of characters allowed in a table name. + SQL_MAX_TABLE_NAME_LENGTH = 559; + + // Retrieves a int64 value representing the maximum number of tables allowed in a SELECT statement. + SQL_MAX_TABLES_IN_SELECT = 560; + + // Retrieves a int64 value representing the maximum number of characters allowed in a user name. + SQL_MAX_USERNAME_LENGTH = 561; + + /* + * Retrieves this database's default transaction isolation level as described in + * `arrow.flight.protocol.sql.SqlTransactionIsolationLevel`. + * + * Returns a int32 ordinal for the SQL transaction isolation level. + */ + SQL_DEFAULT_TRANSACTION_ISOLATION = 562; + + /* + * Retrieves a boolean value indicating whether transactions are supported. If not, invoking the method commit is a + * noop, and the isolation level is `arrow.flight.protocol.sql.SqlTransactionIsolationLevel.TRANSACTION_NONE`. + * + * Returns: + * - false: if transactions are unsupported; + * - true: if transactions are supported. + */ + SQL_TRANSACTIONS_SUPPORTED = 563; + + /* + * Retrieves the supported transactions isolation levels. + * + * Returns an int32 bitmask value representing the supported transactions isolation levels. + * The returned bitmask should be parsed in order to retrieve the supported transactions isolation levels. 
+ * + * For instance: + * - return 0 (\b0) => [] (no supported SQL transactions isolation levels); + * - return 1 (\b1) => [SQL_TRANSACTION_NONE]; + * - return 2 (\b10) => [SQL_TRANSACTION_READ_UNCOMMITTED]; + * - return 3 (\b11) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED]; + * - return 4 (\b100) => [SQL_TRANSACTION_REPEATABLE_READ]; + * - return 5 (\b101) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_REPEATABLE_READ]; + * - return 6 (\b110) => [SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + * - return 7 (\b111) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + * - return 8 (\b1000) => [SQL_TRANSACTION_REPEATABLE_READ]; + * - return 9 (\b1001) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_REPEATABLE_READ]; + * - return 10 (\b1010) => [SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + * - return 11 (\b1011) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + * - return 12 (\b1100) => [SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ]; + * - return 13 (\b1101) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ]; + * - return 14 (\b1110) => [SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ]; + * - return 15 (\b1111) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ]; + * - return 16 (\b10000) => [SQL_TRANSACTION_SERIALIZABLE]; + * - ... + * Valid SQL positioned commands are described under `arrow.flight.protocol.sql.SqlTransactionIsolationLevel`. + */ + SQL_SUPPORTED_TRANSACTIONS_ISOLATION_LEVELS = 564; + + /* + * Retrieves a boolean value indicating whether a data definition statement within a transaction forces + * the transaction to commit. + * + * Returns: + * - false: if a data definition statement within a transaction does not force the transaction to commit; + * - true: if a data definition statement within a transaction forces the transaction to commit. + */ + SQL_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT = 565; + + /* + * Retrieves a boolean value indicating whether a data definition statement within a transaction is ignored. + * + * Returns: + * - false: if a data definition statement within a transaction is taken into account; + * - true: a data definition statement within a transaction is ignored. + */ + SQL_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED = 566; + + /* + * Retrieves an int32 bitmask value representing the supported result set types. + * The returned bitmask should be parsed in order to retrieve the supported result set types. + * + * For instance: + * - return 0 (\b0) => [] (no supported result set types); + * - return 1 (\b1) => [SQL_RESULT_SET_TYPE_UNSPECIFIED]; + * - return 2 (\b10) => [SQL_RESULT_SET_TYPE_FORWARD_ONLY]; + * - return 3 (\b11) => [SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_FORWARD_ONLY]; + * - return 4 (\b100) => [SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; + * - return 5 (\b101) => [SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; + * - return 6 (\b110) => [SQL_RESULT_SET_TYPE_FORWARD_ONLY, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; + * - return 7 (\b111) => [SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_FORWARD_ONLY, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; + * - return 8 (\b1000) => [SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE]; + * - ... 
+ * Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetType`.
+ */
+ SQL_SUPPORTED_RESULT_SET_TYPES = 567;
+
+ /*
+ * Returns an int32 bitmask value representing the concurrency types supported for
+ * `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_UNSPECIFIED`.
+ *
+ * For instance:
+ * - return 0 (\b0) => [] (no supported concurrency types for this result set type)
+ * - return 1 (\b1) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED]
+ * - return 2 (\b10) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY]
+ * - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY]
+ * - return 4 (\b100) => [SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * Valid concurrency types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`.
+ */
+ SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_UNSPECIFIED = 568;
+
+ /*
+ * Returns an int32 bitmask value representing the concurrency types supported for
+ * `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_FORWARD_ONLY`.
+ *
+ * For instance:
+ * - return 0 (\b0) => [] (no supported concurrency types for this result set type)
+ * - return 1 (\b1) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED]
+ * - return 2 (\b10) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY]
+ * - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY]
+ * - return 4 (\b100) => [SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * Valid concurrency types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`.
+ */
+ SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_FORWARD_ONLY = 569;
+
+ /*
+ * Returns an int32 bitmask value representing the concurrency types supported for
+ * `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE`.
+ *
+ * For instance:
+ * - return 0 (\b0) => [] (no supported concurrency types for this result set type)
+ * - return 1 (\b1) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED]
+ * - return 2 (\b10) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY]
+ * - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY]
+ * - return 4 (\b100) => [SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * Valid concurrency types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`.
+ */
+ SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_SENSITIVE = 570;
+
+ /*
+ * Returns an int32 bitmask value representing the concurrency types supported for
+ * `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE`.
+ *
+ * For instance:
+ * - return 0 (\b0) => [] (no supported concurrency types for this result set type)
+ * - return 1 (\b1) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED]
+ * - return 2 (\b10) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY]
+ * - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY]
+ * - return 4 (\b100) => [SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE]
+ * Valid concurrency types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`.
+ */
+ SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_INSENSITIVE = 571;
+
+ /*
+ * Retrieves a boolean value indicating whether this database supports batch updates.
+ *
+ * Returns:
+ * - false: if this database does not support batch updates;
+ * - true: if this database supports batch updates.
+ */
+ SQL_BATCH_UPDATES_SUPPORTED = 572;
+
+ /*
+ * Retrieves a boolean value indicating whether this database supports savepoints.
+ *
+ * Returns:
+ * - false: if this database does not support savepoints;
+ * - true: if this database supports savepoints.
+ */
+ SQL_SAVEPOINTS_SUPPORTED = 573;
+
+ /*
+ * Retrieves a boolean value indicating whether named parameters are supported in callable statements.
+ *
+ * Returns:
+ * - false: if named parameters in callable statements are unsupported;
+ * - true: if named parameters in callable statements are supported.
+ */
+ SQL_NAMED_PARAMETERS_SUPPORTED = 574;
+
+ /*
+ * Retrieves a boolean value indicating whether updates made to a LOB are made on a copy or directly to the LOB.
+ *
+ * Returns:
+ * - false: if updates made to a LOB are made directly to the LOB;
+ * - true: if updates made to a LOB are made on a copy.
+ */
+ SQL_LOCATORS_UPDATE_COPY = 575;
+
+ /*
+ * Retrieves a boolean value indicating whether invoking user-defined or vendor functions
+ * using the stored procedure escape syntax is supported.
+ *
+ * Returns:
+ * - false: if invoking user-defined or vendor functions using the stored procedure escape syntax is unsupported;
+ * - true: if invoking user-defined or vendor functions using the stored procedure escape syntax is supported.
+ */
+ SQL_STORED_FUNCTIONS_USING_CALL_SYNTAX_SUPPORTED = 576;
+}
+
+// The level of support for Flight SQL transaction RPCs.
+enum SqlSupportedTransaction {
+ // Unknown/not indicated/no support
+ SQL_SUPPORTED_TRANSACTION_NONE = 0;
+ // Transactions, but not savepoints.
+ // A savepoint is a mark within a transaction that can be individually
+ // rolled back to. Not all databases support savepoints.
+ SQL_SUPPORTED_TRANSACTION_TRANSACTION = 1; + // Transactions and savepoints + SQL_SUPPORTED_TRANSACTION_SAVEPOINT = 2; +} + +enum SqlSupportedCaseSensitivity { + SQL_CASE_SENSITIVITY_UNKNOWN = 0; + SQL_CASE_SENSITIVITY_CASE_INSENSITIVE = 1; + SQL_CASE_SENSITIVITY_UPPERCASE = 2; + SQL_CASE_SENSITIVITY_LOWERCASE = 3; +} + +enum SqlNullOrdering { + SQL_NULLS_SORTED_HIGH = 0; + SQL_NULLS_SORTED_LOW = 1; + SQL_NULLS_SORTED_AT_START = 2; + SQL_NULLS_SORTED_AT_END = 3; +} + +enum SupportedSqlGrammar { + SQL_MINIMUM_GRAMMAR = 0; + SQL_CORE_GRAMMAR = 1; + SQL_EXTENDED_GRAMMAR = 2; +} + +enum SupportedAnsi92SqlGrammarLevel { + ANSI92_ENTRY_SQL = 0; + ANSI92_INTERMEDIATE_SQL = 1; + ANSI92_FULL_SQL = 2; +} + +enum SqlOuterJoinsSupportLevel { + SQL_JOINS_UNSUPPORTED = 0; + SQL_LIMITED_OUTER_JOINS = 1; + SQL_FULL_OUTER_JOINS = 2; +} + +enum SqlSupportedGroupBy { + SQL_GROUP_BY_UNRELATED = 0; + SQL_GROUP_BY_BEYOND_SELECT = 1; +} + +enum SqlSupportedElementActions { + SQL_ELEMENT_IN_PROCEDURE_CALLS = 0; + SQL_ELEMENT_IN_INDEX_DEFINITIONS = 1; + SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS = 2; +} + +enum SqlSupportedPositionedCommands { + SQL_POSITIONED_DELETE = 0; + SQL_POSITIONED_UPDATE = 1; +} + +enum SqlSupportedSubqueries { + SQL_SUBQUERIES_IN_COMPARISONS = 0; + SQL_SUBQUERIES_IN_EXISTS = 1; + SQL_SUBQUERIES_IN_INS = 2; + SQL_SUBQUERIES_IN_QUANTIFIEDS = 3; +} + +enum SqlSupportedUnions { + SQL_UNION = 0; + SQL_UNION_ALL = 1; +} + +enum SqlTransactionIsolationLevel { + SQL_TRANSACTION_NONE = 0; + SQL_TRANSACTION_READ_UNCOMMITTED = 1; + SQL_TRANSACTION_READ_COMMITTED = 2; + SQL_TRANSACTION_REPEATABLE_READ = 3; + SQL_TRANSACTION_SERIALIZABLE = 4; +} + +enum SqlSupportedTransactions { + SQL_TRANSACTION_UNSPECIFIED = 0; + SQL_DATA_DEFINITION_TRANSACTIONS = 1; + SQL_DATA_MANIPULATION_TRANSACTIONS = 2; +} + +enum SqlSupportedResultSetType { + SQL_RESULT_SET_TYPE_UNSPECIFIED = 0; + SQL_RESULT_SET_TYPE_FORWARD_ONLY = 1; + SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE = 2; + SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE = 3; +} + +enum SqlSupportedResultSetConcurrency { + SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED = 0; + SQL_RESULT_SET_CONCURRENCY_READ_ONLY = 1; + SQL_RESULT_SET_CONCURRENCY_UPDATABLE = 2; +} + +enum SqlSupportsConvert { + SQL_CONVERT_BIGINT = 0; + SQL_CONVERT_BINARY = 1; + SQL_CONVERT_BIT = 2; + SQL_CONVERT_CHAR = 3; + SQL_CONVERT_DATE = 4; + SQL_CONVERT_DECIMAL = 5; + SQL_CONVERT_FLOAT = 6; + SQL_CONVERT_INTEGER = 7; + SQL_CONVERT_INTERVAL_DAY_TIME = 8; + SQL_CONVERT_INTERVAL_YEAR_MONTH = 9; + SQL_CONVERT_LONGVARBINARY = 10; + SQL_CONVERT_LONGVARCHAR = 11; + SQL_CONVERT_NUMERIC = 12; + SQL_CONVERT_REAL = 13; + SQL_CONVERT_SMALLINT = 14; + SQL_CONVERT_TIME = 15; + SQL_CONVERT_TIMESTAMP = 16; + SQL_CONVERT_TINYINT = 17; + SQL_CONVERT_VARBINARY = 18; + SQL_CONVERT_VARCHAR = 19; +} + +/** + * The JDBC/ODBC-defined type of any object. + * All the values here are the same as in the JDBC and ODBC specs. + */ +enum XdbcDataType { + XDBC_UNKNOWN_TYPE = 0; + XDBC_CHAR = 1; + XDBC_NUMERIC = 2; + XDBC_DECIMAL = 3; + XDBC_INTEGER = 4; + XDBC_SMALLINT = 5; + XDBC_FLOAT = 6; + XDBC_REAL = 7; + XDBC_DOUBLE = 8; + XDBC_DATETIME = 9; + XDBC_INTERVAL = 10; + XDBC_VARCHAR = 12; + XDBC_DATE = 91; + XDBC_TIME = 92; + XDBC_TIMESTAMP = 93; + XDBC_LONGVARCHAR = -1; + XDBC_BINARY = -2; + XDBC_VARBINARY = -3; + XDBC_LONGVARBINARY = -4; + XDBC_BIGINT = -5; + XDBC_TINYINT = -6; + XDBC_BIT = -7; + XDBC_WCHAR = -8; + XDBC_WVARCHAR = -9; +} + +/** + * Detailed subtype information for XDBC_TYPE_DATETIME and XDBC_TYPE_INTERVAL. 
+ */
+enum XdbcDatetimeSubcode {
+ option allow_alias = true;
+ XDBC_SUBCODE_UNKNOWN = 0;
+ XDBC_SUBCODE_YEAR = 1;
+ XDBC_SUBCODE_DATE = 1;
+ XDBC_SUBCODE_TIME = 2;
+ XDBC_SUBCODE_MONTH = 2;
+ XDBC_SUBCODE_TIMESTAMP = 3;
+ XDBC_SUBCODE_DAY = 3;
+ XDBC_SUBCODE_TIME_WITH_TIMEZONE = 4;
+ XDBC_SUBCODE_HOUR = 4;
+ XDBC_SUBCODE_TIMESTAMP_WITH_TIMEZONE = 5;
+ XDBC_SUBCODE_MINUTE = 5;
+ XDBC_SUBCODE_SECOND = 6;
+ XDBC_SUBCODE_YEAR_TO_MONTH = 7;
+ XDBC_SUBCODE_DAY_TO_HOUR = 8;
+ XDBC_SUBCODE_DAY_TO_MINUTE = 9;
+ XDBC_SUBCODE_DAY_TO_SECOND = 10;
+ XDBC_SUBCODE_HOUR_TO_MINUTE = 11;
+ XDBC_SUBCODE_HOUR_TO_SECOND = 12;
+ XDBC_SUBCODE_MINUTE_TO_SECOND = 13;
+ XDBC_SUBCODE_INTERVAL_YEAR = 101;
+ XDBC_SUBCODE_INTERVAL_MONTH = 102;
+ XDBC_SUBCODE_INTERVAL_DAY = 103;
+ XDBC_SUBCODE_INTERVAL_HOUR = 104;
+ XDBC_SUBCODE_INTERVAL_MINUTE = 105;
+ XDBC_SUBCODE_INTERVAL_SECOND = 106;
+ XDBC_SUBCODE_INTERVAL_YEAR_TO_MONTH = 107;
+ XDBC_SUBCODE_INTERVAL_DAY_TO_HOUR = 108;
+ XDBC_SUBCODE_INTERVAL_DAY_TO_MINUTE = 109;
+ XDBC_SUBCODE_INTERVAL_DAY_TO_SECOND = 110;
+ XDBC_SUBCODE_INTERVAL_HOUR_TO_MINUTE = 111;
+ XDBC_SUBCODE_INTERVAL_HOUR_TO_SECOND = 112;
+ XDBC_SUBCODE_INTERVAL_MINUTE_TO_SECOND = 113;
+}
+
+enum Nullable {
+ /**
+  * Indicates that the field does not allow the use of null values.
+  */
+ NULLABILITY_NO_NULLS = 0;
+
+ /**
+  * Indicates that the field allows the use of null values.
+  */
+ NULLABILITY_NULLABLE = 1;
+
+ /**
+  * Indicates that the nullability of the field cannot be determined.
+  */
+ NULLABILITY_UNKNOWN = 2;
+}
+
+enum Searchable {
+ /**
+  * Indicates that the column cannot be used in a WHERE clause.
+  */
+ SEARCHABLE_NONE = 0;
+
+ /**
+  * Indicates that the column can be used in a WHERE clause with a
+  * LIKE operator.
+  */
+ SEARCHABLE_CHAR = 1;
+
+ /**
+  * Indicates that the column can be used in a WHERE clause with any
+  * operator other than LIKE.
+  *
+  * - Allowed operators: comparison, quantified comparison, BETWEEN,
+  *   DISTINCT, IN, MATCH, and UNIQUE.
+  */
+ SEARCHABLE_BASIC = 2;
+
+ /**
+  * Indicates that the column can be used in a WHERE clause using any operator.
+  */
+ SEARCHABLE_FULL = 3;
+}
+
+/*
+ * Represents a request to retrieve information about the data types supported on a Flight SQL enabled backend.
+ * Used in the command member of FlightDescriptor for the following RPC calls:
+ * - GetSchema: return the schema of the query.
+ * - GetFlightInfo: execute the catalog metadata request.
+ *
+ * The returned schema will be:
+ * <
+ * type_name: utf8 not null (The name of the data type, for example: VARCHAR, INTEGER, etc),
+ * data_type: int32 not null (The SQL data type),
+ * column_size: int32 (The maximum size supported by that column.
+ * In case of exact numeric types, this represents the maximum precision.
+ * In case of string types, this represents the character length.
+ * In case of datetime data types, this represents the length in characters of the string representation.
+ * NULL is returned for data types where column size is not applicable.),
+ * literal_prefix: utf8 (Character or characters used to prefix a literal, NULL is returned for
+ * data types where a literal prefix is not applicable.),
+ * literal_suffix: utf8 (Character or characters used to terminate a literal,
+ * NULL is returned for data types where a literal suffix is not applicable.),
+ * create_params: list<utf8 not null>
+ * (A list of keywords corresponding to which parameters can be used when creating
+ * a column for that specific type.
+ * NULL is returned if there are no parameters for the data type definition.),
+ * nullable: int32 not null (Shows if the data type accepts a NULL value. The possible values can be seen in the
+ * Nullable enum.),
+ * case_sensitive: bool not null (Shows if a character data type is case-sensitive in collations and comparisons),
+ * searchable: int32 not null (Shows how the data type is used in a WHERE clause. The possible values can be seen in the
+ * Searchable enum.),
+ * unsigned_attribute: bool (Shows if the data type is unsigned. NULL is returned if the attribute is
+ * not applicable to the data type or the data type is not numeric.),
+ * fixed_prec_scale: bool not null (Shows if the data type has predefined fixed precision and scale.),
+ * auto_increment: bool (Shows if the data type is auto incremental. NULL is returned if the attribute
+ * is not applicable to the data type or the data type is not numeric.),
+ * local_type_name: utf8 (Localized version of the data source-dependent name of the data type. NULL
+ * is returned if a localized name is not supported by the data source),
+ * minimum_scale: int32 (The minimum scale of the data type on the data source.
+ * If a data type has a fixed scale, the MINIMUM_SCALE and MAXIMUM_SCALE
+ * columns both contain this value. NULL is returned if scale is not applicable.),
+ * maximum_scale: int32 (The maximum scale of the data type on the data source.
+ * NULL is returned if scale is not applicable.),
+ * sql_data_type: int32 not null (The value of the SQL DATA TYPE which has the same values
+ * as the data_type value. Except for interval and datetime, which
+ * use generic values. More info about those types can be
+ * obtained through datetime_subcode. The possible values can be seen
+ * in the XdbcDataType enum.),
+ * datetime_subcode: int32 (Only used when the SQL DATA TYPE is interval or datetime. It contains
+ * its subtypes. For types different from interval and datetime, this value
+ * is NULL. The possible values can be seen in the XdbcDatetimeSubcode enum.),
+ * num_prec_radix: int32 (If the data type is an approximate numeric type, this column contains
+ * the value 2 to indicate that COLUMN_SIZE specifies a number of bits. For
+ * exact numeric types, this column contains the value 10 to indicate that
+ * column size specifies a number of decimal digits. Otherwise, this column is NULL.),
+ * interval_precision: int32 (If the data type is an interval data type, then this column contains the value
+ * of the interval leading precision. Otherwise, this column is NULL. This field
+ * is only relevant to ODBC).
+ * >
+ * The returned data should be ordered by data_type and then by type_name.
+ */
+message CommandGetXdbcTypeInfo {
+
+ /*
+ * Specifies the data type to search for the info.
+ */
+ optional int32 data_type = 1;
+}
+
+/*
+ * Represents a request to retrieve the list of catalogs on a Flight SQL enabled backend.
+ * The definition of a catalog depends on vendor/implementation. It is usually the database itself.
+ * Used in the command member of FlightDescriptor for the following RPC calls:
+ * - GetSchema: return the Arrow schema of the query.
+ * - GetFlightInfo: execute the catalog metadata request.
+ *
+ * The returned Arrow schema will be:
+ * <
+ * catalog_name: utf8 not null
+ * >
+ * The returned data should be ordered by catalog_name.
+ */
+message CommandGetCatalogs {
+}
+
+/*
+ * Represents a request to retrieve the list of database schemas on a Flight SQL enabled backend.
+ * The definition of a database schema depends on vendor/implementation. It is usually a collection of tables. + * Used in the command member of FlightDescriptor for the following RPC calls: + * - GetSchema: return the Arrow schema of the query. + * - GetFlightInfo: execute the catalog metadata request. + * + * The returned Arrow schema will be: + * < + * catalog_name: utf8, + * db_schema_name: utf8 not null + * > + * The returned data should be ordered by catalog_name, then db_schema_name. + */ +message CommandGetDbSchemas { + + /* + * Specifies the Catalog to search for the tables. + * An empty string retrieves those without a catalog. + * If omitted the catalog name should not be used to narrow the search. + */ + optional string catalog = 1; + + /* + * Specifies a filter pattern for schemas to search for. + * When no db_schema_filter_pattern is provided, the pattern will not be used to narrow the search. + * In the pattern string, two special characters can be used to denote matching rules: + * - "%" means to match any substring with 0 or more characters. + * - "_" means to match any one character. + */ + optional string db_schema_filter_pattern = 2; +} + +/* + * Represents a request to retrieve the list of tables, and optionally their schemas, on a Flight SQL enabled backend. + * Used in the command member of FlightDescriptor for the following RPC calls: + * - GetSchema: return the Arrow schema of the query. + * - GetFlightInfo: execute the catalog metadata request. + * + * The returned Arrow schema will be: + * < + * catalog_name: utf8, + * db_schema_name: utf8, + * table_name: utf8 not null, + * table_type: utf8 not null, + * [optional] table_schema: bytes not null (schema of the table as described in Schema.fbs::Schema, + * it is serialized as an IPC message.) + * > + * Fields on table_schema may contain the following metadata: + * - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name + * - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name + * - ARROW:FLIGHT:SQL:TABLE_NAME - Table name + * - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. + * - ARROW:FLIGHT:SQL:PRECISION - Column precision/size + * - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable + * - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. + * - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. + * - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. + * - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. + * The returned data should be ordered by catalog_name, db_schema_name, table_name, then table_type, followed by table_schema if requested. + */ +message CommandGetTables { + + /* + * Specifies the Catalog to search for the tables. + * An empty string retrieves those without a catalog. + * If omitted the catalog name should not be used to narrow the search. + */ + optional string catalog = 1; + + /* + * Specifies a filter pattern for schemas to search for. + * When no db_schema_filter_pattern is provided, all schemas matching other filters are searched. + * In the pattern string, two special characters can be used to denote matching rules: + * - "%" means to match any substring with 0 or more characters. + * - "_" means to match any one character. 
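The "%" / "_" filter-pattern rules above translate mechanically into a regular expression. A hedged Java sketch of how a server might evaluate db_schema_filter_pattern (class and method names invented for illustration, not an Arrow API):

```java
import java.util.regex.Pattern;

public final class FilterPattern {
  // Compiles a catalog-metadata filter pattern ("%" = any substring,
  // "_" = any single character) into a regex; every other character
  // is matched literally.
  static Pattern compile(String filterPattern) {
    StringBuilder regex = new StringBuilder();
    for (int i = 0; i < filterPattern.length(); i++) {
      char c = filterPattern.charAt(i);
      switch (c) {
        case '%': regex.append(".*"); break;
        case '_': regex.append('.'); break;
        default:  regex.append(Pattern.quote(String.valueOf(c)));
      }
    }
    return Pattern.compile(regex.toString());
  }

  public static void main(String[] args) {
    System.out.println(compile("sales%").matcher("sales_2024").matches()); // true
    System.out.println(compile("sal_s").matcher("sales").matches());       // true
    System.out.println(compile("sales%").matcher("marketing").matches());  // false
  }
}
```

The same semantics apply to the table and schema filter patterns of CommandGetTables below.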
+ */ + optional string db_schema_filter_pattern = 2; + + /* + * Specifies a filter pattern for tables to search for. + * When no table_name_filter_pattern is provided, all tables matching other filters are searched. + * In the pattern string, two special characters can be used to denote matching rules: + * - "%" means to match any substring with 0 or more characters. + * - "_" means to match any one character. + */ + optional string table_name_filter_pattern = 3; + + /* + * Specifies a filter of table types which must match. + * The table types depend on vendor/implementation. It is usually used to separate tables from views or system tables. + * TABLE, VIEW, and SYSTEM TABLE are commonly supported. + */ + repeated string table_types = 4; + + // Specifies if the Arrow schema should be returned for found tables. + bool include_schema = 5; +} + +/* + * Represents a request to retrieve the list of table types on a Flight SQL enabled backend. + * The table types depend on vendor/implementation. It is usually used to separate tables from views or system tables. + * TABLE, VIEW, and SYSTEM TABLE are commonly supported. + * Used in the command member of FlightDescriptor for the following RPC calls: + * - GetSchema: return the Arrow schema of the query. + * - GetFlightInfo: execute the catalog metadata request. + * + * The returned Arrow schema will be: + * < + * table_type: utf8 not null + * > + * The returned data should be ordered by table_type. + */ +message CommandGetTableTypes { +} + +/* + * Represents a request to retrieve the primary keys of a table on a Flight SQL enabled backend. + * Used in the command member of FlightDescriptor for the following RPC calls: + * - GetSchema: return the Arrow schema of the query. + * - GetFlightInfo: execute the catalog metadata request. + * + * The returned Arrow schema will be: + * < + * catalog_name: utf8, + * db_schema_name: utf8, + * table_name: utf8 not null, + * column_name: utf8 not null, + * key_name: utf8, + * key_sequence: int32 not null + * > + * The returned data should be ordered by catalog_name, db_schema_name, table_name, key_name, then key_sequence. + */ +message CommandGetPrimaryKeys { + + /* + * Specifies the catalog to search for the table. + * An empty string retrieves those without a catalog. + * If omitted the catalog name should not be used to narrow the search. + */ + optional string catalog = 1; + + /* + * Specifies the schema to search for the table. + * An empty string retrieves those without a schema. + * If omitted the schema name should not be used to narrow the search. + */ + optional string db_schema = 2; + + // Specifies the table to get the primary keys for. + string table = 3; +} + +enum UpdateDeleteRules { + CASCADE = 0; + RESTRICT = 1; + SET_NULL = 2; + NO_ACTION = 3; + SET_DEFAULT = 4; +} + +/* + * Represents a request to retrieve a description of the foreign key columns that reference the given table's + * primary key columns (the foreign keys exported by a table) of a table on a Flight SQL enabled backend. + * Used in the command member of FlightDescriptor for the following RPC calls: + * - GetSchema: return the Arrow schema of the query. + * - GetFlightInfo: execute the catalog metadata request. 
+ *
+ * The returned Arrow schema will be:
+ * <
+ * pk_catalog_name: utf8,
+ * pk_db_schema_name: utf8,
+ * pk_table_name: utf8 not null,
+ * pk_column_name: utf8 not null,
+ * fk_catalog_name: utf8,
+ * fk_db_schema_name: utf8,
+ * fk_table_name: utf8 not null,
+ * fk_column_name: utf8 not null,
+ * key_sequence: int32 not null,
+ * fk_key_name: utf8,
+ * pk_key_name: utf8,
+ * update_rule: uint8 not null,
+ * delete_rule: uint8 not null
+ * >
+ * The returned data should be ordered by fk_catalog_name, fk_db_schema_name, fk_table_name, fk_key_name, then key_sequence.
+ * update_rule and delete_rule return a byte that is equivalent to the actions declared in the UpdateDeleteRules enum.
+ */
+message CommandGetExportedKeys {
+
+ /*
+ * Specifies the catalog to search for the foreign key table.
+ * An empty string retrieves those without a catalog.
+ * If omitted the catalog name should not be used to narrow the search.
+ */
+ optional string catalog = 1;
+
+ /*
+ * Specifies the schema to search for the foreign key table.
+ * An empty string retrieves those without a schema.
+ * If omitted the schema name should not be used to narrow the search.
+ */
+ optional string db_schema = 2;
+
+ // Specifies the foreign key table to get the foreign keys for.
+ string table = 3;
+}
+
+/*
+ * Represents a request to retrieve the foreign keys of a table on a Flight SQL enabled backend.
+ * Used in the command member of FlightDescriptor for the following RPC calls:
+ * - GetSchema: return the Arrow schema of the query.
+ * - GetFlightInfo: execute the catalog metadata request.
+ *
+ * The returned Arrow schema will be:
+ * <
+ * pk_catalog_name: utf8,
+ * pk_db_schema_name: utf8,
+ * pk_table_name: utf8 not null,
+ * pk_column_name: utf8 not null,
+ * fk_catalog_name: utf8,
+ * fk_db_schema_name: utf8,
+ * fk_table_name: utf8 not null,
+ * fk_column_name: utf8 not null,
+ * key_sequence: int32 not null,
+ * fk_key_name: utf8,
+ * pk_key_name: utf8,
+ * update_rule: uint8 not null,
+ * delete_rule: uint8 not null
+ * >
+ * The returned data should be ordered by pk_catalog_name, pk_db_schema_name, pk_table_name, pk_key_name, then key_sequence.
+ * update_rule and delete_rule return a byte that is equivalent to actions:
+ * - 0 = CASCADE
+ * - 1 = RESTRICT
+ * - 2 = SET NULL
+ * - 3 = NO ACTION
+ * - 4 = SET DEFAULT
+ */
+message CommandGetImportedKeys {
+
+ /*
+ * Specifies the catalog to search for the primary key table.
+ * An empty string retrieves those without a catalog.
+ * If omitted the catalog name should not be used to narrow the search.
+ */
+ optional string catalog = 1;
+
+ /*
+ * Specifies the schema to search for the primary key table.
+ * An empty string retrieves those without a schema.
+ * If omitted the schema name should not be used to narrow the search.
+ */
+ optional string db_schema = 2;
+
+ // Specifies the primary key table to get the foreign keys for.
+ string table = 3;
+}
+
+/*
+ * Represents a request to retrieve a description of the foreign key columns in the given foreign key table that
+ * reference the primary key or the columns representing a unique constraint of the parent table (could be the same
+ * or a different table) on a Flight SQL enabled backend.
+ * Used in the command member of FlightDescriptor for the following RPC calls:
+ * - GetSchema: return the Arrow schema of the query.
+ * - GetFlightInfo: execute the catalog metadata request.
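The 0-4 byte values listed above for update_rule and delete_rule map by ordinal onto the UpdateDeleteRules enum. A small Java sketch of that mapping, again using a local mirror of the enum rather than the generated class:

```java
public final class ForeignKeyRules {
  // Local mirror of the UpdateDeleteRules enum; ordinals match the
  // byte values documented above (0 = CASCADE ... 4 = SET_DEFAULT).
  enum UpdateDeleteRules { CASCADE, RESTRICT, SET_NULL, NO_ACTION, SET_DEFAULT }

  static UpdateDeleteRules fromByte(int value) {
    UpdateDeleteRules[] rules = UpdateDeleteRules.values();
    if (value < 0 || value >= rules.length) {
      throw new IllegalArgumentException("Unknown update/delete rule: " + value);
    }
    return rules[value];
  }
}
```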
+ *
+ * The returned Arrow schema will be:
+ * <
+ * pk_catalog_name: utf8,
+ * pk_db_schema_name: utf8,
+ * pk_table_name: utf8 not null,
+ * pk_column_name: utf8 not null,
+ * fk_catalog_name: utf8,
+ * fk_db_schema_name: utf8,
+ * fk_table_name: utf8 not null,
+ * fk_column_name: utf8 not null,
+ * key_sequence: int32 not null,
+ * fk_key_name: utf8,
+ * pk_key_name: utf8,
+ * update_rule: uint8 not null,
+ * delete_rule: uint8 not null
+ * >
+ * The returned data should be ordered by pk_catalog_name, pk_db_schema_name, pk_table_name, pk_key_name, then key_sequence.
+ * update_rule and delete_rule return a byte that is equivalent to actions:
+ * - 0 = CASCADE
+ * - 1 = RESTRICT
+ * - 2 = SET NULL
+ * - 3 = NO ACTION
+ * - 4 = SET DEFAULT
+ */
+message CommandGetCrossReference {
+
+ /**
+  * The catalog name where the parent table is.
+  * An empty string retrieves those without a catalog.
+  * If omitted the catalog name should not be used to narrow the search.
+  */
+ optional string pk_catalog = 1;
+
+ /**
+  * The schema name where the parent table is.
+  * An empty string retrieves those without a schema.
+  * If omitted the schema name should not be used to narrow the search.
+  */
+ optional string pk_db_schema = 2;
+
+ /**
+  * The parent table name. It cannot be null.
+  */
+ string pk_table = 3;
+
+ /**
+  * The catalog name where the foreign table is.
+  * An empty string retrieves those without a catalog.
+  * If omitted the catalog name should not be used to narrow the search.
+  */
+ optional string fk_catalog = 4;
+
+ /**
+  * The schema name where the foreign table is.
+  * An empty string retrieves those without a schema.
+  * If omitted the schema name should not be used to narrow the search.
+  */
+ optional string fk_db_schema = 5;
+
+ /**
+  * The foreign table name. It cannot be null.
+  */
+ string fk_table = 6;
+}
+
+// Query Execution Action Messages
+
+/*
+ * Request message for the "CreatePreparedStatement" action on a Flight SQL enabled backend.
+ */
+message ActionCreatePreparedStatementRequest {
+
+ // The valid SQL string to create a prepared statement for.
+ string query = 1;
+ // Create/execute the prepared statement as part of this transaction (if
+ // unset, executions of the prepared statement will be auto-committed).
+ optional bytes transaction_id = 2;
+}
+
+/*
+ * An embedded message describing a Substrait plan to execute.
+ */
+message SubstraitPlan {
+
+ // The serialized substrait.Plan to create a prepared statement for.
+ // XXX(ARROW-16902): this is bytes instead of an embedded message
+ // because Protobuf does not really support one DLL using Protobuf
+ // definitions from another DLL.
+ bytes plan = 1;
+ // The Substrait release, e.g. "0.12.0". This information is not
+ // tracked in the plan itself, so this is the only way for consumers
+ // to potentially know if they can handle the plan.
+ string version = 2;
+}
+
+/*
+ * Request message for the "CreatePreparedSubstraitPlan" action on a Flight SQL enabled backend.
+ */
+message ActionCreatePreparedSubstraitPlanRequest {
+
+ // The serialized substrait.Plan to create a prepared statement for.
+ SubstraitPlan plan = 1;
+ // Create/execute the prepared statement as part of this transaction (if
+ // unset, executions of the prepared statement will be auto-committed).
+ optional bytes transaction_id = 2;
+}
+
+/*
+ * Wrap the result of a "CreatePreparedStatement" or "CreatePreparedSubstraitPlan" action.
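As a sketch of how a client assembles one of these action requests: Flight SQL wraps action bodies in google.protobuf.Any. The snippet below assumes the classes protoc generates from this file live under org.apache.arrow.flight.sql.impl (the java_package used by the Arrow Java build); treat the package and class names as assumptions to adjust for your build:

```java
import com.google.protobuf.Any;
import org.apache.arrow.flight.sql.impl.FlightSql.ActionCreatePreparedStatementRequest;

public final class PreparedStatementActions {
  // Builds the body of a "CreatePreparedStatement" action. The import above
  // is an assumption about where the generated protobuf classes land.
  static byte[] createRequestBody(String query) {
    ActionCreatePreparedStatementRequest request =
        ActionCreatePreparedStatementRequest.newBuilder()
            .setQuery(query)
            .build();
    // Flight SQL action bodies and results are wrapped in google.protobuf.Any.
    return Any.pack(request).toByteArray();
  }
}
```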
+ * + * The resultant PreparedStatement can be closed either: + * - Manually, through the "ClosePreparedStatement" action; + * - Automatically, by a server timeout. + * + * The result should be wrapped in a google.protobuf.Any message. + */ +message ActionCreatePreparedStatementResult { + + // Opaque handle for the prepared statement on the server. + bytes prepared_statement_handle = 1; + + // If a result set generating query was provided, dataset_schema contains the + // schema of the result set. It should be an IPC-encapsulated Schema, as described in Schema.fbs. + // For some queries, the schema of the results may depend on the schema of the parameters. The server + // should provide its best guess as to the schema at this point. Clients must not assume that this + // schema, if provided, will be accurate. + bytes dataset_schema = 2; + + // If the query provided contained parameters, parameter_schema contains the + // schema of the expected parameters. It should be an IPC-encapsulated Schema, as described in Schema.fbs. + bytes parameter_schema = 3; +} + +/* + * Request message for the "ClosePreparedStatement" action on a Flight SQL enabled backend. + * Closes server resources associated with the prepared statement handle. + */ +message ActionClosePreparedStatementRequest { + + // Opaque handle for the prepared statement on the server. + bytes prepared_statement_handle = 1; +} + +/* + * Request message for the "BeginTransaction" action. + * Begins a transaction. + */ +message ActionBeginTransactionRequest { +} + +/* + * Request message for the "BeginSavepoint" action. + * Creates a savepoint within a transaction. + * + * Only supported if FLIGHT_SQL_TRANSACTION is + * FLIGHT_SQL_TRANSACTION_SUPPORT_SAVEPOINT. + */ +message ActionBeginSavepointRequest { + + // The transaction to which a savepoint belongs. + bytes transaction_id = 1; + // Name for the savepoint. + string name = 2; +} + +/* + * The result of a "BeginTransaction" action. + * + * The transaction can be manipulated with the "EndTransaction" action, or + * automatically via server timeout. If the transaction times out, then it is + * automatically rolled back. + * + * The result should be wrapped in a google.protobuf.Any message. + */ +message ActionBeginTransactionResult { + + // Opaque handle for the transaction on the server. + bytes transaction_id = 1; +} + +/* + * The result of a "BeginSavepoint" action. + * + * The transaction can be manipulated with the "EndSavepoint" action. + * If the associated transaction is committed, rolled back, or times + * out, then the savepoint is also invalidated. + * + * The result should be wrapped in a google.protobuf.Any message. + */ +message ActionBeginSavepointResult { + + // Opaque handle for the savepoint on the server. + bytes savepoint_id = 1; +} + +/* + * Request message for the "EndTransaction" action. + * + * Commit (COMMIT) or rollback (ROLLBACK) the transaction. + * + * If the action completes successfully, the transaction handle is + * invalidated, as are all associated savepoints. + */ +message ActionEndTransactionRequest { + + enum EndTransaction { + END_TRANSACTION_UNSPECIFIED = 0; + // Commit the transaction. + END_TRANSACTION_COMMIT = 1; + // Roll back the transaction. + END_TRANSACTION_ROLLBACK = 2; + } + // Opaque handle for the transaction on the server. + bytes transaction_id = 1; + // Whether to commit/rollback the given transaction. + EndTransaction action = 2; +} + +/* + * Request message for the "EndSavepoint" action. 
+ * + * Release (RELEASE) the savepoint or rollback (ROLLBACK) to the + * savepoint. + * + * Releasing a savepoint invalidates that savepoint. Rolling back to + * a savepoint does not invalidate the savepoint, but invalidates all + * savepoints created after the current savepoint. + */ +message ActionEndSavepointRequest { + + enum EndSavepoint { + END_SAVEPOINT_UNSPECIFIED = 0; + // Release the savepoint. + END_SAVEPOINT_RELEASE = 1; + // Roll back to a savepoint. + END_SAVEPOINT_ROLLBACK = 2; + } + // Opaque handle for the savepoint on the server. + bytes savepoint_id = 1; + // Whether to rollback/release the given savepoint. + EndSavepoint action = 2; +} + +// Query Execution Messages. + +/* + * Represents a SQL query. Used in the command member of FlightDescriptor + * for the following RPC calls: + * - GetSchema: return the Arrow schema of the query. + * Fields on this schema may contain the following metadata: + * - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name + * - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name + * - ARROW:FLIGHT:SQL:TABLE_NAME - Table name + * - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. + * - ARROW:FLIGHT:SQL:PRECISION - Column precision/size + * - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable + * - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. + * - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. + * - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. + * - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. + * - GetFlightInfo: execute the query. + */ +message CommandStatementQuery { + + // The SQL syntax. + string query = 1; + // Include the query as part of this transaction (if unset, the query is auto-committed). + optional bytes transaction_id = 2; +} + +/* + * Represents a Substrait plan. Used in the command member of FlightDescriptor + * for the following RPC calls: + * - GetSchema: return the Arrow schema of the query. + * Fields on this schema may contain the following metadata: + * - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name + * - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name + * - ARROW:FLIGHT:SQL:TABLE_NAME - Table name + * - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. + * - ARROW:FLIGHT:SQL:PRECISION - Column precision/size + * - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable + * - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. + * - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. + * - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. + * - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. + * - GetFlightInfo: execute the query. + * - DoPut: execute the query. + */ +message CommandStatementSubstraitPlan { + + // A serialized substrait.Plan + SubstraitPlan plan = 1; + // Include the query as part of this transaction (if unset, the query is auto-committed). + optional bytes transaction_id = 2; +} + +/** + * Represents a ticket resulting from GetFlightInfo with a CommandStatementQuery. 
+ * This should be used only once and treated as an opaque value, that is, clients should not attempt to parse this.
+ */
+message TicketStatementQuery {
+
+ // Unique identifier for the instance of the statement to execute.
+ bytes statement_handle = 1;
+}
+
+/*
+ * Represents an instance of executing a prepared statement. Used in the command member of FlightDescriptor for
+ * the following RPC calls:
+ * - GetSchema: return the Arrow schema of the query.
+ * Fields on this schema may contain the following metadata:
+ * - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name
+ * - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name
+ * - ARROW:FLIGHT:SQL:TABLE_NAME - Table name
+ * - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column.
+ * - ARROW:FLIGHT:SQL:PRECISION - Column precision/size
+ * - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable
+ * - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise.
+ * - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise.
+ * - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise.
+ * - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise.
+ *
+ * If the schema is retrieved after parameter values have been bound with DoPut, then the server should account
+ * for the parameters when determining the schema.
+ * - DoPut: bind parameter values. All of the bound parameter sets will be executed as a single atomic execution.
+ * - GetFlightInfo: execute the prepared statement instance.
+ */
+message CommandPreparedStatementQuery {
+
+ // Opaque handle for the prepared statement on the server.
+ bytes prepared_statement_handle = 1;
+}
+
+/*
+ * Represents a SQL update query. Used in the command member of FlightDescriptor
+ * for the RPC call DoPut to cause the server to execute the included SQL update.
+ */
+message CommandStatementUpdate {
+
+ // The SQL syntax.
+ string query = 1;
+ // Include the query as part of this transaction (if unset, the query is auto-committed).
+ optional bytes transaction_id = 2;
+}
+
+/*
+ * Represents a SQL update query. Used in the command member of FlightDescriptor
+ * for the RPC call DoPut to cause the server to execute the included
+ * prepared statement handle as an update.
+ */
+message CommandPreparedStatementUpdate {
+
+ // Opaque handle for the prepared statement on the server.
+ bytes prepared_statement_handle = 1;
+}
+
+/*
+ * Represents a bulk ingestion request. Used in the command member of FlightDescriptor
+ * for the RPC call DoPut to cause the server to load the contents of the stream's
+ * FlightData into the target destination.
+ */
+message CommandStatementIngest {
+
+ // Options for table definition behavior
+ message TableDefinitionOptions {
+ // The action to take if the target table does not exist
+ enum TableNotExistOption {
+ // Do not use. Servers should error if this is specified by a client.
+ TABLE_NOT_EXIST_OPTION_UNSPECIFIED = 0;
+ // Create the table if it does not exist
+ TABLE_NOT_EXIST_OPTION_CREATE = 1;
+ // Fail if the table does not exist
+ TABLE_NOT_EXIST_OPTION_FAIL = 2;
+ }
+ // The action to take if the target table already exists
+ enum TableExistsOption {
+ // Do not use. Servers should error if this is specified by a client.
+ TABLE_EXISTS_OPTION_UNSPECIFIED = 0;
+ // Fail if the table already exists
+ TABLE_EXISTS_OPTION_FAIL = 1;
+ // Append to the table if it already exists
+ TABLE_EXISTS_OPTION_APPEND = 2;
+ // Drop and recreate the table if it already exists
+ TABLE_EXISTS_OPTION_REPLACE = 3;
+ }
+
+ TableNotExistOption if_not_exist = 1;
+ TableExistsOption if_exists = 2;
+ }
+
+ // The behavior for handling the table definition.
+ TableDefinitionOptions table_definition_options = 1;
+ // The table to load data into.
+ string table = 2;
+ // The db_schema of the destination table to load data into. If unset, a backend-specific default may be used.
+ optional string schema = 3;
+ // The catalog of the destination table to load data into. If unset, a backend-specific default may be used.
+ optional string catalog = 4;
+ /*
+ * Store ingested data in a temporary table.
+ * The effect of setting temporary is to place the table in a backend-defined namespace, and to drop the table at the end of the session.
+ * The namespacing may make use of a backend-specific schema and/or catalog.
+ * The server should return an error if an explicit choice of schema or catalog is incompatible with the server's namespacing decision.
+ */
+ bool temporary = 5;
+ // Perform the ingestion as part of this transaction. If specified, results should not be committed in the event of an error/cancellation.
+ optional bytes transaction_id = 6;
+
+ // Future extensions to the parameters of CommandStatementIngest should be added here, at a lower index than the generic 'options' parameter.
+
+ // Backend-specific options.
+ map<string, string> options = 1000;
+}
+
+/*
+ * Returned from the RPC call DoPut when a CommandStatementUpdate,
+ * CommandPreparedStatementUpdate, or CommandStatementIngest was
+ * in the request, containing results from the update.
+ */
+message DoPutUpdateResult {
+
+ // The number of records updated. A return value of -1 represents
+ // an unknown updated record count.
+ int64 record_count = 1;
+}
+
+/* An *optional* response returned when `DoPut` is called with `CommandPreparedStatementQuery`.
+ *
+ * *Note on legacy behavior*: previous versions of the protocol did not return any result for
+ * this command, and that behavior should still be supported by clients. In that case, the client
+ * can continue as though the fields in this message were not provided or set to sensible default values.
+ */
+message DoPutPreparedStatementResult {
+
+ // Represents a (potentially updated) opaque handle for the prepared statement on the server.
+ // Because the handle could potentially be updated, any previous handles for this prepared
+ // statement should be considered invalid, and all subsequent requests for this prepared
+ // statement must use this new handle.
+ // The updated handle allows implementing query parameters with stateless services.
+ //
+ // When an updated handle is not provided by the server, clients should continue
+ // using the previous handle provided by `ActionCreatePreparedStatementResult`.
+ optional bytes prepared_statement_handle = 1;
+}
+
+/*
+ * Request message for the "CancelQuery" action.
+ *
+ * Explicitly cancel a running query.
+ *
+ * This lets a single client explicitly cancel work, no matter how many clients
+ * are involved/whether the query is distributed or not, given server support.
+ * The transaction/statement is not rolled back; it is the application's job to
+ * commit or rollback as appropriate.
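Since DoPutUpdateResult.record_count uses -1 as an in-band marker for "unknown", clients may want to normalize it before use; a tiny Java sketch under that assumption:

```java
import java.util.OptionalLong;

public final class UpdateResults {
  // -1 is the protocol's in-band marker for "unknown record count".
  static OptionalLong affectedRows(long recordCount) {
    return recordCount == -1 ? OptionalLong.empty() : OptionalLong.of(recordCount);
  }
}
```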
This only indicates the client no longer + * wishes to read the remainder of the query results or continue submitting + * data. + * + * This command is idempotent. + * + * This command is deprecated since 13.0.0. Use the "CancelFlightInfo" + * action with DoAction instead. + */ +message ActionCancelQueryRequest { + option deprecated = true; + + // The result of the GetFlightInfo RPC that initiated the query. + // XXX(ARROW-16902): this must be a serialized FlightInfo, but is + // rendered as bytes because Protobuf does not really support one + // DLL using Protobuf definitions from another DLL. + bytes info = 1; +} + +/* + * The result of cancelling a query. + * + * The result should be wrapped in a google.protobuf.Any message. + * + * This command is deprecated since 13.0.0. Use the "CancelFlightInfo" + * action with DoAction instead. + */ +message ActionCancelQueryResult { + option deprecated = true; + + enum CancelResult { + // The cancellation status is unknown. Servers should avoid using + // this value (send a NOT_FOUND error if the requested query is + // not known). Clients can retry the request. + CANCEL_RESULT_UNSPECIFIED = 0; + // The cancellation request is complete. Subsequent requests with + // the same payload may return CANCELLED or a NOT_FOUND error. + CANCEL_RESULT_CANCELLED = 1; + // The cancellation request is in progress. The client may retry + // the cancellation request. + CANCEL_RESULT_CANCELLING = 2; + // The query is not cancellable. The client should not retry the + // cancellation request. + CANCEL_RESULT_NOT_CANCELLABLE = 3; + } + + CancelResult result = 1; +} + +extend google.protobuf.MessageOptions { + bool experimental = 1000; +} diff --git a/arrow-format/Message.fbs b/arrow-format/Message.fbs new file mode 100644 index 000000000..be57533d8 --- /dev/null +++ b/arrow-format/Message.fbs @@ -0,0 +1,157 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +include "Schema.fbs"; +include "SparseTensor.fbs"; +include "Tensor.fbs"; + +namespace org.apache.arrow.flatbuf; + +/// ---------------------------------------------------------------------- +/// Data structures for describing a table row batch (a collection of +/// equal-length Arrow arrays) + +/// Metadata about a field at some level of a nested type tree (but not +/// its children). +/// +/// For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` +/// would have {length: 5, null_count: 2} for its List node, and {length: 6, +/// null_count: 0} for its Int16 node, as separate FieldNode structs +struct FieldNode { + /// The number of value slots in the Arrow array at this level of a nested + /// tree + length: long; + + /// The number of observed nulls. 
Fields with null_count == 0 may choose not
+ /// to write their physical validity bitmap out as a materialized buffer,
+ /// instead setting the length of the bitmap buffer to 0.
+ null_count: long;
+}
+
+enum CompressionType:byte {
+ // LZ4 frame format, for portability, as provided by lz4frame.h or wrappers
+ // thereof. Not to be confused with "raw" (also called "block") format
+ // provided by lz4.h
+ LZ4_FRAME,
+
+ // Zstandard
+ ZSTD
+}
+
+/// Provided for forward compatibility in case we need to support different
+/// strategies for compressing the IPC message body (like whole-body
+/// compression rather than buffer-level) in the future
+enum BodyCompressionMethod:byte {
+ /// Each constituent buffer is first compressed with the indicated
+ /// compressor, and then written with the uncompressed length in the first 8
+ /// bytes as a 64-bit little-endian signed integer followed by the compressed
+ /// buffer bytes (and then padding as required by the protocol). The
+ /// uncompressed length may be set to -1 to indicate that the data that
+ /// follows is not compressed, which can be useful for cases where
+ /// compression does not yield appreciable savings.
+ BUFFER
+}
+
+/// Optional compression for the memory buffers constituting IPC message
+/// bodies. Intended for use with RecordBatch but could be used for other
+/// message types
+table BodyCompression {
+ /// Compressor library.
+ /// For LZ4_FRAME, each compressed buffer must consist of a single frame.
+ codec: CompressionType = LZ4_FRAME;
+
+ /// Indicates the way the record batch body was compressed
+ method: BodyCompressionMethod = BUFFER;
+}
+
+/// A data header describing the shared memory layout of a "record" or "row"
+/// batch. Some systems call this a "row batch" internally and others a "record
+/// batch".
+table RecordBatch {
+ /// number of records / rows. The arrays in the batch should all have this
+ /// length
+ length: long;
+
+ /// Nodes correspond to the pre-ordered flattened logical schema
+ nodes: [FieldNode];
+
+ /// Buffers correspond to the pre-ordered flattened buffer tree
+ ///
+ /// The number of buffers appended to this list depends on the schema. For
+ /// example, most primitive arrays will have 2 buffers, 1 for the validity
+ /// bitmap and 1 for the values. For struct arrays, there will only be a
+ /// single buffer for the validity (nulls) bitmap
+ buffers: [Buffer];
+
+ /// Optional compression of the message body
+ compression: BodyCompression;
+
+ /// Some types such as Utf8View are represented using a variable number of buffers.
+ /// For each such Field in the pre-ordered flattened logical schema, there will be
+ /// an entry in variadicBufferCounts to indicate the number of variadic
+ /// buffers which belong to that Field in the current RecordBatch.
+ ///
+ /// For example, the schema
+ ///     col1: Struct<alpha: Int32, beta: BinaryView>
+ ///     col2: Utf8View
+ /// contains two Fields with variadic buffers so variadicBufferCounts will have
+ /// two entries, the first counting the variadic buffers of `col1.beta` and the
+ /// second counting `col2`'s.
+ ///
+ /// This field may be omitted if and only if the schema contains no Fields with
+ /// a variable number of buffers, such as BinaryView and Utf8View.
+ variadicBufferCounts: [long];
+}
+
+/// For sending dictionary encoding information. Any Field can be
+/// dictionary-encoded, but in this case none of its children may be
+/// dictionary-encoded.
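The BUFFER framing described above (an 8-byte little-endian uncompressed length, with -1 meaning "stored uncompressed") can be split off before handing the payload to a codec. A hedged Java sketch; the decompress hook is a stub, not a real Arrow API:

```java
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public final class BufferCompressionFraming {
  // Splits a compressed IPC body buffer into its 8-byte little-endian
  // uncompressed-length prefix and the payload that follows it.
  static ByteBuffer readBody(ByteBuffer buffer) {
    ByteBuffer le = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN);
    long uncompressedLength = le.getLong(); // first 8 bytes of the buffer
    ByteBuffer payload = le.slice();        // everything after the prefix
    if (uncompressedLength == -1) {
      return payload; // -1 means the payload was stored uncompressed
    }
    return decompress(payload, uncompressedLength);
  }

  // Stub: wire in an LZ4-frame or Zstandard codec here.
  private static ByteBuffer decompress(ByteBuffer payload, long uncompressedLength) {
    throw new UnsupportedOperationException("plug in an LZ4_FRAME or ZSTD codec");
  }
}
```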
+/// There is one vector / column per dictionary, but that vector / column +/// may be spread across multiple dictionary batches by using the isDelta +/// flag + +table DictionaryBatch { + id: long; + data: RecordBatch; + + /// If isDelta is true the values in the dictionary are to be appended to a + /// dictionary with the indicated id. If isDelta is false this dictionary + /// should replace the existing dictionary. + isDelta: bool = false; +} + +/// ---------------------------------------------------------------------- +/// The root Message type + +/// This union enables us to easily send different message types without +/// redundant storage, and in the future we can easily add new message types. +/// +/// Arrow implementations do not need to implement all of the message types, +/// which may include experimental metadata types. For maximum compatibility, +/// it is best to send data using RecordBatch +union MessageHeader { + Schema, DictionaryBatch, RecordBatch, Tensor, SparseTensor +} + +table Message { + version: org.apache.arrow.flatbuf.MetadataVersion; + header: MessageHeader; + bodyLength: long; + custom_metadata: [ KeyValue ]; +} + +root_type Message; diff --git a/arrow-format/README.rst b/arrow-format/README.rst new file mode 100644 index 000000000..0eaad49b7 --- /dev/null +++ b/arrow-format/README.rst @@ -0,0 +1,25 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Arrow Protocol Files +==================== + +This folder contains binary protocol definitions for the Arrow columnar format +and other parts of the project, like the Flight RPC framework. + +For documentation about the Arrow format, see the `docs/source/format` +directory. diff --git a/arrow-format/Schema.fbs b/arrow-format/Schema.fbs new file mode 100644 index 000000000..e8e14b112 --- /dev/null +++ b/arrow-format/Schema.fbs @@ -0,0 +1,571 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// Logical types, vector layouts, and schemas + +/// Format Version History. 
+/// Version 1.0 - Forward and backwards compatibility guaranteed.
+/// Version 1.1 - Add Decimal256.
+/// Version 1.2 - Add Interval MONTH_DAY_NANO.
+/// Version 1.3 - Add Run-End Encoded.
+/// Version 1.4 - Add BinaryView, Utf8View, variadicBufferCounts, ListView, and
+///               LargeListView.
+/// Version 1.5 - Add 32-bit and 64-bit as allowed bit widths for Decimal
+
+namespace org.apache.arrow.flatbuf;
+
+enum MetadataVersion:short {
+ /// 0.1.0 (October 2016).
+ V1,
+
+ /// 0.2.0 (February 2017). Non-backwards compatible with V1.
+ V2,
+
+ /// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2.
+ V3,
+
+ /// >= 0.8.0 (December 2017). Non-backwards compatible with V3.
+ V4,
+
+ /// >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4
+ /// metadata and IPC messages). Implementations are recommended to provide a
+ /// V4 compatibility mode with V5 format changes disabled.
+ ///
+ /// Incompatible changes between V4 and V5:
+ /// - Union buffer layout has changed. In V5, Unions don't have a validity
+ ///   bitmap buffer.
+ V5,
+}
+
+/// Represents Arrow Features that might not have full support
+/// within implementations. This is intended to be used in
+/// two scenarios:
+/// 1. A mechanism for readers of Arrow Streams
+///    and files to understand that the stream or file makes
+///    use of a feature that isn't supported by or is unknown to
+///    the implementation (and therefore can meet the Arrow
+///    forward compatibility guarantees).
+/// 2. A means of negotiating between a client and server
+///    what features a stream is allowed to use. The enum
+///    values here are intended to represent higher-level
+///    features; additional details may be negotiated
+///    with key-value pairs specific to the protocol.
+///
+/// Enums added to this list should be assigned power-of-two values
+/// to facilitate exchanging and comparing bitmaps for supported
+/// features.
+enum Feature : long {
+ /// Needed to make flatbuffers happy.
+ UNUSED = 0,
+ /// The stream makes use of multiple full dictionaries with the
+ /// same ID and assumes clients implement dictionary replacement
+ /// correctly.
+ DICTIONARY_REPLACEMENT = 1,
+ /// The stream makes use of compressed bodies as described
+ /// in Message.fbs.
+ COMPRESSED_BODY = 2
+}
+
+/// These are stored in the flatbuffer in the Type union below
+
+table Null {
+}
+
+/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
+/// (according to the physical memory layout). We used Struct_ here as
+/// Struct is a reserved word in Flatbuffers
+table Struct_ {
+}
+
+table List {
+}
+
+/// Same as List, but with 64-bit offsets, allowing representation of
+/// extremely large data values.
+table LargeList {
+}
+
+/// Represents the same logical types that List can, but contains offsets and
+/// sizes allowing for writes in any order and sharing of child values among
+/// list values.
+table ListView {
+}
+
+/// Same as ListView, but with 64-bit offsets and sizes, allowing representation of
+/// extremely large data values.
+table LargeListView {
+}
+
+table FixedSizeList {
+ /// Number of list items per value
+ listSize: int;
+}
+
+/// A Map is a logical nested type that is represented as
+///
+/// List<entries: Struct<key: K, value: V>>
+///
+/// In this layout, the keys and values are each respectively contiguous. We do
+/// not constrain the key and value types, so the application is responsible
+/// for ensuring that the keys are hashable and unique. Whether the keys are sorted
+/// may be set in the metadata for this field.
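Because Feature values are assigned powers of two, a negotiated feature set travels as a single integer bitmap. A minimal Java sketch of combining and testing such a bitmap (constants mirror the Feature enum above; this is not a generated class):

```java
public final class FeatureBitmap {
  // Constants mirror the Feature enum; powers of two by design.
  static final long DICTIONARY_REPLACEMENT = 1L;
  static final long COMPRESSED_BODY = 2L;

  static long combine(long... features) {
    long bitmap = 0L;
    for (long feature : features) {
      bitmap |= feature;
    }
    return bitmap;
  }

  static boolean supports(long bitmap, long feature) {
    return (bitmap & feature) == feature;
  }

  public static void main(String[] args) {
    long negotiated = combine(DICTIONARY_REPLACEMENT, COMPRESSED_BODY); // 3
    System.out.println(supports(negotiated, COMPRESSED_BODY));          // true
  }
}
```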
+
+enum UnionMode:short { Sparse, Dense }
+
+/// A union is a complex type with children in Field.
+/// By default, ids in the type vector refer to the offsets in the children;
+/// optionally, typeIds provides an indirection between the child offset and
+/// the type id: for each child, `typeIds[offset]` is the id used in the type
+/// vector.
+table Union {
+  mode: UnionMode;
+  typeIds: [ int ]; // optional, describes typeid of each child.
+}
+
+table Int {
+  bitWidth: int; // restricted to 8, 16, 32, and 64 in v1
+  is_signed: bool;
+}
+
+enum Precision:short {HALF, SINGLE, DOUBLE}
+
+table FloatingPoint {
+  precision: Precision;
+}
+
+/// Unicode with UTF-8 encoding
+table Utf8 {
+}
+
+/// Opaque binary data
+table Binary {
+}
+
+/// Same as Utf8, but with 64-bit offsets, allowing it to represent
+/// extremely large data values.
+table LargeUtf8 {
+}
+
+/// Same as Binary, but with 64-bit offsets, allowing it to represent
+/// extremely large data values.
+table LargeBinary {
+}
+
+/// Logically the same as Utf8, but the internal representation uses a view
+/// struct that contains the string length and either the string's entire data
+/// inline (for small strings) or an inlined prefix, an index of another buffer,
+/// and an offset pointing to a slice in that buffer (for non-small strings).
+///
+/// Since it uses a variable number of data buffers, each Field with this type
+/// must have a corresponding entry in `variadicBufferCounts`.
+table Utf8View {
+}
+
+/// Logically the same as Binary, but the internal representation uses a view
+/// struct that contains the value length and either the value's entire data
+/// inline (for small values) or an inlined prefix, an index of another buffer,
+/// and an offset pointing to a slice in that buffer (for non-small values).
+///
+/// Since it uses a variable number of data buffers, each Field with this type
+/// must have a corresponding entry in `variadicBufferCounts`.
+table BinaryView {
+}
+
+
+table FixedSizeBinary {
+  /// Number of bytes per value
+  byteWidth: int;
+}
+
+table Bool {
+}
+
+/// Contains two child arrays, run_ends and values.
+/// The run_ends child array must be a 16/32/64-bit integer array
+/// which encodes the indices at which the run with the value in
+/// each corresponding index in the values child array ends.
+/// Like list/struct types, the value array can be of any type.
+table RunEndEncoded {
+}
+
+/// Exact decimal value represented as an integer value in two's
+/// complement. Currently 32-bit (4-byte), 64-bit (8-byte),
+/// 128-bit (16-byte) and 256-bit (32-byte) integers are used.
+/// The representation uses the endianness indicated in the Schema.
+table Decimal {
+  /// Total number of decimal digits
+  precision: int;
+
+  /// Number of digits after the decimal point "."
+  scale: int;
+
+  /// Number of bits per value. The accepted widths are 32, 64, 128 and 256.
+  /// We use bitWidth for consistency with Int::bitWidth.
+  bitWidth: int = 128;
+}
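To connect precision, scale, and bitWidth to concrete values: a decimal column stores unscaled two's-complement integers, so 12.34 at scale 2 is stored as the integer 1234. A small self-contained sketch using `java.math.BigDecimal` (illustrative only, not part of this patch):

```java
import java.math.BigDecimal;
import java.math.BigInteger;

public class DecimalStorageSketch {
  public static void main(String[] args) {
    // For a Decimal(precision=9, scale=2, bitWidth=128) column, the value
    // 12.34 is stored as the unscaled two's-complement integer 1234.
    BigDecimal value = new BigDecimal("12.34");
    BigInteger unscaled = value.unscaledValue();
    System.out.println(unscaled + " with scale " + value.scale()); // 1234 with scale 2
  }
}
```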
+
+enum DateUnit: short {
+  DAY,
+  MILLISECOND
+}
+
+/// Date is either a 32-bit or 64-bit signed integer type representing an
+/// elapsed time since the UNIX epoch (1970-01-01), stored in either of two
+/// units:
+///
+/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
+///   leap seconds), where the values are evenly divisible by 86400000
+/// * Days (32 bits) since the UNIX epoch
+table Date {
+  unit: DateUnit = MILLISECOND;
+}
+
+enum TimeUnit: short { SECOND, MILLISECOND, MICROSECOND, NANOSECOND }
+
+/// Time is either a 32-bit or 64-bit signed integer type representing an
+/// elapsed time since midnight, stored in one of four units: seconds,
+/// milliseconds, microseconds or nanoseconds.
+///
+/// The integer `bitWidth` depends on the `unit` and must be one of the following:
+/// * SECOND and MILLISECOND: 32 bits
+/// * MICROSECOND and NANOSECOND: 64 bits
+///
+/// The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds
+/// (exclusive), adjusted for the time unit (for example, up to 86400000
+/// exclusive for the MILLISECOND unit).
+/// This definition doesn't allow for leap seconds. Time values from
+/// measurements with leap seconds will need to be corrected when ingesting
+/// into Arrow (for example by replacing the value 86400 with 86399).
+table Time {
+  unit: TimeUnit = MILLISECOND;
+  bitWidth: int = 32;
+}
+
+/// Timestamp is a 64-bit signed integer representing an elapsed time since a
+/// fixed epoch, stored in one of four units: seconds, milliseconds,
+/// microseconds or nanoseconds, and is optionally annotated with a timezone.
+///
+/// Timestamp values do not include any leap seconds (in other words, all
+/// days are considered 86400 seconds long).
+///
+/// Timestamps with a non-empty timezone
+/// ------------------------------------
+///
+/// If a Timestamp column has a non-empty timezone value, its epoch is
+/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone
+/// (the Unix epoch), regardless of the Timestamp's own timezone.
+///
+/// Therefore, timestamp values with a non-empty timezone correspond to
+/// physical points in time together with some additional information about
+/// how the data was obtained and/or how to display it (the timezone).
+///
+/// For example, the timestamp value 0 with the timezone string "Europe/Paris"
+/// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the
+/// application may prefer to display it as "January 1st 1970, 01h00" in
+/// the Europe/Paris timezone (which is the same physical point in time).
+///
+/// One consequence is that timestamp values with a non-empty timezone
+/// can be compared and ordered directly, since they all share the same
+/// well-known point of reference (the Unix epoch).
+///
+/// Timestamps with an unset / empty timezone
+/// -----------------------------------------
+///
+/// If a Timestamp column has no timezone value, its epoch is
+/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone.
+///
+/// Therefore, timestamp values without a timezone cannot be meaningfully
+/// interpreted as physical points in time, but only as calendar / clock
+/// indications ("wall clock time") in an unspecified timezone.
+///
+/// For example, the timestamp value 0 with an empty timezone string
+/// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there
+/// is not enough information to interpret it as a well-defined physical
+/// point in time.
+///
+/// One consequence is that timestamp values without a timezone cannot
+/// be reliably compared or ordered, since they may have different points of
+/// reference. In particular, it is *not* possible to interpret an unset
+/// or empty timezone as the same as "UTC".
+///
+/// Conversion between timezones
+/// ----------------------------
+///
+/// If a Timestamp column has a non-empty timezone, changing the timezone
+/// to a different non-empty value is a metadata-only operation:
+/// the timestamp values need not change as their point of reference remains
+/// the same (the Unix epoch).
+///
+/// However, if a Timestamp column has no timezone value, changing it to a
+/// non-empty value requires thinking about the desired semantics.
+/// One possibility is to assume that the original timestamp values are
+/// relative to the epoch of the timezone being set; timestamp values should
+/// then be adjusted to the Unix epoch (for example, changing the timezone from
+/// empty to "Europe/Paris" would require converting the timestamp values
+/// from "Europe/Paris" to "UTC", which seems counter-intuitive but is
+/// nevertheless correct).
+///
+/// Guidelines for encoding data from external libraries
+/// ----------------------------------------------------
+///
+/// Date & time libraries often have multiple different data types for temporal
+/// data. In order to ease interoperability between different implementations,
+/// the Arrow project has some recommendations for encoding these types into a
+/// Timestamp column.
+///
+/// An "instant" represents a physical point in time that has no relevant timezone
+/// (for example, astronomical data). To encode an instant, use a Timestamp with
+/// the timezone string set to "UTC", and make sure the Timestamp values
+/// are relative to the UTC epoch (January 1st 1970, midnight).
+///
+/// A "zoned date-time" represents a physical point in time annotated with an
+/// informative timezone (for example, the timezone in which the data was
+/// recorded). To encode a zoned date-time, use a Timestamp with the timezone
+/// string set to the name of the timezone, and make sure the Timestamp values
+/// are relative to the UTC epoch (January 1st 1970, midnight).
+///
+/// (There is some ambiguity between an instant and a zoned date-time with the
+/// UTC timezone. Both of these are stored the same in Arrow. Typically,
+/// this distinction does not matter. If it does, then an application should
+/// use custom metadata or an extension type to distinguish between the two cases.)
+///
+/// An "offset date-time" represents a physical point in time combined with an
+/// explicit offset from UTC. To encode an offset date-time, use a Timestamp
+/// with the timezone string set to the numeric timezone offset string
+/// (e.g. "+03:00"), and make sure the Timestamp values are relative to
+/// the UTC epoch (January 1st 1970, midnight).
+///
+/// A "naive date-time" (also called "local date-time" in some libraries)
+/// represents a wall clock time combined with a calendar date, but with
+/// no indication of how to map this information to a physical point in time.
+/// Naive date-times must be handled with care because of this missing
+/// information, and also because daylight saving time (DST) may make
+/// some values ambiguous or nonexistent. A naive date-time may be
+/// stored as a struct with Date and Time fields. However, it may also be
+/// encoded into a Timestamp column with an empty timezone. The timestamp
+/// values should be computed "as if" the timezone of the date-time values
+/// was UTC; for example, the naive date-time "January 1st 1970, 00h00" would
+/// be encoded as timestamp value 0.
+table Timestamp {
+  unit: TimeUnit;
+
+  /// The timezone is an optional string indicating the name of a timezone,
+  /// one of:
+  ///
+  /// * As used in the Olson timezone database (the "tz database" or
+  ///   "tzdata"), such as "America/New_York".
+  /// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX",
+  ///   such as "+07:30".
+  ///
+  /// Whether a timezone string is present indicates different semantics about
+  /// the data (see above).
+  timezone: string;
+}
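As a quick illustration of the four recommendations above, here is how they map onto `ArrowType.Timestamp` in the Arrow Java API (a sketch for illustration, not part of this patch):

```java
import org.apache.arrow.vector.types.TimeUnit;
import org.apache.arrow.vector.types.pojo.ArrowType;

public class TimestampEncodingSketch {
  public static void main(String[] args) {
    // "instant": values relative to the UTC epoch, timezone set to "UTC".
    ArrowType.Timestamp instant = new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC");
    // "zoned date-time": values still relative to the UTC epoch; the
    // timezone is display/interpretation metadata only.
    ArrowType.Timestamp zoned = new ArrowType.Timestamp(TimeUnit.MICROSECOND, "Europe/Paris");
    // "offset date-time": a fixed numeric offset instead of a zone name.
    ArrowType.Timestamp offset = new ArrowType.Timestamp(TimeUnit.MICROSECOND, "+03:00");
    // "naive date-time": no timezone; values are wall-clock indications.
    ArrowType.Timestamp naive = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null);
    System.out.println(instant + "\n" + zoned + "\n" + offset + "\n" + naive);
  }
}
```

Converting `zoned` to another named timezone would be a metadata-only change, whereas giving `naive` a timezone requires adjusting the stored values as described above.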
+
+enum IntervalUnit: short { YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO}
+// A "calendar" interval which models types that don't necessarily
+// have a precise duration without the context of a base timestamp (e.g.
+// days can differ in length during daylight saving time transitions).
+// All integers in the types below are stored in the endianness indicated
+// by the schema.
+//
+// YEAR_MONTH - Indicates the number of elapsed whole months, stored as
+//   4-byte signed integers.
+// DAY_TIME - Indicates the number of elapsed days and milliseconds (no leap seconds),
+//   stored as 2 contiguous 32-bit signed integers (8-bytes in total). Support
+//   of this IntervalUnit is not required for full arrow compatibility.
+// MONTH_DAY_NANO - A triple of the number of elapsed months, days, and nanoseconds.
+//   The values are stored contiguously in 16-byte blocks. Months and days are
+//   encoded as 32-bit signed integers and nanoseconds is encoded as a 64-bit
+//   signed integer. Nanoseconds does not allow for leap seconds. Each field is
+//   independent (e.g. there is no constraint that nanoseconds have the same
+//   sign as days or that the quantity of nanoseconds represents less than a
+//   day's worth of time).
+table Interval {
+  unit: IntervalUnit;
+}
+
+// An absolute length of time unrelated to any calendar artifacts.
+//
+// For the purposes of Arrow implementations, adding this value to a Timestamp
+// ("t1") naively (i.e. simply summing the two numbers) is acceptable even
+// though in some cases the resulting Timestamp (t2) would not account for
+// leap-seconds during the elapsed time between "t1" and "t2". Similarly,
+// representing the difference between two Unix timestamps is acceptable, but
+// would yield a value that is possibly a few seconds off from the true elapsed
+// time.
+//
+// The resolution defaults to millisecond, but can be any of the other
+// supported TimeUnit values as with Timestamp and Time types. This type is
+// always represented as an 8-byte integer.
+table Duration {
+  unit: TimeUnit = MILLISECOND;
+}
+
+/// ----------------------------------------------------------------------
+/// Top-level Type value, enabling extensible type-specific metadata. We can
+/// add new logical types to Type without breaking backwards compatibility
+
+union Type {
+  Null,
+  Int,
+  FloatingPoint,
+  Binary,
+  Utf8,
+  Bool,
+  Decimal,
+  Date,
+  Time,
+  Timestamp,
+  Interval,
+  List,
+  Struct_,
+  Union,
+  FixedSizeBinary,
+  FixedSizeList,
+  Map,
+  Duration,
+  LargeBinary,
+  LargeUtf8,
+  LargeList,
+  RunEndEncoded,
+  BinaryView,
+  Utf8View,
+  ListView,
+  LargeListView,
+}
+
+/// ----------------------------------------------------------------------
+/// User-defined key-value pairs to add custom metadata to Arrow.
+/// Key namespacing is the responsibility of the user.
+
+table KeyValue {
+  key: string;
+  value: string;
+}
+
+/// ----------------------------------------------------------------------
+/// Dictionary encoding metadata
+/// Maintained for forwards compatibility; in the future,
+/// dictionaries might be explicit maps between integers and values,
+/// allowing for non-contiguous index values.
+enum DictionaryKind : short { DenseArray }
+table DictionaryEncoding {
+  /// The known dictionary id in the application where this data is used. In
+  /// the file or streaming formats, the dictionary ids are found in the
+  /// DictionaryBatch messages
+  id: long;
+
+  /// The dictionary indices are constrained to be non-negative integers. If
+  /// this field is null, the indices must be signed int32. To maximize
+  /// cross-language compatibility and performance, implementations are
+  /// recommended to prefer signed integer types over unsigned integer types
+  /// and to avoid uint64 indices unless they are required by an application.
+  indexType: Int;
+
+  /// By default, dictionaries are not ordered, or the order does not have
+  /// semantic meaning. In some statistical applications, dictionary encoding
+  /// is used to represent ordered categorical data, and we provide a way to
+  /// preserve that metadata here
+  isOrdered: bool;
+
+  dictionaryKind: DictionaryKind;
+}
+
+/// ----------------------------------------------------------------------
+/// A field represents a named column in a record / row batch or child of a
+/// nested type.
+
+table Field {
+  /// Name is not required (e.g. in the fields of a List)
+  name: string;
+
+  /// Whether or not this field can contain nulls. Should be true in general.
+  nullable: bool;
+
+  /// This is the type of the decoded value if the field is dictionary encoded.
+  type: Type;
+
+  /// Present only if the field is dictionary encoded.
+  dictionary: DictionaryEncoding;
+
+  /// children apply only to nested data types like Struct, List and Union. For
+  /// primitive types children will have length 0.
+  children: [ Field ];
+
+  /// User-defined metadata
+  custom_metadata: [ KeyValue ];
+}
+
+/// ----------------------------------------------------------------------
+/// Endianness of the platform producing the data
+
+enum Endianness:short { Little, Big }
+
+/// ----------------------------------------------------------------------
+/// A Buffer represents a single contiguous memory segment
+struct Buffer {
+  /// The relative offset into the shared memory page where the bytes for this
+  /// buffer start
+  offset: long;
+
+  /// The absolute length (in bytes) of the memory buffer. The memory is found
+  /// from offset (inclusive) to offset + length (non-inclusive). When building
+  /// messages using the encapsulated IPC message, padding bytes may be written
+  /// after a buffer, but such padding bytes do not need to be accounted for in
+  /// the size here.
+  length: long;
+}
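The padding rule above means the bytes occupied in an IPC body can exceed `length`. Arrow IPC pads each buffer to an 8-byte boundary (64 bytes is recommended for alignment); a tiny sketch of that arithmetic (illustrative only, not part of this patch):

```java
public class BufferPaddingSketch {
  // Round a buffer length up to the next 8-byte boundary.
  static long padTo8(long length) {
    return (length + 7) & ~7L;
  }

  public static void main(String[] args) {
    // A 13-byte buffer occupies 16 bytes in the IPC body, but the
    // Buffer.length recorded in the metadata remains 13.
    System.out.println(padTo8(13)); // prints 16
  }
}
```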
+
+/// ----------------------------------------------------------------------
+/// A Schema describes the columns in a row batch
+
+table Schema {
+
+  /// Endianness of the buffers. Little Endian by default; if the endianness
+  /// does not match the underlying system, the vectors need to be converted.
+  endianness: Endianness=Little;
+
+  fields: [Field];
+  // User-defined metadata
+  custom_metadata: [ KeyValue ];
+
+  /// Features used in the stream/file.
+  features : [ Feature ];
+}
+
+root_type Schema;
diff --git a/arrow-format/SparseTensor.fbs b/arrow-format/SparseTensor.fbs
new file mode 100644
index 000000000..a6fd2f9e7
--- /dev/null
+++ b/arrow-format/SparseTensor.fbs
@@ -0,0 +1,228 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/// EXPERIMENTAL: Metadata for n-dimensional sparse arrays, aka "sparse tensors".
+/// Arrow implementations in general are not required to implement this type
+
+include "Tensor.fbs";
+
+namespace org.apache.arrow.flatbuf;
+
+/// ----------------------------------------------------------------------
+/// EXPERIMENTAL: Data structures for sparse tensors
+
+/// Coordinate (COO) format of sparse tensor index.
+///
+/// COO's index list is represented as an NxM matrix,
+/// where N is the number of non-zero values,
+/// and M is the number of dimensions of a sparse tensor.
+///
+/// indicesBuffer stores the location and size of the data of this indices
+/// matrix. The value type and the stride of the indices matrix are
+/// specified in the indicesType and indicesStrides fields.
+///
+/// For example, let X be a 2x3x4x5 tensor, and it has the following
+/// 6 non-zero values:
+/// ```text
+///   X[0, 1, 2, 0] := 1
+///   X[1, 1, 2, 3] := 2
+///   X[0, 2, 1, 0] := 3
+///   X[0, 1, 3, 0] := 4
+///   X[0, 1, 2, 1] := 5
+///   X[1, 2, 0, 4] := 6
+/// ```
+/// In COO format, the index matrix of X is the following 6x4 matrix,
+/// with one row of coordinates per non-zero value:
+/// ```text
+///   [[0, 1, 2, 0],
+///    [0, 1, 2, 1],
+///    [0, 1, 3, 0],
+///    [0, 2, 1, 0],
+///    [1, 1, 2, 3],
+///    [1, 2, 0, 4]]
+/// ```
+/// When isCanonical is true, the indices are sorted in lexicographical order
+/// (row-major order, as above), and do not have duplicated entries. Otherwise,
+/// the indices may not be sorted, or may have duplicated entries.
+table SparseTensorIndexCOO {
+  /// The type of values in indicesBuffer
+  indicesType: Int (required);
+
+  /// Non-negative byte offsets to advance one value cell along each dimension
+  /// If omitted, default to row-major order (C-like).
+  indicesStrides: [long];
+
+  /// The location and size of the indices matrix's data
+  indicesBuffer: Buffer (required);
+
+  /// This flag is true if and only if the indices matrix is sorted in
+  /// row-major order, and does not have duplicated entries.
+  /// This sort order is the same as that of TensorFlow's SparseTensor,
+  /// but it is the inverse of the order of SciPy's canonical coo_matrix
+  /// (SciPy employs column-major order for its coo_matrix).
+  isCanonical: bool;
+}
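The canonical (row-major sorted) COO order described above can be reproduced with a plain lexicographic scan of the dense tensor. A small self-contained sketch (illustrative only, not part of this patch):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class CooIndexSketch {
  public static void main(String[] args) {
    // The 2x3x4x5 example tensor X from the comment above.
    double[][][][] x = new double[2][3][4][5];
    x[0][1][2][0] = 1; x[1][1][2][3] = 2; x[0][2][1][0] = 3;
    x[0][1][3][0] = 4; x[0][1][2][1] = 5; x[1][2][0][4] = 6;

    List<int[]> indices = new ArrayList<>(); // rows of the NxM index matrix
    List<Double> values = new ArrayList<>(); // non-zeros, in canonical order
    for (int i = 0; i < 2; i++)
      for (int j = 0; j < 3; j++)
        for (int k = 0; k < 4; k++)
          for (int l = 0; l < 5; l++)
            if (x[i][j][k][l] != 0.0) {
              indices.add(new int[] {i, j, k, l});
              values.add(x[i][j][k][l]);
            }

    // Prints the 6x4 index matrix from the example; the values come out
    // in canonical order as [1.0, 5.0, 4.0, 3.0, 2.0, 6.0].
    for (int n = 0; n < indices.size(); n++) {
      System.out.println(Arrays.toString(indices.get(n)) + " -> " + values.get(n));
    }
  }
}
```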
+
+enum SparseMatrixCompressedAxis: short { Row, Column }
+
+/// Compressed Sparse format, which is matrix-specific.
+table SparseMatrixIndexCSX {
+  /// Which axis, row or column, is compressed
+  compressedAxis: SparseMatrixCompressedAxis;
+
+  /// The type of values in indptrBuffer
+  indptrType: Int (required);
+
+  /// indptrBuffer stores the location and size of the indptr array that
+  /// represents the range of the rows.
+  /// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
+  /// The length of this array is 1 + (the number of rows), and the type
+  /// of index value is long.
+  ///
+  /// For example, let X be the following 6x4 matrix:
+  /// ```text
+  ///   X := [[0, 1, 2, 0],
+  ///         [0, 0, 3, 0],
+  ///         [0, 4, 0, 5],
+  ///         [0, 0, 0, 0],
+  ///         [6, 0, 7, 8],
+  ///         [0, 9, 0, 0]].
+  /// ```
+  /// The array of non-zero values in X is:
+  /// ```text
+  ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+  /// ```
+  /// And the indptr of X is:
+  /// ```text
+  ///   indptr(X) = [0, 2, 3, 5, 5, 8, 9].
+  /// ```
+  indptrBuffer: Buffer (required);
+
+  /// The type of values in indicesBuffer
+  indicesType: Int (required);
+
+  /// indicesBuffer stores the location and size of the array that
+  /// contains the column indices of the corresponding non-zero values.
+  /// The type of index value is long.
+  ///
+  /// For example, the indices of the above X are:
+  /// ```text
+  ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
+  /// ```
+  /// Note that the indices are sorted in lexicographical order for each row.
+  indicesBuffer: Buffer (required);
+}
+
+/// Compressed Sparse Fiber (CSF) sparse tensor index.
+table SparseTensorIndexCSF {
+  /// CSF is a generalization of the compressed sparse row (CSR) index.
+  /// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
+  ///
+  /// CSF index recursively compresses each dimension of a tensor into a set
+  /// of prefix trees. Each path from a root to a leaf forms one tensor
+  /// non-zero index. CSF is implemented with two arrays of buffers and one
+  /// array of integers.
+  ///
+  /// For example, let X be a 2x3x4x5 tensor and let it have the following
+  /// 8 non-zero values:
+  /// ```text
+  ///   X[0, 0, 0, 1] := 1
+  ///   X[0, 0, 0, 2] := 2
+  ///   X[0, 1, 0, 0] := 3
+  ///   X[0, 1, 0, 2] := 4
+  ///   X[0, 1, 1, 0] := 5
+  ///   X[1, 1, 1, 0] := 6
+  ///   X[1, 1, 1, 1] := 7
+  ///   X[1, 1, 1, 2] := 8
+  /// ```
+  /// As a prefix tree this would be represented as:
+  /// ```text
+  ///           0          1
+  ///          / \         |
+  ///         0   1        1
+  ///         |  / \       |
+  ///         0 0   1      1
+  ///        /| |\  |     /|\
+  ///       1 2 0 2 0    0 1 2
+  /// ```
+  /// The type of values in indptrBuffers
+  indptrType: Int (required);
+
+  /// indptrBuffers stores the sparsity structure.
+  /// Each two consecutive dimensions in a tensor correspond to a buffer in
+  /// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
+  /// and `indptrBuffers[dim][i + 1]` signify a range of nodes in
+  /// `indicesBuffers[dim + 1]` that are children of the `indicesBuffers[dim][i]`
+  /// node.
+  ///
+  /// For example, the indptrBuffers for the above X are:
+  /// ```text
+  ///   indptrBuffer(X) = [
+  ///                      [0, 2, 3],
+  ///                      [0, 1, 3, 4],
+  ///                      [0, 2, 4, 5, 8]
+  ///                     ].
+  /// ```
+  indptrBuffers: [Buffer] (required);
+
+  /// The type of values in indicesBuffers
+  indicesType: Int (required);
+
+  /// indicesBuffers stores values of nodes.
+ /// Each tensor dimension corresponds to a buffer in indicesBuffers. + /// For example, the indicesBuffers for the above X is: + /// ```text + /// indicesBuffer(X) = [ + /// [0, 1], + /// [0, 1, 1], + /// [0, 0, 1, 1], + /// [1, 2, 0, 2, 0, 0, 1, 2] + /// ]. + /// ``` + indicesBuffers: [Buffer] (required); + + /// axisOrder stores the sequence in which dimensions were traversed to + /// produce the prefix tree. + /// For example, the axisOrder for the above X is: + /// ```text + /// axisOrder(X) = [0, 1, 2, 3]. + /// ``` + axisOrder: [int] (required); +} + +union SparseTensorIndex { + SparseTensorIndexCOO, + SparseMatrixIndexCSX, + SparseTensorIndexCSF +} + +table SparseTensor { + /// The type of data contained in a value cell. + /// Currently only fixed-width value types are supported, + /// no strings or nested types. + type: Type (required); + + /// The dimensions of the tensor, optionally named. + shape: [TensorDim] (required); + + /// The number of non-zero values in a sparse tensor. + non_zero_length: long; + + /// Sparse tensor index + sparseIndex: SparseTensorIndex (required); + + /// The location and size of the tensor's data + data: Buffer (required); +} + +root_type SparseTensor; diff --git a/arrow-format/Tensor.fbs b/arrow-format/Tensor.fbs new file mode 100644 index 000000000..409297ccf --- /dev/null +++ b/arrow-format/Tensor.fbs @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// EXPERIMENTAL: Metadata for n-dimensional arrays, aka "tensors" or +/// "ndarrays". Arrow implementations in general are not required to implement +/// this type + +include "Schema.fbs"; + +namespace org.apache.arrow.flatbuf; + +/// ---------------------------------------------------------------------- +/// Data structures for dense tensors + +/// Shape data for a single axis in a tensor +table TensorDim { + /// Length of dimension + size: long; + + /// Name of the dimension, optional + name: string; +} + +table Tensor { + /// The type of data contained in a value cell. Currently only fixed-width + /// value types are supported, no strings or nested types + type: Type (required); + + /// The dimensions of the tensor, optionally named + shape: [TensorDim] (required); + + /// Non-negative byte offsets to advance one value cell along each dimension + /// If omitted, default to row-major order (C-like). 
+  strides: [long];
+
+  /// The location and size of the tensor's data
+  data: Buffer (required);
+}
+
+root_type Tensor;
diff --git a/arrow-format/substrait/extension_types.yaml b/arrow-format/substrait/extension_types.yaml
new file mode 100644
index 000000000..0073da1ac
--- /dev/null
+++ b/arrow-format/substrait/extension_types.yaml
@@ -0,0 +1,170 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# substrait::{ExtensionTypeVariation, ExtensionType}s
+# for wrapping types which appear in the arrow type system but
+# are not first-class in substrait. These include:
+# - null
+# - unsigned integers
+# - half-precision floating point numbers
+# - 32-bit times and 64-bit dates
+# - timestamps with units other than microseconds
+# - timestamps with timezones other than UTC
+# - 256-bit decimals
+# - sparse and dense unions
+# - dictionary encoded types
+# - durations
+# - string and binary with 64 bit offsets
+# - list with 64-bit offsets
+# - interval<months: i32>
+# - interval<days: i32, millis: i32>
+# - interval<months: i32, days: i32, nanos: i64>
+# - arrow::ExtensionTypes
+#
+# These types fall into several categories of behavior:

+# Certain Arrow data types are, from Substrait's point of view, encodings.
+# These include dictionary, the view types (e.g. binary view, list view),
+# and REE.
+#
+# These types are not logically distinct from the type they are encoding.
+# Specifically, the types meet the following criteria:
+# * There is no value in the decoded type that cannot be represented
+#   as a value in the encoded type and vice versa.
+# * Functions have the same meaning when applied to the encoded type
+#
+# Note: if two types have a different range (e.g. string and large_string) then
+# they do not satisfy the above criteria and are not encodings.
+#
+# These types will never have a Substrait equivalent. From Substrait's point
+# of view these are execution details.

+# The following types are encodings:

+# binary_view
+# list_view
+# dictionary
+# ree

+# Arrow-cpp's Substrait serde does not yet handle parameterized UDTs. This means
+# the following types are not yet supported but may be supported in the future.
+# We define them below in case other implementations support them in the meantime.

+# decimal256
+# large_list
+# fixed_size_list
+# duration

+# Other types are not encodings, but are not first-class in Substrait. These
+# types are often similar to existing Substrait types but define a different range
+# of values. For example, unsigned integer types are very similar to their integer
+# counterparts, but have a different range of values. These types are defined here
+# as extension types.
+#
+# A full description of the types, along with their specified range, can be found
+# in Schema.fbs
+#
+# Consumers should take care when supporting the below types.
+# Should Substrait decide later to support these types, the consumer will need
+# to continue supporting the extension type names as aliases for backwards
+# compatibility.
+types:
+  - name: "null"
+    structure: {}
+  - name: interval_month
+    structure:
+      months: i32
+  - name: interval_day_milli
+    structure:
+      days: i32
+      millis: i32
+  - name: interval_month_day_nano
+    structure:
+      months: i32
+      days: i32
+      nanos: i64
+  # All unsigned integer literals are encoded as user defined literals with
+  # a google.protobuf.UInt64Value message.
+  - name: u8
+    structure: {}
+  - name: u16
+    structure: {}
+  - name: u32
+    structure: {}
+  - name: u64
+    structure: {}
+  # fp16 literals are encoded as user defined literals with
+  # a google.protobuf.UInt32Value message where the lower 16 bits are
+  # the fp16 value.
+  - name: fp16
+    structure: {}
+  # A 64-bit integer covers a much larger range: even though date64 stores
+  # milliseconds rather than days, it can still represent about 50x more
+  # dates than date32. Since it has a different range of values, it is an
+  # extension type.
+  #
+  # date64 literals are encoded as user defined literals with
+  # a google.protobuf.Int64Value message.
+  - name: date_millis
+    structure: {}
+  # time literals are encoded as user defined literals with
+  # a google.protobuf.Int32Value message (for time_seconds/time_millis)
+  # or a google.protobuf.Int64Value message (for time_nanos).
+  - name: time_seconds
+    structure: {}
+  - name: time_millis
+    structure: {}
+  - name: time_nanos
+    structure: {}
+  # Large string literals are encoded using a
+  # google.protobuf.StringValue message.
+  - name: large_string
+    structure: {}
+  # Large binary literals are encoded using a
+  # google.protobuf.BytesValue message.
+  - name: large_binary
+    structure: {}
+  # We cannot generate these today because they are parameterized UDTs and
+  # substrait-cpp does not yet support parameterized UDTs.
+  - name: decimal256
+    structure: {}
+    parameters:
+      - name: precision
+        type: integer
+        min: 0
+        max: 76
+      - name: scale
+        type: integer
+        min: 0
+        max: 76
+  - name: large_list
+    structure: {}
+    parameters:
+      - name: value_type
+        type: dataType
+  - name: fixed_size_list
+    structure: {}
+    parameters:
+      - name: value_type
+        type: dataType
+      - name: dimension
+        type: integer
+        min: 0
+  - name: duration
+    structure: {}
+    parameters:
+      - name: unit
+        type: string
+
diff --git a/ci/scripts/java_build.sh b/ci/scripts/java_build.sh
new file mode 100755
index 000000000..8441e00cc
--- /dev/null
+++ b/ci/scripts/java_build.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
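+
+# Usage sketch (arguments as wired up in docker-compose.yml; the JNI
+# distribution directory is optional and only needed for the JNI profiles):
+#
+#   ci/scripts/java_build.sh <source_dir> <build_dir> [<java_jni_dist_dir>]
+#
+# e.g. docker-compose.yml runs: java_build.sh /arrow-java /build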
+
+set -eo pipefail
+
+if [[ "${ARROW_JAVA_BUILD:-ON}" != "ON" ]]; then
+  exit
+fi
+
+arrow_dir=${1}
+source_dir=${1}
+build_dir=${2}
+java_jni_dist_dir=${3}
+
+: ${BUILD_DOCS_JAVA:=OFF}
+
+mvn="mvn -B -DskipTests -Drat.skip=true -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+
+if [ -n "${ARROW_JAVA_SKIP_GIT_PLUGIN:-}" ]; then
+  mvn="${mvn} -Dmaven.gitcommitid.skip=true"
+fi
+
+# We want to do an out-of-source build since (when using docker) we'll pollute
+# the source directory with files owned by root, but Maven does not really
+# support this. Instead, copy directories to the build directory.
+
+rm -rf "${build_dir}"
+mkdir -p "${build_dir}/arrow-format"
+cp -r "${source_dir}/arrow-format" "${build_dir}"
+cp -r "${source_dir}/dev" "${build_dir}"
+
+for source_root in $(find "${source_dir}" -not \( -path "${source_dir}"/build -prune \) -type f -name pom.xml -exec realpath -s --relative-to="${source_dir}" '{}' \; |
+                       awk -F/ '{print $1}' |
+                       sort -u); do
+  cp -r "${source_dir}/${source_root}" "${build_dir}"
+done
+
+pushd "${build_dir}"
+
+if [ "${ARROW_JAVA_SHADE_FLATBUFFERS}" = "ON" ]; then
+  mvn="${mvn} -Pshade-flatbuffers"
+fi
+
+if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then
+  mvn="${mvn} -Darrow.c.jni.dist.dir=${java_jni_dist_dir} -Parrow-c-data"
+fi
+
+if [ "${ARROW_JAVA_JNI}" = "ON" ]; then
+  mvn="${mvn} -Darrow.cpp.build.dir=${java_jni_dist_dir} -Parrow-jni"
+fi
+
+# Use `2 * ncores` threads
+${mvn} -T 2C clean install
+
+if [ "${BUILD_DOCS_JAVA}" = "ON" ]; then
+  # HTTP pooling is turned off to avoid download issues https://issues.apache.org/jira/browse/ARROW-11633
+  # GH-43378: Maven site plugins not compatible with multithreading
+  mkdir -p "${build_dir}/docs/java/reference"
+  ${mvn} -Dcheckstyle.skip=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false clean install site
+  rsync -a target/site/apidocs/ "${build_dir}/docs/java/reference"
+fi
+
+popd
diff --git a/ci/scripts/java_test.sh b/ci/scripts/java_test.sh
new file mode 100755
index 000000000..9d4bc018b
--- /dev/null
+++ b/ci/scripts/java_test.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
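+
+# Usage sketch (mirrors java_build.sh; docker-compose.yml runs
+# "java_test.sh /arrow-java /build" after the build step):
+#
+#   ci/scripts/java_test.sh <source_dir> <build_dir> [<java_jni_dist_dir>]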
+ +set -ex + +if [[ "${ARROW_JAVA_TEST:-ON}" != "ON" ]]; then + exit +fi + +arrow_dir=${1} +source_dir=${1} +build_dir=${2} +java_jni_dist_dir=${3} + +mvn="mvn -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" +# Use `2 * ncores` threads +mvn="${mvn} -T 2C" + +pushd ${build_dir} + +${mvn} -Darrow.test.dataRoot="${source_dir}/testing/data" clean test + +projects=() +if [ "${ARROW_JAVA_JNI}" = "ON" ]; then + projects+=(adapter/orc) + projects+=(dataset) + projects+=(gandiva) +fi +if [ "${#projects[@]}" -gt 0 ]; then + ${mvn} clean test \ + -Parrow-jni \ + -pl $(IFS=,; echo "${projects[*]}") \ + -Darrow.cpp.build.dir=${java_jni_dist_dir} +fi + +if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then + ${mvn} clean test -Parrow-c-data -pl c -Darrow.c.jni.dist.dir=${java_jni_dist_dir} +fi + +popd diff --git a/dataset/pom.xml b/dataset/pom.xml index 0c1f55dd6..21f67f1a6 100644 --- a/dataset/pom.xml +++ b/dataset/pom.xml @@ -183,7 +183,7 @@ under the License. --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.dataset,org.apache.arrow.memory.core,ALL-UNNAMED false - ${project.basedir}/../../testing/data + ${project.basedir}/../testing/data diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..103f2f3ad --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Usage +# ----- +# +# The docker compose file is parametrized using environment variables, the +# defaults are set in .env file. +# +# Example: +# $ ARCH=arm64v8 docker compose build java +# $ ARCH=arm64v8 docker compose run java + +volumes: + maven-cache: + name: maven-cache + +services: + java: + # Usage: + # docker compose build java + # docker compose run java + # Parameters: + # MAVEN: 3.9.6 + # JDK: 11, 17, 21 + image: ${ARCH}/maven:${MAVEN}-eclipse-temurin-${JDK} + volumes: &java-volumes + - .:/arrow-java:delegated + - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated + command: &java-command > + /bin/bash -c " + /arrow-java/ci/scripts/java_build.sh /arrow-java /build && + /arrow-java/ci/scripts/java_test.sh /arrow-java /build" diff --git a/flight/flight-core/pom.xml b/flight/flight-core/pom.xml index 374f6fcda..9dac97dd9 100644 --- a/flight/flight-core/pom.xml +++ b/flight/flight-core/pom.xml @@ -155,7 +155,7 @@ under the License. --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED false - ${project.basedir}/../../../testing/data + ${project.basedir}/../../testing/data @@ -170,7 +170,7 @@ under the License. 
compile-custom - ${basedir}/../../../format/ + ${basedir}/../../arrow-format/ diff --git a/flight/flight-sql-jdbc-core/pom.xml b/flight/flight-sql-jdbc-core/pom.xml index fc033a5ea..8ad7e7680 100644 --- a/flight/flight-sql-jdbc-core/pom.xml +++ b/flight/flight-sql-jdbc-core/pom.xml @@ -144,7 +144,7 @@ under the License. false - ${project.basedir}/../../../testing/data + ${project.basedir}/../../testing/data diff --git a/testing b/testing new file mode 160000 index 000000000..4d209492d --- /dev/null +++ b/testing @@ -0,0 +1 @@ +Subproject commit 4d209492d514c2d3cb2d392681b9aa00e6d8da1c diff --git a/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java b/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java index 28f9a9010..935252287 100644 --- a/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java +++ b/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java @@ -23,6 +23,7 @@ import static org.apache.arrow.tools.ArrowFileTestFixtures.writeInput; import static org.apache.arrow.tools.ArrowFileTestFixtures.writeVariableWidthViewInput; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -34,6 +35,7 @@ import java.io.File; import java.io.IOException; import java.io.StringReader; +import java.net.URL; import java.util.Map; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -163,12 +165,9 @@ public void testValid() throws Exception { @Test public void testJSONRoundTripWithVariableWidth() throws Exception { - File testJSONFile = - new File("../../docs/source/format/integration_json_examples/simple.json") - .getCanonicalFile(); - if (!testJSONFile.exists()) { - testJSONFile = new File("../docs/source/format/integration_json_examples/simple.json"); - } + URL resource = getClass().getResource("/integration_json_simple.json"); + assertNotNull(resource); + File testJSONFile = new File(resource.getFile()).getCanonicalFile(); File testOutFile = new File(testFolder, "testOut.arrow"); File testRoundTripJSONFile = new File(testFolder, "testOut.json"); testOutFile.delete(); @@ -211,12 +210,9 @@ public void testJSONRoundTripWithVariableWidth() throws Exception { @Test public void testJSONRoundTripWithStruct() throws Exception { - File testJSONFile = - new File("../../docs/source/format/integration_json_examples/struct.json") - .getCanonicalFile(); - if (!testJSONFile.exists()) { - testJSONFile = new File("../docs/source/format/integration_json_examples/struct.json"); - } + URL resource = getClass().getResource("/integration_json_struct.json"); + assertNotNull(resource); + File testJSONFile = new File(resource.getFile()).getCanonicalFile(); File testOutFile = new File(testFolder, "testOutStruct.arrow"); File testRoundTripJSONFile = new File(testFolder, "testOutStruct.json"); testOutFile.delete(); diff --git a/tools/src/test/resources/integration_json_simple.json b/tools/src/test/resources/integration_json_simple.json new file mode 100644 index 000000000..663472919 --- /dev/null +++ b/tools/src/test/resources/integration_json_simple.json @@ -0,0 +1,98 @@ +{ + "schema": { + "fields": [ + { + "name": "foo", + "type": {"name": "int", "isSigned": true, "bitWidth": 32}, + "nullable": true, + "children": [] + }, + { + "name": "bar", + "type": {"name": "floatingpoint", "precision": "DOUBLE"}, + "nullable": true, + "children": [] + }, + { + 
"name": "baz", + "type": {"name": "utf8"}, + "nullable": true, + "children": [] + } + ] + }, + "batches": [ + { + "count": 5, + "columns": [ + { + "name": "foo", + "count": 5, + "VALIDITY": [1, 0, 1, 1, 1], + "DATA": [1, 2, 3, 4, 5] + }, + { + "name": "bar", + "count": 5, + "VALIDITY": [1, 0, 0, 1, 1], + "DATA": [1.0, 2.0, 3.0, 4.0, 5.0] + }, + { + "name": "baz", + "count": 5, + "VALIDITY": [1, 0, 0, 1, 1], + "OFFSET": [0, 2, 2, 2, 5, 9], + "DATA": ["aa", "", "", "bbb", "cccc"] + } + ] + }, + { + "count": 5, + "columns": [ + { + "name": "foo", + "count": 5, + "VALIDITY": [1, 1, 1, 1, 1], + "DATA": [1, 2, 3, 4, 5] + }, + { + "name": "bar", + "count": 5, + "VALIDITY": [1, 1, 1, 1, 1], + "DATA": [1.0, 2.0, 3.0, 4.0, 5.0] + }, + { + "name": "baz", + "count": 5, + "VALIDITY": [1, 1, 1, 1, 1], + "OFFSET": [0, 2, 3, 4, 7, 11], + "DATA": ["aa", "b", "c", "ddd", "eeee"] + } + ] + }, + { + "count": 5, + "columns": [ + { + "name": "foo", + "count": 5, + "VALIDITY": [0, 0, 0, 0, 0], + "DATA": [1, 2, 3, 4, 5] + }, + { + "name": "bar", + "count": 5, + "VALIDITY": [0, 0, 0, 0, 0], + "DATA": [1.0, 2.0, 3.0, 4.0, 5.0] + }, + { + "name": "baz", + "count": 5, + "VALIDITY": [0, 0, 0, 0, 0], + "OFFSET": [0, 0, 0, 0, 0, 0], + "DATA": ["", "", "", "", ""] + } + ] + } + ] +} diff --git a/tools/src/test/resources/integration_json_struct.json b/tools/src/test/resources/integration_json_struct.json new file mode 100644 index 000000000..4e6cc774e --- /dev/null +++ b/tools/src/test/resources/integration_json_struct.json @@ -0,0 +1,201 @@ +{ + "schema": { + "fields": [ + { + "name": "struct_nullable", + "type": { + "name": "struct" + }, + "nullable": true, + "children": [ + { + "name": "f1", + "type": { + "name": "int", + "isSigned": true, + "bitWidth": 32 + }, + "nullable": true, + "children": [] + }, + { + "name": "f2", + "type": { + "name": "utf8" + }, + "nullable": true, + "children": [] + } + ] + } + ] + }, + "batches": [ + { + "count": 7, + "columns": [ + { + "name": "struct_nullable", + "count": 7, + "VALIDITY": [ + 0, + 1, + 1, + 1, + 0, + 1, + 0 + ], + "children": [ + { + "name": "f1", + "count": 7, + "VALIDITY": [ + 1, + 0, + 1, + 1, + 1, + 0, + 0 + ], + "DATA": [ + 1402032511, + 290876774, + 137773603, + 410361374, + 1959836418, + 1995074679, + -163525262 + ] + }, + { + "name": "f2", + "count": 7, + "VALIDITY": [ + 0, + 1, + 1, + 1, + 0, + 1, + 0 + ], + "OFFSET": [ + 0, + 0, + 7, + 14, + 21, + 21, + 28, + 28 + ], + "DATA": [ + "", + "MhRNxD4", + "3F9HBxK", + "aVd88fp", + "", + "3loZrRf", + "" + ] + } + ] + } + ] + }, + { + "count": 10, + "columns": [ + { + "name": "struct_nullable", + "count": 10, + "VALIDITY": [ + 0, + 1, + 1, + 0, + 1, + 0, + 0, + 1, + 1, + 1 + ], + "children": [ + { + "name": "f1", + "count": 10, + "VALIDITY": [ + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0 + ], + "DATA": [ + -2041500147, + 1715692943, + -35444996, + 1425496657, + 112765084, + 1760754983, + 413888857, + 2039738337, + -1924327700, + 670528518 + ] + }, + { + "name": "f2", + "count": 10, + "VALIDITY": [ + 1, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 0 + ], + "OFFSET": [ + 0, + 7, + 7, + 7, + 14, + 21, + 28, + 35, + 42, + 49, + 49 + ], + "DATA": [ + "AS5oARE", + "", + "", + "JGdagcX", + "78SLiRw", + "vbGf7OY", + "5uh5fTs", + "0ilsf82", + "LjS9MbU", + "" + ] + } + ] + } + ] + } + ] +} diff --git a/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java b/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java index 8037212aa..b394a667d 100644 --- 
a/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java +++ b/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.IOException; +import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; @@ -318,10 +319,9 @@ public void testWriteReadDecimalJSON() throws IOException { @Test public void testSetStructLength() throws IOException { - File file = new File("../../docs/source/format/integration_json_examples/struct.json"); - if (!file.exists()) { - file = new File("../docs/source/format/integration_json_examples/struct.json"); - } + URL resource = getClass().getResource("/integration_json_struct.json"); + assertNotNull(resource); + File file = new File(resource.getFile()); try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); JsonFileReader reader = new JsonFileReader(file, readerAllocator)) { diff --git a/vector/src/test/resources/integration_json_struct.json b/vector/src/test/resources/integration_json_struct.json new file mode 100644 index 000000000..4e6cc774e --- /dev/null +++ b/vector/src/test/resources/integration_json_struct.json @@ -0,0 +1,201 @@ +{ + "schema": { + "fields": [ + { + "name": "struct_nullable", + "type": { + "name": "struct" + }, + "nullable": true, + "children": [ + { + "name": "f1", + "type": { + "name": "int", + "isSigned": true, + "bitWidth": 32 + }, + "nullable": true, + "children": [] + }, + { + "name": "f2", + "type": { + "name": "utf8" + }, + "nullable": true, + "children": [] + } + ] + } + ] + }, + "batches": [ + { + "count": 7, + "columns": [ + { + "name": "struct_nullable", + "count": 7, + "VALIDITY": [ + 0, + 1, + 1, + 1, + 0, + 1, + 0 + ], + "children": [ + { + "name": "f1", + "count": 7, + "VALIDITY": [ + 1, + 0, + 1, + 1, + 1, + 0, + 0 + ], + "DATA": [ + 1402032511, + 290876774, + 137773603, + 410361374, + 1959836418, + 1995074679, + -163525262 + ] + }, + { + "name": "f2", + "count": 7, + "VALIDITY": [ + 0, + 1, + 1, + 1, + 0, + 1, + 0 + ], + "OFFSET": [ + 0, + 0, + 7, + 14, + 21, + 21, + 28, + 28 + ], + "DATA": [ + "", + "MhRNxD4", + "3F9HBxK", + "aVd88fp", + "", + "3loZrRf", + "" + ] + } + ] + } + ] + }, + { + "count": 10, + "columns": [ + { + "name": "struct_nullable", + "count": 10, + "VALIDITY": [ + 0, + 1, + 1, + 0, + 1, + 0, + 0, + 1, + 1, + 1 + ], + "children": [ + { + "name": "f1", + "count": 10, + "VALIDITY": [ + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0 + ], + "DATA": [ + -2041500147, + 1715692943, + -35444996, + 1425496657, + 112765084, + 1760754983, + 413888857, + 2039738337, + -1924327700, + 670528518 + ] + }, + { + "name": "f2", + "count": 10, + "VALIDITY": [ + 1, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 0 + ], + "OFFSET": [ + 0, + 7, + 7, + 7, + 14, + 21, + 28, + 35, + 42, + 49, + 49 + ], + "DATA": [ + "AS5oARE", + "", + "", + "JGdagcX", + "78SLiRw", + "vbGf7OY", + "5uh5fTs", + "0ilsf82", + "LjS9MbU", + "" + ] + } + ] + } + ] + } + ] +}
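Since the tests above now load these JSON files from the classpath, reading them outside JUnit works the same way. A minimal sketch using the `JsonFileReader` API already exercised in this diff (illustrative only, not part of the patch; it assumes the resource is on the classpath as added here):

```java
import java.io.File;
import java.net.URL;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ipc.JsonFileReader;

public class ReadIntegrationJson {
  public static void main(String[] args) throws Exception {
    // Resolve the resource exactly as the updated tests do.
    URL resource = ReadIntegrationJson.class.getResource("/integration_json_struct.json");
    File file = new File(resource.getFile());
    try (BufferAllocator allocator = new RootAllocator();
        JsonFileReader reader = new JsonFileReader(file, allocator);
        VectorSchemaRoot root = VectorSchemaRoot.create(reader.start(), allocator)) {
      while (reader.read(root)) {
        System.out.println("batch with " + root.getRowCount() + " rows");
      }
    }
  }
}
```

Each successful `read` call fills `root` with the next entry of the file's "batches" array and returns false once the batches are exhausted.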