diff --git a/pom.xml b/pom.xml
index fb42881807573..e23d4e16f450a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -116,7 +116,7 @@
     <log4j.version>2.17.2</log4j.version>
     <hadoop.version>3.3.3</hadoop.version>
-    <protobuf.version>2.5.0</protobuf.version>
+    <protobuf.version>3.21.1</protobuf.version>
     <yarn.version>${hadoop.version}</yarn.version>
     <zookeeper.version>3.6.2</zookeeper.version>
     <curator.version>2.13.0</curator.version>
@@ -220,6 +220,9 @@
${java.home}
+    <io.grpc.version>1.47.0</io.grpc.version>
+    <tomcat.annotations.api.version>6.0.53</tomcat.annotations.api.version>
org.apache.spark.tags.ChromeUITest
@@ -825,6 +828,31 @@
0.9.30
+      <dependency>
+        <groupId>io.grpc</groupId>
+        <artifactId>grpc-netty-shaded</artifactId>
+        <version>${io.grpc.version}</version>
+        <scope>runtime</scope>
+      </dependency>
+      <dependency>
+        <groupId>io.grpc</groupId>
+        <artifactId>grpc-protobuf</artifactId>
+        <version>${io.grpc.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>io.grpc</groupId>
+        <artifactId>grpc-stub</artifactId>
+        <version>${io.grpc.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.tomcat</groupId>
+        <artifactId>annotations-api</artifactId>
+        <version>${tomcat.annotations.api.version}</version>
+        <scope>provided</scope>
+      </dependency>
io.netty
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index a316c7f21dc7b..fac01bb5b1230 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -145,6 +145,30 @@
       <groupId>org.apache.xbean</groupId>
       <artifactId>xbean-asm9-shaded</artifactId>
     </dependency>
+    <dependency>
+      <groupId>io.grpc</groupId>
+      <artifactId>grpc-netty-shaded</artifactId>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
+      <groupId>io.grpc</groupId>
+      <artifactId>grpc-protobuf</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>io.grpc</groupId>
+      <artifactId>grpc-stub</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.tomcat</groupId>
+      <artifactId>annotations-api</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.thesamet.scalapb</groupId>
+      <artifactId>scalapb-runtime-grpc_${scala.binary.version}</artifactId>
+      <version>0.11.11</version>
+    </dependency>
org.scalacheck
scalacheck_${scala.binary.version}
@@ -203,6 +227,14 @@
+    <extensions>
+      <extension>
+        <groupId>kr.motd.maven</groupId>
+        <artifactId>os-maven-plugin</artifactId>
+        <version>1.6.2</version>
+      </extension>
+    </extensions>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
@@ -265,6 +297,37 @@
+      <plugin>
+        <groupId>com.github.os72</groupId>
+        <artifactId>protoc-jar-maven-plugin</artifactId>
+        <version>3.11.4</version>
+        <executions>
+          <execution>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>run</goal>
+            </goals>
+            <configuration>
+              <protocArtifact>com.google.protobuf:protoc:3.21.1</protocArtifact>
+              <includeMavenTypes>transitive</includeMavenTypes>
+              <inputDirectories>
+                <include>src/main/protobuf</include>
+              </inputDirectories>
+              <includeDirectories>
+                <include>src/main/protobuf</include>
+              </includeDirectories>
+              <outputTargets>
+                <outputTarget>
+                  <type>scalapb</type>
+                  <outputOptions>grpc,flat_package</outputOptions>
+                  <pluginArtifact>com.thesamet.scalapb:protoc-gen-scala:0.11.11:sh:unix</pluginArtifact>
+                </outputTarget>
+              </outputTargets>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
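
Note on the build change above: protoc-jar-maven-plugin downloads protoc 3.21.1 and invokes the ScalaPB generator, so the messages and gRPC stubs declared in the .proto files added below are produced as Scala classes during generate-sources. A minimal, hypothetical round-trip check, assuming the generated code lands in the org.apache.spark.connect.protobuf package (java_package plus the flat_package option):

// Hypothetical smoke test; the package and class names are assumptions derived
// from the java_package / flat_package options, not part of this patch.
import org.apache.spark.connect.protobuf.Plan

object GeneratedProtoSmokeTest {
  def main(args: Array[String]): Unit = {
    val plan = Plan()                      // empty Plan message with default fields
    val bytes = plan.toByteArray           // ScalaPB binary serialization
    assert(Plan.parseFrom(bytes) == plan)  // parses back to an equal value
  }
}
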
diff --git a/sql/core/src/main/protobuf/google/protobuf/any.proto b/sql/core/src/main/protobuf/google/protobuf/any.proto
new file mode 100644
index 0000000000000..b3c046e584b70
--- /dev/null
+++ b/sql/core/src/main/protobuf/google/protobuf/any.proto
@@ -0,0 +1,155 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+
+package google.protobuf;
+
+option csharp_namespace = "Google.Protobuf.WellKnownTypes";
+option go_package = "github.com/golang/protobuf/ptypes/any";
+option java_package = "com.google.protobuf";
+option java_outer_classname = "AnyProto";
+option java_multiple_files = true;
+option objc_class_prefix = "GPB";
+
+// `Any` contains an arbitrary serialized protocol buffer message along with a
+// URL that describes the type of the serialized message.
+//
+// Protobuf library provides support to pack/unpack Any values in the form
+// of utility functions or additional generated methods of the Any type.
+//
+// Example 1: Pack and unpack a message in C++.
+//
+// Foo foo = ...;
+// Any any;
+// any.PackFrom(foo);
+// ...
+// if (any.UnpackTo(&foo)) {
+// ...
+// }
+//
+// Example 2: Pack and unpack a message in Java.
+//
+// Foo foo = ...;
+// Any any = Any.pack(foo);
+// ...
+// if (any.is(Foo.class)) {
+// foo = any.unpack(Foo.class);
+// }
+//
+// Example 3: Pack and unpack a message in Python.
+//
+// foo = Foo(...)
+// any = Any()
+// any.Pack(foo)
+// ...
+// if any.Is(Foo.DESCRIPTOR):
+// any.Unpack(foo)
+// ...
+//
+// Example 4: Pack and unpack a message in Go
+//
+// foo := &pb.Foo{...}
+// any, err := ptypes.MarshalAny(foo)
+// ...
+// foo := &pb.Foo{}
+// if err := ptypes.UnmarshalAny(any, foo); err != nil {
+// ...
+// }
+//
+// The pack methods provided by protobuf library will by default use
+// 'type.googleapis.com/full.type.name' as the type URL and the unpack
+// methods only use the fully qualified type name after the last '/'
+// in the type URL, for example "foo.bar.com/x/y.z" will yield type
+// name "y.z".
+//
+//
+// JSON
+// ====
+// The JSON representation of an `Any` value uses the regular
+// representation of the deserialized, embedded message, with an
+// additional field `@type` which contains the type URL. Example:
+//
+// package google.profile;
+// message Person {
+// string first_name = 1;
+// string last_name = 2;
+// }
+//
+// {
+// "@type": "type.googleapis.com/google.profile.Person",
+// "firstName": <string>,
+// "lastName": <string>
+// }
+//
+// If the embedded message type is well-known and has a custom JSON
+// representation, that representation will be embedded adding a field
+// `value` which holds the custom JSON in addition to the `@type`
+// field. Example (for message [google.protobuf.Duration][]):
+//
+// {
+// "@type": "type.googleapis.com/google.protobuf.Duration",
+// "value": "1.212s"
+// }
+//
+message Any {
+ // A URL/resource name that uniquely identifies the type of the serialized
+ // protocol buffer message. This string must contain at least
+ // one "/" character. The last segment of the URL's path must represent
+ // the fully qualified name of the type (as in
+ // `path/google.protobuf.Duration`). The name should be in a canonical form
+ // (e.g., leading "." is not accepted).
+ //
+ // In practice, teams usually precompile into the binary all types that they
+ // expect it to use in the context of Any. However, for URLs which use the
+ // scheme `http`, `https`, or no scheme, one can optionally set up a type
+ // server that maps type URLs to message definitions as follows:
+ //
+ // * If no scheme is provided, `https` is assumed.
+ // * An HTTP GET on the URL must yield a [google.protobuf.Type][]
+ // value in binary format, or produce an error.
+ // * Applications are allowed to cache lookup results based on the
+ // URL, or have them precompiled into a binary to avoid any
+ // lookup. Therefore, binary compatibility needs to be preserved
+ // on changes to types. (Use versioned type names to manage
+ // breaking changes.)
+ //
+ // Note: this functionality is not currently available in the official
+ // protobuf release, and it is not used for type URLs beginning with
+ // type.googleapis.com.
+ //
+ // Schemes other than `http`, `https` (or the empty scheme) might be
+ // used with implementation specific semantics.
+ //
+ string type_url = 1;
+
+ // Must be a valid serialized protocol buffer of the above specified type.
+ bytes value = 2;
+}
\ No newline at end of file
diff --git a/sql/core/src/main/protobuf/google/protobuf/empty.proto b/sql/core/src/main/protobuf/google/protobuf/empty.proto
new file mode 100644
index 0000000000000..d675375e145ae
--- /dev/null
+++ b/sql/core/src/main/protobuf/google/protobuf/empty.proto
@@ -0,0 +1,52 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+
+package google.protobuf;
+
+option csharp_namespace = "Google.Protobuf.WellKnownTypes";
+option go_package = "github.com/golang/protobuf/ptypes/empty";
+option java_package = "com.google.protobuf";
+option java_outer_classname = "EmptyProto";
+option java_multiple_files = true;
+option objc_class_prefix = "GPB";
+option cc_enable_arenas = true;
+
+// A generic empty message that you can re-use to avoid defining duplicated
+// empty messages in your APIs. A typical example is to use it as the request
+// or the response type of an API method. For instance:
+//
+// service Foo {
+// rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
+// }
+//
+// The JSON representation for `Empty` is empty JSON object `{}`.
+message Empty {}
\ No newline at end of file
diff --git a/sql/core/src/main/protobuf/spark/connect/base.proto b/sql/core/src/main/protobuf/spark/connect/base.proto
new file mode 100644
index 0000000000000..d51452f3a2a85
--- /dev/null
+++ b/sql/core/src/main/protobuf/spark/connect/base.proto
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+package spark.connect;
+
+import "spark/connect/commands.proto";
+import "spark/connect/relations.proto";
+
+option java_multiple_files = true;
+option java_package = "org.apache.spark.connect.protobuf";
+
+// A [[Plan]] is the structure that carries the runtime information for the execution from the
+// client to the server. A [[Plan]] is either of type [[Relation]], which is a reference to the
+// underlying logical plan, or of type [[Command]], which is used to execute commands on the
+// server.
+message Plan {
+ oneof op_type {
+ Relation root = 1;
+ Command command = 2;
+ }
+}
+
+// A request to be executed by the service.
+message Request {
+ // The client_id is set by the client to be able to collate streaming responses from
+ // different queries.
+ string client_id = 1;
+ // User context
+ UserContext user_context = 2;
+ // The logical plan to be executed / analyzed.
+ Plan plan = 3;
+
+ // User Context is used to refer to one particular user session that is executing
+ // queries in the backend.
+ message UserContext {
+ string user_id = 1;
+ string user_name = 2;
+ }
+}
+
+// The response to a query; there can be one or more responses for each request. Responses
+// belonging to the same input query carry the same `client_id`.
+message Response {
+ string client_id = 1;
+ ArrowBatch batch = 2;
+
+ // Metrics for the query execution. Typically, this field is only present in the last
+ // batch of results and then represents the overall state of the query execution.
+ Metrics metrics = 3;
+
+ // A batch of query results, serialized using Arrow.
+ message ArrowBatch {
+ int64 row_count = 1;
+ int64 uncompressed_bytes = 2;
+ int64 compressed_bytes = 3;
+ bytes data = 4;
+ bytes schema = 5;
+ }
+
+ message Metrics {
+
+ repeated MetricObject metrics = 1;
+
+ message MetricObject {
+ string name = 1;
+ int64 plan_id = 2;
+ int64 parent = 3;
+ map<string, MetricValue> execution_metrics = 4;
+ }
+
+ message MetricValue {
+ string name = 1;
+ int64 value = 2;
+ string metric_type = 3;
+ }
+ }
+}
+
+message AnalyzeResponse {
+ string client_id = 1;
+ repeated string column_names = 2;
+ repeated string column_types = 3;
+}
+
+// Main interface for the SparkConnect service.
+service SparkConnectService {
+
+ // Executes a request that contains the query and returns a stream of [[Response]].
+ rpc ExecutePlan(Request) returns (stream Response) {}
+
+ // Analyzes a query and returns an [[AnalyzeResponse]] containing metadata about the query.
+ rpc AnalyzePlan(Request) returns (AnalyzeResponse) {}
+}
+
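
To illustrate the service defined above, a hedged client-side sketch of ExecutePlan using the ScalaPB-generated blocking stub follows. The stub and message class names, the package, and the server address/port are assumptions, not part of this patch.

// Hedged sketch of a Spark Connect client call. SparkConnectServiceGrpc, Request,
// Plan, Relation and Sql are the names ScalaPB is expected to generate; the
// localhost:15002 endpoint is a placeholder.
import io.grpc.ManagedChannelBuilder
import org.apache.spark.connect.protobuf._

object ExecutePlanExample {
  def main(args: Array[String]): Unit = {
    val channel = ManagedChannelBuilder.forAddress("localhost", 15002).usePlaintext().build()
    val stub = SparkConnectServiceGrpc.blockingStub(channel)

    // Wrap a SQL relation into a Plan and send it together with the user context.
    val plan = Plan().withRoot(Relation().withSql(Sql(query = "SELECT 1")))
    val request = Request(clientId = "example-client")
      .withUserContext(Request.UserContext(userId = "jane", userName = "jane"))
      .withPlan(plan)

    // ExecutePlan is server-streaming: the blocking stub yields an iterator of Responses.
    stub.executePlan(request).foreach { response =>
      response.batch.foreach(b => println(s"rows in batch: ${b.rowCount}"))
    }
    channel.shutdownNow()
  }
}
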
diff --git a/sql/core/src/main/protobuf/spark/connect/commands.proto b/sql/core/src/main/protobuf/spark/connect/commands.proto
new file mode 100644
index 0000000000000..a5057c2e3475c
--- /dev/null
+++ b/sql/core/src/main/protobuf/spark/connect/commands.proto
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+import "spark/connect/types.proto";
+
+package spark.connect;
+
+option java_multiple_files = true;
+option java_package = "org.apache.spark.connect.protobuf";
+
+// A [[Command]] is an operation, executed by the server, that does not directly consume or
+// produce a relational result.
+message Command {
+ oneof command_type {
+ CreateScalarFunction create_function = 1;
+ }
+}
+
+// Simple message that is used to create a scalar function based on the provided function body.
+//
+// This message is used, for example, to register a Python UDF in the session catalog by
+// providing the serialized method body.
+message CreateScalarFunction {
+ // Fully qualified name of the function including the catalog / schema names.
+ repeated string parts = 1;
+ FunctionLanguage language = 2;
+ bool temporary = 3;
+ repeated Type argument_types = 4;
+ Type return_type = 5;
+
+ // How the function body is defined:
+ oneof function_definition {
+ // As serialized raw bytes:
+ bytes serialized_function = 6;
+ // As a code literal
+ string literal_string = 7;
+ }
+
+ enum FunctionLanguage {
+ FUNCTION_LANGUAGE_UNSPECIFIED = 0;
+ FUNCTION_LANGUAGE_SQL = 1;
+ FUNCTION_LANGUAGE_PYTHON = 2;
+ FUNCTION_LANGUAGE_SCALA = 3;
+ }
+}
\ No newline at end of file
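
For illustration, a CreateScalarFunction command for a serialized Python UDF could be assembled from these messages roughly as sketched below; the generated class names are assumptions (java_package plus flat_package), not part of this patch.

// Hedged sketch: building a Command that registers a Python scalar function.
import com.google.protobuf.ByteString
import org.apache.spark.connect.protobuf._

object CreateFunctionExample {
  def createPythonUdf(serializedBody: Array[Byte]): Command = {
    val fn = CreateScalarFunction(
        parts = Seq("my_catalog", "my_schema", "my_udf"),
        language = CreateScalarFunction.FunctionLanguage.FUNCTION_LANGUAGE_PYTHON,
        temporary = true)
      .withReturnType(Type().withI32(Type.I32()))          // declared return type: INT
      .withSerializedFunction(ByteString.copyFrom(serializedBody))
    Command().withCreateFunction(fn)
  }
}
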
diff --git a/sql/core/src/main/protobuf/spark/connect/expressions.proto b/sql/core/src/main/protobuf/spark/connect/expressions.proto
new file mode 100644
index 0000000000000..e8c0b8bd8bbe8
--- /dev/null
+++ b/sql/core/src/main/protobuf/spark/connect/expressions.proto
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+import "spark/connect/types.proto";
+import "google/protobuf/any.proto";
+
+package spark.connect;
+
+option java_multiple_files = true;
+option java_package = "org.apache.spark.connect.protobuf";
+
+/*
+ Expression used to refer to fields, functions and similar. This can be used everywhere
+ expressions in SQL appear.
+ */
+message Expression {
+
+ oneof expr_type {
+ Literal literal = 1;
+ UnresolvedAttribute unresolved_attribute = 2;
+ UnresolvedFunction unresolved_function = 3;
+ ExpressionString expression_string = 4;
+ }
+
+ message Literal {
+ oneof literal_type {
+ bool boolean = 1;
+ int32 i8 = 2;
+ int32 i16 = 3;
+ int32 i32 = 5;
+ int64 i64 = 7;
+ float fp32 = 10;
+ double fp64 = 11;
+ string string = 12;
+ bytes binary = 13;
+ // Timestamp in units of microseconds since the UNIX epoch.
+ int64 timestamp = 14;
+ // Date in units of days since the UNIX epoch.
+ int32 date = 16;
+ // Time in units of microseconds past midnight
+ int64 time = 17;
+ IntervalYearToMonth interval_year_to_month = 19;
+ IntervalDayToSecond interval_day_to_second = 20;
+ string fixed_char = 21;
+ VarChar var_char = 22;
+ bytes fixed_binary = 23;
+ Decimal decimal = 24;
+ Struct struct = 25;
+ Map map = 26;
+ // Timestamp in units of microseconds since the UNIX epoch.
+ int64 timestamp_tz = 27;
+ bytes uuid = 28;
+ Type null = 29; // a typed null literal
+ List list = 30;
+ Type.List empty_list = 31;
+ Type.Map empty_map = 32;
+ UserDefined user_defined = 33;
+ }
+
+ // whether the literal type should be treated as a nullable type. Applies to
+ // all members of union other than the Typed null (which should directly
+ // declare nullability).
+ bool nullable = 50;
+
+ // optionally points to a type_variation_anchor defined in this plan.
+ // Applies to all members of union other than the Typed null (which should
+ // directly declare the type variation).
+ uint32 type_variation_reference = 51;
+
+ message VarChar {
+ string value = 1;
+ uint32 length = 2;
+ }
+
+ message Decimal {
+ // little-endian twos-complement integer representation of complete value
+ // (ignoring precision) Always 16 bytes in length
+ bytes value = 1;
+ // The maximum number of digits allowed in the value.
+ // the maximum precision is 38.
+ int32 precision = 2;
+ // declared scale of decimal literal
+ int32 scale = 3;
+ }
+
+ message Map {
+ message KeyValue {
+ Literal key = 1;
+ Literal value = 2;
+ }
+
+ repeated KeyValue key_values = 1;
+ }
+
+ message IntervalYearToMonth {
+ int32 years = 1;
+ int32 months = 2;
+ }
+
+ message IntervalDayToSecond {
+ int32 days = 1;
+ int32 seconds = 2;
+ int32 microseconds = 3;
+ }
+
+ message Struct {
+ // A possibly heterogeneously typed list of literals
+ repeated Literal fields = 1;
+ }
+
+ message List {
+ // A homogeneously typed list of literals
+ repeated Literal values = 1;
+ }
+
+ message UserDefined {
+ // points to a type_anchor defined in this plan
+ uint32 type_reference = 1;
+
+ // the value of the literal, serialized using some type-specific
+ // protobuf message
+ google.protobuf.Any value = 2;
+ }
+ }
+
+ // An unresolved attribute that is not explicitly bound to a specific column, but the column
+ // is resolved during analysis by name.
+ message UnresolvedAttribute {
+ repeated string parts = 1;
+ }
+
+ // An unresolved function is not bound to one specific function implementation; the function
+ // is resolved during analysis following Spark's name resolution rules.
+ message UnresolvedFunction {
+ repeated string parts = 1;
+ repeated Expression arguments = 2;
+ }
+
+ // Expression as string.
+ message ExpressionString {
+ string expression = 1;
+ }
+
+}
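
As a usage illustration, the expression `col > 5` could be represented with these messages as sketched below; generated class names are assumptions (java_package plus flat_package), not part of this patch.

// Hedged sketch: an UnresolvedFunction ">" applied to a column and a literal.
import org.apache.spark.connect.protobuf._

object ExpressionExample {
  // Column reference, resolved by name during analysis on the server.
  val column: Expression =
    Expression().withUnresolvedAttribute(Expression.UnresolvedAttribute(parts = Seq("col")))

  // Typed integer literal.
  val five: Expression =
    Expression().withLiteral(Expression.Literal().withI32(5))

  // Function call, resolved on the server following Spark's name resolution rules.
  val greaterThan: Expression =
    Expression().withUnresolvedFunction(
      Expression.UnresolvedFunction(parts = Seq(">"), arguments = Seq(column, five)))
}
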
diff --git a/sql/core/src/main/protobuf/spark/connect/relations.proto b/sql/core/src/main/protobuf/spark/connect/relations.proto
new file mode 100644
index 0000000000000..1d0551142603e
--- /dev/null
+++ b/sql/core/src/main/protobuf/spark/connect/relations.proto
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+package spark.connect;
+
+import "spark/connect/expressions.proto";
+
+option java_multiple_files = true;
+option java_package = "org.apache.spark.connect.protobuf";
+
+/*
+ The main [[Relation]] type. Fundamentally, a relation is a typed container
+ that has exactly one explicit relation type set.
+
+ When adding new relation types, they have to be registered here.
+ */
+message Relation {
+ RelationCommon common = 1;
+ oneof rel_type {
+ Read read = 2;
+ Project project = 3;
+ Filter filter = 4;
+ Join join = 5;
+ Union union = 6;
+ Sort sort = 7;
+ Fetch fetch = 8;
+ Aggregate aggregate = 9;
+ Sql sql = 10;
+ }
+}
+/*
+ Common metadata of all relations.
+ */
+message RelationCommon {
+ string source_info = 1;
+ string alias = 2;
+}
+
+/*
+ Relation that uses a SQL query to generate the output.
+ */
+message Sql {
+ string query = 1;
+}
+
+/*
+ Relation that reads from a file / table or other data source. Does not have additional
+ inputs.
+ */
+message Read {
+ oneof read_type {
+ NamedTable named_table = 1;
+ }
+
+ message NamedTable {
+ repeated string parts = 1;
+ }
+}
+
+/*
+ Projection of a bag of expressions for a given input relation.
+
+ The input relation must be specified.
+ The projected expression can be an arbitrary expression.
+ */
+message Project {
+ Relation input = 1;
+ repeated Expression expressions = 3;
+}
+
+/*
+ Relation that applies a boolean expression `condition` on each row of `input` to produce the output result.
+ */
+message Filter {
+ Relation input = 1;
+ Expression condition = 2;
+}
+
+/*
+ Relation of type [[Join]].
+
+ `left` and `right` must be present.
+ */
+message Join {
+ Relation left = 1;
+ Relation right = 2;
+ Expression on = 3;
+ JoinType how = 4;
+
+ enum JoinType {
+ JOIN_TYPE_UNSPECIFIED = 0;
+ JOIN_TYPE_INNER = 1;
+ JOIN_TYPE_OUTER = 2;
+ JOIN_TYPE_LEFT_OUTER = 3;
+ JOIN_TYPE_RIGHT_OUTER = 4;
+ JOIN_TYPE_ANTI = 5;
+ }
+}
+
+/*
+ Relation of type [[Union]], at least one input must be set.
+ */
+message Union {
+ repeated Relation inputs = 1;
+ UnionType union_type = 2;
+
+ enum UnionType {
+ UNION_TYPE_UNSPECIFIED = 0;
+ UNION_TYPE_DISTINCT = 1;
+ UNION_TYPE_ALL = 2;
+ }
+}
+
+/*
+ Relation of type [[Fetch]] that is used to read `limit` / `offset` rows from the input relation.
+ */
+message Fetch {
+ Relation input = 1;
+ int32 limit = 2;
+ int32 offset = 3;
+}
+
+/*
+ Relation of type [[Aggregate]].
+ */
+message Aggregate {
+ Relation input = 1;
+
+ // Grouping sets are used in rollups
+ repeated GroupingSet grouping_sets = 2;
+
+ // Measures
+ repeated Measure measures = 3;
+
+ message GroupingSet {
+ repeated Expression aggregate_expressions = 1;
+ }
+
+ message Measure {
+ AggregateFunction function = 1;
+ // Conditional filter for SUM(x FILTER WHERE x < 10)
+ Expression filter = 2;
+ }
+
+ message AggregateFunction {
+ string name = 1;
+ repeated Expression arguments = 2;
+ }
+}
+
+/*
+ Relation of type [[Sort]].
+ */
+message Sort {
+ Relation input = 1;
+ repeated SortField sort_fields = 2;
+
+ message SortField {
+ Expression expression = 1;
+ SortDirection direction = 2;
+ SortNulls nulls = 3;
+ }
+
+ enum SortDirection {
+ SORT_DIRECTION_UNSPECIFIED = 0;
+ SORT_DIRECTION_ASCENDING = 1;
+ SORT_DIRECTION_DESCENDING = 2;
+ }
+
+ enum SortNulls {
+ SORT_NULLS_UNSPECIFIED = 0;
+ SORT_NULLS_FIRST = 1;
+ SORT_NULLS_LAST = 2;
+ }
+}
\ No newline at end of file
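
For illustration, the relation tree for `SELECT * FROM t WHERE col > 5` could be composed from these messages as sketched below; generated class names are assumptions (java_package plus flat_package), not part of this patch.

// Hedged sketch: Read of a named table wrapped in a Filter.
import org.apache.spark.connect.protobuf._

object RelationExample {
  // Leaf relation: read the named table `t`.
  val table: Relation =
    Relation().withRead(Read().withNamedTable(Read.NamedTable(parts = Seq("t"))))

  // Filter on top of the read, with the condition given as an expression string.
  val filtered: Relation =
    Relation().withFilter(
      Filter()
        .withInput(table)
        .withCondition(Expression().withExpressionString(
          Expression.ExpressionString(expression = "col > 5"))))
}
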
diff --git a/sql/core/src/main/protobuf/spark/connect/types.proto b/sql/core/src/main/protobuf/spark/connect/types.proto
new file mode 100644
index 0000000000000..3d6b649978dbe
--- /dev/null
+++ b/sql/core/src/main/protobuf/spark/connect/types.proto
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+package spark.connect;
+
+option java_multiple_files = true;
+option java_package = "org.apache.spark.connect.protobuf";
+
+/*
+ This message describes the logical [[Type]] of something. It does not carry the value
+ itself but only describes it.
+ */
+message Type {
+ oneof kind {
+ Boolean bool = 1;
+ I8 i8 = 2;
+ I16 i16 = 3;
+ I32 i32 = 5;
+ I64 i64 = 7;
+ FP32 fp32 = 10;
+ FP64 fp64 = 11;
+ String string = 12;
+ Binary binary = 13;
+ Timestamp timestamp = 14;
+ Date date = 16;
+ Time time = 17;
+ IntervalYear interval_year = 19;
+ IntervalDay interval_day = 20;
+ TimestampTZ timestamp_tz = 29;
+ UUID uuid = 32;
+
+ FixedChar fixed_char = 21;
+ VarChar varchar = 22;
+ FixedBinary fixed_binary = 23;
+ Decimal decimal = 24;
+
+ Struct struct = 25;
+ List list = 27;
+ Map map = 28;
+
+ uint32 user_defined_type_reference = 31;
+ }
+
+ enum Nullability {
+ NULLABILITY_UNSPECIFIED = 0;
+ NULLABILITY_NULLABLE = 1;
+ NULLABILITY_REQUIRED = 2;
+ }
+
+ message Boolean {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+ message I8 {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message I16 {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message I32 {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message I64 {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message FP32 {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message FP64 {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message String {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message Binary {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message Timestamp {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message Date {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message Time {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message TimestampTZ {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message IntervalYear {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message IntervalDay {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ message UUID {
+ uint32 type_variation_reference = 1;
+ Nullability nullability = 2;
+ }
+
+ // Start compound types.
+ message FixedChar {
+ int32 length = 1;
+ uint32 type_variation_reference = 2;
+ Nullability nullability = 3;
+ }
+
+ message VarChar {
+ int32 length = 1;
+ uint32 type_variation_reference = 2;
+ Nullability nullability = 3;
+ }
+
+ message FixedBinary {
+ int32 length = 1;
+ uint32 type_variation_reference = 2;
+ Nullability nullability = 3;
+ }
+
+ message Decimal {
+ int32 scale = 1;
+ int32 precision = 2;
+ uint32 type_variation_reference = 3;
+ Nullability nullability = 4;
+ }
+
+ message Struct {
+ repeated Type types = 1;
+ uint32 type_variation_reference = 2;
+ Nullability nullability = 3;
+ }
+
+ message List {
+ Type type = 1;
+ uint32 type_variation_reference = 2;
+ Nullability nullability = 3;
+ }
+
+ message Map {
+ Type key = 1;
+ Type value = 2;
+ uint32 type_variation_reference = 3;
+ Nullability nullability = 4;
+ }
+}
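
For illustration, a non-nullable INT and a nullable MAP<STRING, DOUBLE> could be described with these messages as sketched below; generated class names are assumptions (java_package plus flat_package), not part of this patch.

// Hedged sketch: describing data types with the Type messages.
import org.apache.spark.connect.protobuf._

object TypeExample {
  // 32-bit integer that may not be null.
  val requiredInt: Type =
    Type().withI32(Type.I32(nullability = Type.Nullability.NULLABILITY_REQUIRED))

  // Map from string to double; singular message fields are boxed as Option by ScalaPB.
  val stringToDouble: Type =
    Type().withMap(Type.Map(
      key = Some(Type().withString(Type.String())),
      value = Some(Type().withFp64(Type.FP64())),
      nullability = Type.Nullability.NULLABILITY_NULLABLE))
}
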