From 7e89c17f1d84628143da37653dbdb835c0bc699f Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Thu, 16 Jun 2022 10:59:14 +0200 Subject: [PATCH 1/2] Adding the proto and the dependencies --- pom.xml | 30 ++- sql/core/pom.xml | 62 ++++++ .../main/protobuf/google/protobuf/any.proto | 155 ++++++++++++++ .../main/protobuf/google/protobuf/empty.proto | 52 +++++ .../main/protobuf/spark/connect/base.proto | 110 ++++++++++ .../protobuf/spark/connect/commands.proto | 61 ++++++ .../protobuf/spark/connect/expressions.proto | 160 +++++++++++++++ .../protobuf/spark/connect/relations.proto | 191 ++++++++++++++++++ .../main/protobuf/spark/connect/types.proto | 189 +++++++++++++++++ 9 files changed, 1009 insertions(+), 1 deletion(-) create mode 100644 sql/core/src/main/protobuf/google/protobuf/any.proto create mode 100644 sql/core/src/main/protobuf/google/protobuf/empty.proto create mode 100644 sql/core/src/main/protobuf/spark/connect/base.proto create mode 100644 sql/core/src/main/protobuf/spark/connect/commands.proto create mode 100644 sql/core/src/main/protobuf/spark/connect/expressions.proto create mode 100644 sql/core/src/main/protobuf/spark/connect/relations.proto create mode 100644 sql/core/src/main/protobuf/spark/connect/types.proto diff --git a/pom.xml b/pom.xml index fb42881807573..e23d4e16f450a 100644 --- a/pom.xml +++ b/pom.xml @@ -116,7 +116,7 @@ 2.17.2 3.3.3 - 2.5.0 + 3.21.1 ${hadoop.version} 3.6.2 2.13.0 @@ -220,6 +220,9 @@ ${java.home} + 1.47.0 + 6.0.53 + org.apache.spark.tags.ChromeUITest @@ -825,6 +828,31 @@ 0.9.30 + + + io.grpc + grpc-netty-shaded + ${io.grpc.version} + runtime + + + io.grpc + grpc-protobuf + ${io.grpc.version} + + + io.grpc + grpc-stub + ${io.grpc.version} + + + org.apache.tomcat + annotations-api + ${tomcat.annotations.api.version} + provided + + + io.netty diff --git a/sql/core/pom.xml b/sql/core/pom.xml index a316c7f21dc7b..79b7c70d09b7b 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -145,6 +145,30 @@ org.apache.xbean xbean-asm9-shaded + + io.grpc + grpc-netty-shaded + runtime + + + io.grpc + grpc-protobuf + + + io.grpc + grpc-stub + + + org.apache.tomcat + annotations-api + provided + + + + com.thesamet.scalapb + scalapb-runtime-grpc_2.12 + 0.11.11 + org.scalacheck scalacheck_${scala.binary.version} @@ -203,6 +227,14 @@ + + + + kr.motd.maven + os-maven-plugin + 1.6.2 + + target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes @@ -265,6 +297,36 @@ + + + com.github.os72 + protoc-jar-maven-plugin + 3.11.4 + + + generate-sources + + run + + + + + transitive + + src/main/protobuf + + + src/main/protobuf + + + + scalapb + grpc,flat_package + com.thesamet.scalapb:protoc-gen-scala:0.11.11:sh:unix + + + + diff --git a/sql/core/src/main/protobuf/google/protobuf/any.proto b/sql/core/src/main/protobuf/google/protobuf/any.proto new file mode 100644 index 0000000000000..b3c046e584b70 --- /dev/null +++ b/sql/core/src/main/protobuf/google/protobuf/any.proto @@ -0,0 +1,155 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +syntax = "proto3"; + +package google.protobuf; + +option csharp_namespace = "Google.Protobuf.WellKnownTypes"; +option go_package = "github.com/golang/protobuf/ptypes/any"; +option java_package = "com.google.protobuf"; +option java_outer_classname = "AnyProto"; +option java_multiple_files = true; +option objc_class_prefix = "GPB"; + +// `Any` contains an arbitrary serialized protocol buffer message along with a +// URL that describes the type of the serialized message. +// +// Protobuf library provides support to pack/unpack Any values in the form +// of utility functions or additional generated methods of the Any type. +// +// Example 1: Pack and unpack a message in C++. +// +// Foo foo = ...; +// Any any; +// any.PackFrom(foo); +// ... +// if (any.UnpackTo(&foo)) { +// ... +// } +// +// Example 2: Pack and unpack a message in Java. +// +// Foo foo = ...; +// Any any = Any.pack(foo); +// ... +// if (any.is(Foo.class)) { +// foo = any.unpack(Foo.class); +// } +// +// Example 3: Pack and unpack a message in Python. +// +// foo = Foo(...) +// any = Any() +// any.Pack(foo) +// ... +// if any.Is(Foo.DESCRIPTOR): +// any.Unpack(foo) +// ... +// +// Example 4: Pack and unpack a message in Go +// +// foo := &pb.Foo{...} +// any, err := ptypes.MarshalAny(foo) +// ... +// foo := &pb.Foo{} +// if err := ptypes.UnmarshalAny(any, foo); err != nil { +// ... +// } +// +// The pack methods provided by protobuf library will by default use +// 'type.googleapis.com/full.type.name' as the type URL and the unpack +// methods only use the fully qualified type name after the last '/' +// in the type URL, for example "foo.bar.com/x/y.z" will yield type +// name "y.z". +// +// +// JSON +// ==== +// The JSON representation of an `Any` value uses the regular +// representation of the deserialized, embedded message, with an +// additional field `@type` which contains the type URL. Example: +// +// package google.profile; +// message Person { +// string first_name = 1; +// string last_name = 2; +// } +// +// { +// "@type": "type.googleapis.com/google.profile.Person", +// "firstName": , +// "lastName": +// } +// +// If the embedded message type is well-known and has a custom JSON +// representation, that representation will be embedded adding a field +// `value` which holds the custom JSON in addition to the `@type` +// field. Example (for message [google.protobuf.Duration][]): +// +// { +// "@type": "type.googleapis.com/google.protobuf.Duration", +// "value": "1.212s" +// } +// +message Any { + // A URL/resource name that uniquely identifies the type of the serialized + // protocol buffer message. This string must contain at least + // one "/" character. The last segment of the URL's path must represent + // the fully qualified name of the type (as in + // `path/google.protobuf.Duration`). The name should be in a canonical form + // (e.g., leading "." is not accepted). + // + // In practice, teams usually precompile into the binary all types that they + // expect it to use in the context of Any. However, for URLs which use the + // scheme `http`, `https`, or no scheme, one can optionally set up a type + // server that maps type URLs to message definitions as follows: + // + // * If no scheme is provided, `https` is assumed. + // * An HTTP GET on the URL must yield a [google.protobuf.Type][] + // value in binary format, or produce an error. + // * Applications are allowed to cache lookup results based on the + // URL, or have them precompiled into a binary to avoid any + // lookup. Therefore, binary compatibility needs to be preserved + // on changes to types. (Use versioned type names to manage + // breaking changes.) + // + // Note: this functionality is not currently available in the official + // protobuf release, and it is not used for type URLs beginning with + // type.googleapis.com. + // + // Schemes other than `http`, `https` (or the empty scheme) might be + // used with implementation specific semantics. + // + string type_url = 1; + + // Must be a valid serialized protocol buffer of the above specified type. + bytes value = 2; +} \ No newline at end of file diff --git a/sql/core/src/main/protobuf/google/protobuf/empty.proto b/sql/core/src/main/protobuf/google/protobuf/empty.proto new file mode 100644 index 0000000000000..d675375e145ae --- /dev/null +++ b/sql/core/src/main/protobuf/google/protobuf/empty.proto @@ -0,0 +1,52 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +syntax = "proto3"; + +package google.protobuf; + +option csharp_namespace = "Google.Protobuf.WellKnownTypes"; +option go_package = "github.com/golang/protobuf/ptypes/empty"; +option java_package = "com.google.protobuf"; +option java_outer_classname = "EmptyProto"; +option java_multiple_files = true; +option objc_class_prefix = "GPB"; +option cc_enable_arenas = true; + +// A generic empty message that you can re-use to avoid defining duplicated +// empty messages in your APIs. A typical example is to use it as the request +// or the response type of an API method. For instance: +// +// service Foo { +// rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty); +// } +// +// The JSON representation for `Empty` is empty JSON object `{}`. +message Empty {} \ No newline at end of file diff --git a/sql/core/src/main/protobuf/spark/connect/base.proto b/sql/core/src/main/protobuf/spark/connect/base.proto new file mode 100644 index 0000000000000..d51452f3a2a85 --- /dev/null +++ b/sql/core/src/main/protobuf/spark/connect/base.proto @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = 'proto3'; + +package spark.connect; + +import "spark/connect/commands.proto"; +import "spark/connect/relations.proto"; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.protobuf"; + +// A [[Plan]] is the structure that carries the runtime information for the execution from the +// client to the server. A [[Plan]] can either be of the type [[Relation]] which is a reference +// to the underlying logical plan or it can be of the [[Command]] type that is used to execute +// commands on the server. +message Plan { + oneof op_type { + Relation root = 1; + Command command = 2; + } +} + +// A request to be executed by the service. +message Request { + // The client_id is set by the client to be able to collate streaming responses from + // different queries. + string client_id = 1; + // User context + UserContext user_context = 2; + // The logical plan to be executed / analyzed. + Plan plan = 3; + + // User Context is used to refer to one particular user session that is executing + // queries in the backend. + message UserContext { + string user_id = 1; + string user_name = 2; + } +} + +// The response of a query, can be one or more for each request. Responses belonging to the +// same input query, carry the same `client_id`. +message Response { + string client_id = 1; + ArrowBatch batch = 2; + + // Metrics for the query execution. Typically, this field is only present in the last + // batch of results and then represent the overall state of the query execution. + Metrics metrics = 3; + + // Batch results of metrics. + message ArrowBatch { + int64 row_count = 1; + int64 uncompressed_bytes = 2; + int64 compressed_bytes = 3; + bytes data = 4; + bytes schema = 5; + } + + message Metrics { + + repeated MetricObject metrics = 1; + + message MetricObject { + string name = 1; + int64 plan_id = 2; + int64 parent = 3; + map execution_metrics = 4; + } + + message MetricValue { + string name = 1; + int64 value = 2; + string metric_type = 3; + } + } +} + +message AnalyzeResponse { + string client_id = 1; + repeated string column_names = 2; + repeated string column_types = 3; +} + +// Main interface for the SparkConnect service. +service SparkConnectService { + + // Executes a request that contains the query and returns a stream of [[Response]]. + rpc ExecutePlan(Request) returns (stream Response) {} + + // Analyzes a query and returns a [[AnalyzeResponse]] containing metadata about the query. + rpc AnalyzePlan(Request) returns (AnalyzeResponse) {} +} + diff --git a/sql/core/src/main/protobuf/spark/connect/commands.proto b/sql/core/src/main/protobuf/spark/connect/commands.proto new file mode 100644 index 0000000000000..a5057c2e3475c --- /dev/null +++ b/sql/core/src/main/protobuf/spark/connect/commands.proto @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = 'proto3'; + +import "spark/connect/types.proto"; + +package spark.connect; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.protobuf"; + +// A [[Command]] is an operation that is executed by the server that does not directly consume or +// produce a relational result. +message Command { + oneof command_type { + CreateScalarFunction create_function = 1; + } +} + +// Simple message that is used to create a scalar function based on the provided function body. +// +// This message is used to register for example a Python UDF in the session catalog by providing +// the serialized method body. +message CreateScalarFunction { + // Fully qualified name of the function including the catalog / schema names. + repeated string parts = 1; + FunctionLanguage language = 2; + bool temporary = 3; + repeated Type argument_types = 4; + Type return_type = 5; + + // How the function body is defined: + oneof function_definition { + // As a raw string serialized: + bytes serialized_function = 6; + // As a code literal + string literal_string = 7; + } + + enum FunctionLanguage { + FUNCTION_LANGUAGE_UNSPECIFIED = 0; + FUNCTION_LANGUAGE_SQL = 1; + FUNCTION_LANGUAGE_PYTHON = 2; + FUNCTION_LANGUAGE_SCALA = 3; + } +} \ No newline at end of file diff --git a/sql/core/src/main/protobuf/spark/connect/expressions.proto b/sql/core/src/main/protobuf/spark/connect/expressions.proto new file mode 100644 index 0000000000000..e8c0b8bd8bbe8 --- /dev/null +++ b/sql/core/src/main/protobuf/spark/connect/expressions.proto @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = 'proto3'; + +import "spark/connect/types.proto"; +import "google/protobuf/any.proto"; + +package spark.connect; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.protobuf"; + +/* + Expression used to refer to fields, functions and similar. This can be used everywhere + expressions in SQL appear. + */ +message Expression { + + oneof expr_type { + Literal literal = 1; + UnresolvedAttribute unresolved_attribute = 2; + UnresolvedFunction unresolved_function = 3; + ExpressionString expression_string = 4; + } + + message Literal { + oneof literal_type { + bool boolean = 1; + int32 i8 = 2; + int32 i16 = 3; + int32 i32 = 5; + int64 i64 = 7; + float fp32 = 10; + double fp64 = 11; + string string = 12; + bytes binary = 13; + // Timestamp in units of microseconds since the UNIX epoch. + int64 timestamp = 14; + // Date in units of days since the UNIX epoch. + int32 date = 16; + // Time in units of microseconds past midnight + int64 time = 17; + IntervalYearToMonth interval_year_to_month = 19; + IntervalDayToSecond interval_day_to_second = 20; + string fixed_char = 21; + VarChar var_char = 22; + bytes fixed_binary = 23; + Decimal decimal = 24; + Struct struct = 25; + Map map = 26; + // Timestamp in units of microseconds since the UNIX epoch. + int64 timestamp_tz = 27; + bytes uuid = 28; + Type null = 29; // a typed null literal + List list = 30; + Type.List empty_list = 31; + Type.Map empty_map = 32; + UserDefined user_defined = 33; + } + + // whether the literal type should be treated as a nullable type. Applies to + // all members of union other than the Typed null (which should directly + // declare nullability). + bool nullable = 50; + + // optionally points to a type_variation_anchor defined in this plan. + // Applies to all members of union other than the Typed null (which should + // directly declare the type variation). + uint32 type_variation_reference = 51; + + message VarChar { + string value = 1; + uint32 length = 2; + } + + message Decimal { + // little-endian twos-complement integer representation of complete value + // (ignoring precision) Always 16 bytes in length + bytes value = 1; + // The maximum number of digits allowed in the value. + // the maximum precision is 38. + int32 precision = 2; + // declared scale of decimal literal + int32 scale = 3; + } + + message Map { + message KeyValue { + Literal key = 1; + Literal value = 2; + } + + repeated KeyValue key_values = 1; + } + + message IntervalYearToMonth { + int32 years = 1; + int32 months = 2; + } + + message IntervalDayToSecond { + int32 days = 1; + int32 seconds = 2; + int32 microseconds = 3; + } + + message Struct { + // A possibly heterogeneously typed list of literals + repeated Literal fields = 1; + } + + message List { + // A homogeneously typed list of literals + repeated Literal values = 1; + } + + message UserDefined { + // points to a type_anchor defined in this plan + uint32 type_reference = 1; + + // the value of the literal, serialized using some type-specific + // protobuf message + google.protobuf.Any value = 2; + } + } + + // An unresolved attribute that is not explicitly bound to a specific column, but the column + // is resolved during analysis by name. + message UnresolvedAttribute { + repeated string parts = 1; + } + + // An unresolved function is not explicitly bound to one explicit function, but the function + // is resolved during analysis following Sparks name resolution rules. + message UnresolvedFunction { + repeated string parts = 1; + repeated Expression arguments = 2; + } + + // Expression as string. + message ExpressionString { + string expression = 1; + } + +} diff --git a/sql/core/src/main/protobuf/spark/connect/relations.proto b/sql/core/src/main/protobuf/spark/connect/relations.proto new file mode 100644 index 0000000000000..1d0551142603e --- /dev/null +++ b/sql/core/src/main/protobuf/spark/connect/relations.proto @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = 'proto3'; + +package spark.connect; + +import "spark/connect/expressions.proto"; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.protobuf"; + +/* + The main [[Relation]] type. Fundamentally, a relation is a typed container + that has exactly one explicit relation type set. + + When adding new relation types, they have to be registered here. + */ +message Relation { + RelationCommon common = 1; + oneof rel_type { + Read read = 2; + Project project = 3; + Filter filter = 4; + Join join = 5; + Union union = 6; + Sort sort = 7; + Fetch fetch = 8; + Aggregate aggregate = 9; + Sql sql = 10; + } +} +/* + Common metadata of all relations. + */ +message RelationCommon { + string source_info = 1; + string alias = 2; +} + +/* + Relation that uses a SQL query to generate the output. + */ +message Sql { + string query = 1; +} + +/* + Relation that reads from a file / table or other data source. Does not have additional + inputs. + */ +message Read { + oneof read_type { + NamedTable named_table = 1; + } + + message NamedTable { + repeated string parts = 1; + } +} + +/* + Projection of a bag of expressions for a given input relation. + + The input relation must be specified. + The projected expression can be an arbitrary expression. + */ +message Project { + Relation input = 1; + repeated Expression expressions = 3; +} + +/* + Relation that applies a boolean expression `condition` on each row of `input` to produce the output result. + */ +message Filter { + Relation input = 1; + Expression condition = 2; +} + +/* + Relation of type [[Join]]. + + `left` and `right` must be present. + */ +message Join { + Relation left = 1; + Relation right = 2; + Expression on = 3; + JoinType how = 4; + + enum JoinType { + JOIN_TYPE_UNSPECIFIED = 0; + JOIN_TYPE_INNER = 1; + JOIN_TYPE_OUTER = 2; + JOIN_TYPE_LEFT_OUTER = 3; + JOIN_TYPE_RIGHT_OUTER = 4; + JOIN_TYPE_ANTI = 5; + } +} + +/* + Relation of type [[Union]], at least one input must be set. + */ +message Union { + repeated Relation inputs = 1; + UnionType union_type = 2; + + enum UnionType { + UNION_TYPE_UNSPECIFIED = 0; + UNION_TYPE_DISTINCT = 1; + UNION_TYPE_ALL = 2; + } +} + +/* + Relation of type [[Fetch]] that is used to read `limit` / `offset` rows from the input relation. + */ +message Fetch { + Relation input = 1; + int32 limit = 2; + int32 offset = 3; +} + +/* + Relation of type [[Aggregate]]. + */ +message Aggregate { + Relation input = 1; + + // Grouping sets are used in rollups + repeated GroupingSet grouping_sets = 2; + + // Measures + repeated Measure measures = 3; + + message GroupingSet { + repeated Expression aggregate_expressions = 1; + } + + message Measure { + AggregateFunction function = 1; + // Conditional filter for SUM(x FILTER WHERE x < 10) + Expression filter = 2; + } + + message AggregateFunction { + string name = 1; + repeated Expression arguments = 2; + } +} + +/* + Relation of type [[Sort]]. + */ +message Sort { + Relation input = 1; + repeated SortField sort_fields = 2; + + message SortField { + Expression expression = 1; + SortDirection direction = 2; + SortNulls nulls = 3; + } + + enum SortDirection { + SORT_DIRECTION_UNSPECIFIED = 0; + SORT_DIRECTION_ASCENDING = 1; + SORT_DIRECTION_DESCENDING = 2; + } + + enum SortNulls { + SORT_NULLS_UNSPECIFIED = 0; + SORT_NULLS_FIRST = 1; + SORT_NULLS_LAST = 2; + } +} \ No newline at end of file diff --git a/sql/core/src/main/protobuf/spark/connect/types.proto b/sql/core/src/main/protobuf/spark/connect/types.proto new file mode 100644 index 0000000000000..3d6b649978dbe --- /dev/null +++ b/sql/core/src/main/protobuf/spark/connect/types.proto @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = 'proto3'; + +package spark.connect; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.protobuf"; + +/* + This message describes the logical [[Type]] of something. It does not carry the value + itself but only describes it. + */ +message Type { + oneof kind { + Boolean bool = 1; + I8 i8 = 2; + I16 i16 = 3; + I32 i32 = 5; + I64 i64 = 7; + FP32 fp32 = 10; + FP64 fp64 = 11; + String string = 12; + Binary binary = 13; + Timestamp timestamp = 14; + Date date = 16; + Time time = 17; + IntervalYear interval_year = 19; + IntervalDay interval_day = 20; + TimestampTZ timestamp_tz = 29; + UUID uuid = 32; + + FixedChar fixed_char = 21; + VarChar varchar = 22; + FixedBinary fixed_binary = 23; + Decimal decimal = 24; + + Struct struct = 25; + List list = 27; + Map map = 28; + + uint32 user_defined_type_reference = 31; + } + + enum Nullability { + NULLABILITY_UNSPECIFIED = 0; + NULLABILITY_NULLABLE = 1; + NULLABILITY_REQUIRED = 2; + } + + message Boolean { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + message I8 { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message I16 { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message I32 { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message I64 { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message FP32 { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message FP64 { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message String { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message Binary { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message Timestamp { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message Date { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message Time { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message TimestampTZ { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message IntervalYear { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message IntervalDay { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message UUID { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + // Start compound types. + message FixedChar { + int32 length = 1; + uint32 type_variation_reference = 2; + Nullability nullability = 3; + } + + message VarChar { + int32 length = 1; + uint32 type_variation_reference = 2; + Nullability nullability = 3; + } + + message FixedBinary { + int32 length = 1; + uint32 type_variation_reference = 2; + Nullability nullability = 3; + } + + message Decimal { + int32 scale = 1; + int32 precision = 2; + uint32 type_variation_reference = 3; + Nullability nullability = 4; + } + + message Struct { + repeated Type types = 1; + uint32 type_variation_reference = 2; + Nullability nullability = 3; + } + + message List { + Type type = 1; + uint32 type_variation_reference = 2; + Nullability nullability = 3; + } + + message Map { + Type key = 1; + Type value = 2; + uint32 type_variation_reference = 3; + Nullability nullability = 4; + } +} From e8a068ebe2aa748f0178e77282a3e9d6501d6661 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Tue, 5 Jul 2022 15:27:36 +0200 Subject: [PATCH 2/2] Trying to fix aarch64 --- sql/core/pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 79b7c70d09b7b..fac01bb5b1230 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -166,7 +166,7 @@ com.thesamet.scalapb - scalapb-runtime-grpc_2.12 + scalapb-runtime-grpc_${scala.binary.version} 0.11.11 @@ -311,6 +311,7 @@ + com.google.protobuf:protoc:3.21.1 transitive src/main/protobuf