Skip to content

Commit

Permalink
Merge pull request #1 from discord/sinks/gcp-bigquery
Browse files Browse the repository at this point in the history
Add sinks to write to GCP Bigquery
  • Loading branch information
AndrooTheChen authored Sep 24, 2024
2 parents f99e052 + 01c125c commit 05e939c
Show file tree
Hide file tree
Showing 27 changed files with 3,020 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/actions/spelling/allow.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ armhf
backpressure
backticks
bigendian
bigquery
bindir
binfmt
bitcast
Expand Down
1 change: 1 addition & 0 deletions .github/actions/spelling/excludes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,5 @@
^\Qwebsite/layouts/shortcodes/config/unit-tests.html\E$
^lib/codecs/tests/data/native_encoding/
^\Qwebsite/config.toml\E$
^proto/google/
ignore$
1 change: 1 addition & 0 deletions .github/actions/spelling/expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,7 @@ ingesters
ingestor
initdb
initech
Insertdata
installdeb
Instrumentable
interpolatedstring
Expand Down
3 changes: 3 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ fn main() {
#[cfg(feature = "protobuf-build")]
{
println!("cargo:rerun-if-changed=proto/third-party/dnstap.proto");
println!("cargo:rerun-if-changed=proto/third-party/google/cloud/bigquery/storage/v1/storage.proto");
println!("cargo:rerun-if-changed=proto/third-party/google/pubsub/v1/pubsub.proto");
println!("cargo:rerun-if-changed=proto/third-party/google/rpc/status.proto");
println!("cargo:rerun-if-changed=proto/vector/dd_metric.proto");
Expand Down Expand Up @@ -148,6 +149,7 @@ fn main() {
"proto/vector/ddsketch_full.proto",
"proto/vector/dd_metric.proto",
"proto/vector/dd_trace.proto",
"proto/third-party/google/cloud/bigquery/storage/v1/storage.proto",
"proto/third-party/google/pubsub/v1/pubsub.proto",
"proto/third-party/google/rpc/status.proto",
"proto/vector/vector.proto",
Expand Down Expand Up @@ -268,3 +270,4 @@ fn main() {
// Emit the aforementioned stanzas.
tracker.emit_rerun_stanzas();
}

11 changes: 11 additions & 0 deletions lib/codecs/tests/data/protobuf/integration.desc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions lib/codecs/tests/data/protobuf/integration.proto

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

64 changes: 64 additions & 0 deletions proto/third-party/google/cloud/bigquery/storage/v1/arrow.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1;

option csharp_namespace = "Google.Cloud.BigQuery.Storage.V1";
option go_package = "cloud.google.com/go/bigquery/storage/apiv1/storagepb;storagepb";
option java_multiple_files = true;
option java_outer_classname = "ArrowProto";
option java_package = "com.google.cloud.bigquery.storage.v1";
option php_namespace = "Google\\Cloud\\BigQuery\\Storage\\V1";

// Schema of Arrow data, expressed with the data types documented at
// https://arrow.apache.org/docs/python/api/datatypes.html
// and encoded to bytes with the Arrow IPC format:
// https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc
//
// Refer to the code samples for how to deserialize this message.
message ArrowSchema {
  // The Arrow schema in its IPC-serialized form.
  bytes serialized_schema = 1;
}

// A single Arrow RecordBatch.
message ArrowRecordBatch {
  // The RecordBatch, serialized with Arrow IPC.
  bytes serialized_record_batch = 1;

  // [Deprecated] Number of rows contained in `serialized_record_batch`.
  // Use the format-independent ReadRowsResponse.row_count instead.
  int64 row_count = 2 [deprecated = true];
}

// Options that apply only to Arrow serialization.
message ArrowSerializationOptions {
  // Compression codecs supported by Arrow.
  enum CompressionCodec {
    // No codec specified; no compression is applied.
    COMPRESSION_UNSPECIFIED = 0;

    // LZ4 Frame (https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md)
    LZ4_FRAME = 1;

    // Zstandard compression.
    ZSTD = 2;
  }

  // Codec used to compress the Arrow buffers of serialized record batches.
  CompressionCodec buffer_compression = 2;
}
56 changes: 56 additions & 0 deletions proto/third-party/google/cloud/bigquery/storage/v1/avro.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1;

option csharp_namespace = "Google.Cloud.BigQuery.Storage.V1";
option go_package = "cloud.google.com/go/bigquery/storage/apiv1/storagepb;storagepb";
option java_multiple_files = true;
option java_outer_classname = "AvroProto";
option java_package = "com.google.cloud.bigquery.storage.v1";
option php_namespace = "Google\\Cloud\\BigQuery\\Storage\\V1";

// Schema of Avro data.
message AvroSchema {
  // The schema serialized as JSON, following the specification at
  // https://avro.apache.org/docs/1.8.1/spec.html.
  string schema = 1;
}

// A block of Avro rows.
message AvroRows {
  // The rows of the block in binary serialized form.
  bytes serialized_binary_rows = 1;

  // [Deprecated] Number of rows in the returned block.
  // Use the format-independent ReadRowsResponse.row_count instead.
  int64 row_count = 2 [deprecated = true];
}

// Options that apply only to Avro serialization.
message AvroSerializationOptions {
  // Turns on the displayName attribute in the Avro schema.
  //
  // Field names must be alphanumeric per the Avro specification. When a
  // column name violates that requirement (e.g. non-ascii unicode
  // codepoints) and Avro is the requested output format, the
  // CreateReadSession call fails by default.
  //
  // When this field is true, avro field names are filled with a placeholder
  // value and each avro field carries a "displayName" attribute holding the
  // original column name.
  bool enable_display_name_attribute = 1;
}
48 changes: 48 additions & 0 deletions proto/third-party/google/cloud/bigquery/storage/v1/protobuf.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1;

import "google/protobuf/descriptor.proto";

option csharp_namespace = "Google.Cloud.BigQuery.Storage.V1";
option go_package = "cloud.google.com/go/bigquery/storage/apiv1/storagepb;storagepb";
option java_multiple_files = true;
option java_outer_classname = "ProtoBufProto";
option java_package = "com.google.cloud.bigquery.storage.v1";
option php_namespace = "Google\\Cloud\\BigQuery\\Storage\\V1";

// ProtoSchema is the schema of the data rows serialized as protocol buffers.
message ProtoSchema {
  // Descriptor of the input message. It must be self contained: the data rows
  // sent have to be fully decodable from this single descriptor alone. When
  // data rows are composed of multiple independent messages, the descriptor
  // may therefore need to be rewritten so that it only uses nested types:
  // https://developers.google.com/protocol-buffers/docs/proto#nested
  //
  // How proto types and values map onto BigQuery is described at:
  // https://cloud.google.com/bigquery/docs/write-api#data_type_conversions
  google.protobuf.DescriptorProto proto_descriptor = 1;
}

// A sequence of rows, each serialized as a Protocol Buffer.
message ProtoRows {
  // The serialized rows, one Protocol Buffer per element.
  //
  // For details on deserializing this field, see
  // https://developers.google.com/protocol-buffers/docs/overview.
  repeated bytes serialized_rows = 1;
}
Loading

0 comments on commit 05e939c

Please sign in to comment.