From f03e2ff64c589163a75f72645e2d9270db30e084 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Tue, 2 Apr 2024 13:38:43 +0200 Subject: [PATCH 01/48] Include the aws-cpp-sdk in the build --- tensorstore/kvstore/s3/BUILD | 2 + .../aws_c_common.BUILD.bazel | 46 +++++ .../system.BUILD.bazel | 4 + .../com_github_aws_c_common/workspace.bzl | 34 ++++ .../aws_c_event_stream.BUILD.bazel | 40 ++++ .../system.BUILD.bazel | 4 + .../workspace.bzl | 34 ++++ .../aws_checksums.BUILD.bazel | 42 ++++ .../system.BUILD.bazel | 4 + .../com_github_aws_checksums/workspace.bzl | 35 ++++ .../aws_cpp_sdk.BUILD.bazel | 180 ++++++++++++++++++ .../com_github_aws_cpp_sdk/system.BUILD.bazel | 4 + .../com_github_aws_cpp_sdk/workspace.bzl | 38 ++++ third_party/third_party.bzl | 8 + 14 files changed, 475 insertions(+) create mode 100644 third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel create mode 100644 third_party/com_github_aws_c_common/system.BUILD.bazel create mode 100644 third_party/com_github_aws_c_common/workspace.bzl create mode 100644 third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel create mode 100644 third_party/com_github_aws_c_event_stream/system.BUILD.bazel create mode 100644 third_party/com_github_aws_c_event_stream/workspace.bzl create mode 100644 third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel create mode 100644 third_party/com_github_aws_checksums/system.BUILD.bazel create mode 100644 third_party/com_github_aws_checksums/workspace.bzl create mode 100644 third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel create mode 100644 third_party/com_github_aws_cpp_sdk/system.BUILD.bazel create mode 100644 third_party/com_github_aws_cpp_sdk/workspace.bzl diff --git a/tensorstore/kvstore/s3/BUILD b/tensorstore/kvstore/s3/BUILD index 0f4642e99..72587d0af 100644 --- a/tensorstore/kvstore/s3/BUILD +++ b/tensorstore/kvstore/s3/BUILD @@ -157,6 +157,8 @@ tensorstore_cc_library( "//tensorstore/internal/log:verbose_flag", 
"//tensorstore/kvstore:byte_range", "//tensorstore/kvstore/s3/credentials:aws_credentials", + # TODO: substitute this with just the auth components of the aws sdk + "@com_github_aws_cpp_sdk//:s3", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/log:absl_check", "@com_google_absl//absl/log:absl_log", diff --git a/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel b/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel new file mode 100644 index 000000000..d28de4b41 --- /dev/null +++ b/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel @@ -0,0 +1,46 @@ +# Description: +# AWS C Common + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +cc_library( + name = "aws-c-common", + srcs = glob([ + "include/aws/common/*.h", + "include/aws/common/private/*.h", + "source/*.c", + ]) + select({ + "@bazel_tools//src/conditions:windows": glob([ + "source/windows/*.c", + ]), + "//conditions:default": glob([ + "source/posix/*.c", + ]), + }), + hdrs = [ + "include/aws/common/config.h", + ], + defines = [], + includes = [ + "include", + ], + textual_hdrs = glob([ + "include/**/*.inl", + ]), + deps = [], +) + +genrule( + name = "config_h", + srcs = [ + "include/aws/common/config.h.in", + ], + outs = [ + "include/aws/common/config.h", + ], + cmd = "sed 's/cmakedefine/undef/g' $< > $@", +) \ No newline at end of file diff --git a/third_party/com_github_aws_c_common/system.BUILD.bazel b/third_party/com_github_aws_c_common/system.BUILD.bazel new file mode 100644 index 000000000..921f5b9f5 --- /dev/null +++ b/third_party/com_github_aws_c_common/system.BUILD.bazel @@ -0,0 +1,4 @@ +cc_library( + name = "aws_c_common", + visibility = ["//visibility:public"], +) diff --git a/third_party/com_github_aws_c_common/workspace.bzl b/third_party/com_github_aws_c_common/workspace.bzl new file mode 100644 index 000000000..ce515f76a --- /dev/null +++ 
b/third_party/com_github_aws_c_common/workspace.bzl @@ -0,0 +1,34 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_c_common", + sha256 = "01c2a58553a37b3aa5914d9e0bf7bf14507ff4937bc5872a678892ca20fcae1f", + strip_prefix = "aws-c-common-0.4.29", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/awslabs/aws-c-common/archive/v0.4.29.tar.gz", + "https://github.com/awslabs/aws-c-common/archive/v0.4.29.tar.gz", + ], build_file = Label("//third_party:com_github_aws_c_common/aws_c_common.BUILD.bazel"), + system_build_file = Label("//third_party:com_github_aws_c_common/system.BUILD.bazel"), + cmake_name = "aws_c_common", + cmake_target_mapping = { + "@com_github_aws_c_common//:aws_c_common": "aws_c_common::aws_c_common", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel b/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel new file mode 100644 index 000000000..de1fbdcbd --- /dev/null +++ b/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel @@ -0,0 +1,40 @@ +# Description: +# AWS C Event Stream + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + 
+exports_files(["LICENSE"]) + +cc_library( + name = "aws-c-event-stream", + srcs = glob([ + "include/aws/**/*.h", + #"source/arch/*.c", + "source/*.c", + ]) + select({ + "@bazel_tools//src/conditions:windows": glob([ + "source/windows/*.c", + ]), + "//conditions:default": glob([ + "source/posix/*.c", + ]), + }), + + + # srcs = glob([ + # "include/**/*.h", + # "source/**/*.c", + # ]), + hdrs = [ + ], + defines = [], + includes = [ + "include", + ], + deps = [ + "@com_github_aws_c_common//:aws-c-common", + "@com_github_aws_checksums//:aws-checksums", + ], +) \ No newline at end of file diff --git a/third_party/com_github_aws_c_event_stream/system.BUILD.bazel b/third_party/com_github_aws_c_event_stream/system.BUILD.bazel new file mode 100644 index 000000000..3f947e8b8 --- /dev/null +++ b/third_party/com_github_aws_c_event_stream/system.BUILD.bazel @@ -0,0 +1,4 @@ +cc_library( + name = "aws_c_event_stream", + visibility = ["//visibility:public"], +) diff --git a/third_party/com_github_aws_c_event_stream/workspace.bzl b/third_party/com_github_aws_c_event_stream/workspace.bzl new file mode 100644 index 000000000..19aaaae7e --- /dev/null +++ b/third_party/com_github_aws_c_event_stream/workspace.bzl @@ -0,0 +1,34 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_c_event_stream", + sha256 = "31d880d1c868d3f3df1e1f4b45e56ac73724a4dc3449d04d47fc0746f6f077b6", + strip_prefix = "aws-c-event-stream-0.1.4", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/awslabs/aws-c-event-stream/archive/v0.1.4.tar.gz", + "https://github.com/awslabs/aws-c-event-stream/archive/v0.1.4.tar.gz", + ], build_file = Label("//third_party:com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel"), + system_build_file = Label("//third_party:com_github_aws_c_event_stream/system.BUILD.bazel"), + cmake_name = "aws_c_event_stream", + cmake_target_mapping = { + "@com_github_aws_c_event_stream//:aws_c_event_stream": "aws_c_event_stream::aws_c_event_stream", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel b/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel new file mode 100644 index 000000000..5244d7072 --- /dev/null +++ b/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel @@ -0,0 +1,42 @@ +# Description: +# AWS CheckSums + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +cc_library( + name = "aws-checksums", + srcs = glob([ + "include/aws/checksums/*.h", + "include/aws/checksums/private/*.h", + "source/*.c", + ]) + [ + "crc_hw.c", + ], + hdrs = [], + defines = [], + includes = [ + "include", + ], + deps = [], +) + +genrule( + name = "crc_hw_c", + outs = ["crc_hw.c"], + cmd = "\n".join([ + "cat <<'EOF' >$@", + "#include ", + "#include ", + "int aws_checksums_do_cpu_id(int32_t *cpuid) {", + " return 0;", + "}", + "uint32_t aws_checksums_crc32c_hw(const uint8_t *input, int length, uint32_t previousCrc32) {", + " return aws_checksums_crc32c_sw(input, length, 
previousCrc32);", + "}", + "EOF", + ]), +) \ No newline at end of file diff --git a/third_party/com_github_aws_checksums/system.BUILD.bazel b/third_party/com_github_aws_checksums/system.BUILD.bazel new file mode 100644 index 000000000..90655ac70 --- /dev/null +++ b/third_party/com_github_aws_checksums/system.BUILD.bazel @@ -0,0 +1,4 @@ +cc_library( + name = "aws_checksums", + visibility = ["//visibility:public"], +) diff --git a/third_party/com_github_aws_checksums/workspace.bzl b/third_party/com_github_aws_checksums/workspace.bzl new file mode 100644 index 000000000..d58abb27c --- /dev/null +++ b/third_party/com_github_aws_checksums/workspace.bzl @@ -0,0 +1,35 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_checksums", + sha256 = "6e6bed6f75cf54006b6bafb01b3b96df19605572131a2260fddaf0e87949ced0", + strip_prefix = "aws-checksums-0.1.5", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/awslabs/aws-checksums/archive/v0.1.5.tar.gz", + "https://github.com/awslabs/aws-checksums/archive/v0.1.5.tar.gz", + ], + build_file = Label("//third_party:com_github_aws_checksums/aws_checksums.BUILD.bazel"), + system_build_file = Label("//third_party:com_github_aws_checksums/system.BUILD.bazel"), + cmake_name = "aws_checksums", + cmake_target_mapping = { + "@com_github_aws_checksums//:aws_checksums": "aws_checksums::aws_checksums", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel new file mode 100644 index 000000000..8ed17607b --- /dev/null +++ b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel @@ -0,0 +1,180 @@ +# Description: +# AWS C++ SDK + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +cc_library( + name = "core", + srcs = glob([ + "aws-cpp-sdk-core/source/*.cpp", # AWS_SOURCE + "aws-cpp-sdk-core/source/external/tinyxml2/*.cpp", # AWS_TINYXML2_SOURCE + "aws-cpp-sdk-core/source/external/cjson/*.cpp", # CJSON_SOURCE + "aws-cpp-sdk-core/source/auth/*.cpp", # AWS_AUTH_SOURCE + "aws-cpp-sdk-core/source/client/*.cpp", # AWS_CLIENT_SOURCE + "aws-cpp-sdk-core/source/internal/*.cpp", # AWS_INTERNAL_SOURCE + "aws-cpp-sdk-core/source/aws/model/*.cpp", # AWS_MODEL_SOURCE + "aws-cpp-sdk-core/source/http/*.cpp", # HTTP_SOURCE + "aws-cpp-sdk-core/source/http/standard/*.cpp", # HTTP_STANDARD_SOURCE + "aws-cpp-sdk-core/source/config/*.cpp", # CONFIG_SOURCE + 
"aws-cpp-sdk-core/source/monitoring/*.cpp", # MONITORING_SOURCE + "aws-cpp-sdk-core/source/utils/*.cpp", # UTILS_SOURCE + "aws-cpp-sdk-core/source/utils/event/*.cpp", # UTILS_EVENT_SOURCE + "aws-cpp-sdk-core/source/utils/base64/*.cpp", # UTILS_BASE64_SOURCE + "aws-cpp-sdk-core/source/utils/crypto/*.cpp", # UTILS_CRYPTO_SOURCE + "aws-cpp-sdk-core/source/utils/json/*.cpp", # UTILS_JSON_SOURCE + "aws-cpp-sdk-core/source/utils/threading/*.cpp", # UTILS_THREADING_SOURCE + "aws-cpp-sdk-core/source/utils/xml/*.cpp", # UTILS_XML_SOURCE + "aws-cpp-sdk-core/source/utils/logging/*.cpp", # UTILS_LOGGING_SOURCE + "aws-cpp-sdk-core/source/utils/memory/*.cpp", # UTILS_MEMORY_SOURCE + "aws-cpp-sdk-core/source/utils/memory/stl/*.cpp", # UTILS_MEMORY_STL_SOURCE + "aws-cpp-sdk-core/source/utils/stream/*.cpp", # UTILS_STREAM_SOURCE + "aws-cpp-sdk-core/source/utils/crypto/factory/*.cpp", # UTILS_CRYPTO_FACTORY_SOURCE + "aws-cpp-sdk-core/source/http/curl/*.cpp", # HTTP_CURL_CLIENT_SOURCE + "aws-cpp-sdk-core/source/utils/crypto/openssl/*.cpp", # UTILS_CRYPTO_OPENSSL_SOURCE + ]) + select({ + "@bazel_tools//src/conditions:windows": glob([ + "aws-cpp-sdk-core/source/net/windows/*.cpp", # NET_SOURCE + "aws-cpp-sdk-core/source/platform/windows/*.cpp", # PLATFORM_WINDOWS_SOURCE + ]), + "//conditions:default": glob([ + "aws-cpp-sdk-core/source/net/linux-shared/*.cpp", # NET_SOURCE + "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", # PLATFORM_LINUX_SHARED_SOURCE + ]), + }), + hdrs = [ + "aws-cpp-sdk-core/include/aws/core/SDKConfig.h", + ] + glob([ + "aws-cpp-sdk-core/include/aws/core/*.h", # AWS_HEADERS + "aws-cpp-sdk-core/include/aws/core/auth/*.h", # AWS_AUTH_HEADERS + "aws-cpp-sdk-core/include/aws/core/client/*.h", # AWS_CLIENT_HEADERS + "aws-cpp-sdk-core/include/aws/core/internal/*.h", # AWS_INTERNAL_HEADERS + "aws-cpp-sdk-core/include/aws/core/net/*.h", # NET_HEADERS + "aws-cpp-sdk-core/include/aws/core/http/*.h", # HTTP_HEADERS + "aws-cpp-sdk-core/include/aws/core/http/standard/*.h", 
# HTTP_STANDARD_HEADERS + "aws-cpp-sdk-core/include/aws/core/config/*.h", # CONFIG_HEADERS + "aws-cpp-sdk-core/include/aws/core/monitoring/*.h", # MONITORING_HEADERS + "aws-cpp-sdk-core/include/aws/core/platform/*.h", # PLATFORM_HEADERS + "aws-cpp-sdk-core/include/aws/core/utils/*.h", # UTILS_HEADERS + "aws-cpp-sdk-core/include/aws/core/utils/event/*.h", # UTILS_EVENT_HEADERS + "aws-cpp-sdk-core/include/aws/core/utils/base64/*.h", # UTILS_BASE64_HEADERS + "aws-cpp-sdk-core/include/aws/core/utils/crypto/*.h", # UTILS_CRYPTO_HEADERS + "aws-cpp-sdk-core/include/aws/core/utils/json/*.h", # UTILS_JSON_HEADERS + "aws-cpp-sdk-core/include/aws/core/utils/threading/*.h", # UTILS_THREADING_HEADERS + "aws-cpp-sdk-core/include/aws/core/utils/xml/*.h", # UTILS_XML_HEADERS + "aws-cpp-sdk-core/include/aws/core/utils/memory/*.h", # UTILS_MEMORY_HEADERS + "aws-cpp-sdk-core/include/aws/core/utils/memory/stl/*.h", # UTILS_STL_HEADERS + "aws-cpp-sdk-core/include/aws/core/utils/logging/*.h", # UTILS_LOGGING_HEADERS + "aws-cpp-sdk-core/include/aws/core/utils/ratelimiter/*.h", # UTILS_RATE_LIMITER_HEADERS + "aws-cpp-sdk-core/include/aws/core/utils/stream/*.h", # UTILS_STREAM_HEADERS + "aws-cpp-sdk-core/include/aws/core/external/cjson/*.h", # CJSON_HEADERS + "aws-cpp-sdk-core/include/aws/core/external/tinyxml2/*.h", # TINYXML2_HEADERS + "aws-cpp-sdk-core/include/aws/core/http/curl/*.h", # HTTP_CURL_CLIENT_HEADERS + "aws-cpp-sdk-core/include/aws/core/utils/crypto/openssl/*.h", # UTILS_CRYPTO_OPENSSL_HEADERS + ]), + defines = [ + 'AWS_SDK_VERSION_STRING=\\"1.7.366\\"', + "AWS_SDK_VERSION_MAJOR=1", + "AWS_SDK_VERSION_MINOR=7", + "AWS_SDK_VERSION_PATCH=366", + "ENABLE_OPENSSL_ENCRYPTION=1", + "ENABLE_CURL_CLIENT=1", + "OPENSSL_IS_BORINGSSL=1", + ] + select({ + "@bazel_tools//src/conditions:windows": [ + "PLATFORM_WINDOWS", + "WIN32_LEAN_AND_MEAN", + ], + "//conditions:default": [ + "PLATFORM_LINUX", + ], + }), + includes = [ + "aws-cpp-sdk-core/include", + ], + linkopts = select({ + 
"@bazel_tools//src/conditions:windows": [ + "-DEFAULTLIB:userenv.lib", + "-DEFAULTLIB:version.lib", + ], + "//conditions:default": [], + }), + deps = [ + "@com_github_aws_c_event_stream//:aws-c-event-stream", + "@com_google_boringssl//:crypto", + "@com_google_boringssl//:ssl", + "@se_curl//:curl", + ], +) + +cc_library( + name = "s3", + srcs = glob([ + "aws-cpp-sdk-s3/source/*.cpp", # AWS_S3_SOURCE + "aws-cpp-sdk-s3/source/model/*.cpp", # AWS_S3_MODEL_SOURCE + ]), + hdrs = glob([ + "aws-cpp-sdk-s3/include/aws/s3/*.h", # AWS_S3_HEADERS + "aws-cpp-sdk-s3/include/aws/s3/model/*.h", # AWS_S3_MODEL_HEADERS + ]), + includes = [ + "aws-cpp-sdk-s3/include", + ], + deps = [ + ":core", + ], +) + +cc_library( + name = "transfer", + srcs = glob([ + "aws-cpp-sdk-transfer/source/transfer/*.cpp", # TRANSFER_SOURCE + ]), + hdrs = glob([ + "aws-cpp-sdk-transfer/include/aws/transfer/*.h", # TRANSFER_HEADERS + ]), + includes = [ + "aws-cpp-sdk-transfer/include", + ], + deps = [ + ":core", + ":s3", + ], +) + +cc_library( + name = "kinesis", + srcs = glob([ + "aws-cpp-sdk-kinesis/source/*.cpp", # AWS_KINESIS_SOURCE + "aws-cpp-sdk-kinesis/source/model/*.cpp", # AWS_KINESIS_MODEL_SOURCE + ]), + hdrs = glob([ + "aws-cpp-sdk-kinesis/include/aws/kinesis/*.h", # AWS_KINESIS_HEADERS + "aws-cpp-sdk-kinesis/include/aws/kinesis/model/*.h", # AWS_KINESIS_MODEL_HEADERS + ]), + includes = [ + "aws-cpp-sdk-kinesis/include", + ], + deps = [ + ":core", + ], +) + +genrule( + name = "SDKConfig_h", + outs = [ + "aws-cpp-sdk-core/include/aws/core/SDKConfig.h", + ], + cmd = "\n".join([ + "cat <<'EOF' >$@", + "#define USE_AWS_MEMORY_MANAGEMENT", + "#if defined(_MSC_VER)", + "#include ", + "#undef IGNORE", + "#endif", + "EOF", + ]), +) \ No newline at end of file diff --git a/third_party/com_github_aws_cpp_sdk/system.BUILD.bazel b/third_party/com_github_aws_cpp_sdk/system.BUILD.bazel new file mode 100644 index 000000000..4fd02c7d9 --- /dev/null +++ b/third_party/com_github_aws_cpp_sdk/system.BUILD.bazel @@ 
-0,0 +1,4 @@ +cc_library( + name = "aws_cpp_sdk", + visibility = ["//visibility:public"], +) diff --git a/third_party/com_github_aws_cpp_sdk/workspace.bzl b/third_party/com_github_aws_cpp_sdk/workspace.bzl new file mode 100644 index 000000000..15a892f47 --- /dev/null +++ b/third_party/com_github_aws_cpp_sdk/workspace.bzl @@ -0,0 +1,38 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_cpp_sdk", + patch_cmds = [ + """sed -i.bak 's/UUID::RandomUUID/Aws::Utils::UUID::RandomUUID/g' aws-cpp-sdk-core/source/client/AWSClient.cpp""", + """sed -i.bak 's/__attribute__((visibility("default")))//g' aws-cpp-sdk-core/include/aws/core/external/tinyxml2/tinyxml2.h """, + ], + sha256 = "ae1cb22225b1f47eee351c0064be5e87676bf7090bb9ad19888bea0dab0e2749", + strip_prefix = "aws-sdk-cpp-1.8.187", + urls = [ + "https://github.com/aws/aws-sdk-cpp/archive/1.8.187.tar.gz", + ], + build_file = Label("//third_party:com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel"), + system_build_file = Label("//third_party:com_github_aws_cpp_sdk/system.BUILD.bazel"), + cmake_name = "aws_cpp_sdk", + cmake_target_mapping = { + "@com_github_aws_cpp_sdk//:aws_cpp_sdk": "aws_cpp_sdk::aws_cpp_sdk", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/third_party.bzl 
b/third_party/third_party.bzl index 49ccf805a..4ba103f92 100644 --- a/third_party/third_party.bzl +++ b/third_party/third_party.bzl @@ -1,5 +1,9 @@ load("//third_party:bazel_skylib/workspace.bzl", repo_bazel_skylib = "repo") load("//third_party:blake3/workspace.bzl", repo_blake3 = "repo") +load("//third_party:com_github_aws_c_common/workspace.bzl", repo_com_github_aws_c_common = "repo") +load("//third_party:com_github_aws_c_event_stream/workspace.bzl", repo_com_github_aws_c_event_stream = "repo") +load("//third_party:com_github_aws_checksums/workspace.bzl", repo_com_github_aws_checksums = "repo") +load("//third_party:com_github_aws_cpp_sdk/workspace.bzl", repo_com_github_aws_cpp_sdk = "repo") load("//third_party:com_github_cares_cares/workspace.bzl", repo_com_github_cares_cares = "repo") load("//third_party:com_github_cncf_udpa/workspace.bzl", repo_com_github_cncf_udpa = "repo") load("//third_party:com_github_grpc_grpc/workspace.bzl", repo_com_github_grpc_grpc = "repo") @@ -46,6 +50,10 @@ load("//third_party:tinyxml2/workspace.bzl", repo_tinyxml2 = "repo") def third_party_dependencies(): repo_bazel_skylib() repo_blake3() + repo_com_github_aws_c_common() + repo_com_github_aws_c_event_stream() + repo_com_github_aws_checksums() + repo_com_github_aws_cpp_sdk() repo_com_github_cares_cares() repo_com_github_cncf_udpa() repo_com_github_grpc_grpc() From a003a76482a5105529868fd7e9ce5b473bc9c906 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Tue, 2 Apr 2024 15:23:06 +0200 Subject: [PATCH 02/48] Remove unnecessarily introduced os-specific includes in aws-c-event-stream BUILD. 
This aligns the BUILD more closely with tensorflow/io --- .../aws_c_event_stream.BUILD.bazel | 23 ++++--------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel b/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel index de1fbdcbd..5c6427846 100644 --- a/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel +++ b/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel @@ -10,25 +10,10 @@ exports_files(["LICENSE"]) cc_library( name = "aws-c-event-stream", srcs = glob([ - "include/aws/**/*.h", - #"source/arch/*.c", - "source/*.c", - ]) + select({ - "@bazel_tools//src/conditions:windows": glob([ - "source/windows/*.c", - ]), - "//conditions:default": glob([ - "source/posix/*.c", - ]), - }), - - - # srcs = glob([ - # "include/**/*.h", - # "source/**/*.c", - # ]), - hdrs = [ - ], + "include/**/*.h", + "source/**/*.c", + ]), + hdrs = [], defines = [], includes = [ "include", From c0efd6e3f5413e8df210e30443912e5754ebdea3 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Wed, 3 Apr 2024 09:37:19 +0200 Subject: [PATCH 03/48] @platforms//os:windows --- third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel b/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel index d28de4b41..fb7523037 100644 --- a/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel +++ b/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel @@ -14,7 +14,7 @@ cc_library( "include/aws/common/private/*.h", "source/*.c", ]) + select({ - "@bazel_tools//src/conditions:windows": glob([ + "@platforms//os:windows": glob([ "source/windows/*.c", ]), "//conditions:default": glob([ From da85486ee493f2e79d706c31429c7c747fb96604 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Wed, 3 Apr 2024 09:39:01 +0200 Subject: 
[PATCH 04/48] Remove system.BUILD.bazel --- third_party/com_github_aws_c_common/system.BUILD.bazel | 4 ---- third_party/com_github_aws_c_event_stream/system.BUILD.bazel | 4 ---- third_party/com_github_aws_checksums/system.BUILD.bazel | 4 ---- third_party/com_github_aws_cpp_sdk/system.BUILD.bazel | 4 ---- 4 files changed, 16 deletions(-) delete mode 100644 third_party/com_github_aws_c_common/system.BUILD.bazel delete mode 100644 third_party/com_github_aws_c_event_stream/system.BUILD.bazel delete mode 100644 third_party/com_github_aws_checksums/system.BUILD.bazel delete mode 100644 third_party/com_github_aws_cpp_sdk/system.BUILD.bazel diff --git a/third_party/com_github_aws_c_common/system.BUILD.bazel b/third_party/com_github_aws_c_common/system.BUILD.bazel deleted file mode 100644 index 921f5b9f5..000000000 --- a/third_party/com_github_aws_c_common/system.BUILD.bazel +++ /dev/null @@ -1,4 +0,0 @@ -cc_library( - name = "aws_c_common", - visibility = ["//visibility:public"], -) diff --git a/third_party/com_github_aws_c_event_stream/system.BUILD.bazel b/third_party/com_github_aws_c_event_stream/system.BUILD.bazel deleted file mode 100644 index 3f947e8b8..000000000 --- a/third_party/com_github_aws_c_event_stream/system.BUILD.bazel +++ /dev/null @@ -1,4 +0,0 @@ -cc_library( - name = "aws_c_event_stream", - visibility = ["//visibility:public"], -) diff --git a/third_party/com_github_aws_checksums/system.BUILD.bazel b/third_party/com_github_aws_checksums/system.BUILD.bazel deleted file mode 100644 index 90655ac70..000000000 --- a/third_party/com_github_aws_checksums/system.BUILD.bazel +++ /dev/null @@ -1,4 +0,0 @@ -cc_library( - name = "aws_checksums", - visibility = ["//visibility:public"], -) diff --git a/third_party/com_github_aws_cpp_sdk/system.BUILD.bazel b/third_party/com_github_aws_cpp_sdk/system.BUILD.bazel deleted file mode 100644 index 4fd02c7d9..000000000 --- a/third_party/com_github_aws_cpp_sdk/system.BUILD.bazel +++ /dev/null @@ -1,4 +0,0 @@ -cc_library( - 
name = "aws_cpp_sdk", - visibility = ["//visibility:public"], -) From 98550fcb5ba0800ee3f4edb1c12160d093ac463f Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Wed, 3 Apr 2024 09:39:39 +0200 Subject: [PATCH 05/48] build_file formatting --- third_party/com_github_aws_c_common/workspace.bzl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/third_party/com_github_aws_c_common/workspace.bzl b/third_party/com_github_aws_c_common/workspace.bzl index ce515f76a..578fef844 100644 --- a/third_party/com_github_aws_c_common/workspace.bzl +++ b/third_party/com_github_aws_c_common/workspace.bzl @@ -24,7 +24,8 @@ def repo(): urls = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/awslabs/aws-c-common/archive/v0.4.29.tar.gz", "https://github.com/awslabs/aws-c-common/archive/v0.4.29.tar.gz", - ], build_file = Label("//third_party:com_github_aws_c_common/aws_c_common.BUILD.bazel"), + ], + build_file = Label("//third_party:com_github_aws_c_common/aws_c_common.BUILD.bazel"), system_build_file = Label("//third_party:com_github_aws_c_common/system.BUILD.bazel"), cmake_name = "aws_c_common", cmake_target_mapping = { From 71fd67624a5f4ce39bd97684f513c5af661b0cc9 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Wed, 3 Apr 2024 09:41:38 +0200 Subject: [PATCH 06/48] Change - to _ in cc_library name field --- third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel | 2 +- .../aws_c_event_stream.BUILD.bazel | 2 +- third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel b/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel index fb7523037..50020212b 100644 --- a/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel +++ b/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel @@ -8,7 +8,7 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) cc_library( - name = "aws-c-common", + name = 
"aws_c_common", srcs = glob([ "include/aws/common/*.h", "include/aws/common/private/*.h", diff --git a/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel b/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel index 5c6427846..c4782fefa 100644 --- a/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel +++ b/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel @@ -8,7 +8,7 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) cc_library( - name = "aws-c-event-stream", + name = "aws_c_event_stream", srcs = glob([ "include/**/*.h", "source/**/*.c", diff --git a/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel b/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel index 5244d7072..c6628f9ba 100644 --- a/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel +++ b/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel @@ -8,7 +8,7 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) cc_library( - name = "aws-checksums", + name = "aws_checksums", srcs = glob([ "include/aws/checksums/*.h", "include/aws/checksums/private/*.h", From 9672159dc9b0ca4afe1bfa35e6f5b96db688289a Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Wed, 3 Apr 2024 09:48:51 +0200 Subject: [PATCH 07/48] #define WIN32_LEAN_AND_MEAN in aws/core/SDKConfig.h --- third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel | 3 +++ 1 file changed, 3 insertions(+) diff --git a/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel index 8ed17607b..ca127714a 100644 --- a/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel +++ b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel @@ -172,6 +172,9 @@ genrule( "cat <<'EOF' >$@", "#define USE_AWS_MEMORY_MANAGEMENT", "#if defined(_MSC_VER)", + "#ifndef WIN32_LEAN_AND_MEAN", + "#define WIN32_LEAN_AND_MEAN", + "#endif", "#include ", "#undef IGNORE", 
"#endif", From 6ffb5c9f1272587c1bcb2a98df66f0505a92af60 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Wed, 3 Apr 2024 13:35:43 +0200 Subject: [PATCH 08/48] Change - to _ in referenced locations --- .../aws_c_event_stream.BUILD.bazel | 4 ++-- third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel b/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel index c4782fefa..4db5a26a6 100644 --- a/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel +++ b/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel @@ -19,7 +19,7 @@ cc_library( "include", ], deps = [ - "@com_github_aws_c_common//:aws-c-common", - "@com_github_aws_checksums//:aws-checksums", + "@com_github_aws_c_common//:aws_c_common", + "@com_github_aws_checksums//:aws_checksums", ], ) \ No newline at end of file diff --git a/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel index ca127714a..c3754ce07 100644 --- a/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel +++ b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel @@ -103,7 +103,7 @@ cc_library( "//conditions:default": [], }), deps = [ - "@com_github_aws_c_event_stream//:aws-c-event-stream", + "@com_github_aws_c_event_stream//:aws_c_event_stream", "@com_google_boringssl//:crypto", "@com_google_boringssl//:ssl", "@se_curl//:curl", From de6b273f95c7f50dec82a1b85ce3514d6dfab62a Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Sat, 13 Apr 2024 22:21:23 +0200 Subject: [PATCH 09/48] More @platforms//os:windows --- third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel index 
c3754ce07..83e2b37fe 100644 --- a/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel +++ b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel @@ -36,7 +36,7 @@ cc_library( "aws-cpp-sdk-core/source/http/curl/*.cpp", # HTTP_CURL_CLIENT_SOURCE "aws-cpp-sdk-core/source/utils/crypto/openssl/*.cpp", # UTILS_CRYPTO_OPENSSL_SOURCE ]) + select({ - "@bazel_tools//src/conditions:windows": glob([ + "@platforms//os:windows": glob([ "aws-cpp-sdk-core/source/net/windows/*.cpp", # NET_SOURCE "aws-cpp-sdk-core/source/platform/windows/*.cpp", # PLATFORM_WINDOWS_SOURCE ]), @@ -84,7 +84,7 @@ cc_library( "ENABLE_CURL_CLIENT=1", "OPENSSL_IS_BORINGSSL=1", ] + select({ - "@bazel_tools//src/conditions:windows": [ + "@platforms//os:windows": [ "PLATFORM_WINDOWS", "WIN32_LEAN_AND_MEAN", ], @@ -96,7 +96,7 @@ cc_library( "aws-cpp-sdk-core/include", ], linkopts = select({ - "@bazel_tools//src/conditions:windows": [ + "@platforms//os:windows": [ "-DEFAULTLIB:userenv.lib", "-DEFAULTLIB:version.lib", ], From cfcf0d2b068c26b244ddd6fe55a237b3515d3426 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 26 Apr 2024 12:47:33 +0200 Subject: [PATCH 10/48] Convert patch_cmds to patch file in @com_github_aws_cpp_sdk --- .../patches/update_sdk.diff | 35 +++++++++++++++++++ .../com_github_aws_cpp_sdk/workspace.bzl | 6 ++-- 2 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 third_party/com_github_aws_cpp_sdk/patches/update_sdk.diff diff --git a/third_party/com_github_aws_cpp_sdk/patches/update_sdk.diff b/third_party/com_github_aws_cpp_sdk/patches/update_sdk.diff new file mode 100644 index 000000000..cf493191e --- /dev/null +++ b/third_party/com_github_aws_cpp_sdk/patches/update_sdk.diff @@ -0,0 +1,35 @@ +diff --git a/aws-cpp-sdk-core/include/aws/core/external/tinyxml2/tinyxml2.h b/aws-cpp-sdk-core/include/aws/core/external/tinyxml2/tinyxml2.h +index 3721ed1..73ac4a3 100644 +--- a/aws-cpp-sdk-core/include/aws/core/external/tinyxml2/tinyxml2.h ++++ 
b/aws-cpp-sdk-core/include/aws/core/external/tinyxml2/tinyxml2.h +@@ -80,7 +80,7 @@ This file has been modified from its original version by Amazon: + #endif // AWS_CORE_EXPORTS + #endif // USE_IMPORT_EXPORT + #elif __GNUC__ >= 4 +- #define TINYXML2_LIB __attribute__((visibility("default"))) ++ #define TINYXML2_LIB + #endif // _WIN32 + + #ifndef TINYXML2_LIB +diff --git a/aws-cpp-sdk-core/source/client/AWSClient.cpp b/aws-cpp-sdk-core/source/client/AWSClient.cpp +index 4b2a38b..5198448 100644 +--- a/aws-cpp-sdk-core/source/client/AWSClient.cpp ++++ b/aws-cpp-sdk-core/source/client/AWSClient.cpp +@@ -232,7 +232,7 @@ HttpResponseOutcome AWSClient::AttemptExhaustively(const Aws::Http::URI& uri, + const char* signerRegion = signerRegionOverride; + Aws::String regionFromResponse; + +- Aws::String invocationId = UUID::RandomUUID(); ++ Aws::String invocationId = Aws::Utils::UUID::RandomUUID(); + RequestInfo requestInfo; + requestInfo.attempt = 1; + requestInfo.maxAttempts = 0; +@@ -358,7 +358,7 @@ HttpResponseOutcome AWSClient::AttemptExhaustively(const Aws::Http::URI& uri, + const char* signerRegion = signerRegionOverride; + Aws::String regionFromResponse; + +- Aws::String invocationId = UUID::RandomUUID(); ++ Aws::String invocationId = Aws::Utils::UUID::RandomUUID(); + RequestInfo requestInfo; + requestInfo.attempt = 1; + requestInfo.maxAttempts = 0; diff --git a/third_party/com_github_aws_cpp_sdk/workspace.bzl b/third_party/com_github_aws_cpp_sdk/workspace.bzl index 15a892f47..36c28a26e 100644 --- a/third_party/com_github_aws_cpp_sdk/workspace.bzl +++ b/third_party/com_github_aws_cpp_sdk/workspace.bzl @@ -19,10 +19,10 @@ def repo(): maybe( third_party_http_archive, name = "com_github_aws_cpp_sdk", - patch_cmds = [ - """sed -i.bak 's/UUID::RandomUUID/Aws::Utils::UUID::RandomUUID/g' aws-cpp-sdk-core/source/client/AWSClient.cpp""", - """sed -i.bak 's/__attribute__((visibility("default")))//g' aws-cpp-sdk-core/include/aws/core/external/tinyxml2/tinyxml2.h """, + patches = 
[ + Label("//third_party:com_github_aws_cpp_sdk/patches/update_sdk.diff"), ], + patch_args = ["-p1"], sha256 = "ae1cb22225b1f47eee351c0064be5e87676bf7090bb9ad19888bea0dab0e2749", strip_prefix = "aws-sdk-cpp-1.8.187", urls = [ From 5c877d93879803c24b64ec7ea80e2942f0816e07 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 26 Apr 2024 12:48:26 +0200 Subject: [PATCH 11/48] Use write_file in com_github_aws_checksums --- .../aws_checksums.BUILD.bazel | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel b/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel index c6628f9ba..2a2b48dd3 100644 --- a/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel +++ b/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel @@ -1,6 +1,8 @@ # Description: # AWS CheckSums +load("@bazel_skylib//rules:write_file.bzl", "write_file") + package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 @@ -18,17 +20,16 @@ cc_library( ], hdrs = [], defines = [], - includes = [ - "include", - ], + includes = ["include"], deps = [], ) -genrule( +write_file( name = "crc_hw_c", - outs = ["crc_hw.c"], - cmd = "\n".join([ - "cat <<'EOF' >$@", + out = "crc_hw.c", + newline = "auto", + + content = [ "#include ", "#include ", "int aws_checksums_do_cpu_id(int32_t *cpuid) {", @@ -37,6 +38,5 @@ genrule( "uint32_t aws_checksums_crc32c_hw(const uint8_t *input, int length, uint32_t previousCrc32) {", " return aws_checksums_crc32c_sw(input, length, previousCrc32);", "}", - "EOF", - ]), -) \ No newline at end of file + ], +) From 84cda4a09ae2da96848acdeab9adb9edc40705a3 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 26 Apr 2024 13:32:28 +0200 Subject: [PATCH 12/48] Generate SDKConfig.h with a write_file --- .../aws_cpp_sdk.BUILD.bazel | 61 +++++++++---------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git 
a/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel index 83e2b37fe..ade56d2d5 100644 --- a/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel +++ b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel @@ -1,6 +1,8 @@ # Description: # AWS C++ SDK +load("@bazel_skylib//rules:write_file.bzl", "write_file") + package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 @@ -75,23 +77,7 @@ cc_library( "aws-cpp-sdk-core/include/aws/core/http/curl/*.h", # HTTP_CURL_CLIENT_HEADERS "aws-cpp-sdk-core/include/aws/core/utils/crypto/openssl/*.h", # UTILS_CRYPTO_OPENSSL_HEADERS ]), - defines = [ - 'AWS_SDK_VERSION_STRING=\\"1.7.366\\"', - "AWS_SDK_VERSION_MAJOR=1", - "AWS_SDK_VERSION_MINOR=7", - "AWS_SDK_VERSION_PATCH=366", - "ENABLE_OPENSSL_ENCRYPTION=1", - "ENABLE_CURL_CLIENT=1", - "OPENSSL_IS_BORINGSSL=1", - ] + select({ - "@platforms//os:windows": [ - "PLATFORM_WINDOWS", - "WIN32_LEAN_AND_MEAN", - ], - "//conditions:default": [ - "PLATFORM_LINUX", - ], - }), + defines = [], includes = [ "aws-cpp-sdk-core/include", ], @@ -163,21 +149,32 @@ cc_library( ], ) -genrule( +write_file( name = "SDKConfig_h", - outs = [ - "aws-cpp-sdk-core/include/aws/core/SDKConfig.h", - ], - cmd = "\n".join([ - "cat <<'EOF' >$@", + out = "aws-cpp-sdk-core/include/aws/core/SDKConfig.h", + newline = "auto", + + content = [ "#define USE_AWS_MEMORY_MANAGEMENT", - "#if defined(_MSC_VER)", - "#ifndef WIN32_LEAN_AND_MEAN", - "#define WIN32_LEAN_AND_MEAN", - "#endif", - "#include ", - "#undef IGNORE", - "#endif", - "EOF", - ]), + "#define AWS_SDK_VERSION_STRING=\\"1.8.187\\"", + "#define AWS_SDK_VERSION_MAJOR=1", + "#define AWS_SDK_VERSION_MINOR=8", + "#define AWS_SDK_VERSION_PATCH=187", + "#define ENABLE_OPENSSL_ENCRYPTION=1", + "#define ENABLE_CURL_CLIENT=0", + "#define OPENSSL_IS_BORINGSSL=1", + ] + select({ + "@platforms//os:windows": [ + "#define WIN32_LEAN_AND_MEAN", + "#include ", + "#define 
PLATFORM_WINDOWS", + "#undef IGNORE", + ], + "@platforms//os:macos": [ + "#define PLATFORM_MACOS" + ], + "//conditions:default": [ + "PLATFORM_LINUX", + ], + }), ) \ No newline at end of file From a03e45eac5793d80f98cc8236ec094b265c1a5a5 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 26 Apr 2024 14:02:46 +0200 Subject: [PATCH 13/48] Add s3_encryption --- .../aws_cpp_sdk.BUILD.bazel | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel index ade56d2d5..f8e697596 100644 --- a/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel +++ b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel @@ -114,6 +114,29 @@ cc_library( ], ) +cc_library( + name = "s3_encryption", + srcs = glob([ + "aws-cpp-sdk-s3-encryption/source/*.cpp", + "aws-cpp-sdk-s3-encryption/source/handlers/*.cpp", + "aws-cpp-sdk-s3-encryption/source/materials/*.cpp", + "aws-cpp-sdk-s3-encryption/source/modules/*.cpp", + ]), + hdrs = glob([ + "aws-cpp-sdk-s3-encryption/include/aws/s3/*.h", + "aws-cpp-sdk-s3-encryption/include/handlers/*.h", + "aws-cpp-sdk-s3-encryption/include/materials/*.h", + "aws-cpp-sdk-s3-encryption/include/modules/*.h", + ]), + includes = [ + "aws-cpp-sdk-s3-encryption/include", + ], + deps = [ + ":core", + ], +) + + cc_library( name = "transfer", srcs = glob([ From 278cb8faad625478997317278bea09897d7e35b6 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 26 Apr 2024 14:05:34 +0200 Subject: [PATCH 14/48] Add s3 context --- tensorstore/kvstore/s3/s3_context.cc | 120 ++++++++++++++++++++++ tensorstore/kvstore/s3/s3_context.h | 66 ++++++++++++ tensorstore/kvstore/s3/s3_context_test.cc | 69 +++++++++++++ 3 files changed, 255 insertions(+) create mode 100644 tensorstore/kvstore/s3/s3_context.cc create mode 100644 tensorstore/kvstore/s3/s3_context.h create mode 100644 tensorstore/kvstore/s3/s3_context_test.cc diff --git 
a/tensorstore/kvstore/s3/s3_context.cc b/tensorstore/kvstore/s3/s3_context.cc new file mode 100644 index 000000000..f80e3328b --- /dev/null +++ b/tensorstore/kvstore/s3/s3_context.cc @@ -0,0 +1,120 @@ +// Copyright 2024 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/kvstore/s3/s3_context.h" + +#include +#include + +#include +#include +#include +#include + +#include "absl/log/absl_log.h" +#include "absl/synchronization/mutex.h" + +namespace tensorstore { +namespace internal_kvstore_s3 { + +namespace { + +absl::Mutex context_mu_; +std::weak_ptr context_; + +} + +AWSLogSystem::AWSLogSystem(Aws::Utils::Logging::LogLevel log_level) + : log_level_(log_level) {} + +Aws::Utils::Logging::LogLevel AWSLogSystem::GetLogLevel(void) const { + return log_level_; +} + +void AWSLogSystem::SetLogLevel(Aws::Utils::Logging::LogLevel log_level) { + log_level_ = log_level; +} + + // Writes the stream to ProcessFormattedStatement. +void AWSLogSystem::LogStream(Aws::Utils::Logging::LogLevel log_level, const char* tag, + const Aws::OStringStream& messageStream) { + LogMessage(log_level, messageStream.rdbuf()->str().c_str()); +} + +void AWSLogSystem::Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, + const char* format, ...) 
{ + char buffer[256]; + va_list args; + va_start(args, format); + vsnprintf(buffer, 256, format, args); + va_end(args); + LogMessage(log_level, buffer); +} + +void AWSLogSystem::LogMessage(Aws::Utils::Logging::LogLevel log_level, const std::string & message) { + switch(log_level) { + case Aws::Utils::Logging::LogLevel::Info: + ABSL_LOG(INFO) << message; + break; + case Aws::Utils::Logging::LogLevel::Warn: + ABSL_LOG(WARNING) << message; + break; + case Aws::Utils::Logging::LogLevel::Error: + ABSL_LOG(ERROR) << message; + break; + case Aws::Utils::Logging::LogLevel::Fatal: + ABSL_LOG(FATAL) << message; + break; + case Aws::Utils::Logging::LogLevel::Trace: + case Aws::Utils::Logging::LogLevel::Debug: + default: + ABSL_LOG(INFO) << message; + break; + } +} + + +// Initialise AWS API and Logging +std::shared_ptr GetAwsContext() { + absl::MutexLock lock(&context_mu_); + if(context_.use_count() > 0) { + ABSL_LOG(INFO) << "Returning existing AwsContext"; + return context_.lock(); + } + + ABSL_LOG(INFO) << "Initialising AWS API"; + auto options = Aws::SDKOptions{}; + Aws::InitAPI(options); + auto log = Aws::MakeShared(kAWSTag, Aws::Utils::Logging::LogLevel::Info); + Aws::Utils::Logging::InitializeAWSLogging(std::move(log)); + auto provider = Aws::MakeShared(kAWSTag); + + auto ctx = std::shared_ptr( + new AwsContext{ + std::move(options), + std::move(log), + std::move(provider)}, + [](AwsContext * ctx) { + absl::MutexLock lock(&context_mu_); + ABSL_LOG(INFO) << "Shutting down AWS API"; + Aws::Utils::Logging::ShutdownAWSLogging(); + Aws::ShutdownAPI(ctx->options); + delete ctx; + }); + context_ = ctx; + return ctx; +} + +} // namespace internal_kvstore_s3 +} // namespace tensorstore diff --git a/tensorstore/kvstore/s3/s3_context.h b/tensorstore/kvstore/s3/s3_context.h new file mode 100644 index 000000000..9d3fc22e5 --- /dev/null +++ b/tensorstore/kvstore/s3/s3_context.h @@ -0,0 +1,66 @@ +// Copyright 2024 The TensorStore Authors +// +// Licensed under the Apache License,
Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_KVSTORE_S3_S3_CONTEXT_H_ +#define TENSORSTORE_KVSTORE_S3_S3_CONTEXT_H_ + +#include +#include + +#include +#include +#include + + +namespace tensorstore { +namespace internal_kvstore_s3 { + +static constexpr char kAWSTag[] = "AWS"; + +class AWSLogSystem : public Aws::Utils::Logging::LogSystemInterface { +public: + AWSLogSystem(Aws::Utils::Logging::LogLevel log_level); + Aws::Utils::Logging::LogLevel GetLogLevel(void) const override; + void SetLogLevel(Aws::Utils::Logging::LogLevel log_level); + + // Writes the stream to ProcessFormattedStatement. + void LogStream(Aws::Utils::Logging::LogLevel log_level, const char* tag, + const Aws::OStringStream& messageStream) override; + + // Flushes the buffered messages if the logger supports buffering + void Flush() override { return; }; + + void Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, + const char* format, ...) 
override; + +private: + void LogMessage(Aws::Utils::Logging::LogLevel log_level, const std::string & message); + Aws::Utils::Logging::LogLevel log_level_; +}; + + +struct AwsContext { + Aws::SDKOptions options; + std::shared_ptr log_system_; + std::shared_ptr cred_provider_; +}; + +// Initialise AWS API and Logging +std::shared_ptr GetAwsContext(); + + +} // namespace internal_kvstore_s3 +} // namespace tensorstore + +#endif // TENSORSTORE_KVSTORE_S3_S3_CONTEXT_H_ \ No newline at end of file diff --git a/tensorstore/kvstore/s3/s3_context_test.cc b/tensorstore/kvstore/s3/s3_context_test.cc new file mode 100644 index 000000000..5cfc9db01 --- /dev/null +++ b/tensorstore/kvstore/s3/s3_context_test.cc @@ -0,0 +1,69 @@ + +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "tensorstore/kvstore/s3/s3_context.h" + +using ::tensorstore::internal_kvstore_s3::GetAwsContext; +using ::tensorstore::internal_kvstore_s3::AwsContext; + +namespace { + +static constexpr char kAWSTag[] = "AWS"; + +TEST(S3ContextTest, Basic) { + auto ctx = GetAwsContext(); + EXPECT_EQ(ctx.use_count(), 1); + std::weak_ptr wp = ctx; + EXPECT_EQ(wp.use_count(), 1); + + auto ctx2 = GetAwsContext(); + EXPECT_EQ(ctx, ctx2); + EXPECT_EQ(wp.use_count(), 2); + + ctx.reset(); + ctx2.reset(); + + EXPECT_EQ(wp.use_count(), 0); + EXPECT_EQ(wp.lock(), nullptr); + + ctx = GetAwsContext(); +} + +TEST(S3ContextTest, AWS4Signing) { + auto ctx = GetAwsContext(); + auto signer = Aws::Client::AWSAuthV4Signer(ctx->cred_provider_, "s3", "us-east-2"); + //auto req = Aws::Http::HttpRequest(); +} + +TEST(S3ContextTest, Endpoint) { + EXPECT_EQ(Aws::S3::S3Endpoint::ForRegion("us-east-2", false, false), "s3.us-east-2.amazonaws.com"); +} + +TEST(S3ContextTest, Client) { + class OffloadExecutor : public Aws::Utils::Threading::Executor { + protected: + bool SubmitToThread(std::function && fn) { + fn(); + return true; + } + }; + + auto ctx = GetAwsContext(); + auto cfg
= Aws::Client::ClientConfiguration(); + cfg.executor = Aws::MakeShared(kAWSTag); + auto client = Aws::S3::S3Client(); +} + +} // namespace { \ No newline at end of file From 354f7007fc9b08f8d72b00164229e3ba3f473c6a Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 26 Apr 2024 14:13:24 +0200 Subject: [PATCH 15/48] header cleanup --- tensorstore/kvstore/s3/s3_context.cc | 3 ++- tensorstore/kvstore/s3/s3_context.h | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorstore/kvstore/s3/s3_context.cc b/tensorstore/kvstore/s3/s3_context.cc index f80e3328b..acc184570 100644 --- a/tensorstore/kvstore/s3/s3_context.cc +++ b/tensorstore/kvstore/s3/s3_context.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include "absl/log/absl_log.h" #include "absl/synchronization/mutex.h" @@ -33,7 +34,7 @@ namespace { absl::Mutex context_mu_; std::weak_ptr context_; -} +} // namespace AWSLogSystem::AWSLogSystem(Aws::Utils::Logging::LogLevel log_level) : log_level_(log_level) {} diff --git a/tensorstore/kvstore/s3/s3_context.h b/tensorstore/kvstore/s3/s3_context.h index 9d3fc22e5..f8c86f949 100644 --- a/tensorstore/kvstore/s3/s3_context.h +++ b/tensorstore/kvstore/s3/s3_context.h @@ -19,8 +19,9 @@ #include #include -#include +#include #include +#include namespace tensorstore { @@ -41,6 +42,7 @@ class AWSLogSystem : public Aws::Utils::Logging::LogSystemInterface { // Flushes the buffered messages if the logger supports buffering void Flush() override { return; }; + // Overridden, but prefer the safer LogStream void Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, const char* format, ...) 
override; From e2eac845bd350d557f045d47112e605f164bb22a Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 26 Apr 2024 14:21:46 +0200 Subject: [PATCH 16/48] sanity check basic credential retrieval --- tensorstore/kvstore/s3/s3_context_test.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorstore/kvstore/s3/s3_context_test.cc b/tensorstore/kvstore/s3/s3_context_test.cc index 5cfc9db01..5a9ad5632 100644 --- a/tensorstore/kvstore/s3/s3_context_test.cc +++ b/tensorstore/kvstore/s3/s3_context_test.cc @@ -32,6 +32,9 @@ TEST(S3ContextTest, Basic) { EXPECT_EQ(ctx, ctx2); EXPECT_EQ(wp.use_count(), 2); + // sanity check basic credential retrieval + auto creds = ctx->cred_provider_->GetAWSCredentials(); + ctx.reset(); ctx2.reset(); From 1e1bd300d9594586a068b8ca69f537e008b28644 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 26 Apr 2024 17:58:56 +0200 Subject: [PATCH 17/48] Remove AuthSigner cruft from s3_context_test.cc --- tensorstore/kvstore/s3/s3_context_test.cc | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tensorstore/kvstore/s3/s3_context_test.cc b/tensorstore/kvstore/s3/s3_context_test.cc index 5a9ad5632..eadcd6420 100644 --- a/tensorstore/kvstore/s3/s3_context_test.cc +++ b/tensorstore/kvstore/s3/s3_context_test.cc @@ -2,9 +2,7 @@ #include #include -#include #include -#include #include #include @@ -44,11 +42,6 @@ TEST(S3ContextTest, Basic) { ctx = GetAwsContext(); } -TEST(S3ContextTest, AWS4Signing) { - auto ctx = GetAwsContext(); - auto signer = Aws::Client::AWSAuthV4Signer(ctx->cred_provider_, "s3", "us-east-2"); - //auto req = Aws::Http::HttpRequest(); -} TEST(S3ContextTest, Endpoint) { EXPECT_EQ(Aws::S3::S3Endpoint::ForRegion("us-east-2", false, false), "s3.us-east-2.amazonaws.com"); From 2a08922d44be17700f1ab9d0bcb75fc7544f302c Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 26 Apr 2024 18:00:08 +0200 Subject: [PATCH 18/48] Add NewS3RequestBuilder --- .../kvstore/s3/new_s3_request_builder.cc | 0 
.../kvstore/s3/new_s3_request_builder.h | 141 ++++++++++++++++++ .../kvstore/s3/new_s3_request_builder_test.cc | 25 ++++ 3 files changed, 166 insertions(+) create mode 100644 tensorstore/kvstore/s3/new_s3_request_builder.cc create mode 100644 tensorstore/kvstore/s3/new_s3_request_builder.h create mode 100644 tensorstore/kvstore/s3/new_s3_request_builder_test.cc diff --git a/tensorstore/kvstore/s3/new_s3_request_builder.cc b/tensorstore/kvstore/s3/new_s3_request_builder.cc new file mode 100644 index 000000000..e69de29bb diff --git a/tensorstore/kvstore/s3/new_s3_request_builder.h b/tensorstore/kvstore/s3/new_s3_request_builder.h new file mode 100644 index 000000000..4b4686c7b --- /dev/null +++ b/tensorstore/kvstore/s3/new_s3_request_builder.h @@ -0,0 +1,141 @@ +#include +#include +#include +#include + + +#include +#include +#include +#include + +#include "absl/strings/cord.h" + +#include "tensorstore/internal/http/http_request.h" +#include "tensorstore/kvstore/s3/s3_context.h" + +namespace tensorstore { +namespace internal_kvstore_s3 { + +// Make an absl::Cord look like a streambuf +class CordStreambuf : public std::streambuf { +public: + CordStreambuf(const absl::Cord& cord) : cord_(cord), current_(cord_.char_begin()) { + setg(nullptr, nullptr, nullptr); + } + +protected: + // Refill the get area of the buffer + int_type underflow() override { + if (current_ == cord_.char_end()) { + return traits_type::eof(); + } + + // Set buffer pointers for the next character + setg(const_cast(&*current_), + const_cast(&*current_), + const_cast(&*std::next(current_))); + + return traits_type::to_int_type(*current_++); + } + +private: + const absl::Cord& cord_; + absl::Cord::CharIterator current_; +}; + +// Make an absl::Cord look like an iostream +class CordIOStream : public std::iostream { +public: + CordIOStream(const absl::Cord& cord) : std::iostream(&buffer_), buffer_(cord) { + rdbuf(&buffer_); + } + +private: + CordStreambuf buffer_; +}; + +class AwsHttpRequestAdapter : 
public Aws::Http::Standard::StandardHttpRequest { +private: + static Aws::Http::HttpMethod FromStringMethod(std::string_view method) { + if(method == "GET") { + return Aws::Http::HttpMethod::HTTP_GET; + } else if (method == "PUT") { + return Aws::Http::HttpMethod::HTTP_PUT; + } else if (method == "HEAD") { + return Aws::Http::HttpMethod::HTTP_HEAD; + } else if (method == "DELETE") { + return Aws::Http::HttpMethod::HTTP_DELETE; + } else if (method == "POST") { + return Aws::Http::HttpMethod::HTTP_POST; + } else if (method == "PATCH") { + return Aws::Http::HttpMethod::HTTP_PATCH; + } else { + // NOTE: return an error + return Aws::Http::HttpMethod::HTTP_GET; + } + } + +public: + AwsHttpRequestAdapter(std::string_view method, std::string endpoint_url) : + Aws::Http::Standard::StandardHttpRequest(Aws::Http::URI(Aws::String(endpoint_url)), + FromStringMethod(method)) {} +}; + +class NewS3RequestBuilder { +public: + NewS3RequestBuilder(std::string_view method, std::string endpoint_url) : + request_(method, endpoint_url) {} + + NewS3RequestBuilder & AddBody(const absl::Cord & body) { + // NOTE: eliminate allocation + auto cord_adapter = std::make_shared(body); + request_.AddContentBody(cord_adapter); + return *this; + } + + NewS3RequestBuilder & AddHeader(std::string_view header) { + auto delim_pos = header.find(':'); + assert(delim_pos != std::string_view::npos); + // NOTE: string copies + request_.SetHeaderValue(std::string(header.substr(0, delim_pos)).c_str(), + Aws::String(header.substr(delim_pos + 1))); + return *this; + } + + NewS3RequestBuilder & AddQueryParameter(std::string key, std::string value) { + // Note: string copies + request_.AddQueryStringParameter(key.c_str(), Aws::String(value)); + return *this; + } + + internal_http::HttpRequest BuildRequest(AwsContext ctx) { + auto signer = Aws::Client::AWSAuthV4Signer(ctx.cred_provider_, "s3", "us-east-1"); + assert(!request_.HasAuthorization()); + auto succeeded = signer.SignRequest(request_, true); + 
assert(succeeded); + assert(request_.HasAuthorization()); + auto method = Aws::Http::HttpMethodMapper::GetNameForHttpMethod(request_.GetMethod()); + auto aws_headers = request_.GetHeaders(); + + std::vector headers; + headers.reserve(aws_headers.size()); + + for(auto & pair: aws_headers) { + headers.emplace_back(absl::StrFormat("%s: %s", pair.first, pair.second)); + } + + return internal_http::HttpRequest{ + std::move(method), + std::string(request_.GetURIString(true)), + "", + headers}; + } + +public: + std::shared_ptr body_; + AwsHttpRequestAdapter request_; +}; + +} // namespace internal_kvstore_s3 +} // namespace tensorstore diff --git a/tensorstore/kvstore/s3/new_s3_request_builder_test.cc b/tensorstore/kvstore/s3/new_s3_request_builder_test.cc new file mode 100644 index 000000000..a1ac66b90 --- /dev/null +++ b/tensorstore/kvstore/s3/new_s3_request_builder_test.cc @@ -0,0 +1,25 @@ +#include + +#include "tensorstore/kvstore/s3/s3_context.h" +#include "tensorstore/kvstore/s3/new_s3_request_builder.h" + + +using ::tensorstore::internal_kvstore_s3::NewS3RequestBuilder; + +namespace { + +TEST(NewS3RequestBuilderTest, Basic) { + auto builder = NewS3RequestBuilder("get", "http://bucket") + .AddBody(absl::Cord{"foobar"}) + .AddHeader("foo: bar") + .AddQueryParameter("qux", "baz"); + + auto ctx = tensorstore::internal_kvstore_s3::GetAwsContext(); + + auto req = builder.BuildRequest(*ctx); + EXPECT_TRUE(builder.request_.HasAuthorization()); + + ABSL_LOG(INFO) << req; +} + +} // namespace From 0c0078322f29ce6bd27285cdd7676cd276ab7cfe Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 26 Apr 2024 18:00:25 +0200 Subject: [PATCH 19/48] Update BUILD --- tensorstore/kvstore/s3/BUILD | 52 ++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/tensorstore/kvstore/s3/BUILD b/tensorstore/kvstore/s3/BUILD index 8ebc5f571..08f6e12c5 100644 --- a/tensorstore/kvstore/s3/BUILD +++ b/tensorstore/kvstore/s3/BUILD @@ -159,8 +159,6 @@ 
tensorstore_cc_library( "//tensorstore/internal/log:verbose_flag", "//tensorstore/kvstore:byte_range", "//tensorstore/kvstore/s3/credentials:aws_credentials", - # TODO: substitute this with just the auth components of the aws sdk - "@com_github_aws_cpp_sdk//:s3", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/log:absl_check", "@com_google_absl//absl/log:absl_log", @@ -186,6 +184,33 @@ tensorstore_cc_test( ], ) +tensorstore_cc_library( + name = "new_s3_request_builder", + srcs = [ + "new_s3_request_builder.cc", + ], + hdrs = [ + "new_s3_request_builder.h" + ], + deps = [ + ":s3_context", + "//tensorstore/internal/http", + "@com_github_aws_cpp_sdk//:core", + "@com_google_absl//absl/strings:cord", + ] +) + +tensorstore_cc_test( + name = "new_s3_request_builder_test", + srcs = [ + "new_s3_request_builder_test.cc", + ], + deps = [ + ":new_s3_request_builder", + "@com_google_googletest//:gtest_main", + ] +) + tensorstore_cc_library( name = "validate", srcs = [ @@ -276,6 +301,29 @@ tensorstore_cc_library( ], ) +tensorstore_cc_library( + name = "s3_context", + srcs = ["s3_context.cc"], + hdrs = ["s3_context.h"], + deps = [ + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/synchronization", + "@com_github_aws_cpp_sdk//:core", + ] +) + +tensorstore_cc_test( + name = "s3_context_test", + size = "small", + srcs = ["s3_context_test.cc"], + deps = [ + ":s3_context", + "@com_github_aws_cpp_sdk//:s3", + "@com_google_googletest//:gtest_main", + ] +) + + tensorstore_cc_test( name = "s3_endpoint_test", size = "small", From 2908289f88fdc05fdd1731c17a9a63441b6bcfc3 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Wed, 8 May 2024 18:53:22 +0200 Subject: [PATCH 20/48] Adapt more AWS classes, improve SDK setup --- tensorstore/kvstore/s3/BUILD | 5 + tensorstore/kvstore/s3/s3_context.cc | 194 +++++++++++++++++++++- tensorstore/kvstore/s3/s3_context.h | 1 - tensorstore/kvstore/s3/s3_context_test.cc | 38 ++++- 4 files changed, 222 insertions(+), 16 
deletions(-) diff --git a/tensorstore/kvstore/s3/BUILD b/tensorstore/kvstore/s3/BUILD index e3822a196..2734a324f 100644 --- a/tensorstore/kvstore/s3/BUILD +++ b/tensorstore/kvstore/s3/BUILD @@ -196,6 +196,7 @@ tensorstore_cc_library( ":s3_context", "//tensorstore/internal/http", "@com_github_aws_cpp_sdk//:core", + "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:cord", ] ) @@ -306,6 +307,10 @@ tensorstore_cc_library( srcs = ["s3_context.cc"], hdrs = ["s3_context.h"], deps = [ + "//tensorstore/util:executor", + "//tensorstore/internal/http", + "//tensorstore/internal/http:curl_transport", + "//tensorstore/internal/thread:thread_pool", "@com_google_absl//absl/log:absl_log", "@com_google_absl//absl/synchronization", "@com_github_aws_cpp_sdk//:core", diff --git a/tensorstore/kvstore/s3/s3_context.cc b/tensorstore/kvstore/s3/s3_context.cc index acc184570..84346e8be 100644 --- a/tensorstore/kvstore/s3/s3_context.cc +++ b/tensorstore/kvstore/s3/s3_context.cc @@ -18,24 +18,192 @@ #include #include +#include +#include +#include #include #include #include #include +#include #include "absl/log/absl_log.h" #include "absl/synchronization/mutex.h" +#include "tensorstore/internal/http/curl_transport.h" +#include "tensorstore/internal/http/http_request.h" +#include "tensorstore/internal/http/http_response.h" + namespace tensorstore { namespace internal_kvstore_s3 { namespace { +static constexpr char kAwsTag[] = "AWS"; + absl::Mutex context_mu_; std::weak_ptr context_; } // namespace +/// Wraps a tensorstore HttpRequest in a Aws HttpRequest interface +class HttpRequestAdapter : public Aws::Http::HttpRequest { +public: + ::tensorstore::internal_http::HttpRequest request_; + Aws::Http::HeaderValueCollection headers_; + Aws::IOStreamFactory stream_factory_; + std::shared_ptr body_; + + HttpRequestAdapter(const Aws::Http::URI & uri, Aws::Http::HttpMethod method) : + HttpRequest(uri, method), + headers_(), + stream_factory_(), + body_(nullptr) {}; + + virtual 
Aws::Http::HeaderValueCollection GetHeaders() const override { + return headers_; + } + + virtual const Aws::String & GetHeaderValue(const char* headerName) const override { + auto it = headers_.find(headerName); + assert(it != headers_.end()); + return it->second; + } + + virtual bool HasHeader(const char* name) const override { + return headers_.find(name) != headers_.end(); + } + + virtual void SetHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) override { + headers_.insert({headerName, headerValue}); + } + + virtual void SetHeaderValue(const char* headerName, const Aws::String& headerValue) override { + headers_.insert({ + Aws::Utils::StringUtils::ToLower(headerName), + Aws::Utils::StringUtils::Trim(headerValue.c_str())}); + } + + virtual void DeleteHeader(const char* headerName) override { + headers_.erase(headers_.find(headerName)); + } + + virtual int64_t GetSize() const override { + return headers_.size(); + } + + virtual void AddContentBody(const std::shared_ptr& strContext) override { + body_ = strContext; + } + + virtual const std::shared_ptr& GetContentBody() const override { + return body_; + } + + virtual void SetResponseStreamFactory(const Aws::IOStreamFactory& streamFactory) override { + stream_factory_ = streamFactory; + } + + virtual const Aws::IOStreamFactory& GetResponseStreamFactory() const override { + return stream_factory_; + } +}; + +/// Wraps a tensorstore HttpResponse in an Aws HttpResponse interface +class HttpResponseAdapter: public Aws::Http::HttpResponse { +public: + ::tensorstore::internal_http::HttpResponse response_; + ::Aws::Utils::Stream::ResponseStream body_stream_; + + HttpResponseAdapter( + ::tensorstore::internal_http::HttpResponse response, + const std::shared_ptr & originatingRequest) : + ::Aws::Http::HttpResponse(originatingRequest), + response_(std::move(response)), + body_stream_(originatingRequest->GetResponseStreamFactory()()) {}; + + virtual Aws::Utils::Stream::ResponseStream && 
SwapResponseStreamOwnership() override { + return std::move(body_stream_); + } + + virtual void AddHeader(const Aws::String& headerName, const Aws::String& headerValue) override { + response_.headers.insert({headerName, headerValue}); + } + + virtual bool HasHeader(const char* headerName) const override { + return response_.headers.find(Aws::Utils::StringUtils::ToLower(headerName)) != response_.headers.end(); + } + + virtual Aws::Http::HeaderValueCollection GetHeaders() const override { + Aws::Http::HeaderValueCollection headers; + for(const auto & header: response_.headers) { + headers.insert({header.first, header.second}); + } + return headers; + } + + virtual const Aws::String & GetHeader(const Aws::String& headerName) const override { + auto it = response_.headers.find(headerName); + assert(it != response_.headers.end()); + return it->second; + } + + virtual Aws::IOStream & GetResponseBody() const override { + return body_stream_.GetUnderlyingStream(); + } +}; + + +class CustomHttpClient : public Aws::Http::HttpClient { +public: + std::shared_ptr MakeRequest( + const std::shared_ptr & request, + Aws::Utils::RateLimits::RateLimiterInterface* readLimiter = nullptr, + Aws::Utils::RateLimits::RateLimiterInterface* writeLimiter = nullptr) const override { + ABSL_LOG(INFO) << "Making a request " << std::endl; + if(auto req_adapter = std::dynamic_pointer_cast(request); req_adapter) { + auto transport = ::tensorstore::internal_http::GetDefaultHttpTransport(); + auto future = transport->IssueRequest(req_adapter->request_, {}); + // future.ExecuteWhenReady may be desirable + auto response = future.value(); + return Aws::MakeShared(kAWSTag, response, request); + } + + auto failed_response = Aws::MakeShared(kAwsTag, request); + failed_response->SetResponseCode(Aws::Http::HttpResponseCode::PRECONDITION_FAILED); + return failed_response; + }; +}; + + +/// Custom factory overriding Aws::Http::DefaultHttpFatory +class CustomHttpFactory : public Aws::Http::HttpClientFactory { 
+public: + std::shared_ptr CreateHttpClient( + const Aws::Client::ClientConfiguration & clientConfiguration) const override { + ABSL_LOG(INFO) << "Making a custom HTTP Client"; + return Aws::MakeShared(kAWSTag); + }; + + std::shared_ptr CreateHttpRequest( + const Aws::String &uri, Aws::Http::HttpMethod method, + const Aws::IOStreamFactory &streamFactory) const override { + return CreateHttpRequest(Aws::Http::URI(uri), method, streamFactory); + } + + std::shared_ptr CreateHttpRequest( + const Aws::Http::URI& uri, Aws::Http::HttpMethod method, + const Aws::IOStreamFactory& streamFactory) const override + { + ABSL_LOG(INFO) << "CreateHttpRequest " + << Aws::Http::HttpMethodMapper::GetNameForHttpMethod(method) + << " " << uri.GetURIString(true); + auto request = Aws::MakeShared(kAWSTag, uri, method); + request->SetResponseStreamFactory(streamFactory); + return request; + } +}; + AWSLogSystem::AWSLogSystem(Aws::Utils::Logging::LogLevel log_level) : log_level_(log_level) {} @@ -94,22 +262,36 @@ std::shared_ptr GetAwsContext() { return context_.lock(); } - ABSL_LOG(INFO) << "Initialising AWS API"; auto options = Aws::SDKOptions{}; + // Customise HttpClientFactory + // Disable curl init/cleanup + // Don't install the SIGPIPE handler + // options.httpOptions.httpClientFactory_create_fn = []() { + // return Aws::MakeShared(kAwsTag); + // }; + options.httpOptions.initAndCleanupCurl = false; + options.httpOptions.installSigPipeHandler = false; + + // Install AWS -> Abseil Logging Translator + auto level = Aws::Utils::Logging::LogLevel::Info; + options.loggingOptions.logLevel = level; + options.loggingOptions.logger_create_fn = [=]() { + return Aws::MakeShared(kAWSTag, level); + }; + + ABSL_LOG(INFO) << "Initialising AWS SDK API"; Aws::InitAPI(options); - auto log = Aws::MakeShared(kAWSTag, Aws::Utils::Logging::LogLevel::Info); - Aws::Utils::Logging::InitializeAWSLogging(std::move(log)); + ABSL_LOG(INFO) << "Done Initialising AWS SDK API"; + auto provider = 
Aws::MakeShared(kAWSTag); auto ctx = std::shared_ptr( new AwsContext{ std::move(options), - std::move(log), std::move(provider)}, [](AwsContext * ctx) { absl::MutexLock lock(&context_mu_); - ABSL_LOG(INFO) << "Shutting down AWS API"; - Aws::Utils::Logging::ShutdownAWSLogging(); + ABSL_LOG(INFO) << "Shutting down AWS SDK API"; Aws::ShutdownAPI(ctx->options); delete ctx; }); diff --git a/tensorstore/kvstore/s3/s3_context.h b/tensorstore/kvstore/s3/s3_context.h index f8c86f949..503d19b4b 100644 --- a/tensorstore/kvstore/s3/s3_context.h +++ b/tensorstore/kvstore/s3/s3_context.h @@ -54,7 +54,6 @@ class AWSLogSystem : public Aws::Utils::Logging::LogSystemInterface { struct AwsContext { Aws::SDKOptions options; - std::shared_ptr log_system_; std::shared_ptr cred_provider_; }; diff --git a/tensorstore/kvstore/s3/s3_context_test.cc b/tensorstore/kvstore/s3/s3_context_test.cc index eadcd6420..47ddf9284 100644 --- a/tensorstore/kvstore/s3/s3_context_test.cc +++ b/tensorstore/kvstore/s3/s3_context_test.cc @@ -1,18 +1,25 @@ +#include #include + +#include "absl/log/absl_log.h" + #include #include #include #include #include +#include + +#include "tensorstore/kvstore/s3/s3_context.h" +#include "tensorstore/internal/thread/thread_pool.h" +#include "tensorstore/util/executor.h" #include #include -#include "tensorstore/kvstore/s3/s3_context.h" - using ::tensorstore::internal_kvstore_s3::GetAwsContext; using ::tensorstore::internal_kvstore_s3::AwsContext; @@ -31,7 +38,7 @@ TEST(S3ContextTest, Basic) { EXPECT_EQ(wp.use_count(), 2); // sanity check basic credential retrieval - auto creds = ctx->cred_provider_->GetAWSCredentials(); + //auto creds = ctx->cred_provider_->GetAWSCredentials(); ctx.reset(); ctx2.reset(); @@ -42,24 +49,37 @@ TEST(S3ContextTest, Basic) { ctx = GetAwsContext(); } - TEST(S3ContextTest, Endpoint) { EXPECT_EQ(Aws::S3::S3Endpoint::ForRegion("us-east-2", false, false), "s3.us-east-2.amazonaws.com"); } TEST(S3ContextTest, Client) { - class OffloadExecutor : public 
Aws::Utils::Threading::Executor { + class AwsExecutorAdapter : public Aws::Utils::Threading::Executor { + public: + AwsExecutorAdapter(): executor_(::tensorstore::internal::DetachedThreadPool(4)) {} protected: - bool SubmitToThread(std::function && fn) { - fn(); + bool SubmitToThread(std::function && fn) override { + ::tensorstore::WithExecutor(executor_, std::move(fn)); return true; } + + private: + ::tensorstore::Executor executor_; }; auto ctx = GetAwsContext(); auto cfg = Aws::Client::ClientConfiguration(); - cfg.executor = Aws::MakeShared(kAWSTag); - auto client = Aws::S3::S3Client(); + //cfg.executor = Aws::MakeShared(kAWSTag); + cfg.executor->Submit([msg = "Submission Works"] { ABSL_LOG(INFO) << msg; }); + auto client = Aws::S3::S3Client(cfg); + auto head_bucket = Aws::S3::Model::HeadBucketRequest().WithBucket("ratt-public-data"); + auto outcome = client.HeadBucket(head_bucket); + if(!outcome.IsSuccess()) { + auto & err = outcome.GetError(); + std::cerr << "Error: " << err.GetExceptionName() << ": " << err.GetMessage() << std::endl; + } else { + std::cout << "Success" << std::endl; + } } } // namespace { \ No newline at end of file From 9d9ea8ad90c25f808097a987a7a7c240bd39803b Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Thu, 9 May 2024 10:55:54 +0200 Subject: [PATCH 21/48] Move S3Client code in a separate kvstore/s3_sdk directory --- tensorstore/kvstore/BUILD | 1 + tensorstore/kvstore/s3/BUILD | 29 +-------- .../kvstore/s3/new_s3_request_builder.h | 2 +- .../kvstore/s3/new_s3_request_builder_test.cc | 2 +- tensorstore/kvstore/s3_sdk/BUILD | 51 +++++++++++++++ .../kvstore/{s3 => s3_sdk}/s3_context.cc | 63 ++++++++++++++----- .../kvstore/{s3 => s3_sdk}/s3_context.h | 0 .../kvstore/{s3 => s3_sdk}/s3_context_test.cc | 14 +++-- 8 files changed, 109 insertions(+), 53 deletions(-) create mode 100644 tensorstore/kvstore/s3_sdk/BUILD rename tensorstore/kvstore/{s3 => s3_sdk}/s3_context.cc (82%) rename tensorstore/kvstore/{s3 => s3_sdk}/s3_context.h (100%) 
rename tensorstore/kvstore/{s3 => s3_sdk}/s3_context_test.cc (78%) diff --git a/tensorstore/kvstore/BUILD b/tensorstore/kvstore/BUILD index a28eaf3af..1bea1b829 100644 --- a/tensorstore/kvstore/BUILD +++ b/tensorstore/kvstore/BUILD @@ -13,6 +13,7 @@ DRIVER_DOCS = [ "neuroglancer_uint64_sharded", "ocdbt", "s3", + "s3_sdk", "tsgrpc", "zarr3_sharding_indexed", "zip", diff --git a/tensorstore/kvstore/s3/BUILD b/tensorstore/kvstore/s3/BUILD index 2734a324f..a556fe678 100644 --- a/tensorstore/kvstore/s3/BUILD +++ b/tensorstore/kvstore/s3/BUILD @@ -193,7 +193,7 @@ tensorstore_cc_library( "new_s3_request_builder.h" ], deps = [ - ":s3_context", + "//tensorstore/kvstore/s3_sdk:s3_context", "//tensorstore/internal/http", "@com_github_aws_cpp_sdk//:core", "@com_google_absl//absl/strings", @@ -302,33 +302,6 @@ tensorstore_cc_library( ], ) -tensorstore_cc_library( - name = "s3_context", - srcs = ["s3_context.cc"], - hdrs = ["s3_context.h"], - deps = [ - "//tensorstore/util:executor", - "//tensorstore/internal/http", - "//tensorstore/internal/http:curl_transport", - "//tensorstore/internal/thread:thread_pool", - "@com_google_absl//absl/log:absl_log", - "@com_google_absl//absl/synchronization", - "@com_github_aws_cpp_sdk//:core", - ] -) - -tensorstore_cc_test( - name = "s3_context_test", - size = "small", - srcs = ["s3_context_test.cc"], - deps = [ - ":s3_context", - "@com_github_aws_cpp_sdk//:s3", - "@com_google_googletest//:gtest_main", - ] -) - - tensorstore_cc_test( name = "s3_endpoint_test", size = "small", diff --git a/tensorstore/kvstore/s3/new_s3_request_builder.h b/tensorstore/kvstore/s3/new_s3_request_builder.h index 4b4686c7b..3d1f93939 100644 --- a/tensorstore/kvstore/s3/new_s3_request_builder.h +++ b/tensorstore/kvstore/s3/new_s3_request_builder.h @@ -12,7 +12,7 @@ #include "absl/strings/cord.h" #include "tensorstore/internal/http/http_request.h" -#include "tensorstore/kvstore/s3/s3_context.h" +#include "tensorstore/kvstore/s3_sdk/s3_context.h" namespace tensorstore { 
namespace internal_kvstore_s3 { diff --git a/tensorstore/kvstore/s3/new_s3_request_builder_test.cc b/tensorstore/kvstore/s3/new_s3_request_builder_test.cc index a1ac66b90..1b33de4c2 100644 --- a/tensorstore/kvstore/s3/new_s3_request_builder_test.cc +++ b/tensorstore/kvstore/s3/new_s3_request_builder_test.cc @@ -1,6 +1,6 @@ #include -#include "tensorstore/kvstore/s3/s3_context.h" +#include "tensorstore/kvstore/s3_sdk/s3_context.h" #include "tensorstore/kvstore/s3/new_s3_request_builder.h" diff --git a/tensorstore/kvstore/s3_sdk/BUILD b/tensorstore/kvstore/s3_sdk/BUILD new file mode 100644 index 000000000..28f82b066 --- /dev/null +++ b/tensorstore/kvstore/s3_sdk/BUILD @@ -0,0 +1,51 @@ +# Placeholder: load py_binary +load("//bazel:tensorstore.bzl", "tensorstore_cc_library", "tensorstore_cc_test") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +filegroup( + name = "doc_sources", + srcs = glob([ + "*.rst", + "*.yml", + ]), +) +tensorstore_cc_library( + name = "s3_context", + srcs = ["s3_context.cc"], + hdrs = ["s3_context.h"], + deps = [ + "//tensorstore/util:executor", + "//tensorstore/internal/http", + "//tensorstore/internal/http:curl_transport", + "//tensorstore/internal/thread:thread_pool", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/synchronization", + "@com_github_aws_cpp_sdk//:core", + ] +) + +tensorstore_cc_test( + name = "s3_context_test", + size = "small", + srcs = ["s3_context_test.cc"], + deps = [ + ":s3_context", + "@com_github_aws_cpp_sdk//:s3", + "@com_google_googletest//:gtest_main", + ] +) + +py_binary( + name = "moto_server", + testonly = 1, + srcs = ["moto_server.py"], + tags = [ + "manual", + "notap", + "skip-cmake", + ], + deps = ["@pypa_moto//:moto"], +) diff --git a/tensorstore/kvstore/s3/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc similarity index 82% rename from tensorstore/kvstore/s3/s3_context.cc rename to tensorstore/kvstore/s3_sdk/s3_context.cc index 
84346e8be..b65d2933c 100644 --- a/tensorstore/kvstore/s3/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "tensorstore/kvstore/s3/s3_context.h" +#include "tensorstore/kvstore/s3_sdk/s3_context.h" #include +#include #include #include @@ -33,6 +34,7 @@ #include "tensorstore/internal/http/curl_transport.h" #include "tensorstore/internal/http/http_request.h" #include "tensorstore/internal/http/http_response.h" +#include "tensorstore/internal/http/http_transport.h" namespace tensorstore { namespace internal_kvstore_s3 { @@ -58,7 +60,11 @@ class HttpRequestAdapter : public Aws::Http::HttpRequest { HttpRequest(uri, method), headers_(), stream_factory_(), - body_(nullptr) {}; + body_(nullptr) { + + request_.method = Aws::Http::HttpMethodMapper::GetNameForHttpMethod(method); + request_.url = uri.GetURIString(true); + }; virtual Aws::Http::HeaderValueCollection GetHeaders() const override { return headers_; @@ -85,7 +91,9 @@ class HttpRequestAdapter : public Aws::Http::HttpRequest { } virtual void DeleteHeader(const char* headerName) override { - headers_.erase(headers_.find(headerName)); + if(auto it = headers_.find(Aws::Utils::StringUtils::ToLower(headerName)); it != headers_.end()) { + headers_.erase(it); + } } virtual int64_t GetSize() const override { @@ -120,7 +128,14 @@ class HttpResponseAdapter: public Aws::Http::HttpResponse { const std::shared_ptr & originatingRequest) : ::Aws::Http::HttpResponse(originatingRequest), response_(std::move(response)), - body_stream_(originatingRequest->GetResponseStreamFactory()()) {}; + body_stream_(originatingRequest->GetResponseStreamFactory()) { + + // Cast int response code to an HttpResponseCode enum + // Potential for undefined behaviour here, + // but AWS probably? 
won't respond with + // a response code it doesn't know about + SetResponseCode(static_cast(response_.status_code)); + }; virtual Aws::Utils::Stream::ResponseStream && SwapResponseStreamOwnership() override { return std::move(body_stream_); @@ -160,18 +175,35 @@ class CustomHttpClient : public Aws::Http::HttpClient { const std::shared_ptr & request, Aws::Utils::RateLimits::RateLimiterInterface* readLimiter = nullptr, Aws::Utils::RateLimits::RateLimiterInterface* writeLimiter = nullptr) const override { - ABSL_LOG(INFO) << "Making a request " << std::endl; + absl::Cord payload; if(auto req_adapter = std::dynamic_pointer_cast(request); req_adapter) { + if(auto iostream = req_adapter->GetContentBody(); iostream) { + // This is untested and probably broken + // Ideally, we'd want a streambuf wrapping an underlying Cord + // to avoid the copy here, especially for responses + auto rdbuf = iostream->rdbuf(); + std::streamsize size = rdbuf->pubseekoff(0, iostream->end); + auto cord_buffer = absl::CordBuffer::CreateWithDefaultLimit(size); + absl::Span data = cord_buffer.available_up_to(size); + rdbuf->sgetn(data.data(), data.size()); + cord_buffer.IncreaseLengthBy(data.size()); + payload.Append(std::move(cord_buffer)); + } + auto transport = ::tensorstore::internal_http::GetDefaultHttpTransport(); - auto future = transport->IssueRequest(req_adapter->request_, {}); + ABSL_LOG(INFO) << req_adapter->request_; + auto future = transport->IssueRequest( + req_adapter->request_, + ::tensorstore::internal_http::IssueRequestOptions(payload)); // future.ExecuteWhenReady may be desirable auto response = future.value(); + ABSL_LOG(INFO) << response; return Aws::MakeShared(kAWSTag, response, request); } - auto failed_response = Aws::MakeShared(kAwsTag, request); - failed_response->SetResponseCode(Aws::Http::HttpResponseCode::PRECONDITION_FAILED); - return failed_response; + auto fail = Aws::MakeShared(kAwsTag, request); + 
fail->SetResponseCode(Aws::Http::HttpResponseCode::PRECONDITION_FAILED); + return fail; }; }; @@ -195,9 +227,6 @@ class CustomHttpFactory : public Aws::Http::HttpClientFactory { const Aws::Http::URI& uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory& streamFactory) const override { - ABSL_LOG(INFO) << "CreateHttpRequest " - << Aws::Http::HttpMethodMapper::GetNameForHttpMethod(method) - << " " << uri.GetURIString(true); auto request = Aws::MakeShared(kAWSTag, uri, method); request->SetResponseStreamFactory(streamFactory); return request; @@ -264,18 +293,18 @@ std::shared_ptr GetAwsContext() { auto options = Aws::SDKOptions{}; // Customise HttpClientFactory - // Disable curl init/cleanup + // Disable curl init/cleanup, tensorstore should control this // Don't install the SIGPIPE handler - // options.httpOptions.httpClientFactory_create_fn = []() { - // return Aws::MakeShared(kAwsTag); - // }; + options.httpOptions.httpClientFactory_create_fn = []() { + return Aws::MakeShared(kAwsTag); + }; options.httpOptions.initAndCleanupCurl = false; options.httpOptions.installSigPipeHandler = false; // Install AWS -> Abseil Logging Translator auto level = Aws::Utils::Logging::LogLevel::Info; options.loggingOptions.logLevel = level; - options.loggingOptions.logger_create_fn = [=]() { + options.loggingOptions.logger_create_fn = [level=level]() { return Aws::MakeShared(kAWSTag, level); }; diff --git a/tensorstore/kvstore/s3/s3_context.h b/tensorstore/kvstore/s3_sdk/s3_context.h similarity index 100% rename from tensorstore/kvstore/s3/s3_context.h rename to tensorstore/kvstore/s3_sdk/s3_context.h diff --git a/tensorstore/kvstore/s3/s3_context_test.cc b/tensorstore/kvstore/s3_sdk/s3_context_test.cc similarity index 78% rename from tensorstore/kvstore/s3/s3_context_test.cc rename to tensorstore/kvstore/s3_sdk/s3_context_test.cc index 47ddf9284..9dad786c0 100644 --- a/tensorstore/kvstore/s3/s3_context_test.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context_test.cc @@ -13,7 +13,7 
@@ #include #include -#include "tensorstore/kvstore/s3/s3_context.h" +#include "tensorstore/kvstore/s3_sdk/s3_context.h" #include "tensorstore/internal/thread/thread_pool.h" #include "tensorstore/util/executor.h" @@ -54,12 +54,13 @@ TEST(S3ContextTest, Endpoint) { } TEST(S3ContextTest, Client) { - class AwsExecutorAdapter : public Aws::Utils::Threading::Executor { + // Offload AWS Client tasks onto a Tensorstore executor + class TensorStoreExecutor : public Aws::Utils::Threading::Executor { public: - AwsExecutorAdapter(): executor_(::tensorstore::internal::DetachedThreadPool(4)) {} + TensorStoreExecutor(): executor_(::tensorstore::internal::DetachedThreadPool(4)) {} protected: bool SubmitToThread(std::function && fn) override { - ::tensorstore::WithExecutor(executor_, std::move(fn)); + ::tensorstore::WithExecutor(executor_, std::move(fn))(); return true; } @@ -69,8 +70,9 @@ TEST(S3ContextTest, Client) { auto ctx = GetAwsContext(); auto cfg = Aws::Client::ClientConfiguration(); - //cfg.executor = Aws::MakeShared(kAWSTag); - cfg.executor->Submit([msg = "Submission Works"] { ABSL_LOG(INFO) << msg; }); + // Override the default client executor + cfg.executor = Aws::MakeShared(kAWSTag); + cfg.executor->Submit([msg = "Submission seems to work"] { ABSL_LOG(INFO) << msg; }); auto client = Aws::S3::S3Client(cfg); auto head_bucket = Aws::S3::Model::HeadBucketRequest().WithBucket("ratt-public-data"); auto outcome = client.HeadBucket(head_bucket); From 0e3a781849eef439611551da6a7f6279b994c19a Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Thu, 9 May 2024 11:09:51 +0200 Subject: [PATCH 22/48] Move AWS SDK Adapter code into anonymous namespace --- tensorstore/kvstore/s3_sdk/s3_context.cc | 39 ++++++++++++++++++------ tensorstore/kvstore/s3_sdk/s3_context.h | 28 ----------------- 2 files changed, 30 insertions(+), 37 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc index b65d2933c..a4119d273 100644 --- 
a/tensorstore/kvstore/s3_sdk/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -46,8 +46,6 @@ static constexpr char kAwsTag[] = "AWS"; absl::Mutex context_mu_; std::weak_ptr context_; -} // namespace - /// Wraps a tensorstore HttpRequest in a Aws HttpRequest interface class HttpRequestAdapter : public Aws::Http::HttpRequest { public: @@ -198,7 +196,7 @@ class CustomHttpClient : public Aws::Http::HttpClient { // future.ExecuteWhenReady may be desirable auto response = future.value(); ABSL_LOG(INFO) << response; - return Aws::MakeShared(kAWSTag, response, request); + return Aws::MakeShared(kAwsTag, response, request); } auto fail = Aws::MakeShared(kAwsTag, request); @@ -214,7 +212,7 @@ class CustomHttpFactory : public Aws::Http::HttpClientFactory { std::shared_ptr CreateHttpClient( const Aws::Client::ClientConfiguration & clientConfiguration) const override { ABSL_LOG(INFO) << "Making a custom HTTP Client"; - return Aws::MakeShared(kAWSTag); + return Aws::MakeShared(kAwsTag); }; std::shared_ptr CreateHttpRequest( @@ -227,14 +225,36 @@ class CustomHttpFactory : public Aws::Http::HttpClientFactory { const Aws::Http::URI& uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory& streamFactory) const override { - auto request = Aws::MakeShared(kAWSTag, uri, method); + auto request = Aws::MakeShared(kAwsTag, uri, method); request->SetResponseStreamFactory(streamFactory); return request; } }; -AWSLogSystem::AWSLogSystem(Aws::Utils::Logging::LogLevel log_level) - : log_level_(log_level) {} +class AWSLogSystem : public Aws::Utils::Logging::LogSystemInterface { +public: + AWSLogSystem(Aws::Utils::Logging::LogLevel log_level); + Aws::Utils::Logging::LogLevel GetLogLevel(void) const override; + void SetLogLevel(Aws::Utils::Logging::LogLevel log_level); + + // Writes the stream to ProcessFormattedStatement. 
+ void LogStream(Aws::Utils::Logging::LogLevel log_level, const char* tag, + const Aws::OStringStream& messageStream) override; + + // Flushes the buffered messages if the logger supports buffering + void Flush() override { return; }; + + // Overridden, but prefer the safer LogStream + void Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, + const char* format, ...) override; + +private: + void LogMessage(Aws::Utils::Logging::LogLevel log_level, const std::string & message); + Aws::Utils::Logging::LogLevel log_level_; +}; + + +AWSLogSystem::AWSLogSystem(Aws::Utils::Logging::LogLevel log_level) : log_level_(log_level) {}; Aws::Utils::Logging::LogLevel AWSLogSystem::GetLogLevel(void) const { return log_level_; @@ -282,6 +302,7 @@ void AWSLogSystem::LogMessage(Aws::Utils::Logging::LogLevel log_level, const std } } +} // namespace // Initialise AWS API and Logging std::shared_ptr GetAwsContext() { @@ -305,14 +326,14 @@ std::shared_ptr GetAwsContext() { auto level = Aws::Utils::Logging::LogLevel::Info; options.loggingOptions.logLevel = level; options.loggingOptions.logger_create_fn = [level=level]() { - return Aws::MakeShared(kAWSTag, level); + return Aws::MakeShared(kAwsTag, level); }; ABSL_LOG(INFO) << "Initialising AWS SDK API"; Aws::InitAPI(options); ABSL_LOG(INFO) << "Done Initialising AWS SDK API"; - auto provider = Aws::MakeShared(kAWSTag); + auto provider = Aws::MakeShared(kAwsTag); auto ctx = std::shared_ptr( new AwsContext{ diff --git a/tensorstore/kvstore/s3_sdk/s3_context.h b/tensorstore/kvstore/s3_sdk/s3_context.h index 503d19b4b..f6496c1e8 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.h +++ b/tensorstore/kvstore/s3_sdk/s3_context.h @@ -16,42 +16,14 @@ #define TENSORSTORE_KVSTORE_S3_S3_CONTEXT_H_ #include -#include #include #include -#include -#include namespace tensorstore { namespace internal_kvstore_s3 { -static constexpr char kAWSTag[] = "AWS"; - -class AWSLogSystem : public Aws::Utils::Logging::LogSystemInterface { -public: - 
AWSLogSystem(Aws::Utils::Logging::LogLevel log_level); - Aws::Utils::Logging::LogLevel GetLogLevel(void) const override; - void SetLogLevel(Aws::Utils::Logging::LogLevel log_level); - - // Writes the stream to ProcessFormattedStatement. - void LogStream(Aws::Utils::Logging::LogLevel log_level, const char* tag, - const Aws::OStringStream& messageStream) override; - - // Flushes the buffered messages if the logger supports buffering - void Flush() override { return; }; - - // Overridden, but prefer the safer LogStream - void Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, - const char* format, ...) override; - -private: - void LogMessage(Aws::Utils::Logging::LogLevel log_level, const std::string & message); - Aws::Utils::Logging::LogLevel log_level_; -}; - - struct AwsContext { Aws::SDKOptions options; std::shared_ptr cred_provider_; From fbcb78895be3440009fd145449d2be7f9f6e86e3 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Thu, 9 May 2024 15:18:33 +0200 Subject: [PATCH 23/48] fixups --- tensorstore/kvstore/s3_sdk/BUILD | 47 ++++++++++++++++ tensorstore/kvstore/s3_sdk/s3_context.cc | 69 +++++++++++++++--------- 2 files changed, 90 insertions(+), 26 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/BUILD b/tensorstore/kvstore/s3_sdk/BUILD index 28f82b066..96ae924a1 100644 --- a/tensorstore/kvstore/s3_sdk/BUILD +++ b/tensorstore/kvstore/s3_sdk/BUILD @@ -49,3 +49,50 @@ py_binary( ], deps = ["@pypa_moto//:moto"], ) + +tensorstore_cc_test( + name = "localstack_test", + size = "small", + srcs = ["localstack_test.cc"], + args = [ + "--localstack_binary=$(location :moto_server)", + "--binary_mode=moto", + ], + data = [":moto_server"], + flaky = 1, # Spawning the test process can be flaky. 
+ tags = [ + "cpu:2", + "requires-net:loopback", + "skip-cmake", + ], + deps = [ + ":s3_context", + "//tensorstore:context", + "//tensorstore:json_serialization_options_base", + "//tensorstore/internal:env", + "//tensorstore/internal:json_gtest", + "//tensorstore/internal/http", + "//tensorstore/internal/http:curl_transport", + "//tensorstore/internal/http:transport_test_utils", + "//tensorstore/internal/os:subprocess", + "//tensorstore/kvstore", + "//tensorstore/kvstore:batch_util", + "//tensorstore/kvstore:test_util", + "//tensorstore/util:future", + "//tensorstore/util:result", + "//tensorstore/util:status_testutil", + "@com_github_nlohmann_json//:json", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/log:absl_check", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:cord", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + "@com_github_aws_cpp_sdk//:core", + "@com_github_aws_cpp_sdk//:s3", + + ], +) diff --git a/tensorstore/kvstore/s3_sdk/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc index a4119d273..51a80d374 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -15,7 +15,6 @@ #include "tensorstore/kvstore/s3_sdk/s3_context.h" #include -#include #include #include @@ -36,6 +35,8 @@ #include "tensorstore/internal/http/http_response.h" #include "tensorstore/internal/http/http_transport.h" +using ::tensorstore::internal_http::IssueRequestOptions; + namespace tensorstore { namespace internal_kvstore_s3 { @@ -53,12 +54,14 @@ class HttpRequestAdapter : public Aws::Http::HttpRequest { Aws::Http::HeaderValueCollection headers_; Aws::IOStreamFactory stream_factory_; std::shared_ptr body_; + absl::Cord payload_; HttpRequestAdapter(const Aws::Http::URI & uri, Aws::Http::HttpMethod method) : HttpRequest(uri, method), - headers_(), 
- stream_factory_(), - body_(nullptr) { + headers_{}, + stream_factory_{}, + body_(nullptr), + payload_{} { request_.method = Aws::Http::HttpMethodMapper::GetNameForHttpMethod(method); request_.url = uri.GetURIString(true); @@ -79,13 +82,15 @@ class HttpRequestAdapter : public Aws::Http::HttpRequest { } virtual void SetHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) override { - headers_.insert({headerName, headerValue}); + // ABSL_LOG(INFO) << "Setting header " << headerName << " " << headerValue; + request_.headers.push_back(absl::StrCat(headerName, ": ", headerValue)); + headers_.insert({std::move(headerName), std::move(headerValue)}); } virtual void SetHeaderValue(const char* headerName, const Aws::String& headerValue) override { - headers_.insert({ - Aws::Utils::StringUtils::ToLower(headerName), - Aws::Utils::StringUtils::Trim(headerValue.c_str())}); + auto lower_name = Aws::Utils::StringUtils::ToLower(headerName); + auto trimmed_value = Aws::Utils::StringUtils::Trim(headerValue.c_str()); + SetHeaderValue(lower_name, trimmed_value); } virtual void DeleteHeader(const char* headerName) override { @@ -100,6 +105,20 @@ class HttpRequestAdapter : public Aws::Http::HttpRequest { virtual void AddContentBody(const std::shared_ptr& strContext) override { body_ = strContext; + + //ABSL_LOG(INFO) << "AddContentBody " << strContext << " " << body_;; + + if(!body_) { + return; + } + + const size_t bufferSize = 4096; + std::vector buffer(bufferSize); + + while (body_->read(buffer.data(), buffer.size()) || body_->gcount() > 0) { + payload_.Append(absl::Cord(absl::string_view(buffer.data(), body_->gcount()))); + } + ABSL_LOG(INFO) << "AddContentBody " << payload_.size(); } virtual const std::shared_ptr& GetContentBody() const override { @@ -133,6 +152,13 @@ class HttpResponseAdapter: public Aws::Http::HttpResponse { // but AWS probably? 
won't respond with // a response code it doesn't know about SetResponseCode(static_cast(response_.status_code)); + + // Add the payload to the Response Body is present + // This incurs a copy, which should be avoided by subclassing + // Aws::IOStream + if(!response_.payload.empty()) { + GetResponseBody() << response_.payload; + } }; virtual Aws::Utils::Stream::ResponseStream && SwapResponseStreamOwnership() override { @@ -174,26 +200,16 @@ class CustomHttpClient : public Aws::Http::HttpClient { Aws::Utils::RateLimits::RateLimiterInterface* readLimiter = nullptr, Aws::Utils::RateLimits::RateLimiterInterface* writeLimiter = nullptr) const override { absl::Cord payload; + ABSL_LOG(INFO) << "Making a request "; if(auto req_adapter = std::dynamic_pointer_cast(request); req_adapter) { - if(auto iostream = req_adapter->GetContentBody(); iostream) { - // This is untested and probably broken - // Ideally, we'd want a streambuf wrapping an underlying Cord - // to avoid the copy here, especially for responses - auto rdbuf = iostream->rdbuf(); - std::streamsize size = rdbuf->pubseekoff(0, iostream->end); - auto cord_buffer = absl::CordBuffer::CreateWithDefaultLimit(size); - absl::Span data = cord_buffer.available_up_to(size); - rdbuf->sgetn(data.data(), data.size()); - cord_buffer.IncreaseLengthBy(data.size()); - payload.Append(std::move(cord_buffer)); - } - auto transport = ::tensorstore::internal_http::GetDefaultHttpTransport(); - ABSL_LOG(INFO) << req_adapter->request_; + ABSL_LOG(INFO) << req_adapter->request_ << " " << req_adapter->payload_; + auto req_options = req_adapter->payload_.empty() ? 
+ IssueRequestOptions{} : + IssueRequestOptions(std::move(req_adapter->payload_)); auto future = transport->IssueRequest( - req_adapter->request_, - ::tensorstore::internal_http::IssueRequestOptions(payload)); - // future.ExecuteWhenReady may be desirable + req_adapter->request_, std::move(req_options)); + // future.ExecuteWhenReady is desirable auto response = future.value(); ABSL_LOG(INFO) << response; return Aws::MakeShared(kAwsTag, response, request); @@ -323,7 +339,8 @@ std::shared_ptr GetAwsContext() { options.httpOptions.installSigPipeHandler = false; // Install AWS -> Abseil Logging Translator - auto level = Aws::Utils::Logging::LogLevel::Info; + auto level = Aws::Utils::Logging::LogLevel::Debug; + //auto level = Aws::Utils::Logging::LogLevel::Info; options.loggingOptions.logLevel = level; options.loggingOptions.logger_create_fn = [level=level]() { return Aws::MakeShared(kAwsTag, level); From 484d6b0696bb7e1a73f00517ce504f47da0e2a20 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Thu, 9 May 2024 15:18:50 +0200 Subject: [PATCH 24/48] Add localstack test --- tensorstore/kvstore/s3_sdk/localstack_test.cc | 355 ++++++++++++++++++ 1 file changed, 355 insertions(+) create mode 100644 tensorstore/kvstore/s3_sdk/localstack_test.cc diff --git a/tensorstore/kvstore/s3_sdk/localstack_test.cc b/tensorstore/kvstore/s3_sdk/localstack_test.cc new file mode 100644 index 000000000..d51d863c3 --- /dev/null +++ b/tensorstore/kvstore/s3_sdk/localstack_test.cc @@ -0,0 +1,355 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include +#include +#include "absl/flags/flag.h" +#include "absl/log/absl_check.h" +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "absl/strings/cord.h" +#include "absl/strings/match.h" +#include "absl/strings/str_format.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "tensorstore/context.h" +#include "tensorstore/internal/env.h" +#include "tensorstore/internal/http/curl_transport.h" +#include "tensorstore/internal/http/http_response.h" +#include "tensorstore/internal/http/http_transport.h" +#include "tensorstore/internal/http/transport_test_utils.h" +#include "tensorstore/internal/json_gtest.h" +#include "tensorstore/internal/os/subprocess.h" +#include "tensorstore/json_serialization_options_base.h" +#include "tensorstore/kvstore/batch_util.h" +#include "tensorstore/kvstore/kvstore.h" +#include "tensorstore/kvstore/spec.h" +#include "tensorstore/kvstore/test_util.h" +#include "tensorstore/util/future.h" +#include "tensorstore/util/result.h" +#include "tensorstore/util/status_testutil.h" + +#include "tensorstore/kvstore/s3_sdk/s3_context.h" + +// When provided with --localstack_binary, localstack_test will start +// localstack in host mode (via package localstack[runtime]). +// +// When provided with --localstack_endpoint, localstack_test will connect +// to a running localstack instance. +ABSL_FLAG(std::string, localstack_endpoint, "", "Localstack endpoint"); +ABSL_FLAG(std::string, localstack_binary, "", "Path to the localstack"); + +// --localstack_timeout is the time the process will wait for localstack. 
+ABSL_FLAG(absl::Duration, localstack_timeout, absl::Seconds(15), + "Time to wait for localstack process to start serving requests"); + +// --host_header can override the host: header used for signing. +// It can be, for example, s3.af-south-1.localstack.localhost.com +ABSL_FLAG(std::string, host_header, "", "Host header to use for signing"); + +// --binary_mode selects whether the `--localstack_binary` is localstack +// binary or whether it is a moto binary. +ABSL_FLAG(std::string, binary_mode, "", + "Selects options for starting --localstack_binary. Valid values are " + "[moto]. Assumes localstack otherwise."); + +// AWS bucket, region, and path. +ABSL_FLAG(std::string, aws_bucket, "testbucket", + "The S3 bucket used for the test."); + +ABSL_FLAG(std::string, aws_region, "af-south-1", + "The S3 region used for the test."); + +ABSL_FLAG(std::string, aws_path, "tensorstore/test/", + "The S3 path used for the test."); + +namespace kvstore = ::tensorstore::kvstore; + +using ::tensorstore::Context; +using ::tensorstore::MatchesJson; +using ::tensorstore::internal::GetEnv; +using ::tensorstore::internal::GetEnvironmentMap; +using ::tensorstore::internal::SetEnv; +using ::tensorstore::internal::SpawnSubprocess; +using ::tensorstore::internal::Subprocess; +using ::tensorstore::internal::SubprocessOptions; +using ::tensorstore::internal_http::GetDefaultHttpTransport; +using ::tensorstore::internal_http::HttpResponse; +using ::tensorstore::internal_http::IssueRequestOptions; +using ::tensorstore::transport_test_utils::TryPickUnusedPort; + +using ::tensorstore::internal_kvstore_s3::AwsContext; + +namespace { + +static constexpr char kAwsAccessKeyId[] = "LSIAQAAAAAAVNCBMPNSG"; +static constexpr char kAwsSecretKeyId[] = "localstackdontcare"; + +/// sha256 hash of an empty string +static constexpr char kEmptySha256[] = + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; + +std::string Bucket() { return absl::GetFlag(FLAGS_aws_bucket); } +std::string Region() 
{ return absl::GetFlag(FLAGS_aws_region); } +std::string Path() { return absl::GetFlag(FLAGS_aws_path); } + +SubprocessOptions SetupLocalstackOptions(int http_port) { + // See https://docs.localstack.cloud/references/configuration/ + // for the allowed environment variables for localstack. + SubprocessOptions options{absl::GetFlag(FLAGS_localstack_binary), + {"start", "--host"}}; + options.env.emplace(GetEnvironmentMap()); + auto& env = *options.env; + env["GATEWAY_LISTEN"] = absl::StrFormat("localhost:%d", http_port); + env["LOCALSTACK_HOST"] = + absl::StrFormat("localhost.localstack.cloud:%d", http_port); + env["SERVICES"] = "s3"; + return options; +} + +SubprocessOptions SetupMotoOptions(int http_port) { + // See https://docs.getmoto.org/en/latest/docs/getting_started.html + // and https://docs.getmoto.org/en/latest/docs/server_mode.html + SubprocessOptions options{absl::GetFlag(FLAGS_localstack_binary), + {absl::StrFormat("-p%d", http_port)}}; + options.env.emplace(GetEnvironmentMap()); + auto& env = *options.env; + ABSL_CHECK(!Region().empty()); + env["AWS_DEFAULT_REGION"] = Region(); + return options; +} + +// NOTE: Support minio as well, which needs temporary directories. +// https://min.io/docs/minio/linux/reference/minio-server/minio-server.html +// minio server --address :12123 /tmp/minio + +class LocalStackProcess { + public: + LocalStackProcess() = default; + ~LocalStackProcess() { StopProcess(); } + + void SpawnProcess() { + if (child_) return; + + const auto start_child = [this] { + http_port = TryPickUnusedPort().value_or(0); + ABSL_CHECK(http_port > 0); + + SubprocessOptions options = // + (absl::GetFlag(FLAGS_binary_mode) == "moto") + ? 
SetupMotoOptions(http_port) + : SetupLocalstackOptions(http_port); + + ABSL_LOG(INFO) << "Spawning: " << endpoint_url(); + + absl::SleepFor(absl::Milliseconds(10)); + TENSORSTORE_CHECK_OK_AND_ASSIGN(auto spawn_proc, + SpawnSubprocess(options)); + return spawn_proc; + }; + + Subprocess spawn_proc = start_child(); + + // Give the child process several seconds to start. + auto deadline = absl::Now() + absl::Seconds(10); + while (absl::Now() < deadline) { + absl::SleepFor(absl::Milliseconds(250)); + auto join_result = spawn_proc.Join(/*block=*/false); + + if (join_result.ok()) { + // Process has terminated. Restart. + spawn_proc = start_child(); + continue; + } else if (absl::IsUnavailable(join_result.status())) { + // Child is running. + child_.emplace(std::move(spawn_proc)); + return; + } + // TODO: Also check the http port? + } + + // Deadline has expired & there's nothing to show for it. + ABSL_LOG(FATAL) << "Failed to start process"; + } + + void StopProcess() { + if (child_) { + child_->Kill().IgnoreError(); + auto join_result = child_->Join(); + if (!join_result.ok()) { + ABSL_LOG(ERROR) << "Joining storage_testbench subprocess failed: " + << join_result.status(); + } + } + } + + std::string endpoint_url() { + return absl::StrFormat("http://localhost:%d", http_port); + } + + int http_port = 0; + std::optional child_; +}; + + +class LocalStackFixture : public ::testing::Test { + protected: + static std::shared_ptr context; + static LocalStackProcess process; + static std::shared_ptr client; + + static void SetUpTestSuite() { + if (!GetEnv("AWS_ACCESS_KEY_ID") || !GetEnv("AWS_SECRET_KEY_ID")) { + SetEnv("AWS_ACCESS_KEY_ID", kAwsAccessKeyId); + SetEnv("AWS_SECRET_KEY_ID", kAwsSecretKeyId); + } + + context = tensorstore::internal_kvstore_s3::GetAwsContext(); + + ABSL_CHECK(!Bucket().empty()); + + if (absl::GetFlag(FLAGS_localstack_endpoint).empty()) { + ABSL_CHECK(!absl::GetFlag(FLAGS_localstack_binary).empty()); + process.SpawnProcess(); + } + + if 
(!absl::StrContains(absl::GetFlag(FLAGS_localstack_endpoint), + "amazonaws.com")) { + // Only try to create the bucket when not connecting to aws. + ABSL_CHECK(!Region().empty()); + MaybeCreateBucket(); + } else { + ABSL_LOG(INFO) << "localstack_test connecting to Amazon using bucket:" + << Bucket(); + } + } + + static void TearDownTestSuite() { + client.reset(); + context.reset(); + process.StopProcess(); + } + + static std::string endpoint_url() { + if (absl::GetFlag(FLAGS_localstack_endpoint).empty()) { + return process.endpoint_url(); + } + return absl::GetFlag(FLAGS_localstack_endpoint); + } + + // Attempts to create the kBucket bucket on the localstack host. + static void MaybeCreateBucket() { + // https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/examples-s3-buckets.html + auto cfg = Aws::Client::ClientConfiguration{}; + cfg.endpointOverride = endpoint_url(); + cfg.region = Region(); + client = std::make_shared(cfg); + + auto create_request = Aws::S3::Model::CreateBucketRequest{}; + create_request.SetBucket(Bucket()); + + if (cfg.region != "us-east-1") { + auto bucket_cfg = Aws::S3::Model::CreateBucketConfiguration{}; + bucket_cfg.SetLocationConstraint( + Aws::S3::Model::BucketLocationConstraintMapper::GetBucketLocationConstraintForName( + cfg.region)); + create_request.SetCreateBucketConfiguration(bucket_cfg); + } + + auto outcome = client->CreateBucket(create_request); + if (!outcome.IsSuccess()) { + auto err = outcome.GetError(); + ABSL_LOG(INFO) << "Error: CreateBucket: " << + err.GetExceptionName() << ": " << err.GetMessage(); + } + else { + ABSL_LOG(INFO) << "Created bucket " << Bucket() << + " in AWS Region " << Region(); + } + } +}; + +LocalStackProcess LocalStackFixture::process; +std::shared_ptr LocalStackFixture::client = nullptr; +std::shared_ptr LocalStackFixture::context = nullptr; + +TEST_F(LocalStackFixture, Basic) { + // auto credentials = context->cred_provider_->GetAWSCredentials(); + // ABSL_LOG(INFO) << 
credentials.GetAWSAccessKeyId() << " " << credentials.GetAWSSecretKey(); + + auto put_request = Aws::S3::Model::PutObjectRequest{}; + put_request.SetBucket(Bucket()); + put_request.SetKey("portunus"); + put_request.SetBody(Aws::MakeShared("AWS", "this is a test")); + auto put_outcome = client->PutObject(put_request); + EXPECT_TRUE(put_outcome.IsSuccess()); + + put_request = Aws::S3::Model::PutObjectRequest{}; + put_request.SetBucket(Bucket()); + put_request.SetKey("portunus0"); + put_request.SetBody(Aws::MakeShared("AWS", "this is a test")); + put_outcome = client->PutObject(put_request); + EXPECT_TRUE(put_outcome.IsSuccess()); + + + auto list_request = Aws::S3::Model::ListObjectsV2Request{}; + list_request.SetBucket(Bucket()); + auto continuation_token = Aws::String{}; + Aws::Vector objects; + + do { + if (!continuation_token.empty()) { + list_request.SetContinuationToken(continuation_token); + } + + auto outcome = client->ListObjectsV2(list_request); + EXPECT_TRUE(outcome.IsSuccess()); + + auto page_objects = outcome.GetResult().GetContents(); + objects.insert(objects.end(), page_objects.begin(), page_objects.end()); + continuation_token = outcome.GetResult().GetNextContinuationToken(); + } while (!continuation_token.empty()); + + ABSL_LOG(INFO) << "# Objects " << objects.size(); + + for (const auto &object: objects) { + ABSL_LOG(INFO) << object.GetKey(); + } + + // auto get_request = Aws::S3::Model::GetObjectRequest{}; + // get_request.SetBucket(Bucket()); + // get_request.SetKey("portunus"); + // auto get_outcome = client->GetObject(get_request); + // EXPECT_TRUE(get_outcome.IsSuccess()); +} + +} // namespace From b843af4576e144a66bc4b7e13ee16c2968501952 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Thu, 9 May 2024 17:13:01 +0200 Subject: [PATCH 25/48] Updates --- tensorstore/kvstore/BUILD | 2 +- .../kvstore/s3/new_s3_request_builder.h | 2 +- .../kvstore/s3/new_s3_request_builder_test.cc | 3 +- tensorstore/kvstore/s3_sdk/localstack_test.cc | 60 +++--- 
tensorstore/kvstore/s3_sdk/moto_server.py | 19 ++ tensorstore/kvstore/s3_sdk/s3_context.cc | 174 ++++++------------ tensorstore/kvstore/s3_sdk/s3_context_test.cc | 14 +- 7 files changed, 120 insertions(+), 154 deletions(-) create mode 100644 tensorstore/kvstore/s3_sdk/moto_server.py diff --git a/tensorstore/kvstore/BUILD b/tensorstore/kvstore/BUILD index 1bea1b829..1dad26f7a 100644 --- a/tensorstore/kvstore/BUILD +++ b/tensorstore/kvstore/BUILD @@ -13,7 +13,7 @@ DRIVER_DOCS = [ "neuroglancer_uint64_sharded", "ocdbt", "s3", - "s3_sdk", + # "s3_sdk", "tsgrpc", "zarr3_sharding_indexed", "zip", diff --git a/tensorstore/kvstore/s3/new_s3_request_builder.h b/tensorstore/kvstore/s3/new_s3_request_builder.h index 3d1f93939..846dc7e61 100644 --- a/tensorstore/kvstore/s3/new_s3_request_builder.h +++ b/tensorstore/kvstore/s3/new_s3_request_builder.h @@ -98,7 +98,7 @@ class NewS3RequestBuilder { auto delim_pos = header.find(':'); assert(delim_pos != std::string_view::npos); // NOTE: string copies - request_.SetHeaderValue(std::string(header.substr(0, delim_pos)).c_str(), + request_.SetHeaderValue(Aws::String(header.substr(0, delim_pos)), Aws::String(header.substr(delim_pos + 1))); return *this; } diff --git a/tensorstore/kvstore/s3/new_s3_request_builder_test.cc b/tensorstore/kvstore/s3/new_s3_request_builder_test.cc index 1b33de4c2..da5c4c62d 100644 --- a/tensorstore/kvstore/s3/new_s3_request_builder_test.cc +++ b/tensorstore/kvstore/s3/new_s3_request_builder_test.cc @@ -9,13 +9,12 @@ using ::tensorstore::internal_kvstore_s3::NewS3RequestBuilder; namespace { TEST(NewS3RequestBuilderTest, Basic) { + auto ctx = tensorstore::internal_kvstore_s3::GetAwsContext(); auto builder = NewS3RequestBuilder("get", "http://bucket") .AddBody(absl::Cord{"foobar"}) .AddHeader("foo: bar") .AddQueryParameter("qux", "baz"); - auto ctx = tensorstore::internal_kvstore_s3::GetAwsContext(); - auto req = builder.BuildRequest(*ctx); EXPECT_TRUE(builder.request_.HasAuthorization()); diff --git 
a/tensorstore/kvstore/s3_sdk/localstack_test.cc b/tensorstore/kvstore/s3_sdk/localstack_test.cc index d51d863c3..7db29773b 100644 --- a/tensorstore/kvstore/s3_sdk/localstack_test.cc +++ b/tensorstore/kvstore/s3_sdk/localstack_test.cc @@ -1,4 +1,4 @@ -// Copyright 2023 The TensorStore Authors +// Copyright 2024 The TensorStore Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ #include #include -#include #include #include @@ -24,7 +23,6 @@ #include "absl/log/absl_check.h" #include "absl/log/absl_log.h" #include "absl/status/status.h" -#include "absl/strings/cord.h" #include "absl/strings/match.h" #include "absl/strings/str_format.h" #include "absl/time/clock.h" @@ -49,14 +47,7 @@ #include "tensorstore/internal/http/transport_test_utils.h" #include "tensorstore/internal/json_gtest.h" #include "tensorstore/internal/os/subprocess.h" -#include "tensorstore/json_serialization_options_base.h" -#include "tensorstore/kvstore/batch_util.h" -#include "tensorstore/kvstore/kvstore.h" -#include "tensorstore/kvstore/spec.h" -#include "tensorstore/kvstore/test_util.h" -#include "tensorstore/util/future.h" #include "tensorstore/util/result.h" -#include "tensorstore/util/status_testutil.h" #include "tensorstore/kvstore/s3_sdk/s3_context.h" @@ -92,7 +83,6 @@ ABSL_FLAG(std::string, aws_region, "af-south-1", ABSL_FLAG(std::string, aws_path, "tensorstore/test/", "The S3 path used for the test."); -namespace kvstore = ::tensorstore::kvstore; using ::tensorstore::Context; using ::tensorstore::MatchesJson; @@ -233,8 +223,6 @@ class LocalStackFixture : public ::testing::Test { SetEnv("AWS_SECRET_KEY_ID", kAwsSecretKeyId); } - context = tensorstore::internal_kvstore_s3::GetAwsContext(); - ABSL_CHECK(!Bucket().empty()); if (absl::GetFlag(FLAGS_localstack_endpoint).empty()) { @@ -251,6 +239,14 @@ class LocalStackFixture : public ::testing::Test { ABSL_LOG(INFO) << "localstack_test 
connecting to Amazon using bucket:" << Bucket(); } + + auto cfg = Aws::Client::ClientConfiguration{}; + cfg.endpointOverride = endpoint_url(); + cfg.region = Region(); + client = std::make_shared( + cfg, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Always, + false); } static void TearDownTestSuite() { @@ -268,11 +264,11 @@ class LocalStackFixture : public ::testing::Test { // Attempts to create the kBucket bucket on the localstack host. static void MaybeCreateBucket() { - // https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/examples-s3-buckets.html auto cfg = Aws::Client::ClientConfiguration{}; cfg.endpointOverride = endpoint_url(); cfg.region = Region(); - client = std::make_shared(cfg); + // Without Anonymous credentials bucket creation fails with 400 IllegalRegionConstraint + auto create_client = std::make_shared(Aws::Auth::AWSCredentials(), cfg); auto create_request = Aws::S3::Model::CreateBucketRequest{}; create_request.SetBucket(Bucket()); @@ -285,7 +281,7 @@ class LocalStackFixture : public ::testing::Test { create_request.SetCreateBucketConfiguration(bucket_cfg); } - auto outcome = client->CreateBucket(create_request); + auto outcome = create_client->CreateBucket(create_request); if (!outcome.IsSuccess()) { auto err = outcome.GetError(); ABSL_LOG(INFO) << "Error: CreateBucket: " << @@ -300,27 +296,28 @@ class LocalStackFixture : public ::testing::Test { LocalStackProcess LocalStackFixture::process; std::shared_ptr LocalStackFixture::client = nullptr; -std::shared_ptr LocalStackFixture::context = nullptr; +std::shared_ptr LocalStackFixture::context = tensorstore::internal_kvstore_s3::GetAwsContext(); TEST_F(LocalStackFixture, Basic) { - // auto credentials = context->cred_provider_->GetAWSCredentials(); - // ABSL_LOG(INFO) << credentials.GetAWSAccessKeyId() << " " << credentials.GetAWSSecretKey(); + std::string payload = "this is a test"; + // Put an object auto put_request = Aws::S3::Model::PutObjectRequest{}; 
put_request.SetBucket(Bucket()); put_request.SetKey("portunus"); - put_request.SetBody(Aws::MakeShared("AWS", "this is a test")); + put_request.SetBody(Aws::MakeShared("AWS", payload)); auto put_outcome = client->PutObject(put_request); EXPECT_TRUE(put_outcome.IsSuccess()); + // Put the same object with a different key put_request = Aws::S3::Model::PutObjectRequest{}; put_request.SetBucket(Bucket()); put_request.SetKey("portunus0"); - put_request.SetBody(Aws::MakeShared("AWS", "this is a test")); + put_request.SetBody(Aws::MakeShared("AWS", payload)); put_outcome = client->PutObject(put_request); EXPECT_TRUE(put_outcome.IsSuccess()); - + // List the objects auto list_request = Aws::S3::Model::ListObjectsV2Request{}; list_request.SetBucket(Bucket()); auto continuation_token = Aws::String{}; @@ -339,17 +336,22 @@ TEST_F(LocalStackFixture, Basic) { continuation_token = outcome.GetResult().GetNextContinuationToken(); } while (!continuation_token.empty()); - ABSL_LOG(INFO) << "# Objects " << objects.size(); + + EXPECT_EQ(objects.size(), 2); for (const auto &object: objects) { - ABSL_LOG(INFO) << object.GetKey(); + EXPECT_EQ(object.GetSize(), payload.size()); } - // auto get_request = Aws::S3::Model::GetObjectRequest{}; - // get_request.SetBucket(Bucket()); - // get_request.SetKey("portunus"); - // auto get_outcome = client->GetObject(get_request); - // EXPECT_TRUE(get_outcome.IsSuccess()); + // Get the contents of the key + auto get_request = Aws::S3::Model::GetObjectRequest{}; + get_request.SetBucket(Bucket()); + get_request.SetKey("portunus"); + auto get_outcome = client->GetObject(get_request); + EXPECT_TRUE(get_outcome.IsSuccess()); + std::string result; + std::getline(get_outcome.GetResult().GetBody(), result); + EXPECT_EQ(result, payload); } } // namespace diff --git a/tensorstore/kvstore/s3_sdk/moto_server.py b/tensorstore/kvstore/s3_sdk/moto_server.py new file mode 100644 index 000000000..79612a070 --- /dev/null +++ b/tensorstore/kvstore/s3_sdk/moto_server.py @@ 
-0,0 +1,19 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +from moto.server import main + +if __name__ == '__main__': + sys.exit(main()) diff --git a/tensorstore/kvstore/s3_sdk/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc index 51a80d374..0b1e0ec27 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -44,108 +44,70 @@ namespace { static constexpr char kAwsTag[] = "AWS"; +// Context guarded by mutex absl::Mutex context_mu_; -std::weak_ptr context_; +std::weak_ptr context_ ABSL_GUARDED_BY(context_mu_); /// Wraps a tensorstore HttpRequest in a Aws HttpRequest interface -class HttpRequestAdapter : public Aws::Http::HttpRequest { +class HttpRequestWrapper : public Aws::Http::Standard::StandardHttpRequest { public: ::tensorstore::internal_http::HttpRequest request_; - Aws::Http::HeaderValueCollection headers_; - Aws::IOStreamFactory stream_factory_; - std::shared_ptr body_; absl::Cord payload_; - HttpRequestAdapter(const Aws::Http::URI & uri, Aws::Http::HttpMethod method) : - HttpRequest(uri, method), - headers_{}, - stream_factory_{}, - body_(nullptr), + HttpRequestWrapper(const Aws::Http::URI & uri, Aws::Http::HttpMethod method) : + StandardHttpRequest(uri, method), payload_{} { request_.method = Aws::Http::HttpMethodMapper::GetNameForHttpMethod(method); - request_.url = uri.GetURIString(true); + request_.url = uri.GetURIString(true); // include the 
query string }; - virtual Aws::Http::HeaderValueCollection GetHeaders() const override { - return headers_; - } - - virtual const Aws::String & GetHeaderValue(const char* headerName) const override { - auto it = headers_.find(headerName); - assert(it != headers_.end()); - return it->second; - } - - virtual bool HasHeader(const char* name) const override { - return headers_.find(name) != headers_.end(); - } - virtual void SetHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) override { - // ABSL_LOG(INFO) << "Setting header " << headerName << " " << headerValue; request_.headers.push_back(absl::StrCat(headerName, ": ", headerValue)); - headers_.insert({std::move(headerName), std::move(headerValue)}); + StandardHttpRequest::SetHeaderValue(headerName, headerValue); } virtual void SetHeaderValue(const char* headerName, const Aws::String& headerValue) override { - auto lower_name = Aws::Utils::StringUtils::ToLower(headerName); - auto trimmed_value = Aws::Utils::StringUtils::Trim(headerValue.c_str()); - SetHeaderValue(lower_name, trimmed_value); - } - - virtual void DeleteHeader(const char* headerName) override { - if(auto it = headers_.find(Aws::Utils::StringUtils::ToLower(headerName)); it != headers_.end()) { - headers_.erase(it); - } - } - - virtual int64_t GetSize() const override { - return headers_.size(); + request_.headers.push_back(absl::StrCat(headerName, ": ", headerValue)); + StandardHttpRequest::SetHeaderValue(headerName, headerValue); } virtual void AddContentBody(const std::shared_ptr& strContext) override { - body_ = strContext; - - //ABSL_LOG(INFO) << "AddContentBody " << strContext << " " << body_;; - - if(!body_) { + StandardHttpRequest::AddContentBody(strContext); + if(!strContext) { return; } - const size_t bufferSize = 4096; + // Copy characters off the stream into the Cord + // TODO: This is impractical for large data and + // should be mitigated by an iostream backed by a Cord + + // Remember the current position in the 
stream + std::streampos original = strContext->tellg(); + const size_t bufferSize = 1024*1024; std::vector buffer(bufferSize); - while (body_->read(buffer.data(), buffer.size()) || body_->gcount() > 0) { - payload_.Append(absl::Cord(absl::string_view(buffer.data(), body_->gcount()))); + while (strContext->read(buffer.data(), buffer.size()) || strContext->gcount() > 0) { + payload_.Append(absl::Cord(absl::string_view(buffer.data(), strContext->gcount()))); } - ABSL_LOG(INFO) << "AddContentBody " << payload_.size(); - } - virtual const std::shared_ptr& GetContentBody() const override { - return body_; - } - - virtual void SetResponseStreamFactory(const Aws::IOStreamFactory& streamFactory) override { - stream_factory_ = streamFactory; - } + strContext->clear(); + strContext->seekg(original); - virtual const Aws::IOStreamFactory& GetResponseStreamFactory() const override { - return stream_factory_; + ABSL_LOG(INFO) << "AddContentBody " << payload_.size(); } }; /// Wraps a tensorstore HttpResponse in an Aws HttpResponse interface -class HttpResponseAdapter: public Aws::Http::HttpResponse { +class HttpResponseWrapper: public Aws::Http::Standard::StandardHttpResponse { public: ::tensorstore::internal_http::HttpResponse response_; - ::Aws::Utils::Stream::ResponseStream body_stream_; - HttpResponseAdapter( + HttpResponseWrapper( ::tensorstore::internal_http::HttpResponse response, const std::shared_ptr & originatingRequest) : - ::Aws::Http::HttpResponse(originatingRequest), - response_(std::move(response)), - body_stream_(originatingRequest->GetResponseStreamFactory()) { + ::Aws::Http::Standard::StandardHttpResponse(originatingRequest), + response_(std::move(response)) { // Cast int response code to an HttpResponseCode enum // Potential for undefined behaviour here, @@ -153,7 +115,8 @@ class HttpResponseAdapter: public Aws::Http::HttpResponse { // a response code it doesn't know about SetResponseCode(static_cast(response_.status_code)); - // Add the payload to the Response 
Body is present + // TODO + // Add the payload to the Response Body if present // This incurs a copy, which should be avoided by subclassing // Aws::IOStream if(!response_.payload.empty()) { @@ -161,47 +124,25 @@ class HttpResponseAdapter: public Aws::Http::HttpResponse { } }; - virtual Aws::Utils::Stream::ResponseStream && SwapResponseStreamOwnership() override { - return std::move(body_stream_); - } - virtual void AddHeader(const Aws::String& headerName, const Aws::String& headerValue) override { + StandardHttpResponse::AddHeader(headerName, headerValue); response_.headers.insert({headerName, headerValue}); } - - virtual bool HasHeader(const char* headerName) const override { - return response_.headers.find(Aws::Utils::StringUtils::ToLower(headerName)) != response_.headers.end(); - } - - virtual Aws::Http::HeaderValueCollection GetHeaders() const override { - Aws::Http::HeaderValueCollection headers; - for(const auto & header: response_.headers) { - headers.insert({header.first, header.second}); - } - return headers; - } - - virtual const Aws::String & GetHeader(const Aws::String& headerName) const override { - auto it = response_.headers.find(headerName); - assert(it != response_.headers.end()); - return it->second; - } - - virtual Aws::IOStream & GetResponseBody() const override { - return body_stream_.GetUnderlyingStream(); - } }; +/// Provides a custom Aws HttpClient. 
+/// Overrides the Aws::HttpClient::MakeRequest to accept HttpRequestWrappers +/// (produce by CustomHttpFactory below), issue a tensorstore HttpRequest, +/// receive a tensorstore HttpResponse to be wrapped in a HttpResponseWrapper class CustomHttpClient : public Aws::Http::HttpClient { public: std::shared_ptr MakeRequest( const std::shared_ptr & request, Aws::Utils::RateLimits::RateLimiterInterface* readLimiter = nullptr, Aws::Utils::RateLimits::RateLimiterInterface* writeLimiter = nullptr) const override { - absl::Cord payload; - ABSL_LOG(INFO) << "Making a request "; - if(auto req_adapter = std::dynamic_pointer_cast(request); req_adapter) { + if(auto req_adapter = std::dynamic_pointer_cast(request); req_adapter) { + // Issue the wrapped HttpRequest on a tensorstore executor auto transport = ::tensorstore::internal_http::GetDefaultHttpTransport(); ABSL_LOG(INFO) << req_adapter->request_ << " " << req_adapter->payload_; auto req_options = req_adapter->payload_.empty() ? @@ -209,10 +150,11 @@ class CustomHttpClient : public Aws::Http::HttpClient { IssueRequestOptions(std::move(req_adapter->payload_)); auto future = transport->IssueRequest( req_adapter->request_, std::move(req_options)); - // future.ExecuteWhenReady is desirable + // TODO + // Figure out how to use a continuation future.ExecuteWhenReady here auto response = future.value(); ABSL_LOG(INFO) << response; - return Aws::MakeShared(kAwsTag, response, request); + return Aws::MakeShared(kAwsTag, response, request); } auto fail = Aws::MakeShared(kAwsTag, request); @@ -223,6 +165,9 @@ class CustomHttpClient : public Aws::Http::HttpClient { /// Custom factory overriding Aws::Http::DefaultHttpFatory +/// Generates a CustomHttpClient (which defers to tensorflow's curl library) +/// as well as overriding Createhttp Request to return +/// HttpRequestWrappers class CustomHttpFactory : public Aws::Http::HttpClientFactory { public: std::shared_ptr CreateHttpClient( @@ -241,21 +186,26 @@ class CustomHttpFactory : 
public Aws::Http::HttpClientFactory { const Aws::Http::URI& uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory& streamFactory) const override { - auto request = Aws::MakeShared(kAwsTag, uri, method); + auto request = Aws::MakeShared(kAwsTag, uri, method); request->SetResponseStreamFactory(streamFactory); return request; } }; + +/// Connect the AWS SDK's logging system to Abseil logging class AWSLogSystem : public Aws::Utils::Logging::LogSystemInterface { public: - AWSLogSystem(Aws::Utils::Logging::LogLevel log_level); - Aws::Utils::Logging::LogLevel GetLogLevel(void) const override; - void SetLogLevel(Aws::Utils::Logging::LogLevel log_level); + AWSLogSystem(Aws::Utils::Logging::LogLevel log_level) : log_level_(log_level) {}; + Aws::Utils::Logging::LogLevel GetLogLevel(void) const override { + return log_level_; + }; // Writes the stream to ProcessFormattedStatement. void LogStream(Aws::Utils::Logging::LogLevel log_level, const char* tag, - const Aws::OStringStream& messageStream) override; + const Aws::OStringStream& messageStream) override { + LogMessage(log_level, messageStream.rdbuf()->str().c_str()); + } // Flushes the buffered messages if the logger supports buffering void Flush() override { return; }; @@ -270,22 +220,6 @@ class AWSLogSystem : public Aws::Utils::Logging::LogSystemInterface { }; -AWSLogSystem::AWSLogSystem(Aws::Utils::Logging::LogLevel log_level) : log_level_(log_level) {}; - -Aws::Utils::Logging::LogLevel AWSLogSystem::GetLogLevel(void) const { - return log_level_; -} - -void AWSLogSystem::SetLogLevel(Aws::Utils::Logging::LogLevel log_level) { - log_level_ = log_level; -} - - // Writes the stream to ProcessFormattedStatement. -void AWSLogSystem::LogStream(Aws::Utils::Logging::LogLevel log_level, const char* tag, - const Aws::OStringStream& messageStream) { - LogMessage(log_level, messageStream.rdbuf()->str().c_str()); -} - void AWSLogSystem::Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, const char* format, ...) 
{ char buffer[256]; @@ -339,8 +273,8 @@ std::shared_ptr GetAwsContext() { options.httpOptions.installSigPipeHandler = false; // Install AWS -> Abseil Logging Translator - auto level = Aws::Utils::Logging::LogLevel::Debug; - //auto level = Aws::Utils::Logging::LogLevel::Info; + //auto level = Aws::Utils::Logging::LogLevel::Debug; + auto level = Aws::Utils::Logging::LogLevel::Info; options.loggingOptions.logLevel = level; options.loggingOptions.logger_create_fn = [level=level]() { return Aws::MakeShared(kAwsTag, level); diff --git a/tensorstore/kvstore/s3_sdk/s3_context_test.cc b/tensorstore/kvstore/s3_sdk/s3_context_test.cc index 9dad786c0..a776aae63 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context_test.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context_test.cc @@ -1,8 +1,20 @@ +// Copyright 2024 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
#include #include - #include "absl/log/absl_log.h" #include From 65644fe0f78f8284b4dcf1378395486550ec4bc9 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 10 May 2024 13:08:17 +0200 Subject: [PATCH 26/48] Simplify --- tensorstore/kvstore/s3_sdk/s3_context.cc | 234 +++++++++++------------ 1 file changed, 112 insertions(+), 122 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc index 0b1e0ec27..fdc9afd66 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -21,11 +21,9 @@ #include #include #include -#include #include #include #include -#include #include "absl/log/absl_log.h" #include "absl/synchronization/mutex.h" @@ -35,6 +33,20 @@ #include "tensorstore/internal/http/http_response.h" #include "tensorstore/internal/http/http_transport.h" +using AwsHttpClient = ::Aws::Http::HttpClient; +using AwsHttpRequest = ::Aws::Http::HttpRequest; +using AwsHttpResponse = ::Aws::Http::HttpResponse; +using AwsStandardHttpRequest = ::Aws::Http::Standard::StandardHttpRequest; +using AwsStandardHttpResponse = ::Aws::Http::Standard::StandardHttpResponse; +using AwsRateLimiterInterface = ::Aws::Utils::RateLimits::RateLimiterInterface; +using AwsLogLevel = ::Aws::Utils::Logging::LogLevel; +using AwsLogSystemInterface = ::Aws::Utils::Logging::LogSystemInterface; + +using ::Aws::Http::HttpMethodMapper::GetNameForHttpMethod; +using ::Aws::Auth::DefaultAWSCredentialsProviderChain; + +using ::tensorstore::internal_http::HttpRequest; +using ::tensorstore::internal_http::HttpResponse; using ::tensorstore::internal_http::IssueRequestOptions; namespace tensorstore { @@ -43,123 +55,100 @@ namespace internal_kvstore_s3 { namespace { static constexpr char kAwsTag[] = "AWS"; +static constexpr char kUserAgentHeader[] = "user-agent"; // Context guarded by mutex absl::Mutex context_mu_; std::weak_ptr context_ ABSL_GUARDED_BY(context_mu_); -/// Wraps a tensorstore HttpRequest in a Aws 
HttpRequest interface -class HttpRequestWrapper : public Aws::Http::Standard::StandardHttpRequest { +/// Provides a custom Aws HttpClient. +/// Overrides the Aws::HttpClient::MakeRequest to convert AWS HttpRequests +/// into tensorstore HttpRequests which are issued on the tensorstore +/// default HTTP transport. The returned tensorstore HttpResponse is +// converted into an AWS HttpResponse +class CustomHttpClient : public AwsHttpClient { public: - ::tensorstore::internal_http::HttpRequest request_; - absl::Cord payload_; - - HttpRequestWrapper(const Aws::Http::URI & uri, Aws::Http::HttpMethod method) : - StandardHttpRequest(uri, method), - payload_{} { - - request_.method = Aws::Http::HttpMethodMapper::GetNameForHttpMethod(method); - request_.url = uri.GetURIString(true); // include the query string + struct RequestAndPayload { + HttpRequest request; + absl::Cord cord; }; - virtual void SetHeaderValue(const Aws::String& headerName, const Aws::String& headerValue) override { - request_.headers.push_back(absl::StrCat(headerName, ": ", headerValue)); - StandardHttpRequest::SetHeaderValue(headerName, headerValue); - } - - virtual void SetHeaderValue(const char* headerName, const Aws::String& headerValue) override { - request_.headers.push_back(absl::StrCat(headerName, ": ", headerValue)); - StandardHttpRequest::SetHeaderValue(headerName, headerValue); - } - - virtual void AddContentBody(const std::shared_ptr& strContext) override { - StandardHttpRequest::AddContentBody(strContext); - if(!strContext) { - return; - } + // Converts an Aws StandardHttpRequest to a tensorstore HttpRequest + RequestAndPayload FromAwsRequest(const std::shared_ptr & aws_request) const { + absl::Cord payload; // Copy characters off the stream into the Cord // TODO: This is impractical for large data and - // should be mitigated by an iostream backed by a Cord - - // Remember the current position in the stream - std::streampos original = strContext->tellg(); - const size_t bufferSize = 
1024*1024; - std::vector buffer(bufferSize); - - while (strContext->read(buffer.data(), buffer.size()) || strContext->gcount() > 0) { - payload_.Append(absl::Cord(absl::string_view(buffer.data(), strContext->gcount()))); + // should be mitigated by an Aws::IOStream backed by a Cord + if(auto body = aws_request->GetContentBody(); body) { + const size_t bufferSize = 1024*1024; + std::vector buffer(bufferSize); + std::streampos original = body->tellg(); + while (body->read(buffer.data(), buffer.size()) || body->gcount() > 0) { + payload.Append(absl::Cord(absl::string_view(buffer.data(), body->gcount()))); + } + // Reset stream + body->clear(); + body->seekg(original); } - strContext->clear(); - strContext->seekg(original); + auto aws_headers = aws_request->GetHeaders(); + auto headers = std::vector{}; + for(auto &[name, value]: aws_headers) { + headers.emplace_back(absl::StrCat(name, ": ", value)); + } + std::string user_agent; + if(auto it = aws_headers.find(kUserAgentHeader); it != aws_headers.end()) { + user_agent = it->second; + } - ABSL_LOG(INFO) << "AddContentBody " << payload_.size(); + return RequestAndPayload{ + HttpRequest{ + GetNameForHttpMethod(aws_request->GetMethod()), + aws_request->GetURIString(true), + std::move(user_agent), + std::move(headers)}, + std::move(payload) + }; } -}; -/// Wraps a tensorstore HttpResponse in an Aws HttpResponse interface -class HttpResponseWrapper: public Aws::Http::Standard::StandardHttpResponse { -public: - ::tensorstore::internal_http::HttpResponse response_; - - HttpResponseWrapper( - ::tensorstore::internal_http::HttpResponse response, - const std::shared_ptr & originatingRequest) : - ::Aws::Http::Standard::StandardHttpResponse(originatingRequest), - response_(std::move(response)) { - - // Cast int response code to an HttpResponseCode enum - // Potential for undefined behaviour here, - // but AWS probably? 
won't respond with - // a response code it doesn't know about - SetResponseCode(static_cast(response_.status_code)); - - // TODO - // Add the payload to the Response Body if present - // This incurs a copy, which should be avoided by subclassing - // Aws::IOStream - if(!response_.payload.empty()) { - GetResponseBody() << response_.payload; - } - }; - - virtual void AddHeader(const Aws::String& headerName, const Aws::String& headerValue) override { - StandardHttpResponse::AddHeader(headerName, headerValue); - response_.headers.insert({headerName, headerValue}); - } -}; + // Converts a tensorstore response to an Aws StandardHttpResponse + std::shared_ptr ToAwsResponse( + const HttpResponse & ts_response, + const std::shared_ptr & aws_request) const { + auto aws_response = Aws::MakeShared(kAwsTag, aws_request); + aws_response->SetResponseCode(static_cast(ts_response.status_code)); + for(auto &[name, value]: aws_response->GetHeaders()) { + aws_response->AddHeader(name, value); + } + // Copy cord onto the body stream + // TODO: This should be avoided by subclassing Aws::IOStream + // to encapsulate a Cord + if(!ts_response.payload.empty()) { + aws_response->GetResponseBody() << ts_response.payload; + } -/// Provides a custom Aws HttpClient. 
-/// Overrides the Aws::HttpClient::MakeRequest to accept HttpRequestWrappers -/// (produce by CustomHttpFactory below), issue a tensorstore HttpRequest, -/// receive a tensorstore HttpResponse to be wrapped in a HttpResponseWrapper -class CustomHttpClient : public Aws::Http::HttpClient { -public: - std::shared_ptr MakeRequest( - const std::shared_ptr & request, - Aws::Utils::RateLimits::RateLimiterInterface* readLimiter = nullptr, - Aws::Utils::RateLimits::RateLimiterInterface* writeLimiter = nullptr) const override { - if(auto req_adapter = std::dynamic_pointer_cast(request); req_adapter) { - // Issue the wrapped HttpRequest on a tensorstore executor - auto transport = ::tensorstore::internal_http::GetDefaultHttpTransport(); - ABSL_LOG(INFO) << req_adapter->request_ << " " << req_adapter->payload_; - auto req_options = req_adapter->payload_.empty() ? - IssueRequestOptions{} : - IssueRequestOptions(std::move(req_adapter->payload_)); - auto future = transport->IssueRequest( - req_adapter->request_, std::move(req_options)); - // TODO - // Figure out how to use a continuation future.ExecuteWhenReady here - auto response = future.value(); - ABSL_LOG(INFO) << response; - return Aws::MakeShared(kAwsTag, response, request); - } + return aws_response; + } - auto fail = Aws::MakeShared(kAwsTag, request); - fail->SetResponseCode(Aws::Http::HttpResponseCode::PRECONDITION_FAILED); - return fail; + /// Overrides the SDK mechanism for issuing AWS HttpRequests + /// Converts AWS HttpRequests to their tensorstore requivalent, + /// which is issued on the default tensorstore transport. + /// The tensorstore response is converted into an AWS HttpResponse. 
+ std::shared_ptr MakeRequest( + const std::shared_ptr & request, + AwsRateLimiterInterface* readLimiter = nullptr, + AwsRateLimiterInterface* writeLimiter = nullptr) const override { + // Issue the wrapped HttpRequest on a tensorstore executor + auto transport = ::tensorstore::internal_http::GetDefaultHttpTransport(); + auto [ts_request, payload] = FromAwsRequest(request); + ABSL_LOG(INFO) << ts_request << " " << payload; + auto future = transport->IssueRequest(ts_request, IssueRequestOptions(payload)); + // TODO: if possible use a continuation (future.ExecuteWhenReady) here + auto response = future.value(); + ABSL_LOG(INFO) << response; + return ToAwsResponse(response, request); }; }; @@ -167,12 +156,12 @@ class CustomHttpClient : public Aws::Http::HttpClient { /// Custom factory overriding Aws::Http::DefaultHttpFatory /// Generates a CustomHttpClient (which defers to tensorflow's curl library) /// as well as overriding Createhttp Request to return -/// HttpRequestWrappers +/// Standard Http Requests class CustomHttpFactory : public Aws::Http::HttpClientFactory { public: std::shared_ptr CreateHttpClient( const Aws::Client::ClientConfiguration & clientConfiguration) const override { - ABSL_LOG(INFO) << "Making a custom HTTP Client"; + ABSL_LOG(INFO) << "Constructing custom HTTP Client"; return Aws::MakeShared(kAwsTag); }; @@ -186,7 +175,7 @@ class CustomHttpFactory : public Aws::Http::HttpClientFactory { const Aws::Http::URI& uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory& streamFactory) const override { - auto request = Aws::MakeShared(kAwsTag, uri, method); + auto request = Aws::MakeShared(kAwsTag, uri, method); request->SetResponseStreamFactory(streamFactory); return request; } @@ -194,15 +183,15 @@ class CustomHttpFactory : public Aws::Http::HttpClientFactory { /// Connect the AWS SDK's logging system to Abseil logging -class AWSLogSystem : public Aws::Utils::Logging::LogSystemInterface { +class AWSLogSystem : public AwsLogSystemInterface { 
public: - AWSLogSystem(Aws::Utils::Logging::LogLevel log_level) : log_level_(log_level) {}; - Aws::Utils::Logging::LogLevel GetLogLevel(void) const override { + AWSLogSystem(AwsLogLevel log_level) : log_level_(log_level) {}; + AwsLogLevel GetLogLevel(void) const override { return log_level_; }; // Writes the stream to ProcessFormattedStatement. - void LogStream(Aws::Utils::Logging::LogLevel log_level, const char* tag, + void LogStream(AwsLogLevel log_level, const char* tag, const Aws::OStringStream& messageStream) override { LogMessage(log_level, messageStream.rdbuf()->str().c_str()); } @@ -211,16 +200,16 @@ class AWSLogSystem : public Aws::Utils::Logging::LogSystemInterface { void Flush() override { return; }; // Overridden, but prefer the safer LogStream - void Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, + void Log(AwsLogLevel log_level, const char* tag, const char* format, ...) override; private: - void LogMessage(Aws::Utils::Logging::LogLevel log_level, const std::string & message); - Aws::Utils::Logging::LogLevel log_level_; + void LogMessage(AwsLogLevel log_level, const std::string & message); + AwsLogLevel log_level_; }; -void AWSLogSystem::Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, +void AWSLogSystem::Log(AwsLogLevel log_level, const char* tag, const char* format, ...) 
{ char buffer[256]; va_list args; @@ -230,22 +219,22 @@ void AWSLogSystem::Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, LogMessage(log_level, buffer); } -void AWSLogSystem::LogMessage(Aws::Utils::Logging::LogLevel log_level, const std::string & message) { +void AWSLogSystem::LogMessage(AwsLogLevel log_level, const std::string & message) { switch(log_level) { - case Aws::Utils::Logging::LogLevel::Info: + case AwsLogLevel::Info: ABSL_LOG(INFO) << message; break; - case Aws::Utils::Logging::LogLevel::Warn: + case AwsLogLevel::Warn: ABSL_LOG(WARNING) << message; break; - case Aws::Utils::Logging::LogLevel::Error: + case AwsLogLevel::Error: ABSL_LOG(ERROR) << message; break; - case Aws::Utils::Logging::LogLevel::Fatal: + case AwsLogLevel::Fatal: ABSL_LOG(FATAL) << message; break; - case Aws::Utils::Logging::LogLevel::Trace: - case Aws::Utils::Logging::LogLevel::Debug: + case AwsLogLevel::Trace: + case AwsLogLevel::Debug: default: ABSL_LOG(INFO) << message; break; @@ -273,8 +262,8 @@ std::shared_ptr GetAwsContext() { options.httpOptions.installSigPipeHandler = false; // Install AWS -> Abseil Logging Translator - //auto level = Aws::Utils::Logging::LogLevel::Debug; - auto level = Aws::Utils::Logging::LogLevel::Info; + //auto level = AwsLogLevel::Debug; + auto level = AwsLogLevel::Info; options.loggingOptions.logLevel = level; options.loggingOptions.logger_create_fn = [level=level]() { return Aws::MakeShared(kAwsTag, level); @@ -282,9 +271,9 @@ std::shared_ptr GetAwsContext() { ABSL_LOG(INFO) << "Initialising AWS SDK API"; Aws::InitAPI(options); - ABSL_LOG(INFO) << "Done Initialising AWS SDK API"; + ABSL_LOG(INFO) << "Done initialising AWS SDK API"; - auto provider = Aws::MakeShared(kAwsTag); + auto provider = Aws::MakeShared(kAwsTag); auto ctx = std::shared_ptr( new AwsContext{ @@ -294,6 +283,7 @@ std::shared_ptr GetAwsContext() { absl::MutexLock lock(&context_mu_); ABSL_LOG(INFO) << "Shutting down AWS SDK API"; Aws::ShutdownAPI(ctx->options); + 
ABSL_LOG(INFO) << "Done shutting down AWS SDK API"; delete ctx; }); context_ = ctx; From 83893e9510ecc744d780c9163e393a3df3539627 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 10 May 2024 18:01:33 +0200 Subject: [PATCH 27/48] Add Async S3Client test cases --- tensorstore/kvstore/s3_sdk/localstack_test.cc | 105 ++++++++++++++++-- 1 file changed, 97 insertions(+), 8 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/localstack_test.cc b/tensorstore/kvstore/s3_sdk/localstack_test.cc index 7db29773b..77c3634b5 100644 --- a/tensorstore/kvstore/s3_sdk/localstack_test.cc +++ b/tensorstore/kvstore/s3_sdk/localstack_test.cc @@ -22,6 +22,7 @@ #include "absl/flags/flag.h" #include "absl/log/absl_check.h" #include "absl/log/absl_log.h" +#include "absl/synchronization/notification.h" #include "absl/status/status.h" #include "absl/strings/match.h" #include "absl/strings/str_format.h" @@ -29,6 +30,7 @@ #include "absl/time/time.h" #include +#include #include #include #include @@ -47,8 +49,11 @@ #include "tensorstore/internal/http/transport_test_utils.h" #include "tensorstore/internal/json_gtest.h" #include "tensorstore/internal/os/subprocess.h" +#include "tensorstore/internal/thread/thread_pool.h" +#include "tensorstore/util/executor.h" #include "tensorstore/util/result.h" + #include "tensorstore/kvstore/s3_sdk/s3_context.h" // When provided with --localstack_binary, localstack_test will start @@ -101,6 +106,7 @@ using ::tensorstore::internal_kvstore_s3::AwsContext; namespace { +static constexpr char kAwsTag[] = "AWS"; static constexpr char kAwsAccessKeyId[] = "LSIAQAAAAAAVNCBMPNSG"; static constexpr char kAwsSecretKeyId[] = "localstackdontcare"; @@ -240,11 +246,30 @@ class LocalStackFixture : public ::testing::Test { << Bucket(); } - auto cfg = Aws::Client::ClientConfiguration{}; - cfg.endpointOverride = endpoint_url(); - cfg.region = Region(); + CreateClient(); + } + + // Create client for use by test cases + static void CreateClient() { + // Offload AWS Client tasks 
onto a Tensorstore executor + class TensorStoreExecutor : public Aws::Utils::Threading::Executor { + public: + TensorStoreExecutor(): executor_(::tensorstore::internal::DetachedThreadPool(4)) {} + protected: + bool SubmitToThread(std::function && fn) override { + ::tensorstore::WithExecutor(executor_, std::move(fn))(); + return true; + } + private: + ::tensorstore::Executor executor_; + }; + + auto config = Aws::Client::ClientConfiguration{}; + config.endpointOverride = endpoint_url(); + config.region = Region(); + config.executor = Aws::MakeShared(kAwsTag); client = std::make_shared( - cfg, + config, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Always, false); } @@ -264,10 +289,11 @@ class LocalStackFixture : public ::testing::Test { // Attempts to create the kBucket bucket on the localstack host. static void MaybeCreateBucket() { + // Create a separate client for creating the bucket + // Without anonymous credentials bucket creation fails with 400 IllegalRegionConstraint auto cfg = Aws::Client::ClientConfiguration{}; cfg.endpointOverride = endpoint_url(); cfg.region = Region(); - // Without Anonymous credentials bucket creation fails with 400 IllegalRegionConstraint auto create_client = std::make_shared(Aws::Auth::AWSCredentials(), cfg); auto create_request = Aws::S3::Model::CreateBucketRequest{}; @@ -298,14 +324,14 @@ LocalStackProcess LocalStackFixture::process; std::shared_ptr LocalStackFixture::client = nullptr; std::shared_ptr LocalStackFixture::context = tensorstore::internal_kvstore_s3::GetAwsContext(); -TEST_F(LocalStackFixture, Basic) { +TEST_F(LocalStackFixture, BasicSync) { std::string payload = "this is a test"; // Put an object auto put_request = Aws::S3::Model::PutObjectRequest{}; put_request.SetBucket(Bucket()); put_request.SetKey("portunus"); - put_request.SetBody(Aws::MakeShared("AWS", payload)); + put_request.SetBody(Aws::MakeShared(kAwsTag, payload)); auto put_outcome = client->PutObject(put_request); 
EXPECT_TRUE(put_outcome.IsSuccess()); @@ -313,13 +339,14 @@ TEST_F(LocalStackFixture, Basic) { put_request = Aws::S3::Model::PutObjectRequest{}; put_request.SetBucket(Bucket()); put_request.SetKey("portunus0"); - put_request.SetBody(Aws::MakeShared("AWS", payload)); + put_request.SetBody(Aws::MakeShared(kAwsTag, payload)); put_outcome = client->PutObject(put_request); EXPECT_TRUE(put_outcome.IsSuccess()); // List the objects auto list_request = Aws::S3::Model::ListObjectsV2Request{}; list_request.SetBucket(Bucket()); + list_request.SetMaxKeys(1); auto continuation_token = Aws::String{}; Aws::Vector objects; @@ -354,4 +381,66 @@ TEST_F(LocalStackFixture, Basic) { EXPECT_EQ(result, payload); } +TEST_F(LocalStackFixture, BasicAsync) { + struct TestCallbacks { + // Data relevant to GET and PUT + std::string key; + std::string payload; + + // Results and notifications + bool put_succeeded = false; + std::optional get_result; + absl::Notification done; + + void do_put() { + auto put_request = Aws::S3::Model::PutObjectRequest{}; + put_request.SetBucket(Bucket()); + put_request.SetKey(key); + put_request.SetBody(Aws::MakeShared(kAwsTag, payload)); + client->PutObjectAsync(put_request, [this]( + const auto *, const auto &, const auto & outcome, const auto &) { + this->on_put(outcome); + }); + } + + void on_put(const Aws::S3::Model::PutObjectOutcome & outcome) { + if(outcome.IsSuccess()) { + put_succeeded = true; + do_get(); + } else { + done.Notify(); + } + } + + void do_get() { + auto get_request = Aws::S3::Model::GetObjectRequest{}; + get_request.SetBucket(Bucket()); + get_request.SetKey(key); + client->GetObjectAsync(get_request, [this]( + const auto *, const auto &, auto outcome, const auto &) { + this->on_get(std::move(outcome)); + }); + } + + void on_get(Aws::S3::Model::GetObjectOutcome outcome) { + if(outcome.IsSuccess()) { + std::string buffer; + std::getline(outcome.GetResult().GetBody(), buffer); + get_result = buffer; + } + + done.Notify(); + } + }; + + auto 
callbacks = TestCallbacks{"key", "value"}; + callbacks.do_put(); + EXPECT_TRUE(callbacks.done.WaitForNotificationWithTimeout(absl::Milliseconds(10))); + EXPECT_TRUE(callbacks.put_succeeded); + EXPECT_TRUE(callbacks.get_result.has_value()); + EXPECT_EQ(callbacks.get_result.value(), callbacks.payload); +} + + + } // namespace From f94f55df53e81908805b22b4d0687c322a7811c7 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Mon, 27 May 2024 15:20:23 +0200 Subject: [PATCH 28/48] Reomve unnecessary Cord construction --- tensorstore/kvstore/s3_sdk/s3_context.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorstore/kvstore/s3_sdk/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc index fdc9afd66..ec62b61f5 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -85,7 +85,7 @@ class CustomHttpClient : public AwsHttpClient { std::vector buffer(bufferSize); std::streampos original = body->tellg(); while (body->read(buffer.data(), buffer.size()) || body->gcount() > 0) { - payload.Append(absl::Cord(absl::string_view(buffer.data(), body->gcount()))); + payload.Append(absl::string_view(buffer.data(), body->gcount())); } // Reset stream body->clear(); From c122d4a5211b2e56ba182a7ebe274ac9414695b7 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Mon, 27 May 2024 15:20:50 +0200 Subject: [PATCH 29/48] Fix closing namespace typo --- tensorstore/kvstore/s3_sdk/s3_context.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorstore/kvstore/s3_sdk/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc index ec62b61f5..f5dbf2b5a 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -291,4 +291,4 @@ std::shared_ptr GetAwsContext() { } } // namespace internal_kvstore_s3 -} // neamespace tensorstore +} // namespace tensorstore From 47def678e8640b810a2492315ec4fb8075106175 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Thu, 30 May 2024 
16:40:03 +0200 Subject: [PATCH 30/48] Initial CordStreamBuf implementation --- tensorstore/kvstore/s3_sdk/BUILD | 24 +++ tensorstore/kvstore/s3_sdk/cord_streambuf.cc | 155 ++++++++++++++++++ tensorstore/kvstore/s3_sdk/cord_streambuf.h | 77 +++++++++ .../kvstore/s3_sdk/cord_streambuf_test.cc | 96 +++++++++++ 4 files changed, 352 insertions(+) create mode 100644 tensorstore/kvstore/s3_sdk/cord_streambuf.cc create mode 100644 tensorstore/kvstore/s3_sdk/cord_streambuf.h create mode 100644 tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc diff --git a/tensorstore/kvstore/s3_sdk/BUILD b/tensorstore/kvstore/s3_sdk/BUILD index 96ae924a1..03807f1a8 100644 --- a/tensorstore/kvstore/s3_sdk/BUILD +++ b/tensorstore/kvstore/s3_sdk/BUILD @@ -12,11 +12,24 @@ filegroup( "*.yml", ]), ) + + +tensorstore_cc_library( + name = "cord_streambuf", + srcs = ["cord_streambuf.cc"], + hdrs = ["cord_streambuf.h"], + deps = [ + "@com_google_absl//absl/strings:cord", + ] +) + + tensorstore_cc_library( name = "s3_context", srcs = ["s3_context.cc"], hdrs = ["s3_context.h"], deps = [ + ":cord_streambuf", "//tensorstore/util:executor", "//tensorstore/internal/http", "//tensorstore/internal/http:curl_transport", @@ -38,6 +51,17 @@ tensorstore_cc_test( ] ) +tensorstore_cc_test( + name = "cord_streambuf_test", + size = "small", + srcs = ["cord_streambuf_test.cc"], + deps = [ + ":cord_streambuf", + "@com_google_googletest//:gtest_main", + "@com_github_aws_cpp_sdk//:core", + ] +) + py_binary( name = "moto_server", testonly = 1, diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc new file mode 100644 index 000000000..5ab11dbdc --- /dev/null +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc @@ -0,0 +1,155 @@ +// Copyright 2024 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/kvstore/s3_sdk/cord_streambuf.h" + +#include +#include + +#include "absl/strings/cord.h" + +using absl::Cord; +using absl::CordBuffer; +using std::streamsize; +using std::streampos; +using std::streamoff; +using std::ios_base; + +namespace tensorstore { +namespace internal_kvstore_s3 { + +CordStreamBuf::CordStreamBuf() : CordStreamBuf(Cord()) {} + +CordStreamBuf::CordStreamBuf(Cord && cord) : + mode_(cord.size() == 0 ? ios_base::out : ios_base::in), + cord_(std::move(cord)), + get_iterator_(cord_.Chars().begin()) { + + // Set up the get area, if the Cord has data + if(get_iterator_ != cord_.Chars().end()) { + auto chunk = Cord::ChunkRemaining(get_iterator_); + char * data = const_cast(chunk.data()); + setg(data, data, data + chunk.size()); + } +} + +Cord CordStreamBuf::GetCord() { + Cord result; + std::swap(result, cord_); + get_iterator_ = cord_.Chars().begin(); + char dummy; + setg(&dummy, &dummy, &dummy + 1); + setp(&dummy, &dummy + 1); + return result; +} + +// Bulk put operation +streamsize CordStreamBuf::xsputn(const char * s, streamsize count) { + if(!(mode_ & ios_base::out)) return 0; + streamsize n = count; + bool first = true; + streamsize p = 0; + + while (n > 0) { + CordBuffer buffer = first ? 
cord_.GetAppendBuffer(n) + : CordBuffer::CreateWithDefaultLimit(n); + + auto span = buffer.available_up_to(n); + for(int i = 0; i < span.size(); ++i, ++p) span[i] = s[p]; + buffer.IncreaseLengthBy(span.size()); + cord_.Append(std::move(buffer)); + n -= span.size(); + first = false; + } + + return p; +} + +// Handle buffer overflow. +CordStreamBuf::int_type CordStreamBuf::overflow(int_type ch) { + // Not writing or eof received + if(!(mode_ & ios_base::out)) return traits_type::eof(); + if(traits_type::eq_int_type(ch, traits_type::eof())) return traits_type::eof(); + auto c = traits_type::to_char_type(ch); + cord_.Append(absl::string_view(&c, 1)); + return ch; +} + +// Bulk get operation +streamsize CordStreamBuf::xsgetn(char * s, streamsize count) { + // Not reading or no more Cord data + if(!(mode_ & ios_base::in)) return 0; + if(get_iterator_ == cord_.Chars().end()) return 0; + auto chunk = cord_.ChunkRemaining(get_iterator_); + auto bytes_to_read = std::min(chunk.size(), count); + for(streamsize i = 0; i < bytes_to_read; ++i) s[i] = chunk[i]; + Cord::Advance(&get_iterator_, bytes_to_read); + return bytes_to_read; +} + +// Handle buffer underflow. 
+CordStreamBuf::int_type CordStreamBuf::underflow() { + // Not reading or no more Cord data + if(!(mode_ & ios_base::in) || get_iterator_ == cord_.Chars().end()) return traits_type::eof(); + Cord::Advance(&get_iterator_, cord_.ChunkRemaining(get_iterator_).size()); + if(get_iterator_ == cord_.Chars().end()) return traits_type::eof(); + auto chunk = cord_.ChunkRemaining(get_iterator_); + char * data = const_cast(chunk.data()); + setg(data, data, data + chunk.size()); + return traits_type::to_int_type(*data); +} + +streampos CordStreamBuf::seekoff(streamoff off, ios_base::seekdir way, ios_base::openmode which) { + if (which == ios_base::in) { + if (way == ios_base::beg) { + // Seek from the beginning of the cord + if(off >= cord_.size()) return traits_type::eof(); + get_iterator_ = cord_.Chars().begin(); + Cord::Advance(&get_iterator_, off); + auto chunk = cord_.ChunkRemaining(get_iterator_); + char * data = const_cast(chunk.data()); + setg(data, data, data + chunk.size()); + } else if (way == ios_base::cur) { + // Seek within the current cord chunk if possible, + // otherwise advance to the next chunk + if(get_iterator_ == cord_.Chars().end()) return traits_type::eof(); + streampos available = egptr() - gptr(); + if(off < available) { + Cord::Advance(&get_iterator_, off); + setg(eback(), gptr() + off, egptr()); + } else { + Cord::Advance(&get_iterator_, cord_.ChunkRemaining(get_iterator_).size()); + if(get_iterator_ == cord_.Chars().end()) return traits_type::eof(); + auto chunk = cord_.ChunkRemaining(get_iterator_); + char * data = const_cast(chunk.data()); + setg(data, data, data + chunk.size()); + } + } else if (way == ios_base::end) { + // Seeks past the stream end are unsupported + return traits_type::eof();; + } + + return std::distance(cord_.Chars().begin(), get_iterator_); + } else if (which == ios_base::out) { + // Only support appends + return traits_type::eof();; + } + return traits_type::eof();; +} + + + + +} // namespace internal_kvstore_s3 +} // 
namespace tensorstore diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf.h b/tensorstore/kvstore/s3_sdk/cord_streambuf.h new file mode 100644 index 000000000..b9d18ee1d --- /dev/null +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf.h @@ -0,0 +1,77 @@ +// Copyright 2024 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_KVSTORE_S3_STREAMBUF_H_ +#define TENSORSTORE_KVSTORE_S3_STREAMBUF_H_ + +#include +#include + +#include "absl/strings/cord.h" + +namespace tensorstore { +namespace internal_kvstore_s3 { + +/// Basic implementation of a std::basic_streambuf +/// backed by an abseil Cord. +/// It should be used in two modes +/// (1) Append-only writing mode, where data is appended to the underlying Cord +/// (2) Read mode, where data is read from the Cord. Seeking is supported +/// within the Stream Buffer. 
+class CordStreamBuf : public std::basic_streambuf { +public: + // Creates a stream buffer for writing + CordStreamBuf(); + // Creates a stream buffer for reading from the supplied Cord + CordStreamBuf(absl::Cord && cord); + + // Obtain read access to the backing Cord + const absl::Cord & GetCord() const { return cord_; } + + // Returns the underlying Cord, resetting the underlying stream + absl::Cord GetCord(); + +protected: + // Bulk put operation + virtual std::streamsize xsputn(const char * s, std::streamsize count) override; + // Bulk get operation + virtual std::streamsize xsgetn(char * s, std::streamsize count) override; + // Handle buffer overflow. + virtual int_type overflow(int_type ch) override; + // Handle buffer underflow. + virtual int_type underflow() override; + + // Seek within the underlying Cord (only seeks in the get area are supported) + virtual std::streampos seekoff( + std::streamoff off, + std::ios_base::seekdir way, + std::ios_base::openmode which = std::ios_base::in | std::ios_base::out) override; + + // Seek within the underlying Cord (only seeks in the get area are supported) + virtual std::streampos seekpos( + std::streampos sp, + std::ios_base::openmode which = std::ios_base::in | std::ios_base::out) override { + return seekoff(sp - std::streampos(0), std::ios_base::beg, which); + } + +private: + std::ios_base::openmode mode_; + absl::Cord cord_; + absl::Cord::CharIterator get_iterator_; +}; + +} // namespace internal_kvstore_s3 +} // namespace tensorstore + +#endif // TENSORSTORE_KVSTORE_S3_STREAMBUF_H_ \ No newline at end of file diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc new file mode 100644 index 000000000..d8ceadd8a --- /dev/null +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc @@ -0,0 +1,96 @@ +// Copyright 2024 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/kvstore/s3_sdk/cord_streambuf.h" + +#include +#include +#include + +#include "absl/strings/cord.h" + +#include +#include + +#include + +using ::absl::Cord; +using ::absl::CordBuffer; +using ::Aws::Utils::Stream::DefaultUnderlyingStream; +using ::Aws::MakeUnique; +using ::tensorstore::internal_kvstore_s3::CordStreamBuf; + +namespace { + +static constexpr char kAwsTag[] = "AWS"; + +CordStreamBuf & GetStreamBuf(DefaultUnderlyingStream & stream) { + return *dynamic_cast(stream.rdbuf()); +} + +// TEST(CordStreamBufTest, CordAdvanceTest) { +// auto cord = Cord{"This is a test"}; +// auto it = cord.Chars().end(); +// auto chunk = cord.ChunkRemaining(it); +// EXPECT_EQ(chunk.size(), 0); +// Cord::Advance(&it, cord.ChunkRemaining(it).size()); +// EXPECT_EQ(it, cord.Chars().end()); +// } + +TEST(CordStreamBufTest, Basic) { + auto os = DefaultUnderlyingStream(MakeUnique(kAwsTag)); + os << "Hello World"; + os << " "; + os << "This is a test"; + EXPECT_TRUE(os.good()); + // EXPECT_EQ(os.tellp(), 10); + auto cord = GetStreamBuf(os).GetCord(); + EXPECT_EQ(cord, "Hello World This is a test"); + + // Single Cord chunk + auto it = cord.chunk_begin(); + EXPECT_EQ(*it, "Hello World This is a test"); + EXPECT_EQ(++it, cord.chunk_end()); + + auto is = DefaultUnderlyingStream(MakeUnique(kAwsTag, std::move(cord))); + std::istreambuf_iterator in_it{is}, end; + std::string s{in_it, end}; + EXPECT_EQ(s, "Hello World This is a test"); + EXPECT_TRUE(is.good()); +} + +TEST(CordSreamBufTest, GetSeek) { + absl::Cord cord; + 
int kNBuffers = 3; + for(char ch = 0; ch < kNBuffers; ++ch) { + cord.Append(std::string(CordBuffer::kDefaultLimit, '1' + ch)); + } + EXPECT_EQ(std::distance(cord.Chunks().begin(), cord.Chunks().end()), 3); + + auto is = DefaultUnderlyingStream( + MakeUnique(kAwsTag, std::move(cord))); + + for(char ch = 0; ch < kNBuffers; ++ch) { + is.seekg(5 + CordBuffer::kDefaultLimit * ch); + EXPECT_EQ(is.tellg(), 5 + CordBuffer::kDefaultLimit * ch); + char result[6] = {0x00}; + is.read(result, sizeof(result)); + auto expected = std::string(sizeof(result), '1' + ch); + EXPECT_EQ(std::string_view(result, sizeof(result)), expected); + EXPECT_TRUE(is.good()); + EXPECT_EQ(is.tellg(), 5 + CordBuffer::kDefaultLimit * ch + sizeof(result)); + } +} + +} // namespace \ No newline at end of file From ce7bdf9fdf268c719986ce5c4f07a12f14190024 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Thu, 30 May 2024 20:18:01 +0200 Subject: [PATCH 31/48] Fix indenting --- tensorstore/kvstore/s3_sdk/localstack_test.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/localstack_test.cc b/tensorstore/kvstore/s3_sdk/localstack_test.cc index 77c3634b5..fb94b4af5 100644 --- a/tensorstore/kvstore/s3_sdk/localstack_test.cc +++ b/tensorstore/kvstore/s3_sdk/localstack_test.cc @@ -351,23 +351,23 @@ TEST_F(LocalStackFixture, BasicSync) { Aws::Vector objects; do { - if (!continuation_token.empty()) { - list_request.SetContinuationToken(continuation_token); - } + if (!continuation_token.empty()) { + list_request.SetContinuationToken(continuation_token); + } - auto outcome = client->ListObjectsV2(list_request); - EXPECT_TRUE(outcome.IsSuccess()); + auto outcome = client->ListObjectsV2(list_request); + EXPECT_TRUE(outcome.IsSuccess()); - auto page_objects = outcome.GetResult().GetContents(); - objects.insert(objects.end(), page_objects.begin(), page_objects.end()); - continuation_token = outcome.GetResult().GetNextContinuationToken(); + auto page_objects 
= outcome.GetResult().GetContents(); + objects.insert(objects.end(), page_objects.begin(), page_objects.end()); + continuation_token = outcome.GetResult().GetNextContinuationToken(); } while (!continuation_token.empty()); EXPECT_EQ(objects.size(), 2); for (const auto &object: objects) { - EXPECT_EQ(object.GetSize(), payload.size()); + EXPECT_EQ(object.GetSize(), payload.size()); } // Get the contents of the key From 6eb3c89c6806d8b559be7a3575d08367ef8db35e Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Thu, 30 May 2024 20:20:29 +0200 Subject: [PATCH 32/48] comment --- tensorstore/kvstore/s3/new_s3_request_builder.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorstore/kvstore/s3/new_s3_request_builder.h b/tensorstore/kvstore/s3/new_s3_request_builder.h index 846dc7e61..9e4e28150 100644 --- a/tensorstore/kvstore/s3/new_s3_request_builder.h +++ b/tensorstore/kvstore/s3/new_s3_request_builder.h @@ -82,6 +82,8 @@ class AwsHttpRequestAdapter : public Aws::Http::Standard::StandardHttpRequest { FromStringMethod(method)) {} }; +/// Similar interface to S3RequestBuilder, +/// but builds an AwsHttpRequestAdapter internally class NewS3RequestBuilder { public: NewS3RequestBuilder(std::string_view method, std::string endpoint_url) : From f885bccb38aa4aca213707b83884a78f14a5bb19 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 31 May 2024 09:31:17 +0200 Subject: [PATCH 33/48] Header hygiene --- tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc index d8ceadd8a..5d74f4270 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc @@ -15,7 +15,6 @@ #include "tensorstore/kvstore/s3_sdk/cord_streambuf.h" #include -#include #include #include "absl/strings/cord.h" From 9edf7d7cef2c9470c45b7b0cfb20bf0835a08707 Mon Sep 17 00:00:00 2001 From: Simon Perkins 
Date: Fri, 31 May 2024 09:31:31 +0200 Subject: [PATCH 34/48] Remove commented out test case --- tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc index 5d74f4270..1e5fdb15b 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc @@ -38,15 +38,6 @@ CordStreamBuf & GetStreamBuf(DefaultUnderlyingStream & stream) { return *dynamic_cast(stream.rdbuf()); } -// TEST(CordStreamBufTest, CordAdvanceTest) { -// auto cord = Cord{"This is a test"}; -// auto it = cord.Chars().end(); -// auto chunk = cord.ChunkRemaining(it); -// EXPECT_EQ(chunk.size(), 0); -// Cord::Advance(&it, cord.ChunkRemaining(it).size()); -// EXPECT_EQ(it, cord.Chars().end()); -// } - TEST(CordStreamBufTest, Basic) { auto os = DefaultUnderlyingStream(MakeUnique(kAwsTag)); os << "Hello World"; From d2d7f87165fbd9202a8d2a8663f8ada790659e1c Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 31 May 2024 11:06:26 +0200 Subject: [PATCH 35/48] Improve seek from current position case --- tensorstore/kvstore/s3_sdk/cord_streambuf.cc | 21 ++++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc index 5ab11dbdc..72c73aa31 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf.cc +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc @@ -121,25 +121,24 @@ streampos CordStreamBuf::seekoff(streamoff off, ios_base::seekdir way, ios_base: char * data = const_cast(chunk.data()); setg(data, data, data + chunk.size()); } else if (way == ios_base::cur) { - // Seek within the current cord chunk if possible, - // otherwise advance to the next chunk if(get_iterator_ == cord_.Chars().end()) return traits_type::eof(); - streampos available = egptr() - gptr(); - if(off < available) { - 
Cord::Advance(&get_iterator_, off); - setg(eback(), gptr() + off, egptr()); - } else { - Cord::Advance(&get_iterator_, cord_.ChunkRemaining(get_iterator_).size()); - if(get_iterator_ == cord_.Chars().end()) return traits_type::eof(); + // Advance to next chunk if there isn't space in the current + while(egptr() - gptr() <= off) { auto chunk = cord_.ChunkRemaining(get_iterator_); + Cord::Advance(&get_iterator_, chunk.size()); + if(get_iterator_ == cord_.Chars().end()) return traits_type::eof(); char * data = const_cast(chunk.data()); setg(data, data, data + chunk.size()); + off -= chunk.size(); + } + if(off > 0) { + Cord::Advance(&get_iterator_, off); + setg(eback(), gptr() + off, egptr()); } } else if (way == ios_base::end) { // Seeks past the stream end are unsupported - return traits_type::eof();; + return traits_type::eof(); } - return std::distance(cord_.Chars().begin(), get_iterator_); } else if (which == ios_base::out) { // Only support appends From 171393c5af907c632b3158e68a69dd391d0f4ac0 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 31 May 2024 11:13:13 +0200 Subject: [PATCH 36/48] Sharpen test cases --- .../kvstore/s3_sdk/cord_streambuf_test.cc | 50 +++++++++++++++---- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc index 1e5fdb15b..50ca85440 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc @@ -33,18 +33,33 @@ using ::tensorstore::internal_kvstore_s3::CordStreamBuf; namespace { static constexpr char kAwsTag[] = "AWS"; +static constexpr int kNBuffers = 3; CordStreamBuf & GetStreamBuf(DefaultUnderlyingStream & stream) { return *dynamic_cast(stream.rdbuf()); } -TEST(CordStreamBufTest, Basic) { +TEST(CordStreamBufTest, Read) { + auto cord = absl::Cord{"Hello World This is a test"}; + auto is = DefaultUnderlyingStream(MakeUnique(kAwsTag, std::move(cord))); + 
std::istreambuf_iterator in_it{is}, end; + std::string s{in_it, end}; + EXPECT_EQ(s, "Hello World This is a test"); + EXPECT_TRUE(is.good()); + + // eof triggered. + char ch; + EXPECT_FALSE(is.get(ch)); + EXPECT_FALSE(is.good()); +} + + +TEST(CordStreamBufTest, Write) { auto os = DefaultUnderlyingStream(MakeUnique(kAwsTag)); os << "Hello World"; os << " "; os << "This is a test"; EXPECT_TRUE(os.good()); - // EXPECT_EQ(os.tellp(), 10); auto cord = GetStreamBuf(os).GetCord(); EXPECT_EQ(cord, "Hello World This is a test"); @@ -52,17 +67,12 @@ TEST(CordStreamBufTest, Basic) { auto it = cord.chunk_begin(); EXPECT_EQ(*it, "Hello World This is a test"); EXPECT_EQ(++it, cord.chunk_end()); - - auto is = DefaultUnderlyingStream(MakeUnique(kAwsTag, std::move(cord))); - std::istreambuf_iterator in_it{is}, end; - std::string s{in_it, end}; - EXPECT_EQ(s, "Hello World This is a test"); - EXPECT_TRUE(is.good()); } -TEST(CordSreamBufTest, GetSeek) { + +/// Test seeking within the CordStreamBuf +TEST(CordSreamBufTest, ReadSeek) { absl::Cord cord; - int kNBuffers = 3; for(char ch = 0; ch < kNBuffers; ++ch) { cord.Append(std::string(CordBuffer::kDefaultLimit, '1' + ch)); } @@ -83,4 +93,24 @@ TEST(CordSreamBufTest, GetSeek) { } } +/// Test that reading the CordStreamBuf reads the Cord +TEST(CordStreamBufTest, GetEntireStreamBuf) { + absl::Cord cord; + for(char ch = 0; ch < kNBuffers; ++ch) { + cord.Append(std::string(CordBuffer::kDefaultLimit, '1' + ch)); + } + EXPECT_EQ(std::distance(cord.Chunks().begin(), cord.Chunks().end()), 3); + + auto is = DefaultUnderlyingStream( + MakeUnique(kAwsTag, std::move(cord))); + + int count = 0; + char ch; + while(is.get(ch)) { + EXPECT_EQ(ch, '1' + count / CordBuffer::kDefaultLimit); + ++count; + } + EXPECT_EQ(count, CordBuffer::kDefaultLimit * kNBuffers); +} + } // namespace \ No newline at end of file From 430f9470c10951f87e9b21c68839d14a1a2c8495 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Sun, 2 Jun 2024 11:41:01 +0200 Subject: [PATCH 
37/48] seekoff improvements --- tensorstore/kvstore/s3_sdk/cord_streambuf.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc index 72c73aa31..b4e9903eb 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf.cc +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc @@ -120,6 +120,7 @@ streampos CordStreamBuf::seekoff(streamoff off, ios_base::seekdir way, ios_base: auto chunk = cord_.ChunkRemaining(get_iterator_); char * data = const_cast(chunk.data()); setg(data, data, data + chunk.size()); + return off; } else if (way == ios_base::cur) { if(get_iterator_ == cord_.Chars().end()) return traits_type::eof(); // Advance to next chunk if there isn't space in the current @@ -131,20 +132,21 @@ streampos CordStreamBuf::seekoff(streamoff off, ios_base::seekdir way, ios_base: setg(data, data, data + chunk.size()); off -= chunk.size(); } + // Advance within the chunk if(off > 0) { Cord::Advance(&get_iterator_, off); setg(eback(), gptr() + off, egptr()); } + return std::distance(cord_.Chars().begin(), get_iterator_); } else if (way == ios_base::end) { // Seeks past the stream end are unsupported return traits_type::eof(); } - return std::distance(cord_.Chars().begin(), get_iterator_); } else if (which == ios_base::out) { // Only support appends - return traits_type::eof();; + return traits_type::eof(); } - return traits_type::eof();; + return traits_type::eof(); } From 24144d364384a5607f834163c0b0c4fb6c883cd7 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Mon, 10 Jun 2024 14:18:24 +0200 Subject: [PATCH 38/48] Prefer defining the CordStreamBuf get area in terms of a Cord chunk --- tensorstore/kvstore/s3_sdk/cord_streambuf.cc | 96 +++++++++++-------- tensorstore/kvstore/s3_sdk/cord_streambuf.h | 16 +++- .../kvstore/s3_sdk/cord_streambuf_test.cc | 83 ++++++++++------ 3 files changed, 123 insertions(+), 72 deletions(-) diff --git 
a/tensorstore/kvstore/s3_sdk/cord_streambuf.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc index b4e9903eb..35c39ff34 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf.cc +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc @@ -15,6 +15,7 @@ #include "tensorstore/kvstore/s3_sdk/cord_streambuf.h" #include +#include #include #include "absl/strings/cord.h" @@ -29,25 +30,26 @@ using std::ios_base; namespace tensorstore { namespace internal_kvstore_s3 { -CordStreamBuf::CordStreamBuf() : CordStreamBuf(Cord()) {} +CordStreamBuf::CordStreamBuf() : + CordStreamBuf(Cord()) {} CordStreamBuf::CordStreamBuf(Cord && cord) : mode_(cord.size() == 0 ? ios_base::out : ios_base::in), cord_(std::move(cord)), - get_iterator_(cord_.Chars().begin()) { + read_chunk_(cord_.Chunks().begin()) { // Set up the get area, if the Cord has data - if(get_iterator_ != cord_.Chars().end()) { - auto chunk = Cord::ChunkRemaining(get_iterator_); - char * data = const_cast(chunk.data()); - setg(data, data, data + chunk.size()); + if(read_chunk_ != cord_.Chunks().end()) { + char * data = const_cast(read_chunk_->data()); + setg(data, data, data + read_chunk_->size()); } + } Cord CordStreamBuf::GetCord() { Cord result; std::swap(result, cord_); - get_iterator_ = cord_.Chars().begin(); + read_chunk_ = cord_.Chunks().begin(); char dummy; setg(&dummy, &dummy, &dummy + 1); setp(&dummy, &dummy + 1); @@ -90,23 +92,32 @@ CordStreamBuf::int_type CordStreamBuf::overflow(int_type ch) { streamsize CordStreamBuf::xsgetn(char * s, streamsize count) { // Not reading or no more Cord data if(!(mode_ & ios_base::in)) return 0; - if(get_iterator_ == cord_.Chars().end()) return 0; - auto chunk = cord_.ChunkRemaining(get_iterator_); - auto bytes_to_read = std::min(chunk.size(), count); - for(streamsize i = 0; i < bytes_to_read; ++i) s[i] = chunk[i]; - Cord::Advance(&get_iterator_, bytes_to_read); + if(read_chunk_ == cord_.Chunks().end()) return 0; + auto bytes_to_read = std::min(read_chunk_->size(), count); + 
for(streamsize i = 0; i < bytes_to_read; ++i) s[i] = read_chunk_->operator[](i); + assert(gptr() + bytes_to_read <= egptr()); + if(gptr() + bytes_to_read < egptr()) { + setg(eback(), gptr() + bytes_to_read, egptr()); + } else { + if(++read_chunk_ != cord_.Chunks().end()) { + char_type * data = const_cast(read_chunk_->data()); + setg(data, data, data + read_chunk_->size()); + } + } return bytes_to_read; } // Handle buffer underflow. CordStreamBuf::int_type CordStreamBuf::underflow() { // Not reading or no more Cord data - if(!(mode_ & ios_base::in) || get_iterator_ == cord_.Chars().end()) return traits_type::eof(); - Cord::Advance(&get_iterator_, cord_.ChunkRemaining(get_iterator_).size()); - if(get_iterator_ == cord_.Chars().end()) return traits_type::eof(); - auto chunk = cord_.ChunkRemaining(get_iterator_); - char * data = const_cast(chunk.data()); - setg(data, data, data + chunk.size()); + if(!(mode_ & ios_base::in)) return traits_type::eof(); + if(read_chunk_ == cord_.Chunks().end()) return traits_type::eof(); + if(gptr() < egptr()) { + return traits_type::to_int_type(*gptr()); + } + if(++read_chunk_ == cord_.Chunks().end()) return traits_type::eof(); + char_type * data = const_cast(read_chunk_->data()); + setg(data, data, data + read_chunk_->size()); return traits_type::to_int_type(*data); } @@ -115,29 +126,30 @@ streampos CordStreamBuf::seekoff(streamoff off, ios_base::seekdir way, ios_base: if (way == ios_base::beg) { // Seek from the beginning of the cord if(off >= cord_.size()) return traits_type::eof(); - get_iterator_ = cord_.Chars().begin(); - Cord::Advance(&get_iterator_, off); - auto chunk = cord_.ChunkRemaining(get_iterator_); - char * data = const_cast(chunk.data()); - setg(data, data, data + chunk.size()); + auto n = off; + read_chunk_ = cord_.Chunks().begin(); + while(n > read_chunk_->size()) { + n -= read_chunk_->size(); + ++read_chunk_; + } + char_type * data = const_cast(read_chunk_->data()); + setg(data, data + n, data + read_chunk_->size()); 
return off; } else if (way == ios_base::cur) { - if(get_iterator_ == cord_.Chars().end()) return traits_type::eof(); - // Advance to next chunk if there isn't space in the current - while(egptr() - gptr() <= off) { - auto chunk = cord_.ChunkRemaining(get_iterator_); - Cord::Advance(&get_iterator_, chunk.size()); - if(get_iterator_ == cord_.Chars().end()) return traits_type::eof(); - char * data = const_cast(chunk.data()); - setg(data, data, data + chunk.size()); - off -= chunk.size(); + if(read_chunk_ == cord_.Chunks().end()) return traits_type::eof(); + auto n = off; + // Advance forward by Cord chunk, + // consuming any remaining characters + // in the chunk + while(n >= remaining()) { + n -= remaining(); + if(++read_chunk_ == cord_.Chunks().end()) return traits_type::eof(); + char_type * data = const_cast(read_chunk_->data()); + setg(data, data, data + read_chunk_->size()); } - // Advance within the chunk - if(off > 0) { - Cord::Advance(&get_iterator_, off); - setg(eback(), gptr() + off, egptr()); - } - return std::distance(cord_.Chars().begin(), get_iterator_); + setg(eback(), gptr() + n, egptr()); + return std::accumulate(cord_.Chunks().begin(), read_chunk_, consumed(), + [](auto i, auto c) { return i + c.size(); }); } else if (way == ios_base::end) { // Seeks past the stream end are unsupported return traits_type::eof(); @@ -149,8 +161,12 @@ streampos CordStreamBuf::seekoff(streamoff off, ios_base::seekdir way, ios_base: return traits_type::eof(); } - - +std::streamsize CordStreamBuf::consumed() const { + return gptr() - eback(); +}; +std::streamsize CordStreamBuf::remaining() const { + return egptr() - gptr(); +} } // namespace internal_kvstore_s3 } // namespace tensorstore diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf.h b/tensorstore/kvstore/s3_sdk/cord_streambuf.h index b9d18ee1d..7212e59f5 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf.h +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf.h @@ -29,6 +29,14 @@ namespace internal_kvstore_s3 { 
/// (1) Append-only writing mode, where data is appended to the underlying Cord /// (2) Read mode, where data is read from the Cord. Seeking is supported /// within the Stream Buffer. +/// +/// The streambuf get area is assigned to a chunk of the underlying Cord, +/// referred to by read_chunk_: this is usually set up by +/// setg(read_chunk_->data(), +/// read_chunk_->data(), +/// read_chunk->data() + read_chunk_->size()); +/// Then, characters are consumed from this area until underflow occurs, +/// at which point, the get area is constructed with the next chunk class CordStreamBuf : public std::basic_streambuf { public: // Creates a stream buffer for writing @@ -65,10 +73,16 @@ class CordStreamBuf : public std::basic_streambuf { return seekoff(sp - std::streampos(0), std::ios_base::beg, which); } + // Number of characters consumed in the current chunk + // (gptr() - eback()) + std::streamsize consumed() const; + // Number of characters remaining in the current chunk + // (egptr() - gptr()) + std::streamsize remaining() const; private: std::ios_base::openmode mode_; absl::Cord cord_; - absl::Cord::CharIterator get_iterator_; + absl::Cord::ChunkIterator read_chunk_; }; } // namespace internal_kvstore_s3 diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc index 50ca85440..c17e34a0b 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc @@ -24,6 +24,8 @@ #include +using std::ios_base; + using ::absl::Cord; using ::absl::CordBuffer; using ::Aws::Utils::Stream::DefaultUnderlyingStream; @@ -34,9 +36,15 @@ namespace { static constexpr char kAwsTag[] = "AWS"; static constexpr int kNBuffers = 3; +static constexpr auto kBufferSize = CordBuffer::kDefaultLimit; -CordStreamBuf & GetStreamBuf(DefaultUnderlyingStream & stream) { - return *dynamic_cast(stream.rdbuf()); +absl::Cord ThreeBufferCord() { + absl::Cord cord; + for(char ch = 0; ch < kNBuffers; 
++ch) { + cord.Append(std::string(kBufferSize, '1' + ch)); + } + assert(std::distance(cord.Chunks().begin(), cord.Chunks().end()) == 3); + return cord; } TEST(CordStreamBufTest, Read) { @@ -51,6 +59,7 @@ TEST(CordStreamBufTest, Read) { char ch; EXPECT_FALSE(is.get(ch)); EXPECT_FALSE(is.good()); + EXPECT_TRUE(is.eof()); } @@ -60,7 +69,7 @@ TEST(CordStreamBufTest, Write) { os << " "; os << "This is a test"; EXPECT_TRUE(os.good()); - auto cord = GetStreamBuf(os).GetCord(); + auto cord = dynamic_cast(os.rdbuf())->GetCord(); EXPECT_EQ(cord, "Hello World This is a test"); // Single Cord chunk @@ -70,47 +79,59 @@ TEST(CordStreamBufTest, Write) { } -/// Test seeking within the CordStreamBuf -TEST(CordSreamBufTest, ReadSeek) { - absl::Cord cord; - for(char ch = 0; ch < kNBuffers; ++ch) { - cord.Append(std::string(CordBuffer::kDefaultLimit, '1' + ch)); - } - EXPECT_EQ(std::distance(cord.Chunks().begin(), cord.Chunks().end()), 3); +TEST(CordStreamBufTest, BufferSeek) { + auto buffer = CordStreamBuf(ThreeBufferCord()); - auto is = DefaultUnderlyingStream( - MakeUnique(kAwsTag, std::move(cord))); + // Seeks from beginning + EXPECT_EQ(buffer.pubseekoff(0, ios_base::beg, ios_base::in), 0); + EXPECT_EQ(buffer.pubseekoff(10, ios_base::beg, ios_base::in), 10); + EXPECT_EQ(buffer.pubseekoff(10 + kBufferSize, ios_base::beg, ios_base::in), 10 + kBufferSize); + EXPECT_EQ(buffer.pubseekoff(10 + 2*kBufferSize, ios_base::beg, ios_base::in), 10 + 2*kBufferSize); + EXPECT_EQ(buffer.pubseekoff(10 + 3*kBufferSize, ios_base::beg, ios_base::in), -1); // eof - for(char ch = 0; ch < kNBuffers; ++ch) { - is.seekg(5 + CordBuffer::kDefaultLimit * ch); - EXPECT_EQ(is.tellg(), 5 + CordBuffer::kDefaultLimit * ch); - char result[6] = {0x00}; - is.read(result, sizeof(result)); - auto expected = std::string(sizeof(result), '1' + ch); - EXPECT_EQ(std::string_view(result, sizeof(result)), expected); - EXPECT_TRUE(is.good()); - EXPECT_EQ(is.tellg(), 5 + CordBuffer::kDefaultLimit * ch + sizeof(result)); - } + 
// Seeks from current position + EXPECT_EQ(buffer.pubseekoff(0, ios_base::beg, ios_base::in), 0); + EXPECT_EQ(buffer.pubseekoff(10, ios_base::cur, ios_base::in), 10); + EXPECT_EQ(buffer.pubseekoff(kBufferSize, ios_base::cur, ios_base::in), 10 + kBufferSize); + EXPECT_EQ(buffer.pubseekoff(kBufferSize, ios_base::cur, ios_base::in), 10 + 2*kBufferSize); + EXPECT_EQ(buffer.pubseekoff(kBufferSize, ios_base::cur, ios_base::in), -1); // eof } /// Test that reading the CordStreamBuf reads the Cord TEST(CordStreamBufTest, GetEntireStreamBuf) { - absl::Cord cord; - for(char ch = 0; ch < kNBuffers; ++ch) { - cord.Append(std::string(CordBuffer::kDefaultLimit, '1' + ch)); - } - EXPECT_EQ(std::distance(cord.Chunks().begin(), cord.Chunks().end()), 3); - auto is = DefaultUnderlyingStream( - MakeUnique(kAwsTag, std::move(cord))); + MakeUnique(kAwsTag, ThreeBufferCord())); int count = 0; char ch; while(is.get(ch)) { - EXPECT_EQ(ch, '1' + count / CordBuffer::kDefaultLimit); + EXPECT_EQ(ch, '1' + count / kBufferSize); + EXPECT_EQ(is.tellg(), count); ++count; } - EXPECT_EQ(count, CordBuffer::kDefaultLimit * kNBuffers); + EXPECT_EQ(count, kBufferSize * kNBuffers); + EXPECT_FALSE(is.good()); + EXPECT_TRUE(is.eof()); +} + +/// Test seeking within the CordStreamBuf +TEST(CordStreamBufTest, ReadSeek) { + auto is = DefaultUnderlyingStream( + MakeUnique(kAwsTag, ThreeBufferCord())); + + for(char ch = 0; ch < kNBuffers; ++ch) { + is.seekg(5 + kBufferSize * ch); + EXPECT_EQ(is.tellg(), 5 + kBufferSize * ch); + char result[6] = {0x00}; + is.read(result, sizeof(result)); + auto expected = std::string(sizeof(result), '1' + ch); + EXPECT_EQ(std::string_view(result, sizeof(result)), expected); + EXPECT_TRUE(is.good()); + EXPECT_EQ(is.tellg(), 5 + kBufferSize * ch + sizeof(result)); + } + + EXPECT_FALSE(is.good()); + EXPECT_TRUE(is.eof()); } } // namespace \ No newline at end of file From cafd5fa822dd483f7f463825feea77b3096bb126 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Mon, 10 Jun 2024 
16:56:25 +0200 Subject: [PATCH 39/48] typo --- tensorstore/kvstore/s3_sdk/s3_context.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorstore/kvstore/s3_sdk/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc index f5dbf2b5a..b14511996 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -155,7 +155,7 @@ class CustomHttpClient : public AwsHttpClient { /// Custom factory overriding Aws::Http::DefaultHttpFatory /// Generates a CustomHttpClient (which defers to tensorflow's curl library) -/// as well as overriding Createhttp Request to return +/// as well as overriding CreateHttpRequest to return /// Standard Http Requests class CustomHttpFactory : public Aws::Http::HttpClientFactory { public: From f8ba211a705b775bf435f82f92b20e04cdbf094d Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Mon, 10 Jun 2024 17:58:05 +0200 Subject: [PATCH 40/48] Fix test cases --- .../kvstore/s3_sdk/cord_streambuf_test.cc | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc index c17e34a0b..11f5a5418 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc @@ -104,10 +104,13 @@ TEST(CordStreamBufTest, GetEntireStreamBuf) { int count = 0; char ch; + while(is.get(ch)) { - EXPECT_EQ(ch, '1' + count / kBufferSize); - EXPECT_EQ(is.tellg(), count); + EXPECT_EQ(ch, '1' + (count / kBufferSize)); + EXPECT_TRUE(is.good()); + EXPECT_FALSE(is.eof()); ++count; + EXPECT_EQ(is.tellg(), count < kBufferSize * kNBuffers ? 
count : -1); } EXPECT_EQ(count, kBufferSize * kNBuffers); EXPECT_FALSE(is.good()); @@ -119,19 +122,20 @@ TEST(CordStreamBufTest, ReadSeek) { auto is = DefaultUnderlyingStream( MakeUnique(kAwsTag, ThreeBufferCord())); - for(char ch = 0; ch < kNBuffers; ++ch) { - is.seekg(5 + kBufferSize * ch); - EXPECT_EQ(is.tellg(), 5 + kBufferSize * ch); + for(char b = 0; b < kNBuffers; ++b) { + is.seekg(5 + kBufferSize * b); + EXPECT_EQ(is.tellg(), 5 + kBufferSize * b); char result[6] = {0x00}; is.read(result, sizeof(result)); - auto expected = std::string(sizeof(result), '1' + ch); + auto expected = std::string(sizeof(result), '1' + b); EXPECT_EQ(std::string_view(result, sizeof(result)), expected); EXPECT_TRUE(is.good()); - EXPECT_EQ(is.tellg(), 5 + kBufferSize * ch + sizeof(result)); + EXPECT_EQ(is.tellg(), 5 + kBufferSize * b + sizeof(result)); } + is.seekg(kBufferSize * kNBuffers); + EXPECT_EQ(is.tellg(), -1); EXPECT_FALSE(is.good()); - EXPECT_TRUE(is.eof()); } } // namespace \ No newline at end of file From 16321f424a22d6a4904ccc19bd7bc6036c49d873 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 14 Jun 2024 16:33:38 +0200 Subject: [PATCH 41/48] Touch up CordStreamBuf and integrate into HttpRequest/Response workflow --- tensorstore/kvstore/s3_sdk/cord_streambuf.cc | 97 +++++++++++---- tensorstore/kvstore/s3_sdk/cord_streambuf.h | 31 +++-- .../kvstore/s3_sdk/cord_streambuf_test.cc | 110 ++++++++++++++++-- tensorstore/kvstore/s3_sdk/localstack_test.cc | 24 +++- tensorstore/kvstore/s3_sdk/s3_context.cc | 75 +++++++----- tensorstore/kvstore/s3_sdk/s3_context.h | 2 + 6 files changed, 268 insertions(+), 71 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc index 35c39ff34..ce24d24eb 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf.cc +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc @@ -15,7 +15,6 @@ #include "tensorstore/kvstore/s3_sdk/cord_streambuf.h" #include -#include #include #include 
"absl/strings/cord.h" @@ -34,28 +33,48 @@ CordStreamBuf::CordStreamBuf() : CordStreamBuf(Cord()) {} CordStreamBuf::CordStreamBuf(Cord && cord) : - mode_(cord.size() == 0 ? ios_base::out : ios_base::in), + mode_(ios_base::out | ios_base::in), cord_(std::move(cord)), read_chunk_(cord_.Chunks().begin()) { - // Set up the get area, if the Cord has data + assert(eback() == nullptr); + assert(gptr() == nullptr); + assert(egptr() == nullptr); + assert(pbase() == nullptr); + assert(pptr() == nullptr); + assert(epptr() == nullptr); + + // Set up the get area to point at the first chunk, + // if the Cord has data if(read_chunk_ != cord_.Chunks().end()) { - char * data = const_cast(read_chunk_->data()); + char_type * data = const_cast(read_chunk_->data()); setg(data, data, data + read_chunk_->size()); } - } -Cord CordStreamBuf::GetCord() { +Cord CordStreamBuf::MoveCord() { Cord result; std::swap(result, cord_); read_chunk_ = cord_.Chunks().begin(); - char dummy; - setg(&dummy, &dummy, &dummy + 1); - setp(&dummy, &dummy + 1); + assert(read_chunk_ == cord_.Chunks().end()); + setg(nullptr, nullptr, nullptr); + setp(nullptr, nullptr); return result; } +void CordStreamBuf::TakeCord(Cord && cord) { + setg(nullptr, nullptr, nullptr); + setp(nullptr, nullptr); + + cord_ = std::move(cord); + read_chunk_ = cord_.Chunks().begin(); + + if(read_chunk_ != cord_.Chunks().end()) { + char_type * data = const_cast(read_chunk_->data()); + setg(data, data, data + read_chunk_->size()); + } +} + // Bulk put operation streamsize CordStreamBuf::xsputn(const char * s, streamsize count) { if(!(mode_ & ios_base::out)) return 0; @@ -75,6 +94,7 @@ streamsize CordStreamBuf::xsputn(const char * s, streamsize count) { first = false; } + MaybeSetGetArea(); return p; } @@ -85,6 +105,7 @@ CordStreamBuf::int_type CordStreamBuf::overflow(int_type ch) { if(traits_type::eq_int_type(ch, traits_type::eof())) return traits_type::eof(); auto c = traits_type::to_char_type(ch); cord_.Append(absl::string_view(&c, 1)); + 
MaybeSetGetArea(); return ch; } @@ -124,8 +145,8 @@ CordStreamBuf::int_type CordStreamBuf::underflow() { streampos CordStreamBuf::seekoff(streamoff off, ios_base::seekdir way, ios_base::openmode which) { if (which == ios_base::in) { if (way == ios_base::beg) { + if(off > cord_.size()) return traits_type::eof(); // Seek from the beginning of the cord - if(off >= cord_.size()) return traits_type::eof(); auto n = off; read_chunk_ = cord_.Chunks().begin(); while(n > read_chunk_->size()) { @@ -137,36 +158,68 @@ streampos CordStreamBuf::seekoff(streamoff off, ios_base::seekdir way, ios_base: return off; } else if (way == ios_base::cur) { if(read_chunk_ == cord_.Chunks().end()) return traits_type::eof(); + auto current = gconsumed(); + for(auto c = cord_.Chunks().begin(); c != read_chunk_; ++c) { + current += c->size(); + } + auto n = off; - // Advance forward by Cord chunk, + // Advance forward in the current chunk + if(n > 0 && gremaining() > 0) { + auto bytes_to_remove = std::min(n, gremaining()); + n -= bytes_to_remove; + gbump(bytes_to_remove); + } + // Advance forward by Cord chunks, // consuming any remaining characters // in the chunk - while(n >= remaining()) { - n -= remaining(); + while(n > 0) { if(++read_chunk_ == cord_.Chunks().end()) return traits_type::eof(); + auto bytes_to_advance = std::min(n, read_chunk_->size()); char_type * data = const_cast(read_chunk_->data()); - setg(data, data, data + read_chunk_->size()); + setg(data, data + bytes_to_advance, data + read_chunk_->size()); + n -= bytes_to_advance; } - setg(eback(), gptr() + n, egptr()); - return std::accumulate(cord_.Chunks().begin(), read_chunk_, consumed(), - [](auto i, auto c) { return i + c.size(); }); + + return current + off; } else if (way == ios_base::end) { // Seeks past the stream end are unsupported - return traits_type::eof(); + if(off > 0) return traits_type::eof(); + auto n = cord_.size() + off; + read_chunk_ = cord_.Chunks().begin(); + while(n > read_chunk_->size()) { + n -= 
read_chunk_->size(); + ++read_chunk_; + } + char_type * data = const_cast(read_chunk_->data()); + setg(data, data + n, data + read_chunk_->size()); + return cord_.size() + off; } } else if (which == ios_base::out) { - // Only support appends - return traits_type::eof(); + // This streambuf only supports appends. + // Don't respect the off argument, always return + // the append position + return cord_.size(); } return traits_type::eof(); } -std::streamsize CordStreamBuf::consumed() const { +streamsize CordStreamBuf::gconsumed() const { return gptr() - eback(); }; -std::streamsize CordStreamBuf::remaining() const { + +streamsize CordStreamBuf::gremaining() const { return egptr() - gptr(); } +void CordStreamBuf::MaybeSetGetArea() { + if(read_chunk_ == cord_.Chunks().end()) { + read_chunk_ = cord_.Chunks().begin(); + if(read_chunk_ == cord_.Chunks().end()) return; + char_type * data = const_cast(read_chunk_->data()); + setg(data, data, data + read_chunk_->size()); + } +} + } // namespace internal_kvstore_s3 } // namespace tensorstore diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf.h b/tensorstore/kvstore/s3_sdk/cord_streambuf.h index 7212e59f5..d2856a17f 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf.h +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf.h @@ -23,8 +23,7 @@ namespace tensorstore { namespace internal_kvstore_s3 { -/// Basic implementation of a std::basic_streambuf -/// backed by an abseil Cord. +/// Basic implementation of a std::basic_streambuf backed by an abseil Cord. /// It should be used in two modes /// (1) Append-only writing mode, where data is appended to the underlying Cord /// (2) Read mode, where data is read from the Cord. 
Seeking is supported @@ -36,10 +35,10 @@ namespace internal_kvstore_s3 { /// read_chunk_->data(), /// read_chunk->data() + read_chunk_->size()); /// Then, characters are consumed from this area until underflow occurs, -/// at which point, the get area is constructed with the next chunk +/// at which point, the get area is constructed from the next chunk class CordStreamBuf : public std::basic_streambuf { public: - // Creates a stream buffer for writing + // Creates an empty stream buffer for writing CordStreamBuf(); // Creates a stream buffer for reading from the supplied Cord CordStreamBuf(absl::Cord && cord); @@ -48,7 +47,10 @@ class CordStreamBuf : public std::basic_streambuf { const absl::Cord & GetCord() const { return cord_; } // Returns the underlying Cord, resetting the underlying stream - absl::Cord GetCord(); + absl::Cord MoveCord(); + // Takes the supplied Cord as the underlying Cord, + // resetting the underlying stream + void TakeCord(absl::Cord && cord); protected: // Bulk put operation @@ -60,7 +62,11 @@ class CordStreamBuf : public std::basic_streambuf { // Handle buffer underflow. virtual int_type underflow() override; - // Seek within the underlying Cord (only seeks in the get area are supported) + // Seek within the underlying Cord. + // Seeking in the get area is supported + // Seeking in the put area always returns the length of the Cord + // (only appends are supported). 
+ // do not appear to be called by the AWS C++ SDK virtual std::streampos seekoff( std::streamoff off, std::ios_base::seekdir way, @@ -73,12 +79,17 @@ class CordStreamBuf : public std::basic_streambuf { return seekoff(sp - std::streampos(0), std::ios_base::beg, which); } - // Number of characters consumed in the current chunk + // Number of get characters consumed in the current read chunk // (gptr() - eback()) - std::streamsize consumed() const; - // Number of characters remaining in the current chunk + std::streamsize gconsumed() const; + // Number of characters remaining in the current read chunk // (egptr() - gptr()) - std::streamsize remaining() const; + std::streamsize gremaining() const; + +private: + // Configure the get area after put operations. + void MaybeSetGetArea(); + private: std::ios_base::openmode mode_; absl::Cord cord_; diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc index 11f5a5418..96cd3542a 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc @@ -47,6 +47,40 @@ absl::Cord ThreeBufferCord() { return cord; } +/// Remove this +TEST(CordStreamBufTest, Chunks) { + using ::absl::CordBuffer; + absl::Cord cord; + + auto buffer = cord.GetAppendBuffer(32); + auto span = buffer.available_up_to(32); + EXPECT_EQ(span.size(), 32); + for(int i = 0; i < span.size(); ++i) span[i] = i; + buffer.IncreaseLengthBy(span.size()); + cord.Append(std::move(buffer)); + + EXPECT_EQ(cord.Chunks().begin()->size(), 32); + EXPECT_EQ(cord.size(), 32); + + buffer = cord.GetAppendBuffer(32); + span = buffer.available_up_to(32); + EXPECT_EQ(span.size(), 32); + for(int i = 0; i < span.size(); ++i) span[i] = i; + buffer.IncreaseLengthBy(span.size()); + cord.Append(std::move(buffer)); + + buffer = cord.GetAppendBuffer(97); + span = buffer.available_up_to(97); + EXPECT_EQ(span.size(), 97); + for(int i = 0; i < span.size(); ++i) span[i] = i; + 
buffer.IncreaseLengthBy(span.size()); + cord.Append(std::move(buffer)); + + EXPECT_EQ(std::distance(cord.Chunks().begin(), cord.Chunks().end()), 3); + EXPECT_EQ(cord.size(), 161); +} + + TEST(CordStreamBufTest, Read) { auto cord = absl::Cord{"Hello World This is a test"}; auto is = DefaultUnderlyingStream(MakeUnique(kAwsTag, std::move(cord))); @@ -66,27 +100,32 @@ TEST(CordStreamBufTest, Read) { TEST(CordStreamBufTest, Write) { auto os = DefaultUnderlyingStream(MakeUnique(kAwsTag)); os << "Hello World"; + EXPECT_EQ(os.tellp(), 11); os << " "; + EXPECT_EQ(os.tellp(), 12); os << "This is a test"; + EXPECT_EQ(os.tellp(), 26); EXPECT_TRUE(os.good()); auto cord = dynamic_cast(os.rdbuf())->GetCord(); EXPECT_EQ(cord, "Hello World This is a test"); // Single Cord chunk - auto it = cord.chunk_begin(); + auto it = cord.Chunks().begin(); EXPECT_EQ(*it, "Hello World This is a test"); - EXPECT_EQ(++it, cord.chunk_end()); + ++it; + EXPECT_EQ(it, cord.Chunks().end()); + EXPECT_EQ(std::distance(cord.Chunks().begin(), cord.Chunks().end()), 1); } +// Test get seeks via the streambuf interface TEST(CordStreamBufTest, BufferSeek) { auto buffer = CordStreamBuf(ThreeBufferCord()); // Seeks from beginning EXPECT_EQ(buffer.pubseekoff(0, ios_base::beg, ios_base::in), 0); - EXPECT_EQ(buffer.pubseekoff(10, ios_base::beg, ios_base::in), 10); - EXPECT_EQ(buffer.pubseekoff(10 + kBufferSize, ios_base::beg, ios_base::in), 10 + kBufferSize); EXPECT_EQ(buffer.pubseekoff(10 + 2*kBufferSize, ios_base::beg, ios_base::in), 10 + 2*kBufferSize); + EXPECT_EQ(buffer.pubseekoff(kNBuffers * kBufferSize, ios_base::beg, ios_base::in), kNBuffers * kBufferSize); EXPECT_EQ(buffer.pubseekoff(10 + 3*kBufferSize, ios_base::beg, ios_base::in), -1); // eof // Seeks from current position @@ -94,7 +133,59 @@ TEST(CordStreamBufTest, BufferSeek) { EXPECT_EQ(buffer.pubseekoff(10, ios_base::cur, ios_base::in), 10); EXPECT_EQ(buffer.pubseekoff(kBufferSize, ios_base::cur, ios_base::in), 10 + kBufferSize); 
EXPECT_EQ(buffer.pubseekoff(kBufferSize, ios_base::cur, ios_base::in), 10 + 2*kBufferSize); - EXPECT_EQ(buffer.pubseekoff(kBufferSize, ios_base::cur, ios_base::in), -1); // eof + EXPECT_EQ(buffer.pubseekoff(kBufferSize - 10, ios_base::cur, ios_base::in), kNBuffers*kBufferSize); + EXPECT_EQ(buffer.pubseekoff(10, ios_base::cur, ios_base::in), -1); // eof + + // Seek from end + EXPECT_EQ(buffer.pubseekoff(0, ios_base::beg, ios_base::in), 0); + EXPECT_EQ(buffer.pubseekoff(0, ios_base::end, ios_base::in), kBufferSize * kNBuffers); + EXPECT_EQ(buffer.pubseekoff(1, ios_base::end, ios_base::in), -1); // eof + EXPECT_EQ(buffer.pubseekoff(-1, ios_base::end, ios_base::in), kBufferSize * kNBuffers - 1); + EXPECT_EQ(buffer.pubseekoff(-kBufferSize * kNBuffers, ios_base::end, ios_base::in), 0); + EXPECT_EQ(buffer.pubseekoff(1, ios_base::end, ios_base::in), -1); // eof +} + +// Test get seeks via the istream interface +TEST(CordStreamBufTest, StreamSeek) { + auto is = DefaultUnderlyingStream( + MakeUnique(kAwsTag, ThreeBufferCord())); + + // Seek from beginning + is.clear(); + is.seekg(0, is.beg); + EXPECT_EQ(is.tellg(), 0); + is.seekg(kBufferSize * kNBuffers - 1, is.beg); + EXPECT_EQ(is.tellg(), kBufferSize * kNBuffers - 1); + is.seekg(kBufferSize * kNBuffers, is.beg); + EXPECT_EQ(is.tellg(), kBufferSize * kNBuffers); + is.seekg(kBufferSize * kNBuffers + 1, is.beg); + EXPECT_EQ(is.tellg(), -1); // eof + + // Seek from current position + is.clear(); + is.seekg(0, is.beg); + EXPECT_EQ(is.tellg(), 0); + is.seekg(10, is.cur); + EXPECT_EQ(is.tellg(), 10); + is.seekg(kBufferSize, is.cur); + EXPECT_EQ(is.tellg(), 10 + kBufferSize); + is.seekg(kBufferSize, is.cur); + EXPECT_EQ(is.tellg(), 10 + 2*kBufferSize); + is.seekg(kBufferSize - 10, is.cur); + EXPECT_EQ(is.tellg(), kNBuffers*kBufferSize); + is.seekg(10, is.cur); + EXPECT_EQ(is.tellg(), -1); // eof + + // Seek from end + is.clear(); + is.seekg(0, is.end); + EXPECT_EQ(is.tellg(), kBufferSize * kNBuffers); + is.seekg(-kBufferSize * 
kNBuffers, is.end); + EXPECT_EQ(is.tellg(), 0); + is.seekg(-1, is.end); + EXPECT_EQ(is.tellg(), kBufferSize * kNBuffers - 1); + is.seekg(1, is.end); + EXPECT_EQ(is.tellg(), -1); // eof } /// Test that reading the CordStreamBuf reads the Cord @@ -110,7 +201,7 @@ TEST(CordStreamBufTest, GetEntireStreamBuf) { EXPECT_TRUE(is.good()); EXPECT_FALSE(is.eof()); ++count; - EXPECT_EQ(is.tellg(), count < kBufferSize * kNBuffers ? count : -1); + EXPECT_EQ(is.tellg(), count); } EXPECT_EQ(count, kBufferSize * kNBuffers); EXPECT_FALSE(is.good()); @@ -134,8 +225,13 @@ TEST(CordStreamBufTest, ReadSeek) { } is.seekg(kBufferSize * kNBuffers); - EXPECT_EQ(is.tellg(), -1); + EXPECT_EQ(is.tellg(), kBufferSize*kNBuffers); + EXPECT_TRUE(is.good()); + EXPECT_FALSE(is.eof()); + + EXPECT_EQ(is.get(), -1); EXPECT_FALSE(is.good()); + EXPECT_TRUE(is.eof()); } } // namespace \ No newline at end of file diff --git a/tensorstore/kvstore/s3_sdk/localstack_test.cc b/tensorstore/kvstore/s3_sdk/localstack_test.cc index fb94b4af5..107b7dd59 100644 --- a/tensorstore/kvstore/s3_sdk/localstack_test.cc +++ b/tensorstore/kvstore/s3_sdk/localstack_test.cc @@ -30,8 +30,9 @@ #include "absl/time/time.h" #include -#include #include +#include +#include #include #include #include @@ -50,10 +51,10 @@ #include "tensorstore/internal/json_gtest.h" #include "tensorstore/internal/os/subprocess.h" #include "tensorstore/internal/thread/thread_pool.h" +#include "tensorstore/kvstore/s3_sdk/cord_streambuf.h" #include "tensorstore/util/executor.h" #include "tensorstore/util/result.h" - #include "tensorstore/kvstore/s3_sdk/s3_context.h" // When provided with --localstack_binary, localstack_test will start @@ -88,6 +89,9 @@ ABSL_FLAG(std::string, aws_region, "af-south-1", ABSL_FLAG(std::string, aws_path, "tensorstore/test/", "The S3 path used for the test."); +using ::Aws::MakeUnique; +using ::Aws::MakeShared; +using ::Aws::Utils::Stream::DefaultUnderlyingStream; using ::tensorstore::Context; using ::tensorstore::MatchesJson; 
@@ -103,6 +107,8 @@ using ::tensorstore::internal_http::IssueRequestOptions; using ::tensorstore::transport_test_utils::TryPickUnusedPort; using ::tensorstore::internal_kvstore_s3::AwsContext; +using ::tensorstore::internal_kvstore_s3::CordStreamBuf; +using ::tensorstore::internal_kvstore_s3::CordBackedResponseStreamFactory; namespace { @@ -178,7 +184,7 @@ class LocalStackProcess { // Give the child process several seconds to start. auto deadline = absl::Now() + absl::Seconds(10); while (absl::Now() < deadline) { - absl::SleepFor(absl::Milliseconds(250)); + absl::SleepFor(absl::Milliseconds(500)); auto join_result = spawn_proc.Join(/*block=*/false); if (join_result.ok()) { @@ -329,22 +335,25 @@ TEST_F(LocalStackFixture, BasicSync) { // Put an object auto put_request = Aws::S3::Model::PutObjectRequest{}; + put_request.SetResponseStreamFactory(CordBackedResponseStreamFactory); put_request.SetBucket(Bucket()); put_request.SetKey("portunus"); - put_request.SetBody(Aws::MakeShared(kAwsTag, payload)); + put_request.SetBody(MakeShared(kAwsTag, MakeUnique(kAwsTag, absl::Cord{payload}))); auto put_outcome = client->PutObject(put_request); EXPECT_TRUE(put_outcome.IsSuccess()); // Put the same object with a different key put_request = Aws::S3::Model::PutObjectRequest{}; + put_request.SetResponseStreamFactory(CordBackedResponseStreamFactory); put_request.SetBucket(Bucket()); put_request.SetKey("portunus0"); - put_request.SetBody(Aws::MakeShared(kAwsTag, payload)); + put_request.SetBody(MakeShared(kAwsTag, MakeUnique(kAwsTag, absl::Cord{payload}))); put_outcome = client->PutObject(put_request); EXPECT_TRUE(put_outcome.IsSuccess()); // List the objects auto list_request = Aws::S3::Model::ListObjectsV2Request{}; + list_request.SetResponseStreamFactory(CordBackedResponseStreamFactory); list_request.SetBucket(Bucket()); list_request.SetMaxKeys(1); auto continuation_token = Aws::String{}; @@ -372,6 +381,7 @@ TEST_F(LocalStackFixture, BasicSync) { // Get the contents of the key auto 
get_request = Aws::S3::Model::GetObjectRequest{}; + get_request.SetResponseStreamFactory(CordBackedResponseStreamFactory); get_request.SetBucket(Bucket()); get_request.SetKey("portunus"); auto get_outcome = client->GetObject(get_request); @@ -396,7 +406,8 @@ TEST_F(LocalStackFixture, BasicAsync) { auto put_request = Aws::S3::Model::PutObjectRequest{}; put_request.SetBucket(Bucket()); put_request.SetKey(key); - put_request.SetBody(Aws::MakeShared(kAwsTag, payload)); + put_request.SetResponseStreamFactory(CordBackedResponseStreamFactory); + put_request.SetBody(MakeShared(kAwsTag, MakeUnique(kAwsTag, absl::Cord{payload}))); client->PutObjectAsync(put_request, [this]( const auto *, const auto &, const auto & outcome, const auto &) { this->on_put(outcome); @@ -414,6 +425,7 @@ TEST_F(LocalStackFixture, BasicAsync) { void do_get() { auto get_request = Aws::S3::Model::GetObjectRequest{}; + get_request.SetResponseStreamFactory(CordBackedResponseStreamFactory); get_request.SetBucket(Bucket()); get_request.SetKey(key); client->GetObjectAsync(get_request, [this]( diff --git a/tensorstore/kvstore/s3_sdk/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc index b14511996..97877db9f 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -18,12 +18,13 @@ #include #include +#include #include #include #include #include -#include #include +#include #include "absl/log/absl_log.h" #include "absl/synchronization/mutex.h" @@ -32,6 +33,7 @@ #include "tensorstore/internal/http/http_request.h" #include "tensorstore/internal/http/http_response.h" #include "tensorstore/internal/http/http_transport.h" +#include "tensorstore/kvstore/s3_sdk/cord_streambuf.h" using AwsHttpClient = ::Aws::Http::HttpClient; using AwsHttpRequest = ::Aws::Http::HttpRequest; @@ -75,23 +77,6 @@ class CustomHttpClient : public AwsHttpClient { // Converts an Aws StandardHttpRequest to a tensorstore HttpRequest RequestAndPayload FromAwsRequest(const std::shared_ptr & 
aws_request) const { - absl::Cord payload; - - // Copy characters off the stream into the Cord - // TODO: This is impractical for large data and - // should be mitigated by an Aws::IOStream backed by a Cord - if(auto body = aws_request->GetContentBody(); body) { - const size_t bufferSize = 1024*1024; - std::vector buffer(bufferSize); - std::streampos original = body->tellg(); - while (body->read(buffer.data(), buffer.size()) || body->gcount() > 0) { - payload.Append(absl::string_view(buffer.data(), body->gcount())); - } - // Reset stream - body->clear(); - body->seekg(original); - } - auto aws_headers = aws_request->GetHeaders(); auto headers = std::vector{}; for(auto &[name, value]: aws_headers) { @@ -102,6 +87,29 @@ class CustomHttpClient : public AwsHttpClient { user_agent = it->second; } + absl::Cord payload; + + // Get the underlying body as a Cord + if (auto body = aws_request->GetContentBody(); body) { + // Fast path, extract underlying Cord + if (auto cordstreambuf = dynamic_cast(body->rdbuf()); + cordstreambuf) { + payload = cordstreambuf->MoveCord(); + // TODO: remove this + ABSL_LOG(INFO) << "CordBacked Aws::Http::StandardHttpRequest of size " << payload.size(); + } else { + // Slow path, copy characters off the stream into Cord + std::vector buffer(absl::CordBuffer::kDefaultLimit); + std::streampos original = body->tellg(); + while (body->read(buffer.data(), buffer.size()) || body->gcount() > 0) { + payload.Append(absl::string_view(buffer.data(), body->gcount())); + } + // Reset stream + body->clear(); + body->seekg(original); + } + } + return RequestAndPayload{ HttpRequest{ GetNameForHttpMethod(aws_request->GetMethod()), @@ -114,7 +122,7 @@ class CustomHttpClient : public AwsHttpClient { // Converts a tensorstore response to an Aws StandardHttpResponse std::shared_ptr ToAwsResponse( - const HttpResponse & ts_response, + HttpResponse & ts_response, const std::shared_ptr & aws_request) const { auto aws_response = Aws::MakeShared(kAwsTag, aws_request); @@ 
-122,11 +130,19 @@ class CustomHttpClient : public AwsHttpClient { for(auto &[name, value]: aws_response->GetHeaders()) { aws_response->AddHeader(name, value); } - // Copy cord onto the body stream - // TODO: This should be avoided by subclassing Aws::IOStream - // to encapsulate a Cord + + // Move Cord into the body stream if(!ts_response.payload.empty()) { - aws_response->GetResponseBody() << ts_response.payload; + auto & body = aws_response->GetResponseBody(); + if(auto cordstreambuf = dynamic_cast(body.rdbuf()); + cordstreambuf) { + // Fast path, directly assign the Cord + // TODO: remove this + ABSL_LOG(INFO) << "CordBacked Aws::Http::StandardHttpResponse of size " << ts_response.payload.size(); + cordstreambuf->TakeCord(std::move(ts_response.payload)); + } else { + body << ts_response.payload; + } } return aws_response; @@ -168,6 +184,7 @@ class CustomHttpFactory : public Aws::Http::HttpClientFactory { std::shared_ptr CreateHttpRequest( const Aws::String &uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory &streamFactory) const override { + ABSL_LOG(INFO) << "Constructing custom HttpRequest"; return CreateHttpRequest(Aws::Http::URI(uri), method, streamFactory); } @@ -175,9 +192,10 @@ class CustomHttpFactory : public Aws::Http::HttpClientFactory { const Aws::Http::URI& uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory& streamFactory) const override { - auto request = Aws::MakeShared(kAwsTag, uri, method); - request->SetResponseStreamFactory(streamFactory); - return request; + ABSL_LOG(INFO) << "Constructing custom HttpRequest"; + auto request = Aws::MakeShared(kAwsTag, uri, method); + request->SetResponseStreamFactory(streamFactory); + return request; } }; @@ -243,6 +261,11 @@ void AWSLogSystem::LogMessage(AwsLogLevel log_level, const std::string & message } // namespace +Aws::IOStream * CordBackedResponseStreamFactory() { + return Aws::New( + kAwsTag, Aws::MakeUnique(kAwsTag)); +} + // Initialise AWS API and Logging std::shared_ptr 
GetAwsContext() { absl::MutexLock lock(&context_mu_); diff --git a/tensorstore/kvstore/s3_sdk/s3_context.h b/tensorstore/kvstore/s3_sdk/s3_context.h index f6496c1e8..968a73ecc 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.h +++ b/tensorstore/kvstore/s3_sdk/s3_context.h @@ -32,6 +32,8 @@ struct AwsContext { // Initialise AWS API and Logging std::shared_ptr GetAwsContext(); +// Return an IOStream backed by a Cord +Aws::IOStream * CordBackedResponseStreamFactory(); } // namespace internal_kvstore_s3 } // neamespace tensorstore From f156b4da1ea6d18348e85195cabd145b608e098c Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Wed, 19 Jun 2024 16:33:29 +0200 Subject: [PATCH 42/48] xsgetn fixes --- tensorstore/kvstore/s3_sdk/cord_streambuf.cc | 30 ++++++++------ .../kvstore/s3_sdk/cord_streambuf_test.cc | 39 ++++++++++++++++++- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc index ce24d24eb..70debbead 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf.cc +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc @@ -111,21 +111,29 @@ CordStreamBuf::int_type CordStreamBuf::overflow(int_type ch) { // Bulk get operation streamsize CordStreamBuf::xsgetn(char * s, streamsize count) { - // Not reading or no more Cord data + // Not reading if(!(mode_ & ios_base::in)) return 0; - if(read_chunk_ == cord_.Chunks().end()) return 0; - auto bytes_to_read = std::min(read_chunk_->size(), count); - for(streamsize i = 0; i < bytes_to_read; ++i) s[i] = read_chunk_->operator[](i); - assert(gptr() + bytes_to_read <= egptr()); - if(gptr() + bytes_to_read < egptr()) { - setg(eback(), gptr() + bytes_to_read, egptr()); - } else { - if(++read_chunk_ != cord_.Chunks().end()) { + streamsize bytes_read = 0; + + while(bytes_read < count && read_chunk_ != cord_.Chunks().end()) { + assert(read_chunk_->size() == egptr() - eback()); // invariant + auto bytes_to_read = std::min(gremaining(), count - 
bytes_read); + for(streamsize i = 0, consumed = gconsumed(); i < bytes_to_read; ++i) { + s[bytes_read + i] = read_chunk_->operator[](consumed + i); + } + if(gptr() + bytes_to_read < egptr()) { + // Data remains in the get area + setg(eback(), gptr() + bytes_to_read, egptr()); + } else if(++read_chunk_ != cord_.Chunks().end()) { + // Initialise get area for next iteration char_type * data = const_cast(read_chunk_->data()); setg(data, data, data + read_chunk_->size()); } - } - return bytes_to_read; + + bytes_read += bytes_to_read; + }; + + return bytes_read; } // Handle buffer underflow. diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc index 96cd3542a..eda73260d 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf_test.cc @@ -16,6 +16,7 @@ #include #include +#include #include "absl/strings/cord.h" @@ -208,8 +209,8 @@ TEST(CordStreamBufTest, GetEntireStreamBuf) { EXPECT_TRUE(is.eof()); } -/// Test seeking within the CordStreamBuf -TEST(CordStreamBufTest, ReadSeek) { +/// Test get seeking within the CordStreamBuf +TEST(CordStreamBufTest, GetSeek) { auto is = DefaultUnderlyingStream( MakeUnique(kAwsTag, ThreeBufferCord())); @@ -234,4 +235,38 @@ TEST(CordStreamBufTest, ReadSeek) { EXPECT_TRUE(is.eof()); } +/// Test read seeking within the CordStreamBuf +/// exercises xsgetn +TEST(CordStreamBuftest, ReadSeek) { + auto is = DefaultUnderlyingStream( + MakeUnique(kAwsTag, ThreeBufferCord())); + + is.seekg(5); + + { + char result[kBufferSize] = {0x00}; + EXPECT_TRUE(is.read(result, kBufferSize)); + auto expected = std::string(kBufferSize - 5, '1') + std::string(5, '2'); + EXPECT_EQ(std::string_view(result, kBufferSize), expected); + EXPECT_EQ(is.tellg(), 5 + kBufferSize); + } + + { + char result[kBufferSize] = {0x00}; + EXPECT_TRUE(is.read(result, kBufferSize)); + auto expected = std::string(kBufferSize - 5, '2') + std::string(5, '3'); + 
EXPECT_EQ(std::string_view(result, kBufferSize), expected); + EXPECT_EQ(is.tellg(), 5 + 2 * kBufferSize); + } + + { + char result[kBufferSize] = {0x00}; + EXPECT_FALSE(is.read(result, kBufferSize)); + auto expected = std::string(kBufferSize - 5, '3'); + EXPECT_EQ(std::string_view(result, kBufferSize - 5), expected); + EXPECT_EQ(std::string_view(result + kBufferSize - 5, 5), std::string(5, 0)); + EXPECT_EQ(is.tellg(), -1); + } +} + } // namespace \ No newline at end of file From 85b44d9724a2eb8be7b1adea3e89c41c6ca48a58 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Thu, 20 Jun 2024 10:40:44 +0200 Subject: [PATCH 43/48] MoveCord -> DetachCord, TakeCord -> AssignCord --- tensorstore/kvstore/s3_sdk/cord_streambuf.cc | 5 +++-- tensorstore/kvstore/s3_sdk/cord_streambuf.h | 4 ++-- tensorstore/kvstore/s3_sdk/s3_context.cc | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf.cc b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc index 70debbead..d758de7f5 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf.cc +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf.cc @@ -15,6 +15,7 @@ #include "tensorstore/kvstore/s3_sdk/cord_streambuf.h" #include +#include #include #include "absl/strings/cord.h" @@ -52,7 +53,7 @@ CordStreamBuf::CordStreamBuf(Cord && cord) : } } -Cord CordStreamBuf::MoveCord() { +Cord CordStreamBuf::DetachCord() { Cord result; std::swap(result, cord_); read_chunk_ = cord_.Chunks().begin(); @@ -62,7 +63,7 @@ Cord CordStreamBuf::MoveCord() { return result; } -void CordStreamBuf::TakeCord(Cord && cord) { +void CordStreamBuf::AssignCord(Cord && cord) { setg(nullptr, nullptr, nullptr); setp(nullptr, nullptr); diff --git a/tensorstore/kvstore/s3_sdk/cord_streambuf.h b/tensorstore/kvstore/s3_sdk/cord_streambuf.h index d2856a17f..1b5aa6f7b 100644 --- a/tensorstore/kvstore/s3_sdk/cord_streambuf.h +++ b/tensorstore/kvstore/s3_sdk/cord_streambuf.h @@ -47,10 +47,10 @@ class CordStreamBuf : public 
std::basic_streambuf { const absl::Cord & GetCord() const { return cord_; } // Returns the underlying Cord, resetting the underlying stream - absl::Cord MoveCord(); + absl::Cord DetachCord(); // Takes the supplied Cord as the underlying Cord, // resetting the underlying stream - void TakeCord(absl::Cord && cord); + void AssignCord(absl::Cord && cord); protected: // Bulk put operation diff --git a/tensorstore/kvstore/s3_sdk/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc index 97877db9f..8c1af8459 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -94,7 +94,7 @@ class CustomHttpClient : public AwsHttpClient { // Fast path, extract underlying Cord if (auto cordstreambuf = dynamic_cast(body->rdbuf()); cordstreambuf) { - payload = cordstreambuf->MoveCord(); + payload = cordstreambuf->DetachCord(); // TODO: remove this ABSL_LOG(INFO) << "CordBacked Aws::Http::StandardHttpRequest of size " << payload.size(); } else { @@ -139,7 +139,7 @@ class CustomHttpClient : public AwsHttpClient { // Fast path, directly assign the Cord // TODO: remove this ABSL_LOG(INFO) << "CordBacked Aws::Http::StandardHttpResponse of size " << ts_response.payload.size(); - cordstreambuf->TakeCord(std::move(ts_response.payload)); + cordstreambuf->AssignCord(std::move(ts_response.payload)); } else { body << ts_response.payload; } From 62397fe972b9694887ad8b0a43ad9fd40279531e Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Thu, 20 Jun 2024 11:13:56 +0200 Subject: [PATCH 44/48] Warn on copies of large Http Request/Response bodies --- tensorstore/kvstore/s3_sdk/s3_context.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc index 8c1af8459..f5eb64fb5 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -58,6 +58,7 @@ namespace { static constexpr char kAwsTag[] = "AWS"; static 
constexpr char kUserAgentHeader[] = "user-agent"; +static constexpr std::size_t k1MB = 1024 * 1024; // Context guarded by mutex absl::Mutex context_mu_; @@ -96,7 +97,6 @@ class CustomHttpClient : public AwsHttpClient { cordstreambuf) { payload = cordstreambuf->DetachCord(); // TODO: remove this - ABSL_LOG(INFO) << "CordBacked Aws::Http::StandardHttpRequest of size " << payload.size(); } else { // Slow path, copy characters off the stream into Cord std::vector buffer(absl::CordBuffer::kDefaultLimit); @@ -104,6 +104,11 @@ class CustomHttpClient : public AwsHttpClient { while (body->read(buffer.data(), buffer.size()) || body->gcount() > 0) { payload.Append(absl::string_view(buffer.data(), body->gcount())); } + + if(payload.size() > k1MB) { + ABSL_LOG(WARNING) << "Copied HttpRequest body of size " << payload.size() << " from iostream"; + } + // Reset stream body->clear(); body->seekg(original); @@ -137,10 +142,12 @@ class CustomHttpClient : public AwsHttpClient { if(auto cordstreambuf = dynamic_cast(body.rdbuf()); cordstreambuf) { // Fast path, directly assign the Cord - // TODO: remove this - ABSL_LOG(INFO) << "CordBacked Aws::Http::StandardHttpResponse of size " << ts_response.payload.size(); cordstreambuf->AssignCord(std::move(ts_response.payload)); } else { + if(ts_response.payload.size() > k1MB) { + ABSL_LOG(WARNING) << "Copied HttpResponse body of size " << ts_response.payload.size() << " to iostream"; + } + body << ts_response.payload; } } From 381a62c16837ee32a3b2e15573498f44a68e438c Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Thu, 20 Jun 2024 11:14:32 +0200 Subject: [PATCH 45/48] Comment grammar --- tensorstore/kvstore/s3_sdk/s3_context.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc index f5eb64fb5..446e1750b 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -65,9 +65,9 @@ absl::Mutex 
context_mu_; std::weak_ptr context_ ABSL_GUARDED_BY(context_mu_); /// Provides a custom Aws HttpClient. -/// Overrides the Aws::HttpClient::MakeRequest to convert AWS HttpRequests -/// into tensorstore HttpRequests which are issued on the tensorstore -/// default HTTP transport. The returned tensorstore HttpResponse is +/// Overrides Aws::HttpClient::MakeRequest to convert AWS HttpRequests +/// into tensorstore HttpRequests which are issued on the default tensorstore +/// HTTP transport. The returned tensorstore HttpResponse is // converted into an AWS HttpResponse class CustomHttpClient : public AwsHttpClient { public: From ad453e530aa772b694879849381648dc628bafd3 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Thu, 20 Jun 2024 11:28:36 +0200 Subject: [PATCH 46/48] Update logging statements --- tensorstore/kvstore/s3_sdk/s3_context.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorstore/kvstore/s3_sdk/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc index 446e1750b..562090d69 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -301,7 +301,7 @@ std::shared_ptr GetAwsContext() { ABSL_LOG(INFO) << "Initialising AWS SDK API"; Aws::InitAPI(options); - ABSL_LOG(INFO) << "Done initialising AWS SDK API"; + ABSL_LOG(INFO) << "AWS SDK API Initialised"; auto provider = Aws::MakeShared(kAwsTag); @@ -313,7 +313,7 @@ std::shared_ptr GetAwsContext() { absl::MutexLock lock(&context_mu_); ABSL_LOG(INFO) << "Shutting down AWS SDK API"; Aws::ShutdownAPI(ctx->options); - ABSL_LOG(INFO) << "Done shutting down AWS SDK API"; + ABSL_LOG(INFO) << "AWS SDK API Shutdown"; delete ctx; }); context_ = ctx; From da0b1b6235bdc5176d4b1b6aad57c4b7fb19365a Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Tue, 25 Jun 2024 09:55:38 +0200 Subject: [PATCH 47/48] workspace whitespace --- third_party/com_github_aws_c_event_stream/workspace.bzl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/third_party/com_github_aws_c_event_stream/workspace.bzl b/third_party/com_github_aws_c_event_stream/workspace.bzl index 19aaaae7e..9cef3c085 100644 --- a/third_party/com_github_aws_c_event_stream/workspace.bzl +++ b/third_party/com_github_aws_c_event_stream/workspace.bzl @@ -24,7 +24,8 @@ def repo(): urls = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/awslabs/aws-c-event-stream/archive/v0.1.4.tar.gz", "https://github.com/awslabs/aws-c-event-stream/archive/v0.1.4.tar.gz", - ], build_file = Label("//third_party:com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel"), + ], + build_file = Label("//third_party:com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel"), system_build_file = Label("//third_party:com_github_aws_c_event_stream/system.BUILD.bazel"), cmake_name = "aws_c_event_stream", cmake_target_mapping = { From 3883f04761f8cd68ab3afde31637efbddd39e5fb Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Wed, 3 Jul 2024 13:23:58 +0200 Subject: [PATCH 48/48] Update to more recent version of the AWS C, CRT and C++ SDK's --- tensorstore/kvstore/s3_sdk/localstack_test.cc | 6 +- tensorstore/kvstore/s3_sdk/s3_context.cc | 26 +- .../aws_c_auth.BUILD.bazel | 21 ++ .../com_github_aws_c_auth/workspace.bzl | 34 +++ .../aws_c_cal.BUILD.bazel | 30 +++ .../com_github_aws_c_cal/workspace.bzl | 35 +++ .../aws_c_common.BUILD.bazel | 78 ++++-- .../com_github_aws_c_common/workspace.bzl | 7 +- .../aws_c_compression.BUILD.bazel | 20 ++ .../workspace.bzl | 34 +++ .../aws_c_event_stream.BUILD.bazel | 1 + .../workspace.bzl | 7 +- .../aws_c_http.BUILD.bazel | 24 ++ .../com_github_aws_c_http/workspace.bzl | 34 +++ .../com_github_aws_c_io/aws_c_io.BUILD.bazel | 45 ++++ third_party/com_github_aws_c_io/workspace.bzl | 35 +++ .../aws_c_iot.BUILD.bazel | 30 +++ .../com_github_aws_c_iot/workspace.bzl | 34 +++ .../aws_c_mqtt.BUILD.bazel | 22 ++ .../com_github_aws_c_mqtt/workspace.bzl | 34 +++ .../com_github_aws_c_s3/aws_c_s3.BUILD.bazel | 22 ++ 
third_party/com_github_aws_c_s3/workspace.bzl | 34 +++ .../aws_c_sdkutils.BUILD.bazel | 20 ++ .../com_github_aws_c_sdkutils/workspace.bzl | 34 +++ .../aws_checksums.BUILD.bazel | 28 +-- .../com_github_aws_checksums/workspace.bzl | 7 +- .../aws_cpp_crt.BUILD.bazel | 44 ++++ .../com_github_aws_cpp_crt/workspace.bzl | 34 +++ .../aws_cpp_sdk.BUILD.bazel | 231 ++++++++---------- .../com_github_aws_cpp_sdk/workspace.bzl | 14 +- .../com_github_s2n_tls/s2n_tls.BUILD.bazel | 25 ++ third_party/com_github_s2n_tls/workspace.bzl | 35 +++ third_party/third_party.bzl | 22 ++ 33 files changed, 921 insertions(+), 186 deletions(-) create mode 100644 third_party/com_github_aws_c_auth/aws_c_auth.BUILD.bazel create mode 100644 third_party/com_github_aws_c_auth/workspace.bzl create mode 100644 third_party/com_github_aws_c_cal/aws_c_cal.BUILD.bazel create mode 100644 third_party/com_github_aws_c_cal/workspace.bzl create mode 100644 third_party/com_github_aws_c_compression/aws_c_compression.BUILD.bazel create mode 100644 third_party/com_github_aws_c_compression/workspace.bzl create mode 100644 third_party/com_github_aws_c_http/aws_c_http.BUILD.bazel create mode 100644 third_party/com_github_aws_c_http/workspace.bzl create mode 100644 third_party/com_github_aws_c_io/aws_c_io.BUILD.bazel create mode 100644 third_party/com_github_aws_c_io/workspace.bzl create mode 100644 third_party/com_github_aws_c_iot/aws_c_iot.BUILD.bazel create mode 100644 third_party/com_github_aws_c_iot/workspace.bzl create mode 100644 third_party/com_github_aws_c_mqtt/aws_c_mqtt.BUILD.bazel create mode 100644 third_party/com_github_aws_c_mqtt/workspace.bzl create mode 100644 third_party/com_github_aws_c_s3/aws_c_s3.BUILD.bazel create mode 100644 third_party/com_github_aws_c_s3/workspace.bzl create mode 100644 third_party/com_github_aws_c_sdkutils/aws_c_sdkutils.BUILD.bazel create mode 100644 third_party/com_github_aws_c_sdkutils/workspace.bzl create mode 100644 
third_party/com_github_aws_cpp_crt/aws_cpp_crt.BUILD.bazel create mode 100644 third_party/com_github_aws_cpp_crt/workspace.bzl create mode 100644 third_party/com_github_s2n_tls/s2n_tls.BUILD.bazel create mode 100644 third_party/com_github_s2n_tls/workspace.bzl diff --git a/tensorstore/kvstore/s3_sdk/localstack_test.cc b/tensorstore/kvstore/s3_sdk/localstack_test.cc index 107b7dd59..d72274825 100644 --- a/tensorstore/kvstore/s3_sdk/localstack_test.cc +++ b/tensorstore/kvstore/s3_sdk/localstack_test.cc @@ -300,7 +300,11 @@ class LocalStackFixture : public ::testing::Test { auto cfg = Aws::Client::ClientConfiguration{}; cfg.endpointOverride = endpoint_url(); cfg.region = Region(); - auto create_client = std::make_shared(Aws::Auth::AWSCredentials(), cfg); + auto create_client = std::make_shared( + Aws::Auth::AWSCredentials(), + cfg, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Always, + false); auto create_request = Aws::S3::Model::CreateBucketRequest{}; create_request.SetBucket(Bucket()); diff --git a/tensorstore/kvstore/s3_sdk/s3_context.cc b/tensorstore/kvstore/s3_sdk/s3_context.cc index 562090d69..d52f090ec 100644 --- a/tensorstore/kvstore/s3_sdk/s3_context.cc +++ b/tensorstore/kvstore/s3_sdk/s3_context.cc @@ -16,6 +16,8 @@ #include #include +#include +#include #include #include @@ -102,7 +104,7 @@ class CustomHttpClient : public AwsHttpClient { std::vector buffer(absl::CordBuffer::kDefaultLimit); std::streampos original = body->tellg(); while (body->read(buffer.data(), buffer.size()) || body->gcount() > 0) { - payload.Append(absl::string_view(buffer.data(), body->gcount())); + payload.Append(std::string_view(buffer.data(), body->gcount())); } if(payload.size() > k1MB) { @@ -228,23 +230,35 @@ class AWSLogSystem : public AwsLogSystemInterface { void Log(AwsLogLevel log_level, const char* tag, const char* format, ...) 
override; + // Overridden, but prefer the safer LogStream + void vaLog(AwsLogLevel log_level, const char* tag, + const char* format, va_list args) override; + private: - void LogMessage(AwsLogLevel log_level, const std::string & message); + void LogMessage(AwsLogLevel log_level, std::string_view message); AwsLogLevel log_level_; }; - void AWSLogSystem::Log(AwsLogLevel log_level, const char* tag, const char* format, ...) { - char buffer[256]; + // https://www.open-std.org/JTC1/SC22/WG14/www/docs/n1570.pdf + // Section 7.16 va_list args; va_start(args, format); - vsnprintf(buffer, 256, format, args); + vaLog(log_level, tag, format, args); va_end(args); +} + +void AWSLogSystem::vaLog(AwsLogLevel log_level, const char* tag, + const char* format, va_list args) { + // https://www.open-std.org/JTC1/SC22/WG14/www/docs/n1570.pdf + // Section 7.16 + char buffer[256]; + vsnprintf(buffer, 256, format, args); LogMessage(log_level, buffer); } -void AWSLogSystem::LogMessage(AwsLogLevel log_level, const std::string & message) { +void AWSLogSystem::LogMessage(AwsLogLevel log_level, std::string_view message) { switch(log_level) { case AwsLogLevel::Info: ABSL_LOG(INFO) << message; diff --git a/third_party/com_github_aws_c_auth/aws_c_auth.BUILD.bazel b/third_party/com_github_aws_c_auth/aws_c_auth.BUILD.bazel new file mode 100644 index 000000000..e1fb0fde2 --- /dev/null +++ b/third_party/com_github_aws_c_auth/aws_c_auth.BUILD.bazel @@ -0,0 +1,21 @@ +# Description: +# AWS C Auth + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +cc_library( + name = "aws_c_auth", + srcs = glob([ + "include/aws/auth/**/*.h", + "source/*.c", + ]), + includes = ["include"], + deps = [ + "@com_github_aws_c_http//:aws_c_http", + "@com_github_aws_c_sdkutils//:aws_c_sdkutils", + ], +) \ No newline at end of file diff --git a/third_party/com_github_aws_c_auth/workspace.bzl b/third_party/com_github_aws_c_auth/workspace.bzl new file mode 
100644 index 000000000..b9e2f0604 --- /dev/null +++ b/third_party/com_github_aws_c_auth/workspace.bzl @@ -0,0 +1,34 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_c_auth", + sha256 = "f249a12a6ac319e929c005fb7efd5534c83d3af3a3a53722626ff60a494054bb", + strip_prefix = "aws-c-auth-0.7.22", + urls = [ + "https://github.com/awslabs/aws-c-auth/archive/refs/tags/v0.7.22.tar.gz", + ], + build_file = Label("//third_party:com_github_aws_c_auth/aws_c_auth.BUILD.bazel"), + system_build_file = Label("//third_party:com_github_aws_c_auth/system.BUILD.bazel"), + cmake_name = "aws_c_auth", + cmake_target_mapping = { + "@com_github_aws_c_auth//:aws_c_auth": "aws_c_auth::aws_c_auth", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/com_github_aws_c_cal/aws_c_cal.BUILD.bazel b/third_party/com_github_aws_c_cal/aws_c_cal.BUILD.bazel new file mode 100644 index 000000000..e1b5d8f26 --- /dev/null +++ b/third_party/com_github_aws_c_cal/aws_c_cal.BUILD.bazel @@ -0,0 +1,30 @@ +# Description: +# AWS C Cal + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "aws_c_cal", + srcs = glob([ + "include/aws/cal/*.h", + "include/aws/cal/private/*.h", + "source/*.c" + ]) + select({ + 
"@platforms//os:windows": glob([ + "source/windows/*.c", + ]), + "@platforms//os:linux": glob([ + "source/unix/*.c", + ]), + "@platforms//os:osx": glob([ + "source/darwin/*.c", + ]) + }), + includes = ["include"], + deps = [ + "@com_github_aws_c_common//:aws_c_common", + "@com_google_boringssl//:crypto" + ] +) \ No newline at end of file diff --git a/third_party/com_github_aws_c_cal/workspace.bzl b/third_party/com_github_aws_c_cal/workspace.bzl new file mode 100644 index 000000000..14353a4ff --- /dev/null +++ b/third_party/com_github_aws_c_cal/workspace.bzl @@ -0,0 +1,35 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_c_cal", + sha256 = "9c51afbece6aa7a4a3e40b99c242884c1744d7f949a3f720cea41d247ac2d06a", + strip_prefix = "aws-c-cal-0.7.0", + urls = [ + "https://github.com/awslabs/aws-c-cal/archive/refs/tags/v0.7.0.tar.gz", + ], + build_file = Label("//third_party:com_github_aws_c_cal/aws_c_cal.BUILD.bazel"), + #system_build_file = Label("//third_party:com_github_aws_c_cal/system.BUILD.bazel"), + cmake_name = "aws_c_cal", + cmake_target_mapping = { + "@com_github_aws_c_cal//:aws_c_cal": "aws_c_cal::aws_c_cal", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel b/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel index 50020212b..f5e509f87 100644 --- a/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel +++ b/third_party/com_github_aws_c_common/aws_c_common.BUILD.bazel @@ -1,32 +1,92 @@ # Description: # AWS C Common +load("@bazel_skylib//rules:write_file.bzl", "write_file") + package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) +write_file( + name = "write_config_h", + out = "include/aws/common/config.h", + newline = "auto", + + content = [ + "#ifndef AWS_COMMON_CONFIG_H", + "#define AWS_COMMON_CONFIG_H", + "", + "#define AWS_HAVE_GCC_OVERFLOW_MATH_EXTENSIONS", + "#define AWS_HAVE_GCC_INLINE_ASM", + "#define AWS_HAVE_POSIX_LARGE_FILE_SUPPORT", + ] + select({ + "@platforms//os:linux": [ + "#define AWS_HAVE_EXECINFO", + "#define AWS_HAVE_LINUX_IF_LINK_H", + ], + "@platforms//os:windows": [ + "#define AWS_HAVE_WINAPI_DESKTOP", + ], + }) + [ + "#undef AWS_HAVE_WINAPI_DESKTOP", + # TODO: improve with logic from AwsSIMD.cmake + # but this strictly requires configure style tests... 
+ "#undef AWS_USE_CPU_EXTENSIONS", + "#undef AWS_HAVE_MSVC_INTRINSICS_X64", + "#undef AWS_HAVE_AVX2_INTRINSICS", + "#undef AWS_HAVE_AVX512_INTRINSICS", + "#undef AWS_HAVE_MM256_EXTRACT_EPI64", + "#undef AWS_HAVE_CLMUL", + "#undef AWS_HAVE_ARM32_CRC", + "#undef AWS_HAVE_ARMv8_1", + "#undef AWS_ARCH_ARM64", + "#undef AWS_ARCH_INTEL", + "#undef AWS_ARCH_INTEL_X64", + "", + "#endif" + ] +) + cc_library( name = "aws_c_common", srcs = glob([ "include/aws/common/*.h", + "include/aws/common/external/*.h", "include/aws/common/private/*.h", "source/*.c", + "source/arch/generic/*.c", + "source/external/**/*.h", + "source/external/**/*.c", ]) + select({ "@platforms//os:windows": glob([ "source/windows/*.c", ]), - "//conditions:default": glob([ + "@platforms//os:linux": glob([ + "source/linux/*.c", "source/posix/*.c", ]), + "@platforms//os:osx": glob([ + "source/posix/*.c", + ]) }), - hdrs = [ - "include/aws/common/config.h", + hdrs = glob([ + "include/aws/common/*.h", + "include/aws/common/private/*.h", + ]) + [ + ":write_config_h" + ], + defines = [ + # TODO: improve this with logic from AwsThreadAffinity.cmake + "AWS_AFFINITY_METHOD=AWS_AFFINITY_METHOD_NONE", + # Disable macro tracing API + "INTEL_NO_ITTNOTIFY_API", ], - defines = [], includes = [ "include", + "source/external", + "source/external/libcbor" ], textual_hdrs = glob([ "include/**/*.inl", @@ -34,13 +94,3 @@ cc_library( deps = [], ) -genrule( - name = "config_h", - srcs = [ - "include/aws/common/config.h.in", - ], - outs = [ - "include/aws/common/config.h", - ], - cmd = "sed 's/cmakedefine/undef/g' $< > $@", -) \ No newline at end of file diff --git a/third_party/com_github_aws_c_common/workspace.bzl b/third_party/com_github_aws_c_common/workspace.bzl index 578fef844..6706bda5a 100644 --- a/third_party/com_github_aws_c_common/workspace.bzl +++ b/third_party/com_github_aws_c_common/workspace.bzl @@ -19,11 +19,10 @@ def repo(): maybe( third_party_http_archive, name = "com_github_aws_c_common", - sha256 = 
"01c2a58553a37b3aa5914d9e0bf7bf14507ff4937bc5872a678892ca20fcae1f", - strip_prefix = "aws-c-common-0.4.29", + sha256 = "adf838daf6a60aa31268522105b03262d745f529bc981d3ac665424133d6f91b", + strip_prefix = "aws-c-common-0.9.23", urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/awslabs/aws-c-common/archive/v0.4.29.tar.gz", - "https://github.com/awslabs/aws-c-common/archive/v0.4.29.tar.gz", + "https://github.com/awslabs/aws-c-common/archive/v0.9.23.tar.gz", ], build_file = Label("//third_party:com_github_aws_c_common/aws_c_common.BUILD.bazel"), system_build_file = Label("//third_party:com_github_aws_c_common/system.BUILD.bazel"), diff --git a/third_party/com_github_aws_c_compression/aws_c_compression.BUILD.bazel b/third_party/com_github_aws_c_compression/aws_c_compression.BUILD.bazel new file mode 100644 index 000000000..7095a3a90 --- /dev/null +++ b/third_party/com_github_aws_c_compression/aws_c_compression.BUILD.bazel @@ -0,0 +1,20 @@ +# Description: +# AWS C Compression + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +cc_library( + name = "aws_c_compression", + srcs = glob([ + "include/aws/compression/**/*.h", + "source/*.c", + ]), + includes = ["include"], + deps = [ + "@com_github_aws_c_common//:aws_c_common", + ], +) \ No newline at end of file diff --git a/third_party/com_github_aws_c_compression/workspace.bzl b/third_party/com_github_aws_c_compression/workspace.bzl new file mode 100644 index 000000000..a6330ee37 --- /dev/null +++ b/third_party/com_github_aws_c_compression/workspace.bzl @@ -0,0 +1,34 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_c_compression", + sha256 = "517c361f3b7fffca08efd5ad251a20489794f056eab0dfffacc6d5b341df8e86", + strip_prefix = "aws-c-compression-0.2.18", + urls = [ + "https://github.com/awslabs/aws-c-compression/archive/v0.2.18.tar.gz", + ], + build_file = Label("//third_party:com_github_aws_c_compression/aws_c_compression.BUILD.bazel"), + system_build_file = Label("//third_party:com_github_aws_c_compression/system.BUILD.bazel"), + cmake_name = "aws_c_compression", + cmake_target_mapping = { + "@com_github_aws_c_compression//:aws_c_compression": "aws_c_compression::aws_c_compression", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel b/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel index 4db5a26a6..dfa376839 100644 --- a/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel +++ b/third_party/com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel @@ -21,5 +21,6 @@ cc_library( deps = [ "@com_github_aws_c_common//:aws_c_common", "@com_github_aws_checksums//:aws_checksums", + "@com_github_aws_c_io//:aws_c_io", ], ) \ No newline at end of file diff --git a/third_party/com_github_aws_c_event_stream/workspace.bzl b/third_party/com_github_aws_c_event_stream/workspace.bzl index 9cef3c085..917d432a5 100644 --- 
a/third_party/com_github_aws_c_event_stream/workspace.bzl +++ b/third_party/com_github_aws_c_event_stream/workspace.bzl @@ -19,11 +19,10 @@ def repo(): maybe( third_party_http_archive, name = "com_github_aws_c_event_stream", - sha256 = "31d880d1c868d3f3df1e1f4b45e56ac73724a4dc3449d04d47fc0746f6f077b6", - strip_prefix = "aws-c-event-stream-0.1.4", + sha256 = "c98b8fa05c2ca10aacfce7327b92a84669c2da95ccb8e7d7b3e3285fcec8beee", + strip_prefix = "aws-c-event-stream-0.4.2", urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/awslabs/aws-c-event-stream/archive/v0.1.4.tar.gz", - "https://github.com/awslabs/aws-c-event-stream/archive/v0.1.4.tar.gz", + "https://github.com/awslabs/aws-c-event-stream/archive/v0.4.2.tar.gz", ], build_file = Label("//third_party:com_github_aws_c_event_stream/aws_c_event_stream.BUILD.bazel"), system_build_file = Label("//third_party:com_github_aws_c_event_stream/system.BUILD.bazel"), diff --git a/third_party/com_github_aws_c_http/aws_c_http.BUILD.bazel b/third_party/com_github_aws_c_http/aws_c_http.BUILD.bazel new file mode 100644 index 000000000..81fa1d460 --- /dev/null +++ b/third_party/com_github_aws_c_http/aws_c_http.BUILD.bazel @@ -0,0 +1,24 @@ +# Description: +# AWS C HTTP + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +cc_library( + name = "aws_c_http", + srcs = glob([ + "include/aws/http/**/*.h", + "source/*.c", + ]), + textual_hdrs = glob([ + "include/aws/http/**/*.def", + ]), + includes = ["include"], + deps = [ + "@com_github_aws_c_io//:aws_c_io", + "@com_github_aws_c_compression//:aws_c_compression", + ], +) \ No newline at end of file diff --git a/third_party/com_github_aws_c_http/workspace.bzl b/third_party/com_github_aws_c_http/workspace.bzl new file mode 100644 index 000000000..bbf05f3b0 --- /dev/null +++ b/third_party/com_github_aws_c_http/workspace.bzl @@ -0,0 +1,34 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_c_http", + sha256 = "a76ba75e59e1ac169df3ec00c0d1c453db1a4db85ee8acd3282a85ee63d6b31c", + strip_prefix = "aws-c-http-0.8.2", + urls = [ + "https://github.com/awslabs/aws-c-http/archive/refs/tags/v0.8.2.tar.gz", + ], + build_file = Label("//third_party:com_github_aws_c_http/aws_c_http.BUILD.bazel"), + system_build_file = Label("//third_party:com_github_aws_c_http/system.BUILD.bazel"), + cmake_name = "aws_c_http", + cmake_target_mapping = { + "@com_github_aws_c_http//:aws_c_http": "aws_c_http::aws_c_http", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/com_github_aws_c_io/aws_c_io.BUILD.bazel b/third_party/com_github_aws_c_io/aws_c_io.BUILD.bazel new file mode 100644 index 000000000..5f2c1e176 --- /dev/null +++ b/third_party/com_github_aws_c_io/aws_c_io.BUILD.bazel @@ -0,0 +1,45 @@ +# Description: +# AWS C IO + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "aws_c_io", + srcs = glob([ + "include/aws/io/**/*.h", + "source/*.h", + "source/*.c", + "source/pkcs11/**/*.h", + "source/pkcs11/**/*.c", + "source/s2n/*.h", + "source/s2n/*.c", + ]) + select({ + "@platforms//os:windows": glob([ + "source/windows/*.c", + ]), + "@platforms//os:linux": glob([ + 
"source/linux/*.c", + "source/posix/*.c", + ]), + "@platforms//os:osx": glob([ + "source/bsd/*.c", + "source/darwin/*.c", + "source/posix/*.c", + ]) + }), + defines = ["USE_S2N"], +# defines = [] + select({ +# "@platforms//os:linux": ["BYO_CRYPTO"], +# "//conditions:default": ["USE_S2N"], +# }), + + includes = ["include"], + deps = [ + "@com_github_aws_c_common//:aws_c_common", + "@com_github_aws_c_cal//:aws_c_cal", + #"@com_github_s2n_tls//:s2n_tls", + "@com_google_boringssl//:crypto", + ] +) \ No newline at end of file diff --git a/third_party/com_github_aws_c_io/workspace.bzl b/third_party/com_github_aws_c_io/workspace.bzl new file mode 100644 index 000000000..e0c59a360 --- /dev/null +++ b/third_party/com_github_aws_c_io/workspace.bzl @@ -0,0 +1,35 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_c_io", + sha256 = "3a3b7236f70209ac12b5bafa7dd81b75cc68b691a0aa0686d6d3b7e4bbe5fbc9", + strip_prefix = "aws-c-io-0.14.9", + urls = [ + "https://github.com/awslabs/aws-c-io/archive/refs/tags/v0.14.9.tar.gz", + ], + build_file = Label("//third_party:com_github_aws_c_io/aws_c_io.BUILD.bazel"), + #system_build_file = Label("//third_party:com_github_aws_c_io/system.BUILD.bazel"), + cmake_name = "aws_c_io", + cmake_target_mapping = { + "@com_github_aws_c_io//:aws_c_io": "aws_c_io::aws_c_io", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/com_github_aws_c_iot/aws_c_iot.BUILD.bazel b/third_party/com_github_aws_c_iot/aws_c_iot.BUILD.bazel new file mode 100644 index 000000000..164e37d0e --- /dev/null +++ b/third_party/com_github_aws_c_iot/aws_c_iot.BUILD.bazel @@ -0,0 +1,30 @@ +# Description: +# AWS C IOT + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +cc_library( + name = "aws_c_iot", + srcs = glob([ + "include/aws/iotdevice/**/*.h", + "source/*.c", + ]) + select({ + "@platforms//os:linux": glob([ + "source/linux/*.c", + ]), + "@platforms//os:windows": glob([ + "source/windows/*.c", + ]), + "@platforms//os:osx": glob([ + "source/apple/*.c", + ]), + }), + includes = ["include"], + deps = [ + "@com_github_aws_c_mqtt//:aws_c_mqtt", + ], +) \ No newline at end of file diff --git a/third_party/com_github_aws_c_iot/workspace.bzl b/third_party/com_github_aws_c_iot/workspace.bzl new file mode 100644 index 000000000..6889c80d2 --- /dev/null +++ b/third_party/com_github_aws_c_iot/workspace.bzl @@ -0,0 +1,34 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_c_iot", + sha256 = "6b9ae985d9b019304e86e49fc6da738ed5fff3b2778ed3617db551f1e033cadf", + strip_prefix = "aws-c-iot-0.1.21", + urls = [ + "https://github.com/awslabs/aws-c-iot/archive/refs/tags/v0.1.21.tar.gz", + ], + build_file = Label("//third_party:com_github_aws_c_iot/aws_c_iot.BUILD.bazel"), + system_build_file = Label("//third_party:com_github_aws_c_iot/system.BUILD.bazel"), + cmake_name = "aws_c_iot", + cmake_target_mapping = { + "@com_github_aws_c_iot//:aws_c_iot": "aws_c_iot::aws_c_iot", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/com_github_aws_c_mqtt/aws_c_mqtt.BUILD.bazel b/third_party/com_github_aws_c_mqtt/aws_c_mqtt.BUILD.bazel new file mode 100644 index 000000000..95e9b22ff --- /dev/null +++ b/third_party/com_github_aws_c_mqtt/aws_c_mqtt.BUILD.bazel @@ -0,0 +1,22 @@ +# Description: +# AWS C MQTT + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +cc_library( + name = "aws_c_mqtt", + srcs = glob([ + "include/aws/mqtt/**/*.h", + "source/*.c", + "source/v5/*.c", + ]), + includes = ["include"], + deps = [ + "@com_github_aws_c_http//:aws_c_http", + "@com_github_aws_c_io//:aws_c_io", + ], +) \ No newline at end of file diff --git a/third_party/com_github_aws_c_mqtt/workspace.bzl b/third_party/com_github_aws_c_mqtt/workspace.bzl new file mode 100644 index 
000000000..e79fecc30 --- /dev/null +++ b/third_party/com_github_aws_c_mqtt/workspace.bzl @@ -0,0 +1,34 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_c_mqtt", + sha256 = "63c402b8b81b107e5c1b9b6ae0065bc025b6ad4347518bf30fbd958f999e037e", + strip_prefix = "aws-c-mqtt-0.10.1", + urls = [ + "https://github.com/awslabs/aws-c-mqtt/archive/refs/tags/v0.10.1.tar.gz", + ], + build_file = Label("//third_party:com_github_aws_c_mqtt/aws_c_mqtt.BUILD.bazel"), + system_build_file = Label("//third_party:com_github_aws_c_mqtt/system.BUILD.bazel"), + cmake_name = "aws_c_mqtt", + cmake_target_mapping = { + "@com_github_aws_c_mqtt//:aws_c_mqtt": "aws_c_mqtt::aws_c_mqtt", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/com_github_aws_c_s3/aws_c_s3.BUILD.bazel b/third_party/com_github_aws_c_s3/aws_c_s3.BUILD.bazel new file mode 100644 index 000000000..e6d237dc1 --- /dev/null +++ b/third_party/com_github_aws_c_s3/aws_c_s3.BUILD.bazel @@ -0,0 +1,22 @@ +# Description: +# AWS C S3 + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +cc_library( + name = "aws_c_s3", + srcs = glob([ + "include/aws/s3/**/*.h", + "source/*.c", + ]), + includes = ["include"], + deps = [ + 
"@com_github_aws_c_auth//:aws_c_auth", + "@com_github_aws_c_http//:aws_c_http", + "@com_github_aws_checksums//:aws_checksums", + ], +) \ No newline at end of file diff --git a/third_party/com_github_aws_c_s3/workspace.bzl b/third_party/com_github_aws_c_s3/workspace.bzl new file mode 100644 index 000000000..b21eb4631 --- /dev/null +++ b/third_party/com_github_aws_c_s3/workspace.bzl @@ -0,0 +1,34 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_c_s3", + sha256 = "28c03a19e52790cfa66e8d63c610734112edb36cc3c525712f18da4f0990a7b8", + strip_prefix = "aws-c-s3-0.5.10", + urls = [ + "https://github.com/awslabs/aws-c-s3/archive/refs/tags/v0.5.10.tar.gz", + ], + build_file = Label("//third_party:com_github_aws_c_s3/aws_c_s3.BUILD.bazel"), + system_build_file = Label("//third_party:com_github_aws_c_s3/system.BUILD.bazel"), + cmake_name = "aws_c_s3", + cmake_target_mapping = { + "@com_github_aws_c_s3//:aws_c_s3": "aws_c_s3::aws_c_s3", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/com_github_aws_c_sdkutils/aws_c_sdkutils.BUILD.bazel b/third_party/com_github_aws_c_sdkutils/aws_c_sdkutils.BUILD.bazel new file mode 100644 index 000000000..cbdf06ba2 --- /dev/null +++ b/third_party/com_github_aws_c_sdkutils/aws_c_sdkutils.BUILD.bazel 
@@ -0,0 +1,20 @@ +# Description: +# AWS C SDK Utils + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +cc_library( + name = "aws_c_sdkutils", + srcs = glob([ + "include/aws/sdkutils/**/*.h", + "source/**/*.c", + ]), + includes = ["include"], + deps = [ + "@com_github_aws_c_common//:aws_c_common", + ], +) \ No newline at end of file diff --git a/third_party/com_github_aws_c_sdkutils/workspace.bzl b/third_party/com_github_aws_c_sdkutils/workspace.bzl new file mode 100644 index 000000000..962ee1a32 --- /dev/null +++ b/third_party/com_github_aws_c_sdkutils/workspace.bzl @@ -0,0 +1,34 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_c_sdkutils", + sha256 = "4a818563d7c6636b5b245f5d22d4d7c804fa33fc4ea6976e9c296d272f4966d3", + strip_prefix = "aws-c-sdkutils-0.1.16", + urls = [ + "https://github.com/awslabs/aws-c-sdkutils/archive/refs/tags/v0.1.16.tar.gz", + ], + build_file = Label("//third_party:com_github_aws_c_sdkutils/aws_c_sdkutils.BUILD.bazel"), + system_build_file = Label("//third_party:com_github_aws_c_sdkutils/system.BUILD.bazel"), + cmake_name = "aws_c_sdkutils", + cmake_target_mapping = { + "@com_github_aws_c_sdkutils//:aws_c_sdkutils": "aws_c_sdkutils::aws_c_sdkutils", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel b/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel index 2a2b48dd3..08ec386cb 100644 --- a/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel +++ b/third_party/com_github_aws_checksums/aws_checksums.BUILD.bazel @@ -15,28 +15,10 @@ cc_library( "include/aws/checksums/*.h", "include/aws/checksums/private/*.h", "source/*.c", - ]) + [ - "crc_hw.c", - ], - hdrs = [], - defines = [], + "source/generic/*.c", + ]), includes = ["include"], - deps = [], -) - -write_file( - name = "crc_hw_c", - out = "crc_hw.c", - newline = "auto", - - content = [ - "#include ", - "#include ", - "int aws_checksums_do_cpu_id(int32_t *cpuid) {", - " return 0;", - "}", - "uint32_t aws_checksums_crc32c_hw(const uint8_t *input, int length, uint32_t previousCrc32) {", - " return aws_checksums_crc32c_sw(input, length, previousCrc32);", - "}", + deps = [ + "@com_github_aws_c_common//:aws_c_common", ], -) +) \ No newline at end of file diff --git a/third_party/com_github_aws_checksums/workspace.bzl b/third_party/com_github_aws_checksums/workspace.bzl index d58abb27c..6d8cef89f 100644 --- 
a/third_party/com_github_aws_checksums/workspace.bzl +++ b/third_party/com_github_aws_checksums/workspace.bzl @@ -19,11 +19,10 @@ def repo(): maybe( third_party_http_archive, name = "com_github_aws_checksums", - sha256 = "6e6bed6f75cf54006b6bafb01b3b96df19605572131a2260fddaf0e87949ced0", - strip_prefix = "aws-checksums-0.1.5", + sha256 = "bdba9d0a8b8330a89c6b8cbc00b9aa14f403d3449b37ff2e0d96d62a7301b2ee", + strip_prefix = "aws-checksums-0.1.18", urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/awslabs/aws-checksums/archive/v0.1.5.tar.gz", - "https://github.com/awslabs/aws-checksums/archive/v0.1.5.tar.gz", + "https://github.com/awslabs/aws-checksums/archive/v0.1.18.tar.gz", ], build_file = Label("//third_party:com_github_aws_checksums/aws_checksums.BUILD.bazel"), system_build_file = Label("//third_party:com_github_aws_checksums/system.BUILD.bazel"), diff --git a/third_party/com_github_aws_cpp_crt/aws_cpp_crt.BUILD.bazel b/third_party/com_github_aws_cpp_crt/aws_cpp_crt.BUILD.bazel new file mode 100644 index 000000000..327dc0430 --- /dev/null +++ b/third_party/com_github_aws_cpp_crt/aws_cpp_crt.BUILD.bazel @@ -0,0 +1,44 @@ +# Description: +# AWS CPP CRT + +load("@bazel_skylib//rules:write_file.bzl", "write_file") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +write_file( + name = "generated_config_h", + out = "include/aws/crt/Config.h", + + content = [ + "#pragma once", + "#define AWS_CRT_CPP_VERSION \"0.27.1\"", + "#define AWS_CRT_CPP_VERSION_MAJOR 0", + "#define AWS_CRT_CPP_VERSION_MINOR 27", + "#define AWS_CRT_CPP_VERSION_PATCH 1", + "#define AWS_CRT_CPP_GIT_HASH \"635106906bf8dc0b877d962613f12f019f03e10a\"", + ] +) + +cc_library( + name = "aws_cpp_crt", + hdrs = [":generated_config_h"], + srcs = glob([ + "include/**/*.h", + "source/**/*.cpp", + ]), + includes = ["include"], + # https://docs.aws.amazon.com/sdkref/latest/guide/common-runtime.html#crt-dep + deps 
= [ + "@com_github_aws_c_event_stream//:aws_c_event_stream", + "@com_github_aws_c_auth//:aws_c_auth", + "@com_github_aws_c_cal//:aws_c_cal", + "@com_github_aws_c_iot//:aws_c_iot", + "@com_github_aws_c_mqtt//:aws_c_mqtt", + "@com_github_aws_c_s3//:aws_c_s3", + "@com_github_aws_checksums//:aws_checksums", + ], +) \ No newline at end of file diff --git a/third_party/com_github_aws_cpp_crt/workspace.bzl b/third_party/com_github_aws_cpp_crt/workspace.bzl new file mode 100644 index 000000000..84efc38e7 --- /dev/null +++ b/third_party/com_github_aws_cpp_crt/workspace.bzl @@ -0,0 +1,34 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_aws_cpp_crt", + sha256 = "9689854b67b1a436b1cd31aae75eed8669fbb8d6240fe36684133f93e345f1ac", + strip_prefix = "aws-crt-cpp-0.27.1", + urls = [ + "https://github.com/awslabs/aws-crt-cpp/archive/refs/tags/v0.27.1.tar.gz", + ], + build_file = Label("//third_party:com_github_aws_cpp_crt/aws_cpp_crt.BUILD.bazel"), + system_build_file = Label("//third_party:com_github_aws_cpp_crt/system.BUILD.bazel"), + cmake_name = "aws_cpp_crt", + cmake_target_mapping = { + "@com_github_aws_cpp_crt//:aws_cpp_crt": "aws_cpp_crt::aws_cpp_crt", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel index f8e697596..6dd8cb9fe 100644 --- a/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel +++ b/third_party/com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel @@ -9,77 +9,109 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) +cc_library( + name = "smithy", + srcs = glob([ + "src/aws-cpp-sdk-core/include/smithy/**/*.h", + ]), + hdrs = glob([ + "src/aws-cpp-sdk-core/source/smithy/**/*.cpp", + ]), + includes = [ + "src/aws-cpp-sdk-core/include", + "src/aws-cpp-sdk-core/include/smithy/tracing/impl", + ], +) + + +write_file( + name = "write_SDKConfig_h", + out = "src/aws-cpp-sdk-core/include/aws/core/SDKConfig.h", + newline = "auto", + + content = [ + "#undef USE_AWS_MEMORY_MANAGEMENT", + "#define ENABLE_CURL_CLIENT 0", + ] + select({ + "@platforms//os:windows": [ + "#define WIN32_LEAN_AND_MEAN", + "#include ", + "#define PLATFORM_WINDOWS", + ], + "@platforms//os:macos": [ + "#define PLATFORM_MACOS" + ], + "//conditions:default": [ + "#define PLATFORM_LINUX", + ], + }), +) + cc_library( name = "core", srcs = glob([ - "aws-cpp-sdk-core/source/*.cpp", # AWS_SOURCE - 
"aws-cpp-sdk-core/source/external/tinyxml2/*.cpp", # AWS_TINYXML2_SOURCE - "aws-cpp-sdk-core/source/external/cjson/*.cpp", # CJSON_SOURCE - "aws-cpp-sdk-core/source/auth/*.cpp", # AWS_AUTH_SOURCE - "aws-cpp-sdk-core/source/client/*.cpp", # AWS_CLIENT_SOURCE - "aws-cpp-sdk-core/source/internal/*.cpp", # AWS_INTERNAL_SOURCE - "aws-cpp-sdk-core/source/aws/model/*.cpp", # AWS_MODEL_SOURCE - "aws-cpp-sdk-core/source/http/*.cpp", # HTTP_SOURCE - "aws-cpp-sdk-core/source/http/standard/*.cpp", # HTTP_STANDARD_SOURCE - "aws-cpp-sdk-core/source/config/*.cpp", # CONFIG_SOURCE - "aws-cpp-sdk-core/source/monitoring/*.cpp", # MONITORING_SOURCE - "aws-cpp-sdk-core/source/utils/*.cpp", # UTILS_SOURCE - "aws-cpp-sdk-core/source/utils/event/*.cpp", # UTILS_EVENT_SOURCE - "aws-cpp-sdk-core/source/utils/base64/*.cpp", # UTILS_BASE64_SOURCE - "aws-cpp-sdk-core/source/utils/crypto/*.cpp", # UTILS_CRYPTO_SOURCE - "aws-cpp-sdk-core/source/utils/json/*.cpp", # UTILS_JSON_SOURCE - "aws-cpp-sdk-core/source/utils/threading/*.cpp", # UTILS_THREADING_SOURCE - "aws-cpp-sdk-core/source/utils/xml/*.cpp", # UTILS_XML_SOURCE - "aws-cpp-sdk-core/source/utils/logging/*.cpp", # UTILS_LOGGING_SOURCE - "aws-cpp-sdk-core/source/utils/memory/*.cpp", # UTILS_MEMORY_SOURCE - "aws-cpp-sdk-core/source/utils/memory/stl/*.cpp", # UTILS_MEMORY_STL_SOURCE - "aws-cpp-sdk-core/source/utils/stream/*.cpp", # UTILS_STREAM_SOURCE - "aws-cpp-sdk-core/source/utils/crypto/factory/*.cpp", # UTILS_CRYPTO_FACTORY_SOURCE - "aws-cpp-sdk-core/source/http/curl/*.cpp", # HTTP_CURL_CLIENT_SOURCE - "aws-cpp-sdk-core/source/utils/crypto/openssl/*.cpp", # UTILS_CRYPTO_OPENSSL_SOURCE + "src/aws-cpp-sdk-core/source/*.cpp", + "src/aws-cpp-sdk-core/source/endpoint/*.cpp", + "src/aws-cpp-sdk-core/source/endpoint/internal/*.cpp", + "src/aws-cpp-sdk-core/source/http/*.cpp", + "src/aws-cpp-sdk-core/source/http/crt/*.cpp", + "src/aws-cpp-sdk-core/source/http/curl/*.cpp", + "src/aws-cpp-sdk-core/source/http/standard/*.cpp", + 
"src/aws-cpp-sdk-core/source/utils/*.cpp", + "src/aws-cpp-sdk-core/source/utils/memory/*.cpp", + "src/aws-cpp-sdk-core/source/utils/memory/stl/*.cpp", + "src/aws-cpp-sdk-core/source/utils/component-registry/*.cpp", + "src/aws-cpp-sdk-core/source/utils/crypto/*.cpp", + "src/aws-cpp-sdk-core/source/utils/crypto/factory/*.cpp", + "src/aws-cpp-sdk-core/source/utils/crypto/crt/*.cpp", + "src/aws-cpp-sdk-core/source/utils/logging/*.cpp", + "src/aws-cpp-sdk-core/source/utils/event/*.cpp", + "src/aws-cpp-sdk-core/source/utils/threading/*.cpp", + "src/aws-cpp-sdk-core/source/utils/stream/*.cpp", + "src/aws-cpp-sdk-core/source/utils/base64/*.cpp", + "src/aws-cpp-sdk-core/source/utils/json/*.cpp", + "src/aws-cpp-sdk-core/source/utils/xml/*.cpp", + "src/aws-cpp-sdk-core/source/external/*.cpp", + "src/aws-cpp-sdk-core/source/external/tinyxml2/*.cpp", + "src/aws-cpp-sdk-core/source/external/cjson/*.cpp", + "src/aws-cpp-sdk-core/source/platform/*.cpp", + "src/aws-cpp-sdk-core/source/config/*.cpp", + "src/aws-cpp-sdk-core/source/config/defaults/*.cpp", + "src/aws-cpp-sdk-core/source/internal/*.cpp", + "src/aws-cpp-sdk-core/source/client/*.cpp", + "src/aws-cpp-sdk-core/source/auth/*.cpp", + "src/aws-cpp-sdk-core/source/auth/signer/*.cpp", + "src/aws-cpp-sdk-core/source/auth/bearer-token-provider/*.cpp", + "src/aws-cpp-sdk-core/source/auth/signer-provider/*.cpp", + "src/aws-cpp-sdk-core/source/smithy/*.cpp", + "src/aws-cpp-sdk-core/source/smithy/tracing/*.cpp", + "src/aws-cpp-sdk-core/source/smithy/tracing/impl/*.cpp", + #"src/aws-cpp-sdk-core/source/smithy/tracing/impl/opentelemetry/*.cpp", + "src/aws-cpp-sdk-core/source/monitoring/*.cpp", ]) + select({ "@platforms//os:windows": glob([ - "aws-cpp-sdk-core/source/net/windows/*.cpp", # NET_SOURCE - "aws-cpp-sdk-core/source/platform/windows/*.cpp", # PLATFORM_WINDOWS_SOURCE + "src/aws-cpp-sdk-core/source/http/windows/*.cpp", + "src/aws-cpp-sdk-core/source/net/windows/*.cpp", + "src/aws-cpp-sdk-core/source/platform/windows/*.cpp", + 
]), + "@platforms//os:linux": glob([ + "src/aws-cpp-sdk-core/source/net/linux-shared/*.cpp", + "src/aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", ]), "//conditions:default": glob([ - "aws-cpp-sdk-core/source/net/linux-shared/*.cpp", # NET_SOURCE - "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", # PLATFORM_LINUX_SHARED_SOURCE + "src/aws-cpp-sdk-core/source/http/standard/*.cpp", + "src/aws-cpp-sdk-core/source/net/*.cpp", ]), }), hdrs = [ - "aws-cpp-sdk-core/include/aws/core/SDKConfig.h", + ":write_SDKConfig_h", ] + glob([ - "aws-cpp-sdk-core/include/aws/core/*.h", # AWS_HEADERS - "aws-cpp-sdk-core/include/aws/core/auth/*.h", # AWS_AUTH_HEADERS - "aws-cpp-sdk-core/include/aws/core/client/*.h", # AWS_CLIENT_HEADERS - "aws-cpp-sdk-core/include/aws/core/internal/*.h", # AWS_INTERNAL_HEADERS - "aws-cpp-sdk-core/include/aws/core/net/*.h", # NET_HEADERS - "aws-cpp-sdk-core/include/aws/core/http/*.h", # HTTP_HEADERS - "aws-cpp-sdk-core/include/aws/core/http/standard/*.h", # HTTP_STANDARD_HEADERS - "aws-cpp-sdk-core/include/aws/core/config/*.h", # CONFIG_HEADERS - "aws-cpp-sdk-core/include/aws/core/monitoring/*.h", # MONITORING_HEADERS - "aws-cpp-sdk-core/include/aws/core/platform/*.h", # PLATFORM_HEADERS - "aws-cpp-sdk-core/include/aws/core/utils/*.h", # UTILS_HEADERS - "aws-cpp-sdk-core/include/aws/core/utils/event/*.h", # UTILS_EVENT_HEADERS - "aws-cpp-sdk-core/include/aws/core/utils/base64/*.h", # UTILS_BASE64_HEADERS - "aws-cpp-sdk-core/include/aws/core/utils/crypto/*.h", # UTILS_CRYPTO_HEADERS - "aws-cpp-sdk-core/include/aws/core/utils/json/*.h", # UTILS_JSON_HEADERS - "aws-cpp-sdk-core/include/aws/core/utils/threading/*.h", # UTILS_THREADING_HEADERS - "aws-cpp-sdk-core/include/aws/core/utils/xml/*.h", # UTILS_XML_HEADERS - "aws-cpp-sdk-core/include/aws/core/utils/memory/*.h", # UTILS_MEMORY_HEADERS - "aws-cpp-sdk-core/include/aws/core/utils/memory/stl/*.h", # UTILS_STL_HEADERS - "aws-cpp-sdk-core/include/aws/core/utils/logging/*.h", # 
UTILS_LOGGING_HEADERS - "aws-cpp-sdk-core/include/aws/core/utils/ratelimiter/*.h", # UTILS_RATE_LIMITER_HEADERS - "aws-cpp-sdk-core/include/aws/core/utils/stream/*.h", # UTILS_STREAM_HEADERS - "aws-cpp-sdk-core/include/aws/core/external/cjson/*.h", # CJSON_HEADERS - "aws-cpp-sdk-core/include/aws/core/external/tinyxml2/*.h", # TINYXML2_HEADERS - "aws-cpp-sdk-core/include/aws/core/http/curl/*.h", # HTTP_CURL_CLIENT_HEADERS - "aws-cpp-sdk-core/include/aws/core/utils/crypto/openssl/*.h", # UTILS_CRYPTO_OPENSSL_HEADERS + "src/aws-cpp-sdk-core/include/aws/core/**/*.h", ]), defines = [], includes = [ - "aws-cpp-sdk-core/include", + "src/aws-cpp-sdk-core/include", + "src/aws-cpp-sdk-core/include/smithy/tracing/impl", ], linkopts = select({ "@platforms//os:windows": [ @@ -89,25 +121,24 @@ cc_library( "//conditions:default": [], }), deps = [ - "@com_github_aws_c_event_stream//:aws_c_event_stream", - "@com_google_boringssl//:crypto", - "@com_google_boringssl//:ssl", + "@com_github_aws_cpp_crt//:aws_cpp_crt", "@se_curl//:curl", + ":smithy", ], ) cc_library( name = "s3", srcs = glob([ - "aws-cpp-sdk-s3/source/*.cpp", # AWS_S3_SOURCE - "aws-cpp-sdk-s3/source/model/*.cpp", # AWS_S3_MODEL_SOURCE + "generated/src/aws-cpp-sdk-s3/source/*.cpp", # AWS_S3_SOURCE + "generated/src/aws-cpp-sdk-s3/source/model/*.cpp", # AWS_S3_MODEL_SOURCE ]), hdrs = glob([ - "aws-cpp-sdk-s3/include/aws/s3/*.h", # AWS_S3_HEADERS - "aws-cpp-sdk-s3/include/aws/s3/model/*.h", # AWS_S3_MODEL_HEADERS + "generated/src/aws-cpp-sdk-s3/include/aws/s3/*.h", # AWS_S3_HEADERS + "generated/src/aws-cpp-sdk-s3/include/aws/s3/model/*.h", # AWS_S3_MODEL_HEADERS ]), includes = [ - "aws-cpp-sdk-s3/include", + "generated/src/aws-cpp-sdk-s3/include", ], deps = [ ":core", @@ -117,19 +148,19 @@ cc_library( cc_library( name = "s3_encryption", srcs = glob([ - "aws-cpp-sdk-s3-encryption/source/*.cpp", - "aws-cpp-sdk-s3-encryption/source/handlers/*.cpp", - "aws-cpp-sdk-s3-encryption/source/materials/*.cpp", - 
"aws-cpp-sdk-s3-encryption/source/modules/*.cpp", + "src/aws-cpp-sdk-s3-encryption/source/*.cpp", + "src/aws-cpp-sdk-s3-encryption/source/handlers/*.cpp", + "src/aws-cpp-sdk-s3-encryption/source/materials/*.cpp", + "src/aws-cpp-sdk-s3-encryption/source/modules/*.cpp", ]), hdrs = glob([ - "aws-cpp-sdk-s3-encryption/include/aws/s3/*.h", - "aws-cpp-sdk-s3-encryption/include/handlers/*.h", - "aws-cpp-sdk-s3-encryption/include/materials/*.h", - "aws-cpp-sdk-s3-encryption/include/modules/*.h", + "src/aws-cpp-sdk-s3-encryption/include/aws/s3/*.h", + "src/aws-cpp-sdk-s3-encryption/include/handlers/*.h", + "src/aws-cpp-sdk-s3-encryption/include/materials/*.h", + "src/aws-cpp-sdk-s3-encryption/include/modules/*.h", ]), includes = [ - "aws-cpp-sdk-s3-encryption/include", + "src/aws-cpp-sdk-s3-encryption/include", ], deps = [ ":core", @@ -140,13 +171,13 @@ cc_library( cc_library( name = "transfer", srcs = glob([ - "aws-cpp-sdk-transfer/source/transfer/*.cpp", # TRANSFER_SOURCE + "src/aws-cpp-sdk-transfer/source/transfer/*.cpp", # TRANSFER_SOURCE ]), hdrs = glob([ - "aws-cpp-sdk-transfer/include/aws/transfer/*.h", # TRANSFER_HEADERS + "src/aws-cpp-sdk-transfer/include/aws/transfer/*.h", # TRANSFER_HEADERS ]), includes = [ - "aws-cpp-sdk-transfer/include", + "src/aws-cpp-sdk-transfer/include", ], deps = [ ":core", @@ -154,50 +185,4 @@ cc_library( ], ) -cc_library( - name = "kinesis", - srcs = glob([ - "aws-cpp-sdk-kinesis/source/*.cpp", # AWS_KINESIS_SOURCE - "aws-cpp-sdk-kinesis/source/model/*.cpp", # AWS_KINESIS_MODEL_SOURCE - ]), - hdrs = glob([ - "aws-cpp-sdk-kinesis/include/aws/kinesis/*.h", # AWS_KINESIS_HEADERS - "aws-cpp-sdk-kinesis/include/aws/kinesis/model/*.h", # AWS_KINESIS_MODEL_HEADERS - ]), - includes = [ - "aws-cpp-sdk-kinesis/include", - ], - deps = [ - ":core", - ], -) - -write_file( - name = "SDKConfig_h", - out = "aws-cpp-sdk-core/include/aws/core/SDKConfig.h", - newline = "auto", - content = [ - "#define USE_AWS_MEMORY_MANAGEMENT", - "#define 
AWS_SDK_VERSION_STRING=\\"1.8.187\\"", - "#define AWS_SDK_VERSION_MAJOR=1", - "#define AWS_SDK_VERSION_MINOR=8", - "#define AWS_SDK_VERSION_PATCH=187", - "#define ENABLE_OPENSSL_ENCRYPTION=1", - "#define ENABLE_CURL_CLIENT=0", - "#define OPENSSL_IS_BORINGSSL=1", - ] + select({ - "@platforms//os:windows": [ - "#define WIN32_LEAN_AND_MEAN", - "#include ", - "#define PLATFORM_WINDOWS", - "#undef IGNORE", - ], - "@platforms//os:macos": [ - "#define PLATFORM_MACOS" - ], - "//conditions:default": [ - "PLATFORM_LINUX", - ], - }), -) \ No newline at end of file diff --git a/third_party/com_github_aws_cpp_sdk/workspace.bzl b/third_party/com_github_aws_cpp_sdk/workspace.bzl index 36c28a26e..8f28d6777 100644 --- a/third_party/com_github_aws_cpp_sdk/workspace.bzl +++ b/third_party/com_github_aws_cpp_sdk/workspace.bzl @@ -19,14 +19,14 @@ def repo(): maybe( third_party_http_archive, name = "com_github_aws_cpp_sdk", - patches = [ - Label("//third_party:com_github_aws_cpp_sdk/patches/update_sdk.diff"), - ], - patch_args = ["-p1"], - sha256 = "ae1cb22225b1f47eee351c0064be5e87676bf7090bb9ad19888bea0dab0e2749", - strip_prefix = "aws-sdk-cpp-1.8.187", + # patches = [ + # Label("//third_party:com_github_aws_cpp_sdk/patches/update_sdk.diff"), + # ], + # patch_args = ["-p1"], + sha256 = "c2a6977eb2a10066922b72e3876bccccea2902f87f9e86f978bcd3fb50a0adcc", + strip_prefix = "aws-sdk-cpp-1.11.361", urls = [ - "https://github.com/aws/aws-sdk-cpp/archive/1.8.187.tar.gz", + "https://github.com/aws/aws-sdk-cpp/archive/refs/tags/1.11.361.tar.gz", ], build_file = Label("//third_party:com_github_aws_cpp_sdk/aws_cpp_sdk.BUILD.bazel"), system_build_file = Label("//third_party:com_github_aws_cpp_sdk/system.BUILD.bazel"), diff --git a/third_party/com_github_s2n_tls/s2n_tls.BUILD.bazel b/third_party/com_github_s2n_tls/s2n_tls.BUILD.bazel new file mode 100644 index 000000000..4233876d9 --- /dev/null +++ b/third_party/com_github_s2n_tls/s2n_tls.BUILD.bazel @@ -0,0 +1,25 @@ +# Description: +# AWS s2n tls + 
+package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "s2n_tls", + srcs = glob([ + "crypto/*.h", + "crypto/*.c", + "error/*.h", + "error/*.c", + "stuffer/*.h", + "stuffer/*.c", + "tls/**/*.h", + "tls/**/*.c", + "utils/*.h", + "utils/*.c" + ]), + hdrs = glob(["api/**/*.h"]), + includes = ["api"], + deps = ["@com_google_boringssl//:crypto"] +) \ No newline at end of file diff --git a/third_party/com_github_s2n_tls/workspace.bzl b/third_party/com_github_s2n_tls/workspace.bzl new file mode 100644 index 000000000..ed874c9fa --- /dev/null +++ b/third_party/com_github_s2n_tls/workspace.bzl @@ -0,0 +1,35 @@ +# Copyright 2024 The TensorStore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load("//third_party:repo.bzl", "third_party_http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") + + +def repo(): + maybe( + third_party_http_archive, + name = "com_github_s2n_tls", + sha256 = "84fdbaa894c722bf13cac87b8579f494c1c2d66de642e5e6104638fddea76ad9", + strip_prefix = "s2n-tls-1.4.16", + urls = [ + "https://github.com/aws/s2n-tls/archive/refs/tags/v1.4.16.tar.gz", + ], + build_file = Label("//third_party:com_github_s2n_tls/s2n_tls.BUILD.bazel"), + #system_build_file = Label("//third_party:com_github_s2n_tls/system.BUILD.bazel"), + cmake_name = "s2n_tls", + cmake_target_mapping = { + "@com_github_s2n_tls//:s2n_tls": "s2n_tls::s2n_tls", + }, + bazel_to_cmake = {}, + ) diff --git a/third_party/third_party.bzl b/third_party/third_party.bzl index 4ba103f92..8a0a27f47 100644 --- a/third_party/third_party.bzl +++ b/third_party/third_party.bzl @@ -1,14 +1,25 @@ load("//third_party:bazel_skylib/workspace.bzl", repo_bazel_skylib = "repo") load("//third_party:blake3/workspace.bzl", repo_blake3 = "repo") +load("//third_party:com_github_aws_c_auth/workspace.bzl", repo_com_github_aws_c_auth = "repo") +load("//third_party:com_github_aws_c_cal/workspace.bzl", repo_com_github_aws_c_cal = "repo") load("//third_party:com_github_aws_c_common/workspace.bzl", repo_com_github_aws_c_common = "repo") +load("//third_party:com_github_aws_c_compression/workspace.bzl", repo_com_github_aws_c_compression = "repo") load("//third_party:com_github_aws_c_event_stream/workspace.bzl", repo_com_github_aws_c_event_stream = "repo") +load("//third_party:com_github_aws_c_http/workspace.bzl", repo_com_github_aws_c_http = "repo") +load("//third_party:com_github_aws_c_io/workspace.bzl", repo_com_github_aws_c_io = "repo") +load("//third_party:com_github_aws_c_iot/workspace.bzl", repo_com_github_aws_c_iot = "repo") +load("//third_party:com_github_aws_c_mqtt/workspace.bzl", repo_com_github_aws_c_mqtt = "repo") +load("//third_party:com_github_aws_c_s3/workspace.bzl", 
repo_com_github_aws_c_s3 = "repo") +load("//third_party:com_github_aws_c_sdkutils/workspace.bzl", repo_com_github_aws_c_sdkutils = "repo") load("//third_party:com_github_aws_checksums/workspace.bzl", repo_com_github_aws_checksums = "repo") +load("//third_party:com_github_aws_cpp_crt/workspace.bzl", repo_com_github_aws_cpp_crt = "repo") load("//third_party:com_github_aws_cpp_sdk/workspace.bzl", repo_com_github_aws_cpp_sdk = "repo") load("//third_party:com_github_cares_cares/workspace.bzl", repo_com_github_cares_cares = "repo") load("//third_party:com_github_cncf_udpa/workspace.bzl", repo_com_github_cncf_udpa = "repo") load("//third_party:com_github_grpc_grpc/workspace.bzl", repo_com_github_grpc_grpc = "repo") load("//third_party:com_github_nlohmann_json/workspace.bzl", repo_com_github_nlohmann_json = "repo") load("//third_party:com_github_pybind_pybind11/workspace.bzl", repo_com_github_pybind_pybind11 = "repo") +load("//third_party:com_github_s2n_tls/workspace.bzl", repo_com_github_s2n_tls = "repo") load("//third_party:com_google_absl/workspace.bzl", repo_com_google_absl = "repo") load("//third_party:com_google_benchmark/workspace.bzl", repo_com_google_benchmark = "repo") load("//third_party:com_google_boringssl/workspace.bzl", repo_com_google_boringssl = "repo") @@ -50,15 +61,26 @@ load("//third_party:tinyxml2/workspace.bzl", repo_tinyxml2 = "repo") def third_party_dependencies(): repo_bazel_skylib() repo_blake3() + repo_com_github_aws_c_auth() + repo_com_github_aws_c_cal() repo_com_github_aws_c_common() + repo_com_github_aws_c_compression() repo_com_github_aws_c_event_stream() + repo_com_github_aws_c_http() + repo_com_github_aws_c_io() + repo_com_github_aws_c_iot() + repo_com_github_aws_c_mqtt() + repo_com_github_aws_c_s3() + repo_com_github_aws_c_sdkutils() repo_com_github_aws_checksums() + repo_com_github_aws_cpp_crt() repo_com_github_aws_cpp_sdk() repo_com_github_cares_cares() repo_com_github_cncf_udpa() repo_com_github_grpc_grpc() 
repo_com_github_nlohmann_json() repo_com_github_pybind_pybind11() + repo_com_github_s2n_tls() repo_com_google_absl() repo_com_google_benchmark() repo_com_google_boringssl()