Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(storage): new option to disable decompressive transcoding #8834

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions google/cloud/storage/client.h
Original file line number Diff line number Diff line change
Expand Up @@ -1064,20 +1064,23 @@ class Client {
* Valid types for this operation include `DisableCrc32cChecksum`,
* `DisableMD5Hash`, `EncryptionKey`, `Generation`, `IfGenerationMatch`,
* `IfGenerationNotMatch`, `IfMetagenerationMatch`,
* `IfMetagenerationNotMatch`, `ReadFromOffset`, `ReadRange`, `ReadLast`
* and `UserProject`.
* `IfMetagenerationNotMatch`, `ReadFromOffset`, `ReadRange`, `ReadLast`,
* `UserProject`, and `AcceptEncoding`.
*
* @par Idempotency
* This is a read-only operation and is always idempotent.
*
* @par Example
* @snippet storage_object_samples.cc read object
*
* @par Example
* @par Example: read only a sub-range in the object.
* @snippet storage_object_samples.cc read object range
*
* @par Example: read an object encrypted with a CSEK.
* @snippet storage_object_csek_samples.cc read encrypted object
*
* @par Example: disable decompressive transcoding.
* @snippet storage_object_samples.cc read object gzip
*/
template <typename... Options>
ObjectReadStream ReadObject(std::string const& bucket_name,
Expand Down
20 changes: 20 additions & 0 deletions google/cloud/storage/examples/storage_object_samples.cc
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,22 @@ void ReadObjectIntoMemory(google::cloud::storage::Client client,
(std::move(client), argv.at(0), argv.at(1));
}

/**
 * Example: download an object while asking for its gzip-encoded form.
 *
 * Reads the object with the `AcceptEncodingGzip()` option, buffers everything
 * the stream returns, and prints the number of characters received.
 *
 * @param client the storage client used for the download.
 * @param argv `argv[0]` is the bucket name and `argv[1]` is the object name.
 *
 * @throws std::runtime_error if the read stream reports a non-OK status.
 * @throws std::out_of_range if @p argv has fewer than two elements.
 */
void ReadObjectGzip(google::cloud::storage::Client client,
                    std::vector<std::string> const& argv) {
  //! [read object gzip]
  namespace gcs = ::google::cloud::storage;
  [](gcs::Client client, std::string const& bucket_name,
     std::string const& object_name) {
    auto is =
        client.ReadObject(bucket_name, object_name, gcs::AcceptEncodingGzip());
    // The payload is binary (gzip) data. Read it through the stream buffer:
    // `std::istream_iterator<char>` performs formatted extraction and would
    // silently drop every byte that happens to be a whitespace character.
    auto const contents = std::string{std::istreambuf_iterator<char>(is), {}};
    if (!is.status().ok()) throw std::runtime_error(is.status().message());
    std::cout << "The object has " << contents.size() << " characters\n";
  }
  //! [read object gzip]
  (std::move(client), argv.at(0), argv.at(1));
}

void DeleteObject(google::cloud::storage::Client client,
std::vector<std::string> const& argv) {
//! [delete object] [START storage_delete_file]
Expand Down Expand Up @@ -691,6 +707,9 @@ void RunAll(std::vector<std::string> const& argv) {
std::cout << "\nRunning ReadObjectRange() example" << std::endl;
ReadObjectRange(client, {bucket_name, object_name, "1000", "2000"});

std::cout << "\nRunning ReadObjectGzip() example" << std::endl;
ReadObjectGzip(client, {bucket_name, object_name});

std::cout << "\nRunning UpdateObjectMetadata() example" << std::endl;
UpdateObjectMetadata(client,
{bucket_name, object_name, "test-label", "test-value"});
Expand Down Expand Up @@ -793,6 +812,7 @@ int main(int argc, char* argv[]) {
make_entry("read-object", {"<object-name>"}, ReadObject),
make_entry("read-object-range", {"<object-name>", "<start>", "<end>"},
ReadObjectRange),
make_entry("read-object-gzip", {"<object-name>"}, ReadObjectGzip),
make_entry("read-object-into-memory", {"<object-name>"},
ReadObjectIntoMemory),
make_entry("delete-object", {"<object-name>"}, DeleteObject),
Expand Down
1 change: 1 addition & 0 deletions google/cloud/storage/internal/curl_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,7 @@ StatusOr<std::unique_ptr<ObjectReadSource>> CurlClient::ReadObjectXml(
// None of the IfGeneration*Match nor IfMetageneration*Match can be set. This
// is checked by the caller (in this class).
builder.AddOption(request.GetOption<UserProject>());
builder.AddOption(request.GetOption<AcceptEncoding>());

//
// Apply the options from GenericRequestBase<> that are set, translating
Expand Down
2 changes: 1 addition & 1 deletion google/cloud/storage/internal/object_requests.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ class ReadObjectRangeRequest
ReadObjectRangeRequest, DisableCrc32cChecksum, DisableMD5Hash,
EncryptionKey, Generation, IfGenerationMatch, IfGenerationNotMatch,
IfMetagenerationMatch, IfMetagenerationNotMatch, ReadFromOffset,
ReadRange, ReadLast, UserProject> {
ReadRange, ReadLast, UserProject, AcceptEncoding> {
public:
using GenericObjectRequest::GenericObjectRequest;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,81 @@ TEST_F(DecompressiveTranscodingIntegrationTest, WriteAndReadXml) {
ASSERT_NE(decompressed.substr(0, 32), contents.substr(0, 32));
}

// Uploads a pre-compressed file with `content_encoding == "gzip"`, reads it
// back with `AcceptEncodingGzip()`, and verifies the leading bytes received
// match the leading bytes uploaded.
TEST_F(DecompressiveTranscodingIntegrationTest, WriteAndReadCompressedJson) {
  // TODO(storage-testbench#321) - fix transcoding support in the emulator
  if (UsingEmulator()) GTEST_SKIP();

  // The gzip payload lives in a file whose path comes from the environment.
  auto const path = google::cloud::internal::GetEnv(
                        "GOOGLE_CLOUD_CPP_STORAGE_TEST_GZIP_FILENAME")
                        .value_or("");
  ASSERT_FALSE(path.empty());
  std::ifstream source(path, std::ios::binary);
  std::string const payload{std::istreambuf_iterator<char>(source), {}};
  ASSERT_TRUE(source.good());

  Client client(
      Options{}
          .set<TransferStallTimeoutOption>(std::chrono::seconds(3))
          .set<RetryPolicyOption>(LimitedErrorCountRetryPolicy(5).clone()));

  // Upload the payload as-is, labeling it as gzip-compressed text.
  auto object_name = MakeRandomObjectName();
  auto const gzip_metadata =
      ObjectMetadata().set_content_encoding("gzip").set_content_type(
          "text/plain");
  auto inserted = client.InsertObject(bucket_name(), object_name, payload,
                                      IfGenerationMatch(0),
                                      WithObjectMetadata(gzip_metadata));
  ASSERT_STATUS_OK(inserted);
  ScheduleForDelete(*inserted);
  EXPECT_EQ(inserted->content_encoding(), "gzip");
  EXPECT_EQ(inserted->content_type(), "text/plain");

  // NOTE(review): presumably `IfGenerationNotMatch(0)` is only here to steer
  // the download to the JSON API (the XML path does not accept generation
  // preconditions) - confirm.
  auto stream =
      client.ReadObject(bucket_name(), object_name, AcceptEncodingGzip(),
                        IfGenerationNotMatch(0));
  ASSERT_STATUS_OK(stream.status());
  std::string const received{std::istreambuf_iterator<char>(stream), {}};
  ASSERT_STATUS_OK(stream.status());

  // Only a short prefix is compared.
  ASSERT_EQ(received.substr(0, 32), payload.substr(0, 32));
}

// Uploads a pre-compressed file with `content_encoding == "gzip"`, reads it
// back with only the `AcceptEncodingGzip()` option, and verifies the leading
// bytes received match the leading bytes uploaded.
TEST_F(DecompressiveTranscodingIntegrationTest, WriteAndReadCompressedXml) {
  // TODO(storage-testbench#321) - fix transcoding support in the emulator
  if (UsingEmulator()) GTEST_SKIP();

  // The gzip payload lives in a file whose path comes from the environment.
  auto const path = google::cloud::internal::GetEnv(
                        "GOOGLE_CLOUD_CPP_STORAGE_TEST_GZIP_FILENAME")
                        .value_or("");
  ASSERT_FALSE(path.empty());
  std::ifstream source(path, std::ios::binary);
  std::string const payload{std::istreambuf_iterator<char>(source), {}};
  ASSERT_TRUE(source.good());

  Client client(
      Options{}
          .set<TransferStallTimeoutOption>(std::chrono::seconds(3))
          .set<RetryPolicyOption>(LimitedErrorCountRetryPolicy(5).clone()));

  // Upload the payload as-is, labeling it as gzip-compressed text.
  auto object_name = MakeRandomObjectName();
  auto const gzip_metadata =
      ObjectMetadata().set_content_encoding("gzip").set_content_type(
          "text/plain");
  auto inserted = client.InsertObject(bucket_name(), object_name, payload,
                                      IfGenerationMatch(0),
                                      WithObjectMetadata(gzip_metadata));
  ASSERT_STATUS_OK(inserted);
  ScheduleForDelete(*inserted);
  EXPECT_EQ(inserted->content_encoding(), "gzip");
  EXPECT_EQ(inserted->content_type(), "text/plain");

  // NOTE(review): no generation preconditions are passed; presumably that
  // keeps this download on the XML API path - confirm.
  auto stream =
      client.ReadObject(bucket_name(), object_name, AcceptEncodingGzip());
  ASSERT_STATUS_OK(stream.status());
  std::string const received{std::istreambuf_iterator<char>(stream), {}};
  ASSERT_STATUS_OK(stream.status());

  // Only a short prefix is compared.
  ASSERT_EQ(received.substr(0, 32), payload.substr(0, 32));
}

} // anonymous namespace
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
} // namespace storage
Expand Down
33 changes: 33 additions & 0 deletions google/cloud/storage/well_known_headers.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,39 @@ EncryptionKeyData CreateKeyFromGenerator(Generator& gen) {
return EncryptionDataFromBinaryKey(key);
}

/**
* Modify the accepted encodings.
*
* When using HTTP, GCS decompresses gzip-encoded objects by default:
*
* https://cloud.google.com/storage/docs/transcoding
*
* Setting this option to `gzip` disables automatic decompression. This can be
* useful for applications wanting to operate with the compressed data. Setting
* this option to `identity`, or not setting this option, returns decompressed
* data.
*
* @note Decompressive transcoding only applies to objects that are
* compressed with `gzip` and have their `content_encoding()` attribute set
* accordingly. At the time of this writing GCS does not decompress objects
* stored with other compression algorithms, nor does it detect the object
* compression based on the object name or its contents.
*
* @see `AcceptEncodingGzip()` is a helper function to disable decompressive
* transcoding.
* @see `AcceptEncodingIdentity()` is a helper function to request the
* `identity` encoding.
*/
struct AcceptEncoding
: public internal::WellKnownHeader<AcceptEncoding, std::string> {
// Reuse the constructors declared by the `WellKnownHeader` base class.
using WellKnownHeader<AcceptEncoding, std::string>::WellKnownHeader;
/// The header name associated with this option.
static char const* header_name() { return "Accept-Encoding"; }
};

/// Returns an `AcceptEncoding` option set to `gzip`, which (per the
/// `AcceptEncoding` documentation) disables automatic decompression.
inline AcceptEncoding AcceptEncodingGzip() { return AcceptEncoding("gzip"); }

/// Returns an `AcceptEncoding` option set to `identity`, which (per the
/// `AcceptEncoding` documentation) returns decompressed data, the same as not
/// setting the option.
inline AcceptEncoding AcceptEncodingIdentity() {
return AcceptEncoding("identity");
}

GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
} // namespace storage
} // namespace cloud
Expand Down