Skip to content

Commit

Permalink
feat(storage): new option to disable decompressive transcoding (#8834)
Browse files Browse the repository at this point in the history
For objects stored in gzip format (and with contentEncoding == "gzip")
GCS over HTTP automatically decompresses the object during download.
Some applications may want to read the object in compressed format.
Support this use case with a new option (`AcceptEncoding`) for
`Client::ReadObject()`, and a new helper function
(`AcceptEncodingGzip()`) that returns this option with the correct value.
  • Loading branch information
coryan authored May 1, 2022
1 parent 8e4b737 commit 8658400
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 4 deletions.
9 changes: 6 additions & 3 deletions google/cloud/storage/client.h
Original file line number Diff line number Diff line change
Expand Up @@ -1064,20 +1064,23 @@ class Client {
* Valid types for this operation include `DisableCrc32cChecksum`,
* `DisableMD5Hash`, `IfGenerationMatch`, `EncryptionKey`, `Generation`,
* `IfGenerationMatch`, `IfGenerationNotMatch`, `IfMetagenerationMatch`,
* `IfMetagenerationNotMatch`, `ReadFromOffset`, `ReadRange`, `ReadLast`
* and `UserProject`.
* `IfMetagenerationNotMatch`, `ReadFromOffset`, `ReadRange`, `ReadLast`,
* `UserProject`, and `AcceptEncoding`.
*
* @par Idempotency
* This is a read-only operation and is always idempotent.
*
* @par Example
* @snippet storage_object_samples.cc read object
*
* @par Example
* @par Example: read only a sub-range in the object.
* @snippet storage_object_samples.cc read object range
*
* @par Example: read an object encrypted with a CSEK.
* @snippet storage_object_csek_samples.cc read encrypted object
*
* @par Example: disable decompressive transcoding.
* @snippet storage_object_samples.cc read object gzip
*/
template <typename... Options>
ObjectReadStream ReadObject(std::string const& bucket_name,
Expand Down
20 changes: 20 additions & 0 deletions google/cloud/storage/examples/storage_object_samples.cc
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,22 @@ void ReadObjectIntoMemory(google::cloud::storage::Client client,
(std::move(client), argv.at(0), argv.at(1));
}

// Example: download an object without decompressive transcoding and report
// how many bytes were received.
//
// `argv` must hold the bucket name at index 0 and the object name at index 1;
// `argv.at()` throws `std::out_of_range` otherwise.
void ReadObjectGzip(google::cloud::storage::Client client,
                    std::vector<std::string> const& argv) {
  //! [read object gzip]
  namespace gcs = ::google::cloud::storage;
  [](gcs::Client client, std::string const& bucket_name,
     std::string const& object_name) {
    // `AcceptEncodingGzip()` requests the object in its compressed form.
    auto is =
        client.ReadObject(bucket_name, object_name, gcs::AcceptEncodingGzip());
    // The payload is binary. `std::istreambuf_iterator` reads every byte,
    // whereas `std::istream_iterator<char>` uses formatted extraction and
    // would silently drop any byte that happens to be a whitespace character.
    auto const contents = std::string{std::istreambuf_iterator<char>(is), {}};
    if (!is.status().ok()) throw std::runtime_error(is.status().message());
    std::cout << "The object has " << contents.size() << " characters\n";
  }
  //! [read object gzip]
  (std::move(client), argv.at(0), argv.at(1));
}

void DeleteObject(google::cloud::storage::Client client,
std::vector<std::string> const& argv) {
//! [delete object] [START storage_delete_file]
Expand Down Expand Up @@ -691,6 +707,9 @@ void RunAll(std::vector<std::string> const& argv) {
std::cout << "\nRunning ReadObjectRange() example" << std::endl;
ReadObjectRange(client, {bucket_name, object_name, "1000", "2000"});

std::cout << "\nRunning ReadObjectGzip() example" << std::endl;
ReadObjectGzip(client, {bucket_name, object_name});

std::cout << "\nRunning UpdateObjectMetadata() example" << std::endl;
UpdateObjectMetadata(client,
{bucket_name, object_name, "test-label", "test-value"});
Expand Down Expand Up @@ -793,6 +812,7 @@ int main(int argc, char* argv[]) {
make_entry("read-object", {"<object-name>"}, ReadObject),
make_entry("read-object-range", {"<object-name>", "<start>", "<end>"},
ReadObjectRange),
make_entry("read-object-gzip", {"<object-name>"}, ReadObjectGzip),
make_entry("read-object-into-memory", {"<object-name>"},
ReadObjectIntoMemory),
make_entry("delete-object", {"<object-name>"}, DeleteObject),
Expand Down
1 change: 1 addition & 0 deletions google/cloud/storage/internal/curl_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,7 @@ StatusOr<std::unique_ptr<ObjectReadSource>> CurlClient::ReadObjectXml(
// None of the IfGeneration*Match nor IfMetageneration*Match can be set. This
// is checked by the caller (in this class).
builder.AddOption(request.GetOption<UserProject>());
builder.AddOption(request.GetOption<AcceptEncoding>());

//
// Apply the options from GenericRequestBase<> that are set, translating
Expand Down
2 changes: 1 addition & 1 deletion google/cloud/storage/internal/object_requests.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ class ReadObjectRangeRequest
ReadObjectRangeRequest, DisableCrc32cChecksum, DisableMD5Hash,
EncryptionKey, Generation, IfGenerationMatch, IfGenerationNotMatch,
IfMetagenerationMatch, IfMetagenerationNotMatch, ReadFromOffset,
ReadRange, ReadLast, UserProject> {
ReadRange, ReadLast, UserProject, AcceptEncoding> {
public:
using GenericObjectRequest::GenericObjectRequest;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,81 @@ TEST_F(DecompressiveTranscodingIntegrationTest, WriteAndReadXml) {
ASSERT_NE(decompressed.substr(0, 32), contents.substr(0, 32));
}

// Uploads a gzip payload with `contentEncoding == "gzip"` and verifies that a
// download with `AcceptEncodingGzip()` returns the compressed bytes. The
// `IfGenerationNotMatch` option is the only difference from the XML variant.
TEST_F(DecompressiveTranscodingIntegrationTest, WriteAndReadCompressedJson) {
  // TODO(storage-testbench#321) - fix transcoding support in the emulator
  if (UsingEmulator()) GTEST_SKIP();

  // Load the pre-compressed payload from the file named by the environment.
  auto const path = google::cloud::internal::GetEnv(
                        "GOOGLE_CLOUD_CPP_STORAGE_TEST_GZIP_FILENAME")
                        .value_or("");
  ASSERT_FALSE(path.empty());
  std::ifstream source(path, std::ios::binary);
  std::string const expected(std::istreambuf_iterator<char>{source},
                             std::istreambuf_iterator<char>{});
  ASSERT_TRUE(source.good());

  auto const options =
      Options{}
          .set<TransferStallTimeoutOption>(std::chrono::seconds(3))
          .set<RetryPolicyOption>(LimitedErrorCountRetryPolicy(5).clone());
  auto client = Client(options);

  // Upload the payload as-is, labelled as gzip-encoded plain text.
  auto const metadata =
      ObjectMetadata().set_content_encoding("gzip").set_content_type(
          "text/plain");
  auto object_name = MakeRandomObjectName();
  auto insert =
      client.InsertObject(bucket_name(), object_name, expected,
                          IfGenerationMatch(0), WithObjectMetadata(metadata));
  ASSERT_STATUS_OK(insert);
  ScheduleForDelete(*insert);
  EXPECT_EQ(insert->content_encoding(), "gzip");
  EXPECT_EQ(insert->content_type(), "text/plain");

  // Download with decompressive transcoding disabled.
  auto stream =
      client.ReadObject(bucket_name(), object_name, AcceptEncodingGzip(),
                        IfGenerationNotMatch(0));
  ASSERT_STATUS_OK(stream.status());
  std::string const actual(std::istreambuf_iterator<char>{stream},
                           std::istreambuf_iterator<char>{});
  ASSERT_STATUS_OK(stream.status());

  // The leading bytes must match the uploaded (compressed) payload.
  ASSERT_EQ(actual.substr(0, 32), expected.substr(0, 32));
}

// Uploads a gzip payload with `contentEncoding == "gzip"` and verifies that a
// download with `AcceptEncodingGzip()` returns the compressed bytes. Unlike
// the JSON variant, the read passes no option other than `AcceptEncodingGzip()`.
TEST_F(DecompressiveTranscodingIntegrationTest, WriteAndReadCompressedXml) {
  // TODO(storage-testbench#321) - fix transcoding support in the emulator
  if (UsingEmulator()) GTEST_SKIP();

  // Load the pre-compressed payload from the file named by the environment.
  auto const path = google::cloud::internal::GetEnv(
                        "GOOGLE_CLOUD_CPP_STORAGE_TEST_GZIP_FILENAME")
                        .value_or("");
  ASSERT_FALSE(path.empty());
  std::ifstream source(path, std::ios::binary);
  std::string const expected(std::istreambuf_iterator<char>{source},
                             std::istreambuf_iterator<char>{});
  ASSERT_TRUE(source.good());

  auto const options =
      Options{}
          .set<TransferStallTimeoutOption>(std::chrono::seconds(3))
          .set<RetryPolicyOption>(LimitedErrorCountRetryPolicy(5).clone());
  auto client = Client(options);

  // Upload the payload as-is, labelled as gzip-encoded plain text.
  auto const metadata =
      ObjectMetadata().set_content_encoding("gzip").set_content_type(
          "text/plain");
  auto object_name = MakeRandomObjectName();
  auto insert =
      client.InsertObject(bucket_name(), object_name, expected,
                          IfGenerationMatch(0), WithObjectMetadata(metadata));
  ASSERT_STATUS_OK(insert);
  ScheduleForDelete(*insert);
  EXPECT_EQ(insert->content_encoding(), "gzip");
  EXPECT_EQ(insert->content_type(), "text/plain");

  // Download with decompressive transcoding disabled.
  auto stream =
      client.ReadObject(bucket_name(), object_name, AcceptEncodingGzip());
  ASSERT_STATUS_OK(stream.status());
  std::string const actual(std::istreambuf_iterator<char>{stream},
                           std::istreambuf_iterator<char>{});
  ASSERT_STATUS_OK(stream.status());

  // The leading bytes must match the uploaded (compressed) payload.
  ASSERT_EQ(actual.substr(0, 32), expected.substr(0, 32));
}

} // anonymous namespace
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
} // namespace storage
Expand Down
33 changes: 33 additions & 0 deletions google/cloud/storage/well_known_headers.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,39 @@ EncryptionKeyData CreateKeyFromGenerator(Generator& gen) {
return EncryptionDataFromBinaryKey(key);
}

/**
* Modify the accepted encodings.
*
* When using HTTP, GCS decompresses gzip-encoded objects by default:
*
* https://cloud.google.com/storage/docs/transcoding
*
* Setting this option to `gzip` disables automatic decompression. This can be
* useful for applications wanting to operate with the compressed data. Setting
* this option to `identity`, or not setting this option, returns decompressed
* data.
*
* @note Decompressive transcoding applies only to objects that are compressed
* with `gzip` and have their `content_encoding()` attribute set accordingly.
* At the time of this writing GCS does not decompress objects stored with
* other compression algorithms, nor does it detect the object compression
* based on the object name or its contents.
*
* @see `AcceptEncodingGzip()` is a helper function to disable decompressive
* transcoding.
*/
struct AcceptEncoding
: public internal::WellKnownHeader<AcceptEncoding, std::string> {
// Inherit the base class constructors.
using WellKnownHeader<AcceptEncoding, std::string>::WellKnownHeader;
// The name of the HTTP header that carries this option.
static char const* header_name() { return "Accept-Encoding"; }
};

/**
 * Returns an `AcceptEncoding` option with the value `gzip`.
 *
 * Setting `AcceptEncoding` to `gzip` disables automatic decompression.
 */
inline AcceptEncoding AcceptEncodingGzip() {
  return AcceptEncoding{std::string{"gzip"}};
}

/**
 * Returns an `AcceptEncoding` option with the value `identity`.
 *
 * Setting `AcceptEncoding` to `identity` returns decompressed data, the same
 * as not setting the option.
 */
inline AcceptEncoding AcceptEncodingIdentity() {
  return AcceptEncoding{std::string{"identity"}};
}

GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
} // namespace storage
} // namespace cloud
Expand Down

0 comments on commit 8658400

Please sign in to comment.