Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support full object checksum #468

Merged
merged 57 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
bb3c9b4
Add crc64
TingDaoK Oct 3, 2024
f5362e5
add support for header checksum
TingDaoK Oct 7, 2024
9b84eb6
add test instead of modify the old one
TingDaoK Oct 7, 2024
2247165
Add crc64
TingDaoK Oct 3, 2024
ec0979f
Merge branch 'crc64' of github.com:awslabs/aws-c-s3-staging into crc64
TingDaoK Oct 9, 2024
41df82f
support the full object checksum from header
TingDaoK Oct 14, 2024
09a9aca
With test
TingDaoK Oct 16, 2024
425d190
we need type for both create and complete
TingDaoK Oct 16, 2024
58082bc
CI stuff
TingDaoK Oct 16, 2024
5318f1f
manually cherry pick :)
TingDaoK Oct 16, 2024
40512ab
use east-2 bucket as the motorcate only availble from east-2
TingDaoK Oct 16, 2024
f9a1f41
the new header was added
TingDaoK Oct 17, 2024
24ccf1d
fix test and disable the sse-c test, which I believe it's an s3 bug
TingDaoK Oct 17, 2024
5ca2e91
fine windows.
TingDaoK Oct 17, 2024
740eb8e
get CI
TingDaoK Oct 17, 2024
51ef605
add support for header checksum
TingDaoK Oct 7, 2024
7a87f03
add test instead of modify the old one
TingDaoK Oct 7, 2024
1eed0f6
Add crc64
TingDaoK Oct 3, 2024
62fee3e
add crc64
TingDaoK Oct 17, 2024
15e8354
permission
TingDaoK Oct 17, 2024
7771f8b
Merge branch 'latest-CI' into crc64
TingDaoK Oct 17, 2024
ec5ea6f
Merge branch 'crc64' into CI-staging
TingDaoK Oct 17, 2024
02d401e
update the endpoint as motorcade only available from east-2
TingDaoK Oct 17, 2024
b9ba039
skip the sse-c test and fix the region
TingDaoK Oct 17, 2024
c34cb18
Merge branch 'latest-CI' into crc64
TingDaoK Oct 17, 2024
b972ba0
Merge branch 'latest-CI' into CI-staging
TingDaoK Oct 17, 2024
edcceac
hmmmm
TingDaoK Oct 17, 2024
146a203
hmmmm
TingDaoK Oct 17, 2024
56e921e
hmmmm
TingDaoK Oct 17, 2024
52ef09c
merge conflict
TingDaoK Oct 17, 2024
6e8e773
pick up the fix
TingDaoK Oct 18, 2024
2cdc0ce
use two different path for full object checksum
TingDaoK Oct 18, 2024
02c2afc
rename
TingDaoK Oct 18, 2024
258dd5d
don't need to force the config now
TingDaoK Oct 21, 2024
89ac6f8
WIP
TingDaoK Oct 22, 2024
14299cb
address comments
TingDaoK Oct 22, 2024
3a27771
Apply suggestions from code review
TingDaoK Oct 22, 2024
59286c0
remove truncated to
TingDaoK Oct 22, 2024
69841e3
need to update the pointer to clean up as well
TingDaoK Oct 22, 2024
e9247f5
merge
TingDaoK Oct 22, 2024
ef90727
handle error from hash_fn
TingDaoK Oct 22, 2024
3efcc3f
use a hard coded list instead and keep the enum value
TingDaoK Oct 23, 2024
b48a329
ooops
TingDaoK Oct 23, 2024
a477b16
array size
TingDaoK Oct 23, 2024
529e02d
Apply suggestions from code review
TingDaoK Oct 28, 2024
5d51e34
Merge branch 'crc64' into support-full-object-checksum
TingDaoK Oct 28, 2024
c5a109c
fix the assert
TingDaoK Oct 28, 2024
2468855
fix assert
TingDaoK Oct 28, 2024
90166d5
merge fix
TingDaoK Oct 28, 2024
343835d
Merge branch 'crc64' into support-full-object-checksum
TingDaoK Oct 28, 2024
7792547
address comments
TingDaoK Oct 28, 2024
3ab370a
revert the temp change
TingDaoK Dec 2, 2024
b1b1fc1
Merge branch 'main' into support-full-object-checksum
TingDaoK Dec 2, 2024
4df9989
revert ci change
TingDaoK Dec 2, 2024
b47e45f
Merge branch 'main' into support-full-object-checksum
TingDaoK Dec 2, 2024
33ea4a3
Merge branch 'main' into support-full-object-checksum
TingDaoK Dec 4, 2024
2e2a20c
oh, get back the test
TingDaoK Dec 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 39 additions & 14 deletions include/aws/s3/private/s3_checksums.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,45 @@

struct aws_s3_checksum;

/**
 * Checksum algorithms in priority order, highest priority first.
 * Code that needs to pick or detect an algorithm walks this list from the start, so the
 * order here decides which algorithm is considered first.
 */
static const enum aws_s3_checksum_algorithm s_checksum_algo_priority_list[] = {
AWS_SCA_CRC64NVME,
AWS_SCA_CRC32C,
AWS_SCA_CRC32,
AWS_SCA_SHA1,
AWS_SCA_SHA256,
};
/* Compile-time guard: the list must have exactly one entry per enum value in [AWS_SCA_INIT, AWS_SCA_END],
 * so adding a new algorithm to the enum without adding it here fails the build. */
AWS_STATIC_ASSERT(AWS_ARRAY_SIZE(s_checksum_algo_priority_list) == (AWS_SCA_END - AWS_SCA_INIT + 1));

struct aws_checksum_vtable {
void (*destroy)(struct aws_s3_checksum *checksum);
int (*update)(struct aws_s3_checksum *checksum, const struct aws_byte_cursor *buf);
int (*finalize)(struct aws_s3_checksum *checksum, struct aws_byte_buf *out, size_t truncate_to);
int (*finalize)(struct aws_s3_checksum *checksum, struct aws_byte_buf *out);
};

struct aws_s3_checksum {
struct aws_allocator *allocator;
struct aws_checksum_vtable *vtable;
void *impl;
size_t digest_size;
enum aws_s3_checksum_algorithm algorithm;
bool good;
union {
struct aws_hash *hash;
uint32_t crc_val_32bit;
uint64_t crc_val_64bit;
} impl;
};

struct checksum_config {
struct checksum_config_storage {
struct aws_allocator *allocator;
struct aws_byte_buf full_object_checksum;
bool has_full_object_checksum;

enum aws_s3_checksum_location location;
enum aws_s3_checksum_algorithm checksum_algorithm;
bool validate_response_checksum;
struct {
bool crc64nvme;
bool crc32c;
bool crc32;
bool sha1;
Expand Down Expand Up @@ -85,25 +104,26 @@ struct aws_input_stream *aws_chunk_stream_new(
* Get the size of the checksum output corresponding to the aws_s3_checksum_algorithm enum value.
*/
AWS_S3_API
size_t aws_get_digest_size_from_algorithm(enum aws_s3_checksum_algorithm algorithm);
size_t aws_get_digest_size_from_checksum_algorithm(enum aws_s3_checksum_algorithm algorithm);

/**
* Get the header name corresponding to the aws_s3_checksum_algorithm enum value.
* Get header name to use for algorithm (e.g. "x-amz-checksum-crc32")
*/
AWS_S3_API
const struct aws_byte_cursor *aws_get_http_header_name_from_algorithm(enum aws_s3_checksum_algorithm algorithm);
struct aws_byte_cursor aws_get_http_header_name_from_checksum_algorithm(enum aws_s3_checksum_algorithm algorithm);

/**
* Get the multipart upload header name corresponding to the aws_s3_checksum_algorithm enum value.
* Get algorithm's name (e.g. "CRC32"), to be used as the value of headers like `x-amz-checksum-algorithm`
*/
AWS_S3_API
const struct aws_byte_cursor *aws_get_create_mpu_header_name_from_algorithm(enum aws_s3_checksum_algorithm algorithm);
struct aws_byte_cursor aws_get_checksum_algorithm_name(enum aws_s3_checksum_algorithm algorithm);

/**
* Get the complete multipart upload name corresponding to the aws_s3_checksum_algorithm enum value.
* Get the name used for the checksum algorithm when describing the parts that were uploaded. Refer to
* https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompletedPart.html#AmazonS3-Type-CompletedPart
*/
AWS_S3_API
const struct aws_byte_cursor *aws_get_complete_mpu_name_from_algorithm(enum aws_s3_checksum_algorithm algorithm);
struct aws_byte_cursor aws_get_completed_part_name_from_checksum_algorithm(enum aws_s3_checksum_algorithm algorithm);

/**
* create a new aws_checksum corresponding to the aws_s3_checksum_algorithm enum value.
Expand All @@ -121,8 +141,7 @@ int aws_checksum_compute(
struct aws_allocator *allocator,
enum aws_s3_checksum_algorithm algorithm,
const struct aws_byte_cursor *input,
struct aws_byte_buf *output,
size_t truncate_to);
struct aws_byte_buf *output);

/**
* Cleans up and deallocates checksum.
Expand All @@ -141,9 +160,15 @@ int aws_checksum_update(struct aws_s3_checksum *checksum, const struct aws_byte_
* Allocation of output is the caller's responsibility.
*/
AWS_S3_API
int aws_checksum_finalize(struct aws_s3_checksum *checksum, struct aws_byte_buf *output, size_t truncate_to);
int aws_checksum_finalize(struct aws_s3_checksum *checksum, struct aws_byte_buf *output);

AWS_S3_API
void aws_checksum_config_storage_init(
struct aws_allocator *allocator,
struct checksum_config_storage *internal_config,
const struct aws_s3_checksum_config *config);

AWS_S3_API
void checksum_config_init(struct checksum_config *internal_config, const struct aws_s3_checksum_config *config);
void aws_checksum_config_storage_cleanup(struct checksum_config_storage *internal_config);

#endif /* AWS_S3_CHECKSUMS_H */
2 changes: 1 addition & 1 deletion include/aws/s3/private/s3_meta_request_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ struct aws_s3_meta_request {
const bool should_compute_content_md5;

/* deep copy of the checksum config. */
struct checksum_config checksum_config;
struct checksum_config_storage checksum_config;

/* checksum found in either a default get request, or in the initial head request of a multipart get */
struct aws_byte_buf meta_request_level_response_header_checksum;
Expand Down
14 changes: 5 additions & 9 deletions include/aws/s3/private/s3_request_messages.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ struct aws_byte_buf;
struct aws_byte_cursor;
struct aws_string;
struct aws_array_list;
struct checksum_config;
struct checksum_config_storage;

AWS_EXTERN_C_BEGIN

Expand Down Expand Up @@ -52,13 +52,9 @@ struct aws_input_stream *aws_s3_message_util_assign_body(
struct aws_allocator *allocator,
struct aws_byte_buf *byte_buf,
struct aws_http_message *out_message,
const struct checksum_config *checksum_config,
const struct checksum_config_storage *checksum_config,
struct aws_byte_buf *out_checksum);

/* Return true if checksum headers has been set. */
AWS_S3_API
bool aws_s3_message_util_check_checksum_header(struct aws_http_message *message);

/* Create an HTTP request for an S3 Ranged Get Object Request, using the given request as a basis */
AWS_S3_API
struct aws_http_message *aws_s3_ranged_get_object_message_new(
Expand All @@ -80,7 +76,7 @@ AWS_S3_API
struct aws_http_message *aws_s3_create_multipart_upload_message_new(
struct aws_allocator *allocator,
struct aws_http_message *base_message,
const struct checksum_config *checksum_config);
const struct checksum_config_storage *checksum_config);

/* Create an HTTP request for an S3 Put Object request, using the original request as a basis. Creates and assigns a
* body stream using the passed in buffer. If multipart is not needed, part number and upload_id can be 0 and NULL,
Expand All @@ -93,7 +89,7 @@ struct aws_http_message *aws_s3_upload_part_message_new(
uint32_t part_number,
const struct aws_string *upload_id,
bool should_compute_content_md5,
const struct checksum_config *checksum_config,
const struct checksum_config_storage *checksum_config,
struct aws_byte_buf *encoded_checksum_output);

/* Create an HTTP request for an S3 UploadPartCopy request, using the original request as a basis.
Expand All @@ -120,7 +116,7 @@ struct aws_http_message *aws_s3_complete_multipart_message_new(
struct aws_byte_buf *body_buffer,
const struct aws_string *upload_id,
const struct aws_array_list *parts,
const struct checksum_config *checksum_config);
const struct checksum_config_storage *checksum_config);

AWS_S3_API
struct aws_http_message *aws_s3_abort_multipart_upload_message_new(
Expand Down
43 changes: 8 additions & 35 deletions include/aws/s3/private/s3_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,44 +61,17 @@ extern const struct aws_byte_cursor g_request_validation_mode;
AWS_S3_API
extern const struct aws_byte_cursor g_enabled;

/**
* The checksum-algorithm header name used for CopyObject and CreateMultipartUpload
*/
AWS_S3_API
extern const struct aws_byte_cursor g_create_mpu_checksum_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32c_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha1_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha256_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32c_create_mpu_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32_create_mpu_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha1_create_mpu_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha256_create_mpu_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32c_complete_mpu_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32_complete_mpu_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha1_complete_mpu_name;
extern const struct aws_byte_cursor g_checksum_algorithm_header_name;

/**
* The checksum-algorithm header name used for PutObject, UploadParts and PutObject*
*/
AWS_S3_API
extern const struct aws_byte_cursor g_sha256_complete_mpu_name;
extern const struct aws_byte_cursor g_sdk_checksum_algorithm_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_s3_client_version;
Expand Down
7 changes: 4 additions & 3 deletions include/aws/s3/s3_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,8 @@ enum aws_s3_checksum_algorithm {
AWS_SCA_CRC32,
AWS_SCA_SHA1,
AWS_SCA_SHA256,
AWS_SCA_END = AWS_SCA_SHA256,
AWS_SCA_CRC64NVME,
AWS_SCA_END = AWS_SCA_CRC64NVME,
};

enum aws_s3_checksum_location {
Expand Down Expand Up @@ -559,7 +560,7 @@ struct aws_s3_checksum_config {
/**
* The location of client added checksum header.
*
* If AWS_SCL_NONE. No request payload checksum will be calculated or added.
* If AWS_SCL_NONE, no request payload checksum will be added.
*
* If AWS_SCL_HEADER, the client will calculate the checksum and add it to the headers.
*
Expand Down Expand Up @@ -592,7 +593,7 @@ struct aws_s3_checksum_config {
*
* The list of algorithms for user to pick up when validate the checksum. Client will pick up the algorithm from the
* list with the priority based on performance, and the algorithm sent by server. The priority based on performance
* is [CRC32C, CRC32, SHA1, SHA256].
* is [CRC64NVME, CRC32C, CRC32, SHA1, SHA256].
*
* If the response checksum was validated by client, the result will indicate which algorithm was picked.
*/
Expand Down
76 changes: 73 additions & 3 deletions source/s3_auto_ranged_put.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,71 @@ static struct aws_s3_meta_request_vtable s_s3_auto_ranged_put_vtable = {
.pause = s_s3_auto_ranged_put_pause,
};

static int s_init_and_verify_checksum_config_from_headers(
struct checksum_config_storage *checksum_config,
const struct aws_http_message *message,
const void *log_id) {
/* Check if the checksum header was set from the message */
struct aws_http_headers *headers = aws_http_message_get_headers(message);
enum aws_s3_checksum_algorithm header_algo = AWS_SCA_NONE;
struct aws_byte_cursor header_value;
AWS_ZERO_STRUCT(header_value);

for (size_t i = 0; i < AWS_ARRAY_SIZE(s_checksum_algo_priority_list); i++) {
enum aws_s3_checksum_algorithm algorithm = s_checksum_algo_priority_list[i];
const struct aws_byte_cursor algorithm_header_name =
aws_get_http_header_name_from_checksum_algorithm(algorithm);
if (aws_http_headers_get(headers, algorithm_header_name, &header_value) == AWS_OP_SUCCESS) {
if (header_algo == AWS_SCA_NONE) {
header_algo = algorithm;
} else {
/* If there are multiple checksum headers set, it's malformed request */
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST,
"id=%p Could not create auto-ranged-put meta request; multiple checksum headers has been set",
log_id);
return aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
}
}
}
if (header_algo == AWS_SCA_NONE) {
/* No checksum header found, done */
return AWS_OP_SUCCESS;
}

/* Found the full object checksum from the header, check if it matches the explicit setting from config */
if (checksum_config->checksum_algorithm != AWS_SCA_NONE && checksum_config->checksum_algorithm != header_algo) {
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST,
"id=%p Could not create auto-ranged-put meta request; checksum config mismatch the checksum from header.",
log_id);
return aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
}
AWS_ASSERT(!checksum_config->has_full_object_checksum);

AWS_LOGF_DEBUG(
AWS_LS_S3_META_REQUEST,
"id=%p Setting the full-object checksum from header; algorithm: " PRInSTR ", value: " PRInSTR ".",
log_id,
AWS_BYTE_CURSOR_PRI(aws_get_checksum_algorithm_name(header_algo)),
AWS_BYTE_CURSOR_PRI(header_value));
/* Set algo */
checksum_config->checksum_algorithm = header_algo;
if (checksum_config->location == AWS_SCL_NONE) {
/* Set the checksum location to trailer for the parts, complete MPU will still have the checksum in the header.
* But to keep the data integrity for the parts, we need to set the checksum location to trailer to send the
* parts level checksums.
*/
checksum_config->location = AWS_SCL_TRAILER;
}

/* Set full object checksum from the header value. */
aws_byte_buf_init_copy_from_cursor(
&checksum_config->full_object_checksum, checksum_config->allocator, header_value);
checksum_config->has_full_object_checksum = true;
return AWS_OP_SUCCESS;
}

/* Allocate a new auto-ranged put meta request */
struct aws_s3_meta_request *aws_s3_meta_request_auto_ranged_put_new(
struct aws_allocator *allocator,
Expand Down Expand Up @@ -363,6 +428,11 @@ struct aws_s3_meta_request *aws_s3_meta_request_auto_ranged_put_new(
goto error_clean_up;
}

if (s_init_and_verify_checksum_config_from_headers(
&auto_ranged_put->base.checksum_config, options->message, (void *)&auto_ranged_put->base)) {
goto error_clean_up;
}

AWS_LOGF_DEBUG(
AWS_LS_S3_META_REQUEST, "id=%p Created new Auto-Ranged Put Meta Request.", (void *)&auto_ranged_put->base);

Expand Down Expand Up @@ -767,7 +837,7 @@ static int s_verify_part_matches_checksum(
}

struct aws_byte_buf checksum;
if (aws_byte_buf_init(&checksum, allocator, aws_get_digest_size_from_algorithm(algorithm))) {
if (aws_byte_buf_init(&checksum, allocator, aws_get_digest_size_from_checksum_algorithm(algorithm))) {
return AWS_OP_ERR;
}

Expand All @@ -776,14 +846,14 @@ static int s_verify_part_matches_checksum(
int return_status = AWS_OP_SUCCESS;

size_t encoded_len = 0;
if (aws_base64_compute_encoded_len(aws_get_digest_size_from_algorithm(algorithm), &encoded_len)) {
if (aws_base64_compute_encoded_len(aws_get_digest_size_from_checksum_algorithm(algorithm), &encoded_len)) {
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST, "Failed to resume upload. Unable to determine length of encoded checksum.");
return_status = aws_raise_error(AWS_ERROR_S3_RESUME_FAILED);
goto on_done;
}

if (aws_checksum_compute(allocator, algorithm, &body_cur, &checksum, 0)) {
if (aws_checksum_compute(allocator, algorithm, &body_cur, &checksum)) {
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST, "Failed to resume upload. Unable to compute checksum for the skipped part.");
return_status = aws_raise_error(AWS_ERROR_S3_RESUME_FAILED);
Expand Down
2 changes: 1 addition & 1 deletion source/s3_checksum_stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ static int s_finalize_checksum(struct aws_checksum_stream *impl) {
return AWS_OP_SUCCESS;
}

if (aws_checksum_finalize(impl->checksum, &impl->checksum_result, 0) != AWS_OP_SUCCESS) {
if (aws_checksum_finalize(impl->checksum, &impl->checksum_result) != AWS_OP_SUCCESS) {
AWS_LOGF_ERROR(
AWS_LS_S3_CLIENT,
"Failed to calculate checksum with error code %d (%s).",
Expand Down
Loading