diff --git a/TODO_sse b/TODO_sse new file mode 100644 index 000000000000..c97b9791b3ff --- /dev/null +++ b/TODO_sse @@ -0,0 +1,45 @@ +# TODO migrate test_sse shell script "tests" to functional written in python + tests + +# TODO test_sse tests different ways of passing customer key and customer key + md5 on the command line, but does not test the same for copy source customer + key/md5 (although these two use the same code path for arg processing) + +# TODO update aws s3 {cp,sync} manpages with examples + +# TODO maybe add debug logging for new options and option enrichment, etc.? + +# TODO is there a better way to do the opt parsing within the awscli framework? + +# TODO validate that ServerSideEncryption, SSECustomerAlgorithm, + SSECustomerKeyMD5, and/or SSEKMSKeyId values present in response data after + certain APIs (head_object(), put_object(), get_object(), copy_object(), + create_multipart_upload(), upload_part(), upload_part_copy()) have expected + values given the request (and, not specific to SSE, there is no checking of + these now for ETag, etc.). 
Approximate source locations as of 2015-08-27 + (these comments were removed afterwards, so source line numbers are slightly + different; look for use of client APIs above): + + awscli/customizations/s3/filegenerator.py + 317: # TODO validate that ServerSideEncryption, SSECustomerAlgorithm, + + awscli/customizations/s3/fileinfo.py + 196: # TODO validate that ServerSideEncryption, SSECustomerAlgorithm, + 292: # TODO validate that ServerSideEncryption, SSECustomerAlgorithm, + 311: # TODO validate that ServerSideEncryption, SSECustomerAlgorithm, + 329: # TODO validate that ServerSideEncryption, SSECustomerAlgorithm, + 372: # TODO validate that ServerSideEncryption, SSECustomerAlgorithm, + + awscli/customizations/s3/tasks.py + 180: # TODO validate that ServerSideEncryption, SSECustomerAlgorithm, + 261: # TODO validate that ServerSideEncryption, SSECustomerAlgorithm, + 401: # TODO validate that ServerSideEncryption, SSECustomerAlgorithm, + 535: # TODO validate that ServerSideEncryption and/or SSEKMSKeyId values + + (complete_multipart_upload() has only ServerSideEncryption and/or + SSEKMSKeyId. The rest have all four.) + +# TODO consider storing md5sum/sha256sum in object metadata (this is not + specific to SSE, but to anything that alters ETag like multipart) so it could + later be used (perhaps for sync) + diff --git a/awscli/customizations/s3/filegenerator.py b/awscli/customizations/s3/filegenerator.py index bc12877c9ffb..fd09caf5efa1 100644 --- a/awscli/customizations/s3/filegenerator.py +++ b/awscli/customizations/s3/filegenerator.py @@ -115,7 +115,9 @@ class FileGenerator(object): ``FileInfo`` objects to send to a ``Comparator`` or ``S3Handler``. 
""" def __init__(self, client, operation_name, follow_symlinks=True, - page_size=None, result_queue=None): + page_size=None, result_queue=None, + sse_customer_algorithm=None, sse_customer_key=None, + sse_customer_key_md5=None): self._client = client self.operation_name = operation_name self.follow_symlinks = follow_symlinks @@ -123,6 +125,9 @@ def __init__(self, client, operation_name, follow_symlinks=True, self.result_queue = result_queue if not result_queue: self.result_queue = queue.Queue() + self.sse_customer_algorithm = sse_customer_algorithm + self.sse_customer_key = sse_customer_key + self.sse_customer_key_md5 = sse_customer_key_md5 def call(self, files): """ @@ -303,7 +308,12 @@ def _list_single_object(self, s3_path): # instead use a HeadObject request. bucket, key = find_bucket_key(s3_path) try: - response = self._client.head_object(Bucket=bucket, Key=key) + params = {'Bucket': bucket, 'Key': key} + if self.sse_customer_key: + params['SSECustomerAlgorithm'] = self.sse_customer_algorithm + params['SSECustomerKey'] = self.sse_customer_key + params['SSECustomerKeyMD5'] = self.sse_customer_key_md5 + response = self._client.head_object(**params) except ClientError as e: # We want to try to give a more helpful error message. 
# This is what the customer is going to see so we want to diff --git a/awscli/customizations/s3/fileinfo.py b/awscli/customizations/s3/fileinfo.py index 77c6b2af3fe2..d3ea855c00a6 100644 --- a/awscli/customizations/s3/fileinfo.py +++ b/awscli/customizations/s3/fileinfo.py @@ -35,6 +35,7 @@ def save_file(filename, response_data, last_update, is_stream=False): body = response_data['Body'] etag = response_data['ETag'][1:-1] sse = response_data.get('ServerSideEncryption', None) + sse_customer_algorithm = response_data.get('SSECustomerAlgorithm', None) if not is_stream: d = os.path.dirname(filename) try: @@ -55,7 +56,8 @@ def save_file(filename, response_data, last_update, is_stream=False): with open(filename, 'wb') as out_file: write_to_file(out_file, etag, md5, file_chunks) - if not _is_multipart_etag(etag) and sse != 'aws:kms': + if not _is_multipart_etag(etag) and sse != 'aws:kms' and \ + sse_customer_algorithm is None: if etag != md5.hexdigest(): if not is_stream: os.remove(filename) @@ -189,6 +191,7 @@ def set_size_from_s3(self): bucket, key = find_bucket_key(self.src) params = {'Bucket': bucket, 'Key': key} + self._handle_sse_params(params) response_data = self.client.head_object(**params) self.size = int(response_data['ContentLength']) @@ -215,8 +218,6 @@ def _handle_object_params(self, params): raise ValueError('grants should be of the form ' 'permission=principal') params[self._permission_to_param(permission)] = grantee - if self.parameters['sse']: - params['ServerSideEncryption'] = 'AES256' if self.parameters['storage_class']: params['StorageClass'] = self.parameters['storage_class'][0] if self.parameters['website_redirect']: @@ -238,6 +239,27 @@ def _handle_object_params(self, params): if self.parameters['expires']: params['Expires'] = self.parameters['expires'][0] + def _handle_sse_params(self, params): + if self.parameters['sse_copy_source_customer_key']: + params['CopySourceSSECustomerAlgorithm'] = \ + self.parameters['sse_copy_source_customer_algorithm'] 
+ params['CopySourceSSECustomerKey'] = \ + self.parameters['sse_copy_source_customer_key'] + params['CopySourceSSECustomerKeyMD5'] = \ + self.parameters['sse_copy_source_customer_key_md5'] + if self.parameters['sse_class'] == 'C': + params['SSECustomerAlgorithm'] = \ + self.parameters['sse_customer_algorithm'] + params['SSECustomerKey'] = \ + self.parameters['sse_customer_key'] + params['SSECustomerKeyMD5'] = \ + self.parameters['sse_customer_key_md5'] + if self.parameters['sse_class'] == 'KMS': + params['ServerSideEncryption'] = 'aws:kms' + params['SSEKMSKeyId'] = self.parameters['sse_kms_key_id'] + if self.parameters['sse_class'] == 'S3': + params['ServerSideEncryption'] = 'AES256' + def _handle_metadata_directive(self, params): if self.parameters['metadata_directive']: params['MetadataDirective'] = \ @@ -262,6 +284,7 @@ def _handle_upload(self, body): 'Body': body, } self._handle_object_params(params) + self._handle_sse_params(params) response_data = self.client.put_object(**params) def _inject_content_type(self, params, filename): @@ -277,6 +300,7 @@ def download(self): """ bucket, key = find_bucket_key(self.src) params = {'Bucket': bucket, 'Key': key} + self._handle_sse_params(params) response_data = self.client.get_object(**params) save_file(self.dest, response_data, self.last_update, self.is_stream) @@ -290,8 +314,9 @@ def copy(self): params = {'Bucket': bucket, 'CopySource': copy_source, 'Key': key} self._handle_object_params(params) + self._handle_sse_params(params) self._handle_metadata_directive(params) - self.client.copy_object(**params) + response_data = self.client.copy_object(**params) def delete(self): """ @@ -325,6 +350,11 @@ def create_multipart_upload(self): bucket, key = find_bucket_key(self.dest) params = {'Bucket': bucket, 'Key': key} self._handle_object_params(params) + self._handle_sse_params(params) + params = params.copy() + params.pop('CopySourceSSECustomerAlgorithm', None) + params.pop('CopySourceSSECustomerKey', None) + 
params.pop('CopySourceSSECustomerKeyMD5', None) response_data = self.client.create_multipart_upload(**params) upload_id = response_data['UploadId'] return upload_id diff --git a/awscli/customizations/s3/s3handler.py b/awscli/customizations/s3/s3handler.py index fd9d9beb9716..bf4a9643316b 100644 --- a/awscli/customizations/s3/s3handler.py +++ b/awscli/customizations/s3/s3handler.py @@ -55,15 +55,33 @@ def __init__(self, session, params, result_queue=None, self.result_queue = result_queue if not self.result_queue: self.result_queue = queue.Queue() - self.params = {'dryrun': False, 'quiet': False, 'acl': None, - 'guess_mime_type': True, 'sse': False, - 'storage_class': None, 'website_redirect': None, - 'content_type': None, 'cache_control': None, - 'content_disposition': None, 'content_encoding': None, - 'content_language': None, 'expires': None, - 'grants': None, 'only_show_errors': False, - 'is_stream': False, 'paths_type': None, - 'expected_size': None, 'metadata_directive': None} + self.params = {'dryrun': False, + 'quiet': False, + 'acl': None, + 'guess_mime_type': True, + 'sse_copy_source_customer_algorithm': None, + 'sse_copy_source_customer_key': None, + 'sse_copy_source_customer_key_md5': None, + 'sse': False, + 'sse_class': None, + 'sse_customer_algorithm': None, + 'sse_customer_key': None, + 'sse_customer_key_md5': None, + 'sse_kms_key_id': None, + 'storage_class': None, + 'website_redirect': None, + 'content_type': None, + 'cache_control': None, + 'content_disposition': None, + 'content_encoding': None, + 'content_language': None, + 'expires': None, + 'grants': None, + 'only_show_errors': False, + 'is_stream': False, + 'paths_type': None, + 'expected_size': None, + 'metadata_directive': None} self.params['region'] = params['region'] for key in self.params.keys(): if key in params: @@ -269,7 +287,8 @@ def _do_enqueue_range_download_tasks(self, filename, chunksize, task = tasks.DownloadPartTask( part_number=i, chunk_size=chunksize, 
result_queue=self.result_queue, filename=filename, - context=context, io_queue=self.write_queue) + context=context, io_queue=self.write_queue, + params=self.params) self.executor.submit(task) def _enqueue_multipart_upload_tasks(self, filename, @@ -332,7 +351,8 @@ def _enqueue_upload_single_part_task(self, part_number, chunk_size, payload=None): kwargs = {'part_number': part_number, 'chunk_size': chunk_size, 'result_queue': self.result_queue, - 'upload_context': upload_context, 'filename': filename} + 'upload_context': upload_context, 'filename': filename, + 'params': self.params} if payload: kwargs['payload'] = payload task = task_class(**kwargs) diff --git a/awscli/customizations/s3/subcommands.py b/awscli/customizations/s3/subcommands.py index 72a43c1232e2..e184834f5ab9 100644 --- a/awscli/customizations/s3/subcommands.py +++ b/awscli/customizations/s3/subcommands.py @@ -10,6 +10,8 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +import hashlib +import logging import os import sys @@ -32,6 +34,7 @@ SizeAndLastModifiedSync, NeverSync from awscli.customizations.s3 import transferconfig +LOG = logging.getLogger(__name__) RECURSIVE = {'name': 'recursive', 'action': 'store_true', 'dest': 'dir_op', 'help_text': ( @@ -157,9 +160,93 @@ 'UsingAuthAccess.html">Access Control')} -SSE = {'name': 'sse', 'action': 'store_true', - 'help_text': ( - "Enable Server Side Encryption of the object in S3")} +SSE_COPY_SOURCE_CUSTOMER_ALGORITHM = {'name': 'sse-copy-source-customer-algorithm', + 'action': 'store', + 'choices': ['AES256'], + 'default': 'AES256', + 'help_text': ('''\ +The server-side encryption algorithm to use with the customer-specified key for \ +the source object. When present this must be "AES256". 
This argument is only \ +valid when doing s3 -> s3 operations.\ +''')} + + +SSE_COPY_SOURCE_CUSTOMER_KEY = {'name': 'sse-copy-source-customer-key', + 'action': 'store', + 'help_text': ('''\ +The customer-controlled encryption key used to server-side encrypt the source \ +object of an s3 -> s3 operation. This argument is only valid when doing s3 -> \ +s3 operations. See --sse-customer-key for how arg values are interpretted.\ +''')} + + +SSE_COPY_SOURCE_CUSTOMER_KEY_MD5 = {'name': 'sse-copy-source-customer-key-md5', + 'action': 'store', + 'help_text': ('''\ +The base64 encoded raw 128 bit MD5 digest of the server-side encryption copy \ +source customer key. Note: This is NOT the base64 encoding of the hex \ +(printable) MD5 digest.\ +''')} + + +SSE = {'name': 'sse', + 'action': 'store_true', + 'help_text': ('''\ +Enable Server Side Encryption of the object in S3. Using this arg will enable \ +SSE-S3 by default if no other --sse-* args are specified.\ +''')} + + +SSE_CLASS = {'name': 'sse-class', + 'action': 'store', + 'choices': ['C', 'KMS', 'S3'], + 'default': None, + 'help_text': ('''\ +The class of Server Side Encryption to use. Valid values are "C", "KMS", and \ +"S3". This argument is optional since it can be inferred from the presence or \ +absence of other --sse-* arguments. Note that KMS requires Signature v4, \ +which needs to be enabled via aws configure. +''')} + + +SSE_CUSTOMER_ALGORITHM = {'name': 'sse-customer-algorithm', + 'action': 'store', + 'choices': ['AES256'], + 'default': 'AES256', + 'help_text': ('''\ +The server-side encryption algorithm to use with the customer-specified key. \ +When present this must be "AES256".\ +''')} + + +SSE_CUSTOMER_KEY = {'name': 'sse-customer-key', + 'action': 'store', + 'help_text': ('''\ +The customer-controlled encryption key to use to server-side encrypt the \ +destination object in S3. Can be specified as a string (insecure) or a file URL \ +(more secure). 
When --customer-key-md5 is not specified this value is \ +interpretted as raw key bytes and base64 encoded for use in the S3 API. When \ +--customer-key-md5 is specified this value is not encoded at all, and must \ +already be in a form expected by the API (base64 encoded). The unencoded value \ +must be 32 bytes (256 bits) in length.\ +''')} + + +SSE_CUSTOMER_KEY_MD5 = {'name': 'sse-customer-key-md5', + 'action': 'store', + 'help_text': ('''\ +The base64-encoded raw 128 bit MD5 digest of the customer key. Note: This is \ +NOT the base64 encoding of the hex (printable) MD5 digest.\ +''')} + + +SSE_KMS_KEY_ID = {'name': 'sse-kms-key-id', + 'action': 'store', + 'help_text': ('''\ +The AWS KMS key ID that should be used to server-side encrypt the destination \ +object. Note that KMS requires Signature v4, which needs to be enabled via \ +aws configure.\ +''')} STORAGE_CLASS = {'name': 'storage-class', 'nargs': 1, @@ -282,15 +369,36 @@ 'operation. The default value is 1000 (the maximum allowed). 
' 'Using a lower value may help if an operation times out.')} - -TRANSFER_ARGS = [DRYRUN, QUIET, INCLUDE, EXCLUDE, ACL, - FOLLOW_SYMLINKS, NO_FOLLOW_SYMLINKS, NO_GUESS_MIME_TYPE, - SSE, STORAGE_CLASS, GRANTS, WEBSITE_REDIRECT, CONTENT_TYPE, - CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, - CONTENT_LANGUAGE, EXPIRES, SOURCE_REGION, ONLY_SHOW_ERRORS, +TRANSFER_ARGS = [DRYRUN, + QUIET, + INCLUDE, + EXCLUDE, + ACL, + FOLLOW_SYMLINKS, + NO_FOLLOW_SYMLINKS, + NO_GUESS_MIME_TYPE, + SSE, + SSE_CLASS, + SSE_COPY_SOURCE_CUSTOMER_ALGORITHM, + SSE_COPY_SOURCE_CUSTOMER_KEY, + SSE_COPY_SOURCE_CUSTOMER_KEY_MD5, + SSE_CUSTOMER_ALGORITHM, + SSE_CUSTOMER_KEY, + SSE_CUSTOMER_KEY_MD5, + SSE_KMS_KEY_ID, + STORAGE_CLASS, + GRANTS, + WEBSITE_REDIRECT, + CONTENT_TYPE, + CACHE_CONTROL, + CONTENT_DISPOSITION, + CONTENT_ENCODING, + CONTENT_LANGUAGE, + EXPIRES, + SOURCE_REGION, + ONLY_SHOW_ERRORS, PAGE_SIZE] - def get_client(session, region, endpoint_url, verify): return session.create_client('s3', region_name=region, endpoint_url=endpoint_url, verify=verify) @@ -496,6 +604,94 @@ def _run_main(self, parsed_args, parsed_globals): cmd_params.add_page_size(parsed_args) cmd_params.add_paths(parsed_args.paths) self._handle_rm_force(parsed_globals, cmd_params.parameters) + + params = cmd_params.parameters # alias + + if 'sse' in params: + # fixup SSE-related params; someone with better knowledge of the + # awscli command line argument subsystem may know a better place + # for this, but this gets the job done + + if params['sse'] and params['sse_class'] is None: + params['sse_class'] = 'S3' + + if params['sse_class'] is not None and not params['sse']: + params['sse'] = True + + if params['sse_class'] is None and \ + params['sse_customer_key'] is not None: + params['sse_class'] = 'C' + + if params['sse_class'] is None and \ + params['sse_kms_key_id'] is not None: + params['sse_class'] = 'KMS' + + if params['sse_class'] == 'C' and \ + params['sse_customer_key'] is None: + raise 
ValueError('must specify --sse-customer-key') + + if params['sse_class'] == 'KMS' and \ + params['sse_kms_key_id'] is None: + raise ValueError('must specify --sse-kms-key-id') + + if params['sse_class'] == 'S3' and \ + (params['sse_customer_key'] is not None or + params['sse_kms_key_id'] is not None): + raise ValueError('extraneous keys specified') + + if params['sse_copy_source_customer_key'] is not None and \ + params['paths_type'] != 's3s3': + raise ValueError('--sse-copy-source-customer-key is only ' + 'valid for s3 source and s3 target') + + # There's an inconsistency in boto/botocore and the aws s3api + # subcommand regarding how SSECustomerKey, SSECustomerKeyMD5, + # CopySourceSSECustomerKey, and CopySourceSSECustomerKeyMD5 are + # treated. SSECustomerKey is base64 encoded for you by botocore as + # long as you don't set SSECustomerKeyMD5 (no matter what the + # encoding of SSECustomerKey already is). CopySource* are never + # transformed for you in any way by botocore. So in our case, let's + # transform both SSECustomerKey and CopySourceSSECustomerKey + # depending on the presence of the corresponding *MD5. + + for k, m in (('sse_copy_source_customer_key', + 'sse_copy_source_customer_key_md5'), + ('sse_customer_key', + 'sse_customer_key_md5')): + if params[k] is None: + continue + + if params[m] is not None: + # this means that both params[k] and params[m] are + # specified, and assumed to be base64 encoded already + + rawkey = params[k].decode('base64') + + if len(rawkey) != 32: + raise ValueError('%s value is not 32 bytes' % k) + + md5b64 = hashlib.md5(rawkey).digest().encode('base64')\ + .strip() + + if params[m] != md5b64: + raise ValueError('md5 of %s does not match %s' % + (k, m)) + + else: + # params[m] was not specified, so this means that params[k] + # is assumed to be raw bytes. It needs base64 encoding, and + # the base64 encoded raw digest needs to be set. 
+ + if len(params[k]) != 32: + raise ValueError('%s value is not 32 bytes' % k) + + md5b64 = hashlib.md5(params[k]).digest().encode('base64')\ + .strip() + params[k] = params[k].encode('base64').strip() + params[m] = md5b64 + + LOG.debug('S3 params %r', cmd_params.parameters) + runtime_config = transferconfig.RuntimeConfig().build_config( **self._session.get_scoped_config().get('s3', {})) cmd = CommandArchitecture(self._session, self.NAME, @@ -731,6 +927,45 @@ def run(self): files = FileFormat().format(src, dest, self.parameters) rev_files = FileFormat().format(dest, src, self.parameters) + fgen_sse_customer_algorithm = None + fgen_sse_customer_key = None + fgen_sse_customer_key_md5 = None + rgen_sse_customer_algorithm = None + rgen_sse_customer_key = None + rgen_sse_customer_key_md5 = None + + if paths_type == 's3': + fgen_sse_customer_algorithm = \ + self.parameters.get('sse_customer_algorithm', None) + fgen_sse_customer_key = \ + self.parameters.get('sse_customer_key', None) + fgen_sse_customer_key_md5 = \ + self.parameters.get('sse_customer_key_md5', None) + elif paths_type == 's3s3': + fgen_sse_customer_algorithm = \ + self.parameters.get('sse_copy_source_customer_algorithm', None) + fgen_sse_customer_key = \ + self.parameters.get('sse_copy_source_customer_key', None) + fgen_sse_customer_key_md5 = \ + self.parameters.get('sse_copy_source_customer_key_md5', None) + rgen_sse_customer_algorithm = \ + self.parameters.get('sse_customer_algorithm', None) + rgen_sse_customer_key = \ + self.parameters.get('sse_customer_key', None) + rgen_sse_customer_key_md5 = \ + self.parameters.get('sse_customer_key_md5', None) + elif paths_type == 's3local': + fgen_sse_customer_algorithm = \ + self.parameters.get('sse_customer_algorithm', None) + fgen_sse_customer_key = \ + self.parameters.get('sse_customer_key', None) + fgen_sse_customer_key_md5 = \ + self.parameters.get('sse_customer_key_md5', None) + elif paths_type == 'locals3': + pass + else: + raise ValueError('unknown 
paths_type value') + cmd_translation = {} cmd_translation['locals3'] = {'cp': 'upload', 'sync': 'upload', 'mv': 'move'} @@ -748,11 +983,17 @@ def run(self): operation_name, self.parameters['follow_symlinks'], self.parameters['page_size'], - result_queue=result_queue) + result_queue=result_queue, + sse_customer_algorithm=fgen_sse_customer_algorithm, + sse_customer_key=fgen_sse_customer_key, + sse_customer_key_md5=fgen_sse_customer_key_md5) rev_generator = FileGenerator(self._client, '', self.parameters['follow_symlinks'], self.parameters['page_size'], - result_queue=result_queue) + result_queue=result_queue, + sse_customer_algorithm=rgen_sse_customer_algorithm, + sse_customer_key=rgen_sse_customer_key, + sse_customer_key_md5=rgen_sse_customer_key_md5) taskinfo = [TaskInfo(src=files['src']['path'], src_type='s3', operation_name=operation_name, diff --git a/awscli/customizations/s3/tasks.py b/awscli/customizations/s3/tasks.py index 59a649f645fe..59e7179d2826 100644 --- a/awscli/customizations/s3/tasks.py +++ b/awscli/customizations/s3/tasks.py @@ -126,12 +126,13 @@ def _queue_print_message(self, filename, failed, dryrun, class CopyPartTask(OrderableTask): def __init__(self, part_number, chunk_size, - result_queue, upload_context, filename): + result_queue, upload_context, filename, params): self._result_queue = result_queue self._upload_context = upload_context self._part_number = part_number self._chunk_size = chunk_size self._filename = filename + self._params = params def _is_last_part(self, part_number): return self._part_number == int( @@ -161,6 +162,20 @@ def __call__(self): 'UploadId': upload_id, 'CopySource': '%s/%s' % (src_bucket, src_key), 'CopySourceRange': range_param} + if self._params['sse_copy_source_customer_key']: + params['CopySourceSSECustomerAlgorithm'] = \ + self._params['sse_copy_source_customer_algorithm'] + params['CopySourceSSECustomerKey'] = \ + self._params['sse_copy_source_customer_key'] + params['CopySourceSSECustomerKeyMD5'] = \ + 
self._params['sse_copy_source_customer_key_md5'] + if self._params['sse_customer_key']: + params['SSECustomerAlgorithm'] = \ + self._params['sse_customer_algorithm'] + params['SSECustomerKey'] = \ + self._params['sse_customer_key'] + params['SSECustomerKeyMD5'] = \ + self._params['sse_customer_key_md5'] response_data = self._filename.client.upload_part_copy(**params) etag = response_data['CopyPartResult']['ETag'][1:-1] self._upload_context.announce_finished_part( @@ -199,12 +214,13 @@ class UploadPartTask(OrderableTask): object. """ def __init__(self, part_number, chunk_size, result_queue, upload_context, - filename, payload=None): + filename, params, payload=None): self._result_queue = result_queue self._upload_context = upload_context self._part_number = part_number self._chunk_size = chunk_size self._filename = filename + self._params = params self._payload = payload def _read_part(self): @@ -231,6 +247,10 @@ def __call__(self): 'PartNumber': self._part_number, 'UploadId': upload_id, 'Body': body} + if self._params['sse_customer_key']: + params['SSECustomerAlgorithm'] = self._params['sse_customer_algorithm'] + params['SSECustomerKey'] = self._params['sse_customer_key'] + params['SSECustomerKeyMD5'] = self._params['sse_customer_key_md5'] try: response_data = self._filename.client.upload_part(**params) finally: @@ -329,7 +349,7 @@ class DownloadPartTask(OrderableTask): TOTAL_ATTEMPTS = 5 def __init__(self, part_number, chunk_size, result_queue, - filename, context, io_queue): + filename, context, io_queue, params): self._part_number = part_number self._chunk_size = chunk_size self._result_queue = result_queue @@ -337,6 +357,7 @@ def __init__(self, part_number, chunk_size, result_queue, self._client = filename.client self._context = context self._io_queue = io_queue + self._params = params def __call__(self): try: @@ -359,7 +380,13 @@ def _download_part(self): LOGGER.debug("Downloading bytes range of %s for file %s", range_param, self._filename.dest) bucket, key = 
find_bucket_key(self._filename.src) - params = {'Bucket': bucket, 'Key': key, 'Range': range_param} + params = {'Bucket': bucket, + 'Key': key, + 'Range': range_param} + if self._params['sse_customer_key']: + params['SSECustomerAlgorithm'] = self._params['sse_customer_algorithm'] + params['SSECustomerKey'] = self._params['sse_customer_key'] + params['SSECustomerKeyMD5'] = self._params['sse_customer_key_md5'] for i in range(self.TOTAL_ATTEMPTS): try: LOGGER.debug("Making GetObject requests with byte range: %s", @@ -494,7 +521,8 @@ def __call__(self): 'MultipartUpload': {'Parts': parts}, } try: - self.filename.client.complete_multipart_upload(**params) + response_data = self.filename.client.complete_multipart_upload( + **params) except Exception as e: LOGGER.debug("Error trying to complete multipart upload: %s", e, exc_info=True) diff --git a/tests/functional/s3/test_sse b/tests/functional/s3/test_sse new file mode 100755 index 000000000000..58406a6a8d97 --- /dev/null +++ b/tests/functional/s3/test_sse @@ -0,0 +1,450 @@ +#!/bin/bash + +# Script to test SSE-C, SSE-KMS, and SSE-S3 support of the aws s3 cp and sync +# subcommands. 
+# +# Usage example: +# +# $ BUCKET=yourbucketname KEYID0=yourkmskeyid0 KEYID1=yourkmskeyid1 \ +# DOWNLOAD=false tests/functional/s3/test_sse + +set -e +set -u +set -x + +EXTRAOPTS=${EXTRAOPTS:-} +AWS="aws --endpoint-url https://s3-external-1.amazonaws.com" +DOWNLOAD=${DOWNLOAD:-false} + +BUCKET=${BUCKET:?} +BUCKETURL=s3://$BUCKET/test_sse +CKEY0=${CKEY0:-00000000000000000000000000000000} # this is a horrible key just for testing purposes +CKEY0MD5B64=$(python -c "import hashlib; print hashlib.md5('$CKEY0').digest().encode('base64'),") +CKEY1=${CKEY1:-00000000000000000000000000000001} # this is a horrible key just for testing purposes +CKEY1MD5B64=$(python -c "import hashlib; print hashlib.md5('$CKEY1').digest().encode('base64'),") +KEYID0=${KEYID0:?} +KEYID1=${KEYID1:?} + +TMPDIR=$(mktemp -d) + +cleanup () { + rm -rf $TMPDIR +} + +trap cleanup EXIT + +cd $TMPDIR +mkdir -p $TMPDIR/.aws +mkdir -p $TMPDIR/.key +mkdir -p $TMPDIR/copy +mkdir -p $TMPDIR/head +mkdir -p $TMPDIR/local +mkdir -p $TMPDIR/remote +mkdir -p $TMPDIR/sync +mkdir -p $TMPDIR/synccopy + +echo -n $CKEY0 > $TMPDIR/.key/ckey0_raw +base64 -w 0 $TMPDIR/.key/ckey0_raw > $TMPDIR/.key/ckey0_b64 +echo -n $CKEY0MD5B64 > $TMPDIR/.key/ckey0_md5_b64 + +dd if=/dev/urandom of=local/s3_singlepart bs=4K count=1 +dd if=/dev/urandom of=local/s3_multipart bs=5M count=2 +dd if=/dev/urandom of=local/sse-c_singlepart bs=4K count=1 +dd if=/dev/urandom of=local/sse-c_multipart bs=5M count=2 +dd if=/dev/urandom of=local/sse-kms_singlepart bs=4K count=1 +dd if=/dev/urandom of=local/sse-kms_multipart bs=5M count=2 +dd if=/dev/urandom of=local/sse-s3_singlepart bs=4K count=1 +dd if=/dev/urandom of=local/sse-s3_multipart bs=5M count=2 + +export AWS_CONFIG_FILE=$TMPDIR/.aws/config +cat > $TMPDIR/.aws/config <> $TMPDIR/.aws/config < $head_out + + if [[ $td == s3 ]]; then + if $DOWNLOAD && [[ ! 
$dst =~ .*multipart ]]; then + test "$(cat $head_out | jq -r '.ETag' | tr -d '\"')" == $(md5sum $head_cmp | cut -d ' ' -f 1) + fi + elif [[ $td == sse-c ]]; then + test "$(cat $head_out | jq -r '.SSECustomerAlgorithm')" == "AES256" + test "$(cat $head_out | jq -r '.SSECustomerKeyMD5')" == $dst_key_md5_b64 + elif [[ $td == sse-kms ]]; then + test "$(cat $head_out | jq -r '.ServerSideEncryption')" == "aws:kms" + test "$(cat $head_out | jq -r '.SSEKMSKeyId' | cut -d '/' -f 2)" == $dst_key + elif [[ $td == sse-s3 ]]; then + test "$(cat $head_out | jq -r '.ServerSideEncryption')" == "AES256" + else + exit 1 + fi + + # optionally download what was uploaded/copied and compare + + if $DOWNLOAD; then + if [[ $op == sync ]]; then + mkdir -p $dst$suffix + fi + + set +e + $AWS $EXTRAOPTS s3 $op $sse_down_opts $BUCKETURL/$dst$suffix $dst$suffix + EC=$? + set -e + test $EC -eq 0 -o $EC -eq 2 + + diff -rq $src$suffix $dst$suffix + fi + + # cleanup tracing + + if ! $nosetx; then + set +x + fi +} + + +######## +# copy # +######## + +# cp: file --> s3, s3 --> file + +test_s3 'cp' 'file' 's3' 'local/s3_singlepart' 'remote/s3_singlepart' +test_s3 'cp' 'file' 's3' 'local/s3_multipart' 'remote/s3_multipart' + +# cp: file --> sse-c, sse-c --> file + +test_s3 'cp' 'file' 'sse-c' 'local/sse-c_singlepart' 'remote/sse-c_singlepart' +# ... test different forms of specifying key arguments +test_s3 'cp' 'file' 'sse-c' 'local/sse-c_singlepart' 'remote/sse-c_singlepart' file://$TMPDIR/.key/ckey0_raw $CKEY0MD5B64 +test_s3 'cp' 'file' 'sse-c' 'local/sse-c_singlepart' 'remote/sse-c_singlepart' file://$TMPDIR/.key/ckey0_b64 $CKEY0MD5B64 $CKEY0MD5B64 +test_s3 'cp' 'file' 'sse-c' 'local/sse-c_singlepart' 'remote/sse-c_singlepart' file://$TMPDIR/.key/ckey0_b64 $CKEY0MD5B64 file://$TMPDIR/.key/ckey0_md5_b64 +# ... 
multipart +test_s3 'cp' 'file' 'sse-c' 'local/sse-c_multipart' 'remote/sse-c_multipart' + +# cp: file --> sse-kms, sse-kms --> file + +test_s3 'cp' 'file' 'sse-kms' 'local/sse-kms_singlepart' 'remote/sse-kms_singlepart' +test_s3 'cp' 'file' 'sse-kms' 'local/sse-kms_multipart' 'remote/sse-kms_multipart' + +# cp: file --> sse-s3, sse-s3 --> file + +test_s3 'cp' 'file' 'sse-s3' 'local/sse-s3_singlepart' 'remote/sse-s3_singlepart' +test_s3 'cp' 'file' 'sse-s3' 'local/sse-s3_multipart' 'remote/sse-s3_multipart' + +# cp: s3 --> s3 + +test_s3 'cp' 's3' 's3' 'remote/s3_singlepart' 'copy/s3_s3_singlepart' +test_s3 'cp' 's3' 's3' 'remote/s3_multipart' 'copy/s3_s3_multipart' + +# cp: s3 --> sse-c + +test_s3 'cp' 's3' 'sse-c' 'remote/s3_singlepart' 'copy/s3_sse-c_siglepart' +test_s3 'cp' 's3' 'sse-c' 'remote/s3_multipart' 'copy/s3_sse-c_multipart' + +# cp: s3 --> sse-kms + +test_s3 'cp' 's3' 'sse-kms' 'remote/s3_singlepart' 'copy/s3_sse-kms_singlepart' +test_s3 'cp' 's3' 'sse-kms' 'remote/s3_multipart' 'copy/s3_sse-kms_multipart' + +# cp: s3 --> sse-s3 + +test_s3 'cp' 's3' 'sse-s3' 'remote/s3_singlepart' 'copy/s3_sse-s3_singlepart' +test_s3 'cp' 's3' 'sse-s3' 'remote/s3_multipart' 'copy/s3_sse-s3_multipart' + +# cp: sse-c --> s3 + +test_s3 'cp' 'sse-c' 's3' 'remote/sse-c_singlepart' 'copy/sse-c_s3_singlepart' +test_s3 'cp' 'sse-c' 's3' 'remote/sse-c_multipart' 'copy/sse-c_s3_multipart' + +# cp: sse-c --> sse-c (same key) + +test_s3 'cp' 'sse-c' 'sse-c' 'remote/sse-c_singlepart' 'copy/sse-c_sse-c_same_singlepart' +test_s3 'cp' 'sse-c' 'sse-c' 'remote/sse-c_multipart' 'copy/sse-c_sse-c_same_multipart' + +# cp: sse-c --> sse-c (diff key) + +test_s3 'cp' 'sse-c' 'sse-c' 'remote/sse-c_singlepart' 'copy/sse-c_sse-c_diff_singlepart' $CKEY1 $CKEY1MD5B64 +test_s3 'cp' 'sse-c' 'sse-c' 'remote/sse-c_multipart' 'copy/sse-c_sse-c_diff_multipart' $CKEY1 $CKEY1MD5B64 + +# cp: sse-c --> sse-kms + +test_s3 'cp' 'sse-c' 'sse-kms' 'remote/sse-c_singlepart' 'copy/sse-c_sse-kms_singlepart' 
+test_s3 'cp' 'sse-c' 'sse-kms' 'remote/sse-c_multipart' 'copy/sse-c_sse-kms_multipart' + +# cp: sse-c --> sse-s3 + +test_s3 'cp' 'sse-c' 'sse-s3' 'remote/sse-c_singlepart' 'copy/sse-c_sse-s3_singlepart' +test_s3 'cp' 'sse-c' 'sse-s3' 'remote/sse-c_multipart' 'copy/sse-c_sse-s3_multipart' + +# cp: sse-kms --> s3 + +test_s3 'cp' 'sse-kms' 's3' 'remote/sse-kms_singlepart' 'copy/sse-kms_s3_singlepart' +test_s3 'cp' 'sse-kms' 's3' 'remote/sse-kms_multipart' 'copy/sse-kms_s3_multipart' + +# cp: sse-kms --> sse-c + +test_s3 'cp' 'sse-kms' 'sse-c' 'remote/sse-kms_singlepart' 'copy/sse-kms_sse-c_singlepart' +test_s3 'cp' 'sse-kms' 'sse-c' 'remote/sse-kms_multipart' 'copy/sse-kms_sse-c_multipart' + +# cp: sse-kms --> sse-kms (same key) + +test_s3 'cp' 'sse-kms' 'sse-kms' 'remote/sse-kms_singlepart' 'copy/sse-kms_sse-kms_same_singlepart' +test_s3 'cp' 'sse-kms' 'sse-kms' 'remote/sse-kms_multipart' 'copy/sse-kms_sse-kms_same_multipart' + +# cp: sse-kms --> sse-kms (diff key) + +test_s3 'cp' 'sse-kms' 'sse-kms' 'remote/sse-kms_singlepart' 'copy/sse-kms_sse-kms_diff_singlepart' $KEYID1 +test_s3 'cp' 'sse-kms' 'sse-kms' 'remote/sse-kms_multipart' 'copy/sse-kms_sse-kms_diff_multipart' $KEYID1 + +# cp: sse-kms --> sse-s3 + +test_s3 'cp' 'sse-kms' 'sse-s3' 'remote/sse-kms_singlepart' 'copy/sse-kms_sse-s3_singlepart' +test_s3 'cp' 'sse-kms' 'sse-s3' 'remote/sse-kms_multipart' 'copy/sse-kms_sse-s3_multipart' + +# cp: sse-s3 --> s3 + +test_s3 'cp' 'sse-s3' 's3' 'remote/sse-s3_singlepart' 'copy/sse-s3_s3_singlepart' +test_s3 'cp' 'sse-s3' 's3' 'remote/sse-s3_multipart' 'copy/sse-s3_s3_multipart' + +# cp: sse-s3 --> sse-c + +test_s3 'cp' 'sse-s3' 'sse-c' 'remote/sse-s3_singlepart' 'copy/sse-s3_sse-c_singlepart' +test_s3 'cp' 'sse-s3' 'sse-c' 'remote/sse-s3_multipart' 'copy/sse-s3_sse-c_multipart' + +# cp: sse-s3 --> sse-kms + +test_s3 'cp' 'sse-s3' 'sse-kms' 'remote/sse-s3_singlepart' 'copy/sse-s3_sse-kms_singlepart' +test_s3 'cp' 'sse-s3' 'sse-kms' 'remote/sse-s3_multipart' 
'copy/sse-s3_sse-kms_multipart' + +# cp: sse-s3 --> sse-s3 + +test_s3 'cp' 'sse-s3' 'sse-s3' 'remote/sse-s3_singlepart' 'copy/sse-s3_sse-s3_singlepart' +test_s3 'cp' 'sse-s3' 'sse-s3' 'remote/sse-s3_multipart' 'copy/sse-s3_sse-s3_multipart' + +######## +# sync # +######## + +# sync: dir --> s3, s3 --> dir + +test_s3 'sync' 'dir' 's3' 'local' 'sync/dir_s3' + +# sync: dir --> sse-c, sse-c --> dir + +test_s3 'sync' 'dir' 'sse-c' 'local' 'sync/dir_sse-c' + +# sync: dir --> sse-kms, sse-kms --> dir + +test_s3 'sync' 'dir' 'sse-kms' 'local' 'sync/dir_sse-kms' + +# sync: dir --> sse-s3, sse-s3 --> dir + +test_s3 'sync' 'dir' 'sse-s3' 'local' 'sync/dir_sse-s3' + +# sync: s3 --> s3 + +test_s3 'sync' 's3' 's3' 'sync/dir_s3' 'copysync/s3_s3' + +# sync: s3 --> sse-c + +test_s3 'sync' 's3' 'sse-c' 'sync/dir_s3' 'copysync/s3_sse-c' + +# sync: s3 --> sse-kms + +test_s3 'sync' 's3' 'sse-kms' 'sync/dir_s3' 'copysync/s3_sse-kms' + +# sync: s3 --> sse-s3 + +test_s3 'sync' 's3' 'sse-s3' 'sync/dir_s3' 'copysync/s3_sse-s3' + +# sync: sse-c --> s3 + +test_s3 'sync' 'sse-c' 's3' 'sync/dir_sse-c' 'copysync/sse-c_s3' + +# sync: sse-c --> sse-c (same key) + +test_s3 'sync' 'sse-c' 'sse-c' 'sync/dir_sse-c' 'copysync/sse-c_sse-c_same' + +# sync: sse-c --> sse-c (diff key) + +test_s3 'sync' 'sse-c' 'sse-c' 'sync/dir_sse-c' 'copysync/sse-c_sse-c_diff' $CKEY1 $CKEY1MD5B64 + +# sync: sse-c --> sse-kms + +test_s3 'sync' 'sse-c' 'sse-kms' 'sync/dir_sse-c' 'copysync/sse-c_sse-kms' + +# sync: sse-c --> sse-s3 + +test_s3 'sync' 'sse-c' 'sse-s3' 'sync/dir_sse-c' 'copysync/sse-c_sse-s3' + +# sync: sse-kms --> s3 + +test_s3 'sync' 'sse-kms' 's3' 'sync/dir_sse-kms' 'copysync/sse-kms_s3' + +# sync: sse-kms --> sse-c + +test_s3 'sync' 'sse-kms' 'sse-c' 'sync/dir_sse-kms' 'copysync/sse-kms_sse-c' + +# sync: sse-kms --> sse-kms (same key) + +test_s3 'sync' 'sse-kms' 'sse-kms' 'sync/dir_sse-kms' 'copysync/sse-kms_sse-kms_same' + +# sync: sse-kms --> sse-kms (diff key) + +test_s3 'sync' 'sse-kms' 'sse-kms' 
'sync/dir_sse-kms' 'copysync/sse-kms_sse-kms_diff' $KEYID1 + +# sync: sse-kms --> sse-s3 + +test_s3 'sync' 'sse-kms' 'sse-s3' 'sync/dir_sse-kms' 'copysync/sse-kms_sse-s3' + +# sync: sse-s3 --> s3 + +test_s3 'sync' 'sse-s3' 's3' 'sync/dir_sse-s3' 'copysync/sse-s3_s3' + +# sync: sse-s3 --> sse-c + +test_s3 'sync' 'sse-s3' 'sse-c' 'sync/dir_sse-s3' 'copysync/sse-s3_sse-c' + +# sync: sse-s3 --> sse-kms + +test_s3 'sync' 'sse-s3' 'sse-kms' 'sync/dir_sse-s3' 'copysync/sse-s3_sse-kms' + +# sync: sse-s3 --> sse-s3 + +test_s3 'sync' 'sse-s3' 'sse-s3' 'sync/dir_sse-s3' 'copysync/sse-s3_sse-s3' + +echo done + diff --git a/tests/unit/customizations/s3/test_fileinfo.py b/tests/unit/customizations/s3/test_fileinfo.py index 7695a217f7da..c0ee53593a81 100644 --- a/tests/unit/customizations/s3/test_fileinfo.py +++ b/tests/unit/customizations/s3/test_fileinfo.py @@ -99,6 +99,6 @@ class TestSetSizeFromS3(unittest.TestCase): def test_set_size_from_s3(self): client = mock.Mock() client.head_object.return_value = {'ContentLength': 5} - file_info = FileInfo(src="bucket/key", client=client) + file_info = FileInfo(src="bucket/key", client=client, parameters=mock.MagicMock()) file_info.set_size_from_s3() self.assertEqual(file_info.size, 5) diff --git a/tests/unit/customizations/s3/test_tasks.py b/tests/unit/customizations/s3/test_tasks.py index b30267bec069..7d805175cc53 100644 --- a/tests/unit/customizations/s3/test_tasks.py +++ b/tests/unit/customizations/s3/test_tasks.py @@ -340,12 +340,13 @@ def setUp(self): self.filename.operation_name = 'download' self.context = mock.Mock() self.open = mock.MagicMock() + self.params = mock.MagicMock() def test_socket_timeout_is_retried(self): self.client.get_object.side_effect = socket.error task = DownloadPartTask(0, 1024 * 1024, self.result_queue, self.filename, self.context, - self.io_queue) + self.io_queue, self.params) # The mock is configured to keep raising a socket.error # so we should cancel the download. 
with self.assertRaises(RetriesExeededError): @@ -362,7 +363,7 @@ def test_download_succeeds(self): socket.error, {'Body': body}] task = DownloadPartTask(0, 1024 * 1024, self.result_queue, self.filename, self.context, - self.io_queue) + self.io_queue, self.params) task() self.assertEqual(self.result_queue.put.call_count, 1) # And we tried twice, the first one failed, the second one @@ -375,7 +376,7 @@ def test_download_queues_io_properly(self): self.client.get_object.side_effect = [{'Body': body}] task = DownloadPartTask(0, 1024 * 1024, self.result_queue, self.filename, self.context, - self.io_queue) + self.io_queue, self.params) task() call_args_list = self.io_queue.put.call_args_list self.assertEqual(len(call_args_list), 2) @@ -389,7 +390,7 @@ def test_incomplete_read_is_retried(self): IncompleteReadError(actual_bytes=1, expected_bytes=2) task = DownloadPartTask(0, 1024 * 1024, self.result_queue, self.filename, - self.context, self.io_queue) + self.context, self.io_queue, self.params) with self.assertRaises(RetriesExeededError): task() self.context.cancel.assert_called_with() @@ -401,7 +402,8 @@ def test_readtimeout_is_retried(self): ReadTimeoutError(None, None, None) task = DownloadPartTask(0, 1024 * 1024, self.result_queue, self.filename, - self.context, self.io_queue) + self.context, self.io_queue, + self.params) with self.assertRaises(RetriesExeededError): task() self.context.cancel.assert_called_with() @@ -424,7 +426,8 @@ def test_retried_requests_dont_enqueue_writes_twice(self): self.filename.is_stream = True task = DownloadPartTask( 0, transferconfig.DEFAULTS['multipart_chunksize'], - self.result_queue, self.filename, self.context, self.io_queue) + self.result_queue, self.filename, self.context, self.io_queue, + self.params) task() call_args_list = self.io_queue.put.call_args_list self.assertEqual(len(call_args_list), 1) @@ -501,7 +504,7 @@ def complete_task(self): return CompleteDownloadTask(None, None, None, None, None) def download_task(self): - return 
DownloadPartTask(None, None, None, mock.Mock(), None, None) + return DownloadPartTask(None, None, None, mock.Mock(), None, None, mock.MagicMock()) def shutdown_task(self, priority=None): return ShutdownThreadRequest(priority) diff --git a/tests/unit/test_completer.py b/tests/unit/test_completer.py index 4aacd94b53d6..6f04240a5ac6 100644 --- a/tests/unit/test_completer.py +++ b/tests/unit/test_completer.py @@ -75,7 +75,15 @@ ('aws s3 cp -', -1, set(['--no-guess-mime-type', '--dryrun', '--recursive', '--website-redirect', '--quiet', '--acl', '--storage-class', - '--sse', '--exclude', '--include', + '--sse', '--sse-class', + '--sse-copy-source-customer-algorithm', + '--sse-copy-source-customer-key', + '--sse-copy-source-customer-key-md5', + '--sse-customer-algorithm', + '--sse-customer-key', + '--sse-customer-key-md5', + '--sse-kms-key-id', + '--exclude', '--include', '--follow-symlinks', '--no-follow-symlinks', '--cache-control', '--content-type', '--content-disposition', '--source-region', @@ -90,7 +98,14 @@ '--content-disposition', '--cache-control', '--content-encoding', '--content-language', '--expires', '--website-redirect', '--acl', - '--storage-class', '--sse', + '--storage-class', '--sse', '--sse-class', + '--sse-copy-source-customer-algorithm', + '--sse-copy-source-customer-key', + '--sse-copy-source-customer-key-md5', + '--sse-customer-algorithm', + '--sse-customer-key', + '--sse-customer-key-md5', + '--sse-kms-key-id', '--exclude', '--include', '--source-region', '--metadata-directive', '--grants', '--only-show-errors',