From 54d64301f2bc7319eb5404bd51b946536b05c030 Mon Sep 17 00:00:00 2001 From: kyleknap Date: Tue, 26 Aug 2014 10:37:27 -0700 Subject: [PATCH] Fixed s3 to s3 sync urlencoding pagniation bug. Keys were not getting url decoded properly with pagination. If one bucket exited ListObjects pagination before the other bucket, it caused the handler that decodes the keys to prematurely exit as well. This resulted in some keys not getting url decoded. --- CHANGELOG.rst | 4 ++++ awscli/customizations/s3/utils.py | 13 ++++++++---- .../customizations/s3/test_plugin.py | 21 +++++++++++++------ tests/unit/customizations/s3/fake_session.py | 6 ++++-- tests/unit/customizations/s3/test_utils.py | 12 +++++++---- 5 files changed, 40 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a1de4f7e2a2f..e4c37e25d3e9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -23,6 +23,10 @@ Next Release (TBD) * bugfix:Endpoint URL: Provide a better error message when an invalid ``--endpoint-url`` is provided (`issue 899 `__) +* bugfix:``aws s3``: Fix issue when keys do not get properly + url decoded when syncing from a bucket that requires pagination + to a bucket that requires less pagination + (`issue 909 `__) 1.4.2 diff --git a/awscli/customizations/s3/utils.py b/awscli/customizations/s3/utils.py index eea51a5fbdbc..56fa247f2876 100644 --- a/awscli/customizations/s3/utils.py +++ b/awscli/customizations/s3/utils.py @@ -350,7 +350,8 @@ def list_objects(self, bucket, prefix=None, page_size=None): with ScopedEventHandler(self._operation.session, 'after-call.s3.ListObjects', self._decode_keys, - 'BucketListerDecodeKeys'): + 'BucketListerDecodeKeys', + True): pages = self._operation.paginate(self._endpoint, **kwargs) for response, page in pages: contents = page['Contents'] @@ -368,18 +369,22 @@ def _decode_keys(self, parsed, **kwargs): class ScopedEventHandler(object): """Register an event callback for the duration of a scope.""" - def __init__(self, session, event_name, handler, unique_id=None): + def __init__(self, session, event_name, handler, unique_id=None, + unique_id_uses_count=False): self._session = session self._event_name = event_name self._handler = handler self._unique_id = unique_id + self._unique_id_uses_count = unique_id_uses_count def __enter__(self): - self._session.register(self._event_name, self._handler, self._unique_id) + self._session.register(self._event_name, self._handler, self._unique_id, + self._unique_id_uses_count) def __exit__(self, exc_type, exc_value, traceback): self._session.unregister(self._event_name, self._handler, - self._unique_id) + self._unique_id, + self._unique_id_uses_count) class PrintTask(namedtuple('PrintTask', diff --git a/tests/integration/customizations/s3/test_plugin.py b/tests/integration/customizations/s3/test_plugin.py index 65e9d0f42217..f2da15b9ae57 100644 --- a/tests/integration/customizations/s3/test_plugin.py +++ b/tests/integration/customizations/s3/test_plugin.py @@ -490,20 +490,29 @@ def test_sync_with_plus_chars_paginate(self): self.assertNotIn('upload:', p2.stdout) self.assertEqual('', p2.stdout) - def test_s3_to_s3_sync_with_plus_char(self): - self.files.create_file('foo+.txt', contents="foo") + def test_s3_to_s3_sync_with_plus_char_paginate(self): + keynames = [] + for i in range(4): + keyname = 'foo+%d' % i + keynames.append(keyname) + self.files.create_file(keyname, contents='') + bucket_name = self.create_bucket() bucket_name_2 = self.create_bucket() p = aws('s3 sync %s s3://%s' % (self.files.rootdir, bucket_name)) self.assert_no_errors(p) - self.assertTrue(self.key_exists(bucket_name, 'foo+.txt')) + for key in keynames: + self.assertTrue(self.key_exists(bucket_name, key)) - p = aws('s3 sync s3://%s/ s3://%s/' % (bucket_name, bucket_name_2)) + p = aws('s3 sync s3://%s/ s3://%s/ --page-size 2' % + (bucket_name, bucket_name_2)) self.assert_no_errors(p) - self.assertTrue(self.key_exists(bucket_name_2, 'foo+.txt')) + for key in keynames: + self.assertTrue(self.key_exists(bucket_name_2, key)) - p2 = aws('s3 sync s3://%s/ s3://%s/' % (bucket_name, bucket_name_2)) + p2 = aws('s3 sync s3://%s/ s3://%s/ --page-size 2' % + (bucket_name, bucket_name_2)) self.assertNotIn('copy:', p2.stdout) self.assertEqual('', p2.stdout) diff --git a/tests/unit/customizations/s3/fake_session.py b/tests/unit/customizations/s3/fake_session.py index 3ded0b48b476..dfb93e42be4b 100644 --- a/tests/unit/customizations/s3/fake_session.py +++ b/tests/unit/customizations/s3/fake_session.py @@ -70,10 +70,12 @@ def emit(self, *args, **kwargs): def emit_first_non_none_response(self, *args, **kwargs): pass - def register(self, name, handler, unique_id=None): + def register(self, name, handler, unique_id=None, + unique_id_uses_call=False): pass - def unregister(self, name, handler, unique_id=None): + def unregister(self, name, handler, unique_id=None, + unique_id_uses_call=False): pass diff --git a/tests/unit/customizations/s3/test_utils.py b/tests/unit/customizations/s3/test_utils.py index fad31afc2c66..01c8d1f069c2 100644 --- a/tests/unit/customizations/s3/test_utils.py +++ b/tests/unit/customizations/s3/test_utils.py @@ -297,15 +297,19 @@ def test_scoped_session_handler(self): session = mock.Mock() scoped = ScopedEventHandler(session, 'eventname', 'handler') with scoped: - session.register.assert_called_with('eventname', 'handler', None) - session.unregister.assert_called_with('eventname', 'handler', None) + session.register.assert_called_with('eventname', 'handler', None, + False) + session.unregister.assert_called_with('eventname', 'handler', None, + False) def test_scoped_session_unique(self): session = mock.Mock() scoped = ScopedEventHandler(session, 'eventname', 'handler', 'unique') with scoped: - session.register.assert_called_with('eventname', 'handler', 'unique') - session.unregister.assert_called_with('eventname', 'handler', 'unique') + session.register.assert_called_with('eventname', 'handler', + 'unique', False) + session.unregister.assert_called_with('eventname', 'handler', 'unique', + False) class TestGetFileStat(unittest.TestCase):