Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed s3 to s3 sync url decoding pagniation bug. #909

Merged
merged 1 commit into from
Sep 8, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ Next Release (TBD)
* bugfix:Endpoint URL: Provide a better error message when
an invalid ``--endpoint-url`` is provided
(`issue 899 <https://github.com/aws/aws-cli/issues/899>`__)
* bugfix:``aws s3``: Fix issue when keys do not get properly
url decoded when syncing from a bucket that requires pagination
to a bucket that requires less pagination
(`issue 909 <https://github.com/aws/aws-cli/pull/909>`__)


1.4.2
Expand Down
13 changes: 9 additions & 4 deletions awscli/customizations/s3/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,8 @@ def list_objects(self, bucket, prefix=None, page_size=None):
with ScopedEventHandler(self._operation.session,
'after-call.s3.ListObjects',
self._decode_keys,
'BucketListerDecodeKeys'):
'BucketListerDecodeKeys',
True):
pages = self._operation.paginate(self._endpoint, **kwargs)
for response, page in pages:
contents = page['Contents']
Expand All @@ -368,18 +369,22 @@ def _decode_keys(self, parsed, **kwargs):
class ScopedEventHandler(object):
"""Register an event callback for the duration of a scope."""

def __init__(self, session, event_name, handler, unique_id=None):
def __init__(self, session, event_name, handler, unique_id=None,
unique_id_uses_count=False):
self._session = session
self._event_name = event_name
self._handler = handler
self._unique_id = unique_id
self._unique_id_uses_count = unique_id_uses_count

def __enter__(self):
self._session.register(self._event_name, self._handler, self._unique_id)
self._session.register(self._event_name, self._handler, self._unique_id,
self._unique_id_uses_count)

def __exit__(self, exc_type, exc_value, traceback):
self._session.unregister(self._event_name, self._handler,
self._unique_id)
self._unique_id,
self._unique_id_uses_count)


class PrintTask(namedtuple('PrintTask',
Expand Down
21 changes: 15 additions & 6 deletions tests/integration/customizations/s3/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,20 +490,29 @@ def test_sync_with_plus_chars_paginate(self):
self.assertNotIn('upload:', p2.stdout)
self.assertEqual('', p2.stdout)

def test_s3_to_s3_sync_with_plus_char(self):
self.files.create_file('foo+.txt', contents="foo")
def test_s3_to_s3_sync_with_plus_char_paginate(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it not worth keeping the original test? It might be useful to verify both the non-paginated and paginated cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I felt that the test that I added encompasses the non-paginated case since we always use a paginate call when listing the objects in a bucket. The main benefit in keeping the test is that it adds more granularity such that we would know if it is the first paged that is not url decoded or the pages after it. The reason I took it out in the first place is that it saves time in running the test. I estimated the test takes around 20 seconds. So I figured since the logic was the same, we could save some time/reduce redundancy.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, makes sense.

keynames = []
for i in range(4):
keyname = 'foo+%d' % i
keynames.append(keyname)
self.files.create_file(keyname, contents='')

bucket_name = self.create_bucket()
bucket_name_2 = self.create_bucket()

p = aws('s3 sync %s s3://%s' % (self.files.rootdir, bucket_name))
self.assert_no_errors(p)
self.assertTrue(self.key_exists(bucket_name, 'foo+.txt'))
for key in keynames:
self.assertTrue(self.key_exists(bucket_name, key))

p = aws('s3 sync s3://%s/ s3://%s/' % (bucket_name, bucket_name_2))
p = aws('s3 sync s3://%s/ s3://%s/ --page-size 2' %
(bucket_name, bucket_name_2))
self.assert_no_errors(p)
self.assertTrue(self.key_exists(bucket_name_2, 'foo+.txt'))
for key in keynames:
self.assertTrue(self.key_exists(bucket_name_2, key))

p2 = aws('s3 sync s3://%s/ s3://%s/' % (bucket_name, bucket_name_2))
p2 = aws('s3 sync s3://%s/ s3://%s/ --page-size 2' %
(bucket_name, bucket_name_2))
self.assertNotIn('copy:', p2.stdout)
self.assertEqual('', p2.stdout)

Expand Down
6 changes: 4 additions & 2 deletions tests/unit/customizations/s3/fake_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,12 @@ def emit(self, *args, **kwargs):
def emit_first_non_none_response(self, *args, **kwargs):
pass

def register(self, name, handler, unique_id=None):
def register(self, name, handler, unique_id=None,
unique_id_uses_call=False):
pass

def unregister(self, name, handler, unique_id=None):
def unregister(self, name, handler, unique_id=None,
unique_id_uses_call=False):
pass


Expand Down
12 changes: 8 additions & 4 deletions tests/unit/customizations/s3/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,15 +297,19 @@ def test_scoped_session_handler(self):
session = mock.Mock()
scoped = ScopedEventHandler(session, 'eventname', 'handler')
with scoped:
session.register.assert_called_with('eventname', 'handler', None)
session.unregister.assert_called_with('eventname', 'handler', None)
session.register.assert_called_with('eventname', 'handler', None,
False)
session.unregister.assert_called_with('eventname', 'handler', None,
False)

def test_scoped_session_unique(self):
session = mock.Mock()
scoped = ScopedEventHandler(session, 'eventname', 'handler', 'unique')
with scoped:
session.register.assert_called_with('eventname', 'handler', 'unique')
session.unregister.assert_called_with('eventname', 'handler', 'unique')
session.register.assert_called_with('eventname', 'handler',
'unique', False)
session.unregister.assert_called_with('eventname', 'handler', 'unique',
False)


class TestGetFileStat(unittest.TestCase):
Expand Down