Merge pull request #889 from kyleknap/page-size
Enabled ``--page-size`` as a global argument.
kyleknap committed Aug 21, 2014
2 parents c560dfe + 6bf5851 commit 1db0a08
Showing 14 changed files with 161 additions and 31 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
@@ -5,6 +5,10 @@ CHANGELOG
 Next Release (TBD)
 ==================
 
+* feature:Page Size: Add a ``--page-size`` option that
+  controls the page size when performing an operation that
+  uses pagination.
+  (`issue 889 <https://github.com/aws/aws-cli/pull/889>`__)
 * bugfix:``aws s3``: Added support for ignoring and warning
   about files that do not exist, that the user does not have
   read permissions for, or that are special files (i.e. sockets, FIFOs,
2 changes: 2 additions & 0 deletions awscli/clidriver.py
@@ -546,6 +546,8 @@ def invoke(self, operation_object, parameters, parsed_globals):
             endpoint_url=parsed_globals.endpoint_url,
             verify=parsed_globals.verify_ssl)
         if operation_object.can_paginate and parsed_globals.paginate:
+            if parsed_globals.page_size:
+                parameters['page_size'] = parsed_globals.page_size
             pages = operation_object.paginate(endpoint, **parameters)
             self._display_response(operation_object, pages,
                                    parsed_globals)
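For context, the ``page_size`` forwarded here is a generic paginator kwarg; the paginator is expected to translate it into the operation's own limit parameter (``MaxKeys`` for S3 ``ListObjects``, ``MaxResults`` for EC2 ``DescribeInstances``), which is what the unit tests at the bottom of this diff assert. A minimal sketch of that translation, using hypothetical names rather than botocore's actual classes:

    # Hypothetical sketch, not botocore's implementation: map the generic
    # ``page_size`` kwarg onto the service-specific limit key, then walk
    # the pages using the service's continuation marker.
    def paginate(call_operation, limit_key, page_size=None, **params):
        if page_size is not None:
            params[limit_key] = page_size   # e.g. MaxKeys=8 or MaxResults=10
        marker = None
        while True:
            if marker is not None:
                params['marker'] = marker
            page = call_operation(**params)
            yield page
            marker = page.get('NextMarker')
            if marker is None:
                return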
6 changes: 4 additions & 2 deletions awscli/customizations/s3/filegenerator.py
@@ -115,11 +115,12 @@ class FileGenerator(object):
     ``FileInfo`` objects to send to a ``Comparator`` or ``S3Handler``.
     """
     def __init__(self, service, endpoint, operation_name,
-                 follow_symlinks=True, result_queue=None):
+                 follow_symlinks=True, page_size=None, result_queue=None):
         self._service = service
         self._endpoint = endpoint
         self.operation_name = operation_name
         self.follow_symlinks = follow_symlinks
+        self.page_size = page_size
         self.result_queue = result_queue
         if not result_queue:
             self.result_queue = queue.Queue()
@@ -284,7 +285,8 @@ def list_objects(self, s3_path, dir_op):
         else:
             operation = self._service.get_operation('ListObjects')
             lister = BucketLister(operation, self._endpoint)
-            for key in lister.list_objects(bucket=bucket, prefix=prefix):
+            for key in lister.list_objects(bucket=bucket, prefix=prefix,
+                                           page_size=self.page_size):
                 source_path, size, last_update = key
                 if size == 0 and source_path.endswith('/'):
                     if self.operation_name == 'delete':
20 changes: 14 additions & 6 deletions awscli/customizations/s3/subcommands.py
@@ -248,16 +248,18 @@ def _run_main(self, parsed_args, parsed_globals):
             self._list_all_buckets()
         elif parsed_args.dir_op:
             # Then --recursive was specified.
-            self._list_all_objects_recursive(bucket, key)
+            self._list_all_objects_recursive(bucket, key,
+                                             parsed_globals.page_size)
         else:
-            self._list_all_objects(bucket, key)
+            self._list_all_objects(bucket, key, parsed_globals.page_size)
         return 0
 
-    def _list_all_objects(self, bucket, key):
+    def _list_all_objects(self, bucket, key, page_size=None):
 
         operation = self.service.get_operation('ListObjects')
         iterator = operation.paginate(self.endpoint, bucket=bucket,
-                                      prefix=key, delimiter='/')
+                                      prefix=key, delimiter='/',
+                                      page_size=page_size)
         for _, response_data in iterator:
             self._display_page(response_data)
 
@@ -294,10 +296,10 @@ def _list_all_buckets(self):
             uni_print(print_str)
         sys.stdout.flush()
 
-    def _list_all_objects_recursive(self, bucket, key):
+    def _list_all_objects_recursive(self, bucket, key, page_size=None):
         operation = self.service.get_operation('ListObjects')
         iterator = operation.paginate(self.endpoint, bucket=bucket,
-                                      prefix=key)
+                                      prefix=key, page_size=page_size)
         for _, response_data in iterator:
             self._display_page(response_data, use_basename=False)
 
@@ -373,6 +375,7 @@ def _run_main(self, parsed_args, parsed_globals):
         cmd_params.add_region(parsed_globals)
         cmd_params.add_endpoint_url(parsed_globals)
         cmd_params.add_verify_ssl(parsed_globals)
+        cmd_params.add_page_size(parsed_globals)
         cmd_params.add_paths(parsed_args.paths)
         cmd_params.check_force(parsed_globals)
         cmd = CommandArchitecture(self._session, self.NAME,
@@ -561,9 +564,11 @@ def run(self):
                                           self._source_endpoint,
                                           operation_name,
                                           self.parameters['follow_symlinks'],
+                                          self.parameters['page_size'],
                                           result_queue=result_queue)
         rev_generator = FileGenerator(self._service, self._endpoint, '',
                                       self.parameters['follow_symlinks'],
+                                      self.parameters['page_size'],
                                       result_queue=result_queue)
         taskinfo = [TaskInfo(src=files['src']['path'],
                              src_type='s3',
@@ -796,3 +801,6 @@ def add_endpoint_url(self, parsed_globals):
 
     def add_verify_ssl(self, parsed_globals):
         self.parameters['verify_ssl'] = parsed_globals.verify_ssl
+
+    def add_page_size(self, parsed_globals):
+        self.parameters['page_size'] = parsed_globals.page_size
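Note that ``run()`` above passes ``self.parameters['page_size']`` positionally, so the call sites must stay aligned with the slot ``page_size`` occupies in ``FileGenerator.__init__``. A standalone toy class (not the CLI's) illustrating why the keyword spelling is the defensive choice:

    # Toy stand-in for FileGenerator's signature; illustrative only.
    class Generator(object):
        def __init__(self, operation_name, follow_symlinks=True,
                     page_size=None, result_queue=None):
            self.follow_symlinks = follow_symlinks
            self.page_size = page_size
            self.result_queue = result_queue

    gen = Generator('download', True, 100)       # positional: breaks if the
                                                 # parameter order changes
    gen = Generator('download', page_size=100)   # keyword: order-proof
    assert gen.page_size == 100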
5 changes: 3 additions & 2 deletions awscli/customizations/s3/utils.py
@@ -338,8 +338,9 @@ def __init__(self, operation, endpoint, date_parser=_date_parser):
         self._endpoint = endpoint
         self._date_parser = date_parser
 
-    def list_objects(self, bucket, prefix=None):
-        kwargs = {'bucket': bucket, 'encoding_type': 'url'}
+    def list_objects(self, bucket, prefix=None, page_size=None):
+        kwargs = {'bucket': bucket, 'encoding_type': 'url',
+                  'page_size': page_size}
         if prefix is not None:
             kwargs['prefix'] = prefix
         # This event handler is needed because we use encoding_type url and
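With the new kwarg, callers can cap how many keys each ``ListObjects`` request returns; ``list_objects`` still yields one ``(path, size, last_update)`` tuple per key, which is exactly what the ``FileGenerator`` loop above unpacks. A usage sketch under those assumptions (``mybucket`` and ``logs/`` are placeholder values):

    # Usage sketch for the extended API; bucket and prefix are placeholders.
    from awscli.customizations.s3.utils import BucketLister

    def print_keys(operation, endpoint):
        lister = BucketLister(operation, endpoint)
        for path, size, last_update in lister.list_objects(
                bucket='mybucket', prefix='logs/', page_size=100):
            print(path, size, last_update)  # at most 100 keys per request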
4 changes: 4 additions & 0 deletions awscli/data/cli.json
@@ -31,6 +31,10 @@
     "query": {
         "help": "<p>A JMESPath query to use in filtering the response data.</p>"
     },
+    "page-size": {
+        "type": "int",
+        "help": "<p>Specifies the page size when paginating.</p>"
+    },
     "profile": {
         "help": "<p>Use a specific profile from your credential file.</p>"
     },
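Entries in ``cli.json`` surface as top-level arguments on every command, which is what makes ``--page-size`` global (e.g. ``aws ec2 describe-instances --page-size 10``). The builder that turns this table into a parser is not part of this diff, but the effect is roughly equivalent to this hypothetical argparse sketch:

    # Hypothetical equivalent of the cli.json entry above; the real CLI
    # generates its parser from the JSON table rather than hand-coding it.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--page-size', type=int,
                        help='Specifies the page size when paginating.')
    args = parser.parse_args(['--page-size', '100'])
    assert args.page_size == 100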
20 changes: 20 additions & 0 deletions tests/integration/customizations/s3/test_filegenerator.py
@@ -19,6 +19,7 @@
 
 import unittest
 import os
+import itertools
 
 import botocore.session
 from awscli import EnvironmentVariables
@@ -139,6 +140,25 @@ def test_s3_delete_directory(self):
         compare_files(self, result_list[1], expected_list[1])
         compare_files(self, result_list[2], expected_list[2])
 
+    def test_page_size(self):
+        input_s3_file = {'src': {'path': self.bucket + '/', 'type': 's3'},
+                         'dest': {'path': '', 'type': 'local'},
+                         'dir_op': True, 'use_src_name': True}
+        file_gen = FileGenerator(self.service, self.endpoint, '',
+                                 page_size=1).call(input_s3_file)
+        limited_file_gen = itertools.islice(file_gen, 1)
+        result_list = list(limited_file_gen)
+        file_stat = FileStat(src=self.file2,
+                             dest='another_directory' + os.sep + 'text2.txt',
+                             compare_key='another_directory/text2.txt',
+                             size=21,
+                             last_update=result_list[0].last_update,
+                             src_type='s3',
+                             dest_type='local', operation_name='')
+        # Ensure only one item is returned from ``ListObjects``.
+        self.assertEqual(len(result_list), 1)
+        compare_files(self, result_list[0], file_stat)
+
 
 if __name__ == "__main__":
     unittest.main()
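The new test leans on ``itertools.islice`` consuming the generator lazily: pulling a single result with ``page_size=1`` should trigger at most one ``ListObjects`` page. A toy demonstration of that laziness:

    # islice stops pulling from the generator once it has one item, so the
    # "fetch" side effect runs exactly once even though pages() is infinite.
    import itertools

    def pages():
        for n in itertools.count():
            print('fetching page', n)
            yield n

    first = list(itertools.islice(pages(), 1))
    assert first == [0]   # printed 'fetching page 0' once, nothing more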
17 changes: 10 additions & 7 deletions tests/integration/customizations/s3/test_plugin.py
@@ -463,13 +463,14 @@ def test_download_non_existent_key(self):
 
 class TestSync(BaseS3CLICommand):
     def test_sync_with_plus_chars_paginate(self):
-        # 1. Create > 1000 files with '+' in the filename.
-        # 2. Sync up to s3.
-        # 3. Sync up to s3
         # This test ensures pagination tokens are url decoded.
+        # 1. Create > 2 files with '+' in the filename.
+        # 2. Sync up to s3 while the page size is 2.
+        # 3. Sync up to s3 again while the page size is 2.
+        # 4. Verify nothing was synced up to s3 in step 3.
         bucket_name = self.create_bucket()
         filenames = []
-        for i in range(2000):
+        for i in range(4):
             # Create a file with a space char and a '+' char in the filename.
             # We're interested in testing the filename comparisons, not the
             # mtime comparisons so we're setting the mtime to some time
@@ -480,10 +481,12 @@ def test_sync_with_plus_chars_paginate(self):
             self.files.create_file('foo +%06d' % i,
                                    contents='',
                                    mtime=mtime))
-        p = aws('s3 sync %s s3://%s/' % (self.files.rootdir, bucket_name))
+        p = aws('s3 sync %s s3://%s/ --page-size 2' %
+                (self.files.rootdir, bucket_name))
         self.assert_no_errors(p)
-        time.sleep(5)
-        p2 = aws('s3 sync %s s3://%s/' % (self.files.rootdir, bucket_name))
+        time.sleep(1)
+        p2 = aws('s3 sync %s s3://%s/ --page-size 2'
+                 % (self.files.rootdir, bucket_name))
         self.assertNotIn('upload:', p2.stdout)
         self.assertEqual('', p2.stdout)
 
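The '+' characters matter because ``BucketLister`` calls ``ListObjects`` with ``encoding_type='url'`` (see ``utils.py`` above), so returned keys, and any continuation marker derived from the last key, come back percent-encoded and must be decoded before reuse. A quick standard-library round trip showing the encoding involved (illustrative; the CLI's decode path is not part of this diff):

    # '+' and ' ' both survive a percent-encoding round trip with
    # quote/unquote; a naive form-style decode would turn '+' into ' '.
    from urllib.parse import quote, unquote

    key = 'foo +000001'                # shaped like the test's filenames
    encoded = quote(key)               # 'foo%20%2B000001'
    assert unquote(encoded) == key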
33 changes: 33 additions & 0 deletions tests/unit/customizations/s3/test_ls_command.py
@@ -39,3 +39,36 @@ def test_errors_out_with_extra_arguments(self):
         stderr = self.run_cmd('s3 ls --extra-argument-foo', expected_rc=255)[1]
         self.assertIn('Unknown options', stderr)
         self.assertIn('--extra-argument-foo', stderr)
+
+    def test_operations_use_page_size(self):
+        time_utc = "2014-01-09T20:45:49.000Z"
+        self.parsed_responses = [{"CommonPrefixes": [], "Contents": [
+            {"Key": "foo/bar.txt", "Size": 100,
+             "LastModified": time_utc}]}]
+        stdout, _, _ = self.run_cmd('s3 ls s3://bucket/ --page-size 8',
+                                    expected_rc=0)
+        call_args = self.operations_called[0][1]
+        self.assertEqual(call_args['prefix'], '')
+        self.assertEqual(call_args['bucket'], 'bucket')
+        # The page size gets translated to ``MaxKeys`` in the s3 model.
+        self.assertEqual(call_args['MaxKeys'], 8)
+
+    def test_operations_use_page_size_recursive(self):
+        time_utc = "2014-01-09T20:45:49.000Z"
+        self.parsed_responses = [{"CommonPrefixes": [], "Contents": [
+            {"Key": "foo/bar.txt", "Size": 100,
+             "LastModified": time_utc}]}]
+        stdout, _, _ = self.run_cmd('s3 ls s3://bucket/ --page-size 8'
+                                    ' --recursive', expected_rc=0)
+        call_args = self.operations_called[0][1]
+        # We should not be calling the args with any delimiter because we
+        # want a recursive listing.
+        self.assertEqual(call_args['prefix'], '')
+        self.assertEqual(call_args['bucket'], 'bucket')
+        # The page size gets translated to ``MaxKeys`` in the s3 model.
+        self.assertEqual(call_args['MaxKeys'], 8)
+        self.assertNotIn('delimiter', call_args)
+
+
+if __name__ == "__main__":
+    unittest.main()
30 changes: 18 additions & 12 deletions tests/unit/customizations/s3/test_subcommands.py
@@ -57,7 +57,9 @@ def setUp(self):
     def test_ls_command_for_bucket(self):
         ls_command = ListCommand(self.session)
         parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False)
-        ls_command._run_main(parsed_args, mock.Mock())
+        parsed_globals = mock.Mock()
+        parsed_globals.page_size = '5'
+        ls_command._run_main(parsed_args, parsed_globals)
         call = self.session.get_service.return_value.get_operation\
             .return_value.call
         paginate = self.session.get_service.return_value.get_operation\
@@ -69,7 +71,8 @@ def test_ls_command_for_bucket(self):
             'ListObjects')
         self.assertEqual(
             paginate.call_args[1], {'bucket': u'mybucket',
-                                    'delimiter': '/', 'prefix': u''})
+                                    'delimiter': '/', 'prefix': u'',
+                                    'page_size': u'5'})
 
     def test_ls_command_with_no_args(self):
         ls_command = ListCommand(self.session)
@@ -194,7 +197,7 @@ def test_run_cp_put(self):
                   'src': local_file, 'dest': s3_file, 'filters': filters,
                   'paths_type': 'locals3', 'region': 'us-east-1',
                   'endpoint_url': None, 'verify_ssl': None,
-                  'follow_symlinks': True}
+                  'follow_symlinks': True, 'page_size': None}
         cmd_arc = CommandArchitecture(self.session, 'cp', params)
         cmd_arc.create_instructions()
         cmd_arc.run()
@@ -210,7 +213,7 @@ def test_error_on_same_line_as_status(self):
                   'src': local_file, 'dest': s3_file, 'filters': filters,
                   'paths_type': 'locals3', 'region': 'us-east-1',
                   'endpoint_url': None, 'verify_ssl': None,
-                  'follow_symlinks': True}
+                  'follow_symlinks': True, 'page_size': None}
         cmd_arc = CommandArchitecture(self.session, 'cp', params)
         cmd_arc.create_instructions()
         cmd_arc.run()
@@ -233,7 +236,7 @@ def test_run_cp_get(self):
                   'src': s3_file, 'dest': local_file, 'filters': filters,
                   'paths_type': 's3local', 'region': 'us-east-1',
                   'endpoint_url': None, 'verify_ssl': None,
-                  'follow_symlinks': True}
+                  'follow_symlinks': True, 'page_size': None}
         cmd_arc = CommandArchitecture(self.session, 'cp', params)
         cmd_arc.create_instructions()
         cmd_arc.run()
@@ -250,7 +253,7 @@ def test_run_cp_copy(self):
                   'src': s3_file, 'dest': s3_file, 'filters': filters,
                   'paths_type': 's3s3', 'region': 'us-east-1',
                   'endpoint_url': None, 'verify_ssl': None,
-                  'follow_symlinks': True}
+                  'follow_symlinks': True, 'page_size': None}
         cmd_arc = CommandArchitecture(self.session, 'cp', params)
         cmd_arc.create_instructions()
         cmd_arc.run()
@@ -267,7 +270,7 @@ def test_run_mv(self):
                   'src': s3_file, 'dest': s3_file, 'filters': filters,
                   'paths_type': 's3s3', 'region': 'us-east-1',
                   'endpoint_url': None, 'verify_ssl': None,
-                  'follow_symlinks': True}
+                  'follow_symlinks': True, 'page_size': None}
         cmd_arc = CommandArchitecture(self.session, 'mv', params)
         cmd_arc.create_instructions()
         cmd_arc.run()
@@ -284,7 +287,7 @@ def test_run_remove(self):
                   'src': s3_file, 'dest': s3_file, 'filters': filters,
                   'paths_type': 's3', 'region': 'us-east-1',
                   'endpoint_url': None, 'verify_ssl': None,
-                  'follow_symlinks': True}
+                  'follow_symlinks': True, 'page_size': None}
         cmd_arc = CommandArchitecture(self.session, 'rm', params)
         cmd_arc.create_instructions()
         cmd_arc.run()
@@ -305,7 +308,7 @@ def test_run_sync(self):
                   'src': local_dir, 'dest': s3_prefix, 'filters': filters,
                   'paths_type': 'locals3', 'region': 'us-east-1',
                   'endpoint_url': None, 'verify_ssl': None,
-                  'follow_symlinks': True}
+                  'follow_symlinks': True, 'page_size': None}
         cmd_arc = CommandArchitecture(self.session, 'sync', params)
         cmd_arc.create_instructions()
         cmd_arc.run()
@@ -320,7 +323,8 @@ def test_run_mb(self):
         params = {'dir_op': True, 'dryrun': True, 'quiet': False,
                   'src': s3_prefix, 'dest': s3_prefix, 'paths_type': 's3',
                   'region': 'us-east-1', 'endpoint_url': None,
-                  'verify_ssl': None, 'follow_symlinks': True}
+                  'verify_ssl': None, 'follow_symlinks': True,
+                  'page_size': None}
         cmd_arc = CommandArchitecture(self.session, 'mb', params)
         cmd_arc.create_instructions()
         cmd_arc.run()
@@ -335,7 +339,8 @@ def test_run_rb(self):
         params = {'dir_op': True, 'dryrun': True, 'quiet': False,
                   'src': s3_prefix, 'dest': s3_prefix, 'paths_type': 's3',
                   'region': 'us-east-1', 'endpoint_url': None,
-                  'verify_ssl': None, 'follow_symlinks': True}
+                  'verify_ssl': None, 'follow_symlinks': True,
+                  'page_size': None}
         cmd_arc = CommandArchitecture(self.session, 'rb', params)
         cmd_arc.create_instructions()
         rc = cmd_arc.run()
@@ -351,7 +356,8 @@ def test_run_rb_nonzero_rc(self):
         params = {'dir_op': True, 'dryrun': False, 'quiet': False,
                   'src': s3_prefix, 'dest': s3_prefix, 'paths_type': 's3',
                   'region': 'us-east-1', 'endpoint_url': None,
-                  'verify_ssl': None, 'follow_symlinks': True}
+                  'verify_ssl': None, 'follow_symlinks': True,
+                  'page_size': None}
         cmd_arc = CommandArchitecture(self.session, 'rb', params)
         cmd_arc.create_instructions()
         rc = cmd_arc.run()
6 changes: 6 additions & 0 deletions tests/unit/ec2/test_describe_instances.py
@@ -87,6 +87,12 @@ def test_multiple_filters_alternate(self):
         }
         self.assert_params_for_cmd(cmdlist, result)
 
+    def test_page_size(self):
+        args = ' --page-size 10'
+        cmdline = self.prefix + args
+        result = {'MaxResults': '10'}
+        self.assert_params_for_cmd(cmdline, result)
+
 
 if __name__ == "__main__":
     unittest.main()
10 changes: 10 additions & 0 deletions tests/unit/s3/test_list_objects.py
@@ -39,6 +39,16 @@ def test_max_items(self):
                   'headers': {},}
         self.assert_params_for_cmd(cmdline, result, ignore_params=['payload'])
 
+    def test_page_size(self):
+        cmdline = self.prefix
+        cmdline += ' --bucket mybucket'
+        # Unlike max-items, which is a client-side customization, the
+        # page size shows up in the request params as ``MaxKeys``.
+        cmdline += ' --page-size 100'
+        result = {'uri_params': {'Bucket': 'mybucket', 'MaxKeys': 100},
+                  'headers': {},}
+        self.assert_params_for_cmd(cmdline, result, ignore_params=['payload'])
+
     def test_starting_token(self):
         # We don't need to test this in depth because botocore
         # tests this. We just want to make sure this is hooked up
(Diffs for the remaining 2 changed files are not shown.)