Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --humanize and --summarize options for s3 ls #1103

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions awscli/customizations/s3/subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from awscli.customizations.s3.filters import create_filter
from awscli.customizations.s3.s3handler import S3Handler, S3StreamHandler
from awscli.customizations.s3.utils import find_bucket_key, uni_print, \
AppendFilter, find_dest_path_comp_key
AppendFilter, find_dest_path_comp_key, humanize
from awscli.customizations.s3.syncstrategy.base import MissingFileSync, \
SizeAndLastModifiedSync, NeverSync

Expand All @@ -38,6 +38,12 @@
"Command is performed on all files or objects "
"under the specified directory or prefix.")}

# Optional display flags for the ``s3 ls`` command (see ARG_TABLE in
# ListCommand below).
HUMANIZE = {
    'name': 'humanize',
    'action': 'store_true',
    'help_text': (
        "Displays file sizes in human readable format."),
}

SUMMARIZE = {
    'name': 'summarize',
    'action': 'store_true',
    'help_text': (
        "Displays summary information (number of objects, total size)."),
}

DRYRUN = {'name': 'dryrun', 'action': 'store_true',
'help_text': (
"Displays the operations that would be performed using the "
Expand Down Expand Up @@ -242,13 +248,16 @@ class ListCommand(S3Command):
USAGE = "<S3Path> or NONE"
ARG_TABLE = [{'name': 'paths', 'nargs': '?', 'default': 's3://',
'positional_arg': True, 'synopsis': USAGE}, RECURSIVE,
PAGE_SIZE]
PAGE_SIZE, HUMANIZE, SUMMARIZE]
EXAMPLES = BasicCommand.FROM_FILE('s3/ls.rst')

def _run_main(self, parsed_args, parsed_globals):
super(ListCommand, self)._run_main(parsed_args, parsed_globals)
self._empty_result = False
self._at_first_page = True
self._total_size = 0
self._total_objects = 0
self._humanize = parsed_args.humanize
path = parsed_args.paths
if path.startswith('s3://'):
path = path[5:]
Expand All @@ -261,6 +270,8 @@ def _run_main(self, parsed_args, parsed_globals):
parsed_args.page_size)
else:
self._list_all_objects(bucket, key, parsed_args.page_size)
if parsed_args.summarize:
self._print_summary()
if key:
# User specified a key to look for. We should return an rc of one
# if there are no matching keys and/or prefixes or return an rc
Expand All @@ -276,7 +287,6 @@ def _run_main(self, parsed_args, parsed_globals):
return 0

def _list_all_objects(self, bucket, key, page_size=None):

operation = self.service.get_operation('ListObjects')
iterator = operation.paginate(self.endpoint, bucket=bucket,
prefix=key, delimiter='/',
Expand All @@ -298,6 +308,8 @@ def _display_page(self, response_data, use_basename=True):
uni_print(print_str)
for content in contents:
last_mod_str = self._make_last_mod_str(content['LastModified'])
self._total_size += int(content['Size'])
self._total_objects += 1
size_str = self._make_size_str(content['Size'])
if use_basename:
filename_components = content['Key'].split('/')
Expand Down Expand Up @@ -343,17 +355,25 @@ def _make_last_mod_str(self, last_mod):
str(last_mod.day).zfill(2),
str(last_mod.hour).zfill(2),
str(last_mod.minute).zfill(2),
str(last_mod.second).zfill(2))
str(last_mod.second).zfill(2))
last_mod_str = "%s-%s-%s %s:%s:%s" % last_mod_tup
return last_mod_str.ljust(19, ' ')

def _make_size_str(self, size):
    """Format one object's size for the listing column.

    Uses the human-readable form when ``--humanize`` was supplied,
    otherwise the raw byte count; either way the result is
    right-justified to a 10-character column.
    """
    if self._humanize:
        formatted = humanize(size)
    else:
        formatted = str(size)
    return formatted.rjust(10, ' ')

def _print_summary(self):
    """Print the object count and aggregate size accumulated while listing.

    Emitted after the listing when ``--summarize`` was supplied; the
    size honors ``--humanize``.
    """
    count_str = str(self._total_objects)
    uni_print("\nTotal Objects: ".rjust(15, ' ') + count_str + "\n")
    if self._humanize:
        size_str = humanize(self._total_size)
    else:
        size_str = str(self._total_size)
    uni_print("Total Size: ".rjust(15, ' ') + size_str + "\n")

class WebsiteCommand(S3Command):
NAME = 'website'
Expand Down
16 changes: 16 additions & 0 deletions awscli/customizations/s3/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,3 +456,19 @@ def __new__(cls, message, error=False, total_parts=None, warning=None):
class IOCloseRequest(_IOCloseRequest):
    """Message asking the IO handler to close the file ``filename``.

    ``desired_mtime`` optionally carries a modification time to apply
    when the file is closed -- presumably honored by the consumer of
    this message; TODO confirm against the IO thread implementation.
    """
    def __new__(cls, filename, desired_mtime=None):
        # _IOCloseRequest is declared elsewhere in this module (not
        # visible here); this subclass only adds the default for
        # ``desired_mtime``.
        return super(IOCloseRequest, cls).__new__(cls, filename, desired_mtime)



humanize_suffixes = ('kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')

def humanize(value):
format='%.1f'
base = 1000
bytes = float(value)

if bytes == 1: return '1 Byte'
elif bytes < base: return '%d Bytes' % bytes

for i,sfx in enumerate(humanize_suffixes):
unit = base ** (i+2)
if bytes < unit: return (format + ' %s') % ((base * bytes / unit), sfx)
21 changes: 21 additions & 0 deletions awscli/examples/s3/ls.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,24 @@ Output::
2013-09-02 21:32:57 189 foo/bar/.baz/hooks/foo
2013-09-02 21:32:57 398 z.txt

The following ``ls`` command demonstrates the same command using the ``--humanize`` and ``--summarize`` options.
``--humanize`` displays file sizes in human readable units (Bytes, kB, MB, GB, TB, PB, EB, ZB, YB). ``--summarize``
displays the total number of objects and total size at the end of the result listing::

aws s3 ls s3://mybucket --recursive --humanize --summarize

Output::

2013-09-02 21:37:53 10 Bytes a.txt
2013-09-02 21:37:53 2.9 MB foo.zip
2013-09-02 21:32:57 23 Bytes foo/bar/.baz/a
2013-09-02 21:32:58 41 Bytes foo/bar/.baz/b
2013-09-02 21:32:57 281 Bytes foo/bar/.baz/c
2013-09-02 21:32:57 73 Bytes foo/bar/.baz/d
2013-09-02 21:32:57 452 Bytes foo/bar/.baz/e
2013-09-02 21:32:57 896 Bytes foo/bar/.baz/hooks/bar
2013-09-02 21:32:57 189 Bytes foo/bar/.baz/hooks/foo
2013-09-02 21:32:57 398 Bytes z.txt

Total Objects: 10
Total Size: 2.9 MB
57 changes: 57 additions & 0 deletions tests/unit/customizations/s3/test_ls_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,63 @@ def test_fail_rc_no_objects_nor_prefixes(self):
self.parsed_responses = [{}]
self.run_cmd('s3 ls s3://bucket/foo', expected_rc=1)

def test_humanize_file_size(self):
    """--humanize renders each listed object's size in SI units."""
    time_utc = "2014-01-09T20:45:49.000Z"
    self.parsed_responses = [{"CommonPrefixes": [], "Contents": [
        {"Key": "onebyte.txt", "Size": 1, "LastModified": time_utc},
        {"Key": "onekilobyte.txt", "Size": 1000, "LastModified": time_utc},
        {"Key": "onemegabyte.txt", "Size": 1000**2, "LastModified": time_utc},
        {"Key": "onegigabyte.txt", "Size": 1000**3, "LastModified": time_utc},
        {"Key": "oneterabyte.txt", "Size": 1000**4, "LastModified": time_utc},
        {"Key": "onepetabyte.txt", "Size": 1000**5, "LastModified": time_utc}]}]
    stdout, _, _ = self.run_cmd('s3 ls s3://bucket/ --humanize', expected_rc=0)
    # Time is stored in UTC timezone, but the actual time displayed
    # is specific to your tzinfo, so shift the timezone to your local's.
    time_local = parser.parse(time_utc).astimezone(tz.tzlocal())
    time_fmt = time_local.strftime('%Y-%m-%d %H:%M:%S')
    self.assertIn('%s 1 Byte onebyte.txt\n' % time_fmt, stdout)
    self.assertIn('%s 1.0 kB onekilobyte.txt\n' % time_fmt, stdout)
    self.assertIn('%s 1.0 MB onemegabyte.txt\n' % time_fmt, stdout)
    self.assertIn('%s 1.0 GB onegigabyte.txt\n' % time_fmt, stdout)
    self.assertIn('%s 1.0 TB oneterabyte.txt\n' % time_fmt, stdout)
    self.assertIn('%s 1.0 PB onepetabyte.txt\n' % time_fmt, stdout)

def test_summarize(self):
    """--summarize appends total object count and total byte size."""
    time_utc = "2014-01-09T20:45:49.000Z"
    self.parsed_responses = [{"CommonPrefixes": [], "Contents": [
        {"Key": "onebyte.txt", "Size": 1, "LastModified": time_utc},
        {"Key": "onekilobyte.txt", "Size": 1000, "LastModified": time_utc},
        {"Key": "onemegabyte.txt", "Size": 1000**2, "LastModified": time_utc},
        {"Key": "onegigabyte.txt", "Size": 1000**3, "LastModified": time_utc},
        {"Key": "oneterabyte.txt", "Size": 1000**4, "LastModified": time_utc},
        {"Key": "onepetabyte.txt", "Size": 1000**5, "LastModified": time_utc}]}]
    stdout, _, _ = self.run_cmd('s3 ls s3://bucket/ --summarize', expected_rc=0)
    # The summary only reports totals, so no per-object timestamp
    # formatting is needed here (unlike test_humanize_file_size).
    # Total size is the sum of the six sizes above.
    self.assertIn('Total Objects: 6\n', stdout)
    self.assertIn('Total Size: 1001001001001001\n', stdout)

def test_summarize_with_humanize(self):
    """--summarize with --humanize renders the total size in SI units."""
    time_utc = "2014-01-09T20:45:49.000Z"
    self.parsed_responses = [{"CommonPrefixes": [], "Contents": [
        {"Key": "onebyte.txt", "Size": 1, "LastModified": time_utc},
        {"Key": "onekilobyte.txt", "Size": 1000, "LastModified": time_utc},
        {"Key": "onemegabyte.txt", "Size": 1000**2, "LastModified": time_utc},
        {"Key": "onegigabyte.txt", "Size": 1000**3, "LastModified": time_utc},
        {"Key": "oneterabyte.txt", "Size": 1000**4, "LastModified": time_utc},
        {"Key": "onepetabyte.txt", "Size": 1000**5, "LastModified": time_utc}]}]
    stdout, _, _ = self.run_cmd('s3 ls s3://bucket/ --humanize --summarize',
                                expected_rc=0)
    # The summary only reports totals, so no per-object timestamp
    # formatting is needed here.  The ~1.001 PB total rounds to 1.0 PB.
    self.assertIn('Total Objects: 6\n', stdout)
    self.assertIn('Total Size: 1.0 PB\n', stdout)

if __name__ == "__main__":
unittest.main()
9 changes: 6 additions & 3 deletions tests/unit/customizations/s3/test_subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def setUp(self):

def test_ls_command_for_bucket(self):
ls_command = ListCommand(self.session)
parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False, page_size='5')
parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False, page_size='5',
humanize=False, summarize=False)
parsed_globals = mock.Mock()
ls_command._run_main(parsed_args, parsed_globals)
call = self.session.get_service.return_value.get_operation\
Expand All @@ -78,7 +79,8 @@ def test_ls_command_for_bucket(self):
def test_ls_command_with_no_args(self):
ls_command = ListCommand(self.session)
parsed_global = FakeArgs(region=None, endpoint_url=None, verify_ssl=None)
parsed_args = FakeArgs(dir_op=False, paths='s3://')
parsed_args = FakeArgs(dir_op=False, paths='s3://', humanize=False,
summarize=False)
ls_command._run_main(parsed_args, parsed_global)
# We should only be a single call.
self.session.get_service.return_value.get_operation.assert_called_with(
Expand All @@ -98,7 +100,8 @@ def test_ls_with_verify_argument(self):
ls_command = ListCommand(self.session)
parsed_global = FakeArgs(region='us-west-2', endpoint_url=None,
verify_ssl=False)
parsed_args = FakeArgs(paths='s3://', dir_op=False)
parsed_args = FakeArgs(paths='s3://', dir_op=False, humanize=False,
summarize=False)
ls_command._run_main(parsed_args, parsed_global)
# Verify get_endpoint
get_endpoint = self.session.get_service.return_value.get_endpoint
Expand Down