From 7a7ab7e268db5e5d695e6136a3b46dd3d86c9f98 Mon Sep 17 00:00:00 2001 From: Viktor Shlapakov Date: Tue, 2 Aug 2016 19:20:26 +0300 Subject: [PATCH] Replace s3cmd with awscli --- scrapy_dotpersistence.py | 11 ++--------- setup.py | 4 ++-- tests/test_dotpersistence.py | 9 +++------ 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/scrapy_dotpersistence.py b/scrapy_dotpersistence.py index 26bfc83..a3be5c6 100644 --- a/scrapy_dotpersistence.py +++ b/scrapy_dotpersistence.py @@ -51,20 +51,13 @@ def _load_data(self): self._bucket, self._projectid, self._spider ) logger.info('Syncing .scrapy directory from %s' % self._s3path) - # pre-create dest dir as non-existent destination is treated as file - # by s3cmd (1.1.0) - if not os.path.isdir(self._localpath): - os.makedirs(self._localpath) - - cmd = ['s3cmd', 'sync', '--no-preserve', self._s3path, self._localpath] + cmd = ['aws', 's3', 'sync', self._s3path, self._localpath] self._call(cmd) def _store_data(self): # check for reason status here? logger.info('Syncing .scrapy directory to %s' % self._s3path) - cmd = ['s3cmd', 'sync', '--no-preserve', - '--multipart-chunk-size-mb=5120', - '--delete-removed', + cmd = ['aws', 's3', 'sync', '--delete', self._localpath, self._s3path] self._call(cmd) diff --git a/setup.py b/setup.py index 7e9e03c..bb75a5a 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ 'Programming Language :: Python :: 2.7', ], install_requires=[ - 'Scrapy>=1.0.3', - 's3cmd>=1.6.0', + 'Scrapy>=1.0.3', + 'awscli>=1.10.51', ], ) diff --git a/tests/test_dotpersistence.py b/tests/test_dotpersistence.py index 8d455a4..8bca772 100644 --- a/tests/test_dotpersistence.py +++ b/tests/test_dotpersistence.py @@ -73,9 +73,8 @@ def test_load_data(self): self.instance._load_data() s3_path1 = 's3://test-bucket/test-user/123/dot-scrapy/testspider/' self.assertEqual(self.instance._s3path, s3_path1) - assert os.path.exists(self.instance._localpath) mocked_call.assert_called_with( - ['s3cmd', 'sync', '--no-preserve', s3_path1, '/tmp/.scrapy']) + ['aws', 's3', 'sync', s3_path1, '/tmp/.scrapy']) # test other s3_path w/o bucket_folder mocked_call.reset() @@ -84,16 +83,14 @@ def test_load_data(self): s3_path2 = 's3://test-bucket/123/dot-scrapy/testspider/' self.assertEqual(self.instance._s3path, s3_path2) mocked_call.assert_called_with( - ['s3cmd', 'sync', '--no-preserve', s3_path2, '/tmp/.scrapy']) + ['aws', 's3', 'sync', s3_path2, '/tmp/.scrapy']) def test_store_data(self): mocked_call = mock.Mock() self.instance._call = mocked_call self.instance._store_data() mocked_call.assert_called_with( - ['s3cmd', 'sync', '--no-preserve', - '--multipart-chunk-size-mb=5120', - '--delete-removed', '/tmp/.scrapy', + ['aws', 's3', 'sync', '--delete', '/tmp/.scrapy', 's3://test-bucket/test-user/123/dot-scrapy/testspider/']) def test_call(self):