Skip to content

Commit

Permalink
Replace s3cmd with awscli
Browse files Browse the repository at this point in the history
  • Loading branch information
vshlapakov committed Aug 2, 2016
1 parent 35403cb commit 7a7ab7e
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 17 deletions.
11 changes: 2 additions & 9 deletions scrapy_dotpersistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,20 +51,13 @@ def _load_data(self):
self._bucket, self._projectid, self._spider
)
logger.info('Syncing .scrapy directory from %s' % self._s3path)
# pre-create dest dir as non-existent destination is treated as file
# by s3cmd (1.1.0)
if not os.path.isdir(self._localpath):

This comment has been minimized.

Copy link
@sklopfenstein

sklopfenstein Jul 11, 2017

if not os.path.isdir(self._localpath): os.makedirs(self._localpath)
Removing this directory pre-creation check makes v0.3.0 unusable on Scrapinghub, since the .scrapy directory does not exist there by default.

os.makedirs(self._localpath)

cmd = ['s3cmd', 'sync', '--no-preserve', self._s3path, self._localpath]
cmd = ['aws', 's3', 'sync', self._s3path, self._localpath]
self._call(cmd)

def _store_data(self):
# check for reason status here?
logger.info('Syncing .scrapy directory to %s' % self._s3path)
cmd = ['s3cmd', 'sync', '--no-preserve',
'--multipart-chunk-size-mb=5120',
'--delete-removed',
cmd = ['aws', 's3', 'sync', '--delete',
self._localpath, self._s3path]
self._call(cmd)

Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
'Programming Language :: Python :: 2.7',
],
install_requires=[
'Scrapy>=1.0.3',
's3cmd>=1.6.0',
'Scrapy>=1.0.3',
'awscli>=1.10.51',
],
)
9 changes: 3 additions & 6 deletions tests/test_dotpersistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,8 @@ def test_load_data(self):
self.instance._load_data()
s3_path1 = 's3://test-bucket/test-user/123/dot-scrapy/testspider/'
self.assertEqual(self.instance._s3path, s3_path1)
assert os.path.exists(self.instance._localpath)
mocked_call.assert_called_with(
['s3cmd', 'sync', '--no-preserve', s3_path1, '/tmp/.scrapy'])
['aws', 's3', 'sync', s3_path1, '/tmp/.scrapy'])

# test other s3_path w/o bucket_folder
mocked_call.reset()
Expand All @@ -84,16 +83,14 @@ def test_load_data(self):
s3_path2 = 's3://test-bucket/123/dot-scrapy/testspider/'
self.assertEqual(self.instance._s3path, s3_path2)
mocked_call.assert_called_with(
['s3cmd', 'sync', '--no-preserve', s3_path2, '/tmp/.scrapy'])
['aws', 's3', 'sync', s3_path2, '/tmp/.scrapy'])

def test_store_data(self):
mocked_call = mock.Mock()
self.instance._call = mocked_call
self.instance._store_data()
mocked_call.assert_called_with(
['s3cmd', 'sync', '--no-preserve',
'--multipart-chunk-size-mb=5120',
'--delete-removed', '/tmp/.scrapy',
['aws', 's3', 'sync', '--delete', '/tmp/.scrapy',
's3://test-bucket/test-user/123/dot-scrapy/testspider/'])

def test_call(self):
Expand Down

0 comments on commit 7a7ab7e

Please sign in to comment.