Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding option to store bdist files in S3 as a second-level cache #33

Merged
merged 7 commits into from
Nov 9, 2014
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,32 @@ distributions).
installations manually because neither ``easy_install`` nor ``pip`` supports binary
``*.tar.gz`` distributions.

Storing the Binary Cache on Amazon S3
-------------------------------------

``pip-accel`` can optionally store its binary cache files on Amazon S3. If configured,
S3 is used as a second-level cache - only used if the local filesystem cache does not
have a required pre-built dependency. If the dependency is not found on S3, it is built
and saved locally as usual, but then also saved to S3.

This functionality can be useful for continuous integration build worker boxes that are
ephemeral and don't have persistent local storage to hold the ``pip-accel`` binary
cache.

To configure, install the `boto <https://github.com/boto/boto>`_ module in your environment,
and then set the following environment variables:

* ``PIP_S3_CACHE_BUCKET`` : the name of the S3 bucket to use
* ``PIP_S3_CACHE_PREFIX`` : the prefix (analogous to folder path) for the root of the cache.
Since these are binary packages, this prefix should be unique for each machine-architecture.

If these variables are not set, or if the ``boto`` module is not found, then the S3 cache is
not used.

You will also need to set AWS credentials - either in a
`.boto <http://boto.readthedocs.org/en/latest/boto_config_tut.html>`_ file or in the
``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` environment variables.

Contact
-------

Expand Down
50 changes: 49 additions & 1 deletion pip_accel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
# Modules included in our package.
from pip_accel.bdist import get_binary_dist, install_binary_dist
from pip_accel.config import (binary_index, download_cache, index_version_file,
on_debian, source_index)
on_debian, source_index, s3_cache_bucket, s3_cache_prefix)
from pip_accel.req import Requirement
from pip_accel.utils import run

Expand Down Expand Up @@ -487,5 +487,53 @@ def dependency_links(self):
def dependency_links(self, value):
logger.debug("Custom package finder ignoring 'dependency_links' value (%r) ..", value)


def cache_file_exists(cache_file, binary_index):
    """Check whether a cached binary distribution is available locally.

    Looks for `cache_file` on the local filesystem first. If it is missing
    and an S3 cache bucket is configured, attempt to download the file from
    the S3 cache into the local cache so it can be used.

    :param cache_file: Absolute pathname of the local cache file.
    :param binary_index: Pathname of the local binary index directory
                         (used to derive the S3 key).
    :returns: ``True`` if the file is now available locally (possibly after
              downloading it from S3), ``False`` otherwise.
    """
    if os.path.isfile(cache_file):
        return True
    if s3_cache_bucket is None:
        return False
    logger.debug("S3_CACHE_BUCKET is set, attempting to read file from S3 cache.")
    try:
        # Imported lazily so boto remains an optional dependency; the
        # previously present `from boto.s3.key import Key` import was unused
        # here and has been removed.
        import boto
        bucket = boto.connect_s3().get_bucket(s3_cache_bucket)
        s3_key = get_s3_key_path(binary_index, cache_file)
        logger.info("Downloading %s from S3 cache.", s3_key)
        key = bucket.get_key(s3_key)
        if key is not None:
            key.get_contents_to_filename(cache_file)
            return True
    except ImportError:
        logger.debug("boto module not found - cannot read file from S3 cache.")
    # Either boto is unavailable or the key does not exist in the S3 cache.
    return False


def store_file_into_s3_cache(cache_file, binary_index):
    """Upload a cached binary distribution to the S3 cache.

    Does nothing unless an S3 cache bucket is configured and the ``boto``
    module is importable.

    :param cache_file: Absolute pathname of the local cache file to upload.
    :param binary_index: Pathname of the local binary index directory
                         (used to derive the S3 key).
    :returns: ``True`` if the file was stored in S3, ``False`` otherwise.
    """
    if s3_cache_bucket is None:
        return False
    logger.debug("S3_CACHE_BUCKET is set, attempting to store file in S3 cache.")
    try:
        # Imported lazily so boto remains an optional dependency.
        import boto
        from boto.s3.key import Key
        bucket = boto.connect_s3().get_bucket(s3_cache_bucket)
        s3_key = get_s3_key_path(binary_index, cache_file)
        # Lazy %-style arguments match the logging convention used elsewhere
        # in this module.
        logger.info("Storing file %s into S3 cache at %s.", cache_file, s3_key)
        key = Key(bucket)
        key.key = s3_key
        key.set_contents_from_filename(cache_file)
        return True
    except ImportError:
        logger.debug("boto module not found - cannot store file into S3 cache.")
        return False


def get_s3_key_path(binary_index, cache_file):
    """Build the S3 key for *cache_file*, rooted at the configured cache prefix.

    The key is the cache file's name relative to the local binary index
    directory, joined onto ``s3_cache_prefix``.
    """
    relative_name = cache_file.replace(binary_index + '/', '')
    return '/'.join([s3_cache_prefix, relative_name])


if __name__ == '__main__':
main()
6 changes: 5 additions & 1 deletion pip_accel/bdist.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from humanfriendly import Spinner, Timer

# Modules included in our package.
import pip_accel
from pip_accel.config import binary_index, on_debian
from pip_accel.deps import sanity_check_dependencies
from pip_accel.utils import get_python_version
Expand Down Expand Up @@ -77,7 +78,7 @@ def get_binary_dist(package, version, directory, url=None, python='/usr/bin/pyth
url = None
tag = hashlib.sha1(str(version + url).encode()).hexdigest() if url else version
cache_file = os.path.join(binary_index, '%s:%s:%s.tar.gz' % (package, tag, get_python_version()))
if not os.path.isfile(cache_file):
if not pip_accel.cache_file_exists(cache_file, binary_index):
logger.debug("%s (%s) hasn't been cached yet, doing so now.", package, version)
# Build the binary distribution.
try:
Expand All @@ -95,6 +96,7 @@ def get_binary_dist(package, version, directory, url=None, python='/usr/bin/pyth
# moving the transformed binary distribution into its final place.
os.rename(transformed_file, cache_file)
logger.debug("%s (%s) cached as %s.", package, version, cache_file)
pip_accel.store_file_into_s3_cache(cache_file, binary_index)
archive = tarfile.open(cache_file, 'r:gz')
for member in archive.getmembers():
yield member, archive.extractfile(member.name)
Expand Down Expand Up @@ -323,3 +325,5 @@ class NoBuildOutput(Exception):
Raised by :py:func:`build_binary_dist()` when a binary distribution build
fails to produce a binary distribution archive.
"""


6 changes: 6 additions & 0 deletions pip_accel/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
# Select the default location of the download cache and other files based on
# the user running the pip-accel command (root goes to /var/cache/pip-accel,
# otherwise ~/.pip-accel).
s3_cache_bucket = None
s3_cache_prefix = None

if os.getuid() == 0:
download_cache = '/root/.pip/download-cache'
pip_accel_cache = '/var/cache/pip-accel'
Expand All @@ -22,6 +25,9 @@
pip_accel_cache = expand_user('~/.pip-accel')

# Enable overriding the default locations with environment variables.
if 'PIP_S3_CACHE_BUCKET' in os.environ:
s3_cache_bucket = os.environ['PIP_S3_CACHE_BUCKET']
s3_cache_prefix = os.environ.get('PIP_S3_CACHE_PREFIX', '')
if 'PIP_DOWNLOAD_CACHE' in os.environ:
download_cache = expand_user(os.environ['PIP_DOWNLOAD_CACHE'])
if 'PIP_ACCEL_CACHE' in os.environ:
Expand Down