Skip to content

Commit

Permalink
[R] Setup and scan ECR registry for upstream images (#4031, #4032, #4177
Browse files Browse the repository at this point in the history
)
  • Loading branch information
hannes-ucsc committed Jun 3, 2023
1 parent 981e458 commit 46b7586
Show file tree
Hide file tree
Showing 14 changed files with 567 additions and 33 deletions.
14 changes: 13 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
FROM docker.io/library/python:3.9.12-buster
ARG registry
FROM ${registry}docker.io/library/python:3.9.12-buster

RUN curl -o /usr/bin/docker-credential-ecr-login \
https://amazon-ecr-credential-helper-releases.s3.us-east-2.amazonaws.com/0.7.0/linux-amd64/docker-credential-ecr-login \
&& printf 'c978912da7f54eb3bccf4a3f990c91cc758e1494a8af7a60f3faf77271b565db /usr/bin/docker-credential-ecr-login\n' | sha256sum -c \
&& chmod +x /usr/bin/docker-credential-ecr-login

ARG registry
ENV azul_docker_registry=${registry}
RUN mkdir -p ${HOME}/.docker \
&& printf '{"credHelpers": {"%s": "ecr-login"}}\n' "${registry%/}" \
> "${HOME}/.docker/config.json"

SHELL ["/bin/bash", "-c"]

Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ define docker
.PHONY: docker$1
docker$1: check_docker
docker build \
--build-arg registry=$$(azul_docker_registry) \
--build-arg PIP_DISABLE_PIP_VERSION_CHECK=$$(PIP_DISABLE_PIP_VERSION_CHECK) \
--build-arg make_target=requirements$2 \
--build-arg cache_seed=${azul_docker_image_cache_seed} \
Expand Down Expand Up @@ -215,7 +216,7 @@ format: check_venv check_docker
--rm \
--volume $$(python scripts/resolve_container_path.py $(project_root)):/home/developer/azul \
--workdir /home/developer/azul \
docker.io/ucscgi/azul-pycharm:2022.3.3 \
$(azul_docker_registry)docker.io/ucscgi/azul-pycharm:2022.3.3 \
/opt/pycharm/bin/format.sh -r -settings .pycharm.style.xml -mask '*.py' $(relative_sources)

.PHONY: test
Expand Down
40 changes: 39 additions & 1 deletion environment
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,51 @@ _preauth() {
fi
eval "$env"
echo >&2
echo >&2 "Temporary session credentials are in effect for AWS and Google." \
if ! {
[ -z "${azul_docker_registry:+x}" ] \
|| aws ecr get-login-password --region us-east-1 \
| docker login \
--username AWS \
--password-stdin \
"${azul_docker_registry%/}"
}; then
echo >&2 "Login to ECR failed!"
return 3
fi
if ! {
[ -z "${azul_gitlab_access_token:+x}" ] \
|| [ -z "${azul_gitlab_user:+x}" ] \
|| printenv azul_gitlab_access_token \
| docker login \
--username "${azul_gitlab_user}" \
--password-stdin \
"docker.gitlab.${AZUL_DOMAIN_NAME}"
}; then
echo >&2 "Login to GitLab registry failed!"
return 3
fi
echo >&2
echo >&2 "Temporary session credentials are in effect for AWS, Google," \
"Amazon ECR and the GitLab docker registry." \
"Use '_deauth' to revert the effect of this command."
echo >&2
return 0
}

_deauth() {
if ! {
[ -z "${azul_docker_registry:+x}" ] \
|| docker logout "${azul_docker_registry}"
}; then
echo >&2 "Warning: Logout from ECR failed!"
fi
if ! {
[ -z "${azul_gitlab_access_token:+x}" ] \
|| [ -z "${azul_gitlab_user:+x}" ] \
|| docker logout "docker.gitlab.${AZUL_DOMAIN_NAME}"
}; then
echo >&2 "Warning: Logout from GitLab registry failed!"
fi
unset AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN
echo >&2
echo >&2 "Revoked temporary AWS credentials."
Expand Down
29 changes: 24 additions & 5 deletions environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,21 @@ def env() -> Mapping[str, Optional[str]]:
# between deployments.
'AZUL_DEPLOYMENT_STAGE': None,

# The Docker registry containing all 3rd party images used by this
# project, including images used locally, in FROM clauses, for CI/CD or
# GitLab. Must be empty or end in a slash. All references to 3rd party
# images must point at the registry defined here, ideally by prefixing
# the image reference with a reference to this variable. The registry
# and the images therein are managed by the `shared` TF component, which
# copies images from the upstream registry into the Azul registry. A
# 3rd-party image at `<registry>/<username>/<repository>:tag` is stored
# as `${azul_docker_registry}<registry>/<username>/<repository>:tag`
# in the Azul registry. To disable the use of the Azul registry, set
# this variable to the empty string.
#
'azul_docker_registry': '{AZUL_AWS_ACCOUNT_ID}.dkr.ecr.'
'{AWS_DEFAULT_REGION}.amazonaws.com/',

# Whether to enable direct access to objects in the DSS main bucket. If 0,
# bundles and files are retrieved from the DSS using the GET /bundles/{uuid}
# and GET /files/{UUID} endpoints. If 1, S3 GetObject requests are made
Expand Down Expand Up @@ -414,14 +429,18 @@ def env() -> Mapping[str, Optional[str]]:
# the repository defined in `azul_github_project`.
'azul_github_access_token': '',

# A GitLab private access token with `read_api` scope. This is used to
# download distribution tarballs for the `browser` TF component. This
# variable is typically only set on developer machines. In GitLab CI/CD
# pipelines, this variable should NOT be set because a different type of
# token is automatically provided via the CI_JOB_TOKEN variable.
# A GitLab private access token with scopes `read_api`, `read_registry`
# and `write_registry`. This variable is typically only set on developer
# machines. In GitLab CI/CD pipelines, this variable should NOT be set
# because a different type of token is automatically provided via the
# CI_JOB_TOKEN variable.
#
'azul_gitlab_access_token': None,

# The name of the user owning the token in `azul_gitlab_access_token`.
#
'azul_gitlab_user': None,

'PYTHONPATH': '{project_root}/src:{project_root}/test',
'MYPYPATH': '{project_root}/stubs',

Expand Down
1 change: 1 addition & 0 deletions requirements.dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ moto[s3,sqs,sns,dynamodb,iam]==4.1.4.dev8 # match the extras with the backends
boto3-stubs[s3,sqs,lambda,dynamodb,iam]==1.24.94 # match this with the version of the `boto3` runtime dependency
openapi-spec-validator==0.5.1
openpyxl==3.0.6
posix-ipc==1.1.1
pygithub==1.56
python-gitlab==3.13.0
pyyaml==5.4.1 # can't upgrade beyond that b/c awscli 1.25.95 depends on PyYAML<5.5 and >=3.10
Expand Down
146 changes: 146 additions & 0 deletions scripts/copy_images_to_ecr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import logging
from pathlib import (
Path,
)
import subprocess
from sys import (
argv,
)
from typing import (
Any,
ContextManager,
Iterable,
)

import docker
import posix_ipc

from azul import (
config,
require,
)
from azul.docker import (
ImageRef,
)
from azul.logging import (
configure_script_logging,
)

log = logging.getLogger(__name__)


def copy_image(src: str):
    """
    Copy a multi-platform image from its upstream registry to the registry
    configured in `config.docker_registry`.

    For every platform listed by the parsed image reference, the image is
    pulled, tagged as `<registry><name>:<tag>-<platform>` (slashes in the
    platform replaced with dashes) and pushed. Afterwards, the per-platform
    images are combined into a manifest list at `<registry><name>:<tag>` using
    the `docker manifest` CLI.

    :param src: the reference to the upstream image, in a format understood by
                `ImageRef.parse`

    :raises subprocess.CalledProcessError: if creating or pushing the manifest
                                           list fails
    """
    src = ImageRef.parse(src)
    dst_name = config.docker_registry + src.name
    dst = ImageRef.create(name=dst_name, tag=src.tag)
    dst_parts = []
    client = docker.client.from_env()
    try:
        for platform in src.platforms:
            output = client.api.pull(src.name,
                                     platform=str(platform),
                                     tag=src.tag,
                                     stream=True)
            log_lines(src, 'pull', output)
            image = client.images.get(str(src))
            dst_part = ImageRef.create(name=dst_name,
                                       tag=src.tag + '-' + str(platform).replace('/', '-'))
            image.tag(dst_part.name, tag=dst_part.tag)
            output = client.api.push(dst_part.name,
                                     tag=dst_part.tag,
                                     stream=True)
            log_lines(src, 'push', output)
            dst_parts.append(dst_part)
    finally:
        # Release the connections held by the client, even if a pull or push
        # failed
        client.close()

    # Removal is allowed to fail (check=False), presumably because there may
    # not be a locally cached manifest list to remove -- TODO confirm
    subprocess.run([
        'docker', 'manifest', 'rm',
        str(dst)
    ], check=False)
    subprocess.run([
        'docker', 'manifest', 'create',
        str(dst),
        *map(str, dst_parts)
    ], check=True)
    subprocess.run([
        'docker', 'manifest', 'push',
        str(dst)
    ], check=True)


def log_lines(context: Any, command: str, output: Iterable[bytes]):
    """
    Log every line of the given output of a Docker command at INFO level.

    :param context: an object identifying what the command operates on; it is
                    formatted with `%s` at the start of each log message

    :param command: the name of the Docker subcommand that produced the output

    :param output: the lines of output, as bytes; each one is decoded and
                   stripped of surrounding whitespace before being logged
    """
    for raw_line in output:
        text = raw_line.decode().strip()
        log.info('%s: docker %s %s', context, command, text)


class Semaphore(ContextManager):
    """
    A named semaphore for synchronizing multiple Python programs, or multiple
    instances of one Python program. Not thread-safe. This is a wrapper around
    posix_ipc.Semaphore that creates the underlying semaphore on demand, makes
    unlinking it more succinct, and logs the important events in the lifecycle
    of the semaphore.
    """

    def __init__(self, name: str, initial_value: int) -> None:
        super().__init__()
        self.name = name
        self.initial_value = initial_value
        # The underlying posix_ipc.Semaphore; created or opened lazily on
        # first entry into the context
        self.inner = None

    def __enter__(self):
        # Create or open the underlying semaphore on first use, then acquire it
        if self.inner is None:
            log.debug('Creating or opening semaphore %r', self.name)
            self.inner = posix_ipc.Semaphore(self.name,
                                             flags=posix_ipc.O_CREAT,
                                             initial_value=self.initial_value)
        log.info('Acquiring semaphore %r', self.name)
        self.inner.acquire()
        log.info('Acquired semaphore %r', self.name)
        return self

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> bool:
        # Release the semaphore; returning False lets any exception raised
        # inside the `with` block propagate
        self.inner.release()
        log.info('Released semaphore %r', self.name)
        return False

    def destroy(self):
        """
        Unlink the underlying semaphore. If this instance never created or
        opened it, try to open the semaphore by name first; a failure to open
        it is logged at DEBUG level and otherwise ignored.
        """
        if self.inner is not None:
            handle, self.inner = self.inner, None
            self._destroy(handle)
            return
        log.debug('Opening semaphore %r for deletion', self.name)
        try:
            handle = posix_ipc.Semaphore(self.name)
        except Exception:
            log.debug('Failed to open semaphore %s', self.name, exc_info=True)
        else:
            self._destroy(handle)

    def _destroy(self, semaphore):
        # Unlink the given posix_ipc.Semaphore, logging the event
        log.info('Deleting semaphore %r', self.name)
        semaphore.unlink()


def main():
    """
    With one command line argument, copy the image it refers to while holding
    the semaphore. Without arguments, delete the semaphore.
    """
    # Terraform runs up to 10 operations concurrently by default, so there
    # could be as many concurrent invocations of this script. That would
    # overwhelm the local Docker daemon and cause many of those invocations to
    # raise socket timeouts. Terraform currently offers no way to limit
    # concurrency per resource or resource type, and we don't want to limit it
    # globally, so we enforce the concurrency limit here, using a semaphore.
    arg_count = len(argv)
    require(arg_count in (1, 2), 'Must pass zero or one argument', arg_count)
    semaphore_name = '/' + Path(__file__).stem + '.' + config.deployment_stage
    semaphore = Semaphore(name=semaphore_name, initial_value=1)
    if arg_count != 2:
        semaphore.destroy()
    else:
        with semaphore:
            copy_image(argv[1])


if __name__ == '__main__':
    configure_script_logging(log)
    main()
2 changes: 2 additions & 0 deletions scripts/kibana_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@

from azul import (
cached_property,
config,
)
from azul.deployment import (
aws,
Expand All @@ -61,6 +62,7 @@ def __init__(self, options) -> None:
self.docker = docker.from_env()

def create_container(self, image: str, *args, **kwargs) -> Container:
image = config.docker_registry + image
try:
container = self.docker.containers.create(image, *args, **kwargs)
except docker.errors.ImageNotFound:
Expand Down
26 changes: 26 additions & 0 deletions src/azul/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1418,6 +1418,32 @@ def slack_integration(self) -> Optional[SlackIntegration]:

manifest_column_joiner = '||'

@property
def docker_registry(self) -> str:
    """
    The value of the `azul_docker_registry` environment variable, i.e., the
    prefix to prepend to references to 3rd-party images.
    """
    registry = self.environ['azul_docker_registry']
    return registry

# Note that a change to the image references here also requires redeploying
# the `shared` TF component.

# Fully qualified references to 3rd-party images, each consisting of the
# upstream registry host, the repository and a pinned tag. Presumably this is
# the set of images that gets copied into the registry returned by
# `docker_registry` -- TODO confirm against the `shared` TF component.
#
docker_images = [
'docker.elastic.co/elasticsearch/elasticsearch:7.10.1',
'docker.elastic.co/kibana/kibana-oss:7.10.2',
'docker.io/clamav/clamav:1.1.0-1',
'docker.io/cllunsford/aws-signing-proxy:0.2.2',
'docker.io/gitlab/gitlab-ce:15.11.2-ce.0',
'docker.io/gitlab/gitlab-runner:v15.11.0',
'docker.io/library/docker:20.10.18',
'docker.io/library/docker:20.10.18-dind',
'docker.io/library/python:3.9.12-buster',
'docker.io/lmenezes/cerebro:0.9.4',
'docker.io/ucscgi/azul-pycharm:2022.3.3',
]

# Docker platform identifiers in `<os>/<architecture>` form. Presumably the
# platforms for which the images above are made available -- verify against
# the code that consumes this list.
#
docker_platforms = [
'linux/arm64',
'linux/amd64'
]


config: Config = Config() # yes, the type hint does help PyCharm

Expand Down
Loading

0 comments on commit 46b7586

Please sign in to comment.