Skip to content

Commit

Permalink
Merge pull request #59 from apls777/dev
Browse files Browse the repository at this point in the history
Spotty v1.2.4
  • Loading branch information
apls777 authored Feb 1, 2020
2 parents 27fb337 + 14c3611 commit e67cae8
Show file tree
Hide file tree
Showing 15 changed files with 92 additions and 44 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ build/
dist/
*.egg-info/
__pycache__/
todo
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,28 @@
# ...

instances:
- name: i1
- name: gcp-image
provider: gcp
parameters:
zone: us-central1-a
machineType: n1-standard-1
onDemandInstance: true
imageName: spotty-1-0-0-20190827
imageName: spotty-image-[x-x-x]-[YYYYMMDD]
gpu:
type: nvidia-tesla-k80
```
The `imageName` parameter should contain the image version defined by the
`spotty.providers.gcp.deployment.image_deployment.ImageDeployment.VERSION` property.

If a new image is incompatible with the current version of Spotty, the family name should be
changed to `spotty-<version>`, where `<version>` (written with dashes, e.g. `spotty-1-2-5`) is the first
version of Spotty that supports the new image. The family name should also be changed in the code with the next release (
`spotty/providers/gcp/deployment/instance_deployment.py`, `_get_image()` method).

A command to create an image:
```bash
spotty gcp create-image -f spotty
spotty gcp create-image -f spotty-1-2-5
```

2. Share a created image:
Expand All @@ -32,11 +40,11 @@
gcloud compute images get-iam-policy $IMAGE_NAME --format json > policy.json
```

- Update the `policy.json` file:
- Update the `policy.json` file by adding `bindings`:

```json
{
"etag": "BwWRHwABAg0=",
"etag": "ACAB",
"version": 1,
"bindings": [
{
Expand Down
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ def get_description():
long_description=get_description(),
long_description_content_type='text/markdown',
packages=find_packages(exclude=['tests*']),
package_data={'spotty.providers.aws.deployment.cf_templates': ['data/*.yaml']},
package_data={
'spotty.providers.aws.deployment.cf_templates': ['data/*.yaml'],
'spotty.providers.gcp.deployment.dm_templates': ['image/*.yaml', 'instance/*.yaml'],
},
scripts=['bin/spotty'],
install_requires=[
'boto3>=1.9.0',
Expand Down
2 changes: 1 addition & 1 deletion spotty/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.2.3'
__version__ = '1.2.4'
2 changes: 1 addition & 1 deletion spotty/deployment/container_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def get_runtime_parameters(self, is_nvidia_runtime: bool):
"""Returns parameters for the ""docker run" command."""
parameters = self._config.runtime_parameters + ['-td', '--net=host']
if is_nvidia_runtime:
parameters += ['--runtime=nvidia']
parameters += ['--gpus', 'all']

for volume_mount in self.volume_mounts:
parameters += ['-v', '%s:%s' % (volume_mount.host_dir, volume_mount.container_dir)]
Expand Down
6 changes: 5 additions & 1 deletion spotty/providers/aws/deployment/ami_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
class AmiDeployment(AbstractAwsDeployment):

# version of the AMI stack
VERSION = '1.0.4'
VERSION = '1.1.0'

@property
def ec2_instance_name(self) -> str:
Expand Down Expand Up @@ -61,6 +61,10 @@ def _get_template_parameters(self, debug_mode: bool = False):
'InstanceType': self.instance_config.instance_type,
'ImageName': self.instance_config.ami_name,
'InstanceNameTag': self.ec2_instance_name,
'NvidiaDriverVersion': '410',
'DockerCEVersion': '19.03.5',
'ContainerdIOVersion': '1.2.10-3',
'NvidiaContainerToolkitVersion': '1.0.5-1',
}

if debug_mode:
Expand Down
38 changes: 26 additions & 12 deletions spotty/providers/aws/deployment/cf_templates/data/ami.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,22 @@ Parameters:
Description: Name for the instance
Type: String
Default: ''
NvidiaDriverVersion:
Description: NVIDIA Driver Version
Type: String
Default: ''
DockerCEVersion:
Description: Docker CE Version
Type: String
Default: ''
ContainerdIOVersion:
Description: containerd.io Version
Type: String
Default: ''
NvidiaContainerToolkitVersion:
Description: NVIDIA Container Toolkit Version
Type: String
Default: ''
DebugMode:
Description: Debug mode
Type: String
Expand All @@ -44,7 +60,7 @@ Resources:
if(!amiId){throw new Error('AMI not found')}
console.log('Found AMI ID='+amiId)
physicalId=amiId;success()}).catch((err)=>failed(err))};function isBeta(imageName){return imageName.toLowerCase().indexOf("beta")>-1||imageName.toLowerCase().indexOf(".rc")>-1}
Runtime: nodejs8.10
Runtime: nodejs12.x
Timeout: 30
FindBaseAMILambdaExecutionRole:
Type: AWS::IAM::Role
Expand Down Expand Up @@ -215,32 +231,30 @@ Resources:
stable"
apt-get update
apt-get install -y \
docker-ce=5:18.09.3~3-0~ubuntu-xenial \
docker-ce-cli=5:18.09.3~3-0~ubuntu-xenial \
containerd.io
docker-ce=5:${DockerCEVersion}~3-0~ubuntu-xenial \
docker-ce-cli=5:${DockerCEVersion}~3-0~ubuntu-xenial \
containerd.io=${ContainerdIOVersion}
# install NVIDIA driver
apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub
bash -c 'echo "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list'
mkdir /usr/lib/nvidia # a fix to install the driver, see: https://devtalk.nvidia.com/default/topic/1032456/linux/nvidia-387-26-for-ubuntu-16-04-package-broken-/
apt-get update
apt-get install -y nvidia-410
apt-get install -y nvidia-${NvidiaDriverVersion}
# install nvidia-docker2
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | apt-key add -
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \
tee /etc/apt/sources.list.d/nvidia-docker.list
apt-get update
apt-get install -y \
nvidia-container-runtime=2.0.0+docker18.09.3-1 \
nvidia-docker2=2.0.3+docker18.09.3-1
apt-get install -y nvidia-container-toolkit=${NvidiaContainerToolkitVersion}
# reload the Docker daemon configuration
pkill -SIGHUP dockerd
systemctl restart docker
# test NVIDIA Docker runtime
docker run --runtime=nvidia --rm nvidia/cuda:10.0-base-ubuntu16.04 nvidia-smi
docker run --gpus all --rm nvidia/cuda:10.1-base-ubuntu16.04 nvidia-smi
docker rmi $(docker images -q)
mode: '000755'
owner: ubuntu
Expand Down Expand Up @@ -364,7 +378,7 @@ Resources:
console.log('"describeImages" response:\n',data);physicalId=data.Images[0].ImageId;success()}).catch((err)=>failed(err))}else{var imageId=physicalId;console.log('Searching AMI with ID='+imageId);ec2.describeImages({ImageIds:[imageId]}).promise().then((data)=>{if(!data.Images.length){throw new Error('No images found')}
console.log('"describeImages" response:\n',data);return ec2.deregisterImage({ImageId:imageId}).promise()}).then((data)=>{console.log('Image deregistered:\n',data);return ec2.describeSnapshots({Filters:[{Name:'description',Values:['*'+imageId+'*']}]}).promise()}).then((data)=>{console.log('"describeSnapshots" response:\n',data);if(!data.Snapshots.length){throw new Error('No snapshots found')}
return ec2.deleteSnapshot({SnapshotId:data.Snapshots[0].SnapshotId}).promise()}).then((data)=>{console.log('Snapshot deleted:\n',data);success()}).catch((err)=>failed(err))}}
Runtime: nodejs8.10
Runtime: nodejs12.x
Timeout: 60
DeleteAMILambdaExecutionRole:
Type: AWS::IAM::Role
Expand Down Expand Up @@ -408,7 +422,7 @@ Resources:
Properties:
Handler: index.handler
Role: !GetAtt SetLogsRetentionLambdaExecutionRole.Arn
Runtime: nodejs8.10
Runtime: nodejs12.x
Timeout: 30
Code:
ZipFile: |
Expand Down
4 changes: 2 additions & 2 deletions spotty/providers/aws/deployment/instance_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,11 +235,11 @@ def _get_ami(self) -> Image:
# get the latest "Deep Learning Base AMI"
res = self._ec2.describe_images(
Owners=['amazon'],
Filters=[{'Name': 'name', 'Values': ['Deep Learning Base AMI (Ubuntu)*']}],
Filters=[{'Name': 'name', 'Values': ['Deep Learning AMI (Ubuntu 16.04) Version*']}],
)

if not len(res['Images']):
raise ValueError('AWS Deep Learning Base AMI not found.\n'
raise ValueError('AWS Deep Learning AMI not found.\n'
'Use the "spotty aws create-ami" command to create an AMI with NVIDIA Docker.')

image_info = sorted(res['Images'], key=lambda x: x['CreationDate'], reverse=True)[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ write_files:
apt-get install -y \
docker-ce=5:{{DOCKER_CE_VERSION}}~3-0~ubuntu-xenial \
docker-ce-cli=5:{{DOCKER_CE_VERSION}}~3-0~ubuntu-xenial \
containerd.io
containerd.io={{CONTAINERD_IO_VERSION}}
# install NVIDIA driver
apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub
Expand All @@ -89,15 +89,13 @@ write_files:
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \
tee /etc/apt/sources.list.d/nvidia-docker.list
apt-get update
apt-get install -y \
nvidia-container-runtime=2.0.0+docker{{DOCKER_CE_VERSION}}-1 \
nvidia-docker2={{NVIDIA_DOCKER_VERSION}}+docker{{DOCKER_CE_VERSION}}-1
apt-get install -y nvidia-container-toolkit={{NVIDIA_CONTAINER_TOOLKIT_VERSION}}
# reload the Docker daemon configuration
pkill -SIGHUP dockerd
systemctl restart docker
# test NVIDIA Docker runtime
docker run --runtime=nvidia --rm nvidia/cuda:10.0-base-ubuntu16.04 nvidia-smi
docker run --gpus all --rm nvidia/cuda:10.1-base-ubuntu16.04 nvidia-smi
docker rmi $(docker images -q)
- path: /run/spotty-image/scripts/create_image.sh
Expand All @@ -118,8 +116,9 @@ write_files:
--force \
--source-disk {{MACHINE_NAME}} \
--source-disk-zone {{ZONE}} \
--storage-location eu \
{{#IMAGE_FAMILY}}--family {{IMAGE_FAMILY}} {{/IMAGE_FAMILY}}\
--description "Spotty Image v{{STACK_VERSION}}: Ubuntu 16.04 LTS, NVIDIA driver {{NVIDIA_DRIVER_VERSION}}, Docker CE {{DOCKER_CE_VERSION}}, NVIDIA Docker {{NVIDIA_DOCKER_VERSION}}." \
--description "Spotty Image v{{STACK_VERSION}}: Ubuntu 16.04 LTS, NVIDIA driver {{NVIDIA_DRIVER_VERSION}}, Docker CE {{DOCKER_CE_VERSION}}, NVIDIA Container Toolkit {{NVIDIA_CONTAINER_TOOLKIT_VERSION}}." \
--format json`
# return the image ID
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ def prepare_image_template(instance_config: InstanceConfig, machine_name: str, s
'IMAGE_FAMILY': image_family if image_family else '',
'STACK_VERSION': stack_version,
'NVIDIA_DRIVER_VERSION': '410',
'DOCKER_CE_VERSION': '18.09.3',
'NVIDIA_DOCKER_VERSION': '2.0.3',
'DOCKER_CE_VERSION': '19.03.5',
'CONTAINERD_IO_VERSION': '1.2.10-3',
'NVIDIA_CONTAINER_TOOLKIT_VERSION': '1.0.5-1',
'DEBUG_MODE': debug_mode,
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ write_files:
/tmp/spotty/instance/scripts/prepare_instance.sh \
&& /tmp/spotty/instance/scripts/mount_volumes.sh \
&& /tmp/spotty/instance/scripts/sync_project.sh {{{SYNC_ARGS}}} \
&& /tmp/spotty/instance/scripts/startup_commands.sh \
&& /tmp/spotty/instance/scripts/run_container.sh
# send signal that the Docker container is ready or failed
Expand Down Expand Up @@ -77,6 +78,14 @@ write_files:
gsutil -m rsync -r "$@" gs://{{PROJECT_GS_BUCKET}}/{{BUCKET_SYNC_DIR}} {{HOST_PROJECT_DIR}}
fi
- path: /tmp/spotty/instance/scripts/startup_commands.sh
permissions: 0755
owner: ubuntu
content: |
#!/bin/bash -xe
{{{INSTANCE_STARTUP_COMMANDS}}}
- path: /tmp/spotty/instance/scripts/run_container.sh
permissions: 0755
owner: ubuntu
Expand Down Expand Up @@ -141,7 +150,8 @@ write_files:
permissions: 0644
owner: ubuntu
content: |
echo "Nothing to do"
# container startup commands
{{{CONTAINER_STARTUP_COMMANDS}}}
- path: /home/ubuntu/.tmux.conf
permissions: 0664
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ resources:
autoDelete: true
initializeParams:
sourceImage: {{SOURCE_IMAGE}}
{{#BOOT_DISK_SIZE}}
diskSizeGb: {{BOOT_DISK_SIZE}}
{{/BOOT_DISK_SIZE}}

{{#DISK_ATTACHMENTS}}
- source: {{DISK_LINK}}
Expand Down
21 changes: 13 additions & 8 deletions spotty/providers/gcp/deployment/dm_templates/instance_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,16 @@ def prepare_instance_template(instance_config: InstanceConfig, container: Contai
output: AbstractOutputWriter):
"""Prepares deployment template to run an instance."""

# read and update the template
with open(os.path.join(os.path.dirname(__file__), 'instance', 'template.yaml')) as f:
template = f.read()

# get disk attachments
disk_attachments, disk_device_names, disk_mount_dirs = _get_disk_attachments(volumes, instance_config.zone)

# get Docker runtime parameters
runtime_parameters = container.get_runtime_parameters(bool(instance_config.gpu))

# render startup script
startup_script = open(os.path.join(os.path.dirname(__file__), 'instance', 'cloud_init.yaml'), 'r').read()
with open(os.path.join(os.path.dirname(__file__), 'instance', 'cloud_init.yaml')) as f:
startup_script = f.read()

startup_script = chevron.render(startup_script, {
'MACHINE_NAME': machine_name,
'ZONE': instance_config.zone,
Expand All @@ -45,14 +43,22 @@ def prepare_instance_template(instance_config: InstanceConfig, container: Contai
'DOCKER_BUILD_CONTEXT_PATH': container.docker_context_path,
'DOCKER_RUNTIME_PARAMS': runtime_parameters,
'DOCKER_WORKING_DIR': container.config.working_dir,
'INSTANCE_STARTUP_COMMANDS': fix_indents_for_lines(instance_config.commands, startup_script,
'{{{INSTANCE_STARTUP_COMMANDS}}}'),
'CONTAINER_STARTUP_COMMANDS': fix_indents_for_lines(container.config.commands, startup_script,
'{{{CONTAINER_STARTUP_COMMANDS}}}'),
})

# render the template
parameters = {
with open(os.path.join(os.path.dirname(__file__), 'instance', 'template.yaml')) as f:
template = f.read()

template = chevron.render(template, {
'SERVICE_ACCOUNT_EMAIL': service_account_email,
'ZONE': instance_config.zone,
'MACHINE_TYPE': instance_config.machine_type,
'SOURCE_IMAGE': image_link,
'BOOT_DISK_SIZE': instance_config.boot_disk_size,
'STARTUP_SCRIPT': fix_indents_for_lines(startup_script, template, '{{{STARTUP_SCRIPT}}}'),
'MACHINE_NAME': machine_name,
'PREEMPTIBLE': 'false' if instance_config.on_demand else 'true',
Expand All @@ -61,8 +67,7 @@ def prepare_instance_template(instance_config: InstanceConfig, container: Contai
'DISK_ATTACHMENTS': disk_attachments,
'PUB_KEY_VALUE': public_key_value,
'PORTS': ', '.join([str(port) for port in set(container.config.ports + [22])]),
}
template = chevron.render(template, parameters)
})

# print some information about the deployment
output.write('- image URL: ' + '/'.join(image_link.split('/')[-5:]))
Expand Down
2 changes: 1 addition & 1 deletion spotty/providers/gcp/deployment/image_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
class ImageDeployment(AbstractGcpDeployment):

# version of the image stack
VERSION = '1.0.0'
VERSION = '1.1.0'

@property
def machine_name(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion spotty/providers/gcp/deployment/instance_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def _get_image(self) -> Image:
raise ValueError('Image with the name "%s" was not found.' % self.instance_config.image_name)
else:
# get the latest image from the "spotty-cloud" project
spotty_image_family_url = 'projects/spotty-cloud/global/images/family/spotty'
spotty_image_family_url = 'projects/spotty-cloud/global/images/family/spotty-1-2-5'
image = Image.get_by_url(self._ce, spotty_image_family_url)
if not image:
raise ValueError('Image "%s" not found.\n'
Expand Down

0 comments on commit e67cae8

Please sign in to comment.