Skip to content

Commit

Permalink
fix compatibility issues with the latest version of apptainer
Browse files Browse the repository at this point in the history
  • Loading branch information
ksugar committed May 30, 2023
1 parent 895e06e commit 2de50cc
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 28 deletions.
26 changes: 15 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: help rebuild build launch bash bashroot notebook warmup test singularity-build singularity-launch singularity-stop
.PHONY: help rebuild build launch bash bashroot notebook warmup test apptainer-build apptainer-launch apptainer-shell apptainer-stop

help:
@cat Makefile
Expand All @@ -8,6 +8,7 @@ ELEPHANT_WORKSPACE?=${PWD}/workspace
ELEPHANT_IMAGE_NAME?=elephant-server:0.5.0
ELEPHANT_NVIDIA_GID?=$$(ls -n /dev/nvidia0 2>/dev/null | awk '{print $$4}')
ELEPHANT_DOCKER?=docker
ELEPHANT_REDIS_PORT?=6379

rebuild:
@IMAGEID=$$($(ELEPHANT_DOCKER) images -q $(ELEPHANT_IMAGE_NAME)); \
Expand Down Expand Up @@ -39,7 +40,7 @@ warmup:

launch: warmup
$(ELEPHANT_DOCKER) run -it --rm $(GPU_ARG) --shm-size=8g -v $(ELEPHANT_WORKSPACE):/workspace -p 8080:80 -p 5672:5672 \
-e LOCAL_UID=$(shell id -u) -e LOCAL_GID=$(shell id -g) -e NVIDIA_GID=$(ELEPHANT_NVIDIA_GID) \
-e LOCAL_UID=$(shell id -u) -e LOCAL_GID=$(shell id -g) -e NVIDIA_GID=$(ELEPHANT_NVIDIA_GID) -e ELEPHANT_REDIS_PORT=$(ELEPHANT_REDIS_PORT)\
$(ELEPHANT_IMAGE_NAME)

bash: warmup
Expand All @@ -60,17 +61,20 @@ test:
$(ELEPHANT_DOCKER) build -t $(ELEPHANT_IMAGE_NAME)-test -f Dockerfile-test . && $(ELEPHANT_DOCKER) image prune -f
$(ELEPHANT_DOCKER) run -it --rm $(ELEPHANT_IMAGE_NAME)-test

singularity-build:
singularity build --fakeroot elephant.sif elephant.def
singularity run --fakeroot elephant.sif
apptainer-build:
apptainer build --fakeroot elephant.sif elephant.def
apptainer run --fakeroot --bind $(HOME):/root elephant.sif

singularity-launch:
singularity instance start --nv --bind $(HOME)/.elephant_binds/var/lib:/var/lib,$(HOME)/.elephant_binds/var/log:/var/log,$(HOME)/.elephant_binds/var/run:/var/run,$(ELEPHANT_WORKSPACE):/workspace elephant.sif elephant
apptainer-launch:
apptainer instance start --nv --bind $(HOME),$(HOME)/.elephant_binds/var/lib:/var/lib,$(HOME)/.elephant_binds/var/log:/var/log,$(HOME)/.elephant_binds/var/run:/var/run,$(HOME)/.elephant_binds/etc/nginx:/etc/nginx,$(HOME)/.elephant_binds/etc/rabbitmq:/etc/rabbitmq,$(ELEPHANT_WORKSPACE):/workspace elephant.sif elephant
if [ $(ELEPHANT_GPU) = all ]; then \
singularity exec instance://elephant /start.sh; \
apptainer exec --env ELEPHANT_REDIS_PORT=$(ELEPHANT_REDIS_PORT) instance://elephant /start.sh; \
else \
SINGULARITYENV_CUDA_VISIBLE_DEVICES=$(ELEPHANT_GPU) singularity exec instance://elephant /start.sh; \
apptainer exec --env CUDA_VISIBLE_DEVICES=$(ELEPHANT_GPU),ELEPHANT_REDIS_PORT=$(ELEPHANT_REDIS_PORT) instance://elephant /start.sh; \
fi

singularity-stop:
singularity instance stop elephant
apptainer-shell:
apptainer shell --env ELEPHANT_REDIS_PORT=$(ELEPHANT_REDIS_PORT) --bind $(HOME),$(HOME)/.elephant_binds/var/lib:/var/lib,$(HOME)/.elephant_binds/var/log:/var/log,$(HOME)/.elephant_binds/var/run:/var/run,$(HOME)/.elephant_binds/etc/nginx:/etc/nginx,$(HOME)/.elephant_binds/etc/rabbitmq:/etc/rabbitmq elephant.sif

apptainer-stop:
apptainer instance stop elephant
10 changes: 6 additions & 4 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ==============================================================================
"""Flask endpoints."""
import os

from celery import Celery
from flask import Flask
from flask import request
Expand All @@ -31,7 +33,9 @@
from apis import init_api
from elephant.logging import logger
from elephant.redis_util import TrainState
from elephant.redis_util import REDIS_HOST
from elephant.redis_util import REDIS_KEY_STATE
from elephant.redis_util import REDIS_PORT


def make_celery(app):
Expand All @@ -53,15 +57,13 @@ def __call__(self, *args, **kwargs):

app = Flask(__name__)
app.config.update(
broker_url='redis://localhost:6379',
result_backend='redis://localhost:6379',
broker_url=f'redis://{REDIS_HOST}:{REDIS_PORT}',
result_backend=f'redis://{REDIS_HOST}:{REDIS_PORT}',
worker_redirect_stdouts=False,
worker_prefetch_multiplier=1,
)

celery = make_celery(app)
redis_client = FlaskRedis(app)
redis_client.set(REDIS_KEY_STATE, TrainState.IDLE.value)


init_api(app)
Expand Down
1 change: 1 addition & 0 deletions docker/rabbitmq-env.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
NODENAME=rabbit@localhost
2 changes: 1 addition & 1 deletion docker/rabbitmq.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ echo "*** Log in the WebUI at port 5672 (example: http:/localhost:5672) ***") &
# $@ is used to pass arguments to the rabbitmq-server command.
# For example, if you run: docker run -d rabbitmq arg1 arg2,
# it is as if you had run rabbitmq-server arg1 arg2 inside the container.
rabbitmq-server $@
/etc/init.d/rabbitmq-server $@
2 changes: 1 addition & 1 deletion docker/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ stopsignal=QUIT
priority=900

[program:redis-server]
command=redis-server
command=redis-server --port %(ENV_ELEPHANT_REDIS_PORT)s
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
Expand Down
4 changes: 2 additions & 2 deletions docker/uwsgi.ini
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ cheaper = 2
; By default, when on demand, run up to 16 processes
processes = 16
socket = /tmp/uwsgi.sock
chown-socket = nginx:nginx
chmod-socket = 664
# chown-socket = nginx:nginx
# chmod-socket = 664
; Graceful shutdown on SIGTERM, see https://github.com/unbit/uwsgi/issues/849#issuecomment-118869386
hook-master-start = unix_signal:15 gracefully_kill_them_all
need-app = true
Expand Down
6 changes: 5 additions & 1 deletion elephant-core/elephant/redis_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"""Utils used for Redis."""

from enum import Enum
import os

import redis

Expand All @@ -36,6 +37,8 @@
REDIS_KEY_TIMEPOINT = 'timepoint'
REDIS_KEY_UPDATE_ONGOING_SEG = 'update_ongoing_seg'
REDIS_KEY_UPDATE_ONGOING_FLOW = 'update_ongoing_flow'
REDIS_HOST = os.environ.get('ELEPHANT_REDIS_HOST', 'localhost')
REDIS_PORT = os.environ.get('ELEPHANT_REDIS_PORT', '6379')


class TrainState(Enum):
Expand All @@ -45,7 +48,8 @@ class TrainState(Enum):


if RUN_ON_FLASK:
redis_client = redis.Redis.from_url('redis://localhost:6379/0')
redis_client = redis.Redis.from_url(f'redis://{REDIS_HOST}:{REDIS_PORT}/0')
redis_client.set(REDIS_KEY_STATE, TrainState.IDLE.value)
else:
redis_client = None

Expand Down
20 changes: 12 additions & 8 deletions elephant.def
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,14 @@ From: pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime

# Install and set up RabbitMQ
chmod +x /docker/install-rabbitmq.sh && /docker/install-rabbitmq.sh
cp /docker/rabbitmq-env.conf /etc/rabbitmq/rabbitmq-env.conf

RABBITMQ_USER=user && RABBITMQ_PASSWORD=user
service rabbitmq-server start
/etc/init.d/rabbitmq-server start
rabbitmqctl add_user $RABBITMQ_USER $RABBITMQ_PASSWORD 2>/dev/null
rabbitmqctl set_user_tags $RABBITMQ_USER administrator
rabbitmqctl set_permissions -p / $RABBITMQ_USER ".*" ".*" ".*"
service rabbitmq-server stop
/etc/init.d/rabbitmq-server stop

# Set up nginx
cp /docker/nginx.conf /etc/nginx/nginx.conf
Expand Down Expand Up @@ -80,16 +81,19 @@ From: pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime
mkdir -p $HOME/.elephant_binds/var/run
cp -a /var/log $HOME/.elephant_binds/var/
cp -a /var/lib $HOME/.elephant_binds/var/
mkdir -p $HOME/.elephant_binds/etc
cp -a /etc/nginx $HOME/.elephant_binds/etc/
cp -a /etc/rabbitmq $HOME/.elephant_binds/etc/

%labels
Author Ko Sugawara <[email protected]>

%help
This is a container to run ELEPHANt server.
Usage:
singularity build --fakeroot elephant.sif elephant.def
singularity run --fakeroot elephant.sif
singularity instance start --nv --bind $HOME/.elephant_binds/var/lib:/var/lib,$HOME/.elephant_binds/var/log:/var/log,$HOME/.elephant_binds/var/run:/var/run,$ELEPHANT_WORKSPACE:/workspace elephant.sif elephant
singularity exec instance://elephant python /opt/elephant/script/dataset_generator.py --uint16 /workspace/datasets/elephant-demo/elephant-demo.h5 /workspace/datasets/elephant-demo
SINGULARITYENV_CUDA_VISIBLE_DEVICES=0 singularity exec instance://elephant /start.sh
singularity instance stop elephant
apptainer build --fakeroot elephant.sif elephant.def
apptainer run --bind $HOME elephant.sif
apptainer instance start --nv --bind $HOME,$HOME/.elephant_binds/var/lib:/var/lib,$HOME/.elephant_binds/var/log:/var/log,$HOME/.elephant_binds/var/run:/var/run,$HOME/.elephant_binds/etc/nginx:/etc/nginx,$HOME/.elephant_binds/etc/rabbitmq:/etc/rabbitmq,$ELEPHANT_WORKSPACE:/workspace elephant.sif elephant
apptainer exec instance://elephant python /opt/elephant/script/dataset_generator.py --uint16 /workspace/datasets/elephant-demo/elephant-demo.h5 /workspace/datasets/elephant-demo
APPTAINERENV_CUDA_VISIBLE_DEVICES=0 apptainer exec instance://elephant /start.sh
apptainer instance stop elephant

0 comments on commit 2de50cc

Please sign in to comment.