Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #471 update tests #472

Merged
merged 1 commit into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ rsconf_install_file '/srv/mpi_worker/user/vagrant/.rsmpi/known_hosts' '7ae6386df
rsconf_install_access '700' 'vagrant' 'vagrant'
rsconf_install_directory '/srv/mpi_worker/user/vagrant/bin'
rsconf_install_access '500' 'vagrant' 'vagrant'
rsconf_install_file '/srv/mpi_worker/user/vagrant/bin/rsmpi' '2c05ad5ad02341db2ab056c735906d73'
rsconf_install_file '/srv/mpi_worker/user/vagrant/bin/rsmpi' '97a2b53f4e90e919a91134c949d79a4b'
rsconf_install_access '700' 'vagrant' 'vagrant'
rsconf_install_directory '/srv/mpi_worker'
rsconf_install_access '500' 'vagrant' 'vagrant'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,34 @@ _rsmpi_net='10.10.10.0/24'
_rsmpi_ssh_config='/home/vagrant/jupyter/.rsmpi/ssh_config'


# Deployments may override rsmpi_pass_env, but the override is intentionally
# not documented in usage. If a pattern is missing, it should be added here.
# ^OMP_ is anchored to avoid matching OMPI_ (openmpi) variables, which we
# don't want to pass.
: ${rsmpi_pass_env:='MODULE|^OMP_|PATH|PYENV|PYTHON|SYNERGIA|VIRTUAL'}
declare -i _rsmpi_num_hosts=${#_rsmpi_ips[@]}

# Replace the current shell with mpiexec.
#   $1   - hostfile handed to "mpiexec -f"
#   $2.. - passed through unchanged as the trailing mpiexec arguments
# Builds a comma-separated list of shell variable names for -envlist and
# passes the result of rsmpi_local_ip to -localhost.
rsmpi_exec() {
local hostfile=$1
declare hostfile=$1
shift
rsmpi_exec_ssh_config
local a=
local x
# Collect the names of shell variables that match the pass-through pattern;
# each name is appended to $a followed by a comma.
for x in $(compgen -A variable|egrep '(SYNERGIA|PYENV|PYTHON|VIRTUAL|PATH|MODULE)'); do
declare a=
declare x
declare e=
for x in $(compgen -A variable | egrep "($rsmpi_pass_env)"); do
a+=$x,
done
local i=$(rsmpi_local_ip)
declare i=$(rsmpi_local_ip)
# NOTE(review): when no local ip is found only a message is written to
# stderr; control still falls through to the exec below with an empty
# -localhost value -- confirm whether this should abort instead.
if [[ ! $i ]]; then
echo "cannot determine local ip address for network=$_rsmpi_net" 1>&2
fi
# ${a[@]:0:-1} drops the trailing comma left by the loop above.
exec mpiexec -f "$hostfile" -localhost "$i" -envlist "${a[@]:0:-1}" "$@"
}

rsmpi_exec_ssh_config() {
local d=$HOME/.ssh
local l=$d.lock
local i
declare d=$HOME/.ssh
declare l=$d.lock
declare i
for i in $(seq 1 10); do
if ! mkdir "$l" >& /dev/null; then
if (( i == 9 )); then
Expand All @@ -67,8 +73,8 @@ rsmpi_exec_ssh_config() {
}

rsmpi_exec_ssh_config_cp() {
local src=$1
local dst=$2
declare src=$1
declare dst=$2
if ! cmp "$src" "$dst" >& /dev/null; then
install -m 600 "$src" "$dst"
fi
Expand All @@ -79,7 +85,7 @@ rsmpi_help() {
if (( $# > 0 )); then
echo "error: $@"
fi
local h=$(seq --separator=, 1 "$_rsmpi_num_hosts")
declare h=$(seq --separator=, 1 "$_rsmpi_num_hosts")
cat <<EOF
usage: rsmpi [-n processes] [-h hosts] [-t tasks-per-host] <mpi-command args...>

Expand All @@ -99,26 +105,26 @@ EOF
}

rsmpi_hosts() {
local -i slots=$1
declare -i slots=$1
shift
local -i tasks=$1
declare -i tasks=$1
shift
local -a hosts=( "$@" )
declare -a hosts=( "$@" )
if (( $# <= 0 )); then
hosts=( $(seq 1 $_rsmpi_num_hosts) )
fi
local t=/tmp/rsmpi_hosts-
declare t=/tmp/rsmpi_hosts-
# Avoid collisions with parallel executions
find "$t"* -mmin +60 -exec rm -f {} \; >& /dev/null || true
local i
declare i
t+=$RANDOM$RANDOM
if (( slots <= 0 )); then
slots=$(( ${#hosts[@]} * _rsmpi_slots_per_host ))
fi
if (( tasks <= 0 )); then
tasks=$_rsmpi_slots_per_host
fi
local s
declare s
for i in "${hosts[@]}"; do
slots=$(( slots - tasks ))
s=$tasks
Expand All @@ -135,9 +141,9 @@ rsmpi_hosts() {

rsmpi_local_ip() {
#TODO(robnagler) only works for class C
local n=${_rsmpi_net%0/*}
declare n=${_rsmpi_net%0/*}
n=^${n//./\\.}
local i
declare i
for i in $(hostname -i); do
if [[ $i =~ $n ]]; then
echo $i
Expand All @@ -148,17 +154,17 @@ rsmpi_local_ip() {
}

rsmpi_main() {
local o OPTARG OPTIND=1 OPTERR h
local -i slots=0 tasks=0
local -a hosts=()
declare o OPTARG OPTIND=1 OPTERR h
declare -i slots=0 tasks=0
declare -a hosts=()
while getopts "h:n:t:" o; do
case $o in
h)
if [[ ! $OPTARG =~ ^[0-9,]+$ ]]; then
rsmpi_help "invalid host spec: $OPTARG"
fi
IFS=, read -ra hosts <<<$OPTARG
local -A seen=()
declare -A seen=()
for h in "${hosts[@]}"; do
if (( h < 1 || h > _rsmpi_num_hosts )); then
rsmpi_help "invalid host number: $h"
Expand Down Expand Up @@ -204,8 +210,8 @@ rsmpi_main() {
}

rsmpi_ping() {
local h x
local -i i=0 ok=0
declare h x
declare -i i=0 ok=0
for h in "${_rsmpi_hosts[@]}"; do
i+=1
x=$(ssh -F "$_rsmpi_ssh_config" \
Expand Down
Loading