Skip to content

Commit

Permalink
Merge pull request #24 from Layth17/main
Browse files Browse the repository at this point in the history
Fixing Issues #23, #20, and #21
  • Loading branch information
Layth17 authored Jun 28, 2022
2 parents eca11d0 + e7e8cfc commit 1b52b6d
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 27 deletions.
53 changes: 38 additions & 15 deletions manual-workflows/resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ SRC_DIR=$(dirname "$0")
function show_help {
echo "$0 - Create/Destroy resources for manual Cromwell workflow execution"
echo ""
echo "usage: sh $0 COMMAND --project <PROJECT> --bucket <BUCKET> --CIDR <CIDR> --GC_REGION <REGION>"
echo "usage: sh $0 COMMAND --project <PROJECT> --bucket <BUCKET> --ip-range <RANGE>"
echo ""
echo "commands:"
echo " init-project Create required resources for the project. You'll almost always want this one."
Expand All @@ -14,10 +14,12 @@ function show_help {
echo ""
echo "arguments:"
echo " -h, --help print this block"
echo " --config-dir a dir path that is writable, DEFAULT='\$SRC_DIR'"
echo " --bucket name for the GCS bucket used by Cromwell"
echo " --project name of your GCP project"
echo " --CIDR block/range of acceptable IPs e.g. 172.16.0.0/24 or a single IP address e.g. 172.16.5.9/32 or a comma-seperated list of IPs/CIDRs."
echo " --GC_REGION default='us-central1'. For other regions check: https://cloud.google.com/compute/docs/regions-zones"
echo " --ip-range block/range of acceptable IPs e.g. 172.16.0.0/24 or a single IP address e.g. 172.16.5.9/32 or a comma-seperated list of IPs/CIDRs."
echo " --gc-region DEFAULT='us-central1'. For other regions, check: https://cloud.google.com/compute/docs/regions-zones"
echo " --retention DEFAULT is none. For more option, check: https://cloud.google.com/storage/docs/gsutil/commands/mb#retention-policy"
echo ""
}

Expand All @@ -35,13 +37,20 @@ if [[ ($COMMAND != "init-project") && ($COMMAND != "generate-config")]]; then
die "ERROR: invalid command - $COMMAND"
fi


while test $# -gt 0; do
case $1 in
-h|--help)
show_help
exit
;;
--config-dir*)
if [ ! "$2" ]; then
CONFIG_DIR=$SRC_DIR
else
CONFIG_DIR=$2
shift
fi
;;
--bucket*)
if [ ! "$2" ]; then
die 'ERROR: "--bucket" requires a non-empty argument.'
Expand All @@ -58,58 +67,72 @@ while test $# -gt 0; do
shift
fi
;;
--CIDR*)
--ip-range*)
if [ ! "$2" ]; then
die 'ERROR: "--CIDR" requires a non-empty argument.'
die 'ERROR: "--ip-range" requires a non-empty argument.'
else
CIDR=$2
IP_RANGE=$2
shift
fi
;;
--GC_REGION*)
--gc-region*)
if [ ! "$2" ]; then
GC_REGION="us-central1"
else
GC_REGION=$2
shift
fi
;;
*)
--retention*)
if [ ! "$2" ]; then
RETENTION=""
else
RETENTION=$2
shift
fi
;;
*)
break
;;
esac
shift
done

if [ -z $CONFIG_DIR ]; then
CONFIG_DIR=$SRC_DIR
fi
if [ -z $PROJECT ]; then
die 'ERROR: "--project" must be set.'
fi
if [ -z $BUCKET ]; then
die 'ERROR: "--bucket" must be set.'
fi
if [ -z $CIDR ]; then
die 'ERROR: "--CIDR" must be set.'
if [ -z $IP_RANGE ]; then
die 'ERROR: "--ip-range" must be set.'
fi
# Apply defaults for the optional flags.
# ${VAR:-default} replaces the unquoted `[ -z $VAR ]` tests, which misbehave
# when the value contains whitespace or glob characters.
GC_REGION=${GC_REGION:-us-central1}
# No retention policy unless --retention supplied one.
RETENTION=${RETENTION:-}

COMPUTE_NAME="cromwell-compute"
SERVER_NAME="cromwell-server"
COMPUTE_ACCOUNT="$COMPUTE_NAME@$PROJECT.iam.gserviceaccount.com"
SERVER_ACCOUNT="$SERVER_NAME@$PROJECT.iam.gserviceaccount.com"

function generate_config {
cp $SRC_DIR/base_cromwell.conf $SRC_DIR/cromwell.conf
cat << EOF >> $SRC_DIR/cromwell.conf
cp $SRC_DIR/base_cromwell.conf $CONFIG_DIR/cromwell.conf
cat << EOF >> $CONFIG_DIR/cromwell.conf
backend.providers.default.config {
project = "$PROJECT"
root = "gs://$BUCKET/cromwell-executions"
genomics.compute-service-account = "$COMPUTE_ACCOUNT"
filesystems.gcs.project = "$PROJECT"
}
EOF
cat <<EOF > $SRC_DIR/workflow_options.json
cat <<EOF > $CONFIG_DIR/workflow_options.json
{
"default_runtime_attributes": {
"preemptible": 1,
Expand All @@ -126,7 +149,7 @@ sh $SRC_DIR/../scripts/enable_api.sh
case $COMMAND in
"init-project")
# Create service accounts
sh $SRC_DIR/../scripts/create_resources.sh $PROJECT $SERVER_NAME $COMPUTE_NAME $BUCKET $CIDR $GC_REGION
sh $SRC_DIR/../scripts/create_resources.sh $PROJECT $SERVER_NAME $COMPUTE_NAME $BUCKET $IP_RANGE $GC_REGION $RETENTION
# Create bucket if not exists
# Generate cromwell.conf
generate_config
Expand Down
49 changes: 39 additions & 10 deletions manual-workflows/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ $0 - Start a new Cromwell VM instance
usage: $0 INSTANCE_NAME [--argument value]*
arguments:
-h, --help print this block and immediately exits
--project GCP project name
--server-account Email identifier of service account used by main Cromwell instance
--cromwell-conf Local path to configuration file for Cromwell server. DEFAULT $SRC_DIR/cromwell.conf
--machine-type GCP machine type for the instance. DEFAULT e2-standard-2
-h, --help prints this block and immediately exits
--project GCP project name
--server-account Email identifier of service account used by main Cromwell instance
--cromwell-conf Local path to configuration file for Cromwell server. DEFAULT \$SRC_DIR/cromwell.conf
--workflow-options Local path to workflow_options.json. DEFAULT \$SRC_DIR/workflow_options.json
--machine-type GCP machine type for the instance. DEFAULT e2-standard-2
--zone DEFAULT us-central1-c. For options, visit: https://cloud.google.com/compute/docs/regions-zones
Additional arguments are passed directly to gsutil compute instances
create command. For more information on those arguments, check that commands
Expand Down Expand Up @@ -47,6 +49,22 @@ while test $# -gt 0; do
show_help
exit 0
;;
--cromwell-conf*)
if [ ! "$2" ]; then
CROMWELL_CONF="$SRC_DIR/cromwell.conf"
else
CROMWELL_CONF=$2
shift
fi
;;
# The help text documents this flag as --workflow-options; accept that
# spelling, and keep --workflow-dir so existing invocations still work.
--workflow-options*|--workflow-dir*)
  if [ ! "$2" ]; then
    # No value given: use the file next to this script.
    WORKFLOW_OPTIONS="$SRC_DIR/workflow_options.json"
  else
    WORKFLOW_OPTIONS=$2
    shift
  fi
  ;;
--project*)
if [ ! "$2" ]; then
die 'Error: "--project" requires a string argument for the GCP project name used'
Expand All @@ -71,6 +89,14 @@ while test $# -gt 0; do
shift
fi
;;
--zone*)
if [ ! "$2" ]; then
ZONE="us-central1-c"
else
ZONE=$2
shift
fi
;;
*)
break
;;
Expand All @@ -80,10 +106,12 @@ done

MACHINE_TYPE=${MACHINE_TYPE:-"e2-standard-2"}

[ -z $SERVER_ACCOUNT ] && die "Missing argument --server-account"
[ -z $PROJECT ] && die "Missing argument --project"
[ -z $SERVER_ACCOUNT ] && die "Missing argument --server-account"
[ -z $PROJECT ] && die "Missing argument --project"
# Optional arguments: fall back to the defaults advertised in the help text.
# Uses the same ${VAR:-default} form as MACHINE_TYPE and avoids the unquoted
# `[ -z $VAR ]` test, which breaks on paths containing spaces.
CROMWELL_CONF=${CROMWELL_CONF:-"$SRC_DIR/cromwell.conf"}
WORKFLOW_OPTIONS=${WORKFLOW_OPTIONS:-"$SRC_DIR/workflow_options.json"}
ZONE=${ZONE:-"us-central1-c"}

CROMWELL_CONF="$SRC_DIR/cromwell.conf"
if [[ ! -f $CROMWELL_CONF ]]; then
cat <<EOF
cromwell.conf does not exist. Check passed value or generate via
Expand All @@ -94,7 +122,6 @@ EOF
exit 1
fi

WORKFLOW_OPTIONS="$SRC_DIR/workflow_options.json"
if [[ ! -f $WORKFLOW_OPTIONS ]]; then
cat <<EOF
workflow_options.json does not exist. Check passed value or generate via
Expand All @@ -105,11 +132,13 @@ EOF
exit 1
fi

# $@ indicates the ability to add any of the other flags that come with gcloud compute instances create.
# For the full list, visit https://cloud.google.com/sdk/gcloud/reference/compute/instances/create
gcloud compute instances create $INSTANCE_NAME \
--project $PROJECT \
--image-family debian-11 \
--image-project debian-cloud \
--zone us-central1-c \
--zone $ZONE \
--machine-type=$MACHINE_TYPE \
--service-account=$SERVER_ACCOUNT --scopes=cloud-platform \
--network=$NETWORK --subnet=$SUBNET \
Expand Down
6 changes: 6 additions & 0 deletions scripts/cloudize-workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,12 @@ def cloudize(bucket, wf_path, inputs_path, output_path, dryrun=False):
and its workflow's CWL definition."""
workflow = make_workflow(wf_path, inputs_path)
file_inputs = workflow.find_file_inputs()

if not file_inputs:
    # Nothing to process: report the problem and stop with a non-zero exit
    # status so calling scripts can detect the failure (a bare exit() would
    # terminate with status 0).
    logging.error(f"file_inputs is empty {file_inputs}. "
                  "Check that your input files are properly accessible.")
    raise SystemExit(1)

set_cloud_paths(file_inputs)
cloudized_inputs = cloudize_file_paths(workflow.inputs, bucket, file_inputs)
write_new_inputs(cloudized_inputs, output_path)
Expand Down
6 changes: 4 additions & 2 deletions scripts/create_resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ PROJECT=$1
SERVER_NAME=$2
COMPUTE_NAME=$3
BUCKET=$4
CIDR=$5
IP_RANGE=$5
GC_REGION=$6
RETENTION=$7

NETWORK=cloud-workflows
SUBNET=cloud-workflows-default
Expand Down Expand Up @@ -55,11 +56,12 @@ gcloud compute networks subnets create $SUBNET \
# Firewall
gcloud compute firewall-rules create $NETWORK-allow-ssh \
--project=$PROJECT \
--source-ranges $CIDR \
--source-ranges $IP_RANGE \
--network=$NETWORK \
--allow tcp:22

# Bucket
# Create the bucket exactly once. When a retention period was requested it is
# passed to the same `gsutil mb` call, so the bucket still receives the
# -p/-b settings and no second (failing) create is attempted.
if [ -n "$RETENTION" ]; then
  gsutil mb -p "$PROJECT" -b on --retention "$RETENTION" "gs://$BUCKET"
else
  gsutil mb -p "$PROJECT" -b on "gs://$BUCKET"
fi
gsutil iam ch serviceAccount:$COMPUTE_ACCOUNT:objectAdmin gs://$BUCKET
gsutil iam ch serviceAccount:$COMPUTE_ACCOUNT:legacyBucketOwner gs://$BUCKET
Expand Down

0 comments on commit 1b52b6d

Please sign in to comment.