From 1cd61882e8aeed64fde8e4d8fafc342862f3c67e Mon Sep 17 00:00:00 2001
From: Steve Huang
Date: Wed, 28 Aug 2019 14:36:51 -0400
Subject: [PATCH] update SV Spark pipeline example shell scripts saving results to GCS (#6114)

---
 scripts/sv/copy_sv_results.sh    | 6 +++---
 scripts/sv/manage_sv_pipeline.sh | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/sv/copy_sv_results.sh b/scripts/sv/copy_sv_results.sh
index b1bb02fa084..276585d8c89 100755
--- a/scripts/sv/copy_sv_results.sh
+++ b/scripts/sv/copy_sv_results.sh
@@ -15,7 +15,7 @@ if [[ "$#" -lt 3 ]]; then
   [2] GCS cluster name (required)
   [3] cluster output directory (required)
   [4] GCS user name (defaults to local user name)
-  [5] GCS save bucket/path (defaults to \$PROJECT_NAME/\$GCS_USER if
+  [5] GCS save bucket/path (defaults to \$PROJECT_NAME-\$GCS_USER if
       omitted or empty)
   [6] path to local log file (default to empty, i.e. no log)
   [*] additional arguments that were passed to
@@ -31,7 +31,7 @@ PROJECT_NAME=$1
 CLUSTER_NAME=$2
 OUTPUT_DIR=$3
 GCS_USER=${4:-${USER}}
-GCS_SAVE_PATH=${5:-"${PROJECT_NAME}/${GCS_USER}"}
+GCS_SAVE_PATH=${5:-"${PROJECT_NAME}-${GCS_USER}"}
 LOCAL_LOG_FILE=${6:-"/dev/null"}
 
 COPY_FASTQ=${COPY_FASTQ:-"Y"}
@@ -44,7 +44,6 @@ GCS_SAVE_PATH=${GCS_SAVE_PATH%/} # remove trailing slash to avoid double slashes
 echo "CLUSTER_INFO=\$(gcloud dataproc clusters list --project=${PROJECT_NAME} --filter='clusterName=${CLUSTER_NAME}')"
 CLUSTER_INFO=$(gcloud dataproc clusters list --project=${PROJECT_NAME} --filter="clusterName=${CLUSTER_NAME}" --format="csv(NAME, WORKER_COUNT, PREEMPTIBLE_WORKER_COUNT, STATUS, ZONE)")
 ZONE=$(echo "${CLUSTER_INFO}" | tail -1 | cut -d"," -f 5)
-echo "Zone = $ZONE"
 if [ -z "${ZONE}" ]; then
     # cluster is down.
     echo "Cluster \"${CLUSTER_NAME}\" is down. Only log and command args will be uploaded"
@@ -52,6 +51,7 @@ if [ -z "${ZONE}" ]; then
 else
     # get the latest time-stamped results directory from the cluster
     # (may not be current date stamp if multiple jobs run on same cluster)
+    echo "Zone = $ZONE"
     MASTER="${CLUSTER_NAME}-m"
     RESULTS_DIR="$(dirname ${OUTPUT_DIR})"
     RESULTS_DIR=$(gcloud compute ssh ${MASTER} --project ${PROJECT_NAME} --zone ${ZONE} --command="hadoop fs -ls ${RESULTS_DIR} | tail -n 1")
diff --git a/scripts/sv/manage_sv_pipeline.sh b/scripts/sv/manage_sv_pipeline.sh
index 11ed3a0291c..5e9ba3dcf1e 100755
--- a/scripts/sv/manage_sv_pipeline.sh
+++ b/scripts/sv/manage_sv_pipeline.sh
@@ -230,7 +230,7 @@ if [ "$(dirname ${GCS_REFERENCE_2BIT})" != "$(dirname ${GCS_REFERENCE_IMAGE})" ]
     exit -1
 fi
 
-GCS_SAVE_PATH=${GCS_SAVE_PATH:-"${PROJECT_NAME}/${GCS_USER}"}
+GCS_SAVE_PATH=${GCS_SAVE_PATH:-"${PROJECT_NAME}-${GCS_USER}"}
 
 # configure caching .jar files
 export GATK_GCS_STAGING=${GATK_GCS_STAGING:-"gs://${PROJECT_NAME}/${GCS_USER}/staging/"}
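
The substantive change in both scripts is the default for GCS_SAVE_PATH: the project name and user are now joined with a dash instead of a slash. Below is a minimal sketch of that defaulting behavior, not part of the patch; the values for PROJECT_NAME and GCS_USER are hypothetical and only mirror the logic in copy_sv_results.sh.

#!/usr/bin/env bash
# Sketch only: hypothetical values illustrating the changed default.
PROJECT_NAME="my-project"          # hypothetical project name
GCS_USER=${USER:-"alice"}          # the script defaults arg 4 to the local user name
# old default: bucket/path form, \$PROJECT_NAME/\$GCS_USER
OLD_SAVE_PATH="${PROJECT_NAME}/${GCS_USER}"
# new default (this patch): dashed form, \$PROJECT_NAME-\$GCS_USER,
# used when the 5th positional argument is omitted or empty
GCS_SAVE_PATH=${5:-"${PROJECT_NAME}-${GCS_USER}"}
echo "old default: ${OLD_SAVE_PATH}"
echo "new default: ${GCS_SAVE_PATH}"

The other change, moving the echo "Zone = $ZONE" line into the else branch, simply defers that message to the case where the cluster was actually found, so nothing is printed when the cluster is down.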