From 1cd61882e8aeed64fde8e4d8fafc342862f3c67e Mon Sep 17 00:00:00 2001
From: Steve Huang
Date: Wed, 28 Aug 2019 14:36:51 -0400
Subject: [PATCH] update SV Spark pipeline example shell scripts saving results to GCS (#6114)

---
 scripts/sv/copy_sv_results.sh    | 6 +++---
 scripts/sv/manage_sv_pipeline.sh | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/sv/copy_sv_results.sh b/scripts/sv/copy_sv_results.sh
index b1bb02fa084..276585d8c89 100755
--- a/scripts/sv/copy_sv_results.sh
+++ b/scripts/sv/copy_sv_results.sh
@@ -15,7 +15,7 @@ if [[ "$#" -lt 3 ]]; then
   [2] GCS cluster name (required)
   [3] cluster output directory (required)
   [4] GCS user name (defaults to local user name)
-  [5] GCS save bucket/path (defaults to \$PROJECT_NAME/\$GCS_USER if
+  [5] GCS save bucket/path (defaults to \$PROJECT_NAME-\$GCS_USER if
       omitted or empty)
   [6] path to local log file (default to empty, i.e. no log)
   [*] additional arguments that were passed to
@@ -31,7 +31,7 @@ PROJECT_NAME=$1
 CLUSTER_NAME=$2
 OUTPUT_DIR=$3
 GCS_USER=${4:-${USER}}
-GCS_SAVE_PATH=${5:-"${PROJECT_NAME}/${GCS_USER}"}
+GCS_SAVE_PATH=${5:-"${PROJECT_NAME}-${GCS_USER}"}
 LOCAL_LOG_FILE=${6:-"/dev/null"}
 
 COPY_FASTQ=${COPY_FASTQ:-"Y"}
@@ -44,7 +44,6 @@ GCS_SAVE_PATH=${GCS_SAVE_PATH%/} # remove trailing slash to avoid double slashes
 echo "CLUSTER_INFO=\$(gcloud dataproc clusters list --project=${PROJECT_NAME} --filter='clusterName=${CLUSTER_NAME}')"
 CLUSTER_INFO=$(gcloud dataproc clusters list --project=${PROJECT_NAME} --filter="clusterName=${CLUSTER_NAME}" --format="csv(NAME, WORKER_COUNT, PREEMPTIBLE_WORKER_COUNT, STATUS, ZONE)")
 ZONE=$(echo "${CLUSTER_INFO}" | tail -1 | cut -d"," -f 5)
-echo "Zone = $ZONE"
 if [ -z "${ZONE}" ]; then
     # cluster is down.
     echo "Cluster \"${CLUSTER_NAME}\" is down. Only log and command args will be uploaded"
@@ -52,6 +51,7 @@ if [ -z "${ZONE}" ]; then
 else
     # get the latest time-stamped results directory from the cluster
     # (may not be current date stamp if multiple jobs run on same cluster)
+    echo "Zone = $ZONE"
     MASTER="${CLUSTER_NAME}-m"
     RESULTS_DIR="$(dirname ${OUTPUT_DIR})"
     RESULTS_DIR=$(gcloud compute ssh ${MASTER} --project ${PROJECT_NAME} --zone ${ZONE} --command="hadoop fs -ls ${RESULTS_DIR} | tail -n 1")
diff --git a/scripts/sv/manage_sv_pipeline.sh b/scripts/sv/manage_sv_pipeline.sh
index 11ed3a0291c..5e9ba3dcf1e 100755
--- a/scripts/sv/manage_sv_pipeline.sh
+++ b/scripts/sv/manage_sv_pipeline.sh
@@ -230,7 +230,7 @@ if [ "$(dirname ${GCS_REFERENCE_2BIT})" != "$(dirname ${GCS_REFERENCE_IMAGE})" ]
     exit -1
 fi
 
-GCS_SAVE_PATH=${GCS_SAVE_PATH:-"${PROJECT_NAME}/${GCS_USER}"}
+GCS_SAVE_PATH=${GCS_SAVE_PATH:-"${PROJECT_NAME}-${GCS_USER}"}
 
 # configure caching .jar files
 export GATK_GCS_STAGING=${GATK_GCS_STAGING:-"gs://${PROJECT_NAME}/${GCS_USER}/staging/"}
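
The substantive change in both scripts is the default for GCS_SAVE_PATH: the project name and user are now joined with a dash instead of a slash. Below is a minimal sketch of that defaulting behavior, not part of the patch; the values for PROJECT_NAME and GCS_USER are hypothetical and only mirror the logic in copy_sv_results.sh.

#!/usr/bin/env bash
# Sketch only: hypothetical values illustrating the changed default.
PROJECT_NAME="my-project"          # hypothetical project name
GCS_USER=${USER:-"alice"}          # the script defaults arg 4 to the local user name
# old default: bucket/path form, \$PROJECT_NAME/\$GCS_USER
OLD_SAVE_PATH="${PROJECT_NAME}/${GCS_USER}"
# new default (this patch): dashed form, \$PROJECT_NAME-\$GCS_USER,
# used when the 5th positional argument is omitted or empty
GCS_SAVE_PATH=${5:-"${PROJECT_NAME}-${GCS_USER}"}
echo "old default: ${OLD_SAVE_PATH}"
echo "new default: ${GCS_SAVE_PATH}"

The other change, moving the echo "Zone = $ZONE" line into the else branch, simply defers that message to the case where the cluster was actually found, so nothing is printed when the cluster is down.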