Commit
update SV Spark pipeline example shell scripts saving results to GCS (#…
SHuang-Broad authored Aug 28, 2019
1 parent 8f4efec commit 1cd6188
Showing 2 changed files with 4 additions and 4 deletions.
6 changes: 3 additions & 3 deletions scripts/sv/copy_sv_results.sh
@@ -15,7 +15,7 @@ if [[ "$#" -lt 3 ]]; then
 [2] GCS cluster name (required)
 [3] cluster output directory (required)
 [4] GCS user name (defaults to local user name)
-[5] GCS save bucket/path (defaults to \$PROJECT_NAME/\$GCS_USER if
+[5] GCS save bucket/path (defaults to \$PROJECT_NAME-\$GCS_USER if
 omitted or empty)
 [6] path to local log file (default to empty, i.e. no log)
 [*] additional arguments that were passed to
@@ -31,7 +31,7 @@ PROJECT_NAME=$1
 CLUSTER_NAME=$2
 OUTPUT_DIR=$3
 GCS_USER=${4:-${USER}}
-GCS_SAVE_PATH=${5:-"${PROJECT_NAME}/${GCS_USER}"}
+GCS_SAVE_PATH=${5:-"${PROJECT_NAME}-${GCS_USER}"}
 LOCAL_LOG_FILE=${6:-"/dev/null"}
 COPY_FASTQ=${COPY_FASTQ:-"Y"}
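
The `${N:-default}` expansions above substitute a default when the corresponding positional argument is omitted or empty. A minimal sketch of how the new default resolves, using a hypothetical invocation that passes only the three required arguments:

```bash
#!/usr/bin/env bash
# Hypothetical invocation: arguments 4-6 are omitted.
set -- my-project my-cluster /output

PROJECT_NAME=$1
GCS_USER=${4:-${USER}}                             # $4 unset -> local user name
GCS_SAVE_PATH=${5:-"${PROJECT_NAME}-${GCS_USER}"}  # $5 unset -> my-project-<user>
LOCAL_LOG_FILE=${6:-"/dev/null"}                   # $6 unset -> discard the log

echo "${GCS_SAVE_PATH}"   # e.g. my-project-alice
```

The switch from `/` to `-` means the default now names a dedicated per-user bucket (e.g. `my-project-alice`) rather than a per-user prefix inside the project bucket.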

@@ -44,14 +44,14 @@ GCS_SAVE_PATH=${GCS_SAVE_PATH%/} # remove trailing slash to avoid double slashes
 echo "CLUSTER_INFO=\$(gcloud dataproc clusters list --project=${PROJECT_NAME} --filter='clusterName=${CLUSTER_NAME}')"
 CLUSTER_INFO=$(gcloud dataproc clusters list --project=${PROJECT_NAME} --filter="clusterName=${CLUSTER_NAME}" --format="csv(NAME, WORKER_COUNT, PREEMPTIBLE_WORKER_COUNT, STATUS, ZONE)")
 ZONE=$(echo "${CLUSTER_INFO}" | tail -1 | cut -d"," -f 5)
-echo "Zone = $ZONE"
 if [ -z "${ZONE}" ]; then
     # cluster is down.
     echo "Cluster \"${CLUSTER_NAME}\" is down. Only log and command args will be uploaded"
     RESULTS_DIR=""
 else
     # get the latest time-stamped results directory from the cluster
     # (may not be current date stamp if multiple jobs run on same cluster)
+    echo "Zone = $ZONE"
     MASTER="${CLUSTER_NAME}-m"
     RESULTS_DIR="$(dirname ${OUTPUT_DIR})"
     RESULTS_DIR=$(gcloud compute ssh ${MASTER} --project ${PROJECT_NAME} --zone ${ZONE} --command="hadoop fs -ls ${RESULTS_DIR} | tail -n 1")
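
The zone lookup above leans on `gcloud` emitting CSV (header row first) and on `cut` extracting the fifth field. A minimal sketch of that parse, with hypothetical CSV standing in for the live `gcloud dataproc clusters list` output:

```bash
#!/usr/bin/env bash
# Hypothetical CSV, shaped like --format="csv(NAME, ..., ZONE)" output.
CLUSTER_INFO='name,worker_count,preemptible_worker_count,status,zone
my-cluster,4,0,RUNNING,us-central1-a'

ZONE=$(echo "${CLUSTER_INFO}" | tail -1 | cut -d"," -f 5)
echo "Zone = ${ZONE}"   # Zone = us-central1-a

# If the filter matches no cluster, CLUSTER_INFO is empty, ZONE comes back
# empty, and the `if [ -z "${ZONE}" ]` branch reports the cluster as down.
```

(`${GCS_SAVE_PATH%/}` in the hunk header is the standard `%` suffix-strip expansion, removing a single trailing `/` so later concatenation does not produce `//`.)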
2 changes: 1 addition & 1 deletion scripts/sv/manage_sv_pipeline.sh
@@ -230,7 +230,7 @@ if [ "$(dirname ${GCS_REFERENCE_2BIT})" != "$(dirname ${GCS_REFERENCE_IMAGE})" ]
     exit -1
 fi
 
-GCS_SAVE_PATH=${GCS_SAVE_PATH:-"${PROJECT_NAME}/${GCS_USER}"}
+GCS_SAVE_PATH=${GCS_SAVE_PATH:-"${PROJECT_NAME}-${GCS_USER}"}
 
 # configure caching .jar files
 export GATK_GCS_STAGING=${GATK_GCS_STAGING:-"gs://${PROJECT_NAME}/${GCS_USER}/staging/"}
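
Both scripts now share the same save-path default, while the cached `.jar` staging area still defaults to a path under the project bucket. A minimal sketch contrasting the two, with hypothetical project and user names:

```bash
#!/usr/bin/env bash
PROJECT_NAME=my-project   # hypothetical
GCS_USER=alice            # hypothetical

# Results: per-user bucket (this commit's change).
GCS_SAVE_PATH=${GCS_SAVE_PATH:-"${PROJECT_NAME}-${GCS_USER}"}
# Cached jars: still a per-user prefix inside the project bucket.
export GATK_GCS_STAGING=${GATK_GCS_STAGING:-"gs://${PROJECT_NAME}/${GCS_USER}/staging/"}

echo "save path: ${GCS_SAVE_PATH}"    # my-project-alice
echo "staging:   ${GATK_GCS_STAGING}" # gs://my-project/alice/staging/
```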
