diff --git a/scripts/variantstore/wdl/ImportGenomes.wdl b/scripts/variantstore/wdl/ImportGenomes.wdl index 52c062f94a4..c2ea921f80b 100644 --- a/scripts/variantstore/wdl/ImportGenomes.wdl +++ b/scripts/variantstore/wdl/ImportGenomes.wdl @@ -24,6 +24,13 @@ workflow ImportGenomes { String docker_final = select_first([docker, "us.gcr.io/broad-gatk/gatk:4.1.7.0"]) + call SetLock { + input: + output_directory = output_directory, + service_account_json = service_account_json, + preemptible_tries = preemptible_tries + } + call GetMaxTableId { input: sample_map = sample_map @@ -87,7 +94,8 @@ workflow ImportGenomes { output_directory = output_directory, gatk_override = gatk_override, docker = docker_final, - preemptible_tries = preemptible_tries + preemptible_tries = preemptible_tries, + run_uuid = SetLock.run_uuid } } @@ -104,7 +112,8 @@ workflow ImportGenomes { table_creation_done = CreateMetadataTables.done, tsv_creation_done = CreateImportTsvs.done, service_account_json = service_account_json, - docker = docker_final + docker = docker_final, + run_uuid = SetLock.run_uuid } } @@ -121,7 +130,8 @@ workflow ImportGenomes { table_creation_done = CreatePetTables.done, tsv_creation_done = CreateImportTsvs.done, service_account_json = service_account_json, - docker = docker_final + docker = docker_final, + run_uuid = SetLock.run_uuid } } @@ -138,9 +148,128 @@ workflow ImportGenomes { table_creation_done = CreateVetTables.done, tsv_creation_done = CreateImportTsvs.done, service_account_json = service_account_json, - docker = docker_final + docker = docker_final, + run_uuid = SetLock.run_uuid } } + + call ReleaseLock { + input: + run_uuid = SetLock.run_uuid, + output_directory = output_directory, + load_metadata_done = LoadMetadataTable.done, + load_pet_done = LoadPetTable.done, + load_vet_done = LoadVetTable.done, + service_account_json = service_account_json, + preemptible_tries = preemptible_tries + } +} + +# we create a lock file in the output directory with a uuid for this run of ImportGenomes. +# other tasks (TSV creation, bq load) check that the lock file exists and contains the run_uuid +# specific to this task. +task SetLock { + meta { + volatile: true + } + + input { + String output_directory + File? service_account_json + + # runtime + Int? preemptible_tries + } + + String has_service_account_file = if (defined(service_account_json)) then 'true' else 'false' + + command <<< + set -x + set -e + + if [ ~{has_service_account_file} = 'true' ]; then + gcloud auth activate-service-account --key-file='~{service_account_json}' + fi + + # generate uuid for this run + RUN_UUID=$(dbus-uuidgen) + echo $RUN_UUID | tee RUN_UUID_STRING + + DIR="~{output_directory}/" + + # check for existing lock file + LOCKFILE="LOCKFILE" + HAS_LOCKFILE=$(gsutil ls "${DIR}${LOCKFILE}" | wc -l) + if [ $HAS_LOCKFILE -gt 0 ]; then + echo "ERROR: lock file in place. Check whether another run of ImportGenomes with this output directory is in progress or a previous run had an error. If you would like to proceed, run `gsutil rm ${DIR}${LOCKFILE}` and re-run the workflow." 1>&2 + exit 1 + else # put the lock file in place + echo "Setting lock file with UUID ${RUN_UUID}" + echo $RUN_UUID > $LOCKFILE + gsutil cp $LOCKFILE "${DIR}${LOCKFILE}" || { echo "Error uploading lockfile to ${DIR}${LOCKFILE}" 1>&2 ; exit 1; } + fi + >>> + + runtime { + docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0" + memory: "1 GB" + disks: "local-disk 10 HDD" + preemptible: select_first([preemptible_tries, 5]) + cpu: 1 + } + + output { + String run_uuid = read_string("RUN_UUID_STRING") + } +} + +task ReleaseLock { + meta { + volatile: true + } + + input { + String run_uuid + String output_directory + Array[String] load_metadata_done + Array[String] load_pet_done + Array[String] load_vet_done + File? service_account_json + + # runtime + Int? preemptible_tries + } + + String has_service_account_file = if (defined(service_account_json)) then 'true' else 'false' + + command <<< + set -x + set -e + + if [ ~{has_service_account_file} = 'true' ]; then + gcloud auth activate-service-account --key-file='~{service_account_json}' + fi + + + LOCKFILE="~{output_directory}/LOCKFILE" + EXISTING_LOCK_ID=$(gsutil cat ${LOCKFILE}) + CURRENT_RUN_ID="~{run_uuid}" + + if [ ${EXISTING_LOCK_ID} = ${CURRENT_RUN_ID} ]; then + gsutil rm $LOCKFILE + else + echo "ERROR: found mismatched lockfile containing run ${EXISTING_LOCK_ID}, which does not match this run ${CURRENT_RUN_ID}." 1>&2 + exit 1 + fi + >>> + + runtime { + docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:305.0.0" + memory: "1 GB" + disks: "local-disk 10 HDD" + preemptible: select_first([preemptible_tries, 5]) + cpu: 1 + } } task GetMaxTableId { @@ -186,6 +315,7 @@ task CreateImportTsvs { String docker String? for_testing_only + String run_uuid } Int disk_size = if defined(drop_state) then 30 else 75 @@ -220,6 +350,16 @@ task CreateImportTsvs { gsutil cp ~{input_vcf} . gsutil cp ~{input_vcf_index} . fi + + # check for existence of the correct lockfile + LOCKFILE="~{output_directory}/LOCKFILE" + EXISTING_LOCK_ID=$(gsutil cat ${LOCKFILE}) || { echo "Error retrieving lockfile from ${LOCKFILE}" 1>&2 ; exit 1; } + CURRENT_RUN_ID="~{run_uuid}" + + if [ ${EXISTING_LOCK_ID} != ${CURRENT_RUN_ID} ]; then + echo "ERROR: found mismatched lockfile containing run ${EXISTING_LOCK_ID}, which does not match this run ${CURRENT_RUN_ID}." 1>&2 + exit 1 + fi gatk --java-options "-Xmx7000m" CreateVariantIngestFiles \ -V ~{updated_input_vcf} \ @@ -342,6 +482,7 @@ task LoadTable { File? service_account_json String table_creation_done Array[String] tsv_creation_done + String run_uuid String docker } @@ -357,6 +498,17 @@ task LoadTable { gcloud config set project ~{project_id} fi + DIR="~{storage_location}/~{datatype}_tsvs/" + # check for existence of the correct lockfile + LOCKFILE="~{storage_location}/LOCKFILE" + EXISTING_LOCK_ID=$(gsutil cat ${LOCKFILE}) || { echo "Error retrieving lockfile from ${LOCKFILE}" 1>&2 ; exit 1; } + CURRENT_RUN_ID="~{run_uuid}" + + if [ "${EXISTING_LOCK_ID}" != "${CURRENT_RUN_ID}" ]; then + echo "ERROR: found mismatched lockfile containing run ${EXISTING_LOCK_ID}, which does not match this run ${CURRENT_RUN_ID}." 1>&2 + exit 1 + fi + DIR="~{storage_location}/~{datatype}_tsvs/" printf -v PADDED_TABLE_ID "%03d" ~{table_id} @@ -379,7 +531,6 @@ task LoadTable { else echo "no ${FILES} files to process in $DIR" fi - >>> runtime { @@ -389,5 +540,9 @@ task LoadTable { preemptible: 0 cpu: 1 } + + output { + String done = "true" + } }