Skip to content

Commit

Permalink
fix the broken --keep function
Browse files: browse the repository at this point in the history
  • Loading branch information
dpark01 committed Sep 22, 2023
1 parent ee01ab2 commit ef36163
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion pipes/WDL/tasks/tasks_interhost.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ task subsample_by_cases {
~{"--start_date " + start_date} \
~{"--end_date " + end_date} \
--output outputs/genome_matrix_days.tsv
date;uptime;free
# rule unit_conversion
# Generate matrix of genome and case counts per epiweek
Expand All @@ -72,6 +73,7 @@ task subsample_by_cases {
~{"--start_date " + start_date} \
~{"--end_date " + end_date} \
--output outputs/matrix_cases_unit.tsv
date;uptime;free
# rule correct_bias
# Correct under- and oversampling genome counts based on epidemiological data
Expand All @@ -84,18 +86,22 @@ task subsample_by_cases {
--output1 outputs/weekly_sampling_proportions.tsv \
--output2 outputs/weekly_sampling_bias.tsv \
--output3 outputs/matrix_genomes_unit_corrected.tsv
date;uptime;free
# rule subsample
# Sample genomes and metadata according to the corrected genome matrix
echo "subsample data according to bias-correction"
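# subsampler_timeseries.py documents --keep as optional but actually fails when it is omitted,
# so always pass data/keep.txt: start from an empty file and overwrite it with the keep file if one was provided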
cp /dev/null data/keep.txt
~{"cp " + keep_file + " data/keep.txt"}
python3 /opt/subsampler/scripts/subsampler_timeseries.py \
--metadata data/metadata.tsv \
--genome-matrix outputs/matrix_genomes_unit_corrected.tsv \
--index-column ~{id_column} \
--geo-column ~{geo_column} \
--date-column ~{date_column} \
--time-unit ~{unit} \
~{"--keep " + keep_file} \
--keep data/keep.txt \
~{"--remove " + remove_file} \
~{"--filter-file " + filter_file} \
~{"--seed " + seed_num} \
Expand All @@ -106,13 +112,15 @@ task subsample_by_cases {
--sampled-metadata outputs/selected_metadata.tsv \
--report outputs/sampling_stats.txt
echo '# Sampling proportion: ~{baseline}' | cat - outputs/sampling_stats.txt > temp && mv temp outputs/sampling_stats.txt
date;uptime;free
# copy outputs from container's temp dir to host-accessible working dir for delocalization
echo "wrap up"
mv outputs/* .
# get counts
cat selected_sequences.txt | wc -l | tee NUM_OUT
# get hardware utilization
set +o pipefail
cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC
cat /proc/loadavg > CPU_LOAD
{ cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES
Expand Down

0 comments on commit ef36163

Please sign in to comment.