Skip to content

Commit

Permalink
fix: order of include flag sample (#462)
Browse files Browse the repository at this point in the history
* Fixed include flag sample order

* Added include-flag to main.yaml

* Added includeflag to further functions in main.yml

* KeyError for missing flags

* Correction main.yml

* Code style changes
  • Loading branch information
AKBrueggemann authored Feb 9, 2022
1 parent 78cb7f4 commit 429e5d9
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 21 deletions.
16 changes: 0 additions & 16 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ jobs:
run: |
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
- name: Prepare test data for all technologies
if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'all' || matrix.rule == 'compare_assemblers')
run: |
Expand All @@ -92,7 +91,6 @@ jobs:
echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,$AMPLICON,illumina >> .tests/config/pep/samples.csv
echo ont-test,data/ont_reads.fastq.gz,,2022-01-01,$AMPLICON,ont >> .tests/config/pep/samples.csv
echo ion-test,data/ion_reads.fastq.gz,,2022-01-01,$AMPLICON,ion >> .tests/config/pep/samples.csv
- name: Prepare test data for Illumina
if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'illumina' || matrix.rule == 'compare_assemblers')
run: |
Expand All @@ -102,7 +100,6 @@ jobs:
curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.2.fastq.gz
echo sample_name,fq1,fq2,date,is_amplicon_data,technology > .tests/config/pep/samples.csv
echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,$AMPLICON,illumina >> .tests/config/pep/samples.csv
- name: Prepare test data for Oxford Nanopore
if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'ont' || matrix.rule == 'compare_assemblers')
run: |
Expand All @@ -111,7 +108,6 @@ jobs:
curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ont_reads.fastq.gz > .tests/data/ont_reads.fastq.gz
echo sample_name,fq1,date,is_amplicon_data,technology > .tests/config/pep/samples.csv
echo ont-test,data/ont_reads.fastq.gz,2022-01-01,$AMPLICON,ont >> .tests/config/pep/samples.csv
- name: Prepare test data for Ion Torrent
if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'ion' || matrix.rule == 'compare_assemblers')
run: |
Expand All @@ -120,23 +116,20 @@ jobs:
curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR574/003/ERR5745913/ERR5745913.fastq.gz > .tests/data/ion_reads.fastq.gz
echo sample_name,fq1,date,is_amplicon_data,technology > .tests/config/pep/samples.csv
echo ion-test,data/ion_reads.fastq.gz,2022-01-01,$AMPLICON,ion >> .tests/config/pep/samples.csv
- name: Use smaller reference files for testing
if: steps.test-resources.outputs.cache-hit != true
run: |
# mkdir -p .tests/resources/minikraken-8GB
# curl -SL https://github.com/thomasbtf/small-kraken-db/raw/master/human_k2db.tar.gz | tar zxvf - -C .tests/resources/minikraken-8GB --strip 1
mkdir -p .tests/resources/genomes
curl -SL "https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?id=NC_000021.9&db=nuccore&report=fasta" | gzip -c > .tests/resources/genomes/human-genome.fna.gz
- name: Simulate GISAID download
run: |
mkdir -p .tests/results/benchmarking/tables
echo -e "resources/genomes/B.1.1.7.fasta\nresources/genomes/B.1.351.fasta" > .tests/results/benchmarking/tables/strain-genomes.txt
mkdir -p .tests/resources/genomes
curl "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=MZ314997.1&rettype=fasta" | sed '$ d' > .tests/resources/genomes/B.1.1.7.fasta
curl "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=MZ314998.1&rettype=fasta" | sed '$ d' > .tests/resources/genomes/B.1.351.fasta
- name: Test rule ${{ matrix.rule }} on ${{ matrix.technology }} ${{ matrix.seq_method }} data
uses: snakemake/[email protected]
with:
Expand Down Expand Up @@ -293,7 +286,6 @@ jobs:
echo sample_name,fq1,fq2,date,is_amplicon_data,technology,test_case > .tests/config/pep/samples.csv
echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,1,illumina,case >> .tests/config/pep/samples.csv
echo ont-test,data/ont_reads.fastq.gz,,2022-01-01,1,ont,case >> .tests/config/pep/samples.csv
- name: Prepare test data
if: matrix.rule != 'generate_test_cases'
run: |
Expand All @@ -302,23 +294,20 @@ jobs:
curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.2.fastq.gz
echo sample_name,fq1,fq2,date,is_amplicon_data,technology > .tests/config/pep/samples.csv
echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,0,illumina >> .tests/config/pep/samples.csv
- name: Use smaller reference files for testing
if: steps.test-resources.outputs.cache-hit != true
run: |
# mkdir -p .tests/resources/minikraken-8GB
# curl -SL https://github.com/thomasbtf/small-kraken-db/raw/master/human_k2db.tar.gz | tar zxvf - -C .tests/resources/minikraken-8GB --strip 1
mkdir -p .tests/resources/genomes
curl -SL "https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?id=NC_000021.9&db=nuccore&report=fasta" | gzip -c > .tests/resources/genomes/human-genome.fna.gz
- name: Simulate GISAID download
run: |
mkdir -p .tests/results/benchmarking/tables
echo -e "resources/genomes/B.1.1.7.fasta\nresources/genomes/B.1.351.fasta" > .tests/results/benchmarking/tables/strain-genomes.txt
mkdir -p .tests/resources/genomes
curl "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=MZ314997.1&rettype=fasta" | sed '$ d' > .tests/resources/genomes/B.1.1.7.fasta
curl "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=MZ314998.1&rettype=fasta" | sed '$ d' > .tests/resources/genomes/B.1.351.fasta
- name: Test rule ${{ matrix.rule }}
uses: snakemake/[email protected]
with:
Expand Down Expand Up @@ -368,7 +357,6 @@ jobs:
else
echo "Strain calling was successful in all cases."
fi
- name: Check pseudoassembly benchmark
if: matrix.rule == 'benchmark_assembly'
run: |
Expand All @@ -380,7 +368,6 @@ jobs:
else
echo "Pseudoassembly was successful."
fi
- name: Check assembly benchmark
if: matrix.rule == 'benchmark_assembly'
run: |
Expand All @@ -392,12 +379,10 @@ jobs:
else
echo "Assembly was successful."
fi
- name: Print non-sars-cov-2 kallisto calls
if: matrix.rule == 'benchmark_non_sars_cov_2'
run: |
cat .tests/results/benchmarking/tables/strain-calls/non-cov2-*.strains.kallisto.tsv
- name: Test non-sars-cov-2 coronaviruses
if: matrix.rule == 'benchmark_non_sars_cov_2'
run: |
Expand All @@ -409,7 +394,6 @@ jobs:
else
echo "Workflow sucessfully identified samples as non-sars-cov-2 in all cases."
fi
- name: Change permissions for caching
run: sudo chmod -R 755 .tests/.snakemake/conda

Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -1272,11 +1272,11 @@ def get_include_flag(sample):
try:
samples = pep.sample_table
samples.dropna(subset=["include_in_high_genome_summary"], inplace=True)
return samples.loc[sample]["include_in_high_genome_summary"]
return {sample: samples.loc[sample]["include_in_high_genome_summary"]}
# if there is no include_in_high_genome_summary column defined in the
# samples.csv, always include the sample
except KeyError:
return 1
return {sample: "1"}


def get_include_flag_for_date(wildcards):
Expand Down
12 changes: 9 additions & 3 deletions workflow/scripts/generate-high-quality-report.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,19 @@
else:
# Aggregating fasta files
sequence_names = []
include_flag = []
sample_dict = {}
for sample in snakemake.params.includeflag:
sample_dict.update(sample)

with open(snakemake.output.fasta, "w") as outfile:
for file, include in zip(snakemake.input.contigs, snakemake.params.includeflag):
for file in snakemake.input.contigs:
with pysam.FastxFile(file) as infile:
for entry in infile:
sequence_names.append(entry.name)
if bool(int(include)):
to_include = int(sample_dict.get(entry.name))
include_flag.append(to_include)
if to_include:
print(f">{entry.name}", file=outfile)
print(entry.sequence, file=outfile)

Expand All @@ -52,7 +58,7 @@
"SAMPLE_TYPE": "s001",
"PUBLICATION_STATUS": "N",
"OWN_FASTA_ID": sequence_names,
"include": snakemake.params.includeflag,
"include": include_flag,
}
)

Expand Down

0 comments on commit 429e5d9

Please sign in to comment.