From 0070d8927290dd01f418b876b60f3405158f1ab9 Mon Sep 17 00:00:00 2001 From: Alexander Thomas <77535027+alethomas@users.noreply.github.com> Date: Tue, 31 Oct 2023 14:33:10 +0100 Subject: [PATCH] fix: update multiqc (#607) * update multiqc * change action with pinned python version * re-implement ont testing * update multiqc * leave old fastqc version * input only one fastq file * update fastqc and add resources * add new input rule for only one fastq file * fmt * fmt * fmt * update multiqc * exclude kraken input * include TODO for kraken in multiqc --- .github/workflows/main.yml | 23 +++++++++++++---------- workflow/rules/common.smk | 9 ++++----- workflow/rules/qc.smk | 19 ++++++++++++------- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e7c07708..1dd78e03 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -71,11 +71,14 @@ jobs: matrix: rule: [all, all -npr] # disable ont actions - # technology: [all, illumina, ont, ion] - technology: [all, illumina, ion] + technology: [all, illumina, ont, ion] + # technology: [all, illumina, ion] seq_method: [shotgun, amplicon] steps: - uses: actions/checkout@v2 + - uses: actions/setup-python@v4 + with: + python-version: '3.11' # android - will release about 10 GB if you don't need Android # dotnet - will release about 20 GB if you don't need .NET @@ -103,14 +106,14 @@ jobs: curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.2.fastq.gz echo sample_name,fq1,fq2,date,is_amplicon_data,technology > .tests/config/pep/samples.csv echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,$AMPLICON,illumina >> .tests/config/pep/samples.csv - # - name: Prepare test data for Oxford Nanopore - # if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'ont' || matrix.rule == 'compare_assemblers') - # run: | - # if [[ "${{ 
matrix.seq_method }}" = "shotgun" ]] ; then export AMPLICON=0; else export AMPLICON=1; fi - # mkdir -p .tests/data - # curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ont_reads.fastq.gz > .tests/data/ont_reads.fastq.gz - # echo sample_name,fq1,date,is_amplicon_data,technology > .tests/config/pep/samples.csv - # echo ont-test,data/ont_reads.fastq.gz,2022-01-01,$AMPLICON,ont >> .tests/config/pep/samples.csv + - name: Prepare test data for Oxford Nanopore + if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'ont' || matrix.rule == 'compare_assemblers') + run: | + if [[ "${{ matrix.seq_method }}" = "shotgun" ]] ; then export AMPLICON=0; else export AMPLICON=1; fi + mkdir -p .tests/data + curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ont_reads.fastq.gz > .tests/data/ont_reads.fastq.gz + echo sample_name,fq1,date,is_amplicon_data,technology > .tests/config/pep/samples.csv + echo ont-test,data/ont_reads.fastq.gz,2022-01-01,$AMPLICON,ont >> .tests/config/pep/samples.csv - name: Prepare test data for Ion Torrent if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'ion' || matrix.rule == 'compare_assemblers') run: | diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index d712652a..a372380e 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -71,7 +71,6 @@ def get_samples_for_date(date, filtered=False): # filter if filtered: with checkpoints.quality_filter.get(date=date).output.passed_filter.open() as f: - passend_samples = [] for line in f: passend_samples.append(line.strip()) @@ -212,6 +211,10 @@ def get_fastqs(wildcards): return pep.sample_table.loc[wildcards.sample][["fq1"]] +def get_fastqc_input(wildcards): + return pep.sample_table.loc[wildcards.sample][["fq1"]] + + def get_resource(name): return workflow.source_path(f"../../resources/{name}") @@ -406,7 +409,6 @@ def get_reads(wildcards): or 
wildcards.reference.startswith("polished-") or wildcards.reference.startswith("consensus-") ): - illumina_pattern = expand( "results/{date}/trimmed/fastp-pe/{sample}.{read}.fastq.gz", read=[1, 2], @@ -683,7 +685,6 @@ def generate_mixtures(wildcards): mixture_list = [] for mix in range(no_mixtures): - fractions = [random.randint(1, 100) for _ in range(no_strains)] s = sum(fractions) fractions = [round(i / s * 100) for i in fractions] @@ -964,7 +965,6 @@ def get_assemblies_for_submission(wildcards, agg_type): return "results/{date}/contigs/pseudoassembled/{sample}.fasta" if wildcards.date != BENCHMARK_DATE_WILDCARD: - all_samples_for_date = get_samples_for_date(wildcards.date) masked_samples = load_filtered_samples(wildcards, "masked-assembly") @@ -1558,7 +1558,6 @@ def get_aggregated_pangolin_calls(wildcards, return_list="paths"): expanded_patterns = [] for sample in samples: - stage_wildcards = get_pattern_by_technology( wildcards, sample=sample, diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index d45cd0a38b..9d80ed7c 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -6,14 +6,17 @@ rule fastqc: input: - get_fastqs, + get_fastqc_input, output: html="results/{date}/qc/fastqc/{sample}.html", zip="results/{date}/qc/fastqc/{sample}_fastqc.zip", log: "logs/{date}/fastqc/{sample}.log", + threads: 1 + resources: + mem_mb=1024, wrapper: - "v1.12.2/bio/fastqc" + "v2.6.0/bio/fastqc" # TODO Change multiqc rules back to MultiQC wrapper once v1.11 is released @@ -33,8 +36,9 @@ rule multiqc: ), expand_samples_for_date("logs/{{date}}/kallisto_quant/{sample}.log"), get_fastp_results, - get_kraken_output, - get_kraken_output_after_filtering, + # TODO re-implement kraken output + # get_kraken_output, + # get_kraken_output_after_filtering, output: "results/{date}/qc/multiqc.html", params: @@ -44,7 +48,7 @@ rule multiqc: log: "logs/{date}/multiqc.log", wrapper: - "v1.15.1/bio/multiqc" + "v2.8.0/bio/multiqc" rule multiqc_lab: @@ -56,7 +60,8 @@ rule 
multiqc_lab: ] ), get_fastp_results, - get_kraken_output, + # TODO re-implement kraken output + # get_kraken_output, output: report( "results/{date}/qc/laboratory/multiqc.html", @@ -70,7 +75,7 @@ rule multiqc_lab: log: "logs/{date}/multiqc.log", wrapper: - "v1.15.1/bio/multiqc" + "v2.8.0/bio/multiqc" rule samtools_flagstat: