diff --git a/.tests/config/config.yaml b/.tests/config/config.yaml index 823df7959..25a8ea6f4 100644 --- a/.tests/config/config.yaml +++ b/.tests/config/config.yaml @@ -82,6 +82,12 @@ preprocessing: amplicon-reference: "MN908947" variant-calling: + # genome annotation to use. Can be + # 'orf' (e.g. ORF1ab, S, etc..) and/or + # 'protein' (e.g. nsp1, Hel, Spike Protein S1, etc..) + annotations: + - "orf" + - "protein" # false discovery rate to control for fdr: 0.05 # downsample loci to this read depth diff --git a/config/config.yaml b/config/config.yaml index 9d9ba78a2..afea4a946 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -91,6 +91,12 @@ assembly: min-variant-prob: 0.95 variant-calling: + # genome annotation to use. Can be + # 'orf' (e.g. ORF1ab, S, etc..) and/or + # 'protein' (e.g. nsp1, Hel, Spike Protein S1, etc..) + annotations: + - "orf" + - "protein" # false discovery rate to control for fdr: 0.05 # downsample loci to this read depth diff --git a/workflow/envs/genometools.yaml b/workflow/envs/genometools.yaml new file mode 100644 index 000000000..8b295f90f --- /dev/null +++ b/workflow/envs/genometools.yaml @@ -0,0 +1,5 @@ +channels: + - bioconda + - conda-forge +dependencies: + - genometools-genometools =1.6.2 diff --git a/workflow/envs/gff3sort.yaml b/workflow/envs/gff3sort.yaml new file mode 100644 index 000000000..c3a285ae4 --- /dev/null +++ b/workflow/envs/gff3sort.yaml @@ -0,0 +1,5 @@ +channels: + - bioconda + - conda-forge +dependencies: + - gff3sort = 0.1 diff --git a/workflow/envs/ucsc.yaml b/workflow/envs/ucsc.yaml new file mode 100644 index 000000000..46f35cd28 --- /dev/null +++ b/workflow/envs/ucsc.yaml @@ -0,0 +1,5 @@ +channels: + - bioconda + - conda-forge +dependencies: + - ucsc-bigbedtobed =377 diff --git a/workflow/rules/assembly.smk b/workflow/rules/assembly.smk index aa10715dc..0696ff023 100644 --- a/workflow/rules/assembly.smk +++ b/workflow/rules/assembly.smk @@ -159,8 +159,8 @@ rule filter_chr0: rule 
assembly_polishing_illumina: input: fasta="results/{date}/contigs/ordered/{sample}.fasta", - bcf="results/{date}/filtered-calls/ref~{sample}/{sample}.clonal.nofilter.bcf", - bcfidx="results/{date}/filtered-calls/ref~{sample}/{sample}.clonal.nofilter.bcf.csi", + bcf="results/{date}/filtered-calls/ref~{sample}/{sample}.clonal.nofilter.orf.bcf", + bcfidx="results/{date}/filtered-calls/ref~{sample}/{sample}.clonal.nofilter.orf.bcf.csi", output: report( "results/{date}/polishing/bcftools-illumina/{sample}.fasta", diff --git a/workflow/rules/benchmarking_common.smk b/workflow/rules/benchmarking_common.smk index 768dac228..5e876d900 100644 --- a/workflow/rules/benchmarking_common.smk +++ b/workflow/rules/benchmarking_common.smk @@ -21,8 +21,8 @@ def get_test_cases_variant_calls(technology, suffix="", get="path"): len(sample_table) == 1 ), f'Too many sampels are defined with technology "{technology}" for test case {wildcards.test_case}.' - bcf_path_high = "results/{date}/filtered-calls/ref~main/{sample}.subclonal.high+moderate-impact.bcf{suffix}" - bcf_path_low = "results/{date}/filtered-calls/ref~main/{sample}.subclonal.low-impact.bcf{suffix}" + bcf_path_high = "results/{date}/filtered-calls/ref~main/{sample}.subclonal.high+moderate-impact.orf.bcf{suffix}" + bcf_path_low = "results/{date}/filtered-calls/ref~main/{sample}.subclonal.low-impact.orf.bcf{suffix}" high_impact = expand( bcf_path_high, diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index d459a0d8d..d096a0904 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -239,7 +239,7 @@ def get_report_samples(wildcards): def get_merge_calls_input(suffix): def inner(wildcards): return expand( - "results/{{date}}/filtered-calls/ref~{{reference}}/{{sample}}.{{clonality}}.{{filter}}.{vartype}.fdr-controlled{suffix}", + "results/{{date}}/filtered-calls/ref~{{reference}}/{{sample}}.{{clonality}}.{{filter}}.{vartype}.{{annotation}}.fdr-controlled{suffix}", suffix=suffix, vartype=VARTYPES, 
) @@ -602,7 +602,7 @@ def get_target_events(wildcards): def get_control_fdr_input(wildcards): if wildcards.reference == "main": - return "results/{date}/filtered-calls/ref~{reference}/{sample}.{filter}.bcf" + return "results/{date}/filtered-calls/ref~{reference}/annot~{annotation}/{sample}.{filter}.bcf" else: # If reference is not main, we are polishing an assembly. # Here, there is no need to structural variants or annotation based filtering. @@ -1590,7 +1590,7 @@ def get_input_by_mode(wildcard): mode=["major", "any"], ), zip_expand( - "results/{zip1}/filtered-calls/ref~main/{zip2}.subclonal.{{exp}}.bcf", + "results/{zip1}/filtered-calls/ref~main/{zip2}.subclonal.{{exp}}.orf.bcf", zip_wildcard_1=get_dates(), zip_wildcard_2=get_samples(), expand_wildcard=config["variant-calling"]["filters"], @@ -1642,10 +1642,25 @@ def get_pangolin_for_report(wildcards): return paths +def get_genome_annotation(suffix=""): + def inner(wildcards): + if wildcards.annotation == "orf": + return f"resources/annotation.gff.gz{suffix}" + elif wildcards.annotation == "protein": + return f"resources/protein_products.gff.gz{suffix}" + + raise AttributeError( + f"Config for annotation '{wildcards.annotation}' not recognized. Can be either 'orf' or 'protein'." 
+ ) + + return inner + + wildcard_constraints: sample="[^/.]+", vartype="|".join(VARTYPES), clonality="subclonal|clonal", + annotation="orf|protein", filter="|".join( list(map(re.escape, config["variant-calling"]["filters"])) + ["nofilter"] ), diff --git a/workflow/rules/generate_output.smk b/workflow/rules/generate_output.smk index ffb6573ea..5db0fe226 100644 --- a/workflow/rules/generate_output.smk +++ b/workflow/rules/generate_output.smk @@ -141,7 +141,7 @@ rule overview_table_patient_csv: kraken=get_kraken_output, pangolin=get_pangolin_for_report, bcf=expand_samples_for_date( - "results/{{date}}/filtered-calls/ref~main/{sample}.subclonal.high+moderate-impact.bcf", + "results/{{date}}/filtered-calls/ref~main/{sample}.subclonal.high+moderate-impact.orf.bcf", ), # Added because WorkflowError: Rule parameter depends on checkpoint but checkpoint output is not defined # as input file for the rule. Please add the output of the respective checkpoint to the rule inputs. @@ -172,7 +172,7 @@ use rule overview_table_patient_csv as overview_table_environment_csv with: "results/{{date}}/tables/read_pair_counts/{sample}.txt", ), bcf=expand_samples_for_date( - "results/{{date}}/filtered-calls/ref~main/{sample}.subclonal.high+moderate-impact.bcf", + "results/{{date}}/filtered-calls/ref~main/{sample}.subclonal.high+moderate-impact.orf.bcf", ), output: qc_data="results/{date}/tables/environment-overview.csv", @@ -238,7 +238,7 @@ rule filter_overview_html: htmlindex="index.html", caption="../report/filter-overview.rst", category="4. Sequences", - subcategory="0. 
Quality Overview", + subcategory=" Quality Overview", ), params: pin_until="Sample", @@ -279,13 +279,13 @@ rule plot_lineages_over_time: rule plot_variants_over_time: input: bcf=lambda wildcards: expand( - "results/{date}/filtered-calls/ref~main/{sample}.subclonal.high+moderate-impact.bcf", + "results/{date}/filtered-calls/ref~main/{sample}.subclonal.high+moderate-impact.orf.bcf", zip, date=get_dates_before_date(wildcards), sample=get_samples_before_date(wildcards), ), csi=lambda wildcards: expand( - "results/{date}/filtered-calls/ref~main/{sample}.subclonal.high+moderate-impact.bcf.csi", + "results/{date}/filtered-calls/ref~main/{sample}.subclonal.high+moderate-impact.orf.bcf.csi", zip, date=get_dates_before_date(wildcards), sample=get_samples_before_date(wildcards), @@ -338,7 +338,7 @@ rule pangolin_call_overview_html: htmlindex="index.html", caption="../report/pangolin-call-overview.rst", category="4. Sequences", - subcategory="0. Quality Overview", + subcategory=" Quality Overview", ), params: pin_until="Sample", @@ -372,9 +372,10 @@ rule snakemake_reports_patient: ["results/{{date}}/lineage-variant-report/{sample}.lineage-variants"] ), lambda wildcards: expand( - "results/{{date}}/vcf-report/{target}.{filter}", + "results/{{date}}/vcf-report/{target}.{filter}.{annotation}", target=get_samples_for_date(wildcards.date) + ["all"], filter=config["variant-calling"]["filters"], + annotation=config["variant-calling"]["annotations"], ), # 3. Sequencing Details "results/{date}/qc/laboratory/multiqc.html", @@ -394,8 +395,9 @@ rule snakemake_reports_patient: ), # 5. Variant Call Files expand( - ["results/{{date}}/ucsc-vcfs/all.{{date}}.{filter}.vcf"], + ["results/{{date}}/ucsc-vcfs/all.{{date}}.{filter}.{annotation}.vcf"], filter=config["variant-calling"]["filters"], + annotation=config["variant-calling"]["annotations"], ), # 6. 
High Quality Genomes "results/high-quality-genomes/{date}.fasta", @@ -430,9 +432,10 @@ use rule snakemake_reports_patient as snakemake_reports_environment with: ["results/{{date}}/lineage-variant-report/{sample}.lineage-variants"] ), lambda wildcards: expand( - "results/{{date}}/vcf-report/{target}.{filter}", + "results/{{date}}/vcf-report/{target}.{filter}.{annotation}", target=get_samples_for_date(wildcards.date) + ["all"], filter=config["variant-calling"]["filters"], + annotation=config["variant-calling"]["annotations"], ), # 3. Sequencing Details "results/{date}/qc/laboratory/multiqc.html", diff --git a/workflow/rules/lineage_variant_calling.smk b/workflow/rules/lineage_variant_calling.smk index 7eb4a1815..3f6bbeed1 100644 --- a/workflow/rules/lineage_variant_calling.smk +++ b/workflow/rules/lineage_variant_calling.smk @@ -59,7 +59,7 @@ use rule overview_table_html as generate_lineage_variant_report with: htmlindex="index.html", caption="../report/lineage-variant-report.rst", category="2. Variant Call Details", - subcategory="1. VOC Similarity", + subcategory=" VOC Similarity", ), log: "logs/{date}/lineage-variant-report/{sample}.log", diff --git a/workflow/rules/long_read.smk b/workflow/rules/long_read.smk index 3a7321ca4..a11215a10 100644 --- a/workflow/rules/long_read.smk +++ b/workflow/rules/long_read.smk @@ -149,8 +149,8 @@ use rule assembly_polishing_ont as medaka_consensus_reference with: rule bcftools_consensus_ont: input: fasta="results/{date}/consensus/medaka/{sample}/{sample}.fasta", - bcf="results/{date}/filtered-calls/ref~{sample}/{sample}.subclonal.high+moderate-impact.bcf", # clonal vs. subclonal? - bcfidx="results/{date}/filtered-calls/ref~{sample}/{sample}.subclonal.high+moderate-impact.bcf.csi", + bcf="results/{date}/filtered-calls/ref~{sample}/{sample}.subclonal.high+moderate-impact.orf.bcf", # clonal vs. subclonal? 
+ bcfidx="results/{date}/filtered-calls/ref~{sample}/{sample}.subclonal.high+moderate-impact.orf.bcf.csi", output: temp("results/{date}/consensus/bcftools/{sample}.fasta"), log: diff --git a/workflow/rules/ref.smk b/workflow/rules/ref.smk index 996ba3c1e..3f469ea3b 100644 --- a/workflow/rules/ref.smk +++ b/workflow/rules/ref.smk @@ -48,6 +48,88 @@ rule get_genome_annotation: "zcat | grep -v '#' | sort -k1,1 -k4,4n -k5,5n -t$'\t' | bgzip -c > {output}) 2> {log}" +rule download_protein_products: + output: + temp("resources/protein_products.bed"), + log: + "logs/download_protein_products.log", + conda: + "../envs/ucsc.yaml" + shell: + "(bigBedToBed http://hgdownload.soe.ucsc.edu/gbdb/wuhCor1/uniprot/unipChainCov2.bb" + " -chrom=NC_045512v2 -start=0 -end=29903 {output})" + "2>{log}" + + +rule bed2gff: + input: + "resources/protein_products.bed", + output: + temp("resources/protein_products.gff"), + log: + "logs/bed2gff3.log", + conda: + "../envs/genometools.yaml" + shell: + "(cut -f1-12 {input} | sed -e 's/ /-/g' | sed -e 's/NC_045512v2/NC_045512.2/g'" + " | gt bed_to_gff3 -featuretype gene -thicktype transcript -blocktype CDS -o {output} -force /dev/stdin )" + "2> {log}" + + +rule filter_gff: + input: + "resources/protein_products.gff", + output: + temp("resources/protein_products.formatted.gff"), + log: + "logs/filter_gff.log", + conda: + "../envs/tabix.yaml" + shell: + # strip comment lines from the gff (see https://www.ensembl.org/info/docs/tools/vep/script/vep_custom.html) + "cat {input} | grep -v '#' > {output} 2> {log}" + + +rule fix_gff: + input: + "resources/protein_products.formatted.gff", + output: + temp("resources/protein_products.fixed.gff"), + log: + "logs/fix_gff.log", + conda: + "../envs/python.yaml" + script: + "../scripts/fix-protein-gff.py" + + +rule format_fixed_gff: + input: + "resources/protein_products.fixed.gff", + output: + temp("resources/protein_products.fixed.formatted.gff"), + log: + "logs/format_gff.log", + conda: + "../envs/tabix.yaml" + 
shell: + # download, sort and bgzip gff (see https://www.ensembl.org/info/docs/tools/vep/script/vep_custom.html) + "cat {input} | grep -v '#' | sort -k1,1 -k4,4n -k5,5n -k3,3n -t$'\t' > {output} 2> {log}" + + +rule compress_gff: + input: + "resources/protein_products.fixed.formatted.gff", + output: + "resources/protein_products.gff.gz", + log: + "logs/compress_gff.log", + conda: + "../envs/tabix.yaml" + shell: + "bgzip -c {input} > {output} 2> {log}" + + rule get_genome_annotation_for_known_variants: output: "resources/annotation_known_variants.gff.gz", diff --git a/workflow/rules/variant_annotation.smk b/workflow/rules/variant_annotation.smk index 129945e0a..687eb73ad 100644 --- a/workflow/rules/variant_annotation.smk +++ b/workflow/rules/variant_annotation.smk @@ -21,19 +21,19 @@ rule annotate_variants: plugins="resources/vep/plugins", fasta="resources/genomes/main.fasta", fai="resources/genomes/main.fasta.fai", - gff="resources/annotation.gff.gz", - csi="resources/annotation.gff.gz.tbi", + gff=get_genome_annotation(), + csi=get_genome_annotation(suffix=".tbi"), problematic="resources/problematic-sites.vcf.gz", problematic_tbi="resources/problematic-sites.vcf.gz.tbi", output: - calls="results/{date}/annotated-calls/ref~main/{sample}.bcf", - stats="results/{date}/annotated-calls/ref~main/{sample}.html", + calls="results/{date}/annotated-calls/ref~main/annot~{annotation}/{sample}.bcf", + stats="results/{date}/annotated-calls/ref~main/annot~{annotation}/{sample}.html", params: # Pass a list of plugins to use, see https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html # Plugin args can be added as well, e.g. via an entry "MyPlugin,1,FOO", see docs. 
plugins=["LoFtool"], extra=get_vep_args, log: - "logs/{date}/vep/{sample}.log", + "logs/{date}/vep/{annotation}/{sample}.log", wrapper: "0.72.0/bio/vep/annotate" diff --git a/workflow/rules/variant_filtration.smk b/workflow/rules/variant_filtration.smk index b8c125728..0c81778c3 100644 --- a/workflow/rules/variant_filtration.smk +++ b/workflow/rules/variant_filtration.smk @@ -6,14 +6,16 @@ rule vembrane_filter: input: - "results/{date}/annotated-calls/ref~main/{sample}.bcf", + "results/{date}/annotated-calls/ref~main/annot~{annotation}/{sample}.bcf", output: - temp("results/{date}/filtered-calls/ref~main/{sample}.{filter}.bcf"), + temp( + "results/{date}/filtered-calls/ref~main/annot~{annotation}/{sample}.{filter}.bcf" + ), params: expression=get_vembrane_expression, extra="", log: - "logs/{date}/vembrane/{sample}.{filter}.log", + "logs/{date}/vembrane/{sample}.{filter}.{annotation}.log", wrapper: "0.71.1/bio/vembrane/filter" @@ -23,13 +25,13 @@ rule control_fdr: get_control_fdr_input, output: temp( - "results/{date}/filtered-calls/ref~{reference}/{sample}.{clonality}.{filter}.{vartype}.fdr-controlled.bcf" + "results/{date}/filtered-calls/ref~{reference}/{sample}.{clonality}.{filter}.{vartype}.{annotation}.fdr-controlled.bcf" ), params: fdr=config["variant-calling"]["fdr"], events=get_target_events, log: - "logs/{date}/control-fdr/ref~{reference}/{sample}.{clonality}.{filter}.{vartype}.log", + "logs/{date}/control-fdr/ref~{reference}/{sample}.{clonality}.{filter}.{vartype}.{annotation}.log", conda: "../envs/varlociraptor.yaml" shell: @@ -42,9 +44,9 @@ rule merge_calls: calls=get_merge_calls_input(".bcf"), idx=get_merge_calls_input(".bcf.csi"), output: - "results/{date}/filtered-calls/ref~{reference}/{sample}.{clonality}.{filter}.bcf", + "results/{date}/filtered-calls/ref~{reference}/{sample}.{clonality}.{filter}.{annotation}.bcf", log: - "logs/{date}/merge-calls/ref~{reference}/{sample}.{clonality}.{filter}.log", + 
"logs/{date}/merge-calls/ref~{reference}/{sample}.{clonality}.{filter}.{annotation}.log", params: "-a -Ob", wrapper: diff --git a/workflow/rules/variant_report.smk b/workflow/rules/variant_report.smk index 1b002c1a1..bf3e8cc6a 100644 --- a/workflow/rules/variant_report.smk +++ b/workflow/rules/variant_report.smk @@ -11,15 +11,15 @@ rule vcf_report: bams=get_report_input("results/{date}/recal/ref~main/{sample}.bam"), bais=get_report_input("results/{date}/recal/ref~main/{sample}.bam.bai"), bcfs=get_report_input( - "results/{date}/filtered-calls/ref~main/{sample}.subclonal.{filter}.bcf" + "results/{date}/filtered-calls/ref~main/{sample}.subclonal.{filter}.{annotation}.bcf" ), output: report( - directory("results/{date}/vcf-report/{target}.{filter}"), + directory("results/{date}/vcf-report/{target}.{filter}.{annotation}"), htmlindex="index.html", caption="../report/variant-calls.rst", category="2. Variant Call Details", - subcategory="2. {filter}", + subcategory="Variants with {filter} on {annotation} level", ), params: bcfs=get_report_bcfs, @@ -31,7 +31,7 @@ rule vcf_report: template=get_resource("custom-table-report.js"), ), log: - "logs/{date}/vcf-report/{target}.{filter}.log", + "logs/{date}/vcf-report/{target}.{filter}.{annotation}.log", conda: "../envs/rbt.yaml" shell: @@ -42,20 +42,20 @@ rule vcf_report: rule ucsc_vcf: input: bcfs=get_report_input( - "results/{date}/filtered-calls/ref~main/{sample}.subclonal.{filter}.bcf" + "results/{date}/filtered-calls/ref~main/{sample}.subclonal.{filter}.{annotation}.bcf" ), strain_call=( "results/{date}/tables/strain-calls/{target}.polished.strains.pangolin.csv" ), output: report( - "results/{date}/ucsc-vcfs/{target}.{filter}.vcf", + "results/{date}/ucsc-vcfs/{target}.{filter}.{annotation}.vcf", caption="../report/variant-calls.rst", category="5. 
Variant Call Files", - subcategory="{filter}", + subcategory="With {filter} on {annotation}s", ), log: - "logs/{date}/ucsc-vcf/{target}.subclonal.{filter}.log", + "logs/{date}/ucsc-vcf/{target}.subclonal.{filter}.{annotation}.log", conda: "../envs/bcftools.yaml" script: @@ -64,16 +64,18 @@ rule ucsc_vcf: rule aggregate_ucsc_vcfs: input: - expand_samples_for_date("results/{{date}}/ucsc-vcfs/{sample}.{{filter}}.vcf"), + expand_samples_for_date( + "results/{{date}}/ucsc-vcfs/{sample}.{{filter}}.{{annotation}}.vcf" + ), output: report( - "results/{date}/ucsc-vcfs/all.{date}.{filter}.vcf", + "results/{date}/ucsc-vcfs/all.{date}.{filter}.{annotation}.vcf", caption="../report/ucsc.rst", category="5. Variant Call Files", subcategory="Overview", ), log: - "logs/{date}/aggregate_ucsc_vcfs-{filter}.log", + "logs/{date}/aggregate_ucsc_vcfs-{filter}-{annotation}.log", conda: "../envs/unix.yaml" shell: diff --git a/workflow/scripts/aggregate-pangolin-calls-per-stage.py b/workflow/scripts/aggregate-pangolin-calls-per-stage.py index f28810458..228b1b7de 100644 --- a/workflow/scripts/aggregate-pangolin-calls-per-stage.py +++ b/workflow/scripts/aggregate-pangolin-calls-per-stage.py @@ -32,8 +32,6 @@ "Call failed. 
Reason: " + pangolin_calls_by_stage.loc[failed_called, "note"] ) -print(pangolin_calls_by_stage) - pangolin_calls_by_stage = pangolin_calls_by_stage.pivot( index="sample", columns="stage", values="lineage" ).reset_index() diff --git a/workflow/scripts/fix-protein-gff.py b/workflow/scripts/fix-protein-gff.py new file mode 100644 index 000000000..f9a83da27 --- /dev/null +++ b/workflow/scripts/fix-protein-gff.py @@ -0,0 +1,60 @@ +import sys + +sys.stderr = open(snakemake.log[0], "w") + +import re + +import pandas as pd + + +def set_id(info_string: str, format: str) -> str: + if "ID=" in info_string: + return info_string + + infos = dict(x.split("=") for x in info_string.split(";")) + + info_string = f"ID={format}-{infos['Name']};" + info_string + return info_string + + +def fix_transcript(info_string: str, type: str, to_change: str, true_parent: str) -> str: + if type != to_change: + return info_string + + infos = dict(x.split("=") for x in info_string.split(";")) + + return re.sub("Parent=?.*;", f'Parent={true_parent}-{infos["Name"]};', info_string) + + +gff = pd.read_csv(snakemake.input[0], sep="\t", header=None) + +info_column = gff.columns[-1] +format_column = gff.columns[2] + +exons = gff.loc[gff[format_column] == "CDS"].copy() +exons.replace(to_replace="CDS", value="exon", inplace=True) +gff = pd.concat([gff, exons]) + + +gff[info_column] = gff.apply(lambda x: set_id(x[info_column], x[format_column]), axis=1) +gff[info_column] = gff.apply( + lambda x: fix_transcript( + x[info_column], x[format_column], to_change="CDS", true_parent="transcript" + ), + axis=1, +) +gff[info_column] = gff.apply( + lambda x: fix_transcript( + x[info_column], x[format_column], to_change="exon", true_parent="transcript" + ), + axis=1, +) + + +gff.loc[gff[format_column] == "gene", info_column] = gff.loc[ + gff[format_column] == "gene", info_column +].apply(lambda x: x + ";biotype=protein_coding") + +gff[format_column].replace(to_replace="transcript", value="mRNA", inplace=True) + 
+gff.to_csv(snakemake.output[0], header=False, index=False, sep="\t")