From e407afbcb9a1ac81ec7a88933e57420c61682fb9 Mon Sep 17 00:00:00 2001 From: Miguel Brown Date: Wed, 17 Jul 2024 14:55:13 -0400 Subject: [PATCH] :pencil: update snv hotspot refs --- README.md | 2 +- workflows/kfdrc_mutect2_production_wf.cwl | 2 +- workflows/kfdrc_tumor_only_dna_wf.cwl | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 00d95ea..1d1cee0 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ Most inputs have recommended values that should auto import both files and param - `b_allele`: dbSNP_v153_ucsc-compatible.converted.vt.decomp.norm.common_snps.vcf.gz. dbSNP v153 was obtained from [the ftp site](https://ftp.ncbi.nih.gov/snp/archive/b153/VCF/GCF_000001405.38.gz). Then, using a awk/perl/bash script of your choice, convert NCBI accession names to UCSC-style chromosome names using [this table](https://hgdownload.soe.ucsc.edu/hubs/GCF/000/001/405/GCF_000001405.39/GCF_000001405.39.chromAlias.txt). Next, run the [VCF normalization tool](https://github.com/kids-first/kf-annotation-tools/blob/master/tools/normalize_vcf.cwl), then use bcftools to extract only common snps: `bcftools view --include INFO/COMMON=1 --types snps dbSNP_v153_ucsc-compatible.converted.vt.decomp.norm.vcf.gz -O z -o dbSNP_v153_ucsc-compatible.converted.vt.decomp.norm.common_snps.vcf.gz`. Lastly, use tabix to index the resultant file. - `vep_cache`: homo_sapiens_merged_vep_105_indexed_GRCh38.tar.gz - `genomic_hotspots`: tert.bed # bed file with TERT gene promoter region - - `protein_snv_hotspots`: protein_snv_cancer_hotspots_v2.ENS105_liftover.tsv + - `protein_snv_hotspots`: kfdrc_protein_snv_cancer_hotspots_20240718.txt - `protein_indel_hotspots`: protein_indel_cancer_hotspots_v2.ENS105_liftover.tsv - `echtvar_anno_zips`: gnomad.v3.1.1.custom.echtvar.zip ### Necessary for user to define: diff --git a/workflows/kfdrc_mutect2_production_wf.cwl b/workflows/kfdrc_mutect2_production_wf.cwl index fae986f..b775b60 100644 --- a/workflows/kfdrc_mutect2_production_wf.cwl +++ b/workflows/kfdrc_mutect2_production_wf.cwl @@ -202,7 +202,7 @@ inputs: protein_snv_hotspots: {type: 'File[]?', doc: "Column-name-containing, tab-delimited\ \ file(s) containing protein names and amino acid positions corresponding to\ \ hotspots", "sbg:suggestedValue": [{class: File, path: 645919782fe81458768c552c, - name: protein_snv_cancer_hotspots_v2.ENS105_liftover.tsv}]} + name: kfdrc_protein_snv_cancer_hotspots_20240718.txt}]} protein_indel_hotspots: {type: 'File[]?', doc: "Column-name-containing, tab-delimited\ \ file(s) containing protein names and amino acid position ranges corresponding\ \ to hotspots", "sbg:suggestedValue": [{class: File, path: 645919782fe81458768c552d, diff --git a/workflows/kfdrc_tumor_only_dna_wf.cwl b/workflows/kfdrc_tumor_only_dna_wf.cwl index 47ba370..45bae83 100644 --- a/workflows/kfdrc_tumor_only_dna_wf.cwl +++ b/workflows/kfdrc_tumor_only_dna_wf.cwl @@ -65,7 +65,7 @@ doc: | - `b_allele`: dbSNP_v153_ucsc-compatible.converted.vt.decomp.norm.common_snps.vcf.gz. dbSNP v153 was obtained from [the ftp site](https://ftp.ncbi.nih.gov/snp/archive/b153/VCF/GCF_000001405.38.gz). Then, using a awk/perl/bash script of your choice, convert NCBI accession names to UCSC-style chromosome names using [this table](https://hgdownload.soe.ucsc.edu/hubs/GCF/000/001/405/GCF_000001405.39/GCF_000001405.39.chromAlias.txt). Next, run the [VCF normalization tool](https://github.com/kids-first/kf-annotation-tools/blob/master/tools/normalize_vcf.cwl), then use bcftools to extract only common snps: `bcftools view --include INFO/COMMON=1 --types snps dbSNP_v153_ucsc-compatible.converted.vt.decomp.norm.vcf.gz -O z -o dbSNP_v153_ucsc-compatible.converted.vt.decomp.norm.common_snps.vcf.gz`. Lastly, use tabix to index the resultant file. - `vep_cache`: homo_sapiens_merged_vep_105_indexed_GRCh38.tar.gz - `genomic_hotspots`: tert.bed # bed file with TERT gene promoter region - - `protein_snv_hotspots`: protein_snv_cancer_hotspots_v2.ENS105_liftover.tsv + - `protein_snv_hotspots`: kfdrc_protein_snv_cancer_hotspots_20240718.txt - `protein_indel_hotspots`: protein_indel_cancer_hotspots_v2.ENS105_liftover.tsv - `echtvar_anno_zips`: gnomad.v3.1.1.custom.echtvar.zip ### Necessary for user to define: @@ -204,7 +204,7 @@ inputs: genomic_hotspots: {type: 'File[]?', doc: "Tab-delimited BED formatted file(s) containing hg38 genomic positions corresponding to hotspots", "sbg:suggestedValue": [{class: File, path: 607713829360f10e3982a423, name: tert.bed}]} protein_snv_hotspots: {type: 'File[]?', doc: "Column-name-containing, tab-delimited file(s) containing protein names and amino acid - positions corresponding to hotspots", "sbg:suggestedValue": [{class: File, path: 663d2bcc27374715fccd8c6a, name: protein_snv_cancer_hotspots_v2.ENS105_liftover.tsv}]} + positions corresponding to hotspots", "sbg:suggestedValue": [{class: File, path: 66980e845a58091951d53984, name: kfdrc_protein_snv_cancer_hotspots_20240718.txt}]} protein_indel_hotspots: {type: 'File[]?', doc: "Column-name-containing, tab-delimited file(s) containing protein names and amino acid position ranges corresponding to hotspots", "sbg:suggestedValue": [{class: File, path: 663d2bcc27374715fccd8c6f, name: protein_indel_cancer_hotspots_v2.ENS105_liftover.tsv}]} mutect2_retain_info: {type: 'string?', doc: "csv string with INFO fields that you want to keep", default: "gnomad_3_1_1_AC,gnomad_3_1_1_AN,gnomad_3_1_1_AF,gnomad_3_1_1_nhomalt,gnomad_3_1_1_AC_popmax,gnomad_3_1_1_AN_popmax,gnomad_3_1_1_AF_popmax,gnomad_3_1_1_nhomalt_popmax,gnomad_3_1_1_AC_controls_and_biobanks,gnomad_3_1_1_AN_controls_and_biobanks,gnomad_3_1_1_AF_controls_and_biobanks,gnomad_3_1_1_AF_non_cancer,gnomad_3_1_1_primate_ai_score,gnomad_3_1_1_splice_ai_consequence,gnomad_3_1_1_AF_non_cancer_afr,gnomad_3_1_1_AF_non_cancer_ami,gnomad_3_1_1_AF_non_cancer_asj,gnomad_3_1_1_AF_non_cancer_eas,gnomad_3_1_1_AF_non_cancer_fin,gnomad_3_1_1_AF_non_cancer_mid,gnomad_3_1_1_AF_non_cancer_nfe,gnomad_3_1_1_AF_non_cancer_oth,gnomad_3_1_1_AF_non_cancer_raw,gnomad_3_1_1_AF_non_cancer_sas,gnomad_3_1_1_AF_non_cancer_amr,gnomad_3_1_1_AF_non_cancer_popmax,gnomad_3_1_1_AF_non_cancer_all_popmax,gnomad_3_1_1_FILTER,MQ,MQ0,QSI,HotSpotAllele"}