Skip to content

Commit

Permalink
fix panel match and gnomad col
Browse files Browse the repository at this point in the history
  • Loading branch information
sigven committed Dec 17, 2023
1 parent 361276b commit b60ea96
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 11 deletions.
9 changes: 5 additions & 4 deletions pcgr/cpsr.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ def get_args():
optional_other.add_argument('--version', action='version', version=str(utils.get_cpsr_version()))
optional_other.add_argument('--no_reporting',action="store_true",help="Run functional variant annotation on VCF through VEP/vcfanno, omit classification/report generation (STEP 4), default: %(default)s")
optional_other.add_argument('--retained_info_tags', dest ='retained_info_tags', default='None', help='Comma-separated string of VCF INFO tags from query VCF that should be kept in CPSR output TSV')
#optional_other.add_argument('--report_theme',choices = ['default','cerulean','journal','flatly','readable','spacelab','united','cosmo','lumen','paper','sandstone','simplex','yeti'], default = 'default', help='Visual report theme (rmarkdown), default: %(default)s' )
#optional_other.add_argument('--report_nonfloating_toc', action='store_true', help='Do not float the table of contents (TOC) in output HTML report, default: %(default)s')
#optional_other.add_argument('--report_table_display', choices = ['full','light'], default='light', help="Set the level of detail/comprehensiveness in interactive datables of HTML report, very comprehensive (option 'full') or slim/focused ('light'), default: %(default)s")
optional_other.add_argument('--report_theme',choices = ['default','cerulean','journal','flatly','readable','spacelab','united','cosmo','lumen','paper','sandstone','simplex','yeti'], default = 'default', help='Visual report theme (rmarkdown), default: %(default)s' )
optional_other.add_argument('--report_nonfloating_toc', action='store_true', help='Do not float the table of contents (TOC) in output HTML report, default: %(default)s')
optional_other.add_argument('--report_table_display', choices = ['full','light'], default='light', help="Set the level of detail/comprehensiveness in interactive datables of HTML report, very comprehensive (option 'full') or slim/focused ('light'), default: %(default)s")
optional_other.add_argument('--ignore_noncoding', action='store_true',dest='ignore_noncoding',default=False,help='Ignore non-coding (i.e. non protein-altering) variants in report, default: %(default)s')
optional_other.add_argument("--debug", action="store_true", help="Print full commands to log")
optional_other.add_argument("--pcgrr_conda", default="pcgrr", help="pcgrr conda env name (default: %(default)s)")
Expand Down Expand Up @@ -282,7 +282,8 @@ def run_cpsr(conf_options, cpsr_paths):
output_pass_vcf2tsv_gz, pcgr_db_dir = cpsr_paths["db_dir"], logger = logger)
variant_set = variant.clean_annotations(variant_set, yaml_data, germline = True, logger = logger)
variant_set.to_csv(output_pass_tsv_gz, sep="\t", compression="gzip", index=False)
utils.remove(output_pass_vcf2tsv_gz)
if not debug:
utils.remove(output_pass_vcf2tsv_gz)

logger.info('Finished cpsr-summarise')

Expand Down
3 changes: 2 additions & 1 deletion pcgr/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,8 @@ def run_pcgr(pcgr_paths, conf_options):
variant_set = variant.set_allelic_support(variant_set, allelic_support_tags = yaml_data["conf"]['somatic_snv']['allelic_support'])
variant_set = variant.clean_annotations(variant_set, yaml_data, germline = False, logger = logger)
variant_set.to_csv(output_pass_tsv_gz, sep="\t", compression="gzip", index=False)
utils.remove(output_pass_vcf2tsv_gz)
if not debug:
utils.remove(output_pass_vcf2tsv_gz)

if yaml_data["conf"]['assay_properties']['type'] == 'WGS' or yaml_data["conf"]['assay_properties']['type'] == 'WES':
# check that output file exist
Expand Down
10 changes: 7 additions & 3 deletions pcgr/variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def append_annotations(vcf2tsv_gz_fname: str, pcgr_db_dir: str, logger):
## check number of variants with Entrez gene IDs
num_recs_with_entrez_hits = vcf2tsv_df["ENTREZGENE"].notna().sum()

#print(str(num_recs_with_entrez_hits))
## merge variant set with ClinVar trait and variant origin annotations
if num_recs_with_clinvar_hits > 0:
if os.path.exists(clinvar_tsv_fname):
Expand Down Expand Up @@ -133,7 +134,6 @@ def append_annotations(vcf2tsv_gz_fname: str, pcgr_db_dir: str, logger):
usecols=["entrezgene","name"])
gene_xref_df = gene_xref_df[gene_xref_df['entrezgene'].notnull()].drop_duplicates()
gene_xref_df["entrezgene"] = gene_xref_df["entrezgene"].astype("int64").astype("string")
#print(gene_xref_df.head)
gene_xref_df.rename(columns = {'entrezgene':'ENTREZGENE', 'name':'GENENAME'}, inplace = True)
vcf2tsv_df = vcf2tsv_df.merge(gene_xref_df, left_on=["ENTREZGENE"], right_on=["ENTREZGENE"], how="left")
vcf2tsv_df["ENTREZGENE"] = vcf2tsv_df['ENTREZGENE'].str.replace("\\.[0-9]{1,}$", "", regex = True)
Expand Down Expand Up @@ -253,8 +253,12 @@ def clean_annotations(variant_set: pd.DataFrame, yaml_data: dict, germline: bool
variant_set['EFFECT_PREDICTIONS'] = variant_set['EFFECT_PREDICTIONS'].str.replace("\\.&|\\.$", "NA&", regex = True)
variant_set['EFFECT_PREDICTIONS'] = variant_set['EFFECT_PREDICTIONS'].str.replace("&$", "", regex = True)
variant_set['EFFECT_PREDICTIONS'] = variant_set['EFFECT_PREDICTIONS'].str.replace("&", ", ", regex = True)
variant_set.loc[variant_set['CLINVAR_CONFLICTED'] == 1, "CLINVAR_CONFLICTED"] = True
variant_set.loc[variant_set['CLINVAR_CONFLICTED'] != 1, "CLINVAR_CONFLICTED"] = False
variant_set['clinvar_conflicted_bool'] = True
variant_set.loc[variant_set['CLINVAR_CONFLICTED'] == 1, "clinvar_conflicted_bool"] = True
variant_set.loc[variant_set['CLINVAR_CONFLICTED'] != 1, "clinvar_conflicted_bool"] = False
variant_set.drop('CLINVAR_CONFLICTED', inplace=True, axis=1)
variant_set.rename(columns = {'clinvar_conflicted_bool':'CLINVAR_CONFLICTED'}, inplace = True)


if not {'VCF_SAMPLE_ID'}.issubset(variant_set.columns):
variant_set['VCF_SAMPLE_ID'] = str(yaml_data['sample_id'])
Expand Down
4 changes: 2 additions & 2 deletions pcgrr/data-raw/data-raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ usethis::use_data(color_palette, overwrite = T)
#-----evidence types---------#
evidence_types <- c("predictive","prognostic","diagnostic",
"oncogenic","predisposing","functional")
usethis::use_data(evidence_types)
usethis::use_data(evidence_types, overwrite = T)

#-----evidence levels---------#
evidence_levels <- c("any","A_B","C_D_E")
usethis::use_data(evidence_levels)
usethis::use_data(evidence_levels, overwrite = T)


#-----input column names/types-----#
Expand Down
Binary file modified pcgrr/data/data_coltype_defs.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion scripts/cpsr_validate_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def simplify_vcf(input_vcf, validated_vcf, vcf, custom_bed, pcgr_directory, geno

## awk command that drops secondary finding (ACMG_SF) records, except those that also match the target panel
## identifier in column 4 (such records can potentially be part of the secondary findings list)
awk_command = "awk 'BEGIN{FS=\"\\t\"}{if($4 !~ /ACMG_SF/ || ($4 ~ /ACMG_SF/ && $4 ~ /" + str(ge_panel_identifier) + "/))print;}'"
awk_command = "awk 'BEGIN{FS=\"\\t\"}{if($4 !~ /ACMG_SF/ || ($4 ~ /ACMG_SF/ && $4 ~ /" + str(ge_panel_identifier) + ":/))print;}'"
if gwas_findings == 0 and secondary_findings == 1:
check_subprocess(logger, f'bgzip -dc {target_bed_gz} | egrep -v "(\|tag\|)" >> {virtual_panels_tmp_bed}', debug)
elif gwas_findings == 0 and secondary_findings == 0:
Expand Down

0 comments on commit b60ea96

Please sign in to comment.