From 28ef9ab91d101f6d877a64f735d5e168b84b6dbc Mon Sep 17 00:00:00 2001 From: Judy Yao Date: Tue, 24 May 2022 10:59:16 -0400 Subject: [PATCH] Revise SNPclip sort order, use user-supplied input format; Fix LDpair multiallelic query for second SNP input --- LDlink/LDcommon.py | 6 ++++-- LDlink/LDhap.py | 2 +- LDlink/LDmatrix.py | 2 +- LDlink/LDmatrix_plot_sub.py | 2 +- LDlink/SNPclip.py | 2 +- LDlink/news-5.3.3.html | 1 + 6 files changed, 9 insertions(+), 6 deletions(-) diff --git a/LDlink/LDcommon.py b/LDlink/LDcommon.py index a4c27a7b..93fe2c6c 100644 --- a/LDlink/LDcommon.py +++ b/LDlink/LDcommon.py @@ -218,7 +218,7 @@ def getRecomb(db, filename, chromosome, begin, end, genome_build): }) + '\n') return recomb_results_sanitized -def parse_vcf(vcf,snp_coords): +def parse_vcf(vcf,snp_coords,ifsorted): delimiter = "#" snp_lists = str('**'.join(vcf)).split(delimiter) snp_dict = {} @@ -258,7 +258,9 @@ def parse_vcf(vcf,snp_coords): snp_rs_dict[s_key] = snp_dict[snp_coord[2]] del snp_dict - sorted_snp_rs = OrderedDict(sorted(snp_rs_dict.items(),key=customsort)) + sorted_snp_rs = snp_rs_dict + if ifsorted: + sorted_snp_rs = OrderedDict(sorted(snp_rs_dict.items(),key=customsort)) return sorted_snp_rs," ".join(missing_rs) diff --git a/LDlink/LDhap.py b/LDlink/LDhap.py index 510b3771..1a123742 100755 --- a/LDlink/LDhap.py +++ b/LDlink/LDhap.py @@ -240,7 +240,7 @@ def set_alleles(a1, a2): hap2.append([]) # parse vcf - snp_dict,missing_snp = parse_vcf(vcf[h+1:],snp_coords) + snp_dict,missing_snp = parse_vcf(vcf[h+1:],snp_coords,True) # throw error if no data is returned from 1000G if len(missing_snp.split()) == len(snp_pos): output["error"] = "Input variant list does not contain any valid RS numbers or coordinates. 
" + str(output["warning"] if "warning" in output else "") diff --git a/LDlink/LDmatrix.py b/LDlink/LDmatrix.py index 271a5423..4c0d0b3d 100755 --- a/LDlink/LDmatrix.py +++ b/LDlink/LDmatrix.py @@ -280,7 +280,7 @@ def set_alleles(a1, a2): for i in range(len(index) - 1): hap2.append([]) - snp_dict,missing_snp = parse_vcf(vcf[h+1:],snp_coords) + snp_dict,missing_snp = parse_vcf(vcf[h+1:],snp_coords,True) # all lists does not contain data which is returned from 1000G if len(missing_snp.split()) == len(snp_pos): diff --git a/LDlink/LDmatrix_plot_sub.py b/LDlink/LDmatrix_plot_sub.py index b3839a15..6e43cb82 100644 --- a/LDlink/LDmatrix_plot_sub.py +++ b/LDlink/LDmatrix_plot_sub.py @@ -189,7 +189,7 @@ def set_alleles(a1, a2): for i in range(len(index) - 1): hap2.append([]) - snp_dict,missing_snp = parse_vcf(vcf[h+1:],snp_coords) + snp_dict,missing_snp = parse_vcf(vcf[h+1:],snp_coords,True) rsnum_lst = [] allele_lst = [] diff --git a/LDlink/SNPclip.py b/LDlink/SNPclip.py index c3110ec9..d54b1ce9 100755 --- a/LDlink/SNPclip.py +++ b/LDlink/SNPclip.py @@ -305,7 +305,7 @@ def calc_r2(var1, var2): if head[i] in pop_ids: pop_index.append(i) - snp_dict,missing_snp = parse_vcf(vcf[h+1:],snp_coords) + snp_dict,missing_snp = parse_vcf(vcf[h+1:],snp_coords,False) # throw error if no data is returned from 1000G if len(missing_snp.split()) == len(snp_pos): diff --git a/LDlink/news-5.3.3.html b/LDlink/news-5.3.3.html index a67c700d..a3b89c1d 100644 --- a/LDlink/news-5.3.3.html +++ b/LDlink/news-5.3.3.html @@ -1,5 +1,6 @@

LDlink 5.3.3 Release (05/24/2022)