h1/h2 to hap1/hap2

aberthel · Jul 20, 2018 · 5294aa2 · 5294aa2
1 parent f89aa46
commit 5294aa2
Show file tree

Hide file tree

Showing 20 changed files with 224 additions and 224 deletions.
diff --git a/FalsePos.py b/FalsePos.py
@@ -89,7 +89,7 @@ def simpval2(cnt,bm):
     with open(ifile, 'r') as inf:
 	inf.readline()
 	for line in inf:
-            (chr,ref_coord,h1_coord,h2_coord,ref_allele,h1_allele,h2_allele,cA,cC,cG,cT,cN,
+            (chr,ref_coord,hap1_coord,hap2_coord,ref_allele,hap1_allele,hap2_allele,cA,cC,cG,cT,cN,
               ref_allele_ratio,sum_ref_n_alt_cnts,p_binom,cnv,mmap_log) = line.split('\t')
             act_pvals_list.append(float(p_binom))
             counts = [int(e) for e in [cA, cC, cG, cT]]

diff --git a/FalsePos_regions.py b/FalsePos_regions.py
@@ -89,11 +89,11 @@ def simpval2(cnt,bm):
     with open(ifile, 'r') as inf:
 	inf.readline()
 	for line in inf:
-            region,h1_count,h2_count,h1_allele_ratio,p_binom,snv_count,snv_coords = line.split('\t')
+            region,hap1_count,hap2_count,hap1_allele_ratio,p_binom,snv_count,snv_coords = line.split('\t')
             act_pvals_list.append(float(p_binom))
             #counts = [int(e) for e in [cA, cC, cG, cT]]
             #counts = sorted(counts, reverse=True)[:2]
-            counts = [int(h1_count), int(h2_count)]
+            counts = [int(hap1_count), int(hap2_count)]
             cnt_sums_list.append(sum(counts))
     act_pvals = numpy.array(act_pvals_list)
     cnt_sums  = numpy.array(cnt_sums_list)

diff --git a/PIPELINE.mk b/PIPELINE.mk
@@ -156,42 +156,42 @@ $(PREFIX)_ref_allele_ratios.filtered_counts.chrs1-22$(KEEP_CHR).pdf: $(PREFIX)_f
 # in non-autosomal chr; 
 # and sites with seemingly misphased/miscalled nearby variants
 # filter/adjust sites imbalanced likely due to unaccounted multi-mapping reads 
-$(PREFIX)_filtered_counts.chrs1-22$(KEEP_CHR).tsv: $(PREFIX)_raw_counts.tsv $(PREFIX)_h1_mmapreads.mpileup $(PREFIX)_h2_mmapreads.mpileup
+$(PREFIX)_filtered_counts.chrs1-22$(KEEP_CHR).tsv: $(PREFIX)_raw_counts.tsv $(PREFIX)_hap1_mmapreads.mpileup $(PREFIX)_hap2_mmapreads.mpileup
 	cat $< | \
 	python $(PL)/filter_cnv_sites.py $(PREFIX)_discarded_HetSNVs.tsv $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_rd.tab | \
 	python $(PL)/filter_non-autosomal_chr.py $(PREFIX)_discarded_HetSNVs.tsv $(KEEP_CHR) | \
 	python $(PL)/filter_phase_warnings.py $(PREFIX)_discarded_HetSNVs.tsv | \
 	python $(PL)/filter_sites_w_mmaps.py $(AMB_MODE) $(PREFIX)_discarded_HetSNVs.tsv $(PREFIX)_sites_w_mmaps.log \
-		$(PREFIX)_h1_mmapreads.mpileup $(PREFIX)_h2_mmapreads.mpileup > $@
+		$(PREFIX)_hap1_mmapreads.mpileup $(PREFIX)_hap2_mmapreads.mpileup > $@
 
 # allelic ratio distrs
 $(PREFIX)_ref_allele_ratios.raw_counts.pdf: $(PREFIX)_raw_counts.tsv
 	cat $< | python $(PL)/plot_AllelicRatio_distribution.py $(PREFIX)_raw_counts
 
 # counts
-$(PREFIX)_raw_counts.tsv: $(PREFIX)_h1_uniqreads.mpileup $(PREFIX)_h2_uniqreads.mpileup
+$(PREFIX)_raw_counts.tsv: $(PREFIX)_hap1_uniqreads.mpileup $(PREFIX)_hap2_uniqreads.mpileup
 	python $(PL)/pileup2counts.py 1 $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_ref.bed \
-	$(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_h1.bed $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_h2.bed \
+	$(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_hap1.bed $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_hap2.bed \
 	$(PREFIX)_discarded_HetSNVs.tsv \
-	$(PREFIX)_h1_uniqreads.mpileup $(PREFIX)_h2_uniqreads.mpileup > $@
+	$(PREFIX)_hap1_uniqreads.mpileup $(PREFIX)_hap2_uniqreads.mpileup > $@
 
 
 # pileups
-$(PREFIX)_h1_mmapreads.mpileup: $(HetSNV_MMAPALNS_FILENAME)
-	$(SAMTOOLS) mpileup -BQ0 --max-depth 999999 --ff UNMAP -f $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_h1.fa $< \
-	--positions $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_h1.bed --output $@
+$(PREFIX)_hap1_mmapreads.mpileup: $(HetSNV_MMAPALNS_FILENAME)
+	$(SAMTOOLS) mpileup -BQ0 --max-depth 999999 --ff UNMAP -f $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hap1.fa $< \
+	--positions $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_hap1.bed --output $@
 
-$(PREFIX)_h2_mmapreads.mpileup: $(HetSNV_MMAPALNS_FILENAME)
-	$(SAMTOOLS) mpileup -BQ0 --max-depth 999999 --ff UNMAP -f $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_h2.fa $< \
-	--positions $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_h2.bed --output $@
+$(PREFIX)_hap2_mmapreads.mpileup: $(HetSNV_MMAPALNS_FILENAME)
+	$(SAMTOOLS) mpileup -BQ0 --max-depth 999999 --ff UNMAP -f $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hap2.fa $< \
+	--positions $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_hap2.bed --output $@
 
-$(PREFIX)_h1_uniqreads.mpileup: $(HetSNV_UNIQALNS_FILENAME)
-	$(SAMTOOLS) mpileup -BQ0 --max-depth 999999 --ff UNMAP -f $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_h1.fa $< \
-	--positions $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_h1.bed --output $@
+$(PREFIX)_hap1_uniqreads.mpileup: $(HetSNV_UNIQALNS_FILENAME)
+	$(SAMTOOLS) mpileup -BQ0 --max-depth 999999 --ff UNMAP -f $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hap1.fa $< \
+	--positions $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_hap1.bed --output $@
 
-$(PREFIX)_h2_uniqreads.mpileup: $(HetSNV_UNIQALNS_FILENAME)
-	$(SAMTOOLS) mpileup -BQ0 --max-depth 999999 --ff UNMAP -f $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_h2.fa $< \
-	--positions $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_h2.bed --output $@
+$(PREFIX)_hap2_uniqreads.mpileup: $(HetSNV_UNIQALNS_FILENAME)
+	$(SAMTOOLS) mpileup -BQ0 --max-depth 999999 --ff UNMAP -f $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hap2.fa $< \
+	--positions $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_hap2.bed --output $@
 
 # by default --ff was also filtering-out some other - secondary? reads:  [UNMAP,SECONDARY,QCFAIL,DUP], leaving only UNMAP for now
 
@@ -200,15 +200,15 @@ $(PREFIX)_h2_uniqreads.mpileup: $(HetSNV_UNIQALNS_FILENAME)
 
 # non-uniq alns over hetSNVs:
 $(PREFIX)_$(ALIGNMENT_MODE)-params_crdsorted_mmapreads_over_hetSNVs.bam: $(FINAL_ALIGNMENT_FILENAME)
-	cat $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_h1.bed $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_h2.bed | \
+	cat $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_hap1.bed $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_hap2.bed | \
 	$(SAMTOOLS) view -h -L - $< | awk '$$5!="255" {print $$0}' | \
 	$(SAMTOOLS) view -b - > $@
 	$(SAMTOOLS) index $@
 	$(SAMTOOLS) flagstat $@ > $@.stat
 
 # uniq alns over hetSNVs:
 $(PREFIX)_$(ALIGNMENT_MODE)-params_crdsorted_uniqreads_over_hetSNVs.bam: $(FINAL_ALIGNMENT_FILENAME)
-	cat $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_h1.bed $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_h2.bed | \
+	cat $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_hap1.bed $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_hap2.bed | \
 	$(SAMTOOLS) view -h -q 255 -L - $< | \
 	$(SAMTOOLS) view -b - > $@
 	$(SAMTOOLS) index $@

diff --git a/PIPELINE_aggregate_over_genomic_regions.mk b/PIPELINE_aggregate_over_genomic_regions.mk
@@ -47,16 +47,16 @@ $(info $(empty_string))
 ######################
 
 
-all: $(PREFIX)_region_h1_ratios.filtered_counts.chrs1-22$(KEEP_CHR).pdf $(PREFIX)_region_h1_ratios.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.pdf $(PREFIX)_interesting_regions.FDR-$(FDR_CUTOFF).binom.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv $(PREFIX)_interesting_regions.FDR-$(FDR_CUTOFF).betabinom.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv
+all: $(PREFIX)_region_hap1_ratios.filtered_counts.chrs1-22$(KEEP_CHR).pdf $(PREFIX)_region_hap1_ratios.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.pdf $(PREFIX)_interesting_regions.FDR-$(FDR_CUTOFF).binom.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv $(PREFIX)_interesting_regions.FDR-$(FDR_CUTOFF).betabinom.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv
 
 # todo
 # this seems to work, but the way it deals with paths, filenames, etc needs to be cleaned up
 # here it is even worse
 # fix columns
 $(PREFIX)_interesting_regions.FDR-$(FDR_CUTOFF).betabinom.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv: $(PREFIX)_region_filtered_counts.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv
 	awk '{print $$1"\t"$$2"\t"$$3"\t"$$2"\t"$$3"\t0\t0\t"$$2+$$3"\t"$$4"\t1.0"}' $< | \
-		sed 's/h1_count\th2_count\t0\t0\t0/cA\tcC\tcG\tcT\tsum_ref_n_alt_cnts/' | \
-		sed 's/h1_allele_ratio\t1.0/ref_allele_ratio\tcnv/' > $(PREFIX)_region_filtered_counts_min.$(Cntthresh_tot)-total.$(Cntthresh_min)-lower.tsv.tmp
+		sed 's/hap1_count\thap2_count\t0\t0\t0/cA\tcC\tcG\tcT\tsum_ref_n_alt_cnts/' | \
+		sed 's/hap1_allele_ratio\t1.0/ref_allele_ratio\tcnv/' > $(PREFIX)_region_filtered_counts_min.$(Cntthresh_tot)-total.$(Cntthresh_min)-lower.tsv.tmp
 	Rscript $(PL)/alleledb_calcOverdispersion.R \
 		$(PREFIX)_region_filtered_counts_min.$(Cntthresh_tot)-total.$(Cntthresh_min)-lower.tsv.tmp \
 		$(PREFIX)_region_FDR-$(FDR_CUTOFF).betabinomial.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min 
@@ -74,38 +74,38 @@ $(PREFIX)_interesting_regions.FDR-$(FDR_CUTOFF).binom.chrs1-22$(KEEP_CHR).$(Cntt
 	cat $< | python $(PL)/filter_regions_by_pval.py $(PREFIX)_region_FDR-$(FDR_CUTOFF).binom.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.txt > $@
 
 # allelic ratio distrs
-$(PREFIX)_region_h1_ratios.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.pdf: $(PREFIX)_region_filtered_counts.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv
-	cat $< | python $(PL)/plot_AllelicRatio_distribution.py $(PREFIX)_region_filtered_counts.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt h1_allele_ratio
+$(PREFIX)_region_hap1_ratios.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.pdf: $(PREFIX)_region_filtered_counts.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv
+	cat $< | python $(PL)/plot_AllelicRatio_distribution.py $(PREFIX)_region_filtered_counts.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt hap1_allele_ratio
 
 $(PREFIX)_region_filtered_counts.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv: $(PREFIX)_region_filtered_counts.chrs1-22$(KEEP_CHR).tsv
 	cat $< | python $(PL)/filter_regions_by_counts.py $(Cntthresh_tot) $(Cntthresh_min) > $@
 
 # allelic ratio distrs
-$(PREFIX)_region_h1_ratios.filtered_counts.chrs1-22$(KEEP_CHR).pdf: $(PREFIX)_region_filtered_counts.chrs1-22$(KEEP_CHR).tsv
-	cat $< | python $(PL)/plot_AllelicRatio_distribution.py $(PREFIX)_region_filtered_counts.chrs1-22$(KEEP_CHR) h1_allele_ratio
+$(PREFIX)_region_hap1_ratios.filtered_counts.chrs1-22$(KEEP_CHR).pdf: $(PREFIX)_region_filtered_counts.chrs1-22$(KEEP_CHR).tsv
+	cat $< | python $(PL)/plot_AllelicRatio_distribution.py $(PREFIX)_region_filtered_counts.chrs1-22$(KEEP_CHR) hap1_allele_ratio
 
 $(PREFIX)_region_filtered_counts.chrs1-22$(KEEP_CHR).tsv: $(PREFIX)_region_raw_counts_uniq.tsv $(PREFIX)_region_raw_counts_mmap.tsv
 	awk '{print $$1"\t"$$2"\t"$$3"\t"$$4"\t"$$5"\t"$$6}' $< | \
 	python $(PL)/filter_regions_non-autosomal_chr.py $(REGIONS_FILE) $(PREFIX)_discarded_regions.tsv $(KEEP_CHR) | \
 	python $(PL)/filter_regions_w_mmaps.py $(AMB_MODE) \
 	$(PREFIX)_discarded_regions.tsv $(PREFIX)_regions_w_mmaps.log $(PREFIX)_region_raw_counts_mmap.tsv > $@
 
-$(PREFIX)_region_raw_counts_mmap.tsv: hets_regions_h2.bed
-	cat hets_regions_h1.bed hets_regions_h2.bed | $(BEDTOOLS_intersectBed) -a stdin -b $(MMAP_ALN_FILES) -split -wb -bed | \
-	python $(PL)/intersect2counts.py mmap hets_regions_h1.bed > $@
+$(PREFIX)_region_raw_counts_mmap.tsv: hets_regions_hap2.bed
+	cat hets_regions_hap1.bed hets_regions_hap2.bed | $(BEDTOOLS_intersectBed) -a stdin -b $(MMAP_ALN_FILES) -split -wb -bed | \
+	python $(PL)/intersect2counts.py mmap hets_regions_hap1.bed > $@
 
 # allowing multiple UNIQ_ALN_FILES files, but probably slower (since became -b instead of -a), also changes to intersect2counts.py to accomodate output file format
-#$(PREFIX)_region_counts.tsv: hets_regions_h1.bed hets_regions_h2.bed
-#        $(BEDTOOLS_intersectBed) -a $(UNIQ_ALN_FILES) -b hets_regions_h1.bed hets_regions_h2.bed -split -wb -bed | \
+#$(PREFIX)_region_counts.tsv: hets_regions_hap1.bed hets_regions_hap2.bed
+#        $(BEDTOOLS_intersectBed) -a $(UNIQ_ALN_FILES) -b hets_regions_hap1.bed hets_regions_hap2.bed -split -wb -bed | \
 #                python $(PL)/intersect2counts.py > $@
 
 
 # -split -- not counting reads that splice over a het
-$(PREFIX)_region_raw_counts_uniq.tsv: hets_regions_h2.bed
-	cat hets_regions_h1.bed hets_regions_h2.bed | $(BEDTOOLS_intersectBed) -a stdin -b $(UNIQ_ALN_FILES) -split -wb -bed | \
-	python $(PL)/intersect2counts.py uniq hets_regions_h1.bed > $@
+$(PREFIX)_region_raw_counts_uniq.tsv: hets_regions_hap2.bed
+	cat hets_regions_hap1.bed hets_regions_hap2.bed | $(BEDTOOLS_intersectBed) -a stdin -b $(UNIQ_ALN_FILES) -split -wb -bed | \
+	python $(PL)/intersect2counts.py uniq hets_regions_hap1.bed > $@
 
-hets_regions_h2.bed : $(REGIONS_FILE) $(COUNTS_FILE)
+hets_regions_hap2.bed : $(REGIONS_FILE) $(COUNTS_FILE)
 	grep -v '^#chr' $(COUNTS_FILE) | awk '{print $$1"\t"$$2-1"\t"$$2}' | $(BEDTOOLS_intersectBed) -a $(REGIONS_FILE) -b stdin | \
-	python $(PL)/refbed2hapcoords.py $(COUNTS_FILE) hets_regions_h1.bed hets_regions_h2.bed
+	python $(PL)/refbed2hapcoords.py $(COUNTS_FILE) hets_regions_hap1.bed hets_regions_hap2.bed
 
diff --git a/PIPELINE_aggregated_counts.mk b/PIPELINE_aggregated_counts.mk
@@ -39,9 +39,9 @@ $(info $(empty_string))
 ### PIPELINE START ###
 ######################
 
-all: $(PREFIX)_ref_allele_ratios.raw_counts.pdf $(PREFIX)_ref_allele_ratios.filtered_counts.chrs1-22$(KEEP_CHR).pdf $(PREFIX)_interestingHets.FDR-$(FDR_CUTOFF).binom.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv $(PREFIX)_interestingHets.FDR-$(FDR_CUTOFF).betabinom.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv 
+#all: $(PREFIX)_ref_allele_ratios.raw_counts.pdf $(PREFIX)_ref_allele_ratios.filtered_counts.chrs1-22$(KEEP_CHR).pdf $(PREFIX)_interestingHets.FDR-$(FDR_CUTOFF).binom.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv $(PREFIX)_interestingHets.FDR-$(FDR_CUTOFF).betabinom.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv 
 
-#all: $(PREFIX)_interestingHets.FDR-$(FDR_CUTOFF).betabinom.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv 
+all: $(PREFIX)_interestingHets.FDR-$(FDR_CUTOFF).betabinom.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv 
 
 # this seems to work, but the way it deals with paths, filenames, etc needs to be cleaned up
 $(PREFIX)_interestingHets.FDR-$(FDR_CUTOFF).betabinom.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv: $(PREFIX)_filtered_counts.chrs1-22$(KEEP_CHR).$(Cntthresh_tot)-tot_$(Cntthresh_min)-min_cnt.tsv
@@ -85,7 +85,7 @@ $(PREFIX)_ref_allele_ratios.raw_counts.pdf: $(PREFIX)_raw_counts.tsv
 # counts
 $(PREFIX)_raw_counts.tsv: $(INPUT_UNIQ_READS_PILEUP_FILES)
 	python $(PL)/pileup2counts.py 1 $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_ref.bed \
-	$(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_h1.bed $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_h2.bed \
+	$(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_hap1.bed $(PGENOME_DIR)/$(VCF_SAMPLE_ID)_hetSNVs_hap2.bed \
 	$(PREFIX)_discarded_HetSNVs.tsv \
 	$(INPUT_UNIQ_READS_PILEUP_FILES) > $@
 

diff --git a/filter_by_counts.py b/filter_by_counts.py
@@ -4,8 +4,8 @@
 th_each = int(sys.argv[2])
 sys.stdout.write(sys.stdin.readline())
 for line in sys.stdin:
-    (chrm, ref_coord, h1_coord, h2_coord, ref_allele, h1_allele, h2_allele, 
+    (chrm, ref_coord, hap1_coord, hap2_coord, ref_allele, hap1_allele, hap2_allele, 
      cA, cC, cG, cT, cN, ref_allele_ratio, sum_ref_n_alt_cnts, p_binom, cnv, mmap_log) = line.split('\t')
     counts = {'A': int(cA), 'C': int(cC), 'G':int(cG), 'T':int(cT), 'N':int(cN)}
-    if counts[h1_allele] + counts[h2_allele] >= th_tot and counts[h1_allele] >= th_each and counts[h2_allele] >= th_each:
+    if counts[hap1_allele] + counts[hap2_allele] >= th_tot and counts[hap1_allele] >= th_each and counts[hap2_allele] >= th_each:
         sys.stdout.write(line)
diff --git a/filter_by_pval.py b/filter_by_pval.py
@@ -5,14 +5,14 @@
         if line.startswith('Target'): pth = float(line.split()[-1])
 
 for line in sys.stdin:
-    (chr,ref_coord,h1_coord,h2_coord,ref_allele,h1_allele,h2_allele,cA,cC,cG,cT,cN,
+    (chr,ref_coord,hap1_coord,hap2_coord,ref_allele,hap1_allele,hap2_allele,cA,cC,cG,cT,cN,
     ref_allele_ratio,sum_ref_n_alt_cnts,p_binom,cnv,mmap_log) = line.split('\t')
     outcolmns = '\t'.join([
                                        chr,
                                        ref_coord,
                                        ref_allele,
-                                       h1_allele,
-                                       h2_allele,
+                                       hap1_allele,
+                                       hap2_allele,
                                        cA, cC, cG, cT, cN,
                                        ref_allele_ratio,
                                        p_binom

diff --git a/filter_cnv_sites.py b/filter_cnv_sites.py
@@ -11,9 +11,9 @@
 with open(sys.argv[1], 'a') as rm_hetSNV_f:
     for line in sys.stdin:
         if not line.startswith('#'):
-            (chrm,ref_coord,h1_coord,h2_coord,ref_allele,h1_allele,h2_allele,
+            (chrm,ref_coord,hap1_coord,hap2_coord,ref_allele,hap1_allele,hap2_allele,
                 cA,cC,cG,cT,cN,ref_allele_ratio,sum_ref_n_alt_cnts,p_binom,
-                warning_h1,warning_h2) = line.split()
+                warning_hap1,warning_hap2) = line.split()
             rd = float(cnv_dict[chrm+"_"+ref_coord])
             if 0.5 <= rd <= 1.5:
                 sys.stdout.write(line.strip()+'\t'+str(rd)+'\n')

diff --git a/filter_non-autosomal_chr.py b/filter_non-autosomal_chr.py
@@ -9,9 +9,9 @@
     for line in sys.stdin:
 
         if not line.startswith('#'):
-            (chrm,ref_coord,h1_coord,h2_coord,ref_allele,h1_allele,h2_allele,
+            (chrm,ref_coord,hap1_coord,hap2_coord,ref_allele,hap1_allele,hap2_allele,
                      cA,cC,cG,cT,cN,ref_allele_ratio,sum_ref_n_alt_cnts,p_binom,
-                     warning_h1,warning_h2,cnv) = line.split('\t')
+                     warning_hap1,warning_hap2,cnv) = line.split('\t')
 
             if chrm in keep_chrs: sys.stdout.write(line)
             else: rm_hetSNV_f.write('\t'.join([chrm,ref_coord, 

diff --git a/filter_phase_warnings.py b/filter_phase_warnings.py
@@ -8,15 +8,15 @@ def check_subset(l1,l2):
 with open(sys.argv[1], 'a') as rm_hetSNV_f:
     for line in sys.stdin:
         if not line.startswith('#'):
-            (chrm,ref_coord,h1_coord,h2_coord,ref_allele,h1_allele,h2_allele,
+            (chrm,ref_coord,hap1_coord,hap2_coord,ref_allele,hap1_allele,hap2_allele,
                 cA,cC,cG,cT,cN,ref_allele_ratio,sum_ref_n_alt_cnts,p_binom,
-                warning_h1,warning_h2,cnv) = line.split()
-            if check_subset(warning_h1.split(','),['.','zero_cnt']) and check_subset(warning_h2.split(','),['.','zero_cnt']) and h1_coord != 'notLifted?' and h2_coord != 'notLifted?': 
+                warning_hap1,warning_hap2,cnv) = line.split()
+            if check_subset(warning_hap1.split(','),['.','zero_cnt']) and check_subset(warning_hap2.split(','),['.','zero_cnt']) and hap1_coord != 'notLifted?' and hap2_coord != 'notLifted?': 
                 sys.stdout.write('\t'.join([
-                                               chrm,ref_coord,h1_coord,h2_coord,ref_allele,h1_allele,h2_allele,
+                                               chrm,ref_coord,hap1_coord,hap2_coord,ref_allele,hap1_allele,hap2_allele,
                                                cA,cC,cG,cT,cN,ref_allele_ratio,sum_ref_n_alt_cnts,p_binom, cnv
                                            ])+'\n')
             else: 
                 rm_hetSNV_f.write('\t'.join([chrm, ref_coord, 
-                                    '_'.join([cA,cC,cG,cT,'','um',warning_h1,warning_h2])])+'\n')
+                                    '_'.join([cA,cC,cG,cT,'','um',warning_hap1,warning_hap2])])+'\n')
         else: sys.stdout.write('\t'.join(line.split()[:15] + [line.split()[-1]])+'\n')
diff --git a/filter_regions_by_counts.py b/filter_regions_by_counts.py
@@ -5,6 +5,6 @@
 sys.stdout.write(sys.stdin.readline())
 for line in sys.stdin:
 
-    region,h1_count,h2_count,h1_allele_ratio,p_binom,snv_count,mmap_log = line.split('\t')
-    if int(h1_count) + int(h2_count) >= th_tot and int(h1_count) >= th_each and int(h2_count) >= th_each:
+    region,hap1_count,hap2_count,hap1_allele_ratio,p_binom,snv_count,mmap_log = line.split('\t')
+    if int(hap1_count) + int(hap2_count) >= th_tot and int(hap1_count) >= th_each and int(hap2_count) >= th_each:
         sys.stdout.write(line)
diff --git a/filter_regions_by_pval.py b/filter_regions_by_pval.py
@@ -7,7 +7,7 @@
 sys.stdout.write(sys.stdin.readline())
 for line in sys.stdin:
 
-    region, h1_count, h2_count, h1_allele_ratio, p_binom, snv_count, mmap_log = line.split('\t')
-    outcolmns = '\t'.join([region, h1_count, h2_count, h1_allele_ratio, p_binom, snv_count])
+    region, hap1_count, hap2_count, hap1_allele_ratio, p_binom, snv_count, mmap_log = line.split('\t')
+    outcolmns = '\t'.join([region, hap1_count, hap2_count, hap1_allele_ratio, p_binom, snv_count])
 
     if float(p_binom) <= pth: sys.stdout.write(outcolmns+'\n')