Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WACS #283

Open
wants to merge 30 commits into
base: master
Choose a base branch
from
Open

WACS #283

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
dc6edd0
Updated setup and setup_w_cython files
aawdeh Mar 15, 2019
6da64a9
Updated bin/macs2
aawdeh Mar 15, 2019
3eb837b
Update MACS2/OptValidator.py
aawdeh Mar 15, 2019
1197d43
Added file MACS2/callpeakw_cmd.py
aawdeh Mar 15, 2019
0c2e8bf
Added file MACS2/ComputePileups.pyx
aawdeh Mar 15, 2019
3f2b42d
Added file MACS2/IO/CallPeakUnitPrecompiled.pyx
aawdeh Mar 15, 2019
2aa85be
Added file MACS2/IO/PileupUnit.pyx
aawdeh Mar 15, 2019
074337d
Update file MACS/IO/CallPeakUnitPrecompiled.pyx
aawdeh Mar 15, 2019
fffd1ee
Updated file MACS2/ComputePileups.pyx
aawdeh Mar 15, 2019
f6a1354
Updated file MACS2/callpeakw_cmd.py
aawdeh Mar 15, 2019
f10b772
Updated file MACS2/IO/PairedEndTrack.pyx
aawdeh Mar 15, 2019
5665d0c
Updated file MACS2/IO/FixWidthTrack.pyx
aawdeh Mar 15, 2019
c739b0a
Updated file MACS2/Pileup.pyx
aawdeh Mar 15, 2019
59695d2
Updated file MACS2/PeakDetect.pyx
aawdeh Mar 18, 2019
59d1037
Added Part1/Part1.sh, Part1/getData.py, Part1/getWeights.py and Part1…
aawdeh Mar 18, 2019
5c79ed5
Added file Part1/README.md
aawdeh Mar 18, 2019
ef50f9b
Updated file Part1/README.md
aawdeh Mar 18, 2019
8f3defe
Updated file Part1/README.md
aawdeh Mar 18, 2019
9890119
Updated file Part1/README.md
aawdeh Mar 18, 2019
471cb26
Updated file Part1/README.md
aawdeh Mar 18, 2019
be74f8f
Update callpeakw_cmd.py
aawdeh May 17, 2019
8c91925
Update PileupUnit.pyx
aawdeh May 18, 2019
9566c34
Update PileupUnit.pyx
aawdeh May 18, 2019
d82ba0e
Added README file
Jul 10, 2019
66d2107
Update README.md
aawdeh Jul 10, 2019
d4402be
Update README.md
aawdeh Jul 10, 2019
51f2d7e
Update ManipulateData.py
aawdeh Jul 18, 2019
f2bd293
Update callpeakw_cmd.py
aawdeh Aug 12, 2019
77fcc5e
Update callpeakw_cmd.py
aawdeh Aug 12, 2019
f8c3156
Update callpeakw_cmd.py
aawdeh Aug 24, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
247 changes: 247 additions & 0 deletions MACS2/ComputePileups.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
"""Module Description

Copyright (c) 2008,2009 Yong Zhang, Tao Liu <[email protected]>
Copyright (c) 2010,2011 Tao Liu <[email protected]>
Copyright (c) 2013, Tao Liu lab at UB and Xiaole Shirley Liu lab at DFCI
All rights reserved.

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD License (see the file COPYING included
with the distribution).

@status: experimental
@version: $Revision$
@author: Yong Zhang, Tao Liu
@contact: [email protected]
"""

"""Added for WACS by Aseel Awdeh <[email protected]>

ComputePileups allows the user to compute pileups per sample individually
and returns the pileups as position value arrays.

"""

from itertools import groupby
from operator import itemgetter
import io
import gc # use garbage collectior

from MACS2.IO.PeakIO import PeakIO
from MACS2.IO.BedGraphIO import bedGraphIO
from MACS2.Constants import *
from MACS2.IO.PileupUnit import PileupUnitFromAlignments

cdef str subpeak_letters(short i):
if i < 26:
return chr(97+i)
else:
return subpeak_letters(i / 26) + chr(97 + (i % 26))

class ComputePileup:
"""Class to do the pileup computations.

e.g
>>> from MACS2.ComputePileups import ComputePileup
>>> pd = ComputePileup(treat=treatdata, control=controldata, pvalue=pvalue_cutoff, d=100, gsize=3000000000)
>>> treatment_pileup = pd.pileup_treatment()
"""
def __init__ (self, opt = None,treat = None, control = None, d = None,
slocal = None, llocal = None):
"""Initialize the ComputePileup object.

"""
self.opt = opt
self.info = opt.info
self.debug = opt.debug
self.warn = opt.warn

self.treat = treat
self.control = control
self.ratio_treat2control = None
self.peaks = None
self.final_peaks = None
self.PE_MODE = opt.PE_MODE
self.scoretrack = None

#Added for the weighted pileups
self.controlweight = opt.controlweight #weight for one control
self.pileup_treat_chrom = None
self.pileup_control_chrom = None
#----------------------------------

self.log_pvalue = opt.log_pvalue # -log10pvalue
self.log_qvalue = opt.log_qvalue # -log10qvalue
if d != None:
self.d = d
else:
self.d = self.opt.d
self.end_shift = self.opt.shift
self.gsize = opt.gsize

self.nolambda = opt.nolambda

if slocal != None:
self.sregion = slocal
else:
self.sregion = opt.smalllocal

if llocal != None:
self.lregion = llocal
else:
self.lregion = opt.largelocal

if (self.nolambda):
self.info("#3 !!!! DYNAMIC LAMBDA IS DISABLED !!!!")


def pileup_treatment (self):
"""
To find pileups of treatment.
"""
cdef:
int i
float lambda_bg, effective_depth_in_million
float treat_scale, d
list ctrl_scale_s, ctrl_d_s
long treat_total, control_total
long treat_sum # approx sum of treatment pileup values
long control_sum # approx sum of control pileup values


print("Compute pileup for treatment only.")
treat_total = self.treat.total

#Aseel
#For each control in the control list, calculate the control_total and the ratio_treat2control
if self.PE_MODE:
d = self.treat.average_template_length
treat_sum = self.treat.length
control_total = self.control.total * 2 # in PE mode, entire fragment is counted as 1, in treatment whereas both ends of fragment are counted in control/input.
control_sum = control_total * self.treat.average_template_length
self.ratio_treat2control = float(treat_sum)/control_sum
else:
d = self.d
treat_sum = self.treat.total * self.d
control_total = self.control.total
control_sum = self.control.total * self.d
self.ratio_treat2control = float(treat_sum)/control_sum

if self.opt.ratio != 1.0:
self.ratio_treat2control = self.opt.ratio

# Scaling of controls will always be towards treatment, as there are many controls and one treatment so makes more sense. -- removed if statement
# if MACS decides to scale control to treatment because control sample is bigger
effective_depth_in_million = treat_total / 1000000.0
lambda_bg = float( treat_sum )/ self.gsize
treat_scale = 1.0

#Add array of arrays for scaling controls, also for weights
scorecalculator = PileupUnitFromAlignments( self.treat,
self.control, #Can be a list.. but one control at a time.
d = d,
treat_scaling_factor = treat_scale,
end_shift = self.end_shift,
lambda_bg = lambda_bg,
)

if self.opt.trackline: scorecalculator.enable_trackline()

#Compute pileup
pileup_treat_chrom = scorecalculator.compute_pileup__treatment()
return pileup_treat_chrom

def pileup_control (self):
"""
To find pileups of multiple weighted control data -- one control dataset at a time.
"""
cdef:
int i
float lambda_bg, effective_depth_in_million
float treat_scale, d
list ctrl_scale_s, ctrl_d_s
long treat_total, control_total
long treat_sum # approx sum of treatment pileup values
long control_sum # approx sum of control pileup values


treat_total = self.treat.total

#For each control in the control list, calculate the control_total and the ratio_treat2control
if self.PE_MODE:
d = self.treat.average_template_length
treat_sum = self.treat.length
control_total = self.control.total * 2 # in PE mode, entire fragment is counted as 1, in treatment whereas both ends of fragment are counted in control/input.
control_sum = control_total * self.treat.average_template_length
self.ratio_treat2control = float(treat_sum)/control_sum
else:
d = self.d
treat_sum = self.treat.total * self.d
control_total = self.control.total
control_sum = self.control.total * self.d
self.ratio_treat2control = float(treat_sum)/control_sum

if self.opt.ratio != 1.0:
self.ratio_treat2control = self.opt.ratio

# Scaling of controls will always be towards treatment, as there are many controls and one treatment so makes more sense. -- removed if statement
# if MACS decides to scale control to treatment because control sample is bigger
effective_depth_in_million = treat_total / 1000000.0
lambda_bg = float( treat_sum )/ self.gsize
treat_scale = 1.0

# prepare d_s for control data
if self.sregion:
assert self.d <= self.sregion, "slocal can't be smaller than d!"
if self.lregion:
assert self.d <= self.lregion , "llocal can't be smaller than d!"
assert self.sregion <= self.lregion , "llocal can't be smaller than slocal!"


#For each control in the control list, prepare a list of extension sizes and
# a list of scaling factors for control

# Now prepare a list of extension sizes -- same for all controls
ctrl_d_s = [ self.d ] # note, d doesn't make sense in PE mode.

# slocal size local
if self.sregion:
ctrl_d_s.append( self.sregion )

if self.lregion and self.lregion > self.sregion:
ctrl_d_s.append( self.lregion )

# And a list of scaling factors for each control
ctrl_scale_s = []

# d
# if user wants to scale everything to ChIP data
tmp_v = self.ratio_treat2control
ctrl_scale_s.append( tmp_v )

# rlocal size sregion
# if user want to scale everything to ChIP data
tmp_v = float(self.d)/self.sregion*self.ratio_treat2control
ctrl_scale_s.append( tmp_v )

# llocal size local
# if user want to scale everything to ChIP data
tmp_v = float(self.d)/self.lregion*self.ratio_treat2control
ctrl_scale_s.append( tmp_v )

#Add array of arrays for scaling controls, also for weights --
scorecalculator = PileupUnitFromAlignments( self.treat,
self.control, #Can be a list.. but one control at a time.
d = d, ctrl_d_s = ctrl_d_s,
treat_scaling_factor = treat_scale,
ctrl_scaling_factor_s = ctrl_scale_s, #This changed to take a list of lists (list of controls each having different scaling factors)
ctrl_weight = self.controlweight, #This is also added. A weight is associated with each control. In this case, only one weight passed b/c one control.
end_shift = self.end_shift,
lambda_bg = lambda_bg,
)

if self.opt.trackline: scorecalculator.enable_trackline()

#Compute pileup
pileup_control_chrom = scorecalculator.compute_pileup__control()
return pileup_control_chrom
Loading