Skip to content

Commit

Permalink
First Distributable Package Update!
Browse files Browse the repository at this point in the history
  • Loading branch information
lux563624348 committed Jun 20, 2021
1 parent 762d4b5 commit 0fb0a39
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 49 deletions.
31 changes: 0 additions & 31 deletions environment_hichub_for_mac.yml

This file was deleted.

Binary file modified image/Hub_Myb.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
19 changes: 17 additions & 2 deletions python_package/bin/hichub
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,18 @@ def main():
)
print("welcome")
print("The python env is: " + sys.version)
print("usage: hichub [-h] [-v] {test,diff,convert} ...")
print("hichub -- A toolset to detect and analyze differential Hubs")
print("positional arguments:")
print(" {test,diff,convert} sub-command help")
print(" test Output for test parser")
print(" diff Parser for diff hubs")
print(" convert Convert multi .hic to txt format --> Format should be:")
print(" #chr bin1 bin2 Count")
print("optional arguments:")
print(" -h, --help show this help message and exit")
print(" -v, --version show program's version number and exit")

if (hasattr(args, 'outdir')): # use a output directory to store HicHub output
if not os.path.exists( args.outdir ):
try:
Expand Down Expand Up @@ -153,9 +165,12 @@ def add_diff_parser( subparsers ):

diff_parser_group.add_argument("-r", "--resolution", action="store", type=int, required = True,
dest="res", help="Resolution of HiC txt", metavar="<int>")

diff_parser_group.add_argument("-p", "--pvalue", action="store", type=float, required = False,
default=0.00001, dest="pvalue", help="Optional: pvalue cutoff for output (diff hub)", metavar="<float>")

diff_parser_group.add_argument("-t", "--num_threads", action="store", type=int, required = True,
dest="thread", help="Num_threads", metavar="<int>")
diff_parser_group.add_argument("-t", "--num_threads", action="store", type=int, required = False,
default=1, dest="thread", help="Optional: Number of threads to run, default=1", metavar="<int>")

return

Expand Down
2 changes: 1 addition & 1 deletion python_package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
name='hichub', # Required
# Versions should comply with PEP 440:
# https://www.python.org/dev/peps/pep-0440/
version='0.1.1', # Required
version='0.2.0', # Required

# This is a one-line description or tagline of what your project does. This
# corresponds to the "Summary" metadata field:
Expand Down
2 changes: 1 addition & 1 deletion python_package/src/hichub/Constants.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
hichub_VERSION = "0.1.1"
hichub_VERSION = "0.2.0"
38 changes: 24 additions & 14 deletions python_package/src/hichub/call_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from scipy import stats
from optparse import OptionParser
import sys, os, multiprocessing
from gooey import Gooey
@Gooey
####################################################################################
## FUNCTIONS
### FUNCTION
Expand Down Expand Up @@ -231,10 +233,11 @@ def Return_Pvalue_For_Given_Graph(_df_region, _resolution, _matrix):

return pd.DataFrame(data=pvalue_region, columns=['reg1', 'reg2', '-log10(pvalue)']).sort_values('-log10(pvalue)', ascending=False)

def Main_For_Diff_Regions(df_hic, _col_fore, _col_back, _resolution):
def Main_For_Diff_Regions(df_hic, _col_fore, _col_back, _resolution, _pvalue):
#Create a weight based on logFC (logFC < 0)
_gapsize=2 ## this parameter tries to avoid blanks caused by artifacts
logfc_cutoff=0
cut_pvalue=-np.log10(_pvalue)
_df_hic = df_hic
_df_hic[_col_fore+'_weight'] = _df_hic[_col_fore]*_df_hic.log_FC.apply(lambda x: 1 if x > logfc_cutoff else(0))
Norm_window_Size=0 ### To be optimized for boundary
Expand Down Expand Up @@ -265,20 +268,21 @@ def Main_For_Diff_Regions(df_hic, _col_fore, _col_back, _resolution):
Diff_matrix = Return_Sorted_Adjacency_Matrix(graph_tem, 'diff')# Fore_matrix-Back_matrix
df_out = df_out.append(Return_Pvalue_For_Given_Graph(df_hubs, _resolution, Diff_matrix))
df_out = df_out.sort_values(by='-log10(pvalue)', ascending=False)
df_out = df_out[df_out['-log10(pvalue)']>5]
df_out = df_out[df_out['-log10(pvalue)']>cut_pvalue]
#df_out.to_csv(str(len(df_out))+'_'+_col_back+'_'+_col_fore+'_specific_regions.bed', sep='\t', index=None)
df_out.to_csv(_col_back+'_'+_col_fore+'_specific_regions.bed', sep='\t', mode='a', header=False, index=None)
return None

def multi_task(_chr_name, _df_chr, _col_fore, _col_back, _resolution):
def multi_task(_chr_name, _df_chr, _col_fore, _col_back, _resolution, _pvalue):
col_fore=_col_fore
col_back=_col_back
resolution = _resolution
pvalue = _pvalue
df_chr = Norm_df_hic(_df_chr, col_fore, col_back, resolution)
Main_For_Diff_Regions(df_chr, col_fore, col_back, resolution)
Main_For_Diff_Regions(df_chr, col_fore, col_back, resolution, pvalue)
return None

def Multi_Main_For_Diff_Regions(_PATH_interaction, _col_fore, _col_back, _resolution, _num_threads=1):
def Multi_Main_For_Diff_Regions(_PATH_interaction, _col_fore, _col_back, _resolution, _pvalue, _num_threads=1):
#if __name__=='__main__':
if True:
PATH_interaction = _PATH_interaction
Expand All @@ -296,7 +300,7 @@ def Multi_Main_For_Diff_Regions(_PATH_interaction, _col_fore, _col_back, _resol
for df_group in df_groups:
chr_name = df_group[0]
df_hic_chr = df_group[1]
pool.apply_async(multi_task, args=(chr_name, df_hic_chr, col_fore, col_back, resolution))
pool.apply_async(multi_task, args=(chr_name, df_hic_chr, col_fore, col_back, resolution, _pvalue))
pool.close()
pool.join()
print('All subprocesses done.')
Expand All @@ -317,6 +321,7 @@ def run(argv):
col_fore = argv.fore_name
col_back = argv.back_name
resolution = argv.res
pvalue=argv.pvalue
num_threads=argv.thread

print (" ")
Expand All @@ -326,14 +331,14 @@ def run(argv):
print ("Foreground Condition: %s" % col_fore)
print ("Background Condition: %s" % col_back)
print ("Resolution %i" % resolution)
print ("Pvalue cutoff for output (diff hub) is: %s" % pvalue)
print ("Number of threads used is: %i" % num_threads)
print ("End of Summary.")
print (" ")

#### Main
Multi_Main_For_Diff_Regions(PATH_INPUT, col_fore, col_back, resolution, num_threads)

Multi_Main_For_Diff_Regions(PATH_INPUT, col_back, col_fore, resolution, num_threads)
Multi_Main_For_Diff_Regions(PATH_INPUT, col_fore, col_back, resolution, pvalue, num_threads)
Multi_Main_For_Diff_Regions(PATH_INPUT, col_back, col_fore, resolution, pvalue, num_threads)

print(" ")
return None
Expand All @@ -349,11 +354,13 @@ def main(argv):
dest="back_name", help="Name of condition as background.", metavar="<str>")
parser.add_option("-r", "--resolution", action="store", type="int",
dest="res", help="Resolution of HiC txt", metavar="<int>")
parser.add_option("-t", "--num_threads", action="store", type="int",
dest="thread", help="Num_threads", metavar="<int>")
parser.add_option("-p", "--pvalue", action="store", type="float", default =0.00001,
dest="pvalue", help="Optional: pvalue cutoff for output (diff hub)", metavar="<float>")
parser.add_option("-t", "--num_threads", action="store", type="int", default =1,
dest="thread", help="Optional: Number of threads to run, default=1", metavar="<int>")

(opt, args) = parser.parse_args(argv)
if len(argv) < 5:
if len(argv) < 4:
parser.print_help()
sys.exit(1)

Expand All @@ -362,23 +369,26 @@ def main(argv):
col_fore = opt.fore_name
col_back = opt.back_name
resolution = opt.res
pvalue=opt.pvalue
num_threads=opt.thread

print (" ")
print("Run main")
print ("Here is the Summary of your input.")
print ("Input Path of HiC file in txt format: %s" % PATH_INPUT)
print ("Foreground Condition: %s" % col_fore)
print ("Background Condition: %s" % col_back)
print ("Resolution %i" % resolution)
print ("Pvalue cutoff for output (diff hub) is: %s" % pvalue)
print ("Number of threads used is: %i" % num_threads)
print ("End of Summary.")
print (" ")



#### Main
Multi_Main_For_Diff_Regions(PATH_INPUT, col_fore, col_back, resolution, num_threads)
Multi_Main_For_Diff_Regions(PATH_INPUT, col_back, col_fore, resolution, num_threads)
Multi_Main_For_Diff_Regions(PATH_INPUT, col_fore, col_back, resolution, pvalue, num_threads)
Multi_Main_For_Diff_Regions(PATH_INPUT, col_back, col_fore, resolution, pvalue, num_threads)

print(" ")
#### First GeneBody
Expand Down

0 comments on commit 0fb0a39

Please sign in to comment.