Fix broken formatting: reformat everything with black #31

Open
wants to merge 1 commit into base: master
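According to the PR title, the whole code base was reformatted with black (the exact black version and invocation are not recorded in this PR; running `black .` from the repository root is the usual command). As a minimal sketch, assuming black is installed and using its format_str API, the quote normalization visible in the setup.py hunk below can be reproduced like this:

import black  # assumes black is installed: pip install black

# black rewrites code into its own style; one visible effect in this PR is
# normalizing string literals to double quotes.
src = "snaptools_version = '1.4.8'\n"
formatted = black.format_str(src, mode=black.Mode())
print(formatted, end="")  # prints: snaptools_version = "1.4.8"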
3 changes: 1 addition & 2 deletions bin/snaptools
@@ -35,5 +35,4 @@ Description: snaptools - A toolkit for single nuclues ATAC-seq analysis.

from snaptools.parser import parse_args
if __name__ == '__main__':
parse_args()

parse_args()
64 changes: 33 additions & 31 deletions setup.py
@@ -1,37 +1,39 @@
from setuptools import setup

snaptools_version = '1.4.8'
snaptools_version = "1.4.8"

setup(
name='snaptools',
version=snaptools_version,
author='Rongxin Fang',
author_email='[email protected]',
license='LICENSE',
packages=['snaptools'],
description='A module for working with snap files in Python',
url='https://github.com/r3fang/SnapTools.git',
python_requires='>=2.7',

install_requires=[
"pysam",
"h5py",
"numpy",
"pybedtools>=0.7",
"python-louvain",
"future"
],
keywords = ["Bioinformatics pipeline",
"Single cell analysis",
"Epigenomics",
"Epigenetics",
"ATAC-seq",
"Chromatin Accessibility",
"Functional genomics"],
scripts = ["bin/snaptools"],
zip_safe=False)
name="snaptools",
version=snaptools_version,
author="Rongxin Fang",
author_email="[email protected]",
license="LICENSE",
packages=["snaptools"],
description="A module for working with snap files in Python",
url="https://github.com/r3fang/SnapTools.git",
python_requires=">=2.7",
install_requires=[
"pysam",
"h5py",
"numpy",
"pybedtools>=0.7",
"python-louvain",
"future",
],
keywords=[
"Bioinformatics pipeline",
"Single cell analysis",
"Epigenomics",
"Epigenetics",
"ATAC-seq",
"Chromatin Accessibility",
"Functional genomics",
],
scripts=["bin/snaptools"],
zip_safe=False,
)

if __name__ == '__main__':
f = open("snaptools/__init__.py",'w')
f.write("__version__ = \'"+snaptools_version+"\'"+"\n")
if __name__ == "__main__":
f = open("snaptools/__init__.py", "w")
f.write("__version__ = '" + snaptools_version + "'" + "\n")
f.close()
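As a side note on the block above: when setup.py runs as a script, it regenerates snaptools/__init__.py with the current version string. A minimal sketch of the same write using a context manager, shown only as a stylistic variant and not part of this PR:

snaptools_version = "1.4.8"

# Equivalent to the open()/close() pair above, but the file is closed
# automatically; the resulting file contains: __version__ = '1.4.8'
with open("snaptools/__init__.py", "w") as f:
    f.write("__version__ = '" + snaptools_version + "'\n")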
2 changes: 1 addition & 1 deletion snaptools/__init__.py
@@ -1 +1 @@
__version__ = '1.4.8'
__version__ = "1.4.8"
195 changes: 128 additions & 67 deletions snaptools/add_bmat.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
"""
"""

The MIT License

@@ -25,7 +25,8 @@

"""

import sys, os
import sys
import os
import collections
import gzip
import operator
@@ -76,106 +77,166 @@
sys.exit(1)


def snap_bmat(snap_file,
bin_size_list,
tmp_folder,
verbose):

def snap_bmat(snap_file, bin_size_list, tmp_folder, verbose):
"""
Pre-processing to create a snap file from a bam that contains alignments or a bed file that contains fragments.

Args:
--------
snap_file:
snap_file:
a snap format file.

Optional
--------
bin_size_list:
bin_size_list:
a list object contains all bin sizes [5000]
verbose:

verbose:
a boolen variable indicates whether to output the progress [True];
"""

if not os.path.exists(snap_file):
print(('error: ' + snap_file + ' does not exist!'));
sys.exit(1);
print(("error: " + snap_file + " does not exist!"))
sys.exit(1)

# check if snap_file is a snap-format file
file_format = snaptools.utilities.checkFileFormat(snap_file);
file_format = snaptools.utilities.checkFileFormat(snap_file)
if file_format != "snap":
print(("error: input file %s is not a snap file!" % snap_file));
sys.exit(1);
print(("error: input file %s is not a snap file!" % snap_file))
sys.exit(1)

# create the bin list
f = h5py.File(snap_file, "a", libver='earliest');
f = h5py.File(snap_file, "a", libver="earliest")

if "AM" in f:
print("error: AM - cell x bin accessibility matrix already exists, delete it first using snap-del ")
print(
"error: AM - cell x bin accessibility matrix already exists, delete it first using snap-del "
)
sys.exit(1)

try:
genome_dict = dict(list(zip([item.decode() for item in f["HD"]["SQ"]["SN"][:]], f["HD"]["SQ"]["SL"][:])))

try:
genome_dict = dict(
list(
zip(
[item.decode() for item in f["HD"]["SQ"]["SN"][:]],
f["HD"]["SQ"]["SL"][:],
)
)
)
except KeyError:
print("error: unable to read genome information")
sys.exit(1)

# extract the barcodes
barcode_dict = snaptools.snap.getBarcodesFromSnap(snap_file);
barcode_dict = snaptools.snap.getBarcodesFromSnap(snap_file)

bin_dict_list = collections.defaultdict(dict);
bin_dict_list = collections.defaultdict(dict)

for bin_size in bin_size_list:
bin_dict = snaptools.utilities.getBinsFromGenomeSize(genome_dict, bin_size);
bin_dict_list[bin_size] = bin_dict;
bin_dict = snaptools.utilities.getBinsFromGenomeSize(genome_dict, bin_size)
bin_dict_list[bin_size] = bin_dict

num_barcode = len(barcode_dict);
num_barcode = len(barcode_dict)
if verbose:
print("===== reading the barcodes and bins ======");
print(("@AM\tnBinSize:%d"%len(list(bin_dict_list.keys()))));
print("@AM\tbinSizeList: %s" % str(list(bin_dict_list.keys())));
print("===== reading the barcodes and bins ======")
print(("@AM\tnBinSize:%d" % len(list(bin_dict_list.keys()))))
print("@AM\tbinSizeList: %s" % str(list(bin_dict_list.keys())))
for bin_size in list(bin_dict_list.keys()):
print(("@AM\tbinSize:%d\tnBin:%d"%(bin_size, len(bin_dict_list[bin_size]))));

idxList = collections.defaultdict(list); # barcode index list
idyList = collections.defaultdict(list); # bin index list
countList = collections.defaultdict(list); # number of count
print(
("@AM\tbinSize:%d\tnBin:%d" % (bin_size, len(bin_dict_list[bin_size])))
)

idxList = collections.defaultdict(list) # barcode index list
idyList = collections.defaultdict(list) # bin index list
countList = collections.defaultdict(list) # number of count

barcode_id = 0
for barcode in f["BD"]["name"]:
_chroms = f["FM"]["fragChrom"][(f["FM"]["barcodePos"][barcode_id] - 1):(f["FM"]["barcodePos"][barcode_id] + f["FM"]["barcodeLen"][barcode_id] - 1)];
_chroms = [item.decode() for item in _chroms];
_start = f["FM"]["fragStart"][(f["FM"]["barcodePos"][barcode_id] - 1):(f["FM"]["barcodePos"][barcode_id] + f["FM"]["barcodeLen"][barcode_id] - 1)]
_len = f["FM"]["fragLen"][(f["FM"]["barcodePos"][barcode_id] - 1):(f["FM"]["barcodePos"][barcode_id] + f["FM"]["barcodeLen"][barcode_id] - 1)]
frag_list_uniq = list(zip(_chroms, _start, _start + _len));
for barcode in f["BD"]["name"]:
_chroms = f["FM"]["fragChrom"][
(f["FM"]["barcodePos"][barcode_id] - 1) : (
f["FM"]["barcodePos"][barcode_id]
+ f["FM"]["barcodeLen"][barcode_id]
- 1
)
]
_chroms = [item.decode() for item in _chroms]
_start = f["FM"]["fragStart"][
(f["FM"]["barcodePos"][barcode_id] - 1) : (
f["FM"]["barcodePos"][barcode_id]
+ f["FM"]["barcodeLen"][barcode_id]
- 1
)
]
_len = f["FM"]["fragLen"][
(f["FM"]["barcodePos"][barcode_id] - 1) : (
f["FM"]["barcodePos"][barcode_id]
+ f["FM"]["barcodeLen"][barcode_id]
- 1
)
]
frag_list_uniq = list(zip(_chroms, _start, _start + _len))

for bin_size in bin_dict_list:
bin_dict = bin_dict_list[bin_size];
bins = collections.defaultdict(lambda : 0);
bin_dict = bin_dict_list[bin_size]
bins = collections.defaultdict(lambda: 0)
for item in frag_list_uniq:
bin_chr = item[0];
for bin_pos in set([int(item[1]/bin_size) * bin_size + 1, int(item[2]/bin_size) * bin_size + 1]):
bins[(bin_chr, bin_pos, bin_pos + bin_size - 1)] += 1;

bin_chr = item[0]
for bin_pos in set(
[
int(item[1] / bin_size) * bin_size + 1,
int(item[2] / bin_size) * bin_size + 1,
]
):
bins[(bin_chr, bin_pos, bin_pos + bin_size - 1)] += 1

for key in bins:
if key in bin_dict and barcode.decode() in barcode_dict:
idyList[bin_size].append(bin_dict[key]);
countList[bin_size].append(bins[key]);
idxList[bin_size].append(barcode_dict[barcode.decode()].id);
barcode_id += 1;
del bin_dict, bins, frag_list_uniq;
dt = h5py.special_dtype(vlen=bytes)
f.create_dataset("AM/nBinSize", data=len(bin_dict_list), dtype="uint32");
f.create_dataset("AM/binSizeList", data=list(bin_dict_list.keys()), dtype="uint32");
idyList[bin_size].append(bin_dict[key])
countList[bin_size].append(bins[key])
idxList[bin_size].append(barcode_dict[barcode.decode()].id)

barcode_id += 1
del bin_dict, bins, frag_list_uniq

dt = h5py.special_dtype(vlen=bytes)
f.create_dataset("AM/nBinSize", data=len(bin_dict_list), dtype="uint32")
f.create_dataset("AM/binSizeList", data=list(bin_dict_list.keys()), dtype="uint32")

for bin_size in bin_dict_list:
f.create_dataset("AM/"+str(bin_size)+"/binChrom",data=[np.string_(key[0]) for key in bin_dict_list[bin_size]], dtype=h5py.special_dtype(vlen=bytes), compression="gzip", compression_opts=9);
f.create_dataset("AM/"+str(bin_size)+"/binStart",data=[key[1] for key in bin_dict_list[bin_size]], dtype="uint32", compression="gzip", compression_opts=9);
f.create_dataset("AM/"+str(bin_size)+"/idx", data=idxList[bin_size], dtype="uint32", compression="gzip", compression_opts=9);
f.create_dataset("AM/"+str(bin_size)+"/idy", data=idyList[bin_size], dtype="uint32", compression="gzip", compression_opts=9);
f.create_dataset("AM/"+str(bin_size)+"/count", data=countList[bin_size], dtype="uint8", compression="gzip", compression_opts=9);
f.close()
f.create_dataset(
"AM/" + str(bin_size) + "/binChrom",
data=[np.string_(key[0]) for key in bin_dict_list[bin_size]],
dtype=h5py.special_dtype(vlen=bytes),
compression="gzip",
compression_opts=9,
)
f.create_dataset(
"AM/" + str(bin_size) + "/binStart",
data=[key[1] for key in bin_dict_list[bin_size]],
dtype="uint32",
compression="gzip",
compression_opts=9,
)
f.create_dataset(
"AM/" + str(bin_size) + "/idx",
data=idxList[bin_size],
dtype="uint32",
compression="gzip",
compression_opts=9,
)
f.create_dataset(
"AM/" + str(bin_size) + "/idy",
data=idyList[bin_size],
dtype="uint32",
compression="gzip",
compression_opts=9,
)
f.create_dataset(
"AM/" + str(bin_size) + "/count",
data=countList[bin_size],
dtype="uint8",
compression="gzip",
compression_opts=9,
)
f.close()
return 0
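For context, a minimal usage sketch of the reformatted snap_bmat function, based only on its signature and docstring above; the snap file name and bin sizes are hypothetical:

from snaptools.add_bmat import snap_bmat  # import path follows this repository layout

# Adds a cell-by-bin accessibility matrix (the "AM" group) to an existing
# snap file; the function exits if "AM" is already present.
snap_bmat(
    snap_file="sample.snap",      # hypothetical snap-format input file
    bin_size_list=[5000, 10000],  # 5000 is the default mentioned in the docstring
    tmp_folder="/tmp",
    verbose=True,
)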