Skip to content

Commit

Permalink
Updated to be Python 3 compatible
Browse files Browse the repository at this point in the history
  • Loading branch information
msauria committed Jul 30, 2024
1 parent 6288030 commit 80867d6
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 44 deletions.
12 changes: 7 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ RUN apt-get --allow-releaseinfo-change update -t oldoldstable && apt-get instal
mariadb-server \
mariadb-client \
wget \
zlib1g-dev
zlib1g-dev \
procps \
&& rm -rf /var/lib/apt/lists/*

RUN conda install -c daler \
pip \
Expand All @@ -24,14 +26,14 @@ RUN conda install -c daler \
pandas \
pyyaml \
sphinx \
pysam
pysam \
colorama \
termcolor
RUN conda install -c daler \
tabix \
bedtools=2.25.0
ENV DISPLAY=:0
ENV LANG C.UTF-8
WORKDIR /opt/pybedtools

RUN pip install https://github.com/AndersenLab/bam-toolbox/archive/0.0.3.tar.gz
RUN apt-get install -y procps \
&& rm -rf /var/lib/apt/lists/*
RUN pip install https://github.com/AndersenLab/bam-toolbox/archive/1.0.0.tar.gz
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
## Installation

```
pip install https://github.com/AndersenLab/bam-toolbox/archive/0.0.3.tar.gz
pip install https://github.com/AndersenLab/bam-toolbox/archive/1.0.0.tar.gz
```

## Usage

bam-toolbox 0.1
bam-toolbox 1.0

usage:
bam <command> [<args>...]
Expand Down
2 changes: 1 addition & 1 deletion bam/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.3"
__version__ = "1.0.0"
32 changes: 16 additions & 16 deletions bam/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,19 @@
coverage
"""
from docopt import docopt
from subprocess import call, check_output, CalledProcessError
from clint.textui import colored, puts, indent
import sys
import bam
import os

from docopt import docopt
from colorama import Fore, just_fix_windows_console, init
from termcolor import colored

import bam


just_fix_windows_console()
init(autoreset=True)

debug = None
if len(sys.argv) == 1:
Expand Down Expand Up @@ -46,7 +52,7 @@ def is_exe(fpath):

def main():
args = docopt(__doc__,
version='bam-toolbox v0.1',
version='bam-toolbox v1.0',
argv = debug,
options_first=True)
argv = [args['<command>']] + args['<args>']
Expand All @@ -59,33 +65,27 @@ def main():
for install_name, program in program_list.items():
check_output(["brew", "tap", "homebrew/science"])
try:
with indent(4):
puts(colored.blue("Installing " + install_name))
print(Fore.BLUE + " Installing " + install_name)
check_output(["brew", "install", install_name])
program_installed.remove(install_name)
except CalledProcessError:
try:
check_output(["which", program])
with indent(4):
puts(colored.blue(program + " previously installed"))
print(Fore.BLUE + " " + program + " previously installed")
program_installed.remove(install_name)
except CalledProcessError:
with indent(4):
puts(colored.red("Error installing " + install_name))
print(Fore.RED + " Error installing " + install_name)
if len(program_installed) == 0:
with indent(4):
puts(colored.blue("Programs successfully installed!"))
print(Fore.BLUE + " Programs successfully installed!")
else:
with indent(4):
puts(colored.red("Error: Not all programs successfully installed: " + ", ".join(program_installed)))
print(Fore.RED + " Error: Not all programs successfully installed: " + ", ".join(program_installed))
elif args["<command>"] == "":
print(__doc__)
for prog in program_list.values():
try:
check_output(["which", prog])
except CalledProcessError:
with indent(4):
puts(colored.red(prog + " not installed. Use a package manager to install or try using 'tb.py setup'\n"))
print(Fore.RED + " " + prog + " not installed. Use a package manager to install or try using 'tb.py setup'\n")
elif args['<command>'] in ['coverage', 'readgroups', 'fastq']:
comm = ['python', getScriptPath() + '/' + args["<command>"] + ".py"] + argv
exit(call(comm))
Expand Down
42 changes: 23 additions & 19 deletions bam/coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,20 @@
--header print header
"""
from docopt import docopt
import sys
from collections import OrderedDict
from clint.textui import colored, indent, puts_err
import os
import re
from subprocess import Popen, PIPE

from datetime import datetime
from collections import OrderedDict

from docopt import docopt
from colorama import Fore, just_fix_windows_console, init
from termcolor import colored

just_fix_windows_console()
init(autoreset=True)

class output_line:

Expand Down Expand Up @@ -72,27 +76,27 @@ def __init__(self, fname, mtchr = None):

def parse_header(self):
header, err = Popen(["samtools", "view", "-H", self.fname], stdout=PIPE, stderr=PIPE).communicate()
if err != "":
if err != b"":
raise Exception(err)
self.header = header
contigs = OrderedDict()
contig_regions = []
for x in re.findall("@SQ\WSN:(?P<chrom>[A-Za-z0-9_]*)\WLN:(?P<length>[0-9]+)", header):
contigs[x[0]] = int(x[1])
region = "%s:%s-%s" % (x[0], "1", x[1])
for x in re.findall(b"@SQ\t[A-Za-z0-9]*SN:(?P<chrom>[A-Za-z0-9_]*)[A-Za-z0-9]*\tLN:(?P<length>[0-9]+)", header):
contigs[x[0].decode('utf-8')] = int(x[1])
region = "%s:%s-%s" % (x[0].decode('utf-8'), "1", x[1].decode('utf-8'))
contig_regions.append(region)
self.contigs = contigs
self.contig_regions = contig_regions

mtchr = [x for x in self.contigs.keys() if x.lower().find("m") == 0]
if len(mtchr) == 1:
self.mtchr = mtchr[0]
with indent(4):
puts_err(colored.blue("\nGuessing Mitochondrial Chromosome: " + self.mtchr + "\n"))
print(Fore.BLUE + "\n Guessing Mitochondrial Chromosome: " + self.mtchr + "\n",
file=sys.stderr)

self.genome_length = sum(contigs.values())
if mtchr:
self.nuclear_length = sum([x for x in contigs.values() if x != contigs[self.mtchr]])
self.nuclear_length = sum([x for x in self.contigs.values() if x != self.contigs[self.mtchr]])


def sum_coverage(self, region=None):
Expand All @@ -101,15 +105,15 @@ def sum_coverage(self, region=None):
pos_covered = 0
cum_depth = 0
for row in comm.stdout:
chrom, pos, depth = row.strip().split("\t")
chrom, pos, depth = row.strip().split(b"\t")
pos_covered += 1
cum_depth += int(depth)
return pos_covered, cum_depth


def iterate_window(bamfile, size):
for chrom, size in bamfile.contigs.items():
for i in xrange(1, size, window):
for i in range(1, size, window):
if i + window > size:
end = size
else:
Expand Down Expand Up @@ -137,10 +141,9 @@ def calc_coverage(bamfile, regions=None, mtchr=None):
# If end extends too far, adjust for chrom
chrom_len = bamfile.contigs[chrom]
if end > chrom_len:
m = "\nSpecified chromosome end extends beyond chromosome length. Set to max of: "
with indent(4):
puts_err(colored.yellow(m + str(chrom_len) + "\n"))
end = chrom_len
m = "\n Specified chromosome end extends beyond chromosome length. Set to max of: "
print(Fore.YELLOW + m + str(chrom_len) + "\n", file=sys.stderr)
end = chrom_len

region = "{c}:{s}-{e}".format(c=chrom, s=start, e=end + 1)
pos_covered, cum_depth = bamfile.sum_coverage(region)
Expand Down Expand Up @@ -199,13 +202,14 @@ def calc_coverage(bamfile, regions=None, mtchr=None):
Calculate coverage genome wide
"""
bam = args["<bam>"]
print(b.contig_regions, file=sys.stderr)
cov = calc_coverage(b, b.contig_regions)

# Genomewide depth
output_dir = {}
output_dir["chrom"] = "genome"
output_dir["start"] = 1
output_dir["end"] = b.genome_length
output_dir["chrom"] = "genome"

bases_mapped = sum([x["bases_mapped"] for x in cov])
output_dir["ATTR"] = "bases_mapped"
Expand All @@ -224,8 +228,8 @@ def calc_coverage(bamfile, regions=None, mtchr=None):

if b.mtchr:
# Nuclear
output_dir["end"] = b.nuclear_length
output_dir["chrom"] = "nuclear"
output_dir["end"] = b.nuclear_length
bases_mapped = sum([x["bases_mapped"] for x in cov if x["chrom"] != b.mtchr])
output_dir["ATTR"] = "bases_mapped"
print(output_line(bam_name, output_dir, bases_mapped))
Expand All @@ -243,6 +247,6 @@ def calc_coverage(bamfile, regions=None, mtchr=None):
print(output_line(bam_name, output_dir, pos_mapped))

# mt:nuclear ratio
output_dir = {"start": 1, "end": b.nuclear_length, "chrom": "genome", "ATTR": "mt_nuclear_ratio"}
output_dir = {"chrom": "genome", "start": 1, "end": b.nuclear_length, "ATTR": "mt_nuclear_ratio"}
mt_nuc = [x for x in cov if x["chrom"] == b.mtchr][0]["depth_of_coverage"] / coverage
print(output_line(bam_name, output_dir, mt_nuc))
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
clint
colorama
termcolor
docopt
pandas
pybedtools

0 comments on commit 80867d6

Please sign in to comment.