Skip to content

Commit

Permalink
v2.3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
yhoogstrate committed Sep 13, 2018
1 parent 32f5726 commit 3c51268
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 5 deletions.
5 changes: 5 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
13-sept-2018: Youri Hoogstrate
* v2.3.0 - Adds arguments -r/--region and -b/--bed-regions to target
specific genomic regions. This way alternative loci can be
excluded for instance.

13-sept-2018: Youri Hoogstrate
* v2.2.0 - Removes --roc argument and adds multiple statistics
including ROC to the -s/--stats argument.
Expand Down
6 changes: 4 additions & 2 deletions bin/bam-lorenz-coverage
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ from blc.blc import bamlorenzcoverage
@click.option('-L', '--lorenz-svg', nargs=1, help='Output figure Lorenz-curve (SVG).')
@click.option('-C', '--coverage-svg', nargs=1, help='Output figure Coverage-graph (SVG).')
@click.option('-s', '--stats', nargs=1, help='Output additional stats to text-file')
def CLI(lorenz_table, coverage_table, lorenz_svg, coverage_svg, input_alignment_file, stats):
@click.option('-r', '--region', nargs=1, help='Scan depth only in selected region <chr:from-to> (all positions: 1-based)')
@click.option('-b', '--bed-regions', nargs=1, help='Scan depth only in selected positions or regions (BED file: start: 0-based & end: 1-based)')
def CLI(lorenz_table, coverage_table, lorenz_svg, coverage_svg, input_alignment_file, stats, region, bed_regions):
b = bamlorenzcoverage()
idx_observed, n = b.bam_file_to_idx(input_alignment_file)
idx_observed, n = b.bam_file_to_idx(input_alignment_file, region, bed_regions)

if coverage_table or coverage_svg:
cumulative_coverage_curves = b.estimate_cumulative_coverage_curves(idx_observed)
Expand Down
2 changes: 1 addition & 1 deletion blc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""[License: GNU General Public License v3 (GPLv3)]
"""

__version_info__ = ('2', '2', '0')
__version_info__ = ('2', '3', '0')
__version__ = '.'.join(__version_info__) if (len(__version_info__) == 3) else '.'.join(__version_info__[0:3]) + "-" + __version_info__[3]
__author__ = 'Youri Hoogstrate'
__homepage__ = 'https://github.com/yhoogstrate/bam-lorenz-coverage'
Expand Down
11 changes: 9 additions & 2 deletions blc/blc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class bamlorenzcoverage:
def __init__(self):
pass

def bam_file_to_idx(self, bam_file):
def bam_file_to_idx(self, bam_file, region=None, bed_regions=None):
"""
Coverage plot needs the zero-statistic - i.e. the number of genomic bases not covered by reads
"""
Expand All @@ -28,8 +28,15 @@ def bam_file_to_idx(self, bam_file):
tmp_filename = os.path.join(tempfile.mkdtemp() + '.fifo')
os.mkfifo(tmp_filename)

cmd = ['-a', bam_file]
if region:
cmd = ['-r', region] + cmd
elif bed_regions:
cmd = ['-b', bed_regions] + cmd
# print(cmd)

# I tried this with the Threading class but this often didnt parallelize
parallel_thread = Process(target=pysam.samtools.depth, args=['-a', bam_file], kwargs={'save_stdout': tmp_filename})
parallel_thread = Process(target=pysam.samtools.depth, args=cmd, kwargs={'save_stdout': tmp_filename})
parallel_thread.start()

fh = os.open(tmp_filename, os.O_RDONLY)
Expand Down
1 change: 1 addition & 0 deletions tests/blc/test_blc_012.sam
1 change: 1 addition & 0 deletions tests/blc/test_blc_013.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
chr1 2 14
1 change: 1 addition & 0 deletions tests/blc/test_blc_013.sam
37 changes: 37 additions & 0 deletions tests/test_class_blc.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,43 @@ def test_011_lorenz_03(self):
self.assertEqual(lc['total_sequenced_bases'], 10)
self.assertEqual(lc['total_covered_positions_of_genome'], 8)

def test_012_region(self):
# everything covered, is at least covered even densely
# x x x x x
# x x x x x
# - - - - - - - - - - - - - -

test_id = 'blc_012'

input_file_sam = TEST_DIR + "test_" + test_id + ".sam"
input_file_bam = T_TEST_DIR + "test_" + test_id + ".bam"

sam_to_sorted_bam(input_file_sam, input_file_bam)

b = bamlorenzcoverage()
idx, n = b.bam_file_to_idx(input_file_bam, 'chr1:2-14')

self.assertEqual(n, 13) # sam header say reference size is 14, but we start at 2nd position

def test_013_bed(self):
# everything covered, is at least covered even densely
# x x x x x
# x x x x x
# - - - - - - - - - - - - - -

test_id = 'blc_013'

input_file_sam = TEST_DIR + "test_" + test_id + ".sam"
input_file_bed = TEST_DIR + "test_" + test_id + ".bed"
input_file_bam = T_TEST_DIR + "test_" + test_id + ".bam"

sam_to_sorted_bam(input_file_sam, input_file_bam)

b = bamlorenzcoverage()
idx, n = b.bam_file_to_idx(input_file_bam, None, input_file_bed)

self.assertEqual(n, 12)


if __name__ == '__main__':
main()

0 comments on commit 3c51268

Please sign in to comment.