Test framework to help make testing bioinformatics easier
- More easily mock out open calls
- Generic test base classes you can use to provide easy functionality to assert common things for sequence type data
- More to come...
Add this project as a dependency in your requirements.txt or setup.py tests_require
This is a handy base class for any generic File Mocking you may want to do.
You can either give FileMocker a dictionary that maps the file paths you expect to be opened to their contents, or you can just give it a string, in which case it will mock any open call and that string will be the contents of every file.
from biotest import FileMocker, builtins
import mock
# FileMocker will use the dictionary later when you call open to retrieve
# the correct contents
mock_contents = {'foo.txt': 'foo', 'bar.txt': 'bar'}
with mock.patch(FileMocker(mock_contents)) as mock_open:
x = open('foo.txt').read() # Produces 'foo'
x = open('bar.txt').read() # Produces 'bar'
# However, at this time even if a path is relative/absolute to the same file,
# it will be a different instance
# Let's assume that the CWD is /
# Both paths below should represent the same file, however, at this time that
# functionality is not implemented so you will have to watch out for that
mock_contents = {'path/foo.txt': 'foo', '/path/foo.txt': 'bar'}
with mock.patch(FileMocker(mock_contents)) as mock_open:
x = open('path/foo.txt').read() # Produces 'foo'
x = open('/path/foo.txt').read() # Produces 'bar' not 'foo'
You can easily just give it a string if you don't really care about different filenames having different values
from biotest import FileMocker, builtins
import mock
with mock.patch(FileMocker('foo')) as mock_open:
x = open('foo.txt').read() # Produces 'foo'
# you can just keep calling it, same result
x = open('foo.txt').read() # Produces 'foo'
# Doesn't even matter what path you use
x = open('turkey.txt').read() # Produces 'foo'
from biotest import FileMocker, builtins
import mock
with mock.patch(FileMocker()) as mock_open:
# Cannot open a file that does not exist
x = open('foo.txt') # Generates IOError
with mock.patch(FileMocker()) as mock_open:
# You can open a file that does not exist if in write mode
x = open('foo.txt', 'w')
For convenience there is a MockSeqRecord class that simply allows you to utilize the mock.MagicMock class that is spec'd around Bio.SeqRecord.SeqRecord. That is, it ensures that attributes are correct when you try to access/set them.
from biotest import MockSeqRecord
x = MockSeqRecord()
x.id = 'id'
x.description = 'description'
x.seq = 'ATGC' # YAY, we don't have to make a Seq instance!
x.foo # Will raise AttributeError as it does not exist in SeqRecord class
There is a BioTestCase that inherits directly from unittest.TestCase that gives you some nice functionality to test sequence type data. All you need to do is have your test classes inherit from it.
Additional assertions you get with BioTestCase:
-
assertFilesEqual
Takes two file handles or paths and asserts the contents are equal exactly
You can supply sort=True to sort the contents as well as strip=True to strip all newlines
-
assertSeqRecordEqual
Takes two Bio.SeqRecord.SeqRecord objects and compares them
Asserts .letter_annotations is equal in both sequences
Asserts .seq is equal in both sequences
Asserts .id, .name and .description are equal
from biotest import BioTestCase, MockableFile, MockSeqRecord
class TestSomething(BioTestCase):
def test_make_sure_files_equal(self):
f1 = MockableFile('foo.txt', contents='foo')
f2 = MockableFile('bar.txt', contents='bar')
self.assertFilesEqual(f1, f2) # Will generate AssertionError since contents are not equal
def test_make_sure_sequence_records_equal(self):
s = MockSeqRecord()
s.id = 'id'
s.description = 'd'
s.seq = 'ATGC'
s.name = 'name'
s.letter_annotations['phred_quality'] = [40,40,40,40]
self.assertSeqRecordEqual(s, s) # Will test that s is equal to itself
Hypothesis testing is a new-ish way of testing that allows you to "frame" your tests such that you are not locking the functionality of your code with unittests.
Read more at https://hypothesis.readthedocs.org
You can use the seqrec strategy to generate SeqRecord objects with the hypothesis package.
from biotest import BioTestCase, seqrec
class TestSeqRecord(BioTestCase):
@given(seqrec())
def test_something_with_seqrecord(self, record):
self.assertSeqRecordEqual(record, record)
To make it a bit easier you can use the test decorator as follows to do the same thing
from biotest import seq_record_strategy, BioTestCase
class TestSeqRecord(BioTestCase):
@seq_record_strategy()
def test_something_with_seqrecord(self, record):
self.assertSeqRecordEqual(record, record)
You can customize the records that get generated with either way by using any of the following args:
- min_length Default: 1
- max_length Default: 250
- min_qual Default: 0
- max_qual Default: 40
- alphabet Default: ATGCN
class TestSeqRecord(BioTestCase):
@seq_record_strategy(min_length=10, max_length=50, min_qual=20, max_qual=30, alphabet='ATGC')
def test_something_with_seqrecord(self, record):
self.assertSeqRecordEqual(record, record)
Sometimes you may want to test interleaved sequence records
This strategy just gives you a tuple of (forward, reverse).
Essentially these are two records from calling seqrec, but ensuring the ids are the same for the forward and reverse records.
from biotest import BioTestCase, interleaved_seqrec
class TestSeqRecord(BioTestCase):
@given(interleaved_seqrec)
def test_showing_off_interleaved(self, seqrec):
f, r = seqrec
self.assertEqual(f.id, r.id)
You can generate VCF records in a few different ways:
from biotest import BioTestCase
from biotest import biohypothesis
class TestVCF(BioTestCase):
@given(biohypothesis.vcf_dict_strategy_factory('chr1', 1, 'A'))
def test_vcf_record(self, vcfdict):
#vcfdict will contain common headers that freebayes outputs
# and all fields, regardless if they are FORMAT or INFO are flattened
# into the dictionary
self.assertEqual(1, vcfdict['pos'])
@given(biohypothesis.ref_with_vcf_dicts_strategy_factory())
def test_vcf_records_with_reference(self, seq_vcfs):
# ref_with_vcf_dicts_strategy_factory returns a tuple of
# (ref_sequence, iterable of vcf_record_dicts)
seq, vcfs = list(seq_vcfs[0]), list(seq_vcfs[1])
self.assertGreaterEqual(len(seq), len(vcfs))
# Assert all vcf ref seq chunks are same as on actual reference sequence
# at specified position
for vcf in vcfs:
r = vcf['ref']
p = vcf['pos']
refseq = ''.join(seq[p-1:p+len(r)-1])
self.assertEqual(refseq, r)
@given(biohypothesis.vcf_to_hypothesis_strategy_factory(open('tests/freebayes.header.vcf')))
def test_vcf_records_from_vcf_file(self, vcfrow):
# Another very simple way to generate vcf rows is to supply an existing
# vcf file such as the included test/example freebayes.header.vcf file
self.assertIn('DB', vcfrow)