From 0de7577847ee06821a34874a575ecfe499516eaa Mon Sep 17 00:00:00 2001
From: michaelpanciera <michael.panciera.work@gmail.com>
Date: Thu, 3 Mar 2016 21:20:21 -0500
Subject: [PATCH 01/10] basic types and examples

---
 MyPy/Bio/SeqIO.pyi      |  5 +++
 MyPy/Bio/SeqRecord.py   |  6 ++++
 MyPy/Bio/__init__.py    |  0
 MyPy/example.py         | 44 ++++++++++++++++++++++++
 MyPy/toolz/__init__.py  |  0
 MyPy/toolz/dicttoolz.py | 32 +++++++++++++++++
 MyPy/types.py           | 76 +++++++++++++++++++++++++++++++++++++++++
 MyPy/vcf/__init__.py    | 10 ++++++
 MyPy/vcf/model.py       |  3 ++
 9 files changed, 176 insertions(+)
 create mode 100644 MyPy/Bio/SeqIO.pyi
 create mode 100644 MyPy/Bio/SeqRecord.py
 create mode 100644 MyPy/Bio/__init__.py
 create mode 100644 MyPy/example.py
 create mode 100644 MyPy/toolz/__init__.py
 create mode 100644 MyPy/toolz/dicttoolz.py
 create mode 100644 MyPy/types.py
 create mode 100644 MyPy/vcf/__init__.py
 create mode 100644 MyPy/vcf/model.py

diff --git a/MyPy/Bio/SeqIO.pyi b/MyPy/Bio/SeqIO.pyi
new file mode 100644
index 0000000..baaf8a9
--- /dev/null
+++ b/MyPy/Bio/SeqIO.pyi
@@ -0,0 +1,5 @@
+from Bio.SeqRecord import SeqRecord
+from typing import Generator, Any, Iterator
+
+def parse(*anything): # type: (*Any) -> Iterator[SeqRecord]
+  pass
diff --git a/MyPy/Bio/SeqRecord.py b/MyPy/Bio/SeqRecord.py
new file mode 100644
index 0000000..8bdc6c6
--- /dev/null
+++ b/MyPy/Bio/SeqRecord.py
@@ -0,0 +1,6 @@
+# from Bio.SeqIO import SeqIO
+from typing import NamedTuple
+class Stringable(object):
+    def __str__(self): # type: () -> str
+        pass
+SeqRecord = NamedTuple('SeqRecord', [('id', str), ('seq', Stringable)])
diff --git a/MyPy/Bio/__init__.py b/MyPy/Bio/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/MyPy/example.py b/MyPy/example.py
new file mode 100644
index 0000000..4f9bd9f
--- /dev/null
+++ b/MyPy/example.py
@@ -0,0 +1,44 @@
+from typing import List, Dict, Generator, Iterator, Iterable, Tuple
+from Bio import SeqIO
+from itertools import imap
+from Bio.SeqRecord import SeqRecord
+def test_long(): # type: () -> int
+    return 11999999L
+def test_seqIO_map_fails(s): # type: (str) -> List[SeqRecord]
+    return map(lambda x: x.id, SeqIO.parse(s))
+
+#def test_seqIO_map_fails2(s): # type: (str) -> Iterator[SeqRecord]
+#    return map(lambda x: x.id, SeqIO.parse(s))
+def test_seqIO_map_passes(s): # type: (str) -> Iterable[str]
+    return imap(lambda x: x.id, SeqIO.parse(s))
+
+def test_seqIO(s): # type: (str) -> Iterator[SeqRecord]
+    return SeqIO.parse(s)
+def test_list_seqIO(s): # type: (str) -> List[SeqRecord]
+    return list(SeqIO.parse(s))
+def test_seqIO_fails(s): # type: (str) -> List[str]
+    return SeqIO.parse(s)
+def test_should_pass(s): # type: (SeqRecord) -> str
+    return s.id
+def test_should_fail(s): # type: (SeqRecord) -> int
+    return s.id
+#def test_should_fail(): # type: () -> List[SeqRecord]
+#    return 3
+
+#a = test_should_fail()
+def test_ordered_dict(od): # type: (Dict[str,int]) -> Dict[str,int]
+    return 1   #type error 1
+#
+#a = test_ordered_dict(1)   #type error 2
+#
+#def test_me():
+#    a = test_ordered_dict(1)  # type error 3 is not reported
+
+####def test_ordered_dict(od: typing.Dict[str,int]) -> typing.Dict[str,int]:
+####    return 1   #type error 1
+####
+####a = test_ordered_dict(1)   #type error 2
+####
+####def test_me():
+####    a = test_ordered_dict(1)  # type error 3 is not reported
+###
diff --git a/MyPy/toolz/__init__.py b/MyPy/toolz/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/MyPy/toolz/dicttoolz.py b/MyPy/toolz/dicttoolz.py
new file mode 100644
index 0000000..6f7178e
--- /dev/null
+++ b/MyPy/toolz/dicttoolz.py
@@ -0,0 +1,32 @@
+from typing import Dict, Any, Callable, TypeVar
+K = TypeVar('K')
+V = TypeVar('V')
+V2 = TypeVar('V2')
+V3 = TypeVar('V3')
+def merge(d1, d2): # type: (Dict[K,V], Dict[K,V]) -> Dict[K,V]
+    pass
+
+def dissoc(d, k): # type: (Dict[K,V], K) -> Dict[K,V]
+  pass
+
+def merge_with(f, d1, d2): # type: (Callable[[V,V2], V3], Dict[K,V], Dict[K,V2]) -> Dict[K,V3]
+    pass
+
+def valfilter(f, d): # type: (Callable[[V], bool], Dict[K,V]) -> Dict[K,V]
+  pass
+
+
+
+#from typing import Dict, Any, Callable, TypeVar
+#T = TypeVar('T')
+#def merge(d1, d2): # type: (Dict[Any,Any], Dict[Any,Any]) -> Dict[Any,Any]
+#    pass
+#
+#def dissoc(d, k): # type: (Dict[Any,Any], Any) -> Dict[Any,Any]
+#  pass
+#
+#def merge_with(f, d1, d2): # type: (Callable, Dict[Any,Any], Dict[Any,Any]) -> Dict[Any,Any]
+#    pass
+#
+#def valfilter(f, d): # type: (Callable, Dict[Any,Any]) -> Dict[Any,Any]
+#  pass
diff --git a/MyPy/types.py b/MyPy/types.py
new file mode 100644
index 0000000..9c40861
--- /dev/null
+++ b/MyPy/types.py
@@ -0,0 +1,76 @@
+from hypothesis import strategies as st
+from typing import Dict, Tuple, List, Iterator, Set, Union, Optional, TypingMeta, NamedTuple
+import re
+import operator
+from functools import partial
+import string
+from collections import namedtuple, OrderedDict
+compose = lambda f,g: lambda *x: f(g(*x))
+'''
+support:
+- [x] NamedTuple
+- [ ] Automatic function arguments 
+could also say, "given a function, generate random return values that it might give" because functions are also annotated with return values
+'''
+
+# Just an exmaple of a named tuple
+VCFRow = NamedTuple("VCFRow",
+                    [('ref', str),
+                     ('AO', List[int]),
+                     ('DP', int),
+                     ('chrom',str),
+                     ('pos', int),
+                     ('alt', List[str])])
+
+primitives = {
+    str   : st.text(),
+    int   : st.integers(),
+    bool  : st.booleans(),
+    float : st.floats(),
+    type(None) : st.none(),
+    unicode : st.characters(),
+    bytes : st.binary() # this is weird because str == bytes in py2
+} # missing: fractions, decimal
+
+#TODO: add Iterable, handle Sequence, etc.
+def resolve(x): # type: (TypingMeta) -> hypothesis.strategies.SearchStrategy
+   if x in primitives:
+       strat = primitives[x]
+   elif hasattr(x, '_fields'):
+       # NamedTuple isn't a type, so this can't be a subclass check
+       try:
+           #Only way I know how to extract the name so it's pretty...
+           name = re.compile("([^\.]+)'>$").search(str(x)).groups()[0]
+       except:
+           name = str(x) 
+       fts = OrderedDict(x._field_types)
+       nt = namedtuple(name, fts.keys())
+       vals = map(resolve, fts.values())
+       strat = st.tuples(*vals).map(lambda x: nt(*x))
+   elif issubclass(x, Dict):
+       strat = st.dictionaries(*map(resolve, x.__parameters__))
+   elif issubclass(x, Tuple):
+       strat = st.tuples(*map(resolve, x.__tuple_params__))
+   elif issubclass(x, Union):
+       strat = operator.ior(*map(resolve, x.__union_params__))
+   elif issubclass(x, Optional):
+       # Optional[X] is equivalent to Union[X, type(None)]. second param is always Nonetype.
+       value = x.__union_params__[0] 
+       strat = (resolve(value) | st.none())
+   else:  # a list-type-ish
+       collections = {
+           Iterator : lambda x: st.lists(x).map(iter),
+           List : st.lists,
+           Set : st.sets
+          } #TODO: missing: Iterable , etc.
+       # For some reason List[T] not a subclass of List: issubclass(x, List) == False. So do these hijinks
+       params = x.__parameters__
+       assert len(params) == 1, "Wrong type %s, not a list-like" % x
+       matches = filter(lambda k: k == x.__origin__, collections.keys())
+       assert len(matches) == 1, "Should have exactly one match. %s matched with %s" % (x, matches)
+       collection_strat = collections[matches[0]]
+       strat = collection_strat(resolve(params[0]))
+   return strat
+# see https://docs.python.org/3/library/typing.html
+# not Generics
+# not Callables
diff --git a/MyPy/vcf/__init__.py b/MyPy/vcf/__init__.py
new file mode 100644
index 0000000..fbb79f9
--- /dev/null
+++ b/MyPy/vcf/__init__.py
@@ -0,0 +1,10 @@
+from typing import Union, Dict, List, NamedTuple, Iterator
+
+#fields = [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])]
+#
+#VCFRecord = NamedTuple('VCFRecord', fields)
+
+VCFRecord = NamedTuple('VCFRecord', [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])]
+)
+def Reader(s): # type: (str) -> Iterator[VCFRecord]
+    pass
diff --git a/MyPy/vcf/model.py b/MyPy/vcf/model.py
new file mode 100644
index 0000000..02b1e9a
--- /dev/null
+++ b/MyPy/vcf/model.py
@@ -0,0 +1,3 @@
+from typing import Union, Dict, List, NamedTuple, Iterator
+_Record = NamedTuple('VCFRecord', [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])]
+)

From bf1b67bbd4f7f889b1267883119f014187340b39 Mon Sep 17 00:00:00 2001
From: michaelpanciera <michael.panciera.work@gmail.com>
Date: Thu, 3 Mar 2016 22:13:49 -0500
Subject: [PATCH 02/10] typechecking works.

---
 MyPy/vcf/model.py                 |  3 --
 bioframework/consensus.py         | 63 ++++++++++++++-----------
 {MyPy => mypy}/Bio/SeqIO.pyi      |  0
 {MyPy => mypy}/Bio/SeqRecord.py   |  0
 {MyPy => mypy}/Bio/__init__.py    |  0
 {MyPy/toolz => mypy}/__init__.py  |  0
 {MyPy => mypy}/example.py         |  0
 mypy/out/docopt.pyi               | 76 +++++++++++++++++++++++++++++++
 mypy/out/hypothesis/__init__.pyi  |  8 ++++
 mypy/out/schema.pyi               | 31 +++++++++++++
 mypy/toolz/__init__.py            |  0
 {MyPy => mypy}/toolz/dicttoolz.py |  0
 {MyPy => mypy}/types.py           |  0
 {MyPy => mypy}/vcf/__init__.py    |  5 +-
 mypy/vcf/model.py                 |  3 ++
 15 files changed, 157 insertions(+), 32 deletions(-)
 delete mode 100644 MyPy/vcf/model.py
 rename {MyPy => mypy}/Bio/SeqIO.pyi (100%)
 rename {MyPy => mypy}/Bio/SeqRecord.py (100%)
 rename {MyPy => mypy}/Bio/__init__.py (100%)
 rename {MyPy/toolz => mypy}/__init__.py (100%)
 rename {MyPy => mypy}/example.py (100%)
 create mode 100644 mypy/out/docopt.pyi
 create mode 100644 mypy/out/hypothesis/__init__.pyi
 create mode 100644 mypy/out/schema.pyi
 create mode 100644 mypy/toolz/__init__.py
 rename {MyPy => mypy}/toolz/dicttoolz.py (100%)
 rename {MyPy => mypy}/types.py (100%)
 rename {MyPy => mypy}/vcf/__init__.py (69%)
 create mode 100644 mypy/vcf/model.py

diff --git a/MyPy/vcf/model.py b/MyPy/vcf/model.py
deleted file mode 100644
index 02b1e9a..0000000
--- a/MyPy/vcf/model.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from typing import Union, Dict, List, NamedTuple, Iterator
-_Record = NamedTuple('VCFRecord', [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])]
-)
diff --git a/bioframework/consensus.py b/bioframework/consensus.py
index 05a1970..8c0da19 100755
--- a/bioframework/consensus.py
+++ b/bioframework/consensus.py
@@ -14,7 +14,7 @@
 from functools import partial
 from itertools import ifilter, imap, groupby, takewhile, repeat, starmap, izip_longest
 import os, sys
-from typing import Tuple, Dict, List, Iterator, Iterable, Any, Callable
+from typing import Tuple, Dict, List, Iterator, Iterable, Any, Callable, NamedTuple, BinaryIO
 
 from Bio import SeqIO #done
 from Bio.SeqRecord import SeqRecord #done
@@ -24,15 +24,21 @@
 from toolz.dicttoolz import merge, dissoc, merge_with, valfilter #todo
 from docopt import docopt #ignore
 from schema import Schema, Use #ignore
-from contracts import contract, new_contract #can ignore
+#from contracts import contract, new_contract #can ignore
+#from mypy.types import VCFRow
 #############
 # Constants #
 #############
+VCFRow = NamedTuple("VCFRow",
+                    [('ref', str),
+                     ('AO', List[int]),
+                     ('DP', int),
+                     ('chrom',str),
+                     ('pos', int),
+                     ('alt', List[str])])
+#VcfRow = namedtuple("VcfRow", VCFRow._fields) # type: (*Any) -> VCFRow
 
-AMBIGUITY_TABLE = { 'A': 'A', 'T': 'T', 'G': 'G', 'C': 'C', 'N': 'N',
-                       'AC': 'M', 'AG': 'R', 'AT': 'W', 'CG': 'S', 'CT':
-                       'Y', 'GT': 'K', 'ACG': 'V', 'ACT': 'H', 'AGT': 'D',
-                       'CGT': 'B', 'ACGT': 'N' }
+AMBIGUITY_TABLE = { 'A': 'A', 'T': 'T', 'G': 'G', 'C': 'C', 'N': 'N', 'AC': 'M', 'AG': 'R', 'AT': 'W', 'CG': 'S', 'CT': 'Y', 'GT': 'K', 'ACG': 'V', 'ACT': 'H', 'AGT': 'D', 'CGT': 'B', 'ACGT': 'N' }
 
 MAJORITY_PERCENTAGE = 80
 MIN_DEPTH = 10
@@ -40,7 +46,7 @@
 ###########
 # Reducer #
 ###########
-@contract(reference='string', muts='list(tuple(string, string, int))'   )
+#@contract(reference='string', muts='list(tuple(string, string, int))'   )
 def make_consensus(reference, muts):
     # type: (str, List[Mut]) -> Tuple[str, List[Mut]]
     ''' Actually builds a consensus string by recursively applying
@@ -98,10 +104,10 @@ def call_base_multi_alts(min_depth, majority_percentage, dp, alts, ref):
 
 #@contract(min_depth='number,>=0', majority_percentage='number,>=0,<=100', rec='dict', returns='tuple(string, string, int)')
 def call_many(min_depth, majority_percentage, rec):
-    # type: (int, int, Dict) -> Mut
+    # type: (int, int, VCFRow) -> Mut
     #TODO: switch to generators
-    muts = zip(rec['AO'], rec['alt'])
-    ref, dp, pos = rec['ref'], rec['DP'], rec['pos']
+    muts = zip(rec.AO, rec.alt)
+    ref, dp, pos = rec.ref, rec.DP, rec.pos
     longest_len = max(map(lambda x: len(x[-1]), muts))
     longest_len = max(longest_len, len(ref))
     def fill_gap(r):
@@ -115,22 +121,23 @@ def seq_count(acc, ao_and_nts):
         return map(merge_sum, acc, [{nt:ao} for nt in nts])
     # create a list of {base : count}, where the index matches the position
     mut_dicts = reduce(seq_count, xs, [{}])
-    base_caller = partial(call_base_multi_alts, min_depth, majority_percentage, dp) # type: Callable[[Dict[Any,Any], str], str]
+    base_caller = lambda m,r: call_base_multi_alts(min_depth, majority_percentage, dp, m, r) #   # # ?Callable[[Dict[Any,Any], str], str]
     res = map(base_caller, mut_dicts, ref)
     # trim None values at the end, (which indicate deletion)
     result = takewhile(bool, res)
     return (ref, ''.join(result), pos)
 
-@contract(rec='dict',returns='dict')
+#@contract(rec='dict',returns='dict')
 def flatten_vcf_record(rec):
-    # type: (_Record) -> Dict[str, Any]
+    # type: (_Record) -> VCFRow
     _rec = merge({
   'alt' : rec.ALT, 'ref' : rec.REF,
   'pos' : rec.POS, 'chrom' : rec.CHROM},
         rec.INFO)
     if not hasattr(_rec['alt'], '__iter__'): #TODO: put this somewhere else
-        return merge(_rec, dict(alt=[_rec['alt']], AO=[_rec['AO']]))
-    else: return _rec
+        d = merge(_rec, dict(alt=[_rec['alt']], AO=[_rec['AO']]))
+    else: d = _rec
+    return VCFRow(**d)
 
 ##############
 # Group By   #
@@ -138,16 +145,16 @@ def flatten_vcf_record(rec):
 #NOTE: could possibly drop lists, use fn.Stream all the time,
 # and write a Stream instance for contracts like:
 # https://github.com/AndreaCensi/contracts/blob/831ec7a5260ceb8960540ba0cb6cc26370cf2d82/src/contracts/library/lists.py
-@contract(references='list[N]($SeqRecord),N>0', muts='list(dict)',returns='tuple(list(dict))')
+#@contract(references='list[N]($SeqRecord),N>0', muts='list(dict)',returns='tuple(list(dict))')
 def group_muts_by_refs(references, muts):
-    # type: (List[SeqRecord], List[Dict[Any, Any]]) -> Iterable[List[Dict]]
+    # type: (List[SeqRecord], List[VCFRow]) -> List[List[VCFRow]]
     '''group and sort the mutations so that they match the order of the references.'''
     #NOTE: muts will already be "sorted" in that they are grouped together in the vcf
     #fix the groupby so it doesn't incidentally drain the first object of the group
     unzip = lambda x: zip(*x)
     chroms, groups = unzip(map(lambda kv: (kv[0], list(kv[1])), groupby(muts, get('chrom'))))
-    @contract(key='tuple(string,list)')
-    def index_of_ref(key):
+    #@contract(key='tuple(string,list)')
+    def index_of_ref(key): # type: (Tuple[str, List[SeqRecord]]) -> int
         chrom=key[0]
         index_of_chrom =  map(lambda x: x.id, references).index(chrom)
         return index_of_chrom
@@ -162,13 +169,15 @@ def index_of_ref(key):
 
 #@contract(references='SeqRecord', muts='seq(dict)', mind=int, majority=int)
 def all_consensuses(references, muts, mind, majority):
-    # type: (Iterable[SeqRecord], Iterable[Dict[Any,Any]], int, int) -> Tuple[List[str], Iterator[Tuple[str, List[Mut]]]]
+    # type: (List[SeqRecord], List[VCFRow], int, int) -> Tuple[List[SeqRecord], Iterable[Tuple[str, List[Mut]]]]
     ''' generates conesnsuses, including for flu and other mult-reference VCFs.
     applies filters and base callers to the mutations.
     then builds the consensus using these calls and `make_consensus`'''
     muts_by_ref = group_muts_by_refs(references, muts)
     def single_consensus(muts, ref):
-        the_muts = map(partial(call_many, mind, majority), muts)
+        # type: (List[VCFRow], SeqRecord) -> Tuple[str, List[Mut]]
+        #the_muts = map(partial(call_many, mind, majority), muts)
+        the_muts = map(lambda x: call_many(mind, majority, x), muts)
         ref_and_alt_differ = lambda x: x[0] != x[1]
         # vcf is index-starting-at-1
         #real_muts = map(lambda (a,b,pos): (a,b,pos-1), filter(ref_and_alt_differ, the_muts))
@@ -186,13 +195,13 @@ def consensus_str(ref, consensus): # type: (SeqRecord, str) -> str
 
 #@contract(ref_fasta=str, vcf=str, mind=int, majority=int)
 def run(ref_fasta, freebayes_vcf, outfile, mind, majority):
-    # type: (str, str, str, int, int) -> int
-    refs = SeqIO.parse(ref_fasta, 'fasta')
+    # type: (str, str, BinaryIO, int, int) -> int
+    _refs = SeqIO.parse(ref_fasta, 'fasta')
     with open(freebayes_vcf, 'r') as vcf_handle:
-        muts = imap(flatten_vcf_record, vcf.Reader(vcf_handle))
-        refs, muts = list(refs), list(muts)
-        refs, seqs_and_muts = all_consensuses(refs, muts, mind, majority)
-        strings = imap(consensus_str, refs, imap(get(0), seqs_and_muts))
+        _muts = map(flatten_vcf_record, vcf.Reader(vcf_handle))
+        refs, muts = list(_refs), list(_muts)
+        the_refs, seqs_and_muts = all_consensuses(refs, muts, mind, majority)
+        strings = imap(consensus_str, the_refs, imap(get(0), seqs_and_muts))
         result = '\n'.join(strings)
         outfile.write(result)
         outfile.close()
diff --git a/MyPy/Bio/SeqIO.pyi b/mypy/Bio/SeqIO.pyi
similarity index 100%
rename from MyPy/Bio/SeqIO.pyi
rename to mypy/Bio/SeqIO.pyi
diff --git a/MyPy/Bio/SeqRecord.py b/mypy/Bio/SeqRecord.py
similarity index 100%
rename from MyPy/Bio/SeqRecord.py
rename to mypy/Bio/SeqRecord.py
diff --git a/MyPy/Bio/__init__.py b/mypy/Bio/__init__.py
similarity index 100%
rename from MyPy/Bio/__init__.py
rename to mypy/Bio/__init__.py
diff --git a/MyPy/toolz/__init__.py b/mypy/__init__.py
similarity index 100%
rename from MyPy/toolz/__init__.py
rename to mypy/__init__.py
diff --git a/MyPy/example.py b/mypy/example.py
similarity index 100%
rename from MyPy/example.py
rename to mypy/example.py
diff --git a/mypy/out/docopt.pyi b/mypy/out/docopt.pyi
new file mode 100644
index 0000000..6f9431c
--- /dev/null
+++ b/mypy/out/docopt.pyi
@@ -0,0 +1,76 @@
+# Stubs for docopt (Python 2)
+#
+# NOTE: This dynamically typed stub was automatically generated by stubgen.
+
+from typing import Any
+
+class DocoptLanguageError(Exception): ...
+
+class DocoptExit(SystemExit):
+    usage = ... # type: Any
+    def __init__(self, message=''): ...
+
+class Pattern:
+    def __eq__(self, other): ...
+    def __hash__(self): ...
+    def fix(self): ...
+    def fix_identities(self, uniq=None): ...
+    def fix_repeating_arguments(self): ...
+    @property
+    def either(self): ...
+
+class ChildPattern(Pattern):
+    name = ... # type: Any
+    value = ... # type: Any
+    def __init__(self, name, value=None): ...
+    def flat(self, *types): ...
+    def match(self, left, collected=None): ...
+
+class ParentPattern(Pattern):
+    children = ... # type: Any
+    def __init__(self, *children): ...
+    def flat(self, *types): ...
+
+class Argument(ChildPattern):
+    def single_match(self, left): ...
+    @classmethod
+    def parse(class_, source): ...
+
+class Command(Argument):
+    name = ... # type: Any
+    value = ... # type: Any
+    def __init__(self, name, value=False): ...
+    def single_match(self, left): ...
+
+class Option(ChildPattern):
+    value = ... # type: Any
+    def __init__(self, short=None, long=None, argcount=0, value=False): ...
+    @classmethod
+    def parse(class_, option_description): ...
+    def single_match(self, left): ...
+    @property
+    def name(self): ...
+
+class Required(ParentPattern):
+    def match(self, left, collected=None): ...
+
+class Optional(ParentPattern):
+    def match(self, left, collected=None): ...
+
+class AnyOptions(Optional): ...
+
+class OneOrMore(ParentPattern):
+    def match(self, left, collected=None): ...
+
+class Either(ParentPattern):
+    def match(self, left, collected=None): ...
+
+class TokenStream(list):
+    error = ... # type: Any
+    def __init__(self, source, error): ...
+    def move(self): ...
+    def current(self): ...
+
+class Dict(dict): ...
+
+def docopt(doc, argv=None, help=True, version=None, options_first=False): ...
diff --git a/mypy/out/hypothesis/__init__.pyi b/mypy/out/hypothesis/__init__.pyi
new file mode 100644
index 0000000..764f9f7
--- /dev/null
+++ b/mypy/out/hypothesis/__init__.pyi
@@ -0,0 +1,8 @@
+# Stubs for hypothesis (Python 2)
+#
+# NOTE: This dynamically typed stub was automatically generated by stubgen.
+
+from hypothesis._settings import settings as settings, Verbosity as Verbosity
+from hypothesis.version import __version_info__ as __version_info__, __version__ as __version__
+from hypothesis.control import assume as assume, note as note, reject as reject
+from hypothesis.core import given as given, find as find, example as example, seed as seed
diff --git a/mypy/out/schema.pyi b/mypy/out/schema.pyi
new file mode 100644
index 0000000..3eb2140
--- /dev/null
+++ b/mypy/out/schema.pyi
@@ -0,0 +1,31 @@
+# Stubs for schema (Python 2)
+#
+# NOTE: This dynamically typed stub was automatically generated by stubgen.
+
+from typing import Any
+
+class SchemaError(Exception):
+    autos = ... # type: Any
+    errors = ... # type: Any
+    def __init__(self, autos, errors): ...
+    @property
+    def code(self): ...
+
+class And:
+    def __init__(self, *args, **kw): ...
+    def validate(self, data): ...
+
+class Or(And):
+    def validate(self, data): ...
+
+class Use:
+    def __init__(self, callable_, error=None): ...
+    def validate(self, data): ...
+
+def priority(s): ...
+
+class Schema:
+    def __init__(self, schema, error=None): ...
+    def validate(self, data): ...
+
+class Optional(Schema): ...
diff --git a/mypy/toolz/__init__.py b/mypy/toolz/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/MyPy/toolz/dicttoolz.py b/mypy/toolz/dicttoolz.py
similarity index 100%
rename from MyPy/toolz/dicttoolz.py
rename to mypy/toolz/dicttoolz.py
diff --git a/MyPy/types.py b/mypy/types.py
similarity index 100%
rename from MyPy/types.py
rename to mypy/types.py
diff --git a/MyPy/vcf/__init__.py b/mypy/vcf/__init__.py
similarity index 69%
rename from MyPy/vcf/__init__.py
rename to mypy/vcf/__init__.py
index fbb79f9..29c9cc8 100644
--- a/MyPy/vcf/__init__.py
+++ b/mypy/vcf/__init__.py
@@ -1,4 +1,5 @@
-from typing import Union, Dict, List, NamedTuple, Iterator
+from typing import Union, Dict, List, NamedTuple, Iterator, BinaryIO
+from vcf.model import _Record
 
 #fields = [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])]
 #
@@ -6,5 +7,5 @@
 
 VCFRecord = NamedTuple('VCFRecord', [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])]
 )
-def Reader(s): # type: (str) -> Iterator[VCFRecord]
+def Reader(s): # type: (BinaryIO) -> Iterator[_Record]
     pass
diff --git a/mypy/vcf/model.py b/mypy/vcf/model.py
new file mode 100644
index 0000000..9266869
--- /dev/null
+++ b/mypy/vcf/model.py
@@ -0,0 +1,3 @@
+from typing import Union, Dict, List, NamedTuple, Iterator
+_Record = NamedTuple('_Record', [("ALT", Union[str, List[str]]), ("REF", str), ("POS", int), ("CHROM", str), ("INFO", Dict[str, Union[int, List[int]]])]
+)

From c2aecb8c613804931daec26c82810665c8d7ee95 Mon Sep 17 00:00:00 2001
From: michaelpanciera <michael.panciera.work@gmail.com>
Date: Thu, 3 Mar 2016 22:17:27 -0500
Subject: [PATCH 03/10] added readme for types

---
 mypy/README.md | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 mypy/README.md

diff --git a/mypy/README.md b/mypy/README.md
new file mode 100644
index 0000000..aec5641
--- /dev/null
+++ b/mypy/README.md
@@ -0,0 +1,6 @@
+1. Install [mypy](https://github.com/python/mypy#quick-start)
+
+2. Run mypy: `MYPYPATH=$PWD/mypy:$PWD/mypy/out mypy --py2 bioframework/consensus.py`
+
+If needed, use `stubgen` to generate more stub files for other libraries.
+

From c09b94bfbb5c46087a16a57428e3678ebc175cbc Mon Sep 17 00:00:00 2001
From: michaelpanciera <michael.panciera.work@gmail.com>
Date: Thu, 3 Mar 2016 22:28:22 -0500
Subject: [PATCH 04/10] fixed tests, now passing

---
 bioframework/consensus.py |  2 +-
 tests/test_consensus.py   | 37 +++++++++++++++++++------------------
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/bioframework/consensus.py b/bioframework/consensus.py
index 8c0da19..c6384ea 100755
--- a/bioframework/consensus.py
+++ b/bioframework/consensus.py
@@ -152,7 +152,7 @@ def group_muts_by_refs(references, muts):
     #NOTE: muts will already be "sorted" in that they are grouped together in the vcf
     #fix the groupby so it doesn't incidentally drain the first object of the group
     unzip = lambda x: zip(*x)
-    chroms, groups = unzip(map(lambda kv: (kv[0], list(kv[1])), groupby(muts, get('chrom'))))
+    chroms, groups = unzip(map(lambda kv: (kv[0], list(kv[1])), groupby(muts, lambda x: x.chrom)))
     #@contract(key='tuple(string,list)')
     def index_of_ref(key): # type: (Tuple[str, List[SeqRecord]]) -> int
         chrom=key[0]
diff --git a/tests/test_consensus.py b/tests/test_consensus.py
index 55f81f5..47076c1 100644
--- a/tests/test_consensus.py
+++ b/tests/test_consensus.py
@@ -6,7 +6,7 @@
 from hypothesis import strategies as st
 from hypothesis import given, assume
 from operator import itemgetter as get
-from bioframework.consensus import call_many, all_consensuses, make_consensus
+from bioframework.consensus import call_many, all_consensuses, make_consensus, VCFRow
 import string
 import itertools
 import unittest
@@ -15,7 +15,7 @@
                      st.integers(min_value=1),
                      st.text(alphabet='ACTGN', min_size=1, max_size=6)) \
                      .flatmap(lambda tup:\
-                          vcf_dict_strategy_factory(*tup))
+                          vcf_dict_strategy_factory(*tup)).map(lambda d: VCFRow(**d))
 pos_int = st.integers(min_value=0)
 
 #TODO: these 10, 80 for trhesh and majority_percentage should be factored out and possibly be strategies themselves
@@ -24,23 +24,23 @@ def just_ref(*args):
 class CallBaseHypothesisTest(unittest.TestCase):
     @given(simple_vcf_dict_strategy, pos_int)
     def test_under_mind_is_N(self, mut, mind):
-        assume(mut['DP'] < mind)
+        assume(mut.DP < mind)
         result = call_many(mind, 80, mut)[1]
         self.assertTrue(all(map(lambda x: x == 'N', result)))
 
     @given(simple_vcf_dict_strategy)
     def test_ao_under_minority_is_ref(self, mut):
-        assume(sum(mut['AO']) / mut['DP'] < 0.2)
+        assume(sum(mut.AO) / mut.DP < 0.2)
         result = call_many(0, 80, mut)[1]
-        self.assertEquals(result, mut['ref'])
+        self.assertEquals(result, mut.ref)
 
     @given(simple_vcf_dict_strategy)
     def test_over_majority_is_alt(self, mut):
         #TODO: this is slow
-        assume(sum(mut['AO']) / mut['DP'] > 0.8)
-        assume(len(mut['alt']) == 1)
+        assume(sum(mut.AO) / mut.DP > 0.8)
+        assume(len(mut.alt) == 1)
         result = call_many(0, 80, mut)[1]
-        self.assertEquals(result, mut['alt'][0])
+        self.assertEquals(result, mut.alt[0])
 
 #Commented out because it's not actually always true,
 # e.g. mut={'ref': u'AA', 'pos': 1, 'AO': [784313725491], 'alt': [u'A'],
@@ -48,9 +48,9 @@ def test_over_majority_is_alt(self, mut):
 # should result in AA
 #    @given(simple_vcf_dict_strategy)
 #    def test_over_minoriy_is_not_ref(self, mut):
-#        assume(sum(mut['AO']) / mut['DP'] > 0.2)
+#        assume(sum(mut.AO) / mut.DP > 0.2)
 #        result = call_many(0, 80, mut)[1]
-#        self.assertNotEquals(result, mut['ref'])
+#        self.assertNotEquals(result, mut.ref)
 
 class ConsesusExampleTest(unittest.TestCase):
     def test_make_consensus_example(self):
@@ -62,7 +62,7 @@ def test_make_consensus_example(self):
         self.assertEquals(expected, actual)
 
     def test_single_example(self):
-        muts = [{
+        raw_muts = [{
             'pos' : 2,
             'ref' : 'CG',
             'alt' : ['TT'],
@@ -78,12 +78,13 @@ def test_single_example(self):
             'DP' : 150,
             'chrom' : 'X'
         }]
+        muts = map(lambda d: VCFRow(**d), raw_muts)
         ref = make_seqrec('X', 'ACGTACGT')
         expected = 'ATTTAAGT'
         result = just_ref([ref], muts, 10, 80)
         self.assertEquals(expected, result)
 ref_with_vcf_dicts_strategy = ref_with_vcf_dicts_strategy_factory().map(
-    lambda (r, muts): (make_seqrec(muts[0]['chrom'], r), muts))
+    lambda (r, muts): (make_seqrec(muts[0]['chrom'], r), map(lambda d: VCFRow(**d), muts)))
 from collections import Counter
 countof = lambda c: lambda x: Counter(x).get(c, 0)
 def run_cons(*args):
@@ -100,11 +101,11 @@ class ConsensusHypothesisTest(unittest.TestCase):
     def test_n_count(self, ref_and_muts, rand):
         ref, muts = ref_and_muts
         originalNs = countof('N')(ref)
-        alts = map(get('alt'), muts)
+        alts = map(lambda x: x.alt, muts)
         assume(not any(map(lambda x: 'N' in x, itertools.chain(*alts))))
         # needed because  ACGT -> N
         assume(not filter(lambda x: len(x) > 3, alts))
-        expectedNs = len(filter(lambda x: x['DP'] < 10, muts))  + originalNs
+        expectedNs = len(filter(lambda x: x.DP < 10, muts))  + originalNs
         result = just_ref([ref], muts, 10, 80)
         self.assertEquals(countof('N')(result), expectedNs)
 
@@ -119,7 +120,7 @@ def test_less_or_equal_length_when_no_inserts(self, ref_and_muts):
     def assume_greater_or_equal_length_when_no_deletions(self, ref_and_muts):
         ref, muts = ref_and_muts
         def has_deletion(mut):
-             filter(lambda x: len(x) < mut['ref'], mut['alt'])
+             filter(lambda x: len(x) < mut.ref, mut.alt)
         assume(not any(map(has_deletion, muts)))
         result = just_ref([ref], muts, 10, 80)
         self.assertLesserEqual(len(ref), len(result))
@@ -138,11 +139,11 @@ def test_more_or_equal_ns_with_lower_threshold(self, ref_and_muts, n1, n2):
     @given(ref_with_vcf_dicts_strategy)
     def test_consensus_from_consensus_contains_more_alts(self, ref_and_muts):
         ref, muts = ref_and_muts
-        assume(not any(map(lambda x: len(x['alt']) > 1, muts)))
+        assume(not any(map(lambda x: len(x.alt) > 1, muts)))
         n1 = 10
         cons1, alts = run_cons([ref], muts, n1, 80)
         assume(not any(map(lambda x: len(x[0]) > len(x[1]), alts)))
-        cons2, _ = run_cons([make_seqrec(muts[0]['chrom'], cons1)], muts, n1, 80)
+        cons2, _ = run_cons([make_seqrec(muts[0].chrom, cons1)], muts, n1, 80)
         picked_alts = map(get(1), alts)
         altCounts1 = sum(map(lambda f: f(cons1),  map(countof, picked_alts)))
         altCounts2 = sum(map(lambda f: f(cons2),  map(countof, picked_alts)))
@@ -156,7 +157,7 @@ def test_consensus_from_consensus_contains_more_alts(self, ref_and_muts):
     def test_lower_majority_required_contains_more_alts(self, ref_and_muts, p1, p2):
         ref, muts = ref_and_muts
         assume(p1 < p2)
-        assume(not any(map(lambda x: len(x['alt']) > 1, muts)))
+        assume(not any(map(lambda x: len(x.alt) > 1, muts)))
         n1 = 10
         cons1, alts = run_cons([ref], muts, n1, p1)
         assume(not any(map(lambda x: len(x[0]) > len(x[1]), alts)))

From 6a0bd962f9d3f96907705cd959d1e41a15c97cd5 Mon Sep 17 00:00:00 2001
From: michaelpanciera <michael.panciera.work@gmail.com>
Date: Thu, 3 Mar 2016 22:31:54 -0500
Subject: [PATCH 05/10] removing old types-hypothesis generator file

---
 mypy/types.py | 76 ---------------------------------------------------
 1 file changed, 76 deletions(-)
 delete mode 100644 mypy/types.py

diff --git a/mypy/types.py b/mypy/types.py
deleted file mode 100644
index 9c40861..0000000
--- a/mypy/types.py
+++ /dev/null
@@ -1,76 +0,0 @@
-from hypothesis import strategies as st
-from typing import Dict, Tuple, List, Iterator, Set, Union, Optional, TypingMeta, NamedTuple
-import re
-import operator
-from functools import partial
-import string
-from collections import namedtuple, OrderedDict
-compose = lambda f,g: lambda *x: f(g(*x))
-'''
-support:
-- [x] NamedTuple
-- [ ] Automatic function arguments 
-could also say, "given a function, generate random return values that it might give" because functions are also annotated with return values
-'''
-
-# Just an exmaple of a named tuple
-VCFRow = NamedTuple("VCFRow",
-                    [('ref', str),
-                     ('AO', List[int]),
-                     ('DP', int),
-                     ('chrom',str),
-                     ('pos', int),
-                     ('alt', List[str])])
-
-primitives = {
-    str   : st.text(),
-    int   : st.integers(),
-    bool  : st.booleans(),
-    float : st.floats(),
-    type(None) : st.none(),
-    unicode : st.characters(),
-    bytes : st.binary() # this is weird because str == bytes in py2
-} # missing: fractions, decimal
-
-#TODO: add Iterable, handle Sequence, etc.
-def resolve(x): # type: (TypingMeta) -> hypothesis.strategies.SearchStrategy
-   if x in primitives:
-       strat = primitives[x]
-   elif hasattr(x, '_fields'):
-       # NamedTuple isn't a type, so this can't be a subclass check
-       try:
-           #Only way I know how to extract the name so it's pretty...
-           name = re.compile("([^\.]+)'>$").search(str(x)).groups()[0]
-       except:
-           name = str(x) 
-       fts = OrderedDict(x._field_types)
-       nt = namedtuple(name, fts.keys())
-       vals = map(resolve, fts.values())
-       strat = st.tuples(*vals).map(lambda x: nt(*x))
-   elif issubclass(x, Dict):
-       strat = st.dictionaries(*map(resolve, x.__parameters__))
-   elif issubclass(x, Tuple):
-       strat = st.tuples(*map(resolve, x.__tuple_params__))
-   elif issubclass(x, Union):
-       strat = operator.ior(*map(resolve, x.__union_params__))
-   elif issubclass(x, Optional):
-       # Optional[X] is equivalent to Union[X, type(None)]. second param is always Nonetype.
-       value = x.__union_params__[0] 
-       strat = (resolve(value) | st.none())
-   else:  # a list-type-ish
-       collections = {
-           Iterator : lambda x: st.lists(x).map(iter),
-           List : st.lists,
-           Set : st.sets
-          } #TODO: missing: Iterable , etc.
-       # For some reason List[T] not a subclass of List: issubclass(x, List) == False. So do these hijinks
-       params = x.__parameters__
-       assert len(params) == 1, "Wrong type %s, not a list-like" % x
-       matches = filter(lambda k: k == x.__origin__, collections.keys())
-       assert len(matches) == 1, "Should have exactly one match. %s matched with %s" % (x, matches)
-       collection_strat = collections[matches[0]]
-       strat = collection_strat(resolve(params[0]))
-   return strat
-# see https://docs.python.org/3/library/typing.html
-# not Generics
-# not Callables

From 6cbd2e16d83ed64f278cb1bb088f3c7dd165a4c9 Mon Sep 17 00:00:00 2001
From: michaelpanciera <michael.panciera.work@gmail.com>
Date: Thu, 3 Mar 2016 22:32:18 -0500
Subject: [PATCH 06/10] added missing typing requirement

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index dbdcd86..26a06bb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 pycontracts
 toolz
 pyvcf
+typing

From 4427de8e70b997a8b6a262342f1dda796c5912ed Mon Sep 17 00:00:00 2001
From: michaelpanciera <michael.panciera.work@gmail.com>
Date: Thu, 3 Mar 2016 22:44:40 -0500
Subject: [PATCH 07/10] missing deps.

---
 requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 26a06bb..8e01b72 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,5 @@ pycontracts
 toolz
 pyvcf
 typing
+docopt
+schema

From 23afece1421c36dcc1fb2cb09c21250e016cb87c Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedwrair024635.amed.ds.army.mil>
Date: Fri, 4 Mar 2016 10:47:59 -0500
Subject: [PATCH 08/10] fix failing test

---
 tests/test_consensus.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tests/test_consensus.py b/tests/test_consensus.py
index 47076c1..955e086 100644
--- a/tests/test_consensus.py
+++ b/tests/test_consensus.py
@@ -91,7 +91,7 @@ def run_cons(*args):
     _, alt_and_cons = all_consensuses(*args)
     cons, alts  = zip(*alt_and_cons)
     return  cons[0], alts[0]
-class ConsensusHypothesisTest(unittest.TestCase): 
+class ConsensusHypothesisTest(unittest.TestCase):
     #ref_and_muts=(SeqRecord(seq=Seq(u'AAAAAAAAAA', IUPACAmbiguousDNA()), id=u'', name='<unknown name>', description='', dbxrefs=[]), [
 #    {'ref': u'A', 'pos': 1, 'AO': [479, 777, 119, 604], 'alt': [u'G', u'C', u'G', u'TG'], 'chrom': u'', 'DP': 2635},
 #    {'ref': u'A', 'pos': 3, 'AO': [291, 241, 583, 420], 'alt': [u'CTG', u'C', u'G', u'C'], 'chrom': u'', 'DP': 1627}]), rand=random.seed(0))
@@ -102,9 +102,11 @@ def test_n_count(self, ref_and_muts, rand):
         ref, muts = ref_and_muts
         originalNs = countof('N')(ref)
         alts = map(lambda x: x.alt, muts)
-        assume(not any(map(lambda x: 'N' in x, itertools.chain(*alts))))
+        refs = map(lambda x: x.ref, muts)
+        assume(not filter(lambda x: 'N' in x, itertools.chain(*alts)))
+        assume(not filter(lambda x: len(x) > 1, itertools.chain(*alts)))
+        assume(not filter(lambda x: len(x) > 1, refs))
         # needed because  ACGT -> N
-        assume(not filter(lambda x: len(x) > 3, alts))
         expectedNs = len(filter(lambda x: x.DP < 10, muts))  + originalNs
         result = just_ref([ref], muts, 10, 80)
         self.assertEquals(countof('N')(result), expectedNs)
@@ -134,7 +136,7 @@ def test_more_or_equal_ns_with_lower_threshold(self, ref_and_muts, n1, n2):
         cons1 = just_ref([ref], muts, n1, 80)
         cons2 = just_ref([ref], muts, n2, 80)
         nsCount1, nsCount2 = countof('N')(cons1), countof('N')(cons2)
-        self.assertLessEqual(nsCount1, nsCount2) 
+        self.assertLessEqual(nsCount1, nsCount2)
 
     @given(ref_with_vcf_dicts_strategy)
     def test_consensus_from_consensus_contains_more_alts(self, ref_and_muts):
@@ -147,7 +149,7 @@ def test_consensus_from_consensus_contains_more_alts(self, ref_and_muts):
         picked_alts = map(get(1), alts)
         altCounts1 = sum(map(lambda f: f(cons1),  map(countof, picked_alts)))
         altCounts2 = sum(map(lambda f: f(cons2),  map(countof, picked_alts)))
-        self.assertLessEqual(altCounts1, altCounts2) 
+        self.assertLessEqual(altCounts1, altCounts2)
 
 
         #NOTE: the below test appears to be meaningless,

From bb7f93734e53e714b3a2057094dab1d587894268 Mon Sep 17 00:00:00 2001
From: Panciera <michael.panciera@amedwrair024635.amed.ds.army.mil>
Date: Fri, 4 Mar 2016 13:51:19 -0500
Subject: [PATCH 09/10] add support for trimming reference

---
 bioframework/consensus.py | 18 +++++++++++++++---
 mypy/sh.py                |  3 +++
 2 files changed, 18 insertions(+), 3 deletions(-)
 create mode 100644 mypy/sh.py

diff --git a/bioframework/consensus.py b/bioframework/consensus.py
index c6384ea..84bfdf4 100755
--- a/bioframework/consensus.py
+++ b/bioframework/consensus.py
@@ -14,14 +14,17 @@
 from functools import partial
 from itertools import ifilter, imap, groupby, takewhile, repeat, starmap, izip_longest
 import os, sys
+import collections
+
 from typing import Tuple, Dict, List, Iterator, Iterable, Any, Callable, NamedTuple, BinaryIO
 
 from Bio import SeqIO #done
 from Bio.SeqRecord import SeqRecord #done
 import vcf #done
 from vcf.model import _Record
+import sh #todo
 #from toolz import compose
-from toolz.dicttoolz import merge, dissoc, merge_with, valfilter #todo
+from toolz.dicttoolz import merge, dissoc, merge_with, valfilter #done
 from docopt import docopt #ignore
 from schema import Schema, Use #ignore
 #from contracts import contract, new_contract #can ignore
@@ -36,8 +39,6 @@
                      ('chrom',str),
                      ('pos', int),
                      ('alt', List[str])])
-#VcfRow = namedtuple("VcfRow", VCFRow._fields) # type: (*Any) -> VCFRow
-
 AMBIGUITY_TABLE = { 'A': 'A', 'T': 'T', 'G': 'G', 'C': 'C', 'N': 'N', 'AC': 'M', 'AG': 'R', 'AT': 'W', 'CG': 'S', 'CT': 'Y', 'GT': 'K', 'ACG': 'V', 'ACT': 'H', 'AGT': 'D', 'CGT': 'B', 'ACGT': 'N' }
 
 MAJORITY_PERCENTAGE = 80
@@ -192,6 +193,17 @@ def single_consensus(muts, ref):
 def consensus_str(ref, consensus): # type: (SeqRecord, str) -> str
     return ">{0}:Consensus\n{1}".format(ref.id, consensus)
 
+def zero_coverage_positions(bam_file, ref_file): # type: (str, str) -> Iterable[int]
+    pileup = sh.Command('mpileup')(bam_file, f=ref_file, _iter=True) # _iter=True: iterate over the command's output instead of collecting it
+    get_pos = lambda x: int(x.split()[1]) # type: Callable[[str],int]
+    return imap(get_pos, pileup) # NOTE(review): yields field 2 of *every* pileup line, not only zero-coverage ones -- confirm this matches the function name
+
+#TODO: is pileup 0-based or 1-based index? (samtools documents the mpileup position column as 1-based; start/end are used as 0-based offsets below -- confirm)
+def trim_ref(ref, positions): # type: (str, Iterator[int]) -> str
+    start, end = next(positions), collections.deque(positions, 1)[0] # first position and the last one remaining; needs at least two items
+    return '-'*start + ref[start:end] + '-'*(len(ref) - end) # keep ref[start:end], mask both flanks with '-'; length stays len(ref) when 0 <= start <= end <= len(ref)
+
+
 
 #@contract(ref_fasta=str, vcf=str, mind=int, majority=int)
 def run(ref_fasta, freebayes_vcf, outfile, mind, majority):
diff --git a/mypy/sh.py b/mypy/sh.py
new file mode 100644
index 0000000..ee8e4e9
--- /dev/null
+++ b/mypy/sh.py
@@ -0,0 +1,3 @@
+from typing import Callable, Any, Union, List, Iterator
+def Command(s): # type: (str) -> Callable[...,Union[List[str],Iterator[str]]]
+    pass # stub only: no behaviour here, the type comment on the signature is the whole point of this file

From 272a115ed63eaf7faa8452bf359ea081639c1eed Mon Sep 17 00:00:00 2001
From: Mike Panciera <michael.panciera.work@gmail.com>
Date: Fri, 4 Mar 2016 14:14:32 -0500
Subject: [PATCH 10/10] fix readme formatting

---
 mypy/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mypy/README.md b/mypy/README.md
index aec5641..26906fb 100644
--- a/mypy/README.md
+++ b/mypy/README.md
@@ -1,6 +1,6 @@
 1. Install [mypy](https://github.com/python/mypy#quick-start)
 
-2. Run mypy MYPYPATH=$PWD/mypy:$PWD/mypy/out  mypy --py2 bioframework/consensus.py
+2. Run mypy: `MYPYPATH=$PWD/mypy:$PWD/mypy/out  mypy --py2 bioframework/consensus.py`
 
 If needed, uses `stubgen` to generate more stub files for other libraries.