Skip to content

Commit

Permalink
typing for Assembly.__init__ and Assembly.__repr__
Browse files Browse the repository at this point in the history
  • Loading branch information
Kawin committed Oct 6, 2024
1 parent 9986318 commit d5528aa
Showing 1 changed file with 58 additions and 38 deletions.
96 changes: 58 additions & 38 deletions src/pydna/assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
graph.
"""
import os as _os
from Bio.SeqFeature import SeqFeature as _SeqFeature
from Bio.SeqFeature import ExactPosition as _ExactPosition
from Bio.SeqFeature import SimpleLocation as _SimpleLocation
from Bio.SeqFeature import CompoundLocation as _CompoundLocation
Expand All @@ -51,12 +52,19 @@
# from pydna.utils import memorize as _memorize
from pydna._pretty import pretty_str as _pretty_str
from pydna.contig import Contig as _Contig
from pydna.common_sub_strings import common_sub_strings
from pydna.common_sub_strings import common_sub_strings, Match as _Match

from pydna.common_sub_strings import terminal_overlap
from pydna.dseqrecord import Dseqrecord as _Dseqrecord
import networkx as _nx

from copy import deepcopy as _deepcopy
from typing import (
Callable as _Callable,
Dict as _Dict,
List as _List,
NamedTuple as _NamedTuple,
TypedDict as _TypedDict,
)
import itertools as _itertools
import logging as _logging

Expand All @@ -67,9 +75,6 @@
_module_logger = _logging.getLogger("pydna." + __name__)


terminal_overlap


class Assembly(object): # , metaclass=_Memoize):
"""Assembly of a list of linear DNA fragments into linear or circular
constructs. The Assembly is meant to replace the Assembly method as it
Expand Down Expand Up @@ -115,36 +120,37 @@ class Assembly(object): # , metaclass=_Memoize):
"""

def __init__(self, frags=None, limit=25, algorithm=common_sub_strings):
def __init__(
self,
frags: _List[_Dseqrecord],
limit: int = 25,
algorithm: _Callable[[str, str, int], _List[_Match]] = common_sub_strings,
) -> None:
# Each fragment is a dict holding its sequence (upper and mixed case), name, features and nodes
# The order of the fragments has significance
fragments = []
for f in frags:
fragments.append(
{
"upper": str(f.seq).upper(),
"mixed": str(f.seq),
"name": f.name,
"features": f.features,
"nodes": [],
}
)
fragments: _List[_FragmentDict] = [
{
"upper": str(f.seq).upper(),
"mixed": str(f.seq),
"name": f.name,
"features": f.features,
"nodes": [],
}
for f in frags
]

# rcfragments maps each fragment's mixed-case sequence string to the
# fragment dict built from that fragment's reverse complement
rcfragments = dict(
(
f["mixed"],
{
"upper": str(frc.seq).upper(),
"mixed": str(frc.seq),
"name": frc.name,
"features": frc.features,
"nodes": [],
},
)
rcfragments: _Dict[str, _FragmentDict] = {
f["mixed"]: {
"upper": str(frc.seq).upper(),
"mixed": str(frc.seq),
"name": frc.name,
"features": frc.features,
"nodes": [],
}
for f, frc in zip(fragments, (f.rc() for f in frags))
)
}
# The nodemap dict holds nodes and their reverse complements
nodemap = {
"begin": "end",
Expand Down Expand Up @@ -175,8 +181,8 @@ def __init__(self, frags=None, limit=25, algorithm=common_sub_strings):
# case.
node = first["upper"][start_in_first : start_in_first + length]

first["nodes"].append((start_in_first, length, node))
secnd["nodes"].append((start_in_secnd, length, node))
first["nodes"].append(_NodeTuple(start_in_first, length, node))
secnd["nodes"].append(_NodeTuple(start_in_secnd, length, node))

# The same node exists between the reverse complements of
# first and secnd
Expand All @@ -187,25 +193,25 @@ def __init__(self, frags=None, limit=25, algorithm=common_sub_strings):
start_in_secrc = len(secnd["upper"]) - start_in_secnd - length
# noderc is the reverse complement of node
noderc = firrc["upper"][start_in_firrc : start_in_firrc + length]
firrc["nodes"].append((start_in_firrc, length, noderc))
secrc["nodes"].append((start_in_secrc, length, noderc))
firrc["nodes"].append(_NodeTuple(start_in_firrc, length, noderc))
secrc["nodes"].append(_NodeTuple(start_in_secrc, length, noderc))
nodemap[node] = noderc

# first is also compared to the rc of secnd
matches = algorithm(first["upper"], secrc["upper"], limit)

for start_in_first, start_in_secrc, length in matches:
node = first["upper"][start_in_first : start_in_first + length]
first["nodes"].append((start_in_first, length, node))
secrc["nodes"].append((start_in_secrc, length, node))
first["nodes"].append(_NodeTuple(start_in_first, length, node))
secrc["nodes"].append(_NodeTuple(start_in_secrc, length, node))

start_in_firrc, start_in_secnd = (
len(first["upper"]) - start_in_first - length,
len(secnd["upper"]) - start_in_secrc - length,
)
noderc = firrc["upper"][start_in_firrc : start_in_firrc + length]
firrc["nodes"].append((start_in_firrc, length, noderc))
secnd["nodes"].append((start_in_secnd, length, noderc))
firrc["nodes"].append(_NodeTuple(start_in_firrc, length, noderc))
secnd["nodes"].append(_NodeTuple(start_in_secnd, length, noderc))
nodemap[node] = noderc

# A directed graph class that can store multiedges.
Expand Down Expand Up @@ -475,7 +481,7 @@ def assemble_circular(self, length_bound=None):
reverse=True,
)

def __repr__(self):
def __repr__(self) -> _pretty_str:
# https://pyformat.info
return _pretty_str(
"Assembly\n"
Expand Down Expand Up @@ -511,6 +517,20 @@ def __repr__(self):
)


class _NodeTuple(_NamedTuple):
start: int
length: int
shared_seq: str # uppercase


class _FragmentDict(_TypedDict):
    """Per-fragment record built by ``Assembly.__init__``.

    One record is created for every input fragment and another for its
    reverse complement.
    """

    # str(f.seq).upper(): the uppercased sequence that is passed to the
    # overlap algorithm and sliced to obtain node sequences.
    upper: str
    # str(f.seq): the sequence with its original letter case.
    mixed: str
    # Copied from the source record's ``name`` attribute.
    name: str
    # Copied from the source record's ``features`` attribute.
    features: _List[_SeqFeature]
    # Starts empty; one _NodeTuple is appended per shared sequence found.
    nodes: _List[_NodeTuple]


if __name__ == "__main__":
import os as _os

Expand Down

0 comments on commit d5528aa

Please sign in to comment.