Skip to content

Commit

Permalink
reformatting;
Browse files Browse the repository at this point in the history
  • Loading branch information
ialarmedalien committed Jan 10, 2025
1 parent 43bad96 commit 0e785dc
Show file tree
Hide file tree
Showing 4 changed files with 333 additions and 248 deletions.
30 changes: 19 additions & 11 deletions genome_loader_scripts/calculate_hash.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,41 @@
from collections.abc import Iterable
"""Utils for calculating hashes."""

import hashlib

# Default value for the ``split`` parameter of extract_features() and read_fasta2().
DEFAULT_SPLIT = " "

def _hash_string(s):

def _hash_string(s) -> str:
return hashlib.sha256(s.encode("utf-8")).hexdigest()


class HashSeq(str):
    """A ``str`` that is upper-cased on construction and can hash itself.

    Upper-casing at construction time means two values that differ only in
    letter case compare equal and yield the same ``hash_value``.
    """

    def __new__(cls, v: str) -> "HashSeq":
        """Create the instance from the upper-cased form of ``v``."""
        return super().__new__(cls, v.upper())

    @property
    def hash_value(self) -> str:
        """Hex SHA-256 digest of this (upper-cased) string."""
        return _hash_string(self)


class HashSeqList(list):
    """A list whose ``append`` stores every item as a ``HashSeq``.

    Only ``append`` is overridden here; items added through the ``list``
    constructor, ``extend`` or item assignment are not converted.
    """

    def append(self, o) -> None:
        """Append ``o``, wrapping a plain ``str`` in ``HashSeq``.

        Raises:
            TypeError: if ``o`` is neither a ``str`` nor a ``HashSeq``.
        """
        # HashSeq subclasses str, so it has to be tested first; with the
        # str check first this branch could never be reached and every
        # HashSeq was needlessly re-wrapped.
        if isinstance(o, HashSeq):
            super().append(o)
        elif isinstance(o, str):
            super().append(HashSeq(o))
        else:
            err_msg = f"Invalid type: {type(o)}"
            raise TypeError(err_msg)

    @property
    def hash_value(self) -> str:
        """Hex SHA-256 digest over the items' hashes.

        The per-item hashes are sorted before being joined with ``"_"``, so
        the result does not depend on the order of the items in the list.
        """
        h_list = [x.hash_value for x in self]
        hash_seq = "_".join(sorted(h_list))
        return _hash_string(hash_seq)

def extract_features(faa_str, split=DEFAULT_SPLIT, h_func=None):

def extract_features(faa_str: str, split: str = DEFAULT_SPLIT, h_func=None):
features = []
active_seq = None
seq_lines = []
Expand All @@ -57,33 +62,36 @@ def extract_features(faa_str, split=DEFAULT_SPLIT, h_func=None):
features.append(active_seq)
return features

def read_fasta2(f: str, split: str = DEFAULT_SPLIT, h_func=None):
    """Read the file at path ``f`` and parse its text with ``extract_features``.

    A path ending in ``.gz`` is opened with ``gzip``; anything else is opened
    as a regular text file. Both branches decode the content as UTF-8.

    Args:
        f: path of the file to read.
        split: forwarded unchanged to ``extract_features``.
        h_func: forwarded unchanged to ``extract_features``.

    Returns:
        Whatever ``extract_features`` returns for the file's text.
    """
    if f.endswith(".gz"):
        import gzip

        with gzip.open(f, "rb") as fh:
            return extract_features(fh.read().decode("utf-8"), split, h_func)
    else:
        # Explicit encoding so the result matches the gzip branch instead of
        # depending on the platform's locale default.
        with open(f, "r", encoding="utf-8") as fh:
            return extract_features(fh.read(), split, h_func)


class Feature:
    """A feature record: an id, a sequence, and optional annotations."""

    def __init__(self, feature_id, sequence, description=None, aliases=None) -> None:
        """Store the given values; ``ontology_terms`` starts as an empty dict."""
        self.id = feature_id
        self.seq = sequence
        self.description = description
        # Maps an ontology term to the list of values recorded for it.
        self.ontology_terms = {}
        self.aliases = aliases

    def add_ontology_term(self, ontology_term, value) -> None:
        """Record ``value`` under ``ontology_term``, ignoring duplicates.

        Values keep their first-seen order within each term's list.
        """
        values = self.ontology_terms.setdefault(ontology_term, [])
        if value not in values:
            values.append(value)


def contig_set_hash(features):
    """Return the combined hash of the ``seq`` attribute of every item.

    Each item's ``seq`` is wrapped in ``HashSeq`` and collected in a
    ``HashSeqList``; the list's ``hash_value`` is the result.
    """
    hashed_seqs = HashSeqList()
    for feature in features:
        hashed_seqs.append(HashSeq(feature.seq))
    return hashed_seqs.hash_value

Loading

0 comments on commit 0e785dc

Please sign in to comment.