-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathatlas.py
37 lines (31 loc) · 1.06 KB
/
atlas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from nomic import atlas
import numpy as np
import torch
from Bio.SeqUtils.ProtParam import ProteinAnalysis
# Per-residue translation table used to derive the 'secondary_structure'
# string: each listed one-letter residue code is replaced by a class letter
# (presumably H = helix, T = turn, S = strand -- TODO confirm).
# Residues that are not listed (e.g. C, H, Q, R, X, B) pass through unchanged.
# NOTE(review): 'L' appears in both the 'H' and the 'S' group; the later
# group wins, so 'L' is translated to 'S'. Confirm which one was intended.
sstrans = str.maketrans(
    {
        **dict.fromkeys('EMALK', 'H'),
        **dict.fromkeys('NPGSD', 'T'),
        **dict.fromkeys('VIYFWLT', 'S'),
    }
)
def protein_info(seq: str) -> dict[str, object]:
    """Build the metadata record for one protein sequence.

    The numeric descriptors are computed by ``ProteinAnalysis`` on a copy of
    *seq* with every ``'X'`` and ``'B'`` removed, while the ``'sequence'``
    and ``'secondary_structure'`` fields are derived from the unmodified
    *seq*.

    Args:
        seq: Protein sequence as a string of one-letter residue codes.

    Returns:
        A dict with the keys ``sequence``, ``molecular_weight``,
        ``aromaticity``, ``instability``, ``isoelectric_point``,
        ``secondary_structure`` and ``gravy``.
    """
    # NOTE(review): 'X' and 'B' are presumably stripped because the analysis
    # cannot handle these ambiguous codes -- confirm; other non-standard
    # letters are passed through to ProteinAnalysis as-is.
    pa = ProteinAnalysis(seq.replace('X', '').replace('B', ''))
    return {
        'sequence': seq,
        'molecular_weight': pa.molecular_weight(),
        'aromaticity': pa.aromaticity(),
        'instability': pa.instability_index(),
        'isoelectric_point': pa.isoelectric_point(),
        # Character-by-character substitution through the module-level
        # `sstrans` table; letters missing from the table stay unchanged.
        'secondary_structure': seq.translate(sstrans),
        'gravy': pa.gravy(),
    }
if __name__ == '__main__':
    # One sequence per line of aa.txt. Strip only the line terminator:
    # slicing with `line[:-1]` would also cut the last residue off a final
    # line that does not end with a newline.
    with open('aa.txt') as f:
        sequences = [line.rstrip('\n') for line in f]

    # NOTE(review): depending on the torch version / `weights_only` setting,
    # torch.load may unpickle arbitrary objects -- only load trusted files.
    # Presumably holds one embedding row per line of aa.txt, in the same
    # order -- TODO confirm.
    embeddings = torch.load('structural.pt')

    # Upload the embeddings together with the per-sequence metadata; the raw
    # sequence string serves as the id field of each datum.
    project = atlas.map_embeddings(
        embeddings=embeddings.numpy(),
        data=[protein_info(seq) for seq in sequences],
        name='svm',
        id_field='sequence',
        #reset_project_if_exists=True,
        add_datums_if_exists=True,
        build_topic_model=True,
        is_public=True,
    )
    print(project)