-
Notifications
You must be signed in to change notification settings - Fork 23
/
process_data.py
executable file
·42 lines (36 loc) · 1.26 KB
/
process_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import csv
import sys
import os
import numpy as np
import json
import argparse
from prody import *
from sidechainnet.utils.measure import *
from tqdm import tqdm
def tocdr(resseq):
    """Map a residue number to a one-character CDR label.

    Args:
        resseq: Residue number to classify.

    Returns:
        '1', '2' or '3' when ``resseq`` falls inside the inclusive range
        27-38, 56-65 or 105-117 respectively; '0' for anything else.
    """
    # NOTE(review): these bounds look like IMGT CDR positions -- confirm
    # against the numbering scheme of the input structures.
    cdr_bounds = (
        ('1', 27, 38),
        ('2', 56, 65),
        ('3', 105, 117),
    )
    for label, first, last in cdr_bounds:
        if first <= resseq <= last:
            return label
    return '0'
def round_coords(flat_coords, num_residues):
    """Return coordinates as nested lists shaped (num_residues, 14, 3).

    Values are rounded to 3 decimal places so the JSON output stays compact.

    Args:
        flat_coords: Array-like holding ``num_residues * 14 * 3`` numbers.
        num_residues: Number of sequence positions the coordinates cover.

    Returns:
        A list with ``num_residues`` entries, each a list of 14 lists of
        3 Python floats.

    Raises:
        ValueError: If the number of values does not fit the target shape.
    """
    # Cast to float64 before rounding so lower-precision inputs serialize as
    # clean 3-decimal values.  Converting numerically replaces the previous
    # eval() of the array's printed form, which breaks on printed tokens such
    # as ``nan`` and needlessly executes text.
    coords = np.asarray(flat_coords, dtype=np.float64)
    return np.round(coords.reshape((num_residues, 14, 3)), 3).tolist()


def load_chain(pdb_id, chain_id):
    """Parse model 1 of one chain of ``pdb_id``; exit with a message on failure.

    Args:
        pdb_id: Identifier (or path) handed straight to ``parsePDB``.
        chain_id: Chain identifier to select.

    Returns:
        The object returned by ``parsePDB`` for that chain.
    """
    structure = parsePDB(pdb_id, model=1, chain=chain_id)
    # NOTE(review): ProDy appears to return None when no atoms could be
    # parsed (e.g. unknown chain) -- confirm; failing here gives a clearer
    # error than a crash further down.
    if structure is None:
        sys.exit("error: could not parse chain %r of %r" % (chain_id, pdb_id))
    return structure


def main(argv=None):
    """Print one JSON record describing an antibody chain and an antigen chain.

    Args:
        argv: Optional argument list ``[pdb_id, antibody_chain, antigen_chain]``;
            defaults to ``sys.argv[1:]``.  Extra arguments are ignored, as
            before.

    The record holds the PDB id, both sequences, both coordinate sets and a
    per-residue CDR label string for the antibody chain.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        sys.exit("usage: process_data.py PDB_ID ANTIBODY_CHAIN ANTIGEN_CHAIN")
    pdb_id, hchain_id, achain_id = args[:3]

    # Antibody chain: sequence, coordinates and CDR labels.
    hchain = load_chain(pdb_id, hchain_id)
    _, hcoords, hseq, _, _ = get_seq_coords_and_angles(hchain)
    hcdr = ''.join(tocdr(res.getResnum()) for res in hchain.iterResidues())
    # Keep the label string no longer than the extracted sequence.
    # NOTE(review): this assumes any residues missing from hseq are trailing
    # ones; verify labels stay aligned if residues can be skipped mid-chain.
    hcdr = hcdr[:len(hseq)]

    # Antigen chain: sequence and coordinates only.
    achain = load_chain(pdb_id, achain_id)
    _, acoords, aseq, _, _ = get_seq_coords_and_angles(achain)

    record = {
        "pdb": pdb_id,
        "antibody_seq": hseq,
        "antibody_cdr": hcdr,
        "antibody_coords": round_coords(hcoords, len(hseq)),
        "antigen_seq": aseq,
        "antigen_coords": round_coords(acoords, len(aseq)),
    }
    print(json.dumps(record))


if __name__ == "__main__":
    main()