-
Notifications
You must be signed in to change notification settings - Fork 0
/
cv.py
26 lines (21 loc) · 844 Bytes
/
cv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import numpy as np
from statsmodels import robust
def d_func(v):
a, b = v
return a - b
def calculate_cv(data, probe):
"""
Accept a pandas data frame with probe names as columns, PNs as rows.
Each PN should occur twice
Total assay CV is computed by randomizing the measurements before filtering to duplicate PNs.
Returns CV based on the MAD estimator and the pairs of measurements for the PN
Relative CV is defined as repeated CV / total CV
"""
probe_data = data[['PN', probe]]
pns_with_missing_data = probe_data[probe_data.isnull().any(axis=1)]['PN']
subdata = data[~data['PN'].isin(pns_with_missing_data)][['PN', probe]]
d = subdata.groupby('PN').agg(d_func)
lib_mad = robust.mad(d, c=1)
mad = lib_mad[0]
val = 100 * np.sqrt(np.exp((np.log(2) * mad)**2) - 1)
return val