
Commit ac94428

current

lilyminium committed Jan 15, 2021
1 parent 4aa7f5e commit ac94428
Showing 11 changed files with 264 additions and 125 deletions.
7 changes: 3 additions & 4 deletions package/MDAnalysis/analysis/align.py
@@ -338,7 +338,6 @@ def _fit_to(mobile_coordinates, ref_coordinates, mobile_atoms,
     """
     R, min_rmsd = rotation_matrix(mobile_coordinates, ref_coordinates,
                                   weights=weights)
-
     mobile_atoms.translate(-mobile_com)
     mobile_atoms.rotate(R)
     mobile_atoms.translate(ref_com)
@@ -629,9 +628,9 @@ def __init__(self, mobile, reference, select='all', filename=None,
         """
         select = rms.process_selection(select)
-        self.ref_atoms = reference.select_atoms(*select['reference'])
-        self.mobile_atoms = mobile.select_atoms(*select['mobile'])
-        if in_memory or isinstance(mobile.trajectory, MemoryReader):
+        self.ref_atoms = reference.select_atoms(*select['reference']).atoms
+        self.mobile_atoms = mobile.select_atoms(*select['mobile']).atoms
+        if in_memory or not isinstance(mobile.trajectory, MemoryReader):
             mobile.transfer_to_memory()
             filename = None
             logger.info("Moved mobile trajectory to in-memory representation")
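For orientation, `AlignTraj` is normally driven as below; a minimal sketch, assuming the standard MDAnalysis test files (PSF, DCD) from the test-suite package are available:

    import MDAnalysis as mda
    from MDAnalysis.analysis import align
    from MDAnalysis.tests.datafiles import PSF, DCD

    mobile = mda.Universe(PSF, DCD)
    reference = mda.Universe(PSF, DCD)

    # Fit every frame of `mobile` onto the reference C-alpha selection;
    # in_memory=True modifies the coordinates in RAM instead of writing
    # a rmsfit_*.dcd file to disk.
    aligner = align.AlignTraj(mobile, reference, select='name CA',
                              in_memory=True).run()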
6 changes: 2 additions & 4 deletions package/MDAnalysis/analysis/clustering/cluster_methods.pyx
@@ -80,14 +80,12 @@ def affinity_propagation(similarity, preference, float lam, int max_iter, int co
 
     similarity[np.diag_indices(cn)] = preference
     indices = np.tril_indices(cn)
 
-    # print(similarity)
-    # print(np.tril(similarity))
-
-
     cdef np.ndarray[np.float32_t, ndim=1] sim = np.ravel(similarity[indices]).astype(np.float32)
     # sim = np.ascontiguousarray(sim)
 
+    print(similarity[-2:])
+    print(sim)
 
     cdef np.ndarray[long, ndim=1] clusters = np.zeros((cn), dtype=long)
 
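The debug prints above inspect the flattened lower-triangle similarity handed to the C affinity-propagation routine. As an independent cross-check (not part of this patch, and assuming scikit-learn is installed), the same "similarity matrix with the preference on the diagonal" setup can be reproduced with scikit-learn:

    import numpy as np
    from sklearn.cluster import AffinityPropagation

    rng = np.random.default_rng(0)
    X = rng.normal(size=(30, 3))
    # similarities: negative squared Euclidean distances
    S = -((X[:, None] - X[None, :]) ** 2).sum(axis=-1)

    # the scalar `preference` plays the same role as
    # similarity[np.diag_indices(cn)] = preference in the hunk above
    ap = AffinityPropagation(affinity="precomputed", preference=S.min(),
                             damping=0.9, random_state=0).fit(S)
    print(ap.labels_)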
5 changes: 2 additions & 3 deletions package/MDAnalysis/analysis/diffusionmap.py
@@ -231,13 +231,12 @@ def __init__(self, u, select='all', metric=rmsd, cutoff=1E0-5,
             Show detailed progress of the calculation if set to ``True``; the
             default is ``False``.
         """
-        self._u = u
+        self.atoms = u.select_atoms(select).atoms
+        self._u = self.atoms.universe
         traj = self._u.trajectory
 
         # remember that this must be called before referencing self.n_frames
         super(DistanceMatrix, self).__init__(self._u.trajectory, **kwargs)
-
-        self.atoms = self._u.select_atoms(select).atoms
         self._metric = metric
         self._cutoff = cutoff
         self._weights = weights
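The change above derives the working Universe from the selection (`self.atoms.universe`) rather than storing `u` directly. Typical usage of the class follows the documented DiffusionMap API; a sketch, again assuming the test files:

    import MDAnalysis as mda
    from MDAnalysis.analysis.diffusionmap import DiffusionMap
    from MDAnalysis.tests.datafiles import PSF, DCD

    u = mda.Universe(PSF, DCD)
    dmap = DiffusionMap(u, select='backbone', epsilon=2).run()
    print(dmap.eigenvalues[:5])   # dominant eigenvalues of the diffusion matrix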
10 changes: 9 additions & 1 deletion package/MDAnalysis/analysis/encore/covariance.py
@@ -62,12 +62,14 @@ def ml_covariance_estimator(coordinates, reference_coordinates=None):
     else:
         # Normal covariance calculation: distance to the average
         coordinates_offset = coordinates - np.average(coordinates, axis=0)
+        # print(coordinates_offset[-10:])
 
     # Calculate covariance manually
     coordinates_cov = np.zeros((coordinates.shape[1],
                                 coordinates.shape[1]))
     for frame in coordinates_offset:
         coordinates_cov += np.outer(frame, frame)
+        # print("cov", coordinates_cov[:10])
     coordinates_cov /= coordinates.shape[0]
 
     return coordinates_cov
@@ -121,6 +123,7 @@ def shrinkage_covariance_estimator( coordinates,
     # Call maximum likelihood estimator (note the additional column)
     sample = ml_covariance_estimator(np.hstack([x, xmkt[:, np.newaxis]]), 0)
     sample *= (t-1)/float(t)
+    # print("sample", sample[:10])
 
 
     # Split covariance matrix into components
@@ -131,6 +134,7 @@
     # Prior
     prior = np.outer(covmkt, covmkt)/varmkt
     prior[np.ma.make_mask(np.eye(n))] = np.diag(sample)
+
 
     # If shrinkage parameter is not set, estimate it
     if shrinkage_parameter is None:
@@ -162,7 +166,10 @@
         # Shrinkage constant
         k = (p-r)/c
         shrinkage_parameter = max(0, min(1, k/float(t)))
-
+        # print(prior[0])
+        # print("shrinkage_parameter", shrinkage_parameter)
+        # raise ValueError()
+
     # calculate covariance matrix
     sigma = shrinkage_parameter*prior+(1-shrinkage_parameter)*sample
 
@@ -222,6 +229,7 @@ def covariance_matrix(ensemble,
         reference_coordinates = reference_coordinates.flatten()
 
     sigma = estimator(coordinates, reference_coordinates)
+    # print(sigma[:5, :5])
 
     # Optionally correct with weights
     if weights is not None:
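All of the commented-out prints in this file instrument the same quantity: the shrinkage estimate sigma = lambda*prior + (1-lambda)*sample visible in the hunk at line 166. A self-contained sketch of that formula on toy data (a diagonal prior is used here for brevity; encore builds a Ledoit-Wolf style "market" prior instead):

    import numpy as np

    rng = np.random.default_rng(0)
    coords = rng.normal(size=(100, 6))     # (n_frames, n_dim) toy data

    # maximum-likelihood sample covariance, as in ml_covariance_estimator
    offset = coords - coords.mean(axis=0)
    sample = offset.T @ offset / len(coords)

    # shrink towards a structured prior
    prior = np.diag(np.diag(sample))
    lam = 0.3          # fixed shrinkage parameter, for illustration only
    sigma = lam * prior + (1 - lam) * sample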
26 changes: 14 additions & 12 deletions package/MDAnalysis/analysis/encore/similarity.py
@@ -286,8 +286,6 @@ def harmonic_ensemble_similarity(sigma1,
     # Difference between average vectors
     d_avg = x1 - x2
 
-
-
     # Distance measure
     trace = np.trace(np.dot(sigma1, sigma2_inv) +
                      np.dot(sigma2, sigma1_inv)
@@ -296,16 +294,13 @@
     d_hes = 0.25 * (np.dot(np.transpose(d_avg),
                            np.dot(sigma1_inv + sigma2_inv,
                                   d_avg)) + trace)
     print(d_avg.shape)
     print((sigma1_inv + sigma2_inv).shape)
     print((np.dot(sigma1_inv + sigma2_inv,
                   d_avg)).shape)
     print(np.dot(sigma1, sigma2_inv).shape)
-    print((np.dot(np.transpose(d_avg),
-                  np.dot(sigma1_inv + sigma2_inv,
-                         d_avg)).shape))
+    # print((sigma1_inv + sigma2_inv)[:10])
-    print((np.dot(sigma1, sigma2_inv) +
-           np.dot(sigma2, sigma1_inv))[:10])
-    print(trace)
-    raise ValueError()
+    # print(sigma1[:5, :5])
+    # print(sigma2[:5, :5])
+    # print((sigma1_inv + sigma2_inv).dtype)
     return d_hes


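For reference, the quantity being debugged above is the harmonic ensemble similarity d_HES = 1/4 [(mu1-mu2)^T (S1^-1 + S2^-1) (mu1-mu2) + tr(S1 S2^-1 + S2 S1^-1 - 2I)]. A minimal NumPy sketch of the same formula (a hypothetical helper, not the encore implementation):

    import numpy as np

    def harmonic_similarity(x1, x2, sigma1, sigma2):
        # x1, x2: mean coordinate vectors; sigma1, sigma2: covariances
        sigma1_inv = np.linalg.pinv(sigma1)
        sigma2_inv = np.linalg.pinv(sigma2)
        d_avg = x1 - x2
        trace = np.trace(np.dot(sigma1, sigma2_inv) +
                         np.dot(sigma2, sigma1_inv) -
                         2 * np.identity(len(sigma1)))
        return 0.25 * (d_avg @ (sigma1_inv + sigma2_inv) @ d_avg + trace)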
@@ -562,7 +557,12 @@ def dimred_ensemble_similarity(kde1, resamples1, kde2, resamples2,
     ln_P1P2_exp_P1 = np.average(np.log(
         0.5 * (kde1.evaluate(resamples1) + kde2.evaluate(resamples1))))
     ln_P1P2_exp_P2 = np.average(np.log(
-        0.5 * (kde1.evaluate(resamples2) + kde2.evaluate(resamples2))))
+        0.5 * (kde1.evaluate(resamples2) +
+               kde2.evaluate(resamples2))))
+
+    print(ln_P1_exp_P1)
+    print(ln_P2_exp_P2)
+    print(ln_P1P2_exp_P1+ln_P1P2_exp_P2)
 
     return 0.5 * (
         ln_P1_exp_P1 - ln_P1P2_exp_P1 + ln_P2_exp_P2 - ln_P1P2_exp_P2)
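dimred_ensemble_similarity estimates the Jensen-Shannon divergence by Monte Carlo: each KDE is resampled, and ln p, ln q and ln((p+q)/2) are averaged over the samples. A standalone sketch with two 1-D Gaussian KDEs (toy data, not ensemble coordinates):

    import numpy as np
    from scipy.stats import gaussian_kde

    rng = np.random.default_rng(0)
    kde1 = gaussian_kde(rng.normal(0.0, 1.0, size=500))
    kde2 = gaussian_kde(rng.normal(0.5, 1.2, size=500))
    s1, s2 = kde1.resample(1000), kde2.resample(1000)

    ln_p1_p1 = np.average(np.log(kde1.evaluate(s1)))
    ln_p2_p2 = np.average(np.log(kde2.evaluate(s2)))
    ln_pm_p1 = np.average(np.log(0.5 * (kde1.evaluate(s1) + kde2.evaluate(s1))))
    ln_pm_p2 = np.average(np.log(0.5 * (kde1.evaluate(s2) + kde2.evaluate(s2))))

    # bounded above by ln(2) =~ 0.693
    d_js = 0.5 * (ln_p1_p1 - ln_pm_p1 + ln_p2_p2 - ln_pm_p2)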
@@ -952,6 +952,7 @@ def hes(ensembles,
                                                      sigma2=sigmas[j])
                 values[i, j] = value
                 values[j, i] = value
+
 
     # Save details as required
     details = {}
@@ -1495,6 +1496,7 @@ def dres(ensembles,
     if allow_collapsed_result and not hasattr(dimensionality_reduction_method,
                                               '__iter__'):
         values = values[0]
+        raise ValueError()
 
     return values, details
 
76 changes: 53 additions & 23 deletions package/MDAnalysis/analysis/reencore/similarity.py
@@ -4,7 +4,8 @@
 from scipy.stats import gaussian_kde
 
 from ...core.ensemble import Ensemble
-from .. import align
+from ..align import AlignTraj
+from ..pca import PCA
 from . import utils
 
 def clusters_to_indices(clusters, outlier_label=-1, outlier_index=-1):
@@ -38,39 +39,50 @@ def ces(ensemble, clusters, outlier_label=-1, outlier_index=-1):
     outlier_i = n_cl  # marker for new outlier columns
     for row, data in zip(frames_per_cl, ensemble.split_array(i_labels)):
         labels_, counts_ = np.unique(data, return_counts=True)
+        # treat outliers as individual clusters
         if labels_[0] == outlier_index:
             n_outlier_ = counts_[0]
             row[outlier_i:outlier_i + n_outlier_] = 1
             outlier_i += n_outlier_
             labels_ = labels_[1:]
             counts_ = counts_[1:]
 
+        # normalise over number of frames
         row[labels_] = counts_
-        row /= len(data)
-    print(clusters[:20])
-    print(clusters[20:])
-    print(frames_per_cl)
-
-
+        row /= len(data)
     return utils.discrete_js_matrix(frames_per_cl)


-def dres(ensemble, subspace, n_components=None, n_resample=1e3):
+def dres(ensemble, subspace=PCA, n_components=3, n_resample=1000,
+         start=None, stop=None, step=None, **kwargs):
+    if isinstance(subspace, type):
+        subspace = subspace(ensemble, **kwargs)
+    if isinstance(subspace, PCA):
+        if not subspace._calculated:
+            subspace.run(start=start, stop=stop, step=step)
+        subspace = subspace.transform(subspace._atoms,
+                                      n_components=n_components,
+                                      start=start, stop=stop,
+                                      step=step)
     if n_components is not None:
         subspace = subspace[:, :n_components]
 
     n_u = ensemble.n_universes
     kdes = [gaussian_kde(x.T) for x in ensemble.split_array(subspace)]
-    resamples = np.concatenate([k.resample(size=n_resample) for k in kdes],
-                               axis=1)  # (n_dim, n_u*n_samples)
-    pdfs = np.array([k.evaluate(resamples) for k in kdes])
-    pdfs = pdfs.reshape((n_u, n_resample))
-    logpdfs = np.broadcast_to(np.log(pdfs).mean(axis=1), (n_u, n_u))
-
-    pdfs_ = np.broadcast_to(pdfs, (n_u, n_u, n_resample))
-    sum_pdfs = pdfs_ + np.transpose(pdfs_, axes=(1, 0, 2))
-    ln_pq_exp_pq = np.log(0.5 * sum_pdfs).mean(axis=-1)
+    resamples = [k.resample(size=n_resample) for k in kdes]
+    # resamples = np.concatenate([k.resample(size=n_resample) for k in kdes],
+    #                            axis=1)  # (n_dim, n_u*n_samples)
+    # pdfs = np.array([k.evaluate(resamples) for k in kdes])
+    # pdfs = np.array(np.split(pdfs, n_u, axis=1))  # pdfs.reshape((n_u, n_u, n_resample))
+    pdfs = np.zeros((n_u, n_u, n_resample))
+    for i, k in enumerate(kdes):
+        pdfs[i] = [k.evaluate(x) for x in resamples]
+    logpdfs = np.log(pdfs).mean(axis=-1)
+    pdfsT = pdfs.transpose((1, 0, 2))
 
+    sum_pdfs = pdfs + pdfsT
+    ln_pq_exp_pq = np.log(0.5 * (pdfs + pdfsT)).mean(axis=-1)
+    print(pdfs.shape)
+    print(logpdfs)
 
     return 0.5 * (logpdfs + logpdfs.T - ln_pq_exp_pq - ln_pq_exp_pq.T)

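The rewritten dres above defaults `subspace` to the MDAnalysis PCA class and runs and transforms it on demand. For orientation, the public PCA API it leans on looks like this (documented MDAnalysis usage, assuming the test files):

    import MDAnalysis as mda
    from MDAnalysis.analysis import pca
    from MDAnalysis.tests.datafiles import PSF, DCD

    u = mda.Universe(PSF, DCD)
    pc = pca.PCA(u, select='backbone').run()
    # project the trajectory onto the first 3 principal components
    transformed = pc.transform(u.select_atoms('backbone'), n_components=3)
    print(transformed.shape)   # (n_frames, 3)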
@@ -116,7 +128,8 @@ def hes(ensemble, weights="mass", estimator="shrinkage", align=False,
 
     ensemble.transfer_to_memory()
     if align:
-        align.AlignTraj(ensemble, ensemble, weights=weights[0]).run()
+        AlignTraj(ensemble, ensemble, weights=weights[0],
+                  in_memory=True).run()
 
     frames = [u.trajectory.timeseries(ag, order="fac")
               for ag, u in zip(ensemble._ags, ensemble.universes)]
@@ -136,24 +149,41 @@
         for i, (coords, w) in enumerate(zip(frames[s], weights3)):
             avgs[s, i] = coords.mean(axis=0).flatten()
             cov = estimator(coords.reshape(len(coords), -1))
+            # print(cov[:5, :5])
             try:
                 cov = np.dot(w, np.dot(cov, w))
             except ValueError:
                 raise ValueError("weights dimensions don't match selected atoms")
+            # print(cov[:5, :5])
             covs[s, i] = cov
 
     inv_covs = np.zeros((n_s, n_u, n_u, n_a3, n_a3))
     for i, sub in enumerate(covs):
         for j, arr in enumerate(sub):
             inv_covs[i, j] = np.linalg.pinv(arr[0])
+            # if j == 0:
+            #     print(arr[0])
+            #     print(inv_covs[i, j])
 
+    # print(avgs.shape)
     diff = avgs - avgs.transpose((0, 2, 1, 3))
-    cov_prod = covs @ inv_covs
 
+    # print(avgs[0, 0, 0][:10])
+    # print(avgs[0, 1, 0][:10])
+    # print((avgs[0, 0, 0] - avgs[0, 1, 0])[:10])
+
+    # print(diff[0][0, 1][:20])
+
+    cov_prod = covs @ inv_covs.transpose((0, 2, 1, 3, 4))
     cov_prod += cov_prod.transpose((0, 2, 1, 3, 4))
-    trace = np.trace(cov_prod, axis1=-2, axis2=-1) - (2 * n_a3)
+    # print(cov_prod[0, 0, 1, : 10])
+    # trace = np.trace(cov_prod - 2 * np.identity(n_a3), axis1=-1, axis2=-2)
+    trace = np.trace(cov_prod, axis1=-1, axis2=-2) - (2 * n_a3)
+    # print(trace)
 
-    prod = np.einsum('ijklm,ijkl->ijkl', inv_covs, diff)
+    inv_cov_ = inv_covs + inv_covs.transpose((0, 2, 1, 3, 4))
+    # print(inv_cov_.dtype)
+    prod = np.einsum('ijklm,ijkl->ijkl', inv_cov_, diff)
     similarity = np.einsum('ijkl,ijkl->ijk', diff, prod)
     similarity = 0.25 * (similarity + trace)
     if estimate_error:
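Note that ces above ends in utils.discrete_js_matrix(frames_per_cl), i.e. a pairwise Jensen-Shannon divergence between the per-ensemble cluster-occupancy distributions. A minimal sketch of the discrete JS divergence for two such distributions (hypothetical helper; 0*log 0 is treated as 0):

    import numpy as np

    def discrete_js(p, q):
        # Jensen-Shannon divergence between two discrete distributions
        m = 0.5 * (p + q)
        def kl(a, b):
            mask = a > 0
            return np.sum(a[mask] * np.log(a[mask] / b[mask]))
        return 0.5 * kl(p, m) + 0.5 * kl(q, m)

    p = np.array([0.7, 0.2, 0.1, 0.0])   # cluster occupancies, ensemble 1
    q = np.array([0.1, 0.4, 0.3, 0.2])   # cluster occupancies, ensemble 2
    print(discrete_js(p, q))             # 0 <= d_JS <= ln(2)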
14 changes: 10 additions & 4 deletions package/MDAnalysis/analysis/reencore/utils.py
@@ -84,8 +84,11 @@ def discrete_js_matrix(matrix):
 def max_likelihood_covariance(coordinates, center=True):
     if center:
         coordinates -= coordinates.mean(axis=0)
-
-    cov = np.einsum('ij,ik->jk', coordinates, coordinates)
+    _, y = coordinates.shape
+    cov = np.zeros((y, y))
+    for frame in coordinates:
+        cov += np.outer(frame, frame)
+    # np.einsum('ij,ik->jk', coordinates, coordinates, out=cov)
     cov /= coordinates.shape[0]
     return cov
 
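The hunk above swaps the einsum one-liner for an explicit frame-by-frame accumulation (presumably to reduce peak memory); the two forms agree, as a quick check shows:

    import numpy as np

    rng = np.random.default_rng(0)
    coords = rng.normal(size=(50, 4))
    offset = coords - coords.mean(axis=0)

    # loop form, as in max_likelihood_covariance above
    loop_cov = np.zeros((4, 4))
    for frame in offset:
        loop_cov += np.outer(frame, frame)
    loop_cov /= len(offset)

    # vectorised form, as in the commented-out einsum
    vec_cov = np.einsum('ij,ik->jk', offset, offset) / len(offset)
    assert np.allclose(loop_cov, vec_cov)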
@@ -110,7 +113,7 @@ def shrinkage_covariance(coordinates, shrinkage_parameter=None):
 
     if shrinkage_parameter is None:
         c = np.linalg.norm(sample - prior, ord="fro") ** 2
-        inv_t = 1/len(offset)
+        inv_t = 1/t
         y = offset ** 2
         p = inv_t * (y.T @ y).sum() - (sample ** 2).sum()
         rdiag = inv_t * (y ** 2).sum() - np.trace(sample ** 2)
@@ -126,7 +129,10 @@
         k = (p - r) / c
         shrinkage_parameter = max(0, min(1, k * inv_t))
 
-    return shrinkage_parameter * prior + (1 - shrinkage_parameter) * sample
+    # print("shrinkage_parameter", shrinkage_parameter)
+
+    cov = shrinkage_parameter * prior + (1 - shrinkage_parameter) * sample
+    return cov
 
 
 def get_bootstrap_frames(frames, n_samples=100):
