Skip to content

Commit

Permalink
black
Browse files Browse the repository at this point in the history
  • Loading branch information
xiki-tempula committed Dec 6, 2022
1 parent 06c1de0 commit e950af2
Show file tree
Hide file tree
Showing 37 changed files with 2,053 additions and 1,476 deletions.
125 changes: 73 additions & 52 deletions src/alchemlyb/convergence/convergence.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,16 @@
import logging
from warnings import warn

import pandas as pd
import numpy as np
import pandas as pd

from ..estimators import BAR, TI, FEP_ESTIMATORS, TI_ESTIMATORS
from ..estimators import AutoMBAR as MBAR
from .. import concat
from ..estimators import FEP_ESTIMATORS, TI_ESTIMATORS
from ..postprocessors.units import to_kT


def forward_backward_convergence(df_list, estimator='MBAR', num=10, **kwargs):
'''Forward and backward convergence of the free energy estimate.
def forward_backward_convergence(df_list, estimator="MBAR", num=10, **kwargs):
"""Forward and backward convergence of the free energy estimate.
Generate the free energy estimate as a function of time in both directions,
with the specified number of equally spaced points in the time
Expand Down Expand Up @@ -69,16 +68,17 @@ def forward_backward_convergence(df_list, estimator='MBAR', num=10, **kwargs):
The default for using ``estimator='MBAR'`` was changed from
:class:`~alchemlyb.estimators.MBAR` to :class:`~alchemlyb.estimators.AutoMBAR`.
'''
logger = logging.getLogger('alchemlyb.convergence.'
'forward_backward_convergence')
logger.info('Start convergence analysis.')
logger.info('Check data availability.')
"""
logger = logging.getLogger("alchemlyb.convergence." "forward_backward_convergence")
logger.info("Start convergence analysis.")
logger.info("Check data availability.")
if estimator.upper() != estimator:
warn("Using lower-case strings for the 'estimator' kwarg in "
"convergence.forward_backward_convergence() is deprecated in "
"1.0.0 and only upper case will be accepted in 2.0.0",
DeprecationWarning)
warn(
"Using lower-case strings for the 'estimator' kwarg in "
"convergence.forward_backward_convergence() is deprecated in "
"1.0.0 and only upper case will be accepted in 2.0.0",
DeprecationWarning,
)
estimator = estimator.upper()

if estimator not in (FEP_ESTIMATORS + TI_ESTIMATORS):
Expand All @@ -88,61 +88,77 @@ def forward_backward_convergence(df_list, estimator='MBAR', num=10, **kwargs):
else:
# select estimator class by name
estimator_fit = globals()[estimator](**kwargs).fit
logger.info(f'Use {estimator} estimator for convergence analysis.')
logger.info(f"Use {estimator} estimator for convergence analysis.")

logger.info('Begin forward analysis')
logger.info("Begin forward analysis")
forward_list = []
forward_error_list = []
for i in range(1, num + 1):
logger.info('Forward analysis: {:.2f}%'.format(100 * i / num))
logger.info("Forward analysis: {:.2f}%".format(100 * i / num))
sample = []
for data in df_list:
sample.append(data[:len(data) // num * i])
sample.append(data[: len(data) // num * i])
sample = concat(sample)
result = estimator_fit(sample)
forward_list.append(result.delta_f_.iloc[0, -1])
if estimator.lower() == 'bar':
error = np.sqrt(sum(
[result.d_delta_f_.iloc[i, i + 1] ** 2
for i in range(len(result.d_delta_f_) - 1)]))
if estimator.lower() == "bar":
error = np.sqrt(
sum(
[
result.d_delta_f_.iloc[i, i + 1] ** 2
for i in range(len(result.d_delta_f_) - 1)
]
)
)
forward_error_list.append(error)
else:
forward_error_list.append(result.d_delta_f_.iloc[0, -1])
logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1],
forward_error_list[-1]))
logger.info(
"{:.2f} +/- {:.2f} kT".format(forward_list[-1], forward_error_list[-1])
)

logger.info('Begin backward analysis')
logger.info("Begin backward analysis")
backward_list = []
backward_error_list = []
for i in range(1, num + 1):
logger.info('Backward analysis: {:.2f}%'.format(100 * i / num))
logger.info("Backward analysis: {:.2f}%".format(100 * i / num))
sample = []
for data in df_list:
sample.append(data[-len(data) // num * i:])
sample.append(data[-len(data) // num * i :])
sample = concat(sample)
result = estimator_fit(sample)
backward_list.append(result.delta_f_.iloc[0, -1])
if estimator.lower() == 'bar':
error = np.sqrt(sum(
[result.d_delta_f_.iloc[i, i + 1] ** 2
for i in range(len(result.d_delta_f_) - 1)]))
if estimator.lower() == "bar":
error = np.sqrt(
sum(
[
result.d_delta_f_.iloc[i, i + 1] ** 2
for i in range(len(result.d_delta_f_) - 1)
]
)
)
backward_error_list.append(error)
else:
backward_error_list.append(result.d_delta_f_.iloc[0, -1])
logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1],
backward_error_list[-1]))
logger.info(
"{:.2f} +/- {:.2f} kT".format(backward_list[-1], backward_error_list[-1])
)

convergence = pd.DataFrame(
{'Forward': forward_list,
'Forward_Error': forward_error_list,
'Backward': backward_list,
'Backward_Error': backward_error_list,
'data_fraction': [i / num for i in range(1, num + 1)]})
{
"Forward": forward_list,
"Forward_Error": forward_error_list,
"Backward": backward_list,
"Backward_Error": backward_error_list,
"data_fraction": [i / num for i in range(1, num + 1)],
}
)
convergence.attrs = df_list[0].attrs
return convergence


def _cummean(vals, out_length):
'''The cumulative mean of an array.
"""The cumulative mean of an array.
This function computes the cumulative mean and shapes the result to the
desired length.
Expand All @@ -167,18 +183,19 @@ def _cummean(vals, out_length):
.. versionadded:: 1.0.0
'''
"""
in_length = len(vals)
if in_length < out_length:
out_length = in_length
block = in_length // out_length
reshape = vals[: block*out_length].reshape(block, out_length)
reshape = vals[: block * out_length].reshape(block, out_length)
mean = np.mean(reshape, axis=0)
result = np.cumsum(mean) / np.arange(1, out_length+1)
result = np.cumsum(mean) / np.arange(1, out_length + 1)
return result


def fwdrev_cumavg_Rc(series, precision=0.01, tol=2):
'''Generate the convergence criteria :math:`R_c` for a single simulation.
"""Generate the convergence criteria :math:`R_c` for a single simulation.
The input will be :class:`pandas.Series` generated by
:func:`~alchemlyb.preprocessing.subsampling.decorrelate_u_nk` or
Expand Down Expand Up @@ -241,7 +258,7 @@ def fwdrev_cumavg_Rc(series, precision=0.01, tol=2):
.. _`equation 16`:
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8397498/#FD16
'''
"""
series = to_kT(series)
array = series.to_numpy()
out_length = int(1 / precision)
Expand All @@ -250,9 +267,12 @@ def fwdrev_cumavg_Rc(series, precision=0.01, tol=2):
length = len(g_forward)

convergence = pd.DataFrame(
{'Forward': g_forward,
'Backward': g_backward,
'data_fraction': [i / length for i in range(1, length + 1)]})
{
"Forward": g_forward,
"Backward": g_backward,
"data_fraction": [i / length for i in range(1, length + 1)],
}
)
convergence.attrs = series.attrs

# Final value
Expand All @@ -270,8 +290,9 @@ def fwdrev_cumavg_Rc(series, precision=0.01, tol=2):
# the same as this branch will be triggered.
return 1.0, convergence


def A_c(series_list, precision=0.01, tol=2):
'''Generate the ensemble convergence criteria :math:`A_c` for a set of simulations.
"""Generate the ensemble convergence criteria :math:`A_c` for a set of simulations.
The input is a :class:`list` of :class:`pandas.Series` generated by
:func:`~alchemlyb.preprocessing.subsampling.decorrelate_u_nk` or
Expand Down Expand Up @@ -317,11 +338,11 @@ def A_c(series_list, precision=0.01, tol=2):
.. _`equation 18`:
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8397498/#FD18
'''
logger = logging.getLogger('alchemlyb.convergence.A_c')
"""
logger = logging.getLogger("alchemlyb.convergence.A_c")
n_R_c = len(series_list)
R_c_list = [fwdrev_cumavg_Rc(series, precision, tol)[0] for series in series_list]
logger.info(f'R_c list: {R_c_list}')
logger.info(f"R_c list: {R_c_list}")
# Integrate the R_c_list <= R_c over the range of 0 to 1
array_01 = np.hstack((R_c_list, [0, 1]))
sorted_array = np.sort(np.unique(array_01))
Expand All @@ -330,6 +351,6 @@ def A_c(series_list, precision=0.01, tol=2):
if i == 0:
continue
else:
d_R_c = sorted_array[-i] - sorted_array[-i-1]
d_R_c = sorted_array[-i] - sorted_array[-i - 1]
result += d_R_c * sum(R_c_list <= element) / n_R_c
return result
2 changes: 1 addition & 1 deletion src/alchemlyb/estimators/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .mbar_ import MBAR, AutoMBAR
from .bar_ import BAR
from .mbar_ import MBAR, AutoMBAR
from .ti_ import TI

FEP_ESTIMATORS = [MBAR.__name__, AutoMBAR.__name__, BAR.__name__]
Expand Down
52 changes: 32 additions & 20 deletions src/alchemlyb/estimators/bar_.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import numpy as np
import pandas as pd

from sklearn.base import BaseEstimator
from pymbar import BAR as BAR_
from sklearn.base import BaseEstimator

from .base import _EstimatorMixOut


class BAR(BaseEstimator, _EstimatorMixOut):
"""Bennett acceptance ratio (BAR).
Expand Down Expand Up @@ -57,7 +57,13 @@ class BAR(BaseEstimator, _EstimatorMixOut):
"""

def __init__(self, maximum_iterations=10000, relative_tolerance=1.0e-7, method='false-position', verbose=False):
def __init__(
self,
maximum_iterations=10000,
relative_tolerance=1.0e-7,
method="false-position",
verbose=False,
):

self.maximum_iterations = maximum_iterations
self.relative_tolerance = relative_tolerance
Expand Down Expand Up @@ -87,7 +93,10 @@ def fit(self, u_nk):

# group u_nk by lambda states
groups = u_nk.groupby(level=u_nk.index.names[1:])
N_k = [(len(groups.get_group(i)) if i in groups.groups else 0) for i in u_nk.columns]
N_k = [
(len(groups.get_group(i)) if i in groups.groups else 0)
for i in u_nk.columns
]

# Now get free energy differences and their uncertainties for each step
deltas = np.array([])
Expand All @@ -96,19 +105,22 @@ def fit(self, u_nk):
# get us from lambda step k
uk = groups.get_group(self._states_[k])
# get w_F
w_f = uk.iloc[:, k+1] - uk.iloc[:, k]
w_f = uk.iloc[:, k + 1] - uk.iloc[:, k]

# get us from lambda step k+1
uk1 = groups.get_group(self._states_[k+1])
uk1 = groups.get_group(self._states_[k + 1])
# get w_R
w_r = uk1.iloc[:, k] - uk1.iloc[:, k+1]
w_r = uk1.iloc[:, k] - uk1.iloc[:, k + 1]

# now determine df and ddf using pymbar.BAR
df, ddf = BAR_(w_f, w_r,
method=self.method,
maximum_iterations=self.maximum_iterations,
relative_tolerance=self.relative_tolerance,
verbose=self.verbose)
df, ddf = BAR_(
w_f,
w_r,
method=self.method,
maximum_iterations=self.maximum_iterations,
relative_tolerance=self.relative_tolerance,
verbose=self.verbose,
)

deltas = np.append(deltas, df)
d_deltas = np.append(d_deltas, ddf**2)
Expand All @@ -121,14 +133,14 @@ def fit(self, u_nk):
out = []
dout = []
for i in range(len(deltas) - j):
out.append(deltas[i:i + j + 1].sum())
out.append(deltas[i : i + j + 1].sum())

# See https://github.com/alchemistry/alchemlyb/pull/60#issuecomment-430720742
# Error estimate generated by BAR ARE correlated

# Use the BAR uncertainties between two neighbour states
if j == 0:
dout.append(d_deltas[i:i + j + 1].sum())
dout.append(d_deltas[i : i + j + 1].sum())
# Other uncertainties are unknown at this point
else:
dout.append(np.nan)
Expand All @@ -137,14 +149,14 @@ def fit(self, u_nk):
ad_delta += np.diagflat(np.array(dout), k=j + 1)

# yield standard delta_f_ free energies between each state
self._delta_f_ = pd.DataFrame(adelta - adelta.T,
columns=self._states_,
index=self._states_)
self._delta_f_ = pd.DataFrame(
adelta - adelta.T, columns=self._states_, index=self._states_
)

# yield standard deviation d_delta_f_ between each state
self._d_delta_f_ = pd.DataFrame(np.sqrt(ad_delta + ad_delta.T),
columns=self._states_,
index=self._states_)
self._d_delta_f_ = pd.DataFrame(
np.sqrt(ad_delta + ad_delta.T), columns=self._states_, index=self._states_
)
self._delta_f_.attrs = u_nk.attrs
self._d_delta_f_.attrs = u_nk.attrs

Expand Down
9 changes: 5 additions & 4 deletions src/alchemlyb/estimators/base.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
class _EstimatorMixOut():
'''This class creates view for the d_delta_f_, delta_f_, states_ for the
estimator class to consume.'''
class _EstimatorMixOut:
"""This class creates view for the d_delta_f_, delta_f_, states_ for the
estimator class to consume."""

_d_delta_f_ = None
_delta_f_ = None
_states_ = None

@property
def d_delta_f_(self):
return self._d_delta_f_
Expand All @@ -15,4 +17,3 @@ def delta_f_(self):
@property
def states_(self):
return self._states_

Loading

0 comments on commit e950af2

Please sign in to comment.