black

alchemistry · Dec 6, 2022 · e950af2 · e950af2
1 parent 06c1de0
commit e950af2
Show file tree

Hide file tree

Showing 37 changed files with 2,053 additions and 1,476 deletions.
diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py
@@ -3,17 +3,16 @@
 import logging
 from warnings import warn
 
-import pandas as pd
 import numpy as np
+import pandas as pd
 
-from ..estimators import BAR, TI, FEP_ESTIMATORS, TI_ESTIMATORS
-from ..estimators import AutoMBAR as MBAR
 from .. import concat
+from ..estimators import FEP_ESTIMATORS, TI_ESTIMATORS
 from ..postprocessors.units import to_kT
 
 
-def forward_backward_convergence(df_list, estimator='MBAR', num=10, **kwargs):
-    '''Forward and backward convergence of the free energy estimate.
+def forward_backward_convergence(df_list, estimator="MBAR", num=10, **kwargs):
+    """Forward and backward convergence of the free energy estimate.
 
     Generate the free energy estimate as a function of time in both directions,
     with the specified number of equally spaced points in the time
@@ -69,16 +68,17 @@ def forward_backward_convergence(df_list, estimator='MBAR', num=10, **kwargs):
        The default for using ``estimator='MBAR'`` was changed from
        :class:`~alchemlyb.estimators.MBAR` to :class:`~alchemlyb.estimators.AutoMBAR`.
 
-    '''
-    logger = logging.getLogger('alchemlyb.convergence.'
-                               'forward_backward_convergence')
-    logger.info('Start convergence analysis.')
-    logger.info('Check data availability.')
+    """
+    logger = logging.getLogger("alchemlyb.convergence." "forward_backward_convergence")
+    logger.info("Start convergence analysis.")
+    logger.info("Check data availability.")
     if estimator.upper() != estimator:
-        warn("Using lower-case strings for the 'estimator' kwarg in "
-             "convergence.forward_backward_convergence() is deprecated in "
-             "1.0.0 and only upper case will be accepted in 2.0.0",
-            DeprecationWarning)
+        warn(
+            "Using lower-case strings for the 'estimator' kwarg in "
+            "convergence.forward_backward_convergence() is deprecated in "
+            "1.0.0 and only upper case will be accepted in 2.0.0",
+            DeprecationWarning,
+        )
         estimator = estimator.upper()
 
     if estimator not in (FEP_ESTIMATORS + TI_ESTIMATORS):
@@ -88,61 +88,77 @@ def forward_backward_convergence(df_list, estimator='MBAR', num=10, **kwargs):
     else:
         # select estimator class by name
         estimator_fit = globals()[estimator](**kwargs).fit
-        logger.info(f'Use {estimator} estimator for convergence analysis.')
+        logger.info(f"Use {estimator} estimator for convergence analysis.")
 
-    logger.info('Begin forward analysis')
+    logger.info("Begin forward analysis")
     forward_list = []
     forward_error_list = []
     for i in range(1, num + 1):
-        logger.info('Forward analysis: {:.2f}%'.format(100 * i / num))
+        logger.info("Forward analysis: {:.2f}%".format(100 * i / num))
         sample = []
         for data in df_list:
-            sample.append(data[:len(data) // num * i])
+            sample.append(data[: len(data) // num * i])
         sample = concat(sample)
         result = estimator_fit(sample)
         forward_list.append(result.delta_f_.iloc[0, -1])
-        if estimator.lower() == 'bar':
-            error = np.sqrt(sum(
-                [result.d_delta_f_.iloc[i, i + 1] ** 2
-                 for i in range(len(result.d_delta_f_) - 1)]))
+        if estimator.lower() == "bar":
+            error = np.sqrt(
+                sum(
+                    [
+                        result.d_delta_f_.iloc[i, i + 1] ** 2
+                        for i in range(len(result.d_delta_f_) - 1)
+                    ]
+                )
+            )
             forward_error_list.append(error)
         else:
             forward_error_list.append(result.d_delta_f_.iloc[0, -1])
-        logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1],
-                                                  forward_error_list[-1]))
+        logger.info(
+            "{:.2f} +/- {:.2f} kT".format(forward_list[-1], forward_error_list[-1])
+        )
 
-    logger.info('Begin backward analysis')
+    logger.info("Begin backward analysis")
     backward_list = []
     backward_error_list = []
     for i in range(1, num + 1):
-        logger.info('Backward analysis: {:.2f}%'.format(100 * i / num))
+        logger.info("Backward analysis: {:.2f}%".format(100 * i / num))
         sample = []
         for data in df_list:
-            sample.append(data[-len(data) // num * i:])
+            sample.append(data[-len(data) // num * i :])
         sample = concat(sample)
         result = estimator_fit(sample)
         backward_list.append(result.delta_f_.iloc[0, -1])
-        if estimator.lower() == 'bar':
-            error = np.sqrt(sum(
-                [result.d_delta_f_.iloc[i, i + 1] ** 2
-                 for i in range(len(result.d_delta_f_) - 1)]))
+        if estimator.lower() == "bar":
+            error = np.sqrt(
+                sum(
+                    [
+                        result.d_delta_f_.iloc[i, i + 1] ** 2
+                        for i in range(len(result.d_delta_f_) - 1)
+                    ]
+                )
+            )
             backward_error_list.append(error)
         else:
             backward_error_list.append(result.d_delta_f_.iloc[0, -1])
-        logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1],
-                                                  backward_error_list[-1]))
+        logger.info(
+            "{:.2f} +/- {:.2f} kT".format(backward_list[-1], backward_error_list[-1])
+        )
 
     convergence = pd.DataFrame(
-        {'Forward': forward_list,
-         'Forward_Error': forward_error_list,
-         'Backward': backward_list,
-         'Backward_Error': backward_error_list,
-         'data_fraction': [i / num for i in range(1, num + 1)]})
+        {
+            "Forward": forward_list,
+            "Forward_Error": forward_error_list,
+            "Backward": backward_list,
+            "Backward_Error": backward_error_list,
+            "data_fraction": [i / num for i in range(1, num + 1)],
+        }
+    )
     convergence.attrs = df_list[0].attrs
     return convergence
 
+
 def _cummean(vals, out_length):
-    '''The cumulative mean of an array.
+    """The cumulative mean of an array.
 
     This function computes the cumulative mean and shapes the result to the
     desired length.
@@ -167,18 +183,19 @@ def _cummean(vals, out_length):
 
     .. versionadded:: 1.0.0
 
-    '''
+    """
     in_length = len(vals)
     if in_length < out_length:
         out_length = in_length
     block = in_length // out_length
-    reshape = vals[: block*out_length].reshape(block, out_length)
+    reshape = vals[: block * out_length].reshape(block, out_length)
     mean = np.mean(reshape, axis=0)
-    result = np.cumsum(mean) / np.arange(1, out_length+1)
+    result = np.cumsum(mean) / np.arange(1, out_length + 1)
     return result
 
+
 def fwdrev_cumavg_Rc(series, precision=0.01, tol=2):
-    '''Generate the convergence criteria :math:`R_c` for a single simulation.
+    """Generate the convergence criteria :math:`R_c` for a single simulation.
 
     The input will be :class:`pandas.Series` generated by
     :func:`~alchemlyb.preprocessing.subsampling.decorrelate_u_nk` or
@@ -241,7 +258,7 @@ def fwdrev_cumavg_Rc(series, precision=0.01, tol=2):
     .. _`equation 16`:
        https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8397498/#FD16
 
-    '''
+    """
     series = to_kT(series)
     array = series.to_numpy()
     out_length = int(1 / precision)
@@ -250,9 +267,12 @@ def fwdrev_cumavg_Rc(series, precision=0.01, tol=2):
     length = len(g_forward)
 
     convergence = pd.DataFrame(
-        {'Forward': g_forward,
-         'Backward': g_backward,
-         'data_fraction': [i / length for i in range(1, length + 1)]})
+        {
+            "Forward": g_forward,
+            "Backward": g_backward,
+            "data_fraction": [i / length for i in range(1, length + 1)],
+        }
+    )
     convergence.attrs = series.attrs
 
     # Final value
@@ -270,8 +290,9 @@ def fwdrev_cumavg_Rc(series, precision=0.01, tol=2):
         # the same as this branch will be triggered.
         return 1.0, convergence
 
+
 def A_c(series_list, precision=0.01, tol=2):
-    '''Generate the ensemble convergence criteria :math:`A_c` for a set of simulations.
+    """Generate the ensemble convergence criteria :math:`A_c` for a set of simulations.
 
     The input is a :class:`list` of :class:`pandas.Series` generated by
     :func:`~alchemlyb.preprocessing.subsampling.decorrelate_u_nk` or
@@ -317,11 +338,11 @@ def A_c(series_list, precision=0.01, tol=2):
     .. _`equation 18`:
        https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8397498/#FD18
 
-    '''
-    logger = logging.getLogger('alchemlyb.convergence.A_c')
+    """
+    logger = logging.getLogger("alchemlyb.convergence.A_c")
     n_R_c = len(series_list)
     R_c_list = [fwdrev_cumavg_Rc(series, precision, tol)[0] for series in series_list]
-    logger.info(f'R_c list: {R_c_list}')
+    logger.info(f"R_c list: {R_c_list}")
     # Integrate the R_c_list <= R_c over the range of 0 to 1
     array_01 = np.hstack((R_c_list, [0, 1]))
     sorted_array = np.sort(np.unique(array_01))
@@ -330,6 +351,6 @@ def A_c(series_list, precision=0.01, tol=2):
         if i == 0:
             continue
         else:
-            d_R_c = sorted_array[-i] - sorted_array[-i-1]
+            d_R_c = sorted_array[-i] - sorted_array[-i - 1]
             result += d_R_c * sum(R_c_list <= element) / n_R_c
     return result
diff --git a/src/alchemlyb/estimators/__init__.py b/src/alchemlyb/estimators/__init__.py
@@ -1,5 +1,5 @@
-from .mbar_ import MBAR, AutoMBAR
 from .bar_ import BAR
+from .mbar_ import MBAR, AutoMBAR
 from .ti_ import TI
 
 FEP_ESTIMATORS = [MBAR.__name__, AutoMBAR.__name__, BAR.__name__]

diff --git a/src/alchemlyb/estimators/bar_.py b/src/alchemlyb/estimators/bar_.py
@@ -1,11 +1,11 @@
 import numpy as np
 import pandas as pd
-
-from sklearn.base import BaseEstimator
 from pymbar import BAR as BAR_
+from sklearn.base import BaseEstimator
 
 from .base import _EstimatorMixOut
 
+
 class BAR(BaseEstimator, _EstimatorMixOut):
     """Bennett acceptance ratio (BAR).
 
@@ -57,7 +57,13 @@ class BAR(BaseEstimator, _EstimatorMixOut):
 
     """
 
-    def __init__(self, maximum_iterations=10000, relative_tolerance=1.0e-7, method='false-position', verbose=False):
+    def __init__(
+        self,
+        maximum_iterations=10000,
+        relative_tolerance=1.0e-7,
+        method="false-position",
+        verbose=False,
+    ):
 
         self.maximum_iterations = maximum_iterations
         self.relative_tolerance = relative_tolerance
@@ -87,7 +93,10 @@ def fit(self, u_nk):
 
         # group u_nk by lambda states
         groups = u_nk.groupby(level=u_nk.index.names[1:])
-        N_k = [(len(groups.get_group(i)) if i in groups.groups else 0) for i in u_nk.columns]
+        N_k = [
+            (len(groups.get_group(i)) if i in groups.groups else 0)
+            for i in u_nk.columns
+        ]
 
         # Now get free energy differences and their uncertainties for each step
         deltas = np.array([])
@@ -96,19 +105,22 @@ def fit(self, u_nk):
             # get us from lambda step k
             uk = groups.get_group(self._states_[k])
             # get w_F
-            w_f = uk.iloc[:, k+1] - uk.iloc[:, k]
+            w_f = uk.iloc[:, k + 1] - uk.iloc[:, k]
 
             # get us from lambda step k+1
-            uk1 = groups.get_group(self._states_[k+1])
+            uk1 = groups.get_group(self._states_[k + 1])
             # get w_R
-            w_r = uk1.iloc[:, k] - uk1.iloc[:, k+1]
+            w_r = uk1.iloc[:, k] - uk1.iloc[:, k + 1]
 
             # now determine df and ddf using pymbar.BAR
-            df, ddf = BAR_(w_f, w_r,
-                             method=self.method,
-                             maximum_iterations=self.maximum_iterations,
-                             relative_tolerance=self.relative_tolerance,
-                             verbose=self.verbose)
+            df, ddf = BAR_(
+                w_f,
+                w_r,
+                method=self.method,
+                maximum_iterations=self.maximum_iterations,
+                relative_tolerance=self.relative_tolerance,
+                verbose=self.verbose,
+            )
 
             deltas = np.append(deltas, df)
             d_deltas = np.append(d_deltas, ddf**2)
@@ -121,14 +133,14 @@ def fit(self, u_nk):
             out = []
             dout = []
             for i in range(len(deltas) - j):
-                out.append(deltas[i:i + j + 1].sum())
+                out.append(deltas[i : i + j + 1].sum())
 
                 # See https://github.com/alchemistry/alchemlyb/pull/60#issuecomment-430720742
                 # Error estimate generated by BAR ARE correlated
 
                 # Use the BAR uncertainties between two neighbour states
                 if j == 0:
-                    dout.append(d_deltas[i:i + j + 1].sum())
+                    dout.append(d_deltas[i : i + j + 1].sum())
                 # Other uncertainties are unknown at this point
                 else:
                     dout.append(np.nan)
@@ -137,14 +149,14 @@ def fit(self, u_nk):
             ad_delta += np.diagflat(np.array(dout), k=j + 1)
 
         # yield standard delta_f_ free energies between each state
-        self._delta_f_ = pd.DataFrame(adelta - adelta.T,
-                                     columns=self._states_,
-                                     index=self._states_)
+        self._delta_f_ = pd.DataFrame(
+            adelta - adelta.T, columns=self._states_, index=self._states_
+        )
 
         # yield standard deviation d_delta_f_ between each state
-        self._d_delta_f_ = pd.DataFrame(np.sqrt(ad_delta + ad_delta.T),
-                                       columns=self._states_,
-                                       index=self._states_)
+        self._d_delta_f_ = pd.DataFrame(
+            np.sqrt(ad_delta + ad_delta.T), columns=self._states_, index=self._states_
+        )
         self._delta_f_.attrs = u_nk.attrs
         self._d_delta_f_.attrs = u_nk.attrs
 

diff --git a/src/alchemlyb/estimators/base.py b/src/alchemlyb/estimators/base.py
@@ -1,9 +1,11 @@
-class _EstimatorMixOut():
-    '''This class creates view for the d_delta_f_, delta_f_, states_ for the
-    estimator class to consume.'''
+class _EstimatorMixOut:
+    """This class creates view for the d_delta_f_, delta_f_, states_ for the
+    estimator class to consume."""
+
     _d_delta_f_ = None
     _delta_f_ = None
     _states_ = None
+
     @property
     def d_delta_f_(self):
         return self._d_delta_f_
@@ -15,4 +17,3 @@ def delta_f_(self):
     @property
     def states_(self):
         return self._states_
-