diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 66e0d457e33b6..7bd3c1aa03d90 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -59,6 +59,19 @@ The ``Series`` object has a method ``cov`` to compute covariance between series Analogously, ``DataFrame`` has a method ``cov`` to compute pairwise covariances among the series in the DataFrame, also excluding NA/null values. +.. _computation.covariance.caveats: + +.. note:: + + Assuming the missing data are missing at random this results in an estimate + for the covariance matrix which is unbiased. However, for many applications + this estimate may not be acceptable because the estimated covariance matrix + is not guaranteed to be positive semi-definite. This could lead to + estimated correlations having absolute values which are greater than one, + and/or a non-invertible covariance matrix. See `Estimation of covariance + matrices <https://en.wikipedia.org/wiki/Estimation_of_covariance_matrices>`_ + for more details. + .. ipython:: python frame = DataFrame(randn(1000, 5), columns=['a', 'b', 'c', 'd', 'e']) @@ -99,6 +112,12 @@ correlation methods are provided: All of these are currently computed using pairwise complete observations. +.. note:: + + Please see the :ref:`caveats <computation.covariance.caveats>` associated + with this method of calculating correlation matrices in the + :ref:`covariance section <computation.covariance>`. + .. ipython:: python frame = DataFrame(randn(1000, 5), columns=['a', 'b', 'c', 'd', 'e']) @@ -325,11 +344,14 @@ Binary rolling moments two ``Series`` or any combination of ``DataFrame/Series`` or ``DataFrame/DataFrame``. Here is the behavior in each case: -- two ``Series``: compute the statistic for the pairing +- two ``Series``: compute the statistic for the pairing. - ``DataFrame/Series``: compute the statistics for each column of the DataFrame - with the passed Series, thus returning a DataFrame -- ``DataFrame/DataFrame``: compute statistic for matching column names, - returning a DataFrame + with the passed Series, thus returning a DataFrame. 
+- ``DataFrame/DataFrame``: by default compute the statistic for matching column + names, returning a DataFrame. If the keyword argument ``pairwise=True`` is + passed then computes the statistic for each pair of columns, returning a + ``Panel`` whose ``items`` are the dates in question (see :ref:`the next section + <stats.moments.corr_pairwise>`). For example: @@ -340,20 +362,42 @@ For example: .. _stats.moments.corr_pairwise: -Computing rolling pairwise correlations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Computing rolling pairwise covariances and correlations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In financial data analysis and other fields it's common to compute correlation -matrices for a collection of time series. More difficult is to compute a -moving-window correlation matrix. This can be done using the -``rolling_corr_pairwise`` function, which yields a ``Panel`` whose ``items`` -are the dates in question: +In financial data analysis and other fields it's common to compute covariance +and correlation matrices for a collection of time series. Often one is also +interested in moving-window covariance and correlation matrices. This can be +done by passing the ``pairwise`` keyword argument, which in the case of +``DataFrame`` inputs will yield a ``Panel`` whose ``items`` are the dates in +question. In the case of a single DataFrame argument the ``pairwise`` argument +can even be omitted: + +.. note:: + + Missing values are ignored and each entry is computed using the pairwise + complete observations. Please see the :ref:`covariance section + <computation.covariance>` for :ref:`caveats + <computation.covariance.caveats>` associated with this method of + calculating covariance and correlation matrices. .. ipython:: python - correls = rolling_corr_pairwise(df, 50) + covs = rolling_cov(df[['B','C','D']], df[['A','B','C']], 50, pairwise=True) + covs[df.index[-50]] + +.. ipython:: python + + correls = rolling_corr(df, 50) correls[df.index[-50]] +.. 
note:: + + Prior to version 0.14 this was available through ``rolling_corr_pairwise``, + which is now deprecated and is simply syntactic sugar for calling + ``rolling_corr(..., pairwise=True)``. It is likely to be removed in a future + release. + You can efficiently retrieve the time series of correlations between two columns using ``ix`` indexing: diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 95537878871b1..344198d6e5ef1 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -183,6 +183,19 @@ These are out-of-bounds selections Because of the default `align` value changes, coordinates of bar plots are now located on integer values (0.0, 1.0, 2.0 ...). This is intended to make bar plot be located on the same coodinates as line plot. However, bar plot may differs unexpectedly when you manually adjust the bar location or drawing area, such as using `set_xlim`, `set_ylim`, etc. In this cases, please modify your script to meet with new coordinates. +- ``pairwise`` keyword was added to the statistical moment functions + ``rolling_cov``, ``rolling_corr``, ``ewmcov``, ``ewmcorr``, + ``expanding_cov``, ``expanding_corr`` to allow the calculation of moving + window covariance and correlation matrices (:issue:`4950`). See + :ref:`Computing rolling pairwise covariances and correlations + <stats.moments.corr_pairwise>` in the docs. + + .. 
ipython:: python + + df = DataFrame(np.random.randn(10,4),columns=list('ABCD')) + covs = rolling_cov(df[['A','B','C']], df[['B','C','D']], 5, pairwise=True) + covs[df.index[-1]] + MultiIndexing Using Slicers ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index ec01113abc8f2..523f055eaf605 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -5,14 +5,14 @@ from __future__ import division from functools import wraps +from collections import defaultdict from numpy import NaN import numpy as np from pandas.core.api import DataFrame, Series, Panel, notnull import pandas.algos as algos -import pandas.core.common as com -from pandas.core.common import _values_from_object +import pandas.core.common as pdcom from pandas.util.decorators import Substitution, Appender @@ -31,13 +31,22 @@ #------------------------------------------------------------------------------ # Docs +# The order of arguments for the _doc_template is: +# (header, args, kwargs, returns, notes) + _doc_template = """ %s Parameters ---------- +%s%s +Returns +------- +%s %s -window : int +""" + +_roll_kw = """window : int Size of the moving window. This is the number of observations used for calculating the statistic. min_periods : int, default None @@ -49,11 +58,9 @@ for `freq`. center : boolean, default False Set the labels at the center of the window. - -Returns -------- -%s +""" +_roll_notes = r""" Notes ----- By default, the result is set to the right edge of the window. This can be @@ -65,12 +72,7 @@ """ -_ewm_doc = r"""%s - -Parameters ----------- -%s -com : float. optional +_ewm_kw = r"""com : float. 
optional Center of mass: :math:`\alpha = 1 / (1 + com)`, span : float, optional Specify decay in terms of span, :math:`\alpha = 2 / (span + 1)` @@ -85,8 +87,9 @@ adjust : boolean, default True Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings (viewing EWMA as a moving average) +""" -%s +_ewm_notes = """ Notes ----- Either center of mass or span must be specified @@ -99,55 +102,51 @@ :math:`c = (s - 1) / 2` So a "20-day EWMA" would have center 9.5. - -Returns -------- -y : type of input argument """ - -_expanding_doc = """ -%s - -Parameters ----------- -%s -min_periods : int, default None +_expanding_kw = """min_periods : int, default None Minimum number of observations in window required to have a value (otherwise result is NA). freq : string or DateOffset object, optional (default None) Frequency to conform the data to before computing the statistic. Specified as a frequency string or DateOffset object. `time_rule` is a legacy alias for `freq`. - -Returns -------- -%s - -Notes ------ -The `freq` keyword is used to conform time series data to a specified -frequency by resampling the data. This is done with the default parameters -of :meth:`~pandas.Series.resample` (i.e. using the `mean`). 
""" -_type_of_input = "y : type of input argument" +_type_of_input_retval = "y : type of input argument" _flex_retval = """y : type depends on inputs - DataFrame / DataFrame -> DataFrame (matches on columns) + DataFrame / DataFrame -> DataFrame (matches on columns) or Panel (pairwise) DataFrame / Series -> Computes result for each column Series / Series -> Series""" -_unary_arg = "arg : Series, DataFrame" +_pairwise_retval = "y : Panel whose items are df1.index values" + +_unary_arg = "arg : Series, DataFrame\n" _binary_arg_flex = """arg1 : Series, DataFrame, or ndarray -arg2 : Series, DataFrame, or ndarray""" +arg2 : Series, DataFrame, or ndarray, optional + if not supplied then will default to arg1 and produce pairwise output +""" _binary_arg = """arg1 : Series, DataFrame, or ndarray -arg2 : Series, DataFrame, or ndarray""" +arg2 : Series, DataFrame, or ndarray +""" + +_pairwise_arg = """df1 : DataFrame +df2 : DataFrame +""" + +_pairwise_kw = """pairwise : bool, default False + If False then only matching columns between arg1 and arg2 will be used and + the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the output + will be a Panel in the case of DataFrame inputs. In the case of missing + elements, only complete pairwise observations will be used. 
+""" -_bias_doc = r"""bias : boolean, default False +_bias_kw = r"""bias : boolean, default False Use a standard estimation bias correction """ @@ -194,27 +193,47 @@ def rolling_count(arg, window, freq=None, center=False, time_rule=None): return return_hook(result) -@Substitution("Unbiased moving covariance.", _binary_arg_flex, _flex_retval) +@Substitution("Unbiased moving covariance.", _binary_arg_flex, + _roll_kw+_pairwise_kw, _flex_retval, _roll_notes) @Appender(_doc_template) -def rolling_cov(arg1, arg2, window, min_periods=None, freq=None, - center=False, time_rule=None): +def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None, + center=False, time_rule=None, pairwise=None): + if window is None and isinstance(arg2, (int, float)): + window = arg2 + arg2 = arg1 + pairwise = True if pairwise is None else pairwise # only default unset + elif arg2 is None: + arg2 = arg1 + pairwise = True if pairwise is None else pairwise # only default unset arg1 = _conv_timerule(arg1, freq, time_rule) arg2 = _conv_timerule(arg2, freq, time_rule) window = min(window, len(arg1), len(arg2)) def _get_cov(X, Y): - mean = lambda x: rolling_mean(x, window, min_periods,center=center) - count = rolling_count(X + Y, window,center=center) + mean = lambda x: rolling_mean(x, window, min_periods, center=center) + count = rolling_count(X + Y, window, center=center) bias_adj = count / (count - 1) return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj - rs = _flex_binary_moment(arg1, arg2, _get_cov) + rs = _flex_binary_moment(arg1, arg2, _get_cov, pairwise=bool(pairwise)) return rs -@Substitution("Moving sample correlation.", _binary_arg_flex, _flex_retval) +@Substitution("Moving sample correlation.", _binary_arg_flex, + _roll_kw+_pairwise_kw, _flex_retval, _roll_notes) @Appender(_doc_template) -def rolling_corr(arg1, arg2, window, min_periods=None, freq=None, - center=False, time_rule=None): +def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None, + 
center=False, time_rule=None, pairwise=None): + if window is None and isinstance(arg2, (int, float)): + window = arg2 + arg2 = arg1 + pairwise = True if pairwise is None else pairwise # only default unset + elif arg2 is None: + arg2 = arg1 + pairwise = True if pairwise is None else pairwise # only default unset + arg1 = _conv_timerule(arg1, freq, time_rule) + arg2 = _conv_timerule(arg2, freq, time_rule) + window = min(window, len(arg1), len(arg2)) + def _get_corr(a, b): num = rolling_cov(a, b, window, min_periods, freq=freq, center=center, time_rule=time_rule) @@ -223,16 +242,17 @@ def _get_corr(a, b): rolling_std(b, window, min_periods, freq=freq, center=center, time_rule=time_rule)) return num / den - return _flex_binary_moment(arg1, arg2, _get_corr) + return _flex_binary_moment(arg1, arg2, _get_corr, pairwise=bool(pairwise)) -def _flex_binary_moment(arg1, arg2, f): +def _flex_binary_moment(arg1, arg2, f, pairwise=False): if not (isinstance(arg1,(np.ndarray, Series, DataFrame)) and isinstance(arg2,(np.ndarray, Series, DataFrame))): raise TypeError("arguments to moment function must be of type " "np.ndarray/Series/DataFrame") - if isinstance(arg1, (np.ndarray,Series)) and isinstance(arg2, (np.ndarray,Series)): + if isinstance(arg1, (np.ndarray, Series)) and \ + isinstance(arg2, (np.ndarray,Series)): X, Y = _prep_binary(arg1, arg2) return f(X, Y) elif isinstance(arg1, DataFrame): @@ -241,10 +261,23 @@ def _flex_binary_moment(arg1, arg2, f): X, Y = arg1.align(arg2, join='outer') X = X + 0 * Y Y = Y + 0 * X - res_columns = arg1.columns.union(arg2.columns) - for col in res_columns: - if col in X and col in Y: - results[col] = f(X[col], Y[col]) + if pairwise is False: + res_columns = arg1.columns.union(arg2.columns) + for col in res_columns: + if col in X and col in Y: + results[col] = f(X[col], Y[col]) + elif pairwise is True: + results = defaultdict(dict) + for i, k1 in enumerate(arg1.columns): + for j, k2 in enumerate(arg2.columns): + if j rs.ndim-1: - raise 
ValueError("Requested axis is larger then no. of argument dimensions") + raise ValueError("Requested axis is larger then no. of argument " + "dimensions") offset = int((window - 1) / 2.) if isinstance(rs, (Series, DataFrame, Panel)): @@ -401,8 +414,9 @@ def _get_center_of_mass(com, span, halflife): return float(com) -@Substitution("Exponentially-weighted moving average", _unary_arg, "") -@Appender(_ewm_doc) +@Substitution("Exponentially-weighted moving average", _unary_arg, _ewm_kw, + _type_of_input_retval, _ewm_notes) +@Appender(_doc_template) def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None, time_rule=None, adjust=True): com = _get_center_of_mass(com, span, halflife) @@ -424,8 +438,9 @@ def _first_valid_index(arr): return notnull(arr).argmax() if len(arr) else 0 -@Substitution("Exponentially-weighted moving variance", _unary_arg, _bias_doc) -@Appender(_ewm_doc) +@Substitution("Exponentially-weighted moving variance", _unary_arg, + _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) +@Appender(_doc_template) def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, freq=None, time_rule=None): com = _get_center_of_mass(com, span, halflife) @@ -440,8 +455,9 @@ def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, return result -@Substitution("Exponentially-weighted moving std", _unary_arg, _bias_doc) -@Appender(_ewm_doc) +@Substitution("Exponentially-weighted moving std", _unary_arg, + _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) +@Appender(_doc_template) def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, time_rule=None): result = ewmvar(arg, com=com, span=span, halflife=halflife, time_rule=time_rule, @@ -451,38 +467,56 @@ def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, ewmvol = ewmstd -@Substitution("Exponentially-weighted moving covariance", _binary_arg, "") -@Appender(_ewm_doc) -def ewmcov(arg1, arg2, com=None, span=None, 
halflife=None, min_periods=0, bias=False, - freq=None, time_rule=None): - X, Y = _prep_binary(arg1, arg2) - - X = _conv_timerule(X, freq, time_rule) - Y = _conv_timerule(Y, freq, time_rule) - - mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods) +@Substitution("Exponentially-weighted moving covariance", _binary_arg_flex, + _ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes) +@Appender(_doc_template) +def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, bias=False, + freq=None, time_rule=None, pairwise=None): + if arg2 is None: + arg2 = arg1 + pairwise = True if pairwise is None else pairwise + elif isinstance(arg2, (int, float)) and com is None: + com = arg2 + arg2 = arg1 + pairwise = True if pairwise is None else pairwise + arg1 = _conv_timerule(arg1, freq, time_rule) + arg2 = _conv_timerule(arg2, freq, time_rule) - result = (mean(X * Y) - mean(X) * mean(Y)) - com = _get_center_of_mass(com, span, halflife) + def _get_ewmcov(X, Y): + mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods) + return (mean(X * Y) - mean(X) * mean(Y)) + result = _flex_binary_moment(arg1, arg2, _get_ewmcov, + pairwise=bool(pairwise)) if not bias: + com = _get_center_of_mass(com, span, halflife) result *= (1.0 + 2.0 * com) / (2.0 * com) return result -@Substitution("Exponentially-weighted moving " "correlation", _binary_arg, "") -@Appender(_ewm_doc) -def ewmcorr(arg1, arg2, com=None, span=None, halflife=None, min_periods=0, - freq=None, time_rule=None): - X, Y = _prep_binary(arg1, arg2) - - X = _conv_timerule(X, freq, time_rule) - Y = _conv_timerule(Y, freq, time_rule) +@Substitution("Exponentially-weighted moving correlation", _binary_arg_flex, + _ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes) +@Appender(_doc_template) +def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, + freq=None, time_rule=None, pairwise=None): + if arg2 is None: + arg2 = arg1 + 
pairwise = True if pairwise is None else pairwise + elif isinstance(arg2, (int, float)) and com is None: + com = arg2 + arg2 = arg1 + pairwise = True if pairwise is None else pairwise + arg1 = _conv_timerule(arg1, freq, time_rule) + arg2 = _conv_timerule(arg2, freq, time_rule) - mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods) - var = lambda x: ewmvar(x, com=com, span=span, halflife=halflife, min_periods=min_periods, - bias=True) - return (mean(X * Y) - mean(X) * mean(Y)) / _zsqrt(var(X) * var(Y)) + def _get_ewmcorr(X, Y): + mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods) + var = lambda x: ewmvar(x, com=com, span=span, halflife=halflife, min_periods=min_periods, + bias=True) + return (mean(X * Y) - mean(X) * mean(Y)) / _zsqrt(var(X) * var(Y)) + result = _flex_binary_moment(arg1, arg2, _get_ewmcorr, + pairwise=bool(pairwise)) + return result def _zsqrt(x): @@ -546,7 +580,7 @@ def _use_window(minp, window): def _rolling_func(func, desc, check_minp=_use_window): - @Substitution(desc, _unary_arg, _type_of_input) + @Substitution(desc, _unary_arg, _roll_kw, _type_of_input_retval, _roll_notes) @Appender(_doc_template) @wraps(func) def f(arg, window, min_periods=None, freq=None, center=False, @@ -729,8 +763,8 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None, if win_type is not None: raise ValueError(('Do not specify window type if using custom ' 'weights')) - window = com._asarray_tuplesafe(window).astype(float) - elif com.is_integer(window): # window size + window = pdcom._asarray_tuplesafe(window).astype(float) + elif pdcom.is_integer(window): # window size if win_type is None: raise ValueError('Must specify window type') try: @@ -779,8 +813,8 @@ def _pop_args(win_type, arg_names, kwargs): def _expanding_func(func, desc, check_minp=_use_window): - @Substitution(desc, _unary_arg, _type_of_input) - @Appender(_expanding_doc) + @Substitution(desc, _unary_arg, 
_expanding_kw, _type_of_input_retval, "") + @Appender(_doc_template) @wraps(func) def f(arg, min_periods=1, freq=None, center=False, time_rule=None, **kwargs): @@ -876,46 +910,54 @@ def expanding_quantile(arg, quantile, min_periods=1, freq=None, freq=freq, center=center, time_rule=time_rule) -@Substitution("Unbiased expanding covariance.", _binary_arg_flex, _flex_retval) -@Appender(_expanding_doc) -def expanding_cov(arg1, arg2, min_periods=1, freq=None, center=False, - time_rule=None): +@Substitution("Unbiased expanding covariance.", _binary_arg_flex, + _expanding_kw+_pairwise_kw, _flex_retval, "") +@Appender(_doc_template) +def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, center=False, + time_rule=None, pairwise=None): + if arg2 is None: + arg2 = arg1 + pairwise = True if pairwise is None else pairwise + elif isinstance(arg2, (int, float)) and min_periods is None: + min_periods = arg2 + arg2 = arg1 + pairwise = True if pairwise is None else pairwise window = max(len(arg1), len(arg2)) return rolling_cov(arg1, arg2, window, min_periods=min_periods, freq=freq, - center=center, time_rule=time_rule) + center=center, time_rule=time_rule, pairwise=pairwise) -@Substitution("Expanding sample correlation.", _binary_arg_flex, _flex_retval) -@Appender(_expanding_doc) -def expanding_corr(arg1, arg2, min_periods=1, freq=None, center=False, - time_rule=None): +@Substitution("Expanding sample correlation.", _binary_arg_flex, + _expanding_kw+_pairwise_kw, _flex_retval, "") +@Appender(_doc_template) +def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, center=False, + time_rule=None, pairwise=None): + if arg2 is None: + arg2 = arg1 + pairwise = True if pairwise is None else pairwise + elif isinstance(arg2, (int, float)) and min_periods is None: + min_periods = arg2 + arg2 = arg1 + pairwise = True if pairwise is None else pairwise window = max(len(arg1), len(arg2)) return rolling_corr(arg1, arg2, window, min_periods=min_periods, - freq=freq, center=center, 
time_rule=time_rule) - + freq=freq, center=center, time_rule=time_rule, + pairwise=pairwise) -def expanding_corr_pairwise(df, min_periods=1): - """ - Computes pairwise expanding correlation matrices as Panel whose items are - dates. - - Parameters - ---------- - df : DataFrame - min_periods : int, default 1 - Minimum number of observations in window required to have a value - (otherwise result is NA). - Returns - ------- - correls : Panel - """ - - window = len(df) - - return rolling_corr_pairwise(df, window, min_periods=min_periods) +@Substitution("Deprecated. Use expanding_corr(..., pairwise=True) instead.\n\n" + "Pairwise expanding sample correlation", _pairwise_arg, + _expanding_kw, _pairwise_retval, "") +@Appender(_doc_template) +def expanding_corr_pairwise(df1, df2=None, min_periods=1, freq=None, + center=False, time_rule=None): + import warnings + warnings.warn("expanding_corr_pairwise is deprecated, use expanding_corr(..., pairwise=True)", FutureWarning) + return expanding_corr(df1, df2, min_periods=min_periods, + freq=freq, center=center, time_rule=time_rule, + pairwise=True) def expanding_apply(arg, func, min_periods=1, freq=None, center=False, diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index a8359c102a902..97f08e7052c87 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -586,6 +586,9 @@ def test_rolling_cov(self): result = mom.rolling_cov(A, B, 50, min_periods=25) assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) + def test_rolling_cov_pairwise(self): + self._check_pairwise_moment(mom.rolling_cov, 10, min_periods=5) + def test_rolling_corr(self): A = self.series B = A + randn(len(A)) @@ -603,12 +606,14 @@ def test_rolling_corr(self): assert_almost_equal(result[-1], a.corr(b)) def test_rolling_corr_pairwise(self): - panel = mom.rolling_corr_pairwise(self.frame, 10, min_periods=5) + self._check_pairwise_moment(mom.rolling_corr, 10, min_periods=5) + + def 
_check_pairwise_moment(self, func, *args, **kwargs): + panel = func(self.frame, *args, **kwargs) - correl = panel.ix[:, 1, 5] - exp = mom.rolling_corr(self.frame[1], self.frame[5], - 10, min_periods=5) - tm.assert_series_equal(correl, exp) + actual = panel.ix[:, 1, 5] + expected = func(self.frame[1], self.frame[5], *args, **kwargs) + tm.assert_series_equal(actual, expected) def test_flex_binary_moment(self): # GH3155 @@ -666,9 +671,15 @@ def _check(method): def test_ewmcov(self): self._check_binary_ew(mom.ewmcov) + def test_ewmcov_pairwise(self): + self._check_pairwise_moment(mom.ewmcov, span=10, min_periods=5) + def test_ewmcorr(self): self._check_binary_ew(mom.ewmcorr) + def test_ewmcorr_pairwise(self): + self._check_pairwise_moment(mom.ewmcorr, span=10, min_periods=5) + def _check_binary_ew(self, func): A = Series(randn(50), index=np.arange(50)) B = A[2:] + randn(48) @@ -746,12 +757,20 @@ def test_expanding_cov(self): def test_expanding_max(self): self._check_expanding(mom.expanding_max, np.max, preserve_nan=False) + def test_expanding_cov_pairwise(self): + result = mom.expanding_cov(self.frame) + + rolling_result = mom.rolling_cov(self.frame, len(self.frame), + min_periods=1) + + for i in result.items: + assert_almost_equal(result[i], rolling_result[i]) + def test_expanding_corr_pairwise(self): - result = mom.expanding_corr_pairwise(self.frame) + result = mom.expanding_corr(self.frame) - rolling_result = mom.rolling_corr_pairwise(self.frame, - len(self.frame), - min_periods=1) + rolling_result = mom.rolling_corr(self.frame, len(self.frame), + min_periods=1) for i in result.items: assert_almost_equal(result[i], rolling_result[i])