Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of corrcoef #2139

Merged
merged 5 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions dpnp/dpnp_iface_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
"amin",
"average",
"bincount",
"corrcoef",
"correlate",
"cov",
"max",
Expand Down Expand Up @@ -360,6 +361,127 @@ def bincount(x1, weights=None, minlength=0):
return call_origin(numpy.bincount, x1, weights=weights, minlength=minlength)


def corrcoef(x, y=None, rowvar=True, *, dtype=None):
"""
Return Pearson product-moment correlation coefficients.

For full documentation refer to :obj:`numpy.corrcoef`.

Parameters
----------
x : {dpnp.ndarray, usm_ndarray}
A 1-D or 2-D array containing multiple variables and observations.
Each row of `x` represents a variable, and each column a single
observation of all those variables. Also see `rowvar` below.
y : {None, dpnp.ndarray, usm_ndarray}, optional
An additional set of variables and observations. `y` has the same
shape as `x`.
antonwolfy marked this conversation as resolved.
Show resolved Hide resolved
Default: ``None``.
rowvar : {bool}, optional
If `rowvar` is ``True``, then each row represents a variable,
with observations in the columns. Otherwise, the relationship
is transposed: each column represents a variable, while the rows
contain observations.
Default: ``True``.
dtype : {None, dtype}, optional
Data-type of the result.
Default: ``None``.

Returns
-------
R : {dpnp.ndarray}
The correlation coefficient matrix of the variables.

See Also
--------
:obj:`dpnp.cov` : Covariance matrix.

Examples
--------
In this example we generate two random arrays, ``xarr`` and ``yarr``, and
compute the row-wise and column-wise Pearson correlation coefficients,
``R``. Since `rowvar` is true by default, we first find the row-wise
Pearson correlation coefficients between the variables of ``xarr``.

>>> import dpnp as np
>>> np.random.seed(123)
>>> xarr = np.random.rand(3, 3).astype(np.float32)
>>> xarr
array([[7.2858386e-17, 2.2066992e-02, 3.9520904e-01],
[4.8012391e-01, 5.9377134e-01, 4.5147297e-01],
[9.0728188e-01, 9.9387854e-01, 5.8399546e-01]], dtype=float32)
>>> R1 = np.corrcoef(xarr)
>>> R1
array([[ 0.99999994, -0.6173796 , -0.9685411 ],
[-0.6173796 , 1. , 0.7937219 ],
[-0.9685411 , 0.7937219 , 0.9999999 ]], dtype=float32)

If we add another set of variables and observations ``yarr``, we can
compute the row-wise Pearson correlation coefficients between the
variables in ``xarr`` and ``yarr``.
antonwolfy marked this conversation as resolved.
Show resolved Hide resolved

>>> yarr = np.random.rand(3, 3).astype(np.float32)
>>> yarr
array([[0.17615308, 0.65354985, 0.15716429],
[0.09373496, 0.2123185 , 0.84086883],
[0.9011005 , 0.45206687, 0.00225109]], dtype=float32)
>>> R2 = np.corrcoef(xarr, yarr)
>>> R2
array([[ 0.99999994, -0.6173796 , -0.968541 , -0.48613155, 0.9951523 ,
-0.8900264 ],
[-0.6173796 , 1. , 0.7937219 , 0.9875833 , -0.53702235,
0.19083664],
[-0.968541 , 0.7937219 , 0.9999999 , 0.6883078 , -0.9393724 ,
0.74857277],
[-0.48613152, 0.9875833 , 0.6883078 , 0.9999999 , -0.39783284,
0.0342579 ],
[ 0.9951523 , -0.53702235, -0.9393725 , -0.39783284, 0.99999994,
-0.9305482 ],
[-0.89002645, 0.19083665, 0.7485727 , 0.0342579 , -0.9305482 ,
1. ]], dtype=float32)

Finally if we use the option ``rowvar=False``, the columns are now
being treated as the variables and we will find the column-wise Pearson
correlation coefficients between variables in ``xarr`` and ``yarr``.
antonwolfy marked this conversation as resolved.
Show resolved Hide resolved

>>> R3 = np.corrcoef(xarr, yarr, rowvar=False)
>>> R3
array([[ 1. , 0.9724453 , -0.9909503 , 0.8104691 , -0.46436927,
-0.1643624 ],
[ 0.9724453 , 1. , -0.9949381 , 0.6515728 , -0.6580445 ,
0.07012729],
[-0.99095035, -0.994938 , 1. , -0.72450536, 0.5790461 ,
0.03047091],
[ 0.8104691 , 0.65157276, -0.72450536, 1. , 0.14243561,
-0.71102554],
[-0.4643693 , -0.6580445 , 0.57904613, 0.1424356 , 0.99999994,
-0.79727215],
[-0.1643624 , 0.07012729, 0.03047091, -0.7110255 , -0.7972722 ,
0.99999994]], dtype=float32)
"""
antonwolfy marked this conversation as resolved.
Show resolved Hide resolved

out = dpnp.cov(x, y, rowvar, dtype=dtype)
if out.ndim == 0:
# scalar covariance
# nan if incorrect value (nan, inf, 0), 1 otherwise
return out / out

d = dpnp.diag(out)

stddev = dpnp.sqrt(d.real)
out /= stddev[:, None]
out /= stddev[None, :]

# Clip real and imaginary parts to [-1, 1]. This does not guarantee
# abs(a[i,j]) <= 1 for complex arrays, but is the best we can do without
# excessive work.
dpnp.clip(out.real, -1, 1, out=out.real)
if dpnp.iscomplexobj(out):
dpnp.clip(out.imag, -1, 1, out=out.imag)

return out


def correlate(x1, x2, mode="valid"):
"""
Cross-correlation of two 1-dimensional sequences.
Expand Down
5 changes: 0 additions & 5 deletions tests/skipped_tests.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -310,11 +310,6 @@ tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_bo
tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit
tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit_2

tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_diag_exception
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_rowvar
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_y

tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[linear]
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[lower]
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[higher]
Expand Down
5 changes: 0 additions & 5 deletions tests/skipped_tests_gpu.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -320,11 +320,6 @@ tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_bo
tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit
tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit_2

tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef
antonwolfy marked this conversation as resolved.
Show resolved Hide resolved
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_diag_exception
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_rowvar
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_y

tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[linear]
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[lower]
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[higher]
Expand Down
55 changes: 55 additions & 0 deletions tests/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,61 @@ def test_std_error(self):
dpnp.std(ia, ddof="1")


class TestCorrcoef:
antonwolfy marked this conversation as resolved.
Show resolved Hide resolved
@pytest.mark.usefixtures(
"suppress_divide_invalid_numpy_warnings",
"suppress_dof_numpy_warnings",
)
@pytest.mark.parametrize("dtype", get_all_dtypes())
@pytest.mark.parametrize("rowvar", [True, False])
def test_corrcoef(self, dtype, rowvar):
dp_array = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dtype)
np_array = dpnp.asnumpy(dp_array)

expected = numpy.corrcoef(np_array, rowvar=rowvar)
result = dpnp.corrcoef(dp_array, rowvar=rowvar)

assert_dtype_allclose(result, expected)

@pytest.mark.usefixtures(
"suppress_divide_invalid_numpy_warnings",
"suppress_dof_numpy_warnings",
"suppress_mean_empty_slice_numpy_warnings",
)
@pytest.mark.parametrize("shape", [(2, 0), (0, 2)])
def test_corrcoef_empty(self, shape):
dp_array = dpnp.empty(shape, dtype=dpnp.int64)
np_array = dpnp.asnumpy(dp_array)

result = dpnp.corrcoef(dp_array)
expected = numpy.corrcoef(np_array)
assert_dtype_allclose(result, expected)

@pytest.mark.usefixtures("suppress_complex_warning")
@pytest.mark.parametrize("dt_in", get_all_dtypes(no_bool=True))
@pytest.mark.parametrize("dt_out", get_float_complex_dtypes())
def test_corrcoef_dtype(self, dt_in, dt_out):
dp_array = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dt_in)
np_array = dpnp.asnumpy(dp_array)

expected = numpy.corrcoef(np_array, dtype=dt_out)
result = dpnp.corrcoef(dp_array, dtype=dt_out)
assert expected.dtype == result.dtype
assert_allclose(result, expected, rtol=1e-6)

@pytest.mark.usefixtures(
"suppress_divide_invalid_numpy_warnings",
"suppress_dof_numpy_warnings",
)
def test_corrcoef_scalar(self):
dp_array = dpnp.array(5)
np_array = dpnp.asnumpy(dp_array)

result = dpnp.corrcoef(dp_array)
expected = numpy.corrcoef(np_array)
assert_dtype_allclose(result, expected)


@pytest.mark.usefixtures("allow_fall_back_on_numpy")
class TestBincount:
@pytest.mark.parametrize(
Expand Down
6 changes: 6 additions & 0 deletions tests/test_sycl_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ def test_meshgrid(device):
pytest.param("ceil", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
pytest.param("conjugate", [[1.0 + 1.0j, 0.0], [0.0, 1.0 + 1.0j]]),
pytest.param("copy", [1.0, 2.0, 3.0]),
pytest.param("corrcoef", [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]),
pytest.param(
"cos", [-dpnp.pi / 2, -dpnp.pi / 4, 0.0, dpnp.pi / 4, dpnp.pi / 2]
),
Expand Down Expand Up @@ -693,6 +694,11 @@ def test_reduce_hypot(device):
pytest.param("append", [1, 2, 3], [4, 5, 6]),
pytest.param("arctan2", [-1, +1, +1, -1], [-1, -1, +1, +1]),
pytest.param("copysign", [0.0, 1.0, 2.0], [-1.0, 0.0, 1.0]),
pytest.param(
"corrcoef",
[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
[[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]],
),
pytest.param("cross", [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]),
pytest.param("digitize", [0.2, 6.4, 3.0], [0.0, 1.0, 2.5, 4.0]),
pytest.param(
Expand Down
6 changes: 6 additions & 0 deletions tests/test_usm_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,7 @@ def test_norm(usm_type, ord, axis):
pytest.param("cbrt", [1, 8, 27]),
pytest.param("ceil", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
pytest.param("conjugate", [[1.0 + 1.0j, 0.0], [0.0, 1.0 + 1.0j]]),
pytest.param("corrcoef", [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]),
pytest.param(
"cos", [-dp.pi / 2, -dp.pi / 4, 0.0, dp.pi / 4, dp.pi / 2]
),
Expand Down Expand Up @@ -685,6 +686,11 @@ def test_1in_1out(func, data, usm_type):
pytest.param("copysign", [0.0, 1.0, 2.0], [-1.0, 0.0, 1.0]),
pytest.param("cross", [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]),
pytest.param("digitize", [0.2, 6.4, 3.0], [0.0, 1.0, 2.5, 4.0]),
pytest.param(
"corrcoef",
[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
[[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]],
),
# dpnp.dot has 3 different implementations based on input arrays dtype
# checking all of them
pytest.param("dot", [3.0, 4.0, 5.0], [1.0, 2.0, 3.0]),
Expand Down
8 changes: 4 additions & 4 deletions tests/third_party/cupy/statistics_tests/test_correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,26 @@

class TestCorrcoef(unittest.TestCase):
@testing.for_all_dtypes()
@testing.numpy_cupy_allclose()
@testing.numpy_cupy_allclose(type_check=has_support_aspect64())
def test_corrcoef(self, xp, dtype):
a = testing.shaped_arange((2, 3), xp, dtype)
return xp.corrcoef(a)

@testing.for_all_dtypes()
@testing.numpy_cupy_allclose()
@testing.numpy_cupy_allclose(type_check=has_support_aspect64())
def test_corrcoef_diag_exception(self, xp, dtype):
a = testing.shaped_arange((1, 3), xp, dtype)
return xp.corrcoef(a)

@testing.for_all_dtypes()
@testing.numpy_cupy_allclose()
@testing.numpy_cupy_allclose(type_check=has_support_aspect64())
def test_corrcoef_y(self, xp, dtype):
a = testing.shaped_arange((2, 3), xp, dtype)
y = testing.shaped_arange((2, 3), xp, dtype)
return xp.corrcoef(a, y=y)

antonwolfy marked this conversation as resolved.
Show resolved Hide resolved
@testing.for_all_dtypes()
@testing.numpy_cupy_allclose()
@testing.numpy_cupy_allclose(type_check=has_support_aspect64())
def test_corrcoef_rowvar(self, xp, dtype):
a = testing.shaped_arange((2, 3), xp, dtype)
y = testing.shaped_arange((2, 3), xp, dtype)
Expand Down
Loading