From f64a1b85f66a2389ea157558dba3b6fbd15299b9 Mon Sep 17 00:00:00 2001 From: Mathieu Andreux Date: Fri, 10 Feb 2023 17:28:58 +0100 Subject: [PATCH 1/9] better docstring for negative log likelihood --- pydeseq2/utils.py | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py index 16772949..97bdb842 100644 --- a/pydeseq2/utils.py +++ b/pydeseq2/utils.py @@ -223,27 +223,57 @@ def dispersion_trend(normed_mean, coeffs): def nb_nll(counts, mu, alpha): - """Negative log-likelihood of a negative binomial. + """Negative log-likelihood of a negative binomial of parameters `mu` and `alpha`. Unvectorized version. + Mathematically, if `counts` is a vector of counting entries :math:`y_i` + then the likelihood of each entry :math:`y_i` to be drawn from a negative + binomial :math:`NB(\\mu, \alpha)` is [1] + + .. math:: + p(y_i | \\mu, \\alpha) = \\frac{\\Gamma(y_i + \\alpha^{-1})}{ + \\Gamma(y_i + 1)\\Gamma(\\alpha^{-1}) + } + \\left(\\frac{1}{1 + \\alpha \\mu} \\right)^{1/\\alpha} + \\left(\\frac{\\mu}{\\alpha^{-1} + \\mu} \\right)^{y_i} + + As a consequence, assuming there are :math:`n` entries, + the total negative log-likelihood for `counts` is + + .. math:: + \\ell(\\mu, \\alpha) = \\frac{n}{\\alpha} \\log(\\alpha) + + \\sum_i \\left \\lbrace + - \\log \\left( \\frac{\\Gamma(y_i + \\alpha^{-1})}{ + \\Gamma(y_i + 1)\\Gamma(\\alpha^{-1}) + } \\right) + + (\\alpha^{-1} + y_i) \\log (\\alpha^{-1} + y_i) + - y_i \\log \\mu + \\right \\rbrace + + This is implemented in this function. + Parameters ---------- counts : ndarray Observations. mu : float - Mean of the distribution. + Mean of the distribution :math:`\\mu` alpha : float - Dispersion of the distribution, - s.t. the variance is :math:`\\mu + \\alpha * \\mu^2`. + Dispersion of the distribution :math:`\\alpha`, + s.t. the variance is :math:`\\mu + \\alpha \\mu^2`. Returns ------- float Negative log likelihood of the observations counts following :math:`NB(\\mu, \\alpha)`. + + Notes + ----- + [1] https://en.wikipedia.org/wiki/Negative_binomial_distribution """ n = len(counts) From 1c7dad722198093a1eb91d31941f4940a74619b3 Mon Sep 17 00:00:00 2001 From: Mathieu Andreux Date: Fri, 10 Feb 2023 18:03:32 +0100 Subject: [PATCH 2/9] unit test for nb_nll --- tests/test_utils.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/test_utils.py diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..88094eed --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,31 @@ +import numpy as np +import pytest + +from pydeseq2.utils import nb_nll + + +@pytest.mark.parametrize("mu, alpha", [(10, 0.5), (10, 0.1), (3, 0.5), (52, 0.05)]) +def test_nb_nll_moments(mu, alpha): + mu = 10.0 + alpha = 0.5 # should be smaller than 1 + # get the probability of many points + y = np.arange(int(10 * (mu + mu**2 / alpha))) + probas = np.zeros(y.shape) + for i in range(y.size): + # crude trapezoidal interpolation + probas[i] = np.exp(-nb_nll(np.array([y[i]]), mu, alpha)) + # check that probas sums very close to 1 + assert np.allclose(probas.sum(), 1.0) + # Re-sample according to probas + n_montecarlo = int(1e6) + rng = np.random.default_rng(42) + sample = rng.choice(y, size=(n_montecarlo,), p=probas) + # Get the theoretical values + mean_th = mu + var_th = (mu * alpha + 1) * mu + # Check that the mean is in an acceptable range, up to stochasticity + diff = sample.mean() - mean_th + deviation = var_th / np.sqrt(n_montecarlo) + assert np.abs(diff) < 0.2 * deviation + error_var = np.abs(sample.var() - var_th) / var_th + assert error_var < 1 / np.sqrt(n_montecarlo) From a03c3fcac72d592c629499678142706128891c30 Mon Sep 17 00:00:00 2001 From: Mathieu Andreux Date: Fri, 10 Feb 2023 18:14:55 +0100 Subject: [PATCH 3/9] adding a dot --- pydeseq2/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py index 97bdb842..79ce3552 100644 --- a/pydeseq2/utils.py +++ b/pydeseq2/utils.py @@ -259,7 +259,7 @@ def nb_nll(counts, mu, alpha): Observations. mu : float - Mean of the distribution :math:`\\mu` + Mean of the distribution :math:`\\mu`. alpha : float Dispersion of the distribution :math:`\\alpha`, From 097c9c09f6ce5a1d348811aeb8f38492fa3fee15 Mon Sep 17 00:00:00 2001 From: Mathieu Andreux Date: Fri, 10 Feb 2023 18:25:33 +0100 Subject: [PATCH 4/9] correcting stupid mistake --- tests/test_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 88094eed..a4074a95 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -6,8 +6,6 @@ @pytest.mark.parametrize("mu, alpha", [(10, 0.5), (10, 0.1), (3, 0.5), (52, 0.05)]) def test_nb_nll_moments(mu, alpha): - mu = 10.0 - alpha = 0.5 # should be smaller than 1 # get the probability of many points y = np.arange(int(10 * (mu + mu**2 / alpha))) probas = np.zeros(y.shape) From a753df6cdf21bb05c1051f030ef0931fd57ff93e Mon Sep 17 00:00:00 2001 From: mandreux-owkin <62643750+mandreux-owkin@users.noreply.github.com> Date: Tue, 14 Feb 2023 13:50:10 +0100 Subject: [PATCH 5/9] Update pydeseq2/utils.py Co-authored-by: Boris Muzellec --- pydeseq2/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py index 79ce3552..4c890603 100644 --- a/pydeseq2/utils.py +++ b/pydeseq2/utils.py @@ -223,7 +223,7 @@ def dispersion_trend(normed_mean, coeffs): def nb_nll(counts, mu, alpha): - """Negative log-likelihood of a negative binomial of parameters `mu` and `alpha`. + """Negative log-likelihood of a negative binomial of parameters ``mu`` and ``alpha``. Unvectorized version. From 0e75e97e7f1f6c176df6ee20f23420e54cb224cb Mon Sep 17 00:00:00 2001 From: mandreux-owkin <62643750+mandreux-owkin@users.noreply.github.com> Date: Tue, 14 Feb 2023 13:50:18 +0100 Subject: [PATCH 6/9] Update pydeseq2/utils.py Co-authored-by: Boris Muzellec --- pydeseq2/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py index 4c890603..c8d1cb47 100644 --- a/pydeseq2/utils.py +++ b/pydeseq2/utils.py @@ -239,7 +239,7 @@ def nb_nll(counts, mu, alpha): \\left(\\frac{\\mu}{\\alpha^{-1} + \\mu} \\right)^{y_i} As a consequence, assuming there are :math:`n` entries, - the total negative log-likelihood for `counts` is + the total negative log-likelihood for ``counts`` is .. math:: \\ell(\\mu, \\alpha) = \\frac{n}{\\alpha} \\log(\\alpha) + From 91dbb35261043e200464770dd25fea91ccebbfb1 Mon Sep 17 00:00:00 2001 From: mandreux-owkin <62643750+mandreux-owkin@users.noreply.github.com> Date: Tue, 14 Feb 2023 13:50:23 +0100 Subject: [PATCH 7/9] Update pydeseq2/utils.py Co-authored-by: Boris Muzellec --- pydeseq2/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py index c8d1cb47..c517ff7f 100644 --- a/pydeseq2/utils.py +++ b/pydeseq2/utils.py @@ -227,7 +227,7 @@ def nb_nll(counts, mu, alpha): Unvectorized version. - Mathematically, if `counts` is a vector of counting entries :math:`y_i` + Mathematically, if ``counts`` is a vector of counting entries :math:`y_i` then the likelihood of each entry :math:`y_i` to be drawn from a negative binomial :math:`NB(\\mu, \alpha)` is [1] From 7c4e2882c06e0bd1c6a3a2480dadf915436b2b4c Mon Sep 17 00:00:00 2001 From: Mathieu Andreux Date: Tue, 14 Feb 2023 13:52:36 +0100 Subject: [PATCH 8/9] correcting math rendering --- pydeseq2/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py index c517ff7f..a818bfbe 100644 --- a/pydeseq2/utils.py +++ b/pydeseq2/utils.py @@ -229,7 +229,7 @@ def nb_nll(counts, mu, alpha): Mathematically, if ``counts`` is a vector of counting entries :math:`y_i` then the likelihood of each entry :math:`y_i` to be drawn from a negative - binomial :math:`NB(\\mu, \alpha)` is [1] + binomial :math:`NB(\\mu, \\alpha)` is [1] .. math:: p(y_i | \\mu, \\alpha) = \\frac{\\Gamma(y_i + \\alpha^{-1})}{ From caf20aab2d679d9ea75fe397621b658d14fbc3e6 Mon Sep 17 00:00:00 2001 From: Mathieu Andreux Date: Tue, 14 Feb 2023 13:52:57 +0100 Subject: [PATCH 9/9] making tests much faster --- tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index a4074a95..d61cb417 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,7 +4,7 @@ from pydeseq2.utils import nb_nll -@pytest.mark.parametrize("mu, alpha", [(10, 0.5), (10, 0.1), (3, 0.5), (52, 0.05)]) +@pytest.mark.parametrize("mu, alpha", [(10, 0.5), (10, 0.1), (3, 0.5), (9, 0.05)]) def test_nb_nll_moments(mu, alpha): # get the probability of many points y = np.arange(int(10 * (mu + mu**2 / alpha)))