Skip to content

Commit

Permalink
Extract Binomial Distribution functions from Statistical (#1974)
Browse files Browse the repository at this point in the history
* Extract Binomial Distribution functions from Statistical
Replace the old MS algorithm for CRITBINOM() (which has now been replaced with te BINOM.INV() function) with a brute force approach - I'll look to refine it later. The MS algorithm is no longer documented, and the implementation produced erroneous results anyway

* Exract the NEGBINOMDIST() function as well; still need to add a cumulative flag to support the additional argument for the newer NEGBINOM.DIST() function
* Rationalise validation of probability arguments
  • Loading branch information
Mark Baker authored Mar 30, 2021
1 parent df93577 commit 029f345
Show file tree
Hide file tree
Showing 17 changed files with 484 additions and 194 deletions.
12 changes: 6 additions & 6 deletions src/PhpSpreadsheet/Calculation/Calculation.php
Original file line number Diff line number Diff line change
Expand Up @@ -418,22 +418,22 @@ class Calculation
],
'BINOMDIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'BINOMDIST'],
'functionCall' => [Statistical\Distributions\Binomial::class, 'distribution'],
'argumentCount' => '4',
],
'BINOM.DIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'BINOMDIST'],
'functionCall' => [Statistical\Distributions\Binomial::class, 'distribution'],
'argumentCount' => '4',
],
'BINOM.DIST.RANGE' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Functions::class, 'DUMMY'],
'functionCall' => [Statistical\Distributions\Binomial::class, 'range'],
'argumentCount' => '3,4',
],
'BINOM.INV' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Functions::class, 'DUMMY'],
'functionCall' => [Statistical\Distributions\Binomial::class, 'inverse'],
'argumentCount' => '3',
],
'BITAND' => [
Expand Down Expand Up @@ -695,7 +695,7 @@ class Calculation
],
'CRITBINOM' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'CRITBINOM'],
'functionCall' => [Statistical\Distributions\Binomial::class, 'inverse'],
'argumentCount' => '3',
],
'CSC' => [
Expand Down Expand Up @@ -1751,7 +1751,7 @@ class Calculation
],
'NEGBINOMDIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'NEGBINOMDIST'],
'functionCall' => [Statistical\Distributions\Binomial::class, 'negative'],
'argumentCount' => '3',
],
'NEGBINOM.DIST' => [
Expand Down
7 changes: 7 additions & 0 deletions src/PhpSpreadsheet/Calculation/Financial/CashFlow/Single.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?php

namespace PhpOffice\PhpSpreadsheet\Calculation\Financial\CashFlow;

class Single
{
}
170 changes: 18 additions & 152 deletions src/PhpSpreadsheet/Calculation/Statistical.php
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,11 @@ public static function BETAINV($probability, $alpha, $beta, $rMin = 0, $rMax = 1
* experiment. For example, BINOMDIST can calculate the probability that two of the next three
* babies born are male.
*
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\Binomial::distribution()
* Use the distribution() method in the Statistical\Distributions\Binomial class instead
*
* @param mixed (float) $value Number of successes in trials
* @param mixed (float) $trials Number of trials
* @param mixed (float) $probability Probability of success on each trial
Expand All @@ -260,34 +265,7 @@ public static function BETAINV($probability, $alpha, $beta, $rMin = 0, $rMax = 1
*/
public static function BINOMDIST($value, $trials, $probability, $cumulative)
{
$value = Functions::flattenSingleValue($value);
$trials = Functions::flattenSingleValue($trials);
$probability = Functions::flattenSingleValue($probability);

if ((is_numeric($value)) && (is_numeric($trials)) && (is_numeric($probability))) {
$value = floor($value);
$trials = floor($trials);
if (($value < 0) || ($value > $trials)) {
return Functions::NAN();
}
if (($probability < 0) || ($probability > 1)) {
return Functions::NAN();
}
if ((is_numeric($cumulative)) || (is_bool($cumulative))) {
if ($cumulative) {
$summer = 0;
for ($i = 0; $i <= $value; ++$i) {
$summer += MathTrig::COMBIN($trials, $i) * $probability ** $i * (1 - $probability) ** ($trials - $i);
}

return $summer;
}

return MathTrig::COMBIN($trials, $value) * $probability ** $value * (1 - $probability) ** ($trials - $value);
}
}

return Functions::VALUE();
return Statistical\Distributions\Binomial::distribution($value, $trials, $probability, $cumulative);
}

/**
Expand Down Expand Up @@ -510,6 +488,11 @@ public static function COVAR($yValues, $xValues)
*
* See https://support.microsoft.com/en-us/help/828117/ for details of the algorithm used
*
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\Binomial::inverse()
* Use the inverse() method in the Statistical\Distributions\Binomial class instead
*
* @param float $trials number of Bernoulli trials
* @param float $probability probability of a success on each trial
* @param float $alpha criterion value
Expand All @@ -523,110 +506,7 @@ public static function COVAR($yValues, $xValues)
*/
public static function CRITBINOM($trials, $probability, $alpha)
{
$trials = floor(Functions::flattenSingleValue($trials));
$probability = Functions::flattenSingleValue($probability);
$alpha = Functions::flattenSingleValue($alpha);

if ((is_numeric($trials)) && (is_numeric($probability)) && (is_numeric($alpha))) {
$trials = (int) $trials;
if ($trials < 0) {
return Functions::NAN();
} elseif (($probability < 0.0) || ($probability > 1.0)) {
return Functions::NAN();
} elseif (($alpha < 0.0) || ($alpha > 1.0)) {
return Functions::NAN();
}

if ($alpha <= 0.5) {
$t = sqrt(log(1 / ($alpha * $alpha)));
$trialsApprox = 0 - ($t + (2.515517 + 0.802853 * $t + 0.010328 * $t * $t) / (1 + 1.432788 * $t + 0.189269 * $t * $t + 0.001308 * $t * $t * $t));
} else {
$t = sqrt(log(1 / (1 - $alpha) ** 2));
$trialsApprox = $t - (2.515517 + 0.802853 * $t + 0.010328 * $t * $t) / (1 + 1.432788 * $t + 0.189269 * $t * $t + 0.001308 * $t * $t * $t);
}

$Guess = floor($trials * $probability + $trialsApprox * sqrt($trials * $probability * (1 - $probability)));
if ($Guess < 0) {
$Guess = 0;
} elseif ($Guess > $trials) {
$Guess = $trials;
}

$TotalUnscaledProbability = $UnscaledPGuess = $UnscaledCumPGuess = 0.0;
$EssentiallyZero = 10e-12;

$m = floor($trials * $probability);
++$TotalUnscaledProbability;
if ($m == $Guess) {
++$UnscaledPGuess;
}
if ($m <= $Guess) {
++$UnscaledCumPGuess;
}

$PreviousValue = 1;
$Done = false;
$k = $m + 1;
while ((!$Done) && ($k <= $trials)) {
$CurrentValue = $PreviousValue * ($trials - $k + 1) * $probability / ($k * (1 - $probability));
$TotalUnscaledProbability += $CurrentValue;
if ($k == $Guess) {
$UnscaledPGuess += $CurrentValue;
}
if ($k <= $Guess) {
$UnscaledCumPGuess += $CurrentValue;
}
if ($CurrentValue <= $EssentiallyZero) {
$Done = true;
}
$PreviousValue = $CurrentValue;
++$k;
}

$PreviousValue = 1;
$Done = false;
$k = $m - 1;
while ((!$Done) && ($k >= 0)) {
$CurrentValue = $PreviousValue * $k + 1 * (1 - $probability) / (($trials - $k) * $probability);
$TotalUnscaledProbability += $CurrentValue;
if ($k == $Guess) {
$UnscaledPGuess += $CurrentValue;
}
if ($k <= $Guess) {
$UnscaledCumPGuess += $CurrentValue;
}
if ($CurrentValue <= $EssentiallyZero) {
$Done = true;
}
$PreviousValue = $CurrentValue;
--$k;
}

$PGuess = $UnscaledPGuess / $TotalUnscaledProbability;
$CumPGuess = $UnscaledCumPGuess / $TotalUnscaledProbability;

$CumPGuessMinus1 = $CumPGuess - 1;

while (true) {
if (($CumPGuessMinus1 < $alpha) && ($CumPGuess >= $alpha)) {
return $Guess;
} elseif (($CumPGuessMinus1 < $alpha) && ($CumPGuess < $alpha)) {
$PGuessPlus1 = $PGuess * ($trials - $Guess) * $probability / $Guess / (1 - $probability);
$CumPGuessMinus1 = $CumPGuess;
$CumPGuess = $CumPGuess + $PGuessPlus1;
$PGuess = $PGuessPlus1;
++$Guess;
} elseif (($CumPGuessMinus1 >= $alpha) && ($CumPGuess >= $alpha)) {
$PGuessMinus1 = $PGuess * $Guess * (1 - $probability) / ($trials - $Guess + 1) / $probability;
$CumPGuess = $CumPGuessMinus1;
$CumPGuessMinus1 = $CumPGuessMinus1 - $PGuess;
$PGuess = $PGuessMinus1;
--$Guess;
}
}
}

return Functions::VALUE();
return Statistical\Distributions\Binomial::inverse($trials, $probability, $alpha);
}

/**
Expand Down Expand Up @@ -1502,6 +1382,11 @@ public static function MODE(...$args)
* distribution, except that the number of successes is fixed, and the number of trials is
* variable. Like the binomial, trials are assumed to be independent.
*
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\Binomial::negative::mode()
* Use the negative() method in the Statistical\Distributions\Binomial class instead
*
* @param mixed (float) $failures Number of Failures
* @param mixed (float) $successes Threshold number of Successes
* @param mixed (float) $probability Probability of success on each trial
Expand All @@ -1510,26 +1395,7 @@ public static function MODE(...$args)
*/
public static function NEGBINOMDIST($failures, $successes, $probability)
{
$failures = floor(Functions::flattenSingleValue($failures));
$successes = floor(Functions::flattenSingleValue($successes));
$probability = Functions::flattenSingleValue($probability);

if ((is_numeric($failures)) && (is_numeric($successes)) && (is_numeric($probability))) {
if (($failures < 0) || ($successes < 1)) {
return Functions::NAN();
} elseif (($probability < 0) || ($probability > 1)) {
return Functions::NAN();
}
if (Functions::getCompatibilityMode() == Functions::COMPATIBILITY_GNUMERIC) {
if (($failures + $successes - 1) <= 0) {
return Functions::NAN();
}
}

return (MathTrig::COMBIN($failures + $successes - 1, $successes - 1)) * ($probability ** $successes) * ((1 - $probability) ** $failures);
}

return Functions::VALUE();
return Statistical\Distributions\Binomial::negative($failures, $successes, $probability);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,15 @@ protected static function validateBool($value): bool

return (bool) $value;
}

protected static function validateProbability($probability)
{
$probability = self::validateFloat($probability);

if ($probability < 0.0 || $probability > 1.0) {
throw new Exception(Functions::NAN());
}

return $probability;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public static function inverse($probability, $alpha, $beta, $rMin = 0, $rMax = 1
$rMax = Functions::flattenSingleValue($rMax);

try {
$probability = self::validateFloat($probability);
$probability = self::validateProbability($probability);
$alpha = self::validateFloat($alpha);
$beta = self::validateFloat($beta);
$rMax = self::validateFloat($rMax);
Expand All @@ -97,7 +97,7 @@ public static function inverse($probability, $alpha, $beta, $rMin = 0, $rMax = 1
$rMin = $rMax;
$rMax = $tmp;
}
if (($alpha <= 0) || ($beta <= 0) || ($rMin == $rMax) || ($probability <= 0) || ($probability > 1)) {
if (($alpha <= 0) || ($beta <= 0) || ($rMin == $rMax) || ($probability <= 0.0)) {
return Functions::NAN();
}

Expand Down
Loading

0 comments on commit 029f345

Please sign in to comment.