Skip to content

Commit

Permalink
Start implementing Newton-Raphson for the inverse of Statistical Dist…
Browse files Browse the repository at this point in the history
…ributions (#1958)

* Start implementing Newton-Raphson for the inverse of Statistical Distributions, starting with the two-tailed Student-T
* Additional unit tests and validations
* Use the new Newton Raphson class for calculating the Inverse of ChiSquared
* Extract Weibull distribution, and provide unit tests
  • Loading branch information
Mark Baker authored Mar 27, 2021
1 parent c699d14 commit ec25314
Show file tree
Hide file tree
Showing 13 changed files with 493 additions and 165 deletions.
10 changes: 5 additions & 5 deletions src/PhpSpreadsheet/Calculation/Calculation.php
Original file line number Diff line number Diff line change
Expand Up @@ -2391,7 +2391,7 @@ class Calculation
],
'TDIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'TDIST'],
'functionCall' => [Statistical\Distributions\StudentT::class, 'distribution'],
'argumentCount' => '3',
],
'T.DIST' => [
Expand Down Expand Up @@ -2431,12 +2431,12 @@ class Calculation
],
'TINV' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'TINV'],
'functionCall' => [Statistical\Distributions\StudentT::class, 'inverse'],
'argumentCount' => '2',
],
'T.INV' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'TINV'],
'functionCall' => [Statistical\Distributions\StudentT::class, 'inverse'],
'argumentCount' => '2',
],
'T.INV.2T' => [
Expand Down Expand Up @@ -2581,12 +2581,12 @@ class Calculation
],
'WEIBULL' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'WEIBULL'],
'functionCall' => [Statistical\Distributions\Weibull::class, 'distribution'],
'argumentCount' => '4',
],
'WEIBULL.DIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'WEIBULL'],
'functionCall' => [Statistical\Distributions\Weibull::class, 'distribution'],
'argumentCount' => '4',
],
'WORKDAY' => [
Expand Down
129 changes: 18 additions & 111 deletions src/PhpSpreadsheet/Calculation/Statistical.php
Original file line number Diff line number Diff line change
Expand Up @@ -2140,6 +2140,11 @@ public static function STEYX($yValues, $xValues)
*
* Returns the probability of Student's T distribution.
*
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\StudentT::distribution()
* Use the distribution() method in the Statistical\Distributions\StudentT class instead
*
* @param float $value Value for the function
* @param float $degrees degrees of freedom
* @param float $tails number of tails (1 or 2)
Expand All @@ -2148,113 +2153,27 @@ public static function STEYX($yValues, $xValues)
*/
public static function TDIST($value, $degrees, $tails)
{
$value = Functions::flattenSingleValue($value);
$degrees = floor(Functions::flattenSingleValue($degrees));
$tails = floor(Functions::flattenSingleValue($tails));

if ((is_numeric($value)) && (is_numeric($degrees)) && (is_numeric($tails))) {
if (($value < 0) || ($degrees < 1) || ($tails < 1) || ($tails > 2)) {
return Functions::NAN();
}
// tdist, which finds the probability that corresponds to a given value
// of t with k degrees of freedom. This algorithm is translated from a
// pascal function on p81 of "Statistical Computing in Pascal" by D
// Cooke, A H Craven & G M Clark (1985: Edward Arnold (Pubs.) Ltd:
// London). The above Pascal algorithm is itself a translation of the
// fortran algoritm "AS 3" by B E Cooper of the Atlas Computer
// Laboratory as reported in (among other places) "Applied Statistics
// Algorithms", editied by P Griffiths and I D Hill (1985; Ellis
// Horwood Ltd.; W. Sussex, England).
$tterm = $degrees;
$ttheta = atan2($value, sqrt($tterm));
$tc = cos($ttheta);
$ts = sin($ttheta);

if (($degrees % 2) == 1) {
$ti = 3;
$tterm = $tc;
} else {
$ti = 2;
$tterm = 1;
}

$tsum = $tterm;
while ($ti < $degrees) {
$tterm *= $tc * $tc * ($ti - 1) / $ti;
$tsum += $tterm;
$ti += 2;
}
$tsum *= $ts;
if (($degrees % 2) == 1) {
$tsum = Functions::M_2DIVPI * ($tsum + $ttheta);
}
$tValue = 0.5 * (1 + $tsum);
if ($tails == 1) {
return 1 - abs($tValue);
}

return 1 - abs((1 - $tValue) - $tValue);
}

return Functions::VALUE();
return Statistical\Distributions\StudentT::distribution($value, $degrees, $tails);
}

/**
* TINV.
*
* Returns the one-tailed probability of the chi-squared distribution.
*
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\StudentT::inverse()
* Use the inverse() method in the Statistical\Distributions\StudentT class instead
*
* @param float $probability Probability for the function
* @param float $degrees degrees of freedom
*
* @return float|string The result, or a string containing an error
*/
public static function TINV($probability, $degrees)
{
$probability = Functions::flattenSingleValue($probability);
$degrees = floor(Functions::flattenSingleValue($degrees));

if ((is_numeric($probability)) && (is_numeric($degrees))) {
$xLo = 100;
$xHi = 0;

$x = $xNew = 1;
$dx = 1;
$i = 0;

while ((abs($dx) > Functions::PRECISION) && ($i++ < self::MAX_ITERATIONS)) {
// Apply Newton-Raphson step
$result = self::TDIST($x, $degrees, 2);
$error = $result - $probability;
if ($error == 0.0) {
$dx = 0;
} elseif ($error < 0.0) {
$xLo = $x;
} else {
$xHi = $x;
}
// Avoid division by zero
if ($result != 0.0) {
$dx = $error / $result;
$xNew = $x - $dx;
}
// If the NR fails to converge (which for example may be the
// case if the initial guess is too rough) we apply a bisection
// step to determine a more narrow interval around the root.
if (($xNew < $xLo) || ($xNew > $xHi) || ($result == 0.0)) {
$xNew = ($xLo + $xHi) / 2;
$dx = $xNew - $x;
}
$x = $xNew;
}
if ($i == self::MAX_ITERATIONS) {
return Functions::NA();
}

return round($x, 12);
}

return Functions::VALUE();
return Statistical\Distributions\StudentT::inverse($probability, $degrees);
}

/**
Expand Down Expand Up @@ -2421,6 +2340,11 @@ public static function VARPA(...$args)
* Returns the Weibull distribution. Use this distribution in reliability
* analysis, such as calculating a device's mean time to failure.
*
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\Weibull::distribution()
* Use the distribution() method in the Statistical\Distributions\Weibull class instead
*
* @param float $value
* @param float $alpha Alpha Parameter
* @param float $beta Beta Parameter
Expand All @@ -2430,24 +2354,7 @@ public static function VARPA(...$args)
*/
public static function WEIBULL($value, $alpha, $beta, $cumulative)
{
$value = Functions::flattenSingleValue($value);
$alpha = Functions::flattenSingleValue($alpha);
$beta = Functions::flattenSingleValue($beta);

if ((is_numeric($value)) && (is_numeric($alpha)) && (is_numeric($beta))) {
if (($value < 0) || ($alpha <= 0) || ($beta <= 0)) {
return Functions::NAN();
}
if ((is_numeric($cumulative)) || (is_bool($cumulative))) {
if ($cumulative) {
return 1 - exp(0 - ($value / $beta) ** $alpha);
}

return ($alpha / $beta ** $alpha) * $value ** ($alpha - 1) * exp(0 - ($value / $beta) ** $alpha);
}
}

return Functions::VALUE();
return Statistical\Distributions\Weibull::distribution($value, $alpha, $beta, $cumulative);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,55 +73,13 @@ public static function inverse($probability, $degrees)
return Functions::NAN();
}

return self::calculateInverse($degrees, $probability);
}

/**
* @return float|string
*/
protected static function calculateInverse(int $degrees, float $probability)
{
$xLo = 100;
$xHi = 0;

$x = $xNew = 1;
$dx = 1;
$i = 0;

while ((abs($dx) > Functions::PRECISION) && (++$i <= self::MAX_ITERATIONS)) {
// Apply Newton-Raphson step
$result = 1 - (Gamma::incompleteGamma($degrees / 2, $x / 2)
$callback = function ($value) use ($degrees) {
return 1 - (Gamma::incompleteGamma($degrees / 2, $value / 2)
/ Gamma::gammaValue($degrees / 2));
$error = $result - $probability;
};

if ($error == 0.0) {
$dx = 0;
} elseif ($error < 0.0) {
$xLo = $x;
} else {
$xHi = $x;
}

// Avoid division by zero
if ($result != 0.0) {
$dx = $error / $result;
$xNew = $x - $dx;
}

// If the NR fails to converge (which for example may be the
// case if the initial guess is too rough) we apply a bisection
// step to determine a more narrow interval around the root.
if (($xNew < $xLo) || ($xNew > $xHi) || ($result == 0.0)) {
$xNew = ($xLo + $xHi) / 2;
$dx = $xNew - $x;
}
$x = $xNew;
}

if ($i === self::MAX_ITERATIONS) {
return Functions::NA();
}
$newtonRaphson = new NewtonRaphson($callback);

return $x;
return $newtonRaphson->execute($probability);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,11 @@ protected static function calculateInverse(float $probability, float $alpha, flo

while ((abs($dx) > Functions::PRECISION) && (++$i <= self::MAX_ITERATIONS)) {
// Apply Newton-Raphson step
$error = self::calculateDistribution($x, $alpha, $beta, true) - $probability;
if ($error < 0.0) {
$result = self::calculateDistribution($x, $alpha, $beta, true);
$error = $result - $probability;
if ($error == 0.0) {
$dx = 0;
} elseif ($error < 0.0) {
$xLo = $x;
} else {
$xHi = $x;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?php

namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions;

use PhpOffice\PhpSpreadsheet\Calculation\Functions;

class NewtonRaphson
{
private const MAX_ITERATIONS = 256;

protected $callback;

public function __construct(callable $callback)
{
$this->callback = $callback;
}

public function execute($probability)
{
$xLo = 100;
$xHi = 0;

$x = $xNew = 1;
$dx = 1;
$i = 0;

while ((abs($dx) > Functions::PRECISION) && ($i++ < self::MAX_ITERATIONS)) {
// Apply Newton-Raphson step
$result = call_user_func($this->callback, $x);
$error = $result - $probability;

if ($error == 0.0) {
$dx = 0;
} elseif ($error < 0.0) {
$xLo = $x;
} else {
$xHi = $x;
}

// Avoid division by zero
if ($result != 0.0) {
$dx = $error / $result;
$xNew = $x - $dx;
}

// If the NR fails to converge (which for example may be the
// case if the initial guess is too rough) we apply a bisection
// step to determine a more narrow interval around the root.
if (($xNew < $xLo) || ($xNew > $xHi) || ($result == 0.0)) {
$xNew = ($xLo + $xHi) / 2;
$dx = $xNew - $x;
}
$x = $xNew;
}

if ($i == self::MAX_ITERATIONS) {
return Functions::NA();
}

return $x;
}
}
Loading

0 comments on commit ec25314

Please sign in to comment.