Skip to content

Commit

Permalink
Extract a few more Distribution functions from Statistical (#1975)
Browse files Browse the repository at this point in the history
* Extract a few more Distribution functions from Statistical; this time EXPONDIST() and HYPGEOMDIST()

* Extract the F Distribution (although only F.DIST() is implemented so far)

* Update docblocks

* PHPCS
  • Loading branch information
Mark Baker authored Mar 31, 2021
1 parent 029f345 commit 17af132
Show file tree
Hide file tree
Showing 10 changed files with 344 additions and 118 deletions.
8 changes: 4 additions & 4 deletions src/PhpSpreadsheet/Calculation/Calculation.php
Original file line number Diff line number Diff line change
Expand Up @@ -980,12 +980,12 @@ class Calculation
],
'EXPONDIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'EXPONDIST'],
'functionCall' => [Statistical\Distributions\Exponential::class, 'distribution'],
'argumentCount' => '3',
],
'EXPON.DIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'EXPONDIST'],
'functionCall' => [Statistical\Distributions\Exponential::class, 'distribution'],
'argumentCount' => '3',
],
'FACT' => [
Expand All @@ -1010,7 +1010,7 @@ class Calculation
],
'F.DIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'FDIST2'],
'functionCall' => [Statistical\Distributions\F::class, 'distribution'],
'argumentCount' => '4',
],
'F.DIST.RT' => [
Expand Down Expand Up @@ -1248,7 +1248,7 @@ class Calculation
],
'HYPGEOMDIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'HYPGEOMDIST'],
'functionCall' => [Statistical\Distributions\HyperGeometric::class, 'distribution'],
'argumentCount' => '4',
],
'HYPGEOM.DIST' => [
Expand Down
129 changes: 38 additions & 91 deletions src/PhpSpreadsheet/Calculation/Statistical.php
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,12 @@ private static function inverseNcdf($p)
*
* @Deprecated 1.17.0
*
* @see Statistical\Averages::averageDeviations()
* Use the averageDeviations() method in the Statistical\Averages class instead
*
* @param mixed ...$args Data values
*
* @return float|string
*
* @see Statistical\Averages::averageDeviations()
* Use the averageDeviations() method in the Statistical\Averages class instead
*/
public static function AVEDEV(...$args)
{
Expand Down Expand Up @@ -160,12 +160,12 @@ public static function AVERAGE(...$args)
*
* @Deprecated 1.17.0
*
* @see Statistical\Averages::averageA()
* Use the averageA() method in the Statistical\Averages class instead
*
* @param mixed ...$args Data values
*
* @return float|string
*
* @see Statistical\Averages::averageA()
* Use the averageA() method in the Statistical\Averages class instead
*/
public static function AVERAGEA(...$args)
{
Expand Down Expand Up @@ -203,7 +203,7 @@ public static function AVERAGEIF($range, $condition, $averageRange = [])
*
* @Deprecated 1.18.0
*
*@see Statistical\Distributions\Beta::distribution()
* @see Statistical\Distributions\Beta::distribution()
* Use the distribution() method in the Statistical\Distributions\Beta class instead
*
* @param float $value Value at which you want to evaluate the distribution
Expand Down Expand Up @@ -498,11 +498,6 @@ public static function COVAR($yValues, $xValues)
* @param float $alpha criterion value
*
* @return int|string
*
* @TODO Warning. This implementation differs from the algorithm detailed on the MS
* web site in that $CumPGuessMinus1 = $CumPGuess - 1 rather than $CumPGuess - $PGuess
* This eliminates a potential endless loop error, but may have an adverse affect on the
* accuracy of the function (although all my tests have so far returned correct results).
*/
public static function CRITBINOM($trials, $probability, $alpha)
{
Expand Down Expand Up @@ -568,6 +563,11 @@ public static function DEVSQ(...$args)
* such as how long an automated bank teller takes to deliver cash. For example, you can
* use EXPONDIST to determine the probability that the process takes at most 1 minute.
*
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\Exponential::distribution()
* Use the distribution() method in the Statistical\Distributions\Exponential class instead
*
* @param float $value Value of the function
* @param float $lambda The parameter value
* @param bool $cumulative
Expand All @@ -576,24 +576,7 @@ public static function DEVSQ(...$args)
*/
public static function EXPONDIST($value, $lambda, $cumulative)
{
$value = Functions::flattenSingleValue($value);
$lambda = Functions::flattenSingleValue($lambda);
$cumulative = Functions::flattenSingleValue($cumulative);

if ((is_numeric($value)) && (is_numeric($lambda))) {
if (($value < 0) || ($lambda < 0)) {
return Functions::NAN();
}
if ((is_numeric($cumulative)) || (is_bool($cumulative))) {
if ($cumulative) {
return 1 - exp(0 - $value * $lambda);
}

return $lambda * exp(0 - $value * $lambda);
}
}

return Functions::VALUE();
return Statistical\Distributions\Exponential::distribution($value, $lambda, $cumulative);
}

/**
Expand All @@ -604,6 +587,11 @@ public static function EXPONDIST($value, $lambda, $cumulative)
* For example, you can examine the test scores of men and women entering high school, and determine
* if the variability in the females is different from that found in the males.
*
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\F::distribution()
* Use the distribution() method in the Statistical\Distributions\F class instead
*
* @param float $value Value of the function
* @param int $u The numerator degrees of freedom
* @param int $v The denominator degrees of freedom
Expand All @@ -614,34 +602,7 @@ public static function EXPONDIST($value, $lambda, $cumulative)
*/
public static function FDIST2($value, $u, $v, $cumulative)
{
$value = Functions::flattenSingleValue($value);
$u = Functions::flattenSingleValue($u);
$v = Functions::flattenSingleValue($v);
$cumulative = Functions::flattenSingleValue($cumulative);

if (is_numeric($value) && is_numeric($u) && is_numeric($v)) {
if ($value < 0 || $u < 1 || $v < 1) {
return Functions::NAN();
}

$cumulative = (bool) $cumulative;
$u = (int) $u;
$v = (int) $v;

if ($cumulative) {
$adjustedValue = ($u * $value) / ($u * $value + $v);

return Statistical\Distributions\Beta::incompleteBeta($adjustedValue, $u / 2, $v / 2);
}

return (Statistical\Distributions\Gamma::gammaValue(($v + $u) / 2) /
(Statistical\Distributions\Gamma::gammaValue($u / 2) *
Statistical\Distributions\Gamma::gammaValue($v / 2))) *
(($u / $v) ** ($u / 2)) *
(($value ** (($u - 2) / 2)) / ((1 + ($u / $v) * $value) ** (($u + $v) / 2)));
}

return Functions::VALUE();
return Statistical\Distributions\F::distribution($value, $u, $v, $cumulative);
}

/**
Expand Down Expand Up @@ -908,42 +869,26 @@ public static function HARMEAN(...$args)
* Returns the hypergeometric distribution. HYPGEOMDIST returns the probability of a given number of
* sample successes, given the sample size, population successes, and population size.
*
* @param float $sampleSuccesses Number of successes in the sample
* @param float $sampleNumber Size of the sample
* @param float $populationSuccesses Number of successes in the population
* @param float $populationNumber Population size
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\HyperGeometric::distribution()
* Use the distribution() method in the Statistical\Distributions\HyperGeometric class instead
*
* @param mixed (int) $sampleSuccesses Number of successes in the sample
* @param mixed (int) $sampleNumber Size of the sample
* @param mixed (int) $populationSuccesses Number of successes in the population
* @param mixed (int) $populationNumber Population size
*
* @return float|string
*/
public static function HYPGEOMDIST($sampleSuccesses, $sampleNumber, $populationSuccesses, $populationNumber)
{
$sampleSuccesses = Functions::flattenSingleValue($sampleSuccesses);
$sampleNumber = Functions::flattenSingleValue($sampleNumber);
$populationSuccesses = Functions::flattenSingleValue($populationSuccesses);
$populationNumber = Functions::flattenSingleValue($populationNumber);

if ((is_numeric($sampleSuccesses)) && (is_numeric($sampleNumber)) && (is_numeric($populationSuccesses)) && (is_numeric($populationNumber))) {
$sampleSuccesses = floor($sampleSuccesses);
$sampleNumber = floor($sampleNumber);
$populationSuccesses = floor($populationSuccesses);
$populationNumber = floor($populationNumber);

if (($sampleSuccesses < 0) || ($sampleSuccesses > $sampleNumber) || ($sampleSuccesses > $populationSuccesses)) {
return Functions::NAN();
}
if (($sampleNumber <= 0) || ($sampleNumber > $populationNumber)) {
return Functions::NAN();
}
if (($populationSuccesses <= 0) || ($populationSuccesses > $populationNumber)) {
return Functions::NAN();
}

return MathTrig::COMBIN($populationSuccesses, $sampleSuccesses) *
MathTrig::COMBIN($populationNumber - $populationSuccesses, $sampleNumber - $sampleSuccesses) /
MathTrig::COMBIN($populationNumber, $sampleNumber);
}

return Functions::VALUE();
return Statistical\Distributions\HyperGeometric::distribution(
$sampleSuccesses,
$sampleNumber,
$populationSuccesses,
$populationNumber
);
}

/**
Expand Down Expand Up @@ -2148,8 +2093,10 @@ public static function WEIBULL($value, $alpha, $beta, $cumulative)
/**
* ZTEST.
*
* Returns the Weibull distribution. Use this distribution in reliability
* analysis, such as calculating a device's mean time to failure.
* Returns the one-tailed P-value of a z-test.
*
* For a given hypothesized population mean, x, Z.TEST returns the probability that the sample mean would be
* greater than the average of observations in the data set (array) — that is, the observed sample mean.
*
* @param float $dataSet
* @param float $m0 Alpha Parameter
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?php

namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions;

use PhpOffice\PhpSpreadsheet\Calculation\Exception;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;

class Exponential
{
    use BaseValidations;

    /**
     * EXPONDIST.
     *
     * Returns the exponential distribution. Use EXPONDIST to model the time between events,
     * such as how long an automated bank teller takes to deliver cash. For example, you can
     * use EXPONDIST to determine the probability that the process takes at most 1 minute.
     *
     * Excel Function:
     *      EXPONDIST(value,lambda,cumulative)
     *
     * @param mixed (float) $value Value of the function; must not be negative
     * @param mixed (float) $lambda The parameter value; must be greater than zero
     * @param mixed (bool) $cumulative If TRUE, the cumulative distribution function is returned;
     *                                  otherwise the probability density function is returned
     *
     * @return float|string The calculated probability, or a string when validation fails
     *                          or an argument is outside the permitted domain
     */
    public static function distribution($value, $lambda, $cumulative)
    {
        // Arguments may arrive wrapped in arrays (cell ranges); reduce each to a single value.
        $value = Functions::flattenSingleValue($value);
        $lambda = Functions::flattenSingleValue($lambda);
        $cumulative = Functions::flattenSingleValue($cumulative);

        try {
            $value = self::validateFloat($value);
            $lambda = self::validateFloat($lambda);
            $cumulative = self::validateBool($cumulative);
        } catch (Exception $e) {
            // The validators signal bad input by throwing; the message is handed back as the
            // function result (presumably an Excel error value such as #VALUE! - confirm
            // against BaseValidations).
            return $e->getMessage();
        }

        // Excel documents #NUM! for x < 0 and for lambda <= 0, so a zero lambda must be
        // rejected as well rather than silently yielding a probability of 0.
        if (($value < 0) || ($lambda <= 0)) {
            return Functions::NAN();
        }

        // exp(-lambda * x) is shared by the cumulative and the density forms.
        $decay = exp(0 - $value * $lambda);

        if ($cumulative === true) {
            return 1 - $decay;
        }

        return $lambda * $decay;
    }
}
59 changes: 59 additions & 0 deletions src/PhpSpreadsheet/Calculation/Statistical/Distributions/F.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
<?php

namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions;

use PhpOffice\PhpSpreadsheet\Calculation\Exception;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;

class F
{
    use BaseValidations;

    /**
     * F.DIST.
     *
     * Calculates the F probability distribution, which can be used to judge whether two
     * data sets differ in their degree of diversity (for instance, whether the spread of
     * test scores in one group differs from the spread in another).
     *
     * @param mixed(float) $value Value at which the function is evaluated
     * @param mixed(int) $u The numerator degrees of freedom
     * @param mixed(int) $v The denominator degrees of freedom
     * @param mixed(bool) $cumulative TRUE selects the cumulative distribution function;
     *                                  FALSE selects the probability density function
     *
     * @return float|string The calculated probability, or a string when validation fails
     *                          or an argument is outside the permitted domain
     */
    public static function distribution($value, $u, $v, $cumulative)
    {
        // Collapse any array/range arguments down to single values before validating.
        $value = Functions::flattenSingleValue($value);
        $u = Functions::flattenSingleValue($u);
        $v = Functions::flattenSingleValue($v);
        $cumulative = Functions::flattenSingleValue($cumulative);

        try {
            $value = self::validateFloat($value);
            $u = self::validateInt($u);
            $v = self::validateInt($v);
            $cumulative = self::validateBool($cumulative);
        } catch (Exception $e) {
            // A failed validation surfaces as the exception's message.
            return $e->getMessage();
        }

        // Negative values and degrees of freedom below 1 are outside the function's domain.
        if ($value < 0 || $u < 1 || $v < 1) {
            return Functions::NAN();
        }

        if (!$cumulative) {
            // Probability density: gamma-function ratio, scaled by the degrees-of-freedom
            // ratio, multiplied by the value-dependent term.
            $gammaRatio = Gamma::gammaValue(($v + $u) / 2) /
                (Gamma::gammaValue($u / 2) * Gamma::gammaValue($v / 2));
            $freedomRatio = $u / $v;
            $scale = $freedomRatio ** ($u / 2);
            $numerator = $value ** (($u - 2) / 2);
            $denominator = (1 + $freedomRatio * $value) ** (($u + $v) / 2);

            return $gammaRatio * $scale * ($numerator / $denominator);
        }

        // Cumulative distribution: incomplete beta evaluated at u*x / (u*x + v).
        $scaledValue = $u * $value;

        return Beta::incompleteBeta($scaledValue / ($scaledValue + $v), $u / 2, $v / 2);
    }
}
Loading

0 comments on commit 17af132

Please sign in to comment.