Skip to content

Commit

Permalink
Implementation of MS Excel's LOGNORM.DIST(), NORM.S.DIST(), F.DIST(),…
Browse files Browse the repository at this point in the history
… GAUSS() and GAMMA() functions (#1588)

* `GAUSS()` and `GAMMA()`, `NORM.S.DIST()`, `LOGNORM.DIST()` and `F.DIST()` function implementations, and further unit tests for a number of the statistical functions

Co-authored-by: Adrien Crivelli <[email protected]>
  • Loading branch information
Mark Baker and PowerKiKi authored Jul 25, 2020
1 parent e084e89 commit 57213de
Show file tree
Hide file tree
Showing 25 changed files with 602 additions and 21 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).

### Added

- Nothing.
- Implementation of the Excel `LOGNORM.DIST()`, `NORM.S.DIST()`, `F.DIST()`, `GAMMA()` and `GAUSS()` functions. [#1588](https://github.com/PHPOffice/PhpSpreadsheet/pull/1588)

### Changed

Expand Down
36 changes: 18 additions & 18 deletions src/PhpSpreadsheet/Calculation/Calculation.php
Original file line number Diff line number Diff line change
Expand Up @@ -996,7 +996,7 @@ class Calculation
],
'F.DIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Functions::class, 'DUMMY'],
'functionCall' => [Statistical::class, 'FDIST2'],
'argumentCount' => '4',
],
'F.DIST.RT' => [
Expand Down Expand Up @@ -1133,7 +1133,7 @@ class Calculation
],
'GAMMA' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Functions::class, 'DUMMY'],
'functionCall' => [Statistical::class, 'GAMMAFunction'],
'argumentCount' => '1',
],
'GAMMADIST' => [
Expand Down Expand Up @@ -1168,7 +1168,7 @@ class Calculation
],
'GAUSS' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Functions::class, 'DUMMY'],
'functionCall' => [Statistical::class, 'GAUSS'],
'argumentCount' => '1',
],
'GCD' => [
Expand Down Expand Up @@ -1577,7 +1577,7 @@ class Calculation
],
'LOGNORM.DIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Functions::class, 'DUMMY'],
'functionCall' => [Statistical::class, 'LOGNORMDIST2'],
'argumentCount' => '4',
],
'LOGNORM.INV' => [
Expand Down Expand Up @@ -1782,7 +1782,7 @@ class Calculation
],
'NORM.S.DIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Functions::class, 'DUMMY'],
'functionCall' => [Statistical::class, 'NORMSDIST2'],
'argumentCount' => '1,2',
],
'NORMSINV' => [
Expand Down Expand Up @@ -3131,8 +3131,8 @@ public static function wrapResult($value)
}
// Return strings wrapped in quotes
return self::FORMULA_STRING_QUOTE . $value . self::FORMULA_STRING_QUOTE;
// Convert numeric errors to NaN error
} elseif ((is_float($value)) && ((is_nan($value)) || (is_infinite($value)))) {
// Convert numeric errors to NaN error
return Functions::NAN();
}

Expand Down Expand Up @@ -3774,22 +3774,22 @@ private function _parseFormula($formula, ?Cell $pCell = null)
$pCellParent = ($pCell !== null) ? $pCell->getWorksheet() : null;

$regexpMatchString = '/^(' . self::CALCULATION_REGEXP_FUNCTION .
'|' . self::CALCULATION_REGEXP_CELLREF .
'|' . self::CALCULATION_REGEXP_NUMBER .
'|' . self::CALCULATION_REGEXP_STRING .
'|' . self::CALCULATION_REGEXP_OPENBRACE .
'|' . self::CALCULATION_REGEXP_NAMEDRANGE .
'|' . self::CALCULATION_REGEXP_ERROR .
')/sui';
'|' . self::CALCULATION_REGEXP_CELLREF .
'|' . self::CALCULATION_REGEXP_NUMBER .
'|' . self::CALCULATION_REGEXP_STRING .
'|' . self::CALCULATION_REGEXP_OPENBRACE .
'|' . self::CALCULATION_REGEXP_NAMEDRANGE .
'|' . self::CALCULATION_REGEXP_ERROR .
')/sui';

// Start with initialisation
$index = 0;
$stack = new Stack();
$output = [];
$expectingOperator = false; // We use this test in syntax-checking the expression to determine when a
// - is a negation or + is a positive operator rather than an operation
// - is a negation or + is a positive operator rather than an operation
$expectingOperand = false; // We use this test in syntax-checking the expression to determine whether an operand
// should be null in a function call
// should be null in a function call

// IF branch pruning
// currently pending storeKey (last item of the storeKeysStack
Expand Down Expand Up @@ -4172,7 +4172,7 @@ private function _parseFormula($formula, ?Cell $pCell = null)
((preg_match('/^' . self::CALCULATION_REGEXP_CELLREF . '.*/Ui', substr($formula, $index), $match)) &&
($output[count($output) - 1]['type'] == 'Cell Reference') ||
(preg_match('/^' . self::CALCULATION_REGEXP_NAMEDRANGE . '.*/miu', substr($formula, $index), $match)) &&
($output[count($output) - 1]['type'] == 'Named Range' || $output[count($output) - 1]['type'] == 'Value')
($output[count($output) - 1]['type'] == 'Named Range' || $output[count($output) - 1]['type'] == 'Value')
)) {
while ($stack->count() > 0 &&
($o2 = $stack->last()) &&
Expand Down Expand Up @@ -4951,7 +4951,7 @@ private function executeNumericBinaryOperation($operand1, $operand2, $operation,
} else {
if ((Functions::getCompatibilityMode() != Functions::COMPATIBILITY_OPENOFFICE) &&
((is_string($operand1) && !is_numeric($operand1) && strlen($operand1) > 0) ||
(is_string($operand2) && !is_numeric($operand2) && strlen($operand2) > 0))) {
(is_string($operand2) && !is_numeric($operand2) && strlen($operand2) > 0))) {
$result = Functions::VALUE();
} else {
// If we're dealing with non-matrix operations, execute the necessary operation
Expand Down Expand Up @@ -4980,7 +4980,7 @@ private function executeNumericBinaryOperation($operand1, $operand2, $operation,

return false;
}
$result = $operand1 / $operand2;
$result = $operand1 / $operand2;

break;
// Power
Expand Down
163 changes: 161 additions & 2 deletions src/PhpSpreadsheet/Calculation/Statistical.php
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,7 @@ public static function BETADIST($value, $alpha, $beta, $rMin = 0, $rMax = 1)
/**
* BETAINV.
*
* Returns the inverse of the beta distribution.
* Returns the inverse of the Beta distribution.
*
* @param float $probability Probability at which you want to evaluate the distribution
* @param float $alpha Parameter to the distribution
Expand Down Expand Up @@ -1475,6 +1475,62 @@ public static function EXPONDIST($value, $lambda, $cumulative)
return Functions::VALUE();
}

/**
 * Evaluates gamma(a) * gamma(b) / gamma(a + b) using self::gamma().
 *
 * NOTE(review): this matches the textbook definition of the Beta function B(a, b),
 * assuming self::gamma() (defined outside this view) is the Gamma function - confirm.
 *
 * @param mixed $a First argument, forwarded to self::gamma()
 * @param mixed $b Second argument, forwarded to self::gamma()
 *
 * @return mixed The quotient described above
 */
private static function betaFunction($a, $b)
{
    $numerator = self::gamma($a) * self::gamma($b);
    $denominator = self::gamma($a + $b);

    return $numerator / $denominator;
}

/**
 * Divides self::incompleteBeta($value, $a, $b) by self::betaFunction($a, $b).
 *
 * NOTE(review): whether self::incompleteBeta() (defined outside this view) is already
 * normalised is not visible here - confirm before relying on this helper.
 *
 * @param mixed $value Point at which the incomplete beta is evaluated
 * @param mixed $a First shape argument
 * @param mixed $b Second shape argument
 *
 * @return mixed The quotient of the two helper results
 */
private static function regularizedIncompleteBeta($value, $a, $b)
{
    $incomplete = self::incompleteBeta($value, $a, $b);
    $complete = self::betaFunction($a, $b);

    return $incomplete / $complete;
}

/**
 * F.DIST.
 *
 * Returns the F probability distribution, either as a cumulative distribution
 * or as a probability density, depending on the $cumulative flag.
 *
 * All arguments are first passed through Functions::flattenSingleValue().
 * Non-numeric $value, $u or $v yields Functions::VALUE(); a negative $value or
 * degrees of freedom below 1 yields Functions::NAN(). The degrees of freedom are
 * truncated to integers after that range check.
 *
 * @param float $value Value of the function
 * @param int $u The numerator degrees of freedom
 * @param int $v The denominator degrees of freedom
 * @param bool $cumulative If cumulative is TRUE, F.DIST returns the cumulative distribution function;
 *                         if FALSE, it returns the probability density function.
 *
 * @return float|string The result, or a string containing an error
 */
public static function FDIST2($value, $u, $v, $cumulative)
{
    $value = Functions::flattenSingleValue($value);
    $u = Functions::flattenSingleValue($u);
    $v = Functions::flattenSingleValue($v);
    $cumulative = Functions::flattenSingleValue($cumulative);

    // Guard: all three numeric inputs must be numeric
    if (!is_numeric($value) || !is_numeric($u) || !is_numeric($v)) {
        return Functions::VALUE();
    }

    // Guard: range check is done on the untruncated degrees of freedom
    if ($value < 0 || $u < 1 || $v < 1) {
        return Functions::NAN();
    }

    $numeratorDf = (int) $u;
    $denominatorDf = (int) $v;

    if ((bool) $cumulative) {
        $scaledValue = $numeratorDf * $value;
        $adjustedValue = $scaledValue / ($scaledValue + $denominatorDf);

        return self::incompleteBeta($adjustedValue, $numeratorDf / 2, $denominatorDf / 2);
    }

    // Probability density, built from three factors multiplied left to right
    $dfRatio = $numeratorDf / $denominatorDf;
    $gammaFactor = self::gamma(($denominatorDf + $numeratorDf) / 2) /
        (self::gamma($numeratorDf / 2) * self::gamma($denominatorDf / 2));
    $ratioFactor = $dfRatio ** ($numeratorDf / 2);
    $valueFactor = ($value ** (($numeratorDf - 2) / 2)) /
        ((1 + $dfRatio * $value) ** (($numeratorDf + $denominatorDf) / 2));

    return $gammaFactor * $ratioFactor * $valueFactor;
}

/**
* FISHER.
*
Expand Down Expand Up @@ -1556,6 +1612,27 @@ public static function FORECAST($xValue, $yValues, $xValues)
return $bestFitLinear->getValueOfYForX($xValue);
}

/**
 * GAMMA.
 *
 * Validates the argument and delegates to self::gamma().
 *
 * A non-numeric argument gives Functions::VALUE(); zero or a negative whole
 * number gives Functions::NAN().
 *
 * @param float $value
 *
 * @return float|string The result, or a string containing an error
 */
public static function GAMMAFunction($value)
{
    $value = Functions::flattenSingleValue($value);

    if (!is_numeric($value)) {
        return Functions::VALUE();
    }

    // Whole-number test: the integer and float casts agree
    $isWholeNumber = ((int) $value) == ((float) $value);
    if ($isWholeNumber && $value <= 0.0) {
        return Functions::NAN();
    }

    return self::gamma($value);
}

/**
* GAMMADIST.
*
Expand Down Expand Up @@ -1593,7 +1670,7 @@ public static function GAMMADIST($value, $a, $b, $cumulative)
/**
* GAMMAINV.
*
* Returns the inverse of the beta distribution.
* Returns the inverse of the Gamma distribution.
*
* @param float $probability Probability at which you want to evaluate the distribution
* @param float $alpha Parameter to the distribution
Expand Down Expand Up @@ -1677,6 +1754,26 @@ public static function GAMMALN($value)
return Functions::VALUE();
}

/**
 * GAUSS.
 *
 * Calculates the probability that a member of a standard normal population will fall
 * between the mean and $value standard deviations from the mean. Computed as the
 * cumulative NORMDIST result for mean 0 and standard deviation 1, minus 0.5.
 *
 * @param float $value
 *
 * @return float|string The result, or a string containing an error
 */
public static function GAUSS($value)
{
    $value = Functions::flattenSingleValue($value);

    if (is_numeric($value)) {
        $cumulativeProbability = self::NORMDIST($value, 0, 1, true);

        return $cumulativeProbability - 0.5;
    }

    return Functions::VALUE();
}

/**
* GEOMEAN.
*
Expand Down Expand Up @@ -2117,6 +2214,42 @@ public static function LOGNORMDIST($value, $mean, $stdDev)
return Functions::VALUE();
}

/**
 * LOGNORM.DIST.
 *
 * Returns the lognormal distribution of x, where ln(x) is normally distributed
 * with parameters mean and standard_dev.
 *
 * Non-numeric $value, $mean or $stdDev yields Functions::VALUE(); a $value or
 * $stdDev that is not strictly positive yields Functions::NAN().
 *
 * @param float $value
 * @param float $mean
 * @param float $stdDev
 * @param bool $cumulative TRUE for the cumulative form (delegated to NORMSDIST2),
 *                         FALSE for the probability density
 *
 * @return float|string The result, or a string containing an error
 */
public static function LOGNORMDIST2($value, $mean, $stdDev, $cumulative = false)
{
    $value = Functions::flattenSingleValue($value);
    $mean = Functions::flattenSingleValue($mean);
    $stdDev = Functions::flattenSingleValue($stdDev);
    $isCumulative = (bool) Functions::flattenSingleValue($cumulative);

    if (!is_numeric($value) || !is_numeric($mean) || !is_numeric($stdDev)) {
        return Functions::VALUE();
    }

    if (($value <= 0) || ($stdDev <= 0)) {
        return Functions::NAN();
    }

    $logValue = log($value);

    if ($isCumulative) {
        // Standardise ln(x) and evaluate the cumulative standard normal
        return self::NORMSDIST2(($logValue - $mean) / $stdDev, true);
    }

    // Probability density: normalising factor times the exponential term
    $normaliser = sqrt(2 * M_PI) * $stdDev * $value;
    $exponent = ($logValue - $mean) ** 2 / (2 * $stdDev ** 2);

    return (1 / $normaliser) * exp(0 - $exponent);
}

/**
* MAX.
*
Expand Down Expand Up @@ -2623,10 +2756,36 @@ public static function NORMINV($probability, $mean, $stdDev)
public static function NORMSDIST($value)
{
    $value = Functions::flattenSingleValue($value);

    // Numeric input: cumulative NORMDIST with mean 0 and standard deviation 1;
    // anything else is reported as a value error.
    return is_numeric($value)
        ? self::NORMDIST($value, 0, 1, true)
        : Functions::VALUE();
}

/**
 * NORM.S.DIST.
 *
 * Returns the standard normal distribution. The distribution has a mean of 0 (zero)
 * and a standard deviation of one. Use this function in place of a table of standard
 * normal curve areas.
 *
 * The value is evaluated through NORMDIST with mean 0 and standard deviation 1, and
 * the $cumulative flag is forwarded unchanged (after a boolean cast).
 *
 * @param float $value
 * @param bool $cumulative Forwarded to NORMDIST as its cumulative flag. Optional: the
 *                         NORM.S.DIST function-table entry accepts one or two arguments,
 *                         so a one-argument call must not fail. Defaults to TRUE, the
 *                         same flag NORMSDIST() passes.
 *
 * @return float|string The result, or a string containing an error
 */
public static function NORMSDIST2($value, $cumulative = true)
{
    $value = Functions::flattenSingleValue($value);
    if (!is_numeric($value)) {
        return Functions::VALUE();
    }
    $cumulative = (bool) Functions::flattenSingleValue($cumulative);

    return self::NORMDIST($value, 0, 1, $cumulative);
}

/**
* NORMSINV.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<?php

namespace PhpOffice\PhpSpreadsheetTests\Calculation\Functions\Statistical;

use PhpOffice\PhpSpreadsheet\Calculation\Statistical;
use PHPUnit\Framework\TestCase;

/**
 * Data-driven test for Statistical::FDIST2().
 */
class FDist2Test extends TestCase
{
    /**
     * Calls FDIST2 with the arguments of one data set and compares the outcome
     * with the expected result, allowing a delta of 1E-12.
     *
     * @dataProvider providerFDIST2
     *
     * @param mixed $expectedResult
     */
    public function testFDIST2($expectedResult, ...$args): void
    {
        self::assertEqualsWithDelta($expectedResult, Statistical::FDIST2(...$args), 1E-12);
    }

    /**
     * Loads the data sets from the FDIST2 data file.
     */
    public function providerFDIST2(): array
    {
        $dataSets = require 'tests/data/Calculation/Statistical/FDIST2.php';

        return $dataSets;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?php

namespace PhpOffice\PhpSpreadsheetTests\Calculation\Functions\Statistical;

use PhpOffice\PhpSpreadsheet\Calculation\Statistical;
use PHPUnit\Framework\TestCase;

/**
 * Data-driven test for Statistical::GAMMAFunction().
 */
class GammaTest extends TestCase
{
    /**
     * Calls GAMMAFunction with one test value and compares the outcome with the
     * expected result, allowing a delta of 1E-12.
     *
     * @dataProvider providerGAMMA
     *
     * @param mixed $expectedResult
     * @param mixed $testValue
     */
    public function testGAMMA($expectedResult, $testValue): void
    {
        self::assertEqualsWithDelta($expectedResult, Statistical::GAMMAFunction($testValue), 1E-12);
    }

    /**
     * Loads the data sets from the GAMMA data file.
     */
    public function providerGAMMA(): array
    {
        $dataSets = require 'tests/data/Calculation/Statistical/GAMMA.php';

        return $dataSets;
    }
}
Loading

0 comments on commit 57213de

Please sign in to comment.