From 57213deb64f11aac895573cc8d092fac0b298cb0 Mon Sep 17 00:00:00 2001 From: Mark Baker Date: Sat, 25 Jul 2020 12:44:51 +0200 Subject: [PATCH] Implementation of MS Excel's LOGNORM.DIST(), NORM.S.DIST(), F.DIST(), GAUSS() and GAMMA() functions (#1588) * `GAUSS()` and `GAMMA()`, `NORM.S.DIST()`, `LOGNORM.DIST()` and `F.DIST()` function implementations, and further unit tests for a number of the statistical functions Co-authored-by: Adrien Crivelli --- CHANGELOG.md | 2 +- .../Calculation/Calculation.php | 36 ++-- .../Calculation/Statistical.php | 163 +++++++++++++++++- .../Functions/Statistical/FDist2Test.php | 25 +++ .../Functions/Statistical/GammaTest.php | 26 +++ .../Functions/Statistical/GaussTest.php | 26 +++ .../Statistical/LogNormDist2Test.php | 25 +++ .../Functions/Statistical/LogNormDistTest.php | 25 +++ .../Functions/Statistical/NormDistTest.php | 25 +++ .../Functions/Statistical/NormInvTest.php | 25 +++ .../Functions/Statistical/NormSDist2Test.php | 25 +++ .../Functions/Statistical/NormSDistTest.php | 26 +++ .../Functions/Statistical/NormSInvTest.php | 26 +++ .../Functions/Statistical/StandardizeTest.php | 25 +++ tests/data/Calculation/Statistical/FDIST2.php | 17 ++ tests/data/Calculation/Statistical/GAMMA.php | 13 ++ tests/data/Calculation/Statistical/GAUSS.php | 10 ++ .../Calculation/Statistical/LOGNORMDIST.php | 9 + .../Calculation/Statistical/LOGNORMDIST2.php | 23 +++ .../data/Calculation/Statistical/NORMDIST.php | 11 ++ .../data/Calculation/Statistical/NORMINV.php | 11 ++ .../Calculation/Statistical/NORMSDIST.php | 12 ++ .../Calculation/Statistical/NORMSDIST2.php | 17 ++ .../data/Calculation/Statistical/NORMSINV.php | 8 + .../Calculation/Statistical/STANDARDIZE.php | 12 ++ 25 files changed, 602 insertions(+), 21 deletions(-) create mode 100644 tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/FDist2Test.php create mode 100644 tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/GammaTest.php create mode 100644 
tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/GaussTest.php create mode 100644 tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/LogNormDist2Test.php create mode 100644 tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/LogNormDistTest.php create mode 100644 tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/NormDistTest.php create mode 100644 tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/NormInvTest.php create mode 100644 tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/NormSDist2Test.php create mode 100644 tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/NormSDistTest.php create mode 100644 tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/NormSInvTest.php create mode 100644 tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/StandardizeTest.php create mode 100644 tests/data/Calculation/Statistical/FDIST2.php create mode 100644 tests/data/Calculation/Statistical/GAMMA.php create mode 100644 tests/data/Calculation/Statistical/GAUSS.php create mode 100644 tests/data/Calculation/Statistical/LOGNORMDIST.php create mode 100644 tests/data/Calculation/Statistical/LOGNORMDIST2.php create mode 100644 tests/data/Calculation/Statistical/NORMDIST.php create mode 100644 tests/data/Calculation/Statistical/NORMINV.php create mode 100644 tests/data/Calculation/Statistical/NORMSDIST.php create mode 100644 tests/data/Calculation/Statistical/NORMSDIST2.php create mode 100644 tests/data/Calculation/Statistical/NORMSINV.php create mode 100644 tests/data/Calculation/Statistical/STANDARDIZE.php diff --git a/CHANGELOG.md b/CHANGELOG.md index f6a0e9ec29..0f0d4c91b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). ### Added -- Nothing. +- Implementation of the Excel `LOGNORM.DIST()`, `NORM.S.DIST()`, `F.DIST()`, `GAMMA()` and `GAUSS()` functions. 
[#1588](https://github.com/PHPOffice/PhpSpreadsheet/pull/1588) ### Changed diff --git a/src/PhpSpreadsheet/Calculation/Calculation.php b/src/PhpSpreadsheet/Calculation/Calculation.php index af6574bf2f..57506ab941 100644 --- a/src/PhpSpreadsheet/Calculation/Calculation.php +++ b/src/PhpSpreadsheet/Calculation/Calculation.php @@ -996,7 +996,7 @@ class Calculation ], 'F.DIST' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Functions::class, 'DUMMY'], + 'functionCall' => [Statistical::class, 'FDIST2'], 'argumentCount' => '4', ], 'F.DIST.RT' => [ @@ -1133,7 +1133,7 @@ class Calculation ], 'GAMMA' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Functions::class, 'DUMMY'], + 'functionCall' => [Statistical::class, 'GAMMAFunction'], 'argumentCount' => '1', ], 'GAMMADIST' => [ @@ -1168,7 +1168,7 @@ class Calculation ], 'GAUSS' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Functions::class, 'DUMMY'], + 'functionCall' => [Statistical::class, 'GAUSS'], 'argumentCount' => '1', ], 'GCD' => [ @@ -1577,7 +1577,7 @@ class Calculation ], 'LOGNORM.DIST' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Functions::class, 'DUMMY'], + 'functionCall' => [Statistical::class, 'LOGNORMDIST2'], 'argumentCount' => '4', ], 'LOGNORM.INV' => [ @@ -1782,7 +1782,7 @@ class Calculation ], 'NORM.S.DIST' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Functions::class, 'DUMMY'], + 'functionCall' => [Statistical::class, 'NORMSDIST2'], 'argumentCount' => '1,2', ], 'NORMSINV' => [ @@ -3131,8 +3131,8 @@ public static function wrapResult($value) } // Return strings wrapped in quotes return self::FORMULA_STRING_QUOTE . $value . 
self::FORMULA_STRING_QUOTE; - // Convert numeric errors to NaN error } elseif ((is_float($value)) && ((is_nan($value)) || (is_infinite($value)))) { + // Convert numeric errors to NaN error return Functions::NAN(); } @@ -3774,22 +3774,22 @@ private function _parseFormula($formula, ?Cell $pCell = null) $pCellParent = ($pCell !== null) ? $pCell->getWorksheet() : null; $regexpMatchString = '/^(' . self::CALCULATION_REGEXP_FUNCTION . - '|' . self::CALCULATION_REGEXP_CELLREF . - '|' . self::CALCULATION_REGEXP_NUMBER . - '|' . self::CALCULATION_REGEXP_STRING . - '|' . self::CALCULATION_REGEXP_OPENBRACE . - '|' . self::CALCULATION_REGEXP_NAMEDRANGE . - '|' . self::CALCULATION_REGEXP_ERROR . - ')/sui'; + '|' . self::CALCULATION_REGEXP_CELLREF . + '|' . self::CALCULATION_REGEXP_NUMBER . + '|' . self::CALCULATION_REGEXP_STRING . + '|' . self::CALCULATION_REGEXP_OPENBRACE . + '|' . self::CALCULATION_REGEXP_NAMEDRANGE . + '|' . self::CALCULATION_REGEXP_ERROR . + ')/sui'; // Start with initialisation $index = 0; $stack = new Stack(); $output = []; $expectingOperator = false; // We use this test in syntax-checking the expression to determine when a - // - is a negation or + is a positive operator rather than an operation + // - is a negation or + is a positive operator rather than an operation $expectingOperand = false; // We use this test in syntax-checking the expression to determine whether an operand - // should be null in a function call + // should be null in a function call // IF branch pruning // currently pending storeKey (last item of the storeKeysStack @@ -4172,7 +4172,7 @@ private function _parseFormula($formula, ?Cell $pCell = null) ((preg_match('/^' . self::CALCULATION_REGEXP_CELLREF . '.*/Ui', substr($formula, $index), $match)) && ($output[count($output) - 1]['type'] == 'Cell Reference') || (preg_match('/^' . self::CALCULATION_REGEXP_NAMEDRANGE . 
'.*/miu', substr($formula, $index), $match)) && - ($output[count($output) - 1]['type'] == 'Named Range' || $output[count($output) - 1]['type'] == 'Value') + ($output[count($output) - 1]['type'] == 'Named Range' || $output[count($output) - 1]['type'] == 'Value') )) { while ($stack->count() > 0 && ($o2 = $stack->last()) && @@ -4951,7 +4951,7 @@ private function executeNumericBinaryOperation($operand1, $operand2, $operation, } else { if ((Functions::getCompatibilityMode() != Functions::COMPATIBILITY_OPENOFFICE) && ((is_string($operand1) && !is_numeric($operand1) && strlen($operand1) > 0) || - (is_string($operand2) && !is_numeric($operand2) && strlen($operand2) > 0))) { + (is_string($operand2) && !is_numeric($operand2) && strlen($operand2) > 0))) { $result = Functions::VALUE(); } else { // If we're dealing with non-matrix operations, execute the necessary operation @@ -4980,7 +4980,7 @@ private function executeNumericBinaryOperation($operand1, $operand2, $operation, return false; } - $result = $operand1 / $operand2; + $result = $operand1 / $operand2; break; // Power diff --git a/src/PhpSpreadsheet/Calculation/Statistical.php b/src/PhpSpreadsheet/Calculation/Statistical.php index 100eb2fa0b..b44e6c6f21 100644 --- a/src/PhpSpreadsheet/Calculation/Statistical.php +++ b/src/PhpSpreadsheet/Calculation/Statistical.php @@ -779,7 +779,7 @@ public static function BETADIST($value, $alpha, $beta, $rMin = 0, $rMax = 1) /** * BETAINV. * - * Returns the inverse of the beta distribution. + * Returns the inverse of the Beta distribution. 
* * @param float $probability Probability at which you want to evaluate the distribution * @param float $alpha Parameter to the distribution @@ -1475,6 +1475,62 @@ public static function EXPONDIST($value, $lambda, $cumulative) return Functions::VALUE(); } + private static function betaFunction($a, $b) + { + return (self::gamma($a) * self::gamma($b)) / self::gamma($a + $b); + } + + private static function regularizedIncompleteBeta($value, $a, $b) + { + return self::incompleteBeta($value, $a, $b) / self::betaFunction($a, $b); + } + + /** + * F.DIST. + * + * Returns the F probability distribution. + * You can use this function to determine whether two data sets have different degrees of diversity. + * For example, you can examine the test scores of men and women entering high school, and determine + * if the variability in the females is different from that found in the males. + * + * @param float $value Value of the function + * @param int $u The numerator degrees of freedom + * @param int $v The denominator degrees of freedom + * @param bool $cumulative If cumulative is TRUE, F.DIST returns the cumulative distribution function; + * if FALSE, it returns the probability density function. 
+ * + * @return float|string + */ + public static function FDIST2($value, $u, $v, $cumulative) + { + $value = Functions::flattenSingleValue($value); + $u = Functions::flattenSingleValue($u); + $v = Functions::flattenSingleValue($v); + $cumulative = Functions::flattenSingleValue($cumulative); + + if (is_numeric($value) && is_numeric($u) && is_numeric($v)) { + if ($value < 0 || $u < 1 || $v < 1) { + return Functions::NAN(); + } + + $cumulative = (bool) $cumulative; + $u = (int) $u; + $v = (int) $v; + + if ($cumulative) { + $adjustedValue = ($u * $value) / ($u * $value + $v); + + return self::incompleteBeta($adjustedValue, $u / 2, $v / 2); + } + + return (self::gamma(($v + $u) / 2) / (self::gamma($u / 2) * self::gamma($v / 2))) * + (($u / $v) ** ($u / 2)) * + (($value ** (($u - 2) / 2)) / ((1 + ($u / $v) * $value) ** (($u + $v) / 2))); + } + + return Functions::VALUE(); + } + /** * FISHER. * @@ -1556,6 +1612,27 @@ public static function FORECAST($xValue, $yValues, $xValues) return $bestFitLinear->getValueOfYForX($xValue); } + /** + * GAMMA. + * + * Return the gamma function value. + * + * @param float $value + * + * @return float|string The result, or a string containing an error + */ + public static function GAMMAFunction($value) + { + $value = Functions::flattenSingleValue($value); + if (!is_numeric($value)) { + return Functions::VALUE(); + } elseif ((((int) $value) == ((float) $value)) && $value <= 0.0) { + return Functions::NAN(); + } + + return self::gamma($value); + } + /** * GAMMADIST. * @@ -1593,7 +1670,7 @@ public static function GAMMADIST($value, $a, $b, $cumulative) /** * GAMMAINV. * - * Returns the inverse of the beta distribution. + * Returns the inverse of the Gamma distribution. * * @param float $probability Probability at which you want to evaluate the distribution * @param float $alpha Parameter to the distribution @@ -1677,6 +1754,26 @@ public static function GAMMALN($value) return Functions::VALUE(); } + /** + * GAUSS. 
+ * + * Calculates the probability that a member of a standard normal population will fall between + * the mean and z standard deviations from the mean. + * + * @param float $value + * + * @return float|string The result, or a string containing an error + */ + public static function GAUSS($value) + { + $value = Functions::flattenSingleValue($value); + if (!is_numeric($value)) { + return Functions::VALUE(); + } + + return self::NORMDIST($value, 0, 1, true) - 0.5; + } + /** * GEOMEAN. * @@ -2117,6 +2214,42 @@ public static function LOGNORMDIST($value, $mean, $stdDev) return Functions::VALUE(); } + /** + * LOGNORM.DIST. + * + * Returns the lognormal distribution of x, where ln(x) is normally distributed + * with parameters mean and standard_dev. + * + * @param float $value + * @param float $mean + * @param float $stdDev + * @param bool $cumulative + * + * @return float|string The result, or a string containing an error + */ + public static function LOGNORMDIST2($value, $mean, $stdDev, $cumulative = false) + { + $value = Functions::flattenSingleValue($value); + $mean = Functions::flattenSingleValue($mean); + $stdDev = Functions::flattenSingleValue($stdDev); + $cumulative = (bool) Functions::flattenSingleValue($cumulative); + + if ((is_numeric($value)) && (is_numeric($mean)) && (is_numeric($stdDev))) { + if (($value <= 0) || ($stdDev <= 0)) { + return Functions::NAN(); + } + + if ($cumulative === true) { + return self::NORMSDIST2((log($value) - $mean) / $stdDev, true); + } + + return (1 / (sqrt(2 * M_PI) * $stdDev * $value)) * + exp(0 - ((log($value) - $mean) ** 2 / (2 * $stdDev ** 2))); + } + + return Functions::VALUE(); + } + /** * MAX. * @@ -2623,10 +2756,36 @@ public static function NORMINV($probability, $mean, $stdDev) public static function NORMSDIST($value) { $value = Functions::flattenSingleValue($value); + if (!is_numeric($value)) { + return Functions::VALUE(); + } return self::NORMDIST($value, 0, 1, true); } + /** + * NORM.S.DIST. 
+ * + * Returns the standard normal cumulative distribution function. The distribution has + * a mean of 0 (zero) and a standard deviation of one. Use this function in place of a + * table of standard normal curve areas. + * + * @param float $value + * @param bool $cumulative + * + * @return float|string The result, or a string containing an error + */ + public static function NORMSDIST2($value, $cumulative) + { + $value = Functions::flattenSingleValue($value); + if (!is_numeric($value)) { + return Functions::VALUE(); + } + $cumulative = (bool) Functions::flattenSingleValue($cumulative); + + return self::NORMDIST($value, 0, 1, $cumulative); + } + /** * NORMSINV. * diff --git a/tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/FDist2Test.php b/tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/FDist2Test.php new file mode 100644 index 0000000000..a6e3442913 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/FDist2Test.php @@ -0,0 +1,25 @@ +