From ec2531411da6ea57c5d1aa5ec2c7179ee6f58bc4 Mon Sep 17 00:00:00 2001 From: Mark Baker Date: Sat, 27 Mar 2021 13:29:58 +0100 Subject: [PATCH] Start implementing Newton-Raphson for the inverse of Statistical Distributions (#1958) * Start implementing Newton-Raphson for the inverse of Statistical Distributions, starting with the two-tailed Student-T * Additional unit tests and validations * Use the new Newton Raphson class for calculating the Inverse of ChiSquared * Extract Weibull distribution, and provide unit tests --- .../Calculation/Calculation.php | 10 +- .../Calculation/Statistical.php | 129 +++--------------- .../Statistical/Distributions/ChiSquared.php | 52 +------ .../Statistical/Distributions/GammaBase.php | 7 +- .../Distributions/NewtonRaphson.php | 62 +++++++++ .../Statistical/Distributions/StudentT.php | 127 +++++++++++++++++ .../Statistical/Distributions/Weibull.php | 51 +++++++ .../Functions/Statistical/TDistTest.php | 28 ++++ .../Functions/Statistical/TinvTest.php | 27 ++++ .../Functions/Statistical/WeibullTest.php | 29 ++++ tests/data/Calculation/Statistical/TDIST.php | 56 ++++++++ tests/data/Calculation/Statistical/TINV.php | 32 +++++ .../data/Calculation/Statistical/WEIBULL.php | 48 +++++++ 13 files changed, 493 insertions(+), 165 deletions(-) create mode 100644 src/PhpSpreadsheet/Calculation/Statistical/Distributions/NewtonRaphson.php create mode 100644 src/PhpSpreadsheet/Calculation/Statistical/Distributions/StudentT.php create mode 100644 src/PhpSpreadsheet/Calculation/Statistical/Distributions/Weibull.php create mode 100644 tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/TDistTest.php create mode 100644 tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/TinvTest.php create mode 100644 tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/WeibullTest.php create mode 100644 tests/data/Calculation/Statistical/TDIST.php create mode 100644 tests/data/Calculation/Statistical/TINV.php create mode 100644 tests/data/Calculation/Statistical/WEIBULL.php diff --git a/src/PhpSpreadsheet/Calculation/Calculation.php b/src/PhpSpreadsheet/Calculation/Calculation.php index e1ccb74b..6e98fcb0 100644 --- a/src/PhpSpreadsheet/Calculation/Calculation.php +++ b/src/PhpSpreadsheet/Calculation/Calculation.php @@ -2391,7 +2391,7 @@ class Calculation ], 'TDIST' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Statistical::class, 'TDIST'], + 'functionCall' => [Statistical\Distributions\StudentT::class, 'distribution'], 'argumentCount' => '3', ], 'T.DIST' => [ @@ -2431,12 +2431,12 @@ class Calculation ], 'TINV' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Statistical::class, 'TINV'], + 'functionCall' => [Statistical\Distributions\StudentT::class, 'inverse'], 'argumentCount' => '2', ], 'T.INV' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Statistical::class, 'TINV'], + 'functionCall' => [Statistical\Distributions\StudentT::class, 'inverse'], 'argumentCount' => '2', ], 'T.INV.2T' => [ @@ -2581,12 +2581,12 @@ class Calculation ], 'WEIBULL' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Statistical::class, 'WEIBULL'], + 'functionCall' => [Statistical\Distributions\Weibull::class, 'distribution'], 'argumentCount' => '4', ], 'WEIBULL.DIST' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Statistical::class, 'WEIBULL'], + 'functionCall' => [Statistical\Distributions\Weibull::class, 'distribution'], 'argumentCount' => '4', ], 'WORKDAY' => [ diff --git a/src/PhpSpreadsheet/Calculation/Statistical.php b/src/PhpSpreadsheet/Calculation/Statistical.php index 695d7cbd..5f557431 100644 --- a/src/PhpSpreadsheet/Calculation/Statistical.php +++ b/src/PhpSpreadsheet/Calculation/Statistical.php @@ -2140,6 +2140,11 @@ class Statistical * * Returns the probability of Student's T distribution. * + * @Deprecated 1.18.0 + * + * @see Statistical\Distributions\StudentT::distribution() + * Use the distribution() method in the Statistical\Distributions\StudentT class instead + * * @param float $value Value for the function * @param float $degrees degrees of freedom * @param float $tails number of tails (1 or 2) @@ -2148,55 +2153,7 @@ class Statistical */ public static function TDIST($value, $degrees, $tails) { - $value = Functions::flattenSingleValue($value); - $degrees = floor(Functions::flattenSingleValue($degrees)); - $tails = floor(Functions::flattenSingleValue($tails)); - - if ((is_numeric($value)) && (is_numeric($degrees)) && (is_numeric($tails))) { - if (($value < 0) || ($degrees < 1) || ($tails < 1) || ($tails > 2)) { - return Functions::NAN(); - } - // tdist, which finds the probability that corresponds to a given value - // of t with k degrees of freedom. This algorithm is translated from a - // pascal function on p81 of "Statistical Computing in Pascal" by D - // Cooke, A H Craven & G M Clark (1985: Edward Arnold (Pubs.) Ltd: - // London). The above Pascal algorithm is itself a translation of the - // fortran algoritm "AS 3" by B E Cooper of the Atlas Computer - // Laboratory as reported in (among other places) "Applied Statistics - // Algorithms", editied by P Griffiths and I D Hill (1985; Ellis - // Horwood Ltd.; W. Sussex, England). - $tterm = $degrees; - $ttheta = atan2($value, sqrt($tterm)); - $tc = cos($ttheta); - $ts = sin($ttheta); - - if (($degrees % 2) == 1) { - $ti = 3; - $tterm = $tc; - } else { - $ti = 2; - $tterm = 1; - } - - $tsum = $tterm; - while ($ti < $degrees) { - $tterm *= $tc * $tc * ($ti - 1) / $ti; - $tsum += $tterm; - $ti += 2; - } - $tsum *= $ts; - if (($degrees % 2) == 1) { - $tsum = Functions::M_2DIVPI * ($tsum + $ttheta); - } - $tValue = 0.5 * (1 + $tsum); - if ($tails == 1) { - return 1 - abs($tValue); - } - - return 1 - abs((1 - $tValue) - $tValue); - } - - return Functions::VALUE(); + return Statistical\Distributions\StudentT::distribution($value, $degrees, $tails); } /** @@ -2204,6 +2161,11 @@ class Statistical * * Returns the one-tailed probability of the chi-squared distribution. * + * @Deprecated 1.18.0 + * + * @see Statistical\Distributions\StudentT::inverse() + * Use the inverse() method in the Statistical\Distributions\StudentT class instead + * * @param float $probability Probability for the function * @param float $degrees degrees of freedom * @@ -2211,50 +2173,7 @@ class Statistical */ public static function TINV($probability, $degrees) { - $probability = Functions::flattenSingleValue($probability); - $degrees = floor(Functions::flattenSingleValue($degrees)); - - if ((is_numeric($probability)) && (is_numeric($degrees))) { - $xLo = 100; - $xHi = 0; - - $x = $xNew = 1; - $dx = 1; - $i = 0; - - while ((abs($dx) > Functions::PRECISION) && ($i++ < self::MAX_ITERATIONS)) { - // Apply Newton-Raphson step - $result = self::TDIST($x, $degrees, 2); - $error = $result - $probability; - if ($error == 0.0) { - $dx = 0; - } elseif ($error < 0.0) { - $xLo = $x; - } else { - $xHi = $x; - } - // Avoid division by zero - if ($result != 0.0) { - $dx = $error / $result; - $xNew = $x - $dx; - } - // If the NR fails to converge (which for example may be the - // case if the initial guess is too rough) we apply a bisection - // step to determine a more narrow interval around the root. - if (($xNew < $xLo) || ($xNew > $xHi) || ($result == 0.0)) { - $xNew = ($xLo + $xHi) / 2; - $dx = $xNew - $x; - } - $x = $xNew; - } - if ($i == self::MAX_ITERATIONS) { - return Functions::NA(); - } - - return round($x, 12); - } - - return Functions::VALUE(); + return Statistical\Distributions\StudentT::inverse($probability, $degrees); } /** @@ -2421,6 +2340,11 @@ class Statistical * Returns the Weibull distribution. Use this distribution in reliability * analysis, such as calculating a device's mean time to failure. * + * @Deprecated 1.18.0 + * + * @see Statistical\Distributions\Weibull::distribution() + * Use the distribution() method in the Statistical\Distributions\Weibull class instead + * * @param float $value * @param float $alpha Alpha Parameter * @param float $beta Beta Parameter @@ -2430,24 +2354,7 @@ class Statistical */ public static function WEIBULL($value, $alpha, $beta, $cumulative) { - $value = Functions::flattenSingleValue($value); - $alpha = Functions::flattenSingleValue($alpha); - $beta = Functions::flattenSingleValue($beta); - - if ((is_numeric($value)) && (is_numeric($alpha)) && (is_numeric($beta))) { - if (($value < 0) || ($alpha <= 0) || ($beta <= 0)) { - return Functions::NAN(); - } - if ((is_numeric($cumulative)) || (is_bool($cumulative))) { - if ($cumulative) { - return 1 - exp(0 - ($value / $beta) ** $alpha); - } - - return ($alpha / $beta ** $alpha) * $value ** ($alpha - 1) * exp(0 - ($value / $beta) ** $alpha); - } - } - - return Functions::VALUE(); + return Statistical\Distributions\Weibull::distribution($value, $alpha, $beta, $cumulative); } /** diff --git a/src/PhpSpreadsheet/Calculation/Statistical/Distributions/ChiSquared.php b/src/PhpSpreadsheet/Calculation/Statistical/Distributions/ChiSquared.php index 2d5e4496..636189b5 100644 --- a/src/PhpSpreadsheet/Calculation/Statistical/Distributions/ChiSquared.php +++ b/src/PhpSpreadsheet/Calculation/Statistical/Distributions/ChiSquared.php @@ -73,55 +73,13 @@ class ChiSquared return Functions::NAN(); } - return self::calculateInverse($degrees, $probability); - } - - /** - * @return float|string - */ - protected static function calculateInverse(int $degrees, float $probability) - { - $xLo = 100; - $xHi = 0; - - $x = $xNew = 1; - $dx = 1; - $i = 0; - - while ((abs($dx) > Functions::PRECISION) && (++$i <= self::MAX_ITERATIONS)) { - // Apply Newton-Raphson step - $result = 1 - (Gamma::incompleteGamma($degrees / 2, $x / 2) + $callback = function ($value) use ($degrees) { + return 1 - (Gamma::incompleteGamma($degrees / 2, $value / 2) / Gamma::gammaValue($degrees / 2)); - $error = $result - $probability; + }; - if ($error == 0.0) { - $dx = 0; - } elseif ($error < 0.0) { - $xLo = $x; - } else { - $xHi = $x; - } + $newtonRaphson = new NewtonRaphson($callback); - // Avoid division by zero - if ($result != 0.0) { - $dx = $error / $result; - $xNew = $x - $dx; - } - - // If the NR fails to converge (which for example may be the - // case if the initial guess is too rough) we apply a bisection - // step to determine a more narrow interval around the root. - if (($xNew < $xLo) || ($xNew > $xHi) || ($result == 0.0)) { - $xNew = ($xLo + $xHi) / 2; - $dx = $xNew - $x; - } - $x = $xNew; - } - - if ($i === self::MAX_ITERATIONS) { - return Functions::NA(); - } - - return $x; + return $newtonRaphson->execute($probability); } } diff --git a/src/PhpSpreadsheet/Calculation/Statistical/Distributions/GammaBase.php b/src/PhpSpreadsheet/Calculation/Statistical/Distributions/GammaBase.php index ae951af3..3f76787d 100644 --- a/src/PhpSpreadsheet/Calculation/Statistical/Distributions/GammaBase.php +++ b/src/PhpSpreadsheet/Calculation/Statistical/Distributions/GammaBase.php @@ -36,8 +36,11 @@ abstract class GammaBase while ((abs($dx) > Functions::PRECISION) && (++$i <= self::MAX_ITERATIONS)) { // Apply Newton-Raphson step - $error = self::calculateDistribution($x, $alpha, $beta, true) - $probability; - if ($error < 0.0) { + $result = self::calculateDistribution($x, $alpha, $beta, true); + $error = $result - $probability; + if ($error == 0.0) { + $dx = 0; + } elseif ($error < 0.0) { $xLo = $x; } else { $xHi = $x; diff --git a/src/PhpSpreadsheet/Calculation/Statistical/Distributions/NewtonRaphson.php b/src/PhpSpreadsheet/Calculation/Statistical/Distributions/NewtonRaphson.php new file mode 100644 index 00000000..298cdfaf --- /dev/null +++ b/src/PhpSpreadsheet/Calculation/Statistical/Distributions/NewtonRaphson.php @@ -0,0 +1,62 @@ +callback = $callback; + } + + public function execute($probability) + { + $xLo = 100; + $xHi = 0; + + $x = $xNew = 1; + $dx = 1; + $i = 0; + + while ((abs($dx) > Functions::PRECISION) && ($i++ < self::MAX_ITERATIONS)) { + // Apply Newton-Raphson step + $result = call_user_func($this->callback, $x); + $error = $result - $probability; + + if ($error == 0.0) { + $dx = 0; + } elseif ($error < 0.0) { + $xLo = $x; + } else { + $xHi = $x; + } + + // Avoid division by zero + if ($result != 0.0) { + $dx = $error / $result; + $xNew = $x - $dx; + } + + // If the NR fails to converge (which for example may be the + // case if the initial guess is too rough) we apply a bisection + // step to determine a more narrow interval around the root. + if (($xNew < $xLo) || ($xNew > $xHi) || ($result == 0.0)) { + $xNew = ($xLo + $xHi) / 2; + $dx = $xNew - $x; + } + $x = $xNew; + } + + if ($i == self::MAX_ITERATIONS) { + return Functions::NA(); + } + + return $x; + } +} diff --git a/src/PhpSpreadsheet/Calculation/Statistical/Distributions/StudentT.php b/src/PhpSpreadsheet/Calculation/Statistical/Distributions/StudentT.php new file mode 100644 index 00000000..a6d23c6b --- /dev/null +++ b/src/PhpSpreadsheet/Calculation/Statistical/Distributions/StudentT.php @@ -0,0 +1,127 @@ +getMessage(); + } + + if (($value < 0) || ($degrees < 1) || ($tails < 1) || ($tails > 2)) { + return Functions::NAN(); + } + + return self::calculateDistribution($value, $degrees, $tails); + } + + /** + * TINV. + * + * Returns the one-tailed probability of the chi-squared distribution. + * + * @param mixed (float) $probability Probability for the function + * @param mixed (float) $degrees degrees of freedom + * + * @return float|string The result, or a string containing an error + */ + public static function inverse($probability, $degrees) + { + $probability = Functions::flattenSingleValue($probability); + $degrees = Functions::flattenSingleValue($degrees); + + try { + $probability = self::validateFloat($probability); + $degrees = self::validateInt($degrees); + } catch (Exception $e) { + return $e->getMessage(); + } + + if ($probability < 0.0 || $probability > 1.0 || $degrees <= 0) { + return Functions::NAN(); + } + + $callback = function ($value) use ($degrees) { + return self::distribution($value, $degrees, 2); + }; + + $newtonRaphson = new NewtonRaphson($callback); + + return $newtonRaphson->execute($probability); + } + + /** + * @return float|int + */ + private static function calculateDistribution(float $value, int $degrees, int $tails) + { + // tdist, which finds the probability that corresponds to a given value + // of t with k degrees of freedom. This algorithm is translated from a + // pascal function on p81 of "Statistical Computing in Pascal" by D + // Cooke, A H Craven & G M Clark (1985: Edward Arnold (Pubs.) Ltd: + // London). The above Pascal algorithm is itself a translation of the + // fortran algoritm "AS 3" by B E Cooper of the Atlas Computer + // Laboratory as reported in (among other places) "Applied Statistics + // Algorithms", editied by P Griffiths and I D Hill (1985; Ellis + // Horwood Ltd.; W. Sussex, England). + $tterm = $degrees; + $ttheta = atan2($value, sqrt($tterm)); + $tc = cos($ttheta); + $ts = sin($ttheta); + + if (($degrees % 2) === 1) { + $ti = 3; + $tterm = $tc; + } else { + $ti = 2; + $tterm = 1; + } + + $tsum = $tterm; + while ($ti < $degrees) { + $tterm *= $tc * $tc * ($ti - 1) / $ti; + $tsum += $tterm; + $ti += 2; + } + + $tsum *= $ts; + if (($degrees % 2) == 1) { + $tsum = Functions::M_2DIVPI * ($tsum + $ttheta); + } + + $tValue = 0.5 * (1 + $tsum); + if ($tails == 1) { + return 1 - abs($tValue); + } + + return 1 - abs((1 - $tValue) - $tValue); + } +} diff --git a/src/PhpSpreadsheet/Calculation/Statistical/Distributions/Weibull.php b/src/PhpSpreadsheet/Calculation/Statistical/Distributions/Weibull.php new file mode 100644 index 00000000..2064c2e2 --- /dev/null +++ b/src/PhpSpreadsheet/Calculation/Statistical/Distributions/Weibull.php @@ -0,0 +1,51 @@ +getMessage(); + } + + if (($value < 0) || ($alpha <= 0) || ($beta <= 0)) { + return Functions::NAN(); + } + + if ($cumulative) { + return 1 - exp(0 - ($value / $beta) ** $alpha); + } + + return ($alpha / $beta ** $alpha) * $value ** ($alpha - 1) * exp(0 - ($value / $beta) ** $alpha); + } +} diff --git a/tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/TDistTest.php b/tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/TDistTest.php new file mode 100644 index 00000000..a6a2c97e --- /dev/null +++ b/tests/PhpSpreadsheetTests/Calculation/Functions/Statistical/TDistTest.php @@ -0,0 +1,28 @@ +