Start implementing Newton-Raphson for the inverse of Statistical Distributions (#1958)

* Start implementing Newton-Raphson for the inverse of Statistical Distributions, starting with the two-tailed Student-T
* Additional unit tests and validations
* Use the new Newton Raphson class for calculating the Inverse of ChiSquared
* Extract Weibull distribution, and provide unit tests
This commit is contained in:
Mark Baker 2021-03-27 13:29:58 +01:00 committed by GitHub
parent c699d144e2
commit ec2531411d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 493 additions and 165 deletions

View File

@ -2391,7 +2391,7 @@ class Calculation
],
'TDIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'TDIST'],
'functionCall' => [Statistical\Distributions\StudentT::class, 'distribution'],
'argumentCount' => '3',
],
'T.DIST' => [
@ -2431,12 +2431,12 @@ class Calculation
],
'TINV' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'TINV'],
'functionCall' => [Statistical\Distributions\StudentT::class, 'inverse'],
'argumentCount' => '2',
],
'T.INV' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'TINV'],
'functionCall' => [Statistical\Distributions\StudentT::class, 'inverse'],
'argumentCount' => '2',
],
'T.INV.2T' => [
@ -2581,12 +2581,12 @@ class Calculation
],
'WEIBULL' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'WEIBULL'],
'functionCall' => [Statistical\Distributions\Weibull::class, 'distribution'],
'argumentCount' => '4',
],
'WEIBULL.DIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'WEIBULL'],
'functionCall' => [Statistical\Distributions\Weibull::class, 'distribution'],
'argumentCount' => '4',
],
'WORKDAY' => [

View File

@ -2140,6 +2140,11 @@ class Statistical
*
* Returns the probability of Student's T distribution.
*
* @deprecated 1.18.0
*
* @see Statistical\Distributions\StudentT::distribution()
* Use the distribution() method in the Statistical\Distributions\StudentT class instead
*
* @param float $value Value for the function
* @param float $degrees degrees of freedom
* @param float $tails number of tails (1 or 2)
@ -2148,55 +2153,7 @@ class Statistical
*/
public static function TDIST($value, $degrees, $tails)
{
$value = Functions::flattenSingleValue($value);
$degrees = floor(Functions::flattenSingleValue($degrees));
$tails = floor(Functions::flattenSingleValue($tails));
if ((is_numeric($value)) && (is_numeric($degrees)) && (is_numeric($tails))) {
if (($value < 0) || ($degrees < 1) || ($tails < 1) || ($tails > 2)) {
return Functions::NAN();
}
// tdist, which finds the probability that corresponds to a given value
// of t with k degrees of freedom. This algorithm is translated from a
// pascal function on p81 of "Statistical Computing in Pascal" by D
// Cooke, A H Craven & G M Clark (1985: Edward Arnold (Pubs.) Ltd:
// London). The above Pascal algorithm is itself a translation of the
// fortran algoritm "AS 3" by B E Cooper of the Atlas Computer
// Laboratory as reported in (among other places) "Applied Statistics
// Algorithms", editied by P Griffiths and I D Hill (1985; Ellis
// Horwood Ltd.; W. Sussex, England).
$tterm = $degrees;
$ttheta = atan2($value, sqrt($tterm));
$tc = cos($ttheta);
$ts = sin($ttheta);
if (($degrees % 2) == 1) {
$ti = 3;
$tterm = $tc;
} else {
$ti = 2;
$tterm = 1;
}
$tsum = $tterm;
while ($ti < $degrees) {
$tterm *= $tc * $tc * ($ti - 1) / $ti;
$tsum += $tterm;
$ti += 2;
}
$tsum *= $ts;
if (($degrees % 2) == 1) {
$tsum = Functions::M_2DIVPI * ($tsum + $ttheta);
}
$tValue = 0.5 * (1 + $tsum);
if ($tails == 1) {
return 1 - abs($tValue);
}
return 1 - abs((1 - $tValue) - $tValue);
}
return Functions::VALUE();
return Statistical\Distributions\StudentT::distribution($value, $degrees, $tails);
}
/**
@ -2204,6 +2161,11 @@ class Statistical
*
* Returns the two-tailed inverse of the Student's T distribution.
*
* @deprecated 1.18.0
*
* @see Statistical\Distributions\StudentT::inverse()
* Use the inverse() method in the Statistical\Distributions\StudentT class instead
*
* @param float $probability Probability for the function
* @param float $degrees degrees of freedom
*
@ -2211,50 +2173,7 @@ class Statistical
*/
public static function TINV($probability, $degrees)
{
$probability = Functions::flattenSingleValue($probability);
$degrees = floor(Functions::flattenSingleValue($degrees));
if ((is_numeric($probability)) && (is_numeric($degrees))) {
$xLo = 100;
$xHi = 0;
$x = $xNew = 1;
$dx = 1;
$i = 0;
while ((abs($dx) > Functions::PRECISION) && ($i++ < self::MAX_ITERATIONS)) {
// Apply Newton-Raphson step
$result = self::TDIST($x, $degrees, 2);
$error = $result - $probability;
if ($error == 0.0) {
$dx = 0;
} elseif ($error < 0.0) {
$xLo = $x;
} else {
$xHi = $x;
}
// Avoid division by zero
if ($result != 0.0) {
$dx = $error / $result;
$xNew = $x - $dx;
}
// If the NR fails to converge (which for example may be the
// case if the initial guess is too rough) we apply a bisection
// step to determine a more narrow interval around the root.
if (($xNew < $xLo) || ($xNew > $xHi) || ($result == 0.0)) {
$xNew = ($xLo + $xHi) / 2;
$dx = $xNew - $x;
}
$x = $xNew;
}
if ($i == self::MAX_ITERATIONS) {
return Functions::NA();
}
return round($x, 12);
}
return Functions::VALUE();
return Statistical\Distributions\StudentT::inverse($probability, $degrees);
}
/**
@ -2421,6 +2340,11 @@ class Statistical
* Returns the Weibull distribution. Use this distribution in reliability
* analysis, such as calculating a device's mean time to failure.
*
* @deprecated 1.18.0
*
* @see Statistical\Distributions\Weibull::distribution()
* Use the distribution() method in the Statistical\Distributions\Weibull class instead
*
* @param float $value
* @param float $alpha Alpha Parameter
* @param float $beta Beta Parameter
@ -2430,24 +2354,7 @@ class Statistical
*/
public static function WEIBULL($value, $alpha, $beta, $cumulative)
{
$value = Functions::flattenSingleValue($value);
$alpha = Functions::flattenSingleValue($alpha);
$beta = Functions::flattenSingleValue($beta);
if ((is_numeric($value)) && (is_numeric($alpha)) && (is_numeric($beta))) {
if (($value < 0) || ($alpha <= 0) || ($beta <= 0)) {
return Functions::NAN();
}
if ((is_numeric($cumulative)) || (is_bool($cumulative))) {
if ($cumulative) {
return 1 - exp(0 - ($value / $beta) ** $alpha);
}
return ($alpha / $beta ** $alpha) * $value ** ($alpha - 1) * exp(0 - ($value / $beta) ** $alpha);
}
}
return Functions::VALUE();
return Statistical\Distributions\Weibull::distribution($value, $alpha, $beta, $cumulative);
}
/**

View File

@ -73,55 +73,13 @@ class ChiSquared
return Functions::NAN();
}
return self::calculateInverse($degrees, $probability);
}
/**
* @return float|string
*/
protected static function calculateInverse(int $degrees, float $probability)
{
$xLo = 100;
$xHi = 0;
$x = $xNew = 1;
$dx = 1;
$i = 0;
while ((abs($dx) > Functions::PRECISION) && (++$i <= self::MAX_ITERATIONS)) {
// Apply Newton-Raphson step
$result = 1 - (Gamma::incompleteGamma($degrees / 2, $x / 2)
$callback = function ($value) use ($degrees) {
return 1 - (Gamma::incompleteGamma($degrees / 2, $value / 2)
/ Gamma::gammaValue($degrees / 2));
$error = $result - $probability;
};
if ($error == 0.0) {
$dx = 0;
} elseif ($error < 0.0) {
$xLo = $x;
} else {
$xHi = $x;
}
$newtonRaphson = new NewtonRaphson($callback);
// Avoid division by zero
if ($result != 0.0) {
$dx = $error / $result;
$xNew = $x - $dx;
}
// If the NR fails to converge (which for example may be the
// case if the initial guess is too rough) we apply a bisection
// step to determine a more narrow interval around the root.
if (($xNew < $xLo) || ($xNew > $xHi) || ($result == 0.0)) {
$xNew = ($xLo + $xHi) / 2;
$dx = $xNew - $x;
}
$x = $xNew;
}
if ($i === self::MAX_ITERATIONS) {
return Functions::NA();
}
return $x;
return $newtonRaphson->execute($probability);
}
}

View File

@ -36,8 +36,11 @@ abstract class GammaBase
while ((abs($dx) > Functions::PRECISION) && (++$i <= self::MAX_ITERATIONS)) {
// Apply Newton-Raphson step
$error = self::calculateDistribution($x, $alpha, $beta, true) - $probability;
if ($error < 0.0) {
$result = self::calculateDistribution($x, $alpha, $beta, true);
$error = $result - $probability;
if ($error == 0.0) {
$dx = 0;
} elseif ($error < 0.0) {
$xLo = $x;
} else {
$xHi = $x;

View File

@ -0,0 +1,62 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
/**
 * Iterative solver that searches for the x at which a callback equals a target value.
 *
 * The callback evaluates a function at a trial x; execute() refines x until the
 * step between successive trial values is no larger than Functions::PRECISION,
 * or until MAX_ITERATIONS steps have been taken.
 */
class NewtonRaphson
{
    /** Maximum number of refinement steps before the search is abandoned. */
    private const MAX_ITERATIONS = 256;

    /**
     * Function being inverted; it is called with a single trial x.
     *
     * @var callable
     */
    protected $callback;

    /**
     * @param callable $callback Function of one argument (the trial x) whose value is matched against the target
     */
    public function __construct(callable $callback)
    {
        $this->callback = $callback;
    }

    /**
     * Search for the x at which the callback returns $probability.
     *
     * @param float $probability Target value the callback result should match
     *
     * @return float|int|string The x that was found; Functions::NA() if the search
     *                          did not converge within MAX_ITERATIONS steps; or the
     *                          callback's own non-numeric string result if it returns one
     */
    public function execute($probability)
    {
        // Search bracket. Despite the names, $xLo starts at 100 and $xHi at 0:
        // $xLo records the last x whose result was below the target, and
        // $xHi the last x whose result was above it.
        $xLo = 100;
        $xHi = 0;
        $x = $xNew = 1;
        $dx = 1;
        $i = 0;

        while ((abs($dx) > Functions::PRECISION) && ($i++ < self::MAX_ITERATIONS)) {
            $result = ($this->callback)($x);

            // A non-numeric string from the callback cannot take part in the
            // arithmetic below, so hand it straight back to the caller.
            if (is_string($result) && !is_numeric($result)) {
                return $result;
            }

            $error = $result - $probability;

            if ($error == 0.0) {
                $dx = 0;
            } elseif ($error < 0.0) {
                $xLo = $x;
            } else {
                $xHi = $x;
            }

            // Avoid division by zero
            // NOTE(review): the step is error / result rather than error / derivative,
            // so this is not a textbook Newton-Raphson update - confirm this is intended.
            if ($result != 0.0) {
                $dx = $error / $result;
                $xNew = $x - $dx;
            }

            // If the proposed x is rejected by the bracket test (or no step could be
            // computed because the result was zero), fall back to a bisection step
            // to narrow the interval around the root.
            if (($xNew < $xLo) || ($xNew > $xHi) || ($result == 0.0)) {
                $xNew = ($xLo + $xHi) / 2;
                $dx = $xNew - $x;
            }

            $x = $xNew;
        }

        // Decide on convergence from the final step size, not from the counter:
        // the post-incremented counter overshoots MAX_ITERATIONS when the loop is
        // exhausted, and equals it when convergence happens on the very last pass.
        if (abs($dx) > Functions::PRECISION) {
            return Functions::NA();
        }

        return $x;
    }
}

View File

@ -0,0 +1,127 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions;
use PhpOffice\PhpSpreadsheet\Calculation\Exception;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
/**
 * Student's T distribution: the probability function and its two-tailed inverse.
 */
class StudentT
{
    use BaseValidations;

    /**
     * TDIST.
     *
     * Returns the probability of Student's T distribution.
     *
     * @param mixed (float) $value Value for the function
     * @param mixed (float) $degrees degrees of freedom
     * @param mixed (int) $tails number of tails (1 or 2)
     *
     * @return float|string The result, or a string containing an error
     */
    public static function distribution($value, $degrees, $tails)
    {
        $value = Functions::flattenSingleValue($value);
        $degrees = Functions::flattenSingleValue($degrees);
        $tails = Functions::flattenSingleValue($tails);

        try {
            $value = self::validateFloat($value);
            $degrees = self::validateInt($degrees);
            $tails = self::validateInt($tails);
        } catch (Exception $e) {
            return $e->getMessage();
        }

        if (($value < 0) || ($degrees < 1) || ($tails < 1) || ($tails > 2)) {
            return Functions::NAN();
        }

        return self::calculateDistribution($value, $degrees, $tails);
    }

    /**
     * TINV.
     *
     * Returns the two-tailed inverse of Student's T distribution: the value whose
     * two-tailed distribution() result equals the given probability.
     *
     * @param mixed (float) $probability Probability for the function; must be greater than 0 and no greater than 1
     * @param mixed (float) $degrees degrees of freedom; must be at least 1
     *
     * @return float|string The result, or a string containing an error
     */
    public static function inverse($probability, $degrees)
    {
        $probability = Functions::flattenSingleValue($probability);
        $degrees = Functions::flattenSingleValue($degrees);

        try {
            $probability = self::validateFloat($probability);
            $degrees = self::validateInt($degrees);
        } catch (Exception $e) {
            return $e->getMessage();
        }

        // A probability of exactly 0 has no finite inverse, so it is rejected
        // together with the other out-of-range inputs.
        if ($probability <= 0.0 || $probability > 1.0 || $degrees <= 0) {
            return Functions::NAN();
        }

        $callback = function ($value) use ($degrees) {
            return self::distribution($value, $degrees, 2);
        };

        $newtonRaphson = new NewtonRaphson($callback);

        return $newtonRaphson->execute($probability);
    }

    /**
     * Evaluate the distribution for arguments that have already been validated.
     *
     * @param float $value Value for the function
     * @param int $degrees degrees of freedom
     * @param int $tails number of tails (1 or 2)
     */
    private static function calculateDistribution(float $value, int $degrees, int $tails): float
    {
        //    tdist, which finds the probability that corresponds to a given value
        //    of t with k degrees of freedom. This algorithm is translated from a
        //    pascal function on p81 of "Statistical Computing in Pascal" by D
        //    Cooke, A H Craven & G M Clark (1985: Edward Arnold (Pubs.) Ltd:
        //    London). The above Pascal algorithm is itself a translation of the
        //    fortran algorithm "AS 3" by B E Cooper of the Atlas Computer
        //    Laboratory as reported in (among other places) "Applied Statistics
        //    Algorithms", edited by P Griffiths and I D Hill (1985; Ellis
        //    Horwood Ltd.; W. Sussex, England).
        $isOdd = ($degrees % 2) === 1;

        $ttheta = atan2($value, sqrt($degrees));
        $tc = cos($ttheta);
        $ts = sin($ttheta);

        // The series starts from a different term and index for odd and even
        // degrees of freedom.
        if ($isOdd) {
            $ti = 3;
            $tterm = $tc;
        } else {
            $ti = 2;
            $tterm = 1;
        }

        $tsum = $tterm;
        while ($ti < $degrees) {
            $tterm *= $tc * $tc * ($ti - 1) / $ti;
            $tsum += $tterm;
            $ti += 2;
        }

        $tsum *= $ts;
        if ($isOdd) {
            $tsum = Functions::M_2DIVPI * ($tsum + $ttheta);
        }

        $tValue = 0.5 * (1 + $tsum);
        if ($tails === 1) {
            return 1 - abs($tValue);
        }

        return 1 - abs((1 - $tValue) - $tValue);
    }
}

View File

@ -0,0 +1,51 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions;
use PhpOffice\PhpSpreadsheet\Calculation\Exception;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
/**
 * Weibull distribution (cumulative distribution and probability density).
 */
class Weibull
{
    use BaseValidations;

    /**
     * WEIBULL.
     *
     * Evaluates the Weibull distribution at a value, either as the cumulative
     * distribution function or as the probability density function.
     *
     * @param mixed (float) $value Point at which to evaluate; must not be negative
     * @param mixed (float) $alpha Alpha Parameter; must be greater than 0
     * @param mixed (float) $beta Beta Parameter; must be greater than 0
     * @param mixed (bool) $cumulative True for the cumulative distribution, false for the density
     *
     * @return float|string (string if result is an error)
     */
    public static function distribution($value, $alpha, $beta, $cumulative)
    {
        $value = Functions::flattenSingleValue($value);
        $alpha = Functions::flattenSingleValue($alpha);
        $beta = Functions::flattenSingleValue($beta);
        $cumulative = Functions::flattenSingleValue($cumulative);

        try {
            $value = self::validateFloat($value);
            $alpha = self::validateFloat($alpha);
            $beta = self::validateFloat($beta);
            $cumulative = self::validateBool($cumulative);
        } catch (Exception $validationFailure) {
            // The validators carry the error code to return as the exception message.
            return $validationFailure->getMessage();
        }

        $outOfRange = ($value < 0) || ($alpha <= 0) || ($beta <= 0);
        if ($outOfRange) {
            return Functions::NAN();
        }

        // exp(-(value / beta) ^ alpha) is shared by both forms of the result.
        $decay = exp(-(($value / $beta) ** $alpha));

        if ($cumulative) {
            return 1 - $decay;
        }

        $scale = $alpha / ($beta ** $alpha);

        return $scale * ($value ** ($alpha - 1)) * $decay;
    }
}

View File

@ -0,0 +1,28 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Calculation\Functions\Statistical;
use PhpOffice\PhpSpreadsheet\Calculation\Statistical;
use PHPUnit\Framework\TestCase;
/**
 * Data-driven tests for Statistical::TDIST().
 */
class TDistTest extends TestCase
{
    /**
     * Each data set is compared with the TDIST result to within 1E-12.
     *
     * @dataProvider providerTDIST
     *
     * @param mixed $expectedResult
     * @param mixed $value
     * @param mixed $degrees
     * @param mixed $tails
     */
    public function testTDIST($expectedResult, $value, $degrees, $tails): void
    {
        $result = Statistical::TDIST($value, $degrees, $tails);
        self::assertEqualsWithDelta($expectedResult, $result, 1E-12);
    }

    /**
     * Loads the data sets from the shared data file.
     *
     * Declared static because PHPUnit 10 deprecates non-static data providers;
     * earlier PHPUnit versions accept static providers as well.
     *
     * @return array Rows of [expectedResult, value, degrees, tails]
     */
    public static function providerTDIST(): array
    {
        return require 'tests/data/Calculation/Statistical/TDIST.php';
    }
}

View File

@ -0,0 +1,27 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Calculation\Functions\Statistical;
use PhpOffice\PhpSpreadsheet\Calculation\Statistical;
use PHPUnit\Framework\TestCase;
/**
 * Data-driven tests for Statistical::TINV().
 */
class TinvTest extends TestCase
{
    /**
     * Each data set is compared with the TINV result to within 1E-12.
     *
     * @dataProvider providerTINV
     *
     * @param mixed $expectedResult
     * @param mixed $probability
     * @param mixed $degrees
     */
    public function testTINV($expectedResult, $probability, $degrees): void
    {
        $result = Statistical::TINV($probability, $degrees);
        self::assertEqualsWithDelta($expectedResult, $result, 1E-12);
    }

    /**
     * Loads the data sets from the shared data file.
     *
     * Declared static because PHPUnit 10 deprecates non-static data providers;
     * earlier PHPUnit versions accept static providers as well.
     *
     * @return array Rows of [expectedResult, probability, degrees]
     */
    public static function providerTINV(): array
    {
        return require 'tests/data/Calculation/Statistical/TINV.php';
    }
}

View File

@ -0,0 +1,29 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Calculation\Functions\Statistical;
use PhpOffice\PhpSpreadsheet\Calculation\Statistical;
use PHPUnit\Framework\TestCase;
/**
 * Data-driven tests for Statistical::WEIBULL().
 */
class WeibullTest extends TestCase
{
    /**
     * Each data set is compared with the WEIBULL result to within 1E-12.
     *
     * @dataProvider providerWEIBULL
     *
     * @param mixed $expectedResult
     * @param mixed $value
     * @param mixed $alpha
     * @param mixed $beta
     * @param mixed $cumulative
     */
    public function testWEIBULL($expectedResult, $value, $alpha, $beta, $cumulative): void
    {
        $result = Statistical::WEIBULL($value, $alpha, $beta, $cumulative);
        self::assertEqualsWithDelta($expectedResult, $result, 1E-12);
    }

    /**
     * Loads the data sets from the shared data file.
     *
     * Declared static because PHPUnit 10 deprecates non-static data providers;
     * earlier PHPUnit versions accept static providers as well.
     *
     * @return array Rows of [expectedResult, value, alpha, beta, cumulative]
     */
    public static function providerWEIBULL(): array
    {
        return require 'tests/data/Calculation/Statistical/WEIBULL.php';
    }
}

View File

@ -0,0 +1,56 @@
<?php
// Data sets for the TDIST test.
// Each row is: expected result, then the value, degrees and tails arguments.
return [
// Numeric arguments: each value/degrees pair is checked with 1 tail and with 2 tails
[
0.027322464988,
1.959999998, 60, 1,
],
[
0.054644929976,
1.959999998, 60, 2,
],
[
0.170446566151,
1, 10, 1,
],
[
0.340893132302,
1, 10, 2,
],
[
0.028237990213,
2, 25, 1,
],
[
0.056475980427,
2, 25, 2,
],
// A non-numeric string in any argument position is expected to give #VALUE!
[
'#VALUE!',
'NaN', 10, 2,
],
[
'#VALUE!',
1, 'NaN', 2,
],
[
'#VALUE!',
1, 10, 'NaN',
],
// Out-of-range arguments are expected to give #NUM!:
// negative value, zero degrees, and tails of 0 or 3
[
'#NUM!',
-1, 10, 2,
],
[
'#NUM!',
1, 0, 2,
],
[
'#NUM!',
1, 10, 0,
],
[
'#NUM!',
1, 10, 3,
],
];

View File

@ -0,0 +1,32 @@
<?php
// Data sets for the TINV test.
// Each row is: expected result, then the probability and degrees arguments.
return [
// Numeric arguments
[
1.960041187127,
0.05464, 60,
],
[
1.221255395004,
0.25, 10,
],
[
0.699812061312,
0.5, 10,
],
// A non-numeric string in either argument position is expected to give #VALUE!
[
'#VALUE!',
'NaN', 10,
],
[
'#VALUE!',
0.5, 'NaN',
],
// Out-of-range arguments are expected to give #NUM!:
// negative probability and zero degrees
[
'#NUM!',
-0.5, 10,
],
[
'#NUM!',
0.5, 0,
],
];

View File

@ -0,0 +1,48 @@
<?php
// Data sets for the WEIBULL test.
// Each row is: expected result, then the value, alpha, beta and cumulative arguments.
return [
    // Numeric arguments, covering both the cumulative and the density form
    [
        0.929581390070,
        105, 20, 100, true,
    ],
    [
        0.035588864025,
        105, 20, 100, false,
    ],
    [
        1.10363832351433,
        1, 3, 1, false,
    ],
    [
        0.985212776817482,
        2, 5, 1.5, true,
    ],
    // A non-numeric string in any argument position is expected to give #VALUE!
    [
        '#VALUE!',
        'NaN', 5, 1.5, true,
    ],
    [
        '#VALUE!',
        2, 'NaN', 1.5, true,
    ],
    [
        '#VALUE!',
        2, 5, 'NaN', true,
    ],
    [
        '#VALUE!',
        2, 5, 1.5, 'NaN',
    ],
    // Out-of-range arguments are expected to give #NUM!.
    // Each row breaks exactly one constraint, so the value, alpha and beta
    // range checks are exercised independently of each other.
    [
        '#NUM!',
        -2, 5, 1.5, true,
    ],
    [
        '#NUM!',
        2, 0, 1.5, true,
    ],
    [
        '#NUM!',
        2, 5, 0, true,
    ],
];