Extract a few more Distribution functions from Statistical (#1975)

* Extract a few more Distribution functions from Statistical; this time EXPONDIST() and HYPGEOMDIST()

* Extract the F Distribution (although only F.DIST() is implemented so far)

* Update docblocks

* PHPCS
This commit is contained in:
Mark Baker 2021-03-31 21:45:06 +02:00 committed by GitHub
parent 029f345987
commit 17af13281b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 344 additions and 118 deletions

View File

@ -980,12 +980,12 @@ class Calculation
], ],
'EXPONDIST' => [ 'EXPONDIST' => [
'category' => Category::CATEGORY_STATISTICAL, 'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'EXPONDIST'], 'functionCall' => [Statistical\Distributions\Exponential::class, 'distribution'],
'argumentCount' => '3', 'argumentCount' => '3',
], ],
'EXPON.DIST' => [ 'EXPON.DIST' => [
'category' => Category::CATEGORY_STATISTICAL, 'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'EXPONDIST'], 'functionCall' => [Statistical\Distributions\Exponential::class, 'distribution'],
'argumentCount' => '3', 'argumentCount' => '3',
], ],
'FACT' => [ 'FACT' => [
@ -1010,7 +1010,7 @@ class Calculation
], ],
'F.DIST' => [ 'F.DIST' => [
'category' => Category::CATEGORY_STATISTICAL, 'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'FDIST2'], 'functionCall' => [Statistical\Distributions\F::class, 'distribution'],
'argumentCount' => '4', 'argumentCount' => '4',
], ],
'F.DIST.RT' => [ 'F.DIST.RT' => [
@ -1248,7 +1248,7 @@ class Calculation
], ],
'HYPGEOMDIST' => [ 'HYPGEOMDIST' => [
'category' => Category::CATEGORY_STATISTICAL, 'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'HYPGEOMDIST'], 'functionCall' => [Statistical\Distributions\HyperGeometric::class, 'distribution'],
'argumentCount' => '4', 'argumentCount' => '4',
], ],
'HYPGEOM.DIST' => [ 'HYPGEOM.DIST' => [

View File

@ -116,12 +116,12 @@ class Statistical
* *
* @Deprecated 1.17.0 * @Deprecated 1.17.0
* *
* @see Statistical\Averages::averageDeviations()
* Use the averageDeviations() method in the Statistical\Averages class instead
*
* @param mixed ...$args Data values * @param mixed ...$args Data values
* *
* @return float|string * @return float|string
*
* @see Statistical\Averages::averageDeviations()
* Use the averageDeviations() method in the Statistical\Averages class instead
*/ */
public static function AVEDEV(...$args) public static function AVEDEV(...$args)
{ {
@ -160,12 +160,12 @@ class Statistical
* *
* @Deprecated 1.17.0 * @Deprecated 1.17.0
* *
* @see Statistical\Averages::averageA()
* Use the averageA() method in the Statistical\Averages class instead
*
* @param mixed ...$args Data values * @param mixed ...$args Data values
* *
* @return float|string * @return float|string
*
* @see Statistical\Averages::averageA()
* Use the averageA() method in the Statistical\Averages class instead
*/ */
public static function AVERAGEA(...$args) public static function AVERAGEA(...$args)
{ {
@ -203,7 +203,7 @@ class Statistical
* *
* @Deprecated 1.18.0 * @Deprecated 1.18.0
* *
*@see Statistical\Distributions\Beta::distribution() * @see Statistical\Distributions\Beta::distribution()
* Use the distribution() method in the Statistical\Distributions\Beta class instead * Use the distribution() method in the Statistical\Distributions\Beta class instead
* *
* @param float $value Value at which you want to evaluate the distribution * @param float $value Value at which you want to evaluate the distribution
@ -498,11 +498,6 @@ class Statistical
* @param float $alpha criterion value * @param float $alpha criterion value
* *
* @return int|string * @return int|string
*
* @TODO Warning. This implementation differs from the algorithm detailed on the MS
* web site in that $CumPGuessMinus1 = $CumPGuess - 1 rather than $CumPGuess - $PGuess
* This eliminates a potential endless loop error, but may have an adverse affect on the
* accuracy of the function (although all my tests have so far returned correct results).
*/ */
public static function CRITBINOM($trials, $probability, $alpha) public static function CRITBINOM($trials, $probability, $alpha)
{ {
@ -568,6 +563,11 @@ class Statistical
* such as how long an automated bank teller takes to deliver cash. For example, you can * such as how long an automated bank teller takes to deliver cash. For example, you can
* use EXPONDIST to determine the probability that the process takes at most 1 minute. * use EXPONDIST to determine the probability that the process takes at most 1 minute.
* *
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\Exponential::distribution()
* Use the distribution() method in the Statistical\Distributions\Exponential class instead
*
* @param float $value Value of the function * @param float $value Value of the function
* @param float $lambda The parameter value * @param float $lambda The parameter value
* @param bool $cumulative * @param bool $cumulative
@ -576,24 +576,7 @@ class Statistical
*/ */
public static function EXPONDIST($value, $lambda, $cumulative) public static function EXPONDIST($value, $lambda, $cumulative)
{ {
$value = Functions::flattenSingleValue($value); return Statistical\Distributions\Exponential::distribution($value, $lambda, $cumulative);
$lambda = Functions::flattenSingleValue($lambda);
$cumulative = Functions::flattenSingleValue($cumulative);
if ((is_numeric($value)) && (is_numeric($lambda))) {
if (($value < 0) || ($lambda < 0)) {
return Functions::NAN();
}
if ((is_numeric($cumulative)) || (is_bool($cumulative))) {
if ($cumulative) {
return 1 - exp(0 - $value * $lambda);
}
return $lambda * exp(0 - $value * $lambda);
}
}
return Functions::VALUE();
} }
/** /**
@ -604,6 +587,11 @@ class Statistical
* For example, you can examine the test scores of men and women entering high school, and determine * For example, you can examine the test scores of men and women entering high school, and determine
* if the variability in the females is different from that found in the males. * if the variability in the females is different from that found in the males.
* *
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\F::distribution()
* Use the distribution() method in the Statistical\Distributions\F class instead
*
* @param float $value Value of the function * @param float $value Value of the function
* @param int $u The numerator degrees of freedom * @param int $u The numerator degrees of freedom
* @param int $v The denominator degrees of freedom * @param int $v The denominator degrees of freedom
@ -614,34 +602,7 @@ class Statistical
*/ */
public static function FDIST2($value, $u, $v, $cumulative) public static function FDIST2($value, $u, $v, $cumulative)
{ {
$value = Functions::flattenSingleValue($value); return Statistical\Distributions\F::distribution($value, $u, $v, $cumulative);
$u = Functions::flattenSingleValue($u);
$v = Functions::flattenSingleValue($v);
$cumulative = Functions::flattenSingleValue($cumulative);
if (is_numeric($value) && is_numeric($u) && is_numeric($v)) {
if ($value < 0 || $u < 1 || $v < 1) {
return Functions::NAN();
}
$cumulative = (bool) $cumulative;
$u = (int) $u;
$v = (int) $v;
if ($cumulative) {
$adjustedValue = ($u * $value) / ($u * $value + $v);
return Statistical\Distributions\Beta::incompleteBeta($adjustedValue, $u / 2, $v / 2);
}
return (Statistical\Distributions\Gamma::gammaValue(($v + $u) / 2) /
(Statistical\Distributions\Gamma::gammaValue($u / 2) *
Statistical\Distributions\Gamma::gammaValue($v / 2))) *
(($u / $v) ** ($u / 2)) *
(($value ** (($u - 2) / 2)) / ((1 + ($u / $v) * $value) ** (($u + $v) / 2)));
}
return Functions::VALUE();
} }
/** /**
@ -908,42 +869,26 @@ class Statistical
* Returns the hypergeometric distribution. HYPGEOMDIST returns the probability of a given number of * Returns the hypergeometric distribution. HYPGEOMDIST returns the probability of a given number of
* sample successes, given the sample size, population successes, and population size. * sample successes, given the sample size, population successes, and population size.
* *
* @param float $sampleSuccesses Number of successes in the sample * @Deprecated 1.18.0
* @param float $sampleNumber Size of the sample *
* @param float $populationSuccesses Number of successes in the population * @see Statistical\Distributions\HyperGeometric::distribution()
* @param float $populationNumber Population size * Use the distribution() method in the Statistical\Distributions\HyperGeometric class instead
*
* @param mixed (int) $sampleSuccesses Number of successes in the sample
* @param mixed (int) $sampleNumber Size of the sample
* @param mixed (int) $populationSuccesses Number of successes in the population
* @param mixed (int) $populationNumber Population size
* *
* @return float|string * @return float|string
*/ */
public static function HYPGEOMDIST($sampleSuccesses, $sampleNumber, $populationSuccesses, $populationNumber) public static function HYPGEOMDIST($sampleSuccesses, $sampleNumber, $populationSuccesses, $populationNumber)
{ {
$sampleSuccesses = Functions::flattenSingleValue($sampleSuccesses); return Statistical\Distributions\HyperGeometric::distribution(
$sampleNumber = Functions::flattenSingleValue($sampleNumber); $sampleSuccesses,
$populationSuccesses = Functions::flattenSingleValue($populationSuccesses); $sampleNumber,
$populationNumber = Functions::flattenSingleValue($populationNumber); $populationSuccesses,
$populationNumber
if ((is_numeric($sampleSuccesses)) && (is_numeric($sampleNumber)) && (is_numeric($populationSuccesses)) && (is_numeric($populationNumber))) { );
$sampleSuccesses = floor($sampleSuccesses);
$sampleNumber = floor($sampleNumber);
$populationSuccesses = floor($populationSuccesses);
$populationNumber = floor($populationNumber);
if (($sampleSuccesses < 0) || ($sampleSuccesses > $sampleNumber) || ($sampleSuccesses > $populationSuccesses)) {
return Functions::NAN();
}
if (($sampleNumber <= 0) || ($sampleNumber > $populationNumber)) {
return Functions::NAN();
}
if (($populationSuccesses <= 0) || ($populationSuccesses > $populationNumber)) {
return Functions::NAN();
}
return MathTrig::COMBIN($populationSuccesses, $sampleSuccesses) *
MathTrig::COMBIN($populationNumber - $populationSuccesses, $sampleNumber - $sampleSuccesses) /
MathTrig::COMBIN($populationNumber, $sampleNumber);
}
return Functions::VALUE();
} }
/** /**
@ -2148,8 +2093,10 @@ class Statistical
/** /**
* ZTEST. * ZTEST.
* *
* Returns the Weibull distribution. Use this distribution in reliability * Returns the one-tailed P-value of a z-test.
* analysis, such as calculating a device's mean time to failure. *
* For a given hypothesized population mean, x, Z.TEST returns the probability that the sample mean would be
* greater than the average of observations in the data set (array) that is, the observed sample mean.
* *
* @param float $dataSet * @param float $dataSet
* @param float $m0 Alpha Parameter * @param float $m0 Alpha Parameter

View File

@ -0,0 +1,49 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions;
use PhpOffice\PhpSpreadsheet\Calculation\Exception;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
class Exponential
{
    use BaseValidations;

    /**
     * EXPONDIST.
     *
     * Returns the exponential distribution. Use EXPONDIST to model the time between events,
     * such as how long an automated bank teller takes to deliver cash. For example, you can
     * use EXPONDIST to determine the probability that the process takes at most 1 minute.
     *
     * Every argument is first reduced to a single value with Functions::flattenSingleValue()
     * and then passed through the BaseValidations helpers. If a helper throws a calculation
     * Exception, the exception message is returned as the result instead of a number.
     *
     * @param mixed $value Value of the function (validated as a float); must not be negative
     * @param mixed $lambda The parameter value (validated as a float); must be greater than zero
     * @param mixed $cumulative Validated as a bool: true returns the cumulative distribution
     *                              function, false returns the probability density function
     *
     * @return float|string The distribution value, or an error string when an argument
     *                          fails validation or lies outside the permitted range
     */
    public static function distribution($value, $lambda, $cumulative)
    {
        $value = Functions::flattenSingleValue($value);
        $lambda = Functions::flattenSingleValue($lambda);
        $cumulative = Functions::flattenSingleValue($cumulative);

        try {
            $value = self::validateFloat($value);
            $lambda = self::validateFloat($lambda);
            $cumulative = self::validateBool($cumulative);
        } catch (Exception $e) {
            return $e->getMessage();
        }

        // Excel documents both a negative value and a zero-or-negative lambda as #NUM!;
        // a lambda of exactly zero must therefore be rejected as well, not only negatives.
        if (($value < 0) || ($lambda <= 0)) {
            return Functions::NAN();
        }

        if ($cumulative === true) {
            // Cumulative distribution function: 1 - e^(-lambda * x)
            return 1 - exp(0 - $value * $lambda);
        }

        // Probability density function: lambda * e^(-lambda * x)
        return $lambda * exp(0 - $value * $lambda);
    }
}

View File

@ -0,0 +1,59 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions;
use PhpOffice\PhpSpreadsheet\Calculation\Exception;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
class F
{
    use BaseValidations;

    /**
     * F.DIST.
     *
     * Returns the F probability distribution, either as the cumulative distribution function
     * or as the probability density function. It can be used to judge whether two data sets
     * differ in their degree of diversity - for instance, whether the variability of test
     * scores differs between two groups of students.
     *
     * Each argument is flattened to a single value and validated; when validation throws a
     * calculation Exception its message is returned in place of a numeric result.
     *
     * @param mixed $value Value of the function (validated as a float)
     * @param mixed $u The numerator degrees of freedom (validated as an int)
     * @param mixed $v The denominator degrees of freedom (validated as an int)
     * @param mixed $cumulative Validated as a bool: if TRUE, F.DIST returns the cumulative
     *                              distribution function; if FALSE, the probability density function
     *
     * @return float|string
     */
    public static function distribution($value, $u, $v, $cumulative)
    {
        $value = Functions::flattenSingleValue($value);
        $u = Functions::flattenSingleValue($u);
        $v = Functions::flattenSingleValue($v);
        $cumulative = Functions::flattenSingleValue($cumulative);

        try {
            $value = self::validateFloat($value);
            $u = self::validateInt($u);
            $v = self::validateInt($v);
            $cumulative = self::validateBool($cumulative);
        } catch (Exception $e) {
            return $e->getMessage();
        }

        // A negative value or fewer than one degree of freedom is out of range
        $outOfRange = ($value < 0) || ($u < 1) || ($v < 1);
        if ($outOfRange) {
            return Functions::NAN();
        }

        if (!$cumulative) {
            // Probability density: the three factors are multiplied left to right
            $gammaRatio = Gamma::gammaValue(($v + $u) / 2) /
                (Gamma::gammaValue($u / 2) * Gamma::gammaValue($v / 2));
            $freedomPower = ($u / $v) ** ($u / 2);
            $valueTerm = ($value ** (($u - 2) / 2)) / ((1 + ($u / $v) * $value) ** (($u + $v) / 2));

            return $gammaRatio * $freedomPower * $valueTerm;
        }

        // Cumulative distribution: incomplete beta evaluated at u*x / (u*x + v)
        $scaledValue = $u * $value;
        $adjustedValue = $scaledValue / ($scaledValue + $v);

        return Beta::incompleteBeta($adjustedValue, $u / 2, $v / 2);
    }
}

View File

@ -0,0 +1,56 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions;
use PhpOffice\PhpSpreadsheet\Calculation\Exception;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
use PhpOffice\PhpSpreadsheet\Calculation\MathTrig;
class HyperGeometric
{
    use BaseValidations;

    /**
     * HYPGEOMDIST.
     *
     * Returns the hypergeometric distribution. HYPGEOMDIST returns the probability of a given number of
     * sample successes, given the sample size, population successes, and population size.
     *
     * Every argument is flattened to a single value and validated as an integer; when validation
     * throws a calculation Exception its message is returned in place of a numeric result.
     *
     * @param mixed $sampleSuccesses Number of successes in the sample (validated as an int)
     * @param mixed $sampleNumber Size of the sample (validated as an int)
     * @param mixed $populationSuccesses Number of successes in the population (validated as an int)
     * @param mixed $populationNumber Population size (validated as an int)
     *
     * @return float|string The probability, or an error string when an argument fails
     *                          validation or the combination of arguments is out of range
     */
    public static function distribution($sampleSuccesses, $sampleNumber, $populationSuccesses, $populationNumber)
    {
        $sampleSuccesses = Functions::flattenSingleValue($sampleSuccesses);
        $sampleNumber = Functions::flattenSingleValue($sampleNumber);
        $populationSuccesses = Functions::flattenSingleValue($populationSuccesses);
        $populationNumber = Functions::flattenSingleValue($populationNumber);

        try {
            $sampleSuccesses = self::validateInt($sampleSuccesses);
            $sampleNumber = self::validateInt($sampleNumber);
            $populationSuccesses = self::validateInt($populationSuccesses);
            $populationNumber = self::validateInt($populationNumber);
        } catch (Exception $e) {
            return $e->getMessage();
        }

        // Sample successes must be non-negative and no larger than either the sample or the population successes
        if (($sampleSuccesses < 0) || ($sampleSuccesses > $sampleNumber) || ($sampleSuccesses > $populationSuccesses)) {
            return Functions::NAN();
        }
        // The sample must be non-empty and fit inside the population
        if (($sampleNumber <= 0) || ($sampleNumber > $populationNumber)) {
            return Functions::NAN();
        }
        // Population successes must be positive and fit inside the population
        if (($populationSuccesses <= 0) || ($populationSuccesses > $populationNumber)) {
            return Functions::NAN();
        }
        // Excel also rejects sample_s < number_sample - number_pop + population_s, i.e. the sample
        // may not contain more failures than the population does. Without this guard the second
        // COMBIN() below is asked to choose more items than are available.
        if (($sampleNumber - $sampleSuccesses) > ($populationNumber - $populationSuccesses)) {
            return Functions::NAN();
        }

        return MathTrig::COMBIN($populationSuccesses, $sampleSuccesses) *
            MathTrig::COMBIN($populationNumber - $populationSuccesses, $sampleNumber - $sampleSuccesses) /
            MathTrig::COMBIN($populationNumber, $sampleNumber);
    }
}

View File

@ -5,21 +5,21 @@ namespace PhpOffice\PhpSpreadsheetTests\Calculation\Functions\Statistical;
use PhpOffice\PhpSpreadsheet\Calculation\Statistical; use PhpOffice\PhpSpreadsheet\Calculation\Statistical;
use PHPUnit\Framework\TestCase; use PHPUnit\Framework\TestCase;
class FDist2Test extends TestCase class FDistTest extends TestCase
{ {
/** /**
* @dataProvider providerFDIST2 * @dataProvider providerFDIST
* *
* @param mixed $expectedResult * @param mixed $expectedResult
*/ */
public function testFDIST2($expectedResult, ...$args): void public function testFDIST($expectedResult, ...$args): void
{ {
$result = Statistical::FDIST2(...$args); $result = Statistical::FDIST2(...$args);
self::assertEqualsWithDelta($expectedResult, $result, 1E-12); self::assertEqualsWithDelta($expectedResult, $result, 1E-12);
} }
public function providerFDIST2(): array public function providerFDIST(): array
{ {
return require 'tests/data/Calculation/Statistical/FDIST2.php'; return require 'tests/data/Calculation/Statistical/FDIST.php';
} }
} }

View File

@ -1,6 +1,14 @@
<?php <?php
return [ return [
[
1.353352832366,
0.2, 10, false,
],
[
0.864664716763,
0.2, 10, true,
],
[ [
0.606530659713, 0.606530659713,
0.5, 1, false, 0.5, 1, false,
@ -9,12 +17,24 @@ return [
0.393469340287, 0.393469340287,
0.5, 1, true, 0.5, 1, true,
], ],
[
'#VALUE!',
'NAN', 1, true,
],
[ [
'#VALUE!', '#VALUE!',
0.5, 'NAN', true, 0.5, 'NAN', true,
], ],
[
'#VALUE!',
0.5, 1, 'NAN',
],
[ [
'#NUM!', '#NUM!',
-0.5, 1, true, -0.5, 1, true,
], ],
[
'#NUM!',
0.5, -1, true,
],
]; ];

View File

@ -0,0 +1,80 @@
<?php
// F.DIST test data. Each row holds the expected result followed by the four
// function arguments in call order: value, u, v, cumulative.
return [
    [
        0.001223791709,
        15.2069, 6, 4, false,
    ],
    [
        0.990000043003,
        15.2069, 6, 4, true,
    ],
    [
        0.308000821694,
        1, 2, 5, false,
    ],
    [
        0.568798849629,
        1, 2, 5, true,
    ],
    [
        0.0241472644208,
        5, 1, 2, false,
    ],
    [
        0.8451542547285,
        5, 1, 2, true,
    ],
    [
        0.0006669496615,
        65, 2, 1, false,
    ],
    [
        0.9126295943339,
        65, 2, 1, true,
    ],
    [
        4.7306581130012E-6,
        65, 8, 5, false,
    ],
    [
        0.9998747923834,
        65, 8, 5, true,
    ],
    [
        0.0017323823929,
        7.5, 13, 8, false,
    ],
    [
        0.9961476916638,
        7.5, 13, 8, true,
    ],
    // Non-numeric value
    [
        '#VALUE!',
        'NAN', 13, 8, true,
    ],
    // Non-numeric numerator degrees of freedom
    [
        '#VALUE!',
        7.5, 'NAN', 8, true,
    ],
    // Non-numeric denominator degrees of freedom (exactly four arguments)
    [
        '#VALUE!',
        7.5, 13, 'NAN', false,
    ],
    // Invalid cumulative flag (exactly four arguments)
    [
        '#VALUE!',
        7.5, 13, 8, 'NAN',
    ],
    [
        '#NUM!',
        -7.5, 13, 8, true,
    ],
    [
        '#NUM!',
        7.5, 0, 8, true,
    ],
    [
        '#NUM!',
        7.5, 13, 0, true,
    ],
];

View File

@ -1,17 +0,0 @@
<?php
// NOTE(review): this appears to be the former FDIST2 provider data, shown as removed in this change.
// Each row is presumably [expected result, value, u, v, cumulative] - confirm against the test that loads it.
return [
[0.0012237917087, 15.2069, 6, 4, false],
[0.99000004300276, 15.2069, 6, 4, true],
[0.0241472644208, 5, 1, 2, false],
[0.84515425472852, 5, 1, 2, true],
[0.0006669496615, 65, 2, 1, false],
[0.9126295943339, 65, 2, 1, true],
[4.7306581130012E-6, 65, 8, 5, false],
[0.99987479238344, 65, 8, 5, true],
[0.0017323823929, 7.5, 13, 8, false],
[0.9961476916638, 7.5, 13, 8, true],
['#NUM!', -1, 1, 2, false],
['#NUM!', -1, 0.5, 2, false],
['#VALUE!', 'NAN', 1, 2, false],
];

View File

@ -25,12 +25,44 @@ return [
'#VALUE!', '#VALUE!',
'NAN', 4, 4, 12, 'NAN', 4, 4, 12,
], ],
[
'#VALUE!',
4, 'NAN', 4, 12,
],
[
'#VALUE!',
4, 4, 'NAN', 12,
],
[
'#VALUE!',
4, 4, 4, 'NAN',
],
[
'#NUM!',
-1, 4, 4, 12,
],
[
'#NUM!',
0, 0, 4, 12,
],
[
'#NUM!',
4, 15, 4, 12,
],
[ [
'#NUM!', '#NUM!',
5, 4, 4, 12, 5, 4, 4, 12,
], ],
[ [
'#NUM!', '#NUM!',
4, 4, 4, 3, 5, 5, 4, 12,
],
[
'#NUM!',
5, 5, -4, 12,
],
[
'#NUM!',
5, 5, 15, 12,
], ],
]; ];