Extract a few more Distribution functions from Statistical (#1975)

* Extract a few more Distribution functions from Statistical; this time EXPONDIST() and HYPGEOMDIST()

* Extract the F Distribution (although only F.DIST() is implemented so far)

* Update docblocks

* PHPCS
This commit is contained in:
Mark Baker 2021-03-31 21:45:06 +02:00 committed by GitHub
parent 029f345987
commit 17af13281b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 344 additions and 118 deletions

View File

@ -980,12 +980,12 @@ class Calculation
],
'EXPONDIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'EXPONDIST'],
'functionCall' => [Statistical\Distributions\Exponential::class, 'distribution'],
'argumentCount' => '3',
],
'EXPON.DIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'EXPONDIST'],
'functionCall' => [Statistical\Distributions\Exponential::class, 'distribution'],
'argumentCount' => '3',
],
'FACT' => [
@ -1010,7 +1010,7 @@ class Calculation
],
'F.DIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'FDIST2'],
'functionCall' => [Statistical\Distributions\F::class, 'distribution'],
'argumentCount' => '4',
],
'F.DIST.RT' => [
@ -1248,7 +1248,7 @@ class Calculation
],
'HYPGEOMDIST' => [
'category' => Category::CATEGORY_STATISTICAL,
'functionCall' => [Statistical::class, 'HYPGEOMDIST'],
'functionCall' => [Statistical\Distributions\HyperGeometric::class, 'distribution'],
'argumentCount' => '4',
],
'HYPGEOM.DIST' => [

View File

@ -116,12 +116,12 @@ class Statistical
*
* @Deprecated 1.17.0
*
* @see Statistical\Averages::averageDeviations()
* Use the averageDeviations() method in the Statistical\Averages class instead
*
* @param mixed ...$args Data values
*
* @return float|string
*
* @see Statistical\Averages::averageDeviations()
* Use the averageDeviations() method in the Statistical\Averages class instead
*/
public static function AVEDEV(...$args)
{
@ -160,12 +160,12 @@ class Statistical
*
* @Deprecated 1.17.0
*
* @see Statistical\Averages::averageA()
* Use the averageA() method in the Statistical\Averages class instead
*
* @param mixed ...$args Data values
*
* @return float|string
*
* @see Statistical\Averages::averageA()
* Use the averageA() method in the Statistical\Averages class instead
*/
public static function AVERAGEA(...$args)
{
@ -203,7 +203,7 @@ class Statistical
*
* @Deprecated 1.18.0
*
*@see Statistical\Distributions\Beta::distribution()
* @see Statistical\Distributions\Beta::distribution()
* Use the distribution() method in the Statistical\Distributions\Beta class instead
*
* @param float $value Value at which you want to evaluate the distribution
@ -498,11 +498,6 @@ class Statistical
* @param float $alpha criterion value
*
* @return int|string
*
* @TODO Warning. This implementation differs from the algorithm detailed on the MS
* web site in that $CumPGuessMinus1 = $CumPGuess - 1 rather than $CumPGuess - $PGuess
* This eliminates a potential endless loop error, but may have an adverse affect on the
* accuracy of the function (although all my tests have so far returned correct results).
*/
public static function CRITBINOM($trials, $probability, $alpha)
{
@ -568,6 +563,11 @@ class Statistical
* such as how long an automated bank teller takes to deliver cash. For example, you can
* use EXPONDIST to determine the probability that the process takes at most 1 minute.
*
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\Exponential::distribution()
* Use the distribution() method in the Statistical\Distributions\Exponential class instead
*
* @param float $value Value of the function
* @param float $lambda The parameter value
* @param bool $cumulative
@ -576,24 +576,7 @@ class Statistical
*/
public static function EXPONDIST($value, $lambda, $cumulative)
{
$value = Functions::flattenSingleValue($value);
$lambda = Functions::flattenSingleValue($lambda);
$cumulative = Functions::flattenSingleValue($cumulative);
if ((is_numeric($value)) && (is_numeric($lambda))) {
if (($value < 0) || ($lambda < 0)) {
return Functions::NAN();
}
if ((is_numeric($cumulative)) || (is_bool($cumulative))) {
if ($cumulative) {
return 1 - exp(0 - $value * $lambda);
}
return $lambda * exp(0 - $value * $lambda);
}
}
return Functions::VALUE();
return Statistical\Distributions\Exponential::distribution($value, $lambda, $cumulative);
}
/**
@ -604,6 +587,11 @@ class Statistical
* For example, you can examine the test scores of men and women entering high school, and determine
* if the variability in the females is different from that found in the males.
*
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\F::distribution()
* Use the distribution() method in the Statistical\Distributions\F class instead
*
* @param float $value Value of the function
* @param int $u The numerator degrees of freedom
* @param int $v The denominator degrees of freedom
@ -614,34 +602,7 @@ class Statistical
*/
public static function FDIST2($value, $u, $v, $cumulative)
{
$value = Functions::flattenSingleValue($value);
$u = Functions::flattenSingleValue($u);
$v = Functions::flattenSingleValue($v);
$cumulative = Functions::flattenSingleValue($cumulative);
if (is_numeric($value) && is_numeric($u) && is_numeric($v)) {
if ($value < 0 || $u < 1 || $v < 1) {
return Functions::NAN();
}
$cumulative = (bool) $cumulative;
$u = (int) $u;
$v = (int) $v;
if ($cumulative) {
$adjustedValue = ($u * $value) / ($u * $value + $v);
return Statistical\Distributions\Beta::incompleteBeta($adjustedValue, $u / 2, $v / 2);
}
return (Statistical\Distributions\Gamma::gammaValue(($v + $u) / 2) /
(Statistical\Distributions\Gamma::gammaValue($u / 2) *
Statistical\Distributions\Gamma::gammaValue($v / 2))) *
(($u / $v) ** ($u / 2)) *
(($value ** (($u - 2) / 2)) / ((1 + ($u / $v) * $value) ** (($u + $v) / 2)));
}
return Functions::VALUE();
return Statistical\Distributions\F::distribution($value, $u, $v, $cumulative);
}
/**
@ -908,42 +869,26 @@ class Statistical
* Returns the hypergeometric distribution. HYPGEOMDIST returns the probability of a given number of
* sample successes, given the sample size, population successes, and population size.
*
* @param float $sampleSuccesses Number of successes in the sample
* @param float $sampleNumber Size of the sample
* @param float $populationSuccesses Number of successes in the population
* @param float $populationNumber Population size
* @Deprecated 1.18.0
*
* @see Statistical\Distributions\HyperGeometric::distribution()
* Use the distribution() method in the Statistical\Distributions\HyperGeometric class instead
*
* @param mixed (int) $sampleSuccesses Number of successes in the sample
* @param mixed (int) $sampleNumber Size of the sample
* @param mixed (int) $populationSuccesses Number of successes in the population
* @param mixed (int) $populationNumber Population size
*
* @return float|string
*/
public static function HYPGEOMDIST($sampleSuccesses, $sampleNumber, $populationSuccesses, $populationNumber)
{
$sampleSuccesses = Functions::flattenSingleValue($sampleSuccesses);
$sampleNumber = Functions::flattenSingleValue($sampleNumber);
$populationSuccesses = Functions::flattenSingleValue($populationSuccesses);
$populationNumber = Functions::flattenSingleValue($populationNumber);
if ((is_numeric($sampleSuccesses)) && (is_numeric($sampleNumber)) && (is_numeric($populationSuccesses)) && (is_numeric($populationNumber))) {
$sampleSuccesses = floor($sampleSuccesses);
$sampleNumber = floor($sampleNumber);
$populationSuccesses = floor($populationSuccesses);
$populationNumber = floor($populationNumber);
if (($sampleSuccesses < 0) || ($sampleSuccesses > $sampleNumber) || ($sampleSuccesses > $populationSuccesses)) {
return Functions::NAN();
}
if (($sampleNumber <= 0) || ($sampleNumber > $populationNumber)) {
return Functions::NAN();
}
if (($populationSuccesses <= 0) || ($populationSuccesses > $populationNumber)) {
return Functions::NAN();
}
return MathTrig::COMBIN($populationSuccesses, $sampleSuccesses) *
MathTrig::COMBIN($populationNumber - $populationSuccesses, $sampleNumber - $sampleSuccesses) /
MathTrig::COMBIN($populationNumber, $sampleNumber);
}
return Functions::VALUE();
return Statistical\Distributions\HyperGeometric::distribution(
$sampleSuccesses,
$sampleNumber,
$populationSuccesses,
$populationNumber
);
}
/**
@ -2148,8 +2093,10 @@ class Statistical
/**
* ZTEST.
*
* Returns the Weibull distribution. Use this distribution in reliability
* analysis, such as calculating a device's mean time to failure.
* Returns the one-tailed P-value of a z-test.
*
* For a given hypothesized population mean, x, Z.TEST returns the probability that the sample mean would be
* greater than the average of observations in the data set (array) that is, the observed sample mean.
*
* @param float $dataSet
* @param float $m0 Alpha Parameter

View File

@ -0,0 +1,49 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions;
use PhpOffice\PhpSpreadsheet\Calculation\Exception;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
class Exponential
{
    use BaseValidations;

    /**
     * EXPONDIST.
     *
     * Returns the exponential distribution. Use EXPONDIST to model the time between events,
     * such as how long an automated bank teller takes to deliver cash. For example, you can
     * use EXPONDIST to determine the probability that the process takes at most 1 minute.
     *
     * @param mixed (float) $value Value of the function; must not be negative
     * @param mixed (float) $lambda The parameter value; must be greater than zero
     * @param mixed (bool) $cumulative TRUE for the cumulative distribution function,
     *                                  FALSE for the probability density function
     *
     * @return float|string The distribution value, or an error string when an argument
     *                      fails validation or lies outside the permitted range
     */
    public static function distribution($value, $lambda, $cumulative)
    {
        $value = Functions::flattenSingleValue($value);
        $lambda = Functions::flattenSingleValue($lambda);
        $cumulative = Functions::flattenSingleValue($cumulative);

        try {
            $value = self::validateFloat($value);
            $lambda = self::validateFloat($lambda);
            $cumulative = self::validateBool($cumulative);
        } catch (Exception $e) {
            // The validators report bad arguments by throwing; the message is handed
            // back to the caller as the function result.
            return $e->getMessage();
        }

        // Excel documents a #NUM! error for x < 0 and for lambda <= 0. A rate of exactly
        // zero does not describe a valid exponential distribution, so it is rejected
        // here rather than silently evaluating to 0.
        if (($value < 0) || ($lambda <= 0)) {
            return Functions::NAN();
        }

        if ($cumulative === true) {
            return 1 - exp(0 - $value * $lambda);
        }

        return $lambda * exp(0 - $value * $lambda);
    }
}

View File

@ -0,0 +1,59 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions;
use PhpOffice\PhpSpreadsheet\Calculation\Exception;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
class F
{
    use BaseValidations;

    /**
     * F.DIST.
     *
     * Returns the F probability distribution.
     * Typical use: deciding whether two data sets differ in their degree of diversity, e.g. whether
     * the variability of test scores differs between two groups of students.
     *
     * @param mixed(float) $value Value at which to evaluate the function
     * @param mixed(int) $u The numerator degrees of freedom
     * @param mixed(int) $v The denominator degrees of freedom
     * @param mixed(bool) $cumulative TRUE selects the cumulative distribution function;
     *                                 FALSE selects the probability density function
     *
     * @return float|string The distribution value, or an error string when an argument
     *                      fails validation or lies outside the permitted range
     */
    public static function distribution($value, $u, $v, $cumulative)
    {
        $value = Functions::flattenSingleValue($value);
        $u = Functions::flattenSingleValue($u);
        $v = Functions::flattenSingleValue($v);
        $cumulative = Functions::flattenSingleValue($cumulative);

        try {
            $value = self::validateFloat($value);
            $u = self::validateInt($u);
            $v = self::validateInt($v);
            $cumulative = self::validateBool($cumulative);
        } catch (Exception $e) {
            // A failed validation is reported to the caller as the exception's message.
            return $e->getMessage();
        }

        // Negative values and degrees of freedom below 1 are out of range.
        $outOfRange = $value < 0 || $u < 1 || $v < 1;
        if ($outOfRange) {
            return Functions::NAN();
        }

        if ($cumulative) {
            // Cumulative form: incomplete beta evaluated at u*x / (u*x + v).
            $adjustedValue = ($u * $value) / ($u * $value + $v);

            return Beta::incompleteBeta($adjustedValue, $u / 2, $v / 2);
        }

        // Density form, assembled from three factors multiplied left to right.
        $gammaRatio = Gamma::gammaValue(($v + $u) / 2) /
            (Gamma::gammaValue($u / 2) * Gamma::gammaValue($v / 2));
        $freedomRatioPower = ($u / $v) ** ($u / 2);
        $valueTerm = ($value ** (($u - 2) / 2)) / ((1 + ($u / $v) * $value) ** (($u + $v) / 2));

        return $gammaRatio * $freedomRatioPower * $valueTerm;
    }
}

View File

@ -0,0 +1,56 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions;
use PhpOffice\PhpSpreadsheet\Calculation\Exception;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
use PhpOffice\PhpSpreadsheet\Calculation\MathTrig;
class HyperGeometric
{
    use BaseValidations;

    /**
     * HYPGEOMDIST.
     *
     * Returns the hypergeometric distribution. HYPGEOMDIST returns the probability of a given number of
     * sample successes, given the sample size, population successes, and population size.
     *
     * @param mixed (int) $sampleSuccesses Number of successes in the sample
     * @param mixed (int) $sampleNumber Size of the sample
     * @param mixed (int) $populationSuccesses Number of successes in the population
     * @param mixed (int) $populationNumber Population size
     *
     * @return float|string The probability, or an error string when an argument fails
     *                      validation or the combination of arguments is out of range
     */
    public static function distribution($sampleSuccesses, $sampleNumber, $populationSuccesses, $populationNumber)
    {
        $sampleSuccesses = Functions::flattenSingleValue($sampleSuccesses);
        $sampleNumber = Functions::flattenSingleValue($sampleNumber);
        $populationSuccesses = Functions::flattenSingleValue($populationSuccesses);
        $populationNumber = Functions::flattenSingleValue($populationNumber);

        try {
            $sampleSuccesses = self::validateInt($sampleSuccesses);
            $sampleNumber = self::validateInt($sampleNumber);
            $populationSuccesses = self::validateInt($populationSuccesses);
            $populationNumber = self::validateInt($populationNumber);
        } catch (Exception $e) {
            // A failed validation is reported to the caller as the exception's message.
            return $e->getMessage();
        }

        if (($sampleSuccesses < 0) || ($sampleSuccesses > $sampleNumber) || ($sampleSuccesses > $populationSuccesses)) {
            return Functions::NAN();
        }
        if (($sampleNumber <= 0) || ($sampleNumber > $populationNumber)) {
            return Functions::NAN();
        }
        if (($populationSuccesses <= 0) || ($populationSuccesses > $populationNumber)) {
            return Functions::NAN();
        }

        // The sample cannot hold more failures than the population contains, i.e.
        // sample_s must be at least number_sample - (number_pop - population_s).
        // Excel documents #NUM! for this case; without the guard the second COMBIN()
        // below would be asked to choose more items than are available.
        $populationFailures = $populationNumber - $populationSuccesses;
        if ($sampleSuccesses < $sampleNumber - $populationFailures) {
            return Functions::NAN();
        }

        return MathTrig::COMBIN($populationSuccesses, $sampleSuccesses) *
            MathTrig::COMBIN($populationFailures, $sampleNumber - $sampleSuccesses) /
            MathTrig::COMBIN($populationNumber, $sampleNumber);
    }
}

View File

@ -5,21 +5,21 @@ namespace PhpOffice\PhpSpreadsheetTests\Calculation\Functions\Statistical;
use PhpOffice\PhpSpreadsheet\Calculation\Statistical;
use PHPUnit\Framework\TestCase;
class FDist2Test extends TestCase
class FDistTest extends TestCase
{
/**
* @dataProvider providerFDIST2
* @dataProvider providerFDIST
*
* @param mixed $expectedResult
*/
public function testFDIST2($expectedResult, ...$args): void
public function testFDIST($expectedResult, ...$args): void
{
$result = Statistical::FDIST2(...$args);
self::assertEqualsWithDelta($expectedResult, $result, 1E-12);
}
public function providerFDIST2(): array
public function providerFDIST(): array
{
return require 'tests/data/Calculation/Statistical/FDIST2.php';
return require 'tests/data/Calculation/Statistical/FDIST.php';
}
}

View File

@ -1,6 +1,14 @@
<?php
return [
[
1.353352832366,
0.2, 10, false,
],
[
0.864664716763,
0.2, 10, true,
],
[
0.606530659713,
0.5, 1, false,
@ -9,12 +17,24 @@ return [
0.393469340287,
0.5, 1, true,
],
[
'#VALUE!',
'NAN', 1, true,
],
[
'#VALUE!',
0.5, 'NAN', true,
],
[
'#VALUE!',
0.5, 1, 'NAN',
],
[
'#NUM!',
-0.5, 1, true,
],
[
'#NUM!',
0.5, -1, true,
],
];

View File

@ -0,0 +1,80 @@
<?php
// Data provider rows for F.DIST.
// Each row is: expected result, then the four function arguments
// (value, numerator degrees of freedom, denominator degrees of freedom, cumulative flag).
return [
    [
        0.001223791709,
        15.2069, 6, 4, false,
    ],
    [
        0.990000043003,
        15.2069, 6, 4, true,
    ],
    [
        0.308000821694,
        1, 2, 5, false,
    ],
    [
        0.568798849629,
        1, 2, 5, true,
    ],
    [
        0.0241472644208,
        5, 1, 2, false,
    ],
    [
        0.8451542547285,
        5, 1, 2, true,
    ],
    [
        0.0006669496615,
        65, 2, 1, false,
    ],
    [
        0.9126295943339,
        65, 2, 1, true,
    ],
    [
        4.7306581130012E-6,
        65, 8, 5, false,
    ],
    [
        0.9998747923834,
        65, 8, 5, true,
    ],
    [
        0.0017323823929,
        7.5, 13, 8, false,
    ],
    [
        0.9961476916638,
        7.5, 13, 8, true,
    ],
    // Non-numeric input in each argument position in turn.
    [
        '#VALUE!',
        'NAN', 13, 8, true,
    ],
    [
        '#VALUE!',
        7.5, 'NAN', 8, true,
    ],
    [
        '#VALUE!',
        7.5, 13, 'NAN', true,
    ],
    [
        '#VALUE!',
        7.5, 13, 8, 'NAN',
    ],
    // Out-of-range numeric input.
    [
        '#NUM!',
        -7.5, 13, 8, true,
    ],
    [
        '#NUM!',
        7.5, 0, 8, true,
    ],
    [
        '#NUM!',
        7.5, 13, 0, true,
    ],
];

View File

@ -1,17 +0,0 @@
<?php
return [
[0.0012237917087, 15.2069, 6, 4, false],
[0.99000004300276, 15.2069, 6, 4, true],
[0.0241472644208, 5, 1, 2, false],
[0.84515425472852, 5, 1, 2, true],
[0.0006669496615, 65, 2, 1, false],
[0.9126295943339, 65, 2, 1, true],
[4.7306581130012E-6, 65, 8, 5, false],
[0.99987479238344, 65, 8, 5, true],
[0.0017323823929, 7.5, 13, 8, false],
[0.9961476916638, 7.5, 13, 8, true],
['#NUM!', -1, 1, 2, false],
['#NUM!', -1, 0.5, 2, false],
['#VALUE!', 'NAN', 1, 2, false],
];

View File

@ -25,12 +25,44 @@ return [
'#VALUE!',
'NAN', 4, 4, 12,
],
[
'#VALUE!',
4, 'NAN', 4, 12,
],
[
'#VALUE!',
4, 4, 'NAN', 12,
],
[
'#VALUE!',
4, 4, 4, 'NAN',
],
[
'#NUM!',
-1, 4, 4, 12,
],
[
'#NUM!',
0, 0, 4, 12,
],
[
'#NUM!',
4, 15, 4, 12,
],
[
'#NUM!',
5, 4, 4, 12,
],
[
'#NUM!',
4, 4, 4, 3,
5, 5, 4, 12,
],
[
'#NUM!',
5, 5, -4, 12,
],
[
'#NUM!',
5, 5, 15, 12,
],
];