From 1c92b7611ab1bb3b6e43c6380830361c7e723d16 Mon Sep 17 00:00:00 2001 From: Mark Baker Date: Mon, 29 Mar 2021 12:59:46 +0200 Subject: [PATCH] Extract Percentile-type functions from Statistics (#1966) * Extract Percentile-type functions from Statistics (e.g. PERCENTILE(), PERCENTRANK(), QUARTILE(), and RANK()) * Unit test for PERCENTILE() with an empty (of numbers) dataset --- .../Calculation/Calculation.php | 14 +- .../Calculation/Statistical.php | 132 +++-------- .../Statistical/BaseValidations.php | 27 +++ .../Calculation/Statistical/Confidence.php | 25 ++- .../Calculation/Statistical/Percentiles.php | 207 ++++++++++++++++++ .../Calculation/Statistical/Permutations.php | 40 ++-- .../Calculation/Statistical/Trends.php | 6 +- .../Calculation/Statistical/PERCENTILE.php | 24 ++ .../Calculation/Statistical/PERCENTRANK.php | 7 +- .../Calculation/Statistical/PERMUTATIONA.php | 8 + .../data/Calculation/Statistical/QUARTILE.php | 4 + tests/data/Calculation/Statistical/RANK.php | 41 ++-- 12 files changed, 378 insertions(+), 157 deletions(-) create mode 100644 src/PhpSpreadsheet/Calculation/Statistical/BaseValidations.php create mode 100644 src/PhpSpreadsheet/Calculation/Statistical/Percentiles.php diff --git a/src/PhpSpreadsheet/Calculation/Calculation.php b/src/PhpSpreadsheet/Calculation/Calculation.php index 4dfcf9ab..4f95c5b4 100644 --- a/src/PhpSpreadsheet/Calculation/Calculation.php +++ b/src/PhpSpreadsheet/Calculation/Calculation.php @@ -1903,7 +1903,7 @@ class Calculation ], 'PERCENTILE' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Statistical::class, 'PERCENTILE'], + 'functionCall' => [Statistical\Percentiles::class, 'PERCENTILE'], 'argumentCount' => '2', ], 'PERCENTILE.EXC' => [ @@ -1913,12 +1913,12 @@ class Calculation ], 'PERCENTILE.INC' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Statistical::class, 'PERCENTILE'], + 'functionCall' => [Statistical\Percentiles::class, 'PERCENTILE'], 'argumentCount' => '2', ], 'PERCENTRANK' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Statistical::class, 'PERCENTRANK'], + 'functionCall' => [Statistical\Percentiles::class, 'PERCENTRANK'], 'argumentCount' => '2,3', ], 'PERCENTRANK.EXC' => [ @@ -1928,7 +1928,7 @@ class Calculation ], 'PERCENTRANK.INC' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Statistical::class, 'PERCENTRANK'], + 'functionCall' => [Statistical\Percentiles::class, 'PERCENTRANK'], 'argumentCount' => '2,3', ], 'PERMUT' => [ @@ -2018,7 +2018,7 @@ class Calculation ], 'QUARTILE' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Statistical::class, 'QUARTILE'], + 'functionCall' => [Statistical\Percentiles::class, 'QUARTILE'], 'argumentCount' => '2', ], 'QUARTILE.EXC' => [ @@ -2028,7 +2028,7 @@ class Calculation ], 'QUARTILE.INC' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Statistical::class, 'QUARTILE'], + 'functionCall' => [Statistical\Percentiles::class, 'QUARTILE'], 'argumentCount' => '2', ], 'QUOTIENT' => [ @@ -2058,7 +2058,7 @@ class Calculation ], 'RANK' => [ 'category' => Category::CATEGORY_STATISTICAL, - 'functionCall' => [Statistical::class, 'RANK'], + 'functionCall' => [Statistical\Percentiles::class, 'RANK'], 'argumentCount' => '2,3', ], 'RANK.AVG' => [ diff --git a/src/PhpSpreadsheet/Calculation/Statistical.php b/src/PhpSpreadsheet/Calculation/Statistical.php index 01caba14..ce80fa79 100644 --- a/src/PhpSpreadsheet/Calculation/Statistical.php +++ b/src/PhpSpreadsheet/Calculation/Statistical.php @@ -1665,45 +1665,18 @@ class Statistical * Excel Function: * PERCENTILE(value1[,value2[, ...]],entry) * + * @Deprecated 1.18.0 + * + * @see Statistical\Percentiles::PERCENTILE() + * Use the PERCENTILE() method in the Statistical\Percentiles class instead + * * @param mixed $args Data values * * @return float|string The result, or a string containing an error */ public static function PERCENTILE(...$args) { - $aArgs = Functions::flattenArray($args); - - // Calculate - $entry = array_pop($aArgs); - - if ((is_numeric($entry)) && (!is_string($entry))) { - if (($entry < 0) || ($entry > 1)) { - return Functions::NAN(); - } - $mArgs = []; - foreach ($aArgs as $arg) { - // Is it a numeric value? - if ((is_numeric($arg)) && (!is_string($arg))) { - $mArgs[] = $arg; - } - } - $mValueCount = count($mArgs); - if ($mValueCount > 0) { - sort($mArgs); - $count = Counts::COUNT($mArgs); - $index = $entry * ($count - 1); - $iBase = floor($index); - if ($index == $iBase) { - return $mArgs[$index]; - } - $iNext = $iBase + 1; - $iProportion = $index - $iBase; - - return $mArgs[$iBase] + (($mArgs[$iNext] - $mArgs[$iBase]) * $iProportion); - } - } - - return Functions::VALUE(); + return Statistical\Percentiles::PERCENTILE(...$args); } /** @@ -1714,6 +1687,11 @@ class Statistical * rather than floored (as MS Excel), so value 3 for a value set of 1, 2, 3, 4 will return * 0.667 rather than 0.666 * + * @Deprecated 1.18.0 + * + * @see Statistical\Percentiles::PERCENTRANK() + * Use the PERCENTRANK() method in the Statistical\Percentiles class instead + * * @param mixed (float[]) $valueSet An array of, or a reference to, a list of numbers * @param mixed (int) $value the number whose rank you want to find * @param mixed (int) $significance the number of significant digits for the returned percentage value @@ -1722,38 +1700,7 @@ class Statistical */ public static function PERCENTRANK($valueSet, $value, $significance = 3) { - $valueSet = Functions::flattenArray($valueSet); - $value = Functions::flattenSingleValue($value); - $significance = ($significance === null) ? 3 : (int) Functions::flattenSingleValue($significance); - - foreach ($valueSet as $key => $valueEntry) { - if (!is_numeric($valueEntry)) { - unset($valueSet[$key]); - } - } - sort($valueSet, SORT_NUMERIC); - $valueCount = count($valueSet); - if ($valueCount == 0) { - return Functions::NAN(); - } - - $valueAdjustor = $valueCount - 1; - if (($value < $valueSet[0]) || ($value > $valueSet[$valueAdjustor])) { - return Functions::NA(); - } - - $pos = array_search($value, $valueSet); - if ($pos === false) { - $pos = 0; - $testValue = $valueSet[0]; - while ($testValue < $value) { - $testValue = $valueSet[++$pos]; - } - --$pos; - $pos += (($value - $valueSet[$pos]) / ($testValue - $valueSet[$pos])); - } - - return round($pos / $valueAdjustor, $significance); + return Statistical\Percentiles::PERCENTRANK($valueSet, $value, $significance); } /** @@ -1811,27 +1758,18 @@ class Statistical * Excel Function: * QUARTILE(value1[,value2[, ...]],entry) * + * @Deprecated 1.18.0 + * + * @see Statistical\Percentiles::QUARTILE() + * Use the QUARTILE() method in the Statistical\Percentiles class instead + * * @param mixed $args Data values * * @return float|string The result, or a string containing an error */ public static function QUARTILE(...$args) { - $aArgs = Functions::flattenArray($args); - $entry = array_pop($aArgs); - - // Calculate - if ((is_numeric($entry)) && (!is_string($entry))) { - $entry = floor($entry); - $entry /= 4; - if (($entry < 0) || ($entry > 1)) { - return Functions::NAN(); - } - - return self::PERCENTILE($aArgs, $entry); - } - - return Functions::VALUE(); + return Statistical\Percentiles::QUARTILE(...$args); } /** @@ -1839,36 +1777,20 @@ class Statistical * * Returns the rank of a number in a list of numbers. * - * @param int $value the number whose rank you want to find - * @param float[] $valueSet An array of, or a reference to, a list of numbers - * @param int $order Order to sort the values in the value set + * @Deprecated 1.18.0 + * + * @see Statistical\Percentiles::RANK() + * Use the RANK() method in the Statistical\Percentiles class instead + * + * @param mixed (float) $value the number whose rank you want to find + * @param mixed (float[]) $valueSet An array of, or a reference to, a list of numbers + * @param mixed (int) $order Order to sort the values in the value set * * @return float|string The result, or a string containing an error */ public static function RANK($value, $valueSet, $order = 0) { - $value = Functions::flattenSingleValue($value); - $valueSet = Functions::flattenArray($valueSet); - $order = ($order === null) ? 0 : (int) Functions::flattenSingleValue($order); - - foreach ($valueSet as $key => $valueEntry) { - if (!is_numeric($valueEntry)) { - unset($valueSet[$key]); - } - } - - if ($order == 0) { - sort($valueSet, SORT_NUMERIC); - } else { - rsort($valueSet, SORT_NUMERIC); - } - - $pos = array_search($value, $valueSet); - if ($pos === false) { - return Functions::NA(); - } - - return ++$pos; + return Statistical\Percentiles::RANK($value, $valueSet, $order); } /** diff --git a/src/PhpSpreadsheet/Calculation/Statistical/BaseValidations.php b/src/PhpSpreadsheet/Calculation/Statistical/BaseValidations.php new file mode 100644 index 00000000..1dbe4212 --- /dev/null +++ b/src/PhpSpreadsheet/Calculation/Statistical/BaseValidations.php @@ -0,0 +1,27 @@ += 1)) { - return Functions::NAN(); - } - if (($stdDev <= 0) || ($size < 1)) { - return Functions::NAN(); - } - - return Statistical::NORMSINV(1 - $alpha / 2) * $stdDev / sqrt($size); + try { + $alpha = self::validateFloat($alpha); + $stdDev = self::validateFloat($stdDev); + $size = self::validateInt($size); + } catch (Exception $e) { + return $e->getMessage(); } - return Functions::VALUE(); + if (($alpha <= 0) || ($alpha >= 1) || ($stdDev <= 0) || ($size < 1)) { + return Functions::NAN(); + } + + return Statistical::NORMSINV(1 - $alpha / 2) * $stdDev / sqrt($size); } } diff --git a/src/PhpSpreadsheet/Calculation/Statistical/Percentiles.php b/src/PhpSpreadsheet/Calculation/Statistical/Percentiles.php new file mode 100644 index 00000000..0001b7bf --- /dev/null +++ b/src/PhpSpreadsheet/Calculation/Statistical/Percentiles.php @@ -0,0 +1,207 @@ +getMessage(); + } + + if (($entry < 0) || ($entry > 1)) { + return Functions::NAN(); + } + + $mArgs = self::percentileFilterValues($aArgs); + $mValueCount = count($mArgs); + if ($mValueCount > 0) { + sort($mArgs); + $count = Counts::COUNT($mArgs); + $index = $entry * ($count - 1); + $iBase = floor($index); + if ($index == $iBase) { + return $mArgs[$index]; + } + $iNext = $iBase + 1; + $iProportion = $index - $iBase; + + return $mArgs[$iBase] + (($mArgs[$iNext] - $mArgs[$iBase]) * $iProportion); + } + + return Functions::NAN(); + } + + /** + * PERCENTRANK. + * + * Returns the rank of a value in a data set as a percentage of the data set. + * Note that the returned rank is simply rounded to the appropriate significant digits, + * rather than floored (as MS Excel), so value 3 for a value set of 1, 2, 3, 4 will return + * 0.667 rather than 0.666 + * + * @param mixed (float[]) $valueSet An array of, or a reference to, a list of numbers + * @param mixed (int) $value the number whose rank you want to find + * @param mixed (int) $significance the number of significant digits for the returned percentage value + * + * @return float|string (string if result is an error) + */ + public static function PERCENTRANK($valueSet, $value, $significance = 3) + { + $valueSet = Functions::flattenArray($valueSet); + $value = Functions::flattenSingleValue($value); + $significance = ($significance === null) ? 3 : Functions::flattenSingleValue($significance); + + try { + $value = self::validateFloat($value); + $significance = self::validateInt($significance); + } catch (Exception $e) { + return $e->getMessage(); + } + + $valueSet = self::rankFilterValues($valueSet); + $valueCount = count($valueSet); + if ($valueCount == 0) { + return Functions::NA(); + } + sort($valueSet, SORT_NUMERIC); + + $valueAdjustor = $valueCount - 1; + if (($value < $valueSet[0]) || ($value > $valueSet[$valueAdjustor])) { + return Functions::NA(); + } + + $pos = array_search($value, $valueSet); + if ($pos === false) { + $pos = 0; + $testValue = $valueSet[0]; + while ($testValue < $value) { + $testValue = $valueSet[++$pos]; + } + --$pos; + $pos += (($value - $valueSet[$pos]) / ($testValue - $valueSet[$pos])); + } + + return round($pos / $valueAdjustor, $significance); + } + + /** + * QUARTILE. + * + * Returns the quartile of a data set. + * + * Excel Function: + * QUARTILE(value1[,value2[, ...]],entry) + * + * @param mixed $args Data values + * + * @return float|string The result, or a string containing an error + */ + public static function QUARTILE(...$args) + { + $aArgs = Functions::flattenArray($args); + $entry = array_pop($aArgs); + + try { + $entry = self::validateFloat($entry); + } catch (Exception $e) { + return $e->getMessage(); + } + + $entry = floor($entry); + $entry /= 4; + if (($entry < 0) || ($entry > 1)) { + return Functions::NAN(); + } + + return self::PERCENTILE($aArgs, $entry); + } + + /** + * RANK. + * + * Returns the rank of a number in a list of numbers. + * + * @param mixed (float) $value the number whose rank you want to find + * @param mixed (float[]) $valueSet An array of, or a reference to, a list of numbers + * @param mixed (int) $order Order to sort the values in the value set + * + * @return float|string The result, or a string containing an error + */ + public static function RANK($value, $valueSet, $order = self::RANK_SORT_DESCENDING) + { + $value = Functions::flattenSingleValue($value); + $valueSet = Functions::flattenArray($valueSet); + $order = ($order === null) ? self::RANK_SORT_DESCENDING : Functions::flattenSingleValue($order); + + try { + $value = self::validateFloat($value); + $order = self::validateInt($order); + } catch (Exception $e) { + return $e->getMessage(); + } + + $valueSet = self::rankFilterValues($valueSet); + if ($order === self::RANK_SORT_DESCENDING) { + rsort($valueSet, SORT_NUMERIC); + } else { + sort($valueSet, SORT_NUMERIC); + } + + $pos = array_search($value, $valueSet); + if ($pos === false) { + return Functions::NA(); + } + + return ++$pos; + } + + protected static function percentileFilterValues(array $dataSet) + { + return array_filter( + $dataSet, + function ($value): bool { + return is_numeric($value) && !is_string($value); + } + ); + } + + protected static function rankFilterValues(array $dataSet) + { + return array_filter( + $dataSet, + function ($value): bool { + return is_numeric($value); + } + ); + } +} diff --git a/src/PhpSpreadsheet/Calculation/Statistical/Permutations.php b/src/PhpSpreadsheet/Calculation/Statistical/Permutations.php index 343a056c..84cdfea1 100644 --- a/src/PhpSpreadsheet/Calculation/Statistical/Permutations.php +++ b/src/PhpSpreadsheet/Calculation/Statistical/Permutations.php @@ -2,11 +2,14 @@ namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical; +use PhpOffice\PhpSpreadsheet\Calculation\Exception; use PhpOffice\PhpSpreadsheet\Calculation\Functions; use PhpOffice\PhpSpreadsheet\Calculation\MathTrig; class Permutations { + use BaseValidations; + /** * PERMUT. * @@ -26,16 +29,18 @@ class Permutations $numObjs = Functions::flattenSingleValue($numObjs); $numInSet = Functions::flattenSingleValue($numInSet); - if ((is_numeric($numObjs)) && (is_numeric($numInSet))) { - $numInSet = floor($numInSet); - if ($numObjs < $numInSet) { - return Functions::NAN(); - } - - return round(MathTrig\Fact::funcFact($numObjs) / MathTrig\Fact::funcFact($numObjs - $numInSet)); + try { + $numObjs = self::validateInt($numObjs); + $numInSet = self::validateInt($numInSet); + } catch (Exception $e) { + return $e->getMessage(); } - return Functions::VALUE(); + if ($numObjs < $numInSet) { + return Functions::NAN(); + } + + return round(MathTrig\Fact::funcFact($numObjs) / MathTrig\Fact::funcFact($numObjs - $numInSet)); } /** @@ -54,16 +59,17 @@ class Permutations $numObjs = Functions::flattenSingleValue($numObjs); $numInSet = Functions::flattenSingleValue($numInSet); - if ((is_numeric($numObjs)) && (is_numeric($numInSet))) { - $numObjs = floor($numObjs); - $numInSet = floor($numInSet); - if ($numObjs < 0 || $numInSet < 0) { - return Functions::NAN(); - } - - return $numObjs ** $numInSet; + try { + $numObjs = self::validateInt($numObjs); + $numInSet = self::validateInt($numInSet); + } catch (Exception $e) { + return $e->getMessage(); } - return Functions::VALUE(); + if ($numObjs < 0 || $numInSet < 0) { + return Functions::NAN(); + } + + return $numObjs ** $numInSet; } } diff --git a/src/PhpSpreadsheet/Calculation/Statistical/Trends.php b/src/PhpSpreadsheet/Calculation/Statistical/Trends.php index b1dfbaef..8c88c54c 100644 --- a/src/PhpSpreadsheet/Calculation/Statistical/Trends.php +++ b/src/PhpSpreadsheet/Calculation/Statistical/Trends.php @@ -8,6 +8,8 @@ use PhpOffice\PhpSpreadsheet\Shared\Trend\Trend; class Trends { + use BaseValidations; + private static function filterTrendValues(array &$array1, array &$array2): void { foreach ($array1 as $key => $value) { @@ -116,11 +118,9 @@ class Trends public static function FORECAST($xValue, $yValues, $xValues) { $xValue = Functions::flattenSingleValue($xValue); - if (!is_numeric($xValue)) { - return Functions::VALUE(); - } try { + $xValue = self::validateFloat($xValue); self::checkTrendArrays($yValues, $xValues); self::validateTrendArrays($yValues, $xValues); } catch (Exception $e) { diff --git a/tests/data/Calculation/Statistical/PERCENTILE.php b/tests/data/Calculation/Statistical/PERCENTILE.php index 121e49c0..cf08ce88 100644 --- a/tests/data/Calculation/Statistical/PERCENTILE.php +++ b/tests/data/Calculation/Statistical/PERCENTILE.php @@ -25,10 +25,34 @@ return [ 48.4, [10.5, 7.2, 200, 5.4, 8.1, 0.8], ], + [ + 2, + [2, 1, 6, 4, 3, 5, 0.2], + ], + [ + 4, + [2, 1, 6, 4, 3, 5, 0.6], + ], + [ + 3.5, + [2, 1, 6, 4, 3, 5, 0.5], + ], + [ + 5.75, + [2, 1, 6, 4, 3, 5, 0.95], + ], [ '#NUM!', [1, 2, 3, 4, -0.3], ], + [ + '#NUM!', + [1, 2, 3, 4, 1.5], + ], + [ + '#NUM!', + ['A', 'B', 0.5], + ], [ '#VALUE!', [1, 2, 3, 4, 'NaN'], diff --git a/tests/data/Calculation/Statistical/PERCENTRANK.php b/tests/data/Calculation/Statistical/PERCENTRANK.php index 3ab019ac..3787a7ac 100644 --- a/tests/data/Calculation/Statistical/PERCENTRANK.php +++ b/tests/data/Calculation/Statistical/PERCENTRANK.php @@ -56,10 +56,15 @@ return [ 2, ], [ - '#NUM!', + '#VALUE!', ['A', 'B', 'C', 'D'], 'E', ], + [ + '#N/A', + ['A', 'B', 'C', 'D'], + 3, + ], [ '#N/A', [1, 2, 3, 4], diff --git a/tests/data/Calculation/Statistical/PERMUTATIONA.php b/tests/data/Calculation/Statistical/PERMUTATIONA.php index 6bc118b3..701f5eac 100644 --- a/tests/data/Calculation/Statistical/PERMUTATIONA.php +++ b/tests/data/Calculation/Statistical/PERMUTATIONA.php @@ -21,6 +21,14 @@ return [ '#NUM!', -1, 2, ], + [ + '#NUM!', + 1, -2, + ], + [ + '#VALUE!', + 'NaN', 31, + ], [ '#VALUE!', 49, 'NaN', diff --git a/tests/data/Calculation/Statistical/QUARTILE.php b/tests/data/Calculation/Statistical/QUARTILE.php index 80b2bf09..26a7902f 100644 --- a/tests/data/Calculation/Statistical/QUARTILE.php +++ b/tests/data/Calculation/Statistical/QUARTILE.php @@ -37,6 +37,10 @@ return [ 9.25, [7, 8, 9, 10, 3], ], + [ + '#NUM!', + [7, 8, 9, 10, -1], + ], [ '#NUM!', [7, 8, 9, 10, 5], diff --git a/tests/data/Calculation/Statistical/RANK.php b/tests/data/Calculation/Statistical/RANK.php index 0640bb43..6cb60e24 100644 --- a/tests/data/Calculation/Statistical/RANK.php +++ b/tests/data/Calculation/Statistical/RANK.php @@ -1,38 +1,53 @@