Merge pull request #2711 from PHPOffice/LookupRef-Sort-Functions

Implementation of the SORT() and SORTBY() Lookup/Reference functions
This commit is contained in:
Mark Baker 2022-03-24 18:30:41 +01:00 committed by GitHub
commit 11edcc9f96
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 748 additions and 4 deletions

View File

@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
### Added
- Implementation of the FILTER() and UNIQUE() Lookup/Reference (array) function
- Implementation of the FILTER(), SORT(), SORTBY() and UNIQUE() Lookup/Reference (array) functions
- Implementation of the ISREF() Information function.
- Added support for reading "formatted" numeric values from Csv files; although default behaviour of reading these values as strings is preserved.

View File

@ -2282,12 +2282,12 @@ class Calculation
],
'SORT' => [
'category' => Category::CATEGORY_LOOKUP_AND_REFERENCE,
'functionCall' => [Functions::class, 'DUMMY'],
'argumentCount' => '1+',
'functionCall' => [LookupRef\Sort::class, 'sort'],
'argumentCount' => '1-4',
],
'SORTBY' => [
'category' => Category::CATEGORY_LOOKUP_AND_REFERENCE,
'functionCall' => [Functions::class, 'DUMMY'],
'functionCall' => [LookupRef\Sort::class, 'sortBy'],
'argumentCount' => '2+',
],
'SQRT' => [

View File

@ -19,6 +19,8 @@ class Filter
return ExcelError::VALUE();
}
$matchArray = self::enumerateArrayKeys($matchArray);
$result = (Matrix::isColumnVector($matchArray))
? self::filterByRow($lookupArray, $matchArray)
: self::filterByColumn($lookupArray, $matchArray);
@ -30,6 +32,20 @@ class Filter
return array_values($result);
}
private static function enumerateArrayKeys(array $sortArray): array
{
    // Rebuild the outer array as a 0-based list; every element that is itself
    // an array is likewise re-keyed as a 0-based list. Non-array elements are
    // carried over untouched.
    $reindexedRows = [];
    foreach ($sortArray as $row) {
        $reindexedRows[] = is_array($row) ? array_values($row) : $row;
    }

    return $reindexedRows;
}
private static function filterByRow(array $lookupArray, array $matchArray): array
{
$matchArray = array_values(array_column($matchArray, 0));

View File

@ -0,0 +1,342 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Calculation\LookupRef;
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use PhpOffice\PhpSpreadsheet\Calculation\Exception;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
use PhpOffice\PhpSpreadsheet\Calculation\Information\ExcelError;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
class Sort extends LookupRefValidations
{
    public const ORDER_ASCENDING = 1;
    public const ORDER_DESCENDING = -1;

    /**
     * Prefix used to turn a numeric row position into a string array key.
     * array_multisort() re-indexes numeric keys but maintains string keys, so a
     * prefixed key lets each row's original position survive the sort.
     */
    private const ROW_KEY_PREFIX = 'row';

    /**
     * SORT
     * The SORT function returns a sorted array of the elements in an array.
     * The returned array is the same shape as the provided array argument.
     * Both $sortIndex and $sortOrder can be arrays, to provide multi-level sorting.
     *
     * @param mixed $sortArray The range of cells being sorted
     * @param mixed $sortIndex The column or row number within the sortArray to sort on;
     *                              null is treated as the default of 1
     * @param mixed $sortOrder Flag indicating whether to sort ascending or descending
     *                          Ascending = 1 (self::ORDER_ASCENDING)
     *                          Descending = -1 (self::ORDER_DESCENDING)
     *                              null or an empty array is treated as ascending
     * @param mixed $byColumn Whether the sort should be determined by row (the default) or by column
     *
     * @return mixed The sorted values from the sort range; the unchanged argument when it is a scalar
     *                  or an empty array; or the message of the validation exception (e.g. #VALUE!)
     *                  when $sortIndex or $sortOrder is invalid
     */
    public static function sort($sortArray, $sortIndex = 1, $sortOrder = self::ORDER_ASCENDING, $byColumn = false)
    {
        if (!is_array($sortArray)) {
            // Scalars are always returned "as is"
            return $sortArray;
        }
        if ($sortArray === []) {
            // Nothing to sort, and there is no first row whose width could be measured below
            return $sortArray;
        }

        // We want a simple, enumerated array of arrays where we can reference a column by its index number.
        $sortArray = self::enumerateArrayKeys($sortArray);

        $byColumn = (bool) $byColumn;
        // When sorting columns the index selects a row, otherwise it selects a column
        $lookupIndexSize = $byColumn ? count($sortArray) : count($sortArray[0]);

        // Only a genuinely omitted argument falls back to its default. Falsy-but-present
        // values such as 0 or '' must still be validated (and rejected), exactly as they
        // are when $sortIndex is a scalar.
        if ($sortIndex === null) {
            $sortIndex = 1;
        }
        if ($sortOrder === null || $sortOrder === []) {
            $sortOrder = [self::ORDER_ASCENDING];
        }

        try {
            // If $sortIndex and $sortOrder are scalars, then convert them into arrays
            if (is_scalar($sortIndex)) {
                $sortIndex = [$sortIndex];
                if (is_scalar($sortOrder)) {
                    $sortOrder = [$sortOrder];
                }
            }
            // but the values of those array arguments still need validation
            self::validateArrayArgumentsForSort($sortIndex, $sortOrder, $lookupIndexSize);
        } catch (Exception $e) {
            return $e->getMessage();
        }

        return $byColumn
            ? self::sortByColumn($sortArray, $sortIndex, $sortOrder)
            : self::sortByRow($sortArray, $sortIndex, $sortOrder);
    }

    /**
     * SORTBY
     * The SORTBY function sorts the contents of a range or array based on the values in a corresponding range or array.
     * The returned array is the same shape as the provided array argument.
     * Several vector/order pairs can be supplied, to provide multi-level sorting.
     *
     * @param mixed $sortArray The range of cells being sorted
     * @param mixed $args
     *              At least one additional argument must be provided, The vector or range to sort on
     *              After that, arguments are passed as pairs:
     *                    sort order: ascending or descending
     *                         Ascending = 1 (self::ORDER_ASCENDING)
     *                         Descending = -1 (self::ORDER_DESCENDING)
     *                    additional arrays or ranges for multi-level sorting
     *              A missing sort order is treated as ascending
     *
     * @return mixed The sorted values from the sort range; the unchanged argument when it is a scalar;
     *                  or an error value (e.g. #VALUE!) when no sort vector is given or an argument is invalid
     */
    public static function sortBy($sortArray, ...$args)
    {
        if (!is_array($sortArray)) {
            // Scalars are always returned "as is"
            return $sortArray;
        }
        if ($args === []) {
            // Without at least one vector there is nothing to order the rows by
            return ExcelError::VALUE();
        }

        $sortArray = self::enumerateArrayKeys($sortArray);

        $lookupArraySize = count($sortArray);
        $argumentCount = count($args);

        try {
            $sortBy = [];
            $sortOrder = [];
            // Arguments arrive as (vector, order) pairs; the order of the last pair may be omitted
            for ($i = 0; $i < $argumentCount; $i += 2) {
                $sortBy[] = self::validateSortVector($args[$i], $lookupArraySize);
                $sortOrder[] = self::validateSortOrder($args[$i + 1] ?? self::ORDER_ASCENDING);
            }
        } catch (Exception $e) {
            return $e->getMessage();
        }

        return self::processSortBy($sortArray, $sortBy, $sortOrder);
    }

    /**
     * Re-key the outer array, and every element that is itself an array, as 0-based lists.
     * Elements that are not arrays are carried over unchanged.
     */
    private static function enumerateArrayKeys(array $sortArray): array
    {
        $reindexedRows = [];
        foreach ($sortArray as $row) {
            $reindexedRows[] = is_array($row) ? array_values($row) : $row;
        }

        return $reindexedRows;
    }

    /**
     * Validate one sort index / sort order pair in place.
     *
     * @param mixed $sortIndex Replaced by the result of validatePositiveInt()
     * @param mixed $sortOrder Replaced by the validated sort order
     *
     * @throws Exception Carrying #VALUE! when either value is an array, or the index exceeds $sortArraySize;
     *                      also whatever the inherited validators throw
     */
    private static function validateScalarArgumentsForSort(&$sortIndex, &$sortOrder, int $sortArraySize): void
    {
        if (is_array($sortIndex) || is_array($sortOrder)) {
            throw new Exception(ExcelError::VALUE());
        }

        $sortIndex = self::validatePositiveInt($sortIndex, false);

        if ($sortIndex > $sortArraySize) {
            throw new Exception(ExcelError::VALUE());
        }

        $sortOrder = self::validateSortOrder($sortOrder);
    }

    /**
     * Flatten a SORTBY vector and check it has one value per element of the array being sorted.
     *
     * @param mixed $sortVector
     *
     * @throws Exception Carrying #VALUE! when the vector is not an array or has the wrong number of values
     */
    private static function validateSortVector($sortVector, int $sortArraySize): array
    {
        if (!is_array($sortVector)) {
            throw new Exception(ExcelError::VALUE());
        }

        // It doesn't matter if it's a row or a column vector, it works either way
        $sortVector = Functions::flattenArray($sortVector);
        if (count($sortVector) !== $sortArraySize) {
            throw new Exception(ExcelError::VALUE());
        }

        return $sortVector;
    }

    /**
     * Check that a sort order is one of the two ORDER_* constants.
     *
     * @param mixed $sortOrder
     *
     * @throws Exception Carrying #VALUE! when the value is neither ORDER_ASCENDING nor ORDER_DESCENDING
     */
    private static function validateSortOrder($sortOrder): int
    {
        // NOTE(review): validateInt() is inherited and presumably returns an int; the strict
        // comparisons here and where the order is later consumed rely on that - confirm.
        $sortOrder = self::validateInt($sortOrder);
        if ($sortOrder !== self::ORDER_ASCENDING && $sortOrder !== self::ORDER_DESCENDING) {
            throw new Exception(ExcelError::VALUE());
        }

        return $sortOrder;
    }

    /**
     * Flatten and validate the SORT index and order arguments in place.
     *
     * @param mixed $sortIndex Replaced by a flat list of validated indexes
     * @param mixed $sortOrder Replaced by a flat list of validated orders, one per index
     *
     * @throws Exception Carrying #VALUE! when there are no orders, more orders than indexes,
     *                      more orders than $sortArraySize, or any single value is invalid
     */
    private static function validateArrayArgumentsForSort(&$sortIndex, &$sortOrder, int $sortArraySize): void
    {
        // It doesn't matter if they're row or column vectors, it works either way
        $sortIndex = Functions::flattenArray($sortIndex);
        $sortOrder = Functions::flattenArray($sortOrder);

        $indexCount = count($sortIndex);
        $orderCount = count($sortOrder);
        if ($orderCount === 0 || $orderCount > $sortArraySize || $orderCount > $indexCount) {
            throw new Exception(ExcelError::VALUE());
        }

        if ($indexCount > $orderCount) {
            // If $sortOrder has fewer elements than $sortIndex, then the last order element is repeated.
            $lastOrder = end($sortOrder);
            $sortOrder = array_pad(array_values($sortOrder), $indexCount, $lastOrder);
        }

        foreach ($sortIndex as $key => &$value) {
            self::validateScalarArgumentsForSort($value, $sortOrder[$key], $sortArraySize);
        }
        // Break the reference so a later write to $value cannot alter the last index
        unset($value);
    }

    /**
     * Convert the values of a sort vector into the form used for comparison.
     */
    private static function prepareSortVectorValues(array $sortVector): array
    {
        // Strings should be sorted case-insensitive; with booleans converted to locale-strings
        return array_map(
            static function ($value) {
                if (is_bool($value)) {
                    return $value ? Calculation::getTRUE() : Calculation::getFALSE();
                }
                if (is_string($value)) {
                    return StringHelper::strToLower($value);
                }

                return $value;
            },
            $sortVector
        );
    }

    /**
     * Reorder $sortArray according to one or more already-extracted sort vectors.
     *
     * @param array[] $sortIndex The sort vectors, highest priority first
     * @param int[] $sortOrder One ORDER_* value per vector
     */
    private static function processSortBy(array $sortArray, array $sortIndex, array $sortOrder): array
    {
        $sortVector = self::rowOrderFromSortVectors($sortArray, $sortIndex, $sortOrder);

        return self::sortLookupArrayFromVector($sortArray, $sortVector);
    }

    /**
     * @param int[] $sortIndex
     * @param int[] $sortOrder
     */
    private static function sortByRow(array $sortArray, array $sortIndex, array $sortOrder): array
    {
        $sortVector = self::buildVectorForSort($sortArray, $sortIndex, $sortOrder);

        return self::sortLookupArrayFromVector($sortArray, $sortVector);
    }

    /**
     * Sort columns rather than rows, by transposing, sorting the rows, and transposing back.
     *
     * @param int[] $sortIndex
     * @param int[] $sortOrder
     */
    private static function sortByColumn(array $sortArray, array $sortIndex, array $sortOrder): array
    {
        $sortArray = Matrix::transpose($sortArray);
        $result = self::sortByRow($sortArray, $sortIndex, $sortOrder);

        return Matrix::transpose($result);
    }

    /**
     * Work out the sorted order of the rows of $sortArray for the given 1-based column numbers.
     *
     * @param int[] $sortIndex
     * @param int[] $sortOrder
     *
     * @return int[] Original row positions, listed in sorted order
     */
    private static function buildVectorForSort(array $sortArray, array $sortIndex, array $sortOrder): array
    {
        $sortVectors = [];
        foreach ($sortIndex as $level => $columnNumber) {
            // Column numbers are 1-based, array offsets are 0-based
            $sortVectors[$level] = array_column($sortArray, $columnNumber - 1);
        }

        return self::rowOrderFromSortVectors($sortArray, $sortVectors, $sortOrder);
    }

    /**
     * Shared by SORT and SORTBY: build the array_multisort() argument list from the
     * sort vectors and their orders, then run the sort.
     *
     * @param array[] $sortVectors The sort vectors, highest priority first
     * @param int[] $sortOrder One ORDER_* value per vector, under the same keys as $sortVectors
     *
     * @return int[] Original row positions, listed in sorted order
     */
    private static function rowOrderFromSortVectors(array $sortArray, array $sortVectors, array $sortOrder): array
    {
        $sortArguments = [];
        foreach ($sortVectors as $level => $sortValues) {
            $sortArguments[] = self::prepareSortVectorValues($sortValues);
            $sortArguments[] = $sortOrder[$level] === self::ORDER_ASCENDING ? SORT_ASC : SORT_DESC;
        }
        $sortArguments = self::applyPHP7Patch($sortArray, $sortArguments);

        return self::executeVectorSortQuery(array_values($sortVectors), $sortArguments);
    }

    /**
     * Run array_multisort() and report the order in which the original rows end up.
     *
     * @param array[] $sortData The unprepared sort vectors, one array per sort level
     * @param array $sortArguments Alternating prepared vectors and SORT_* flags
     *
     * @return int[] Original row positions, listed in sorted order
     */
    private static function executeVectorSortQuery(array $sortData, array $sortArguments): array
    {
        // One entry per row, holding that row's value for each sort level
        $sortData = Matrix::transpose($sortData);

        // We need to set an index that can be retained, as array_multisort doesn't maintain numeric keys.
        // A plain prefixed string is enough for that. NOTE(review): the spreadsheet column-letter
        // helpers previously used as keys are meant for worksheet columns and may not accept
        // arbitrarily large row counts - confirm.
        $sortDataIndexed = [];
        foreach ($sortData as $key => $value) {
            $sortDataIndexed[self::ROW_KEY_PREFIX . $key] = $value;
        }
        unset($sortData);

        // Appended by reference so that the reordered keys can be read back after the sort
        $sortArguments[] = &$sortDataIndexed;

        array_multisort(...$sortArguments);

        // After the sort, we restore the numeric keys that will now be in the correct, sorted order
        $prefixLength = strlen(self::ROW_KEY_PREFIX);
        $sortedData = [];
        foreach (array_keys($sortDataIndexed) as $key) {
            $sortedData[] = (int) substr($key, $prefixLength);
        }

        return $sortedData;
    }

    /**
     * Build a new array holding the rows of $sortArray in the order given by $sortVector.
     *
     * @param int[] $sortVector Row positions within $sortArray, in the required output order
     */
    private static function sortLookupArrayFromVector(array $sortArray, array $sortVector): array
    {
        // Building a new array in the correct (sorted) order works; but may be memory heavy for larger arrays
        $sortedArray = [];
        foreach ($sortVector as $index) {
            $sortedArray[] = $sortArray[$index];
        }

        return $sortedArray;
    }

    /**
     * Hack to handle PHP 7:
     * From PHP 8.0.0, If two members compare as equal in a sort, they retain their original order;
     * but prior to PHP 8.0.0, their relative order in the sorted array was undefined.
     * MS Excel replicates the PHP 8.0.0 behaviour, retaining the original order of matching elements.
     * To replicate that behaviour with PHP 7, we add an extra sort based on the row index.
     */
    private static function applyPHP7Patch(array $sortArray, array $sortArguments): array
    {
        if (PHP_VERSION_ID < 80000) {
            $rowCount = count($sortArray);
            // range(1, 0) would count down and yield two elements, so an empty array is special-cased
            $sortArguments[] = $rowCount > 0 ? range(1, $rowCount) : [];
            $sortArguments[] = SORT_ASC;
        }

        return $sortArguments;
    }
}

View File

@ -0,0 +1,176 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Calculation\Functions\LookupRef;
use PhpOffice\PhpSpreadsheet\Calculation\Information\ExcelError;
use PhpOffice\PhpSpreadsheet\Calculation\LookupRef\Sort;
use PHPUnit\Framework\TestCase;
/**
 * Tests for Sort::sortBy(), the implementation of the SORTBY() function.
 *
 * Data providers and their sample-data helpers are static: PHPUnit 9 accepts
 * static providers and PHPUnit 10 deprecates non-static ones.
 */
class SortByTest extends TestCase
{
    /**
     * A non-array first argument is returned unchanged.
     */
    public function testSortOnScalar(): void
    {
        $value = 'NON-ARRAY';

        $result = Sort::sortBy($value);

        self::assertSame($value, $result);
    }

    /**
     * Every invalid sort vector or sort order yields #VALUE!.
     *
     * @dataProvider providerSortWithScalarArgumentErrorReturns
     *
     * @param mixed $sortIndex
     * @param mixed $sortOrder
     */
    public function testSortByWithArgumentErrorReturns($sortIndex, $sortOrder = 1): void
    {
        $value = [[1, 2], [3, 4], [5, 6]];

        $result = Sort::sortBy($value, $sortIndex, $sortOrder);

        self::assertSame(ExcelError::VALUE(), $result);
    }

    public static function providerSortWithScalarArgumentErrorReturns(): array
    {
        return [
            'Non-array sortIndex' => ['A', 1],
            'Mismatched sortIndex count' => [[1, 2, 3, 4], 1],
            'Non-numeric sortOrder' => [[1, 2, 3], 'A'],
            'Invalid negative sortOrder' => [[1, 2, 3], -2],
            'Zero sortOrder' => [[1, 2, 3], 0],
            'Invalid positive sortOrder' => [[1, 2, 3], 2],
        ];
    }

    /**
     * @dataProvider providerSortByRow
     *
     * @param mixed ...$args The sort vector(s) and sort order(s) passed on to Sort::sortBy()
     */
    public function testSortByRow(array $expectedResult, array $matrix, ...$args): void
    {
        $result = Sort::sortBy($matrix, ...$args);

        self::assertSame($expectedResult, $result);
    }

    public static function providerSortByRow(): array
    {
        return [
            'Simple sort by age' => [
                [
                    ['Fritz', 19],
                    ['Xi', 19],
                    ['Amy', 22],
                    ['Srivan', 39],
                    ['Tom', 52],
                    ['Fred', 65],
                    ['Hector', 66],
                    ['Sal', 73],
                ],
                self::sampleDataForSimpleSort(),
                array_column(self::sampleDataForSimpleSort(), 1),
            ],
            'Simple sort by name' => [
                [
                    ['Amy', 22],
                    ['Fred', 65],
                    ['Fritz', 19],
                    ['Hector', 66],
                    ['Sal', 73],
                    ['Srivan', 39],
                    ['Tom', 52],
                    ['Xi', 19],
                ],
                self::sampleDataForSimpleSort(),
                array_column(self::sampleDataForSimpleSort(), 0),
            ],
            'Row vector' => [
                [
                    ['Amy', 22],
                    ['Fred', 65],
                    ['Fritz', 19],
                    ['Hector', 66],
                    ['Sal', 73],
                    ['Srivan', 39],
                    ['Tom', 52],
                    ['Xi', 19],
                ],
                self::sampleDataForSimpleSort(),
                ['Tom', 'Fred', 'Amy', 'Sal', 'Fritz', 'Srivan', 'Xi', 'Hector'],
            ],
            'Column vector' => [
                [
                    ['Amy', 22],
                    ['Fred', 65],
                    ['Fritz', 19],
                    ['Hector', 66],
                    ['Sal', 73],
                    ['Srivan', 39],
                    ['Tom', 52],
                    ['Xi', 19],
                ],
                self::sampleDataForSimpleSort(),
                [['Tom'], ['Fred'], ['Amy'], ['Sal'], ['Fritz'], ['Srivan'], ['Xi'], ['Hector']],
            ],
            'Sort by region asc, name asc' => [
                [
                    ['East', 'Fritz', 19],
                    ['East', 'Tom', 52],
                    ['North', 'Amy', 22],
                    ['North', 'Xi', 19],
                    ['South', 'Hector', 66],
                    ['South', 'Sal', 73],
                    ['West', 'Fred', 65],
                    ['West', 'Srivan', 39],
                ],
                self::sampleDataForMultiSort(),
                array_column(self::sampleDataForMultiSort(), 0),
                Sort::ORDER_ASCENDING,
                array_column(self::sampleDataForMultiSort(), 1),
            ],
            'Sort by region asc, age desc' => [
                [
                    ['East', 'Tom', 52],
                    ['East', 'Fritz', 19],
                    ['North', 'Amy', 22],
                    ['North', 'Xi', 19],
                    ['South', 'Sal', 73],
                    ['South', 'Hector', 66],
                    ['West', 'Fred', 65],
                    ['West', 'Srivan', 39],
                ],
                self::sampleDataForMultiSort(),
                array_column(self::sampleDataForMultiSort(), 0),
                Sort::ORDER_ASCENDING,
                array_column(self::sampleDataForMultiSort(), 2),
                Sort::ORDER_DESCENDING,
            ],
        ];
    }

    /**
     * Unsorted name/age rows.
     */
    private static function sampleDataForSimpleSort(): array
    {
        return [
            ['Tom', 52],
            ['Fred', 65],
            ['Amy', 22],
            ['Sal', 73],
            ['Fritz', 19],
            ['Srivan', 39],
            ['Xi', 19],
            ['Hector', 66],
        ];
    }

    /**
     * Region/name/age rows, listed in name order.
     */
    private static function sampleDataForMultiSort(): array
    {
        return [
            ['North', 'Amy', 22],
            ['West', 'Fred', 65],
            ['East', 'Fritz', 19],
            ['South', 'Hector', 66],
            ['South', 'Sal', 73],
            ['West', 'Srivan', 39],
            ['East', 'Tom', 52],
            ['North', 'Xi', 19],
        ];
    }
}

View File

@ -0,0 +1,210 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Calculation\Functions\LookupRef;
use PhpOffice\PhpSpreadsheet\Calculation\Information\ExcelError;
use PhpOffice\PhpSpreadsheet\Calculation\LookupRef\Sort;
use PHPUnit\Framework\TestCase;
/**
 * Tests for Sort::sort(), the implementation of the SORT() function.
 *
 * Data providers and their sample-data helpers are static: PHPUnit 9 accepts
 * static providers and PHPUnit 10 deprecates non-static ones.
 */
class SortTest extends TestCase
{
    /**
     * A non-array first argument is returned unchanged.
     */
    public function testSortOnScalar(): void
    {
        $value = 'NON-ARRAY';

        $result = Sort::sort($value, 1, -1);

        self::assertSame($value, $result);
    }

    /**
     * Every invalid sort index or sort order yields #VALUE!.
     *
     * @dataProvider providerSortWithScalarArgumentErrorReturns
     *
     * @param mixed $sortIndex
     * @param mixed $sortOrder
     */
    public function testSortWithScalarArgumentErrorReturns($sortIndex, $sortOrder = 1): void
    {
        $value = [[1, 2], [3, 4], [5, 6]];

        $result = Sort::sort($value, $sortIndex, $sortOrder);

        self::assertSame(ExcelError::VALUE(), $result);
    }

    public static function providerSortWithScalarArgumentErrorReturns(): array
    {
        return [
            'Negative sortIndex' => [-1, -1],
            'Non-numeric sortIndex' => ['A', -1],
            'Zero sortIndex' => [0, -1],
            'Too high sortIndex' => [3, -1],
            'Non-numeric sortOrder' => [1, 'A'],
            'Invalid negative sortOrder' => [1, -2],
            'Zero sortOrder' => [1, 0],
            'Invalid positive sortOrder' => [1, 2],
            'Too many sortOrders (scalar and array)' => [1, [-1, 1]],
            'Too many sortOrders (both array)' => [[1, 2], [1, 2, 3]],
            'Zero positive sortIndex in vector' => [[0, 1]],
            'Too high sortIndex in vector' => [[1, 3]],
            'Invalid sortOrder in vector' => [[1, 2], [1, -2]],
        ];
    }

    /**
     * @dataProvider providerSortByRow
     */
    public function testSortByRow(array $expectedResult, array $matrix, int $sortIndex, int $sortOrder = Sort::ORDER_ASCENDING): void
    {
        $result = Sort::sort($matrix, $sortIndex, $sortOrder);

        self::assertSame($expectedResult, $result);
    }

    public static function providerSortByRow(): array
    {
        return [
            [
                [[142], [378], [404], [445], [483], [622], [650], [691], [783], [961]],
                self::sampleDataForRow(),
                1,
            ],
            [
                [[961], [783], [691], [650], [622], [483], [445], [404], [378], [142]],
                self::sampleDataForRow(),
                1,
                Sort::ORDER_DESCENDING,
            ],
            [
                [['Peaches', 25], ['Cherries', 29], ['Grapes', 31], ['Lemons', 34], ['Oranges', 36], ['Apples', 38], ['Pears', 40]],
                [['Apples', 38], ['Cherries', 29], ['Grapes', 31], ['Lemons', 34], ['Oranges', 36], ['Peaches', 25], ['Pears', 40]],
                2,
            ],
        ];
    }

    /**
     * @dataProvider providerSortByRowMultiLevel
     */
    public function testSortByRowMultiLevel(array $expectedResult, array $matrix, array $sortIndex, int $sortOrder = Sort::ORDER_ASCENDING): void
    {
        $result = Sort::sort($matrix, $sortIndex, $sortOrder);

        self::assertSame($expectedResult, $result);
    }

    public static function providerSortByRowMultiLevel(): array
    {
        return [
            [
                [
                    ['East', 'Grapes', 31],
                    ['East', 'Lemons', 36],
                    ['North', 'Cherries', 29],
                    ['North', 'Grapes', 27],
                    ['North', 'Peaches', 25],
                    ['South', 'Apples', 38],
                    ['South', 'Cherries', 28],
                    ['South', 'Oranges', 36],
                    ['South', 'Pears', 40],
                    ['West', 'Apples', 30],
                    ['West', 'Lemons', 34],
                    ['West', 'Oranges', 25],
                ],
                self::sampleDataForMultiRow(),
                [1, 2],
            ],
            [
                [
                    ['East', 'Grapes', 31],
                    ['East', 'Lemons', 36],
                    ['North', 'Peaches', 25],
                    ['North', 'Grapes', 27],
                    ['North', 'Cherries', 29],
                    ['South', 'Cherries', 28],
                    ['South', 'Oranges', 36],
                    ['South', 'Apples', 38],
                    ['South', 'Pears', 40],
                    ['West', 'Oranges', 25],
                    ['West', 'Apples', 30],
                    ['West', 'Lemons', 34],
                ],
                self::sampleDataForMultiRow(),
                [1, 3],
            ],
            [
                [
                    ['West', 'Apples', 30],
                    ['South', 'Apples', 38],
                    ['South', 'Cherries', 28],
                    ['North', 'Cherries', 29],
                    ['North', 'Grapes', 27],
                    ['East', 'Grapes', 31],
                    ['West', 'Lemons', 34],
                    ['East', 'Lemons', 36],
                    ['West', 'Oranges', 25],
                    ['South', 'Oranges', 36],
                    ['North', 'Peaches', 25],
                    ['South', 'Pears', 40],
                ],
                self::sampleDataForMultiRow(),
                [2, 3],
            ],
        ];
    }

    /**
     * @dataProvider providerSortByColumn
     */
    public function testSortByColumn(array $expectedResult, array $matrix, int $sortIndex, int $sortOrder): void
    {
        $result = Sort::sort($matrix, $sortIndex, $sortOrder, true);

        self::assertSame($expectedResult, $result);
    }

    public static function providerSortByColumn(): array
    {
        return [
            [
                [[142, 378, 404, 445, 483, 622, 650, 691, 783, 961]],
                self::sampleDataForColumn(),
                1,
                Sort::ORDER_ASCENDING,
            ],
            [
                [[961, 783, 691, 650, 622, 483, 445, 404, 378, 142]],
                self::sampleDataForColumn(),
                1,
                Sort::ORDER_DESCENDING,
            ],
        ];
    }

    /**
     * Ten unsorted numbers laid out as a single column (one value per row).
     */
    public static function sampleDataForRow(): array
    {
        return [
            [622], [961], [691], [445], [378], [483], [650], [783], [142], [404],
        ];
    }

    /**
     * Region/fruit/quantity rows, listed in descending quantity order.
     */
    public static function sampleDataForMultiRow(): array
    {
        return [
            ['South', 'Pears', 40],
            ['South', 'Apples', 38],
            ['South', 'Oranges', 36],
            ['East', 'Lemons', 36],
            ['West', 'Lemons', 34],
            ['East', 'Grapes', 31],
            ['West', 'Apples', 30],
            ['North', 'Cherries', 29],
            ['South', 'Cherries', 28],
            ['North', 'Grapes', 27],
            ['North', 'Peaches', 25],
            ['West', 'Oranges', 25],
        ];
    }

    /**
     * The same ten unsorted numbers laid out as a single row.
     */
    public static function sampleDataForColumn(): array
    {
        return [
            [622, 961, 691, 445, 378, 483, 650, 783, 142, 404],
        ];
    }
}