Replace manual wildcard logic in MATCH() function with the new WildcardMatch methods (#1919)

* Replace manual wildcard logic in MATCH() function with the new WildcardMatch methods
* Additional unit tests
* Refactor input validations
* Refactor actual search logic into dedicated methods
* Eliminate redundant code
This commit is contained in:
Mark Baker 2021-03-12 18:23:15 +01:00 committed by GitHub
parent 2259de578b
commit baacc83995
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 279 additions and 146 deletions

View File

@ -1616,7 +1616,7 @@ class Calculation
],
'MATCH' => [
'category' => Category::CATEGORY_LOOKUP_AND_REFERENCE,
'functionCall' => [LookupRef::class, 'MATCH'],
'functionCall' => [LookupRef\ExcelMatch::class, 'MATCH'],
'argumentCount' => '2,3',
],
'MAX' => [

View File

@ -10,7 +10,6 @@ use PhpOffice\PhpSpreadsheet\Calculation\LookupRef\RowColumnInformation;
use PhpOffice\PhpSpreadsheet\Calculation\LookupRef\VLookup;
use PhpOffice\PhpSpreadsheet\Cell\Cell;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
class LookupRef
@ -380,145 +379,7 @@ class LookupRef
*/
public static function MATCH($lookupValue, $lookupArray, $matchType = 1)
{
    // Thin backward-compatibility wrapper: the matching logic (input validation,
    // case-insensitive comparison, wildcard handling and the three match-type strategies)
    // lives in LookupRef\ExcelMatch. Keeping a second, inline copy of that logic here left the
    // delegating call unreachable behind an earlier `return`, so this method now only
    // forwards its arguments unchanged and returns whatever ExcelMatch::MATCH() returns.
    return LookupRef\ExcelMatch::MATCH($lookupValue, $lookupArray, $matchType);
}
/**

View File

@ -0,0 +1,198 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Calculation\LookupRef;
use PhpOffice\PhpSpreadsheet\Calculation\Exception;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
use PhpOffice\PhpSpreadsheet\Calculation\Internal\WildcardMatch;
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
class ExcelMatch
{
    public const MATCHTYPE_SMALLEST_VALUE = -1;
    public const MATCHTYPE_FIRST_VALUE = 0;
    public const MATCHTYPE_LARGEST_VALUE = 1;

    /**
     * MATCH.
     *
     * The MATCH function searches for a specified item in a range of cells
     * and returns its 1-based position within that range.
     *
     * Excel Function:
     *        =MATCH(lookup_value, lookup_array, [match_type])
     *
     * String comparisons are case-insensitive: both the lookup value and the string entries
     * of the lookup array are lower-cased before they are compared.
     *
     * @param mixed $lookupValue The value that you want to match in lookup_array
     * @param mixed $lookupArray The range of cells being searched
     * @param mixed $matchType The number -1, 0, or 1. -1 means above, 0 means exact match, 1 means below.
     *                         If match_type is 1 or -1, the list has to be ordered.
     *                         A null match type is treated as 1.
     *
     * @return int|string The relative position of the found item, or the #N/A error value when the
     *                    arguments are invalid or nothing matches
     */
    public static function MATCH($lookupValue, $lookupArray, $matchType = self::MATCHTYPE_LARGEST_VALUE)
    {
        $lookupArray = Functions::flattenArray($lookupArray);
        $lookupValue = Functions::flattenSingleValue($lookupValue);
        $matchType = ($matchType === null)
            ? self::MATCHTYPE_LARGEST_VALUE
            : (int) Functions::flattenSingleValue($matchType);

        try {
            // Input validation; each validator throws an Exception whose message is the error value
            self::validateLookupValue($lookupValue);
            self::validateMatchType($matchType);
            self::validateLookupArray($lookupArray);

            $keySet = array_keys($lookupArray);
            if ($matchType === self::MATCHTYPE_LARGEST_VALUE) {
                // If match_type is 1 the list has to be processed from last to first.
                // array_reverse() renumbers integer keys, so the reversed key set is kept alongside
                // to translate a key of the reversed list back into a position in the original list.
                // NOTE(review): this translation presumes the flattened array has integer keys.
                $lookupArray = array_reverse($lookupArray);
                $keySet = array_reverse($keySet);
            }

            $lookupArray = self::prepareLookupArray($lookupArray, $matchType);
        } catch (Exception $e) {
            return $e->getMessage();
        }

        // MATCH() is not case sensitive, so we convert lookup value to be lower cased if it's a string type.
        if (is_string($lookupValue)) {
            $lookupValue = StringHelper::strToLower($lookupValue);
        }

        $valueKey = null;
        switch ($matchType) {
            case self::MATCHTYPE_LARGEST_VALUE:
                $valueKey = self::matchLargestValue($lookupArray, $lookupValue, $keySet);

                break;
            case self::MATCHTYPE_FIRST_VALUE:
                $valueKey = self::matchFirstValue($lookupArray, $lookupValue);

                break;
            case self::MATCHTYPE_SMALLEST_VALUE:
            default:
                $valueKey = self::matchSmallestValue($lookupArray, $lookupValue);
        }

        if ($valueKey !== null) {
            // Keys are zero-based; the function result is one-based
            return ++$valueKey;
        }

        // Unsuccessful in finding a match, return #N/A error value
        return Functions::NA();
    }

    /**
     * Match type 0: find the first entry that equals the lookup value.
     *
     * A string lookup value containing `?` or `*` is matched against string entries through
     * WildcardMatch; every other lookup value needs an identical entry, except that an integer
     * and a float holding the same number are treated as equal (2 matches 2.0).
     *
     * @param array $lookupArray Prepared lookup array (string entries already lower-cased)
     * @param mixed $lookupValue Lookup value (already lower-cased when it is a string)
     *
     * @return null|int|string Key of the first matching entry, or null when nothing matches
     */
    private static function matchFirstValue(array $lookupArray, $lookupValue)
    {
        // Only strings can carry wildcards; never hand numbers or booleans to the regex/wildcard helpers
        $useWildcard = is_string($lookupValue) && (bool) preg_match('/([\?\*])/', $lookupValue);
        $wildcard = $useWildcard ? WildcardMatch::wildcard($lookupValue) : null;
        $valueIsNumber = self::isNumber($lookupValue);

        foreach ($lookupArray as $i => $lookupArrayValue) {
            if (
                $useWildcard && is_string($lookupArrayValue) &&
                WildcardMatch::compare($lookupArrayValue, $wildcard)
            ) {
                // wildcard match
                return $i;
            }

            if ($lookupArrayValue === $lookupValue) {
                // exact match
                return $i;
            }

            // Loose comparison is restricted to real int/float pairs, so that
            // PHP oddities such as 0 == 'x' can never produce a match.
            if ($valueIsNumber && self::isNumber($lookupArrayValue) && $lookupArrayValue == $lookupValue) {
                // same number stored as int on one side and float on the other
                return $i;
            }
        }

        return null;
    }

    /**
     * Match type 1: find the largest entry that is less than or equal to the lookup value.
     *
     * The lookup array is expected in reversed order, so the first qualifying entry met here is
     * the last qualifying entry of the original list.
     *
     * @param array $lookupArray Prepared lookup array in reversed order
     * @param mixed $lookupValue Lookup value (already lower-cased when it is a string)
     * @param array $keySet Reversed key set of the original array, used to map a key of the
     *                      reversed array back to the original position
     *
     * @return null|int|string Zero-based position in the original list, or null when nothing matches
     */
    private static function matchLargestValue(array $lookupArray, $lookupValue, array $keySet)
    {
        foreach ($lookupArray as $i => $lookupArrayValue) {
            $typeMatch = ((gettype($lookupValue) === gettype($lookupArrayValue)) ||
                (is_numeric($lookupValue) && is_numeric($lookupArrayValue)));

            if ($typeMatch && ($lookupArrayValue <= $lookupValue)) {
                $position = array_search($i, $keySet, true);

                return ($position === false) ? null : $position;
            }
        }

        return null;
    }

    /**
     * Match type -1: find the smallest entry that is greater than or equal to the lookup value.
     *
     * Entries are only ordered against the lookup value when both have the same type or both are
     * int/float numbers; entries of any other type are skipped.
     *
     * @param array $lookupArray Prepared lookup array
     * @param mixed $lookupValue Lookup value (already lower-cased when it is a string)
     *
     * @return null|int|string Key of the matching entry, or null when nothing matches
     */
    private static function matchSmallestValue(array $lookupArray, $lookupValue)
    {
        $valueKey = null;
        $valueIsNumber = self::isNumber($lookupValue);

        // The basic algorithm is:
        // Iterate and keep the highest match until the next element is smaller than the searched value.
        // Return immediately if perfect match is found
        foreach ($lookupArray as $i => $lookupArrayValue) {
            $bothNumbers = $valueIsNumber && self::isNumber($lookupArrayValue);
            $comparable = $bothNumbers || (gettype($lookupValue) === gettype($lookupArrayValue));

            if (
                ($lookupArrayValue === $lookupValue) ||
                ($bothNumbers && $lookupArrayValue == $lookupValue)
            ) {
                // Another "special" case. If a perfect match is found,
                // the algorithm gives up immediately
                return $i;
            }

            if ($comparable && $lookupArrayValue >= $lookupValue) {
                $valueKey = $i;
            } elseif ($comparable && $lookupArrayValue < $lookupValue) {
                //Excel algorithm gives up immediately if the first element is smaller than the searched value
                break;
            }
        }

        return $valueKey;
    }

    /**
     * Tell whether a value is a real number (int or float); numeric strings and booleans are not.
     *
     * @param mixed $value
     */
    private static function isNumber($value): bool
    {
        return is_int($value) || is_float($value);
    }

    /**
     * Reject lookup values that are neither numeric, string nor boolean.
     *
     * @param mixed $lookupValue
     */
    private static function validateLookupValue($lookupValue): void
    {
        // Lookup_value type has to be number, text, or logical values
        if ((!is_numeric($lookupValue)) && (!is_string($lookupValue)) && (!is_bool($lookupValue))) {
            throw new Exception(Functions::NA());
        }
    }

    /**
     * Reject match types other than -1, 0 and 1.
     *
     * @param mixed $matchType
     */
    private static function validateMatchType($matchType): void
    {
        // Match_type is 0, 1 or -1
        if (
            ($matchType !== self::MATCHTYPE_FIRST_VALUE) &&
            ($matchType !== self::MATCHTYPE_LARGEST_VALUE) && ($matchType !== self::MATCHTYPE_SMALLEST_VALUE)
        ) {
            throw new Exception(Functions::NA());
        }
    }

    /**
     * Reject an empty lookup array.
     *
     * @param array $lookupArray
     */
    private static function validateLookupArray($lookupArray): void
    {
        // Lookup_array should not be empty
        $lookupArraySize = count($lookupArray);
        if ($lookupArraySize <= 0) {
            throw new Exception(Functions::NA());
        }
    }

    /**
     * Validate the entries of the lookup array and normalise them for comparison.
     *
     * String entries are lower-cased; for match types 1 and -1 empty (null) entries are removed,
     * with the keys of the remaining entries left untouched.
     *
     * @param array $lookupArray
     * @param int $matchType One of the MATCHTYPE_* constants
     *
     * @return array The normalised lookup array
     */
    private static function prepareLookupArray($lookupArray, $matchType)
    {
        $dropEmptyCells = ($matchType === self::MATCHTYPE_LARGEST_VALUE)
            || ($matchType === self::MATCHTYPE_SMALLEST_VALUE);

        // Lookup_array should contain only number, text, or logical values, or empty (null) cells
        foreach ($lookupArray as $i => $value) {
            // check the type of the value
            if ((!is_numeric($value)) && (!is_string($value)) && (!is_bool($value)) && ($value !== null)) {
                throw new Exception(Functions::NA());
            }

            // Convert strings to lowercase for case-insensitive testing
            if (is_string($value)) {
                $lookupArray[$i] = StringHelper::strToLower($value);
            }

            if (($value === null) && $dropEmptyCells) {
                unset($lookupArray[$i]);
            }
        }

        return $lookupArray;
    }
}

View File

@ -26,7 +26,6 @@ return [
[2, 0, 0, 3],
0,
],
// Third argument = 1
[
1, // Expected
@ -52,7 +51,6 @@ return [
[2, 0, 0, 3],
1,
],
// Third argument = -1
[
1, // Expected
@ -96,7 +94,12 @@ return [
[8, 8, 3, 2],
-1,
],
[ // Default matchtype
4, // Expected
4, // Input
[2, 0, 0, 3],
null,
],
// match on ranges with empty cells
[
3, // Expected
@ -110,7 +113,6 @@ return [
[1, null, 4, null, null],
1,
],
// 0s are causing errors, because things like 0 == 'x' is true. Thanks PHP!
[
3,
@ -233,7 +235,7 @@ return [
[
2, // Expected
'a*~*c',
['aAAAAA', 'a123456*c', 'az'],
['aAAAAA', 'a123456*c', 'az', 'alembic'],
0,
],
[
@ -272,4 +274,76 @@ return [
[1, 22, 'aaa'],
0,
],
[
'#N/A', // Expected
'abc',
[1, 22, 'aaa'],
0,
],
[
'#N/A', // Expected (Invalid lookup value)
new DateTime('2021-03-11'),
[1, 22, 'aaa'],
1,
],
[
'#N/A', // Expected (Invalid match type)
'abc',
[1, 22, 'aaa'],
123,
],
[
'#N/A', // Expected (Empty lookup array)
'abc',
[],
1,
],
[
8,
'A*e',
['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'],
-1,
],
[
2,
'A*e',
['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'],
0,
],
[
'#N/A',
'A*e',
['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'],
1,
],
[
8,
'A?s*e',
['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'],
-1,
],
[
5,
'A?s*e',
['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'],
0,
],
[
'#N/A',
'A*e',
['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'],
1,
],
[
8,
'*verse',
['Obtuse', 'Amuse', 'Obverse', 'Inverse', 'Assurance', 'Amplitude', 'Adverse', 'Apartment'],
-1,
],
[
3,
'*verse',
['Obtuse', 'Amuse', 'Obverse', 'Inverse', 'Assurance', 'Amplitude', 'Adverse', 'Apartment'],
0,
],
];