Replace manual wildcard logic in MATCH() function with the new WildcardMatch methods (#1919)
* Replace manual wildcard logic in MATCH() function with the new WildcardMatch methods * Additional unit tests * Refactor input validations * Refactor actual search logic into dedicated methods * Eliminate redundant code
This commit is contained in:
parent
2259de578b
commit
baacc83995
|
|
@ -1616,7 +1616,7 @@ class Calculation
|
|||
],
|
||||
'MATCH' => [
|
||||
'category' => Category::CATEGORY_LOOKUP_AND_REFERENCE,
|
||||
'functionCall' => [LookupRef::class, 'MATCH'],
|
||||
'functionCall' => [LookupRef\ExcelMatch::class, 'MATCH'],
|
||||
'argumentCount' => '2,3',
|
||||
],
|
||||
'MAX' => [
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ use PhpOffice\PhpSpreadsheet\Calculation\LookupRef\RowColumnInformation;
|
|||
use PhpOffice\PhpSpreadsheet\Calculation\LookupRef\VLookup;
|
||||
use PhpOffice\PhpSpreadsheet\Cell\Cell;
|
||||
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
|
||||
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
|
||||
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
|
||||
|
||||
class LookupRef
|
||||
|
|
@ -380,145 +379,7 @@ class LookupRef
|
|||
*/
|
||||
public static function MATCH($lookupValue, $lookupArray, $matchType = 1)
|
||||
{
|
||||
$lookupArray = Functions::flattenArray($lookupArray);
|
||||
$lookupValue = Functions::flattenSingleValue($lookupValue);
|
||||
$matchType = ($matchType === null) ? 1 : (int) Functions::flattenSingleValue($matchType);
|
||||
|
||||
// MATCH is not case sensitive, so we convert lookup value to be lower cased in case it's string type.
|
||||
if (is_string($lookupValue)) {
|
||||
$lookupValue = StringHelper::strToLower($lookupValue);
|
||||
}
|
||||
|
||||
// Lookup_value type has to be number, text, or logical values
|
||||
if ((!is_numeric($lookupValue)) && (!is_string($lookupValue)) && (!is_bool($lookupValue))) {
|
||||
return Functions::NA();
|
||||
}
|
||||
|
||||
// Match_type is 0, 1 or -1
|
||||
if (($matchType !== 0) && ($matchType !== -1) && ($matchType !== 1)) {
|
||||
return Functions::NA();
|
||||
}
|
||||
|
||||
// Lookup_array should not be empty
|
||||
$lookupArraySize = count($lookupArray);
|
||||
if ($lookupArraySize <= 0) {
|
||||
return Functions::NA();
|
||||
}
|
||||
|
||||
if ($matchType == 1) {
|
||||
// If match_type is 1 the list has to be processed from last to first
|
||||
|
||||
$lookupArray = array_reverse($lookupArray);
|
||||
$keySet = array_reverse(array_keys($lookupArray));
|
||||
}
|
||||
|
||||
// Lookup_array should contain only number, text, or logical values, or empty (null) cells
|
||||
foreach ($lookupArray as $i => $lookupArrayValue) {
|
||||
// check the type of the value
|
||||
if (
|
||||
(!is_numeric($lookupArrayValue)) && (!is_string($lookupArrayValue)) &&
|
||||
(!is_bool($lookupArrayValue)) && ($lookupArrayValue !== null)
|
||||
) {
|
||||
return Functions::NA();
|
||||
}
|
||||
// Convert strings to lowercase for case-insensitive testing
|
||||
if (is_string($lookupArrayValue)) {
|
||||
$lookupArray[$i] = StringHelper::strToLower($lookupArrayValue);
|
||||
}
|
||||
if (($lookupArrayValue === null) && (($matchType == 1) || ($matchType == -1))) {
|
||||
unset($lookupArray[$i]);
|
||||
}
|
||||
}
|
||||
|
||||
// **
|
||||
// find the match
|
||||
// **
|
||||
|
||||
if ($matchType === 0 || $matchType === 1) {
|
||||
foreach ($lookupArray as $i => $lookupArrayValue) {
|
||||
$typeMatch = ((gettype($lookupValue) === gettype($lookupArrayValue)) || (is_numeric($lookupValue) && is_numeric($lookupArrayValue)));
|
||||
$exactTypeMatch = $typeMatch && $lookupArrayValue === $lookupValue;
|
||||
$nonOnlyNumericExactMatch = !$typeMatch && $lookupArrayValue === $lookupValue;
|
||||
$exactMatch = $exactTypeMatch || $nonOnlyNumericExactMatch;
|
||||
|
||||
if ($matchType === 0) {
|
||||
if ($typeMatch && is_string($lookupValue) && (bool) preg_match('/([\?\*])/', $lookupValue)) {
|
||||
$splitString = $lookupValue;
|
||||
$chars = array_map(function ($i) use ($splitString) {
|
||||
return mb_substr($splitString, $i, 1);
|
||||
}, range(0, mb_strlen($splitString) - 1));
|
||||
|
||||
$length = count($chars);
|
||||
$pattern = '/^';
|
||||
for ($j = 0; $j < $length; ++$j) {
|
||||
if ($chars[$j] === '~') {
|
||||
if (isset($chars[$j + 1])) {
|
||||
if ($chars[$j + 1] === '*') {
|
||||
$pattern .= preg_quote($chars[$j + 1], '/');
|
||||
++$j;
|
||||
} elseif ($chars[$j + 1] === '?') {
|
||||
$pattern .= preg_quote($chars[$j + 1], '/');
|
||||
++$j;
|
||||
}
|
||||
} else {
|
||||
$pattern .= preg_quote($chars[$j], '/');
|
||||
}
|
||||
} elseif ($chars[$j] === '*') {
|
||||
$pattern .= '.*';
|
||||
} elseif ($chars[$j] === '?') {
|
||||
$pattern .= '.{1}';
|
||||
} else {
|
||||
$pattern .= preg_quote($chars[$j], '/');
|
||||
}
|
||||
}
|
||||
|
||||
$pattern .= '$/';
|
||||
if ((bool) preg_match($pattern, $lookupArrayValue)) {
|
||||
// exact match
|
||||
return $i + 1;
|
||||
}
|
||||
} elseif ($exactMatch) {
|
||||
// exact match
|
||||
return $i + 1;
|
||||
}
|
||||
} elseif (($matchType === 1) && $typeMatch && ($lookupArrayValue <= $lookupValue)) {
|
||||
$i = array_search($i, $keySet);
|
||||
|
||||
// The current value is the (first) match
|
||||
return $i + 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
$maxValueKey = null;
|
||||
|
||||
// The basic algorithm is:
|
||||
// Iterate and keep the highest match until the next element is smaller than the searched value.
|
||||
// Return immediately if perfect match is found
|
||||
foreach ($lookupArray as $i => $lookupArrayValue) {
|
||||
$typeMatch = gettype($lookupValue) === gettype($lookupArrayValue);
|
||||
$exactTypeMatch = $typeMatch && $lookupArrayValue === $lookupValue;
|
||||
$nonOnlyNumericExactMatch = !$typeMatch && $lookupArrayValue === $lookupValue;
|
||||
$exactMatch = $exactTypeMatch || $nonOnlyNumericExactMatch;
|
||||
|
||||
if ($exactMatch) {
|
||||
// Another "special" case. If a perfect match is found,
|
||||
// the algorithm gives up immediately
|
||||
return $i + 1;
|
||||
} elseif ($typeMatch & $lookupArrayValue >= $lookupValue) {
|
||||
$maxValueKey = $i + 1;
|
||||
} elseif ($typeMatch & $lookupArrayValue < $lookupValue) {
|
||||
//Excel algorithm gives up immediately if the first element is smaller than the searched value
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ($maxValueKey !== null) {
|
||||
return $maxValueKey;
|
||||
}
|
||||
}
|
||||
|
||||
// Unsuccessful in finding a match, return #N/A error value
|
||||
return Functions::NA();
|
||||
return LookupRef\ExcelMatch::MATCH($lookupValue, $lookupArray, $matchType);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,198 @@
|
|||
<?php
|
||||
|
||||
namespace PhpOffice\PhpSpreadsheet\Calculation\LookupRef;
|
||||
|
||||
use PhpOffice\PhpSpreadsheet\Calculation\Exception;
|
||||
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
|
||||
use PhpOffice\PhpSpreadsheet\Calculation\Internal\WildcardMatch;
|
||||
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
|
||||
|
||||
class ExcelMatch
|
||||
{
|
||||
public const MATCHTYPE_SMALLEST_VALUE = -1;
|
||||
public const MATCHTYPE_FIRST_VALUE = 0;
|
||||
public const MATCHTYPE_LARGEST_VALUE = 1;
|
||||
|
||||
/**
|
||||
* MATCH.
|
||||
*
|
||||
* The MATCH function searches for a specified item in a range of cells
|
||||
*
|
||||
* Excel Function:
|
||||
* =MATCH(lookup_value, lookup_array, [match_type])
|
||||
*
|
||||
* @param mixed $lookupValue The value that you want to match in lookup_array
|
||||
* @param mixed $lookupArray The range of cells being searched
|
||||
* @param mixed $matchType The number -1, 0, or 1. -1 means above, 0 means exact match, 1 means below.
|
||||
* If match_type is 1 or -1, the list has to be ordered.
|
||||
*
|
||||
* @return int|string The relative position of the found item
|
||||
*/
|
||||
public static function MATCH($lookupValue, $lookupArray, $matchType = self::MATCHTYPE_LARGEST_VALUE)
|
||||
{
|
||||
$lookupArray = Functions::flattenArray($lookupArray);
|
||||
$lookupValue = Functions::flattenSingleValue($lookupValue);
|
||||
$matchType = ($matchType === null)
|
||||
? self::MATCHTYPE_LARGEST_VALUE
|
||||
: (int) Functions::flattenSingleValue($matchType);
|
||||
|
||||
try {
|
||||
// Input validation
|
||||
self::validateLookupValue($lookupValue);
|
||||
self::validateMatchType($matchType);
|
||||
self::validateLookupArray($lookupArray);
|
||||
|
||||
$keySet = array_keys($lookupArray);
|
||||
if ($matchType == self::MATCHTYPE_LARGEST_VALUE) {
|
||||
// If match_type is 1 the list has to be processed from last to first
|
||||
$lookupArray = array_reverse($lookupArray);
|
||||
$keySet = array_reverse($keySet);
|
||||
}
|
||||
|
||||
$lookupArray = self::prepareLookupArray($lookupArray, $matchType);
|
||||
} catch (Exception $e) {
|
||||
return $e->getMessage();
|
||||
}
|
||||
|
||||
// MATCH() is not case sensitive, so we convert lookup value to be lower cased if it's a string type.
|
||||
if (is_string($lookupValue)) {
|
||||
$lookupValue = StringHelper::strToLower($lookupValue);
|
||||
}
|
||||
|
||||
$valueKey = null;
|
||||
switch ($matchType) {
|
||||
case self::MATCHTYPE_LARGEST_VALUE:
|
||||
$valueKey = self::matchLargestValue($lookupArray, $lookupValue, $keySet);
|
||||
|
||||
break;
|
||||
case self::MATCHTYPE_FIRST_VALUE:
|
||||
$valueKey = self::matchFirstValue($lookupArray, $lookupValue);
|
||||
|
||||
break;
|
||||
case self::MATCHTYPE_SMALLEST_VALUE:
|
||||
default:
|
||||
$valueKey = self::matchSmallestValue($lookupArray, $lookupValue);
|
||||
}
|
||||
|
||||
if ($valueKey !== null) {
|
||||
return ++$valueKey;
|
||||
}
|
||||
|
||||
// Unsuccessful in finding a match, return #N/A error value
|
||||
return Functions::NA();
|
||||
}
|
||||
|
||||
private static function matchFirstValue($lookupArray, $lookupValue)
|
||||
{
|
||||
$wildcardLookup = ((bool) preg_match('/([\?\*])/', $lookupValue));
|
||||
$wildcard = WildcardMatch::wildcard($lookupValue);
|
||||
|
||||
foreach ($lookupArray as $i => $lookupArrayValue) {
|
||||
$typeMatch = ((gettype($lookupValue) === gettype($lookupArrayValue)) ||
|
||||
(is_numeric($lookupValue) && is_numeric($lookupArrayValue)));
|
||||
|
||||
if (
|
||||
$typeMatch && is_string($lookupValue) &&
|
||||
$wildcardLookup && WildcardMatch::compare($lookupArrayValue, $wildcard)
|
||||
) {
|
||||
// wildcard match
|
||||
return $i;
|
||||
} elseif ($lookupArrayValue === $lookupValue) {
|
||||
// exact match
|
||||
return $i;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static function matchLargestValue($lookupArray, $lookupValue, $keySet)
|
||||
{
|
||||
foreach ($lookupArray as $i => $lookupArrayValue) {
|
||||
$typeMatch = ((gettype($lookupValue) === gettype($lookupArrayValue)) ||
|
||||
(is_numeric($lookupValue) && is_numeric($lookupArrayValue)));
|
||||
|
||||
if ($typeMatch && ($lookupArrayValue <= $lookupValue)) {
|
||||
return array_search($i, $keySet);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static function matchSmallestValue($lookupArray, $lookupValue)
|
||||
{
|
||||
$valueKey = null;
|
||||
|
||||
// The basic algorithm is:
|
||||
// Iterate and keep the highest match until the next element is smaller than the searched value.
|
||||
// Return immediately if perfect match is found
|
||||
foreach ($lookupArray as $i => $lookupArrayValue) {
|
||||
$typeMatch = gettype($lookupValue) === gettype($lookupArrayValue);
|
||||
|
||||
if ($lookupArrayValue === $lookupValue) {
|
||||
// Another "special" case. If a perfect match is found,
|
||||
// the algorithm gives up immediately
|
||||
return $i;
|
||||
} elseif ($typeMatch && $lookupArrayValue >= $lookupValue) {
|
||||
$valueKey = $i;
|
||||
} elseif ($typeMatch && $lookupArrayValue < $lookupValue) {
|
||||
//Excel algorithm gives up immediately if the first element is smaller than the searched value
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return $valueKey;
|
||||
}
|
||||
|
||||
private static function validateLookupValue($lookupValue): void
|
||||
{
|
||||
// Lookup_value type has to be number, text, or logical values
|
||||
if ((!is_numeric($lookupValue)) && (!is_string($lookupValue)) && (!is_bool($lookupValue))) {
|
||||
throw new Exception(Functions::NA());
|
||||
}
|
||||
}
|
||||
|
||||
private static function validateMatchType($matchType): void
|
||||
{
|
||||
// Match_type is 0, 1 or -1
|
||||
if (
|
||||
($matchType !== self::MATCHTYPE_FIRST_VALUE) &&
|
||||
($matchType !== self::MATCHTYPE_LARGEST_VALUE) && ($matchType !== self::MATCHTYPE_SMALLEST_VALUE)
|
||||
) {
|
||||
throw new Exception(Functions::NA());
|
||||
}
|
||||
}
|
||||
|
||||
private static function validateLookupArray($lookupArray): void
|
||||
{
|
||||
// Lookup_array should not be empty
|
||||
$lookupArraySize = count($lookupArray);
|
||||
if ($lookupArraySize <= 0) {
|
||||
throw new Exception(Functions::NA());
|
||||
}
|
||||
}
|
||||
|
||||
private static function prepareLookupArray($lookupArray, $matchType)
|
||||
{
|
||||
// Lookup_array should contain only number, text, or logical values, or empty (null) cells
|
||||
foreach ($lookupArray as $i => $value) {
|
||||
// check the type of the value
|
||||
if ((!is_numeric($value)) && (!is_string($value)) && (!is_bool($value)) && ($value !== null)) {
|
||||
throw new Exception(Functions::NA());
|
||||
}
|
||||
// Convert strings to lowercase for case-insensitive testing
|
||||
if (is_string($value)) {
|
||||
$lookupArray[$i] = StringHelper::strToLower($value);
|
||||
}
|
||||
if (
|
||||
($value === null) &&
|
||||
(($matchType == self::MATCHTYPE_LARGEST_VALUE) || ($matchType == self::MATCHTYPE_SMALLEST_VALUE))
|
||||
) {
|
||||
unset($lookupArray[$i]);
|
||||
}
|
||||
}
|
||||
|
||||
return $lookupArray;
|
||||
}
|
||||
}
|
||||
|
|
@ -26,7 +26,6 @@ return [
|
|||
[2, 0, 0, 3],
|
||||
0,
|
||||
],
|
||||
|
||||
// Third argument = 1
|
||||
[
|
||||
1, // Expected
|
||||
|
|
@ -52,7 +51,6 @@ return [
|
|||
[2, 0, 0, 3],
|
||||
1,
|
||||
],
|
||||
|
||||
// Third argument = -1
|
||||
[
|
||||
1, // Expected
|
||||
|
|
@ -96,7 +94,12 @@ return [
|
|||
[8, 8, 3, 2],
|
||||
-1,
|
||||
],
|
||||
|
||||
[ // Default matchtype
|
||||
4, // Expected
|
||||
4, // Input
|
||||
[2, 0, 0, 3],
|
||||
null,
|
||||
],
|
||||
// match on ranges with empty cells
|
||||
[
|
||||
3, // Expected
|
||||
|
|
@ -110,7 +113,6 @@ return [
|
|||
[1, null, 4, null, null],
|
||||
1,
|
||||
],
|
||||
|
||||
// 0s are causing errors, because things like 0 == 'x' is true. Thanks PHP!
|
||||
[
|
||||
3,
|
||||
|
|
@ -233,7 +235,7 @@ return [
|
|||
[
|
||||
2, // Expected
|
||||
'a*~*c',
|
||||
['aAAAAA', 'a123456*c', 'az'],
|
||||
['aAAAAA', 'a123456*c', 'az', 'alembic'],
|
||||
0,
|
||||
],
|
||||
[
|
||||
|
|
@ -272,4 +274,76 @@ return [
|
|||
[1, 22, 'aaa'],
|
||||
0,
|
||||
],
|
||||
[
|
||||
'#N/A', // Expected
|
||||
'abc',
|
||||
[1, 22, 'aaa'],
|
||||
0,
|
||||
],
|
||||
[
|
||||
'#N/A', // Expected (Invalid lookup value)
|
||||
new DateTime('2021-03-11'),
|
||||
[1, 22, 'aaa'],
|
||||
1,
|
||||
],
|
||||
[
|
||||
'#N/A', // Expected (Invalid match type)
|
||||
'abc',
|
||||
[1, 22, 'aaa'],
|
||||
123,
|
||||
],
|
||||
[
|
||||
'#N/A', // Expected (Empty lookup array)
|
||||
'abc',
|
||||
[],
|
||||
1,
|
||||
],
|
||||
[
|
||||
8,
|
||||
'A*e',
|
||||
['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'],
|
||||
-1,
|
||||
],
|
||||
[
|
||||
2,
|
||||
'A*e',
|
||||
['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'],
|
||||
0,
|
||||
],
|
||||
[
|
||||
'#N/A',
|
||||
'A*e',
|
||||
['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'],
|
||||
1,
|
||||
],
|
||||
[
|
||||
8,
|
||||
'A?s*e',
|
||||
['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'],
|
||||
-1,
|
||||
],
|
||||
[
|
||||
5,
|
||||
'A?s*e',
|
||||
['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'],
|
||||
0,
|
||||
],
|
||||
[
|
||||
'#N/A',
|
||||
'A*e',
|
||||
['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'],
|
||||
1,
|
||||
],
|
||||
[
|
||||
8,
|
||||
'*verse',
|
||||
['Obtuse', 'Amuse', 'Obverse', 'Inverse', 'Assurance', 'Amplitude', 'Adverse', 'Apartment'],
|
||||
-1,
|
||||
],
|
||||
[
|
||||
3,
|
||||
'*verse',
|
||||
['Obtuse', 'Amuse', 'Obverse', 'Inverse', 'Assurance', 'Amplitude', 'Adverse', 'Apartment'],
|
||||
0,
|
||||
],
|
||||
];
|
||||
|
|
|
|||
Loading…
Reference in New Issue