From baacc83995852934afd1417e97b55b19a54519cd Mon Sep 17 00:00:00 2001 From: Mark Baker Date: Fri, 12 Mar 2021 18:23:15 +0100 Subject: [PATCH] Replace manual wildcard logic in MATCH() function with the new WildcardMatch methods (#1919) * Replace manual wildcard logic in MATCH() function with the new WildcardMatch methods * Additional unit tests * Refactor input validations * Refactor actual search logic into dedicated methods * Eliminate redundant code --- .../Calculation/Calculation.php | 2 +- src/PhpSpreadsheet/Calculation/LookupRef.php | 141 +------------ .../Calculation/LookupRef/ExcelMatch.php | 198 ++++++++++++++++++ tests/data/Calculation/LookupRef/MATCH.php | 84 +++++++- 4 files changed, 279 insertions(+), 146 deletions(-) create mode 100644 src/PhpSpreadsheet/Calculation/LookupRef/ExcelMatch.php diff --git a/src/PhpSpreadsheet/Calculation/Calculation.php b/src/PhpSpreadsheet/Calculation/Calculation.php index 3adcf243..80b6d47b 100644 --- a/src/PhpSpreadsheet/Calculation/Calculation.php +++ b/src/PhpSpreadsheet/Calculation/Calculation.php @@ -1616,7 +1616,7 @@ class Calculation ], 'MATCH' => [ 'category' => Category::CATEGORY_LOOKUP_AND_REFERENCE, - 'functionCall' => [LookupRef::class, 'MATCH'], + 'functionCall' => [LookupRef\ExcelMatch::class, 'MATCH'], 'argumentCount' => '2,3', ], 'MAX' => [ diff --git a/src/PhpSpreadsheet/Calculation/LookupRef.php b/src/PhpSpreadsheet/Calculation/LookupRef.php index f1a147f1..d2cc4f94 100644 --- a/src/PhpSpreadsheet/Calculation/LookupRef.php +++ b/src/PhpSpreadsheet/Calculation/LookupRef.php @@ -10,7 +10,6 @@ use PhpOffice\PhpSpreadsheet\Calculation\LookupRef\RowColumnInformation; use PhpOffice\PhpSpreadsheet\Calculation\LookupRef\VLookup; use PhpOffice\PhpSpreadsheet\Cell\Cell; use PhpOffice\PhpSpreadsheet\Cell\Coordinate; -use PhpOffice\PhpSpreadsheet\Shared\StringHelper; use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; class LookupRef @@ -380,145 +379,7 @@ class LookupRef */ public static function MATCH($lookupValue, $lookupArray, $matchType = 1) { - $lookupArray = Functions::flattenArray($lookupArray); - $lookupValue = Functions::flattenSingleValue($lookupValue); - $matchType = ($matchType === null) ? 1 : (int) Functions::flattenSingleValue($matchType); - - // MATCH is not case sensitive, so we convert lookup value to be lower cased in case it's string type. - if (is_string($lookupValue)) { - $lookupValue = StringHelper::strToLower($lookupValue); - } - - // Lookup_value type has to be number, text, or logical values - if ((!is_numeric($lookupValue)) && (!is_string($lookupValue)) && (!is_bool($lookupValue))) { - return Functions::NA(); - } - - // Match_type is 0, 1 or -1 - if (($matchType !== 0) && ($matchType !== -1) && ($matchType !== 1)) { - return Functions::NA(); - } - - // Lookup_array should not be empty - $lookupArraySize = count($lookupArray); - if ($lookupArraySize <= 0) { - return Functions::NA(); - } - - if ($matchType == 1) { - // If match_type is 1 the list has to be processed from last to first - - $lookupArray = array_reverse($lookupArray); - $keySet = array_reverse(array_keys($lookupArray)); - } - - // Lookup_array should contain only number, text, or logical values, or empty (null) cells - foreach ($lookupArray as $i => $lookupArrayValue) { - // check the type of the value - if ( - (!is_numeric($lookupArrayValue)) && (!is_string($lookupArrayValue)) && - (!is_bool($lookupArrayValue)) && ($lookupArrayValue !== null) - ) { - return Functions::NA(); - } - // Convert strings to lowercase for case-insensitive testing - if (is_string($lookupArrayValue)) { - $lookupArray[$i] = StringHelper::strToLower($lookupArrayValue); - } - if (($lookupArrayValue === null) && (($matchType == 1) || ($matchType == -1))) { - unset($lookupArray[$i]); - } - } - - // ** - // find the match - // ** - - if ($matchType === 0 || $matchType === 1) { - foreach ($lookupArray as $i => $lookupArrayValue) { - $typeMatch = ((gettype($lookupValue) === gettype($lookupArrayValue)) || (is_numeric($lookupValue) && is_numeric($lookupArrayValue))); - $exactTypeMatch = $typeMatch && $lookupArrayValue === $lookupValue; - $nonOnlyNumericExactMatch = !$typeMatch && $lookupArrayValue === $lookupValue; - $exactMatch = $exactTypeMatch || $nonOnlyNumericExactMatch; - - if ($matchType === 0) { - if ($typeMatch && is_string($lookupValue) && (bool) preg_match('/([\?\*])/', $lookupValue)) { - $splitString = $lookupValue; - $chars = array_map(function ($i) use ($splitString) { - return mb_substr($splitString, $i, 1); - }, range(0, mb_strlen($splitString) - 1)); - - $length = count($chars); - $pattern = '/^'; - for ($j = 0; $j < $length; ++$j) { - if ($chars[$j] === '~') { - if (isset($chars[$j + 1])) { - if ($chars[$j + 1] === '*') { - $pattern .= preg_quote($chars[$j + 1], '/'); - ++$j; - } elseif ($chars[$j + 1] === '?') { - $pattern .= preg_quote($chars[$j + 1], '/'); - ++$j; - } - } else { - $pattern .= preg_quote($chars[$j], '/'); - } - } elseif ($chars[$j] === '*') { - $pattern .= '.*'; - } elseif ($chars[$j] === '?') { - $pattern .= '.{1}'; - } else { - $pattern .= preg_quote($chars[$j], '/'); - } - } - - $pattern .= '$/'; - if ((bool) preg_match($pattern, $lookupArrayValue)) { - // exact match - return $i + 1; - } - } elseif ($exactMatch) { - // exact match - return $i + 1; - } - } elseif (($matchType === 1) && $typeMatch && ($lookupArrayValue <= $lookupValue)) { - $i = array_search($i, $keySet); - - // The current value is the (first) match - return $i + 1; - } - } - } else { - $maxValueKey = null; - - // The basic algorithm is: - // Iterate and keep the highest match until the next element is smaller than the searched value. - // Return immediately if perfect match is found - foreach ($lookupArray as $i => $lookupArrayValue) { - $typeMatch = gettype($lookupValue) === gettype($lookupArrayValue); - $exactTypeMatch = $typeMatch && $lookupArrayValue === $lookupValue; - $nonOnlyNumericExactMatch = !$typeMatch && $lookupArrayValue === $lookupValue; - $exactMatch = $exactTypeMatch || $nonOnlyNumericExactMatch; - - if ($exactMatch) { - // Another "special" case. If a perfect match is found, - // the algorithm gives up immediately - return $i + 1; - } elseif ($typeMatch & $lookupArrayValue >= $lookupValue) { - $maxValueKey = $i + 1; - } elseif ($typeMatch & $lookupArrayValue < $lookupValue) { - //Excel algorithm gives up immediately if the first element is smaller than the searched value - break; - } - } - - if ($maxValueKey !== null) { - return $maxValueKey; - } - } - - // Unsuccessful in finding a match, return #N/A error value - return Functions::NA(); + return LookupRef\ExcelMatch::MATCH($lookupValue, $lookupArray, $matchType); } /** diff --git a/src/PhpSpreadsheet/Calculation/LookupRef/ExcelMatch.php b/src/PhpSpreadsheet/Calculation/LookupRef/ExcelMatch.php new file mode 100644 index 00000000..71358bf3 --- /dev/null +++ b/src/PhpSpreadsheet/Calculation/LookupRef/ExcelMatch.php @@ -0,0 +1,198 @@ +getMessage(); + } + + // MATCH() is not case sensitive, so we convert lookup value to be lower cased if it's a string type. + if (is_string($lookupValue)) { + $lookupValue = StringHelper::strToLower($lookupValue); + } + + $valueKey = null; + switch ($matchType) { + case self::MATCHTYPE_LARGEST_VALUE: + $valueKey = self::matchLargestValue($lookupArray, $lookupValue, $keySet); + + break; + case self::MATCHTYPE_FIRST_VALUE: + $valueKey = self::matchFirstValue($lookupArray, $lookupValue); + + break; + case self::MATCHTYPE_SMALLEST_VALUE: + default: + $valueKey = self::matchSmallestValue($lookupArray, $lookupValue); + } + + if ($valueKey !== null) { + return ++$valueKey; + } + + // Unsuccessful in finding a match, return #N/A error value + return Functions::NA(); + } + + private static function matchFirstValue($lookupArray, $lookupValue) + { + $wildcardLookup = ((bool) preg_match('/([\?\*])/', $lookupValue)); + $wildcard = WildcardMatch::wildcard($lookupValue); + + foreach ($lookupArray as $i => $lookupArrayValue) { + $typeMatch = ((gettype($lookupValue) === gettype($lookupArrayValue)) || + (is_numeric($lookupValue) && is_numeric($lookupArrayValue))); + + if ( + $typeMatch && is_string($lookupValue) && + $wildcardLookup && WildcardMatch::compare($lookupArrayValue, $wildcard) + ) { + // wildcard match + return $i; + } elseif ($lookupArrayValue === $lookupValue) { + // exact match + return $i; + } + } + + return null; + } + + private static function matchLargestValue($lookupArray, $lookupValue, $keySet) + { + foreach ($lookupArray as $i => $lookupArrayValue) { + $typeMatch = ((gettype($lookupValue) === gettype($lookupArrayValue)) || + (is_numeric($lookupValue) && is_numeric($lookupArrayValue))); + + if ($typeMatch && ($lookupArrayValue <= $lookupValue)) { + return array_search($i, $keySet); + } + } + + return null; + } + + private static function matchSmallestValue($lookupArray, $lookupValue) + { + $valueKey = null; + + // The basic algorithm is: + // Iterate and keep the highest match until the next element is smaller than the searched value. + // Return immediately if perfect match is found + foreach ($lookupArray as $i => $lookupArrayValue) { + $typeMatch = gettype($lookupValue) === gettype($lookupArrayValue); + + if ($lookupArrayValue === $lookupValue) { + // Another "special" case. If a perfect match is found, + // the algorithm gives up immediately + return $i; + } elseif ($typeMatch && $lookupArrayValue >= $lookupValue) { + $valueKey = $i; + } elseif ($typeMatch && $lookupArrayValue < $lookupValue) { + //Excel algorithm gives up immediately if the first element is smaller than the searched value + break; + } + } + + return $valueKey; + } + + private static function validateLookupValue($lookupValue): void + { + // Lookup_value type has to be number, text, or logical values + if ((!is_numeric($lookupValue)) && (!is_string($lookupValue)) && (!is_bool($lookupValue))) { + throw new Exception(Functions::NA()); + } + } + + private static function validateMatchType($matchType): void + { + // Match_type is 0, 1 or -1 + if ( + ($matchType !== self::MATCHTYPE_FIRST_VALUE) && + ($matchType !== self::MATCHTYPE_LARGEST_VALUE) && ($matchType !== self::MATCHTYPE_SMALLEST_VALUE) + ) { + throw new Exception(Functions::NA()); + } + } + + private static function validateLookupArray($lookupArray): void + { + // Lookup_array should not be empty + $lookupArraySize = count($lookupArray); + if ($lookupArraySize <= 0) { + throw new Exception(Functions::NA()); + } + } + + private static function prepareLookupArray($lookupArray, $matchType) + { + // Lookup_array should contain only number, text, or logical values, or empty (null) cells + foreach ($lookupArray as $i => $value) { + // check the type of the value + if ((!is_numeric($value)) && (!is_string($value)) && (!is_bool($value)) && ($value !== null)) { + throw new Exception(Functions::NA()); + } + // Convert strings to lowercase for case-insensitive testing + if (is_string($value)) { + $lookupArray[$i] = StringHelper::strToLower($value); + } + if ( + ($value === null) && + (($matchType == self::MATCHTYPE_LARGEST_VALUE) || ($matchType == self::MATCHTYPE_SMALLEST_VALUE)) + ) { + unset($lookupArray[$i]); + } + } + + return $lookupArray; + } +} diff --git a/tests/data/Calculation/LookupRef/MATCH.php b/tests/data/Calculation/LookupRef/MATCH.php index 671056dd..03d9bfff 100644 --- a/tests/data/Calculation/LookupRef/MATCH.php +++ b/tests/data/Calculation/LookupRef/MATCH.php @@ -26,7 +26,6 @@ return [ [2, 0, 0, 3], 0, ], - // Third argument = 1 [ 1, // Expected @@ -52,7 +51,6 @@ return [ [2, 0, 0, 3], 1, ], - // Third argument = -1 [ 1, // Expected @@ -96,7 +94,12 @@ return [ [8, 8, 3, 2], -1, ], - + [ // Default matchtype + 4, // Expected + 4, // Input + [2, 0, 0, 3], + null, + ], // match on ranges with empty cells [ 3, // Expected @@ -110,7 +113,6 @@ return [ [1, null, 4, null, null], 1, ], - // 0s are causing errors, because things like 0 == 'x' is true. Thanks PHP! [ 3, @@ -233,7 +235,7 @@ return [ [ 2, // Expected 'a*~*c', - ['aAAAAA', 'a123456*c', 'az'], + ['aAAAAA', 'a123456*c', 'az', 'alembic'], 0, ], [ @@ -272,4 +274,76 @@ return [ [1, 22, 'aaa'], 0, ], + [ + '#N/A', // Expected + 'abc', + [1, 22, 'aaa'], + 0, + ], + [ + '#N/A', // Expected (Invalid lookup value) + new DateTime('2021-03-11'), + [1, 22, 'aaa'], + 1, + ], + [ + '#N/A', // Expected (Invalid match type) + 'abc', + [1, 22, 'aaa'], + 123, + ], + [ + '#N/A', // Expected (Empty lookup array) + 'abc', + [], + 1, + ], + [ + 8, + 'A*e', + ['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'], + -1, + ], + [ + 2, + 'A*e', + ['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'], + 0, + ], + [ + '#N/A', + 'A*e', + ['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'], + 1, + ], + [ + 8, + 'A?s*e', + ['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'], + -1, + ], + [ + 5, + 'A?s*e', + ['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'], + 0, + ], + [ + '#N/A', + 'A*e', + ['Aardvark', 'Apple', 'Armadillo', 'Acre', 'Absolve', 'Amplitude', 'Adverse', 'Apartment'], + 1, + ], + [ + 8, + '*verse', + ['Obtuse', 'Amuse', 'Obverse', 'Inverse', 'Assurance', 'Amplitude', 'Adverse', 'Apartment'], + -1, + ], + [ + 3, + '*verse', + ['Obtuse', 'Amuse', 'Obverse', 'Inverse', 'Assurance', 'Amplitude', 'Adverse', 'Apartment'], + 0, + ], ];