/** * Fuse - Lightweight fuzzy-search * * Copyright (c) 2012 Kirollos Risk . * All Rights Reserved. Apache Software License 2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ (function () { /** * Adapted from "Diff, Match and Patch", by Google * * http://code.google.com/p/google-diff-match-patch/ * * Modified by: Kirollos Risk * ----------------------------------------------- * Details: the algorithm and structure was modified to allow the creation of * instances with a method inside which does the actual * bitap search. The (the string that is searched for) is only defined * once per instance and thus it eliminates redundant re-creation when searching * over a list of strings. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. */ function Searcher(pattern, options) { options = options || {}; // Aproximately where in the text is the pattern expected to be found? var MATCH_LOCATION = options.location || 0, // Determines how close the match must be to the fuzzy location (specified above). // An exact letter match which is 'distance' characters away from the fuzzy location // would score as a complete mismatch. A distance of '0' requires the match be at // the exact location specified, a threshold of '1000' would require a perfect match // to be within 800 characters of the fuzzy location to be found using a 0.8 threshold. MATCH_DISTANCE = options.distance || 1000, // At what point does the match algorithm give up. A threshold of '0.0' requires a perfect match // (of both letters and location), a threshold of '1.0' would match anything. MATCH_THRESHOLD = options.threshold || 0.4, pattern = options.caseSensitive ? pattern : pattern.toLowerCase(), patternLen = pattern.length; if (patternLen > 32) { throw new Error('Pattern length is too long'); } // console.log("pattern is " + pattern +", distance is " + MATCH_DISTANCE+", thresh is " + MATCH_THRESHOLD); var matchmask = 1 << (patternLen - 1); /** * Initialise the alphabet for the Bitap algorithm. * @return {Object} Hash of character locations. * @private */ var pattern_alphabet = (function () { var mask = {}, i = 0; for (i = 0; i < patternLen; i++) { mask[pattern.charAt(i)] = 0; } for (i = 0; i < patternLen; i++) { mask[pattern.charAt(i)] |= 1 << (pattern.length - i - 1); } return mask; })(); /** * Compute and return the score for a match with errors and = start; j--) { // The alphabet is a sparse hash, so the following line generates warnings. charMatch = pattern_alphabet[text.charAt(j - 1)]; if (i === 0) { // First pass: exact match. rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; } else { // Subsequent passes: fuzzy match. rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | (((lastRd[j + 1] | lastRd[j]) << 1) | 1) | lastRd[j + 1]; } if (rd[j] & matchmask) { score = match_bitapScore(i, j - 1); // This match will almost certainly be better than any existing match. // But check anyway. if (score <= scoreThreshold) { // Told you so. scoreThreshold = score; bestLoc = j - 1; locations.push(bestLoc); if (bestLoc > MATCH_LOCATION) { // When passing loc, don't exceed our current distance from loc. start = Math.max(1, 2 * MATCH_LOCATION - bestLoc); } else { // Already passed loc, downhill from here on in. break; } } } } // No hope for a (better) match at greater error levels. if (match_bitapScore(i + 1, MATCH_LOCATION) > scoreThreshold) { break; } lastRd = rd; } return { isMatch: bestLoc >= 0, score: score }; } } /** * @param {Array} list * @param {Object} options * @public */ function Fuse(list, options) { options = options || {}; var keys = options.keys; /** * Searches for all the items whose keys (fuzzy) match the pattern. * @param {String} pattern The pattern string to fuzzy search on. * @return {Array} A list of all serch matches. * @public */ this.search = function (pattern) { //console.time('total'); var searcher = new Searcher(pattern, options), i, j, item, text, dataLen = list.length, bitapResult, rawResults = [], resultMap = {}, rawResultsLen, existingResult, results = [], compute = null; //console.time('search'); /** * Calls for bitap analysis. Builds the raw result list. * @param {String} text The pattern string to fuzzy search on. * @param {String|Int} entity If the is an Array, then entity will be an index, * otherwise it's the item object. * @param {Int} index * @return {Object|Int} * @private */ function analyzeText(text, entity, index) { // Check if the text can be searched if (text !== undefined && text !== null && typeof text === 'string') { // Get the result bitapResult = searcher.search(text); // If a match is found, add the item to , including its score if (bitapResult.isMatch) { //console.log(bitapResult.score); // Check if the item already exists in our results existingResult = resultMap[index]; if (existingResult) { // Use the lowest score existingResult.score = Math.min(existingResult.score, bitapResult.score); } else { // Add it to the raw result list resultMap[index] = { item: entity, score: bitapResult.score }; rawResults.push(resultMap[index]); } } } } // Check the first item in the list, if it's a string, then we assume // that every item in the list is also a string, and thus it's a flattened array. if (typeof list[0] === 'string') { // Iterate over every item for (i = 0; i < dataLen; i++) { analyzeText(list[i], i, i); } } else { // Otherwise, the first item is an Object (hopefully), and thus the searching // is done on the values of the keys of each item. // Iterate over every item for (i = 0; i < dataLen; i++) { item = list[i]; // Iterate over every key for (j = 0; j < keys.length; j++) { analyzeText(item[keys[j]], item, i); } } } //console.timeEnd('search'); // From the results, push into a new array only the item identifier (if specified) // of the entire item. This is because we don't want to return the , // since it contains other metadata; //console.time('build'); rawResultsLen = rawResults.length; for (i = 0; i < rawResultsLen; i++) { results.push(options.id ? rawResults[i].item[options.id] : rawResults[i].item); } //console.timeEnd('build'); //console.timeEnd('total'); return results; } } //Export to Common JS Loader if (typeof module !== 'undefined') { if (typeof module.setExports === 'function') { module.setExports(Fuse); } else if (module.exports) { module.exports = Fuse; } } else { window.Fuse = Fuse; } })();