/** * @license FuzzySearch.js * Autocomplete suggestion engine using approximate string matching * https://github.com/jeancroy/FuzzySearch * * Copyright (c) 2015, Jean Christophe Roy * Licensed under The MIT License. * http://opensource.org/licenses/MIT */ (function () { 'use strict'; /** * @param options * @constructor */ 'use strict'; function FuzzySearch(options) { if (options === undefined) options = {}; if (!(this instanceof FuzzySearch)) return new FuzzySearch(options); FuzzySearch.setOptions(this, options, FuzzySearch.defaultOptions, _privates, true, this._optionsHook) } FuzzySearch.defaultOptions = /** @lends {FuzzySearchOptions.prototype} */{ // // Scoring, include in result // minimum_match: 1.0, // Minimum score to consider two token are not unrelated thresh_include: 2.0, // To be a candidate, score of item must be at least this thresh_relative_to_best: 0.5, // and be at least this fraction of the best score field_good_enough: 20, // If a field have this score, stop searching other fields. (field score is before item related bonus) // // Scoring, bonus // bonus_match_start: 0.5, // Additional value per character in common prefix bonus_token_order: 2.0, // Value of two token properly ordered bonus_position_decay: 0.7, // Exponential decay for position bonus (smaller : more importance to first item) score_per_token: true, // if true, split query&field in token, allow to match in different order // if false, bypass at least half the computation cost, very fast // also disable different token that score different field, because no more token!! score_test_fused: false, // Try one extra match where we disregard token separation. // "oldman" match "old man" score_acronym: false, // jrrt match against John Ronald Reuel Tolkien token_sep: " .,-:", // // Output sort & transform // score_round: 0.1, // Two item that have the same rounded score are sorted alphabetically output_limit: 0, // Return up to N result, 0 to disable sorter: compareResults, // Function used to sort. See signature of Array.sort(sorter) normalize: normalize, // Function used to transform string (lowercase, accents, etc) filter: null, // Select elements to be searched. (done before each search) /**@type {string|function({SearchResult})}*/ output_map: "item", // Transform the output, can be a function or a path string. // output_map="root" return SearchResult object, needed to see the score // output_map="root.item" return original object. // output_map="root.item.somefield" output a field of original object. // (root.) is optional. // // output_map=function(root){ return something(root.item) } // ^this get original object and apply something() on it. join_str: ", ", //String used to join array fields // // Tokens options // token_query_min_length: 2, // Avoid processing very small words, include greater or equal, in query token_field_min_length: 3, // include greater or equal, in item field token_query_max_length: 64, // Shorten large token to give more even performance. token_field_max_length: 64, // Shorten large token to give more even performance. token_fused_max_length: 64, // Shorten large token to give more even performance. //Do not attempt to match token too different in size: n/m = len(field_tok)/len(query_tok) token_min_rel_size: 0.6, // Field token should contain query token. Reject field token that are too small. token_max_rel_size: 10, // Large field token tend to match against everything. Ensure query is long enough to be specific. // // Interactive - suggest as you type. 
// Avoid doing search that will be discarded without being displayed // This also help prevent lag/ temp freeze // interactive_debounce: 150, // This is initial value. Will try to learn actual time cost. Set to 0 to disable. interactive_mult: 1.2, // Overhead for variability and to allow other things to happens (like redraw, highlight ). interactive_burst: 3, // Allow short burst, prevent flicker due to debounce suppression of a callback // // Data // source: [], keys: [], lazy: false, // when true, any refresh happens only when a user make a search, option stay put until changed. token_re: /\s+/g, //Separator string will be parsed to this re. identify_item: null, // How to uniquely identify an item when adding to the index. Defaults to null, meaning no duplicate detection. Must be a method that takes a single (source) argument. use_index_store: false, // Enable a time vs memory trade-off for faster search (but longer initial warm-up). store_thresh: 0.7, // cutoff point relative to best, to graduate from store phase. store_max_results: 1500 // Maximum number of result to graduate from store, to the full search quality algorithm // Note that store only perform a crude search, ignoring some options, so the best result can be only "meh" here. }; var _privates = /** @lends {FuzzySearch.prototype} */{ keys: [], tags: [], // alternative name for each key, support output alias and per key search index: [], // source is processed using keys, then stored here index_map: {}, // To manage update of record already in dataset nb_indexed: 0, // To manage active count of index store: {}, // Dictionary used for time VS memory trade off. (Optional) tags_re: null, acro_re: null, token_re: null, /**@type {FuzzySearchOptions}*/ options: null, dirty: false, // when true, schedule a source refresh using new or existing source & keys, used once then clear itself. //Information on last search query: null, results: [], start_time: 0, search_time: 0 }; /** * Number of bit in a int. * DEBUG-tip: setting this to zero will force "long string" algorithm for everything! * @const */ var INT_SIZE = 32; function FuzzySearchOptions(defaults, options) { for (var key in defaults) { if (defaults.hasOwnProperty(key)) { //fill self with value from either options or default this[key] = (options.hasOwnProperty(key) && options[key] !== undefined ) ? options[key] : defaults[key]; } } } FuzzySearchOptions.update = function (self, defaults, options) { for (var key in options) { if (options.hasOwnProperty(key) && defaults.hasOwnProperty(key)) { //explicitly set a options to undefined => reset default, else get value self[key] = (options[key] === undefined) ? defaults[key] : options[key]; } } }; /** * Set property of object, * Restrict properties that can be set from a list of available defaults. 
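 *
 * Illustrative flow (option name taken from defaultOptions above): a call like
 * fuzzy.setOptions({output_limit: 10}) reaches this helper with reset = false, so
 * FuzzySearchOptions.update only copies the keys present in `options`;
 * every other setting keeps its current value.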
* * @param {FuzzySearch} self * @param {Object} options * @param {Object} defaults * @param {Object} privates * @param {boolean} reset * @param {function({Object})} hook * */ FuzzySearch.setOptions = function (self, options, defaults, privates, reset, hook) { if (reset) { extend(self, privates); self.options = new FuzzySearchOptions(defaults, options); } else { FuzzySearchOptions.update(self.options, defaults, options); } hook.call(self, options) }; function extend(a, b) { for (var key in b) if (b.hasOwnProperty(key)) a[key] = b[key]; } // // - - - - - - - - - - - - // SET & PARSE SETTINGS // - - - - - - - - - - - - // extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ { /** * Allow to change options after the object has been created. * If source is changed, new source is indexed. * * Optional reset allow to change any setting not in options to defaults. * This is similar to creating new object, but using same pointer. * * @param {Object} options * @param {boolean=} reset */ setOptions: function (options, reset) { if (reset === undefined) reset = options.reset || false; FuzzySearch.setOptions(this, options, FuzzySearch.defaultOptions, _privates, reset, this._optionsHook); }, /** * * @param {Object} options * @private */ _optionsHook: function (options) { //Items of options have been copied into this.options //We still test "option_name in option" to know if we have received something new //This allow to support "shorthand" options and is used to refresh data. var self_options = this.options; //Output stage if ("output_map" in options && typeof options.output_map === "string") { if (self_options.output_map === "alias") self_options.output_map = this.aliasResult; else self_options.output_map = removePrefix(self_options.output_map, ["root", "."]); } this.source = self_options.source; // Input stage, work to allow different syntax for keys definition is done here. var oKeys; if (("keys" in options) && ( ( oKeys = options.keys) !== undefined)) { var key_type = Object.prototype.toString.call(oKeys); var key_index, nb_keys; this.tags = null; if (key_type === "[object String]") { this.keys = oKeys.length ? [oKeys] : []; } else if (key_type === "[object Object]") { this.keys = []; this.tags = []; //we don't know the "length" of dictionary key_index = 0; for (var tag in oKeys) { if (oKeys.hasOwnProperty(tag)) { this.tags[key_index] = tag; this.keys[key_index] = oKeys[tag]; key_index++; } } } else { this.keys = oKeys; } oKeys = this.keys; nb_keys = oKeys.length; for (key_index = -1; ++key_index < nb_keys;) { oKeys[key_index] = removePrefix(oKeys[key_index], ["item", "."]) } if (!this.tags) this.tags = oKeys; this.tags_re = buildTagsRE(this.tags); } if (this.acro_re === null || "acronym_tok" in options) { this.acro_re = buildAcronymRE(self_options.token_sep); } if (this.token_re === null || "token_sep" in options) { this.token_re = self_options.token_re = new RegExp("[" + re_escape(self_options.token_sep) + "]+", "g"); } // Determine if we need to rebuild this.index from this.source if (options.dirty || ("source" in options) || ("keys" in options) || ("use_index_store" in options)) { if (self_options.lazy) this.dirty = true; // Schedule later. else { this._buildIndexFromSource(); this.dirty = false; } } } }); /** * Removes optional prefix of paths. * for example "root.", "." 
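 *
 * For instance: removePrefix("root.item.title", ["root", "."]) returns "item.title",
 * while a string that does not start with the prefixes is returned unchanged.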
* * @param {string} str - input * @param {Array} prefixes to remove * @returns {string} */ function removePrefix(str, prefixes) { var n = prefixes.length; var offset = 0; for (var i = -1; ++i < n;) { var p = prefixes[i], l = p.length; if (str.substr(offset, l) === p) offset += l; } return (offset > 0) ? str.substr(offset) : str; } function buildTagsRE(tags) { var n = tags.length; if (!n) return null; var tag_str = re_escape(tags[0]); for (var i = 0; ++i < n;) { tag_str += "|" + re_escape(tags[i]); } return new RegExp("(?:^|\\s)\\s*(" + tag_str + "):\\s*", "g"); } function buildAcronymRE(sep) { var n = sep.length; if (!n) return null; var acro_str = re_escape(sep); return new RegExp("(?:^|[" + acro_str + "])+([^" + acro_str + "])[^" + acro_str + "]*", "g"); } // Build regexp for tagged search function re_escape(str) { var re = /[\-\[\]\/\{}\(\)\*\+\?\.\\\^\$\|]/g; return str.replace(re, "\\$&"); } // // - - - - - - - - - - - - // OUTPUT OR POST PROCESS // - - - - - - - - - - - - // 'use strict'; extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ { /** * Given a SearchResult object, recover the value of the best matching field. * This is done on demand for display. * * @param {SearchResult} result * @return {string} original field */ getMatchingField: function (result) { var f = FuzzySearch.generateFields(result.item, [this.keys[result.matchIndex]]); return f[0][result.subIndex]; }, /** * Given a SearchResult object, generate a new object that follow alias structure * @param {SearchResult} result * @return {*} aliased result */ aliasResult: function (result) { var options = this.options; var f = FuzzySearch.generateFields(result.item, this.keys); var out = {}, tags = this.tags, join_str = options.join_str; for (var i = -1, n = f.length; ++i < n;) { out[tags[i]] = f[i].join(join_str) } out._item = result.item; out._score = result.score; out._match = f[result.matchIndex][result.subIndex]; return out; } }); // - - - - - - - - - - - - - - - - - - - - - - // Output stage, prepare results for return //- - - - - - - - - - - - - - - - - - - - - - /** * Own version of Array.prototype.map() * * @param {Array} source * @param transform callback * @param {*=} context (*this* in called function) * @param {number=} max_out * @returns {Array} */ FuzzySearch.map = function (source, transform, context, max_out) { var n = source.length; if (max_out > 0 && max_out < n) n = max_out; if (typeof transform !== "function") return source.slice(0, n); var out = new Array(n); for (var i = -1; ++i < n;) { out[i] = transform.call(context, source[i], i, source); } return out; }; /** * Take an array of objects, return an array containing a field of those object. 
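 *
 * Nested paths are also understood, e.g. mapField(list, "a.b") reads item.a.b for
 * each item (see the flat-case example just below).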
* * test = [ {key:"A",value:10}, {key:"B",value:20} ] * mapField(test,"value") = [10,20] * * @param source - array to process * @param {string} path - key to address on each item OR function to apply * @param {Number=} [max_out=source.length] - only process first items * @returns {Array} */ FuzzySearch.mapField = function (source, path, max_out) { var n = source.length; if (max_out > 0 && max_out < n) n = max_out; if (path === "") return source.slice(0, n); var out = new Array(n); var obj, i; if (path.indexOf(".") === -1) { //fast case no inner loop for (i = -1; ++i < n;) { obj = source[i]; if (path in obj) out[i] = obj[path]; } } else { //general case var parts = path.split("."); var nb_level = parts.length; for (i = -1; ++i < n;) { obj = source[i]; for (var level = -1; ++level < nb_level;) { var key = parts[level]; if (!(key in obj)) break; obj = obj[key]; } out[i] = obj; } } return out; }; /** * Filter array for item where item[field] >= atleast * * @param array * @param field * @param atleast * @returns {Array} */ FuzzySearch.filterGTE = function (array, field, atleast) { var i = -1, j = -1; var n = array.length; var out = [], obj; while (++i < n) { obj = array[i]; if (obj[field] >= atleast) { out[++j] = obj; } } return out; }; /** * SearchResult constructor * - Internal result list * - Output of search when output_map="" * * @param {*} item * @param {Array} fields * @param {number} item_score * @param {number} matched_field_index * @param {number} matched_field_sub * @param {(string|number)} sortkey * @constructor */ function SearchResult(item, fields, item_score, matched_field_index, matched_field_sub, sortkey) { this.item = item; this.fields = fields; this.score = item_score; this.matchIndex = matched_field_index; this.subIndex = matched_field_sub; this.sortKey = sortkey; } /** * Sort function * first by decreasing order of score, then alphabetical order of sortkey. * * @param {SearchResult} a * @param {SearchResult} b * @returns {number} - ">0" if b before a, "<0" if b after a. */ function compareResults(a, b) { var d = b.score - a.score; if (d !== 0) return d; var ak = a.sortKey, bk = b.sortKey; return ak > bk ? 1 : ( ak < bk ? -1 : 0); } // // - - - - - - - - - - - - // Prepare Query // - - - - - - - - - - - - // extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ { /** * Input: a user search string * Output a query object * * Perform a few transformation to allw faster searching. * String is set to lowercase, some accents removed, split into tokens. * Token too small are filtered out, token too large are trimmed. * Token are packed in group of 32 char, each token is processed to extract an alphabet map. * * If score_test_fused is enabled, we do an extra pass disregarding tokens. * IF score_per_token is disabled this is the only pass we do. 
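 *
 * Illustrative result (hypothetical input, default options): _prepQuery("John Smith")
 * yields normalized "john smith", words ["john", "smith"], and a single PackInfo
 * group, since both tokens fit together inside one 32-bit map.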
* * @param query_string * @returns {Query} * @private */ _prepQuery: function (query_string) { var options = this.options; var opt_tok = options.score_per_token; var opt_fuse = options.score_test_fused; var opt_fuselen = options.token_fused_max_length; var opt_qmin = options.token_field_min_length; var opt_qmax = options.token_field_max_length; var tags = this.tags; var tags_re = this.tags_re; var nb_tags = tags.length; var token_re = this.token_re; var norm, fused, fused_map, children, has_tags, group, words; if (opt_tok && nb_tags && tags_re) { var start = 0, end; var q_index = 0; var q_parts = new Array(nb_tags + 1); var match = tags_re.exec(query_string); has_tags = (match !== null); while (match !== null) { end = match.index; q_parts[q_index] = query_string.substring(start, end); start = end + match[0].length; q_index = tags.indexOf(match[1]) + 1; match = tags_re.exec(query_string); } q_parts[q_index] = query_string.substring(start); children = []; for (var i = -1; ++i < nb_tags;) { var qp = q_parts[i + 1]; if (!qp || !qp.length) continue; norm = options.normalize(qp); fused = norm.substring(0, opt_fuselen); fused_map = (opt_fuse || !opt_tok) ? FuzzySearch.alphabet(fused) : {}; words = FuzzySearch.filterSize(norm.split(token_re), opt_qmin, opt_qmax); group = FuzzySearch.pack_tokens(words); children[i] = new Query(norm, words, group, fused, fused_map, false, []); } norm = options.normalize(q_parts[0]); words = FuzzySearch.filterSize(norm.split(token_re), opt_qmin, opt_qmax); group = FuzzySearch.pack_tokens(words); } else { norm = options.normalize(query_string); words = FuzzySearch.filterSize(norm.split(token_re), opt_qmin, opt_qmax); group = opt_tok ? FuzzySearch.pack_tokens(words) : []; has_tags = false; children = new Array(nb_tags); } fused = norm.substring(0, opt_fuselen); fused_map = (opt_fuse || !opt_tok) ? FuzzySearch.alphabet(fused) : {}; return new Query(norm, words, group, fused, fused_map, has_tags, children) } }); // // Query objects // /** * Hold a query * * @param {string} normalized * @param {Array.} words * @param {Array.} tokens_groups * @param {string} fused_str * @param {Object} fused_map * @param {boolean} has_children * @param {Array} children * * @constructor */ function Query(normalized, words, tokens_groups, fused_str, fused_map, has_children, children) { this.normalized = normalized; this.words = words; this.tokens_groups = tokens_groups; this.fused_str = fused_str; this.fused_map = fused_map; this.fused_score = 0; this.has_children = has_children; this.children = children; } // // Query hold some memory to keep score of it's tokens. 
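//
// Illustrative call sequence from the search loop (a sketch of the real calls in
// _searchIndex below; local variable names there differ):
//
//   query.resetItem();                      // zero the per-token scratch scores
//   ... this._scoreField(node, query) ...   // accumulate best score per query token
//   var query_score = query.scoreItem();    // sum them, including tagged children
//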
// Used in search methods /** * Loop tru each item score and reset to 0, apply to child query */ Query.prototype.resetItem = function () { var groups = this.tokens_groups; for (var group_index = -1, nb_groups = groups.length; ++group_index < nb_groups;) { var score_item = groups[group_index].score_item; for (var i = -1, l = score_item.length; ++i < l;) score_item[i] = 0 } this.fused_score = 0; if (this.has_children) { var children = this.children; for (var child_index = -1, nb_child = children.length; ++child_index < nb_child;) { var child = children[child_index]; if (child) child.resetItem(); } } }; /** * Sum each item score and add to child score */ Query.prototype.scoreItem = function () { var query_score = 0; var groups = this.tokens_groups; for (var group_index = -1, nb_groups = groups.length; ++group_index < nb_groups;) { var group_scores = groups[group_index].score_item; for (var score_index = -1, nb_scores = group_scores.length; ++score_index < nb_scores;) { query_score += group_scores[score_index] } } if (this.fused_score > query_score) query_score = this.fused_score; if (this.has_children) { var children = this.children; for (var child_index = -1, nb_child = children.length; ++child_index < nb_child;) { var child = children[child_index]; if (child) query_score += child.scoreItem(); } } return query_score; }; /** * Hold a group of token for parallel scoring * * @param {Array.} group_tokens * @param {Object} group_map * @param {number} gate * @constructor */ function PackInfo(group_tokens, group_map, gate) { this.tokens = group_tokens; this.map = group_map; this.gate = gate; var t = group_tokens.length, i = -1; var scores = new Array(t); while (++i < t) scores[i] = 0; this.score_item = scores.slice(); this.score_field = scores.slice(); this.field_pos = scores; } // // - - - - - - - - - - - - - - - - - // Prepare Token for search // - - - - - - - - - - - - - - - - - // a normal string can be view as an array of char. // so we map ( position -> char). // // we reverse that relation to map // char -> positions /** * Record position of each character in a token. * If token is small, position is recorded by position of a single bit in an int. * If token is larger than INT_SIZE, position is recorder as array of number. 
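 *
 * Small-token example: alphabet("abca") -> { a: 0b1001, b: 0b0010, c: 0b0100 }
 * (bit i set means the character occurs at position i).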
* * @param {string} token * @returns {Object} key value map char->positions (as array of position or single int (can be seen as an array of bit) ) */ FuzzySearch.alphabet = function (token) { var len = token.length; if (len > INT_SIZE) return FuzzySearch.posVector(token); else return FuzzySearch.bitVector(token, {}, 0); }; /** * Apply FuzzySearch.alphabet on multiple tokens * * @param {Array.} tokens * @returns {Array.} */ FuzzySearch.mapAlphabet = function (tokens) { var outlen = tokens.length; var out = new Array(outlen), i = -1; while (++i < outlen) { var t = tokens[i]; if (t.length > INT_SIZE) out[i] = FuzzySearch.posVector(t); else out[i] = FuzzySearch.bitVector(t, {}, 0); } return out; }; /** * Record position of each char using a single bit * * @param {string} token * @param {Object} map - Existing map to modify, can init with {} * @param offset - used for packing multiple word in a single map, can init with 0 * @returns {Object} Key value map char -> int */ FuzzySearch.bitVector = function (token, map, offset) { var len = token.length; var i = -1, c; var b = offset; while (++i < len) { c = token[i]; if (c in map) map[c] |= (1 << b++); else map[c] = (1 << b++); } return map; }; /** * Record position of each char in a token using an array * Append Infinity as a stop marker for llcs_large * * map = posVector("position") * map["p"] -> [0,Inf] * map["o"] -> [1,6,Inf] * * @param {string} pattern * @returns {Object} - key value map char->array of position (as number) */ FuzzySearch.posVector = function (pattern) { var map = {}, c; var m = pattern.length, i = -1; while (++i < m) { c = pattern[i]; if (c in map) map[c].push(i); else map[c] = [i]; } for (c in map) { if (map.hasOwnProperty(c)) { map[c].push(Infinity); } } return map; }; /** * Given a list of tokens, pack them into group of upto INT_SIZE(32) chars. * If a single token is bigger than INT_SIZE create a groupe of a single item * And use posVector instead of bitVector to prepare fallback algorithm. * * @param {Array.} tokens * @returns {Array.} */ FuzzySearch.pack_tokens = function (tokens) { var token_index = -1; var nb_tokens = tokens.length; var large; var groups = []; //For each group while (token_index < nb_tokens) { var group_tokens = []; var group_map = {}; var offset = 0; var gate = 0; //For each token in the group while (++token_index < nb_tokens) { var token = tokens[token_index]; var l = token.length; if (l >= INT_SIZE) { large = new PackInfo([token], FuzzySearch.posVector(token), 0xFFFFFFFF); break; } else if (l + offset >= INT_SIZE) { token_index--; break; } else { group_tokens.push(token); FuzzySearch.bitVector(token, group_map, offset); gate |= ( (1 << ( token.length - 1) ) - 1 ) << offset; offset += l } } if (group_tokens.length > 0) { groups.push(new PackInfo(group_tokens, group_map, gate)); } if (large) { groups.push(large); large = null; } } return groups; }; // //----------------------------- // SCORING FUNCTIONS // --------------------------- // 'use strict'; /** * Score of "search a in b" using self as options. * @param {string} a * @param {string} b */ FuzzySearch.prototype.score = function (a, b) { var aMap = FuzzySearch.alphabet(a); return FuzzySearch.score_map(a, b, aMap, this.options); }; // Adapted from paper: // A fast and practical bit-vector algorithm for // the Longest Common Subsequence problem // Maxime Crochemore et Al. 
// // With modification from // Bit-parallel LCS-length computation revisited (H Hyyrö, 2004) // http://www.sis.uta.fi/~hh56766/pubs/awoca04.pdf // /** * Score of "search a in b" using precomputed alphabet map * Main algorithm for single query token to score * * @param {string} a * @param {string} b * @param {Object} aMap - See FuzzySearch.alphabet * @param {FuzzySearchOptions} options */ FuzzySearch.score_map = function (a, b, aMap, options) { var j, lcs_len; var m = a.length; var n = b.length; var bonus_prefix = options.bonus_match_start; var k = m < n ? m : n; if (k === 0) return 0; //normalize score against length of both inputs var sz_score = (m + n) / ( 2.0 * m * n); //common prefix is part of lcs var prefix = 0; if (a === b) prefix = k; //speedup equality else { while ((a[prefix] === b[prefix]) && (++prefix < k)) { } } //shortest string consumed if (prefix === k) { lcs_len = prefix; return sz_score * lcs_len * lcs_len + bonus_prefix * prefix; } //alternative algorithm for large string //need to keep this condition in sync with bitvector if (m > INT_SIZE) { lcs_len = FuzzySearch.llcs_large(a, b, aMap, prefix); return sz_score * lcs_len * lcs_len + bonus_prefix * prefix; } var mask = ( 1 << m ) - 1; var S = mask, U, c; j = prefix - 1; while (++j < n) { c = b[j]; if (c in aMap) { // Hyyrö, 2004 S=V'=~V U = S & aMap[c]; S = (S + U) | (S - U); } } // Remove match already accounted in prefix region. mask &= ~( ( 1 << prefix ) - 1 ); // lcs_len is number of 0 in S (at position lower than m) // inverse S, mask it, then do "popcount" operation on 32bit S = ~S & mask; S = S - ((S >> 1) & 0x55555555); S = (S & 0x33333333) + ((S >> 2) & 0x33333333); lcs_len = (((S + (S >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; lcs_len += prefix; return sz_score * lcs_len * lcs_len + bonus_prefix * prefix; }; /** * Call score_map on the first token. * Filter size * * @param {PackInfo} packinfo * @param {string} token * @param {FuzzySearchOptions} options * @return {Array.} score */ FuzzySearch.score_single = function (packinfo, token, options) { var field_tok = packinfo.tokens[0]; var m = field_tok.length; var n = token.length; if (n < options.token_min_rel_size * m || n > options.token_max_rel_size * m) return [0]; return [FuzzySearch.score_map(field_tok, token, packinfo.map, options)]; }; /** * Score multiple query token against a single field token. 
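 *
 * E.g. (illustrative, reusing the "old man" example above) a group packing the
 * query tokens ["old", "man"] scored against the field token "oldman" returns
 * one score per packed query token.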
* Apply above score function in parallel * Computation is done as if everything was one big token, * but ZM bit-vector modify boundary so score are independant * * @param {PackInfo} packinfo * @param {string} field_token * @param {FuzzySearchOptions} options * @returns {Array.} scores */ FuzzySearch.score_pack = function (packinfo, field_token, options) { var packed_tokens = packinfo.tokens; var nb_packed = packed_tokens.length; //single item token can contain either a single word "overflow" or a large word that need special handling if (nb_packed == 1)return FuzzySearch.score_single(packinfo, field_token, options); var S = 0xFFFFFFFF, U, c; var ZM = packinfo.gate | 0; var aMap = packinfo.map; for (var j = -1, n = field_token.length; ++j < n;) { c = field_token[j]; if (c in aMap) { U = S & aMap[c]; S = ( (S & ZM) + (U & ZM) ) | (S - U); } } S = ~S; var bonus_prefix = options.bonus_match_start; var min_rs = options.token_min_rel_size; var max_rs = options.token_max_rel_size; var scores = new Array(nb_packed); var offset = 0; for (var k = -1; ++k < nb_packed;) { var query_tok = packed_tokens[k]; var m = query_tok.length; var lcs_len, prefix; if (n < min_rs * m || n > max_rs * m) { scores[k] = 0; offset += m; continue; } if (query_tok === field_token) prefix = lcs_len = m; else { var p = (m < n) ? m : n; prefix = 0; while ((query_tok[prefix] === field_token[prefix]) && (++prefix < p)) { } lcs_len = prefix; var Sm = ( (S >>> offset) & ( (1 << m) - 1 ) ) >>> prefix; while (Sm) { Sm &= Sm - 1; lcs_len++ } } offset += m; var sz = (m + n) / ( 2.0 * m * n); scores[k] = sz * lcs_len * lcs_len + bonus_prefix * prefix; } return scores; }; // // Compute LLCS, using vectors of position. // // Based on: // An input sensitive online algorithm for LCS computation // Heikki Hyyro 2009 // // We fill the dynamic programing table line per line // but instead of storing the whole line we only store position where the line increase // ( bitvector algorithm store increase yes/no as a bit) this time we will store sequence // // s u r g e r y // g [0,0,0,1,1,1,1] : [3,4] (Add level 1) // s [1,1,1,1,1,1,1] : [0,1] (Make level 1 happens sooner) // u [1,2,2,2,2,2,2] : [0,2] (Add level 2, append to block of consecutive increase) // r [1,2,3,3,3,3,3] : [0,3] (Add level 3, append to block of consecutive increase) // v [1,2,3,3,3,3,3] : [0,3] (v not in surgery, copy) // e [1,2,3,3,4,4,4] : [0,3],[4,5] (Add level 4, create new block for it) // y [1,2,3,3,4,4,5] : [0,3],[4,5],[6,7] (Add level 5, create new block for it) // // There is 2 Basic operations: // - Make a level-up happens sooner // - Add an extra level up at the end. (this is where llcs increase !) // // 12345678901234567890 // Position (for this demo we start at 1) // ii------iii---i--i-- // Increase point of previous line // 12222222345555666777 // Score previous line [1,3] [9,12] [15,16] [18,19] // ---m-m---------m---m // Match of this line // 12233333345555677778 // Score of this line [1,3] [4,5] [10,12] [15,17] [20,21] // ii-i-----ii---ii---i // New increase point // 12345678901234567890 // Position FuzzySearch.llcs_large = function (a, b, aMap, prefix) { //var aMap = FuzzySearch.posVector(a); //Position of next interest point. 
Interest point are either // - Increase in previous line // - Match on this line var block_start, match_pos; // We encode increase sequence as [start_pos, end_pos+1] // So end-start = length // To avoid dealing with to many edge case we place // a special token at start & end of list var last_line, line_index, last_end, block_end; if (prefix === undefined) prefix = 0; if (prefix) last_line = [new Block(0, prefix), new Block(Infinity, Infinity)]; else last_line = [new Block(Infinity, Infinity)]; var lcs_len = prefix; var match_list, match_index; var block, block_index, block_size; //First line var nb_blocks = last_line.length; var n = b.length, j; for (j = prefix; j < n; j++) { //Each line we process a single character of b var c = b[j]; if (!(c in aMap)) continue; match_list = aMap[c]; //New line // the number of if block can only increase up to llcs+1+sentinel // alternatively each block having >1 item can split. (+1 at end accounted by splitting sentinel) /** @type Array. */ var current_line = new Array(Math.min(2 * nb_blocks, lcs_len + 2)); line_index = -1; //First match match_index = 0; match_pos = match_list[0]; //Place end of first block before the string block_end = -1; block_index = -1; while (++block_index < nb_blocks) { //Place cursor just after last block last_end = block_end; //Read end block block = last_line[block_index]; block_start = block.start; //Encode block as [s,e[ block_end = block.end; //End is position of char that follow last. block_size = block_end - block_start; //Size of block, for sentinel (Inf-Inf=NaN) //get next match from list of matches while (match_pos < last_end) { match_pos = match_list[++match_index]; } // This cover two case // a) no match between two block // b) block happens after last match (so match_pos=Infinity). // At the last block, this will append closing "sentinel" to line if (block_start <= match_pos) { current_line[++line_index] = block; continue; } // // If we have reached here, we have a dominant match ! // Decide where to register the match ... // if (match_pos === last_end) { //End of last block ? (step a.ii) current_line[line_index].end++; } else { //Increase need it's own block ( step a.i) //try to reuse block that will get deleted. if (block_size === 1) { //Can we reuse next block ? block.start = match_pos; block.end = match_pos + 1; current_line[++line_index] = block; } else { //start a new block current_line[++line_index] = new Block(match_pos, match_pos + 1); } } // if not empty, append next block to current line (step a.iii) // (this condition reject "sentinel", it'll get added just after the for loop) if (block_size > 1) { block.start++; // Move start by one current_line[++line_index] = block; } } // If the line finish with a match: // a) llcs at end of this line is one greater than last line, increase score // b) we still need to append sentinel if (block_start > match_pos) { current_line[++line_index] = block; lcs_len++ } //Current become last last_line = current_line; //Count actual number of block because we allocate a bit more. 
nb_blocks = ++line_index; } return lcs_len; }; /** * A block with start and end position * Used to record consecutive increase position in llcs_large * @param start * @param end * @constructor */ function Block(start, end) { this.start = start; this.end = end; } // // Reference implementation to debug // Might need to swap input to match internal of a given algorithm // /* function lcs(a, b) { var m = a.length; var n = b.length; var i, j; //init m by n array with 0 var C = [], row = [], lcs = []; for (j = 0; j < n; j++) row[j] = 0; for (i = 0; i < m; i++) C[i] = row.slice(); //fill first row and col C[0][0] = (a[0] === b[0]) ? 1 : 0; for (i = 1; i < m; i++) C[i][0] = (a[i] === b[0] || C[i - 1][0]) ? 1 : 0 for (j = 1; j < n; j++) C[0][j] = (a[0] === b[j] || C[0][j - 1]) ? 1 : 0 console.log(JSON.stringify(C[0])); //bulk for (i = 1; i < m; i++) { for (j = 1; j < n; j++) { C[i][j] = (a[i] === b[j]) ? C[i - 1][j - 1] + 1 : Math.max(C[i][j - 1], C[i - 1][j]); } console.log(JSON.stringify(C[i])); } //backtrack i--; j--; while (i > -1 && j > -1) { if (i && C[i][j] == C[i - 1][j]) i--; else if (j && C[i][j] == C[i][j - 1]) j--; else { lcs.push(a[i]); j--; i--; } } return lcs.reverse().join(''); }*/ // main entry of the algorithm (once settings are set) // loop over everything and merge best scores 'use strict'; extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ { /** * Perform a search on the already indexed source. * * @param {string} query_string * @returns {Array} */ search: function (query_string) { var time_start = Date.now(); this.start_time = time_start; var options = this.options; // As long as lazy is set to false, we guarantee that making a search is read only. if (this.dirty && options.lazy) { this._buildIndexFromSource(); this.dirty = false; } var query = this.query = this._prepQuery(query_string); var source = this.index; var results = []; if (options.use_index_store) { source = this._storeSearch(query, source); } if (options.filter) { source = options.filter.call(this, source); } // ---- MAIN SEARCH LOOP ---- // var thresh_include = this._searchIndex(query, source, results); //keep only results that are good enough compared to best results = FuzzySearch.filterGTE(results, "score", thresh_include); // sort by decreasing order of score // equal rounded score: alphabetical order if (typeof options.sorter === "function") results = results.sort(options.sorter); if (options.output_map || options.output_limit > 0) { if (typeof options.output_map === "function") results = FuzzySearch.map(results, options.output_map, this, options.output_limit); else results = FuzzySearch.mapField(results, options.output_map, options.output_limit); } var time_end = Date.now(); this.search_time = time_end - time_start; this.results = results; return results }, /** * Main search loop for a specified source * This separation allow to search a different source, or a subset of source * * @param {Query} query * @param {Array.} source * @param {Array.} results * @returns {number} - thresh_include after this run. 
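 *
 * Illustrative numbers: with thresh_include = 2.0 and thresh_relative_to_best = 0.5,
 * once some item scores 10 the running threshold becomes 5, so weaker items found
 * later are no longer pushed (a final filterGTE pass removes earlier weak entries).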
* * @private */ _searchIndex: function (query, source, results) { var options = this.options; var opt_bpd = options.bonus_position_decay; var opt_fge = options.field_good_enough; var opt_trb = options.thresh_relative_to_best; var opt_score_tok = options.score_per_token; var opt_round = options.score_round; var thresh_include = options.thresh_include; var best_item_score = 0; var sub_query = query.children; for (var item_index = -1, nb_items = source.length; ++item_index < nb_items;) { //get indexed fields var item = source[item_index]; var item_fields = item.fields; //reset score query.resetItem(); var item_score = 0; var matched_field_index = -1; var matched_node_index = -1; var position_bonus = 1.0; // //Foreach field // for (var field_index = -1, nb_fields = item_fields.length; ++field_index < nb_fields;) { var field_score = 0; var field_node = -1; var field = item_fields[field_index]; var child_query = sub_query[field_index]; //tag search var tagged = !!child_query; for (var node_index = -1, nb_nodes = field.length; ++node_index < nb_nodes;) { var node_score, node = field[node_index]; if (opt_score_tok) { node_score = this._scoreField(node, query); if (tagged) node_score += this._scoreField(node, child_query);//tag search } else node_score = FuzzySearch.score_map(query.fused_str, node.join(" "), query.fused_map, options); if (node_score > field_score) { field_score = node_score; field_node = node_index; } } field_score *= (1.0 + position_bonus); position_bonus *= opt_bpd; if (field_score > item_score) { item_score = field_score; matched_field_index = field_index; matched_node_index = field_node; if (field_score > opt_fge) break; } } // // Different query token match different fields ? // if (opt_score_tok) { var query_score = query.scoreItem(); item_score = 0.5 * item_score + 0.5 * query_score; } // // Keep track of best result, this control inclusion in the list // if (item_score > best_item_score) { best_item_score = item_score; var tmp = item_score * opt_trb; if (tmp > thresh_include) thresh_include = tmp; } // //candidate for best result ? 
push to list // if (item_score > thresh_include) { item_score = Math.round(item_score / opt_round) * opt_round; results.push(new SearchResult( item.item, item_fields, item_score, matched_field_index, matched_node_index, item_fields[0][0].join(" ") )); } } return thresh_include }, /** * Internal loop that is run for each field in an item * * @param {Array} field_tokens * @param {Query} query * @returns {number} * @private */ _scoreField: function (field_tokens, query) { var groups = query.tokens_groups; var nb_groups = groups.length; var nb_tokens = field_tokens.length; if (!nb_groups || !nb_tokens) return 0; var field_score = 0, sc, bf; var last_index = -1; var options = this.options; var bonus_order = options.bonus_token_order; var minimum_match = options.minimum_match; var token, scores, i; for (var group_index = -1; ++group_index < nb_groups;) { var group_info = groups[group_index]; var nb_scores = group_info.tokens.length; // Each packinfo have their own reusable scratch pad // to store best score information, reset them to 0 var best_of_field = group_info.score_field; for (i = -1; ++i < nb_scores;) best_of_field[i] = 0 var best_index = group_info.field_pos; for (i = -1; ++i < nb_scores;) best_index[i] = 0 for (var field_tk_index = -1; ++field_tk_index < nb_tokens;) { token = field_tokens[field_tk_index]; scores = FuzzySearch.score_pack(group_info, token, options); for (i = -1; ++i < nb_scores;) { sc = scores[i]; bf = best_of_field[i]; //Score is an improvement OR //Score is within a token order bonus from being better, but word are swapped if (sc > bf || ( bf - sc < bonus_order && i > 0 && best_index[i] <= best_index[i - 1] )) { best_of_field[i] = sc; best_index[i] = field_tk_index; } } } var best_match_this_item = group_info.score_item; for (i = -1; ++i < nb_scores;) { sc = best_of_field[i]; field_score += sc; // Give bonus for pair in consecutive order // Only consider positive match for bonus if (sc > minimum_match) { var this_index = best_index[i]; //Bonus is diluted by the distance between words. //Positive match, but out of order get half the bonus. var d = this_index - last_index; var bo = bonus_order * ( 1.0 / (1.0 + Math.abs(d))); if (d > 0) bo *= 2; field_score += bo; sc += bo; last_index = this_index; } if (sc > best_match_this_item[i]) best_match_this_item[i] = sc; } } if (options.score_test_fused) { // field_tokens.join(" "), remove last one if acronym // performance of array.join(" ") and str concat look similar on modern browser. var n = (options.score_acronym) ? nb_tokens - 1 : nb_tokens; var fused_field = field_tokens[0], fi = 0; while (++fi < n) fused_field += " " + field_tokens[fi]; // test "space bar is broken" no token match var fused_score = FuzzySearch.score_map(query.fused_str, fused_field, query.fused_map, options); fused_score += bonus_order; //fused cannot be out of order field_score = fused_score > field_score ? fused_score : field_score; if (fused_score > query.fused_score) { query.fused_score = fused_score; } } return field_score; } }); extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ { /** * Take a `source_item` (unprocessed item from source) and keys and produce * an `item` that's ready to be added to `this.index`. 
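 *
 * Illustrative shape (hypothetical item): with keys ["title"], {title: "Blade Runner"}
 * becomes an Indexed whose fields are [[["blade", "runner"]]] - one entry per key,
 * one node per collected value, each node an array of normalized word tokens.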
* * Preparation steps: * - Apply lowercase, accent removal * - Split field into token * - Remove small token eg "a" "of" and prefix large token */ _prepItem: function (source_item, keys) { var item_fields = FuzzySearch.generateFields(source_item, keys); var nb_fields = item_fields.length; for (var field_index = -1; ++field_index < nb_fields;) { var field = item_fields[field_index]; for (var node_index = -1, nb_nodes = field.length; ++node_index < nb_nodes;) { var norm = this.options.normalize(field[node_index]); var nodes = norm.split(this.token_re); //Filter size. (If total field length is very small, make an exception. // Eg some movie/Book have a single letter title, filter risk of removing everything ) if (norm.length > 2 * this.options.token_field_min_length) nodes = FuzzySearch.filterSize(nodes, this.options.token_field_min_length, this.options.token_field_max_length); if (this.options.score_acronym) nodes.push(norm.replace(this.acro_re, "$1")); field[node_index] = nodes; } } return new Indexed(source_item, item_fields); }, /** * Add an item to search index AND source collection. * It'll use identify_item to find if the item already exist. * If identify_item is null (default), calling this method is append-only with no duplicate detection * * To update the source, it use the assumption that this.source and this.index can be synced * by array index. That assumption will be true if source is a plain array, and always updated by this library. * Feel free to set `should_update_source` to false to manually manage source collection. * * Keeping source in sync is important to allow to recompute index from source. * This will happens with certain setting changes. * * @param {*} source_item - item to add to search index * @param {boolean=} should_update_source - set to false to skip updating the source. */ add: function(source_item, should_update_source){ // Default to keeping source in sync. if(should_update_source === undefined) should_update_source = true; var item_id = typeof this.options.identify_item === "function" ? this.options.identify_item(source_item) : null; // Find where to insert new item var idx; if (item_id === null) { // No identifier, append to end idx = this.nb_indexed; this.nb_indexed++; } else if (item_id in this.index_map) { // Item exist, update idx = this.index_map[item_id]; } else { // New identifier, append to end & record new this.index_map[item_id] = this.nb_indexed; idx = this.nb_indexed; this.nb_indexed++; } // Compute indexed item and update index var prepared = this._prepItem(source_item, this.keys); this.index[idx] = prepared; // Insert in source; if(should_update_source) this.source[idx] = source_item; if (this.options.use_index_store) { this._storeAdd(prepared, idx); } }, /** * Build (or rebuild) `this.index` from `this.source` * Flatten object into array using specified keys * * @private */ _buildIndexFromSource: function () { var nb_items = this.source.length; this.index = new Array(nb_items); this.index_map = {}; this.nb_indexed = 0; for (var item_index = -1; ++item_index < nb_items;) { var source_item = this.source[item_index]; // Add item to index. // Because we are iterating over source, do not attempt to modify it. 
this.add(source_item, false); } } }); /** * Original item with cached normalized field * * @param {*} source_item * @param {Array.} fields * @constructor */ function Indexed(source_item, fields) { this.item = source_item; this.fields = fields; } // - - - - - - - - - - - - - - - - - - - - - - // Input stage: prepare field for search //- - - - - - - - - - - - - - - - - - - - - - /** * Given an object to index and a list of field to index * Return a flat list of the values. * * @param {Object} obj * @param {Array.} fieldlist * @returns {Array} */ FuzzySearch.generateFields = function (obj, fieldlist) { if (!fieldlist || !fieldlist.length) return [[obj.toString()]]; var n = fieldlist.length; var indexed_fields = new Array(n); for (var i = -1; ++i < n;) indexed_fields[i] = _collectValues(obj, fieldlist[i].split("."), [], 0); return indexed_fields; }; /** * Traverse an object structure to collect item specified by parts. * If leaf node is an array or dictionary collect every children. * If key is wildcard '*' branch out the search process on each children. * * @param {*} obj - root to process * @param {Array.} parts - array of subkey to direct object traversal "those.that.this"->["those","that","this"] * @param {Array} list - where to put collected items * @param {number} level - index of current position on parts list * @returns {Array} - return list * @private */ function _collectValues(obj, parts, list, level) { var key, i, olen; var nb_level = parts.length; while (level < nb_level) { key = parts[level++]; if (key === "*" || key === "") break; if (!(key in obj)) return list; obj = obj[key]; } var type = Object.prototype.toString.call(obj); var isArray = ( type === '[object Array]' ); var isObject = ( type === '[object Object]' ); if (level === nb_level) { if (isArray) for (i = -1, olen = obj.length; ++i < olen;) list.push(obj[i].toString()); else if (isObject) { for (key in obj) { if (obj.hasOwnProperty(key)) list.push(obj[key].toString()); } } else list.push(obj.toString()); } else if (key === "*") { if (isArray) for (i = -1, olen = obj.length; ++i < olen;) { _collectValues(obj[i], parts, list, level); } else if (isObject) for (key in obj) { if (obj.hasOwnProperty(key)) _collectValues(obj[key], parts, list, level); } } return list; } 'use strict'; extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ { /** * * @param {Indexed} preparedItem * @param {int} idx */ _storeAdd: function (preparedItem, idx) { var keyList = keysFromIndexedItem(preparedItem); if (keyList.length == 0) return; // register idx on all appropriate key for (var i = 0; i < keyList.length; i++) { var key = keyList[i]; if (key in this.store) { // append to existing array of index this.store[key].push(idx); } else { // Format is dict key => array of item index this.store[key] = [idx]; } } }, /** * * @param {Query} preparedQuery * @param {Array.} source */ _storeSearch: function (preparedQuery, source) { // Scan query for index keys. var keyList = keysFromQuery(preparedQuery); if (keyList.length == 0) return []; // return filtered source var idAndCount = retrieveCount(keyList, this.store); if (idAndCount.length == 0) return []; // Get minimum quality and remap to original items. 
var tresh = idAndCount[0].count * this.options.store_thresh; idAndCount = FuzzySearch.filterGTE(idAndCount, "count", tresh); return FuzzySearch.map(idAndCount, function (x) { return source[x.id] }, this, this.options.store_max_results); } }); /** * * @param {Indexed} preparedItem */ function keysFromIndexedItem(preparedItem) { // Process the nested structure of a prepared item in order to extract index keys. var keyList = []; var keyDict = {}; // item -> fields -> nodes -> word_tokens var fields = preparedItem.fields; for (var i = 0; i < fields.length; i++) { var nodes = fields[i]; for (var j = 0; j < nodes.length; j++) { var words = nodes[j]; for (var k = 0; k < words.length; k++) { keysFromWord(words[k], keyList, keyDict) } } } return keyList; } /** * * @param {Query} query */ function keysFromQuery(query) { var keyList = []; var keyDict = {}; var i, j; var words = query.words; for (i = 0; i < words.length; i++) { keysFromWord(words[i], keyList, keyDict) } var children = query.children; for (i = 0; i < children.length; i++) { words = children[i].words; for (j = 0; j < words; j++) { keysFromWord(words[j], keyList, keyDict) } } return keyList; } function keysFromWord(word, keysList, existingDict) { var len = word.length; if (len == 0) return; if (len >= 3) { // 3o6, 3o5, 3o4, 3o3 select3(word, 6, keysList, existingDict) } if (len >= 2) { // 2o4, 2o3,2o2 select2(word, 4, keysList, existingDict) } // 1o1 strategy: This index by first letter union(word[0], keysList, existingDict); } function select2(str, maxlen, existingList, existingDict) { var len = Math.min(str.length, maxlen); for (var i = 0; i < len - 1; i++) { for (var j = i + 1; j < len; j++) { union(str[i] + str[j], existingList, existingDict) } } return existingList; } function select3(str, maxlen, existingList, existingDict) { var len = Math.min(str.length, maxlen); for (var i = 0; i < len - 2; i++) { for (var j = i + 1; j < len - 1; j++) { for (var k = j + 1; k < len; k++) { union(str[i] + str[j] + str[k], existingList, existingDict) } } } return existingList; } function union(word, existingList, existingDict) { if (!(word in existingDict)) { existingDict[word] = true; existingList.push(word); } } function retrieveCount(keys, store) { // Dictionary idx => count var countPerIndex = {}; if (keys.length == 0) return []; for (var i = 0; i < keys.length; i++) { var key = keys[i]; // Does the key exist in the index ? if (key in store) { // If so add every entry of that key into countPerIndex // Also for each entry, maintain a count of matched keys. var idxList = store[key]; for (var j = 0; j < idxList.length; j++) { var idx = idxList[j]; if (idx in countPerIndex) { countPerIndex[idx]++; } else { countPerIndex[idx] = 1; } } } } // Transform countPerIndex into a sorted list of IdAndCount var outList = []; for (var id in countPerIndex) { if (countPerIndex.hasOwnProperty(id)) { outList.push(new IdAndCount(id, countPerIndex[id])); } } // We can probably filterGte here. // Custom sort decreasing order outList = outList.sort(function (a, b) { return b.count - a.count }); return outList; } function IdAndCount(id, count) { this.id = id; this.count = count; } // // Shared string and array of string functions // 'use strict'; /** * Take a string into a normal form. Allow to compare in a case insensitive way. * Also allow to match accents with their base form "é" vs "e" * Finally standardize token separator to be a single space. 
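 *
 * Example: normalize("Éléphant") -> "elephant"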
* * @param {string} str * @returns {string} - normalized str */ function normalize(str) { if (!str)return ""; return str.toLowerCase().replace(/[^\u0000-\u007E]/g, function (a) { return diacriticsMap[a] || a; }); } function getDiacriticsMap() { // replace most common accents in french-spanish by their base letter //"ãàáäâæẽèéëêìíïîõòóöôœùúüûñç" var from = "\xE3\xE0\xE1\xE4\xE2\xE6\u1EBD\xE8\xE9\xEB\xEA\xEC\xED\xEF\xEE\xF5\xF2\xF3\xF6\xF4\u0153\xF9\xFA\xFC\xFB\xF1\xE7"; var to = "aaaaaaeeeeeiiiioooooouuuunc"; var diacriticsMap = {}; for (var i = 0; i < from.length; i++) { diacriticsMap[from[i]] = to[i] } return diacriticsMap; } var diacriticsMap = getDiacriticsMap(); /** * Process an array of string, filter out item smaller than min, trim item larger than max. * * @param {Array.} array - array of string * @param minSize - filter out item smaller than this * @param maxSize - substring item larger than this * @returns {Array} */ FuzzySearch.filterSize = function (array, minSize, maxSize) { var i = -1, j = -1; var n = array.length; var out = []; var str, slen; while (++i < n) { str = array[i]; slen = str.length; if (slen >= minSize) out[++j] = (slen < maxSize) ? str : str.substr(0, maxSize) } return out; }; // // Extend base option to support highlight // 'use strict'; extend(FuzzySearch.defaultOptions, /** @lends {FuzzySearchOptions.prototype} */{ highlight_prefix: false, // true: force prefix as part of highlight, (false: minimum gap, slower) highlight_bridge_gap: 2, // display small gap as substitution, set to size of gap, 0 to disable highlight_before: '', //tag to put before/after the highlight highlight_after: '' }); /** * Highlight a string using query stored in a FuzzySearch object. * @param {string} str * @param {string=} field */ FuzzySearch.prototype.highlight = function (str, field) { var i, subq; var qnorm = this.query.normalized; if (field && field.length && (i = this.tags.indexOf(field)) > -1 && (subq = this.query.children[i])) qnorm += (qnorm.length ? " " : "") + subq.normalized; return FuzzySearch.highlight(qnorm, str, this.options) }; /** * Highlight string b, from searching a in it. 
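 *
 * Illustrative call (hypothetical strings): FuzzySearch.highlight("noir", "Film Noir")
 * returns "Film Noir" with the matched span wrapped between
 * options.highlight_before and options.highlight_after.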
* * @param {string} a - string to search * @param {string} b - string to highlight * @param {FuzzySearchOptions=} options * */ FuzzySearch.highlight = function (a, b, options) { if (options === undefined) options = FuzzySearch.defaultOptions; if (!b) return ""; var open_string = options.highlight_before; var close_string = options.highlight_after; var opt_score_tok = options.score_per_token; var opt_fuse = options.score_test_fused; var opt_acro = options.score_acronym; var token_re = options.token_re; var aa = options.normalize(a); var bb = options.normalize(b); //Normalized needle var a_tokens = aa.split(token_re); //Normalized haystack var b_tokens = bb.split(token_re); //Original spelling haystack var disp_tokens = [], disp_sep = []; splitKeepSep(b, token_re, disp_tokens, disp_sep); var strArr = []; var match_list = []; var fused_score = 0, match_score = 0; if (opt_score_tok) { match_score = FuzzySearch.matchTokens(b_tokens, a_tokens, match_list, options, false); } //Test "space bar is broken" no token match if (opt_fuse || !opt_score_tok || opt_acro) fused_score = FuzzySearch.score_map(aa, bb, FuzzySearch.alphabet(aa), options) + options.bonus_token_order; if (match_score === 0 && fused_score === 0) return b; //shortcut no match if (!opt_score_tok || fused_score > match_score) { a_tokens = [aa]; //everything in a single token b_tokens = [bb]; disp_tokens = [b]; match_list = [0]; } var nbtok = disp_tokens.length, j = -1; while (++j < nbtok) { var i = match_list[j]; if (i === -1) { strArr.push(disp_tokens[j] + disp_sep[j]); continue; } var ta = a_tokens[i]; var tb = b_tokens[j]; var td = disp_tokens[j]; var curr = 0; var start_positions = []; var end_positions = []; FuzzySearch.align(ta, tb, start_positions, end_positions); var len = start_positions.length; var k = -1; while (++k < len) { var s = start_positions[k]; var e = end_positions[k]; if (s > curr) strArr.push(td.substring(curr, s)); strArr.push(open_string + td.substring(s, e) + close_string); curr = e; } strArr.push(td.substring(curr) + disp_sep[j]); } return strArr.join(''); }; function splitKeepSep(str, pattern, tokens, seps) { var tok_index = tokens.length; var match = pattern.exec(str); if (match === null) { tokens[tok_index] = str; seps[tok_index] = ""; return; } var start = 0, end, len; while (match !== null) { end = match.index; len = match[0].length; tokens[tok_index] = str.substring(start, end); seps[tok_index] = str.substr(end, len); start = end + len; tok_index++; match = pattern.exec(str); } tokens[tok_index] = str.substring(start); seps[tok_index] = ""; } // // Smith-Waterman-Gotoh local Alignment // // Smith&Waterman worked the idea of local alignment // While Gotoh 82 worked on affine gap penalty. // // This is the basic algorithm with some optimisation to use less space. // JAligner has been used as a reference implementation to debug. // Some of their implementation detail to save memory has been reused here. 
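//
// Weights used below: a match scores +100, opening a gap -10, extending it -1.
// So aligning "uni" against "united" (illustrative) gives three consecutive
// matches for +300 with no gap penalty.
//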
// // See pseudo-code on // http://jaligner.sourceforge.net/api/jaligner/SmithWatermanGotoh.html // // /** * Smith-Waterman-Gotoh local Alignment * Build sequences of matches, called send array (seq_start,seq_end) to store them * Return match score * * @param {string} a - string to search * @param {string} b - string to be searched * @param {Array.} seq_start - store for match start * @param {Array.} seq_end - store for match end * @param {FuzzySearchOptions=} options * @returns {number} */ FuzzySearch.align = function (a, b, seq_start, seq_end, options) { if (options === undefined) options = FuzzySearch.defaultOptions; var wm = 100; // score of making a match var wo = -10; // score to open a gap var we = -1; // score to continue an open gap //Traceback directions constants var STOP = 0; var UP = 1; var LEFT = 2; var DIAGONAL = 3; var score_acronym = options.score_acronym; var sep_tokens = options.token_sep; var m = Math.min(a.length + 1, options.token_query_max_length); var n = Math.min(b.length + 1, options.token_field_max_length); // Comon prefix is part of lcs, // but not necessarily part of best alignment (it can introduce an extra gap) // however prefix make sens in an autocomplete scenario and speed things up // var i, j; var k = m < n ? m : n; var prefix_len = 0; if (a === b) { //speedup equality prefix_len = m; m = 0; } else if (options.highlight_prefix) { for (i = 0; i < k && (a[i] === b[i]); i++) prefix_len++; if (prefix_len) { a = a.substring(prefix_len); b = b.substring(prefix_len); m -= prefix_len; n -= prefix_len; } } var vmax = 0, imax = 0, jmax = 0; var trace = new Array(m * n); var pos = n - 1; //m,n = length+1 if (m > 1 && n > 1) { var vrow = new Array(n), vd, v, align; var gapArow = new Array(n), gapA, gapB = 0; for (j = 0; j < n; j++) { gapArow[j] = 0; vrow[j] = 0; trace[j] = STOP; } //DEBUG //var DEBUG_V = []; //var DEBUG_TR = []; for (i = 1; i < m; i++) { gapB = 0; vd = vrow[0]; pos++; trace[pos] = STOP; //DEBUG //DEBUG_V[i] = []; //DEBUG_TR[i] = []; for (j = 1; j < n; j++) { // // Reference "pseudocode" // We try to fill that table, but using o(n) instead o(m*n) memory // If we need traceback we still need o(m*n) but we store a single table instead of 3 // // F[i][j] = f = Math.max(F[i - 1][j] + we, V[i - 1][j] + wo ); // E[i][j] = e = Math.max(E[i][j - 1] + we, V[i][j - 1] + wo ); // align = (a[i - 1] === b[j - 1]) ? V[i - 1][j - 1] + wm : -Infinity; // V[i][j] = v = Math.max(e, f, align, 0); // // Score the options gapA = gapArow[j] = Math.max(gapArow[j] + we, vrow[j] + wo); //f gapB = Math.max(gapB + we, vrow[j - 1] + wo); //e if (score_acronym) align = ( a[i - 1] !== b[j - 1] ) ? -Infinity : ( vd + wm + ( ( i < 2 || sep_tokens.indexOf(a[i - 2]) > -1 ) ? wm : 0) + ( ( j < 2 || sep_tokens.indexOf(b[j - 2]) > -1 ) ? wm : 0) ); else align = ( a[i - 1] === b[j - 1] ) ? vd + wm : -Infinity; vd = vrow[j]; v = vrow[j] = Math.max(align, gapA, gapB, 0); //DEBUG //DEBUG_V[i][j] = v; // Determine the trace back direction pos++; //pos = i * n + j; switch (v) { // what triggered the best score ? //In case of equality, taking gapB get us closer to the start of the string. 
                    case gapB:
                        trace[pos] = LEFT;
                        break;

                    case align:
                        trace[pos] = DIAGONAL;
                        if (v > vmax) {
                            vmax = v;
                            imax = i;
                            jmax = j;
                        }
                        break;

                    case gapA:
                        trace[pos] = UP;
                        break;

                    default:
                        trace[pos] = STOP;
                        break;
                }

                //DEBUG
                //DEBUG_TR[i][j] = trace[pos];

            }
        }
    }

    //DEBUG
    //console.table(DEBUG_V);
    //console.table(DEBUG_TR);

    // - - - - - - - - -
    //    TRACEBACK
    // - - - - - - - - -

    var bridge = options.highlight_bridge_gap;
    var last_match = 0;

    if (vmax > 0) {

        // Backtrack to the aligned sequence,
        // record start and end of substrings.
        // vmax happens at the end of the last substring.

        i = imax;
        j = jmax;
        pos = i * n + j;
        last_match = jmax;
        seq_end.push(jmax + prefix_len);

        var backtrack = true;
        while (backtrack) {

            switch (trace[pos]) {

                case UP:
                    i--;
                    pos -= n;
                    break;

                case LEFT:
                    j--;
                    pos--;
                    break;

                case DIAGONAL:

                    // If we have traversed a gap,
                    // record start/end of sequence
                    // (unless we want to bridge the gap).

                    if (last_match - j > bridge) {
                        seq_start.push(last_match + prefix_len);
                        seq_end.push(j + prefix_len);
                    }

                    j--;
                    i--;
                    last_match = j;
                    pos -= n + 1;
                    break;

                case STOP:
                default:
                    backtrack = false;
            }

        }

        // First matched char
        seq_start.push(last_match + prefix_len);

    }

    if (prefix_len) {
        if (last_match > 0 && last_match <= bridge) {
            // Bridge the last match to the prefix?
            seq_start[seq_start.length - 1] = 0;
        } else {
            // Add the prefix to the matches
            seq_start.push(0);
            seq_end.push(prefix_len);
        }
    }

    // Arrays were built backward, reverse to sort
    seq_start.reverse();
    seq_end.reverse();

    return vmax + prefix_len;

};


//
// Each query token is matched against a field token
// or against nothing (not in the field).
//
// a: [paint] [my] [wall]
// b: [wall] [painting]
//
// match: [1, -1, 0]
//
// if a[i] matches b[j]
// then match[i] = j
//
// if a[i] matches nothing
// then match[i] = -1
//
// Return the match score.
// The match vector is taken by reference to output the match detail.
//
// Ideal case:
// each token of "a" is matched against its highest score(a[i], b[j]).
//
// But in case two tokens have the same best match,
// we have to check for another pairing that gives the highest score
// under the constraint of a 1:1 exclusive match.
//
// To do that we check all possible pairing permutations,
// but we restrict ourselves to a set of plausible pairings.
//
// That is, a token of "a" will only consider pairings with a score of at least
// thresh_relative_to_best * [its highest score]
//

/**
 * Match tokens of A against tokens of B, under the constraint that each token can be matched at most once.
 *
 * @param {Array.<string>} a_tokens
 * @param {Array.<string>} b_tokens
 * @param {Array.<number>} match - array to store results
 * @param {FuzzySearchOptions=} options
 * @param {boolean=} flip - if true score A against B, but return index of B against A.
 * @returns {number} Score of the best match combination.
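 *
 * @example
 * // Sketch of the a/b example above (exact scores depend on the options used):
 * var match = [];
 * var score = FuzzySearch.matchTokens(["paint", "my", "wall"], ["wall", "painting"], match);
 * // match is now roughly [1, -1, 0]:
 * //   "paint" -> "painting", "my" -> nothing, "wall" -> "wall"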
 */
FuzzySearch.matchTokens = function (a_tokens, b_tokens, match, options, flip) {

    if (options === undefined) options = FuzzySearch.defaultOptions;
    if (flip === undefined) flip = false;

    var minimum_match = options.minimum_match;
    var best_thresh = options.thresh_relative_to_best;

    var i, j, row;
    var C = [];

    var m = a_tokens.length;
    var n = b_tokens.length;

    var a_maps = FuzzySearch.mapAlphabet(a_tokens);
    var a_tok, b_tok, a_mp;

    var rowmax = minimum_match, imax = -1, jmax = -1, v;
    var match_count = 0;
    var thresholds = [];

    for (i = 0; i < m; i++) {

        row = [];
        match[i] = -1;
        rowmax = minimum_match;

        a_tok = a_tokens[i];
        if (!a_tok.length) {
            // Skip the score loop but still fill the array
            for (j = 0; j < n; j++) row[j] = 0;
            C[i] = row;
            continue;
        }

        a_mp = a_maps[i];

        for (j = 0; j < n; j++) {

            b_tok = b_tokens[j];
            if (!b_tok.length) {
                row[j] = 0;
                continue;
            }

            v = FuzzySearch.score_map(a_tok, b_tok, a_mp, options);
            row[j] = v;

            if (v > minimum_match) match_count++;

            if (v > rowmax) {
                rowmax = v;
                imax = i;
                jmax = j;
            }

        }

        thresholds[i] = rowmax;
        C[i] = row;

    }

    // Shortcut: no match
    if (match_count === 0) return 0;

    // Shortcut: single possible pairing
    if (match_count === 1) {
        match[imax] = jmax;
        if (flip) _flipmatch(match, n);
        return rowmax;
    }

    // Only consider matchings close enough to the best match
    for (i = 0; i < a_tokens.length; i++) {
        thresholds[i] = Math.max(best_thresh * thresholds[i], minimum_match);
    }

    var score = _matchScoreGrid(C, match, thresholds, options.bonus_token_order);

    // Flip back the problem if necessary
    if (flip) _flipmatch(match, n);

    return score;

};

/**
 * Perform the same match as FuzzySearch.matchTokens,
 * but the token-against-token scores are already computed in C.
 *
 * This is mostly a preparation phase for _buildScoreTree, as well
 * as a post-processing traversal to recover the match.
 *
 * @param {Array.<Array.<number>>} C - precomputed scores
 * @param {Array.<number>} match - store the position of the best matches
 * @param {Array.<number>} thresholds - minimum score each token is willing to match
 * @param {number} order_bonus
 * @returns {number} - best score
 * @private
 */
function _matchScoreGrid(C, match, thresholds, order_bonus) {

    var i_len = C.length;
    var i, j;

    // Traverse the score grid to find the best permutation
    var score_tree = [];
    for (i = 0; i < i_len; i++) {
        score_tree[i] = {};
    }

    var opt = new TreeOptions(C, score_tree, thresholds, order_bonus);
    var score = _buildScoreTree(opt, 0, 0).score;

    var used = 0, item;

    for (i = 0; i < i_len; i++) {

        item = score_tree[i][used];
        if (!item) break;
        match[i] = j = item.index;
        if (j > -1) used |= (1 << j);

    }

    return score;
}

//
// Cache tree:
//
// Given 5 nodes: 1, 2, 3, 4, 5
//
// What is the best match ...
//    - knowing that we have passed through 1->2->3
//    - knowing that we have passed through 2->3->1
//    - knowing that we have passed through 3->1->2
//
// All those questions have the same answer,
// because they are equivalent to matching {4,5} against {4,5}
// (in an alternate pass we can match {1,3} against {4,5}, for example).
//
// We store the matched j's in a bit vector of size 32.
//
// Besides saving computation, the cache_tree data structure is used to
// trace back the best permutation!
//
// Besides quickly testing whether an item is already used, used_mask serves
// as a key in cache_tree (in addition to the level). The ideal key would be the list of available trials,
// but used & available are complementary vectors (~not operation), so used is a perfectly valid key too...

/**
 * Branch out to try each permutation of items of A against items of B.
 *  - Only try branches not already used.
 *  - Prune branches below the token threshold.
 *  - Build a tree to cache sub-problems for which we already have a solution.
 *
 * @param {TreeOptions} tree_opt
 * @param {number} used_mask
 * @param {number} depth
 * @returns {MatchTrial} best_trial
 * @private
 */
function _buildScoreTree(tree_opt, used_mask, depth) {

    var C = tree_opt.score_grid;
    var cache_tree = tree_opt.cache_tree;
    var score_thresholds = tree_opt.score_thresholds;
    var order_bonus = tree_opt.order_bonus;

    var ilen = C.length;
    var jlen = C[depth].length;
    if (jlen > INT_SIZE) jlen = INT_SIZE;

    var j, score;
    var include_thresh = score_thresholds[depth];
    var best_score = 0, best_index = -1;
    var has_child = (depth < ilen - 1);
    var child_tree = cache_tree[depth + 1], child_key;

    for (j = 0; j < jlen; j++) {

        var bit = 1 << j;

        // If the token was previously used, skip
        if (used_mask & bit) continue;

        // Score for this match
        score = C[depth][j];

        // Too small of a match, skip
        if (score < include_thresh) continue;

        // Score for the child match:
        // if we have already computed this sub-block, get it from the cache
        if (has_child) {

            child_key = used_mask | bit;

            /** @type MatchTrial */
            var trial = (child_key in child_tree) ? child_tree[child_key] : _buildScoreTree(tree_opt, child_key, depth + 1);

            score += trial.score;

            if (j < trial.index) {
                score += order_bonus;
            }

        }

        // Because of DFS, the first loop that finishes is toward the end of the query.
        // As a heuristic it's good to match a higher index toward the end, so we accept equality.
        if (score >= best_score) {
            best_score = score;
            best_index = j;
        }

    }

    // Try the move of "do not match this token against anything"
    if (has_child) {

        child_key = used_mask;

        if (child_key in child_tree) score = child_tree[child_key].score;
        else score = _buildScoreTree(tree_opt, child_key, depth + 1).score;

        if (score > best_score) {
            best_score = score;
            best_index = -1;
        }

    }

    var best_trial = new MatchTrial(best_score, best_index);
    cache_tree[depth][used_mask] = best_trial;
    return best_trial;

}

/**
 *
 * @param {number} score
 * @param {number} index
 * @constructor
 */
function MatchTrial(score, index) {
    this.score = score;
    this.index = index;
}

/**
 *
 * @param {Array.<Array.<number>>} score_grid
 * @param {Array.<Object>} cache_tree
 * @param {Array.<number>} score_thresholds
 * @param {number} order_bonus
 * @constructor
 */
function TreeOptions(score_grid, cache_tree, score_thresholds, order_bonus) {
    this.score_grid = score_grid;
    this.cache_tree = cache_tree;
    this.score_thresholds = score_thresholds;
    this.order_bonus = order_bonus;
}

/**
 * Let A, B be two arrays.
 * Input is an array that maps "index of A" -> "index of B".
 * Output is the reverse map "index of B" -> "index of A".
 *
 * The array is modified in place.
 *
 * @param {Array.<number>} match - array to remap
 * @param {number} newlen - length of B
 * @private
 */
function _flipmatch(match, newlen) {

    var i, j;
    var ref = match.slice();
    match.length = newlen;

    for (i = 0; i < newlen; i++) {
        match[i] = -1;
    }

    for (i = 0; i < ref.length; i++) {
        j = ref[i];
        if (j > -1 && j < newlen) match[j] = i;
    }

}

//
// - - - - - - - - - - - -
//  UI INTEGRATION
// - - - - - - - - - - - -
//

extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {

    /**
     * Return a debounced version of FuzzySearch.search.
     * The new function signature allows a specific callback for each phase of the debounce.
     * The debounce is adaptive: it allows short bursts and tries to learn the actual computation time.
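     *
     * A wiring sketch (the `fuzzy` instance, input element and render callback are
     * hypothetical, for illustration only):
     *
     *     var interactive = fuzzy.getInteractive();
     *     input.addEventListener("input", function () {
     *         interactive(input.value, render, render, render);
     *     });
     *
     * The returned function takes the following arguments: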
     *
     * query: term to search
     * immediate_cb(results): called when the search ran right away (not suppressed by the debounce)
     * suppress_cb(cached_results): the debounce has suppressed the search; receives the cached results of the last search
     * finally_cb(results): if at least one suppression occurred, a new search is made when the debounce ends and this is called
     *
     * @returns {function({string}, function({Array}), function({Array}), function({Array}))}
     */
    getInteractive: function () {

        var self = this;
        var options = this.options;
        var wait = options.interactive_debounce;
        var mult = options.interactive_mult;
        var burst = options.interactive_burst;

        // Debounce off
        if (wait === 0) {
            return (function (query, immediate_cb, suppress_cb, finally_cb) {
                return immediate_cb(self.search(query));
            });
        }

        // Debounce
        var clock = (window.performance && window.performance.now) ? window.performance : Date;
        var timeout, cache;
        var count = 0, suppressed = false;

        return function (query, immediate_cb, suppress_cb, finally_cb) {

            var later = function () {
                timeout = null;
                if (suppressed) {
                    cache = self.search(query);
                    finally_cb(cache);
                }
                count = 0;
                suppressed = false;
            };

            clearTimeout(timeout);
            timeout = setTimeout(later, wait);

            if (++count < burst) {

                suppressed = false;
                var before = clock.now();
                cache = self.search(query);
                var ret = immediate_cb(cache);
                var now = clock.now();

                // Try to learn the typical search time (times the mult overhead factor)
                wait = 0.5 * wait + 0.5 * mult * (now - before);
                //console.log(wait);

                return ret;

            } else {
                suppressed = true;
                //console.log("suppress");
                return suppress_cb(cache);
            }

        }

    },

    /**
     * Allow the FuzzySearch object to be given as a source to twitter typeahead.
     * This implements an interface similar to the Bloodhound object.
     *
     * @returns {function({string}, function({Array}), function({Array}))} Interactive version of search.
     */
    __ttAdapter: function ttAdapter() {

        var debounced = this.getInteractive();
        var noop = function (a) { };

        return function (query, sync, async) {
            debounced(query, sync, noop, async);
        };

    },

    /**
     * Generate a function compatible with the jQuery UI autocomplete source option.
     *
     * @returns {function({Object}, {function()})} Interactive version of search.
     */
    $uiSource: function () {

        var debounced = this.getInteractive();
        var noop = function (a) { };

        return function (request, response) {
            debounced(request.term, response, noop, response);
        };

    }

});

//
// Export FuzzySearch
//

if (typeof require === 'function' && typeof module !== 'undefined' && module.exports) {

    // CommonJS-like environments
    module.exports = FuzzySearch;

} else if (typeof define === 'function' && define.amd) {

    // AMD. Register as an anonymous module.
    define(function () {
        return FuzzySearch;
    });

} else {

    // Browser globals
    globalThis['FuzzySearch'] = FuzzySearch;

}

return FuzzySearch;

})();
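//
// Usage sketch (illustrative only: the data, keys and query below are hypothetical,
// and the exact results depend on the options passed to the constructor):
//
//     var fuzzy = new FuzzySearch({
//         source: [{title: "The Hobbit"}, {title: "The Silmarillion"}],
//         keys: ["title"]
//     });
//     var results = fuzzy.search("hobit");   // best-matching items, scored and sorted
//     var marked = FuzzySearch.highlight("hobit", "The Hobbit", fuzzy.options);
//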