// Diskernet/lib/fz.js

/**
* @license FuzzySearch.js
* Autocomplete suggestion engine using approximate string matching
* https://github.com/jeancroy/FuzzySearch
*
* Copyright (c) 2015, Jean Christophe Roy
* Licensed under The MIT License.
* http://opensource.org/licenses/MIT
*/
(function () { 'use strict';
/**
* @param options
* @constructor
*/
function FuzzySearch(options) {
if (options === undefined) options = {};
if (!(this instanceof FuzzySearch)) return new FuzzySearch(options);
FuzzySearch.setOptions(this, options, FuzzySearch.defaultOptions, _privates, true, this._optionsHook)
}
FuzzySearch.defaultOptions =
/** @lends {FuzzySearchOptions.prototype} */{
//
// Scoring, include in result
//
minimum_match: 1.0, // Minimum score to consider two tokens are not unrelated
thresh_include: 2.0, // To be a candidate, score of item must be at least this
thresh_relative_to_best: 0.5, // and be at least this fraction of the best score
field_good_enough: 20, // If a field has this score, stop searching other fields. (field score is before item-related bonus)
//
// Scoring, bonus
//
bonus_match_start: 0.5, // Additional value per character in common prefix
bonus_token_order: 2.0, // Value of two token properly ordered
bonus_position_decay: 0.7, // Exponential decay for position bonus (smaller : more importance to first item)
score_per_token: true, // if true, split query & field into tokens, allowing matches in a different order
// if false, bypass at least half the computation cost, very fast
// also disables matching different tokens against different fields, because there are no more tokens!
score_test_fused: false, // Try one extra match where we disregard token separation.
// "oldman" matches "old man"
score_acronym: false, // "jrrt" matches against "John Ronald Reuel Tolkien"
token_sep: " .,-:",
//
// Output sort & transform
//
score_round: 0.1, // Two items with the same rounded score are sorted alphabetically
output_limit: 0, // Return up to N results, 0 to disable
sorter: compareResults, // Function used to sort. See signature of Array.sort(sorter)
normalize: normalize, // Function used to transform string (lowercase, accents, etc)
filter: null, // Select elements to be searched. (done before each search)
/**@type {string|function({SearchResult})}*/
output_map: "item", // Transform the output, can be a function or a path string.
// output_map="root" return SearchResult object, needed to see the score
// output_map="root.item" return original object.
// output_map="root.item.somefield" output a field of original object.
// (root.) is optional.
//
// output_map=function(root){ return something(root.item) }
// ^this get original object and apply something() on it.
join_str: ", ", //String used to join array fields
//
// Tokens options
//
token_query_min_length: 2, // Avoid processing very small words, include greater or equal, in query
token_field_min_length: 3, // include greater or equal, in item field
token_query_max_length: 64, // Shorten large token to give more even performance.
token_field_max_length: 64, // Shorten large token to give more even performance.
token_fused_max_length: 64, // Shorten large token to give more even performance.
//Do not attempt to match token too different in size: n/m = len(field_tok)/len(query_tok)
token_min_rel_size: 0.6, // Field token should contain the query token. Reject field tokens that are too small.
token_max_rel_size: 10, // Large field tokens tend to match against everything. Ensure the query is long enough to be specific.
//
// Interactive - suggest as you type.
// Avoid doing a search that will be discarded without being displayed
// This also helps prevent lag / temporary freezes
//
interactive_debounce: 150, // This is the initial value. Will try to learn actual time cost. Set to 0 to disable.
interactive_mult: 1.2, // Overhead for variability and to allow other things to happen (like redraw, highlight).
interactive_burst: 3, // Allow short burst, prevent flicker due to debounce suppression of a callback
//
// Data
//
source: [],
keys: [],
lazy: false, // when true, any refresh happens only when a user makes a search; options stay put until changed.
token_re: /\s+/g, //Separator string will be parsed to this re.
identify_item: null, // How to uniquely identify an item when adding to the index. Defaults to null, meaning no duplicate detection. Must be a method that takes a single (source) argument.
use_index_store: false, // Enable a time vs memory trade-off for faster search (but longer initial warm-up).
store_thresh: 0.7, // cutoff point relative to best, to graduate from store phase.
store_max_results: 1500 // Maximum number of results to graduate from store to the full search quality algorithm
// Note that store only performs a crude search, ignoring some options, so the best result can be only "meh" here.
};
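//
// Minimal usage sketch (illustrative only; the data, field names and query below
// are made up, not part of the library):
//
//   var fuzzyhound = new FuzzySearch({
//       source: [{title: "The Dark Knight", director: "Nolan"}],
//       keys: ["title", "director"]
//   });
//   var results = fuzzyhound.search("drk knight");
//   // With the default output_map ("item") this returns the matching source objects.
//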
var _privates =
/** @lends {FuzzySearch.prototype} */{
keys: [],
tags: [], // alternative name for each key, support output alias and per key search
index: [], // source is processed using keys, then stored here
index_map: {}, // To manage update of record already in dataset
nb_indexed: 0, // To manage active count of index
store: {}, // Dictionary used for time VS memory trade off. (Optional)
tags_re: null,
acro_re: null,
token_re: null,
/**@type {FuzzySearchOptions}*/
options: null,
dirty: false, // when true, schedule a source refresh using new or existing source & keys, used once then clear itself.
//Information on last search
query: null,
results: [],
start_time: 0,
search_time: 0
};
/**
* Number of bits in an int.
* DEBUG-tip: setting this to zero will force "long string" algorithm for everything!
* @const
*/
var INT_SIZE = 32;
function FuzzySearchOptions(defaults, options) {
for (var key in defaults) {
if (defaults.hasOwnProperty(key)) { //fill self with value from either options or default
this[key] = (options.hasOwnProperty(key) && options[key] !== undefined ) ? options[key] : defaults[key];
}
}
}
FuzzySearchOptions.update = function (self, defaults, options) {
for (var key in options) {
if (options.hasOwnProperty(key) && defaults.hasOwnProperty(key)) {
//explicitly setting an option to undefined => reset to default, else take the value
self[key] = (options[key] === undefined) ? defaults[key] : options[key];
}
}
};
/**
* Set property of object,
* Restrict properties that can be set from a list of available defaults.
*
* @param {FuzzySearch} self
* @param {Object} options
* @param {Object} defaults
* @param {Object} privates
* @param {boolean} reset
* @param {function({Object})} hook
*
*/
FuzzySearch.setOptions = function (self, options, defaults, privates, reset, hook) {
if (reset) {
extend(self, privates);
self.options = new FuzzySearchOptions(defaults, options);
} else {
FuzzySearchOptions.update(self.options, defaults, options);
}
hook.call(self, options)
};
function extend(a, b) {
for (var key in b) if (b.hasOwnProperty(key)) a[key] = b[key];
}
//
// - - - - - - - - - - - -
// SET & PARSE SETTINGS
// - - - - - - - - - - - -
//
extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
/**
* Allows changing options after the object has been created.
* If source is changed, the new source is indexed.
*
* Optional reset allows resetting any setting not in options to its default.
* This is similar to creating a new object, but reusing the same pointer.
*
* @param {Object} options
* @param {boolean=} reset
*/
setOptions: function (options, reset) {
if (reset === undefined) reset = options.reset || false;
FuzzySearch.setOptions(this, options, FuzzySearch.defaultOptions, _privates, reset, this._optionsHook);
},
/**
*
* @param {Object} options
* @private
*/
_optionsHook: function (options) {
//Items of options have been copied into this.options
//We still test "option_name in option" to know if we have received something new
//This allows supporting "shorthand" options and is used to refresh data.
var self_options = this.options;
//Output stage
if ("output_map" in options && typeof options.output_map === "string") {
if (self_options.output_map === "alias") self_options.output_map = this.aliasResult;
else self_options.output_map = removePrefix(self_options.output_map, ["root", "."]);
}
this.source = self_options.source;
// Input stage, work to allow different syntax for keys definition is done here.
var oKeys;
if (("keys" in options) && ( ( oKeys = options.keys) !== undefined)) {
var key_type = Object.prototype.toString.call(oKeys);
var key_index, nb_keys;
this.tags = null;
if (key_type === "[object String]") {
this.keys = oKeys.length ? [oKeys] : [];
}
else if (key_type === "[object Object]") {
this.keys = [];
this.tags = []; //we don't know the "length" of dictionary
key_index = 0;
for (var tag in oKeys) {
if (oKeys.hasOwnProperty(tag)) {
this.tags[key_index] = tag;
this.keys[key_index] = oKeys[tag];
key_index++;
}
}
}
else {
this.keys = oKeys;
}
oKeys = this.keys;
nb_keys = oKeys.length;
for (key_index = -1; ++key_index < nb_keys;) {
oKeys[key_index] = removePrefix(oKeys[key_index], ["item", "."])
}
if (!this.tags) this.tags = oKeys;
this.tags_re = buildTagsRE(this.tags);
}
if (this.acro_re === null || "token_sep" in options) { // acro_re is derived from token_sep, rebuild when it changes
this.acro_re = buildAcronymRE(self_options.token_sep);
}
if (this.token_re === null || "token_sep" in options) {
this.token_re = self_options.token_re = new RegExp("[" + re_escape(self_options.token_sep) + "]+", "g");
}
// Determine if we need to rebuild this.index from this.source
if (options.dirty || ("source" in options) || ("keys" in options) || ("use_index_store" in options)) {
if (self_options.lazy) this.dirty = true; // Schedule later.
else {
this._buildIndexFromSource();
this.dirty = false;
}
}
}
});
/**
* Removes optional prefix of paths.
* for example "root.", "."
*
* @param {string} str - input
* @param {Array<string>} prefixes to remove
* @returns {string}
*/
function removePrefix(str, prefixes) {
var n = prefixes.length;
var offset = 0;
for (var i = -1; ++i < n;) {
var p = prefixes[i], l = p.length;
if (str.substr(offset, l) === p) offset += l;
}
return (offset > 0) ? str.substr(offset) : str;
}
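// For illustration, with the prefixes used in this file:
//   removePrefix("root.item.title", ["root", "."]) === "item.title"
//   removePrefix("title", ["root", "."]) === "title"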
function buildTagsRE(tags) {
var n = tags.length;
if (!n) return null;
var tag_str = re_escape(tags[0]);
for (var i = 0; ++i < n;) {
tag_str += "|" + re_escape(tags[i]);
}
return new RegExp("(?:^|\\s)\\s*(" + tag_str + "):\\s*", "g");
}
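// For illustration: with tags ["title", "year"] the regexp built above is roughly
// /(?:^|\s)\s*(title|year):\s*/g, so a query such as "title: batman year: 2008"
// is split into tagged parts that are each scored against their own key (see _prepQuery).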
function buildAcronymRE(sep) {
var n = sep.length;
if (!n) return null;
var acro_str = re_escape(sep);
return new RegExp("(?:^|[" + acro_str + "])+([^" + acro_str + "])[^" + acro_str + "]*", "g");
}
// Escape characters that have a special meaning in a regexp
function re_escape(str) {
var re = /[\-\[\]\/\{}\(\)\*\+\?\.\\\^\$\|]/g;
return str.replace(re, "\\$&");
}
//
// - - - - - - - - - - - -
// OUTPUT OR POST PROCESS
// - - - - - - - - - - - -
//
extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
/**
* Given a SearchResult object, recover the value of the best matching field.
* This is done on demand for display.
*
* @param {SearchResult} result
* @return {string} original field
*/
getMatchingField: function (result) {
var f = FuzzySearch.generateFields(result.item, [this.keys[result.matchIndex]]);
return f[0][result.subIndex];
},
/**
* Given a SearchResult object, generate a new object that follow alias structure
* @param {SearchResult} result
* @return {*} aliased result
*/
aliasResult: function (result) {
var options = this.options;
var f = FuzzySearch.generateFields(result.item, this.keys);
var out = {}, tags = this.tags, join_str = options.join_str;
for (var i = -1, n = f.length; ++i < n;) {
out[tags[i]] = f[i].join(join_str)
}
out._item = result.item;
out._score = result.score;
out._match = f[result.matchIndex][result.subIndex];
return out;
}
});
// - - - - - - - - - - - - - - - - - - - - - -
// Output stage, prepare results for return
//- - - - - - - - - - - - - - - - - - - - - -
/**
* Own version of Array.prototype.map()
*
* @param {Array} source
* @param transform callback
* @param {*=} context (*this* in called function)
* @param {number=} max_out
* @returns {Array}
*/
FuzzySearch.map = function (source, transform, context, max_out) {
var n = source.length;
if (max_out > 0 && max_out < n) n = max_out;
if (typeof transform !== "function") return source.slice(0, n);
var out = new Array(n);
for (var i = -1; ++i < n;) {
out[i] = transform.call(context, source[i], i, source);
}
return out;
};
/**
* Take an array of objects, return an array containing a field of those object.
*
* test = [ {key:"A",value:10}, {key:"B",value:20} ]
* mapField(test,"value") = [10,20]
*
* @param source - array to process
* @param {string} path - key to address on each item OR function to apply
* @param {Number=} [max_out=source.length] - only process first items
* @returns {Array}
*/
FuzzySearch.mapField = function (source, path, max_out) {
var n = source.length;
if (max_out > 0 && max_out < n) n = max_out;
if (path === "") return source.slice(0, n);
var out = new Array(n);
var obj, i;
if (path.indexOf(".") === -1) {
//fast case no inner loop
for (i = -1; ++i < n;) {
obj = source[i];
if (path in obj) out[i] = obj[path];
}
} else {
//general case
var parts = path.split(".");
var nb_level = parts.length;
for (i = -1; ++i < n;) {
obj = source[i];
for (var level = -1; ++level < nb_level;) {
var key = parts[level];
if (!(key in obj)) break;
obj = obj[key];
}
out[i] = obj;
}
}
return out;
};
/**
* Filter array for item where item[field] >= atleast
*
* @param array
* @param field
* @param atleast
* @returns {Array}
*/
FuzzySearch.filterGTE = function (array, field, atleast) {
var i = -1, j = -1;
var n = array.length;
var out = [], obj;
while (++i < n) {
obj = array[i];
if (obj[field] >= atleast) {
out[++j] = obj;
}
}
return out;
};
/**
* SearchResult constructor
* - Internal result list
* - Output of search when output_map=""
*
* @param {*} item
* @param {Array} fields
* @param {number} item_score
* @param {number} matched_field_index
* @param {number} matched_field_sub
* @param {(string|number)} sortkey
* @constructor
*/
function SearchResult(item, fields, item_score, matched_field_index, matched_field_sub, sortkey) {
this.item = item;
this.fields = fields;
this.score = item_score;
this.matchIndex = matched_field_index;
this.subIndex = matched_field_sub;
this.sortKey = sortkey;
}
/**
* Sort function
* first by decreasing order of score, then alphabetical order of sortkey.
*
* @param {SearchResult} a
* @param {SearchResult} b
* @returns {number} - ">0" if b before a, "<0" if b after a.
*/
function compareResults(a, b) {
var d = b.score - a.score;
if (d !== 0) return d;
var ak = a.sortKey, bk = b.sortKey;
return ak > bk ? 1 : ( ak < bk ? -1 : 0);
}
//
// - - - - - - - - - - - -
// Prepare Query
// - - - - - - - - - - - -
//
extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
/**
* Input: a user search string
* Output a query object
*
* Perform a few transformations to allow faster searching.
* String is set to lowercase, some accents removed, split into tokens.
* Tokens that are too small are filtered out; tokens that are too large are trimmed.
* Tokens are packed in groups of 32 chars; each token is processed to extract an alphabet map.
*
* If score_test_fused is enabled, we do an extra pass disregarding tokens.
* If score_per_token is disabled this is the only pass we do.
*
* @param query_string
* @returns {Query}
* @private
*/
_prepQuery: function (query_string) {
var options = this.options;
var opt_tok = options.score_per_token;
var opt_fuse = options.score_test_fused;
var opt_fuselen = options.token_fused_max_length;
var opt_qmin = options.token_query_min_length;
var opt_qmax = options.token_query_max_length;
var tags = this.tags;
var tags_re = this.tags_re;
var nb_tags = tags.length;
var token_re = this.token_re;
var norm, fused, fused_map, children, has_tags, group, words;
if (opt_tok && nb_tags && tags_re) {
var start = 0, end;
var q_index = 0;
var q_parts = new Array(nb_tags + 1);
var match = tags_re.exec(query_string);
has_tags = (match !== null);
while (match !== null) {
end = match.index;
q_parts[q_index] = query_string.substring(start, end);
start = end + match[0].length;
q_index = tags.indexOf(match[1]) + 1;
match = tags_re.exec(query_string);
}
q_parts[q_index] = query_string.substring(start);
children = [];
for (var i = -1; ++i < nb_tags;) {
var qp = q_parts[i + 1];
if (!qp || !qp.length) continue;
norm = options.normalize(qp);
fused = norm.substring(0, opt_fuselen);
fused_map = (opt_fuse || !opt_tok) ? FuzzySearch.alphabet(fused) : {};
words = FuzzySearch.filterSize(norm.split(token_re), opt_qmin, opt_qmax);
group = FuzzySearch.pack_tokens(words);
children[i] = new Query(norm, words, group, fused, fused_map, false, []);
}
norm = options.normalize(q_parts[0]);
words = FuzzySearch.filterSize(norm.split(token_re), opt_qmin, opt_qmax);
group = FuzzySearch.pack_tokens(words);
}
else {
norm = options.normalize(query_string);
words = FuzzySearch.filterSize(norm.split(token_re), opt_qmin, opt_qmax);
group = opt_tok ? FuzzySearch.pack_tokens(words) : [];
has_tags = false;
children = new Array(nb_tags);
}
fused = norm.substring(0, opt_fuselen);
fused_map = (opt_fuse || !opt_tok) ? FuzzySearch.alphabet(fused) : {};
return new Query(norm, words, group, fused, fused_map, has_tags, children)
}
});
//
// Query objects
//
/**
* Hold a query
*
* @param {string} normalized
* @param {Array.<string>} words
* @param {Array.<PackInfo>} tokens_groups
* @param {string} fused_str
* @param {Object} fused_map
* @param {boolean} has_children
* @param {Array<Query>} children
*
* @constructor
*/
function Query(normalized, words, tokens_groups, fused_str, fused_map, has_children, children) {
this.normalized = normalized;
this.words = words;
this.tokens_groups = tokens_groups;
this.fused_str = fused_str;
this.fused_map = fused_map;
this.fused_score = 0;
this.has_children = has_children;
this.children = children;
}
//
// Query holds some memory to keep the score of its tokens.
// Used in search methods
/**
* Loop through each item score and reset it to 0; apply to child queries
*/
Query.prototype.resetItem = function () {
var groups = this.tokens_groups;
for (var group_index = -1, nb_groups = groups.length; ++group_index < nb_groups;) {
var score_item = groups[group_index].score_item;
for (var i = -1, l = score_item.length; ++i < l;) score_item[i] = 0
}
this.fused_score = 0;
if (this.has_children) {
var children = this.children;
for (var child_index = -1, nb_child = children.length; ++child_index < nb_child;) {
var child = children[child_index];
if (child) child.resetItem();
}
}
};
/**
* Sum each item score and add to child score
*/
Query.prototype.scoreItem = function () {
var query_score = 0;
var groups = this.tokens_groups;
for (var group_index = -1, nb_groups = groups.length; ++group_index < nb_groups;) {
var group_scores = groups[group_index].score_item;
for (var score_index = -1, nb_scores = group_scores.length; ++score_index < nb_scores;) {
query_score += group_scores[score_index]
}
}
if (this.fused_score > query_score) query_score = this.fused_score;
if (this.has_children) {
var children = this.children;
for (var child_index = -1, nb_child = children.length; ++child_index < nb_child;) {
var child = children[child_index];
if (child) query_score += child.scoreItem();
}
}
return query_score;
};
/**
* Hold a group of token for parallel scoring
*
* @param {Array.<string>} group_tokens
* @param {Object} group_map
* @param {number} gate
* @constructor
*/
function PackInfo(group_tokens, group_map, gate) {
this.tokens = group_tokens;
this.map = group_map;
this.gate = gate;
var t = group_tokens.length, i = -1;
var scores = new Array(t);
while (++i < t) scores[i] = 0;
this.score_item = scores.slice();
this.score_field = scores.slice();
this.field_pos = scores;
}
//
// - - - - - - - - - - - - - - - - -
// Prepare Token for search
// - - - - - - - - - - - - - - - - -
// a normal string can be viewed as an array of chars.
// so we map ( position -> char).
//
// we reverse that relation to map
// char -> positions
/**
* Record position of each character in a token.
* If token is small, position is recorded by the position of a single bit in an int.
* If token is larger than INT_SIZE, position is recorded as an array of numbers.
*
* @param {string} token
* @returns {Object} key value map char->positions (as array of position or single int (can be seen as an array of bit) )
*/
FuzzySearch.alphabet = function (token) {
var len = token.length;
if (len > INT_SIZE) return FuzzySearch.posVector(token);
else return FuzzySearch.bitVector(token, {}, 0);
};
/**
* Apply FuzzySearch.alphabet on multiple tokens
*
* @param {Array.<string>} tokens
* @returns {Array.<Object>}
*/
FuzzySearch.mapAlphabet = function (tokens) {
var outlen = tokens.length;
var out = new Array(outlen), i = -1;
while (++i < outlen) {
var t = tokens[i];
if (t.length > INT_SIZE) out[i] = FuzzySearch.posVector(t);
else out[i] = FuzzySearch.bitVector(t, {}, 0);
}
return out;
};
/**
* Record position of each char using a single bit
*
* @param {string} token
* @param {Object} map - Existing map to modify, can init with {}
* @param offset - used for packing multiple word in a single map, can init with 0
* @returns {Object} Key value map char -> int
*/
FuzzySearch.bitVector = function (token, map, offset) {
var len = token.length;
var i = -1, c;
var b = offset;
while (++i < len) {
c = token[i];
if (c in map) map[c] |= (1 << b++);
else map[c] = (1 << b++);
}
return map;
};
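// For illustration:
//   FuzzySearch.bitVector("aba", {}, 0) -> { a: 0b101, b: 0b010 }
// i.e. bit i of map[c] is set when character c occurs at position i of the token.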
/**
* Record position of each char in a token using an array
* Append Infinity as a stop marker for llcs_large
*
* map = posVector("position")
* map["p"] -> [0,Inf]
* map["o"] -> [1,6,Inf]
*
* @param {string} pattern
* @returns {Object} - key value map char->array of position (as number)
*/
FuzzySearch.posVector = function (pattern) {
var map = {}, c;
var m = pattern.length, i = -1;
while (++i < m) {
c = pattern[i];
if (c in map) map[c].push(i);
else map[c] = [i];
}
for (c in map) {
if (map.hasOwnProperty(c)) {
map[c].push(Infinity);
}
}
return map;
};
/**
* Given a list of tokens, pack them into groups of up to INT_SIZE (32) chars.
* If a single token is bigger than INT_SIZE, create a group with that single item
* and use posVector instead of bitVector to prepare the fallback algorithm.
*
* @param {Array.<string>} tokens
* @returns {Array.<PackInfo>}
*/
FuzzySearch.pack_tokens = function (tokens) {
var token_index = -1;
var nb_tokens = tokens.length;
var large;
var groups = [];
//For each group
while (token_index < nb_tokens) {
var group_tokens = [];
var group_map = {};
var offset = 0;
var gate = 0;
//For each token in the group
while (++token_index < nb_tokens) {
var token = tokens[token_index];
var l = token.length;
if (l >= INT_SIZE) {
large = new PackInfo([token],
FuzzySearch.posVector(token),
0xFFFFFFFF);
break;
}
else if (l + offset >= INT_SIZE) {
token_index--;
break;
}
else {
group_tokens.push(token);
FuzzySearch.bitVector(token, group_map, offset);
gate |= ( (1 << ( token.length - 1) ) - 1 ) << offset;
offset += l
}
}
if (group_tokens.length > 0) {
groups.push(new PackInfo(group_tokens, group_map, gate));
}
if (large) {
groups.push(large);
large = null;
}
}
return groups;
};
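// For illustration (token values made up): pack_tokens(["the", "dark", "knight"])
// fits all three words (3+4+6 = 13 chars < 32) into a single PackInfo, each word
// occupying its own bit range of the shared 32-bit map. A token of 32 or more chars
// would instead get its own group backed by posVector (the "long string" fallback).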
//
//-----------------------------
// SCORING FUNCTIONS
// ---------------------------
//
/**
* Score of "search a in b" using self as options.
* @param {string} a
* @param {string} b
*/
FuzzySearch.prototype.score = function (a, b) {
var aMap = FuzzySearch.alphabet(a);
return FuzzySearch.score_map(a, b, aMap, this.options);
};
// Adapted from paper:
// A fast and practical bit-vector algorithm for
// the Longest Common Subsequence problem
// Maxime Crochemore et Al.
//
// With modification from
// Bit-parallel LCS-length computation revisited (H Hyyrö, 2004)
// http://www.sis.uta.fi/~hh56766/pubs/awoca04.pdf
//
/**
* Score of "search a in b" using precomputed alphabet map
* Main algorithm for single query token to score
*
* @param {string} a
* @param {string} b
* @param {Object} aMap - See FuzzySearch.alphabet
* @param {FuzzySearchOptions} options
*/
FuzzySearch.score_map = function (a, b, aMap, options) {
var j, lcs_len;
var m = a.length;
var n = b.length;
var bonus_prefix = options.bonus_match_start;
var k = m < n ? m : n;
if (k === 0) return 0;
//normalize score against length of both inputs
var sz_score = (m + n) / ( 2.0 * m * n);
//common prefix is part of lcs
var prefix = 0;
if (a === b) prefix = k; //speedup equality
else {
while ((a[prefix] === b[prefix]) && (++prefix < k)) {
}
}
//shortest string consumed
if (prefix === k) {
lcs_len = prefix;
return sz_score * lcs_len * lcs_len + bonus_prefix * prefix;
}
//alternative algorithm for large string
//need to keep this condition in sync with bitvector
if (m > INT_SIZE) {
lcs_len = FuzzySearch.llcs_large(a, b, aMap, prefix);
return sz_score * lcs_len * lcs_len + bonus_prefix * prefix;
}
var mask = ( 1 << m ) - 1;
var S = mask, U, c;
j = prefix - 1;
while (++j < n) {
c = b[j];
if (c in aMap) {
// Hyyrö, 2004 S=V'=~V
U = S & aMap[c];
S = (S + U) | (S - U);
}
}
// Remove match already accounted in prefix region.
mask &= ~( ( 1 << prefix ) - 1 );
// lcs_len is number of 0 in S (at position lower than m)
// inverse S, mask it, then do "popcount" operation on 32bit
S = ~S & mask;
S = S - ((S >> 1) & 0x55555555);
S = (S & 0x33333333) + ((S >> 2) & 0x33333333);
lcs_len = (((S + (S >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
lcs_len += prefix;
return sz_score * lcs_len * lcs_len + bonus_prefix * prefix;
};
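// Usage sketch (values made up, scores are approximate): the score grows with the
// length of the common subsequence and of the shared prefix, normalized by both lengths.
//
//   var fz = new FuzzySearch();
//   fz.score("unv", "university"); // ~2.95 with default options
//   fz.score("unv", "banana");     // ~0.25, much lower: only "n" is shared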
/**
* Call score_map on the first (only) packed token.
* Apply the relative-size filter first.
*
* @param {PackInfo} packinfo
* @param {string} token
* @param {FuzzySearchOptions} options
* @return {Array.<number>} score
*/
FuzzySearch.score_single = function (packinfo, token, options) {
var field_tok = packinfo.tokens[0];
var m = field_tok.length;
var n = token.length;
if (n < options.token_min_rel_size * m || n > options.token_max_rel_size * m) return [0];
return [FuzzySearch.score_map(field_tok, token, packinfo.map, options)];
};
/**
* Score multiple query tokens against a single field token.
* Apply the above score function in parallel.
* Computation is done as if everything was one big token,
* but the ZM bit-vector enforces boundaries so scores stay independent.
*
* @param {PackInfo} packinfo
* @param {string} field_token
* @param {FuzzySearchOptions} options
* @returns {Array.<number>} scores
*/
FuzzySearch.score_pack = function (packinfo, field_token, options) {
var packed_tokens = packinfo.tokens;
var nb_packed = packed_tokens.length;
//a single-item group can contain either a single "overflow" word or a large word that needs special handling
if (nb_packed == 1) return FuzzySearch.score_single(packinfo, field_token, options);
var S = 0xFFFFFFFF, U, c;
var ZM = packinfo.gate | 0;
var aMap = packinfo.map;
for (var j = -1, n = field_token.length; ++j < n;) {
c = field_token[j];
if (c in aMap) {
U = S & aMap[c];
S = ( (S & ZM) + (U & ZM) ) | (S - U);
}
}
S = ~S;
var bonus_prefix = options.bonus_match_start;
var min_rs = options.token_min_rel_size;
var max_rs = options.token_max_rel_size;
var scores = new Array(nb_packed);
var offset = 0;
for (var k = -1; ++k < nb_packed;) {
var query_tok = packed_tokens[k];
var m = query_tok.length;
var lcs_len, prefix;
if (n < min_rs * m || n > max_rs * m) {
scores[k] = 0;
offset += m;
continue;
}
if (query_tok === field_token)
prefix = lcs_len = m;
else {
var p = (m < n) ? m : n;
prefix = 0;
while ((query_tok[prefix] === field_token[prefix]) && (++prefix < p)) {
}
lcs_len = prefix;
var Sm = ( (S >>> offset) & ( (1 << m) - 1 ) ) >>> prefix;
while (Sm) {
Sm &= Sm - 1;
lcs_len++
}
}
offset += m;
var sz = (m + n) / ( 2.0 * m * n);
scores[k] = sz * lcs_len * lcs_len + bonus_prefix * prefix;
}
return scores;
};
//
// Compute LLCS, using vectors of position.
//
// Based on:
// An input sensitive online algorithm for LCS computation
// Heikki Hyyro 2009
//
// We fill the dynamic programming table line per line
// but instead of storing the whole line we only store positions where the line increases
// (the bitvector algorithm stores increase yes/no as a bit; this time we store sequences)
//
// s u r g e r y
// g [0,0,0,1,1,1,1] : [3,4] (Add level 1)
// s [1,1,1,1,1,1,1] : [0,1] (Make level 1 happens sooner)
// u [1,2,2,2,2,2,2] : [0,2] (Add level 2, append to block of consecutive increase)
// r [1,2,3,3,3,3,3] : [0,3] (Add level 3, append to block of consecutive increase)
// v [1,2,3,3,3,3,3] : [0,3] (v not in surgery, copy)
// e [1,2,3,3,4,4,4] : [0,3],[4,5] (Add level 4, create new block for it)
// y [1,2,3,3,4,4,5] : [0,3],[4,5],[6,7] (Add level 5, create new block for it)
//
// There are 2 basic operations:
// - Make a level-up happen sooner
// - Add an extra level-up at the end. (this is where llcs increases!)
//
// 12345678901234567890 // Position (for this demo we start at 1)
// ii------iii---i--i-- // Increase point of previous line
// 12222222345555666777 // Score previous line [1,3] [9,12] [15,16] [18,19]
// ---m-m---------m---m // Match of this line
// 12233333345555677778 // Score of this line [1,3] [4,5] [10,12] [15,17] [20,21]
// ii-i-----ii---ii---i // New increase point
// 12345678901234567890 // Position
FuzzySearch.llcs_large = function (a, b, aMap, prefix) {
//var aMap = FuzzySearch.posVector(a);
//Position of next interest point. Interest point are either
// - Increase in previous line
// - Match on this line
var block_start, match_pos;
// We encode increase sequence as [start_pos, end_pos+1]
// So end-start = length
// To avoid dealing with too many edge cases we place
// a special token at start & end of the list
var last_line, line_index, last_end, block_end;
if (prefix === undefined) prefix = 0;
if (prefix)
last_line = [new Block(0, prefix), new Block(Infinity, Infinity)];
else
last_line = [new Block(Infinity, Infinity)];
var lcs_len = prefix;
var match_list, match_index;
var block, block_index, block_size;
//First line
var nb_blocks = last_line.length;
var n = b.length, j;
for (j = prefix; j < n; j++) {
//Each line we process a single character of b
var c = b[j];
if (!(c in aMap)) continue;
match_list = aMap[c];
//New line
// the number of blocks can only increase up to llcs+1+sentinel
// alternatively each block having >1 item can split. (+1 at end accounted for by splitting the sentinel)
/** @type Array.<Block> */
var current_line = new Array(Math.min(2 * nb_blocks, lcs_len + 2));
line_index = -1;
//First match
match_index = 0;
match_pos = match_list[0];
//Place end of first block before the string
block_end = -1;
block_index = -1;
while (++block_index < nb_blocks) {
//Place cursor just after last block
last_end = block_end;
//Read end block
block = last_line[block_index];
block_start = block.start; //Encode block as [s,e[
block_end = block.end; //End is position of char that follow last.
block_size = block_end - block_start; //Size of block, for sentinel (Inf-Inf=NaN)
//get next match from list of matches
while (match_pos < last_end) {
match_pos = match_list[++match_index];
}
// This cover two case
// a) no match between two block
// b) block happens after last match (so match_pos=Infinity).
// At the last block, this will append closing "sentinel" to line
if (block_start <= match_pos) {
current_line[++line_index] = block;
continue;
}
//
// If we have reached here, we have a dominant match !
// Decide where to register the match ...
//
if (match_pos === last_end) {
//End of last block ? (step a.ii)
current_line[line_index].end++;
}
else {
//Increase need it's own block ( step a.i)
//try to reuse block that will get deleted.
if (block_size === 1) {
//Can we reuse next block ?
block.start = match_pos;
block.end = match_pos + 1;
current_line[++line_index] = block;
} else {
//start a new block
current_line[++line_index] = new Block(match_pos, match_pos + 1);
}
}
// if not empty, append next block to current line (step a.iii)
// (this condition reject "sentinel", it'll get added just after the for loop)
if (block_size > 1) {
block.start++; // Move start by one
current_line[++line_index] = block;
}
}
// If the line finish with a match:
// a) llcs at end of this line is one greater than last line, increase score
// b) we still need to append sentinel
if (block_start > match_pos) {
current_line[++line_index] = block;
lcs_len++
}
//Current become last
last_line = current_line;
//Count actual number of block because we allocate a bit more.
nb_blocks = ++line_index;
}
return lcs_len;
};
/**
* A block with start and end position
* Used to record consecutive increase position in llcs_large
* @param start
* @param end
* @constructor
*/
function Block(start, end) {
this.start = start;
this.end = end;
}
//
// Reference implementation to debug
// Might need to swap input to match internal of a given algorithm
//
/*
function lcs(a, b) {
var m = a.length;
var n = b.length;
var i, j;
//init m by n array with 0
var C = [], row = [], lcs = [];
for (j = 0; j < n; j++) row[j] = 0;
for (i = 0; i < m; i++) C[i] = row.slice();
//fill first row and col
C[0][0] = (a[0] === b[0]) ? 1 : 0;
for (i = 1; i < m; i++) C[i][0] = (a[i] === b[0] || C[i - 1][0]) ? 1 : 0
for (j = 1; j < n; j++) C[0][j] = (a[0] === b[j] || C[0][j - 1]) ? 1 : 0
console.log(JSON.stringify(C[0]));
//bulk
for (i = 1; i < m; i++) {
for (j = 1; j < n; j++) {
C[i][j] = (a[i] === b[j]) ? C[i - 1][j - 1] + 1 : Math.max(C[i][j - 1], C[i - 1][j]);
}
console.log(JSON.stringify(C[i]));
}
//backtrack
i--;
j--;
while (i > -1 && j > -1) {
if (i && C[i][j] == C[i - 1][j]) i--;
else if (j && C[i][j] == C[i][j - 1]) j--;
else {
lcs.push(a[i]);
j--;
i--;
}
}
return lcs.reverse().join('');
}*/
// main entry of the algorithm (once settings are set)
// loop over everything and merge best scores
extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
/**
* Perform a search on the already indexed source.
*
* @param {string} query_string
* @returns {Array}
*/
search: function (query_string) {
var time_start = Date.now();
this.start_time = time_start;
var options = this.options;
// As long as lazy is set to false, we guarantee that making a search is read only.
if (this.dirty && options.lazy) {
this._buildIndexFromSource();
this.dirty = false;
}
var query = this.query = this._prepQuery(query_string);
var source = this.index;
var results = [];
if (options.use_index_store) {
source = this._storeSearch(query, source);
}
if (options.filter) {
source = options.filter.call(this, source);
}
// ---- MAIN SEARCH LOOP ---- //
var thresh_include = this._searchIndex(query, source, results);
//keep only results that are good enough compared to best
results = FuzzySearch.filterGTE(results, "score", thresh_include);
// sort by decreasing order of score
// equal rounded score: alphabetical order
if (typeof options.sorter === "function")
results = results.sort(options.sorter);
if (options.output_map || options.output_limit > 0) {
if (typeof options.output_map === "function")
results = FuzzySearch.map(results, options.output_map, this, options.output_limit);
else
results = FuzzySearch.mapField(results, options.output_map, options.output_limit);
}
var time_end = Date.now();
this.search_time = time_end - time_start;
this.results = results;
return results
},
/**
* Main search loop for a specified source
* This separation allows searching a different source, or a subset of the source
*
* @param {Query} query
* @param {Array.<Indexed>} source
* @param {Array.<SearchResult>} results
* @returns {number} - thresh_include after this run.
*
* @private
*/
_searchIndex: function (query, source, results) {
var options = this.options;
var opt_bpd = options.bonus_position_decay;
var opt_fge = options.field_good_enough;
var opt_trb = options.thresh_relative_to_best;
var opt_score_tok = options.score_per_token;
var opt_round = options.score_round;
var thresh_include = options.thresh_include;
var best_item_score = 0;
var sub_query = query.children;
for (var item_index = -1, nb_items = source.length; ++item_index < nb_items;) {
//get indexed fields
var item = source[item_index];
var item_fields = item.fields;
//reset score
query.resetItem();
var item_score = 0;
var matched_field_index = -1;
var matched_node_index = -1;
var position_bonus = 1.0;
//
//Foreach field
//
for (var field_index = -1, nb_fields = item_fields.length; ++field_index < nb_fields;) {
var field_score = 0;
var field_node = -1;
var field = item_fields[field_index];
var child_query = sub_query[field_index]; //tag search
var tagged = !!child_query;
for (var node_index = -1, nb_nodes = field.length; ++node_index < nb_nodes;) {
var node_score, node = field[node_index];
if (opt_score_tok) {
node_score = this._scoreField(node, query);
if (tagged) node_score += this._scoreField(node, child_query);//tag search
}
else
node_score = FuzzySearch.score_map(query.fused_str, node.join(" "), query.fused_map, options);
if (node_score > field_score) {
field_score = node_score;
field_node = node_index;
}
}
field_score *= (1.0 + position_bonus);
position_bonus *= opt_bpd;
if (field_score > item_score) {
item_score = field_score;
matched_field_index = field_index;
matched_node_index = field_node;
if (field_score > opt_fge) break;
}
}
//
// Different query token match different fields ?
//
if (opt_score_tok) {
var query_score = query.scoreItem();
item_score = 0.5 * item_score + 0.5 * query_score;
}
//
// Keep track of best result, this control inclusion in the list
//
if (item_score > best_item_score) {
best_item_score = item_score;
var tmp = item_score * opt_trb;
if (tmp > thresh_include) thresh_include = tmp;
}
//
//candidate for best result ? push to list
//
if (item_score > thresh_include) {
item_score = Math.round(item_score / opt_round) * opt_round;
results.push(new SearchResult(
item.item,
item_fields,
item_score,
matched_field_index,
matched_node_index,
item_fields[0][0].join(" ")
));
}
}
return thresh_include
},
/**
* Internal loop that is run for each field in an item
*
* @param {Array} field_tokens
* @param {Query} query
* @returns {number}
* @private
*/
_scoreField: function (field_tokens, query) {
var groups = query.tokens_groups;
var nb_groups = groups.length;
var nb_tokens = field_tokens.length;
if (!nb_groups || !nb_tokens) return 0;
var field_score = 0, sc, bf;
var last_index = -1;
var options = this.options;
var bonus_order = options.bonus_token_order;
var minimum_match = options.minimum_match;
var token, scores, i;
for (var group_index = -1; ++group_index < nb_groups;) {
var group_info = groups[group_index];
var nb_scores = group_info.tokens.length;
// Each packinfo have their own reusable scratch pad
// to store best score information, reset them to 0
var best_of_field = group_info.score_field;
for (i = -1; ++i < nb_scores;) best_of_field[i] = 0
var best_index = group_info.field_pos;
for (i = -1; ++i < nb_scores;) best_index[i] = 0
for (var field_tk_index = -1; ++field_tk_index < nb_tokens;) {
token = field_tokens[field_tk_index];
scores = FuzzySearch.score_pack(group_info, token, options);
for (i = -1; ++i < nb_scores;) {
sc = scores[i];
bf = best_of_field[i];
//Score is an improvement OR
//Score is within a token order bonus from being better, but word are swapped
if (sc > bf || ( bf - sc < bonus_order && i > 0 && best_index[i] <= best_index[i - 1] )) {
best_of_field[i] = sc;
best_index[i] = field_tk_index;
}
}
}
var best_match_this_item = group_info.score_item;
for (i = -1; ++i < nb_scores;) {
sc = best_of_field[i];
field_score += sc;
// Give bonus for pair in consecutive order
// Only consider positive match for bonus
if (sc > minimum_match) {
var this_index = best_index[i];
//Bonus is diluted by the distance between words.
//Positive match, but out of order get half the bonus.
var d = this_index - last_index;
var bo = bonus_order * ( 1.0 / (1.0 + Math.abs(d)));
if (d > 0) bo *= 2;
field_score += bo;
sc += bo;
last_index = this_index;
}
if (sc > best_match_this_item[i])
best_match_this_item[i] = sc;
}
}
if (options.score_test_fused) {
// field_tokens.join(" "), remove last one if acronym
// performance of array.join(" ") and str concat look similar on modern browser.
var n = (options.score_acronym) ? nb_tokens - 1 : nb_tokens;
var fused_field = field_tokens[0], fi = 0;
while (++fi < n) fused_field += " " + field_tokens[fi];
// test "space bar is broken" no token match
var fused_score = FuzzySearch.score_map(query.fused_str, fused_field, query.fused_map, options);
fused_score += bonus_order; //fused cannot be out of order
field_score = fused_score > field_score ? fused_score : field_score;
if (fused_score > query.fused_score) {
query.fused_score = fused_score;
}
}
return field_score;
}
});
extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
/**
* Take a `source_item` (unprocessed item from source) and keys and produce
* an `item` that's ready to be added to `this.index`.
*
* Preparation steps:
* - Apply lowercase, accent removal
* - Split field into tokens
* - Remove small tokens e.g. "a", "of"; keep only a prefix of large tokens
*/
_prepItem: function (source_item, keys) {
var item_fields = FuzzySearch.generateFields(source_item, keys);
var nb_fields = item_fields.length;
for (var field_index = -1; ++field_index < nb_fields;) {
var field = item_fields[field_index];
for (var node_index = -1, nb_nodes = field.length; ++node_index < nb_nodes;) {
var norm = this.options.normalize(field[node_index]);
var nodes = norm.split(this.token_re);
//Filter size. (If total field length is very small, make an exception.
// Eg some movie/Book have a single letter title, filter risk of removing everything )
if (norm.length > 2 * this.options.token_field_min_length) nodes = FuzzySearch.filterSize(nodes, this.options.token_field_min_length, this.options.token_field_max_length);
if (this.options.score_acronym) nodes.push(norm.replace(this.acro_re, "$1"));
field[node_index] = nodes;
}
}
return new Indexed(source_item, item_fields);
},
/**
* Add an item to search index AND source collection.
* It'll use identify_item to find whether the item already exists.
* If identify_item is null (default), calling this method is append-only with no duplicate detection
*
* To update the source, it uses the assumption that this.source and this.index can be synced
* by array index. That assumption holds if source is a plain array that is always updated by this library.
* Feel free to set `should_update_source` to false to manually manage the source collection.
*
* Keeping source in sync is important to allow recomputing the index from source.
* This happens with certain setting changes.
*
* @param {*} source_item - item to add to search index
* @param {boolean=} should_update_source - set to false to skip updating the source.
*/
add: function(source_item, should_update_source){
// Default to keeping source in sync.
if(should_update_source === undefined)
should_update_source = true;
var item_id = typeof this.options.identify_item === "function"
? this.options.identify_item(source_item)
: null;
// Find where to insert new item
var idx;
if (item_id === null) {
// No identifier, append to end
idx = this.nb_indexed;
this.nb_indexed++;
}
else if (item_id in this.index_map) {
// Item exist, update
idx = this.index_map[item_id];
}
else {
// New identifier, append to end & record new
this.index_map[item_id] = this.nb_indexed;
idx = this.nb_indexed;
this.nb_indexed++;
}
// Compute indexed item and update index
var prepared = this._prepItem(source_item, this.keys);
this.index[idx] = prepared;
// Insert in source;
if(should_update_source)
this.source[idx] = source_item;
if (this.options.use_index_store) {
this._storeAdd(prepared, idx);
}
},
/**
* Build (or rebuild) `this.index` from `this.source`
* Flatten object into array using specified keys
*
* @private
*/
_buildIndexFromSource: function () {
var nb_items = this.source.length;
this.index = new Array(nb_items);
this.index_map = {};
this.nb_indexed = 0;
for (var item_index = -1; ++item_index < nb_items;) {
var source_item = this.source[item_index];
// Add item to index.
// Because we are iterating over source, do not attempt to modify it.
this.add(source_item, false);
}
}
});
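// Usage sketch (item shape and values made up): adding items after construction.
//
//   var fz = new FuzzySearch({source: [], keys: ["name"]});
//   fz.add({name: "Ada Lovelace"});       // appended to both fz.index and fz.source
//   fz.add({name: "Alan Turing"}, false); // indexed only; the caller manages the source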
/**
* Original item with cached normalized field
*
* @param {*} source_item
* @param {Array.<string>} fields
* @constructor
*/
function Indexed(source_item, fields) {
this.item = source_item;
this.fields = fields;
}
// - - - - - - - - - - - - - - - - - - - - - -
// Input stage: prepare field for search
//- - - - - - - - - - - - - - - - - - - - - -
/**
* Given an object to index and a list of field to index
* Return a flat list of the values.
*
* @param {Object} obj
* @param {Array.<string>} fieldlist
* @returns {Array}
*/
FuzzySearch.generateFields = function (obj, fieldlist) {
if (!fieldlist || !fieldlist.length) return [[obj.toString()]];
var n = fieldlist.length;
var indexed_fields = new Array(n);
for (var i = -1; ++i < n;)
indexed_fields[i] = _collectValues(obj, fieldlist[i].split("."), [], 0);
return indexed_fields;
};
/**
* Traverse an object structure to collect items specified by parts.
* If the leaf node is an array or dictionary, collect every child.
* If a key is the wildcard '*', branch out the search process on each child.
*
* @param {*} obj - root to process
* @param {Array.<string>} parts - array of subkey to direct object traversal "those.that.this"->["those","that","this"]
* @param {Array} list - where to put collected items
* @param {number} level - index of current position on parts list
* @returns {Array} - return list
* @private
*/
function _collectValues(obj, parts, list, level) {
var key, i, olen;
var nb_level = parts.length;
while (level < nb_level) {
key = parts[level++];
if (key === "*" || key === "") break;
if (!(key in obj)) return list;
obj = obj[key];
}
var type = Object.prototype.toString.call(obj);
var isArray = ( type === '[object Array]' );
var isObject = ( type === '[object Object]' );
if (level === nb_level) {
if (isArray)
for (i = -1, olen = obj.length; ++i < olen;) list.push(obj[i].toString());
else if (isObject) {
for (key in obj) {
if (obj.hasOwnProperty(key)) list.push(obj[key].toString());
}
}
else list.push(obj.toString());
}
else if (key === "*") {
if (isArray)
for (i = -1, olen = obj.length; ++i < olen;) {
_collectValues(obj[i], parts, list, level);
}
else if (isObject)
for (key in obj) {
if (obj.hasOwnProperty(key))
_collectValues(obj[key], parts, list, level);
}
}
return list;
}
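// For illustration (object shapes made up), including the "*" wildcard:
//   FuzzySearch.generateFields({title: "Dune", tags: ["scifi", "desert"]}, ["title", "tags"])
//     -> [["Dune"], ["scifi", "desert"]]
//   FuzzySearch.generateFields({cast: [{name: "A"}, {name: "B"}]}, ["cast.*.name"])
//     -> [["A", "B"]]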
extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
/**
*
* @param {Indexed} preparedItem
* @param {int} idx
*/
_storeAdd: function (preparedItem, idx) {
var keyList = keysFromIndexedItem(preparedItem);
if (keyList.length == 0) return;
// register idx on all appropriate key
for (var i = 0; i < keyList.length; i++) {
var key = keyList[i];
if (key in this.store) {
// append to existing array of index
this.store[key].push(idx);
}
else {
// Format is dict key => array of item index
this.store[key] = [idx];
}
}
},
/**
*
* @param {Query} preparedQuery
* @param {Array.<Indexed>} source
*/
_storeSearch: function (preparedQuery, source) {
// Scan query for index keys.
var keyList = keysFromQuery(preparedQuery);
if (keyList.length == 0) return [];
// return filtered source
var idAndCount = retrieveCount(keyList, this.store);
if (idAndCount.length == 0) return [];
// Get minimum quality and remap to original items.
var thresh = idAndCount[0].count * this.options.store_thresh;
idAndCount = FuzzySearch.filterGTE(idAndCount, "count", thresh);
return FuzzySearch.map(idAndCount,
function (x) { return source[x.id] },
this, this.options.store_max_results);
}
});
/**
*
* @param {Indexed} preparedItem
*/
function keysFromIndexedItem(preparedItem) {
// Process the nested structure of a prepared item in order to extract index keys.
var keyList = [];
var keyDict = {};
// item -> fields -> nodes -> word_tokens
var fields = preparedItem.fields;
for (var i = 0; i < fields.length; i++) {
var nodes = fields[i];
for (var j = 0; j < nodes.length; j++) {
var words = nodes[j];
for (var k = 0; k < words.length; k++) {
keysFromWord(words[k], keyList, keyDict)
}
}
}
return keyList;
}
/**
*
* @param {Query} query
*/
function keysFromQuery(query) {
var keyList = [];
var keyDict = {};
var i, j;
var words = query.words;
for (i = 0; i < words.length; i++) {
keysFromWord(words[i], keyList, keyDict)
}
var children = query.children;
for (i = 0; i < children.length; i++) {
if (!children[i]) continue; // children can be sparse when only some tags are used
words = children[i].words;
for (j = 0; j < words.length; j++) {
keysFromWord(words[j], keyList, keyDict)
}
}
return keyList;
}
function keysFromWord(word, keysList, existingDict) {
var len = word.length;
if (len == 0) return;
if (len >= 3) {
// 3o6, 3o5, 3o4, 3o3
select3(word, 6, keysList, existingDict)
}
if (len >= 2) {
// 2o4, 2o3,2o2
select2(word, 4, keysList, existingDict)
}
// 1o1 strategy: this indexes by the first letter
union(word[0], keysList, existingDict);
}
function select2(str, maxlen, existingList, existingDict) {
var len = Math.min(str.length, maxlen);
for (var i = 0; i < len - 1; i++) {
for (var j = i + 1; j < len; j++) {
union(str[i] + str[j], existingList, existingDict)
}
}
return existingList;
}
function select3(str, maxlen, existingList, existingDict) {
var len = Math.min(str.length, maxlen);
for (var i = 0; i < len - 2; i++) {
for (var j = i + 1; j < len - 1; j++) {
for (var k = j + 1; k < len; k++) {
union(str[i] + str[j] + str[k], existingList, existingDict)
}
}
}
return existingList;
}
function union(word, existingList, existingDict) {
if (!(word in existingDict)) {
existingDict[word] = true;
existingList.push(word);
}
}
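// For illustration: keysFromWord("cat", keyList, {}) appends
//   ["cat", "ca", "ct", "at", "c"]
// i.e. the 3-of-6 character selections, then the 2-of-4 selections, then the first letter.
// The store then shortlists items by how many of these keys they share with the query (see retrieveCount).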
function retrieveCount(keys, store) {
// Dictionary idx => count
var countPerIndex = {};
if (keys.length == 0)
return [];
for (var i = 0; i < keys.length; i++) {
var key = keys[i];
// Does the key exist in the index ?
if (key in store) {
// If so add every entry of that key into countPerIndex
// Also for each entry, maintain a count of matched keys.
var idxList = store[key];
for (var j = 0; j < idxList.length; j++) {
var idx = idxList[j];
if (idx in countPerIndex) {
countPerIndex[idx]++;
} else {
countPerIndex[idx] = 1;
}
}
}
}
// Transform countPerIndex into a sorted list of IdAndCount
var outList = [];
for (var id in countPerIndex) {
if (countPerIndex.hasOwnProperty(id)) {
outList.push(new IdAndCount(id, countPerIndex[id]));
}
}
// We can probably filterGte here.
// Custom sort decreasing order
outList = outList.sort(function (a, b) {
return b.count - a.count
});
return outList;
}
function IdAndCount(id, count) {
this.id = id;
this.count = count;
}
//
// Shared string and array of string functions
//
/**
* Put a string into a normal form, allowing case-insensitive comparison.
* Also allows matching accented characters against their base form ("é" vs "e").
* (Token separation is handled separately, via token_re.)
*
* @param {string} str
* @returns {string} - normalized str
*/
function normalize(str) {
if (!str)return "";
return str.toLowerCase().replace(/[^\u0000-\u007E]/g, function (a) {
return diacriticsMap[a] || a;
});
}
function getDiacriticsMap() {
// replace most common accents in french-spanish by their base letter
//"ãàáäâæẽèéëêìíïîõòóöôœùúüûñç"
var from = "\xE3\xE0\xE1\xE4\xE2\xE6\u1EBD\xE8\xE9\xEB\xEA\xEC\xED\xEF\xEE\xF5\xF2\xF3\xF6\xF4\u0153\xF9\xFA\xFC\xFB\xF1\xE7";
var to = "aaaaaaeeeeeiiiioooooouuuunc";
var diacriticsMap = {};
for (var i = 0; i < from.length; i++) {
diacriticsMap[from[i]] = to[i]
}
return diacriticsMap;
}
var diacriticsMap = getDiacriticsMap();
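// For illustration: normalize("Éléphant") === "elephant" (lowercase + accent folding).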
/**
* Process an array of string, filter out item smaller than min, trim item larger than max.
*
* @param {Array.<string>} array - array of string
* @param minSize - filter out item smaller than this
* @param maxSize - substring item larger than this
* @returns {Array}
*/
FuzzySearch.filterSize = function (array, minSize, maxSize) {
var i = -1, j = -1;
var n = array.length;
var out = [];
var str, slen;
while (++i < n) {
str = array[i];
slen = str.length;
if (slen >= minSize) out[++j] = (slen < maxSize) ? str : str.substr(0, maxSize)
}
return out;
};
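// For illustration:
//   FuzzySearch.filterSize(["a", "fuzzy", "searching"], 3, 6) -> ["fuzzy", "search"]
// ("a" is dropped as too short, "searching" is trimmed to its first 6 chars).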
//
// Extend base option to support highlight
//
extend(FuzzySearch.defaultOptions, /** @lends {FuzzySearchOptions.prototype} */{
highlight_prefix: false, // true: force prefix as part of highlight, (false: minimum gap, slower)
highlight_bridge_gap: 2, // display small gap as substitution, set to size of gap, 0 to disable
highlight_before: '<strong class="highlight">', //tag to put before/after the highlight
highlight_after: '</strong>'
});
/**
* Highlight a string using query stored in a FuzzySearch object.
* @param {string} str
* @param {string=} field
*/
FuzzySearch.prototype.highlight = function (str, field) {
var i, subq;
var qnorm = this.query.normalized;
if (field && field.length && (i = this.tags.indexOf(field)) > -1 && (subq = this.query.children[i])) qnorm += (qnorm.length ? " " : "") + subq.normalized;
return FuzzySearch.highlight(qnorm, str, this.options)
};
/**
* Highlight string b, from searching a in it.
*
* @param {string} a - string to search
* @param {string} b - string to highlight
* @param {FuzzySearchOptions=} options
*
*/
FuzzySearch.highlight = function (a, b, options) {
if (options === undefined) options = FuzzySearch.defaultOptions;
if (!b) return "";
var open_string = options.highlight_before;
var close_string = options.highlight_after;
var opt_score_tok = options.score_per_token;
var opt_fuse = options.score_test_fused;
var opt_acro = options.score_acronym;
var token_re = options.token_re;
var aa = options.normalize(a);
var bb = options.normalize(b);
//Normalized needle
var a_tokens = aa.split(token_re);
//Normalized haystack
var b_tokens = bb.split(token_re);
//Original spelling haystack
var disp_tokens = [], disp_sep = [];
splitKeepSep(b, token_re, disp_tokens, disp_sep);
var strArr = [];
var match_list = [];
var fused_score = 0, match_score = 0;
if (opt_score_tok) {
match_score = FuzzySearch.matchTokens(b_tokens, a_tokens, match_list, options, false);
}
//Test "space bar is broken" no token match
if (opt_fuse || !opt_score_tok || opt_acro) fused_score = FuzzySearch.score_map(aa, bb, FuzzySearch.alphabet(aa), options) + options.bonus_token_order;
if (match_score === 0 && fused_score === 0) return b; //shortcut no match
if (!opt_score_tok || fused_score > match_score) {
a_tokens = [aa]; //everything in a single token
b_tokens = [bb];
disp_tokens = [b];
match_list = [0];
}
var nbtok = disp_tokens.length, j = -1;
while (++j < nbtok) {
var i = match_list[j];
if (i === -1) {
strArr.push(disp_tokens[j] + disp_sep[j]);
continue;
}
var ta = a_tokens[i];
var tb = b_tokens[j];
var td = disp_tokens[j];
var curr = 0;
var start_positions = [];
var end_positions = [];
FuzzySearch.align(ta, tb, start_positions, end_positions);
var len = start_positions.length;
var k = -1;
while (++k < len) {
var s = start_positions[k];
var e = end_positions[k];
if (s > curr) strArr.push(td.substring(curr, s));
strArr.push(open_string + td.substring(s, e) + close_string);
curr = e;
}
strArr.push(td.substring(curr) + disp_sep[j]);
}
return strArr.join('');
};
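// Usage sketch (strings made up; exact markup depends on the highlight_* options and
// on how the query tokens align, so treat the output below as approximate):
//
//   FuzzySearch.highlight("cat", "Fat Category")
//     -> 'Fat <strong class="highlight">Cat</strong>egory'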
function splitKeepSep(str, pattern, tokens, seps) {
var tok_index = tokens.length;
var match = pattern.exec(str);
if (match === null) {
tokens[tok_index] = str;
seps[tok_index] = "";
return;
}
var start = 0, end, len;
while (match !== null) {
end = match.index;
len = match[0].length;
tokens[tok_index] = str.substring(start, end);
seps[tok_index] = str.substr(end, len);
start = end + len;
tok_index++;
match = pattern.exec(str);
}
tokens[tok_index] = str.substring(start);
seps[tok_index] = "";
}
//
// Smith-Waterman-Gotoh local Alignment
//
// Smith&Waterman worked the idea of local alignment
// While Gotoh 82 worked on affine gap penalty.
//
// This is the basic algorithm with some optimisation to use less space.
// JAligner has been used as a reference implementation to debug.
// Some of their implementation detail to save memory has been reused here.
//
// See pseudo-code on
// http://jaligner.sourceforge.net/api/jaligner/SmithWatermanGotoh.html
//
//
/**
* Smith-Waterman-Gotoh local Alignment
* Build sequences of matches; the caller sends arrays (seq_start, seq_end) to store them
* Return match score
*
* @param {string} a - string to search
* @param {string} b - string to be searched
* @param {Array.<number>} seq_start - store for match start
* @param {Array.<number>} seq_end - store for match end
* @param {FuzzySearchOptions=} options
* @returns {number}
*/
FuzzySearch.align = function (a, b, seq_start, seq_end, options) {
if (options === undefined) options = FuzzySearch.defaultOptions;
var wm = 100; // score of making a match
var wo = -10; // score to open a gap
var we = -1; // score to continue an open gap
//Traceback directions constants
var STOP = 0;
var UP = 1;
var LEFT = 2;
var DIAGONAL = 3;
var score_acronym = options.score_acronym;
var sep_tokens = options.token_sep;
var m = Math.min(a.length + 1, options.token_query_max_length);
var n = Math.min(b.length + 1, options.token_field_max_length);
// Common prefix is part of lcs,
// but not necessarily part of the best alignment (it can introduce an extra gap)
// however a prefix makes sense in an autocomplete scenario and speeds things up
//
var i, j;
var k = m < n ? m : n;
var prefix_len = 0;
if (a === b) {
//speedup equality
prefix_len = m;
m = 0;
}
else if (options.highlight_prefix) {
for (i = 0; i < k && (a[i] === b[i]); i++) prefix_len++;
if (prefix_len) {
a = a.substring(prefix_len);
b = b.substring(prefix_len);
m -= prefix_len;
n -= prefix_len;
}
}
var vmax = 0, imax = 0, jmax = 0;
var trace = new Array(m * n);
var pos = n - 1;
//m,n = length+1
if (m > 1 && n > 1) {
var vrow = new Array(n), vd, v, align;
var gapArow = new Array(n), gapA, gapB = 0;
for (j = 0; j < n; j++) {
gapArow[j] = 0;
vrow[j] = 0;
trace[j] = STOP;
}
//DEBUG
//var DEBUG_V = [];
//var DEBUG_TR = [];
for (i = 1; i < m; i++) {
gapB = 0;
vd = vrow[0];
pos++;
trace[pos] = STOP;
//DEBUG
//DEBUG_V[i] = [];
//DEBUG_TR[i] = [];
for (j = 1; j < n; j++) {
//
// Reference "pseudocode"
// We fill that table, but using O(n) memory instead of O(m*n).
// If we need traceback we still need O(m*n), but we store a single table instead of 3.
//
// F[i][j] = f = Math.max(F[i - 1][j] + we, V[i - 1][j] + wo );
// E[i][j] = e = Math.max(E[i][j - 1] + we, V[i][j - 1] + wo );
// align = (a[i - 1] === b[j - 1]) ? V[i - 1][j - 1] + wm : -Infinity;
// V[i][j] = v = Math.max(e, f, align, 0);
//
// Score the options
gapA = gapArow[j] = Math.max(gapArow[j] + we, vrow[j] + wo); //f
gapB = Math.max(gapB + we, vrow[j - 1] + wo); //e
if (score_acronym)
align = ( a[i - 1] !== b[j - 1] ) ? -Infinity : (
vd + wm +
( ( i < 2 || sep_tokens.indexOf(a[i - 2]) > -1 ) ? wm : 0) +
( ( j < 2 || sep_tokens.indexOf(b[j - 2]) > -1 ) ? wm : 0)
);
else
align = ( a[i - 1] === b[j - 1] ) ? vd + wm : -Infinity;
vd = vrow[j];
v = vrow[j] = Math.max(align, gapA, gapB, 0);
//DEBUG
//DEBUG_V[i][j] = v;
// Determine the trace back direction
pos++; //pos = i * n + j;
switch (v) {
// which option produced the best score?
// In case of equality, taking gapB gets us closer to the start of the string.
case gapB:
trace[pos] = LEFT;
break;
case align:
trace[pos] = DIAGONAL;
if (v > vmax) {
vmax = v;
imax = i;
jmax = j;
}
break;
case gapA:
trace[pos] = UP;
break;
default:
trace[pos] = STOP;
break;
}
//DEBUG
//DEBUG_TR[i][j] = trace[pos];
}
}
}
//DEBUG
//console.table(DEBUG_V);
//console.table(DEBUG_TR);
// - - - - - - - - -
// TRACEBACK
// - - - - - - - - -
var bridge = options.highlight_bridge_gap;
var last_match = 0;
if (vmax > 0) {
// backtrack to aligned sequence
// record start and end of substrings
// vmax happens at the end of last substring
i = imax;
j = jmax;
pos = i * n + j;
last_match = jmax;
seq_end.push(jmax + prefix_len);
var backtrack = true;
while (backtrack) {
switch (trace[pos]) {
case UP:
i--;
pos -= n;
break;
case LEFT:
j--;
pos--;
break;
case DIAGONAL:
// if we have traversed a gap
// record start/end of sequence
// (unless we want to bridge the gap)
if (last_match - j > bridge) {
seq_start.push(last_match + prefix_len);
seq_end.push(j + prefix_len);
}
j--;
i--;
last_match = j;
pos -= n + 1;
break;
case STOP:
default :
backtrack = false;
}
}
//first matched char
seq_start.push(last_match + prefix_len);
}
if (prefix_len) {
if (last_match > 0 && last_match <= bridge) {
//bridge last match to prefix ?
seq_start[seq_start.length - 1] = 0;
} else {
//add prefix to matches
seq_start.push(0);
seq_end.push(prefix_len);
}
}
//arrays were built backward; reverse them so matches are in increasing order
seq_start.reverse();
seq_end.reverse();
return vmax + prefix_len;
};
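//
// Usage sketch for FuzzySearch.align (illustrative, kept as a comment):
// seq_start/seq_end receive character positions inside b, in increasing order.
//
//   var starts = [], ends = [];
//   var score = FuzzySearch.align("pain", "repaint", starts, ends);
//   // starts -> [2], ends -> [6]   (the "pain" run inside "repaint")
//   // score  -> 400 with the default match weight (4 characters * wm)
//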
//
// Each query token is matched against a field token
// or against nothing (not in field)
//
// a: [paint] [my] [wall]
// b: [wall] [painting]
//
// match: [1, -1, 0]
//
// if a[i] matches b[j]
// then match[i] = j
//
// if a[i] matches nothing
// then match[i] = -1
//
// Returns the match score.
// Takes the vector `match` by reference to output match detail.
//
// Ideal case:
// each token of "a" is matched against its highest score(a[i],b[j]).
//
// But when two tokens have the same best match,
// we have to check for another pairing that gives the highest score
// under the constraint of 1:1 exclusive matching.
//
// To do that we check all possible pairing permutations,
// but we restrict ourselves to a set of plausible pairings.
//
// That is, a token of "a" will only consider pairings with a score of at least
// thresh_relative_to_best * [its highest score].
//
/**
* Match tokens of A against tokens of B, under the constraint that each token can be matched at most once.
*
* @param {Array.<string>} a_tokens
* @param {Array.<string>} b_tokens
* @param {Array.<number>} match - array to store results
* @param {FuzzySearchOptions=} options
* @param {boolean=} flip - if true, score A against B, but return indices of B against A.
* @returns {number} Score of the best match combination.
*/
FuzzySearch.matchTokens = function (a_tokens, b_tokens, match, options, flip) {
if (options === undefined) options = FuzzySearch.defaultOptions;
if (flip === undefined) flip = false;
var minimum_match = options.minimum_match;
var best_thresh = options.thresh_relative_to_best;
var i, j, row;
var C = [];
var m = a_tokens.length;
var n = b_tokens.length;
var a_maps = FuzzySearch.mapAlphabet(a_tokens);
var a_tok, b_tok, a_mp;
var rowmax = minimum_match, imax = -1, jmax = -1, v;
var match_count = 0;
var thresholds = [];
for (i = 0; i < m; i++) {
row = [];
match[i] = -1;
rowmax = minimum_match;
a_tok = a_tokens[i];
if (!a_tok.length) {
//skip score loop but still fill array
for (j = 0; j < n; j++) row[j] = 0;
C[i] = row;
continue;
}
a_mp = a_maps[i];
for (j = 0; j < n; j++) {
b_tok = b_tokens[j];
if (!b_tok.length) {
row[j] = 0;
continue;
}
v = FuzzySearch.score_map(a_tok, b_tok, a_mp, options);
row[j] = v;
if (v > minimum_match) match_count++;
if (v > rowmax) {
rowmax = v;
imax = i;
jmax = j;
}
}
thresholds[i] = rowmax;
C[i] = row;
}
//Shortcut: no match
if (match_count === 0) return 0;
//Shortcut: single possible pairing
if (match_count === 1) {
match[imax] = jmax;
if (flip) _flipmatch(match, n);
return rowmax;
}
//Only consider matching close enough to best match
for (i = 0; i < a_tokens.length; i++) {
thresholds[i] = Math.max(best_thresh * thresholds[i], minimum_match);
}
var score = _matchScoreGrid(C, match, thresholds, options.bonus_token_order);
//Flip back the problem if necessary
if (flip) _flipmatch(match, n);
return score;
};
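//
// Usage sketch, mirroring the [paint] [my] [wall] example above (kept as a comment):
//
//   var match = [];
//   var score = FuzzySearch.matchTokens(["paint", "my", "wall"], ["wall", "painting"], match);
//   // match -> [1, -1, 0] : "paint" pairs with "painting", "my" pairs with nothing,
//   //                       "wall" pairs with "wall"; score is the combined pairing score.
//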
/**
* Perform the same match as FuzzySearch.matchTokens,
* but the token-against-token scores are already computed as C.
*
* This is mostly a preparation phase for _buildScoreTree, as well
* as a post-processing traversal to recover the match.
*
* @param {Array.<Array.<number>>} C - precomputed scores
* @param {Array.<number>} match - stores the positions of the best matches
* @param {Array.<number>} thresholds - minimum score each token is willing to match
* @param {number} order_bonus
* @returns {number} - best score
* @private
*/
function _matchScoreGrid(C, match, thresholds, order_bonus) {
var i_len = C.length;
var i, j;
//Traverse score grid to find best permutation
var score_tree = [];
for (i = 0; i < i_len; i++) {
score_tree[i] = {};
}
var opt = new TreeOptions(C, score_tree, thresholds, order_bonus);
var score = _buildScoreTree(opt, 0, 0).score;
var used = 0, item;
for (i = 0; i < i_len; i++) {
item = score_tree[i][used];
if (!item) break;
match[i] = j = item.index;
if (j > -1) used |= (1 << j);
}
return score;
}
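//
// Illustrative sketch (hypothetical scores): the 1:1 constraint can reject the greedy pairing.
//
//   C = [[10, 8],     // score(a[0], b[0]), score(a[0], b[1])
//        [ 9, 1]]     // score(a[1], b[0]), score(a[1], b[1])
//   thresholds = [5, 5], order_bonus = 0
//
// Pairing a[0]->b[0] (10) leaves a[1] unmatched (1 is below its threshold), total 10,
// while a[0]->b[1] plus a[1]->b[0] totals 17, so match becomes [1, 0].
//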
//
// Cache tree:
//
// Given 5 nodes: 1,2,3,4,5
//
// What is the best match ...
// - knowing that we have passed through 1->2->3
// - knowing that we have passed through 2->3->1
// - knowing that we have passed through 3->1->2
//
// All those questions have the same answer
// because they are equivalent to matching {4,5} against {4,5}
// (in an alternate pass we could match {1,3} against {4,5}, for example)
//
// We store the matched j indices in a bit vector of size 32.
//
// Besides saving computation, the cache_tree data structure is used to
// trace back the best permutation!
//
// Besides quickly testing whether an item is already used, used_mask serves
// as a key in cache_tree (in addition to the depth level). The ideal key would be the list of available
// tokens, but used & available are complementary vectors (~ not operation), so used is a perfectly valid key too...
/**
* Branch out to try each permutation of items of A against items of B.
* - Only try branches not already used.
* - Prune branches below the token threshold.
* - Build a tree to cache sub-problems for which we already have a solution.
*
* @param {TreeOptions} tree_opt
* @param {number} used_mask
* @param {number} depth
* @returns {MatchTrial} best_trial
* @private
*/
function _buildScoreTree(tree_opt, used_mask, depth) {
var C = tree_opt.score_grid;
var cache_tree = tree_opt.cache_tree;
var score_thresholds = tree_opt.score_thresholds;
var order_bonus = tree_opt.order_bonus;
var ilen = C.length;
var jlen = C[depth].length;
if (jlen > INT_SIZE) jlen = INT_SIZE;
var j, score;
var include_thresh = score_thresholds[depth];
var best_score = 0, best_index = -1;
var has_child = (depth < ilen - 1);
var child_tree = cache_tree[depth + 1], child_key;
for (j = 0; j < jlen; j++) {
var bit = 1 << j;
//if token previously used, skip
if (used_mask & bit) continue;
//score for this match
score = C[depth][j];
//too small of a match, skip
if (score < include_thresh) continue;
//score for child match
//if we already have computed this sub-block get from cache
if (has_child) {
child_key = used_mask | bit;
/** @type MatchTrial */
var trial = (child_key in child_tree) ?
child_tree[child_key] :
_buildScoreTree(tree_opt, child_key, depth + 1);
score += trial.score;
if (j < trial.index) {
score += order_bonus;
}
}
//Because of DFS, the first loop to finish is toward the end of the query.
//As a heuristic, it's good to match higher indices toward the end, so we accept equality.
if (score >= best_score) {
best_score = score;
best_index = j;
}
}
//try the move of "do not match this token against anything"
if (has_child) {
child_key = used_mask;
if (child_key in child_tree) score = child_tree[child_key].score;
else score = _buildScoreTree(tree_opt, child_key, depth + 1).score;
if (score > best_score) {
best_score = score;
best_index = -1;
}
}
var best_trial = new MatchTrial(best_score, best_index);
cache_tree[depth][used_mask] = best_trial;
return best_trial;
}
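//
// Note on used_mask (illustrative): bit j is set when b_tokens[j] is already taken.
// For example used_mask = 5 (binary 101) means b[0] and b[2] are used, while
// (used_mask & (1 << 1)) === 0 means b[1] is still available at this depth.
//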
/**
*
* @param score
* @param index
* @constructor
*/
function MatchTrial(score, index) {
this.score = score;
this.index = index;
}
/**
*
* @param {Array<Array<number>>} score_grid
* @param {Array<Object<number,MatchTrial>>} cache_tree
* @param {Array<number>} score_thresholds
* @param {number} order_bonus
* @constructor
*/
function TreeOptions(score_grid, cache_tree, score_thresholds, order_bonus) {
this.score_grid = score_grid;
this.cache_tree = cache_tree;
this.score_thresholds = score_thresholds;
this.order_bonus = order_bonus;
}
/**
* Let A, B be two arrays.
* Input is an array that maps "index of A" -> "index of B".
* Output is the reverse mapping "index of B" -> "index of A".
*
* The array is modified in place.
*
* @param {Array.<number>} match - array to remap
* @param {number} newlen - length of B
* @private
*/
function _flipmatch(match, newlen) {
var i, j;
var ref = match.slice();
match.length = newlen;
for (i = 0; i < newlen; i++) {
match[i] = -1;
}
for (i = 0; i < ref.length; i++) {
j = ref[i];
if (j > -1 && j < newlen) match[j] = i;
}
}
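//
// Example (illustrative): flipping the [paint] [my] [wall] match from above.
//
//   var m = [1, -1, 0];   // index of A -> index of B
//   _flipmatch(m, 2);
//   // m -> [2, 0]        // index of B -> index of A
//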
//
// - - - - - - - - - - - -
// UI INTEGRATION
// - - - - - - - - - - - -
//
extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
/**
* Return a debounced version of FuzzySearch.search.
* The new function signature allows specifying a callback for each phase of the debounce.
* The debounce is adaptive: it allows short bursts and tries to learn the actual computation time.
*
* query: term to search
* immediate_cb(results) : called when the search ran immediately (not suppressed by the debounce)
* suppress_cb(cached_results) : the debounce suppressed the search; receives a cache of the last results
* finally_cb(results) : if at least one suppression occurred, a new search is made when the debounce ends and this is called.
*
* @returns {function({string}, function({Array}), function({Array}), function({Array}))}
*/
getInteractive: function () {
var self = this;
var options = this.options;
var wait = options.interactive_debounce;
var mult = options.interactive_mult;
var burst = options.interactive_burst;
// Debounce off
if (wait === 0) {
return (function (query, immediate_cb, suppress_cb, finally_cb) {
return immediate_cb(self.search(query))
})
}
// Debounce
var clock = (window.performance && window.performance.now) ? window.performance : Date;
var timeout, cache;
var count = 0, suppressed = false;
return function (query, immediate_cb, suppress_cb, finally_cb) {
var later = function () {
timeout = null;
if (suppressed) {
cache = self.search(query);
finally_cb(cache);
}
count = 0;
suppressed = false;
};
clearTimeout(timeout);
timeout = setTimeout(later, wait);
if (++count < burst) {
suppressed = false;
var before = clock.now();
cache = self.search(query);
var ret = immediate_cb(cache);
var now = clock.now();
//try to learn the typical search time (scaled by the mult factor)
wait = 0.5 * wait + 0.5 * mult * (now - before);
//console.log(wait);
return ret;
} else {
suppressed = true;
//console.log("supress");
return suppress_cb(cache);
}
}
},
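//
// Usage sketch for getInteractive (kept as a comment; `fuzzy`, `inputEl` and `render`
// are hypothetical names, and `fuzzy` is assumed to be a configured FuzzySearch instance):
//
//   var interactive = fuzzy.getInteractive();
//   inputEl.addEventListener("input", function () {
//     interactive(inputEl.value, render, render, render);
//   });
//
// Here render(results) is whatever repaints the suggestion list; passing it for all
// three callbacks simply redraws on immediate, suppressed (cached) and final results.
//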
/**
* Allow the FuzzySearch object to be given as a source to twitter typeahead.
* This implements an interface similar to the Bloodhound object.
*
* @returns {function({string}, function({Array}), function({Array}))} Interactive version of search.
*/
__ttAdapter: function ttAdapter() {
var debounced = this.getInteractive();
var noop = function (a) {
};
return function (query, sync, async) {
debounced(query, sync, noop, async);
}
},
/**
* Generate a function compatible with jQuery UI auto-complete Source
*
* @returns {function( {Object}, {function()} )} Interactive version of search.
*/
$uiSource: function () {
var debounced = this.getInteractive();
var noop = function (a) {
};
return function (request, response) {
debounced(request.term, response, noop, response);
}
}
});
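//
// Adapter usage sketch (kept as a comment; `fuzzy` and "#search" are hypothetical,
// and typeahead.js / jQuery UI are assumed to be loaded):
//
//   $('#search').typeahead({ minLength: 1 }, { source: fuzzy.__ttAdapter() });
//   $('#search').autocomplete({ source: fuzzy.$uiSource() });
//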
//
// Export FuzzySearch
//
if (typeof require === 'function' && typeof module !== 'undefined' && module.exports) {
// CommonJS-like environments
module.exports = FuzzySearch;
} else if (typeof define === 'function' && define.amd) {
// AMD. Register as an anonymous module.
define(function () {
return FuzzySearch;
});
} else {
// Browser globals
globalThis['FuzzySearch'] = FuzzySearch;
}
return FuzzySearch;
})();