"Fixed the windowed highlight bug, where incorrect offsets were occurring. Now using the correct source array to remove that problem. Also improved highlight matching by adding a score (minimum match) to fuzzy.options (3, versus the default of 1)."
This commit is contained in:
parent
56bb405520
commit
ac74c809eb
|
@ -84,6 +84,13 @@
|
|||
DEBUG && console.log({NDX_FTSIndex});
|
||||
|
||||
// fuzzy (maybe just for queries ?)
|
||||
const REGULAR_SEARCH_OPTIONS_FUZZY = {
|
||||
minimum_match: 1.0
|
||||
};
|
||||
|
||||
const HIGHLIGHT_OPTIONS_FUZZY = {
|
||||
minimum_match: 3.0
|
||||
};
|
||||
const FUZZ_OPTS = {
|
||||
keys: ndxDocFields({namesOnly:true})
|
||||
};
|
||||
|
@ -864,6 +871,7 @@ export default Archivist;
|
|||
if ( maxLength ) {
|
||||
doc = Array.from(doc).slice(0, maxLength).join('');
|
||||
}
|
||||
Object.assign(fuzzy.options, HIGHLIGHT_OPTIONS_FUZZY);
|
||||
const hl = fuzzy.highlight(doc);
|
||||
DEBUG && console.log(query, hl);
|
||||
return hl;
|
||||
|
@ -931,6 +939,7 @@ export default Archivist;
|
|||
url: State.Index.get('ndx'+r.key),
|
||||
score: r.score
|
||||
}));
|
||||
Object.assign(fuzzy.options, REGULAR_SEARCH_OPTIONS_FUZZY);
|
||||
const fuzzRaw = fuzzy.search(query);
|
||||
const fuzz = processFuzzResults(fuzzRaw);
|
||||
|
||||
|
|
|
@ -64,13 +64,13 @@ export function highlight(query, doc, {
|
|||
console.log('Zero highlights, showing first score', scores[0]);
|
||||
return scores.slice(0,1);
|
||||
} else {
|
||||
let better = JSON.parse(JSON.stringify(highlights)).slice(0, 10);
|
||||
let better = Array.from(highlights).slice(0, 10);
|
||||
better = better.map(hl => {
|
||||
const length = Array.from(hl.fragment.text).length;
|
||||
const extra = Math.round(length/2);
|
||||
let {offset} = hl.fragment;
|
||||
const newText = doc.slice(Math.max(0,offset - extra), offset).join('') + hl.fragment.text + doc.slice(offset + length, offset + length + extra).join('');
|
||||
//console.log({newText, oldText:hl.fragment.text});
|
||||
let {offset, symbols} = hl.fragment;
|
||||
const newText = symbols.slice(Math.max(0,offset - extra), offset).join('') + hl.fragment.text + symbols.slice(offset + length, offset + length + extra).join('');
|
||||
DEBUG && console.log({newText, oldText:hl.fragment.text, p:[Math.max(0,offset-extra), offset, offset+length, offset+length+extra], trueText: symbols.slice(offset, offset+length).join('')});
|
||||
hl.fragment.text = newText;
|
||||
const {MaxDist,MinScore,MaxScore} = params(Array.from(newText).length);
|
||||
const distance = ukkonen(query, hl.fragment.text.toLocaleLowerCase(), MaxDist);
|
||||
|
@ -106,7 +106,7 @@ function getFragmenter(chunkSize) {
|
|||
currentFrag = frags.pop();
|
||||
currentFrag.text += nextSymbol;
|
||||
} else {
|
||||
currentFrag = {text:nextSymbol, offset:index};
|
||||
currentFrag = {text:nextSymbol, offset:index, symbols};
|
||||
currentLength = 0;
|
||||
}
|
||||
currentLength++;
|
||||
|
|
7
todo
7
todo
|
@ -1,10 +1,5 @@
|
|||
- highlights are mostly rubbish right now
|
||||
- implement trigram index
|
||||
- try an exact match on the query term if possible for highlight. first one.
|
||||
- don't highlight small matches like:
|
||||
- search: Zuckerberg, top result: Hacker News - Top Links
|
||||
- highlight Ha<strong>cker</strong> News
|
||||
- WTF come on... I need a threshold on this stuff....or like, if I can find a good match in
|
||||
body then don't highlight worse match in title...or maybe I can use ukkonen as part of threshold
|
||||
- we could also add signal from the highlighting to just in time alter the order (e.g. 'hell wiki' search brings google search to top rank, but the Hell wikipedia page has more highlight visible)
|
||||
- Create instant search (or at least instant queries (so search over previous queries -- not results necessarily))
|
||||
- an error in Full text search can corrupt the index and make it unrecoverable...we need to guard against this
|
||||
|
|
Loading…
Reference in New Issue