From 9478a9372919e1c2eef0f1273c8b0f6716aff18e Mon Sep 17 00:00:00 2001 From: Cris Stringfellow <22254235+crislin2046@users.noreply.github.com> Date: Fri, 10 Dec 2021 17:16:13 +0800 Subject: [PATCH] "Adding search" --- archivist.js | 8 ++++++++ package-lock.json | 11 +++++++++++ package.json | 1 + 3 files changed, 20 insertions(+) diff --git a/archivist.js b/archivist.js index 692f249..3ad8539 100644 --- a/archivist.js +++ b/archivist.js @@ -2,6 +2,7 @@ import hasha from 'hasha'; import {URL} from 'url'; import path from 'path'; import fs from 'fs'; +import FlexSearch from 'flexsearch'; import args from './args.js'; import {APP_ROOT, context, sleep, DEBUG} from './common.js'; import {connect} from './protocol.js'; @@ -14,6 +15,11 @@ import {BLOCKED_BODY, BLOCKED_CODE, BLOCKED_HEADERS} from './blockedResponse.js' // that holds the serialized requests // that are saved on disk let Fs, Mode, Close; +const {Index, registerCharset, registerLanguage} = FlexSearch; +const FLEX_OPTS = { + context: true, +}; +const Flex = new Index(FLEX_OPTS); const Cache = new Map(); const State = { Cache, @@ -234,6 +240,7 @@ async function collect({chrome_port:port, mode} = {}) { (Text, {nodeValue}) => Text + nodeValue + ' ', '' ); + if ( false ) { console.log({ page : { @@ -243,6 +250,7 @@ async function collect({chrome_port:port, mode} = {}) { } }); } + Flex.updateAsync(info.url, pageText); } console.log(`Indexed ${info.url} to ${info.title}`); diff --git a/package-lock.json b/package-lock.json index 3510e3a..4d3e712 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,6 +13,7 @@ "chrome-launcher": "latest", "esm": "^3.2.25", "express": "latest", + "flexsearch": "^0.7.21", "hasha": "latest", "node-fetch": "latest", "ws": "latest" @@ -1899,6 +1900,11 @@ "node": ">=8" } }, + "node_modules/flexsearch": { + "version": "0.7.21", + "resolved": "https://registry.npmjs.org/flexsearch/-/flexsearch-0.7.21.tgz", + "integrity": "sha512-W7cHV7Hrwjid6lWmy0IhsWDFQboWSng25U3VVywpHOTJnnAZNPScog67G+cVpeX9f7yDD21ih0WDrMMT+JoaYg==" + }, "node_modules/forwarded": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", @@ -10891,6 +10897,11 @@ "path-exists": "^4.0.0" } }, + "flexsearch": { + "version": "0.7.21", + "resolved": "https://registry.npmjs.org/flexsearch/-/flexsearch-0.7.21.tgz", + "integrity": "sha512-W7cHV7Hrwjid6lWmy0IhsWDFQboWSng25U3VVywpHOTJnnAZNPScog67G+cVpeX9f7yDD21ih0WDrMMT+JoaYg==" + }, "forwarded": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", diff --git a/package.json b/package.json index 91afc1a..d73e938 100644 --- a/package.json +++ b/package.json @@ -35,6 +35,7 @@ "chrome-launcher": "latest", "esm": "^3.2.25", "express": "latest", + "flexsearch": "^0.7.21", "hasha": "latest", "node-fetch": "latest", "ws": "latest"