From d0d7fff38c8f3f85dc574511adced1a234916569 Mon Sep 17 00:00:00 2001 From: Cris Stringfellow <22254235+crislin2046@users.noreply.github.com> Date: Mon, 13 Dec 2021 14:30:20 +0800 Subject: [PATCH] "Big progress on integrating search." --- archivist.js | 22 ++++++++++------------ common.js | 14 ++++++++++++++ protocol.js | 22 +++++++++++++++++++--- todo | 2 -- 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/archivist.js b/archivist.js index 2f11fd0..bfd27d2 100644 --- a/archivist.js +++ b/archivist.js @@ -111,15 +111,13 @@ async function collect({chrome_port:port, mode} = {}) { on("Target.targetInfoChanged", indexURL); on("Target.targetInfoChanged", reloadIfNotLive); - on("Target.targetInfoChanged", attachToTarget); - on("Target.attachedToTarget", installForSession); - on("Fetch.requestPaused", cacheRequest); - on("Runtime.consoleAPICalled", confirmInstall); + await send("Target.setDiscoverTargets", {discover:true}); + await send("Target.setAutoAttach", {autoAttach:true, waitForDebuggerOnStart:false, flatten: true}); await send("Fetch.enable", { patterns: [ { @@ -129,9 +127,6 @@ async function collect({chrome_port:port, mode} = {}) { ], }); - await send("Target.setDiscoverTargets", {discover:true}); - await send("Target.setAutoAttach", {autoAttach:true, waitForDebuggerOnStart:false, flatten: true}); - const {targetInfos:targets} = await send("Target.getTargets", {}); const pageTargets = targets.filter(({type}) => type == 'page'); pageTargets.forEach(attachToTarget); @@ -164,7 +159,7 @@ async function collect({chrome_port:port, mode} = {}) { if ( attached && type == 'page' ) { const {url, targetId} = targetInfo; const sessionId = Sessions.get(targetId); - if ( !!sessionId && !!url && url != "about:blank" && !url.startsWith('chrome') && !ConfirmedInstalls.has(sessionId) ) { + if ( !!sessionId && !neverCache(url) && !ConfirmedInstalls.has(sessionId) ) { console.log({reloadingAsNotConfirmedInstalled:{url, sessionId}}); send("Page.stopLoading", {}, sessionId); send("Page.reload", {}, sessionId); @@ -172,6 +167,10 @@ async function collect({chrome_port:port, mode} = {}) { } } + function neverCache(url) { + return !url || url == "about:blank" || url.startsWith('chrome') || NEVER_CACHE.has(url); + } + async function installForSession({sessionId, targetInfo, waitingForDebugger}) { console.log("installForSession called"); if ( ! sessionId ) { @@ -230,8 +229,6 @@ async function collect({chrome_port:port, mode} = {}) { await untilHas(Installations, sessionId); } - console.log('hi', sessionId); - send("DOMSnapshot.enable", {}, sessionId); await sleep(500); @@ -242,12 +239,13 @@ async function collect({chrome_port:port, mode} = {}) { const pageText = processDoc(flatDoc); //Flex.updateAsync(info.url, pageText).then(r => console.log('Search index update done')); //Flex.addAsync(info.url, pageText).then(r => console.log('Search index update done')); - const res = Flex.add(info.url, pageText); + const res = Flex.update(info.url, pageText); DEBUG && console.log('Flex Index Result>>>', res); State.Indexing.delete(info.targetId); - console.log(`Indexed ${info.url} to ${info.title}`); + const {title, url} = info; + console.log({title, url, indexed: true, searchable: true, indexType: 'full text and full content'}); } async function untilHas(thing, key) { diff --git a/common.js b/common.js index abe860b..219202a 100644 --- a/common.js +++ b/common.js @@ -30,6 +30,20 @@ export const FORBIDDEN_TEXT_PARENT = new Set([ 'SCRIPT', 'NOSCRIPT' ]); +export const ERROR_CODE_SAFE_TO_IGNORE = new Set([ + -32000, /* message: + Can only get response body on requests captured after headers received. + * ignore because: + seems to only happen when new navigation aborts all + pending requests of the unloading page + */ + -32602, /* message: + Invalid InterceptionId. + * ignore because: + seems to only happen when new navigation aborts all + pending requests of the unloading page + */ +]); export const NO_SANDBOX = process.env.DEBUG_22120 || false; diff --git a/protocol.js b/protocol.js index 915f9a4..1922858 100644 --- a/protocol.js +++ b/protocol.js @@ -1,9 +1,10 @@ -import {DEBUG, context} from './common.js'; +import {DEBUG, context, ERROR_CODE_SAFE_TO_IGNORE} from './common.js'; const ROOT_SESSION = "browser"; // actually we use 'tot' but in chrome.debugger.attach 'tot' is // not a supported version string const VERSION = "1.3"; +const MESSAGES = new Map(); function promisify(context, name, err) { return async function(...args) { @@ -77,7 +78,9 @@ export async function connect({port:port = 9222} = {}) { let resolve; const promise = new Promise(res => resolve = res); Resolvers[key] = resolve; - socket.send(JSON.stringify(message)); + const outGoing = JSON.stringify(message); + MESSAGES.set(key, outGoing); + socket.send(outGoing); DEBUG && console.log("Sent", message); return promise; } @@ -86,7 +89,10 @@ export async function connect({port:port = 9222} = {}) { const stringMessage = message; message = JSON.parse(message); if ( message.error ) { - console.warn(message); + const showError = DEBUG || !ERROR_CODE_SAFE_TO_IGNORE.has(message.error.code); + if ( showError ) { + console.warn(message); + } } const {sessionId} = message; const {method, params} = message; @@ -105,6 +111,16 @@ export async function connect({port:port = 9222} = {}) { console.warn(`Resolver failed`, e, key, stringMessage.slice(0,140), resolve); } } + if ( DEBUG ) { + if ( message.error ) { + const showError = DEBUG || !ERROR_CODE_SAFE_TO_IGNORE.has(message.error.code); + if ( showError ) { + const originalMessage = MESSAGES.get(key); + console.warn({originalMessage}); + } + } + } + MESSAGES.delete(key); } else if ( method ) { const listeners = Handlers[method]; if ( Array.isArray(listeners) ) { diff --git a/todo b/todo index e2af652..a2c128c 100644 --- a/todo +++ b/todo @@ -1,3 +1 @@ -- fix error on capture response body regarding headers -- ensure fonts are capturing correctly (the vf fix) - continue making 22120 new