This commit is contained in:
Cris Stringfellow 2023-01-14 11:26:34 +08:00
parent 4f94ed216d
commit 296b15345f
No known key found for this signature in database
10 changed files with 72 additions and 279 deletions

View File

@ -1 +0,0 @@
Sun Jan 2 05:40:42 AM HKT 2022

View File

@ -1,16 +0,0 @@
module.exports = {
"env": {
"es2021": true,
"node": true
},
"extends": "eslint:recommended",
"parserOptions": {
"ecmaVersion": 13,
"sourceType": "module"
},
"ignorePatterns": [
"build/**/*.js"
],
"rules": {
}
};

View File

@ -1 +0,0 @@
Wed Jan 5 02:35:00 PM HKT 2022

View File

@ -1,5 +0,0 @@
.*.swp
# Bundling and packaging
bin/*

View File

@ -1,77 +0,0 @@
{
"name": "diskernet",
"version": "2.6.0",
"description": "Library server and an archivist browser controller.",
"main": "build/22120.js",
"module": "src/app.js",
"bin": {
"diskernet": "build/22120.js"
},
"scripts": {
"bundle": "npx rollup --config",
"start": "node src/app.js",
"setup": "bash ./scripts/build_setup.sh",
"build": "echo 'please run ./scripts/build_setup.sh first (one time only) and then ./scripts/compile.sh'",
"clean": "rm -rf build/* bin/*",
"test": "nodemon src/app.js",
"save": "nodemon src/app.js 22120 save",
"serve": "nodemon src/app.js 22120 serve",
"lint": "watch -n 5 npx eslint .",
"test-hl": "node src/highlighter.js",
"postpublish": "cp package.json .package.build.json",
"prepublishOnly": "npm run bundle && npx webpack ."
},
"repository": {
"type": "git",
"url": "git+https://github.com/dosyago/22120.git"
},
"pkg": {
"patches": {
"./node_modules/fetch-blob/streams.cjs": [
"Object.assign(globalThis, require('node:stream/web'))",
"Object.assign(globalThis, require('stream').web)"
]
},
"scripts": "build/*.js",
"assets": "public/**/*",
"outputPath": "bin"
},
"keywords": [
"web-archive",
"search-engine",
"self-hosted",
"offline",
"archivist",
"library"
],
"author": "@dosy",
"license": "AGPL-3.0",
"bugs": {
"url": "https://github.com/dosyago/22120/issues"
},
"homepage": "https://github.com/dosyago/22120#readme",
"dependencies": {
"chrome-launcher": "latest",
"express": "latest",
"flexsearch": "^0.7.21",
"fz-search": "^1.0.0",
"hasha": "latest",
"natural": "^5.1.11",
"ndx": "^1.0.2",
"ndx-query": "^1.0.1",
"ndx-serializable": "^1.0.0",
"node-fetch": "latest",
"ukkonen": "^1.4.0",
"ws": "latest"
},
"devDependencies": {
"eslint": "^8.4.1",
"esm": "^3.2.25",
"nexe": "^1.1.6",
"nodemon": "latest",
"npx": "^3.0.0",
"webpack": "latest",
"webpack-cli": "latest",
"rollup-plugin-terser": "^7.0.2"
}
}

View File

@ -1,67 +0,0 @@
{
"name": "diskernet",
"version": "2.4.11",
"type": "module",
"description": "Library server and an archivist browser controller.",
"main": "src/app.js",
"module": "dist/22120-module.js",
"bin": {
"diskernet": "build/22120.js"
},
"scripts": {
"bundle": "npx rollup --config",
"start": "node src/app.js",
"setup": "bash ./scripts/build_setup.sh",
"build": "echo 'please run ./scripts/build_setup.sh first (one time only) and then ./scripts/compile.sh'",
"clean": "rm -rf build/* bin/*",
"test": "nodemon src/app.js",
"save": "nodemon src/app.js 22120 save",
"serve": "nodemon src/app.js 22120 serve",
"lint": "watch -n 5 npx eslint .",
"test-hl": "node src/highlighter.js",
"postpublish": "cp package.json .package.build.json",
"prepublishOnly": "npm run bundle && npx webpack ."
},
"repository": {
"type": "git",
"url": "git+https://github.com/dosyago/22120.git"
},
"pkg": {
"scripts": "build/*.js",
"assets": "public/**/*",
"outputPath": "bin"
},
"keywords": [
"archivist",
"library"
],
"author": "@dosy",
"license": "AGPL-3.0",
"bugs": {
"url": "https://github.com/dosyago/22120/issues"
},
"homepage": "https://github.com/dosyago/22120#readme",
"dependencies": {
"chrome-launcher": "latest",
"express": "latest",
"flexsearch": "^0.7.21",
"fz-search": "^1.0.0",
"hasha": "latest",
"natural": "^5.1.11",
"ndx": "^1.0.2",
"ndx-query": "^1.0.1",
"ndx-serializable": "^1.0.0",
"node-fetch": "latest",
"ukkonen": "^1.4.0",
"ws": "latest"
},
"devDependencies": {
"eslint": "^8.4.1",
"esm": "^3.2.25",
"nexe": "^1.1.6",
"nodemon": "latest",
"npx": "^3.0.0",
"webpack": "latest",
"webpack-cli": "latest"
}
}

View File

@ -1,54 +0,0 @@
import path from 'path';
//import {fileURLToPath} from 'url';
// Shared configuration constants for the application.

// The raw DEBUG_22120 environment value when it is a non-empty string, else false.
// Note this is the string itself, not a boolean.
export const DEBUG = process.env.DEBUG_22120 || false;
export const SHOW_FETCH = false;
// NOTE(review): presumably a polling period in milliseconds; confirm against usage.
export const CHECK_INTERVAL = 400;
// Same numeric value as the DOM Node.TEXT_NODE nodeType constant.
export const TEXT_NODE = 3;
export const MAX_HIGHLIGHTABLE_LENGTH = 0; /* 0 is no max length for highlight */
// NOTE(review): the three limits below are presumably character counts; confirm against usage.
export const MAX_TITLE_LENGTH = 140;
export const MAX_URL_LENGTH = 140;
export const MAX_HEAD = 140;
/* text nodes inside these elements that are ignored */
export const FORBIDDEN_TEXT_PARENT = new Set([
'STYLE',
'SCRIPT',
'NOSCRIPT',
/* we could remove these last two so as to index them as well */
'DATALIST',
'OPTION'
]);
// Numeric error codes that are considered harmless; the rationale for each
// code is given in the comment next to it.
export const ERROR_CODE_SAFE_TO_IGNORE = new Set([
-32000, /* message:
Can only get response body on requests captured after headers received.
* ignore because:
seems to only happen when new navigation aborts all
pending requests of the unloading page
*/
-32602, /* message:
Invalid InterceptionId.
* ignore because:
seems to only happen when new navigation aborts all
pending requests of the unloading page
*/
]);
// NOTE(review): presumably the amount of context kept around a search snippet; confirm against usage.
export const SNIP_CONTEXT = 31;
// Driven by the same DEBUG_22120 environment variable as DEBUG above.
export const NO_SANDBOX = process.env.DEBUG_22120 || false;
//export const APP_ROOT = '.';
// Uses the CommonJS-style __dirname binding; the import.meta.url alternative is left commented out below.
export const APP_ROOT = __dirname;
//export const APP_ROOT = path.dirname(fileURLToPath(import.meta.url));
/**
 * Returns a promise that fulfills (with no value) once `ms` milliseconds
 * have elapsed according to setTimeout.
 */
export const sleep = (ms) => new Promise((wake) => {
  setTimeout(wake, ms);
});
/**
 * Prints the JSON serialization of `o` with a single console.log call.
 */
export function say(o) {
  const line = JSON.stringify(o);
  console.log(line);
}
/**
 * Copies `o` by serializing it to JSON text and parsing that text back.
 * Only JSON-representable data survives the round trip.
 */
export function clone(o) {
  const serialized = JSON.stringify(o);
  return JSON.parse(serialized);
}

View File

@ -1,54 +0,0 @@
import path from 'path';
import {fileURLToPath} from 'url';
// Shared configuration constants for the application.

// The raw DEBUG_22120 environment value when it is a non-empty string, else false.
// Note this is the string itself, not a boolean.
export const DEBUG = process.env.DEBUG_22120 || false;
export const SHOW_FETCH = false;
// NOTE(review): presumably a polling period in milliseconds; confirm against usage.
export const CHECK_INTERVAL = 400;
// Same numeric value as the DOM Node.TEXT_NODE nodeType constant.
export const TEXT_NODE = 3;
export const MAX_HIGHLIGHTABLE_LENGTH = 0; /* 0 is no max length for highlight */
// NOTE(review): the three limits below are presumably character counts; confirm against usage.
export const MAX_TITLE_LENGTH = 140;
export const MAX_URL_LENGTH = 140;
export const MAX_HEAD = 140;
/* text nodes inside these elements that are ignored */
export const FORBIDDEN_TEXT_PARENT = new Set([
'STYLE',
'SCRIPT',
'NOSCRIPT',
/* we could remove these last two so as to index them as well */
'DATALIST',
'OPTION'
]);
// Numeric error codes that are considered harmless; the rationale for each
// code is given in the comment next to it.
export const ERROR_CODE_SAFE_TO_IGNORE = new Set([
-32000, /* message:
Can only get response body on requests captured after headers received.
* ignore because:
seems to only happen when new navigation aborts all
pending requests of the unloading page
*/
-32602, /* message:
Invalid InterceptionId.
* ignore because:
seems to only happen when new navigation aborts all
pending requests of the unloading page
*/
]);
// NOTE(review): presumably the amount of context kept around a search snippet; confirm against usage.
export const SNIP_CONTEXT = 31;
// Driven by the same DEBUG_22120 environment variable as DEBUG above.
export const NO_SANDBOX = process.env.DEBUG_22120 || false;
//export const APP_ROOT = '.';
//export const APP_ROOT = __dirname;
// Directory containing this module file, derived from the module's own URL.
export const APP_ROOT = path.dirname(fileURLToPath(import.meta.url));
/**
 * Returns a promise that fulfills (with no value) once `ms` milliseconds
 * have elapsed according to setTimeout.
 */
export const sleep = (ms) => new Promise((wake) => {
  setTimeout(wake, ms);
});
/**
 * Prints the JSON serialization of `o` with a single console.log call.
 */
export function say(o) {
  const line = JSON.stringify(o);
  console.log(line);
}
/**
 * Copies `o` by serializing it to JSON text and parsing that text back.
 * Only JSON-representable data survives the round trip.
 */
export function clone(o) {
  const serialized = JSON.stringify(o);
  return JSON.parse(serialized);
}

View File

@ -1,9 +1,25 @@
import path from 'path';
import {fileURLToPath} from 'url';
import fs from 'fs';
import os from 'os';
// NOTE(review): the next two declarations both bind DEBUG. This looks like the
// removed and the added line of a diff shown together; only one can exist in a real module.
export const DEBUG = process.env.DEBUG_22120 || false;
export const DEBUG = {
// The raw DEBUG_22120 environment value when it is a non-empty string, else false.
debug: process.env.DEBUG_22120 || false,
// When truthy, untilTrue() logs the predicate's source text before every check.
checkPred: true
}
export const SHOW_FETCH = false;
// server related
export const PUBLIC_SERVER = true;
// crawl related
// NOTE(review): the two page-time bounds are presumably milliseconds; confirm against the crawler.
export const MIN_TIME_PER_PAGE = 10000;
export const MAX_TIME_PER_PAGE = 32000;
// Default delay, in setTimeout milliseconds, between predicate checks in untilTrue().
export const MIN_WAIT = 200;
// Default number of predicate checks untilTrue() performs before resolving false.
export const MAX_WAITS = 300;
export const BATCH_SIZE = 5; // crawl batch size (how many concurrent tabs for crawling)
// Evaluates to 32767.
export const MAX_REAL_URL_LENGTH = 2**15 - 1;
// NOTE(review): presumably a polling period in milliseconds; confirm against usage.
export const CHECK_INTERVAL = 400;
// Same numeric value as the DOM Node.TEXT_NODE nodeType constant.
export const TEXT_NODE = 3;
export const MAX_HIGHLIGHTABLE_LENGTH = 0; /* 0 is no max length for highlight */
@ -11,6 +27,21 @@ export const MAX_TITLE_LENGTH = 140;
// NOTE(review): the two limits below are presumably character counts; confirm against usage.
export const MAX_URL_LENGTH = 140;
export const MAX_HEAD = 140;
// True when the file <home directory>/local-sslcerts/privkey.pem exists.
// Checked synchronously, once, when this module is loaded.
export const GO_SECURE = fs.existsSync(path.resolve(os.homedir(), 'local-sslcerts', 'privkey.pem'));
/**
 * Error whose `message` is the JSON serialization of the value passed to the
 * constructor, so structured data can travel inside an ordinary Error.
 *
 * If the value cannot be serialized (for example it is cyclic or contains a
 * BigInt), a warning is logged and the message falls back to the value's
 * string form instead of the constructor itself failing.
 */
export class RichError extends Error {
  /**
   * @param {*} msg - Value to embed in the message; should be JSON serializable.
   */
  constructor(msg) {
    let textMessage;
    try {
      textMessage = JSON.stringify(msg);
    } catch(e) {
      // A derived-class constructor must call super() before it returns;
      // bailing out early here would throw a ReferenceError that hides the
      // real problem. Fall back to a plain string description instead.
      let shown;
      try {
        shown = String(msg);
      } catch(conversionError) {
        // e.g. objects without a usable toString (null-prototype objects)
        shown = Object.prototype.toString.call(msg);
      }
      console.warn(`Could not create RichError from argument ${shown} as JSON serialization failed. RichError argument MUST be JSON serializable. Failure error was:`, e);
      textMessage = shown;
    }
    super(textMessage);
  }
}
/* text nodes inside these elements that are ignored */
export const FORBIDDEN_TEXT_PARENT = new Set([
'STYLE',
@ -37,7 +68,7 @@ export const ERROR_CODE_SAFE_TO_IGNORE = new Set([
export const SNIP_CONTEXT = 31;
// NOTE(review): the next two declarations both bind NO_SANDBOX. This looks like the
// removed and the added line of a diff shown together; only one can exist in a real module.
export const NO_SANDBOX = process.env.DEBUG_22120 || false;
// Truthy only when both DEBUG_22120 and SET_22120_NO_SANDBOX are set to non-empty
// values in the environment; the value is then the SET_22120_NO_SANDBOX string.
export const NO_SANDBOX = (process.env.DEBUG_22120 && process.env.SET_22120_NO_SANDBOX) || false;
//export const APP_ROOT = '.';
//export const APP_ROOT = __dirname;
@ -52,3 +83,29 @@ export function say(o) {
/**
 * Copies `o` by serializing it to JSON text and parsing that text back.
 * Only JSON-representable data survives the round trip.
 */
export function clone(o) {
  const serialized = JSON.stringify(o);
  return JSON.parse(serialized);
}
/**
 * Repeatedly evaluates `pred` until it yields a truthy value or the allowed
 * number of checks is used up.
 *
 * The first check happens on a later timer tick (never synchronously); after
 * each falsy result the function waits `waitOverride` ms before checking again.
 *
 * @param {Function} pred - Sync or async predicate; its result is awaited.
 * @param {number} [waitOverride=MIN_WAIT] - Delay between checks in ms. A
 *   negative value means "poll forever, using MIN_WAIT as the delay".
 * @param {number} [maxWaits=MAX_WAITS] - Maximum number of checks; a negative
 *   value removes the limit.
 * @returns {Promise<boolean>} true once `pred` is truthy, false when the
 *   checks are exhausted. Rejects with the predicate's error if `pred` throws
 *   or rejects (previously that left the returned promise pending forever and
 *   surfaced only as an unhandled rejection).
 */
export async function untilTrue(pred, waitOverride = MIN_WAIT, maxWaits = MAX_WAITS) {
  let interval = waitOverride;
  let limit = maxWaits;
  if ( interval < 0 ) {
    limit = -1;
    interval = MIN_WAIT;
  }
  const unlimited = limit < 0;
  const pause = ms => new Promise(res => setTimeout(res, ms));

  // keep the first check asynchronous, on a timer tick
  await pause(0);

  let waitCount = 0;
  while ( true ) {
    DEBUG.checkPred && console.log('Checking', pred.toString());
    if ( await pred() ) {
      return true;
    }
    waitCount++;
    if ( !unlimited && waitCount >= limit ) {
      return false;
    }
    await pause(interval);
  }
}

View File

@ -1,6 +1,6 @@
import Ws from 'ws';
import Fetch from 'node-fetch';
import {SHOW_FETCH, DEBUG, ERROR_CODE_SAFE_TO_IGNORE} from './common.js';
import {untilTrue, SHOW_FETCH, DEBUG, ERROR_CODE_SAFE_TO_IGNORE} from './common.js';
const ROOT_SESSION = "browser";
const MESSAGES = new Map();
@ -8,7 +8,18 @@ const MESSAGES = new Map();
export async function connect({port:port = 9222} = {}) {
let webSocketDebuggerUrl, socket;
try {
({webSocketDebuggerUrl} = await Fetch(`http://localhost:${port}/json/version`).then(r => r.json()));
await untilTrue(async () => {
let result = false;
try {
const {webSocketDebuggerUrl} = await Fetch(`http://127.0.0.1:${port}/json/version`).then(r => r.json());
if ( webSocketDebuggerUrl ) {
result = true;
}
} finally {
return result;
}
});
({webSocketDebuggerUrl} = await Fetch(`http://127.0.0.1:${port}/json/version`).then(r => r.json()));
socket = new Ws(webSocketDebuggerUrl);
} catch(e) {
console.log("Error communicating with browser", e);