Pushing
This commit is contained in:
parent
4f94ed216d
commit
296b15345f
|
@ -1 +0,0 @@
|
|||
Sun Jan 2 05:40:42 AM HKT 2022
|
|
@ -1,16 +0,0 @@
|
|||
module.exports = {
|
||||
"env": {
|
||||
"es2021": true,
|
||||
"node": true
|
||||
},
|
||||
"extends": "eslint:recommended",
|
||||
"parserOptions": {
|
||||
"ecmaVersion": 13,
|
||||
"sourceType": "module"
|
||||
},
|
||||
"ignorePatterns": [
|
||||
"build/**/*.js"
|
||||
],
|
||||
"rules": {
|
||||
}
|
||||
};
|
|
@ -1 +0,0 @@
|
|||
Wed Jan 5 02:35:00 PM HKT 2022
|
|
@ -1,5 +0,0 @@
|
|||
|
||||
.*.swp
|
||||
|
||||
# Bundling and packaging
|
||||
bin/*
|
|
@ -1,77 +0,0 @@
|
|||
{
|
||||
"name": "diskernet",
|
||||
"version": "2.6.0",
|
||||
"description": "Library server and an archivist browser controller.",
|
||||
"main": "build/22120.js",
|
||||
"module": "src/app.js",
|
||||
"bin": {
|
||||
"diskernet": "build/22120.js"
|
||||
},
|
||||
"scripts": {
|
||||
"bundle": "npx rollup --config",
|
||||
"start": "node src/app.js",
|
||||
"setup": "bash ./scripts/build_setup.sh",
|
||||
"build": "echo 'please run ./scripts/build_setup.sh first (one time only) and then ./scripts/compile.sh'",
|
||||
"clean": "rm -rf build/* bin/*",
|
||||
"test": "nodemon src/app.js",
|
||||
"save": "nodemon src/app.js 22120 save",
|
||||
"serve": "nodemon src/app.js 22120 serve",
|
||||
"lint": "watch -n 5 npx eslint .",
|
||||
"test-hl": "node src/highlighter.js",
|
||||
"postpublish": "cp package.json .package.build.json",
|
||||
"prepublishOnly": "npm run bundle && npx webpack ."
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/dosyago/22120.git"
|
||||
},
|
||||
"pkg": {
|
||||
"patches": {
|
||||
"./node_modules/fetch-blob/streams.cjs": [
|
||||
"Object.assign(globalThis, require('node:stream/web'))",
|
||||
"Object.assign(globalThis, require('stream').web)"
|
||||
]
|
||||
},
|
||||
"scripts": "build/*.js",
|
||||
"assets": "public/**/*",
|
||||
"outputPath": "bin"
|
||||
},
|
||||
"keywords": [
|
||||
"web-archive",
|
||||
"search-engine",
|
||||
"self-hosted",
|
||||
"offline",
|
||||
"archivist",
|
||||
"library"
|
||||
],
|
||||
"author": "@dosy",
|
||||
"license": "AGPL-3.0",
|
||||
"bugs": {
|
||||
"url": "https://github.com/dosyago/22120/issues"
|
||||
},
|
||||
"homepage": "https://github.com/dosyago/22120#readme",
|
||||
"dependencies": {
|
||||
"chrome-launcher": "latest",
|
||||
"express": "latest",
|
||||
"flexsearch": "^0.7.21",
|
||||
"fz-search": "^1.0.0",
|
||||
"hasha": "latest",
|
||||
"natural": "^5.1.11",
|
||||
"ndx": "^1.0.2",
|
||||
"ndx-query": "^1.0.1",
|
||||
"ndx-serializable": "^1.0.0",
|
||||
"node-fetch": "latest",
|
||||
"ukkonen": "^1.4.0",
|
||||
"ws": "latest"
|
||||
},
|
||||
"devDependencies": {
|
||||
"eslint": "^8.4.1",
|
||||
"esm": "^3.2.25",
|
||||
"nexe": "^1.1.6",
|
||||
"nodemon": "latest",
|
||||
"npx": "^3.0.0",
|
||||
"webpack": "latest",
|
||||
"webpack-cli": "latest",
|
||||
"rollup-plugin-terser": "^7.0.2"
|
||||
}
|
||||
}
|
|
@ -1,67 +0,0 @@
|
|||
{
|
||||
"name": "diskernet",
|
||||
"version": "2.4.11",
|
||||
"type": "module",
|
||||
"description": "Library server and an archivist browser controller.",
|
||||
"main": "src/app.js",
|
||||
"module": "dist/22120-module.js",
|
||||
"bin": {
|
||||
"diskernet": "build/22120.js"
|
||||
},
|
||||
"scripts": {
|
||||
"bundle": "npx rollup --config",
|
||||
"start": "node src/app.js",
|
||||
"setup": "bash ./scripts/build_setup.sh",
|
||||
"build": "echo 'please run ./scripts/build_setup.sh first (one time only) and then ./scripts/compile.sh'",
|
||||
"clean": "rm -rf build/* bin/*",
|
||||
"test": "nodemon src/app.js",
|
||||
"save": "nodemon src/app.js 22120 save",
|
||||
"serve": "nodemon src/app.js 22120 serve",
|
||||
"lint": "watch -n 5 npx eslint .",
|
||||
"test-hl": "node src/highlighter.js",
|
||||
"postpublish": "cp package.json .package.build.json",
|
||||
"prepublishOnly": "npm run bundle && npx webpack ."
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/dosyago/22120.git"
|
||||
},
|
||||
"pkg": {
|
||||
"scripts": "build/*.js",
|
||||
"assets": "public/**/*",
|
||||
"outputPath": "bin"
|
||||
},
|
||||
"keywords": [
|
||||
"archivist",
|
||||
"library"
|
||||
],
|
||||
"author": "@dosy",
|
||||
"license": "AGPL-3.0",
|
||||
"bugs": {
|
||||
"url": "https://github.com/dosyago/22120/issues"
|
||||
},
|
||||
"homepage": "https://github.com/dosyago/22120#readme",
|
||||
"dependencies": {
|
||||
"chrome-launcher": "latest",
|
||||
"express": "latest",
|
||||
"flexsearch": "^0.7.21",
|
||||
"fz-search": "^1.0.0",
|
||||
"hasha": "latest",
|
||||
"natural": "^5.1.11",
|
||||
"ndx": "^1.0.2",
|
||||
"ndx-query": "^1.0.1",
|
||||
"ndx-serializable": "^1.0.0",
|
||||
"node-fetch": "latest",
|
||||
"ukkonen": "^1.4.0",
|
||||
"ws": "latest"
|
||||
},
|
||||
"devDependencies": {
|
||||
"eslint": "^8.4.1",
|
||||
"esm": "^3.2.25",
|
||||
"nexe": "^1.1.6",
|
||||
"nodemon": "latest",
|
||||
"npx": "^3.0.0",
|
||||
"webpack": "latest",
|
||||
"webpack-cli": "latest"
|
||||
}
|
||||
}
|
|
@ -1,54 +0,0 @@
|
|||
import path from 'path';
|
||||
//import {fileURLToPath} from 'url';
|
||||
|
||||
export const DEBUG = process.env.DEBUG_22120 || false;
|
||||
export const SHOW_FETCH = false;
|
||||
|
||||
export const CHECK_INTERVAL = 400;
|
||||
export const TEXT_NODE = 3;
|
||||
export const MAX_HIGHLIGHTABLE_LENGTH = 0; /* 0 is no max length for highlight */
|
||||
export const MAX_TITLE_LENGTH = 140;
|
||||
export const MAX_URL_LENGTH = 140;
|
||||
export const MAX_HEAD = 140;
|
||||
|
||||
/* text nodes inside these elements that are ignored */
|
||||
export const FORBIDDEN_TEXT_PARENT = new Set([
|
||||
'STYLE',
|
||||
'SCRIPT',
|
||||
'NOSCRIPT',
|
||||
/* we could remove these last two so as to index them as well */
|
||||
'DATALIST',
|
||||
'OPTION'
|
||||
]);
|
||||
export const ERROR_CODE_SAFE_TO_IGNORE = new Set([
|
||||
-32000, /* message:
|
||||
Can only get response body on requests captured after headers received.
|
||||
* ignore because:
|
||||
seems to only happen when new navigation aborts all
|
||||
pending requests of the unloading page
|
||||
*/
|
||||
-32602, /* message:
|
||||
Invalid InterceptionId.
|
||||
* ignore because:
|
||||
seems to only happen when new navigation aborts all
|
||||
pending requests of the unloading page
|
||||
*/
|
||||
]);
|
||||
|
||||
export const SNIP_CONTEXT = 31;
|
||||
|
||||
export const NO_SANDBOX = process.env.DEBUG_22120 || false;
|
||||
|
||||
//export const APP_ROOT = '.';
|
||||
export const APP_ROOT = __dirname;
|
||||
//export const APP_ROOT = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
export const sleep = ms => new Promise(res => setTimeout(res, ms));
|
||||
|
||||
export function say(o) {
|
||||
console.log(JSON.stringify(o));
|
||||
}
|
||||
|
||||
export function clone(o) {
|
||||
return JSON.parse(JSON.stringify(o));
|
||||
}
|
|
@ -1,54 +0,0 @@
|
|||
import path from 'path';
|
||||
import {fileURLToPath} from 'url';
|
||||
|
||||
export const DEBUG = process.env.DEBUG_22120 || false;
|
||||
export const SHOW_FETCH = false;
|
||||
|
||||
export const CHECK_INTERVAL = 400;
|
||||
export const TEXT_NODE = 3;
|
||||
export const MAX_HIGHLIGHTABLE_LENGTH = 0; /* 0 is no max length for highlight */
|
||||
export const MAX_TITLE_LENGTH = 140;
|
||||
export const MAX_URL_LENGTH = 140;
|
||||
export const MAX_HEAD = 140;
|
||||
|
||||
/* text nodes inside these elements that are ignored */
|
||||
export const FORBIDDEN_TEXT_PARENT = new Set([
|
||||
'STYLE',
|
||||
'SCRIPT',
|
||||
'NOSCRIPT',
|
||||
/* we could remove these last two so as to index them as well */
|
||||
'DATALIST',
|
||||
'OPTION'
|
||||
]);
|
||||
export const ERROR_CODE_SAFE_TO_IGNORE = new Set([
|
||||
-32000, /* message:
|
||||
Can only get response body on requests captured after headers received.
|
||||
* ignore because:
|
||||
seems to only happen when new navigation aborts all
|
||||
pending requests of the unloading page
|
||||
*/
|
||||
-32602, /* message:
|
||||
Invalid InterceptionId.
|
||||
* ignore because:
|
||||
seems to only happen when new navigation aborts all
|
||||
pending requests of the unloading page
|
||||
*/
|
||||
]);
|
||||
|
||||
export const SNIP_CONTEXT = 31;
|
||||
|
||||
export const NO_SANDBOX = process.env.DEBUG_22120 || false;
|
||||
|
||||
//export const APP_ROOT = '.';
|
||||
//export const APP_ROOT = __dirname;
|
||||
export const APP_ROOT = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
export const sleep = ms => new Promise(res => setTimeout(res, ms));
|
||||
|
||||
export function say(o) {
|
||||
console.log(JSON.stringify(o));
|
||||
}
|
||||
|
||||
export function clone(o) {
|
||||
return JSON.parse(JSON.stringify(o));
|
||||
}
|
|
@ -1,9 +1,25 @@
|
|||
import path from 'path';
|
||||
import {fileURLToPath} from 'url';
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
|
||||
export const DEBUG = process.env.DEBUG_22120 || false;
|
||||
export const DEBUG = {
|
||||
debug: process.env.DEBUG_22120 || false,
|
||||
checkPred: true
|
||||
}
|
||||
export const SHOW_FETCH = false;
|
||||
|
||||
// server related
|
||||
export const PUBLIC_SERVER = true;
|
||||
|
||||
// crawl related
|
||||
export const MIN_TIME_PER_PAGE = 10000; // NOTE(review): presumably milliseconds — confirm against the crawler
|
||||
export const MAX_TIME_PER_PAGE = 32000; // NOTE(review): presumably milliseconds — confirm against the crawler
|
||||
export const MIN_WAIT = 200; // default delay between untilTrue() polls; handed to setTimeout, so milliseconds
|
||||
export const MAX_WAITS = 300; // default number of untilTrue() polls before it gives up and resolves false
|
||||
export const BATCH_SIZE = 5; // crawl batch size (how many concurrent tabs for crawling)
|
||||
export const MAX_REAL_URL_LENGTH = 2**15 - 1; // 32767
|
||||
|
||||
export const CHECK_INTERVAL = 400;
|
||||
export const TEXT_NODE = 3;
|
||||
export const MAX_HIGHLIGHTABLE_LENGTH = 0; /* 0 is no max length for highlight */
|
||||
|
@ -11,6 +27,21 @@ export const MAX_TITLE_LENGTH = 140;
|
|||
export const MAX_URL_LENGTH = 140;
|
||||
export const MAX_HEAD = 140;
|
||||
|
||||
export const GO_SECURE = fs.existsSync(path.resolve(os.homedir(), 'local-sslcerts', 'privkey.pem'));
|
||||
|
||||
/**
 * Error whose `message` is the JSON serialization of the constructor argument,
 * so structured data can travel inside an ordinary Error.
 *
 * The argument should be JSON serializable. If serialization fails (for
 * example a circular structure or a BigInt), a warning is logged and the
 * message falls back to the JSON-encoded string form of the argument, so the
 * constructor still produces a usable RichError.
 */
export class RichError extends Error {
  /**
   * @param {*} msg - value to serialize into the error message.
   */
  constructor(msg) {
    let textMessage;
    try {
      textMessage = JSON.stringify(msg);
    } catch(e) {
      // String() itself throws for objects without a usable toString
      // (e.g. Object.create(null)), so guard the description as well.
      let printable;
      try {
        printable = String(msg);
      } catch {
        printable = Object.prototype.toString.call(msg);
      }
      console.warn(`Could not create RichError from argument ${printable} as JSON serialization failed. RichError argument MUST be JSON serializable. Failure error was:`, e);
      // A derived constructor must call super() before it returns; returning
      // early here would throw a ReferenceError instead of building the error.
      // Keep the message valid JSON so consumers that parse it still work.
      textMessage = JSON.stringify(printable);
    }
    super(textMessage);
  }
}
|
||||
|
||||
/* text nodes inside these elements that are ignored */
|
||||
export const FORBIDDEN_TEXT_PARENT = new Set([
|
||||
'STYLE',
|
||||
|
@ -37,7 +68,7 @@ export const ERROR_CODE_SAFE_TO_IGNORE = new Set([
|
|||
|
||||
export const SNIP_CONTEXT = 31;
|
||||
|
||||
export const NO_SANDBOX = process.env.DEBUG_22120 || false;
|
||||
export const NO_SANDBOX = (process.env.DEBUG_22120 && process.env.SET_22120_NO_SANDBOX) || false;
|
||||
|
||||
//export const APP_ROOT = '.';
|
||||
//export const APP_ROOT = __dirname;
|
||||
|
@ -52,3 +83,29 @@ export function say(o) {
|
|||
/**
 * Returns a copy of `o` produced by a JSON round trip
 * (JSON.stringify followed by JSON.parse).
 * Only JSON-representable data survives the trip, and JSON.stringify
 * throws on circular structures.
 */
export function clone(o) {
|
||||
return JSON.parse(JSON.stringify(o));
|
||||
}
|
||||
|
||||
/**
 * Polls `pred` until it yields a truthy value or the poll budget is spent.
 *
 * @param {Function} pred - sync or async predicate; its result is awaited.
 * @param {number} [waitOverride=MIN_WAIT] - delay between polls, passed to
 *   setTimeout. A negative value means "poll forever" and the delay falls
 *   back to MIN_WAIT.
 * @param {number} [maxWaits=MAX_WAITS] - maximum number of polls; a negative
 *   value disables the limit.
 * @returns {Promise<boolean>} resolves true as soon as `pred` is truthy and
 *   false once the poll budget is exhausted. Rejects with the predicate's
 *   error if `pred` throws or rejects.
 */
export async function untilTrue(pred, waitOverride = MIN_WAIT, maxWaits = MAX_WAITS) {
  if ( waitOverride < 0 ) {
    maxWaits = -1;
    waitOverride = MIN_WAIT;
  }
  let waitCount = 0;
  let resolve;
  let reject;
  const pr = new Promise((res, rej) => {
    resolve = res;
    reject = rej;
  });
  // First poll is deferred so the caller always receives the promise first.
  setTimeout(checkPred, 0);
  return pr;

  async function checkPred() {
    let satisfied;
    try {
      DEBUG.checkPred && console.log('Checking', pred.toString());
      satisfied = await pred();
    } catch(e) {
      // checkPred runs from a timer, so an error here would otherwise be an
      // unhandled rejection and leave `pr` pending forever. Hand it to the caller.
      reject(e);
      return;
    }
    if ( satisfied ) {
      resolve(true);
      return;
    }
    waitCount++;
    if ( waitCount < maxWaits || maxWaits < 0 ) {
      setTimeout(checkPred, waitOverride);
    } else {
      resolve(false);
    }
  }
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import Ws from 'ws';
|
||||
import Fetch from 'node-fetch';
|
||||
import {SHOW_FETCH, DEBUG, ERROR_CODE_SAFE_TO_IGNORE} from './common.js';
|
||||
import {untilTrue, SHOW_FETCH, DEBUG, ERROR_CODE_SAFE_TO_IGNORE} from './common.js';
|
||||
|
||||
const ROOT_SESSION = "browser";
|
||||
const MESSAGES = new Map();
|
||||
|
@ -8,7 +8,18 @@ const MESSAGES = new Map();
|
|||
export async function connect({port:port = 9222} = {}) {
|
||||
let webSocketDebuggerUrl, socket;
|
||||
try {
|
||||
({webSocketDebuggerUrl} = await Fetch(`http://localhost:${port}/json/version`).then(r => r.json()));
|
||||
await untilTrue(async () => {
|
||||
let result = false;
|
||||
try {
|
||||
const {webSocketDebuggerUrl} = await Fetch(`http://127.0.0.1:${port}/json/version`).then(r => r.json());
|
||||
if ( webSocketDebuggerUrl ) {
|
||||
result = true;
|
||||
}
|
||||
} finally {
|
||||
return result;
|
||||
}
|
||||
});
|
||||
({webSocketDebuggerUrl} = await Fetch(`http://127.0.0.1:${port}/json/version`).then(r => r.json()));
|
||||
socket = new Ws(webSocketDebuggerUrl);
|
||||
} catch(e) {
|
||||
console.log("Error communicating with browser", e);
|
||||
|
|
Loading…
Reference in New Issue