Cris Stringfellow 2023-01-15 02:07:52 +08:00
commit 3e8d2ebae9
No known key found for this signature in database
50 changed files with 9985 additions and 0 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

16
.eslintrc.cjs Normal file

@@ -0,0 +1,16 @@
module.exports = {
"env": {
"es2021": true,
"node": true
},
"extends": "eslint:recommended",
"parserOptions": {
"ecmaVersion": 13,
"sourceType": "module"
},
"ignorePatterns": [
"build/**/*.js"
],
"rules": {
}
};

3
.github/FUNDING.yml vendored Normal file

@@ -0,0 +1,3 @@
# These are supported funding model platforms
custom: https://buy.stripe.com/3cs7tEcC53Yv3zG8xb

124
.gitignore vendored Normal file

@@ -0,0 +1,124 @@
.*.swp
# Bundling and packaging
22120.exe
22120.nix
22120.mac
22120.win32.exe
22120.nix32
bin/*
build/*
#Leave these to allow install by npm -g
#22120.js
#*.22120.js
# Library
public/library/cache.json
public/library/http*
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# TypeScript v1 declaration files
typings/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
.env
.env.test
# parcel-bundler cache (https://parceljs.org/)
.cache
# Next.js build output
.next
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and *not* Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port

1
.npm.release Normal file

@@ -0,0 +1 @@
Sun Jan 15 01:13:51 CST 2023

5
.npmignore Normal file

@@ -0,0 +1,5 @@
.*.swp
# Bundling and packaging
bin/*

112
LICENSE.md Normal file

@@ -0,0 +1,112 @@
Copyright Dosyago Corporation & Cris Stringfellow (https://dosaygo.com)
22120 and all previously released versions, including binaries, NPM packages, and
Docker images (including those named archivist1 and any other names),
are re-licensed under the following PolyForm Strict License 1.0.0, and all previous
licenses are revoked.
# PolyForm Strict License 1.0.0
<https://polyformproject.org/licenses/strict/1.0.0>
## Acceptance
In order to get any license under these terms, you must agree
to them as both strict obligations and conditions to all
your licenses.
## Copyright License
The licensor grants you a copyright license for the software
to do everything you might do with the software that would
otherwise infringe the licensor's copyright in it for any
permitted purpose, other than distributing the software or
making changes or new works based on the software.
## Patent License
The licensor grants you a patent license for the software that
covers patent claims the licensor can license, or becomes able
to license, that you would infringe by using the software.
## Noncommercial Purposes
Any noncommercial purpose is a permitted purpose.
## Personal Uses
Personal use for research, experiment, and testing for
the benefit of public knowledge, personal study, private
entertainment, hobby projects, amateur pursuits, or religious
observance, without any anticipated commercial application,
is use for a permitted purpose.
## Noncommercial Organizations
Use by any charitable organization, educational institution,
public research organization, public safety or health
organization, environmental protection organization,
or government institution is use for a permitted purpose
regardless of the source of funding or obligations resulting
from the funding.
## Fair Use
You may have "fair use" rights for the software under the
law. These terms do not limit them.
## No Other Rights
These terms do not allow you to sublicense or transfer any of
your licenses to anyone else, or prevent the licensor from
granting licenses to anyone else. These terms do not imply
any other licenses.
## Patent Defense
If you make any written claim that the software infringes or
contributes to infringement of any patent, your patent license
for the software granted under these terms ends immediately. If
your company makes such a claim, your patent license ends
immediately for work on behalf of your company.
## Violations
The first time you are notified in writing that you have
violated any of these terms, or done anything with the software
not covered by your licenses, your licenses can nonetheless
continue if you come into full compliance with these terms,
and take practical steps to correct past violations, within
32 days of receiving notice. Otherwise, all your licenses
end immediately.
## No Liability
***As far as the law allows, the software comes as is, without
any warranty or condition, and the licensor will not be liable
to you for any damages arising out of these terms or the use
or nature of the software, under any kind of legal claim.***
## Definitions
The **licensor** is the individual or entity offering these
terms, and the **software** is the software the licensor makes
available under these terms.
**You** refers to the individual or entity agreeing to these
terms.
**Your company** is any legal entity, sole proprietorship,
or other kind of organization that you work for, plus all
organizations that have control over, are under the control of,
or are under common control with that organization. **Control**
means ownership of substantially all the assets of an entity,
or the power to direct its management and policies by vote,
contract, or otherwise. Control can be direct or indirect.
**Your licenses** are all the licenses granted to you for the
software under these terms.
**Use** means anything you do with the software requiring one
of your licenses.

7
NOTICE Normal file

@@ -0,0 +1,7 @@
Copyright Dosyago Corporation & Cris Stringfellow (https://dosaygo.com)
22120 and all previously released versions, including binaries, NPM packages, and
Docker images (including those named archivist1 and all other previous names)
are re-licensed under the following PolyForm Strict License 1.0.0, and all previous
licenses are revoked.

120
README.md Normal file

@@ -0,0 +1,120 @@
# :floppy_disk: [DiskerNet](https://github.com/c9fe/22120) [![source lines of code](https://sloc.xyz/github/crisdosyago/Diskernet)](https://sloc.xyz) [![npm downloads (22120)](https://img.shields.io/npm/dt/archivist1?label=npm%20downloads%20%2822120%29)](https://npmjs.com/package/archivist1) [![npm downloads (diskernet, since Jan 2022)](https://img.shields.io/npm/dt/diskernet?label=npm%20downloads%20%28diskernet%2C%20since%20Jan%202022%29)](https://npmjs.com/package/diskernet) [![binary downloads](https://img.shields.io/github/downloads/c9fe/22120/total?label=OS%20binary%20downloads)](https://GitHub.com/crisdosyago/DiskerNet/releases) [![visitors+++](https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2Fc9fe%2F22120&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=%28today%2Ftotal%29%20visitors%2B%2B%2B%20since%20Oct%2027%202020&edge_flat=false)](https://hits.seeyoufarm.com) ![version](https://img.shields.io/npm/v/archivist1) [![DiskerNet slogan](https://img.shields.io/badge/%F0%9F%92%BE%20DiskerNet-an%20internet%20on%20yer%20disc-hotpink)](#DiskerNet)
<h1>DiskerNet</h1>
<p>Introducing DiskerNet - the tool that connects to your browser and makes it easy to save and organize the things you discover online.</p>
<p>With DiskerNet, you can choose between two modes:</p>
<ul>
<li>Archive everything you browse, automatically saving every page you visit</li>
<li>Archive only what you bookmark, allowing you to selectively save the most important content</li>
</ul>
<p>No matter which mode you choose, DiskerNet makes it easy to search, browse, and share your archived content. Plus, our tool is lightweight and doesn't require any special plugins or extensions.</p>
<h2>Key features</h2>
<ul>
<li>Connects to your browser and automatically archives your browsing activity</li>
<li>Two modes: archive everything or only bookmark-worthy content</li>
<li>Easy-to-use interface for searching, browsing, and sharing your archives</li>
<li>Lightweight and doesn't require any special plugins or extensions</li>
</ul>
<h2>Why choose DiskerNet?</h2>
<ul>
<li>Never lose track of your favorite online content again</li>
<li>Save time by quickly finding the information you need</li>
<li>Share your archives with others, or keep them private</li>
<li>Easy to use and doesn't require any extra tools or plugins</li>
</ul>
<p>To get started with DiskerNet, simply download the tool and connect it to your browser. You'll be amazed at how much easier it is to save and organize the things you discover online.</p>
<h2>Licensing</h2>
<p>DiskerNet is licensed under the Polyform Strict License 1.0.0. You can purchase a license for different uses below:</p>
- For personal, research, noncommercial purposes: Buy a Perpetual Non-commercial Use License of the current Version re-upped Monthly to the Latest Version, [USD$1.99 per month, paid yearly](https://buy.stripe.com/7sIg0acC5amT7PW6pl). Or [purchase an Unlimited Time version for the equivalent of 5 years](https://buy.stripe.com/14k5lw31veD96LS29b).
- For part of your internal tooling in your org: Buy a Perpetual Internal Use License of the current Version re-upped Monthly to the Latest Version, [USD $12.99 per month, paid yearly](https://buy.stripe.com/9AQaFQ59D52z3zGdRS), or [purchase an Unlimited Time version for the equivalent of 5 years](https://buy.stripe.com/fZe3do1Xr1Qn9Y4011).
- For anywhere in your business: Buy a Perpetual Small-medium Business License of the current Version re-upped Monthly to the Latest Version, [USD $99 per month, paid yearly](https://buy.stripe.com/eVa8xIcC5gLhfio6po). Or [purchase an Unlimited Time version for the equivalent of 5 years](https://buy.stripe.com/00g5lwcC566D7PW00Z).
## Get it
[Download a release](https://github.com/crisdosyago/Diskernet/releases)
or ...
**Get it on [npm](https://www.npmjs.com/package/diskernet):**
```sh
$ npm i -g diskernet@latest
```
or...
**Build your own binaries:**
```sh
$ git clone https://github.com/crisdosyago/DiskerNet
$ cd DiskerNet
$ npm i
$ ./scripts/build_setup.sh
$ ./scripts/compile.sh
$ cd bin/
```
### Frequently Asked Questions
**What is the licensing for Diskernet?**
Diskernet is licensed under the Polyform Strict License 1.0.0. This license allows individuals to use the tool for free for personal, noncommercial purposes. It also allows businesses and organizations to purchase a license for use in their internal tooling or anywhere in their business.
**Why did you choose the Polyform license for Diskernet?**
We chose the Polyform license for Diskernet because it offers several benefits. It protects our rights as the creators of the tool, it allows individuals to use the tool for free for personal use, and it allows businesses to purchase a license for use in their operations. We believe that the Polyform license strikes a good balance between the interests of the open source community and the rights of the creators of Diskernet.
**Is the Polyform license open source?**
The Polyform license is not an open source license as defined by the Open Source Initiative (OSI). However, it allows individuals to use the tool for free for personal, noncommercial purposes, and it allows businesses to purchase a license for use in their operations.
**Can I modify or distribute Diskernet under the Polyform license?**
No, the Polyform license does not allow users to modify or distribute Diskernet without the permission of the creators. This is to protect our rights as the creators of the tool and to ensure that our work is not used or distributed without our permission.
**Can I use Diskernet for commercial purposes?**
Yes, you can use Diskernet for commercial purposes if you purchase a license from us. The license allows businesses and organizations to use Diskernet for their own purposes, including in their internal tooling or anywhere in their business.
**What is Diskernet?**
Diskernet is a tool for archiving and organizing online content. It connects to your browser and automatically saves the pages you visit, allowing you to easily search, browse, and share your archived content.
**What are the key features of Diskernet?**
The key features of Diskernet include:
- Connects to your browser and automatically archives your browsing activity
- Two modes: archive everything or only bookmark-worthy content
- Easy-to-use interface for searching, browsing, and sharing your archives
- Lightweight and doesn't require any special plugins or extensions
**Why should I use Diskernet?**
There are several reasons to use Diskernet:
- Never lose track of your favorite online content again
- Save time by quickly finding the information you need
- Share your archives with others, or keep them private
- Easy to use and doesn't require any extra tools or plugins
**How do I get started with Diskernet?**
Getting started with Diskernet is easy! Simply download the tool and connect it to your browser. You'll be amazed at how much easier it is to save and organize the things you discover online.
**Does Diskernet work with all browsers?**
No, right now Diskernet is compatible with Chrome and Chromium only (although theoretically compatible with Edge owing to the Remote Debugging Protocol).

275
docs/OLD-README.md Normal file

@@ -0,0 +1,275 @@
# :floppy_disk: [DiskerNet](https://github.com/c9fe/22120) [![source lines of code](https://sloc.xyz/github/crisdosyago/Diskernet)](https://sloc.xyz) [![npm downloads (22120)](https://img.shields.io/npm/dt/archivist1?label=npm%20downloads%20%2822120%29)](https://npmjs.com/package/archivist1) [![npm downloads (diskernet, since Jan 2022)](https://img.shields.io/npm/dt/diskernet?label=npm%20downloads%20%28diskernet%2C%20since%20Jan%202022%29)](https://npmjs.com/package/diskernet) [![binary downloads](https://img.shields.io/github/downloads/c9fe/22120/total?label=OS%20binary%20downloads)](https://GitHub.com/crisdosyago/DiskerNet/releases) [![visitors+++](https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2Fc9fe%2F22120&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=%28today%2Ftotal%29%20visitors%2B%2B%2B%20since%20Oct%2027%202020&edge_flat=false)](https://hits.seeyoufarm.com) ![version](https://img.shields.io/npm/v/archivist1)
:floppy_disk: - an internet on yer Disk
**DiskerNet** (codename *PROJECT 22120*) is an archivist browser controller that caches everything you browse, and a library server with full text search to serve your archive.
**Now with full text search over your archive.**
This feature was just released in version 2, so it will improve over time.
## And one more thing...
**Coming to a future release, soon!**: The ability to publish your own search engine that you curated with the best resources based on your expert knowledge and experience.
## Get it
[Download a release](https://github.com/crisdosyago/Diskernet/releases)
or ...
**Get it on [npm](https://www.npmjs.com/package/diskernet):**
```sh
$ npm i -g diskernet@latest
```
or...
**Build your own binaries:**
```sh
$ git clone https://github.com/crisdosyago/DiskerNet
$ cd DiskerNet
$ npm i
$ ./scripts/build_setup.sh
$ ./scripts/compile.sh
$ cd bin/
```
<span id=toc></span>
----------------
- [Overview](#classical_building-22120---)
* [License](#license)
* [About](#about)
* [Get 22120](#get-22120)
* [Using](#using)
+ [Pick save mode or serve mode](#pick-save-mode-or-serve-mode)
+ [Exploring your 22120 archive](#exploring-your-22120-archive)
* [Format](#format)
* [Why not WARC (or another format like MHTML) ?](#why-not-warc-or-another-format-like-mhtml-)
* [How it works](#how-it-works)
* [FAQ](#faq)
+ [Do I need to download something?](#do-i-need-to-download-something)
+ [Can I use this with a browser that's not Chrome-based?](#can-i-use-this-with-a-browser-thats-not-chrome-based)
+ [How does this interact with Ad blockers?](#how-does-this-interact-with-ad-blockers)
+ [How secure is running chrome with remote debugging port open?](#how-secure-is-running-chrome-with-remote-debugging-port-open)
+ [Is this free?](#is-this-free)
+ [What if it can't find my chrome?](#what-if-it-cant-find-my-chrome)
+ [What's the roadmap?](#whats-the-roadmap)
+ [What about streaming content?](#what-about-streaming-content)
+ [Can I black list domains to not archive them?](#can-i-black-list-domains-to-not-archive-them)
+ [Is there a DEBUG mode for troubleshooting?](#is-there-a-debug-mode-for-troubleshooting)
+ [Can I version the archive?](#can-i-version-the-archive)
+ [Can I change the archive path?](#can-i-change-the-archive-path)
+ [Can I change this other thing?](#can-i-change-this-other-thing)
------------------
## License
22120 is licensed under Polyform Strict License 1.0.0 (no modification, no distribution). You can purchase a license for different uses below:
- for personal, research, noncommercial purposes:
[Buy a Perpetual Non-commercial Use License of the current Version re-upped Monthly to the Latest Version, USD$1.99 per month](https://buy.stripe.com/fZeg0a45zdz58U028z) [Read license](https://github.com/DOSYCORPS/polyform-licenses/blob/1.0.0/PolyForm-Noncommercial-1.0.0.md)
- for part of your internal tooling in your org: [Buy a Perpetual Internal Use License of the current Version re-upped Monthly to the Latest Version, USD $12.99 per month](https://buy.stripe.com/00g4hsgSlbqXb288wY) [Read license](https://github.com/DOSYCORPS/polyform-licenses/blob/1.0.0/PolyForm-Internal-Use-1.0.0.md)
- for anywhere in your business: [Buy a Perpetual Small-medium Business License of the current Version re-upped Monthly to the Latest Version, USD $99 per month](https://buy.stripe.com/aEUbJUgSl2UreekdRj) [Read license](https://github.com/DOSYCORPS/polyform-licenses/blob/1.0.0/PolyForm-Small-Business-1.0.0.md)
<p align=right><small><a href=#toc>Top</a></small></p>
## About
**This project literally makes your web browsing available COMPLETELY OFFLINE.** Your browser does not even know the difference. It's literally that amazing. Yes.
Save your browsing, then switch off the net and go to `http://localhost:22120` and switch mode to **serve** then browse what you browsed before. It all still works.
**warning: if you have Chrome open, 22120 will close it automatically when it starts, and relaunch it. You may lose any unsaved work.**
<p align=right><small><a href=#toc>Top</a></small></p>
## Get 22120
3 ways to get it:
1. Get a binary from the [releases page](https://github.com/c9fe/22120/releases), or
2. Run with npx: `npx diskernet@latest`, or
- `npm i -g diskernet@latest && exlibris`
3. Clone this repo and run as a Node.JS app: `npm i && npm start`
<p align=right><small><a href=#toc>Top</a></small></p>
## Using
### Pick save mode or serve mode
Go to http://localhost:22120 in your browser,
and follow the instructions.
<p align=right><small><a href=#toc>Top</a></small></p>
### Exploring your 22120 archive
Archive will be located in `22120-arc/public/library`\*
But it's not public, don't worry!
You can also check out the archive index, for a listing of every title in the archive. The index is accessible from the control page, which by default is at [http://localhost:22120](http://localhost:22120) (unless you changed the port).
\**Note: `22120-arc` is the archive root of a single archive, and by default it is placed in your home directory. But you can change the parent directory for `22120-arc` to have multiple archives.*
<p align=right><small><a href=#toc>Top</a></small></p>
## Format
The archive format is:
`22120-arc/public/library/<resource-origin>/<path-hash>.json`
Inside the JSON file is a JSON object with the headers, response code, key, and a base64-encoded response body.
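For illustration only, such a record might look roughly like this (the field names here are approximate, not the exact 22120 schema):
```json
{
  "key": "GET https://example.com/about",
  "responseCode": 200,
  "responseHeaders": [
    { "name": "content-type", "value": "text/html; charset=utf-8" }
  ],
  "responseBodyBase64": "PCFET0NUWVBFIGh0bWw+..."
}
```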
<p align=right><small><a href=#toc>Top</a></small></p>
## Why not WARC (or another format like MHTML) ?
**The case for the 22120 format.**
Other formats (like MHTML and SingleFile) save translations of the resources you archive. They create modifications, such as altering the internal structure of the HTML, changing hyperlinks and URLs into "flat" embedded data URIs or local references, and require other "hacks" in order to save a "perceptually similar" copy of the archived resource.
22120 throws all that out, and calls rubbish on it. 22120 saves a *verbatim* **high-fidelity** copy of the resources you archive. It does not alter their internal structure in any way. Instead it records each resource in its own metadata file. In that way it is more similar to HAR and WARC, but still radically different. Compared to WARC and HAR, our format is radically simplified, throwing out most of the metadata information and unnecessary fields these formats collect.
**Why?**
At 22120, we believe in the resources and in verbatim copies. We don't anoint ourselves as all-knowing enough to modify the resource source of truth before we archive it, just so it can "fit the format" we choose. We don't believe we need to decorate with obtuse and superfluous metadata. We don't believe we should be modifying or altering resources we archive. We believe we should save them exactly as they were presented. We believe in simplicity. We believe the format should fit (or at least accommodate, and be suited to) the resource, not the other way around. We don't believe in conflating **metadata** with **content**, so we separate them. We believe separating metadata and content, and keeping the content pure and unaltered throughout the archiving process, is not only the right thing to do, it simplifies every part of the audit trail, because we know that the modifications between archived copies of a resource are due to changes to the resources themselves, not artefacts of the format or archiving process.
Both SingleFile and MHTML require mutilating modifications of the resources so that the resources can be "forced to fit" the format. At 22120, we believe this is not required (and in any case should never be performed). We see it as akin to lopping off the arms of a Roman statue in order to fit it into a presentation and security display box. How ridiculous! The web may be a more "pliable" medium, but that does not mean we should treat it without respect for its inherent content.
**Why is changing the internal structure of resources so bad?**
In our view, the internal structure of the resource as presented *is the canon*. Internal structure is not just substitutable "presentation" - no, in fact it encodes vital semantic information such as hyperlink relationships, source choices, and the "strokes" of the resource author as they create their content, even if it's mediated through a web server or web framework.
**Why else is 22120 the obvious and natural choice?**
22120 also archives resources exactly as they are sent to the browser. It runs connected to a browser, and so is able to access the full scope of resources (with the current exception of video, audio and WebSockets) in their highest fidelity, without modification, exactly as the browser receives them, and to archive them in the exact format presented to the user. Many resources undergo presentational and processing changes before they are presented to the user. This is the ubiquitous "web app", where client-side scripting enabled by JavaScript creates resources and resource views on the fly. These sorts of "hyper", "realtime", or "client-side" resources, prevalent in SPAs, cannot be archived with the normal archive flow of traditional `wget`-based archiving tools.
In short, the web is an *online* medium, and it should be archived and presented in the same fashion. 22120 archives content exactly as it is received and presented by a browser, and it also replays that content exactly as if the resource were being taken from online. Yes, it requires a browser for this exercise, but that browser need not be connected to the internet. It is only natural that viewing a web resource requires the web browser. And because of 22120 the browser doesn't know the difference! Resources presented to the browser from a remote web site, and resources given to the browser by 22120, are seen by the browser as ***exactly the same.*** This ensures that the people viewing the archive are not let down and are given the chance to have the exact same experience as if they were viewing the resource online.
<p align=right><small><a href=#toc>Top</a></small></p>
## How it works
22120 uses the DevTools protocol to intercept all requests, and caches responses onto disk against a key made of the METHOD and URL. It also maintains an in-memory set of keys so it knows what it has on disk.
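For a flavour of that flow, here is a minimal sketch using the `chrome-remote-interface` package purely for illustration (22120 itself speaks the protocol over its own WebSocket connection, and its real cache layout and key handling are more involved):
```js
import fs from 'node:fs';
import path from 'node:path';
import CDP from 'chrome-remote-interface'; // illustration only, not a 22120 dependency

const LIBRARY = path.join(process.env.HOME || '.', '22120-arc', 'sketch-cache');
const seen = new Set(); // in-memory set of keys we already have on disk

const client = await CDP();                          // attach to the remote debugging port
const { Fetch } = client;
await Fetch.enable({ patterns: [{ requestStage: 'Response' }] });

Fetch.requestPaused(async ({ requestId, request }) => {
  const key = `${request.method} ${request.url}`;    // key = METHOD + URL
  if (!seen.has(key)) {
    const { body, base64Encoded } = await Fetch.getResponseBody({ requestId });
    fs.mkdirSync(LIBRARY, { recursive: true });
    const file = path.join(LIBRARY, encodeURIComponent(key) + '.json');
    fs.writeFileSync(file, JSON.stringify({ key, body, base64Encoded }));
    seen.add(key);
  }
  await Fetch.continueRequest({ requestId });        // let the page load as normal
});
```
In serve mode the same idea runs in reverse: the intercepted request is answered from the on-disk record instead of the network.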
<p align=right><small><a href=#toc>Top</a></small></p>
## FAQ
### Do I need to download something?
Yes. But... if you like **22120**, you might love the clientless hosted version coming in a future release. You'll be able to build your archives online from any device, without any download, then download the archive to run on any desktop. You'll need to sign up to use it, but you can jump the queue and sign up [today](https://dosyago.com).
### Can I use this with a browser that's not Chrome-based?
No.
<p align=right><small><a href=#toc>Top</a></small></p>
### How does this interact with Ad blockers?
Interacts just fine. The things ad blockers stop will not be archived.
<p align=right><small><a href=#toc>Top</a></small></p>
### How secure is running chrome with remote debugging port open?
Seems pretty secure. It's not exposed to the public internet, and pages you load that try to use it cannot use the protocol for anything (except to open a new tab, which they can do anyway). It seems there's a potential risk from malicious browser extensions, but we'd need to confirm that and, if that's so, work out blocks. See [this useful security related post](https://github.com/c9fe/22120/issues/67) for some info.
<p align=right><small><a href=#toc>Top</a></small></p>
### Is this free?
Yes, this is totally free to download and use for personal non-commercial use. If you want to modify or distribute it, or use it commercially (either internally or for customer functions), you need to purchase a [Noncommercial, internal use, or SMB license](#license).
<p align=right><small><a href=#toc>Top</a></small></p>
### What if it can't find my chrome?
See this useful [issue](https://github.com/c9fe/22120/issues/68).
<p align=right><small><a href=#toc>Top</a></small></p>
### What's the roadmap?
- Full text search ✅
- Library server to serve archive publicly.
- Distributed p2p web browser on IPFS
<p align=right><small><a href=#toc>Top</a></small></p>
### What about streaming content?
The following are probably hard (and I haven't thought much about them):
- Streaming content (audio, video)
- "Impure" request response pairs (such as if you call GET /endpoint 1 time you get "A", if you call it a second time you get "AA", and other examples like this).
- WebSockets (how to capture and replay that faithfully?)
Probably some way to do this tho.
<p align=right><small><a href=#toc>Top</a></small></p>
### Can I black list domains to not archive them?
Yes! Put any domains into `22120-arc/no.json`\*, e.g.:
```json
[
"*.horribleplantations.com",
"*.cactusfernfurniture.com",
"*.gustymeadows.com",
"*.nytimes.com",
"*.cnn.co?"
]
```
22120 will not cache any resource with a host matching those patterns (a matching sketch follows below). Wildcards:
- `*` (0 or more anything) and
- `?` (0 or 1 anything)
\**Note: the `no` file is per-archive. `22120-arc` is the archive root of a single archive, and by default it is placed in your home directory. But you can change the parent directory for `22120-arc` to have multiple archives, and each archive requires its own `no` file, if you want a blacklist in that archive.*
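One way those wildcards could be applied to a hostname (a sketch, not the exact matching code 22120 uses):
```js
// Sketch: convert a no.json pattern into a RegExp over the hostname.
// '*' => 0 or more of anything, '?' => 0 or 1 of anything.
function hostBlocked(hostname, patterns) {
  return patterns.some(pattern => {
    const rx = new RegExp('^' +
      pattern
        .replace(/[.+^${}()|[\]\\]/g, '\\$&') // escape regex specials, keep * and ?
        .replace(/\*/g, '.*')
        .replace(/\?/g, '.?') +
      '$');
    return rx.test(hostname);
  });
}

// hostBlocked('www.nytimes.com', ['*.nytimes.com']) === true
```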
<p align=right><small><a href=#toc>Top</a></small></p>
### Is there a DEBUG mode for troubleshooting?
Yes, just make sure you set an environment variable called `DEBUG_22120` to anything non-empty.
So for example in posix systems:
```bash
export DEBUG_22120=True
```
<p align=right><small><a href=#toc>Top</a></small></p>
### Can I version the archive?
Yes! But you need to use `git` for versioning. Just initialize a git repo in your archive directory, and when you want to save a snapshot, make a new git commit.
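For example (assuming the archive is in the default location, your home directory):
```bash
cd ~/22120-arc
git init             # once
git add -A
git commit -m "archive snapshot $(date -u +%Y-%m-%dT%H:%M:%SZ)"
```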
<p align=right><small><a href=#toc>Top</a></small></p>
### Can I change the archive path?
Yes, there's a control for changing the archive path in the control page: http://localhost:22120
<p align=right><small><a href=#toc>Top</a></small></p>
### Can I change this other thing?
There are a few command line arguments. You'll see the format printed as the first line when you start the program.
For other things you can examine the source code.
<p align=right><small><a href=#toc>Top</a></small></p>

17
docs/SECURITY.md Normal file

@@ -0,0 +1,17 @@
# Security Policy
## Supported Versions
The following versions of this project are currently supported with security updates.
| Version | Supported |
| ------- | ------------------ |
| Latest | :white_check_mark: |
## Reporting a Vulnerability
To report a vulnerability, contact: cris@dosycorp.com
To view previous responsible disclosure vulnerability reports, mediation write-ups, notes and other information, please visit the [Dosyago Responsible Disclosure Center](https://github.com/dosyago/vulnerability-reports)

4
docs/features.md Normal file

@@ -0,0 +1,4 @@
Cool Possible Feature Ideas
- might be nice to have historical documents indexed as well. For example, every time we reload a page, we could add a new copy to the index if it's different... or we could add a new copy if it's been more than X time since the last time we added it (so 1 day, or 1 week). Then we show all results in search (maybe in an expander under the main URL, like "historical URL"), so you can find a result that was on the front page of HN 1 year ago or 3 weeks ago, even if you revisit and reindex HN every day.
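A rough sketch of that rule (all names here are hypothetical, nothing in the codebase yet):
```js
// Decide whether a fresh load of a URL should get a new historical index entry.
// `lastIndexed` maps url -> { time, contentSignature }; both are made-up names.
const HISTORY_INTERVAL = 7 * 24 * 60 * 60 * 1000; // e.g. one week

function shouldAddHistoricalCopy(url, contentSignature, lastIndexed, now = Date.now()) {
  const prev = lastIndexed.get(url);
  if (!prev) return true;                                       // never indexed before
  if (prev.contentSignature !== contentSignature) return true;  // the page actually changed
  return (now - prev.time) > HISTORY_INTERVAL;                  // or it's been long enough
}
```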

12
docs/issues Normal file

@@ -0,0 +1,12 @@
- ndx index seems to lose documents.
- e.g.
1. visit goog:hell
2. visit top link: wiki - hell
3. visit hellomagazine.com
4. search hell
5. see results: goog/hell, wiki/hell, hellomag
6. reload wiki - hell
7. search hell
8. see results: wiki/hell, hellomag
- WHERE THE HELL DID goog/hell go?

25
docs/todo Normal file

@@ -0,0 +1,25 @@
- complete snippet generation
- sometimes we are not getting any segments. In that case we should just show the first part of the file.
- improve trigram segmenter: lower max segment length, increase fore and aft context
- Index.json is randomly getting clobbered sometimes. Investigate and fix. Important because this breaks the whole archive.
- No idea what's causing this after a small investigation. But I've added a log on saveIndex to see when it writes.
- publish button
- way to selectively add (bookmark mode)
- way to remove (all modes) items from index
- save trigram index to disk
- let's not reindex unless we have changed contentSignature
- let's not write FTS indexes unless we have changed them since last time (UpdatedKeys)
- result paging
- We need to not open other localhosts if we already have one open
- We need to reload on localhost 22120 if we open with that
- throttle how often this can occur per URL
- search improvements
- use different min score options for different sources (noticed URL not match meghan highlight for hello mag even tho query got megan and did match and highlight queen in url)
- get snippets earlier (before rendering in lib server) and use to add to signal
- if we have multiple query terms (multiple determined by some form of tokenization) then try to show all terms present in the snippet. even tho one term may be higher scoring. Should we do multiple passes of ukkonen distance one for whole query and one for each term? This will be easier / faster with trigrams I guess. Basically we want snippet to be a relevant summary that provides signal.
- Another way to improve snippet highlights is to 'revert back' the highlighted text and calculate its match/ukkonen distance against the query term (see the sketch at the end of these notes). So e.g. if we get q:'israle beverly', hl:['beverly', 'beverly'], that's good overlap, but if we get hl:['is it really'], even tho that might score ok for israle, it's not a good match. So can we 'score that back': if we go match('is it really', 'israel') and see it is low, we exclude it?
- try an exact match on the query term if possible for highlight. first one.
- we could also add signal from the highlighting to just in time alter the order (e.g. 'hell wiki' search brings google search to top rank, but the Hell wikipedia page has more highlight visible)
- Create instant search (or at least instant queries (so search over previous queries -- not results necessarily))
- an error in Full text search can corrupt the index and make it unrecoverable...we need to guard against this
- this is still happening. sometimes the index is not saved, even on a normal error free restart. unknown why.
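A rough sketch of the 'score it back' highlight check mentioned above, using the `ukkonen` edit-distance package already in the dependencies (the threshold and helper name are illustrative):
```js
import ukkonen from 'ukkonen';

// Reject a highlight fragment whose edit distance back to the query term is
// large relative to the term's length.
function highlightMatchesTerm(highlight, term, maxRatio = 0.5) {
  const distance = ukkonen(highlight.toLowerCase(), term.toLowerCase());
  return distance / term.length <= maxRatio;
}

// highlightMatchesTerm('beverly', 'beverly')     // true  (distance 0)
// highlightMatchesTerm('is it really', 'israel') // false (distance too large)
```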

2760
package-lock.json generated Normal file

File diff suppressed because it is too large.

69
package.json Normal file

@@ -0,0 +1,69 @@
{
"name": "diskernet",
"version": "2.7.1",
"type": "module",
"description": "Library server and an archivist browser controller.",
"main": "src/app.js",
"module": "dist/diskernet.mjs",
"bin": {
"diskernet": "build/diskernet.cjs"
},
"scripts": {
"start": "node src/app.js",
"setup": "bash ./scripts/build_setup.sh",
"build": "echo Ensure you 'npm run setup' first && bash ./scripts/compile.sh",
"compile": "npm run build",
"build-only": "bash ./scripts/build_only.sh",
"clean": "rm -rf build/* bin/*",
"super-clean": "npm run clean || : && rm -rf node_modules || : && rm package-lock.json",
"test": "nodemon src/app.js",
"inspect": "node --inspect-brk=127.0.0.1:9999 src/app.js",
"save": "nodemon src/app.js DiskerNet save",
"serve": "nodemon src/app.js DiskerNet serve",
"lint": "watch -n 5 npx eslint .",
"test-hl": "node src/highlighter.js",
"prepublishOnly": "npm run build-only"
},
"repository": {
"type": "git",
"url": "git+https://github.com/dosyago/DiskerNet.git"
},
"pkg": {
"scripts": "build/**/*.js",
"assets": [
"public/**/*",
"build/vendor/**/*"
],
"outputPath": "bin"
},
"keywords": [
"archivist",
"library"
],
"author": "@dosy",
"license": "PolyForm Strict 1.0",
"bugs": {
"url": "https://github.com/dosyago/DiskerNet/issues"
},
"homepage": "https://github.com/dosyago/DiskerNet#readme",
"dependencies": {
"@667/ps-list": "^1.1.3",
"chrome-launcher": "latest",
"express": "latest",
"flexsearch": "^0.7.21",
"fz-search": "^1.0.0",
"hasha": "latest",
"natural": "^5.1.11",
"ndx": "^1.0.2",
"ndx-query": "^1.0.1",
"ndx-serializable": "^1.0.0",
"node-fetch": "latest",
"ukkonen": "^1.4.0",
"ws": "latest"
},
"devDependencies": {
"esbuild": "0.16.17",
"eslint": "^8.4.1",
"nodemon": "latest"
}
}


@@ -0,0 +1,133 @@
#!/usr/bin/env node
import fs from 'node:fs';
import path from 'node:path';
import child_process from 'node:child_process';
import {
loadPref,
cache_file,
index_file,
} from '../src/args.js';
const CLEAN = true;
const CONCURRENT = 7;
const sleep = ms => new Promise(res => setTimeout(res, ms));
const problems = new Map();
let cleaning = false;
let made = false;
process.on('exit', cleanup);
process.on('SIGINT', cleanup);
process.on('SIGTERM', cleanup);
process.on('SIGHUP', cleanup);
process.on('SIGUSR2', cleanup);
process.on('beforeExit', cleanup);
console.log({Pref:loadPref(), cache_file: cache_file(), index_file: index_file()});
make();
async function make() {
const indexFile = fs.readFileSync(index_file()).toString();
JSON.parse(indexFile).map(([key, value]) => {
if ( typeof key === "number" ) return;
if ( key.startsWith('ndx') ) return;
if ( value.title === undefined ) {
console.log('no title property', {key, value});
// skip entries without a title so the .toLocaleLowerCase() below doesn't throw
return;
}
const url = key;
const title = value.title.toLocaleLowerCase();
if ( title.length === 0 || title.includes('404') || title.includes('not found') ) {
if ( problems.has(url) ) {
console.log('Found duplicate', url, title, problems.get(url));
}
const prob = {title, dupes:[], dupe:false};
problems.set(url, prob);
const cleaned1 = clean(url);
if ( problems.has(cleaned1) ) {
console.log(`Found duplicate`, {url, title, cleaned1, dupeEntry:problems.get(cleaned1)});
prob.dupe = true;
prob.dupes.push(cleaned1);
url !== cleaned1 && (problems.delete(cleaned1), prob.diff = true);
}
const cleaned2 = clean2(url);
if ( problems.has(cleaned2) ) {
console.log(`Found duplicate`, {url, title, cleaned2, dupeEntry: problems.get(cleaned2)});
prob.dupe = true;
prob.dupes.push(cleaned2);
url !== cleaned2 && (problems.delete(cleaned2), prob.diff = true);
}
}
});
made = true;
cleanup();
}
function cleanup() {
if ( cleaning ) return;
if ( ! made ) return;
cleaning = true;
console.log('cleanup running');
const outData = [...problems.entries()].filter(([key, {dupe}]) => dupe);
outData.sort(([a], [b]) => a.localeCompare(b));
fs.writeFileSync(
path.resolve('.', 'url-cleaned-dupes.json'),
JSON.stringify(outData, null, 2)
);
const {size:bytesWritten} = fs.statSync(
path.resolve('.', 'url-cleaned-dupes.json'),
{bigint: true}
);
console.log(`Wrote ${outData.length} dupe urls in ${bytesWritten} bytes.`);
process.exit(0);
}
function clean(urlString) {
const url = new URL(urlString);
if ( url.hash.startsWith('#!') || url.hostname.includes('google.com') || url.hostname.includes('80s.nyc') ) {
} else {
url.hash = '';
}
// iterate over a copy of the keys: deleting entries while iterating the live
// searchParams object can skip parameters
for ( const key of [...url.searchParams.keys()] ) {
if ( key.startsWith('utm_') ) {
url.searchParams.delete(key);
}
}
url.pathname = url.pathname.replace(/\/$/, '');
url.protocol = 'https:';
url.pathname = url.pathname.replace(/(\.htm.?|\.php|\.asp.?)$/, '');
if ( url.hostname.startsWith('www.') ) {
url.hostname = url.hostname.replace(/^www./, '');
}
const key = url.toString();
return key;
}
function clean2(urlString) {
const url = new URL(urlString);
url.pathname = '';
return url.toString();
}
function curlCommand(url) {
return `curl -k -L -s -o /dev/null -w '%{url_effective}' ${JSON.stringify(url)} \
-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
-H 'Accept-Language: en,en-US;q=0.9,zh-TW;q=0.8,zh-CN;q=0.7,zh;q=0.6,ja;q=0.5' \
-H 'Cache-Control: no-cache' \
-H 'Connection: keep-alive' \
-H 'DNT: 1' \
-H 'Pragma: no-cache' \
-H 'Sec-Fetch-Dest: document' \
-H 'Sec-Fetch-Mode: navigate' \
-H 'Sec-Fetch-Site: none' \
-H 'Sec-Fetch-User: ?1' \
-H 'Upgrade-Insecure-Requests: 1' \
-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36' \
-H 'sec-ch-ua: "Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"' \
-H 'sec-ch-ua-mobile: ?0' \
-H 'sec-ch-ua-platform: "macOS"' \
--compressed ;
`;
}

92
public/find_crawlable.mjs Executable file

@@ -0,0 +1,92 @@
#!/usr/bin/env node
import fs from 'node:fs';
import path from 'node:path';
import child_process from 'node:child_process';
const CLEAN = false;
const CONCURRENT = 7;
const sleep = ms => new Promise(res => setTimeout(res, ms));
const entries = [];
let cleaning = false;
process.on('exit', cleanup);
process.on('SIGINT', cleanup);
process.on('SIGTERM', cleanup);
process.on('SIGHUP', cleanup);
process.on('SIGUSR2', cleanup);
process.on('beforeExit', cleanup);
make();
async function make() {
const titlesFile = fs.readFileSync(path.resolve('.', 'topTitles.json')).toString();
const titles = new Map(JSON.parse(titlesFile).map(([url, title]) => [url, {url,title}]));
titles.forEach(({url,title}) => {
if ( title.length === 0 && url.startsWith('https:') && !url.endsWith('.pdf') ) {
entries.push(url);
}
});
cleanup();
}
function cleanup() {
if ( cleaning ) return;
cleaning = true;
console.log('cleanup running');
fs.writeFileSync(
path.resolve('.', 'recrawl-https-3.json'),
JSON.stringify(entries, null, 2)
);
console.log(`Wrote recrawlable urls`);
process.exit(0);
}
function clean(urlString) {
const url = new URL(urlString);
if ( url.hash.startsWith('#!') || url.hostname.includes('google.com') || url.hostname.includes('80s.nyc') ) {
} else {
url.hash = '';
}
// iterate over a copy of the keys: deleting entries while iterating the live
// searchParams object can skip parameters
for ( const key of [...url.searchParams.keys()] ) {
if ( key.startsWith('utm_') ) {
url.searchParams.delete(key);
}
}
url.pathname = url.pathname.replace(/\/$/, '');
url.protocol = 'https:';
url.pathname = url.pathname.replace(/(\.htm.?|\.php)$/, '');
if ( url.hostname.startsWith('www.') ) {
url.hostname = url.hostname.replace(/^www./, '');
}
const key = url.toString();
return key;
}
function clean2(urlString) {
const url = new URL(urlString);
url.pathname = '';
return url.toString();
}
function curlCommand(url) {
return `curl -k -L -s -o /dev/null -w '%{url_effective}' ${JSON.stringify(url)} \
-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
-H 'Accept-Language: en,en-US;q=0.9,zh-TW;q=0.8,zh-CN;q=0.7,zh;q=0.6,ja;q=0.5' \
-H 'Cache-Control: no-cache' \
-H 'Connection: keep-alive' \
-H 'DNT: 1' \
-H 'Pragma: no-cache' \
-H 'Sec-Fetch-Dest: document' \
-H 'Sec-Fetch-Mode: navigate' \
-H 'Sec-Fetch-Site: none' \
-H 'Sec-Fetch-User: ?1' \
-H 'Upgrade-Insecure-Requests: 1' \
-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36' \
-H 'sec-ch-ua: "Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"' \
-H 'sec-ch-ua-mobile: ?0' \
-H 'sec-ch-ua-platform: "macOS"' \
--compressed ;
`;
}

243
public/index.html Normal file

@@ -0,0 +1,243 @@
<!DOCTYPE html>
<meta charset=utf-8>
<title>Your HTML Library</title>
<link rel=stylesheet href=/style.css>
<header>
<h1><a href=/>22120</a> &mdash; Internet Offline Library</h1>
</header>
<p>
View <a href=/archive_index.html>the index</a>
</p>
<!--
<form method=POST action=/crawl>
<fieldset>
<legend>Crawl and Index</legend>
<p>
Crawl and index a list of links.
<br>
<small>This will open 1 link at a time, and index it when it has loaded.</small>
<p>
<label>
Links
<br>
<textarea class=long name=links>
https://cnn.com
https://bloomberg.com
https://microsoft.com
https://dosyago.com
https://intel.com
</textarea>
<br>
<small>List format is 1 link per line.</small>
</label>
</p>
<details open>
<summary>Advanced settings</summary>
<p>
<label>
Timeout
<br>
<input required name=timeout
type=number min=1 max=300 value=3.6 step=0.1> <span class=units>seconds</span>
<br>
<small>Seconds to wait for each page to load before indexing.</small>
</label>
<p>
<label>
Depth
<br>
<input required name=depth
type=number min=1 max=20 value=1 step=1> <span class=units>clicks</span>
</label>
<br>
<section class=small>
<strong>Value guide</strong>
<ol>
<li>Only each link.
<li>Plus anything 1 click from the link.
<li>Plus anything 2 clicks from the link.
</ol>
<em>And so on&hellip;</em>
</section>
<p>
<label>
Min Page Crawl Time
<br>
<input name=minPageCrawlTime
type=number min=1 max=60 value=20> <span class=units>seconds</span>
<br>
<small>Seconds to wait for each page to load before indexing.</small>
</label>
<p>
<p>
<label>
Max Page Crawl Time
<br>
<input name=maxPageCrawlTime
type=number min=3 max=120 value=30> <span class=units>seconds</span>
<br>
<small>Max time to allow for each page.</small>
</label>
<p>
<p>
<label>
Batch size
<br>
<input name=batchSize
type=number min=1 max=32 value=2> <span class=units>tabs</span>
<br>
<small>Number of concurrent tabs.</small>
</label>
<p>
<p>
<label>
<input name=saveToFile
type=checkbox checked>
Save the harvested URLs to a file
</label>
<p>
<p>
<label>
<span class=text>Program to run on every page</span>
<br>
<textarea class=long rows=9 name=program>
if ( ! State.titles ) {
State.titles = new Map();
State.onExit.addHandler(() => {
fs.writeFileSync(
path.resolve('.', `titles-${(new Date).toISOString()}.txt`),
JSON.stringify([...State.titles.entries()], null, 2) + '\n'
);
});
}
const {result:{value:data}} = await send("Runtime.evaluate",
{
expression: `(function () {
return {
url: document.location.href,
title: document.title,
};
}())`,
returnByValue: true
},
sessionId
);
State.titles.set(data.url, data.title);
console.log(`Saved ${State.titles.size} titles`);
</textarea>
</label>
</p>
</details>
<p>
<button>Crawl</button>
<script>
{
const button = document.currentScript.previousElementSibling;
let disabled = false;
button.addEventListener('click', click => {
if ( disabled ) return click.preventDefault();
disabled = true;
setTimeout(() => button.disabled = true, 0);
});
}
</script>
</fieldset>
</form>
-->
<form method=GET action=/search>
<fieldset class=search>
<legend>Search your archive</legend>
<input class=search type=search name=query placeholder="search your library">
<button>Search</button>
</fieldset>
</form>
<form method=POST action=/mode>
<fieldset>
<legend>Save or Serve: Mode Control</legend>
<p>
Control whether pages you browse are <label class=cmd for=save>saved to</label>, or
<label class=cmd for=serve>served from</label> your archive
<br>
<small><em class=caps>Pro-Tip:</em> Serve pages when you're offline, and it will still feel like you're online</small>
<p>
<label>
<input type=radio name=mode value=save id=save>
Save
</label>
<label>
<input type=radio name=mode value=serve id=serve>
Serve
</label>
<label>
<input type=radio name=mode value=select id=select>
Select (<em>Bookmark mode</em>)
</label>
<output name=notification>
<p>
<button>Change mode</button>
<script>
{
const form = document.currentScript.closest('form');
form.notification.value = "Getting current mode...";
setTimeout(showCurrentMode, 300);
async function showCurrentMode() {
const mode = await fetch('/mode').then(r => r.text());
console.log({mode});
if ( ! mode ) {
setTimeout(showCurrentMode, 300);
return;
}
form.notification.value = "";
form.querySelector(`[name="mode"][value="${mode}"]`).checked = true;
}
}
</script>
</fieldset>
</form>
<form method=POST action=/base_path>
<fieldset>
<legend id=new_base_path>File system path of archive</legend>
<p>
Set the path to where your archive folder will go
<br>
<small>The default is your home directory</small>
<p>
<label>
Base path
<input class=long type=text name=base_path placeholder="A folder path...">
</label>
<p>
<button>Change base path</button>
<script>
{
const form = document.currentScript.closest('form');
showCurrentLibraryPath();
form.base_path.onchange = e => {
self.target = e.target;
}
async function showCurrentLibraryPath() {
const base_path = await fetch('/base_path').then(r => r.text());
form.querySelector(`[name="base_path"]`).value = base_path;
}
}
</script>
</fieldset>
</form>
<form disabled method=POST action=/publish>
<fieldset>
<legend>Publish your archive</legend>
<p>
Publish a search engine from your archive
<br>
<small>This will generate a server.zip file that you can unzip and run</small>
<p>
<button disabled>Publish</button>
</fieldset>
</form>
<footer>
<cite>
<a rel=author href=https://github.com/crisdosyago/22120>22120 GitHub</a>
</cite>
</footer>

195
public/injection.js Normal file

@@ -0,0 +1,195 @@
import {DEBUG as debug} from '../src/common.js';
const DEBUG = debug || false;
export function getInjection({sessionId}) {
// Notes:
// say() function
// why aliased? Resistant to page overwriting
// just a precaution as we are already in an isolated world here, but this makes
// this script more portable if it were introduced globally as well as robust
// against API or behaviour changes of the browser or its remote debugging protocol
// in future
return `
{
const X = 1;
const DEBUG = ${JSON.stringify(DEBUG, null, 2)};
const MIN_CHECK_TEXT = 3000; // min time between checking documentElement.innerText
const MIN_NOTIFY = 5000; // min time between telling controller text maybe changed
const MAX_NOTIFICATIONS = 13; // max times we will tell controller text maybe changed
const OBSERVER_OPTS = {
subtree: true,
childList: true,
characterData: true
};
const Top = globalThis.top;
let lastInnerText;
if ( Top === globalThis ) {
const ConsoleInfo = console.info.bind(console);
const JSONStringify = JSON.stringify.bind(JSON);
const TITLE_CHANGES = 10;
const INITIAL_CHECK_TIME = 500;
const TIME_MULTIPLIER = Math.E;
const sessionId = "${sessionId}";
const sleep = ms => new Promise(res => setTimeout(res, ms));
const handler = throttle(handleFrameMessage, MIN_NOTIFY);
let count = 0;
installTop();
async function installTop() {
console.log("Installing in top frame...");
self.startUrl = location.href;
say({install: { sessionId, startUrl }});
await sleep(500);
beginTitleChecks();
beginTextNotifications();
console.log("Installed.");
}
function beginTitleChecks() {
let lastTitle = null;
let checker;
let timeToNextCheck = INITIAL_CHECK_TIME;
let changesLogged = 0;
check();
console.log('Begun logging title changes.');
function check() {
clearTimeout(checker);
const currentTitle = document.title;
if ( lastTitle !== currentTitle ) {
say({titleChange: {lastTitle, currentTitle, url: location.href, sessionId}});
lastTitle = currentTitle;
changesLogged++;
} else {
// increase check time if there's no change
timeToNextCheck *= TIME_MULTIPLIER;
}
if ( changesLogged < TITLE_CHANGES ) {
checker = setTimeout(check, timeToNextCheck);
} else {
console.log('Finished logging title changes.');
}
}
}
function say(thing) {
ConsoleInfo(JSONStringify(thing));
}
function beginTextNotifications() {
// listen for {textChange:true} messages
// throttle them
// on leading throttle edge send message to controller with
// console.info(JSON.stringify({textChange:...}));
self.addEventListener('message', messageParser);
console.log('Begun notifying of text changes.');
function messageParser({data, origin}) {
let source;
try {
({source} = data.frameTextChangeNotification);
if ( count > MAX_NOTIFICATIONS ) {
self.removeEventListener('message', messageParser);
return;
}
count++;
handler({textChange:{source}});
} catch(e) {
DEBUG.verboseSlow && console.warn('could not parse message', data, e);
}
}
}
function handleFrameMessage({textChange}) {
const {source} = textChange;
console.log('Telling controller that text changed');
say({textChange:{source, sessionId, count}});
}
}
beginTextMutationChecks();
function beginTextMutationChecks() {
// create mutation observer for text
// throttle output
const observer = new MutationObserver(throttle(check, MIN_CHECK_TEXT));
observer.observe(document.documentElement || document, OBSERVER_OPTS);
console.log('Begun observing text changes.');
function check() {
console.log('check');
const textMutated = document.documentElement.innerText !== lastInnerText;
if ( textMutated ) {
DEBUG.verboseSlow && console.log('Text changed');
lastInnerText = document.documentElement.innerText;
Top.postMessage({frameTextChangeNotification:{source:location.href}}, '*');
}
}
}
// javascript throttle function
// source: https://stackoverflow.com/a/59378445
/*
function throttle(func, timeFrame) {
var lastTime = 0;
return function (...args) {
var now = new Date();
if (now - lastTime >= timeFrame) {
func.apply(this, args);
lastTime = now;
}
};
}
*/
// alternate throttle function with trailing edge call
// source: https://stackoverflow.com/a/27078401
///*
// Notes
// Returns a function, that, when invoked, will only be triggered at most once
// during a given window of time. Normally, the throttled function will run
// as much as it can, without ever going more than once per \`wait\` duration;
// but if you'd like to disable the execution on the leading edge, pass
// \`{leading: false}\`. To disable execution on the trailing edge, ditto.
function throttle(func, wait, options) {
var context, args, result;
var timeout = null;
var previous = 0;
if (!options) options = {};
var later = function() {
previous = options.leading === false ? 0 : Date.now();
timeout = null;
result = func.apply(context, args);
if (!timeout) context = args = null;
};
return function() {
var now = Date.now();
if (!previous && options.leading === false) previous = now;
var remaining = wait - (now - previous);
context = this;
args = arguments;
if (remaining <= 0 || remaining > wait) {
if (timeout) {
clearTimeout(timeout);
timeout = null;
}
previous = now;
result = func.apply(context, args);
if (!timeout) context = args = null;
} else if (!timeout && options.trailing !== false) {
timeout = setTimeout(later, remaining);
}
return result;
};
}
//*/
}
`;
}

10
public/library/README.md Normal file

@@ -0,0 +1,10 @@
# ALT Default storage directory for library
Remove `public/library/http*` and `public/library/cache.json` from `.gitignore` if you forked this repo and want to commit your library using git.
## Clearing your cache
To clear everything, delete all directories that start with `http` or `https` and delete `cache.json`.
To clear only content from domains you don't want, delete just those directories starting with `http` or `https` that you don't want, and DON'T delete `cache.json`.
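For example, from the repository root (double-check the paths first; this permanently deletes the cached copies):
```sh
rm -rf public/library/http*
rm -f public/library/cache.json
```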

250
public/make_top.mjs Executable file

@@ -0,0 +1,250 @@
#!/usr/bin/env node
import fs from 'node:fs';
import path from 'node:path';
import child_process from 'node:child_process';
const CLEAN = false;
const CONCURRENT = 7;
const sleep = ms => new Promise(res => setTimeout(res, ms));
const entries = [];
const counted = new Set();
const errors = new Map();
let counts;
let cleaning = false;
process.on('exit', cleanup);
process.on('SIGINT', cleanup);
process.on('SIGTERM', cleanup);
process.on('SIGHUP', cleanup);
process.on('SIGUSR2', cleanup);
process.on('beforeExit', cleanup);
make();
async function make() {
const titlesFile = fs.readFileSync(path.resolve('.', 'topTitles.json')).toString();
const titles = new Map(JSON.parse(titlesFile).map(([url, title]) => [url, {url,title}]));
if ( CLEAN ) {
for ( const [url, obj] of titles ) {
const k1 = clean(url);
const k2 = clean2(url);
if ( !titles.has(k1) ) {
titles.set(k1, obj);
}
if ( !titles.has(k2) ) {
titles.set(k2, obj);
}
}
}
const remainingFile = fs.readFileSync(path.resolve('.', 'remainingFile.json')).toString();
const remainingSet = new Set(JSON.parse(remainingFile));
const countsFile = fs.readFileSync(path.resolve('.', 'ran-counts.json')).toString();
counts = new Map(JSON.parse(countsFile).filter(([url, count]) => remainingSet.has(url)));
let current = 0;
for ( const [url, count] of counts ) {
let title;
let realUrl;
if ( titles.has(url) ) {
({title} = titles.get(url));
entries.push({
url,
title,
count,
});
counted.add(url);
} else {
console.log(`Curl call for ${url} in progress...`);
let notifyCurlComplete;
const curlCall = new Promise(res => notifyCurlComplete = res);
do {
await sleep(1000);
} while ( current >= CONCURRENT );
child_process.exec(curlCommand(url), (err, stdout, stderr) => {
if ( ! err && (!stderr || stderr.length == 0)) {
realUrl = stdout;
if ( titles.has(realUrl) ) {
({title} = titles.get(realUrl));
entries.push({
url,
realUrl,
title,
count,
});
counted.add(url);
}
} else {
console.log(`Error on curl for ${url}`, {err, stderr});
errors.set(url, {err, stderr});
}
console.log(`Curl call for ${url} complete!`);
notifyCurlComplete();
});
current += 1;
curlCall.then(() => current -= 1);
}
}
cleanup();
}
async function make_v2() {
const titlesFile = fs.readFileSync(path.resolve('.', 'topTitles.json')).toString();
const titles = new Map(JSON.parse(titlesFile).map(([url, title]) => [url, {url,title}]));
if ( CLEAN ) {
for ( const [url, obj] of titles ) {
const k1 = clean(url);
const k2 = clean2(url);
if ( !titles.has(k1) ) {
titles.set(k1, obj);
}
if ( !titles.has(k2) ) {
titles.set(k2, obj);
}
}
}
const countsFile = fs.readFileSync(path.resolve('.', 'ran-counts.json')).toString();
counts = new Map(JSON.parse(countsFile));
let current = 0;
for ( const [url, count] of counts ) {
let title;
let realUrl;
if ( titles.has(url) ) {
({title} = titles.get(url));
entries.push({
url,
title,
count,
});
counted.add(url);
} else {
console.log(`Curl call for ${url} in progress...`);
let notifyCurlComplete;
const curlCall = new Promise(res => notifyCurlComplete = res);
do {
await sleep(250);
} while ( current >= CONCURRENT );
child_process.exec(curlCommand(url), (err, stdout, stderr) => {
if ( ! err && (!stderr || stderr.length == 0)) {
realUrl = stdout;
if ( titles.has(realUrl) ) {
({title} = titles.get(realUrl));
entries.push({
url,
realUrl,
title,
count,
});
counted.add(url);
}
} else {
console.log(`Error on curl for ${url}`, {err, stderr});
errors.set(url, {err, stderr});
}
console.log(`Curl call for ${url} complete!`);
notifyCurlComplete();
});
current += 1;
curlCall.then(() => current -= 1);
}
}
cleanup();
}
function cleanup() {
if ( cleaning ) return;
cleaning = true;
console.log('cleanup running');
if ( errors.size ) {
fs.writeFileSync(
path.resolve('.', 'errorLinks4.json'),
JSON.stringify([...errors.keys()], null, 2)
);
console.log(`Wrote errors`);
}
if ( counted.size !== counts.size ) {
counted.forEach(url => counts.delete(url));
fs.writeFileSync(
path.resolve('.', 'noTitleFound4.json'),
JSON.stringify([...counts.keys()], null, 2)
)
console.log(`Wrote noTitleFound`);
}
fs.writeFileSync(
path.resolve('.', 'topFrontPageLinksWithCounts4.json'),
JSON.stringify(entries, null, 2)
);
console.log(`Wrote top links with counts`);
process.exit(0);
}
async function make_v1() {
const titlesFile = fs.readFileSync(path.resolve('.', 'topTitles.json')).toString();
const titles = new Map(JSON.parse(titlesFile).map(([url, title]) => [clean(url), {url,title}]));
const countsFile = fs.readFileSync(path.resolve('.', 'counts.json')).toString();
const counts = new Map(JSON.parse(countsFile).map(([url, count]) => [clean(url), count]));
for ( const [key, count] of counts ) {
counts.set(clean2(key), count);
}
const entries = [];
for ( const [key, {url,title}] of titles ) {
entries.push({
url, title,
count: counts.get(key) ||
counts.get(url) ||
counts.get(clean2(key)) ||
console.log(`No count found for`, {key, url, title, c2key: clean2(key)})
});
}
fs.writeFileSync(
path.resolve('.', 'topFrontPageLinks.json'),
JSON.stringify(entries, null, 2)
);
}
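// normalize a URL into a matching key: drop the hash (except hash-bang URLs and special-cased hosts), strip utm_ params, trailing slashes, .htm/.php suffixes and a leading www., and force https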
function clean(urlString) {
const url = new URL(urlString);
if ( url.hash.startsWith('#!') || url.hostname.includes('google.com') || url.hostname.includes('80s.nyc') ) {
} else {
url.hash = '';
}
for ( const key of [...url.searchParams.keys()] ) {
if ( key.startsWith('utm_') ) {
url.searchParams.delete(key);
}
}
url.pathname = url.pathname.replace(/\/$/, '');
url.protocol = 'https:';
url.pathname = url.pathname.replace(/(\.htm.?|\.php)$/, '');
if ( url.hostname.startsWith('www.') ) {
url.hostname = url.hostname.replace(/^www\./, '');
}
const key = url.toString();
return key;
}
function clean2(urlString) {
const url = new URL(urlString);
url.pathname = '';
return url.toString();
}
function curlCommand(url) {
return `curl -k -L -s -o /dev/null -w '%{url_effective}' ${JSON.stringify(url)} \
-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
-H 'Accept-Language: en,en-US;q=0.9,zh-TW;q=0.8,zh-CN;q=0.7,zh;q=0.6,ja;q=0.5' \
-H 'Cache-Control: no-cache' \
-H 'Connection: keep-alive' \
-H 'DNT: 1' \
-H 'Pragma: no-cache' \
-H 'Sec-Fetch-Dest: document' \
-H 'Sec-Fetch-Mode: navigate' \
-H 'Sec-Fetch-Site: none' \
-H 'Sec-Fetch-User: ?1' \
-H 'Upgrade-Insecure-Requests: 1' \
-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36' \
-H 'sec-ch-ua: "Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"' \
-H 'sec-ch-ua-mobile: ?0' \
-H 'sec-ch-ua-platform: "macOS"' \
--compressed ;
`;
}

1440
public/none Normal file

File diff suppressed because it is too large Load Diff

117
public/problem_find.mjs Executable file
View File

@ -0,0 +1,117 @@
#!/usr/bin/env node
import fs from 'node:fs';
import path from 'node:path';
import child_process from 'node:child_process';
import {
loadPref,
cache_file,
index_file,
} from '../src/args.js';
const CLEAN = false;
const CONCURRENT = 7;
const sleep = ms => new Promise(res => setTimeout(res, ms));
const problems = new Map();
let cleaning = false;
let made = false;
process.on('exit', cleanup);
process.on('SIGINT', cleanup);
process.on('SIGTERM', cleanup);
process.on('SIGHUP', cleanup);
process.on('SIGUSR2', cleanup);
process.on('beforeExit', cleanup);
console.log({Pref:loadPref(), cache_file: cache_file(), index_file: index_file()});
make();
async function make() {
const indexFile = fs.readFileSync(index_file()).toString();
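// scan every index entry and collect URLs whose saved titles look like error pages (empty, '404', or 'not found')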
JSON.parse(indexFile).map(([key, value]) => {
if ( typeof key === "number" ) return;
if ( key.startsWith('ndx') ) return;
if ( value.title === undefined ) {
console.log('no title property', {key, value});
return;
}
const url = key;
const title = value.title.toLocaleLowerCase();
if ( title.length === 0 || title.includes('404') || title.includes('not found') ) {
if ( problems.has(url) ) {
console.log('Found duplicate', url, title, problems.get(url));
}
problems.set(url, title);
}
});
made = true;
cleanup();
}
function cleanup() {
if ( cleaning ) return;
if ( ! made ) return;
cleaning = true;
console.log('cleanup running');
const outData = [...problems.entries()];
fs.writeFileSync(
path.resolve('.', 'url-problems.json'),
JSON.stringify(outData, null, 2)
);
const {size:bytesWritten} = fs.statSync(
path.resolve('.', 'url-problems.json'),
{bigint: true}
);
console.log(`Wrote ${outData.length} problem urls in ${bytesWritten} bytes.`);
process.exit(0);
}
function clean(urlString) {
const url = new URL(urlString);
if ( url.hash.startsWith('#!') || url.hostname.includes('google.com') || url.hostname.includes('80s.nyc') ) {
} else {
url.hash = '';
}
for ( const key of [...url.searchParams.keys()] ) {
if ( key.startsWith('utm_') ) {
url.searchParams.delete(key);
}
}
url.pathname = url.pathname.replace(/\/$/, '');
url.protocol = 'https:';
url.pathname = url.pathname.replace(/(\.htm.?|\.php)$/, '');
if ( url.hostname.startsWith('www.') ) {
url.hostname = url.hostname.replace(/^www\./, '');
}
const key = url.toString();
return key;
}
function clean2(urlString) {
const url = new URL(urlString);
url.pathname = '';
return url.toString();
}
function curlCommand(url) {
return `curl -k -L -s -o /dev/null -w '%{url_effective}' ${JSON.stringify(url)} \
-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
-H 'Accept-Language: en,en-US;q=0.9,zh-TW;q=0.8,zh-CN;q=0.7,zh;q=0.6,ja;q=0.5' \
-H 'Cache-Control: no-cache' \
-H 'Connection: keep-alive' \
-H 'DNT: 1' \
-H 'Pragma: no-cache' \
-H 'Sec-Fetch-Dest: document' \
-H 'Sec-Fetch-Mode: navigate' \
-H 'Sec-Fetch-Site: none' \
-H 'Sec-Fetch-User: ?1' \
-H 'Upgrade-Insecure-Requests: 1' \
-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36' \
-H 'sec-ch-ua: "Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"' \
-H 'sec-ch-ua-mobile: ?0' \
-H 'sec-ch-ua-platform: "macOS"' \
--compressed ;
`;
}

21
public/redirector.html Normal file
View File

@ -0,0 +1,21 @@
<!DOCTYPE html>
<meta name="referrer" content="no-referrer" />
<h1>About to archive and index <code id=url-text></code></h1>
<script type=module>
const url = new URLSearchParams(location.search).get('url');
const text = document.querySelector('#url-text');
let valid = false;
try {
new URL(url);
valid = true;
} catch(e) {
console.warn(`URL ${url} is not a valid URL`);
}
if ( valid ) {
text.innerText = url;
setTimeout(() => {
window.location.href = url;
}, 1000);
}
</script>

101
public/style.css Normal file
View File

@ -0,0 +1,101 @@
:root {
font-family: sans-serif;
background: lavenderblush;
}
body {
display: table;
margin: 0 auto 0 min(10%, 5em);
background: white;
padding: 0.5em;
border-bottom: 1px solid purple;
max-width: min(777px, 80%);
}
header {
font-size: smaller;
margin-bottom: 3em;
}
form {
margin-bottom: 2em;
}
label.cmd {
border-bottom: thin solid dodgerblue;
cursor: default;
font-style: italic;
}
:is(form, label)[disabled] {
color: grey;
}
em.caps {
font-style: normal;
font-variant: small-caps;
}
:is(form:hover, form:active) em.caps {
background: yellow;
}
legend {
font-weight: 600;
}
fieldset {
border: thin solid transparent;
}
fieldset.search {
display: flex;
}
button, input, output {
}
button {
}
form .long {
width: 100%;
min-width: 250px;
}
output {
font-size: smaller;
color: purple;
}
h1 {
margin: 0;
}
h2 {
margin-top: 0;
}
small.url {
word-break: break-all;
}
.small {
font-size: smaller;
}
label small {
font-style: italic;
color: darkslategrey;
}
.units {
color: grey;
font-size: smaller;
}
input[type="number"] {
text-align: right;
}
input.search {
flex-grow: 1;
padding: 0.25em 0.5em;
}
input.search,
input.search + button {
font-size: 1em;
}
ol.results {
list-style-type: none;
}
.cent {
text-align: center;
}
.grey {
color: grey;
}

View File

@ -0,0 +1 @@
<script type=module src=injection.js></script>

3
public/top.html Normal file
View File

@ -0,0 +1,3 @@
<script>
</script>

6
run.sh Executable file
View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
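# give node roughly 90% of physical memory (in MB) for its old-space heap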
phys=$(free -t -m | grep -oP '\d+' | sed '10!d')
alloc=$(echo "$phys * 90/100" | bc )
echo $alloc
node --max-old-space-size=$alloc src/app.js

9
scripts/build_only.sh Executable file
View File

@ -0,0 +1,9 @@
#!/usr/bin/env bash
./node_modules/.bin/esbuild src/app.js --bundle --outfile=dist/diskernet.mjs --format=esm --platform=node --minify --analyze
./node_modules/.bin/esbuild src/app.js --bundle --outfile=build/out.cjs --platform=node --minify --analyze
./node_modules/.bin/esbuild src/app.js --bundle --outfile=build/test.cjs --platform=node
echo "#!/usr/bin/env node" > build/diskernet.cjs
cat build/out.cjs >> build/diskernet.cjs
chmod +x build/diskernet.cjs

22
scripts/build_setup.sh Executable file
View File

@ -0,0 +1,22 @@
#!/usr/bin/env bash
source $HOME/.nvm/nvm.sh
echo "Making build directories..."
mkdir -p dist/
mkdir -p bin/
mkdir -p build/
echo "Setting node to lts/*..."
nvm use --lts
echo "Installing pkg..."
which pkg || npm i -g pkg
echo "Installing esbuild..."
npm install --save-exact esbuild
echo "Done"

9
scripts/compile.sh Executable file
View File

@ -0,0 +1,9 @@
#!/usr/bin/env bash
./scripts/build_only.sh
unset npm_config_prefix
source $HOME/.nvm/nvm.sh
nvm use --lts
pkg --compress GZip .

13
scripts/dl-node.sh Executable file
View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash
unset npm_config_prefix
source $HOME/.nvm/nvm.sh
. $HOME/.profile
nvm install --lts
nvm use --lts
pkg ./src/hello.js
rm -rf hello-*

5
scripts/go_build.sh Executable file
View File

@ -0,0 +1,5 @@
#!/usr/bin/env bash
cp ./.package.build.json ./package.json
cp ./src/.common.build.js ./src/common.js

6
scripts/go_dev.sh Executable file
View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
gut "Just built"
cp ./.package.dev.json ./package.json
cp ./src/.common.dev.js ./src/common.js

48
scripts/old_compile.sh Executable file
View File

@ -0,0 +1,48 @@
#!/usr/bin/env bash
unset npm_config_prefix
source $HOME/.nvm/nvm.sh
. $HOME/.profile
nvm use --lts
echo "Cleaning old build and dist files..."
rm -rf build/* dist/*
echo "Setting build (CJS) mode..."
./scripts/go_build.sh
patch_required=$(grep -ER "require\([\"'](node:)?stream/web[\"']\)" node_modules/*)
files=$(grep -rlER "require\([\"'](node:)?stream/web[\"']\)" node_modules/*)
if [[ ! -z "$patch_required" ]]; then
while IFS= read -r file; do
#echo '--->' $file
#grep -q $file package.json
#if [ $? == 1 ]; then
echo '--->' $file "UNPATCHED!"
echo "Found an error!"
echo "Found something you need to patch before building"
echo "See: https://github.com/vercel/pkg/issues/1451"
echo
echo "$patch_required"
echo
echo "You need to add all these to pkg.patches to replace with require('stream').web"
./scripts/go_dev.sh
exit 1
#fi
#echo "OK"
done <<< $files
fi
echo "Bundling javascript..."
export NODE_ENV='production'
npx webpack
chmod +x ./build/22120.js
echo "Building for windows nix and macos..."
npx pkg --compress Gzip .
echo "Restoring dev (ES module) mode..."
./scripts/go_dev.sh
echo "Rebundling an es module for npm es module import..."
npm run bundle

10
scripts/postinstall.sh Executable file
View File

@ -0,0 +1,10 @@
#!/usr/bin/env bash
which brew || /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
which mkcert || brew install mkcert
mkdir -p $HOME/local-sslcerts
cd $HOME/local-sslcerts
mkcert -key-file privkey.pem -cert-file fullchain.pem localhost
mkcert -install

6
scripts/publish.sh Executable file
View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
./scripts/go_build.sh
gpush minor "$@"
./scripts/go_dev.sh

12
scripts/release.sh Executable file
View File

@ -0,0 +1,12 @@
#!/bin/sh
#./scripts/compile.sh
description=$1
latest_tag=$(git describe --abbrev=0)
grel release -u crisdosyago -r 22120 --tag $latest_tag --name "New release" --description '"'"$description"'"'
grel upload -u crisdosyago -r 22120 --tag $latest_tag --name "diskernet-win.exe" --file bin/diskernet-win.exe
grel upload -u crisdosyago -r 22120 --tag $latest_tag --name "diskernet-linux" --file bin/diskernet-linux
grel upload -u crisdosyago -r 22120 --tag $latest_tag --name "diskernet-macos" --file bin/diskernet-macos

167
src/app.js Normal file
View File

@ -0,0 +1,167 @@
import fs from 'fs';
import ChildProcess from 'child_process';
import readline from 'readline';
import util from 'util';
import {stdin as input, stdout as output} from 'process';
import ChromeLauncher from 'chrome-launcher';
import psList from '@667/ps-list';
import {DEBUG, sleep, NO_SANDBOX, GO_SECURE} from './common.js';
import {Archivist} from './archivist.js';
import LibraryServer from './libraryServer.js';
import args from './args.js';
const {server_port, mode, chrome_port} = args;
const CHROME_OPTS = !NO_SANDBOX ? [
/*'--restore-last-session',*/
`--disk-cache-dir=${args.temp_browser_cache()}`,
`--aggressive-cache-discard`
] : [
/*'--restore-last-session',*/
`--disk-cache-dir=${args.temp_browser_cache()}`,
`--aggressive-cache-discard`,
'--no-sandbox',
];
const LAUNCH_OPTS = {
logLevel: DEBUG ? 'verbose' : 'silent',
port: chrome_port,
chromeFlags:CHROME_OPTS,
userDataDir:false,
startingUrl: `${GO_SECURE ? 'https' : 'http'}://localhost:${args.server_port}`,
ignoreDefaultFlags: true
}
const KILL_ON = {
win32: 'taskkill /IM chrome.exe /F',
darwin: 'kill $(pgrep Chrome)',
freebsd: 'pkill -15 chrome',
linux: 'pkill -15 chrome',
};
let quitting;
start();
async function start() {
console.log(`Running in node...`);
process.on('error', cleanup);
process.on('unhandledRejection', cleanup);
process.on('uncaughtException', cleanup);
process.on('SIGHUP', cleanup);
process.on('beforeExit', cleanup);
process.on('SIGINT', code => cleanup(code, 'signal', {exit:true}));
process.on('SIGTERM', code => cleanup(code, 'signal', {exit:true}));
process.on('SIGQUIT', code => cleanup(code, 'signal', {exit:true}));
process.on('SIGBREAK', code => cleanup(code, 'signal', {exit:true}));
process.on('SIGABRT', code => cleanup(code, 'signal', {exit:true}));
console.log(`Importing dependencies...`);
const {launch:ChromeLaunch} = ChromeLauncher;
let chromeOpen = false;
const list = await psList();
chromeOpen = list.some(({name,cmd}) => name?.match?.(/chrome/g) || cmd?.match?.(/chrome/g));
if ( chromeOpen ) {
console.info(`Seems Chrome is open`);
if ( DEBUG.askFirst ) {
const rl = readline.createInterface({input, output});
const question = util.promisify(rl.question).bind(rl);
console.info(`\nIf you don't shut down Chrome and restart it under DiskerNet control
you will not be able to save or serve your archives.\n`);
const answer = await question("Would you like to shutdown Chrome browser now (y/N) ? ");
if ( answer?.match(/^y/i) ) {
await killChrome();
} else {
console.log(`OK, not shutting it!\n`);
if ( chromeOpen ) {
process.exit(0);
}
}
} else {
await killChrome();
}
}
console.log(`Removing 22120's existing temporary browser cache if it exists...`);
if ( fs.existsSync(args.temp_browser_cache()) ) {
console.log(`Temp browser cache directory (${args.temp_browser_cache()}) exists, deleting...`);
fs.rmdirSync(args.temp_browser_cache(), {recursive:true});
console.log(`Deleted.`);
}
console.log(`Launching library server...`);
await LibraryServer.start({server_port});
console.log(`Library server started.`);
console.log(`Waiting 1 second...`);
await sleep(1000);
console.log(`Launching chrome...`);
try {
await ChromeLaunch(LAUNCH_OPTS);
} catch(e) {
console.log(`Could not launch chrome.`);
DEBUG.verboseSlow && console.info('Chrome launch error:', e);
process.exit(1);
}
console.log(`Chrome started.`);
console.log(`Waiting 1 second...`);
await sleep(1000);
console.log(`Launching archivist and connecting to browser...`);
await Archivist.collect({chrome_port, mode});
console.log(`System ready.`);
}
async function killChrome(wait = true) {
try {
if ( process.platform in KILL_ON ) {
console.log(`Attempting to shut running chrome...`);
const [err] = (await new Promise(
res => ChildProcess.exec(KILL_ON[process.platform], (...a) => res(a))
));
if ( err ) {
console.log(`There was no running chrome.`);
DEBUG.verboseSlow && console.warn("Error closing existing chrome", err);
} else {
console.log(`Running chrome shut down.`);
if ( wait ) {
console.log(`Waiting 1 second...`);
await sleep(1000);
}
}
} else {
console.warn(`If you have chrome running, you may need to shut it down manually and restart 22120.`);
}
} catch(e) {
console.warn("in kill chrome", e);
}
}
async function cleanup(reason, err, {exit = false} = {}) {
console.log(`Cleanup called on reason: ${reason}`, err);
if ( quitting ) {
console.log(`Cleanup already called so not running again.`);
return;
}
quitting = true;
Archivist.shutdown();
LibraryServer.stop();
killChrome(false);
if ( exit ) {
console.log(`Take a breath. Everything's done. DiskerNet is exiting in 3 seconds...`);
await sleep(3000);
process.exit(0);
}
}

1911
src/archivist.js Normal file

File diff suppressed because it is too large Load Diff

157
src/args.js Normal file
View File

@ -0,0 +1,157 @@
import os from 'os';
import path from 'path';
import fs from 'fs';
const server_port = process.env.PORT || process.argv[2] || 22120;
const mode = process.argv[3] || 'save';
const chrome_port = process.argv[4] || 9222;
const Pref = {};
export const CONFIG_DIR = path.resolve(os.homedir(), '.config', 'dosyago', 'DiskerNet');
fs.mkdirSync(CONFIG_DIR, {recursive:true});
const pref_file = path.resolve(CONFIG_DIR, 'config.json');
const cacheId = Math.random();
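// random suffix so each run gets its own temp-browser-cache directory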
loadPref();
let BasePath = Pref.BasePath;
export const archive_root = () => path.resolve(BasePath, '22120-arc');
export const no_file = () => path.resolve(archive_root(), 'no.json');
export const temp_browser_cache = () => path.resolve(archive_root(), 'temp-browser-cache' + cacheId);
export const library_path = () => path.resolve(archive_root(), 'public', 'library');
export const cache_file = () => path.resolve(library_path(), 'cache.json');
export const index_file = () => path.resolve(library_path(), 'index.json');
export const fts_index_dir = () => path.resolve(library_path(), 'fts');
const flex_fts_index_dir = base => path.resolve(base || fts_index_dir(), 'flex');
const ndx_fts_index_dir = base => path.resolve(base || fts_index_dir(), 'ndx');
const fuzzy_fts_index_dir = base => path.resolve(base || fts_index_dir(), 'fuzzy');
const results_per_page = 10;
console.log(`Args usage: <server_port> <save|serve> <chrome_port> <library_path>`);
updateBasePath(process.argv[5] || Pref.BasePath || CONFIG_DIR);
const args = {
mode,
server_port,
chrome_port,
updateBasePath,
getBasePath,
library_path,
no_file,
temp_browser_cache,
cache_file,
index_file,
fts_index_dir,
flex_fts_index_dir,
ndx_fts_index_dir,
fuzzy_fts_index_dir,
results_per_page,
CONFIG_DIR
};
export default args;
function updateBasePath(new_base_path, {force:force = false, before: before = []} = {}) {
new_base_path = path.resolve(new_base_path);
if ( !force && (BasePath == new_base_path) ) {
return false;
}
console.log(`Updating base path from ${BasePath} to ${new_base_path}...`);
BasePath = new_base_path;
if ( Array.isArray(before) ) {
for( const task of before ) {
try { task(); } catch(e) {
console.error(`before updateBasePath task failed. Task: ${task}`);
}
}
} else {
throw new TypeError(`If given, argument before to updateBasePath() must be an array of functions.`);
}
if ( !fs.existsSync(library_path()) ) {
console.log(`Archive directory (${library_path()}) does not exist, creating...`);
fs.mkdirSync(library_path(), {recursive:true});
console.log(`Created.`);
}
if ( !fs.existsSync(cache_file()) ) {
console.log(`Cache file does not exist, creating...`);
fs.writeFileSync(cache_file(), JSON.stringify([]));
console.log(`Created!`);
}
if ( !fs.existsSync(index_file()) ) {
//console.log(`INDEXLOG: Index file does not exist, creating...`);
fs.writeFileSync(index_file(), JSON.stringify([]));
console.log(`Created!`);
}
if ( !fs.existsSync(flex_fts_index_dir()) ) {
console.log(`FTS Index directory does not exist, creating...`);
fs.mkdirSync(flex_fts_index_dir(), {recursive:true});
console.log(`Created!`);
}
if ( !fs.existsSync(ndx_fts_index_dir()) ) {
console.log(`NDX FTS Index directory does not exist, creating...`);
fs.mkdirSync(ndx_fts_index_dir(), {recursive:true});
console.log(`Created!`);
}
if ( !fs.existsSync(fuzzy_fts_index_dir()) ) {
console.log(`FUZZY FTS Index directory does not exist, creating...`);
fs.mkdirSync(fuzzy_fts_index_dir(), {recursive:true});
fs.writeFileSync(path.resolve(fuzzy_fts_index_dir(), 'docs.fzz'), JSON.stringify([]));
console.log('Also creating FUZZY FTS Index docs file...');
console.log(`Created all!`);
}
console.log(`Base path updated to: ${BasePath}. Saving to preferences...`);
Pref.BasePath = BasePath;
savePref();
console.log(`Saved!`);
return true;
}
function getBasePath() {
return BasePath;
}
export function loadPref() {
if ( fs.existsSync(pref_file) ) {
try {
Object.assign(Pref, JSON.parse(fs.readFileSync(pref_file)));
} catch(e) {
console.warn("Error reading from preferences file", e);
}
} else {
console.log("Preferences file does not exist. Creating one...");
savePref();
}
return clone(Pref);
}
function savePref() {
try {
fs.writeFileSync(pref_file, JSON.stringify(Pref,null,2));
} catch(e) {
console.warn("Error writing preferences file", pref_file, Pref, e);
}
}
function clone(o) {
return JSON.parse(JSON.stringify(o));
}

29
src/blockedResponse.js Normal file
View File

@ -0,0 +1,29 @@
export const BLOCKED_CODE = 200;
export const BLOCKED_BODY = Buffer.from(`
<style>:root { font-family: system-ui, monospace; }</style>
<h1>Request blocked</h1>
<p>This navigation was prevented by 22120 as a Chrome bug fix for some requests causing issues.</p>
`).toString("base64");
export const BLOCKED_HEADERS = [
{name: "X-Powered-By", value: "Dosyago-Corporation"},
{name: "X-Blocked-Internally", value: "Custom 22120 Chrome bug fix"},
{name: "Accept-Ranges", value: "bytes"},
{name: "Cache-Control", value: "public, max-age=0"},
{name: "Content-Type", value: "text/html; charset=UTF-8"},
{name: "Content-Length", value: `${BLOCKED_BODY.length}`}
];
const BLOCKED_RESPONSE = `
HTTP/1.1 ${BLOCKED_CODE} OK
X-Powered-By: Zanj-Dosyago-Corporation
X-Blocked-Internally: Custom ad blocking
Accept-Ranges: bytes
Cache-Control: public, max-age=0
Content-Type: text/html; charset=UTF-8
Content-Length: ${BLOCKED_BODY.length}
${BLOCKED_BODY}
`;
export default BLOCKED_RESPONSE;

336
src/bookmarker.js Normal file
View File

@ -0,0 +1,336 @@
import os from 'os';
import Path from 'path';
import fs from 'fs';
import {DEBUG as debug} from './common.js';
const DEBUG = debug || false;
// Chrome user data directories by platform.
// Source 1: https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
// Source 2: https://superuser.com/questions/329112/where-are-the-user-profile-directories-of-google-chrome-located-in
const FS_WATCH_OPTS = {
persistent: false,
};
// Note:
// Not all the below are now used or supported by this code
const UDD_PATHS = {
'win': '%LOCALAPPDATA%\\Google\\Chrome\\User Data',
'winxp' : '%USERPROFILE%\\Local Settings\\Application Data\\Google\\Chrome\\User Data',
'macos' : Path.resolve(os.homedir(), 'Library/Application Support/Google/Chrome'),
'nix' : Path.resolve(os.homedir(), '.config/google-chrome'),
'chromeos': '/home/chronos', /* no support */
'ios': 'Library/Application Support/Google/Chrome', /* no support */
};
const PLAT_TABLE = {
'darwin': 'macos',
'linux': 'nix'
};
const PROFILE_DIR_NAME_REGEX = /^(Default|Profile \d+)$/i;
const isProfileDir = name => PROFILE_DIR_NAME_REGEX.test(name);
const BOOKMARK_FILE_NAME_REGEX = /^Bookmarks$/i;
const isBookmarkFile = name => BOOKMARK_FILE_NAME_REGEX.test(name);
const State = {
active: new Set(), /* active Bookmark files (we don't know these until file changes) */
books: {
}
};
export async function* bookmarkChanges() {
// try to get the profile directory
const rootDir = getProfileRootDir();
if ( !fs.existsSync(rootDir) ) {
throw new TypeError(`Sorry! The directory where we thought the Chrome profile directories may be found (${rootDir}), does not exist. We can't monitor changes to your bookmarks, so Bookmark Select Mode is not supported.`);
}
// state constants and variables (including chokidar file glob observer)
const observers = [];
const ps = [];
let change = false;
let notifyChange = false;
let stopLooping = false;
let shuttingDown = false;
// create sufficient observers
const dirs = fs.readdirSync(rootDir, {withFileTypes:true}).reduce((Files, dirent) => {
if ( dirent.isDirectory() && isProfileDir(dirent.name) ) {
const filePath = Path.resolve(rootDir, dirent.name);
if ( fs.existsSync(filePath) ) {
Files.push(filePath);
}
}
return Files;
}, []);
for( const dirPath of dirs ) {
// first read it in
const filePath = Path.resolve(dirPath, 'Bookmarks');
if ( fs.existsSync(filePath) ) {
const data = fs.readFileSync(filePath);
const jData = JSON.parse(data);
State.books[filePath] = flatten(jData, {toMap:true});
}
const observer = fs.watch(dirPath, FS_WATCH_OPTS);
console.log(`Observing ${dirPath}`);
// Note
// allow the parent process to exit
//even if observer is still active somehow
observer.unref();
// listen for all events from the observer
observer.on('change', (event, filename) => {
filename = filename || '';
// listen to everything
const path = Path.resolve(dirPath, filename);
DEBUG.verboseSlow && console.log(event, path);
if ( isBookmarkFile(filename) ) {
if ( ! State.active.has(path) ) {
State.active.add(path);
}
// but only act if it is a bookmark file
DEBUG.verboseSlow && console.log(event, path, notifyChange);
// save the event type and file it happened to
change = {event, path};
// drop the most recently pushed promise from our bookkeeping list
ps.pop();
// resolve the promise in the wait loop to process the bookmark file and emit the changes
notifyChange && notifyChange();
}
});
observer.on('error', error => {
console.warn(`Bookmark file observer for ${dirPath} error`, error);
observers.splice(observers.indexOf(observer), 1);
if ( observers.length ) {
notifyChange && notifyChange();
} else {
stopLooping && stopLooping();
}
});
observer.on('close', () => {
console.info(`Observer for ${dirPath} closed`);
observers.splice(observers.indexOf(observer), 1);
if ( observers.length ) {
notifyChange && notifyChange();
} else {
stopLooping && stopLooping();
}
});
observers.push(observer);
}
// make sure we kill the watcher on process restart or shutdown
process.on('SIGTERM', shutdown);
process.on('SIGHUP', shutdown);
process.on('SIGINT', shutdown);
process.on('SIGBREAK', shutdown);
// the main wait loop that enables us to turn a traditional NodeJS EventEmitter
// into an asynchronous stream generator
waiting: while(true) {
// Note: code resilience
//the below two statements can come in any order in this loop, both work
// get, process and publish changes
// only do if the change is there (first time it won't be because
// we haven't yielded out (async or yield) yet)
if ( change ) {
const {path} = change;
change = false;
try {
const changes = flatten(
JSON.parse(fs.readFileSync(path)),
{toMap:true, map: State.books[path]}
);
for( const changeEvent of changes ) yield changeEvent;
} catch(e) {
console.warn(`Error publishing Bookmarks changes`, e);
}
}
// wait for the next change
// always wait tho (to allow queueing of the next event to process)
try {
await new Promise((res, rej) => {
// save these
notifyChange = res; // so we can trigger the next turn of the loop
stopLooping = rej; // so we can break out of the loop (on shutdown)
ps.push({res,rej}); // so we can clean up any left over promises
});
} catch {
ps.pop();
break waiting;
}
}
shutdown();
return true;
async function shutdown() {
if ( shuttingDown ) return;
shuttingDown = true;
console.log('Bookmark observer shutting down...');
// clean up any outstanding waiting promises
while ( ps.length ) {
/* eslint-disable no-empty */
try { ps.pop().rej(); } finally {}
/* eslint-enable no-empty */
}
// stop the waiting loop
stopLooping && setTimeout(() => stopLooping('bookmark watching stopped'), 0);
// clean up any observers
while(observers.length) {
/* eslint-disable no-empty */
try { observers.pop().close(); } finally {}
/* eslint-enable no-empty */
}
console.log('Bookmark observer shut down cleanly.');
}
}
export function hasBookmark(url) {
return Object.keys(State.books).filter(key => {
if ( State.active.size == 0 ) return true;
return State.active.has(key);
}).map(key => State.books[key])
.some(map => map.has(url));
}
function getProfileRootDir() {
const plat = os.platform();
let name = PLAT_TABLE[plat];
let rootDir;
DEBUG.verboseSlow && console.log({plat, name});
if ( !name ) {
if ( plat === 'win32' ) {
// because Chrome profile dir location only changes in XP
// we only care if it's XP or not and so
// we try to resolve based on the version major and minor (given by release)
// source: https://docs.microsoft.com/en-us/windows/win32/sysinfo/operating-system-version?redirectedfrom=MSDN
const rel = os.release();
const ver = parseFloat(rel);
if ( !Number.isNaN(ver) && ver <= 5.2 ) {
// this should be reliable
name = 'winxp';
} else {
// this may not be reliable, but we just do it
name = 'win';
}
} else {
throw new TypeError(
`Sorry! We don't know how to find the default Chrome profile on OS platform: ${plat}`
);
}
}
if ( UDD_PATHS[name] ) {
rootDir = Path.resolve(resolveEnvironmentVariablesToPathSegments(UDD_PATHS[name]));
} else {
throw new TypeError(
`Sorry! We don't know how to find the default Chrome profile on OS name: ${name}`
);
}
return rootDir;
}
function flatten(bookmarkObj, {toMap: toMap = false, map} = {}) {
const nodes = [...Object.values(bookmarkObj.roots)];
const urls = toMap? (map || new Map()) : [];
const urlSet = new Set();
const changes = [];
while(nodes.length) {
const next = nodes.pop();
const {name, type, url} = next;
switch(type) {
case "url":
if ( toMap ) {
if ( map ) {
if ( urls.has(url) ) {
const {name:oldName} = urls.get(url);
if ( name !== oldName ) {
if ( !urlSet.has(url) ) {
changes.push({
type: "Title updated",
url,
oldName,
name
});
}
}
} else {
changes.push({
type: "new",
name, url
});
}
}
if ( !urlSet.has(url) ) {
urls.set(url, next);
}
urlSet.add(url);
} else {
urls.push(next);
}
break;
case "folder":
nodes.push(...next.children);
break;
default:
console.info("New type", type, next);
break;
}
}
if (map) {
[...map.keys()].forEach(url => {
if ( !urlSet.has(url) ) {
changes.push({
type: "delete",
url
});
map.delete(url);
}
});
}
return map ? changes : urls;
}
// source: https://stackoverflow.com/a/33017068
function resolveEnvironmentVariablesToPathSegments(path) {
return path.replace(/%([^%]+)%/g, function(_, key) {
return process.env[key];
});
}
/*
test();
async function test() {
for await ( const change of bookmarkChanges() ) {
console.log(change);
}
}
*/
/*
function* profileDirectoryEnumerator(maxN = 9999) {
let index = 0;
while(index <= maxN) {
const profileDirName = index ? `Profile ${index}` : `Default`;
yield profileDirName;
}
}
*/

115
src/common.js Normal file
View File

@ -0,0 +1,115 @@
import path from 'path';
import {fileURLToPath} from 'url';
import fs from 'fs';
import os from 'os';
const DEEB = false;
export const DEBUG = {
askFirst: true,
verboseSlow: process.env.VERBOSE_DEBUG_22120 || DEEB,
debug: process.env.DEBUG_22120 || DEEB,
checkPred: false
}
export const SHOW_FETCH = false;
// server related
export const PUBLIC_SERVER = true;
// crawl related
export const MIN_TIME_PER_PAGE = 10000;
export const MAX_TIME_PER_PAGE = 32000;
export const MIN_WAIT = 200;
export const MAX_WAITS = 300;
export const BATCH_SIZE = 5; // crawl batch size (how many concurrent tabs for crawling)
export const MAX_REAL_URL_LENGTH = 2**15 - 1;
export const CHECK_INTERVAL = 400;
export const TEXT_NODE = 3;
export const MAX_HIGHLIGHTABLE_LENGTH = 0; /* 0 is no max length for highlight */
export const MAX_TITLE_LENGTH = 140;
export const MAX_URL_LENGTH = 140;
export const MAX_HEAD = 140;
export const GO_SECURE = fs.existsSync(path.resolve(os.homedir(), 'local-sslcerts', 'privkey.pem'));
export class RichError extends Error {
constructor(msg) {
let textMessage;
try {
textMessage = JSON.stringify(msg);
} catch(e) {
console.warn(`Could not create RichError from argument ${msg.toString ? msg.toString() : msg} as JSON serialization failed. RichError argument MUST be JSON serializable. Failure error was:`, e);
return;
}
super(textMessage);
}
}
/* text nodes inside these elements that are ignored */
export const FORBIDDEN_TEXT_PARENT = new Set([
'STYLE',
'SCRIPT',
'NOSCRIPT',
/* we could remove these last two so as to index them as well */
'DATALIST',
'OPTION'
]);
export const ERROR_CODE_SAFE_TO_IGNORE = new Set([
-32000, /* message:
Can only get response body on requests captured after headers received.
* ignore because:
seems to only happen when new navigation aborts all
pending requests of the unloading page
*/
-32602, /* message:
Invalid InterceptionId.
* ignore because:
seems to only happen when new navigation aborts all
pending requests of the unloading page
*/
]);
export const SNIP_CONTEXT = 31;
export const NO_SANDBOX = (process.env.DEBUG_22120 && process.env.SET_22120_NO_SANDBOX) || false;
//export const APP_ROOT = '.';
export const APP_ROOT = path.dirname(process.argv[0]);
//export const APP_ROOT = path.dirname(fileURLToPath(import.meta.url));
export const sleep = ms => new Promise(res => setTimeout(res, ms));
export function say(o) {
console.log(JSON.stringify(o));
}
export function clone(o) {
return JSON.parse(JSON.stringify(o));
}
export async function untilTrue(pred, waitOverride = MIN_WAIT, maxWaits = MAX_WAITS) {
if ( waitOverride < 0 ) {
maxWaits = -1;
waitOverride = MIN_WAIT;
}
let waitCount = 0;
let resolve;
const pr = new Promise(res => resolve = res);
setTimeout(checkPred, 0);
return pr;
async function checkPred() {
DEBUG.checkPred && console.log('Checking', pred.toString());
if ( await pred() ) {
return resolve(true);
} else {
waitCount++;
if ( waitCount < maxWaits || maxWaits < 0 ) {
setTimeout(checkPred, waitOverride);
} else {
resolve(false);
}
}
}
}

1
src/hello.js Normal file
View File

@ -0,0 +1 @@
console.log(`hello...is it me you're looking for?`);

384
src/highlighter.js Normal file
View File

@ -0,0 +1,384 @@
import ukkonen from 'ukkonen';
import {DEBUG} from './common.js';
const MAX_ACCEPT_SCORE = 0.5;
const CHUNK_SIZE = 12;
//testHighlighter();
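// edit-distance bounds for comparing the query to a fragment: MinScore is the smallest distance possible given the two lengths, MaxScore scales the remainder into a roughly 0..1 score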
function params(qLength, chunkSize = CHUNK_SIZE) {
const MaxDist = chunkSize;
const MinScore = Math.abs(qLength - chunkSize);
const MaxScore = Math.max(qLength, chunkSize) - MinScore;
return {MaxDist,MinScore,MaxScore};
}
export function highlight(query, doc, {
/* 0 is no maxLength */
maxLength: maxLength = 0,
maxAcceptScore: maxAcceptScore = MAX_ACCEPT_SCORE,
chunkSize: chunkSize = CHUNK_SIZE
} = {}) {
if ( chunkSize % 2 ) {
throw new TypeError(`chunkSize must be even. Was: ${chunkSize} which is odd.`);
}
doc = Array.from(doc);
if ( maxLength ) {
doc = doc.slice(0, maxLength);
}
const highlights = [];
const extra = chunkSize;
// use the length of Array.from rather than string length to
// get an accurate symbol count for all unicode
const qLength = Array.from(query).length;
const {MaxDist,MinScore,MaxScore} = params(qLength, chunkSize);
const doc2 = Array.from(doc);
// make doc length === 0 % chunkSize
doc.splice(doc.length, 0, ...(new Array((chunkSize - doc.length % chunkSize) % chunkSize)).join(' ').split(''));
const fragments = doc.reduce(getFragmenter(chunkSize), []);
//console.log(fragments);
// pad start of doc2 by half chunkSize
doc2.splice(0, 0, ...(new Array(chunkSize/2 + 1)).join(' ').split(''));
// make doc2 length === 0 % chunkSize
doc2.splice(doc2.length, 0, ...(new Array((chunkSize - doc2.length % chunkSize) % chunkSize)).join(' ').split(''));
const fragments2 = doc2.reduce(getFragmenter(chunkSize), []);
query = query.toLocaleLowerCase();
DEBUG.verboseSlow && console.log(fragments);
const scores = [...fragments, ...fragments2].map(fragment => {
const distance = ukkonen(query, fragment.text.toLocaleLowerCase(), MaxDist);
// MinScore = the minimum number of edits possible between strings of these lengths, so subtract it before scaling
const scaledScore = (distance - MinScore)/MaxScore;
return {score: scaledScore, fragment};
});
// sort ascending (smallest scores win)
scores.sort(({score:a}, {score:b}) => a-b);
for( const {score, fragment} of scores ) {
if ( score > maxAcceptScore ) {
break;
}
highlights.push({score,fragment});
}
let result;
if ( highlights.length === 0 ) {
DEBUG.verboseSlow && console.log('Zero highlights, showing first score', scores[0]);
result = scores.slice(0,1);
} else {
let better = Array.from(highlights).slice(0, 10);
better = better.map(hl => {
const length = Array.from(hl.fragment.text).length;
let {offset, symbols} = hl.fragment;
const newText = symbols.slice(Math.max(0,offset - extra), offset).join('') + hl.fragment.text + symbols.slice(offset + length, offset + length + extra).join('');
DEBUG.verboseSlow && console.log({newText, oldText:hl.fragment.text, p:[Math.max(0,offset-extra), offset, offset+length, offset+length+extra], trueText: symbols.slice(offset, offset+length).join('')});
hl.fragment.text = newText;
const {MaxDist,MinScore,MaxScore} = params(Array.from(newText).length);
const distance = ukkonen(query, hl.fragment.text.toLocaleLowerCase(), MaxDist);
// MinScore = the minimum number of edits possible between strings of these lengths, so subtract it before scaling
const scaledScore = (distance - MinScore)/MaxScore;
hl.score = scaledScore;
return hl;
});
better.sort(({score:a}, {score:b}) => a-b);
DEBUG.verboseSlow && console.log(JSON.stringify({better},null,2));
result = better.slice(0,3);
}
return result;
}
// use overlapping trigrams to index
export function trilight(query, doc, {
/* 0 is no maxLength */
maxLength: maxLength = 0,
ngramSize: ngramSize = 3,
/*minSegmentGap: minSegmentGap = 20,*/
maxSegmentSize: maxSegmentSize = 140,
} = {}) {
query = Array.from(query);
const oDoc = Array.from(doc);
doc = Array.from(doc.toLocaleLowerCase());
if ( maxLength ) {
doc = doc.slice(0, maxLength);
}
const trigrams = doc.reduce(getFragmenter(ngramSize, {overlap:true}), []);
const index = trigrams.reduce((idx, frag) => {
let counts = idx.get(frag.text);
if ( ! counts ) {
counts = [];
idx.set(frag.text, counts);
}
counts.push(frag.offset);
return idx;
}, new Map);
const qtris = query.reduce(getFragmenter(ngramSize, {overlap:true}), []);
const entries = qtris.reduce((E, {text}, qi) => {
const counts = index.get(text);
if ( counts ) {
counts.forEach(di => {
const entry = {text, qi, di};
E.push(entry);
});
}
return E;
}, []);
entries.sort(({di:a}, {di:b}) => a-b);
let lastQi;
let lastDi;
let run;
const runs = entries.reduce((R, {text,qi,di}) => {
if ( ! run ) {
run = {
tris: [text],
qi, di
};
} else {
const dQi = qi - lastQi;
const dDi = di - lastDi;
if ( dQi === 1 && dDi === 1 ) {
run.tris.push(text);
} else {
/* add two for the size 2 suffix of the final trigram */
run.length = run.tris.length + (ngramSize - 1);
R.push(run);
run = {
qi, di,
tris: [text]
};
}
}
lastQi = qi;
lastDi = di;
return R;
}, []);
let lastRun;
const gaps = runs.reduce((G, run) => {
if ( lastRun ) {
const gap = {runs: [lastRun, run], gap: run.di - (lastRun.di + lastRun.length)};
G.push(gap);
}
lastRun = run;
return G;
}, []);
gaps.sort(({gap:a}, {gap:b}) => a-b);
const segments = [];
const runSegMap = {};
while(gaps.length) {
const nextGap = gaps.shift();
const {runs} = nextGap;
const leftSeg = runSegMap[runs[0].di];
const rightSeg = runSegMap[runs[1].di];
let newSegmentLength = 0;
let assigned = false;
if ( leftSeg ) {
newSegmentLength = runs[1].di + runs[1].length - leftSeg.start;
if ( newSegmentLength <= maxSegmentSize ) {
leftSeg.end = runs[1].di + runs[1].length;
leftSeg.score += runs[1].length;
runSegMap[runs[1].di] = leftSeg;
assigned = leftSeg;
}
} else if ( rightSeg ) {
newSegmentLength = rightSeg.end - runs[0].di;
if ( newSegmentLength <= maxSegmentSize ) {
rightSeg.start = runs[0].di;
rightSeg.score += runs[0].length;
runSegMap[runs[0].di] = rightSeg;
assigned = rightSeg;
}
} else {
const newSegment = {
start: runs[0].di,
end: runs[0].di + runs[0].length + nextGap.gap + runs[1].length,
score: runs[0].length + runs[1].length
};
if ( newSegment.end - newSegment.start <= maxSegmentSize ) {
runSegMap[runs[0].di] = newSegment;
runSegMap[runs[1].di] = newSegment;
segments.push(newSegment);
assigned = newSegment;
newSegmentLength = newSegment.end - newSegment.start;
}
}
if ( assigned ) {
DEBUG.verboseSlow && console.log('Assigned ', nextGap, 'to segment', assigned, 'now having length', newSegmentLength);
} else {
DEBUG.verboseSlow && console.log('Gap ', nextGap, `could not be assigned,
as it would have made an existing segment too long, or it was already too long itself.`
);
}
}
segments.sort(({score:a}, {score:b}) => b-a);
const textSegments = segments.map(({start,end}) => oDoc.slice(start,end).join(''));
//console.log(JSON.stringify({gaps}, null, 2));
DEBUG.verboseSlow && console.log(segments, textSegments);
if ( textSegments.length === 0 ) {
DEBUG.verboseSlow && console.log({query, doc, maxLength, ngramSize, maxSegmentSize,
trigrams,
index,
entries,
runs,
gaps,
segments,
textSegments
});
}
return textSegments.slice(0,3);
}
// returns a reducer that builds fragments: non-overlapping chunks by default, overlapping n-grams when {overlap:true}
function getFragmenter(chunkSize, {overlap: overlap = false} = {}) {
if ( !Number.isInteger(chunkSize) || chunkSize < 1 ) {
throw new TypeError(`chunkSize needs to be a whole number greater than 0`);
}
let currentLength;
return function fragment(frags, nextSymbol, index, symbols) {
const pushBack = [];
let currentFrag;
// logic:
// if there are no running fragments OR
// adding the next symbol would exceed chunkSize
// then start a new fragment OTHERWISE
// keep adding to the currentFragment
if ( overlap || (frags.length && ((currentLength + 1) <= chunkSize)) ) {
let count = 1;
if ( overlap ) {
count = Math.min(index+1, chunkSize);
currentFrag = {text:'', offset:index, symbols};
frags.push(currentFrag);
}
while(count--) {
currentFrag = frags.pop();
//console.log({frags,nextSymbol,index,currentFrag});
pushBack.push(currentFrag);
currentFrag.text += nextSymbol;
}
} else {
currentFrag = {text:nextSymbol, offset:index, symbols};
currentLength = 0;
pushBack.push(currentFrag);
}
currentLength++;
while(pushBack.length) {
frags.push(pushBack.pop());
}
return frags;
}
}
// returns a function that creates overlapping fragments
// todo - try this one as well
// tests
/*
function testHighlighter() {
const query = 'metahead search';
const doc = `
Hacker News new | past | comments | ask | show | jobs | submit login
1.
AWS appears to be down again
417 points by riknox 2 hours ago | hide | 260 comments
2.
FreeBSD Jails for Fun and Profit (topikettunen.com)
42 points by kettunen 1 hour ago | hide | discuss
3.
IMF, 10 countries simulate cyber attack on global financial system (nasdaq.com)
33 points by pueblito 1 hour ago | hide | 18 comments
4.
DNA seen through the eyes of a coder (berthub.eu)
116 points by dunefox 3 hours ago | hide | 37 comments
5.
Pure Bash lightweight web server (github.com/remileduc)
74 points by turrini 2 hours ago | hide | 46 comments
6.
Parser Combinators in Haskell (serokell.io)
18 points by aroccoli 1 hour ago | hide | 3 comments
7.
DeepMinds New AI with a Memory Outperforms Algorithms 25 Times Its Size (singularityhub.com)
233 points by darkscape 9 hours ago | hide | 88 comments
8.
Tinder just permabanned me or the problem with big tech (paulefou.com)
90 points by svalee 1 hour ago | hide | 106 comments
9.
Rocky Mountain Basic (wikipedia.org)
12 points by mattowen_uk 1 hour ago | hide | 5 comments
10.
Teller Reveals His Secrets (2012) (smithsonianmag.com)
56 points by Tomte 4 hours ago | hide | 26 comments
11.
Heroku Is Currently Down (heroku.com)
129 points by iamricks 2 hours ago | hide | 29 comments
12. Convictional (YC W19) is hiring engineers to build the future of B2B trade-Remote (ashbyhq.com)
2 hours ago | hide
13.
Scientists find preserved dinosaur embryo preparing to hatch like a bird (theguardian.com)
187 points by Petiver 9 hours ago | hide | 111 comments
14.
I did a Mixergy interview so bad they didn't even release it (robfitz.com)
15 points by robfitz 1 hour ago | hide | 7 comments
15.
Now DuckDuckGo is building its own desktop browser (zdnet.com)
132 points by waldekm 2 hours ago | hide | 64 comments
16.
English has been my pain for 15 years (2013) (antirez.com)
105 points by Tomte 1 hour ago | hide | 169 comments
17.
Polish opposition duo hacked with NSO spyware (apnews.com)
102 points by JumpCrisscross 2 hours ago | hide | 35 comments
18.
Linux Has Grown into a Viable PC Gaming Platform and the Steam Stats Prove It (hothardware.com)
119 points by rbanffy 3 hours ago | hide | 105 comments
19.
LGs new 16:18 monitor (theverge.com)
50 points by tosh 1 hour ago | hide | 25 comments
20.
Construction of radio equipment in a Japanese PoW camp (bournemouth.ac.uk)
117 points by marcodiego 9 hours ago | hide | 16 comments
21.
Everything I've seen on optimizing Postgres on ZFS (vadosware.io)
27 points by EntICOnc 4 hours ago | hide | 2 comments
22.
Microsoft Teams: 1 feature, 4 vulnerabilities (positive.security)
269 points by kerm1t 4 hours ago | hide | 196 comments
23.
Analog computers were the most powerful computers for thousands of years [video] (youtube.com)
103 points by jdkee 9 hours ago | hide | 55 comments
24.
Shipwrecks, Stolen Jewels, Skull-Blasting Are Some of This Years Best Mysteries (atlasobscura.com)
8 points by CapitalistCartr 1 hour ago | hide | 1 comment
25.
Isolating Xwayland in a VM (roscidus.com)
94 points by pmarin 9 hours ago | hide | 32 comments
26.
Show HN: Metaheads, a search engine for Facebook comments (metaheads.xyz)
4 points by jawerty 1 hour ago | hide | 15 comments
27.
Quantum theory based on real numbers can be experimentally falsified (nature.com)
159 points by SquibblesRedux 14 hours ago | hide | 93 comments
28.
Founder of Black Girls Code has been ousted as head of the nonprofit (businessinsider.com)
29 points by healsdata 1 hour ago | hide | 7 comments
29.
Waffle House Poet Laureate (2019) (atlantamagazine.com)
5 points by brudgers 1 hour ago | hide | 4 comments
30.
Earths magnetic field illuminates Biblical history (economist.com)
46 points by helsinkiandrew 8 hours ago | hide | 17 comments
More
`;
console.log(JSON.stringify(highlight(
query, doc
).map(({fragment:{text,offset}}) => offset + ':' + text), null, 2));
console.log(trilight('metahead search', doc.toLocaleLowerCase().replace(/\s+/g, ' ')));
}
*/

4
src/index.js Normal file
View File

@ -0,0 +1,4 @@
/* eslint-disable no-global-assign */
require = require('esm')(module/*, options*/);
module.exports = require('./app.js');
/* eslint-enable no-global-assign */

417
src/libraryServer.js Normal file
View File

@ -0,0 +1,417 @@
import http from 'http';
import https from 'https';
import fs from 'fs';
import os from 'os';
import path from 'path';
import express from 'express';
import args from './args.js';
import {
GO_SECURE,
MAX_REAL_URL_LENGTH,
MAX_HEAD, MAX_HIGHLIGHTABLE_LENGTH, DEBUG,
say, sleep, APP_ROOT,
RichError
} from './common.js';
import {startCrawl, Archivist} from './archivist.js';
import {trilight, highlight} from './highlighter.js';
const SITE_PATH = path.resolve(APP_ROOT, '..', 'public');
const SearchCache = new Map();
const app = express();
let running = false;
let Server, upAt, port;
const LibraryServer = {
start, stop
}
const secure_options = {};
const protocol = GO_SECURE ? https : http;
export default LibraryServer;
async function start({server_port}) {
if ( running ) {
DEBUG.verboseSlow && console.warn(`Attempting to start server when it is not closed. Exiting start()...`);
return;
}
running = true;
try {
const sec = {
key: fs.readFileSync(path.resolve(os.homedir(), 'local-sslcerts', 'privkey.pem')),
cert: fs.readFileSync(path.resolve(os.homedir(), 'local-sslcerts', 'fullchain.pem')),
ca: fs.existsSync(path.resolve(os.homedir(), 'local-sslcerts', 'chain.pem')) ?
fs.readFileSync(path.resolve(os.homedir(), 'local-sslcerts', 'chain.pem'))
:
undefined
};
console.log({sec});
Object.assign(secure_options, sec);
} catch(e) {
console.warn(`No certs found so will use insecure no SSL.`);
}
try {
port = server_port;
addHandlers();
const secure = secure_options.cert && secure_options.key;
const server = protocol.createServer.apply(protocol, GO_SECURE && secure ? [secure_options, app] : [app]);
Server = server.listen(Number(port), err => {
if ( err ) {
running = false;
throw err;
}
upAt = new Date;
say({server_up:{upAt,port}});
});
} catch(e) {
running = false;
DEBUG.verboseSlow && console.error(`Error starting server`, e);
process.exit(1);
}
}
function addHandlers() {
app.use(express.urlencoded({extended:true, limit: '50mb'}));
app.use(express.static(SITE_PATH));
if ( args.library_path() ) {
app.use("/library", express.static(args.library_path()))
}
app.get('/search(.json)?', async (req, res) => {
await Archivist.isReady();
let {query:oquery} = req.query;
if ( ! oquery ) {
return res.end(SearchResultView({results:[], query:'', HL:new Map, page:1}));
}
oquery = oquery.trim();
if ( ! oquery ) {
return res.end(SearchResultView({results:[], query:'', HL:new Map, page:1}));
}
let {page} = req.query;
if ( ! page || ! Number.isInteger(parseInt(page)) ) {
page = 1;
} else {
page = parseInt(page);
}
let resultIds, query, HL;
if ( SearchCache.has(oquery) ) {
({query, resultIds, HL} = SearchCache.get(oquery));
} else {
({query, results:resultIds, HL} = await Archivist.search(oquery));
SearchCache.set(oquery, {query, resultIds, HL});
}
const start = (page-1)*args.results_per_page;
const results = resultIds.slice(start,start+args.results_per_page).map(docId => Archivist.getDetails(docId))
if ( req.path.endsWith('.json') ) {
res.end(JSON.stringify({
results, query
}, null, 2));
} else {
results.forEach(r => {
/*
r.snippet = '... ' + highlight(query, r.content, {maxLength:MAX_HIGHLIGHTABLE_LENGTH})
.sort(({fragment:{offset:a}}, {fragment:{offset:b}}) => a-b)
.map(hl => Archivist.findOffsets(query, hl.fragment.text))
.join(' ... ');
*/
r.snippet = '... ' + trilight(query, r.content, {maxLength:MAX_HIGHLIGHTABLE_LENGTH})
.map(segment => Archivist.findOffsets(query, segment))
.join(' ... ');
});
res.end(SearchResultView({results, query, HL, page}));
}
});
app.get('/mode', async (req, res) => {
res.end(Archivist.getMode());
});
app.get('/archive_index.html', async (req, res) => {
Archivist.saveIndex();
const index = Archivist.getIndex();
res.end(IndexView(index));
});
app.get('/edit_index.html', async (req, res) => {
Archivist.saveIndex();
const index = Archivist.getIndex();
res.end(IndexView(index, {edit:true}));
});
app.post('/edit_index.html', async (req, res) => {
const {url_to_delete} = req.body;
await Archivist.deleteFromIndexAndSearch(url_to_delete);
res.redirect('/edit_index.html');
});
app.post('/mode', async (req, res) => {
const {mode} = req.body;
Archivist.changeMode(mode);
//res.end(`Mode set to ${mode}`);
res.redirect('/');
});
app.get('/base_path', async (req, res) => {
res.end(args.getBasePath());
});
app.post('/base_path', async (req, res) => {
const {base_path} = req.body;
const change = args.updateBasePath(base_path, {before: [
() => Archivist.beforePathChanged(base_path)
]});
if ( change ) {
await Archivist.afterPathChanged();
Server.close(async () => {
running = false;
console.log(`Server closed.`);
console.log(`Waiting 50ms...`);
await sleep(50);
start({server_port:port});
console.log(`Server restarting.`);
});
//res.end(`Base path set to ${base_path} and saved to preferences. See console for progress. Server restarting...`);
res.redirect('/#new_base_path');
} else {
//res.end(`Base path did not change.`);
res.redirect('/');
}
});
app.post('/crawl', async (req, res) => {
try {
let {
links, timeout, depth, saveToFile,
maxPageCrawlTime, minPageCrawlTime, batchSize,
program,
} = req.body;
const oTimeout = timeout;
timeout = Math.round(parseFloat(timeout)*1000);
depth = Math.round(parseInt(depth));
batchSize = Math.round(parseInt(batchSize));
saveToFile = !!saveToFile;
minPageCrawlTime = Math.round(parseInt(minPageCrawlTime)*1000);
maxPageCrawlTime = Math.round(parseInt(maxPageCrawlTime)*1000);
if ( Number.isNaN(timeout) || Number.isNaN(depth) || typeof links != 'string' ) {
console.warn({invalid:{timeout,depth,links}});
throw new RichError({
status: 400,
message: 'Invalid parameters: timeout, depth or links'
});
}
const urls = links.split(/[\n\s\r]+/g).map(u => u.trim()).filter(u => {
const tooShort = u.length === 0;
if ( tooShort ) return false;
const tooLong = u.length > MAX_REAL_URL_LENGTH;
if ( tooLong ) return false;
let invalid = false;
try {
new URL(u);
} catch {
invalid = true;
};
if ( invalid ) return false;
return true;
}).map(url => ({url,depth:1}));
console.log(`Starting crawl from ${urls.length} URLs, waiting ${oTimeout} seconds for each to load, and continuing to a depth of ${depth} clicks...`);
await startCrawl({
urls, timeout, depth, saveToFile, batchSize, minPageCrawlTime, maxPageCrawlTime, program,
});
res.end(`Starting crawl from ${urls.length} URLs, waiting ${oTimeout} seconds for each to load, and continuing to a depth of ${depth} clicks...`);
} catch(e) {
if ( e instanceof RichError ) {
console.warn(e);
const {status, message} = JSON.parse(e.message);
res.status(status);
res.end(message);
} else {
console.warn(e);
res.sendStatus(500);
}
return;
}
});
}
async function stop() {
let resolve;
const pr = new Promise(res => resolve = res);
console.log(`Closing library server...`);
Server.close(() => {
console.log(`Library server closed.`);
resolve();
});
return pr;
}
function IndexView(urls, {edit:edit = false} = {}) {
return `
<!DOCTYPE html>
<meta charset=utf-8>
<title>
${ edit ? 'Editing ' : ''}
Your HTML Library
</title>
<link rel=stylesheet href=/style.css>
${ edit ? `
<script>
const sleep = ms => new Promise(res => setTimeout(res, ms));
const StrikeThrough = 'line-through';
</script>
` : ''}
<header>
<h1><a href=/>22120</a> &mdash; Archive Index</h1>
</header>
<form method=GET action=/search style="margin-bottom: 1em;">
<fieldset class=search>
<legend>Search your archive</legend>
<input class=search type=search name=query placeholder="search your library">
<button>Search</button>
</fieldset>
</form>
<form style="display:flex; justify-content: end; margin-bottom:0"
method=GET
action=${ edit ? '/archive_index.html' : '/edit_index.html' }>
<details>
<summary style="display:inline-block; cursor: default;">
${ edit ? `
<button
style="
border: 0;
background: 0;
font-size: x-large;
line-height: 0.5;
"
>
&check;
</button>`
:
'&hellip;'
}
</summary>
<div style="position: absolute;">
<button><em style="
font-size:x-large;
line-height:0.5;
position: relative;
top: 0.185em;
">
&#9986;
</em>
edit
</button>
</div>
</details>
</form>
<ul>
${
urls.map(([url,{title, id}]) => `
<li>
${ DEBUG ? id + ':' : ''}
<a target=_blank href=${url}>${(title||url).slice(0, MAX_HEAD)}</a>
${ edit ? `
<form style=display:contents; method=POST action=/edit_index.html>
<input name=url_to_delete type=url hidden value="${url}">
<button
style="font-size: smaller; line-height: 0.618;"
type=button
onclick="double_confirm(event);"
>
X
</button>
</form>
` : ''}
</li>
`).join('\n')
}
</ul>
${ edit ? `
<script>
async function double_confirm(deleteClick) {
const form = deleteClick.target.closest('form');
const link = form.previousElementSibling;
const original = link.style.textDecoration;
link.style.textDecoration = StrikeThrough;
let {host} = new URL(form.url_to_delete.value);
host = host.replace(/^www./i, '');
await sleep(200);
const reallyDelete = confirm(
\`\n are you sure you want to delete this \n\n \${host} \n\n from the internet?\n\`
);
if ( reallyDelete ) return form.submit();
link.style.textDecoration = original;
}
</script>
` : ''}
`
}
function SearchResultView({results, query, HL, page}) {
return `
<!DOCTYPE html>
<meta charset=utf-8>
<title>${query} - 22120 search results</title>
<link rel=stylesheet href=/style.css>
<header>
<h1><a href=/>22120</a> &mdash; Search Results</h1>
</header>
<p>
View <a href=/archive_index.html>your index</a>, or
</p>
<form method=GET action=/search>
<fieldset class=search>
<legend>Search again</legend>
<input class=search type=search name=query placeholder="search your library" value="${query}">
<button>Search</button>
</fieldset>
</form>
<p>
Showing results for <b>${query}</b>
</p>
<ol class=results start="${(page-1)*args.results_per_page+1}">
${
results.map(({snippet, url,title,id}) => `
<li>
            ${DEBUG ? id + ':' : ''} <a target=_blank href="${url}">${
              HL.get(id)?.title||(title||url||'').slice(0, MAX_HEAD)
            }</a>
<br>
<small class=url>${
HL.get(id)?.url||(url||'').slice(0, MAX_HEAD)
}</small>
<p>${snippet}</p>
</li>
`).join('\n')
}
</ol>
<p class=cent>
${page > 1 ? `
<a href=/search?query=${encodeURIComponent(query)}&page=${encodeURIComponent(page-1)}>
&lt; Page ${page-1}
</a> |` : ''}
<span class=grey>
Page ${page}
</span>
|
<a href=/search?query=${encodeURIComponent(query)}&page=${encodeURIComponent(page+1)}>
Page ${page+1} &gt;
</a>
</p>
`
}
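// Shape note: `results` is an array of {snippet, url, title, id} records and `HL` is a Map
// of id -> highlight overrides ({title, url}) consulted before falling back to the stored
// values. A minimal sketch, assuming no highlight overrides:
//
//   const html = SearchResultView({results, query: 'example', HL: new Map(), page: 1});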

162
src/protocol.js Normal file
View File

@ -0,0 +1,162 @@
import Ws from 'ws';
import Fetch from 'node-fetch';
import {untilTrue, SHOW_FETCH, DEBUG, ERROR_CODE_SAFE_TO_IGNORE} from './common.js';
const ROOT_SESSION = "browser";
const MESSAGES = new Map();
export async function connect({port:port = 9222} = {}) {
let webSocketDebuggerUrl, socket;
try {
await untilTrue(async () => {
let result = false;
      try {
        const {webSocketDebuggerUrl} = await Fetch(`http://127.0.0.1:${port}/json/version`).then(r => r.json());
        if ( webSocketDebuggerUrl ) {
          result = true;
        }
      } catch {
        // ignore fetch errors while the DevTools endpoint is still starting up; untilTrue retries
      }
      return result;
});
({webSocketDebuggerUrl} = await Fetch(`http://127.0.0.1:${port}/json/version`).then(r => r.json()));
socket = new Ws(webSocketDebuggerUrl);
} catch(e) {
console.log("Error communicating with browser", e);
process.exit(1);
}
const Resolvers = {};
const Handlers = {};
socket.on('message', handle);
let id = 0;
let resolve;
const promise = new Promise(res => resolve = res);
socket.on('open', () => resolve());
await promise;
return {
send,
on, ons, ona,
close
};
async function send(method, params = {}, sessionId) {
const message = {
method, params, sessionId,
id: ++id
};
    if ( ! sessionId ) {
      // no session targeted: omit the sessionId key from the outgoing message
      delete message.sessionId;
    }
const key = `${sessionId||ROOT_SESSION}:${message.id}`;
let resolve;
const promise = new Promise(res => resolve = res);
Resolvers[key] = resolve;
const outGoing = JSON.stringify(message);
MESSAGES.set(key, outGoing);
socket.send(outGoing);
DEBUG.verboseSlow && (SHOW_FETCH || !method.startsWith('Fetch')) && console.log("Sent", message);
return promise;
}
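  // Replies are matched back to their callers by the `${sessionId||ROOT_SESSION}:${id}` key
  // stored in Resolvers above, so `send` resolves with the `result` field of the matching
  // protocol response. For example (sketch):
  //
  //   const {targetInfos} = await send("Target.getTargets", {});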
async function handle(message) {
if ( typeof message !== "string" ) {
try {
message += '';
} catch(e) {
message = message.toString();
}
}
const stringMessage = message;
message = JSON.parse(message);
if ( message.error ) {
const showError = DEBUG.protocol || !ERROR_CODE_SAFE_TO_IGNORE.has(message.error.code);
if ( showError ) {
DEBUG.protocol && console.warn(message);
}
}
const {sessionId} = message;
const {method} = message;
const {id, result} = message;
if ( id ) {
const key = `${sessionId||ROOT_SESSION}:${id}`;
const resolve = Resolvers[key];
if ( ! resolve ) {
DEBUG.protocol && console.warn(`No resolver for key`, key, stringMessage.slice(0,140));
} else {
Resolvers[key] = undefined;
try {
await resolve(result);
} catch(e) {
console.warn(`Resolver failed`, e, key, stringMessage.slice(0,140), resolve);
}
}
if ( DEBUG ) {
if ( message.error ) {
const showError = DEBUG || !ERROR_CODE_SAFE_TO_IGNORE.has(message.error.code);
if ( showError ) {
const originalMessage = MESSAGES.get(key);
DEBUG.protocol && console.warn({originalMessage});
}
}
}
MESSAGES.delete(key);
} else if ( method ) {
const listeners = Handlers[method];
if ( Array.isArray(listeners) ) {
for( const func of listeners ) {
try {
func({message, sessionId});
} catch(e) {
console.warn(`Listener failed`, method, e, func.toString().slice(0,140), stringMessage.slice(0,140));
}
}
}
} else {
console.warn(`Unknown message on socket`, message);
}
}
function on(method, handler) {
let listeners = Handlers[method];
if ( ! listeners ) {
Handlers[method] = listeners = [];
}
listeners.push(wrap(handler));
}
function ons(method, handler) {
let listeners = Handlers[method];
if ( ! listeners ) {
Handlers[method] = listeners = [];
}
listeners.push(handler);
}
function ona(method, handler, sessionId) {
let listeners = Handlers[method];
if ( ! listeners ) {
Handlers[method] = listeners = [];
}
listeners.push(({message}) => {
if ( message.sessionId === sessionId ) {
handler(message.params);
} else {
          console.log(`Event for a different session, ignoring`, {method, handler, sessionId, message});
}
});
}
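  // on(): the handler receives only message.params; ons(): the handler receives the raw
  // {message, sessionId} envelope; ona(): like on(), but only for events from one sessionId.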
function close() {
socket.close();
}
function wrap(fn) {
return ({message}) => fn(message.params)
}
}
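// A minimal usage sketch, assuming a Chrome instance was launched with
// --remote-debugging-port=9222 (the default port expected by connect()):
//
//   const {send, on, close} = await connect({port: 9222});
//   on('Target.targetCreated', ({targetInfo}) => console.log('new target', targetInfo.targetId));
//   await send('Target.setDiscoverTargets', {discover: true});
//   // ...when finished:
//   close();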