diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6484760f1..f58ab18d8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -98,6 +98,7 @@ repos: rev: "v0.8.0.4" hooks: - id: shellcheck + exclude: ^tools/python$ - repo: https://github.com/codespell-project/codespell rev: "v2.1.0" diff --git a/src/js/pyodide.ts b/src/js/pyodide.ts index 6f47bee56..896473bcc 100644 --- a/src/js/pyodide.ts +++ b/src/js/pyodide.ts @@ -204,6 +204,7 @@ export type ConfigType = { stderr?: (msg: string) => void; jsglobals?: object; args: string[]; + _node_mounts: string[]; }; /** @@ -263,6 +264,7 @@ export async function loadPyodide( stderr?: (msg: string) => void; jsglobals?: object; args?: string[]; + _node_mounts?: string[]; } = {} ): Promise { await initNodeModules(); @@ -280,6 +282,7 @@ export async function loadPyodide( homedir: "/home/pyodide", lockFileURL: indexURL! + "repodata.json", args: [], + _node_mounts: [], }; const config = Object.assign(default_config, options) as ConfigType; const pyodide_py_tar_promise = loadBinaryFile( @@ -287,6 +290,13 @@ export async function loadPyodide( ); const Module = createModule(); + Module.preRun.push(() => { + for (const mount of config._node_mounts) { + Module.FS.mkdirTree(mount); + Module.FS.mount(Module.NODEFS, { root: mount }, mount); + } + }); + Module.arguments = config.args; const API: any = { config }; Module.API = API; diff --git a/src/tests/test_cmdline_runner.py b/src/tests/test_cmdline_runner.py new file mode 100644 index 000000000..e8875e6c7 --- /dev/null +++ b/src/tests/test_cmdline_runner.py @@ -0,0 +1,106 @@ +import re +import subprocess +import sys + +import pytest + +import pyodide +from pyodide_build.common import emscripten_version, get_pyodide_root + +mark = pytest.mark.xfail_browsers(chrome="node only", firefox="node only") + +pyodide_root = get_pyodide_root() +script_path = pyodide_root / "tools/python" + + +@mark +def test_python_version(selenium): + result = subprocess.run([script_path, "-V"], capture_output=True, encoding="utf8") + assert result.returncode == 0 + assert result.stdout.strip() == "Python " + sys.version.partition(" ")[0] + assert result.stderr == "" + + +@mark +def test_dash_c(selenium): + result = subprocess.run( + [ + script_path, + "-c", + "from pyodide import __version__; print(__version__)", + ], + capture_output=True, + encoding="utf8", + ) + assert result.returncode == 0 + assert result.stdout.strip() == pyodide.__version__ + assert result.stderr == "" + + +@mark +def test_ensure_future(selenium): + result = subprocess.run( + [ + script_path, + "-c", + """\ +import asyncio +async def test(): + await asyncio.sleep(0.5) + print("done") +asyncio.ensure_future(test()) +""", + ], + capture_output=True, + encoding="utf8", + ) + assert result.returncode == 0 + assert result.stderr == "" + assert result.stdout.strip() == "done" + + +@mark +def test_flush_on_exit(selenium): + result = subprocess.run( + [ + script_path, + "-c", + """\ +print("a", end="") +print("b", end="") +print("c", end="") +""", + ], + capture_output=True, + encoding="utf8", + ) + assert result.returncode == 0 + assert result.stderr == "" + assert result.stdout.strip() == "abc" + + +@mark +def test_dash_m(selenium): + result = subprocess.run( + [script_path, "-m", "platform"], + capture_output=True, + encoding="utf8", + ) + assert result.returncode == 0 + assert result.stderr == "" + assert result.stdout.strip() == f"Emscripten-{emscripten_version()}-wasm32-32bit" + + +@mark +def test_invalid_cmdline_option(selenium): + result = subprocess.run([script_path, "-c"], capture_output=True, encoding="utf8") + assert result.returncode != 0 + assert result.stdout == "" + assert ( + re.sub("/[/a-z]*/tools/python", "<...>/tools/python", result.stderr) + == """\ +Argument expected for the -c option +usage: <...>/tools/python [option] ... [-c cmd | -m mod | file | -] [arg] ... +Try `python -h' for more information. +""" + ) diff --git a/tools/python b/tools/python new file mode 100755 index 000000000..2b4aa83dc --- /dev/null +++ b/tools/python @@ -0,0 +1,235 @@ +#!/bin/sh +":" /* << "EOF" +This file is a bash/node polyglot. This is needed for a few reasons: + +1. In node 14 we must pass `--experimental-wasm-bigint`. In node >14 we cannot +pass --experimental-wasm-bigint + +2. Emscripten vendors node 14 so it is desirable not to require node >= 16 + +3. We could use a bash script in a separate file to determine the flags needed, +but virtualenv looks up the current file and uses it directly. So if we make +python.sh and have it invoke python.js, then the virtualenv will invoke python.js +directly without the `--experimental-wasm-bigint` flag and so the virtualenv won't +work with node 14. + +Keeping the bash script and the JavaScript in the same file makes sure that even +inside the virtualenv the proper shell code is executed. +*/ + +/* +EOF +# bash +set -e + +which node > /dev/null || { \ + >&2 echo "No node executable found on the path" && exit 1; \ +} + +ARGS=$(node -e "$(cat <<"EOF" +const major_version = Number(process.version.split(".")[0].slice(1)); +if(major_version < 14) { + console.error("Need node version >= 14. Got node version", process.version); + process.exit(1); +} +if(major_version === 14){ + process.stdout.write("--experimental-wasm-bigint"); +} else { + // If $ARGS is empty, `let args = process.argv.slice(2)` removes the wrong + // number of arguments. `ARGS=--` is not empty but does nothing. + process.stdout.write("--"); +} +EOF +)") + +exec node "$ARGS" "$0" "$@" +*/ + + +const { loadPyodide } = require("../dist/pyodide"); +const fs = require("fs"); + +/** + * The default stderr/stdout do not handle newline or flush correctly, and stdin + * is also strange. Make a tty that connects Emscripten stdstreams to node + * stdstreams. We will make one tty for stdout and one for stderr, the + * `outstream` argument controls which one we make. + * + * Note that setting Module.stdin / Module.stdout / Module.stderr does not work + * because these cause `isatty(stdstream)` to return false. We want `isatty` to + * be true. (IMO this is an Emscripten issue, maybe someday we can fix it.) + */ +function make_tty_ops(outstream) { + return { + // get_char has 3 particular return values: + // a.) the next character represented as an integer + // b.) undefined to signal that no data is currently available + // c.) null to signal an EOF + get_char(tty) { + if (!tty.input.length) { + var result = null; + var BUFSIZE = 256; + var buf = Buffer.alloc(BUFSIZE); + // read synchronously at most BUFSIZE bytes from file descriptor 0 (stdin) + var bytesRead = fs.readSync(0, buf, 0, BUFSIZE, -1); + if (bytesRead === 0) { + return null; + } + result = buf.slice(0, bytesRead); + tty.input = Array.from(result); + } + return tty.input.shift(); + }, + put_char(tty, val) { + outstream.write(Buffer.from([val])); + }, + flush(tty) {}, + fsync() {}, + }; +} + +/** + * Fix standard streams by replacing them with ttys that work better via make_tty_ops. + */ +function setupStreams(FS, TTY) { + // Create and register devices + let mytty = FS.makedev(FS.createDevice.major++, 0); + let myttyerr = FS.makedev(FS.createDevice.major++, 0); + TTY.register(mytty, make_tty_ops(process.stdout)); + TTY.register(myttyerr, make_tty_ops(process.stderr)); + // Attach devices to files + FS.mkdev("/dev/mytty", mytty); + FS.mkdev("/dev/myttyerr", myttyerr); + // Replace /dev/stdxx with our custom devices + FS.unlink("/dev/stdin"); + FS.unlink("/dev/stdout"); + FS.unlink("/dev/stderr"); + FS.symlink("/dev/mytty", "/dev/stdin"); + FS.symlink("/dev/mytty", "/dev/stdout"); + FS.symlink("/dev/myttyerr", "/dev/stderr"); + // Refresh std streams so they use our new versions + FS.closeStream(0 /* stdin */); + FS.closeStream(1 /* stdout */); + FS.closeStream(2 /* stderr */); + FS.open("/dev/stdin", 0 /* write only */); + FS.open("/dev/stdout", 1 /* read only */); + FS.open("/dev/stderr", 1 /* read only */); +} + +/** + * Determine which native top level directories to mount into the Emscripten + * file system. + * + * This is a bit brittle, if the machine has a top level directory with certain + * names it is possible this could break. The most surprising one here is tmp, I + * am not sure why but if we link tmp then the process silently fails. + */ +function nativeDirsToLink() { + const skipDirs = ["dev", "lib", "proc", "tmp"]; + return fs + .readdirSync("/") + .filter((dir) => !skipDirs.includes(dir)) + .map((dir) => "/" + dir); +} + +async function main() { + let args = process.argv.slice(2); + const _node_mounts = nativeDirsToLink(); + try { + py = await loadPyodide({ + args, + fullStdLib: false, + _node_mounts, + homedir: process.cwd(), + // Strip out standard messages written to stdout and stderr while loading + // After Pyodide is loaded we will replace stdstreams with setupStreams. + stdout(e) { + if (e.trim() === "Python initialization complete") { + return; + } + console.log(e); + }, + stderr(e) { + if ( + [ + "warning: no blob constructor, cannot create blobs with mimetypes", + "warning: no BlobBuilder", + ].includes(e.trim()) + ) { + return; + } + console.warn(e); + }, + }); + } catch (e) { + if (e.constructor.name !== "ExitStatus") { + throw e; + } + // If the user passed `--help`, `--version`, or a set of command line + // arguments that is invalid in some way, we will exit here. + process.exit(e.status); + } + const FS = py.FS; + setupStreams(FS, py._module.TTY); + let sideGlobals = py.runPython("{}"); + globalThis.handleExit = function handleExit(code) { + if (code === undefined) { + code = 0; + } + if (py._module._Py_FinalizeEx() < 0) { + code = 120; + } + // It's important to call `process.exit` immediately after + // `_Py_FinalizeEx` because otherwise any asynchronous tasks still + // scheduled will segfault. + process.exit(code); + }; + + py.runPython( + ` + import asyncio + # Keep the event loop alive until all tasks are finished, or SystemExit or + # KeyboardInterupt is raised. + loop = asyncio.get_event_loop() + # Make sure we don't run _no_in_progress_handler before we finish _run_main. + loop._in_progress += 1 + from js import handleExit + loop._no_in_progress_handler = handleExit + loop._system_exit_handler = handleExit + loop._keyboard_interrupt_handler = lambda: handleExit(130) + + # Make shutil.get_terminal_size tell the terminal size accurately. + import shutil + from js.process import stdout + import os + def get_terminal_size(fallback=(80, 24)): + columns = stdout.columns + rows = stdout.rows + if columns is None: + columns = fallback[0] + if rows is None: + rows = fallback[1] + return os.terminal_size((columns, rows)) + shutil.get_terminal_size = get_terminal_size + `, + { globals: sideGlobals } + ); + + let errcode; + try { + errcode = py._module._run_main(); + } catch (e) { + // If there is some sort of error, add the Python tracebook in addition + // to the JavaScript traceback + py._module._dump_traceback(); + throw e; + } + if (errcode) { + process.exit(errcode); + } + py.runPython("loop._decrement_in_progress()", { globals: sideGlobals }); +} +main().catch((e) => { + console.error(e); + process.exit(1); +});