ENH Add API for mounting native file system (#2987)

This PR adds an API, pyodide.mountNativeFS, which mounts a FileSystemDirectoryHandle into the Pyodide Python file system.

Note that there are some limitations such as:

- The File System Access API is only supported by Chromium-based browsers (Safari also implements a portion of it, but it only supports the Origin Private File System, so it is not very useful for common users).
- The file system is asynchronous, so one needs to call syncfs to persist changes.

Since it is asynchronous, it does not require any WebWorker, SharedArrayBuffer, or Atomics. But I think we can extend this to a synchronous version using those features
This commit is contained in:
Gyeongjae Choi 2022-09-30 09:24:53 +09:00 committed by GitHub
parent 51803cae8c
commit 7dfee03a82
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 524 additions and 0 deletions

View File

@ -30,6 +30,11 @@ substitutions:
- {{ Enhancement }} Emscripten was updated to Version 3.1.21
{pr}`2958`, {pr}`2950`, {pr}`3027`, {pr}`3107`
- {{ Enhancement }} Added a new API {any}`pyodide.mountNativeFS`
which mounts [FileSystemDirectoryHandle](https://developer.mozilla.org/en-US/docs/Web/API/FileSystemDirectoryHandle)
into the Pyodide file system.
{pr}`2987`
- {{ Enhancement }} Implemented `reverse`, `__reversed__`, `count`, `index`,
`append`, and `pop` for `JsProxy` of Javascript arrays.
{pr}`2970`

View File

@ -57,3 +57,65 @@ pyodide.FS.mount(pyodide.FS.filesystems.NODEFS, { root: "." }, mountDir);
pyodide.runPython("import os; print(os.listdir('/mnt'))");
// ==> The list of files in the Node working directory
```
# (Experimental) Using native file system in the browser
You can access the native file system from the browser using the
[File System Access API](https://developer.mozilla.org/en-US/docs/Web/API/File_System_Access_API).
```{admonition} This is experimental
:class: warning
The File System Access API is only supported in Chromium-based browsers: Chrome and Edge (as of 2022/08/18).
```
## Mounting a directory
Pyodide provides an API, {any}`pyodide.mountNativeFS`, which mounts a
[`FileSystemDirectoryHandle`](https://developer.mozilla.org/en-US/docs/Web/API/FileSystemDirectoryHandle)
into the Pyodide Python file system.
```js
const dirHandle = await showDirectoryPicker();
if ((await dirHandle.queryPermission({ mode: "readwrite" })) !== "granted") {
if (
(await dirHandle.requestPermission({ mode: "readwrite" })) !== "granted"
) {
throw Error("Unable to read and write directory");
}
}
const nativefs = await pyodide.mountNativeFS("/mount_dir", dirHandle);
pyodide.runPython(`
import os
print(os.listdir('/mount_dir'))
`);
```
## Synchronizing changes to native file system
Due to browser limitations, changes made in the mounted file system
are not synchronized by default. In order to persist any operations
to the native file system, you must call
```js
// nativefs is the object returned from: await pyodide.mountNativeFS('/mount_dir', dirHandle)
pyodide.runPython(`
with open('/mount_dir/new_file.txt', 'w') as f:
f.write("hello");
`);
// new_file.txt does not exist in native file system
await nativefs.syncfs();
// new_file.txt will now exist in native file system
```
or
```js
pyodide.FS.syncfs(false, callback_func);
```

View File

@ -362,6 +362,54 @@ export function unpackArchive(
});
}
/**
 * Object returned by mountNativeFS.
 */
type NativeFS = {
  /**
   * Writes the changes made in the in-memory file system back to the native
   * file system. The returned promise rejects if the synchronization fails.
   */
  syncfs: () => Promise<void>;
};

/**
 * Mounts a FileSystemDirectoryHandle into the target directory.
 *
 * After mounting, the contents of the native directory are copied into the
 * in-memory file system once. Later changes made from Python are only written
 * back to the native file system when `syncfs()` is called on the returned
 * object.
 *
 * @param path The absolute path of the target mount directory.
 * If the directory does not exist, it will be created.
 * @param fileSystemHandle FileSystemDirectoryHandle returned by
 * navigator.storage.getDirectory() or window.showDirectoryPicker().
 * @returns An object whose `syncfs()` method persists changes to the native
 * file system.
 * @throws TypeError if `fileSystemHandle` is not a FileSystemDirectoryHandle.
 * The returned promise also rejects if the initial synchronization fails.
 */
export async function mountNativeFS(
  path: string,
  fileSystemHandle: {
    isSameEntry: Function;
    queryPermission: Function;
    requestPermission: Function;
  },
  // TODO: support sync file system
  // sync: boolean = false
): Promise<NativeFS> {
  if (fileSystemHandle.constructor.name !== "FileSystemDirectoryHandle") {
    throw new TypeError(
      `Expected argument 'fileSystemHandle' to be a FileSystemDirectoryHandle`,
    );
  }

  if (Module.FS.findObject(path) == null) {
    Module.FS.mkdirTree(path);
  }

  Module.FS.mount(
    Module.FS.filesystems.NATIVEFS_ASYNC,
    { fileSystemHandle: fileSystemHandle },
    path,
  );

  // FS.syncfs reports failure by passing an error to its callback. Passing
  // `resolve` directly as the callback would turn a failed sync into a
  // successfully resolved promise, so translate the error into a rejection.
  const sync = (populate: boolean): Promise<void> =>
    new Promise<void>((resolve, reject) => {
      Module.FS.syncfs(populate, (err: unknown) => {
        if (err) {
          reject(err);
        } else {
          resolve();
        }
      });
    });

  // sync native ==> browser
  await sync(true);

  return {
    // sync browser ==> native
    syncfs: () => sync(false),
  };
}
/**
* @private
*/
@ -431,6 +479,7 @@ export type PyodideInterface = {
toPy: typeof toPy;
pyimport: typeof pyimport;
unpackArchive: typeof unpackArchive;
mountNativeFS: typeof mountNativeFS;
registerComlink: typeof registerComlink;
PythonError: typeof PythonError;
PyBuffer: typeof PyBuffer;
@ -494,6 +543,7 @@ API.makePublicAPI = function (): PyodideInterface {
toPy,
pyimport,
unpackArchive,
mountNativeFS,
registerComlink,
PythonError,
PyBuffer,

View File

@ -9,6 +9,7 @@ export interface Module {
printErr: (a: string) => void;
ENV: { [key: string]: string };
FS: any;
PATH: any;
}
/**

255
src/js/nativefs.ts Normal file
View File

@ -0,0 +1,255 @@
import { Module } from "./module";
/**
 * Registers the asynchronous native file system backend on the given module
 * as `module.FS.filesystems.NATIVEFS_ASYNC`.
 *
 * The backend reuses MEMFS for mounting, so all file operations happen in
 * memory. `syncfs` copies entries between the in-memory tree ("local") and
 * the directory handle passed as `opts.fileSystemHandle` ("remote").
 *
 * @param module The module whose `FS` and `PATH` objects are used.
 */
export function initializeNativeFS(module: Module) {
  const FS = module.FS;
  const MEMFS = module.FS.filesystems.MEMFS;
  const PATH = module.PATH;

  const nativeFSAsync = {
    // DIR_MODE: {{{ cDefine('S_IFDIR') }}} | 511 /* 0777 */,
    // FILE_MODE: {{{ cDefine('S_IFREG') }}} | 511 /* 0777 */,
    DIR_MODE: 16384 | 511,
    FILE_MODE: 32768 | 511,

    // Validates the mount options and delegates the actual mount to MEMFS.
    mount: function (mount: any) {
      if (!mount.opts.fileSystemHandle) {
        throw new Error("opts.fileSystemHandle is required");
      }

      // reuse all of the core MEMFS functionality
      return MEMFS.mount.apply(null, arguments);
    },

    // Synchronizes local and remote entries.
    // populate === true copies remote ==> local, otherwise local ==> remote.
    // `callback` is invoked with null on success or with the caught error.
    syncfs: async (mount: any, populate: boolean, callback: Function) => {
      try {
        const local = nativeFSAsync.getLocalSet(mount);
        const remote = await nativeFSAsync.getRemoteSet(mount);
        const src = populate ? remote : local;
        const dst = populate ? local : remote;
        await nativeFSAsync.reconcile(mount, src, dst);
        callback(null);
      } catch (e) {
        callback(e);
      }
    },

    // Returns file set of emscripten's filesystem at the mountpoint.
    getLocalSet: (mount: any) => {
      let entries = Object.create(null);

      function isRealDir(p: string) {
        return p !== "." && p !== "..";
      }

      function toAbsolute(root: string) {
        return (p: string) => {
          return PATH.join2(root, p);
        };
      }

      // Walk the tree iteratively, using `check` as the stack of paths that
      // still have to be visited.
      let check = FS.readdir(mount.mountpoint)
        .filter(isRealDir)
        .map(toAbsolute(mount.mountpoint));

      while (check.length) {
        let path = check.pop();
        let stat = FS.stat(path);

        if (FS.isDir(stat.mode)) {
          check.push.apply(
            check,
            FS.readdir(path).filter(isRealDir).map(toAbsolute(path)),
          );
        }

        entries[path] = { timestamp: stat.mtime, mode: stat.mode };
      }

      return { type: "local", entries: entries };
    },

    // Returns file set of the real, on-disk filesystem at the mountpoint.
    getRemoteSet: async (mount: any) => {
      // TODO: this should be a map.
      const entries = Object.create(null);

      const handles = await getFsHandles(mount.opts.fileSystemHandle);
      for (const [path, handle] of handles) {
        // "." is the mounted directory itself, not an entry inside it.
        if (path === ".") continue;

        entries[PATH.join2(mount.mountpoint, path)] = {
          // File.lastModifiedDate is deprecated and missing in some browsers;
          // build the Date from the standard File.lastModified instead.
          // Directories get the current time as their timestamp.
          timestamp:
            handle.kind === "file"
              ? new Date((await handle.getFile()).lastModified)
              : new Date(),
          mode:
            handle.kind === "file"
              ? nativeFSAsync.FILE_MODE
              : nativeFSAsync.DIR_MODE,
        };
      }

      return { type: "remote", entries, handles };
    },

    // Reads the entry at `path` from the in-memory file system. Files also
    // carry their contents; any other node type than file/directory throws.
    loadLocalEntry: (path: string) => {
      const lookup = FS.lookupPath(path);
      const node = lookup.node;
      const stat = FS.stat(path);

      if (FS.isDir(stat.mode)) {
        return { timestamp: stat.mtime, mode: stat.mode };
      } else if (FS.isFile(stat.mode)) {
        node.contents = MEMFS.getFileDataAsTypedArray(node);
        return {
          timestamp: stat.mtime,
          mode: stat.mode,
          contents: node.contents,
        };
      } else {
        throw new Error("node type not supported");
      }
    },

    // Creates or overwrites the entry at `path` in the in-memory file system
    // and applies the entry's mode and timestamp to it.
    storeLocalEntry: (path: string, entry: any) => {
      if (FS.isDir(entry["mode"])) {
        FS.mkdirTree(path, entry["mode"]);
      } else if (FS.isFile(entry["mode"])) {
        FS.writeFile(path, entry["contents"], { canOwn: true });
      } else {
        throw new Error("node type not supported");
      }

      FS.chmod(path, entry["mode"]);
      FS.utime(path, entry["timestamp"], entry["timestamp"]);
    },

    // Removes a file or directory from the in-memory file system.
    // Other node types are left untouched.
    removeLocalEntry: (path: string) => {
      const stat = FS.stat(path);

      if (FS.isDir(stat.mode)) {
        FS.rmdir(path);
      } else if (FS.isFile(stat.mode)) {
        FS.unlink(path);
      }
    },

    // Reads the entry behind a native handle. Files carry their contents and
    // modification time; directories get the current time as timestamp.
    loadRemoteEntry: async (handle: any) => {
      if (handle.kind === "file") {
        const file = await handle.getFile();
        return {
          contents: new Uint8Array(await file.arrayBuffer()),
          mode: nativeFSAsync.FILE_MODE,
          // See getRemoteSet: File.lastModifiedDate is deprecated.
          timestamp: new Date(file.lastModified),
        };
      } else if (handle.kind === "directory") {
        return {
          mode: nativeFSAsync.DIR_MODE,
          timestamp: new Date(),
        };
      } else {
        throw new Error("unknown kind: " + handle.kind);
      }
    },

    // Creates the file or directory at the relative `path` below its parent
    // handle, writes the contents for files, and records the new handle in
    // `handles` so that children created later can find their parent.
    storeRemoteEntry: async (handles: any, path: string, entry: any) => {
      const parentDirHandle = handles.get(PATH.dirname(path));
      const handle = FS.isFile(entry.mode)
        ? await parentDirHandle.getFileHandle(PATH.basename(path), {
            create: true,
          })
        : await parentDirHandle.getDirectoryHandle(PATH.basename(path), {
            create: true,
          });
      if (handle.kind === "file") {
        const writable = await handle.createWritable();
        await writable.write(entry.contents);
        await writable.close();
      }
      handles.set(path, handle);
    },

    // Removes the entry at the relative `path` from its parent handle and
    // drops it from `handles`.
    removeRemoteEntry: async (handles: any, path: string) => {
      const parentDirHandle = handles.get(PATH.dirname(path));
      await parentDirHandle.removeEntry(PATH.basename(path));
      handles.delete(path);
    },

    // Makes `dst` match `src`: entries missing in `dst`, and files that are
    // newer in `src`, are copied; entries that only exist in `dst` are
    // removed.
    reconcile: async (mount: any, src: any, dst: any) => {
      let total = 0;

      const create: Array<string> = [];
      Object.keys(src.entries).forEach(function (key) {
        const e = src.entries[key];
        const e2 = dst.entries[key];
        if (
          !e2 ||
          (FS.isFile(e.mode) &&
            e["timestamp"].getTime() > e2["timestamp"].getTime())
        ) {
          create.push(key);
          total++;
        }
      });
      // sort paths in ascending order so directory entries are created
      // before the files inside them
      create.sort();

      const remove: Array<string> = [];
      Object.keys(dst.entries).forEach(function (key) {
        if (!src.entries[key]) {
          remove.push(key);
          total++;
        }
      });
      // sort paths in descending order so files are deleted before their
      // parent directories
      remove.sort().reverse();

      if (!total) {
        return;
      }

      // Only the remote set carries the map of native handles.
      const handles = src.type === "remote" ? src.handles : dst.handles;

      for (const path of create) {
        // Native handles are keyed by the path relative to the mountpoint.
        const relPath = PATH.normalize(
          path.replace(mount.mountpoint, "/"),
        ).substring(1);
        if (dst.type === "local") {
          const handle = handles.get(relPath);
          const entry = await nativeFSAsync.loadRemoteEntry(handle);
          nativeFSAsync.storeLocalEntry(path, entry);
        } else {
          const entry = nativeFSAsync.loadLocalEntry(path);
          await nativeFSAsync.storeRemoteEntry(handles, relPath, entry);
        }
      }

      for (const path of remove) {
        if (dst.type === "local") {
          nativeFSAsync.removeLocalEntry(path);
        } else {
          const relPath = PATH.normalize(
            path.replace(mount.mountpoint, "/"),
          ).substring(1);
          await nativeFSAsync.removeRemoteEntry(handles, relPath);
        }
      }
    },
  };

  module.FS.filesystems.NATIVEFS_ASYNC = nativeFSAsync;
}
/**
 * Recursively collects all entries below `dirHandle`.
 *
 * @param dirHandle The directory handle to traverse.
 * @returns A map from each entry's "/"-joined path relative to `dirHandle`
 * to its handle. `dirHandle` itself is stored under the key ".".
 */
const getFsHandles = async (dirHandle: any) => {
  const result = new Map<string, any>();
  result.set(".", dirHandle);

  // The relative path is built while descending, so no extra
  // `dirHandle.resolve()` call per entry is needed afterwards.
  async function collect(curDirHandle: any, prefix: string) {
    for await (const entry of curDirHandle.values()) {
      const relativePath = prefix + entry.name;
      result.set(relativePath, entry);
      if (entry.kind === "directory") {
        await collect(entry, relativePath + "/");
      }
    }
  }

  await collect(dirHandle, "");
  return result;
};

View File

@ -11,6 +11,7 @@ import {
} from "./compat";
import { createModule, setStandardStreams, setHomeDirectory } from "./module";
import { initializeNativeFS } from "./nativefs";
import version from "./version";
import type { PyodideInterface } from "./api.js";
@ -344,6 +345,8 @@ If you updated the Pyodide version, make sure you also updated the 'indexURL' pa
);
}
initializeNativeFS(Module);
// Disable further loading of Emscripten file_packager stuff.
Module.locateFile = (path: string) => {
throw new Error("Didn't expect to load any more file_packager files!");

View File

@ -105,3 +105,151 @@ def test_idbfs_persist_code(selenium_standalone):
)
# remove file
selenium.run_js(f"""pyodide.FS.unlink("{mount_dir}/test_idbfs/__init__.py")""")
@pytest.mark.xfail_browsers(
    node="Not available", firefox="Not available", safari="Not available"
)
def test_nativefs_dir(request, selenium_standalone):
    """Mount an OPFS directory with pyodide.mountNativeFS and check that
    reads, writes, deletes and renames are visible on both sides after
    syncfs(), and that the data survives an unmount / remount cycle.
    """
    # Note: Using *real* native file system requires
    # user interaction so it is not available in headless mode.
    # So in this test we use OPFS (Origin Private File System)
    # which is part of File System Access API but uses IndexedDB as a backend.

    if request.config.option.runner == "playwright":
        pytest.xfail("Playwright doesn't support file system access APIs")

    selenium = selenium_standalone

    selenium.run_js(
        """
        root = await navigator.storage.getDirectory();
        dirHandleMount = await root.getDirectoryHandle('testdir', { create: true });
        testFileHandle = await dirHandleMount.getFileHandle('test_read', { create: true });
        writable = await testFileHandle.createWritable();
        await writable.write("hello_read");
        await writable.close();
        fs = await pyodide.mountNativeFS("/mnt/nativefs", dirHandleMount);
        """
    )

    # Read
    selenium.run(
        """
        import os
        import pathlib
        assert len(os.listdir("/mnt/nativefs")) == 1, str(os.listdir("/mnt/nativefs"))
        assert os.listdir("/mnt/nativefs") == ["test_read"], str(os.listdir("/mnt/nativefs"))
        assert pathlib.Path("/mnt/nativefs/test_read").read_text() == "hello_read"
        """
    )

    # Write / Delete / Rename
    selenium.run(
        """
        import os
        import pathlib
        pathlib.Path("/mnt/nativefs/test_write").write_text("hello_write")
        assert pathlib.Path("/mnt/nativefs/test_write").read_text() == "hello_write"
        pathlib.Path("/mnt/nativefs/test_delete").write_text("This file will be deleted")
        pathlib.Path("/mnt/nativefs/test_rename").write_text("This file will be renamed")
        """
    )

    # The new files must show up in the native directory after syncing.
    entries = selenium.run_js(
        """
        await fs.syncfs();
        entries = {};
        for await (const [key, value] of dirHandleMount.entries()) {
            entries[key] = value;
        }
        return entries;
        """
    )

    assert "test_read" in entries
    assert "test_write" in entries
    assert "test_delete" in entries
    assert "test_rename" in entries

    selenium.run(
        """
        import os
        os.remove("/mnt/nativefs/test_delete")
        os.rename("/mnt/nativefs/test_rename", "/mnt/nativefs/test_rename_renamed")
        """
    )

    # Deletions and renames must be propagated by the next sync.
    entries = selenium.run_js(
        """
        await fs.syncfs();
        entries = {};
        for await (const [key, value] of dirHandleMount.entries()) {
            entries[key] = value;
        }
        return entries;
        """
    )

    assert "test_delete" not in entries
    assert "test_rename" not in entries
    assert "test_rename_renamed" in entries

    # unmount
    files = selenium.run(
        """
        import os
        os.listdir("/mnt/nativefs")
        """
    )

    # Check the listing seen from Python, not the stale native `entries`.
    assert "test_read" in files
    assert "test_write" in files
    assert "test_rename_renamed" in files

    selenium.run_js(
        """
        await fs.syncfs();
        pyodide.FS.unmount("/mnt/nativefs");
        """
    )

    files = selenium.run(
        """
        import os
        os.listdir("/mnt/nativefs")
        """
    )

    assert not len(files)

    # Mount again
    selenium.run_js(
        """
        fs2 = await pyodide.mountNativeFS("/mnt/nativefs", dirHandleMount);
        """
    )

    # Read again
    selenium.run(
        """
        import os
        import pathlib
        assert len(os.listdir("/mnt/nativefs")) == 3, str(os.listdir("/mnt/nativefs"))
        assert pathlib.Path("/mnt/nativefs/test_read").read_text() == "hello_read"
        """
    )

    selenium.run_js(
        """
        await fs2.syncfs();
        pyodide.FS.unmount("/mnt/nativefs");
        """
    )