diff --git a/docs/conf.py b/docs/conf.py index 36369de28..dd195edd7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -29,8 +29,9 @@ import micropip # noqa # We hacked it so that autodoc will look for submodules, but only if we import # them here. TODO: look these up in the source directory? -import pyodide.webloop import pyodide.console +import pyodide.http +import pyodide.webloop # The full version, including alpha/beta/rc tags. release = version = pyodide.__version__ diff --git a/docs/project/changelog.md b/docs/project/changelog.md index 9ce5445e1..cae0b9ef6 100644 --- a/docs/project/changelog.md +++ b/docs/project/changelog.md @@ -19,6 +19,12 @@ substitutions: error, it will return an empty list instead of raising a `SyntaxError`. {pr}`1819` +- {{Enhancement}} Added a {ref}`pyodide.http.pyfetch` API which provides a + convenience wrapper for the Javascript `fetch` API. The API returns a response + object with various methods that convert the data into various types while + minimizing the number of times the data is copied. 
+ {pr}`1865` + ### JavaScript package - {{Fix}} {any}`loadPyodide ` no longer fails in the diff --git a/packages/micropip/src/micropip/micropip.py b/packages/micropip/src/micropip/micropip.py index f66ad7e97..ad89112f6 100644 --- a/packages/micropip/src/micropip/micropip.py +++ b/packages/micropip/src/micropip/micropip.py @@ -44,23 +44,23 @@ else: if IN_BROWSER: - from js import fetch + from pyodide.http import pyfetch + + async def fetch_bytes(url: str, **kwargs) -> bytes: + return await (await pyfetch(url, **kwargs)).bytes() + + async def fetch_string(url: str, **kwargs) -> str: + return await (await pyfetch(url, **kwargs)).string() + + else: from urllib.request import urlopen, Request - async def fetch(url, headers={}): - fd = urlopen(Request(url, headers=headers)) - fd.statusText = fd.reason + async def fetch_bytes(url: str, **kwargs) -> bytes: + return urlopen(Request(url, headers=kwargs)).read() - async def arrayBuffer(): - class Temp: - def to_py(): - return fd.read() - - return Temp - - fd.arrayBuffer = arrayBuffer - return fd + async def fetch_string(url: str, **kwargs) -> str: + return (await fetch_bytes(url, **kwargs)).decode() if IN_BROWSER: @@ -77,19 +77,9 @@ else: return result -async def _get_url(url): - resp = await fetch(url) - if resp.status >= 400: - raise OSError( - f"Request for {url} failed with status {resp.status}: {resp.statusText}" - ) - return io.BytesIO((await resp.arrayBuffer()).to_py()) - - async def _get_pypi_json(pkgname): url = f"https://pypi.org/pypi/{pkgname}/json" - fd = await _get_url(url) - return json.load(fd) + return json.loads(await fetch_string(url)) def _is_pure_python_wheel(filename: str): @@ -251,8 +241,7 @@ class _PackageManager: async def add_wheel(self, name, wheel, version, extras, ctx, transaction): transaction["locked"][name] = version - response = await fetch(wheel["url"]) - wheel_bytes = (await response.arrayBuffer()).to_py() + wheel_bytes = await fetch_bytes(wheel["url"]) wheel["wheel_bytes"] = wheel_bytes with 
ZipFile(io.BytesIO(wheel_bytes)) as zip_file: # type: ignore diff --git a/src/py/pyodide/__init__.py b/src/py/pyodide/__init__.py index 44e44e384..2961bf41c 100644 --- a/src/py/pyodide/__init__.py +++ b/src/py/pyodide/__init__.py @@ -28,7 +28,7 @@ from _pyodide._base import ( CodeRunner, should_quiet, ) -from ._util import open_url +from .http import open_url from . import _state # noqa from _pyodide._importhook import register_js_module, unregister_js_module diff --git a/src/py/pyodide/_util.py b/src/py/pyodide/_util.py deleted file mode 100644 index 2105160f5..000000000 --- a/src/py/pyodide/_util.py +++ /dev/null @@ -1,27 +0,0 @@ -from io import StringIO - -try: - from js import XMLHttpRequest -except ImportError: - pass - - -def open_url(url: str) -> StringIO: - """ - Fetches a given URL - - Parameters - ---------- - url : str - URL to fetch - - Returns - ------- - io.StringIO - the contents of the URL. - """ - - req = XMLHttpRequest.new() - req.open("GET", url, False) - req.send(None) - return StringIO(req.response) diff --git a/src/py/pyodide/http.py b/src/py/pyodide/http.py new file mode 100644 index 000000000..7fdbc2d45 --- /dev/null +++ b/src/py/pyodide/http.py @@ -0,0 +1,165 @@ +from io import StringIO +from ._core import JsProxy, to_js +from typing import Any +import json + +try: + from js import XMLHttpRequest +except ImportError: + pass + +from ._core import IN_BROWSER + +if IN_BROWSER: + from js import fetch as _jsfetch, Object + + +def open_url(url: str) -> StringIO: + """Fetches a given URL synchronously. + + The download of binary files is not supported. To download binary + files use :func:`pyodide.http.pyfetch` which is asynchronous. + + Parameters + ---------- + url : str + URL to fetch + + Returns + ------- + io.StringIO + the contents of the URL. + """ + + req = XMLHttpRequest.new() + req.open("GET", url, False) + req.send(None) + return StringIO(req.response) + + +class FetchResponse: + """A wrapper for a Javascript fetch response.
+ + See also the Javascript fetch + `Response `_ api + docs. + + Parameters + ---------- + url + URL to fetch + js_response + A JsProxy of the fetch response + """ + + def __init__(self, url: str, js_response: JsProxy): + self._url = url + self.js_response = js_response + + @property + def body_used(self) -> bool: + """Has the response been used yet? + + (If so, attempting to retrieve the body again will raise an OSError.) + """ + return self.js_response.bodyUsed + + @property + def ok(self) -> bool: + """Was the request successful?""" + return self.js_response.ok + + @property + def redirected(self) -> bool: + """Was the request redirected?""" + return self.js_response.redirected + + @property + def status(self) -> int: + """Response status code""" + return self.js_response.status + + @property + def status_text(self) -> str: + """Response status text""" + return self.js_response.statusText + + @property + def type(self) -> str: + """The `type `_ of the response.""" + return self.js_response.type + + @property + def url(self) -> str: + """The `url `_ of the response. + + It may be different than the url passed to fetch. + """ + return self.js_response.url + + def _raise_if_failed(self): + if self.js_response.status >= 400: + raise OSError( + f"Request for {self._url} failed with status {self.status}: {self.status_text}" + ) + if self.js_response.bodyUsed: + raise OSError("Response body is already used") + + def clone(self) -> "FetchResponse": + """Return an identical copy of the FetchResponse. + + This method exists to allow multiple uses of response objects.
See + `Response.clone `_ + """ + if self.js_response.bodyUsed: + raise OSError("Response body is already used") + return FetchResponse(self._url, self.js_response.clone()) + + async def buffer(self) -> JsProxy: + """Return the response body as a Javascript ArrayBuffer""" + self._raise_if_failed() + return await self.js_response.arrayBuffer() + + async def string(self) -> str: + """Return the response body as a string""" + self._raise_if_failed() + return await self.js_response.text() + + async def json(self, **kwargs) -> Any: + """Return the response body parsed from JSON into a Python object. + + Any keyword arguments are passed to `json.loads + `_. + """ + self._raise_if_failed() + return json.loads(await self.string(), **kwargs) + + async def memoryview(self) -> memoryview: + """Return the response body as a memoryview object""" + self._raise_if_failed() + return (await self.buffer()).to_memoryview() + + async def bytes(self) -> bytes: + """Return the response body as a bytes object""" + self._raise_if_failed() + return (await self.buffer()).to_bytes() + + +async def pyfetch(url: str, **kwargs) -> FetchResponse: + """Fetch the url and return the response. + + This function provides a similar API to the JavaScript `fetch function + `_ however it is + designed to be convenient to use from Python. The + :class:`pyodide.http.FetchResponse` has methods with the output types + already converted to Python objects. + + Parameters + ---------- + url URL to fetch. **kwargs Any keyword arguments are passed along as + `optional parameters to the fetch API + `_. + """ + return FetchResponse( + url, await _jsfetch(url, to_js(kwargs, dict_converter=Object.fromEntries)) + )