ENH Add fetch API (#1865)

2021-10-12 09:29:14 -07:00 · 2021-10-12 09:29:14 -07:00 · 0e2417c042
parent 579777dbb7
commit 0e2417c042
6 changed files with 189 additions and 55 deletions
--- a/docs/conf.py
+++ b/docs/conf.py
@ -29,8 +29,9 @@ import micropip  # noqa

 # We hacked it so that autodoc will look for submodules, but only if we import
 # them here. TODO: look these up in the source directory?
-import pyodide.webloop
 import pyodide.console
+import pyodide.http
+import pyodide.webloop

 # The full version, including alpha/beta/rc tags.
 release = version = pyodide.__version__
--- a/docs/project/changelog.md
+++ b/docs/project/changelog.md
@ -19,6 +19,12 @@ substitutions:
  error, it will return an empty list instead of raising a `SyntaxError`.
  {pr}`1819`

+- {{Enhancement}} Added a {any}`pyodide.http.pyfetch` API which provides a
+  convenience wrapper for the JavaScript `fetch` API. The API returns a response
+  object with various methods that convert the data into various types while
+  minimizing the number of times the data is copied.
+  {pr}`1865`
+
 ### JavaScript package

 - {{Fix}} {any}`loadPyodide <globalThis.loadPyodide>` no longer fails in the
--- a/packages/micropip/src/micropip/micropip.py
+++ b/packages/micropip/src/micropip/micropip.py
@ -44,23 +44,23 @@ else:


 if IN_BROWSER:
-    from js import fetch
+    from pyodide.http import pyfetch
+
+    async def fetch_bytes(url: str, **kwargs) -> bytes:
+        return await (await pyfetch(url, **kwargs)).bytes()
+
+    async def fetch_string(url: str, **kwargs) -> str:
+        return await (await pyfetch(url, **kwargs)).string()
+
+
 else:
    from urllib.request import urlopen, Request

-    async def fetch(url, headers={}):
-        fd = urlopen(Request(url, headers=headers))
-        fd.statusText = fd.reason
+    async def fetch_bytes(url: str, **kwargs) -> bytes:
+        return urlopen(Request(url, headers=kwargs)).read()

-        async def arrayBuffer():
-            class Temp:
-                def to_py():
-                    return fd.read()
-
-            return Temp
-
-        fd.arrayBuffer = arrayBuffer
-        return fd
+    async def fetch_string(url: str, **kwargs) -> str:
+        return (await fetch_bytes(url, **kwargs)).decode()


 if IN_BROWSER:
@ -77,19 +77,9 @@ else:
        return result


-async def _get_url(url):
-    resp = await fetch(url)
-    if resp.status >= 400:
-        raise OSError(
-            f"Request for {url} failed with status {resp.status}: {resp.statusText}"
-        )
-    return io.BytesIO((await resp.arrayBuffer()).to_py())
-
-
 async def _get_pypi_json(pkgname):
    url = f"https://pypi.org/pypi/{pkgname}/json"
-    fd = await _get_url(url)
-    return json.load(fd)
+    return json.loads(await fetch_string(url))


 def _is_pure_python_wheel(filename: str):
@ -251,8 +241,7 @@ class _PackageManager:

    async def add_wheel(self, name, wheel, version, extras, ctx, transaction):
        transaction["locked"][name] = version
-        response = await fetch(wheel["url"])
-        wheel_bytes = (await response.arrayBuffer()).to_py()
+        wheel_bytes = await fetch_bytes(wheel["url"])
        wheel["wheel_bytes"] = wheel_bytes

        with ZipFile(io.BytesIO(wheel_bytes)) as zip_file:  # type: ignore
--- a/src/py/pyodide/init.py
+++ b/src/py/pyodide/init.py
@ -28,7 +28,7 @@ from _pyodide._base import (
    CodeRunner,
    should_quiet,
 )
-from ._util import open_url
+from .http import open_url
 from . import _state  # noqa

 from _pyodide._importhook import register_js_module, unregister_js_module
--- a/src/py/pyodide/_util.py
+++ b/src/py/pyodide/_util.py
@ -1,27 +0,0 @@
-from io import StringIO
-
-try:
-    from js import XMLHttpRequest
-except ImportError:
-    pass
-
-
-def open_url(url: str) -> StringIO:
-    """
-    Fetches a given URL
-
-    Parameters
-    ----------
-    url : str
-       URL to fetch
-
-    Returns
-    -------
-    io.StringIO
-        the contents of the URL.
-    """
-
-    req = XMLHttpRequest.new()
-    req.open("GET", url, False)
-    req.send(None)
-    return StringIO(req.response)
--- a/src/py/pyodide/http.py
+++ b/src/py/pyodide/http.py
@ -0,0 +1,165 @@
+from io import StringIO
+from ._core import JsProxy, to_js
+from typing import Any
+import json
+
+try:
+    from js import XMLHttpRequest
+except ImportError:
+    pass
+
+from ._core import IN_BROWSER
+
+if IN_BROWSER:
+    from js import fetch as _jsfetch, Object
+
+
+def open_url(url: str) -> StringIO:
+    """Fetches a given URL synchronously.
+
+    The download of binary files is not supported. To download binary
+    files use :func:`pyodide.http.pyfetch` which is asynchronous.
+
+    Parameters
+    ----------
+    url : str
+       URL to fetch
+
+    Returns
+    -------
+    io.StringIO
+        the contents of the URL.
+    """
+
+    req = XMLHttpRequest.new()
+    # The third argument (``async``) is False, which is what makes this
+    # request synchronous.
+    req.open("GET", url, False)
+    req.send(None)
+    return StringIO(req.response)
+
+
+class FetchResponse:
+    """A wrapper for a Javascript fetch response.
+
+    See also the Javascript fetch
+    `Response <https://developer.mozilla.org/en-US/docs/Web/API/Response>`_ API
+    docs.
+
+    Parameters
+    ----------
+    url
+        URL to fetch
+    js_response
+        A JsProxy of the fetch response
+    """
+
+    def __init__(self, url: str, js_response: JsProxy):
+        # The requested url is stored separately from ``js_response.url``
+        # (which may differ) and is the one named in error messages.
+        self._url = url
+        self.js_response = js_response
+
+    @property
+    def body_used(self) -> bool:
+        """Has the response been used yet?
+
+        (If so, attempting to retrieve the body again will raise an OSError.)
+        """
+        return self.js_response.bodyUsed
+
+    @property
+    def ok(self) -> bool:
+        """Was the request successful?"""
+        return self.js_response.ok
+
+    @property
+    def redirected(self) -> bool:
+        """Was the request redirected?"""
+        return self.js_response.redirected
+
+    @property
+    def status(self) -> int:
+        """Response status code"""
+        return self.js_response.status
+
+    @property
+    def status_text(self) -> str:
+        """Response status text"""
+        return self.js_response.statusText
+
+    @property
+    def type(self) -> str:
+        """The `type <https://developer.mozilla.org/en-US/docs/Web/API/Response/type>`_ of the response."""
+        return self.js_response.type
+
+    @property
+    def url(self) -> str:
+        """The `url <https://developer.mozilla.org/en-US/docs/Web/API/Response/url>`_ of the response.
+
+        It may be different than the url passed to fetch.
+        """
+        return self.js_response.url
+
+    def _raise_if_failed(self):
+        """Raise an OSError if the status is 400 or above or the body is already used."""
+        if self.js_response.status >= 400:
+            raise OSError(
+                f"Request for {self._url} failed with status {self.status}: {self.status_text}"
+            )
+        if self.js_response.bodyUsed:
+            raise OSError("Response body is already used")
+
+    def clone(self) -> "FetchResponse":
+        """Return an identical copy of the FetchResponse.
+
+        This method exists to allow multiple uses of response objects. See
+        `Response.clone <https://developer.mozilla.org/en-US/docs/Web/API/Response/clone>`_
+
+        Raises an OSError if the response body has already been used.
+        """
+        if self.js_response.bodyUsed:
+            raise OSError("Response body is already used")
+        return FetchResponse(self._url, self.js_response.clone())
+
+    async def buffer(self) -> JsProxy:
+        """Return the response body as a Javascript ArrayBuffer"""
+        self._raise_if_failed()
+        return await self.js_response.arrayBuffer()
+
+    async def string(self) -> str:
+        """Return the response body as a string"""
+        self._raise_if_failed()
+        return await self.js_response.text()
+
+    async def json(self, **kwargs) -> Any:
+        """Parse the response body as JSON and return the resulting Python object.
+
+        Any keyword arguments are passed to `json.loads
+        <https://docs.python.org/3.8/library/json.html#json.loads>`_.
+        """
+        self._raise_if_failed()
+        return json.loads(await self.string(), **kwargs)
+
+    async def memoryview(self) -> memoryview:
+        """Return the response body as a memoryview object"""
+        self._raise_if_failed()
+        return (await self.buffer()).to_memoryview()
+
+    async def bytes(self) -> bytes:
+        """Return the response body as a bytes object"""
+        self._raise_if_failed()
+        return (await self.buffer()).to_bytes()
+
+
+async def pyfetch(url, **kwargs) -> FetchResponse:
+    """Fetch the url and return the response.
+
+    This function provides a similar API to the JavaScript `fetch function
+    <https://developer.mozilla.org/en-US/docs/Web/API/fetch>`_ however it is
+    designed to be convenient to use from Python. The
+    :class:`pyodide.http.FetchResponse` has methods with the output types
+    already converted to Python objects.
+
+    Parameters
+    ----------
+    url
+        URL to fetch.
+    **kwargs
+        Any keyword arguments are passed along as `optional parameters to the
+        fetch API
+        <https://developer.mozilla.org/en-US/docs/Web/API/fetch#parameters>`_.
+    """
+    # The keyword arguments are converted with ``to_js`` (using
+    # ``Object.fromEntries`` as the dict converter) before being passed as the
+    # second argument to the Javascript ``fetch`` function.
+    return FetchResponse(
+        url, await _jsfetch(url, to_js(kwargs, dict_converter=Object.fromEntries))
+    )