Merge pull request #125 from rth/load_package_custom_url

Fix: Loading packages from custom URLs (part 1)
2018-08-29 11:05:13 -04:00 · 2018-08-29 11:05:13 -04:00 · 79c97f8d51
parent b9f3eec925 2aa98eb6c5
commit 79c97f8d51
8 changed files with 157 additions and 24 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -111,6 +111,9 @@ jobs:
            export PATH=$PWD/firefox:$PATH
            pytest test -v -k chrome

+      - store_artifacts:
+          path: /home/circleci/repo/build/
+
  deploy:
    machine:
      enabled: true
--- a/docs/new_packages.md
+++ b/docs/new_packages.md
@ -50,7 +50,8 @@ The supported keys in the `meta.yaml` file are described below.
 The name of the package. It must match the name of the package used when
 expanding the tarball, which is sometimes different from the name of the package
 in the Python namespace when installed. It must also match the name of the
-directory in which the `meta.yaml` file is placed.
+directory in which the `meta.yaml` file is placed. It can only contain
+alpha-numeric characters and `-`, `_`.

 #### `package/version`

--- a/docs/using_pyodide_from_iodide.md
+++ b/docs/using_pyodide_from_iodide.md
@ -51,7 +51,15 @@ Pyodide. To use other libraries, you'll need to load their package using
 from a Javascript cell. This downloads the file data over the network (as a
 `.data` and `.js` index file) and installs the files in the virtual filesystem.

-When you request a package, all of that package's dependencies are also loaded.
+Packages can be loaded by name, for those included in the official pyodide
+repository (e.g. `pyodide.loadPackage('numpy')`). It is also possible to load
+packages from custom URLs (e.g.
+`pyodide.loadPackage('https://foo/bar/numpy.js')`), in which case the URL must
+end with `<package-name>.js`.
+
+When you request a package from the official repository, all of that package's
+dependencies are also loaded. Dependency resolution is not yet implemented
+when loading packages from custom URLs.

 `pyodide.loadPackage` returns a `Promise`.

--- a/docs/using_pyodide_from_javascript.md
+++ b/docs/using_pyodide_from_javascript.md
@ -39,7 +39,15 @@ Pyodide. To use other libraries, you'll need to load their package using
 `pyodide.loadPackage`. This downloads the file data over the network (as a
 `.data` and `.js` index file) and installs the files in the virtual filesystem.

-When you request a package, all of that package's dependencies are also loaded.
+Packages can be loaded by name, for those included in the official pyodide
+repository (e.g. `pyodide.loadPackage('numpy')`). It is also possible to load
+packages from custom URLs (e.g.
+`pyodide.loadPackage('https://foo/bar/numpy.js')`), in which case the URL must
+end with `<package-name>.js`.
+
+When you request a package from the official repository, all of that package's
+dependencies are also loaded. Dependency resolution is not yet implemented
+when loading packages from custom URLs.

 `pyodide.loadPackage` returns a `Promise`.

--- a/src/pyodide.js
+++ b/src/pyodide.js
@ -2,6 +2,12 @@
 * The main bootstrap script for loading pyodide.
 */

+// Regexp for validating package name and URI
+var package_name_regexp = '[a-z0-9_][a-z0-9_\-]*'
+var package_uri_regexp =
+    new RegExp('^https?://.*?(' + package_name_regexp + ').js$', 'i');
+var package_name_regexp = new RegExp('^' + package_name_regexp + '$', 'i');
+
 var languagePluginLoader = new Promise((resolve, reject) => {
  // This is filled in by the Makefile to be either a local file or the
  // deployed location. TODO: This should be done in a less hacky
@ -11,20 +17,52 @@ var languagePluginLoader = new Promise((resolve, reject) => {
  ////////////////////////////////////////////////////////////
  // Package loading
  var packages = undefined;
-  let loadedPackages = new Set();
+  let loadedPackages = new Array();
+
+  let _uri_to_package_name = (package_uri) => {
+    // Generate a unique package name from URI
+
+    if (package_name_regexp.test(package_uri)) {
+      return package_uri;
+    } else if (package_uri_regexp.test(package_uri)) {
+      let match = package_uri_regexp.exec(package_uri);
+      // Get the regexp group corresponding to the package name
+      return match[1];
+    } else {
+      return null;
+    }
+  };

  let loadPackage = (names) => {
    // DFS to find all dependencies of the requested packages
    let packages = window.pyodide.packages.dependencies;
    let queue = new Array(names);
-    let toLoad = new Set();
+    let toLoad = new Array();
    while (queue.length) {
-      const package = queue.pop();
-      if (!loadedPackages.has(package)) {
-        toLoad.add(package);
+      let package_uri = queue.pop();
+
+      const package = _uri_to_package_name(package_uri);
+
+      if (package == null) {
+        throw new Error(`Invalid package name or URI '${package_uri}'`);
+      } else if (package == package_uri) {
+        package_uri = 'default channel';
+      }
+
+      console.log(`Loading ${package} from ${package_uri}`);
+
+      if (package in loadedPackages) {
+        if (package_uri != loadedPackages[package]) {
+          throw new Error(
+              `URI mismatch, attempting to load package ` +
+              `${package} from ${package_uri} while it is already ` +
+              `loaded from ${loadedPackages[package]}!`);
+        }
+      } else {
+        toLoad[package] = package_uri;
        if (packages.hasOwnProperty(package)) {
          packages[package].forEach((subpackage) => {
-            if (!loadedPackages.has(subpackage) && !toLoad.has(subpackage)) {
+            if (!(subpackage in loadedPackages) && !(subpackage in toLoad)) {
              queue.push(subpackage);
            }
          });
@ -35,25 +73,32 @@ var languagePluginLoader = new Promise((resolve, reject) => {
    }

    let promise = new Promise((resolve, reject) => {
-      if (toLoad.size === 0) {
+      if (Object.keys(toLoad).length === 0) {
        resolve('No new packages to load');
      }

      pyodide.monitorRunDependencies = (n) => {
        if (n === 0) {
-          toLoad.forEach((package) => loadedPackages.add(package));
+          for (let package in toLoad) {
+            loadedPackages[package] = toLoad[package];
+          }
          delete pyodide.monitorRunDependencies;
-          const packageList = Array.from(toLoad.keys()).join(', ');
+          const packageList = Array.from(Object.keys(toLoad)).join(', ');
          resolve(`Loaded ${packageList}`);
        }
      };

-      toLoad.forEach((package) => {
+      for (let package in toLoad) {
        let script = document.createElement('script');
-        script.src = `${baseURL}${package}.js`;
+        let package_uri = toLoad[package];
+        if (package_uri == 'default channel') {
+          script.src = `${baseURL}${package}.js`;
+        } else {
+          script.src = `${package_uri}`;
+        }
        script.onerror = (e) => { reject(e); };
        document.body.appendChild(script);
-      });
+      }

      // We have to invalidate Python's import caches, or it won't
      // see the new files. This is done here so it happens in parallel
--- a/test/conftest.py
+++ b/test/conftest.py
@ -33,9 +33,21 @@ class PackageLoaded:
        return bool(inited)


+def _display_driver_logs(browser, driver):
+    if browser == 'chrome':
+        print('# Selenium browser logs')
+        print(driver.get_log("browser"))
+    elif browser == 'firefox':
+        # browser logs are not available in GeckoDriver
+        # https://github.com/mozilla/geckodriver/issues/284
+        print('Accessing raw browser logs with Selenium is not '
+              'supported by Firefox.')
+
+
 class SeleniumWrapper:
    def __init__(self):
        from selenium.webdriver.support.wait import WebDriverWait
+        from selenium.common.exceptions import TimeoutException

        driver = self.get_driver()
        wait = WebDriverWait(driver, timeout=20)
@ -44,13 +56,21 @@ class SeleniumWrapper:
            raise ValueError(f"{(BUILD_PATH / 'test.html').resolve()} "
                             f"does not exist!")
        driver.get(f'http://127.0.0.1:{PORT}/test.html')
-        wait.until(PyodideInited())
+        try:
+            wait.until(PyodideInited())
+        except TimeoutException as exc:
+            _display_driver_logs(self.browser, driver)
+            raise TimeoutException()
        self.wait = wait
        self.driver = driver

    @property
    def logs(self):
-        return self.driver.execute_script("return window.logs")
+        logs = self.driver.execute_script("return window.logs")
+        return '\n'.join(str(x) for x in logs)
+
+    def clean_logs(self):
+        self.driver.execute_script("window.logs = []")

    def run(self, code):
        return self.run_js(
@ -64,11 +84,18 @@ class SeleniumWrapper:
        return self.driver.execute_script(catch)

    def load_package(self, packages):
+        from selenium.common.exceptions import TimeoutException
+
        self.run_js(
            'window.done = false\n' +
            'pyodide.loadPackage({!r})'.format(packages) +
            '.then(function() { window.done = true; })')
-        self.wait.until(PackageLoaded())
+        try:
+            self.wait.until(PackageLoaded())
+        except TimeoutException as exc:
+            _display_driver_logs(self.browser, self.driver)
+            print(self.logs)
+            raise TimeoutException()

    @property
    def urls(self):
@ -123,7 +150,7 @@ if pytest is not None:
        try:
            yield selenium
        finally:
-            print('\n'.join(str(x) for x in selenium.logs))
+            print(selenium.logs)
            selenium.driver.quit()

    @pytest.fixture(params=['firefox', 'chrome'], scope='module')
@ -144,11 +171,10 @@ if pytest is not None:
    def selenium(_selenium_cached):
        # selenium instance cached at the module level
        try:
-            # clean selenium logs for each test run
-            _selenium_cached.driver.execute_script("window.logs = []")
+            _selenium_cached.clean_logs()
            yield _selenium_cached
        finally:
-            print('\n'.join(str(x) for x in _selenium_cached.logs))
+            print(_selenium_cached.logs)


 PORT = 0
@ -202,5 +228,10 @@ def run_web_server(q):
        httpd.serve_forever()


+@pytest.fixture
+def web_server():
+    return '127.0.0.1', PORT
+
+
 if multiprocessing.current_process().name == 'MainProcess':
    spawn_web_server()
--- a/test/test_package_loading.py
+++ b/test/test_package_loading.py
@ -0,0 +1,36 @@
+import pytest
+from selenium.common.exceptions import WebDriverException
+
+
+def test_load_from_url(selenium_standalone, web_server):
+
+    url, port = web_server
+
+    selenium_standalone.load_package(f"http://{url}:{port}/pyparsing.js")
+    assert "Invalid package name or URI" not in selenium_standalone.logs
+
+    selenium_standalone.run("from pyparsing import Word, alphas")
+    selenium_standalone.run("Word(alphas).parseString('hello')")
+
+    selenium_standalone.load_package(f"http://{url}:{port}/numpy.js")
+    selenium_standalone.run("import numpy as np")
+
+
+def test_uri_mismatch(selenium_standalone):
+    selenium_standalone.load_package('pyparsing')
+    with pytest.raises(WebDriverException,
+                       match="URI mismatch, attempting "
+                             "to load package pyparsing"):
+        selenium_standalone.load_package('http://some_url/pyparsing.js')
+    assert "Invalid package name or URI" not in selenium_standalone.logs
+
+
+def test_invalid_package_name(selenium):
+    with pytest.raises(WebDriverException,
+                       match="Invalid package name or URI"):
+        selenium.load_package('wrong name+$')
+    selenium.clean_logs()
+
+    with pytest.raises(WebDriverException,
+                       match="Invalid package name or URI"):
+        selenium.load_package('tcp://some_url')
--- a/test/test_python.py
+++ b/test/test_python.py
@ -7,7 +7,8 @@ import pytest


 def test_init(selenium_standalone):
-    assert 'Python initialization complete' in selenium_standalone.logs
+    assert ('Python initialization complete'
+            in selenium_standalone.logs.splitlines())
    assert len(selenium_standalone.driver.window_handles) == 1


@ -19,7 +20,7 @@ def test_webbrowser(selenium):

 def test_print(selenium):
    selenium.run("print('This should be logged')")
-    assert 'This should be logged' in selenium.logs
+    assert 'This should be logged' in selenium.logs.splitlines()


 def test_python2js(selenium):