MAINT Refactor benchmark script (#2227)

Which benchmarks to run (pystone, numpy, matplotlib, or all) can now be selected through command-line arguments.
Each benchmark can be split into a separate CI job in the future if needed.
Moved the pystone benchmark into the benchmark directory so that it is no longer included in the Pyodide release.
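For illustration, a benchmark script under the new `benchmark/benchmarks/` layout could look like the sketch below; the file name and body are hypothetical, but the `# setup:`/`# run:` header convention and the rule that the file stem names the function imported from `__main__` follow the harness code in this commit.

```python
# Hypothetical file: benchmark/benchmarks/numpy_benchmarks/sum_example.py
# The stem ("sum_example") must match the function the generated harness
# imports from __main__; the headers become the timeit setup and statement.
# flake8: noqa
# setup: import numpy as np ; a = np.ones(10000)
# run: sum_example(a)
import numpy as np


def sum_example(a):
    return float(a.sum())
```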
Gyeongjae Choi 2022-03-03 03:11:36 +09:00 committed by GitHub
parent 5b22f5ed50
commit fd8433a013
50 changed files with 139 additions and 112 deletions

.circleci/config.yml

@@ -337,7 +337,8 @@ jobs:
- run:
name: benchmark
command: |
python benchmark/benchmark.py /usr/local/bin/python3 build/benchmarks.json
python benchmark/benchmark.py all --output build/benchmarks.json
- store_artifacts:
path: /root/repo/build/benchmarks.json

.pre-commit-config.yaml

@@ -1,4 +1,4 @@
exclude: (^.*patches|.*\.cgi$|^packages/micropip/src/micropip/externals|^src/py/lib/pystone.py$)
exclude: (^.*patches|.*\.cgi$|^packages/micropip/src/micropip/externals|^benchmark/benchmarks$)
default_language_version:
python: "3.9"
repos:

Makefile

@@ -149,7 +149,7 @@ lint:
pre-commit run -a --show-diff-on-failure
benchmark: all
$(HOSTPYTHON) benchmark/benchmark.py $(HOSTPYTHON) build/benchmarks.json
$(HOSTPYTHON) benchmark/benchmark.py all --output build/benchmarks.json
$(HOSTPYTHON) benchmark/plot_benchmark.py build/benchmarks.json build/benchmarks.png

benchmark/benchmark.py

@@ -1,3 +1,4 @@
import argparse
import json
import re
import subprocess
@@ -5,12 +6,11 @@ import sys
from pathlib import Path
from time import time
sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "test"))
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
import conftest # noqa: E402
SKIP = {"fft", "hyantes", "README"}
SKIP = {"fft", "hyantes"}
def print_entry(name, res):
@@ -21,12 +21,13 @@ def print_entry(name, res):
print("")
def run_native(hostpython, code):
def run_native(code):
if "# non-native" in code:
return float("NaN")
root = Path(__file__).resolve().parents[1]
output = subprocess.check_output(
[hostpython.resolve(), "-c", code],
[sys.executable, "-c", code],
cwd=Path(__file__).resolve().parent,
env={
"PYTHONPATH": str(root / "src/py/lib")
@@ -45,8 +46,7 @@ def run_wasm(code, selenium, interrupt_buffer):
pyodide.setInterruptBuffer(interrupt_buffer)
"""
)
if "matplotlib" in code:
selenium.load_package("matplotlib")
selenium.run(code)
try:
runtime = float(selenium.logs.split("\n")[-1])
@@ -56,12 +56,11 @@ def run_wasm(code, selenium, interrupt_buffer):
return runtime
def run_all(hostpython, selenium_backends, code):
a = run_native(hostpython, code)
result = {"native": a}
def run_all(selenium_backends, code):
result = {"native": run_native(code)}
for browser_name, selenium in selenium_backends.items():
for interrupt_buffer in [False, True]:
print(f"Running with: {browser_name} {interrupt_buffer}")
dt = run_wasm(code, selenium, interrupt_buffer)
if interrupt_buffer:
browser_name += "(w/ ib)"
@@ -69,11 +68,7 @@ def run_all(hostpython, selenium_backends, code):
return result
def get_pystone_benchmarks():
yield "pystone", ("import pystone\n" "pystone.main(pystone.LOOPS)\n")
def parse_numpy_benchmark(filename):
def parse_benchmark(filename):
lines = []
with open(filename) as fp:
for line in fp:
@@ -84,102 +79,147 @@ def parse_numpy_benchmark(filename):
return "".join(lines)
def get_numpy_benchmarks():
root = Path(__file__).resolve().parent / "benchmarks"
def get_benchmark_scripts(scripts_dir, repeat=11, number=5):
root = Path(__file__).resolve().parent / scripts_dir
for filename in sorted(root.iterdir()):
name = filename.stem
if name in SKIP:
continue
if "canvas" not in str(filename) and "wasm" not in str(filename):
content = parse_numpy_benchmark(filename)
content += (
"import numpy as np\n"
"_ = np.empty(())\n"
"setup = setup + '\\nfrom __main__ import {}'\n"
"from timeit import Timer\n"
"t = Timer(run, setup)\n"
"r = t.repeat(11, 40)\n"
"r.remove(min(r))\n"
"r.remove(max(r))\n"
"print(np.mean(r))\n".format(name)
)
yield name, content
content = parse_benchmark(filename)
content += (
"import numpy as np\n"
"_ = np.empty(())\n"
f"setup = setup + '\\nfrom __main__ import {name}'\n"
"from timeit import Timer\n"
"t = Timer(run, setup)\n"
f"r = t.repeat({repeat}, {number})\n"
"r.remove(min(r))\n"
"r.remove(max(r))\n"
"print(np.mean(r))\n"
)
yield name, content
def get_pystone_benchmarks():
return get_benchmark_scripts("benchmarks/pystone_benchmarks", repeat=5, number=1)
def get_numpy_benchmarks():
return get_benchmark_scripts("benchmarks/numpy_benchmarks")
def get_matplotlib_benchmarks():
root = Path(__file__).resolve().parent / "benchmarks"
for filename in sorted(root.iterdir()):
name = filename.stem
if name in SKIP:
continue
if "canvas" in str(filename) or "wasm" in str(filename):
content = parse_numpy_benchmark(filename)
content += (
"import numpy as np\n"
"_ = np.empty(())\n"
"setup = setup + '\\nfrom __main__ import {}'\n"
"from timeit import Timer\n"
"t = Timer(run, setup)\n"
"r = t.repeat(11, 20)\n"
"r.remove(min(r))\n"
"r.remove(max(r))\n"
"print(np.mean(r))\n".format(name)
)
yield name, content
return get_benchmark_scripts("benchmarks/matplotlib_benchmarks")
def get_benchmarks():
yield from get_pystone_benchmarks()
yield from get_numpy_benchmarks()
yield from get_matplotlib_benchmarks()
def get_benchmarks(benchmarks, targets=("all",)):
if "all" in targets:
for benchmark in benchmarks.values():
yield from benchmark()
else:
for target in targets:
yield from benchmarks[target]()
def main(hostpython):
def parse_args(benchmarks):
benchmarks.append("all")
parser = argparse.ArgumentParser("Run benchmarks on Pyodide's performance")
parser.add_argument(
"target",
choices=benchmarks,
nargs="+",
help="Benchmarks to run ('all' to run all benchmarks)",
)
parser.add_argument(
"-o",
"--output",
default="build/benchmarks.json",
help="path to the json file where benchmark results will be saved",
)
parser.add_argument(
"--timeout",
default=1200,
type=int,
help="Browser timeout(sec) for each benchmark (default: %(default)s)",
)
return parser.parse_args()
def main():
BENCHMARKS = {
"pystone": get_pystone_benchmarks,
"numpy": get_numpy_benchmarks,
"matplotlib": get_matplotlib_benchmarks,
}
args = parse_args(list(BENCHMARKS.keys()))
targets = [t.lower() for t in args.target]
output = Path(args.output).resolve()
timeout = args.timeout
results = {}
selenium_backends = {}
browser_cls = [
("firefox", conftest.FirefoxWrapper),
("chrome", conftest.ChromeWrapper),
]
with conftest.spawn_web_server() as (hostname, port, log_path):
results = {}
selenium_backends = {}
b = {"native": float("NaN")}
browser_cls = [
("firefox", conftest.FirefoxWrapper),
("chrome", conftest.ChromeWrapper),
]
for name, cls in browser_cls:
t0 = time()
selenium_backends[name] = cls(port, script_timeout=1200)
b[name] = time() - t0
# pre-load numpy for the selenium instance used in benchmarks
selenium_backends[name].load_package("numpy")
results["selenium init"] = b
print_entry("selenium init", b)
# selenium initialization time
result = {"native": float("NaN")}
for browser_name, cls in browser_cls:
try:
t0 = time()
selenium = cls(port, script_timeout=timeout)
result[browser_name] = time() - t0
finally:
selenium.driver.quit()
# load packages
results["selenium init"] = result
print_entry("selenium init", result)
# package loading time
for package_name in ["numpy"]:
b = {"native": float("NaN")}
result = {"native": float("NaN")}
for browser_name, cls in browser_cls:
selenium = cls(port, script_timeout=1200)
selenium = cls(port, script_timeout=timeout)
try:
t0 = time()
selenium.load_package(package_name)
b[browser_name] = time() - t0
result[browser_name] = time() - t0
finally:
selenium.driver.quit()
results["load " + package_name] = b
print_entry("load " + package_name, b)
for name, content in get_benchmarks():
for browser_name, cls in browser_cls:
selenium_backends[browser_name].driver.quit()
selenium_backends[browser_name] = cls(port, script_timeout=1200)
selenium_backends[browser_name].load_package("numpy")
results[name] = run_all(hostpython, selenium_backends, content)
print_entry(name, results[name])
for selenium in selenium_backends.values():
selenium.driver.quit()
return results
results[f"load {package_name}"] = result
print_entry(f"load {package_name}", result)
# run benchmarks
for benchmark_name, content in get_benchmarks(BENCHMARKS, targets):
try:
# instantiate browsers for each benchmark to prevent side effects
for browser_name, cls in browser_cls:
selenium_backends[browser_name] = cls(port, script_timeout=timeout)
# pre-load numpy and matplotlib for the selenium instance used in benchmarks
selenium_backends[browser_name].load_package(
["numpy", "matplotlib"]
)
results[benchmark_name] = run_all(selenium_backends, content)
print_entry(benchmark_name, results[benchmark_name])
finally:
for selenium in selenium_backends.values():
selenium.driver.quit()
output.parent.mkdir(exist_ok=True, parents=True)
output.write_text(json.dumps(results))
if __name__ == "__main__":
results = main(Path(sys.argv[-2]).resolve())
with open(sys.argv[-1], "w") as fp:
json.dump(results, fp)
main()
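A small sketch of consuming the resulting `build/benchmarks.json`: the per-benchmark keys follow the structure built in `run_all()` and `main()` above ("native", one entry per browser name, plus a "(w/ ib)" variant when the interrupt buffer is enabled), though the exact set depends on what was run.

```python
# Sketch: load the results written by benchmark.py and print each timing
# relative to native (entries without a native run show NaN).
import json
from pathlib import Path

results = json.loads(Path("build/benchmarks.json").read_text())
for benchmark_name, timings in results.items():
    native = timings.get("native", float("nan"))
    for backend, seconds in timings.items():
        # native != native is the usual NaN check
        slowdown = seconds / native if native == native else float("nan")
        print(f"{benchmark_name:30} {backend:20} {seconds:8.3f}s  x{slowdown:.2f}")
```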

benchmark/benchmarks/pystone_benchmarks/pystone.py

@@ -1,6 +1,9 @@
#! /usr/bin/env python
# flake8: noqa
# setup: pass
# run: pystone()
"""
"PYSTONE" Benchmark Program
@@ -279,22 +282,5 @@ def Func3(EnumParIn):
return FALSE
if __name__ == "__main__":
import sys
def error(msg):
print(msg, end=" ", file=sys.stderr)
print("usage: %s [number_of_loops]" % sys.argv[0], file=sys.stderr)
sys.exit(100)
nargs = len(sys.argv) - 1
if nargs > 1:
error("%d arguments are too many;" % nargs)
elif nargs == 1:
try:
loops = int(sys.argv[1])
except ValueError:
error("Invalid argument %r;" % sys.argv[1])
else:
loops = LOOPS
main(loops)
def pystone():
main(LOOPS)
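Roughly, the timing script that `get_benchmark_scripts()` builds around this file would look like the sketch below, assuming `parse_benchmark()` rewrites the `# setup:`/`# run:` headers into the string variables `setup` and `run`; the stand-in `pystone()` replaces the real inlined source.

```python
# Sketch of the script generated by
# get_benchmark_scripts("benchmarks/pystone_benchmarks", repeat=5, number=1).


def pystone():  # stand-in; the real generated script inlines all of pystone.py here
    sum(range(100_000))


setup = "pass"       # from the "# setup: pass" header
run = "pystone()"    # from the "# run: pystone()" header

# --- harness appended by get_benchmark_scripts() ---
import numpy as np
from timeit import Timer

_ = np.empty(())  # force numpy initialisation before timing starts
setup = setup + "\nfrom __main__ import pystone"
t = Timer(run, setup)
r = t.repeat(5, 1)    # repeat=5 runs of number=1 loop each
r.remove(min(r))      # drop the fastest and the slowest repeat
r.remove(max(r))
print(np.mean(r))     # benchmark.py parses this last printed line as the runtime
```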

docs/development/testing.md

@@ -76,7 +76,7 @@ To run common benchmarks to understand Pyodide's performance, begin by
installing the same prerequisites as for testing. Then run:
```bash
make benchmark
PYODIDE_PACKAGES="numpy,matplotlib" make benchmark
```
## Linting