MAINT Refactor benchmark script (#2227)

Which benchmarks to run (pystone, numpy, matplotlib, or all) can now be selected through command-line arguments.
Each benchmark can be split into a separate CI job in the future if needed.
Moved the pystone benchmark into the benchmark directory so that it is no longer included in the Pyodide release.
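For illustration, a benchmark script under the new `benchmark/benchmarks/` layout could look like the sketch below; the file name and body are hypothetical, but the `# setup:`/`# run:` header convention and the rule that the file stem names the function imported from `__main__` follow the harness code in this commit.

```python
# Hypothetical file: benchmark/benchmarks/numpy_benchmarks/sum_example.py
# The stem ("sum_example") must match the function the generated harness
# imports from __main__; the headers become the timeit setup and statement.
# flake8: noqa
# setup: import numpy as np ; a = np.ones(10000)
# run: sum_example(a)
import numpy as np


def sum_example(a):
    return float(a.sum())
```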
Gyeongjae Choi 2022-03-03 03:11:36 +09:00 committed by GitHub
parent 5b22f5ed50
commit fd8433a013
50 changed files with 139 additions and 112 deletions

.circleci/config.yml

@@ -337,7 +337,8 @@ jobs:
- run:
name: benchmark
command: |
python benchmark/benchmark.py /usr/local/bin/python3 build/benchmarks.json
python benchmark/benchmark.py all --output build/benchmarks.json
- store_artifacts:
path: /root/repo/build/benchmarks.json

.pre-commit-config.yaml

@@ -1,4 +1,4 @@
exclude: (^.*patches|.*\.cgi$|^packages/micropip/src/micropip/externals|^src/py/lib/pystone.py$)
exclude: (^.*patches|.*\.cgi$|^packages/micropip/src/micropip/externals|^benchmark/benchmarks$)
default_language_version:
python: "3.9"
repos:

Makefile

@@ -149,7 +149,7 @@ lint:
pre-commit run -a --show-diff-on-failure
benchmark: all
$(HOSTPYTHON) benchmark/benchmark.py $(HOSTPYTHON) build/benchmarks.json
$(HOSTPYTHON) benchmark/benchmark.py all --output build/benchmarks.json
$(HOSTPYTHON) benchmark/plot_benchmark.py build/benchmarks.json build/benchmarks.png

benchmark/benchmark.py

@@ -1,3 +1,4 @@
import argparse
import json
import re
import subprocess
@@ -5,12 +6,11 @@ import sys
from pathlib import Path
from time import time
sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "test"))
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
import conftest # noqa: E402
SKIP = {"fft", "hyantes", "README"}
SKIP = {"fft", "hyantes"}
def print_entry(name, res):
@@ -21,12 +21,13 @@ def print_entry(name, res):
print("")
def run_native(hostpython, code):
def run_native(code):
if "# non-native" in code:
return float("NaN")
root = Path(__file__).resolve().parents[1]
output = subprocess.check_output(
[hostpython.resolve(), "-c", code],
[sys.executable, "-c", code],
cwd=Path(__file__).resolve().parent,
env={
"PYTHONPATH": str(root / "src/py/lib")
@@ -45,8 +46,7 @@ def run_wasm(code, selenium, interrupt_buffer):
pyodide.setInterruptBuffer(interrupt_buffer)
"""
)
if "matplotlib" in code:
selenium.load_package("matplotlib")
selenium.run(code)
try:
runtime = float(selenium.logs.split("\n")[-1])
@@ -56,12 +56,11 @@ def run_wasm(code, selenium, interrupt_buffer):
return runtime
def run_all(hostpython, selenium_backends, code):
a = run_native(hostpython, code)
result = {"native": a}
def run_all(selenium_backends, code):
result = {"native": run_native(code)}
for browser_name, selenium in selenium_backends.items():
for interrupt_buffer in [False, True]:
print(f"Running with: {browser_name} {interrupt_buffer}")
dt = run_wasm(code, selenium, interrupt_buffer)
if interrupt_buffer:
browser_name += "(w/ ib)"
@@ -69,11 +68,7 @@ def run_all(hostpython, selenium_backends, code):
return result
def get_pystone_benchmarks():
yield "pystone", ("import pystone\n" "pystone.main(pystone.LOOPS)\n")
def parse_numpy_benchmark(filename):
def parse_benchmark(filename):
lines = []
with open(filename) as fp:
for line in fp:
@@ -84,102 +79,147 @@ def parse_numpy_benchmark(filename):
return "".join(lines)
def get_numpy_benchmarks():
root = Path(__file__).resolve().parent / "benchmarks"
def get_benchmark_scripts(scripts_dir, repeat=11, number=5):
root = Path(__file__).resolve().parent / scripts_dir
for filename in sorted(root.iterdir()):
name = filename.stem
if name in SKIP:
continue
if "canvas" not in str(filename) and "wasm" not in str(filename):
content = parse_numpy_benchmark(filename)
content += (
"import numpy as np\n"
"_ = np.empty(())\n"
"setup = setup + '\\nfrom __main__ import {}'\n"
"from timeit import Timer\n"
"t = Timer(run, setup)\n"
"r = t.repeat(11, 40)\n"
"r.remove(min(r))\n"
"r.remove(max(r))\n"
"print(np.mean(r))\n".format(name)
)
yield name, content
content = parse_benchmark(filename)
content += (
"import numpy as np\n"
"_ = np.empty(())\n"
f"setup = setup + '\\nfrom __main__ import {name}'\n"
"from timeit import Timer\n"
"t = Timer(run, setup)\n"
f"r = t.repeat({repeat}, {number})\n"
"r.remove(min(r))\n"
"r.remove(max(r))\n"
"print(np.mean(r))\n"
)
yield name, content
def get_pystone_benchmarks():
return get_benchmark_scripts("benchmarks/pystone_benchmarks", repeat=5, number=1)
def get_numpy_benchmarks():
return get_benchmark_scripts("benchmarks/numpy_benchmarks")
def get_matplotlib_benchmarks():
root = Path(__file__).resolve().parent / "benchmarks"
for filename in sorted(root.iterdir()):
name = filename.stem
if name in SKIP:
continue
if "canvas" in str(filename) or "wasm" in str(filename):
content = parse_numpy_benchmark(filename)
content += (
"import numpy as np\n"
"_ = np.empty(())\n"
"setup = setup + '\\nfrom __main__ import {}'\n"
"from timeit import Timer\n"
"t = Timer(run, setup)\n"
"r = t.repeat(11, 20)\n"
"r.remove(min(r))\n"
"r.remove(max(r))\n"
"print(np.mean(r))\n".format(name)
)
yield name, content
return get_benchmark_scripts("benchmarks/matplotlib_benchmarks")
def get_benchmarks():
yield from get_pystone_benchmarks()
yield from get_numpy_benchmarks()
yield from get_matplotlib_benchmarks()
def get_benchmarks(benchmarks, targets=("all",)):
if "all" in targets:
for benchmark in benchmarks.values():
yield from benchmark()
else:
for target in targets:
yield from benchmarks[target]()
def main(hostpython):
def parse_args(benchmarks):
benchmarks.append("all")
parser = argparse.ArgumentParser("Run benchmarks on Pyodide's performance")
parser.add_argument(
"target",
choices=benchmarks,
nargs="+",
help="Benchmarks to run ('all' to run all benchmarks)",
)
parser.add_argument(
"-o",
"--output",
default="build/benchmarks.json",
help="path to the json file where benchmark results will be saved",
)
parser.add_argument(
"--timeout",
default=1200,
type=int,
help="Browser timeout(sec) for each benchmark (default: %(default)s)",
)
return parser.parse_args()
def main():
BENCHMARKS = {
"pystone": get_pystone_benchmarks,
"numpy": get_numpy_benchmarks,
"matplotlib": get_matplotlib_benchmarks,
}
args = parse_args(list(BENCHMARKS.keys()))
targets = [t.lower() for t in args.target]
output = Path(args.output).resolve()
timeout = args.timeout
results = {}
selenium_backends = {}
browser_cls = [
("firefox", conftest.FirefoxWrapper),
("chrome", conftest.ChromeWrapper),
]
with conftest.spawn_web_server() as (hostname, port, log_path):
results = {}
selenium_backends = {}
b = {"native": float("NaN")}
browser_cls = [
("firefox", conftest.FirefoxWrapper),
("chrome", conftest.ChromeWrapper),
]
for name, cls in browser_cls:
t0 = time()
selenium_backends[name] = cls(port, script_timeout=1200)
b[name] = time() - t0
# pre-load numpy for the selenium instance used in benchmarks
selenium_backends[name].load_package("numpy")
results["selenium init"] = b
print_entry("selenium init", b)
# selenium initialization time
result = {"native": float("NaN")}
for browser_name, cls in browser_cls:
try:
t0 = time()
selenium = cls(port, script_timeout=timeout)
result[browser_name] = time() - t0
finally:
selenium.driver.quit()
# load packages
results["selenium init"] = result
print_entry("selenium init", result)
# package loading time
for package_name in ["numpy"]:
b = {"native": float("NaN")}
result = {"native": float("NaN")}
for browser_name, cls in browser_cls:
selenium = cls(port, script_timeout=1200)
selenium = cls(port, script_timeout=timeout)
try:
t0 = time()
selenium.load_package(package_name)
b[browser_name] = time() - t0
result[browser_name] = time() - t0
finally:
selenium.driver.quit()
results["load " + package_name] = b
print_entry("load " + package_name, b)
for name, content in get_benchmarks():
for browser_name, cls in browser_cls:
selenium_backends[browser_name].driver.quit()
selenium_backends[browser_name] = cls(port, script_timeout=1200)
selenium_backends[browser_name].load_package("numpy")
results[name] = run_all(hostpython, selenium_backends, content)
print_entry(name, results[name])
for selenium in selenium_backends.values():
selenium.driver.quit()
return results
results[f"load {package_name}"] = result
print_entry(f"load {package_name}", result)
# run benchmarks
for benchmark_name, content in get_benchmarks(BENCHMARKS, targets):
try:
# instantiate browsers for each benchmark to prevent side effects
for browser_name, cls in browser_cls:
selenium_backends[browser_name] = cls(port, script_timeout=timeout)
# pre-load numpy and matplotlib for the selenium instance used in benchmarks
selenium_backends[browser_name].load_package(
["numpy", "matplotlib"]
)
results[benchmark_name] = run_all(selenium_backends, content)
print_entry(benchmark_name, results[benchmark_name])
finally:
for selenium in selenium_backends.values():
selenium.driver.quit()
output.parent.mkdir(exist_ok=True, parents=True)
output.write_text(json.dumps(results))
if __name__ == "__main__":
results = main(Path(sys.argv[-2]).resolve())
with open(sys.argv[-1], "w") as fp:
json.dump(results, fp)
main()
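A small sketch of consuming the resulting `build/benchmarks.json`: the per-benchmark keys follow the structure built in `run_all()` and `main()` above ("native", one entry per browser name, plus a "(w/ ib)" variant when the interrupt buffer is enabled), though the exact set depends on what was run.

```python
# Sketch: load the results written by benchmark.py and print each timing
# relative to native (entries without a native run show NaN).
import json
from pathlib import Path

results = json.loads(Path("build/benchmarks.json").read_text())
for benchmark_name, timings in results.items():
    native = timings.get("native", float("nan"))
    for backend, seconds in timings.items():
        # native != native is the usual NaN check
        slowdown = seconds / native if native == native else float("nan")
        print(f"{benchmark_name:30} {backend:20} {seconds:8.3f}s  x{slowdown:.2f}")
```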

benchmark/benchmarks/pystone_benchmarks/pystone.py

@@ -1,6 +1,9 @@
#! /usr/bin/env python
# flake8: noqa
# setup: pass
# run: pystone()
"""
"PYSTONE" Benchmark Program
@@ -279,22 +282,5 @@ def Func3(EnumParIn):
return FALSE
if __name__ == "__main__":
import sys
def error(msg):
print(msg, end=" ", file=sys.stderr)
print("usage: %s [number_of_loops]" % sys.argv[0], file=sys.stderr)
sys.exit(100)
nargs = len(sys.argv) - 1
if nargs > 1:
error("%d arguments are too many;" % nargs)
elif nargs == 1:
try:
loops = int(sys.argv[1])
except ValueError:
error("Invalid argument %r;" % sys.argv[1])
else:
loops = LOOPS
main(loops)
def pystone():
main(LOOPS)
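Roughly, the timing script that `get_benchmark_scripts()` builds around this file would look like the sketch below, assuming `parse_benchmark()` rewrites the `# setup:`/`# run:` headers into the string variables `setup` and `run`; the stand-in `pystone()` replaces the real inlined source.

```python
# Sketch of the script generated by
# get_benchmark_scripts("benchmarks/pystone_benchmarks", repeat=5, number=1).


def pystone():  # stand-in; the real generated script inlines all of pystone.py here
    sum(range(100_000))


setup = "pass"       # from the "# setup: pass" header
run = "pystone()"    # from the "# run: pystone()" header

# --- harness appended by get_benchmark_scripts() ---
import numpy as np
from timeit import Timer

_ = np.empty(())  # force numpy initialisation before timing starts
setup = setup + "\nfrom __main__ import pystone"
t = Timer(run, setup)
r = t.repeat(5, 1)    # repeat=5 runs of number=1 loop each
r.remove(min(r))      # drop the fastest and the slowest repeat
r.remove(max(r))
print(np.mean(r))     # benchmark.py parses this last printed line as the runtime
```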

docs/development/testing.md

@@ -76,7 +76,7 @@ To run common benchmarks to understand Pyodide's performance, begin by
installing the same prerequisites as for testing. Then run:
```bash
make benchmark
PYODIDE_PACKAGES="numpy,matplotlib" make benchmark
```
## Linting