Fix wheel compression with the JsDelivr CDN (#3667)

Set the MIME type for wheels and zip files to application/wasm to enable CDN compression. Also add an option to disable the internal compression of zip files and wheels, which makes the Brotli compression applied by the CDN more efficient.

Co-authored-by: Gyeongjae Choi <def6488@gmail.com>
This commit is contained in:
Roman Yurchak 2023-03-21 09:11:47 +01:00 committed by GitHub
parent 59b2c4e1ef
commit 601d939fdd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 216 additions and 25 deletions

View File

@ -14,6 +14,8 @@ defaults: &defaults
# Make sure the ccache dir is consistent between core and package builds
# (it's not the case otherwise)
CCACHE_DIR: /root/.ccache/
# Disable the compression of wheels, so they are better compressed by the CDN
PYODIDE_ZIP_COMPRESSION_LEVEL: 0
orbs:
macos: circleci/macos@2.3.4
@ -467,15 +469,21 @@ jobs:
- run:
name: Deploy to pyodide-cdn2.iodide.io
command: |
# Note: the following compression is the compression of files on S3
# to reduce storage size and outbound AWS traffic. The HTTP
# requests for these files made to S3 will result in a response
# with gzip compression. However because JsDelivr CDN proxies and caches these
# requests, this has no impact on the compression of zip files and
# wheels once the files are served to users via CDN.
find dist/ -type f -print0 | xargs -0 -n1 -I@ bash -c "echo \"Compressing @\"; gzip @; mv @.gz @;"
aws s3 sync dist/ "s3://pyodide-cdn2.iodide.io/v${CIRCLE_TAG}/full/" --exclude '*.data' --cache-control 'max-age=30758400, immutable, public' --content-encoding 'gzip' # 1 year cache
aws s3 sync dist/ "s3://pyodide-cdn2.iodide.io/v${CIRCLE_TAG}/full/" --exclude '*' --include '*.data' --cache-control 'max-age=30758400, immutable, public' --content-type 'application/wasm' --content-encoding 'gzip' # 1 year
aws s3 sync dist/ "s3://pyodide-cdn2.iodide.io/v${CIRCLE_TAG}/full/" --exclude '*.zip' --exclude '*.whl' --exclude "*.tar" --cache-control 'max-age=30758400, immutable, public' --content-encoding 'gzip' # 1 year cache
aws s3 sync dist/ "s3://pyodide-cdn2.iodide.io/v${CIRCLE_TAG}/full/" --exclude '*' --include '*.zip' --include "*.whl" --include "*.tar" --cache-control 'max-age=30758400, immutable, public' --content-type 'application/wasm' --content-encoding 'gzip' # 1 year
- run:
name: Deploy debug version to pyodide-cdn2.iodide.io
command: |
find dist-debug/ -type f -print0 | xargs -0 -n1 -I@ bash -c "echo \"Compressing @\"; gzip @; mv @.gz @;"
aws s3 sync dist-debug/ "s3://pyodide-cdn2.iodide.io/v${CIRCLE_TAG}/debug/" --exclude '*.data' --cache-control 'max-age=30758400, public' --content-encoding 'gzip' # 1 year cache
aws s3 sync dist-debug/ "s3://pyodide-cdn2.iodide.io/v${CIRCLE_TAG}/debug/" --exclude '*' --include '*.data' --cache-control 'max-age=30758400, public' --content-type 'application/wasm' --content-encoding 'gzip' # 1 year cache
aws s3 sync dist-debug/ "s3://pyodide-cdn2.iodide.io/v${CIRCLE_TAG}/debug/" --exclude '*.zip' --exclude "*.whl" --exclude "*.tar" --cache-control 'max-age=30758400, public' --content-encoding 'gzip' # 1 year cache
aws s3 sync dist-debug/ "s3://pyodide-cdn2.iodide.io/v${CIRCLE_TAG}/debug/" --exclude '*' --include '*.zip' --include "*.whl" --include "*.tar" --cache-control 'max-age=30758400, public' --content-type 'application/wasm' --content-encoding 'gzip' # 1 year cache
- run:
name: update 301 redirect for the /latest/* route.
command: |

View File

@ -178,7 +178,7 @@ pyodide_build: ./pyodide-build/pyodide_build/**
which pyodide >/dev/null
dist/python_stdlib.zip: pyodide_build $(CPYTHONLIB)
pyodide create-zipfile $(CPYTHONLIB) src/py --output $@
pyodide create-zipfile $(CPYTHONLIB) src/py --compression-level "$(PYODIDE_ZIP_COMPRESSION_LEVEL)" --output $@
dist/test.html: src/templates/test.html
cp $< $@

View File

@ -35,6 +35,11 @@ export PYTHONINCLUDE=$(PYODIDE_ROOT)/cpython/installs/python-$(PYVERSION)/includ
# Use env variable if defined, otherwise fallback to './'
export PYODIDE_BASE_URL?=./
# The compression level used for zip files and wheels. When distributing via a
# CDN it's more efficient to set this value to 0 and let the CDN perform the
# Brotli compression.
export PYODIDE_ZIP_COMPRESSION_LEVEL?=6
# For packages that depend on numpy.
# TODO: maybe move this somewhere else?
export NUMPY_LIB=$(HOSTSITEPACKAGES)/numpy/

View File

@ -110,6 +110,12 @@ myst:
you will need to remove it.
{pr}`3621`
- {{ Fix }} Improves the compression of wheel files with the JsDelivr CDN. For
browsers that support the Brotli compression (most modern ones) this should
result in a size reduction of 20-30%. Also many `pyodide` CLI
sub-commands now support `--compression-level` as an optional parameter.
{pr}`3655`
- {{ Breaking }} Removed deprecated CLI entrypoints `pyodide_build buildall` which is
replaced by `pyodide build-recipes`, and `pyodide-build mkpkg` which is
replaced by `pyodide skeleton pypi` {pr}`3668`

View File

@ -11,7 +11,8 @@ all:
--install \
--install-dir=../dist \
--n-jobs $${PYODIDE_JOBS:-4} \
--log-dir=./build-logs
--log-dir=./build-logs \
--compression-level "$(PYODIDE_ZIP_COMPRESSION_LEVEL)"
update-all:
for pkg in $$(find . -maxdepth 1 ! -name ".*" -type d -exec basename {} \; | tail -n +2); do \

View File

@ -87,7 +87,11 @@ def _py_compile_wheel_name(wheel_name: str) -> str:
def _compile(
input_path: Path, output_path: Path, keep: bool = True, verbose: bool = True
input_path: Path,
output_path: Path,
keep: bool = True,
verbose: bool = True,
compression_level: int = 6,
) -> None:
"""Compile all .py files in the zip archive to .pyc files.
@ -97,19 +101,30 @@ def _compile(
Path to the input archive.
output_path
Path to the output archive.
compression_level
Level of zip compression to apply. 0 means no compression. If a strictly
positive integer is provided, ZIP_DEFLATED option is used.
"""
output_name = output_path.name
with set_log_level(logger, verbose):
logger.debug(f"Running py-compile on {input_path} to {output_path}")
if compression_level > 0:
compression = zipfile.ZIP_DEFLATED
else:
compression = zipfile.ZIP_STORED
with zipfile.ZipFile(
input_path
) as fh_zip_in, TemporaryDirectory() as temp_dir_str:
temp_dir = Path(temp_dir_str)
output_path_tmp = temp_dir / output_name
with zipfile.ZipFile(
output_path_tmp, mode="w", compression=zipfile.ZIP_DEFLATED
output_path_tmp,
mode="w",
compression=compression,
compresslevel=compression_level,
) as fh_zip_out:
for name in fh_zip_in.namelist():
if name.endswith(".pyc"):
@ -153,6 +168,7 @@ def _py_compile_wheel(
wheel_path: Path,
keep: bool = True,
verbose: bool = True,
compression_level: int = 6,
) -> Path:
"""Compile .py files to .pyc in a wheel
@ -168,6 +184,9 @@ def _py_compile_wheel(
path)
verbose
print logging information
compression_level
Level of zip compression to apply. 0 means no compression. If a strictly
positive integer is provided, ZIP_DEFLATED option is used.
Returns
-------

View File

@ -28,7 +28,7 @@ from rich.table import Table
from . import common, recipe
from .buildpkg import needs_rebuild
from .common import find_matching_wheels, find_missing_executables
from .common import find_matching_wheels, find_missing_executables, repack_zip_archive
from .io import MetaConfig, _BuildSpecTypes
from .logger import console_stdout, logger
from .pywasmcross import BuildArgs
@ -713,13 +713,18 @@ def generate_repodata(
def copy_packages_to_dist_dir(
packages: Iterable[BasePackage], output_dir: Path
packages: Iterable[BasePackage], output_dir: Path, compression_level: int = 6
) -> None:
for pkg in packages:
if pkg.package_type == "static_library":
continue
shutil.copy(pkg.dist_artifact_path(), output_dir)
dist_artifact_path = pkg.dist_artifact_path()
shutil.copy(dist_artifact_path, output_dir)
repack_zip_archive(
output_dir / dist_artifact_path.name, compression_level=compression_level
)
test_path = pkg.tests_path()
if test_path:
@ -774,7 +779,9 @@ def copy_logs(pkg_map: dict[str, BasePackage], log_dir: Path) -> None:
logger.warning(f"Warning: {pkg.name} has no build log")
def install_packages(pkg_map: dict[str, BasePackage], output_dir: Path) -> None:
def install_packages(
pkg_map: dict[str, BasePackage], output_dir: Path, compression_level: int = 6
) -> None:
"""
Install packages into the output directory.
- copies build artifacts (wheel, zip, ...) to the output directory
@ -791,7 +798,9 @@ def install_packages(pkg_map: dict[str, BasePackage], output_dir: Path) -> None:
output_dir.mkdir(exist_ok=True, parents=True)
logger.info(f"Copying built packages to {output_dir}")
copy_packages_to_dist_dir(pkg_map.values(), output_dir)
copy_packages_to_dist_dir(
pkg_map.values(), output_dir, compression_level=compression_level
)
repodata_path = output_dir / "repodata.json"
logger.info(f"Writing repodata.json to {repodata_path}")
@ -815,5 +824,7 @@ def set_default_build_args(build_args: BuildArgs) -> BuildArgs:
args.target_install_dir = common.get_make_flag("TARGETINSTALLDIR") # type: ignore[unreachable]
if args.host_install_dir is None:
args.host_install_dir = common.get_make_flag("HOSTINSTALLDIR") # type: ignore[unreachable]
if args.compression_level is None:
args.compression_level = int(common.get_make_flag("PYODIDE_ZIP_COMPRESSION_LEVEL")) # type: ignore[unreachable]
return args

View File

@ -31,6 +31,7 @@ from .common import (
exit_with_stdio,
find_matching_wheels,
find_missing_executables,
make_zip_archive,
set_build_environment,
)
from .io import MetaConfig, _BuildSpec, _SourceSpec
@ -789,7 +790,7 @@ def _build_package_inner(
# and create a zip archive of the .so files
shutil.rmtree(dist_dir, ignore_errors=True)
dist_dir.mkdir(parents=True)
shutil.make_archive(str(dist_dir / src_dir_name), "zip", src_dist_dir)
make_zip_archive(dist_dir / f"{src_dir_name}.zip", src_dist_dir)
else: # wheel
if not finished_wheel:
compile(

View File

@ -148,6 +148,9 @@ def main(
[],
help="Skip building or resolving a single dependency. Use multiple times or provide a comma separated list to skip multiple dependencies.",
),
compression_level: int = typer.Option(
6, help="Compression level to use for the created zip file"
),
ctx: typer.Context = typer.Context,
) -> None:
"""Use pypa/build to build a Python package from source, pypi or url."""
@ -221,6 +224,7 @@ def main(
exports,
ctx.args,
output_lockfile=output_lockfile,
compression_level=compression_level,
)
except BaseException as e:
import traceback

View File

@ -60,6 +60,10 @@ def recipe(
None,
help="Number of packages to build in parallel (default: # of cores in the system)",
),
compression_level: int = typer.Option(
6,
help="Level of zip compression to apply when installing. 0 means no compression.",
),
) -> None:
"""Build packages using yaml recipes and create repodata.json"""
root = Path.cwd()
@ -109,4 +113,6 @@ def recipe(
buildall.copy_logs(pkg_map, log_dir_)
if install:
buildall.install_packages(pkg_map, install_dir_)
buildall.install_packages(
pkg_map, install_dir_, compression_level=compression_level
)

View File

@ -16,11 +16,20 @@ def main(
output: Path = typer.Option(
"python.zip", help="Path to the output zip file. Defaults to python.zip."
),
compression_level: int = typer.Option(
6, help="Compression level to use for the created zip file"
),
) -> None:
"""
Bundle Python standard libraries into a zip file.
"""
create_zipfile(libdir, output, pycompile=pycompile, filterfunc=None)
create_zipfile(
libdir,
output,
pycompile=pycompile,
filterfunc=None,
compression_level=compression_level,
)
typer.echo(f"Zip file created at {output.resolve()}")

View File

@ -8,9 +8,12 @@ from pyodide_build._py_compile import _py_compile_wheel
def main(
wheel_path: Path = typer.Argument(..., help="Path to the input wheel"),
compression_level: int = typer.Option(
6, help="Compression level to use for the created zip file"
),
) -> None:
"""Compile .py files to .pyc in a wheel"""
if wheel_path.suffix != ".whl":
typer.echo(f"Error: only .whl files are supported, got {wheel_path.name}")
sys.exit(1)
_py_compile_wheel(wheel_path, verbose=False)
_py_compile_wheel(wheel_path, verbose=False, compression_level=compression_level)

View File

@ -11,6 +11,7 @@ from collections import deque
from collections.abc import Generator, Iterable, Iterator, Mapping
from contextlib import contextmanager
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any, NoReturn
if sys.version_info < (3, 11, 0):
@ -418,3 +419,48 @@ def get_num_cores() -> int:
import loky
return loky.cpu_count()
def make_zip_archive(
    archive_path: Path,
    input_dir: Path,
    compression_level: int = 6,
) -> None:
    """Create a zip archive out of an input folder

    Parameters
    ----------
    archive_path
        Path to the zip file that will be created
    input_dir
        input dir to compress
    compression_level
        compression level of the resulting zip file. 0 means no compression
        (ZIP_STORED); any strictly positive value uses ZIP_DEFLATED.
    """
    if compression_level > 0:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    with zipfile.ZipFile(
        archive_path, "w", compression=compression, compresslevel=compression_level
    ) as zf:
        # Sort the entries so the archive layout does not depend on the
        # filesystem's directory iteration order (reproducible builds).
        for file in sorted(input_dir.rglob("*")):
            zf.write(file, file.relative_to(input_dir))
def repack_zip_archive(archive_path: Path, compression_level: int = 6) -> None:
    """Rewrite an existing zip archive in place with a different compression level.

    The original archive is first moved into a temporary directory, then its
    members are copied one by one into a new archive at the original path.
    A compression_level of 0 stores members uncompressed (ZIP_STORED);
    any strictly positive value uses ZIP_DEFLATED.
    """
    compression = (
        zipfile.ZIP_DEFLATED if compression_level > 0 else zipfile.ZIP_STORED
    )
    with TemporaryDirectory() as scratch_dir:
        source_path = Path(scratch_dir) / archive_path.name
        shutil.move(archive_path, source_path)
        with zipfile.ZipFile(source_path) as src:
            with zipfile.ZipFile(
                archive_path,
                "w",
                compression=compression,
                compresslevel=compression_level,
            ) as dest:
                for member in src.namelist():
                    dest.writestr(member, src.read(member))

View File

@ -27,7 +27,7 @@ from unearth.evaluator import TargetPython
from unearth.finder import PackageFinder
from .. import common
from ..common import chdir
from ..common import chdir, repack_zip_archive
from ..logger import logger
from . import build
@ -194,15 +194,21 @@ def get_project_from_pypi(package_name, extras):
yield Candidate(i.name, i.version, url=i.link.url, extras=extras)
def download_or_build_wheel(url: str, target_directory: Path) -> None:
def download_or_build_wheel(
url: str, target_directory: Path, compression_level: int = 6
) -> None:
parsed_url = urlparse(url)
if parsed_url.path.endswith("gz"):
wheel_file = get_built_wheel(url)
shutil.copy(wheel_file, target_directory)
wheel_path = target_directory / wheel_file.name
elif parsed_url.path.endswith(".whl"):
with open(target_directory / Path(parsed_url.path).name, "wb") as f:
wheel_path = target_directory / Path(parsed_url.path).name
with open(wheel_path, "wb") as f:
f.write(requests.get(url).content)
repack_zip_archive(wheel_path, compression_level=compression_level)
def get_metadata_for_wheel(url):
parsed_url = urlparse(url)
@ -323,6 +329,7 @@ def _resolve_and_build(
build_dependencies: bool,
extras: list[str],
output_lockfile: str | None,
compression_level: int = 6,
) -> None:
requirements = []
@ -394,6 +401,7 @@ def build_dependencies_for_wheel(
exports: str,
build_flags: list[str],
output_lockfile: str | None,
compression_level: int = 6,
) -> None:
"""Extract dependencies from this wheel and build pypi dependencies
for each one in ./dist/
@ -423,6 +431,7 @@ def build_dependencies_for_wheel(
build_dependencies=True,
extras=extras,
output_lockfile=output_lockfile,
compression_level=compression_level,
)
# add the current wheel to the package-versions.txt
if output_lockfile is not None and len(output_lockfile) > 0:

View File

@ -73,6 +73,7 @@ class BuildArgs:
builddir: str = "" # The path to run pypa/build
pythoninclude: str = ""
exports: Literal["whole_archive", "requested", "pyinit"] | list[str] = "pyinit"
compression_level: int = 6
def replay_f2c(args: list[str], dryrun: bool = False) -> list[str] | None:

View File

@ -4,6 +4,7 @@ from pathlib import Path
from tempfile import TemporaryDirectory
from ._py_compile import _compile
from .common import make_zip_archive
# These files are removed from the stdlib
REMOVED_FILES = (
@ -110,6 +111,7 @@ def create_zipfile(
output: Path | str = "python",
pycompile: bool = False,
filterfunc: Callable[[str, list[str]], set[str]] | None = None,
compression_level: int = 6,
) -> None:
"""
Bundle Python standard libraries into a zip file.
@ -140,14 +142,17 @@ def create_zipfile(
This function will be passed to {ref}`shutil.copytree` 's ignore argument.
By default, Pyodide's default filter function is used.
compression_level
Level of zip compression to apply. 0 means no compression. If a strictly
positive integer is provided, ZIP_DEFLATED option is used.
Returns
-------
BytesIO
A BytesIO object containing the zip file.
"""
output = Path(output)
output = output.with_name(output.name.rstrip(".zip"))
archive = Path(output)
with TemporaryDirectory() as temp_dir_str:
temp_dir = Path(temp_dir_str)
@ -160,8 +165,17 @@ def create_zipfile(
shutil.copytree(libdir, temp_dir, ignore=_filterfunc, dirs_exist_ok=True)
archive: Path | str = shutil.make_archive(str(output), "zip", temp_dir)
archive = Path(archive)
make_zip_archive(
archive,
temp_dir,
compression_level=compression_level,
)
if pycompile:
_compile(archive, archive, verbose=False, keep=False)
_compile(
archive,
archive,
verbose=False,
keep=False,
compression_level=compression_level,
)

View File

@ -9,8 +9,10 @@ from pyodide_build.common import (
get_make_environment_vars,
get_make_flag,
get_num_cores,
make_zip_archive,
parse_top_level_import_name,
platform,
repack_zip_archive,
search_pyodide_root,
)
@ -198,3 +200,49 @@ def test_get_num_cores(monkeypatch, num_cpus):
m.setattr(loky, "cpu_count", lambda: num_cpus)
assert get_num_cores() == num_cpus
@pytest.mark.parametrize(
    "compression_level, expected_compression_type",
    [(6, zipfile.ZIP_DEFLATED), (0, zipfile.ZIP_STORED)],
)
def test_make_zip_archive(tmp_path, compression_level, expected_compression_type):
    """make_zip_archive packs a small tree with the requested compression."""
    # Build a tiny input tree: one file at the root plus a nested empty file.
    src_tree = tmp_path / "a"
    src_tree.mkdir()
    (src_tree / "b.txt").write_text(".")
    (src_tree / "c").mkdir()
    (src_tree / "c/d").write_bytes(b"")

    archive = tmp_path / "output.zip"
    make_zip_archive(archive, src_tree, compression_level=compression_level)

    with zipfile.ZipFile(archive) as zf:
        assert zf.namelist() == ["b.txt", "c/", "c/d"]
        assert zf.read("b.txt") == b"."
        assert zf.getinfo("b.txt").compress_type == expected_compression_type
@pytest.mark.parametrize(
    "compression_level, expected_compression_type, expected_size",
    [(6, zipfile.ZIP_DEFLATED, 220), (0, zipfile.ZIP_STORED, 1207)],
)
def test_repack_zip_archive(
    tmp_path, compression_level, expected_compression_type, expected_size
):
    """repack_zip_archive rewrites an archive in place with new compression."""
    archive = tmp_path / "archive.zip"
    payload = "a" * 1000

    # Start from a BZIP2 archive so the repack must actually change the codec.
    with zipfile.ZipFile(
        archive, "w", compression=zipfile.ZIP_BZIP2, compresslevel=3
    ) as zf:
        zf.writestr("a/b.txt", payload)
        zf.writestr("a/b/c.txt", "d")

    repack_zip_archive(archive, compression_level=compression_level)

    with zipfile.ZipFile(archive) as zf:
        assert zf.namelist() == ["a/b.txt", "a/b/c.txt"]
        assert zf.getinfo("a/b.txt").compress_type == expected_compression_type
    # NOTE(review): exact on-disk sizes may be brittle across zlib versions —
    # confirm these constants if the CI Python/zlib is upgraded.
    assert archive.stat().st_size == expected_size