diff --git a/.circleci/config.yml b/.circleci/config.yml
index 4f9458019..94115fb49 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -464,8 +464,7 @@ jobs:
     # similar as possible to the steps in deploy-dev!
     resource_class: small

-    docker:
-      - image: cibuilds/github:0.13
+    <<: *defaults

     steps:
       - checkout
@@ -474,8 +473,11 @@
       - run:
           name: Install requirements
           command: |
-            apk add --no-cache --update python3 make npm
-            python3 -m pip install awscli
+            python3 -m pip install -e "./pyodide-build[deploy]"
+
+            wget https://github.com/tcnksm/ghr/releases/download/v0.16.0/ghr_v0.16.0_linux_amd64.tar.gz
+            tar xzf ghr_v0.16.0_linux_amd64.tar.gz
+            mv ghr_v0.16.0_linux_amd64/ghr /tmp/ghr-bin

       - run:
           name: Deploy Github Releases
@@ -487,7 +489,7 @@
             tar cjf dist/pyodide-${CIRCLE_TAG}.tar.bz2 pyodide/
             tar cjf dist/pyodide-core-${CIRCLE_TAG}.tar.bz2 pyodide/pyodide{.js,.mjs,.asm.js,.asm.wasm} pyodide/{package,repodata}.json pyodide/python_stdlib.zip
             tar cjf dist/xbuildenv-${CIRCLE_TAG}.tar.bz2 xbuildenv/
-            ghr -t "${GITHUB_TOKEN}" -u "${CIRCLE_PROJECT_USERNAME}" \
+            /tmp/ghr-bin -t "${GITHUB_TOKEN}" -u "${CIRCLE_PROJECT_USERNAME}" \
               -r "${CIRCLE_PROJECT_REPONAME}" -c "${CIRCLE_SHA1}" \
               -delete "${CIRCLE_TAG}" \
               dist
@@ -500,39 +502,30 @@
       - run:
           name: Set PYODIDE_BASE_URL
           command: |
+            PYODIDE_BASE_URL="https://cdn.jsdelivr.net/pyodide/v${CIRCLE_TAG}/pyc/" make dist/console.html
+            cp dist/console.html dist-pyc/console.html
             PYODIDE_BASE_URL="https://cdn.jsdelivr.net/pyodide/v${CIRCLE_TAG}/debug/" make dist/console.html
             cp dist/console.html dist-debug/console.html
             PYODIDE_BASE_URL="https://cdn.jsdelivr.net/pyodide/v${CIRCLE_TAG}/full/" make dist/console.html

       - run:
-          name: Deploy to pyodide-cdn2.iodide.io
+          name: Deploy to S3
           command: |
-            # Note: the following compression is the compression of files on S3
-            # to reduce storage size and outbound AWS traffic. The HTTP
-            # requests for these files made to S3 will result in a response
-            # with gzip compression. However because JsDelivr CDN proxies and caches these
-            # requests, this has no impact on the compression of zip files and
-            # wheels once the files are served to users via CDN.
-            find dist/ -type f -print0 | xargs -0 -n1 -I@ bash -c "echo \"Compressing @\"; gzip @; mv @.gz @;"
-            aws s3 sync dist/ "s3://pyodide-cdn2.iodide.io/v${CIRCLE_TAG}/full/" --exclude '*.zip' --exclude '*.whl' --exclude "*.tar" --cache-control 'max-age=30758400, immutable, public' --content-encoding 'gzip' # 1 year cache
-            aws s3 sync dist/ "s3://pyodide-cdn2.iodide.io/v${CIRCLE_TAG}/full/" --exclude '*' --include '*.zip' --include "*.whl" --include "*.tar" --cache-control 'max-age=30758400, immutable, public' --content-type 'application/wasm' --content-encoding 'gzip' # 1 year
+            python3 tools/deploy_s3.py dist/ "v${CIRCLE_TAG}/full/" --bucket "pyodide-cdn2.iodide.io" --cache-control 'max-age=30758400, immutable, public'

       - run:
-          name: Deploy debug version to pyodide-cdn2.iodide.io
+          name: Deploy debug version to S3
           command: |
-            find dist-debug/ -type f -print0 | xargs -0 -n1 -I@ bash -c "echo \"Compressing @\"; gzip @; mv @.gz @;"
-            aws s3 sync dist-debug/ "s3://pyodide-cdn2.iodide.io/v${CIRCLE_TAG}/debug/" --exclude '*.zip' --exclude "*.whl" --exclude "*.tar" --cache-control 'max-age=30758400, public' --content-encoding 'gzip' # 1 year cache
-            aws s3 sync dist-debug/ "s3://pyodide-cdn2.iodide.io/v${CIRCLE_TAG}/debug/" --exclude '*' --include '*.zip' --include "*.whl" --include "*.tar" --cache-control 'max-age=30758400, public' --content-type 'application/wasm' --content-encoding 'gzip' # 1 year cache
+            python3 tools/deploy_s3.py dist-debug/ "v${CIRCLE_TAG}/debug/" --bucket "pyodide-cdn2.iodide.io" --cache-control 'max-age=30758400, immutable, public'

       - run:
-          name: update 301 redirect for the /latest/* route.
+          name: Deploy the pyc version to S3
           command: |
-            aws s3api put-bucket-website --cli-input-json file://.circleci/s3-website-config.json
+            python3 tools/deploy_s3.py dist-pyc/ "v${CIRCLE_TAG}/pyc/" --bucket "pyodide-cdn2.iodide.io" --cache-control 'max-age=30758400, immutable, public'

   deploy-dev:
     # To reduce chance of deployment issues, try to keep the steps here as
     # similar as possible to the steps in deploy-release!
     resource_class: small

-    docker:
-      - image: cibuilds/github:0.13
+    <<: *defaults

     steps:
       - checkout
@@ -541,28 +534,27 @@
       - run:
           name: Install requirements
           command: |
-            apk add --no-cache --update python3 make
-            python3 -m pip install awscli
+            python3 -m pip install -e "./pyodide-build[deploy]"
       - run:
           name: Set PYODIDE_BASE_URL
           command: |
+            PYODIDE_BASE_URL="https://cdn.jsdelivr.net/pyodide/dev/pyc/" make dist/console.html
+            cp dist/console.html dist-pyc/console.html
             PYODIDE_BASE_URL="https://cdn.jsdelivr.net/pyodide/dev/debug/" make dist/console.html
             cp dist/console.html dist-debug/console.html
             PYODIDE_BASE_URL="https://cdn.jsdelivr.net/pyodide/dev/full/" make dist/console.html
       - run:
-          name: Deploy to pyodide-cdn2.iodide.io
+          name: Deploy to S3
           command: |
-            find dist/ -type f -print0 | xargs -0 -n1 -I@ bash -c "echo \"Compressing @\"; gzip @; mv @.gz @;"
-            aws s3 rm --recursive "s3://pyodide-cdn2.iodide.io/dev/full/"
-            aws s3 sync dist/ "s3://pyodide-cdn2.iodide.io/dev/full/" --exclude '*.zip' --exclude '*.whl' --exclude "*.tar" --cache-control 'max-age=3600, public' --content-encoding 'gzip' # 1 hour cache
-            aws s3 sync dist/ "s3://pyodide-cdn2.iodide.io/dev/full/" --exclude '*' --include '*.zip' --include "*.whl" --include "*.tar" --cache-control 'max-age=3600, public' --content-type 'application/wasm' --content-encoding 'gzip' # 1 hour cache
+            python3 tools/deploy_s3.py dist/ "dev/full/" --bucket "pyodide-cdn2.iodide.io" --cache-control 'max-age=3600, public' --rm-remote-prefix
       - run:
-          name: Deploy debug version to pyodide-cdn2.iodide.io
+          name: Deploy debug version to S3
           command: |
-            find dist-debug/ -type f -print0 | xargs -0 -n1 -I@ bash -c "echo \"Compressing @\"; gzip @; mv @.gz @;"
-            aws s3 rm --recursive "s3://pyodide-cdn2.iodide.io/dev/debug/"
-            aws s3 sync dist-debug/ "s3://pyodide-cdn2.iodide.io/dev/debug/" --exclude '*.zip' --exclude '*.whl' --exclude "*.tar" --cache-control 'max-age=3600, public' --content-encoding 'gzip' # 1 hour cache
-            aws s3 sync dist-debug/ "s3://pyodide-cdn2.iodide.io/dev/debug/" --exclude '*' --include '*.zip' --include "*.whl" --include "*.tar" --cache-control 'max-age=3600, public' --content-type 'application/wasm' --content-encoding 'gzip' # 1 hour cache
+            python3 tools/deploy_s3.py dist-debug/ "dev/debug/" --bucket "pyodide-cdn2.iodide.io" --cache-control 'max-age=3600, public' --rm-remote-prefix
+      - run:
+          name: Deploy pyc version to S3
+          command: |
+            python3 tools/deploy_s3.py dist-pyc/ "dev/pyc/" --bucket "pyodide-cdn2.iodide.io" --cache-control 'max-age=3600, public' --rm-remote-prefix

 workflows:
   version: 2
@@ -849,6 +841,7 @@
             - test-packages-firefox
             - build-pyodide-debug
             - create-xbuild-env
+            - build-test-pyc-packages
           filters:
             branches:
               ignore: /.*/
@@ -861,6 +854,7 @@
             - test-core-firefox
             - test-packages-firefox
             - build-pyodide-debug
+            - build-test-pyc-packages
           filters:
             branches:
               only: main
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 672f83a2f..c0db021b7 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -57,7 +57,7 @@ jobs:
         shell: bash -l {0}
         run: |
           mkdir test-results
-          cd pyodide-build && python3 -m pip install -e ".[test]" && cd ..
+          cd pyodide-build && python3 -m pip install -e ".[test,deploy]" && cd ..
           python3 -m pip install pytest-cov hypothesis pytz
       - name: Run tests
         shell: bash -l {0}
         run: |
@@ -67,7 +67,7 @@
             --verbose \
             --runtime=host \
             --cov=pyodide_build --cov=pyodide \
-            src pyodide-build packages/micropip/ packages/_tests
+            src pyodide-build packages/micropip/ packages/_tests tools/
       - uses: codecov/codecov-action@v3
         with:
           fail_ci_if_error: false
diff --git a/docs/project/changelog.md b/docs/project/changelog.md
index c5bbd3580..a6f0a630a 100644
--- a/docs/project/changelog.md
+++ b/docs/project/changelog.md
@@ -139,6 +139,12 @@
   iterable`. (Python async _iterables_ that were not also iterators were already
   _iterable_ and an async iterator.) {pr}`3708`

+- {{ Enhancement }} A py-compiled build, which has smaller and faster-to-load
+  packages, is now deployed under
+  `https://cdn.jsdelivr.net/pyodide/v0.23.0/pyc/` (also for future versions).
+  The exceptions obtained with these builds will not include code snippets,
+  however. {pr}`3701`
+
 - {{ Breaking }} Removed support for calling functions from the root of
   `pyodide` package directly. This has been deprecated since v0.21.0. Now all
   functions are only available under submodules.
diff --git a/pyodide-build/setup.cfg b/pyodide-build/setup.cfg
index 67cd21d45..a6511c423 100644
--- a/pyodide-build/setup.cfg
+++ b/pyodide-build/setup.cfg
@@ -61,6 +61,9 @@ pyodide.cli =
 test =
     pytest
     pytest-pyodide==0.23.2
+deploy =
+    boto3
+    moto

 [options.packages.find]
 where = .
diff --git a/tools/calculate_build_cache_key.py b/tools/calculate_build_cache_key.py
index 6ea5bcd35..d4d70aa12 100755
--- a/tools/calculate_build_cache_key.py
+++ b/tools/calculate_build_cache_key.py
@@ -6,8 +6,6 @@
 from hashlib import sha256
 from pathlib import Path

-import pathspec  # pip install pathspec
-

 def hash_file(filename):
     with open(filename, "rb") as f:
@@ -23,6 +21,8 @@ def get_ignore_pattern(root: Path) -> list[str]:


 def main():
+    import pathspec  # pip install pathspec
+
     root: Path = Path(__file__).parent.parent
     targets: list[Path] = [
         root / "Makefile",
diff --git a/tools/deploy_s3.py b/tools/deploy_s3.py
new file mode 100644
index 000000000..025cad812
--- /dev/null
+++ b/tools/deploy_s3.py
@@ -0,0 +1,153 @@
+import gzip
+import io
+import mimetypes
+import shutil
+from pathlib import Path
+
+import boto3
+import botocore
+import typer
+
+app = typer.Typer()
+
+
+def check_s3_object_exists(s3_client, bucket: str, object_name: str):
+    try:
+        s3_client.head_object(Bucket=bucket, Key=object_name)
+        return True
+    except botocore.exceptions.ClientError as e:
+        if e.response["Error"]["Code"] == "404":
+            return False
+
+        raise
+
+
+def _validate_remote_prefix_to_remove(remote_prefix: Path) -> None:
+    """Check remote prefix to remove
+
+    Examples
+    --------
+    >>> _validate_remote_prefix_to_remove(Path("dev/full/"))
+    >>> _validate_remote_prefix_to_remove(Path("dev/abc2/"))
+    >>> _validate_remote_prefix_to_remove(Path("/"))
+    Traceback (most recent call last):
+    ValueError: Remote prefix to remove should be at least 2 levels deep. For example, 'dev/full/'
+    >>> _validate_remote_prefix_to_remove(Path("v0.17.0/full/"))
+    Traceback (most recent call last):
+    ValueError: Remote prefix to remove should start with 'dev' (without leading '/'). For example, 'dev/full/'
+    """
+    prefix_parts = remote_prefix.parts
+    if len(prefix_parts) < 2:
+        raise ValueError(
+            "Remote prefix to remove should be at least 2 levels deep. "
+            "For example, 'dev/full/'"
+        )
+    if prefix_parts[0] != "dev":
+        raise ValueError(
+            "Remote prefix to remove should start with 'dev' (without leading '/'). "
+            "For example, 'dev/full/'"
+        )
+
+
+def _rm_s3_prefix(bucket: str, prefix: str):
+    """Remove all objects under a given prefix"""
+    s3 = boto3.resource("s3")
+    bucket = s3.Bucket(bucket)
+    for obj in bucket.objects.filter(Prefix=prefix):
+        obj.delete()
+
+
+@app.command()
+def deploy_to_s3_main(
+    local_folder: Path = typer.Argument(..., help="Path to the local folder"),
+    remote_prefix: Path = typer.Argument(..., help="Remote prefix"),
+    bucket: str = typer.Option(..., help="bucket name"),
+    cache_control: str = typer.Option(
+        "max-age=30758400, immutable, public", help="Cache control header to set"
+    ),
+    pretend: bool = typer.Option(False, help="Don't actually upload anything"),
+    overwrite: bool = typer.Option(False, help="Overwrite existing files"),
+    rm_remote_prefix: bool = typer.Option(
+        False, help="Remove existing files under the remote prefix"
+    ),
+):
+    """Deploy a dist folder with Pyodide packages to AWS S3"""
+    s3_client = boto3.client("s3")
+
+    typer.echo(f"Deploying {local_folder} to s3://{bucket}/{remote_prefix}")
+    typer.echo("Options: ")
+    typer.echo(f" - {cache_control=}")
+    typer.echo(f" - {pretend=}")
+    typer.echo(" - content-encoding: gzip")
+
+    if rm_remote_prefix:
+        _validate_remote_prefix_to_remove(remote_prefix)
+        if not pretend:
+            _rm_s3_prefix(bucket, str(remote_prefix).lstrip("/"))
+
+    for file_path in local_folder.glob("**/*"):
+        if not file_path.is_file():
+            continue
+        remote_path = remote_prefix / file_path.relative_to(local_folder)
+
+        if not overwrite and check_s3_object_exists(
+            s3_client, bucket, str(remote_path).lstrip("/")
+        ):
+            typer.echo(
+                f"Cannot upload {file_path} because it already exists on s3://{bucket}/{remote_path}"
+            )
+            typer.echo("Exiting (use --overwrite to overwrite existing files)")
+            raise typer.Exit()
+
+        with open(file_path, "rb") as fh_in:
+            # Use gzip compression for storage. This only impacts storage on
+            # AWS and transfer between S3 and the CDN. It has no impact on the
+            # compression received by the end user (since the CDN re-compresses
+            # files).
+            fh_compressed = io.BytesIO()
+            with gzip.GzipFile(fileobj=fh_compressed, mode="w") as gzip_file:
+                shutil.copyfileobj(fh_in, gzip_file)
+
+            fh_compressed.seek(0)
+
+            content_type = None
+            if file_path.suffix in (".zip", ".whl", ".tar", ".a"):
+                content_type = "application/wasm"
+            elif file_path.suffix == ".ts":
+                # This will not be correctly detected by mimetypes.
+                # However, JsDelivr will currently not serve .ts file in the
+                # custom CDN configuration, so it does not really matter.
+                content_type = "text/x.typescript"
+            else:
+                content_type = mimetypes.guess_type(file_path)[0]
+                if content_type is None:
+                    content_type = "binary/octet-stream"
+
+            extra_args = {
+                "CacheControl": cache_control,
+                "ContentEncoding": "gzip",
+                "ContentType": content_type,
+            }
+
+            if not pretend:
+                s3_client.upload_fileobj(
+                    fh_compressed,
+                    Bucket=bucket,
+                    Key=str(remote_path).lstrip("/"),
+                    ExtraArgs=extra_args,
+                )
+            msg = (
+                f"Uploaded {file_path} to s3://{bucket}/{remote_path} ({content_type=})"
+            )
+            if pretend:
+                msg = "Would have " + msg
+
+            typer.echo(msg)
+    if pretend:
+        typer.echo(
+            "No files were actually uploaded. Set to pretend=False to upload files."
+        )
+
+
+if __name__ == "__main__":
+    app()
diff --git a/tools/tests/test_deploy_s3.py b/tools/tests/test_deploy_s3.py
new file mode 100644
index 000000000..999c3b7a0
--- /dev/null
+++ b/tools/tests/test_deploy_s3.py
@@ -0,0 +1,131 @@
+import re
+import sys
+from pathlib import Path, PurePosixPath
+
+import boto3
+import pytest
+from moto import mock_s3
+
+sys.path.append(str(Path(__file__).parents[1]))
+from deploy_s3 import check_s3_object_exists, deploy_to_s3_main
+
+
+@mock_s3
+def test_check_s3_object_exists():
+    bucket_name = "mybucket"
+    s3_client = boto3.client("s3", region_name="us-east-1")
+    s3_client.create_bucket(Bucket=bucket_name)
+
+    s3_client.put_object(Bucket=bucket_name, Key="/a/test.txt", Body="test")
+
+    assert check_s3_object_exists(s3_client, bucket_name, "/a/test.txt") is True
+    assert check_s3_object_exists(s3_client, bucket_name, "/a/test2.txt") is False
+
+
+@mock_s3
+def test_deploy_to_s3_overwrite(tmp_path, capsys):
+    (tmp_path / "a.whl").write_text("a")
+    (tmp_path / "b.tar").write_text("b")
+    (tmp_path / "c.zip").write_text("c")
+
+    bucket_name = "mybucket"
+    s3_client = boto3.client("s3", region_name="us-east-1")
+    s3_client.create_bucket(Bucket=bucket_name)
+
+    deploy_to_s3_main(
+        tmp_path,
+        remote_prefix=PurePosixPath("dev/full/"),
+        bucket=bucket_name,
+        cache_control="max-age=30758400",
+        pretend=False,
+        overwrite=False,
+        rm_remote_prefix=False,
+    )
+
+    def get_object_list():
+        response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix="dev/full/")
+        return set(obj["Key"] for obj in response["Contents"])
+
+    assert get_object_list() == {"dev/full/a.whl", "dev/full/b.tar", "dev/full/c.zip"}
+
+    # Writing a second time to the same prefix with overwrite=False should fail
+    with pytest.raises(Exception):
+        deploy_to_s3_main(
+            tmp_path,
+            remote_prefix=PurePosixPath("dev/full/"),
+            bucket=bucket_name,
+            cache_control="max-age=30758400",
+            pretend=False,
+            overwrite=False,
+            rm_remote_prefix=False,
+        )
+    msg = "Cannot upload .* because it already exists"
+    captured = capsys.readouterr()
+    # Check for error message in last two lines of output
+    assert re.search(msg, "\n".join(captured.out.splitlines()[-2:]))
+
+    # Setting overwrite=True should overwrite the files
+    deploy_to_s3_main(
+        tmp_path,
+        remote_prefix=PurePosixPath("dev/full/"),
+        bucket=bucket_name,
+        cache_control="max-age=30758400",
+        pretend=False,
+        overwrite=True,
+        rm_remote_prefix=False,
+    )
+    assert get_object_list() == {"dev/full/a.whl", "dev/full/b.tar", "dev/full/c.zip"}
+
+    # Setting rm_remote_prefix=True should remove remote files that don't exist locally
+    (tmp_path / "b.tar").unlink()
+
+    deploy_to_s3_main(
+        tmp_path,
+        remote_prefix=PurePosixPath("dev/full/"),
+        bucket=bucket_name,
+        cache_control="max-age=30758400",
+        pretend=False,
+        overwrite=False,
+        rm_remote_prefix=True,
+    )
+    assert get_object_list() == {"dev/full/c.zip", "dev/full/a.whl"}
+
+
+@mock_s3
+def test_deploy_to_s3_mime_type(tmp_path, capsys):
+    for ext in ["whl", "tar", "zip", "js", "ts", "json", "ttf", "a", "mjs.map", "mjs"]:
+        (tmp_path / f"a.{ext}").write_text("a")
+
+    bucket_name = "mybucket"
+    s3_client = boto3.client("s3", region_name="us-east-1")
+    s3_client.create_bucket(Bucket=bucket_name)
+
+    deploy_to_s3_main(
+        tmp_path,
+        remote_prefix=PurePosixPath(""),
+        bucket=bucket_name,
+        cache_control="max-age=30758400",
+        pretend=False,
+        overwrite=False,
+        rm_remote_prefix=False,
+    )
+
+    def get_header(key, field="content-type"):
+        res = s3_client.get_object(Bucket=bucket_name, Key=key)
+        return res["ResponseMetadata"]["HTTPHeaders"][field]
+
+    assert get_header("a.js", "content-encoding") == "gzip"
+
+    # These MIME types we set explicitly for better CDN compression
+    assert get_header("a.whl") == "application/wasm"
+    assert get_header("a.tar") == "application/wasm"
+    assert get_header("a.zip") == "application/wasm"
+    assert get_header("a.a") == "application/wasm"
+
+    # The rest we set based on the file extension
+    assert get_header("a.js") == "text/javascript"
+    assert get_header("a.mjs") == "text/javascript"
+    assert get_header("a.ts") == "text/x.typescript"
+    assert get_header("a.json") == "application/json"
+    assert get_header("a.ttf") == "font/ttf"
+    assert get_header("a.mjs.map") == "binary/octet-stream"
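
Usage sketch: a minimal way to exercise the new tools/deploy_s3.py CLI locally, mirroring the flags the CI config above passes to it. It assumes AWS credentials are already available in the environment (for example via AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY); with --pretend the script only prints what it would upload and deletes nothing.

    # install the new boto3/moto dependencies via the "deploy" extra
    python3 -m pip install -e "./pyodide-build[deploy]"

    # dry run against the dev prefix, without touching the bucket
    python3 tools/deploy_s3.py dist/ "dev/full/" \
        --bucket "pyodide-cdn2.iodide.io" \
        --cache-control 'max-age=3600, public' \
        --rm-remote-prefix --pretend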