Add Fastparquet (#3590)

This commit is contained in:
Joe Marshall 2023-03-02 05:46:58 +00:00 committed by GitHub
parent b19e276cfa
commit 0fdf9a80fa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 130 additions and 5 deletions

View File

@ -150,13 +150,13 @@ export STDLIB_MODULE_CFLAGS= $(SIDE_MODULE_CFLAGS) -I Include/ -I . -I Include/i
export CARGO_BUILD_TARGET=wasm32-unknown-emscripten
export CARGO_TARGET_WASM32_UNKNOWN_EMSCRIPTEN_LINKER=emcc
export RUST_TOOLCHAIN=nightly-2022-06-26
export PYO3_CROSS_LIB_DIR=${CPYTHONINSTALL}/lib
export PYO3_CROSS_INCLUDE_DIR=${PYTHONINCLUDE}
# ideally we could automatically include all SIDE_MODULE_LDFLAGS here
export RUSTFLAGS= \
-C link-arg=-sSIDE_MODULE=2 \
-C link-arg=-sWASM_BIGINT \
-Z link-native-libraries=no
.output_vars:
set

View File

@ -389,3 +389,5 @@ but other than that there may be no other issues if you are lucky.
As mentioned [here](https://github.com/pyodide/pyodide/issues/2706#issuecomment-1154655224),
by default certain wasm-related `RUSTFLAGS` are set during `build.script`
and can be removed with `export RUSTFLAGS=""`.
If your project builds using maturin, you need to use maturin 0.14.14 or later. It is usually easy to patch an existing project to require a newer maturin (see `packages/cramjam/meta.yaml` and its `patches/patch_maturin_version.patch` for an example).

View File

@ -140,6 +140,10 @@ myst:
building dependencies of the package. This replaces `pyodide-build buildpkg`.
{pr}`3520`
### Packages
- New packages: fastparquet {pr}`3590`, cramjam {pr}`3590`.
## Version 0.22.1
_January 25, 2023_

View File

@ -0,0 +1,22 @@
package:
name: cramjam
version: 2.6.2
top-level:
- cramjam
source:
url: https://files.pythonhosted.org/packages/03/68/b6a79d363e2796012ad1f0aae9b452050509203e7b7b0ebb0eab97b2b48f/cramjam-2.6.2.tar.gz
sha256: 1ffdc8d1381b5fee57b33b537e38fa7fd29e8d8f3b544dbab1d71dbfaaec3bef
patches:
- patches/patch_maturin_version.patch
about:
home: ""
PyPI: https://pypi.org/project/cramjam
summary: Thin Python bindings to de/compression algorithms in Rust
license: MIT
build:
script: |
rustup toolchain install ${RUST_TOOLCHAIN} && rustup default ${RUST_TOOLCHAIN}
rustup target add wasm32-unknown-emscripten --toolchain ${RUST_TOOLCHAIN}
test:
imports:
- cramjam

View File

@ -0,0 +1,11 @@
diff --git a/pyproject.toml b/pyproject.toml
index bb317951..f67c19ec 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,5 +9,5 @@ documentation = "https://docs.rs/cramjam/latest/cramjam"
repository = "https://github.com/milesgranger/pyrus-cramjam"
[build-system]
-requires = ["maturin>=0.13,<0.14"]
+requires = ["maturin>=0.14.14,<0.15"]
build-backend = "maturin"

View File

@ -0,0 +1,28 @@
from typing import Any
import pytest
from pytest_pyodide import run_in_pyodide
VARIANTS = ("snappy", "brotli", "bzip2", "lz4", "gzip", "deflate", "zstd")
@pytest.mark.parametrize("variant_str", VARIANTS)
@run_in_pyodide(packages=["cramjam"])
def test_variants_simple(selenium, variant_str):
    """Round-trip random bytes through one cramjam compression module.

    ``variant_str`` names the cramjam submodule to exercise. The test
    compresses 1048576 random bytes, checks that the compressed stream
    differs from the input, then decompresses it and checks that the
    original bytes come back. Both results must be ``cramjam.Buffer``
    instances.
    """
    import random

    import cramjam

    codec = getattr(cramjam, variant_str)
    payload: Any = bytearray(random.getrandbits(8) for _ in range(1048576))

    packed = codec.compress(payload)
    assert packed.read() != payload
    # read() consumed the buffer; rewind it before handing it to decompress().
    packed.seek(0)
    assert isinstance(packed, cramjam.Buffer)

    restored = codec.decompress(packed, output_len=len(payload))
    assert restored.read() == payload
    assert isinstance(restored, cramjam.Buffer)

View File

@ -0,0 +1,20 @@
package:
name: fastparquet
version: 2023.2.0
top-level:
- fastparquet
requirements:
run:
- cramjam
- numpy
- pandas
- fsspec
- packaging
source:
url: https://files.pythonhosted.org/packages/7e/5d/c58795e5550231ce84b39fec783e3f4836082f0162e2436227832716ee02/fastparquet-2023.2.0.tar.gz
sha256: 7611447ce3ff5696539f7e43289da2491ea41f7cb92d4dbada374012b62c51c3
about:
home: https://github.com/dask/fastparquet/
PyPI: https://pypi.org/project/fastparquet
summary: Python support for Parquet file format
license: Apache License 2.0

View File

@ -0,0 +1,23 @@
import pytest
from pytest_pyodide import run_in_pyodide
COMPRESSIONS = ("SNAPPY", "GZIP", "LZ4", "BROTLI", "ZSTD")
# Smoke test: random data must survive a write/read cycle for every codec.
@pytest.mark.parametrize("compression", COMPRESSIONS)
@run_in_pyodide(packages=["fastparquet"])
def test_simple_table(selenium, compression):
    """Write a random DataFrame to Parquet and read it back unchanged.

    ``compression`` is the codec name passed to ``fastparquet.write``.
    A 131072 x 4 frame of random floats is written to a temporary
    directory, reloaded through ``fastparquet.ParquetFile``, and compared
    with the original using ``DataFrame.equals``.
    """
    import pathlib
    import tempfile

    import fastparquet
    import numpy as np
    import pandas as pd

    frame = pd.DataFrame(np.random.randn(131072, 4), columns=["A", "B", "C", "D"])
    with tempfile.TemporaryDirectory() as workdir:
        target = pathlib.Path(workdir) / "test.parquet"
        fastparquet.write(target, frame, compression=compression)
        reloaded = fastparquet.ParquetFile(target).to_pandas()
        assert reloaded.equals(frame)

13
packages/fsspec/meta.yaml Normal file
View File

@ -0,0 +1,13 @@
package:
name: fsspec
version: 2023.1.0
top-level:
- fsspec
source:
url: https://files.pythonhosted.org/packages/bd/64/f0d369ede0ca54fdd520bdee5086dbaf0af81dac53a2ce847bd1ec6e0bf1/fsspec-2023.1.0-py3-none-any.whl
sha256: b833e2e541e9e8cde0ab549414187871243177feb3d344f9d27b25a93f5d8139
about:
home: http://github.com/fsspec/filesystem_spec
PyPI: https://pypi.org/project/fsspec
summary: File-system specification
license: BSD

View File

@ -52,6 +52,8 @@ BUILD_VARS: set[str] = {
"CARGO_BUILD_TARGET",
"CARGO_TARGET_WASM32_UNKNOWN_EMSCRIPTEN_LINKER",
"RUSTFLAGS",
"PYO3_CROSS_LIB_DIR",
"PYO3_CROSS_INCLUDE_DIR",
"PYODIDE_EMSCRIPTEN_VERSION",
"PLATFORM_TRIPLET",
"SYSCONFIGDATA_DIR",

View File

@ -171,7 +171,7 @@ def make_command_wrapper_symlinks(symlink_dir: Path) -> dict[str, str]:
var = "CXX"
else:
var = symlink.upper()
env[var] = symlink
env[var] = str(symlink_path)
return env

View File

@ -1,7 +1,7 @@
implementation=CPython
version=3.10
version=3.11
shared=true
abi3=false
lib_name=python3.10
lib_name=python3.11
pointer_width=32
suppress_build_script_link_lines=false