Merge pull request #12979 from adrianeboyd/feature/cython-profile-312

Redesigned Cython profiling and other minor updates for Python 3.12
This commit is contained in:
Adriane Boyd 2023-09-29 08:23:38 +02:00 committed by GitHub
commit 4ec41e98f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
55 changed files with 59 additions and 32 deletions

View File

@ -58,7 +58,7 @@ jobs:
fail-fast: true
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python_version: ["3.11"]
python_version: ["3.11", "3.12.0-rc.2"]
include:
- os: windows-latest
python_version: "3.7"
@ -93,6 +93,7 @@ jobs:
- name: Run mypy
run: |
python -m mypy spacy
if: matrix.python_version != '3.7'
- name: Delete source directory and .egg-info
run: |

View File

@ -33,7 +33,7 @@ pytest-timeout>=1.3.0,<2.0.0
mock>=2.0.0,<3.0.0
flake8>=3.8.0,<6.0.0
hypothesis>=3.27.0,<7.0.0
mypy>=0.990,<1.1.0; platform_machine != "aarch64"
mypy>=1.5.0,<1.6.0; platform_machine != "aarch64" and python_version >= "3.8"
types-mock>=0.1.1
types-setuptools>=57.0.0
types-requests

View File

@ -78,6 +78,7 @@ COMPILER_DIRECTIVES = {
"language_level": -3,
"embedsignature": True,
"annotation_typing": False,
"profile": sys.version_info < (3, 12),
}
# Files to copy into the package that are otherwise not included
COPY_FILES = {

View File

@ -1,3 +1,4 @@
# cython: profile=False
from .errors import Errors
IOB_STRINGS = ("", "I", "O", "B")

View File

@ -133,7 +133,9 @@ def apply(
if len(text_files) > 0:
streams.append(_stream_texts(text_files))
datagen = cast(DocOrStrStream, chain(*streams))
for doc in tqdm.tqdm(nlp.pipe(datagen, batch_size=batch_size, n_process=n_process)):
for doc in tqdm.tqdm(
nlp.pipe(datagen, batch_size=batch_size, n_process=n_process), disable=None
):
docbin.add(doc)
if output_file.suffix == "":
output_file = output_file.with_suffix(".spacy")

View File

@ -89,7 +89,7 @@ class Quartiles:
def annotate(
nlp: Language, docs: List[Doc], batch_size: Optional[int]
) -> numpy.ndarray:
docs = nlp.pipe(tqdm(docs, unit="doc"), batch_size=batch_size)
docs = nlp.pipe(tqdm(docs, unit="doc", disable=None), batch_size=batch_size)
wps = []
while True:
with time_context() as elapsed:

View File

@ -71,7 +71,7 @@ def profile(model: str, inputs: Optional[Path] = None, n_texts: int = 10000) ->
def parse_texts(nlp: Language, texts: Sequence[str]) -> None:
for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=16):
for doc in nlp.pipe(tqdm.tqdm(texts, disable=None), batch_size=16):
pass

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True
# cython: infer_types=True
from typing import Iterable

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True
# cython: infer_types=True
from pathlib import Path
from typing import Iterable, Tuple, Union

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True
# cython: infer_types=True
from typing import Any, Callable, Dict, Iterable
import srsly

View File

@ -1,4 +1,5 @@
# cython: embedsignature=True
# cython: profile=False
# Compiler crashes on memory view coercion without this. Should report bug.
cimport numpy as np
from libc.string cimport memset

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True
# cython: infer_types=True
import warnings
from collections import defaultdict
from itertools import product

View File

@ -1,4 +1,4 @@
# cython: profile=True, binding=True, infer_types=True
# cython: binding=True, infer_types=True
from cpython.object cimport PyObject
from libc.stdint cimport int64_t

View File

@ -1,4 +1,4 @@
# cython: binding=True, infer_types=True, profile=True
# cython: binding=True, infer_types=True
from typing import Iterable, List
from cymem.cymem cimport Pool

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True
# cython: infer_types=True
from preshed.maps cimport map_clear, map_get, map_init, map_iter, map_set
import warnings

View File

@ -1,4 +1,5 @@
# cython: infer_types=True, cdivision=True, boundscheck=False
# cython: profile=False
cimport numpy as np
from libc.math cimport exp
from libc.stdlib cimport calloc, free, realloc

View File

@ -1,4 +1,5 @@
# cython: infer_types
# cython: profile=False
import warnings
import numpy

View File

@ -1,4 +1,4 @@
# cython: profile=False
IDS = {
"": NO_TAG,
"ADJ": ADJ,

View File

@ -1,4 +1,5 @@
# cython: infer_types=True, binding=True
# cython: profile=False
from cython.operator cimport dereference as deref
from libc.stdint cimport UINT32_MAX, uint32_t
from libc.string cimport memset

View File

@ -1,5 +1,4 @@
# cython: infer_types=True
# cython: profile=True
import numpy
from thinc.extra.search cimport Beam

View File

@ -0,0 +1 @@
# cython: profile=False

View File

@ -1,4 +1,4 @@
# cython: profile=True, cdivision=True, infer_types=True
# cython: cdivision=True, infer_types=True
from cymem.cymem cimport Address, Pool
from libc.stdint cimport int32_t
from libcpp.vector cimport vector

View File

@ -1,3 +1,4 @@
# cython: profile=False
from cymem.cymem cimport Pool
from libc.stdint cimport int32_t

View File

@ -1,4 +1,4 @@
# cython: profile=True, infer_types=True
# cython: infer_types=True
"""Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005
for doing pseudo-projective parsing implementation uses the HEAD decoration
scheme.

View File

@ -1,4 +1,5 @@
# cython: infer_types=True
# cython: profile=False
from libcpp.vector cimport vector
from ...tokens.doc cimport Doc

View File

@ -1,4 +1,5 @@
# cython: infer_types=True
# cython: profile=False
from __future__ import print_function
from cymem.cymem cimport Pool

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True
# cython: infer_types=True, binding=True
from collections import defaultdict
from typing import Callable, Optional

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True
# cython: infer_types=True, binding=True
from itertools import islice
from typing import Callable, Dict, Optional, Union

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True
# cython: infer_types=True, binding=True
from typing import Optional
import numpy

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True
# cython: infer_types=True, binding=True
from collections import defaultdict
from typing import Callable, Optional

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True
# cython: infer_types=True, binding=True
import warnings
from typing import Callable, Dict, Iterable, Iterator, Tuple, Union

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True
# cython: infer_types=True, binding=True
from typing import Callable, List, Optional
import srsly

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True
# cython: infer_types=True, binding=True
from itertools import islice
from typing import Callable, Optional

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True
# cython: infer_types=True, binding=True
from itertools import islice
from typing import Callable, Optional

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True
# cython: infer_types=True, binding=True
from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple
import srsly

View File

@ -1,4 +1,5 @@
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
# cython: profile=False
from __future__ import print_function
cimport numpy as np

View File

@ -1,4 +1,5 @@
# cython: infer_types=True
# cython: profile=False
cimport cython
from libc.stdint cimport uint32_t
from libc.string cimport memcpy

View File

@ -1,4 +1,5 @@
# cython: optimize.unpack_method_calls=False
# cython: profile=False
IDS = {
"": NIL,
"IS_ALPHA": IS_ALPHA,

View File

@ -1,4 +1,4 @@
# cython: embedsignature=True, profile=True, binding=True
# cython: embedsignature=True, binding=True
cimport cython
from cymem.cymem cimport Pool
from cython.operator cimport dereference as deref

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, bounds_check=False, profile=True
# cython: infer_types=True, bounds_check=False
from cymem.cymem cimport Pool
from libc.string cimport memset

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, bounds_check=False, profile=True
# cython: infer_types=True, bounds_check=False
from typing import Set
cimport cython

View File

@ -1,4 +1,5 @@
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
# cython: profile=False
from typing import Generator, List, Tuple
cimport cython

View File

@ -1,3 +1,4 @@
# cython: profile=False
cimport numpy as np
from libc.string cimport memset

View File

@ -1,3 +1,4 @@
# cython: profile=False
cimport numpy as np
import copy

View File

@ -1,3 +1,4 @@
# cython: profile=False
import struct
import weakref
from copy import deepcopy

View File

@ -1,4 +1,5 @@
# cython: infer_types=True
# cython: profile=False
# Compiler crashes on memory view coercion without this. Should report bug.
cimport numpy as np

View File

@ -1,3 +1,4 @@
# cython: profile=False
import re
from itertools import chain
from typing import List, Tuple

View File

@ -1,3 +1,4 @@
# cython: profile=False
from typing import List
import numpy

View File

@ -1,3 +1,4 @@
# cython: profile=False
from collections.abc import Iterable as IterableInstance
import numpy

View File

@ -1,3 +1,4 @@
# cython: profile=False
import warnings
import srsly

View File

@ -302,7 +302,7 @@ def read_vectors(
shape = (truncate_vectors, shape[1])
vectors_data = numpy.zeros(shape=shape, dtype="f")
vectors_keys = []
for i, line in enumerate(tqdm.tqdm(f)):
for i, line in enumerate(tqdm.tqdm(f, disable=None)):
line = line.rstrip()
pieces = line.rsplit(" ", vectors_data.shape[1])
word = pieces.pop(0)

View File

@ -0,0 +1 @@
# cython: profile=False

View File

@ -1068,7 +1068,10 @@ def make_tempdir() -> Generator[Path, None, None]:
rmfunc(path)
try:
shutil.rmtree(str(d), onerror=force_remove)
if sys.version_info >= (3, 12):
shutil.rmtree(str(d), onexc=force_remove)
else:
shutil.rmtree(str(d), onerror=force_remove)
except PermissionError as e:
warnings.warn(Warnings.W091.format(dir=d, msg=e))

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True
# cython: infer_types=True, binding=True
from typing import Callable
from cython.operator cimport dereference as deref

View File

@ -1,4 +1,3 @@
# cython: profile=True
import functools
import numpy