mirror of https://github.com/explosion/spaCy.git
Merge pull request #12979 from adrianeboyd/feature/cython-profile-312
Redesigned Cython profiling and other minor updates for Python 3.12
This commit is contained in:
commit
4ec41e98f6
|
@ -58,7 +58,7 @@ jobs:
|
|||
fail-fast: true
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest, macos-latest]
|
||||
python_version: ["3.11"]
|
||||
python_version: ["3.11", "3.12.0-rc.2"]
|
||||
include:
|
||||
- os: windows-latest
|
||||
python_version: "3.7"
|
||||
|
@ -93,6 +93,7 @@ jobs:
|
|||
- name: Run mypy
|
||||
run: |
|
||||
python -m mypy spacy
|
||||
if: matrix.python_version != '3.7'
|
||||
|
||||
- name: Delete source directory and .egg-info
|
||||
run: |
|
||||
|
|
|
@ -33,7 +33,7 @@ pytest-timeout>=1.3.0,<2.0.0
|
|||
mock>=2.0.0,<3.0.0
|
||||
flake8>=3.8.0,<6.0.0
|
||||
hypothesis>=3.27.0,<7.0.0
|
||||
mypy>=0.990,<1.1.0; platform_machine != "aarch64"
|
||||
mypy>=1.5.0,<1.6.0; platform_machine != "aarch64" and python_version >= "3.8"
|
||||
types-mock>=0.1.1
|
||||
types-setuptools>=57.0.0
|
||||
types-requests
|
||||
|
|
1
setup.py
1
setup.py
|
@ -78,6 +78,7 @@ COMPILER_DIRECTIVES = {
|
|||
"language_level": -3,
|
||||
"embedsignature": True,
|
||||
"annotation_typing": False,
|
||||
"profile": sys.version_info < (3, 12),
|
||||
}
|
||||
# Files to copy into the package that are otherwise not included
|
||||
COPY_FILES = {
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# cython: profile=False
|
||||
from .errors import Errors
|
||||
|
||||
IOB_STRINGS = ("", "I", "O", "B")
|
||||
|
|
|
@ -133,7 +133,9 @@ def apply(
|
|||
if len(text_files) > 0:
|
||||
streams.append(_stream_texts(text_files))
|
||||
datagen = cast(DocOrStrStream, chain(*streams))
|
||||
for doc in tqdm.tqdm(nlp.pipe(datagen, batch_size=batch_size, n_process=n_process)):
|
||||
for doc in tqdm.tqdm(
|
||||
nlp.pipe(datagen, batch_size=batch_size, n_process=n_process), disable=None
|
||||
):
|
||||
docbin.add(doc)
|
||||
if output_file.suffix == "":
|
||||
output_file = output_file.with_suffix(".spacy")
|
||||
|
|
|
@ -89,7 +89,7 @@ class Quartiles:
|
|||
def annotate(
|
||||
nlp: Language, docs: List[Doc], batch_size: Optional[int]
|
||||
) -> numpy.ndarray:
|
||||
docs = nlp.pipe(tqdm(docs, unit="doc"), batch_size=batch_size)
|
||||
docs = nlp.pipe(tqdm(docs, unit="doc", disable=None), batch_size=batch_size)
|
||||
wps = []
|
||||
while True:
|
||||
with time_context() as elapsed:
|
||||
|
|
|
@ -71,7 +71,7 @@ def profile(model: str, inputs: Optional[Path] = None, n_texts: int = 10000) ->
|
|||
|
||||
|
||||
def parse_texts(nlp: Language, texts: Sequence[str]) -> None:
|
||||
for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=16):
|
||||
for doc in nlp.pipe(tqdm.tqdm(texts, disable=None), batch_size=16):
|
||||
pass
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True
|
||||
# cython: infer_types=True
|
||||
|
||||
from typing import Iterable
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True
|
||||
# cython: infer_types=True
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Tuple, Union
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True
|
||||
# cython: infer_types=True
|
||||
from typing import Any, Callable, Dict, Iterable
|
||||
|
||||
import srsly
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# cython: embedsignature=True
|
||||
# cython: profile=False
|
||||
# Compiler crashes on memory view coercion without this. Should report bug.
|
||||
cimport numpy as np
|
||||
from libc.string cimport memset
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True
|
||||
# cython: infer_types=True
|
||||
import warnings
|
||||
from collections import defaultdict
|
||||
from itertools import product
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: profile=True, binding=True, infer_types=True
|
||||
# cython: binding=True, infer_types=True
|
||||
from cpython.object cimport PyObject
|
||||
from libc.stdint cimport int64_t
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: binding=True, infer_types=True, profile=True
|
||||
# cython: binding=True, infer_types=True
|
||||
from typing import Iterable, List
|
||||
|
||||
from cymem.cymem cimport Pool
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True
|
||||
# cython: infer_types=True
|
||||
from preshed.maps cimport map_clear, map_get, map_init, map_iter, map_set
|
||||
|
||||
import warnings
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# cython: infer_types=True, cdivision=True, boundscheck=False
|
||||
# cython: profile=False
|
||||
cimport numpy as np
|
||||
from libc.math cimport exp
|
||||
from libc.stdlib cimport calloc, free, realloc
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# cython: infer_types
|
||||
# cython: profile=False
|
||||
import warnings
|
||||
|
||||
import numpy
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
|
||||
# cython: profile=False
|
||||
IDS = {
|
||||
"": NO_TAG,
|
||||
"ADJ": ADJ,
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# cython: infer_types=True, binding=True
|
||||
# cython: profile=False
|
||||
from cython.operator cimport dereference as deref
|
||||
from libc.stdint cimport UINT32_MAX, uint32_t
|
||||
from libc.string cimport memset
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
# cython: infer_types=True
|
||||
# cython: profile=True
|
||||
import numpy
|
||||
|
||||
from thinc.extra.search cimport Beam
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
# cython: profile=False
|
|
@ -1,4 +1,4 @@
|
|||
# cython: profile=True, cdivision=True, infer_types=True
|
||||
# cython: cdivision=True, infer_types=True
|
||||
from cymem.cymem cimport Address, Pool
|
||||
from libc.stdint cimport int32_t
|
||||
from libcpp.vector cimport vector
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# cython: profile=False
|
||||
from cymem.cymem cimport Pool
|
||||
from libc.stdint cimport int32_t
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: profile=True, infer_types=True
|
||||
# cython: infer_types=True
|
||||
"""Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005
|
||||
for doing pseudo-projective parsing implementation uses the HEAD decoration
|
||||
scheme.
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# cython: infer_types=True
|
||||
# cython: profile=False
|
||||
from libcpp.vector cimport vector
|
||||
|
||||
from ...tokens.doc cimport Doc
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# cython: infer_types=True
|
||||
# cython: profile=False
|
||||
from __future__ import print_function
|
||||
|
||||
from cymem.cymem cimport Pool
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True, binding=True
|
||||
# cython: infer_types=True, binding=True
|
||||
from collections import defaultdict
|
||||
from typing import Callable, Optional
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True, binding=True
|
||||
# cython: infer_types=True, binding=True
|
||||
from itertools import islice
|
||||
from typing import Callable, Dict, Optional, Union
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True, binding=True
|
||||
# cython: infer_types=True, binding=True
|
||||
from typing import Optional
|
||||
|
||||
import numpy
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True, binding=True
|
||||
# cython: infer_types=True, binding=True
|
||||
from collections import defaultdict
|
||||
from typing import Callable, Optional
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True, binding=True
|
||||
# cython: infer_types=True, binding=True
|
||||
import warnings
|
||||
from typing import Callable, Dict, Iterable, Iterator, Tuple, Union
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True, binding=True
|
||||
# cython: infer_types=True, binding=True
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
import srsly
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True, binding=True
|
||||
# cython: infer_types=True, binding=True
|
||||
from itertools import islice
|
||||
from typing import Callable, Optional
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True, binding=True
|
||||
# cython: infer_types=True, binding=True
|
||||
from itertools import islice
|
||||
from typing import Callable, Optional
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True, binding=True
|
||||
# cython: infer_types=True, binding=True
|
||||
from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple
|
||||
|
||||
import srsly
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
|
||||
# cython: profile=False
|
||||
from __future__ import print_function
|
||||
|
||||
cimport numpy as np
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# cython: infer_types=True
|
||||
# cython: profile=False
|
||||
cimport cython
|
||||
from libc.stdint cimport uint32_t
|
||||
from libc.string cimport memcpy
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# cython: optimize.unpack_method_calls=False
|
||||
# cython: profile=False
|
||||
IDS = {
|
||||
"": NIL,
|
||||
"IS_ALPHA": IS_ALPHA,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: embedsignature=True, profile=True, binding=True
|
||||
# cython: embedsignature=True, binding=True
|
||||
cimport cython
|
||||
from cymem.cymem cimport Pool
|
||||
from cython.operator cimport dereference as deref
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, bounds_check=False, profile=True
|
||||
# cython: infer_types=True, bounds_check=False
|
||||
from cymem.cymem cimport Pool
|
||||
from libc.string cimport memset
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, bounds_check=False, profile=True
|
||||
# cython: infer_types=True, bounds_check=False
|
||||
from typing import Set
|
||||
|
||||
cimport cython
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
|
||||
# cython: profile=False
|
||||
from typing import Generator, List, Tuple
|
||||
|
||||
cimport cython
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# cython: profile=False
|
||||
cimport numpy as np
|
||||
from libc.string cimport memset
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# cython: profile=False
|
||||
cimport numpy as np
|
||||
|
||||
import copy
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# cython: profile=False
|
||||
import struct
|
||||
import weakref
|
||||
from copy import deepcopy
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# cython: infer_types=True
|
||||
# cython: profile=False
|
||||
# Compiler crashes on memory view coercion without this. Should report bug.
|
||||
cimport numpy as np
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# cython: profile=False
|
||||
import re
|
||||
from itertools import chain
|
||||
from typing import List, Tuple
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# cython: profile=False
|
||||
from typing import List
|
||||
|
||||
import numpy
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# cython: profile=False
|
||||
from collections.abc import Iterable as IterableInstance
|
||||
|
||||
import numpy
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# cython: profile=False
|
||||
import warnings
|
||||
|
||||
import srsly
|
||||
|
|
|
@ -302,7 +302,7 @@ def read_vectors(
|
|||
shape = (truncate_vectors, shape[1])
|
||||
vectors_data = numpy.zeros(shape=shape, dtype="f")
|
||||
vectors_keys = []
|
||||
for i, line in enumerate(tqdm.tqdm(f)):
|
||||
for i, line in enumerate(tqdm.tqdm(f, disable=None)):
|
||||
line = line.rstrip()
|
||||
pieces = line.rsplit(" ", vectors_data.shape[1])
|
||||
word = pieces.pop(0)
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
# cython: profile=False
|
|
@ -1068,7 +1068,10 @@ def make_tempdir() -> Generator[Path, None, None]:
|
|||
rmfunc(path)
|
||||
|
||||
try:
|
||||
shutil.rmtree(str(d), onerror=force_remove)
|
||||
if sys.version_info >= (3, 12):
|
||||
shutil.rmtree(str(d), onexc=force_remove)
|
||||
else:
|
||||
shutil.rmtree(str(d), onerror=force_remove)
|
||||
except PermissionError as e:
|
||||
warnings.warn(Warnings.W091.format(dir=d, msg=e))
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# cython: infer_types=True, profile=True, binding=True
|
||||
# cython: infer_types=True, binding=True
|
||||
from typing import Callable
|
||||
|
||||
from cython.operator cimport dereference as deref
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
# cython: profile=True
|
||||
import functools
|
||||
|
||||
import numpy
|
||||
|
|
Loading…
Reference in New Issue