diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f68280be2..1058b4673 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -58,7 +58,7 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python_version: ["3.11"] + python_version: ["3.11", "3.12.0-rc.2"] include: - os: windows-latest python_version: "3.7" @@ -93,6 +93,7 @@ jobs: - name: Run mypy run: | python -m mypy spacy + if: matrix.python_version != '3.7' - name: Delete source directory and .egg-info run: | diff --git a/requirements.txt b/requirements.txt index 48d188ec9..a8ba956a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,7 +33,7 @@ pytest-timeout>=1.3.0,<2.0.0 mock>=2.0.0,<3.0.0 flake8>=3.8.0,<6.0.0 hypothesis>=3.27.0,<7.0.0 -mypy>=0.990,<1.1.0; platform_machine != "aarch64" +mypy>=1.5.0,<1.6.0; platform_machine != "aarch64" and python_version >= "3.8" types-mock>=0.1.1 types-setuptools>=57.0.0 types-requests diff --git a/setup.py b/setup.py index 3b6fae37b..33178662d 100755 --- a/setup.py +++ b/setup.py @@ -78,6 +78,7 @@ COMPILER_DIRECTIVES = { "language_level": -3, "embedsignature": True, "annotation_typing": False, + "profile": sys.version_info < (3, 12), } # Files to copy into the package that are otherwise not included COPY_FILES = { diff --git a/spacy/attrs.pyx b/spacy/attrs.pyx index 97b5d5e36..363dd094d 100644 --- a/spacy/attrs.pyx +++ b/spacy/attrs.pyx @@ -1,3 +1,4 @@ +# cython: profile=False from .errors import Errors IOB_STRINGS = ("", "I", "O", "B") diff --git a/spacy/cli/apply.py b/spacy/cli/apply.py index 8c4b4c8bf..ffd810506 100644 --- a/spacy/cli/apply.py +++ b/spacy/cli/apply.py @@ -133,7 +133,9 @@ def apply( if len(text_files) > 0: streams.append(_stream_texts(text_files)) datagen = cast(DocOrStrStream, chain(*streams)) - for doc in tqdm.tqdm(nlp.pipe(datagen, batch_size=batch_size, n_process=n_process)): + for doc in tqdm.tqdm( + nlp.pipe(datagen, batch_size=batch_size, n_process=n_process), disable=None + ): docbin.add(doc) if output_file.suffix == "": output_file = output_file.with_suffix(".spacy") diff --git a/spacy/cli/benchmark_speed.py b/spacy/cli/benchmark_speed.py index a683d1591..c7fd771c3 100644 --- a/spacy/cli/benchmark_speed.py +++ b/spacy/cli/benchmark_speed.py @@ -89,7 +89,7 @@ class Quartiles: def annotate( nlp: Language, docs: List[Doc], batch_size: Optional[int] ) -> numpy.ndarray: - docs = nlp.pipe(tqdm(docs, unit="doc"), batch_size=batch_size) + docs = nlp.pipe(tqdm(docs, unit="doc", disable=None), batch_size=batch_size) wps = [] while True: with time_context() as elapsed: diff --git a/spacy/cli/profile.py b/spacy/cli/profile.py index e1f720327..e5b8f1193 100644 --- a/spacy/cli/profile.py +++ b/spacy/cli/profile.py @@ -71,7 +71,7 @@ def profile(model: str, inputs: Optional[Path] = None, n_texts: int = 10000) -> def parse_texts(nlp: Language, texts: Sequence[str]) -> None: - for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=16): + for doc in nlp.pipe(tqdm.tqdm(texts, disable=None), batch_size=16): pass diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx index 53fc9b036..4369676e2 100644 --- a/spacy/kb/candidate.pyx +++ b/spacy/kb/candidate.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True +# cython: infer_types=True from typing import Iterable diff --git a/spacy/kb/kb.pyx b/spacy/kb/kb.pyx index 6ad4c3564..c7db34e16 100644 --- a/spacy/kb/kb.pyx +++ b/spacy/kb/kb.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True +# cython: infer_types=True from pathlib import Path from typing import Iterable, Tuple, Union diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx index 02773cbae..2b21f246a 100644 --- a/spacy/kb/kb_in_memory.pyx +++ b/spacy/kb/kb_in_memory.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True +# cython: infer_types=True from typing import Any, Callable, Dict, Iterable import srsly diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 60d22e615..f803d5e93 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -1,4 +1,5 @@ # cython: embedsignature=True +# cython: profile=False # Compiler crashes on memory view coercion without this. Should report bug. cimport numpy as np from libc.string cimport memset diff --git a/spacy/matcher/dependencymatcher.pyx b/spacy/matcher/dependencymatcher.pyx index 1f66d99b2..ab5f5d5d1 100644 --- a/spacy/matcher/dependencymatcher.pyx +++ b/spacy/matcher/dependencymatcher.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True +# cython: infer_types=True import warnings from collections import defaultdict from itertools import product diff --git a/spacy/matcher/levenshtein.pyx b/spacy/matcher/levenshtein.pyx index e823ce99d..e394f2cf4 100644 --- a/spacy/matcher/levenshtein.pyx +++ b/spacy/matcher/levenshtein.pyx @@ -1,4 +1,4 @@ -# cython: profile=True, binding=True, infer_types=True +# cython: binding=True, infer_types=True from cpython.object cimport PyObject from libc.stdint cimport int64_t diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 167f85af4..9a9ed4212 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -1,4 +1,4 @@ -# cython: binding=True, infer_types=True, profile=True +# cython: binding=True, infer_types=True from typing import Iterable, List from cymem.cymem cimport Pool diff --git a/spacy/matcher/phrasematcher.pyx b/spacy/matcher/phrasematcher.pyx index 26633e6d6..4efcdb05c 100644 --- a/spacy/matcher/phrasematcher.pyx +++ b/spacy/matcher/phrasematcher.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True +# cython: infer_types=True from preshed.maps cimport map_clear, map_get, map_init, map_iter, map_set import warnings diff --git a/spacy/ml/parser_model.pyx b/spacy/ml/parser_model.pyx index ae60972aa..f004c562e 100644 --- a/spacy/ml/parser_model.pyx +++ b/spacy/ml/parser_model.pyx @@ -1,4 +1,5 @@ # cython: infer_types=True, cdivision=True, boundscheck=False +# cython: profile=False cimport numpy as np from libc.math cimport exp from libc.stdlib cimport calloc, free, realloc diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index ecbbed729..cef45b04d 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -1,4 +1,5 @@ # cython: infer_types +# cython: profile=False import warnings import numpy diff --git a/spacy/parts_of_speech.pyx b/spacy/parts_of_speech.pyx index e71fb917f..98e3570ec 100644 --- a/spacy/parts_of_speech.pyx +++ b/spacy/parts_of_speech.pyx @@ -1,4 +1,4 @@ - +# cython: profile=False IDS = { "": NO_TAG, "ADJ": ADJ, diff --git a/spacy/pipeline/_edit_tree_internals/edit_trees.pyx b/spacy/pipeline/_edit_tree_internals/edit_trees.pyx index 78cd25622..7abd9f2a6 100644 --- a/spacy/pipeline/_edit_tree_internals/edit_trees.pyx +++ b/spacy/pipeline/_edit_tree_internals/edit_trees.pyx @@ -1,4 +1,5 @@ # cython: infer_types=True, binding=True +# cython: profile=False from cython.operator cimport dereference as deref from libc.stdint cimport UINT32_MAX, uint32_t from libc.string cimport memset diff --git a/spacy/pipeline/_parser_internals/_beam_utils.pyx b/spacy/pipeline/_parser_internals/_beam_utils.pyx index de8f0bf7b..ac04be5a7 100644 --- a/spacy/pipeline/_parser_internals/_beam_utils.pyx +++ b/spacy/pipeline/_parser_internals/_beam_utils.pyx @@ -1,5 +1,4 @@ # cython: infer_types=True -# cython: profile=True import numpy from thinc.extra.search cimport Beam diff --git a/spacy/pipeline/_parser_internals/_state.pyx b/spacy/pipeline/_parser_internals/_state.pyx index e69de29bb..61bf62038 100644 --- a/spacy/pipeline/_parser_internals/_state.pyx +++ b/spacy/pipeline/_parser_internals/_state.pyx @@ -0,0 +1 @@ +# cython: profile=False diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx index bcb4626fb..e13754944 100644 --- a/spacy/pipeline/_parser_internals/arc_eager.pyx +++ b/spacy/pipeline/_parser_internals/arc_eager.pyx @@ -1,4 +1,4 @@ -# cython: profile=True, cdivision=True, infer_types=True +# cython: cdivision=True, infer_types=True from cymem.cymem cimport Address, Pool from libc.stdint cimport int32_t from libcpp.vector cimport vector diff --git a/spacy/pipeline/_parser_internals/ner.pyx b/spacy/pipeline/_parser_internals/ner.pyx index 6c4f8e245..e4312bd2f 100644 --- a/spacy/pipeline/_parser_internals/ner.pyx +++ b/spacy/pipeline/_parser_internals/ner.pyx @@ -1,3 +1,4 @@ +# cython: profile=False from cymem.cymem cimport Pool from libc.stdint cimport int32_t diff --git a/spacy/pipeline/_parser_internals/nonproj.pyx b/spacy/pipeline/_parser_internals/nonproj.pyx index 93ad14feb..7de19851e 100644 --- a/spacy/pipeline/_parser_internals/nonproj.pyx +++ b/spacy/pipeline/_parser_internals/nonproj.pyx @@ -1,4 +1,4 @@ -# cython: profile=True, infer_types=True +# cython: infer_types=True """Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005 for doing pseudo-projective parsing implementation uses the HEAD decoration scheme. diff --git a/spacy/pipeline/_parser_internals/stateclass.pyx b/spacy/pipeline/_parser_internals/stateclass.pyx index fdb5004bb..e3b063b7d 100644 --- a/spacy/pipeline/_parser_internals/stateclass.pyx +++ b/spacy/pipeline/_parser_internals/stateclass.pyx @@ -1,4 +1,5 @@ # cython: infer_types=True +# cython: profile=False from libcpp.vector cimport vector from ...tokens.doc cimport Doc diff --git a/spacy/pipeline/_parser_internals/transition_system.pyx b/spacy/pipeline/_parser_internals/transition_system.pyx index aabbdfa24..e035053b3 100644 --- a/spacy/pipeline/_parser_internals/transition_system.pyx +++ b/spacy/pipeline/_parser_internals/transition_system.pyx @@ -1,4 +1,5 @@ # cython: infer_types=True +# cython: profile=False from __future__ import print_function from cymem.cymem cimport Pool diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx index 57f091788..18a220bd6 100644 --- a/spacy/pipeline/dep_parser.pyx +++ b/spacy/pipeline/dep_parser.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True, binding=True +# cython: infer_types=True, binding=True from collections import defaultdict from typing import Callable, Optional diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index 7ca3908bd..d415ae43c 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True, binding=True +# cython: infer_types=True, binding=True from itertools import islice from typing import Callable, Dict, Optional, Union diff --git a/spacy/pipeline/multitask.pyx b/spacy/pipeline/multitask.pyx index 2a62a50d5..f33a90fde 100644 --- a/spacy/pipeline/multitask.pyx +++ b/spacy/pipeline/multitask.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True, binding=True +# cython: infer_types=True, binding=True from typing import Optional import numpy diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx index 15c092ae9..bb009dc7a 100644 --- a/spacy/pipeline/ner.pyx +++ b/spacy/pipeline/ner.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True, binding=True +# cython: infer_types=True, binding=True from collections import defaultdict from typing import Callable, Optional diff --git a/spacy/pipeline/pipe.pyx b/spacy/pipeline/pipe.pyx index 90775c465..72ea7e45a 100644 --- a/spacy/pipeline/pipe.pyx +++ b/spacy/pipeline/pipe.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True, binding=True +# cython: infer_types=True, binding=True import warnings from typing import Callable, Dict, Iterable, Iterator, Tuple, Union diff --git a/spacy/pipeline/sentencizer.pyx b/spacy/pipeline/sentencizer.pyx index 76f296644..08ba9d989 100644 --- a/spacy/pipeline/sentencizer.pyx +++ b/spacy/pipeline/sentencizer.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True, binding=True +# cython: infer_types=True, binding=True from typing import Callable, List, Optional import srsly diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx index 37ddcc3c0..df093baa9 100644 --- a/spacy/pipeline/senter.pyx +++ b/spacy/pipeline/senter.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True, binding=True +# cython: infer_types=True, binding=True from itertools import islice from typing import Callable, Optional diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index 4c5265a78..34e85d49c 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True, binding=True +# cython: infer_types=True, binding=True from itertools import islice from typing import Callable, Optional diff --git a/spacy/pipeline/trainable_pipe.pyx b/spacy/pipeline/trainable_pipe.pyx index e5865e070..8f219b327 100644 --- a/spacy/pipeline/trainable_pipe.pyx +++ b/spacy/pipeline/trainable_pipe.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True, binding=True +# cython: infer_types=True, binding=True from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple import srsly diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx index 11c8fafc7..9a278fc13 100644 --- a/spacy/pipeline/transition_parser.pyx +++ b/spacy/pipeline/transition_parser.pyx @@ -1,4 +1,5 @@ # cython: infer_types=True, cdivision=True, boundscheck=False, binding=True +# cython: profile=False from __future__ import print_function cimport numpy as np diff --git a/spacy/strings.pyx b/spacy/strings.pyx index b0799d6fc..376a13175 100644 --- a/spacy/strings.pyx +++ b/spacy/strings.pyx @@ -1,4 +1,5 @@ # cython: infer_types=True +# cython: profile=False cimport cython from libc.stdint cimport uint32_t from libc.string cimport memcpy diff --git a/spacy/symbols.pyx b/spacy/symbols.pyx index d1deeb0e7..f7713577b 100644 --- a/spacy/symbols.pyx +++ b/spacy/symbols.pyx @@ -1,4 +1,5 @@ # cython: optimize.unpack_method_calls=False +# cython: profile=False IDS = { "": NIL, "IS_ALPHA": IS_ALPHA, diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index 8fc95bea0..a239eaf45 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -1,4 +1,4 @@ -# cython: embedsignature=True, profile=True, binding=True +# cython: embedsignature=True, binding=True cimport cython from cymem.cymem cimport Pool from cython.operator cimport dereference as deref diff --git a/spacy/tokens/_retokenize.pyx b/spacy/tokens/_retokenize.pyx index f28d2e088..b0e4ff85c 100644 --- a/spacy/tokens/_retokenize.pyx +++ b/spacy/tokens/_retokenize.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, bounds_check=False, profile=True +# cython: infer_types=True, bounds_check=False from cymem.cymem cimport Pool from libc.string cimport memset diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 8fc2c4b3c..745eb5ff3 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, bounds_check=False, profile=True +# cython: infer_types=True, bounds_check=False from typing import Set cimport cython diff --git a/spacy/tokens/graph.pyx b/spacy/tokens/graph.pyx index 1cbec09f4..6c4ce6ce3 100644 --- a/spacy/tokens/graph.pyx +++ b/spacy/tokens/graph.pyx @@ -1,4 +1,5 @@ # cython: infer_types=True, cdivision=True, boundscheck=False, binding=True +# cython: profile=False from typing import Generator, List, Tuple cimport cython diff --git a/spacy/tokens/morphanalysis.pyx b/spacy/tokens/morphanalysis.pyx index ba7c638f6..ea5d07fa4 100644 --- a/spacy/tokens/morphanalysis.pyx +++ b/spacy/tokens/morphanalysis.pyx @@ -1,3 +1,4 @@ +# cython: profile=False cimport numpy as np from libc.string cimport memset diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index cf90e416b..af3ba8db5 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -1,3 +1,4 @@ +# cython: profile=False cimport numpy as np import copy diff --git a/spacy/tokens/span_group.pyx b/spacy/tokens/span_group.pyx index d245a1425..257c907bc 100644 --- a/spacy/tokens/span_group.pyx +++ b/spacy/tokens/span_group.pyx @@ -1,3 +1,4 @@ +# cython: profile=False import struct import weakref from copy import deepcopy diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index de967ba25..9fd4118d6 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -1,4 +1,5 @@ # cython: infer_types=True +# cython: profile=False # Compiler crashes on memory view coercion without this. Should report bug. cimport numpy as np diff --git a/spacy/training/align.pyx b/spacy/training/align.pyx index 79fec73c4..c68110e30 100644 --- a/spacy/training/align.pyx +++ b/spacy/training/align.pyx @@ -1,3 +1,4 @@ +# cython: profile=False import re from itertools import chain from typing import List, Tuple diff --git a/spacy/training/alignment_array.pyx b/spacy/training/alignment_array.pyx index b0be1512b..f0eb5cf39 100644 --- a/spacy/training/alignment_array.pyx +++ b/spacy/training/alignment_array.pyx @@ -1,3 +1,4 @@ +# cython: profile=False from typing import List import numpy diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx index 3f0cf5ade..abdcecf71 100644 --- a/spacy/training/example.pyx +++ b/spacy/training/example.pyx @@ -1,3 +1,4 @@ +# cython: profile=False from collections.abc import Iterable as IterableInstance import numpy diff --git a/spacy/training/gold_io.pyx b/spacy/training/gold_io.pyx index 2fc36e41f..afbdf4631 100644 --- a/spacy/training/gold_io.pyx +++ b/spacy/training/gold_io.pyx @@ -1,3 +1,4 @@ +# cython: profile=False import warnings import srsly diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py index 82d4ebf24..062170221 100644 --- a/spacy/training/initialize.py +++ b/spacy/training/initialize.py @@ -302,7 +302,7 @@ def read_vectors( shape = (truncate_vectors, shape[1]) vectors_data = numpy.zeros(shape=shape, dtype="f") vectors_keys = [] - for i, line in enumerate(tqdm.tqdm(f)): + for i, line in enumerate(tqdm.tqdm(f, disable=None)): line = line.rstrip() pieces = line.rsplit(" ", vectors_data.shape[1]) word = pieces.pop(0) diff --git a/spacy/typedefs.pyx b/spacy/typedefs.pyx index e69de29bb..61bf62038 100644 --- a/spacy/typedefs.pyx +++ b/spacy/typedefs.pyx @@ -0,0 +1 @@ +# cython: profile=False diff --git a/spacy/util.py b/spacy/util.py index c5c57d67d..8464e411f 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -1068,7 +1068,10 @@ def make_tempdir() -> Generator[Path, None, None]: rmfunc(path) try: - shutil.rmtree(str(d), onerror=force_remove) + if sys.version_info >= (3, 12): + shutil.rmtree(str(d), onexc=force_remove) + else: + shutil.rmtree(str(d), onerror=force_remove) except PermissionError as e: warnings.warn(Warnings.W091.format(dir=d, msg=e)) diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index 2817bcad4..6ff99bb59 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -1,4 +1,4 @@ -# cython: infer_types=True, profile=True, binding=True +# cython: infer_types=True, binding=True from typing import Callable from cython.operator cimport dereference as deref diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 48e8fcb90..4004a70e0 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -1,4 +1,3 @@ -# cython: profile=True import functools import numpy