From 439f30faadea4b63efe01f9e79ecf048a08eeadd Mon Sep 17 00:00:00 2001 From: Eduard Zorita Date: Sat, 7 Aug 2021 12:30:03 +0200 Subject: [PATCH] Add stub files for main cython classes (#8427) * Add stub files for main API classes * Add contributor agreement for ezorita * Update types for ndarray and hash() * Fix __getitem__ and __iter__ * Add attributes of Doc and Token classes * Overload type hints for Span.__getitem__ * Fix type hint overload for Span.__getitem__ Co-authored-by: Luca Dorigo --- .github/contributors/ezorita.md | 106 ++++++++++++++++ spacy/lexeme.pyi | 61 ++++++++++ spacy/matcher/matcher.pyi | 41 +++++++ spacy/strings.pyi | 22 ++++ spacy/tokens/_retokenize.pyi | 17 +++ spacy/tokens/doc.pyi | 180 +++++++++++++++++++++++++++ spacy/tokens/morphanalysis.pyi | 20 +++ spacy/tokens/span.pyi | 124 +++++++++++++++++++ spacy/tokens/span_group.pyi | 24 ++++ spacy/tokens/token.pyi | 208 ++++++++++++++++++++++++++++++++ spacy/vocab.pyi | 78 ++++++++++++ 11 files changed, 881 insertions(+) create mode 100644 .github/contributors/ezorita.md create mode 100644 spacy/lexeme.pyi create mode 100644 spacy/matcher/matcher.pyi create mode 100644 spacy/strings.pyi create mode 100644 spacy/tokens/_retokenize.pyi create mode 100644 spacy/tokens/doc.pyi create mode 100644 spacy/tokens/morphanalysis.pyi create mode 100644 spacy/tokens/span.pyi create mode 100644 spacy/tokens/span_group.pyi create mode 100644 spacy/tokens/token.pyi create mode 100644 spacy/vocab.pyi diff --git a/.github/contributors/ezorita.md b/.github/contributors/ezorita.md new file mode 100644 index 000000000..e5f3f5283 --- /dev/null +++ b/.github/contributors/ezorita.md @@ -0,0 +1,106 @@ +# spaCy contributor agreement + +This spaCy Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +[ExplosionAI GmbH](https://explosion.ai/legal). The term +**"you"** shall mean the person or entity identified below. + +If you agree to be bound by these terms, fill in the information requested +below and include the filled-in version with your first pull request, under the +folder [`.github/contributors/`](/.github/contributors/). The name of the file +should be your GitHub username, with the extension `.md`. For example, the user +example_user would create the file `.github/contributors/example_user.md`. + +Read this agreement carefully before signing. These terms and conditions +constitute a binding legal agreement. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. 
+    This includes, at our option, the right to sublicense these same
+    rights to third parties through multiple levels of sublicensees or other
+    licensing arrangements;
+
+  * you agree that each of us can do all things in relation to your
+    contribution as if each of us were the sole owners, and if one of us makes
+    a derivative work of your contribution, the one who makes the derivative
+    work (or has it made) will be the sole owner of that derivative work;
+
+  * you agree that you will not assert any moral rights in your contribution
+    against us, our licensees or transferees;
+
+  * you agree that we may register a copyright in your contribution and
+    exercise all ownership rights associated with it; and
+
+  * you agree that neither of us has any duty to consult with, obtain the
+    consent of, pay or render an accounting to the other for any use or
+    distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+  * make, have made, use, sell, offer to sell, import, and otherwise transfer
+    your contribution in whole or in part, alone or in combination with or
+    included in any product, work or materials arising out of the project to
+    which your contribution was submitted, and
+
+  * at our option, to sublicense these same rights to third parties through
+    multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+  * Each contribution that you submit is and shall be an original work of
+    authorship and you can legally grant the rights set out in this SCA;
+
+  * to the best of your knowledge, each contribution will not violate any
+    third party's copyrights, trademarks, patents, or other intellectual
+    property rights; and
+
+  * each contribution shall be in compliance with U.S. export control laws and
+    other applicable export and import laws. You agree to notify us if you
+    become aware of any circumstance which would make any of the foregoing
+    representations inaccurate in any respect. We may publicly disclose your
+    participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+  * [x] I am signing on behalf of myself as an individual and no other person
+    or entity, including my employer, has or will have rights with respect to my
+    contributions.
+
+  * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field                          | Entry                |
+|------------------------------- | -------------------- |
+| Name                           | Eduard Zorita        |
+| Company name (if applicable)   |                      |
+| Title or role (if applicable)  |                      |
+| Date                           | 06/17/2021           |
+| GitHub username                | ezorita              |
+| Website (optional)             |                      |
diff --git a/spacy/lexeme.pyi b/spacy/lexeme.pyi
new file mode 100644
index 000000000..4eae6be43
--- /dev/null
+++ b/spacy/lexeme.pyi
@@ -0,0 +1,61 @@
+from typing import (
+    Union,
+    Any,
+)
+from thinc.types import Floats1d
+from .tokens import Doc, Span, Token
+from .vocab import Vocab
+
+class Lexeme:
+    def __init__(self, vocab: Vocab, orth: int) -> None: ...
+    def __richcmp__(self, other: Lexeme, op: int) -> bool: ...
+    def __hash__(self) -> int: ...
+    def set_attrs(self, **attrs: Any) -> None: ...
+    def set_flag(self, flag_id: int, value: bool) -> None: ...
+    def check_flag(self, flag_id: int) -> bool: ...
+    def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ...
+    @property
+    def has_vector(self) -> bool: ...
+    @property
+    def vector_norm(self) -> float: ...
+    vector: Floats1d
+    rank: int
+    sentiment: float
+    @property
+    def orth_(self) -> str: ...
+    @property
+    def text(self) -> str: ...
+    lower: int
+    norm: int
+    shape: int
+    prefix: int
+    suffix: int
+    cluster: int
+    lang: int
+    prob: float
+    lower_: str
+    norm_: str
+    shape_: str
+    prefix_: str
+    suffix_: str
+    lang_: str
+    flags: int
+    @property
+    def is_oov(self) -> bool: ...
+    is_stop: bool
+    is_alpha: bool
+    is_ascii: bool
+    is_digit: bool
+    is_lower: bool
+    is_upper: bool
+    is_title: bool
+    is_punct: bool
+    is_space: bool
+    is_bracket: bool
+    is_quote: bool
+    is_left_punct: bool
+    is_right_punct: bool
+    is_currency: bool
+    like_url: bool
+    like_num: bool
+    like_email: bool
diff --git a/spacy/matcher/matcher.pyi b/spacy/matcher/matcher.pyi
new file mode 100644
index 000000000..3be065bcd
--- /dev/null
+++ b/spacy/matcher/matcher.pyi
@@ -0,0 +1,41 @@
+from typing import Any, List, Dict, Tuple, Optional, Callable, Union, Iterator, Iterable
+from ..vocab import Vocab
+from ..tokens import Doc, Span
+
+class Matcher:
+    def __init__(self, vocab: Vocab, validate: bool = ...) -> None: ...
+    def __reduce__(self) -> Any: ...
+    def __len__(self) -> int: ...
+    def __contains__(self, key: str) -> bool: ...
+    def add(
+        self,
+        key: str,
+        patterns: List[List[Dict[str, Any]]],
+        *,
+        on_match: Optional[
+            Callable[[Matcher, Doc, int, List[Tuple[Any, ...]]], Any]
+        ] = ...,
+        greedy: Optional[str] = ...
+    ) -> None: ...
+    def remove(self, key: str) -> None: ...
+    def has_key(self, key: Union[str, int]) -> bool: ...
+    def get(
+        self, key: Union[str, int], default: Optional[Any] = ...
+    ) -> Tuple[Optional[Callable[[Any], Any]], List[List[Dict[Any, Any]]]]: ...
+    def pipe(
+        self,
+        docs: Iterable[Tuple[Doc, Any]],
+        batch_size: int = ...,
+        return_matches: bool = ...,
+        as_tuples: bool = ...,
+    ) -> Union[
+        Iterator[Tuple[Tuple[Doc, Any], Any]], Iterator[Tuple[Doc, Any]], Iterator[Doc]
+    ]: ...
+    def __call__(
+        self,
+        doclike: Union[Doc, Span],
+        *,
+        as_spans: bool = ...,
+        allow_missing: bool = ...,
+        with_alignments: bool = ...
+    ) -> Union[List[Tuple[int, int, int]], List[Span]]: ...
diff --git a/spacy/strings.pyi b/spacy/strings.pyi
new file mode 100644
index 000000000..57bf71b93
--- /dev/null
+++ b/spacy/strings.pyi
@@ -0,0 +1,22 @@
+from typing import Optional, Iterable, Iterator, Union, Any
+from pathlib import Path
+
+def get_string_id(key: str) -> int: ...
+ +class StringStore: + def __init__( + self, strings: Optional[Iterable[str]] = ..., freeze: bool = ... + ) -> None: ... + def __getitem__(self, string_or_id: Union[bytes, str, int]) -> Union[str, int]: ... + def as_int(self, key: Union[bytes, str, int]) -> int: ... + def as_string(self, key: Union[bytes, str, int]) -> str: ... + def add(self, string: str) -> int: ... + def __len__(self) -> int: ... + def __contains__(self, string: str) -> bool: ... + def __iter__(self) -> Iterator[str]: ... + def __reduce__(self) -> Any: ... + def to_disk(self, path: Union[str, Path]) -> None: ... + def from_disk(self, path: Union[str, Path]) -> StringStore: ... + def to_bytes(self, **kwargs: Any) -> bytes: ... + def from_bytes(self, bytes_data: bytes, **kwargs: Any) -> StringStore: ... + def _reset_and_load(self, strings: Iterable[str]) -> None: ... diff --git a/spacy/tokens/_retokenize.pyi b/spacy/tokens/_retokenize.pyi new file mode 100644 index 000000000..b829b71a3 --- /dev/null +++ b/spacy/tokens/_retokenize.pyi @@ -0,0 +1,17 @@ +from typing import Dict, Any, Union, List, Tuple +from .doc import Doc +from .span import Span +from .token import Token + +class Retokenizer: + def __init__(self, doc: Doc) -> None: ... + def merge(self, span: Span, attrs: Dict[Union[str, int], Any] = ...) -> None: ... + def split( + self, + token: Token, + orths: List[str], + heads: List[Union[Token, Tuple[Token, int]]], + attrs: Dict[Union[str, int], List[Any]] = ..., + ) -> None: ... + def __enter__(self) -> Retokenizer: ... + def __exit__(self, *args: Any) -> None: ... diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi new file mode 100644 index 000000000..8688fb91f --- /dev/null +++ b/spacy/tokens/doc.pyi @@ -0,0 +1,180 @@ +from typing import ( + Callable, + Protocol, + Iterable, + Iterator, + Optional, + Union, + Tuple, + List, + Dict, + Any, + overload, +) +from cymem.cymem import Pool +from thinc.types import Floats1d, Floats2d, Ints2d +from .span import Span +from .token import Token +from ._dict_proxies import SpanGroups +from ._retokenize import Retokenizer +from ..lexeme import Lexeme +from ..vocab import Vocab +from .underscore import Underscore +from pathlib import Path +import numpy + +class DocMethod(Protocol): + def __call__(self: Doc, *args: Any, **kwargs: Any) -> Any: ... + +class Doc: + vocab: Vocab + mem: Pool + spans: SpanGroups + max_length: int + length: int + sentiment: float + cats: Dict[str, float] + user_hooks: Dict[str, Callable[..., Any]] + user_token_hooks: Dict[str, Callable[..., Any]] + user_span_hooks: Dict[str, Callable[..., Any]] + tensor: numpy.ndarray + user_data: Dict[str, Any] + has_unknown_spaces: bool + @classmethod + def set_extension( + cls, + name: str, + default: Optional[Any] = ..., + getter: Optional[Callable[[Doc], Any]] = ..., + setter: Optional[Callable[[Doc, Any], None]] = ..., + method: Optional[DocMethod] = ..., + force: bool = ..., + ) -> None: ... + @classmethod + def get_extension( + cls, name: str + ) -> Tuple[ + Optional[Any], + Optional[DocMethod], + Optional[Callable[[Doc], Any]], + Optional[Callable[[Doc, Any], None]], + ]: ... + @classmethod + def has_extension(cls, name: str) -> bool: ... + @classmethod + def remove_extension( + cls, name: str + ) -> Tuple[ + Optional[Any], + Optional[DocMethod], + Optional[Callable[[Doc], Any]], + Optional[Callable[[Doc, Any], None]], + ]: ... 
+ def __init__( + self, + vocab: Vocab, + words: Optional[List[str]] = ..., + spaces: Optional[List[bool]] = ..., + user_data: Optional[Dict[Any, Any]] = ..., + tags: Optional[List[str]] = ..., + pos: Optional[List[str]] = ..., + morphs: Optional[List[str]] = ..., + lemmas: Optional[List[str]] = ..., + heads: Optional[List[int]] = ..., + deps: Optional[List[str]] = ..., + sent_starts: Optional[List[Union[bool, None]]] = ..., + ents: Optional[List[str]] = ..., + ) -> None: ... + @property + def _(self) -> Underscore: ... + @property + def is_tagged(self) -> bool: ... + @property + def is_parsed(self) -> bool: ... + @property + def is_nered(self) -> bool: ... + @property + def is_sentenced(self) -> bool: ... + def has_annotation( + self, attr: Union[int, str], *, require_complete: bool = ... + ) -> bool: ... + @overload + def __getitem__(self, i: int) -> Token: ... + @overload + def __getitem__(self, i: slice) -> Span: ... + def __iter__(self) -> Iterator[Token]: ... + def __len__(self) -> int: ... + def __unicode__(self) -> str: ... + def __bytes__(self) -> bytes: ... + def __str__(self) -> str: ... + def __repr__(self) -> str: ... + @property + def doc(self) -> Doc: ... + def char_span( + self, + start_idx: int, + end_idx: int, + label: Union[int, str] = ..., + kb_id: Union[int, str] = ..., + vector: Optional[Floats1d] = ..., + alignment_mode: str = ..., + ) -> Span: ... + def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ... + @property + def has_vector(self) -> bool: ... + vector: Floats1d + vector_norm: float + @property + def text(self) -> str: ... + @property + def text_with_ws(self) -> str: ... + ents: Tuple[Span] + def set_ents( + self, + entities: List[Span], + *, + blocked: Optional[List[Span]] = ..., + missing: Optional[List[Span]] = ..., + outside: Optional[List[Span]] = ..., + default: str = ... + ) -> None: ... + @property + def noun_chunks(self) -> Iterator[Span]: ... + @property + def sents(self) -> Iterator[Span]: ... + @property + def lang(self) -> int: ... + @property + def lang_(self) -> str: ... + def count_by( + self, attr_id: int, exclude: Optional[Any] = ..., counts: Optional[Any] = ... + ) -> Dict[Any, int]: ... + def from_array(self, attrs: List[int], array: Ints2d) -> Doc: ... + @staticmethod + def from_docs( + docs: List[Doc], + ensure_whitespace: bool = ..., + attrs: Optional[Union[Tuple[Union[str, int]], List[Union[int, str]]]] = ..., + ) -> Doc: ... + def get_lca_matrix(self) -> Ints2d: ... + def copy(self) -> Doc: ... + def to_disk( + self, path: Union[str, Path], *, exclude: Iterable[str] = ... + ) -> None: ... + def from_disk( + self, path: Union[str, Path], *, exclude: Union[List[str], Tuple[str]] = ... + ) -> Doc: ... + def to_bytes(self, *, exclude: Union[List[str], Tuple[str]] = ...) -> bytes: ... + def from_bytes( + self, bytes_data: bytes, *, exclude: Union[List[str], Tuple[str]] = ... + ) -> Doc: ... + def to_dict(self, *, exclude: Union[List[str], Tuple[str]] = ...) -> bytes: ... + def from_dict( + self, msg: bytes, *, exclude: Union[List[str], Tuple[str]] = ... + ) -> Doc: ... + def extend_tensor(self, tensor: Floats2d) -> None: ... + def retokenize(self) -> Retokenizer: ... + def to_json(self, underscore: Optional[List[str]] = ...) -> Dict[str, Any]: ... + def to_utf8_array(self, nr_char: int = ...) -> Ints2d: ... + @staticmethod + def _get_array_attrs() -> Tuple[Any]: ... 
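For review context: a minimal, illustrative sketch (not part of the patch) of what `doc.pyi` buys downstream users. The blank English pipeline and the extension name `token_count` are assumptions for demonstration; the point is that the `@overload` pair on `Doc.__getitem__` lets a type checker infer `Token` for integer indices and `Span` for slices, instead of `Any`.

```python
# Illustrative usage sketch; assumes spaCy is installed with these stubs.
import spacy
from spacy.tokens import Doc, Span, Token

nlp = spacy.blank("en")  # any Language pipeline would do here
doc: Doc = nlp("Stub files make static type checking possible")

first: Token = doc[0]    # int index -> Token, via the first @overload
middle: Span = doc[1:3]  # slice index -> Span, via the second @overload

# set_extension's typed signature covers getter-based extensions too;
# "token_count" is an arbitrary example name, not part of the patch.
Doc.set_extension("token_count", getter=lambda d: len(d), force=True)
print(first.text, middle.text, doc._.token_count)
```

Without stubs, the compiled Cython modules expose no signatures at all, so every expression above would previously have type-checked as `Any`.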
diff --git a/spacy/tokens/morphanalysis.pyi b/spacy/tokens/morphanalysis.pyi new file mode 100644 index 000000000..c7e05e58f --- /dev/null +++ b/spacy/tokens/morphanalysis.pyi @@ -0,0 +1,20 @@ +from typing import Any, Dict, Iterator, List, Union +from ..vocab import Vocab + +class MorphAnalysis: + def __init__( + self, vocab: Vocab, features: Union[Dict[str, str], str] = ... + ) -> None: ... + @classmethod + def from_id(cls, vocab: Vocab, key: Any) -> MorphAnalysis: ... + def __contains__(self, feature: str) -> bool: ... + def __iter__(self) -> Iterator[str]: ... + def __len__(self) -> int: ... + def __hash__(self) -> int: ... + def __eq__(self, other: MorphAnalysis) -> bool: ... + def __ne__(self, other: MorphAnalysis) -> bool: ... + def get(self, field: Any) -> List[str]: ... + def to_json(self) -> str: ... + def to_dict(self) -> Dict[str, str]: ... + def __str__(self) -> str: ... + def __repr__(self) -> str: ... diff --git a/spacy/tokens/span.pyi b/spacy/tokens/span.pyi new file mode 100644 index 000000000..4f65abace --- /dev/null +++ b/spacy/tokens/span.pyi @@ -0,0 +1,124 @@ +from typing import Callable, Protocol, Iterator, Optional, Union, Tuple, Any, overload +from thinc.types import Floats1d, Ints2d, FloatsXd +from .doc import Doc +from .token import Token +from .underscore import Underscore +from ..lexeme import Lexeme +from ..vocab import Vocab + +class SpanMethod(Protocol): + def __call__(self: Span, *args: Any, **kwargs: Any) -> Any: ... + +class Span: + @classmethod + def set_extension( + cls, + name: str, + default: Optional[Any] = ..., + getter: Optional[Callable[[Span], Any]] = ..., + setter: Optional[Callable[[Span, Any], None]] = ..., + method: Optional[SpanMethod] = ..., + force: bool = ..., + ) -> None: ... + @classmethod + def get_extension( + cls, name: str + ) -> Tuple[ + Optional[Any], + Optional[SpanMethod], + Optional[Callable[[Span], Any]], + Optional[Callable[[Span, Any], None]], + ]: ... + @classmethod + def has_extension(cls, name: str) -> bool: ... + @classmethod + def remove_extension( + cls, name: str + ) -> Tuple[ + Optional[Any], + Optional[SpanMethod], + Optional[Callable[[Span], Any]], + Optional[Callable[[Span, Any], None]], + ]: ... + def __init__( + self, + doc: Doc, + start: int, + end: int, + label: int = ..., + vector: Optional[Floats1d] = ..., + vector_norm: Optional[float] = ..., + kb_id: Optional[int] = ..., + ) -> None: ... + def __richcmp__(self, other: Span, op: int) -> bool: ... + def __hash__(self) -> int: ... + def __len__(self) -> int: ... + def __repr__(self) -> str: ... + @overload + def __getitem__(self, i: int) -> Token: ... + @overload + def __getitem__(self, i: slice) -> Span: ... + def __iter__(self) -> Iterator[Token]: ... + @property + def _(self) -> Underscore: ... + def as_doc(self, *, copy_user_data: bool = ...) -> Doc: ... + def get_lca_matrix(self) -> Ints2d: ... + def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ... + @property + def vocab(self) -> Vocab: ... + @property + def sent(self) -> Span: ... + @property + def ents(self) -> Tuple[Span]: ... + @property + def has_vector(self) -> bool: ... + @property + def vector(self) -> Floats1d: ... + @property + def vector_norm(self) -> float: ... + @property + def tensor(self) -> FloatsXd: ... + @property + def sentiment(self) -> float: ... + @property + def text(self) -> str: ... + @property + def text_with_ws(self) -> str: ... + @property + def noun_chunks(self) -> Iterator[Span]: ... + @property + def root(self) -> Token: ... 
+ def char_span( + self, + start_idx: int, + end_idx: int, + label: int = ..., + kb_id: int = ..., + vector: Optional[Floats1d] = ..., + ) -> Span: ... + @property + def conjuncts(self) -> Tuple[Token]: ... + @property + def lefts(self) -> Iterator[Token]: ... + @property + def rights(self) -> Iterator[Token]: ... + @property + def n_lefts(self) -> int: ... + @property + def n_rights(self) -> int: ... + @property + def subtree(self) -> Iterator[Token]: ... + start: int + end: int + start_char: int + end_char: int + label: int + kb_id: int + ent_id: int + ent_id_: str + @property + def orth_(self) -> str: ... + @property + def lemma_(self) -> str: ... + label_: str + kb_id_: str diff --git a/spacy/tokens/span_group.pyi b/spacy/tokens/span_group.pyi new file mode 100644 index 000000000..4bd6bec27 --- /dev/null +++ b/spacy/tokens/span_group.pyi @@ -0,0 +1,24 @@ +from typing import Any, Dict, Iterable +from .doc import Doc +from .span import Span + +class SpanGroup: + def __init__( + self, + doc: Doc, + *, + name: str = ..., + attrs: Dict[str, Any] = ..., + spans: Iterable[Span] = ... + ) -> None: ... + def __repr__(self) -> str: ... + @property + def doc(self) -> Doc: ... + @property + def has_overlap(self) -> bool: ... + def __len__(self) -> int: ... + def append(self, span: Span) -> None: ... + def extend(self, spans: Iterable[Span]) -> None: ... + def __getitem__(self, i: int) -> Span: ... + def to_bytes(self) -> bytes: ... + def from_bytes(self, bytes_data: bytes) -> SpanGroup: ... diff --git a/spacy/tokens/token.pyi b/spacy/tokens/token.pyi new file mode 100644 index 000000000..23d028ffd --- /dev/null +++ b/spacy/tokens/token.pyi @@ -0,0 +1,208 @@ +from typing import ( + Callable, + Protocol, + Iterator, + Optional, + Union, + Tuple, + Any, +) +from thinc.types import Floats1d, FloatsXd +from .doc import Doc +from .span import Span +from .morphanalysis import MorphAnalysis +from ..lexeme import Lexeme +from ..vocab import Vocab +from .underscore import Underscore + +class TokenMethod(Protocol): + def __call__(self: Token, *args: Any, **kwargs: Any) -> Any: ... + +class Token: + i: int + doc: Doc + vocab: Vocab + @classmethod + def set_extension( + cls, + name: str, + default: Optional[Any] = ..., + getter: Optional[Callable[[Token], Any]] = ..., + setter: Optional[Callable[[Token, Any], None]] = ..., + method: Optional[TokenMethod] = ..., + force: bool = ..., + ) -> None: ... + @classmethod + def get_extension( + cls, name: str + ) -> Tuple[ + Optional[Any], + Optional[TokenMethod], + Optional[Callable[[Token], Any]], + Optional[Callable[[Token, Any], None]], + ]: ... + @classmethod + def has_extension(cls, name: str) -> bool: ... + @classmethod + def remove_extension( + cls, name: str + ) -> Tuple[ + Optional[Any], + Optional[TokenMethod], + Optional[Callable[[Token], Any]], + Optional[Callable[[Token, Any], None]], + ]: ... + def __init__(self, vocab: Vocab, doc: Doc, offset: int) -> None: ... + def __hash__(self) -> int: ... + def __len__(self) -> int: ... + def __unicode__(self) -> str: ... + def __bytes__(self) -> bytes: ... + def __str__(self) -> str: ... + def __repr__(self) -> str: ... + def __richcmp__(self, other: Token, op: int) -> bool: ... + @property + def _(self) -> Underscore: ... + def nbor(self, i: int = ...) -> Token: ... + def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ... + def has_morph(self) -> bool: ... + morph: MorphAnalysis + @property + def lex(self) -> Lexeme: ... + @property + def lex_id(self) -> int: ... 
+    @property
+    def rank(self) -> int: ...
+    @property
+    def text(self) -> str: ...
+    @property
+    def text_with_ws(self) -> str: ...
+    @property
+    def prob(self) -> float: ...
+    @property
+    def sentiment(self) -> float: ...
+    @property
+    def lang(self) -> int: ...
+    @property
+    def idx(self) -> int: ...
+    @property
+    def cluster(self) -> int: ...
+    @property
+    def orth(self) -> int: ...
+    @property
+    def lower(self) -> int: ...
+    @property
+    def norm(self) -> int: ...
+    @property
+    def shape(self) -> int: ...
+    @property
+    def prefix(self) -> int: ...
+    @property
+    def suffix(self) -> int: ...
+    lemma: int
+    pos: int
+    tag: int
+    dep: int
+    @property
+    def has_vector(self) -> bool: ...
+    @property
+    def vector(self) -> Floats1d: ...
+    @property
+    def vector_norm(self) -> float: ...
+    @property
+    def tensor(self) -> Optional[FloatsXd]: ...
+    @property
+    def n_lefts(self) -> int: ...
+    @property
+    def n_rights(self) -> int: ...
+    @property
+    def sent(self) -> Span: ...
+    sent_start: bool
+    is_sent_start: Optional[bool]
+    is_sent_end: Optional[bool]
+    @property
+    def lefts(self) -> Iterator[Token]: ...
+    @property
+    def rights(self) -> Iterator[Token]: ...
+    @property
+    def children(self) -> Iterator[Token]: ...
+    @property
+    def subtree(self) -> Iterator[Token]: ...
+    @property
+    def left_edge(self) -> Token: ...
+    @property
+    def right_edge(self) -> Token: ...
+    @property
+    def ancestors(self) -> Iterator[Token]: ...
+    def is_ancestor(self, descendant: Token) -> bool: ...
+    def has_head(self) -> bool: ...
+    head: Token
+    @property
+    def conjuncts(self) -> Tuple[Token]: ...
+    ent_type: int
+    ent_type_: str
+    @property
+    def ent_iob(self) -> int: ...
+    @classmethod
+    def iob_strings(cls) -> Tuple[str]: ...
+    @property
+    def ent_iob_(self) -> str: ...
+    ent_id: int
+    ent_id_: str
+    ent_kb_id: int
+    ent_kb_id_: str
+    @property
+    def whitespace_(self) -> str: ...
+    @property
+    def orth_(self) -> str: ...
+    @property
+    def lower_(self) -> str: ...
+    norm_: str
+    @property
+    def shape_(self) -> str: ...
+    @property
+    def prefix_(self) -> str: ...
+    @property
+    def suffix_(self) -> str: ...
+    @property
+    def lang_(self) -> str: ...
+    lemma_: str
+    pos_: str
+    tag_: str
+    def has_dep(self) -> bool: ...
+    dep_: str
+    @property
+    def is_oov(self) -> bool: ...
+    @property
+    def is_stop(self) -> bool: ...
+    @property
+    def is_alpha(self) -> bool: ...
+    @property
+    def is_ascii(self) -> bool: ...
+    @property
+    def is_digit(self) -> bool: ...
+    @property
+    def is_lower(self) -> bool: ...
+    @property
+    def is_upper(self) -> bool: ...
+    @property
+    def is_title(self) -> bool: ...
+    @property
+    def is_punct(self) -> bool: ...
+    @property
+    def is_space(self) -> bool: ...
+    @property
+    def is_bracket(self) -> bool: ...
+    @property
+    def is_quote(self) -> bool: ...
+    @property
+    def is_left_punct(self) -> bool: ...
+    @property
+    def is_right_punct(self) -> bool: ...
+    @property
+    def is_currency(self) -> bool: ...
+    @property
+    def like_url(self) -> bool: ...
+    @property
+    def like_num(self) -> bool: ...
+    @property
+    def like_email(self) -> bool: ...
diff --git a/spacy/vocab.pyi b/spacy/vocab.pyi
new file mode 100644
index 000000000..0a8ef6198
--- /dev/null
+++ b/spacy/vocab.pyi
@@ -0,0 +1,78 @@
+from typing import (
+    Callable,
+    Iterator,
+    Optional,
+    Union,
+    Tuple,
+    List,
+    Dict,
+    Any,
+)
+from thinc.types import Floats1d, FloatsXd
+from . import Language
+from .strings import StringStore
+from .lexeme import Lexeme
+from .lookups import Lookups
+from .tokens import Doc, Span
+from pathlib import Path
+
+def create_vocab(
+    lang: Language, defaults: Any, vectors_name: Optional[str] = ...
+) -> Vocab: ...
+
+class Vocab:
+    def __init__(
+        self,
+        lex_attr_getters: Optional[Dict[str, Callable[[str], Any]]] = ...,
+        strings: Optional[Union[List[str], StringStore]] = ...,
+        lookups: Optional[Lookups] = ...,
+        oov_prob: float = ...,
+        vectors_name: Optional[str] = ...,
+        writing_system: Dict[str, Any] = ...,
+        get_noun_chunks: Optional[Callable[[Union[Doc, Span]], Iterator[Span]]] = ...,
+    ) -> None: ...
+    @property
+    def lang(self) -> Language: ...
+    def __len__(self) -> int: ...
+    def add_flag(
+        self, flag_getter: Callable[[str], bool], flag_id: int = ...
+    ) -> int: ...
+    def __contains__(self, key: str) -> bool: ...
+    def __iter__(self) -> Iterator[Lexeme]: ...
+    def __getitem__(self, id_or_string: Union[str, int]) -> Lexeme: ...
+    @property
+    def vectors_length(self) -> int: ...
+    def reset_vectors(
+        self, *, width: Optional[int] = ..., shape: Optional[int] = ...
+    ) -> None: ...
+    def prune_vectors(self, nr_row: int, batch_size: int = ...) -> Dict[str, float]: ...
+    def get_vector(
+        self,
+        orth: Union[int, str],
+        minn: Optional[int] = ...,
+        maxn: Optional[int] = ...,
+    ) -> FloatsXd: ...
+    def set_vector(self, orth: Union[int, str], vector: Floats1d) -> None: ...
+    def has_vector(self, orth: Union[int, str]) -> bool: ...
+    lookups: Lookups
+    def to_disk(
+        self, path: Union[str, Path], *, exclude: Union[List[str], Tuple[str]] = ...
+    ) -> None: ...
+    def from_disk(
+        self, path: Union[str, Path], *, exclude: Union[List[str], Tuple[str]] = ...
+    ) -> Vocab: ...
+    def to_bytes(self, *, exclude: Union[List[str], Tuple[str]] = ...) -> bytes: ...
+    def from_bytes(
+        self, bytes_data: bytes, *, exclude: Union[List[str], Tuple[str]] = ...
+    ) -> Vocab: ...
+
+def pickle_vocab(vocab: Vocab) -> Any: ...
+def unpickle_vocab(
+    sstore: StringStore,
+    vectors: Any,
+    morphology: Any,
+    data_dir: Any,
+    lex_attr_getters: Any,
+    lookups: Any,
+    get_noun_chunks: Any,
+) -> Vocab: ...
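As a closing illustration (again not part of the patch): a short sketch exercising the `matcher.pyi` and `strings.pyi` stubs together. The rule name `HELLO` and the token pattern are assumptions for demonstration. With the stubs in place, `Matcher.add` and `Matcher.__call__` finally have concrete declared types; by default the call returns a list of `(match_id, start, end)` tuples at runtime, and `vocab.strings[match_id]` maps the hash back to the rule name via `StringStore.__getitem__`.

```python
# Illustrative usage sketch; assumes spaCy is installed with these stubs.
import spacy
from spacy.matcher import Matcher

nlp = spacy.blank("en")
matcher = Matcher(nlp.vocab)
# Example pattern (an assumption, not from the patch): "hello" + punctuation.
matcher.add("HELLO", [[{"LOWER": "hello"}, {"IS_PUNCT": True}]])

doc = nlp("Hello, world!")
for match_id, start, end in matcher(doc):  # (match_id, start, end) tuples
    # match_id is the uint64 hash of "HELLO"; the StringStore maps it back.
    print(nlp.vocab.strings[match_id], doc[start:end].text)
```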