# spaCy/spacy/vocab.pyi
#
# Listing metadata: 85 lines, 2.8 KiB, Python

from contextlib import contextmanager
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Union
from cymem.cymem import Pool
from thinc.types import Floats1d, FloatsXd
from . import Language
from .lexeme import Lexeme
from .lookups import Lookups
from .morphology import Morphology
from .strings import StringStore
from .tokens import Doc, Span
from .vectors import Vectors
# Stub signature: module-level function that returns a ``Vocab``.
# ``lang`` and ``vectors_name`` both accept ``None``; only ``vectors_name``
# has a default, elided here as ``...`` (the real value lives in the
# implementation). ``defaults`` is left untyped (``Any``).
def create_vocab(
    lang: Optional[str],
    defaults: Any,
    vectors_name: Optional[str] = ...,
) -> Vocab:
    ...
class Vocab:
    # Type stub for ``Vocab``: declarations only, no behaviour is defined in
    # this file. Defaults written as ``...`` are elided; the actual values
    # live in the implementation.

    # --- Instance attributes ------------------------------------------------
    cfg: Dict[str, Any]
    # Optional callable taking a Doc or a Span and yielding Spans.
    get_noun_chunks: Optional[Callable[[Union[Doc, Span]], Iterator[Span]]]
    lookups: Lookups
    morphology: Morphology
    strings: StringStore
    vectors: Vectors
    writing_system: Dict[str, Any]

    # --- Construction -------------------------------------------------------
    # Every argument has a default. ``strings`` accepts either a plain list
    # of str or an existing StringStore.
    def __init__(
        self,
        lex_attr_getters: Optional[Dict[str, Callable[[str], Any]]] = ...,
        strings: Optional[Union[List[str], StringStore]] = ...,
        lookups: Optional[Lookups] = ...,
        oov_prob: float = ...,
        vectors_name: Optional[str] = ...,
        writing_system: Dict[str, Any] = ...,
        get_noun_chunks: Optional[Callable[[Union[Doc, Span]], Iterator[Span]]] = ...,
    ) -> None: ...

    # Read-only property (no setter is declared in this stub).
    @property
    def lang(self) -> str: ...

    # --- Container protocol -------------------------------------------------
    # Iteration and indexing both produce ``Lexeme`` objects; indexing accepts
    # either a str or an int.
    def __len__(self) -> int: ...
    def add_flag(
        self, flag_getter: Callable[[str], bool], flag_id: int = ...
    ) -> int: ...
    def __contains__(self, key: str) -> bool: ...
    def __iter__(self) -> Iterator[Lexeme]: ...
    def __getitem__(self, id_or_string: Union[str, int]) -> Lexeme: ...

    # --- Vectors ------------------------------------------------------------
    # Read-only property (no setter is declared in this stub).
    @property
    def vectors_length(self) -> int: ...

    # ``width`` and ``shape`` are keyword-only.
    # NOTE(review): ``shape`` is typed as an int here; confirm against the
    # implementation whether a (rows, width) tuple is what is really expected.
    def reset_vectors(
        self, *, width: Optional[int] = ..., shape: Optional[int] = ...
    ) -> None: ...
    def deduplicate_vectors(self) -> None: ...

    # NOTE(review): the value type of the returned dict is declared as float;
    # verify against the implementation / API docs that this matches.
    def prune_vectors(self, nr_row: int, batch_size: int = ...) -> Dict[str, float]: ...

    # The vector accessors take the key (``orth``) as either an int or a str.
    def get_vector(
        self,
        orth: Union[int, str],
        minn: Optional[int] = ...,
        maxn: Optional[int] = ...,
    ) -> FloatsXd: ...
    def set_vector(self, orth: Union[int, str], vector: Floats1d) -> None: ...
    def has_vector(self, orth: Union[int, str]) -> bool: ...

    # --- Serialization ------------------------------------------------------
    # ``exclude`` is keyword-only on all four methods. The ``from_*`` variants
    # are declared to return a ``Vocab``; the ``to_*`` variants return
    # ``None`` (disk) or ``bytes``.
    def to_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = ...
    ) -> None: ...
    def from_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = ...
    ) -> Vocab: ...
    def to_bytes(self, *, exclude: Iterable[str] = ...) -> bytes: ...
    def from_bytes(
        self, bytes_data: bytes, *, exclude: Iterable[str] = ...
    ) -> Vocab: ...

    # Context manager that yields a ``Pool``; ``mem`` is optional and
    # defaults to ``None``.
    @contextmanager
    def memory_zone(self, mem: Optional[Pool] = None) -> Iterator[Pool]: ...
# Stub signature: takes a single ``Vocab`` and returns a value typed as
# ``Any``. Presumably the helper used when pickling a ``Vocab`` -- confirm
# against the implementation.
def pickle_vocab(
    vocab: Vocab,
) -> Any:
    ...
# Stub signature: takes seven required parameters and returns a ``Vocab``.
# Only ``sstore`` carries a concrete type (``StringStore``); the remaining
# parameters are typed ``Any``. Presumably the counterpart of
# ``pickle_vocab`` -- confirm against the implementation.
def unpickle_vocab(
    sstore: StringStore, vectors: Any, morphology: Any, _unused_object: Any,
    lex_attr_getters: Any, lookups: Any, get_noun_chunks: Any,
) -> Vocab:
    ...