2021-08-07 10:30:03 +00:00
|
|
|
from typing import (
|
|
|
|
Callable,
|
|
|
|
Iterator,
|
|
|
|
Optional,
|
|
|
|
Union,
|
|
|
|
Tuple,
|
|
|
|
List,
|
|
|
|
Dict,
|
|
|
|
Any,
|
|
|
|
)
|
|
|
|
from thinc.types import Floats1d, FloatsXd
|
|
|
|
from . import Language
|
|
|
|
from .strings import StringStore
|
|
|
|
from .lexeme import Lexeme
|
|
|
|
from .lookups import Lookups
|
|
|
|
from .tokens import Doc, Span
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
def create_vocab(
|
|
|
|
lang: Language, defaults: Any, vectors_name: Optional[str] = ...
|
|
|
|
) -> Vocab: ...
|
|
|
|
|
|
|
|
class Vocab:
|
|
|
|
def __init__(
|
|
|
|
self,
|
|
|
|
lex_attr_getters: Optional[Dict[str, Callable[[str], Any]]] = ...,
|
|
|
|
strings: Optional[Union[List[str], StringStore]] = ...,
|
|
|
|
lookups: Optional[Lookups] = ...,
|
|
|
|
oov_prob: float = ...,
|
|
|
|
vectors_name: Optional[str] = ...,
|
|
|
|
writing_system: Dict[str, Any] = ...,
|
|
|
|
get_noun_chunks: Optional[Callable[[Union[Doc, Span]], Iterator[Span]]] = ...,
|
|
|
|
) -> None: ...
|
|
|
|
@property
|
|
|
|
def lang(self) -> Language: ...
|
|
|
|
def __len__(self) -> int: ...
|
|
|
|
def add_flag(
|
|
|
|
self, flag_getter: Callable[[str], bool], flag_id: int = ...
|
|
|
|
) -> int: ...
|
|
|
|
def __contains__(self, key: str) -> bool: ...
|
|
|
|
def __iter__(self) -> Iterator[Lexeme]: ...
|
|
|
|
def __getitem__(self, id_or_string: Union[str, int]) -> Lexeme: ...
|
|
|
|
@property
|
|
|
|
def vectors_length(self) -> int: ...
|
|
|
|
def reset_vectors(
|
|
|
|
self, *, width: Optional[int] = ..., shape: Optional[int] = ...
|
|
|
|
) -> None: ...
|
|
|
|
def prune_vectors(self, nr_row: int, batch_size: int = ...) -> Dict[str, float]: ...
|
|
|
|
def get_vector(
|
|
|
|
self,
|
|
|
|
orth: Union[int, str],
|
|
|
|
minn: Optional[int] = ...,
|
|
|
|
maxn: Optional[int] = ...,
|
|
|
|
) -> FloatsXd: ...
|
|
|
|
def set_vector(self, orth: Union[int, str], vector: Floats1d) -> None: ...
|
|
|
|
def has_vector(self, orth: Union[int, str]) -> bool: ...
|
|
|
|
lookups: Lookups
|
|
|
|
def to_disk(
|
|
|
|
self, path: Union[str, Path], *, exclude: Union[List[str], Tuple[str]] = ...
|
|
|
|
) -> None: ...
|
|
|
|
def from_disk(
|
|
|
|
self, path: Union[str, Path], *, exclude: Union[List[str], Tuple[str]] = ...
|
|
|
|
) -> Vocab: ...
|
|
|
|
def to_bytes(self, *, exclude: Union[List[str], Tuple[str]] = ...) -> bytes: ...
|
|
|
|
def from_bytes(
|
|
|
|
self, bytes_data: bytes, *, exclude: Union[List[str], Tuple[str]] = ...
|
|
|
|
) -> Vocab: ...
|
|
|
|
|
|
|
|
def pickle_vocab(vocab: Vocab) -> Any: ...
|
|
|
|
def unpickle_vocab(
|
|
|
|
sstore: StringStore,
|
|
|
|
vectors: Any,
|
|
|
|
morphology: Any,
|
2021-09-15 21:07:21 +00:00
|
|
|
_unused_object: Any,
|
2021-08-07 10:30:03 +00:00
|
|
|
lex_attr_getters: Any,
|
|
|
|
lookups: Any,
|
|
|
|
get_noun_chunks: Any,
|
|
|
|
) -> Vocab: ...
|