spaCy/spacy/__init__.py

import sys
from pathlib import Path
from typing import Any, Dict, Iterable, Union

# set library-specific custom warning handling before doing anything else
from .errors import setup_default_warnings

setup_default_warnings()  # noqa: E402

# These are imported as part of the API
from thinc.api import Config, prefer_gpu, require_cpu, require_gpu  # noqa: F401

from . import pipeline  # noqa: F401
from . import util
from .about import __version__  # noqa: F401
from .cli.info import info  # noqa: F401
from .errors import Errors
from .glossary import explain  # noqa: F401
from .language import Language
from .util import logger, registry  # noqa: F401
from .vocab import Vocab

if sys.maxunicode == 65535:
    raise SystemError(Errors.E130)


def load(
    name: Union[str, Path],
    *,
    vocab: Union[Vocab, bool] = True,
    disable: Union[str, Iterable[str]] = util._DEFAULT_EMPTY_PIPES,
    enable: Union[str, Iterable[str]] = util._DEFAULT_EMPTY_PIPES,
    exclude: Union[str, Iterable[str]] = util._DEFAULT_EMPTY_PIPES,
    config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
) -> Language:
    """Load a spaCy model from an installed package or a local path.

    name (str): Package name or model path.
    vocab (Vocab): A Vocab object. If True, a vocab is created.
    disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
        pipes will be loaded but they won't be run unless you explicitly
        enable them by calling nlp.enable_pipe.
    enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
        pipes will be disabled (but can be enabled later using nlp.enable_pipe).
    exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
        components won't be loaded.
    config (Dict[str, Any] / Config): Config overrides as nested dict or dict
        keyed by section values in dot notation.
    RETURNS (Language): The loaded nlp object.
    """
    return util.load_model(
        name,
        vocab=vocab,
        disable=disable,
        enable=enable,
        exclude=exclude,
        config=config,
    )


def blank(
    name: str,
    *,
    vocab: Union[Vocab, bool] = True,
    config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
    meta: Dict[str, Any] = util.SimpleFrozenDict(),
) -> Language:
    """Create a blank nlp object for a given language code.

    name (str): The language code, e.g. "en".
    vocab (Vocab): A Vocab object. If True, a vocab is created.
    config (Dict[str, Any] / Config): Optional config overrides.
    meta (Dict[str, Any]): Overrides for nlp.meta.
    RETURNS (Language): The nlp object.
    """
    LangClass = util.get_lang_class(name)
    # We should accept both dot notation and nested dict here for consistency
    config = util.dot_to_dict(config)
    return LangClass.from_config(config, vocab=vocab, meta=meta)