From 1c3bcfb4880a4ecb20e514ac3b73460ac286948b Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 18 Aug 2020 01:22:59 +0200 Subject: [PATCH] Update docs and util consistency --- spacy/util.py | 86 ++++++++++++++++++++++++++++------ website/docs/api/language.md | 2 +- website/docs/api/top-level.md | 36 ++++++++++---- website/src/components/code.js | 6 +-- 4 files changed, 103 insertions(+), 27 deletions(-) diff --git a/spacy/util.py b/spacy/util.py index e8f78f2f2..5eff82866 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -249,7 +249,16 @@ def load_model_from_package( disable: Iterable[str] = tuple(), config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), ) -> "Language": - """Load a model from an installed package.""" + """Load a model from an installed package. + + name (str): The package name. + vocab (Vocab / True): Optional vocab to pass in on initialization. If True, + a new Vocab object will be created. + disable (Iterable[str]): Names of pipeline components to disable. + config (Dict[str, Any] / Config): Config overrides as nested dict or dict + keyed by section values in dot notation. + RETURNS (Language): The loaded nlp object. + """ cls = importlib.import_module(name) return cls.load(vocab=vocab, disable=disable, config=config) @@ -263,7 +272,17 @@ def load_model_from_path( config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), ) -> "Language": """Load a model from a data directory path. Creates Language class with - pipeline from config.cfg and then calls from_disk() with path.""" + pipeline from config.cfg and then calls from_disk() with path. + + name (str): Package name or model path. + meta (Dict[str, Any]): Optional model meta. + vocab (Vocab / True): Optional vocab to pass in on initialization. If True, + a new Vocab object will be created. + disable (Iterable[str]): Names of pipeline components to disable. + config (Dict[str, Any] / Config): Config overrides as nested dict or dict + keyed by section values in dot notation. 
+ RETURNS (Language): The loaded nlp object. + """ if not model_path.exists(): raise IOError(Errors.E052.format(path=model_path)) if not meta: @@ -284,6 +303,15 @@ def load_model_from_config( ) -> Tuple["Language", Config]: """Create an nlp object from a config. Expects the full config file including a section "nlp" containing the settings for the nlp object. + + name (str): Package name or model path. + meta (Dict[str, Any]): Optional model meta. + vocab (Vocab / True): Optional vocab to pass in on initialization. If True, + a new Vocab object will be created. + disable (Iterable[str]): Names of pipeline components to disable. + auto_fill (bool): Whether to auto-fill config with missing defaults. + validate (bool): Whether to show config validation errors. + RETURNS (Tuple[Language, Config]): The loaded nlp object and the config. """ if "nlp" not in config: raise ValueError(Errors.E985.format(config=config)) @@ -308,6 +336,13 @@ def load_model_from_init_py( ) -> "Language": """Helper function to use in the `load()` method of a model package's __init__.py. + + vocab (Vocab / True): Optional vocab to pass in on initialization. If True, + a new Vocab object will be created. + disable (Iterable[str]): Names of pipeline components to disable. + config (Dict[str, Any] / Config): Config overrides as nested dict or dict + keyed by section values in dot notation. + RETURNS (Language): The loaded nlp object. """ model_path = Path(init_file).parent meta = get_model_meta(model_path) @@ -325,7 +360,14 @@ def load_config( overrides: Dict[str, Any] = SimpleFrozenDict(), interpolate: bool = False, ) -> Config: - """Load a config file. Takes care of path validation and section order.""" + """Load a config file. Takes care of path validation and section order. + + path (Union[str, Path]): Path to the config file. + overrides (Dict[str, Any]): Config overrides as nested dict or + dict keyed by section values in dot notation. + interpolate (bool): Whether to interpolate and resolve variables. 
+ RETURNS (Config): The loaded config. + """ config_path = ensure_path(path) if not config_path.exists() or not config_path.is_file(): raise IOError(Errors.E053.format(path=config_path, name="config.cfg")) @@ -337,7 +379,12 @@ def load_config( def load_config_from_str( text: str, overrides: Dict[str, Any] = SimpleFrozenDict(), interpolate: bool = False ): - """Load a full config from a string.""" + """Load a full config from a string. Wrapper around Thinc's Config.from_str. + + text (str): The string config to load. + interpolate (bool): Whether to interpolate and resolve variables. + RETURNS (Config): The loaded config. + """ return Config(section_order=CONFIG_SECTION_ORDER).from_str( text, overrides=overrides, interpolate=interpolate, ) @@ -435,19 +482,18 @@ def get_base_version(version: str) -> str: return Version(version).base_version -def get_model_meta(path: Union[str, Path]) -> Dict[str, Any]: - """Get model meta.json from a directory path and validate its contents. +def load_meta(path: Union[str, Path]) -> Dict[str, Any]: + """Load a model meta.json from a path and validate its contents. - path (str / Path): Path to model directory. - RETURNS (Dict[str, Any]): The model's meta data. + path (Union[str, Path]): Path to meta.json. + RETURNS (Dict[str, Any]): The loaded meta. 
""" - model_path = ensure_path(path) - if not model_path.exists(): - raise IOError(Errors.E052.format(path=model_path)) - meta_path = model_path / "meta.json" - if not meta_path.is_file(): - raise IOError(Errors.E053.format(path=meta_path, name="meta.json")) - meta = srsly.read_json(meta_path) + path = ensure_path(path) + if not path.parent.exists(): + raise IOError(Errors.E052.format(path=path.parent)) + if not path.exists() or not path.is_file(): + raise IOError(Errors.E053.format(path=path, name="meta.json")) + meta = srsly.read_json(path) for setting in ["lang", "name", "version"]: if setting not in meta or not meta[setting]: raise ValueError(Errors.E054.format(setting=setting)) @@ -471,6 +517,16 @@ def get_model_meta(path: Union[str, Path]) -> Dict[str, Any]: return meta +def get_model_meta(path: Union[str, Path]) -> Dict[str, Any]: + """Get model meta.json from a directory path and validate its contents. + + path (str / Path): Path to model directory. + RETURNS (Dict[str, Any]): The model's meta data. + """ + model_path = ensure_path(path) + return load_meta(model_path / "meta.json") + + def is_package(name: str) -> bool: """Check if string maps to a package installed via pip. diff --git a/website/docs/api/language.md b/website/docs/api/language.md index 7d44d47d9..871adc0f2 100644 --- a/website/docs/api/language.md +++ b/website/docs/api/language.md @@ -40,7 +40,7 @@ Initialize a `Language` object. | `meta` | Custom meta data for the `Language` class. Is written to by models to add model meta data. ~~dict~~ | | `create_tokenizer` | Optional function that receives the `nlp` object and returns a tokenizer. ~~Callable[[Language], Callable[[str], Doc]]~~ | -## Language.from_config {#from_config tag="classmethod"} +## Language.from_config {#from_config tag="classmethod" new="3"} Create a `Language` object from a loaded config. 
Will set up the tokenizer and language data, add pipeline components based on the pipeline and components diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md index 0f87b8fd0..c44e4e5b4 100644 --- a/website/docs/api/top-level.md +++ b/website/docs/api/top-level.md @@ -70,7 +70,7 @@ Create a blank model of a given language class. This function is the twin of | `name` | [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) of the language class to load. ~~str~~ | | **RETURNS** | An empty `Language` object of the appropriate subclass. ~~Language~~ | -#### spacy.info {#spacy.info tag="function"} +### spacy.info {#spacy.info tag="function"} The same as the [`info` command](/api/cli#info). Pretty-print information about your installation, models and local setup from within spaCy. To get the model @@ -585,20 +585,40 @@ A helper function to use in the `load()` method of a model package's | `config` 3 | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~ | | **RETURNS** | `Language` class with the loaded model. ~~Language~~ | -### util.get_model_meta {#util.get_model_meta tag="function" new="2"} +### util.load_config {#util.load_config tag="function" new="3"} -Get a model's meta.json from a directory path and validate its contents. +Load a model's [`config.cfg`](/api/data-formats#config) from a file path. The +config typically includes details about the model pipeline and how its +components are created, as well as all training settings and hyperparameters. > #### Example > > ```python -> meta = util.get_model_meta("/path/to/model") +> config = util.load_config("/path/to/model/config.cfg") +> print(config.to_str()) > ``` -| Name | Description | -| ----------- | --------------------------------------------- | -| `path` | Path to model directory. ~~Union[str, Path]~~ | -| **RETURNS** | The model's meta data. 
~~Dict[str, Any]~~ | +| Name | Description | +| ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `path` | Path to the model's `config.cfg`. ~~Union[str, Path]~~ | +| `overrides` | Optional config overrides to replace in loaded config. Can be provided as nested dict, or as flat dict with keys in dot notation, e.g. `"nlp.pipeline"`. ~~Dict[str, Any]~~ | +| `interpolate` | Whether to interpolate the config and replace variables like `${paths:train}` with their values. Defaults to `False`. ~~bool~~ | +| **RETURNS** | The model's config. ~~Config~~ | + +### util.load_meta {#util.load_meta tag="function" new="3"} + +Get a model's `meta.json` from a file path and validate its contents. + +> #### Example +> +> ```python +> meta = util.load_meta("/path/to/model/meta.json") +> ``` + +| Name | Description | +| ----------- | ----------------------------------------------------- | +| `path` | Path to the model's `meta.json`. ~~Union[str, Path]~~ | +| **RETURNS** | The model's meta data. ~~Dict[str, Any]~~ | ### util.is_package {#util.is_package tag="function"} diff --git a/website/src/components/code.js b/website/src/components/code.js index f514c752d..0d1d214ae 100644 --- a/website/src/components/code.js +++ b/website/src/components/code.js @@ -62,12 +62,12 @@ function linkType(el, showLink = true) { export const TypeAnnotation = ({ lang = 'python', link = true, children }) => { // Hacky, but we're temporarily replacing a dot to prevent it from being split during highlighting - const TMP_DOT = '•' + const TMP_DOT = '۔' const code = Array.isArray(children) ? children.join('') : children || '' const [rawText, meta] = code.split(/(?= \(.+\)$)/) - const rawStr = rawText.replace('.', TMP_DOT) + const rawStr = rawText.replace(/\./g, TMP_DOT) const rawHtml = lang === 'none' || !code ? 
code : highlightCode(lang, rawStr) - const html = rawHtml.replace(TMP_DOT, '.').replace(/\n/g, ' ') + const html = rawHtml.replace(new RegExp(TMP_DOT, 'g'), '.').replace(/\n/g, ' ') const result = htmlToReact(html) const elements = Array.isArray(result) ? result : [result] const annotClassNames = classNames(