Merge branch 'develop' of https://github.com/explosion/spaCy into develop

This commit is contained in:
Matthew Honnibal 2020-08-27 03:22:11 +02:00
commit e1e1760fd6
8 changed files with 17 additions and 72 deletions

View File

@@ -5,7 +5,7 @@ thinc>=8.0.0a30,<8.0.0a40
blis>=0.4.0,<0.5.0
ml_datasets>=0.1.1
murmurhash>=0.28.0,<1.1.0
wasabi>=0.7.1,<1.1.0
wasabi>=0.8.0,<1.1.0
srsly>=2.1.0,<3.0.0
catalogue>=0.0.7,<1.1.0
typer>=0.3.0,<0.4.0

View File

@@ -42,7 +42,7 @@ install_requires =
preshed>=3.0.2,<3.1.0
thinc>=8.0.0a30,<8.0.0a40
blis>=0.4.0,<0.5.0
wasabi>=0.7.1,<1.1.0
wasabi>=0.8.0,<1.1.0
srsly>=2.1.0,<3.0.0
catalogue>=0.0.7,<1.1.0
typer>=0.3.0,<0.4.0

View File

@@ -1,7 +1,7 @@
from typing import Optional, Dict, Any, Union
import platform
from pathlib import Path
from wasabi import Printer
from wasabi import Printer, MarkdownRenderer
import srsly
from ._util import app, Arg, Opt
@@ -97,12 +97,13 @@ def get_markdown(data: Dict[str, Any], title: Optional[str] = None) -> str:
title (str / None): Title, will be rendered as headline 2.
RETURNS (str): The Markdown string.
"""
markdown = []
md = MarkdownRenderer()
if title:
md.add(md.title(2, title))
items = []
for key, value in data.items():
if isinstance(value, str) and Path(value).exists():
continue
markdown.append(f"* **{key}:** {value}")
result = "\n{}\n".format("\n".join(markdown))
if title:
result = f"\n## {title}\n{result}"
return result
items.append(f"{md.bold(f'{key}:')} {value}")
md.add(md.list(items))
return f"\n{md.text}\n"

View File

@@ -1,6 +1,5 @@
from typing import Iterable, Optional
from pathlib import Path
from wasabi import msg
from wasabi import msg, MarkdownRenderer
from ...util import working_dir
from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
@@ -107,34 +106,3 @@ def project_document(
with output_file.open("w") as f:
f.write(content)
msg.good("Saved project documentation", output_file)
class MarkdownRenderer:
    """Simple helper for generating raw Markdown.

    Sections are accumulated with add() and joined into one document by
    the `text` property, separated by blank lines. The rendering methods
    (table, title, code, link) return Markdown strings without storing
    them — callers pass the result to add() themselves.
    """

    def __init__(self, no_emoji: bool = False):
        # no_emoji: if True, title() silently drops any emoji prefix.
        self.data = []
        self.no_emoji = no_emoji

    @property
    def text(self):
        """The full document: all added sections, blank-line separated."""
        return "\n\n".join(self.data)

    def add(self, content: str) -> None:
        """Append one rendered Markdown section to the document."""
        self.data.append(content)

    def table(self, data: Iterable[Iterable[str]], header: Iterable[str]) -> str:
        """Render a pipe-delimited Markdown table with a header row and
        a `---` divider row.

        data (Iterable[Iterable[str]]): The body rows, as cell strings.
        header (Iterable[str]): The header cells.
        RETURNS (str): The rendered table.
        """
        def render_row(cells: Iterable[str]) -> str:
            # One table row: cells joined by " | " and wrapped in pipes.
            return "| " + " | ".join(cells) + " |"

        header = list(header)
        head_row = render_row(header)
        divider_row = render_row("---" for _ in header)
        body = "\n".join(render_row(cells) for cells in data)
        return "\n".join((head_row, divider_row, body))

    def title(self, level: int, text: str, emoji: Optional[str] = None) -> str:
        """Render a Markdown heading of the given level, optionally
        prefixed with an emoji (unless no_emoji was set)."""
        hashes = "#" * level
        if emoji and not self.no_emoji:
            return f"{hashes} {emoji} {text}"
        return f"{hashes} {text}"

    def code(self, text: str) -> str:
        """Render inline code."""
        return "`" + text + "`"

    def link(self, text: str, url: str) -> str:
        """Render an inline hyperlink."""
        return "[" + text + "](" + url + ")"

View File

@@ -273,10 +273,6 @@ class Errors:
"existing extension, set `force=True` on `{obj}.set_extension`.")
E091 = ("Invalid extension attribute {name}: expected callable or None, "
"but got: {value}")
E092 = ("Could not find or assign name for word vectors. Ususally, the "
"name is read from the model's meta.json in vector.name. "
"Alternatively, it is built from the 'lang' and 'name' keys in "
"the meta.json. Vector names are required to avoid issue #1660.")
E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}")
E094 = ("Error reading line {line_num} in vectors file {loc}.")
E095 = ("Can't write to frozen dictionary. This is likely an internal "

View File

@@ -1538,7 +1538,6 @@ class Language:
def deserialize_vocab(path: Path) -> None:
if path.exists():
self.vocab.from_disk(path)
_fix_pretrained_vectors_name(self)
path = util.ensure_path(path)
deserializers = {}
@@ -1605,14 +1604,10 @@ class Language:
# from self.vocab.vectors, so set the name directly
self.vocab.vectors.name = data.get("vectors", {}).get("name")
def deserialize_vocab(b):
self.vocab.from_bytes(b)
_fix_pretrained_vectors_name(self)
deserializers = {}
deserializers["config.cfg"] = lambda b: self.config.from_bytes(b)
deserializers["meta.json"] = deserialize_meta
deserializers["vocab"] = deserialize_vocab
deserializers["vocab"] = self.vocab.from_bytes
deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes(
b, exclude=["vocab"]
)
@@ -1646,25 +1641,6 @@ class FactoryMeta:
default_score_weights: Optional[Dict[str, float]] = None # noqa: E704
def _fix_pretrained_vectors_name(nlp: Language) -> None:
    """Backfill a name for the vocab's pretrained vectors and record it
    in every pipeline component's config under "deprecation_fixes".

    The name is taken from meta["vectors"]["name"] if present; otherwise
    it is cleared when there are no vectors, or built from the meta's
    'lang' and 'name' keys.

    nlp (Language): The pipeline whose vocab vectors are fixed in place.
    RAISES (ValueError): Errors.E092 if no name can be determined.
    """
    # TODO: Replace this once we handle vectors consistently as static
    # data
    meta = nlp.meta
    vectors = nlp.vocab.vectors
    if "name" in meta.get("vectors", {}):
        # Explicit name stored in the model meta wins.
        vectors.name = meta["vectors"]["name"]
    elif not vectors.size:
        # No vectors at all: nothing to name.
        vectors.name = None
    elif "name" in meta and "lang" in meta:
        # Fall back to a conventional "<lang>_<name>.vectors" identifier.
        vectors.name = f"{meta['lang']}_{meta['name']}.vectors"
    else:
        raise ValueError(Errors.E092)
    for _, proc in nlp.pipeline:
        # Only components exposing a dict-valued .cfg can record the fix.
        cfg = getattr(proc, "cfg", None)
        if isinstance(cfg, dict):
            cfg.setdefault("deprecation_fixes", {})
            cfg["deprecation_fixes"]["vectors_name"] = vectors.name
class DisabledPipes(list):
"""Manager for temporary pipeline disabling."""

View File

@@ -146,8 +146,12 @@ validation error with more details.
> #### Example
>
> ```cli
> $ python -m spacy init fill-config base.cfg config.cfg
> $ python -m spacy init fill-config base.cfg config.cfg --diff
> ```
>
> #### Example diff
>
> ![Screenshot of visual diff in terminal](../images/cli_init_fill-config_diff.jpg)
```cli
$ python -m spacy init fill-config [base_path] [output_file] [--diff]

Binary file not shown.

After

Width:  |  Height:  |  Size: 202 KiB