mirror of https://github.com/explosion/spaCy.git
Auto-generate README in spacy package
This commit is contained in:
parent
caba63b74f
commit
cdcbd1023a
|
@ -1,7 +1,7 @@
|
||||||
from typing import Optional, Union, Any, Dict, List, Tuple
|
from typing import Optional, Union, Any, Dict, List, Tuple
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from wasabi import Printer, get_raw_input
|
from wasabi import Printer, MarkdownRenderer, get_raw_input
|
||||||
import srsly
|
import srsly
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
@ -134,6 +134,11 @@ def package(
|
||||||
file_path = package_path / model_name_v / file_name
|
file_path = package_path / model_name_v / file_name
|
||||||
if file_path.exists():
|
if file_path.exists():
|
||||||
shutil.copy(str(file_path), str(main_path))
|
shutil.copy(str(file_path), str(main_path))
|
||||||
|
readme_path = main_path / "README.md"
|
||||||
|
if not readme_path.exists():
|
||||||
|
readme = generate_readme(meta)
|
||||||
|
create_file(readme_path, readme)
|
||||||
|
create_file(package_path / model_name_v / "README.md", readme)
|
||||||
imports = []
|
imports = []
|
||||||
for code_path in code_paths:
|
for code_path in code_paths:
|
||||||
imports.append(code_path.stem)
|
imports.append(code_path.stem)
|
||||||
|
@ -234,6 +239,105 @@ def generate_meta(existing_meta: Dict[str, Any], msg: Printer) -> Dict[str, Any]
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
|
|
||||||
|
def generate_readme(meta: Dict[str, Any]) -> str:
    """
    Build the Markdown README text for a model from its meta.json content.
    The result is used within the GitHub release notes and as the content of
    the README.md file added to model packages.

    meta: The model meta. "lang", "name", "version" and "spacy_version" are
        read with plain indexing, so a missing key raises a KeyError. All
        other entries are optional and fall back to a default.
    Returns the rendered Markdown as a single string.
    """
    renderer = MarkdownRenderer()
    full_name = f"{meta['lang']}_{meta['name']}"
    model_version = meta["version"]

    # Comma-separated, code-formatted component names
    pipeline_cell = ", ".join(renderer.code(pipe) for pipe in meta.get("pipeline", []))
    components_cell = ", ".join(
        renderer.code(pipe) for pipe in meta.get("components", [])
    )

    # Short summary of the vectors table
    vectors_meta = meta.get("vectors", {})
    n_keys = vectors_meta.get("keys", 0)
    n_vectors = vectors_meta.get("vectors", 0)
    width = vectors_meta.get("width", 0)
    vectors_cell = f"{n_keys} keys, {n_vectors} unique vectors ({width} dimensions)"

    # The author is only turned into a link if the meta provides a URL
    author_name = meta.get("author", "n/a")
    extra_notes = meta.get("notes", "")

    rows = [
        (renderer.bold("Name"), renderer.code(full_name)),
        (renderer.bold("Version"), renderer.code(model_version)),
        (renderer.bold("spaCy"), renderer.code(meta["spacy_version"])),
        (renderer.bold("Default Pipeline"), pipeline_cell),
        (renderer.bold("Components"), components_cell),
        (renderer.bold("Vectors"), vectors_cell),
        (renderer.bold("Sources"), _format_sources(meta.get("sources"))),
        (renderer.bold("License"), renderer.code(meta.get("license", "n/a"))),
    ]
    if "url" in meta:
        author_cell = renderer.link(author_name, meta["url"])
    else:
        author_cell = author_name
    rows.append((renderer.bold("Author"), author_cell))

    # Assemble the document: description, feature table, labels, accuracy
    # and finally the optional free-form notes
    sections = [
        meta.get("description", ""),
        renderer.table(rows, ["Feature", "Description"]),
        renderer.title(3, "Label Scheme"),
        _format_label_scheme(meta.get("labels")),
        renderer.title(3, "Accuracy"),
        _format_accuracy(meta.get("performance")),
    ]
    if extra_notes:
        sections.append(extra_notes)
    for section in sections:
        renderer.add(section)
    return renderer.text
|
||||||
|
|
||||||
|
|
||||||
|
def _format_sources(data: Any) -> str:
|
||||||
|
if not data or not isinstance(data, list):
|
||||||
|
return "n/a"
|
||||||
|
sources = []
|
||||||
|
for source in data:
|
||||||
|
if not isinstance(source, dict):
|
||||||
|
source = {"name": source}
|
||||||
|
name = source.get("name")
|
||||||
|
if not name:
|
||||||
|
continue
|
||||||
|
url = source.get("url")
|
||||||
|
author = source.get("author")
|
||||||
|
result = name if not url else "[{}]({})".format(name, url)
|
||||||
|
if author:
|
||||||
|
result += " ({})".format(author)
|
||||||
|
sources.append(result)
|
||||||
|
return "<br />".join(sources)
|
||||||
|
|
||||||
|
|
||||||
|
def _format_accuracy(data: Dict[str, Any], exclude: List[str] = ["speed"]) -> str:
|
||||||
|
if not data:
|
||||||
|
return ""
|
||||||
|
md = MarkdownRenderer()
|
||||||
|
scalars = [(k, v) for k, v in data.items() if isinstance(v, (int, float))]
|
||||||
|
scores = [
|
||||||
|
(md.code(acc.upper()), f"{score*100:.2f}")
|
||||||
|
for acc, score in scalars
|
||||||
|
if acc not in exclude
|
||||||
|
]
|
||||||
|
md.add(md.table(scores, ["Type", "Score"]))
|
||||||
|
return md.text
|
||||||
|
|
||||||
|
|
||||||
|
def _format_label_scheme(data: Dict[str, Any]) -> str:
|
||||||
|
if not data:
|
||||||
|
return ""
|
||||||
|
md = MarkdownRenderer()
|
||||||
|
n_labels = 0
|
||||||
|
n_pipes = 0
|
||||||
|
label_data = []
|
||||||
|
for pipe, labels in data.items():
|
||||||
|
if not labels:
|
||||||
|
continue
|
||||||
|
col1 = md.bold(md.code(pipe))
|
||||||
|
col2 = ", ".join(
|
||||||
|
[md.code(label.replace("|", "\|")) for label in labels]
|
||||||
|
) # noqa: W605
|
||||||
|
label_data.append((col1, col2))
|
||||||
|
n_labels += len(labels)
|
||||||
|
n_pipes += 1
|
||||||
|
if not label_data:
|
||||||
|
return ""
|
||||||
|
label_info = f"View label scheme ({n_labels} labels for {n_pipes} components)"
|
||||||
|
md.add("<details>")
|
||||||
|
md.add(f"<summary>{label_info}</summary>")
|
||||||
|
md.add(md.table(label_data, ["Component", "Labels"]))
|
||||||
|
md.add("</details>")
|
||||||
|
return md.text
|
||||||
|
|
||||||
|
|
||||||
TEMPLATE_SETUP = """
|
TEMPLATE_SETUP = """
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
import io
|
import io
|
||||||
|
|
Loading…
Reference in New Issue