Auto-generate README in spacy package

This commit is contained in:
Ines Montani 2021-06-22 12:06:25 +10:00
parent caba63b74f
commit cdcbd1023a
1 changed files with 105 additions and 1 deletions

View File

@ -1,7 +1,7 @@
from typing import Optional, Union, Any, Dict, List, Tuple from typing import Optional, Union, Any, Dict, List, Tuple
import shutil import shutil
from pathlib import Path from pathlib import Path
from wasabi import Printer, get_raw_input from wasabi import Printer, MarkdownRenderer, get_raw_input
import srsly import srsly
import sys import sys
@ -134,6 +134,11 @@ def package(
file_path = package_path / model_name_v / file_name file_path = package_path / model_name_v / file_name
if file_path.exists(): if file_path.exists():
shutil.copy(str(file_path), str(main_path)) shutil.copy(str(file_path), str(main_path))
readme_path = main_path / "README.md"
if not readme_path.exists():
readme = generate_readme(meta)
create_file(readme_path, readme)
create_file(package_path / model_name_v / "README.md", readme)
imports = [] imports = []
for code_path in code_paths: for code_path in code_paths:
imports.append(code_path.stem) imports.append(code_path.stem)
@ -234,6 +239,105 @@ def generate_meta(existing_meta: Dict[str, Any], msg: Printer) -> Dict[str, Any]
return meta return meta
def generate_readme(meta: Dict[str, Any]) -> str:
    """
    Generate a Markdown-formatted README text from a model meta.json. Used
    within the GitHub release notes and as content for README.md file added
    to model packages.

    meta (Dict[str, Any]): The model meta. Must contain "lang", "name",
        "version" and "spacy_version" (a KeyError is raised otherwise). All
        other keys are optional and may be missing or set to None.
    RETURNS (str): The rendered Markdown text.
    """
    md = MarkdownRenderer()
    lang = meta["lang"]
    name = f"{lang}_{meta['name']}"
    version = meta["version"]
    # Use "or" fallbacks rather than .get() defaults: a key can be present in
    # the meta but explicitly set to None, which a .get() default won't catch.
    pipeline = ", ".join([md.code(p) for p in meta.get("pipeline") or []])
    components = ", ".join([md.code(p) for p in meta.get("components") or []])
    vecs = meta.get("vectors") or {}
    vectors = (
        f"{vecs.get('keys', 0)} keys, {vecs.get('vectors', 0)} unique vectors "
        f"({vecs.get('width', 0)} dimensions)"
    )
    author = meta.get("author") or "n/a"
    url = meta.get("url")
    license_name = meta.get("license") or "n/a"
    description = meta.get("description")
    notes = meta.get("notes")
    label_scheme = _format_label_scheme(meta.get("labels"))
    accuracy = _format_accuracy(meta.get("performance"))
    table_data = [
        (md.bold("Name"), md.code(name)),
        (md.bold("Version"), md.code(version)),
        (md.bold("spaCy"), md.code(meta["spacy_version"])),
        (md.bold("Default Pipeline"), pipeline),
        (md.bold("Components"), components),
        (md.bold("Vectors"), vectors),
        (md.bold("Sources"), _format_sources(meta.get("sources"))),
        (md.bold("License"), md.code(license_name)),
        # Only link the author if there is actually a URL to link to
        (md.bold("Author"), md.link(author, url) if url else author),
    ]
    # Put together Markdown body. Optional parts are only added when they
    # have content, so the README has no blank paragraphs or bare headings.
    if description:
        md.add(description)
    md.add(md.table(table_data, ["Feature", "Description"]))
    if label_scheme:
        md.add(md.title(3, "Label Scheme"))
        md.add(label_scheme)
    if accuracy:
        md.add(md.title(3, "Accuracy"))
        md.add(accuracy)
    if notes:
        md.add(notes)
    return md.text
def _format_sources(data: Any) -> str:
if not data or not isinstance(data, list):
return "n/a"
sources = []
for source in data:
if not isinstance(source, dict):
source = {"name": source}
name = source.get("name")
if not name:
continue
url = source.get("url")
author = source.get("author")
result = name if not url else "[{}]({})".format(name, url)
if author:
result += " ({})".format(author)
sources.append(result)
return "<br />".join(sources)
def _format_accuracy(data: Dict[str, Any], exclude: List[str] = ["speed"]) -> str:
if not data:
return ""
md = MarkdownRenderer()
scalars = [(k, v) for k, v in data.items() if isinstance(v, (int, float))]
scores = [
(md.code(acc.upper()), f"{score*100:.2f}")
for acc, score in scalars
if acc not in exclude
]
md.add(md.table(scores, ["Type", "Score"]))
return md.text
def _format_label_scheme(data: Dict[str, Any]) -> str:
if not data:
return ""
md = MarkdownRenderer()
n_labels = 0
n_pipes = 0
label_data = []
for pipe, labels in data.items():
if not labels:
continue
col1 = md.bold(md.code(pipe))
col2 = ", ".join(
[md.code(label.replace("|", "\|")) for label in labels]
) # noqa: W605
label_data.append((col1, col2))
n_labels += len(labels)
n_pipes += 1
if not label_data:
return ""
label_info = f"View label scheme ({n_labels} labels for {n_pipes} components)"
md.add("<details>")
md.add(f"<summary>{label_info}</summary>")
md.add(md.table(label_data, ["Component", "Labels"]))
md.add("</details>")
return md.text
TEMPLATE_SETUP = """ TEMPLATE_SETUP = """
#!/usr/bin/env python #!/usr/bin/env python
import io import io