mirror of https://github.com/explosion/spaCy.git
Raise error in spacy package when model name is not a valid python identifier (#10192)
* MultiHashEmbed vector docs correction
* raise error for invalid identifier as model name
* more succinct error message
* update success message
* permitted package name + double underscore
* clarify package name error
* clarify underscore run message
* tweak language + simplify underscore run
* cleanup underscore run warning
* spacing correction
* Update spacy/tests/test_cli.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
3877f78ff9
commit
ee662ec381
|
@ -7,6 +7,7 @@ from collections import defaultdict
|
|||
from catalogue import RegistryError
|
||||
import srsly
|
||||
import sys
|
||||
import re
|
||||
|
||||
from ._util import app, Arg, Opt, string_to_list, WHEEL_SUFFIX, SDIST_SUFFIX
|
||||
from ..schemas import validate, ModelMetaSchema
|
||||
|
@ -109,6 +110,24 @@ def package(
|
|||
", ".join(meta["requirements"]),
|
||||
)
|
||||
if name is not None:
|
||||
if not name.isidentifier():
|
||||
msg.fail(
|
||||
f"Model name ('{name}') is not a valid module name. "
|
||||
"This is required so it can be imported as a module.",
|
||||
"We recommend names that use ASCII A-Z, a-z, _ (underscore), "
|
||||
"and 0-9. "
|
||||
"For specific details see: https://docs.python.org/3/reference/lexical_analysis.html#identifiers",
|
||||
exits=1,
|
||||
)
|
||||
if not _is_permitted_package_name(name):
|
||||
msg.fail(
|
||||
f"Model name ('{name}') is not a permitted package name. "
|
||||
"This is required to correctly load the model with spacy.load.",
|
||||
"We recommend names that use ASCII A-Z, a-z, _ (underscore), "
|
||||
"and 0-9. "
|
||||
"For specific details see: https://www.python.org/dev/peps/pep-0426/#name",
|
||||
exits=1,
|
||||
)
|
||||
meta["name"] = name
|
||||
if version is not None:
|
||||
meta["version"] = version
|
||||
|
@ -162,7 +181,7 @@ def package(
|
|||
imports="\n".join(f"from . import {m}" for m in imports)
|
||||
)
|
||||
create_file(package_path / "__init__.py", init_py)
|
||||
msg.good(f"Successfully created package '{model_name_v}'", main_path)
|
||||
msg.good(f"Successfully created package directory '{model_name_v}'", main_path)
|
||||
if create_sdist:
|
||||
with util.working_dir(main_path):
|
||||
util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
|
||||
|
@ -171,8 +190,14 @@ def package(
|
|||
if create_wheel:
|
||||
with util.working_dir(main_path):
|
||||
util.run_command([sys.executable, "setup.py", "bdist_wheel"], capture=False)
|
||||
wheel = main_path / "dist" / f"{model_name_v}{WHEEL_SUFFIX}"
|
||||
wheel_name_squashed = re.sub("_+", "_", model_name_v)
|
||||
wheel = main_path / "dist" / f"{wheel_name_squashed}{WHEEL_SUFFIX}"
|
||||
msg.good(f"Successfully created binary wheel", wheel)
|
||||
if "__" in model_name:
|
||||
msg.warn(
|
||||
f"Model name ('{model_name}') contains a run of underscores. "
|
||||
"Runs of underscores are not significant in installed package names.",
|
||||
)
|
||||
|
||||
|
||||
def has_wheel() -> bool:
|
||||
|
@ -422,6 +447,14 @@ def _format_label_scheme(data: Dict[str, Any]) -> str:
|
|||
return md.text
|
||||
|
||||
|
||||
def _is_permitted_package_name(package_name: str) -> bool:
|
||||
# regex from: https://www.python.org/dev/peps/pep-0426/#name
|
||||
permitted_match = re.search(
|
||||
r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", package_name, re.IGNORECASE
|
||||
)
|
||||
return permitted_match is not None
|
||||
|
||||
|
||||
TEMPLATE_SETUP = """
|
||||
#!/usr/bin/env python
|
||||
import io
|
||||
|
|
|
@ -17,6 +17,7 @@ from spacy.cli.debug_data import _get_labels_from_spancat
|
|||
from spacy.cli.download import get_compatibility, get_version
|
||||
from spacy.cli.init_config import RECOMMENDATIONS, init_config, fill_config
|
||||
from spacy.cli.package import get_third_party_dependencies
|
||||
from spacy.cli.package import _is_permitted_package_name
|
||||
from spacy.cli.validate import get_model_pkgs
|
||||
from spacy.lang.en import English
|
||||
from spacy.lang.nl import Dutch
|
||||
|
@ -695,6 +696,17 @@ def test_get_labels_from_model(factory_name, pipe_name):
|
|||
assert _get_labels_from_model(nlp, factory_name) == set(labels)
|
||||
|
||||
|
||||
def test_permitted_package_names():
    """Check that package names are validated against the PEP 426 name rule."""
    # https://www.python.org/dev/peps/pep-0426/#name
    # Names that must be rejected: non-ASCII characters, or a leading or
    # trailing separator character.
    assert not _is_permitted_package_name("Meine_Bäume")
    assert not _is_permitted_package_name("_package")
    assert not _is_permitted_package_name("package_")
    assert not _is_permitted_package_name(".package")
    assert not _is_permitted_package_name("package.")
    assert not _is_permitted_package_name("-package")
    assert not _is_permitted_package_name("package-")
    # Names that must be accepted, so that an implementation rejecting
    # everything cannot pass this test.
    assert _is_permitted_package_name("package")
    assert _is_permitted_package_name("my_package")
    assert _is_permitted_package_name("p")
|
||||
|
||||
|
||||
def test_debug_data_compile_gold():
|
||||
nlp = English()
|
||||
pred = Doc(nlp.vocab, words=["Token", ".", "New", "York", "City"])
|
||||
|
@ -707,4 +719,4 @@ def test_debug_data_compile_gold():
|
|||
ref = Doc(nlp.vocab, words=["Token", ".", "New York City"], sent_starts=[True, False, True], ents=["O", "B-ENT", "I-ENT"])
|
||||
eg = Example(pred, ref)
|
||||
data = _compile_gold([eg], ["ner"], nlp, True)
|
||||
assert data["boundary_cross_ents"] == 1
|
||||
assert data["boundary_cross_ents"] == 1
|
Loading…
Reference in New Issue