Provide debug data info for floret vectors (#10592)

This commit is contained in:
Adriane Boyd 2022-03-31 15:11:32 +02:00 committed by GitHub
parent 36d3af3013
commit e3ccc1973b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 29 additions and 20 deletions

View File

@@ -19,6 +19,7 @@ from ..morphology import Morphology
from ..language import Language from ..language import Language
from ..util import registry, resolve_dot_names from ..util import registry, resolve_dot_names
from ..compat import Literal from ..compat import Literal
from ..vectors import Mode as VectorsMode
from .. import util from .. import util
@@ -170,6 +171,14 @@ def debug_data(
show=verbose, show=verbose,
) )
if len(nlp.vocab.vectors): if len(nlp.vocab.vectors):
if nlp.vocab.vectors.mode == VectorsMode.floret:
msg.info(
f"floret vectors with {len(nlp.vocab.vectors)} vectors, "
f"{nlp.vocab.vectors_length} dimensions, "
f"{nlp.vocab.vectors.minn}-{nlp.vocab.vectors.maxn} char "
f"n-gram subwords"
)
else:
msg.info( msg.info(
f"{len(nlp.vocab.vectors)} vectors ({nlp.vocab.vectors.n_keys} " f"{len(nlp.vocab.vectors)} vectors ({nlp.vocab.vectors.n_keys} "
f"unique keys, {nlp.vocab.vectors_length} dimensions)" f"unique keys, {nlp.vocab.vectors_length} dimensions)"