mirror of https://github.com/explosion/spaCy.git
Fix Vectors.n_keys for floret vectors (#10394)
Fix `Vectors.n_keys` for floret vectors to match docstring description and avoid W007 warnings in similarity methods.
This commit is contained in:
parent
3f68bbcfec
commit
8e93fa8507
|
@ -535,6 +535,10 @@ def test_floret_vectors(floret_vectors_vec_str, floret_vectors_hashvec_str):
|
||||||
# every word has a vector
|
# every word has a vector
|
||||||
assert nlp.vocab[word * 5].has_vector
|
assert nlp.vocab[word * 5].has_vector
|
||||||
|
|
||||||
|
# n_keys is -1 for floret
|
||||||
|
assert nlp_plain.vocab.vectors.n_keys > 0
|
||||||
|
assert nlp.vocab.vectors.n_keys == -1
|
||||||
|
|
||||||
# check that single and batched vector lookups are identical
|
# check that single and batched vector lookups are identical
|
||||||
words = [s for s in nlp_plain.vocab.vectors]
|
words = [s for s in nlp_plain.vocab.vectors]
|
||||||
single_vecs = OPS.to_numpy(OPS.asarray([nlp.vocab[word].vector for word in words]))
|
single_vecs = OPS.to_numpy(OPS.asarray([nlp.vocab[word].vector for word in words]))
|
||||||
|
|
|
@ -170,6 +170,8 @@ cdef class Vectors:
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vectors#n_keys
|
DOCS: https://spacy.io/api/vectors#n_keys
|
||||||
"""
|
"""
|
||||||
|
if self.mode == Mode.floret:
|
||||||
|
return -1
|
||||||
return len(self.key2row)
|
return len(self.key2row)
|
||||||
|
|
||||||
def __reduce__(self):
|
def __reduce__(self):
|
||||||
|
|
|
@ -328,8 +328,8 @@ will be counted individually. In `floret` mode, the keys table is not used.
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ----------- | -------------------------------------------- |
|
| ----------- | ----------------------------------------------------------------------------- |
|
||||||
| **RETURNS** | The number of all keys in the table. ~~int~~ |
|
| **RETURNS** | The number of all keys in the table. Returns `-1` for floret vectors. ~~int~~ |
|
||||||
|
|
||||||
## Vectors.most_similar {#most_similar tag="method"}
|
## Vectors.most_similar {#most_similar tag="method"}
|
||||||
|
|
||||||
|
@ -348,7 +348,7 @@ supported for `floret` mode.
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| -------------- | --------------------------------------------------------------------------- |
|
| -------------- | --------------------------------------------------------------------------- |
|
||||||
| `queries` | An array with one or more vectors. ~~numpy.ndarray~~ |
|
| `queries` | An array with one or more vectors. ~~numpy.ndarray~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `batch_size` | The batch size to use. Defaults to `1024`. ~~int~~ |
|
| `batch_size` | The batch size to use. Defaults to `1024`. ~~int~~ |
|
||||||
|
@ -385,7 +385,7 @@ Change the embedding matrix to use different Thinc ops.
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
|-------|----------------------------------------------------------|
|
| ----- | -------------------------------------------------------- |
|
||||||
| `ops` | The Thinc ops to switch the embedding matrix to. ~~Ops~~ |
|
| `ops` | The Thinc ops to switch the embedding matrix to. ~~Ops~~ |
|
||||||
|
|
||||||
## Vectors.to_disk {#to_disk tag="method"}
|
## Vectors.to_disk {#to_disk tag="method"}
|
||||||
|
|
Loading…
Reference in New Issue