mirror of https://github.com/explosion/spaCy.git
additional information if doc is empty
This commit is contained in:
parent
1d6aec805d
commit
1724a4f75b
|
@ -5,7 +5,7 @@ import pytest
|
|||
import re
|
||||
from mock import Mock
|
||||
from spacy.matcher import Matcher, DependencyMatcher
|
||||
from spacy.tokens import Doc, Token
|
||||
from spacy.tokens import Doc, Token, Span
|
||||
from ..doc.test_underscore import clean_underscore
|
||||
|
||||
|
||||
|
@ -458,3 +458,10 @@ def test_matcher_callback(en_vocab):
|
|||
doc = Doc(en_vocab, words=["This", "is", "a", "test", "."])
|
||||
matches = matcher(doc)
|
||||
mock.assert_called_once_with(matcher, doc, 0, matches)
|
||||
|
||||
def test_matcher_span(matcher):
    """Run the matcher on a Doc derived from a Span and expect one match.

    A Doc is built from a whitespace-split sentence using the matcher's
    vocab, a Span is created over it with start 0 and end 3, and the
    matcher is applied to the result of ``Span.as_doc()``.
    """
    words = "JavaScript is good but Java is better".split()
    doc = Doc(matcher.vocab, words=words)
    # Convert the span to its own Doc before matching.
    span_doc = Span(doc, 0, 3).as_doc()
    assert len(matcher(span_doc)) == 1
|
|
@ -260,7 +260,7 @@ cdef class Doc:
|
|||
def is_nered(self):
|
||||
"""Check if the document has named entities set. Will return True if
|
||||
*any* of the tokens has a named entity tag set (even if the others are
|
||||
unknown values).
|
||||
unknown values), or if the document is empty.
|
||||
"""
|
||||
if len(self) == 0:
|
||||
return True
|
||||
|
|
|
@ -657,10 +657,10 @@ The L2 norm of the document's vector representation.
|
|||
| `user_data` | - | A generic storage area for custom user data. |
|
||||
| `lang` <Tag variant="new">2.1</Tag> | int | Language of the document's vocabulary. |
|
||||
| `lang_` <Tag variant="new">2.1</Tag> | unicode | Language of the document's vocabulary. |
|
||||
| `is_tagged` | bool | A flag indicating that the document has been part-of-speech tagged. |
|
||||
| `is_parsed` | bool | A flag indicating that the document has been syntactically parsed. |
|
||||
| `is_sentenced` | bool | A flag indicating that sentence boundaries have been applied to the document. |
|
||||
| `is_nered` <Tag variant="new">2.1</Tag> | bool | A flag indicating that named entities have been set. Will return `True` if _any_ of the tokens has an entity tag set, even if the others are unknown. |
|
||||
| `is_tagged` | bool | A flag indicating that the document has been part-of-speech tagged. Returns `True` if the `Doc` is empty. |
|
||||
| `is_parsed` | bool | A flag indicating that the document has been syntactically parsed. Returns `True` if the `Doc` is empty. |
|
||||
| `is_sentenced` | bool | A flag indicating that sentence boundaries have been applied to the document. Returns `True` if the `Doc` is empty. |
|
||||
| `is_nered` <Tag variant="new">2.1</Tag> | bool | A flag indicating that named entities have been set. Will return `True` if the `Doc` is empty, or if _any_ of the tokens has an entity tag set, even if the others are unknown. |
|
||||
| `sentiment` | float | The document's positivity/negativity score, if available. |
|
||||
| `user_hooks` | dict | A dictionary that allows customization of the `Doc`'s properties. |
|
||||
| `user_token_hooks` | dict | A dictionary that allows customization of properties of `Token` children. |
|
||||
|
|
Loading…
Reference in New Issue