mirror of https://github.com/explosion/spaCy.git
💫 Allow Span to take text label (#3031)
Fixes #3027. * Allow Span.__init__ to take unicode values for the `label` argument. * Allow `Span.label_` to be writeable. - [x] I have submitted the spaCy Contributor Agreement. - [x] I ran the tests, and all new and existing tests passed. - [ ] My changes don't require a change to the documentation, or if they do, I've added all required information.
This commit is contained in:
parent
11a29af751
commit
2c2db0c492
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from spacy.attrs import ORTH, LENGTH
|
from spacy.attrs import ORTH, LENGTH
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc, Span
|
||||||
from spacy.vocab import Vocab
|
from spacy.vocab import Vocab
|
||||||
|
|
||||||
from ..util import get_doc
|
from ..util import get_doc
|
||||||
|
@ -154,6 +154,17 @@ def test_span_as_doc(doc):
|
||||||
assert span.text == span_doc.text.strip()
|
assert span.text == span_doc.text.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def test_span_string_label(doc):
|
||||||
|
span = Span(doc, 0, 1, label='hello')
|
||||||
|
assert span.label_ == 'hello'
|
||||||
|
assert span.label == doc.vocab.strings['hello']
|
||||||
|
|
||||||
|
def test_span_string_set_label(doc):
|
||||||
|
span = Span(doc, 0, 1)
|
||||||
|
span.label_ = 'hello'
|
||||||
|
assert span.label_ == 'hello'
|
||||||
|
assert span.label == doc.vocab.strings['hello']
|
||||||
|
|
||||||
def test_span_ents_property(doc):
|
def test_span_ents_property(doc):
|
||||||
"""Test span.ents for the """
|
"""Test span.ents for the """
|
||||||
doc.ents = [
|
doc.ents = [
|
||||||
|
|
|
@ -15,7 +15,7 @@ from ..parts_of_speech cimport univ_pos_t
|
||||||
from ..util import normalize_slice
|
from ..util import normalize_slice
|
||||||
from ..attrs cimport IS_PUNCT, IS_SPACE
|
from ..attrs cimport IS_PUNCT, IS_SPACE
|
||||||
from ..lexeme cimport Lexeme
|
from ..lexeme cimport Lexeme
|
||||||
from ..compat import is_config
|
from ..compat import is_config, basestring_
|
||||||
from ..errors import Errors, TempErrors, Warnings, user_warning, models_warning
|
from ..errors import Errors, TempErrors, Warnings, user_warning, models_warning
|
||||||
from .underscore import Underscore, get_ext_args
|
from .underscore import Underscore, get_ext_args
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@ cdef class Span:
|
||||||
raise ValueError(Errors.E046.format(name=name))
|
raise ValueError(Errors.E046.format(name=name))
|
||||||
return Underscore.span_extensions.pop(name)
|
return Underscore.span_extensions.pop(name)
|
||||||
|
|
||||||
def __cinit__(self, Doc doc, int start, int end, attr_t label=0,
|
def __cinit__(self, Doc doc, int start, int end, label=0,
|
||||||
vector=None, vector_norm=None):
|
vector=None, vector_norm=None):
|
||||||
"""Create a `Span` object from the slice `doc[start : end]`.
|
"""Create a `Span` object from the slice `doc[start : end]`.
|
||||||
|
|
||||||
|
@ -64,6 +64,8 @@ cdef class Span:
|
||||||
self.end_char = self.doc[end - 1].idx + len(self.doc[end - 1])
|
self.end_char = self.doc[end - 1].idx + len(self.doc[end - 1])
|
||||||
else:
|
else:
|
||||||
self.end_char = 0
|
self.end_char = 0
|
||||||
|
if isinstance(label, basestring_):
|
||||||
|
label = doc.vocab.strings.add(label)
|
||||||
if label not in doc.vocab.strings:
|
if label not in doc.vocab.strings:
|
||||||
raise ValueError(Errors.E084.format(label=label))
|
raise ValueError(Errors.E084.format(label=label))
|
||||||
self.label = label
|
self.label = label
|
||||||
|
@ -601,6 +603,8 @@ cdef class Span:
|
||||||
"""RETURNS (unicode): The span's label."""
|
"""RETURNS (unicode): The span's label."""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.doc.vocab.strings[self.label]
|
return self.doc.vocab.strings[self.label]
|
||||||
|
def __set__(self, unicode label_):
|
||||||
|
self.label = self.doc.vocab.strings.add(label_)
|
||||||
|
|
||||||
|
|
||||||
cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1:
|
cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1:
|
||||||
|
|
Loading…
Reference in New Issue