From 2c2db0c492213e0dec3e5c4493b60e686c8c0096 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 8 Dec 2018 13:08:41 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=92=AB=20Allow=20Span=20to=20take=20text?= =?UTF-8?q?=20label=20(#3031)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #3027. * Allow Span.__init__ to take unicode values for the `label` argument. * Allow `Span.label_` to be writeable. - [x] I have submitted the spaCy Contributor Agreement. - [x] I ran the tests, and all new and existing tests passed. - [ ] My changes don't require a change to the documentation, or if they do, I've added all required information. --- spacy/tests/doc/test_span.py | 13 ++++++++++++- spacy/tokens/span.pyx | 8 ++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py index 6fd22b3ff..11fc0f228 100644 --- a/spacy/tests/doc/test_span.py +++ b/spacy/tests/doc/test_span.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import pytest from spacy.attrs import ORTH, LENGTH -from spacy.tokens import Doc +from spacy.tokens import Doc, Span from spacy.vocab import Vocab from ..util import get_doc @@ -154,6 +154,17 @@ def test_span_as_doc(doc): assert span.text == span_doc.text.strip() +def test_span_string_label(doc): + span = Span(doc, 0, 1, label='hello') + assert span.label_ == 'hello' + assert span.label == doc.vocab.strings['hello'] + +def test_span_string_set_label(doc): + span = Span(doc, 0, 1) + span.label_ = 'hello' + assert span.label_ == 'hello' + assert span.label == doc.vocab.strings['hello'] + def test_span_ents_property(doc): """Test span.ents for the """ doc.ents = [ diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 6cba46a22..29082b894 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -15,7 +15,7 @@ from ..parts_of_speech cimport univ_pos_t from ..util import normalize_slice from ..attrs cimport IS_PUNCT, IS_SPACE from ..lexeme cimport Lexeme -from ..compat import is_config +from ..compat import is_config, basestring_ from ..errors import Errors, TempErrors, Warnings, user_warning, models_warning from .underscore import Underscore, get_ext_args @@ -42,7 +42,7 @@ cdef class Span: raise ValueError(Errors.E046.format(name=name)) return Underscore.span_extensions.pop(name) - def __cinit__(self, Doc doc, int start, int end, attr_t label=0, + def __cinit__(self, Doc doc, int start, int end, label=0, vector=None, vector_norm=None): """Create a `Span` object from the slice `doc[start : end]`. @@ -64,6 +64,8 @@ cdef class Span: self.end_char = self.doc[end - 1].idx + len(self.doc[end - 1]) else: self.end_char = 0 + if isinstance(label, basestring_): + label = doc.vocab.strings.add(label) if label not in doc.vocab.strings: raise ValueError(Errors.E084.format(label=label)) self.label = label @@ -601,6 +603,8 @@ cdef class Span: """RETURNS (unicode): The span's label.""" def __get__(self): return self.doc.vocab.strings[self.label] + def __set__(self, unicode label_): + self.label = self.doc.vocab.strings.add(label_) cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1: