From 8b7ac77c233dc1693fd6e21abd66af5330e6a34b Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 19 Aug 2017 16:18:09 +0200 Subject: [PATCH] Allow span label to be string in Doc.char_span --- spacy/tokens/doc.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 75088b010..dd52c4cbf 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -238,16 +238,18 @@ cdef class Doc: def doc(self): return self - def char_span(self, int start_idx, int end_idx, attr_t label=0, vector=None): + def char_span(self, int start_idx, int end_idx, label=0, vector=None): """Create a `Span` object from the slice `doc.text[start : end]`. doc (Doc): The parent document. start (int): The index of the first character of the span. end (int): The index of the first character after the span. - label (uint64): A label to attach to the Span, e.g. for named entities. + label (uint64 or string): A label to attach to the Span, e.g. for named entities. vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span. RETURNS (Span): The newly constructed object. """ + if not isinstance(label, int): + label = self.vocab.strings.add(label) cdef int start = token_by_start(self.c, self.length, start_idx) if start == -1: return None