diff --git a/spacy/attrs.pxd b/spacy/attrs.pxd
index 073de3565..a8ee9cac0 100644
--- a/spacy/attrs.pxd
+++ b/spacy/attrs.pxd
@@ -83,6 +83,7 @@ cpdef enum attr_id_t:
     ENT_IOB
     ENT_TYPE
     HEAD
+    SENT_START
     SPACY
     PROB
 
diff --git a/spacy/attrs.pyx b/spacy/attrs.pyx
index 49a1e0438..bf2687d22 100644
--- a/spacy/attrs.pyx
+++ b/spacy/attrs.pyx
@@ -85,6 +85,7 @@ IDS = {
     "ENT_IOB": ENT_IOB,
     "ENT_TYPE": ENT_TYPE,
     "HEAD": HEAD,
+    "SENT_START": SENT_START,
     "SPACY": SPACY,
     "PROB": PROB,
     "LANG": LANG,
diff --git a/spacy/symbols.pxd b/spacy/symbols.pxd
index 1a46f509f..0b713cb21 100644
--- a/spacy/symbols.pxd
+++ b/spacy/symbols.pxd
@@ -82,6 +82,7 @@ cpdef enum symbol_t:
     ENT_IOB
     ENT_TYPE
     HEAD
+    SENT_START
     SPACY
     PROB
 
diff --git a/spacy/symbols.pyx b/spacy/symbols.pyx
index 662aca777..9f4009579 100644
--- a/spacy/symbols.pyx
+++ b/spacy/symbols.pyx
@@ -84,6 +84,7 @@ IDS = {
     "ENT_IOB": ENT_IOB,
     "ENT_TYPE": ENT_TYPE,
     "HEAD": HEAD,
+    "SENT_START": SENT_START,
     "SPACY": SPACY,
     "PROB": PROB,
 
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 014b84746..faddba6ba 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -24,6 +24,7 @@ from ..typedefs cimport attr_t, flags_t
 from ..attrs cimport attr_id_t
 from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
 from ..attrs cimport LENGTH, POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB, ENT_TYPE
+from ..attrs cimport SENT_START
 from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t
 from ..syntax.iterators import CHUNKERS
 from ..util import normalize_slice
@@ -52,6 +53,8 @@ cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil:
         return token.dep
     elif feat_name == HEAD:
         return token.head
+    elif feat_name == SENT_START:
+        return token.sent_start
     elif feat_name == SPACY:
         return token.spacy
     elif feat_name == ENT_IOB:
@@ -567,6 +570,17 @@ cdef class Doc:
         array (numpy.ndarray[ndim=2, dtype='int32']) The attribute values to load.
         RETURNS (Doc): Itself.
         """
+        # HEAD already implies sentence boundaries, so combining it with
+        # SENT_START would be ambiguous; fail loudly instead.
+        if SENT_START in attrs and HEAD in attrs:
+            raise ValueError(
+                "Conflicting attributes specified in doc.from_array():\n"
+                "(HEAD, SENT_START)\n"
+                "The HEAD attribute currently sets sentence boundaries implicitly,\n"
+                "based on the tree structure. This means the HEAD attribute would "
+                "potentially override the sentence boundaries set by SENT_START.\n"
+                "See https://github.com/spacy-io/spaCy/issues/235 for details and "
+                "workarounds, and to propose solutions.")
         cdef int i, col
         cdef attr_id_t attr_id
         cdef TokenC* tokens = self.c