From a002264fec3f49e85f530bf8cb3d16be0a049071 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 16 Oct 2017 19:34:21 +0200 Subject: [PATCH] Remove caching of Token in Doc, as caused cycle. --- spacy/tokens/doc.pyx | 13 ++----------- spacy/tokens/token.pxd | 3 --- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 05d393d2b..bf48cf4f5 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -140,7 +140,6 @@ cdef class Doc: self.user_span_hooks = {} self.tensor = numpy.zeros((0,), dtype='float32') self.user_data = {} - self._py_tokens = [] self._vector = None self.noun_chunks_iterator = _get_chunker(self.vocab.lang) cdef unicode orth @@ -209,10 +208,7 @@ cdef class Doc: if i < 0: i = self.length + i bounds_check(i, self.length, PADDING) - if self._py_tokens[i] is not None: - return self._py_tokens[i] - else: - return Token.cinit(self.vocab, &self.c[i], i, self) + return Token.cinit(self.vocab, &self.c[i], i, self) def __iter__(self): """Iterate over `Token` objects, from which the annotations can be @@ -226,10 +222,7 @@ cdef class Doc: """ cdef int i for i in range(self.length): - if self._py_tokens[i] is not None: - yield self._py_tokens[i] - else: - yield Token.cinit(self.vocab, &self.c[i], i, self) + yield Token.cinit(self.vocab, &self.c[i], i, self) def __len__(self): """The number of tokens in the document. @@ -535,7 +528,6 @@ cdef class Doc: self.length += 1 # Set morphological attributes, e.g. by lemma, if possible self.vocab.morphology.assign_untagged(t) - self._py_tokens.append(None) return t.idx + t.lex.length + t.spacy @cython.boundscheck(False) @@ -841,7 +833,6 @@ cdef class Doc: # Set the left/right children, left/right edges set_children_from_heads(self.c, self.length) # Clear the cached Python objects - self._py_tokens = [None] * self.length # Return the merged Python object return self[start] diff --git a/spacy/tokens/token.pxd b/spacy/tokens/token.pxd index f63a0490c..b408e04eb 100644 --- a/spacy/tokens/token.pxd +++ b/spacy/tokens/token.pxd @@ -19,10 +19,7 @@ cdef class Token: if offset < 0 or offset >= doc.length: msg = "Attempt to access token at %d, max length %d" raise IndexError(msg % (offset, doc.length)) - if doc._py_tokens[offset] != None: - return doc._py_tokens[offset] cdef Token self = Token.__new__(Token, vocab, doc, offset) - doc._py_tokens[offset] = self return self #cdef inline TokenC struct_from_attrs(Vocab vocab, attrs):