From ed6c85fa3ce72e09486d510047a4049ab7bac85c Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 22 Jul 2017 20:04:03 +0200
Subject: [PATCH] Fix loading of text categories in GoldParse

---
 spacy/gold.pyx | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index 8260cdd02..001e19504 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -148,7 +148,7 @@ def minibatch(items, size=8):
     '''
     items = iter(items)
     while True:
-        batch_size = next(size) #if hasattr(size, '__next__') else size
+        batch_size = next(size) if hasattr(size, '__next__') else size
         batch = list(cytoolz.take(int(batch_size), items))
         if len(batch) == 0:
             break
@@ -428,17 +428,7 @@ cdef class GoldParse:
         self.c.has_dep = self.mem.alloc(len(doc), sizeof(int))
         self.c.ner = self.mem.alloc(len(doc), sizeof(Transition))
 
-        self.cats = []
-        for item in cats:
-            if isinstance(item, int):
-                self.cats.append((0, len(doc.text), self.vocab.strings[item]))
-            elif isinstance(item, str):
-                self.cats.append((0, len(doc.text), item))
-            elif hasattr(item, '__len__') and len(item) == 3:
-                start_char, end_char, label = item
-                if isinstance(label, int):
-                    label = self.vocab.strings[label]
-                self.cats.append((start_char, end_char, label))
+        self.cats = list(cats)
         self.words = [None] * len(doc)
         self.tags = [None] * len(doc)
         self.heads = [None] * len(doc)