Fix dependencies in morphologizer

This commit is contained in:
Matthew Honnibal 2019-03-07 00:16:51 +01:00
parent 3993f41cc4
commit 010f846d5f
1 changed file with 2 additions and 6 deletions

View File

@@ -1,12 +1,8 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from collections import OrderedDict, defaultdict from collections import OrderedDict, defaultdict
import cytoolz
import ujson
import numpy import numpy
cimport numpy as np cimport numpy as np
from .util import msgpack
from .util import msgpack_numpy
from thinc.api import chain from thinc.api import chain
from thinc.neural.util import to_categorical, copy_array, get_array_module from thinc.neural.util import to_categorical, copy_array, get_array_module
@@ -16,7 +12,7 @@ from ._ml import Tok2Vec, build_morphologizer_model
from ._ml import link_vectors_to_models, zero_init, flatten from ._ml import link_vectors_to_models, zero_init, flatten
from ._ml import create_default_optimizer from ._ml import create_default_optimizer
from .errors import Errors, TempErrors from .errors import Errors, TempErrors
from .compat import json_dumps, basestring_ from .compat import basestring_
from .tokens.doc cimport Doc from .tokens.doc cimport Doc
from .vocab cimport Vocab from .vocab cimport Vocab
from .morphology cimport Morphology from .morphology cimport Morphology
@@ -58,7 +54,7 @@ class Morphologizer(Pipe):
return doc return doc
def pipe(self, stream, batch_size=128, n_threads=-1): def pipe(self, stream, batch_size=128, n_threads=-1):
for docs in cytoolz.partition_all(batch_size, stream): for docs in util.minibatch(stream, size=batch_size):
docs = list(docs) docs = list(docs)
features, tokvecs = self.predict(docs) features, tokvecs = self.predict(docs)
self.set_annotations(docs, features, tensors=tokvecs) self.set_annotations(docs, features, tensors=tokvecs)