mirror of https://github.com/explosion/spaCy.git
Fix formatting
This commit is contained in:
parent
8eb0b7b779
commit
d2d35b63b7
|
@ -48,7 +48,7 @@ from .parts_of_speech import X
|
|||
|
||||
|
||||
class SentenceSegmenter(object):
|
||||
'''A simple spaCy hook, to allow custom sentence boundary detection logic
|
||||
"""A simple spaCy hook, to allow custom sentence boundary detection logic
|
||||
(that doesn't require the dependency parse).
|
||||
|
||||
To change the sentence boundary detection strategy, pass a generator
|
||||
|
@ -57,7 +57,7 @@ class SentenceSegmenter(object):
|
|||
|
||||
Sentence detection strategies should be generators that take `Doc` objects
|
||||
and yield `Span` objects for each sentence.
|
||||
'''
|
||||
"""
|
||||
name = 'sbd'
|
||||
|
||||
def __init__(self, vocab, strategy=None):
|
||||
|
@ -89,30 +89,30 @@ class BaseThincComponent(object):
|
|||
|
||||
@classmethod
|
||||
def Model(cls, *shape, **kwargs):
|
||||
'''Initialize a model for the pipe.'''
|
||||
"""Initialize a model for the pipe."""
|
||||
raise NotImplementedError
|
||||
|
||||
def __init__(self, vocab, model=True, **cfg):
|
||||
'''Create a new pipe instance.'''
|
||||
"""Create a new pipe instance."""
|
||||
raise NotImplementedError
|
||||
|
||||
def __call__(self, doc):
|
||||
'''Apply the pipe to one document. The document is
|
||||
"""Apply the pipe to one document. The document is
|
||||
modified in-place, and returned.
|
||||
|
||||
|
||||
Both __call__ and pipe should delegate to the `predict()`
|
||||
and `set_annotations()` methods.
|
||||
'''
|
||||
"""
|
||||
scores = self.predict([doc])
|
||||
self.set_annotations([doc], scores)
|
||||
return doc
|
||||
|
||||
def pipe(self, stream, batch_size=128, n_threads=-1):
|
||||
'''Apply the pipe to a stream of documents.
|
||||
"""Apply the pipe to a stream of documents.
|
||||
|
||||
Both __call__ and pipe should delegate to the `predict()`
|
||||
and `set_annotations()` methods.
|
||||
'''
|
||||
"""
|
||||
for docs in cytoolz.partition_all(batch_size, stream):
|
||||
docs = list(docs)
|
||||
scores = self.predict(docs)
|
||||
|
@ -120,43 +120,43 @@ class BaseThincComponent(object):
|
|||
yield from docs
|
||||
|
||||
def predict(self, docs):
|
||||
'''Apply the pipeline's model to a batch of docs, without
|
||||
"""Apply the pipeline's model to a batch of docs, without
|
||||
modifying them.
|
||||
'''
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def set_annotations(self, docs, scores):
|
||||
'''Modify a batch of documents, using pre-computed scores.'''
|
||||
"""Modify a batch of documents, using pre-computed scores."""
|
||||
raise NotImplementedError
|
||||
|
||||
def update(self, docs, golds, drop=0., sgd=None, losses=None):
|
||||
'''Learn from a batch of documents and gold-standard information,
|
||||
"""Learn from a batch of documents and gold-standard information,
|
||||
updating the pipe's model.
|
||||
|
||||
Delegates to predict() and get_loss().
|
||||
'''
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def get_loss(self, docs, golds, scores):
|
||||
'''Find the loss and gradient of loss for the batch of
|
||||
documents and their predicted scores.'''
|
||||
"""Find the loss and gradient of loss for the batch of
|
||||
documents and their predicted scores."""
|
||||
raise NotImplementedError
|
||||
|
||||
def begin_training(self, gold_tuples=tuple(), pipeline=None):
|
||||
'''Initialize the pipe for training, using data examples if available.
|
||||
If no model has been initialized yet, the model is added.'''
|
||||
"""Initialize the pipe for training, using data examples if available.
|
||||
If no model has been initialized yet, the model is added."""
|
||||
if self.model is True:
|
||||
self.model = self.Model(**self.cfg)
|
||||
link_vectors_to_models(self.vocab)
|
||||
|
||||
def use_params(self, params):
|
||||
'''Modify the pipe's model, to use the given parameter values.
|
||||
'''
|
||||
"""Modify the pipe's model, to use the given parameter values.
|
||||
"""
|
||||
with self.model.use_params(params):
|
||||
yield
|
||||
|
||||
def to_bytes(self, **exclude):
|
||||
'''Serialize the pipe to a bytestring.'''
|
||||
"""Serialize the pipe to a bytestring."""
|
||||
serialize = OrderedDict((
|
||||
('cfg', lambda: json_dumps(self.cfg)),
|
||||
('model', lambda: self.model.to_bytes()),
|
||||
|
@ -165,7 +165,7 @@ class BaseThincComponent(object):
|
|||
return util.to_bytes(serialize, exclude)
|
||||
|
||||
def from_bytes(self, bytes_data, **exclude):
|
||||
'''Load the pipe from a bytestring.'''
|
||||
"""Load the pipe from a bytestring."""
|
||||
def load_model(b):
|
||||
if self.model is True:
|
||||
self.cfg['pretrained_dims'] = self.vocab.vectors_length
|
||||
|
@ -181,7 +181,7 @@ class BaseThincComponent(object):
|
|||
return self
|
||||
|
||||
def to_disk(self, path, **exclude):
|
||||
'''Serialize the pipe to disk.'''
|
||||
"""Serialize the pipe to disk."""
|
||||
serialize = OrderedDict((
|
||||
('cfg', lambda p: p.open('w').write(json_dumps(self.cfg))),
|
||||
('vocab', lambda p: self.vocab.to_disk(p)),
|
||||
|
@ -190,7 +190,7 @@ class BaseThincComponent(object):
|
|||
util.to_disk(path, serialize, exclude)
|
||||
|
||||
def from_disk(self, path, **exclude):
|
||||
'''Load the pipe from disk.'''
|
||||
"""Load the pipe from disk."""
|
||||
def load_model(p):
|
||||
if self.model is True:
|
||||
self.cfg['pretrained_dims'] = self.vocab.vectors_length
|
||||
|
@ -596,7 +596,7 @@ class SimilarityHook(BaseThincComponent):
|
|||
return Siamese(Pooling(max_pool, mean_pool), CauchySimilarity(length))
|
||||
|
||||
def __call__(self, doc):
|
||||
'''Install similarity hook'''
|
||||
"""Install similarity hook"""
|
||||
doc.user_hooks['similarity'] = self.predict
|
||||
return doc
|
||||
|
||||
|
|
Loading…
Reference in New Issue