86 lines
2.9 KiB
Python
86 lines
2.9 KiB
Python
class Pipeline(object):
    """Defines a pipeline for transforming sequence data.

    The input is assumed to be utf-8 encoded `str` (Python 3) or
    `unicode` (Python 2).

    Attributes:
        convert_token: The function to apply to input sequence data.
        pipes: The Pipelines that will be applied to input sequence
            data in order.
    """

    def __init__(self, convert_token=None):
        """Create a pipeline.

        Arguments:
            convert_token: The function to apply to input sequence data.
                If None, the identity function is used. Default: None

        Raises:
            ValueError: If convert_token is neither None nor callable.
        """
        if convert_token is None:
            self.convert_token = Pipeline.identity
        elif callable(convert_token):
            self.convert_token = convert_token
        else:
            raise ValueError("Pipeline input convert_token {} is not None "
                             "or callable".format(convert_token))
        # A new pipeline initially consists of itself only; add_before and
        # add_after extend this list with the stages of other pipelines.
        self.pipes = [self]

    def __call__(self, x, *args):
        """Apply the current Pipeline(s) to an input.

        Arguments:
            x: The input to process with the Pipeline(s).
            Positional arguments: Forwarded to the `call` function
                of the Pipeline(s).

        Returns:
            The result of feeding x through every Pipeline in `pipes`,
            in order, each stage receiving the previous stage's output.
        """
        for pipe in self.pipes:
            # Use `call`, not `pipe(...)`: each entry contributes only its own
            # convert_token, so chained stages are never applied twice.
            x = pipe.call(x, *args)
        return x

    def call(self, x, *args):
        """Apply _only_ the convert_token function of the current pipeline
        to the input. If the input is a list, a list with the results of
        applying the `convert_token` function to all input elements is
        returned.

        Only `list` instances are mapped element-wise; any other input
        (including tuples) is passed to `convert_token` as a single value.

        Arguments:
            x: The input to apply the convert_token function to.
            Positional arguments: Forwarded to the `convert_token` function
                of the current Pipeline.

        Returns:
            The converted value, or a new list of converted values when x
            is a list.
        """
        if isinstance(x, list):
            return [self.convert_token(tok, *args) for tok in x]
        return self.convert_token(x, *args)

    def add_before(self, pipeline):
        """Add a Pipeline to be applied before this processing pipeline.

        Arguments:
            pipeline: The Pipeline or callable to apply before this
                Pipeline.

        Returns:
            This Pipeline (modified in place), so calls can be chained.
        """
        if not isinstance(pipeline, Pipeline):
            pipeline = Pipeline(pipeline)
        # Build a new list from copies so the other pipeline's `pipes`
        # list is never shared with or mutated through this one.
        self.pipes = pipeline.pipes[:] + self.pipes[:]
        return self

    def add_after(self, pipeline):
        """Add a Pipeline to be applied after this processing pipeline.

        Arguments:
            pipeline: The Pipeline or callable to apply after this
                Pipeline.

        Returns:
            This Pipeline (modified in place), so calls can be chained.
        """
        if not isinstance(pipeline, Pipeline):
            pipeline = Pipeline(pipeline)
        # Build a new list from copies so the other pipeline's `pipes`
        # list is never shared with or mutated through this one.
        self.pipes = self.pipes[:] + pipeline.pipes[:]
        return self

    @staticmethod
    def identity(x):
        """Return the input unchanged (the same object, not a copy).

        This is here for serialization compatibility with pickle.
        """
        return x