mirror of https://github.com/explosion/spaCy.git
Use disable argument (list) for serialization
This commit is contained in:
parent
faff1c23fb
commit
353f0ef8d7
|
@ -173,13 +173,13 @@ class Language(object):
|
||||||
flat_list.append(pipe)
|
flat_list.append(pipe)
|
||||||
self.pipeline = flat_list
|
self.pipeline = flat_list
|
||||||
|
|
||||||
def __call__(self, text, **disabled):
|
def __call__(self, text, disable=[]):
|
||||||
"""'Apply the pipeline to some text. The text can span multiple sentences,
|
"""'Apply the pipeline to some text. The text can span multiple sentences,
|
||||||
and can contain arbitrary whitespace. Alignment into the original string
|
and can contain arbitrary whitespace. Alignment into the original string
|
||||||
is preserved.
|
is preserved.
|
||||||
|
|
||||||
text (unicode): The text to be processed.
|
text (unicode): The text to be processed.
|
||||||
**disabled: Elements of the pipeline that should not be run.
|
disable (list): Names of the pipeline components to disable.
|
||||||
RETURNS (Doc): A container for accessing the annotations.
|
RETURNS (Doc): A container for accessing the annotations.
|
||||||
|
|
||||||
EXAMPLE:
|
EXAMPLE:
|
||||||
|
@ -190,7 +190,7 @@ class Language(object):
|
||||||
doc = self.make_doc(text)
|
doc = self.make_doc(text)
|
||||||
for proc in self.pipeline:
|
for proc in self.pipeline:
|
||||||
name = getattr(proc, 'name', None)
|
name = getattr(proc, 'name', None)
|
||||||
if name in disabled and not disabled[name]:
|
if name in disable:
|
||||||
continue
|
continue
|
||||||
proc(doc)
|
proc(doc)
|
||||||
return doc
|
return doc
|
||||||
|
@ -323,7 +323,7 @@ class Language(object):
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def pipe(self, texts, n_threads=2, batch_size=1000, **disabled):
|
def pipe(self, texts, n_threads=2, batch_size=1000, disable=[]):
|
||||||
"""Process texts as a stream, and yield `Doc` objects in order. Supports
|
"""Process texts as a stream, and yield `Doc` objects in order. Supports
|
||||||
GIL-free multi-threading.
|
GIL-free multi-threading.
|
||||||
|
|
||||||
|
@ -331,7 +331,7 @@ class Language(object):
|
||||||
n_threads (int): The number of worker threads to use. If -1, OpenMP will
|
n_threads (int): The number of worker threads to use. If -1, OpenMP will
|
||||||
decide how many to use at run time. Default is 2.
|
decide how many to use at run time. Default is 2.
|
||||||
batch_size (int): The number of texts to buffer.
|
batch_size (int): The number of texts to buffer.
|
||||||
**disabled: Pipeline components to exclude.
|
disable (list): Names of the pipeline components to disable.
|
||||||
YIELDS (Doc): Documents in the order of the original text.
|
YIELDS (Doc): Documents in the order of the original text.
|
||||||
|
|
||||||
EXAMPLE:
|
EXAMPLE:
|
||||||
|
@ -343,7 +343,7 @@ class Language(object):
|
||||||
docs = texts
|
docs = texts
|
||||||
for proc in self.pipeline:
|
for proc in self.pipeline:
|
||||||
name = getattr(proc, 'name', None)
|
name = getattr(proc, 'name', None)
|
||||||
if name in disabled and not disabled[name]:
|
if name in disable:
|
||||||
continue
|
continue
|
||||||
if hasattr(proc, 'pipe'):
|
if hasattr(proc, 'pipe'):
|
||||||
docs = proc.pipe(docs, n_threads=n_threads, batch_size=batch_size)
|
docs = proc.pipe(docs, n_threads=n_threads, batch_size=batch_size)
|
||||||
|
@ -353,12 +353,14 @@ class Language(object):
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
yield doc
|
yield doc
|
||||||
|
|
||||||
def to_disk(self, path, **exclude):
|
def to_disk(self, path, disable=[]):
|
||||||
"""Save the current state to a directory.
|
"""Save the current state to a directory. If a model is loaded, this
|
||||||
|
will include the model.
|
||||||
|
|
||||||
path (unicode or Path): A path to a directory, which will be created if
|
path (unicode or Path): A path to a directory, which will be created if
|
||||||
it doesn't exist. Paths may be either strings or `Path`-like objects.
|
it doesn't exist. Paths may be either strings or `Path`-like objects.
|
||||||
**exclude: Named attributes to prevent from being saved.
|
disable (list): Names of pipeline components to disable and prevent
|
||||||
|
from being saved.
|
||||||
|
|
||||||
EXAMPLE:
|
EXAMPLE:
|
||||||
>>> nlp.to_disk('/path/to/models')
|
>>> nlp.to_disk('/path/to/models')
|
||||||
|
@ -370,7 +372,7 @@ class Language(object):
|
||||||
raise IOError("Output path must be a directory")
|
raise IOError("Output path must be a directory")
|
||||||
props = {}
|
props = {}
|
||||||
for name, value in self.__dict__.items():
|
for name, value in self.__dict__.items():
|
||||||
if name in exclude:
|
if name in disable:
|
||||||
continue
|
continue
|
||||||
if hasattr(value, 'to_disk'):
|
if hasattr(value, 'to_disk'):
|
||||||
value.to_disk(path / name)
|
value.to_disk(path / name)
|
||||||
|
@ -379,13 +381,14 @@ class Language(object):
|
||||||
with (path / 'props.pickle').open('wb') as file_:
|
with (path / 'props.pickle').open('wb') as file_:
|
||||||
dill.dump(props, file_)
|
dill.dump(props, file_)
|
||||||
|
|
||||||
def from_disk(self, path, **exclude):
|
def from_disk(self, path, disable=[]):
|
||||||
"""Loads state from a directory. Modifies the object in place and
|
"""Loads state from a directory. Modifies the object in place and
|
||||||
returns it.
|
returns it. If the saved `Language` object contains a model, the
|
||||||
|
model will be loaded.
|
||||||
|
|
||||||
path (unicode or Path): A path to a directory. Paths may be either
|
path (unicode or Path): A path to a directory. Paths may be either
|
||||||
strings or `Path`-like objects.
|
strings or `Path`-like objects.
|
||||||
**exclude: Named attributes to prevent from being loaded.
|
disable (list): Names of the pipeline components to disable.
|
||||||
RETURNS (Language): The modified `Language` object.
|
RETURNS (Language): The modified `Language` object.
|
||||||
|
|
||||||
EXAMPLE:
|
EXAMPLE:
|
||||||
|
@ -394,35 +397,36 @@ class Language(object):
|
||||||
"""
|
"""
|
||||||
path = util.ensure_path(path)
|
path = util.ensure_path(path)
|
||||||
for name in path.iterdir():
|
for name in path.iterdir():
|
||||||
if name not in exclude and hasattr(self, str(name)):
|
if name not in disable and hasattr(self, str(name)):
|
||||||
getattr(self, name).from_disk(path / name)
|
getattr(self, name).from_disk(path / name)
|
||||||
with (path / 'props.pickle').open('rb') as file_:
|
with (path / 'props.pickle').open('rb') as file_:
|
||||||
bytes_data = file_.read()
|
bytes_data = file_.read()
|
||||||
self.from_bytes(bytes_data, **exclude)
|
self.from_bytes(bytes_data, disable)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def to_bytes(self, **exclude):
|
def to_bytes(self, disable=[]):
|
||||||
"""Serialize the current state to a binary string.
|
"""Serialize the current state to a binary string.
|
||||||
|
|
||||||
**exclude: Named attributes to prevent from being serialized.
|
disable (list): Names of pipeline components to disable and prevent
|
||||||
|
from being serialized.
|
||||||
RETURNS (bytes): The serialized form of the `Language` object.
|
RETURNS (bytes): The serialized form of the `Language` object.
|
||||||
"""
|
"""
|
||||||
props = dict(self.__dict__)
|
props = dict(self.__dict__)
|
||||||
for key in exclude:
|
for key in disable:
|
||||||
if key in props:
|
if key in props:
|
||||||
props.pop(key)
|
props.pop(key)
|
||||||
return dill.dumps(props, -1)
|
return dill.dumps(props, -1)
|
||||||
|
|
||||||
def from_bytes(self, bytes_data, **exclude):
|
def from_bytes(self, bytes_data, disable=[]):
|
||||||
"""Load state from a binary string.
|
"""Load state from a binary string.
|
||||||
|
|
||||||
bytes_data (bytes): The data to load from.
|
bytes_data (bytes): The data to load from.
|
||||||
**exclude: Named attributes to prevent from being loaded.
|
disable (list): Names of the pipeline components to disable.
|
||||||
RETURNS (Language): The `Language` object.
|
RETURNS (Language): The `Language` object.
|
||||||
"""
|
"""
|
||||||
props = dill.loads(bytes_data)
|
props = dill.loads(bytes_data)
|
||||||
for key, value in props.items():
|
for key, value in props.items():
|
||||||
if key not in exclude:
|
if key not in disable:
|
||||||
setattr(self, key, value)
|
setattr(self, key, value)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
|
@ -73,15 +73,26 @@ p
|
||||||
+cell The text to be processed.
|
+cell The text to be processed.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code **disabled]
|
+cell #[code disable]
|
||||||
+cell -
|
+cell list
|
||||||
+cell Elements of the pipeline that should not be run.
|
+cell
|
||||||
|
| Names of pipeline components to
|
||||||
|
| #[+a("/docs/usage/language-processing-pipeline#disabling") disable].
|
||||||
|
|
||||||
+footrow
|
+footrow
|
||||||
+cell returns
|
+cell returns
|
||||||
+cell #[code Doc]
|
+cell #[code Doc]
|
||||||
+cell A container for accessing the annotations.
|
+cell A container for accessing the annotations.
|
||||||
|
|
||||||
|
+infobox("⚠️ Deprecation note")
|
||||||
|
.o-block
|
||||||
|
| Pipeline components to prevent from being loaded can now be added as
|
||||||
|
| a list to #[code disable], instead of specifying one keyword argument
|
||||||
|
| per component.
|
||||||
|
|
||||||
|
+code-new doc = nlp(u"I don't want parsed", disable=['parser'])
|
||||||
|
+code-old doc = nlp(u"I don't want parsed", parse=False)
|
||||||
|
|
||||||
+h(2, "pipe") Language.pipe
|
+h(2, "pipe") Language.pipe
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
|
@ -112,6 +123,13 @@ p
|
||||||
+cell int
|
+cell int
|
||||||
+cell The number of texts to buffer.
|
+cell The number of texts to buffer.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code disable]
|
||||||
|
+cell list
|
||||||
|
+cell
|
||||||
|
| Names of pipeline components to
|
||||||
|
| #[+a("/docs/usage/language-processing-pipeline#disabling") disable].
|
||||||
|
|
||||||
+footrow
|
+footrow
|
||||||
+cell yields
|
+cell yields
|
||||||
+cell #[code Doc]
|
+cell #[code Doc]
|
||||||
|
@ -227,8 +245,11 @@ p
|
||||||
|
|
||||||
+h(2, "to_disk") Language.to_disk
|
+h(2, "to_disk") Language.to_disk
|
||||||
+tag method
|
+tag method
|
||||||
|
+tag-new(2)
|
||||||
|
|
||||||
p Save the current state to a directory.
|
p
|
||||||
|
| Save the current state to a directory. If a model is loaded, this will
|
||||||
|
| #[strong include the model].
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
nlp.to_disk('/path/to/models')
|
nlp.to_disk('/path/to/models')
|
||||||
|
@ -242,14 +263,21 @@ p Save the current state to a directory.
|
||||||
| Paths may be either strings or #[code Path]-like objects.
|
| Paths may be either strings or #[code Path]-like objects.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code **exclude]
|
+cell #[code disable]
|
||||||
+cell -
|
+cell list
|
||||||
+cell Named attributes to prevent from being saved.
|
+cell
|
||||||
|
| Names of pipeline components to
|
||||||
|
| #[+a("/docs/usage/language-processing-pipeline#disabling") disable]
|
||||||
|
| and prevent from being saved.
|
||||||
|
|
||||||
+h(2, "from_disk") Language.from_disk
|
+h(2, "from_disk") Language.from_disk
|
||||||
+tag method
|
+tag method
|
||||||
|
+tag-new(2)
|
||||||
|
|
||||||
p Loads state from a directory. Modifies the object in place and returns it.
|
p
|
||||||
|
| Loads state from a directory. Modifies the object in place and returns
|
||||||
|
| it. If the saved #[code Language] object contains a model, the
|
||||||
|
| #[strong model will be loaded].
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
from spacy.language import Language
|
from spacy.language import Language
|
||||||
|
@ -264,15 +292,28 @@ p Loads state from a directory. Modifies the object in place and returns it.
|
||||||
| #[code Path]-like objects.
|
| #[code Path]-like objects.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code **exclude]
|
+cell #[code disable]
|
||||||
+cell -
|
+cell list
|
||||||
+cell Named attributes to prevent from being loaded.
|
+cell
|
||||||
|
| Names of pipeline components to
|
||||||
|
| #[+a("/docs/usage/language-processing-pipeline#disabling") disable].
|
||||||
|
|
||||||
+footrow
|
+footrow
|
||||||
+cell returns
|
+cell returns
|
||||||
+cell #[code Language]
|
+cell #[code Language]
|
||||||
+cell The modified #[code Language] object.
|
+cell The modified #[code Language] object.
|
||||||
|
|
||||||
|
+infobox("⚠️ Deprecation note")
|
||||||
|
.o-block
|
||||||
|
| As of spaCy v2.0, the #[code save_to_directory] method has been
|
||||||
|
| renamed to #[code to_disk], to improve consistency across classes.
|
||||||
|
| Pipeline components to prevent from being loaded can now be added as
|
||||||
|
| a list to #[code disable], instead of specifying one keyword argument
|
||||||
|
| per component.
|
||||||
|
|
||||||
|
+code-new nlp = English().from_disk('/path/to/model', disable=['tagger', 'ner'])
|
||||||
|
+code-old nlp = spacy.load('en', tagger=False, entity=False)
|
||||||
|
|
||||||
+h(2, "to_bytes") Language.to_bytes
|
+h(2, "to_bytes") Language.to_bytes
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
|
@ -283,9 +324,12 @@ p Serialize the current state to a binary string.
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row
|
+row
|
||||||
+cell #[code **exclude]
|
+cell #[code disable]
|
||||||
+cell -
|
+cell list
|
||||||
+cell Named attributes to prevent from being serialized.
|
+cell
|
||||||
|
| Names of pipeline components to
|
||||||
|
| #[+a("/docs/usage/language-processing-pipeline#disabling") disable]
|
||||||
|
| and prevent from being serialized.
|
||||||
|
|
||||||
+footrow
|
+footrow
|
||||||
+cell returns
|
+cell returns
|
||||||
|
@ -310,15 +354,26 @@ p Load state from a binary string.
|
||||||
+cell The data to load from.
|
+cell The data to load from.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code **exclude]
|
+cell #[code disable]
|
||||||
+cell -
|
+cell list
|
||||||
+cell Named attributes to prevent from being loaded.
|
+cell
|
||||||
|
| Names of pipeline components to
|
||||||
|
| #[+a("/docs/usage/language-processing-pipeline#disabling") disable].
|
||||||
|
|
||||||
+footrow
|
+footrow
|
||||||
+cell returns
|
+cell returns
|
||||||
+cell #[code Language]
|
+cell #[code Language]
|
||||||
+cell The #[code Language] object.
|
+cell The #[code Language] object.
|
||||||
|
|
||||||
|
+infobox("⚠️ Deprecation note")
|
||||||
|
.o-block
|
||||||
|
| Pipeline components to prevent from being loaded can now be added as
|
||||||
|
| a list to #[code disable], instead of specifying one keyword argument
|
||||||
|
| per component.
|
||||||
|
|
||||||
|
+code-new nlp = English().from_bytes(bytes, disable=['tagger', 'ner'])
|
||||||
|
+code-old nlp = English().from_bytes(bytes, tagger=False, entity=False)
|
||||||
|
|
||||||
+h(2, "attributes") Attributes
|
+h(2, "attributes") Attributes
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
|
|
Loading…
Reference in New Issue