From 353f0ef8d750b0b96867e1e3f4922389ab8329bb Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 26 May 2017 12:33:54 +0200 Subject: [PATCH] Use disable argument (list) for serialization --- spacy/language.py | 46 ++++++++++-------- website/docs/api/language.jade | 89 +++++++++++++++++++++++++++------- 2 files changed, 97 insertions(+), 38 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index b20bb4617..39e60c017 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -173,13 +173,13 @@ class Language(object): flat_list.append(pipe) self.pipeline = flat_list - def __call__(self, text, **disabled): + def __call__(self, text, disable=[]): """'Apply the pipeline to some text. The text can span multiple sentences, and can contain arbtrary whitespace. Alignment into the original string is preserved. text (unicode): The text to be processed. - **disabled: Elements of the pipeline that should not be run. + disable (list): Names of the pipeline components to disable. RETURNS (Doc): A container for accessing the annotations. EXAMPLE: @@ -190,7 +190,7 @@ class Language(object): doc = self.make_doc(text) for proc in self.pipeline: name = getattr(proc, 'name', None) - if name in disabled and not disabled[name]: + if name in disable: continue proc(doc) return doc @@ -323,7 +323,7 @@ class Language(object): except StopIteration: pass - def pipe(self, texts, n_threads=2, batch_size=1000, **disabled): + def pipe(self, texts, n_threads=2, batch_size=1000, disable=[]): """Process texts as a stream, and yield `Doc` objects in order. Supports GIL-free multi-threading. @@ -331,7 +331,7 @@ class Language(object): n_threads (int): The number of worker threads to use. If -1, OpenMP will decide how many to use at run time. Default is 2. batch_size (int): The number of texts to buffer. - **disabled: Pipeline components to exclude. + disable (list): Names of the pipeline components to disable. YIELDS (Doc): Documents in the order of the original text. 
EXAMPLE: @@ -343,7 +343,7 @@ class Language(object): docs = texts for proc in self.pipeline: name = getattr(proc, 'name', None) - if name in disabled and not disabled[name]: + if name in disable: continue if hasattr(proc, 'pipe'): docs = proc.pipe(docs, n_threads=n_threads, batch_size=batch_size) @@ -353,12 +353,14 @@ class Language(object): for doc in docs: yield doc - def to_disk(self, path, **exclude): - """Save the current state to a directory. + def to_disk(self, path, disable=[]): + """Save the current state to a directory. If a model is loaded, this + will include the model. path (unicode or Path): A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. - **exclude: Named attributes to prevent from being saved. + disable (list): Names of pipeline components to disable and prevent + from being saved. EXAMPLE: >>> nlp.to_disk('/path/to/models') @@ -370,7 +372,7 @@ class Language(object): raise IOError("Output path must be a directory") props = {} for name, value in self.__dict__.items(): - if name in exclude: + if name in disable: continue if hasattr(value, 'to_disk'): value.to_disk(path / name) @@ -379,13 +381,14 @@ class Language(object): with (path / 'props.pickle').open('wb') as file_: dill.dump(props, file_) - def from_disk(self, path, **exclude): + def from_disk(self, path, disable=[]): + """Loads state from a directory. Modifies the object in place and - returns it. + returns it. If the saved `Language` object contains a model, the + model will be loaded. path (unicode or Path): A path to a directory. Paths may be either strings or `Path`-like objects. - **exclude: Named attributes to prevent from being loaded. + disable (list): Names of the pipeline components to disable. RETURNS (Language): The modified `Language` object. 
EXAMPLE: @@ -394,35 +397,36 @@ class Language(object): """ path = util.ensure_path(path) for name in path.iterdir(): - if name not in exclude and hasattr(self, str(name)): + if name not in disable and hasattr(self, str(name)): getattr(self, name).from_disk(path / name) with (path / 'props.pickle').open('rb') as file_: bytes_data = file_.read() - self.from_bytes(bytes_data, **exclude) + self.from_bytes(bytes_data, disable) return self - def to_bytes(self, **exclude): + def to_bytes(self, disable=[]): """Serialize the current state to a binary string. - **exclude: Named attributes to prevent from being serialized. + disable (list): Names of pipeline components to disable and prevent + from being serialized. RETURNS (bytes): The serialized form of the `Language` object. """ props = dict(self.__dict__) - for key in exclude: + for key in disable: if key in props: props.pop(key) return dill.dumps(props, -1) - def from_bytes(self, bytes_data, **exclude): + def from_bytes(self, bytes_data, disable=[]): """Load state from a binary string. bytes_data (bytes): The data to load from. - **exclude: Named attributes to prevent from being loaded. + disable (list): Names of the pipeline components to disable. RETURNS (Language): The `Language` object. """ props = dill.loads(bytes_data) for key, value in props.items(): - if key not in exclude: + if key not in disable: setattr(self, key, value) return self diff --git a/website/docs/api/language.jade b/website/docs/api/language.jade index 455165bca..a22bee5f1 100644 --- a/website/docs/api/language.jade +++ b/website/docs/api/language.jade @@ -73,15 +73,26 @@ p +cell The text to be processed. +row - +cell #[code **disabled] - +cell - - +cell Elements of the pipeline that should not be run. + +cell #[code disable] + +cell list + +cell + | Names of pipeline components to + | #[+a("/docs/usage/language-processing-pipeline#disabling") disable]. +footrow +cell returns +cell #[code Doc] +cell A container for accessing the annotations. 
++infobox("⚠️ Deprecation note") + .o-block + | Pipeline components to prevent from being loaded can now be added as + | a list to #[code disable], instead of specifying one keyword argument + | per component. + + +code-new doc = nlp(u"I don't want parsed", disable=['parser']) + +code-old doc = nlp(u"I don't want parsed", parse=False) + +h(2, "pipe") Language.pipe +tag method @@ -112,6 +123,13 @@ p +cell int +cell The number of texts to buffer. + +row + +cell #[code disable] + +cell list + +cell + | Names of pipeline components to + | #[+a("/docs/usage/language-processing-pipeline#disabling") disable]. + +footrow +cell yields +cell #[code Doc] @@ -227,8 +245,11 @@ p +h(2, "to_disk") Language.to_disk +tag method + +tag-new(2) -p Save the current state to a directory. +p + | Save the current state to a directory. If a model is loaded, this will + | #[strong include the model]. +aside-code("Example"). nlp.to_disk('/path/to/models') @@ -242,14 +263,21 @@ p Save the current state to a directory. | Paths may be either strings or #[code Path]-like objects. +row - +cell #[code **exclude] - +cell - - +cell Named attributes to prevent from being saved. + +cell #[code disable] + +cell list + +cell + | Names of pipeline components to + | #[+a("/docs/usage/language-processing-pipeline#disabling") disable] + | and prevent from being saved. +h(2, "from_disk") Language.from_disk +tag method + +tag-new(2) -p Loads state from a directory. Modifies the object in place and returns it. +p + | Loads state from a directory. Modifies the object in place and returns + | it. If the saved #[code Language] object contains a model, the + | #[strong model will be loaded]. +aside-code("Example"). from spacy.language import Language @@ -264,15 +292,28 @@ p Loads state from a directory. Modifies the object in place and returns it. | #[code Path]-like objects. +row - +cell #[code **exclude] - +cell - - +cell Named attributes to prevent from being loaded. 
+ +cell #[code disable] + +cell list + +cell + | Names of pipeline components to + | #[+a("/docs/usage/language-processing-pipeline#disabling") disable]. +footrow +cell returns +cell #[code Language] +cell The modified #[code Language] object. ++infobox("⚠️ Deprecation note") + .o-block + | As of spaCy v2.0, the #[code save_to_directory] method has been + | renamed to #[code to_disk], to improve consistency across classes. + | Pipeline components to prevent from being loaded can now be added as + | a list to #[code disable], instead of specifying one keyword argument + | per component. + + +code-new nlp = English().from_disk('/path/to/models', disable=['tagger', 'ner']) + +code-old nlp = spacy.load('en', tagger=False, entity=False) + +h(2, "to_bytes") Language.to_bytes +tag method @@ -283,9 +324,12 @@ p Serialize the current state to a binary string. +table(["Name", "Type", "Description"]) +row - +cell #[code **exclude] - +cell - - +cell Named attributes to prevent from being serialized. + +cell #[code disable] + +cell list + +cell + | Names of pipeline components to + | #[+a("/docs/usage/language-processing-pipeline#disabling") disable] + | and prevent from being serialized. +footrow +cell returns @@ -310,15 +354,26 @@ p Load state from a binary string. +cell The data to load from. +row - +cell #[code **exclude] - +cell - - +cell Named attributes to prevent from being loaded. + +cell #[code disable] + +cell list + +cell + | Names of pipeline components to + | #[+a("/docs/usage/language-processing-pipeline#disabling") disable]. +footrow +cell returns +cell #[code Language] +cell The #[code Language] object. ++infobox("⚠️ Deprecation note") + .o-block + | Pipeline components to prevent from being loaded can now be added as + | a list to #[code disable], instead of specifying one keyword argument + | per component. 
+ + +code-new nlp = English().from_bytes(bytes, disable=['tagger', 'ner']) + +code-old nlp = English().from_bytes(bytes, tagger=False, entity=False) + +h(2, "attributes") Attributes +table(["Name", "Type", "Description"])