From 39872de1f6e49c4b59ed747a2f15ca448a52f7db Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Sat, 19 Sep 2020 01:17:02 +0200 Subject: [PATCH] Introducing the gpu_allocator (#6091) * rename 'use_pytorch_for_gpu_memory' to 'gpu_allocator' * --code instead of --code-path * update documentation * avoid querying the "system" section directly * add explanation of gpu_allocator to TF/PyTorch section in docs * fix typo * fix typo 2 * use set_gpu_allocator from thinc 8.0.0a34 * default null instead of empty string --- pyproject.toml | 2 +- requirements.txt | 2 +- setup.cfg | 4 ++-- spacy/cli/debug_model.py | 9 +++++++-- spacy/cli/pretrain.py | 17 +++++++++-------- spacy/cli/templates/quickstart_training.jinja | 2 +- spacy/cli/train.py | 13 ++++++------- spacy/default_config.cfg | 4 ++-- spacy/schemas.py | 1 + website/docs/api/cli.md | 4 +++- website/docs/api/data-formats.md | 1 + website/docs/api/top-level.md | 14 ++++++++------ website/docs/usage/layers-architectures.md | 12 ++++++++++++ 13 files changed, 54 insertions(+), 31 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a413a099c..5290660aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ "cymem>=2.0.2,<2.1.0", "preshed>=3.0.2,<3.1.0", "murmurhash>=0.28.0,<1.1.0", - "thinc>=8.0.0a33,<8.0.0a40", + "thinc>=8.0.0a34,<8.0.0a40", "blis>=0.4.0,<0.5.0", "pytokenizations", "pathy" diff --git a/requirements.txt b/requirements.txt index 55fe627b8..4d6c1dfd0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # Our libraries cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 -thinc>=8.0.0a33,<8.0.0a40 +thinc>=8.0.0a34,<8.0.0a40 blis>=0.4.0,<0.5.0 ml_datasets==0.2.0a0 murmurhash>=0.28.0,<1.1.0 diff --git a/setup.cfg b/setup.cfg index 359e63172..dd0975800 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,13 +34,13 @@ setup_requires = cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 murmurhash>=0.28.0,<1.1.0 - thinc>=8.0.0a33,<8.0.0a40 + thinc>=8.0.0a34,<8.0.0a40 install_requires = # Our libraries murmurhash>=0.28.0,<1.1.0 cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 - thinc>=8.0.0a33,<8.0.0a40 + thinc>=8.0.0a34,<8.0.0a40 blis>=0.4.0,<0.5.0 wasabi>=0.8.0,<1.1.0 srsly>=2.1.0,<3.0.0 diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py index a4899a458..349849f58 100644 --- a/spacy/cli/debug_model.py +++ b/spacy/cli/debug_model.py @@ -2,7 +2,7 @@ from typing import Dict, Any, Optional from pathlib import Path from wasabi import msg from thinc.api import require_gpu, fix_random_seed, set_dropout_rate, Adam -from thinc.api import Model, data_validation +from thinc.api import Model, data_validation, set_gpu_allocator import typer from ._util import Arg, Opt, debug_cli, show_validation_error @@ -53,7 +53,12 @@ def debug_model_cli( } config_overrides = parse_config_overrides(ctx.args) with show_validation_error(config_path): - config = util.load_config(config_path, overrides=config_overrides) + config = util.load_config( + config_path, overrides=config_overrides, interpolate=True + ) + allocator = config["training"]["gpu_allocator"] + if use_gpu >= 0 and allocator: + set_gpu_allocator(allocator) nlp, config = util.load_model_from_config(config_path) seed = config["training"]["seed"] if seed is not None: diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py index aec077eb7..9e913396e 100644 --- a/spacy/cli/pretrain.py +++ b/spacy/cli/pretrain.py @@ -4,10 +4,9 @@ import time import re from collections import Counter from pathlib import Path -from thinc.api import Config -from thinc.api import 
use_pytorch_for_gpu_memory, require_gpu +from thinc.api import require_gpu, set_gpu_allocator from thinc.api import set_dropout_rate, to_categorical, fix_random_seed -from thinc.api import CosineDistance, L2Distance +from thinc.api import Config, CosineDistance, L2Distance from wasabi import msg import srsly from functools import partial @@ -32,7 +31,7 @@ def pretrain_cli( ctx: typer.Context, # This is only used to read additional arguments config_path: Path = Arg(..., help="Path to config file", exists=True, dir_okay=False), output_dir: Path = Arg(..., help="Directory to write weights to on each epoch"), - code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"), + code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"), epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."), use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"), @@ -99,10 +98,12 @@ def pretrain( epoch_resume: Optional[int] = None, use_gpu: int = -1, ): - if config["system"].get("seed") is not None: - fix_random_seed(config["system"]["seed"]) - if use_gpu >= 0 and config["system"].get("use_pytorch_for_gpu_memory"): - use_pytorch_for_gpu_memory() + if config["training"]["seed"] is not None: + fix_random_seed(config["training"]["seed"]) + allocator = config["training"]["gpu_allocator"] + if use_gpu >= 0 and allocator: + set_gpu_allocator(allocator) + nlp, config = util.load_model_from_config(config) P_cfg = config["pretraining"] corpus = dot_to_object(config, P_cfg["corpus"]) diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja index 00b77af4d..ef608e5e8 100644 --- a/spacy/cli/templates/quickstart_training.jinja +++ b/spacy/cli/templates/quickstart_training.jinja @@ -8,7 +8,7 @@ train = "" dev = "" [system] -use_pytorch_for_gpu_memory = {{ "true" if use_transformer else "false" }} +gpu_allocator = {{ "pytorch" if use_transformer else "" }} [nlp] lang = "{{ lang }}" diff --git a/spacy/cli/train.py b/spacy/cli/train.py index 50306b350..debecd0b1 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -6,8 +6,7 @@ from pathlib import Path from wasabi import msg import thinc import thinc.schedules -from thinc.api import use_pytorch_for_gpu_memory, require_gpu, fix_random_seed -from thinc.api import Config, Optimizer +from thinc.api import Config, Optimizer, require_gpu, fix_random_seed, set_gpu_allocator import random import typer import logging @@ -29,7 +28,7 @@ def train_cli( ctx: typer.Context, # This is only used to read additional arguments config_path: Path = Arg(..., help="Path to config file", exists=True), output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store trained pipeline in"), - code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"), + code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"), verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for 
debugging purposes"), use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"), resume: bool = Opt(False, "--resume", "-R", help="Resume training"), @@ -79,11 +78,11 @@ def train( config = util.load_config( config_path, overrides=config_overrides, interpolate=True ) - if config.get("training", {}).get("seed") is not None: + if config["training"]["seed"] is not None: fix_random_seed(config["training"]["seed"]) - if config.get("system", {}).get("use_pytorch_for_gpu_memory"): - # It feels kind of weird to not have a default for this. - use_pytorch_for_gpu_memory() + allocator = config["training"]["gpu_allocator"] + if use_gpu >= 0 and allocator: + set_gpu_allocator(allocator) # Use original config here before it's resolved to functions sourced_components = get_sourced_components(config) with show_validation_error(config_path): diff --git a/spacy/default_config.cfg b/spacy/default_config.cfg index c7c9593d7..f4a453f2a 100644 --- a/spacy/default_config.cfg +++ b/spacy/default_config.cfg @@ -6,7 +6,7 @@ init_tok2vec = null [system] seed = 0 -use_pytorch_for_gpu_memory = false +gpu_allocator = null [nlp] lang = null @@ -52,6 +52,7 @@ limit = 0 # Training hyper-parameters and additional features. [training] seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} dropout = 0.1 accumulate_gradient = 1 # Extra resources for transfer-learning or pseudo-rehearsal @@ -75,7 +76,6 @@ train_corpus = "corpora.train" [training.logger] @loggers = "spacy.ConsoleLogger.v1" - [training.batcher] @batchers = "spacy.batch_by_words.v1" discard_oversize = false diff --git a/spacy/schemas.py b/spacy/schemas.py index 06bc4beed..db71af9ca 100644 --- a/spacy/schemas.py +++ b/spacy/schemas.py @@ -207,6 +207,7 @@ class ConfigSchemaTraining(BaseModel): max_steps: StrictInt = Field(..., title="Maximum number of update steps to train for") eval_frequency: StrictInt = Field(..., title="How often to evaluate during training (steps)") seed: Optional[StrictInt] = Field(..., title="Random seed") + gpu_allocator: Optional[StrictStr] = Field(..., title="Memory allocator when running on GPU") accumulate_gradient: StrictInt = Field(..., title="Whether to divide the batch up into substeps") score_weights: Dict[StrictStr, Union[StrictFloat, StrictInt]] = Field(..., title="Scores to report and their weights for selecting final model") init_tok2vec: Optional[StrictStr] = Field(..., title="Path to pretrained tok2vec weights") diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md index bd65a1516..7374e1e3f 100644 --- a/website/docs/api/cli.md +++ b/website/docs/api/cli.md @@ -763,6 +763,7 @@ $ python -m spacy train [config_path] [--output] [--code] [--verbose] [overrides | `--output`, `-o` | Directory to store trained pipeline in. Will be created if it doesn't exist. ~~Optional[Path] \(positional)~~ | | `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ | | `--verbose`, `-V` | Show more detailed messages during training. ~~bool (flag)~~ | +| `--gpu-id`, `-g` | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~ | | `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ | | overrides | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. 
~~Any (option/flag)~~ |
| **CREATES** | The final trained pipeline and the best trained pipeline. |

@@ -798,11 +799,12 @@ $ python -m spacy pretrain [config_path] [output_dir] [--code] [--resume-path] [

| Name | Description |
| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `output_dir` | Directory to save binary weights to on each epoch. ~~Path (positional)~~ |
| `config_path` | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. ~~Path (positional)~~ |
+| `output_dir` | Directory to save binary weights to on each epoch. ~~Path (positional)~~ |
| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
| `--resume-path`, `-r` | Path to pretrained weights from which to resume pretraining. ~~Optional[Path] \(option)~~ |
| `--epoch-resume`, `-er` | The epoch to resume counting from when using `--resume-path`. Prevents unintended overwriting of existing weight files. ~~Optional[int] \(option)~~ |
+| `--gpu-id`, `-g` | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~ |
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
| overrides | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--training.dropout 0.2`. ~~Any (option/flag)~~ |
| **CREATES** | The pretrained weights that can be used to initialize `spacy train`. |
diff --git a/website/docs/api/data-formats.md b/website/docs/api/data-formats.md
index 3ed846b9e..6e80bb409 100644
--- a/website/docs/api/data-formats.md
+++ b/website/docs/api/data-formats.md
@@ -189,6 +189,7 @@ process that are used when you run [`spacy train`](/api/cli#train).
| `dev_corpus` | Dot notation of the config location defining the dev corpus. Defaults to `corpora.dev`. ~~str~~ |
| `dropout` | The dropout rate. Defaults to `0.1`. ~~float~~ |
| `eval_frequency` | How often to evaluate during training (steps). Defaults to `200`. ~~int~~ |
+| `gpu_allocator` | Library to which cupy routes GPU memory allocation. Can be `"pytorch"` or `"tensorflow"`. Defaults to variable `${system.gpu_allocator}`. ~~str~~ |
| `frozen_components` | Pipeline component names that are "frozen" and shouldn't be updated during training. See [here](/usage/training#config-components) for details. Defaults to `[]`. ~~List[str]~~ |
| `init_tok2vec` | Optional path to pretrained tok2vec weights created with [`spacy pretrain`](/api/cli#pretrain). Defaults to variable `${paths.init_tok2vec}`. ~~Optional[str]~~ |
| `max_epochs` | Maximum number of epochs to train for. Defaults to `0`. ~~int~~ |
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index 5d850be01..3f51d21aa 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -145,9 +145,10 @@ pipelines.

> nlp = spacy.load("en_core_web_sm")
> ```

-| Name | Description |
-| ----------- | --------------------------------------- |
-| **RETURNS** | Whether the GPU was activated. ~~bool~~ |
+| Name | Description |
+| ----------- | ------------------------------------------------ |
+| `gpu_id` | Device index to select. Defaults to `0`. ~~int~~ |
+| **RETURNS** | Whether the GPU was activated. 
~~bool~~ |

### spacy.require_gpu {#spacy.require_gpu tag="function" new="2.0.14"}

@@ -164,9 +165,10 @@ and _before_ loading any pipelines.

> #### Example
>
> ```python
> import spacy
> spacy.require_gpu()
> nlp = spacy.load("en_core_web_sm")
> ```

-| Name | Description |
-| ----------- | --------------- |
-| **RETURNS** | `True` ~~bool~~ |
+| Name | Description |
+| ----------- | ------------------------------------------------ |
+| `gpu_id` | Device index to select. Defaults to `0`. ~~int~~ |
+| **RETURNS** | `True` ~~bool~~ |

## displaCy {#displacy source="spacy/displacy"}

diff --git a/website/docs/usage/layers-architectures.md b/website/docs/usage/layers-architectures.md
index aefc64ece..f9787d815 100644
--- a/website/docs/usage/layers-architectures.md
+++ b/website/docs/usage/layers-architectures.md
@@ -356,6 +356,18 @@ that training configs are complete and experiments fully reproducible.

+Note that when using a PyTorch or TensorFlow model, it is recommended to set the GPU
+memory allocator accordingly. When `gpu_allocator` is set to `"pytorch"` or
+`"tensorflow"` in the training config, cupy will allocate memory via that library,
+preventing out-of-memory errors when free memory is sitting unused in the other
+library's pool.
+
+```ini
+### config.cfg (excerpt)
+[training]
+gpu_allocator = "pytorch"
+```
+
 ## Custom models with Thinc {#thinc}

 Of course it's also possible to define the `Model` from the previous section
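
---

For readers trying out this change, the allocator handling added to `spacy/cli/train.py`, `spacy/cli/pretrain.py` and `spacy/cli/debug_model.py` above boils down to the following pattern. This is a minimal sketch, assuming `thinc>=8.0.0a34` and an available CUDA GPU; the filename `config.cfg` and the `use_gpu` variable are hypothetical stand-ins for the training config and the `--gpu-id` CLI option:

```python
# Minimal sketch of the gpu_allocator code path introduced in this patch.
# Assumes thinc>=8.0.0a34 and a CUDA-capable GPU; "config.cfg" is a
# hypothetical training config that sets [training] gpu_allocator.
from thinc.api import Config, require_gpu, set_gpu_allocator

use_gpu = 0  # stand-in for the --gpu-id option; -1 means CPU

# Load and interpolate the config so ${system.gpu_allocator} is resolved.
config = Config().from_disk("config.cfg", interpolate=True)

if use_gpu >= 0:
    require_gpu(use_gpu)
    allocator = config["training"]["gpu_allocator"]  # "pytorch", "tensorflow" or None
    if allocator:
        # Route cupy's GPU memory requests through the chosen library's
        # pool, so the two frameworks don't each hold their own cached blocks.
        set_gpu_allocator(allocator)
```

On CPU (`use_gpu = -1`) the setting is simply ignored, and a `null` allocator leaves cupy's default memory pool in place, mirroring the `if use_gpu >= 0 and allocator` guards in the patch.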