From 6918d99b6c631b5256aa24302050b085af841cc8 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 20 Apr 2020 22:06:28 +0200 Subject: [PATCH] Improve GPU usage for train-with-config (#5330) * Adjust for no ops in Optimizer * Fix gpu in train-from-config * Update train-from-config script * Fix parser * Fix GPU efficiency of padding backprop --- spacy/cli/train_from_config.py | 37 ++++++++++++++++++------------- spacy/ml/_precomputable_affine.py | 19 +++++----------- spacy/syntax/nn_parser.pyx | 1 + spacy/util.py | 2 -- 4 files changed, 27 insertions(+), 32 deletions(-) diff --git a/spacy/cli/train_from_config.py b/spacy/cli/train_from_config.py index 5b09909c7..933b275c4 100644 --- a/spacy/cli/train_from_config.py +++ b/spacy/cli/train_from_config.py @@ -1,4 +1,5 @@ from typing import Optional, Dict, List, Union, Sequence +from timeit import default_timer as timer from pydantic import BaseModel, FilePath import plac import tqdm @@ -146,30 +147,29 @@ def train_from_config_cli( if output_path is not None and not output_path.exists(): output_path.mkdir() - try: - train_from_config( - config_path, - {"train": train_path, "dev": dev_path}, - output_path=output_path, - meta_path=meta_path, - raw_text=raw_text, - ) - except KeyboardInterrupt: - msg.warn("Cancelled.") + train_from_config( + config_path, + {"train": train_path, "dev": dev_path}, + output_path=output_path, + meta_path=meta_path, + raw_text=raw_text, + ) def train_from_config( config_path, data_paths, raw_text=None, meta_path=None, output_path=None, ): msg.info(f"Loading config from: {config_path}") - config = util.load_config(config_path, create_objects=True) + config = util.load_config(config_path, create_objects=False) + nlp_config = config["nlp"] use_gpu = config["training"]["use_gpu"] if use_gpu >= 0: msg.info("Using GPU") + util.use_gpu(use_gpu) else: msg.info("Using CPU") + config = util.load_config(config_path, create_objects=True) msg.info("Creating nlp from config") - nlp_config = util.load_config(config_path, create_objects=False)["nlp"] nlp = util.load_model_from_config(nlp_config) optimizer = config["optimizer"] training = config["training"] @@ -240,12 +240,17 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg): nlp, gold_preproc=cfg["gold_preproc"], ignore_misaligned=True ) ) + n_words = sum(len(ex.doc) for ex in dev_examples) + start_time = timer() scorer = nlp.evaluate(dev_examples) + end_time = timer() + wps = n_words / (end_time - start_time) scores = scorer.scores # Calculate a weighted sum based on score_weights for the main score weights = cfg["score_weights"] weighted_score = sum(scores[s] * weights.get(s, 0.0) for s in weights) - return weighted_score, scorer.scores + scores["speed"] = wps + return weighted_score, scores return evaluate @@ -346,13 +351,13 @@ def setup_printer(training, nlp): def print_row(info): losses = [ - "{0:.2f}".format(info["losses"].get(pipe_name, 0.0)) + "{0:.2f}".format(float(info["losses"].get(pipe_name, 0.0))) for pipe_name in nlp.pipe_names ] scores = [ - "{0:.2f}".format(info["other_scores"].get(col, 0.0)) for col in score_cols + "{0:.2f}".format(float(info["other_scores"].get(col, 0.0))) for col in score_cols ] - data = [info["step"]] + losses + scores + ["{0:.2f}".format(info["score"])] + data = [info["step"]] + losses + scores + ["{0:.2f}".format(float(info["score"]))] msg.row(data, widths=table_widths, aligns=table_aligns) return print_row diff --git a/spacy/ml/_precomputable_affine.py b/spacy/ml/_precomputable_affine.py index a752ef49a..c7328bad9 100644 --- a/spacy/ml/_precomputable_affine.py +++ b/spacy/ml/_precomputable_affine.py @@ -79,23 +79,14 @@ def _backprop_precomputable_affine_padding(model, dY, ids): # for b in range(nB): # for f in range(nF): # if ids[b, f] < 0: - # d_pad[0, f] += dY[b] + # d_pad[f] += dY[b] # # Which can be rewritten as: # - # for b in range(nB): - # d_pad[0, ids[b] < 0] += dY[b] - # - # I don't know how to avoid the loop without building a whole array :(. - # Cursed numpy. - # - # Note by Sofie: rewritten to longer loop because "CuPy only supports slices that consist of one boolean array." - d_pad = model.ops.alloc((1, nF, nO, nP)) - for b in range(nB): - for f in range(nF): - if ids[b, f] < 0: - d_pad[0, f] += dY[b] - return d_pad + # (ids < 0).T @ dY + mask = model.ops.asarray(ids < 0, dtype="f") + d_pad = model.ops.gemm(mask, dY.reshape(nB, nO*nP), trans1=True) + return d_pad.reshape((1, nF, nO, nP)) def init(model, X=None, Y=None): diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index f480e3528..01d6d5bfe 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -216,6 +216,7 @@ cdef class Parser: # expand our model output. self._resize() model = self.model.predict(docs) + W_param = model.vec2scores.get_param("W") weights = get_c_weights(model) for state in batch: if not state.is_final(): diff --git a/spacy/util.py b/spacy/util.py index ef9082140..ea3023629 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -784,7 +784,6 @@ VECTORS_KEY = "spacy_pretrained_vectors" def create_default_optimizer(): - ops = get_current_ops() learn_rate = env_opt("learn_rate", 0.001) beta1 = env_opt("optimizer_B1", 0.9) beta2 = env_opt("optimizer_B2", 0.999) @@ -798,7 +797,6 @@ def create_default_optimizer(): beta1=beta1, beta2=beta2, eps=eps, - ops=ops, grad_clip=grad_clip, L2_is_weight_decay=L2_is_weight_decay, )