From 96fe314d8dc7c0ef2ef3d5d8e67d32fc09d2e3d9 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 27 Sep 2018 13:54:34 +0200
Subject: [PATCH] Fix bug when too many entity types. Fixes #2800

---
Reviewer notes (text in this section is ignored when the patch is applied):

* Parser.transition_batch previously used a fixed-size stack buffer,
  `cdef int[500] is_valid`. The hunk below replaces it with a Pool
  allocation of `self.moves.n_moves` ints, so the buffer is sized from the
  number of moves instead of a hard-coded 500.
* The new regression test adds 1000 entity labels to an NER pipe and runs
  20 training iterations over a single example; it makes no assertions and
  passes as long as training completes.
* NOTE(review): the `<int*>` cast on the `mem.alloc(...)` line was
  reconstructed. The single-line copy of this patch had lost it (and the
  e-mail address in the From: header) -- confirm against the upstream commit.

 spacy/syntax/nn_parser.pyx               |  3 ++-
 spacy/tests/regression/test_issue2800.py | 34 ++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)
 create mode 100644 spacy/tests/regression/test_issue2800.py

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 8dfae7925..15858d160 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -747,7 +747,8 @@ cdef class Parser:
 
     def transition_batch(self, states, float[:, ::1] scores):
         cdef StateClass state
-        cdef int[500] is_valid # TODO: Unhack
+        cdef Pool mem = Pool()
+        is_valid = <int*>mem.alloc(self.moves.n_moves, sizeof(int))
         cdef float* c_scores = &scores[0, 0]
         for state in states:
             self.moves.set_valid(is_valid, state.c)
diff --git a/spacy/tests/regression/test_issue2800.py b/spacy/tests/regression/test_issue2800.py
new file mode 100644
index 000000000..5f5b9fa51
--- /dev/null
+++ b/spacy/tests/regression/test_issue2800.py
@@ -0,0 +1,34 @@
+'''Test issue that arises when too many labels are added to NER model.'''
+import random
+from ...lang.en import English
+
+def train_model(train_data, entity_types):
+    nlp = English(pipeline=[])
+
+    ner = nlp.create_pipe("ner")
+    nlp.add_pipe(ner)
+
+    for entity_type in list(entity_types):
+        ner.add_label(entity_type)
+
+    optimizer = nlp.begin_training()
+
+    # Start training
+    for i in range(20):
+        losses = {}
+        index = 0
+        random.shuffle(train_data)
+
+        for statement, entities in train_data:
+            nlp.update([statement], [entities], sgd=optimizer, losses=losses, drop=0.5)
+    return nlp
+
+
+def test_train_with_many_entity_types():
+    train_data = []
+    train_data.extend([("One sentence", {"entities": []})])
+    entity_types = [str(i) for i in range(1000)]
+
+    model = train_model(train_data, entity_types)
+
+