diff --git a/spacy/tests/regression/test_issue2501-3000.py b/spacy/tests/regression/test_issue2501-3000.py index 8997c8a56..cf29c2535 100644 --- a/spacy/tests/regression/test_issue2501-3000.py +++ b/spacy/tests/regression/test_issue2501-3000.py @@ -13,6 +13,7 @@ from spacy.vocab import Vocab from spacy.compat import pickle from spacy._ml import link_vectors_to_models import numpy +import random from ..util import get_doc @@ -138,6 +139,26 @@ def test_issue2782(text, lang_cls): assert doc[0].like_num +def test_issue2800(): + """Test issue that arises when too many labels are added to NER model. + Used to cause segfault. + """ + train_data = [] + train_data.extend([("One sentence", {"entities": []})]) + entity_types = [str(i) for i in range(1000)] + nlp = English() + ner = nlp.create_pipe("ner") + nlp.add_pipe(ner) + for entity_type in list(entity_types): + ner.add_label(entity_type) + optimizer = nlp.begin_training() + for i in range(20): + losses = {} + random.shuffle(train_data) + for statement, entities in train_data: + nlp.update([statement], [entities], sgd=optimizer, losses=losses, drop=0.5) + + def test_issue2822(it_tokenizer): """Test that the abbreviation of poco is kept as one word.""" doc = it_tokenizer("Vuoi un po' di zucchero?") diff --git a/spacy/tests/regression/test_issue2800.py b/spacy/tests/regression/test_issue2800.py deleted file mode 100644 index e0d54ff37..000000000 --- a/spacy/tests/regression/test_issue2800.py +++ /dev/null @@ -1,25 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import random -from spacy.lang.en import English - - -def test_train_with_many_entity_types(): - """Test issue that arises when too many labels are added to NER model. - NB: currently causes segfault! - """ - train_data = [] - train_data.extend([("One sentence", {"entities": []})]) - entity_types = [str(i) for i in range(1000)] - nlp = English(pipeline=[]) - ner = nlp.create_pipe("ner") - nlp.add_pipe(ner) - for entity_type in list(entity_types): - ner.add_label(entity_type) - optimizer = nlp.begin_training() - for i in range(20): - losses = {} - random.shuffle(train_data) - for statement, entities in train_data: - nlp.update([statement], [entities], sgd=optimizer, losses=losses, drop=0.5)