diff --git a/spacy/cli/converters/iob2json.py b/spacy/cli/converters/iob2json.py
index 50de6a0bc..7d2c1fcfe 100644
--- a/spacy/cli/converters/iob2json.py
+++ b/spacy/cli/converters/iob2json.py
@@ -1,11 +1,11 @@
 # coding: utf8
 from __future__ import unicode_literals
 
+import re
+
 from ...gold import iob_to_biluo
 from ...util import minibatch
 
-import re
-
 
 def iob2json(input_data, n_sents=10, *args, **kwargs):
     """
diff --git a/spacy/lang/fa/stop_words.py b/spacy/lang/fa/stop_words.py
index 8ec941ef5..682fb7a71 100644
--- a/spacy/lang/fa/stop_words.py
+++ b/spacy/lang/fa/stop_words.py
@@ -1,7 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-# stop words from HAZM package
 # Stop words from HAZM package
 
 STOP_WORDS = set(
diff --git a/spacy/lang/id/norm_exceptions.py b/spacy/lang/id/norm_exceptions.py
index ca0e18b41..09ac6a6d3 100644
--- a/spacy/lang/id/norm_exceptions.py
+++ b/spacy/lang/id/norm_exceptions.py
@@ -1,10 +1,3 @@
-"""
-Slang and abbreviations
-
-Daftar kosakata yang sering salah dieja
-https://id.wikipedia.org/wiki/Wikipedia:Daftar_kosakata_bahasa_Indonesia_yang_sering_salah_dieja
-
-"""
 # coding: utf8
 from __future__ import unicode_literals
 
diff --git a/spacy/lang/id/stop_words.py b/spacy/lang/id/stop_words.py
index 6a5a05a5a..0a9f91947 100644
--- a/spacy/lang/id/stop_words.py
+++ b/spacy/lang/id/stop_words.py
@@ -1,6 +1,3 @@
-"""
-List of stop words in Bahasa Indonesia.
-"""
 # coding: utf8
 from __future__ import unicode_literals
 
diff --git a/spacy/lang/id/tokenizer_exceptions.py b/spacy/lang/id/tokenizer_exceptions.py
index cc0f2a078..86fe611bf 100644
--- a/spacy/lang/id/tokenizer_exceptions.py
+++ b/spacy/lang/id/tokenizer_exceptions.py
@@ -1,7 +1,3 @@
-"""
-Daftar singkatan dan Akronim dari:
-https://id.wiktionary.org/wiki/Wiktionary:Daftar_singkatan_dan_akronim_bahasa_Indonesia#A
-"""
 # coding: utf8
 from __future__ import unicode_literals
 
diff --git a/spacy/matcher.pyx b/spacy/matcher.pyx
index 025424d37..23449aa57 100644
--- a/spacy/matcher.pyx
+++ b/spacy/matcher.pyx
@@ -291,8 +291,6 @@ cdef char get_quantifier(PatternStateC state) nogil:
 
 
 DEF PADDING = 5
-DEF PADDING = 5
-
 
 cdef TokenPatternC* init_pattern(Pool mem, attr_t entity_id,
                                  object token_specs) except NULL:
diff --git a/spacy/tests/doc/test_span_merge.py b/spacy/tests/doc/test_span_merge.py
index 868df394c..75a28380c 100644
--- a/spacy/tests/doc/test_span_merge.py
+++ b/spacy/tests/doc/test_span_merge.py
@@ -53,24 +53,7 @@ def test_spans_merge_heads(en_tokenizer):
 
 def test_spans_merge_non_disjoint(en_tokenizer):
     text = "Los Angeles start."
-    tokens = en_tokenizer(text)
-    doc = get_doc(tokens.vocab, [t.text for t in tokens])
-    with pytest.raises(ValueError):
-        with doc.retokenize() as retokenizer:
-            retokenizer.merge(
-                doc[0:2],
-                attrs={"tag": "NNP", "lemma": "Los Angeles", "ent_type": "GPE"},
-            )
-            retokenizer.merge(
-                doc[0:1],
-                attrs={"tag": "NNP", "lemma": "Los Angeles", "ent_type": "GPE"},
-            )
-
-
-def test_spans_merge_non_disjoint(en_tokenizer):
-    text = "Los Angeles start."
-    tokens = en_tokenizer(text)
-    doc = get_doc(tokens.vocab, [t.text for t in tokens])
+    doc = en_tokenizer(text)
     with pytest.raises(ValueError):
         with doc.retokenize() as retokenizer:
             retokenizer.merge(
diff --git a/spacy/tests/regression/test_issue2800.py b/spacy/tests/regression/test_issue2800.py
deleted file mode 100644
index 7f4df9dab..000000000
--- a/spacy/tests/regression/test_issue2800.py
+++ /dev/null
@@ -1,36 +0,0 @@
-'''Test issue that arises when too many labels are added to NER model.'''
-from __future__ import unicode_literals
-
-import random
-from ...lang.en import English
-
-def train_model(train_data, entity_types):
-    nlp = English(pipeline=[])
-
-    ner = nlp.create_pipe("ner")
-    nlp.add_pipe(ner)
-
-    for entity_type in list(entity_types):
-        ner.add_label(entity_type)
-
-    optimizer = nlp.begin_training()
-
-    # Start training
-    for i in range(20):
-        losses = {}
-        index = 0
-        random.shuffle(train_data)
-
-        for statement, entities in train_data:
-            nlp.update([statement], [entities], sgd=optimizer, losses=losses, drop=0.5)
-    return nlp
-
-
-def test_train_with_many_entity_types():
-    train_data = []
-    train_data.extend([("One sentence", {"entities": []})])
-    entity_types = [str(i) for i in range(1000)]
-
-    model = train_model(train_data, entity_types)
-
-
diff --git a/spacy/tests/test_symlink_windows.py b/spacy/tests/test_symlink_windows.py
deleted file mode 100644
index a19395af8..000000000
--- a/spacy/tests/test_symlink_windows.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import pytest
-import os
-from pathlib import Path
-
-from ..compat import symlink_to, symlink_remove, path2str
-
-
-def target_local_path():
-    return "./foo-target"
-
-
-def link_local_path():
-    return "./foo-symlink"
-
-
-@pytest.fixture(scope="function")
-def setup_target(request):
-    target = Path(target_local_path())
-    if not target.exists():
-        os.mkdir(path2str(target))
-
-    # yield -- need to cleanup even if assertion fails
-    # https://github.com/pytest-dev/pytest/issues/2508#issuecomment-309934240
-    def cleanup():
-        symlink_remove(Path(link_local_path()))
-        os.rmdir(target_local_path())
-
-    request.addfinalizer(cleanup)
-
-
-def test_create_symlink_windows(setup_target):
-    target = Path(target_local_path())
-    link = Path(link_local_path())
-    assert target.exists()
-
-    symlink_to(link, target)
-    assert link.exists()