diff --git a/spacy/lang/tr/lex_attrs.py b/spacy/lang/tr/lex_attrs.py index 3615f4b4c..d9e12c4aa 100644 --- a/spacy/lang/tr/lex_attrs.py +++ b/spacy/lang/tr/lex_attrs.py @@ -62,6 +62,7 @@ _ordinal_words = [ _ordinal_endings = ("inci", "ıncı", "nci", "ncı", "uncu", "üncü") + def like_num(text): if text.startswith(("+", "-", "±", "~")): text = text[1:] @@ -75,11 +76,11 @@ def like_num(text): text_lower = text.lower() - #Check cardinal number + # Check cardinal number if text_lower in _num_words: return True - #Check ordinal number + # Check ordinal number if text_lower in _ordinal_words: return True if text_lower.endswith(_ordinal_endings): diff --git a/spacy/lang/tr/syntax_iterators.py b/spacy/lang/tr/syntax_iterators.py index 665ccb590..d9b342949 100644 --- a/spacy/lang/tr/syntax_iterators.py +++ b/spacy/lang/tr/syntax_iterators.py @@ -49,11 +49,10 @@ def noun_chunks(doclike): prev_end = word.left_edge.i yield word.left_edge.i, extend_right(word), np_label elif word.dep == conj: - cc_token = word.left_edge + cc_token = word.left_edge prev_end = cc_token.i - yield cc_token.right_edge.i + 1, extend_right(word), np_label # Shave off cc tokens from the NP - - + # Shave off cc tokens from the NP + yield cc_token.right_edge.i + 1, extend_right(word), np_label SYNTAX_ITERATORS = {"noun_chunks": noun_chunks} diff --git a/spacy/language.py b/spacy/language.py index 24e593043..dd790e85f 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -1,5 +1,5 @@ from typing import Optional, Any, Dict, Callable, Iterable, Union, List, Pattern -from typing import Tuple, Iterator +from typing import Tuple from dataclasses import dataclass import random import itertools @@ -1197,7 +1197,9 @@ class Language: doc = Doc(self.vocab, words=["x", "y", "z"]) get_examples = lambda: [Example.from_dict(doc, {})] if not hasattr(get_examples, "__call__"): - err = Errors.E930.format(method="Language.initialize", obj=type(get_examples)) + err = Errors.E930.format( + method="Language.initialize", obj=type(get_examples) + ) raise TypeError(err) # Make sure the config is interpolated so we can resolve subsections config = self.config.interpolate() diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py index 7f8ab6768..3b0de899b 100644 --- a/spacy/tests/conftest.py +++ b/spacy/tests/conftest.py @@ -239,10 +239,12 @@ def th_tokenizer(): def tr_tokenizer(): return get_lang_class("tr")().tokenizer + @pytest.fixture(scope="session") def tr_vocab(): return get_lang_class("tr").Defaults.create_vocab() + @pytest.fixture(scope="session") def tt_tokenizer(): return get_lang_class("tt")().tokenizer diff --git a/spacy/tests/lang/tr/test_parser.py b/spacy/tests/lang/tr/test_parser.py index ff71ac3d4..b23d0869c 100644 --- a/spacy/tests/lang/tr/test_parser.py +++ b/spacy/tests/lang/tr/test_parser.py @@ -225,7 +225,7 @@ def test_tr_noun_chunks_acl_nmod(tr_tokenizer): assert chunks[0].text_with_ws == "en sevdiğim ses sanatçısı " -def test_tr_noun_chunks_acl_nmod(tr_tokenizer): +def test_tr_noun_chunks_acl_nmod2(tr_tokenizer): text = "bildiğim bir turizm şirketi" heads = [3, 3, 3, 3] deps = ["acl", "det", "nmod", "ROOT"] @@ -308,7 +308,7 @@ def test_tr_noun_chunks_np_recursive_four_nouns(tr_tokenizer): assert len(chunks) == 1 assert chunks[0].text_with_ws == "kızına piyano dersi verdiğim hanım " - + def test_tr_noun_chunks_np_recursive_no_nmod(tr_tokenizer): text = "içine birkaç çiçek konmuş olan bir vazo" heads = [3, 2, 3, 6, 3, 6, 6] @@ -326,7 +326,7 @@ def test_tr_noun_chunks_np_recursive_no_nmod(tr_tokenizer): def test_tr_noun_chunks_np_recursive_long_two_acls(tr_tokenizer): text = "içine Simge'nin bahçesinden toplanmış birkaç çiçeğin konmuş olduğu bir vazo" heads = [6, 2, 3, 5, 5, 6, 9, 6, 9, 9] - deps = ["obl", "nmod" , "obl", "acl", "det", "nsubj", "acl", "aux", "det", "ROOT"] + deps = ["obl", "nmod", "obl", "acl", "det", "nsubj", "acl", "aux", "det", "ROOT"] pos = ["ADP", "PROPN", "NOUN", "VERB", "DET", "NOUN", "VERB", "AUX", "DET", "NOUN"] tokens = tr_tokenizer(text) doc = Doc( @@ -334,7 +334,10 @@ def test_tr_noun_chunks_np_recursive_long_two_acls(tr_tokenizer): ) chunks = list(doc.noun_chunks) assert len(chunks) == 1 - assert chunks[0].text_with_ws == "içine Simge'nin bahçesinden toplanmış birkaç çiçeğin konmuş olduğu bir vazo " + assert ( + chunks[0].text_with_ws + == "içine Simge'nin bahçesinden toplanmış birkaç çiçeğin konmuş olduğu bir vazo " + ) def test_tr_noun_chunks_two_nouns_in_nmod(tr_tokenizer): @@ -350,7 +353,8 @@ def test_tr_noun_chunks_two_nouns_in_nmod(tr_tokenizer): assert len(chunks) == 1 assert chunks[0].text_with_ws == "kız ve erkek çocuklar " -def test_tr_noun_chunks_two_nouns_in_nmod(tr_tokenizer): + +def test_tr_noun_chunks_two_nouns_in_nmod2(tr_tokenizer): text = "tatlı ve gürbüz çocuklar" heads = [3, 2, 0, 3] deps = ["amod", "cc", "conj", "ROOT"] @@ -378,6 +382,7 @@ def test_tr_noun_chunks_conj_simple(tr_tokenizer): assert chunks[0].text_with_ws == "ben " assert chunks[1].text_with_ws == "Sen " + def test_tr_noun_chunks_conj_three(tr_tokenizer): text = "sen, ben ve ondan" heads = [0, 2, 0, 4, 0] @@ -394,7 +399,7 @@ def test_tr_noun_chunks_conj_three(tr_tokenizer): assert chunks[2].text_with_ws == "sen " -def test_tr_noun_chunks_conj_three(tr_tokenizer): +def test_tr_noun_chunks_conj_three2(tr_tokenizer): text = "ben ya da sen ya da onlar" heads = [0, 3, 1, 0, 6, 4, 3] deps = ["ROOT", "cc", "fixed", "conj", "cc", "fixed", "conj"] @@ -499,7 +504,7 @@ def test_tr_noun_chunks_flat_names_and_title(tr_tokenizer): assert chunks[0].text_with_ws == "Gazi Mustafa Kemal " -def test_tr_noun_chunks_flat_names_and_title(tr_tokenizer): +def test_tr_noun_chunks_flat_names_and_title2(tr_tokenizer): text = "Ahmet Vefik Paşa" heads = [2, 0, 2] deps = ["nmod", "flat", "ROOT"] diff --git a/spacy/tests/lang/tr/test_text.py b/spacy/tests/lang/tr/test_text.py index 01e279d76..ed7dbb805 100644 --- a/spacy/tests/lang/tr/test_text.py +++ b/spacy/tests/lang/tr/test_text.py @@ -15,8 +15,8 @@ from spacy.lang.tr.lex_attrs import like_num "üçüncü", "beşinci", "100üncü", - "8inci" - ] + "8inci", + ], ) def test_tr_lex_attrs_like_number_cardinal_ordinal(word): assert like_num(word) @@ -26,4 +26,3 @@ def test_tr_lex_attrs_like_number_cardinal_ordinal(word): def test_tr_lex_attrs_capitals(word): assert like_num(word) assert like_num(word.upper()) - diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index ff2e33fc7..e0c63d09e 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -446,7 +446,7 @@ def test_overfitting_IO(): return mykb # Create the Entity Linker component and add it to the pipeline - entity_linker = nlp.add_pipe("entity_linker", last=True,) + entity_linker = nlp.add_pipe("entity_linker", last=True) entity_linker.set_kb(create_kb) assert "Q2146908" in entity_linker.vocab.strings assert "Q2146908" in entity_linker.kb.vocab.strings diff --git a/spacy/tests/regression/test_issue6207.py b/spacy/tests/regression/test_issue6207.py index 47e3803e9..9d8b047bf 100644 --- a/spacy/tests/regression/test_issue6207.py +++ b/spacy/tests/regression/test_issue6207.py @@ -6,8 +6,8 @@ def test_issue6207(en_tokenizer): # Make spans s1 = doc[:4] - s2 = doc[3:6] # overlaps with s1 - s3 = doc[5:7] # overlaps with s2, not s1 + s2 = doc[3:6] # overlaps with s1 + s3 = doc[5:7] # overlaps with s2, not s1 result = filter_spans((s1, s2, s3)) assert s1 in result diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py index 8ca7f8b66..e8884e6b2 100644 --- a/spacy/tests/test_models.py +++ b/spacy/tests/test_models.py @@ -1,10 +1,8 @@ from typing import List - import pytest from thinc.api import fix_random_seed, Adam, set_dropout_rate from numpy.testing import assert_array_equal import numpy - from spacy.ml.models import build_Tok2Vec_model, MultiHashEmbed, MaxoutWindowEncoder from spacy.ml.models import build_text_classifier, build_simple_cnn_text_classifier from spacy.ml.staticvectors import StaticVectors @@ -188,12 +186,7 @@ def test_models_update_consistently(seed, dropout, model_func, kwargs, get_X): assert_array_equal(get_all_params(model1), get_all_params(model2)) -@pytest.mark.parametrize( - "model_func,kwargs", - [ - (StaticVectors, {"nO": 128, "nM": 300}), - ] -) +@pytest.mark.parametrize("model_func,kwargs", [(StaticVectors, {"nO": 128, "nM": 300})]) def test_empty_docs(model_func, kwargs): nlp = English() model = model_func(**kwargs).initialize() @@ -201,7 +194,7 @@ def test_empty_docs(model_func, kwargs): for n_docs in range(3): docs = [nlp("") for _ in range(n_docs)] # Test predict - _ = model.predict(docs) + model.predict(docs) # Test backprop output, backprop = model.begin_update(docs) - _ = backprop(output) + backprop(output)