mirror of https://github.com/explosion/spaCy.git
Tidy up and auto-format
This commit is contained in:
parent
74972744e5
commit
539b0c10da
|
@ -62,6 +62,7 @@ _ordinal_words = [
|
|||
|
||||
_ordinal_endings = ("inci", "ıncı", "nci", "ncı", "uncu", "üncü")
|
||||
|
||||
|
||||
def like_num(text):
|
||||
if text.startswith(("+", "-", "±", "~")):
|
||||
text = text[1:]
|
||||
|
@ -75,11 +76,11 @@ def like_num(text):
|
|||
|
||||
text_lower = text.lower()
|
||||
|
||||
#Check cardinal number
|
||||
# Check cardinal number
|
||||
if text_lower in _num_words:
|
||||
return True
|
||||
|
||||
#Check ordinal number
|
||||
# Check ordinal number
|
||||
if text_lower in _ordinal_words:
|
||||
return True
|
||||
if text_lower.endswith(_ordinal_endings):
|
||||
|
|
|
@ -49,11 +49,10 @@ def noun_chunks(doclike):
|
|||
prev_end = word.left_edge.i
|
||||
yield word.left_edge.i, extend_right(word), np_label
|
||||
elif word.dep == conj:
|
||||
cc_token = word.left_edge
|
||||
cc_token = word.left_edge
|
||||
prev_end = cc_token.i
|
||||
yield cc_token.right_edge.i + 1, extend_right(word), np_label # Shave off cc tokens from the NP
|
||||
|
||||
|
||||
# Shave off cc tokens from the NP
|
||||
yield cc_token.right_edge.i + 1, extend_right(word), np_label
|
||||
|
||||
|
||||
SYNTAX_ITERATORS = {"noun_chunks": noun_chunks}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from typing import Optional, Any, Dict, Callable, Iterable, Union, List, Pattern
|
||||
from typing import Tuple, Iterator
|
||||
from typing import Tuple
|
||||
from dataclasses import dataclass
|
||||
import random
|
||||
import itertools
|
||||
|
@ -1197,7 +1197,9 @@ class Language:
|
|||
doc = Doc(self.vocab, words=["x", "y", "z"])
|
||||
get_examples = lambda: [Example.from_dict(doc, {})]
|
||||
if not hasattr(get_examples, "__call__"):
|
||||
err = Errors.E930.format(method="Language.initialize", obj=type(get_examples))
|
||||
err = Errors.E930.format(
|
||||
method="Language.initialize", obj=type(get_examples)
|
||||
)
|
||||
raise TypeError(err)
|
||||
# Make sure the config is interpolated so we can resolve subsections
|
||||
config = self.config.interpolate()
|
||||
|
|
|
@ -239,10 +239,12 @@ def th_tokenizer():
|
|||
def tr_tokenizer():
|
||||
return get_lang_class("tr")().tokenizer
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def tr_vocab():
|
||||
return get_lang_class("tr").Defaults.create_vocab()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def tt_tokenizer():
|
||||
return get_lang_class("tt")().tokenizer
|
||||
|
|
|
@ -225,7 +225,7 @@ def test_tr_noun_chunks_acl_nmod(tr_tokenizer):
|
|||
assert chunks[0].text_with_ws == "en sevdiğim ses sanatçısı "
|
||||
|
||||
|
||||
def test_tr_noun_chunks_acl_nmod(tr_tokenizer):
|
||||
def test_tr_noun_chunks_acl_nmod2(tr_tokenizer):
|
||||
text = "bildiğim bir turizm şirketi"
|
||||
heads = [3, 3, 3, 3]
|
||||
deps = ["acl", "det", "nmod", "ROOT"]
|
||||
|
@ -308,7 +308,7 @@ def test_tr_noun_chunks_np_recursive_four_nouns(tr_tokenizer):
|
|||
assert len(chunks) == 1
|
||||
assert chunks[0].text_with_ws == "kızına piyano dersi verdiğim hanım "
|
||||
|
||||
|
||||
|
||||
def test_tr_noun_chunks_np_recursive_no_nmod(tr_tokenizer):
|
||||
text = "içine birkaç çiçek konmuş olan bir vazo"
|
||||
heads = [3, 2, 3, 6, 3, 6, 6]
|
||||
|
@ -326,7 +326,7 @@ def test_tr_noun_chunks_np_recursive_no_nmod(tr_tokenizer):
|
|||
def test_tr_noun_chunks_np_recursive_long_two_acls(tr_tokenizer):
|
||||
text = "içine Simge'nin bahçesinden toplanmış birkaç çiçeğin konmuş olduğu bir vazo"
|
||||
heads = [6, 2, 3, 5, 5, 6, 9, 6, 9, 9]
|
||||
deps = ["obl", "nmod" , "obl", "acl", "det", "nsubj", "acl", "aux", "det", "ROOT"]
|
||||
deps = ["obl", "nmod", "obl", "acl", "det", "nsubj", "acl", "aux", "det", "ROOT"]
|
||||
pos = ["ADP", "PROPN", "NOUN", "VERB", "DET", "NOUN", "VERB", "AUX", "DET", "NOUN"]
|
||||
tokens = tr_tokenizer(text)
|
||||
doc = Doc(
|
||||
|
@ -334,7 +334,10 @@ def test_tr_noun_chunks_np_recursive_long_two_acls(tr_tokenizer):
|
|||
)
|
||||
chunks = list(doc.noun_chunks)
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].text_with_ws == "içine Simge'nin bahçesinden toplanmış birkaç çiçeğin konmuş olduğu bir vazo "
|
||||
assert (
|
||||
chunks[0].text_with_ws
|
||||
== "içine Simge'nin bahçesinden toplanmış birkaç çiçeğin konmuş olduğu bir vazo "
|
||||
)
|
||||
|
||||
|
||||
def test_tr_noun_chunks_two_nouns_in_nmod(tr_tokenizer):
|
||||
|
@ -350,7 +353,8 @@ def test_tr_noun_chunks_two_nouns_in_nmod(tr_tokenizer):
|
|||
assert len(chunks) == 1
|
||||
assert chunks[0].text_with_ws == "kız ve erkek çocuklar "
|
||||
|
||||
def test_tr_noun_chunks_two_nouns_in_nmod(tr_tokenizer):
|
||||
|
||||
def test_tr_noun_chunks_two_nouns_in_nmod2(tr_tokenizer):
|
||||
text = "tatlı ve gürbüz çocuklar"
|
||||
heads = [3, 2, 0, 3]
|
||||
deps = ["amod", "cc", "conj", "ROOT"]
|
||||
|
@ -378,6 +382,7 @@ def test_tr_noun_chunks_conj_simple(tr_tokenizer):
|
|||
assert chunks[0].text_with_ws == "ben "
|
||||
assert chunks[1].text_with_ws == "Sen "
|
||||
|
||||
|
||||
def test_tr_noun_chunks_conj_three(tr_tokenizer):
|
||||
text = "sen, ben ve ondan"
|
||||
heads = [0, 2, 0, 4, 0]
|
||||
|
@ -394,7 +399,7 @@ def test_tr_noun_chunks_conj_three(tr_tokenizer):
|
|||
assert chunks[2].text_with_ws == "sen "
|
||||
|
||||
|
||||
def test_tr_noun_chunks_conj_three(tr_tokenizer):
|
||||
def test_tr_noun_chunks_conj_three2(tr_tokenizer):
|
||||
text = "ben ya da sen ya da onlar"
|
||||
heads = [0, 3, 1, 0, 6, 4, 3]
|
||||
deps = ["ROOT", "cc", "fixed", "conj", "cc", "fixed", "conj"]
|
||||
|
@ -499,7 +504,7 @@ def test_tr_noun_chunks_flat_names_and_title(tr_tokenizer):
|
|||
assert chunks[0].text_with_ws == "Gazi Mustafa Kemal "
|
||||
|
||||
|
||||
def test_tr_noun_chunks_flat_names_and_title(tr_tokenizer):
|
||||
def test_tr_noun_chunks_flat_names_and_title2(tr_tokenizer):
|
||||
text = "Ahmet Vefik Paşa"
|
||||
heads = [2, 0, 2]
|
||||
deps = ["nmod", "flat", "ROOT"]
|
||||
|
|
|
@ -15,8 +15,8 @@ from spacy.lang.tr.lex_attrs import like_num
|
|||
"üçüncü",
|
||||
"beşinci",
|
||||
"100üncü",
|
||||
"8inci"
|
||||
]
|
||||
"8inci",
|
||||
],
|
||||
)
|
||||
def test_tr_lex_attrs_like_number_cardinal_ordinal(word):
|
||||
assert like_num(word)
|
||||
|
@ -26,4 +26,3 @@ def test_tr_lex_attrs_like_number_cardinal_ordinal(word):
|
|||
def test_tr_lex_attrs_capitals(word):
|
||||
assert like_num(word)
|
||||
assert like_num(word.upper())
|
||||
|
||||
|
|
|
@ -446,7 +446,7 @@ def test_overfitting_IO():
|
|||
return mykb
|
||||
|
||||
# Create the Entity Linker component and add it to the pipeline
|
||||
entity_linker = nlp.add_pipe("entity_linker", last=True,)
|
||||
entity_linker = nlp.add_pipe("entity_linker", last=True)
|
||||
entity_linker.set_kb(create_kb)
|
||||
assert "Q2146908" in entity_linker.vocab.strings
|
||||
assert "Q2146908" in entity_linker.kb.vocab.strings
|
||||
|
|
|
@ -6,8 +6,8 @@ def test_issue6207(en_tokenizer):
|
|||
|
||||
# Make spans
|
||||
s1 = doc[:4]
|
||||
s2 = doc[3:6] # overlaps with s1
|
||||
s3 = doc[5:7] # overlaps with s2, not s1
|
||||
s2 = doc[3:6] # overlaps with s1
|
||||
s3 = doc[5:7] # overlaps with s2, not s1
|
||||
|
||||
result = filter_spans((s1, s2, s3))
|
||||
assert s1 in result
|
||||
|
|
|
@ -1,10 +1,8 @@
|
|||
from typing import List
|
||||
|
||||
import pytest
|
||||
from thinc.api import fix_random_seed, Adam, set_dropout_rate
|
||||
from numpy.testing import assert_array_equal
|
||||
import numpy
|
||||
|
||||
from spacy.ml.models import build_Tok2Vec_model, MultiHashEmbed, MaxoutWindowEncoder
|
||||
from spacy.ml.models import build_text_classifier, build_simple_cnn_text_classifier
|
||||
from spacy.ml.staticvectors import StaticVectors
|
||||
|
@ -188,12 +186,7 @@ def test_models_update_consistently(seed, dropout, model_func, kwargs, get_X):
|
|||
assert_array_equal(get_all_params(model1), get_all_params(model2))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model_func,kwargs",
|
||||
[
|
||||
(StaticVectors, {"nO": 128, "nM": 300}),
|
||||
]
|
||||
)
|
||||
@pytest.mark.parametrize("model_func,kwargs", [(StaticVectors, {"nO": 128, "nM": 300})])
|
||||
def test_empty_docs(model_func, kwargs):
|
||||
nlp = English()
|
||||
model = model_func(**kwargs).initialize()
|
||||
|
@ -201,7 +194,7 @@ def test_empty_docs(model_func, kwargs):
|
|||
for n_docs in range(3):
|
||||
docs = [nlp("") for _ in range(n_docs)]
|
||||
# Test predict
|
||||
_ = model.predict(docs)
|
||||
model.predict(docs)
|
||||
# Test backprop
|
||||
output, backprop = model.begin_update(docs)
|
||||
_ = backprop(output)
|
||||
backprop(output)
|
||||
|
|
Loading…
Reference in New Issue