mirror of https://github.com/explosion/spaCy.git
Also lower-case in orth variants
This commit is contained in:
parent
19d99fc9e7
commit
42df49133d
|
@ -272,12 +272,17 @@ class GoldCorpus(object):
|
||||||
def make_orth_variants(nlp, raw, paragraph_tuples, orth_variant_level=0.0):
|
def make_orth_variants(nlp, raw, paragraph_tuples, orth_variant_level=0.0):
|
||||||
if random.random() >= orth_variant_level:
|
if random.random() >= orth_variant_level:
|
||||||
return raw, paragraph_tuples
|
return raw, paragraph_tuples
|
||||||
|
if random.random() >= 0.5:
|
||||||
|
lower = True
|
||||||
|
raw = raw.lower()
|
||||||
ndsv = nlp.Defaults.single_orth_variants
|
ndsv = nlp.Defaults.single_orth_variants
|
||||||
ndpv = nlp.Defaults.paired_orth_variants
|
ndpv = nlp.Defaults.paired_orth_variants
|
||||||
# modify words in paragraph_tuples
|
# modify words in paragraph_tuples
|
||||||
variant_paragraph_tuples = []
|
variant_paragraph_tuples = []
|
||||||
for sent_tuples, brackets in paragraph_tuples:
|
for sent_tuples, brackets in paragraph_tuples:
|
||||||
ids, words, tags, heads, labels, ner = sent_tuples
|
ids, words, tags, heads, labels, ner = sent_tuples
|
||||||
|
if lower:
|
||||||
|
words = [w.lower() for w in words]
|
||||||
# single variants
|
# single variants
|
||||||
punct_choices = [random.choice(x["variants"]) for x in ndsv]
|
punct_choices = [random.choice(x["variants"]) for x in ndsv]
|
||||||
for word_idx in range(len(words)):
|
for word_idx in range(len(words)):
|
||||||
|
|
Loading…
Reference in New Issue