mirror of https://github.com/explosion/spaCy.git
Improve max length check in corpus
This commit is contained in:
parent
2fa56484b2
commit
1f7709e9a6
|
@@ -45,22 +45,22 @@ class Corpus:
|
||||||
|
|
||||||
def make_examples(self, nlp, reference_docs, max_length=0):
|
def make_examples(self, nlp, reference_docs, max_length=0):
|
||||||
for reference in reference_docs:
|
for reference in reference_docs:
|
||||||
if len(reference) >= max_length >= 1:
|
if len(reference) == 0:
|
||||||
if reference.is_sentenced:
|
continue
|
||||||
for ref_sent in reference.sents:
|
elif max_length == 0 or len(reference) < max_length:
|
||||||
eg = Example(
|
yield Example(
|
||||||
nlp.make_doc(ref_sent.text),
|
|
||||||
ref_sent.as_doc()
|
|
||||||
)
|
|
||||||
if len(eg.x):
|
|
||||||
yield eg
|
|
||||||
else:
|
|
||||||
eg = Example(
|
|
||||||
nlp.make_doc(reference.text),
|
nlp.make_doc(reference.text),
|
||||||
reference
|
reference
|
||||||
)
|
)
|
||||||
if len(eg.x):
|
elif reference.is_sentenced:
|
||||||
yield eg
|
for ref_sent in reference.sents:
|
||||||
|
if len(ref_sent) == 0:
|
||||||
|
continue
|
||||||
|
elif max_length == 0 or len(ref_sent) < max_length:
|
||||||
|
yield Example(
|
||||||
|
nlp.make_doc(ref_sent.text),
|
||||||
|
ref_sent.as_doc()
|
||||||
|
)
|
||||||
|
|
||||||
def make_examples_gold_preproc(self, nlp, reference_docs):
|
def make_examples_gold_preproc(self, nlp, reference_docs):
|
||||||
for reference in reference_docs:
|
for reference in reference_docs:
|
||||||
|
|
Loading…
Reference in New Issue