mirror of https://github.com/explosion/spaCy.git
Increase the document length limit for pretrain from 200 to 500 and expose it as min_length/max_length parameters of make_docs
This commit is contained in:
parent
1b240f2119
commit
6bd1cc57ee
|
@ -78,7 +78,7 @@ def make_update(model, docs, optimizer, drop=0.):
|
|||
return loss
|
||||
|
||||
|
||||
def make_docs(nlp, batch):
|
||||
def make_docs(nlp, batch, min_length=1, max_length=500):
|
||||
docs = []
|
||||
for record in batch:
|
||||
text = record["text"]
|
||||
|
@ -91,7 +91,7 @@ def make_docs(nlp, batch):
|
|||
heads = numpy.asarray(heads, dtype="uint64")
|
||||
heads = heads.reshape((len(doc), 1))
|
||||
doc = doc.from_array([HEAD], heads)
|
||||
if len(doc) >= 1 and len(doc) < 200:
|
||||
if len(doc) >= min_length and len(doc) < max_length:
|
||||
docs.append(doc)
|
||||
return docs
|
||||
|
||||
|
|
Loading…
Reference in New Issue