Increase length limit for pretrain

This commit is contained in:
Matthew Honnibal 2018-11-30 20:58:18 +00:00
parent 1b240f2119
commit 6bd1cc57ee
1 changed file with 2 additions and 2 deletions

View File

@ -78,7 +78,7 @@ def make_update(model, docs, optimizer, drop=0.):
return loss
def make_docs(nlp, batch):
def make_docs(nlp, batch, min_length=1, max_length=500):
docs = []
for record in batch:
text = record["text"]
@ -91,7 +91,7 @@ def make_docs(nlp, batch):
heads = numpy.asarray(heads, dtype="uint64")
heads = heads.reshape((len(doc), 1))
doc = doc.from_array([HEAD], heads)
if len(doc) >= 1 and len(doc) < 200:
if len(doc) >= min_length and len(doc) < max_length:
docs.append(doc)
return docs