spaCy/spacy/tests/regression/test_issue1506.py

47 lines
1.1 KiB
Python
Raw Normal View History

# coding: utf8
from __future__ import unicode_literals
import gc
from ...lang.en import English
def test_issue1506():
    """Regression test for issue 1506.

    Streams roughly 40k short texts through ``nlp.pipe``, forces garbage
    collection at a few fixed points in the stream, reads every token's
    lemma, and finally checks that a ``Doc`` captured early in the stream
    still reports its original text.
    """
    nlp = English()

    def string_generator():
        # Stream indices 0..10000: identical sentences.
        for _ in range(10001):
            yield "It's sentence produced by that bug."

        # Stream index 10001: the one text that is kept and checked at the end.
        yield "Oh snap."

        # Differently worded batch -- presumably there to push new strings
        # through the pipeline; confirm against the original issue.
        for _ in range(10001):
            yield "I erase lemmas."

        # Two more batches of the first sentence.
        for _ in range(10001):
            yield "It's sentence produced by that bug."

        for _ in range(10001):
            yield "It's sentence produced by that bug."

    anchor = None
    remember = None
    for i, doc in enumerate(nlp.pipe(string_generator())):
        if i == 9999:
            # Hold a reference to one of the early docs for a short while...
            anchor = doc
        elif i == 10001:
            # ...keep the "Oh snap." doc alive until the final assertion...
            remember = doc
        elif i == 10002:
            # ...then drop the early doc and force a collection.
            del anchor
            gc.collect()

        # One cleanup pass is not enough to actually clean up the data: the
        # first pass only marks strings as "not hit", so collection is
        # forced again at two later points in the stream.
        if i in (20000, 30000):
            gc.collect()

        # Read the lemma of every token in every doc; the value itself is
        # discarded, only the access matters here.
        for token in doc:
            str(token.lemma_)

    assert remember.text == 'Oh snap.'