2017-11-11 00:11:27 +00:00
|
|
|
# coding: utf8
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2017-11-14 14:45:50 +00:00
|
|
|
import gc
|
|
|
|
|
2017-11-11 00:11:27 +00:00
|
|
|
from ...lang.en import English
|
|
|
|
|
|
|
|
|
|
|
|
def test_issue1506():
    """Regression test for issue #1506.

    Streams a long sequence of texts through ``nlp.pipe`` and reads the
    lemma of every token, forcing garbage collection at a few points along
    the way. The test passes as long as nothing raises while the lemma
    strings are being accessed.
    """
    pipeline = English()
    repeats = 10001
    gc_checkpoints = (10000, 20000, 30000)

    def string_generator():
        # Five consecutive runs of `repeats` identical texts each. The
        # first sentence comes back in the last two runs, after two runs
        # of different sentences have passed through the pipeline.
        sentences = (
            u"It's sentence produced by that bug.",
            u"I erase some hbdsaj lemmas.",
            u"I erase lemmas.",
            u"It's sentence produced by that bug.",
            u"It's sentence produced by that bug.",
        )
        for sentence in sentences:
            for _ in range(repeats):
                yield sentence

    for index, doc in enumerate(pipeline.pipe(string_generator())):
        # Per the original author's note: cleanup has to run more than
        # once to actually free data, because the first run only marks
        # strings as "not hit". Hence several collection points.
        if index in gc_checkpoints:
            gc.collect()

        for token in doc:
            # Only the lookup matters; the resulting string is discarded.
            str(token.lemma_)
|