2015-02-11 23:05:06 +00:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
import pytest
|
|
|
|
import gc
|
|
|
|
|
|
|
|
from spacy.en import English
|
|
|
|
|
|
|
|
|
|
|
|
def get_orphan_token(text, i):
    """Parse ``text`` and hand back only the token at position ``i``.

    A new ``English`` pipeline is constructed on every call.  The local
    name bound to the parsed result is deleted before returning, so the
    caller is given the single token and nothing else from this scope.
    """
    pipeline = English()
    parsed = pipeline(text)
    # Run a collection pass before indexing, while the parsed result is
    # still referenced from this frame.
    gc.collect()
    orphan = parsed[i]
    # Drop this function's own reference to the container the token was
    # indexed from; only the token itself is returned.
    del parsed
    return orphan
|
|
|
|
|
|
|
|
|
|
|
|
def test_orphan():
    """A token must stay readable after its parsed container is gone."""
    orphan = get_orphan_token('An orphan token', 1)
    gc.collect()
    # Two further parses run before the orphan is inspected -- presumably
    # to churn memory the original parse used.  The result is bound to a
    # name each time, so the previous filler token stays referenced until
    # the next call has returned.
    for filler_text in ('Load and flush the memory', 'Load again...'):
        dummy = get_orphan_token(filler_text, 0)
    assert orphan.orth_ == 'orphan'
    assert orphan.pos_ == 'ADJ'
    assert orphan.head.orth_ == 'token'
|
2015-02-16 16:49:31 +00:00
|
|
|
|
|
|
|
|
|
|
|
def _orphan_from_list(toks):
|
|
|
|
''' Take the tokens from nlp(), append them to a list, return the list '''
|
|
|
|
lst = []
|
|
|
|
for tok in toks:
|
|
|
|
lst.append(tok)
|
|
|
|
return lst
|
|
|
|
|
2015-04-19 19:39:18 +00:00
|
|
|
|
2015-02-16 16:49:31 +00:00
|
|
|
def test_list_orphans():
    """Tokens collected into a list across several parses keep their text.

    Test case from NSchrading.
    """
    nlp = English()
    samples = ["a", "test blah wat okay"]
    lst = []
    for sample in samples:
        # Parse each sample, copy its tokens into a list via
        # _orphan_from_list(), and accumulate them.  The parse result is
        # never bound to a name here; only the tokens are kept.
        lst.extend(_orphan_from_list(nlp(sample)))
    orths = ['a', 'test', 'blah', 'wat', 'okay']
    # Compare the full sequences instead of looping with an index: an
    # index loop passes silently when fewer tokens than expected (or none
    # at all) were collected, whereas this also checks the count.
    assert [tok.orth_ for tok in lst] == orths
|