spaCy/tests/tokens/test_token_references.py

from __future__ import unicode_literals
import pytest
import gc

from spacy.en import English

# These tests build their own English instances: they are about memory
# ownership, so object lifetimes have to be controlled carefully here.

def get_orphan_token(text, i):
    """Parse *text* and return only its i-th token.

    A fresh ``English(load_vectors=False)`` instance is created on every
    call. The local reference to the parsed tokens object is deleted before
    returning, so the caller receives a single token and no reference to
    the container it came from.

    text -- the string to parse.
    i -- index of the token to pull out of the parse.
    """
    nlp = English(load_vectors=False)
    tokens = nlp(text)
    # Run a collection pass while the container is still referenced here.
    gc.collect()
    token = tokens[i]
    # Drop this function's reference to the container; only the single
    # token is handed back to the caller.
    del tokens
    return token


def test_orphan():
    """Check that a token is still readable once its parent container is gone.

    The token is obtained through get_orphan_token(), which deletes its own
    reference to the parsed tokens object before returning.
    """
    orphan = get_orphan_token('An orphan token', 1)
    gc.collect()
    # Parse two more texts before inspecting the orphan. The result is
    # deliberately rebound to the same name, so the first dummy token loses
    # its reference as well.
    # NOTE(review): presumably this is meant to churn memory the orphan's
    # data might have occupied -- confirm against the Token/Tokens
    # implementation.
    dummy = get_orphan_token('Load and flush the memory', 0)
    dummy = get_orphan_token('Load again...', 0)
    # The orphan must still report its own text, tag and syntactic head.
    assert orphan.orth_ == 'orphan'
    assert orphan.pos_ == 'NOUN'
    assert orphan.head.orth_ == 'token'


def _orphan_from_list(toks):
    ''' Take the tokens from nlp(), append them to a list, return the list '''
    lst = []
    for tok in toks:
        lst.append(tok)
    return lst


def test_list_orphans():
    """Tokens collected into a plain list must stay readable afterwards.

    Each sample is parsed, its tokens are copied into a list by
    _orphan_from_list(), and the parsed object itself is never bound to a
    name in this function. Only the collected tokens are kept.
    """
    # Test case from NSchrading
    nlp = English(load_vectors=False)
    samples = ["a", "test blah wat okay"]
    lst = []
    for sample in samples:
        # Call nlp() on the sample, hand the result straight to
        # _orphan_from_list() and keep only the tokens it returns.
        lst.extend(_orphan_from_list(nlp(sample)))
    orths = ['a', 'test', 'blah', 'wat', 'okay']
    # Compare the whole sequence at once: a missing or extra token now fails
    # the assertion instead of passing silently (too few tokens) or raising
    # IndexError (too many).
    assert [tok.orth_ for tok in lst] == orths
* Pass ownership of C data to Token instances if Tokens object is being garbage-collected, but Token instances are staying alive. 2015-02-11 23:05:06 +00:00			`from __future__ import unicode_literals`
			`import pytest`
			`import gc`

			`from spacy.en import English`

* More work on reorganising tests, using conftest.py 2015-06-07 16:02:24 +00:00			`# Let this have its own instances, as we have to be careful about memory here`
			`# that's the point, after all`
* Pass ownership of C data to Token instances if Tokens object is being garbage-collected, but Token instances are staying alive. 2015-02-11 23:05:06 +00:00
			`def get_orphan_token(text, i):`
* More work on reorganising tests, using conftest.py 2015-06-07 16:02:24 +00:00			`nlp = English(load_vectors=False)`
* Pass ownership of C data to Token instances if Tokens object is being garbage-collected, but Token instances are staying alive. 2015-02-11 23:05:06 +00:00			`tokens = nlp(text)`
			`gc.collect()`
			`token = tokens[i]`
			`del tokens`
			`return token`


			`def test_orphan():`
			`orphan = get_orphan_token('An orphan token', 1)`
			`gc.collect()`
			`dummy = get_orphan_token('Load and flush the memory', 0)`
			`dummy = get_orphan_token('Load again...', 0)`
			`assert orphan.orth_ == 'orphan'`
* Fix test_token_references test 2015-06-07 16:33:04 +00:00			`assert orphan.pos_ == 'NOUN'`
* Pass ownership of C data to Token instances if Tokens object is being garbage-collected, but Token instances are staying alive. 2015-02-11 23:05:06 +00:00			`assert orphan.head.orth_ == 'token'`
* Add test from NSchrading 2015-02-16 16:49:31 +00:00

			`def _orphan_from_list(toks):`
			`''' Take the tokens from nlp(), append them to a list, return the list '''`
			`lst = []`
			`for tok in toks:`
			`lst.append(tok)`
			`return lst`

Tweak line spacing 2015-04-19 19:39:18 +00:00
* Add test from NSchrading 2015-02-16 16:49:31 +00:00			`def test_list_orphans():`
			`# Test case from NSchrading`
* Avoid loading vectors in test_token_references 2015-06-07 17:03:16 +00:00			`nlp = English(load_vectors=False)`
* Add test from NSchrading 2015-02-16 16:49:31 +00:00			`samples = ["a", "test blah wat okay"]`
			`lst = []`
			`for sample in samples:`
			`# Go through all the samples, call nlp() on each to get tokens,`
			`# pass those tokens to the _orphan_from_list() function, get a list back`
			`# and put all results in another list`
			`lst.extend(_orphan_from_list(nlp(sample)))`
			`# go through the list of all tokens and try to print orth_`
			`orths = ['a', 'test', 'blah', 'wat', 'okay']`
			`for i, l in enumerate(lst):`
			`assert l.orth_ == orths[i]`