spaCy/spacy/tests/regression/test_issue5152.py

# coding: utf8
from __future__ import unicode_literals

from spacy.lang.en import English


def test_issue5152():
    """Regression test for issue 5152: Span.similarity against a Token or a Span.

    The bug appeared when the number of tokens in the span equaled the number
    of characters in the token (here: a 3-token span vs. the token "Let").
    """
    nlp = English()
    doc = nlp("Talk about being boring!")
    doc_variant = nlp("Talk of being boring!")
    short_doc = nlp("Let")

    lone_token = short_doc[0]  # Let
    first_span = doc[0:3]  # Talk about being
    same_span = doc[0:3]  # Talk about being
    variant_span = doc_variant[0:3]  # Talk of being

    # Span vs. Token: the comparison that used to go wrong.
    assert first_span.similarity(lone_token) == 0.0
    # Two spans covering the same tokens of the same doc.
    assert first_span.similarity(same_span) == 1.0
    # Spans whose tokens differ must score strictly below 1.0.
    assert same_span.similarity(variant_span) < 1.0
Fix compat for v2.x branch 2020-05-22 12:22:36 +00:00			`# coding: utf8`
			`from __future__ import unicode_literals`

bugfix in span similarity (#5155) (#5358) * bugfix in span similarity * also rewrite doc.pyx for clarity * formatting Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> 2020-04-27 14:51:27 +00:00			`from spacy.lang.en import English`


			`def test_issue5152():`
			`# Test that the comparison between a Span and a Token, goes well`
			`# There was a bug when the number of tokens in the span equaled the number of characters in the token (!)`
			`nlp = English()`
			`text = nlp("Talk about being boring!")`
			`text_var = nlp("Talk of being boring!")`
			`y = nlp("Let")`

			`span = text[0:3] # Talk about being`
			`span_2 = text[0:3] # Talk about being`
			`span_3 = text_var[0:3] # Talk of being`
			`token = y[0] # Let`
			`assert span.similarity(token) == 0.0`
			`assert span.similarity(span_2) == 1.0`
			`assert span_2.similarity(span_3) < 1.0`