Add (and test) Span.sentiment attribute. By default we average token.sentiment over the span, but this can be overridden with a custom hook. Re Issue #667

This commit is contained in:
Matthew Honnibal 2016-12-02 11:05:50 +01:00
parent 296d33a4fc
commit f6e356aada
2 changed files with 49 additions and 0 deletions

View File

@ -1,6 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from spacy.attrs import HEAD from spacy.attrs import HEAD
from spacy.en import English from spacy.en import English
from spacy.tokens.doc import Doc
import numpy as np import numpy as np
import pytest import pytest
@ -49,3 +50,44 @@ def test_sent(doc):
assert span.sent.text == 'This is a sentence.' assert span.sent.text == 'This is a sentence.'
span = doc[6:7] span = doc[6:7]
assert span.sent.root.left_edge.text == 'This' assert span.sent.root.left_edge.text == 'This'
def test_default_sentiment(EN):
    '''Span.sentiment with no hook installed is the mean of its tokens' sentiment.

    Only 'good' and 'bad' are given a sentiment here; the expected values
    below treat every 'stuff' token as contributing 0 to the sum.
    '''
    good_lex = EN.vocab[u'good']
    bad_lex = EN.vocab[u'bad']
    good_lex.sentiment = 3.0
    bad_lex.sentiment = -2.0

    doc = Doc(EN.vocab, [u'good', 'stuff', u'bad', u'stuff'])

    # (span, expected mean sentiment) pairs, checked in order.
    cases = [
        (doc[:2], 3.0 / 2),              # good stuff
        (doc[-2:], -2.0 / 2),            # bad stuff
        (doc[:-1], (3.0 - 2.0) / 3.0),   # good stuff bad
    ]
    for span, expected in cases:
        assert span.sentiment == expected
def test_override_sentiment(EN):
    '''Span.sentiment defers to a 'sentiment' entry in doc.user_span_hooks.

    The lexeme sentiments are set exactly as in the default-behaviour test,
    so a correct override must ignore them: every span, whatever tokens it
    covers, should report the constant returned by the hook.
    '''
    good = EN.vocab[u'good']
    good.sentiment = 3.0
    bad = EN.vocab[u'bad']
    bad.sentiment = -2.0
    doc = Doc(EN.vocab, [u'good', 'stuff', u'bad', u'stuff'])
    # Install a hook that returns a value no token average could produce here.
    doc.user_span_hooks['sentiment'] = lambda span: 10.0

    # good stuff / bad stuff / good stuff bad
    for span in (doc[:2], doc[-2:], doc[:-1]):
        assert span.sentiment == 10.0

View File

@ -179,6 +179,13 @@ cdef class Span:
self._vector_norm = sqrt(norm) if norm != 0 else 0 self._vector_norm = sqrt(norm) if norm != 0 else 0
return self._vector_norm return self._vector_norm
property sentiment:
    # Sentiment score for the span.
    #
    # If the parent doc has a 'sentiment' entry in `user_span_hooks`, that
    # callable is invoked with this span and its result is returned as-is.
    # Otherwise the score is the arithmetic mean of `token.sentiment` over
    # the tokens in the span.
    def __get__(self):
        if 'sentiment' in self.doc.user_span_hooks:
            return self.doc.user_span_hooks['sentiment'](self)
        n_tokens = len(self)
        if n_tokens == 0:
            # An empty span has no tokens to average; return a neutral score
            # rather than raising ZeroDivisionError.
            return 0.0
        return sum([token.sentiment for token in self]) / n_tokens
property text: property text:
def __get__(self): def __get__(self):
text = self.text_with_ws text = self.text_with_ws