* Add _handler to resolve Issue #123

This commit is contained in:
Matthew Honnibal 2015-10-15 02:44:23 +11:00
parent c772df2f0c
commit 0bc48e82d0
1 changed file with 37 additions and 0 deletions

37
examples/_handler.py Normal file
View File

@ -0,0 +1,37 @@
# encoding: utf8
from __future__ import unicode_literals, print_function
from math import sqrt
from numpy import dot
from numpy.linalg import norm
def handle_tweet(spacy, tweet_data, query):
    """Extract the text of one tweet and pass it to ``match_tweet``.

    Args:
        spacy: Callable NLP pipeline; forwarded unchanged to ``match_tweet``.
        tweet_data: Mapping describing a tweet. Only the ``'text'`` key is
            read; a missing key is treated as an empty string.
        query: Forwarded unchanged to ``match_tweet``.

    Returns:
        None.
    """
    text = tweet_data.get('text', u'')
    # Twython returns either bytes or unicode, depending on tweet.
    # ಠ_ಠ #APIshaming
    # Decode up front rather than retrying after a TypeError: catching
    # TypeError around the whole match would also swallow unrelated errors,
    # and on Python 3 the retry itself fails because str has no .decode().
    if isinstance(text, bytes):
        text = text.decode('utf8')
    match_tweet(spacy, text, query)
def match_tweet(spacy, text, query):
    """Print ``text`` when its words sit closer to the accept words.

    Every alphabetic token of the tweet (except tokens whose lower-cased
    form equals ``query``) is compared, by cosine similarity of its
    ``repvec`` vector, against two fixed word lists. Negative similarities
    are clipped to zero. The tweet is printed when the accept share of the
    total similarity mass is at least 0.5.

    Args:
        spacy: Callable NLP pipeline. Calling it on ``text`` must yield
            tokens exposing ``repvec``, ``is_alpha`` and ``lower_``, and
            ``spacy.vocab[word].repvec`` must give the vector for a word.
        text: Tweet text to analyse (unicode).
        query: Search term to leave out of the comparison; compared
            against each token's ``lower_`` value.

    Returns:
        None. The only effect is printing ``text`` on a match.
    """
    def get_vector(word):
        return spacy.vocab[word].repvec

    tokens = spacy(text)
    vectors = [w.repvec for w in tokens if w.is_alpha and w.lower_ != query]
    if not vectors:
        return

    # Build real lists: on Python 3 map() returns a one-shot iterator that
    # would be exhausted after the first tweet word in the nested loops.
    accept = [get_vector(word) for word in 'child classroom teach'.split()]
    reject = [get_vector(word) for word in 'mouth hands giveaway'.split()]

    y = sum(max(cos(w1, w2), 0) for w1 in vectors for w2 in accept)
    n = sum(max(cos(w1, w2), 0) for w1 in vectors for w2 in reject)

    # With no positive similarity to either list the ratio is undefined;
    # treat that as "no match" instead of dividing by zero.
    total = y + n
    if total > 0 and y / total >= 0.5:
        print(text)
def cos(v1, v2):
    """Return the cosine similarity of two vectors.

    Args:
        v1: First vector (any array-like accepted by ``numpy.dot``).
        v2: Second vector, same length as ``v1``.

    Returns:
        ``dot(v1, v2) / (norm(v1) * norm(v2))``, or ``0.0`` when either
        vector has zero norm.
    """
    denominator = norm(v1) * norm(v2)
    # A zero-length vector has no direction; returning 0.0 avoids the NaN
    # (and RuntimeWarning) that 0/0 would otherwise feed into callers' sums.
    if denominator == 0:
        return 0.0
    return dot(v1, v2) / denominator