spaCy/examples/_handler.py

# encoding: utf8
from __future__ import unicode_literals, print_function

from math import sqrt
from numpy import dot
from numpy.linalg import norm


def handle_tweet(spacy, tweet_data, query):
    """Pull the text out of one tweet and hand it to ``match_tweet``.

    Args:
        spacy: Pipeline object, forwarded unchanged to ``match_tweet``.
        tweet_data: Mapping describing one tweet; only its ``'text'``
            entry is read (an empty string is used when it is absent).
        query: Search term, forwarded unchanged to ``match_tweet``.
    """
    text = tweet_data.get('text', u'')
    # Twython returns either bytes or unicode, depending on tweet.
    # ಠ_ಠ #APIshaming
    # Decode up front instead of retrying after a TypeError: the retry
    # re-ran match_tweet for *any* TypeError raised inside it, and then
    # failed with AttributeError when the text was already unicode
    # (``str`` has no ``decode`` on Python 3).
    if isinstance(text, bytes):
        text = text.decode('utf8')
    match_tweet(spacy, text, query)


def match_tweet(spacy, text, query):
    """Score a tweet against fixed accept/reject word lists and print it.

    The tweet is parsed with ``spacy``. The ``repvec`` vectors of its
    alphabetic tokens (skipping tokens whose ``lower_`` equals ``query``)
    are compared by cosine similarity with the vectors of the accept
    words and of the reject words; negative similarities count as zero.

    Args:
        spacy: Callable that parses ``text`` into tokens exposing
            ``repvec``, ``is_alpha`` and ``lower_``; its ``vocab`` is
            indexed by word to obtain an entry with a ``repvec``.
        text: Tweet text to parse and print.
        query: Search term; tokens whose ``lower_`` equals it are ignored.

    Returns:
        None. The tweet text is printed to stdout.
    """
    def get_vector(word):
        return spacy.vocab[word].repvec

    vectors = [w.repvec for w in spacy(text)
               if w.is_alpha and w.lower_ != query]
    if not vectors:
        return

    # Build real lists: on Python 3 ``map`` returns a one-shot iterator,
    # which the nested loops below would exhaust after the first token,
    # so only one tweet word would ever be scored.
    accept = [get_vector(word) for word in 'child classroom teach'.split()]
    reject = [get_vector(word) for word in 'mouth hands giveaway'.split()]

    y = sum(max(cos(w1, w2), 0) for w1 in vectors for w2 in accept)
    n = sum(max(cos(w1, w2), 0) for w1 in vectors for w2 in reject)

    # When every similarity was clamped to 0 the denominator is 0; treat
    # that as "no evidence for accept" instead of raising
    # ZeroDivisionError.
    total = y + n
    score = y / total if total else 0.0

    # NOTE(review): ``or True`` makes this condition always true, so every
    # tweet with at least one usable token is printed regardless of score.
    # Kept to preserve current output; drop it to enable the filter.
    if score >= 0.5 or True:
        print(text)


def cos(v1, v2):
    """Return the cosine similarity of vectors *v1* and *v2*.

    Args:
        v1: First vector (anything ``numpy.dot``/``norm`` accept).
        v2: Second vector, same length as ``v1``.

    Returns:
        ``dot(v1, v2) / (norm(v1) * norm(v2))``, or ``0.0`` when either
        vector has zero norm.
    """
    denominator = norm(v1) * norm(v2)
    # A zero-length vector has no direction; dividing would produce NaN
    # (plus a numpy warning) and poison any sum built from the result.
    if denominator == 0:
        return 0.0
    return dot(v1, v2) / denominator
* Add _handler to resolve Issue #123 2015-10-14 15:44:23 +00:00
			`# encoding: utf8`
			`from __future__ import unicode_literals, print_function`

			`from math import sqrt`
			`from numpy import dot`
			`from numpy.linalg import norm`


			`def handle_tweet(spacy, tweet_data, query):`
			`text = tweet_data.get('text', u'')`
			`# Twython returns either bytes or unicode, depending on tweet.`
			`# ಠ_ಠ #APIshaming`
			`try:`
			`match_tweet(spacy, text, query)`
			`except TypeError:`
			`match_tweet(spacy, text.decode('utf8'), query)`


			`def match_tweet(spacy, text, query):`
			`def get_vector(word):`
			`return spacy.vocab[word].repvec`

			`tweet = spacy(text)`
			`tweet = [w.repvec for w in tweet if w.is_alpha and w.lower_ != query]`
			`if tweet:`
			`accept = map(get_vector, 'child classroom teach'.split())`
			`reject = map(get_vector, 'mouth hands giveaway'.split())`

			`y = sum(max(cos(w1, w2), 0) for w1 in tweet for w2 in accept)`
			`n = sum(max(cos(w1, w2), 0) for w1 in tweet for w2 in reject)`

			`if (y / (y + n)) >= 0.5 or True:`
			`print(text)`


			`def cos(v1, v2):`
			`return dot(v1, v2) / (norm(v1) * norm(v2))`