From 096a80170d23365e1b8ff9d3749bb6caa379abdd Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Fri, 27 Oct 2017 01:48:39 +0200
Subject: [PATCH] Remove old example files

---
 examples/_handler.py       | 37 -------------------
 examples/parallel_parse.py | 74 --------------------------------------
 2 files changed, 111 deletions(-)
 delete mode 100644 examples/_handler.py
 delete mode 100644 examples/parallel_parse.py

diff --git a/examples/_handler.py b/examples/_handler.py
deleted file mode 100644
index cebfe8968..000000000
--- a/examples/_handler.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# encoding: utf8
-from __future__ import unicode_literals, print_function
-
-from math import sqrt
-from numpy import dot
-from numpy.linalg import norm
-
-
-def handle_tweet(spacy, tweet_data, query):
-    text = tweet_data.get('text', u'')
-    # Twython returns either bytes or unicode, depending on tweet.
-    # ಠ_ಠ #APIshaming
-    try:
-        match_tweet(spacy, text, query)
-    except TypeError:
-        match_tweet(spacy, text.decode('utf8'), query)
-
-
-def match_tweet(spacy, text, query):
-    def get_vector(word):
-        return spacy.vocab[word].repvec
-
-    tweet = spacy(text)
-    tweet = [w.repvec for w in tweet if w.is_alpha and w.lower_ != query]
-    if tweet:
-        accept = map(get_vector, 'child classroom teach'.split())
-        reject = map(get_vector, 'mouth hands giveaway'.split())
-        
-        y = sum(max(cos(w1, w2), 0) for w1 in tweet for w2 in accept)
-        n = sum(max(cos(w1, w2), 0) for w1 in tweet for w2 in reject)
-        
-        if (y / (y + n)) >= 0.5 or True:
-            print(text)
-
-
-def cos(v1, v2):
-    return dot(v1, v2) / (norm(v1) * norm(v2))
diff --git a/examples/parallel_parse.py b/examples/parallel_parse.py
deleted file mode 100644
index 5cdd0778b..000000000
--- a/examples/parallel_parse.py
+++ /dev/null
@@ -1,74 +0,0 @@
-from __future__ import print_function, unicode_literals, division
-import io
-import bz2
-import logging
-from toolz import partition
-from os import path
-import re
-
-import spacy.en
-from spacy.tokens import Doc
-
-from joblib import Parallel, delayed
-import plac
-import ujson
-
-
-def parallelize(func, iterator, n_jobs, extra, backend='multiprocessing'):
-    extra = tuple(extra)
-    return Parallel(n_jobs=n_jobs, backend=backend)(delayed(func)(*(item + extra))
-                    for item in iterator)
-
-
-def iter_comments(loc):
-    with bz2.BZ2File(loc) as file_:
-        for i, line in enumerate(file_):
-            yield ujson.loads(line)['body']
-
-
-pre_format_re = re.compile(r'^[\`\*\~]')
-post_format_re = re.compile(r'[\`\*\~]$')
-url_re = re.compile(r'\[([^]]+)\]\(%%URL\)')
-link_re = re.compile(r'\[([^]]+)\]\(https?://[^\)]+\)')
-def strip_meta(text):
-    text = link_re.sub(r'\1', text)
-    text = text.replace('&gt;', '>').replace('&lt;', '<')
-    text = pre_format_re.sub('', text)
-    text = post_format_re.sub('', text)
-    return text.strip()
-
-
-def save_parses(batch_id, input_, out_dir, n_threads, batch_size):
-    out_loc = path.join(out_dir, '%d.bin' % batch_id)
-    if path.exists(out_loc):
-        return None
-    print('Batch', batch_id)
-    nlp = spacy.en.English()
-    nlp.matcher = None
-    with open(out_loc, 'wb') as file_:
-        texts = (strip_meta(text) for text in input_)
-        texts = (text for text in texts if text.strip())
-        for doc in nlp.pipe(texts, batch_size=batch_size, n_threads=n_threads):
-            file_.write(doc.to_bytes())
-
-@plac.annotations(
-    in_loc=("Location of input file"),
-    out_dir=("Location of input file"),
-    n_process=("Number of processes", "option", "p", int),
-    n_thread=("Number of threads per process", "option", "t", int),
-    batch_size=("Number of texts to accumulate in a buffer", "option", "b", int)
-)
-def main(in_loc, out_dir, n_process=1, n_thread=4, batch_size=100):
-    if not path.exists(out_dir):
-        path.join(out_dir)
-    if n_process >= 2:
-        texts = partition(200000, iter_comments(in_loc))
-        parallelize(save_parses, enumerate(texts), n_process, [out_dir, n_thread, batch_size],
-                   backend='multiprocessing')
-    else:
-        save_parses(0, iter_comments(in_loc), out_dir, n_thread, batch_size)
-
-
-
-if __name__ == '__main__':
-    plac.call(main)