From 3f2e3cbd2784ebd2c15aafd5f02830a9369ada84 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Thu, 31 May 2018 16:22:43 +0200 Subject: [PATCH] Add links to Reddit data (see #2401) --- examples/information_extraction/phrase_matcher.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/information_extraction/phrase_matcher.py b/examples/information_extraction/phrase_matcher.py index a7feea264..28266bbd1 100644 --- a/examples/information_extraction/phrase_matcher.py +++ b/examples/information_extraction/phrase_matcher.py @@ -34,6 +34,10 @@ formatted in jsonl as a sequence of entries like this: {"text":"Appalachia"} {"text":"Argentina"} +Reddit comments corpus: +* https://files.pushshift.io/reddit/ +* https://archive.org/details/2015_reddit_comments_corpus + Compatible with: spaCy v2.0.0+ """ from __future__ import print_function, unicode_literals, division