mirror of https://github.com/explosion/spaCy.git
Add links to Reddit data (see #2401)
This commit is contained in:
parent
b8ef9c1000
commit
3f2e3cbd27
|
@@ -34,6 +34,10 @@ formatted in jsonl as a sequence of entries like this:
|
|||
{"text":"Appalachia"}
|
||||
{"text":"Argentina"}
|
||||
|
||||
Reddit comments corpus:
|
||||
* https://files.pushshift.io/reddit/
|
||||
* https://archive.org/details/2015_reddit_comments_corpus
|
||||
|
||||
Compatible with: spaCy v2.0.0+
|
||||
"""
|
||||
from __future__ import print_function, unicode_literals, division
|
||||
|
|
Loading…
Reference in New Issue