mirror of https://github.com/explosion/spaCy.git
* Add pieces of home page as separate jade files
This commit is contained in:
parent
6f7b3efe85
commit
b57a3ddd7e
|
@ -0,0 +1,40 @@
|
|||
p With Python 2.7 or Python 3, using Linux or OSX, run:
|
||||
|
||||
pre.language-bash: code
|
||||
| $ pip install spacy
|
||||
| $ python -m spacy.en.download
|
||||
|
||||
p
|
||||
| The download command fetches and installs about 300MB of data, for
|
||||
| the parser model and word vectors, which it installs within the spacy.en
|
||||
| package directory.
|
||||
|
||||
p
|
||||
| If you're stuck using a server with an old version of Python, and you
|
||||
| don't have root access, I've prepared a bootstrap script to help you
|
||||
| compile a local Python install. Run:
|
||||
|
||||
pre.language-bash: code
|
||||
| $ curl https://raw.githubusercontent.com/honnibal/spaCy/master/bootstrap_python_env.sh | bash && source .env/bin/activate
|
||||
|
||||
p
|
||||
| The other way to install the package is to clone the GitHub repository,
|
||||
| and build it from source. This installs an additional dependency,
|
||||
| Cython. If you're using Python 2, I also recommend installing fabric
|
||||
| and fabtools – this is how I build the project.
|
||||
|
||||
pre.language-bash: code
|
||||
| $ git clone https://github.com/honnibal/spaCy.git
|
||||
| $ cd spaCy
|
||||
| $ virtualenv .env && source .env/bin/activate
|
||||
| $ export PYTHONPATH=`pwd`
|
||||
| $ pip install -r requirements.txt
|
||||
| $ python setup.py build_ext --inplace
|
||||
| $ python -m spacy.en.download
|
||||
| $ pip install pytest
|
||||
| $ py.test tests/
|
||||
|
||||
p
|
||||
| Python packaging is awkward at the best of times, and it's particularly tricky
|
||||
| with C extensions, built via Cython, requiring large data files. So,
|
||||
| please report issues as you encounter them.
|
|
@ -0,0 +1,37 @@
|
|||
- var slogan = "Build Tomorrow's Language Technologies"
|
||||
- var tag_line = "spaCy – " + slogan
|
||||
|
||||
|
||||
doctype html
|
||||
html(lang="en")
|
||||
head
|
||||
meta(charset="utf-8")
|
||||
title!= tag_line
|
||||
meta(name="description" content="")
|
||||
meta(name="author" content="Matthew Honnibal")
|
||||
link(rel="stylesheet" href="css/style.css")
|
||||
<!--[if lt IE 9]>
|
||||
script(src="http://html5shiv.googlecode.com/svn/trunk/html5.js")
|
||||
<![endif]-->
|
||||
|
||||
body(id="home" role="document")
|
||||
header(role="banner")
|
||||
h1(class="logo")!= tag_line
|
||||
div(class="slogan")!= slogan
|
||||
|
||||
nav(role="navigation")
|
||||
ul
|
||||
li: a(href="#") Home
|
||||
li: a(href="#") Docs
|
||||
li: a(href="#") License
|
||||
li: a(href="#") More
|
||||
|
||||
main(id="content" role="main")
|
||||
block intro_block
|
||||
|
||||
block body_block
|
||||
|
||||
footer(role="contentinfo")
|
||||
|
||||
script(src="js/prism.js")
|
||||
script(src="js/details_polyfill.js")
|
|
@ -0,0 +1,109 @@
|
|||
mixin example(name)
|
||||
details
|
||||
summary
|
||||
h4= name
|
||||
block
|
||||
|
||||
|
||||
+example("Load resources and process text")
|
||||
pre.language-python: code
|
||||
| from __future__ import unicode_literals, print_function
|
||||
| from spacy.en import English
|
||||
| nlp = English()
|
||||
| doc = nlp('Hello, world. Here are two sentences.')
|
||||
|
||||
+example("Get tokens and sentences")
|
||||
pre.language-python: code
|
||||
| token = doc[0]
|
||||
| sentence = doc.sents[0]
|
||||
| assert token is sentence[0]
|
||||
|
||||
+example("Use integer IDs for any string")
|
||||
pre.language-python: code
|
||||
| hello_id = nlp.vocab.strings['Hello']
|
||||
| hello_str = nlp.vocab.strings[hello_id]
|
||||
|
|
||||
| assert token.orth == hello_id == 52
|
||||
| assert token.orth_ == hello_str == 'Hello'
|
||||
|
||||
+example("Get and set string views and flags")
|
||||
pre.language-python: code
|
||||
| assert token.shape_ == 'Xxxxx'
|
||||
| for lexeme in nlp.vocab:
|
||||
| if lexeme.is_alpha:
|
||||
| lexeme.shape_ = 'W'
|
||||
| elif lexeme.is_digit:
|
||||
| lexeme.shape_ = 'D'
|
||||
| elif lexeme.is_punct:
|
||||
| lexeme.shape_ = 'P'
|
||||
| else:
|
||||
| lexeme.shape_ = 'M'
|
||||
| assert token.shape_ == 'W'
|
||||
|
||||
+example("Export to numpy arrays")
|
||||
pre.language-python: code
|
||||
| from spacy.en.attrs import ORTH, LIKE_URL, IS_OOV
|
||||
|
|
||||
| attr_ids = [ORTH, LIKE_URL, IS_OOV]
|
||||
| doc_array = doc.to_array(attr_ids)
|
||||
| assert doc_array.shape == (len(doc), len(attr_ids))
|
||||
| assert doc[0].orth == doc_array[0, 0]
|
||||
| assert doc[1].orth == doc_array[1, 0]
|
||||
| assert doc[0].like_url == doc_array[0, 1]
|
||||
| assert list(doc_array[:, 1]) == [t.like_url for t in doc]
|
||||
|
||||
+example("Word vectors")
|
||||
pre.language-python: code
|
||||
| doc = nlp("Apples and oranges are similar. Boots and hippos aren't.")
|
||||
|
|
||||
| apples = doc[0]
|
||||
| oranges = doc[2]
|
||||
| boots = doc[6]
|
||||
| hippos = doc[8]
|
||||
|
|
||||
| assert apples.similarity(oranges) > boots.similarity(hippos)
|
||||
|
||||
|
||||
+example("Part-of-speech tags")
|
||||
pre.language-python: code
|
||||
| doc[0].pos
|
||||
| doc[0].tag
|
||||
|
||||
+example("Syntactic dependencies")
|
||||
pre.language-python: code
|
||||
| for head in doc:
|
||||
| for child in head.lefts:
|
||||
| assert child.head is head
|
||||
| for child in head.rights:
|
||||
| assert child.head is head
|
||||
| sent = nlp('The four wheels on the bus turned quickly.')
|
||||
| wheels = sent[2]
|
||||
| bus = sent[5]
|
||||
| assert len(list(wheels.lefts)) == 2
|
||||
| assert len(list(wheels.rights)) == 1
|
||||
| assert len(list(wheels.children)) == 3
|
||||
| assert len(list(bus.lefts)) == 1
|
||||
| assert len(list(bus.rights)) == 0
|
||||
| assert len(list(bus.children)) == 1
|
||||
|
|
||||
| assert len(list(wheels.subtree)) == 6
|
||||
|
||||
+example("Named entities")
|
||||
pre.language-python: code
|
||||
| doc.ents
|
||||
| token.ent_type
|
||||
| token.ent_iob
|
||||
|
||||
+example("Define custom NER rules")
|
||||
pre.language-python: code
|
||||
| nlp.matcher
|
||||
|
||||
+example("Calculate inline mark-up on original string")
|
||||
pre.language-python: code
|
||||
| token.string
|
||||
| token.spacy
|
||||
| token.whitespace_
|
||||
|
||||
+example("Efficient binary serialization")
|
||||
pre.language-python: code
|
||||
|
|
Loading…
Reference in New Issue