diff --git a/docs/redesign/installation.jade b/docs/redesign/installation.jade new file mode 100644 index 000000000..05f89dd24 --- /dev/null +++ b/docs/redesign/installation.jade @@ -0,0 +1,40 @@ +p With Python 2.7 or Python 3, using Linux or OS X, run: + +pre.language-bash: code + | $ pip install spacy + | $ python -m spacy.en.download + +p + | The download command fetches and installs about 300MB of data, for + | the parser model and word vectors, which it installs within the spacy.en + | package directory. + +p + | If you're stuck using a server with an old version of Python, and you + | don't have root access, I've prepared a bootstrap script to help you + | compile a local Python install. Run: + +pre.language-bash: code + | $ curl https://raw.githubusercontent.com/honnibal/spaCy/master/bootstrap_python_env.sh | bash && source .env/bin/activate + +p + | The other way to install the package is to clone the GitHub repository, + | and build it from source. This installs an additional dependency, + | Cython. If you're using Python 2, I also recommend installing fabric + | and fabtools – this is how I build the project. + +pre.language-bash: code + | $ git clone https://github.com/honnibal/spaCy.git + | $ cd spaCy + | $ virtualenv .env && source .env/bin/activate + | $ export PYTHONPATH=`pwd` + | $ pip install -r requirements.txt + | $ python setup.py build_ext --inplace + | $ python -m spacy.en.download + | $ pip install pytest + | $ py.test tests/ + +p + | Python packaging is awkward at the best of times, and it's particularly tricky + | with C extensions, built via Cython, requiring large data files. So, + | please report issues as you encounter them. 
diff --git a/docs/redesign/online_demo.jade b/docs/redesign/online_demo.jade new file mode 100644 index 000000000..e69de29bb diff --git a/docs/redesign/outline.jade b/docs/redesign/outline.jade new file mode 100644 index 000000000..2389dc71e --- /dev/null +++ b/docs/redesign/outline.jade @@ -0,0 +1,37 @@ +- var slogan = "Build Tomorrow's Language Technologies" +- var tag_line = "spaCy – " + slogan + + +doctype html +html(lang="en") + head + meta(charset="utf-8") + title!= tag_line + meta(name="description" content="") + meta(name="author" content="Matthew Honnibal") + link(rel="stylesheet" href="css/style.css") + + + body(id="home" role="document") + header(role="banner") + h1(class="logo")!= tag_line + div(class="slogan")!= slogan + + nav(role="navigation") + ul + li: a(href="#") Home + li: a(href="#") Docs + li: a(href="#") License + li: a(href="#") More + + main(id="content" role="main") + block intro_block + + block body_block + + footer(role="contentinfo") + + script(src="js/prism.js") + script(src="js/details_polyfill.js") diff --git a/docs/redesign/usage_examples.jade b/docs/redesign/usage_examples.jade new file mode 100644 index 000000000..d429339d4 --- /dev/null +++ b/docs/redesign/usage_examples.jade @@ -0,0 +1,109 @@ +mixin example(name) + details + summary + h4= name + block + + ++example("Load resources and process text") + pre.language-python: code + | from __future__ import unicode_literals, print_function + | from spacy.en import English + | nlp = English() + | doc = nlp('Hello, world. 
Here are two sentences.') + ++example("Get tokens and sentences") + pre.language-python: code + | token = doc[0] + | sentence = doc.sents[0] + | assert token is sentence[0] + ++example("Use integer IDs for any string") + pre.language-python: code + | hello_id = nlp.vocab.strings['Hello'] + | hello_str = nlp.vocab.strings[hello_id] + | + | assert token.orth == hello_id == 52 + | assert token.orth_ == hello_str == 'Hello' + ++example("Get and set string views and flags") + pre.language-python: code + | assert token.shape_ == 'Xxxxx' + | for lexeme in nlp.vocab: + | if lexeme.is_alpha: + | lexeme.shape_ = 'W' + | elif lexeme.is_digit: + | lexeme.shape_ = 'D' + | elif lexeme.is_punct: + | lexeme.shape_ = 'P' + | else: + | lexeme.shape_ = 'M' + | assert token.shape_ == 'W' + ++example("Export to numpy arrays") + pre.language-python: code + | from spacy.en.attrs import ORTH, LIKE_URL, IS_OOV + | + | attr_ids = [ORTH, LIKE_URL, IS_OOV] + | doc_array = doc.to_array(attr_ids) + | assert doc_array.shape == (len(doc), len(attr_ids)) + | assert doc[0].orth == doc_array[0, 0] + | assert doc[1].orth == doc_array[1, 0] + | assert doc[0].like_url == doc_array[0, 1] + | assert list(doc_array[:, 1]) == [t.like_url for t in doc] + ++example("Word vectors") + pre.language-python: code + | doc = nlp("Apples and oranges are similar. 
Boots and hippos aren't.") + | + | apples = doc[0] + | oranges = doc[2] + | boots = doc[6] + | hippos = doc[8] + | + | assert apples.similarity(oranges) > boots.similarity(hippos) + + ++example("Part-of-speech tags") + pre.language-python: code + | doc[0].pos + | doc[0].tag + ++example("Syntactic dependencies") + pre.language-python: code + | for head in doc: + | for child in head.lefts: + | assert child.head is head + | for child in head.rights: + | assert child.head is head + | sent = nlp('The four wheels on the bus turned quickly.') + | wheels = sent[2] + | bus = sent[5] + | assert len(list(wheels.lefts)) == 2 + | assert len(list(wheels.rights)) == 1 + | assert len(list(wheels.children)) == 3 + | assert len(list(bus.lefts)) == 1 + | assert len(list(bus.rights)) == 0 + | assert len(list(bus.children)) == 1 + | + | assert len(list(wheels.subtree)) == 6 + ++example("Named entities") + pre.language-python: code + | doc.ents + | token.ent_type + | token.ent_iob + ++example("Define custom NER rules") + pre.language-python: code + | nlp.matcher + ++example("Calculate inline mark-up on original string") + pre.language-python: code + | token.string + | token.spacy + | token.whitespace_ + ++example("Efficient binary serialization") + pre.language-python: code + | 