* More work on docs

2015-08-12 22:39:48 +02:00 · 2015-08-12 22:39:48 +02:00 · 1db080047b
parent c767ab9fdf
commit 1db080047b
1 changed files with 63 additions and 223 deletions
--- a/docs/redesign/spacy_home.jade
+++ b/docs/redesign/spacy_home.jade
@ -1,22 +1,33 @@
- var slogan = "Build Tomorrow's Language Technologies"
+extends ./outline.jade
- var tag_line = "spaCy &ndash; #{slogan}"
+
- var a_minor_miracle = '<a href="">a minor miracle</a>'
+// Notes
 //
 // 1. Where to put version notice? Should say something like
 //   2015-08-12: v0.89
 //   and be a link
 //   
 //   Only needs to appear on home page.
 - var slogan = "Build Tomorrow's Language Technologies"
 - var tag_line = "spaCy &ndash; " + slogan
 mixin lede
  - var state_of_the_art = '<a href="#">state-of-the-art</a>'
  - var a_minor_miracle = '<a href="">a minor miracle</a>'
  - var great_documentation = '<a href="">great documentation</a>'
 mixin lede()
  p.
-    <strong>spaCy</strong> is a library for industrial-strength NLP in Python and
+    <a href="https://github.com/honnibal/spaCy"><strong>spaCy</strong></a> is a
-    Cython.  It features state-of-the-art speed and accuracy, a concise API, and
+    library for industrial-strength NLP in Python and Cython.  It features
-    great documentation.  If you're a small company doing NLP, we want spaCy to
+    !{state_of_the_art} speed and accuracy, a concise API, and great documentation.
-    seem like !{a_minor_miracle}.
+    If you're a small company doing NLP, we want <strong>spaCy</strong> to seem
    like !{a_minor_miracle}.
 mixin overview()
  p.
    Overview text
 mixin example()
  p.
    Example text
 mixin benchmarks()
  p.
    Benchmarks
@ -25,18 +36,11 @@ mixin get_started()
  p.
    Get Started
 mixin example(name)
  details
    summary
      span(class="example-name")= name
    block
 mixin comparison(name)
  details
    summary
-      h4
+      h4= name
        name
    block
@ -52,215 +56,51 @@ mixin row(...cells)
      td= cell
-doctype html
+mixin social      
-html(lang="en")
+  footer(role="contentinfo")
-  head
+    a(href="http://twitter.com/share?text=[ARTICLE HEADLINE]&url=[ARTICLE LINK]&via=honnibal" title="Share on Twitter" rel="nofollow" class="button button-twitter") Share on Twitter
    meta(charset="utf-8")
    title!= tag_line
    meta(name="description" content="")
    meta(name="author" content="Matthew Honnibal")
    link(rel="stylesheet" href="css/style.css")
    <!--[if lt IE 9]>
    script(src="http://html5shiv.googlecode.com/svn/trunk/html5.js")
    <![endif]-->
-  body(id="page" role="document")
+    div.discuss
-    header(role="banner")
+      a(href="#" title="Discuss on Hacker News" rel="nofollow" class="button button-hn")
-      h1(class="logo")!= tag_line
+        | Discuss on Hacker News
      div(class="slogan")!= slogan
-    nav(role="navigation")
+      a(href="#" title="Discuss on Reddit" rel="nofollow" class="button button-reddit")
-      ul
+        | Discuss on Reddit
        li: a(href="#") Home
        li: a(href="#") Docs
        li: a(href="#") License
        li: a(href="#") Blog
-    main(id="content" role="main")
+
 mixin Section(title_text, link_name, include_file)
  a(name=link_name): h3 #{title_text}
  if (link_name == "example-use")
    include ./usage_examples.jade
  else if (link_name == "online-demo")
    include ./online_demo.jade
  else if (link_name == "comparisons")
    include ./comparisons.jade
  else if (link_name == "install")
    include ./installation.jade
 block intro_block
  section(class="intro")
    +lede
    nav(role="navigation")
      ul
-            li: a(href="#overview" class="button") Examples
+        li: a(href="#example-use" class="button") Examples
-            li: a(href="#overview" class="button") Comparisons
+        li: a(href="#online-demo" class="button") Demo
-            li: a(href="#example-use" class="button") Demo
+        li: a(href="#comparisons" class="button") Comparisons
-            li: a(href="#get-started" class="button") Install
+        li: a(href="#install" class="button") Install v0.89
 block body_block
  article(class="page landing-page")
        a(name="example-use"): h3 Usage by Example
-        +example("Load resources and process text")
+    +Section("Usage by Example", "example-use", "./usage_examples.jade")
          pre.language-python
            code
              | from __future__ import unicode_literals, print_function
              | from spacy.en import English
              | nlp = English()
              | doc = nlp('Hello, world. Here are two sentences.')
-        +example("Get tokens and sentences")
+    +Section("Online Demo", "online-demo", "./online_demo.jade")
          pre.language-python
            code
              | token = doc[0]
              | sentence = doc.sents[0]
              | assert token[0] is sentence[0]
-        +example("Use integer IDs for any string")
+    +Section("Comparisons and Benchmarks", "comparisons", "./comparisons.jade")
          pre.language-python
            code
              | hello_id = nlp.vocab.strings['Hello']
              | hello_str = nlp.vocab.strings[hello_id]
              | 
              | assert token.orth == hello_id == 52
              | assert token.orth_ == hello_str == 'Hello'
-        +example("Get and set string views and flags")
+    +Section("Install", "install", "./install.jade")
          pre.language-python
            code
              | assert token.shape_ == 'Xxxx'
              | for lexeme in nlp.vocab:
              |     if lexeme.is_alpha:
              |         lexeme.shape_ = 'W'
              |     elif lexeme.is_digit:
              |         lexeme.shape_ = 'D'
              |     elif lexeme.is_punct:
              |         lexeme.shape_ = 'P'
              |     else:
              |         lexeme.shape_ = 'M'
              | assert token.shape_ == 'W'
        +example("Export to numpy arrays")
          pre.language-python
            code
              | Do me
        +example("Word vectors")
          pre.language-python
            code
              | Do me
        +example("Part-of-speech tags")
          pre.language-python
            code
              | Do me
        +example("Syntactic dependencies")
          pre.language-python
            code
              | Do me
        +example("Named entities")
          pre.language-python
            code
              | Do me
        +example("Define custom NER rules")
          pre.language-python
            code
              | Do me
        +example("Calculate inline mark-up on original string")
          pre.language-python
            code
              | Do me
        +example("Efficient binary serialization")
          pre.language-python
            code
              | Do me
        a(name="benchmarks"): h3 Benchmarks
        +comparison("spaCy vs. NLTK")
        +comparison("spaCy vs. Pattern")
        +comparison("spaCy vs. CoreNLP")
        +comparison("spaCy vs. ClearNLP")
        +comparison("spaCy vs. OpenNLP")
        +comparison("spaCy vs. GATE")
        details
          summary: h4 Independent Evaluation
          p
            | Independent evaluation by Yahoo! Labs and Emory
            | University, to appear at ACL 2015. Higher is better.
          table
            thead
              +columns("System", "Language", "Accuracy", "Speed")
            tbody
              +row("spaCy v0.86", "Cython", "91.9", "13,963")
              +row("spaCy v0.84", "Cython", "90.6", "13,963")
              +row("ClearNLP", "Java", "91.7", "10,271")
              +row("CoreNLP", "Java", "89.6", "8,602")
              +row("MATE", "Java", "92.5", "550")
              +row("Turbo", "C++", "92.4", "349")
              +row("Yara", "Java", "92.3", "340")
          p
            | Accuracy is % unlabelled arcs correct, speed is tokens per second.
          p
            | Joel Tetreault and Amanda Stent (Yahoo! Labs) and Jin-ho Choi (Emory)
            | performed a detailed comparison of the best parsers available.
            | All numbers above are taken from the pre-print they kindly made
            | available to me, except for spaCy v0.86. 
          p
            | I'm particularly grateful to the authors for discussion of their
            | results, which led to the improvement in accuracy between v0.84 and
            | v0.86.  A tip from Jin-ho (developer of ClearNLP) was particularly
            | useful.
        details
          summary: h4 Detailed Accuracy Comparison
        details
          summary: h4 Detailed Speed Comparison
          table
            thead
              tr
                th.
                th(colspan=3) Absolute (ms per doc)
                th(colspan=3) Relative (to spaCy)
            tbody
              tr
                td: strong System
                td: strong Split
                td: strong Tag
                td: strong Parse
                td: strong Split
                td: strong Tag
                td: strong Parse
              +row("spaCy", "0.2ms", "1ms", "19ms", "1x", "1x", "1x")
              +row("spaCy", "0.2ms", "1ms", "19ms", "1x", "1x", "1x")
              +row("CoreNLP", "2ms", "10ms", "49ms", "10x", "10x", "2.6x")
              +row("ZPar", "1ms", "8ms", "850ms", "5x", "8x", "44.7x")
              +row("NLTK", "4ms", "443ms", "n/a", "20x", "443x", "n/a")
          p
            | <strong>Set up</strong>: 100,000 plain-text documents were streamed
            | from an SQLite3 database, and processed with an NLP library, to one
            | of three levels of detail &ndash; tokenization, tagging, or parsing.
            | The tasks are additive: to parse the text you have to tokenize and
            | tag it.  The pre-processing was not subtracted from the times &ndash;
            | I report the time required for the pipeline to complete.  I report
            | mean times per document, in milliseconds.
          p
            | <strong>Hardware</strong>: Intel i7-3770 (2012)
        a(name="get-started"): h3 Get started
          +get_started
      footer(role="contentinfo")
      script(src="js/prism.js")