// Source: spaCy/website/src/widgets/landing.js (303 lines, 13 KiB, JavaScript)

// React core and runtime prop validation.
import React from 'react'
import PropTypes from 'prop-types'
// Gatsby data layer: StaticQuery + graphql feed site metadata into the widget.
import { StaticQuery, graphql } from 'gatsby'

// Shared site components.
import {
    LandingHeader,
    LandingTitle,
    LandingSubtitle,
    LandingGrid,
    LandingCard,
    LandingCol,
    LandingDemo,
    LandingBannerGrid,
    LandingBanner,
} from '../components/landing'
import { H2 } from '../components/typography'
import { InlineCode } from '../components/code'
import Button from '../components/button'
import Link from '../components/link'

// Sibling widgets embedded in the landing page.
import QuickstartTraining from './quickstart-training'
import Project from './project'
import Features from './features'

// Static image assets.
import courseImage from '../../docs/images/course.jpg'
import prodigyImage from '../../docs/images/prodigy_overview.jpg'
import projectsImage from '../../docs/images/projects.png'
import irlBackground from '../images/spacy-irl.jpg'

// Markdown document imported as a component (rendered as <Benchmarks />).
import Benchmarks from 'usage/_benchmarks-models.md'
/**
 * Build the Python snippet shown in the editable demo on the landing page.
 *
 * @param {boolean} nightly - When truthy, the install hint at the top of the
 *     snippet uses `spacy-nightly --pre` instead of `spacy`.
 * @returns {string} Python source code, terminated by a newline.
 */
function getCodeExample(nightly) {
    // The template body is intentionally flush-left: every character inside the
    // backticks is emitted verbatim, and Python is whitespace-sensitive. Only
    // continuation lines and the `for` body carry (Python) indentation.
    return `# pip install -U ${nightly ? 'spacy-nightly --pre' : 'spacy'}
# python -m spacy download en_core_web_sm
import spacy

# Load English tokenizer, tagger, parser and NER
nlp = spacy.load("en_core_web_sm")

# Process whole documents
text = ("When Sebastian Thrun started working on self-driving cars at "
        "Google in 2007, few people outside of the company took him "
        "seriously. “I can tell you very senior CEOs of major American "
        "car companies would shake my hand and turn away because I wasn’t "
        "worth talking to,” said Thrun, in an interview with Recode earlier "
        "this week.")
doc = nlp(text)

# Analyze syntax
print("Noun phrases:", [chunk.text for chunk in doc.noun_chunks])
print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"])

# Find named entities, phrases and concepts
for entity in doc.ents:
    print(entity.text, entity.label_)
`
}
const Landing = ({ data }) => {
2020-10-15 09:16:06 +00:00
const { nightly } = data
const codeExample = getCodeExample(nightly)
return (
<>
2020-07-01 11:03:04 +00:00
<LandingHeader nightly={data.nightly}>
<LandingTitle>
Industrial-Strength
<br />
Natural Language
<br />
Processing
</LandingTitle>
<LandingSubtitle>in Python</LandingSubtitle>
</LandingHeader>
<LandingGrid blocks>
<LandingCard title="Get things done" url="/usage/spacy-101" button="Get started">
spaCy is designed to help you do real work to build real products, or gather
real insights. The library respects your time, and tries to avoid wasting it.
2020-09-12 15:05:10 +00:00
It's easy to install, and its API is simple and productive.
</LandingCard>
<LandingCard
title="Blazing fast"
url="/usage/facts-figures"
button="Facts &amp; Figures"
>
spaCy excels at large-scale information extraction tasks. It's written from the
2020-09-12 15:05:10 +00:00
ground up in carefully memory-managed Cython. If your application needs to
process entire web dumps, spaCy is the library you want to be using.
2019-04-19 13:23:08 +00:00
</LandingCard>
2020-09-12 15:05:10 +00:00
<LandingCard title="Awesome ecosystem" url="/usage/projects" button="Read more">
In the five years since its release, spaCy has become an industry standard with
a huge ecosystem. Choose from a variety of plugins, integrate with your machine
learning stack and build custom components and workflows.
</LandingCard>
</LandingGrid>
<LandingGrid>
<LandingDemo title="Edit the code &amp; try spaCy">{codeExample}</LandingDemo>
2019-03-18 15:24:52 +00:00
<LandingCol>
<H2>Features</H2>
2020-10-15 09:16:06 +00:00
<Features />
2019-03-18 15:24:52 +00:00
</LandingCol>
</LandingGrid>
2020-09-12 15:05:10 +00:00
<LandingBannerGrid>
<LandingBanner
label="New in v3.0"
title="Transformer-based pipelines, new training system, project templates &amp; more"
to="/usage/v3"
button="See what's new"
small
>
spaCy v3.0 features all new <strong>transformer-based pipelines</strong> that
bring spaCy's accuracy right up to the current <strong>state-of-the-art</strong>
. You can use any pretrained transformer to train your own pipelines, and even
share one transformer between multiple components with{' '}
<strong>multi-task learning</strong>. Training is now fully configurable and
extensible, and you can define your own custom models using{' '}
<strong>PyTorch</strong>, <strong>TensorFlow</strong> and other frameworks. The
new spaCy projects system lets you describe whole{' '}
<strong>end-to-end workflows</strong> in a single file, giving you an easy path
from prototype to production, and making it easy to clone and adapt
best-practice projects for your own use cases.
</LandingBanner>
<LandingBanner
title="Prodigy: Radically efficient machine teaching"
label="From the makers of spaCy"
to="https://prodi.gy"
button="Try it out"
background="#f6f6f6"
color="#000"
small
>
<Link to="https://prodi.gy" hidden>
{/** Update image */}
<img
src={prodigyImage}
alt="Prodigy: Radically efficient machine teaching"
/>
</Link>
<br />
<br />
Prodigy is an <strong>annotation tool</strong> so efficient that data scientists
can do the annotation themselves, enabling a new level of rapid iteration.
Whether you're working on entity recognition, intent detection or image
classification, Prodigy can help you <strong>train and evaluate</strong> your
models faster.
</LandingBanner>
</LandingBannerGrid>
<LandingGrid cols={2} style={{ gridTemplateColumns: '1fr calc(80ch + 14rem)' }}>
<LandingCol>
<H2>Reproducible training for custom pipelines</H2>
<p>
spaCy v3.0 introduces a comprehensive and extensible system for{' '}
<strong>configuring your training runs</strong>. Your configuration file
will describe every detail of your training run, with no hidden defaults,
making it easy to <strong>rerun your experiments</strong> and track changes.
You can use the quickstart widget or the{' '}
<Link to="/api/cli#init-config">
<InlineCode>init config</InlineCode>
</Link>{' '}
command to get started, or clone a project template for an end-to-end
workflow.
</p>
<p>
<Button to="/usage/training">Get started</Button>
</p>
</LandingCol>
<LandingCol>
<QuickstartTraining />
</LandingCol>
</LandingGrid>
<LandingGrid cols={2}>
<LandingCol>
<Link to="/usage/projects" hidden>
<img src={projectsImage} />
</Link>
<br />
<br />
<br />
2020-09-20 15:44:58 +00:00
<Project id="pipelines/tagger_parser_ud" title="Get started">
The easiest way to get started is to clone a project template and run it
 for example, this template for training a{' '}
<strong>part-of-speech tagger</strong> and{' '}
<strong>dependency parser</strong> on a Universal Dependencies treebank.
2020-09-12 15:05:10 +00:00
</Project>
</LandingCol>
<LandingCol>
<H2>End-to-end workflows from prototype to production</H2>
<p>
spaCy's new project system gives you a smooth path from prototype to
production. It lets you keep track of all those{' '}
<strong>data transformation</strong>, preprocessing and{' '}
<strong>training steps</strong>, so you can make sure your project is always
ready to hand over for automation. It features source asset download,
command execution, checksum verification, and caching with a variety of
backends and integrations.
</p>
<p>
<Button to="/usage/projects">Try it out</Button>
</p>
</LandingCol>
</LandingGrid>
2020-10-16 09:46:33 +00:00
<LandingBannerGrid>
<LandingBanner
title="spaCy v3.0 nightly: Transformer-based pipelines, new training system, project templates &amp; more"
label="Try the pre-release"
to="https://nightly.spacy.io"
button="See what's new"
background="#8758fe"
color="#ffffff"
small
>
spaCy v3.0 features all new <strong>transformer-based pipelines</strong> that
bring spaCy's accuracy right up to the current <strong>state-of-the-art</strong>
. You can use any pretrained transformer to train your own pipelines, and even
share one transformer between multiple components with{' '}
<strong>multi-task learning</strong>. Training is now fully configurable and
extensible, and you can define your own custom models using{' '}
<strong>PyTorch</strong>, <strong>TensorFlow</strong> and other frameworks. The
new spaCy projects system lets you describe whole{' '}
<strong>end-to-end workflows</strong> in a single file, giving you an easy path
from prototype to production, and making it easy to clone and adapt
best-practice projects for your own use cases.
</LandingBanner>
<LandingBanner
title="Prodigy: Radically efficient machine teaching"
label="From the makers of spaCy"
to="https://prodi.gy"
button="Try it out"
background="#f6f6f6"
color="#000"
small
>
<Link to="https://prodi.gy" hidden>
<img
src={prodigyImage}
alt="Prodigy: Radically efficient machine teaching"
/>
</Link>
<br />
<br />
Prodigy is an <strong>annotation tool</strong> so efficient that data scientists
can do the annotation themselves, enabling a new level of rapid iteration.
Whether you're working on entity recognition, intent detection or image
classification, Prodigy can help you <strong>train and evaluate</strong> your
models faster.
</LandingBanner>
</LandingBannerGrid>
2020-09-12 15:05:10 +00:00
<LandingGrid cols={2} style={{ gridTemplateColumns: '1fr 60%' }}>
2019-03-18 15:24:52 +00:00
<LandingCol>
<H2>Benchmarks</H2>
<p>
2020-09-12 15:05:10 +00:00
spaCy v3.0 introduces transformer-based pipelines that bring spaCy's
accuracy right up to the current <strong>state-of-the-art</strong>. You can
also use a CPU-optimized pipeline, which is less accurate but much cheaper
to run.
</p>
<p>
2020-09-23 20:02:31 +00:00
<Button to="/usage/facts-figures#benchmarks">More results</Button>
</p>
2019-03-18 15:24:52 +00:00
</LandingCol>
2019-03-18 15:24:52 +00:00
<LandingCol>
2020-09-12 15:05:10 +00:00
<Benchmarks />
2019-03-18 15:24:52 +00:00
</LandingCol>
</LandingGrid>
</>
)
}
// Runtime prop validation for <Landing>. `data` is the object handed over as
// the `data` prop (the default export passes `site.siteMetadata`).
Landing.propTypes = {
    data: PropTypes.shape({
        // Read by Landing itself (install command + header variant).
        // NOTE(review): assumed to be a boolean flag — confirm against the
        // site metadata definition.
        nightly: PropTypes.bool,
        repo: PropTypes.string,
        languages: PropTypes.arrayOf(
            PropTypes.shape({
                models: PropTypes.arrayOf(PropTypes.string),
            })
        ),
    }),
}
export default () => (
<StaticQuery query={landingQuery} render={({ site }) => <Landing data={site.siteMetadata} />} />
)
// Static GraphQL query selecting the site metadata fields (`nightly`, `repo`)
// that the default export passes to <Landing>. Must stay a plain `graphql`
// tagged template without interpolation.
const landingQuery = graphql`
    query LandingQuery {
        site {
            siteMetadata {
                nightly
                repo
            }
        }
    }
`