mirror of https://github.com/explosion/spaCy.git
Remove docs references to starters for now (see #6262) [ci skip]
This commit is contained in:
parent 5a6ed01ce0
commit c655742b8b

@@ -58,5 +58,7 @@ redirects = [
     {from = "/universe", to = "/universe/project/:id", query = {id = ":id"}, force = true},
     {from = "/universe", to = "/universe/category/:category", query = {category = ":category"}, force = true},
     # Renamed universe projects
-    {from = "/universe/project/spacy-pytorch-transformers", to = "/universe/project/spacy-transformers", force = true}
+    {from = "/universe/project/spacy-pytorch-transformers", to = "/universe/project/spacy-transformers", force = true},
+    # Old model pages
+    {from = "/models/en-starters", to = "/models/en", force = true},
 ]
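
Note on the redirect rules above: Netlify matches the `query` table against the request's query string and substitutes the captured value into the `:id` / `:category` placeholder in `to`, so `/universe?id=spacy-transformers` lands on `/universe/project/spacy-transformers`. A minimal Python sketch of that substitution semantics (an illustration only, not Netlify's implementation; the rule list is abridged):

```python
from urllib.parse import urlparse, parse_qs

# Abridged stand-ins for the netlify.toml rules above: each rule matches a
# path plus a required query parameter and substitutes it into the target.
RULES = [
    {"from": "/universe", "to": "/universe/project/:id", "param": "id"},
    {"from": "/universe", "to": "/universe/category/:category", "param": "category"},
]

def resolve(url):
    """Return the redirect target for a URL, or the URL unchanged if no rule matches."""
    parsed = urlparse(url)
    params = parse_qs(parsed.query)
    for rule in RULES:
        if parsed.path == rule["from"] and rule["param"] in params:
            return rule["to"].replace(":" + rule["param"], params[rule["param"]][0])
    return url

print(resolve("/universe?id=spacy-transformers"))
# /universe/project/spacy-transformers
```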

@@ -68,8 +68,8 @@ representation consists of 300 dimensions of `0`, which means it's practically
 nonexistent. If your application will benefit from a **large vocabulary** with
 more vectors, you should consider using one of the larger pipeline packages or
-loading in a full vector package, for example,
-[`en_vectors_web_lg`](/models/en-starters#en_vectors_web_lg), which includes
-over **1 million unique vectors**.
+[`en_core_web_lg`](/models/en#en_core_web_lg), which includes **685k unique
+vectors**.
 
 spaCy is able to compare two objects, and make a prediction of **how similar
 they are**. Predicting similarity is useful for building recommendation systems
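
The similarity prediction described in this hunk is worth a concrete illustration. A minimal sketch, assuming a pipeline with word vectors such as `en_core_web_md` is installed (any vectors-equipped package works; the example sentences are made up):

```python
import spacy

# Assumes a pipeline with word vectors; the small `sm` packages ship without
# them, which is exactly the limitation the surrounding docs describe.
nlp = spacy.load("en_core_web_md")

doc1 = nlp("I like salty fries and hamburgers.")
doc2 = nlp("Fast food tastes very good.")

# Doc.similarity compares the averaged word vectors, so larger vector tables
# (e.g. en_core_web_lg's 685k vectors) yield more informative scores.
print(doc1.similarity(doc2))
```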

@@ -1859,9 +1858,8 @@ pruning the vectors will be taken care of automatically if you set the `--prune`
 flag. You can also do it manually in the following steps:
 
 1. Start with a **word vectors package** that covers a huge vocabulary. For
-   instance, the [`en_vectors_web_lg`](/models/en-starters#en_vectors_web_lg)
-   starter provides 300-dimensional GloVe vectors for over 1 million terms of
-   English.
+   instance, the [`en_core_web_lg`](/models/en#en_core_web_lg) package provides
+   300-dimensional GloVe vectors for 685k terms of English.
 2. If your vocabulary has values set for the `Lexeme.prob` attribute, the
    lexemes will be sorted by descending probability to determine which vectors
    to prune. Otherwise, lexemes will be sorted by their order in the `Vocab`.

@@ -1869,7 +1868,7 @@ flag. You can also do it manually in the following steps:
    vectors you want to keep.
 
    ```python
-   nlp = spacy.load('en_vectors_web_lg')
+   nlp = spacy.load("en_core_web_lg")
    n_vectors = 105000 # number of vectors to keep
    removed_words = nlp.vocab.prune_vectors(n_vectors)
    ```
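
For context on the snippet above: `Vocab.prune_vectors` keeps the `n_vectors` highest-priority rows (per the sorting described in step 2), remaps every other lexeme to its most similar remaining vector, and returns a record of that remapping. A hedged sketch of inspecting the result (package name as in the docs; assumes it is installed):

```python
import spacy

nlp = spacy.load("en_core_web_lg")
removed_words = nlp.vocab.prune_vectors(105000)

# prune_vectors returns a dict mapping each removed word to a
# (kept_word, similarity_score) tuple describing where it was remapped.
for word, (kept, score) in list(removed_words.items())[:5]:
    print(f"{word!r} now shares a vector with {kept!r} (similarity {score:.2f})")
```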

@@ -22,7 +22,7 @@ For more details and a behind-the-scenes look at the new release,
 >
 > ```bash
 > $ python -m spacy pretrain ./raw_text.jsonl
-> en_vectors_web_lg ./pretrained-model
+> en_core_web_lg ./pretrained-model
 > ```
 
 spaCy v2.1 introduces a new CLI command, `spacy pretrain`, that can make your
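
For reference, the `raw_text.jsonl` argument in the command above is newline-delimited JSON: `spacy pretrain` in v2.x reads one object per line, each carrying the raw text under a `"text"` key (pre-tokenized input can use `"tokens"` instead). A small sketch of producing such a file (the sample sentences are made up):

```python
import json

texts = [
    "spaCy is a library for advanced natural language processing.",
    "The pretrain command learns representations from raw, unlabelled text.",
]

# One JSON object per line, matching the JSONL input spacy pretrain expects.
with open("raw_text.jsonl", "w", encoding="utf8") as f:
    for text in texts:
        f.write(json.dumps({"text": text}) + "\n")
```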

@@ -226,8 +226,6 @@ exports.createPages = ({ graphql, actions }) => {
 
         const langs = result.data.site.siteMetadata.languages
         const modelLangs = langs.filter(({ models }) => models && models.length)
-        const starterLangs = langs.filter(({ starters }) => starters && starters.length)
-
         modelLangs.forEach(({ code, name, models, example, has_examples }, i) => {
             const slug = `/models/${code}`
             const next = i < modelLangs.length - 1 ? modelLangs[i + 1] : null

@@ -247,28 +245,6 @@ exports.createPages = ({ graphql, actions }) => {
                     },
                 })
             })
-
-            starterLangs.forEach(({ code, name, starters }, i) => {
-                const slug = `/models/${code}-starters`
-                const next = i < starterLangs.length - 1 ? starterLangs[i + 1] : null
-                createPage({
-                    path: slug,
-                    component: DEFAULT_TEMPLATE,
-                    context: {
-                        id: `${code}-starters`,
-                        slug: slug,
-                        isIndex: false,
-                        title: name,
-                        section: 'models',
-                        sectionTitle: sections.models.title,
-                        theme: sections.models.theme,
-                        next: next
-                            ? { title: next.name, slug: `/models/${next.code}-starters` }
-                            : null,
-                        meta: { models: starters, isStarters: true },
-                    },
-                })
-            })
         })
     )
 })

@@ -52,19 +52,6 @@ const Docs = ({ pageContext, children }) => (
                     id: model,
                 })),
             }))
-        if (sidebar.items.length > 2) {
-            sidebar.items[2].items = languages
-                .filter(({ starters }) => starters && starters.length)
-                .map(lang => ({
-                    text: lang.name,
-                    url: `/models/${lang.code}-starters`,
-                    isActive: id === `${lang.code}-starters`,
-                    menu: lang.starters.map(model => ({
-                        text: model,
-                        id: model,
-                    })),
-                }))
-        }
     }
     const sourcePath = source ? github(source) : null
     const currentSource = getCurrentSource(slug, isIndex)

@@ -374,7 +374,7 @@ const Models = ({ pageContext, repo, children }) => {
     const [initialized, setInitialized] = useState(false)
     const [compatibility, setCompatibility] = useState({})
     const { id, title, meta } = pageContext
-    const { models, isStarters } = meta
+    const { models } = meta
     const baseUrl = `https://raw.githubusercontent.com/${repo}/master`
 
     useEffect(() => {

@@ -388,26 +388,9 @@ const Models = ({ pageContext, repo, children }) => {
         }
     }, [initialized, baseUrl])
 
-    const modelTitle = title
-    const modelTeaser = `Available trained pipelines for ${title}`
-    const starterTitle = `${title} starters`
-    const starterTeaser = `Available transfer learning starter packs for ${title}`
-
     return (
         <>
-            <Title
-                title={isStarters ? starterTitle : modelTitle}
-                teaser={isStarters ? starterTeaser : modelTeaser}
-            />
-            {isStarters && (
-                <Section>
-                    <p>
-                        Starter packs are pretrained weights you can initialize your models with to
-                        achieve better accuracy, like word vectors (which will be used as features
-                        during training).
-                    </p>
-                </Section>
-            )}
+            <Title title={title} teaser={`Available trained pipelines for ${title}`} />
             <StaticQuery
                 query={query}
                 render={({ site }) =>