mirror of https://github.com/explosion/spaCy.git
Add models documentation
This commit is contained in:
parent
808f7ee417
commit
22dd929b65
|
@ -1,7 +1,7 @@
|
|||
# spaCy
|
||||
spacy/data/
|
||||
corpora/
|
||||
models/
|
||||
/models/
|
||||
keys/
|
||||
|
||||
# Website
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
{
|
||||
"sidebar": {
|
||||
"Models": {
|
||||
"Overview": "./"
|
||||
},
|
||||
|
||||
"Language models": {
|
||||
"English": "en",
|
||||
"German": "de",
|
||||
"Spanish": "es",
|
||||
"French": "fr",
|
||||
"Multi-language": "xx"
|
||||
}
|
||||
},
|
||||
|
||||
"index": {
|
||||
"title": "Models Overview",
|
||||
"teaser": "Downloadable statistical models for spaCy to predict and assign linguistic features.",
|
||||
"quickstart": true,
|
||||
"menu": {
|
||||
"Quickstart": "quickstart",
|
||||
"Installation": "install",
|
||||
"Naming Conventions": "conventions"
|
||||
}
|
||||
},
|
||||
|
||||
"MODELS": {
|
||||
"en": ["en_core_web_sm", "en_core_web_lg", "en_vectors_web_lg"],
|
||||
"de": ["de_dep_news_sm"],
|
||||
"es": ["es_core_web_sm"],
|
||||
"fr": [],
|
||||
"xx": ["xx_ent_wiki_sm"]
|
||||
},
|
||||
|
||||
"MODEL_META": {
|
||||
"core": "Vocabulary, syntax, entities, vectors",
|
||||
"dep": "Vocabulary, syntax",
|
||||
"ent": "Named entities",
|
||||
"vectors": "Word vectors",
|
||||
"web": "written text (blogs, news, comments)",
|
||||
"news": "written text (news, media)",
|
||||
"wiki": "Wikipedia",
|
||||
"uas": "Unlabelled dependencies",
|
||||
"las": "Labelled dependencies",
|
||||
"tags_acc": "Part-of-speech tags",
|
||||
"ents_f": "Entities (F-score)",
|
||||
"pipeline": "Processing pipeline components in order",
|
||||
"sources": "Sources of training data"
|
||||
},
|
||||
|
||||
"MODEL_LICENSES": {
|
||||
"CC BY-SA": "https://creativecommons.org/licenses/by-sa/3.0/",
|
||||
"CC BY-SA 3.0": "https://creativecommons.org/licenses/by-sa/3.0/",
|
||||
"CC BY-NC": "https://creativecommons.org/licenses/by-nc/3.0/",
|
||||
"CC BY-NC 3.0": "https://creativecommons.org/licenses/by-nc/3.0/"
|
||||
},
|
||||
|
||||
"MODEL_ACCURACY": {
|
||||
"uas": "UAS",
|
||||
"las": "LAS",
|
||||
"tags_acc": "POS",
|
||||
"ents_f": "NER F"
|
||||
},
|
||||
|
||||
"LANGUAGES": {
|
||||
"en": "English",
|
||||
"de": "German",
|
||||
"fr": "French",
|
||||
"es": "Spanish",
|
||||
"it": "Italian",
|
||||
"pt": "Portuguese",
|
||||
"nl": "Dutch",
|
||||
"sv": "Swedish",
|
||||
"fi": "Finnish",
|
||||
"nb": "Norwegian Bokmål",
|
||||
"da": "Danish",
|
||||
"hu": "Hungarian",
|
||||
"pl": "Polish",
|
||||
"he": "Hebrew",
|
||||
"bn": "Bengali",
|
||||
"id": "Indonesian",
|
||||
"th": "Thai",
|
||||
"zh": "Chinese",
|
||||
"ja": "Japanese",
|
||||
"xx": "Multi-language"
|
||||
},
|
||||
|
||||
"EXAMPLE_SENTENCES": {
|
||||
"en": "This is a sentence.",
|
||||
"de": "Dies ist ein Satz.",
|
||||
"fr": "C'est une phrase.",
|
||||
"es": "Esto es una frase.",
|
||||
"xx": "This is a sentence about Facebook."
|
||||
}
|
||||
}
|
|
@ -0,0 +1,6 @@
|
|||
//- 💫 DOCS > MODELS > DE
|
||||
|
||||
include ../_includes/_mixins
|
||||
|
||||
//- This is a placeholder. The page is rendered via the template at
|
||||
//- /_includes/_page-model.jade.
|
|
@ -0,0 +1,6 @@
|
|||
//- 💫 DOCS > MODELS > EN
|
||||
|
||||
include ../_includes/_mixins
|
||||
|
||||
//- This is a placeholder. The page is rendered via the template at
|
||||
//- /_includes/_page-model.jade.
|
|
@ -0,0 +1,6 @@
|
|||
//- 💫 DOCS > MODELS > ES
|
||||
|
||||
include ../_includes/_mixins
|
||||
|
||||
//- This is a placeholder. The page is rendered via the template at
|
||||
//- /_includes/_page-model.jade.
|
|
@ -0,0 +1,6 @@
|
|||
//- 💫 DOCS > MODELS > FR
|
||||
|
||||
include ../_includes/_mixins
|
||||
|
||||
//- This is a placeholder. The page is rendered via the template at
|
||||
//- /_includes/_page-model.jade.
|
|
@ -0,0 +1,98 @@
|
|||
//- 💫 DOCS > MODELS
|
||||
|
||||
include ../_includes/_mixins
|
||||
|
||||
+section("quickstart")
|
||||
p
|
||||
| spaCy v2.0 features new neural models for #[strong tagging],
|
||||
| #[strong parsing] and #[strong entity recognition]. The models have
|
||||
| been designed and implemented from scratch specifically for spaCy, to
|
||||
| give you an unmatched balance of speed, size and accuracy. A novel
|
||||
| bloom embedding strategy with subword features is used to support
|
||||
| huge vocabularies in tiny tables. Convolutional layers with residual
|
||||
| connections, layer normalization and maxout non-linearity are used,
|
||||
| giving much better efficiency than the standard BiLSTM solution. For
|
||||
| more details, see the notes on the
|
||||
| #[+a("/api/#nn-models") model architecture].
|
||||
|
||||
p
|
||||
| The parser and NER use an imitation learning objective to
|
||||
| deliver #[strong accuracy in line with the latest research systems],
|
||||
| even when evaluated from raw text. With these innovations, spaCy
|
||||
| v2.0's models are #[strong 10× smaller],
|
||||
| #[strong 20% more accurate], and #[strong just as fast] as the
|
||||
| previous generation.
|
||||
|
||||
include ../usage/_models/_quickstart
|
||||
|
||||
+section("install")
|
||||
+h(2, "install") Installation & Usage
|
||||
|
||||
include ../usage/_models/_install-basics
|
||||
|
||||
+infobox
|
||||
| For more details on how to use models with spaCy, see the
|
||||
| #[+a("/usage/models") usage guide on models].
|
||||
|
||||
+section("conventions")
|
||||
+h(2, "model-naming") Model naming conventions
|
||||
|
||||
p
|
||||
| In general, spaCy expects all model packages to follow the naming
|
||||
| convention of #[code [lang]_[name]]. For spaCy's models, we also
|
||||
| chose to divide the name into three components:
|
||||
|
||||
+table
|
||||
+row
|
||||
+cell #[+label Type]
|
||||
+cell
|
||||
| Model capabilities (e.g. #[code core] for a general-purpose
|
||||
| model with vocabulary, syntax, entities and word vectors, or
|
||||
| #[code depent] for only vocab, syntax and entities).
|
||||
+row
|
||||
+cell #[+label Genre]
|
||||
+cell
|
||||
| Type of text the model is trained on, e.g. #[code web] or
|
||||
| #[code news].
|
||||
+row
|
||||
+cell #[+label Size]
|
||||
+cell Model size indicator, #[code sm], #[code md] or #[code lg].
|
||||
|
||||
p
|
||||
| For example, #[code en_core_web_sm] is a small English model trained
|
||||
| on written web text (blogs, news, comments), that includes
|
||||
| vocabulary, vectors, syntax and entities.
|
||||
|
||||
+h(3, "model-versioning") Model versioning
|
||||
|
||||
p
|
||||
| Additionally, the model versioning reflects the compatibility
|
||||
| with spaCy, as well as the major and minor model version. A model
|
||||
| version #[code a.b.c] translates to:
|
||||
|
||||
+table
|
||||
+row
|
||||
+cell #[code a]
|
||||
+cell
|
||||
| #[strong spaCy major version]. For example, #[code 2] for
|
||||
| spaCy v2.x.
|
||||
+row
|
||||
+cell #[code b]
|
||||
+cell
|
||||
| #[strong Model major version]. Models with a different major
|
||||
| version can't be loaded by the same code. For example,
|
||||
| changing the width of the model, adding hidden layers or
|
||||
| changing the activation changes the model major version.
|
||||
+row
|
||||
+cell #[code c]
|
||||
+cell
|
||||
| #[strong Model minor version]. Same model structure, but
|
||||
| different parameter values, e.g. from being trained on
|
||||
| different data, for different numbers of iterations, etc.
|
||||
|
||||
p
|
||||
| For a detailed compatibility overview, see the
|
||||
| #[+a(gh("spacy-models", "compatibility.json")) #[code compatibility.json]]
|
||||
| in the models repository. This is also the source of spaCy's internal
|
||||
| compatibility check, performed when you run the
|
||||
| #[+api("cli#download") #[code download]] command.
|
|
@ -0,0 +1,6 @@
|
|||
//- 💫 DOCS > MODELS > XX
|
||||
|
||||
include ../_includes/_mixins
|
||||
|
||||
//- This is a placeholder. The page is rendered via the template at
|
||||
//- /_includes/_page-model.jade.
|
Loading…
Reference in New Issue