mirror of https://github.com/explosion/spaCy.git
Update default config [ci skip]
This commit is contained in:
parent
86c3ec9c2b
commit
5762876dcc
|
@ -2,7 +2,6 @@
|
|||
train = null
|
||||
dev = null
|
||||
vectors = null
|
||||
vocab_data = null
|
||||
init_tok2vec = null
|
||||
|
||||
[system]
|
||||
|
@ -11,8 +10,13 @@ gpu_allocator = null
|
|||
|
||||
[nlp]
|
||||
lang = null
|
||||
# List of pipeline component names, in order. The names should correspond to
|
||||
# components defined in the [components] block
|
||||
pipeline = []
|
||||
# Components that are loaded but disabled by default
|
||||
disabled = []
|
||||
# Optional callbacks to modify the nlp object before it's initialized, after
|
||||
# it's created and after the pipeline has been set up
|
||||
before_creation = null
|
||||
after_creation = null
|
||||
after_pipeline_creation = null
|
||||
|
@ -20,6 +24,7 @@ after_pipeline_creation = null
|
|||
[nlp.tokenizer]
|
||||
@tokenizers = "spacy.Tokenizer.v1"
|
||||
|
||||
# The pipeline components and their models
|
||||
[components]
|
||||
|
||||
# Readers for corpora like dev and train.
|
||||
|
@ -38,8 +43,7 @@ max_length = 0
|
|||
limit = 0
|
||||
# Apply some simple data augmentation, where we replace tokens with variations.
|
||||
# This is especially useful for punctuation and case replacement, to help
|
||||
# generalize beyond corpora that don't have smart-quotes, or only have smart
|
||||
# quotes, etc.
|
||||
# generalize beyond corpora that don't/only have smart quotes etc.
|
||||
augmenter = null
|
||||
|
||||
[corpora.dev]
|
||||
|
@ -53,6 +57,7 @@ gold_preproc = false
|
|||
max_length = 0
|
||||
# Limitation on number of training examples
|
||||
limit = 0
|
||||
# Optional callback for data augmentation
|
||||
augmenter = null
|
||||
|
||||
# Training hyper-parameters and additional features.
|
||||
|
@ -102,17 +107,18 @@ use_averages = false
|
|||
eps = 1e-8
|
||||
learn_rate = 0.001
|
||||
|
||||
# The 'initialize' step is run before training or pretraining. Components and
|
||||
# the tokenizer can each define their own arguments via their .initialize
|
||||
# methods that are populated by the config. This lets them gather resources like
|
||||
# lookup tables and build label sets, construct vocabularies, etc.
|
||||
# These settings are used when nlp.initialize() is called (typically before
|
||||
# training or pretraining). Components and the tokenizer can each define their
|
||||
# own arguments via their initialize methods that are populated by the config.
|
||||
# This lets them gather data resources, build label sets etc.
|
||||
[initialize]
|
||||
vocab_data = ${paths.vocab_data}
|
||||
lookups = null
|
||||
vectors = ${paths.vectors}
|
||||
# Extra resources for transfer-learning or pseudo-rehearsal
|
||||
init_tok2vec = ${paths.init_tok2vec}
|
||||
# Data and lookups for vocabulary
|
||||
vocab_data = null
|
||||
lookups = null
|
||||
# Arguments passed to the tokenizer's initialize method
|
||||
tokenizer = {}
|
||||
# Arguments passed to the initialize methods of the components (keyed by component name)
|
||||
# Arguments for initialize methods of the components (keyed by component)
|
||||
components = {}
|
||||
|
|
Loading…
Reference in New Issue