Update default config [ci skip]

This commit is contained in:
Ines Montani 2020-10-01 22:27:37 +02:00
parent 86c3ec9c2b
commit 5762876dcc
1 changed files with 16 additions and 10 deletions

View File

@ -2,7 +2,6 @@
train = null
dev = null
vectors = null
vocab_data = null
init_tok2vec = null
[system]
@ -11,8 +10,13 @@ gpu_allocator = null
[nlp]
lang = null
# List of pipeline component names, in order. The names should correspond to
# components defined in the [components] block
pipeline = []
# Components that are loaded but disabled by default
disabled = []
# Optional callbacks to modify the nlp object before it's initialized, after
# it's created and after the pipeline has been set up
before_creation = null
after_creation = null
after_pipeline_creation = null
@ -20,6 +24,7 @@ after_pipeline_creation = null
[nlp.tokenizer]
@tokenizers = "spacy.Tokenizer.v1"
# The pipeline components and their models
[components]
# Readers for corpora like dev and train.
@ -38,8 +43,7 @@ max_length = 0
limit = 0
# Apply some simple data augmentation, where we replace tokens with variations.
# This is especially useful for punctuation and case replacement, to help
# generalize beyond corpora that don't have smart-quotes, or only have smart
# quotes, etc.
# generalize beyond corpora that don't/only have smart quotes etc.
augmenter = null
[corpora.dev]
@ -53,6 +57,7 @@ gold_preproc = false
max_length = 0
# Limitation on number of training examples
limit = 0
# Optional callback for data augmentation
augmenter = null
# Training hyper-parameters and additional features.
@ -102,17 +107,18 @@ use_averages = false
eps = 1e-8
learn_rate = 0.001
# The 'initialize' step is run before training or pretraining. Components and
# the tokenizer can each define their own arguments via their .initialize
# methods that are populated by the config. This lets them gather resources like
# lookup tables and build label sets, construct vocabularies, etc.
# These settings are used when nlp.initialize() is called (typically before
# training or pretraining). Components and the tokenizer can each define their
# own arguments via their initialize methods that are populated by the config.
# This lets them gather data resources, build label sets etc.
[initialize]
vocab_data = ${paths.vocab_data}
lookups = null
vectors = ${paths.vectors}
# Extra resources for transfer-learning or pseudo-rehearsal
init_tok2vec = ${paths.init_tok2vec}
# Data and lookups for vocabulary
vocab_data = null
lookups = null
# Arguments passed to the tokenizer's initialize method
tokenizer = {}
# Arguments passed to the initialize methods of the components (keyed by component name)
# Arguments for initialize methods of the components (keyed by component)
components = {}