# Training hyper-parameters and additional features.
[training]
# Whether to train on sequences with 'gold standard' sentence boundaries
# and tokens. If you set this to true, take care to ensure your run-time
# data is passed in sentence-by-sentence via some prior preprocessing.
gold_preproc = false
# Limitations on training document length or number of examples.
max_length = 0
limit = 0
# Data augmentation
orth_variant_level = 0.0
dropout = 0.1
# Controls early-stopping. 0 or -1 mean unlimited.
patience = 1600
max_epochs = 0
max_steps = 20000
eval_frequency = 400
# Other settings
seed = 0
accumulate_gradient = 1
use_pytorch_for_gpu_memory = false
# Control how scores are printed and checkpoints are evaluated.
scores = ["speed", "tags_acc", "uas", "las", "ents_f"]
score_weights = {"las": 0.4, "ents_f": 0.4, "tags_acc": 0.2}
# These settings are invalid for the transformer models.
init_tok2vec = null
discard_oversize = false

[training.batch_size]
@schedules = "compounding.v1"
start = 1000
stop = 1000
compound = 1.001

[training.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = true
eps = 1e-8
learn_rate = 0.001

[pretraining]
max_epochs = 1000
min_length = 5
max_length = 500
dropout = 0.2
n_save_every = null
batch_size = 3000
seed = ${training:seed}
use_pytorch_for_gpu_memory = ${training:use_pytorch_for_gpu_memory}
tok2vec_model = "nlp.pipeline.tok2vec.model"

[pretraining.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = true
eps = 1e-8
learn_rate = 0.001

[pretraining.loss_func]
@losses = "CosineDistance.v1"
normalize = true

[nlp]
lang = "en"
vectors = null

[nlp.pipeline.tok2vec]
factory = "tok2vec"

[nlp.pipeline.senter]
factory = "senter"

[nlp.pipeline.ner]
factory = "ner"
learn_tokens = false
min_action_freq = 1
beam_width = 1
beam_update_prob = 1.0

[nlp.pipeline.tagger]
factory = "tagger"

[nlp.pipeline.parser]
factory = "parser"
learn_tokens = false
min_action_freq = 1
beam_width = 1
beam_update_prob = 1.0

[nlp.pipeline.senter.model]
@architectures = "spacy.Tagger.v1"

[nlp.pipeline.senter.model.tok2vec]
@architectures = "spacy.Tok2VecTensors.v1"
width = ${nlp.pipeline.tok2vec.model:width}

[nlp.pipeline.tagger.model]
@architectures = "spacy.Tagger.v1"

[nlp.pipeline.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecTensors.v1"
width = ${nlp.pipeline.tok2vec.model:width}

[nlp.pipeline.parser.model]
@architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 8
hidden_width = 128
maxout_pieces = 3
use_upper = false

[nlp.pipeline.parser.model.tok2vec]
@architectures = "spacy.Tok2VecTensors.v1"
width = ${nlp.pipeline.tok2vec.model:width}

[nlp.pipeline.ner.model]
@architectures = "spacy.TransitionBasedParser.v1"
nr_feature_tokens = 3
hidden_width = 128
maxout_pieces = 3
use_upper = false

[nlp.pipeline.ner.model.tok2vec]
@architectures = "spacy.Tok2VecTensors.v1"
width = ${nlp.pipeline.tok2vec.model:width}

[nlp.pipeline.tok2vec.model]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = ${nlp:vectors}
width = 256
depth = 6
window_size = 1
embed_size = 10000
maxout_pieces = 3
subword_features = true
dropout = null
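
# Note on how the references in this file resolve (added as a comment so the
# config stays valid): values written as ${section:key}, such as
# ${training:seed} or ${nlp.pipeline.tok2vec.model:width}, are interpolated
# from the named section, so the pretraining block reuses the training seed
# and every component's tok2vec layer uses the width defined under
# [nlp.pipeline.tok2vec.model]. Keys starting with "@" (for example
# @optimizers = "Adam.v1" or @architectures = "spacy.HashEmbedCNN.v1") are
# registry references: when the config is resolved, the named function is
# looked up in the spaCy/Thinc registry and called with the remaining keys
# of its block as arguments.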