diff --git a/spacy/_ml.py b/spacy/_ml.py index c08dce100..a32d2cf20 100644 --- a/spacy/_ml.py +++ b/spacy/_ml.py @@ -48,11 +48,11 @@ def cosine(vec1, vec2): def create_default_optimizer(ops, **cfg): learn_rate = util.env_opt("learn_rate", 0.001) - beta1 = util.env_opt("optimizer_B1", 0.8) - beta2 = util.env_opt("optimizer_B2", 0.8) - eps = util.env_opt("optimizer_eps", 0.00001) + beta1 = util.env_opt("optimizer_B1", 0.9) + beta2 = util.env_opt("optimizer_B2", 0.999) + eps = util.env_opt("optimizer_eps", 1e-8) L2 = util.env_opt("L2_penalty", 1e-6) - max_grad_norm = util.env_opt("grad_norm_clip", 5.0) + max_grad_norm = util.env_opt("grad_norm_clip", 1.0) optimizer = Adam(ops, learn_rate, L2=L2, beta1=beta1, beta2=beta2, eps=eps) optimizer.max_grad_norm = max_grad_norm optimizer.device = ops.device