From d5aacba674734b749c2be97280776103ce01bcc0 Mon Sep 17 00:00:00 2001
From: Giovanni Campagna
Date: Fri, 1 Nov 2019 17:36:36 -0700
Subject: [PATCH] embeddings: allow passing full locale tags as --locale

This way, we don't need to do anything too special in Genie,
and we can call decanlp with --locale zh-tw or --locale zh-cn
if needed to distinguish
---
 decanlp/utils/embeddings.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/decanlp/utils/embeddings.py b/decanlp/utils/embeddings.py
index 1ce32b67..5b3dfee6 100644
--- a/decanlp/utils/embeddings.py
+++ b/decanlp/utils/embeddings.py
@@ -69,7 +69,9 @@ class AlmondEmbeddings(torchtext.vocab.Vectors):
 
 def load_embeddings(args, logger=_logger, load_almond_embeddings=True):
     logger.info(f'Getting pretrained word vectors')
-    if args.locale == 'en':
+    language = args.locale.split('-')[0]
+
+    if language == 'en':
         char_vectors = torchtext.vocab.CharNGram(cache=args.embeddings)
         if args.small_glove:
             glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings, name="6B", dim=50)
@@ -80,7 +82,7 @@ def load_embeddings(args, logger=_logger, load_almond_embeddings=True):
     # Chinese word embeddings
     else:
         # default to fastText
-        vectors = [torchtext.vocab.FastText(cache=args.embeddings, language=args.locale)]
+        vectors = [torchtext.vocab.FastText(cache=args.embeddings, language=language)]
 
     if load_almond_embeddings and args.almond_type_embeddings:
         vectors.append(AlmondEmbeddings())