From 36b1197c9a2c7f1c71d7eabce76fe13c6480b028 Mon Sep 17 00:00:00 2001
From: Giovanni Campagna
Date: Tue, 24 Mar 2020 18:54:44 -0700
Subject: [PATCH] numericalizer/transformer: remove bogus assertions (#9)

These assertions do not mean much, because those tokens are guaranteed
to be in the decoder vocabulary regardless of the assertion, and they
won't necessarily have the same ID in the decoder and the true
vocabulary.

Also the mask_id assertion fails for XLMR, because mask_id is 250004.
---
 genienlp/data_utils/numericalizer/transformer.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/genienlp/data_utils/numericalizer/transformer.py b/genienlp/data_utils/numericalizer/transformer.py
index 3d00a3b5..c4d82904 100644
--- a/genienlp/data_utils/numericalizer/transformer.py
+++ b/genienlp/data_utils/numericalizer/transformer.py
@@ -99,12 +99,6 @@ class TransformerNumericalizer(object):
         self.mask_id = self._tokenizer.mask_token_id
 
         self.generative_vocab_size = len(self._decoder_words)
-        assert self.init_id < self.generative_vocab_size
-        assert self.eos_id < self.generative_vocab_size
-        assert self.unk_id < self.generative_vocab_size
-        assert self.pad_id < self.generative_vocab_size
-        assert self.mask_id < self.generative_vocab_size
-
         self.decoder_vocab = DecoderVocabulary(self._decoder_words, self._tokenizer,
                                                pad_token=self.pad_token, eos_token=self.eos_token)
 