numericalizer/transformer: remove bogus assertions (#9)

These assertions do not mean much, because those tokens are guaranteed
to be in the decoder vocabulary regardless of the assertion, and
they won't necessarily have the same ID in the decoder and the true
vocabulary. Also the mask_id assertion fails for XLMR, because
mask_id is 250004.
This commit is contained in:
Giovanni Campagna 2020-03-24 18:54:44 -07:00 committed by GitHub
parent 1e2dbce017
commit 36b1197c9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 0 additions and 6 deletions

View File

@ -99,12 +99,6 @@ class TransformerNumericalizer(object):
self.mask_id = self._tokenizer.mask_token_id
self.generative_vocab_size = len(self._decoder_words)
assert self.init_id < self.generative_vocab_size
assert self.eos_id < self.generative_vocab_size
assert self.unk_id < self.generative_vocab_size
assert self.pad_id < self.generative_vocab_size
assert self.mask_id < self.generative_vocab_size
self.decoder_vocab = DecoderVocabulary(self._decoder_words, self._tokenizer,
                                       pad_token=self.pad_token, eos_token=self.eos_token)