numericalizer/transformer: remove bogus assertions (#9)
These assertions do not mean much: those tokens are guaranteed to be in the decoder vocabulary regardless of the assertion, and they will not necessarily have the same ID in the decoder vocabulary as in the true vocabulary. Additionally, the mask_id assertion fails for XLM-R, whose mask_token_id is 250004.
This commit is contained in:
parent
1e2dbce017
commit
36b1197c9a
|
@ -99,12 +99,6 @@ class TransformerNumericalizer(object):
|
||||||
self.mask_id = self._tokenizer.mask_token_id
|
self.mask_id = self._tokenizer.mask_token_id
|
||||||
self.generative_vocab_size = len(self._decoder_words)
|
self.generative_vocab_size = len(self._decoder_words)
|
||||||
|
|
||||||
assert self.init_id < self.generative_vocab_size
|
|
||||||
assert self.eos_id < self.generative_vocab_size
|
|
||||||
assert self.unk_id < self.generative_vocab_size
|
|
||||||
assert self.pad_id < self.generative_vocab_size
|
|
||||||
assert self.mask_id < self.generative_vocab_size
|
|
||||||
|
|
||||||
self.decoder_vocab = DecoderVocabulary(self._decoder_words, self._tokenizer,
|
self.decoder_vocab = DecoderVocabulary(self._decoder_words, self._tokenizer,
|
||||||
pad_token=self.pad_token, eos_token=self.eos_token)
|
pad_token=self.pad_token, eos_token=self.eos_token)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue