Skip to content

Commit

Permalink
MT5 should have an autotokenizer (huggingface#8743)
Browse files (browse the repository at this point in the history)
* MT5 should have an autotokenizer

* Different configurations should be able to point to the same tokenizers
  • Loading branch information
LysandreJik authored Nov 24, 2020
1 parent 6fdd0bb commit e09e54f
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 10 deletions.
2 changes: 2 additions & 0 deletions src/transformers/models/auto/tokenization_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
MarianConfig,
MBartConfig,
MobileBertConfig,
MT5Config,
OpenAIGPTConfig,
PegasusConfig,
ProphetNetConfig,
Expand Down Expand Up @@ -173,6 +174,7 @@
[
(RetriBertConfig, (RetriBertTokenizer, RetriBertTokenizerFast)),
(T5Config, (T5Tokenizer, T5TokenizerFast)),
(MT5Config, (T5Tokenizer, T5TokenizerFast)),
(MobileBertConfig, (MobileBertTokenizer, MobileBertTokenizerFast)),
(DistilBertConfig, (DistilBertTokenizer, DistilBertTokenizerFast)),
(AlbertConfig, (AlbertTokenizer, AlbertTokenizerFast)),
Expand Down
12 changes: 2 additions & 10 deletions tests/test_tokenization_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,21 +99,13 @@ def test_parents_and_children_in_mappings(self):

for mapping in mappings:
mapping = tuple(mapping.items())
for index, (child_config, (child_model_py, child_model_fast)) in enumerate(mapping[1:]):
for parent_config, (parent_model_py, parent_model_fast) in mapping[: index + 1]:
for index, (child_config, _) in enumerate(mapping[1:]):
for parent_config, _ in mapping[: index + 1]:
with self.subTest(
msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
):
self.assertFalse(issubclass(child_config, parent_config))

# Check for Slow tokenizer implementation if provided
if child_model_py and parent_model_py:
self.assertFalse(issubclass(child_model_py, parent_model_py))

# Check for Fast tokenizer implementation if provided
if child_model_fast and parent_model_fast:
self.assertFalse(issubclass(child_model_fast, parent_model_fast))

@require_tokenizers
def test_from_pretrained_use_fast_toggle(self):
self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False), BertTokenizer)
Expand Down

0 comments on commit e09e54f

Please sign in to comment.