From c1a264fc465d31cc64b3b88a587c5920c4502cdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stardust=C2=B7=E5=87=8F?= Date: Sat, 23 Sep 2023 21:06:18 +0800 Subject: [PATCH] Create tokenizer_config.json --- bert/bert-large-japanese-v2/tokenizer_config.json | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 bert/bert-large-japanese-v2/tokenizer_config.json diff --git a/bert/bert-large-japanese-v2/tokenizer_config.json b/bert/bert-large-japanese-v2/tokenizer_config.json new file mode 100644 index 000000000..dfbcc4690 --- /dev/null +++ b/bert/bert-large-japanese-v2/tokenizer_config.json @@ -0,0 +1,10 @@ +{ + "tokenizer_class": "BertJapaneseTokenizer", + "model_max_length": 512, + "do_lower_case": false, + "word_tokenizer_type": "mecab", + "subword_tokenizer_type": "wordpiece", + "mecab_kwargs": { + "mecab_dic": "unidic_lite" + } +}