diff --git a/examples/dora_finetuning/dora_finetuning.py b/examples/dora_finetuning/dora_finetuning.py
index d7ecd54fc4..a62106ab50 100644
--- a/examples/dora_finetuning/dora_finetuning.py
+++ b/examples/dora_finetuning/dora_finetuning.py
@@ -6,7 +6,7 @@
     AutoModelForCausalLM,
     AutoTokenizer,
     BitsAndBytesConfig,
-    DataCollatorWithPadding,
+    DataCollatorForLanguageModeling,
     Trainer,
     TrainingArguments,
 )
@@ -95,7 +95,7 @@ def tokenize_function(examples):
     tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=dataset["train"].column_names)

     # Data collator to dynamically pad the batched examples
-    data_collator = DataCollatorWithPadding(tokenizer)
+    data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

     # Define training arguments
     training_args = TrainingArguments(
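
Rationale (illustrative sketch, not part of the example script): `DataCollatorWithPadding` only pads `input_ids` and `attention_mask` and never emits a `labels` field, so `Trainer` has nothing to compute a causal-LM loss against. `DataCollatorForLanguageModeling(tokenizer, mlm=False)` pads the batch and additionally copies `input_ids` into `labels`, masking padded positions with `-100` so they are ignored by the loss. The snippet below demonstrates the difference; the `gpt2` checkpoint is only a placeholder, any causal-LM tokenizer behaves the same way.

```python
# Illustrative comparison of the two collators (assumes transformers is installed;
# "gpt2" is a placeholder checkpoint, not the one used in the example script).
from transformers import (
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    DataCollatorWithPadding,
)

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # gpt2 has no pad token by default

features = [tokenizer("hello world"), tokenizer("a longer example sentence")]

# DataCollatorWithPadding: pads the batch but produces no "labels" key.
padded = DataCollatorWithPadding(tokenizer)(features)
print(sorted(padded.keys()))  # ['attention_mask', 'input_ids']

# DataCollatorForLanguageModeling with mlm=False: pads and copies input_ids
# into labels, with padded positions replaced by -100.
lm_batch = DataCollatorForLanguageModeling(tokenizer, mlm=False)(features)
print(sorted(lm_batch.keys()))  # ['attention_mask', 'input_ids', 'labels']
print(lm_batch["labels"][0])    # trailing pad positions show up as -100
```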