Update comment

yifan1130 · Sep 22, 2023 · 2285ef2 · 2285ef2
1 parent ef0d437
commit 2285ef2
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/eval/mmlu/run_eval.py b/eval/mmlu/run_eval.py
@@ -81,8 +81,8 @@ def eval_hf_model(args, subject, model, tokenizer, dev_df, test_df, batch_size=1
         prompts.append(prompt)
 
     # get the answer for all examples
-    # note: here we cannot directly use convert_tokens_to_ids because some tokenizers automatically add a space prefix.
-    # adding a prefix space here, as that's expected from the prompt, should raise a warning if this returns more than one token
+    # adding a prefix space to each answer choice, as that's expected from the prompt; only the last token id of the encoding is kept
+    # TODO: should raise a warning if the encoding returns more than one token
     answer_choice_ids = [tokenizer.encode(" " + answer_choice, add_special_tokens=False)[-1] for answer_choice in choices]
     pred_indices, all_probs = get_next_word_predictions(
         model, tokenizer, prompts, candidate_token_ids=answer_choice_ids, return_token_predictions=False, batch_size=batch_size