Fix MMLU answer prompts and logprobs normalization

yifan1130 · Sep 22, 2023 · ad2385f · ad2385f
1 parent 609312b
commit ad2385f
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 3 deletions.
diff --git a/eval/mmlu/run_eval.py b/eval/mmlu/run_eval.py
@@ -67,7 +67,8 @@ def eval_hf_model(args, subject, model, tokenizer, dev_df, test_df, batch_size=1
 
     # get the answer for all examples
     # note: here we cannot directly use convert_tokens_to_ids because some tokenizers will automatically add a space prefix.
-    answer_choice_ids = [tokenizer.encode(answer_choice, add_special_tokens=False)[0] for answer_choice in choices]
+    # add a prefix space here, since the prompt expects one. TODO: raise a warning if this returns more than one token
+    answer_choice_ids = [tokenizer.encode(" " + answer_choice, add_special_tokens=False)[-1] for answer_choice in choices]
     pred_indices, all_probs = get_next_word_predictions(
         model, tokenizer, prompts, candidate_token_ids=answer_choice_ids, return_token_predictions=False, batch_size=batch_size
     )

diff --git a/eval/utils.py b/eval/utils.py
@@ -111,9 +111,9 @@ def get_next_word_predictions(model, tokenizer, prompts, candidate_token_ids=Non
             attention_mask = attention_mask.cuda()
 
         batch_logits = model(batch_input_ids, attention_mask).logits[:, -1, :]
-        if candidate_token_ids is not None:
-            batch_logits = batch_logits[:, candidate_token_ids]
         batch_probs = torch.softmax(batch_logits, dim=-1)
+        if candidate_token_ids is not None:
+            batch_probs = batch_probs[:, candidate_token_ids]
         batch_prediction_indices = torch.argmax(batch_probs, dim=-1)
         if return_token_predictions:
             if candidate_token_ids is not None: