Skip to content

Commit

Permalink
Merge pull request allenai#62 from OyvindTafjord/fix-logprobs
Browse files Browse the repository at this point in the history
Fix MMLU answer token and probability calculation
  • Loading branch information
yizhongw authored Sep 22, 2023
2 parents 28d5366 + 2285ef2 commit 12950e0
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
5 changes: 3 additions & 2 deletions eval/mmlu/run_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ def eval_hf_model(args, subject, model, tokenizer, dev_df, test_df, batch_size=1
prompts.append(prompt)

# get the answer for all examples
# note: here we cannot directly use convert_tokens_to_ids because some tokenizers will automatically add a space prefix.
answer_choice_ids = [tokenizer.encode(answer_choice, add_special_tokens=False)[0] for answer_choice in choices]
# adding a prefix space here, as that's expected from the prompt
# TODO: should raise a warning if this returns more than one token
answer_choice_ids = [tokenizer.encode(" " + answer_choice, add_special_tokens=False)[-1] for answer_choice in choices]
pred_indices, all_probs = get_next_word_predictions(
model, tokenizer, prompts, candidate_token_ids=answer_choice_ids, return_token_predictions=False, batch_size=batch_size
)
Expand Down
4 changes: 2 additions & 2 deletions eval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,9 @@ def get_next_word_predictions(model, tokenizer, prompts, candidate_token_ids=Non
attention_mask = attention_mask.cuda()

batch_logits = model(input_ids=batch_input_ids, attention_mask=attention_mask).logits[:, -1, :]
if candidate_token_ids is not None:
batch_logits = batch_logits[:, candidate_token_ids]
batch_probs = torch.softmax(batch_logits, dim=-1)
if candidate_token_ids is not None:
batch_probs = batch_probs[:, candidate_token_ids]
batch_prediction_indices = torch.argmax(batch_probs, dim=-1)
if return_token_predictions:
if candidate_token_ids is not None:
Expand Down

0 comments on commit 12950e0

Please sign in to comment.