Specifying the alpaca eval reference path as the regenerated version with 2048 max tokens.
yifan1130 · Sep 29, 2023 · 171c3f1
1 parent 63f870d
commit 171c3f1
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 1 deletion.
diff --git a/eval/alpaca_farm/run_eval.py b/eval/alpaca_farm/run_eval.py
@@ -102,7 +102,7 @@ def main(args):
         default="data/eval/alpaca_farm/davinci_003_outputs_2048_token.json",
         help="Path to the reference outputs. "
              "Alpaca_eval leaderboard use davinci_003 to generate the reference outputs, "
-             "but they limit the max_tokens to 300. Here regenerated reference outputs with max_tokens=2048.",
+             "but they limit the max_tokens to 300. Here we regenerated reference outputs with max_tokens=2048.",
     )
     parser.add_argument(
         "--save_dir",

diff --git a/scripts/eval/alpaca_farm.sh b/scripts/eval/alpaca_farm.sh
@@ -3,6 +3,7 @@
 # use vllm for generation
 python -m eval.alpaca_farm.run_eval \
     --model_name_or_path ../checkpoints/tulu_v1_7B/ \
+    --reference_path data/eval/alpaca_farm/davinci_003_outputs_2048_token.json \
     --save_dir results/alpaca_farm/tulu_v1_7B/ \
     --eval_batch_size 20 \
     --use_vllm \
@@ -13,6 +14,7 @@ python -m eval.alpaca_farm.run_eval \
 # use normal huggingface generation function
 python -m eval.alpaca_farm.run_eval \
     --model_name_or_path ../checkpoints/tulu_v1_7B/ \
+    --reference_path data/eval/alpaca_farm/davinci_003_outputs_2048_token.json \
     --save_dir results/alpaca_farm/tulu_v1_7B/ \
     --eval_batch_size 20 \
     --use_chat_format \