Skip to content

Commit

Permalink
add
Browse files — browse the repository at this point in the history
  • Loading branch information
karina-openai committed Oct 30, 2024
1 parent 1dc42a2 commit fb8dbdf
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 22 deletions.
45 changes: 24 additions & 21 deletions demo.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,7 +5,7 @@
from . import common
from .drop_eval import DropEval
from .gpqa_eval import GPQAEval
from .humaneval_eval import HumanEval
# from .humaneval_eval import HumanEval
from .math_eval import MathEval
from .mgsm_eval import MGSMEval
from .mmlu_eval import MMLUEval
Expand All @@ -17,20 +17,20 @@
)
from .sampler.o1_chat_completion_sampler import O1ChatCompletionSampler

# from .sampler.claude_sampler import ClaudeCompletionSampler, CLAUDE_SYSTEM_MESSAGE_LMSYS
from .sampler.claude_sampler import ClaudeCompletionSampler, CLAUDE_SYSTEM_MESSAGE_LMSYS


def main():
debug = False
debug = True
n_repeats = 16
samplers = {
# chatgpt models:
"o1-preview": O1ChatCompletionSampler(
model="o1-preview",
),
"o1-mini": O1ChatCompletionSampler(
model="o1-mini",
),
# "o1-preview": O1ChatCompletionSampler(
# model="o1-preview",
# ),
# "o1-mini": O1ChatCompletionSampler(
# model="o1-mini",
# ),
# "gpt-4-turbo-2024-04-09_assistant": ChatCompletionSampler(
# model="gpt-4-turbo-2024-04-09",
# system_message=OPENAI_SYSTEM_MESSAGE_API,
Expand All @@ -44,17 +44,20 @@ def main():
# system_message=OPENAI_SYSTEM_MESSAGE_API,
# max_tokens=2048,
# ),
"gpt-4o_chatgpt": ChatCompletionSampler(
model="gpt-4o",
system_message=OPENAI_SYSTEM_MESSAGE_CHATGPT,
max_tokens=2048,
),
"gpt-4o-mini-2024-07-18": ChatCompletionSampler(
model="gpt-4o-mini-2024-07-18",
system_message=OPENAI_SYSTEM_MESSAGE_API,
max_tokens=2048,
),
# "gpt-4o_chatgpt": ChatCompletionSampler(
# model="gpt-4o",
# system_message=OPENAI_SYSTEM_MESSAGE_CHATGPT,
# max_tokens=2048,
# ),
# "gpt-4o-mini-2024-07-18": ChatCompletionSampler(
# model="gpt-4o-mini-2024-07-18",
# system_message=OPENAI_SYSTEM_MESSAGE_API,
# max_tokens=2048,
# ),
# claude models:
"claude-3-opus-20240229_empty": ClaudeCompletionSampler(
model="claude-3-opus-20240229", system_message=None,
),
# "claude-3-opus-20240229_empty": ClaudeCompletionSampler(
# model="claude-3-opus-20240229", system_message=None,
# ),
Expand All @@ -81,8 +84,8 @@ def get_evals(eval_name):
return MGSMEval(num_examples_per_lang=10 if debug else 250)
case "drop":
return DropEval(num_examples=10 if debug else 2000, train_samples_per_prompt=3)
case "humaneval":
return HumanEval(num_examples=10 if debug else None)
# case "humaneval":
# return HumanEval(num_examples=10 if debug else None)
case "simpleqa":
return SimpleQAEval(
grader_model = grading_sampler,
Expand Down
2 changes: 1 addition & 1 deletion simpleqa_eval.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -107,7 +107,7 @@ class SimpleQAEval(Eval):
def __init__(self, grader_model: SamplerBase, num_examples: int | None = None, n_repeats: int = 1):
df = pandas.read_csv(
bf.BlobFile(
f"https://openaipublic.blob.core.windows.net/simple-evals/simpleqa/simple_qa_test_data.csv"
f"az://openaipublic/simple-evals/simple_qa_test_set.csv"
)
)
examples = [row.to_dict() for _, row in df.iterrows()]
Expand Down

0 comments on commit fb8dbdf

Please sign in to comment.