adding aimo custom eval (huggingface#154)

--------- Co-authored-by: Nathan Habib <[email protected]> Co-authored-by: lewtun <[email protected]> Co-authored-by: Clémentine Fourrier <[email protected]>
filbench · May 3, 2024 · 9e3078e · 9e3078e
1 parent c89b386
commit 9e3078e
Showing 1 changed file with 72 additions and 0 deletions.
diff --git a/community_tasks/aimo_evals.py b/community_tasks/aimo_evals.py
@@ -0,0 +1,72 @@
+# MIT License
+
+# Copyright (c) 2024 The HuggingFace Team
+# Copyright (c) 2024 Philip May, Deutsche Telekom AG
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# ruff: noqa: F405, F403, F401
+"""
+Custom evaluation tasks for lighteval.
+
+This file generally creates just a TASKS_TABLE and TASKS_GROUPS, which are then imported by LightEval.
+This module implements the AIMO Progress Prize 1 evaluation task.
+"""
+
+from lighteval.tasks.lighteval_task import LightevalTaskConfig
+from lighteval.tasks.requests import Doc
+
+
+# Task configuration for the AIMO Progress Prize 1 dataset.
+task = LightevalTaskConfig(
+    name="aimo_progress_prize_1",
+    # Must name the prompt builder defined in this module (`aimo_prompt`);
+    # the previous value "prompt" did not match any function in this file.
+    prompt_function="aimo_prompt",
+    suite=["community"],
+    hf_subset="",
+    hf_repo="lighteval/aimo_progress_prize_1",
+    # Only the "train" split is listed as available, so it serves both as the
+    # evaluation split and as the pool for few-shot examples.
+    hf_avail_splits=["train"],
+    evaluation_splits=["train"],
+    few_shots_split="train",
+    few_shots_select="sequential",
+    metric=["quasi_exact_match_math"],
+    generation_size=2048,
+    stop_sequence=None,
+)
+
+
+def aimo_prompt(line, task_name: str = None):
+    """Build the ``Doc`` for a single dataset row.
+
+    The row's ``"problem"`` field becomes the query, and its ``"answer"``
+    field, converted to ``str``, is the only choice and therefore the gold one.
+
+    Args:
+        line: Mapping-like row exposing ``"problem"`` and ``"answer"`` keys
+            (presumably one record of the HF dataset -- confirm with caller).
+        task_name: Name of the task, forwarded unchanged to the ``Doc``.
+
+    Returns:
+        A ``Doc`` with one choice and ``gold_index`` set to 0.
+    """
+    gold_answer = str(line["answer"])
+    return Doc(task_name=task_name, query=line["problem"], choices=[gold_answer], gold_index=0)
+
+
+# STORE YOUR EVALS
+_TASKS = [task]
+
+
+# MODULE LOGIC
+# You should not need to touch this
+# Convert to dict for lighteval
+TASKS_TABLE = [task_config.as_dict() for task_config in _TASKS]
+
+if __name__ == "__main__":
+    # Build a list so the task names themselves are printed; handing print()
+    # a bare generator expression only shows the generator object's repr.
+    print([t["name"] for t in TASKS_TABLE])
+    print(len(TASKS_TABLE))