
Commit

Fix a few comment and docstring typos and a typehint (huggingface#177)

---------

Co-authored-by: Nathan Habib <[email protected]>
Co-authored-by: Clémentine Fourrier <[email protected]>
3 people authored May 12, 2024
1 parent 981e10a commit 061283f
Showing 9 changed files with 15 additions and 15 deletions.
2 changes: 1 addition & 1 deletion community_tasks/_template.py
@@ -40,7 +40,7 @@


# EVAL WITH NO SUBSET ##
- # This is how you create a simple tasks (like hellaswag) which has one single subset
+ # This is how you create a simple task (like hellaswag) which has one single subset
# attached to it, and one evaluation possible.
task = LightevalTaskConfig(
name="myothertask",
2 changes: 1 addition & 1 deletion community_tasks/arabic_evals.py
@@ -24,7 +24,7 @@
"""
Custom evaluation tasks for lighteval
- This file generally create just a TASKS_TABLE and TASKS_GROUPS which are then imported by LightEval.
+ This file generally creates just a TASKS_TABLE and TASKS_GROUPS which are then imported by LightEval.
"""
import random
import re
2 changes: 1 addition & 1 deletion community_tasks/german_rag_evals.py
@@ -25,7 +25,7 @@
"""
Custom evaluation tasks for lighteval.
- This file generally create just a TASKS_TABLE and TASKS_GROUPS which are then imported by LightEval.
+ This file generally creates just a TASKS_TABLE and TASKS_GROUPS which are then imported by LightEval.
This module implements the 4 tasks of deutsche-telekom/Ger-RAG-eval.
See: https://huggingface.co/datasets/deutsche-telekom/Ger-RAG-eval
"""
2 changes: 1 addition & 1 deletion examples/nanotron/custom_evaluation_tasks.py
@@ -24,7 +24,7 @@
"""
Custom evaluation tasks for lighteval
- This file generally create just a TASKS_TABLE and TASKS_GROUPS which are then imported by LightEval.
+ This file generally creates just a TASKS_TABLE and TASKS_GROUPS which are then imported by LightEval.
"""
import re
from dataclasses import asdict
14 changes: 7 additions & 7 deletions src/lighteval/metrics/metrics_sample.py
@@ -213,21 +213,21 @@ def __init__(self, length_normalization: bool = False, ignore_first_space: bool
length_normalization (bool, optional): Whether log-likelihood scores should be normalized for sentence length. Defaults to False.
Should be True for most cases.
ignore_first_space (bool, optional): Whether to ignore the first token's log prob (if it's a space only). Defaults to False.
- Only case when it should be True is when the possible choices (for example `A`,`B` ...) have an extra
+ The only case when it should be True is when the possible choices (for example `A`,`B` ...) have an extra
space added in front of them to manage tokenization issues (` A`, ` B`, ...) for some models.
"""
self.length_normalization = length_normalization
self.ignore_first_space = ignore_first_space

def compute(self, gold_ixs: list[int], choices_logprob: list[float], formatted_doc: Doc, **kwargs) -> int:
- """Computs the log likelihood accuracy: is the choice with the highest logprob in `choices_logprob` present
- in the `gold_idxs`?
+ """Computes the log likelihood accuracy: is the choice with the highest logprob in `choices_logprob` present
+ in the `gold_ixs`?
Args:
gold_ixs (list[int]): All the gold choices indices
choices_logprob (list[float]): Summed log-probabilities of all the possible choices for the model, ordered as the choices.
formatted_doc (Doc): Original document for the sample.
- Used to get the original choices's length for possible normalisation
+ Used to get the original choices' length for possible normalization
Returns:
int: The eval score: 1 if the best log-prob choice is in gold, 0 otherwise.
@@ -258,7 +258,7 @@ def __init__(self, at: int) -> None:

def compute(self, choices_logprob: list[float], gold_ixs: list[int], **kwargs) -> int:
"""Computes the recall at the requested depth level: looks at the `n` best predicted choices (with the
- highest log probabilies) and see if there is an actual gold among them.
+ highest log probabilities) and see if there is an actual gold among them.
Args:
gold_ixs (list[int]): All the gold choices indices
@@ -277,7 +277,7 @@ def __init__(self, length_normalization: bool = False):
"""A mean reciprocal rank class.
Args:
- length_normalization (bool, optional): Whether to use normalisation be choice length when computing the best log-probabilities. Defaults to False.
+ length_normalization (bool, optional): Whether to use normalization on choice length when computing the best log-probabilities. Defaults to False.
"""
self.length_normalization = length_normalization

@@ -288,7 +288,7 @@ def compute(self, choices_logprob: list[float], gold_ixs: list[float], formatted
gold_ixs (list[int]): All the gold choices indices
choices_logprob (list[float]): Summed log-probabilities of all the possible choices for the model, ordered as the choices.
formatted_doc (Doc): Original document for the sample.
- Used to get the original choices's length for possible normalisation
+ Used to get the original choices' length for possible normalization
Returns:
float: MRR score.
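The docstrings touched in this file describe sample-level metrics that compare `choices_logprob` against `gold_ixs`. A minimal, self-contained sketch of that logic, written independently of lighteval's actual classes (the length-normalization option mentioned in the docstrings is omitted here):

```python
# Hypothetical sketches of the metrics described in the docstrings above,
# operating on per-choice summed log-probabilities and gold choice indices.

def loglikelihood_acc(choices_logprob: list[float], gold_ixs: list[int]) -> int:
    """1 if the highest-logprob choice is one of the gold choices, else 0."""
    best = max(range(len(choices_logprob)), key=lambda i: choices_logprob[i])
    return int(best in gold_ixs)


def recall_at_k(choices_logprob: list[float], gold_ixs: list[int], k: int) -> int:
    """1 if any gold choice is among the k highest-logprob choices, else 0."""
    top_k = sorted(range(len(choices_logprob)), key=lambda i: choices_logprob[i], reverse=True)[:k]
    return int(any(ix in top_k for ix in gold_ixs))


def mrr(choices_logprob: list[float], gold_ixs: list[int]) -> float:
    """Reciprocal of the best (1-based) rank reached by any gold choice."""
    ranking = sorted(range(len(choices_logprob)), key=lambda i: choices_logprob[i], reverse=True)
    return 1.0 / (min(ranking.index(ix) for ix in gold_ixs) + 1)


# Four choices; indices ordered as the choices themselves.
print(loglikelihood_acc([-1.2, -0.4, -0.3, -2.0], gold_ixs=[2]))  # 1
print(recall_at_k([-1.2, -0.4, -0.3, -2.0], gold_ixs=[1], k=2))   # 1
print(mrr([-1.2, -0.4, -0.3, -2.0], gold_ixs=[0]))                # 0.333...
```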
2 changes: 1 addition & 1 deletion src/lighteval/metrics/sample_preparator.py
@@ -132,7 +132,7 @@ def prepare(self, logprobs: list[float] | float, reference_text: str, **kwargs):
"""Prepares an individual perplexity example to the format expected by metrics computed at the corpus level (aggregated).
Args:
- logprobs (list[float]): List of the logprobabilities computed for each item of the sequence or single aggregated logprob over the sequence
+ logprobs (list[float]): List of the log-probabilities computed for each item of the sequence or single aggregated logprob over the sequence
reference_text (str): Current reference text for which to compute the length in self.units_type
Returns:
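The `prepare` docstring above reduces a sample to an aggregated log-probability plus the reference length measured in the preparator's unit. A rough sketch of that reduction, with the word/character unit handling assumed for illustration rather than taken from lighteval's code:

```python
# Hypothetical sketch: collapse a sample to (summed logprob, reference length)
# so a corpus-level perplexity can later be aggregated as exp(-sum(logprobs) / sum(lengths)).

def prepare_perplexity(logprobs: list[float] | float, reference_text: str, units_type: str = "words"):
    total_logprob = sum(logprobs) if isinstance(logprobs, list) else logprobs
    length = len(reference_text.split()) if units_type == "words" else len(reference_text)
    return total_logprob, length


print(prepare_perplexity([-2.3, -1.1, -0.7], "the cat sat"))  # approx. (-4.1, 3)
```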
2 changes: 1 addition & 1 deletion src/lighteval/models/abstract_model.py
@@ -118,7 +118,7 @@ def loglikelihood(

@abstractmethod
def loglikelihood_rolling(
- self, requests: list[LoglikelihoodRollingRequest], override_bs=None
+ self, requests: list[LoglikelihoodRollingRequest], override_bs: Optional[int] = None
) -> list[LoglikelihoodReturn]:
"""This function is used to compute the log likelihood of the context for perplexity metrics."""
return NotImplemented
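The one-line change above just annotates the batch-size override that defaults to `None`. A small, hypothetical illustration of the same pattern (the class and request/return types here are stand-ins, not lighteval's):

```python
# Hypothetical sketch: an abstract method whose batch-size override may be an int or None.
from abc import ABC, abstractmethod
from typing import Optional


class AbstractLM(ABC):
    @abstractmethod
    def loglikelihood_rolling(self, requests: list[str], override_bs: Optional[int] = None) -> list[float]:
        """Compute the log likelihood of the context for perplexity metrics."""
        ...
```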
2 changes: 1 addition & 1 deletion src/lighteval/models/model_config.py
@@ -349,7 +349,7 @@ def create_model_config(args: Namespace, accelerator: Union["Accelerator", None]
raise ValueError("You need to specify a base model when using adapter weights")
return AdapterModelConfig(**args_dict)
if config["merged_weights"]["base_model"] not in ["", None]:
- raise ValueError("You can't specifify a base model if you are not using delta/adapter weights")
+ raise ValueError("You can't specify a base model if you are not using delta/adapter weights")
return BaseModelConfig(**args_dict)

raise ValueError(f"Unknown model type in your model config file: {config['type']}")
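For context, the message fixed above belongs to a validation branch that only allows a base model together with delta or adapter weights. A hedged sketch of that rule, with a simplified dict layout assumed for illustration (not the actual `create_model_config`):

```python
# Hypothetical sketch of the base-model validation rule implied by the error messages above:
# a base model is required with adapter/delta weights, and rejected otherwise.

def check_base_model(config: dict) -> None:
    base_model = config.get("base_model")
    uses_adapter_or_delta = bool(config.get("adapter_weights") or config.get("delta_weights"))

    if uses_adapter_or_delta and base_model in ("", None):
        raise ValueError("You need to specify a base model when using adapter weights")
    if not uses_adapter_or_delta and base_model not in ("", None):
        raise ValueError("You can't specify a base model if you are not using delta/adapter weights")
```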
2 changes: 1 addition & 1 deletion src/lighteval/tasks/requests.py
@@ -155,7 +155,7 @@ class TaskExampleId(NamedTuple):
class Doc:
"""
Dataclass used to represent the content of a task example
- almost every field is optional, but some tasks require some fields to be present
+ almost every field is optional, but some tasks require some fields to be present.
When adding a new task, please add the required fields to the doc class.
Each task will have a different set of fields needed.
"""
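The `Doc` docstring above notes that most fields are optional and that each task needs a different subset of them. A toy illustration of that pattern (the field names below are assumptions chosen for the example, not lighteval's actual `Doc` definition):

```python
# Hypothetical, simplified stand-in for a task-example dataclass where most
# fields are optional and each task only fills in what it needs.
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class ExampleDoc:
    query: str                                         # prompt shown to the model
    choices: list[str] = field(default_factory=list)   # candidate answers, if any
    gold_index: Optional[int] = None                   # index of the correct choice, if any
    instruction: Optional[str] = None                  # extra task-specific field


# A multiple-choice task fills choices/gold_index; a generative task may leave them empty.
doc = ExampleDoc(query="2 + 2 = ?", choices=["3", "4"], gold_index=1)
```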
