diff --git a/dspy/signatures/signature.py b/dspy/signatures/signature.py index 314d6c712..74fa3ee73 100644 --- a/dspy/signatures/signature.py +++ b/dspy/signatures/signature.py @@ -35,7 +35,9 @@ def __call__(cls, *args, **kwargs): # noqa: ANN002 def __new__(mcs, signature_name, bases, namespace, **kwargs): # noqa: N804 # Set `str` as the default type for all fields raw_annotations = namespace.get("__annotations__", {}) - for name, _field in namespace.items(): + for name, field in namespace.items(): + if not isinstance(field, FieldInfo): + continue # Don't add types to non-field attributes if not name.startswith("__") and name not in raw_annotations: raw_annotations[name] = str namespace["__annotations__"] = raw_annotations @@ -272,9 +274,8 @@ def make_signature( def _parse_signature(signature: str) -> Tuple[Type, Field]: - pattern = r"^\s*[\w\s,:]+\s*->\s*[\w\s,:]+\s*$" - if not re.match(pattern, signature): - raise ValueError(f"Invalid signature format: '{signature}'") + if signature.count("->") != 1: + raise ValueError(f"Invalid signature format: '{signature}', must contain exactly one '->'.") fields = {} inputs_str, outputs_str = map(str.strip, signature.split("->")) diff --git a/dspy/teleprompt/signature_opt_typed.py b/dspy/teleprompt/signature_opt_typed.py index 1921f8cd1..c86d297c1 100644 --- a/dspy/teleprompt/signature_opt_typed.py +++ b/dspy/teleprompt/signature_opt_typed.py @@ -1,4 +1,5 @@ import textwrap +from dataclasses import dataclass from typing import Generic, Literal, TypeVar import pydantic @@ -99,28 +100,48 @@ class GenerateInstructionInitial(Signature, Generic[T]): return GenerateInstructionInitial -class GenerateSignature(dspy.Signature, Generic[T]): - __doc__ = textwrap.dedent("""\ - You are an instruction optimizer for large language models. 
+def generate_with_avoidance(signatures_to_avoid: list[BaseModel]) -> type[Signature]: + class GenerateSignature(dspy.Signature, Generic[T]): + __doc__ = textwrap.dedent("""\ + You are an instruction optimizer for large language models. + + I will give some task instructions I've tried, along with their corresponding validation scores. + - The instructions are arranged in order based on their scores, where higher scores indicate better quality. + - Your task is to propose a new instruction that will lead a good language model to perform the task even better. + - Be creative, and think out of the box. + - Don't repeat instructions, descriptions and prefixes that have already been attempted. + """) + + analysis: str = OutputField(desc="Consider what made the previous instructions good or bad.") + proposed_signature: T = OutputField(desc="A signature that will likely lead to a high score.") + score: float = OutputField( + desc="The expected score for the new signature. Don't write anything after this number.", + ) + + @pydantic.field_validator("proposed_signature") + @classmethod + def check_signature_not_attempted(cls, s: T) -> T: + if s in signatures_to_avoid: + raise ValueError("Never propose a signature already in the list above.") + return s - I will give some task instructions I've tried, along with their corresponding validation scores. - - The instructions are arranged in order based on their scores, where higher scores indicate better quality. - - Your task is to propose a new instruction that will lead a good language model to perform the task even better. - - Be creative, and think out of the box. - - Don't repeat instructions, descriptions and prefixes that have already been attempted. 
- """) + return GenerateSignature - analysis: str = OutputField(desc="Consider what made the previous instructions good or bad.") - proposed_signature: T = OutputField(desc="A signature that will likely lead to a high score.") - score: float = OutputField(desc="The expected score for the new signature. Don't write anything after this number.") + +@dataclass +class OptimizerResult: + program: dspy.Program + signatures: list[dict[str, Signature]] + scores: list[float] def optimize_signature( student, evaluator, n_iterations=10, - strategy: Literal["best", "last"] = "best", - sorted_order: Literal["increasing", "decreasing"] = "increasing", + sorted_order: Literal["increasing", "decreasing", "chronological"] = "increasing", + strategy: Literal["last", "best"] = "best", + max_examples=20, # Formerly part of the constructor prompt_model=None, initial_prompts=2, @@ -139,10 +160,12 @@ def optimize_signature( The evaluator to use to score the program. n_iterations : int, optional The number of iterations to run, by default 10 - strategy : Literal["best", "last"], optional - The strategy to use to select the final program, by default "best" - sorted_order : Literal["increasing", "decreasing"], optional + max_examples : int, optional + The maximum number of previously scored signatures to pass to the prompt model as demos, by default 20 + sorted_order : Literal["increasing", "decreasing", "chronological"], optional The order in which to sort the scores, by default "increasing" + strategy : Literal["last", "best"], optional + The strategy to use to select the final program, by default "best" prompt_model : dspy.LanguageModel, optional The language model to use to generate prompts, by default None initial_prompts : int, optional @@ -222,16 +245,22 @@ def optimize_signature( # TODO: Parallelize this for name, _p in named_predictors: SignatureInfo = type(candidates[name][0]) # noqa: N806 - generator = TypedPredictor(GenerateSignature[SignatureInfo]) - - demos = [ - dspy.Example( - proposed_signature=info, - 
score=sc, - ) - for info, sc in zip(candidates[name], scores) - ] - demos.sort(key=(lambda x: x.score), reverse=(sorted_order == "decreasing")) + + demos = [dspy.Example(proposed_signature=info, score=sc) for info, sc in zip(candidates[name], scores)] + if sorted_order == "chronological": + demos = demos[-max_examples:] + elif sorted_order == "increasing": + demos.sort(key=(lambda x: x.score), reverse=False) + demos = demos[-max_examples:] + elif sorted_order == "decreasing": + demos.sort(key=(lambda x: x.score), reverse=True) + demos = demos[:max_examples] + else: + raise ValueError(f"Invalid sorted_order: {sorted_order}") + + # We can only tell the LM to avoid the signatures we are actually giving it as demos. + avoid = [ex.proposed_signature for ex in demos] + generator = TypedPredictor(generate_with_avoidance(avoid)[SignatureInfo]) generator.predictor.demos = demos if verbose: @@ -240,12 +269,16 @@ def optimize_signature( candidates[name].append(new_signature) if strategy == "last": - return module - - if strategy == "best": + pass + elif strategy == "best": i = scores.index(max(scores)) for name, p in named_predictors: p.signature = candidates[name][i].to_signature() - return module + else: + raise ValueError(f"Invalid strategy: {strategy}") - raise ValueError(f"Invalid strategy: {strategy}") + return OptimizerResult( + program=module, + signatures=[{name: sigs[i].to_signature()} for name, sigs in candidates.items() for i in range(n_iterations)], + scores=scores, + ) diff --git a/examples/functional/signature_opt_typed.ipynb b/examples/functional/signature_opt_typed.ipynb index 7447a965e..feab8d635 100644 --- a/examples/functional/signature_opt_typed.ipynb +++ b/examples/functional/signature_opt_typed.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -17,9 +17,18 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, - 
"outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/homebrew/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "import dspy\n", "turbo = dspy.OpenAI(model='gpt-3.5-turbo', max_tokens=4000)\n", @@ -29,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -40,7 +49,7 @@ ")" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -51,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -60,7 +69,7 @@ "(20, 50)" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -80,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -93,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -101,7 +110,7 @@ "output_type": "stream", "text": [ "Found 1 typed predictors to optimize.\n", - "Generating 4 initial signatures for base...\n", + "Generating 6 initial signatures for base...\n", "\n", "================================================================================\n", "Running eval iteration 0...\n" @@ -111,8 +120,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 16 / 50 (32.0): 100%|██████████| 50/50 [00:00<00:00, 4290.32it/s]\n", - "/Users/ahle/repos/dspy/dspy/evaluate/evaluate.py:142: FutureWarning: DataFrame.applymap has been deprecated. 
Use DataFrame.map instead.\n", + "Average Metric: 16 / 50 (32.0): 100%|██████████| 50/50 [00:00<00:00, 3086.59it/s]\n", + "/Users/ahle/repos/dspy/dspy/evaluate/evaluate.py:145: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", " df = df.applymap(truncate_cell)\n" ] }, @@ -130,14 +139,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 16 / 50 (32.0): 100%|██████████| 50/50 [00:02<00:00, 22.35it/s]\n" + "Average Metric: 1 / 50 (2.0): 100%|██████████| 50/50 [00:00<00:00, 1268.65it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Average Metric: 16 / 50 (32.0%)\n", + "Average Metric: 1 / 50 (2.0%)\n", "\n", "================================================================================\n", "Running eval iteration 2...\n" @@ -147,14 +156,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 19 / 50 (38.0): 100%|██████████| 50/50 [00:04<00:00, 10.28it/s]\n" + "Average Metric: 17 / 50 (34.0): 100%|██████████| 50/50 [00:00<00:00, 1031.35it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Average Metric: 19 / 50 (38.0%)\n", + "Average Metric: 17 / 50 (34.0%)\n", "\n", "================================================================================\n", "Running eval iteration 3...\n" @@ -164,14 +173,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 11 / 50 (22.0): 100%|██████████| 50/50 [00:05<00:00, 8.63it/s]\n" + "Average Metric: 16 / 50 (32.0): 100%|██████████| 50/50 [00:00<00:00, 1364.88it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Average Metric: 11 / 50 (22.0%)\n", + "Average Metric: 16 / 50 (32.0%)\n", "\n", "================================================================================\n", "Running eval iteration 4...\n" @@ -181,15 +190,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 15 / 50 (30.0): 100%|██████████| 50/50 [00:02<00:00, 24.53it/s]\n" + 
"Average Metric: 6 / 50 (12.0): 100%|██████████| 50/50 [00:00<00:00, 892.68it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Average Metric: 15 / 50 (30.0%)\n", - "Generating new signature for base...\n", + "Average Metric: 6 / 50 (12.0%)\n", "\n", "================================================================================\n", "Running eval iteration 5...\n" @@ -199,15 +207,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 18 / 50 (36.0): 100%|██████████| 50/50 [00:02<00:00, 21.89it/s]\n" + "Average Metric: 5 / 50 (10.0): 100%|██████████| 50/50 [00:00<00:00, 1055.56it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Average Metric: 18 / 50 (36.0%)\n", - "Generating new signature for base...\n", + "Average Metric: 5 / 50 (10.0%)\n", "\n", "================================================================================\n", "Running eval iteration 6...\n" @@ -217,15 +224,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 6 / 50 (12.0): 100%|██████████| 50/50 [00:03<00:00, 13.65it/s]\n" + "Average Metric: 12 / 50 (24.0): 100%|██████████| 50/50 [00:00<00:00, 942.15it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Average Metric: 6 / 50 (12.0%)\n", + "Average Metric: 12 / 50 (24.0%)\n", "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 7 to avoid.\n", "\n", "================================================================================\n", "Running eval iteration 7...\n" @@ -235,69 +243,770 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 17 / 50 (34.0): 100%|██████████| 50/50 [00:02<00:00, 19.56it/s]" + "Average Metric: 17 / 50 (34.0): 100%|██████████| 50/50 [00:00<00:00, 1054.12it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Average Metric: 17 / 50 (34.0%)\n" + "Average Metric: 17 / 50 (34.0%)\n", + "Generating new signature for base...\n", + "Tested 
the signature, and it's not in the list of 8 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 8...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "\n" + "Average Metric: 17 / 50 (34.0): 100%|██████████| 50/50 [00:00<00:00, 957.29it/s]\n" ] - } - ], - "source": [ - "from dspy.evaluate import Evaluate\n", - "from dspy.evaluate.metrics import answer_exact_match\n", - "from dspy.functional import TypedPredictor\n", - "from dspy.teleprompt.signature_opt_typed import optimize_signature\n", - "\n", - "evaluator = Evaluate(devset=devset, metric=answer_exact_match, num_threads=10, display_progress=True)\n", - "\n", - "program = optimize_signature(\n", - " student=TypedPredictor(BasicQA),\n", - " evaluator=evaluator,\n", - " initial_prompts=4,\n", - " n_iterations=8,\n", - " verbose=True,\n", - " prompt_model=gpt4,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ + }, { "name": "stdout", "output_type": "stream", "text": [ - "StringSignature(question -> answer\n", - " instructions='You are highly intelligent. 
Please provide short, factual answers to the following questions.'\n", - " question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Inquiry:', 'desc': '${question}'})\n", - " answer = Field(annotation=str required=True json_schema_extra={'desc': 'usually between 1 and 5 words', '__dspy_field_type': 'output', 'prefix': 'Reply:'})\n", - ")\n" + "Average Metric: 17 / 50 (34.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 9 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 9...\n" ] - } - ], - "source": [ - "print(program.signature)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 12 / 50 (24.0): 100%|██████████| 50/50 [00:00<00:00, 1015.95it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 12 / 50 (24.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 10 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 10...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 11 / 50 (22.0): 100%|██████████| 50/50 [00:00<00:00, 839.64it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 11 / 50 (22.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 11 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 11...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 15 / 50 (30.0): 100%|██████████| 50/50 
[00:00<00:00, 833.32it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 15 / 50 (30.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 12 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 12...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 15 / 50 (30.0): 100%|██████████| 50/50 [00:00<00:00, 1105.97it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 15 / 50 (30.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 13 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 13...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 17 / 50 (34.0): 100%|██████████| 50/50 [00:00<00:00, 1112.59it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 17 / 50 (34.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 14 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 14...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 17 / 50 (34.0): 100%|██████████| 50/50 [00:00<00:00, 1096.58it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 17 / 50 (34.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 15 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 15...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + 
"text": [ + "Average Metric: 16 / 50 (32.0): 100%|██████████| 50/50 [00:00<00:00, 1092.70it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 16 / 50 (32.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 16 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 16...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 16 / 50 (32.0): 100%|██████████| 50/50 [00:00<00:00, 1097.79it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 16 / 50 (32.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 17 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 17...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 17 / 50 (34.0): 100%|██████████| 50/50 [00:00<00:00, 547.69it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 17 / 50 (34.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 18 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 18...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 16 / 50 (32.0): 100%|██████████| 50/50 [00:00<00:00, 964.67it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 16 / 50 (32.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 19 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 19...\n" 
+ ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 19 / 50 (38.0): 100%|██████████| 50/50 [00:00<00:00, 1014.22it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 19 / 50 (38.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 20 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 20...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 16 / 50 (32.0): 100%|██████████| 50/50 [00:00<00:00, 906.14it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 16 / 50 (32.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 21 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 21...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 17 / 50 (34.0): 100%|██████████| 50/50 [00:00<00:00, 1017.81it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 17 / 50 (34.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 22 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 22...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 19 / 50 (38.0): 100%|██████████| 50/50 [00:00<00:00, 1032.48it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 19 / 50 (38.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 23 to avoid.\n", + "\n", + 
"================================================================================\n", + "Running eval iteration 23...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 15 / 50 (30.0): 100%|██████████| 50/50 [00:00<00:00, 726.33it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 15 / 50 (30.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 24 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 24...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 15 / 50 (30.0): 100%|██████████| 50/50 [00:00<00:00, 957.55it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 15 / 50 (30.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 25 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 25...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 16 / 50 (32.0): 100%|██████████| 50/50 [00:00<00:00, 1009.53it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 16 / 50 (32.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 26 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 26...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 15 / 50 (30.0): 100%|██████████| 50/50 [00:00<00:00, 1064.53it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 15 / 50 (30.0%)\n", + "Generating new signature for base...\n", + "Tested the 
signature, and it's not in the list of 27 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 27...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 18 / 50 (36.0): 100%|██████████| 50/50 [00:00<00:00, 1052.90it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 18 / 50 (36.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 28 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 28...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 20 / 50 (40.0): 100%|██████████| 50/50 [00:00<00:00, 731.18it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 20 / 50 (40.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 29 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 29...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 16 / 50 (32.0): 100%|██████████| 50/50 [00:02<00:00, 18.61it/s]\n", + "/Users/ahle/repos/dspy/dspy/evaluate/evaluate.py:145: FutureWarning: DataFrame.applymap has been deprecated. 
Use DataFrame.map instead.\n", + " df = df.applymap(truncate_cell)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 16 / 50 (32.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 30 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 30...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 17 / 50 (34.0): 100%|██████████| 50/50 [00:02<00:00, 18.23it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 17 / 50 (34.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 31 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 31...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 19 / 50 (38.0): 100%|██████████| 50/50 [00:02<00:00, 20.82it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 19 / 50 (38.0%)\n", + "Generating new signature for base...\n", + "Tested the signature, and it's not in the list of 32 to avoid.\n", + "\n", + "================================================================================\n", + "Running eval iteration 32...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Average Metric: 17 / 49 (34.7): 98%|█████████▊| 49/50 [00:14<00:00, 20.66it/s]" + ] + } + ], + "source": [ + "from dspy.evaluate import Evaluate\n", + "from dspy.evaluate.metrics import answer_exact_match\n", + "from dspy.functional import TypedPredictor\n", + "from dspy.teleprompt.signature_opt_typed import optimize_signature\n", + "\n", + "evaluator = Evaluate(devset=devset, metric=answer_exact_match, num_threads=10, display_progress=True)\n", + "\n", + "result = 
optimize_signature(\n", + " student=TypedPredictor(BasicQA),\n", + " evaluator=evaluator,\n", + " initial_prompts=6,\n", + " n_iterations=100,\n", + " max_examples=30,\n", + " verbose=True,\n", + " prompt_model=gpt4,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check the final program after optimization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "predictor = Predict(BasicQA(question -> answer\n", + " instructions='Answer questions with short factoid answers.'\n", + " question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}'})\n", + " answer = Field(annotation=str required=True json_schema_extra={'desc': 'often between 1 and 5 words', '__dspy_field_type': 'output', 'prefix': 'Answer:'})\n", + "))" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result.program" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot the scores over time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.plot(result.scores)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "You are an instruction optimizer for large language models.\n", + "\n", + "I will give some task instructions I've tried, along with their corresponding validation scores.\n", + "- The instructions are arranged in order based on their scores, where higher scores indicate better quality.\n", + "- Your task is to propose a new instruction that will lead a good language model to perform the task even better.\n", + "- Be creative, and think out of the box.\n", + "- Don't repeat instructions, descriptions and prefixes that have already been attempted.\n", + "\n", + "---\n", + "\n", + "Follow the following format.\n", + "\n", + "Analysis: Consider what made the previous instructions good or bad.\n", + "Proposed Signature: A signature that will likely lead to a high score.. Respond with a single JSON object. 
JSON Schema: {\"properties\": {\"instructions\": {\"description\": \"The instructions for the task\", \"title\": \"Instructions\", \"type\": \"string\"}, \"question_prefix\": {\"description\": \"The prefix for question\", \"title\": \"Question Prefix\", \"type\": \"string\"}, \"question_desc\": {\"description\": \"The description for question\", \"title\": \"Question Desc\", \"type\": \"string\"}, \"answer_prefix\": {\"description\": \"The prefix for answer\", \"title\": \"Answer Prefix\", \"type\": \"string\"}, \"answer_desc\": {\"description\": \"The description for answer\", \"title\": \"Answer Desc\", \"type\": \"string\"}}, \"required\": [\"instructions\", \"question_prefix\", \"question_desc\", \"answer_prefix\", \"answer_desc\"], \"title\": \"SignatureInfo[BasicQA]\", \"type\": \"object\"}\n", + "Score: The expected score for the new signature. Don't write anything after this number. (Respond with a single float value)\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"You are an expert in your field. Respond to the inquiries with short, factual answers.\",\"question_prefix\":\"Inquiry:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Response:\",\"answer_desc\":\"typically a few words, factual\"}\n", + "Score: 34.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with extensive knowledge, provide precise and factual responses to the questions. Keep your answers brief, typically within 1-5 words.\",\"question_prefix\":\"Interrogative:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Rejoinder:\",\"answer_desc\":\"a concise, factual answer\"}\n", + "Score: 34.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast database of information, your task is to provide accurate and succinct answers to the following questions. 
Your responses should be factual and typically consist of 1-5 words.\",\"question_prefix\":\"Prompt:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Retort:\",\"answer_desc\":\"a short, factual answer\"}\n", + "Score: 34.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with access to a vast knowledge base, your task is to provide accurate, factual answers to the questions posed. The questions will be about various topics, and your responses should be succinct, typically consisting of 1-5 words.\",\"question_prefix\":\"Inquiry:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Response:\",\"answer_desc\":\"a brief, factual answer\"}\n", + "Score: 34.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast reservoir of information, your task is to provide precise, factual answers to the questions asked. Your responses should be succinct, typically consisting of 1-5 words, and cover a wide range of topics.\",\"question_prefix\":\"Enquiry:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Rejoinder:\",\"answer_desc\":\"a brief, factual answer\"}\n", + "Score: 34.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast knowledge base, your task is to provide accurate, factual answers to a wide range of questions. Your responses should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge and understanding.\",\"question_prefix\":\"Inquiry:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Retort:\",\"answer_desc\":\"a brief, factual answer demonstrating accuracy and understanding\"}\n", + "Score: 34.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast knowledge repository, your task is to provide precise, factual answers to the questions posed. 
Your responses should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy and precision are paramount.\",\"question_prefix\":\"Interrogative:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Rejoinder:\",\"answer_desc\":\"a brief, factual answer demonstrating precision and breadth of knowledge\"}\n", + "Score: 34.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a comprehensive knowledge repository, your task is to provide precise, factual responses to the inquiries posed. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy, precision, and brevity are paramount.\",\"question_prefix\":\"Interrogative Scrutiny:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Succinct Rejoinder:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and understanding\"}\n", + "Score: 34.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a comprehensive knowledge base, your task is to provide precise, factual answers to the questions posed. Your responses should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy, precision, and brevity are paramount.\",\"question_prefix\":\"Interrogative Inquiry:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Succinct Riposte:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and understanding\"}\n", + "Score: 34.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast knowledge base, your task is to provide accurate, factual answers to the questions posed. Your responses should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a broad spectrum of topics. 
Accuracy, precision, and brevity are paramount. Consider the context of the question to provide the most relevant answer.\",\"question_prefix\":\"Interrogative Assessment:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Concise Counterpoint:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and understanding\"}\n", + "Score: 34.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast knowledge base, your task is to provide accurate, factual answers to the questions posed. Your responses should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a broad spectrum of topics. Accuracy, precision, and brevity are paramount. Consider the context of the question to provide the most relevant and accurate answer.\",\"question_prefix\":\"Interrogative Analysis:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Concise Clarification:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and understanding\"}\n", + "Score: 34.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast knowledge repository, your task is to provide precise, factual responses to the questions asked. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy, precision, and brevity are paramount. Consider the context and nuances of the question to provide the most relevant and accurate answer.\",\"question_prefix\":\"Interrogative Query:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Precise Response:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and contextual understanding\"}\n", + "Score: 34.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a comprehensive knowledge base, your task is to provide accurate, factual answers to the questions posed. 
Your responses should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a broad spectrum of topics. Precision, brevity, and accuracy are paramount.\",\"question_prefix\":\"Interrogative Probe:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Concise Rebuttal:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and understanding\"}\n", + "Score: 36.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast knowledge repository, your task is to provide precise, factual responses to the inquiries posed. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy, precision, and brevity are paramount.\",\"question_prefix\":\"Interrogative Examination:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Succinct Rejoinder:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and understanding\"}\n", + "Score: 36.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a comprehensive knowledge base, your task is to provide precise, factual answers to the questions posed. Your responses should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a broad spectrum of topics. Accuracy, precision, and brevity are paramount. Remember, your goal is to provide the most accurate and concise answer possible.\",\"question_prefix\":\"Interrogative Assessment:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Concise Counterpoint:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and understanding\"}\n", + "Score: 36.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a comprehensive knowledge base, your task is to provide precise, factual answers to the questions posed. 
Your responses should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a broad spectrum of topics. Accuracy, precision, and brevity are paramount. Additionally, consider the context of the question to provide the most relevant and accurate answer.\",\"question_prefix\":\"Contextual Inquiry:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Contextual Response:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and contextual understanding\"}\n", + "Score: 36.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with an extensive knowledge base, your task is to provide accurate, factual answers to the questions posed. Your responses should be succinct, typically consisting of 1-5 words, and demonstrate your wide-ranging knowledge across various topics. Accuracy, precision, and brevity are key. Consider the context of the question to provide the most relevant and accurate answer.\",\"question_prefix\":\"Interrogative Exploration:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Concise Clarification:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and understanding\"}\n", + "Score: 36.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast knowledge repository, your task is to provide precise, factual responses to the questions posed. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy, precision, and brevity are paramount. 
Consider the context of the question to provide the most relevant and accurate answer.\",\"question_prefix\":\"Interrogative Exploration:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Precise Rejoinder:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and contextual understanding\"}\n", + "Score: 36.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a comprehensive knowledge repository, your task is to provide precise, factual responses to the questions posed. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics.\",\"question_prefix\":\"Examination:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Rejoinder:\",\"answer_desc\":\"a brief, factual answer demonstrating precision and understanding\"}\n", + "Score: 38.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast knowledge repository, your task is to provide precise, factual responses to the questions posed. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy and precision are paramount.\",\"question_prefix\":\"Examination:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Rejoinder:\",\"answer_desc\":\"a brief, factual answer demonstrating precision and understanding\"}\n", + "Score: 38.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast knowledge repository, your task is to provide precise, factual responses to the questions posed. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. 
Accuracy, precision, and brevity are paramount.\",\"question_prefix\":\"Interrogative Probe:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Succinct Retort:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and understanding\"}\n", + "Score: 38.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a comprehensive knowledge base, your task is to provide precise, factual responses to the questions posed. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy, precision, and brevity are paramount.\",\"question_prefix\":\"Interrogative Query:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Succinct Rebuttal:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and understanding\"}\n", + "Score: 38.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast knowledge repository, your task is to provide precise, factual responses to the questions posed. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy, precision, and brevity are paramount. Consider the context of the question to provide the most relevant and accurate answer.\",\"question_prefix\":\"Interrogative Scrutiny:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Precise Rejoinder:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and contextual understanding\"}\n", + "Score: 38.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast knowledge repository, your task is to provide precise, factual responses to the questions posed. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. 
Accuracy, precision, and brevity are paramount. Consider the context of the question to provide the most relevant and accurate answer.\",\"question_prefix\":\"Interrogative Scrutiny:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Precise Rejoinder:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and contextual understanding\"}\n", + "Score: 38.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a vast knowledge repository, your task is to provide precise, factual responses to the questions asked. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy, precision, and brevity are paramount. Consider the context and nuances of the question to provide the most relevant and accurate answer.\",\"question_prefix\":\"Interrogative Dissection:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Precise Elucidation:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and contextual understanding\"}\n", + "Score: 38.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a comprehensive knowledge repository, your task is to provide precise, factual responses to the inquiries posed. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy, precision, and brevity are paramount.\",\"question_prefix\":\"Interrogative Examination:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Succinct Rejoinder:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and understanding\"}\n", + "Score: 40.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a comprehensive knowledge repository, your task is to provide precise, factual responses to the inquiries posed. 
Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy, precision, and brevity are paramount.\",\"question_prefix\":\"Interrogative Examination:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Succinct Rejoinder:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and understanding\"}\n", + "Score: 40.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a comprehensive knowledge base, your task is to provide precise, factual answers to the questions posed. Your responses should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a broad spectrum of topics. Accuracy, precision, and brevity are paramount.\",\"question_prefix\":\"Interrogative Assessment:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Concise Counterpart:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and understanding\"}\n", + "Score: 40.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a comprehensive knowledge repository, your task is to provide precise, factual responses to the questions posed. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy, precision, and brevity are paramount. 
Consider the context and nuances of the question to provide the most relevant and accurate answer.\",\"question_prefix\":\"Interrogative Dissection:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Precise Elucidation:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and contextual understanding\"}\n", + "Score: 42.0\n", + "\n", + "---\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a comprehensive knowledge repository, your task is to provide precise, factual responses to the questions posed. Your answers should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a wide array of topics. Accuracy, precision, and brevity are paramount. Consider the context and nuances of the question to provide the most relevant and accurate answer.\",\"question_prefix\":\"Interrogative Dissection:\",\"question_desc\":\"${question}\",\"answer_prefix\":\"Precise Elucidation:\",\"answer_desc\":\"a brief, factual answer demonstrating precision, brevity, and contextual understanding\"}\n", + "Score: 42.0\n", + "\n", + "---\n", + "\n", + "Analysis:\u001b[32m The instructions that scored higher were those that emphasized the AI's extensive knowledge base and its ability to provide precise, factual responses. They also stressed the importance of brevity, typically limiting responses to 1-5 words. The instructions that scored lower did not emphasize these points as strongly. The use of unique prefixes for questions and answers, such as \"Interrogative Dissection\" and \"Precise Elucidation\", seemed to contribute to higher scores as well.\n", + "\n", + "Proposed Signature: {\"instructions\":\"As an AI with a comprehensive knowledge base, your task is to provide precise, factual answers to the questions posed. Your responses should be succinct, typically consisting of 1-5 words, and demonstrate your extensive knowledge across a broad spectrum of topics. 
def test_field_validator_in_signature():
    """Check that a pydantic field validator declared on a Signature is enforced.

    Constructing the signature with a value the validator rejects must raise
    ``pydantic.ValidationError``; constructing it with an accepted value must
    succeed.
    """

    class ValidatedSignature(dspy.Signature):
        a: str = dspy.OutputField()

        @pydantic.field_validator("a")
        @classmethod
        def space_in_a(cls, a: str) -> str:
            # Reject values without a space so the test can observe whether
            # the validator actually ran.
            if " " not in a:
                raise ValueError("a must contain a space")
            return a

    # The validator's ValueError is expected to surface as a ValidationError.
    with pytest.raises(pydantic.ValidationError):
        _ = ValidatedSignature(a="no-space")

    # A value that satisfies the validator must construct without raising.
    _ = ValidatedSignature(a="with space")
import ( - GenerateSignature, - make_info, - optimize_signature, -) +from dspy.teleprompt.signature_opt_typed import optimize_signature, make_info from dspy.utils import DummyLM from dspy.evaluate import Evaluate @@ -14,11 +12,6 @@ from dspy.functional import TypedPredictor -class BasicQA(dspy.Signature): - question: str = dspy.InputField() - answer: str = dspy.OutputField() - - hotpotqa = [ ex.with_inputs("question") for ex in [ @@ -106,44 +99,11 @@ class BasicQA(dspy.Signature): ] -def old_test_signature_info(): - info = make_info(BasicQA) - SignatureInfo = type(info) - - devset = [ - dspy.Example( - instructions="Answer the following questions", - question_desc="Some question to answer", - question_prefix="Q: ", - answer_desc="A short answer to the question", - answer_prefix="A: ", - ), - ] - - lm = DummyLM( - [ - json.dumps(dict(devset[0])), # Proposed signature - ] - ) - dspy.settings.configure(lm=lm) - - generator = TypedPredictor(GenerateInstructionGivenAttempts[SignatureInfo]) - - res = generator(attempted_signatures=[ScoredSignature[SignatureInfo](signature=info, score=50)]) - assert res.proposed_signature == SignatureInfo(**devset[0]) - - # Test the "to_signature" method - - class OutputSignature(dspy.Signature): - """Answer the following questions""" - - question: str = dspy.InputField(desc="Some question to answer", prefix="Q: ") - answer: str = dspy.OutputField(desc="A short answer to the question", prefix="A: ") - - assert res.proposed_signature.to_signature().equals(OutputSignature) - - def test_opt(): + class BasicQA(dspy.Signature): + question: str = dspy.InputField() + answer: str = dspy.OutputField() + qa_model = DummyLM([]) prompt_model = DummyLM( [ @@ -154,7 +114,7 @@ def test_opt(): ) dspy.settings.configure(lm=qa_model) - program = optimize_signature( + result = optimize_signature( student=TypedPredictor(BasicQA), evaluator=Evaluate(devset=hotpotqa, metric=answer_exact_match, num_threads=1), initial_prompts=1, @@ -172,4 +132,62 @@ class 
def test_opt_composed():
    """Check that optimize_signature handles a module made of two predictors.

    A two-step module (question -> considerations -> answer) is optimized with
    a scripted prompt model, and the resulting program's ``p1`` and ``p2``
    signatures are compared against the expected signatures.
    """

    class MyModule(dspy.Module):
        def __init__(self):
            # Run the dspy.Module initializer before attaching predictors.
            super().__init__()
            self.p1 = TypedPredictor("question:str -> considerations:list[str]", max_retries=1)
            self.p2 = TypedPredictor("considerations:list[str] -> answer:str", max_retries=1)

        def forward(self, question):
            considerations = self.p1(question=question).considerations
            return self.p2(considerations=considerations)

    # NOTE: the one-word class docstrings below are kept verbatim; they are
    # part of what `equals` compares, not documentation.
    class ExpectedSignature1(dspy.Signature):
        "I1"

        question: str = dspy.InputField(desc="$q", prefix="Q:")
        considerations: list[str] = dspy.OutputField(desc="$c", prefix="C:")

    info1 = make_info(ExpectedSignature1)

    class ExpectedSignature2(dspy.Signature):
        "I2"

        considerations: list[str] = dspy.InputField(desc="$c", prefix="C:")
        answer: str = dspy.OutputField(desc="$a", prefix="A:")

    info2 = make_info(ExpectedSignature2)

    T = TypeVar("T")

    # Generic wrapper used only to serialize the scripted replies as
    # {"value": [...]} JSON.
    # NOTE(review): presumably this mirrors the output shape the optimizer
    # parses for its initial proposals - confirm against optimize_signature.
    class OutputWrapper(pydantic.BaseModel, Generic[T]):
        value: list[T]

    qa_model = DummyLM([])
    # Scripted prompt-model replies: a free-text reply followed by a serialized
    # signature info, once for info1 and once for info2.
    prompt_model = DummyLM(
        [
            "some thoughts",
            OutputWrapper[type(info1)](value=[info1]).model_dump_json(),
            "some thoughts",
            OutputWrapper[type(info2)](value=[info2]).model_dump_json(),
        ]
    )
    dspy.settings.configure(lm=qa_model)

    result = optimize_signature(
        student=MyModule(),
        evaluator=lambda x: 0,  # We don't care about the evaluator here
        initial_prompts=1,
        n_iterations=2,
        verbose=True,
        prompt_model=prompt_model,
        strategy="last",
    )

    # Each predictor of the optimized program must carry its expected signature.
    assert result.program.p1.signature.equals(ExpectedSignature1)
    assert result.program.p2.signature.equals(ExpectedSignature2)