Skip to content

Commit

Permalink
Force the optimizer to not repeat signatures
Browse files: browse the repository at this point in the history
  • Loading branch information
thomasahle committed Mar 6, 2024
1 parent a82862a commit 1e638a1
Show file tree
Hide file tree
Showing 6 changed files with 343 additions and 1,678 deletions.
9 changes: 5 additions & 4 deletions dspy/signatures/signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ def __call__(cls, *args, **kwargs): # noqa: ANN002
def __new__(mcs, signature_name, bases, namespace, **kwargs): # noqa: N804
# Set `str` as the default type for all fields
raw_annotations = namespace.get("__annotations__", {})
for name, _field in namespace.items():
for name, field in namespace.items():
if not isinstance(field, FieldInfo):
continue # Don't add types to non-field attributes
if not name.startswith("__") and name not in raw_annotations:
raw_annotations[name] = str
namespace["__annotations__"] = raw_annotations
Expand Down Expand Up @@ -272,9 +274,8 @@ def make_signature(


def _parse_signature(signature: str) -> Tuple[Type, Field]:
pattern = r"^\s*[\w\s,:]+\s*->\s*[\w\s,:]+\s*$"
if not re.match(pattern, signature):
raise ValueError(f"Invalid signature format: '{signature}'")
if signature.count("->") != 1:
raise ValueError(f"Invalid signature format: '{signature}', must contain exactly one '->'.")

fields = {}
inputs_str, outputs_str = map(str.strip, signature.split("->"))
Expand Down
41 changes: 28 additions & 13 deletions dspy/teleprompt/signature_opt_typed.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,20 +100,32 @@ class GenerateInstructionInitial(Signature, Generic[T]):
return GenerateInstructionInitial


class GenerateSignature(dspy.Signature, Generic[T]):
__doc__ = textwrap.dedent("""\
You are an instruction optimizer for large language models.
def generate_with_avoidance(signatures_to_avoid: list[BaseModel]) -> type[Signature]:
class GenerateSignature(dspy.Signature, Generic[T]):
__doc__ = textwrap.dedent("""\
You are an instruction optimizer for large language models.
I will give some task instructions I've tried, along with their corresponding validation scores.
- The instructions are arranged in order based on their scores, where higher scores indicate better quality.
- Your task is to propose a new instruction that will lead a good language model to perform the task even better.
- Be creative, and think out of the box.
- Don't repeat instructions, descriptions and prefixes that have already been attempted.
""")

I will give some task instructions I've tried, along with their corresponding validation scores.
- The instructions are arranged in order based on their scores, where higher scores indicate better quality.
- Your task is to propose a new instruction that will lead a good language model to perform the task even better.
- Be creative, and think out of the box.
- Don't repeat instructions, descriptions and prefixes that have already been attempted.
""")
analysis: str = OutputField(desc="Consider what made the previous instructions good or bad.")
proposed_signature: T = OutputField(desc="A signature that will likely lead to a high score.")
score: float = OutputField(
desc="The expected score for the new signature. Don't write anything after this number."
)

@pydantic.field_validator("proposed_signature")
@classmethod
def check_signature_not_attempted(cls, s: T) -> T:
if s in signatures_to_avoid:
raise ValueError("Never propose a signature already in the list above.")
return s

analysis: str = OutputField(desc="Consider what made the previous instructions good or bad.")
proposed_signature: T = OutputField(desc="A signature that will likely lead to a high score.")
score: float = OutputField(desc="The expected score for the new signature. Don't write anything after this number.")
return GenerateSignature


@dataclass
Expand Down Expand Up @@ -233,7 +245,6 @@ def optimize_signature(
# TODO: Parallelize this
for name, _p in named_predictors:
SignatureInfo = type(candidates[name][0]) # noqa: N806
generator = TypedPredictor(GenerateSignature[SignatureInfo])

demos = [dspy.Example(proposed_signature=info, score=sc) for info, sc in zip(candidates[name], scores)]
if sorted_order == "chronological":
Expand All @@ -246,6 +257,10 @@ def optimize_signature(
demos = demos[:max_examples]
else:
raise ValueError(f"Invalid sorted_order: {sorted_order}")

# We can only tell the LM to avoid the signatures we are actually giving it as demos.
avoid = [ex.proposed_signature for ex in demos]
generator = TypedPredictor(generate_with_avoidance(avoid)[SignatureInfo])
generator.predictor.demos = demos

if verbose:
Expand Down
Loading

0 comments on commit 1e638a1

Please sign in to comment.