forked from stanfordnlp/dspy
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Folder structure change, example-driven optimization, mix of LM generation, teacher-based output generation.
- Loading branch information
1 parent
cab9eb4
commit 56d4378
Showing
6 changed files
with
184 additions
and
88 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .synthesizer import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from typing import Union, List, Optional

import dspy
from pydantic import BaseModel, field_validator, model_validator
|
||
class SynthesizerArguments(BaseModel):
    """Configuration options for the synthesizer.

    Every option is optional and defaults to ``None``.

    Validation rules:
        * ``feedback_mode``, when set, must be ``"human"`` or ``"llm"``.
        * When ``feedback_mode`` is set, ``num_example_for_feedback`` must be
          set to a non-zero value as well.

    Raises:
        ValueError: (surfaced by pydantic as a ``ValidationError``) when one
            of the rules above is violated.
    """

    # NOTE(review): the dspy types used below are presumably plain Python
    # classes rather than pydantic models; pydantic v2 refuses to build a
    # schema for such types unless arbitrary types are allowed. Confirm.
    model_config = {"arbitrary_types_allowed": True}

    # [TODO]
    feedback_mode: Optional[str] = None
    num_example_for_feedback: Optional[int] = None

    input_lm_model: Optional[dspy.LM] = None
    output_lm_model: Optional[dspy.LM] = None
    output_teacher_module: Optional[Union[dspy.Module, dspy.Predict]] = None

    num_example_for_optim: Optional[int] = None

    @field_validator("feedback_mode")
    @classmethod
    def validate_feedback_mode(cls, value):
        """Reject any non-empty feedback mode other than 'human' or 'llm'."""
        if value and value not in ["human", "llm"]:
            raise ValueError("Feedback mode should be either 'human' or 'llm'.")

        return value

    @model_validator(mode="after")
    def validate_feedback_example_count(self):
        """Require an example count whenever a feedback mode is provided.

        This is a model-level check because it needs both fields at once; a
        field validator only sees the single value being validated.
        """
        if self.feedback_mode and not self.num_example_for_feedback:
            raise ValueError("Number of examples for feedback is required when feedback mode is provided.")

        return self
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Instruction text for generating synthetic inputs when previously generated
# data is supplied alongside the task description. Plain (non-f) string: the
# text contains no placeholders.
INPUT_GENERATION_TASK_WITH_EXAMPLES = """Create synthetic data using the task description and the provided knowledge seed. Your task is to generate diverse and imaginative data that aligns with the given task description and knowledge seed. You are encouraged to be creative and not limit yourself, allowing for a wide range of synthetic data that reflects the characteristics and details provided in the task description. The data should be unique and varied, showcasing originality and creativity while maintaining relevance to the task and knowledge seed.
Additionally I'll be providing you some data I generated before hand, make sure the data you generate is consistent with task I provided but different from the data I provided in every way possible."""

# Placeholder: the feedback-driven instruction text has not been written yet.
INPUT_GENERATION_TASK_WITH_FEEDBACK = ""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import dspy | ||
|
||
from .utils import format_examples | ||
|
||
# Signature: takes a task description and produces a high-level explanation
# of it. The class docstring is the instruction text, so it is kept verbatim.
class UnderstandTask(dspy.Signature):
    """I'll be providing you a task description, your task is to prepare a concise, comprehensible summary that captures the broad essence and purpose of the task this description aim to address. Your summary should illuminate the general objective and the type of problem being solved, offering a clear picture of what the task entails at a high level. Avoid getting into the nuances of individual datapoints, specifics about models, examples, algorithms, or any intricate technicalities. Your explanation should serve to clarify the task's overall goal and its basic premise, without touching on methodologies or solutions."""

    task_description = dspy.InputField(
        prefix="Task Description:",
        desc="Description of the task.",
    )
    # NOTE(review): the output uses the same prefix as the input field above;
    # confirm this is intentional.
    explanation = dspy.OutputField(
        prefix="Task Description:",
        desc="Explanation of the task.",
    )
|
||
# Signature: takes a set of example datapoints and produces a high-level
# explanation of the task they represent. The class docstring is the
# instruction text, so it is kept verbatim.
class ExplainTask(dspy.Signature):
    """Analyze the provided set of datapoints carefully, and prepare a concise, comprehensible summary that captures the broad essence and purpose of the task these datapoints aim to address. Your summary should illuminate the general objective and the type of problem being solved, offering a clear picture of what the task entails at a high level. Avoid getting into the nuances of individual datapoints, specifics about models, examples, algorithms, or any intricate technicalities. Your explanation should serve to clarify the task's overall goal and its basic premise, without touching on methodologies or solutions."""

    # The examples are rendered to text with format_examples.
    examples = dspy.InputField(
        prefix="Examples Datapoints:-",
        desc="List of datapoints to analyze and explain the task.",
        format=format_examples,
    )
    explanation = dspy.OutputField(
        prefix="Task Description:",
        desc="Explanation of the task.",
    )
|
||
# Signature: takes a task description and a field name and produces a short
# description of that field. The class docstring is the instruction text, so
# it is kept verbatim.
class GenerateFieldDescription(dspy.Signature):
    """Generate a concise and informative description for a given field based on the provided name and task description. This description should be no longer than 10 words and should be in simple english."""

    task_description = dspy.InputField(
        prefix="Task Description:",
        desc="Description of the task the field is an input to.",
    )
    field_name = dspy.InputField(
        prefix="Field Name:",
        desc="Name of the field to generate synthetic data for.",
    )
    field_description = dspy.OutputField(
        prefix="Field Description:",
        desc="Description of the field.",
    )
|
||
# Signature: declares the two inputs (knowledge seed and task description)
# used for synthetic input generation; no output fields are declared here.
# The class docstring is the instruction text, so it is kept verbatim.
class GenerateInputFieldsData(dspy.Signature):
    """Create synthetic data using the task description and the provided knowledge seed. Your task is to generate diverse and imaginative data that aligns with the given task description and knowledge seed. You are encouraged to be creative and not limit yourself, allowing for a wide range of synthetic data that reflects the characteristics and details provided in the task description. The data should be unique and varied, showcasing originality and creativity while maintaining relevance to the task and knowledge seed.
A knowledge seed is the index of the knowledge base you have, each index represents a different knowledge base."""

    # The seed is rendered with str() before being placed in the prompt.
    knowledge_seed = dspy.InputField(
        prefix="Knowledge Seed:",
        desc="Seed for the knowledge base search to base the inputs around.",
        format=str,
    )
    task_description = dspy.InputField(
        prefix="Task Description:",
        desc="Description of the task the field is an input to.",
    )
|
||
# Placeholder signature: declares no fields and no instruction text.
class GenerateOutputFieldsData(dspy.Signature):
    pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import dspy | ||
from typing import List | ||
|
||
def format_examples(examples: "List[dspy.Example] | str") -> str:
    """Render examples as plain text, one Inputs/Outputs section per example.

    Args:
        examples: Either an already-formatted string, which is returned
            unchanged, or an iterable of example objects. Each example must
            provide ``inputs()`` and ``labels()`` (both returning objects
            with ``keys()``) and support lookup by key.

    Returns:
        The concatenation, for every example in order, of an ``Inputs:``
        header, one ``key: value`` line per input key, an ``Outputs:``
        header, and one ``key: value`` line per label key. An empty iterable
        yields an empty string.
    """
    if isinstance(examples, str):
        return examples

    # Collect the pieces and join once instead of growing a string with +=.
    parts = []

    for example in examples:
        input_keys = example.inputs().keys()
        label_keys = example.labels().keys()

        parts.append("Inputs:\n")
        parts.extend(f"{key}: {example[key]}\n" for key in input_keys)

        parts.append("Outputs:\n")
        parts.extend(f"{key}: {example[key]}\n" for key in label_keys)

    return "".join(parts)