Commit: formatting
thomasahle committed Mar 15, 2024
1 parent 96ee792 commit 4244c78
Showing 8 changed files with 57 additions and 36 deletions.
8 changes: 4 additions & 4 deletions dspy/teleprompt/bootstrap.py
@@ -84,13 +84,13 @@ def _prepare_predictor_mappings(self):
student, teacher = self.student, self.teacher

assert len(student.predictors()) == len(
- teacher.predictors()
+ teacher.predictors(),
), "Student and teacher must have the same number of predictors."

for (name1, predictor1), (name2, predictor2) in zip(student.named_predictors(), teacher.named_predictors()):
assert name1 == name2, "Student and teacher must have the same program structure."
assert predictor1.signature.equals(
- predictor2.signature
+ predictor2.signature,
), f"Student and teacher must have the same signatures. {type(predictor1.signature)} != {type(predictor2.signature)}"
assert id(predictor1) != id(predictor2), "Student and teacher must be different objects."

@@ -195,11 +195,11 @@ def _bootstrap_one_example(self, example, round_idx=0):
# TODO: Look closer into this. It's a bit tricky to reproduce.
print(f"Failed to find predictor {predictor} in {self.predictor2name}.")
print(
"Are you doing this in a notebook (Jupyter)? This might be caused by redefining values by rerunning cells."
"Are you doing this in a notebook (Jupyter)? This might be caused by redefining values by rerunning cells.",
)
print("Try restarting the notebook, or open an issue.")
raise KeyError(
f"Failed to find predictor {id(predictor)} {predictor} in {self.predictor2name}."
f"Failed to find predictor {id(predictor)} {predictor} in {self.predictor2name}.",
) from e

name2traces[predictor_name].append(demo)
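For context on the student/teacher contract these assertions enforce, a minimal usage sketch follows. Only the BootstrapFewShot.compile call shape comes from this file; the QA program, metric, and training example are hypothetical, and an LM is assumed to be configured via dspy.settings beforehand.

import dspy
from dspy.teleprompt import BootstrapFewShot

# Hypothetical single-predictor program; the student and teacher passed to
# compile() must share this structure (same predictor names and signatures).
class QA(dspy.Module):
    def __init__(self):
        super().__init__()
        self.generate = dspy.Predict("question -> answer")

    def forward(self, question):
        return self.generate(question=question)

# Hypothetical metric and one-example trainset.
def exact_match(gold, pred, trace=None):
    return gold.answer == pred.answer

trainset = [dspy.Example(question="What is 2 + 2?", answer="4").with_inputs("question")]

# Assumes dspy.settings.configure(lm=...) has already been called.
bootstrapper = BootstrapFewShot(metric=exact_match)
compiled = bootstrapper.compile(QA(), teacher=QA(), trainset=trainset)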
28 changes: 18 additions & 10 deletions dspy/teleprompt/copro_optimizer.py
@@ -39,7 +39,7 @@ class BasicGenerateInstruction(Signature):
basic_instruction = dspy.InputField(desc="The initial instructions before optimization")
proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model")
proposed_prefix_for_output_field = dspy.OutputField(
desc="The string at the end of the prompt, which will help the model start solving the task"
desc="The string at the end of the prompt, which will help the model start solving the task",
)


@@ -51,7 +51,7 @@ class GenerateInstructionGivenAttempts(dspy.Signature):
attempted_instructions = dspy.InputField(format=dsp.passages2text)
proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model")
proposed_prefix_for_output_field = dspy.OutputField(
desc="The string at the end of the prompt, which will help the model start solving the task"
desc="The string at the end of the prompt, which will help the model start solving the task",
)


@@ -153,11 +153,15 @@ def compile(self, student, *, trainset, eval_kwargs):
if self.prompt_model:
with dspy.settings.context(lm=self.prompt_model):
instruct = dspy.Predict(
- BasicGenerateInstruction, n=self.breadth - 1, temperature=self.init_temperature
+ BasicGenerateInstruction,
+ n=self.breadth - 1,
+ temperature=self.init_temperature,
)(basic_instruction=basic_instruction)
else:
instruct = dspy.Predict(
- BasicGenerateInstruction, n=self.breadth - 1, temperature=self.init_temperature
+ BasicGenerateInstruction,
+ n=self.breadth - 1,
+ temperature=self.init_temperature,
)(basic_instruction=basic_instruction)
# Add in our initial prompt as a candidate as well
instruct.completions.proposed_instruction.append(basic_instruction)
@@ -175,7 +179,7 @@ def compile(self, student, *, trainset, eval_kwargs):

# For each iteration in depth...
for d in range(
- self.depth
+ self.depth,
): # TODO: fix this so that we eval the new batch of predictors with the new best followoing predictors
print(f"Iteration Depth: {d+1}/{self.depth}.")

@@ -214,7 +218,7 @@ def compile(self, student, *, trainset, eval_kwargs):
print(f"Predictor {i+1}")
self._print_signature(predictor)
print(
f"At Depth {d+1}/{self.depth}, Evaluating Prompt Candidate #{c_i+1}/{len(candidates_)} for Predictor {p_i+1} of {len(module.predictors())}."
f"At Depth {d+1}/{self.depth}, Evaluating Prompt Candidate #{c_i+1}/{len(candidates_)} for Predictor {p_i+1} of {len(module.predictors())}.",
)
score = evaluate(module_clone, devset=trainset, **eval_kwargs)
if self.verbose and self.prompt_model:
@@ -264,7 +268,7 @@ def compile(self, student, *, trainset, eval_kwargs):
self._set_signature(p_new, updated_signature)
if self.verbose:
print(
f"Updating Predictor {id(p_old)} to:\ni: {best_candidate['instruction']}\np: {best_candidate['prefix']}"
f"Updating Predictor {id(p_old)} to:\ni: {best_candidate['instruction']}\np: {best_candidate['prefix']}",
)
if self.verbose:
print("Full predictor with update: ")
@@ -305,11 +309,15 @@ def compile(self, student, *, trainset, eval_kwargs):
if self.prompt_model:
with dspy.settings.context(lm=self.prompt_model):
instr = dspy.Predict(
- GenerateInstructionGivenAttempts, n=self.breadth, temperature=self.init_temperature
+ GenerateInstructionGivenAttempts,
+ n=self.breadth,
+ temperature=self.init_temperature,
)(attempted_instructions=attempts)
else:
instr = dspy.Predict(
- GenerateInstructionGivenAttempts, n=self.breadth, temperature=self.init_temperature
+ GenerateInstructionGivenAttempts,
+ n=self.breadth,
+ temperature=self.init_temperature,
)(attempted_instructions=attempts)

if self.verbose and self.prompt_model:
@@ -318,7 +326,7 @@ def compile(self, student, *, trainset, eval_kwargs):
new_candidates[id(p_base)] = instr.completions
all_candidates[id(p_base)].proposed_instruction.extend(instr.completions.proposed_instruction)
all_candidates[id(p_base)].proposed_prefix_for_output_field.extend(
- instr.completions.proposed_prefix_for_output_field
+ instr.completions.proposed_prefix_for_output_field,
)

if self.verbose and self.prompt_model:
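For reference, a hedged sketch of how the optimizer reformatted above is typically invoked. The breadth, depth, init_temperature, and eval_kwargs names appear in this diff; the COPRO import path, the metric, and the eval_kwargs keys are assumptions, and QA/trainset are reused from the BootstrapFewShot sketch earlier.

from dspy.teleprompt import COPRO  # assumed export for the optimizer in copro_optimizer.py

def exact_match(gold, pred, trace=None):  # hypothetical metric
    return gold.answer == pred.answer

# breadth/depth/init_temperature mirror the parameters used in the hunks above.
copro = COPRO(metric=exact_match, breadth=10, depth=3, init_temperature=1.4)

# eval_kwargs is forwarded to the internal evaluation call; these keys are assumptions.
compiled = copro.compile(
    QA(),                      # program from the earlier sketch
    trainset=trainset,         # trainset from the earlier sketch
    eval_kwargs={"num_threads": 4, "display_progress": True},
)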
4 changes: 2 additions & 2 deletions dspy/teleprompt/finetune.py
@@ -82,7 +82,7 @@ def compile(
if teacher is None:
print(
"WARNING: Using a vanilla teacher. "
"Are you sure you want to use BootstrapFinetune without a compiled teacher?"
"Are you sure you want to use BootstrapFinetune without a compiled teacher?",
)

teachers = teacher if isinstance(teacher, list) else [teacher]
@@ -136,7 +136,7 @@ def compile(

compiler_config = {
"save": "".join(
- random.Random(time.time()).choices(string.ascii_uppercase + string.digits, k=13)
+ random.Random(time.time()).choices(string.ascii_uppercase + string.digits, k=13),
), # https://stackoverflow.com/a/2257449/1493011
"peft": peft,
"fp16": False,
5 changes: 4 additions & 1 deletion dspy/teleprompt/knn_fewshot.py
@@ -18,7 +18,10 @@ def forward_pass(*args, **kwargs):
knn_trainset = self.KNN(**kwargs)
few_shot_bootstrap = BootstrapFewShot()
compiled_program = few_shot_bootstrap.compile(
- student, teacher=teacher, trainset=knn_trainset, valset=valset
+ student,
+ teacher=teacher,
+ trainset=knn_trainset,
+ valset=valset,
)
return compiled_program(**kwargs)

40 changes: 25 additions & 15 deletions dspy/teleprompt/mipro_optimizer.py
@@ -51,7 +51,7 @@ class BasicGenerateInstruction(Signature):
basic_instruction = dspy.InputField(desc="The initial instructions before optimization")
proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model")
proposed_prefix_for_output_field = dspy.OutputField(
desc="The string at the end of the prompt, which will help the model start solving the task"
desc="The string at the end of the prompt, which will help the model start solving the task",
)


@@ -62,7 +62,7 @@ class BasicGenerateInstructionWithDataObservations(Signature):
observations = dspy.InputField(desc="Observations about the dataset and task")
proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model")
proposed_prefix_for_output_field = dspy.OutputField(
desc="The string at the end of the prompt, which will help the model start solving the task"
desc="The string at the end of the prompt, which will help the model start solving the task",
)


@@ -77,7 +77,7 @@ class BasicGenerateInstructionWithExamples(dspy.Signature):
examples = dspy.InputField(format=dsp.passages2text, desc="Example(s) of the task")
proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model")
proposed_prefix_for_output_field = dspy.OutputField(
desc="The string at the end of the prompt, which will help the model start solving the task"
desc="The string at the end of the prompt, which will help the model start solving the task",
)


@@ -91,7 +91,7 @@ class BasicGenerateInstructionWithExamplesAndDataObservations(dspy.Signature):
basic_instruction = dspy.InputField(desc="The initial instructions before optimization")
proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model")
proposed_prefix_for_output_field = dspy.OutputField(
desc="The string at the end of the prompt, which will help the model start solving the task"
desc="The string at the end of the prompt, which will help the model start solving the task",
)


@@ -100,7 +100,7 @@ class ObservationSummarizer(dspy.Signature):

observations = dspy.InputField(desc="Observations I have made about my dataset")
summary = dspy.OutputField(
desc="Two to Three sentence summary of only the most significant highlights of my observations"
desc="Two to Three sentence summary of only the most significant highlights of my observations",
)


@@ -126,7 +126,7 @@ class DatasetDescriptorWithPriorObservations(dspy.Signature):
examples = dspy.InputField(desc="Sample data points from the dataset")
prior_observations = dspy.InputField(desc="Some prior observations I made about the data")
observations = dspy.OutputField(
desc="Somethings that holds true for most or all of the data you observed or COMPLETE if you have nothing to add"
desc="Somethings that holds true for most or all of the data you observed or COMPLETE if you have nothing to add",
)


@@ -180,7 +180,8 @@ def _observe_data(self, trainset, max_iterations=10):
for b in range(self.view_data_batch_size, len(trainset), self.view_data_batch_size):
upper_lim = min(len(trainset), b + self.view_data_batch_size)
output = dspy.Predict(DatasetDescriptorWithPriorObservations, n=1, temperature=1.0)(
- prior_observations=observations, examples=(trainset[b:upper_lim].__repr__())
+ prior_observations=observations,
+ examples=(trainset[b:upper_lim].__repr__()),
)
iterations += 1
if len(output["observations"]) >= 8 and output["observations"][:8].upper() == "COMPLETE":
@@ -293,15 +294,17 @@ def _generate_first_N_candidates(  # noqa: N802
instruct = new_instruct
else:
instruct.completions.proposed_instruction.extend(
- new_instruct.completions.proposed_instruction
+ new_instruct.completions.proposed_instruction,
)
instruct.completions.proposed_prefix_for_output_field.extend(
- new_instruct.completions.proposed_prefix_for_output_field
+ new_instruct.completions.proposed_prefix_for_output_field,
)
# Just data
elif view_data:
instruct = dspy.Predict(
- BasicGenerateInstructionWithDataObservations, n=N - 1, temperature=self.init_temperature
+ BasicGenerateInstructionWithDataObservations,
+ n=N - 1,
+ temperature=self.init_temperature,
)(basic_instruction=basic_instruction, observations=self.observations)
# Just examples
elif view_examples:
@@ -327,7 +330,7 @@ def _generate_first_N_candidates(  # noqa: N802
# Neither
else:
instruct = dspy.Predict(BasicGenerateInstruction, n=N - 1, temperature=self.init_temperature)(
- basic_instruction=basic_instruction
+ basic_instruction=basic_instruction,
)

# Add in our initial prompt as a candidate as well
@@ -365,7 +368,7 @@ def compile(

estimated_task_model_calls_wo_module_calls = len(trainset) * num_trials # M * T * P
estimated_prompt_model_calls = 10 + self.num_candidates * len(
- student.predictors()
+ student.predictors(),
) # num data summary calls + N * P

user_message = textwrap.dedent(f"""\
@@ -454,7 +457,12 @@ def compile(

# Generate N candidate prompts
instruction_candidates, _ = self._generate_first_N_candidates(
- module, self.num_candidates, view_data, view_examples, demo_candidates, trainset
+ module,
+ self.num_candidates,
+ view_data,
+ view_examples,
+ demo_candidates,
+ trainset,
)

# Reset demo_candidates to None for our optimization if the user asked for no fewshot examples
@@ -486,11 +494,13 @@ def objective(trial):

# Suggest the index of the instruction candidate to use in our trial
instruction_idx = trial.suggest_categorical(
f"{id(p_old)}_predictor_instruction", range(len(p_instruction_candidates))
f"{id(p_old)}_predictor_instruction",
range(len(p_instruction_candidates)),
)
if demo_candidates:
demos_idx = trial.suggest_categorical(
f"{id(p_old)}_predictor_demos", range(len(p_demo_candidates))
f"{id(p_old)}_predictor_demos",
range(len(p_demo_candidates)),
)
trial_logs[trial_num][f"{id(p_old)}_predictor_instruction"] = instruction_idx
if demo_candidates:
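The pattern above, where Optuna's suggest_categorical picks an index into a per-predictor candidate list, can be exercised standalone. A minimal sketch with a hypothetical candidate list and a hypothetical scoring rule so it runs end to end:

import optuna

candidate_instructions = ["v1", "v2", "v3"]  # hypothetical instruction candidates

def objective(trial):
    # Same pattern as the trial.suggest_categorical calls above: choose an index.
    idx = trial.suggest_categorical("predictor_instruction", range(len(candidate_instructions)))
    # Hypothetical score: prefer later candidates, just to make the sketch runnable.
    return float(idx)

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=10)
print(study.best_params)  # e.g. {'predictor_instruction': 2}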
2 changes: 1 addition & 1 deletion dspy/teleprompt/signature_opt.py
@@ -44,7 +44,7 @@ def __init__(
track_stats=False,
):
print(
"\u001b[31m[WARNING] SignatureOptimizer has been deprecated and replaced with COPRO. SignatureOptimizer will be removed in a future release. \u001b[31m"
"\u001b[31m[WARNING] SignatureOptimizer has been deprecated and replaced with COPRO. SignatureOptimizer will be removed in a future release. \u001b[31m",
)
super().__init__(prompt_model, metric, breadth, depth, init_temperature, verbose, track_stats)

2 changes: 1 addition & 1 deletion dspy/teleprompt/signature_opt_bayesian.py
@@ -49,7 +49,7 @@ def __init__(
view_data_batch_size=10,
):
print(
"\u001b[31m[WARNING] BayesianSignatureOptimizer has been deprecated and replaced with MIPRO. BayesianSignatureOptimizer will be removed in a future release. \u001b[31m"
"\u001b[31m[WARNING] BayesianSignatureOptimizer has been deprecated and replaced with MIPRO. BayesianSignatureOptimizer will be removed in a future release. \u001b[31m",
)

super().__init__(
4 changes: 2 additions & 2 deletions dspy/teleprompt/teleprompt_optuna.py
@@ -39,7 +39,7 @@ def __init__(
def objective(self, trial):
program2 = self.student.reset_copy()
for (name, compiled_predictor), (_, program2_predictor) in zip(
- self.compiled_teleprompter.named_predictors(), program2.named_predictors()
+ self.compiled_teleprompter.named_predictors(), program2.named_predictors(),
):
all_demos = compiled_predictor.demos
demo_index = trial.suggest_int(f"demo_index_for_{name}", 0, len(all_demos) - 1)
@@ -69,7 +69,7 @@ def compile(self, student, *, teacher=None, max_demos, trainset, valset=None):
max_rounds=self.max_rounds,
)
self.compiled_teleprompter = teleprompter_optimize.compile(
- self.student, teacher=self.teacher, trainset=self.trainset
+ self.student, teacher=self.teacher, trainset=self.trainset,
)
study = optuna.create_study(direction="maximize")
study.optimize(self.objective, n_trials=self.num_candidate_sets)