
Commit

adjust to some recent changes
usaito committed Feb 21, 2021
1 parent a3d171b commit a341835
Showing 6 changed files with 182 additions and 339 deletions.
26 changes: 16 additions & 10 deletions examples/multiclass/evaluate_off_policy_estimators.py
@@ -48,10 +48,10 @@
     SelfNormalizedInverseProbabilityWeighting(),
     DoublyRobust(),
     SelfNormalizedDoublyRobust(),
-    SwitchDoublyRobust(tau=1, estimator_name="switch-dr (tau=1)"),
-    SwitchDoublyRobust(tau=100, estimator_name="switch-dr (tau=100)"),
-    DoublyRobustWithShrinkage(lambda_=1, estimator_name="dr-os (lambda=1)"),
-    DoublyRobustWithShrinkage(lambda_=100, estimator_name="dr-os (lambda=100)"),
+    SwitchDoublyRobust(tau=1., estimator_name="switch-dr (tau=1)"),
+    SwitchDoublyRobust(tau=100., estimator_name="switch-dr (tau=100)"),
+    DoublyRobustWithShrinkage(lambda_=1., estimator_name="dr-os (lambda=1)"),
+    DoublyRobustWithShrinkage(lambda_=100., estimator_name="dr-os (lambda=100)"),
 ]

 if __name__ == "__main__":
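
Aside (not part of the commit): the only substantive change in this hunk is that the Switch-DR and DR-os hyperparameters are now passed as floats (1., 100.) instead of ints, presumably because the estimators validate tau / lambda_ as float-valued. Roughly, tau is the importance-weight threshold above which Switch-DR falls back to the direct method, and lambda_ controls how strongly DR-os shrinks the importance weights. A minimal hedged sketch of the affected constructors as used in the updated scripts:

from obp.ope import DoublyRobustWithShrinkage, SwitchDoublyRobust

# float-valued hyperparameters, matching the updated example scripts
switch_dr = SwitchDoublyRobust(tau=100., estimator_name="switch-dr (tau=100)")
dr_os = DoublyRobustWithShrinkage(lambda_=100., estimator_name="dr-os (lambda=100)")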
@@ -175,7 +175,8 @@ def process(i: int):
         )
         # evaluate estimators' performances using relative estimation error (relative-ee)
         ope = OffPolicyEvaluation(
-            bandit_feedback=bandit_feedback, ope_estimators=ope_estimators,
+            bandit_feedback=bandit_feedback,
+            ope_estimators=ope_estimators,
         )
         relative_ee_i = ope.evaluate_performance_of_estimators(
             ground_truth_policy_value=ground_truth_policy_value,
@@ -185,12 +186,17 @@

         return relative_ee_i

-    processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
-        [delayed(process)(i) for i in np.arange(n_runs)]
-    )
+    processed = Parallel(
+        backend="multiprocessing",
+        n_jobs=n_jobs,
+        verbose=50,
+    )([delayed(process)(i) for i in np.arange(n_runs)])
     relative_ee_dict = {est.estimator_name: dict() for est in ope_estimators}
     for i, relative_ee_i in enumerate(processed):
-        for (estimator_name, relative_ee_,) in relative_ee_i.items():
+        for (
+            estimator_name,
+            relative_ee_,
+        ) in relative_ee_i.items():
             relative_ee_dict[estimator_name][i] = relative_ee_
     relative_ee_df = DataFrame(relative_ee_dict).describe().T.round(6)

@@ -201,6 +207,6 @@ def process(i: int):
     print("=" * 45)

     # save results of the evaluation of off-policy estimators in './logs' directory.
-    log_path = Path("./logs")
+    log_path = Path(f"./logs/{dataset_name}")
     log_path.mkdir(exist_ok=True, parents=True)
     relative_ee_df.to_csv(log_path / "relative_ee_of_ope_estimators.csv")
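
Aside (not part of the commit): logs are now written to a per-dataset subdirectory, so runs on different datasets no longer overwrite each other's CSVs. A small sketch of the resulting layout, assuming dataset_name is whatever the script receives on its command line ("digits" is a hypothetical value; the argument parsing is not shown in this diff):

from pathlib import Path

dataset_name = "digits"  # hypothetical value; supplied via the script's CLI in practice
log_path = Path(f"./logs/{dataset_name}")
log_path.mkdir(exist_ok=True, parents=True)
# the summary then lands at ./logs/digits/relative_ee_of_ope_estimators.csv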
95 changes: 28 additions & 67 deletions examples/quickstart/multiclass.ipynb

Large diffs are not rendered by default.

38 changes: 12 additions & 26 deletions examples/quickstart/obd.ipynb

Large diffs are not rendered by default.

242 changes: 61 additions & 181 deletions examples/quickstart/online.ipynb

Large diffs are not rendered by default.

70 changes: 39 additions & 31 deletions examples/quickstart/synthetic.ipynb

Large diffs are not rendered by default.

50 changes: 26 additions & 24 deletions examples/synthetic/evaluate_off_policy_estimators.py
@@ -45,10 +45,10 @@
     SelfNormalizedInverseProbabilityWeighting(),
     DoublyRobust(),
     SelfNormalizedDoublyRobust(),
-    SwitchDoublyRobust(tau=1, estimator_name="switch-dr (tau=1)"),
-    SwitchDoublyRobust(tau=100, estimator_name="switch-dr (tau=100)"),
-    DoublyRobustWithShrinkage(lambda_=1, estimator_name="dr-os (lambda=1)"),
-    DoublyRobustWithShrinkage(lambda_=100, estimator_name="dr-os (lambda=100)"),
+    SwitchDoublyRobust(tau=1., estimator_name="switch-dr (tau=1)"),
+    SwitchDoublyRobust(tau=100., estimator_name="switch-dr (tau=100)"),
+    DoublyRobustWithShrinkage(lambda_=1., estimator_name="dr-os (lambda=1)"),
+    DoublyRobustWithShrinkage(lambda_=100., estimator_name="dr-os (lambda=100)"),
 ]

 if __name__ == "__main__":
@@ -119,15 +119,15 @@
         behavior_policy_function=linear_behavior_policy,
         random_state=random_state,
     )
-    # define evaluation policy using IPWLearner
-    evaluation_policy = IPWLearner(
-        n_actions=dataset.n_actions,
-        base_classifier=base_model_dict[base_model_for_evaluation_policy](
-            **hyperparams[base_model_for_evaluation_policy]
-        ),
-    )

     def process(i: int):
+        # define evaluation policy using IPWLearner
+        evaluation_policy = IPWLearner(
+            n_actions=dataset.n_actions,
+            base_classifier=base_model_dict[base_model_for_evaluation_policy](
+                **hyperparams[base_model_for_evaluation_policy]
+            ),
+        )
         # sample new training and test sets of synthetic logged bandit feedback
         bandit_feedback_train = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds)
         bandit_feedback_test = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds)
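
Aside (not part of the commit): moving the IPWLearner construction inside process() means each parallel run builds and fits its own evaluation policy on its own freshly sampled training set, rather than sharing a single module-level instance across runs. A rough sketch of the per-run pattern, assuming the fit/predict calls later in the script follow obp's usual IPWLearner interface (those lines are not part of this hunk):

def process(i: int):
    # each run constructs its own evaluation policy ...
    evaluation_policy = IPWLearner(
        n_actions=dataset.n_actions,
        base_classifier=base_model_dict[base_model_for_evaluation_policy](
            **hyperparams[base_model_for_evaluation_policy]
        ),
    )
    # ... samples its own synthetic logged data ...
    bandit_feedback_train = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds)
    bandit_feedback_test = dataset.obtain_batch_bandit_feedback(n_rounds=n_rounds)
    # ... and trains the policy on that data before evaluating it
    evaluation_policy.fit(
        context=bandit_feedback_train["context"],
        action=bandit_feedback_train["action"],
        reward=bandit_feedback_train["reward"],
        pscore=bandit_feedback_train["pscore"],
    )
    action_dist = evaluation_policy.predict(context=bandit_feedback_test["context"])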
@@ -142,13 +142,6 @@ def process(i: int):
         action_dist = evaluation_policy.predict(
             context=bandit_feedback_test["context"],
         )
-        # estimate the ground-truth policy values of the evaluation policy
-        # using the full expected reward contained in the test set of synthetic bandit feedback
-        ground_truth_policy_value = np.average(
-            bandit_feedback_test["expected_reward"],
-            weights=action_dist[:, :, 0],
-            axis=1,
-        ).mean()
         # estimate the mean reward function of the test set of synthetic bandit feedback with ML model
         regression_model = RegressionModel(
             n_actions=dataset.n_actions,
@@ -166,22 +159,31 @@
         )
         # evaluate estimators' performances using relative estimation error (relative-ee)
         ope = OffPolicyEvaluation(
-            bandit_feedback=bandit_feedback_test, ope_estimators=ope_estimators,
+            bandit_feedback=bandit_feedback_test,
+            ope_estimators=ope_estimators,
         )
         relative_ee_i = ope.evaluate_performance_of_estimators(
-            ground_truth_policy_value=ground_truth_policy_value,
+            ground_truth_policy_value=dataset.calc_ground_truth_policy_value(
+                expected_reward=bandit_feedback_test["expected_reward"],
+                action_dist=action_dist,
+            ),
             action_dist=action_dist,
             estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
         )

         return relative_ee_i

-    processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
-        [delayed(process)(i) for i in np.arange(n_runs)]
-    )
+    processed = Parallel(
+        backend="multiprocessing",
+        n_jobs=n_jobs,
+        verbose=50,
+    )([delayed(process)(i) for i in np.arange(n_runs)])
     relative_ee_dict = {est.estimator_name: dict() for est in ope_estimators}
     for i, relative_ee_i in enumerate(processed):
-        for (estimator_name, relative_ee_,) in relative_ee_i.items():
+        for (
+            estimator_name,
+            relative_ee_,
+        ) in relative_ee_i.items():
             relative_ee_dict[estimator_name][i] = relative_ee_
     relative_ee_df = DataFrame(relative_ee_dict).describe().T.round(6)

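Aside (not part of the commit): the ground-truth value that the previous hunk deleted as an explicit np.average is now obtained from dataset.calc_ground_truth_policy_value. Judging from the removed lines, the two should agree; a hedged sketch of that equivalence follows (the helper's internals are not shown in this diff, and bandit_feedback_test, action_dist, and dataset are the script's local variables):

import numpy as np

# what the old code computed explicitly (per the removed lines above)
ground_truth_manual = np.average(
    bandit_feedback_test["expected_reward"],  # shape (n_rounds, n_actions)
    weights=action_dist[:, :, 0],  # evaluation policy's action-choice probabilities
    axis=1,
).mean()

# what the updated code delegates to the dataset class (presumably the same quantity)
ground_truth_helper = dataset.calc_ground_truth_policy_value(
    expected_reward=bandit_feedback_test["expected_reward"],
    action_dist=action_dist,
)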
