Skip to content

Commit

Permalink
Merge pull request st-tech#169 from st-tech/feature/fix-meta
Browse files Browse the repository at this point in the history
Fix meta
  • Loading branch information
usaito authored Apr 28, 2022
2 parents da4c7c9 + dbcd54e commit 44e7412
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 11 deletions.
13 changes: 8 additions & 5 deletions obp/dataset/synthetic_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class SyntheticBanditDatasetWithActionEmbeds(SyntheticBanditDataset):
reward_type: str, default='binary'
Whether the rewards are 'binary' or 'continuous'.
When 'binary', rewards are sampled from the Bernoulli distribution.
When 'continuous', rewards are sampled from the truncated Normal distribution with `scale=1`.
When 'continuous', rewards are sampled from the Normal distribution.
The mean parameter of the reward distribution is determined by the `reward_function` specified by the next argument.
reward_function: Callable[[np.ndarray, np.ndarray], np.ndarray], default=None
Expand Down Expand Up @@ -73,7 +73,7 @@ class SyntheticBanditDatasetWithActionEmbeds(SyntheticBanditDataset):
while a negative value leads to a sub-optimal behavior policy.
n_cat_per_dim: int, default=10
Number of categories (cardinality) per category dimension.
Number of categories (cardinality) for each category dimension.
latent_param_mat_dim: int, default=5
Number of dimensions of the latent parameter matrix to define the expected rewards.
Expand All @@ -85,7 +85,8 @@ class SyntheticBanditDatasetWithActionEmbeds(SyntheticBanditDataset):
p_e_a_param_std: int or float, default=1.0
Standard deviation of the normal distribution to sample the parameters of the action embedding distribution.
A large value generates a near-deterministic embedding distribution, while a small value generates a near-uniform embedding distribution.
A large value generates a near-deterministic embedding distribution,
while a small value generates a near-uniform embedding distribution.
n_unobserved_cat_dim: int, default=0
Number of unobserved category dimensions.
Expand Down Expand Up @@ -333,7 +334,7 @@ def obtain_batch_bandit_feedback(self, n_rounds: int) -> BanditFeedback:
n_rounds=n_rounds,
n_actions=self.n_actions,
action_context=self.action_context_reg[
:, 1:
:, self.n_unobserved_cat_dim :
], # action context used for training a reg model
action_embed=action_embed[
:, self.n_unobserved_cat_dim :
Expand All @@ -344,7 +345,9 @@ def obtain_batch_bandit_feedback(self, n_rounds: int) -> BanditFeedback:
reward=rewards,
expected_reward=q_x_a,
q_x_e=q_x_e[:, :, self.n_unobserved_cat_dim :],
p_e_a=self.p_e_a[:, :, self.n_unobserved_cat_dim :],
p_e_a=self.p_e_a[
:, :, self.n_unobserved_cat_dim :
], # true probability distribution of the action embeddings
pi_b=pi_b[:, :, np.newaxis],
pscore=pi_b[np.arange(n_rounds), actions],
)
3 changes: 2 additions & 1 deletion obp/ope/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,8 @@ def estimate_student_t_lower_bound(x: np.ndarray, delta: float = 0.05) -> float:
check_scalar(delta, "delta", (int, float), min_val=0.0, max_val=1.0)

n = x.shape[0]
ci = sqrt(var(x) / (n - 1)) * stats.t(n - 1).ppf(1.0 - delta)
ci = sqrt(var(x) / (n - 1))
ci *= stats.t(n - 1).ppf(1.0 - delta)
lower_bound_estimate = x.mean() - ci

return lower_bound_estimate
26 changes: 21 additions & 5 deletions obp/ope/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,15 +149,26 @@ def _create_estimator_inputs(
pass
elif isinstance(value_or_dict, dict):
for estimator_name, value in value_or_dict.items():
expected_dim = 1
if var_name in ["p_e_a", "pi_b"]:
expected_dim = 3
elif var_name in ["action_embed"]:
expected_dim = 2
check_array(
array=value,
name=f"{var_name}[{estimator_name}]",
expected_dim=1,
expected_dim=expected_dim,
)
if value.shape[0] != action_dist.shape[0]:
raise ValueError(
f"Expected `{var_name}[{estimator_name}].shape[0] == action_dist.shape[0]`, but found it False"
)
if var_name != "p_e_a":
if value.shape[0] != action_dist.shape[0]:
raise ValueError(
f"Expected `{var_name}[{estimator_name}].shape[0] == action_dist.shape[0]`, but found it False"
)
else:
if value.shape[0] != action_dist.shape[1]:
raise ValueError(
f"Expected `{var_name}[{estimator_name}].shape[0] == action_dist.shape[1]`, but found it False"
)
else:
expected_dim = 1
if var_name in ["p_e_a", "pi_b"]:
Expand All @@ -172,6 +183,11 @@ def _create_estimator_inputs(
raise ValueError(
f"Expected `{var_name}.shape[0] == action_dist.shape[0]`, but found it False"
)
else:
if value.shape[0] != action_dist.shape[1]:
raise ValueError(
f"Expected `{var_name}[{estimator_name}].shape[0] == action_dist.shape[1]`, but found it False"
)

estimator_inputs = {
estimator_name: {
Expand Down

0 comments on commit 44e7412

Please sign in to comment.