Skip to content

Commit

Permalink
Merge pull request st-tech#169 from st-tech/feature/fix-meta
Browse files Browse the repository at this point in the history
Fix meta
  • Loading branch information
usaito authored Apr 28, 2022
2 parents da4c7c9 + dbcd54e commit 44e7412
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 11 deletions.
13 changes: 8 additions & 5 deletions obp/dataset/synthetic_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class SyntheticBanditDatasetWithActionEmbeds(SyntheticBanditDataset):
reward_type: str, default='binary'
Whether the rewards are 'binary' or 'continuous'.
When 'binary', rewards are sampled from the Bernoulli distribution.
When 'continuous', rewards are sampled from the truncated Normal distribution with `scale=1`.
When 'continuous', rewards are sampled from the Normal distribution.
The mean parameter of the reward distribution is determined by the `reward_function` specified by the next argument.
reward_function: Callable[[np.ndarray, np.ndarray], np.ndarray], default=None
Expand Down Expand Up @@ -73,7 +73,7 @@ class SyntheticBanditDatasetWithActionEmbeds(SyntheticBanditDataset):
while a negative value leads to a sub-optimal behavior policy.
n_cat_per_dim: int, default=10
Number of categories (cardinality) per category dimension.
Number of categories (cardinality) for each category dimension.
latent_param_mat_dim: int, default=5
Number of dimensions of the latent parameter matrix to define the expected rewards.
Expand All @@ -85,7 +85,8 @@ class SyntheticBanditDatasetWithActionEmbeds(SyntheticBanditDataset):
p_e_a_param_std: int or float, default=1.0
Standard deviation of the normal distribution to sample the parameters of the action embedding distribution.
A large value generates a near-deterministic embedding distribution, while a small value generates a near-uniform embedding distribution.
A large value generates a near-deterministic embedding distribution,
while a small value generates a near-uniform embedding distribution.
n_unobserved_cat_dim: int, default=0
Number of unobserved category dimensions.
Expand Down Expand Up @@ -333,7 +334,7 @@ def obtain_batch_bandit_feedback(self, n_rounds: int) -> BanditFeedback:
n_rounds=n_rounds,
n_actions=self.n_actions,
action_context=self.action_context_reg[
:, 1:
:, self.n_unobserved_cat_dim :
], # action context used for training a reg model
action_embed=action_embed[
:, self.n_unobserved_cat_dim :
Expand All @@ -344,7 +345,9 @@ def obtain_batch_bandit_feedback(self, n_rounds: int) -> BanditFeedback:
reward=rewards,
expected_reward=q_x_a,
q_x_e=q_x_e[:, :, self.n_unobserved_cat_dim :],
p_e_a=self.p_e_a[:, :, self.n_unobserved_cat_dim :],
p_e_a=self.p_e_a[
:, :, self.n_unobserved_cat_dim :
], # true probability distribution of the action embeddings
pi_b=pi_b[:, :, np.newaxis],
pscore=pi_b[np.arange(n_rounds), actions],
)
3 changes: 2 additions & 1 deletion obp/ope/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,8 @@ def estimate_student_t_lower_bound(x: np.ndarray, delta: float = 0.05) -> float:
check_scalar(delta, "delta", (int, float), min_val=0.0, max_val=1.0)

n = x.shape[0]
ci = sqrt(var(x) / (n - 1)) * stats.t(n - 1).ppf(1.0 - delta)
ci = sqrt(var(x) / (n - 1))
ci *= stats.t(n - 1).ppf(1.0 - delta)
lower_bound_estimate = x.mean() - ci

return lower_bound_estimate
26 changes: 21 additions & 5 deletions obp/ope/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,15 +149,26 @@ def _create_estimator_inputs(
pass
elif isinstance(value_or_dict, dict):
for estimator_name, value in value_or_dict.items():
expected_dim = 1
if var_name in ["p_e_a", "pi_b"]:
expected_dim = 3
elif var_name in ["action_embed"]:
expected_dim = 2
check_array(
array=value,
name=f"{var_name}[{estimator_name}]",
expected_dim=1,
expected_dim=expected_dim,
)
if value.shape[0] != action_dist.shape[0]:
raise ValueError(
f"Expected `{var_name}[{estimator_name}].shape[0] == action_dist.shape[0]`, but found it False"
)
if var_name != "p_e_a":
if value.shape[0] != action_dist.shape[0]:
raise ValueError(
f"Expected `{var_name}[{estimator_name}].shape[0] == action_dist.shape[0]`, but found it False"
)
else:
if value.shape[0] != action_dist.shape[1]:
raise ValueError(
f"Expected `{var_name}[{estimator_name}].shape[0] == action_dist.shape[1]`, but found it False"
)
else:
expected_dim = 1
if var_name in ["p_e_a", "pi_b"]:
Expand All @@ -172,6 +183,11 @@ def _create_estimator_inputs(
raise ValueError(
f"Expected `{var_name}.shape[0] == action_dist.shape[0]`, but found it False"
)
else:
if value.shape[0] != action_dist.shape[1]:
raise ValueError(
f"Expected `{var_name}[{estimator_name}].shape[0] == action_dist.shape[1]`, but found it False"
)

estimator_inputs = {
estimator_name: {
Expand Down

0 comments on commit 44e7412

Please sign in to comment.