From d5b00e046302f0ff7e77783573ac340088404bb7 Mon Sep 17 00:00:00 2001 From: usaito Date: Sun, 8 Nov 2020 01:43:32 +0900 Subject: [PATCH 1/4] add obp.dataset.multiclass --- docs/obp.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/obp.rst b/docs/obp.rst index 05fee0ad..0ed028a5 100644 --- a/docs/obp.rst +++ b/docs/obp.rst @@ -31,6 +31,7 @@ dataset module obp.dataset.base obp.dataset.real obp.dataset.synthetic + obp.dataset.multiclass simulator module From 09d848477c797e432e24540a0fe1af391185fc86 Mon Sep 17 00:00:00 2001 From: usaito Date: Sun, 8 Nov 2020 01:43:52 +0900 Subject: [PATCH 2/4] rerun with a new version --- examples/quickstart/quickstart.ipynb | 65 ++++++++++--------- .../quickstart/quickstart_synthetic.ipynb | 14 ++-- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/examples/quickstart/quickstart.ipynb b/examples/quickstart/quickstart.ipynb index 497498ee..288d8597 100644 --- a/examples/quickstart/quickstart.ipynb +++ b/examples/quickstart/quickstart.ipynb @@ -44,7 +44,7 @@ "output_type": "stream", "name": "stdout", "text": [ - "0.3.1\n" + "0.3.2\n" ] } ], @@ -73,17 +73,20 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": { "tags": [] }, "outputs": [ { - "output_type": "stream", - "name": "stdout", - "text": [ - "dict_keys(['n_rounds', 'n_actions', 'action', 'position', 'reward', 'reward_test', 'pscore', 'context', 'action_context'])\n" - ] + "output_type": "execute_result", + "data": { + "text/plain": [ + "dict_keys(['n_rounds', 'n_actions', 'action', 'position', 'reward', 'reward_test', 'pscore', 'context', 'action_context'])" + ] + }, + "metadata": {}, + "execution_count": 4 } ], "source": [ @@ -96,7 +99,7 @@ "bandit_feedback = dataset.obtain_batch_bandit_feedback()\n", "\n", "# `bandit_feedback` is a dictionary storing logged bandit feedback\n", - "print(bandit_feedback.keys())" + "bandit_feedback.keys()" ] }, { @@ -108,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 4, + 
"execution_count": 5, "metadata": {}, "outputs": [ { @@ -119,7 +122,7 @@ ] }, "metadata": {}, - "execution_count": 4 + "execution_count": 5 } ], "source": [ @@ -129,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -140,7 +143,7 @@ ] }, "metadata": {}, - "execution_count": 5 + "execution_count": 6 } ], "source": [ @@ -150,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -161,7 +164,7 @@ ] }, "metadata": {}, - "execution_count": 6 + "execution_count": 7 } ], "source": [ @@ -171,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -182,7 +185,7 @@ ] }, "metadata": {}, - "execution_count": 7 + "execution_count": 8 } ], "source": [ @@ -192,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -203,7 +206,7 @@ ] }, "metadata": {}, - "execution_count": 8 + "execution_count": 9 } ], "source": [ @@ -234,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": { "tags": [] }, @@ -295,7 +298,7 @@ ] }, "metadata": {}, - "execution_count": 9 + "execution_count": 10 } ], "source": [ @@ -340,7 +343,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -365,7 +368,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": { "tags": [] }, @@ -388,7 +391,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -404,7 +407,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -419,7 +422,7 @@ "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
estimated_policy_valuerelative_estimated_policy_value
ipw0.0045531.198126
dm0.0034040.895800
dr0.0046511.224077
\n
" }, "metadata": {}, - "execution_count": 13 + "execution_count": 14 } ], "source": [ @@ -429,7 +432,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -444,7 +447,7 @@ "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
mean95.0% CI (lower)95.0% CI (upper)
ipw0.0045400.0015690.009069
dm0.0034070.0033660.003445
dr0.0046400.0016250.009240
\n
" }, "metadata": {}, - "execution_count": 14 + "execution_count": 15 } ], "source": [ @@ -456,14 +459,14 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [ { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "\n" }, "metadata": {} @@ -481,14 +484,14 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [ { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "\n" }, 
"metadata": {} diff --git a/examples/quickstart/quickstart_synthetic.ipynb b/examples/quickstart/quickstart_synthetic.ipynb index 36cad41f..ae6469d4 100644 --- a/examples/quickstart/quickstart_synthetic.ipynb +++ b/examples/quickstart/quickstart_synthetic.ipynb @@ -75,7 +75,7 @@ "output_type": "stream", "name": "stdout", "text": [ - "0.3.1\n" + "0.3.2\n" ] } ], @@ -333,14 +333,14 @@ "output_type": "stream", "name": "stdout", "text": [ - " mean 95.0% CI (lower) 95.0% CI (upper)\nipw 0.632349 0.628700 0.636076\ndm 0.612067 0.610717 0.613459\ndr 0.632265 0.628844 0.636009 \n\n" + " mean 95.0% CI (lower) 95.0% CI (upper)\nipw 0.632001 0.628205 0.635098\ndm 0.612102 0.611062 0.613282\ndr 0.632011 0.628249 0.635968 \n\n" ] }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "\n" }, "metadata": {} @@ -373,14 +373,14 @@ "output_type": "stream", "name": "stdout", "text": [ - " mean 95.0% CI (lower) 95.0% CI (upper)\nipw 0.635290 0.631430 0.639023\ndm 0.611352 0.609821 0.612917\ndr 0.634975 0.631641 0.639456 \n\n" + " mean 95.0% CI (lower) 95.0% CI (upper)\nipw 0.635143 0.631193 0.639453\ndm 0.611249 0.609818 0.612619\ndr 0.635139 0.631754 0.638721 \n\n" ] }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "\n" }, "metadata": {} @@ -413,14 +413,14 @@ "output_type": "stream", "name": "stdout", "text": [ - " mean 95.0% CI (lower) 95.0% CI (upper)\nipw 0.606584 0.603318 0.610094\ndm 0.606224 0.604829 0.607496\ndr 0.606971 0.603938 0.609665 \n\n" + " mean 95.0% CI (lower) 95.0% CI (upper)\nipw 0.606567 0.603520 0.609491\ndm 0.606244 0.604977 0.607734\ndr 0.606968 0.603830 0.609725 \n\n" ] }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "\n" }, "metadata": {} From b5a3a66c8bb8321cb853e653d27aea3fa265ebc6 Mon Sep 17 00:00:00 2001 From: usaito Date: Sun, 8 Nov 2020 01:44:48 +0900 Subject: [PATCH 3/4] fix some docstring inconsistencies --- obp/dataset/multiclass.py | 24 ++++++++++--------- obp/dataset/real.py | 14 +++++------ obp/dataset/synthetic.py | 18 +++++++-------- obp/ope/estimators.py | 46 ++++++++++++++++++------------------- obp/ope/meta.py | 24 +++++++++---------- obp/ope/regression_model.py | 4 ++-- obp/policy/base.py | 22 +++++++++--------- obp/policy/contextfree.py | 42 ++++++++++++++++----------------- obp/policy/linear.py | 26 ++++++++++----------- obp/policy/logistic.py | 34 +++++++++++++-------------- obp/policy/offline.py | 2 +- obp/utils.py | 10 ++++---- 12 files changed, 134 insertions(+), 132 deletions(-) diff --git a/obp/dataset/multiclass.py b/obp/dataset/multiclass.py index 1b50a070..5f2a2050 100644 --- a/obp/dataset/multiclass.py +++ b/obp/dataset/multiclass.py @@ -24,16 +24,16 @@ class MultiClassToBanditReduction(BaseSyntheticBanditDataset): A machine learning classifier such as logistic regression is used to construct behavior and evaluation policies as follows. 1. Split the original data into training (:math:`\\mathcal{D}_{\\mathrm{tr}}`) and evaluation (:math:`\\mathcal{D}_{\\mathrm{ev}}`) sets. - 2. Train classifiers on :math:`\\mathcal{D}_{\\mathrm{tr}}` and regard them as base deterministic policies :math:`\\pi_{\\mathrm{det},b}` and :math:`\\pi_{\\mathrm{det},e}`. - 3. Construct behavior (:math:`\\pi_{b}`) and evaluation (:math:`\\pi_{e}`) policies based on :math:`\\pi_{\\mathrm{det}}` as + 2. Train classifiers on :math:`\\mathcal{D}_{\\mathrm{tr}}` and obtain base deterministic policies :math:`\\pi_{\\mathrm{det},b}` and :math:`\\pi_{\\mathrm{det},e}`. + 3. 
Construct behavior (:math:`\\pi_{b}`) and evaluation (:math:`\\pi_{e}`) policies based on :math:`\\pi_{\\mathrm{det},b}` and :math:`\\pi_{\\mathrm{det},e}` as .. math:: - \\pi_b (a | x) := \\alpha_b \\pi_{\\mathrm{det},b} (a|x) + (1.0 - \\alpha_b) \\pi_{u} (a|x) + \\pi_b (a | x) := \\alpha_b \\cdot \\pi_{\\mathrm{det},b} (a|x) + (1.0 - \\alpha_b) \\cdot \\pi_{u} (a|x) .. math:: - \\pi_e (a | x) := \\alpha_e \\pi_{\\mathrm{det},e} (a|x) + (1.0 - \\alpha_e) \\pi_{u} (a|x) + \\pi_e (a | x) := \\alpha_e \\cdot \\pi_{\\mathrm{det},e} (a|x) + (1.0 - \\alpha_e) \\cdot \\pi_{u} (a|x) where :math:`\\pi_{u}` is a uniform random policy and :math:`\\alpha_b` and :math:`\\alpha_e` are set by the user. @@ -60,11 +60,11 @@ class MultiClassToBanditReduction(BaseSyntheticBanditDataset): base_classifier_b: ClassifierMixin Machine learning classifier used to construct a behavior policy. - alpha_b: float, default: 0.9 + alpha_b: float, default=0.9 Ration of a uniform random policy when constructing a **behavior** policy. Must be in the [0, 1) interval to make the behavior policy a stochastic one. - dataset_name: str, default: None + dataset_name: str, default=None Name of the dataset. Examples @@ -187,7 +187,7 @@ def split_train_eval( If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the evaluation split. If int, represents the absolute number of test samples. - random_state: int, default: None + random_state: int, default=None Controls the random seed in train-evaluation split. """ @@ -213,12 +213,12 @@ def obtain_batch_bandit_feedback( Please call `self.split_train_eval()` before calling this method. Parameters - ---------- + ----------- eval_size: float or int, default=0.25 If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. If int, represents the absolute number of test samples. 
- random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling actions. Returns @@ -261,10 +261,12 @@ def obtain_action_dist_by_eval_policy( ) -> np.ndarray: """Obtain action choice probabilities by an evaluation policy. - base_classifier_e: ClassifierMixin, default: None + Parameters + ----------- + base_classifier_e: ClassifierMixin, default=None Machine learning classifier used to construct a behavior policy. - alpha_e: float, default: 1.0 + alpha_e: float, default=1.0 Ration of a uniform random policy when constructing an **evaluation** policy. Must be in the [0, 1] interval (evaluation policy can be deterministic). diff --git a/obp/dataset/real.py b/obp/dataset/real.py index 861ef25d..605622e8 100644 --- a/obp/dataset/real.py +++ b/obp/dataset/real.py @@ -32,10 +32,10 @@ class OpenBanditDataset(BaseRealBanditDataset): campaign: str One of the three possible campaigns considered in ZOZOTOWN, "all", "men", and "women". - data_path: Path, default: Path('./obd') + data_path: Path, default=Path('./obd') Path that stores Open Bandit Dataset. - dataset_name: str, default: 'obd' + dataset_name: str, default='obd' Name of the dataset. References @@ -109,13 +109,13 @@ def calc_on_policy_policy_value_estimate( campaign: str One of the three possible campaigns considered in ZOZOTOWN (i.e., "all", "men", and "women"). - data_path: Path, default: Path('./obd') + data_path: Path, default=Path('./obd') Path that stores Open Bandit Dataset. test_size: float, default=0.3 If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. - is_timeseries_split: bool, default: False + is_timeseries_split: bool, default=False If true, split the original logged badnit feedback data by time series. 
Returns @@ -178,7 +178,7 @@ def obtain_batch_bandit_feedback( test_size: float, default=0.3 If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. - is_timeseries_split: bool, default: False + is_timeseries_split: bool, default=False If true, split the original logged badnit feedback data by time series. Returns @@ -233,10 +233,10 @@ def sample_bootstrap_bandit_feedback( test_size: float, default=0.3 If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. - is_timeseries_split: bool, default: False + is_timeseries_split: bool, default=False If true, split the original logged badnit feedback data by time series. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling logged bandit dataset. Returns diff --git a/obp/dataset/synthetic.py b/obp/dataset/synthetic.py index 5e748acd..db4a9628 100644 --- a/obp/dataset/synthetic.py +++ b/obp/dataset/synthetic.py @@ -31,29 +31,29 @@ class SyntheticBanditDataset(BaseSyntheticBanditDataset): n_actions: int Number of actions. - dim_context: int, default: 1 + dim_context: int, default=1 Number of dimensions of context vectors. - reward_type: str, default: 'binary' + reward_type: str, default='binary' Type of reward variable, must be either 'binary' or 'continuous'. When 'binary' is given, rewards are sampled from the Bernoulli distribution. When 'continuous' is given, rewards are sampled from the truncated Normal distribution with `scale=1`. - reward_function: Callable[[np.ndarray, np.ndarray], np.ndarray]], default: None + reward_function: Callable[[np.ndarray, np.ndarray], np.ndarray]], default=None Function generating expected reward with context and action context vectors, i.e., :math:`\\mu: \\mathcal{X} \\times \\mathcal{A} \\rightarrow \\mathbb{R}`. 
If None is set, context **independent** expected reward for each action will be sampled from the uniform distribution automatically. - behavior_policy_function: Callable[[np.ndarray, np.ndarray], np.ndarray], default: None + behavior_policy_function: Callable[[np.ndarray, np.ndarray], np.ndarray], default=None Function generating probability distribution over action space, i.e., :math:`\\pi: \\mathcal{X} \\rightarrow \\Delta(\\mathcal{A})`. If None is set, context **independent** uniform distribution will be used (uniform random behavior policy). - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling synthetic bandit dataset. - dataset_name: str, default: 'synthetic_bandit_dataset' + dataset_name: str, default='synthetic_bandit_dataset' Name of the dataset. Examples @@ -252,7 +252,7 @@ def logistic_reward_function( action_context: array-like, shape (n_actions, dim_action_context) Vector representation for each action. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling dataset. Returns @@ -292,7 +292,7 @@ def linear_reward_function( action_context: array-like, shape (n_actions, dim_action_context) Vector representation for each action. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling dataset. Returns @@ -332,7 +332,7 @@ def linear_behavior_policy( action_context: array-like, shape (n_actions, dim_action_context) Vector representation for each action. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling dataset. Returns diff --git a/obp/ope/estimators.py b/obp/ope/estimators.py index 6fd87544..45d29d1f 100644 --- a/obp/ope/estimators.py +++ b/obp/ope/estimators.py @@ -49,7 +49,7 @@ class ReplayMethod(BaseOffPolicyEstimator): Parameters ---------- - estimator_name: str, default: 'rm'. + estimator_name: str, default='rm'. Name of off-policy estimator. 
References @@ -151,13 +151,13 @@ def estimate_interval( position: array-like, shape (n_rounds,) Positions of each round in the given logged bandit feedback. - alpha: float, default: 0.05 + alpha: float, default=0.05 P-value. - n_bootstrap_samples: int, default: 10000 + n_bootstrap_samples: int, default=10000 Number of resampling performed in the bootstrap procedure. - random_state: int, default: None + random_state: int, default=None Controls the random seed in bootstrap sampling. Returns @@ -197,7 +197,7 @@ class InverseProbabilityWeighting(BaseOffPolicyEstimator): Parameters ------------ - estimator_name: str, default: 'ipw'. + estimator_name: str, default='ipw'. Name of off-policy estimator. References @@ -320,13 +320,13 @@ def estimate_interval( Distribution over actions or the action choice probabilities by the evaluation policy (can be deterministic), i.e., :math:`\\pi_e(a|x)`. - alpha: float, default: 0.05 + alpha: float, default=0.05 P-value. - n_bootstrap_samples: int, default: 10000 + n_bootstrap_samples: int, default=10000 Number of resampling performed in the bootstrap procedure. - random_state: int, default: None + random_state: int, default=None Controls the random seed in bootstrap sampling. Returns @@ -372,7 +372,7 @@ class SelfNormalizedInverseProbabilityWeighting(InverseProbabilityWeighting): Parameters ---------- - estimator_name: str, default: 'snipw'. + estimator_name: str, default='snipw'. Name of off-policy estimator. References @@ -451,7 +451,7 @@ class DirectMethod(BaseOffPolicyEstimator): Parameters ---------- - estimator_name: str, default: 'dm'. + estimator_name: str, default='dm'. Name of off-policy estimator. References @@ -554,13 +554,13 @@ def estimate_interval( estimated_rewards_by_reg_model: array-like, shape (n_rounds, n_actions, len_list) Estimated rewards for each round, action, and position by regression model, i.e., :math:`\\hat{q}(x_t,a_t)`. - alpha: float, default: 0.05 + alpha: float, default=0.05 P-value. 
- n_bootstrap_samples: int, default: 10000 + n_bootstrap_samples: int, default=10000 Number of resampling performed in the bootstrap procedure. - random_state: int, default: None + random_state: int, default=None Controls the random seed in bootstrap sampling. Returns @@ -611,7 +611,7 @@ class DoublyRobust(InverseProbabilityWeighting): Parameters ---------- - estimator_name: str, default: 'dr'. + estimator_name: str, default='dr'. Name of off-policy estimator. References @@ -760,13 +760,13 @@ def estimate_interval( estimated_rewards_by_reg_model: array-like, shape (n_rounds, n_actions, len_list) Estimated rewards for each round, action, and position by regression model, i.e., :math:`\\hat{q}(x_t,a_t)`. - alpha: float, default: 0.05 + alpha: float, default=0.05 P-value. - n_bootstrap_samples: int, default: 10000 + n_bootstrap_samples: int, default=10000 Number of resampling performed in the bootstrap procedure. - random_state: int, default: None + random_state: int, default=None Controls the random seed in bootstrap sampling. Returns @@ -815,7 +815,7 @@ class SelfNormalizedDoublyRobust(DoublyRobust): Parameters ---------- - estimator_name: str, default: 'sndr'. + estimator_name: str, default='sndr'. Name of off-policy estimator. References @@ -906,11 +906,11 @@ class SwitchInverseProbabilityWeighting(DoublyRobust): Parameters ---------- - tau: float, default: 1 + tau: float, default=1 Switching hyperparameter. When importance weight is larger than this parameter, the DM estimator is applied, otherwise the IPW estimator is applied. This hyperparameter should be larger than 1., otherwise it is meaningless. - estimator_name: str, default: 'switch-ipw'. + estimator_name: str, default='switch-ipw'. Name of off-policy estimator. References @@ -1007,11 +1007,11 @@ class SwitchDoublyRobust(DoublyRobust): Parameters ---------- - tau: float, default: 1 + tau: float, default=1 Switching hyperparameter. 
When importance weight is larger than this parameter, the DM estimator is applied, otherwise the DR estimator is applied. This hyperparameter should be larger than 0., otherwise it is meaningless. - estimator_name: str, default: 'switch-dr'. + estimator_name: str, default='switch-dr'. Name of off-policy estimator. References @@ -1127,7 +1127,7 @@ class DoublyRobustWithShrinkage(DoublyRobust): lambda_: float Shrinkage hyperparameter. This hyperparameter should be larger than 0., otherwise it is meaningless. - estimator_name: str, default: 'dr-os'. + estimator_name: str, default='dr-os'. Name of off-policy estimator. References diff --git a/obp/ope/meta.py b/obp/ope/meta.py index 4bf13393..ce09e523 100644 --- a/obp/ope/meta.py +++ b/obp/ope/meta.py @@ -161,13 +161,13 @@ def estimate_intervals( Estimated expected rewards for the given logged bandit feedback at each item and position by regression model. When it is not given, model-dependent estimators such as DM and DR cannot be used. - alpha: float, default: 0.05 + alpha: float, default=0.05 P-value. - n_bootstrap_samples: int, default: 100 + n_bootstrap_samples: int, default=100 Number of resampling performed in the bootstrap procedure. - random_state: int, default: None + random_state: int, default=None Controls the random seed in bootstrap sampling. Returns @@ -218,13 +218,13 @@ def summarize_off_policy_estimates( Estimated expected rewards for the given logged bandit feedback at each item and position by regression model. When it is not given, model-dependent estimators such as DM and DR cannot be used. - alpha: float, default: 0.05 + alpha: float, default=0.05 P-value. - n_bootstrap_samples: int, default: 100 + n_bootstrap_samples: int, default=100 Number of resampling performed in the bootstrap procedure. - random_state: int, default: None + random_state: int, default=None Controls the random seed in bootstrap sampling. 
Returns @@ -277,24 +277,24 @@ def visualize_off_policy_estimates( Estimated expected rewards for the given logged bandit feedback at each item and position by regression model. When it is not given, model-dependent estimators such as DM and DR cannot be used. - alpha: float, default: 0.05 + alpha: float, default=0.05 P-value. - n_bootstrap_samples: int, default: 100 + n_bootstrap_samples: int, default=100 Number of resampling performed in the bootstrap procedure. - random_state: int, default: None + random_state: int, default=None Controls the random seed in bootstrap sampling. - is_relative: bool, default: False, + is_relative: bool, default=False If True, the method visualizes the estimated policy values of evaluation policy relative to the ground-truth policy value of behavior policy. - fig_dir: Path, default: None + fig_dir: Path, default=None Path to store the bar figure. If 'None' is given, the figure will not be saved. - fig_name: str, default: "estimated_policy_value.png" + fig_name: str, default="estimated_policy_value.png" Name of the bar figure. """ diff --git a/obp/ope/regression_model.py b/obp/ope/regression_model.py index 6cd501fc..d429752e 100644 --- a/obp/ope/regression_model.py +++ b/obp/ope/regression_model.py @@ -28,7 +28,7 @@ class RegressionModel(BaseEstimator): n_actions: int Number of actions. - len_list: int, default: 1 + len_list: int, default=1 Length of a list of recommended actions in each impression. When Open Bandit Dataset is used, 3 should be set. @@ -101,7 +101,7 @@ def fit( reward: array-like, shape (n_rounds,) Observed rewards (or outcome) in each round, i.e., :math:`r_t`. - pscore: Optional[np.ndarray], default: None + pscore: Optional[np.ndarray], default=None Propensity scores, the action choice probabilities by behavior policy, in the training logged bandit feedback.
diff --git a/obp/policy/base.py b/obp/policy/base.py index d2a428a0..c0f363ae 100644 --- a/obp/policy/base.py +++ b/obp/policy/base.py @@ -22,14 +22,14 @@ class BaseContextFreePolicy(metaclass=ABCMeta): n_actions: int Number of actions. - len_list: int, default: 1 + len_list: int, default=1 Length of a list of recommended actions in each impression. When Open Bandit Dataset is used, 3 should be set. - batch_size: int, default: 1 + batch_size: int, default=1 Number of samples used in a batch parameter update. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling actions. """ @@ -95,20 +95,20 @@ class BaseContextualPolicy(metaclass=ABCMeta): n_actions: int Number of actions. - len_list: int, default: 1 + len_list: int, default=1 Length of a list of recommended actions in each impression. When Open Bandit Dataset is used, 3 should be set. - batch_size: int, default: 1 + batch_size: int, default=1 Number of samples used in a batch parameter update. - alpha_: float, default: 1. + alpha_: float, default=1. Prior parameter for the online logistic regression. - lambda_: float, default: 1. + lambda_: float, default=1. Regularization hyperparameter for the online logistic regression. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling actions. """ @@ -180,7 +180,7 @@ class BaseOffPolicyLearner(metaclass=ABCMeta): n_actions: int Number of actions. - len_list: int, default: 1 + len_list: int, default=1 Length of a list of recommended actions in each impression. When Open Bandit Dataset is used, 3 should be set. @@ -234,7 +234,7 @@ def _create_train_data_for_opl( reward: array-like, shape (n_actions,) Observed rewards (or outcome) in each round, i.e., :math:`r_t`. 
- pscore: array-like, shape (n_actions,), default: None + pscore: array-like, shape (n_actions,), default=None Propensity scores or the action choice probabilities by behavior policy, i.e., :math:`\\pi_b(a_t|x_t)`. Returns @@ -266,7 +266,7 @@ def fit( reward: array-like, shape (n_rounds,) Observed rewards (or outcome) in each round, i.e., :math:`r_t`. - pscore: array-like, shape (n_rounds,), default: None + pscore: array-like, shape (n_rounds,), default=None Propensity scores or the action choice probabilities by behavior policy, i.e., :math:`\\pi_b(a_t|x_t)`. position: array-like, shape (n_rounds,), default=None diff --git a/obp/policy/contextfree.py b/obp/policy/contextfree.py index 0b893d17..e8072d03 100644 --- a/obp/policy/contextfree.py +++ b/obp/policy/contextfree.py @@ -30,20 +30,20 @@ class EpsilonGreedy(BaseContextFreePolicy): n_actions: int Number of actions. - len_list: int, default: 1 + len_list: int, default=1 Length of a list of recommended actions in each impression. When Open Bandit Dataset is used, 3 should be set. - batch_size: int, default: 1 + batch_size: int, default=1 Number of samples used in a batch parameter update. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling actions. - epsilon: float, default: 1. + epsilon: float, default=1. Exploration hyperparameter that must take value in the range of [0., 1.]. - policy_name: str, default: f'egreedy_{epsilon}'. + policy_name: str, default=f'egreedy_{epsilon}'. Name of bandit policy. """ @@ -105,20 +105,20 @@ class Random(EpsilonGreedy): n_actions: int Number of actions. - len_list: int, default: 1 + len_list: int, default=1 Length of a list of recommended actions in each impression. When Open Bandit Dataset is used, 3 should be set. - batch_size: int, default: 1 + batch_size: int, default=1 Number of samples used in a batch parameter update. 
- random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling actions. - epsilon: float, default: 1. + epsilon: float, default=1. Exploration hyperparameter that must take value in the range of [0., 1.]. - policy_name: str, default: 'random'. + policy_name: str, default='random'. Name of bandit policy. """ @@ -132,7 +132,7 @@ def compute_batch_action_dist( Parameters ---------- - n_rounds: int, default: 1 + n_rounds: int, default=1 Number of rounds in the distribution over actions. (the size of the first axis of `action_dist`) @@ -157,30 +157,30 @@ class BernoulliTS(BaseContextFreePolicy): n_actions: int Number of actions. - len_list: int, default: 1 + len_list: int, default=1 Length of a list of recommended actions in each impression. When Open Bandit Dataset is used, 3 should be set. - batch_size: int, default: 1 + batch_size: int, default=1 Number of samples used in a batch parameter update. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling actions. - alpha: array-like, shape (n_actions, ), default: None + alpha: array-like, shape (n_actions, ), default=None Prior parameter vector for Beta distributions. - beta: array-like, shape (n_actions, ), default: None + beta: array-like, shape (n_actions, ), default=None Prior parameter vector for Beta distributions. - is_zozotown_prior: bool, default: False + is_zozotown_prior: bool, default=False Whether to use hyperparameters for the beta distribution used at the start of the data collection period in ZOZOTOWN. - campaign: str, default: None + campaign: str, default=None One of the three possible campaigns considered in ZOZOTOWN, "all", "men", and "women". - policy_name: str, default: 'bts' + policy_name: str, default='bts' Name of bandit policy. 
""" @@ -245,11 +245,11 @@ def compute_batch_action_dist( Parameters ---------- - n_rounds: int, default: 1 + n_rounds: int, default=1 Number of rounds in the distribution over actions. (the size of the first axis of `action_dist`) - n_sim: int, default: 100000 + n_sim: int, default=100000 Number of simulations in the Monte Carlo simulation to compute the distribution over actions. Returns diff --git a/obp/policy/linear.py b/obp/policy/linear.py index 63f9a6dd..d63b25e9 100644 --- a/obp/policy/linear.py +++ b/obp/policy/linear.py @@ -21,20 +21,20 @@ class LinEpsilonGreedy(BaseContextualPolicy): n_actions: int Number of actions. - len_list: int, default: 1 + len_list: int, default=1 Length of a list of recommended actions in each impression. When Open Bandit Dataset is used, 3 should be set. - batch_size: int, default: 1 + batch_size: int, default=1 Number of samples used in a batch parameter update. - n_trial: int, default: 0 + n_trial: int, default=0 Current number of trials in a bandit simulation. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling actions. - epsilon: float, default: 0. + epsilon: float, default=0. Exploration hyperparameter that must take value in the range of [0., 1.]. References @@ -140,17 +140,17 @@ class LinUCB(BaseContextualPolicy): n_actions: int Number of actions. - len_list: int, default: 1 + len_list: int, default=1 Length of a list of recommended actions in each impression. When Open Bandit Dataset is used, 3 should be set. - batch_size: int, default: 1 + batch_size: int, default=1 Number of samples used in a batch parameter update. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling actions. - epsilon: float, default: 0. + epsilon: float, default=0. Exploration hyperparameter that must take value in the range of [0., 1.]. References @@ -257,17 +257,17 @@ class LinTS(BaseContextualPolicy): n_actions: int Number of actions. 
- len_list: int, default: 1 + len_list: int, default=1 Length of a list of recommended actions in each impression. When Open Bandit Dataset is used, 3 should be set. - batch_size: int, default: 1 + batch_size: int, default=1 Number of samples used in a batch parameter update. - alpha_: float, default: 1. + alpha_: float, default=1. Prior parameter for the online logistic regression. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling actions. """ diff --git a/obp/policy/logistic.py b/obp/policy/logistic.py index a2d39604..c8945791 100644 --- a/obp/policy/logistic.py +++ b/obp/policy/logistic.py @@ -25,23 +25,23 @@ class LogisticEpsilonGreedy(BaseContextualPolicy): n_actions: int Number of actions. - len_list: int, default: 1 + len_list: int, default=1 Length of a list of recommended actions in each impression. When Open Bandit Dataset is used, 3 should be set. - batch_size: int, default: 1 + batch_size: int, default=1 Number of samples used in a batch parameter update. - alpha_: float, default: 1. + alpha_: float, default=1. Prior parameter for the online logistic regression. - lambda_: float, default: 1. + lambda_: float, default=1. Regularization hyperparameter for the online logistic regression. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling actions. - epsilon: float, default: 0. + epsilon: float, default=0. Exploration hyperparameter that must take value in the range of [0., 1.]. """ @@ -131,23 +131,23 @@ class LogisticUCB(BaseContextualPolicy): n_actions: int Number of actions. - len_list: int, default: 1 + len_list: int, default=1 Length of a list of recommended actions in each impression. When Open Bandit Dataset is used, 3 should be set. - batch_size: int, default: 1 + batch_size: int, default=1 Number of samples used in a batch parameter update. - alpha_: float, default: 1. + alpha_: float, default=1. 
Prior parameter for the online logistic regression. - lambda_: float, default: 1. + lambda_: float, default=1. Regularization hyperparameter for the online logistic regression. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling actions. - epsilon: float, default: 0. + epsilon: float, default=0. Exploration hyperparameter that must take value in the range of [0., 1.]. References @@ -244,20 +244,20 @@ class LogisticTS(BaseContextualPolicy): n_actions: int Number of actions. - len_list: int, default: 1 + len_list: int, default=1 Length of a list of recommended actions in each impression. When Open Bandit Dataset is used, 3 should be set. - batch_size: int, default: 1 + batch_size: int, default=1 Number of samples used in a batch parameter update. - alpha_: float, default: 1. + alpha_: float, default=1. Prior parameter for the online logistic regression. - lambda_: float, default: 1. + lambda_: float, default=1. Regularization hyperparameter for the online logistic regression. - random_state: int, default: None + random_state: int, default=None Controls the random seed in sampling actions. References diff --git a/obp/policy/offline.py b/obp/policy/offline.py index 7684e7b8..d7944268 100644 --- a/obp/policy/offline.py +++ b/obp/policy/offline.py @@ -51,7 +51,7 @@ def _create_train_data_for_opl( reward: array-like, shape (n_rounds,) Observed rewards (or outcome) in each round, i.e., :math:`r_t`. - pscore: array-like, shape (n_rounds,), default: None + pscore: array-like, shape (n_rounds,), default=None Propensity scores, the probability of selecting each action by behavior policy, in the given logged bandit feedback. diff --git a/obp/utils.py b/obp/utils.py index 4ae9473d..04fe2052 100644 --- a/obp/utils.py +++ b/obp/utils.py @@ -24,13 +24,13 @@ def estimate_confidence_interval_by_bootstrap( samples: array-like Empirical observed samples to be used to estimate cumulative distribution function. 
- alpha: float, default: 0.05 + alpha: float, default=0.05 P-value. - n_bootstrap_samples: int, default: 10000 + n_bootstrap_samples: int, default=10000 Number of resampling performed in the bootstrap procedure. - random_state: int, default: None + random_state: int, default=None Controls the random seed in bootstrap sampling. Returns @@ -179,10 +179,10 @@ def check_bandit_feedback_inputs( reward: array-like, shape (n_rounds,) Observed rewards (or outcome) in each round, i.e., :math:`r_t`. - position: array-like, shape (n_rounds,), default: None + position: array-like, shape (n_rounds,), default=None Positions of each round in the given logged bandit feedback. - pscore: array-like, shape (n_rounds,), default: None + pscore: array-like, shape (n_rounds,), default=None Propensity scores, the probability of selecting each action by behavior policy, in the given logged bandit feedback. From f09f2180b1b52f109848b524b5dfeff0732e6446 Mon Sep 17 00:00:00 2001 From: usaito Date: Sun, 8 Nov 2020 01:45:03 +0900 Subject: [PATCH 4/4] update version --- obp/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/obp/version.py b/obp/version.py index 260c070a..f9aa3e11 100644 --- a/obp/version.py +++ b/obp/version.py @@ -1 +1 @@ -__version__ = "0.3.1" +__version__ = "0.3.2"