forked from st-tech/zr-obp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrefs.bib
288 lines (249 loc) · 10.5 KB
/
refs.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
% Beygelzimer and Langford (2009): offset tree for learning with partial labels.
% Venue name is written with its proper capitalisation because styles print
% booktitle verbatim; acronyms are braced for robustness.
@inproceedings{Beygelzimer2009,
  author    = {Beygelzimer, Alina and Langford, John},
  title     = {The offset tree for learning with partial labels},
  booktitle = {Proceedings of the 15th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  pages     = {129--138},
  year      = {2009},
}
% Su et al. (2019): doubly robust off-policy evaluation with shrinkage (arXiv preprint).
@article{Su2019,
  author  = {Su, Yi and Dimakopoulou, Maria and Krishnamurthy, Akshay and Dud{\'\i}k, Miroslav},
  title   = {Doubly robust off-policy evaluation with shrinkage},
  journal = {arXiv preprint arXiv:1907.09623},
  year    = {2019},
}
% Lefortier et al. (2016): test-bed for validating counterfactual learning methods (arXiv preprint).
@article{Lefortier2016,
  author  = {Lefortier, Damien and Swaminathan, Adith and Gu, Xiaotao and Joachims, Thorsten and de Rijke, Maarten},
  title   = {Large-scale validation of counterfactual learning methods: A test-bed},
  journal = {arXiv preprint arXiv:1612.00367},
  year    = {2016},
}
% Agrawal and Goyal (2013): Thompson sampling for linear-payoff contextual bandits.
@inproceedings{Agrawal2013,
  author    = {Agrawal, Shipra and Goyal, Navin},
  title     = {Thompson sampling for contextual bandits with linear payoffs},
  booktitle = {International Conference on Machine Learning},
  pages     = {127--135},
  year      = {2013},
}
% Mahajan et al. (2012): LogUCB explore-exploit algorithm.
% The algorithm name is braced so sentence-casing styles cannot turn it into "Logucb".
@inproceedings{Mahajan2012,
  author    = {Mahajan, Dhruv Kumar and Rastogi, Rajeev and Tiwari, Charu and Mitra, Adway},
  title     = {{LogUCB}: an explore-exploit algorithm for comments recommendation},
  booktitle = {Proceedings of the 21st {ACM} International Conference on Information and Knowledge Management},
  pages     = {6--15},
  year      = {2012},
}
% Breiman (1996): bagging predictors.
% Journal names are printed verbatim by styles, so the proper capitalisation is stored here.
@article{Breiman1996,
  author    = {Breiman, Leo},
  title     = {Bagging predictors},
  journal   = {Machine Learning},
  volume    = {24},
  number    = {2},
  pages     = {123--140},
  year      = {1996},
  publisher = {Springer},
}
% Hu et al. (2020): Open Graph Benchmark (arXiv preprint).
% The benchmark's name is a proper noun and is braced to survive sentence-casing styles.
@article{hu2020open,
  author  = {Hu, Weihua and Fey, Matthias and Zitnik, Marinka and Dong, Yuxiao and Ren, Hongyu and Liu, Bowen and Catasta, Michele and Leskovec, Jure},
  title   = {{Open Graph Benchmark}: Datasets for Machine Learning on Graphs},
  journal = {arXiv preprint arXiv:2005.00687},
  year    = {2020},
}
% Xie, Ma and Wang (2019): marginalized importance sampling for off-policy evaluation.
@inproceedings{xie2019towards,
  author    = {Xie, Tengyang and Ma, Yifei and Wang, Yu-Xiang},
  title     = {Towards Optimal Off-Policy Evaluation for Reinforcement Learning with Marginalized Importance Sampling},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {9665--9675},
  year      = {2019},
}
% Narita, Yasui and Yata (2020): off-policy bandit and reinforcement learning (arXiv preprint).
@article{Narita2020,
  author  = {Narita, Yusuke and Yasui, Shota and Yata, Kohei},
  title   = {Off-policy Bandit and Reinforcement Learning},
  journal = {arXiv preprint arXiv:2002.08536},
  year    = {2020},
}
% Kang and Schafer (2007): comparison of doubly robust strategies.
% The paper has exactly two authors, so no "and others" token is listed;
% otherwise styles would append a spurious "et al.".
@article{Kang2007,
  author    = {Kang, Joseph D. Y. and Schafer, Joseph L.},
  title     = {Demystifying double robustness: A comparison of alternative strategies for estimating a population mean from incomplete data},
  journal   = {Statistical Science},
  volume    = {22},
  number    = {4},
  pages     = {523--539},
  year      = {2007},
  publisher = {Institute of Mathematical Statistics},
}
% Chapelle and Li (2011): empirical evaluation of Thompson sampling.
% "Thompson" is a surname and is braced so styles keep its capital letter.
@inproceedings{Chapelle2011,
  author    = {Chapelle, Olivier and Li, Lihong},
  title     = {An empirical evaluation of {Thompson} sampling},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2249--2257},
  year      = {2011},
}
% Narita, Yasui and Yata (2019): efficient counterfactual learning from bandit feedback.
@inproceedings{Narita2019,
  author    = {Narita, Yusuke and Yasui, Shota and Yata, Kohei},
  title     = {Efficient counterfactual learning from bandit feedback},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {33},
  pages     = {4634--4641},
  year      = {2019},
}
% Kallus and Uehara (2019): intrinsically efficient off-policy evaluation.
% The date-added / date-modified fields are reference-manager bookkeeping that styles ignore.
@inproceedings{Kallus2019,
  author        = {Kallus, Nathan and Uehara, Masatoshi},
  title         = {Intrinsically efficient, stable, and bounded off-policy evaluation for reinforcement learning},
  booktitle     = {Advances in Neural Information Processing Systems},
  year          = {2019},
  date-added    = {2020-01-27 03:24:10 -0500},
  date-modified = {2020-01-27 03:24:10 -0500},
}
% Irpan et al. (2019): off-policy evaluation via off-policy classification.
% Names use the unambiguous "Last, First" form; they parse the same as before.
@inproceedings{Irpan2019OffPolicyEV,
  author    = {Irpan, Alex and Rao, Kanishka and Bousmalis, Konstantinos and Harris, Chris and Ibarz, Julian and Levine, Sergey},
  title     = {Off-Policy Evaluation via Off-Policy Classification},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2019},
}
% Munos et al. (2016): safe and efficient off-policy reinforcement learning.
% The first author's given name carries an acute accent, written as a BibTeX
% special character so sorting and labels treat it as one letter.
@inproceedings{munos2016safe,
  author    = {Munos, R{\'e}mi and Stepleton, Tom and Harutyunyan, Anna and Bellemare, Marc},
  title     = {Safe and Efficient Off-Policy Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1054--1062},
  year      = {2016},
}
% Precup, Sutton and Singh (2000): eligibility traces for off-policy policy evaluation.
% The venue is given in booktitle only; inproceedings entries take no journal field.
% Title casing is left to the bibliography style (no whole-title braces).
@inproceedings{Precup2000,
  author    = {Precup, Doina and Sutton, Richard S. and Singh, Satinder},
  title     = {Eligibility Traces for Off-Policy Policy Evaluation},
  booktitle = {Proceedings of the 17th International Conference on Machine Learning},
  pages     = {759--766},
  year      = {2000},
}
% Precup, Sutton and Dasgupta (2001): off-policy TD learning with function approximation.
% The venue is given in booktitle only; inproceedings entries take no journal field.
% Title casing is left to the bibliography style (no whole-title braces).
@inproceedings{precup2001temporal,
  author    = {Precup, Doina and Sutton, Richard S. and Dasgupta, Sanjoy},
  title     = {Off-Policy Temporal-Difference Learning with Function Approximation},
  booktitle = {Proceedings of the 18th International Conference on Machine Learning},
  pages     = {417--424},
  year      = {2001},
}
% Jiang and Li (2015): doubly robust off-policy value evaluation (arXiv preprint).
@article{jiang2015doubly,
  author  = {Jiang, Nan and Li, Lihong},
  title   = {Doubly robust off-policy value evaluation for reinforcement learning},
  journal = {arXiv preprint arXiv:1511.03722},
  year    = {2015},
}
% Ke et al. (2017): LightGBM gradient boosting decision tree.
% The library name is braced so styles keep its mixed-case spelling.
@inproceedings{ke2017lightgbm,
  author    = {Ke, Guolin and Meng, Qi and Finley, Thomas and Wang, Taifeng and Chen, Wei and Ma, Weidong and Ye, Qiwei and Liu, Tie-Yan},
  title     = {{LightGBM}: A highly efficient gradient boosting decision tree},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {3146--3154},
  year      = {2017},
}
% Swaminathan and Joachims (2015): self-normalized estimator for counterfactual learning.
% Title casing is left to the bibliography style (no whole-title braces).
% acmid / numpages / bdsk-url-1 are exporter metadata that standard styles ignore.
@inproceedings{Swaminathan2015b,
  author     = {Swaminathan, Adith and Joachims, Thorsten},
  title      = {The Self-normalized Estimator for Counterfactual Learning},
  booktitle  = {Advances in Neural Information Processing Systems},
  pages      = {3231--3239},
  year       = {2015},
  acmid      = {2969600},
  numpages   = {9},
  bdsk-url-1 = {http://dl.acm.org/citation.cfm?id=2969442.2969600},
}
% Wang, Agarwal and Dudik (2017): optimal and adaptive off-policy evaluation.
% NOTE(review): the citation key says 2016 while the year field is 2017; the key
% is kept unchanged so existing citations keep resolving.
% The venue is given in booktitle only; inproceedings entries take no journal field.
@inproceedings{Wang2016,
  author        = {Wang, Yu-Xiang and Agarwal, Alekh and Dud{\'\i}k, Miroslav},
  title         = {Optimal and Adaptive Off-policy Evaluation in Contextual Bandits},
  booktitle     = {Proceedings of the 34th International Conference on Machine Learning},
  pages         = {3589--3597},
  year          = {2017},
  date-added    = {2019-12-29 13:23:35 -0500},
  date-modified = {2019-12-29 13:23:35 -0500},
}
% Swaminathan et al. (2017): off-policy evaluation for slate recommendation.
% The venue is given in booktitle only; inproceedings entries take no journal field.
% Title casing is left to the bibliography style (no whole-title braces).
@inproceedings{swaminathan2017off,
  author        = {Swaminathan, Adith and Krishnamurthy, Akshay and Agarwal, Alekh and Dud{\'\i}k, Miro and Langford, John and Jose, Damien and Zitouni, Imed},
  title         = {Off-policy Evaluation for Slate Recommendation},
  booktitle     = {Advances in Neural Information Processing Systems},
  pages         = {3635--3645},
  year          = {2017},
  date-added    = {2019-12-29 13:23:35 -0500},
  date-modified = {2019-12-29 13:23:35 -0500},
}
% Strehl et al. (2010): learning from logged implicit exploration data.
% All names use the "Last, First" form; mixing forms in one list is fragile.
% The venue is given in booktitle only; inproceedings entries take no journal field.
@inproceedings{Strehl2010,
  author        = {Strehl, Alex and Langford, John and Li, Lihong and Kakade, Sham M.},
  title         = {Learning from Logged Implicit Exploration Data},
  booktitle     = {Advances in Neural Information Processing Systems},
  pages         = {2217--2225},
  year          = {2010},
  date-added    = {2019-12-29 13:23:35 -0500},
  date-modified = {2019-12-29 13:23:35 -0500},
  bdsk-url-1    = {http://papers.nips.cc/paper/3977-learning-from-logged-implicit-exploration-data.pdf},
}
% Li et al. (2012): unbiased offline evaluation of contextual bandit algorithms.
% The venue is given once, in booktitle; the duplicate journal field is not
% used by inproceedings entries. Page range uses the double-hyphen form.
@inproceedings{Li2012,
  author        = {Li, Lihong and Chu, Wei and Langford, John and Moon, Taesup and Wang, Xuanhui},
  title         = {An Unbiased Offline Evaluation of Contextual Bandit Algorithms with Generalized Linear Models},
  booktitle     = {Journal of Machine Learning Research: Workshop and Conference Proceedings},
  volume        = {26},
  pages         = {19--36},
  year          = {2012},
  numpages      = {18},
  date-added    = {2019-12-29 13:23:35 -0500},
  date-modified = {2019-12-29 13:23:35 -0500},
}
% Horvitz and Thompson (1952): sampling without replacement from a finite universe.
% Title casing is left to the bibliography style (no whole-title braces).
@article{horvitz1952generalization,
  author        = {Horvitz, Daniel G. and Thompson, Donovan J.},
  title         = {A Generalization of Sampling Without Replacement from a Finite Universe},
  journal       = {Journal of the American Statistical Association},
  volume        = {47},
  number        = {260},
  pages         = {663--685},
  year          = {1952},
  publisher     = {Taylor \& Francis Group},
  date-added    = {2019-12-29 13:23:35 -0500},
  date-modified = {2019-12-29 13:23:35 -0500},
}
% Li et al. (2010): contextual-bandit approach to news article recommendation.
% The venue is given in booktitle only; inproceedings entries take no journal field.
% Title casing is left to the bibliography style (no whole-title braces).
@inproceedings{Li2010,
  author        = {Li, Lihong and Chu, Wei and Langford, John and Schapire, Robert E.},
  title         = {A Contextual-bandit Approach to Personalized News Article Recommendation},
  booktitle     = {Proceedings of the 19th International Conference on World Wide Web},
  pages         = {661--670},
  year          = {2010},
  organization  = {ACM},
  date-added    = {2019-12-29 13:23:35 -0500},
  date-modified = {2019-12-29 13:23:35 -0500},
}
% Farajtabar, Chow and Ghavamzadeh (2018): more robust doubly robust off-policy evaluation.
% Names use the unambiguous "Last, First" form; they parse the same as before.
@inproceedings{Farajtabar2018,
  author    = {Farajtabar, Mehrdad and Chow, Yinlam and Ghavamzadeh, Mohammad},
  title     = {More Robust Doubly Robust Off-policy Evaluation},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning},
  pages     = {1447--1456},
  year      = {2018},
}
% Newey and Robins (2018): cross-fitting and fast remainder rates (arXiv preprint).
% The journal field follows the "arXiv preprint arXiv:NNNN.NNNNN" pattern used by
% the other preprint entries in this file, so the reference can be located.
@article{newey2018crossfitting,
  author  = {Newey, Whitney K. and Robins, James M.},
  title   = {Cross-Fitting and Fast Remainder Rates for Semiparametric Estimation},
  journal = {arXiv preprint arXiv:1801.09138},
  year    = {2018},
}
% Dudik et al. (2014): doubly robust policy evaluation and optimization.
% Page range uses the double-hyphen form; the issue number is recorded with the volume.
% Title casing is left to the bibliography style (no whole-title braces).
@article{Dudik2014,
  author  = {Dud{\'\i}k, Miroslav and Erhan, Dumitru and Langford, John and Li, Lihong},
  title   = {Doubly Robust Policy Evaluation and Optimization},
  journal = {Statistical Science},
  volume  = {29},
  number  = {4},
  pages   = {485--511},
  year    = {2014},
}
% Chernozhukov et al. (2018): double/debiased machine learning.
% Page range uses the double-hyphen form so it typesets as an en-dash.
@article{Chernozhukov2018,
  author  = {Chernozhukov, Victor and Chetverikov, Denis and Demirer, Mert and Duflo, Esther and Hansen, Christian and Newey, Whitney and Robins, James},
  title   = {Double/debiased machine learning for treatment and structural parameters},
  journal = {The Econometrics Journal},
  volume  = {21},
  number  = {1},
  pages   = {C1--C68},
  year    = {2018},
}
% Jiang and Li (2016): doubly robust off-policy value evaluation for reinforcement learning.
% Names use the unambiguous "Last, First" form; they parse the same as before.
@inproceedings{Jiang16,
  author    = {Jiang, Nan and Li, Lihong},
  title     = {Doubly Robust Off-policy Value Evaluation for Reinforcement Learning},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning},
  pages     = {652--661},
  year      = {2016},
}
% Thomas and Brunskill (2016): data-efficient off-policy policy evaluation.
% Names use the unambiguous "Last, First" form; they parse the same as before.
@inproceedings{Thomas16,
  author    = {Thomas, Philip and Brunskill, Emma},
  title     = {Data-Efficient Off-Policy Policy Evaluation for Reinforcement Learning},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning},
  pages     = {2139--2148},
  year      = {2016},
}
% Rohde et al. (2018): RecoGym environment (arXiv preprint).
% The environment name is braced so sentence-casing styles cannot turn it into "Recogym".
@article{Rohde2018,
  author  = {Rohde, David and Bonner, Stephen and Dunlop, Travis and Vasile, Flavian and Karatzoglou, Alexandros},
  title   = {{RecoGym}: A Reinforcement Learning Environment for the problem of Product Recommendation in Online Advertising},
  journal = {arXiv preprint arXiv:1808.00720},
  year    = {2018},
}
% Cortes (2018): adapting multi-armed bandit policies to contextual bandits (arXiv preprint).
@article{Cortes2018,
  author  = {Cortes, David},
  title   = {Adapting multi-armed bandits policies to contextual bandits scenarios},
  journal = {arXiv preprint arXiv:1811.04383},
  year    = {2018},
}