docs/refs.bib

% Conference paper. The proceedings title is stored in Title Case with the
% acronyms brace-protected so that no style can down-case them.
@inproceedings{Beygelzimer2009,
  author    = {Beygelzimer, Alina and Langford, John},
  title     = {The offset tree for learning with partial labels},
  booktitle = {Proceedings of the 15th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  pages     = {129--138},
  year      = {2009},
}

% arXiv preprint; the identifier is carried in the journal field.
@article{Su2019,
  author  = {Su, Yi and Dimakopoulou, Maria and Krishnamurthy, Akshay and Dud{\'\i}k, Miroslav},
  title   = {Doubly robust off-policy evaluation with shrinkage},
  journal = {arXiv preprint arXiv:1907.09623},
  year    = {2019},
}

% arXiv preprint; the identifier is carried in the journal field.
% "de Rijke" is written in "von Last, First" form so the particle parses correctly.
@article{Lefortier2016,
  author  = {Lefortier, Damien and Swaminathan, Adith and Gu, Xiaotao and Joachims, Thorsten and de Rijke, Maarten},
  title   = {Large-scale validation of counterfactual learning methods: A test-bed},
  journal = {arXiv preprint arXiv:1612.00367},
  year    = {2016},
}

% Conference paper.
@inproceedings{Agrawal2013,
  author    = {Agrawal, Shipra and Goyal, Navin},
  title     = {Thompson sampling for contextual bandits with linear payoffs},
  booktitle = {International Conference on Machine Learning},
  pages     = {127--135},
  year      = {2013},
}

% Conference paper. The algorithm name is brace-protected to keep its
% capitalisation; the proceedings title is stored in Title Case.
@inproceedings{Mahajan2012,
  author    = {Mahajan, Dhruv Kumar and Rastogi, Rajeev and Tiwari, Charu and Mitra, Adway},
  title     = {{LogUCB}: an explore-exploit algorithm for comments recommendation},
  booktitle = {Proceedings of the 21st {ACM} International Conference on Information and Knowledge Management},
  pages     = {6--15},
  year      = {2012},
}

% Journal article. The journal name is stored in its proper Title Case.
@article{Breiman1996,
  author    = {Breiman, Leo},
  title     = {Bagging predictors},
  journal   = {Machine Learning},
  volume    = {24},
  number    = {2},
  pages     = {123--140},
  year      = {1996},
  publisher = {Springer},
}

% arXiv preprint; the identifier is carried in the journal field.
% The benchmark's proper name is brace-protected against sentence-casing styles.
@article{hu2020open,
  author  = {Hu, Weihua and Fey, Matthias and Zitnik, Marinka and Dong, Yuxiao and Ren, Hongyu and Liu, Bowen and Catasta, Michele and Leskovec, Jure},
  title   = {{Open Graph Benchmark}: Datasets for Machine Learning on Graphs},
  journal = {arXiv preprint arXiv:2005.00687},
  year    = {2020},
}

% Conference paper.
@inproceedings{xie2019towards,
  author    = {Xie, Tengyang and Ma, Yifei and Wang, Yu-Xiang},
  title     = {Towards Optimal Off-Policy Evaluation for Reinforcement Learning with Marginalized Importance Sampling},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {9665--9675},
  year      = {2019},
}


% arXiv preprint; the identifier is carried in the journal field.
@article{Narita2020,
  author  = {Narita, Yusuke and Yasui, Shota and Yata, Kohei},
  title   = {Off-policy Bandit and Reinforcement Learning},
  journal = {arXiv preprint arXiv:2002.08536},
  year    = {2020},
}

% Journal article with exactly two authors; the scraped "and others" token
% was spurious and has been dropped. Initials are spaced so they parse as
% separate name tokens.
@article{Kang2007,
  author    = {Kang, Joseph D. Y. and Schafer, Joseph L.},
  title     = {Demystifying double robustness: A comparison of alternative strategies for estimating a population mean from incomplete data},
  journal   = {Statistical Science},
  volume    = {22},
  number    = {4},
  pages     = {523--539},
  year      = {2007},
  publisher = {Institute of Mathematical Statistics},
}

% Conference paper. "Thompson" is a proper noun and is brace-protected;
% the proceedings title matches the spelling used by the other entries
% in this file.
@inproceedings{Chapelle2011,
  author    = {Chapelle, Olivier and Li, Lihong},
  title     = {An empirical evaluation of {Thompson} sampling},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2249--2257},
  year      = {2011},
}

% Conference paper.
@inproceedings{Narita2019,
  author    = {Narita, Yusuke and Yasui, Shota and Yata, Kohei},
  title     = {Efficient counterfactual learning from bandit feedback},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {33},
  pages     = {4634--4641},
  year      = {2019},
}

% Conference paper. The date-added / date-modified fields are reference-manager
% bookkeeping and are ignored by bibliography styles.
@inproceedings{Kallus2019,
  author        = {Kallus, Nathan and Uehara, Masatoshi},
  title         = {Intrinsically efficient, stable, and bounded off-policy evaluation for reinforcement learning},
  booktitle     = {Advances in Neural Information Processing Systems},
  year          = {2019},
  date-added    = {2020-01-27 03:24:10 -0500},
  date-modified = {2020-01-27 03:24:10 -0500},
}

% Conference paper. Author names use the unambiguous "Last, First" form.
@inproceedings{Irpan2019OffPolicyEV,
  author    = {Irpan, Alex and Rao, Kanishka and Bousmalis, Konstantinos and Harris, Chris and Ibarz, Julian and Levine, Sergey},
  title     = {Off-Policy Evaluation via Off-Policy Classification},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2019},
}

% Conference paper. The first author's given name carries an acute accent,
% written as a BibTeX special character so sorting and labels stay correct.
@inproceedings{munos2016safe,
  author    = {Munos, R{\'e}mi and Stepleton, Tom and Harutyunyan, Anna and Bellemare, Marc},
  title     = {Safe and Efficient Off-Policy Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1054--1062},
  year      = {2016},
}

% Conference paper. The venue lives in booktitle only (a journal field does not
% belong on a proceedings entry), the page range uses a double hyphen, and the
% title is single-braced so the bibliography style controls its casing.
@inproceedings{Precup2000,
  author    = {Precup, Doina and Sutton, Richard S. and Singh, Satinder},
  title     = {Eligibility Traces for Off-Policy Policy Evaluation},
  booktitle = {Proceedings of the 17th International Conference on Machine Learning},
  pages     = {759--766},
  year      = {2000},
}

% Conference paper. The venue lives in booktitle only (a journal field does not
% belong on a proceedings entry), the page range uses a double hyphen, and the
% title is single-braced so the bibliography style controls its casing.
@inproceedings{precup2001temporal,
  author    = {Precup, Doina and Sutton, Richard S. and Dasgupta, Sanjoy},
  title     = {Off-Policy Temporal-Difference Learning with Function Approximation},
  booktitle = {Proceedings of the 18th International Conference on Machine Learning},
  pages     = {417--424},
  year      = {2001},
}

% arXiv preprint; the identifier is carried in the journal field.
@article{jiang2015doubly,
  author  = {Jiang, Nan and Li, Lihong},
  title   = {Doubly robust off-policy value evaluation for reinforcement learning},
  journal = {arXiv preprint arXiv:1511.03722},
  year    = {2015},
}

% Conference paper. The library name is brace-protected to keep its
% mixed-case spelling under any bibliography style.
@inproceedings{ke2017lightgbm,
  author    = {Ke, Guolin and Meng, Qi and Finley, Thomas and Wang, Taifeng and Chen, Wei and Ma, Weidong and Ye, Qiwei and Liu, Tie-Yan},
  title     = {{LightGBM}: A highly efficient gradient boosting decision tree},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {3146--3154},
  year      = {2017},
}

% Conference paper. The title is single-braced so the bibliography style
% controls its casing. The acmid, numpages and bdsk-url-1 fields are
% exporter metadata that standard styles ignore.
@inproceedings{Swaminathan2015b,
  author     = {Swaminathan, Adith and Joachims, Thorsten},
  title      = {The Self-normalized Estimator for Counterfactual Learning},
  booktitle  = {Advances in Neural Information Processing Systems},
  pages      = {3231--3239},
  year       = {2015},
  acmid      = {2969600},
  numpages   = {9},
  bdsk-url-1 = {http://dl.acm.org/citation.cfm?id=2969442.2969600},
}


% Conference paper. The citation key is kept as-is for existing citations even
% though the year field is 2017. The venue lives in booktitle only, the page
% range uses a double hyphen, the title is single-braced, and the third
% author's surname carries the same accent as in the other entries of this file.
@inproceedings{Wang2016,
  author        = {Wang, Yu-Xiang and Agarwal, Alekh and Dud{\'\i}k, Miroslav},
  title         = {Optimal and Adaptive Off-policy Evaluation in Contextual Bandits},
  booktitle     = {Proceedings of the 34th International Conference on Machine Learning},
  pages         = {3589--3597},
  year          = {2017},
  date-added    = {2019-12-29 13:23:35 -0500},
  date-modified = {2019-12-29 13:23:35 -0500},
}

% Conference paper. The venue lives in booktitle only, the title is
% single-braced so the style controls casing, and the fourth author is
% spelled the same way as in the other entries of this file.
@inproceedings{swaminathan2017off,
  author        = {Swaminathan, Adith and Krishnamurthy, Akshay and Agarwal, Alekh and Dud{\'\i}k, Miroslav and Langford, John and Jose, Damien and Zitouni, Imed},
  title         = {Off-policy Evaluation for Slate Recommendation},
  booktitle     = {Advances in Neural Information Processing Systems},
  pages         = {3635--3645},
  year          = {2017},
  date-added    = {2019-12-29 13:23:35 -0500},
  date-modified = {2019-12-29 13:23:35 -0500},
}

% Conference paper. All author names use the "Last, First" form, the venue
% lives in booktitle only, and the title is single-braced so the style
% controls casing.
@inproceedings{Strehl2010,
  author        = {Strehl, Alex and Langford, John and Li, Lihong and Kakade, Sham M.},
  title         = {Learning from Logged Implicit Exploration Data},
  booktitle     = {Advances in Neural Information Processing Systems},
  pages         = {2217--2225},
  year          = {2010},
  date-added    = {2019-12-29 13:23:35 -0500},
  date-modified = {2019-12-29 13:23:35 -0500},
  bdsk-url-1    = {http://papers.nips.cc/paper/3977-learning-from-logged-implicit-exploration-data.pdf},
}


% Proceedings paper. The venue is given once, in booktitle (the duplicate
% journal field is dropped), the page range uses a double hyphen, and the
% title is single-braced so the style controls casing.
@inproceedings{Li2012,
  author        = {Li, Lihong and Chu, Wei and Langford, John and Moon, Taesup and Wang, Xuanhui},
  title         = {An Unbiased Offline Evaluation of Contextual Bandit Algorithms with Generalized Linear Models},
  booktitle     = {Journal of Machine Learning Research: Workshop and Conference Proceedings},
  volume        = {26},
  pages         = {19--36},
  year          = {2012},
  numpages      = {18},
  date-added    = {2019-12-29 13:23:35 -0500},
  date-modified = {2019-12-29 13:23:35 -0500},
}


% Journal article. The title is single-braced so the bibliography style
% controls its casing, consistent with the other entries in this file.
@article{horvitz1952generalization,
  author        = {Horvitz, Daniel G. and Thompson, Donovan J.},
  title         = {A Generalization of Sampling Without Replacement from a Finite Universe},
  journal       = {Journal of the American Statistical Association},
  volume        = {47},
  number        = {260},
  pages         = {663--685},
  year          = {1952},
  publisher     = {Taylor \& Francis Group},
  date-added    = {2019-12-29 13:23:35 -0500},
  date-modified = {2019-12-29 13:23:35 -0500},
}

% Conference paper. The venue lives in booktitle only (a journal field does not
% belong on a proceedings entry) and the title is single-braced so the style
% controls casing.
@inproceedings{Li2010,
  author        = {Li, Lihong and Chu, Wei and Langford, John and Schapire, Robert E.},
  title         = {A Contextual-bandit Approach to Personalized News Article Recommendation},
  booktitle     = {Proceedings of the 19th International Conference on World Wide Web},
  pages         = {661--670},
  year          = {2010},
  organization  = {ACM},
  date-added    = {2019-12-29 13:23:35 -0500},
  date-modified = {2019-12-29 13:23:35 -0500},
}

% Conference paper. Author names use the unambiguous "Last, First" form.
@inproceedings{Farajtabar2018,
  author    = {Farajtabar, Mehrdad and Chow, Yinlam and Ghavamzadeh, Mohammad},
  title     = {More Robust Doubly Robust Off-policy Evaluation},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning},
  pages     = {1447--1456},
  year      = {2018},
}

% arXiv preprint. The journal field follows the same
% "arXiv preprint arXiv:NNNN.NNNNN" convention as the other preprints here.
% NOTE(review): confirm the arXiv identifier against the arXiv listing.
@article{newey2018crossfitting,
  author  = {Newey, Whitney K. and Robins, James M.},
  title   = {Cross-Fitting and Fast Remainder Rates for Semiparametric Estimation},
  journal = {arXiv preprint arXiv:1801.09138},
  year    = {2018},
}

% Journal article. The page range uses a double hyphen and the title is
% single-braced so the bibliography style controls its casing.
@article{Dudik2014,
  author  = {Dud{\'\i}k, Miroslav and Erhan, Dumitru and Langford, John and Li, Lihong},
  title   = {Doubly Robust Policy Evaluation and Optimization},
  journal = {Statistical Science},
  volume  = {29},
  pages   = {485--511},
  year    = {2014},
}

% Journal article. The page range (C-prefixed page numbers) uses a double hyphen.
@article{Chernozhukov2018,
  author  = {Chernozhukov, Victor and Chetverikov, Denis and Demirer, Mert and Duflo, Esther and Hansen, Christian and Newey, Whitney and Robins, James},
  title   = {Double/debiased machine learning for treatment and structural parameters},
  journal = {The Econometrics Journal},
  volume  = {21},
  number  = {1},
  pages   = {C1--C68},
  year    = {2018},
}

% Conference paper. Author names use the unambiguous "Last, First" form.
@inproceedings{Jiang16,
  author    = {Jiang, Nan and Li, Lihong},
  title     = {Doubly Robust Off-policy Value Evaluation for Reinforcement Learning},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning},
  pages     = {652--661},
  year      = {2016},
}

% Conference paper. Author names use the unambiguous "Last, First" form.
@inproceedings{Thomas16,
  author    = {Thomas, Philip and Brunskill, Emma},
  title     = {Data-Efficient Off-Policy Policy Evaluation for Reinforcement Learning},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning},
  pages     = {2139--2148},
  year      = {2016},
}


% arXiv preprint; the identifier is carried in the journal field.
% The camel-case environment name is brace-protected against sentence-casing styles.
@article{Rohde2018,
  author  = {Rohde, David and Bonner, Stephen and Dunlop, Travis and Vasile, Flavian and Karatzoglou, Alexandros},
  title   = {{RecoGym}: A Reinforcement Learning Environment for the problem of Product Recommendation in Online Advertising},
  journal = {arXiv preprint arXiv:1808.00720},
  year    = {2018},
}


% arXiv preprint; the identifier is carried in the journal field.
@article{Cortes2018,
  author  = {Cortes, David},
  title   = {Adapting multi-armed bandits policies to contextual bandits scenarios},
  journal = {arXiv preprint arXiv:1811.04383},
  year    = {2018},
}