% Technical report by Kangin and Pugeault (2018), University of Exeter.
% The url field holds a hdl.handle.net handle link for the report.
% NOTE(review): publisher is not a standard techreport field and is likely
% ignored by classic styles; if the arXiv identifier is known, consider
% recording it via eprint / eprinttype instead -- TODO confirm.
@techreport{KanginPugeaultTech2018a,
  author      = {Kangin, Dmitry and Pugeault, Nicolas},
  title       = {On-Policy Trust Region Policy Optimisation with Replay Buffers},
  institution = {University of Exeter},
  publisher   = {arXiv},
  year        = {2018},
  url         = {http://hdl.handle.net/10871/35684},
}