From b18632910339f788e61eb1e950ff72d132c7cab9 Mon Sep 17 00:00:00 2001
From: Szymon Sidor
Date: Sat, 14 Nov 2015 18:28:44 -0500
Subject: [PATCH] Cleaned up the repo

---
 notebooks/Goofiness.ipynb | 296 ++++
 MLP.ipynb => notebooks/MLP.ipynb | 0
 .../XOR Network.ipynb | 0
 demo.ipynb => notebooks/karpathy_game.ipynb | 983 +++++++-----
 notebooks/my_sim/0.svg | 164 +++
 notebooks/my_sim/1.svg | 164 +++
 notebooks/my_sim/10.svg | 164 +++
 notebooks/my_sim/100.svg | 164 +++
 notebooks/my_sim/101.svg | 164 +++
 notebooks/my_sim/102.svg | 164 +++
 notebooks/my_sim/103.svg | 164 +++
 notebooks/my_sim/104.svg | 164 +++
 notebooks/my_sim/105.svg | 164 +++
 notebooks/my_sim/106.svg | 164 +++
 notebooks/my_sim/107.svg | 164 +++
 notebooks/my_sim/108.svg | 164 +++
 notebooks/my_sim/109.svg | 164 +++
 notebooks/my_sim/11.svg | 164 +++
 notebooks/my_sim/110.svg | 164 +++
 notebooks/my_sim/111.svg | 164 +++
 notebooks/my_sim/112.svg | 164 +++
 notebooks/my_sim/113.svg | 164 +++
 notebooks/my_sim/114.svg | 164 +++
 notebooks/my_sim/115.svg | 164 +++
 notebooks/my_sim/116.svg | 164 +++
 notebooks/my_sim/117.svg | 164 +++
 notebooks/my_sim/118.svg | 164 +++
 notebooks/my_sim/119.svg | 164 +++
 notebooks/my_sim/12.svg | 164 +++
 notebooks/my_sim/120.svg | 164 +++
 notebooks/my_sim/121.svg | 164 +++
 notebooks/my_sim/122.svg | 164 +++
 notebooks/my_sim/123.svg | 164 +++
 notebooks/my_sim/124.svg | 164 +++
 notebooks/my_sim/125.svg | 164 +++
 notebooks/my_sim/126.svg | 164 +++
 notebooks/my_sim/127.svg | 164 +++
 notebooks/my_sim/128.svg | 164 +++
 notebooks/my_sim/13.svg | 164 +++
 notebooks/my_sim/14.svg | 164 +++
 notebooks/my_sim/15.svg | 164 +++
 notebooks/my_sim/16.svg | 164 +++
 notebooks/my_sim/17.svg | 164 +++
 notebooks/my_sim/18.svg | 164 +++
 notebooks/my_sim/19.svg | 164 +++
 notebooks/my_sim/2.svg | 164 +++
 notebooks/my_sim/20.svg | 164 +++
 notebooks/my_sim/21.svg | 164 +++
 notebooks/my_sim/22.svg | 164 +++
 notebooks/my_sim/23.svg | 164 +++
 notebooks/my_sim/24.svg | 164 +++
 notebooks/my_sim/25.svg | 164 +++
 notebooks/my_sim/26.svg | 164 +++
 notebooks/my_sim/27.svg | 164 +++
 notebooks/my_sim/28.svg | 164 +++
 notebooks/my_sim/29.svg | 164 +++
 notebooks/my_sim/3.svg | 164 +++
 notebooks/my_sim/30.svg | 164 +++
 notebooks/my_sim/31.svg | 164 +++
 notebooks/my_sim/32.svg | 164 +++
 notebooks/my_sim/33.svg | 164 +++
 notebooks/my_sim/34.svg | 164 +++
 notebooks/my_sim/35.svg | 164 +++
 notebooks/my_sim/36.svg | 164 +++
 notebooks/my_sim/37.svg | 164 +++
 notebooks/my_sim/38.svg | 164 +++
 notebooks/my_sim/39.svg | 164 +++
 notebooks/my_sim/4.svg | 164 +++
 notebooks/my_sim/40.svg | 164 +++
 notebooks/my_sim/41.svg | 164 +++
 notebooks/my_sim/42.svg | 164 +++
 notebooks/my_sim/43.svg | 164 +++
 notebooks/my_sim/44.svg | 164 +++
 notebooks/my_sim/45.svg | 164 +++
 notebooks/my_sim/46.svg | 164 +++
 notebooks/my_sim/47.svg | 164 +++
 notebooks/my_sim/48.svg | 164 +++
 notebooks/my_sim/49.svg | 164 +++
 notebooks/my_sim/5.svg | 164 +++
 notebooks/my_sim/50.svg | 164 +++
 notebooks/my_sim/51.svg | 164 +++
 notebooks/my_sim/52.svg | 164 +++
 notebooks/my_sim/53.svg | 164 +++
 notebooks/my_sim/54.svg | 164 +++
 notebooks/my_sim/55.svg | 164 +++
 notebooks/my_sim/56.svg | 164 +++
 notebooks/my_sim/57.svg | 164 +++
 notebooks/my_sim/58.svg | 164 +++
 notebooks/my_sim/59.svg | 164 +++
 notebooks/my_sim/6.svg | 164 +++
 notebooks/my_sim/60.svg | 164 +++
 notebooks/my_sim/61.svg | 164 +++
 notebooks/my_sim/62.svg | 164 +++
 notebooks/my_sim/63.svg | 164 +++
 notebooks/my_sim/64.svg | 164 +++
 notebooks/my_sim/65.svg | 164 +++
 notebooks/my_sim/66.svg | 164 +++
 notebooks/my_sim/67.svg | 164 +++
 notebooks/my_sim/68.svg | 164 +++
 notebooks/my_sim/69.svg | 164 +++
 notebooks/my_sim/7.svg | 164 +++
 notebooks/my_sim/70.svg | 164 +++
 notebooks/my_sim/71.svg | 164 +++
 notebooks/my_sim/72.svg | 164 +++
 notebooks/my_sim/73.svg | 164 +++
 notebooks/my_sim/74.svg | 164 +++
 notebooks/my_sim/75.svg | 164 +++
 notebooks/my_sim/76.svg | 164 +++
 notebooks/my_sim/77.svg | 164 +++
 notebooks/my_sim/78.svg | 164 +++
 notebooks/my_sim/79.svg | 164 +++
 notebooks/my_sim/8.svg | 164 +++
 notebooks/my_sim/80.svg | 164 +++
 notebooks/my_sim/81.svg | 164 +++
 notebooks/my_sim/82.svg | 164 +++
 notebooks/my_sim/83.svg | 164 +++
 notebooks/my_sim/84.svg | 164 +++
 notebooks/my_sim/85.svg | 164 +++
 notebooks/my_sim/86.svg | 164 +++
 notebooks/my_sim/87.svg | 164 +++
 notebooks/my_sim/88.svg | 164 +++
 notebooks/my_sim/89.svg | 164 +++
 notebooks/my_sim/9.svg | 164 +++
 notebooks/my_sim/90.svg | 164 +++
 notebooks/my_sim/91.svg | 164 +++
 notebooks/my_sim/92.svg | 164 +++
 notebooks/my_sim/93.svg | 164 +++
 notebooks/my_sim/94.svg | 164 +++
 notebooks/my_sim/95.svg | 164 +++
 notebooks/my_sim/96.svg | 164 +++
 notebooks/my_sim/97.svg | 164 +++
 notebooks/my_sim/98.svg | 164 +++
 notebooks/my_sim/99.svg | 164 +++
 notebooks/tf_rl | 1 +
 make_gif.sh => scripts/make_gif.sh | 0
 tf_rl/__init__.py | 1 +
 tf_rl/controller/__init__.py | 2 +
 tf_rl/controller/discrete_deepq.py | 190 ++++
 .../controller/human_controller.py | 2 +-
 tf_models.py => tf_rl/models.py | 0
 tf_rl/simulate.py | 102 ++
 tf_rl/simulation/__init__.py | 1 +
 tf_rl/simulation/karpathy_game.py | 282 +++++
 tf_rl/utils/__init__.py | 0
 event_queue.py => tf_rl/utils/event_queue.py | 0
 geometry.py => tf_rl/utils/geometry.py | 0
 getch.py => tf_rl/utils/getch.py | 0
 svg.py => tf_rl/utils/svg.py | 0
 148 files changed, 22397 insertions(+), 619 deletions(-)

 create mode 100644 notebooks/Goofiness.ipynb
 rename MLP.ipynb => notebooks/MLP.ipynb (100%)
 rename XOR Network.ipynb => notebooks/XOR Network.ipynb (100%)
 rename demo.ipynb => notebooks/karpathy_game.ipynb (50%)
 create mode 100644 notebooks/my_sim/0.svg
 create mode 100644 notebooks/my_sim/1.svg
 create mode 100644 notebooks/my_sim/10.svg
 create mode 100644 notebooks/my_sim/100.svg
 create mode 100644 notebooks/my_sim/101.svg
 create mode 100644 notebooks/my_sim/102.svg
 create mode 100644 notebooks/my_sim/103.svg
 create mode 100644 notebooks/my_sim/104.svg
 create mode 100644 notebooks/my_sim/105.svg
 create mode 100644 notebooks/my_sim/106.svg
 create mode 100644 notebooks/my_sim/107.svg
 create mode 100644 notebooks/my_sim/108.svg
 create mode 100644 notebooks/my_sim/109.svg
 create mode 100644 notebooks/my_sim/11.svg
 create mode 100644 notebooks/my_sim/110.svg
 create mode 100644 notebooks/my_sim/111.svg
 create mode 100644 notebooks/my_sim/112.svg
 create mode 100644 notebooks/my_sim/113.svg
 create mode 100644 notebooks/my_sim/114.svg
 create mode 100644 notebooks/my_sim/115.svg
 create mode 100644 notebooks/my_sim/116.svg
 create mode 100644 notebooks/my_sim/117.svg
 create mode 100644 notebooks/my_sim/118.svg
 create mode 100644 notebooks/my_sim/119.svg
 create mode 100644 notebooks/my_sim/12.svg
 create mode 100644 notebooks/my_sim/120.svg
 create mode 100644 notebooks/my_sim/121.svg
 create mode 100644 notebooks/my_sim/122.svg
 create mode 100644 notebooks/my_sim/123.svg
 create mode 100644 notebooks/my_sim/124.svg
 create mode 100644 notebooks/my_sim/125.svg
 create mode 100644 notebooks/my_sim/126.svg
 create mode 100644 notebooks/my_sim/127.svg
 create mode 100644 notebooks/my_sim/128.svg
 create mode 100644 notebooks/my_sim/13.svg
 create mode 100644 notebooks/my_sim/14.svg
 create mode 100644 notebooks/my_sim/15.svg
 create mode 100644 notebooks/my_sim/16.svg
 create mode 100644 notebooks/my_sim/17.svg
 create mode 100644 notebooks/my_sim/18.svg
 create mode 100644 notebooks/my_sim/19.svg
 create mode 100644 notebooks/my_sim/2.svg
 create mode 100644 notebooks/my_sim/20.svg
 create mode 100644 notebooks/my_sim/21.svg
 create mode 100644 notebooks/my_sim/22.svg
 create mode 100644 notebooks/my_sim/23.svg
 create mode 100644 notebooks/my_sim/24.svg
 create mode 100644 notebooks/my_sim/25.svg
 create mode 100644 notebooks/my_sim/26.svg
 create mode 100644 notebooks/my_sim/27.svg
 create mode 100644 notebooks/my_sim/28.svg
 create mode 100644 notebooks/my_sim/29.svg
 create mode 100644 notebooks/my_sim/3.svg
 create mode 100644 notebooks/my_sim/30.svg
 create mode 100644 notebooks/my_sim/31.svg
 create mode 100644 notebooks/my_sim/32.svg
 create mode 100644 notebooks/my_sim/33.svg
 create mode 100644 notebooks/my_sim/34.svg
 create mode 100644 notebooks/my_sim/35.svg
 create mode 100644 notebooks/my_sim/36.svg
 create mode 100644 notebooks/my_sim/37.svg
 create mode 100644 notebooks/my_sim/38.svg
 create mode 100644 notebooks/my_sim/39.svg
 create mode 100644 notebooks/my_sim/4.svg
 create mode 100644 notebooks/my_sim/40.svg
 create mode 100644 notebooks/my_sim/41.svg
 create mode 100644 notebooks/my_sim/42.svg
 create mode 100644 notebooks/my_sim/43.svg
 create mode 100644 notebooks/my_sim/44.svg
 create mode 100644 notebooks/my_sim/45.svg
 create mode 100644 notebooks/my_sim/46.svg
 create mode 100644 notebooks/my_sim/47.svg
 create mode 100644 notebooks/my_sim/48.svg
 create mode 100644 notebooks/my_sim/49.svg
 create mode 100644 notebooks/my_sim/5.svg
 create mode 100644 notebooks/my_sim/50.svg
 create mode 100644 notebooks/my_sim/51.svg
 create mode 100644 notebooks/my_sim/52.svg
 create mode 100644 notebooks/my_sim/53.svg
 create mode 100644 notebooks/my_sim/54.svg
 create mode 100644 notebooks/my_sim/55.svg
 create mode 100644 notebooks/my_sim/56.svg
 create mode 100644 notebooks/my_sim/57.svg
 create mode 100644 notebooks/my_sim/58.svg
 create mode 100644 notebooks/my_sim/59.svg
 create mode 100644 notebooks/my_sim/6.svg
 create mode 100644 notebooks/my_sim/60.svg
 create mode 100644 notebooks/my_sim/61.svg
 create mode 100644 notebooks/my_sim/62.svg
 create mode 100644 notebooks/my_sim/63.svg
 create mode 100644 notebooks/my_sim/64.svg
 create mode 100644 notebooks/my_sim/65.svg
 create mode 100644 notebooks/my_sim/66.svg
 create mode 100644 notebooks/my_sim/67.svg
 create mode 100644 notebooks/my_sim/68.svg
 create mode 100644 notebooks/my_sim/69.svg
 create mode 100644 notebooks/my_sim/7.svg
 create mode 100644 notebooks/my_sim/70.svg
 create mode 100644 notebooks/my_sim/71.svg
 create mode 100644 notebooks/my_sim/72.svg
 create mode 100644 notebooks/my_sim/73.svg
 create mode 100644 notebooks/my_sim/74.svg
 create mode 100644 notebooks/my_sim/75.svg
 create mode 100644 notebooks/my_sim/76.svg
 create mode 100644 notebooks/my_sim/77.svg
 create mode 100644 notebooks/my_sim/78.svg
 create mode 100644 notebooks/my_sim/79.svg
 create mode 100644 notebooks/my_sim/8.svg
 create mode 100644 notebooks/my_sim/80.svg
 create mode 100644 notebooks/my_sim/81.svg
 create mode 100644 notebooks/my_sim/82.svg
 create mode 100644 notebooks/my_sim/83.svg
 create mode 100644 notebooks/my_sim/84.svg
 create mode 100644 notebooks/my_sim/85.svg
 create mode 100644 notebooks/my_sim/86.svg
 create mode 100644 notebooks/my_sim/87.svg
 create mode 100644 notebooks/my_sim/88.svg
 create mode 100644 notebooks/my_sim/89.svg
 create mode
100644 notebooks/my_sim/9.svg create mode 100644 notebooks/my_sim/90.svg create mode 100644 notebooks/my_sim/91.svg create mode 100644 notebooks/my_sim/92.svg create mode 100644 notebooks/my_sim/93.svg create mode 100644 notebooks/my_sim/94.svg create mode 100644 notebooks/my_sim/95.svg create mode 100644 notebooks/my_sim/96.svg create mode 100644 notebooks/my_sim/97.svg create mode 100644 notebooks/my_sim/98.svg create mode 100644 notebooks/my_sim/99.svg create mode 120000 notebooks/tf_rl rename make_gif.sh => scripts/make_gif.sh (100%) create mode 100644 tf_rl/__init__.py create mode 100644 tf_rl/controller/__init__.py create mode 100644 tf_rl/controller/discrete_deepq.py rename human_control.py => tf_rl/controller/human_controller.py (93%) rename tf_models.py => tf_rl/models.py (100%) create mode 100644 tf_rl/simulate.py create mode 100644 tf_rl/simulation/__init__.py create mode 100644 tf_rl/simulation/karpathy_game.py create mode 100644 tf_rl/utils/__init__.py rename event_queue.py => tf_rl/utils/event_queue.py (100%) rename geometry.py => tf_rl/utils/geometry.py (100%) rename getch.py => tf_rl/utils/getch.py (100%) rename svg.py => tf_rl/utils/svg.py (100%) diff --git a/notebooks/Goofiness.ipynb b/notebooks/Goofiness.ipynb new file mode 100644 index 00000000..989978b4 --- /dev/null +++ b/notebooks/Goofiness.ipynb @@ -0,0 +1,296 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "t = tf.random_normal((4,3))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Exception AssertionError: AssertionError() in > ignored\n" + ] + } + ], + "source": [ + "sess = tf.InteractiveSession()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[array([[ 0.72746545, -0.68051934, 0.21055566],\n", + " [ 0.85468984, -1.73917162, -1.21599209],\n", + " [ 0.58213192, -0.90190017, -0.64913821],\n", + " [-0.86752349, 0.16742691, 0.84491307]], dtype=float32),\n", + " array([0, 0, 0, 2])]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sess.run([t, tf.arg_max(t, dimension=1)])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "?tf.arg_max" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "c = tf.constant(0.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[array([[ 0.34724277, 0.6608656 , 0.43116933],\n", + " [-0.48315111, 0.892003 , 0.60531151],\n", + " [ 0.17994949, 0.13526289, -0.71118593],\n", + " [ 0.2168636 , -0.0341404 , -1.04781568]], dtype=float32),\n", + " array([[ 0.17362139, 0.3304328 , 0.21558467],\n", + " [-0.24157555, 0.4460015 , 0.30265576],\n", + " [ 0.08997475, 0.06763145, -0.35559297],\n", + " [ 0.1084318 , -0.0170702 , -0.52390784]], dtype=float32)]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "sess.run([t, c*t])" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[array([[ 0.46160161, -2.05513716, -2.90273142],\n", + " [-1.03117085, -0.45882538, -0.44779259],\n", + " [-0.61726028, -0.33546519, 0.38373214],\n", + " [ 0.83618456, -0.21057898, 2.06498384]], dtype=float32),\n", + " array([-4.49626684, -1.93778872, -0.56899333, 2.69058943], dtype=float32)]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sess.run([t, tf.reduce_sum(t, reduction_indices=[1,])])" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "i = tf.placeholder(tf.int32, (4,))" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Bad slice index Tensor(\"Placeholder_1:0\", shape=TensorShape([Dimension(4)]), dtype=int32) of type ", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0msess\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mt\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m{\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m/usr/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.pyc\u001b[0m in \u001b[0;36m_SliceHelper\u001b[1;34m(tensor, slice_spec)\u001b[0m\n\u001b[0;32m 127\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Ellipsis is not currently supported\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 128\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 129\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Bad slice index %s of type %s\"\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 130\u001b[0m \u001b[0msliced\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mslice\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindices\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msizes\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 131\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0msqueeze_dims\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mTypeError\u001b[0m: Bad slice index Tensor(\"Placeholder_1:0\", shape=TensorShape([Dimension(4)]), dtype=int32) of type " + ] + } + ], + 
"source": [ + "sess.run(t[i], {i: np.array([0,1,1,1])})" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[array([[ 0.02137903, 1.23828769, 0.53914273],\n", + " [-1.06534982, -0.21909024, -1.19062781],\n", + " [-0.50089341, -1.03411007, 0.02972561],\n", + " [ 1.17602372, 0.27990618, -1.23782802]], dtype=float32),\n", + " array([ 1.23828769, -0.21909024, 0.02972561, 1.17602372], dtype=float32)]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sess.run([t, tf.reduce_max(t, 1)])" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "?tf.train.RMSPropOptimizer" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "too many values to unpack", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m4\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mValueError\u001b[0m: too many values to unpack" + ] + } + ], + "source": [ + "a,b,c = [1,2,3,4,5]" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "x = tf.train.SummaryWriter(\"logs/\", sess.graph_def)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "?x.add_summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.8" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/MLP.ipynb b/notebooks/MLP.ipynb similarity index 100% rename from MLP.ipynb rename to notebooks/MLP.ipynb diff --git a/XOR Network.ipynb b/notebooks/XOR Network.ipynb similarity index 100% rename from XOR Network.ipynb rename to notebooks/XOR Network.ipynb diff --git a/demo.ipynb b/notebooks/karpathy_game.ipynb similarity index 50% rename from demo.ipynb rename to notebooks/karpathy_game.ipynb index e27f490a..9b9d7207 100644 --- a/demo.ipynb +++ b/notebooks/karpathy_game.ipynb @@ -2,11 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 13, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", @@ -15,29 +24,26 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import random\n", - "import time\n", "import tempfile\n", + "import tensorflow as tf\n", "\n", - "from IPython.display import clear_output, display, HTML\n", - "from euclid import Circle, Point2, Vector2, LineSegment2\n", - "\n", - "import svg\n", + "from tf_rl.controller import DiscreteDeepQ\n", + "from tf_rl.simulation import KarpathyGame\n", + "from tf_rl import simulate\n", + "from tf_rl.models import MLP\n", "\n", - "from event_queue import EventQueue\n" + "from __future__ import print_function" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": { "collapsed": false }, @@ -46,604 +52,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "/tmp/tmpnbvdvZ\n" + "/tmp/tmpK4C_he\n" ] } ], "source": [ "LOG_DIR = tempfile.mkdtemp()\n", - "print LOG_DIR" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "from collections import deque\n", - "from tf_models import MLP\n", - "\n", - "class DeepQ(object):\n", - " def __init__(self, observation_size,\n", - " num_actions,\n", - " observation_to_actions,\n", - " optimizer,\n", - " session,\n", - " random_action_probability=0.05,\n", - " exploration_period=1000,\n", - " minibatch_size=32,\n", - " discount_rate=0.95,\n", - " max_experience=30000,\n", - " summary_writer=None):\n", - " \"\"\"Initialized the Deepq object.\n", - " \n", - " Based on:\n", - " https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf\n", - " \n", - " Parameters\n", - " -------\n", - " observation_size : int\n", - " length of the vector passed as observation\n", - " num_actions : int\n", - " number of actions that the model can execute\n", - " observation_to_actions: dali model\n", - " model that implements activate function\n", - " that can take in observation vector or a batch\n", - " and returns scores (of unbounded values) for each\n", - " action for each observation.\n", - " input shape: [batch_size, observation_size]\n", - " output shape: [batch_size, num_actions]\n", - " optimizer: tf.solver.*\n", - " optimizer for prediction error\n", - " session: tf.Session\n", - " session on which to execute the computation\n", - " random_action_probability: float (0 to 1)\n", - " exploration_period: int\n", - " probability of choosing a random \n", - " action (epsilon form paper) annealed linearly\n", - " from 1 to random_action_probability over\n", - " exploration_period\n", - " minibatch_size: int\n", - " number of state,action,reward,newstate\n", - " tuples considered during experience reply\n", - " dicount_rate: float (0 to 1)\n", - " how much we care about future rewards.\n", - " max_experience: int\n", - " maximum size of the reply buffer\n", - " summary_writer: tf.train.SummaryWriter\n", - " writer to log metrics\n", - " \"\"\"\n", - " # memorize arguments\n", - " self.observation_size = observation_size\n", - " self.num_actions = num_actions\n", - " \n", - " self.observation_to_actions = observation_to_actions\n", - " self.optimizer = optimizer\n", - " self.s = session\n", - " \n", - " self.random_action_probability = random_action_probability\n", - " self.exploration_period = 
exploration_period\n", - " self.minibatch_size = minibatch_size\n", - " self.discount_rate = tf.constant(discount_rate)\n", - " self.max_experience = max_experience\n", - " \n", - " # deepq state\n", - " self.actions_executed_so_far = 0\n", - " self.experience = deque()\n", - " \n", - " self.iteration = 0\n", - " self.summary_writer = summary_writer\n", - " \n", - " self.create_variables()\n", - " \n", - " def linear_annealing(self, n, total, p_initial, p_final):\n", - " \"\"\"Linear annealing between p_initial and p_final\n", - " over total steps - computes value at step n\"\"\"\n", - " if n >= total:\n", - " return p_final\n", - " else:\n", - " return p_initial - (n * (p_initial - p_final)) / (total)\n", - "\n", - " def create_variables(self):\n", - " # FOR REGULAR ACTION SCORE COMPUTATION\n", - " with tf.name_scope(\"observation\"):\n", - " self.observation = tf.placeholder(tf.float32, (None, self.observation_size), name=\"observation\")\n", - " self.action_scores = self.observation_to_actions(self.observation)\n", - " self.predicted_actions = tf.argmax(self.action_scores, dimension=1, name=\"predicted_actions\")\n", - " \n", - " with tf.name_scope(\"future_rewards\"):\n", - " # FOR PREDICTING TARGET FUTURE REWARDS\n", - " self.observation_mask = tf.placeholder(tf.float32, (None,), name=\"observation_mask\") \n", - " self.rewards = tf.placeholder(tf.float32, (None,), name=\"rewards\")\n", - " target_values = tf.reduce_max(self.action_scores, reduction_indices=[1,]) * self.observation_mask\n", - " self.future_rewards = self.rewards + self.discount_rate * target_values\n", - " \n", - " with tf.name_scope(\"q_value_precition\"):\n", - " # FOR PREDICTION ERROR\n", - " self.action_mask = tf.placeholder(tf.float32, (None, self.num_actions))\n", - " self.masked_action_scores = tf.reduce_sum(self.action_scores * self.action_mask, reduction_indices=[1,])\n", - " self.precomputed_future_rewards = tf.placeholder(tf.float32, (None,))\n", - " temp_diff = self.masked_action_scores - self.precomputed_future_rewards\n", - " self.prediction_error = tf.reduce_mean(tf.square(temp_diff))\n", - " self.train_op = self.optimizer.minimize(self.prediction_error)\n", - " \n", - " self.metrics = [\n", - " tf.scalar_summary(\"prediction_error\", self.prediction_error)\n", - " ]\n", - " \n", - " def action(self, observation):\n", - " \"\"\"Given observation returns the action that should be chosen using\n", - " DeepQ learning strategy. 
Does not backprop.\"\"\"\n", - " assert len(observation.shape) == 1, \\\n", - " \"Action is performed based on single observation.\"\n", - "\n", - " self.actions_executed_so_far += 1\n", - " exploration_p = self.linear_annealing(self.actions_executed_so_far,\n", - " self.exploration_period,\n", - " 1.0,\n", - " self.random_action_probability)\n", - " \n", - " if random.random() < exploration_p:\n", - " return random.randint(0, self.num_actions - 1)\n", - " else:\n", - " return self.s.run(self.predicted_actions, {self.observation: observation[np.newaxis,:]})[0]\n", - " \n", - " def store(self, observation, action, reward, newobservation):\n", - " \"\"\"Store experience, where starting with observation and\n", - " execution action, we arrived at the newobservation and got the\n", - " reward reward\n", - " \n", - " If newstate is None, the state/action pair is assumed to be terminal\n", - " \"\"\"\n", - " self.experience.append((observation, action, reward, newobservation))\n", - " if len(self.experience) > self.max_experience:\n", - " self.experience.popleft()\n", - " \n", - " def training_step(self):\n", - " \"\"\"Pick a self.minibatch_size exeperiences from reply buffer\n", - " and backpropage the value function.\n", - " \"\"\"\n", - " if len(self.experience) < self.minibatch_size:\n", - " return\n", - " \n", - " # sample experience. \n", - " samples = random.sample(range(len(self.experience)), self.minibatch_size)\n", - " samples = [self.experience[i] for i in samples]\n", - " \n", - " # bach states\n", - " states = np.empty((len(samples), self.observation_size))\n", - " newstates = np.empty((len(samples), self.observation_size))\n", - " action_mask = np.zeros((len(samples), self.num_actions))\n", - " \n", - " newstates_mask = np.empty((len(samples),))\n", - " rewards = np.empty((len(samples),))\n", - " \n", - " for i, (state, action, reward, newstate) in enumerate(samples):\n", - " states[i] = state\n", - " action_mask[i] = 0\n", - " action_mask[i][action] = 1\n", - " rewards[i] = reward\n", - " if newstate is not None:\n", - " newstates[i] = state\n", - " newstates_mask[i] = 1\n", - " else:\n", - " newstates[i] = 0\n", - " newstates_mask[i] = 0\n", - " \n", - "\n", - " future_rewards = self.s.run(self.future_rewards, {\n", - " self.observation: newstates,\n", - " self.observation_mask: newstates_mask,\n", - " self.rewards: rewards,\n", - " })\n", - " \n", - " res = self.s.run([self.prediction_error, self.train_op] + self.metrics, {\n", - " self.observation: states,\n", - " self.action_mask: action_mask,\n", - " self.precomputed_future_rewards: future_rewards,\n", - " })\n", - " cost, metrics = res[0], res[2:]\n", - " \n", - " if self.summary_writer is not None:\n", - " for metric in metrics:\n", - " self.summary_writer.add_summary(metric, self.iteration)\n", - " self.iteration += 1 " - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import math\n", - "\n", - "from collections import defaultdict\n", - "\n", - "class GameObject(object):\n", - " def __init__(self, position, speed, obj_type, settings):\n", - " \"\"\"Esentially represents circles of different kinds, which have\n", - " position and speed.\"\"\"\n", - " self.settings = settings\n", - " self.radius = self.settings[\"object_radius\"]\n", - " \n", - " self.obj_type = obj_type\n", - " self.position = position\n", - " self.speed = speed\n", - " self.bounciness = 1.0\n", - " \n", - " def wall_collisions(self):\n", - " \"\"\"Update speed upon 
collision with the wall.\"\"\"\n", - " world_size = self.settings[\"world_size\"]\n", - "\n", - " for dim in range(2):\n", - " if self.position[dim] - self.radius <= 0 and self.speed[dim] < 0:\n", - " self.speed[dim] = - self.speed[dim] * self.bounciness\n", - " elif self.position[dim] + self.radius + 1 >= world_size[dim] and self.speed[dim] > 0:\n", - " self.speed[dim] = - self.speed[dim] * self.bounciness\n", - " \n", - " def move(self, dt):\n", - " \"\"\"Move as if dt seconds passed\"\"\"\n", - " self.position += dt * self.speed\n", - " self.position = Point2(*self.position)\n", - " \n", - " def step(self, dt):\n", - " \"\"\"Move and bounce of walls.\"\"\"\n", - " self.wall_collisions()\n", - " self.move(dt)\n", - " \n", - " def as_circle(self):\n", - " return Circle(self.position, float(self.radius))\n", - " \n", - " def draw(self):\n", - " \"\"\"Return svg object for this item.\"\"\"\n", - " color = self.settings[\"colors\"][self.obj_type]\n", - " return svg.Circle(self.position + Point2(10, 10), self.radius, color=color)\n", - "\n", - "class KarpathyGame(object):\n", - " def __init__(self, settings):\n", - " \"\"\"Initiallize game simulator with settings\"\"\"\n", - " self.settings = settings\n", - " self.size = self.settings[\"world_size\"]\n", - " self.walls = [LineSegment2(Point2(0,0), Point2(0,self.size[1])),\n", - " LineSegment2(Point2(0,self.size[1]), Point2(self.size[0], self.size[1])),\n", - " LineSegment2(Point2(self.size[0], self.size[1]), Point2(self.size[0], 0)),\n", - " LineSegment2(Point2(self.size[0], 0), Point2(0,0))]\n", - " \n", - " self.hero = GameObject(Point2(*self.settings[\"hero_initial_position\"]),\n", - " Vector2(*self.settings[\"hero_initial_speed\"]),\n", - " \"hero\",\n", - " self.settings)\n", - " if not self.settings[\"hero_bounces_off_walls\"]:\n", - " self.hero.bounciness = 0.0\n", - " \n", - " self.objects = []\n", - " for obj_type, number in settings[\"num_objects\"].items():\n", - " for _ in range(number):\n", - " self.spawn_object(obj_type)\n", - " \n", - " self.observation_lines = self.generate_observation_lines()\n", - " \n", - " self.object_reward = 0\n", - " self.collected_rewards = []\n", - " \n", - " # every observation_line sees one of objects or wall and\n", - " # two numbers representing speed of the object (if applicable)\n", - " self.eye_observation_size = len(self.settings[\"objects\"]) + 3\n", - " # additionally there are two numbers representing agents own speed.\n", - " self.observation_size = self.eye_observation_size * len(self.observation_lines) + 2\n", - " \n", - " self.directions = [Vector2(*d) for d in [[1,0], [0,1], [-1,0],[0,-1]]]\n", - " self.num_actions = len(self.directions)\n", - " \n", - " self.objects_eaten = defaultdict(lambda: 0)\n", - " \n", - " def perform_action(self, action_id):\n", - " \"\"\"Change speed to one of hero vectors\"\"\"\n", - " assert 0 <= action_id < self.num_actions\n", - " self.hero.speed *= 0.8\n", - " self.hero.speed += self.directions[action_id] * self.settings[\"delta_v\"]\n", - " \n", - " def spawn_object(self, obj_type):\n", - " \"\"\"Spawn object of a given type and add it to the objects array\"\"\"\n", - " radius = self.settings[\"object_radius\"]\n", - " position = np.random.uniform([radius, radius], np.array(self.size) - radius)\n", - " position = Point2(float(position[0]), float(position[1]))\n", - " max_speed = np.array(self.settings[\"maximum_speed\"])\n", - " speed = np.random.uniform(-max_speed, max_speed).astype(float)\n", - " speed = Vector2(float(speed[0]), float(speed[1]))\n", 
- "\n", - " self.objects.append(GameObject(position, speed, obj_type, self.settings)) \n", - " \n", - " def step(self, dt):\n", - " \"\"\"Simulate all the objects for a given ammount of time.\n", - " \n", - " Also resolve collisions with the hero\"\"\"\n", - " for obj in self.objects + [self.hero] :\n", - " obj.step(dt)\n", - " self.resolve_collisions()\n", - "\n", - " def squared_distance(self, p1, p2):\n", - " return (p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2\n", - " \n", - " def resolve_collisions(self):\n", - " \"\"\"If hero touches, hero eats. Also reward gets updated.\"\"\"\n", - " collision_distance = 2 * self.settings[\"object_radius\"]\n", - " collision_distance2 = collision_distance ** 2\n", - " to_remove = []\n", - " for obj in self.objects:\n", - " if self.squared_distance(self.hero.position, obj.position) < collision_distance2:\n", - " to_remove.append(obj)\n", - " for obj in to_remove:\n", - " self.objects.remove(obj)\n", - " self.objects_eaten[obj.obj_type] += 1\n", - " self.object_reward += self.settings[\"object_reward\"][obj.obj_type]\n", - " self.spawn_object(obj.obj_type)\n", - " \n", - " def inside_walls(self, point):\n", - " \"\"\"Check if the point is inside the walls\"\"\"\n", - " EPS = 1e-4\n", - " return (EPS <= point[0] < self.size[0] - EPS and\n", - " EPS <= point[1] < self.size[1] - EPS)\n", - " \n", - " def observe(self):\n", - " \"\"\"Return observation vector. For all the observation directions it returns representation\n", - " of the closest object to the hero - might be nothing, another object or a wall.\n", - " Representation of observation for all the directions will be concatenated.\n", - " \"\"\"\n", - " num_obj_types = len(self.settings[\"objects\"]) + 1 # and wall\n", - " max_speed_x, max_speed_y = self.settings[\"maximum_speed\"]\n", - " \n", - " observable_distance = self.settings[\"observation_line_length\"]\n", - " \n", - " relevant_objects = [obj for obj in self.objects \n", - " if obj.position.distance(self.hero.position) < observable_distance]\n", - " # objects sorted from closest to furthest\n", - " relevant_objects.sort(key=lambda x: x.position.distance(self.hero.position))\n", - " \n", - " observation = np.zeros(self.observation_size)\n", - " observation_offset = 0 \n", - " for i, observation_line in enumerate(self.observation_lines):\n", - " # shift to hero position\n", - " observation_line = LineSegment2(self.hero.position + Vector2(*observation_line.p1),\n", - " self.hero.position + Vector2(*observation_line.p2))\n", - "\n", - " observed_object = None\n", - " # if end of observation line is outside of walls, we see the wall.\n", - " if not self.inside_walls(observation_line.p2):\n", - " observed_object = \"**wall**\"\n", - " for obj in relevant_objects:\n", - " if observation_line.distance(obj.position) < self.settings[\"object_radius\"]:\n", - " observed_object = obj\n", - " break\n", - " object_type_id = None\n", - " speed_x, speed_y = 0, 0\n", - " proximity = 0\n", - " if observed_object == \"**wall**\": # wall seen \n", - " object_type_id = num_obj_types - 1\n", - " # a wall has fairly low speed...\n", - " speed_x, speed_y = 0, 0\n", - " # best candidate is intersection between\n", - " # observation_line and a wall, that's\n", - " # closest to the hero\n", - " best_candidate = None\n", - " for wall in self.walls:\n", - " candidate = observation_line.intersect(wall)\n", - " if candidate is not None:\n", - " if (best_candidate is None or \n", - " best_candidate.distance(self.hero.position) >\n", - " 
candidate.distance(self.hero.position)):\n", - " best_candidate = candidate\n", - " if best_candidate is None:\n", - " # assume it is due to rounding errors\n", - " # and wall is barely touching observation line\n", - " proximity = observable_distance\n", - " else:\n", - " proximity = best_candidate.distance(self.hero.position)\n", - " elif observed_object is not None: # agent seen\n", - " object_type_id = self.settings[\"objects\"].index(observed_object.obj_type)\n", - " speed_x, speed_y = tuple(observed_object.speed)\n", - " intersection_segment = obj.as_circle().intersect(observation_line)\n", - " assert intersection_segment is not None\n", - " try:\n", - " proximity = min(intersection_segment.p1.distance(self.hero.position),\n", - " intersection_segment.p2.distance(self.hero.position))\n", - " except AttributeError:\n", - " proximity = observable_distance\n", - " for object_type_idx_loop in range(num_obj_types):\n", - " observation[observation_offset + object_type_idx_loop] = 1.0\n", - " if object_type_id is not None:\n", - " observation[observation_offset + object_type_id] = proximity / observable_distance\n", - " observation[observation_offset + num_obj_types] = speed_x / max_speed_x\n", - " observation[observation_offset + num_obj_types + 1] = speed_y / max_speed_y\n", - " assert num_obj_types + 2 == self.eye_observation_size\n", - " observation_offset += self.eye_observation_size\n", - " \n", - " observation[observation_offset] = self.hero.speed[0] / max_speed_x\n", - " observation[observation_offset + 1] = self.hero.speed[1] / max_speed_y\n", - " assert observation_offset + 2 == self.observation_size\n", - " \n", - " return observation \n", - " \n", - " def distance_to_walls(self):\n", - " \"\"\"Returns distance of a hero to walls\"\"\"\n", - " res = float('inf')\n", - " for wall in self.walls:\n", - " res = min(res, self.hero.position.distance(wall))\n", - " return res - self.settings[\"object_radius\"]\n", - " \n", - " def collect_reward(self):\n", - " \"\"\"Return accumulated object eating score + current distance to walls score\"\"\"\n", - " wall_reward = self.settings[\"wall_distance_penalty\"] * \\\n", - " np.exp(-self.distance_to_walls() / self.settings[\"tolerable_distance_to_wall\"])\n", - " assert wall_reward < 1e-3, \"You are rewarding hero for being close to the wall!\"\n", - " total_reward = wall_reward + self.object_reward\n", - " self.object_reward = 0\n", - " self.collected_rewards.append(total_reward)\n", - " return total_reward\n", - " \n", - " def plot_reward(self, smoothing = 30):\n", - " \"\"\"Plot evolution of reward over time.\"\"\"\n", - " plottable = self.collected_rewards[:]\n", - " while len(plottable) > 1000:\n", - " for i in range(0, len(plottable) - 1, 2):\n", - " plottable[i//2] = (plottable[i] + plottable[i+1]) / 2\n", - " plottable = plottable[:(len(plottable) // 2)]\n", - " x = []\n", - " for i in range(smoothing, len(plottable)):\n", - " chunk = plottable[i-smoothing:i]\n", - " x.append(sum(chunk) / len(chunk))\n", - " plt.plot(list(range(len(x))), x)\n", - " \n", - " def generate_observation_lines(self):\n", - " \"\"\"Generate observation segments in settings[\"num_observation_lines\"] directions\"\"\"\n", - " result = []\n", - " start = Point2(0.0, 0.0)\n", - " end = Point2(self.settings[\"observation_line_length\"],\n", - " self.settings[\"observation_line_length\"])\n", - " for angle in np.linspace(0, 2*np.pi, self.settings[\"num_observation_lines\"], endpoint=False):\n", - " rotation = Point2(math.cos(angle), math.sin(angle))\n", - " 
current_start = Point2(start[0] * rotation[0], start[1] * rotation[1])\n", - " current_end = Point2(end[0] * rotation[0], end[1] * rotation[1])\n", - " result.append( LineSegment2(current_start, current_end))\n", - " return result\n", - " \n", - " def _repr_html_(self):\n", - " return self.to_html()\n", - " \n", - " def to_html(self, stats=[]):\n", - " \"\"\"Return svg representation of the simulator\"\"\"\n", - " scene = svg.Scene((self.size[0] + 20, self.size[1] + 20 + 20 * len(stats)))\n", - " scene.add(svg.Rectangle((10, 10), self.size))\n", - "\n", - " \n", - " for line in self.observation_lines:\n", - " scene.add(svg.Line(line.p1 + self.hero.position + Point2(10,10),\n", - " line.p2 + self.hero.position + Point2(10,10)))\n", - " \n", - " for obj in self.objects + [self.hero] :\n", - " scene.add(obj.draw())\n", - " \n", - " offset = self.size[1] + 15\n", - " for txt in stats: \n", - " scene.add(svg.Text((10, offset + 20), txt, 15))\n", - " offset += 20\n", - " \n", - " return scene\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from os.path import join, exists\n", - "from os import makedirs\n", - "\n", - "def simulate(game,\n", - " controller,\n", - " fps=60,\n", - " actions_per_game_second=60,\n", - " simulation_resultion=0.001,\n", - " speed=1.0,\n", - " store_every_nth=5,\n", - " train_every_nth=5,\n", - " save_path=None):\n", - " \"\"\"Start the simulation. Performs three tasks\n", - " \n", - " - visualizes simulation in iPython notebook\n", - " - advances game simulator state\n", - " - reports state to controller and chooses actions\n", - " to be performed.\n", - " \"\"\"\n", - " eq = EventQueue()\n", - " \n", - " time_between_frames = 1.0 / fps\n", - " game_time_between_actions = 1.0 / actions_per_game_second\n", - " \n", - " simulation_resultion /= speed\n", - " \n", - " vis_s = {\n", - " 'last_image': 0\n", - " }\n", - " \n", - " if save_path is not None:\n", - " if not exists(save_path):\n", - " makedirs(save_path)\n", - " \n", - " ###### VISUALIZATION\n", - " def visualize():\n", - " recent_reward = game.collected_rewards[-100:] + [0]\n", - " objects_eaten_str = ', '.join([\"%s: %s\" % (o,c) for o,c in game.objects_eaten.items()])\n", - " clear_output(wait=True)\n", - " svg_html = game.to_html([\n", - " \"DTW = %.1f\" % (game.distance_to_walls(),),\n", - " \"experience = %d\" % (len(controller.experience),),\n", - " \"reward = %.1f\" % (sum(recent_reward)/len(recent_reward),),\n", - " \"objects eaten => %s\" % (objects_eaten_str,),\n", - " ])\n", - " display(svg_html)\n", - " if save_path is not None:\n", - " img_path = join(save_path, \"%d.svg\" % (vis_s['last_image'],))\n", - " with open(img_path, \"w\") as f:\n", - " svg_html.write_svg(f)\n", - " vis_s['last_image'] += 1\n", - " \n", - " eq.schedule_recurring(visualize, time_between_frames)\n", - "\n", - " \n", - " ###### CONTROL\n", - " ctrl_s = {\n", - " 'last_observation': None,\n", - " 'last_action': None,\n", - " 'actions_so_far': 0,\n", - " }\n", - " \n", - " def control():\n", - " # sense\n", - " new_observation = game.observe()\n", - " reward = game.collect_reward()\n", - " # store last transition\n", - " ctrl_s['actions_so_far'] += 1\n", - " if ctrl_s['last_observation'] is not None and ctrl_s['actions_so_far'] % store_every_nth == 0:\n", - " controller.store(ctrl_s['last_observation'], ctrl_s['last_action'], reward, new_observation)\n", - " # act\n", - " new_action = controller.action(new_observation)\n", - " 
game.perform_action(new_action)\n", - " ctrl_s['last_action'] = new_action\n", - " ctrl_s['last_observation'] = new_observation\n", - " \n", - " #train\n", - " if ctrl_s['last_observation'] is not None and ctrl_s['actions_so_far'] % train_every_nth == 0:\n", - " controller.training_step()\n", - " \n", - " ##### SIMULATION\n", - " sim_s = {\n", - " 'simulated_up_to': time.time(),\n", - " 'game_time_since_last_action': 0,\n", - " }\n", - " def simulate_game():\n", - " while sim_s['simulated_up_to'] < time.time():\n", - " game.step(simulation_resultion)\n", - " sim_s['simulated_up_to'] += simulation_resultion / speed\n", - " sim_s['game_time_since_last_action'] += simulation_resultion\n", - " if sim_s['game_time_since_last_action'] > game_time_between_actions:\n", - " control()\n", - " sim_s['game_time_since_last_action'] = 0\n", - " \n", - " eq.schedule_recurring(simulate_game, time_between_frames)\n", - " \n", - " eq.run()" + "print(LOG_DIR)" ] }, { @@ -704,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "metadata": { "collapsed": false }, @@ -713,8 +128,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Exception AssertionError: AssertionError() in > ignored\n", - "Exception AssertionError: AssertionError() in > ignored\n" + "Exception AssertionError: AssertionError() in > ignored\n" ] } ], @@ -732,7 +146,7 @@ "optimizer = tf.train.RMSPropOptimizer(learning_rate= 0.0001, decay=0.9)\n", "\n", "# DeepQ object\n", - "current_controller = DeepQ(g.observation_size, g.num_actions, brain, optimizer, session,\n", + "current_controller = DiscreteDeepQ(g.observation_size, g.num_actions, brain, optimizer, session,\n", " discount_rate=0.9, exploration_period=5000, max_experience=10000, summary_writer=journalist)\n", "session.run(tf.initialize_all_variables())\n", "journalist.add_graph(session.graph_def)" @@ -753,12 +167,349 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 24, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " 
\n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + " DTW = 143.2\n", + "\n", + " \n", + "\n", + " \n", + "\n", + " experience = 10\n", + "\n", + " \n", + "\n", + " \n", + "\n", + " reward = 0.0\n", + "\n", + " \n", + "\n", + " \n", + "\n", + " objects eaten => friend: 1\n", + "\n", + " \n", + "\n", + " \n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "ename": "KeyboardInterrupt", "evalue": "", @@ -766,13 +517,9 @@ "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[0mstore_every_nth\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m4\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[0mtrain_every_nth\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m4\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 12\u001b[1;33m save_path=\"my_sim\")\n\u001b[0m", - "\u001b[1;32m\u001b[0m in \u001b[0;36msimulate\u001b[1;34m(game, controller, fps, actions_per_game_second, simulation_resultion, speed, store_every_nth, train_every_nth, save_path)\u001b[0m\n\u001b[0;32m 95\u001b[0m \u001b[0meq\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mschedule_recurring\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msimulate_game\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtime_between_frames\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 96\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 97\u001b[1;33m \u001b[0meq\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32m/home/sidor/projects/dali-deepq/event_queue.pyc\u001b[0m in \u001b[0;36mrun\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mnow\u001b[0m \u001b[1;33m<\u001b[0m \u001b[0mevent\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mts\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mevent\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mts\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mnow\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 52\u001b[1;33m \u001b[0mevent\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 53\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m/home/sidor/projects/dali-deepq/event_queue.pyc\u001b[0m in \u001b[0;36mrecuring_f\u001b[1;34m()\u001b[0m\n\u001b[0;32m 38\u001b[0m from now\"\"\"\n\u001b[0;32m 39\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mrecuring_f\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 40\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 41\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mschedule\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrecuring_f\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m 
\u001b[0minterval\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 42\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mschedule\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrecuring_f\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0minterval\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m\u001b[0m in \u001b[0;36mvisualize\u001b[1;34m()\u001b[0m\n\u001b[0;32m 42\u001b[0m \u001b[1;34m\"experience = %d\"\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcontroller\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexperience\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 43\u001b[0m \u001b[1;34m\"reward = %.1f\"\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrecent_reward\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m/\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrecent_reward\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 44\u001b[1;33m \u001b[1;34m\"objects eaten => %s\"\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mobjects_eaten_str\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 45\u001b[0m ])\n\u001b[0;32m 46\u001b[0m \u001b[0mdisplay\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msvg_html\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m\u001b[0m in \u001b[0;36mto_html\u001b[1;34m(self, stats)\u001b[0m\n\u001b[0;32m 260\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 261\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mline\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobservation_lines\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 262\u001b[1;33m scene.add(svg.Line(line.p1 + self.hero.position + Point2(10,10),\n\u001b[0m\u001b[0;32m 263\u001b[0m line.p2 + self.hero.position + Point2(10,10)))\n\u001b[0;32m 264\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m/usr/lib/python2.7/site-packages/euclid.pyc\u001b[0m in \u001b[0;36m__add__\u001b[1;34m(self, other)\u001b[0m\n\u001b[0;32m 153\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 154\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__add__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mother\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 155\u001b[1;33m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mother\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mVector2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 156\u001b[0m \u001b[1;31m# Vector + Vector -> Vector\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 157\u001b[0m \u001b[1;31m# Vector + Point -> Point\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[0mstore_every_nth\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m4\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 
[129 SVG frame diffs: notebooks/my_sim/0.svg through notebooks/my_sim/128.svg, each added as a new 164-line file; the SVG markup itself is not reproduced here. Each frame renders the game scene followed by four caption lines, e.g. frame 0: "DTW = 190.0", "experience = 0", "reward = 0.0", "objects eaten =>". Across frames 0-128, DTW wanders between roughly 140 and 240, experience grows to 10, reward stays at 0.0, and from frame 67 onward the captions read "objects eaten => friend: 1".]
diff --git a/notebooks/tf_rl b/notebooks/tf_rl
new file mode 120000
index 00000000..60819747
--- /dev/null
+++ b/notebooks/tf_rl
@@ -0,0 +1 @@
+../tf_rl
\ No newline at end of file
diff --git a/make_gif.sh b/scripts/make_gif.sh
similarity index 100%
rename from make_gif.sh
rename to scripts/make_gif.sh
diff --git a/tf_rl/__init__.py b/tf_rl/__init__.py
new file mode 100644
index 00000000..f7700f1a
--- /dev/null
+++ b/tf_rl/__init__.py
@@ -0,0 +1 @@
+from .simulate import simulate
diff --git a/tf_rl/controller/__init__.py b/tf_rl/controller/__init__.py
new file mode 100644
index 00000000..55018e02
--- /dev/null
+++ b/tf_rl/controller/__init__.py
@@ -0,0 +1,2 @@
+from .discrete_deepq import DiscreteDeepQ
+from .human_controller import HumanController
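Taken together, these __init__.py hunks (plus tf_rl/simulation/__init__.py later in the patch) give the reorganized package a flat import surface. Not part of the patch, but the intended downstream usage appears to be:

    # Imports enabled by the new package layout (illustration, not in the patch).
    from tf_rl import simulate                    # re-exported by tf_rl/__init__.py
    from tf_rl.controller import DiscreteDeepQ    # re-exported by tf_rl/controller/__init__.py
    from tf_rl.controller import HumanController
    from tf_rl.simulation import KarpathyGame     # re-exported by tf_rl/simulation/__init__.py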
diff --git a/tf_rl/controller/discrete_deepq.py b/tf_rl/controller/discrete_deepq.py
new file mode 100644
index 00000000..4148dfbe
--- /dev/null
+++ b/tf_rl/controller/discrete_deepq.py
@@ -0,0 +1,190 @@
+import tensorflow as tf
+import numpy as np
+import random
+
+from collections import deque
+
+class DiscreteDeepQ(object):
+    def __init__(self, observation_size,
+                       num_actions,
+                       observation_to_actions,
+                       optimizer,
+                       session,
+                       random_action_probability=0.05,
+                       exploration_period=1000,
+                       minibatch_size=32,
+                       discount_rate=0.95,
+                       max_experience=30000,
+                       summary_writer=None):
+        """Initializes the DeepQ object.
+
+        Based on:
+            https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf
+
+        Parameters
+        -------
+        observation_size : int
+            length of the vector passed as observation
+        num_actions : int
+            number of actions that the model can execute
+        observation_to_actions: model
+            model that implements an activate function
+            that can take an observation vector or a batch
+            and returns scores (of unbounded values) for each
+            action for each observation.
+            input shape:  [batch_size, observation_size]
+            output shape: [batch_size, num_actions]
+        optimizer: tf.train.Optimizer
+            optimizer for the prediction error
+        session: tf.Session
+            session on which to execute the computation
+        random_action_probability: float (0 to 1)
+            probability of choosing a random action
+            (epsilon from the paper), annealed linearly
+            from 1 to random_action_probability over
+            exploration_period
+        exploration_period: int
+            number of actions over which epsilon is annealed
+        minibatch_size: int
+            number of (state, action, reward, newstate)
+            tuples considered during experience replay
+        discount_rate: float (0 to 1)
+            how much we care about future rewards.
+        max_experience: int
+            maximum size of the replay buffer
+        summary_writer: tf.train.SummaryWriter
+            writer to log metrics
+        """
+        # memorize arguments
+        self.observation_size          = observation_size
+        self.num_actions               = num_actions
+
+        self.observation_to_actions    = observation_to_actions
+        self.optimizer                 = optimizer
+        self.s                         = session
+
+        self.random_action_probability = random_action_probability
+        self.exploration_period        = exploration_period
+        self.minibatch_size            = minibatch_size
+        self.discount_rate             = tf.constant(discount_rate)
+        self.max_experience            = max_experience
+
+        # deepq state
+        self.actions_executed_so_far = 0
+        self.experience = deque()
+
+        self.iteration = 0
+        self.summary_writer = summary_writer
+
+        self.create_variables()
+
+    def linear_annealing(self, n, total, p_initial, p_final):
+        """Linear annealing between p_initial and p_final
+        over total steps - computes the value at step n."""
+        if n >= total:
+            return p_final
+        else:
+            return p_initial - (n * (p_initial - p_final)) / total
+
+    def create_variables(self):
+        # FOR REGULAR ACTION SCORE COMPUTATION
+        with tf.name_scope("observation"):
+            self.observation       = tf.placeholder(tf.float32, (None, self.observation_size), name="observation")
+            self.action_scores     = self.observation_to_actions(self.observation)
+            self.predicted_actions = tf.argmax(self.action_scores, dimension=1, name="predicted_actions")
+
+        with tf.name_scope("future_rewards"):
+            # FOR PREDICTING TARGET FUTURE REWARDS
+            self.observation_mask = tf.placeholder(tf.float32, (None,), name="observation_mask")
+            self.rewards          = tf.placeholder(tf.float32, (None,), name="rewards")
+            target_values         = tf.reduce_max(self.action_scores, reduction_indices=[1,]) * self.observation_mask
+            self.future_rewards   = self.rewards + self.discount_rate * target_values
+
+        with tf.name_scope("q_value_prediction"):
+            # FOR PREDICTION ERROR
+            self.action_mask                = tf.placeholder(tf.float32, (None, self.num_actions))
+            self.masked_action_scores       = tf.reduce_sum(self.action_scores * self.action_mask, reduction_indices=[1,])
+            self.precomputed_future_rewards = tf.placeholder(tf.float32, (None,))
+            temp_diff                       = self.masked_action_scores - self.precomputed_future_rewards
+            self.prediction_error           = tf.reduce_mean(tf.square(temp_diff))
+            self.train_op                   = self.optimizer.minimize(self.prediction_error)
+
+        self.metrics = [
+            tf.scalar_summary("prediction_error", self.prediction_error)
+        ]
+
+    def action(self, observation):
+        """Given an observation, returns the action that should be chosen using
+        the DeepQ learning strategy. Does not backprop."""
+        assert len(observation.shape) == 1, \
+                "Action is performed based on a single observation."
+
+        self.actions_executed_so_far += 1
+        exploration_p = self.linear_annealing(self.actions_executed_so_far,
+                                              self.exploration_period,
+                                              1.0,
+                                              self.random_action_probability)
+
+        if random.random() < exploration_p:
+            return random.randint(0, self.num_actions - 1)
+        else:
+            return self.s.run(self.predicted_actions, {self.observation: observation[np.newaxis,:]})[0]
+
+    def store(self, observation, action, reward, newobservation):
+        """Stores an experience: starting from observation and
+        executing action, we arrived at newobservation and collected
+        reward.
+
+        If newobservation is None, the state/action pair is assumed to be terminal.
+        """
+        self.experience.append((observation, action, reward, newobservation))
+        if len(self.experience) > self.max_experience:
+            self.experience.popleft()
+
+    def training_step(self):
+        """Picks self.minibatch_size experiences from the replay buffer
+        and backpropagates the value function.
+        """
+        if len(self.experience) < self.minibatch_size:
+            return
+
+        # sample experience.
+        samples = random.sample(range(len(self.experience)), self.minibatch_size)
+        samples = [self.experience[i] for i in samples]
+
+        # batch states
+        states         = np.empty((len(samples), self.observation_size))
+        newstates      = np.empty((len(samples), self.observation_size))
+        action_mask    = np.zeros((len(samples), self.num_actions))
+
+        newstates_mask = np.empty((len(samples),))
+        rewards        = np.empty((len(samples),))
+
+        for i, (state, action, reward, newstate) in enumerate(samples):
+            states[i] = state
+            action_mask[i] = 0
+            action_mask[i][action] = 1
+            rewards[i] = reward
+            if newstate is not None:
+                newstates[i] = newstate
+                newstates_mask[i] = 1
+            else:
+                newstates[i] = 0
+                newstates_mask[i] = 0
+
+        future_rewards = self.s.run(self.future_rewards, {
+            self.observation:      newstates,
+            self.observation_mask: newstates_mask,
+            self.rewards:          rewards,
+        })
+
+        res = self.s.run([self.prediction_error, self.train_op] + self.metrics, {
+            self.observation:                states,
+            self.action_mask:                action_mask,
+            self.precomputed_future_rewards: future_rewards,
+        })
+        cost, metrics = res[0], res[2:]
+
+        if self.summary_writer is not None:
+            for metric in metrics:
+                self.summary_writer.add_summary(metric, self.iteration)
+        self.iteration += 1
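The class above leaves model construction to the caller, so for orientation a minimal end-to-end wiring might look like the sketch below. This is not part of the patch: the linear `brain`, the random stand-in environment, and the hyperparameter values are illustrative assumptions; only the DiscreteDeepQ constructor and the action/store/training_step calls come from the code above.

    # Sketch: constructing and driving DiscreteDeepQ on a toy problem (not in this patch).
    import numpy as np
    import tensorflow as tf
    from tf_rl.controller import DiscreteDeepQ

    observation_size, num_actions = 4, 2

    # A minimal "model": any callable mapping a batch of observations to per-action scores.
    W = tf.Variable(tf.random_uniform((observation_size, num_actions), -0.1, 0.1))
    brain = lambda obs: tf.matmul(obs, W)

    session = tf.Session()
    controller = DiscreteDeepQ(observation_size, num_actions, brain,
                               tf.train.GradientDescentOptimizer(learning_rate=0.001),
                               session)
    session.run(tf.initialize_all_variables())

    # Epsilon-greedy interaction: store transitions, then replay-train.
    obs = np.zeros(observation_size)
    for _ in range(100):
        action  = controller.action(obs)             # epsilon annealed from 1.0 to 0.05
        new_obs = np.random.randn(observation_size)  # stand-in for a real environment step
        controller.store(obs, action, reward=1.0, newobservation=new_obs)
        controller.training_step()                   # no-op until 32 experiences are stored
        obs = new_obs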
+            self.masked_action_scores = tf.reduce_sum(self.action_scores * self.action_mask, reduction_indices=[1,])
+            self.precomputed_future_rewards = tf.placeholder(tf.float32, (None,))
+            temp_diff = self.masked_action_scores - self.precomputed_future_rewards
+            self.prediction_error = tf.reduce_mean(tf.square(temp_diff))
+            self.train_op = self.optimizer.minimize(self.prediction_error)
+
+        self.metrics = [
+            tf.scalar_summary("prediction_error", self.prediction_error)
+        ]
+
+    def action(self, observation):
+        """Given an observation, return the action that should be chosen using
+        the DeepQ learning strategy. Does not backprop."""
+        assert len(observation.shape) == 1, \
+                "Action is performed based on a single observation."
+
+        self.actions_executed_so_far += 1
+        exploration_p = self.linear_annealing(self.actions_executed_so_far,
+                                              self.exploration_period,
+                                              1.0,
+                                              self.random_action_probability)
+
+        if random.random() < exploration_p:
+            return random.randint(0, self.num_actions - 1)
+        else:
+            return self.s.run(self.predicted_actions, {self.observation: observation[np.newaxis,:]})[0]
+
+    def store(self, observation, action, reward, newobservation):
+        """Store an experience: starting from observation and
+        executing action, we arrived at newobservation and
+        received reward.
+
+        If newobservation is None, the state/action pair is
+        assumed to be terminal.
+        """
+        self.experience.append((observation, action, reward, newobservation))
+        if len(self.experience) > self.max_experience:
+            self.experience.popleft()
+
+    def training_step(self):
+        """Sample self.minibatch_size experiences from the replay buffer
+        and backpropagate through the value function.
+        """
+        if len(self.experience) < self.minibatch_size:
+            return
+
+        # sample experience
+        samples = random.sample(range(len(self.experience)), self.minibatch_size)
+        samples = [self.experience[i] for i in samples]
+
+        # batch states
+        states = np.empty((len(samples), self.observation_size))
+        newstates = np.empty((len(samples), self.observation_size))
+        action_mask = np.zeros((len(samples), self.num_actions))
+
+        newstates_mask = np.empty((len(samples),))
+        rewards = np.empty((len(samples),))
+
+        for i, (state, action, reward, newstate) in enumerate(samples):
+            states[i] = state
+            action_mask[i] = 0
+            action_mask[i][action] = 1
+            rewards[i] = reward
+            if newstate is not None:
+                newstates[i] = newstate
+                newstates_mask[i] = 1
+            else:
+                # terminal transition: the mask zeroes out the bootstrapped target
+                newstates[i] = 0
+                newstates_mask[i] = 0
+
+        future_rewards = self.s.run(self.future_rewards, {
+            self.observation:      newstates,
+            self.observation_mask: newstates_mask,
+            self.rewards:          rewards,
+        })
+
+        res = self.s.run([self.prediction_error, self.train_op] + self.metrics, {
+            self.observation:                states,
+            self.action_mask:                action_mask,
+            self.precomputed_future_rewards: future_rewards,
+        })
+        cost, metrics = res[0], res[2:]
+
+        if self.summary_writer is not None:
+            for metric in metrics:
+                self.summary_writer.add_summary(metric, self.iteration)
+        self.iteration += 1
diff --git a/human_control.py b/tf_rl/controller/human_controller.py
similarity index 93%
rename from human_control.py
rename to tf_rl/controller/human_controller.py
index ac92dc9d..fd752aaa 100644
--- a/human_control.py
+++ b/tf_rl/controller/human_controller.py
@@ -1,4 +1,4 @@
-from getch import getch
+from tf_rl.utils.getch import getch
 
 from redis import StrictRedis
diff --git a/tf_models.py b/tf_rl/models.py
similarity index 100%
rename from tf_models.py
rename to tf_rl/models.py
diff --git a/tf_rl/simulate.py b/tf_rl/simulate.py
new file mode 100644
index 00000000..9b546ac8
--- /dev/null
+++ b/tf_rl/simulate.py
@@ -0,0 +1,102 @@
+import time
+
+from IPython.display import clear_output, display, HTML
+from os.path import join, exists
+from os import makedirs
+
+from tf_rl.utils.event_queue import EventQueue
+
+def simulate(game,
+             controller,
+             fps=60,
+             actions_per_game_second=60,
+             simulation_resolution=0.001,
+             speed=1.0,
+             store_every_nth=5,
+             train_every_nth=5,
+             save_path=None):
+    """Start the simulation. Performs three tasks:
+
+    - visualizes the simulation in an IPython notebook
+    - advances the game simulator state
+    - reports the state to the controller and chooses
+      actions to be performed
+    """
+    eq = EventQueue()
+
+    time_between_frames = 1.0 / fps
+    game_time_between_actions = 1.0 / actions_per_game_second
+
+    simulation_resolution /= speed
+
+    vis_s = {
+        'last_image': 0
+    }
+
+    if save_path is not None:
+        if not exists(save_path):
+            makedirs(save_path)
+
+    ###### VISUALIZATION
+    def visualize():
+        recent_reward = game.collected_rewards[-100:] + [0]
+        objects_eaten_str = ', '.join(["%s: %s" % (o, c) for o, c in game.objects_eaten.items()])
+        clear_output(wait=True)
+        svg_html = game.to_html([
+            "DTW = %.1f" % (game.distance_to_walls(),),
+            "experience = %d" % (len(controller.experience),),
+            "reward = %.1f" % (sum(recent_reward) / len(recent_reward),),
+            "objects eaten => %s" % (objects_eaten_str,),
+        ])
+        display(svg_html)
+        if save_path is not None:
+            img_path = join(save_path, "%d.svg" % (vis_s['last_image'],))
+            with open(img_path, "w") as f:
+                svg_html.write_svg(f)
+            vis_s['last_image'] += 1
+
+    eq.schedule_recurring(visualize, time_between_frames)
+
+    ###### CONTROL
+    ctrl_s = {
+        'last_observation': None,
+        'last_action': None,
+        'actions_so_far': 0,
+    }
+
+    def control():
+        # sense
+        new_observation = game.observe()
+        reward = game.collect_reward()
+        # store last transition
+        ctrl_s['actions_so_far'] += 1
+        if ctrl_s['last_observation'] is not None and ctrl_s['actions_so_far'] % store_every_nth == 0:
+            controller.store(ctrl_s['last_observation'], ctrl_s['last_action'], reward, new_observation)
+        # act
+        new_action = controller.action(new_observation)
+        game.perform_action(new_action)
+        ctrl_s['last_action'] = new_action
+        ctrl_s['last_observation'] = new_observation
+
+        # train
+        if ctrl_s['last_observation'] is not None and ctrl_s['actions_so_far'] % train_every_nth == 0:
+            controller.training_step()
+
+    ##### SIMULATION
+    sim_s = {
+        'simulated_up_to': time.time(),
+        'game_time_since_last_action': 0,
+    }
+
+    def simulate_game():
+        # catch the game state up to wall-clock time, one small step at a time
+        while sim_s['simulated_up_to'] < time.time():
+            game.step(simulation_resolution)
+            sim_s['simulated_up_to'] += simulation_resolution / speed
+            sim_s['game_time_since_last_action'] += simulation_resolution
+            if sim_s['game_time_since_last_action'] > game_time_between_actions:
+                control()
+                sim_s['game_time_since_last_action'] = 0
+
+    eq.schedule_recurring(simulate_game, time_between_frames)
+
+    eq.run()
diff --git a/tf_rl/simulation/__init__.py b/tf_rl/simulation/__init__.py
new file mode 100644
index 00000000..12ecb86f
--- /dev/null
+++ b/tf_rl/simulation/__init__.py
@@ -0,0 +1 @@
+from .karpathy_game import KarpathyGame
diff --git a/tf_rl/simulation/karpathy_game.py b/tf_rl/simulation/karpathy_game.py
new file mode 100644
index 00000000..52186371
--- /dev/null
+++ b/tf_rl/simulation/karpathy_game.py
@@ -0,0 +1,282 @@
+import math
+import matplotlib.pyplot as plt
+import numpy as np
+import random
+import time
+
+from collections import defaultdict
+from euclid import Circle, Point2, Vector2, LineSegment2
+
+import tf_rl.utils.svg as svg
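+
+# A tiny 2D world: a hero circle moves among other circles of settings-defined
+# types (e.g. "friend"), observes its surroundings along a fan of observation
+# lines, and is rewarded for what it eats and penalized for hugging the walls.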
+class GameObject(object):
+    def __init__(self, position, speed, obj_type, settings):
+        """Essentially represents circles of different kinds, which have
+        a position and a speed."""
+        self.settings = settings
+        self.radius = self.settings["object_radius"]
+
+        self.obj_type = obj_type
+        self.position = position
+        self.speed = speed
+        self.bounciness = 1.0
+
+    def wall_collisions(self):
+        """Update speed upon collision with a wall."""
+        world_size = self.settings["world_size"]
+
+        for dim in range(2):
+            if self.position[dim] - self.radius <= 0 and self.speed[dim] < 0:
+                self.speed[dim] = - self.speed[dim] * self.bounciness
+            elif self.position[dim] + self.radius + 1 >= world_size[dim] and self.speed[dim] > 0:
+                self.speed[dim] = - self.speed[dim] * self.bounciness
+
+    def move(self, dt):
+        """Move as if dt seconds passed."""
+        self.position += dt * self.speed
+        self.position = Point2(*self.position)
+
+    def step(self, dt):
+        """Move and bounce off walls."""
+        self.wall_collisions()
+        self.move(dt)
+
+    def as_circle(self):
+        return Circle(self.position, float(self.radius))
+
+    def draw(self):
+        """Return svg object for this item."""
+        color = self.settings["colors"][self.obj_type]
+        return svg.Circle(self.position + Point2(10, 10), self.radius, color=color)
+
+class KarpathyGame(object):
+    def __init__(self, settings):
+        """Initialize the game simulator with settings."""
+        self.settings = settings
+        self.size = self.settings["world_size"]
+        self.walls = [LineSegment2(Point2(0, 0), Point2(0, self.size[1])),
+                      LineSegment2(Point2(0, self.size[1]), Point2(self.size[0], self.size[1])),
+                      LineSegment2(Point2(self.size[0], self.size[1]), Point2(self.size[0], 0)),
+                      LineSegment2(Point2(self.size[0], 0), Point2(0, 0))]
+
+        self.hero = GameObject(Point2(*self.settings["hero_initial_position"]),
+                               Vector2(*self.settings["hero_initial_speed"]),
+                               "hero",
+                               self.settings)
+        if not self.settings["hero_bounces_off_walls"]:
+            self.hero.bounciness = 0.0
+
+        self.objects = []
+        for obj_type, number in settings["num_objects"].items():
+            for _ in range(number):
+                self.spawn_object(obj_type)
+
+        self.observation_lines = self.generate_observation_lines()
+
+        self.object_reward = 0
+        self.collected_rewards = []
+
+        # every observation line sees one of the objects or a wall, plus
+        # two numbers representing the speed of the observed object (if applicable)
+        self.eye_observation_size = len(self.settings["objects"]) + 3
+        # additionally there are two numbers representing the agent's own speed
+        self.observation_size = self.eye_observation_size * len(self.observation_lines) + 2
+
+        self.directions = [Vector2(*d) for d in [[1, 0], [0, 1], [-1, 0], [0, -1]]]
+        self.num_actions = len(self.directions)
+
+        self.objects_eaten = defaultdict(lambda: 0)
+
+    def perform_action(self, action_id):
+        """Dampen the hero's speed and accelerate it along one of the
+        four direction vectors."""
+        assert 0 <= action_id < self.num_actions
+        self.hero.speed *= 0.8
+        self.hero.speed += self.directions[action_id] * self.settings["delta_v"]
+
+    def spawn_object(self, obj_type):
+        """Spawn an object of a given type and add it to the objects array."""
+        radius = self.settings["object_radius"]
+        position = np.random.uniform([radius, radius], np.array(self.size) - radius)
+        position = Point2(float(position[0]), float(position[1]))
+        max_speed = np.array(self.settings["maximum_speed"])
+        speed = np.random.uniform(-max_speed, max_speed).astype(float)
+        speed = Vector2(float(speed[0]), float(speed[1]))
+
+        self.objects.append(GameObject(position, speed, obj_type, self.settings))
+
+    def step(self, dt):
+        """Simulate all the objects for a given amount of time.
+
+        Also resolve collisions with the hero."""
+        for obj in self.objects + [self.hero]:
+            obj.step(dt)
+        self.resolve_collisions()
+
+    def squared_distance(self, p1, p2):
+        return (p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2
+
+    def resolve_collisions(self):
+        """If the hero touches an object, the hero eats it, the reward is
+        updated, and a replacement object is spawned."""
+        collision_distance = 2 * self.settings["object_radius"]
+        collision_distance2 = collision_distance ** 2
+        to_remove = []
+        for obj in self.objects:
+            if self.squared_distance(self.hero.position, obj.position) < collision_distance2:
+                to_remove.append(obj)
+        for obj in to_remove:
+            self.objects.remove(obj)
+            self.objects_eaten[obj.obj_type] += 1
+            self.object_reward += self.settings["object_reward"][obj.obj_type]
+            self.spawn_object(obj.obj_type)
+
+    def inside_walls(self, point):
+        """Check if the point is inside the walls."""
+        EPS = 1e-4
+        return (EPS <= point[0] < self.size[0] - EPS and
+                EPS <= point[1] < self.size[1] - EPS)
+
+    def observe(self):
+        """Return the observation vector. For every observation direction it
+        encodes a representation of the closest object the hero sees in that
+        direction - which might be nothing, another object, or a wall.
+        The representations for all directions are concatenated.
+        """
+        num_obj_types = len(self.settings["objects"]) + 1  # and wall
+        max_speed_x, max_speed_y = self.settings["maximum_speed"]
+
+        observable_distance = self.settings["observation_line_length"]
+
+        relevant_objects = [obj for obj in self.objects
+                            if obj.position.distance(self.hero.position) < observable_distance]
+        # objects sorted from closest to furthest
+        relevant_objects.sort(key=lambda x: x.position.distance(self.hero.position))
+
+        observation = np.zeros(self.observation_size)
+        observation_offset = 0
+        for i, observation_line in enumerate(self.observation_lines):
+            # shift to hero position
+            observation_line = LineSegment2(self.hero.position + Vector2(*observation_line.p1),
+                                            self.hero.position + Vector2(*observation_line.p2))
+
+            observed_object = None
+            # if the end of the observation line is outside the walls, we see a wall
+            if not self.inside_walls(observation_line.p2):
+                observed_object = "**wall**"
+            for obj in relevant_objects:
+                if observation_line.distance(obj.position) < self.settings["object_radius"]:
+                    observed_object = obj
+                    break
+            object_type_id = None
+            speed_x, speed_y = 0, 0
+            proximity = 0
+            if observed_object == "**wall**":  # wall seen
+                object_type_id = num_obj_types - 1
+                # a wall has fairly low speed...
+                speed_x, speed_y = 0, 0
+                # the best candidate is the intersection between the
+                # observation_line and a wall that is closest to the hero
+                best_candidate = None
+                for wall in self.walls:
+                    candidate = observation_line.intersect(wall)
+                    if candidate is not None:
+                        if (best_candidate is None or
+                                best_candidate.distance(self.hero.position) >
+                                candidate.distance(self.hero.position)):
+                            best_candidate = candidate
+                if best_candidate is None:
+                    # assume it is due to rounding errors
+                    # and the wall is barely touching the observation line
+                    proximity = observable_distance
+                else:
+                    proximity = best_candidate.distance(self.hero.position)
+            elif observed_object is not None:  # object seen
+                object_type_id = self.settings["objects"].index(observed_object.obj_type)
+                speed_x, speed_y = tuple(observed_object.speed)
+                intersection_segment = observed_object.as_circle().intersect(observation_line)
+                assert intersection_segment is not None
+                try:
+                    proximity = min(intersection_segment.p1.distance(self.hero.position),
+                                    intersection_segment.p2.distance(self.hero.position))
+                except AttributeError:
+                    proximity = observable_distance
+            # default: all proximity slots report "nothing seen" (maximum distance)
+            observation[observation_offset:observation_offset + num_obj_types] = 1.0
+            if object_type_id is not None:
+                observation[observation_offset + object_type_id] = proximity / observable_distance
+            observation[observation_offset + num_obj_types] = speed_x / max_speed_x
+            observation[observation_offset + num_obj_types + 1] = speed_y / max_speed_y
+            assert num_obj_types + 2 == self.eye_observation_size
+            observation_offset += self.eye_observation_size
+
+        observation[observation_offset] = self.hero.speed[0] / max_speed_x
+        observation[observation_offset + 1] = self.hero.speed[1] / max_speed_y
+        assert observation_offset + 2 == self.observation_size
+
+        return observation
+
+    def distance_to_walls(self):
+        """Return the distance from the hero to the closest wall."""
+        res = float('inf')
+        for wall in self.walls:
+            res = min(res, self.hero.position.distance(wall))
+        return res - self.settings["object_radius"]
+
+    def collect_reward(self):
+        """Return the accumulated object-eating score plus the current
+        distance-to-walls penalty."""
+        wall_reward = self.settings["wall_distance_penalty"] * \
+                      np.exp(-self.distance_to_walls() / self.settings["tolerable_distance_to_wall"])
+        assert wall_reward < 1e-3, "You are rewarding hero for being close to the wall!"
+        total_reward = wall_reward + self.object_reward
+        self.object_reward = 0
+        self.collected_rewards.append(total_reward)
+        return total_reward
+
+    def plot_reward(self, smoothing=30):
+        """Plot the evolution of the reward over time."""
+        plottable = self.collected_rewards[:]
+        # repeatedly average adjacent pairs until at most 1000 points remain
+        while len(plottable) > 1000:
+            for i in range(0, len(plottable) - 1, 2):
+                plottable[i // 2] = (plottable[i] + plottable[i + 1]) / 2
+            plottable = plottable[:(len(plottable) // 2)]
+        x = []
+        for i in range(smoothing, len(plottable)):
+            chunk = plottable[i - smoothing:i]
+            x.append(sum(chunk) / len(chunk))
+        plt.plot(list(range(len(x))), x)
+
+    def generate_observation_lines(self):
+        """Generate observation segments in settings["num_observation_lines"] directions."""
+        result = []
+        start = Point2(0.0, 0.0)
+        end = Point2(self.settings["observation_line_length"],
+                     self.settings["observation_line_length"])
+        for angle in np.linspace(0, 2 * np.pi, self.settings["num_observation_lines"], endpoint=False):
+            rotation = Point2(math.cos(angle), math.sin(angle))
+            # the componentwise product acts as a rotation here only because
+            # start is the origin and end lies on the diagonal (L, L)
+            current_start = Point2(start[0] * rotation[0], start[1] * rotation[1])
+            current_end = Point2(end[0] * rotation[0], end[1] * rotation[1])
+            result.append(LineSegment2(current_start, current_end))
+        return result
+
+    def _repr_html_(self):
+        return self.to_html()
+
+    def to_html(self, stats=[]):
+        """Return an svg representation of the simulator."""
+        scene = svg.Scene((self.size[0] + 20, self.size[1] + 20 + 20 * len(stats)))
+        scene.add(svg.Rectangle((10, 10), self.size))
+
+        for line in self.observation_lines:
+            scene.add(svg.Line(line.p1 + self.hero.position + Point2(10, 10),
+                               line.p2 + self.hero.position + Point2(10, 10)))
+
+        for obj in self.objects + [self.hero]:
+            scene.add(obj.draw())
+
+        offset = self.size[1] + 15
+        for txt in stats:
+            scene.add(svg.Text((10, offset + 20), txt, 15))
+            offset += 20
+
+        return scene
diff --git a/tf_rl/utils/__init__.py b/tf_rl/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/event_queue.py b/tf_rl/utils/event_queue.py
similarity index 100%
rename from event_queue.py
rename to tf_rl/utils/event_queue.py
diff --git a/geometry.py b/tf_rl/utils/geometry.py
similarity index 100%
rename from geometry.py
rename to tf_rl/utils/geometry.py
diff --git a/getch.py b/tf_rl/utils/getch.py
similarity index 100%
rename from getch.py
rename to tf_rl/utils/getch.py
diff --git a/svg.py b/tf_rl/utils/svg.py
similarity index 100%
rename from svg.py
rename to tf_rl/utils/svg.py
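
For reference, a minimal sketch of how the pieces added by this patch fit together. All settings values below are illustrative, and the inline single-layer "brain" is a stand-in for whatever model tf_rl/models.py actually provides; the TF 0.x API (tf.initialize_all_variables, tf.train.SummaryWriter era) matches the code above.

    import tensorflow as tf
    from tf_rl import simulate
    from tf_rl.controller import DiscreteDeepQ
    from tf_rl.simulation import KarpathyGame

    # illustrative settings; every key below is read by KarpathyGame/GameObject
    settings = {
        "world_size":                 (700, 500),
        "object_radius":              10.0,
        "hero_initial_position":      (350, 250),
        "hero_initial_speed":         (0, 0),
        "hero_bounces_off_walls":     False,
        "maximum_speed":              (50, 50),
        "delta_v":                    50,
        "objects":                    ["friend", "enemy"],
        "num_objects":                {"friend": 25, "enemy": 25},
        "object_reward":              {"friend": 1.0, "enemy": -1.0},
        "colors":                     {"hero": "yellow", "friend": "green", "enemy": "red"},
        "wall_distance_penalty":      -10.0,
        "tolerable_distance_to_wall": 50,
        "num_observation_lines":      32,
        "observation_line_length":    120.0,
    }

    game = KarpathyGame(settings)
    session = tf.Session()

    # stand-in linear "brain": maps a batch of observations to per-action scores
    W = tf.Variable(tf.random_uniform((game.observation_size, game.num_actions),
                                      -0.1, 0.1))
    brain = lambda observation: tf.matmul(observation, W)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    controller = DiscreteDeepQ(game.observation_size, game.num_actions,
                               brain, optimizer, session)
    session.run(tf.initialize_all_variables())

    # runs the event loop: visualization, control, and simulation;
    # frames are written to my_sim/0.svg, my_sim/1.svg, ... as above
    simulate(game, controller, fps=30, save_path="my_sim")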