-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathtf_algorithmsv2.py
102 lines (76 loc) · 3.98 KB
/
tf_algorithmsv2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import tensorflow as tf
class Algorithms:
    """Fit value-function weights ``w`` by minimising a discrete Bellman error.

    The value of a state ``x`` is modelled as ``w^T phi(x)``. For every
    discrete action ``u`` the expected next-state features are obtained from
    the Koopman tensor ``K_hat`` contracted with ``psi(u)``.

    Args:
        X: Sampled data points, indexed as ``X[:, i]`` (one sample per column).
        All_U: All possible actions, iterated as the rows of ``All_U.T``
            (one action per column).
        phi: Dictionary function for states.
        psi: Dictionary function for actions; its result is indexed
            ``psi(u)[:, 0]``.
        K_hat: Estimated Koopman tensor, contracted as ``'ijz,z->ij'``.
        cost: Cost function called as ``cost(x, u)``.
        epsilon: Bellman-error threshold below which ``algorithm2`` stops.
    """

    def __init__(self, X, All_U, phi, psi, K_hat, cost, epsilon=1e-4):
        self.X = X  # Sampled data points
        self.N = X.shape[1]  # Number of data points in dataset
        self.All_U = All_U  # Contains all possible actions (if discrete)
        self.u_bounds = [tf.math.reduce_min(All_U), tf.math.reduce_max(All_U)]
        self.num_unique_actions = All_U.shape[1]
        self.phi = phi  # Dictionary function for X
        self.psi = psi  # Dictionary function for U
        self.K_hat = K_hat  # Estimated Koopman Tensor
        self.cost = cost  # Cost function to optimize
        self.epsilon = epsilon
        # Column vector of weights, every entry initialised to 0.3.
        self.w = tf.Variable(tf.fill([K_hat.shape[0], 1], 0.3), name='weights')
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)

    def K_u(self, u):
        """Pick out the Koopman operator for a particular action ``u``.

        Contracts the last axis of ``K_hat`` with the first column of
        ``psi(u)`` and returns the resulting matrix cast to ``float32``.
        """
        psi_u = self.psi(u)[:, 0]
        return tf.cast(tf.einsum('ijz,z->ij', self.K_hat, psi_u), tf.float32)

    def inner_pi_u(self, u, x):
        """Return ``-(cost(x, u) + w^T K_u(u) phi(x))``.

        This is the exponent of the unnormalised policy weight for ``u``.
        """
        phi_x_prime = tf.linalg.matmul(self.K_u(u), self.phi(x))
        weighted_phi_x_prime = tf.linalg.matmul(tf.transpose(self.w), phi_x_prime)
        inner = tf.add(self.cost(x, u), weighted_phi_x_prime)
        return -inner

    def pi_u(self, u, x):
        """Return the unnormalised policy weight ``exp(inner_pi_u(u, x))``."""
        return tf.math.exp(self.inner_pi_u(u, x))

    def _squared_bellman_residual(self, x):
        """Return the squared Bellman residual for a single state column ``x``."""
        phi_x = self.phi(x)

        # One exponent per action, stacked along a new leading axis.
        inner_pi_us = tf.stack(
            [self.inner_pi_u(u.reshape(-1, 1), x) for u in self.All_U.T]
        )

        # log-softmax keeps log(pi) finite even when pi underflows to zero;
        # taking log() of an explicit softmax would give -inf * 0 = NaN there.
        log_pis = tf.nn.log_softmax(inner_pi_us, axis=0)
        pis = tf.math.exp(log_pis)

        # inner_pi_u already equals -(cost + w^T K_u phi(x)), so the
        # expectation of (cost + log(pi) + w^T K_u phi(x)) under pi is
        # sum(pi * (log(pi) - inner)). Reusing it avoids recomputing the cost
        # and K_u per action (assumes cost/phi/psi are deterministic).
        expectation_u = tf.math.reduce_sum(pis * (log_pis - inner_pi_us), axis=0)

        return tf.math.pow(tf.transpose(self.w) @ phi_x - expectation_u, 2)

    def discreteBellmanError(self):
        """Equation 12 in writeup.

        Returns the sum over all sampled states of the squared difference
        between ``w^T phi(x)`` and the policy-weighted expectation of
        ``cost + log(pi) + w^T K_u phi(x)``.
        """
        # A plain Python loop: the per-sample work uses ``.reshape`` on array
        # slices and therefore already requires eager execution.
        errors = [
            self._squared_bellman_residual(self.X[:, i].reshape(-1, 1))
            for i in range(self.N)
        ]
        return tf.math.reduce_sum(errors)

    def algorithm2(self, max_iterations=None):
        """Run Adam on ``w`` until the Bellman error drops below ``epsilon``.

        Args:
            max_iterations: Optional cap on the number of gradient steps.
                ``None`` (the default) keeps iterating until convergence.

        Returns:
            List of Bellman errors: the initial one followed by the error
            after each gradient step.
        """
        bellmanErrors = []
        steps = 0
        while True:
            # Record the forward pass so the same evaluation serves both as
            # the reported error and as the loss to differentiate.
            with tf.GradientTape() as tape:
                loss = self.discreteBellmanError()

            if bellmanErrors:
                print("Current bellman error:", loss)
            else:
                print("Initial bellman error:", loss)
            bellmanErrors.append(loss)

            # Written as "not >=" so that a NaN loss also stops the loop.
            if not bool(loss >= self.epsilon):
                break
            if max_iterations is not None and steps >= max_iterations:
                break

            grads = tape.gradient(loss, [self.w])
            self.optimizer.apply_gradients(zip(grads, [self.w]))
            steps += 1
        return bellmanErrors