# Script that performs the forward planning in the learned embedding space
import os

import numpy as np
import torch
import torch.utils.data as data
from torch.nn.parallel import DistributedDataParallel as DDP
from torchvision import transforms
from torchvision.utils import save_image
from tqdm import tqdm

from dataset import DynamicsDataset
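
# Pipeline (see __main__ below): get_all_embeddings() encodes every test
# observation into the Z_curr/Z_next databases; choose_random_traj() picks
# random start and goal embeddings; plan_all() plans each trajectory step by
# step and optionally dumps the nearest-neighbor observations as an image grid.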


class Planner:
    # The Planner samples a goal embedding and, at each step, samples candidate
    # actions, predicts the resulting embeddings with the transition model, and
    # keeps the prediction closest to the goal. The corresponding observations
    # are later recovered by nearest-neighbor lookup in the embedding database.
    def __init__(self, num_trial, len_traj, num_traj, sec_interval, poly_max_deg,
                 test_roots, device, model_load_file, dump_images, dump_images_file=None):
        # Initialize the embedding size
        self.z_dim = 8  # Dimension of the embeddings
        self.sec_interval = sec_interval  # Used when fitting the action polynomials
        self.poly_max_deg = poly_max_deg
        self.action_dim = 2 * (poly_max_deg + 1)  # Dimension of the actions
        # self.fps = 20
        # Old statistics for a 12-dimensional action space, kept for reference:
        # self.action_mean = np.array([-1.3e-2, 4.0e-2, -5.0e-2, 4.0e-2, 2.0e+1, 5.9e+3, 2.9e-4, -1.1e-3,
        #                              1.2e-3, 5.0e-4, -9.6e-4, -1.2e-2])
        # self.action_std = np.array([3.9e+0, 1.9e+1, 3.4e+1, 2.5e+1, 7.0e+0, 3.5e+3, 4.2e-2, 2.1e-1, 3.7e-1,
        #                             2.8e-1, 1.0e-1, 1.4e-1])
        # Per-dimension Gaussian statistics of the 22-dimensional action vector,
        # taken by looking at the data
        self.action_mean = np.array([
            -1.7e-5, 5.0e-4, -6.0e-3, 3.7e-2, -1.4e-1,
            3.5e-1, -4.5e-1, 3.0e-1, -6.3e-2, 2.0e+1,
            5.8e+3, 7.2e-7, -1.8e-5, 2.1e-4, -1.1e-3,
            4.3e-3, -1.0e-2, 1.5e-2, -1.2e-2, 5.4e-3,
            -6.0e-4, -9.8e-3
        ])
        self.action_std = np.array([
            1.2e-2, 3.0e-1, 3.2e+0, 1.9e+1, 6.8e+1, 1.5e+2,
            2.1e+2, 1.7e+2, 7.0e+1, 1.2e+1, 3.5e+3, 1.4e-4,
            3.6e-3, 3.8e-2, 2.3e-1, 8.0e-1, 1.8e+0, 2.4e+0,
            1.9e+0, 7.6e-1, 1.4e-1, 1.5e-1
        ])
        self.num_traj = num_traj    # Number of trajectories to plan
        self.len_traj = len_traj    # Length of each trajectory
        self.num_trial = num_trial  # Number of candidate actions sampled per step before picking the closest prediction
        self.dump_images = dump_images
        self.dump_images_file = dump_images_file
        # Initialize the test data loader with the given test roots
        self.bs = 128
        self.test_dset = DynamicsDataset(roots=test_roots,
                                         sec_interval=sec_interval,
                                         poly_max_deg=poly_max_deg)
        self.test_loader = data.DataLoader(self.test_dset, batch_size=self.bs, shuffle=False,
                                           num_workers=4, pin_memory=True)
        # Load the saved model (requires the process group initialized in __main__)
        self.device = device
        checkpoint = torch.load(model_load_file, map_location=device)
        self.encoder = DDP(checkpoint['encoder'].to(device), device_ids=[0])
        self.trans = DDP(checkpoint['trans'].to(device), device_ids=[0])
        # Embeddings and actions to be held for each trajectory
        self.traj_zs = np.zeros((self.num_traj, self.len_traj + 1, self.z_dim))
        self.traj_actions = np.zeros((self.num_traj, self.len_traj, self.action_dim))  # One less action than observations
        # Inverse of the training-time Normalize(mean=0.5, std=0.5): maps
        # observations back to [0, 1] for saving
        self.inv_trans = transforms.Compose([
            transforms.Normalize(mean=[0., 0., 0.], std=[1/0.5, 1/0.5, 1/0.5]),
            transforms.Normalize(mean=[-0.5, -0.5, -0.5], std=[1., 1., 1.]),
        ])

    def get_all_embeddings(self):
        # Encode every (obs, obs_next) pair in the test set; Z_next later serves
        # as the retrieval database in dump_trajectories
        pbar = tqdm(total=len(self.test_loader))
        # Create arrays to hold all the embeddings
        self.Z_curr = np.zeros((len(self.test_dset), self.z_dim))
        self.Z_next = np.zeros((len(self.test_dset), self.z_dim))
        ep = 0
        for batch in self.test_loader:
            # Get current batch
            obs, obs_next, actions = [el.to(self.device) for el in batch]
            # Get the current embeddings
            z, z_next = self.encoder(obs), self.encoder(obs_next)  # NOTE: predicted embeddings are not needed here
            self.Z_curr[ep*self.bs:(ep+1)*self.bs, :] = z.cpu().detach().numpy()
            self.Z_next[ep*self.bs:(ep+1)*self.bs, :] = z_next.cpu().detach().numpy()
            ep += 1
            pbar.update(1)
        pbar.close()

    def choose_random_traj(self):
        # Randomly choose the start and goal embeddings of each trajectory from self.Z_curr
        start_rand_idx = np.random.choice(self.Z_curr.shape[0], self.num_traj)
        end_rand_idx = np.random.choice(self.Z_curr.shape[0], self.num_traj)
        self.z_starts = self.Z_curr[start_rand_idx, :]
        self.z_ends = self.Z_curr[end_rand_idx, :]
        print('self.z_starts.shape: {}, self.z_ends.shape: {}'.format(
            self.z_starts.shape, self.z_ends.shape))

    # Sample one action from a Gaussian distribution with the given means and stds
    # TODO: This can be changed to do kNN imitation (see the sketch below)
    def sample_action(self):
        rand_act = np.random.normal(self.action_mean, self.action_std)  # Shape: (action_dim,)
        return rand_act
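
    # A minimal sketch of the kNN-imitation variant mentioned in the TODO above,
    # not part of the original pipeline. It assumes self.Z_curr is populated (via
    # get_all_embeddings) and that indexing self.test_dset returns
    # (obs, obs_next, action) with the action aligned to Z_curr's row order.
    # The method name, k, and noise_scale are illustrative choices.
    def sample_action_knn(self, z_curr, k=5, noise_scale=0.1):
        # Find the k dataset states whose embeddings are closest to z_curr
        dists = np.linalg.norm(self.Z_curr - np.reshape(z_curr, (1, -1)), axis=1)
        knn_idx = np.argsort(dists)[:k]
        # Imitate one random neighbor's action, perturbed with scaled Gaussian noise
        _, _, action = self.test_dset[int(np.random.choice(knn_idx))]
        action = np.asarray(action)
        return action + np.random.normal(0.0, noise_scale * self.action_std)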

    # Plan one step for one trajectory: for each trial, sample an action, predict
    # the next embedding, and return the candidate closest to the goal embedding.
    # z_curr and z_end are embedding tensors of shape (z_dim,)
    def single_plan(self, z_curr, z_end):
        zs, actions = [], []
        with torch.no_grad():
            for _ in range(self.num_trial):
                action = torch.FloatTensor(self.sample_action()).to(self.device)
                z_next = self.trans(z_curr, action)
                zs.append(z_next)
                actions.append(action)
            # L2 distance between each candidate embedding and the goal embedding;
            # flattening per candidate keeps dists one-dimensional so argmin
            # indexes the candidates directly
            zs = torch.stack(zs, dim=0)
            dists = torch.norm(zs.view(self.num_trial, -1) - z_end.view(1, -1), dim=1)
            idx = torch.argmin(dists)
        return actions[idx], zs[idx]
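
    # A possible vectorized alternative to single_plan (a sketch, not used in the
    # original pipeline): sample all candidate actions at once and run a single
    # batched pass through the transition model. This assumes self.trans accepts
    # batched inputs of shape (num_trial, ...), which the original code does not
    # confirm; the method name is illustrative.
    def single_plan_batched(self, z_curr, z_end):
        with torch.no_grad():
            acts = np.random.normal(self.action_mean, self.action_std,
                                    size=(self.num_trial, self.action_dim))
            actions = torch.FloatTensor(acts).to(self.device)
            # Repeat the current embedding for every candidate action
            zs = self.trans(z_curr.view(1, -1).expand(self.num_trial, -1), actions)
            dists = torch.norm(zs.view(self.num_trial, -1) - z_end.view(1, -1), dim=1)
            idx = torch.argmin(dists)
        return actions[idx], zs[idx]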

    # Plan one full trajectory and fill the corresponding rows of
    # self.traj_actions and self.traj_zs
    def plan_traj(self, traj_id, z_start, z_end):
        # z_start and z_end have shape (z_dim,)
        self.traj_zs[traj_id, 0, :] = z_start[:]
        z_start = torch.FloatTensor(z_start).to(self.device)
        z_end = torch.FloatTensor(z_end).to(self.device)
        for step in range(1, self.len_traj):
            print(f'Step: {step}')
            action, z_start = self.single_plan(z_start, z_end)  # Update z_start in each step
            self.traj_actions[traj_id, step-1, :] = action.cpu().detach().numpy()
            self.traj_zs[traj_id, step, :] = z_start.cpu().detach().numpy()
        self.traj_zs[traj_id, -1, :] = z_end.cpu().detach().numpy()  # Store the goal embedding in the last slot

    # Plan all the trajectories and dump images if requested
    def plan_all(self):
        # For each trajectory, call plan_traj with its start/goal embeddings
        # drawn from z_starts and z_ends
        for traj_id in range(self.num_traj):
            z_start = self.z_starts[traj_id, :]
            z_end = self.z_ends[traj_id, :]
            self.plan_traj(traj_id, z_start, z_end)
            print(f'----\nTrajectory {traj_id} planned\n----')
        if self.dump_images:
            self.dump_trajectories()

    # For each predicted embedding, find the closest embedding in the database,
    # retrieve the corresponding observation, and save everything as an image grid
    def dump_trajectories(self):
        # The last column of each row holds the goal observation
        imgs = np.zeros((self.num_traj, self.len_traj + 1, 3, 480, 480))
        for traj_id in range(self.num_traj):
            for step in range(self.len_traj + 1):  # TODO: also do this for steps 5-10
                # For each embedding in self.traj_zs, find the closest embedding in self.Z_next
                curr_z_next_predict = self.traj_zs[traj_id, step, :]
                dist = np.linalg.norm(self.Z_next - curr_z_next_predict, axis=1)
                closest_z_idx = np.argmin(dist)
                # Get the closest observation and undo the normalization
                obs, _, _ = self.test_dset[closest_z_idx]
                obs = self.inv_trans(obs)
                imgs[traj_id, step, :] = obs[:]
        imgs = np.reshape(imgs, (-1, 3, 480, 480))  # Flatten trajectories into one batch of images
        imgs = torch.FloatTensor(imgs)
        save_image(imgs, self.dump_images_file, nrow=self.len_traj + 1)


if __name__ == '__main__':
    torch.cuda.empty_cache()
    # Initialize a single-process group so the DDP-wrapped models load properly
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = "29503"
    torch.distributed.init_process_group(backend='gloo', rank=0, world_size=1)
    torch.cuda.set_device(0)
    test_roots = [
        "data/28012018_111041",
        "data/28012018_124452",
        "data/29032022_195715",
        "data/28012018_122358",
        "data/28012018_120304",
        "data/28012018_124425"
    ]
    planner = Planner(
        num_trial=100,  # Sample 100 candidate actions per step
        len_traj=20,    # Try to reach the goal in 20 steps
        num_traj=10,
        sec_interval=5,
        poly_max_deg=10,
        test_roots=test_roots,
        device=torch.device('cuda:0'),
        model_load_file='out/train_sec_5_adim_22_zdim_8/checkpoint_40.pt',
        dump_images=True,
        dump_images_file='train_sec_5_adim_22_zdim_8_planning.png'
    )
    planner.get_all_embeddings()
    planner.choose_random_traj()
    planner.plan_all()