
Commit 6bc7d2b
Merge pull request #49 from kindredresearch/sparse_reacher
Added sparse reward option to ur5_reacher
armahmood authored Oct 9, 2019
2 parents 4e186de + 88519d9 commit 6bc7d2b
Showing 1 changed file with 21 additions and 7 deletions.
senseact/envs/ur/reacher_env.py (21 additions, 7 deletions)
@@ -412,13 +412,17 @@ def _compute_sensation_(self, name, sensor_window, timestamp_window, index_window):
         self._target_diff_ = self._q_[-1, self._joint_indices] - self._target_
 
         self._reward_.value = self._compute_reward_()
+        if self._reward_type == "sparse":
+            done = self._reward_.value >= 0
+        else:
+            done = 0
         # TODO: use the correct obs that matches the observation_space
         return np.concatenate((self._q_[:, self._joint_indices].flatten(),
                                self._qd_[:, self._joint_indices].flatten() / self._speed_high,
                                self._target_diff_,
                                self._action_ / self._action_high,
                                [self._reward_.value],
-                               [0]))
+                               [done]))
 
     def _compute_actuation_(self, action, timestamp, index):
         """Creates and sends actuation packets to the communicator.
@@ -693,17 +693,27 @@ def _compute_reward_(self):
             A float reward.
         """
         if self._target_type == "position":
+            dist = np.linalg.norm(self._target_diff_, ord=2)
             if self._reward_type == "linear":
-                reward_dist = -np.linalg.norm(self._target_diff_, ord=2)
+                reward_dist = -dist
             elif self._reward_type == "precision":
-                reward_dist = -np.linalg.norm(self._target_diff_, ord=2) +\
-                              np.exp( -np.linalg.norm(self._target_diff_, ord=2)**2 / 0.01)
+                reward_dist = -dist +\
+                              np.exp( -dist**2 / 0.01)
+            elif self._reward_type == "sparse":
+                if dist < 0.05:
+                    reward_dist = 0
+                else:
+                    reward_dist = -0.1
+
         elif self._target_type == "angle":
+            dist = np.linalg.norm(self._target_diff_, ord=1)
             if self._reward_type == "linear":
-                reward_dist = -np.linalg.norm(self._target_diff_, ord=1)
+                reward_dist = -dist
             elif self._reward_type == "precision":
-                reward_dist = -np.linalg.norm(self._target_diff_, ord=1) +\
-                              np.exp(-np.linalg.norm(self._target_diff_, ord=1) ** 2 / 0.01)
+                reward_dist = -dist +\
+                              np.exp(-dist ** 2 / 0.01)
+            elif self._reward_type == "sparse":
+                raise NotImplementedError
 
         # TODO: doublecheck whether '0' or '-1' should be used as the index
         reward_vel = -self._vel_penalty * np.square(self._qd_[-1, self._joint_indices]).sum()
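
For the positional target, the new sparse option replaces the shaped distance term with a step function: the distance component is 0 when the norm of `_target_diff_` is below 0.05 and a flat -0.1 otherwise (the angular target type raises NotImplementedError for now). Below is a small self-contained sketch of that term, for illustration only; the real `_compute_reward_` still adds the non-positive velocity penalty shown above, so, assuming no other positive terms enter the sum, the combined reward (and therefore the done flag) only reaches zero once the arm is inside the target region and that penalty has effectively vanished.

```python
import numpy as np

def sparse_distance_reward(target_diff, threshold=0.05, penalty=-0.1):
    """Step-shaped distance term from the diff: 0 when the target
    difference norm is within `threshold`, otherwise a constant penalty."""
    dist = np.linalg.norm(target_diff, ord=2)
    return 0.0 if dist < threshold else penalty

# Hypothetical values, purely for illustration:
print(sparse_distance_reward(np.array([0.30, 0.10, 0.05])))  # -0.1, outside the target region
print(sparse_distance_reward(np.array([0.02, 0.01, 0.01])))  # 0.0, inside the target region
```

Assuming the environment constructor forwards a reward_type argument (the existing "linear" and "precision" options handled here suggest it does), the new behaviour would presumably be selected with something like ReacherEnv(..., target_type="position", reward_type="sparse").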
