Debugging: add rews to info
arth-shukla committed Mar 5, 2024
1 parent 6425e94 commit 7773722
Showing 1 changed file with 54 additions and 0 deletions.
mani_skill2/envs/scenes/tasks/pick.py
@@ -380,6 +380,10 @@ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):

        # ---------------------------------------------------

        new_info = dict()
        for k in ["success", "fail", "is_grasped", "robot_force", "robot_cumulative_force"]:
            new_info[k] = info[k].clone()

        if torch.any(robot_too_far):
            # prevent torso and arm moving too much
            arm_torso_qvel = self.agent.robot.qvel[..., 3:-2][robot_too_far]
@@ -390,6 +394,13 @@ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
            robot_getting_closer_rew = (1 - torch.tanh(robot_to_obj_dist[robot_too_far] / 5))
            too_far_reward += robot_getting_closer_rew

            x = torch.zeros(self.num_envs, dtype=arm_torso_still_rew.dtype)
            x[robot_too_far] = arm_torso_still_rew
            new_info["arm_torso_still_rew"] = x.clone()
            x = torch.zeros(self.num_envs, dtype=robot_getting_closer_rew.dtype)
            x[robot_too_far] = robot_getting_closer_rew
            new_info["robot_getting_closer_rew"] = x.clone()


        if torch.any(robot_close_enough):
            # robot_too_far gives max +2 reward
@@ -424,6 +435,23 @@ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
            close_enough_reward += arm_resting_orientation_rew


            x = torch.zeros(self.num_envs, dtype=reaching_rew.dtype)
            x[robot_close_enough] = reaching_rew
            new_info["reaching_rew"] = x.clone()
            x = torch.zeros(self.num_envs, dtype=ee_still_rew.dtype)
            x[robot_close_enough] = ee_still_rew
            new_info["ee_still_rew"] = x.clone()
            x = torch.zeros(self.num_envs, dtype=grasp_rew.dtype)
            x[robot_close_enough] = grasp_rew
            new_info["grasp_rew"] = x.clone()
            x = torch.zeros(self.num_envs, dtype=success_rew.dtype)
            x[robot_close_enough] = success_rew
            new_info["success_rew"] = x.clone()
            x = torch.zeros(self.num_envs, dtype=arm_resting_orientation_rew.dtype)
            x[robot_close_enough] = arm_resting_orientation_rew
            new_info["arm_resting_orientation_rew"] = x.clone()


        if torch.any(not_grasped):
            # penalty for torso moving up and down too much
            tqvel_z = self.agent.robot.qvel[..., 3][not_grasped]
@@ -437,6 +465,14 @@ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
            not_grasped_reward += ee_over_obj_rew


            x = torch.zeros(self.num_envs, dtype=torso_not_moving_rew.dtype)
            x[not_grasped] = torso_not_moving_rew
            new_info["torso_not_moving_rew"] = x.clone()
            x = torch.zeros(self.num_envs, dtype=ee_over_obj_rew.dtype)
            x[not_grasped] = ee_over_obj_rew
            new_info["ee_over_obj_rew"] = x.clone()


        if torch.any(is_grasped):
            # not_grasped reward has max of +2
            # so, we add +2 to grasped reward so reward only increases as task proceeds
@@ -453,12 +489,25 @@ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
            is_grasped_reward += base_still_rew


            x = torch.zeros(self.num_envs, dtype=place_rew.dtype)
            x[is_grasped] = place_rew
            new_info["place_rew"] = x.clone()
            x = torch.zeros(self.num_envs, dtype=base_still_rew.dtype)
            x[is_grasped] = base_still_rew
            new_info["base_still_rew"] = x.clone()


        if torch.any(ee_rest):
            qvel = self.agent.robot.qvel[..., :-2][ee_rest]
            static_rew = (1 - torch.tanh(torch.norm(qvel, dim=1)))
            ee_rest_reward += static_rew


            x = torch.zeros(self.num_envs, dtype=static_rew.dtype)
            x[ee_rest] = static_rew
            new_info["static_rew"] = x.clone()


        # add rewards to specific envs
        reward[robot_too_far] += too_far_reward
        reward[robot_close_enough] += close_enough_reward
@@ -476,6 +525,11 @@ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
        cum_col_pen = (info["robot_cumulative_force"] > self.robot_cumulative_force_limit).float()
        reward -= cum_col_pen

        for k in list(info.keys()):
            info.pop(k, False)

        info.update(new_info)

        return reward

    def compute_normalized_dense_reward(
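Note on the pattern repeated throughout the additions above: each reward term is computed only for the environments that enter a given branch, so it has shape (mask.sum(),) rather than (num_envs,); before being logged it is scattered back into a zero-initialized tensor of shape (num_envs,) and stored in new_info, which then replaces the contents of info. Below is a minimal standalone sketch of that scatter-for-logging step, under the assumption of a boolean per-env mask; the names num_envs, mask, and branch_rew are illustrative placeholders, not identifiers from pick.py.

import torch

num_envs = 4

# Boolean mask selecting which parallel envs are in this branch
# (stands in for masks like robot_too_far or is_grasped above).
mask = torch.tensor([True, False, True, False])

# Per-branch reward, computed only for the masked envs,
# so it has shape (mask.sum(),) = (2,).
branch_rew = 1 - torch.tanh(torch.tensor([0.3, 1.2]))

# Scatter it into a full (num_envs,) tensor before logging so every
# env has an entry; envs outside the branch report 0.
x = torch.zeros(num_envs, dtype=branch_rew.dtype)
x[mask] = branch_rew

info = {"branch_rew": x.clone()}
print(info["branch_rew"])  # approximately tensor([0.7087, 0.0000, 0.1663, 0.0000])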
