Merge pull request real-stanford#7 from columbia-ai-robotics/cchi/bug_fix_eval_sample

fixed bug where only n_envs samples of metrics are used
cheng-chi authored Jun 1, 2023
2 parents 27395b7 + 5e36d50 commit 74b6391
Showing 6 changed files with 54 additions and 6 deletions.
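
The change is the same in all six env runners: each runner evaluates n_inits initial conditions using len(self.env_fns) parallel environments, but the old aggregation loop only iterated over the first len(self.env_fns) rollouts when computing logged metrics. A minimal sketch of the before/after behavior (illustrative only; the function name, the mean_score key, and the toy data are assumptions, not the runner's actual code):

```python
import collections
import numpy as np

def aggregate_scores(all_rewards, env_prefixs, n_envs, fixed=True):
    # all_rewards:  one reward array per rollout (n_inits rollouts in total)
    # env_prefixs:  logging prefix (e.g. 'train/' or 'test/') for each rollout
    # n_envs:       number of parallel envs, i.e. len(self.env_fns) in the runner
    n_inits = len(all_rewards)
    max_rewards = collections.defaultdict(list)
    # buggy version stopped at n_envs; the fix iterates over all n_inits rollouts
    for i in range(n_inits if fixed else n_envs):
        max_rewards[env_prefixs[i]].append(np.max(all_rewards[i]))
    return {prefix + 'mean_score': np.mean(v) for prefix, v in max_rewards.items()}

# toy usage: 8 rollouts evaluated with only 4 parallel envs
rewards = [np.random.rand(5) for _ in range(8)]
prefixs = ['test/'] * 8
print(aggregate_scores(rewards, prefixs, n_envs=4, fixed=False))  # averages first 4 rollouts only
print(aggregate_scores(rewards, prefixs, n_envs=4, fixed=True))   # averages all 8 rollouts
```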
10 changes: 9 additions & 1 deletion diffusion_policy/env_runner/blockpush_lowdim_runner.py
@@ -235,7 +235,15 @@ def run(self, policy: BaseLowdimPolicy):
prefix_counts = collections.defaultdict(lambda : 0)

log_data = dict()
-for i in range(len(self.env_fns)):
+# results reported in the paper were generated using the commented-out line below,
+# which will only report and average metrics from the first n_envs initial conditions and seeds
+# fortunately this won't invalidate our conclusions since
+# 1. This bug only affects the variance of metrics, not their mean
+# 2. All baseline methods are evaluated using the same code
+# to completely reproduce the reported numbers, uncomment this line:
+# for i in range(len(self.env_fns)):
+# and comment out this line:
+for i in range(n_inits):
seed = self.env_seeds[i]
prefix = self.env_prefixs[i]
this_rewards = all_rewards[i]
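
The comment added above argues that the bug inflates only the variance of the reported metrics, not their expected value. A quick toy check of that claim under the assumption that rollout scores are iid (made-up numbers, not results from the paper):

```python
import numpy as np

rng = np.random.default_rng(0)
n_inits, n_envs, n_trials = 50, 25, 5000
buggy, fixed = [], []
for _ in range(n_trials):
    scores = rng.uniform(0.0, 1.0, size=n_inits)  # stand-in per-rollout max rewards
    buggy.append(scores[:n_envs].mean())          # old aggregation: first n_envs only
    fixed.append(scores.mean())                   # new aggregation: all n_inits
print(np.mean(buggy), np.mean(fixed))  # nearly identical expected value
print(np.std(buggy), np.std(fixed))    # buggy estimate has roughly sqrt(2)x the spread
```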
10 changes: 9 additions & 1 deletion diffusion_policy/env_runner/kitchen_lowdim_runner.py
@@ -279,7 +279,15 @@ def run(self, policy: BaseLowdimPolicy):
log_data = dict()
prefix_total_reward_map = collections.defaultdict(list)
prefix_n_completed_map = collections.defaultdict(list)
-for i in range(len(self.env_fns)):
+# results reported in the paper were generated using the commented-out line below,
+# which will only report and average metrics from the first n_envs initial conditions and seeds
+# fortunately this won't invalidate our conclusions since
+# 1. This bug only affects the variance of metrics, not their mean
+# 2. All baseline methods are evaluated using the same code
+# to completely reproduce the reported numbers, uncomment this line:
+# for i in range(len(self.env_fns)):
+# and comment out this line:
+for i in range(n_inits):
seed = self.env_seeds[i]
prefix = self.env_prefixs[i]
this_rewards = all_rewards[i]
10 changes: 9 additions & 1 deletion diffusion_policy/env_runner/pusht_image_runner.py
@@ -221,7 +221,15 @@ def run(self, policy: BaseImagePolicy):
# log
max_rewards = collections.defaultdict(list)
log_data = dict()
-for i in range(len(self.env_fns)):
+# results reported in the paper were generated using the commented-out line below,
+# which will only report and average metrics from the first n_envs initial conditions and seeds
+# fortunately this won't invalidate our conclusions since
+# 1. This bug only affects the variance of metrics, not their mean
+# 2. All baseline methods are evaluated using the same code
+# to completely reproduce the reported numbers, uncomment this line:
+# for i in range(len(self.env_fns)):
+# and comment out this line:
+for i in range(n_inits):
seed = self.env_seeds[i]
prefix = self.env_prefixs[i]
max_reward = np.max(all_rewards[i])
10 changes: 9 additions & 1 deletion diffusion_policy/env_runner/pusht_keypoints_runner.py
@@ -243,7 +243,15 @@ def run(self, policy: BaseLowdimPolicy):
# log
max_rewards = collections.defaultdict(list)
log_data = dict()
-for i in range(len(self.env_fns)):
+# results reported in the paper were generated using the commented-out line below,
+# which will only report and average metrics from the first n_envs initial conditions and seeds
+# fortunately this won't invalidate our conclusions since
+# 1. This bug only affects the variance of metrics, not their mean
+# 2. All baseline methods are evaluated using the same code
+# to completely reproduce the reported numbers, uncomment this line:
+# for i in range(len(self.env_fns)):
+# and comment out this line:
+for i in range(n_inits):
seed = self.env_seeds[i]
prefix = self.env_prefixs[i]
max_reward = np.max(all_rewards[i])
10 changes: 9 additions & 1 deletion diffusion_policy/env_runner/robomimic_image_runner.py
@@ -324,7 +324,15 @@ def run(self, policy: BaseImagePolicy):
# log
max_rewards = collections.defaultdict(list)
log_data = dict()
-for i in range(len(self.env_fns)):
+# results reported in the paper were generated using the commented-out line below,
+# which will only report and average metrics from the first n_envs initial conditions and seeds
+# fortunately this won't invalidate our conclusions since
+# 1. This bug only affects the variance of metrics, not their mean
+# 2. All baseline methods are evaluated using the same code
+# to completely reproduce the reported numbers, uncomment this line:
+# for i in range(len(self.env_fns)):
+# and comment out this line:
+for i in range(n_inits):
seed = self.env_seeds[i]
prefix = self.env_prefixs[i]
max_reward = np.max(all_rewards[i])
10 changes: 9 additions & 1 deletion diffusion_policy/env_runner/robomimic_lowdim_runner.py
@@ -317,7 +317,15 @@ def run(self, policy: BaseLowdimPolicy):
# log
max_rewards = collections.defaultdict(list)
log_data = dict()
-for i in range(len(self.env_fns)):
+# results reported in the paper were generated using the commented-out line below,
+# which will only report and average metrics from the first n_envs initial conditions and seeds
+# fortunately this won't invalidate our conclusions since
+# 1. This bug only affects the variance of metrics, not their mean
+# 2. All baseline methods are evaluated using the same code
+# to completely reproduce the reported numbers, uncomment this line:
+# for i in range(len(self.env_fns)):
+# and comment out this line:
+for i in range(n_inits):
seed = self.env_seeds[i]
prefix = self.env_prefixs[i]
max_reward = np.max(all_rewards[i])
