Skip to content

Finetune: AWAC #49

Merged
merged 16 commits into from
Jun 13, 2023
623 changes: 623 additions & 0 deletions algorithms/finetune/awac.py

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions configs/finetune/awac/antmaze/large_diverse_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
awac_lambda: 0.1
batch_size: 256
buffer_size: 10000000
checkpoints_path: null
deterministic_torch: false
device: cuda
env_name: antmaze-large-diverse-v2
eval_frequency: 50000
gamma: 0.99
group: awac-antmaze-large-diverse-v2-multiseed-v0
hidden_dim: 256
learning_rate: 0.0003
n_test_episodes: 100
normalize_reward: true
offline_iterations: 1000000
online_iterations: 1000000
project: CORL
seed: 42
tau: 0.005
test_seed: 69
20 changes: 20 additions & 0 deletions configs/finetune/awac/antmaze/large_play_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
awac_lambda: 0.1
batch_size: 256
buffer_size: 10000000
checkpoints_path: null
deterministic_torch: false
device: cuda
env_name: antmaze-large-play-v2
eval_frequency: 50000
gamma: 0.99
group: awac-antmaze-large-play-v2-multiseed-v0
hidden_dim: 256
learning_rate: 0.0003
n_test_episodes: 100
normalize_reward: true
offline_iterations: 1000000
online_iterations: 1000000
project: CORL
seed: 42
tau: 0.005
test_seed: 69
20 changes: 20 additions & 0 deletions configs/finetune/awac/antmaze/medium_diverse_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
awac_lambda: 0.1
batch_size: 256
buffer_size: 10000000
checkpoints_path: null
deterministic_torch: false
device: cuda
env_name: antmaze-medium-diverse-v2
eval_frequency: 50000
gamma: 0.99
group: awac-antmaze-medium-diverse-v2-multiseed-v0
hidden_dim: 256
learning_rate: 0.0003
n_test_episodes: 100
normalize_reward: true
offline_iterations: 1000000
online_iterations: 1000000
project: CORL
seed: 42
tau: 0.005
test_seed: 69
20 changes: 20 additions & 0 deletions configs/finetune/awac/antmaze/medium_play_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
awac_lambda: 0.1
batch_size: 256
buffer_size: 10000000
checkpoints_path: null
deterministic_torch: false
device: cuda
env_name: antmaze-medium-play-v2
eval_frequency: 50000
gamma: 0.99
group: awac-antmaze-medium-play-v2-multiseed-v0
hidden_dim: 256
learning_rate: 0.0003
n_test_episodes: 100
normalize_reward: true
offline_iterations: 1000000
online_iterations: 1000000
project: CORL
seed: 42
tau: 0.005
test_seed: 69
20 changes: 20 additions & 0 deletions configs/finetune/awac/antmaze/umaze_diverse_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
awac_lambda: 0.1
batch_size: 256
buffer_size: 10000000
checkpoints_path: null
deterministic_torch: false
device: cuda
env_name: antmaze-umaze-diverse-v2
eval_frequency: 50000
gamma: 0.99
group: awac-antmaze-umaze-diverse-v2-multiseed-v0
hidden_dim: 256
learning_rate: 0.0003
n_test_episodes: 100
normalize_reward: true
offline_iterations: 1000000
online_iterations: 1000000
project: CORL
seed: 42
tau: 0.005
test_seed: 69
20 changes: 20 additions & 0 deletions configs/finetune/awac/antmaze/umaze_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
awac_lambda: 0.1
batch_size: 256
buffer_size: 10000000
checkpoints_path: null
deterministic_torch: false
device: cuda
env_name: antmaze-umaze-v2
eval_frequency: 50000
gamma: 0.99
group: awac-antmaze-umaze-v2-multiseed-v0
hidden_dim: 256
learning_rate: 0.0003
n_test_episodes: 100
normalize_reward: true
offline_iterations: 1000000
online_iterations: 1000000
project: CORL
seed: 42
tau: 0.005
test_seed: 69
20 changes: 20 additions & 0 deletions configs/finetune/awac/door/cloned_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
awac_lambda: 0.1
batch_size: 256
buffer_size: 10000000
checkpoints_path: null
deterministic_torch: false
device: cuda
env_name: door-cloned-v1
eval_frequency: 5000
gamma: 0.99
group: awac-door-cloned-v1-multiseed-v0
hidden_dim: 256
learning_rate: 0.0003
n_test_episodes: 10
normalize_reward: false
offline_iterations: 1000000
online_iterations: 1000000
project: CORL
seed: 42
tau: 0.005
test_seed: 69
20 changes: 20 additions & 0 deletions configs/finetune/awac/hammer/cloned_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
awac_lambda: 0.1
batch_size: 256
buffer_size: 10000000
checkpoints_path: null
deterministic_torch: false
device: cuda
env_name: hammer-cloned-v1
eval_frequency: 5000
gamma: 0.99
group: awac-hammer-cloned-v1-multiseed-v0
hidden_dim: 256
learning_rate: 0.0003
n_test_episodes: 10
normalize_reward: false
offline_iterations: 1000000
online_iterations: 1000000
project: CORL
seed: 42
tau: 0.005
test_seed: 69
20 changes: 20 additions & 0 deletions configs/finetune/awac/pen/cloned_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
awac_lambda: 0.1
batch_size: 256
buffer_size: 10000000
checkpoints_path: null
deterministic_torch: false
device: cuda
env_name: pen-cloned-v1
eval_frequency: 5000
gamma: 0.99
group: awac-pen-cloned-v1-multiseed-v0
hidden_dim: 256
learning_rate: 0.0003
n_test_episodes: 10
normalize_reward: false
offline_iterations: 1000000
online_iterations: 1000000
project: CORL
seed: 42
tau: 0.005
test_seed: 69
20 changes: 20 additions & 0 deletions configs/finetune/awac/relocate/cloned_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
awac_lambda: 0.1
batch_size: 256
buffer_size: 10000000
checkpoints_path: null
deterministic_torch: false
device: cuda
env_name: relocate-cloned-v1
eval_frequency: 5000
gamma: 0.99
group: awac-relocate-cloned-v1-multiseed-v0
hidden_dim: 256
learning_rate: 0.0003
n_test_episodes: 10
normalize_reward: false
offline_iterations: 1000000
online_iterations: 1000000
project: CORL
seed: 42
tau: 0.005
test_seed: 69