Skip to content

Commit

Permalink
Finetune: SPOT (tinkoff-ai#47)
Browse files: browse the repository at this point in the history
  • Loading branch information
DT6A authored Jun 13, 2023
1 parent 457db3b commit 3e0dfd3
Show file tree
Hide file tree
Showing 11 changed files with 1,298 additions and 0 deletions.
918 changes: 918 additions & 0 deletions algorithms/finetune/spot.py

Large diffs are not rendered by default.

38 changes: 38 additions & 0 deletions configs/finetune/spot/antmaze/large_diverse_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
actor_init_w: 0.001
actor_lr: 0.0001
batch_size: 256
beta: 0.5
buffer_size: 2000000
checkpoints_path: null
critic_init_w: 0.003
critic_lr: 0.0003
device: cuda
discount: 0.99
env: antmaze-large-diverse-v2
eval_freq: 50000
eval_seed: 0
expl_noise: 0.1
group: SPOT-D4RL
iwae: false
lambd: 0.025
lambd_cool: true
lambd_end: 0.2
load_model: ''
n_episodes: 100
name: SPOT
noise_clip: 0.5
normalize: false
normalize_reward: true
num_samples: 1
offline_iterations: 1000000
online_discount: 0.995
online_iterations: 1000000
policy_freq: 2
policy_noise: 0.2
project: CORL
seed: 0
tau: 0.005
vae_hidden_dim: 750
vae_iterations: 100000
vae_latent_dim: null
vae_lr: 0.001
38 changes: 38 additions & 0 deletions configs/finetune/spot/antmaze/large_play_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
actor_init_w: 0.001
actor_lr: 0.0001
batch_size: 256
beta: 0.5
buffer_size: 2000000
checkpoints_path: null
critic_init_w: 0.003
critic_lr: 0.0003
device: cuda
discount: 0.99
env: antmaze-large-play-v2
eval_freq: 50000
eval_seed: 0
expl_noise: 0.1
group: SPOT-D4RL
iwae: false
lambd: 0.025
lambd_cool: true
lambd_end: 0.2
load_model: ''
n_episodes: 100
name: SPOT
noise_clip: 0.5
normalize: false
normalize_reward: true
num_samples: 1
offline_iterations: 1000000
online_discount: 0.995
online_iterations: 1000000
policy_freq: 2
policy_noise: 0.2
project: CORL
seed: 0
tau: 0.005
vae_hidden_dim: 750
vae_iterations: 100000
vae_latent_dim: null
vae_lr: 0.001
38 changes: 38 additions & 0 deletions configs/finetune/spot/antmaze/medium_diverse_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
actor_init_w: 0.001
actor_lr: 0.0001
batch_size: 256
beta: 0.5
buffer_size: 2000000
checkpoints_path: null
critic_init_w: 0.003
critic_lr: 0.0003
device: cuda
discount: 0.99
env: antmaze-medium-diverse-v2
eval_freq: 50000
eval_seed: 0
expl_noise: 0.1
group: SPOT-D4RL
iwae: false
lambd: 0.025
lambd_cool: true
lambd_end: 0.2
load_model: ''
n_episodes: 100
name: SPOT
noise_clip: 0.5
normalize: false
normalize_reward: true
num_samples: 1
offline_iterations: 1000000
online_discount: 0.995
online_iterations: 1000000
policy_freq: 2
policy_noise: 0.2
project: CORL
seed: 0
tau: 0.005
vae_hidden_dim: 750
vae_iterations: 100000
vae_latent_dim: null
vae_lr: 0.001
38 changes: 38 additions & 0 deletions configs/finetune/spot/antmaze/medium_play_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
actor_init_w: 0.001
actor_lr: 0.0001
batch_size: 256
beta: 0.5
buffer_size: 2000000
checkpoints_path: null
critic_init_w: 0.003
critic_lr: 0.0003
device: cuda
discount: 0.99
env: antmaze-medium-play-v2
eval_freq: 50000
eval_seed: 0
expl_noise: 0.1
group: SPOT-D4RL
iwae: false
lambd: 0.05
lambd_cool: true
lambd_end: 0.2
load_model: ''
n_episodes: 100
name: SPOT
noise_clip: 0.5
normalize: false
normalize_reward: true
num_samples: 1
offline_iterations: 1000000
online_discount: 0.995
online_iterations: 1000000
policy_freq: 2
policy_noise: 0.2
project: CORL
seed: 0
tau: 0.005
vae_hidden_dim: 750
vae_iterations: 100000
vae_latent_dim: null
vae_lr: 0.001
38 changes: 38 additions & 0 deletions configs/finetune/spot/antmaze/umaze_diverse_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
actor_init_w: 0.001
actor_lr: 0.0001
batch_size: 256
beta: 0.5
buffer_size: 2000000
checkpoints_path: null
critic_init_w: 0.003
critic_lr: 0.0003
device: cuda
discount: 0.99
env: antmaze-umaze-diverse-v2
eval_freq: 50000
eval_seed: 0
expl_noise: 0.1
group: SPOT-D4RL
iwae: false
lambd: 0.25
lambd_cool: true
lambd_end: 0.2
load_model: ''
n_episodes: 100
name: SPOT
noise_clip: 0.5
normalize: false
normalize_reward: true
num_samples: 1
offline_iterations: 1000000
online_discount: 0.995
online_iterations: 1000000
policy_freq: 2
policy_noise: 0.2
project: CORL
seed: 0
tau: 0.005
vae_hidden_dim: 750
vae_iterations: 100000
vae_latent_dim: null
vae_lr: 0.001
38 changes: 38 additions & 0 deletions configs/finetune/spot/antmaze/umaze_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
actor_init_w: 0.001
actor_lr: 0.0001
batch_size: 256
beta: 0.5
buffer_size: 2000000
checkpoints_path: null
critic_init_w: 0.003
critic_lr: 0.0003
device: cuda
discount: 0.99
env: antmaze-umaze-v2
eval_freq: 50000
eval_seed: 0
expl_noise: 0.1
group: SPOT-D4RL
iwae: false
lambd: 0.25
lambd_cool: true
lambd_end: 0.2
load_model: ''
n_episodes: 100
name: SPOT
noise_clip: 0.5
normalize: false
normalize_reward: true
num_samples: 1
offline_iterations: 1000000
online_discount: 0.995
online_iterations: 1000000
policy_freq: 2
policy_noise: 0.2
project: CORL
seed: 0
tau: 0.005
vae_hidden_dim: 750
vae_iterations: 100000
vae_latent_dim: null
vae_lr: 0.001
38 changes: 38 additions & 0 deletions configs/finetune/spot/door/cloned_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
actor_init_w: 0.001
actor_lr: 0.0001
batch_size: 256
beta: 0.5
buffer_size: 2000000
checkpoints_path: null
critic_init_w: 0.003
critic_lr: 0.0003
device: cuda
discount: 0.99
env: door-cloned-v1
eval_freq: 5000
eval_seed: 0
expl_noise: 0.1
group: SPOT-D4RL
iwae: false
lambd: 1.0
lambd_cool: true
lambd_end: 0.5
load_model: ''
n_episodes: 10
name: SPOT
noise_clip: 0.5
normalize: false
normalize_reward: false
num_samples: 1
offline_iterations: 1000000
online_discount: 0.99
online_iterations: 1000000
policy_freq: 2
policy_noise: 0.2
project: CORL
seed: 0
tau: 0.005
vae_hidden_dim: 750
vae_iterations: 100000
vae_latent_dim: null
vae_lr: 0.001
38 changes: 38 additions & 0 deletions configs/finetune/spot/hammer/cloned_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
actor_init_w: 0.001
actor_lr: 0.0001
batch_size: 256
beta: 0.5
buffer_size: 2000000
checkpoints_path: null
critic_init_w: 0.003
critic_lr: 0.0003
device: cuda
discount: 0.99
env: hammer-cloned-v1
eval_freq: 5000
eval_seed: 0
expl_noise: 0.1
group: SPOT-D4RL
iwae: false
lambd: 1.0
lambd_cool: true
lambd_end: 0.5
load_model: ''
n_episodes: 10
name: SPOT
noise_clip: 0.5
normalize: false
normalize_reward: false
num_samples: 1
offline_iterations: 1000000
online_discount: 0.99
online_iterations: 1000000
policy_freq: 2
policy_noise: 0.2
project: CORL
seed: 0
tau: 0.005
vae_hidden_dim: 750
vae_iterations: 100000
vae_latent_dim: null
vae_lr: 0.001
38 changes: 38 additions & 0 deletions configs/finetune/spot/pen/cloned_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
actor_init_w: 0.001
actor_lr: 0.0001
batch_size: 256
beta: 0.5
buffer_size: 2000000
checkpoints_path: null
critic_init_w: 0.003
critic_lr: 0.0003
device: cuda
discount: 0.99
env: pen-cloned-v1
eval_freq: 5000
eval_seed: 0
expl_noise: 0.1
group: SPOT-D4RL
iwae: false
lambd: 1.0
lambd_cool: true
lambd_end: 0.5
load_model: ''
n_episodes: 10
name: SPOT
noise_clip: 0.5
normalize: false
normalize_reward: false
num_samples: 1
offline_iterations: 1000000
online_discount: 0.99
online_iterations: 1000000
policy_freq: 2
policy_noise: 0.2
project: CORL
seed: 0
tau: 0.005
vae_hidden_dim: 750
vae_iterations: 100000
vae_latent_dim: null
vae_lr: 0.001
38 changes: 38 additions & 0 deletions configs/finetune/spot/relocate/cloned_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
actor_init_w: 0.001
actor_lr: 0.0001
batch_size: 256
beta: 0.5
buffer_size: 2000000
checkpoints_path: null
critic_init_w: 0.003
critic_lr: 0.0003
device: cuda
discount: 0.99
env: relocate-cloned-v1
eval_freq: 5000
eval_seed: 0
expl_noise: 0.1
group: SPOT-D4RL
iwae: false
lambd: 1.0
lambd_cool: true
lambd_end: 0.5
load_model: ''
n_episodes: 10
name: SPOT
noise_clip: 0.5
normalize: false
normalize_reward: false
num_samples: 1
offline_iterations: 1000000
online_discount: 0.99
online_iterations: 1000000
policy_freq: 2
policy_noise: 0.2
project: CORL
seed: 0
tau: 0.005
vae_hidden_dim: 750
vae_iterations: 100000
vae_latent_dim: null
vae_lr: 0.001

0 comments on commit 3e0dfd3

Please sign in to comment.