# default_config_cmc_ppo.yaml (42 lines, 2.86 KB)
---
# PPO (+ ICM curiosity module) training configuration for MountainCarContinuous-v0.
env_name: !!str MountainCarContinuous-v0
device: !!str cuda        # torch device (cuda:0 or cpu)
render_env: !!bool False  # render environment

agents:
  ppo:
    train_episodes: !!int 10000           # maximum number of episodes to optimize
    test_episodes: !!int 1                # maximum number of episodes to test
    init_episodes: !!int 0                # number of episodes to fill the replay buffer
    update_episodes: !!float 10           # update policy every x episodes (can be float)
    ppo_epochs: !!int 80                  # update policy for x epochs
    gamma: !!float 0.99                   # discount factor
    lr: !!float 3e-4                      # learning rate
    vf_coef: !!float 1                    # value function coefficient (see PPO paper)
    ent_coef: !!float 0.01                # entropy coefficient (see PPO paper)
    eps_clip: !!float 0.2                 # trust region size (see PPO paper)
    rb_size: !!int 1000000                # size of the replay buffer
    same_action_num: !!int 5              # how often to perform the same action subsequently
    activation_fn: !!str relu             # activation function for actor/critic ('tanh', 'relu', 'leakyrelu' or 'prelu')
    hidden_size: !!int 64                 # size of the actor/critic hidden layer
    hidden_layer: !!int 2                 # number of hidden layers
    action_std: !!float 0.5               # action noise standard deviation
    print_rate: !!int 10                  # update rate of avg meters
    early_out_num: !!int 50               # based on how many training episodes shall an early out happen
    early_out_virtual_diff: !!float 0.02  # performance difference for an early out for virtual envs
  icm:
    lr: !!float 1e-4                      # learning rate
    beta: !!float 0.2                     # weight used to trade off action vs state prediction in the ICM loss function
    eta: !!float 0.5                      # weight used to compute intrinsic reward from loss value
    feature_dim: !!int 64                 # feature dimension of features model
    hidden_size: !!int 128                # size of all ICM sub-network layers

envs:
  MountainCarContinuous-v0:
    solved_reward: !!float 90             # used for early out in RL agent training
    # NOTE(review): original comment carried a stray 'REMOVE' marker here — confirm whether this key is obsolete
    max_steps: !!int 999                  # maximum number of steps per episode
    activation_fn: !!str leakyrelu        # activation function of the virtual environment
    hidden_size: !!int 96                 # size of the hidden layer of the virtual environment
    hidden_layer: !!int 2                 # number of hidden layers of the virtual environment
    info_dim: !!int 0                     # additional information dimension from step function
    reward_env_type: !!int 0              # type of reward shaping function