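# DreamerV3-style agent configuration (flattened YAML) for a DeepMind Control
# Suite run of dmc_cartpole_balance.
# actent: scale of the actor entropy regularizer (encourages exploration).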
actent: 0.0003
actor:
  act: silu
  fan: avg
  inputs: [deter, stoch]
  layers: 2
  maxstd: 1.0
  minstd: 0.1
  norm: layer
  outnorm: false
  outscale: 1.0
  symlog_inputs: false
  unimix: 0.01
  units: 512
  winit: normal
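# Action distributions and gradient estimators per action type: continuous
# actions use a normal distribution trained by reparameterized backprop,
# discrete actions use one-hot categoricals trained with REINFORCE.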
actor_dist_cont: normal
actor_dist_disc: onehot
actor_grad_cont: backprop
actor_grad_disc: reinforce
actor_opt: {clip: 100.0, eps: 1e-05, lateclip: 0.0, lr: 3e-05, opt: adam, warmup: 0,
  wd: 0.0}
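# Each world-model training batch holds 16 replayed sequences of 64 steps.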
batch_length: 64
batch_size: 16
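# Continuation head: Bernoulli predictor of whether the episode continues,
# used to discount imagined returns.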
cont_head:
  act: silu
  dist: binary
  fan: avg
  inputs: [deter, stoch]
  layers: 2
  norm: layer
  outnorm: false
  outscale: 1.0
  units: 512
  winit: normal
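# Critic: value estimates as a two-hot discrete distribution over 255
# symlog-spaced bins; outscale 0.0 zero-initializes the output layer so
# initial value predictions start near zero.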
critic:
  act: silu
  bins: 255
  dist: symlog_disc
  fan: avg
  inputs: [deter, stoch]
  layers: 2
  norm: layer
  outnorm: false
  outscale: 0.0
  symlog_inputs: false
  units: 512
  winit: normal
critic_opt: {clip: 100.0, eps: 1e-05, lateclip: 0.0, lr: 3e-05, opt: adam, warmup: 0,
  wd: 0.0}
critic_slowreg: logprob
critic_type: vfunction
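# Decoder: ResNet CNN reconstructs the 64x64 image; mlp_keys '$^' is a regex
# matching nothing, so no low-dimensional observations are decoded.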
data_loaders: 8
decoder:
  act: silu
  cnn: resnet
  cnn_blocks: 0
  cnn_depth: 32
  cnn_keys: image
  cnn_sigmoid: false
  fan: avg
  image_dist: mse
  inputs: [deter, stoch]
  minres: 4
  mlp_keys: $^
  mlp_layers: 5
  mlp_units: 1024
  norm: layer
  outscale: 1.0
  resize: stride
  vector_dist: symlog_mse
  winit: normal
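# Plan2Explore disagreement ensemble: 8 heads predict the stochastic latent,
# and their disagreement serves as an intrinsic reward. Unused here because
# expl_behavior is None.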
disag_head:
  act: silu
  dist: mse
  fan: avg
  inputs: [deter, stoch, action]
  layers: 2
  norm: layer
  outscale: 1.0
  units: 512
  winit: normal
disag_models: 8
disag_target: [stoch]
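# KL terms: dyn_loss trains the prior toward the posterior and rep_loss
# (further below) trains the posterior toward the prior; both clip the KL
# below 1 free nat.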
dyn_loss: {free: 1.0, impl: kl}
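# Encoder mirrors the decoder; symlog_inputs squashes low-dimensional inputs,
# though only image keys match for this task.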
encoder: {act: silu, cnn: resnet, cnn_blocks: 0, cnn_depth: 32, cnn_keys: image, fan: avg,
  minres: 4, mlp_keys: $^, mlp_layers: 5, mlp_units: 1024, norm: layer, resize: stride,
  symlog_inputs: true, winit: normal}
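# Per-suite environment settings; dmc_cartpole_balance uses the dmc block
# (action repeat 2, 64x64 pixels).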
env:
  atari:
    actions: all
    gray: false
    lives: unused
    noops: 0
    repeat: 4
    resize: opencv
    size: [64, 64]
    sticky: true
  dmc:
    camera: -1
    repeat: 2
    size: [64, 64]
  dmlab:
    episodic: true
    repeat: 4
    size: [64, 64]
  loconav:
    camera: -1
    repeat: 2
    size: [64, 64]
  minecraft:
    break_speed: 100.0
    size: [64, 64]
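# Four environment instances run in parallel worker processes.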
envs: {amount: 4, checks: false, discretize: 0, length: 0, parallel: process, reset: true,
  restart: true}
eval_dir: ''
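# Exploration is disabled (expl_behavior: None); expl_opt and expl_rewards
# only take effect with an exploration behavior such as Plan2Explore.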
expl_behavior: None
expl_opt: {clip: 100.0, eps: 1e-05, lr: 0.0001, opt: adam, warmup: 0, wd: 0.0}
expl_rewards: {disag: 0.1, extr: 1.0}
filter: .*
grad_heads: [decoder, reward, cont]
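# horizon 333 corresponds to a discount of gamma = 1 - 1/333 ~ 0.997; actor
# and critic train on 15-step imagined rollouts from the world model.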
horizon: 333
imag_horizon: 15
imag_unroll: false
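# JAX runtime: jitted float16 mixed precision on GPU; policy and training are
# both pinned to device index 1 (the second GPU).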
jax:
  debug: false
  debug_nans: false
  jit: true
  logical_cpus: 0
  metrics_every: 10
  platform: gpu
  policy_devices: [1]
  prealloc: true
  precision: float16
  train_devices: [1]
logdir: ./logdir/dmc_cartpole_balance
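# Loss weights: dynamics KL 0.5 and representation KL 0.1; reconstruction,
# reward, and continuation terms all at 1.0.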
loss_scales: {actor: 1.0, cont: 1.0, critic: 1.0, dyn: 0.5, image: 1.0, rep: 0.1,
  reward: 1.0, slowreg: 1.0, vector: 1.0}
method: name
model_opt: {clip: 1000.0, eps: 1e-08, lateclip: 0.0, lr: 0.0001, opt: adam, warmup: 0,
  wd: 0.0}
rep_loss: {free: 1.0, impl: kl}
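# Uniform-sampling replay buffer holding up to 1e6 steps.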
replay: uniform
replay_online: false
replay_size: 1000000.0
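# Return normalization: scale by an EMA of the range between the 5th and 95th
# return percentiles; lambda-returns with lambda = 0.95.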
retnorm: {decay: 0.99, impl: perc_ema, max: 1.0, perchi: 95.0, perclo: 5.0}
return_lambda: 0.95
reward_head:
  act: silu
  bins: 255
  dist: symlog_disc
  fan: avg
  inputs: [deter, stoch]
  layers: 2
  norm: layer
  outnorm: false
  outscale: 0.0
  units: 512
  winit: normal
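# RSSM world model: 512-unit deterministic (GRU) state plus a stochastic
# latent of 32 categorical variables with 32 classes each; unimix mixes 1%
# uniform into the categoricals.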
rssm: {act: silu, action_clip: 1.0, classes: 32, deter: 512, fan: avg, initial: learned,
  norm: layer, stoch: 32, unimix: 0.01, units: 512, unroll: false, winit: normal}
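# Run loop: train_ratio 512 replayed steps trained per environment step;
# checkpoints roughly every 900 seconds, evaluation every 1e6 steps.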
run:
  actor_addr: ipc:///tmp/5551
  actor_batch: 32
  eval_eps: 1
  eval_every: 1000000.0
  eval_fill: 0
  eval_initial: true
  eval_samples: 1
  expl_until: 0
  from_checkpoint: ''
  log_every: 300
  log_keys_max: ^$
  log_keys_mean: (log_entropy)
  log_keys_sum: ^$
  log_keys_video: [image]
  log_zeros: false
  save_every: 900
  script: train
  steps: 10000000000.0
  sync_every: 10
  train_fill: 0
  train_ratio: 512.0
seed: 0
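# Slow (target) critic: updated every step by mixing 2% of the online critic
# weights into the slow copy (EMA).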
slow_critic_fraction: 0.02
slow_critic_update: 1
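# Greedy task behavior: the actor maximizes predicted task reward alone.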
task: dmc_cartpole_balance
task_behavior: Greedy
wrapper: {checks: false, discretize: 0, length: 0, reset: true}