---
# Run configuration for the `atari_alien` task (see `task` and `logdir`).
#
# Layout notes:
# - Top-level keys are kept in the original alphabetical order.
# - NOTE(review): the source had all line breaks collapsed; the nesting
#   below was reconstructed from the key order and flow/block syntax.
#   Confirm against the consumer's schema.
# - Exponent floats carry an explicit decimal point (`1.0e-05`) so that
#   YAML 1.1 loaders read them as floats rather than strings.
# - Regex-valued scalars are single-quoted so no character is treated
#   as YAML syntax; their string values are unchanged.

actent: 0.0003

actor:
  act: silu
  fan: avg
  inputs: [deter, stoch]
  layers: 2
  maxstd: 1.0
  minstd: 0.1
  norm: layer
  outnorm: false
  outscale: 1.0
  symlog_inputs: false
  unimix: 0.01
  units: 512
  winit: normal

actor_dist_cont: normal
actor_dist_disc: onehot
actor_grad_cont: backprop
actor_grad_disc: reinforce

actor_opt:
  clip: 100.0
  eps: 1.0e-05
  lateclip: 0.0
  lr: 3.0e-05
  opt: adam
  warmup: 0
  wd: 0.0

batch_length: 64
batch_size: 16

cont_head:
  act: silu
  dist: binary
  fan: avg
  inputs: [deter, stoch]
  layers: 2
  norm: layer
  outnorm: false
  outscale: 1.0
  units: 512
  winit: normal

critic:
  act: silu
  bins: 255
  dist: symlog_disc
  fan: avg
  inputs: [deter, stoch]
  layers: 2
  norm: layer
  outnorm: false
  outscale: 0.0
  symlog_inputs: false
  units: 512
  winit: normal

critic_opt:
  clip: 100.0
  eps: 1.0e-05
  lateclip: 0.0
  lr: 3.0e-05
  opt: adam
  warmup: 0
  wd: 0.0

critic_slowreg: logprob
critic_type: vfunction
data_loaders: 8

decoder:
  act: silu
  cnn: resnet
  cnn_blocks: 0
  cnn_depth: 32
  cnn_keys: image
  cnn_sigmoid: false
  fan: avg
  image_dist: mse
  inputs: [deter, stoch]
  minres: 4
  # NOTE(review): `$^` looks like a never-matching regex, i.e. no MLP
  # keys selected — confirm with the consumer.
  mlp_keys: '$^'
  mlp_layers: 5
  mlp_units: 1024
  norm: layer
  outscale: 1.0
  resize: stride
  vector_dist: symlog_mse
  winit: normal

disag_head:
  act: silu
  dist: mse
  fan: avg
  inputs: [deter, stoch, action]
  layers: 2
  norm: layer
  outscale: 1.0
  units: 512
  winit: normal

disag_models: 8
disag_target: [stoch]

dyn_loss:
  free: 1.0
  impl: kl

encoder:
  act: silu
  cnn: resnet
  cnn_blocks: 0
  cnn_depth: 32
  cnn_keys: image
  fan: avg
  minres: 4
  mlp_keys: '$^'
  mlp_layers: 5
  mlp_units: 1024
  norm: layer
  resize: stride
  symlog_inputs: true
  winit: normal

# Per-environment-suite settings, keyed by suite name.
env:
  atari:
    actions: needed
    gray: false
    lives: unused
    noops: 30
    repeat: 4
    resize: opencv
    size: [64, 64]
    sticky: false
  dmc:
    camera: -1
    repeat: 2
    size: [64, 64]
  dmlab:
    episodic: true
    repeat: 4
    size: [64, 64]
  loconav:
    camera: -1
    repeat: 2
    size: [64, 64]
  minecraft:
    break_speed: 100.0
    size: [64, 64]

envs:
  amount: 1
  checks: false
  discretize: 0
  length: 0
  parallel: process
  reset: true
  restart: true

eval_dir: ''
# Quoted to make explicit that this is the string "None", not a null.
expl_behavior: 'None'

expl_opt:
  clip: 100.0
  eps: 1.0e-05
  lr: 0.0001
  opt: adam
  warmup: 0
  wd: 0.0

expl_rewards:
  disag: 0.1
  extr: 1.0

filter: '.*'
grad_heads: [decoder, reward, cont]
horizon: 333
imag_horizon: 15
imag_unroll: false

jax:
  debug: false
  debug_nans: false
  jit: true
  logical_cpus: 0
  metrics_every: 10
  platform: gpu
  policy_devices: [1]
  prealloc: true
  precision: float32
  train_devices: [1]

logdir: ./logdir/atari_alien

loss_scales:
  actor: 1.0
  cont: 1.0
  critic: 1.0
  dyn: 0.5
  image: 1.0
  rep: 0.1
  reward: 1.0
  slowreg: 1.0
  vector: 1.0

method: name

model_opt:
  clip: 1000.0
  eps: 1.0e-08
  lateclip: 0.0
  lr: 0.0001
  opt: adam
  warmup: 0
  wd: 0.0

rep_loss:
  free: 1.0
  impl: kl

replay: uniform
replay_online: false
replay_size: 1000000.0

retnorm:
  decay: 0.99
  impl: perc_ema
  max: 1.0
  perchi: 95.0
  perclo: 5.0

return_lambda: 0.95

reward_head:
  act: silu
  bins: 255
  dist: symlog_disc
  fan: avg
  inputs: [deter, stoch]
  layers: 2
  norm: layer
  outnorm: false
  outscale: 0.0
  units: 512
  winit: normal

rssm:
  act: silu
  action_clip: 1.0
  classes: 32
  deter: 512
  fan: avg
  initial: learned
  norm: layer
  stoch: 32
  unimix: 0.01
  units: 512
  unroll: false
  winit: normal

run:
  actor_addr: 'ipc:///tmp/5551'
  actor_batch: 32
  eval_eps: 100
  eval_every: 100000.0
  eval_fill: 0
  eval_initial: false
  eval_samples: 1
  expl_until: 0
  from_checkpoint: ''
  log_every: 300
  log_keys_max: '^$'
  log_keys_mean: '(log_entropy)'
  log_keys_sum: '^$'
  log_keys_video: [image]
  log_zeros: false
  save_every: 900
  script: train_eval
  steps: 150000.0
  sync_every: 10
  train_fill: 0
  train_ratio: 1024.0

seed: 0
slow_critic_fraction: 0.02
slow_critic_update: 1
task: atari_alien
task_behavior: Greedy

wrapper:
  checks: false
  discretize: 0
  length: 0
  reset: true