{
  "hook_point_in": "blocks.5.hook_attn_out",
  "hook_point_out": "blocks.5.hook_attn_out",
  "use_decoder_bias": true,
  "apply_decoder_bias_to_pre_encoder": false,
  "expansion_factor": 8,
  "d_model": 4096,
  "d_sae": 32768,
  "norm_activation": "token-wise",
  "dataset_average_activation_norm": null,
  "decoder_exactly_fixed_norm": false,
  "sparsity_include_decoder_norm": true,
  "use_glu_encoder": false,
  "init_decoder_norm": null,
  "init_encoder_norm": null,
  "init_encoder_with_decoder_transpose": true,
  "l1_coefficient": 1.6e-05,
  "l1_coefficient_warmup_steps": 14648,
  "lp": 1,
  "use_ghost_grads": false,
  "tp_size": 1,
  "ddp_size": 1
}