# Top-level (root table) settings.
# NOTE(review): the meanings below are inferred from the key names only;
# confirm them against the code that loads this file.

# presumably the number of samples per batch
batch_size = 64
# presumably the total number of training steps
steps = 1000000
# presumably the interval, in steps, between progress reports
print_every = 50000
# presumably the random seed
seed = 0
# Relative path to a ".eqx" file; presumably where the CNN is stored - confirm.
cnn_storage = "./res/cnn.eqx"

# One entry of the `sae` array of tables; every [[sae]] header appends a new
# entry. The four entries in this part of the file share layer, hidden_size,
# input_size and l1, and differ only in learning_rate (this one: 1e-3).
[[sae]]
layer = 6
hidden_size = 300
input_size = 64
learning_rate = 1e-3
# from Neel Nanda's sae git
l1 = 3e-4

# One entry of the `sae` array of tables; every [[sae]] header appends a new
# entry. The four entries in this part of the file share layer, hidden_size,
# input_size and l1, and differ only in learning_rate (this one: 3e-4).
[[sae]]
layer = 6
hidden_size = 300
input_size = 64
learning_rate = 3e-4
l1 = 3e-4

# One entry of the `sae` array of tables; every [[sae]] header appends a new
# entry. The four entries in this part of the file share layer, hidden_size,
# input_size and l1, and differ only in learning_rate (this one: 1e-4).
[[sae]]
layer = 6
hidden_size = 300
input_size = 64
learning_rate = 1e-4
l1 = 3e-4

# One entry of the `sae` array of tables; every [[sae]] header appends a new
# entry. The four entries in this part of the file share layer, hidden_size,
# input_size and l1, and differ only in learning_rate (this one: 3e-5).
[[sae]]
layer = 6
hidden_size = 300
input_size = 64
learning_rate = 3e-5
l1 = 3e-4