Upload all_config.yaml
Browse files- all_config.yaml +46 -0
all_config.yaml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
arch:
|
| 2 |
+
H_cycles: 3
|
| 3 |
+
H_layers: 0
|
| 4 |
+
L_cycles: 4
|
| 5 |
+
L_layers: 2
|
| 6 |
+
expansion: 4
|
| 7 |
+
forward_dtype: bfloat16
|
| 8 |
+
halt_exploration_prob: 0.1
|
| 9 |
+
halt_max_steps: 16
|
| 10 |
+
hidden_size: 512
|
| 11 |
+
loss:
|
| 12 |
+
loss_type: stablemax_cross_entropy
|
| 13 |
+
name: losses@ACTLossHead
|
| 14 |
+
mlp_t: false
|
| 15 |
+
name: recursive_reasoning.trm@TinyRecursiveReasoningModel_ACTV1
|
| 16 |
+
no_ACT_continue: true
|
| 17 |
+
num_heads: 8
|
| 18 |
+
pos_encodings: rope
|
| 19 |
+
puzzle_emb_len: 16
|
| 20 |
+
puzzle_emb_ndim: 512
|
| 21 |
+
beta1: 0.9
|
| 22 |
+
beta2: 0.95
|
| 23 |
+
checkpoint_every_eval: true
|
| 24 |
+
checkpoint_path: checkpoints/Arc2concept-aug-1000-ACT-torch/pretrain_att_arc2concept_auto
|
| 25 |
+
data_paths:
|
| 26 |
+
- data/arc2concept-aug-1000
|
| 27 |
+
data_paths_test: []
|
| 28 |
+
ema: true
|
| 29 |
+
ema_rate: 0.999
|
| 30 |
+
epochs: 100000
|
| 31 |
+
eval_interval: 10000
|
| 32 |
+
eval_save_outputs: []
|
| 33 |
+
evaluators: []
|
| 34 |
+
freeze_weights: false
|
| 35 |
+
global_batch_size: 768
|
| 36 |
+
load_checkpoint: null
|
| 37 |
+
lr: 0.0001
|
| 38 |
+
lr_min_ratio: 1.0
|
| 39 |
+
lr_warmup_steps: 2000
|
| 40 |
+
min_eval_interval: 0
|
| 41 |
+
project_name: Arc2concept-aug-1000-ACT-torch
|
| 42 |
+
puzzle_emb_lr: 0.01
|
| 43 |
+
puzzle_emb_weight_decay: 0.1
|
| 44 |
+
run_name: pretrain_att_arc2concept_auto
|
| 45 |
+
seed: 0
|
| 46 |
+
weight_decay: 0.1
|