pushkin05 committed on
Commit
a2cc025
·
verified ·
1 Parent(s): 5490ded

Upload all_config.yaml

Browse files
Files changed (1) hide show
  1. all_config.yaml +46 -0
all_config.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ arch:
2
+ H_cycles: 3
3
+ H_layers: 0
4
+ L_cycles: 4
5
+ L_layers: 2
6
+ expansion: 4
7
+ forward_dtype: bfloat16
8
+ halt_exploration_prob: 0.1
9
+ halt_max_steps: 16
10
+ hidden_size: 512
11
+ loss:
12
+ loss_type: stablemax_cross_entropy
13
+ name: losses@ACTLossHead
14
+ mlp_t: false
15
+ name: recursive_reasoning.trm@TinyRecursiveReasoningModel_ACTV1
16
+ no_ACT_continue: true
17
+ num_heads: 8
18
+ pos_encodings: rope
19
+ puzzle_emb_len: 16
20
+ puzzle_emb_ndim: 512
21
+ beta1: 0.9
22
+ beta2: 0.95
23
+ checkpoint_every_eval: true
24
+ checkpoint_path: checkpoints/Arc2concept-aug-1000-ACT-torch/pretrain_att_arc2concept_auto
25
+ data_paths:
26
+ - data/arc2concept-aug-1000
27
+ data_paths_test: []
28
+ ema: true
29
+ ema_rate: 0.999
30
+ epochs: 100000
31
+ eval_interval: 10000
32
+ eval_save_outputs: []
33
+ evaluators: []
34
+ freeze_weights: false
35
+ global_batch_size: 768
36
+ load_checkpoint: null
37
+ lr: 0.0001
38
+ lr_min_ratio: 1.0
39
+ lr_warmup_steps: 2000
40
+ min_eval_interval: 0
41
+ project_name: Arc2concept-aug-1000-ACT-torch
42
+ puzzle_emb_lr: 0.01
43
+ puzzle_emb_weight_decay: 0.1
44
+ run_name: pretrain_att_arc2concept_auto
45
+ seed: 0
46
+ weight_decay: 0.1