| experiment_set_ctx_length,name,run_tag,model_type,num_params,num_tokens_training,num_flops_training,val/.dclm_loss,token_param_ratio,width_depth_ratio,Preset Token Param Ratio,experiment_set,context_length,learning_rate,global_batch_size,num_train_steps,val/.dclm_perplexity,Preset Num Params,Model Size,embedding_dim,num_blocks,num_heads,proj_factor_ffn,ffn_multiple_of,ffn_dim,head_dim_qk,head_dim_v,IsoFLOP,train/.loss_mean,run_id,model_checkpoint_paths |
| tokenparam_ctx8192,dclm_mLSTMv1_160M_ctx8192_lr0.003_steps3500_nh6_gbs128,scl_mlstm_160Mv2,mlstm_v1,164110224.0,3670016000.0,2.809832926150656e+18,3.278110362507386,22.36311614564611,64.0,22,tokenparam,8192,0.003,128.0,3500.0,26.525601540965525,,160M,768,12,6,2.667,64,2112,64.0,128,,3.280884430124334,i5dhe6am,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_160M_ctx8192_lr0.003_steps3500_nh6_2025-02-11T17:42:34/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_160M_ctx8192_lr0.003_steps5000_nh6_gbs128,scl_mlstm_160Mv2,mlstm_v1,164110224.0,5242880000.0,4.01404703735808e+18,3.2102737541221664,31.947308779494445,64.0,extra,tokenparam,8192,0.003,128.0,5000.0,24.785870530158633,,160M,768,12,6,2.667,64,2112,64.0,128,,3.2076875312801953,5q50n1xs,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_160M_ctx8192_lr0.003_steps5000_nh6_2025-01-18T09:17:10/3/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_160M_ctx8192_lr0.003_steps7000_nh6_gbs128,scl_mlstm_160Mv2,mlstm_v1,164110224.0,7340032000.0,5.619665852301312e+18,3.1541237014870225,44.72623229129222,64.0,44,tokenparam,8192,0.003,128.0,7000.0,23.432494232056126,,160M,768,12,6,2.667,64,2112,64.0,128,,3.1532559833972273,9fda092j,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_160M_ctx8192_lr0.003_steps7000_nh6_2025-02-11T15:52:54/1/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_160M_ctx8192_lr0.003_steps8000_nh6_gbs128,scl_mlstm_160Mv2,mlstm_v1,164110224.0,8388608000.0,6.422475259772928e+18,3.1375079384862015,51.11569404719111,64.0,extra,tokenparam,8192,0.003,128.0,8000.0,23.046362283533377,,160M,768,12,6,2.667,64,2112,64.0,128,,3.136242322186817,cy3br36c,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_160M_ctx8192_lr0.003_steps8000_nh6_2025-01-18T09:17:10/2/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_160M_ctx8192_lr0.003_steps18000_nh6_gbs128,scl_mlstm_160Mv2,mlstm_v1,164110224.0,18874368000.0,1.4450569334489088e+19,3.0566959489287817,115.01031160618,64.0,110,tokenparam,8192,0.003,128.0,18000.0,21.257206109674595,,160M,768,12,6,2.667,64,2112,64.0,128,,3.056779156387037,hs5khr4o,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_160M_ctx8192_lr0.003_steps18000_nh6_2025-01-18T09:17:10/1/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_160M_ctx8192_lr0.003_steps36000_nh6_gbs128,scl_mlstm_160Mv2,mlstm_v1,164110224.0,37748736000.0,2.8901138668978176e+19,3.011088257360189,230.02062321236,64.0,220,tokenparam,8192,0.003,128.0,36000.0,20.30948985511296,,160M,768,12,6,2.667,64,2112,64.0,128,,3.0106920301059077,uju2tyxb,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_160M_ctx8192_lr0.003_steps36000_nh6_2025-01-18T09:17:10/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_160M_ctx8192_lr0.003_steps87000_nh6_gbs128,scl_mlstm_160Mv2,mlstm_v1,164110224.0,91226112000.0,6.984441845003059e+19,2.9735369771297897,555.8831727632033,64.0,550,tokenparam,8192,0.003,128.0,87000.0,19.560984137709212,,160M,768,12,6,2.667,64,2112,64.0,128,,2.9733398660476626,vufn7vk6,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_160M_ctx8192_lr0.003_steps87000_nh6_2025-02-12T11:25:46/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_160M_ctx8192_lr0.003_steps173000_nh6_gbs128,scl_mlstm_160Mv2,mlstm_v1,164110224.0,181403648000.0,1.3888602749258957e+20,2.9556999120938414,1105.376883770508,64.0,1100,tokenparam,8192,0.003,128.0,173000.0,19.215166942125414,,160M,768,12,6,2.667,64,2112,64.0,128,,2.9564085600096623,5c16ap2i,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_160M_ctx8192_lr0.003_steps173000_nh6_2025-01-18T09:15:55/1/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_160M_ctx8192_lr0.003_steps345000_nh6_gbs128,scl_mlstm_160Mv2,mlstm_v1,164110224.0,361758720000.0,2.7696924557770752e+20,2.9410484262024865,2204.364305785117,64.0,2200,tokenparam,8192,0.003,128.0,345000.0,18.935688580930666,,160M,768,12,6,2.667,64,2112,64.0,128,,2.9425100714688632,y5s6gd5v,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_160M_ctx8192_lr0.003_steps345000_nh6_2025-01-18T09:15:55/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_400M_ctx8192_lr0.003_steps10000_gbs128,scl_mlstm_400M,mlstm_v1,406856896.0,10485760000.0,2.29104177905664e+19,2.8957374019019237,25.77259990697073,42.666666666666664,22,tokenparam,8192,0.003,128.0,10000.0,18.09684116737921,,400M,1024,24,4,2.667,64,2752,128.0,256,,2.89987456718397,xx828oas,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_400M_ctx8192_lr0.003_steps10000_2024-12-20T05:24:44/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_400M_ctx8192_lr0.003_steps18000_gbs128,scl_mlstm_400M,mlstm_v1,406856896.0,18874368000.0,4.123875202301952e+19,2.829953722298775,46.39067983254731,42.666666666666664,44,tokenparam,8192,0.003,128.0,18000.0,16.944676645712995,,400M,1024,24,4,2.667,64,2752,128.0,256,,2.8302206877701774,sbug1edk,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_400M_ctx8192_lr0.003_steps18000_2024-12-21T12:07:09/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_400M_ctx8192_lr0.003_steps46000_gbs128,scl_mlstm_400M,mlstm_v1,406856896.0,48234496000.0,1.0538792183660544e+20,2.755944605072565,118.55395957206535,42.666666666666664,110,tokenparam,8192,0.003,128.0,46000.0,15.735898093770633,,400M,1024,24,4,2.667,64,2752,128.0,256,,2.7566076499819614,6ugtb2jc,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_400M_ctx8192_lr0.003_steps46000_2024-12-19T16:13:20/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_400M_ctx8192_lr0.003_steps87000_gbs128,scl_mlstm_400M,mlstm_v1,406856896.0,91226112000.0,1.9932063477792768e+20,2.726996022162898,224.22161919064536,42.666666666666664,220,tokenparam,8192,0.003,128.0,87000.0,15.286896478987856,,400M,1024,24,4,2.667,64,2752,128.0,256,,2.727117137051683,rthncln6,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_400M_ctx8192_lr0.003_steps87000_2024-12-20T05:25:49/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_400M_ctx8192_lr0.001_steps215000_gbs128,scl_mlstm_400M,mlstm_v1,406856896.0,225443840000.0,4.925739824971776e+20,2.6893338728873974,554.1108979998706,42.666666666666664,550,tokenparam,8192,0.001,128.0,215000.0,14.721866019381165,,400M,1024,24,4,2.667,64,2752,128.0,256,,2.6891374976656324,swn8e6ti-1o4g3ovh,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_400M_ctx8192_lr0.001_steps215000_2025-01-25T17:20:36/0/wandb"", ""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_400M_ctx8192_lr0.001_steps215000_2025-01-27T08:02:23/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_400M_ctx8192_lr0.001_steps427000_gbs128,scl_mlstm_400M,mlstm_v1,406856896.0,447741952000.0,9.782748396571853e+20,2.671950739265968,1100.4900160276502,42.666666666666664,1100,tokenparam,8192,0.001,128.0,427000.0,14.468165300392341,,400M,1024,24,4,2.667,64,2752,128.0,256,,2.672378960124619,zxcdigqn,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_400M_ctx8192_lr0.001_steps427000_2024-12-21T11:48:25/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_830M_ctx8192_lr0.001_steps10000_gbs256,scl_mlstm_830M,mlstm_v1,841496256.0,20971520000.0,9.842062494007296e+19,2.7351935791616513,24.92170327612248,64.0,22,tokenparam,8192,0.001,256.0,10000.0,15.412726729887366,,830M,1536,24,4,2.667,64,4160,192.0,384,,2.7365498341079704,ds89wtop,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_830M_ctx8192_lr0.001_steps10000_2024-12-20T21:52:22/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_830M_ctx8192_lr0.001_steps18000_gbs256,scl_mlstm_830M,mlstm_v1,841496256.0,37748736000.0,1.7715712489213133e+20,2.6669577115236422,44.859065897020464,64.0,44,tokenparam,8192,0.001,256.0,18000.0,14.396105397919353,,830M,1536,24,4,2.667,64,4160,192.0,384,,2.666146242724846,5m5wsr4o,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_830M_ctx8192_lr0.001_steps18000_2024-12-21T12:08:33/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_830M_ctx8192_lr0.001_steps46000_gbs256,scl_mlstm_830M,mlstm_v1,841496256.0,96468992000.0,4.527348747243356e+20,2.5888205193607936,114.6398350701634,64.0,110,tokenparam,8192,0.001,256.0,46000.0,13.314058664040141,,830M,1536,24,4,2.667,64,4160,192.0,384,,2.5891423691014452,6k64ww8i,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_830M_ctx8192_lr0.001_steps46000_2024-12-19T16:26:13/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_830M_ctx8192_lr0.001_steps90000_gbs256,scl_mlstm_830M,mlstm_v1,841496256.0,188743680000.0,8.857856244606566e+20,2.547089323657933,224.29532948510231,64.0,220,tokenparam,8192,0.001,256.0,90000.0,12.769880647201964,,830M,1536,24,4,2.667,64,4160,192.0,384,,2.547342435897689,g0zpitjh,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_830M_ctx8192_lr0.001_steps90000_2024-12-20T13:49:58/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_830M_ctx8192_lr0.001_steps220000_gbs256,scl_mlstm_830M,mlstm_v1,841496256.0,461373440000.0,2.165253748681605e+21,2.5121869205541936,548.2774720746945,64.0,550,tokenparam,8192,0.001,256.0,220000.0,12.331869411350965,,830M,1536,24,4,2.667,64,4160,192.0,384,,2.5149441292902144,zn5ec7lj,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_830M_ctx8192_lr0.001_steps220000_2025-01-15T07:34:58/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_830M_ctx8192_lr0.0008_steps442000_gbs256,scl_mlstm_830M,mlstm_v1,841496256.0,926941184000.0,4.350191622351225e+21,2.4934367650721505,1101.5392848046135,64.0,1100,tokenparam,8192,0.0008,256.0,442000.0,12.102799204613392,,830M,1536,24,4,2.667,64,4160,192.0,384,,2.4960969955944505,m97a5wxv,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_830M_ctx8192_lr0.0008_steps442000_2024-12-21T11:51:08/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_1.4B_ctx8192_lr0.0008_steps16000_gbs256,scl_mlstm_1.4B,mlstm_v1,1420839104.0,33554432000.0,2.7140328023025254e+20,2.6101635729035024,23.615926606704654,85.33333333333333,22,tokenparam,8192,0.0008,256.0,16000.0,13.601275470002303,,1.4B,2048,24,4,2.667,64,5504,256.0,512,,2.609242605310148,z87rj8uv,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_1.4B_ctx8192_lr0.0008_steps16000_2024-12-20T22:11:45/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_1.4B_ctx8192_lr0.0008_steps31000_gbs256,scl_mlstm_1.4B,mlstm_v1,1420839104.0,65011712000.0,5.258438554461143e+20,2.5397554125606026,45.75585780049027,85.33333333333333,44,tokenparam,8192,0.0008,256.0,31000.0,12.676570061815875,,1.4B,2048,24,4,2.667,64,5504,256.0,512,,2.5394553827499933,exazu89h,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_1.4B_ctx8192_lr0.0008_steps31000_2024-12-21T12:09:17/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_1.4B_ctx8192_lr0.0008_steps76000_gbs256,scl_mlstm_1.4B,mlstm_v1,1420839104.0,159383552000.0,1.2891655810936996e+21,2.4725286928321255,112.1756513818471,85.33333333333333,110,tokenparam,8192,0.0008,256.0,76000.0,11.852380018200053,,1.4B,2048,24,4,2.667,64,5504,256.0,512,,2.4697254157502373,1zy4evam,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_1.4B_ctx8192_lr0.0008_steps76000_2024-12-19T17:58:28/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_1.4B_ctx8192_lr0.0008_steps150000_gbs256,scl_mlstm_1.4B,mlstm_v1,1420839104.0,314572800000.0,2.5444057521586176e+21,2.4353908299840468,221.3993119378561,85.33333333333333,220,tokenparam,8192,0.0008,256.0,150000.0,11.420281229598338,,1.4B,2048,24,4,2.667,64,5504,256.0,512,,2.43399524610663,bwgc7tr8,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_1.4B_ctx8192_lr0.0008_steps150000_2024-12-20T22:13:14/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_1.4B_ctx8192_lr0.0008_steps375000_gbs256,scl_mlstm_1.4B,mlstm_v1,1420839104.0,786432000000.0,6.361014380396544e+21,2.4042889044813234,553.4982798446404,85.33333333333333,550,tokenparam,8192,0.0008,256.0,375000.0,11.070555260337002,,1.4B,2048,24,4,2.667,64,5504,256.0,512,,2.4047138576447042,on5wxj1y,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_1.4B_ctx8192_lr0.0008_steps375000_2025-01-14T10:12:53/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_1.4B_ctx8192_lr0.0007_steps746000_gbs256,scl_mlstm_1.4B,mlstm_v1,1420839104.0,1564475392000.0,1.2654177940735525e+22,2.3885712025275554,1101.0925780376044,85.33333333333333,1100,tokenparam,8192,0.0007,256.0,746000.0,10.897911904919802,,1.4B,2048,24,4,2.667,64,5504,256.0,512,,2.3914777992864704,7wwry7p1,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_1.4B_ctx8192_lr0.0007_steps746000_2024-12-21T11:53:25/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_2.7B_ctx8192_lr0.0007_steps16000_gbs512,scl_mlstm_2.7B,mlstm_v1,2780449600.0,67108864000.0,1.0880097572660183e+21,2.452768103112142,24.135975706950415,80.0,22,tokenparam,8192,0.0007,512.0,16000.0,11.620468896079252,,2.7B,2560,32,5,2.667,64,6848,256.0,512,,2.4551876922392735,vpes3xm6,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_2.7B_ctx8192_lr0.0007_steps16000_2024-12-20T14:01:21/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_2.7B_ctx8192_lr0.0007_steps31000_gbs512,scl_mlstm_2.7B,mlstm_v1,2780449600.0,130023424000.0,2.1080189047029105e+21,2.387846790788132,46.76345293221643,80.0,44,tokenparam,8192,0.0007,512.0,31000.0,10.890020188371846,,2.7B,2560,32,5,2.667,64,6848,256.0,512,,2.388620179389791,5pxfizmt,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_2.7B_ctx8192_lr0.0007_steps31000_2024-12-21T12:10:01/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_2.7B_ctx8192_lr0.0007_steps76000_gbs512,scl_mlstm_2.7B,mlstm_v1,2780449600.0,318767104000.0,5.168046347013587e+21,2.323063792438522,114.64588460801447,80.0,110,tokenparam,8192,0.0007,512.0,76000.0,10.20689826751314,,2.7B,2560,32,5,2.667,64,6848,256.0,512,,2.3245244987948928,qvdid4dr,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_2.7B_ctx8192_lr0.0007_steps76000_2024-12-20T21:51:11/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_2.7B_ctx8192_lr0.0007_steps146000_gbs512,scl_mlstm_2.7B,mlstm_v1,2780449600.0,612368384000.0,9.928089035052417e+21,2.2893084303492217,220.24077832592255,80.0,220,tokenparam,8192,0.0007,512.0,146000.0,9.868110834861506,,2.7B,2560,32,5,2.667,64,6848,256.0,512,,2.2891629306736547,b6nbl9yz,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_2.7B_ctx8192_lr0.0007_steps146000_2024-12-20T14:02:09/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_7B_ctx8192_lr0.0005_steps73000_gbs512,scl_mlstm_7B,mlstm_v1,6865424896.0,306184192000.0,1.2382212666616185e+22,2.2073267971891695,44.597995992701335,128.0,44,tokenparam,8192,0.0005,512.0,73000.0,9.091380775867698,,7B,4096,32,8,2.667,64,10944,256.0,512,,2.206035895198121,roo8xyr6-cb4q3k1y,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_7B_ctx8192_lr0.0005_steps73000_2024-12-21T12:10:56/0/wandb"", ""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_7B_ctx8192_lr0.0005_steps73000_2024-12-26T16:37:57/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_7B_ctx8192_lr0.0005_steps76000_gbs256,scl_mlstm_7B,mlstm_v1,6865424896.0,159383552000.0,6.445535360704315e+21,2.2533885104910385,23.21539517428289,128.0,22,tokenparam,8192,0.0005,256.0,76000.0,9.519939659458746,,7B,4096,32,8,2.667,64,10944,256.0,512,,2.25183173169402,8egjdt0c,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_7B_ctx8192_lr0.0005_steps76000_2024-12-20T04:57:54/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_7B_ctx8192_lr0.0005_steps181000_gbs512,scl_mlstm_7B,mlstm_v1,6865424896.0,759169024000.0,3.070110263914424e+22,2.148010794813511,110.57859280382112,128.0,110,tokenparam,8192,0.0005,512.0,181000.0,8.567798325945512,,7B,4096,32,8,2.667,64,10944,256.0,512,,2.1502073137546587,ui1zi0hi,"[""/nfs-gpu/xlstm/outputs_beck/sclaw/dclm_mLSTMv1_7B_ctx8192_lr0.0005_steps181000_2024-12-21T06:50:36/0/wandb""]" |
| tokenparam_ctx8192,dclm_mLSTMv1_7B_ctx8192_gbs512,dclm_mLSTMv1_7B_longrun_pretraining_final,mlstm_v1,6865424896.0,2097152000000.0,8.4809675798741e+22,2.1029509335787417,305.4657259774064,128.0,extra,tokenparam,8192,0.0004,512.0,500000.0,8.190303328112677,,7B long,4096,32,8,2.667,64,10944,256.0,512,,2.1004479801719813,hphy3hsq-yhf1xdoc-222xc3yh-jd8qgcje,"[""/nfs-gpu/xlstm/logs/outputs/xlstm-jax/DCLM/dclm_mLSTMv1_7B_ctx8192_2024-11-06T22:18:42/0/wandb"", ""/nfs-gpu/xlstm/logs/outputs/xlstm-jax/DCLM/dclm_mLSTMv1_7B_ctx8192_2024-11-15T07:47:05/0/wandb"", ""/nfs-gpu/xlstm/logs/outputs/xlstm-jax/DCLM/dclm_mLSTMv1_7B_ctx8192_2024-11-15T12:48:22/0/wandb"", ""/nfs-gpu/xlstm/logs/outputs/xlstm-jax/DCLM/dclm_mLSTMv1_7B_ctx8192_2024-11-19T11:12:49/0/wandb""]" |
|
|