dacorvo (HF Staff) committed
Commit 61f0505 · verified · 1 parent: 9c892fd

Synchronizing local compiler cache.

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +33 -0
  2. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/0199e8ce7d3dfb946b04.json +58 -0
  3. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/57f1cbf66f8cce26a28c.json +58 -0
  4. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/703a7072b170148b97b6.json +58 -0
  5. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/708d4f031d164c862b46.json +58 -0
  6. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/c20327a4effcee88b4bc.json +58 -0
  7. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/02d0a60d8a2b9329cad1.json +58 -0
  8. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/105d8a3d06237ca2d1ff.json +58 -0
  9. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/110b833c3035ce194ed5.json +58 -0
  10. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/206662284977c7458a21.json +58 -0
  11. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/a2d3fdcb2fe5b2d84e1d.json +58 -0
  12. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/63e1314219a229b693b7.json +58 -0
  13. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/b183c08457fabacfd307.json +58 -0
  14. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/c568be536e0d41423ee2.json +58 -0
  15. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/e22e76b093ae9ec91f61.json +58 -0
  16. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/b526548d5134fc230616.json +220 -0
  17. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/93bc6f4b62a5e89361f7.json +58 -0
  18. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/afa97256ccb78518bca2.json +58 -0
  19. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/0105f379a23b1ef1189f.json +220 -0
  20. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/5ff4a6b24814913a6853.json +220 -0
  21. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/99aac1ef07573c9c0fa0.json +220 -0
  22. neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/compile_flags.json +1 -0
  23. neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.done +0 -0
  24. neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.hlo_module.pb +3 -0
  25. neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.neff +3 -0
  26. neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/wrapped_neff.hlo +3 -0
  27. neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/compile_flags.json +1 -0
  28. neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.done +0 -0
  29. neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.hlo_module.pb +3 -0
  30. neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.neff +3 -0
  31. neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/compile_flags.json +1 -0
  32. neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.done +0 -0
  33. neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.hlo_module.pb +3 -0
  34. neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.neff +3 -0
  35. neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/wrapped_neff.hlo +3 -0
  36. neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/compile_flags.json +1 -0
  37. neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.done +0 -0
  38. neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.hlo_module.pb +3 -0
  39. neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.neff +3 -0
  40. neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/compile_flags.json +1 -0
  41. neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.done +0 -0
  42. neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.hlo_module.pb +3 -0
  43. neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.neff +3 -0
  44. neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/wrapped_neff.hlo +3 -0
  45. neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/compile_flags.json +1 -0
  46. neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.done +0 -0
  47. neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.hlo_module.pb +3 -0
  48. neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff +3 -0
  49. neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/compile_flags.json +1 -0
  50. neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.done +0 -0
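Each registry file listed above is a small JSON cache entry describing one compiled model configuration. As an illustration only (not part of the commit), the sketch below fetches one of the listed entries with `huggingface_hub` and prints its `neuron` section; the repository id `aws-neuron/optimum-neuron-cache` is an assumption, since the commit itself does not name the repository it belongs to.

```python
import json

from huggingface_hub import hf_hub_download

# Assumption: the cache repository id; adjust if the cache lives elsewhere.
REPO_ID = "aws-neuron/optimum-neuron-cache"

# One of the registry entries added by this commit (path taken from the file list above).
FILENAME = (
    "neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/"
    "ibm-granite/granite-3.1-2b-instruct/0199e8ce7d3dfb946b04.json"
)

# Download the JSON entry from the Hub (cached locally by huggingface_hub).
path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

with open(path) as f:
    entry = json.load(f)

# The "neuron" section holds the compilation parameters (batch size, tp_degree, ...).
print(entry["_model_id"], entry["_task"])
print(json.dumps(entry["neuron"], indent=2))
```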
.gitattributes CHANGED
@@ -5527,3 +5527,36 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/model.neff
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/model.neff filter=lfs diff=lfs merge=lfs -text
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/0199e8ce7d3dfb946b04.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.015625,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "logits_scaling": 8.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 5000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
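The remaining entries in this commit differ only in their `neuron` block (batch size, tensor-parallel degree, sequence length, continuous batching). A hedged sketch of how the registry could be scanned for a configuration of interest, again assuming the `aws-neuron/optimum-neuron-cache` repository id, which the commit itself does not state:

```python
import json

from huggingface_hub import hf_hub_download, list_repo_files

# Assumption: the cache repository id; the commit itself does not name the repo.
REPO_ID = "aws-neuron/optimum-neuron-cache"
REGISTRY_PREFIX = "neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/"
MODEL_ID = "ibm-granite/granite-3.1-2b-instruct"

# Walk the registry and keep only the entries whose neuron config matches a
# given deployment shape (here: single-request, tensor-parallel degree 8).
for filename in list_repo_files(REPO_ID):
    if not (filename.startswith(REGISTRY_PREFIX) and MODEL_ID in filename):
        continue
    with open(hf_hub_download(repo_id=REPO_ID, filename=filename)) as f:
        entry = json.load(f)
    neuron = entry["neuron"]
    if neuron["batch_size"] == 1 and neuron["tp_degree"] == 8:
        print(filename)
        print("  sequence_length:", neuron["sequence_length"])
        print("  optimum-neuron:", neuron["optimum_neuron_version"])
```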
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/57f1cbf66f8cce26a28c.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.015625,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "logits_scaling": 8.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 8,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 8,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 5000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/703a7072b170148b97b6.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.015625,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "logits_scaling": 8.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 5000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/708d4f031d164c862b46.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.015625,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "logits_scaling": 8.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 32,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 32,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 5000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/c20327a4effcee88b4bc.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.015625,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "logits_scaling": 8.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 4,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 5000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/02d0a60d8a2b9329cad1.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 8,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
+ "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 8,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/105d8a3d06237ca2d1ff.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 32,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
+ "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 32,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/110b833c3035ce194ed5.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
+ "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/206662284977c7458a21.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
+ "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/a2d3fdcb2fe5b2d84e1d.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
+ "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 4,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/63e1314219a229b693b7.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 1,
+ "max_context_length": 16384,
+ "max_topk": 256,
+ "n_active_tokens": 16384,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 16384,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49159
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/b183c08457fabacfd307.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 4,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49159
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/c568be536e0d41423ee2.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 4,
+ "max_context_length": 16384,
+ "max_topk": 256,
+ "n_active_tokens": 16384,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 16384,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49159
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/e22e76b093ae9ec91f61.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49159
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/b526548d5134fc230616.json ADDED
@@ -0,0 +1,220 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "_task": "text-generation",
+ "attention_bias": false,
+ "attention_chunk_size": 8192,
+ "attention_dropout": 0.0,
+ "attn_scale": 0.1,
+ "attn_temperature_tuning": true,
+ "dtype": "bfloat16",
+ "floor_scale": 8192,
+ "for_llm_compressor": false,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "interleave_moe_layer_step": 1,
+ "intermediate_size": 8192,
+ "intermediate_size_mlp": 16384,
+ "layer_types": [
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 10485760,
+ "model_type": "llama4_text",
+ "moe_layers": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47
+ ],
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": false,
+ "glu_mlp": true,
+ "local_ranks_size": 16,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev0",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 16
+ },
+ "no_rope_layers": [
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0
+ ],
+ "num_attention_heads": 40,
+ "num_experts_per_tok": 1,
+ "num_hidden_layers": 48,
+ "num_key_value_heads": 8,
+ "num_local_experts": 16,
+ "output_router_logits": false,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 16.0,
+ "high_freq_factor": 1.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "router_aux_loss_coef": 0.001,
+ "router_jitter_noise": 0.0,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "use_qk_norm": true,
+ "vocab_size": 202048
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/93bc6f4b62a5e89361f7.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 4,
+ "max_context_length": 16384,
+ "max_topk": 256,
+ "n_active_tokens": 16384,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev1",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 16384,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49159
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/afa97256ccb78518bca2.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 4,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev1",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49159
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/0105f379a23b1ef1189f.json ADDED
@@ -0,0 +1,220 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "_task": "text-generation",
+ "attention_bias": false,
+ "attention_chunk_size": 8192,
+ "attention_dropout": 0.0,
+ "attn_scale": 0.1,
+ "attn_temperature_tuning": true,
+ "dtype": "bfloat16",
+ "floor_scale": 8192,
+ "for_llm_compressor": false,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "interleave_moe_layer_step": 1,
+ "intermediate_size": 8192,
+ "intermediate_size_mlp": 16384,
+ "layer_types": [
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 10485760,
+ "model_type": "llama4_text",
+ "moe_layers": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47
+ ],
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": false,
+ "glu_mlp": true,
+ "local_ranks_size": 16,
+ "max_batch_size": 1,
+ "max_context_length": 16384,
+ "max_topk": 256,
+ "n_active_tokens": 16384,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev1",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 16384,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 16
+ },
+ "no_rope_layers": [
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0
+ ],
+ "num_attention_heads": 40,
+ "num_experts_per_tok": 1,
+ "num_hidden_layers": 48,
+ "num_key_value_heads": 8,
+ "num_local_experts": 16,
+ "output_router_logits": false,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 16.0,
+ "high_freq_factor": 1.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "router_aux_loss_coef": 0.001,
+ "router_jitter_noise": 0.0,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "use_qk_norm": true,
+ "vocab_size": 202048
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/5ff4a6b24814913a6853.json ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
4
+ "_task": "text-generation",
5
+ "attention_bias": false,
6
+ "attention_chunk_size": 8192,
7
+ "attention_dropout": 0.0,
8
+ "attn_scale": 0.1,
9
+ "attn_temperature_tuning": true,
10
+ "dtype": "bfloat16",
11
+ "floor_scale": 8192,
12
+ "for_llm_compressor": false,
13
+ "head_dim": 128,
14
+ "hidden_act": "silu",
15
+ "hidden_size": 5120,
16
+ "initializer_range": 0.02,
17
+ "interleave_moe_layer_step": 1,
18
+ "intermediate_size": 8192,
19
+ "intermediate_size_mlp": 16384,
20
+ "layer_types": [
21
+ "chunked_attention",
22
+ "chunked_attention",
23
+ "chunked_attention",
24
+ "full_attention",
25
+ "chunked_attention",
26
+ "chunked_attention",
27
+ "chunked_attention",
28
+ "full_attention",
29
+ "chunked_attention",
30
+ "chunked_attention",
31
+ "chunked_attention",
32
+ "full_attention",
33
+ "chunked_attention",
34
+ "chunked_attention",
35
+ "chunked_attention",
36
+ "full_attention",
37
+ "chunked_attention",
38
+ "chunked_attention",
39
+ "chunked_attention",
40
+ "full_attention",
41
+ "chunked_attention",
42
+ "chunked_attention",
43
+ "chunked_attention",
44
+ "full_attention",
45
+ "chunked_attention",
46
+ "chunked_attention",
47
+ "chunked_attention",
48
+ "full_attention",
49
+ "chunked_attention",
50
+ "chunked_attention",
51
+ "chunked_attention",
52
+ "full_attention",
53
+ "chunked_attention",
54
+ "chunked_attention",
55
+ "chunked_attention",
56
+ "full_attention",
57
+ "chunked_attention",
58
+ "chunked_attention",
59
+ "chunked_attention",
60
+ "full_attention",
61
+ "chunked_attention",
62
+ "chunked_attention",
63
+ "chunked_attention",
64
+ "full_attention",
65
+ "chunked_attention",
66
+ "chunked_attention",
67
+ "chunked_attention",
68
+ "full_attention"
69
+ ],
70
+ "max_position_embeddings": 10485760,
71
+ "model_type": "llama4_text",
72
+ "moe_layers": [
73
+ 0,
74
+ 1,
75
+ 2,
76
+ 3,
77
+ 4,
78
+ 5,
79
+ 6,
80
+ 7,
81
+ 8,
82
+ 9,
83
+ 10,
84
+ 11,
85
+ 12,
86
+ 13,
87
+ 14,
88
+ 15,
89
+ 16,
90
+ 17,
91
+ 18,
92
+ 19,
93
+ 20,
94
+ 21,
95
+ 22,
96
+ 23,
97
+ 24,
98
+ 25,
99
+ 26,
100
+ 27,
101
+ 28,
102
+ 29,
103
+ 30,
104
+ 31,
105
+ 32,
106
+ 33,
107
+ 34,
108
+ 35,
109
+ 36,
110
+ 37,
111
+ 38,
112
+ 39,
113
+ 40,
114
+ 41,
115
+ 42,
116
+ 43,
117
+ 44,
118
+ 45,
119
+ 46,
120
+ 47
121
+ ],
122
+ "neuron": {
123
+ "_serialized_key": "NxDNeuronConfig",
124
+ "batch_size": 2,
125
+ "capacity_factor": null,
126
+ "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
127
+ "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
128
+ "continuous_batching": true,
129
+ "ep_degree": 1,
130
+ "fused_qkv": false,
131
+ "glu_mlp": true,
132
+ "local_ranks_size": 16,
133
+ "max_batch_size": 2,
134
+ "max_context_length": 4096,
135
+ "max_topk": 256,
136
+ "n_active_tokens": 4096,
137
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
138
+ "on_device_sampling": true,
139
+ "optimum_neuron_version": "0.4.3.dev1",
140
+ "output_logits": false,
141
+ "pp_degree": 1,
142
+ "sequence_length": 4096,
143
+ "speculation_length": 0,
144
+ "start_rank_id": 0,
145
+ "target": "trn1",
146
+ "torch_dtype": "bfloat16",
147
+ "tp_degree": 16
148
+ },
149
+ "no_rope_layers": [
150
+ 1,
151
+ 1,
152
+ 1,
153
+ 0,
154
+ 1,
155
+ 1,
156
+ 1,
157
+ 0,
158
+ 1,
159
+ 1,
160
+ 1,
161
+ 0,
162
+ 1,
163
+ 1,
164
+ 1,
165
+ 0,
166
+ 1,
167
+ 1,
168
+ 1,
169
+ 0,
170
+ 1,
171
+ 1,
172
+ 1,
173
+ 0,
174
+ 1,
175
+ 1,
176
+ 1,
177
+ 0,
178
+ 1,
179
+ 1,
180
+ 1,
181
+ 0,
182
+ 1,
183
+ 1,
184
+ 1,
185
+ 0,
186
+ 1,
187
+ 1,
188
+ 1,
189
+ 0,
190
+ 1,
191
+ 1,
192
+ 1,
193
+ 0,
194
+ 1,
195
+ 1,
196
+ 1,
197
+ 0
198
+ ],
199
+ "num_attention_heads": 40,
200
+ "num_experts_per_tok": 1,
201
+ "num_hidden_layers": 48,
202
+ "num_key_value_heads": 8,
203
+ "num_local_experts": 16,
204
+ "output_router_logits": false,
205
+ "rms_norm_eps": 1e-05,
206
+ "rope_scaling": {
207
+ "factor": 16.0,
208
+ "high_freq_factor": 1.0,
209
+ "low_freq_factor": 1.0,
210
+ "original_max_position_embeddings": 8192,
211
+ "rope_type": "llama3"
212
+ },
213
+ "rope_theta": 500000.0,
214
+ "router_aux_loss_coef": 0.001,
215
+ "router_jitter_noise": 0.0,
216
+ "tie_word_embeddings": false,
217
+ "use_cache": true,
218
+ "use_qk_norm": true,
219
+ "vocab_size": 202048
220
+ }
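Note: each registry entry above pairs the checkpoint's model config with the "neuron" export parameters (batch size, sequence length, tp_degree, compiler and optimum-neuron versions) that key the cached compiled artifacts. Below is a minimal, hedged sketch of requesting a matching export with optimum-neuron so that a cache hit is possible; the keyword names and the mapping of tp_degree to num_cores are assumptions for illustration, not taken from this repository.

    # Hedged sketch: export Llama-4-Scout with settings mirroring the cache entry above.
    # The exact keyword mapping (e.g. tp_degree -> num_cores) is an assumption.
    from optimum.neuron import NeuronModelForCausalLM

    model = NeuronModelForCausalLM.from_pretrained(
        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        export=True,
        batch_size=2,           # "batch_size": 2 in the entry above
        sequence_length=4096,   # "sequence_length": 4096
        num_cores=16,           # assumed to correspond to "tp_degree": 16
        auto_cast_type="bf16",  # "torch_dtype": "bfloat16"
    )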
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/99aac1ef07573c9c0fa0.json ADDED
@@ -0,0 +1,220 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "_task": "text-generation",
+ "attention_bias": false,
+ "attention_chunk_size": 8192,
+ "attention_dropout": 0.0,
+ "attn_scale": 0.1,
+ "attn_temperature_tuning": true,
+ "dtype": "bfloat16",
+ "floor_scale": 8192,
+ "for_llm_compressor": false,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "interleave_moe_layer_step": 1,
+ "intermediate_size": 8192,
+ "intermediate_size_mlp": 16384,
+ "layer_types": [
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 10485760,
+ "model_type": "llama4_text",
+ "moe_layers": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47
+ ],
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": false,
+ "glu_mlp": true,
+ "local_ranks_size": 16,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev1",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 16
+ },
+ "no_rope_layers": [
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0
+ ],
+ "num_attention_heads": 40,
+ "num_experts_per_tok": 1,
+ "num_hidden_layers": 48,
+ "num_key_value_heads": 8,
+ "num_local_experts": 16,
+ "output_router_logits": false,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 16.0,
+ "high_freq_factor": 1.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "router_aux_loss_coef": 0.001,
+ "router_jitter_noise": 0.0,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "use_qk_norm": true,
+ "vocab_size": 202048
+ }
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5e4c741fe5e0f5b099d40b9978c367531955572f5eba53147210debe29f33a26
+ size 1367989
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6c0e63b1bec594156292e975af5ebac2aa3bb0d8c959215f7db5c093a259807e
+ size 4363264
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:00f129e0558f3266c48325433d7aa8aa9b924e2d1f17314cb8cdba266262ed49
+ size 4546847
neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:65796c06ab7af92ba6a9644f2a7c7c097975c802b5cedd1e47a90662ab848112
+ size 912376
neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e5eb5eae4c845851230e3741e6fc70cff56ba8b46ba4d31250494828d4a27ac1
+ size 2059264
neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2dd067bb8e7f5373540ba520c274ee981f02b303c76853620a457e0545ac079a
+ size 1023328
neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:38267b698da6fcc510460c0e5b4e0f7ee008df2357326e82aad15c83b85edf84
+ size 6329344
neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7fb8a1567382a6d23c42f1d970257f9116648f1bc3404e556e772f97c1f482a
+ size 6513830
neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ab5dd1e88dcbe6ba0ab874f03f2b1d4744e635d4846a2aac3c8e710312fdfcd0
+ size 406532870
neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b92be29e4a8951ff436d82973d192d2ec52cce53a81a90c78ce3a361cb44091d
+ size 142459904
neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:55f01613d0337aeb0f0977a78b99e1c0cee9913120fd05928157a424acb5f406
+ size 102782272
neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:38aaaaff890bd48bf0b499b1329062e1f3b38252834fcef1dabdb86d716d92d1
+ size 7732224
neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:66883cb96f56ad390822ff24b144b0714b2f91b0f4ca936d0f79de494d245975
+ size 8071521
neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:73ee9dde7d549ac3fdaa10dca307a1c7c739776b0c9c6ac1b10a319466fe3c12
+ size 103921698
neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4684630f182582053fedae0234b6c417b237c88c377d4a07affedf4ea457a602
+ size 33506304
neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.done ADDED
File without changes