dacorvo (HF Staff) committed
Commit 61f0505 · verified · 1 parent: 9c892fd

Synchronizing local compiler cache.

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +33 -0
  2. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/0199e8ce7d3dfb946b04.json +58 -0
  3. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/57f1cbf66f8cce26a28c.json +58 -0
  4. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/703a7072b170148b97b6.json +58 -0
  5. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/708d4f031d164c862b46.json +58 -0
  6. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/c20327a4effcee88b4bc.json +58 -0
  7. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/02d0a60d8a2b9329cad1.json +58 -0
  8. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/105d8a3d06237ca2d1ff.json +58 -0
  9. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/110b833c3035ce194ed5.json +58 -0
  10. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/206662284977c7458a21.json +58 -0
  11. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/a2d3fdcb2fe5b2d84e1d.json +58 -0
  12. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/63e1314219a229b693b7.json +58 -0
  13. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/b183c08457fabacfd307.json +58 -0
  14. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/c568be536e0d41423ee2.json +58 -0
  15. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/e22e76b093ae9ec91f61.json +58 -0
  16. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/b526548d5134fc230616.json +220 -0
  17. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/93bc6f4b62a5e89361f7.json +58 -0
  18. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/afa97256ccb78518bca2.json +58 -0
  19. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/0105f379a23b1ef1189f.json +220 -0
  20. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/5ff4a6b24814913a6853.json +220 -0
  21. neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/99aac1ef07573c9c0fa0.json +220 -0
  22. neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/compile_flags.json +1 -0
  23. neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.done +0 -0
  24. neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.hlo_module.pb +3 -0
  25. neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.neff +3 -0
  26. neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/wrapped_neff.hlo +3 -0
  27. neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/compile_flags.json +1 -0
  28. neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.done +0 -0
  29. neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.hlo_module.pb +3 -0
  30. neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.neff +3 -0
  31. neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/compile_flags.json +1 -0
  32. neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.done +0 -0
  33. neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.hlo_module.pb +3 -0
  34. neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.neff +3 -0
  35. neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/wrapped_neff.hlo +3 -0
  36. neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/compile_flags.json +1 -0
  37. neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.done +0 -0
  38. neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.hlo_module.pb +3 -0
  39. neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.neff +3 -0
  40. neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/compile_flags.json +1 -0
  41. neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.done +0 -0
  42. neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.hlo_module.pb +3 -0
  43. neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.neff +3 -0
  44. neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/wrapped_neff.hlo +3 -0
  45. neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/compile_flags.json +1 -0
  46. neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.done +0 -0
  47. neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.hlo_module.pb +3 -0
  48. neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff +3 -0
  49. neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/compile_flags.json +1 -0
  50. neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.done +0 -0
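Each registry file listed above is a small JSON cache entry describing one compiled model configuration. As an illustration only (not part of the commit), the sketch below fetches one of the listed entries with `huggingface_hub` and prints its `neuron` section; the repository id `aws-neuron/optimum-neuron-cache` is an assumption, since the commit itself does not name the repository it belongs to.

```python
import json

from huggingface_hub import hf_hub_download

# Assumption: the cache repository id; adjust if the cache lives elsewhere.
REPO_ID = "aws-neuron/optimum-neuron-cache"

# One of the registry entries added by this commit (path taken from the file list above).
FILENAME = (
    "neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/"
    "ibm-granite/granite-3.1-2b-instruct/0199e8ce7d3dfb946b04.json"
)

# Download the JSON entry from the Hub (cached locally by huggingface_hub).
path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

with open(path) as f:
    entry = json.load(f)

# The "neuron" section holds the compilation parameters (batch size, tp_degree, ...).
print(entry["_model_id"], entry["_task"])
print(json.dumps(entry["neuron"], indent=2))
```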
.gitattributes CHANGED
@@ -5527,3 +5527,36 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/model.neff
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_2b50aca1bfecfc81fbd4+b75984e0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/model.neff filter=lfs diff=lfs merge=lfs -text
  neuronxcc-2.21.18209.0+043b1bf7/MODULE_4e217d4c91b1cc9c870b+24627afa/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_48eec7431affa34fe653+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_578000ba6d5d4c786c7c+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_5c3459bd7465308fd768+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_674568d1b9318305658e+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_6d28c9ae9fde139cbc82+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_6e9b477efc8aefc5e1e1+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_7111d7c478e6e5afd0bb+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_78ce8440963abfb49a3f+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_a01b2de18a488f8d7b42+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_a059abca04a006eb09ca+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_b0e3c3ea84816b6f29bc+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_cd5f0b0df65e1b4a6bf5+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_d77cbfa6866ffd3a3f7d+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_e36f5e0ce14d1b9618e7+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+ neuronxcc-2.21.18209.0+043b1bf7/MODULE_f0be96de5ac44fbcdedb+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/0199e8ce7d3dfb946b04.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.015625,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "logits_scaling": 8.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 5000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
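The remaining entries in this commit differ only in their `neuron` block (batch size, tensor-parallel degree, sequence length, continuous batching). A hedged sketch of how the registry could be scanned for a configuration of interest, again assuming the `aws-neuron/optimum-neuron-cache` repository id, which the commit itself does not state:

```python
import json

from huggingface_hub import hf_hub_download, list_repo_files

# Assumption: the cache repository id; the commit itself does not name the repo.
REPO_ID = "aws-neuron/optimum-neuron-cache"
REGISTRY_PREFIX = "neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/"
MODEL_ID = "ibm-granite/granite-3.1-2b-instruct"

# Walk the registry and keep only the entries whose neuron config matches a
# given deployment shape (here: single-request, tensor-parallel degree 8).
for filename in list_repo_files(REPO_ID):
    if not (filename.startswith(REGISTRY_PREFIX) and MODEL_ID in filename):
        continue
    with open(hf_hub_download(repo_id=REPO_ID, filename=filename)) as f:
        entry = json.load(f)
    neuron = entry["neuron"]
    if neuron["batch_size"] == 1 and neuron["tp_degree"] == 8:
        print(filename)
        print("  sequence_length:", neuron["sequence_length"])
        print("  optimum-neuron:", neuron["optimum_neuron_version"])
```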
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/57f1cbf66f8cce26a28c.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.015625,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "logits_scaling": 8.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 8,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 8,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 5000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/703a7072b170148b97b6.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.015625,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "logits_scaling": 8.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 5000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/708d4f031d164c862b46.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.015625,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "logits_scaling": 8.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 32,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 32,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 5000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-2b-instruct/c20327a4effcee88b4bc.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.015625,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "logits_scaling": 8.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 4,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 5000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/02d0a60d8a2b9329cad1.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 8,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
+ "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 8,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/105d8a3d06237ca2d1ff.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 32,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
+ "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 32,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/110b833c3035ce194ed5.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
+ "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/206662284977c7458a21.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
+ "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.1-8b-instruct/a2d3fdcb2fe5b2d84e1d.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.1-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.1-8b-instruct",
+ "checkpoint_revision": "4009206d5fc95d2e65a7b7633e159d6e97e25d35",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 2,
+ "max_batch_size": 4,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 2
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49155
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/63e1314219a229b693b7.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 1,
+ "max_context_length": 16384,
+ "max_topk": 256,
+ "n_active_tokens": 16384,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 16384,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49159
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/b183c08457fabacfd307.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 4,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49159
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/c568be536e0d41423ee2.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 4,
+ "max_context_length": 16384,
+ "max_topk": 256,
+ "n_active_tokens": 16384,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 16384,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49159
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2/granite/ibm-granite/granite-3.3-8b-instruct/e22e76b093ae9ec91f61.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.2",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49159
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/b526548d5134fc230616.json ADDED
@@ -0,0 +1,220 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "_task": "text-generation",
+ "attention_bias": false,
+ "attention_chunk_size": 8192,
+ "attention_dropout": 0.0,
+ "attn_scale": 0.1,
+ "attn_temperature_tuning": true,
+ "dtype": "bfloat16",
+ "floor_scale": 8192,
+ "for_llm_compressor": false,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "interleave_moe_layer_step": 1,
+ "intermediate_size": 8192,
+ "intermediate_size_mlp": 16384,
+ "layer_types": [
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 10485760,
+ "model_type": "llama4_text",
+ "moe_layers": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47
+ ],
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": false,
+ "glu_mlp": true,
+ "local_ranks_size": 16,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev0",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 16
+ },
+ "no_rope_layers": [
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0
+ ],
+ "num_attention_heads": 40,
+ "num_experts_per_tok": 1,
+ "num_hidden_layers": 48,
+ "num_key_value_heads": 8,
+ "num_local_experts": 16,
+ "output_router_logits": false,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 16.0,
+ "high_freq_factor": 1.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "router_aux_loss_coef": 0.001,
+ "router_jitter_noise": 0.0,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "use_qk_norm": true,
+ "vocab_size": 202048
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/93bc6f4b62a5e89361f7.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 4,
+ "max_context_length": 16384,
+ "max_topk": 256,
+ "n_active_tokens": 16384,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev1",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 16384,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49159
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/granite/ibm-granite/granite-3.3-8b-instruct/afa97256ccb78518bca2.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
+ "_task": "text-generation",
+ "architectures": [
+ "GraniteForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attention_multiplier": 0.0078125,
+ "dtype": "bfloat16",
+ "embedding_multiplier": 12.0,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12800,
+ "logits_scaling": 16.0,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "granite",
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 4,
+ "capacity_factor": null,
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
+ "continuous_batching": true,
+ "ep_degree": 1,
+ "fused_qkv": true,
+ "glu_mlp": true,
+ "local_ranks_size": 8,
+ "max_batch_size": 4,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev1",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 8
+ },
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000000.0,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 49159
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/0105f379a23b1ef1189f.json ADDED
@@ -0,0 +1,220 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "_task": "text-generation",
+ "attention_bias": false,
+ "attention_chunk_size": 8192,
+ "attention_dropout": 0.0,
+ "attn_scale": 0.1,
+ "attn_temperature_tuning": true,
+ "dtype": "bfloat16",
+ "floor_scale": 8192,
+ "for_llm_compressor": false,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "interleave_moe_layer_step": 1,
+ "intermediate_size": 8192,
+ "intermediate_size_mlp": 16384,
+ "layer_types": [
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 10485760,
+ "model_type": "llama4_text",
+ "moe_layers": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47
+ ],
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": false,
+ "glu_mlp": true,
+ "local_ranks_size": 16,
+ "max_batch_size": 1,
+ "max_context_length": 16384,
+ "max_topk": 256,
+ "n_active_tokens": 16384,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev1",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 16384,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 16
+ },
+ "no_rope_layers": [
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0
+ ],
+ "num_attention_heads": 40,
+ "num_experts_per_tok": 1,
+ "num_hidden_layers": 48,
+ "num_key_value_heads": 8,
+ "num_local_experts": 16,
+ "output_router_logits": false,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 16.0,
+ "high_freq_factor": 1.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "router_aux_loss_coef": 0.001,
+ "router_jitter_noise": 0.0,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "use_qk_norm": true,
+ "vocab_size": 202048
+ }
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/5ff4a6b24814913a6853.json ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
4
+ "_task": "text-generation",
5
+ "attention_bias": false,
6
+ "attention_chunk_size": 8192,
7
+ "attention_dropout": 0.0,
8
+ "attn_scale": 0.1,
9
+ "attn_temperature_tuning": true,
10
+ "dtype": "bfloat16",
11
+ "floor_scale": 8192,
12
+ "for_llm_compressor": false,
13
+ "head_dim": 128,
14
+ "hidden_act": "silu",
15
+ "hidden_size": 5120,
16
+ "initializer_range": 0.02,
17
+ "interleave_moe_layer_step": 1,
18
+ "intermediate_size": 8192,
19
+ "intermediate_size_mlp": 16384,
20
+ "layer_types": [
21
+ "chunked_attention",
22
+ "chunked_attention",
23
+ "chunked_attention",
24
+ "full_attention",
25
+ "chunked_attention",
26
+ "chunked_attention",
27
+ "chunked_attention",
28
+ "full_attention",
29
+ "chunked_attention",
30
+ "chunked_attention",
31
+ "chunked_attention",
32
+ "full_attention",
33
+ "chunked_attention",
34
+ "chunked_attention",
35
+ "chunked_attention",
36
+ "full_attention",
37
+ "chunked_attention",
38
+ "chunked_attention",
39
+ "chunked_attention",
40
+ "full_attention",
41
+ "chunked_attention",
42
+ "chunked_attention",
43
+ "chunked_attention",
44
+ "full_attention",
45
+ "chunked_attention",
46
+ "chunked_attention",
47
+ "chunked_attention",
48
+ "full_attention",
49
+ "chunked_attention",
50
+ "chunked_attention",
51
+ "chunked_attention",
52
+ "full_attention",
53
+ "chunked_attention",
54
+ "chunked_attention",
55
+ "chunked_attention",
56
+ "full_attention",
57
+ "chunked_attention",
58
+ "chunked_attention",
59
+ "chunked_attention",
60
+ "full_attention",
61
+ "chunked_attention",
62
+ "chunked_attention",
63
+ "chunked_attention",
64
+ "full_attention",
65
+ "chunked_attention",
66
+ "chunked_attention",
67
+ "chunked_attention",
68
+ "full_attention"
69
+ ],
70
+ "max_position_embeddings": 10485760,
71
+ "model_type": "llama4_text",
72
+ "moe_layers": [
73
+ 0,
74
+ 1,
75
+ 2,
76
+ 3,
77
+ 4,
78
+ 5,
79
+ 6,
80
+ 7,
81
+ 8,
82
+ 9,
83
+ 10,
84
+ 11,
85
+ 12,
86
+ 13,
87
+ 14,
88
+ 15,
89
+ 16,
90
+ 17,
91
+ 18,
92
+ 19,
93
+ 20,
94
+ 21,
95
+ 22,
96
+ 23,
97
+ 24,
98
+ 25,
99
+ 26,
100
+ 27,
101
+ 28,
102
+ 29,
103
+ 30,
104
+ 31,
105
+ 32,
106
+ 33,
107
+ 34,
108
+ 35,
109
+ 36,
110
+ 37,
111
+ 38,
112
+ 39,
113
+ 40,
114
+ 41,
115
+ 42,
116
+ 43,
117
+ 44,
118
+ 45,
119
+ 46,
120
+ 47
121
+ ],
122
+ "neuron": {
123
+ "_serialized_key": "NxDNeuronConfig",
124
+ "batch_size": 2,
125
+ "capacity_factor": null,
126
+ "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
127
+ "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
128
+ "continuous_batching": true,
129
+ "ep_degree": 1,
130
+ "fused_qkv": false,
131
+ "glu_mlp": true,
132
+ "local_ranks_size": 16,
133
+ "max_batch_size": 2,
134
+ "max_context_length": 4096,
135
+ "max_topk": 256,
136
+ "n_active_tokens": 4096,
137
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
138
+ "on_device_sampling": true,
139
+ "optimum_neuron_version": "0.4.3.dev1",
140
+ "output_logits": false,
141
+ "pp_degree": 1,
142
+ "sequence_length": 4096,
143
+ "speculation_length": 0,
144
+ "start_rank_id": 0,
145
+ "target": "trn1",
146
+ "torch_dtype": "bfloat16",
147
+ "tp_degree": 16
148
+ },
149
+ "no_rope_layers": [
150
+ 1,
151
+ 1,
152
+ 1,
153
+ 0,
154
+ 1,
155
+ 1,
156
+ 1,
157
+ 0,
158
+ 1,
159
+ 1,
160
+ 1,
161
+ 0,
162
+ 1,
163
+ 1,
164
+ 1,
165
+ 0,
166
+ 1,
167
+ 1,
168
+ 1,
169
+ 0,
170
+ 1,
171
+ 1,
172
+ 1,
173
+ 0,
174
+ 1,
175
+ 1,
176
+ 1,
177
+ 0,
178
+ 1,
179
+ 1,
180
+ 1,
181
+ 0,
182
+ 1,
183
+ 1,
184
+ 1,
185
+ 0,
186
+ 1,
187
+ 1,
188
+ 1,
189
+ 0,
190
+ 1,
191
+ 1,
192
+ 1,
193
+ 0,
194
+ 1,
195
+ 1,
196
+ 1,
197
+ 0
198
+ ],
199
+ "num_attention_heads": 40,
200
+ "num_experts_per_tok": 1,
201
+ "num_hidden_layers": 48,
202
+ "num_key_value_heads": 8,
203
+ "num_local_experts": 16,
204
+ "output_router_logits": false,
205
+ "rms_norm_eps": 1e-05,
206
+ "rope_scaling": {
207
+ "factor": 16.0,
208
+ "high_freq_factor": 1.0,
209
+ "low_freq_factor": 1.0,
210
+ "original_max_position_embeddings": 8192,
211
+ "rope_type": "llama3"
212
+ },
213
+ "rope_theta": 500000.0,
214
+ "router_aux_loss_coef": 0.001,
215
+ "router_jitter_noise": 0.0,
216
+ "tie_word_embeddings": false,
217
+ "use_cache": true,
218
+ "use_qk_norm": true,
219
+ "vocab_size": 202048
220
+ }
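Note: each registry entry above pairs the checkpoint's model config with the "neuron" export parameters (batch size, sequence length, tp_degree, compiler and optimum-neuron versions) that key the cached compiled artifacts. Below is a minimal, hedged sketch of requesting a matching export with optimum-neuron so that a cache hit is possible; the keyword names and the mapping of tp_degree to num_cores are assumptions for illustration, not taken from this repository.

    # Hedged sketch: export Llama-4-Scout with settings mirroring the cache entry above.
    # The exact keyword mapping (e.g. tp_degree -> num_cores) is an assumption.
    from optimum.neuron import NeuronModelForCausalLM

    model = NeuronModelForCausalLM.from_pretrained(
        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        export=True,
        batch_size=2,           # "batch_size": 2 in the entry above
        sequence_length=4096,   # "sequence_length": 4096
        num_cores=16,           # assumed to correspond to "tp_degree": 16
        auto_cast_type="bf16",  # "torch_dtype": "bfloat16"
    )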
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/99aac1ef07573c9c0fa0.json ADDED
@@ -0,0 +1,220 @@
+ {
+ "_entry_class": "SingleModelCacheEntry",
+ "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "_task": "text-generation",
+ "attention_bias": false,
+ "attention_chunk_size": 8192,
+ "attention_dropout": 0.0,
+ "attn_scale": 0.1,
+ "attn_temperature_tuning": true,
+ "dtype": "bfloat16",
+ "floor_scale": 8192,
+ "for_llm_compressor": false,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "interleave_moe_layer_step": 1,
+ "intermediate_size": 8192,
+ "intermediate_size_mlp": 16384,
+ "layer_types": [
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "chunked_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 10485760,
+ "model_type": "llama4_text",
+ "moe_layers": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47
+ ],
+ "neuron": {
+ "_serialized_key": "NxDNeuronConfig",
+ "batch_size": 1,
+ "capacity_factor": null,
+ "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+ "continuous_batching": false,
+ "ep_degree": 1,
+ "fused_qkv": false,
+ "glu_mlp": true,
+ "local_ranks_size": 16,
+ "max_batch_size": 1,
+ "max_context_length": 4096,
+ "max_topk": 256,
+ "n_active_tokens": 4096,
+ "neuronxcc_version": "2.21.18209.0+043b1bf7",
+ "on_device_sampling": true,
+ "optimum_neuron_version": "0.4.3.dev1",
+ "output_logits": false,
+ "pp_degree": 1,
+ "sequence_length": 4096,
+ "speculation_length": 0,
+ "start_rank_id": 0,
+ "target": "trn1",
+ "torch_dtype": "bfloat16",
+ "tp_degree": 16
+ },
+ "no_rope_layers": [
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0,
+ 1,
+ 1,
+ 1,
+ 0
+ ],
+ "num_attention_heads": 40,
+ "num_experts_per_tok": 1,
+ "num_hidden_layers": 48,
+ "num_key_value_heads": 8,
+ "num_local_experts": 16,
+ "output_router_logits": false,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 16.0,
+ "high_freq_factor": 1.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "router_aux_loss_coef": 0.001,
+ "router_jitter_noise": 0.0,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "use_qk_norm": true,
+ "vocab_size": 202048
+ }
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5e4c741fe5e0f5b099d40b9978c367531955572f5eba53147210debe29f33a26
+ size 1367989
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6c0e63b1bec594156292e975af5ebac2aa3bb0d8c959215f7db5c093a259807e
+ size 4363264
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e435d4382c111b7ee99+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:00f129e0558f3266c48325433d7aa8aa9b924e2d1f17314cb8cdba266262ed49
+ size 4546847
neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:65796c06ab7af92ba6a9644f2a7c7c097975c802b5cedd1e47a90662ab848112
+ size 912376
neuronxcc-2.21.18209.0+043b1bf7/MODULE_14f98cf04a3a75f2e0cf+24129607/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e5eb5eae4c845851230e3741e6fc70cff56ba8b46ba4d31250494828d4a27ac1
+ size 2059264
neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2dd067bb8e7f5373540ba520c274ee981f02b303c76853620a457e0545ac079a
+ size 1023328
neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:38267b698da6fcc510460c0e5b4e0f7ee008df2357326e82aad15c83b85edf84
+ size 6329344
neuronxcc-2.21.18209.0+043b1bf7/MODULE_15ad19e95aa1fe88acee+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7fb8a1567382a6d23c42f1d970257f9116648f1bc3404e556e772f97c1f482a
+ size 6513830
neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ab5dd1e88dcbe6ba0ab874f03f2b1d4744e635d4846a2aac3c8e710312fdfcd0
+ size 406532870
neuronxcc-2.21.18209.0+043b1bf7/MODULE_17cf73e81fe85ca950ea+24129607/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b92be29e4a8951ff436d82973d192d2ec52cce53a81a90c78ce3a361cb44091d
+ size 142459904
neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:55f01613d0337aeb0f0977a78b99e1c0cee9913120fd05928157a424acb5f406
+ size 102782272
neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:38aaaaff890bd48bf0b499b1329062e1f3b38252834fcef1dabdb86d716d92d1
+ size 7732224
neuronxcc-2.21.18209.0+043b1bf7/MODULE_229575da8168b5a68b32+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:66883cb96f56ad390822ff24b144b0714b2f91b0f4ca936d0f79de494d245975
+ size 8071521
neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.done ADDED
File without changes
neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:73ee9dde7d549ac3fdaa10dca307a1c7c739776b0c9c6ac1b10a319466fe3c12
+ size 103921698
neuronxcc-2.21.18209.0+043b1bf7/MODULE_2c14be573c3fe002ab6d+24129607/model.neff ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4684630f182582053fedae0234b6c417b237c88c377d4a07affedf4ea457a602
+ size 33506304
neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.18209.0+043b1bf7/MODULE_3da5ea5dabca8d6b773e+a02c3a36/model.done ADDED
File without changes