dacorvo HF Staff committed on Oct 10

Commit

de014da

verified ·

1 Parent(s): eb86d8a

Synchronizing local compiler cache.

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +62 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/92d06886229b28ba55c9.json +190 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/e713d700b97520ccbaf3.json +190 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/189ee41803917b6da16a.json +220 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/1c21e73cac3aa14addb0.json +220 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/3d7c1140c7502fe98f6e.json +220 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/3fde3020fc5b36354a0b.json +220 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/a39eb45a4062f4473f18.json +220 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/bfb5193dff82d5fb4061.json +220 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/c1e026d76ac15a6be5b0.json +220 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/d2028eebb97faf6a698e.json +220 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/e432fb219f614ac9c5c3.json +220 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/e66e3270b649b5ec2c17.json +220 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/7c2cfae61ea18e16664b.json +82 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/f6c9f13c77075dffcf24.json +82 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/63a4d7661cc54355920b.json +134 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0/qwen2/Qwen/Qwen2.5-0.5B/9d0b9126b8fed3c361f7.json +82 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/compile_flags.json +1 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.done +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.hlo_module.pb +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.neff +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/wrapped_neff.hlo +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/compile_flags.json +1 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.done +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.hlo_module.pb +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.neff +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/wrapped_neff.hlo +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/compile_flags.json +1 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.done +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.hlo_module.pb +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.neff +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/wrapped_neff.hlo +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/compile_flags.json +1 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.done +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.hlo_module.pb +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.neff +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/wrapped_neff.hlo +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_1023294089137432912+e30acd3a/model.neff +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_10907752543637211265+e30acd3a/model.neff +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_10945286243277389437+e30acd3a/model.neff +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_10d60f8fad55e1974521+a9d440f5/model.neff +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_10d60f8fad55e1974521+a9d440f5/wrapped_neff.hlo +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_11400907432531398953+e30acd3a/model.neff +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/compile_flags.json +1 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.done +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.hlo_module.pb +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.neff +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/wrapped_neff.hlo +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_12311847081050718470+e30acd3a/model.neff +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_12324009378304635855+e30acd3a/model.neff +0 -0

.gitattributes CHANGED Viewed

@@ -5126,3 +5126,65 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_e51f8e75d0439314e217+a9d440f5/model.neff
 neuronxcc-2.21.18209.0+043b1bf7/MODULE_e51f8e75d0439314e217+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text

 neuronxcc-2.21.18209.0+043b1bf7/MODULE_e51f8e75d0439314e217+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_24c207a15e469c523101+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_320d87d791127581beca+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_56bb43c14df243f59b9e+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_793c288702976d9a9911+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_97c0ff77a22597fd516a+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_9f1c978a730025d88f0e+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_a4a3e81c085b07aab6a4+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_b832fb0510340d63437b+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_debb5ac3650b1ea8899d+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_e9b2f3506069901c000d+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec52a194ef2b40528966+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa62c309b752d85d2be8+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/92d06886229b28ba55c9.json ADDED Viewed

	@@ -0,0 +1,190 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
+  "_task": "text-generation",
+  "attention_bias": false,
+  "attention_chunk_size": 8192,
+  "attention_dropout": 0.0,
+  "attn_scale": 0.1,
+  "attn_temperature_tuning": true,
+  "floor_scale": 8192,
+  "for_llm_compressor": false,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "interleave_moe_layer_step": 2,
+  "intermediate_size": 8192,
+  "intermediate_size_mlp": 16384,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 1048576,
+  "model_type": "llama4_text",
+  "moe_layers": [
+    1,
+    3,
+    5,
+    7,
+    9,
+    11,
+    13,
+    15,
+    17,
+    19,
+    21,
+    23,
+    25,
+    27,
+    29,
+    31,
+    33,
+    35,
+    37,
+    39,
+    41,
+    43,
+    45,
+    47
+  ],
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
+    "checkpoint_revision": "73d14711bcc77c16df3470856949c3764056b617",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 64,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn2",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 64
+  },
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 40,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
+  "num_local_experts": 128,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_qk_norm": false,
+  "vocab_size": 202048
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/e713d700b97520ccbaf3.json ADDED Viewed

	@@ -0,0 +1,190 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
+  "_task": "text-generation",
+  "attention_bias": false,
+  "attention_chunk_size": 8192,
+  "attention_dropout": 0.0,
+  "attn_scale": 0.1,
+  "attn_temperature_tuning": true,
+  "floor_scale": 8192,
+  "for_llm_compressor": false,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "interleave_moe_layer_step": 2,
+  "intermediate_size": 8192,
+  "intermediate_size_mlp": 16384,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 1048576,
+  "model_type": "llama4_text",
+  "moe_layers": [
+    1,
+    3,
+    5,
+    7,
+    9,
+    11,
+    13,
+    15,
+    17,
+    19,
+    21,
+    23,
+    25,
+    27,
+    29,
+    31,
+    33,
+    35,
+    37,
+    39,
+    41,
+    43,
+    45,
+    47
+  ],
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
+    "checkpoint_revision": "73d14711bcc77c16df3470856949c3764056b617",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 64,
+    "max_batch_size": 4,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn2",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 64
+  },
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 40,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
+  "num_local_experts": 128,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_qk_norm": false,
+  "vocab_size": 202048
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/189ee41803917b6da16a.json ADDED Viewed

	@@ -0,0 +1,220 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  "_task": "text-generation",
+  "attention_bias": false,
+  "attention_chunk_size": 8192,
+  "attention_dropout": 0.0,
+  "attn_scale": 0.1,
+  "attn_temperature_tuning": true,
+  "floor_scale": 8192,
+  "for_llm_compressor": false,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "interleave_moe_layer_step": 1,
+  "intermediate_size": 8192,
+  "intermediate_size_mlp": 16384,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 10485760,
+  "model_type": "llama4_text",
+  "moe_layers": [
+    0,
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    9,
+    10,
+    11,
+    12,
+    13,
+    14,
+    15,
+    16,
+    17,
+    18,
+    19,
+    20,
+    21,
+    22,
+    23,
+    24,
+    25,
+    26,
+    27,
+    28,
+    29,
+    30,
+    31,
+    32,
+    33,
+    34,
+    35,
+    36,
+    37,
+    38,
+    39,
+    40,
+    41,
+    42,
+    43,
+    44,
+    45,
+    46,
+    47
+  ],
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn2",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 40,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
+  "num_local_experts": 16,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 16.0,
+    "high_freq_factor": 1.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_qk_norm": true,
+  "vocab_size": 202048
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/1c21e73cac3aa14addb0.json ADDED Viewed

	@@ -0,0 +1,220 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  "_task": "text-generation",
+  "attention_bias": false,
+  "attention_chunk_size": 8192,
+  "attention_dropout": 0.0,
+  "attn_scale": 0.1,
+  "attn_temperature_tuning": true,
+  "floor_scale": 8192,
+  "for_llm_compressor": false,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "interleave_moe_layer_step": 1,
+  "intermediate_size": 8192,
+  "intermediate_size_mlp": 16384,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 10485760,
+  "model_type": "llama4_text",
+  "moe_layers": [
+    0,
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    9,
+    10,
+    11,
+    12,
+    13,
+    14,
+    15,
+    16,
+    17,
+    18,
+    19,
+    20,
+    21,
+    22,
+    23,
+    24,
+    25,
+    26,
+    27,
+    28,
+    29,
+    30,
+    31,
+    32,
+    33,
+    34,
+    35,
+    36,
+    37,
+    38,
+    39,
+    40,
+    41,
+    42,
+    43,
+    44,
+    45,
+    46,
+    47
+  ],
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 4,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 40,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
+  "num_local_experts": 16,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 16.0,
+    "high_freq_factor": 1.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_qk_norm": true,
+  "vocab_size": 202048
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/3d7c1140c7502fe98f6e.json ADDED Viewed

	@@ -0,0 +1,220 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  "_task": "text-generation",
+  "attention_bias": false,
+  "attention_chunk_size": 8192,
+  "attention_dropout": 0.0,
+  "attn_scale": 0.1,
+  "attn_temperature_tuning": true,
+  "floor_scale": 8192,
+  "for_llm_compressor": false,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "interleave_moe_layer_step": 1,
+  "intermediate_size": 8192,
+  "intermediate_size_mlp": 16384,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 10485760,
+  "model_type": "llama4_text",
+  "moe_layers": [
+    0,
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    9,
+    10,
+    11,
+    12,
+    13,
+    14,
+    15,
+    16,
+    17,
+    18,
+    19,
+    20,
+    21,
+    22,
+    23,
+    24,
+    25,
+    26,
+    27,
+    28,
+    29,
+    30,
+    31,
+    32,
+    33,
+    34,
+    35,
+    36,
+    37,
+    38,
+    39,
+    40,
+    41,
+    42,
+    43,
+    44,
+    45,
+    46,
+    47
+  ],
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 40,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
+  "num_local_experts": 16,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 16.0,
+    "high_freq_factor": 1.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_qk_norm": true,
+  "vocab_size": 202048
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/3fde3020fc5b36354a0b.json ADDED Viewed

	@@ -0,0 +1,220 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  "_task": "text-generation",
+  "attention_bias": false,
+  "attention_chunk_size": 8192,
+  "attention_dropout": 0.0,
+  "attn_scale": 0.1,
+  "attn_temperature_tuning": true,
+  "floor_scale": 8192,
+  "for_llm_compressor": false,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "interleave_moe_layer_step": 1,
+  "intermediate_size": 8192,
+  "intermediate_size_mlp": 16384,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 10485760,
+  "model_type": "llama4_text",
+  "moe_layers": [
+    0,
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    9,
+    10,
+    11,
+    12,
+    13,
+    14,
+    15,
+    16,
+    17,
+    18,
+    19,
+    20,
+    21,
+    22,
+    23,
+    24,
+    25,
+    26,
+    27,
+    28,
+    29,
+    30,
+    31,
+    32,
+    33,
+    34,
+    35,
+    36,
+    37,
+    38,
+    39,
+    40,
+    41,
+    42,
+    43,
+    44,
+    45,
+    46,
+    47
+  ],
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 16,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn2",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 40,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
+  "num_local_experts": 16,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 16.0,
+    "high_freq_factor": 1.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_qk_norm": true,
+  "vocab_size": 202048
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/a39eb45a4062f4473f18.json ADDED Viewed

	@@ -0,0 +1,220 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  "_task": "text-generation",
+  "attention_bias": false,
+  "attention_chunk_size": 8192,
+  "attention_dropout": 0.0,
+  "attn_scale": 0.1,
+  "attn_temperature_tuning": true,
+  "floor_scale": 8192,
+  "for_llm_compressor": false,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "interleave_moe_layer_step": 1,
+  "intermediate_size": 8192,
+  "intermediate_size_mlp": 16384,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 10485760,
+  "model_type": "llama4_text",
+  "moe_layers": [
+    0,
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    9,
+    10,
+    11,
+    12,
+    13,
+    14,
+    15,
+    16,
+    17,
+    18,
+    19,
+    20,
+    21,
+    22,
+    23,
+    24,
+    25,
+    26,
+    27,
+    28,
+    29,
+    30,
+    31,
+    32,
+    33,
+    34,
+    35,
+    36,
+    37,
+    38,
+    39,
+    40,
+    41,
+    42,
+    43,
+    44,
+    45,
+    46,
+    47
+  ],
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 8,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 40,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
+  "num_local_experts": 16,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 16.0,
+    "high_freq_factor": 1.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_qk_norm": true,
+  "vocab_size": 202048
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/bfb5193dff82d5fb4061.json ADDED Viewed

	@@ -0,0 +1,220 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  "_task": "text-generation",
+  "attention_bias": false,
+  "attention_chunk_size": 8192,
+  "attention_dropout": 0.0,
+  "attn_scale": 0.1,
+  "attn_temperature_tuning": true,
+  "floor_scale": 8192,
+  "for_llm_compressor": false,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "interleave_moe_layer_step": 1,
+  "intermediate_size": 8192,
+  "intermediate_size_mlp": 16384,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 10485760,
+  "model_type": "llama4_text",
+  "moe_layers": [
+    0,
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    9,
+    10,
+    11,
+    12,
+    13,
+    14,
+    15,
+    16,
+    17,
+    18,
+    19,
+    20,
+    21,
+    22,
+    23,
+    24,
+    25,
+    26,
+    27,
+    28,
+    29,
+    30,
+    31,
+    32,
+    33,
+    34,
+    35,
+    36,
+    37,
+    38,
+    39,
+    40,
+    41,
+    42,
+    43,
+    44,
+    45,
+    46,
+    47
+  ],
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 4,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 40,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
+  "num_local_experts": 16,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 16.0,
+    "high_freq_factor": 1.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_qk_norm": true,
+  "vocab_size": 202048
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/c1e026d76ac15a6be5b0.json ADDED Viewed

	@@ -0,0 +1,220 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  "_task": "text-generation",
+  "attention_bias": false,
+  "attention_chunk_size": 8192,
+  "attention_dropout": 0.0,
+  "attn_scale": 0.1,
+  "attn_temperature_tuning": true,
+  "floor_scale": 8192,
+  "for_llm_compressor": false,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "interleave_moe_layer_step": 1,
+  "intermediate_size": 8192,
+  "intermediate_size_mlp": 16384,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 10485760,
+  "model_type": "llama4_text",
+  "moe_layers": [
+    0,
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    9,
+    10,
+    11,
+    12,
+    13,
+    14,
+    15,
+    16,
+    17,
+    18,
+    19,
+    20,
+    21,
+    22,
+    23,
+    24,
+    25,
+    26,
+    27,
+    28,
+    29,
+    30,
+    31,
+    32,
+    33,
+    34,
+    35,
+    36,
+    37,
+    38,
+    39,
+    40,
+    41,
+    42,
+    43,
+    44,
+    45,
+    46,
+    47
+  ],
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 4,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn2",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 40,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
+  "num_local_experts": 16,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 16.0,
+    "high_freq_factor": 1.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_qk_norm": true,
+  "vocab_size": 202048
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/d2028eebb97faf6a698e.json ADDED Viewed

	@@ -0,0 +1,220 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  "_task": "text-generation",
+  "attention_bias": false,
+  "attention_chunk_size": 8192,
+  "attention_dropout": 0.0,
+  "attn_scale": 0.1,
+  "attn_temperature_tuning": true,
+  "floor_scale": 8192,
+  "for_llm_compressor": false,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "interleave_moe_layer_step": 1,
+  "intermediate_size": 8192,
+  "intermediate_size_mlp": 16384,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 10485760,
+  "model_type": "llama4_text",
+  "moe_layers": [
+    0,
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    9,
+    10,
+    11,
+    12,
+    13,
+    14,
+    15,
+    16,
+    17,
+    18,
+    19,
+    20,
+    21,
+    22,
+    23,
+    24,
+    25,
+    26,
+    27,
+    28,
+    29,
+    30,
+    31,
+    32,
+    33,
+    34,
+    35,
+    36,
+    37,
+    38,
+    39,
+    40,
+    41,
+    42,
+    43,
+    44,
+    45,
+    46,
+    47
+  ],
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 8,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 8,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn2",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 40,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
+  "num_local_experts": 16,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 16.0,
+    "high_freq_factor": 1.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_qk_norm": true,
+  "vocab_size": 202048
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/e432fb219f614ac9c5c3.json ADDED Viewed

	@@ -0,0 +1,220 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  "_task": "text-generation",
+  "attention_bias": false,
+  "attention_chunk_size": 8192,
+  "attention_dropout": 0.0,
+  "attn_scale": 0.1,
+  "attn_temperature_tuning": true,
+  "floor_scale": 8192,
+  "for_llm_compressor": false,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "interleave_moe_layer_step": 1,
+  "intermediate_size": 8192,
+  "intermediate_size_mlp": 16384,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 10485760,
+  "model_type": "llama4_text",
+  "moe_layers": [
+    0,
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    9,
+    10,
+    11,
+    12,
+    13,
+    14,
+    15,
+    16,
+    17,
+    18,
+    19,
+    20,
+    21,
+    22,
+    23,
+    24,
+    25,
+    26,
+    27,
+    28,
+    29,
+    30,
+    31,
+    32,
+    33,
+    34,
+    35,
+    36,
+    37,
+    38,
+    39,
+    40,
+    41,
+    42,
+    43,
+    44,
+    45,
+    46,
+    47
+  ],
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 40,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
+  "num_local_experts": 16,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 16.0,
+    "high_freq_factor": 1.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_qk_norm": true,
+  "vocab_size": 202048
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/e66e3270b649b5ec2c17.json ADDED Viewed

	@@ -0,0 +1,220 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  "_task": "text-generation",
+  "attention_bias": false,
+  "attention_chunk_size": 8192,
+  "attention_dropout": 0.0,
+  "attn_scale": 0.1,
+  "attn_temperature_tuning": true,
+  "floor_scale": 8192,
+  "for_llm_compressor": false,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "interleave_moe_layer_step": 1,
+  "intermediate_size": 8192,
+  "intermediate_size_mlp": 16384,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 10485760,
+  "model_type": "llama4_text",
+  "moe_layers": [
+    0,
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    9,
+    10,
+    11,
+    12,
+    13,
+    14,
+    15,
+    16,
+    17,
+    18,
+    19,
+    20,
+    21,
+    22,
+    23,
+    24,
+    25,
+    26,
+    27,
+    28,
+    29,
+    30,
+    31,
+    32,
+    33,
+    34,
+    35,
+    36,
+    37,
+    38,
+    39,
+    40,
+    41,
+    42,
+    43,
+    44,
+    45,
+    46,
+    47
+  ],
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 32,
+    "max_batch_size": 32,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn2",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 32
+  },
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 40,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
+  "num_local_experts": 16,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 16.0,
+    "high_freq_factor": 1.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_qk_norm": true,
+  "vocab_size": 202048
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/7c2cfae61ea18e16664b.json ADDED Viewed

	@@ -0,0 +1,82 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen2.5-0.5B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "hidden_act": "silu",
+  "hidden_size": 896,
+  "initializer_range": 0.02,
+  "intermediate_size": 4864,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 24,
+  "model_type": "qwen2",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen2.5-0.5B",
+    "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "float32",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 14,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_mrope": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/f6c9f13c77075dffcf24.json ADDED Viewed

	@@ -0,0 +1,82 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen2.5-0.5B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "hidden_act": "silu",
+  "hidden_size": 896,
+  "initializer_range": 0.02,
+  "intermediate_size": 4864,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 24,
+  "model_type": "qwen2",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 2,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen2.5-0.5B",
+    "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 2,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "float16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 14,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_mrope": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/63a4d7661cc54355920b.json ADDED Viewed

	@@ -0,0 +1,134 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "HuggingFaceTB/SmolLM3-3B",
+  "_task": "text-generation",
+  "architectures": [
+    "SmolLM3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 65536,
+  "max_window_layers": 28,
+  "mlp_bias": false,
+  "model_type": "smollm3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 32,
+    "capacity_factor": null,
+    "checkpoint_id": "HuggingFaceTB/SmolLM3-3B",
+    "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 8,
+    "max_batch_size": 32,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 8
+  },
+  "no_rope_layer_interval": 4,
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 16,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 4,
+  "pretraining_tp": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 5000000.0,
+  "sliding_window": null,
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 128256
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0/qwen2/Qwen/Qwen2.5-0.5B/9d0b9126b8fed3c361f7.json ADDED Viewed

	@@ -0,0 +1,82 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen2.5-0.5B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "hidden_act": "silu",
+  "hidden_size": 896,
+  "initializer_range": 0.02,
+  "intermediate_size": 4864,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 24,
+  "model_type": "qwen2",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen2.5-0.5B",
+    "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 14,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_mrope": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.done ADDED Viewed

File without changes

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ff1ffbb3a791b1c36ae1319a865a5564b10e1e83ee848dd2fb3c2bc9acf0921
+size 102479643

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:32cc8c880a2cc4b487bae5ef92e7add8c29d88e9e74fd1de9672e5cff7db6f8e
+size 4834304

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/wrapped_neff.hlo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b5e0c18a0a887bff9e5f6669decaddbb7e1483e15dacbc99bda7bde1caa1c49
+size 5173724

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.done ADDED Viewed

File without changes

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22e4b6de1df0c178588eef5edb3866b12ef453071858429ba8e9089a467e55e6
+size 103307507

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3eee937486cface48af548d55a794c337698827c86b0bc7ef84b36941b9cf31b
+size 7588864

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/wrapped_neff.hlo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fc2c1479a5fe411b5290a2bbbe52d14ebd54a9b739775901022f3adb7f6d0dc9
+size 7928284

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.done ADDED Viewed

File without changes

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8815b5584c7dacbd715a9bee3bedbcf3606b5bdb06960536624ba05b7cb25eda
+size 105618580

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9f63a18c435dd0f739a365673f43e31532c75d2cca517648697c6d4d1ce1ce28
+size 14797824

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/wrapped_neff.hlo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a059c8fb267fb45fc18669117aad72fea82f53cd4aea74fd01b0fce0573fe70e
+size 15139158

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.done ADDED Viewed

File without changes

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:533e90f4e441dc6105323721d6d979f31ced33e5299c590db811dba7e8bfd6e8
+size 105706212

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:400b0601dd93f0ebb2e5b993813630eafb168ac65553108b36630d1c497d9dbe
+size 25263104

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/wrapped_neff.hlo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4160843e4f9dcf060943c8a66793d3132df9397ce079a911c1f6890df21b702
+size 25603080

neuronxcc-2.21.18209.0+043b1bf7/MODULE_1023294089137432912+e30acd3a/model.neff CHANGED Viewed

Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1023294089137432912+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1023294089137432912+e30acd3a/model.neff differ

neuronxcc-2.21.18209.0+043b1bf7/MODULE_10907752543637211265+e30acd3a/model.neff CHANGED Viewed

Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10907752543637211265+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10907752543637211265+e30acd3a/model.neff differ

neuronxcc-2.21.18209.0+043b1bf7/MODULE_10945286243277389437+e30acd3a/model.neff CHANGED Viewed

Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10945286243277389437+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10945286243277389437+e30acd3a/model.neff differ

neuronxcc-2.21.18209.0+043b1bf7/MODULE_10d60f8fad55e1974521+a9d440f5/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25d5794197cc1c440a87ceb511f5c1be579e938e1804d40a69b8ef1de5520571
 size 277504

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e64a278dbf0c045ccb9ea741ee91d91ab9ba571401072f8f5fc1d0484700fc7
 size 277504

neuronxcc-2.21.18209.0+043b1bf7/MODULE_10d60f8fad55e1974521+a9d440f5/wrapped_neff.hlo CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f620b1d7b37b2896c0fbf86202063926fb1a18d68cc3d20eecca1c10090bc877
 size 285854

 version https://git-lfs.github.com/spec/v1
+oid sha256:b7a05a0e98e0e38d6aa3fd477a824b23547291df269f411559460f9f97c66192
 size 285854

neuronxcc-2.21.18209.0+043b1bf7/MODULE_11400907432531398953+e30acd3a/model.neff CHANGED Viewed

Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11400907432531398953+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11400907432531398953+e30acd3a/model.neff differ

neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_8f951ccc-7e17-4fbd-94b5-f2642cb99e0b/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]

neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.done ADDED Viewed

File without changes

neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35
+size 1165

neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e0c9ded24def6d0a6c3cebcdc5522c189fc492723ad673389ad51b70cae51501
+size 103424

neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/wrapped_neff.hlo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8838a67b9bf87d116c128ddaebe8d77d446d7111e5b1243076365791a97348e
+size 104320

neuronxcc-2.21.18209.0+043b1bf7/MODULE_12311847081050718470+e30acd3a/model.neff CHANGED Viewed

Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12311847081050718470+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12311847081050718470+e30acd3a/model.neff differ

neuronxcc-2.21.18209.0+043b1bf7/MODULE_12324009378304635855+e30acd3a/model.neff CHANGED Viewed

Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12324009378304635855+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12324009378304635855+e30acd3a/model.neff differ