diff --git a/.gitattributes b/.gitattributes index 1acf5bf579fc2effa667bbbcc5de22cc41ea1baa..dbc7cbe095fd9b3fc63df2facb3d27dd7421d6d8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5126,3 +5126,65 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_e51f8e75d0439314e217+a9d440f5/model.neff neuronxcc-2.21.18209.0+043b1bf7/MODULE_e51f8e75d0439314e217+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_fc37bb64f7e86324075c+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_fef991cead1748c4d101+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_24c207a15e469c523101+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_320d87d791127581beca+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_56bb43c14df243f59b9e+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_793c288702976d9a9911+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_97c0ff77a22597fd516a+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_9f1c978a730025d88f0e+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a4a3e81c085b07aab6a4+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b832fb0510340d63437b+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_debb5ac3650b1ea8899d+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_e9b2f3506069901c000d+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec52a194ef2b40528966+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa62c309b752d85d2be8+877608f3/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/92d06886229b28ba55c9.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/92d06886229b28ba55c9.json new file mode 100644 index 0000000000000000000000000000000000000000..0858ea720de759cc8429192ea3995705bb22cc24 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/92d06886229b28ba55c9.json @@ -0,0 +1,190 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3, + 5, + 7, + 9, + 11, + 13, + 15, + 17, + 19, + 21, + 23, + 25, + 27, + 29, + 31, + 33, + 35, + 37, + 39, + 41, + 43, + 45, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "checkpoint_revision": "73d14711bcc77c16df3470856949c3764056b617", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 64, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 64 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 128, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": false, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/e713d700b97520ccbaf3.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/e713d700b97520ccbaf3.json new file mode 100644 index 0000000000000000000000000000000000000000..4d9b994f800fcd18b21ba1f4be9b2fbcd0b54404 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/e713d700b97520ccbaf3.json @@ -0,0 +1,190 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3, + 5, + 7, + 9, + 11, + 13, + 15, + 17, + 19, + 21, + 23, + 25, + 27, + 29, + 31, + 33, + 35, + 37, + 39, + 41, + 43, + 45, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "checkpoint_revision": "73d14711bcc77c16df3470856949c3764056b617", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 64, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 64 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 128, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": false, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/189ee41803917b6da16a.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/189ee41803917b6da16a.json new file mode 100644 index 0000000000000000000000000000000000000000..dc98446c85fa2c076e26b3ef638d3889f3cdc6ec --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/189ee41803917b6da16a.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/1c21e73cac3aa14addb0.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/1c21e73cac3aa14addb0.json new file mode 100644 index 0000000000000000000000000000000000000000..ebd4aa2f3c762e10cf54f3a0d141376ab9bdd44a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/1c21e73cac3aa14addb0.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/3d7c1140c7502fe98f6e.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/3d7c1140c7502fe98f6e.json new file mode 100644 index 0000000000000000000000000000000000000000..ce92afbb0625036f8bde75d70ca0fd96e15182ec --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/3d7c1140c7502fe98f6e.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/3fde3020fc5b36354a0b.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/3fde3020fc5b36354a0b.json new file mode 100644 index 0000000000000000000000000000000000000000..07c0d7deaf380bbc77722cc91d667eae60415e05 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/3fde3020fc5b36354a0b.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/a39eb45a4062f4473f18.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/a39eb45a4062f4473f18.json new file mode 100644 index 0000000000000000000000000000000000000000..60780fb9ff93ffb1dc96452e2b6eac10d80233b7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/a39eb45a4062f4473f18.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/bfb5193dff82d5fb4061.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/bfb5193dff82d5fb4061.json new file mode 100644 index 0000000000000000000000000000000000000000..3cea16bbf9e453029cf0b802bae58ab843a075a3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/bfb5193dff82d5fb4061.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/c1e026d76ac15a6be5b0.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/c1e026d76ac15a6be5b0.json new file mode 100644 index 0000000000000000000000000000000000000000..d4036a1ffb189c4a9b1622c4f8c78641d9281474 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/c1e026d76ac15a6be5b0.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/d2028eebb97faf6a698e.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/d2028eebb97faf6a698e.json new file mode 100644 index 0000000000000000000000000000000000000000..6140bfc90e71157b5248338583e0f9b0a23e27e1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/d2028eebb97faf6a698e.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/e432fb219f614ac9c5c3.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/e432fb219f614ac9c5c3.json new file mode 100644 index 0000000000000000000000000000000000000000..0c3c77a9025717bf489227f3856edf35f14295c6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/e432fb219f614ac9c5c3.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 16, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/e66e3270b649b5ec2c17.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/e66e3270b649b5ec2c17.json new file mode 100644 index 0000000000000000000000000000000000000000..1090064dd7e7ef6d6a3e9b34545a77e3af33fecc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/e66e3270b649b5ec2c17.json @@ -0,0 +1,220 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 32, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 32 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/7c2cfae61ea18e16664b.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/7c2cfae61ea18e16664b.json new file mode 100644 index 0000000000000000000000000000000000000000..731aba8c25c2b2278c41615dbc0b4296d17b6a6c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/7c2cfae61ea18e16664b.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/f6c9f13c77075dffcf24.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/f6c9f13c77075dffcf24.json new file mode 100644 index 0000000000000000000000000000000000000000..931841d00eca6eef4a9c952bc3885e5b288fdebe --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/f6c9f13c77075dffcf24.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 2, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 2, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/63a4d7661cc54355920b.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/63a4d7661cc54355920b.json new file mode 100644 index 0000000000000000000000000000000000000000..892122a64196a0892d95ff9c2bedbd76e88e7e1a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0.dev0/smollm3/HuggingFaceTB/SmolLM3-3B/63a4d7661cc54355920b.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 8, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0.dev0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 8 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0/qwen2/Qwen/Qwen2.5-0.5B/9d0b9126b8fed3c361f7.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0/qwen2/Qwen/Qwen2.5-0.5B/9d0b9126b8fed3c361f7.json new file mode 100644 index 0000000000000000000000000000000000000000..44f4aab2ecede814a557600b359db487dbf6a762 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0/qwen2/Qwen/Qwen2.5-0.5B/9d0b9126b8fed3c361f7.json @@ -0,0 +1,82 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.0", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8a1e3789c81e0e1776b900c15e618cf50f5dd9c8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff1ffbb3a791b1c36ae1319a865a5564b10e1e83ee848dd2fb3c2bc9acf0921 +size 102479643 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0cd2e8f07217f4f8aaa234ef9eb4e89c2e510798 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32cc8c880a2cc4b487bae5ef92e7add8c29d88e9e74fd1de9672e5cff7db6f8e +size 4834304 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0c968f976cac8f982c0aa7097549ef1efdf1b5f2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c1c92e265d540052c2e+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b5e0c18a0a887bff9e5f6669decaddbb7e1483e15dacbc99bda7bde1caa1c49 +size 5173724 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7da1247fbb30c640bdbb346f83a90df808661297 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22e4b6de1df0c178588eef5edb3866b12ef453071858429ba8e9089a467e55e6 +size 103307507 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..26dbb5d130fbd07dcdaf837c10a7733b0e9e0cc7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eee937486cface48af548d55a794c337698827c86b0bc7ef84b36941b9cf31b +size 7588864 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1b01db0d56a2dddc94fb08e8d0cb03825f391367 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0c2afb109a81f924f7e9+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc2c1479a5fe411b5290a2bbbe52d14ebd54a9b739775901022f3adb7f6d0dc9 +size 7928284 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0e55e4b9726d5f5681e931b5e151c4910e16fb58 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8815b5584c7dacbd715a9bee3bedbcf3606b5bdb06960536624ba05b7cb25eda +size 105618580 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c236eaca5f4dd59630fe36129cdb828b5a2726b4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f63a18c435dd0f739a365673f43e31532c75d2cca517648697c6d4d1ce1ce28 +size 14797824 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..09e8014bfcb6e05bf44fecbbad9823be22e110aa --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0ca220bc75b3fb7d4e02+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a059c8fb267fb45fc18669117aad72fea82f53cd4aea74fd01b0fce0573fe70e +size 15139158 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6ef8bb643c199a0100bd4edfd3c5c8aea8b4056e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:533e90f4e441dc6105323721d6d979f31ced33e5299c590db811dba7e8bfd6e8 +size 105706212 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..21a8544153b88ff6e7178ee13fdb1b7c8d1b140e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:400b0601dd93f0ebb2e5b993813630eafb168ac65553108b36630d1c497d9dbe +size 25263104 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..78a52761dcb6d71b6dd1359f68b47e8bf380c8db --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_0dfc4a6617f4a9f8c8d1+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4160843e4f9dcf060943c8a66793d3132df9397ce079a911c1f6890df21b702 +size 25603080 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1023294089137432912+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1023294089137432912+e30acd3a/model.neff index 47b2d23efdec25948d751e5baad723efe5b9a39e..46cabc60bcc88fb29895bd63d7b5b98496c9998c 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1023294089137432912+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1023294089137432912+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10907752543637211265+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10907752543637211265+e30acd3a/model.neff index 510b036b61c7b9600ae9939ba04efa41aacc911a..998f9c266292836a3fb9e502cc4a677d6f661a3c 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10907752543637211265+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10907752543637211265+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10945286243277389437+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10945286243277389437+e30acd3a/model.neff index b3e975f7f00da9c640759fdf60fff34aebae9872..9a78649acdf9c9ff01e2d0cfc108b5d53729db4b 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10945286243277389437+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10945286243277389437+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10d60f8fad55e1974521+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10d60f8fad55e1974521+a9d440f5/model.neff index cafe1cac829756a87728eee6b9d78c97155d9a92..71564859b80db6a21e15b154a0136c0f7656396d 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10d60f8fad55e1974521+a9d440f5/model.neff +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10d60f8fad55e1974521+a9d440f5/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25d5794197cc1c440a87ceb511f5c1be579e938e1804d40a69b8ef1de5520571 +oid sha256:7e64a278dbf0c045ccb9ea741ee91d91ab9ba571401072f8f5fc1d0484700fc7 size 277504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10d60f8fad55e1974521+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10d60f8fad55e1974521+a9d440f5/wrapped_neff.hlo index c8351239656605d9cc3c897b47e410351ce9da01..b01c5d466958d8078bbe530dc2d2463a0a058faf 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10d60f8fad55e1974521+a9d440f5/wrapped_neff.hlo +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_10d60f8fad55e1974521+a9d440f5/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f620b1d7b37b2896c0fbf86202063926fb1a18d68cc3d20eecca1c10090bc877 +oid sha256:b7a05a0e98e0e38d6aa3fd477a824b23547291df269f411559460f9f97c66192 size 285854 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11400907432531398953+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11400907432531398953+e30acd3a/model.neff index 72aeee836a900438c9b69b214d32510e64830888..2742ed78f4cb2ae4b1b8c9082913b566abb47ac4 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11400907432531398953+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_11400907432531398953+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e61a38c5b458c62676713f798b1dd5f93ea3e537 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_8f951ccc-7e17-4fbd-94b5-f2642cb99e0b/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a2752c25c9f7e1092e2b55c010694e168610ce3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35 +size 1165 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..87a25bbbd8e24a5aec96e0c10850864e3d7aba99 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0c9ded24def6d0a6c3cebcdc5522c189fc492723ad673389ad51b70cae51501 +size 103424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..091bd0188485a9fcd4b49ce73ba46210ef267416 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1229cd5e7cd44cac2147+3aa04906/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8838a67b9bf87d116c128ddaebe8d77d446d7111e5b1243076365791a97348e +size 104320 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12311847081050718470+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12311847081050718470+e30acd3a/model.neff index ca9a7926d1dd5da1979461b2e1b7a4d9ba4e5b92..bf545a021307d38d5b947059fa4e1c5954b69000 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12311847081050718470+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12311847081050718470+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12324009378304635855+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12324009378304635855+e30acd3a/model.neff index f8850142d42d734eef03aeb74aeb52b52f6ab92b..3e48b42c11dffc635c15f0d7d6087506de9e6782 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12324009378304635855+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12324009378304635855+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12509236927544110827+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12509236927544110827+e30acd3a/model.neff index 8620ffb79cc29393b853a86d962d0523a1f842c9..ec0d32f6ff51f2f95eea6438e3325377af2935c8 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12509236927544110827+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12509236927544110827+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12650363609878702055+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12650363609878702055+e30acd3a/model.neff index 48ef8cf6170dadd8b37cc9d86f2a440795d9b71f..ce8d4604cfb0118518c99966b9cdd50d579e0bfd 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12650363609878702055+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12650363609878702055+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12666804432653521811+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12666804432653521811+e30acd3a/model.neff index cda36a864cbdb9a858f02fdb99b859ee90f0cb7e..88d77819c765a8a16e0f8a732443cc0c3575f3bc 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12666804432653521811+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12666804432653521811+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1277765475941548362+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1277765475941548362+e30acd3a/model.neff index c109d2153de1428bc7f6b831cc32d75112addc2c..2fc95f47fcc3fc216d9cc675fd464c3b1f116a1b 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1277765475941548362+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1277765475941548362+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12795949123662846630+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12795949123662846630+e30acd3a/model.neff index 51292e8a7b566810c329d2359183c57f81b6e51a..ee41a4cb2dcc67f093ec3cffbed373a0e3b4c755 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12795949123662846630+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_12795949123662846630+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13092192882592555392+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13092192882592555392+e30acd3a/model.neff index 6f7ec76ca6ad471ba4d0af06ba86aceeaecd43ae..0447e7de2e631ee668a68485e467ea384ba0626b 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13092192882592555392+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13092192882592555392+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13129006844218143067+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13129006844218143067+e30acd3a/model.neff index 51c937be941b45c91057d69a89965edc798f8ba3..d75abe563ff530c7c4d2d078b48a21cca1249571 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13129006844218143067+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13129006844218143067+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13163168066471565112+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13163168066471565112+e30acd3a/model.neff index 790ab1b929b30a0667d7832261dd3345be6a21c8..46ad11cae7db15540bb26566b22b49870f993347 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13163168066471565112+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13163168066471565112+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13385766575326555678+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13385766575326555678+e30acd3a/model.neff index b52111b85f89973185feb66cce513fdef076606b..9c8d26380f1557da499b9a6fb92036cb349d083f 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13385766575326555678+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13385766575326555678+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13521002922414225272+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13521002922414225272+e30acd3a/model.neff index d6152c240921640a8e222af704527111bd5e10c1..d43889ef27931233d2093b47978a4c0cca4566a2 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13521002922414225272+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13521002922414225272+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13673338043232097095+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13673338043232097095+e30acd3a/model.neff index b438ec3c467e19398fdde99785dca45028455950..71f70ba962ce89e86ee226533c52e94b88fd44e2 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13673338043232097095+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_13673338043232097095+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14024125047097359821+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14024125047097359821+e30acd3a/model.neff index 0198c4307cd10fd94b8ed28281a28853dc79ae1c..bce5ab16e6121f0b237cff76d543046e6a7a4188 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14024125047097359821+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14024125047097359821+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14406838977173684020+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14406838977173684020+e30acd3a/model.neff index 1a2a776e730c32dfdbd090b306aac61294fd7799..20439e380c2d1cdedef21ffb85c89b97b53cdc40 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14406838977173684020+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14406838977173684020+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14488951057292576015+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14488951057292576015+e30acd3a/model.neff index 9fae89a6d64d310fa70c188e489c8b519e665f7a..6dd7f26f44eadb4a7719066077d9ee63699b9af6 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14488951057292576015+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14488951057292576015+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14607415948795306857+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14607415948795306857+e30acd3a/model.neff index 1a51175e4161b8282f74f48175acafdb7bea24c1..d9bf1ee2452e135923c9ba0456a953fa18e90003 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14607415948795306857+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_14607415948795306857+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15104978417860996248+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15104978417860996248+e30acd3a/model.neff index de1f53afa2f144d74ea9a59819187f652b33d515..7089f292a699cf97753d73a29b2df453a6d1c2bf 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15104978417860996248+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15104978417860996248+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15195479995167874327+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15195479995167874327+e30acd3a/model.neff index 094e1f28b26e3f3ae177a376d08aeaa9e7602363..3fad515f87035553b5027ae19d83b7284bec8e82 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15195479995167874327+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15195479995167874327+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15226158922329678840+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15226158922329678840+e30acd3a/model.neff index ac9b8087d048fd5acf839338584db9baa640814c..7f81b0939673d8368d6361f4d0072ffe9c354713 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15226158922329678840+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15226158922329678840+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15445992300537187360+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15445992300537187360+e30acd3a/model.neff index 166373cf7e47e2ae459850e13214d0efe50165c7..9070838855bea33c26e9ea35a581c4be1acabc5b 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15445992300537187360+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15445992300537187360+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15549583432468528942+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15549583432468528942+e30acd3a/model.neff index 9a768af4c1b38353df99768fd4e7c1f136652e0a..f6501b2548ca486bf23eb83384ca6ca7ab6411f2 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15549583432468528942+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15549583432468528942+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15883166014121986340+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15883166014121986340+e30acd3a/model.neff index 0713b1ee9d7c34a3c2317ca4e0553cb0891882d6..3ba7cf11c291130ab3e3f3a4c97691ae0d0f05a8 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15883166014121986340+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15883166014121986340+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15974718484747567133+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15974718484747567133+e30acd3a/model.neff index eddb872ccb08fa1e769242eeef634f302ab6b319..2fe2c3226834ba44dfd3f0b9334fbf63666c9872 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15974718484747567133+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_15974718484747567133+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16063773584643651549+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16063773584643651549+e30acd3a/model.neff index 29aafc038ba470a56ca4efe6becd7b3abd08749f..2ebfee66f4d490a1da2124c5496b960215675229 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16063773584643651549+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16063773584643651549+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16540141349946602462+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16540141349946602462+e30acd3a/model.neff index 9ca542af2d7a0c2104a343e262a2b18e75b3b054..832cd25449e8338f9c4be294eed390f2c616e0d7 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16540141349946602462+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_16540141349946602462+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1659424179484095552+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1659424179484095552+e30acd3a/model.neff index 09e213a0026edc1b564a1d5ec9e09c31091de85d..80c23e82b6a51cdb3797bb237cef3a33593c98a9 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1659424179484095552+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1659424179484095552+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17037369046574255528+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17037369046574255528+e30acd3a/model.neff index f23030715dc11edf08210512f85c7c2dd90f775b..4b034266cbfc2d64d674e7a6fcd9e2b7cae50452 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17037369046574255528+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17037369046574255528+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17162153672426857671+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17162153672426857671+e30acd3a/model.neff index 7ac530bed8968e1e1c8db3b81a09a24a69961f8d..1537f4617f6154f6af2e859aabe50b1c57bf2b1b 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17162153672426857671+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17162153672426857671+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17184761711863280677+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17184761711863280677+e30acd3a/model.neff index a76ddc280c313d4b71dbd6cd240e891d351b4a93..920023191f4ef8ada430481cae37d2447aa8fa7e 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17184761711863280677+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17184761711863280677+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17685401492131160329+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17685401492131160329+e30acd3a/model.neff index 774dce31159675a2e679614c1d26b8b2093fca5d..d3af2cf04b8098621b5737dca56d4f294effad4d 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17685401492131160329+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17685401492131160329+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17713911408407405055+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17713911408407405055+e30acd3a/model.neff index b7d58cf24f4baa0d2aaa8e18ccbbf325b9e85d69..ebf3945fe21c45aa7ac49351d4f7b1c9488bc04a 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17713911408407405055+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17713911408407405055+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17795910220177952420+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17795910220177952420+e30acd3a/model.neff index 21e3aa8467421e2c3a392d8dfb6c4a2354396f16..d5f4da09dc6fa0810e2417e69b525a78251a402a 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17795910220177952420+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_17795910220177952420+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1837940185986854500+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1837940185986854500+e30acd3a/model.neff index 3294c17edac1668b75a8614d58d41ebf59e14d04..1f9793a24c62e897a4481784500e50eb69107210 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1837940185986854500+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1837940185986854500+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1870165519051566644+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1870165519051566644+e30acd3a/model.neff index d92d4f35f5503b2792418d1ce2e91b3d19459482..1e26638767893f2220cdf7e666074ff163b798d7 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1870165519051566644+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1870165519051566644+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..007dd1e00bc31af8a48e9b442d88eb64bf2427e7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_d7fea5c8-2e49-4df3-ae16-8ab5f50d65a5/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..07bf62460bc7813e4a95f208598e2fa4ae551d41 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76dc525043c8d0fc6b46382b17659e6a2f2e981788c635021a8a61aa832d2866 +size 14480 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..62ccf75a4d5591cbbc61a15c40047002c44f508d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01fc497dc88bf8f6e4adcd435353a3942c72e422e5cbb517f7fa88765dd64f0f +size 267264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..00fbed0f7a1bc02e03af0e93245aecf3e0008f1f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1a0a6acfff620b4cb3f3+58b54a50/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:725e6274d619d57efcfce0956399800c26a62e640720eb4b8b151bb82afd10a0 +size 272962 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e723a454cc81b7a4c3222a790a17daf7a829ff36 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9dd4fdec2ec731ea62a63ed2c89fbd8fa91583f5670e978b318c72dc66d97e3 +size 104372351 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5580f550388313e1123cfe02340dc707a4123ce3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0235c055d29c3d0642a9db6471e4258f38d6c41d39891ec0287afc12b1efbf2b +size 10957824 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ad853e74ec24788d9c0227fed438afa591dcc13a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1bef594b6c63d7e78ccc+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b351042f9b690c337149dac6d8d77239eeeea644769fd21d559c7f5efa12c92 +size 11297244 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..c7545daae02942ebb2098a860b5e8ecb3e05ef51 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_a0c5302c-4e34-4ed1-886a-888c93804409/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a2752c25c9f7e1092e2b55c010694e168610ce3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35 +size 1165 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fc9cacd910bf4463bfbfcaa619ded82e978f5ec8 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76e84752efb2ff5c8cb7f18888959eddcbd49a97023fd8f0a70a2fe1ae76f229 +size 103424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..316c54b5b5d481f98532b2f1489a3ce59248fb00 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_1d8119012decad3afce0+7a6033f9/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14bbd7f0d5b9978db168798f8ff440c06d1e6f21c36dc860bc6329fc13ecfe6b +size 104320 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2240472231266337587+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2240472231266337587+e30acd3a/model.neff index 431a76fa0e10f21e1269dc5164532f4d9a940a89..c63b44bbc3af7dc5d436059731ec87a161648748 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2240472231266337587+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2240472231266337587+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2241641840113569106+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2241641840113569106+e30acd3a/model.neff index 27414ee23ae5fb2f308b2529629617ae0de511e8..920f3f4f4787295595b25525271d36e248d9aa69 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2241641840113569106+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2241641840113569106+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2447413629898598397+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2447413629898598397+e30acd3a/model.neff index 66568301c42b514ad3f23ace555609d186b2042d..4ba37c21c3c45e52ebdfe324fcc4c989ea642ebf 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2447413629898598397+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2447413629898598397+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2477619772153140591+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2477619772153140591+e30acd3a/model.neff index 36531fe9eabde65764a31413b863b1a7fb823e8f..685e43a3afb6be45d1a84b0f8cce25535103ffcf 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2477619772153140591+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2477619772153140591+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_24c207a15e469c523101+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_24c207a15e469c523101+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_24c207a15e469c523101+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_24c207a15e469c523101+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_24c207a15e469c523101+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_24c207a15e469c523101+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_24c207a15e469c523101+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6e70aecbfd8c5abe99f46e84b8dc950a9b5d2647 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_24c207a15e469c523101+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5815ebddbe261f3d1ae2145fff8498e96ee72ad6fb3fdfd67a04cf0f4ff69f0 +size 104898043 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_24c207a15e469c523101+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_24c207a15e469c523101+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..425a091bde49a85f8f585264818c228815c71172 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_24c207a15e469c523101+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:560f39b44c179228d0d467a55a4d17020a845db5ae7fe47f092c3b120d3b66c4 +size 33854464 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2840230091891637644+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2840230091891637644+e30acd3a/model.neff index dfebfd01b9bd1d28626c6cc47517d7bf4ed3dec3..c90b163e96929af971063f8499083f9701a5e71f 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2840230091891637644+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_2840230091891637644+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3006935121772265506+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3006935121772265506+e30acd3a/model.neff index 92f5531b0dbb9add7069b9ea498099f671541a50..34fc858b2db7224efe645a33b45a413ea53543e2 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3006935121772265506+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3006935121772265506+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3097762131279755404+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3097762131279755404+e30acd3a/model.neff index 1df28e30dfc9c639daec312473f03de3d7d87ecf..fb7275408c4a0ec32ddc21e52a6a36e87b7ea4f5 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3097762131279755404+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3097762131279755404+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5a8c5cf1085ffef543fe61e900a0ea18a0626cbe --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_6555e8c1-6bbe-41ba-a39e-092d0fd1b68f/compiler_workdir/NeuronLlamaMLP/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e6084ce6d845ff91b01c0cbfd758d24019f38ef9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a99355de7043422bc03a8df795afecf586999ec744407bc4fbffe63f002ba2df +size 1931 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..17cdcdd5c7f1d203477cc2b57e4330e49a891619 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8b61e9381d94ccc9cdb8d5ac9283e20ddaf2ec22a09af2a0a87b808606fa996 +size 134144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e93e049a3bff8f9fe36ff87d8c21d39116a9da8b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_315b2743497ff0386da9+0da2eea7/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed6a592c5c584448e49c4a9f277cc1cd55bd2fd7a8078e21a835bfb1565a836 +size 136222 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_320d87d791127581beca+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_320d87d791127581beca+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_320d87d791127581beca+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_320d87d791127581beca+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_320d87d791127581beca+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_320d87d791127581beca+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_320d87d791127581beca+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..597a25c5eb022e6da1099834ebf3594ca47640bc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_320d87d791127581beca+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128524b58989eb6b9eb22edc20ac88ca0c7eabc34bb6267a4513c7b31a4b7251 +size 104897021 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_320d87d791127581beca+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_320d87d791127581beca+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b855bd6051087999fb27c1ef03b3f894442ecb20 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_320d87d791127581beca+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bed038bdaf0d435520d5456c1fad489ce9c11f6d5169a8ccfb42ca36e697006e +size 33045504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3742470769527135970+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3742470769527135970+e30acd3a/model.neff index ebb1422ed9574ed3c3dd6210b8ac46dada7b541e..515638783f74a85409c480fa99aaded2b1b81c47 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3742470769527135970+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3742470769527135970+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3802233204730345176+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3802233204730345176+e30acd3a/model.neff index 49ddc879c04dbabda5ac46b6e4a5a5facb20fde0..77db2ca3c0405ddde7e15785f24107027e10b75e 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3802233204730345176+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3802233204730345176+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3918194918412354509+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3918194918412354509+e30acd3a/model.neff index 80d5ce188a8cbcf9148bb9796eb62740d7133a63..60bc91e19d24ed4c6c793e5c36141552d4c5369e 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3918194918412354509+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3918194918412354509+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8e1cb268db2845d93e6ba388cf895fbb26e99ddb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6285479803672b6be4d5016890b5f3f4f87b28dcc3bee1c4cfb3041e8d187ac5 +size 103785563 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..87a25add33bc5fe73b7331a48f0b25b85fcbdba9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdef25878eb3b192aaf55a368ad9f0546d67b02208d810377da3755fde4cf22e +size 9391104 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1ace202f64d7b9765757fcd7ee3b9b13d5adaf91 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3a0b53d0c2e168ed7240+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9be77657d8706fb54568abbf9a2352cdc7dbfb33375bcafb7d00acadadba2e0 +size 9700476 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4609340858424122400+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4609340858424122400+e30acd3a/model.neff index 1e93c59bb3313fe7c7511f6ca69b52f564b3f5de..efad80c3e364d7c9ef830814f6b506c5cd89c8a3 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4609340858424122400+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4609340858424122400+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_46247ec53206419a1ab6+23b5bfa3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_46247ec53206419a1ab6+23b5bfa3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..f7ab48ad38c7f41f5bfb555cf6b21d9b217e5fae --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_46247ec53206419a1ab6+23b5bfa3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_8aebe027-be02-4ce5-8630-8f7b63cbace5/compiler_workdir/Llama3RotaryEmbedding/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_46247ec53206419a1ab6+23b5bfa3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_46247ec53206419a1ab6+23b5bfa3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_46247ec53206419a1ab6+23b5bfa3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_46247ec53206419a1ab6+23b5bfa3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f1e2473c54cffac949ecf31d3b6ec636c290545b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_46247ec53206419a1ab6+23b5bfa3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a29d4cf27aec9af90e53324eff9ee69e033679482f4016a9dd3e5416c7c68a +size 1584 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_46247ec53206419a1ab6+23b5bfa3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_46247ec53206419a1ab6+23b5bfa3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bc8563dc033ecf645d2e335d9a5623440c89975a Binary files /dev/null and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_46247ec53206419a1ab6+23b5bfa3/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4692571821501481255+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4692571821501481255+e30acd3a/model.neff index 701b62c4666b41195f57ad786b8079c7647792d1..70d6eba60130a5fde6d60245c4f38c652e6ab8f4 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4692571821501481255+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4692571821501481255+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4728386a3e07df8ecf99+9a8f54a2/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4728386a3e07df8ecf99+9a8f54a2/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..d72650d0ce8106925a6a86213517471fc5f640c3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4728386a3e07df8ecf99+9a8f54a2/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_df76b43f-b29a-4924-9a66-982114689ec1/compiler_workdir/Llama3RotaryEmbedding/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4728386a3e07df8ecf99+9a8f54a2/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4728386a3e07df8ecf99+9a8f54a2/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4728386a3e07df8ecf99+9a8f54a2/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4728386a3e07df8ecf99+9a8f54a2/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f1e2473c54cffac949ecf31d3b6ec636c290545b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4728386a3e07df8ecf99+9a8f54a2/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a29d4cf27aec9af90e53324eff9ee69e033679482f4016a9dd3e5416c7c68a +size 1584 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4728386a3e07df8ecf99+9a8f54a2/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4728386a3e07df8ecf99+9a8f54a2/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..74c5c474e676e9716b74641afecae6a90cb2e5cd Binary files /dev/null and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4728386a3e07df8ecf99+9a8f54a2/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4729947242617427400+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4729947242617427400+e30acd3a/model.neff index 2c1797351aa02958edfdf014d0951e6a53954729..569e8c3bb9f118d41deecc255b2239283e6247f8 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4729947242617427400+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4729947242617427400+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4748150422668476963+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4748150422668476963+e30acd3a/model.neff index 201066e03e9d5ea24f043f1a65a96c07c23eb489..a5fa8ce5d6e7ab2663115779c4ee97950bafb963 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4748150422668476963+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4748150422668476963+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4982309848892198153+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4982309848892198153+e30acd3a/model.neff index d10e4751cd2ca9ba3370519ba587e03c71893d9f..e5ec1ceeeacb93b80e4fb279e672122f9373dc07 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4982309848892198153+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4982309848892198153+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eccdf4537d43b1b6dce6f776b7d0319ea18bbcce --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12f7640624dd097c4c870d4c6afc656367e9d481e5ad6f28177c5fe467b616c2 +size 579648 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4b14b9577b2b26c777092e4f6c8a41bd1256f6ce --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d41c3a9d58e992e58c02a318ad434fe835b836ecb0deb56116cbe3810ca4c55 +size 1844224 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c1c9ede255f6af7df92759ff9a9452803b4f10c7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4a6975528090471b6bd3+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:541edefa4e3c17b0cc479b22d9fa41c5321d3b6ca0a5b45b638002a97801a1d9 +size 2000673 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..c4fa91ef578bab9c31aaddef6c32c62e1179ef7b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_32b77d8f-8c4a-49c8-a6ff-0085cb3627bc/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a11cc062167102b0f4faa9fdf16388d72d9e0015 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4590b82eb167fd46963abdbb717cc18cb9511961a3393d100eeacdbdc45b0243 +size 11280 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a8b8305e1fb972d5628df578b9ed0e22eb1bb9e6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31c9936206faee73c0d4b498ea26fe0d2e2a968b2f57e57decafbb38aca5491f +size 1444864 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..80fc7fe092f938196dddfb710cdaf6644df6caf0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4cd12e88bce1104ae8ee+a1098300/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad72b4d13ae011d7dc21a077c15e892cd82dc8f1208e4fb5346740806bf6853f +size 1447845 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5056926643664195969+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5056926643664195969+e30acd3a/model.neff index af824e45e759c2a6ce926ea3415c2d13a61d7a6e..784cc8d976286a993854c621493454c4b4725128 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5056926643664195969+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5056926643664195969+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5060945789978167091+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5060945789978167091+e30acd3a/model.neff index 2bc4c77d156930ece350bfc902cd2b3c8e594a9a..4b9b594117b141cf4562b41cffb26a6452d9cacc 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5060945789978167091+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5060945789978167091+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5068307057019708073+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5068307057019708073+e30acd3a/model.neff index b8012feff4c70e7429365b109f89027be05fe143..58320f9ef42a942a56691a2e0fb27648b3755946 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5068307057019708073+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5068307057019708073+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5221425564004302780+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5221425564004302780+e30acd3a/model.neff index 1faa18fd91edccfa86990030f9a938a48a39219b..bd77c56abdc819e303d1d205e6954e190034ab70 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5221425564004302780+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5221425564004302780+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5301538834954885513+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5301538834954885513+e30acd3a/model.neff index c18f641061aadb246b042b657b1bf9a1a4a18aa5..99f34c7e1d224d0590c83a7b8a0175c3fdfdebbf 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5301538834954885513+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5301538834954885513+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5323357013706876100+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5323357013706876100+e30acd3a/model.neff index 08309d31c1537c92f7de8484f165c027afc25778..ce525309553c0945f60bfc1f12dee7357f2b1269 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5323357013706876100+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5323357013706876100+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5346694134112720644+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5346694134112720644+e30acd3a/model.neff index c0eb99fd46aca922c3d23c2b3c287c0e23d82d3e..8cd0ba216875d1100e9d5435cdce8155ef8f1756 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5346694134112720644+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_5346694134112720644+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_56bb43c14df243f59b9e+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_56bb43c14df243f59b9e+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_56bb43c14df243f59b9e+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_56bb43c14df243f59b9e+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_56bb43c14df243f59b9e+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_56bb43c14df243f59b9e+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_56bb43c14df243f59b9e+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8568d02db4ebc62e4b169a2f5f74d18c9dd40150 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_56bb43c14df243f59b9e+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2fc3d644141fa7c8471823283cdccd749a4090ada48297e4ecede98573e2113 +size 104898043 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_56bb43c14df243f59b9e+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_56bb43c14df243f59b9e+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..19b977cce47f287118c2226d266486499941a47e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_56bb43c14df243f59b9e+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5fb6d70c16092abf899c96b4ce67ed7c2cfcf25fc677be5ffc3937d7555cbda +size 33854464 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578110665359387607+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578110665359387607+e30acd3a/model.neff index 56698eef51b562845b7c4b36576355ec24e09fcb..c82e8efe03af5de748e69cd51dc01f1596f5c640 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578110665359387607+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_578110665359387607+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..a9c42d241bb0fd2a719f50dfdf3d46b8093ac2c9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_608a921c-936c-48af-bcae-94ed9c28c13c/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a2752c25c9f7e1092e2b55c010694e168610ce3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35 +size 1165 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d13811a2264a775f7740eab221729104387f3f66 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea820b83d11fd8a76a23222dbba51eb3f0699249e04d4ffa7792892678d30dde +size 103424 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..edc926896604b624a2a2be17a8af7e222f00599b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_580532c7b0fa4e5cdc8f+8aadbcaf/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616e6ba64584062258220e0bd5880509edd0312b5f8cbf8683b1530203dfe3c8 +size 104320 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_607905c97e8a375ad8f1+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_607905c97e8a375ad8f1+ed72d204/model.hlo_module.pb index 3bc3f31495433a1c655de36b2b057c05dcd1e241..8468917315ed356a74977f22a02766fb51c48ec5 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_607905c97e8a375ad8f1+ed72d204/model.hlo_module.pb +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_607905c97e8a375ad8f1+ed72d204/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24624bc45be46654efb5eb289cc62aae8e0818687d6378e93a7244553928e7d7 +oid sha256:af3142c4bad7a94c63a15b00820f862283a5850959694be41f2c59430ff7f6a0 size 83504 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_607905c97e8a375ad8f1+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_607905c97e8a375ad8f1+ed72d204/model.neff index 86c7c778a8a47247adc5e252734eb9705415a758..79e4875e3336b1d728ba048a7e41cab73de79134 100644 --- a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_607905c97e8a375ad8f1+ed72d204/model.neff +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_607905c97e8a375ad8f1+ed72d204/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1def2aa1595ee48cf7ddf6dfa4a7d2c7ffa27f4a0ec104c45bce6bd29fc6238f +oid sha256:99bf9690c4445022c2cba13de91f2d3d8fda91bf90dccd93cbab9551d84e4a77 size 328704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6208374896869439318+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6208374896869439318+e30acd3a/model.neff index 6ea53da95e1416b2ff03d9c0d32796a618ddda41..7f760b9adfd50e90a90cacd1278622ae47fb4fd3 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6208374896869439318+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6208374896869439318+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6305938804443382221+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6305938804443382221+e30acd3a/model.neff index 4c3382f2cfaa645ae104d42958fda1a56a305a10..3cf0f5b28104289aeb13cf0eee4b1336f4a02808 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6305938804443382221+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6305938804443382221+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6462012722744130561+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6462012722744130561+e30acd3a/model.neff index f3dfea01a40834be6354033fb1dbfb93fb2559ef..4c1d1e5c6f828f42ffef185475b8adc3c9aa5738 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6462012722744130561+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6462012722744130561+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6465053070255633885+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6465053070255633885+e30acd3a/model.neff index 2ceca84e934b6cca16c827e054d804aac2e0bf12..a07e5af477a6ea33164e9ae2fde2460984ea597f 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6465053070255633885+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6465053070255633885+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6601989946514187606+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6601989946514187606+e30acd3a/model.neff index 9ebc81577b01eeb00bee14030e799d4f51b604c1..492d779de8a765c9844a5cdd8dc6246abea10acb 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6601989946514187606+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6601989946514187606+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_689110558945142051+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_689110558945142051+e30acd3a/model.neff index 42a3be7f2772ae82deaa8dc18656112d85901729..553da315b6da0a9d3cddd5d23e1ed2f1e522dae0 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_689110558945142051+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_689110558945142051+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6954418380069475056+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6954418380069475056+e30acd3a/model.neff index 49ea00276e8f8efa4eb05461db51beb12d302b18..98b108794ffa2416c1ad516202003f75d809b4b3 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6954418380069475056+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6954418380069475056+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..cd8c17d59443348e0f4e3368e2b4da9fc15f013d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_f9b988b7-7cf2-43fb-a4e8-1ed277a94ac3/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..07bf62460bc7813e4a95f208598e2fa4ae551d41 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76dc525043c8d0fc6b46382b17659e6a2f2e981788c635021a8a61aa832d2866 +size 14480 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2dcba58a18fd06d830f56ebce08ad93f0b8f1b9f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71ffeb111b901627c05ea15a753337ef5065ea065372c1660ecb538df1912f77 +size 267264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d97dc5816e65320764b7783f1a8573341a350104 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_6fa218ba1695f9493ef4+00958a24/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:351e2af5a0ecb78ae2395897a0ad45b7bd5213e8161961b6070765adf87e054e +size 272962 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7071522469786365265+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7071522469786365265+e30acd3a/model.neff index 9a9ac8f17d803df3666923b833a7193b48e868a1..ddb275da0de02437a5a9dee23d4bd6be5635dbef 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7071522469786365265+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7071522469786365265+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..cf7e42d46626101b8a3bfcf71fb8ed3076998176 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_58f619b6-c7be-4bba-a3b0-0d43086e327e/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..be5670c6258a72b3253c262c04b0a3c084a1810d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dc99fd8fa13fb76a08ae04c8275958cd87878cfbf86f63ae613d7efcf775bfc +size 29412 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6e073394fdaf25e79af0512bc27252a55d23a15b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1249a2f372834c8de59a597eb0f1ce503c5e13ce0bf018aa73d3f7952f425ce +size 328704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..69940255345833d5a8dd40a3aac1aff80af284aa --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_73dd17d3d6329d01c2ed+c85d9c4e/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:734bd583b0e8d462bfd8386c535428245a4630009b56ac09a5174cc2a6b6fdbe +size 334452 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7430361747835832819+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7430361747835832819+e30acd3a/model.neff index 62a5f17e0d249c1b6280fac6f91da5dae89808cf..6b17afa5338aede0d237ac3d08d0f78a5b36af2d 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7430361747835832819+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7430361747835832819+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_748714541699003438+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_748714541699003438+e30acd3a/model.neff index 9a5f278abb2521091cfea6c31fd6839a952beacd..7a0d26688fd0d1cbe97cb9eeaa785cf0c2b9585a 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_748714541699003438+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_748714541699003438+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..61e8b6b36951edf525971c5a37911f29a286b398 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8307b84004a552451aa6122d7b02c445753c9dfd76290e3ef3e4c1c68e905de2 +size 102996729 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c3c56561495b01716d723d5c4f0442be5a6ba3bc --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be145c96622634ad4e1b80d38a2b301061c31c6c8566e1b963c08865962f9969 +size 7005184 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c0a21afa6411226f2579bef82c924d0b933bbbde --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_75e22a6ef5ef139d5357+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c65a910da8ea4a4c3f16f7209109b0d14aa00863dc0d1f5f6ea32ebb59014d44 +size 7314549 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7760252933458031364+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7760252933458031364+e30acd3a/model.neff index b528fa315a1498ffc7bb030bea935676e23c2a78..6bedb3b4129c9fe37c68e549bc2aef6b11992653 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7760252933458031364+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7760252933458031364+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_780259796876411187+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_780259796876411187+e30acd3a/model.neff index bcb72e395bfb8acd812c623f51922aa304ac512e..794b5cc7d81170da171305f9d84f2fd54b687c4a 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_780259796876411187+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_780259796876411187+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_788940165194100575+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_788940165194100575+e30acd3a/model.neff index 884d074619cba67f9aac6218101c32acd01440fa..fbb4c720ee45a5e9a59c75ff5beb279bd52df052 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_788940165194100575+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_788940165194100575+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793c288702976d9a9911+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793c288702976d9a9911+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793c288702976d9a9911+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793c288702976d9a9911+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793c288702976d9a9911+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793c288702976d9a9911+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793c288702976d9a9911+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bbcfe962270dfeb120838faea3b409cd513a3eda --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793c288702976d9a9911+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b91883672e4991d70a00b10018f7c1ee836e79b4361b9ca03631acc4fdd733 +size 107294513 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793c288702976d9a9911+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793c288702976d9a9911+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5effb2d0ff76bc78b3586df362e440fe2a88e82f --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_793c288702976d9a9911+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a693121b1b193587eefecb504db914904e072006ad9b1d6d0b83adbaf96a5888 +size 39793664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..173e1a15628631cb7ff16378adba0f269796d36a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9010ff7fe9928313d84d5544708c41b63c9fbd9783c51bee3304b727c5001024 +size 104857772 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ce17662c1b8faaa8e2f836a67e9987975a88e55b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e615d192dec352ca5d0ed7b376f532351b9aface24a5f2abb674d42a0db49832 +size 20214784 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a1613629f8991d3e27419d456724939d4c432e9e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_7ce961870ffaa37cc217+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bc88cc04eafc19b9da6009f60fb74e5f1564c4a1033967abae367f0b2ffb79e +size 20555096 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093733071045345337+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093733071045345337+e30acd3a/model.neff index 7479ad52d4894ce83aad89e607d56e1f218b01f4..dece9ae444bfe032bc5be1f6f5664c53834f231f 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093733071045345337+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8093733071045345337+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8432304590411733968+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8432304590411733968+e30acd3a/model.neff index 00630a53a0ff69917dd04b9e2be2a90ed7e9db82..16067964bb8438f07bc88cea6d27a41cd2cd1c13 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8432304590411733968+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8432304590411733968+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8468241434736974290+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8468241434736974290+e30acd3a/model.neff index 58f2c2b99f5460179489d270033aaef7712c45d2..a302f4527187b1b2e1d41b9216caa54496f8cc87 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8468241434736974290+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8468241434736974290+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8476840015321783067+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8476840015321783067+e30acd3a/model.neff index 4792cd97389e6ebc1b4627380df78cbb24aea175..b16858d0bdcb662c29c6a7b3587ef0b4e1f1ab60 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8476840015321783067+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8476840015321783067+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..214d94e854c51d23aa923e6a9702f15329d6abf2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba8cef6eb4834db160b9794f909ea51151625bdbc006631bf4edea6c5683568 +size 104639996 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cd3b3aabc2611b68045611d962d46425605ea0d9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82cef033e735a81c96c3adeda6565176a760de2e955d181836b1d184c6e46269 +size 11807744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b14ecd2a2fdab02bb0c8066307a6f7bb8a33f804 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8633e15902caf849591d+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb8313df7b5ee2a9e5d3a9cbe0a8762e95bf75fc796cd3ac61cbc2ae037f781c +size 12149078 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8657275884604457834+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8657275884604457834+e30acd3a/model.neff index 54a7b31c92e3848fb3ec88ad07db5ca43f7c2470..e2a4ebebe190a1903230ed3f61779941ea5ca981 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8657275884604457834+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8657275884604457834+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_879201820668420060+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_879201820668420060+e30acd3a/model.neff index 72f0ee130b91aa64ddaa9d322641190f7f2e52c0..f91c8c2e141fbefdcef865fbb599afeca579f700 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_879201820668420060+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_879201820668420060+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8874447663297084929+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8874447663297084929+e30acd3a/model.neff index a1c90fa3fa5609d1ba0e905077cb5daf34fb033b..ed0782524777a870d0b4612f1af801febc4eea84 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8874447663297084929+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8874447663297084929+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9359742670556022940+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9359742670556022940+e30acd3a/model.neff index eaba36afa56068de6191c70ce71fb30f56ca34dc..2b572fd0f6fdc4b07863048a9b37ce62d94f455a 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9359742670556022940+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9359742670556022940+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9360214757141243910+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9360214757141243910+e30acd3a/model.neff index 227497f989f0192b6774488751868a0e9b0d68f8..0909307bc2ae71e642a6ebd30109ae3391538d3c 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9360214757141243910+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9360214757141243910+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9551663534243818596+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9551663534243818596+e30acd3a/model.neff index 38d3615eb241b8eee11be2d0b4e5ca668016a8b0..8592b740992116c8d08e37f753a8c8614bd01334 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9551663534243818596+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9551663534243818596+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9721314421976720364+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9721314421976720364+e30acd3a/model.neff index 0c1f9637bbf05b14beee2937dc718ec872ac6bab..def40964839629aa91b0d40d1b97857bca41cedf 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9721314421976720364+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9721314421976720364+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_97c0ff77a22597fd516a+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_97c0ff77a22597fd516a+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_97c0ff77a22597fd516a+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_97c0ff77a22597fd516a+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_97c0ff77a22597fd516a+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_97c0ff77a22597fd516a+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_97c0ff77a22597fd516a+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5a66898a50a0e34a43032815951d9e640798be49 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_97c0ff77a22597fd516a+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe7f5606c2a3271bd0d1902ab1e64b60af66d985122035339e63c8491b3d7fd0 +size 103713112 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_97c0ff77a22597fd516a+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_97c0ff77a22597fd516a+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a10b475388b07e19c4f09e79f62042b3c913efec --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_97c0ff77a22597fd516a+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d7fc802f42b1e26604d39e7ade0890f7b0f34cf6c71000177f98e1385a5cf51 +size 28273664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9884039268981168463+e30acd3a/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9884039268981168463+e30acd3a/model.neff index f8f23b4c0f906be721df644f4394ebf20a1c3084..ee19a7759dcd1c655c2231ff47a793b5437bcc9f 100644 Binary files a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9884039268981168463+e30acd3a/model.neff and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9884039268981168463+e30acd3a/model.neff differ diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..00370b8ba81a08ea41f47b11e9f437942e3fd8f1 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_a05010ff-9d84-40d4-b487-fd609b12f897/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..07bf62460bc7813e4a95f208598e2fa4ae551d41 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76dc525043c8d0fc6b46382b17659e6a2f2e981788c635021a8a61aa832d2866 +size 14480 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ddc34d3191b32e6fd6ed20b260b0e29069dd79f5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:755c408ec347f7eddd2281b47f14c870f8edabca2fc83922146958eb8f605327 +size 267264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2737adde6185ff7bca220eb95c2bee809d244633 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9c0e3086d07ddd9ff175+315bf1af/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e78cfb6065815983c81d442185d9b240fbda8a991d738e791dcc548665e75bf6 +size 272962 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9f1c978a730025d88f0e+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9f1c978a730025d88f0e+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9f1c978a730025d88f0e+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9f1c978a730025d88f0e+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9f1c978a730025d88f0e+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9f1c978a730025d88f0e+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9f1c978a730025d88f0e+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d0fcf8fa4fc9a2479d1c52f45121fab9f9aeefc6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9f1c978a730025d88f0e+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7d251b0416e63fb3afeadfe702c73e68a93ba05b048f3aca5086cca15e1f025 +size 107184005 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9f1c978a730025d88f0e+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9f1c978a730025d88f0e+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3ae3f67f6ee1486b70981e4f52442767562bff71 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_9f1c978a730025d88f0e+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38085b8668b1248e2f57e8ce477d5edf0477b42baa35801023494dcce4b6c1b1 +size 39906304 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a4a3e81c085b07aab6a4+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a4a3e81c085b07aab6a4+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a4a3e81c085b07aab6a4+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a4a3e81c085b07aab6a4+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a4a3e81c085b07aab6a4+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a4a3e81c085b07aab6a4+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a4a3e81c085b07aab6a4+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d0011e22177f609dc66cb6e2ab184942fa934063 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a4a3e81c085b07aab6a4+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d13acf5ccb296b02c4c78cd9fba0521f23f116fe912580b6eeaf3df47f16c65 +size 593552 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a4a3e81c085b07aab6a4+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a4a3e81c085b07aab6a4+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2f7494aba07532628522ab546db39881d62cc640 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a4a3e81c085b07aab6a4+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9c8381b2f8241852a387cf662795259d139c6ec6b340ae8626fe48ed30758b +size 3769344 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..d9044dbc91d2afa7feb4abb287f3972493a1f981 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_087121ed-8c74-46a6-ac82-257672dab103/compiler_workdir/NeuronLlamaMLP/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e6084ce6d845ff91b01c0cbfd758d24019f38ef9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a99355de7043422bc03a8df795afecf586999ec744407bc4fbffe63f002ba2df +size 1931 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..522cdcc92e5530b348069f9e9f7c246004368cb0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea1e2362c38e0500c683950b7751a10b375e2c4917c728774ff4d178200d169b +size 134144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..34f00606b6366dd91dd3cc65901fcca64197c3c5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a69791d72cf8725fb00e+e805b8c0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccde9d0e25d034d95e96445b2b4e94d85c7973d9d14dea3610e5743e65daa575 +size 136222 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8da50ffdebbe4b136d67d64bb87ac42d154163d7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ddccc7fc9d8218c1bea3932336d479625ce18919 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c3938bfedc3e462e04f693959f698e326630dbd2c327308efc7bd491ebce5fd +size 103501154 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a2ed757df877845074581b735babfe4a19df6870 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f66bd884ead5ed4b0d0325943f25d12355a7ff32796b1cd180921d6c55d9768 +size 8070144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e6dee4ae0314709b7c8977ceebd9747f41382fae --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b46b54a1df4af0029c62+747527b0/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:101cef1e5363cd0cfdfb9fe6200f5cabe330590cb05ce7afc8a60a98b68a9f4f +size 8411464 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b832fb0510340d63437b+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b832fb0510340d63437b+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b832fb0510340d63437b+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b832fb0510340d63437b+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b832fb0510340d63437b+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b832fb0510340d63437b+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b832fb0510340d63437b+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a0f06f6d84fcc62cfc606b81e7a11f4e3993785 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b832fb0510340d63437b+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a803913ae52e97577f7f70fd72070dc644f4ac3ecd3516ce4a3cb63cd412bd7a +size 103713112 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b832fb0510340d63437b+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b832fb0510340d63437b+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..27a83785e3a945ad23deaa1854591f3ed4a0fec3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b832fb0510340d63437b+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61bcdbc12b066c44977a238a198d2a5765ee4faf06aca33540eef9ad44d00827 +size 28273664 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..f6edb3e1a68e30064e5926d6bc8f813a60514b32 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_6e03359e-f79c-40da-9d76-64b02034f0b3/compiler_workdir/NeuronLlamaMLP/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e6084ce6d845ff91b01c0cbfd758d24019f38ef9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a99355de7043422bc03a8df795afecf586999ec744407bc4fbffe63f002ba2df +size 1931 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..01a68fa890a5bd9cc2c8f20f201b2fc71de71567 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6389a1c362c081d2dbc006519f00ac2f23492757ef78edb6c0871b7a3505b1ec +size 134144 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b40d0d4c43f002d69db5cd98668a01e890ddae93 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d5188e62408152b5598c+2ebd2f30/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad199f2d14437f24c77ab10c8fcbe8d4043bdd017dbd97d1f74d696c481c6bbf +size 136222 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..681f3ef082ada704c81c158ae9d18f97da44f804 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a05d55a3ba9ed1af510844c9797965d4cf4c24eb231b3e0e25021c14e5d55ed +size 588406 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a1b97aa026d67f4aa79aa23dd18dac93aa596b58 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b0367178594e5d06a4d673bacedf57a9ec308c24ecf886ce7836e085b8d469b +size 1577984 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d4dabe7a797c2f31d1085c14ecd290352a821359 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_d7db5c9ba7baaf53944a+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b0a5a8c6a9b1446501d166f5f702e23a1d76f515b63487b0e8e7d16619e586 +size 1734318 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_debb5ac3650b1ea8899d+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_debb5ac3650b1ea8899d+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_debb5ac3650b1ea8899d+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_debb5ac3650b1ea8899d+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_debb5ac3650b1ea8899d+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_debb5ac3650b1ea8899d+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_debb5ac3650b1ea8899d+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..20be1dd94620829360b81e10732c81b631ba34f2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_debb5ac3650b1ea8899d+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e5cb73b8c011d77e86420ccf1ecfef686a5f15078a41f52ab7f6381d97b5205 +size 104787528 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_debb5ac3650b1ea8899d+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_debb5ac3650b1ea8899d+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..07987dc284daa14a5234338d1ffc0f494194ac05 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_debb5ac3650b1ea8899d+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c65c5afe9d5596bf4520930ba97bea5f70f179e1c134b54330601ab3fdfa5a5 +size 33864704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1fd6cbfd7c306259df3a7f3fe093e8b8f387c0d3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_5546e923-6848-4bd9-bf40-b99b69d2806e/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8496781848c02ecd308b5b2c5a3c342b702425e7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e79a5cc325f9a317d158d931d75ec73ae1cfb16a4d83343a2593edd95d0044 +size 8979 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c541eca02e2b2d5df9b0d74e30edfbf793d2c5f3 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb61abb31c160a55489e1ffa421fb933e4b307ee72fd711b70858e2892a1a983 +size 246784 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..67a8a32c1ac2cab45f6e674704535530a721ef3a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e2afad6a215666c7282f+f4980c5f/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58f8b6834bc38905d708beb327983fe6578f59d1258831ce827ec3a463bee332 +size 249608 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e9b2f3506069901c000d+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e9b2f3506069901c000d+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e9b2f3506069901c000d+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e9b2f3506069901c000d+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e9b2f3506069901c000d+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e9b2f3506069901c000d+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e9b2f3506069901c000d+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fd74277fd28481cd8452f7c098bc8bee5d624b64 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e9b2f3506069901c000d+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf0668ec8bf937c3ad51115c1672f0975e91ce3c271b86602e070920d35cfdb2 +size 686865 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e9b2f3506069901c000d+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e9b2f3506069901c000d+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1ae8c0e05f90f0c18451fa46d0cf9db98b0baa0b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_e9b2f3506069901c000d+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c73ade53c5cf1ef57eeadafdd0836ea56cdb8c386f74afe342b6f23bda2a63d6 +size 27997184 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec52a194ef2b40528966+ed72d204/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec52a194ef2b40528966+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec52a194ef2b40528966+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec52a194ef2b40528966+ed72d204/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec52a194ef2b40528966+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec52a194ef2b40528966+ed72d204/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec52a194ef2b40528966+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eaaa1644d3cd0eb15e0cd80a26285f44c3aeb31e --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec52a194ef2b40528966+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:972c3c081b15d0ea23118fd57c5cc264144cc02b0d4ef35fd043896e8c643254 +size 103602611 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec52a194ef2b40528966+ed72d204/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec52a194ef2b40528966+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1e6e6a508779f6b1b023c1e1a104e90ab54da197 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec52a194ef2b40528966+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bca496f300de7148cece67692de52bc0ac081276d89b822e71fff5248d2a8b1 +size 28161024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..b245c234c971c3d612063ceeb5273563127e184c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_64195d75-d440-4ca3-a029-f8f0c48b0133/compiler_workdir/DecoderLayerWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..be5670c6258a72b3253c262c04b0a3c084a1810d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dc99fd8fa13fb76a08ae04c8275958cd87878cfbf86f63ae613d7efcf775bfc +size 29412 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..627a565a336a6dedc8f48e726fccf4605badbd73 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20dffa7f23e6d914659099c4c4ec93751928b0ee2bfb2dfec241e7ecd148005 +size 328704 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1d95a700a97c15307eb2dd8144732ba8325775ca --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f9a173c45d9639a1e62c+e9978ceb/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:187963ef951202b9c76511f15b87469ecccd03c8fc0eda9537e7c01f01b1eadb +size 334452 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa62c309b752d85d2be8+877608f3/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa62c309b752d85d2be8+877608f3/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fc27c67e0f3bd30aba010defcc6c2d0646e87b83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa62c309b752d85d2be8+877608f3/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa62c309b752d85d2be8+877608f3/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa62c309b752d85d2be8+877608f3/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa62c309b752d85d2be8+877608f3/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa62c309b752d85d2be8+877608f3/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4a33161c1f14dfe8eca67360502ba6e321193a11 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa62c309b752d85d2be8+877608f3/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb799c98bd850bde224e7708ea396bfe5f711ececcc69a43d0269e506f2b239 +size 104896685 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa62c309b752d85d2be8+877608f3/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa62c309b752d85d2be8+877608f3/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..74fc189629667dc5ad8c29f53199ce6a68f2a7c7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fa62c309b752d85d2be8+877608f3/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13d7f16407b4daec2498c5df70bdbf9cfaffb1ca35ca009df2775a9a24fa1356 +size 33004544 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fb0de3cd135fddb7b326+b044e06f/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fb0de3cd135fddb7b326+b044e06f/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..8f101b7ec301a4644e64a95f017de359442a74bb --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fb0de3cd135fddb7b326+b044e06f/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_8aeb4ba4-cee4-4c89-b84c-cf1f4eb85673/compiler_workdir/Llama3RotaryEmbedding/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fb0de3cd135fddb7b326+b044e06f/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fb0de3cd135fddb7b326+b044e06f/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fb0de3cd135fddb7b326+b044e06f/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fb0de3cd135fddb7b326+b044e06f/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f1e2473c54cffac949ecf31d3b6ec636c290545b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fb0de3cd135fddb7b326+b044e06f/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a29d4cf27aec9af90e53324eff9ee69e033679482f4016a9dd3e5416c7c68a +size 1584 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fb0de3cd135fddb7b326+b044e06f/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fb0de3cd135fddb7b326+b044e06f/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..181f8c0d49bc84b17fecb9f40ef01f1382b5c916 Binary files /dev/null and b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_fb0de3cd135fddb7b326+b044e06f/model.neff differ