Update Qwen3-8B-groupFP8 checkpoint

Files changed (6)

config.json CHANGED

@@ -5,6 +5,7 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
@@ -67,9 +68,11 @@
           "num_bits": 8,
           "observer": null,
           "observer_kwargs": {},
           "strategy": "group",
           "symmetric": true,
-          "type": "float"
         },
         "output_activations": null,
         "targets": [
@@ -81,11 +84,13 @@
           "dynamic": false,
           "group_size": 32,
           "num_bits": 8,
-          "observer": "minmax",
           "observer_kwargs": {},
           "strategy": "group",
           "symmetric": true,
-          "type": "float"
         }
       }
     },
@@ -99,15 +104,14 @@
     "quantization_status": "compressed",
     "sparsity_config": {},
     "transform_config": {},
-    "version": "0.11.0"
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 1000000,
   "sliding_window": null,
   "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.55.2",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
+  "dtype": "bfloat16",
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
           "num_bits": 8,
           "observer": null,
           "observer_kwargs": {},
+          "scale_dtype": "torch.bfloat16",
           "strategy": "group",
           "symmetric": true,
+          "type": "float",
+          "zp_dtype": null
         },
         "output_activations": null,
         "targets": [
           "dynamic": false,
           "group_size": 32,
           "num_bits": 8,
+          "observer": "memoryless_minmax",
           "observer_kwargs": {},
+          "scale_dtype": "torch.bfloat16",
           "strategy": "group",
           "symmetric": true,
+          "type": "float",
+          "zp_dtype": null
         }
       }
     },
     "quantization_status": "compressed",
     "sparsity_config": {},
     "transform_config": {},
+    "version": "0.14.1.a20260323"
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 1000000,
   "sliding_window": null,
   "tie_word_embeddings": false,
+  "transformers_version": "4.57.6",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

generation_config.json CHANGED

@@ -9,5 +9,5 @@
   "temperature": 0.6,
   "top_k": 20,
   "top_p": 0.95,
-  "transformers_version": "4.55.2"
 }

   "temperature": 0.6,
   "top_k": 20,
   "top_p": 0.95,
+  "transformers_version": "4.57.6"
 }

model-00001-of-00002.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e749aaa1f2da50a538735ad8aecf28adc353eafcbd2b929deee9c99d951a715
-size 4979507600

 version https://git-lfs.github.com/spec/v1
+oid sha256:c7156108519c67518ed275f0f1b590fa6b268c0fc957be5b1561aa967472c726
+size 4982653448

model-00002-of-00002.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6206895e408bdf0b1ecce471fe484a43bf94c82057eea51eec86700f7c639d3a
-size 4890383672

 version https://git-lfs.github.com/spec/v1
+oid sha256:47bf6c8110d8bd53d1afc58189d53e5245bdb0b663919b5852fef4fd74a3ee06
+size 4887237816

model.safetensors.index.json CHANGED

@@ -190,7 +190,7 @@
     "model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.18.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
     "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.18.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
     "model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.18.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
     "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",

     "model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.18.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
     "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.18.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
     "model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.18.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
     "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",

recipe.yaml CHANGED

@@ -13,7 +13,9 @@ quant_stage:
             block_structure: null
             dynamic: false
             actorder: null
-            observer: minmax
             observer_kwargs: {}
           input_activations:
             num_bits: 8
@@ -24,9 +26,12 @@ quant_stage:
             block_structure: null
             dynamic: true
             actorder: null
             observer: null
             observer_kwargs: {}
           output_activations: null
           format: null
       targets: [Linear]
       ignore: [lm_head]

             block_structure: null
             dynamic: false
             actorder: null
+            scale_dtype: torch.bfloat16
+            zp_dtype: null
+            observer: memoryless_minmax
             observer_kwargs: {}
           input_activations:
             num_bits: 8
             block_structure: null
             dynamic: true
             actorder: null
+            scale_dtype: torch.bfloat16
+            zp_dtype: null
             observer: null
             observer_kwargs: {}
           output_activations: null
           format: null
       targets: [Linear]
       ignore: [lm_head]
+      bypass_divisibility_checks: false