jbgjbg committed on
Commit
90aca23
·
verified ·
1 Parent(s): ec4a18e

Update Qwen3-8B-groupFP8 checkpoint

Browse files
config.json CHANGED
@@ -5,6 +5,7 @@
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 151643,
 
8
  "eos_token_id": 151645,
9
  "head_dim": 128,
10
  "hidden_act": "silu",
@@ -67,9 +68,11 @@
67
  "num_bits": 8,
68
  "observer": null,
69
  "observer_kwargs": {},
 
70
  "strategy": "group",
71
  "symmetric": true,
72
- "type": "float"
 
73
  },
74
  "output_activations": null,
75
  "targets": [
@@ -81,11 +84,13 @@
81
  "dynamic": false,
82
  "group_size": 32,
83
  "num_bits": 8,
84
- "observer": "minmax",
85
  "observer_kwargs": {},
 
86
  "strategy": "group",
87
  "symmetric": true,
88
- "type": "float"
 
89
  }
90
  }
91
  },
@@ -99,15 +104,14 @@
99
  "quantization_status": "compressed",
100
  "sparsity_config": {},
101
  "transform_config": {},
102
- "version": "0.11.0"
103
  },
104
  "rms_norm_eps": 1e-06,
105
  "rope_scaling": null,
106
  "rope_theta": 1000000,
107
  "sliding_window": null,
108
  "tie_word_embeddings": false,
109
- "torch_dtype": "bfloat16",
110
- "transformers_version": "4.55.2",
111
  "use_cache": true,
112
  "use_sliding_window": false,
113
  "vocab_size": 151936
 
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 151643,
8
+ "dtype": "bfloat16",
9
  "eos_token_id": 151645,
10
  "head_dim": 128,
11
  "hidden_act": "silu",
 
68
  "num_bits": 8,
69
  "observer": null,
70
  "observer_kwargs": {},
71
+ "scale_dtype": "torch.bfloat16",
72
  "strategy": "group",
73
  "symmetric": true,
74
+ "type": "float",
75
+ "zp_dtype": null
76
  },
77
  "output_activations": null,
78
  "targets": [
 
84
  "dynamic": false,
85
  "group_size": 32,
86
  "num_bits": 8,
87
+ "observer": "memoryless_minmax",
88
  "observer_kwargs": {},
89
+ "scale_dtype": "torch.bfloat16",
90
  "strategy": "group",
91
  "symmetric": true,
92
+ "type": "float",
93
+ "zp_dtype": null
94
  }
95
  }
96
  },
 
104
  "quantization_status": "compressed",
105
  "sparsity_config": {},
106
  "transform_config": {},
107
+ "version": "0.14.1.a20260323"
108
  },
109
  "rms_norm_eps": 1e-06,
110
  "rope_scaling": null,
111
  "rope_theta": 1000000,
112
  "sliding_window": null,
113
  "tie_word_embeddings": false,
114
+ "transformers_version": "4.57.6",
 
115
  "use_cache": true,
116
  "use_sliding_window": false,
117
  "vocab_size": 151936
generation_config.json CHANGED
@@ -9,5 +9,5 @@
9
  "temperature": 0.6,
10
  "top_k": 20,
11
  "top_p": 0.95,
12
- "transformers_version": "4.55.2"
13
  }
 
9
  "temperature": 0.6,
10
  "top_k": 20,
11
  "top_p": 0.95,
12
+ "transformers_version": "4.57.6"
13
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e749aaa1f2da50a538735ad8aecf28adc353eafcbd2b929deee9c99d951a715
3
- size 4979507600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7156108519c67518ed275f0f1b590fa6b268c0fc957be5b1561aa967472c726
3
+ size 4982653448
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6206895e408bdf0b1ecce471fe484a43bf94c82057eea51eec86700f7c639d3a
3
- size 4890383672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47bf6c8110d8bd53d1afc58189d53e5245bdb0b663919b5852fef4fd74a3ee06
3
+ size 4887237816
model.safetensors.index.json CHANGED
@@ -190,7 +190,7 @@
190
  "model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
191
  "model.layers.18.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
192
  "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
193
- "model.layers.18.mlp.gate_proj.weight_scale": "model-00002-of-00002.safetensors",
194
  "model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
195
  "model.layers.18.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
196
  "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
 
190
  "model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
191
  "model.layers.18.mlp.down_proj.weight_scale": "model-00002-of-00002.safetensors",
192
  "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
193
+ "model.layers.18.mlp.gate_proj.weight_scale": "model-00001-of-00002.safetensors",
194
  "model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
195
  "model.layers.18.mlp.up_proj.weight_scale": "model-00002-of-00002.safetensors",
196
  "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
recipe.yaml CHANGED
@@ -13,7 +13,9 @@ quant_stage:
13
  block_structure: null
14
  dynamic: false
15
  actorder: null
16
- observer: minmax
 
 
17
  observer_kwargs: {}
18
  input_activations:
19
  num_bits: 8
@@ -24,9 +26,12 @@ quant_stage:
24
  block_structure: null
25
  dynamic: true
26
  actorder: null
 
 
27
  observer: null
28
  observer_kwargs: {}
29
  output_activations: null
30
  format: null
31
  targets: [Linear]
32
  ignore: [lm_head]
 
 
13
  block_structure: null
14
  dynamic: false
15
  actorder: null
16
+ scale_dtype: torch.bfloat16
17
+ zp_dtype: null
18
+ observer: memoryless_minmax
19
  observer_kwargs: {}
20
  input_activations:
21
  num_bits: 8
 
26
  block_structure: null
27
  dynamic: true
28
  actorder: null
29
+ scale_dtype: torch.bfloat16
30
+ zp_dtype: null
31
  observer: null
32
  observer_kwargs: {}
33
  output_activations: null
34
  format: null
35
  targets: [Linear]
36
  ignore: [lm_head]
37
+ bypass_divisibility_checks: false