Update README.md
Browse files
README.md
CHANGED
|
@@ -141,35 +141,24 @@ The benchmarks and corresponding scores listed in the table below are taken dire
|
|
| 141 |
|
| 142 |
|Benchmark|Metric|Gemma 3 1B|Gemma 3 4B|Motif 2.6B|Improvement(over 1B)|Improvement(over 4B)|
|
| 143 |
|---|---|---|---|---|---|---|
|
| 144 |
-
|
|
| 145 |
-
|
|
| 146 |
-
|
|
| 147 |
-
|
|
| 148 |
-
|
|
| 149 |
-
|
|
| 150 |
-
|
|
| 151 |
-
|
|
| 152 |
-
|
|
| 153 |
-
|||||
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
|
| 162 |
-
|
|
| 163 |
-
|
|
| 164 |
-
|
|
| 165 |
-
|MMLU-Pro|5-shot, CoT|24.74|29.23|-|-|-|
|
| 166 |
-
|SuperGPQA|5-shot, CoT|15.03|17.68|-|-|-|
|
| 167 |
-
|BBH|3-shot, CoT|41.47|51.7|48.56|+17.10%|-6.07%|
|
| 168 |
-
|GPQA|5-shot, CoT|26.77|24.24|26.78|+0.04%|+10.48%|
|
| 169 |
-
|GSM8K|4-shot, CoT|59.59|43.97|76.49|+28.36%|+73.96%|
|
| 170 |
-
|MATH|4-shot, CoT|32.44|26.1|40.2|+23.92%|+54.02%|
|
| 171 |
-
|EvalPlus|0-shot|36.23|43.23|59.57|+64.42%|+37.80%|
|
| 172 |
-
|MultiPL-E|0-shot|24.58|28.06|-|-|-|
|
| 173 |
-
|MBPP|3-shot|36.6|46.4|60.3|+64.75%|+29.96%|
|
| 174 |
-
|CRUX-O|1-shot|27|34|28.1|+4.07%|-17.35%|
|
| 175 |
-
|||||**Average**|**+25.39%**|**+20.53%**|
|
|
|
|
| 141 |
|
| 142 |
|Benchmark|Metric|Gemma 3 1B|Gemma 3 4B|Motif 2.6B|Improvement(over 1B)|Improvement(over 4B)|
|
| 143 |
|---|---|---|---|---|---|---|
|
| 144 |
+
|HellaS|10-shot|62.3|77.2|69.89|+12.18%|-9.47%|
|
| 145 |
+
|BoolQ|0-shot|63.2|72.3|67.76|+7.22%|-6.28%|
|
| 146 |
+
|PIQA|0-shot|73.8|79.6|75.59|+2.43%|-5.04%|
|
| 147 |
+
|SIQA|0-shot|48.9|51.9|61.97|+26.73%|+19.40%|
|
| 148 |
+
|TQA|5-shot|39.8|65.8|54.97|+38.12%|-16.46%|
|
| 149 |
+
|NQ|5-shot|9.48|20|10.91|+15.08%|-45.45%|
|
| 150 |
+
|ARC-C|25-shot|38.4|56.2|75.08|+95.52%|+33.59%|
|
| 151 |
+
|ARC-E|0-shot|73|82.4|87.21|+19.47%|+5.84%|
|
| 152 |
+
|WinoG|5-shot|58.2|64.7|67.09|+15.27%|+3.69%|
|
| 153 |
+
|BBH|few-shot, CoT|28.4|50.9|48.56|+70.99%|-4.60%|
|
| 154 |
+
|Drop|1-shot, F1|42.4|60.1|29.33|-30.83%|-51.20%|
|
| 155 |
+
|MMLU|5-shot|-|59.6|57.93|-|-2.80%|
|
| 156 |
+
|MMLUpro|5-shot, CoT|-|29.2|-|-|-|
|
| 157 |
+
|AGIE|3-5-shot|-|42.1|-|-|-|
|
| 158 |
+
|MATH|4-shot, CoT|-|24.2|40.2|-|+66.12%|
|
| 159 |
+
|GSM8K|8-shot, CoT|-|38.4|77.71|-|+102.37%|
|
| 160 |
+
|GPQA Diamond|5-shot, CoT|-|15|31.81|-|+112.07%|
|
| 161 |
+
|MBPP|3-shot|-|46|60.3|-|+31.09%|
|
| 162 |
+
|HumanE|0-shot|-|36|68.3|-|+89.72%|
|
| 163 |
+
|IFEval|-|80.2|90.2|74.02|-7.71%|-17.94%|
|
| 164 |
+
|||||**Average**|**+22.04%**|**+16.93%**|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|