ankitdhiman committed on
Commit
2c0d1ef
·
verified ·
1 Parent(s): 99a9f97

update special token map

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +95 -2
  2. tokenizer.json +2 -2
  3. tokenizer_config.json +129 -2
special_tokens_map.json CHANGED
@@ -1,4 +1,97 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "boi_token": "<start_of_image>",
3
  "bos_token": {
4
  "content": "<bos>",
@@ -9,7 +102,7 @@
9
  },
10
  "eoi_token": "<end_of_image>",
11
  "eos_token": {
12
- "content": "<eos>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
@@ -17,7 +110,7 @@
17
  },
18
  "image_token": "<image_soft_token>",
19
  "pad_token": {
20
- "content": "<pad>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_start|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|im_end|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<|object_ref_start|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "<|object_ref_end|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<|box_start|>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "<|box_end|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<|quad_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "<|quad_end|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<|vision_start|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "<|vision_end|>",
68
+ "lstrip": false,
69
+ "normalized": false,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "<|vision_pad|>",
75
+ "lstrip": false,
76
+ "normalized": false,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ },
80
+ {
81
+ "content": "<|image_pad|>",
82
+ "lstrip": false,
83
+ "normalized": false,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ },
87
+ {
88
+ "content": "<|video_pad|>",
89
+ "lstrip": false,
90
+ "normalized": false,
91
+ "rstrip": false,
92
+ "single_word": false
93
+ }
94
+ ],
95
  "boi_token": "<start_of_image>",
96
  "bos_token": {
97
  "content": "<bos>",
 
102
  },
103
  "eoi_token": "<end_of_image>",
104
  "eos_token": {
105
+ "content": "<|im_end|>",
106
  "lstrip": false,
107
  "normalized": false,
108
  "rstrip": false,
 
110
  },
111
  "image_token": "<image_soft_token>",
112
  "pad_token": {
113
+ "content": "<|endoftext|>",
114
  "lstrip": false,
115
  "normalized": false,
116
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
- size 33384568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2701c34a16714309510090c5450f7a51772e4d76fd29ca8fc668b4be8427f0f
3
+ size 33387239
tokenizer_config.json CHANGED
@@ -51321,13 +51321,140 @@
51321
  "rstrip": false,
51322
  "single_word": false,
51323
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51324
  }
51325
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51326
  "boi_token": "<start_of_image>",
51327
  "bos_token": "<bos>",
51328
  "clean_up_tokenization_spaces": false,
51329
  "eoi_token": "<end_of_image>",
51330
- "eos_token": "<eos>",
51331
  "extra_special_tokens": {
51332
  "boi_token": "<start_of_image>",
51333
  "eoi_token": "<end_of_image>",
@@ -51335,7 +51462,7 @@
51335
  },
51336
  "image_token": "<image_soft_token>",
51337
  "model_max_length": 1000000000000000019884624838656,
51338
- "pad_token": "<pad>",
51339
  "processor_class": "Gemma3Processor",
51340
  "sp_model_kwargs": null,
51341
  "spaces_between_special_tokens": false,
 
51321
  "rstrip": false,
51322
  "single_word": false,
51323
  "special": true
51324
+ },
51325
+ "262145": {
51326
+ "content": "<|im_end|>",
51327
+ "lstrip": false,
51328
+ "normalized": false,
51329
+ "rstrip": false,
51330
+ "single_word": false,
51331
+ "special": true
51332
+ },
51333
+ "262146": {
51334
+ "content": "<|endoftext|>",
51335
+ "lstrip": false,
51336
+ "normalized": false,
51337
+ "rstrip": false,
51338
+ "single_word": false,
51339
+ "special": true
51340
+ },
51341
+ "262147": {
51342
+ "content": "<|im_start|>",
51343
+ "lstrip": false,
51344
+ "normalized": false,
51345
+ "rstrip": false,
51346
+ "single_word": false,
51347
+ "special": true
51348
+ },
51349
+ "262148": {
51350
+ "content": "<|object_ref_start|>",
51351
+ "lstrip": false,
51352
+ "normalized": false,
51353
+ "rstrip": false,
51354
+ "single_word": false,
51355
+ "special": true
51356
+ },
51357
+ "262149": {
51358
+ "content": "<|object_ref_end|>",
51359
+ "lstrip": false,
51360
+ "normalized": false,
51361
+ "rstrip": false,
51362
+ "single_word": false,
51363
+ "special": true
51364
+ },
51365
+ "262150": {
51366
+ "content": "<|box_start|>",
51367
+ "lstrip": false,
51368
+ "normalized": false,
51369
+ "rstrip": false,
51370
+ "single_word": false,
51371
+ "special": true
51372
+ },
51373
+ "262151": {
51374
+ "content": "<|box_end|>",
51375
+ "lstrip": false,
51376
+ "normalized": false,
51377
+ "rstrip": false,
51378
+ "single_word": false,
51379
+ "special": true
51380
+ },
51381
+ "262152": {
51382
+ "content": "<|quad_start|>",
51383
+ "lstrip": false,
51384
+ "normalized": false,
51385
+ "rstrip": false,
51386
+ "single_word": false,
51387
+ "special": true
51388
+ },
51389
+ "262153": {
51390
+ "content": "<|quad_end|>",
51391
+ "lstrip": false,
51392
+ "normalized": false,
51393
+ "rstrip": false,
51394
+ "single_word": false,
51395
+ "special": true
51396
+ },
51397
+ "262154": {
51398
+ "content": "<|vision_start|>",
51399
+ "lstrip": false,
51400
+ "normalized": false,
51401
+ "rstrip": false,
51402
+ "single_word": false,
51403
+ "special": true
51404
+ },
51405
+ "262155": {
51406
+ "content": "<|vision_end|>",
51407
+ "lstrip": false,
51408
+ "normalized": false,
51409
+ "rstrip": false,
51410
+ "single_word": false,
51411
+ "special": true
51412
+ },
51413
+ "262156": {
51414
+ "content": "<|vision_pad|>",
51415
+ "lstrip": false,
51416
+ "normalized": false,
51417
+ "rstrip": false,
51418
+ "single_word": false,
51419
+ "special": true
51420
+ },
51421
+ "262157": {
51422
+ "content": "<|image_pad|>",
51423
+ "lstrip": false,
51424
+ "normalized": false,
51425
+ "rstrip": false,
51426
+ "single_word": false,
51427
+ "special": true
51428
+ },
51429
+ "262158": {
51430
+ "content": "<|video_pad|>",
51431
+ "lstrip": false,
51432
+ "normalized": false,
51433
+ "rstrip": false,
51434
+ "single_word": false,
51435
+ "special": true
51436
  }
51437
  },
51438
+ "additional_special_tokens": [
51439
+ "<|im_start|>",
51440
+ "<|im_end|>",
51441
+ "<|object_ref_start|>",
51442
+ "<|object_ref_end|>",
51443
+ "<|box_start|>",
51444
+ "<|box_end|>",
51445
+ "<|quad_start|>",
51446
+ "<|quad_end|>",
51447
+ "<|vision_start|>",
51448
+ "<|vision_end|>",
51449
+ "<|vision_pad|>",
51450
+ "<|image_pad|>",
51451
+ "<|video_pad|>"
51452
+ ],
51453
  "boi_token": "<start_of_image>",
51454
  "bos_token": "<bos>",
51455
  "clean_up_tokenization_spaces": false,
51456
  "eoi_token": "<end_of_image>",
51457
+ "eos_token": "<|im_end|>",
51458
  "extra_special_tokens": {
51459
  "boi_token": "<start_of_image>",
51460
  "eoi_token": "<end_of_image>",
 
51462
  },
51463
  "image_token": "<image_soft_token>",
51464
  "model_max_length": 1000000000000000019884624838656,
51465
+ "pad_token": "<|endoftext|>",
51466
  "processor_class": "Gemma3Processor",
51467
  "sp_model_kwargs": null,
51468
  "spaces_between_special_tokens": false,