"""This file is used to store the list of all models that are used in the MTEB benchmark. It is generated by running the this script. It is intended to be used as a reference for the models that are used in the benchmark, and it used to link the model to the benchmark so that it is easier to see that we use the model. Discussed in this issue: https://github.com/embeddings-benchmark/mteb/issues/4309 """ from pathlib import Path import mteb path_to_self = Path(__file__) models = mteb.get_model_metas() # print all model names and add them to this file as a list of model: unused = ["org/model_name"] model_names = [model.name for model in models if model.name not in unused] def format_list_as_python_code(lst): """Format a list of strings as a Python list of strings. Example: input: ["model1", "model2", "model3"] output: models = [ "model1", "model2", "model3", ] """ formatted_list = "models = [\n" for item in lst: formatted_list += f' "{item}",\n' formatted_list += "]\n" return formatted_list def insert_into_self(formatted_list): """Insert the formatted list into this file between the INSERT START and INSERT END comments.""" with path_to_self.open("r") as f: content = f.read() new_content = content.replace( "# INSERT START\n# INSERT END", f"# INSERT START\n{formatted_list}\n# INSERT END", ) with path_to_self.open("w") as f: f.write(new_content) formatted_list = format_list_as_python_code(model_names) insert_into_self(formatted_list) print(f"Inserted {len(model_names)} models into {path_to_self}") # INSERT START models = [ "Snowflake/snowflake-arctic-embed-l", "Snowflake/snowflake-arctic-embed-l-v2.0", "Snowflake/snowflake-arctic-embed-m", "Snowflake/snowflake-arctic-embed-m-long", "Snowflake/snowflake-arctic-embed-m-v1.5", "Snowflake/snowflake-arctic-embed-m-v2.0", "Snowflake/snowflake-arctic-embed-s", "Snowflake/snowflake-arctic-embed-xs", "mteb/baseline-bm25s", "zeroentropy/zembed-1", "zeroentropy/zerank-1", "zeroentropy/zerank-1-small", "zeroentropy/zerank-2", "google/vggish", "Omartificial-Intelligence-Space/Arabic-Triplet-Matryoshka-V2", "Kingsoft-LLM/QZhou-Embedding", "Kingsoft-LLM/QZhou-Embedding-Zh", "samaya-ai/promptriever-llama2-7b-v1", "samaya-ai/promptriever-llama3.1-8b-v1", "samaya-ai/promptriever-llama3.1-8b-instruct-v1", "samaya-ai/promptriever-mistral-v0.1-7b-v1", "sbintuitions/sarashina-embedding-v1-1b", "sbintuitions/sarashina-embedding-v2-1b", "mixedbread-ai/mxbai-edge-colbert-v0-17m", "mixedbread-ai/mxbai-edge-colbert-v0-32m", "mixedbread-ai/mxbai-embed-2d-large-v1", "mixedbread-ai/mxbai-embed-large-v1", "mixedbread-ai/mxbai-embed-xsmall-v1", "mixedbread-ai/mxbai-rerank-base-v1", "mixedbread-ai/mxbai-rerank-large-v1", "mixedbread-ai/mxbai-rerank-xsmall-v1", "vidore/colpali-v1.1", "vidore/colpali-v1.2", "vidore/colpali-v1.3", "voyageai/voyage-multimodal-3", "eagerworks/eager-embed-v1", "stephantulkens/NIFE-gte-modernbert-base_as_router", "stephantulkens/NIFE-mxbai-embed-large-v1_as_router", "Bytedance/Seed1.6-embedding", "microsoft/LLM2CLIP-Openai-B-16", "microsoft/LLM2CLIP-Openai-L-14-224", "microsoft/LLM2CLIP-Openai-L-14-336", "FacebookAI/xlm-roberta-base", "FacebookAI/xlm-roberta-large", "codefuse-ai/C2LLM-0.5B", "codefuse-ai/C2LLM-7B", "codefuse-ai/F2LLM-0.6B", "codefuse-ai/F2LLM-1.7B", "codefuse-ai/F2LLM-4B", "codefuse-ai/F2LLM-v2-0.6B", "codefuse-ai/F2LLM-v2-14B", "codefuse-ai/F2LLM-v2-160M", "codefuse-ai/F2LLM-v2-1.7B", "codefuse-ai/F2LLM-v2-330M", "codefuse-ai/F2LLM-v2-4B", "codefuse-ai/F2LLM-v2-80M", "codefuse-ai/F2LLM-v2-8B", "ibm-granite/granite-vision-3.3-2b-embedding", "openai/clip-vit-base-patch16", "openai/clip-vit-base-patch32", "openai/clip-vit-large-patch14", "shibing624/text2vec-base-chinese", "shibing624/text2vec-base-chinese-paraphrase", "shibing624/text2vec-base-multilingual", "LCO-Embedding/LCO-Embedding-Omni-3B", "LCO-Embedding/LCO-Embedding-Omni-7B", "kakaobrain/align-base", "IEITYuan/Yuan-embedding-2.0-en", "facebook/metaclip-2-mt5-worldwide-b32", "dmedhi/PawanEmbd-68M", "BAAI/bge-reranker-v2-m3", "jinaai/jina-reranker-v2-base-multilingual", "cross-encoder/ms-marco-MiniLM-L12-v2", "cross-encoder/ms-marco-MiniLM-L2-v2", "cross-encoder/ms-marco-MiniLM-L4-v2", "cross-encoder/ms-marco-MiniLM-L6-v2", "cross-encoder/ms-marco-TinyBERT-L2-v2", "emillykkejensen/EmbeddingGemma-Scandi-300m", "emillykkejensen/mmBERTscandi-base-embedding", "emillykkejensen/Qwen3-Embedding-Scandi-0.6B", "BAAI/bge-base-en", "BAAI/bge-base-en-v1.5", "BAAI/bge-base-zh", "BAAI/bge-base-zh-v1.5", "BAAI/bge-en-icl", "BAAI/bge-large-en", "BAAI/bge-large-en-v1.5", "BAAI/bge-large-zh", "BAAI/bge-large-zh-v1.5", "BAAI/bge-m3", "BAAI/bge-m3-unsupervised", "BAAI/bge-multilingual-gemma2", "BAAI/bge-small-en", "BAAI/bge-small-en-v1.5", "BAAI/bge-small-zh", "BAAI/bge-small-zh-v1.5", "manu/bge-m3-custom-fr", "spartan8806/atles-champion-embedding", "prdev/mini-gte", "SamilPwC-AXNode-GenAI/PwC-Embedding_expr", "m3hrdadfi/bert-zwnj-wnli-mean-tokens", "sbunlp/fabert", "HooshvareLab/bert-base-parsbert-uncased", "m3hrdadfi/roberta-zwnj-wnli-mean-tokens", "myrkur/sentence-transformer-parsbert-fa", "PartAI/TookaBERT-Base", "PartAI/Tooka-SBERT", "PartAI/Tooka-SBERT-V2-Large", "PartAI/Tooka-SBERT-V2-Small", "castorini/repllama-v1-7b-lora-passage", "samaya-ai/RepLLaMA-reproduced", "nomic-ai/nomic-embed-code", "nomic-ai/nomic-embed-text-v2-moe", "nomic-ai/nomic-embed-text-v1", "nomic-ai/nomic-embed-text-v1.5", "nomic-ai/nomic-embed-text-v1-ablated", "nomic-ai/nomic-embed-text-v1-unsupervised", "nomic-ai/modernbert-embed-base", "nomic-ai/nomic-embed-vision-v1.5", "bflhc/MoD-Embedding", "ReasonIR/ReasonIR-8B", "yibinlei/LENS-d4000", "yibinlei/LENS-d8000", "facebook/dinov2-base", "facebook/dinov2-giant", "facebook/dinov2-large", "facebook/dinov2-small", "facebook/webssl-dino1b-full2b-224", "facebook/webssl-dino2b-full2b-224", "facebook/webssl-dino2b-heavy2b-224", "facebook/webssl-dino2b-light2b-224", "facebook/webssl-dino300m-full2b-224", "facebook/webssl-dino3b-full2b-224", "facebook/webssl-dino3b-heavy2b-224", "facebook/webssl-dino3b-light2b-224", "facebook/webssl-dino5b-full2b-224", "facebook/webssl-dino7b-full8b-224", "facebook/webssl-dino7b-full8b-378", "facebook/webssl-dino7b-full8b-518", "facebook/webssl-mae1b-full2b-224", "facebook/webssl-mae300m-full2b-224", "facebook/webssl-mae700m-full2b-224", "TencentBAC/Conan-embedding-v2", "Gameselo/STS-multilingual-mpnet-base-v2", "Haon-Chen/speed-embedding-7b-instruct", "Hum-Works/lodestone-base-4096-v1", "Jaume/gemma-2b-embeddings", "Lajavaness/bilingual-embedding-base", "Lajavaness/bilingual-embedding-large", "Lajavaness/bilingual-embedding-small", "Mihaiii/Bulbasaur", "Mihaiii/Ivysaur", "Mihaiii/Squirtle", "Mihaiii/Venusaur", "Mihaiii/Wartortle", "Mihaiii/gte-micro", "Mihaiii/gte-micro-v4", "Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka", "Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet", "Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka", "Omartificial-Intelligence-Space/Arabic-labse-Matryoshka", "Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet", "Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka", "OrdalieTech/Solon-embeddings-large-0.1", "aari1995/German_Semantic_STS_V2", "abhinand/MedEmbed-small-v0.1", "avsolatorio/GIST-all-MiniLM-L6-v2", "avsolatorio/GIST-Embedding-v0", "avsolatorio/GIST-large-Embedding-v0", "avsolatorio/GIST-small-Embedding-v0", "avsolatorio/NoInstruct-small-Embedding-v0", "bigscience/sgpt-bloom-7b1-msmarco", "brahmairesearch/slx-v0.1", "TencentBAC/Conan-embedding-v1", "consciousAI/cai-lunaris-text-embeddings", "consciousAI/cai-stellaris-text-embeddings", "deepfile/embedder-100p", "DMetaSoul/Dmeta-embedding-zh-small", "dwzhu/e5-base-4k", "llmrails/ember-v1", "infgrad/stella-base-en-v2", "izhx/udever-bloom-1b1", "izhx/udever-bloom-3b", "izhx/udever-bloom-560m", "izhx/udever-bloom-7b1", "malenia1/ternary-weight-embedding", "manu/sentence_croissant_alpha_v0.2", "manu/sentence_croissant_alpha_v0.3", "manu/sentence_croissant_alpha_v0.4", "omarelshehy/arabic-english-sts-matryoshka", "openbmb/MiniCPM-Embedding", "DMetaSoul/sbert-chinese-general-v1", "sdadas/mmlw-e5-base", "sdadas/mmlw-e5-large", "sdadas/mmlw-e5-small", "sdadas/mmlw-roberta-base", "sdadas/mmlw-roberta-large", "silma-ai/silma-embeddding-matryoshka-v0.1", "thenlper/gte-base", "thenlper/gte-large", "thenlper/gte-small", "lier007/xiaobu-embedding", "lier007/xiaobu-embedding-v2", "Classical/Yinka", "Kowshik24/bangla-sentence-transformer-ft-matryoshka-paraphrase-multilingual-mpnet-base-v2", "facebook/wav2vec2-base", "facebook/wav2vec2-base-960h", "facebook/wav2vec2-large", "facebook/wav2vec2-large-xlsr-53", "facebook/wav2vec2-lv-60-espeak-cv-ft", "facebook/wav2vec2-xls-r-1b", "facebook/wav2vec2-xls-r-2b", "facebook/wav2vec2-xls-r-2b-21-to-en", "facebook/wav2vec2-xls-r-300m", "vitouphy/wav2vec2-xls-r-300m-phoneme", "laion/clap-htsat-fused", "laion/clap-htsat-unfused", "laion/larger_clap_general", "laion/larger_clap_music", "laion/larger_clap_music_and_speech", "colbert-ir/colbertv2.0", "jinaai/jina-colbert-v2", "lightonai/ColBERT-Zero", "lightonai/ColBERT-Zero-supervised", "lightonai/ColBERT-Zero-unsupervised", "lightonai/GTE-ModernColBERT-v1", "lightonai/LateOn-Code", "lightonai/LateOn-Code-edge", "lightonai/LateOn-Code-edge-pretrain", "lightonai/LateOn-Code-pretrain", "lightonai/Reason-ModernColBERT", "OpenSearch-AI/Ops-Colqwen3-4B", "Alibaba-NLP/gte-base-en-v1.5", "thenlper/gte-base-zh", "thenlper/gte-large-zh", "Alibaba-NLP/gte-modernbert-base", "Alibaba-NLP/gte-multilingual-base", "Alibaba-NLP/gte-Qwen1.5-7B-instruct", "Alibaba-NLP/gte-Qwen2-1.5B-instruct", "Alibaba-NLP/gte-Qwen2-7B-instruct", "thenlper/gte-small-zh", "jinaai/jina-clip-v1", "jinaai/jina-clip-v2", "intfloat/e5-base", "intfloat/e5-base-v2", "intfloat/e5-large-v2", "intfloat/e5-small", "intfloat/e5-small-v2", "intfloat/e5-large", "intfloat/multilingual-e5-base", "intfloat/multilingual-e5-large", "intfloat/multilingual-e5-small", "tencent/Youtu-Embedding", "moka-ai/m3e-base", "moka-ai/m3e-large", "moka-ai/m3e-small", "Bytedance/Seed1.6-embedding-1215", "QuanSun/EVA02-CLIP-B-16", "QuanSun/EVA02-CLIP-L-14", "QuanSun/EVA02-CLIP-bigE-14", "QuanSun/EVA02-CLIP-bigE-14-plus", "Salesforce/blip-image-captioning-base", "Salesforce/blip-image-captioning-large", "Salesforce/blip-itm-base-coco", "Salesforce/blip-itm-base-flickr", "Salesforce/blip-itm-large-coco", "Salesforce/blip-itm-large-flickr", "Salesforce/blip-vqa-base", "Salesforce/blip-vqa-capfilt-large", "Salesforce/SFR-Embedding-2_R", "Salesforce/SFR-Embedding-Code-2B_R", "Salesforce/SFR-Embedding-Mistral", "Cohere/Cohere-embed-v4.0", "Cohere/Cohere-embed-v4.0 (output_dtype=binary)", "Cohere/Cohere-embed-v4.0 (output_dtype=int8)", "cohere/embed-english-v3.0", "cohere/embed-multilingual-v3.0", "jinaai/jina-embedding-b-en-v1", "jinaai/jina-embedding-s-en-v1", "jinaai/jina-embeddings-v2-base-en", "jinaai/jina-embeddings-v2-small-en", "jinaai/jina-embeddings-v3", "jinaai/jina-embeddings-v4", "jinaai/jina-embeddings-v5-text-nano", "jinaai/jina-embeddings-v5-text-small", "jinaai/jina-reranker-v3", "lyrebird/wav2clip", "microsoft/msclap-2022", "microsoft/msclap-2023", "voyageai/voyage-2", "voyageai/voyage-3", "voyageai/voyage-3.5", "voyageai/voyage-3.5 (output_dtype=binary)", "voyageai/voyage-3.5 (output_dtype=int8)", "voyageai/voyage-3-m-exp", "voyageai/voyage-3-large", "voyageai/voyage-3-lite", "voyageai/voyage-4", "voyageai/voyage-4-large", "voyageai/voyage-4-large (embed_dim=2048)", "voyageai/voyage-4-lite", "voyageai/voyage-4-nano", "voyageai/voyage-code-2", "voyageai/voyage-code-3", "voyageai/voyage-finance-2", "voyageai/voyage-large-2", "voyageai/voyage-large-2-instruct", "voyageai/voyage-law-2", "voyageai/voyage-multilingual-2", "microsoft/unispeech-sat-base-100h-libri-ft", "MongoDB/mdbr-leaf-ir", "MongoDB/mdbr-leaf-mt", "KennethEnevoldsen/dfm-sentence-encoder-large", "KennethEnevoldsen/dfm-sentence-encoder-medium", "microsoft/wavlm-base", "microsoft/wavlm-base-plus", "microsoft/wavlm-base-plus-sd", "microsoft/wavlm-base-plus-sv", "microsoft/wavlm-base-sd", "microsoft/wavlm-base-sv", "microsoft/wavlm-large", "jxm/cde-small-v1", "jxm/cde-small-v2", "Sailesh97/Hinvec", "w601sxs/b1ade-embed", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl", "google/flan-t5-xxl", "jhu-clsp/FollowIR-7B", "meta-llama/Llama-2-7b-hf", "meta-llama/Llama-2-7b-chat-hf", "mistralai/Mistral-7B-Instruct-v0.2", "castorini/monot5-3b-msmarco-10k", "castorini/monot5-base-msmarco-10k", "castorini/monot5-large-msmarco-10k", "castorini/monot5-small-msmarco-10k", "unicamp-dl/mt5-base-mmarco-v2", "facebook/seamless-m4t-v2-large", "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct", "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct", "fangxq/XYZ-embedding", "rasgaard/m2v-dfm-large", "bedrock/amazon-titan-embed-text-v1", "bedrock/amazon-titan-embed-text-v2", "bedrock/cohere-embed-english-v3", "bedrock/cohere-embed-multilingual-v3", "Cohere/Cohere-embed-english-v3.0", "Cohere/Cohere-embed-english-light-v3.0", "Cohere/Cohere-embed-multilingual-v3.0", "Cohere/Cohere-embed-multilingual-light-v3.0", "bisectgroup/BiCA-base", "Qodo/Qodo-Embed-1-1.5B", "Qodo/Qodo-Embed-1-7B", "WhereIsAI/UAE-Large-V1", "GeoGPT-Research-Project/GeoEmbedding", "nanovdr/NanoVDR-S-Multi", "infgrad/stella-base-zh-v3-1792d", "NovaSearch/stella_en_1.5B_v5", "NovaSearch/stella_en_400M_v5", "dunzhang/stella-large-zh-v3-1792d", "dunzhang/stella-mrl-large-zh-v3.5-1792d", "iampanda/zpoint_large_embedding_zh", "sensenova/piccolo-base-zh", "sensenova/piccolo-large-zh-v2", "facebook/SONAR", "Qwen/Qwen2-Audio-7B", "OpenSearch-AI/Ops-MoA-Conan-embedding-v1", "OpenSearch-AI/Ops-MoA-Yuan-embedding-1.0", "Querit/Querit", "facebook/hubert-base-ls960", "facebook/hubert-large-ls960-ft", "ByteDance-Seed/Seed1.5-Embedding", "baseline/Human", "bflhc/Octen-Embedding-0.6B", "bflhc/Octen-Embedding-4B", "bflhc/Octen-Embedding-8B", "VPLabs/SearchMap_Preview", "ByteDance/ListConRanker", "Linq-AI-Research/Linq-Embed-Mistral", "infly/inf-retriever-v1", "infly/inf-retriever-v1-1.5b", "OrdalieTech/Solon-embeddings-mini-beta-1.1", "BAAI/bge-visualized-base", "BAAI/bge-visualized-m3", "laion/CLIP-ViT-B-16-DataComp.XL-s13B-b90K", "laion/CLIP-ViT-B-32-DataComp.XL-s13B-b90K", "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", "laion/CLIP-ViT-H-14-laion2B-s32B-b79K", "laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K", "laion/CLIP-ViT-L-14-laion2B-s32B-b82K", "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", "laion/CLIP-ViT-g-14-laion2B-s34B-b88K", "qihoo360/Zhinao-ChineseModernBert-Embedding", "mteb/baseline-random-cross-encoder", "mteb/baseline-random-encoder", "nvidia/llama-nemoretriever-colembed-1b-v1", "nvidia/llama-nemoretriever-colembed-3b-v1", "nvidia/llama-nemotron-colembed-vl-3b-v2", "nvidia/llama-nemotron-embed-vl-1b-v2", "nvidia/nemotron-colembed-vl-4b-v2", "nvidia/nemotron-colembed-vl-8b-v2", "infgrad/Jasper-Token-Compression-600M", "NovaSearch/jasper_en_vision_language_v1", "KBLab/sentence-bert-swedish-cased", "cl-nagoya/ruri-base", "cl-nagoya/ruri-base-v2", "cl-nagoya/ruri-large", "cl-nagoya/ruri-large-v2", "cl-nagoya/ruri-small", "cl-nagoya/ruri-small-v2", "cl-nagoya/ruri-v3-130m", "cl-nagoya/ruri-v3-30m", "cl-nagoya/ruri-v3-310m", "cl-nagoya/ruri-v3-70m", "Qwen/Qwen3-VL-Embedding-2B", "Qwen/Qwen3-VL-Embedding-8B", "Salesforce/blip2-opt-2.7b", "Salesforce/blip2-opt-6.7b-coco", "fyaronskiy/english_code_retriever", "BMRetriever/BMRetriever-1B", "BMRetriever/BMRetriever-2B", "BMRetriever/BMRetriever-410M", "BMRetriever/BMRetriever-7B", "google/siglip-base-patch16-224", "google/siglip-base-patch16-256", "google/siglip-base-patch16-256-multilingual", "google/siglip-base-patch16-384", "google/siglip-base-patch16-512", "google/siglip-large-patch16-256", "google/siglip-large-patch16-384", "google/siglip-so400m-patch14-224", "google/siglip-so400m-patch14-384", "google/siglip-so400m-patch16-256-i18n", "sentence-transformers/all-MiniLM-L12-v2", "sentence-transformers/all-MiniLM-L6-v2", "sentence-transformers/all-mpnet-base-v2", "facebook/contriever-msmarco", "sentence-transformers/gtr-t5-base", "sentence-transformers/gtr-t5-large", "sentence-transformers/gtr-t5-xl", "sentence-transformers/gtr-t5-xxl", "sentence-transformers/LaBSE", "keeeeenw/MicroLlama-text-embedding", "sentence-transformers/multi-qa-MiniLM-L6-cos-v1", "sentence-transformers/multi-qa-mpnet-base-dot-v1", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", "sentence-transformers/sentence-t5-base", "sentence-transformers/sentence-t5-large", "sentence-transformers/sentence-t5-xl", "sentence-transformers/sentence-t5-xxl", "sentence-transformers/static-retrieval-mrl-en-v1", "sentence-transformers/static-similarity-mrl-multilingual-v1", "llamaindex/vdr-2b-multi-v1", "ICT-TIME-and-Querit/BOOM_4B_v1", "royokong/e5-v", "facebook/encodec_24khz", "amazon/Titan-text-embeddings-v2", "openai/text-embedding-3-large", "openai/text-embedding-3-large (embed_dim=512)", "openai/text-embedding-3-small", "openai/text-embedding-3-small (embed_dim=512)", "openai/text-embedding-ada-002", "IEITYuan/Yuan-embedding-2.0-zh", "sergeyzh/BERTA", "deepvk/deberta-v1-base", "DeepPavlov/distilrubert-small-cased-conversational", "ai-forever/FRIDA", "ai-sage/Giga-Embeddings-instruct", "cointegrated/LaBSE-en-ru", "sergeyzh/LaBSE-ru-turbo", "ai-forever/ru-en-RoSBERTa", "DeepPavlov/rubert-base-cased", "DeepPavlov/rubert-base-cased-sentence", "sergeyzh/rubert-mini-frida", "cointegrated/rubert-tiny", "cointegrated/rubert-tiny2", "sergeyzh/rubert-tiny-turbo", "ai-forever/sbert_large_mt_nlu_ru", "ai-forever/sbert_large_nlu_ru", "deepvk/USER2-base", "deepvk/USER2-small", "deepvk/USER-base", "deepvk/USER-bge-m3", "nvidia/NV-Embed-v1", "nvidia/NV-Embed-v2", "nvidia/llama-embed-nemotron-8b", "nvidia/llama-nemotron-rerank-1b-v2", "NbAiLab/nb-bert-base", "NbAiLab/nb-bert-large", "NbAiLab/nb-sbert-base", "GritLM/GritLM-7B", "GritLM/GritLM-8x7B", "speechbrain/m-ctc-t-large", "clips/e5-base-trm-nl", "clips/e5-large-trm-nl", "clips/e5-small-trm-nl", "andersborges/model2vecdk", "andersborges/model2vecdk-stem", "MIT/ast-finetuned-audioset-10-10-0.4593", "KFST/XLMRoberta-en-da-sv-nb", "panalexeu/xlm-roberta-ua-distilled", "nyu-visionx/moco-v3-vit-b", "nyu-visionx/moco-v3-vit-l", "Shuu12121/CodeSearch-ModernBERT-Crow-Plus", "minishlab/M2V_base_glove", "minishlab/M2V_base_glove_subword", "minishlab/M2V_base_output", "minishlab/M2V_multilingual_output", "minishlab/potion-base-2M", "minishlab/potion-base-32M", "minishlab/potion-base-4M", "minishlab/potion-base-8M", "minishlab/potion-multilingual-128M", "minishlab/potion-retrieval-32M", "NeuML/pubmedbert-base-embeddings-100K", "NeuML/pubmedbert-base-embeddings-1M", "NeuML/pubmedbert-base-embeddings-2M", "NeuML/pubmedbert-base-embeddings-500K", "NeuML/pubmedbert-base-embeddings-8M", "asapp/sew-d-base-plus-400k-ft-ls100h", "asapp/sew-d-mid-400k-ft-ls100h", "asapp/sew-d-tiny-100k-ft-ls100h", "telepix/PIXIE-Rune-v1.0", "VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1", "VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1", "VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1", "VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1", "VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1", "VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1", "ibm-granite/granite-embedding-107m-multilingual", "ibm-granite/granite-embedding-125m-english", "ibm-granite/granite-embedding-278m-multilingual", "ibm-granite/granite-embedding-30m-english", "ibm-granite/granite-embedding-english-r2", "ibm-granite/granite-embedding-small-english-r2", "geoffsee/auto-g-embed-st", "nomic-ai/colnomic-embed-multimodal-3b", "nomic-ai/colnomic-embed-multimodal-7b", "vidore/colqwen2-v1.0", "vidore/colqwen2.5-v0.2", "TomoroAI/tomoro-colqwen3-embed-4b", "athrael-soju/colqwen3.5-4.5B-v3", "TomoroAI/tomoro-colqwen3-embed-8b", "ApsaraStackMaaS/EvoQwen2.5-VL-Retriever-3B-v1", "ApsaraStackMaaS/EvoQwen2.5-VL-Retriever-7B-v1", "BeastyZ/e5-R-mistral-7b", "intfloat/multilingual-e5-large-instruct", "intfloat/e5-mistral-7b-instruct", "zeta-alpha-ai/Zeta-Alpha-E5-Mistral", "google/yamnet", "codesage/codesage-base-v2", "codesage/codesage-large-v2", "codesage/codesage-small-v2", "Tarka-AIR/Tarka-Embedding-150M-V1", "Tarka-AIR/Tarka-Embedding-350M-V1", "perplexity-ai/pplx-embed-v1-0.6b", "perplexity-ai/pplx-embed-v1-4b", "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1", "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5", "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2", "HIT-TMG/KaLM-embedding-multilingual-mini-v1", "KaLM-Embedding/KaLM-embedding-multilingual-mini-instruct-v2.5", "tencent/KaLM-Embedding-Gemma3-12B-2511", "Qwen/Qwen3-Embedding-0.6B", "Qwen/Qwen3-Embedding-4B", "Qwen/Qwen3-Embedding-8B", "annamodels/LGAI-Embedding-Preview", "opensearch-project/opensearch-neural-sparse-encoding-doc-v1", "opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill", "opensearch-project/opensearch-neural-sparse-encoding-doc-v2-mini", "opensearch-project/opensearch-neural-sparse-encoding-doc-v3-distill", "opensearch-project/opensearch-neural-sparse-encoding-doc-v3-gte", "AITeamVN/Vietnamese_Embedding", "bkai-foundation-models/vietnamese-bi-encoder", "contextboxai/halong_embedding", "GreenNode/GreenNode-Embedding-E5-Large-VN-V1", "GreenNode/GreenNode-Embedding-KaLM-Mini-Instruct-VN-V1", "GreenNode/GreenNode-Embedding-Large-VN-Mixed-V1", "GreenNode/GreenNode-Embedding-Large-VN-V1", "VoVanPhuc/sup-SimCSE-VietNamese-phobert-base", "mteb/baseline-bb25", "openai/whisper-base", "openai/whisper-large-v3", "openai/whisper-medium", "openai/whisper-small", "openai/whisper-tiny", "microsoft/speecht5_asr", "microsoft/speecht5_multimodal", "microsoft/speecht5_tts", "Mira190/Euler-Legal-Embedding-V1", "MCINext/Hakim", "MCINext/Hakim-small", "MCINext/Hakim-unsup", "TIGER-Lab/VLM2Vec-Full", "TIGER-Lab/VLM2Vec-LoRA", "richinfoai/ritrieve_zh_v1", "vidore/colSmol-256M", "vidore/colSmol-500M", "facebook/mms-1b-all", "facebook/mms-1b-fl102", "facebook/mms-1b-l1107", "facebook/data2vec-audio-base-960h", "facebook/data2vec-audio-large-960h", "ManiacLabs/miniac-embed", "nomic-ai/nomic-embed-multimodal-3b", "nomic-ai/nomic-embed-multimodal-7b", "manveertamber/cadet-embed-base-v1", "google/embeddinggemma-300m", "google/gemini-embedding-001", "google/text-embedding-004", "google/text-embedding-005", "google/text-multilingual-embedding-002", "OpenMuQ/MuQ-MuLan-large", "speechbrain/cnn14-esc50", "McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised", "McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse", "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised", "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse", "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised", "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse", "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised", "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse", "intfloat/mmE5-mllama-11b-instruct", ] # INSERT END