harrier-oss-v1-270m / mteb_v2_eval_prompts.json
frontierai's picture
Upload model
31de22b verified
{
"AILAStatutes-query": "Identifying the most relevant statutes for a given situation",
"AfriSentiClassification": "Given a text, categorized by sentiment into positive, negative, or neutral",
"AlloProfClusteringS2S.v2": "Identify the topic of document titles from Allo Prof dataset",
"AlloprofReranking-query": "Given a question, retrieve passages that answer the question",
"AmazonCounterfactualClassification": "Given an Amazon review, judge whether it is counterfactual.",
"ArXivHierarchicalClusteringP2P": "Identify the main and secondary category of Arxiv papers based on the titles and abstracts",
"ArXivHierarchicalClusteringS2S": "Identify the main and secondary category of Arxiv papers based on the titles",
"ArguAna-query": "Given a claim, find documents that refute the claim",
"ArmenianParaphrasePC": "Retrieve semantically similar text",
"BUCC.v2": "Retrieve parallel sentences",
"BelebeleRetrieval-query": "Retrieval the relevant passage for the given query",
"BibleNLPBitextMining": "Retrieve parallel sentences",
"BigPatentClustering.v2": "Identify the category of documents from the Big Patent dataset",
"BiorxivClusteringP2P.v2": "Identify the main category of Biorxiv papers based on the titles and abstracts",
"BornholmBitextMining": "Retrieve parallel sentences",
"BrazilianToxicTweetsClassification": "Classify the toxic tweets in Brazilian Portuguese into one of the six categories: LGBTQ+phobia, Xenophobia, Obscene, Insult, Misogyny and Racism.",
"BulgarianStoreReviewSentimentClassfication": "Classify user reviews into positive, negative or mixed sentiment",
"CEDRClassification": "Given a comment as query, classify expressed emotions into joy, sadness, surprise, fear, and anger",
"CLSClusteringP2P.v2": "Identify the main category of scholar papers based on the titles and abstracts",
"CSFDSKMovieReviewSentimentClassification": "Given a movie review, classify its rating on a scale from 0 to 5",
"CTKFactsNLI": "Retrieve semantically similar text",
"CataloniaTweetClassification": "Given a tweet, classify its sentiment into AGAINST, FAVOR or NEUTRAL towards Catalonia's independence.",
"Core17InstructionRetrieval-query": "Retrieve relevant passages for the given query with conditions",
"CovidRetrieval-query": "Given a question on COVID-19, retrieve news articles that answer the question",
"CyrillicTurkicLangClassification": "Given a text, classify its language",
"CzechProductReviewSentimentClassification": "Classify product reviews into positive, neutral, or negative sentiment",
"DBpediaClassification": "Given the following text, retrieve the appropriate DBpedia category including Company, EducationalInstitution, Artist, Athlete, OfficeHolder, MeanOfTransportation, Building, NaturalPlace, Village, Animal, Plant, Album, Film, WrittenWork.",
"DalajClassification": "Classify texts based on linguistic acceptability in Swedish",
"DiaBlaBitextMining": "Retrieve parallel sentences",
"EstonianValenceClassification": "Given a news article, categorized by sentiment into negatiivne, positiivne, neutraalne or vastuolulin",
"FaroeseSTS": "Retrieve semantically similar text",
"FilipinoShopeeReviewsClassification": "Given a shop review, classify its rating on a scale from 1 to 5",
"FinParaSTS": "Retrieve semantically similar text",
"FinancialPhrasebankClassification": "Given financial news, categorized by sentiment into positive, negative, or neutral",
"FloresBitextMining": "Retrieve parallel sentences",
"GermanSTSBenchmark": "Retrieve semantically similar text",
"GreekLegalCodeClassification": "Given a greek legal text, classify its topic",
"GujaratiNewsClassification": "Given a Gujarati news articles, classify ist topic",
"HALClusteringS2S.v2": "Identify the topic of titles from HAL",
"HagridRetrieval-query": "Given a question, retrieve relevant responses",
"IN22GenBitextMining": "Retrieve parallel sentences",
"IndicCrosslingualSTS": "Retrieve semantically similar text",
"IndicGenBenchFloresBitextMining": "Retrieve parallel sentences",
"IndicLangClassification": "Given a text, classify its language",
"IndonesianIdClickbaitClassification": "Given an Indonesian news headlines, classify its into clickbait or non-clickbait",
"IsiZuluNewsClassification": "Given a news article, classify its topic",
"ItaCaseholdClassification": "Given a judgments, classify its topic",
"JSICK": "Retrieve semantically similar text",
"KorHateSpeechMLClassification": "Given a Korean online news comments, classify its fine-grained hate speech classes",
"KorSarcasmClassification": "Given a twitter, categorized it into sarcasm or not_sarcasm",
"KurdishSentimentClassification": "Given a text, categorized by sentiment into positive or negative",
"LEMBPasskeyRetrieval-query": "Retrieval the relevant passage for the given query",
"LegalBenchCorporateLobbying-query": "Given a query, retrieve relevant legal bill summaries",
"MIRACLRetrievalHardNegatives-query": "Retrieve Wikipedia passages that answer the question",
"MLQARetrieval-query": "Retrieval the relevant passage for the given query",
"MacedonianTweetSentimentClassification": "Given a Macedonian tweet, categorized by sentiment into positive, negative, or neutral",
"MalteseNewsClassification": "Given a maltese new, classify its topic",
"MasakhaNEWSClassification": "Classify the News in the given texts into one of the seven category: politics,sports,health,business,entertainment,technology,religion ",
"MasakhaNEWSClusteringS2S": "Identify the topic or theme of the given news articles based on the titles",
"MassiveIntentClassification": "Given a user utterance as query, find the user intents",
"MedrxivClusteringP2P.v2": "Identify the main category of Medrxiv papers based on the titles and abstracts",
"MultiEURLEXMultilabelClassification": "Given a text, classify its topic",
"MultiHateClassification": "Given a text, categorized by sentiment into hate or non-hate",
"NTREXBitextMining": "Retrieve parallel sentences",
"NepaliNewsClassification": "Given a news article, categorized it into business, entertainment or sports",
"News21InstructionRetrieval-query": "Retrieve relevant passages for the given query with conditions",
"NollySentiBitextMining": "Retrieve parallel sentences",
"NordicLangClassification": "Given a text in a Nordic language, classify the language into one of the following categories: Danish, Swedish, Norwegian (Nynorsk), Norwegian (Bokmål), Faroese, Icelandic.",
"NorwegianCourtsBitextMining": "Retrieve parallel sentences",
"NusaParagraphEmotionClassification": "Classify the emotion into one of the following categories: fear, sadness, anger, happy, love, surprise, shame.",
"NusaTranslationBitextMining": "Retrieve parallel sentences",
"NusaX-senti": "Given a text, categorized by sentiment into positive or negative",
"NusaXBitextMining": "Retrieve parallel sentences",
"OdiaNewsClassification": "Given a news article, categorized it into business, entertainment or sports",
"OpusparcusPC": "Retrieve semantically similar text",
"PAC": "Classify Polish contract clauses into one of the following two types: \"Safe Contract Clauses\" and \"Unfair Contract Clauses\".",
"PawsXPairClassification": "Retrieve semantically similar text",
"PlscClusteringP2P.v2": "Identify the category of titles+abstracts from Library of Science",
"PoemSentimentClassification": "Given the following verse from a poem, classify its sentiment as negative, neutral, positive, or mixed.",
"PolEmo2.0-OUT": "Classify the sentiment of products and school online reviews",
"PpcPC": "Retrieve semantically similar text",
"PunjabiNewsClassification": "Given a news article, categorized it into two-classes",
"RTE3": "Retrieve semantically similar text",
"Robust04InstructionRetrieval-query": "Retrieve relevant passages for the given query with conditions",
"RomaniBibleClustering": "Identify verses from the Bible in Kalderash Romani by book.",
"RuBQReranking-query": "Given a question, retrieve Wikipedia passages that answer the question",
"SCIDOCS-query": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper",
"SIB200ClusteringS2S": "Identify the category of documents",
"SICK-R": "Retrieve semantically similar text",
"STS12": "Retrieve semantically related sentences",
"STS13": "Retrieve semantically similar text",
"STS14": "Retrieve semantically similar text",
"STS15": "Retrieve semantically similar text",
"STS17": "Retrieve semantically similar text",
"STS22.v2": "Given a document, retrieve semantically related documents",
"STSB": "Retrieve semantically similar text",
"STSBenchmark": "Retrieve semantically similar text",
"STSES": "Given a Spanish sentence, retrieve semantically related Spanish sentences",
"ScalaClassification": "Classify passages into correct or correct in Scandinavian Languages based on linguistic acceptability",
"SemRel24STS": "Retrieve semantically similar text",
"SentimentAnalysisHindi": "Given a hindi text, categorized by sentiment into positive, negative or neutral",
"SinhalaNewsClassification": "Given a news article, categorized it into political, business, technology, sports and Entertainment",
"SiswatiNewsClassification": "Identify fine-grained news categories in Siswati language.",
"SlovakMovieReviewSentimentClassification": "Given a movie review, categorized it into positive or negative",
"SpartQA-query": "Given the following spatial reasoning question, retrieve the right answer.",
"SprintDuplicateQuestions": "Find questions that have the same meaning as the input question",
"StackExchangeClustering.v2": "Identify the topic or theme of StackExchange posts based on the titles",
"StackOverflowQA-query": "Given a question about coding, retrieval code or passage that can solve user's question",
"StatcanDialogueDatasetRetrieval-query": "Retrieval the relevant passage for the given query",
"SwahiliNewsClassification": "Given a news article, classify its domain",
"SwednClusteringP2P": "Identify news categories in Swedish passages",
"SwissJudgementClassification": "Given a news article, categorized it into approval or dismissal",
"T2Reranking-query": "Given a Chinese search query, retrieve web passages that answer the question",
"TERRa": "Given a premise, retrieve a hypothesis that is entailed by the premise",
"TRECCOVID-query": "Given a medical query, retrieve documents that answer the query",
"Tatoeba": "Retrieve parallel sentences",
"TempReasonL1-query": "Given the following question about time, retrieve the correct answer.",
"ToxicConversationsClassification": "Classify the given comments as either toxic or not toxic",
"TswanaNewsClassification": "Given a news article, classify its topic",
"TweetTopicSingleClassification": "Gvien a twitter, classify its topic",
"TwitterHjerneRetrieval-query": "Retrieve answers to questions asked in Danish tweets",
"TwitterURLCorpus": "Find tweets that have the same meaning as the input tweet",
"VoyageMMarcoReranking-query": "Given a Japanese search query, retrieve web passages that answer the question",
"WebLINXCandidatesReranking-query": "Retrieval the relevant passage for the given query",
"WikiCitiesClustering": "Identify of Wikipedia articles of cities by country",
"WikiClusteringP2P.v2": "Identify the category of wiki passages",
"WikipediaRerankingMultilingual-query": "Retrieval the relevant passage for the given query",
"WikipediaRetrievalMultilingual-query": "Retrieval the relevant passage for the given query",
"WinoGrande-query": "Given the following sentence, retrieve an appropriate answer to fill in the missing underscored part.",
"XNLI": "Retrieve semantically similar text",
"indonli": "Retrieve semantically similar text"
}