{"@context":"https://schema.org","@id":"https://sup.ai/models","@type":"CollectionPage","about":{"@id":"https://sup.ai/#software","@type":"SoftwareApplication","applicationCategory":"WebApplication","name":"Sup AI"},"breadcrumb":{"@type":"BreadcrumbList","itemListElement":[{"@type":"ListItem","item":"https://sup.ai","name":"Home","position":1},{"@type":"ListItem","item":"https://sup.ai/models","name":"Models","position":2}]},"description":"Explore our comprehensive collection of AI models from leading providers. Find the perfect model for your needs.","inLanguage":"en-US","isPartOf":{"@id":"https://sup.ai/#website"},"mainEntity":{"@type":"ItemList","description":"Available AI models on Sup AI platform","itemListElement":[{"@type":"ListItem","item":{"@id":"https://sup.ai/models/aion-labs-aion-1.0","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Aion Labs"},"category":"AI Model","description":"Aion-1.0 is a multi-model system designed for high performance across various tasks, including reasoning and coding. It is built on DeepSeek-R1, augmented with additional models and techniques such as Tree of Thoughts (ToT) and Mixture of Experts (MoE). It is Aion Lab's most powerful reasoning model.","name":"Aion 1.0","url":"https://sup.ai/models/aion-labs-aion-1.0"},"position":1},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/aion-labs-aion-1.0-mini","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Aion Labs"},"category":"AI Model","description":"Aion-1.0-Mini 32B parameter model is a distilled version of the DeepSeek-R1 model, designed for strong performance in reasoning domains such as mathematics, coding, and logic. It is a modified variant of a FuseAI model that outperforms R1-Distill-Qwen-32B and R1-Distill-Llama-70B, with benchmark results independently replicated for verification.","name":"Aion 1.0 Mini","url":"https://sup.ai/models/aion-labs-aion-1.0-mini"},"position":2},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/aion-labs-aion-2.0","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Aion Labs"},"category":"AI Model","description":"Aion-2.0 is a variant of DeepSeek V3.2 optimized for immersive roleplaying and storytelling. It is particularly strong at introducing tension, crises, and conflict into stories, making narratives feel more engaging. It also handles mature and darker themes with more nuance and depth.","name":"Aion 2.0","url":"https://sup.ai/models/aion-labs-aion-2.0"},"position":3},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/aion-labs-aion-rp-llama-3.1-8b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Aion Labs"},"category":"AI Model","description":"Aion-RP-Llama-3.1-8B ranks the highest in the character evaluation portion of the RPBench-Auto benchmark, a roleplaying-specific variant of Arena-Hard-Auto, where LLMs evaluate each other's responses. It is a fine-tuned base model rather than an instruct model, designed to produce more natural and varied writing.","name":"Aion RP 1.0 8B","url":"https://sup.ai/models/aion-labs-aion-rp-llama-3.1-8b"},"position":4},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/cohere-aya-expanse-32b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Cohere"},"category":"AI Model","description":"A highly performant 32B multilingual model designed to rival monolingual performance through innovations in instruction tuning with data arbitrage, preference training, and model merging. Serves 23 languages including Arabic, Chinese, Japanese, Korean, and major European languages. With 128K context window, it handles substantial multilingual workloads effectively.","name":"Aya Expanse 32B","url":"https://sup.ai/models/cohere-aya-expanse-32b"},"position":5},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/cohere-aya-expanse-8b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Cohere"},"category":"AI Model","description":"A compact 8B multilingual model designed to rival monolingual performance through innovations in instruction tuning with data arbitrage, preference training, and model merging. Serves 23 languages with fast response times and low latency. Ideal for high-throughput multilingual workloads where cost and speed matter.","name":"Aya Expanse 8B","url":"https://sup.ai/models/cohere-aya-expanse-8b"},"position":6},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/cohere-aya-vision-32b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"16,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Cohere"},"category":"AI Model","description":"A state-of-the-art 32B multimodal model excelling at a variety of critical benchmarks for language, text, and image capabilities. Serves 23 languages with full image understanding, allowing you to pass in images and text and get a single coherent response. Focused on state-of-the-art multilingual performance.","name":"Aya Vision 32B","url":"https://sup.ai/models/cohere-aya-vision-32b"},"position":7},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/cohere-aya-vision-8b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"16,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Cohere"},"category":"AI Model","description":"A compact 8B multimodal model excelling at a variety of critical benchmarks for language, text, and image capabilities. Focused on low latency and best-in-class performance with image understanding across multiple languages.","name":"Aya Vision 8B","url":"https://sup.ai/models/cohere-aya-vision-8b"},"position":8},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-1.0","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"9,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"Anthropic's first publicly available large language model. Claude 1.0 offered basic text generation and reasoning with a 9K context window. This model has been retired and is no longer available for use.","name":"Claude 1.0","url":"https://sup.ai/models/anthropic-claude-1.0"},"position":9},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-1.1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"9,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"An incremental improvement over Claude 1.0 with better instruction following and reduced harmful outputs. 9K context window. This model has been retired and is no longer available for use.","name":"Claude 1.1","url":"https://sup.ai/models/anthropic-claude-1.1"},"position":10},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-1.2","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"9,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"A refined version of Claude 1 with improved helpfulness and safety characteristics. 9K context window. This model has been retired and is no longer available for use.","name":"Claude 1.2","url":"https://sup.ai/models/anthropic-claude-1.2"},"position":11},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-1.3","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"100,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"The final Claude 1 release, introducing the breakthrough 100K token context window. Claude 1.3 offered significantly improved long-document understanding and reasoning over earlier versions. This model has been retired and is no longer available for use.","name":"Claude 1.3","url":"https://sup.ai/models/anthropic-claude-1.3"},"position":12},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-2.0","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"100,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"Anthropic's second-generation large language model with 100K context window. Claude 2.0 offered improved reasoning, coding, and math capabilities over Claude 1. This model has been retired and is no longer available for use.","name":"Claude 2.0","url":"https://sup.ai/models/anthropic-claude-2.0"},"position":13},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-2.1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"Anthropic's improved Claude 2 with doubled 200K context window and reduced hallucination rates. Claude 2.1 introduced beta tool use capabilities. This model has been retired and is no longer available for use.","name":"Claude 2.1","url":"https://sup.ai/models/anthropic-claude-2.1"},"position":14},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-haiku-3","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"The original fast and affordable Claude 3 model. Haiku 3 delivers rapid responses at rock-bottom pricing, processing 21K tokens per second for prompts under 32K tokens. With 200K context, vision capabilities, and tool use support, it handles basic tasks reliably. Limited to 4K output tokens. Superseded by Haiku 4.5 which offers dramatically better performance at a modest price increase.","name":"Claude Haiku 3","url":"https://sup.ai/models/anthropic-claude-haiku-3"},"position":15},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-haiku-3.5","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"Anthropic's fast and cost-efficient model from October 2024. Haiku 3.5 delivered near-Claude-3-Opus-level performance at budget pricing with 200K context and 8K output tokens. Scored 88.1% on HumanEval and 40.6% on SWE-bench Verified. This model has been retired and is no longer available for use.","name":"Claude Haiku 3.5","url":"https://sup.ai/models/anthropic-claude-haiku-3.5"},"position":16},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-4.5-haiku","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"Fast, efficient, and surprisingly capable? Haiku delivers near-flagship performance at budget-friendly pricing. This model excels at coding, agent workflows, and computer use tasks with Claude's characteristic helpfulness and safety. With 200K context and support for files and images, it's perfect for production deployments, sub-agent systems, and any scenario where you need reliable intelligence without the premium cost. Lightning-fast response times make it ideal for real-time applications.","name":"Claude Haiku 4.5","url":"https://sup.ai/models/anthropic-claude-4.5-haiku"},"position":17},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-instant-1.0","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"9,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"Anthropic's first fast and affordable model. Claude Instant 1.0 offered quick response times for simple tasks with a 9K context window. This model has been retired and is no longer available for use.","name":"Claude Instant 1.0","url":"https://sup.ai/models/anthropic-claude-instant-1.0"},"position":18},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-instant-1.1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"100,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"An improved version of Claude Instant with expanded 100K context window. Claude Instant 1.1 offered faster response times for classification, summarization, and text generation tasks. This model has been retired and is no longer available for use.","name":"Claude Instant 1.1","url":"https://sup.ai/models/anthropic-claude-instant-1.1"},"position":19},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-instant-1.2","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"100,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"Anthropic's final and best Instant model, designed for high-throughput tasks. Claude Instant 1.2 offered 100K context with quick response times for simple classification, summarization, and text generation tasks. This model has been retired and is no longer available for use.","name":"Claude Instant 1.2","url":"https://sup.ai/models/anthropic-claude-instant-1.2"},"position":20},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-opus-3","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"The original Claude 3 flagship model, once the most intelligent Claude available. Opus 3 excelled at complex reasoning, nuanced analysis, and creative tasks. With 200K context and vision capabilities but limited to 4K output tokens. This model has been retired and is no longer available for use.","name":"Claude Opus 3","url":"https://sup.ai/models/anthropic-claude-opus-3"},"position":21},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-opus-4","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"The first Claude 4 flagship model, delivering breakthrough coding and agentic performance with 72.5% on SWE-bench Verified. Opus 4 excels at autonomous research, multi-step reasoning, and complex problem-solving with 200K context and extended thinking capabilities. Superseded by Opus 4.1 and later generations.","name":"Claude Opus 4","url":"https://sup.ai/models/anthropic-claude-opus-4"},"position":22},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-opus-4.1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"The previous Opus generation delivering superior coding and agentic performance with 74.5% on SWE-bench Verified. Excellent for complex multi-step problems requiring rigor and precision.","name":"Claude Opus 4.1","url":"https://sup.ai/models/anthropic-claude-opus-4.1"},"position":23},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-opus-4.5","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"The previous Opus generation for the hardest problems. Opus 4.5 excels at extremely complex reasoning, advanced coding challenges, sophisticated research, and intricate multi-step planning. With enhanced general intelligence and vision capabilities, this model tackles problems that push the boundaries of AI capability.","name":"Claude Opus 4.5","url":"https://sup.ai/models/anthropic-claude-opus-4.5"},"position":24},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-opus-4.6","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"The previous Opus generation for coding and professional work, built to power agents that take on whole categories of real-world work. Opus 4.6 excels across the entire SDLC, breaking through on hard problems, identifying complex bugs, and demonstrating deeper codebase understanding. It also delivers a step-change in knowledge work, with near-production-ready documents, presentations, and spreadsheets on the first pass. With 1M context window and 128K max output, this was the ultimate problem-solver before being superseded by Opus 4.7.","name":"Claude Opus 4.6","url":"https://sup.ai/models/anthropic-claude-opus-4.6"},"position":25},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-opus-4.6-fast","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"The fast-inference variant of Claude Opus 4.6, delivering identical model quality at up to 2.5x faster output tokens per second for latency-sensitive agentic workflows. Same weights, same capabilities, same 1M context window, and same 128K max output as Opus 4.6, but with a faster inference configuration. Priced at 6x standard Opus 4.6 rates (prompt cache and data residency multipliers stack on top). Speed gains are focused on output tokens per second, not time to first token. Not available with Batch API or Priority Tier. Superseded by Opus 4.7.","name":"Claude Opus 4.6 Fast","url":"https://sup.ai/models/anthropic-claude-opus-4.6-fast"},"position":26},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-opus-4.7","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"Anthropic's most capable model for complex reasoning and agentic coding. Opus 4.7 delivers a step-change jump over Opus 4.6 on long-horizon agentic work, knowledge-worker tasks, and vision. New in this release: high-resolution image support up to 2576px / 3.75MP with 1:1 pixel-to-coordinate mapping for computer use and document understanding, the `xhigh` effort level for the hardest coding and agentic problems, and task budgets for token-aware agent loops. Adaptive thinking is the only supported thinking mode (extended-thinking budgets are removed). Features a 1M context window at standard pricing and 128K max output, with improved file-system memory, chart and figure analysis, and .docx/.pptx editing.","name":"Claude Opus 4.7","url":"https://sup.ai/models/anthropic-claude-opus-4.7"},"position":27},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-sonnet-3","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"The original Claude 3 balanced model offering a good combination of intelligence and speed. Sonnet 3 handled coding, analysis, and general tasks with 200K context and vision support. Limited to 4K output tokens. This model has been retired and is no longer available for use.","name":"Claude Sonnet 3","url":"https://sup.ai/models/anthropic-claude-sonnet-3"},"position":28},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-sonnet-3.5","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"Claude's breakthrough mid-tier model from 2024, setting new standards for intelligence at its price point. The original June 2024 release and October 2024 update both delivered exceptional coding, analysis, and multimodal capabilities with 200K context and 8K output. The v2 update added computer use and PDF support. Both versions have been retired and are no longer available for use.","name":"Claude Sonnet 3.5","url":"https://sup.ai/models/anthropic-claude-sonnet-3.5"},"position":29},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-sonnet-3.7","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"The first hybrid reasoning model from Anthropic, combining standard responses with extended thinking mode. Sonnet 3.7 introduced transparent step-by-step reasoning and excelled at coding and front-end development. With 200K context, 64K output, and thinking capabilities, it bridged the gap between Claude 3.5 and Claude 4. This model has been retired and is no longer available for use.","name":"Claude Sonnet 3.7","url":"https://sup.ai/models/anthropic-claude-sonnet-3.7"},"position":30},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-sonnet-4","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"The first Claude 4 Sonnet model, offering a strong balance of intelligence and speed with extended thinking capabilities. Sonnet 4 handles complex coding, analysis, and reasoning tasks with 1M context window (beta) and 64K output. Superseded by Sonnet 4.5 which brings improved nuance and performance.","name":"Claude Sonnet 4","url":"https://sup.ai/models/anthropic-claude-sonnet-4"},"position":31},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-sonnet-4.5","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"Our go-to model for sophisticated work that demands both intelligence and nuance. Sonnet 4.5 excels at complex coding projects, nuanced writing, detailed analysis, and thoughtful problem-solving. With thinking capabilities, multimodal support, and Claude's renowned ability to follow instructions precisely, this model strikes the perfect balance between capability and cost for professional-grade work. Ideal for when quality truly matters.","name":"Claude Sonnet 4.5","url":"https://sup.ai/models/anthropic-claude-sonnet-4.5"},"position":32},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthropic-claude-sonnet-4.6","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthropic"},"category":"AI Model","description":"The latest Claude Sonnet model, building on 4.5's strengths with continued improvements. Sonnet 4.6 delivers excellent performance on complex coding projects, nuanced writing, and detailed analysis. Features thinking capabilities, multimodal support, and precise instruction following. A strong balance of capability and cost for professional-grade work.","name":"Claude Sonnet 4.6","url":"https://sup.ai/models/anthropic-claude-sonnet-4.6"},"position":33},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alfredpros-codellama-7b-instruct-solidity","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"4,096"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alfredpros"},"category":"AI Model","description":"A finetuned 7 billion parameters Code LLaMA - Instruct model to generate Solidity smart contract using 4-bit QLoRA finetuning provided by PEFT library.","name":"CodeLLaMa 7B Instruct Solidity","url":"https://sup.ai/models/alfredpros-codellama-7b-instruct-solidity"},"position":34},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/arcee-coder-large","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Arcee"},"category":"AI Model","description":"Coder-Large is a 32 B-parameter offspring of Qwen 2.5-Instruct that has been further trained on permissively-licensed GitHub, CodeSearchNet and synthetic bug-fix corpora. It supports a 32k context window, enabling multi-file refactoring or long diff review in a single call, and understands 30-plus programming languages with special attention to TypeScript, Go and Terraform. Internal benchmarks show 5-8 pt gains over CodeLlama-34 B-Python on HumanEval and competitive BugFix scores thanks to a reinforcement pass that rewards compilable output. The model emits structured explanations alongside code blocks by default, making it suitable for educational tooling as well as production copilot scenarios. Cost-wise, Together AI prices it well below proprietary incumbents, so teams can scale interactive coding without runaway spend. ","name":"Coder Large","url":"https://sup.ai/models/arcee-coder-large"},"position":35},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-codestral","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"256,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Our cutting-edge language model for coding released August 2025.","name":"Codestral","url":"https://sup.ai/models/mistral-codestral"},"position":36},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepcogito-cogito-v2.1-671b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepCogito"},"category":"AI Model","description":"Cogito v2.1 671B MoE represents one of the strongest open models globally, matching performance of frontier closed and open models. This model is trained using self play with reinforcement learning to reach state-of-the-art performance on multiple categories (instruction following, coding, longer queries and creative writing). This advanced system demonstrates significant progress toward scalable superintelligence through policy improvement.","name":"Cogito v2.1 671B","url":"https://sup.ai/models/deepcogito-cogito-v2.1-671b"},"position":37},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/cohere-command","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"4,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Cohere"},"category":"AI Model","description":"An instruction-following conversational model that performs language tasks with high quality and a 4K context window. This model has been deprecated by Cohere as of September 2025.","name":"Command","url":"https://sup.ai/models/cohere-command"},"position":38},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/cohere-command-a","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Cohere"},"category":"AI Model","description":"Cohere's most performant model, excelling at tool use, agents, retrieval augmented generation (RAG), and multilingual use cases. Command A has a 256K context window, only requires two GPUs to run, and has 150% higher throughput compared to Command R+ 08-2024. Ideal for enterprise deployments requiring strong tool integration and multilingual support.","name":"Command A","url":"https://sup.ai/models/cohere-command-a"},"position":39},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/cohere-command-a-reasoning","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Cohere"},"category":"AI Model","description":"Cohere's first reasoning model, able to think before generating an output in a way that allows it to perform well in certain kinds of nuanced problem-solving and agent-based tasks in 23 languages. With a 256K context window and 32K max output, it excels at complex analytical work.","name":"Command A Reasoning","url":"https://sup.ai/models/cohere-command-a-reasoning"},"position":40},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/cohere-command-a-translate","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Cohere"},"category":"AI Model","description":"Cohere's state-of-the-art machine translation model, excelling at a variety of translation tasks on 23 languages including English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Chinese, Arabic, Russian, Polish, Turkish, Vietnamese, Dutch, Czech, Indonesian, Ukrainian, Romanian, Greek, Hindi, Hebrew, and Persian.","name":"Command A Translate","url":"https://sup.ai/models/cohere-command-a-translate"},"position":41},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/cohere-command-a-vision","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Cohere"},"category":"AI Model","description":"Cohere's first model capable of processing images, excelling in enterprise use cases such as analyzing charts, graphs, and diagrams, table understanding, OCR, document Q&A, and object detection. Officially supports English, Portuguese, Italian, French, German, and Spanish with a 128K context window.","name":"Command A Vision","url":"https://sup.ai/models/cohere-command-a-vision"},"position":42},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/cohere-command-light","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"4,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Cohere"},"category":"AI Model","description":"A smaller, faster version of Command. Almost as capable but with much lower latency and cost. 4K context window. This model has been deprecated by Cohere as of September 2025.","name":"Command Light","url":"https://sup.ai/models/cohere-command-light"},"position":43},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/cohere-command-r","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Cohere"},"category":"AI Model","description":"An instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. Best suited for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents. 128K context window with 4K max output.","name":"Command R","url":"https://sup.ai/models/cohere-command-r"},"position":44},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/cohere-command-r-plus","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Cohere"},"category":"AI Model","description":"An instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. Best suited for complex RAG workflows and multi-step tool use. 128K context window with 4K max output.","name":"Command R+","url":"https://sup.ai/models/cohere-command-r-plus"},"position":45},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/cohere-command-r7b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Cohere"},"category":"AI Model","description":"A small, fast model that excels at RAG, tool use, agents, and similar tasks requiring complex reasoning and multiple steps. With 128K context window and extremely low pricing, it is ideal for high-throughput production workloads where cost and speed are paramount.","name":"Command R7B","url":"https://sup.ai/models/cohere-command-r7b"},"position":46},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/thedrummer-cydonia-24b-v4.1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"TheDrummer"},"category":"AI Model","description":"Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.","name":"Cydonia 24B V4.1","url":"https://sup.ai/models/thedrummer-cydonia-24b-v4.1"},"position":47},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nousresearch-deephermes-3-mistral-24b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Nous Research"},"category":"AI Model","description":"DeepHermes 3 (Mistral 24B Preview) is an instruction-tuned language model by Nous Research based on Mistral-Small-24B, designed for chat, function calling, and advanced multi-turn reasoning. It introduces a dual-mode system that toggles between intuitive chat responses and structured \"deep reasoning\" mode using special system prompts. Fine-tuned via distillation from R1, it supports structured output (JSON mode) and function call syntax for agent-based applications. DeepHermes 3 supports a reasoning toggle via system prompt, allowing users to switch between fast, intuitive responses and deliberate, multi-step reasoning. When activated with a specific system instruction, the model enters a deep thinking mode, generating extended chains of thought wrapped in `` tags before delivering a final answer.","name":"DeepHermes 3 Mistral 24B","url":"https://sup.ai/models/nousresearch-deephermes-3-mistral-24b"},"position":48},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepseek-deepseek-r1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepSeek"},"category":"AI Model","description":"DeepSeek's advanced reasoning model from the R1 series, released May 2025. Built for deep chain-of-thought reasoning, it excels at complex mathematical, logical, and analytical problems. With 131K context and 32K output window, it handles substantial reasoning tasks. Superseded by the V3.2 Speciale model which achieves stronger reasoning at lower cost.","name":"DeepSeek R1","url":"https://sup.ai/models/deepseek-deepseek-r1"},"position":49},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/tngtech-deepseek-r1t-chimera","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"163,840"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"163,840"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"TNG Technology"},"category":"AI Model","description":"DeepSeek-R1T-Chimera is created by merging DeepSeek-R1 and DeepSeek-V3 (0324), combining the reasoning capabilities of R1 with the token efficiency improvements of V3. It is based on a DeepSeek-MoE Transformer architecture and is optimized for general text generation tasks. The model merges pretrained weights from both source models to balance performance across reasoning, efficiency, and instruction-following tasks. It is released under the MIT license and intended for research and commercial use.","name":"DeepSeek R1T Chimera","url":"https://sup.ai/models/tngtech-deepseek-r1t-chimera"},"position":50},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/tngtech-deepseek-r1t2-chimera","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"163,840"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"163,840"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"TNG Technology"},"category":"AI Model","description":"DeepSeek-TNG-R1T2-Chimera is the second-generation Chimera model from TNG Tech. It is a 671 B-parameter mixture-of-experts text-generation model assembled from DeepSeek-AI's R1-0528, R1, and V3-0324 checkpoints with an Assembly-of-Experts merge. The tri-parent design yields strong reasoning performance while running roughly 20 % faster than the original R1 and more than 2x faster than R1-0528 under vLLM, giving a favorable cost-to-intelligence trade-off. The checkpoint supports contexts up to 60 k tokens in standard use (tested to ~130 k) and maintains consistent token behaviour, making it suitable for long-context analysis, dialogue and other open-ended generation tasks.","name":"DeepSeek R1T2 Chimera","url":"https://sup.ai/models/tngtech-deepseek-r1t2-chimera"},"position":51},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepseek-deepseek-v3","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepSeek"},"category":"AI Model","description":"DeepSeek's updated V3 model released on March 24, 2025. A reliable general-purpose model with 131K context window and tool support. Offers excellent cost-effectiveness for standard chat and completion tasks. Superseded by V3.1 and V3.2 series with improved capabilities.","name":"DeepSeek V3","url":"https://sup.ai/models/deepseek-deepseek-v3"},"position":52},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepseek-deepseek-v3.1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepSeek"},"category":"AI Model","description":"DeepSeek's powerful open-source model with thinking capabilities and tool support. With 131K context and cache-based pricing, it offers strong performance for general reasoning, coding, and analysis tasks. Superseded by the V3.2 series with expanded context window and improved efficiency.","name":"DeepSeek V3.1","url":"https://sup.ai/models/deepseek-deepseek-v3.1"},"position":53},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nex-agi-deepseek-v3.1-nex-n1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"163,840"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Nex AGI"},"category":"AI Model","description":"DeepSeek V3.1 Nex-N1 is the flagship release of the Nex-N1 series - a post-trained model designed to highlight agent autonomy, tool use, and real-world productivity. Nex-N1 demonstrates competitive performance across all evaluation scenarios, showing particularly strong results in practical coding and HTML generation tasks.","name":"DeepSeek V3.1 Nex N1","url":"https://sup.ai/models/nex-agi-deepseek-v3.1-nex-n1"},"position":54},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepseek-deepseek-v3.1-terminus","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepSeek"},"category":"AI Model","description":"The production-optimized variant of DeepSeek V3.1, tuned for stable and consistent output quality. Shares the same pricing and context capabilities as V3.1 with improved deployment characteristics. Superseded by the V3.2 series.","name":"DeepSeek V3.1 Terminus","url":"https://sup.ai/models/deepseek-deepseek-v3.1-terminus"},"position":55},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepseek-deepseek-v3.2","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"163,842"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepSeek"},"category":"AI Model","description":"An innovative model pioneering breakthrough efficiency in long-context processing. Using revolutionary Sparse Attention technology, this model handles massive contexts (128K tokens) with exceptional speed and minimal resource use while maintaining quality. With thinking capabilities and an impressive 65K output window, it excels at tasks requiring extensive context understanding. Perfect for processing large documents, codebases, or datasets where traditional models slow down. Exceptional value for long-context work.","name":"DeepSeek V3.2","url":"https://sup.ai/models/deepseek-deepseek-v3.2"},"position":56},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepseek-deepseek-v3.2-exp","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"163,840"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepSeek"},"category":"AI Model","description":"An innovative experimental model pioneering breakthrough efficiency in long-context processing. Using revolutionary Sparse Attention technology, this model handles massive contexts (164K tokens) with exceptional speed and minimal resource use while maintaining quality. With thinking capabilities and an impressive 65K output window, it excels at tasks requiring extensive context understanding. Perfect for processing large documents, codebases, or datasets where traditional models slow down. Exceptional value for long-context work.","name":"DeepSeek V3.2 Experimental","url":"https://sup.ai/models/deepseek-deepseek-v3.2-exp"},"position":57},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepseek-deepseek-v3.2-exp-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"163,840"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepSeek"},"category":"AI Model","description":"The thinking-optimized variant of our experimental long-context model, designed for deep reasoning over extensive information. This model allocates more computational resources to explicit reasoning while processing massive contexts efficiently. With 164K context support, it excels at analytical tasks requiring both breadth of information and depth of thought. Choose this when you need thorough, reasoned analysis of large amounts of information. Extremely cost-effective for research and analysis.","name":"DeepSeek V3.2 Experimental Thinking","url":"https://sup.ai/models/deepseek-deepseek-v3.2-exp-thinking"},"position":58},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepseek-deepseek-v3.2-speciale","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"163,840"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepSeek"},"category":"AI Model","description":"The most powerful reasoning model in the DeepSeek lineup, pushing the absolute boundaries of AI reasoning capabilities. V3.2-Speciale achieves gold-medal performance in IMO, CMO, ICPC World Finals, and IOI 2025, rivaling Gemini-3.0-Pro on complex tasks. This model excels at the hardest mathematical, logical, and competitive programming challenges where no other model can compete. Note: Optimized purely for reasoning-does not support tool calls and consumes more tokens than standard models. Choose this when you need world-class reasoning on the most demanding problems.","name":"DeepSeek V3.2 Speciale","url":"https://sup.ai/models/deepseek-deepseek-v3.2-speciale"},"position":59},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepseek-deepseek-v3.2-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepSeek"},"category":"AI Model","description":"The thinking-optimized variant of our long-context model, designed for deep reasoning over extensive information. This model allocates more computational resources to explicit reasoning while processing massive contexts efficiently. With 128K context support, it excels at analytical tasks requiring both breadth of information and depth of thought. Choose this when you need thorough, reasoned analysis of large amounts of information. Extremely cost-effective for research and analysis.","name":"DeepSeek V3.2 Thinking","url":"https://sup.ai/models/deepseek-deepseek-v3.2-thinking"},"position":60},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepseek-deepseek-v4-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepSeek"},"category":"AI Model","description":"The efficiency-optimized DeepSeek V4 model with 284B parameters (13B activated) and a massive 1M token context window. Built on a new hybrid attention architecture (Compressed Sparse Attention + Heavily Compressed Attention) for dramatically lower cost on long contexts, it runs at just a fraction of V3.2 inference cost while matching or exceeding its quality. Switches between fast non-thinking responses and explicit chain-of-thought reasoning with configurable effort (up to \"max\" for the hardest problems). Tool calls are supported in both modes. Ideal for high-throughput chat, coding assistance, and agent workflows over large documents or codebases.","name":"DeepSeek V4 Flash","url":"https://sup.ai/models/deepseek-deepseek-v4-flash"},"position":61},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepseek-deepseek-v4-pro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"262,144"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepSeek"},"category":"AI Model","description":"The flagship DeepSeek V4 model, a 1.6T parameter Mixture-of-Experts (49B activated) with a 1M token context window. Matches or exceeds leading closed-source models on coding and agentic benchmarks (93.5% LiveCodeBench, 80.6% SWE-Verified, 3206 Codeforces rating at max reasoning effort). Switches between fast non-thinking responses and explicit chain-of-thought reasoning with configurable effort (up to \"max\" for the hardest problems like mathematics, competitive programming, and scientific analysis). Tool calls are supported in both modes. Choose this when you need frontier-level quality on complex tasks.","name":"DeepSeek V4 Pro","url":"https://sup.ai/models/deepseek-deepseek-v4-pro"},"position":62},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-devstral","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"262,144"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Official devstral-2512 Mistral AI model","name":"Devstral","url":"https://sup.ai/models/mistral-devstral"},"position":63},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-devstral-medium","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"262,144"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Our medium code-agentic model.","name":"Devstral Medium","url":"https://sup.ai/models/mistral-devstral-medium"},"position":64},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-devstral-small","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Our small open-source code-agentic model.","name":"Devstral Small","url":"https://sup.ai/models/mistral-devstral-small"},"position":65},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openrouter-elephant","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenRouter"},"category":"AI Model","description":"Elephant Alpha is a 100B-parameter text model focused on intelligence efficiency, delivering strong performance while minimizing token usage. It supports a 256K context window with up to 32K output tokens, and excels at code completion and debugging tasks, rapid document processing, and lightweight agent interactions.","name":"Elephant","url":"https://sup.ai/models/openrouter-elephant"},"position":66},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/baidu-ernie-4.5-21b-a3b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"120,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Baidu"},"category":"AI Model","description":"A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through heterogeneous MoE structures and modality-isolated routing. Supporting an extensive 131K token context length, the model achieves efficient inference via multi-expert parallel collaboration and quantization, while advanced post-training techniques including SFT, DPO, and UPO ensure optimized performance across diverse applications with specialized routing and balancing losses for superior task handling.","name":"ERNIE 4.5 21B A3B","url":"https://sup.ai/models/baidu-ernie-4.5-21b-a3b"},"position":67},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/baidu-ernie-4.5-21b-a3b-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Baidu"},"category":"AI Model","description":"ERNIE-4.5-21B-A3B-Thinking is Baidu's upgraded lightweight MoE model, refined to boost reasoning depth and quality for top-tier performance in logical puzzles, math, science, coding, text generation, and expert-level academic benchmarks.","name":"ERNIE 4.5 21B A3B Thinking","url":"https://sup.ai/models/baidu-ernie-4.5-21b-a3b-thinking"},"position":68},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/baidu-ernie-4.5-300b-a47b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"123,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"12,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Baidu"},"category":"AI Model","description":"ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in both English and Chinese. Optimized for high-throughput inference and efficient scaling, it uses a heterogeneous MoE structure with advanced routing and quantization strategies, including FP8 and 2-bit formats. This version is fine-tuned for language-only tasks and supports reasoning, tool parameters, and extended context lengths up to 131k tokens. Suitable for general-purpose LLM applications with high reasoning and throughput demands.","name":"ERNIE 4.5 300B A47B ","url":"https://sup.ai/models/baidu-ernie-4.5-300b-a47b"},"position":69},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/baidu-ernie-4.5-vl-28b-a3b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"30,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Baidu"},"category":"AI Model","description":"A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing. Built with scaling-efficient infrastructure for high-throughput training and inference, the model leverages advanced post-training techniques including SFT, DPO, and UPO for optimized performance, while supporting an impressive 131K context length and RLVR alignment for superior cross-modal reasoning and generation capabilities.","name":"ERNIE 4.5 VL 28B A3B","url":"https://sup.ai/models/baidu-ernie-4.5-vl-28b-a3b"},"position":70},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/baidu-ernie-4.5-vl-424b-a47b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"123,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Baidu"},"category":"AI Model","description":"ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu's ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data using a heterogeneous MoE architecture and modality-isolated routing to enable high-fidelity cross-modal reasoning, image understanding, and long-context generation (up to 131k tokens). Fine-tuned with techniques like SFT, DPO, UPO, and RLVR, this model supports both \"thinking\" and non-thinking inference modes. Designed for vision-language tasks in English and Chinese, it is optimized for efficient scaling and can operate under 4-bit/8-bit quantization.","name":"ERNIE 4.5 VL 424B A47B ","url":"https://sup.ai/models/baidu-ernie-4.5-vl-424b-a47b"},"position":71},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemini-2.0-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"A fast and cost-effective model with native tool use, code execution, and web search grounding. Supports a 1M token context window with multimodal inputs including text, images, audio, and video. Experimental thinking support enables configurable reasoning. Ideal for high-volume tasks that need broad capability at minimal cost.","name":"Gemini 2.0 Flash","url":"https://sup.ai/models/google-gemini-2.0-flash"},"position":72},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemini-2.0-flash-lite","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"The most affordable Gemini model, optimized for cost efficiency and low latency. Supports a 1M token context window with multimodal inputs and function calling. No native code execution, web search, or thinking support. Best suited for high-volume simple tasks where cost is the primary concern.","name":"Gemini 2.0 Flash Lite","url":"https://sup.ai/models/google-gemini-2.0-flash-lite"},"position":73},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemini-2.5-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"The perfect all-rounder combining intelligence, speed, and value. This thinking model delivers excellent performance across diverse tasks with a massive 1M context window and full multimodal support. Whether you need code generation, document analysis, visual understanding, or complex reasoning, Flash handles it with grace. The sweet spot between capability and cost makes it our recommended choice for most professional work. With strong thinking capabilities, it provides both quality and transparency.","name":"Gemini 2.5 Flash","url":"https://sup.ai/models/google-gemini-2.5-flash"},"position":74},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemini-2.5-flash-image","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"The first hybrid reasoning image generator combining speed, intelligence, and creative control. Nano Banana creates images from text, edits them conversationally across multiple turns, and generates interleaved text-and-image responses. With configurable thinking budgets, you control the balance between quality, cost, and speed. Locale-aware generation ensures culturally appropriate visuals for global audiences. Perfect for rapid creative iteration, conversational image editing, and projects requiring both visual and textual content together. Fast, flexible, and surprisingly capable.","name":"Gemini 2.5 Flash Image (Nano Banana)","url":"https://sup.ai/models/google-gemini-2.5-flash-image"},"position":75},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemini-2.5-flash-lite","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Blazingly fast and incredibly affordable, without sacrificing capability. This lightweight model offers an extraordinary 1M token context window with multimodal support at breakthrough pricing. With configurable thinking and tool connectivity, it handles diverse tasks from quick queries to complex document analysis. The massive context window means you can process entire books, codebases, or datasets in a single request. Perfect for high-volume applications and cost-conscious projects that still need quality results.","name":"Gemini 2.5 Flash Lite","url":"https://sup.ai/models/google-gemini-2.5-flash-lite"},"position":76},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemini-2.5-pro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"The previous-generation advanced Gemini reasoning model capable of solving complex problems with 1M context and comprehensive multimodal support including audio and video.","name":"Gemini 2.5 Pro","url":"https://sup.ai/models/google-gemini-2.5-pro"},"position":77},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemini-3-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Google's most intelligent model balanced for speed and cost, combining frontier intelligence with superior search and grounding. Gemini 3 Flash delivers exceptional reasoning capabilities across a massive 1M context window while maintaining fast response times. With full multimodal support including vision and tool use, it excels at complex analytical tasks, research, and code generation. The perfect choice when you need top-tier intelligence and speed or cost is a consideration.","name":"Gemini 3 Flash","url":"https://sup.ai/models/google-gemini-3-flash"},"position":78},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemini-3-pro-preview","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"The most advanced Gemini model, pushing the boundaries of multimodal reasoning and complex problem-solving. This preview model excels at sophisticated analytical tasks with support for text, images, audio, video, and documents. With a 1M context window and enhanced reasoning capabilities, it tackles problems that require deep understanding across multiple modalities. Choose this for cutting-edge multimodal work, advanced research, or when you need the absolute best in visual and analytical reasoning. The future of multimodal AI.","name":"Gemini 3 Pro","url":"https://sup.ai/models/google-gemini-3-pro-preview"},"position":79},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemini-3-pro-image","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"65,536"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Professional-grade image generation delivering studio-quality, production-ready visuals with unparalleled precision and control. Building on Nano Banana's foundation, the Pro version adds enhanced reasoning, deep world knowledge, sophisticated text rendering and translation within images, and studio-level fine controls. Create high-fidelity visuals with accurate text, cultural nuance, and functional design precision. Perfect for professional projects, marketing materials, product designs, and any work requiring publication-ready quality. The ultimate image generation model for serious creative work.","name":"Gemini 3 Pro Image (Nano Banana Pro)","url":"https://sup.ai/models/google-gemini-3-pro-image"},"position":80},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemini-3.1-flash-image","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Gemini 3.1 Flash Image, a.k.a. \"Nano Banana 2,\" is Google's latest state-of-the-art image generation and editing model, delivering Pro-level visual quality at Flash speed. It combines advanced contextual understanding with fast, cost-efficient inference, making complex image generation and iterative edits significantly more accessible.","name":"Gemini 3.1 Flash Image (Nano Banana 2)","url":"https://sup.ai/models/google-gemini-3.1-flash-image"},"position":81},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemini-3.1-flash-lite","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Gemini 3.1 Flash Lite is Google's high-efficiency model optimized for high-volume use cases. It outperforms Gemini 2.5 Flash Lite on overall quality and approaches Gemini 2.5 Flash performance across key capabilities. Improvements span audio input/ASR, RAG snippet ranking, translation, data extraction, and code completion. Supports full thinking levels (minimal, low, medium, high) for fine-grained cost/performance trade-offs. Priced at half the cost of Gemini 3 Flash.","name":"Gemini 3.1 Flash Lite","url":"https://sup.ai/models/google-gemini-3.1-flash-lite"},"position":82},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemini-3.1-pro-preview","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"The most advanced Gemini model with significantly improved reasoning, SWE and agentic capabilities. Building on Gemini 3 Pro, this model delivers better token efficiency, expanded thinking levels, and stronger performance on complex problem-solving benchmarks. With a 1M context window and full multimodal support, it excels at ambitious agentic workflows, coding, multi-step function calling, planning, and deep knowledge tasks. Choose this for the most demanding analytical, research, and engineering challenges.","name":"Gemini 3.1 Pro","url":"https://sup.ai/models/google-gemini-3.1-pro-preview"},"position":83},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemma-2-27b-it","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,192"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"2,048"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Gemma 2 27B by Google is an open model built from the same research and technology used to create the Gemini models. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Usage of Gemma is subject to Google's Gemma Terms of Use.","name":"Gemma 2 27B","url":"https://sup.ai/models/google-gemma-2-27b-it"},"position":84},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemma-2-9b-it","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,192"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class. Designed for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness. Usage of Gemma is subject to Google's Gemma Terms of Use.","name":"Gemma 2 9B","url":"https://sup.ai/models/google-gemma-2-9b-it"},"position":85},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemma-3-12b-it","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 12B is the second largest in the family of Gemma 3 models after Gemma 3 27B.","name":"Gemma 3 12B","url":"https://sup.ai/models/google-gemma-3-12b-it"},"position":86},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemma-3-27b-it","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to Gemma 2.","name":"Gemma 3 27B","url":"https://sup.ai/models/google-gemma-3-27b-it"},"position":87},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemma-3-4b-it","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.","name":"Gemma 3 4B","url":"https://sup.ai/models/google-gemma-3-4b-it"},"position":88},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemma-3n-e2b-it","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,192"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"2,048"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Gemma 3n E2B IT is a multimodal, instruction-tuned model developed by Google DeepMind, designed to operate efficiently at an effective parameter size of 2B while leveraging a 6B architecture. Based on the MatFormer architecture, it supports nested submodels and modular composition via the Mix-and-Match framework. Gemma 3n models are optimized for low-resource deployment, offering 32K context length and strong multilingual and reasoning performance across common benchmarks. This variant is trained on a diverse corpus including code, math, web, and multimodal data.","name":"Gemma 3n 2B","url":"https://sup.ai/models/google-gemma-3n-e2b-it"},"position":89},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemma-3n-e4b-it","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs-including text, visual data, and audio-enabling diverse tasks such as text generation, speech recognition, translation, and image analysis. Leveraging innovations like Per-Layer Embedding (PLE) caching and the MatFormer architecture, Gemma 3n dynamically manages memory usage and computational load by selectively activating model parameters, significantly reducing runtime resource requirements. This model supports a wide linguistic range (trained in over 140 languages) and features a flexible 32K token context window. Gemma 3n can selectively load parameters, optimizing memory and computational efficiency based on the task or device capabilities, making it well-suited for privacy-focused, offline-capable applications and on-device AI solutions.","name":"Gemma 3n 4B","url":"https://sup.ai/models/google-gemma-3n-e4b-it"},"position":90},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemma-4-26b-a4b-it","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"262,144"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Gemma 4 26B A4B IT is an instruction-tuned Mixture-of-Experts (MoE) model from Google DeepMind. Despite 25.2B total parameters, only 3.8B activate per token during inference, delivering near-31B quality at a fraction of the compute cost. Supports multimodal input including text, images, and video (up to 60s at 1fps). Features a 256K token context window, native function calling, configurable thinking/reasoning mode, and structured output support. Released under Apache 2.0.","name":"Gemma 4 26B A4B","url":"https://sup.ai/models/google-gemma-4-26b-a4b-it"},"position":91},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-gemma-4-31b-it","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Gemma 4 31B Instruct is Google DeepMind's 30.7B dense multimodal model supporting text and image input with text output. Features a 256K token context window, configurable thinking/reasoning mode, native function calling, and multilingual support across 140+ languages. Strong on coding, reasoning, and document understanding tasks. Apache 2.0 license.","name":"Gemma 4 31B","url":"https://sup.ai/models/google-gemma-4-31b-it"},"position":92},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4-32b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"A highly cost-effective 32B foundation model with enhanced capabilities in tool use, online search, and code-related intelligent tasks. Pre-trained on 15T of high-quality data including abundant synthetic reasoning data, it performs comparably to much larger models on many benchmarks. At just $0.1 per million tokens for both input and output, it delivers exceptional value for production workloads requiring tool invocation, information extraction, and code generation.","name":"GLM 4 32B","url":"https://sup.ai/models/zai-glm-4-32b"},"position":93},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4.5","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"96,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"The most powerful GLM reasoning model with 355B total parameters and 32B active per forward pass using Mixture-of-Experts architecture. GLM-4.5 ranks second globally among all models on aggregated benchmarks, first among domestic and open-source models. Purpose-built for agent-oriented applications, it excels at tool invocation, web browsing, software engineering, and front-end development. Supports hybrid reasoning modes for both complex thinking and instant responses.","name":"GLM 4.5","url":"https://sup.ai/models/zai-glm-4.5"},"position":94},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4.5-air","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"96,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"A streamlined, efficient agent-focused model using Mixture-of-Experts architecture. With 106B total parameters but only 12B active per task, this model delivers impressive intelligence while remaining fast and cost-effective. Purpose-built for agentic applications, it excels at tool use and autonomous workflows. The thinking capabilities provide transparency in decision-making. With 128K context and 96K output, it handles substantial tasks comfortably. Perfect for production agent systems where you need reliability and efficiency without breaking the budget.","name":"GLM 4.5 Air","url":"https://sup.ai/models/zai-glm-4.5-air"},"position":95},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4.5-airx","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"96,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"The high-speed variant of GLM-4.5-Air, delivering ultra-fast response times while maintaining strong performance. With 106B total parameters and 12B active per forward pass, it combines the efficiency of the Air architecture with optimized inference speed exceeding 100 tokens per second. Ideal for low-latency production deployments where speed matters alongside intelligent agent capabilities.","name":"GLM 4.5 AirX","url":"https://sup.ai/models/zai-glm-4.5-airx"},"position":96},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4.5-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"96,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"A completely free GLM model with strong reasoning, coding, and agent capabilities. Despite being free, it delivers impressive performance suitable for a wide range of tasks including development workflows, agent applications, and general reasoning. With 200K context and thinking support, it provides substantial capability at zero cost - perfect for experimentation, prototyping, and budget-sensitive production use.","name":"GLM 4.5 Flash","url":"https://sup.ai/models/zai-glm-4.5-flash"},"position":97},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4.5-x","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"96,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"The premium high-speed variant of GLM-4.5, delivering the full reasoning power of the flagship 355B MoE model with ultra-fast inference. Optimized for scenarios requiring both strong reasoning capabilities and rapid response times, it provides the best of both worlds for demanding production workloads. Ideal for interactive agent applications and real-time coding assistance where latency is critical.","name":"GLM 4.5 X","url":"https://sup.ai/models/zai-glm-4.5-x"},"position":98},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4.5v","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"64,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"A visual reasoning model based on the MoE architecture with 106B total parameters and 12B active. Achieves state-of-the-art performance among open-source VLMs of its scale across image, video, document understanding, and GUI tasks. Features a flexible thinking mode toggle for balancing speed and reasoning depth. Excels at webpage code generation from screenshots, object detection, document parsing, and long video analysis.","name":"GLM 4.5V","url":"https://sup.ai/models/zai-glm-4.5v"},"position":99},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4.6","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"96,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"The latest and most capable GLM model with comprehensive improvements across all domains. This versatile model excels at real-world coding, handles long contexts up to 200K tokens, and delivers strong performance in reasoning, research, writing, and agentic workflows. With thinking capabilities and an impressive 96K output window, it tackles diverse professional tasks with confidence. The well-rounded upgrade brings enhanced capabilities across the board while maintaining excellent value. Choose this for sophisticated work requiring versatility and depth.","name":"GLM 4.6","url":"https://sup.ai/models/zai-glm-4.6"},"position":100},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4.6v","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"24,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"A capable multimodal model achieving state-of-the-art visual understanding among models of similar scale. GLM 4.6V combines strong image analysis with the reasoning and tool use capabilities of the GLM family. With 128K context support and vision capabilities, it handles image understanding, document analysis, and visual reasoning tasks effectively. An excellent choice for multimodal workflows where you need reliable visual comprehension without premium pricing.","name":"GLM 4.6V","url":"https://sup.ai/models/zai-glm-4.6v"},"position":101},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4.6v-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"24,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"A completely free multimodal model with native function calling support from the GLM-4.6V series. Handles image, video, and document understanding at zero cost while supporting tool invocation for building multimodal agents. With 128K context, it provides substantial capability for visual understanding workflows without any API costs.","name":"GLM 4.6V Flash","url":"https://sup.ai/models/zai-glm-4.6v-flash"},"position":102},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4.6v-flashx","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"24,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"A lightweight, high-speed multimodal model from the GLM-4.6V series with native function calling and thinking mode support. Delivers fast visual understanding at a fraction of the cost of the flagship GLM-4.6V while maintaining strong capabilities across image, video, and document tasks. Ideal for production multimodal agents requiring low latency and affordable pricing.","name":"GLM 4.6V FlashX","url":"https://sup.ai/models/zai-glm-4.6v-flashx"},"position":103},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4.7","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"204,800"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"The latest and most capable GLM model with comprehensive improvements across all domains. This versatile model excels at real-world coding, handles long contexts up to 205K tokens, and delivers strong performance in reasoning, research, writing, and agentic workflows. With thinking capabilities and an impressive 131K output window, it tackles diverse professional tasks with confidence. The well-rounded upgrade brings enhanced capabilities across the board while maintaining excellent value. Choose this for sophisticated work requiring versatility and depth.","name":"GLM 4.7","url":"https://sup.ai/models/zai-glm-4.7"},"position":104},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4.7-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"A completely free model from the GLM-4.7 series that achieves open-source SOTA scores among comparable-sized models on SWE-bench Verified and agent benchmarks. Excels at both frontend and backend development, plus general tasks like writing, translation, and role-playing. With 200K context, thinking support, and zero cost, it provides exceptional value for development workflows and agent applications.","name":"GLM 4.7 Flash","url":"https://sup.ai/models/zai-glm-4.7-flash"},"position":105},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-4.7-flashx","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"A lightweight, high-speed variant of GLM-4.7 delivering enhanced general capabilities and optimized agentic coding at a fraction of the cost. With 200K context, thinking support, and rapid inference, it balances strong programming ability with affordability. Ideal for high-throughput development workflows and agent systems where speed and cost-efficiency matter.","name":"GLM 4.7 FlashX","url":"https://sup.ai/models/zai-glm-4.7-flashx"},"position":106},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-5","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"202,800"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"Zai's new-generation flagship foundation model designed for Agentic Engineering. GLM-5 delivers state-of-the-art open-source performance in coding and agent capabilities, with usability in real programming scenarios approaching Claude Opus 4.5. Built for complex system engineering and long-range agent tasks, it provides reliable productivity across demanding workflows. With 203K context, 131K output, thinking capabilities, and implicit caching, it excels at sophisticated agentic applications requiring depth and persistence.","name":"GLM 5","url":"https://sup.ai/models/zai-glm-5"},"position":107},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-5-turbo","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"GLM 5 Turbo is a foundation model deeply optimized for agentic scenarios. It has been specifically optimized since the training phase for core agent requirements such as tool invocation, instruction following, scheduled and persistent tasks, and long-chain execution. It delivers substantial improvements over GLM-5 in agent workflows, with enhanced stability and reliability in multi-step tasks, stronger comprehension of complex multi-layered instructions, and better time-aware execution for long-running tasks.","name":"GLM 5 Turbo","url":"https://sup.ai/models/zai-glm-5-turbo"},"position":108},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-5.1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"202,752"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"Zai's flagship foundation model designed for long-horizon autonomous tasks. GLM-5.1 achieves 58.4 on SWE-Bench Pro, surpassing GPT-5.4, with overall performance aligned with Claude Opus 4.6. Built for sustained autonomous operation on single tasks for up to 8 hours, completing the full loop from planning and execution to iterative optimization. With 203K context, 128K output, thinking capabilities, and tool support, it excels at complex system engineering requiring depth, persistence, and production-grade deliverables.","name":"GLM 5.1","url":"https://sup.ai/models/zai-glm-5.1"},"position":109},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/zai-glm-5v-turbo","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Zai"},"category":"AI Model","description":"GLM-5V-Turbo is Zai's multimodal coding foundation model with powerful vision understanding capabilities, supporting images, video, and files. Optimized for agentic scenarios including frontend code generation from design mockups, autonomous GUI exploration, and visual debugging. Features a 200K context window and 128K max output, with support for thinking mode and tool invocation. Excels at document comprehension across PDFs, video object tracking, and webpage recreation from screenshots.","name":"GLM 5V Turbo","url":"https://sup.ai/models/zai-glm-5v-turbo"},"position":110},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alpindale-goliath-120b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"6,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"1,024"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alpindale"},"category":"AI Model","description":"A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.","name":"Goliath 120B","url":"https://sup.ai/models/alpindale-goliath-120b"},"position":111},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-audio","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"The GPT Audio model is OpenAI's first generally available audio model. It features an upgraded decoder for more natural sounding voices and maintains better voice consistency.","name":"GPT Audio","url":"https://sup.ai/models/openai-gpt-audio"},"position":112},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-audio-mini","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"A cost-efficient version of GPT Audio. It features an upgraded decoder for more natural sounding voices and maintains better voice consistency.","name":"GPT Audio Mini","url":"https://sup.ai/models/openai-gpt-audio-mini"},"position":113},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-oss-120b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT OSS 120B is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.","name":"GPT OSS 120B","url":"https://sup.ai/models/openai-gpt-oss-120b"},"position":114},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-oss-20b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT OSS 20B is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware.","name":"GPT OSS 20B","url":"https://sup.ai/models/openai-gpt-oss-20b"},"position":115},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-oss-safeguard-20b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT OSS Safeguard 20B is a safety reasoning model from OpenAI built upon GPT OSS 20B. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, LLM filtering, and trust and safety labeling.","name":"GPT OSS Safeguard 20B","url":"https://sup.ai/models/openai-gpt-oss-safeguard-20b"},"position":116},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-3.5-turbo","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"16,385"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.","name":"GPT-3.5 Turbo","url":"https://sup.ai/models/openai-gpt-3.5-turbo"},"position":117},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-3.5-turbo-16k","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"16,385"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"This model offers four times the context length of GPT-3.5 Turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost.","name":"GPT-3.5 Turbo 16K","url":"https://sup.ai/models/openai-gpt-3.5-turbo-16k"},"position":118},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-3.5-turbo-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"4,095"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations.","name":"GPT-3.5 Turbo Instruct","url":"https://sup.ai/models/openai-gpt-3.5-turbo-instruct"},"position":119},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-4","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,191"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"OpenAI's GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning capabilities.","name":"GPT-4","url":"https://sup.ai/models/openai-gpt-4"},"position":120},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-4-turbo","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling. Training data: up to December 2023.","name":"GPT-4 Turbo","url":"https://sup.ai/models/openai-gpt-4-turbo"},"position":121},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-4.1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding, instruction compliance, and multimodal understanding benchmarks.","name":"GPT-4.1","url":"https://sup.ai/models/openai-gpt-4.1"},"position":122},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-4.1-mini","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and shows strong coding ability and vision understanding.","name":"GPT-4.1 Mini","url":"https://sup.ai/models/openai-gpt-4.1-mini"},"position":123},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-4.1-nano","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"For tasks that demand low latency, GPT-4.1 Nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window. It's ideal for tasks like classification or autocompletion.","name":"GPT-4.1 Nano","url":"https://sup.ai/models/openai-gpt-4.1-nano"},"position":124},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-4o","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-4o is OpenAI's multimodal AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of GPT-4 Turbo while being twice as fast and 50% more cost-effective.","name":"GPT-4o","url":"https://sup.ai/models/openai-gpt-4o"},"position":125},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-4o-audio","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"The GPT-4o Audio model adds support for audio inputs as prompts. This enhancement allows the model to detect nuances within audio recordings and add depth to generated user experiences.","name":"GPT-4o Audio","url":"https://sup.ai/models/openai-gpt-4o-audio"},"position":126},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-4o-mini","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-4o Mini is OpenAI's advanced small model, many multiples more affordable than other recent frontier models. It maintains SOTA intelligence while being significantly more cost-effective.","name":"GPT-4o Mini","url":"https://sup.ai/models/openai-gpt-4o-mini"},"position":127},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-4o-mini-search","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-4o Mini Search is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.","name":"GPT-4o Mini Search","url":"https://sup.ai/models/openai-gpt-4o-mini-search"},"position":128},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-4o-search","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-4o Search is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.","name":"GPT-4o Search","url":"https://sup.ai/models/openai-gpt-4o-search"},"position":129},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"OpenAI's original GPT-5 flagship excelling at complex reasoning, broad knowledge, advanced coding, and multi-step agentic tasks.","name":"GPT-5","url":"https://sup.ai/models/openai-gpt-5"},"position":130},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5-chat","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5 Chat is designed for advanced, natural, multimodal, and context-aware conversations for enterprise applications.","name":"GPT-5 Chat","url":"https://sup.ai/models/openai-gpt-5-chat"},"position":131},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5-codex","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5 Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review.","name":"GPT-5 Codex","url":"https://sup.ai/models/openai-gpt-5-codex"},"position":132},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5-mini","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"The sweet spot for everyday AI work? intelligent, fast, and affordable. This model excels at reasoning, conversation, and general tasks with an optimal balance of capability and cost. With 400K context, multimodal support, and thinking capabilities, it handles most professional work confidently. The GPT-5 architecture delivers reliable quality across coding, writing, analysis, and problem-solving. Our most popular choice for teams who need consistent, high-quality performance without premium pricing. An excellent general-purpose workhorse.","name":"GPT-5 Mini","url":"https://sup.ai/models/openai-gpt-5-mini"},"position":133},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5-nano","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"Lightning-fast and incredibly cost-effective for high-throughput workloads. This model specializes in straightforward instructions and classification tasks where speed is essential. With a massive 400K context window and multimodal support, it processes large volumes of simple tasks efficiently. The thinking capability is tuned for quick, decisive responses rather than deep contemplation. Perfect for production systems handling thousands of simple requests, real-time classification, or any scenario requiring fast, economical processing with GPT-5 architecture.","name":"GPT-5 Nano","url":"https://sup.ai/models/openai-gpt-5-nano"},"position":134},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5-pro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"272,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"The ultimate thinking machine for problems that demand maximum intelligence and computational effort. GPT-5 Pro allocates massive compute resources to think deeply and thoroughly about the hardest challenges. With an extraordinary 272K output window and extensive thinking capabilities, this model tackles problems other AIs simply cannot solve. Requests may take minutes to complete as it works through complex reasoning chains. For cutting-edge research, groundbreaking problem-solving, and situations where correctness is paramount and time is secondary.","name":"GPT-5 Pro","url":"https://sup.ai/models/openai-gpt-5-pro"},"position":135},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"OpenAI's flagship model with adaptive thinking that allocates computational effort based on question complexity. This model excels at sophisticated reasoning, deep real-world knowledge, advanced coding challenges, and complex multi-step workflows. It intelligently spends more time on hard problems while responding quickly to simpler ones. Perfect for professional work requiring OpenAI's best capabilities across reasoning, knowledge, and technical execution.","name":"GPT-5.1","url":"https://sup.ai/models/openai-gpt-5.1"},"position":136},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.1-chat","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.1 Chat is the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively think on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning.","name":"GPT-5.1 Chat","url":"https://sup.ai/models/openai-gpt-5.1-chat"},"position":137},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.1-codex","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.1 Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs.","name":"GPT-5.1 Codex","url":"https://sup.ai/models/openai-gpt-5.1-codex"},"position":138},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.1-codex-max","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.1 Codex Max is OpenAI's agentic coding model, designed for long-running, high-context software development tasks. It is based on an updated version of the 5.1 reasoning stack and trained on agentic workflows spanning software engineering, mathematics, and research. GPT-5.1 Codex Max delivers faster performance, improved reasoning, and higher token efficiency across the development lifecycle.","name":"GPT-5.1 Codex Max","url":"https://sup.ai/models/openai-gpt-5.1-codex-max"},"position":139},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.1-codex-mini","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"100,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.1 Codex Mini is a smaller and faster version of GPT-5.1 Codex.","name":"GPT-5.1 Codex Mini","url":"https://sup.ai/models/openai-gpt-5.1-codex-mini"},"position":140},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.1-instant","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"A conversational variant of GPT-5 with warmer tone, improved instruction following, and adaptive reasoning. Designed for purely conversational applications rather than research.","name":"GPT-5.1 Instant","url":"https://sup.ai/models/openai-gpt-5.1-instant"},"position":141},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.1-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"OpenAI's premier thinking model with precisely tuned adaptive reasoning. This upgraded version excels at complex analytical tasks, sophisticated coding, and multi-step problem-solving with transparent thought processes.","name":"GPT-5.1 Thinking","url":"https://sup.ai/models/openai-gpt-5.1-thinking"},"position":142},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.2","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"OpenAI's best general-purpose model, part of the GPT-5 flagship model family. GPT-5.2 is their most intelligent model yet for both general and agentic tasks. With a 400K context window, multimodal capabilities including image generation, and advanced reasoning, this model excels at sophisticated coding, complex analysis, and multi-step workflows. The ideal choice for professional work requiring OpenAI's cutting-edge capabilities.","name":"GPT-5.2","url":"https://sup.ai/models/openai-gpt-5.2"},"position":143},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.2-chat","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.2 Chat is the fast, lightweight member of the 5.2 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively think on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning.","name":"GPT-5.2 Chat","url":"https://sup.ai/models/openai-gpt-5.2-chat"},"position":144},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.2-codex","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.2 Codex is an upgraded version of GPT-5.1 Codex optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1 Codex, 5.2 Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Codex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically, providing fast responses for small tasks while sustaining extended multi-hour runs for large projects.","name":"GPT-5.2 Codex","url":"https://sup.ai/models/openai-gpt-5.2-codex"},"position":145},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.2-pro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"The ultimate version of GPT-5.2 that produces smarter and more precise responses. This model allocates massive compute resources to think deeply and thoroughly about the hardest challenges. With a 400K context window, multimodal capabilities including image generation, and maximum reasoning power, it tackles problems that require the highest quality thinking available. For cutting-edge research, groundbreaking problem-solving, and situations where precision and correctness are paramount.","name":"GPT-5.2 Pro","url":"https://sup.ai/models/openai-gpt-5.2-pro"},"position":146},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.3-chat","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.3 Chat is an update to ChatGPT's most-used model that makes everyday conversations smoother, more useful, and more directly helpful. It delivers more accurate answers with better contextualization and significantly reduces unnecessary refusals, caveats, and overly cautious phrasing that can interrupt conversational flow.","name":"GPT-5.3 Chat","url":"https://sup.ai/models/openai-gpt-5.3-chat"},"position":147},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.3-codex","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.3 Codex is OpenAI's most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2 Codex with the broader reasoning and professional knowledge capabilities of GPT-5.2. It achieves state-of-the-art results on SWE-Bench Pro and strong performance on Terminal-Bench 2.0 and OSWorld-Verified, reflecting improved multi-language coding, terminal proficiency, and real-world computer-use skills. The model is optimized for long-running, tool-using workflows and supports interactive steering during execution, making it suitable for complex development tasks, debugging, deployment, and iterative product work. Beyond coding, GPT-5.3 Codex performs strongly on structured knowledge-work benchmarks such as GDPval, supporting tasks like document drafting, spreadsheet analysis, slide creation, and operational research across domains.","name":"GPT-5.3 Codex","url":"https://sup.ai/models/openai-gpt-5.3-codex"},"position":148},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.4","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.4 is OpenAI's latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling high-context reasoning, coding, and multimodal analysis within the same workflow. The model delivers improved performance in coding, document understanding, tool use, and instruction following. It is designed as a strong default for both general-purpose tasks and software engineering, capable of generating production-quality code, synthesizing information across multiple sources, and executing complex multi-step workflows with fewer iterations and greater token efficiency.","name":"GPT-5.4","url":"https://sup.ai/models/openai-gpt-5.4"},"position":149},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.4-mini","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.4 Mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads. With a 400K context window and 128K max output, it supports text and image inputs with reasoning token support. It delivers strong performance in coding, computer use, and sub-agent scenarios while maintaining significantly lower cost and latency than the full GPT-5.4.","name":"GPT-5.4 Mini","url":"https://sup.ai/models/openai-gpt-5.4-mini"},"position":150},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.4-nano","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"400,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.4 Nano is designed for tasks where speed and cost matter most, such as classification, data extraction, ranking, and sub-agents. With a 400K context window and 128K max output, it supports text and image inputs with reasoning token support. It delivers GPT-5.4-class capabilities at the lowest price point in the family, making it ideal for high-throughput production workloads.","name":"GPT-5.4 Nano","url":"https://sup.ai/models/openai-gpt-5.4-nano"},"position":151},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.4-pro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.4 Pro is OpenAI's most advanced model, building on GPT-5.4's unified architecture with enhanced reasoning capabilities for complex, high-stakes tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs. Optimized for step-by-step reasoning, instruction following, and accuracy, GPT-5.4 Pro excels at agentic coding, long-context workflows, and multi-step problem solving.","name":"GPT-5.4 Pro","url":"https://sup.ai/models/openai-gpt-5.4-pro"},"position":152},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.5","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.5 is OpenAI's newest frontier model for the most complex professional work. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, delivering top-tier performance across coding, professional knowledge, and multi-step agentic tasks. Reasoning effort supports none, low, medium (default), high, and extra-high.","name":"GPT-5.5","url":"https://sup.ai/models/openai-gpt-5.5"},"position":153},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-gpt-5.5-pro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"GPT-5.5 Pro uses more compute to think harder and provide consistently better answers than GPT-5.5. Built for the toughest problems, it supports multi-turn Responses API requests including through the Batch API. Requests may take several minutes to finish, so background mode is recommended.","name":"GPT-5.5 Pro","url":"https://sup.ai/models/openai-gpt-5.5-pro"},"position":154},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/ibm-granite-4.0-h-micro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"IBM"},"category":"AI Model","description":"Granite-4.0-H-Micro is a 3B parameter from the Granite 4 family of models. These models are the latest in a series of models released by IBM. They are fine-tuned for long context tool calling. ","name":"Granite 4.0 Micro","url":"https://sup.ai/models/ibm-granite-4.0-h-micro"},"position":155},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xai-grok-2-vision","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"xAI"},"category":"AI Model","description":"xAI's legacy vision model with 32K context supporting text and image inputs with function calling and structured outputs. Superseded by Grok 4.","name":"Grok 2 Vision","url":"https://sup.ai/models/xai-grok-2-vision"},"position":156},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xai-grok-3","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"xAI"},"category":"AI Model","description":"xAI's previous-generation flagship text model with 131K context, function calling, and structured output support. Superseded by the Grok 4 series.","name":"Grok 3","url":"https://sup.ai/models/xai-grok-3"},"position":157},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xai-grok-3-mini","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"xAI"},"category":"AI Model","description":"xAI's compact thinking model with 131K context and reasoning capabilities at an affordable price point. Supports function calling and structured outputs. Superseded by Grok 4.1 Fast Reasoning.","name":"Grok 3 Mini","url":"https://sup.ai/models/xai-grok-3-mini"},"position":158},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xai-grok-4","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"256,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"xAI"},"category":"AI Model","description":"xAI's premier flagship model combining exceptional natural language understanding, mathematical prowess, and sophisticated reasoning. This well-rounded model excels across diverse domains?from creative writing to complex calculations to logical problem-solving. With 256K context for both input and output, thinking capabilities, and vision support, it handles virtually any task with intelligence and nuance. The true jack-of-all-trades that masters most of them. Perfect when you need a single model that performs excellently across the board.","name":"Grok 4","url":"https://sup.ai/models/xai-grok-4"},"position":159},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xai-grok-4-fast-non-reasoning","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"2M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"256,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"xAI"},"category":"AI Model","description":"xAI's previous-generation fast multimodal model with 2M context and cost-efficient performance. Designed for rapid agentic workflows without extended reasoning.","name":"Grok 4 Fast","url":"https://sup.ai/models/xai-grok-4-fast-non-reasoning"},"position":160},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xai-grok-4-fast-reasoning","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"2M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"256,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"xAI"},"category":"AI Model","description":"xAI's previous-generation thinking model with 2M context and cost-efficient agentic performance. Combined rapid execution with reasoning capabilities.","name":"Grok 4 Fast Reasoning","url":"https://sup.ai/models/xai-grok-4-fast-reasoning"},"position":161},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xai-grok-4.1-fast-non-reasoning","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"2M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"30,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"xAI"},"category":"AI Model","description":"xAI's speed demon for agentic workflows requiring rapid, accurate tool execution. With an extraordinary 2M context window, this model processes massive amounts of information while maintaining blazing-fast response times. Optimized specifically for tool calling and task completion, it excels at real-world applications like customer support, financial analysis, and automated workflows where speed is critical. The non-reasoning variant prioritizes quick responses over extended thought processes. Choose this when you need rapid, reliable agent performance at incredible value.","name":"Grok 4.1 Fast","url":"https://sup.ai/models/xai-grok-4.1-fast-non-reasoning"},"position":162},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xai-grok-4.1-fast-reasoning","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"2M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"30,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"xAI"},"category":"AI Model","description":"xAI's intelligent agent combining massive context, thinking capabilities, and tool mastery. With a 2M context window and reasoning mode, this model thoughtfully navigates complex agentic workflows while maintaining speed. The perfect balance between rapid execution and intelligent decision-making for sophisticated real-world applications. Excels at scenarios requiring both tool orchestration and reasoning?like nuanced customer support, complex financial analysis, and adaptive workflows. Choose this when your agents need to think and act intelligently.","name":"Grok 4.1 Fast Reasoning","url":"https://sup.ai/models/xai-grok-4.1-fast-reasoning"},"position":163},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xai-grok-4.20-non-reasoning","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"2M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"256,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"xAI"},"category":"AI Model","description":"Grok 4.20 is xAI's latest flagship model without extended reasoning, supporting text and image inputs with a 2M context window. It provides function calling and structured output capabilities for rapid, high-quality responses. Ideal for tasks where speed matters more than deep deliberation.","name":"Grok 4.20","url":"https://sup.ai/models/xai-grok-4.20-non-reasoning"},"position":164},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xai-grok-4.20-multi-agent","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"2M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"256,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"xAI"},"category":"AI Model","description":"Grok 4.20 Multi-Agent is optimized for orchestrating multiple agents that collaborate on research tasks. It coordinates web search and X search across agents to gather, synthesize, and iterate on research in real time, refining results based on intermediate findings. With a 2M context window and reasoning capabilities, it excels at complex research workflows requiring multi-source analysis.","name":"Grok 4.20 Multi-Agent","url":"https://sup.ai/models/xai-grok-4.20-multi-agent"},"position":165},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xai-grok-4.20-reasoning","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"2M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"256,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"xAI"},"category":"AI Model","description":"Grok 4.20 Reasoning is xAI's latest flagship model with extended reasoning capabilities, supporting text and image inputs with a 2M context window. It combines deep thinking with function calling and structured output for complex, multi-step problem solving. Ideal for tasks requiring careful deliberation and analysis.","name":"Grok 4.20 Reasoning","url":"https://sup.ai/models/xai-grok-4.20-reasoning"},"position":166},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xai-grok-code-fast-1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"256,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"xAI"},"category":"AI Model","description":"xAI's lightweight agentic coding model designed for rapid, budget-friendly reasoning with interleaved tool-calling and reasoning traces. Proficient in TypeScript, Python, Java, Rust, C++, and Go. Built for the modern development loop of planning, writing, testing, and debugging. Excels at zero-to-one projects, codebase Q&A, bug fixes, and agentic coding workflows at 4x speed and 1/10th the cost of competing models.","name":"Grok Code Fast 1","url":"https://sup.ai/models/xai-grok-code-fast-1"},"position":167},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nousresearch-hermes-2-pro-llama-3-8b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,192"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Nous Research"},"category":"AI Model","description":"Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.","name":"Hermes 2 Pro Llama 3 8B","url":"https://sup.ai/models/nousresearch-hermes-2-pro-llama-3-8b"},"position":168},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nousresearch-hermes-3-llama-3.1-405b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Nous Research"},"category":"AI Model","description":"Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board. Hermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user. The Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills. Hermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.","name":"Hermes 3 405B Instruct","url":"https://sup.ai/models/nousresearch-hermes-3-llama-3.1-405b"},"position":169},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nousresearch-hermes-3-llama-3.1-70b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"65,536"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Nous Research"},"category":"AI Model","description":"Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board. Hermes 3 70B is a competitive, if not superior finetune of the Llama-3.1 70B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user. The Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.","name":"Hermes 3 70B Instruct","url":"https://sup.ai/models/nousresearch-hermes-3-llama-3.1-70b"},"position":170},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nousresearch-hermes-4-405b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Nous Research"},"category":"AI Model","description":"Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research. It introduces a hybrid reasoning mode, where the model can choose to deliberate internally with ... traces or respond directly, offering flexibility between speed and depth. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. The model is instruction-tuned with an expanded post-training corpus (~60B tokens) emphasizing reasoning traces, improving performance in math, code, STEM, and logical reasoning, while retaining broad assistant utility. It also supports structured outputs, including JSON mode, schema adherence, function calling, and tool use. Hermes 4 is trained for steerability, lower refusal rates, and alignment toward neutral, user-directed behavior.","name":"Hermes 4 405B","url":"https://sup.ai/models/nousresearch-hermes-4-405b"},"position":171},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nousresearch-hermes-4-70b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Nous Research"},"category":"AI Model","description":"Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either respond directly or generate explicit ... reasoning traces before answering. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. This 70B variant is trained with the expanded post-training corpus (~60B tokens) emphasizing verified reasoning data, leading to improvements in mathematics, coding, STEM, logic, and structured outputs while maintaining general assistant performance. It supports JSON mode, schema adherence, function calling, and tool use, and is designed for greater steerability with reduced refusal rates.","name":"Hermes 4 70B","url":"https://sup.ai/models/nousresearch-hermes-4-70b"},"position":172},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/tencent-hunyuan-a13b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Tencent"},"category":"AI Model","description":"Hunyuan-A13B is a 13B active parameter Mixture-of-Experts (MoE) language model developed by Tencent, with a total parameter count of 80B and support for reasoning via Chain-of-Thought. It offers competitive benchmark performance across mathematics, science, coding, and multi-turn reasoning tasks, while maintaining high inference efficiency via Grouped Query Attention (GQA) and quantization support (FP8, GPTQ, etc.).","name":"Hunyuan A13B Instruct","url":"https://sup.ai/models/tencent-hunyuan-a13b-instruct"},"position":173},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/tencent-hy3","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"262,144"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Tencent"},"category":"AI Model","description":"Hy3 preview is a high-efficiency Mixture-of-Experts model from Tencent designed for agentic workflows and production use. It supports configurable reasoning levels across disabled, low, and high modes, allowing it to balance latency and depth of reasoning for a wide range of tasks.","name":"Hy3","url":"https://sup.ai/models/tencent-hy3"},"position":174},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/inflection-inflection-3-pi","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"1,024"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Inflection"},"category":"AI Model","description":"Inflection 3 Pi powers Inflection's Pi chatbot, including backstory, emotional intelligence, productivity, and safety. It has access to recent news, and excels in scenarios like customer support and roleplay. Pi has been trained to mirror your tone and style, if you use more emojis, so will Pi! Try experimenting with various prompts and conversation styles.","name":"Inflection 3 Pi","url":"https://sup.ai/models/inflection-inflection-3-pi"},"position":175},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/inflection-inflection-3-productivity","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"1,024"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Inflection"},"category":"AI Model","description":"Inflection 3 Productivity is optimized for following instructions. It is better for tasks requiring JSON output or precise adherence to provided guidelines. It has access to recent news.","name":"Inflection 3 Productivity","url":"https://sup.ai/models/inflection-inflection-3-productivity"},"position":176},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/prime-intellect-intellect-3","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Prime Intellect"},"category":"AI Model","description":"INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math, code, science, and general reasoning, consistently outperforming many larger frontier models. Designed for strong multi-step problem solving, it maintains high accuracy on structured tasks while remaining efficient at inference thanks to its MoE architecture.","name":"INTELLECT-3","url":"https://sup.ai/models/prime-intellect-intellect-3"},"position":177},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/opengvlab-internvl3-78b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenGVLab"},"category":"AI Model","description":"The InternVL3 series is an advanced multimodal large language model (MLLM). Compared to InternVL 2.5, InternVL3 demonstrates stronger multimodal perception and reasoning capabilities. In addition, InternVL3 is benchmarked against the Qwen2.5 Chat models, whose pre-trained base models serve as the initialization for its language component. Benefiting from Native Multimodal Pre-Training, the InternVL3 series surpasses the Qwen2.5 series in overall text performance.","name":"InternVL3 78B","url":"https://sup.ai/models/opengvlab-internvl3-78b"},"position":178},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/ai21-jamba-large-1.7","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"AI21 Labs"},"category":"AI Model","description":"Jamba Large 1.7 is the latest model in the Jamba open family, offering improvements in grounding, instruction-following, and overall efficiency. Built on a hybrid SSM-Transformer architecture with a 256K context window, it delivers more accurate, contextually grounded responses and better steerability than previous versions.","name":"Jamba Large 1.7","url":"https://sup.ai/models/ai21-jamba-large-1.7"},"position":179},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/kwaipilot-kat-coder-pro-v1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"KwaiPilot"},"category":"AI Model","description":"KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KAT-Coder series. Designed specifically for agentic coding tasks, it excels in real-world software engineering scenarios, achieving 73.4% solve rate on the SWE-Bench Verified benchmark. The model has been optimized for tool-use capability, multi-turn interaction, instruction following, generalization, and comprehensive capabilities through a multi-stage training process, including mid-training, supervised fine-tuning (SFT), reinforcement fine-tuning (RFT), and scalable agentic RL.","name":"KAT Coder Pro V1","url":"https://sup.ai/models/kwaipilot-kat-coder-pro-v1"},"position":180},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/kwaipilot-kat-coder-pro-v2","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"80,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"KwaiPilot"},"category":"AI Model","description":"KAT-Coder-Pro V2 is the latest high-performance model in KwaiKAT's KAT-Coder series, designed for complex enterprise-grade software engineering and SaaS integration. It builds on the agentic coding strengths of earlier versions, with a focus on large-scale production environments, multi-system coordination, and seamless integration across modern software stacks, while also supporting web aesthetics generation to produce production-grade landing pages and presentation decks.","name":"KAT Coder Pro V2","url":"https://sup.ai/models/kwaipilot-kat-coder-pro-v2"},"position":181},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/moonshotai-kimi-k2","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Moonshot AI"},"category":"AI Model","description":"Kimi K2 is a Mixture-of-Experts (MoE) foundation model with 1 trillion total parameters and 32 billion activated parameters. Outperforms leading open-source models across general knowledge reasoning, programming, mathematics, and agent tasks. Context length 256K with automatic context caching, ToolCalls, JSON Mode, Partial Mode, and internet search support.","name":"Kimi K2","url":"https://sup.ai/models/moonshotai-kimi-k2"},"position":182},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/moonshotai-kimi-k2-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"262,144"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Moonshot AI"},"category":"AI Model","description":"A thinking model built on the Kimi K2 foundation with general agentic and reasoning capabilities, specializing in deep reasoning tasks. With a 262K context window, it combines chain-of-thought reasoning with tool calling for complex problem-solving. Supports automatic context caching, ToolCalls, JSON Mode, Partial Mode, and internet search.","name":"Kimi K2 Thinking","url":"https://sup.ai/models/moonshotai-kimi-k2-thinking"},"position":183},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/moonshotai-kimi-k2-thinking-turbo","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"262,144"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Moonshot AI"},"category":"AI Model","description":"The ultimate autonomous thinking agent capable of executing hundreds of sequential tool calls with coherent reasoning throughout. This model can chain 200-300 tool operations without human intervention, maintaining logical consistency across complex multi-step problems. Built specifically as a thinking agent, it reasons step-by-step while acting, achieving state-of-the-art results on the hardest benchmarks. With a massive 262K equal input/output window, it handles truly extensive workflows. The Turbo variant delivers this capability at exceptional speed. For complex autonomous projects requiring persistent reasoning and action.","name":"Kimi K2 Thinking Turbo","url":"https://sup.ai/models/moonshotai-kimi-k2-thinking-turbo"},"position":184},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/moonshotai-kimi-k2-turbo","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Moonshot AI"},"category":"AI Model","description":"High-speed version of Kimi K2, always aligned with the latest kimi-k2. Same model parameters with output speed up to 60 tokens/sec (max 100 tokens/sec). Context length 262K with automatic context caching, ToolCalls, JSON Mode, Partial Mode, and internet search support.","name":"Kimi K2 Turbo","url":"https://sup.ai/models/moonshotai-kimi-k2-turbo"},"position":185},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/moonshotai-kimi-k2.5","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"252,144"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Moonshot AI"},"category":"AI Model","description":"Kimi's most versatile model featuring a native multimodal architecture that supports both visual and text input. Combines thinking and non-thinking modes with dialogue and agent capabilities. With a 262K context window and massive 252K output capacity, it handles complex multimodal workflows at an exceptional price point.","name":"Kimi K2.5","url":"https://sup.ai/models/moonshotai-kimi-k2.5"},"position":186},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/moonshotai-kimi-k2.6","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"252,144"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Moonshot AI"},"category":"AI Model","description":"Kimi's latest and most intelligent model. Next-generation native multimodal architecture with breakthroughs in long-horizon coding, instruction compliance, and self-correction. Handles text, image, and video input across thinking and non-thinking modes, with a 262K context window and 252K output capacity for complex multimodal and agent workflows.","name":"Kimi K2.6","url":"https://sup.ai/models/moonshotai-kimi-k2.6"},"position":187},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/liquid-lfm-2-24b-a2b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Liquid"},"category":"AI Model","description":"LFM2-24B-A2B is the largest model in the LFM2 family of hybrid architectures designed for efficient on-device deployment. Built as a 24B parameter Mixture-of-Experts model with only 2B active parameters per token, it delivers high-quality generation while maintaining low inference costs. The model fits within 32 GB of RAM, making it practical to run on consumer laptops and desktops without sacrificing capability.","name":"LFM 2 24B A2B","url":"https://sup.ai/models/liquid-lfm-2-24b-a2b"},"position":188},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/liquid-lfm2-8b-a1b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Liquid"},"category":"AI Model","description":"LFM2-8B-A1B is an efficient on-device Mixture-of-Experts (MoE) model from Liquid AI's LFM2 family, built for fast, high-quality inference on edge hardware. It uses 8.3B total parameters with only ~1.5B active per token, delivering strong performance while keeping compute and memory usage low-making it ideal for phones, tablets, and laptops.","name":"LFM 2 8B A1B","url":"https://sup.ai/models/liquid-lfm2-8b-a1b"},"position":189},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/liquid-lfm-2.2-6b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Liquid"},"category":"AI Model","description":"LFM2 is a new generation of hybrid models developed by Liquid AI, specifically designed for edge AI and on-device deployment. It sets a new standard in terms of quality, speed, and memory efficiency.","name":"LFM 2.2 6B","url":"https://sup.ai/models/liquid-lfm-2.2-6b"},"position":190},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/liquid-lfm-2.5-1.2b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Liquid"},"category":"AI Model","description":"LFM2.5-1.2B-Instruct is a compact, high-performance instruction-tuned model built for fast on-device AI. It delivers strong chat quality in a 1.2B parameter footprint, with efficient edge inference and broad runtime support.","name":"LFM 2.5 1.2B Instruct","url":"https://sup.ai/models/liquid-lfm-2.5-1.2b-instruct"},"position":191},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/liquid-lfm-2.5-1.2b-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Liquid"},"category":"AI Model","description":"LFM2.5-1.2B-Thinking is a lightweight reasoning-focused model optimized for agentic tasks, data extraction, and RAG-while still running comfortably on edge devices. It supports long context (up to 32K tokens) and is designed to provide higher-quality \"thinking\" responses in a small 1.2B model.","name":"LFM 2.5 1.2B Thinking","url":"https://sup.ai/models/liquid-lfm-2.5-1.2b-thinking"},"position":192},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/inclusionai-ling-2.6-1t","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"inclusionAI"},"category":"AI Model","description":"Ling-2.6-1T is an instant (instruct) model from inclusionAI and the company's trillion-parameter flagship, designed for real-world agents that require fast execution and high efficiency at scale. It uses a \"fast thinking\" strategy to deliver strong performance on reasoning, coding, and agent tasks while keeping latency and token usage low.","name":"Ling 2.6 1T","url":"https://sup.ai/models/inclusionai-ling-2.6-1t"},"position":193},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/inclusionai-ling-2.6-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"inclusionAI"},"category":"AI Model","description":"Ling-2.6-flash is an instant (instruct) model from inclusionAI with 104B total parameters and 7.4B active parameters, designed for real-world agents that require fast responses, strong execution, and high token efficiency.","name":"Ling 2.6 Flash","url":"https://sup.ai/models/inclusionai-ling-2.6-flash"},"position":194},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-3-70b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,192"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"Meta's Llama 3 70B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong performance compared to leading closed-source models in human evaluations. Usage of this model is subject to Meta's Acceptable Use Policy.","name":"Llama 3 70B Instruct","url":"https://sup.ai/models/meta-llama-3-70b-instruct"},"position":195},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-3-8b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,192"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"Meta's Llama 3 8B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong performance compared to leading closed-source models in human evaluations. Usage of this model is subject to Meta's Acceptable Use Policy.","name":"Llama 3 8B Instruct","url":"https://sup.ai/models/meta-llama-3-8b-instruct"},"position":196},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/sao10k-l3-lunaris-8b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,192"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Sao10k"},"category":"AI Model","description":"Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge. This model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning. For best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.","name":"Llama 3 8B Lunaris","url":"https://sup.ai/models/sao10k-l3-lunaris-8b"},"position":197},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/sao10k-l3-euryale-70b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,192"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Sao10k"},"category":"AI Model","description":"Euryale 70B v2.1 is a model focused on creative roleplay. Better prompt adherence. Better anatomy / spatial awareness. Adapts much better to unique and custom formatting / reply formats. Very creative, lots of unique swipes. Is not restrictive during roleplays.","name":"Llama 3 Euryale 70B v2.1","url":"https://sup.ai/models/sao10k-l3-euryale-70b"},"position":198},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-3.1-405b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"Meta's Llama 3.1 405B base pre-trained model. It has demonstrated strong performance compared to leading closed-source models in human evaluations. Usage of this model is subject to Meta's Acceptable Use Policy.","name":"Llama 3.1 405B","url":"https://sup.ai/models/meta-llama-3.1-405b"},"position":199},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-3.1-405b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"Meta's Llama 3.1 405B instruct-tuned version is optimized for high quality dialogue usecases with 128K context. It has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations. Usage of this model is subject to Meta's Acceptable Use Policy.","name":"Llama 3.1 405B Instruct","url":"https://sup.ai/models/meta-llama-3.1-405b-instruct"},"position":200},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/sao10k-l3.1-70b-hanami-x1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"16,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Sao10k"},"category":"AI Model","description":"An experiment over Euryale v2.2.","name":"Llama 3.1 70B Hanami x1","url":"https://sup.ai/models/sao10k-l3.1-70b-hanami-x1"},"position":201},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-3.1-70b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"Meta's Llama 3.1 70B instruct-tuned version is optimized for high quality dialogue usecases with 128K context. It has demonstrated strong performance compared to leading closed-source models in human evaluations. Usage of this model is subject to Meta's Acceptable Use Policy.","name":"Llama 3.1 70B Instruct","url":"https://sup.ai/models/meta-llama-3.1-70b-instruct"},"position":202},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-3.1-8b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"16,384"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"Meta's Llama 3.1 8B instruct-tuned version is fast and efficient. It has demonstrated strong performance compared to leading closed-source models in human evaluations. Usage of this model is subject to Meta's Acceptable Use Policy.","name":"Llama 3.1 8B Instruct","url":"https://sup.ai/models/meta-llama-3.1-8b-instruct"},"position":203},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/sao10k-l3.1-euryale-70b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Sao10k"},"category":"AI Model","description":"Euryale L3.1 70B v2.2 is a model focused on creative roleplay. It is the successor of Euryale L3 70B v2.1.","name":"Llama 3.1 Euryale 70B v2.2","url":"https://sup.ai/models/sao10k-l3.1-euryale-70b"},"position":204},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nvidia-llama-3.1-nemotron-70b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"NVIDIA"},"category":"AI Model","description":"NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses. Leveraging Llama 3.1 70B architecture and Reinforcement Learning from Human Feedback (RLHF), it excels in automatic alignment benchmarks. This model is tailored for applications requiring high accuracy in helpfulness and response generation, suitable for diverse user queries across multiple domains. Usage of this model is subject to Meta's Acceptable Use Policy.","name":"Llama 3.1 Nemotron 70B Instruct","url":"https://sup.ai/models/nvidia-llama-3.1-nemotron-70b-instruct"},"position":205},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nvidia-llama-3.1-nemotron-ultra-253b-v1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"NVIDIA"},"category":"AI Model","description":"Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta's Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node. Note: you must include `detailed thinking on` in the system prompt to enable reasoning.","name":"Llama 3.1 Nemotron Ultra 253B v1","url":"https://sup.ai/models/nvidia-llama-3.1-nemotron-ultra-253b-v1"},"position":206},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-3.2-11b-vision-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis. Its ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research. Usage of this model is subject to Meta's Acceptable Use Policy.","name":"Llama 3.2 11B Vision Instruct","url":"https://sup.ai/models/meta-llama-3.2-11b-vision-instruct"},"position":207},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-3.2-1b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"60,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"60,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. Its smaller size allows it to operate efficiently in low-resource environments while maintaining strong task performance. Supporting eight core languages and fine-tunable for more, it is ideal for businesses or developers seeking lightweight yet powerful AI solutions that can operate in diverse multilingual settings without the high computational demand of larger models. Usage of this model is subject to Meta's Acceptable Use Policy.","name":"Llama 3.2 1B Instruct","url":"https://sup.ai/models/meta-llama-3.2-1b-instruct"},"position":208},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-3.2-3b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages. Trained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings. Usage of this model is subject to Meta's Acceptable Use Policy.","name":"Llama 3.2 3B Instruct","url":"https://sup.ai/models/meta-llama-3.2-3b-instruct"},"position":209},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-3.3-70b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"A balanced model combining performance with efficiency for conversational AI. Designed for content creation, enterprise applications, and research with strong language understanding. Handles summarization, classification, sentiment analysis, and code generation.","name":"Llama 3.3 70B","url":"https://sup.ai/models/meta-llama-3.3-70b"},"position":210},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/sao10k-l3.3-euryale-70b-v2.3","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Sao10k"},"category":"AI Model","description":"Euryale L3.3 70B is a model focused on creative roleplay. It is the successor of Euryale L3 70B v2.2.","name":"Llama 3.3 Euryale 70B","url":"https://sup.ai/models/sao10k-l3.3-euryale-70b-v2.3"},"position":211},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nvidia-llama-3.3-nemotron-super-49b-v1.5","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"NVIDIA"},"category":"AI Model","description":"Llama-3.3-Nemotron-Super-49B-v1.5 is a 49B-parameter, English-centric reasoning/chat model derived from Meta's Llama-3.3-70B-Instruct with a 128K context. It's post-trained for agentic workflows (RAG, tool calling) via SFT across math, code, science, and multi-turn chat, followed by multiple RL stages; Reward-aware Preference Optimization (RPO) for alignment, RL with Verifiable Rewards (RLVR) for step-wise reasoning, and iterative DPO to refine tool-use behavior. A distillation-driven Neural Architecture Search (\"Puzzle\") replaces some attention blocks and varies FFN widths to shrink memory footprint and improve throughput, enabling single-GPU (H100/H200) deployment while preserving instruction following and CoT quality. In internal evaluations (NeMo-Skills, up to 16 runs, temp = 0.6, top_p = 0.95), the model reports strong reasoning/coding results, e.g., MATH500 pass@1 = 97.4, AIME-2024 = 87.5, AIME-2025 = 82.71, GPQA = 71.97, LiveCodeBench (24.10-25.02) = 73.58, and MMLU-Pro (CoT) = 79.53. The model targets practical inference efficiency (high tokens/s, reduced VRAM) with Transformers/vLLM support and explicit \"reasoning on/off\" modes (chat-first defaults, greedy recommended when disabled). Suitable for building agents, assistants, and long-context retrieval systems where balanced accuracy-to-cost and reliable tool use matter.","name":"Llama 3.3 Nemotron Super 49B V1.5","url":"https://sup.ai/models/nvidia-llama-3.3-nemotron-super-49b-v1.5"},"position":212},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-4-maverick","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"A multimodal model from the Llama 4 collection with MoE architecture for text and image tasks. Designed for multimodal experiences with vision capabilities.","name":"Llama 4 Maverick 17B","url":"https://sup.ai/models/meta-llama-4-maverick"},"position":213},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-4-scout","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"A compact multimodal model using mixture-of-experts architecture for text and image understanding. Designed for efficient multimodal experiences with vision support.","name":"Llama 4 Scout 17B","url":"https://sup.ai/models/meta-llama-4-scout"},"position":214},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-guard-3-8b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM that generates text indicating whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated. Llama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.","name":"Llama Guard 3 8B","url":"https://sup.ai/models/meta-llama-guard-3-8b"},"position":215},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-guard-4-12b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"163,840"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"163,840"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated. Llama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.","name":"Llama Guard 4 12B","url":"https://sup.ai/models/meta-llama-guard-4-12b"},"position":216},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meta-llama-guard-2-8b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,192"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meta"},"category":"AI Model","description":"This safeguard model has 8B parameters and is based on the Llama 3 family. It can do both prompt and response classification. LlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated. For best results, please use raw prompt input or the completions endpoint, instead of the chat API. Usage of this model is subject to Meta's Acceptable Use Policy.","name":"LlamaGuard 2 8B","url":"https://sup.ai/models/meta-llama-guard-2-8b"},"position":217},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/eleutherai-llemma-7b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"4,096"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"EleutherAI"},"category":"AI Model","description":"Llemma 7B is a language model for mathematics. It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens. Llemma models are particularly strong at chain-of-thought mathematical reasoning and using computational tools for mathematics, such as Python and formal theorem provers.","name":"Llemma 7b","url":"https://sup.ai/models/eleutherai-llemma-7b"},"position":218},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/meituan-longcat-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Meituan"},"category":"AI Model","description":"LongCat-Flash-Chat is a large-scale Mixture-of-Experts (MoE) model with 560B total parameters, of which 18.6B-31.3B (≈27B on average) are dynamically activated per input. It introduces a shortcut-connected MoE design to reduce communication overhead and achieve high throughput while maintaining training stability through advanced scaling strategies such as hyperparameter transfer, deterministic computation, and multi-stage optimization. This release, LongCat-Flash-Chat, is a non-thinking foundation model optimized for conversational and agentic tasks. It supports long context windows up to 128K tokens and shows competitive performance across reasoning, coding, instruction following, and domain benchmarks, with particular strengths in tool use and complex multi-step interactions.","name":"LongCat Flash","url":"https://sup.ai/models/meituan-longcat-flash"},"position":219},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/neversleep-llama-3.1-lumimaid-8b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"NeverSleep"},"category":"AI Model","description":"Lumimaid v0.2 8B is a finetune of Llama 3.1 8B with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. Sloppy chats output were purged. Usage of this model is subject to Meta's Acceptable Use Policy.","name":"Lumimaid v0.2 8B","url":"https://sup.ai/models/neversleep-llama-3.1-lumimaid-8b"},"position":220},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-lyria-3-clip-preview","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"30 second duration clips are priced at $0.04 per clip. Lyria 3 is Google's family of music generation models, available through the Gemini API. With Lyria 3, you can generate high-quality, 48kHz stereo audio from text prompts or from images. These models deliver structural coherence, including vocals, timed lyrics, and full instrumental arrangements. Lyria 3 Clip can generate short clips, loops, previews.","name":"Lyria 3 Clip Preview","url":"https://sup.ai/models/google-lyria-3-clip-preview"},"position":221},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/google-lyria-3-pro-preview","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Google"},"category":"AI Model","description":"Full-length songs are priced at $0.08 per song. Lyria 3 is Google's family of music generation models, available through the Gemini API. With Lyria 3, you can generate high-quality, 48kHz stereo audio from text prompts or from images. These models deliver structural coherence, including vocals, timed lyrics, and full instrumental arrangements. Lyria 3 Pro can generate full-length songs with verses, choruses, bridges.","name":"Lyria 3 Pro Preview","url":"https://sup.ai/models/google-lyria-3-pro-preview"},"position":222},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/arcee-maestro-reasoning","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Arcee"},"category":"AI Model","description":"Maestro Reasoning is Arcee's flagship analysis model: a 32 B-parameter derivative of Qwen 2.5-32 B tuned with DPO and chain-of-thought RL for step-by-step logic. Compared to the earlier 7 B preview, the production 32 B release widens the context window to 128 k tokens and doubles pass-rate on MATH and GSM-8K, while also lifting code completion accuracy. Its instruction style encourages structured \"thought → answer\" traces that can be parsed or hidden according to user preference. That transparency pairs well with audit-focused industries like finance or healthcare where seeing the reasoning path matters. In Arcee Conductor, Maestro is automatically selected for complex, multi-constraint queries that smaller SLMs bounce. ","name":"Maestro Reasoning","url":"https://sup.ai/models/arcee-maestro-reasoning"},"position":223},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-magistral-medium","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Our frontier-class reasoning model release candidate September 2025.","name":"Magistral Medium","url":"https://sup.ai/models/mistral-magistral-medium"},"position":224},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-magistral-small","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Our efficient reasoning model released September 2025.","name":"Magistral Small","url":"https://sup.ai/models/mistral-magistral-small"},"position":225},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/anthracite-magnum-v4-72b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"16,384"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"2,048"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Anthracite"},"category":"AI Model","description":"This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet and Opus. The model is fine-tuned on top of Qwen2.5 72B.","name":"Magnum v4 72B","url":"https://sup.ai/models/anthracite-magnum-v4-72b"},"position":226},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/inception-mercury","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Inception"},"category":"AI Model","description":"Mercury is a diffusion-based large language model from Inception Labs, designed for ultra-fast inference with sub-second latency. It supports a 128K context window, native tool calling, and structured outputs. Mercury excels at general-purpose reasoning, chat, and agent workflows where speed is paramount.","name":"Mercury","url":"https://sup.ai/models/inception-mercury"},"position":227},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/inception-mercury-2","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Inception"},"category":"AI Model","description":"Mercury 2 is an extremely fast reasoning LLM and the first reasoning diffusion LLM (dLLM). Instead of generating tokens sequentially, Mercury 2 produces and refines multiple tokens in parallel, achieving over 1,000 tokens/sec on standard GPUs. It supports tunable reasoning levels, 128K context, native tool use, and schema-aligned JSON output. Built for coding workflows where latency compounds, real-time voice and search, and agent loops.","name":"Mercury 2","url":"https://sup.ai/models/inception-mercury-2"},"position":228},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/inception-mercury-coder","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Inception"},"category":"AI Model","description":"Mercury Coder is a diffusion-based code-specialized language model from Inception Labs, optimized for code generation, editing, and completion with ultra-fast inference. It supports a 128K chat context window, native tool calling, and structured outputs. Ideal for coding agents and development workflows where speed and accuracy are critical.","name":"Mercury Coder","url":"https://sup.ai/models/inception-mercury-coder"},"position":229},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xiaomi-mimo-v2-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Xiaomi"},"category":"AI Model","description":"MiMo-V2-Flash is an open-source foundation language model developed by Xiaomi. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash supports a hybrid-thinking toggle and a 256K context window, and excels at reasoning, coding, and agent scenarios. On SWE-bench Verified and SWE-bench Multilingual, MiMo-V2-Flash ranks as the top #1 open-source model globally, delivering performance comparable to Claude Sonnet 4.5 while costing only about 3.5% as much. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean.","name":"MiMo V2 Flash","url":"https://sup.ai/models/xiaomi-mimo-v2-flash"},"position":230},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xiaomi-mimo-v2-omni","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Xiaomi"},"category":"AI Model","description":"MiMo V2 Omni is a frontier omni-modal model that natively processes image, video, and audio inputs within a unified architecture. It combines strong multimodal perception with agentic capability (visual grounding, multi-step planning, tool use, and code execution), making it well-suited for complex real-world tasks that span modalities. 256K context window.","name":"MiMo V2 Omni","url":"https://sup.ai/models/xiaomi-mimo-v2-omni"},"position":231},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xiaomi-mimo-v2-pro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Xiaomi"},"category":"AI Model","description":"MiMo V2 Pro is Xiaomi's flagship foundation model, featuring over 1T total parameters and a 1M context length, deeply optimized for agentic scenarios. It ranks among the global top tier in standard benchmarks, with perceived performance approaching that of Opus 4.6. MiMo-V2-Pro is designed to serve as the brain of agent systems, orchestrating complex workflows, driving production engineering tasks, and delivering results reliably.","name":"MiMo V2 Pro","url":"https://sup.ai/models/xiaomi-mimo-v2-pro"},"position":232},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xiaomi-mimo-v2.5","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Xiaomi"},"category":"AI Model","description":"MiMo-V2.5 is a native omnimodal model by Xiaomi. It delivers Pro-level agentic performance at roughly half the inference cost, while surpassing MiMo-V2-Omni in multimodal perception across image and video understanding.","name":"MiMo V2.5","url":"https://sup.ai/models/xiaomi-mimo-v2.5"},"position":233},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/xiaomi-mimo-v2.5-pro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.05M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Xiaomi"},"category":"AI Model","description":"MiMo-V2.5-Pro is Xiaomi's flagship model, delivering strong performance in general agentic capabilities, complex software engineering, and long-horizon tasks, with top rankings on benchmarks such as ClawEval, GDPVal, and SWE-bench Pro.","name":"MiMo V2.5 Pro","url":"https://sup.ai/models/xiaomi-mimo-v2.5-pro"},"position":234},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/minimax-minimax-m1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"40,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"MiniMax"},"category":"AI Model","description":"MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences-up to 1 million tokens-while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks. Trained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.","name":"MiniMax M1","url":"https://sup.ai/models/minimax-minimax-m1"},"position":235},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/minimax-minimax-m2","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"204,800"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"MiniMax"},"category":"AI Model","description":"A Mixture-of-Experts model with 230B total parameters and only 10B activated per inference, delivering exceptional efficiency. Built for the agentic era with function calling, advanced reasoning, and real-time streaming capabilities. With a 200K shared context window and 128K max output (including chain-of-thought), it handles massive contexts for coding and agentic work. Superseded by MiniMax M2.1 with improved coding and refactoring capabilities.","name":"MiniMax M2","url":"https://sup.ai/models/minimax-minimax-m2"},"position":236},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/minimax-minimax-m2-her","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"204,800"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"2,048"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"MiniMax"},"category":"AI Model","description":"MiniMax M2-her is a dialogue-first model purpose-built for role-playing and immersive multi-turn conversations. Developed from three years of role-play optimization, it excels at intuitive preference alignment (reading between the lines to adapt to user style), dynamic story progression (driving narrative forward with vivid prose), and high-fidelity world experience (maintaining strict coherence with established lore and character voice). Supports rich role settings including system roles, user personas, conversation groups, and example dialogue learning. With a 200K context window and 2K max output, it prioritizes deep emotional connection and long-horizon character consistency over raw output length.","name":"MiniMax M2-her","url":"https://sup.ai/models/minimax-minimax-m2-her"},"position":237},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/minimax-minimax-m2.1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"204,800"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"MiniMax"},"category":"AI Model","description":"A 230B MoE model (10B active) optimized for code generation and refactoring with polyglot programming mastery. Features enhanced reasoning capabilities and precision code refactoring across multiple languages. With a 200K shared context window, 131K max output, and ~60 tps output speed, it handles substantial coding tasks with confidence. Superseded by MiniMax M2.5 with SOTA coding performance.","name":"MiniMax M2.1","url":"https://sup.ai/models/minimax-minimax-m2.1"},"position":238},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/minimax-minimax-m2.1-highspeed","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"204,800"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"MiniMax"},"category":"AI Model","description":"The highspeed variant of MiniMax M2.1, delivering the same polyglot code mastery and precision refactoring at significantly faster inference speeds (~100 tokens per second vs ~60 tps standard). Ideal for latency-sensitive applications and real-time coding assistance. Shares the same 200K shared context window and 131K max output. Superseded by MiniMax M2.5 Highspeed.","name":"MiniMax M2.1 Highspeed","url":"https://sup.ai/models/minimax-minimax-m2.1-highspeed"},"position":239},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/minimax-minimax-m2.5","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"204,800"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"MiniMax"},"category":"AI Model","description":"MiniMax M2.5 is a SOTA large language model designed for real-world productivity, achieving 80.2% on SWE-Bench Verified and 51.3% on Multi-SWE-Bench. A 230B MoE model (10B active) capable of handling the entire development process of complex systems across Web, Android, iOS, Windows, and Mac platforms. Excels at coding, agentic tool use, search, and office productivity. With a 200K shared context window, 131K max output, and ~60 tps output speed, it delivers peak performance at exceptional value.","name":"MiniMax M2.5","url":"https://sup.ai/models/minimax-minimax-m2.5"},"position":240},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/minimax-minimax-m2.5-highspeed","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"204,800"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"MiniMax"},"category":"AI Model","description":"The highspeed variant of MiniMax M2.5, delivering the same SOTA coding performance at significantly faster inference speeds (~100 tokens per second vs ~60 tps standard). Same quality as M2.5 for full-stack development across all platforms with dramatically lower latency. Shares the same 200K shared context window and 131K max output. Ideal for real-time coding assistance and latency-sensitive production deployments.","name":"MiniMax M2.5 Highspeed","url":"https://sup.ai/models/minimax-minimax-m2.5-highspeed"},"position":241},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/minimax-minimax-m2.7","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"204,800"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"MiniMax"},"category":"AI Model","description":"MiniMax M2.7 is the latest generation of the MiniMax M-series, a 230B MoE model with 10B active parameters. It delivers enhanced agentic capabilities including tool calling, reasoning, and multi-step task execution. With a 200K shared context window, 131K max output, and ~60 tps output speed, it builds on the strengths of M2.5 with improved performance across coding, agent workflows, and real-world productivity tasks.","name":"MiniMax M2.7","url":"https://sup.ai/models/minimax-minimax-m2.7"},"position":242},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/minimax-minimax-m2.7-highspeed","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"204,800"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"MiniMax"},"category":"AI Model","description":"The highspeed variant of MiniMax M2.7, delivering the same agentic and coding performance at significantly faster inference speeds (~100 tokens per second vs ~60 tps standard). Unlike previous generations, M2.7 Highspeed matches the standard variant on pricing while delivering dramatically lower latency. Shares the same 200K shared context window and 131K max output.","name":"MiniMax M2.7 Highspeed","url":"https://sup.ai/models/minimax-minimax-m2.7-highspeed"},"position":243},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/minimax-minimax-01","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"1M"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"MiniMax"},"category":"AI Model","description":"MiniMax-01 combines MiniMax-Text-01 for text generation and MiniMax-VL-01 for image understanding. It has 456 billion parameters, with 45.9 billion parameters activated per inference, and can handle a context of up to 4 million tokens. The text model adopts a hybrid architecture that combines Lightning Attention, Softmax Attention, and Mixture-of-Experts (MoE). The image model adopts the \"ViT-MLP-LLM\" framework and is trained on top of the text model.","name":"MiniMax-01","url":"https://sup.ai/models/minimax-minimax-01"},"position":244},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-ministral-14b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"262,144"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Ministral 3 (a.k.a. Tinystral) 14B Instruct.","name":"Ministral 14B","url":"https://sup.ai/models/mistral-ministral-14b"},"position":245},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-ministral-3b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Ministral 3 (a.k.a. Tinystral) 3B Instruct.","name":"Ministral 3B","url":"https://sup.ai/models/mistral-ministral-3b"},"position":246},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-ministral-8b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"262,144"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Ministral 3 (a.k.a. Tinystral) 8B Instruct.","name":"Ministral 8B","url":"https://sup.ai/models/mistral-ministral-8b"},"position":247},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-mistral-7b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"A 7B transformer model, fast-deployed and easily customisable.","name":"Mistral 7B","url":"https://sup.ai/models/mistral-mistral-7b"},"position":248},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-mistral-large","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"262,144"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Open-weight, general-purpose, flagship multimodal and multilingual model.","name":"Mistral Large","url":"https://sup.ai/models/mistral-mistral-large"},"position":249},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-mistral-medium","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Update on Mistral Medium 3 with improved capabilities.","name":"Mistral Medium","url":"https://sup.ai/models/mistral-mistral-medium"},"position":250},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-mistral-nemo","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Our best multilingual open source model released July 2024.","name":"Mistral Nemo","url":"https://sup.ai/models/mistral-mistral-nemo"},"position":251},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-mistral-saba","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"A 24B-parameter language model designed for the Middle East and South Asia, supporting Arabic, Tamil, Malayalam, and other regional languages.","name":"Mistral Saba","url":"https://sup.ai/models/mistral-mistral-saba"},"position":252},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-mistral-small","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Our latest enterprise-grade small model with the latest version released June 2025.","name":"Mistral Small","url":"https://sup.ai/models/mistral-mistral-small"},"position":253},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-mistral-small-4-119b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Mistral Small 4 119B is the next major release in the Mistral Small family, unifying the capabilities of several flagship Mistral models into a single system. It combines strong reasoning from Magistral, multimodal understanding from Pixtral, and agentic coding capabilities from Devstral, enabling one model to handle complex analysis, software development, and visual tasks within the same workflow.","name":"Mistral Small 4 119B","url":"https://sup.ai/models/mistral-mistral-small-4-119b"},"position":254},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-mistral-small-creative","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Official Mistral Small Creative Mistral AI model","name":"Mistral Small Creative","url":"https://sup.ai/models/mistral-mistral-small-creative"},"position":255},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-mistral-tiny","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Our best multilingual open source model released July 2024.","name":"Mistral Tiny","url":"https://sup.ai/models/mistral-mistral-tiny"},"position":256},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-mixtral-8x22b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"65,536"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Mixtral 8x22B is currently the most performant open model. A 22B sparse Mixture-of-Experts (SMoE). Uses only 39B active parameters out of 141B.","name":"Mixtral 8x22B","url":"https://sup.ai/models/mistral-mixtral-8x22b"},"position":257},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-mixtral-8x7b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"A 7B sparse Mixture-of-Experts (SMoE). Uses 12.9B active parameters out of 45B total.","name":"Mixtral 8x7B","url":"https://sup.ai/models/mistral-mixtral-8x7b"},"position":258},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/allenai-molmo-2-8b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"36,864"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"36,864"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Allen AI"},"category":"AI Model","description":"Molmo2-8B is an open vision-language model developed by the Allen Institute for AI (Ai2) as part of the Molmo2 family, supporting image, video, and multi-image understanding and grounding. It is based on Qwen3-8B and uses SigLIP 2 as its vision backbone, outperforming other open-weight, open-data models on short videos, counting, and captioning, while remaining competitive on long-video tasks.","name":"Molmo2 8B","url":"https://sup.ai/models/allenai-molmo-2-8b"},"position":259},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/moonshotai-moonshot-v1-128k","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Moonshot AI"},"category":"AI Model","description":"Moonshot V1 128K is a legacy text generation model with a 131,072 token context window. Superseded by Kimi K2 with superior coding, reasoning, and agent capabilities.","name":"Moonshot V1 128K","url":"https://sup.ai/models/moonshotai-moonshot-v1-128k"},"position":260},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/moonshotai-moonshot-v1-128k-vision","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Moonshot AI"},"category":"AI Model","description":"Moonshot V1 128K Vision is a legacy multimodal model with a 131,072 token context window supporting image understanding. Superseded by Kimi K2.5 with native multimodal architecture.","name":"Moonshot V1 128K Vision","url":"https://sup.ai/models/moonshotai-moonshot-v1-128k-vision"},"position":261},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/moonshotai-moonshot-v1-32k","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Moonshot AI"},"category":"AI Model","description":"Moonshot V1 32K is a legacy text generation model with a 32,768 token context window. Superseded by Kimi K2 with superior coding, reasoning, and agent capabilities.","name":"Moonshot V1 32K","url":"https://sup.ai/models/moonshotai-moonshot-v1-32k"},"position":262},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/moonshotai-moonshot-v1-32k-vision","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Moonshot AI"},"category":"AI Model","description":"Moonshot V1 32K Vision is a legacy multimodal model with a 32,768 token context window supporting image understanding. Superseded by Kimi K2.5 with native multimodal architecture.","name":"Moonshot V1 32K Vision","url":"https://sup.ai/models/moonshotai-moonshot-v1-32k-vision"},"position":263},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/moonshotai-moonshot-v1-8k","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,192"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Moonshot AI"},"category":"AI Model","description":"Moonshot V1 8K is a legacy text generation model with an 8,192 token context window. Superseded by Kimi K2 with superior coding, reasoning, and agent capabilities.","name":"Moonshot V1 8K","url":"https://sup.ai/models/moonshotai-moonshot-v1-8k"},"position":264},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/moonshotai-moonshot-v1-8k-vision","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,192"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Moonshot AI"},"category":"AI Model","description":"Moonshot V1 8K Vision is a legacy multimodal model with an 8,192 token context window supporting image understanding. Superseded by Kimi K2.5 with native multimodal architecture.","name":"Moonshot V1 8K Vision","url":"https://sup.ai/models/moonshotai-moonshot-v1-8k-vision"},"position":265},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/morph-morph-v3-fast","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"81,920"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"38,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Morph"},"category":"AI Model","description":"Morph's fastest apply model for code edits. ~10,500 tokens/sec with 96% accuracy for rapid code transformations. The model requires the prompt to be in the following format: {instruction} {initial_code} {edit_snippet} Zero Data Retention is enabled for Morph.","name":"Morph V3 Fast","url":"https://sup.ai/models/morph-morph-v3-fast"},"position":266},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/morph-morph-v3-large","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Morph"},"category":"AI Model","description":"Morph's high-accuracy apply model for complex code edits. ~4,500 tokens/sec with 98% accuracy for precise code transformations. The model requires the prompt to be in the following format: {instruction} {initial_code} {edit_snippet} Zero Data Retention is enabled for Morph.","name":"Morph V3 Large","url":"https://sup.ai/models/morph-morph-v3-large"},"position":267},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/gryphe-mythomax-l2-13b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"4,096"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Gryphe"},"category":"AI Model","description":"One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge","name":"MythoMax 13B","url":"https://sup.ai/models/gryphe-mythomax-l2-13b"},"position":268},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nvidia-nemotron-3-nano-30b-a3b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"NVIDIA"},"category":"AI Model","description":"NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems. The model is fully open with open-weights, datasets and recipes so developers can easily customize, optimize, and deploy the model on their infrastructure for maximum privacy and security. Note: For the free endpoint, all prompts and output are logged to improve the provider's model and its product and services. Please do not upload any personal, confidential, or otherwise sensitive information. This is a trial use only. Do not use for production or business-critical systems.","name":"Nemotron 3 Nano 30B A3B","url":"https://sup.ai/models/nvidia-nemotron-3-nano-30b-a3b"},"position":269},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nvidia-nemotron-3-super-120b-a12b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"NVIDIA"},"category":"AI Model","description":"NVIDIA Nemotron 3 Super is a 120B-parameter open hybrid MoE model, activating just 12B parameters for maximum compute efficiency and accuracy in complex multi-agent applications. Built on a hybrid Mamba-Transformer Mixture-of-Experts architecture with multi-token prediction (MTP), it delivers over 50% higher token generation compared to leading open models. Latent MoE enables calling 4 experts for the inference cost of only one, improving intelligence and generalization. Multi-environment RL training across 10+ environments delivers leading accuracy on benchmarks including AIME 2025, TerminalBench, and SWE-Bench Verified. Fully open with weights, datasets, and recipes under the NVIDIA Open License, Nemotron 3 Super allows easy customization and secure deployment anywhere, from workstation to cloud.","name":"Nemotron 3 Super","url":"https://sup.ai/models/nvidia-nemotron-3-super-120b-a12b"},"position":270},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nvidia-nemotron-nano-12b-v2-vl","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"NVIDIA"},"category":"AI Model","description":"NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba's memory-efficient sequence modeling for significantly higher throughput and lower latency. The model supports inputs of text and multi-image documents, producing natural-language outputs. It is trained on high-quality NVIDIA-curated synthetic datasets optimized for optical-character recognition, chart reasoning, and multimodal comprehension. Nemotron Nano 2 VL achieves leading results on OCRBench v2 and scores ≈ 74 average across MMMU, MathVista, AI2D, OCRBench, OCR-Reasoning, ChartQA, DocVQA, and Video-MME-surpassing prior open VL baselines. With Efficient Video Sampling (EVS), it handles long-form videos while reducing inference cost. Open-weights, training data, and fine-tuning recipes are released under a permissive NVIDIA open license, with deployment supported across NeMo, NIM, and major inference runtimes.","name":"Nemotron Nano 12B 2 VL","url":"https://sup.ai/models/nvidia-nemotron-nano-12b-v2-vl"},"position":271},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/nvidia-nemotron-nano-9b-v2","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"NVIDIA"},"category":"AI Model","description":"NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. The model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.","name":"Nemotron Nano 9B V2","url":"https://sup.ai/models/nvidia-nemotron-nano-9b-v2"},"position":272},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/neversleep-noromaid-20b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"4,096"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"2,048"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"NeverSleep"},"category":"AI Model","description":"A collab between IkariDev and Undi. This merge is suitable for RP, ERP, and general knowledge. #merge #uncensored","name":"Noromaid 20B","url":"https://sup.ai/models/neversleep-noromaid-20b"},"position":273},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/amazon-nova-2-lite","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Amazon"},"category":"AI Model","description":"Amazon's fast, cost-effective reasoning model built for everyday tasks. Nova 2 Lite delivers strong multimodal understanding across text, images, video, and documents with a massive 1M context window and extended thinking capabilities. With built-in tools for code interpretation and web grounding, it handles complex analytical tasks while keeping costs low. Perfect for production applications that need reliable intelligence and broad context without the premium price tag.","name":"Nova 2 Lite","url":"https://sup.ai/models/amazon-nova-2-lite"},"position":274},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/amazon-nova-lite","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"300,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"5,120"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Amazon"},"category":"AI Model","description":"Amazon's fast, cost-effective multimodal model processing text and images with a generous 300K context window. Nova Lite handles up to multiple images per request and delivers rapid responses for customer interactions, document analysis, and visual understanding tasks. An excellent choice for applications needing broad multimodal capabilities without premium costs.","name":"Nova Lite","url":"https://sup.ai/models/amazon-nova-lite"},"position":275},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/amazon-nova-micro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"5,120"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Amazon"},"category":"AI Model","description":"Amazon's fastest and most cost-effective text-only model delivering the lowest latency in the Nova family. Nova Micro excels at text summarization, translation, classification, and simple coding tasks. With a 128K context window and tool support, it handles a broad range of text-based workflows. Perfect for high-volume applications and cost-conscious projects where speed matters most.","name":"Nova Micro","url":"https://sup.ai/models/amazon-nova-micro"},"position":276},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/amazon-nova-premier","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"25,600"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Amazon"},"category":"AI Model","description":"Amazon's most capable model designed for complex multimodal tasks and agentic workflows. Nova Premier processes text, images, video, and documents with a massive 1M token context window, enabling analysis of extensive content in a single request. It excels at complex reasoning, detailed analysis, and multi-step problem solving where accuracy is paramount. Choose this when you need maximum intelligence from the Nova family.","name":"Nova Premier","url":"https://sup.ai/models/amazon-nova-premier"},"position":277},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/amazon-nova-pro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"300,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"5,120"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Amazon"},"category":"AI Model","description":"Amazon's balanced multimodal model offering the best combination of accuracy, speed, and cost for general tasks. Nova Pro processes text, images, video, and documents with a 300K context window and delivers strong performance on visual question answering and video understanding benchmarks. The ideal choice when you need reliable multimodal capabilities with a good balance of quality and cost.","name":"Nova Pro","url":"https://sup.ai/models/amazon-nova-pro"},"position":278},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-o1","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"100,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"The o1 model series is designed to spend more time thinking before responding. Trained with large-scale reinforcement learning to reason using chain of thought. The o1 models are optimized for math, science, programming, and other STEM-related tasks.","name":"o1","url":"https://sup.ai/models/openai-o1"},"position":279},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-o1-pro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"100,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o1 Pro model uses more compute to think harder and provide consistently better answers.","name":"o1 Pro","url":"https://sup.ai/models/openai-o1-pro"},"position":280},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-o3","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"100,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images.","name":"o3","url":"https://sup.ai/models/openai-o3"},"position":281},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-o3-deep-research","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"100,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"o3 Deep Research is OpenAI's advanced model for deep research, designed to tackle complex, multi-step research tasks. This model always uses web search which adds additional cost.","name":"o3 Deep Research","url":"https://sup.ai/models/openai-o3-deep-research"},"position":282},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-o3-mini","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"100,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"o3 Mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. The model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming.","name":"o3 Mini","url":"https://sup.ai/models/openai-o3-mini"},"position":283},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-o3-mini-high","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"100,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"o3 Mini High is the same model as o3 Mini with reasoning effort set to high. o3 Mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding.","name":"o3 Mini High","url":"https://sup.ai/models/openai-o3-mini-high"},"position":284},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-o3-pro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"100,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3 Pro model uses more compute to think harder and provide consistently better answers.","name":"o3 Pro","url":"https://sup.ai/models/openai-o3-pro"},"position":285},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-o4-mini","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"100,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"o4 Mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance, outperforming its predecessor o3 Mini and even approaching o3 in some domains.","name":"o4 Mini","url":"https://sup.ai/models/openai-o4-mini"},"position":286},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-o4-mini-deep-research","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"100,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"o4 Mini Deep Research is OpenAI's faster, more affordable deep research model, ideal for tackling complex, multi-step research tasks. This model always uses web search which adds additional cost.","name":"o4 Mini Deep Research","url":"https://sup.ai/models/openai-o4-mini-deep-research"},"position":287},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/openai-o4-mini-high","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"100,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"OpenAI"},"category":"AI Model","description":"o4 Mini High is the same model as o4 Mini with reasoning effort set to high. o4 Mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities.","name":"o4 Mini High","url":"https://sup.ai/models/openai-o4-mini-high"},"position":288},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/allenai-olmo-2-32b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Allen AI"},"category":"AI Model","description":"OLMo-2 32B Instruct is a supervised instruction-finetuned variant of the OLMo-2 32B March 2025 base model. It excels in complex reasoning and instruction-following tasks across diverse benchmarks such as GSM8K, MATH, IFEval, and general NLP evaluation. Developed by AI2, OLMo-2 32B is part of an open, research-oriented initiative, trained primarily on English-language datasets to advance the understanding and development of open-source language models.","name":"Olmo 2 32B Instruct","url":"https://sup.ai/models/allenai-olmo-2-32b-instruct"},"position":289},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/allenai-olmo-3-32b-think","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"65,536"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Allen AI"},"category":"AI Model","description":"Olmo 3 32B Think is a large-scale, 32-billion-parameter model purpose-built for deep reasoning, complex logic chains and advanced instruction-following scenarios. Its capacity enables strong performance on demanding evaluation tasks and highly nuanced conversational reasoning. Developed by Ai2 under the Apache 2.0 license, Olmo 3 32B Think embodies the Olmo initiative's commitment to openness, offering full transparency across weights, code and training methodology.","name":"Olmo 3 32B Think","url":"https://sup.ai/models/allenai-olmo-3-32b-think"},"position":290},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/allenai-olmo-3-7b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"65,536"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Allen AI"},"category":"AI Model","description":"Olmo 3 7B Instruct is a supervised instruction-fine-tuned variant of the Olmo 3 7B base model, optimized for instruction-following, question-answering, and natural conversational dialogue. By leveraging high-quality instruction data and an open training pipeline, it delivers strong performance across everyday NLP tasks while remaining accessible and easy to integrate. Developed by Ai2 under the Apache 2.0 license, the model offers a transparent, community-friendly option for instruction-driven applications.","name":"Olmo 3 7B Instruct","url":"https://sup.ai/models/allenai-olmo-3-7b-instruct"},"position":291},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/allenai-olmo-3-7b-think","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"65,536"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Allen AI"},"category":"AI Model","description":"Olmo 3 7B Think is a research-oriented language model in the Olmo family designed for advanced reasoning and instruction-driven tasks. It excels at multi-step problem solving, logical inference, and maintaining coherent conversational context. Developed by Ai2 under the Apache 2.0 license, Olmo 3 7B Think supports transparent, fully open experimentation and provides a lightweight yet capable foundation for academic research and practical NLP workflows.","name":"Olmo 3 7B Think","url":"https://sup.ai/models/allenai-olmo-3-7b-think"},"position":292},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/allenai-olmo-3.1-32b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"65,536"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Allen AI"},"category":"AI Model","description":"Olmo 3.1 32B Instruct is a large-scale, 32-billion-parameter instruction-tuned language model engineered for high-performance conversational AI, multi-turn dialogue, and practical instruction following. As part of the Olmo 3.1 family, this variant emphasizes responsiveness to complex user directions and robust chat interactions while retaining strong capabilities on reasoning and coding benchmarks. Developed by Ai2 under the Apache 2.0 license, Olmo 3.1 32B Instruct reflects the Olmo initiative's commitment to openness and transparency.","name":"Olmo 3.1 32B Instruct","url":"https://sup.ai/models/allenai-olmo-3.1-32b-instruct"},"position":293},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/allenai-olmo-3.1-32b-think","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"65,536"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Allen AI"},"category":"AI Model","description":"Olmo 3.1 32B Think is a large-scale, 32-billion-parameter model designed for deep reasoning, complex multi-step logic, and advanced instruction following. Building on the Olmo 3 series, version 3.1 delivers refined reasoning behavior and stronger performance across demanding evaluations and nuanced conversational tasks. Developed by Ai2 under the Apache 2.0 license, Olmo 3.1 32B Think continues the Olmo initiative's commitment to openness, providing full transparency across model weights, code, and training methodology.","name":"Olmo 3.1 32B Think","url":"https://sup.ai/models/allenai-olmo-3.1-32b-think"},"position":294},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/writer-palmyra-x5","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1.04M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Writer"},"category":"AI Model","description":"Palmyra X5 is Writer's most advanced model, purpose-built for building and scaling AI agents across the enterprise. It delivers industry-leading speed and efficiency on context windows up to 1 million tokens, powered by a novel transformer architecture and hybrid attention mechanisms. This enables faster inference and expanded memory for processing large volumes of enterprise data, critical for scaling AI agents.","name":"Palmyra X5","url":"https://sup.ai/models/writer-palmyra-x5"},"position":295},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/microsoft-phi-4","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"16,384"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Microsoft"},"category":"AI Model","description":"Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. At 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. It works best with English language inputs.","name":"Phi 4","url":"https://sup.ai/models/microsoft-phi-4"},"position":296},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-pixtral-12b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"A compact 12B multimodal model with image understanding alongside text capabilities.","name":"Pixtral 12B","url":"https://sup.ai/models/mistral-pixtral-12b"},"position":297},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-pixtral-large","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"Official pixtral-large-2411 Mistral AI model","name":"Pixtral Large","url":"https://sup.ai/models/mistral-pixtral-large"},"position":298},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/baidu-qianfan-ocr-fast","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"65,536"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"28,672"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Baidu"},"category":"AI Model","description":"Qianfan-OCR-Fast is a domain-specific multimodal large model purpose-built for OCR. By leveraging specialized OCR training data while preserving versatile multimodal intelligence, it provides a powerful performance upgrade over Qianfan-OCR.","name":"Qianfan OCR Fast","url":"https://sup.ai/models/baidu-qianfan-ocr-fast"},"position":299},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qvq-max","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Tongyi Qianwen QVQ visual reasoning model supports visual input and chain-of-thought output, demonstrating stronger capabilities in mathematics, programming, visual analysis, creation, and general tasks. This model is a historical snapshot of QVQ-Max from March 25, 2025, and is expected to be maintained for one month after the release date of the next snapshot version (to be determined).","name":"QVQ Max","url":"https://sup.ai/models/alibaba-qvq-max"},"position":300},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-max-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The most capable Qwen reasoning model, integrating thinking and non-thinking modes for comprehensive problem-solving. In thinking mode, it combines deliberate reasoning with web search, web extraction, and code interpreter tools to tackle complex challenges with greater accuracy. With a 256K context window and 65K output tokens, this model excels at problems requiring both deep thought and external tool use.","name":"Qwen 3 Max Thinking","url":"https://sup.ai/models/alibaba-qwen3-max-thinking"},"position":301},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3 Flash model (snapshot 2025-07-28) offers a powerful fusion of thinking and non-thinking modes with dynamic in-conversation switching, excelling in complex reasoning while showing significant gains in instruction following and text comprehension. It supports a 1M context length and is billed on a tiered model corresponding to context usage.","name":"Qwen Flash","url":"https://sup.ai/models/alibaba-qwen-flash"},"position":302},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-flash-character","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,192"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen Role-Playing Model Series is specifically optimized for muti-language anthropomorphic interaction scenarios. It demonstrates advanced capabilities in character consistency maintenance, context-aware dialogue progression, and empathetic engagement, enabling precise personalized character embodiment. This version significantly enhances Japanese linguistic localization (including dialects and honorifics), human-like role-playing authenticity, narrative coherence control, and scenario-based cognitive intelligence.","name":"Qwen Flash Character","url":"https://sup.ai/models/alibaba-qwen-flash-character"},"position":303},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-max","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The most effective model in the Qwen series, with improved code capabilities in both Chinese and English, logical abilities, and multilingual capabilities. The model's response detail and format clarity have been improved, as well as its creative abilities, JSON format compliance, and role-playing capabilities. This model is a snapshot version of Qwen-Max from January 25, 2025, and is expected to be maintained until one month before the next snapshot goes live.","name":"Qwen Max","url":"https://sup.ai/models/alibaba-qwen-max"},"position":304},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-mt-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"16,384"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen-MT-Flash, a large language model from the Qwen series, has been fully upgraded with the Qwen 3 architecture for significantly enhanced performance and translation quality. It provides rapid, cost-effective translation across 92 languages, while supporting advanced features such as terminology intervention, format preservation, and domain-specific adaptation. It is the ideal choice for applications requiring a powerful balance of speed, quality, and cost.","name":"Qwen MT Flash","url":"https://sup.ai/models/alibaba-qwen-mt-flash"},"position":305},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-mt-lite","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"16,384"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen-MT-Lite is a large language model of the Qwen model series that specializes in multi-lingual translation. It provides high-quality and rapid translation services across 32 languages at a cost-effective price. It offers features such as terminology intervention, format preservation, and domain-specific translation to cater to the diverse needs of various applications, ensuring both efficiency and performance.","name":"Qwen MT Lite","url":"https://sup.ai/models/alibaba-qwen-mt-lite"},"position":306},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-mt-plus","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"4,096"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"2,048"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen-MT-Plus, the flagship translation model from our Qwen series, is now fully upgraded with the Qwen3 architecture. It supports 92 languages and delivers exceptionally accurate and natural-sounding translations. Its advanced capabilities in contextual understanding, terminology control, and format preservation make it a superior choice over traditional models, especially for specialized domains.","name":"Qwen MT Plus","url":"https://sup.ai/models/alibaba-qwen-mt-plus"},"position":307},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-mt-turbo","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"4,096"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"2,048"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen-MT-Turbo, a large language model from the Qwen series, has been fully upgraded with the Qwen 3 architecture for significantly enhanced performance and translation quality. It provides rapid, cost-effective translation across 92 languages, while supporting advanced features such as terminology intervention, format preservation, and domain-specific adaptation. It is the ideal choice for applications requiring a powerful balance of speed, quality, and cost.","name":"Qwen MT Turbo","url":"https://sup.ai/models/alibaba-qwen-mt-turbo"},"position":308},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-plus","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen series of models, which are well-balanced in capabilities, offer reasoning performance and speed that fall between Qwen-Max and Qwen-Turbo, making them suitable for moderately complex tasks. Compared to previous versions, it shows significant improvements in both Chinese and English code generation, logical reasoning, and multilingual abilities. The response style has been greatly adjusted to align with human preferences, with noticeable enhancements in the level of detail and clarity of responses. Specialized improvements have been made in creative writing, adherence to JSON formatting, and role-playing abilities.","name":"Qwen Plus","url":"https://sup.ai/models/alibaba-qwen-plus"},"position":309},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-plus-character","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The role-playing model of the Qwen series. This is a dynamically updated version, and notifications will be provided in advance for any model updates. It is suitable for anthropomorphic role-playing and has optimized capabilities in following predefined character instructions, advancing conversations, and demonstrating active listening and empathy. Additionally, it supports the deep restoration of personalized characters.","name":"Qwen Plus Character","url":"https://sup.ai/models/alibaba-qwen-plus-character"},"position":310},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-plus-character-ja","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,192"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"512"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen Role-Playing Model Series is specifically optimized for Japanese anthropomorphic interaction scenarios. It demonstrates advanced capabilities in character consistency maintenance, context-aware dialogue progression, and empathetic engagement, enabling precise personalized character embodiment. This version significantly enhances Japanese linguistic localization (including dialects and honorifics), human-like role-playing authenticity, narrative coherence control, and scenario-based cognitive intelligence.","name":"Qwen Plus Character Japanese","url":"https://sup.ai/models/alibaba-qwen-plus-character-ja"},"position":311},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-plus-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.","name":"Qwen Plus Thinking","url":"https://sup.ai/models/alibaba-qwen-plus-thinking"},"position":312},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwq-plus","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen QwQ Reasoning Model Enhanced Edition, based on the Qwen2.5 model training QwQ reasoning model, through reinforcement learning greatly improved the model's reasoning ability. The core indicators of the model such as mathematics code (AIME 24/25, livecodebench) and some general indicators (IFEval, LiveBench, etc.) have reached the level of DeepSeek-R1 full-blooded edition.","name":"Qwen QwQ Plus","url":"https://sup.ai/models/alibaba-qwq-plus"},"position":313},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-turbo","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3 Turbo is a new hybrid reasoning model enabling dynamic switching between reasoning and non-reasoning modes mid-dialogue. With fewer parameters, it rivals the QwQ-32B in reasoning performance while surpassing the Qwen2.5-Turbo in general capabilities, achieving state-of-the-art (SOTA) performance at its scale. This model is a snapshot version as of April 28, 2025","name":"Qwen Turbo","url":"https://sup.ai/models/alibaba-qwen-turbo"},"position":314},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-vl-max","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The model has improved math and reasoning capabilities, with the response style adjusted to better align with human preferences. The clarity and detail of responses have been significantly enhanced. This is the snapshot version as of April 8, 2025.","name":"Qwen VL Max","url":"https://sup.ai/models/alibaba-qwen-vl-max"},"position":315},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-vl-plus","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"This model is a snapshot version of Qwen-VL-Plus as of August 15, 2025. It approaches the general capabilities of Qwen2.5-VL-32B, with improved performance in object and person recognition, enhanced accuracy in real-world scenarios, and reduced hallucinations.","name":"Qwen VL Plus","url":"https://sup.ai/models/alibaba-qwen-vl-plus"},"position":316},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen2.5-14b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen2.5-14B-Instruct is an open source instruction-tuned model that has 14 billion parameters. This model supports a context length of up to 131,072 tokens. To ensure smooth operation and output, the API of this model limits user maximum input to 129,024 tokens and the maximum output to 8,192 tokens.","name":"Qwen2.5 14B Instruct","url":"https://sup.ai/models/alibaba-qwen2.5-14b-instruct"},"position":317},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen2.5-14b-instruct-1m","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The 14B model of the Qwen2.5 series has gained significantly more knowledge compared to Qwen2, and has greatly improved in programming and mathematical abilities. Additionally, the new model has made improvements in executing instructions, generating long texts, understanding structured data (such as tables), and generating structured outputs, particularly JSON. It supports a context of 1M tokens.","name":"Qwen2.5 14B Instruct 1M","url":"https://sup.ai/models/alibaba-qwen2.5-14b-instruct-1m"},"position":318},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen2.5-32b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen2.5-32B-Instruct is an open source instruction-tuned model that has 32 billion parameters. This model supports a context length of up to 131,072 tokens. To ensure smooth operation and output, the API of this model limits user maximum input to 129,024 tokens and the maximum output to 8,192 tokens.","name":"Qwen2.5 32B Instruct","url":"https://sup.ai/models/alibaba-qwen2.5-32b-instruct"},"position":319},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen2.5-72b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen2.5-72B-Instruct is an open source instruction-tuned model that has 72 billion parameters. This model supports a context length of up to 131,072 tokens. To ensure smooth operation and output, the API of this model limits user maximum input to 129,024 tokens and the maximum output to 8,192 tokens.","name":"Qwen2.5 72B Instruct","url":"https://sup.ai/models/alibaba-qwen2.5-72b-instruct"},"position":320},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen2.5-7b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen2.5-7B-Instruct is an open source instruction-tuned model that has 7 billion parameters. This model supports a context length of up to 131,072 tokens. To ensure smooth operation and output, the API of this model limits user maximum input to 129,024 tokens and the maximum output to 8,192 tokens.","name":"Qwen2.5 7B Instruct","url":"https://sup.ai/models/alibaba-qwen2.5-7b-instruct"},"position":321},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen2.5-7b-instruct-1m","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The 7B model of the Qwen2.5 series has gained significantly more knowledge compared to Qwen2, and has greatly improved in programming and mathematical abilities. Additionally, the new model has made improvements in executing instructions, generating long texts, understanding structured data (such as tables), and generating structured outputs, particularly JSON. It supports a context of 1M tokens.","name":"Qwen2.5 7B Instruct 1M","url":"https://sup.ai/models/alibaba-qwen2.5-7b-instruct-1m"},"position":322},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen2.5-coder-32b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). Qwen2.5-Coder brings significant improvements in code generation, code reasoning, and code fixing, as well as a more comprehensive foundation for real-world applications such as Code Agents, enhancing coding capabilities while maintaining its strengths in mathematics and general competencies.","name":"Qwen2.5 Coder 32B Instruct","url":"https://sup.ai/models/alibaba-qwen2.5-coder-32b-instruct"},"position":323},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen2.5-coder-7b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen2.5-Coder-7B-Instruct is a 7B parameter instruction-tuned language model optimized for code-related tasks such as code generation, reasoning, and bug fixing. Based on the Qwen2.5 architecture, it incorporates enhancements like RoPE, SwiGLU, RMSNorm, and GQA attention with support for up to 128K tokens using YaRN-based extrapolation. It is trained on a large corpus of source code, synthetic data, and text-code grounding, providing robust performance across programming languages and agentic coding workflows. This model is part of the Qwen2.5-Coder family and offers strong compatibility with tools like vLLM for efficient deployment. Released under the Apache 2.0 license.","name":"Qwen2.5 Coder 7B Instruct","url":"https://sup.ai/models/alibaba-qwen2.5-coder-7b-instruct"},"position":324},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen2.5-vl-32b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen2.5VL series model has reached a level close to Qwen2.5VL-72B in answering math and subject questions, with the reply style significantly adjusted towards human preferences, especially for objective queries such as mathematics, logical reasoning, knowledge Q&A, etc., where the model's detailed responses and clear formatting have been notably improved. This version is the 32B version.","name":"Qwen2.5 VL 32B Instruct","url":"https://sup.ai/models/alibaba-qwen2.5-vl-32b-instruct"},"position":325},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen2.5-vl-3b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"2,048"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen2.5-VL open-source series model, with improved instruction following, mathematics, problem-solving, and coding capabilities, enhanced ability to recognize all things, supports diverse formats for direct and precise location of visual elements, supports understanding of up to one hour of video and second-level event moment positioning, can understand time sequence and speed, based on parsing and positioning capabilities supports manipulation of OS or Mobile agents, strong key information extraction capability and Json format output capability, this version is a 3B version suitable for mobile device use.","name":"Qwen2.5 VL 3B Instruct","url":"https://sup.ai/models/alibaba-qwen2.5-vl-3b-instruct"},"position":326},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen2.5-vl-72b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"2,048"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen2.5-VL open-source series model, with improved instruction following, mathematics, problem-solving, and coding capabilities, enhanced ability to recognize all things, supports diverse formats for direct and precise positioning of visual elements, supports understanding of up to one hour of video and second-level event moment positioning, can understand time sequence and speed, based on parsing and positioning capabilities supports controlling OS or Mobile agents, strong key information extraction capabilities and Json format output capabilities, this version is the 72B version, the most powerful version in this series.","name":"Qwen2.5 VL 72B Instruct","url":"https://sup.ai/models/alibaba-qwen2.5-vl-72b-instruct"},"position":327},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen2.5-vl-7b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"2,048"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The open-source series model Qwen2.5-VL, with enhanced instruction following, mathematics, problem-solving, and coding capabilities, has improved its ability to recognize all things, supports diverse formats for direct and precise positioning of visual elements, supports understanding of videos up to one hour long and second-level event moment positioning, can understand time sequence and speed, based on parsing and positioning capabilities supports controlling OS or Mobile Agents, has strong key information extraction capabilities and Json format output capabilities, this version is the 7B version, with relatively balanced computational volume and performance.","name":"Qwen2.5 VL 7B Instruct","url":"https://sup.ai/models/alibaba-qwen2.5-vl-7b-instruct"},"position":328},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-0.6b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3 hybrid reasoning model supports seamless switching between thinking and non-thinking modes during conversations. It outperforms the Qwen2.5 small-scale series in general capabilities.","name":"Qwen3 0.6B","url":"https://sup.ai/models/alibaba-qwen3-0.6b"},"position":329},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-1.7b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3 hybrid reasoning model supports seamless switching between thinking and non-thinking modes during conversations. It outperforms the Qwen2.5 small-scale series in general capabilities, with stronger human preference alignment and notable gains in creative writing, role-playing, multi-turn dialogue, and instruction following-resulting in a significantly improved user experience.","name":"Qwen3 1.7B","url":"https://sup.ai/models/alibaba-qwen3-1.7b"},"position":330},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-14b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3 hybrid reasoning model enables seamless switching between thinking and non-thinking modes during conversations. It achieves SOTA reasoning performance at its scale and significantly outperforms Qwen2.5-14B in general capabilities.","name":"Qwen3 14B","url":"https://sup.ai/models/alibaba-qwen3-14b"},"position":331},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen-3-235b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"40,960"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"A powerful general-purpose model perfect for complex reasoning tasks, instruction following, and multilingual projects. With 235 billion parameters, this model excels at understanding nuanced instructions and maintaining context across long conversations. Ideal for research, creative writing, and sophisticated problem-solving where you need reliable, well-reasoned responses.","name":"Qwen3 235B","url":"https://sup.ai/models/alibaba-qwen-3-235b"},"position":332},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-235b-a22b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3 hybrid reasoning model enables seamless switching between thinking and non-thinking modes during conversations. It delivers strong reasoning performance with fewer parameters, comparable to QwQ, and significantly outperforms Qwen2.5-72B-Instruct in general capabilities, achieving state-of-the-art (SOTA) results for its scale.","name":"Qwen3 235B A22B","url":"https://sup.ai/models/alibaba-qwen3-235b-a22b"},"position":333},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-235b-a22b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Compared to its predecessor (Qwen3-235B-A22B), the latest open-source Qwen3 model (non-thinking mode) delivers modest improvements in creative performance and model safety.","name":"Qwen3 235B A22B Instruct","url":"https://sup.ai/models/alibaba-qwen3-235b-a22b-instruct"},"position":334},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-235b-a22b-instruct-2507","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Compared to its predecessor (Qwen3-235B-A22B), the latest open-source Qwen3 model (non-thinking mode) delivers modest improvements in creative performance and model safety.","name":"Qwen3 235B A22B Instruct 2507","url":"https://sup.ai/models/alibaba-qwen3-235b-a22b-instruct-2507"},"position":335},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-235b-a22b-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"81,920"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Built upon the Qwen3 framework, this open-source reasoning model offers substantial improvements over its predecessor (Qwen3-235B-A22B) in logic, general capabilities, knowledge, and creativity, making it ideal for highly complex, reasoning-intensive scenarios.","name":"Qwen3 235B A22B Thinking","url":"https://sup.ai/models/alibaba-qwen3-235b-a22b-thinking"},"position":336},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-235b-a22b-thinking-2507","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"81,920"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Built upon the Qwen3 framework, this open-source reasoning model offers substantial improvements over its predecessor (Qwen3-235B-A22B) in logic, general capabilities, knowledge, and creativity, making it ideal for highly complex, reasoning-intensive scenarios.","name":"Qwen3 235B A22B Thinking 2507","url":"https://sup.ai/models/alibaba-qwen3-235b-a22b-thinking-2507"},"position":337},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-30b-a3b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3 hybrid reasoning model enables seamless switching between thinking and non-thinking modes during conversations. It delivers strong reasoning performance with fewer parameters, comparable to QwQ-32B, and significantly outperforms Qwen2.5-14B in general capabilities, achieving state-of-the-art (SOTA) results for its scale.","name":"Qwen3 30B A3B","url":"https://sup.ai/models/alibaba-qwen3-30b-a3b"},"position":338},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-30b-a3b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3 open-source non-thinking model. As an advanced successor to Qwen3-30B-A3B, this model delivers substantial improvements in overall general capabilities across Chinese, English, and multiple languages. Furthermore, it has been specifically optimized for subjective, open-ended tasks, providing responses that better align with user preferences and offer significantly greater helpfulness.","name":"Qwen3 30B A3B Instruct","url":"https://sup.ai/models/alibaba-qwen3-30b-a3b-instruct"},"position":339},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-30b-a3b-instruct-2507","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3 open-source non-thinking model. As an advanced successor to Qwen3-30B-A3B, this model delivers substantial improvements in overall general capabilities across Chinese, English, and multiple languages. Furthermore, it has been specifically optimized for subjective, open-ended tasks, providing responses that better align with user preferences and offer significantly greater helpfulness.","name":"Qwen3 30B A3B Instruct 2507","url":"https://sup.ai/models/alibaba-qwen3-30b-a3b-instruct-2507"},"position":340},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-30b-a3b-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"81,920"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3 Open-Source Reasoning Model. As an advanced successor to Qwen3-30B-A3B, this model features superior complex reasoning, excelling in challenging tasks such as logic, mathematics, science, and coding. Additionally, it demonstrates significant improvements in core capabilities, including instruction following, text comprehension, and multilingual translation.","name":"Qwen3 30B A3B Thinking","url":"https://sup.ai/models/alibaba-qwen3-30b-a3b-thinking"},"position":341},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-30b-a3b-thinking-2507","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"81,920"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3 Open-Source Reasoning Model. As an advanced successor to Qwen3-30B-A3B, this model features superior complex reasoning, excelling in challenging tasks such as logic, mathematics, science, and coding. Additionally, it demonstrates significant improvements in core capabilities, including instruction following, text comprehension, and multilingual translation.","name":"Qwen3 30B A3B Thinking 2507","url":"https://sup.ai/models/alibaba-qwen3-30b-a3b-thinking-2507"},"position":342},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-32b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3 hybrid reasoning model enables seamless switching between thinking and non-thinking modes during conversations. It delivers strong reasoning performance with fewer parameters, comparable to QwQ, and significantly outperforms Qwen2.5-32B-Instruct in general capabilities, achieving state-of-the-art (SOTA) results for its scale.","name":"Qwen3 32B","url":"https://sup.ai/models/alibaba-qwen3-32b"},"position":343},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-4b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"This qwen3 hybrid reasoning model enables seamless switching between thinking and non-thinking modes during conversations, achieving SOTA reasoning performance at its scale. It shows significant improvements in human preference alignment, creative writing, role-playing, multi-turn dialogue, and instruction following, delivering a greatly enhanced user experience.","name":"Qwen3 4B","url":"https://sup.ai/models/alibaba-qwen3-4b"},"position":344},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-8b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,192"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3 hybrid reasoning model enables seamless switching between thinking and non-thinking modes during conversations. It achieves SOTA reasoning performance at its scale and significantly outperforms Qwen2.5-7B in general capabilities.","name":"Qwen3 8B","url":"https://sup.ai/models/alibaba-qwen3-8b"},"position":345},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-coder-30b-a3b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"160,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Your budget-friendly coding companion designed for everyday development work. This model specializes in code generation, debugging, and refactoring with strong tool integration. With a massive 262K context window, it can handle entire codebases and long documentation. Perfect for daily coding tasks where you need fast, reliable assistance without breaking the bank.","name":"Qwen3 Coder 30B A3B","url":"https://sup.ai/models/alibaba-qwen3-coder-30b-a3b"},"position":346},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-coder-30b-a3b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3-based code generation model, inheriting the coding agent capabilities of Qwen3-Coder-480B-A35B-Instruct, achieves State-of-the-Art (SOTA) coding performance among models of comparable size.","name":"Qwen3 Coder 30B A3B Instruct","url":"https://sup.ai/models/alibaba-qwen3-coder-30b-a3b-instruct"},"position":347},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-coder-480b-a35b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts). Pricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.","name":"Qwen3 Coder 480B A35B","url":"https://sup.ai/models/alibaba-qwen3-coder-480b-a35b"},"position":348},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-coder-480b-a35b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Powered by Qwen3, this code generation model is a powerful Coding Agent, achieving state-of-the-art (SOTA) performance among open-source models.","name":"Qwen3 Coder 480B A35B Instruct","url":"https://sup.ai/models/alibaba-qwen3-coder-480b-a35b-instruct"},"position":349},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-coder-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Based on Qwen3, this code generation model inherits the coding agent capabilities of Qwen3-Coder-Plus and supports multi-turn tool interaction. It features focused optimizations on repository-level understanding and enhanced tool-calling stability. This version is a snapshot dated July 28, 2025.","name":"Qwen3 Coder Flash","url":"https://sup.ai/models/alibaba-qwen3-coder-flash"},"position":350},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-coder-next","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3-Coder-Next is an open-weight causal language model optimized for coding agents and local development workflows. It uses a sparse MoE design with 80B total parameters and only 3B activated per token, delivering performance comparable to models with 10 to 20x higher active compute, which makes it well suited for cost-sensitive, always-on agent deployment. The model is trained with a strong agentic focus and performs reliably on long-horizon coding tasks, complex tool usage, and recovery from execution failures. With a native 256k context window, it integrates cleanly into real-world CLI and IDE environments and adapts well to common agent scaffolds used by modern coding tools. The model operates exclusively in non-thinking mode and does not emit blocks, simplifying integration for production coding agents.","name":"Qwen3 Coder Next","url":"https://sup.ai/models/alibaba-qwen3-coder-next"},"position":351},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-coder-plus","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3-based code generation model with strong coding agent power, excels at tool calling and environment interaction, capable of autonomous programming with outstanding code capability while maintaining general ability. This is a snapshot from 23 September , 2025.Compared to the previous version (snapshot from July 22), it demonstrates improved robustness in downstream task performance and tool invocation, along with enhanced code security.","name":"Qwen3 Coder Plus","url":"https://sup.ai/models/alibaba-qwen3-coder-plus"},"position":352},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-max","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Compared with the snapshot as of September 23, 2025, the Qwen-3 series Max model in this release achieves an effective integration of thinking and non-thinking modes, resulting in a comprehensive and substantial improvement in the model's overall performance. In thinking mode, the model simultaneously supports web search, web information extraction, and a code interpreter tool, enabling it to tackle more complex and challenging problems with greater accuracy by leveraging external tools while engaging in slow, deliberative reasoning. This version is based on a snapshot taken on January 23, 2026.","name":"Qwen3 Max","url":"https://sup.ai/models/alibaba-qwen3-max"},"position":353},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-next-80b-a3b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"A new generation of open-source, non-thinking mode model powered by Qwen3. This version demonstrates superior Chinese text understanding, augmented logical reasoning, and enhanced capabilities in text generation tasks over the previous iteration (Qwen3-235B-A22B-Instruct-2507).","name":"Qwen3 Next 80B A3B Instruct","url":"https://sup.ai/models/alibaba-qwen3-next-80b-a3b-instruct"},"position":354},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-next-80b-a3b-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"A new generation of Qwen3-based open-source thinking mode models. This version offers improved instruction following and streamlined summary responses over the previous iteration (Qwen3-235B-A22B-Thinking-2507).","name":"Qwen3 Next 80B A3B Thinking","url":"https://sup.ai/models/alibaba-qwen3-next-80b-a3b-thinking"},"position":355},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-vl-235b-a22b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3 series VL models has been comprehensively upgraded in areas such as visual coding and spatial perception. Its visual perception and recognition capabilities have significantly improved, supporting the understanding of ultra-long videos, and its OCR functionality has undergone a major enhancement.","name":"Qwen3 VL 235B A22B Instruct","url":"https://sup.ai/models/alibaba-qwen3-vl-235b-a22b-instruct"},"position":356},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-vl-235b-a22b-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3 series VL models feature significantly enhanced multimodal reasoning capabilities, with a particular focus on optimizing the model for STEM and mathematical reasoning. Visual perception and recognition abilities have been comprehensively improved, and OCR capabilities have undergone a major upgrade.","name":"Qwen3 VL 235B A22B Thinking","url":"https://sup.ai/models/alibaba-qwen3-vl-235b-a22b-thinking"},"position":357},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-vl-30b-a3b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3-VL's second-largest MoE model delivers fast responses and supports ultra-long contexts (e.g., long videos and documents). It enhances image/video understanding, spatial perception, and object recognition, and includes 2D/3D visual localization to handle complex real-world tasks.","name":"Qwen3 VL 30B A3B Instruct","url":"https://sup.ai/models/alibaba-qwen3-vl-30b-a3b-instruct"},"position":358},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-vl-30b-a3b-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The \"Thinking\" edition of Qwen3-VL's second-largest MoE model offers fast response, enhanced multimodal understanding and reasoning, visual agent capabilities, and ultra-long context support (e.g., long videos and documents). It improves image/video comprehension, spatial perception, and object recognition to handle complex real-world tasks.","name":"Qwen3 VL 30B A3B Thinking","url":"https://sup.ai/models/alibaba-qwen3-vl-30b-a3b-thinking"},"position":359},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-vl-32b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The largest dense model in the Qwen3-VL series, in its non-inference version, delivers overall performance second only to Qwen3-VL-235B-Instruct. It excels in document recognition and comprehension, demonstrates strong spatial awareness and object identification capabilities, and achieves state-of-the-art performance in 2D visual detection and spatial reasoning. It is well-suited for complex perception tasks across a wide range of general-purpose scenarios.","name":"Qwen3 VL 32B Instruct","url":"https://sup.ai/models/alibaba-qwen3-vl-32b-instruct"},"position":360},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-vl-32b-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The largest dense model in the Qwen3-VL series, its reasoning version boasts multimodal reasoning capabilities second only to Qwen3-VL-235B-Thinking. It excels in STEM and math problem-solving, general image and video understanding, and achieves state-of-the-art performance in multimodal agent capabilities, making it ideal for complex multimodal reasoning tasks.","name":"Qwen3 VL 32B Thinking","url":"https://sup.ai/models/alibaba-qwen3-vl-32b-thinking"},"position":361},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-vl-8b-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3-VL 8B Dense model has a reduced memory footprint and delivers comprehensive improvements in image/video understanding, ultra-long context support (e.g., long videos and documents), spatial perception, and object recognition, enabling it to handle complex real-world tasks.","name":"Qwen3 VL 8B Instruct","url":"https://sup.ai/models/alibaba-qwen3-vl-8b-instruct"},"position":362},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-vl-8b-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The \"Thinking\" edition of Qwen3-VL 8B Dense has a reduced memory footprint, enabling multimodal understanding and reasoning. It supports ultra-long contexts (e.g., long videos and documents), 2D/3D visual localization, and enhances image/video comprehension, spatial perception, and object recognition.","name":"Qwen3 VL 8B Thinking","url":"https://sup.ai/models/alibaba-qwen3-vl-8b-thinking"},"position":363},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-vl-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3 series of small-sized visual understanding models effectively integrates thinking and non-thinking modes. Compared with the snapshot taken on October 15, 2025, the overall performance of the model has improved significantly: it delivers enhanced capabilities in general visual recognition and reasoning, and shows marked improvements in recognition accuracy across various business scenarios such as security, in-store inspections, equipment monitoring, and photo-based problem solving. This version is a snapshot as of January 22, 2026.","name":"Qwen3 VL Flash","url":"https://sup.ai/models/alibaba-qwen3-vl-flash"},"position":364},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-vl-plus","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3 series of visual understanding models effectively integrates thinking and non-thinking modes. Compared to the snapshot released on September 23, this version delivers superior performance in reasoning and analysis tasks as well as style control, while also offering lower latency and faster response speeds. This version is based on a snapshot taken on December 19, 2025.","name":"Qwen3 VL Plus","url":"https://sup.ai/models/alibaba-qwen3-vl-plus"},"position":365},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3-vl-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Our premier vision-language model combining 235B parameters with exceptional visual understanding and reasoning. This model excels at analyzing images, charts, diagrams, and documents with a special focus on STEM and mathematical content. With dramatically improved OCR capabilities and visual perception, it handles everything from handwritten equations to complex technical diagrams. Perfect for research, education, and any task requiring sophisticated visual analysis.","name":"Qwen3 VL Thinking","url":"https://sup.ai/models/alibaba-qwen3-vl-thinking"},"position":366},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3.5-122b-a10b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3.5 122B-A10B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. In terms of overall performance, this model is second only to Qwen3.5-397B-A17B. Its text capabilities significantly outperform those of Qwen3-235B-2507, and its visual capabilities surpass those of Qwen3-VL-235B.","name":"Qwen3.5 122B A10B","url":"https://sup.ai/models/alibaba-qwen3.5-122b-a10b"},"position":367},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3.5-27b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3.5 27B native vision-language Dense model incorporates a linear attention mechanism, delivering fast response times while balancing inference speed and performance. Its overall capabilities are comparable to those of the Qwen3.5-122B-A10B.","name":"Qwen3.5 27B","url":"https://sup.ai/models/alibaba-qwen3.5-27b"},"position":368},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3.5-35b-a3b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3.5 Series 35B-A3B is a native vision-language model designed with a hybrid architecture that integrates linear attention mechanisms and a sparse mixture-of-experts model, achieving higher inference efficiency. Its overall performance is comparable to that of the Qwen3.5-27B.","name":"Qwen3.5 35B A3B","url":"https://sup.ai/models/alibaba-qwen3.5-35b-a3b"},"position":369},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3.5-397b-a17b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3.5 series 397B-A17B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. It delivers state-of-the-art performance comparable to leading-edge models across a wide range of tasks, including language understanding, logical reasoning, code generation, agent-based tasks, image understanding, video understanding, and graphical user interface (GUI) interactions. With its robust code-generation and agent capabilities, the model exhibits strong generalization across diverse agent scenarios.","name":"Qwen3.5 397B A17B","url":"https://sup.ai/models/alibaba-qwen3.5-397b-a17b"},"position":370},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3.5-9b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Qwen3.5 9B is a multimodal foundation model from the Qwen3.5 family, designed to deliver strong reasoning, coding, and visual understanding in an efficient 9B-parameter architecture. It uses a unified vision-language design with early fusion of multimodal tokens, allowing the model to process and reason across text and images within the same context.","name":"Qwen3.5 9B","url":"https://sup.ai/models/alibaba-qwen3.5-9b"},"position":371},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3.5-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3.5 native vision-language Flash models are built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. Compared to the 3 series, these models deliver a leap forward in performance for both pure text and multimodal tasks, offering fast response times while balancing inference speed and overall performance.","name":"Qwen3.5 Flash","url":"https://sup.ai/models/alibaba-qwen3.5-flash"},"position":372},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3.5-plus","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3.5 native vision-language series Plus models are built on a hybrid architecture that integrates linear attention mechanisms with sparse mixture-of-experts models, achieving higher inference efficiency. In a variety of task evaluations, the 3.5 series consistently demonstrates performance on par with state-of-the-art leading models. Compared to the 3 series, these models show a leap forward in both pure-text and multimodal capabilities.","name":"Qwen3.5 Plus","url":"https://sup.ai/models/alibaba-qwen3.5-plus"},"position":373},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3.6-27b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3.6 27B native vision-language dense model builds upon the 3.5-27B version, with key improvements in agentic coding capabilities and enhanced STEM reasoning and inference skills. In the vision modality, it demonstrates significant advances in spatial intelligence, object localization, and detection, while video understanding, document OCR, and visual agent capabilities continue to improve steadily.","name":"Qwen3.6 27B","url":"https://sup.ai/models/alibaba-qwen3.6-27b"},"position":374},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3.6-35b-a3b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3.6 35B-A3B native vision-language model is built on a hybrid architecture that integrates linear attention mechanisms with a sparse mixture-of-experts framework, achieving higher inference efficiency. Compared with the 3.5-35B-A3B, this model demonstrates significantly improved agentic coding capabilities, mathematical and code reasoning abilities, spatial intelligence, as well as object localization and object detection performance.","name":"Qwen3.6 35B A3B","url":"https://sup.ai/models/alibaba-qwen3.6-35b-a3b"},"position":375},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3.6-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3.6 native vision-language Flash model series delivers a significant performance boost over the 3.5-Flash version. This model particularly excels in agentic coding capabilities, substantially outperforming its predecessor on multiple code-agent benchmarks, as well as in mathematical and code reasoning. In terms of vision, it features markedly improved spatial intelligence, with especially notable enhancements in object localization and object detection.","name":"Qwen3.6 Flash","url":"https://sup.ai/models/alibaba-qwen3.6-flash"},"position":376},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3.6-max","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Max model, the largest and most capable variant in the Qwen3.6 series, is now available in a preview version. At present, only its plain-text capabilities are open for experimentation. Compared with the previously released Qwen3-Max and Qwen3.6-Plus, this model features enhanced vibe coding abilities, more efficient coding agent execution, and significantly improved front-end development skills. Additionally, its long-tail knowledge retention has been further upgraded.","name":"Qwen3.6 Max","url":"https://sup.ai/models/alibaba-qwen3.6-max"},"position":377},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwen3.6-plus","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"1M"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"The Qwen3.6 native vision-language Plus series models demonstrate exceptional performance on par with the current state-of-the-art models, with a significant improvement in overall results compared to the 3.5 series. The models have been markedly enhanced in code-related capabilities such as agentic coding, front-end programming, and Vibe coding, as well as in multi-modal general object recognition, OCR, and object localization.","name":"Qwen3.6 Plus","url":"https://sup.ai/models/alibaba-qwen3.6-plus"},"position":378},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-qwq-32b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.","name":"QwQ 32B","url":"https://sup.ai/models/alibaba-qwq-32b"},"position":379},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepseek-deepseek-r1-distill-llama-70b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepSeek"},"category":"AI Model","description":"DeepSeek R1 Distill Llama 70B is a distilled large language model based on Llama-3.3-70B-Instruct, using outputs from DeepSeek R1. The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including AIME 2024 pass@1: 70.0, MATH-500 pass@1: 94.5, and CodeForces Rating: 1633. The model leverages fine-tuning from DeepSeek R1 outputs, enabling competitive performance comparable to larger frontier models.","name":"R1 Distill Llama 70B","url":"https://sup.ai/models/deepseek-deepseek-r1-distill-llama-70b"},"position":380},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/deepseek-deepseek-r1-distill-qwen-32b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"DeepSeek"},"category":"AI Model","description":"DeepSeek R1 Distill Qwen 32B is a distilled large language model based on Qwen 2.5 32B, using outputs from DeepSeek R1. It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models. Benchmark results include AIME 2024 pass@1: 72.6, MATH-500 pass@1: 94.3, and CodeForces Rating: 1691. The model leverages fine-tuning from DeepSeek R1 outputs, enabling competitive performance comparable to larger frontier models.","name":"R1 Distill Qwen 32B","url":"https://sup.ai/models/deepseek-deepseek-r1-distill-qwen-32b"},"position":381},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/tngtech-tng-r1t-chimera","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"163,840"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"TNG Technology"},"category":"AI Model","description":"TNG-R1T-Chimera is an experimental LLM with a faible for creative storytelling and character interaction. It is a derivate of the original TNG/DeepSeek-R1T-Chimera released in April 2025. Characteristics and improvements include: creative and pleasant personality, preliminary EQ-Bench3 value of about 1305, quite a bit more intelligent than the original albeit slightly slower, much more think-token consistent with properly delineated reasoning and answer blocks, and much improved tool calling.","name":"R1T Chimera","url":"https://sup.ai/models/tngtech-tng-r1t-chimera"},"position":382},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/reka-reka-edge-2603","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"16,384"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Reka"},"category":"AI Model","description":"Reka Edge is an extremely efficient 7B multimodal vision-language model that accepts image/video+text inputs and generates text outputs. This model is optimized specifically to deliver industry-leading performance in image understanding, video analysis, object detection, and agentic tool-use.","name":"Reka Edge","url":"https://sup.ai/models/reka-reka-edge-2603"},"position":383},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/reka-reka-flash-3","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"65,536"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,536"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Reka"},"category":"AI Model","description":"Reka Flash 3 is a general-purpose, instruction-tuned large language model with 21 billion parameters, developed by Reka. It excels at general chat, coding tasks, instruction-following, and function calling. Featuring a 64K context length and optimized through reinforcement learning (RLOO), it provides competitive performance comparable to proprietary models within a smaller parameter footprint. Ideal for low-latency, local, or on-device deployments, Reka Flash 3 is compact, supports efficient quantization (down to 11GB at 4-bit precision), and employs explicit reasoning tags to indicate its internal thought process. Reka Flash 3 is primarily an English model with limited multilingual understanding capabilities. The model weights are released under the Apache 2.0 license.","name":"Reka Flash 3","url":"https://sup.ai/models/reka-reka-flash-3"},"position":384},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/relace-relace-apply-3","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Relace"},"category":"AI Model","description":"Relace Apply 3 is a specialized code-patching LLM that merges AI-suggested edits straight into your source files. It can apply updates from GPT-4o, Claude, and others into your files at 10,000 tokens/sec on average. The model requires the prompt to be in the following format: {instruction} {initial_code} {edit_snippet} Zero Data Retention is enabled for Relace.","name":"Relace Apply 3","url":"https://sup.ai/models/relace-relace-apply-3"},"position":385},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/relace-relace-search","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Relace"},"category":"AI Model","description":"The relace-search model uses 4-12 `view_file` and `grep` tools in parallel to explore a codebase and return relevant files to the user request. In contrast to RAG, relace-search performs agentic multi-step reasoning to produce highly precise results 4x faster than any frontier model. It's designed to serve as a subagent that passes its findings to an \"oracle\" coding agent, who orchestrates/performs the rest of the coding task. To use relace-search you need to build an appropriate agent harness, and parse the response for relevant information to hand off to the oracle.","name":"Relace Search","url":"https://sup.ai/models/relace-relace-search"},"position":386},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/undi95-remm-slerp-l2-13b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"6,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"4,096"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Undi95"},"category":"AI Model","description":"A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge","name":"ReMM SLERP 13B","url":"https://sup.ai/models/undi95-remm-slerp-l2-13b"},"position":387},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/essentialai-rnj-1-instruct","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Essential AI"},"category":"AI Model","description":"Rnj-1 is an 8B-parameter, dense, open-weight model family developed by Essential AI and trained from scratch with a focus on programming, math, and scientific reasoning. The model demonstrates strong performance across multiple programming languages, tool-use workflows, and agentic execution environments (e.g., mini-SWE-agent). ","name":"Rnj 1 Instruct","url":"https://sup.ai/models/essentialai-rnj-1-instruct"},"position":388},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/thedrummer-rocinante-12b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"TheDrummer"},"category":"AI Model","description":"Rocinante 12B is designed for engaging storytelling and rich prose. Early testers have reported expanded vocabulary with unique and expressive word choices, enhanced creativity for vivid narratives, and adventure-filled and captivating stories.","name":"Rocinante 12B","url":"https://sup.ai/models/thedrummer-rocinante-12b"},"position":389},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/bytedance-seed-1.6","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"ByteDance"},"category":"AI Model","description":"Seed 1.6 is a general-purpose model released by the ByteDance Seed team. It incorporates multimodal capabilities and adaptive deep thinking with a 256K context window.","name":"Seed 1.6","url":"https://sup.ai/models/bytedance-seed-1.6"},"position":390},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/bytedance-seed-1.6-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"ByteDance"},"category":"AI Model","description":"Seed 1.6 Flash is an ultra-fast multimodal deep thinking model by ByteDance Seed, supporting both text and visual understanding. It features a 256k context window and can generate outputs of up to 16k tokens.","name":"Seed 1.6 Flash","url":"https://sup.ai/models/bytedance-seed-1.6-flash"},"position":391},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/bytedance-seed-2.0-lite","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"ByteDance"},"category":"AI Model","description":"Seed 2.0 Lite is a versatile, cost-efficient enterprise workhorse that delivers strong multimodal and agent capabilities while offering noticeably lower latency, making it a practical default choice for most production workloads across text, vision, and tools. Engineered for high-frequency visual understanding and agentic workflows, it's an ideal choice for deployment at scale with minimal latency.","name":"Seed 2.0 Lite","url":"https://sup.ai/models/bytedance-seed-2.0-lite"},"position":392},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/bytedance-seed-2.0-mini","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"262,144"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"ByteDance"},"category":"AI Model","description":"Seed-2.0-mini targets latency-sensitive, high-concurrency, and cost-sensitive scenarios, emphasizing fast response and flexible inference deployment. It delivers performance comparable to ByteDance-Seed-1.6, supports 256k context, four reasoning effort modes (minimal/low/medium/high), multimodal understanding, and is optimized for lightweight tasks where cost and speed take priority.","name":"Seed 2.0 Mini","url":"https://sup.ai/models/bytedance-seed-2.0-mini"},"position":393},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/thedrummer-skyfall-36b-v2","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"TheDrummer"},"category":"AI Model","description":"Skyfall 36B v2 is an enhanced iteration of Mistral Small 2501, specifically fine-tuned for improved creativity, nuanced writing, role-playing, and coherent storytelling.","name":"Skyfall 36B V2","url":"https://sup.ai/models/thedrummer-skyfall-36b-v2"},"position":394},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/upstage-solar-pro-3","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,384"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Upstage"},"category":"AI Model","description":"Solar Pro 3 is Upstage's powerful Mixture-of-Experts (MoE) language model. With 102B total parameters and 12B active parameters per forward pass, it delivers exceptional performance while maintaining computational efficiency. Optimized for Korean with English and Japanese support.","name":"Solar Pro 3","url":"https://sup.ai/models/upstage-solar-pro-3"},"position":395},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/perplexity-sonar","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"127,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Perplexity"},"category":"AI Model","description":"Sonar is lightweight, affordable, fast, and simple to use - now featuring citations and the ability to customize sources. It is designed for companies seeking to integrate lightweight question-and-answer features optimized for speed.","name":"Sonar","url":"https://sup.ai/models/perplexity-sonar"},"position":396},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/perplexity-sonar-deep-research","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Perplexity"},"category":"AI Model","description":"Sonar Deep Research is a research-focused model designed for multi-step retrieval, synthesis, and reasoning across complex topics. It autonomously searches, reads, and evaluates sources, refining its approach as it gathers information. This enables comprehensive report generation across domains like finance, technology, health, and current events. Notes on Pricing: Input tokens comprise of Prompt tokens (user prompt) + Citation tokens (these are processed tokens from running searches). Deep Research runs multiple searches to conduct exhaustive research. Searches are priced at $5/1000 searches. A request that does 30 searches will cost $0.15 in this step. Reasoning is a distinct step in Deep Research since it does extensive automated reasoning through all the material it gathers during its research phase. Reasoning tokens here are a bit different than the CoTs in the answer - these are tokens that we use to reason through the research material prior to generating the outputs via the CoTs. Reasoning tokens are priced at $3/1M tokens.","name":"Sonar Deep Research","url":"https://sup.ai/models/perplexity-sonar-deep-research"},"position":397},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/perplexity-sonar-pro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Perplexity"},"category":"AI Model","description":"Sonar Pro pricing includes Perplexity search pricing. For enterprises seeking more advanced capabilities, the Sonar Pro API can handle in-depth, multi-step queries with added extensibility, like double the number of citations per search as Sonar on average. Plus, with a larger context window, it can handle longer and more nuanced searches and follow-up questions.","name":"Sonar Pro","url":"https://sup.ai/models/perplexity-sonar-pro"},"position":398},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/perplexity-sonar-pro-search","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"200,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Perplexity"},"category":"AI Model","description":"Sonar Pro Search is Perplexity's most advanced agentic search system. It is designed for deeper reasoning and analysis. Pricing is based on tokens plus $18 per thousand requests. This model powers the Pro Search mode on the Perplexity platform. Sonar Pro Search adds autonomous, multi-step reasoning to Sonar Pro. So, instead of just one query + synthesis, it plans and executes entire research workflows using tools.","name":"Sonar Pro Search","url":"https://sup.ai/models/perplexity-sonar-pro-search"},"position":399},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/perplexity-sonar-reasoning-pro","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"128,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Perplexity"},"category":"AI Model","description":"Sonar Pro pricing includes Perplexity search pricing. Sonar Reasoning Pro is a premier reasoning model powered by DeepSeek R1 with Chain of Thought (CoT). Designed for advanced use cases, it supports in-depth, multi-step queries with a larger context window and can surface more citations per search, enabling more comprehensive and extensible responses.","name":"Sonar Reasoning Pro","url":"https://sup.ai/models/perplexity-sonar-reasoning-pro"},"position":400},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/raifle-sorcererlm-8x22b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"16,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"16,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Raifle"},"category":"AI Model","description":"SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on WizardLM-2 8x22B. Features advanced reasoning and emotional intelligence for engaging and immersive interactions, vivid writing capabilities enriched with spatial and contextual awareness, and enhanced narrative depth promoting creative and dynamic storytelling.","name":"SorcererLM 8x22B","url":"https://sup.ai/models/raifle-sorcererlm-8x22b"},"position":401},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/arcee-spotlight","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"65,537"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Arcee"},"category":"AI Model","description":"Spotlight is a 7-billion-parameter vision-language model derived from Qwen 2.5-VL and fine-tuned by Arcee AI for tight image-text grounding tasks. It offers a 32 k-token context window, enabling rich multimodal conversations that combine lengthy documents with one or more images. Training emphasized fast inference on consumer GPUs while retaining strong captioning, visual‐question-answering, and diagram-analysis accuracy. As a result, Spotlight slots neatly into agent workflows where screenshots, charts or UI mock-ups need to be interpreted on the fly. Early benchmarks show it matching or out-scoring larger VLMs such as LLaVA-1.6 13 B on popular VQA and POPE alignment tests. ","name":"Spotlight","url":"https://sup.ai/models/arcee-spotlight"},"position":402},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/stepfun-step-3.5-flash","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"256,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"256,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"StepFun"},"category":"AI Model","description":"Step 3.5 Flash is StepFun's most capable open-source foundation model. Built on a sparse Mixture of Experts (MoE) architecture, it selectively activates only 11B of its 196B parameters per token. It is a reasoning model that is incredibly speed efficient even at long contexts.","name":"Step 3.5 Flash","url":"https://sup.ai/models/stepfun-step-3.5-flash"},"position":403},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/alibaba-tongyi-deepresearch-30b-a3b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Alibaba"},"category":"AI Model","description":"Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks and delivers state-of-the-art performance on benchmarks like Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch, and FRAMES. This makes it superior for complex agentic search, reasoning, and multi-step problem-solving compared to prior models. The model includes a fully automated synthetic data pipeline for scalable pre-training, fine-tuning, and reinforcement learning. It uses large-scale continual pre-training on diverse agentic data to boost reasoning and stay fresh. It also features end-to-end on-policy RL with a customized Group Relative Policy Optimization, including token-level gradients and negative sample filtering for stable training. The model supports ReAct for core ability checks and an IterResearch-based 'Heavy' mode for max performance through test-time scaling. It's ideal for advanced research agents, tool use, and heavy inference workflows.","name":"Tongyi DeepResearch 30B A3B","url":"https://sup.ai/models/alibaba-tongyi-deepresearch-30b-a3b"},"position":404},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/arcee-trinity-large","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Arcee"},"category":"AI Model","description":"Trinity Large (Preview) is a 400B-parameter (13B active) sparse mixture-of-experts language model, engineered to scale model capacity while maintaining inference efficiency over long contexts. It delivers strong performance in reasoning-heavy workloads including math, coding-related tasks, and multi-step agent workflows. With a 131K context window and native function calling, it excels at complex tasks requiring deep understanding and structured outputs.","name":"Trinity Large","url":"https://sup.ai/models/arcee-trinity-large"},"position":405},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/arcee-trinity-large-thinking","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Arcee"},"category":"AI Model","description":"Trinity-Large-Thinking is a reasoning-optimized variant of Arcee AI's Trinity-Large family, a 398B-parameter sparse Mixture-of-Experts model with approximately 13B active parameters per token. Built on Trinity-Large-Base and post-trained with extended chain-of-thought reasoning and agentic RL, it delivers state-of-the-art performance on agentic benchmarks. The model generates explicit reasoning traces wrapped in think blocks before producing its final response. Purpose-built for tool calling, multi-step planning, and agent workflows.","name":"Trinity Large Thinking","url":"https://sup.ai/models/arcee-trinity-large-thinking"},"position":406},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/arcee-trinity-mini","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"131,072"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Arcee"},"category":"AI Model","description":"Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model, engineered for efficient inference over long contexts with robust function calling and multi-step agent workflows. With 128K context, it delivers an outstanding price-to-performance ratio while maintaining coherent multi-turn reasoning and reliable tool use. Ideal for production deployments where speed and cost efficiency are paramount.","name":"Trinity Mini","url":"https://sup.ai/models/arcee-trinity-mini"},"position":407},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/bytedance-ui-tars-1.5-7b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"128,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"2,048"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"ByteDance"},"category":"AI Model","description":"UI-TARS-1.5 is a multimodal vision-language agent optimized for GUI-based environments, including desktop interfaces, web browsers, mobile systems, and games. Built by ByteDance, it builds upon the UI-TARS framework with reinforcement learning-based reasoning, enabling robust action planning and execution across virtual interfaces. This model achieves state-of-the-art results on a range of interactive and grounding benchmarks, including OSworld, WebVoyager, AndroidWorld, and ScreenSpot. It also demonstrates perfect task completion across diverse Poki games and outperforms prior models in Minecraft agent tasks. UI-TARS-1.5 supports thought decomposition during inference and shows strong scaling across variants, with the 1.5 version notably exceeding the performance of earlier 72B and 7B checkpoints.","name":"UI TARS 7B","url":"https://sup.ai/models/bytedance-ui-tars-1.5-7b"},"position":408},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/venice-uncensored","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Venice"},"category":"AI Model","description":"Venice Uncensored Dolphin Mistral 24B Venice Edition is a fine-tuned variant of Mistral-Small-24B-Instruct-2501, developed by dphn.ai in collaboration with Venice.ai. This model is designed as an \"uncensored\" instruct-tuned LLM, preserving user control over alignment, system prompts, and behavior. Intended for advanced and unrestricted use cases, Venice Uncensored emphasizes steerability and transparent behavior, removing default safety and alignment layers typically found in mainstream assistant models.","name":"Uncensored","url":"https://sup.ai/models/venice-uncensored"},"position":409},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/thedrummer-unslopnemo-12b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"TheDrummer"},"category":"AI Model","description":"UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.","name":"UnslopNemo 12B","url":"https://sup.ai/models/thedrummer-unslopnemo-12b"},"position":410},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/arcee-virtuoso-large","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"131,072"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"64,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Arcee"},"category":"AI Model","description":"Virtuoso-Large is Arcee's top-tier general-purpose LLM at 72 B parameters, tuned to tackle cross-domain reasoning, creative writing and enterprise QA. Unlike many 70 B peers, it retains the 128 k context inherited from Qwen 2.5, letting it ingest books, codebases or financial filings wholesale. Training blended DeepSeek R1 distillation, multi-epoch supervised fine-tuning and a final DPO/RLHF alignment stage, yielding strong performance on BIG-Bench-Hard, GSM-8K and long-context Needle-In-Haystack tests. Enterprises use Virtuoso-Large as the \"fallback\" brain in Conductor pipelines when other SLMs flag low confidence. Despite its size, aggressive KV-cache optimizations keep first-token latency in the low-second range on 8x H100 nodes, making it a practical production-grade powerhouse.","name":"Virtuoso Large","url":"https://sup.ai/models/arcee-virtuoso-large"},"position":411},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-voxtral-mini","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"A mini audio understanding model released in July 2025","name":"Voxtral Mini","url":"https://sup.ai/models/mistral-voxtral-mini"},"position":412},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mistral-voxtral-small","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"32,768"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"32,768"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mistral"},"category":"AI Model","description":"A small audio understanding model released in July 2025","name":"Voxtral Small","url":"https://sup.ai/models/mistral-voxtral-small"},"position":413},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/mancer-weaver","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"8,000"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"2,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Mancer"},"category":"AI Model","description":"An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.","name":"Weaver","url":"https://sup.ai/models/mancer-weaver"},"position":414},{"@type":"ListItem","item":{"@id":"https://sup.ai/models/microsoft-wizardlm-2-8x22b","@type":"Product","additionalProperty":[{"@type":"PropertyValue","name":"Context Window","unitText":"tokens","value":"65,535"},{"@type":"PropertyValue","name":"Max Output","unitText":"tokens","value":"8,000"}],"audience":{"@type":"Audience","audienceType":"Developers and Researchers"},"brand":{"@type":"Brand","name":"Microsoft"},"category":"AI Model","description":"WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models. It is an instruct finetune of Mixtral 8x22B. #moe","name":"WizardLM 2 8x22B","url":"https://sup.ai/models/microsoft-wizardlm-2-8x22b"},"position":415}],"name":"AI Models","numberOfItems":415},"name":"AI Models","potentialAction":[{"@type":"ViewAction","target":["https://sup.ai/models"]}],"primaryImageOfPage":{"@type":"ImageObject","contentUrl":"https://sup.ai/_next/static/media/any-512.0wk5y3289itg8.png?dpl=dpl_K2zwS6UMxU5ad1bUMdqquqQt3ofb","height":"512","url":"https://sup.ai/_next/static/media/any-512.0wk5y3289itg8.png?dpl=dpl_K2zwS6UMxU5ad1bUMdqquqQt3ofb","width":"512"},"url":"https://sup.ai/models"}