{"data":[{"type":"model","id":"anthropic/claude-haiku-4.5","display_name":"Anthropic: Claude Haiku 4.5","created_at":"2026-01-21T16:09:13Z","owned_by":"bedrock","canonical_slug":"claude-haiku-4-5-20251001","description":"Anthropic's fastest and most cost-effective model optimized for near-instant responsiveness. Features extended thinking capabilities and excels at coding tasks.","context_length":200000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"claude","instruct_type":null},"pricing":{"prompt":"0.000001","completion":"0.000005","input_cache_read":"0.0000001","input_cache_write_5m":"0.00000125","input_cache_write_1h":"0.000002","web_search":"0.015"},"top_provider":{"context_length":200000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"expiration_date":null,"is_deprecated":false,"aliases":["claude-haiku-4.5","claude-haiku-4-5","claude-haiku-4-5-20251001"]},{"type":"model","id":"anthropic/claude-opus-4.5","display_name":"Anthropic: Claude Opus 4.5","created_at":"2026-01-21T16:09:13Z","owned_by":"bedrock","canonical_slug":"claude-opus-4-5-20251101","description":"Anthropic's most powerful model with extended thinking, agentic workflows, and computer use support. 
Excels at complex analysis, creative writing, and multi-step reasoning tasks.","context_length":200000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"claude","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.000025","input_cache_read":"0.0000005","input_cache_write_5m":"0.00000625","input_cache_write_1h":"0.00001","web_search":"0.015"},"top_provider":{"context_length":200000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["claude-opus-4.5","claude-opus-4-5","claude-opus-4-5-20251101"]},{"type":"model","id":"anthropic/claude-opus-4.6","display_name":"Anthropic: Claude Opus 4.6","created_at":"2026-02-06T01:43:06Z","owned_by":"bedrock","canonical_slug":"claude-opus-4-6-20260205","description":"Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire workflows rather than single prompts, making it especially effective for large codebases, complex refactors, and multi-step debugging that unfolds over time. The model shows deeper contextual understanding, stronger problem decomposition, and greater reliability on hard engineering tasks than prior generations. Beyond coding, Opus 4.6 excels at sustained knowledge work. It produces near-production-ready documents, plans, and analyses in a single pass, and maintains coherence across very long outputs and extended sessions. 
This makes it a strong default for tasks that require persistence, judgment, and follow-through, such as technical design, migration planning, and end-to-end project execution.","context_length":1000000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"claude","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.000025","input_cache_read":"0.0000005","input_cache_write_5m":"0.00000625","input_cache_write_1h":"0.00001","web_search":"0.015"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["claude-opus-4.6","claude-opus-4-6","claude-opus-4-6-20260205"]},{"type":"model","id":"anthropic/claude-opus-4.7","display_name":"Anthropic: Claude Opus 4.7","created_at":"2026-04-16T15:27:39Z","owned_by":"bedrock","canonical_slug":"claude-opus-4-7","description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on complex, multi-step tasks and more reliable agentic execution across extended workflows. 
It is especially effective for asynchronous agent pipelines where tasks unfold over time - large codebases, multi-stage debugging, and end-to-end project orchestration.","context_length":1000000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"claude","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.000025","input_cache_read":"0.0000005","input_cache_write_5m":"0.00000625","input_cache_write_1h":"0.00001","web_search":"0.015"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["claude-opus-4.7","claude-opus-4-7","claude-opus-4-7-20260416"]},{"type":"model","id":"anthropic/claude-opus-4.8","display_name":"Anthropic: Claude Opus 4.8","created_at":"2026-05-28T18:28:46Z","owned_by":"bedrock","canonical_slug":"claude-opus-4-8","description":"Claude Opus 4.8 is Anthropic's most capable generally available model in the Opus family. It supports text, image, and file inputs with text output, with reasoning support and a 1M-token context window. It is suited for highly autonomous agents, long-horizon agentic work, knowledge work, and memory-driven tasks where coherence over extended sessions matters. It is particularly strong on multi-step reasoning, complex coding, and end-to-end project orchestration - large codebases, multi-stage debugging, and long-running asynchronous agent pipelines. 
Beyond coding, it handles knowledge work such as drafting documents, building presentations, and analyzing data, maintaining quality across very long outputs.","context_length":1000000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"claude","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.000025","input_cache_read":"0.0000005","input_cache_write_5m":"0.00000625","input_cache_write_1h":"0.00001","web_search":"0.015"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["claude-opus-4.8","claude-opus-4-8","claude-opus-4-8-20260528"]},{"type":"model","id":"anthropic/claude-sonnet-4.5","display_name":"Anthropic: Claude Sonnet 4.5","created_at":"2026-01-21T16:09:13Z","owned_by":"bedrock","canonical_slug":"claude-sonnet-4-5-20250929","description":"Anthropic's balanced model with state-of-the-art performance on SWE-bench Verified. 
Ideal for coding, analysis, and general-purpose tasks with excellent cost-performance ratio.","context_length":1000000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"claude","instruct_type":null},"pricing":{"prompt":"0.000003","completion":"0.000015","input_cache_read":"0.0000003","input_cache_write_5m":"0.00000375","input_cache_write_1h":"0.000006","web_search":"0.015"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["claude-sonnet-4.5","claude-sonnet-4-5","claude-sonnet-4-5-20250929"]},{"type":"model","id":"anthropic/claude-sonnet-4.6","display_name":"Anthropic: Claude Sonnet 4.6","created_at":"2026-02-18T08:36:41Z","owned_by":"bedrock","canonical_slug":"claude-sonnet-4-6-20260217","description":"Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. 
It excels at iterative development, complex codebase navigation, end-to-end project management with memory, polished document creation, and confident computer use for web QA and workflow automation.","context_length":1000000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"claude","instruct_type":null},"pricing":{"prompt":"0.000003","completion":"0.000015","input_cache_read":"0.0000003","input_cache_write_5m":"0.00000375","input_cache_write_1h":"0.000006","web_search":"0.015"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["claude-sonnet-4.6","claude-sonnet-4-6","claude-sonnet-4-6-20260217"]},{"type":"model","id":"bailian/qwen-flash","display_name":"Qwen Flash","created_at":"2026-01-21T16:09:14Z","owned_by":"dashscope","canonical_slug":"qwen-flash","description":"Alibaba Qwen Flash via Dashscope. Ultra-fast inference for latency-sensitive tasks. 
Lowest cost option in the Qwen family.","context_length":1000000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.000000022","completion":"0.00000022","input_cache_read":"0.0000000043","input_cache_write":"0.000000027","web_search":"0.01"},"top_provider":{"context_length":1000000,"max_completion_tokens":32000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"expiration_date":null,"is_deprecated":false,"aliases":["qwen-flash"]},{"type":"model","id":"bailian/qwen3-coder-flash","display_name":"Qwen3 Coder Flash","created_at":"2026-03-09T06:54:22Z","owned_by":"dashscope","canonical_slug":"qwen3-coder-flash-2025-08-05","description":"基于Qwen3的代码生成模型，继承Qwen3-Coder-Plus的coding agent能力，支持多轮工具交互，重点优化仓库级别理解能力并增加工具调用稳定性。","context_length":1000000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000005","completion":"0.0000025","input_cache_read":"0.00000006","input_cache_write":"0.00000027"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["qwen3-coder-flash"]},{"type":"model","id":"bailian/qwen3-coder-next","display_name":"Qwen3 Coder Next","created_at":"2026-03-09T06:54:22Z","owned_by":"dashscope","canonical_slug":"qwen3-coder-next-2026-02-19","description":"Qwen3系列新一代代码生成模型，效果接近Qwen3-Coder-Plus兼具更优性能。模型重点优化仓库级别理解、支持多轮工具交互、提升对于agentic 
coding类工具的适配能力。","context_length":256000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000002","completion":"0.0000015"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["qwen3-coder-next"]},{"type":"model","id":"bailian/qwen3-coder-plus","display_name":"Qwen3 Coder Plus","created_at":"2026-03-09T06:54:22Z","owned_by":"dashscope","canonical_slug":"qwen3-coder-plus-2025-09-23","description":"基于Qwen3的代码生成模型，具有强大的Coding Agent能力，擅长工具调用和环境交互，能够实现自主编程、代码能力卓越的同时兼具通用能力。","context_length":1000000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000018","completion":"0.000009","input_cache_read":"0.0000002","input_cache_write":"0.000001"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["qwen3-coder-plus"]},{"type":"model","id":"bailian/qwen3.5-flash","display_name":"Qwen: Qwen3.5 
Flash","created_at":"2026-02-25T17:11:41Z","owned_by":"dashscope","canonical_slug":"qwen3.5-flash-2026-02-23","description":"Qwen3.5原生视觉语言系列Flash模型，基于混合架构设计，融合了线性注意力机制与稀疏混合专家模型，实现了更高的推理效率。模型效果在纯文本与多模态方面相较3系列均实现飞跃式进步；响应速度快，兼具推理速度和性能。","context_length":1000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000001","completion":"0.0000004","input_cache_read":"0.00000001","input_cache_write":"0.000000125","web_search":"0.01"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["qwen3.5-flash"]},{"type":"model","id":"bailian/qwen3.5-plus","display_name":"Qwen3.5 Plus","created_at":"2026-02-16T20:51:44Z","owned_by":"dashscope","canonical_slug":"qwen3.5-plus-2026-02-15","description":"Qwen3.5原生视觉语言系列Plus模型，基于混合架构设计，融合了线性注意力机制与稀疏混合专家模型，实现了更高的推理效率。在多项任务评测中，3.5系列均展现出与当前顶尖前沿模型相媲美的卓越性能，模型效果在纯文本与多模态方面相较3系列均实现飞跃式进步。","context_length":1000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000004","completion":"0.0000024","input_cache_read":"0.00000004","input_cache_write":"0.0000004","web_search":"0.01"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["qwen3.5-plus"]},{"type":"model","id":"bailian/qwen3.6-27b","display_name":"Qwen: Qwen3.6 
27B","created_at":"2026-04-28T09:14:53Z","owned_by":"dashscope","canonical_slug":"qwen3.6-27b","description":"Qwen3.6系列27B原生视觉语言Dense模型，模型效果相较3.5-27B重点提升了Agentic coding能力、模型STEM与推理能力进一步增强；视觉模态方面在空间智能、物体定位与检测能力上显著增强，视频理解、文档OCR及视觉Agent能力稳步提升。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000006","completion":"0.0000036","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["qwen3.6-27b"]},{"type":"model","id":"bailian/qwen3.6-flash","display_name":"Qwen: Qwen3.6 Flash","created_at":"2026-04-28T09:14:53Z","owned_by":"dashscope","canonical_slug":"qwen3.6-flash-2026-04-16","description":"Qwen3.6原生视觉语言系列Flash模型，模型效果相较3.5-Flash显著提升。本模型重点提升agentic coding能力（在多项代码智能体基准上大幅超越前代）、数学推理和代码推理能力；视觉方面在空间智能能力上显著增强，物体定位与目标检测提升尤为突出。","context_length":1000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.00000025","completion":"0.0000015","input_cache_read":"0.000000025","input_cache_write":"0.00000031","web_search":"0.01"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["qwen3.6-flash"]},{"type":"model","id":"bailian/qwen3.6-max-preview","display_name":"Qwen3.6 Max 
Preview","created_at":"2026-04-28T09:14:53Z","owned_by":"dashscope","canonical_slug":"qwen3.6-max-preview","description":"Qwen3.6系列中规模最大、综合能力最强的Max模型Preview版本，当前开放纯文本模型能力供体验。相较于此前发布的Qwen3-Max和Qwen3.6-Plus，本模型在vibe coding能力上进一步提升、coding agent执行更加高效、前端编程开发能力显著提升；长尾知识能力进一步升级。","context_length":256000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.000002","completion":"0.000012","input_cache_read":"0.0000002","input_cache_write":"0.000002","web_search":"0.01"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["qwen3.6-max-preview"]},{"type":"model","id":"bailian/qwen3.6-plus","display_name":"Qwen: Qwen3.6 Plus","created_at":"2026-04-03T00:56:11Z","owned_by":"dashscope","canonical_slug":"qwen3.6-plus-2026-04-02","description":"Qwen3.6原生视觉语言系列Plus模型，展现出与当前顶尖前沿模型相媲美的卓越性能，模型效果相较3.5系列显著提升。模型在Agentic coding、前端编程、Vibe coding等代码能力、多模态万物识别、OCR、物体定位等能力上显著增强。","context_length":1000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000005","completion":"0.000003","input_cache_read":"0.00000005","input_cache_write":"0.000000625","web_search":"0.01"},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["bailian/qwen3-6-plus"]},{"type":"model","id":"bailian/qwen3.7-max","display_name":"Qwen: Qwen3.7 
Max","created_at":"2026-05-22T04:00:37Z","owned_by":"dashscope","canonical_slug":"qwen3.7-max","description":"阿里云百炼 Qwen3.7 系列 Max 模型, 1M context, 支持深度推理 (reasoning_content), 强编码与长程自治执行能力. 通过 DashScope OpenAI-compatible 端点提供.","context_length":1064000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"qwen","instruct_type":null},"pricing":{"prompt":"0.0000025","completion":"0.0000075","input_cache_read":"0.00000025","input_cache_write":"0.000003125","web_search":"0.01"},"top_provider":{"context_length":1064000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["qwen3.7-max"]},{"type":"model","id":"deepseek/deepseek-v3.2","display_name":"DeepSeek V3.2","created_at":"2026-01-21T16:09:14Z","owned_by":"openrouter","canonical_slug":"deepseek-v3.2","description":"DeepSeek's latest model building upon instruction following and coding abilities. 
Pre-trained on 15 trillion tokens with excellent cost-performance ratio.","context_length":128000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"deepseek","instruct_type":null},"pricing":{"prompt":"0.00000029","completion":"0.00000043","input_cache_read":"0.00000006"},"top_provider":{"context_length":128000,"max_completion_tokens":32000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"expiration_date":null,"is_deprecated":false,"aliases":["deepseek-v3.2","deepseek-v3-2-251201"]},{"type":"model","id":"deepseek/deepseek-v4-flash","display_name":"DeepSeek V4 Flash","created_at":"2026-04-24T04:45:10Z","owned_by":"deepseek","canonical_slug":"deepseek-v4-flash-20260424","description":"DeepSeek V4 Flash is an efficiency-optimized Mixture-of-Experts model from DeepSeek with 284B total parameters and 13B activated parameters, supporting a 1M-token context window. 
It is designed for fast inference and high-throughput workloads, while maintaining strong reasoning and coding performance.","context_length":1000000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"deepseek","instruct_type":null},"pricing":{"prompt":"0.00000014","completion":"0.00000028","input_cache_read":"0.000000028"},"top_provider":{"context_length":1000000,"max_completion_tokens":384000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"expiration_date":null,"is_deprecated":false,"aliases":["deepseek-v4-flash","deepseek-v4-flash-20260424"]},{"type":"model","id":"deepseek/deepseek-v4-pro","display_name":"DeepSeek V4 Pro","created_at":"2026-04-24T04:45:10Z","owned_by":"deepseek","canonical_slug":"deepseek-v4-pro-20260424","description":"DeepSeek V4 Pro is a large-scale Mixture-of-Experts model from DeepSeek with 1.6T total parameters and 49B activated parameters, supporting a 1M-token context window. 
It is designed for advanced reasoning, coding, and long-horizon agent workflows, with strong performance across knowledge, math, and software engineering benchmarks.","context_length":1000000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"deepseek","instruct_type":null},"pricing":{"prompt":"0.00000174","completion":"0.00000348","input_cache_read":"0.000000145"},"top_provider":{"context_length":1000000,"max_completion_tokens":384000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"expiration_date":null,"is_deprecated":false,"aliases":["deepseek-v4-pro","deepseek-v4-pro-20260424"]},{"type":"model","id":"minimax/m2-her","display_name":"MiniMax: MiniMax M2 Her","created_at":"2026-02-12T20:31:59Z","owned_by":"minimax","canonical_slug":"minimax-m2-her-2026-02-12","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000012"},"top_provider":{"context_length":200000,"max_completion_tokens":131000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["m2-her"]},{"type":"model","id":"minimax/minimax-m2","display_name":"MiniMax: MiniMax M2","created_at":"2026-02-12T20:31:59Z","owned_by":"minimax","canonical_slug":"minimax-m2-2025-10-23","description":"MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. 
With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.","context_length":204800,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000012","input_cache_read":"0.00000003","input_cache_write":"0.000000375"},"top_provider":{"context_length":204800,"max_completion_tokens":131000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["minimax-m2"]},{"type":"model","id":"minimax/minimax-m2.1","display_name":"MiniMax: MiniMax M2.1","created_at":"2026-02-12T20:31:59Z","owned_by":"minimax","canonical_slug":"minimax-m2.1-2025-12-23","description":"MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. 
With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.","context_length":204800,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000012","input_cache_read":"0.00000003","input_cache_write":"0.000000375"},"top_provider":{"context_length":204800,"max_completion_tokens":131000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["minimax-m2.1"]},{"type":"model","id":"minimax/minimax-m2.1-lightning","display_name":"MiniMax: MiniMax M2.1 Lightning","created_at":"2026-02-12T20:31:59Z","owned_by":"minimax","canonical_slug":"minimax-m2.1-lightning-2025-12-23","description":"MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. 
With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.","context_length":204800,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000024","input_cache_read":"0.00000003","input_cache_write":"0.000000375"},"top_provider":{"context_length":204800,"max_completion_tokens":131000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["minimax-m2.1-lightning"]},{"type":"model","id":"minimax/minimax-m2.5","display_name":"MiniMax: MiniMax M2.5","created_at":"2026-02-12T20:31:59Z","owned_by":"minimax","canonical_slug":"minimax-m2.5-2026-02-12","description":"MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1 to extend into general office work, reaching fluency in generating and operating Word, Excel, and Powerpoint files, context switching between diverse software environments, and working across different agent and human teams. 
Scoring 80.2% on SWE-Bench Verified, 51.3% on Multi-SWE-Bench, and 76.3% on BrowseComp, M2.5 is also more token efficient than previous generations, having been trained to optimize its actions and output through planning.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000012","input_cache_read":"0.00000003","input_cache_write":"0.000000375"},"top_provider":{"context_length":200000,"max_completion_tokens":131000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["minimax-m2.5"]},{"type":"model","id":"minimax/minimax-m2.5-lightning","display_name":"MiniMax: MiniMax M2.5 Lightning","created_at":"2026-02-12T20:31:59Z","owned_by":"minimax","canonical_slug":"minimax-m2.5-lightning-2026-02-12","description":"MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1 to extend into general office work, reaching fluency in generating and operating Word, Excel, and Powerpoint files, context switching between diverse software environments, and working across different agent and human teams. 
Scoring 80.2% on SWE-Bench Verified, 51.3% on Multi-SWE-Bench, and 76.3% on BrowseComp, M2.5 is also more token efficient than previous generations, having been trained to optimize its actions and output through planning.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000024","input_cache_read":"0.00000003","input_cache_write":"0.000000375"},"top_provider":{"context_length":200000,"max_completion_tokens":131000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["minimax-m2.5-lightning"]},{"type":"model","id":"minimax/minimax-m2.7","display_name":"MiniMax: MiniMax M2.7","created_at":"2026-03-18T06:29:06Z","owned_by":"minimax","canonical_slug":"minimax-m2.7-2026-03-18","description":"M2.7 delivers outstanding performance in real-world software engineering, including end-to-end complete project delivery, log analysis and bug triaging, code security, machine learning, and more. On the benchmark SWE-Pro, M2.7 scores 56.22%, nearly matching the level of Opus. 
This capability also extends to end-to-end complete project delivery scenarios (VIBE-Pro 55.6%) and deep understanding of complex engineering systems on Terminal Bench 2 (57.0%).","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000003","completion":"0.0000012","input_cache_read":"0.00000006","input_cache_write":"0.000000375"},"top_provider":{"context_length":200000,"max_completion_tokens":131000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["minimax-m2.7"]},{"type":"model","id":"minimax/minimax-m2.7-highspeed","display_name":"MiniMax: MiniMax M2.7 Highspeed","created_at":"2026-03-18T06:29:06Z","owned_by":"minimax","canonical_slug":"minimax-m2.7-highspeed-2026-03-18","description":"M2.7 highspeed: Same performance, faster, more agile","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"minimax","instruct_type":null},"pricing":{"prompt":"0.0000006","completion":"0.0000024","input_cache_read":"0.00000006","input_cache_write":"0.000000375"},"top_provider":{"context_length":200000,"max_completion_tokens":131000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["minimax-m2.7-highspeed"]},{"type":"model","id":"moonshotai/kimi-k2.5","display_name":"MoonshotAI: Kimi K2.5","created_at":"2026-01-27T10:32:28Z","owned_by":"MoonshotAI","canonical_slug":"kimi-k2.5-20260127","description":"Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. 
Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.","context_length":262144,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"kimi","instruct_type":null},"pricing":{"prompt":"0.0000006","completion":"0.000003","input_cache_read":"0.0000001","web_search":"0.0043"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["kimi-k2.5"]},{"type":"model","id":"moonshotai/kimi-k2.6","display_name":"MoonshotAI: Kimi K2.6","created_at":"2026-04-20T11:49:59Z","owned_by":"MoonshotAI","canonical_slug":"kimi-k2.6-20260421","description":"Kimi K2.6 是 Kimi 最新最智能的模型，Kimi K2.6 的通用 Agent、代码、视觉理解等综合能力得到全面提升，其中在博士级难度的完整版人类最后的考试（Humanity’s Last Exam）、在考察模型真实软件工程能力的 SWE-Bench Pro、评估 Agent 深度检索能力的 DeepSearchQA 等基准测试中均取得行业领先的成绩，同时支持文本、图片与视频输入，思考与非思考模式，对话与 Agent 任务。","context_length":262144,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"kimi","instruct_type":null},"pricing":{"prompt":"0.00000095","completion":"0.000004","input_cache_read":"0.00000016","web_search":"0.0043"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["kimi-k2.6","kimi-k2.6-20260421"]},{"type":"model","id":"openai/gpt-5.4","display_name":"OpenAI: GPT-5.4","created_at":"2026-03-06T01:42:14Z","owned_by":"azure","canonical_slug":"gpt-5.4-2026-03-05","description":"GPT-5.4 is OpenAI’s latest frontier 
model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling high-context reasoning, coding, and multimodal analysis within the same workflow. The model delivers improved performance in coding, document understanding, tool use, and instruction following. It is designed as a strong default for both general-purpose tasks and software engineering, capable of generating production-quality code, synthesizing information across multiple sources, and executing complex multi-step workflows with fewer iterations and greater token efficiency.","context_length":1050000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.0000025","completion":"0.000015","input_cache_read":"0.00000025","web_search":"0.01"},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["gpt-5.4"]},{"type":"model","id":"openai/gpt-5.4-mini","display_name":"OpenAI: GPT-5.4 Mini","created_at":"2026-03-18T01:56:46Z","owned_by":"azure","canonical_slug":"gpt-5.4-mini-2026-03-17","description":"GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. 
It supports text and image inputs with strong performance across reasoning, coding, and tool use, while reducing latency and cost for large-scale deployments.","context_length":400000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.00000075","completion":"0.0000045","input_cache_read":"0.000000075","web_search":"0.01"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["gpt-5.4-mini"]},{"type":"model","id":"openai/gpt-5.4-nano","display_name":"OpenAI: GPT-5.4 Nano","created_at":"2026-03-18T01:56:46Z","owned_by":"azure","canonical_slug":"gpt-5.4-nano-2026-03-17","description":"GPT-5.4 nano is the most lightweight and cost-efficient variant of the GPT-5.4 family, optimized for speed-critical and high-volume tasks. 
It supports text and image inputs and is designed for low-latency use cases such as classification, data extraction, ranking, and sub-agent execution.","context_length":400000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.0000002","completion":"0.00000125","input_cache_read":"0.00000002","web_search":"0.01"},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["gpt-5.4-nano"]},{"type":"model","id":"openai/gpt-5.5","display_name":"OpenAI: GPT-5.5","created_at":"2026-04-25T01:38:17Z","owned_by":"azure","canonical_slug":"gpt-5.5-2026-04-25","description":"GPT-5.5 is OpenAI’s frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. 
It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs, enabling large-scale reasoning, coding, and multimodal workflows within a single system.","context_length":1050000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"gpt","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.00003","input_cache_read":"0.0000005","web_search":"0.01"},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["gpt-5.5","gpt-5.5-2026-04-24"]},{"type":"model","id":"volcengine/doubao-seed-1-8","display_name":"Doubao Seed 1.8","created_at":"2026-01-21T16:09:14Z","owned_by":"volcengine","canonical_slug":"doubao-seed-1-8-251228","description":"ByteDance Doubao Seed 1.8 via Volcengine. 
Next generation model with improved reasoning, enhanced capabilities, and better performance.","context_length":256000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0.00000012","completion":"0.00000029","input_cache_read":"0.000000023"},"top_provider":{"context_length":256000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["doubao-seed-1-8","doubao-seed-1-8-251228"]},{"type":"model","id":"volcengine/doubao-seed-2.0-code","display_name":"Doubao Seed 2.0 Code","created_at":"2026-02-14T15:48:32Z","owned_by":"volcengine","canonical_slug":"doubao-seed-2-0-code-preview-260215","description":"Doubao-Seed-2.0-Code 面向企业级编程需求优化，在 Seed 2.0 优秀的 Agent、VLM 能力基础上，特别增强了代码能力，不仅前端能力表现出众，也对企业常见的多语言编码需求做了特别优化，适合接入各种 AI 编程工具使用。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0.00000067","completion":"0.00000336","input_cache_read":"0.00000014","input_cache_write":"0.0000000024"},"top_provider":{"context_length":256000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["doubao-seed-2.0-code","doubao-seed-2-0-code-preview-260215"]},{"type":"model","id":"volcengine/doubao-seed-2.0-lite","display_name":"Doubao Seed 2.0 Lite","created_at":"2026-02-14T15:48:32Z","owned_by":"volcengine","canonical_slug":"doubao-seed-2-0-lite-260215","description":"Doubao-Seed-2.0-lite 
是面向高频企业场景兼顾性能与成本的均衡型模型，综合能力超越上一代Doubao-Seed-1.8。胜任非结构化信息处理、内容创作、搜索推荐、数据分析等生产型工作，支持长上下文、多源信息融合、多步指令执行与高保真结构化输出。在保障稳定效果的同时显著优化成本。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0.00000013","completion":"0.00000076","input_cache_read":"0.00000003","input_cache_write":"0.0000000024"},"top_provider":{"context_length":256000,"max_completion_tokens":32000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["doubao-seed-2.0-lite","doubao-seed-2-0-lite-260215"]},{"type":"model","id":"volcengine/doubao-seed-2.0-mini","display_name":"Doubao Seed 2.0 Mini","created_at":"2026-02-14T15:48:32Z","owned_by":"volcengine","canonical_slug":"doubao-seed-2-0-mini-260215","description":"Doubao-Seed-2.0-mini 面向低时延、高并发与成本敏感场景，强调快速响应与灵活推理部署。模型效果与Doubao-Seed-1.6相当。支持256k上下文、4档思考长度和多模态理解，适合成本和速度优先的轻量级任务。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0.00000006","completion":"0.00000056","input_cache_read":"0.00000002","input_cache_write":"0.0000000024"},"top_provider":{"context_length":256000,"max_completion_tokens":32000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["doubao-seed-2.0-mini","doubao-seed-2-0-mini-260215"]},{"type":"model","id":"volcengine/doubao-seed-2.0-pro","display_name":"Doubao Seed 2.0 Pro","created_at":"2026-02-14T15:48:32Z","owned_by":"volcengine","canonical_slug":"doubao-seed-2-0-pro-260215","description":"Doubao-Seed-2.0-pro是旗舰级全能通用模型，面向 Agent 
时代的复杂推理与长链路任务执行场景。强调多模态理解、长上下文推理、结构化生成与工具增强执行。复杂指令与多约束执行能力突出，可稳定应对多步复杂规划、复杂图文推理、视频内容理解与高难度分析等场景。","context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"doubao","instruct_type":null},"pricing":{"prompt":"0.00000067","completion":"0.00000336","input_cache_read":"0.00000014","input_cache_write":"0.0000000024"},"top_provider":{"context_length":256000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["doubao-seed-2.0-pro","doubao-seed-2-0-pro-260215"]},{"type":"model","id":"z-ai/glm-4.6","display_name":"Z.ai: GLM-4.6","created_at":"2026-01-21T16:09:15Z","owned_by":"zhipu","canonical_slug":"glm-4.6","description":"Zhipu GLM-4.6 with 200K context and strong instruction following. Excellent balance of performance and cost for Chinese language tasks.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.0000004","completion":"0.0000019","input_cache_read":"0.00000011","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["glm-4.6"]},{"type":"model","id":"z-ai/glm-4.7","display_name":"Z.ai: GLM 4.7","created_at":"2026-01-21T16:09:15Z","owned_by":"zhipu","canonical_slug":"glm-4.7","description":"Zhipu GLM-4.7 - latest flagship model with advanced reasoning, 200K context, and web search capabilities. 
Best-in-class Chinese language understanding.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.0000004","completion":"0.000002","input_cache_read":"0.00000008","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["glm-4.7"]},{"type":"model","id":"z-ai/glm-4.7-flash:free","display_name":"Z.ai: GLM-4.7-Flash (Free)","created_at":"2026-02-05T08:36:45Z","owned_by":"zhipu","canonical_slug":"glm-4.7-flash","description":"As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0","completion":"0","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format"],"expiration_date":null,"is_deprecated":false,"aliases":["glm-4.7-flash:free"]},{"type":"model","id":"z-ai/glm-4.7-flashx","display_name":"Z.ai: GLM-4.7 FlashX","created_at":"2026-02-05T08:37:05Z","owned_by":"zhipu","canonical_slug":"glm-4.7-flashx","description":"As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. 
It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.000000072","completion":"0.00000043","input_cache_read":"0.000000015","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["glm-4.7-flashx"]},{"type":"model","id":"z-ai/glm-5","display_name":"Z.ai: GLM-5","created_at":"2026-02-12T10:55:26Z","owned_by":"zhipu","canonical_slug":"glm-5","description":"GLM-5 is Z.ai’s flagship open-source foundation model engineered for complex systems design and long-horizon agent workflows. Built for expert developers, it delivers production-grade performance on large-scale programming tasks, rivaling leading closed-source models. 
With advanced agentic planning, deep backend reasoning, and iterative self-correction, GLM-5 moves beyond code generation to full-system construction and autonomous execution.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.000001","completion":"0.0000032","input_cache_read":"0.0000002","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["glm-5"]},{"type":"model","id":"z-ai/glm-5-turbo","display_name":"Z.ai: GLM-5-Turbo","created_at":"2026-03-16T09:01:01Z","owned_by":"zhipu","canonical_slug":"glm-5-turbo","description":"GLM-5-Turbo is a foundation model deeply optimized for the OpenClaw scenario. It has been specifically optimized for the core requirements of OpenClaw tasks since the training phase, enhancing key capabilities such as tool invocation, command following, timed and persistent tasks, and long-chain execution.","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.0000012","completion":"0.000004","input_cache_read":"0.00000024","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["glm-5-turbo"]},{"type":"model","id":"z-ai/glm-5.1","display_name":"Z.ai: GLM 
5.1","created_at":"2026-03-27T12:29:41Z","owned_by":"zhipu","canonical_slug":"glm-5.1","context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.0000014","completion":"0.0000044","input_cache_read":"0.00000026","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["glm-5.1"]},{"type":"model","id":"z-ai/glm-5v-turbo","display_name":"GLM-5V-Turbo","created_at":"2026-04-02T03:14:36Z","owned_by":"zhipu","canonical_slug":"glm-5v-turbo","description":"GLM-5V-Turbo is Z.AI’s first multimodal coding foundation model, built for vision-based coding tasks. It can natively process multimodal inputs such as images, video, and text, while also excelling at long-horizon planning, complex coding, and action execution. ","context_length":200000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"glm","instruct_type":null},"pricing":{"prompt":"0.0000012","completion":"0.000004","input_cache_read":"0.00000024","web_search":"0.01"},"top_provider":{"context_length":200000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["temperature","top_p","max_tokens","stop","tools","tool_choice","response_format","reasoning"],"expiration_date":null,"is_deprecated":false,"aliases":["glm-5v-turbo"]}],"has_more":false,"first_id":"anthropic/claude-haiku-4.5","last_id":"z-ai/glm-5v-turbo"}
